2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_amd64_toIR.c ---*/
4 /*--------------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2004-2017 OpenWorks LLP
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
28 Neither the names of the U.S. Department of Energy nor the
29 University of California nor the names of its contributors may be
30 used to endorse or promote products derived from this software
31 without prior written permission.
34 /* Translates AMD64 code to IR. */
38 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
39 to ensure a 64-bit value is being written.
43 * all arithmetic done at 64 bits
45 * no FP exceptions, except for handling stack over/underflow
47 * FP rounding mode observed only for float->int conversions and
48 int->float conversions which could lose accuracy, and for
49 float-to-float rounding. For all other operations,
50 round-to-nearest is used, regardless.
52 * some of the FCOM cases could do with testing -- not convinced
53 that the args are the right way round.
55 * FSAVE does not re-initialise the FPU; it should do
57 * FINIT not only initialises the FPU environment, it also zeroes
58 all the FP registers. It should leave the registers unchanged.
60 SAHF should cause eflags[1] == 1, and in fact it produces 0. As
61 per Intel docs this bit has no meaning anyway. Since PUSHF is the
62 only way to observe eflags[1], a proper fix would be to make that
65 This module uses global variables and so is not MT-safe (if that
66 should ever become relevant).
69 /* Notes re address size overrides (0x67).
71 According to the AMD documentation (24594 Rev 3.09, Sept 2003,
72 "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose
73 and System Instructions"), Section 1.2.3 ("Address-Size Override
76 0x67 applies to all explicit memory references, causing the top
77 32 bits of the effective address to become zero.
79 0x67 has no effect on stack references (push/pop); these always
82 0x67 changes the interpretation of instructions which implicitly
83 reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used
98 /* "Special" instructions.
100 This instruction decoder can decode three special instructions
101 which mean nothing natively (are no-ops as far as regs/mem are
102 concerned) but have meaning for supporting Valgrind. A special
103 instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D
104 48C1C73D 48C1C733 (in the standard interpretation, that means: rolq
105 $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi).
106 Following that, one of the following 3 are allowed (standard
107 interpretation in parentheses):
109 4887DB (xchgq %rbx,%rbx) %RDX = client_request ( %RAX )
110 4887C9 (xchgq %rcx,%rcx) %RAX = guest_NRADDR
111 4887D2 (xchgq %rdx,%rdx) call-noredir *%RAX
112 4887F6 (xchgq %rdi,%rdi) IR injection
114 Any other bytes following the 16-byte preamble are illegal and
115 constitute a failure in instruction decoding. This all assumes
116 that the preamble will never occur except in specific code
117 fragments designed for Valgrind to catch.
119 No prefixes may precede a "Special" instruction.
122 /* casLE (implementation of lock-prefixed insns) and rep-prefixed
123 insns: the side-exit back to the start of the insn is done with
124 Ijk_Boring. This is quite wrong, it should be done with
125 Ijk_NoRedir, since otherwise the side exit, which is intended to
126 restart the instruction for whatever reason, could go somewhere
127 entirely else. Doing it right (with Ijk_NoRedir jumps) would make
128 no-redir jumps performance critical, at least for rep-prefixed
129 instructions, since all iterations thereof would involve such a
130 jump. It's not such a big deal with casLE since the side exit is
131 only taken if the CAS fails, that is, the location is contended,
132 which is relatively unlikely.
134 Note also, the test for CAS success vs failure is done using
135 Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
136 Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it
137 shouldn't definedness-check these comparisons. See
138 COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
139 background/rationale.
142 /* LOCK prefixed instructions. These are translated using IR-level
143 CAS statements (IRCAS) and are believed to preserve atomicity, even
144 from the point of view of some other process racing against a
145 simulated one (presumably they communicate via a shared memory
148 Handlers which are aware of LOCK prefixes are:
149 dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
150 dis_cmpxchg_G_E (cmpxchg)
151 dis_Grp1 (add, or, adc, sbb, and, sub, xor)
155 dis_Grp8_Imm (bts, btc, btr)
156 dis_bt_G_E (bts, btc, btr)
161 #include "libvex_basictypes.h"
162 #include "libvex_ir.h"
164 #include "libvex_guest_amd64.h"
166 #include "main_util.h"
167 #include "main_globals.h"
168 #include "guest_generic_bb_to_IR.h"
169 #include "guest_generic_x87.h"
170 #include "guest_amd64_defs.h"
173 /*------------------------------------------------------------*/
175 /*------------------------------------------------------------*/
177 /* These are set at the start of the translation of an insn, right
178 down in disInstr_AMD64, so that we don't have to pass them around
179 endlessly. They are all constant during the translation of any
182 /* These are set at the start of the translation of a BB, so
183 that we don't have to pass them around endlessly. */
185 /* We need to know this to do sub-register accesses correctly. */
186 static VexEndness host_endness
;
188 /* Pointer to the guest code area (points to start of BB, not to the
189 insn being processed). */
190 static const UChar
* guest_code
;
192 /* The guest address corresponding to guest_code[0]. */
193 static Addr64 guest_RIP_bbstart
;
195 /* The guest address for the instruction currently being
197 static Addr64 guest_RIP_curr_instr
;
199 /* The IRSB* into which we're generating code. */
202 /* For ensuring that %rip-relative addressing is done right. A read
203 of %rip generates the address of the next instruction. It may be
204 that we don't conveniently know that inside disAMode(). For sanity
205 checking, if the next insn %rip is needed, we make a guess at what
206 it is, record that guess here, and set the accompanying Bool to
207 indicate that -- after this insn's decode is finished -- that guess
208 needs to be checked. */
210 /* At the start of each insn decode, is set to (0, False).
211 After the decode, if _mustcheck is now True, _assumed is
214 static Addr64 guest_RIP_next_assumed
;
215 static Bool guest_RIP_next_mustcheck
;
218 /*------------------------------------------------------------*/
219 /*--- Helpers for constructing IR. ---*/
220 /*------------------------------------------------------------*/
222 /* Generate a new temporary of the given type. */
223 static IRTemp
newTemp ( IRType ty
)
225 vassert(isPlausibleIRType(ty
));
226 return newIRTemp( irsb
->tyenv
, ty
);
229 /* Add a statement to the list held by "irsb". */
230 static void stmt ( IRStmt
* st
)
232 addStmtToIRSB( irsb
, st
);
235 /* Generate a statement "dst := e". */
236 static void assign ( IRTemp dst
, IRExpr
* e
)
238 stmt( IRStmt_WrTmp(dst
, e
) );
241 static IRExpr
* unop ( IROp op
, IRExpr
* a
)
243 return IRExpr_Unop(op
, a
);
246 static IRExpr
* binop ( IROp op
, IRExpr
* a1
, IRExpr
* a2
)
248 return IRExpr_Binop(op
, a1
, a2
);
251 static IRExpr
* triop ( IROp op
, IRExpr
* a1
, IRExpr
* a2
, IRExpr
* a3
)
253 return IRExpr_Triop(op
, a1
, a2
, a3
);
256 static IRExpr
* mkexpr ( IRTemp tmp
)
258 return IRExpr_RdTmp(tmp
);
261 static IRExpr
* mkU8 ( ULong i
)
264 return IRExpr_Const(IRConst_U8( (UChar
)i
));
267 static IRExpr
* mkU16 ( ULong i
)
269 vassert(i
< 0x10000ULL
);
270 return IRExpr_Const(IRConst_U16( (UShort
)i
));
273 static IRExpr
* mkU32 ( ULong i
)
275 vassert(i
< 0x100000000ULL
);
276 return IRExpr_Const(IRConst_U32( (UInt
)i
));
279 static IRExpr
* mkU64 ( ULong i
)
281 return IRExpr_Const(IRConst_U64(i
));
284 static IRExpr
* mkU ( IRType ty
, ULong i
)
287 case Ity_I8
: return mkU8(i
);
288 case Ity_I16
: return mkU16(i
);
289 case Ity_I32
: return mkU32(i
);
290 case Ity_I64
: return mkU64(i
);
291 default: vpanic("mkU(amd64)");
295 static void storeLE ( IRExpr
* addr
, IRExpr
* data
)
297 stmt( IRStmt_Store(Iend_LE
, addr
, data
) );
300 static IRExpr
* loadLE ( IRType ty
, IRExpr
* addr
)
302 return IRExpr_Load(Iend_LE
, ty
, addr
);
305 static IROp
mkSizedOp ( IRType ty
, IROp op8
)
307 vassert(op8
== Iop_Add8
|| op8
== Iop_Sub8
309 || op8
== Iop_Or8
|| op8
== Iop_And8
|| op8
== Iop_Xor8
310 || op8
== Iop_Shl8
|| op8
== Iop_Shr8
|| op8
== Iop_Sar8
311 || op8
== Iop_CmpEQ8
|| op8
== Iop_CmpNE8
312 || op8
== Iop_CasCmpNE8
313 || op8
== Iop_Not8
);
315 case Ity_I8
: return 0 +op8
;
316 case Ity_I16
: return 1 +op8
;
317 case Ity_I32
: return 2 +op8
;
318 case Ity_I64
: return 3 +op8
;
319 default: vpanic("mkSizedOp(amd64)");
324 IRExpr
* doScalarWidening ( Int szSmall
, Int szBig
, Bool signd
, IRExpr
* src
)
326 if (szSmall
== 1 && szBig
== 4) {
327 return unop(signd
? Iop_8Sto32
: Iop_8Uto32
, src
);
329 if (szSmall
== 1 && szBig
== 2) {
330 return unop(signd
? Iop_8Sto16
: Iop_8Uto16
, src
);
332 if (szSmall
== 2 && szBig
== 4) {
333 return unop(signd
? Iop_16Sto32
: Iop_16Uto32
, src
);
335 if (szSmall
== 1 && szBig
== 8 && !signd
) {
336 return unop(Iop_8Uto64
, src
);
338 if (szSmall
== 1 && szBig
== 8 && signd
) {
339 return unop(Iop_8Sto64
, src
);
341 if (szSmall
== 2 && szBig
== 8 && !signd
) {
342 return unop(Iop_16Uto64
, src
);
344 if (szSmall
== 2 && szBig
== 8 && signd
) {
345 return unop(Iop_16Sto64
, src
);
347 vpanic("doScalarWidening(amd64)");
351 void putGuarded ( Int gstOffB
, IRExpr
* guard
, IRExpr
* value
)
353 IRType ty
= typeOfIRExpr(irsb
->tyenv
, value
);
354 stmt( IRStmt_Put(gstOffB
,
355 IRExpr_ITE(guard
, value
, IRExpr_Get(gstOffB
, ty
))) );
359 /*------------------------------------------------------------*/
360 /*--- Debugging output ---*/
361 /*------------------------------------------------------------*/
363 /* Bomb out if we can't handle something. */
364 __attribute__ ((noreturn
))
365 static void unimplemented ( const HChar
* str
)
367 vex_printf("amd64toIR: unimplemented feature\n");
/* Trace-time printing of decoded instructions (front-end tracing only). */
#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

/* Trace-time formatting into a buffer (front-end tracing only). */
#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)
380 /*------------------------------------------------------------*/
381 /*--- Offsets of various parts of the amd64 guest state. ---*/
382 /*------------------------------------------------------------*/
/* Offsets of the various parts of the amd64 guest state. */
#define OFFB_RAX       offsetof(VexGuestAMD64State,guest_RAX)
#define OFFB_RBX       offsetof(VexGuestAMD64State,guest_RBX)
#define OFFB_RCX       offsetof(VexGuestAMD64State,guest_RCX)
#define OFFB_RDX       offsetof(VexGuestAMD64State,guest_RDX)
#define OFFB_RSP       offsetof(VexGuestAMD64State,guest_RSP)
#define OFFB_RBP       offsetof(VexGuestAMD64State,guest_RBP)
#define OFFB_RSI       offsetof(VexGuestAMD64State,guest_RSI)
#define OFFB_RDI       offsetof(VexGuestAMD64State,guest_RDI)
#define OFFB_R8        offsetof(VexGuestAMD64State,guest_R8)
#define OFFB_R9        offsetof(VexGuestAMD64State,guest_R9)
#define OFFB_R10       offsetof(VexGuestAMD64State,guest_R10)
#define OFFB_R11       offsetof(VexGuestAMD64State,guest_R11)
#define OFFB_R12       offsetof(VexGuestAMD64State,guest_R12)
#define OFFB_R13       offsetof(VexGuestAMD64State,guest_R13)
#define OFFB_R14       offsetof(VexGuestAMD64State,guest_R14)
#define OFFB_R15       offsetof(VexGuestAMD64State,guest_R15)

#define OFFB_RIP       offsetof(VexGuestAMD64State,guest_RIP)

#define OFFB_FS_CONST  offsetof(VexGuestAMD64State,guest_FS_CONST)
#define OFFB_GS_CONST  offsetof(VexGuestAMD64State,guest_GS_CONST)

#define OFFB_CC_OP     offsetof(VexGuestAMD64State,guest_CC_OP)
#define OFFB_CC_DEP1   offsetof(VexGuestAMD64State,guest_CC_DEP1)
#define OFFB_CC_DEP2   offsetof(VexGuestAMD64State,guest_CC_DEP2)
#define OFFB_CC_NDEP   offsetof(VexGuestAMD64State,guest_CC_NDEP)

#define OFFB_FPREGS    offsetof(VexGuestAMD64State,guest_FPREG[0])
#define OFFB_FPTAGS    offsetof(VexGuestAMD64State,guest_FPTAG[0])
#define OFFB_DFLAG     offsetof(VexGuestAMD64State,guest_DFLAG)
#define OFFB_ACFLAG    offsetof(VexGuestAMD64State,guest_ACFLAG)
#define OFFB_IDFLAG    offsetof(VexGuestAMD64State,guest_IDFLAG)
#define OFFB_FTOP      offsetof(VexGuestAMD64State,guest_FTOP)
#define OFFB_FC3210    offsetof(VexGuestAMD64State,guest_FC3210)
#define OFFB_FPROUND   offsetof(VexGuestAMD64State,guest_FPROUND)

#define OFFB_SSEROUND  offsetof(VexGuestAMD64State,guest_SSEROUND)
#define OFFB_YMM0      offsetof(VexGuestAMD64State,guest_YMM0)
#define OFFB_YMM1      offsetof(VexGuestAMD64State,guest_YMM1)
#define OFFB_YMM2      offsetof(VexGuestAMD64State,guest_YMM2)
#define OFFB_YMM3      offsetof(VexGuestAMD64State,guest_YMM3)
#define OFFB_YMM4      offsetof(VexGuestAMD64State,guest_YMM4)
#define OFFB_YMM5      offsetof(VexGuestAMD64State,guest_YMM5)
#define OFFB_YMM6      offsetof(VexGuestAMD64State,guest_YMM6)
#define OFFB_YMM7      offsetof(VexGuestAMD64State,guest_YMM7)
#define OFFB_YMM8      offsetof(VexGuestAMD64State,guest_YMM8)
#define OFFB_YMM9      offsetof(VexGuestAMD64State,guest_YMM9)
#define OFFB_YMM10     offsetof(VexGuestAMD64State,guest_YMM10)
#define OFFB_YMM11     offsetof(VexGuestAMD64State,guest_YMM11)
#define OFFB_YMM12     offsetof(VexGuestAMD64State,guest_YMM12)
#define OFFB_YMM13     offsetof(VexGuestAMD64State,guest_YMM13)
#define OFFB_YMM14     offsetof(VexGuestAMD64State,guest_YMM14)
#define OFFB_YMM15     offsetof(VexGuestAMD64State,guest_YMM15)
#define OFFB_YMM16     offsetof(VexGuestAMD64State,guest_YMM16)

#define OFFB_EMNOTE    offsetof(VexGuestAMD64State,guest_EMNOTE)
#define OFFB_CMSTART   offsetof(VexGuestAMD64State,guest_CMSTART)
#define OFFB_CMLEN     offsetof(VexGuestAMD64State,guest_CMLEN)

#define OFFB_NRADDR    offsetof(VexGuestAMD64State,guest_NRADDR)
446 /*------------------------------------------------------------*/
447 /*--- Helper bits and pieces for deconstructing the ---*/
448 /*--- amd64 insn stream. ---*/
449 /*------------------------------------------------------------*/
/* This is the AMD64 register encoding -- integer regs. */
#define R_RAX 0
#define R_RCX 1
#define R_RDX 2
#define R_RBX 3
#define R_RSP 4
#define R_RBP 5
#define R_RSI 6
#define R_RDI 7
#define R_R8  8
#define R_R9  9
#define R_R10 10
#define R_R11 11
#define R_R12 12
#define R_R13 13
#define R_R14 14
#define R_R15 15

/* This is the Intel register encoding -- segment regs. */
#define R_ES 0
#define R_CS 1
#define R_SS 2
#define R_DS 3
#define R_FS 4
#define R_GS 5
478 /* Various simple conversions */
480 static ULong
extend_s_8to64 ( UChar x
)
482 return (ULong
)((Long
)(((ULong
)x
) << 56) >> 56);
485 static ULong
extend_s_16to64 ( UShort x
)
487 return (ULong
)((Long
)(((ULong
)x
) << 48) >> 48);
490 static ULong
extend_s_32to64 ( UInt x
)
492 return (ULong
)((Long
)(((ULong
)x
) << 32) >> 32);
495 /* Figure out whether the mod and rm parts of a modRM byte refer to a
496 register or memory. If so, the byte will have the form 11XXXYYY,
497 where YYY is the register number. */
499 static Bool
epartIsReg ( UChar mod_reg_rm
)
501 return toBool(0xC0 == (mod_reg_rm
& 0xC0));
504 /* Extract the 'g' field from a modRM byte. This only produces 3
505 bits, which is not a complete register number. You should avoid
506 this function if at all possible. */
508 static Int
gregLO3ofRM ( UChar mod_reg_rm
)
510 return (Int
)( (mod_reg_rm
>> 3) & 7 );
513 /* Ditto the 'e' field of a modRM byte. */
515 static Int
eregLO3ofRM ( UChar mod_reg_rm
)
517 return (Int
)(mod_reg_rm
& 0x7);
520 /* Get a 8/16/32-bit unsigned value out of the insn stream. */
522 static inline UChar
getUChar ( Long delta
)
524 UChar v
= guest_code
[delta
+0];
528 static UInt
getUDisp16 ( Long delta
)
530 UInt v
= guest_code
[delta
+1]; v
<<= 8;
531 v
|= guest_code
[delta
+0];
535 //.. static UInt getUDisp ( Int size, Long delta )
538 //.. case 4: return getUDisp32(delta);
539 //.. case 2: return getUDisp16(delta);
540 //.. case 1: return getUChar(delta);
541 //.. default: vpanic("getUDisp(x86)");
543 //.. return 0; /*notreached*/
547 /* Get a byte value out of the insn stream and sign-extend to 64
549 static Long
getSDisp8 ( Long delta
)
551 return extend_s_8to64( guest_code
[delta
] );
554 /* Get a 16-bit value out of the insn stream and sign-extend to 64
556 static Long
getSDisp16 ( Long delta
)
558 UInt v
= guest_code
[delta
+1]; v
<<= 8;
559 v
|= guest_code
[delta
+0];
560 return extend_s_16to64( (UShort
)v
);
563 /* Get a 32-bit value out of the insn stream and sign-extend to 64
565 static Long
getSDisp32 ( Long delta
)
567 UInt v
= guest_code
[delta
+3]; v
<<= 8;
568 v
|= guest_code
[delta
+2]; v
<<= 8;
569 v
|= guest_code
[delta
+1]; v
<<= 8;
570 v
|= guest_code
[delta
+0];
571 return extend_s_32to64( v
);
574 /* Get a 64-bit value out of the insn stream. */
575 static Long
getDisp64 ( Long delta
)
578 v
|= guest_code
[delta
+7]; v
<<= 8;
579 v
|= guest_code
[delta
+6]; v
<<= 8;
580 v
|= guest_code
[delta
+5]; v
<<= 8;
581 v
|= guest_code
[delta
+4]; v
<<= 8;
582 v
|= guest_code
[delta
+3]; v
<<= 8;
583 v
|= guest_code
[delta
+2]; v
<<= 8;
584 v
|= guest_code
[delta
+1]; v
<<= 8;
585 v
|= guest_code
[delta
+0];
589 /* Note: because AMD64 doesn't allow 64-bit literals, it is an error
590 if this is called with size==8. Should not happen. */
591 static Long
getSDisp ( Int size
, Long delta
)
594 case 4: return getSDisp32(delta
);
595 case 2: return getSDisp16(delta
);
596 case 1: return getSDisp8(delta
);
597 default: vpanic("getSDisp(amd64)");
601 static ULong
mkSizeMask ( Int sz
)
604 case 1: return 0x00000000000000FFULL
;
605 case 2: return 0x000000000000FFFFULL
;
606 case 4: return 0x00000000FFFFFFFFULL
;
607 case 8: return 0xFFFFFFFFFFFFFFFFULL
;
608 default: vpanic("mkSzMask(amd64)");
612 static Int
imin ( Int a
, Int b
)
614 return (a
< b
) ? a
: b
;
617 static IRType
szToITy ( Int n
)
620 case 1: return Ity_I8
;
621 case 2: return Ity_I16
;
622 case 4: return Ity_I32
;
623 case 8: return Ity_I64
;
624 default: vex_printf("\nszToITy(%d)\n", n
);
625 vpanic("szToITy(amd64)");
630 /*------------------------------------------------------------*/
631 /*--- For dealing with prefixes. ---*/
632 /*------------------------------------------------------------*/
634 /* The idea is to pass around an int holding a bitmask summarising
635 info from the prefixes seen on the current instruction, including
636 info from the REX byte. This info is used in various places, but
637 most especially when making sense of register fields in
640 The top 8 bits of the prefix are 0x55, just as a hacky way to
641 ensure it really is a valid prefix.
643 Things you can safely assume about a well-formed prefix:
644 * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set.
645 * if REX is not present then REXW,REXR,REXX,REXB will read
647 * F2 and F3 will not both be 1.
652 #define PFX_ASO (1<<0) /* address-size override present (0x67) */
653 #define PFX_66 (1<<1) /* operand-size override-to-16 present (0x66) */
654 #define PFX_REX (1<<2) /* REX byte present (0x40 to 0x4F) */
655 #define PFX_REXW (1<<3) /* REX W bit, if REX present, else 0 */
656 #define PFX_REXR (1<<4) /* REX R bit, if REX present, else 0 */
657 #define PFX_REXX (1<<5) /* REX X bit, if REX present, else 0 */
658 #define PFX_REXB (1<<6) /* REX B bit, if REX present, else 0 */
659 #define PFX_LOCK (1<<7) /* bus LOCK prefix present (0xF0) */
660 #define PFX_F2 (1<<8) /* REP/REPE/REPZ prefix present (0xF2) */
661 #define PFX_F3 (1<<9) /* REPNE/REPNZ prefix present (0xF3) */
662 #define PFX_CS (1<<10) /* CS segment prefix present (0x2E) */
663 #define PFX_DS (1<<11) /* DS segment prefix present (0x3E) */
664 #define PFX_ES (1<<12) /* ES segment prefix present (0x26) */
665 #define PFX_FS (1<<13) /* FS segment prefix present (0x64) */
666 #define PFX_GS (1<<14) /* GS segment prefix present (0x65) */
667 #define PFX_SS (1<<15) /* SS segment prefix present (0x36) */
668 #define PFX_VEX (1<<16) /* VEX prefix present (0xC4 or 0xC5) */
669 #define PFX_VEXL (1<<17) /* VEX L bit, if VEX present, else 0 */
670 /* The extra register field VEX.vvvv is encoded (after not-ing it) as
671 PFX_VEXnV3 .. PFX_VEXnV0, so these must occupy adjacent bit
673 #define PFX_VEXnV0 (1<<18) /* ~VEX vvvv[0], if VEX present, else 0 */
674 #define PFX_VEXnV1 (1<<19) /* ~VEX vvvv[1], if VEX present, else 0 */
675 #define PFX_VEXnV2 (1<<20) /* ~VEX vvvv[2], if VEX present, else 0 */
676 #define PFX_VEXnV3 (1<<21) /* ~VEX vvvv[3], if VEX present, else 0 */
679 #define PFX_EMPTY 0x55000000
681 static Bool
IS_VALID_PFX ( Prefix pfx
) {
682 return toBool((pfx
& 0xFF000000) == PFX_EMPTY
);
685 static Bool
haveREX ( Prefix pfx
) {
686 return toBool(pfx
& PFX_REX
);
689 static Int
getRexW ( Prefix pfx
) {
690 return (pfx
& PFX_REXW
) ? 1 : 0;
692 static Int
getRexR ( Prefix pfx
) {
693 return (pfx
& PFX_REXR
) ? 1 : 0;
695 static Int
getRexX ( Prefix pfx
) {
696 return (pfx
& PFX_REXX
) ? 1 : 0;
698 static Int
getRexB ( Prefix pfx
) {
699 return (pfx
& PFX_REXB
) ? 1 : 0;
702 /* Check a prefix doesn't have F2 or F3 set in it, since usually that
703 completely changes what instruction it really is. */
704 static Bool
haveF2orF3 ( Prefix pfx
) {
705 return toBool((pfx
& (PFX_F2
|PFX_F3
)) > 0);
707 static Bool
haveF2andF3 ( Prefix pfx
) {
708 return toBool((pfx
& (PFX_F2
|PFX_F3
)) == (PFX_F2
|PFX_F3
));
710 static Bool
haveF2 ( Prefix pfx
) {
711 return toBool((pfx
& PFX_F2
) > 0);
713 static Bool
haveF3 ( Prefix pfx
) {
714 return toBool((pfx
& PFX_F3
) > 0);
717 static Bool
have66 ( Prefix pfx
) {
718 return toBool((pfx
& PFX_66
) > 0);
720 static Bool
haveASO ( Prefix pfx
) {
721 return toBool((pfx
& PFX_ASO
) > 0);
723 static Bool
haveLOCK ( Prefix pfx
) {
724 return toBool((pfx
& PFX_LOCK
) > 0);
727 /* Return True iff pfx has 66 set and F2 and F3 clear */
728 static Bool
have66noF2noF3 ( Prefix pfx
)
731 toBool((pfx
& (PFX_66
|PFX_F2
|PFX_F3
)) == PFX_66
);
734 /* Return True iff pfx has F2 set and 66 and F3 clear */
735 static Bool
haveF2no66noF3 ( Prefix pfx
)
738 toBool((pfx
& (PFX_66
|PFX_F2
|PFX_F3
)) == PFX_F2
);
741 /* Return True iff pfx has F3 set and 66 and F2 clear */
742 static Bool
haveF3no66noF2 ( Prefix pfx
)
745 toBool((pfx
& (PFX_66
|PFX_F2
|PFX_F3
)) == PFX_F3
);
748 /* Return True iff pfx has F3 set and F2 clear */
749 static Bool
haveF3noF2 ( Prefix pfx
)
752 toBool((pfx
& (PFX_F2
|PFX_F3
)) == PFX_F3
);
755 /* Return True iff pfx has F2 set and F3 clear */
756 static Bool
haveF2noF3 ( Prefix pfx
)
759 toBool((pfx
& (PFX_F2
|PFX_F3
)) == PFX_F2
);
762 /* Return True iff pfx has F2 and F3 clear */
763 static Bool
haveNoF2noF3 ( Prefix pfx
)
766 toBool((pfx
& (PFX_F2
|PFX_F3
)) == 0);
769 /* Return True iff pfx has 66, F2 and F3 clear */
770 static Bool
haveNo66noF2noF3 ( Prefix pfx
)
773 toBool((pfx
& (PFX_66
|PFX_F2
|PFX_F3
)) == 0);
776 /* Return True iff pfx has any of 66, F2 and F3 set */
777 static Bool
have66orF2orF3 ( Prefix pfx
)
779 return toBool( ! haveNo66noF2noF3(pfx
) );
782 /* Return True iff pfx has 66 or F3 set */
783 static Bool
have66orF3 ( Prefix pfx
)
785 return toBool((pfx
& (PFX_66
|PFX_F3
)) > 0);
788 /* Clear all the segment-override bits in a prefix. */
789 static Prefix
clearSegBits ( Prefix p
)
792 p
& ~(PFX_CS
| PFX_DS
| PFX_ES
| PFX_FS
| PFX_GS
| PFX_SS
);
795 /* Get the (inverted, hence back to "normal") VEX.vvvv field. */
796 static UInt
getVexNvvvv ( Prefix pfx
) {
798 r
/= (UInt
)PFX_VEXnV0
; /* pray this turns into a shift */
802 static Bool
haveVEX ( Prefix pfx
) {
803 return toBool(pfx
& PFX_VEX
);
806 static Int
getVexL ( Prefix pfx
) {
807 return (pfx
& PFX_VEXL
) ? 1 : 0;
811 /*------------------------------------------------------------*/
812 /*--- For dealing with escapes ---*/
813 /*------------------------------------------------------------*/
/* Escapes come after the prefixes, but before the primary opcode
   byte.  They escape the primary opcode byte into a bigger space.
   The 0xF0000000 isn't significant, except so as to make it not
   overlap valid Prefix values, for sanity checking. */
typedef
   enum {
      ESC_NONE=0xF0000000, // none
      ESC_0F,              // 0F
      ESC_0F38,            // 0F 38
      ESC_0F3A             // 0F 3A
   }
   Escape;
832 /*------------------------------------------------------------*/
833 /*--- For dealing with integer registers ---*/
834 /*------------------------------------------------------------*/
836 /* This is somewhat complex. The rules are:
838 For 64, 32 and 16 bit register references, the e or g fields in the
839 modrm bytes supply the low 3 bits of the register number. The
840 fourth (most-significant) bit of the register number is supplied by
841 the REX byte, if it is present; else that bit is taken to be zero.
843 The REX.R bit supplies the high bit corresponding to the g register
844 field, and the REX.B bit supplies the high bit corresponding to the
845 e register field (when the mod part of modrm indicates that modrm's
846 e component refers to a register and not to memory).
848 The REX.X bit supplies a high register bit for certain registers
849 in SIB address modes, and is generally rarely used.
851 For 8 bit register references, the presence of the REX byte itself
852 has significance. If there is no REX present, then the 3-bit
853 number extracted from the modrm e or g field is treated as an index
854 into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the
855 old x86 encoding scheme.
857 But if there is a REX present, the register reference is
858 interpreted in the same way as for 64/32/16-bit references: a high
859 bit is extracted from REX, giving a 4-bit number, and the denoted
860 register is the lowest 8 bits of the 16 integer registers denoted
861 by the number. In particular, values 3 through 7 of this sequence
862 do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of
865 The REX.W bit has no bearing at all on register numbers. Instead
866 its presence indicates that the operand size is to be overridden
867 from its default value (32 bits) to 64 bits instead. This is in
868 the same fashion that an 0x66 prefix indicates the operand size is
869 to be overridden from 32 bits down to 16 bits. When both REX.W and
870 0x66 are present there is a conflict, and REX.W takes precedence.
872 Rather than try to handle this complexity using a single huge
873 function, several smaller ones are provided. The aim is to make it
874 as difficult as possible to screw up register decoding in a subtle
875 and hard-to-track-down way.
877 Because these routines fish around in the host's memory (that is,
878 in the guest state area) for sub-parts of guest registers, their
879 correctness depends on the host's endianness. So far these
880 routines only work for little-endian hosts. Those for which
881 endianness is important have assertions to ensure sanity.
885 /* About the simplest question you can ask: where do the 64-bit
886 integer registers live (in the guest state) ? */
888 static Int
integerGuestReg64Offset ( UInt reg
)
891 case R_RAX
: return OFFB_RAX
;
892 case R_RCX
: return OFFB_RCX
;
893 case R_RDX
: return OFFB_RDX
;
894 case R_RBX
: return OFFB_RBX
;
895 case R_RSP
: return OFFB_RSP
;
896 case R_RBP
: return OFFB_RBP
;
897 case R_RSI
: return OFFB_RSI
;
898 case R_RDI
: return OFFB_RDI
;
899 case R_R8
: return OFFB_R8
;
900 case R_R9
: return OFFB_R9
;
901 case R_R10
: return OFFB_R10
;
902 case R_R11
: return OFFB_R11
;
903 case R_R12
: return OFFB_R12
;
904 case R_R13
: return OFFB_R13
;
905 case R_R14
: return OFFB_R14
;
906 case R_R15
: return OFFB_R15
;
907 default: vpanic("integerGuestReg64Offset(amd64)");
912 /* Produce the name of an integer register, for printing purposes.
913 reg is a number in the range 0 .. 15 that has been generated from a
914 3-bit reg-field number and a REX extension bit. irregular denotes
915 the case where sz==1 and no REX byte is present. */
918 const HChar
* nameIReg ( Int sz
, UInt reg
, Bool irregular
)
920 static const HChar
* ireg64_names
[16]
921 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
922 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
923 static const HChar
* ireg32_names
[16]
924 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
925 "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" };
926 static const HChar
* ireg16_names
[16]
927 = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di",
928 "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" };
929 static const HChar
* ireg8_names
[16]
930 = { "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil",
931 "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" };
932 static const HChar
* ireg8_irregular
[8]
933 = { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh" };
940 vassert(irregular
== False
);
944 case 8: return ireg64_names
[reg
];
945 case 4: return ireg32_names
[reg
];
946 case 2: return ireg16_names
[reg
];
947 case 1: if (irregular
) {
948 return ireg8_irregular
[reg
];
950 return ireg8_names
[reg
];
952 default: vpanic("nameIReg(amd64)");
956 /* Using the same argument conventions as nameIReg, produce the
957 guest state offset of an integer register. */
960 Int
offsetIReg ( Int sz
, UInt reg
, Bool irregular
)
967 vassert(irregular
== False
);
970 /* Deal with irregular case -- sz==1 and no REX present */
971 if (sz
== 1 && irregular
) {
973 case R_RSP
: return 1+ OFFB_RAX
;
974 case R_RBP
: return 1+ OFFB_RCX
;
975 case R_RSI
: return 1+ OFFB_RDX
;
976 case R_RDI
: return 1+ OFFB_RBX
;
977 default: break; /* use the normal case */
982 return integerGuestReg64Offset(reg
);
986 /* Read the %CL register :: Ity_I8, for shift/rotate operations. */
988 static IRExpr
* getIRegCL ( void )
990 vassert(host_endness
== VexEndnessLE
);
991 return IRExpr_Get( OFFB_RCX
, Ity_I8
);
995 /* Write to the %AH register. */
997 static void putIRegAH ( IRExpr
* e
)
999 vassert(host_endness
== VexEndnessLE
);
1000 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == Ity_I8
);
1001 stmt( IRStmt_Put( OFFB_RAX
+1, e
) );
1005 /* Read/write various widths of %RAX, as it has various
1006 special-purpose uses. */
1008 static const HChar
* nameIRegRAX ( Int sz
)
1011 case 1: return "%al";
1012 case 2: return "%ax";
1013 case 4: return "%eax";
1014 case 8: return "%rax";
1015 default: vpanic("nameIRegRAX(amd64)");
1019 static IRExpr
* getIRegRAX ( Int sz
)
1021 vassert(host_endness
== VexEndnessLE
);
1023 case 1: return IRExpr_Get( OFFB_RAX
, Ity_I8
);
1024 case 2: return IRExpr_Get( OFFB_RAX
, Ity_I16
);
1025 case 4: return unop(Iop_64to32
, IRExpr_Get( OFFB_RAX
, Ity_I64
));
1026 case 8: return IRExpr_Get( OFFB_RAX
, Ity_I64
);
1027 default: vpanic("getIRegRAX(amd64)");
1031 static void putIRegRAX ( Int sz
, IRExpr
* e
)
1033 IRType ty
= typeOfIRExpr(irsb
->tyenv
, e
);
1034 vassert(host_endness
== VexEndnessLE
);
1036 case 8: vassert(ty
== Ity_I64
);
1037 stmt( IRStmt_Put( OFFB_RAX
, e
));
1039 case 4: vassert(ty
== Ity_I32
);
1040 stmt( IRStmt_Put( OFFB_RAX
, unop(Iop_32Uto64
,e
) ));
1042 case 2: vassert(ty
== Ity_I16
);
1043 stmt( IRStmt_Put( OFFB_RAX
, e
));
1045 case 1: vassert(ty
== Ity_I8
);
1046 stmt( IRStmt_Put( OFFB_RAX
, e
));
1048 default: vpanic("putIRegRAX(amd64)");
1053 /* Read/write various widths of %RDX, as it has various
1054 special-purpose uses. */
1056 static const HChar
* nameIRegRDX ( Int sz
)
1059 case 1: return "%dl";
1060 case 2: return "%dx";
1061 case 4: return "%edx";
1062 case 8: return "%rdx";
1063 default: vpanic("nameIRegRDX(amd64)");
1067 static IRExpr
* getIRegRDX ( Int sz
)
1069 vassert(host_endness
== VexEndnessLE
);
1071 case 1: return IRExpr_Get( OFFB_RDX
, Ity_I8
);
1072 case 2: return IRExpr_Get( OFFB_RDX
, Ity_I16
);
1073 case 4: return unop(Iop_64to32
, IRExpr_Get( OFFB_RDX
, Ity_I64
));
1074 case 8: return IRExpr_Get( OFFB_RDX
, Ity_I64
);
1075 default: vpanic("getIRegRDX(amd64)");
1079 static void putIRegRDX ( Int sz
, IRExpr
* e
)
1081 vassert(host_endness
== VexEndnessLE
);
1082 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == szToITy(sz
));
1084 case 8: stmt( IRStmt_Put( OFFB_RDX
, e
));
1086 case 4: stmt( IRStmt_Put( OFFB_RDX
, unop(Iop_32Uto64
,e
) ));
1088 case 2: stmt( IRStmt_Put( OFFB_RDX
, e
));
1090 case 1: stmt( IRStmt_Put( OFFB_RDX
, e
));
1092 default: vpanic("putIRegRDX(amd64)");
1097 /* Simplistic functions to deal with the integer registers as a
1098 straightforward bank of 16 64-bit regs. */
1100 static IRExpr
* getIReg64 ( UInt regno
)
1102 return IRExpr_Get( integerGuestReg64Offset(regno
),
1106 static void putIReg64 ( UInt regno
, IRExpr
* e
)
1108 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I64
);
1109 stmt( IRStmt_Put( integerGuestReg64Offset(regno
), e
) );
1112 static const HChar
* nameIReg64 ( UInt regno
)
1114 return nameIReg( 8, regno
, False
);
1118 /* Simplistic functions to deal with the lower halves of integer
1119 registers as a straightforward bank of 16 32-bit regs. */
1121 static IRExpr
* getIReg32 ( UInt regno
)
1123 vassert(host_endness
== VexEndnessLE
);
1124 return unop(Iop_64to32
,
1125 IRExpr_Get( integerGuestReg64Offset(regno
),
1129 static void putIReg32 ( UInt regno
, IRExpr
* e
)
1131 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I32
);
1132 stmt( IRStmt_Put( integerGuestReg64Offset(regno
),
1133 unop(Iop_32Uto64
,e
) ) );
1136 static const HChar
* nameIReg32 ( UInt regno
)
1138 return nameIReg( 4, regno
, False
);
1142 /* Simplistic functions to deal with the lower quarters of integer
1143 registers as a straightforward bank of 16 16-bit regs. */
1145 static IRExpr
* getIReg16 ( UInt regno
)
1147 vassert(host_endness
== VexEndnessLE
);
1148 return IRExpr_Get( integerGuestReg64Offset(regno
),
1152 static void putIReg16 ( UInt regno
, IRExpr
* e
)
1154 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I16
);
1155 stmt( IRStmt_Put( integerGuestReg64Offset(regno
),
1156 unop(Iop_16Uto64
,e
) ) );
1159 static const HChar
* nameIReg16 ( UInt regno
)
1161 return nameIReg( 2, regno
, False
);
1165 /* Sometimes what we know is a 3-bit register number, a REX byte, and
1166 which field of the REX byte is to be used to extend to a 4-bit
1167 number. These functions cater for that situation.
1169 static IRExpr
* getIReg64rexX ( Prefix pfx
, UInt lo3bits
)
1171 vassert(lo3bits
< 8);
1172 vassert(IS_VALID_PFX(pfx
));
1173 return getIReg64( lo3bits
| (getRexX(pfx
) << 3) );
1176 static const HChar
* nameIReg64rexX ( Prefix pfx
, UInt lo3bits
)
1178 vassert(lo3bits
< 8);
1179 vassert(IS_VALID_PFX(pfx
));
1180 return nameIReg( 8, lo3bits
| (getRexX(pfx
) << 3), False
);
1183 static const HChar
* nameIRegRexB ( Int sz
, Prefix pfx
, UInt lo3bits
)
1185 vassert(lo3bits
< 8);
1186 vassert(IS_VALID_PFX(pfx
));
1187 vassert(sz
== 8 || sz
== 4 || sz
== 2 || sz
== 1);
1188 return nameIReg( sz
, lo3bits
| (getRexB(pfx
) << 3),
1189 toBool(sz
==1 && !haveREX(pfx
)) );
1192 static IRExpr
* getIRegRexB ( Int sz
, Prefix pfx
, UInt lo3bits
)
1194 vassert(lo3bits
< 8);
1195 vassert(IS_VALID_PFX(pfx
));
1196 vassert(sz
== 8 || sz
== 4 || sz
== 2 || sz
== 1);
1199 return unop(Iop_64to32
,
1201 offsetIReg( sz
, lo3bits
| (getRexB(pfx
) << 3),
1202 False
/*!irregular*/ ),
1208 offsetIReg( sz
, lo3bits
| (getRexB(pfx
) << 3),
1209 toBool(sz
==1 && !haveREX(pfx
)) ),
1215 static void putIRegRexB ( Int sz
, Prefix pfx
, UInt lo3bits
, IRExpr
* e
)
1217 vassert(lo3bits
< 8);
1218 vassert(IS_VALID_PFX(pfx
));
1219 vassert(sz
== 8 || sz
== 4 || sz
== 2 || sz
== 1);
1220 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == szToITy(sz
));
1222 offsetIReg( sz
, lo3bits
| (getRexB(pfx
) << 3),
1223 toBool(sz
==1 && !haveREX(pfx
)) ),
1224 sz
==4 ? unop(Iop_32Uto64
,e
) : e
1229 /* Functions for getting register numbers from modrm bytes and REX
1230 when we don't have to consider the complexities of integer subreg
1233 /* Extract the g reg field from a modRM byte, and augment it using the
1234 REX.R bit from the supplied REX byte. The R bit usually is
1235 associated with the g register field.
1237 static UInt
gregOfRexRM ( Prefix pfx
, UChar mod_reg_rm
)
1239 Int reg
= (Int
)( (mod_reg_rm
>> 3) & 7 );
1240 reg
+= (pfx
& PFX_REXR
) ? 8 : 0;
1244 /* Extract the e reg field from a modRM byte, and augment it using the
1245 REX.B bit from the supplied REX byte. The B bit usually is
1246 associated with the e register field (when modrm indicates e is a
1249 static UInt
eregOfRexRM ( Prefix pfx
, UChar mod_reg_rm
)
1252 vassert(epartIsReg(mod_reg_rm
));
1253 rm
= (Int
)(mod_reg_rm
& 0x7);
1254 rm
+= (pfx
& PFX_REXB
) ? 8 : 0;
1259 /* General functions for dealing with integer register access. */
1261 /* Produce the guest state offset for a reference to the 'g' register
1262 field in a modrm byte, taking into account REX (or its absence),
1263 and the size of the access.
1265 static UInt
offsetIRegG ( Int sz
, Prefix pfx
, UChar mod_reg_rm
)
1268 vassert(host_endness
== VexEndnessLE
);
1269 vassert(IS_VALID_PFX(pfx
));
1270 vassert(sz
== 8 || sz
== 4 || sz
== 2 || sz
== 1);
1271 reg
= gregOfRexRM( pfx
, mod_reg_rm
);
1272 return offsetIReg( sz
, reg
, toBool(sz
== 1 && !haveREX(pfx
)) );
1276 IRExpr
* getIRegG ( Int sz
, Prefix pfx
, UChar mod_reg_rm
)
1280 return unop(Iop_64to32
,
1281 IRExpr_Get( offsetIRegG( sz
, pfx
, mod_reg_rm
),
1284 return IRExpr_Get( offsetIRegG( sz
, pfx
, mod_reg_rm
),
1290 void putIRegG ( Int sz
, Prefix pfx
, UChar mod_reg_rm
, IRExpr
* e
)
1292 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == szToITy(sz
));
1294 e
= unop(Iop_32Uto64
,e
);
1296 stmt( IRStmt_Put( offsetIRegG( sz
, pfx
, mod_reg_rm
), e
) );
1300 const HChar
* nameIRegG ( Int sz
, Prefix pfx
, UChar mod_reg_rm
)
1302 return nameIReg( sz
, gregOfRexRM(pfx
,mod_reg_rm
),
1303 toBool(sz
==1 && !haveREX(pfx
)) );
1308 IRExpr
* getIRegV ( Int sz
, Prefix pfx
)
1312 return unop(Iop_64to32
,
1313 IRExpr_Get( offsetIReg( sz
, getVexNvvvv(pfx
), False
),
1316 return IRExpr_Get( offsetIReg( sz
, getVexNvvvv(pfx
), False
),
1322 void putIRegV ( Int sz
, Prefix pfx
, IRExpr
* e
)
1324 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == szToITy(sz
));
1326 e
= unop(Iop_32Uto64
,e
);
1328 stmt( IRStmt_Put( offsetIReg( sz
, getVexNvvvv(pfx
), False
), e
) );
1332 const HChar
* nameIRegV ( Int sz
, Prefix pfx
)
1334 return nameIReg( sz
, getVexNvvvv(pfx
), False
);
1339 /* Produce the guest state offset for a reference to the 'e' register
1340 field in a modrm byte, taking into account REX (or its absence),
1341 and the size of the access. eregOfRexRM will assert if mod_reg_rm
1342 denotes a memory access rather than a register access.
1344 static UInt
offsetIRegE ( Int sz
, Prefix pfx
, UChar mod_reg_rm
)
1347 vassert(host_endness
== VexEndnessLE
);
1348 vassert(IS_VALID_PFX(pfx
));
1349 vassert(sz
== 8 || sz
== 4 || sz
== 2 || sz
== 1);
1350 reg
= eregOfRexRM( pfx
, mod_reg_rm
);
1351 return offsetIReg( sz
, reg
, toBool(sz
== 1 && !haveREX(pfx
)) );
1355 IRExpr
* getIRegE ( Int sz
, Prefix pfx
, UChar mod_reg_rm
)
1359 return unop(Iop_64to32
,
1360 IRExpr_Get( offsetIRegE( sz
, pfx
, mod_reg_rm
),
1363 return IRExpr_Get( offsetIRegE( sz
, pfx
, mod_reg_rm
),
1369 void putIRegE ( Int sz
, Prefix pfx
, UChar mod_reg_rm
, IRExpr
* e
)
1371 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == szToITy(sz
));
1373 e
= unop(Iop_32Uto64
,e
);
1375 stmt( IRStmt_Put( offsetIRegE( sz
, pfx
, mod_reg_rm
), e
) );
1379 const HChar
* nameIRegE ( Int sz
, Prefix pfx
, UChar mod_reg_rm
)
1381 return nameIReg( sz
, eregOfRexRM(pfx
,mod_reg_rm
),
1382 toBool(sz
==1 && !haveREX(pfx
)) );
1386 /*------------------------------------------------------------*/
1387 /*--- For dealing with XMM registers ---*/
1388 /*------------------------------------------------------------*/
1390 static Int
ymmGuestRegOffset ( UInt ymmreg
)
1393 case 0: return OFFB_YMM0
;
1394 case 1: return OFFB_YMM1
;
1395 case 2: return OFFB_YMM2
;
1396 case 3: return OFFB_YMM3
;
1397 case 4: return OFFB_YMM4
;
1398 case 5: return OFFB_YMM5
;
1399 case 6: return OFFB_YMM6
;
1400 case 7: return OFFB_YMM7
;
1401 case 8: return OFFB_YMM8
;
1402 case 9: return OFFB_YMM9
;
1403 case 10: return OFFB_YMM10
;
1404 case 11: return OFFB_YMM11
;
1405 case 12: return OFFB_YMM12
;
1406 case 13: return OFFB_YMM13
;
1407 case 14: return OFFB_YMM14
;
1408 case 15: return OFFB_YMM15
;
1409 default: vpanic("ymmGuestRegOffset(amd64)");
1413 static Int
xmmGuestRegOffset ( UInt xmmreg
)
1415 /* Correct for little-endian host only. */
1416 vassert(host_endness
== VexEndnessLE
);
1417 return ymmGuestRegOffset( xmmreg
);
1420 /* Lanes of vector registers are always numbered from zero being the
1421 least significant lane (rightmost in the register). */
1423 static Int
xmmGuestRegLane16offset ( UInt xmmreg
, Int laneno
)
1425 /* Correct for little-endian host only. */
1426 vassert(host_endness
== VexEndnessLE
);
1427 vassert(laneno
>= 0 && laneno
< 8);
1428 return xmmGuestRegOffset( xmmreg
) + 2 * laneno
;
1431 static Int
xmmGuestRegLane32offset ( UInt xmmreg
, Int laneno
)
1433 /* Correct for little-endian host only. */
1434 vassert(host_endness
== VexEndnessLE
);
1435 vassert(laneno
>= 0 && laneno
< 4);
1436 return xmmGuestRegOffset( xmmreg
) + 4 * laneno
;
1439 static Int
xmmGuestRegLane64offset ( UInt xmmreg
, Int laneno
)
1441 /* Correct for little-endian host only. */
1442 vassert(host_endness
== VexEndnessLE
);
1443 vassert(laneno
>= 0 && laneno
< 2);
1444 return xmmGuestRegOffset( xmmreg
) + 8 * laneno
;
1447 static Int
ymmGuestRegLane128offset ( UInt ymmreg
, Int laneno
)
1449 /* Correct for little-endian host only. */
1450 vassert(host_endness
== VexEndnessLE
);
1451 vassert(laneno
>= 0 && laneno
< 2);
1452 return ymmGuestRegOffset( ymmreg
) + 16 * laneno
;
1455 static Int
ymmGuestRegLane64offset ( UInt ymmreg
, Int laneno
)
1457 /* Correct for little-endian host only. */
1458 vassert(host_endness
== VexEndnessLE
);
1459 vassert(laneno
>= 0 && laneno
< 4);
1460 return ymmGuestRegOffset( ymmreg
) + 8 * laneno
;
1463 static Int
ymmGuestRegLane32offset ( UInt ymmreg
, Int laneno
)
1465 /* Correct for little-endian host only. */
1466 vassert(host_endness
== VexEndnessLE
);
1467 vassert(laneno
>= 0 && laneno
< 8);
1468 return ymmGuestRegOffset( ymmreg
) + 4 * laneno
;
1471 static IRExpr
* getXMMReg ( UInt xmmreg
)
1473 return IRExpr_Get( xmmGuestRegOffset(xmmreg
), Ity_V128
);
1476 static IRExpr
* getXMMRegLane64 ( UInt xmmreg
, Int laneno
)
1478 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg
,laneno
), Ity_I64
);
1481 static IRExpr
* getXMMRegLane64F ( UInt xmmreg
, Int laneno
)
1483 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg
,laneno
), Ity_F64
);
1486 static IRExpr
* getXMMRegLane32 ( UInt xmmreg
, Int laneno
)
1488 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg
,laneno
), Ity_I32
);
1491 static IRExpr
* getXMMRegLane32F ( UInt xmmreg
, Int laneno
)
1493 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg
,laneno
), Ity_F32
);
1496 static IRExpr
* getXMMRegLane16 ( UInt xmmreg
, Int laneno
)
1498 return IRExpr_Get( xmmGuestRegLane16offset(xmmreg
,laneno
), Ity_I16
);
1501 static void putXMMReg ( UInt xmmreg
, IRExpr
* e
)
1503 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_V128
);
1504 stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg
), e
) );
1507 static void putXMMRegLane64 ( UInt xmmreg
, Int laneno
, IRExpr
* e
)
1509 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I64
);
1510 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg
,laneno
), e
) );
1513 static void putXMMRegLane64F ( UInt xmmreg
, Int laneno
, IRExpr
* e
)
1515 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_F64
);
1516 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg
,laneno
), e
) );
1519 static void putXMMRegLane32F ( UInt xmmreg
, Int laneno
, IRExpr
* e
)
1521 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_F32
);
1522 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg
,laneno
), e
) );
1525 static void putXMMRegLane32 ( UInt xmmreg
, Int laneno
, IRExpr
* e
)
1527 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I32
);
1528 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg
,laneno
), e
) );
1531 static IRExpr
* getYMMReg ( UInt xmmreg
)
1533 return IRExpr_Get( ymmGuestRegOffset(xmmreg
), Ity_V256
);
1536 static IRExpr
* getYMMRegLane128 ( UInt ymmreg
, Int laneno
)
1538 return IRExpr_Get( ymmGuestRegLane128offset(ymmreg
,laneno
), Ity_V128
);
1541 static IRExpr
* getYMMRegLane64F ( UInt ymmreg
, Int laneno
)
1543 return IRExpr_Get( ymmGuestRegLane64offset(ymmreg
,laneno
), Ity_F64
);
1546 static IRExpr
* getYMMRegLane64 ( UInt ymmreg
, Int laneno
)
1548 return IRExpr_Get( ymmGuestRegLane64offset(ymmreg
,laneno
), Ity_I64
);
1551 static IRExpr
* getYMMRegLane32F ( UInt ymmreg
, Int laneno
)
1553 return IRExpr_Get( ymmGuestRegLane32offset(ymmreg
,laneno
), Ity_F32
);
1556 static IRExpr
* getYMMRegLane32 ( UInt ymmreg
, Int laneno
)
1558 return IRExpr_Get( ymmGuestRegLane32offset(ymmreg
,laneno
), Ity_I32
);
1561 static void putYMMReg ( UInt ymmreg
, IRExpr
* e
)
1563 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_V256
);
1564 stmt( IRStmt_Put( ymmGuestRegOffset(ymmreg
), e
) );
1567 static void putYMMRegLane128 ( UInt ymmreg
, Int laneno
, IRExpr
* e
)
1569 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_V128
);
1570 stmt( IRStmt_Put( ymmGuestRegLane128offset(ymmreg
,laneno
), e
) );
1573 static void putYMMRegLane64F ( UInt ymmreg
, Int laneno
, IRExpr
* e
)
1575 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_F64
);
1576 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg
,laneno
), e
) );
1579 static void putYMMRegLane64 ( UInt ymmreg
, Int laneno
, IRExpr
* e
)
1581 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I64
);
1582 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg
,laneno
), e
) );
1585 static void putYMMRegLane32F ( UInt ymmreg
, Int laneno
, IRExpr
* e
)
1587 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_F32
);
1588 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg
,laneno
), e
) );
1591 static void putYMMRegLane32 ( UInt ymmreg
, Int laneno
, IRExpr
* e
)
1593 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I32
);
1594 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg
,laneno
), e
) );
1597 static IRExpr
* mkV128 ( UShort mask
)
1599 return IRExpr_Const(IRConst_V128(mask
));
1602 /* Write the low half of a YMM reg and zero out the upper half. */
1603 static void putYMMRegLoAndZU ( UInt ymmreg
, IRExpr
* e
)
1605 putYMMRegLane128( ymmreg
, 0, e
);
1606 putYMMRegLane128( ymmreg
, 1, mkV128(0) );
1609 static IRExpr
* mkAnd1 ( IRExpr
* x
, IRExpr
* y
)
1611 vassert(typeOfIRExpr(irsb
->tyenv
,x
) == Ity_I1
);
1612 vassert(typeOfIRExpr(irsb
->tyenv
,y
) == Ity_I1
);
1613 return unop(Iop_64to1
,
1616 unop(Iop_1Uto64
,y
)));
1619 /* Generate a compare-and-swap operation, operating on memory at
1620 'addr'. The expected value is 'expVal' and the new value is
1621 'newVal'. If the operation fails, then transfer control (with a
1622 no-redir jump (XXX no -- see comment at top of this file)) to
1623 'restart_point', which is presumably the address of the guest
1624 instruction again -- retrying, essentially. */
1625 static void casLE ( IRExpr
* addr
, IRExpr
* expVal
, IRExpr
* newVal
,
1626 Addr64 restart_point
)
1629 IRType tyE
= typeOfIRExpr(irsb
->tyenv
, expVal
);
1630 IRType tyN
= typeOfIRExpr(irsb
->tyenv
, newVal
);
1631 IRTemp oldTmp
= newTemp(tyE
);
1632 IRTemp expTmp
= newTemp(tyE
);
1633 vassert(tyE
== tyN
);
1634 vassert(tyE
== Ity_I64
|| tyE
== Ity_I32
1635 || tyE
== Ity_I16
|| tyE
== Ity_I8
);
1636 assign(expTmp
, expVal
);
1637 cas
= mkIRCAS( IRTemp_INVALID
, oldTmp
, Iend_LE
, addr
,
1638 NULL
, mkexpr(expTmp
), NULL
, newVal
);
1639 stmt( IRStmt_CAS(cas
) );
1641 binop( mkSizedOp(tyE
,Iop_CasCmpNE8
),
1642 mkexpr(oldTmp
), mkexpr(expTmp
) ),
1643 Ijk_Boring
, /*Ijk_NoRedir*/
1644 IRConst_U64( restart_point
),
1650 /*------------------------------------------------------------*/
1651 /*--- Helpers for %rflags. ---*/
1652 /*------------------------------------------------------------*/
1654 /* -------------- Evaluating the flags-thunk. -------------- */
1656 /* Build IR to calculate all the eflags from stored
1657 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1659 static IRExpr
* mk_amd64g_calculate_rflags_all ( void )
1662 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP
, Ity_I64
),
1663 IRExpr_Get(OFFB_CC_DEP1
, Ity_I64
),
1664 IRExpr_Get(OFFB_CC_DEP2
, Ity_I64
),
1665 IRExpr_Get(OFFB_CC_NDEP
, Ity_I64
) );
1670 "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all
,
1673 /* Exclude OP and NDEP from definedness checking. We're only
1674 interested in DEP1 and DEP2. */
1675 call
->Iex
.CCall
.cee
->mcx_mask
= (1<<0) | (1<<3);
1679 /* Build IR to calculate some particular condition from stored
1680 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1682 static IRExpr
* mk_amd64g_calculate_condition ( AMD64Condcode cond
)
1685 = mkIRExprVec_5( mkU64(cond
),
1686 IRExpr_Get(OFFB_CC_OP
, Ity_I64
),
1687 IRExpr_Get(OFFB_CC_DEP1
, Ity_I64
),
1688 IRExpr_Get(OFFB_CC_DEP2
, Ity_I64
),
1689 IRExpr_Get(OFFB_CC_NDEP
, Ity_I64
) );
1694 "amd64g_calculate_condition", &amd64g_calculate_condition
,
1697 /* Exclude the requested condition, OP and NDEP from definedness
1698 checking. We're only interested in DEP1 and DEP2. */
1699 call
->Iex
.CCall
.cee
->mcx_mask
= (1<<0) | (1<<1) | (1<<4);
1700 return unop(Iop_64to1
, call
);
1703 /* Build IR to calculate just the carry flag from stored
1704 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I64. */
1705 static IRExpr
* mk_amd64g_calculate_rflags_c ( void )
1708 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP
, Ity_I64
),
1709 IRExpr_Get(OFFB_CC_DEP1
, Ity_I64
),
1710 IRExpr_Get(OFFB_CC_DEP2
, Ity_I64
),
1711 IRExpr_Get(OFFB_CC_NDEP
, Ity_I64
) );
1716 "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c
,
1719 /* Exclude OP and NDEP from definedness checking. We're only
1720 interested in DEP1 and DEP2. */
1721 call
->Iex
.CCall
.cee
->mcx_mask
= (1<<0) | (1<<3);
1726 /* -------------- Building the flags-thunk. -------------- */
1728 /* The machinery in this section builds the flag-thunk following a
1729 flag-setting operation. Hence the various setFlags_* functions.
1732 static Bool
isAddSub ( IROp op8
)
1734 return toBool(op8
== Iop_Add8
|| op8
== Iop_Sub8
);
1737 static Bool
isLogic ( IROp op8
)
1739 return toBool(op8
== Iop_And8
|| op8
== Iop_Or8
|| op8
== Iop_Xor8
);
1742 /* U-widen 1/8/16/32/64 bit int expr to 64. */
1743 static IRExpr
* widenUto64 ( IRExpr
* e
)
1745 switch (typeOfIRExpr(irsb
->tyenv
,e
)) {
1746 case Ity_I64
: return e
;
1747 case Ity_I32
: return unop(Iop_32Uto64
, e
);
1748 case Ity_I16
: return unop(Iop_16Uto64
, e
);
1749 case Ity_I8
: return unop(Iop_8Uto64
, e
);
1750 case Ity_I1
: return unop(Iop_1Uto64
, e
);
1751 default: vpanic("widenUto64");
1755 /* S-widen 8/16/32/64 bit int expr to 32. */
1756 static IRExpr
* widenSto64 ( IRExpr
* e
)
1758 switch (typeOfIRExpr(irsb
->tyenv
,e
)) {
1759 case Ity_I64
: return e
;
1760 case Ity_I32
: return unop(Iop_32Sto64
, e
);
1761 case Ity_I16
: return unop(Iop_16Sto64
, e
);
1762 case Ity_I8
: return unop(Iop_8Sto64
, e
);
1763 default: vpanic("widenSto64");
1767 /* Narrow 8/16/32/64 bit int expr to 8/16/32/64. Clearly only some
1768 of these combinations make sense. */
1769 static IRExpr
* narrowTo ( IRType dst_ty
, IRExpr
* e
)
1771 IRType src_ty
= typeOfIRExpr(irsb
->tyenv
,e
);
1772 if (src_ty
== dst_ty
)
1774 if (src_ty
== Ity_I32
&& dst_ty
== Ity_I16
)
1775 return unop(Iop_32to16
, e
);
1776 if (src_ty
== Ity_I32
&& dst_ty
== Ity_I8
)
1777 return unop(Iop_32to8
, e
);
1778 if (src_ty
== Ity_I64
&& dst_ty
== Ity_I32
)
1779 return unop(Iop_64to32
, e
);
1780 if (src_ty
== Ity_I64
&& dst_ty
== Ity_I16
)
1781 return unop(Iop_64to16
, e
);
1782 if (src_ty
== Ity_I64
&& dst_ty
== Ity_I8
)
1783 return unop(Iop_64to8
, e
);
1785 vex_printf("\nsrc, dst tys are: ");
1790 vpanic("narrowTo(amd64)");
1794 /* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is
1795 auto-sized up to the real op. */
1798 void setFlags_DEP1_DEP2 ( IROp op8
, IRTemp dep1
, IRTemp dep2
, IRType ty
)
1802 case Ity_I8
: ccOp
= 0; break;
1803 case Ity_I16
: ccOp
= 1; break;
1804 case Ity_I32
: ccOp
= 2; break;
1805 case Ity_I64
: ccOp
= 3; break;
1806 default: vassert(0);
1809 case Iop_Add8
: ccOp
+= AMD64G_CC_OP_ADDB
; break;
1810 case Iop_Sub8
: ccOp
+= AMD64G_CC_OP_SUBB
; break;
1811 default: ppIROp(op8
);
1812 vpanic("setFlags_DEP1_DEP2(amd64)");
1814 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(ccOp
)) );
1815 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(dep1
))) );
1816 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto64(mkexpr(dep2
))) );
1820 /* Set the OP and DEP1 fields only, and write zero to DEP2. */
1823 void setFlags_DEP1 ( IROp op8
, IRTemp dep1
, IRType ty
)
1827 case Ity_I8
: ccOp
= 0; break;
1828 case Ity_I16
: ccOp
= 1; break;
1829 case Ity_I32
: ccOp
= 2; break;
1830 case Ity_I64
: ccOp
= 3; break;
1831 default: vassert(0);
1836 case Iop_Xor8
: ccOp
+= AMD64G_CC_OP_LOGICB
; break;
1837 default: ppIROp(op8
);
1838 vpanic("setFlags_DEP1(amd64)");
1840 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(ccOp
)) );
1841 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(dep1
))) );
1842 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0)) );
1846 /* For shift operations, we put in the result and the undershifted
1847 result. Except if the shift amount is zero, the thunk is left
1850 static void setFlags_DEP1_DEP2_shift ( IROp op64
,
1858 case Ity_I8
: ccOp
= 0; break;
1859 case Ity_I16
: ccOp
= 1; break;
1860 case Ity_I32
: ccOp
= 2; break;
1861 case Ity_I64
: ccOp
= 3; break;
1862 default: vassert(0);
1867 /* Both kinds of right shifts are handled by the same thunk
1871 case Iop_Sar64
: ccOp
+= AMD64G_CC_OP_SHRB
; break;
1872 case Iop_Shl64
: ccOp
+= AMD64G_CC_OP_SHLB
; break;
1873 default: ppIROp(op64
);
1874 vpanic("setFlags_DEP1_DEP2_shift(amd64)");
1877 /* guard :: Ity_I8. We need to convert it to I1. */
1878 IRTemp guardB
= newTemp(Ity_I1
);
1879 assign( guardB
, binop(Iop_CmpNE8
, mkexpr(guard
), mkU8(0)) );
1881 /* DEP1 contains the result, DEP2 contains the undershifted value. */
1882 stmt( IRStmt_Put( OFFB_CC_OP
,
1883 IRExpr_ITE( mkexpr(guardB
),
1885 IRExpr_Get(OFFB_CC_OP
,Ity_I64
) ) ));
1886 stmt( IRStmt_Put( OFFB_CC_DEP1
,
1887 IRExpr_ITE( mkexpr(guardB
),
1888 widenUto64(mkexpr(res
)),
1889 IRExpr_Get(OFFB_CC_DEP1
,Ity_I64
) ) ));
1890 stmt( IRStmt_Put( OFFB_CC_DEP2
,
1891 IRExpr_ITE( mkexpr(guardB
),
1892 widenUto64(mkexpr(resUS
)),
1893 IRExpr_Get(OFFB_CC_DEP2
,Ity_I64
) ) ));
1897 /* For the inc/dec case, we store in DEP1 the result value and in NDEP
1898 the former value of the carry flag, which unfortunately we have to
1901 static void setFlags_INC_DEC ( Bool inc
, IRTemp res
, IRType ty
)
1903 Int ccOp
= inc
? AMD64G_CC_OP_INCB
: AMD64G_CC_OP_DECB
;
1906 case Ity_I8
: ccOp
+= 0; break;
1907 case Ity_I16
: ccOp
+= 1; break;
1908 case Ity_I32
: ccOp
+= 2; break;
1909 case Ity_I64
: ccOp
+= 3; break;
1910 default: vassert(0);
1913 /* This has to come first, because calculating the C flag
1914 may require reading all four thunk fields. */
1915 stmt( IRStmt_Put( OFFB_CC_NDEP
, mk_amd64g_calculate_rflags_c()) );
1916 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(ccOp
)) );
1917 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(res
))) );
1918 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0)) );
1922 /* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
1926 void setFlags_MUL ( IRType ty
, IRTemp arg1
, IRTemp arg2
, ULong base_op
)
1930 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(base_op
+0) ) );
1933 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(base_op
+1) ) );
1936 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(base_op
+2) ) );
1939 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(base_op
+3) ) );
1942 vpanic("setFlags_MUL(amd64)");
1944 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(arg1
)) ));
1945 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto64(mkexpr(arg2
)) ));
1949 /* -------------- Condition codes. -------------- */
1951 /* Condition codes, using the AMD encoding. */
1953 static const HChar
* name_AMD64Condcode ( AMD64Condcode cond
)
1956 case AMD64CondO
: return "o";
1957 case AMD64CondNO
: return "no";
1958 case AMD64CondB
: return "b";
1959 case AMD64CondNB
: return "ae"; /*"nb";*/
1960 case AMD64CondZ
: return "e"; /*"z";*/
1961 case AMD64CondNZ
: return "ne"; /*"nz";*/
1962 case AMD64CondBE
: return "be";
1963 case AMD64CondNBE
: return "a"; /*"nbe";*/
1964 case AMD64CondS
: return "s";
1965 case AMD64CondNS
: return "ns";
1966 case AMD64CondP
: return "p";
1967 case AMD64CondNP
: return "np";
1968 case AMD64CondL
: return "l";
1969 case AMD64CondNL
: return "ge"; /*"nl";*/
1970 case AMD64CondLE
: return "le";
1971 case AMD64CondNLE
: return "g"; /*"nle";*/
1972 case AMD64CondAlways
: return "ALWAYS";
1973 default: vpanic("name_AMD64Condcode");
1978 AMD64Condcode
positiveIse_AMD64Condcode ( AMD64Condcode cond
,
1979 /*OUT*/Bool
* needInvert
)
1981 vassert(cond
>= AMD64CondO
&& cond
<= AMD64CondNLE
);
1986 *needInvert
= False
;
1992 /* -------------- Helpers for ADD/SUB with carry. -------------- */
1994 /* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
1997 Optionally, generate a store for the 'tres' value. This can either
1998 be a normal store, or it can be a cas-with-possible-failure style
2001 if taddr is IRTemp_INVALID, then no store is generated.
2003 if taddr is not IRTemp_INVALID, then a store (using taddr as
2004 the address) is generated:
2006 if texpVal is IRTemp_INVALID then a normal store is
2007 generated, and restart_point must be zero (it is irrelevant).
2009 if texpVal is not IRTemp_INVALID then a cas-style store is
2010 generated. texpVal is the expected value, restart_point
2011 is the restart point if the store fails, and texpVal must
2012 have the same type as tres.
2015 static void helper_ADC ( Int sz
,
2016 IRTemp tres
, IRTemp ta1
, IRTemp ta2
,
2017 /* info about optional store: */
2018 IRTemp taddr
, IRTemp texpVal
, Addr64 restart_point
)
2021 IRType ty
= szToITy(sz
);
2022 IRTemp oldc
= newTemp(Ity_I64
);
2023 IRTemp oldcn
= newTemp(ty
);
2024 IROp plus
= mkSizedOp(ty
, Iop_Add8
);
2025 IROp
xor = mkSizedOp(ty
, Iop_Xor8
);
2027 vassert(typeOfIRTemp(irsb
->tyenv
, tres
) == ty
);
2030 case 8: thunkOp
= AMD64G_CC_OP_ADCQ
; break;
2031 case 4: thunkOp
= AMD64G_CC_OP_ADCL
; break;
2032 case 2: thunkOp
= AMD64G_CC_OP_ADCW
; break;
2033 case 1: thunkOp
= AMD64G_CC_OP_ADCB
; break;
2034 default: vassert(0);
2037 /* oldc = old carry flag, 0 or 1 */
2038 assign( oldc
, binop(Iop_And64
,
2039 mk_amd64g_calculate_rflags_c(),
2042 assign( oldcn
, narrowTo(ty
, mkexpr(oldc
)) );
2044 assign( tres
, binop(plus
,
2045 binop(plus
,mkexpr(ta1
),mkexpr(ta2
)),
2048 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
2049 start of this function. */
2050 if (taddr
!= IRTemp_INVALID
) {
2051 if (texpVal
== IRTemp_INVALID
) {
2052 vassert(restart_point
== 0);
2053 storeLE( mkexpr(taddr
), mkexpr(tres
) );
2055 vassert(typeOfIRTemp(irsb
->tyenv
, texpVal
) == ty
);
2056 /* .. and hence 'texpVal' has the same type as 'tres'. */
2057 casLE( mkexpr(taddr
),
2058 mkexpr(texpVal
), mkexpr(tres
), restart_point
);
2062 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(thunkOp
) ) );
2063 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(ta1
)) ));
2064 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto64(binop(xor, mkexpr(ta2
),
2065 mkexpr(oldcn
)) )) );
2066 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkexpr(oldc
) ) );
2070 /* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
2071 appropriately. As with helper_ADC, possibly generate a store of
2072 the result -- see comments on helper_ADC for details.
2074 static void helper_SBB ( Int sz
,
2075 IRTemp tres
, IRTemp ta1
, IRTemp ta2
,
2076 /* info about optional store: */
2077 IRTemp taddr
, IRTemp texpVal
, Addr64 restart_point
)
2080 IRType ty
= szToITy(sz
);
2081 IRTemp oldc
= newTemp(Ity_I64
);
2082 IRTemp oldcn
= newTemp(ty
);
2083 IROp minus
= mkSizedOp(ty
, Iop_Sub8
);
2084 IROp
xor = mkSizedOp(ty
, Iop_Xor8
);
2086 vassert(typeOfIRTemp(irsb
->tyenv
, tres
) == ty
);
2089 case 8: thunkOp
= AMD64G_CC_OP_SBBQ
; break;
2090 case 4: thunkOp
= AMD64G_CC_OP_SBBL
; break;
2091 case 2: thunkOp
= AMD64G_CC_OP_SBBW
; break;
2092 case 1: thunkOp
= AMD64G_CC_OP_SBBB
; break;
2093 default: vassert(0);
2096 /* oldc = old carry flag, 0 or 1 */
2097 assign( oldc
, binop(Iop_And64
,
2098 mk_amd64g_calculate_rflags_c(),
2101 assign( oldcn
, narrowTo(ty
, mkexpr(oldc
)) );
2103 assign( tres
, binop(minus
,
2104 binop(minus
,mkexpr(ta1
),mkexpr(ta2
)),
2107 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
2108 start of this function. */
2109 if (taddr
!= IRTemp_INVALID
) {
2110 if (texpVal
== IRTemp_INVALID
) {
2111 vassert(restart_point
== 0);
2112 storeLE( mkexpr(taddr
), mkexpr(tres
) );
2114 vassert(typeOfIRTemp(irsb
->tyenv
, texpVal
) == ty
);
2115 /* .. and hence 'texpVal' has the same type as 'tres'. */
2116 casLE( mkexpr(taddr
),
2117 mkexpr(texpVal
), mkexpr(tres
), restart_point
);
2121 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(thunkOp
) ) );
2122 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(ta1
) )) );
2123 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto64(binop(xor, mkexpr(ta2
),
2124 mkexpr(oldcn
)) )) );
2125 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkexpr(oldc
) ) );
2129 /* Given ta1, ta2 and tres, compute tres = ADCX(ta1,ta2) or tres = ADOX(ta1,ta2)
2130 and set flags appropriately.
2132 static void helper_ADCX_ADOX ( Bool isADCX
, Int sz
,
2133 IRTemp tres
, IRTemp ta1
, IRTemp ta2
)
2136 IRType ty
= szToITy(sz
);
2137 IRTemp oldflags
= newTemp(Ity_I64
);
2138 IRTemp oldOC
= newTemp(Ity_I64
); // old O or C flag
2139 IRTemp oldOCn
= newTemp(ty
); // old O or C flag, narrowed
2140 IROp plus
= mkSizedOp(ty
, Iop_Add8
);
2141 IROp
xor = mkSizedOp(ty
, Iop_Xor8
);
2143 vassert(typeOfIRTemp(irsb
->tyenv
, tres
) == ty
);
2146 case 8: thunkOp
= isADCX
? AMD64G_CC_OP_ADCX64
2147 : AMD64G_CC_OP_ADOX64
; break;
2148 case 4: thunkOp
= isADCX
? AMD64G_CC_OP_ADCX32
2149 : AMD64G_CC_OP_ADOX32
; break;
2150 default: vassert(0);
2153 assign( oldflags
, mk_amd64g_calculate_rflags_all() );
2155 /* oldOC = old overflow/carry flag, 0 or 1 */
2156 assign( oldOC
, binop(Iop_And64
,
2159 mkU8(isADCX
? AMD64G_CC_SHIFT_C
2160 : AMD64G_CC_SHIFT_O
)),
2163 assign( oldOCn
, narrowTo(ty
, mkexpr(oldOC
)) );
2165 assign( tres
, binop(plus
,
2166 binop(plus
,mkexpr(ta1
),mkexpr(ta2
)),
2169 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(thunkOp
) ) );
2170 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(ta1
)) ));
2171 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto64(binop(xor, mkexpr(ta2
),
2172 mkexpr(oldOCn
)) )) );
2173 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkexpr(oldflags
) ) );
2177 /* -------------- Helpers for disassembly printing. -------------- */
2179 static const HChar
* nameGrp1 ( Int opc_aux
)
2181 static const HChar
* grp1_names
[8]
2182 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
2183 if (opc_aux
< 0 || opc_aux
> 7) vpanic("nameGrp1(amd64)");
2184 return grp1_names
[opc_aux
];
2187 static const HChar
* nameGrp2 ( Int opc_aux
)
2189 static const HChar
* grp2_names
[8]
2190 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
2191 if (opc_aux
< 0 || opc_aux
> 7) vpanic("nameGrp2(amd64)");
2192 return grp2_names
[opc_aux
];
2195 static const HChar
* nameGrp4 ( Int opc_aux
)
2197 static const HChar
* grp4_names
[8]
2198 = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
2199 if (opc_aux
< 0 || opc_aux
> 1) vpanic("nameGrp4(amd64)");
2200 return grp4_names
[opc_aux
];
2203 static const HChar
* nameGrp5 ( Int opc_aux
)
2205 static const HChar
* grp5_names
[8]
2206 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
2207 if (opc_aux
< 0 || opc_aux
> 6) vpanic("nameGrp5(amd64)");
2208 return grp5_names
[opc_aux
];
2211 static const HChar
* nameGrp8 ( Int opc_aux
)
2213 static const HChar
* grp8_names
[8]
2214 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
2215 if (opc_aux
< 4 || opc_aux
> 7) vpanic("nameGrp8(amd64)");
2216 return grp8_names
[opc_aux
];
2219 static const HChar
* nameSReg ( UInt sreg
)
2222 case R_ES
: return "%es";
2223 case R_CS
: return "%cs";
2224 case R_SS
: return "%ss";
2225 case R_DS
: return "%ds";
2226 case R_FS
: return "%fs";
2227 case R_GS
: return "%gs";
2228 default: vpanic("nameSReg(amd64)");
2232 static const HChar
* nameMMXReg ( Int mmxreg
)
2234 static const HChar
* mmx_names
[8]
2235 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
2236 if (mmxreg
< 0 || mmxreg
> 7) vpanic("nameMMXReg(amd64,guest)");
2237 return mmx_names
[mmxreg
];
2240 static const HChar
* nameXMMReg ( Int xmmreg
)
2242 static const HChar
* xmm_names
[16]
2243 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
2244 "%xmm4", "%xmm5", "%xmm6", "%xmm7",
2245 "%xmm8", "%xmm9", "%xmm10", "%xmm11",
2246 "%xmm12", "%xmm13", "%xmm14", "%xmm15" };
2247 if (xmmreg
< 0 || xmmreg
> 15) vpanic("nameXMMReg(amd64)");
2248 return xmm_names
[xmmreg
];
/* NOTE(review): names an MMX operand granularity for disassembly
   printing. The switch cases (original lines 2252-2257) are missing
   from this extraction; only the default panic path is visible.
   Presumably the cases map gran 0..3 to lane-size suffixes — confirm
   against the upstream source before editing. */
2251 static const HChar
* nameMMXGran ( Int gran
)
2258 default: vpanic("nameMMXGran(amd64,guest)");
/* NOTE(review): returns a single-character size suffix for an integer
   operand size, used by the DIP/DIS disassembly printers. The switch
   cases (original lines 2263-2268) are missing from this extraction;
   only the default panic path is visible — confirm upstream. */
2262 static HChar
nameISize ( Int size
)
2269 default: vpanic("nameISize(amd64)");
2273 static const HChar
* nameYMMReg ( Int ymmreg
)
2275 static const HChar
* ymm_names
[16]
2276 = { "%ymm0", "%ymm1", "%ymm2", "%ymm3",
2277 "%ymm4", "%ymm5", "%ymm6", "%ymm7",
2278 "%ymm8", "%ymm9", "%ymm10", "%ymm11",
2279 "%ymm12", "%ymm13", "%ymm14", "%ymm15" };
2280 if (ymmreg
< 0 || ymmreg
> 15) vpanic("nameYMMReg(amd64)");
2281 return ymm_names
[ymmreg
];
2285 /*------------------------------------------------------------*/
2286 /*--- JMP helpers ---*/
2287 /*------------------------------------------------------------*/
2289 static void jmp_lit( /*MOD*/DisResult
* dres
,
2290 IRJumpKind kind
, Addr64 d64
)
2292 vassert(dres
->whatNext
== Dis_Continue
);
2293 vassert(dres
->len
== 0);
2294 vassert(dres
->jk_StopHere
== Ijk_INVALID
);
2295 dres
->whatNext
= Dis_StopHere
;
2296 dres
->jk_StopHere
= kind
;
2297 stmt( IRStmt_Put( OFFB_RIP
, mkU64(d64
) ) );
2300 static void jmp_treg( /*MOD*/DisResult
* dres
,
2301 IRJumpKind kind
, IRTemp t
)
2303 vassert(dres
->whatNext
== Dis_Continue
);
2304 vassert(dres
->len
== 0);
2305 vassert(dres
->jk_StopHere
== Ijk_INVALID
);
2306 dres
->whatNext
= Dis_StopHere
;
2307 dres
->jk_StopHere
= kind
;
2308 stmt( IRStmt_Put( OFFB_RIP
, mkexpr(t
) ) );
/* NOTE(review): terminate the block with a two-way conditional branch:
   exit to d64_true when 'cond' holds, else fall through to d64_false.
   Several original lines are missing from this extraction (the
   'invert' declaration used at line 2322, the 'if (invert)' split
   between the two Exit/Put shapes, and the jump-kind arguments of the
   IRStmt_Exit calls), so do not edit this body without the upstream
   source. The condition is first canonicalised to its positive form
   via positiveIse_AMD64Condcode, which reports through '&invert'
   whether the sense was flipped; the two visible Exit/Put pairs are
   presumably the non-inverted and inverted emission paths. */
2312 void jcc_01 ( /*MOD*/DisResult
* dres
,
2313 AMD64Condcode cond
, Addr64 d64_false
, Addr64 d64_true
)
2316 AMD64Condcode condPos
;
/* Caller must not already have terminated the block. */
2317 vassert(dres
->whatNext
== Dis_Continue
);
2318 vassert(dres
->len
== 0);
2319 vassert(dres
->jk_StopHere
== Ijk_INVALID
);
2320 dres
->whatNext
= Dis_StopHere
;
2321 dres
->jk_StopHere
= Ijk_Boring
;
/* Canonicalise the condition; 'invert' records a flipped sense. */
2322 condPos
= positiveIse_AMD64Condcode ( cond
, &invert
);
/* Non-inverted path: side-exit to d64_false, fall through to d64_true. */
2324 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos
),
2326 IRConst_U64(d64_false
),
2328 stmt( IRStmt_Put( OFFB_RIP
, mkU64(d64_true
) ) );
/* Inverted path: side-exit to d64_true, fall through to d64_false. */
2330 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos
),
2332 IRConst_U64(d64_true
),
2334 stmt( IRStmt_Put( OFFB_RIP
, mkU64(d64_false
) ) );
2338 /* Let new_rsp be the %rsp value after a call/return. Let nia be the
2339 guest address of the next instruction to be executed.
2341 This function generates an AbiHint to say that -128(%rsp)
2342 .. -1(%rsp) should now be regarded as uninitialised.
/* NOTE(review): emits an AbiHint marking the 128-byte red zone
   below the post-call/return %rsp (new_rsp - szB .. new_rsp - 1) as
   undefined, per the AMD64 SysV ABI. The tail of the IRStmt_AbiHint
   call (presumably the length and mkexpr(nia) arguments, plus closing
   braces) is missing from this extraction — consult the upstream
   source before editing. 'who' is only used for optional debugging
   output; 'nia' is the guest address of the next instruction. */
2345 void make_redzone_AbiHint ( const VexAbiInfo
* vbi
,
2346 IRTemp new_rsp
, IRTemp nia
, const HChar
* who
)
/* Red-zone size comes from the ABI description. */
2348 Int szB
= vbi
->guest_stack_redzone_size
;
2351 /* A bit of a kludge. Currently the only AbI we've guested AMD64
2352 for is ELF. So just check it's the expected 128 value
2354 vassert(szB
== 128);
2356 if (0) vex_printf("AbiHint: %s\n", who
);
/* Both temps must be 64-bit guest addresses. */
2357 vassert(typeOfIRTemp(irsb
->tyenv
, new_rsp
) == Ity_I64
);
2358 vassert(typeOfIRTemp(irsb
->tyenv
, nia
) == Ity_I64
);
2360 stmt( IRStmt_AbiHint(
2361 binop(Iop_Sub64
, mkexpr(new_rsp
), mkU64(szB
)),
2368 /*------------------------------------------------------------*/
2369 /*--- Disassembling addressing modes ---*/
2370 /*------------------------------------------------------------*/
2373 const HChar
* segRegTxt ( Prefix pfx
)
2375 if (pfx
& PFX_CS
) return "%cs:";
2376 if (pfx
& PFX_DS
) return "%ds:";
2377 if (pfx
& PFX_ES
) return "%es:";
2378 if (pfx
& PFX_FS
) return "%fs:";
2379 if (pfx
& PFX_GS
) return "%gs:";
2380 if (pfx
& PFX_SS
) return "%ss:";
2381 return ""; /* no override */
2385 /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a
2386 linear address by adding any required segment override as indicated
2387 by sorb, and also dealing with any address size override
2390 IRExpr
* handleAddrOverrides ( const VexAbiInfo
* vbi
,
2391 Prefix pfx
, IRExpr
* virtual )
2393 /* --- address size override --- */
2395 virtual = unop(Iop_32Uto64
, unop(Iop_64to32
, virtual));
2397 /* Note that the below are hacks that relies on the assumption
2398 that %fs or %gs are constant.
2399 Typically, %fs is always 0x63 on linux (in the main thread, it
2400 stays at value 0), %gs always 0x60 on Darwin, ... */
2401 /* --- segment overrides --- */
2403 if (vbi
->guest_amd64_assume_fs_is_const
) {
2404 /* return virtual + guest_FS_CONST. */
2405 virtual = binop(Iop_Add64
, virtual,
2406 IRExpr_Get(OFFB_FS_CONST
, Ity_I64
));
2408 unimplemented("amd64 %fs segment override");
2413 if (vbi
->guest_amd64_assume_gs_is_const
) {
2414 /* return virtual + guest_GS_CONST. */
2415 virtual = binop(Iop_Add64
, virtual,
2416 IRExpr_Get(OFFB_GS_CONST
, Ity_I64
));
2418 unimplemented("amd64 %gs segment override");
2422 /* cs, ds, es and ss are simply ignored in 64-bit mode. */
2429 //.. IRType hWordTy;
2430 //.. IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
2433 //.. /* the common case - no override */
2434 //.. return virtual;
2436 //.. switch (sorb) {
2437 //.. case 0x3E: sreg = R_DS; break;
2438 //.. case 0x26: sreg = R_ES; break;
2439 //.. case 0x64: sreg = R_FS; break;
2440 //.. case 0x65: sreg = R_GS; break;
2441 //.. default: vpanic("handleAddrOverrides(x86,guest)");
2444 //.. hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
2446 //.. seg_selector = newTemp(Ity_I32);
2447 //.. ldt_ptr = newTemp(hWordTy);
2448 //.. gdt_ptr = newTemp(hWordTy);
2449 //.. r64 = newTemp(Ity_I64);
2451 //.. assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
2452 //.. assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
2453 //.. assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
2456 //.. Call this to do the translation and limit checks:
2457 //.. ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
2458 //.. UInt seg_selector, UInt virtual_addr )
2465 //.. "x86g_use_seg_selector",
2466 //.. &x86g_use_seg_selector,
2467 //.. mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
2468 //.. mkexpr(seg_selector), virtual)
2472 //.. /* If the high 32 of the result are non-zero, there was a
2473 //.. failure in address translation. In which case, make a
2478 //.. binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
2480 //.. IRConst_U32( guest_eip_curr_instr )
2484 //.. /* otherwise, here's the translated result. */
2485 //.. return unop(Iop_64to32, mkexpr(r64));
2489 /* Generate IR to calculate an address indicated by a ModRM and
2490 following SIB bytes. The expression, and the number of bytes in
2491 the address mode, are returned (the latter in *len). Note that
2492 this fn should not be called if the R/M part of the address denotes
2493 a register instead of memory. If print_codegen is true, text of
2494 the addressing mode is placed in buf.
2496 The computed address is stored in a new tempreg, and the
2497 identity of the tempreg is returned.
2499 extra_bytes holds the number of bytes after the amode, as supplied
2500 by the caller. This is needed to make sense of %rip-relative
2501 addresses. Note that the value that *len is set to is only the
2502 length of the amode itself and does not include the value supplied
2506 static IRTemp
disAMode_copy2tmp ( IRExpr
* addr64
)
2508 IRTemp tmp
= newTemp(Ity_I64
);
2509 assign( tmp
, addr64
);
2514 IRTemp
disAMode ( /*OUT*/Int
* len
,
2515 const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
,
2516 /*OUT*/HChar
* buf
, Int extra_bytes
)
2518 UChar mod_reg_rm
= getUChar(delta
);
2522 vassert(extra_bytes
>= 0 && extra_bytes
< 10);
2524 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2525 jump table seems a bit excessive.
2527 mod_reg_rm
&= 0xC7; /* is now XX000YYY */
2528 mod_reg_rm
= toUChar(mod_reg_rm
| (mod_reg_rm
>> 3));
2529 /* is now XX0XXYYY */
2530 mod_reg_rm
&= 0x1F; /* is now 000XXYYY */
2531 switch (mod_reg_rm
) {
2533 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2534 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2536 case 0x00: case 0x01: case 0x02: case 0x03:
2537 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
2538 { UChar rm
= toUChar(mod_reg_rm
& 7);
2539 DIS(buf
, "%s(%s)", segRegTxt(pfx
), nameIRegRexB(8,pfx
,rm
));
2541 return disAMode_copy2tmp(
2542 handleAddrOverrides(vbi
, pfx
, getIRegRexB(8,pfx
,rm
)));
2545 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2546 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2548 case 0x08: case 0x09: case 0x0A: case 0x0B:
2549 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
2550 { UChar rm
= toUChar(mod_reg_rm
& 7);
2551 Long d
= getSDisp8(delta
);
2553 DIS(buf
, "%s(%s)", segRegTxt(pfx
), nameIRegRexB(8,pfx
,rm
));
2555 DIS(buf
, "%s%lld(%s)", segRegTxt(pfx
), d
, nameIRegRexB(8,pfx
,rm
));
2558 return disAMode_copy2tmp(
2559 handleAddrOverrides(vbi
, pfx
,
2560 binop(Iop_Add64
,getIRegRexB(8,pfx
,rm
),mkU64(d
))));
2563 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2564 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2566 case 0x10: case 0x11: case 0x12: case 0x13:
2567 /* ! 14 */ case 0x15: case 0x16: case 0x17:
2568 { UChar rm
= toUChar(mod_reg_rm
& 7);
2569 Long d
= getSDisp32(delta
);
2570 DIS(buf
, "%s%lld(%s)", segRegTxt(pfx
), d
, nameIRegRexB(8,pfx
,rm
));
2572 return disAMode_copy2tmp(
2573 handleAddrOverrides(vbi
, pfx
,
2574 binop(Iop_Add64
,getIRegRexB(8,pfx
,rm
),mkU64(d
))));
2577 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2578 /* REX.B==1: a register, %r8 .. %r16. This shouldn't happen. */
2579 case 0x18: case 0x19: case 0x1A: case 0x1B:
2580 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
2581 vpanic("disAMode(amd64): not an addr!");
2583 /* RIP + disp32. This assumes that guest_RIP_curr_instr is set
2584 correctly at the start of handling each instruction. */
2586 { Long d
= getSDisp32(delta
);
2588 DIS(buf
, "%s%lld(%%rip)", segRegTxt(pfx
), d
);
2589 /* We need to know the next instruction's start address.
2590 Try and figure out what it is, record the guess, and ask
2591 the top-level driver logic (bbToIR_AMD64) to check we
2592 guessed right, after the instruction is completely
2594 guest_RIP_next_mustcheck
= True
;
2595 guest_RIP_next_assumed
= guest_RIP_bbstart
2596 + delta
+4 + extra_bytes
;
2597 return disAMode_copy2tmp(
2598 handleAddrOverrides(vbi
, pfx
,
2599 binop(Iop_Add64
, mkU64(guest_RIP_next_assumed
),
2604 /* SIB, with no displacement. Special cases:
2605 -- %rsp cannot act as an index value.
2606 If index_r indicates %rsp, zero is used for the index.
2607 -- when mod is zero and base indicates RBP or R13, base is
2608 instead a 32-bit sign-extended literal.
2609 It's all madness, I tell you. Extract %index, %base and
2610 scale from the SIB byte. The value denoted is then:
2611 | %index == %RSP && (%base == %RBP || %base == %R13)
2612 = d32 following SIB byte
2613 | %index == %RSP && !(%base == %RBP || %base == %R13)
2615 | %index != %RSP && (%base == %RBP || %base == %R13)
2616 = d32 following SIB byte + (%index << scale)
2617 | %index != %RSP && !(%base == %RBP || %base == %R13)
2618 = %base + (%index << scale)
2620 UChar sib
= getUChar(delta
);
2621 UChar scale
= toUChar((sib
>> 6) & 3);
2622 UChar index_r
= toUChar((sib
>> 3) & 7);
2623 UChar base_r
= toUChar(sib
& 7);
2624 /* correct since #(R13) == 8 + #(RBP) */
2625 Bool base_is_BPor13
= toBool(base_r
== R_RBP
);
2626 Bool index_is_SP
= toBool(index_r
== R_RSP
&& 0==getRexX(pfx
));
2629 if ((!index_is_SP
) && (!base_is_BPor13
)) {
2631 DIS(buf
, "%s(%s,%s)", segRegTxt(pfx
),
2632 nameIRegRexB(8,pfx
,base_r
),
2633 nameIReg64rexX(pfx
,index_r
));
2635 DIS(buf
, "%s(%s,%s,%d)", segRegTxt(pfx
),
2636 nameIRegRexB(8,pfx
,base_r
),
2637 nameIReg64rexX(pfx
,index_r
), 1<<scale
);
2642 handleAddrOverrides(vbi
, pfx
,
2644 getIRegRexB(8,pfx
,base_r
),
2645 binop(Iop_Shl64
, getIReg64rexX(pfx
,index_r
),
2649 if ((!index_is_SP
) && base_is_BPor13
) {
2650 Long d
= getSDisp32(delta
);
2651 DIS(buf
, "%s%lld(,%s,%d)", segRegTxt(pfx
), d
,
2652 nameIReg64rexX(pfx
,index_r
), 1<<scale
);
2656 handleAddrOverrides(vbi
, pfx
,
2658 binop(Iop_Shl64
, getIReg64rexX(pfx
,index_r
),
2663 if (index_is_SP
&& (!base_is_BPor13
)) {
2664 DIS(buf
, "%s(%s)", segRegTxt(pfx
), nameIRegRexB(8,pfx
,base_r
));
2666 return disAMode_copy2tmp(
2667 handleAddrOverrides(vbi
, pfx
, getIRegRexB(8,pfx
,base_r
)));
2670 if (index_is_SP
&& base_is_BPor13
) {
2671 Long d
= getSDisp32(delta
);
2672 DIS(buf
, "%s%lld", segRegTxt(pfx
), d
);
2674 return disAMode_copy2tmp(
2675 handleAddrOverrides(vbi
, pfx
, mkU64(d
)));
2681 /* SIB, with 8-bit displacement. Special cases:
2682 -- %esp cannot act as an index value.
2683 If index_r indicates %esp, zero is used for the index.
2688 = d8 + %base + (%index << scale)
2691 UChar sib
= getUChar(delta
);
2692 UChar scale
= toUChar((sib
>> 6) & 3);
2693 UChar index_r
= toUChar((sib
>> 3) & 7);
2694 UChar base_r
= toUChar(sib
& 7);
2695 Long d
= getSDisp8(delta
+1);
2697 if (index_r
== R_RSP
&& 0==getRexX(pfx
)) {
2698 DIS(buf
, "%s%lld(%s)", segRegTxt(pfx
),
2699 d
, nameIRegRexB(8,pfx
,base_r
));
2701 return disAMode_copy2tmp(
2702 handleAddrOverrides(vbi
, pfx
,
2703 binop(Iop_Add64
, getIRegRexB(8,pfx
,base_r
), mkU64(d
)) ));
2706 DIS(buf
, "%s%lld(%s,%s)", segRegTxt(pfx
), d
,
2707 nameIRegRexB(8,pfx
,base_r
),
2708 nameIReg64rexX(pfx
,index_r
));
2710 DIS(buf
, "%s%lld(%s,%s,%d)", segRegTxt(pfx
), d
,
2711 nameIRegRexB(8,pfx
,base_r
),
2712 nameIReg64rexX(pfx
,index_r
), 1<<scale
);
2717 handleAddrOverrides(vbi
, pfx
,
2720 getIRegRexB(8,pfx
,base_r
),
2722 getIReg64rexX(pfx
,index_r
), mkU8(scale
))),
2725 vassert(0); /*NOTREACHED*/
2728 /* SIB, with 32-bit displacement. Special cases:
2729 -- %rsp cannot act as an index value.
2730 If index_r indicates %rsp, zero is used for the index.
2735 = d32 + %base + (%index << scale)
2738 UChar sib
= getUChar(delta
);
2739 UChar scale
= toUChar((sib
>> 6) & 3);
2740 UChar index_r
= toUChar((sib
>> 3) & 7);
2741 UChar base_r
= toUChar(sib
& 7);
2742 Long d
= getSDisp32(delta
+1);
2744 if (index_r
== R_RSP
&& 0==getRexX(pfx
)) {
2745 DIS(buf
, "%s%lld(%s)", segRegTxt(pfx
),
2746 d
, nameIRegRexB(8,pfx
,base_r
));
2748 return disAMode_copy2tmp(
2749 handleAddrOverrides(vbi
, pfx
,
2750 binop(Iop_Add64
, getIRegRexB(8,pfx
,base_r
), mkU64(d
)) ));
2753 DIS(buf
, "%s%lld(%s,%s)", segRegTxt(pfx
), d
,
2754 nameIRegRexB(8,pfx
,base_r
),
2755 nameIReg64rexX(pfx
,index_r
));
2757 DIS(buf
, "%s%lld(%s,%s,%d)", segRegTxt(pfx
), d
,
2758 nameIRegRexB(8,pfx
,base_r
),
2759 nameIReg64rexX(pfx
,index_r
), 1<<scale
);
2764 handleAddrOverrides(vbi
, pfx
,
2767 getIRegRexB(8,pfx
,base_r
),
2769 getIReg64rexX(pfx
,index_r
), mkU8(scale
))),
2772 vassert(0); /*NOTREACHED*/
2776 vpanic("disAMode(amd64)");
2777 return 0; /*notreached*/
2782 /* Similarly for VSIB addressing. This returns just the addend,
2783 and fills in *rI and *vscale with the register number of the vector
2784 index and its multiplicand. */
2786 IRTemp
disAVSIBMode ( /*OUT*/Int
* len
,
2787 const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
,
2788 /*OUT*/HChar
* buf
, /*OUT*/UInt
* rI
,
2789 IRType ty
, /*OUT*/Int
* vscale
)
2791 UChar mod_reg_rm
= getUChar(delta
);
2792 const HChar
*vindex
;
2798 if ((mod_reg_rm
& 7) != 4 || epartIsReg(mod_reg_rm
))
2799 return IRTemp_INVALID
;
2801 UChar sib
= getUChar(delta
+1);
2802 UChar scale
= toUChar((sib
>> 6) & 3);
2803 UChar index_r
= toUChar((sib
>> 3) & 7);
2804 UChar base_r
= toUChar(sib
& 7);
2806 /* correct since #(R13) == 8 + #(RBP) */
2807 Bool base_is_BPor13
= toBool(base_r
== R_RBP
);
2811 *rI
= index_r
| (getRexX(pfx
) << 3);
2813 vindex
= nameXMMReg(*rI
);
2815 vindex
= nameYMMReg(*rI
);
2818 switch (mod_reg_rm
>> 6) {
2820 if (base_is_BPor13
) {
2821 d
= getSDisp32(delta
);
2824 DIS(buf
, "%s%lld(,%s)", segRegTxt(pfx
), d
, vindex
);
2826 DIS(buf
, "%s%lld(,%s,%d)", segRegTxt(pfx
), d
, vindex
, 1<<scale
);
2828 return disAMode_copy2tmp( mkU64(d
) );
2831 DIS(buf
, "%s(%s,%s)", segRegTxt(pfx
),
2832 nameIRegRexB(8,pfx
,base_r
), vindex
);
2834 DIS(buf
, "%s(%s,%s,%d)", segRegTxt(pfx
),
2835 nameIRegRexB(8,pfx
,base_r
), vindex
, 1<<scale
);
2840 d
= getSDisp8(delta
);
2844 d
= getSDisp32(delta
);
2848 DIS(buf
, "%s%lld(%s,%s)", segRegTxt(pfx
), d
,
2849 nameIRegRexB(8,pfx
,base_r
), vindex
);
2851 DIS(buf
, "%s%lld(%s,%s,%d)", segRegTxt(pfx
), d
,
2852 nameIRegRexB(8,pfx
,base_r
), vindex
, 1<<scale
);
2858 return disAMode_copy2tmp( getIRegRexB(8,pfx
,base_r
) );
2859 return disAMode_copy2tmp( binop(Iop_Add64
, getIRegRexB(8,pfx
,base_r
),
2864 /* Figure out the number of (insn-stream) bytes constituting the amode
2865 beginning at delta. Is useful for getting hold of literals beyond
2866 the end of the amode before it has been disassembled. */
2868 static UInt
lengthAMode ( Prefix pfx
, Long delta
)
2870 UChar mod_reg_rm
= getUChar(delta
);
2873 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2874 jump table seems a bit excessive.
2876 mod_reg_rm
&= 0xC7; /* is now XX000YYY */
2877 mod_reg_rm
= toUChar(mod_reg_rm
| (mod_reg_rm
>> 3));
2878 /* is now XX0XXYYY */
2879 mod_reg_rm
&= 0x1F; /* is now 000XXYYY */
2880 switch (mod_reg_rm
) {
2882 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2883 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2885 case 0x00: case 0x01: case 0x02: case 0x03:
2886 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
2889 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2890 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2892 case 0x08: case 0x09: case 0x0A: case 0x0B:
2893 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
2896 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2897 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2899 case 0x10: case 0x11: case 0x12: case 0x13:
2900 /* ! 14 */ case 0x15: case 0x16: case 0x17:
2903 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2904 /* REX.B==1: a register, %r8 .. %r16. This shouldn't happen. */
2905 /* Not an address, but still handled. */
2906 case 0x18: case 0x19: case 0x1A: case 0x1B:
2907 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
2915 /* SIB, with no displacement. */
2916 UChar sib
= getUChar(delta
);
2917 UChar base_r
= toUChar(sib
& 7);
2918 /* correct since #(R13) == 8 + #(RBP) */
2919 Bool base_is_BPor13
= toBool(base_r
== R_RBP
);
2921 if (base_is_BPor13
) {
2928 /* SIB, with 8-bit displacement. */
2932 /* SIB, with 32-bit displacement. */
2937 vpanic("lengthAMode(amd64)");
2938 return 0; /*notreached*/
2943 /*------------------------------------------------------------*/
2944 /*--- Disassembling common idioms ---*/
2945 /*------------------------------------------------------------*/
2948 enum { WithFlagNone
=2, WithFlagCarry
, WithFlagCarryX
, WithFlagOverX
}
2951 /* Handle binary integer instructions of the form
2954 Is passed the a ptr to the modRM byte, the actual operation, and the
2955 data size. Returns the address advanced completely over this
2958 E(src) is reg-or-mem
2961 If E is reg, --> GET %G, tmp
2965 If E is mem and OP is not reversible,
2966 --> (getAddr E) -> tmpa
2972 If E is mem and OP is reversible
2973 --> (getAddr E) -> tmpa
2979 ULong
dis_op2_E_G ( const VexAbiInfo
* vbi
,
2986 const HChar
* t_amd64opc
)
2990 IRType ty
= szToITy(size
);
2991 IRTemp dst1
= newTemp(ty
);
2992 IRTemp src
= newTemp(ty
);
2993 IRTemp dst0
= newTemp(ty
);
2994 UChar rm
= getUChar(delta0
);
2995 IRTemp addr
= IRTemp_INVALID
;
2997 /* Stay sane -- check for valid (op8, flag, keep) combinations. */
3001 case WithFlagNone
: case WithFlagCarry
:
3002 case WithFlagCarryX
: case WithFlagOverX
:
3010 vassert(flag
== WithFlagNone
|| flag
== WithFlagCarry
);
3011 if (flag
== WithFlagCarry
) vassert(keep
);
3014 vassert(flag
== WithFlagNone
);
3016 case Iop_Or8
: case Iop_Xor8
:
3017 vassert(flag
== WithFlagNone
);
3024 if (epartIsReg(rm
)) {
3025 /* Specially handle XOR reg,reg, because that doesn't really
3026 depend on reg, and doing the obvious thing potentially
3027 generates a spurious value check failure due to the bogus
3028 dependency. Ditto SUB/SBB reg,reg. */
3029 if ((op8
== Iop_Xor8
|| ((op8
== Iop_Sub8
) && keep
))
3030 && offsetIRegG(size
,pfx
,rm
) == offsetIRegE(size
,pfx
,rm
)) {
3031 putIRegG(size
,pfx
,rm
, mkU(ty
,0));
3034 assign( dst0
, getIRegG(size
,pfx
,rm
) );
3035 assign( src
, getIRegE(size
,pfx
,rm
) );
3037 if (op8
== Iop_Add8
&& flag
== WithFlagCarry
) {
3038 helper_ADC( size
, dst1
, dst0
, src
,
3039 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3040 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3042 if (op8
== Iop_Sub8
&& flag
== WithFlagCarry
) {
3043 helper_SBB( size
, dst1
, dst0
, src
,
3044 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3045 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3047 if (op8
== Iop_Add8
&& flag
== WithFlagCarryX
) {
3048 helper_ADCX_ADOX( True
/*isADCX*/, size
, dst1
, dst0
, src
);
3049 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3051 if (op8
== Iop_Add8
&& flag
== WithFlagOverX
) {
3052 helper_ADCX_ADOX( False
/*!isADCX*/, size
, dst1
, dst0
, src
);
3053 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3055 assign( dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
3057 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3059 setFlags_DEP1(op8
, dst1
, ty
);
3061 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3064 DIP("%s%c %s,%s\n", t_amd64opc
, nameISize(size
),
3065 nameIRegE(size
,pfx
,rm
),
3066 nameIRegG(size
,pfx
,rm
));
3069 /* E refers to memory */
3070 addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
3071 assign( dst0
, getIRegG(size
,pfx
,rm
) );
3072 assign( src
, loadLE(szToITy(size
), mkexpr(addr
)) );
3074 if (op8
== Iop_Add8
&& flag
== WithFlagCarry
) {
3075 helper_ADC( size
, dst1
, dst0
, src
,
3076 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3077 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3079 if (op8
== Iop_Sub8
&& flag
== WithFlagCarry
) {
3080 helper_SBB( size
, dst1
, dst0
, src
,
3081 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3082 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3084 if (op8
== Iop_Add8
&& flag
== WithFlagCarryX
) {
3085 helper_ADCX_ADOX( True
/*isADCX*/, size
, dst1
, dst0
, src
);
3086 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3088 if (op8
== Iop_Add8
&& flag
== WithFlagOverX
) {
3089 helper_ADCX_ADOX( False
/*!isADCX*/, size
, dst1
, dst0
, src
);
3090 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3092 assign( dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
3094 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3096 setFlags_DEP1(op8
, dst1
, ty
);
3098 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3101 DIP("%s%c %s,%s\n", t_amd64opc
, nameISize(size
),
3102 dis_buf
, nameIRegG(size
, pfx
, rm
));
3109 /* Handle binary integer instructions of the form
3112 Is passed the a ptr to the modRM byte, the actual operation, and the
3113 data size. Returns the address advanced completely over this
3117 E(dst) is reg-or-mem
3119 If E is reg, --> GET %E, tmp
3123 If E is mem, --> (getAddr E) -> tmpa
3129 ULong
dis_op2_G_E ( const VexAbiInfo
* vbi
,
3136 const HChar
* t_amd64opc
)
3140 IRType ty
= szToITy(size
);
3141 IRTemp dst1
= newTemp(ty
);
3142 IRTemp src
= newTemp(ty
);
3143 IRTemp dst0
= newTemp(ty
);
3144 UChar rm
= getUChar(delta0
);
3145 IRTemp addr
= IRTemp_INVALID
;
3147 /* Stay sane -- check for valid (op8, flag, keep) combinations. */
3150 vassert(flag
== WithFlagNone
|| flag
== WithFlagCarry
);
3154 vassert(flag
== WithFlagNone
|| flag
== WithFlagCarry
);
3155 if (flag
== WithFlagCarry
) vassert(keep
);
3157 case Iop_And8
: case Iop_Or8
: case Iop_Xor8
:
3158 vassert(flag
== WithFlagNone
);
3165 /* flag != WithFlagNone is only allowed for Add and Sub and indicates the
3166 intended operation is add-with-carry or subtract-with-borrow. */
3168 if (epartIsReg(rm
)) {
3169 /* Specially handle XOR reg,reg, because that doesn't really
3170 depend on reg, and doing the obvious thing potentially
3171 generates a spurious value check failure due to the bogus
3172 dependency. Ditto SUB/SBB reg,reg. */
3173 if ((op8
== Iop_Xor8
|| ((op8
== Iop_Sub8
) && keep
))
3174 && offsetIRegG(size
,pfx
,rm
) == offsetIRegE(size
,pfx
,rm
)) {
3175 putIRegE(size
,pfx
,rm
, mkU(ty
,0));
3178 assign(dst0
, getIRegE(size
,pfx
,rm
));
3179 assign(src
, getIRegG(size
,pfx
,rm
));
3181 if (op8
== Iop_Add8
&& flag
== WithFlagCarry
) {
3182 helper_ADC( size
, dst1
, dst0
, src
,
3183 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3184 putIRegE(size
, pfx
, rm
, mkexpr(dst1
));
3186 if (op8
== Iop_Sub8
&& flag
== WithFlagCarry
) {
3187 helper_SBB( size
, dst1
, dst0
, src
,
3188 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3189 putIRegE(size
, pfx
, rm
, mkexpr(dst1
));
3191 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)));
3193 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3195 setFlags_DEP1(op8
, dst1
, ty
);
3197 putIRegE(size
, pfx
, rm
, mkexpr(dst1
));
3200 DIP("%s%c %s,%s\n", t_amd64opc
, nameISize(size
),
3201 nameIRegG(size
,pfx
,rm
),
3202 nameIRegE(size
,pfx
,rm
));
3206 /* E refers to memory */
3208 addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
3209 assign(dst0
, loadLE(ty
,mkexpr(addr
)));
3210 assign(src
, getIRegG(size
,pfx
,rm
));
3212 if (op8
== Iop_Add8
&& flag
== WithFlagCarry
) {
3213 if (haveLOCK(pfx
)) {
3214 /* cas-style store */
3215 helper_ADC( size
, dst1
, dst0
, src
,
3216 /*store*/addr
, dst0
/*expVal*/, guest_RIP_curr_instr
);
3219 helper_ADC( size
, dst1
, dst0
, src
,
3220 /*store*/addr
, IRTemp_INVALID
, 0 );
3223 if (op8
== Iop_Sub8
&& flag
== WithFlagCarry
) {
3224 if (haveLOCK(pfx
)) {
3225 /* cas-style store */
3226 helper_SBB( size
, dst1
, dst0
, src
,
3227 /*store*/addr
, dst0
/*expVal*/, guest_RIP_curr_instr
);
3230 helper_SBB( size
, dst1
, dst0
, src
,
3231 /*store*/addr
, IRTemp_INVALID
, 0 );
3234 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)));
3236 if (haveLOCK(pfx
)) {
3237 if (0) vex_printf("locked case\n" );
3238 casLE( mkexpr(addr
),
3239 mkexpr(dst0
)/*expval*/,
3240 mkexpr(dst1
)/*newval*/, guest_RIP_curr_instr
);
3242 if (0) vex_printf("nonlocked case\n");
3243 storeLE(mkexpr(addr
), mkexpr(dst1
));
3247 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3249 setFlags_DEP1(op8
, dst1
, ty
);
3252 DIP("%s%c %s,%s\n", t_amd64opc
, nameISize(size
),
3253 nameIRegG(size
,pfx
,rm
), dis_buf
);
3259 /* Handle move instructions of the form
3262 Is passed the a ptr to the modRM byte, and the data size. Returns
3263 the address advanced completely over this instruction.
3265 E(src) is reg-or-mem
3268 If E is reg, --> GET %E, tmpv
3271 If E is mem --> (getAddr E) -> tmpa
3276 ULong
dis_mov_E_G ( const VexAbiInfo
* vbi
,
3282 UChar rm
= getUChar(delta0
);
3285 if (epartIsReg(rm
)) {
3286 putIRegG(size
, pfx
, rm
, getIRegE(size
, pfx
, rm
));
3287 DIP("mov%c %s,%s\n", nameISize(size
),
3288 nameIRegE(size
,pfx
,rm
),
3289 nameIRegG(size
,pfx
,rm
));
3293 /* E refers to memory */
3295 IRTemp addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
3296 putIRegG(size
, pfx
, rm
, loadLE(szToITy(size
), mkexpr(addr
)));
3297 DIP("mov%c %s,%s\n", nameISize(size
),
3299 nameIRegG(size
,pfx
,rm
));
3305 /* Handle move instructions of the form
3308 Is passed the a ptr to the modRM byte, and the data size. Returns
3309 the address advanced completely over this instruction.
3310 We have to decide here whether F2 or F3 are acceptable. F2 never is.
3313 E(dst) is reg-or-mem
3315 If E is reg, --> GET %G, tmp
3318 If E is mem, --> (getAddr E) -> tmpa
3323 ULong
dis_mov_G_E ( const VexAbiInfo
* vbi
,
3330 UChar rm
= getUChar(delta0
);
3335 if (epartIsReg(rm
)) {
3336 if (haveF2orF3(pfx
)) { *ok
= False
; return delta0
; }
3337 putIRegE(size
, pfx
, rm
, getIRegG(size
, pfx
, rm
));
3338 DIP("mov%c %s,%s\n", nameISize(size
),
3339 nameIRegG(size
,pfx
,rm
),
3340 nameIRegE(size
,pfx
,rm
));
3344 /* E refers to memory */
3346 if (haveF2(pfx
)) { *ok
= False
; return delta0
; }
3347 /* F3(XRELEASE) is acceptable, though. */
3348 IRTemp addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
3349 storeLE( mkexpr(addr
), getIRegG(size
, pfx
, rm
) );
3350 DIP("mov%c %s,%s\n", nameISize(size
),
3351 nameIRegG(size
,pfx
,rm
),
3358 /* op $immediate, AL/AX/EAX/RAX. */
3360 ULong
dis_op_imm_A ( Int size
,
3365 const HChar
* t_amd64opc
)
3367 Int size4
= imin(size
,4);
3368 IRType ty
= szToITy(size
);
3369 IRTemp dst0
= newTemp(ty
);
3370 IRTemp src
= newTemp(ty
);
3371 IRTemp dst1
= newTemp(ty
);
3372 Long lit
= getSDisp(size4
,delta
);
3373 assign(dst0
, getIRegRAX(size
));
3374 assign(src
, mkU(ty
,lit
& mkSizeMask(size
)));
3376 if (isAddSub(op8
) && !carrying
) {
3377 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
3378 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3383 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
3384 setFlags_DEP1(op8
, dst1
, ty
);
3387 if (op8
== Iop_Add8
&& carrying
) {
3388 helper_ADC( size
, dst1
, dst0
, src
,
3389 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3392 if (op8
== Iop_Sub8
&& carrying
) {
3393 helper_SBB( size
, dst1
, dst0
, src
,
3394 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3397 vpanic("dis_op_imm_A(amd64,guest)");
3400 putIRegRAX(size
, mkexpr(dst1
));
3402 DIP("%s%c $%lld, %s\n", t_amd64opc
, nameISize(size
),
3403 lit
, nameIRegRAX(size
));
3408 /* Sign- and Zero-extending moves. */
3410 ULong
dis_movx_E_G ( const VexAbiInfo
* vbi
,
3412 Long delta
, Int szs
, Int szd
, Bool sign_extend
)
3414 UChar rm
= getUChar(delta
);
3415 if (epartIsReg(rm
)) {
3416 putIRegG(szd
, pfx
, rm
,
3418 szs
,szd
,sign_extend
,
3419 getIRegE(szs
,pfx
,rm
)));
3420 DIP("mov%c%c%c %s,%s\n", sign_extend
? 's' : 'z',
3423 nameIRegE(szs
,pfx
,rm
),
3424 nameIRegG(szd
,pfx
,rm
));
3428 /* E refers to memory */
3432 IRTemp addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
3433 putIRegG(szd
, pfx
, rm
,
3435 szs
,szd
,sign_extend
,
3436 loadLE(szToITy(szs
),mkexpr(addr
))));
3437 DIP("mov%c%c%c %s,%s\n", sign_extend
? 's' : 'z',
3441 nameIRegG(szd
,pfx
,rm
));
3447 /* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by
3448 the 64 / 32 / 16 / 8 bit quantity in the given IRTemp. */
3450 void codegen_div ( Int sz
, IRTemp t
, Bool signed_divide
)
3452 /* special-case the 64-bit case */
3454 IROp op
= signed_divide
? Iop_DivModS128to64
3455 : Iop_DivModU128to64
;
3456 IRTemp src128
= newTemp(Ity_I128
);
3457 IRTemp dst128
= newTemp(Ity_I128
);
3458 assign( src128
, binop(Iop_64HLto128
,
3460 getIReg64(R_RAX
)) );
3461 assign( dst128
, binop(op
, mkexpr(src128
), mkexpr(t
)) );
3462 putIReg64( R_RAX
, unop(Iop_128to64
,mkexpr(dst128
)) );
3463 putIReg64( R_RDX
, unop(Iop_128HIto64
,mkexpr(dst128
)) );
3465 IROp op
= signed_divide
? Iop_DivModS64to32
3466 : Iop_DivModU64to32
;
3467 IRTemp src64
= newTemp(Ity_I64
);
3468 IRTemp dst64
= newTemp(Ity_I64
);
3472 binop(Iop_32HLto64
, getIRegRDX(4), getIRegRAX(4)) );
3474 binop(op
, mkexpr(src64
), mkexpr(t
)) );
3475 putIRegRAX( 4, unop(Iop_64to32
,mkexpr(dst64
)) );
3476 putIRegRDX( 4, unop(Iop_64HIto32
,mkexpr(dst64
)) );
3479 IROp widen3264
= signed_divide
? Iop_32Sto64
: Iop_32Uto64
;
3480 IROp widen1632
= signed_divide
? Iop_16Sto32
: Iop_16Uto32
;
3481 assign( src64
, unop(widen3264
,
3485 assign( dst64
, binop(op
, mkexpr(src64
), unop(widen1632
,mkexpr(t
))) );
3486 putIRegRAX( 2, unop(Iop_32to16
,unop(Iop_64to32
,mkexpr(dst64
))) );
3487 putIRegRDX( 2, unop(Iop_32to16
,unop(Iop_64HIto32
,mkexpr(dst64
))) );
3491 IROp widen3264
= signed_divide
? Iop_32Sto64
: Iop_32Uto64
;
3492 IROp widen1632
= signed_divide
? Iop_16Sto32
: Iop_16Uto32
;
3493 IROp widen816
= signed_divide
? Iop_8Sto16
: Iop_8Uto16
;
3494 assign( src64
, unop(widen3264
,
3495 unop(widen1632
, getIRegRAX(2))) );
3497 binop(op
, mkexpr(src64
),
3498 unop(widen1632
, unop(widen816
, mkexpr(t
)))) );
3499 putIRegRAX( 1, unop(Iop_16to8
,
3501 unop(Iop_64to32
,mkexpr(dst64
)))) );
3502 putIRegAH( unop(Iop_16to8
,
3504 unop(Iop_64HIto32
,mkexpr(dst64
)))) );
3508 vpanic("codegen_div(amd64)");
3514 ULong
dis_Grp1 ( const VexAbiInfo
* vbi
,
3516 Long delta
, UChar modrm
,
3517 Int am_sz
, Int d_sz
, Int sz
, Long d64
)
3521 IRType ty
= szToITy(sz
);
3522 IRTemp dst1
= newTemp(ty
);
3523 IRTemp src
= newTemp(ty
);
3524 IRTemp dst0
= newTemp(ty
);
3525 IRTemp addr
= IRTemp_INVALID
;
3526 IROp op8
= Iop_INVALID
;
3527 ULong mask
= mkSizeMask(sz
);
3529 switch (gregLO3ofRM(modrm
)) {
3530 case 0: op8
= Iop_Add8
; break; case 1: op8
= Iop_Or8
; break;
3531 case 2: break; // ADC
3532 case 3: break; // SBB
3533 case 4: op8
= Iop_And8
; break; case 5: op8
= Iop_Sub8
; break;
3534 case 6: op8
= Iop_Xor8
; break; case 7: op8
= Iop_Sub8
; break;
3536 default: vpanic("dis_Grp1(amd64): unhandled case");
3539 if (epartIsReg(modrm
)) {
3540 vassert(am_sz
== 1);
3542 assign(dst0
, getIRegE(sz
,pfx
,modrm
));
3543 assign(src
, mkU(ty
,d64
& mask
));
3545 if (gregLO3ofRM(modrm
) == 2 /* ADC */) {
3546 helper_ADC( sz
, dst1
, dst0
, src
,
3547 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3549 if (gregLO3ofRM(modrm
) == 3 /* SBB */) {
3550 helper_SBB( sz
, dst1
, dst0
, src
,
3551 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3553 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)));
3555 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3557 setFlags_DEP1(op8
, dst1
, ty
);
3560 if (gregLO3ofRM(modrm
) < 7)
3561 putIRegE(sz
, pfx
, modrm
, mkexpr(dst1
));
3563 delta
+= (am_sz
+ d_sz
);
3564 DIP("%s%c $%lld, %s\n",
3565 nameGrp1(gregLO3ofRM(modrm
)), nameISize(sz
), d64
,
3566 nameIRegE(sz
,pfx
,modrm
));
3568 addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, /*xtra*/d_sz
);
3570 assign(dst0
, loadLE(ty
,mkexpr(addr
)));
3571 assign(src
, mkU(ty
,d64
& mask
));
3573 if (gregLO3ofRM(modrm
) == 2 /* ADC */) {
3574 if (haveLOCK(pfx
)) {
3575 /* cas-style store */
3576 helper_ADC( sz
, dst1
, dst0
, src
,
3577 /*store*/addr
, dst0
/*expVal*/, guest_RIP_curr_instr
);
3580 helper_ADC( sz
, dst1
, dst0
, src
,
3581 /*store*/addr
, IRTemp_INVALID
, 0 );
3584 if (gregLO3ofRM(modrm
) == 3 /* SBB */) {
3585 if (haveLOCK(pfx
)) {
3586 /* cas-style store */
3587 helper_SBB( sz
, dst1
, dst0
, src
,
3588 /*store*/addr
, dst0
/*expVal*/, guest_RIP_curr_instr
);
3591 helper_SBB( sz
, dst1
, dst0
, src
,
3592 /*store*/addr
, IRTemp_INVALID
, 0 );
3595 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)));
3596 if (gregLO3ofRM(modrm
) < 7) {
3597 if (haveLOCK(pfx
)) {
3598 casLE( mkexpr(addr
), mkexpr(dst0
)/*expVal*/,
3599 mkexpr(dst1
)/*newVal*/,
3600 guest_RIP_curr_instr
);
3602 storeLE(mkexpr(addr
), mkexpr(dst1
));
3606 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3608 setFlags_DEP1(op8
, dst1
, ty
);
3611 delta
+= (len
+d_sz
);
3612 DIP("%s%c $%lld, %s\n",
3613 nameGrp1(gregLO3ofRM(modrm
)), nameISize(sz
),
3620 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed
3624 ULong
dis_Grp2 ( const VexAbiInfo
* vbi
,
3626 Long delta
, UChar modrm
,
3627 Int am_sz
, Int d_sz
, Int sz
, IRExpr
* shift_expr
,
3628 const HChar
* shift_expr_txt
, Bool
* decode_OK
)
3630 /* delta on entry points at the modrm byte. */
3633 Bool isShift
, isRotate
, isRotateC
;
3634 IRType ty
= szToITy(sz
);
3635 IRTemp dst0
= newTemp(ty
);
3636 IRTemp dst1
= newTemp(ty
);
3637 IRTemp addr
= IRTemp_INVALID
;
3641 vassert(sz
== 1 || sz
== 2 || sz
== 4 || sz
== 8);
3643 /* Put value to shift/rotate in dst0. */
3644 if (epartIsReg(modrm
)) {
3645 assign(dst0
, getIRegE(sz
, pfx
, modrm
));
3646 delta
+= (am_sz
+ d_sz
);
3648 addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, /*xtra*/d_sz
);
3649 assign(dst0
, loadLE(ty
,mkexpr(addr
)));
3650 delta
+= len
+ d_sz
;
3654 switch (gregLO3ofRM(modrm
)) { case 4: case 5: case 6: case 7: isShift
= True
; }
3657 switch (gregLO3ofRM(modrm
)) { case 0: case 1: isRotate
= True
; }
3660 switch (gregLO3ofRM(modrm
)) { case 2: case 3: isRotateC
= True
; }
3662 if (!isShift
&& !isRotate
&& !isRotateC
) {
3664 vpanic("dis_Grp2(Reg): unhandled case(amd64)");
3668 /* Call a helper; this insn is so ridiculous it does not deserve
3669 better. One problem is, the helper has to calculate both the
3670 new value and the new flags. This is more than 64 bits, and
3671 there is no way to return more than 64 bits from the helper.
3672 Hence the crude and obvious solution is to call it twice,
3673 using the sign of the sz field to indicate whether it is the
3674 value or rflags result we want.
3676 Bool left
= toBool(gregLO3ofRM(modrm
) == 2);
3678 IRExpr
** argsRFLAGS
;
3680 IRTemp new_value
= newTemp(Ity_I64
);
3681 IRTemp new_rflags
= newTemp(Ity_I64
);
3682 IRTemp old_rflags
= newTemp(Ity_I64
);
3684 assign( old_rflags
, widenUto64(mk_amd64g_calculate_rflags_all()) );
3687 = mkIRExprVec_4( widenUto64(mkexpr(dst0
)), /* thing to rotate */
3688 widenUto64(shift_expr
), /* rotate amount */
3695 left
? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3696 left
? &amd64g_calculate_RCL
: &amd64g_calculate_RCR
,
3702 = mkIRExprVec_4( widenUto64(mkexpr(dst0
)), /* thing to rotate */
3703 widenUto64(shift_expr
), /* rotate amount */
3710 left
? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3711 left
? &amd64g_calculate_RCL
: &amd64g_calculate_RCR
,
3716 assign( dst1
, narrowTo(ty
, mkexpr(new_value
)) );
3717 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
3718 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(new_rflags
) ));
3719 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
3720 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
3726 IRTemp pre64
= newTemp(Ity_I64
);
3727 IRTemp res64
= newTemp(Ity_I64
);
3728 IRTemp res64ss
= newTemp(Ity_I64
);
3729 IRTemp shift_amt
= newTemp(Ity_I8
);
3730 UChar mask
= toUChar(sz
==8 ? 63 : 31);
3733 switch (gregLO3ofRM(modrm
)) {
3734 case 4: op64
= Iop_Shl64
; break;
3735 case 5: op64
= Iop_Shr64
; break;
3736 case 6: op64
= Iop_Shl64
; break;
3737 case 7: op64
= Iop_Sar64
; break;
3739 default: vpanic("dis_Grp2:shift"); break;
3742 /* Widen the value to be shifted to 64 bits, do the shift, and
3743 narrow back down. This seems surprisingly long-winded, but
3744 unfortunately the AMD semantics requires that 8/16/32-bit
3745 shifts give defined results for shift values all the way up
3746 to 32, and this seems the simplest way to do it. It has the
3747 advantage that the only IR level shifts generated are of 64
3748 bit values, and the shift amount is guaranteed to be in the
3749 range 0 .. 63, thereby observing the IR semantics requiring
3750 all shift values to be in the range 0 .. 2^word_size-1.
3752 Therefore the shift amount is masked with 63 for 64-bit shifts
3753 and 31 for all others.
3755 /* shift_amt = shift_expr & MASK, regardless of operation size */
3756 assign( shift_amt
, binop(Iop_And8
, shift_expr
, mkU8(mask
)) );
3758 /* suitably widen the value to be shifted to 64 bits. */
3759 assign( pre64
, op64
==Iop_Sar64
? widenSto64(mkexpr(dst0
))
3760 : widenUto64(mkexpr(dst0
)) );
3762 /* res64 = pre64 `shift` shift_amt */
3763 assign( res64
, binop(op64
, mkexpr(pre64
), mkexpr(shift_amt
)) );
3765 /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */
3771 mkexpr(shift_amt
), mkU8(1)),
3774 /* Build the flags thunk. */
3775 setFlags_DEP1_DEP2_shift(op64
, res64
, res64ss
, ty
, shift_amt
);
3777 /* Narrow the result back down. */
3778 assign( dst1
, narrowTo(ty
, mkexpr(res64
)) );
3780 } /* if (isShift) */
3784 Int ccOp
= ty
==Ity_I8
? 0 : (ty
==Ity_I16
? 1
3785 : (ty
==Ity_I32
? 2 : 3));
3786 Bool left
= toBool(gregLO3ofRM(modrm
) == 0);
3787 IRTemp rot_amt
= newTemp(Ity_I8
);
3788 IRTemp rot_amt64
= newTemp(Ity_I8
);
3789 IRTemp oldFlags
= newTemp(Ity_I64
);
3790 UChar mask
= toUChar(sz
==8 ? 63 : 31);
3792 /* rot_amt = shift_expr & mask */
3793 /* By masking the rotate amount thusly, the IR-level Shl/Shr
3794 expressions never shift beyond the word size and thus remain
3796 assign(rot_amt64
, binop(Iop_And8
, shift_expr
, mkU8(mask
)));
3799 assign(rot_amt
, mkexpr(rot_amt64
));
3801 assign(rot_amt
, binop(Iop_And8
, mkexpr(rot_amt64
), mkU8(8*sz
-1)));
3805 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
3807 binop( mkSizedOp(ty
,Iop_Or8
),
3808 binop( mkSizedOp(ty
,Iop_Shl8
),
3812 binop( mkSizedOp(ty
,Iop_Shr8
),
3814 binop(Iop_Sub8
,mkU8(8*sz
), mkexpr(rot_amt
))
3818 ccOp
+= AMD64G_CC_OP_ROLB
;
3820 } else { /* right */
3822 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
3824 binop( mkSizedOp(ty
,Iop_Or8
),
3825 binop( mkSizedOp(ty
,Iop_Shr8
),
3829 binop( mkSizedOp(ty
,Iop_Shl8
),
3831 binop(Iop_Sub8
,mkU8(8*sz
), mkexpr(rot_amt
))
3835 ccOp
+= AMD64G_CC_OP_RORB
;
3839 /* dst1 now holds the rotated value. Build flag thunk. We
3840 need the resulting value for this, and the previous flags.
3841 Except don't set it if the rotate count is zero. */
3843 assign(oldFlags
, mk_amd64g_calculate_rflags_all());
3845 /* rot_amt64 :: Ity_I8. We need to convert it to I1. */
3846 IRTemp rot_amt64b
= newTemp(Ity_I1
);
3847 assign(rot_amt64b
, binop(Iop_CmpNE8
, mkexpr(rot_amt64
), mkU8(0)) );
3849 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */
3850 stmt( IRStmt_Put( OFFB_CC_OP
,
3851 IRExpr_ITE( mkexpr(rot_amt64b
),
3853 IRExpr_Get(OFFB_CC_OP
,Ity_I64
) ) ));
3854 stmt( IRStmt_Put( OFFB_CC_DEP1
,
3855 IRExpr_ITE( mkexpr(rot_amt64b
),
3856 widenUto64(mkexpr(dst1
)),
3857 IRExpr_Get(OFFB_CC_DEP1
,Ity_I64
) ) ));
3858 stmt( IRStmt_Put( OFFB_CC_DEP2
,
3859 IRExpr_ITE( mkexpr(rot_amt64b
),
3861 IRExpr_Get(OFFB_CC_DEP2
,Ity_I64
) ) ));
3862 stmt( IRStmt_Put( OFFB_CC_NDEP
,
3863 IRExpr_ITE( mkexpr(rot_amt64b
),
3865 IRExpr_Get(OFFB_CC_NDEP
,Ity_I64
) ) ));
3866 } /* if (isRotate) */
3868 /* Save result, and finish up. */
3869 if (epartIsReg(modrm
)) {
3870 putIRegE(sz
, pfx
, modrm
, mkexpr(dst1
));
3871 if (vex_traceflags
& VEX_TRACE_FE
) {
3873 nameGrp2(gregLO3ofRM(modrm
)), nameISize(sz
) );
3875 vex_printf("%s", shift_expr_txt
);
3877 ppIRExpr(shift_expr
);
3878 vex_printf(", %s\n", nameIRegE(sz
,pfx
,modrm
));
3881 storeLE(mkexpr(addr
), mkexpr(dst1
));
3882 if (vex_traceflags
& VEX_TRACE_FE
) {
3884 nameGrp2(gregLO3ofRM(modrm
)), nameISize(sz
) );
3886 vex_printf("%s", shift_expr_txt
);
3888 ppIRExpr(shift_expr
);
3889 vex_printf(", %s\n", dis_buf
);
3896 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
3898 ULong
dis_Grp8_Imm ( const VexAbiInfo
* vbi
,
3900 Long delta
, UChar modrm
,
3901 Int am_sz
, Int sz
, ULong src_val
,
3904 /* src_val denotes a d8.
3905 And delta on entry points at the modrm byte. */
3907 IRType ty
= szToITy(sz
);
3908 IRTemp t2
= newTemp(Ity_I64
);
3909 IRTemp t2m
= newTemp(Ity_I64
);
3910 IRTemp t_addr
= IRTemp_INVALID
;
3914 /* we're optimists :-) */
3917 /* Check whether F2 or F3 are acceptable. */
3918 if (epartIsReg(modrm
)) {
3919 /* F2 or F3 are not allowed in the register case. */
3920 if (haveF2orF3(pfx
)) {
3925 /* F2 or F3 (but not both) are allowable provided LOCK is also
3927 if (haveF2orF3(pfx
)) {
3928 if (haveF2andF3(pfx
) || !haveLOCK(pfx
)) {
3935 /* Limit src_val -- the bit offset -- to something within a word.
3936 The Intel docs say that literal offsets larger than a word are
3937 masked in this way. */
3939 case 2: src_val
&= 15; break;
3940 case 4: src_val
&= 31; break;
3941 case 8: src_val
&= 63; break;
3942 default: *decode_OK
= False
; return delta
;
3945 /* Invent a mask suitable for the operation. */
3946 switch (gregLO3ofRM(modrm
)) {
3947 case 4: /* BT */ mask
= 0; break;
3948 case 5: /* BTS */ mask
= 1ULL << src_val
; break;
3949 case 6: /* BTR */ mask
= ~(1ULL << src_val
); break;
3950 case 7: /* BTC */ mask
= 1ULL << src_val
; break;
3951 /* If this needs to be extended, probably simplest to make a
3952 new function to handle the other cases (0 .. 3). The
3953 Intel docs do however not indicate any use for 0 .. 3, so
3954 we don't expect this to happen. */
3955 default: *decode_OK
= False
; return delta
;
3958 /* Fetch the value to be tested and modified into t2, which is
3959 64-bits wide regardless of sz. */
3960 if (epartIsReg(modrm
)) {
3961 vassert(am_sz
== 1);
3962 assign( t2
, widenUto64(getIRegE(sz
, pfx
, modrm
)) );
3963 delta
+= (am_sz
+ 1);
3964 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm
)),
3966 src_val
, nameIRegE(sz
,pfx
,modrm
));
3969 t_addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, 1 );
3971 assign( t2
, widenUto64(loadLE(ty
, mkexpr(t_addr
))) );
3972 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm
)),
3977 /* Compute the new value into t2m, if non-BT. */
3978 switch (gregLO3ofRM(modrm
)) {
3982 assign( t2m
, binop(Iop_Or64
, mkU64(mask
), mkexpr(t2
)) );
3985 assign( t2m
, binop(Iop_And64
, mkU64(mask
), mkexpr(t2
)) );
3988 assign( t2m
, binop(Iop_Xor64
, mkU64(mask
), mkexpr(t2
)) );
3991 /*NOTREACHED*/ /*the previous switch guards this*/
3995 /* Write the result back, if non-BT. */
3996 if (gregLO3ofRM(modrm
) != 4 /* BT */) {
3997 if (epartIsReg(modrm
)) {
3998 putIRegE(sz
, pfx
, modrm
, narrowTo(ty
, mkexpr(t2m
)));
4000 if (haveLOCK(pfx
)) {
4001 casLE( mkexpr(t_addr
),
4002 narrowTo(ty
, mkexpr(t2
))/*expd*/,
4003 narrowTo(ty
, mkexpr(t2m
))/*new*/,
4004 guest_RIP_curr_instr
);
4006 storeLE(mkexpr(t_addr
), narrowTo(ty
, mkexpr(t2m
)));
4011 /* Copy relevant bit from t2 into the carry flag. */
4012 /* Flags: C=selected bit, O,S,A,P undefined, Z unchanged */
4013 /* so let's also keep O,S,A,P unchanged */
4014 const ULong maskC
= AMD64G_CC_MASK_C
;
4015 const ULong maskOSZAP
= AMD64G_CC_MASK_O
| AMD64G_CC_MASK_S
4016 | AMD64G_CC_MASK_Z
| AMD64G_CC_MASK_A
4019 IRTemp old_rflags
= newTemp(Ity_I64
);
4020 assign(old_rflags
, mk_amd64g_calculate_rflags_all());
4022 IRTemp new_rflags
= newTemp(Ity_I64
);
4025 binop(Iop_And64
, mkexpr(old_rflags
), mkU64(maskOSZAP
)),
4027 binop(Iop_Shr64
, mkexpr(t2
), mkU8(src_val
)),
4030 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
4031 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
4032 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(new_rflags
) ));
4033 /* Set NDEP even though it isn't used. This makes redundant-PUT
4034 elimination of previous stores to this field work better. */
4035 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
4041 /* Signed/unsigned widening multiply. Generate IR to multiply the
4042 value in RAX/EAX/AX/AL by the given IRTemp, and park the result in
4043 RDX:RAX/EDX:EAX/DX:AX/AX.
4045 static void codegen_mulL_A_D ( Int sz
, Bool syned
,
4046 IRTemp tmp
, const HChar
* tmp_txt
)
4048 IRType ty
= szToITy(sz
);
4049 IRTemp t1
= newTemp(ty
);
4051 assign( t1
, getIRegRAX(sz
) );
4055 IRTemp res128
= newTemp(Ity_I128
);
4056 IRTemp resHi
= newTemp(Ity_I64
);
4057 IRTemp resLo
= newTemp(Ity_I64
);
4058 IROp mulOp
= syned
? Iop_MullS64
: Iop_MullU64
;
4059 UInt tBaseOp
= syned
? AMD64G_CC_OP_SMULB
: AMD64G_CC_OP_UMULB
;
4060 setFlags_MUL ( Ity_I64
, t1
, tmp
, tBaseOp
);
4061 assign( res128
, binop(mulOp
, mkexpr(t1
), mkexpr(tmp
)) );
4062 assign( resHi
, unop(Iop_128HIto64
,mkexpr(res128
)));
4063 assign( resLo
, unop(Iop_128to64
,mkexpr(res128
)));
4064 putIReg64(R_RDX
, mkexpr(resHi
));
4065 putIReg64(R_RAX
, mkexpr(resLo
));
4069 IRTemp res64
= newTemp(Ity_I64
);
4070 IRTemp resHi
= newTemp(Ity_I32
);
4071 IRTemp resLo
= newTemp(Ity_I32
);
4072 IROp mulOp
= syned
? Iop_MullS32
: Iop_MullU32
;
4073 UInt tBaseOp
= syned
? AMD64G_CC_OP_SMULB
: AMD64G_CC_OP_UMULB
;
4074 setFlags_MUL ( Ity_I32
, t1
, tmp
, tBaseOp
);
4075 assign( res64
, binop(mulOp
, mkexpr(t1
), mkexpr(tmp
)) );
4076 assign( resHi
, unop(Iop_64HIto32
,mkexpr(res64
)));
4077 assign( resLo
, unop(Iop_64to32
,mkexpr(res64
)));
4078 putIRegRDX(4, mkexpr(resHi
));
4079 putIRegRAX(4, mkexpr(resLo
));
4083 IRTemp res32
= newTemp(Ity_I32
);
4084 IRTemp resHi
= newTemp(Ity_I16
);
4085 IRTemp resLo
= newTemp(Ity_I16
);
4086 IROp mulOp
= syned
? Iop_MullS16
: Iop_MullU16
;
4087 UInt tBaseOp
= syned
? AMD64G_CC_OP_SMULB
: AMD64G_CC_OP_UMULB
;
4088 setFlags_MUL ( Ity_I16
, t1
, tmp
, tBaseOp
);
4089 assign( res32
, binop(mulOp
, mkexpr(t1
), mkexpr(tmp
)) );
4090 assign( resHi
, unop(Iop_32HIto16
,mkexpr(res32
)));
4091 assign( resLo
, unop(Iop_32to16
,mkexpr(res32
)));
4092 putIRegRDX(2, mkexpr(resHi
));
4093 putIRegRAX(2, mkexpr(resLo
));
4097 IRTemp res16
= newTemp(Ity_I16
);
4098 IRTemp resHi
= newTemp(Ity_I8
);
4099 IRTemp resLo
= newTemp(Ity_I8
);
4100 IROp mulOp
= syned
? Iop_MullS8
: Iop_MullU8
;
4101 UInt tBaseOp
= syned
? AMD64G_CC_OP_SMULB
: AMD64G_CC_OP_UMULB
;
4102 setFlags_MUL ( Ity_I8
, t1
, tmp
, tBaseOp
);
4103 assign( res16
, binop(mulOp
, mkexpr(t1
), mkexpr(tmp
)) );
4104 assign( resHi
, unop(Iop_16HIto8
,mkexpr(res16
)));
4105 assign( resLo
, unop(Iop_16to8
,mkexpr(res16
)));
4106 putIRegRAX(2, mkexpr(res16
));
4111 vpanic("codegen_mulL_A_D(amd64)");
4113 DIP("%s%c %s\n", syned
? "imul" : "mul", nameISize(sz
), tmp_txt
);
4117 /* Group 3 extended opcodes. We have to decide here whether F2 and F3
4120 ULong
dis_Grp3 ( const VexAbiInfo
* vbi
,
4121 Prefix pfx
, Int sz
, Long delta
, Bool
* decode_OK
)
4128 IRType ty
= szToITy(sz
);
4129 IRTemp t1
= newTemp(ty
);
4130 IRTemp dst1
, src
, dst0
;
4132 modrm
= getUChar(delta
);
4133 if (epartIsReg(modrm
)) {
4134 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */
4135 if (haveF2orF3(pfx
)) goto unhandled
;
4136 switch (gregLO3ofRM(modrm
)) {
4137 case 0: { /* TEST */
4139 d64
= getSDisp(imin(4,sz
), delta
);
4140 delta
+= imin(4,sz
);
4142 assign(dst1
, binop(mkSizedOp(ty
,Iop_And8
),
4143 getIRegE(sz
,pfx
,modrm
),
4144 mkU(ty
, d64
& mkSizeMask(sz
))));
4145 setFlags_DEP1( Iop_And8
, dst1
, ty
);
4146 DIP("test%c $%lld, %s\n",
4148 nameIRegE(sz
, pfx
, modrm
));
4156 putIRegE(sz
, pfx
, modrm
,
4157 unop(mkSizedOp(ty
,Iop_Not8
),
4158 getIRegE(sz
, pfx
, modrm
)));
4159 DIP("not%c %s\n", nameISize(sz
),
4160 nameIRegE(sz
, pfx
, modrm
));
4167 assign(dst0
, mkU(ty
,0));
4168 assign(src
, getIRegE(sz
, pfx
, modrm
));
4169 assign(dst1
, binop(mkSizedOp(ty
,Iop_Sub8
), mkexpr(dst0
),
4171 setFlags_DEP1_DEP2(Iop_Sub8
, dst0
, src
, ty
);
4172 putIRegE(sz
, pfx
, modrm
, mkexpr(dst1
));
4173 DIP("neg%c %s\n", nameISize(sz
), nameIRegE(sz
, pfx
, modrm
));
4175 case 4: /* MUL (unsigned widening) */
4178 assign(src
, getIRegE(sz
,pfx
,modrm
));
4179 codegen_mulL_A_D ( sz
, False
, src
,
4180 nameIRegE(sz
,pfx
,modrm
) );
4182 case 5: /* IMUL (signed widening) */
4185 assign(src
, getIRegE(sz
,pfx
,modrm
));
4186 codegen_mulL_A_D ( sz
, True
, src
,
4187 nameIRegE(sz
,pfx
,modrm
) );
4191 assign( t1
, getIRegE(sz
, pfx
, modrm
) );
4192 codegen_div ( sz
, t1
, False
);
4193 DIP("div%c %s\n", nameISize(sz
),
4194 nameIRegE(sz
, pfx
, modrm
));
4198 assign( t1
, getIRegE(sz
, pfx
, modrm
) );
4199 codegen_div ( sz
, t1
, True
);
4200 DIP("idiv%c %s\n", nameISize(sz
),
4201 nameIRegE(sz
, pfx
, modrm
));
4205 vpanic("Grp3(amd64,R)");
4208 /* Decide if F2/XACQ or F3/XREL might be valid. */
4209 Bool validF2orF3
= haveF2orF3(pfx
) ? False
: True
;
4210 if ((gregLO3ofRM(modrm
) == 3/*NEG*/ || gregLO3ofRM(modrm
) == 2/*NOT*/)
4211 && haveF2orF3(pfx
) && !haveF2andF3(pfx
) && haveLOCK(pfx
)) {
4214 if (!validF2orF3
) goto unhandled
;
4216 addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
,
4217 /* we have to inform disAMode of any immediate
4219 gregLO3ofRM(modrm
)==0/*TEST*/
4225 assign(t1
, loadLE(ty
,mkexpr(addr
)));
4226 switch (gregLO3ofRM(modrm
)) {
4227 case 0: { /* TEST */
4228 d64
= getSDisp(imin(4,sz
), delta
);
4229 delta
+= imin(4,sz
);
4231 assign(dst1
, binop(mkSizedOp(ty
,Iop_And8
),
4233 mkU(ty
, d64
& mkSizeMask(sz
))));
4234 setFlags_DEP1( Iop_And8
, dst1
, ty
);
4235 DIP("test%c $%lld, %s\n", nameISize(sz
), d64
, dis_buf
);
4243 assign(dst1
, unop(mkSizedOp(ty
,Iop_Not8
), mkexpr(t1
)));
4244 if (haveLOCK(pfx
)) {
4245 casLE( mkexpr(addr
), mkexpr(t1
)/*expd*/, mkexpr(dst1
)/*new*/,
4246 guest_RIP_curr_instr
);
4248 storeLE( mkexpr(addr
), mkexpr(dst1
) );
4250 DIP("not%c %s\n", nameISize(sz
), dis_buf
);
4256 assign(dst0
, mkU(ty
,0));
4257 assign(src
, mkexpr(t1
));
4258 assign(dst1
, binop(mkSizedOp(ty
,Iop_Sub8
), mkexpr(dst0
),
4260 if (haveLOCK(pfx
)) {
4261 casLE( mkexpr(addr
), mkexpr(t1
)/*expd*/, mkexpr(dst1
)/*new*/,
4262 guest_RIP_curr_instr
);
4264 storeLE( mkexpr(addr
), mkexpr(dst1
) );
4266 setFlags_DEP1_DEP2(Iop_Sub8
, dst0
, src
, ty
);
4267 DIP("neg%c %s\n", nameISize(sz
), dis_buf
);
4269 case 4: /* MUL (unsigned widening) */
4270 codegen_mulL_A_D ( sz
, False
, t1
, dis_buf
);
4273 codegen_mulL_A_D ( sz
, True
, t1
, dis_buf
);
4276 codegen_div ( sz
, t1
, False
);
4277 DIP("div%c %s\n", nameISize(sz
), dis_buf
);
4280 codegen_div ( sz
, t1
, True
);
4281 DIP("idiv%c %s\n", nameISize(sz
), dis_buf
);
4285 vpanic("Grp3(amd64,M)");
4295 /* Group 4 extended opcodes. We have to decide here whether F2 and F3
4298 ULong
dis_Grp4 ( const VexAbiInfo
* vbi
,
4299 Prefix pfx
, Long delta
, Bool
* decode_OK
)
4305 IRTemp t1
= newTemp(ty
);
4306 IRTemp t2
= newTemp(ty
);
4310 modrm
= getUChar(delta
);
4311 if (epartIsReg(modrm
)) {
4312 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */
4313 if (haveF2orF3(pfx
)) goto unhandled
;
4314 assign(t1
, getIRegE(1, pfx
, modrm
));
4315 switch (gregLO3ofRM(modrm
)) {
4317 assign(t2
, binop(Iop_Add8
, mkexpr(t1
), mkU8(1)));
4318 putIRegE(1, pfx
, modrm
, mkexpr(t2
));
4319 setFlags_INC_DEC( True
, t2
, ty
);
4322 assign(t2
, binop(Iop_Sub8
, mkexpr(t1
), mkU8(1)));
4323 putIRegE(1, pfx
, modrm
, mkexpr(t2
));
4324 setFlags_INC_DEC( False
, t2
, ty
);
4331 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm
)),
4332 nameIRegE(1, pfx
, modrm
));
4334 /* Decide if F2/XACQ or F3/XREL might be valid. */
4335 Bool validF2orF3
= haveF2orF3(pfx
) ? False
: True
;
4336 if ((gregLO3ofRM(modrm
) == 0/*INC*/ || gregLO3ofRM(modrm
) == 1/*DEC*/)
4337 && haveF2orF3(pfx
) && !haveF2andF3(pfx
) && haveLOCK(pfx
)) {
4340 if (!validF2orF3
) goto unhandled
;
4342 IRTemp addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
4343 assign( t1
, loadLE(ty
, mkexpr(addr
)) );
4344 switch (gregLO3ofRM(modrm
)) {
4346 assign(t2
, binop(Iop_Add8
, mkexpr(t1
), mkU8(1)));
4347 if (haveLOCK(pfx
)) {
4348 casLE( mkexpr(addr
), mkexpr(t1
)/*expd*/, mkexpr(t2
)/*new*/,
4349 guest_RIP_curr_instr
);
4351 storeLE( mkexpr(addr
), mkexpr(t2
) );
4353 setFlags_INC_DEC( True
, t2
, ty
);
4356 assign(t2
, binop(Iop_Sub8
, mkexpr(t1
), mkU8(1)));
4357 if (haveLOCK(pfx
)) {
4358 casLE( mkexpr(addr
), mkexpr(t1
)/*expd*/, mkexpr(t2
)/*new*/,
4359 guest_RIP_curr_instr
);
4361 storeLE( mkexpr(addr
), mkexpr(t2
) );
4363 setFlags_INC_DEC( False
, t2
, ty
);
4370 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm
)), dis_buf
);
4379 /* Group 5 extended opcodes. We have to decide here whether F2 and F3
4382 ULong
dis_Grp5 ( const VexAbiInfo
* vbi
,
4383 Prefix pfx
, Int sz
, Long delta
,
4384 /*MOD*/DisResult
* dres
, /*OUT*/Bool
* decode_OK
)
4389 IRTemp addr
= IRTemp_INVALID
;
4390 IRType ty
= szToITy(sz
);
4391 IRTemp t1
= newTemp(ty
);
4392 IRTemp t2
= IRTemp_INVALID
;
4393 IRTemp t3
= IRTemp_INVALID
;
4398 modrm
= getUChar(delta
);
4399 if (epartIsReg(modrm
)) {
4400 /* F2/XACQ and F3/XREL are always invalid in the non-mem case.
4401 F2/CALL and F2/JMP may have bnd prefix. */
4404 && (gregLO3ofRM(modrm
) == 2 || gregLO3ofRM(modrm
) == 4)))
4406 assign(t1
, getIRegE(sz
,pfx
,modrm
));
4407 switch (gregLO3ofRM(modrm
)) {
4410 assign(t2
, binop(mkSizedOp(ty
,Iop_Add8
),
4411 mkexpr(t1
), mkU(ty
,1)));
4412 setFlags_INC_DEC( True
, t2
, ty
);
4413 putIRegE(sz
,pfx
,modrm
, mkexpr(t2
));
4417 assign(t2
, binop(mkSizedOp(ty
,Iop_Sub8
),
4418 mkexpr(t1
), mkU(ty
,1)));
4419 setFlags_INC_DEC( False
, t2
, ty
);
4420 putIRegE(sz
,pfx
,modrm
, mkexpr(t2
));
4422 case 2: /* call Ev */
4423 /* Ignore any sz value and operate as if sz==8. */
4424 if (!(sz
== 4 || sz
== 8)) goto unhandledR
;
4425 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
4427 t3
= newTemp(Ity_I64
);
4428 assign(t3
, getIRegE(sz
,pfx
,modrm
));
4429 t2
= newTemp(Ity_I64
);
4430 assign(t2
, binop(Iop_Sub64
, getIReg64(R_RSP
), mkU64(8)));
4431 putIReg64(R_RSP
, mkexpr(t2
));
4432 storeLE( mkexpr(t2
), mkU64(guest_RIP_bbstart
+delta
+1));
4433 make_redzone_AbiHint(vbi
, t2
, t3
/*nia*/, "call-Ev(reg)");
4434 jmp_treg(dres
, Ijk_Call
, t3
);
4435 vassert(dres
->whatNext
== Dis_StopHere
);
4438 case 4: /* jmp Ev */
4439 /* Ignore any sz value and operate as if sz==8. */
4440 if (!(sz
== 4 || sz
== 8)) goto unhandledR
;
4441 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
4443 t3
= newTemp(Ity_I64
);
4444 assign(t3
, getIRegE(sz
,pfx
,modrm
));
4445 jmp_treg(dres
, Ijk_Boring
, t3
);
4446 vassert(dres
->whatNext
== Dis_StopHere
);
4449 case 6: /* PUSH Ev */
4450 /* There is no encoding for 32-bit operand size; hence ... */
4451 if (sz
== 4) sz
= 8;
4452 if (sz
== 8 || sz
== 2) {
4453 ty
= szToITy(sz
); /* redo it, since sz might have changed */
4455 assign(t3
, getIRegE(sz
,pfx
,modrm
));
4456 t2
= newTemp(Ity_I64
);
4457 assign( t2
, binop(Iop_Sub64
,getIReg64(R_RSP
),mkU64(sz
)) );
4458 putIReg64(R_RSP
, mkexpr(t2
) );
4459 storeLE( mkexpr(t2
), mkexpr(t3
) );
4462 goto unhandledR
; /* awaiting test case */
4470 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm
)),
4471 showSz
? nameISize(sz
) : ' ',
4472 nameIRegE(sz
, pfx
, modrm
));
4474 /* Decide if F2/XACQ, F3/XREL, F2/CALL or F2/JMP might be valid. */
4475 Bool validF2orF3
= haveF2orF3(pfx
) ? False
: True
;
4476 if ((gregLO3ofRM(modrm
) == 0/*INC*/ || gregLO3ofRM(modrm
) == 1/*DEC*/)
4477 && haveF2orF3(pfx
) && !haveF2andF3(pfx
) && haveLOCK(pfx
)) {
4479 } else if ((gregLO3ofRM(modrm
) == 2 || gregLO3ofRM(modrm
) == 4)
4480 && (haveF2(pfx
) && !haveF3(pfx
))) {
4483 if (!validF2orF3
) goto unhandledM
;
4485 addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
4486 if (gregLO3ofRM(modrm
) != 2 && gregLO3ofRM(modrm
) != 4
4487 && gregLO3ofRM(modrm
) != 6) {
4488 assign(t1
, loadLE(ty
,mkexpr(addr
)));
4490 switch (gregLO3ofRM(modrm
)) {
4493 assign(t2
, binop(mkSizedOp(ty
,Iop_Add8
),
4494 mkexpr(t1
), mkU(ty
,1)));
4495 if (haveLOCK(pfx
)) {
4496 casLE( mkexpr(addr
),
4497 mkexpr(t1
), mkexpr(t2
), guest_RIP_curr_instr
);
4499 storeLE(mkexpr(addr
),mkexpr(t2
));
4501 setFlags_INC_DEC( True
, t2
, ty
);
4505 assign(t2
, binop(mkSizedOp(ty
,Iop_Sub8
),
4506 mkexpr(t1
), mkU(ty
,1)));
4507 if (haveLOCK(pfx
)) {
4508 casLE( mkexpr(addr
),
4509 mkexpr(t1
), mkexpr(t2
), guest_RIP_curr_instr
);
4511 storeLE(mkexpr(addr
),mkexpr(t2
));
4513 setFlags_INC_DEC( False
, t2
, ty
);
4515 case 2: /* call Ev */
4516 /* Ignore any sz value and operate as if sz==8. */
4517 if (!(sz
== 4 || sz
== 8)) goto unhandledM
;
4518 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
4520 t3
= newTemp(Ity_I64
);
4521 assign(t3
, loadLE(Ity_I64
,mkexpr(addr
)));
4522 t2
= newTemp(Ity_I64
);
4523 assign(t2
, binop(Iop_Sub64
, getIReg64(R_RSP
), mkU64(8)));
4524 putIReg64(R_RSP
, mkexpr(t2
));
4525 storeLE( mkexpr(t2
), mkU64(guest_RIP_bbstart
+delta
+len
));
4526 make_redzone_AbiHint(vbi
, t2
, t3
/*nia*/, "call-Ev(mem)");
4527 jmp_treg(dres
, Ijk_Call
, t3
);
4528 vassert(dres
->whatNext
== Dis_StopHere
);
4531 case 4: /* JMP Ev */
4532 /* Ignore any sz value and operate as if sz==8. */
4533 if (!(sz
== 4 || sz
== 8)) goto unhandledM
;
4534 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
4536 t3
= newTemp(Ity_I64
);
4537 assign(t3
, loadLE(Ity_I64
,mkexpr(addr
)));
4538 jmp_treg(dres
, Ijk_Boring
, t3
);
4539 vassert(dres
->whatNext
== Dis_StopHere
);
4542 case 6: /* PUSH Ev */
4543 /* There is no encoding for 32-bit operand size; hence ... */
4544 if (sz
== 4) sz
= 8;
4545 if (sz
== 8 || sz
== 2) {
4546 ty
= szToITy(sz
); /* redo it, since sz might have changed */
4548 assign(t3
, loadLE(ty
,mkexpr(addr
)));
4549 t2
= newTemp(Ity_I64
);
4550 assign( t2
, binop(Iop_Sub64
,getIReg64(R_RSP
),mkU64(sz
)) );
4551 putIReg64(R_RSP
, mkexpr(t2
) );
4552 storeLE( mkexpr(t2
), mkexpr(t3
) );
4555 goto unhandledM
; /* awaiting test case */
4563 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm
)),
4564 showSz
? nameISize(sz
) : ' ',
4571 /*------------------------------------------------------------*/
4572 /*--- Disassembling string ops (including REP prefixes) ---*/
4573 /*------------------------------------------------------------*/
4575 /* Code shared by all the string ops */
4577 void dis_string_op_increment ( Int sz
, IRTemp t_inc
)
4580 if (sz
== 8 || sz
== 4 || sz
== 2) {
4582 if (sz
== 4) logSz
= 2;
4583 if (sz
== 8) logSz
= 3;
4585 binop(Iop_Shl64
, IRExpr_Get( OFFB_DFLAG
, Ity_I64
),
4589 IRExpr_Get( OFFB_DFLAG
, Ity_I64
) );
4594 void dis_string_op( void (*dis_OP
)( Int
, IRTemp
, Prefix pfx
),
4595 Int sz
, const HChar
* name
, Prefix pfx
)
4597 IRTemp t_inc
= newTemp(Ity_I64
);
4598 /* Really we ought to inspect the override prefixes, but we don't.
4599 The following assertion catches any resulting sillyness. */
4600 vassert(pfx
== clearSegBits(pfx
));
4601 dis_string_op_increment(sz
, t_inc
);
4602 dis_OP( sz
, t_inc
, pfx
);
4603 DIP("%s%c\n", name
, nameISize(sz
));
4607 void dis_MOVS ( Int sz
, IRTemp t_inc
, Prefix pfx
)
4609 IRType ty
= szToITy(sz
);
4610 IRTemp td
= newTemp(Ity_I64
); /* RDI */
4611 IRTemp ts
= newTemp(Ity_I64
); /* RSI */
4612 IRExpr
*incd
, *incs
;
4615 assign( td
, unop(Iop_32Uto64
, getIReg32(R_RDI
)) );
4616 assign( ts
, unop(Iop_32Uto64
, getIReg32(R_RSI
)) );
4618 assign( td
, getIReg64(R_RDI
) );
4619 assign( ts
, getIReg64(R_RSI
) );
4622 storeLE( mkexpr(td
), loadLE(ty
,mkexpr(ts
)) );
4624 incd
= binop(Iop_Add64
, mkexpr(td
), mkexpr(t_inc
));
4625 incs
= binop(Iop_Add64
, mkexpr(ts
), mkexpr(t_inc
));
4627 incd
= unop(Iop_32Uto64
, unop(Iop_64to32
, incd
));
4628 incs
= unop(Iop_32Uto64
, unop(Iop_64to32
, incs
));
4630 putIReg64( R_RDI
, incd
);
4631 putIReg64( R_RSI
, incs
);
4635 void dis_LODS ( Int sz
, IRTemp t_inc
, Prefix pfx
)
4637 IRType ty
= szToITy(sz
);
4638 IRTemp ts
= newTemp(Ity_I64
); /* RSI */
4642 assign( ts
, unop(Iop_32Uto64
, getIReg32(R_RSI
)) );
4644 assign( ts
, getIReg64(R_RSI
) );
4646 putIRegRAX ( sz
, loadLE(ty
, mkexpr(ts
)) );
4648 incs
= binop(Iop_Add64
, mkexpr(ts
), mkexpr(t_inc
));
4650 incs
= unop(Iop_32Uto64
, unop(Iop_64to32
, incs
));
4651 putIReg64( R_RSI
, incs
);
4655 void dis_STOS ( Int sz
, IRTemp t_inc
, Prefix pfx
)
4657 IRType ty
= szToITy(sz
);
4658 IRTemp ta
= newTemp(ty
); /* rAX */
4659 IRTemp td
= newTemp(Ity_I64
); /* RDI */
4662 assign( ta
, getIRegRAX(sz
) );
4665 assign( td
, unop(Iop_32Uto64
, getIReg32(R_RDI
)) );
4667 assign( td
, getIReg64(R_RDI
) );
4669 storeLE( mkexpr(td
), mkexpr(ta
) );
4671 incd
= binop(Iop_Add64
, mkexpr(td
), mkexpr(t_inc
));
4673 incd
= unop(Iop_32Uto64
, unop(Iop_64to32
, incd
));
4674 putIReg64( R_RDI
, incd
);
4678 void dis_CMPS ( Int sz
, IRTemp t_inc
, Prefix pfx
)
4680 IRType ty
= szToITy(sz
);
4681 IRTemp tdv
= newTemp(ty
); /* (RDI) */
4682 IRTemp tsv
= newTemp(ty
); /* (RSI) */
4683 IRTemp td
= newTemp(Ity_I64
); /* RDI */
4684 IRTemp ts
= newTemp(Ity_I64
); /* RSI */
4685 IRExpr
*incd
, *incs
;
4688 assign( td
, unop(Iop_32Uto64
, getIReg32(R_RDI
)) );
4689 assign( ts
, unop(Iop_32Uto64
, getIReg32(R_RSI
)) );
4691 assign( td
, getIReg64(R_RDI
) );
4692 assign( ts
, getIReg64(R_RSI
) );
4695 assign( tdv
, loadLE(ty
,mkexpr(td
)) );
4697 assign( tsv
, loadLE(ty
,mkexpr(ts
)) );
4699 setFlags_DEP1_DEP2 ( Iop_Sub8
, tsv
, tdv
, ty
);
4701 incd
= binop(Iop_Add64
, mkexpr(td
), mkexpr(t_inc
));
4702 incs
= binop(Iop_Add64
, mkexpr(ts
), mkexpr(t_inc
));
4704 incd
= unop(Iop_32Uto64
, unop(Iop_64to32
, incd
));
4705 incs
= unop(Iop_32Uto64
, unop(Iop_64to32
, incs
));
4707 putIReg64( R_RDI
, incd
);
4708 putIReg64( R_RSI
, incs
);
4712 void dis_SCAS ( Int sz
, IRTemp t_inc
, Prefix pfx
)
4714 IRType ty
= szToITy(sz
);
4715 IRTemp ta
= newTemp(ty
); /* rAX */
4716 IRTemp td
= newTemp(Ity_I64
); /* RDI */
4717 IRTemp tdv
= newTemp(ty
); /* (RDI) */
4720 assign( ta
, getIRegRAX(sz
) );
4723 assign( td
, unop(Iop_32Uto64
, getIReg32(R_RDI
)) );
4725 assign( td
, getIReg64(R_RDI
) );
4727 assign( tdv
, loadLE(ty
,mkexpr(td
)) );
4729 setFlags_DEP1_DEP2 ( Iop_Sub8
, ta
, tdv
, ty
);
4731 incd
= binop(Iop_Add64
, mkexpr(td
), mkexpr(t_inc
));
4733 incd
= unop(Iop_32Uto64
, unop(Iop_64to32
, incd
));
4734 putIReg64( R_RDI
, incd
);
4738 /* Wrap the appropriate string op inside a REP/REPE/REPNE. We assume
4739 the insn is the last one in the basic block, and so emit a jump to
4740 the next insn, rather than just falling through. */
4742 void dis_REP_op ( /*MOD*/DisResult
* dres
,
4744 void (*dis_OP
)(Int
, IRTemp
, Prefix
),
4745 Int sz
, Addr64 rip
, Addr64 rip_next
, const HChar
* name
,
4748 IRTemp t_inc
= newTemp(Ity_I64
);
4752 /* Really we ought to inspect the override prefixes, but we don't.
4753 The following assertion catches any resulting sillyness. */
4754 vassert(pfx
== clearSegBits(pfx
));
4757 tc
= newTemp(Ity_I32
); /* ECX */
4758 assign( tc
, getIReg32(R_RCX
) );
4759 cmp
= binop(Iop_CmpEQ32
, mkexpr(tc
), mkU32(0));
4761 tc
= newTemp(Ity_I64
); /* RCX */
4762 assign( tc
, getIReg64(R_RCX
) );
4763 cmp
= binop(Iop_CmpEQ64
, mkexpr(tc
), mkU64(0));
4766 stmt( IRStmt_Exit( cmp
, Ijk_Boring
,
4767 IRConst_U64(rip_next
), OFFB_RIP
) );
4770 putIReg32(R_RCX
, binop(Iop_Sub32
, mkexpr(tc
), mkU32(1)) );
4772 putIReg64(R_RCX
, binop(Iop_Sub64
, mkexpr(tc
), mkU64(1)) );
4774 dis_string_op_increment(sz
, t_inc
);
4775 dis_OP (sz
, t_inc
, pfx
);
4777 if (cond
== AMD64CondAlways
) {
4778 jmp_lit(dres
, Ijk_Boring
, rip
);
4779 vassert(dres
->whatNext
== Dis_StopHere
);
4781 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond
),
4785 jmp_lit(dres
, Ijk_Boring
, rip_next
);
4786 vassert(dres
->whatNext
== Dis_StopHere
);
4788 DIP("%s%c\n", name
, nameISize(sz
));
4792 /*------------------------------------------------------------*/
4793 /*--- Arithmetic, etc. ---*/
4794 /*------------------------------------------------------------*/
4796 /* IMUL E, G. Supplied eip points to the modR/M byte. */
4798 ULong
dis_mul_E_G ( const VexAbiInfo
* vbi
,
4805 UChar rm
= getUChar(delta0
);
4806 IRType ty
= szToITy(size
);
4807 IRTemp te
= newTemp(ty
);
4808 IRTemp tg
= newTemp(ty
);
4809 IRTemp resLo
= newTemp(ty
);
4811 assign( tg
, getIRegG(size
, pfx
, rm
) );
4812 if (epartIsReg(rm
)) {
4813 assign( te
, getIRegE(size
, pfx
, rm
) );
4815 IRTemp addr
= disAMode( &alen
, vbi
, pfx
, delta0
, dis_buf
, 0 );
4816 assign( te
, loadLE(ty
,mkexpr(addr
)) );
4819 setFlags_MUL ( ty
, te
, tg
, AMD64G_CC_OP_SMULB
);
4821 assign( resLo
, binop( mkSizedOp(ty
, Iop_Mul8
), mkexpr(te
), mkexpr(tg
) ) );
4823 putIRegG(size
, pfx
, rm
, mkexpr(resLo
) );
4825 if (epartIsReg(rm
)) {
4826 DIP("imul%c %s, %s\n", nameISize(size
),
4827 nameIRegE(size
,pfx
,rm
),
4828 nameIRegG(size
,pfx
,rm
));
4831 DIP("imul%c %s, %s\n", nameISize(size
),
4833 nameIRegG(size
,pfx
,rm
));
4839 /* IMUL I * E -> G. Supplied rip points to the modR/M byte. */
4841 ULong
dis_imul_I_E_G ( const VexAbiInfo
* vbi
,
4850 UChar rm
= getUChar(delta
);
4851 IRType ty
= szToITy(size
);
4852 IRTemp te
= newTemp(ty
);
4853 IRTemp tl
= newTemp(ty
);
4854 IRTemp resLo
= newTemp(ty
);
4856 vassert(/*size == 1 ||*/ size
== 2 || size
== 4 || size
== 8);
4858 if (epartIsReg(rm
)) {
4859 assign(te
, getIRegE(size
, pfx
, rm
));
4862 IRTemp addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
4864 assign(te
, loadLE(ty
, mkexpr(addr
)));
4867 d64
= getSDisp(imin(4,litsize
),delta
);
4868 delta
+= imin(4,litsize
);
4870 d64
&= mkSizeMask(size
);
4871 assign(tl
, mkU(ty
,d64
));
4873 assign( resLo
, binop( mkSizedOp(ty
, Iop_Mul8
), mkexpr(te
), mkexpr(tl
) ));
4875 setFlags_MUL ( ty
, te
, tl
, AMD64G_CC_OP_SMULB
);
4877 putIRegG(size
, pfx
, rm
, mkexpr(resLo
));
4879 DIP("imul%c $%lld, %s, %s\n",
4880 nameISize(size
), d64
,
4881 ( epartIsReg(rm
) ? nameIRegE(size
,pfx
,rm
) : dis_buf
),
4882 nameIRegG(size
,pfx
,rm
) );
4887 /* Generate an IR sequence to do a popcount operation on the supplied
4888 IRTemp, and return a new IRTemp holding the result. 'ty' may be
4889 Ity_I16, Ity_I32 or Ity_I64 only. */
4890 static IRTemp
gen_POPCOUNT ( IRType ty
, IRTemp src
)
4893 if (ty
== Ity_I16
) {
4894 IRTemp old
= IRTemp_INVALID
;
4895 IRTemp nyu
= IRTemp_INVALID
;
4896 IRTemp mask
[4], shift
[4];
4897 for (i
= 0; i
< 4; i
++) {
4898 mask
[i
] = newTemp(ty
);
4901 assign(mask
[0], mkU16(0x5555));
4902 assign(mask
[1], mkU16(0x3333));
4903 assign(mask
[2], mkU16(0x0F0F));
4904 assign(mask
[3], mkU16(0x00FF));
4906 for (i
= 0; i
< 4; i
++) {
4914 binop(Iop_Shr16
, mkexpr(old
), mkU8(shift
[i
])),
4920 if (ty
== Ity_I32
) {
4921 IRTemp old
= IRTemp_INVALID
;
4922 IRTemp nyu
= IRTemp_INVALID
;
4923 IRTemp mask
[5], shift
[5];
4924 for (i
= 0; i
< 5; i
++) {
4925 mask
[i
] = newTemp(ty
);
4928 assign(mask
[0], mkU32(0x55555555));
4929 assign(mask
[1], mkU32(0x33333333));
4930 assign(mask
[2], mkU32(0x0F0F0F0F));
4931 assign(mask
[3], mkU32(0x00FF00FF));
4932 assign(mask
[4], mkU32(0x0000FFFF));
4934 for (i
= 0; i
< 5; i
++) {
4942 binop(Iop_Shr32
, mkexpr(old
), mkU8(shift
[i
])),
4948 if (ty
== Ity_I64
) {
4949 IRTemp old
= IRTemp_INVALID
;
4950 IRTemp nyu
= IRTemp_INVALID
;
4951 IRTemp mask
[6], shift
[6];
4952 for (i
= 0; i
< 6; i
++) {
4953 mask
[i
] = newTemp(ty
);
4956 assign(mask
[0], mkU64(0x5555555555555555ULL
));
4957 assign(mask
[1], mkU64(0x3333333333333333ULL
));
4958 assign(mask
[2], mkU64(0x0F0F0F0F0F0F0F0FULL
));
4959 assign(mask
[3], mkU64(0x00FF00FF00FF00FFULL
));
4960 assign(mask
[4], mkU64(0x0000FFFF0000FFFFULL
));
4961 assign(mask
[5], mkU64(0x00000000FFFFFFFFULL
));
4963 for (i
= 0; i
< 6; i
++) {
4971 binop(Iop_Shr64
, mkexpr(old
), mkU8(shift
[i
])),
4982 /* Generate an IR sequence to do a count-leading-zeroes operation on
4983 the supplied IRTemp, and return a new IRTemp holding the result.
4984 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where
4985 the argument is zero, return the number of bits in the word (the
4986 natural semantics). */
4987 static IRTemp
gen_LZCNT ( IRType ty
, IRTemp src
)
4989 vassert(ty
== Ity_I64
|| ty
== Ity_I32
|| ty
== Ity_I16
);
4991 IRTemp src64
= newTemp(Ity_I64
);
4992 assign(src64
, widenUto64( mkexpr(src
) ));
4994 IRTemp src64x
= newTemp(Ity_I64
);
4996 binop(Iop_Shl64
, mkexpr(src64
),
4997 mkU8(64 - 8 * sizeofIRType(ty
))));
4999 // Clz64 has undefined semantics when its input is zero, so
5000 // special-case around that.
5001 IRTemp res64
= newTemp(Ity_I64
);
5004 binop(Iop_CmpEQ64
, mkexpr(src64x
), mkU64(0)),
5005 mkU64(8 * sizeofIRType(ty
)),
5006 unop(Iop_Clz64
, mkexpr(src64x
))
5009 IRTemp res
= newTemp(ty
);
5010 assign(res
, narrowTo(ty
, mkexpr(res64
)));
5015 /* Generate an IR sequence to do a count-trailing-zeroes operation on
5016 the supplied IRTemp, and return a new IRTemp holding the result.
5017 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where
5018 the argument is zero, return the number of bits in the word (the
5019 natural semantics). */
5020 static IRTemp
gen_TZCNT ( IRType ty
, IRTemp src
)
5022 vassert(ty
== Ity_I64
|| ty
== Ity_I32
|| ty
== Ity_I16
);
5024 IRTemp src64
= newTemp(Ity_I64
);
5025 assign(src64
, widenUto64( mkexpr(src
) ));
5027 // Ctz64 has undefined semantics when its input is zero, so
5028 // special-case around that.
5029 IRTemp res64
= newTemp(Ity_I64
);
5032 binop(Iop_CmpEQ64
, mkexpr(src64
), mkU64(0)),
5033 mkU64(8 * sizeofIRType(ty
)),
5034 unop(Iop_Ctz64
, mkexpr(src64
))
5037 IRTemp res
= newTemp(ty
);
5038 assign(res
, narrowTo(ty
, mkexpr(res64
)));
5043 /*------------------------------------------------------------*/
5045 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/
5047 /*------------------------------------------------------------*/
5049 /* --- Helper functions for dealing with the register stack. --- */
5051 /* --- Set the emulation-warning pseudo-register. --- */
5053 static void put_emwarn ( IRExpr
* e
/* :: Ity_I32 */ )
5055 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == Ity_I32
);
5056 stmt( IRStmt_Put( OFFB_EMNOTE
, e
) );
5059 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
5061 static IRExpr
* mkQNaN64 ( void )
5063 /* QNaN is 0 2047 1 0(51times)
5064 == 0b 11111111111b 1 0(51times)
5065 == 0x7FF8 0000 0000 0000
5067 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL
));
5070 /* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */
5072 static IRExpr
* get_ftop ( void )
5074 return IRExpr_Get( OFFB_FTOP
, Ity_I32
);
5077 static void put_ftop ( IRExpr
* e
)
5079 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == Ity_I32
);
5080 stmt( IRStmt_Put( OFFB_FTOP
, e
) );
5083 /* --------- Get/put the C3210 bits. --------- */
5085 static IRExpr
* /* :: Ity_I64 */ get_C3210 ( void )
5087 return IRExpr_Get( OFFB_FC3210
, Ity_I64
);
5090 static void put_C3210 ( IRExpr
* e
/* :: Ity_I64 */ )
5092 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == Ity_I64
);
5093 stmt( IRStmt_Put( OFFB_FC3210
, e
) );
5096 /* --------- Get/put the FPU rounding mode. --------- */
5097 static IRExpr
* /* :: Ity_I32 */ get_fpround ( void )
5099 return unop(Iop_64to32
, IRExpr_Get( OFFB_FPROUND
, Ity_I64
));
5102 static void put_fpround ( IRExpr
* /* :: Ity_I32 */ e
)
5104 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == Ity_I32
);
5105 stmt( IRStmt_Put( OFFB_FPROUND
, unop(Iop_32Uto64
,e
) ) );
5109 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */
5110 /* Produces a value in 0 .. 3, which is encoded as per the type
5111 IRRoundingMode. Since the guest_FPROUND value is also encoded as
5112 per IRRoundingMode, we merely need to get it and mask it for
5115 static IRExpr
* /* :: Ity_I32 */ get_roundingmode ( void )
5117 return binop( Iop_And32
, get_fpround(), mkU32(3) );
5120 static IRExpr
* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
5122 return mkU32(Irrm_NEAREST
);
5126 /* --------- Get/set FP register tag bytes. --------- */
5128 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
5130 static void put_ST_TAG ( Int i
, IRExpr
* value
)
5133 vassert(typeOfIRExpr(irsb
->tyenv
, value
) == Ity_I8
);
5134 descr
= mkIRRegArray( OFFB_FPTAGS
, Ity_I8
, 8 );
5135 stmt( IRStmt_PutI( mkIRPutI(descr
, get_ftop(), i
, value
) ) );
5138 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be
5139 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */
5141 static IRExpr
* get_ST_TAG ( Int i
)
5143 IRRegArray
* descr
= mkIRRegArray( OFFB_FPTAGS
, Ity_I8
, 8 );
5144 return IRExpr_GetI( descr
, get_ftop(), i
);
5148 /* --------- Get/set FP registers. --------- */
5150 /* Given i, and some expression e, emit 'ST(i) = e' and set the
5151 register's tag to indicate the register is full. The previous
5152 state of the register is not checked. */
5154 static void put_ST_UNCHECKED ( Int i
, IRExpr
* value
)
5157 vassert(typeOfIRExpr(irsb
->tyenv
, value
) == Ity_F64
);
5158 descr
= mkIRRegArray( OFFB_FPREGS
, Ity_F64
, 8 );
5159 stmt( IRStmt_PutI( mkIRPutI(descr
, get_ftop(), i
, value
) ) );
5160 /* Mark the register as in-use. */
5161 put_ST_TAG(i
, mkU8(1));
5164 /* Given i, and some expression e, emit
5165 ST(i) = is_full(i) ? NaN : e
5166 and set the tag accordingly.
5169 static void put_ST ( Int i
, IRExpr
* value
)
5173 IRExpr_ITE( binop(Iop_CmpNE8
, get_ST_TAG(i
), mkU8(0)),
5174 /* non-0 means full */
5183 /* Given i, generate an expression yielding 'ST(i)'. */
5185 static IRExpr
* get_ST_UNCHECKED ( Int i
)
5187 IRRegArray
* descr
= mkIRRegArray( OFFB_FPREGS
, Ity_F64
, 8 );
5188 return IRExpr_GetI( descr
, get_ftop(), i
);
5192 /* Given i, generate an expression yielding
5193 is_full(i) ? ST(i) : NaN
5196 static IRExpr
* get_ST ( Int i
)
5199 IRExpr_ITE( binop(Iop_CmpNE8
, get_ST_TAG(i
), mkU8(0)),
5200 /* non-0 means full */
5201 get_ST_UNCHECKED(i
),
5207 /* Given i, and some expression e, and a condition cond, generate IR
5208 which has the same effect as put_ST(i,e) when cond is true and has
5209 no effect when cond is false. Given the lack of proper
5210 if-then-else in the IR, this is pretty tricky.
5213 static void maybe_put_ST ( IRTemp cond
, Int i
, IRExpr
* value
)
5215 // new_tag = if cond then FULL else old_tag
5216 // new_val = if cond then (if old_tag==FULL then NaN else val)
5219 IRTemp old_tag
= newTemp(Ity_I8
);
5220 assign(old_tag
, get_ST_TAG(i
));
5221 IRTemp new_tag
= newTemp(Ity_I8
);
5223 IRExpr_ITE(mkexpr(cond
), mkU8(1)/*FULL*/, mkexpr(old_tag
)));
5225 IRTemp old_val
= newTemp(Ity_F64
);
5226 assign(old_val
, get_ST_UNCHECKED(i
));
5227 IRTemp new_val
= newTemp(Ity_F64
);
5229 IRExpr_ITE(mkexpr(cond
),
5230 IRExpr_ITE(binop(Iop_CmpNE8
, mkexpr(old_tag
), mkU8(0)),
5231 /* non-0 means full */
5237 put_ST_UNCHECKED(i
, mkexpr(new_val
));
5238 // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So
5239 // now set it to new_tag instead.
5240 put_ST_TAG(i
, mkexpr(new_tag
));
5243 /* Adjust FTOP downwards by one register. */
5245 static void fp_push ( void )
5247 put_ftop( binop(Iop_Sub32
, get_ftop(), mkU32(1)) );
5250 /* Adjust FTOP downwards by one register when COND is 1:I1. Else
5253 static void maybe_fp_push ( IRTemp cond
)
5255 put_ftop( binop(Iop_Sub32
, get_ftop(), unop(Iop_1Uto32
,mkexpr(cond
))) );
5258 /* Adjust FTOP upwards by one register, and mark the vacated register
5261 static void fp_pop ( void )
5263 put_ST_TAG(0, mkU8(0));
5264 put_ftop( binop(Iop_Add32
, get_ftop(), mkU32(1)) );
5267 /* Set the C2 bit of the FPU status register to e[0]. Assumes that
5270 static void set_C2 ( IRExpr
* e
)
5272 IRExpr
* cleared
= binop(Iop_And64
, get_C3210(), mkU64(~AMD64G_FC_MASK_C2
));
5273 put_C3210( binop(Iop_Or64
,
5275 binop(Iop_Shl64
, e
, mkU8(AMD64G_FC_SHIFT_C2
))) );
5278 /* Generate code to check that abs(d64) < 2^63 and is finite. This is
5279 used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The
5280 test is simple, but the derivation of it is not so simple.
5282 The exponent field for an IEEE754 double is 11 bits. That means it
5283 can take values 0 through 0x7FF. If the exponent has value 0x7FF,
5284 the number is either a NaN or an Infinity and so is not finite.
5285 Furthermore, a finite value of exactly 2^63 is the smallest value
5286 that has exponent value 0x43E. Hence, what we need to do is
5287 extract the exponent, ignoring the sign bit and mantissa, and check
5288 it is < 0x43E, or <= 0x43D.
5290 To make this easily applicable to 32- and 64-bit targets, a
5291 roundabout approach is used. First the number is converted to I64,
5292 then the top 32 bits are taken. Shifting them right by 20 bits
5293 places the sign bit and exponent in the bottom 12 bits. Anding
5294 with 0x7FF gets rid of the sign bit, leaving just the exponent
5295 available for comparison.
5297 static IRTemp
math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64
)
5299 IRTemp i64
= newTemp(Ity_I64
);
5300 assign(i64
, unop(Iop_ReinterpF64asI64
, mkexpr(d64
)) );
5301 IRTemp exponent
= newTemp(Ity_I32
);
5304 binop(Iop_Shr32
, unop(Iop_64HIto32
, mkexpr(i64
)), mkU8(20)),
5306 IRTemp in_range_and_finite
= newTemp(Ity_I1
);
5307 assign(in_range_and_finite
,
5308 binop(Iop_CmpLE32U
, mkexpr(exponent
), mkU32(0x43D)));
5309 return in_range_and_finite
;
5312 /* Invent a plausible-looking FPU status word value:
5313 ((ftop & 7) << 11) | (c3210 & 0x4700)
5315 static IRExpr
* get_FPU_sw ( void )
5321 binop(Iop_And32
, get_ftop(), mkU32(7)),
5323 binop(Iop_And32
, unop(Iop_64to32
, get_C3210()),
5329 /* Generate a dirty helper call that initialises the x87 state a la
5330 FINIT. If |guard| is NULL, it is done unconditionally. Otherwise
5331 |guard| is used as a guarding condition.
5333 static void gen_FINIT_SEQUENCE ( IRExpr
* guard
)
5335 /* Uses dirty helper:
5336 void amd64g_do_FINIT ( VexGuestAMD64State* ) */
5337 IRDirty
* d
= unsafeIRDirty_0_N (
5339 "amd64g_dirtyhelper_FINIT",
5340 &amd64g_dirtyhelper_FINIT
,
5341 mkIRExprVec_1( IRExpr_GSPTR() )
5344 /* declare we're writing guest state */
5346 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
5348 d
->fxState
[0].fx
= Ifx_Write
;
5349 d
->fxState
[0].offset
= OFFB_FTOP
;
5350 d
->fxState
[0].size
= sizeof(UInt
);
5352 d
->fxState
[1].fx
= Ifx_Write
;
5353 d
->fxState
[1].offset
= OFFB_FPREGS
;
5354 d
->fxState
[1].size
= 8 * sizeof(ULong
);
5356 d
->fxState
[2].fx
= Ifx_Write
;
5357 d
->fxState
[2].offset
= OFFB_FPTAGS
;
5358 d
->fxState
[2].size
= 8 * sizeof(UChar
);
5360 d
->fxState
[3].fx
= Ifx_Write
;
5361 d
->fxState
[3].offset
= OFFB_FPROUND
;
5362 d
->fxState
[3].size
= sizeof(ULong
);
5364 d
->fxState
[4].fx
= Ifx_Write
;
5365 d
->fxState
[4].offset
= OFFB_FC3210
;
5366 d
->fxState
[4].size
= sizeof(ULong
);
5371 stmt( IRStmt_Dirty(d
) );
5375 /* ------------------------------------------------------- */
5376 /* Given all that stack-mangling junk, we can now go ahead
5377 and describe FP instructions.
5380 /* ST(0) = ST(0) `op` mem64/32(addr)
5381 Need to check ST(0)'s tag on read, but not on write.
5384 void fp_do_op_mem_ST_0 ( IRTemp addr
, const HChar
* op_txt
, HChar
* dis_buf
,
5387 DIP("f%s%c %s\n", op_txt
, dbl
?'l':'s', dis_buf
);
5391 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5393 loadLE(Ity_F64
,mkexpr(addr
))
5398 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5400 unop(Iop_F32toF64
, loadLE(Ity_F32
,mkexpr(addr
)))
5406 /* ST(0) = mem64/32(addr) `op` ST(0)
5407 Need to check ST(0)'s tag on read, but not on write.
5410 void fp_do_oprev_mem_ST_0 ( IRTemp addr
, const HChar
* op_txt
, HChar
* dis_buf
,
5413 DIP("f%s%c %s\n", op_txt
, dbl
?'l':'s', dis_buf
);
5417 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5418 loadLE(Ity_F64
,mkexpr(addr
)),
5424 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5425 unop(Iop_F32toF64
, loadLE(Ity_F32
,mkexpr(addr
))),
5432 /* ST(dst) = ST(dst) `op` ST(src).
5433 Check dst and src tags when reading but not on write.
5436 void fp_do_op_ST_ST ( const HChar
* op_txt
, IROp op
, UInt st_src
, UInt st_dst
,
5439 DIP("f%s%s st(%u), st(%u)\n", op_txt
, pop_after
?"p":"", st_src
, st_dst
);
5443 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5451 /* ST(dst) = ST(src) `op` ST(dst).
5452 Check dst and src tags when reading but not on write.
5455 void fp_do_oprev_ST_ST ( const HChar
* op_txt
, IROp op
, UInt st_src
, UInt st_dst
,
5458 DIP("f%s%s st(%u), st(%u)\n", op_txt
, pop_after
?"p":"", st_src
, st_dst
);
5462 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5470 /* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */
5471 static void fp_do_ucomi_ST0_STi ( UInt i
, Bool pop_after
)
5473 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after
? "p" : "", i
);
5474 /* This is a bit of a hack (and isn't really right). It sets
5475 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
5476 documentation implies A and S are unchanged.
5478 /* It's also fishy in that it is used both for COMIP and
5479 UCOMIP, and they aren't the same (although similar). */
5480 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
5481 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
5486 binop(Iop_CmpF64
, get_ST(0), get_ST(i
))),
5495 32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 )
5497 static IRExpr
* x87ishly_qnarrow_32_to_16 ( IRExpr
* e32
)
5499 IRTemp t32
= newTemp(Ity_I32
);
5505 binop(Iop_Add32
, mkexpr(t32
), mkU32(32768))),
5507 unop(Iop_32to16
, mkexpr(t32
)),
5513 ULong
dis_FPU ( /*OUT*/Bool
* decode_ok
,
5514 const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
)
5521 /* On entry, delta points at the second byte of the insn (the modrm
5523 UChar first_opcode
= getUChar(delta
-1);
5524 UChar modrm
= getUChar(delta
+0);
5526 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
5528 if (first_opcode
== 0xD8) {
5531 /* bits 5,4,3 are an opcode extension, and the modRM also
5532 specifies an address. */
5533 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
5536 switch (gregLO3ofRM(modrm
)) {
5538 case 0: /* FADD single-real */
5539 fp_do_op_mem_ST_0 ( addr
, "add", dis_buf
, Iop_AddF64
, False
);
5542 case 1: /* FMUL single-real */
5543 fp_do_op_mem_ST_0 ( addr
, "mul", dis_buf
, Iop_MulF64
, False
);
5546 case 2: /* FCOM single-real */
5547 DIP("fcoms %s\n", dis_buf
);
5548 /* This forces C1 to zero, which isn't right. */
5549 /* The AMD documentation suggests that forcing C1 to
5550 zero is correct (Eliot Moss) */
5558 loadLE(Ity_F32
,mkexpr(addr
)))),
5564 case 3: /* FCOMP single-real */
5565 /* The AMD documentation suggests that forcing C1 to
5566 zero is correct (Eliot Moss) */
5567 DIP("fcomps %s\n", dis_buf
);
5568 /* This forces C1 to zero, which isn't right. */
5576 loadLE(Ity_F32
,mkexpr(addr
)))),
5583 case 4: /* FSUB single-real */
5584 fp_do_op_mem_ST_0 ( addr
, "sub", dis_buf
, Iop_SubF64
, False
);
5587 case 5: /* FSUBR single-real */
5588 fp_do_oprev_mem_ST_0 ( addr
, "subr", dis_buf
, Iop_SubF64
, False
);
5591 case 6: /* FDIV single-real */
5592 fp_do_op_mem_ST_0 ( addr
, "div", dis_buf
, Iop_DivF64
, False
);
5595 case 7: /* FDIVR single-real */
5596 fp_do_oprev_mem_ST_0 ( addr
, "divr", dis_buf
, Iop_DivF64
, False
);
5600 vex_printf("unhandled opc_aux = 0x%2x\n",
5601 (UInt
)gregLO3ofRM(modrm
));
5602 vex_printf("first_opcode == 0xD8\n");
5609 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
5610 fp_do_op_ST_ST ( "add", Iop_AddF64
, modrm
- 0xC0, 0, False
);
5613 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
5614 fp_do_op_ST_ST ( "mul", Iop_MulF64
, modrm
- 0xC8, 0, False
);
5617 /* Dunno if this is right */
5618 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
5619 r_dst
= (UInt
)modrm
- 0xD0;
5620 DIP("fcom %%st(0),%%st(%u)\n", r_dst
);
5621 /* This forces C1 to zero, which isn't right. */
5626 binop(Iop_CmpF64
, get_ST(0), get_ST(r_dst
)),
5632 /* Dunno if this is right */
5633 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
5634 r_dst
= (UInt
)modrm
- 0xD8;
5635 DIP("fcomp %%st(0),%%st(%u)\n", r_dst
);
5636 /* This forces C1 to zero, which isn't right. */
5641 binop(Iop_CmpF64
, get_ST(0), get_ST(r_dst
)),
5648 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
5649 fp_do_op_ST_ST ( "sub", Iop_SubF64
, modrm
- 0xE0, 0, False
);
5652 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
5653 fp_do_oprev_ST_ST ( "subr", Iop_SubF64
, modrm
- 0xE8, 0, False
);
5656 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
5657 fp_do_op_ST_ST ( "div", Iop_DivF64
, modrm
- 0xF0, 0, False
);
5660 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
5661 fp_do_oprev_ST_ST ( "divr", Iop_DivF64
, modrm
- 0xF8, 0, False
);
5670 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
5672 if (first_opcode
== 0xD9) {
5675 /* bits 5,4,3 are an opcode extension, and the modRM also
5676 specifies an address. */
5677 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
5680 switch (gregLO3ofRM(modrm
)) {
5682 case 0: /* FLD single-real */
5683 DIP("flds %s\n", dis_buf
);
5685 put_ST(0, unop(Iop_F32toF64
,
5686 loadLE(Ity_F32
, mkexpr(addr
))));
5689 case 2: /* FST single-real */
5690 DIP("fsts %s\n", dis_buf
);
5691 storeLE(mkexpr(addr
),
5692 binop(Iop_F64toF32
, get_roundingmode(), get_ST(0)));
5695 case 3: /* FSTP single-real */
5696 DIP("fstps %s\n", dis_buf
);
5697 storeLE(mkexpr(addr
),
5698 binop(Iop_F64toF32
, get_roundingmode(), get_ST(0)));
5702 case 4: { /* FLDENV m28 */
5703 /* Uses dirty helper:
5704 VexEmNote amd64g_do_FLDENV ( VexGuestX86State*, HWord ) */
5705 IRTemp ew
= newTemp(Ity_I32
);
5706 IRTemp w64
= newTemp(Ity_I64
);
5707 IRDirty
* d
= unsafeIRDirty_0_N (
5709 "amd64g_dirtyhelper_FLDENV",
5710 &amd64g_dirtyhelper_FLDENV
,
5711 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
5714 /* declare we're reading memory */
5716 d
->mAddr
= mkexpr(addr
);
5719 /* declare we're writing guest state */
5721 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
5723 d
->fxState
[0].fx
= Ifx_Write
;
5724 d
->fxState
[0].offset
= OFFB_FTOP
;
5725 d
->fxState
[0].size
= sizeof(UInt
);
5727 d
->fxState
[1].fx
= Ifx_Write
;
5728 d
->fxState
[1].offset
= OFFB_FPTAGS
;
5729 d
->fxState
[1].size
= 8 * sizeof(UChar
);
5731 d
->fxState
[2].fx
= Ifx_Write
;
5732 d
->fxState
[2].offset
= OFFB_FPROUND
;
5733 d
->fxState
[2].size
= sizeof(ULong
);
5735 d
->fxState
[3].fx
= Ifx_Write
;
5736 d
->fxState
[3].offset
= OFFB_FC3210
;
5737 d
->fxState
[3].size
= sizeof(ULong
);
5739 stmt( IRStmt_Dirty(d
) );
5741 /* ew contains any emulation warning we may need to
5742 issue. If needed, side-exit to the next insn,
5743 reporting the warning, so that Valgrind's dispatcher
5744 sees the warning. */
5745 assign(ew
, unop(Iop_64to32
,mkexpr(w64
)) );
5746 put_emwarn( mkexpr(ew
) );
5749 binop(Iop_CmpNE32
, mkexpr(ew
), mkU32(0)),
5751 IRConst_U64( guest_RIP_bbstart
+delta
),
5756 DIP("fldenv %s\n", dis_buf
);
5760 case 5: {/* FLDCW */
5761 /* The only thing we observe in the control word is the
5762 rounding mode. Therefore, pass the 16-bit value
5763 (x87 native-format control word) to a clean helper,
5764 getting back a 64-bit value, the lower half of which
5765 is the FPROUND value to store, and the upper half of
5766 which is the emulation-warning token which may be
5769 /* ULong amd64h_check_fldcw ( ULong ); */
5770 IRTemp t64
= newTemp(Ity_I64
);
5771 IRTemp ew
= newTemp(Ity_I32
);
5772 DIP("fldcw %s\n", dis_buf
);
5773 assign( t64
, mkIRExprCCall(
5774 Ity_I64
, 0/*regparms*/,
5775 "amd64g_check_fldcw",
5776 &amd64g_check_fldcw
,
5779 loadLE(Ity_I16
, mkexpr(addr
)))
5784 put_fpround( unop(Iop_64to32
, mkexpr(t64
)) );
5785 assign( ew
, unop(Iop_64HIto32
, mkexpr(t64
) ) );
5786 put_emwarn( mkexpr(ew
) );
5787 /* Finally, if an emulation warning was reported,
5788 side-exit to the next insn, reporting the warning,
5789 so that Valgrind's dispatcher sees the warning. */
5792 binop(Iop_CmpNE32
, mkexpr(ew
), mkU32(0)),
5794 IRConst_U64( guest_RIP_bbstart
+delta
),
5801 case 6: { /* FNSTENV m28 */
5802 /* Uses dirty helper:
5803 void amd64g_do_FSTENV ( VexGuestAMD64State*, HWord ) */
5804 IRDirty
* d
= unsafeIRDirty_0_N (
5806 "amd64g_dirtyhelper_FSTENV",
5807 &amd64g_dirtyhelper_FSTENV
,
5808 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
5810 /* declare we're writing memory */
5812 d
->mAddr
= mkexpr(addr
);
5815 /* declare we're reading guest state */
5817 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
5819 d
->fxState
[0].fx
= Ifx_Read
;
5820 d
->fxState
[0].offset
= OFFB_FTOP
;
5821 d
->fxState
[0].size
= sizeof(UInt
);
5823 d
->fxState
[1].fx
= Ifx_Read
;
5824 d
->fxState
[1].offset
= OFFB_FPTAGS
;
5825 d
->fxState
[1].size
= 8 * sizeof(UChar
);
5827 d
->fxState
[2].fx
= Ifx_Read
;
5828 d
->fxState
[2].offset
= OFFB_FPROUND
;
5829 d
->fxState
[2].size
= sizeof(ULong
);
5831 d
->fxState
[3].fx
= Ifx_Read
;
5832 d
->fxState
[3].offset
= OFFB_FC3210
;
5833 d
->fxState
[3].size
= sizeof(ULong
);
5835 stmt( IRStmt_Dirty(d
) );
5837 DIP("fnstenv %s\n", dis_buf
);
5841 case 7: /* FNSTCW */
5842 /* Fake up a native x87 FPU control word. The only
5843 thing it depends on is FPROUND[1:0], so call a clean
5844 helper to cook it up. */
5845 /* ULong amd64g_create_fpucw ( ULong fpround ) */
5846 DIP("fnstcw %s\n", dis_buf
);
5852 "amd64g_create_fpucw", &amd64g_create_fpucw
,
5853 mkIRExprVec_1( unop(Iop_32Uto64
, get_fpround()) )
5860 vex_printf("unhandled opc_aux = 0x%2x\n",
5861 (UInt
)gregLO3ofRM(modrm
));
5862 vex_printf("first_opcode == 0xD9\n");
5870 case 0xC0 ... 0xC7: /* FLD %st(?) */
5871 r_src
= (UInt
)modrm
- 0xC0;
5872 DIP("fld %%st(%u)\n", r_src
);
5873 t1
= newTemp(Ity_F64
);
5874 assign(t1
, get_ST(r_src
));
5876 put_ST(0, mkexpr(t1
));
5879 case 0xC8 ... 0xCF: /* FXCH %st(?) */
5880 r_src
= (UInt
)modrm
- 0xC8;
5881 DIP("fxch %%st(%u)\n", r_src
);
5882 t1
= newTemp(Ity_F64
);
5883 t2
= newTemp(Ity_F64
);
5884 assign(t1
, get_ST(0));
5885 assign(t2
, get_ST(r_src
));
5886 put_ST_UNCHECKED(0, mkexpr(t2
));
5887 put_ST_UNCHECKED(r_src
, mkexpr(t1
));
5890 case 0xE0: /* FCHS */
5892 put_ST_UNCHECKED(0, unop(Iop_NegF64
, get_ST(0)));
5895 case 0xE1: /* FABS */
5897 put_ST_UNCHECKED(0, unop(Iop_AbsF64
, get_ST(0)));
5900 case 0xE5: { /* FXAM */
5901 /* This is an interesting one. It examines %st(0),
5902 regardless of whether the tag says it's empty or not.
5903 Here, just pass both the tag (in our format) and the
5904 value (as a double, actually a ULong) to a helper
5907 = mkIRExprVec_2( unop(Iop_8Uto64
, get_ST_TAG(0)),
5908 unop(Iop_ReinterpF64asI64
,
5909 get_ST_UNCHECKED(0)) );
5910 put_C3210(mkIRExprCCall(
5913 "amd64g_calculate_FXAM", &amd64g_calculate_FXAM
,
5920 case 0xE8: /* FLD1 */
5923 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
5924 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL
)));
5927 case 0xE9: /* FLDL2T */
5930 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
5931 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL
)));
5934 case 0xEA: /* FLDL2E */
5937 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
5938 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL
)));
5941 case 0xEB: /* FLDPI */
5944 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
5945 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL
)));
5948 case 0xEC: /* FLDLG2 */
5951 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
5952 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL
)));
5955 case 0xED: /* FLDLN2 */
5958 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
5959 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL
)));
5962 case 0xEE: /* FLDZ */
5965 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
5966 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL
)));
5969 case 0xF0: /* F2XM1 */
5973 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5977 case 0xF1: /* FYL2X */
5981 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5987 case 0xF2: { /* FPTAN */
5989 IRTemp argD
= newTemp(Ity_F64
);
5990 assign(argD
, get_ST(0));
5991 IRTemp argOK
= math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD
);
5992 IRTemp resD
= newTemp(Ity_F64
);
5997 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6001 put_ST_UNCHECKED(0, mkexpr(resD
));
6002 /* Conditionally push 1.0 on the stack, if the arg is
6004 maybe_fp_push(argOK
);
6005 maybe_put_ST(argOK
, 0,
6006 IRExpr_Const(IRConst_F64(1.0)));
6007 set_C2( binop(Iop_Xor64
,
6008 unop(Iop_1Uto64
, mkexpr(argOK
)),
6013 case 0xF3: /* FPATAN */
6017 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6023 case 0xF4: { /* FXTRACT */
6024 IRTemp argF
= newTemp(Ity_F64
);
6025 IRTemp sigF
= newTemp(Ity_F64
);
6026 IRTemp expF
= newTemp(Ity_F64
);
6027 IRTemp argI
= newTemp(Ity_I64
);
6028 IRTemp sigI
= newTemp(Ity_I64
);
6029 IRTemp expI
= newTemp(Ity_I64
);
6031 assign( argF
, get_ST(0) );
6032 assign( argI
, unop(Iop_ReinterpF64asI64
, mkexpr(argF
)));
6035 Ity_I64
, 0/*regparms*/,
6036 "x86amd64g_calculate_FXTRACT",
6037 &x86amd64g_calculate_FXTRACT
,
6038 mkIRExprVec_2( mkexpr(argI
),
6039 mkIRExpr_HWord(0)/*sig*/ ))
6043 Ity_I64
, 0/*regparms*/,
6044 "x86amd64g_calculate_FXTRACT",
6045 &x86amd64g_calculate_FXTRACT
,
6046 mkIRExprVec_2( mkexpr(argI
),
6047 mkIRExpr_HWord(1)/*exp*/ ))
6049 assign( sigF
, unop(Iop_ReinterpI64asF64
, mkexpr(sigI
)) );
6050 assign( expF
, unop(Iop_ReinterpI64asF64
, mkexpr(expI
)) );
6052 put_ST_UNCHECKED(0, mkexpr(expF
) );
6055 put_ST(0, mkexpr(sigF
) );
6059 case 0xF5: { /* FPREM1 -- IEEE compliant */
6060 IRTemp a1
= newTemp(Ity_F64
);
6061 IRTemp a2
= newTemp(Ity_F64
);
6063 /* Do FPREM1 twice, once to get the remainder, and once
6064 to get the C3210 flag values. */
6065 assign( a1
, get_ST(0) );
6066 assign( a2
, get_ST(1) );
6069 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6074 triop(Iop_PRem1C3210F64
,
6075 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6081 case 0xF7: /* FINCSTP */
6083 put_ftop( binop(Iop_Add32
, get_ftop(), mkU32(1)) );
6086 case 0xF8: { /* FPREM -- not IEEE compliant */
6087 IRTemp a1
= newTemp(Ity_F64
);
6088 IRTemp a2
= newTemp(Ity_F64
);
6090 /* Do FPREM twice, once to get the remainder, and once
6091 to get the C3210 flag values. */
6092 assign( a1
, get_ST(0) );
6093 assign( a2
, get_ST(1) );
6096 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6101 triop(Iop_PRemC3210F64
,
6102 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6108 case 0xF9: /* FYL2XP1 */
6111 triop(Iop_Yl2xp1F64
,
6112 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6118 case 0xFA: /* FSQRT */
6122 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6126 case 0xFB: { /* FSINCOS */
6128 IRTemp argD
= newTemp(Ity_F64
);
6129 assign(argD
, get_ST(0));
6130 IRTemp argOK
= math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD
);
6131 IRTemp resD
= newTemp(Ity_F64
);
6136 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6140 put_ST_UNCHECKED(0, mkexpr(resD
));
6141 /* Conditionally push the cos value on the stack, if
6142 the arg is in range */
6143 maybe_fp_push(argOK
);
6144 maybe_put_ST(argOK
, 0,
6146 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6148 set_C2( binop(Iop_Xor64
,
6149 unop(Iop_1Uto64
, mkexpr(argOK
)),
6154 case 0xFC: /* FRNDINT */
6157 binop(Iop_RoundF64toInt
, get_roundingmode(), get_ST(0)) );
6160 case 0xFD: /* FSCALE */
6164 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6169 case 0xFE: /* FSIN */
6170 case 0xFF: { /* FCOS */
6171 Bool isSIN
= modrm
== 0xFE;
6172 DIP("%s\n", isSIN
? "fsin" : "fcos");
6173 IRTemp argD
= newTemp(Ity_F64
);
6174 assign(argD
, get_ST(0));
6175 IRTemp argOK
= math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD
);
6176 IRTemp resD
= newTemp(Ity_F64
);
6180 binop(isSIN
? Iop_SinF64
: Iop_CosF64
,
6181 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6185 put_ST_UNCHECKED(0, mkexpr(resD
));
6186 set_C2( binop(Iop_Xor64
,
6187 unop(Iop_1Uto64
, mkexpr(argOK
)),
6198 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
6200 if (first_opcode
== 0xDA) {
6204 /* bits 5,4,3 are an opcode extension, and the modRM also
6205 specifies an address. */
6207 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
6209 switch (gregLO3ofRM(modrm
)) {
6211 case 0: /* FIADD m32int */ /* ST(0) += m32int */
6212 DIP("fiaddl %s\n", dis_buf
);
6216 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
6217 DIP("fimull %s\n", dis_buf
);
6221 case 4: /* FISUB m32int */ /* ST(0) -= m32int */
6222 DIP("fisubl %s\n", dis_buf
);
6226 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
6227 DIP("fisubrl %s\n", dis_buf
);
6231 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
6232 DIP("fisubl %s\n", dis_buf
);
6236 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
6237 DIP("fidivrl %s\n", dis_buf
);
6244 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6247 loadLE(Ity_I32
, mkexpr(addr
)))));
6253 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6255 loadLE(Ity_I32
, mkexpr(addr
))),
6260 vex_printf("unhandled opc_aux = 0x%2x\n",
6261 (UInt
)gregLO3ofRM(modrm
));
6262 vex_printf("first_opcode == 0xDA\n");
6271 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
6272 r_src
= (UInt
)modrm
- 0xC0;
6273 DIP("fcmovb %%st(%u), %%st(0)\n", r_src
);
6276 mk_amd64g_calculate_condition(AMD64CondB
),
6277 get_ST(r_src
), get_ST(0)) );
6280 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
6281 r_src
= (UInt
)modrm
- 0xC8;
6282 DIP("fcmovz %%st(%u), %%st(0)\n", r_src
);
6285 mk_amd64g_calculate_condition(AMD64CondZ
),
6286 get_ST(r_src
), get_ST(0)) );
6289 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
6290 r_src
= (UInt
)modrm
- 0xD0;
6291 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src
);
6294 mk_amd64g_calculate_condition(AMD64CondBE
),
6295 get_ST(r_src
), get_ST(0)) );
6298 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
6299 r_src
= (UInt
)modrm
- 0xD8;
6300 DIP("fcmovu %%st(%u), %%st(0)\n", r_src
);
6303 mk_amd64g_calculate_condition(AMD64CondP
),
6304 get_ST(r_src
), get_ST(0)) );
6307 case 0xE9: /* FUCOMPP %st(0),%st(1) */
6308 DIP("fucompp %%st(0),%%st(1)\n");
6309 /* This forces C1 to zero, which isn't right. */
6314 binop(Iop_CmpF64
, get_ST(0), get_ST(1)),
6329 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
6331 if (first_opcode
== 0xDB) {
6334 /* bits 5,4,3 are an opcode extension, and the modRM also
6335 specifies an address. */
6336 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
6339 switch (gregLO3ofRM(modrm
)) {
6341 case 0: /* FILD m32int */
6342 DIP("fildl %s\n", dis_buf
);
6344 put_ST(0, unop(Iop_I32StoF64
,
6345 loadLE(Ity_I32
, mkexpr(addr
))));
6348 case 1: /* FISTTPL m32 (SSE3) */
6349 DIP("fisttpl %s\n", dis_buf
);
6350 storeLE( mkexpr(addr
),
6351 binop(Iop_F64toI32S
, mkU32(Irrm_ZERO
), get_ST(0)) );
6355 case 2: /* FIST m32 */
6356 DIP("fistl %s\n", dis_buf
);
6357 storeLE( mkexpr(addr
),
6358 binop(Iop_F64toI32S
, get_roundingmode(), get_ST(0)) );
6361 case 3: /* FISTP m32 */
6362 DIP("fistpl %s\n", dis_buf
);
6363 storeLE( mkexpr(addr
),
6364 binop(Iop_F64toI32S
, get_roundingmode(), get_ST(0)) );
6368 case 5: { /* FLD extended-real */
6369 /* Uses dirty helper:
6370 ULong amd64g_loadF80le ( ULong )
6371 addr holds the address. First, do a dirty call to
6372 get hold of the data. */
6373 IRTemp val
= newTemp(Ity_I64
);
6374 IRExpr
** args
= mkIRExprVec_1 ( mkexpr(addr
) );
6376 IRDirty
* d
= unsafeIRDirty_1_N (
6379 "amd64g_dirtyhelper_loadF80le",
6380 &amd64g_dirtyhelper_loadF80le
,
6383 /* declare that we're reading memory */
6385 d
->mAddr
= mkexpr(addr
);
6388 /* execute the dirty call, dumping the result in val. */
6389 stmt( IRStmt_Dirty(d
) );
6391 put_ST(0, unop(Iop_ReinterpI64asF64
, mkexpr(val
)));
6393 DIP("fldt %s\n", dis_buf
);
6397 case 7: { /* FSTP extended-real */
6398 /* Uses dirty helper:
6399 void amd64g_storeF80le ( ULong addr, ULong data )
6402 = mkIRExprVec_2( mkexpr(addr
),
6403 unop(Iop_ReinterpF64asI64
, get_ST(0)) );
6405 IRDirty
* d
= unsafeIRDirty_0_N (
6407 "amd64g_dirtyhelper_storeF80le",
6408 &amd64g_dirtyhelper_storeF80le
,
6411 /* declare we're writing memory */
6413 d
->mAddr
= mkexpr(addr
);
6416 /* execute the dirty call. */
6417 stmt( IRStmt_Dirty(d
) );
6420 DIP("fstpt\n %s", dis_buf
);
6425 vex_printf("unhandled opc_aux = 0x%2x\n",
6426 (UInt
)gregLO3ofRM(modrm
));
6427 vex_printf("first_opcode == 0xDB\n");
6436 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
6437 r_src
= (UInt
)modrm
- 0xC0;
6438 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src
);
6441 mk_amd64g_calculate_condition(AMD64CondNB
),
6442 get_ST(r_src
), get_ST(0)) );
6445 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
6446 r_src
= (UInt
)modrm
- 0xC8;
6447 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src
);
6451 mk_amd64g_calculate_condition(AMD64CondNZ
),
6458 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
6459 r_src
= (UInt
)modrm
- 0xD0;
6460 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src
);
6464 mk_amd64g_calculate_condition(AMD64CondNBE
),
6471 case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
6472 r_src
= (UInt
)modrm
- 0xD8;
6473 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src
);
6477 mk_amd64g_calculate_condition(AMD64CondNP
),
6489 gen_FINIT_SEQUENCE(NULL
/*no guarding condition*/);
6494 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
6495 fp_do_ucomi_ST0_STi( (UInt
)modrm
- 0xE8, False
);
6498 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
6499 fp_do_ucomi_ST0_STi( (UInt
)modrm
- 0xF0, False
);
6508 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
6510 if (first_opcode
== 0xDC) {
6513 /* bits 5,4,3 are an opcode extension, and the modRM also
6514 specifies an address. */
6515 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
6518 switch (gregLO3ofRM(modrm
)) {
6520 case 0: /* FADD double-real */
6521 fp_do_op_mem_ST_0 ( addr
, "add", dis_buf
, Iop_AddF64
, True
);
6524 case 1: /* FMUL double-real */
6525 fp_do_op_mem_ST_0 ( addr
, "mul", dis_buf
, Iop_MulF64
, True
);
6528 case 2: /* FCOM double-real */
6529 DIP("fcoml %s\n", dis_buf
);
6530 /* This forces C1 to zero, which isn't right. */
6537 loadLE(Ity_F64
,mkexpr(addr
))),
6543 case 3: /* FCOMP double-real */
6544 DIP("fcompl %s\n", dis_buf
);
6545 /* This forces C1 to zero, which isn't right. */
6552 loadLE(Ity_F64
,mkexpr(addr
))),
6559 case 4: /* FSUB double-real */
6560 fp_do_op_mem_ST_0 ( addr
, "sub", dis_buf
, Iop_SubF64
, True
);
6563 case 5: /* FSUBR double-real */
6564 fp_do_oprev_mem_ST_0 ( addr
, "subr", dis_buf
, Iop_SubF64
, True
);
6567 case 6: /* FDIV double-real */
6568 fp_do_op_mem_ST_0 ( addr
, "div", dis_buf
, Iop_DivF64
, True
);
6571 case 7: /* FDIVR double-real */
6572 fp_do_oprev_mem_ST_0 ( addr
, "divr", dis_buf
, Iop_DivF64
, True
);
6576 vex_printf("unhandled opc_aux = 0x%2x\n",
6577 (UInt
)gregLO3ofRM(modrm
));
6578 vex_printf("first_opcode == 0xDC\n");
6587 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
6588 fp_do_op_ST_ST ( "add", Iop_AddF64
, 0, modrm
- 0xC0, False
);
6591 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
6592 fp_do_op_ST_ST ( "mul", Iop_MulF64
, 0, modrm
- 0xC8, False
);
6595 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
6596 fp_do_oprev_ST_ST ( "subr", Iop_SubF64
, 0, modrm
- 0xE0, False
);
6599 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
6600 fp_do_op_ST_ST ( "sub", Iop_SubF64
, 0, modrm
- 0xE8, False
);
6603 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
6604 fp_do_oprev_ST_ST ( "divr", Iop_DivF64
, 0, modrm
- 0xF0, False
);
6607 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
6608 fp_do_op_ST_ST ( "div", Iop_DivF64
, 0, modrm
- 0xF8, False
);
6618 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
6620 if (first_opcode
== 0xDD) {
6624 /* bits 5,4,3 are an opcode extension, and the modRM also
6625 specifies an address. */
6626 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
6629 switch (gregLO3ofRM(modrm
)) {
6631 case 0: /* FLD double-real */
6632 DIP("fldl %s\n", dis_buf
);
6634 put_ST(0, loadLE(Ity_F64
, mkexpr(addr
)));
6637 case 1: /* FISTTPQ m64 (SSE3) */
6638 DIP("fistppll %s\n", dis_buf
);
6639 storeLE( mkexpr(addr
),
6640 binop(Iop_F64toI64S
, mkU32(Irrm_ZERO
), get_ST(0)) );
6644 case 2: /* FST double-real */
6645 DIP("fstl %s\n", dis_buf
);
6646 storeLE(mkexpr(addr
), get_ST(0));
6649 case 3: /* FSTP double-real */
6650 DIP("fstpl %s\n", dis_buf
);
6651 storeLE(mkexpr(addr
), get_ST(0));
6655 case 4: { /* FRSTOR m94/m108 */
6656 IRTemp ew
= newTemp(Ity_I32
);
6657 IRTemp w64
= newTemp(Ity_I64
);
6659 if ( have66(pfx
) ) {
6660 /* Uses dirty helper:
6661 VexEmNote amd64g_dirtyhelper_FRSTORS
6662 ( VexGuestAMD64State*, HWord ) */
6663 d
= unsafeIRDirty_0_N (
6665 "amd64g_dirtyhelper_FRSTORS",
6666 &amd64g_dirtyhelper_FRSTORS
,
6667 mkIRExprVec_1( mkexpr(addr
) )
6671 /* Uses dirty helper:
6672 VexEmNote amd64g_dirtyhelper_FRSTOR
6673 ( VexGuestAMD64State*, HWord ) */
6674 d
= unsafeIRDirty_0_N (
6676 "amd64g_dirtyhelper_FRSTOR",
6677 &amd64g_dirtyhelper_FRSTOR
,
6678 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
6684 /* declare we're reading memory */
6686 d
->mAddr
= mkexpr(addr
);
6687 /* d->mSize set above */
6689 /* declare we're writing guest state */
6691 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
6693 d
->fxState
[0].fx
= Ifx_Write
;
6694 d
->fxState
[0].offset
= OFFB_FTOP
;
6695 d
->fxState
[0].size
= sizeof(UInt
);
6697 d
->fxState
[1].fx
= Ifx_Write
;
6698 d
->fxState
[1].offset
= OFFB_FPREGS
;
6699 d
->fxState
[1].size
= 8 * sizeof(ULong
);
6701 d
->fxState
[2].fx
= Ifx_Write
;
6702 d
->fxState
[2].offset
= OFFB_FPTAGS
;
6703 d
->fxState
[2].size
= 8 * sizeof(UChar
);
6705 d
->fxState
[3].fx
= Ifx_Write
;
6706 d
->fxState
[3].offset
= OFFB_FPROUND
;
6707 d
->fxState
[3].size
= sizeof(ULong
);
6709 d
->fxState
[4].fx
= Ifx_Write
;
6710 d
->fxState
[4].offset
= OFFB_FC3210
;
6711 d
->fxState
[4].size
= sizeof(ULong
);
6713 stmt( IRStmt_Dirty(d
) );
6715 /* ew contains any emulation warning we may need to
6716 issue. If needed, side-exit to the next insn,
6717 reporting the warning, so that Valgrind's dispatcher
6718 sees the warning. */
6719 assign(ew
, unop(Iop_64to32
,mkexpr(w64
)) );
6720 put_emwarn( mkexpr(ew
) );
6723 binop(Iop_CmpNE32
, mkexpr(ew
), mkU32(0)),
6725 IRConst_U64( guest_RIP_bbstart
+delta
),
6730 if ( have66(pfx
) ) {
6731 DIP("frstors %s\n", dis_buf
);
6733 DIP("frstor %s\n", dis_buf
);
6738 case 6: { /* FNSAVE m94/m108 */
6740 if ( have66(pfx
) ) {
6741 /* Uses dirty helper:
6742 void amd64g_dirtyhelper_FNSAVES ( VexGuestAMD64State*,
6744 d
= unsafeIRDirty_0_N (
6746 "amd64g_dirtyhelper_FNSAVES",
6747 &amd64g_dirtyhelper_FNSAVES
,
6748 mkIRExprVec_1( mkexpr(addr
) )
6752 /* Uses dirty helper:
6753 void amd64g_dirtyhelper_FNSAVE ( VexGuestAMD64State*,
6755 d
= unsafeIRDirty_0_N (
6757 "amd64g_dirtyhelper_FNSAVE",
6758 &amd64g_dirtyhelper_FNSAVE
,
6759 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
6764 /* declare we're writing memory */
6766 d
->mAddr
= mkexpr(addr
);
6767 /* d->mSize set above */
6769 /* declare we're reading guest state */
6771 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
6773 d
->fxState
[0].fx
= Ifx_Read
;
6774 d
->fxState
[0].offset
= OFFB_FTOP
;
6775 d
->fxState
[0].size
= sizeof(UInt
);
6777 d
->fxState
[1].fx
= Ifx_Read
;
6778 d
->fxState
[1].offset
= OFFB_FPREGS
;
6779 d
->fxState
[1].size
= 8 * sizeof(ULong
);
6781 d
->fxState
[2].fx
= Ifx_Read
;
6782 d
->fxState
[2].offset
= OFFB_FPTAGS
;
6783 d
->fxState
[2].size
= 8 * sizeof(UChar
);
6785 d
->fxState
[3].fx
= Ifx_Read
;
6786 d
->fxState
[3].offset
= OFFB_FPROUND
;
6787 d
->fxState
[3].size
= sizeof(ULong
);
6789 d
->fxState
[4].fx
= Ifx_Read
;
6790 d
->fxState
[4].offset
= OFFB_FC3210
;
6791 d
->fxState
[4].size
= sizeof(ULong
);
6793 stmt( IRStmt_Dirty(d
) );
6795 if ( have66(pfx
) ) {
6796 DIP("fnsaves %s\n", dis_buf
);
6798 DIP("fnsave %s\n", dis_buf
);
6803 case 7: { /* FNSTSW m16 */
6804 IRExpr
* sw
= get_FPU_sw();
6805 vassert(typeOfIRExpr(irsb
->tyenv
, sw
) == Ity_I16
);
6806 storeLE( mkexpr(addr
), sw
);
6807 DIP("fnstsw %s\n", dis_buf
);
6812 vex_printf("unhandled opc_aux = 0x%2x\n",
6813 (UInt
)gregLO3ofRM(modrm
));
6814 vex_printf("first_opcode == 0xDD\n");
6821 case 0xC0 ... 0xC7: /* FFREE %st(?) */
6822 r_dst
= (UInt
)modrm
- 0xC0;
6823 DIP("ffree %%st(%u)\n", r_dst
);
6824 put_ST_TAG ( r_dst
, mkU8(0) );
6827 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
6828 r_dst
= (UInt
)modrm
- 0xD0;
6829 DIP("fst %%st(0),%%st(%u)\n", r_dst
);
6830 /* P4 manual says: "If the destination operand is a
6831 non-empty register, the invalid-operation exception
6832 is not generated. Hence put_ST_UNCHECKED. */
6833 put_ST_UNCHECKED(r_dst
, get_ST(0));
6836 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
6837 r_dst
= (UInt
)modrm
- 0xD8;
6838 DIP("fstp %%st(0),%%st(%u)\n", r_dst
);
6839 /* P4 manual says: "If the destination operand is a
6840 non-empty register, the invalid-operation exception
6841 is not generated. Hence put_ST_UNCHECKED. */
6842 put_ST_UNCHECKED(r_dst
, get_ST(0));
6846 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
6847 r_dst
= (UInt
)modrm
- 0xE0;
6848 DIP("fucom %%st(0),%%st(%u)\n", r_dst
);
6849 /* This forces C1 to zero, which isn't right. */
6854 binop(Iop_CmpF64
, get_ST(0), get_ST(r_dst
)),
6860 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
6861 r_dst
= (UInt
)modrm
- 0xE8;
6862 DIP("fucomp %%st(0),%%st(%u)\n", r_dst
);
6863 /* This forces C1 to zero, which isn't right. */
6868 binop(Iop_CmpF64
, get_ST(0), get_ST(r_dst
)),
6881 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
6883 if (first_opcode
== 0xDE) {
6887 /* bits 5,4,3 are an opcode extension, and the modRM also
6888 specifies an address. */
6890 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
6893 switch (gregLO3ofRM(modrm
)) {
6895 case 0: /* FIADD m16int */ /* ST(0) += m16int */
6896 DIP("fiaddw %s\n", dis_buf
);
6900 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
6901 DIP("fimulw %s\n", dis_buf
);
6905 case 4: /* FISUB m16int */ /* ST(0) -= m16int */
6906 DIP("fisubw %s\n", dis_buf
);
6910 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
6911 DIP("fisubrw %s\n", dis_buf
);
6915 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
6916 DIP("fisubw %s\n", dis_buf
);
6920 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
6921 DIP("fidivrw %s\n", dis_buf
);
6928 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6932 loadLE(Ity_I16
, mkexpr(addr
))))));
6938 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6941 loadLE(Ity_I16
, mkexpr(addr
)))),
6946 vex_printf("unhandled opc_aux = 0x%2x\n",
6947 (UInt
)gregLO3ofRM(modrm
));
6948 vex_printf("first_opcode == 0xDE\n");
6957 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
6958 fp_do_op_ST_ST ( "add", Iop_AddF64
, 0, modrm
- 0xC0, True
);
6961 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
6962 fp_do_op_ST_ST ( "mul", Iop_MulF64
, 0, modrm
- 0xC8, True
);
6965 case 0xD9: /* FCOMPP %st(0),%st(1) */
6966 DIP("fcompp %%st(0),%%st(1)\n");
6967 /* This forces C1 to zero, which isn't right. */
6972 binop(Iop_CmpF64
, get_ST(0), get_ST(1)),
6980 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
6981 fp_do_oprev_ST_ST ( "subr", Iop_SubF64
, 0, modrm
- 0xE0, True
);
6984 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
6985 fp_do_op_ST_ST ( "sub", Iop_SubF64
, 0, modrm
- 0xE8, True
);
6988 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
6989 fp_do_oprev_ST_ST ( "divr", Iop_DivF64
, 0, modrm
- 0xF0, True
);
6992 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
6993 fp_do_op_ST_ST ( "div", Iop_DivF64
, 0, modrm
- 0xF8, True
);
7003 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
7005 if (first_opcode
== 0xDF) {
7009 /* bits 5,4,3 are an opcode extension, and the modRM also
7010 specifies an address. */
7011 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
7014 switch (gregLO3ofRM(modrm
)) {
7016 case 0: /* FILD m16int */
7017 DIP("fildw %s\n", dis_buf
);
7019 put_ST(0, unop(Iop_I32StoF64
,
7021 loadLE(Ity_I16
, mkexpr(addr
)))));
7024 case 1: /* FISTTPS m16 (SSE3) */
7025 DIP("fisttps %s\n", dis_buf
);
7026 storeLE( mkexpr(addr
),
7027 x87ishly_qnarrow_32_to_16(
7028 binop(Iop_F64toI32S
, mkU32(Irrm_ZERO
), get_ST(0)) ));
7032 case 2: /* FIST m16 */
7033 DIP("fists %s\n", dis_buf
);
7034 storeLE( mkexpr(addr
),
7035 x87ishly_qnarrow_32_to_16(
7036 binop(Iop_F64toI32S
, get_roundingmode(), get_ST(0)) ));
7039 case 3: /* FISTP m16 */
7040 DIP("fistps %s\n", dis_buf
);
7041 storeLE( mkexpr(addr
),
7042 x87ishly_qnarrow_32_to_16(
7043 binop(Iop_F64toI32S
, get_roundingmode(), get_ST(0)) ));
7047 case 5: /* FILD m64 */
7048 DIP("fildll %s\n", dis_buf
);
7050 put_ST(0, binop(Iop_I64StoF64
,
7052 loadLE(Ity_I64
, mkexpr(addr
))));
7055 case 7: /* FISTP m64 */
7056 DIP("fistpll %s\n", dis_buf
);
7057 storeLE( mkexpr(addr
),
7058 binop(Iop_F64toI64S
, get_roundingmode(), get_ST(0)) );
7063 vex_printf("unhandled opc_aux = 0x%2x\n",
7064 (UInt
)gregLO3ofRM(modrm
));
7065 vex_printf("first_opcode == 0xDF\n");
7074 case 0xC0: /* FFREEP %st(0) */
7075 DIP("ffreep %%st(%d)\n", 0);
7076 put_ST_TAG ( 0, mkU8(0) );
7080 case 0xE0: /* FNSTSW %ax */
7081 DIP("fnstsw %%ax\n");
7082 /* Invent a plausible-looking FPU status word value and
7084 ((ftop & 7) << 11) | (c3210 & 0x4700)
7091 binop(Iop_And32
, get_ftop(), mkU32(7)),
7094 unop(Iop_64to32
, get_C3210()),
7099 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
7100 fp_do_ucomi_ST0_STi( (UInt
)modrm
- 0xE8, True
);
7103 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
7104 /* not really right since COMIP != UCOMIP */
7105 fp_do_ucomi_ST0_STi( (UInt
)modrm
- 0xF0, True
);
7127 /*------------------------------------------------------------*/
7129 /*--- MMX INSTRUCTIONS ---*/
7131 /*------------------------------------------------------------*/
/* Effect of MMX insns on x87 FPU state (table 11-2 of
   IA32 arch manual, volume 3):

   Read from, or write to MMX register (viz, any insn except EMMS):
   * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
   * FP stack pointer set to zero

   EMMS:
   * All tags set to Invalid (empty) -- FPTAGS[i] := zero
   * FP stack pointer set to zero
*/
7145 static void do_MMX_preamble ( void )
7148 IRRegArray
* descr
= mkIRRegArray( OFFB_FPTAGS
, Ity_I8
, 8 );
7149 IRExpr
* zero
= mkU32(0);
7150 IRExpr
* tag1
= mkU8(1);
7152 for (i
= 0; i
< 8; i
++)
7153 stmt( IRStmt_PutI( mkIRPutI(descr
, zero
, i
, tag1
) ) );
7156 static void do_EMMS_preamble ( void )
7159 IRRegArray
* descr
= mkIRRegArray( OFFB_FPTAGS
, Ity_I8
, 8 );
7160 IRExpr
* zero
= mkU32(0);
7161 IRExpr
* tag0
= mkU8(0);
7163 for (i
= 0; i
< 8; i
++)
7164 stmt( IRStmt_PutI( mkIRPutI(descr
, zero
, i
, tag0
) ) );
7168 static IRExpr
* getMMXReg ( UInt archreg
)
7170 vassert(archreg
< 8);
7171 return IRExpr_Get( OFFB_FPREGS
+ 8 * archreg
, Ity_I64
);
7175 static void putMMXReg ( UInt archreg
, IRExpr
* e
)
7177 vassert(archreg
< 8);
7178 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I64
);
7179 stmt( IRStmt_Put( OFFB_FPREGS
+ 8 * archreg
, e
) );
7183 /* Helper for non-shift MMX insns. Note this is incomplete in the
7184 sense that it does not first call do_MMX_preamble() -- that is the
7185 responsibility of its caller. */
7188 ULong
dis_MMXop_regmem_to_reg ( const VexAbiInfo
* vbi
,
7193 Bool show_granularity
)
7196 UChar modrm
= getUChar(delta
);
7197 Bool isReg
= epartIsReg(modrm
);
7198 IRExpr
* argL
= NULL
;
7199 IRExpr
* argR
= NULL
;
7200 IRExpr
* argG
= NULL
;
7201 IRExpr
* argE
= NULL
;
7202 IRTemp res
= newTemp(Ity_I64
);
7205 IROp op
= Iop_INVALID
;
7207 const HChar
* hName
= NULL
;
7210 # define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)
7213 /* Original MMX ones */
7214 case 0xFC: op
= Iop_Add8x8
; break;
7215 case 0xFD: op
= Iop_Add16x4
; break;
7216 case 0xFE: op
= Iop_Add32x2
; break;
7218 case 0xEC: op
= Iop_QAdd8Sx8
; break;
7219 case 0xED: op
= Iop_QAdd16Sx4
; break;
7221 case 0xDC: op
= Iop_QAdd8Ux8
; break;
7222 case 0xDD: op
= Iop_QAdd16Ux4
; break;
7224 case 0xF8: op
= Iop_Sub8x8
; break;
7225 case 0xF9: op
= Iop_Sub16x4
; break;
7226 case 0xFA: op
= Iop_Sub32x2
; break;
7228 case 0xE8: op
= Iop_QSub8Sx8
; break;
7229 case 0xE9: op
= Iop_QSub16Sx4
; break;
7231 case 0xD8: op
= Iop_QSub8Ux8
; break;
7232 case 0xD9: op
= Iop_QSub16Ux4
; break;
7234 case 0xE5: op
= Iop_MulHi16Sx4
; break;
7235 case 0xD5: op
= Iop_Mul16x4
; break;
7236 case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd
); break;
7238 case 0x74: op
= Iop_CmpEQ8x8
; break;
7239 case 0x75: op
= Iop_CmpEQ16x4
; break;
7240 case 0x76: op
= Iop_CmpEQ32x2
; break;
7242 case 0x64: op
= Iop_CmpGT8Sx8
; break;
7243 case 0x65: op
= Iop_CmpGT16Sx4
; break;
7244 case 0x66: op
= Iop_CmpGT32Sx2
; break;
7246 case 0x6B: op
= Iop_QNarrowBin32Sto16Sx4
; eLeft
= True
; break;
7247 case 0x63: op
= Iop_QNarrowBin16Sto8Sx8
; eLeft
= True
; break;
7248 case 0x67: op
= Iop_QNarrowBin16Sto8Ux8
; eLeft
= True
; break;
7250 case 0x68: op
= Iop_InterleaveHI8x8
; eLeft
= True
; break;
7251 case 0x69: op
= Iop_InterleaveHI16x4
; eLeft
= True
; break;
7252 case 0x6A: op
= Iop_InterleaveHI32x2
; eLeft
= True
; break;
7254 case 0x60: op
= Iop_InterleaveLO8x8
; eLeft
= True
; break;
7255 case 0x61: op
= Iop_InterleaveLO16x4
; eLeft
= True
; break;
7256 case 0x62: op
= Iop_InterleaveLO32x2
; eLeft
= True
; break;
7258 case 0xDB: op
= Iop_And64
; break;
7259 case 0xDF: op
= Iop_And64
; invG
= True
; break;
7260 case 0xEB: op
= Iop_Or64
; break;
7261 case 0xEF: /* Possibly do better here if argL and argR are the
7263 op
= Iop_Xor64
; break;
7265 /* Introduced in SSE1 */
7266 case 0xE0: op
= Iop_Avg8Ux8
; break;
7267 case 0xE3: op
= Iop_Avg16Ux4
; break;
7268 case 0xEE: op
= Iop_Max16Sx4
; break;
7269 case 0xDE: op
= Iop_Max8Ux8
; break;
7270 case 0xEA: op
= Iop_Min16Sx4
; break;
7271 case 0xDA: op
= Iop_Min8Ux8
; break;
7272 case 0xE4: op
= Iop_MulHi16Ux4
; break;
7273 case 0xF6: XXX(amd64g_calculate_mmx_psadbw
); break;
7275 /* Introduced in SSE2 */
7276 case 0xD4: op
= Iop_Add64
; break;
7277 case 0xFB: op
= Iop_Sub64
; break;
7280 vex_printf("\n0x%x\n", (UInt
)opc
);
7281 vpanic("dis_MMXop_regmem_to_reg");
7286 argG
= getMMXReg(gregLO3ofRM(modrm
));
7288 argG
= unop(Iop_Not64
, argG
);
7292 argE
= getMMXReg(eregLO3ofRM(modrm
));
7295 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
7297 argE
= loadLE(Ity_I64
, mkexpr(addr
));
7308 if (op
!= Iop_INVALID
) {
7309 vassert(hName
== NULL
);
7310 vassert(hAddr
== NULL
);
7311 assign(res
, binop(op
, argL
, argR
));
7313 vassert(hName
!= NULL
);
7314 vassert(hAddr
!= NULL
);
7318 0/*regparms*/, hName
, hAddr
,
7319 mkIRExprVec_2( argL
, argR
)
7324 putMMXReg( gregLO3ofRM(modrm
), mkexpr(res
) );
7326 DIP("%s%s %s, %s\n",
7327 name
, show_granularity
? nameMMXGran(opc
& 3) : "",
7328 ( isReg
? nameMMXReg(eregLO3ofRM(modrm
)) : dis_buf
),
7329 nameMMXReg(gregLO3ofRM(modrm
)) );
7335 /* Vector by scalar shift of G by the amount specified at the bottom
7336 of E. This is a straight copy of dis_SSE_shiftG_byE. */
7338 static ULong
dis_MMX_shiftG_byE ( const VexAbiInfo
* vbi
,
7339 Prefix pfx
, Long delta
,
7340 const HChar
* opname
, IROp op
)
7346 UChar rm
= getUChar(delta
);
7347 IRTemp g0
= newTemp(Ity_I64
);
7348 IRTemp g1
= newTemp(Ity_I64
);
7349 IRTemp amt
= newTemp(Ity_I64
);
7350 IRTemp amt8
= newTemp(Ity_I8
);
7352 if (epartIsReg(rm
)) {
7353 assign( amt
, getMMXReg(eregLO3ofRM(rm
)) );
7354 DIP("%s %s,%s\n", opname
,
7355 nameMMXReg(eregLO3ofRM(rm
)),
7356 nameMMXReg(gregLO3ofRM(rm
)) );
7359 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
7360 assign( amt
, loadLE(Ity_I64
, mkexpr(addr
)) );
7361 DIP("%s %s,%s\n", opname
,
7363 nameMMXReg(gregLO3ofRM(rm
)) );
7366 assign( g0
, getMMXReg(gregLO3ofRM(rm
)) );
7367 assign( amt8
, unop(Iop_64to8
, mkexpr(amt
)) );
7369 shl
= shr
= sar
= False
;
7372 case Iop_ShlN16x4
: shl
= True
; size
= 32; break;
7373 case Iop_ShlN32x2
: shl
= True
; size
= 32; break;
7374 case Iop_Shl64
: shl
= True
; size
= 64; break;
7375 case Iop_ShrN16x4
: shr
= True
; size
= 16; break;
7376 case Iop_ShrN32x2
: shr
= True
; size
= 32; break;
7377 case Iop_Shr64
: shr
= True
; size
= 64; break;
7378 case Iop_SarN16x4
: sar
= True
; size
= 16; break;
7379 case Iop_SarN32x2
: sar
= True
; size
= 32; break;
7380 default: vassert(0);
7387 binop(Iop_CmpLT64U
,mkexpr(amt
),mkU64(size
)),
7388 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
7397 binop(Iop_CmpLT64U
,mkexpr(amt
),mkU64(size
)),
7398 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
7399 binop(op
, mkexpr(g0
), mkU8(size
-1))
7406 putMMXReg( gregLO3ofRM(rm
), mkexpr(g1
) );
7411 /* Vector by scalar shift of E by an immediate byte. This is a
7412 straight copy of dis_SSE_shiftE_imm. */
7415 ULong
dis_MMX_shiftE_imm ( Long delta
, const HChar
* opname
, IROp op
)
7418 UChar rm
= getUChar(delta
);
7419 IRTemp e0
= newTemp(Ity_I64
);
7420 IRTemp e1
= newTemp(Ity_I64
);
7422 vassert(epartIsReg(rm
));
7423 vassert(gregLO3ofRM(rm
) == 2
7424 || gregLO3ofRM(rm
) == 4 || gregLO3ofRM(rm
) == 6);
7425 amt
= getUChar(delta
+1);
7427 DIP("%s $%d,%s\n", opname
,
7429 nameMMXReg(eregLO3ofRM(rm
)) );
7431 assign( e0
, getMMXReg(eregLO3ofRM(rm
)) );
7433 shl
= shr
= sar
= False
;
7436 case Iop_ShlN16x4
: shl
= True
; size
= 16; break;
7437 case Iop_ShlN32x2
: shl
= True
; size
= 32; break;
7438 case Iop_Shl64
: shl
= True
; size
= 64; break;
7439 case Iop_SarN16x4
: sar
= True
; size
= 16; break;
7440 case Iop_SarN32x2
: sar
= True
; size
= 32; break;
7441 case Iop_ShrN16x4
: shr
= True
; size
= 16; break;
7442 case Iop_ShrN32x2
: shr
= True
; size
= 32; break;
7443 case Iop_Shr64
: shr
= True
; size
= 64; break;
7444 default: vassert(0);
7448 assign( e1
, amt
>= size
7450 : binop(op
, mkexpr(e0
), mkU8(amt
))
7454 assign( e1
, amt
>= size
7455 ? binop(op
, mkexpr(e0
), mkU8(size
-1))
7456 : binop(op
, mkexpr(e0
), mkU8(amt
))
7462 putMMXReg( eregLO3ofRM(rm
), mkexpr(e1
) );
7467 /* Completely handle all MMX instructions except emms. */
7470 ULong
dis_MMX ( Bool
* decode_ok
,
7471 const VexAbiInfo
* vbi
, Prefix pfx
, Int sz
, Long delta
)
7476 UChar opc
= getUChar(delta
);
7479 /* dis_MMX handles all insns except emms. */
7486 /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/
7487 modrm
= getUChar(delta
);
7488 if (epartIsReg(modrm
)) {
7492 binop( Iop_32HLto64
,
7494 getIReg32(eregOfRexRM(pfx
,modrm
)) ) );
7495 DIP("movd %s, %s\n",
7496 nameIReg32(eregOfRexRM(pfx
,modrm
)),
7497 nameMMXReg(gregLO3ofRM(modrm
)));
7499 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
7503 binop( Iop_32HLto64
,
7505 loadLE(Ity_I32
, mkexpr(addr
)) ) );
7506 DIP("movd %s, %s\n", dis_buf
, nameMMXReg(gregLO3ofRM(modrm
)));
7511 /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/
7512 modrm
= getUChar(delta
);
7513 if (epartIsReg(modrm
)) {
7515 putMMXReg( gregLO3ofRM(modrm
),
7516 getIReg64(eregOfRexRM(pfx
,modrm
)) );
7517 DIP("movd %s, %s\n",
7518 nameIReg64(eregOfRexRM(pfx
,modrm
)),
7519 nameMMXReg(gregLO3ofRM(modrm
)));
7521 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
7523 putMMXReg( gregLO3ofRM(modrm
),
7524 loadLE(Ity_I64
, mkexpr(addr
)) );
7525 DIP("movd{64} %s, %s\n", dis_buf
, nameMMXReg(gregLO3ofRM(modrm
)));
7529 goto mmx_decode_failure
;
7535 /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */
7536 modrm
= getUChar(delta
);
7537 if (epartIsReg(modrm
)) {
7539 putIReg32( eregOfRexRM(pfx
,modrm
),
7540 unop(Iop_64to32
, getMMXReg(gregLO3ofRM(modrm
)) ) );
7541 DIP("movd %s, %s\n",
7542 nameMMXReg(gregLO3ofRM(modrm
)),
7543 nameIReg32(eregOfRexRM(pfx
,modrm
)));
7545 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
7547 storeLE( mkexpr(addr
),
7548 unop(Iop_64to32
, getMMXReg(gregLO3ofRM(modrm
)) ) );
7549 DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm
)), dis_buf
);
7554 /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */
7555 modrm
= getUChar(delta
);
7556 if (epartIsReg(modrm
)) {
7558 putIReg64( eregOfRexRM(pfx
,modrm
),
7559 getMMXReg(gregLO3ofRM(modrm
)) );
7560 DIP("movd %s, %s\n",
7561 nameMMXReg(gregLO3ofRM(modrm
)),
7562 nameIReg64(eregOfRexRM(pfx
,modrm
)));
7564 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
7566 storeLE( mkexpr(addr
),
7567 getMMXReg(gregLO3ofRM(modrm
)) );
7568 DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm
)), dis_buf
);
7571 goto mmx_decode_failure
;
7576 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
7578 && /*ignore redundant REX.W*/!(sz
==8 && haveNo66noF2noF3(pfx
)))
7579 goto mmx_decode_failure
;
7580 modrm
= getUChar(delta
);
7581 if (epartIsReg(modrm
)) {
7583 putMMXReg( gregLO3ofRM(modrm
), getMMXReg(eregLO3ofRM(modrm
)) );
7584 DIP("movq %s, %s\n",
7585 nameMMXReg(eregLO3ofRM(modrm
)),
7586 nameMMXReg(gregLO3ofRM(modrm
)));
7588 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
7590 putMMXReg( gregLO3ofRM(modrm
), loadLE(Ity_I64
, mkexpr(addr
)) );
7591 DIP("movq %s, %s\n",
7592 dis_buf
, nameMMXReg(gregLO3ofRM(modrm
)));
7597 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
7599 && /*ignore redundant REX.W*/!(sz
==8 && haveNo66noF2noF3(pfx
)))
7600 goto mmx_decode_failure
;
7601 modrm
= getUChar(delta
);
7602 if (epartIsReg(modrm
)) {
7604 putMMXReg( eregLO3ofRM(modrm
), getMMXReg(gregLO3ofRM(modrm
)) );
7605 DIP("movq %s, %s\n",
7606 nameMMXReg(gregLO3ofRM(modrm
)),
7607 nameMMXReg(eregLO3ofRM(modrm
)));
7609 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
7611 storeLE( mkexpr(addr
), getMMXReg(gregLO3ofRM(modrm
)) );
7612 DIP("mov(nt)q %s, %s\n",
7613 nameMMXReg(gregLO3ofRM(modrm
)), dis_buf
);
7619 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
7621 goto mmx_decode_failure
;
7622 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "padd", True
);
7626 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
7628 && /*ignore redundant REX.W*/!(sz
==8 && haveNo66noF2noF3(pfx
)))
7629 goto mmx_decode_failure
;
7630 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "padds", True
);
7634 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
7636 goto mmx_decode_failure
;
7637 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "paddus", True
);
7642 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
7644 goto mmx_decode_failure
;
7645 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "psub", True
);
7649 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
7651 goto mmx_decode_failure
;
7652 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "psubs", True
);
7656 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
7658 goto mmx_decode_failure
;
7659 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "psubus", True
);
7662 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
7664 goto mmx_decode_failure
;
7665 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "pmulhw", False
);
7668 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
7670 goto mmx_decode_failure
;
7671 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "pmullw", False
);
7674 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
7676 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "pmaddwd", False
);
7681 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
7683 goto mmx_decode_failure
;
7684 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "pcmpeq", True
);
7689 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
7691 goto mmx_decode_failure
;
7692 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "pcmpgt", True
);
7695 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
7697 goto mmx_decode_failure
;
7698 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "packssdw", False
);
7701 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
7703 goto mmx_decode_failure
;
7704 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "packsswb", False
);
7707 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
7709 goto mmx_decode_failure
;
7710 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "packuswb", False
);
7715 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
7717 && /*ignore redundant REX.W*/!(sz
==8 && haveNo66noF2noF3(pfx
)))
7718 goto mmx_decode_failure
;
7719 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "punpckh", True
);
7724 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
7726 && /*ignore redundant REX.W*/!(sz
==8 && haveNo66noF2noF3(pfx
)))
7727 goto mmx_decode_failure
;
7728 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "punpckl", True
);
7731 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
7733 goto mmx_decode_failure
;
7734 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "pand", False
);
7737 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
7739 goto mmx_decode_failure
;
7740 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "pandn", False
);
7743 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
7745 goto mmx_decode_failure
;
7746 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "por", False
);
7749 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
7751 goto mmx_decode_failure
;
7752 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "pxor", False
);
7755 # define SHIFT_BY_REG(_name,_op) \
7756 delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op); \
7759 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
7760 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4
);
7761 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2
);
7762 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64
);
7764 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
7765 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4
);
7766 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2
);
7767 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64
);
7769 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
7770 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4
);
7771 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2
);
7773 # undef SHIFT_BY_REG
7778 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
7779 UChar byte2
, subopc
;
7781 goto mmx_decode_failure
;
7782 byte2
= getUChar(delta
); /* amode / sub-opcode */
7783 subopc
= toUChar( (byte2
>> 3) & 7 );
7785 # define SHIFT_BY_IMM(_name,_op) \
7786 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \
7789 if (subopc
== 2 /*SRL*/ && opc
== 0x71)
7790 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4
);
7791 else if (subopc
== 2 /*SRL*/ && opc
== 0x72)
7792 SHIFT_BY_IMM("psrld", Iop_ShrN32x2
);
7793 else if (subopc
== 2 /*SRL*/ && opc
== 0x73)
7794 SHIFT_BY_IMM("psrlq", Iop_Shr64
);
7796 else if (subopc
== 4 /*SAR*/ && opc
== 0x71)
7797 SHIFT_BY_IMM("psraw", Iop_SarN16x4
);
7798 else if (subopc
== 4 /*SAR*/ && opc
== 0x72)
7799 SHIFT_BY_IMM("psrad", Iop_SarN32x2
);
7801 else if (subopc
== 6 /*SHL*/ && opc
== 0x71)
7802 SHIFT_BY_IMM("psllw", Iop_ShlN16x4
);
7803 else if (subopc
== 6 /*SHL*/ && opc
== 0x72)
7804 SHIFT_BY_IMM("pslld", Iop_ShlN32x2
);
7805 else if (subopc
== 6 /*SHL*/ && opc
== 0x73)
7806 SHIFT_BY_IMM("psllq", Iop_Shl64
);
7808 else goto mmx_decode_failure
;
7810 # undef SHIFT_BY_IMM
7815 IRTemp addr
= newTemp(Ity_I64
);
7816 IRTemp regD
= newTemp(Ity_I64
);
7817 IRTemp regM
= newTemp(Ity_I64
);
7818 IRTemp mask
= newTemp(Ity_I64
);
7819 IRTemp olddata
= newTemp(Ity_I64
);
7820 IRTemp newdata
= newTemp(Ity_I64
);
7822 modrm
= getUChar(delta
);
7823 if (sz
!= 4 || (!epartIsReg(modrm
)))
7824 goto mmx_decode_failure
;
7827 assign( addr
, handleAddrOverrides( vbi
, pfx
, getIReg64(R_RDI
) ));
7828 assign( regM
, getMMXReg( eregLO3ofRM(modrm
) ));
7829 assign( regD
, getMMXReg( gregLO3ofRM(modrm
) ));
7830 assign( mask
, binop(Iop_SarN8x8
, mkexpr(regM
), mkU8(7)) );
7831 assign( olddata
, loadLE( Ity_I64
, mkexpr(addr
) ));
7839 unop(Iop_Not64
, mkexpr(mask
)))) );
7840 storeLE( mkexpr(addr
), mkexpr(newdata
) );
7841 DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm
) ),
7842 nameMMXReg( gregLO3ofRM(modrm
) ) );
7846 /* --- MMX decode failure --- */
7850 return delta
; /* ignored */
7859 /*------------------------------------------------------------*/
7860 /*--- More misc arithmetic and other obscure insns. ---*/
7861 /*------------------------------------------------------------*/
7863 /* Generate base << amt with vacated places filled with stuff
7864 from xtra. amt guaranteed in 0 .. 63. */
/* Builds an IRExpr computing (base << amt) with the vacated low bits
   filled from the top of xtra: if amt == 0 the result is base, else
   (base << amt) | (xtra >>u (64 - amt)).  Used by the SHLD family.
   NOTE(review): this chunk is extraction-damaged -- original lines
   7867-7869, 7871-7873, 7875 and the function's closing lines are
   missing here; restore from the pristine source before compiling. */
7866 IRExpr
* shiftL64_with_extras ( IRTemp base
, IRTemp xtra
, IRTemp amt
)
7870 else (base << amt) | (xtra >>u (64-amt))
7874 binop(Iop_CmpNE8
, mkexpr(amt
), mkU8(0)),
7876 binop(Iop_Shl64
, mkexpr(base
), mkexpr(amt
)),
7877 binop(Iop_Shr64
, mkexpr(xtra
),
7878 binop(Iop_Sub8
, mkU8(64), mkexpr(amt
)))
7884 /* Generate base >>u amt with vacated places filled with stuff
7885 from xtra. amt guaranteed in 0 .. 63. */
/* Mirror of shiftL64_with_extras: builds (base >>u amt) with the
   vacated high bits filled from the bottom of xtra -- if amt == 0 the
   result is base, else (base >>u amt) | (xtra << (64 - amt)).  Used by
   the SHRD family.
   NOTE(review): extraction-damaged -- several interior lines and the
   closing lines are missing from this view. */
7887 IRExpr
* shiftR64_with_extras ( IRTemp xtra
, IRTemp base
, IRTemp amt
)
7891 else (base >>u amt) | (xtra << (64-amt))
7895 binop(Iop_CmpNE8
, mkexpr(amt
), mkU8(0)),
7897 binop(Iop_Shr64
, mkexpr(base
), mkexpr(amt
)),
7898 binop(Iop_Shl64
, mkexpr(xtra
),
7899 binop(Iop_Sub8
, mkU8(64), mkexpr(amt
)))
7905 /* Double length left and right shifts. Apparently only required in
7906 v-size (no b- variant). */
7908 ULong
dis_SHLRD_Gv_Ev ( const VexAbiInfo
* vbi
,
7910 Long delta
, UChar modrm
,
7913 Bool amt_is_literal
,
7914 const HChar
* shift_amt_txt
,
7917 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used
7918 for printing it. And eip on entry points at the modrm byte. */
7922 IRType ty
= szToITy(sz
);
7923 IRTemp gsrc
= newTemp(ty
);
7924 IRTemp esrc
= newTemp(ty
);
7925 IRTemp addr
= IRTemp_INVALID
;
7926 IRTemp tmpSH
= newTemp(Ity_I8
);
7927 IRTemp tmpSS
= newTemp(Ity_I8
);
7928 IRTemp tmp64
= IRTemp_INVALID
;
7929 IRTemp res64
= IRTemp_INVALID
;
7930 IRTemp rss64
= IRTemp_INVALID
;
7931 IRTemp resTy
= IRTemp_INVALID
;
7932 IRTemp rssTy
= IRTemp_INVALID
;
7933 Int mask
= sz
==8 ? 63 : 31;
7935 vassert(sz
== 2 || sz
== 4 || sz
== 8);
7937 /* The E-part is the destination; this is shifted. The G-part
7938 supplies bits to be shifted into the E-part, but is not
7941 If shifting left, form a double-length word with E at the top
7942 and G at the bottom, and shift this left. The result is then in
7945 If shifting right, form a double-length word with G at the top
7946 and E at the bottom, and shift this right. The result is then
7949 /* Fetch the operands. */
7951 assign( gsrc
, getIRegG(sz
, pfx
, modrm
) );
7953 if (epartIsReg(modrm
)) {
7955 assign( esrc
, getIRegE(sz
, pfx
, modrm
) );
7956 DIP("sh%cd%c %s, %s, %s\n",
7957 ( left_shift
? 'l' : 'r' ), nameISize(sz
),
7959 nameIRegG(sz
, pfx
, modrm
), nameIRegE(sz
, pfx
, modrm
));
7961 addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
,
7962 /* # bytes following amode */
7963 amt_is_literal
? 1 : 0 );
7965 assign( esrc
, loadLE(ty
, mkexpr(addr
)) );
7966 DIP("sh%cd%c %s, %s, %s\n",
7967 ( left_shift
? 'l' : 'r' ), nameISize(sz
),
7969 nameIRegG(sz
, pfx
, modrm
), dis_buf
);
7972 /* Calculate the masked shift amount (tmpSH), the masked subshift
7973 amount (tmpSS), the shifted value (res64) and the subshifted
7976 assign( tmpSH
, binop(Iop_And8
, shift_amt
, mkU8(mask
)) );
7977 assign( tmpSS
, binop(Iop_And8
,
7978 binop(Iop_Sub8
, mkexpr(tmpSH
), mkU8(1) ),
7981 tmp64
= newTemp(Ity_I64
);
7982 res64
= newTemp(Ity_I64
);
7983 rss64
= newTemp(Ity_I64
);
7985 if (sz
== 2 || sz
== 4) {
7987 /* G is xtra; E is data */
7988 /* what a freaking nightmare: */
7989 if (sz
== 4 && left_shift
) {
7990 assign( tmp64
, binop(Iop_32HLto64
, mkexpr(esrc
), mkexpr(gsrc
)) );
7993 binop(Iop_Shl64
, mkexpr(tmp64
), mkexpr(tmpSH
)),
7997 binop(Iop_Shl64
, mkexpr(tmp64
), mkexpr(tmpSS
)),
8001 if (sz
== 4 && !left_shift
) {
8002 assign( tmp64
, binop(Iop_32HLto64
, mkexpr(gsrc
), mkexpr(esrc
)) );
8003 assign( res64
, binop(Iop_Shr64
, mkexpr(tmp64
), mkexpr(tmpSH
)) );
8004 assign( rss64
, binop(Iop_Shr64
, mkexpr(tmp64
), mkexpr(tmpSS
)) );
8007 if (sz
== 2 && left_shift
) {
8010 binop(Iop_16HLto32
, mkexpr(esrc
), mkexpr(gsrc
)),
8011 binop(Iop_16HLto32
, mkexpr(gsrc
), mkexpr(gsrc
))
8013 /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */
8016 binop(Iop_Shl64
, mkexpr(tmp64
), mkexpr(tmpSH
)),
8018 /* subshift formed by shifting [esrc'0000'0000'0000] */
8022 binop(Iop_Shl64
, unop(Iop_16Uto64
, mkexpr(esrc
)),
8028 if (sz
== 2 && !left_shift
) {
8031 binop(Iop_16HLto32
, mkexpr(gsrc
), mkexpr(gsrc
)),
8032 binop(Iop_16HLto32
, mkexpr(gsrc
), mkexpr(esrc
))
8034 /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */
8035 assign( res64
, binop(Iop_Shr64
, mkexpr(tmp64
), mkexpr(tmpSH
)) );
8036 /* subshift formed by shifting [0000'0000'0000'esrc] */
8037 assign( rss64
, binop(Iop_Shr64
,
8038 unop(Iop_16Uto64
, mkexpr(esrc
)),
8046 assign( res64
, shiftL64_with_extras( esrc
, gsrc
, tmpSH
));
8047 assign( rss64
, shiftL64_with_extras( esrc
, gsrc
, tmpSS
));
8049 assign( res64
, shiftR64_with_extras( gsrc
, esrc
, tmpSH
));
8050 assign( rss64
, shiftR64_with_extras( gsrc
, esrc
, tmpSS
));
8055 resTy
= newTemp(ty
);
8056 rssTy
= newTemp(ty
);
8057 assign( resTy
, narrowTo(ty
, mkexpr(res64
)) );
8058 assign( rssTy
, narrowTo(ty
, mkexpr(rss64
)) );
8060 /* Put result back and write the flags thunk. */
8061 setFlags_DEP1_DEP2_shift ( left_shift
? Iop_Shl64
: Iop_Sar64
,
8062 resTy
, rssTy
, ty
, tmpSH
);
8064 if (epartIsReg(modrm
)) {
8065 putIRegE(sz
, pfx
, modrm
, mkexpr(resTy
));
8067 storeLE( mkexpr(addr
), mkexpr(resTy
) );
8070 if (amt_is_literal
) delta
++;
8075 /* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not
/* Selector for the bit-test instruction family handled by dis_bt_G_E:
   BtOpNone = BT (test only), BtOpSet = BTS, BtOpReset = BTR,
   BtOpComp = BTC. */
8078 typedef enum { BtOpNone
, BtOpSet
, BtOpReset
, BtOpComp
} BtOp
;
/* Maps a BtOp to its mnemonic suffix for disassembly printing:
   "" (BT), "s" (BTS), "r" (BTR), "c" (BTC); panics on any other value.
   NOTE(review): extraction-damaged -- the opening brace / `switch (op) {`
   lines (original 8081-8082) and the closing braces are missing here. */
8080 static const HChar
* nameBtOp ( BtOp op
)
8083 case BtOpNone
: return "";
8084 case BtOpSet
: return "s";
8085 case BtOpReset
: return "r";
8086 case BtOpComp
: return "c";
8087 default: vpanic("nameBtOp(amd64)");
8093 ULong
dis_bt_G_E ( const VexAbiInfo
* vbi
,
8094 Prefix pfx
, Int sz
, Long delta
, BtOp op
,
8095 /*OUT*/Bool
* decode_OK
)
8100 IRTemp t_fetched
, t_bitno0
, t_bitno1
, t_bitno2
, t_addr0
,
8101 t_addr1
, t_rsp
, t_mask
, t_new
;
8103 vassert(sz
== 2 || sz
== 4 || sz
== 8);
8105 t_fetched
= t_bitno0
= t_bitno1
= t_bitno2
8106 = t_addr0
= t_addr1
= t_rsp
8107 = t_mask
= t_new
= IRTemp_INVALID
;
8109 t_fetched
= newTemp(Ity_I8
);
8110 t_new
= newTemp(Ity_I8
);
8111 t_bitno0
= newTemp(Ity_I64
);
8112 t_bitno1
= newTemp(Ity_I64
);
8113 t_bitno2
= newTemp(Ity_I8
);
8114 t_addr1
= newTemp(Ity_I64
);
8115 modrm
= getUChar(delta
);
8118 if (epartIsReg(modrm
)) {
8119 /* F2 and F3 are never acceptable. */
8120 if (haveF2orF3(pfx
)) {
8125 /* F2 or F3 (but not both) are allowed, provided LOCK is also
8126 present, and only for the BTC/BTS/BTR cases (not BT). */
8127 if (haveF2orF3(pfx
)) {
8128 if (haveF2andF3(pfx
) || !haveLOCK(pfx
) || op
== BtOpNone
) {
8135 assign( t_bitno0
, widenSto64(getIRegG(sz
, pfx
, modrm
)) );
8137 if (epartIsReg(modrm
)) {
8139 /* Get it onto the client's stack. Oh, this is a horrible
8140 kludge. See https://bugs.kde.org/show_bug.cgi?id=245925.
8141 Because of the ELF ABI stack redzone, there may be live data
8142 up to 128 bytes below %RSP. So we can't just push it on the
8143 stack, else we may wind up trashing live data, and causing
8144 impossible-to-find simulation errors. (Yes, this did
8145 happen.) So we need to drop RSP before at least 128 before
8146 pushing it. That unfortunately means hitting Memcheck's
8147 fast-case painting code. Ideally we should drop more than
8148 128, to reduce the chances of breaking buggy programs that
8149 have live data below -128(%RSP). Memcheck fast-cases moves
8150 of 288 bytes due to the need to handle ppc64-linux quickly,
8151 so let's use 288. Of course the real fix is to get rid of
8152 this kludge entirely. */
8153 t_rsp
= newTemp(Ity_I64
);
8154 t_addr0
= newTemp(Ity_I64
);
8156 vassert(vbi
->guest_stack_redzone_size
== 128);
8157 assign( t_rsp
, binop(Iop_Sub64
, getIReg64(R_RSP
), mkU64(288)) );
8158 putIReg64(R_RSP
, mkexpr(t_rsp
));
8160 storeLE( mkexpr(t_rsp
), getIRegE(sz
, pfx
, modrm
) );
8162 /* Make t_addr0 point at it. */
8163 assign( t_addr0
, mkexpr(t_rsp
) );
8165 /* Mask out upper bits of the shift amount, since we're doing a
8167 assign( t_bitno1
, binop(Iop_And64
,
8169 mkU64(sz
== 8 ? 63 : sz
== 4 ? 31 : 15)) );
8172 t_addr0
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
8174 assign( t_bitno1
, mkexpr(t_bitno0
) );
8177 /* At this point: t_addr0 is the address being operated on. If it
8178 was a reg, we will have pushed it onto the client's stack.
8179 t_bitno1 is the bit number, suitably masked in the case of a
8182 /* Now the main sequence. */
8186 binop(Iop_Sar64
, mkexpr(t_bitno1
), mkU8(3))) );
8188 /* t_addr1 now holds effective address */
8192 binop(Iop_And64
, mkexpr(t_bitno1
), mkU64(7))) );
8194 /* t_bitno2 contains offset of bit within byte */
8196 if (op
!= BtOpNone
) {
8197 t_mask
= newTemp(Ity_I8
);
8198 assign( t_mask
, binop(Iop_Shl8
, mkU8(1), mkexpr(t_bitno2
)) );
8201 /* t_mask is now a suitable byte mask */
8203 assign( t_fetched
, loadLE(Ity_I8
, mkexpr(t_addr1
)) );
8205 if (op
!= BtOpNone
) {
8209 binop(Iop_Or8
, mkexpr(t_fetched
), mkexpr(t_mask
)) );
8213 binop(Iop_Xor8
, mkexpr(t_fetched
), mkexpr(t_mask
)) );
8217 binop(Iop_And8
, mkexpr(t_fetched
),
8218 unop(Iop_Not8
, mkexpr(t_mask
))) );
8221 vpanic("dis_bt_G_E(amd64)");
8223 if ((haveLOCK(pfx
)) && !epartIsReg(modrm
)) {
8224 casLE( mkexpr(t_addr1
), mkexpr(t_fetched
)/*expd*/,
8225 mkexpr(t_new
)/*new*/,
8226 guest_RIP_curr_instr
);
8228 storeLE( mkexpr(t_addr1
), mkexpr(t_new
) );
8232 /* Side effect done; now get selected bit into Carry flag. The Intel docs
8233 (as of 2015, at least) say that C holds the result, Z is unchanged, and
8234 O,S,A and P are undefined. However, on Skylake it appears that O,S,A,P
8235 are also unchanged, so let's do that. */
8236 const ULong maskC
= AMD64G_CC_MASK_C
;
8237 const ULong maskOSZAP
= AMD64G_CC_MASK_O
| AMD64G_CC_MASK_S
8238 | AMD64G_CC_MASK_Z
| AMD64G_CC_MASK_A
8241 IRTemp old_rflags
= newTemp(Ity_I64
);
8242 assign(old_rflags
, mk_amd64g_calculate_rflags_all());
8244 IRTemp new_rflags
= newTemp(Ity_I64
);
8247 binop(Iop_And64
, mkexpr(old_rflags
), mkU64(maskOSZAP
)),
8250 unop(Iop_8Uto64
, mkexpr(t_fetched
)),
8254 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
8255 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
8256 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(new_rflags
) ));
8257 /* Set NDEP even though it isn't used. This makes redundant-PUT
8258 elimination of previous stores to this field work better. */
8259 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
8261 /* Move reg operand from stack back to reg */
8262 if (epartIsReg(modrm
)) {
8263 /* t_rsp still points at it. */
8264 /* only write the reg if actually modifying it; doing otherwise
8265 zeroes the top half erroneously when doing btl due to
8266 standard zero-extend rule */
8268 putIRegE(sz
, pfx
, modrm
, loadLE(szToITy(sz
), mkexpr(t_rsp
)) );
8269 putIReg64(R_RSP
, binop(Iop_Add64
, mkexpr(t_rsp
), mkU64(288)) );
8272 DIP("bt%s%c %s, %s\n",
8273 nameBtOp(op
), nameISize(sz
), nameIRegG(sz
, pfx
, modrm
),
8274 ( epartIsReg(modrm
) ? nameIRegE(sz
, pfx
, modrm
) : dis_buf
) );
8281 /* Handle BSF/BSR. Only v-size seems necessary. */
/* Disassembles BSF/BSR (E reg-or-mem source, G reg destination);
   fwds selects BSF (forward scan, Ctz) vs BSR (reverse scan, Clz).
   Flags: only Z is defined (set iff source is zero); the others are
   forced to zero via a COPY thunk.  On a zero source the destination
   register is left unchanged, matching observed amd64 behaviour.
   NOTE(review): extraction-damaged -- interior lines (e.g. originals
   8285-8289, 8301-8305, 8331-8339, 8371-8384) are missing from this
   view; restore from the pristine source before compiling. */
8283 ULong
dis_bs_E_G ( const VexAbiInfo
* vbi
,
8284 Prefix pfx
, Int sz
, Long delta
, Bool fwds
)
8290 IRType ty
= szToITy(sz
);
8291 IRTemp src
= newTemp(ty
);
8292 IRTemp dst
= newTemp(ty
);
8293 IRTemp src64
= newTemp(Ity_I64
);
8294 IRTemp dst64
= newTemp(Ity_I64
);
8295 IRTemp srcB
= newTemp(Ity_I1
);
8297 vassert(sz
== 8 || sz
== 4 || sz
== 2);
8299 modrm
= getUChar(delta
);
8300 isReg
= epartIsReg(modrm
);
8303 assign( src
, getIRegE(sz
, pfx
, modrm
) );
8306 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
8308 assign( src
, loadLE(ty
, mkexpr(addr
)) );
8311 DIP("bs%c%c %s, %s\n",
8312 fwds
? 'f' : 'r', nameISize(sz
),
8313 ( isReg
? nameIRegE(sz
, pfx
, modrm
) : dis_buf
),
8314 nameIRegG(sz
, pfx
, modrm
));
8316 /* First, widen src to 64 bits if it is not already. */
8317 assign( src64
, widenUto64(mkexpr(src
)) );
8319 /* Generate a bool expression which is zero iff the original is
8320 zero, and nonzero otherwise. Ask for a CmpNE version which, if
8321 instrumented by Memcheck, is instrumented expensively, since
8322 this may be used on the output of a preceding movmskb insn,
8323 which has been known to be partially defined, and in need of
8324 careful handling. */
8325 assign( srcB
, binop(Iop_ExpCmpNE64
, mkexpr(src64
), mkU64(0)) );
8327 /* Flags: Z is 1 iff source value is zero. All others
8328 are undefined -- we force them to zero. */
8329 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
8330 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
8333 IRExpr_ITE( mkexpr(srcB
),
8337 mkU64(AMD64G_CC_MASK_Z
)
8340 /* Set NDEP even though it isn't used. This makes redundant-PUT
8341 elimination of previous stores to this field work better. */
8342 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
8344 /* Result: iff source value is zero, we can't use
8345 Iop_Clz64/Iop_Ctz64 as they have no defined result in that case.
8346 But anyway, amd64 semantics say the result is undefined in
8347 such situations. Hence handle the zero case specially. */
8349 /* Bleh. What we compute:
8351 bsf64: if src == 0 then {dst is unchanged}
8354 bsr64: if src == 0 then {dst is unchanged}
8355 else 63 - Clz64(src)
8357 bsf32: if src == 0 then {dst is unchanged}
8358 else Ctz64(32Uto64(src))
8360 bsr32: if src == 0 then {dst is unchanged}
8361 else 63 - Clz64(32Uto64(src))
8363 bsf16: if src == 0 then {dst is unchanged}
8364 else Ctz64(32Uto64(16Uto32(src)))
8366 bsr16: if src == 0 then {dst is unchanged}
8367 else 63 - Clz64(32Uto64(16Uto32(src)))
8370 /* The main computation, guarding against zero. */
8375 fwds
? unop(Iop_Ctz64
, mkexpr(src64
))
8378 unop(Iop_Clz64
, mkexpr(src64
))),
8379 /* src == 0 -- leave dst unchanged */
8380 widenUto64( getIRegG( sz
, pfx
, modrm
) )
8385 assign( dst
, unop(Iop_64to16
, mkexpr(dst64
)) );
8388 assign( dst
, unop(Iop_64to32
, mkexpr(dst64
)) );
8390 assign( dst
, mkexpr(dst64
) );
8392 /* dump result back */
8393 putIRegG( sz
, pfx
, modrm
, mkexpr(dst
) );
8399 /* swap rAX with the reg specified by reg and REX.B */
/* Emits IR for XCHG rAX, reg (reg selected by regLo3 plus REX.B):
   reads both registers into temporaries, then writes each one's old
   value into the other.  Handled per operand size -- 8, 4 or 2 bytes --
   via the size-specific get/put helpers; sz is asserted to be one of
   2/4/8 and regLo3 to be a valid low-3-bit register number.
   NOTE(review): extraction-damaged -- opening brace and some interior
   lines (e.g. originals 8402, 8408, 8418, 8423) are missing here. */
8401 void codegen_xchg_rAX_Reg ( Prefix pfx
, Int sz
, UInt regLo3
)
8403 IRType ty
= szToITy(sz
);
8404 IRTemp t1
= newTemp(ty
);
8405 IRTemp t2
= newTemp(ty
);
8406 vassert(sz
== 2 || sz
== 4 || sz
== 8);
8407 vassert(regLo3
< 8);
8409 assign( t1
, getIReg64(R_RAX
) );
8410 assign( t2
, getIRegRexB(8, pfx
, regLo3
) );
8411 putIReg64( R_RAX
, mkexpr(t2
) );
8412 putIRegRexB(8, pfx
, regLo3
, mkexpr(t1
) );
8413 } else if (sz
== 4) {
8414 assign( t1
, getIReg32(R_RAX
) );
8415 assign( t2
, getIRegRexB(4, pfx
, regLo3
) );
8416 putIReg32( R_RAX
, mkexpr(t2
) );
8417 putIRegRexB(4, pfx
, regLo3
, mkexpr(t1
) );
8419 assign( t1
, getIReg16(R_RAX
) );
8420 assign( t2
, getIRegRexB(2, pfx
, regLo3
) );
8421 putIReg16( R_RAX
, mkexpr(t2
) );
8422 putIRegRexB(2, pfx
, regLo3
, mkexpr(t1
) );
8424 DIP("xchg%c %s, %s\n",
8425 nameISize(sz
), nameIRegRAX(sz
),
8426 nameIRegRexB(sz
,pfx
, regLo3
));
/* Emits IR for SAHF: loads S/Z/A/C/P flag bits from %AH (bits 15:8 of
   RAX, hence the Shr64 by 8) while preserving the existing O flag, and
   stores the combined value via a CC_OP_COPY flags thunk (DEP2/NDEP
   zeroed).
   NOTE(review): extraction-damaged -- the opening brace and interior
   lines (originals 8432, 8438, 8447, 8449, 8451-8457) are missing. */
8431 void codegen_SAHF ( void )
8433 /* Set the flags to:
8434 (amd64g_calculate_flags_all() & AMD64G_CC_MASK_O)
8435 -- retain the old O flag
8436 | (%AH & (AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
8437 |AMD64G_CC_MASK_P|AMD64G_CC_MASK_C)
8439 ULong mask_SZACP
= AMD64G_CC_MASK_S
|AMD64G_CC_MASK_Z
|AMD64G_CC_MASK_A
8440 |AMD64G_CC_MASK_C
|AMD64G_CC_MASK_P
;
8441 IRTemp oldflags
= newTemp(Ity_I64
);
8442 assign( oldflags
, mk_amd64g_calculate_rflags_all() );
8443 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
8444 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
8445 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
8446 stmt( IRStmt_Put( OFFB_CC_DEP1
,
8448 binop(Iop_And64
, mkexpr(oldflags
), mkU64(AMD64G_CC_MASK_O
)),
8450 binop(Iop_Shr64
, getIReg64(R_RAX
), mkU8(8)),
/* Emits IR for LAHF: computes the current rflags, extracts the
   S/Z/A/C/P bits, and deposits them into %AH -- RAX is masked with
   ~0xFF00 to clear the old AH, then the new flag byte is shifted left
   by 8 and OR'd in.  Per the comment, AH gets SF:ZF:0:AF:0:PF:1:CF.
   NOTE(review): extraction-damaged -- the opening brace and interior
   lines (originals 8459, 8462-8463, 8466, 8469-8470, 8472, 8474-8475)
   are missing; the `new_byte`/`new_rax` declarations are among them. */
8458 void codegen_LAHF ( void )
8460 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
8461 IRExpr
* rax_with_hole
;
8464 ULong mask_SZACP
= AMD64G_CC_MASK_S
|AMD64G_CC_MASK_Z
|AMD64G_CC_MASK_A
8465 |AMD64G_CC_MASK_C
|AMD64G_CC_MASK_P
;
8467 IRTemp flags
= newTemp(Ity_I64
);
8468 assign( flags
, mk_amd64g_calculate_rflags_all() );
8471 = binop(Iop_And64
, getIReg64(R_RAX
), mkU64(~0xFF00ULL
));
8473 = binop(Iop_Or64
, binop(Iop_And64
, mkexpr(flags
), mkU64(mask_SZACP
)),
8476 = binop(Iop_Or64
, rax_with_hole
,
8477 binop(Iop_Shl64
, new_byte
, mkU8(8)));
8478 putIReg64(R_RAX
, new_rax
);
8483 ULong
dis_cmpxchg_G_E ( /*OUT*/Bool
* ok
,
8484 const VexAbiInfo
* vbi
,
8492 IRType ty
= szToITy(size
);
8493 IRTemp acc
= newTemp(ty
);
8494 IRTemp src
= newTemp(ty
);
8495 IRTemp dest
= newTemp(ty
);
8496 IRTemp dest2
= newTemp(ty
);
8497 IRTemp acc2
= newTemp(ty
);
8498 IRTemp cond
= newTemp(Ity_I1
);
8499 IRTemp addr
= IRTemp_INVALID
;
8500 UChar rm
= getUChar(delta0
);
8502 /* There are 3 cases to consider:
8504 reg-reg: ignore any lock prefix, generate sequence based
8507 reg-mem, not locked: ignore any lock prefix, generate sequence
8510 reg-mem, locked: use IRCAS
8513 /* Decide whether F2 or F3 are acceptable. Never for register
8514 case, but for the memory case, one or the other is OK provided
8515 LOCK is also present. */
8516 if (epartIsReg(rm
)) {
8517 if (haveF2orF3(pfx
)) {
8522 if (haveF2orF3(pfx
)) {
8523 if (haveF2andF3(pfx
) || !haveLOCK(pfx
)) {
8530 if (epartIsReg(rm
)) {
8532 assign( dest
, getIRegE(size
, pfx
, rm
) );
8534 assign( src
, getIRegG(size
, pfx
, rm
) );
8535 assign( acc
, getIRegRAX(size
) );
8536 setFlags_DEP1_DEP2(Iop_Sub8
, acc
, dest
, ty
);
8537 assign( cond
, mk_amd64g_calculate_condition(AMD64CondZ
) );
8538 assign( dest2
, IRExpr_ITE(mkexpr(cond
), mkexpr(src
), mkexpr(dest
)) );
8539 assign( acc2
, IRExpr_ITE(mkexpr(cond
), mkexpr(acc
), mkexpr(dest
)) );
8540 putIRegRAX(size
, mkexpr(acc2
));
8541 putIRegE(size
, pfx
, rm
, mkexpr(dest2
));
8542 DIP("cmpxchg%c %s,%s\n", nameISize(size
),
8543 nameIRegG(size
,pfx
,rm
),
8544 nameIRegE(size
,pfx
,rm
) );
8546 else if (!epartIsReg(rm
) && !haveLOCK(pfx
)) {
8548 addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
8549 assign( dest
, loadLE(ty
, mkexpr(addr
)) );
8551 assign( src
, getIRegG(size
, pfx
, rm
) );
8552 assign( acc
, getIRegRAX(size
) );
8553 setFlags_DEP1_DEP2(Iop_Sub8
, acc
, dest
, ty
);
8554 assign( cond
, mk_amd64g_calculate_condition(AMD64CondZ
) );
8555 assign( dest2
, IRExpr_ITE(mkexpr(cond
), mkexpr(src
), mkexpr(dest
)) );
8556 assign( acc2
, IRExpr_ITE(mkexpr(cond
), mkexpr(acc
), mkexpr(dest
)) );
8557 putIRegRAX(size
, mkexpr(acc2
));
8558 storeLE( mkexpr(addr
), mkexpr(dest2
) );
8559 DIP("cmpxchg%c %s,%s\n", nameISize(size
),
8560 nameIRegG(size
,pfx
,rm
), dis_buf
);
8562 else if (!epartIsReg(rm
) && haveLOCK(pfx
)) {
8564 /* src is new value. acc is expected value. dest is old value.
8565 Compute success from the output of the IRCAS, and steer the
8566 new value for RAX accordingly: in case of success, RAX is
8568 addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
8570 assign( src
, getIRegG(size
, pfx
, rm
) );
8571 assign( acc
, getIRegRAX(size
) );
8573 mkIRCAS( IRTemp_INVALID
, dest
, Iend_LE
, mkexpr(addr
),
8574 NULL
, mkexpr(acc
), NULL
, mkexpr(src
) )
8576 setFlags_DEP1_DEP2(Iop_Sub8
, acc
, dest
, ty
);
8577 assign( cond
, mk_amd64g_calculate_condition(AMD64CondZ
) );
8578 assign( acc2
, IRExpr_ITE(mkexpr(cond
), mkexpr(acc
), mkexpr(dest
)) );
8579 putIRegRAX(size
, mkexpr(acc2
));
8580 DIP("cmpxchg%c %s,%s\n", nameISize(size
),
8581 nameIRegG(size
,pfx
,rm
), dis_buf
);
8590 /* Handle conditional move instructions of the form
8591 cmovcc E(reg-or-mem), G(reg)
8593 E(src) is reg-or-mem
8596 If E is reg, --> GET %E, tmps
8601 If E is mem --> (getAddr E) -> tmpa
/* Disassembles CMOVcc E(reg-or-mem), G(reg): fetches the source (tmps)
   and the current destination (tmpd), then writes G with an IRExpr_ITE
   selecting tmps when the condition holds, tmpd otherwise -- so the
   conditional move is expressed as an unconditional guarded select.
   NOTE(review): extraction-damaged -- interior lines (e.g. originals
   8609-8613, 8615-8617, 8621, 8628-8630, 8634-8636, 8645-8648,
   8652-8657) are missing from this view, including the ITE arms and
   the return statements. */
8608 ULong
dis_cmov_E_G ( const VexAbiInfo
* vbi
,
8614 UChar rm
= getUChar(delta0
);
8618 IRType ty
= szToITy(sz
);
8619 IRTemp tmps
= newTemp(ty
);
8620 IRTemp tmpd
= newTemp(ty
);
8622 if (epartIsReg(rm
)) {
8623 assign( tmps
, getIRegE(sz
, pfx
, rm
) );
8624 assign( tmpd
, getIRegG(sz
, pfx
, rm
) );
8626 putIRegG( sz
, pfx
, rm
,
8627 IRExpr_ITE( mk_amd64g_calculate_condition(cond
),
8631 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond
),
8632 nameIRegE(sz
,pfx
,rm
),
8633 nameIRegG(sz
,pfx
,rm
));
8637 /* E refers to memory */
8639 IRTemp addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
8640 assign( tmps
, loadLE(ty
, mkexpr(addr
)) );
8641 assign( tmpd
, getIRegG(sz
, pfx
, rm
) );
8643 putIRegG( sz
, pfx
, rm
,
8644 IRExpr_ITE( mk_amd64g_calculate_condition(cond
),
8649 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond
),
8651 nameIRegG(sz
,pfx
,rm
));
8658 ULong
dis_xadd_G_E ( /*OUT*/Bool
* decode_ok
,
8659 const VexAbiInfo
* vbi
,
8660 Prefix pfx
, Int sz
, Long delta0
)
8663 UChar rm
= getUChar(delta0
);
8666 IRType ty
= szToITy(sz
);
8667 IRTemp tmpd
= newTemp(ty
);
8668 IRTemp tmpt0
= newTemp(ty
);
8669 IRTemp tmpt1
= newTemp(ty
);
8671 /* There are 3 cases to consider:
8673 reg-reg: ignore any lock prefix,
8674 generate 'naive' (non-atomic) sequence
8676 reg-mem, not locked: ignore any lock prefix, generate 'naive'
8677 (non-atomic) sequence
8679 reg-mem, locked: use IRCAS
8682 if (epartIsReg(rm
)) {
8684 assign( tmpd
, getIRegE(sz
, pfx
, rm
) );
8685 assign( tmpt0
, getIRegG(sz
, pfx
, rm
) );
8686 assign( tmpt1
, binop(mkSizedOp(ty
,Iop_Add8
),
8687 mkexpr(tmpd
), mkexpr(tmpt0
)) );
8688 setFlags_DEP1_DEP2( Iop_Add8
, tmpd
, tmpt0
, ty
);
8689 putIRegG(sz
, pfx
, rm
, mkexpr(tmpd
));
8690 putIRegE(sz
, pfx
, rm
, mkexpr(tmpt1
));
8691 DIP("xadd%c %s, %s\n",
8692 nameISize(sz
), nameIRegG(sz
,pfx
,rm
), nameIRegE(sz
,pfx
,rm
));
8696 else if (!epartIsReg(rm
) && !haveLOCK(pfx
)) {
8698 IRTemp addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
8699 assign( tmpd
, loadLE(ty
, mkexpr(addr
)) );
8700 assign( tmpt0
, getIRegG(sz
, pfx
, rm
) );
8701 assign( tmpt1
, binop(mkSizedOp(ty
,Iop_Add8
),
8702 mkexpr(tmpd
), mkexpr(tmpt0
)) );
8703 setFlags_DEP1_DEP2( Iop_Add8
, tmpd
, tmpt0
, ty
);
8704 storeLE( mkexpr(addr
), mkexpr(tmpt1
) );
8705 putIRegG(sz
, pfx
, rm
, mkexpr(tmpd
));
8706 DIP("xadd%c %s, %s\n",
8707 nameISize(sz
), nameIRegG(sz
,pfx
,rm
), dis_buf
);
8711 else if (!epartIsReg(rm
) && haveLOCK(pfx
)) {
8713 IRTemp addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
8714 assign( tmpd
, loadLE(ty
, mkexpr(addr
)) );
8715 assign( tmpt0
, getIRegG(sz
, pfx
, rm
) );
8716 assign( tmpt1
, binop(mkSizedOp(ty
,Iop_Add8
),
8717 mkexpr(tmpd
), mkexpr(tmpt0
)) );
8718 casLE( mkexpr(addr
), mkexpr(tmpd
)/*expVal*/,
8719 mkexpr(tmpt1
)/*newVal*/, guest_RIP_curr_instr
);
8720 setFlags_DEP1_DEP2( Iop_Add8
, tmpd
, tmpt0
, ty
);
8721 putIRegG(sz
, pfx
, rm
, mkexpr(tmpd
));
8722 DIP("xadd%c %s, %s\n",
8723 nameISize(sz
), nameIRegG(sz
,pfx
,rm
), dis_buf
);
8731 //.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
8734 //.. UInt dis_mov_Ew_Sw ( UChar sorb, Long delta0 )
8738 //.. UChar rm = getUChar(delta0);
8739 //.. HChar dis_buf[50];
8741 //.. if (epartIsReg(rm)) {
8742 //.. putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
8743 //.. DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
8744 //.. return 1+delta0;
8746 //.. addr = disAMode ( &len, sorb, delta0, dis_buf );
8747 //.. putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
8748 //.. DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
8749 //.. return len+delta0;
8753 //.. /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If
8754 //.. dst is ireg and sz==4, zero out top half of it. */
8757 //.. UInt dis_mov_Sw_Ew ( UChar sorb,
8763 //.. UChar rm = getUChar(delta0);
8764 //.. HChar dis_buf[50];
8766 //.. vassert(sz == 2 || sz == 4);
8768 //.. if (epartIsReg(rm)) {
8770 //.. putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
8772 //.. putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));
8774 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
8775 //.. return 1+delta0;
8777 //.. addr = disAMode ( &len, sorb, delta0, dis_buf );
8778 //.. storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
8779 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
8780 //.. return len+delta0;
8784 /* Handle move instructions of the form
8786 mov sreg, reg-or-mem
8787 Is passed the a ptr to the modRM byte, and the data size. Returns
8788 the address advanced completely over this instruction.
8790 VEX does not currently simulate segment registers on AMD64 which means that
8791 instead of moving a value of a segment register, zero is moved to the
8792 destination. The zero value represents a null (unused) selector. This is
8793 not correct (especially for the %cs, %fs and %gs registers) but it seems to
8794 provide a sufficient simulation for currently seen programs that use this
8795 instruction. If some program actually decides to use the obtained segment
8796 selector for something meaningful then the zero value should be a clear
8797 indicator that there is some problem.
8800 E(dst) is reg-or-mem
8802 If E is reg, --> PUT $0, %E
8804 If E is mem, --> (getAddr E) -> tmpa
/* Disassembles MOV sreg, reg-or-mem.  Per the file's own comment
   above: VEX does not model segment registers on amd64, so a zero
   (null selector) is written to the destination instead of a real
   segment value -- putIRegE(..., mkU(ty, 0)) for the register case,
   storeLE(..., mkU16(0)) for the memory case.
   NOTE(review): extraction-damaged -- interior lines (originals
   8809-8813, 8815-8816, 8821-8823, 8825, 8829 onward, including the
   returns) are missing from this view. */
8808 ULong
dis_mov_S_E ( const VexAbiInfo
* vbi
,
8814 UChar rm
= getUChar(delta0
);
8817 if (epartIsReg(rm
)) {
8818 putIRegE(size
, pfx
, rm
, mkU(szToITy(size
), 0));
8819 DIP("mov %s,%s\n", nameSReg(gregOfRexRM(pfx
, rm
)),
8820 nameIRegE(size
, pfx
, rm
));
8824 /* E refers to memory */
8826 IRTemp addr
= disAMode(&len
, vbi
, pfx
, delta0
, dis_buf
, 0);
8827 storeLE(mkexpr(addr
), mkU16(0));
8828 DIP("mov %s,%s\n", nameSReg(gregOfRexRM(pfx
, rm
)),
8835 //.. void dis_push_segreg ( UInt sreg, Int sz )
8837 //.. IRTemp t1 = newTemp(Ity_I16);
8838 //.. IRTemp ta = newTemp(Ity_I32);
8839 //.. vassert(sz == 2 || sz == 4);
8841 //.. assign( t1, getSReg(sreg) );
8842 //.. assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
8843 //.. putIReg(4, R_ESP, mkexpr(ta));
8844 //.. storeLE( mkexpr(ta), mkexpr(t1) );
8846 //.. DIP("pushw %s\n", nameSReg(sreg));
8850 //.. void dis_pop_segreg ( UInt sreg, Int sz )
8852 //.. IRTemp t1 = newTemp(Ity_I16);
8853 //.. IRTemp ta = newTemp(Ity_I32);
8854 //.. vassert(sz == 2 || sz == 4);
8856 //.. assign( ta, getIReg(4, R_ESP) );
8857 //.. assign( t1, loadLE(Ity_I16, mkexpr(ta)) );
8859 //.. putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
8860 //.. putSReg( sreg, mkexpr(t1) );
8861 //.. DIP("pop %s\n", nameSReg(sreg));
8865 void dis_ret ( /*MOD*/DisResult
* dres
, const VexAbiInfo
* vbi
, ULong d64
)
8867 IRTemp t1
= newTemp(Ity_I64
);
8868 IRTemp t2
= newTemp(Ity_I64
);
8869 IRTemp t3
= newTemp(Ity_I64
);
8870 assign(t1
, getIReg64(R_RSP
));
8871 assign(t2
, loadLE(Ity_I64
,mkexpr(t1
)));
8872 assign(t3
, binop(Iop_Add64
, mkexpr(t1
), mkU64(8+d64
)));
8873 putIReg64(R_RSP
, mkexpr(t3
));
8874 make_redzone_AbiHint(vbi
, t3
, t2
/*nia*/, "ret");
8875 jmp_treg(dres
, Ijk_Ret
, t2
);
8876 vassert(dres
->whatNext
== Dis_StopHere
);
8880 /*------------------------------------------------------------*/
8881 /*--- SSE/SSE2/SSE3 helpers ---*/
8882 /*------------------------------------------------------------*/
8884 /* Indicates whether the op requires a rounding-mode argument. Note
8885 that this covers only vector floating point arithmetic ops, and
8886 omits the scalar ones that need rounding modes. Note also that
8887 inconsistencies here will get picked up later by the IR sanity
8888 checker, so this isn't correctness-critical. */
8889 static Bool
requiresRMode ( IROp op
)
8893 case Iop_Add32Fx4
: case Iop_Sub32Fx4
:
8894 case Iop_Mul32Fx4
: case Iop_Div32Fx4
:
8895 case Iop_Add64Fx2
: case Iop_Sub64Fx2
:
8896 case Iop_Mul64Fx2
: case Iop_Div64Fx2
:
8898 case Iop_Add32Fx8
: case Iop_Sub32Fx8
:
8899 case Iop_Mul32Fx8
: case Iop_Div32Fx8
:
8900 case Iop_Add64Fx4
: case Iop_Sub64Fx4
:
8901 case Iop_Mul64Fx4
: case Iop_Div64Fx4
:
8910 /* Worker function; do not call directly.
8911 Handles full width G = G `op` E and G = (not G) `op` E.
8914 static ULong
dis_SSE_E_to_G_all_wrk (
8915 const VexAbiInfo
* vbi
,
8916 Prefix pfx
, Long delta
,
8917 const HChar
* opname
, IROp op
,
8924 UChar rm
= getUChar(delta
);
8925 Bool needsRMode
= requiresRMode(op
);
8927 = invertG
? unop(Iop_NotV128
, getXMMReg(gregOfRexRM(pfx
,rm
)))
8928 : getXMMReg(gregOfRexRM(pfx
,rm
));
8929 if (epartIsReg(rm
)) {
8931 gregOfRexRM(pfx
,rm
),
8933 ? triop(op
, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
8935 getXMMReg(eregOfRexRM(pfx
,rm
)))
8937 getXMMReg(eregOfRexRM(pfx
,rm
)))
8939 DIP("%s %s,%s\n", opname
,
8940 nameXMMReg(eregOfRexRM(pfx
,rm
)),
8941 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
8944 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
8946 gregOfRexRM(pfx
,rm
),
8948 ? triop(op
, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
8950 loadLE(Ity_V128
, mkexpr(addr
)))
8952 loadLE(Ity_V128
, mkexpr(addr
)))
8954 DIP("%s %s,%s\n", opname
,
8956 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
8962 /* All lanes SSE binary operation, G = G `op` E. */
8965 ULong
dis_SSE_E_to_G_all ( const VexAbiInfo
* vbi
,
8966 Prefix pfx
, Long delta
,
8967 const HChar
* opname
, IROp op
)
8969 return dis_SSE_E_to_G_all_wrk( vbi
, pfx
, delta
, opname
, op
, False
);
8972 /* All lanes SSE binary operation, G = (not G) `op` E. */
8975 ULong
dis_SSE_E_to_G_all_invG ( const VexAbiInfo
* vbi
,
8976 Prefix pfx
, Long delta
,
8977 const HChar
* opname
, IROp op
)
8979 return dis_SSE_E_to_G_all_wrk( vbi
, pfx
, delta
, opname
, op
, True
);
8983 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */
8985 static ULong
dis_SSE_E_to_G_lo32 ( const VexAbiInfo
* vbi
,
8986 Prefix pfx
, Long delta
,
8987 const HChar
* opname
, IROp op
)
8992 UChar rm
= getUChar(delta
);
8993 IRExpr
* gpart
= getXMMReg(gregOfRexRM(pfx
,rm
));
8994 if (epartIsReg(rm
)) {
8995 putXMMReg( gregOfRexRM(pfx
,rm
),
8997 getXMMReg(eregOfRexRM(pfx
,rm
))) );
8998 DIP("%s %s,%s\n", opname
,
8999 nameXMMReg(eregOfRexRM(pfx
,rm
)),
9000 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9003 /* We can only do a 32-bit memory read, so the upper 3/4 of the
9004 E operand needs to be made simply of zeroes. */
9005 IRTemp epart
= newTemp(Ity_V128
);
9006 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
9007 assign( epart
, unop( Iop_32UtoV128
,
9008 loadLE(Ity_I32
, mkexpr(addr
))) );
9009 putXMMReg( gregOfRexRM(pfx
,rm
),
9010 binop(op
, gpart
, mkexpr(epart
)) );
9011 DIP("%s %s,%s\n", opname
,
9013 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9019 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. */
9021 static ULong
dis_SSE_E_to_G_lo64 ( const VexAbiInfo
* vbi
,
9022 Prefix pfx
, Long delta
,
9023 const HChar
* opname
, IROp op
)
9028 UChar rm
= getUChar(delta
);
9029 IRExpr
* gpart
= getXMMReg(gregOfRexRM(pfx
,rm
));
9030 if (epartIsReg(rm
)) {
9031 putXMMReg( gregOfRexRM(pfx
,rm
),
9033 getXMMReg(eregOfRexRM(pfx
,rm
))) );
9034 DIP("%s %s,%s\n", opname
,
9035 nameXMMReg(eregOfRexRM(pfx
,rm
)),
9036 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9039 /* We can only do a 64-bit memory read, so the upper half of the
9040 E operand needs to be made simply of zeroes. */
9041 IRTemp epart
= newTemp(Ity_V128
);
9042 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
9043 assign( epart
, unop( Iop_64UtoV128
,
9044 loadLE(Ity_I64
, mkexpr(addr
))) );
9045 putXMMReg( gregOfRexRM(pfx
,rm
),
9046 binop(op
, gpart
, mkexpr(epart
)) );
9047 DIP("%s %s,%s\n", opname
,
9049 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9055 /* All lanes unary SSE operation, G = op(E). */
9057 static ULong
dis_SSE_E_to_G_unary_all (
9058 const VexAbiInfo
* vbi
,
9059 Prefix pfx
, Long delta
,
9060 const HChar
* opname
, IROp op
9066 UChar rm
= getUChar(delta
);
9067 // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
9068 // up in the usual way.
9069 Bool needsIRRM
= op
== Iop_Sqrt32Fx4
|| op
== Iop_Sqrt64Fx2
;
9070 if (epartIsReg(rm
)) {
9071 IRExpr
* src
= getXMMReg(eregOfRexRM(pfx
,rm
));
9072 /* XXXROUNDINGFIXME */
9073 IRExpr
* res
= needsIRRM
? binop(op
, get_FAKE_roundingmode(), src
)
9075 putXMMReg( gregOfRexRM(pfx
,rm
), res
);
9076 DIP("%s %s,%s\n", opname
,
9077 nameXMMReg(eregOfRexRM(pfx
,rm
)),
9078 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9081 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
9082 IRExpr
* src
= loadLE(Ity_V128
, mkexpr(addr
));
9083 /* XXXROUNDINGFIXME */
9084 IRExpr
* res
= needsIRRM
? binop(op
, get_FAKE_roundingmode(), src
)
9086 putXMMReg( gregOfRexRM(pfx
,rm
), res
);
9087 DIP("%s %s,%s\n", opname
,
9089 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9095 /* Lowest 32-bit lane only unary SSE operation, G = op(E). */
9097 static ULong
dis_SSE_E_to_G_unary_lo32 (
9098 const VexAbiInfo
* vbi
,
9099 Prefix pfx
, Long delta
,
9100 const HChar
* opname
, IROp op
9103 /* First we need to get the old G value and patch the low 32 bits
9104 of the E operand into it. Then apply op and write back to G. */
9108 UChar rm
= getUChar(delta
);
9109 IRTemp oldG0
= newTemp(Ity_V128
);
9110 IRTemp oldG1
= newTemp(Ity_V128
);
9112 assign( oldG0
, getXMMReg(gregOfRexRM(pfx
,rm
)) );
9114 if (epartIsReg(rm
)) {
9116 binop( Iop_SetV128lo32
,
9118 getXMMRegLane32(eregOfRexRM(pfx
,rm
), 0)) );
9119 putXMMReg( gregOfRexRM(pfx
,rm
), unop(op
, mkexpr(oldG1
)) );
9120 DIP("%s %s,%s\n", opname
,
9121 nameXMMReg(eregOfRexRM(pfx
,rm
)),
9122 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9125 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
9127 binop( Iop_SetV128lo32
,
9129 loadLE(Ity_I32
, mkexpr(addr
)) ));
9130 putXMMReg( gregOfRexRM(pfx
,rm
), unop(op
, mkexpr(oldG1
)) );
9131 DIP("%s %s,%s\n", opname
,
9133 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9139 /* Lowest 64-bit lane only unary SSE operation, G = op(E). */
9141 static ULong
dis_SSE_E_to_G_unary_lo64 (
9142 const VexAbiInfo
* vbi
,
9143 Prefix pfx
, Long delta
,
9144 const HChar
* opname
, IROp op
9147 /* First we need to get the old G value and patch the low 64 bits
9148 of the E operand into it. Then apply op and write back to G. */
9152 UChar rm
= getUChar(delta
);
9153 IRTemp oldG0
= newTemp(Ity_V128
);
9154 IRTemp oldG1
= newTemp(Ity_V128
);
9156 assign( oldG0
, getXMMReg(gregOfRexRM(pfx
,rm
)) );
9158 if (epartIsReg(rm
)) {
9160 binop( Iop_SetV128lo64
,
9162 getXMMRegLane64(eregOfRexRM(pfx
,rm
), 0)) );
9163 putXMMReg( gregOfRexRM(pfx
,rm
), unop(op
, mkexpr(oldG1
)) );
9164 DIP("%s %s,%s\n", opname
,
9165 nameXMMReg(eregOfRexRM(pfx
,rm
)),
9166 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9169 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
9171 binop( Iop_SetV128lo64
,
9173 loadLE(Ity_I64
, mkexpr(addr
)) ));
9174 putXMMReg( gregOfRexRM(pfx
,rm
), unop(op
, mkexpr(oldG1
)) );
9175 DIP("%s %s,%s\n", opname
,
9177 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9183 /* SSE integer binary operation:
9184 G = G `op` E (eLeft == False)
9185 G = E `op` G (eLeft == True)
9187 static ULong
dis_SSEint_E_to_G(
9188 const VexAbiInfo
* vbi
,
9189 Prefix pfx
, Long delta
,
9190 const HChar
* opname
, IROp op
,
9197 UChar rm
= getUChar(delta
);
9198 IRExpr
* gpart
= getXMMReg(gregOfRexRM(pfx
,rm
));
9199 IRExpr
* epart
= NULL
;
9200 if (epartIsReg(rm
)) {
9201 epart
= getXMMReg(eregOfRexRM(pfx
,rm
));
9202 DIP("%s %s,%s\n", opname
,
9203 nameXMMReg(eregOfRexRM(pfx
,rm
)),
9204 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9207 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
9208 epart
= loadLE(Ity_V128
, mkexpr(addr
));
9209 DIP("%s %s,%s\n", opname
,
9211 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9214 putXMMReg( gregOfRexRM(pfx
,rm
),
9215 eLeft
? binop(op
, epart
, gpart
)
9216 : binop(op
, gpart
, epart
) );
9221 /* Helper for doing SSE FP comparisons. False return ==> unhandled.
9222 This is all a bit of a kludge in that it ignores the subtleties of
9223 ordered-vs-unordered and signalling-vs-nonsignalling in the Intel
9224 spec. The meaning of the outputs is as follows:
9226 preZeroP: the active lanes of both incoming arguments should be set to zero
9227 before performing the operation. IOW the actual args are to be ignored
9228 and instead zero bits are to be used. This is a bit strange but is needed
9229 to make the constant-false/true variants (FALSE_OQ, TRUE_UQ, FALSE_OS,
9232 preSwapP: the args should be swapped before performing the operation. Note
9233 that zeroing arg input sections (per preZeroP) and swapping them (per
9234 preSwapP) are allowed to happen in either order; the result is the same.
9236 opP: this returns the actual comparison op to perform.
9238 postNotP: if true, the result(ing vector) of the comparison operation should
9239 be bitwise-not-ed. Note that only the lanes of the output actually
9240 computed by opP should be not-ed.
9242 static Bool
findSSECmpOp ( /*OUT*/Bool
* preZeroP
,
9243 /*OUT*/Bool
* preSwapP
,
9245 /*OUT*/Bool
* postNotP
,
9246 UInt imm8
, Bool all_lanes
, Int sz
)
9248 vassert(*preZeroP
== False
);
9249 vassert(*preSwapP
== False
);
9250 vassert(*opP
== Iop_INVALID
);
9251 vassert(*postNotP
== False
);
9253 if (imm8
>= 32) return False
;
9255 /* First, compute a (preZero, preSwap, op, postNot) quad from
9256 the supplied imm8. */
9257 Bool preZero
= False
;
9258 Bool preSwap
= False
;
9259 IROp op
= Iop_INVALID
;
9260 Bool postNot
= False
;
9262 # define XXX(_preZero, _preSwap, _op, _postNot) \
9263 { preZero = _preZero; preSwap = _preSwap; op = _op; postNot = _postNot; }
9264 // If you add a case here, add a corresponding test for both VCMPSD_128
9265 // and VCMPSS_128 in avx-1.c.
9266 // Cases 0xA and above are
9267 // "Enhanced Comparison Predicate[s] for VEX-Encoded [insns]"
9269 // "O" = ordered, "U" = unordered
9270 // "Q" = non-signalling (quiet), "S" = signalling
9272 // replace active arg lanes in operands with zero
9274 // | swap operands before applying the cmp op?
9276 // | | cmp op invert active lanes after?
9279 case 0x0: XXX(False
, False
, Iop_CmpEQ32Fx4
, False
); break; // EQ_OQ
9280 case 0x8: XXX(False
, False
, Iop_CmpEQ32Fx4
, False
); break; // EQ_UQ
9281 case 0x10: XXX(False
, False
, Iop_CmpEQ32Fx4
, False
); break; // EQ_OS
9282 case 0x18: XXX(False
, False
, Iop_CmpEQ32Fx4
, False
); break; // EQ_US
9284 case 0x1: XXX(False
, False
, Iop_CmpLT32Fx4
, False
); break; // LT_OS
9285 case 0x11: XXX(False
, False
, Iop_CmpLT32Fx4
, False
); break; // LT_OQ
9287 case 0x2: XXX(False
, False
, Iop_CmpLE32Fx4
, False
); break; // LE_OS
9288 case 0x12: XXX(False
, False
, Iop_CmpLE32Fx4
, False
); break; // LE_OQ
9290 case 0x3: XXX(False
, False
, Iop_CmpUN32Fx4
, False
); break; // UNORD_Q
9291 case 0x13: XXX(False
, False
, Iop_CmpUN32Fx4
, False
); break; // UNORD_S
9293 // 0xC: this isn't really right because it returns all-1s when
9294 // either operand is a NaN, and it should return all-0s.
9295 case 0x4: XXX(False
, False
, Iop_CmpEQ32Fx4
, True
); break; // NEQ_UQ
9296 case 0xC: XXX(False
, False
, Iop_CmpEQ32Fx4
, True
); break; // NEQ_OQ
9297 case 0x14: XXX(False
, False
, Iop_CmpEQ32Fx4
, True
); break; // NEQ_US
9298 case 0x1C: XXX(False
, False
, Iop_CmpEQ32Fx4
, True
); break; // NEQ_OS
9300 case 0x5: XXX(False
, False
, Iop_CmpLT32Fx4
, True
); break; // NLT_US
9301 case 0x15: XXX(False
, False
, Iop_CmpLT32Fx4
, True
); break; // NLT_UQ
9303 case 0x6: XXX(False
, False
, Iop_CmpLE32Fx4
, True
); break; // NLE_US
9304 case 0x16: XXX(False
, False
, Iop_CmpLE32Fx4
, True
); break; // NLE_UQ
9306 case 0x7: XXX(False
, False
, Iop_CmpUN32Fx4
, True
); break; // ORD_Q
9307 case 0x17: XXX(False
, False
, Iop_CmpUN32Fx4
, True
); break; // ORD_S
9309 case 0x9: XXX(False
, True
, Iop_CmpLE32Fx4
, True
); break; // NGE_US
9310 case 0x19: XXX(False
, True
, Iop_CmpLE32Fx4
, True
); break; // NGE_UQ
9312 case 0xA: XXX(False
, True
, Iop_CmpLT32Fx4
, True
); break; // NGT_US
9313 case 0x1A: XXX(False
, True
, Iop_CmpLT32Fx4
, True
); break; // NGT_UQ
9315 case 0xD: XXX(False
, True
, Iop_CmpLE32Fx4
, False
); break; // GE_OS
9316 case 0x1D: XXX(False
, True
, Iop_CmpLE32Fx4
, False
); break; // GE_OQ
9318 case 0xE: XXX(False
, True
, Iop_CmpLT32Fx4
, False
); break; // GT_OS
9319 case 0x1E: XXX(False
, True
, Iop_CmpLT32Fx4
, False
); break; // GT_OQ
9320 // Constant-value-result ops
9321 case 0xB: XXX(True
, False
, Iop_CmpEQ32Fx4
, True
); break; // FALSE_OQ
9322 case 0xF: XXX(True
, False
, Iop_CmpEQ32Fx4
, False
); break; // TRUE_UQ
9323 case 0x1B: XXX(True
, False
, Iop_CmpEQ32Fx4
, True
); break; // FALSE_OS
9324 case 0x1F: XXX(True
, False
, Iop_CmpEQ32Fx4
, False
); break; // TRUE_US
9325 /* Don't forget to add test cases to VCMPSS_128_<imm8> in
9326 avx-1.c if new cases turn up. */
9330 if (op
== Iop_INVALID
) return False
;
9332 /* Now convert the op into one with the same arithmetic but that is
9333 correct for the width and laneage requirements. */
9335 /**/ if (sz
== 4 && all_lanes
) {
9337 case Iop_CmpEQ32Fx4
: op
= Iop_CmpEQ32Fx4
; break;
9338 case Iop_CmpLT32Fx4
: op
= Iop_CmpLT32Fx4
; break;
9339 case Iop_CmpLE32Fx4
: op
= Iop_CmpLE32Fx4
; break;
9340 case Iop_CmpUN32Fx4
: op
= Iop_CmpUN32Fx4
; break;
9341 default: vassert(0);
9344 else if (sz
== 4 && !all_lanes
) {
9346 case Iop_CmpEQ32Fx4
: op
= Iop_CmpEQ32F0x4
; break;
9347 case Iop_CmpLT32Fx4
: op
= Iop_CmpLT32F0x4
; break;
9348 case Iop_CmpLE32Fx4
: op
= Iop_CmpLE32F0x4
; break;
9349 case Iop_CmpUN32Fx4
: op
= Iop_CmpUN32F0x4
; break;
9350 default: vassert(0);
9353 else if (sz
== 8 && all_lanes
) {
9355 case Iop_CmpEQ32Fx4
: op
= Iop_CmpEQ64Fx2
; break;
9356 case Iop_CmpLT32Fx4
: op
= Iop_CmpLT64Fx2
; break;
9357 case Iop_CmpLE32Fx4
: op
= Iop_CmpLE64Fx2
; break;
9358 case Iop_CmpUN32Fx4
: op
= Iop_CmpUN64Fx2
; break;
9359 default: vassert(0);
9362 else if (sz
== 8 && !all_lanes
) {
9364 case Iop_CmpEQ32Fx4
: op
= Iop_CmpEQ64F0x2
; break;
9365 case Iop_CmpLT32Fx4
: op
= Iop_CmpLT64F0x2
; break;
9366 case Iop_CmpLE32Fx4
: op
= Iop_CmpLE64F0x2
; break;
9367 case Iop_CmpUN32Fx4
: op
= Iop_CmpUN64F0x2
; break;
9368 default: vassert(0);
9372 vpanic("findSSECmpOp(amd64,guest)");
9376 // In this case, preSwap is irrelevant, but assert anyway.
9377 vassert(preSwap
== False
);
9379 *preZeroP
= preZero
; *preSwapP
= preSwap
; *opP
= op
; *postNotP
= postNot
;
9384 /* Handles SSE 32F/64F comparisons. It can fail, in which case it
9385 returns the original delta to indicate failure. */
9387 static Long
dis_SSE_cmp_E_to_G ( const VexAbiInfo
* vbi
,
9388 Prefix pfx
, Long delta
,
9389 const HChar
* opname
, Bool all_lanes
, Int sz
)
9391 Long delta0
= delta
;
9396 Bool preZero
= False
;
9397 Bool preSwap
= False
;
9398 IROp op
= Iop_INVALID
;
9399 Bool postNot
= False
;
9400 IRTemp plain
= newTemp(Ity_V128
);
9401 UChar rm
= getUChar(delta
);
9403 vassert(sz
== 4 || sz
== 8);
9404 if (epartIsReg(rm
)) {
9405 imm8
= getUChar(delta
+1);
9406 if (imm8
>= 8) return delta0
; /* FAIL */
9407 Bool ok
= findSSECmpOp(&preZero
, &preSwap
, &op
, &postNot
,
9408 imm8
, all_lanes
, sz
);
9409 if (!ok
) return delta0
; /* FAIL */
9410 vassert(!preZero
); /* never needed for imm8 < 8 */
9411 vassert(!preSwap
); /* never needed for imm8 < 8 */
9412 assign( plain
, binop(op
, getXMMReg(gregOfRexRM(pfx
,rm
)),
9413 getXMMReg(eregOfRexRM(pfx
,rm
))) );
9415 DIP("%s $%u,%s,%s\n", opname
,
9417 nameXMMReg(eregOfRexRM(pfx
,rm
)),
9418 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9420 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
9421 imm8
= getUChar(delta
+alen
);
9422 if (imm8
>= 8) return delta0
; /* FAIL */
9423 Bool ok
= findSSECmpOp(&preZero
, &preSwap
, &op
, &postNot
,
9424 imm8
, all_lanes
, sz
);
9425 if (!ok
) return delta0
; /* FAIL */
9426 vassert(!preZero
); /* never needed for imm8 < 8 */
9427 vassert(!preSwap
); /* never needed for imm8 < 8 */
9431 getXMMReg(gregOfRexRM(pfx
,rm
)),
9433 ? loadLE(Ity_V128
, mkexpr(addr
))
9435 ? unop( Iop_64UtoV128
, loadLE(Ity_I64
, mkexpr(addr
)))
9437 unop( Iop_32UtoV128
, loadLE(Ity_I32
, mkexpr(addr
)))
9441 DIP("%s $%u,%s,%s\n", opname
,
9444 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9447 if (postNot
&& all_lanes
) {
9448 putXMMReg( gregOfRexRM(pfx
,rm
),
9449 unop(Iop_NotV128
, mkexpr(plain
)) );
9452 if (postNot
&& !all_lanes
) {
9453 mask
= toUShort(sz
==4 ? 0x000F : 0x00FF);
9454 putXMMReg( gregOfRexRM(pfx
,rm
),
9455 binop(Iop_XorV128
, mkexpr(plain
), mkV128(mask
)) );
9458 putXMMReg( gregOfRexRM(pfx
,rm
), mkexpr(plain
) );
9465 /* Vector by scalar shift of G by the amount specified at the bottom
9468 static ULong
dis_SSE_shiftG_byE ( const VexAbiInfo
* vbi
,
9469 Prefix pfx
, Long delta
,
9470 const HChar
* opname
, IROp op
)
9476 UChar rm
= getUChar(delta
);
9477 IRTemp g0
= newTemp(Ity_V128
);
9478 IRTemp g1
= newTemp(Ity_V128
);
9479 IRTemp amt
= newTemp(Ity_I64
);
9480 IRTemp amt8
= newTemp(Ity_I8
);
9481 if (epartIsReg(rm
)) {
9482 assign( amt
, getXMMRegLane64(eregOfRexRM(pfx
,rm
), 0) );
9483 DIP("%s %s,%s\n", opname
,
9484 nameXMMReg(eregOfRexRM(pfx
,rm
)),
9485 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9488 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
9489 assign( amt
, loadLE(Ity_I64
, mkexpr(addr
)) );
9490 DIP("%s %s,%s\n", opname
,
9492 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9495 assign( g0
, getXMMReg(gregOfRexRM(pfx
,rm
)) );
9496 assign( amt8
, unop(Iop_64to8
, mkexpr(amt
)) );
9498 shl
= shr
= sar
= False
;
9501 case Iop_ShlN16x8
: shl
= True
; size
= 32; break;
9502 case Iop_ShlN32x4
: shl
= True
; size
= 32; break;
9503 case Iop_ShlN64x2
: shl
= True
; size
= 64; break;
9504 case Iop_SarN16x8
: sar
= True
; size
= 16; break;
9505 case Iop_SarN32x4
: sar
= True
; size
= 32; break;
9506 case Iop_ShrN16x8
: shr
= True
; size
= 16; break;
9507 case Iop_ShrN32x4
: shr
= True
; size
= 32; break;
9508 case Iop_ShrN64x2
: shr
= True
; size
= 64; break;
9509 default: vassert(0);
9516 binop(Iop_CmpLT64U
, mkexpr(amt
), mkU64(size
)),
9517 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
9526 binop(Iop_CmpLT64U
, mkexpr(amt
), mkU64(size
)),
9527 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
9528 binop(op
, mkexpr(g0
), mkU8(size
-1))
9535 putXMMReg( gregOfRexRM(pfx
,rm
), mkexpr(g1
) );
9540 /* Vector by scalar shift of E by an immediate byte. */
9543 ULong
dis_SSE_shiftE_imm ( Prefix pfx
,
9544 Long delta
, const HChar
* opname
, IROp op
)
9547 UChar rm
= getUChar(delta
);
9548 IRTemp e0
= newTemp(Ity_V128
);
9549 IRTemp e1
= newTemp(Ity_V128
);
9551 vassert(epartIsReg(rm
));
9552 vassert(gregLO3ofRM(rm
) == 2
9553 || gregLO3ofRM(rm
) == 4 || gregLO3ofRM(rm
) == 6);
9554 amt
= getUChar(delta
+1);
9556 DIP("%s $%d,%s\n", opname
,
9558 nameXMMReg(eregOfRexRM(pfx
,rm
)) );
9559 assign( e0
, getXMMReg(eregOfRexRM(pfx
,rm
)) );
9561 shl
= shr
= sar
= False
;
9564 case Iop_ShlN16x8
: shl
= True
; size
= 16; break;
9565 case Iop_ShlN32x4
: shl
= True
; size
= 32; break;
9566 case Iop_ShlN64x2
: shl
= True
; size
= 64; break;
9567 case Iop_SarN16x8
: sar
= True
; size
= 16; break;
9568 case Iop_SarN32x4
: sar
= True
; size
= 32; break;
9569 case Iop_ShrN16x8
: shr
= True
; size
= 16; break;
9570 case Iop_ShrN32x4
: shr
= True
; size
= 32; break;
9571 case Iop_ShrN64x2
: shr
= True
; size
= 64; break;
9572 default: vassert(0);
9576 assign( e1
, amt
>= size
9578 : binop(op
, mkexpr(e0
), mkU8(amt
))
9582 assign( e1
, amt
>= size
9583 ? binop(op
, mkexpr(e0
), mkU8(size
-1))
9584 : binop(op
, mkexpr(e0
), mkU8(amt
))
9590 putXMMReg( eregOfRexRM(pfx
,rm
), mkexpr(e1
) );
9595 /* Get the current SSE rounding mode. */
9597 static IRExpr
* /* :: Ity_I32 */ get_sse_roundingmode ( void )
9602 IRExpr_Get( OFFB_SSEROUND
, Ity_I64
),
9606 static void put_sse_roundingmode ( IRExpr
* sseround
)
9608 vassert(typeOfIRExpr(irsb
->tyenv
, sseround
) == Ity_I32
);
9609 stmt( IRStmt_Put( OFFB_SSEROUND
,
9610 unop(Iop_32Uto64
,sseround
) ) );
9613 /* Break a V128-bit value up into four 32-bit ints. */
9615 static void breakupV128to32s ( IRTemp t128
,
9617 IRTemp
* t3
, IRTemp
* t2
,
9618 IRTemp
* t1
, IRTemp
* t0
)
9620 IRTemp hi64
= newTemp(Ity_I64
);
9621 IRTemp lo64
= newTemp(Ity_I64
);
9622 assign( hi64
, unop(Iop_V128HIto64
, mkexpr(t128
)) );
9623 assign( lo64
, unop(Iop_V128to64
, mkexpr(t128
)) );
9625 vassert(t0
&& *t0
== IRTemp_INVALID
);
9626 vassert(t1
&& *t1
== IRTemp_INVALID
);
9627 vassert(t2
&& *t2
== IRTemp_INVALID
);
9628 vassert(t3
&& *t3
== IRTemp_INVALID
);
9630 *t0
= newTemp(Ity_I32
);
9631 *t1
= newTemp(Ity_I32
);
9632 *t2
= newTemp(Ity_I32
);
9633 *t3
= newTemp(Ity_I32
);
9634 assign( *t0
, unop(Iop_64to32
, mkexpr(lo64
)) );
9635 assign( *t1
, unop(Iop_64HIto32
, mkexpr(lo64
)) );
9636 assign( *t2
, unop(Iop_64to32
, mkexpr(hi64
)) );
9637 assign( *t3
, unop(Iop_64HIto32
, mkexpr(hi64
)) );
9640 /* Construct a V128-bit value from four 32-bit ints. */
9642 static IRExpr
* mkV128from32s ( IRTemp t3
, IRTemp t2
,
9643 IRTemp t1
, IRTemp t0
)
9646 binop( Iop_64HLtoV128
,
9647 binop(Iop_32HLto64
, mkexpr(t3
), mkexpr(t2
)),
9648 binop(Iop_32HLto64
, mkexpr(t1
), mkexpr(t0
))
9652 /* Break a 64-bit value up into four 16-bit ints. */
9654 static void breakup64to16s ( IRTemp t64
,
9656 IRTemp
* t3
, IRTemp
* t2
,
9657 IRTemp
* t1
, IRTemp
* t0
)
9659 IRTemp hi32
= newTemp(Ity_I32
);
9660 IRTemp lo32
= newTemp(Ity_I32
);
9661 assign( hi32
, unop(Iop_64HIto32
, mkexpr(t64
)) );
9662 assign( lo32
, unop(Iop_64to32
, mkexpr(t64
)) );
9664 vassert(t0
&& *t0
== IRTemp_INVALID
);
9665 vassert(t1
&& *t1
== IRTemp_INVALID
);
9666 vassert(t2
&& *t2
== IRTemp_INVALID
);
9667 vassert(t3
&& *t3
== IRTemp_INVALID
);
9669 *t0
= newTemp(Ity_I16
);
9670 *t1
= newTemp(Ity_I16
);
9671 *t2
= newTemp(Ity_I16
);
9672 *t3
= newTemp(Ity_I16
);
9673 assign( *t0
, unop(Iop_32to16
, mkexpr(lo32
)) );
9674 assign( *t1
, unop(Iop_32HIto16
, mkexpr(lo32
)) );
9675 assign( *t2
, unop(Iop_32to16
, mkexpr(hi32
)) );
9676 assign( *t3
, unop(Iop_32HIto16
, mkexpr(hi32
)) );
9679 /* Construct a 64-bit value from four 16-bit ints. */
9681 static IRExpr
* mk64from16s ( IRTemp t3
, IRTemp t2
,
9682 IRTemp t1
, IRTemp t0
)
9685 binop( Iop_32HLto64
,
9686 binop(Iop_16HLto32
, mkexpr(t3
), mkexpr(t2
)),
9687 binop(Iop_16HLto32
, mkexpr(t1
), mkexpr(t0
))
9691 /* Break a V256-bit value up into four 64-bit ints. */
9693 static void breakupV256to64s ( IRTemp t256
,
9695 IRTemp
* t3
, IRTemp
* t2
,
9696 IRTemp
* t1
, IRTemp
* t0
)
9698 vassert(t0
&& *t0
== IRTemp_INVALID
);
9699 vassert(t1
&& *t1
== IRTemp_INVALID
);
9700 vassert(t2
&& *t2
== IRTemp_INVALID
);
9701 vassert(t3
&& *t3
== IRTemp_INVALID
);
9702 *t0
= newTemp(Ity_I64
);
9703 *t1
= newTemp(Ity_I64
);
9704 *t2
= newTemp(Ity_I64
);
9705 *t3
= newTemp(Ity_I64
);
9706 assign( *t0
, unop(Iop_V256to64_0
, mkexpr(t256
)) );
9707 assign( *t1
, unop(Iop_V256to64_1
, mkexpr(t256
)) );
9708 assign( *t2
, unop(Iop_V256to64_2
, mkexpr(t256
)) );
9709 assign( *t3
, unop(Iop_V256to64_3
, mkexpr(t256
)) );
9712 /* Break a V256-bit value up into two V128s. */
9714 static void breakupV256toV128s ( IRTemp t256
,
9716 IRTemp
* t1
, IRTemp
* t0
)
9718 vassert(t0
&& *t0
== IRTemp_INVALID
);
9719 vassert(t1
&& *t1
== IRTemp_INVALID
);
9720 *t0
= newTemp(Ity_V128
);
9721 *t1
= newTemp(Ity_V128
);
9722 assign(*t1
, unop(Iop_V256toV128_1
, mkexpr(t256
)));
9723 assign(*t0
, unop(Iop_V256toV128_0
, mkexpr(t256
)));
9726 /* Break a V256-bit value up into eight 32-bit ints. */
9728 static void breakupV256to32s ( IRTemp t256
,
9730 IRTemp
* t7
, IRTemp
* t6
,
9731 IRTemp
* t5
, IRTemp
* t4
,
9732 IRTemp
* t3
, IRTemp
* t2
,
9733 IRTemp
* t1
, IRTemp
* t0
)
9735 IRTemp t128_1
= IRTemp_INVALID
;
9736 IRTemp t128_0
= IRTemp_INVALID
;
9737 breakupV256toV128s( t256
, &t128_1
, &t128_0
);
9738 breakupV128to32s( t128_1
, t7
, t6
, t5
, t4
);
9739 breakupV128to32s( t128_0
, t3
, t2
, t1
, t0
);
9742 /* Break a V128-bit value up into two 64-bit ints. */
9744 static void breakupV128to64s ( IRTemp t128
,
9746 IRTemp
* t1
, IRTemp
* t0
)
9748 vassert(t0
&& *t0
== IRTemp_INVALID
);
9749 vassert(t1
&& *t1
== IRTemp_INVALID
);
9750 *t0
= newTemp(Ity_I64
);
9751 *t1
= newTemp(Ity_I64
);
9752 assign( *t0
, unop(Iop_V128to64
, mkexpr(t128
)) );
9753 assign( *t1
, unop(Iop_V128HIto64
, mkexpr(t128
)) );
9756 /* Construct a V256-bit value from eight 32-bit ints. */
9758 static IRExpr
* mkV256from32s ( IRTemp t7
, IRTemp t6
,
9759 IRTemp t5
, IRTemp t4
,
9760 IRTemp t3
, IRTemp t2
,
9761 IRTemp t1
, IRTemp t0
)
9764 binop( Iop_V128HLtoV256
,
9765 binop( Iop_64HLtoV128
,
9766 binop(Iop_32HLto64
, mkexpr(t7
), mkexpr(t6
)),
9767 binop(Iop_32HLto64
, mkexpr(t5
), mkexpr(t4
)) ),
9768 binop( Iop_64HLtoV128
,
9769 binop(Iop_32HLto64
, mkexpr(t3
), mkexpr(t2
)),
9770 binop(Iop_32HLto64
, mkexpr(t1
), mkexpr(t0
)) )
9774 /* Construct a V256-bit value from four 64-bit ints. */
9776 static IRExpr
* mkV256from64s ( IRTemp t3
, IRTemp t2
,
9777 IRTemp t1
, IRTemp t0
)
9780 binop( Iop_V128HLtoV256
,
9781 binop(Iop_64HLtoV128
, mkexpr(t3
), mkexpr(t2
)),
9782 binop(Iop_64HLtoV128
, mkexpr(t1
), mkexpr(t0
))
9786 /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit
9787 values (aa,bb), computes, for each of the 4 16-bit lanes:
9789 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
9791 static IRExpr
* dis_PMULHRSW_helper ( IRExpr
* aax
, IRExpr
* bbx
)
9793 IRTemp aa
= newTemp(Ity_I64
);
9794 IRTemp bb
= newTemp(Ity_I64
);
9795 IRTemp aahi32s
= newTemp(Ity_I64
);
9796 IRTemp aalo32s
= newTemp(Ity_I64
);
9797 IRTemp bbhi32s
= newTemp(Ity_I64
);
9798 IRTemp bblo32s
= newTemp(Ity_I64
);
9799 IRTemp rHi
= newTemp(Ity_I64
);
9800 IRTemp rLo
= newTemp(Ity_I64
);
9801 IRTemp one32x2
= newTemp(Ity_I64
);
9806 binop(Iop_InterleaveHI16x4
, mkexpr(aa
), mkexpr(aa
)),
9810 binop(Iop_InterleaveLO16x4
, mkexpr(aa
), mkexpr(aa
)),
9814 binop(Iop_InterleaveHI16x4
, mkexpr(bb
), mkexpr(bb
)),
9818 binop(Iop_InterleaveLO16x4
, mkexpr(bb
), mkexpr(bb
)),
9820 assign(one32x2
, mkU64( (1ULL << 32) + 1 ));
9829 binop(Iop_Mul32x2
, mkexpr(aahi32s
), mkexpr(bbhi32s
)),
9845 binop(Iop_Mul32x2
, mkexpr(aalo32s
), mkexpr(bblo32s
)),
9854 binop(Iop_CatEvenLanes16x4
, mkexpr(rHi
), mkexpr(rLo
));
9857 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit
9858 values (aa,bb), computes, for each lane:
9860 if aa_lane < 0 then - bb_lane
9861 else if aa_lane > 0 then bb_lane
9864 static IRExpr
* dis_PSIGN_helper ( IRExpr
* aax
, IRExpr
* bbx
, Int laneszB
)
9866 IRTemp aa
= newTemp(Ity_I64
);
9867 IRTemp bb
= newTemp(Ity_I64
);
9868 IRTemp zero
= newTemp(Ity_I64
);
9869 IRTemp bbNeg
= newTemp(Ity_I64
);
9870 IRTemp negMask
= newTemp(Ity_I64
);
9871 IRTemp posMask
= newTemp(Ity_I64
);
9872 IROp opSub
= Iop_INVALID
;
9873 IROp opCmpGTS
= Iop_INVALID
;
9876 case 1: opSub
= Iop_Sub8x8
; opCmpGTS
= Iop_CmpGT8Sx8
; break;
9877 case 2: opSub
= Iop_Sub16x4
; opCmpGTS
= Iop_CmpGT16Sx4
; break;
9878 case 4: opSub
= Iop_Sub32x2
; opCmpGTS
= Iop_CmpGT32Sx2
; break;
9879 default: vassert(0);
9884 assign( zero
, mkU64(0) );
9885 assign( bbNeg
, binop(opSub
, mkexpr(zero
), mkexpr(bb
)) );
9886 assign( negMask
, binop(opCmpGTS
, mkexpr(zero
), mkexpr(aa
)) );
9887 assign( posMask
, binop(opCmpGTS
, mkexpr(aa
), mkexpr(zero
)) );
9891 binop(Iop_And64
, mkexpr(bb
), mkexpr(posMask
)),
9892 binop(Iop_And64
, mkexpr(bbNeg
), mkexpr(negMask
)) );
9897 /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit
9898 value aa, computes, for each lane
9900 if aa < 0 then -aa else aa
9902 Note that the result is interpreted as unsigned, so that the
9903 absolute value of the most negative signed input can be
9906 static IRTemp
math_PABS_MMX ( IRTemp aa
, Int laneszB
)
9908 IRTemp res
= newTemp(Ity_I64
);
9909 IRTemp zero
= newTemp(Ity_I64
);
9910 IRTemp aaNeg
= newTemp(Ity_I64
);
9911 IRTemp negMask
= newTemp(Ity_I64
);
9912 IRTemp posMask
= newTemp(Ity_I64
);
9913 IROp opSub
= Iop_INVALID
;
9914 IROp opSarN
= Iop_INVALID
;
9917 case 1: opSub
= Iop_Sub8x8
; opSarN
= Iop_SarN8x8
; break;
9918 case 2: opSub
= Iop_Sub16x4
; opSarN
= Iop_SarN16x4
; break;
9919 case 4: opSub
= Iop_Sub32x2
; opSarN
= Iop_SarN32x2
; break;
9920 default: vassert(0);
9923 assign( negMask
, binop(opSarN
, mkexpr(aa
), mkU8(8*laneszB
-1)) );
9924 assign( posMask
, unop(Iop_Not64
, mkexpr(negMask
)) );
9925 assign( zero
, mkU64(0) );
9926 assign( aaNeg
, binop(opSub
, mkexpr(zero
), mkexpr(aa
)) );
9929 binop(Iop_And64
, mkexpr(aa
), mkexpr(posMask
)),
9930 binop(Iop_And64
, mkexpr(aaNeg
), mkexpr(negMask
)) ));
9934 /* XMM version of math_PABS_MMX. */
9935 static IRTemp
math_PABS_XMM ( IRTemp aa
, Int laneszB
)
9937 IRTemp res
= newTemp(Ity_V128
);
9938 IRTemp aaHi
= newTemp(Ity_I64
);
9939 IRTemp aaLo
= newTemp(Ity_I64
);
9940 assign(aaHi
, unop(Iop_V128HIto64
, mkexpr(aa
)));
9941 assign(aaLo
, unop(Iop_V128to64
, mkexpr(aa
)));
9942 assign(res
, binop(Iop_64HLtoV128
,
9943 mkexpr(math_PABS_MMX(aaHi
, laneszB
)),
9944 mkexpr(math_PABS_MMX(aaLo
, laneszB
))));
9948 /* Specialisations of math_PABS_XMM, since there's no easy way to do
9949 partial applications in C :-( */
9950 static IRTemp
math_PABS_XMM_pap4 ( IRTemp aa
) {
9951 return math_PABS_XMM(aa
, 4);
9954 static IRTemp
math_PABS_XMM_pap2 ( IRTemp aa
) {
9955 return math_PABS_XMM(aa
, 2);
9958 static IRTemp
math_PABS_XMM_pap1 ( IRTemp aa
) {
9959 return math_PABS_XMM(aa
, 1);
9962 /* YMM version of math_PABS_XMM. */
9963 static IRTemp
math_PABS_YMM ( IRTemp aa
, Int laneszB
)
9965 IRTemp res
= newTemp(Ity_V256
);
9966 IRTemp aaHi
= IRTemp_INVALID
;
9967 IRTemp aaLo
= IRTemp_INVALID
;
9968 breakupV256toV128s(aa
, &aaHi
, &aaLo
);
9969 assign(res
, binop(Iop_V128HLtoV256
,
9970 mkexpr(math_PABS_XMM(aaHi
, laneszB
)),
9971 mkexpr(math_PABS_XMM(aaLo
, laneszB
))));
9975 static IRTemp
math_PABS_YMM_pap4 ( IRTemp aa
) {
9976 return math_PABS_YMM(aa
, 4);
9979 static IRTemp
math_PABS_YMM_pap2 ( IRTemp aa
) {
9980 return math_PABS_YMM(aa
, 2);
9983 static IRTemp
math_PABS_YMM_pap1 ( IRTemp aa
) {
9984 return math_PABS_YMM(aa
, 1);
/* Helper for PALIGNR: returns the 64-bit value obtained by shifting
   the 128-bit quantity hi64:lo64 right by byteShift bytes and taking
   the low 64 bits of the result.  Only valid for 1..7 byte shifts;
   the 0 and 8 byte cases are handled separately by the caller. */
static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
                                        IRTemp lo64, Long byteShift )
{
   vassert(byteShift >= 1 && byteShift <= 7);
   return
      binop(Iop_Or64,
            /* bits of hi64 shifted down into the top of the result */
            binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
            binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
      );
}
/* Compute the result of (128-bit) PALIGNR: conceptually concatenate
   dV:sV into a 256-bit value, shift right by imm8 bytes, and return
   the low 128 bits.  imm8 >= 32 therefore yields zero.  The work is
   done on four 64-bit pieces, with an explicit case split on imm8. */
static IRTemp math_PALIGNR_XMM ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp res = newTemp(Ity_V128);
   IRTemp sHi = newTemp(Ity_I64);
   IRTemp sLo = newTemp(Ity_I64);
   IRTemp dHi = newTemp(Ity_I64);
   IRTemp dLo = newTemp(Ity_I64);
   IRTemp rHi = newTemp(Ity_I64);
   IRTemp rLo = newTemp(Ity_I64);

   assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
   assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

   if (imm8 == 0) {
      /* no shift: result is sV unchanged */
      assign( rHi, mkexpr(sHi) );
      assign( rLo, mkexpr(sLo) );
   }
   else if (imm8 >= 1 && imm8 <= 7) {
      /* partial shift within the low (sV) half; dLo bytes enter rHi */
      assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, imm8) );
      assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, imm8) );
   }
   else if (imm8 == 8) {
      assign( rHi, mkexpr(dLo) );
      assign( rLo, mkexpr(sHi) );
   }
   else if (imm8 >= 9 && imm8 <= 15) {
      assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-8) );
      assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, imm8-8) );
   }
   else if (imm8 == 16) {
      /* exactly dV */
      assign( rHi, mkexpr(dHi) );
      assign( rLo, mkexpr(dLo) );
   }
   else if (imm8 >= 17 && imm8 <= 23) {
      /* only dV bytes remain; top of rHi fills with zeroes */
      assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-16))) );
      assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-16) );
   }
   else if (imm8 == 24) {
      assign( rHi, mkU64(0) );
      assign( rLo, mkexpr(dHi) );
   }
   else if (imm8 >= 25 && imm8 <= 31) {
      assign( rHi, mkU64(0) );
      assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-24))) );
   }
   else if (imm8 >= 32 && imm8 <= 255) {
      /* shifted entirely out: zero */
      assign( rHi, mkU64(0) );
      assign( rLo, mkU64(0) );
   }
   else
      vassert(0);

   assign( res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)));
   return res;
}
/* Generate a SIGSEGV followed by a restart of the current instruction
   if effective_addr is not 16-aligned.  This is required behaviour
   for some SSE3 instructions and all 128-bit SSSE3 instructions.
   This assumes that guest_RIP_curr_instr is set correctly! */
/* mask is alignment-1 (e.g. 15 for 16-byte alignment): a nonzero
   AND with the address means it is misaligned. */
static
void gen_SEGV_if_not_XX_aligned ( IRTemp effective_addr, ULong mask )
{
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE64,
               binop(Iop_And64,mkexpr(effective_addr),mkU64(mask)),
               mkU64(0)),
         Ijk_SigSEGV,
         /* restart at the current instruction on delivery */
         IRConst_U64(guest_RIP_curr_instr),
         OFFB_RIP
      )
   );
}
/* SIGSEGV unless effective_addr is 16-byte aligned. */
static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr ) {
   gen_SEGV_if_not_XX_aligned(effective_addr, 16-1);
}
/* SIGSEGV unless effective_addr is 32-byte aligned. */
static void gen_SEGV_if_not_32_aligned ( IRTemp effective_addr ) {
   gen_SEGV_if_not_XX_aligned(effective_addr, 32-1);
}
/* SIGSEGV unless effective_addr is 64-byte aligned. */
static void gen_SEGV_if_not_64_aligned ( IRTemp effective_addr ) {
   gen_SEGV_if_not_XX_aligned(effective_addr, 64-1);
}
10088 /* Helper for deciding whether a given insn (starting at the opcode
10089 byte) may validly be used with a LOCK prefix. The following insns
10090 may be used with LOCK when their destination operand is in memory.
10091 AFAICS this is exactly the same for both 32-bit and 64-bit mode.
10093 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
10094 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
10095 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
10096 SBB 80 /3, 81 /3, 82 /x, 83 /3, 18, 19
10097 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
10098 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
10099 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
10109 BTC 0F BB, 0F BA /7
10110 BTR 0F B3, 0F BA /6
10111 BTS 0F AB, 0F BA /5
10113 CMPXCHG 0F B0, 0F B1
10118 ------------------------------
10120 80 /0 = addb $imm8, rm8
10121 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
10122 82 /0 = addb $imm8, rm8
10123 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
10126 01 = addl r32, rm32 and addw r16, rm16
10128 Same for ADD OR ADC SBB AND SUB XOR
10131 FF /1 = dec rm32 and dec rm16
10134 FF /0 = inc rm32 and inc rm16
10137 F7 /3 = neg rm32 and neg rm16
10140 F7 /2 = not rm32 and not rm16
10142 0F BB = btcw r16, rm16 and btcl r32, rm32
10143 0F BA /7 = btcw $imm8, rm16 and btcl $imm8, rm32
/* Decide whether the insn whose opcode bytes start at opc may validly
   carry a LOCK prefix.  Returns True only for the LOCKable instruction
   forms (see the table in the comment above) AND only when the
   destination operand is in memory (!epartIsReg).  opc must point at
   at least 3 valid bytes for the two-byte (0x0F) cases. */
static Bool can_be_used_with_LOCK_prefix ( const UChar* opc )
{
   switch (opc[0]) {
      /* ADD/OR/ADC/SBB/AND/SUB/XOR, reg-to-memory forms */
      case 0x00: case 0x01: case 0x08: case 0x09:
      case 0x10: case 0x11: case 0x18: case 0x19:
      case 0x20: case 0x21: case 0x28: case 0x29:
      case 0x30: case 0x31:
         if (!epartIsReg(opc[1]))
            return True;
         break;

      /* Grp1: immediate forms of the same ops (/0../6) */
      case 0x80: case 0x81: case 0x82: case 0x83:
         if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6
             && !epartIsReg(opc[1]))
            return True;
         break;

      /* Grp4/5: INC (/0) and DEC (/1) */
      case 0xFE: case 0xFF:
         if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1
             && !epartIsReg(opc[1]))
            return True;
         break;

      /* Grp3: NOT (/2) and NEG (/3) */
      case 0xF6: case 0xF7:
         if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3
             && !epartIsReg(opc[1]))
            return True;
         break;

      /* XCHG (implicitly locked anyway, but LOCK is legal) */
      case 0x86: case 0x87:
         if (!epartIsReg(opc[1]))
            return True;
         break;

      /* two-byte opcodes */
      case 0x0F: {
         switch (opc[1]) {
            /* BTC / BTR / BTS, register bit-index forms */
            case 0xBB: case 0xB3: case 0xAB:
               if (!epartIsReg(opc[2]))
                  return True;
               break;
            /* Grp8: BTS (/5), BTR (/6), BTC (/7), imm8 bit index */
            case 0xBA:
               if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7
                   && !epartIsReg(opc[2]))
                  return True;
               break;
            /* CMPXCHG */
            case 0xB0: case 0xB1:
               if (!epartIsReg(opc[2]))
                  return True;
               break;
            /* CMPXCHG8B / CMPXCHG16B (/1) */
            case 0xC7:
               if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
                  return True;
               break;
            /* XADD */
            case 0xC0: case 0xC1:
               if (!epartIsReg(opc[2]))
                  return True;
               break;
            default:
               break;
         } /* switch (opc[1]) */
         break;
      }

      default:
         break;
   } /* switch (opc[0]) */

   return False;
}
10218 /*------------------------------------------------------------*/
10220 /*--- Top-level SSE/SSE2: dis_ESC_0F__SSE2 ---*/
10222 /*------------------------------------------------------------*/
/* Disassemble (U)COMISD: ordered/unordered scalar F64 compare of the
   low lanes of G and E, writing ZF/PF/CF via the COPY thunk.  opc
   distinguishes UCOMISD (0x2E) from COMISD (0x2F); isAvx only alters
   the printed mnemonic.  Returns the updated instruction offset. */
static Long dis_COMISD ( const VexAbiInfo* vbi, Prefix pfx,
                         Long delta, Bool isAvx, UChar opc )
{
   vassert(opc == 0x2F/*COMISD*/ || opc == 0x2E/*UCOMISD*/);
   Int    alen  = 0;
   HChar  dis_buf[50];
   IRTemp argL  = newTemp(Ity_F64);
   IRTemp argR  = newTemp(Ity_F64);
   UChar  modrm = getUChar(delta);
   IRTemp addr  = IRTemp_INVALID;
   if (epartIsReg(modrm)) {
      assign( argR, getXMMRegLane64F( eregOfRexRM(pfx,modrm),
                                      0/*lowest lane*/ ) );
      delta += 1;
      DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
          opc==0x2E ? "u" : "",
          nameXMMReg(eregOfRexRM(pfx,modrm)),
          nameXMMReg(gregOfRexRM(pfx,modrm)) );
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
      delta += alen;
      DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
          opc==0x2E ? "u" : "",
          dis_buf,
          nameXMMReg(gregOfRexRM(pfx,modrm)) );
   }
   assign( argL, getXMMRegLane64F( gregOfRexRM(pfx,modrm),
                                   0/*lowest lane*/ ) );

   /* Result of CmpF64 maps directly onto ZF/PF/CF; mask 0x45 selects
      exactly those flag bits. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            binop( Iop_And64,
                   unop( Iop_32Uto64,
                         binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)) ),
                   mkU64(0x45)
       )));
   return delta;
}
/* Disassemble (U)COMISS: like dis_COMISD but on F32 lanes; both
   operands are widened to F64 before the compare.  Returns the
   updated instruction offset. */
static Long dis_COMISS ( const VexAbiInfo* vbi, Prefix pfx,
                         Long delta, Bool isAvx, UChar opc )
{
   vassert(opc == 0x2F/*COMISS*/ || opc == 0x2E/*UCOMISS*/);
   Int    alen  = 0;
   HChar  dis_buf[50];
   IRTemp argL  = newTemp(Ity_F32);
   IRTemp argR  = newTemp(Ity_F32);
   UChar  modrm = getUChar(delta);
   IRTemp addr  = IRTemp_INVALID;
   if (epartIsReg(modrm)) {
      assign( argR, getXMMRegLane32F( eregOfRexRM(pfx,modrm),
                                      0/*lowest lane*/ ) );
      delta += 1;
      DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "",
          opc==0x2E ? "u" : "",
          nameXMMReg(eregOfRexRM(pfx,modrm)),
          nameXMMReg(gregOfRexRM(pfx,modrm)) );
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argR, loadLE(Ity_F32, mkexpr(addr)) );
      delta += alen;
      DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "",
          opc==0x2E ? "u" : "",
          dis_buf,
          nameXMMReg(gregOfRexRM(pfx,modrm)) );
   }
   assign( argL, getXMMRegLane32F( gregOfRexRM(pfx,modrm),
                                   0/*lowest lane*/ ) );

   /* As with COMISD: CmpF64 result masked to ZF/PF/CF (0x45). */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            binop( Iop_And64,
                   unop( Iop_32Uto64,
                         binop(Iop_CmpF64,
                               unop(Iop_F32toF64,mkexpr(argL)),
                               unop(Iop_F32toF64,mkexpr(argR)))),
                   mkU64(0x45)
       )));
   return delta;
}
/* Disassemble (V)PSHUFD xmm: permute the four 32-bit lanes of the
   source according to the imm8 "order" byte (2 selector bits per
   destination lane).  If writesYmm, the upper YMM lane of the
   destination is zeroed (AVX semantics).  Returns updated delta. */
static Long dis_PSHUFD_32x4 ( const VexAbiInfo* vbi, Prefix pfx,
                              Long delta, Bool writesYmm )
{
   Int order;
   Int alen  = 0;
   HChar dis_buf[50];
   IRTemp sV = newTemp(Ity_V128);
   UChar modrm = getUChar(delta);
   const HChar* strV = writesYmm ? "v" : "";
   IRTemp addr = IRTemp_INVALID;
   if (epartIsReg(modrm)) {
      assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
      order = (Int)getUChar(delta+1);   /* imm8 follows the modrm */
      delta += 1+1;
      DIP("%spshufd $%d,%s,%s\n", strV, order,
          nameXMMReg(eregOfRexRM(pfx,modrm)),
          nameXMMReg(gregOfRexRM(pfx,modrm)));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
                        1/*byte after the amode*/ );
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      order = (Int)getUChar(delta+alen);
      delta += alen+1;
      DIP("%spshufd $%d,%s,%s\n", strV, order, dis_buf,
          nameXMMReg(gregOfRexRM(pfx,modrm)));
   }

   IRTemp s3, s2, s1, s0;
   s3 = s2 = s1 = s0 = IRTemp_INVALID;
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );

   /* SEL(n) picks source lane n. */
#  define SEL(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
   IRTemp dV = newTemp(Ity_V128);
   assign(dV,
          mkV128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
                         SEL((order>>2)&3), SEL((order>>0)&3) )
   );
#  undef SEL

   (writesYmm ? putYMMRegLoAndZU : putXMMReg)
      (gregOfRexRM(pfx,modrm), mkexpr(dV));
   return delta;
}
/* Disassemble VPSHUFD ymm: the same 4-lane permutation is applied
   independently to each 128-bit half of the 256-bit source, using a
   single imm8 "order" byte.  Returns updated delta. */
static Long dis_PSHUFD_32x8 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
{
   Int order;
   Int alen  = 0;
   HChar dis_buf[50];
   IRTemp sV = newTemp(Ity_V256);
   UChar modrm = getUChar(delta);
   IRTemp addr = IRTemp_INVALID;
   UInt rG = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getYMMReg(rE) );
      order = (Int)getUChar(delta+1);   /* imm8 follows the modrm */
      delta += 1+1;
      DIP("vpshufd $%d,%s,%s\n", order, nameYMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
                        1/*byte after the amode*/ );
      assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
      order = (Int)getUChar(delta+alen);
      delta += alen+1;
      DIP("vpshufd $%d,%s,%s\n", order, dis_buf, nameYMMReg(rG));
   }

   IRTemp s[8];
   s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
   breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4],
                         &s[3], &s[2], &s[1], &s[0] );

   /* Lanes 4..7 select within the upper half, 0..3 within the lower. */
   putYMMReg( rG, mkV256from32s( s[4 + ((order>>6)&3)],
                                 s[4 + ((order>>4)&3)],
                                 s[4 + ((order>>2)&3)],
                                 s[4 + ((order>>0)&3)],
                                 s[0 + ((order>>6)&3)],
                                 s[0 + ((order>>4)&3)],
                                 s[0 + ((order>>2)&3)],
                                 s[0 + ((order>>0)&3)] ) );
   return delta;
}
/* Compute PSRLDQ: shift the 128-bit value sV right by imm bytes,
   filling with zeroes.  imm >= 16 yields all-zero.  Implemented on
   two 64-bit halves with an explicit case split on imm. */
static IRTemp math_PSRLDQ ( IRTemp sV, Int imm )
{
   IRTemp dV    = newTemp(Ity_V128);
   IRTemp hi64  = newTemp(Ity_I64);
   IRTemp lo64  = newTemp(Ity_I64);
   IRTemp hi64r = newTemp(Ity_I64);
   IRTemp lo64r = newTemp(Ity_I64);

   vassert(imm >= 0 && imm <= 255);
   if (imm >= 16) {
      /* everything shifted out */
      assign(dV, mkV128(0x0000));
      return dV;
   }

   assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );

   if (imm == 0) {
      assign( lo64r, mkexpr(lo64) );
      assign( hi64r, mkexpr(hi64) );
   }
   else
   if (imm == 8) {
      assign( hi64r, mkU64(0) );
      assign( lo64r, mkexpr(hi64) );
   }
   else
   if (imm > 8) {
      /* only the top half contributes */
      assign( hi64r, mkU64(0) );
      assign( lo64r, binop( Iop_Shr64, mkexpr(hi64), mkU8( 8*(imm-8) ) ));
   } else {
      /* 1..7: bits cross the 64-bit boundary */
      assign( hi64r, binop( Iop_Shr64, mkexpr(hi64), mkU8(8 * imm) ));
      assign( lo64r,
              binop( Iop_Or64,
                     binop(Iop_Shr64, mkexpr(lo64),
                           mkU8(8 * imm)),
                     binop(Iop_Shl64, mkexpr(hi64),
                           mkU8(8 * (8 - imm)) )
                     )
              );
   }

   assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
   return dV;
}
/* Compute PSLLDQ: shift the 128-bit value sV left by imm bytes,
   filling with zeroes.  Mirror image of math_PSRLDQ. */
static IRTemp math_PSLLDQ ( IRTemp sV, Int imm )
{
   IRTemp dV    = newTemp(Ity_V128);
   IRTemp hi64  = newTemp(Ity_I64);
   IRTemp lo64  = newTemp(Ity_I64);
   IRTemp hi64r = newTemp(Ity_I64);
   IRTemp lo64r = newTemp(Ity_I64);

   vassert(imm >= 0 && imm <= 255);
   if (imm >= 16) {
      /* everything shifted out */
      assign(dV, mkV128(0x0000));
      return dV;
   }

   assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );

   if (imm == 0) {
      assign( lo64r, mkexpr(lo64) );
      assign( hi64r, mkexpr(hi64) );
   }
   else
   if (imm == 8) {
      assign( lo64r, mkU64(0) );
      assign( hi64r, mkexpr(lo64) );
   }
   else
   if (imm > 8) {
      /* only the bottom half contributes */
      assign( lo64r, mkU64(0) );
      assign( hi64r, binop( Iop_Shl64, mkexpr(lo64), mkU8( 8*(imm-8) ) ));
   } else {
      /* 1..7: bits cross the 64-bit boundary */
      assign( lo64r, binop( Iop_Shl64, mkexpr(lo64), mkU8(8 * imm) ));
      assign( hi64r,
              binop( Iop_Or64,
                     binop(Iop_Shl64, mkexpr(hi64),
                           mkU8(8 * imm)),
                     binop(Iop_Shr64, mkexpr(lo64),
                           mkU8(8 * (8 - imm)) )
                     )
              );
   }

   assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
   return dV;
}
/* Disassemble (V)CVTSD2SI / (V)CVTTSD2SI: convert the low F64 lane
   of E to a 32- or 64-bit signed integer in G.  opc 0x2C (the
   truncating form) forces round-to-zero; otherwise the SSE rounding
   mode is used.  sz is 4 or 8.  Returns updated delta. */
static Long dis_CVTxSD2SI ( const VexAbiInfo* vbi, Prefix pfx,
                            Long delta, Bool isAvx, UChar opc, Int sz )
{
   vassert(opc == 0x2D/*CVTSD2SI*/ || opc == 0x2C/*CVTTSD2SI*/);
   HChar  dis_buf[50];
   Int    alen   = 0;
   UChar  modrm  = getUChar(delta);
   IRTemp addr   = IRTemp_INVALID;
   IRTemp rmode  = newTemp(Ity_I32);
   IRTemp f64lo  = newTemp(Ity_F64);
   Bool   r2zero = toBool(opc == 0x2C);   /* truncating variant? */

   if (epartIsReg(modrm)) {
      delta += 1;
      assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
      DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
          nameXMMReg(eregOfRexRM(pfx,modrm)),
          nameIReg(sz, gregOfRexRM(pfx,modrm),
                   False));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
      delta += alen;
      DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
          dis_buf,
          nameIReg(sz, gregOfRexRM(pfx,modrm),
                   False));
   }

   if (r2zero) {
      assign( rmode, mkU32((UInt)Irrm_ZERO) );
   } else {
      assign( rmode, get_sse_roundingmode() );
   }

   if (sz == 4) {
      putIReg32( gregOfRexRM(pfx,modrm),
                 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) );
   } else {
      vassert(sz == 8);
      putIReg64( gregOfRexRM(pfx,modrm),
                 binop( Iop_F64toI64S, mkexpr(rmode), mkexpr(f64lo)) );
   }

   return delta;
}
/* Disassemble (V)CVTSS2SI / (V)CVTTSS2SI: like dis_CVTxSD2SI but the
   source is the low F32 lane, widened to F64 before conversion.
   Returns updated delta. */
static Long dis_CVTxSS2SI ( const VexAbiInfo* vbi, Prefix pfx,
                            Long delta, Bool isAvx, UChar opc, Int sz )
{
   vassert(opc == 0x2D/*CVTSS2SI*/ || opc == 0x2C/*CVTTSS2SI*/);
   HChar  dis_buf[50];
   Int    alen   = 0;
   UChar  modrm  = getUChar(delta);
   IRTemp addr   = IRTemp_INVALID;
   IRTemp rmode  = newTemp(Ity_I32);
   IRTemp f32lo  = newTemp(Ity_F32);
   Bool   r2zero = toBool(opc == 0x2C);   /* truncating variant? */

   if (epartIsReg(modrm)) {
      delta += 1;
      assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
      DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
          nameXMMReg(eregOfRexRM(pfx,modrm)),
          nameIReg(sz, gregOfRexRM(pfx,modrm),
                   False));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
      delta += alen;
      DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
          dis_buf,
          nameIReg(sz, gregOfRexRM(pfx,modrm),
                   False));
   }

   if (r2zero) {
      assign( rmode, mkU32((UInt)Irrm_ZERO) );
   } else {
      assign( rmode, get_sse_roundingmode() );
   }

   /* widen F32 -> F64 (exact), then convert to integer */
   if (sz == 4) {
      putIReg32( gregOfRexRM(pfx,modrm),
                 binop( Iop_F64toI32S,
                        mkexpr(rmode),
                        unop(Iop_F32toF64, mkexpr(f32lo))) );
   } else {
      vassert(sz == 8);
      putIReg64( gregOfRexRM(pfx,modrm),
                 binop( Iop_F64toI64S,
                        mkexpr(rmode),
                        unop(Iop_F32toF64, mkexpr(f32lo))) );
   }

   return delta;
}
/* Disassemble (V)CVTPS2PD (128-bit form): widen the two low F32
   lanes of E to two F64 lanes in G.  The conversion is exact, so no
   rounding mode is needed.  If isAvx, zero G's upper YMM lane.
   Returns updated delta. */
static Long dis_CVTPS2PD_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   IRTemp f32lo = newTemp(Ity_F32);
   IRTemp f32hi = newTemp(Ity_F32);
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( f32lo, getXMMRegLane32F(rE, 0) );
      assign( f32hi, getXMMRegLane32F(rE, 1) );
      delta += 1;
      DIP("%scvtps2pd %s,%s\n",
          isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
      assign( f32hi, loadLE(Ity_F32,
                            binop(Iop_Add64,mkexpr(addr),mkU64(4))) );
      delta += alen;
      DIP("%scvtps2pd %s,%s\n",
          isAvx ? "v" : "", dis_buf, nameXMMReg(rG));
   }

   putXMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32hi)) );
   putXMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32lo)) );
   if (isAvx)
      putYMMRegLane128( rG, 1, mkV128(0));
   return delta;
}
/* Disassemble VCVTPS2PD (256-bit form): widen the four F32 lanes of
   a 128-bit E to four F64 lanes in YMM G.  Exact conversion, no
   rounding mode needed.  Returns updated delta. */
static Long dis_CVTPS2PD_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   IRTemp f32_0 = newTemp(Ity_F32);
   IRTemp f32_1 = newTemp(Ity_F32);
   IRTemp f32_2 = newTemp(Ity_F32);
   IRTemp f32_3 = newTemp(Ity_F32);
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( f32_0, getXMMRegLane32F(rE, 0) );
      assign( f32_1, getXMMRegLane32F(rE, 1) );
      assign( f32_2, getXMMRegLane32F(rE, 2) );
      assign( f32_3, getXMMRegLane32F(rE, 3) );
      delta += 1;
      DIP("vcvtps2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( f32_0, loadLE(Ity_F32, mkexpr(addr)) );
      assign( f32_1, loadLE(Ity_F32,
                            binop(Iop_Add64,mkexpr(addr),mkU64(4))) );
      assign( f32_2, loadLE(Ity_F32,
                            binop(Iop_Add64,mkexpr(addr),mkU64(8))) );
      assign( f32_3, loadLE(Ity_F32,
                            binop(Iop_Add64,mkexpr(addr),mkU64(12))) );
      delta += alen;
      DIP("vcvtps2pd %s,%s\n", dis_buf, nameYMMReg(rG));
   }

   putYMMRegLane64F( rG, 3, unop(Iop_F32toF64, mkexpr(f32_3)) );
   putYMMRegLane64F( rG, 2, unop(Iop_F32toF64, mkexpr(f32_2)) );
   putYMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32_1)) );
   putYMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32_0)) );
   return delta;
}
/* Disassemble (V)CVTPD2PS (128-bit form): narrow the two F64 lanes
   of E to two F32 lanes in the low half of G, zeroing the upper two
   F32 lanes.  Narrowing can lose precision, so the current SSE
   rounding mode is honoured.  Returns updated delta. */
static Long dis_CVTPD2PS_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   IRTemp argV  = newTemp(Ity_V128);
   IRTemp rmode = newTemp(Ity_I32);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( argV, getXMMReg(rE) );
      delta += 1;
      DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "",
          nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
      delta += alen;
      DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "",
          dis_buf, nameXMMReg(rG) );
   }

   assign( rmode, get_sse_roundingmode() );
   IRTemp t0 = newTemp(Ity_F64);
   IRTemp t1 = newTemp(Ity_F64);
   /* reinterpret the two 64-bit halves as F64 values */
   assign( t0, unop(Iop_ReinterpI64asF64,
                    unop(Iop_V128to64, mkexpr(argV))) );
   assign( t1, unop(Iop_ReinterpI64asF64,
                    unop(Iop_V128HIto64, mkexpr(argV))) );

#  define CVT(_t)  binop( Iop_F64toF32, mkexpr(rmode), mkexpr(_t) )
   putXMMRegLane32(  rG, 3, mkU32(0) );
   putXMMRegLane32(  rG, 2, mkU32(0) );
   putXMMRegLane32F( rG, 1, CVT(t1) );
   putXMMRegLane32F( rG, 0, CVT(t0) );
#  undef CVT
   if (isAvx)
      putYMMRegLane128( rG, 1, mkV128(0) );

   return delta;
}
/* Disassemble (V)CVTPS2DQ / (V)CVTTPS2DQ (128-bit form): convert
   four F32 lanes of E to four I32 lanes in G.  r2zero selects the
   truncating (round-to-zero) variant; otherwise the SSE rounding
   mode is used.  Returns updated delta. */
static Long dis_CVTxPS2DQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
                                Long delta, Bool isAvx, Bool r2zero )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   IRTemp argV  = newTemp(Ity_V128);
   IRTemp rmode = newTemp(Ity_I32);
   UInt   rG    = gregOfRexRM(pfx,modrm);

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( argV, getXMMReg(rE) );
      delta += 1;
      DIP("%scvt%sps2dq %s,%s\n",
          isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
      delta += alen;
      DIP("%scvt%sps2dq %s,%s\n",
          isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
   }

   assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO)
                         : get_sse_roundingmode() );
   putXMMReg( rG, binop(Iop_F32toI32Sx4, mkexpr(rmode), mkexpr(argV)) );
   if (isAvx)
      putYMMRegLane128( rG, 1, mkV128(0) );

   return delta;
}
/* Disassemble VCVTPS2DQ / VCVTTPS2DQ (256-bit form): convert eight
   F32 lanes of E to eight I32 lanes in G.  r2zero selects the
   truncating variant.  Returns updated delta. */
static Long dis_CVTxPS2DQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
                                Long delta, Bool r2zero )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   IRTemp argV  = newTemp(Ity_V256);
   IRTemp rmode = newTemp(Ity_I32);
   UInt   rG    = gregOfRexRM(pfx,modrm);

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( argV, getYMMReg(rE) );
      delta += 1;
      DIP("vcvt%sps2dq %s,%s\n",
          r2zero ? "t" : "", nameYMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
      delta += alen;
      DIP("vcvt%sps2dq %s,%s\n",
          r2zero ? "t" : "", dis_buf, nameYMMReg(rG) );
   }

   assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO)
                         : get_sse_roundingmode() );
   putYMMReg( rG, binop(Iop_F32toI32Sx8, mkexpr(rmode), mkexpr(argV)) );

   return delta;
}
/* Disassemble (V)CVTPD2DQ / (V)CVTTPD2DQ (128-bit source): convert
   two F64 lanes of E to two I32 lanes in the low half of G, zeroing
   the upper two lanes.  r2zero selects round-to-zero.  Note the "x"
   suffix printed for the memory form (operand size is ambiguous in
   AVX assembly).  Returns updated delta. */
static Long dis_CVTxPD2DQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
                                Long delta, Bool isAvx, Bool r2zero )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   IRTemp argV  = newTemp(Ity_V128);
   IRTemp rmode = newTemp(Ity_I32);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   IRTemp t0, t1;

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( argV, getXMMReg(rE) );
      delta += 1;
      DIP("%scvt%spd2dq %s,%s\n",
          isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
      delta += alen;
      DIP("%scvt%spd2dqx %s,%s\n",
          isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
   }

   if (r2zero) {
      assign(rmode, mkU32((UInt)Irrm_ZERO) );
   } else {
      assign( rmode, get_sse_roundingmode() );
   }

   t0 = newTemp(Ity_F64);
   t1 = newTemp(Ity_F64);
   assign( t0, unop(Iop_ReinterpI64asF64,
                    unop(Iop_V128to64, mkexpr(argV))) );
   assign( t1, unop(Iop_ReinterpI64asF64,
                    unop(Iop_V128HIto64, mkexpr(argV))) );

#  define CVT(_t)  binop( Iop_F64toI32S,                   \
                          mkexpr(rmode),                   \
                          mkexpr(_t) )

   putXMMRegLane32( rG, 3, mkU32(0) );
   putXMMRegLane32( rG, 2, mkU32(0) );
   putXMMRegLane32( rG, 1, CVT(t1) );
   putXMMRegLane32( rG, 0, CVT(t0) );
#  undef CVT
   if (isAvx)
      putYMMRegLane128( rG, 1, mkV128(0) );

   return delta;
}
/* Disassemble VCVTPD2DQ / VCVTTPD2DQ (256-bit source): convert four
   F64 lanes of YMM E to four I32 lanes in XMM G (upper YMM lane of G
   is zeroed).  r2zero selects round-to-zero.  The memory form prints
   a "y" suffix.  Returns updated delta. */
static Long dis_CVTxPD2DQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
                                Long delta, Bool r2zero )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   IRTemp argV  = newTemp(Ity_V256);
   IRTemp rmode = newTemp(Ity_I32);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   IRTemp t0, t1, t2, t3;

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( argV, getYMMReg(rE) );
      delta += 1;
      DIP("vcvt%spd2dq %s,%s\n",
          r2zero ? "t" : "", nameYMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
      delta += alen;
      DIP("vcvt%spd2dqy %s,%s\n",
          r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
   }

   if (r2zero) {
      assign(rmode, mkU32((UInt)Irrm_ZERO) );
   } else {
      assign( rmode, get_sse_roundingmode() );
   }

   t0 = IRTemp_INVALID;
   t1 = IRTemp_INVALID;
   t2 = IRTemp_INVALID;
   t3 = IRTemp_INVALID;
   breakupV256to64s( argV, &t3, &t2, &t1, &t0 );

#  define CVT(_t)  binop( Iop_F64toI32S,                   \
                          mkexpr(rmode),                   \
                          unop( Iop_ReinterpI64asF64,      \
                                mkexpr(_t) ) )

   putXMMRegLane32( rG, 3, CVT(t3) );
   putXMMRegLane32( rG, 2, CVT(t2) );
   putXMMRegLane32( rG, 1, CVT(t1) );
   putXMMRegLane32( rG, 0, CVT(t0) );
#  undef CVT
   putYMMRegLane128( rG, 1, mkV128(0) );

   return delta;
}
/* Disassemble (V)CVTDQ2PS (128-bit form): convert four I32 lanes of
   E to four F32 lanes in G, using the SSE rounding mode (int->float
   can lose accuracy for large magnitudes).  Returns updated delta. */
static Long dis_CVTDQ2PS_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   IRTemp argV  = newTemp(Ity_V128);
   IRTemp rmode = newTemp(Ity_I32);
   UInt   rG    = gregOfRexRM(pfx,modrm);

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( argV, getXMMReg(rE) );
      delta += 1;
      DIP("%scvtdq2ps %s,%s\n",
          isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
      delta += alen;
      DIP("%scvtdq2ps %s,%s\n",
          isAvx ? "v" : "", dis_buf, nameXMMReg(rG) );
   }

   assign( rmode, get_sse_roundingmode() );
   putXMMReg(rG, binop(Iop_I32StoF32x4, mkexpr(rmode), mkexpr(argV)));
   if (isAvx)
      putYMMRegLane128( rG, 1, mkV128(0) );

   return delta;
}
/* Disassemble VCVTDQ2PS (256-bit form): eight I32 lanes -> eight F32
   lanes, honouring the SSE rounding mode.  Returns updated delta. */
static Long dis_CVTDQ2PS_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   IRTemp argV  = newTemp(Ity_V256);
   IRTemp rmode = newTemp(Ity_I32);
   UInt   rG    = gregOfRexRM(pfx,modrm);

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( argV, getYMMReg(rE) );
      delta += 1;
      DIP("vcvtdq2ps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
      delta += alen;
      DIP("vcvtdq2ps %s,%s\n", dis_buf, nameYMMReg(rG) );
   }

   assign( rmode, get_sse_roundingmode() );
   putYMMReg(rG, binop(Iop_I32StoF32x8, mkexpr(rmode), mkexpr(argV)));

   return delta;
}
/* Disassemble (V)PMOVMSKB xmm: gather the MSB of each of the 16
   bytes of XMM E into the low 16 bits of 32-bit integer register G.
   Register-source form only (asserted).  Returns updated delta. */
static Long dis_PMOVMSKB_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   UChar modrm = getUChar(delta);
   vassert(epartIsReg(modrm)); /* ensured by caller */
   UInt   rE = eregOfRexRM(pfx,modrm);
   UInt   rG = gregOfRexRM(pfx,modrm);
   IRTemp t0 = newTemp(Ity_V128);
   IRTemp t1 = newTemp(Ity_I32);
   assign(t0, getXMMReg(rE));
   assign(t1, unop(Iop_16Uto32, unop(Iop_GetMSBs8x16, mkexpr(t0))));
   putIReg32(rG, mkexpr(t1));
   DIP("%spmovmskb %s,%s\n", isAvx ? "v" : "", nameXMMReg(rE),
       nameIReg32(rG));
   delta += 1;
   return delta;
}
/* Disassemble VPMOVMSKB ymm: gather the MSB of each of the 32 bytes
   of YMM E into 32-bit integer register G, doing each 128-bit half
   separately and concatenating the two 16-bit masks.  Register-source
   form only (asserted).  Returns updated delta. */
static Long dis_PMOVMSKB_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta )
{
   UChar modrm = getUChar(delta);
   vassert(epartIsReg(modrm)); /* ensured by caller */
   UInt   rE = eregOfRexRM(pfx,modrm);
   UInt   rG = gregOfRexRM(pfx,modrm);
   IRTemp t0 = newTemp(Ity_V128);   /* low half of E */
   IRTemp t1 = newTemp(Ity_V128);   /* high half of E */
   IRTemp t2 = newTemp(Ity_I16);
   IRTemp t3 = newTemp(Ity_I16);
   assign(t0, getYMMRegLane128(rE, 0));
   assign(t1, getYMMRegLane128(rE, 1));
   assign(t2, unop(Iop_GetMSBs8x16, mkexpr(t0)));
   assign(t3, unop(Iop_GetMSBs8x16, mkexpr(t1)));
   putIReg32(rG, binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)));
   DIP("vpmovmskb %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
   delta += 1;
   return delta;
}
/* FIXME: why not just use InterleaveLO / InterleaveHI?  I think the
   relevant ops are "xIsH ? InterleaveHI32x4 : InterleaveLO32x4". */
/* Does the maths for 128 bit versions of UNPCKLPS and UNPCKHPS */
/* Interleave 32-bit lanes of sV and dV: the high pair (xIsH) or the
   low pair (!xIsH), source lane above destination lane. */
static IRTemp math_UNPCKxPS_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   IRTemp res = newTemp(Ity_V128);
   assign(res,  xIsH ? mkV128from32s( s3, d3, s2, d2 )
                     : mkV128from32s( s1, d1, s0, d0 ));
   return res;
}
/* FIXME: why not just use InterleaveLO / InterleaveHI ?? */
/* Does the maths for 128 bit versions of UNPCKLPD and UNPCKHPD */
/* Pair up the high 64-bit lanes (xIsH) or the low 64-bit lanes
   (!xIsH) of sV and dV, source above destination. */
static IRTemp math_UNPCKxPD_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   IRTemp s1 = newTemp(Ity_I64);
   IRTemp s0 = newTemp(Ity_I64);
   IRTemp d1 = newTemp(Ity_I64);
   IRTemp d0 = newTemp(Ity_I64);
   assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( d0, unop(Iop_V128to64,   mkexpr(dV)) );
   assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( s0, unop(Iop_V128to64,   mkexpr(sV)) );
   IRTemp res = newTemp(Ity_V128);
   assign(res, xIsH ? binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1))
                    : binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)));
   return res;
}
/* Does the maths for 256 bit versions of UNPCKLPD and UNPCKHPD.
   Doesn't seem like this fits in either of the Iop_Interleave{LO,HI}
   or the Iop_Cat{Odd,Even}Lanes idioms, hence just do it the stupid
   way. */
/* Each 128-bit half is treated independently, like two side-by-side
   applications of math_UNPCKxPD_128. */
static IRTemp math_UNPCKxPD_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
   breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
   IRTemp res = newTemp(Ity_V256);
   assign(res, xIsH
               ? IRExpr_Qop(Iop_64x4toV256, mkexpr(s3), mkexpr(d3),
                                            mkexpr(s1), mkexpr(d1))
               : IRExpr_Qop(Iop_64x4toV256, mkexpr(s2), mkexpr(d2),
                                            mkexpr(s0), mkexpr(d0)));
   return res;
}
/* FIXME: this is really bad.  Surely can do something better here?
   One observation is that the steering in the upper and lower 128 bit
   halves is the same as with math_UNPCKxPS_128, so we simply split
   into two halves, and use that.  Consequently any improvement in
   math_UNPCKxPS_128 (probably, to use interleave-style primops)
   benefits this too. */
static IRTemp math_UNPCKxPS_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   /* same steering per 128-bit half */
   IRTemp rVhi = math_UNPCKxPS_128(sVhi, dVhi, xIsH);
   IRTemp rVlo = math_UNPCKxPS_128(sVlo, dVlo, xIsH);
   IRTemp rV   = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}
/* Compute 128-bit SHUFPS: the two low result lanes are selected from
   dV and the two high lanes from sV, each by 2 bits of imm8. */
static IRTemp math_SHUFPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   vassert(imm8 < 256);

   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );

#  define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
#  define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
   IRTemp res = newTemp(Ity_V128);
   assign(res,
          mkV128from32s( SELS((imm8>>6)&3), SELS((imm8>>4)&3),
                         SELD((imm8>>2)&3), SELD((imm8>>0)&3) ) );
#  undef SELD
#  undef SELS
   return res;
}
/* 256-bit SHUFPS appears to steer each of the 128-bit halves
   identically.  Hence do the clueless thing and use math_SHUFPS_128
   twice. */
static IRTemp math_SHUFPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   /* same imm8 for both halves */
   IRTemp rVhi = math_SHUFPS_128(sVhi, dVhi, imm8);
   IRTemp rVlo = math_SHUFPS_128(sVlo, dVlo, imm8);
   IRTemp rV   = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}
/* Compute 128-bit SHUFPD: low result lane picked from dV by imm8
   bit 0, high lane from sV by imm8 bit 1. */
static IRTemp math_SHUFPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp s1 = newTemp(Ity_I64);
   IRTemp s0 = newTemp(Ity_I64);
   IRTemp d1 = newTemp(Ity_I64);
   IRTemp d0 = newTemp(Ity_I64);

   assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( d0, unop(Iop_V128to64,   mkexpr(dV)) );
   assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( s0, unop(Iop_V128to64,   mkexpr(sV)) );

#  define SELD(n) mkexpr((n)==0 ? d0 : d1)
#  define SELS(n) mkexpr((n)==0 ? s0 : s1)

   IRTemp res = newTemp(Ity_V128);
   assign(res, binop( Iop_64HLtoV128,
                      SELS((imm8>>1)&1), SELD((imm8>>0)&1) ) );

#  undef SELD
#  undef SELS
   return res;
}
/* Compute 256-bit SHUFPD: imm8 bits 3:2 steer the upper 128-bit
   half, bits 1:0 the lower half, via math_SHUFPD_128. */
static IRTemp math_SHUFPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_SHUFPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
   IRTemp rVlo = math_SHUFPD_128(sVlo, dVlo, imm8 & 3);
   IRTemp rV   = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}
/* Compute 128-bit BLENDPD: for each 64-bit lane, take it from sV if
   the corresponding bit of imm8 (bits 1:0) is set, else from dV.
   Implemented with a constant mask and AND/OR. */
static IRTemp math_BLENDPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   UShort imm8_mask_16;
   IRTemp imm8_mask = newTemp(Ity_V128);

   /* mkV128 expands each mask bit to a byte, so 0x00FF/0xFF00 cover
      the low/high 64-bit lane respectively */
   switch( imm8 & 3 ) {
      case 0:  imm8_mask_16 = 0x0000; break;
      case 1:  imm8_mask_16 = 0x00FF; break;
      case 2:  imm8_mask_16 = 0xFF00; break;
      case 3:  imm8_mask_16 = 0xFFFF; break;
      default: vassert(0);            break;
   }
   assign( imm8_mask, mkV128( imm8_mask_16 ) );

   IRTemp res = newTemp(Ity_V128);
   assign ( res, binop( Iop_OrV128,
                        binop( Iop_AndV128, mkexpr(sV),
                                            mkexpr(imm8_mask) ),
                        binop( Iop_AndV128, mkexpr(dV),
                               unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
   return res;
}
11171 static IRTemp
math_BLENDPD_256 ( IRTemp sV
, IRTemp dV
, UInt imm8
)
11173 IRTemp sVhi
= IRTemp_INVALID
, sVlo
= IRTemp_INVALID
;
11174 IRTemp dVhi
= IRTemp_INVALID
, dVlo
= IRTemp_INVALID
;
11175 breakupV256toV128s( sV
, &sVhi
, &sVlo
);
11176 breakupV256toV128s( dV
, &dVhi
, &dVlo
);
11177 IRTemp rVhi
= math_BLENDPD_128(sVhi
, dVhi
, (imm8
>> 2) & 3);
11178 IRTemp rVlo
= math_BLENDPD_128(sVlo
, dVlo
, imm8
& 3);
11179 IRTemp rV
= newTemp(Ity_V256
);
11180 assign(rV
, binop(Iop_V128HLtoV256
, mkexpr(rVhi
), mkexpr(rVlo
)));
11185 static IRTemp
math_BLENDPS_128 ( IRTemp sV
, IRTemp dV
, UInt imm8
)
11187 UShort imm8_perms
[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
11188 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
11189 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
11191 IRTemp imm8_mask
= newTemp(Ity_V128
);
11192 assign( imm8_mask
, mkV128( imm8_perms
[ (imm8
& 15) ] ) );
11194 IRTemp res
= newTemp(Ity_V128
);
11195 assign ( res
, binop( Iop_OrV128
,
11196 binop( Iop_AndV128
, mkexpr(sV
),
11197 mkexpr(imm8_mask
) ),
11198 binop( Iop_AndV128
, mkexpr(dV
),
11199 unop( Iop_NotV128
, mkexpr(imm8_mask
) ) ) ) );
11204 static IRTemp
math_BLENDPS_256 ( IRTemp sV
, IRTemp dV
, UInt imm8
)
11206 IRTemp sVhi
= IRTemp_INVALID
, sVlo
= IRTemp_INVALID
;
11207 IRTemp dVhi
= IRTemp_INVALID
, dVlo
= IRTemp_INVALID
;
11208 breakupV256toV128s( sV
, &sVhi
, &sVlo
);
11209 breakupV256toV128s( dV
, &dVhi
, &dVlo
);
11210 IRTemp rVhi
= math_BLENDPS_128(sVhi
, dVhi
, (imm8
>> 4) & 15);
11211 IRTemp rVlo
= math_BLENDPS_128(sVlo
, dVlo
, imm8
& 15);
11212 IRTemp rV
= newTemp(Ity_V256
);
11213 assign(rV
, binop(Iop_V128HLtoV256
, mkexpr(rVhi
), mkexpr(rVlo
)));
11218 static IRTemp
math_PBLENDW_128 ( IRTemp sV
, IRTemp dV
, UInt imm8
)
11220 /* Make w be a 16-bit version of imm8, formed by duplicating each
11224 for (i
= 0; i
< 8; i
++) {
11225 if (imm8
& (1 << i
))
11226 imm16
|= (3 << (2*i
));
11228 IRTemp imm16_mask
= newTemp(Ity_V128
);
11229 assign( imm16_mask
, mkV128( imm16
));
11231 IRTemp res
= newTemp(Ity_V128
);
11232 assign ( res
, binop( Iop_OrV128
,
11233 binop( Iop_AndV128
, mkexpr(sV
),
11234 mkexpr(imm16_mask
) ),
11235 binop( Iop_AndV128
, mkexpr(dV
),
11236 unop( Iop_NotV128
, mkexpr(imm16_mask
) ) ) ) );
11241 static IRTemp
math_PMULUDQ_128 ( IRTemp sV
, IRTemp dV
)
11243 /* This is a really poor translation -- could be improved if
11244 performance critical */
11245 IRTemp s3
, s2
, s1
, s0
, d3
, d2
, d1
, d0
;
11246 s3
= s2
= s1
= s0
= d3
= d2
= d1
= d0
= IRTemp_INVALID
;
11247 breakupV128to32s( dV
, &d3
, &d2
, &d1
, &d0
);
11248 breakupV128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
11249 IRTemp res
= newTemp(Ity_V128
);
11250 assign(res
, binop(Iop_64HLtoV128
,
11251 binop( Iop_MullU32
, mkexpr(d2
), mkexpr(s2
)),
11252 binop( Iop_MullU32
, mkexpr(d0
), mkexpr(s0
)) ));
11257 static IRTemp
math_PMULUDQ_256 ( IRTemp sV
, IRTemp dV
)
11259 /* This is a really poor translation -- could be improved if
11260 performance critical */
11261 IRTemp sHi
, sLo
, dHi
, dLo
;
11262 sHi
= sLo
= dHi
= dLo
= IRTemp_INVALID
;
11263 breakupV256toV128s( dV
, &dHi
, &dLo
);
11264 breakupV256toV128s( sV
, &sHi
, &sLo
);
11265 IRTemp res
= newTemp(Ity_V256
);
11266 assign(res
, binop(Iop_V128HLtoV256
,
11267 mkexpr(math_PMULUDQ_128(sHi
, dHi
)),
11268 mkexpr(math_PMULUDQ_128(sLo
, dLo
))));
11273 static IRTemp
math_PMULDQ_128 ( IRTemp dV
, IRTemp sV
)
11275 /* This is a really poor translation -- could be improved if
11276 performance critical */
11277 IRTemp s3
, s2
, s1
, s0
, d3
, d2
, d1
, d0
;
11278 s3
= s2
= s1
= s0
= d3
= d2
= d1
= d0
= IRTemp_INVALID
;
11279 breakupV128to32s( dV
, &d3
, &d2
, &d1
, &d0
);
11280 breakupV128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
11281 IRTemp res
= newTemp(Ity_V128
);
11282 assign(res
, binop(Iop_64HLtoV128
,
11283 binop( Iop_MullS32
, mkexpr(d2
), mkexpr(s2
)),
11284 binop( Iop_MullS32
, mkexpr(d0
), mkexpr(s0
)) ));
11289 static IRTemp
math_PMULDQ_256 ( IRTemp sV
, IRTemp dV
)
11291 /* This is a really poor translation -- could be improved if
11292 performance critical */
11293 IRTemp sHi
, sLo
, dHi
, dLo
;
11294 sHi
= sLo
= dHi
= dLo
= IRTemp_INVALID
;
11295 breakupV256toV128s( dV
, &dHi
, &dLo
);
11296 breakupV256toV128s( sV
, &sHi
, &sLo
);
11297 IRTemp res
= newTemp(Ity_V256
);
11298 assign(res
, binop(Iop_V128HLtoV256
,
11299 mkexpr(math_PMULDQ_128(sHi
, dHi
)),
11300 mkexpr(math_PMULDQ_128(sLo
, dLo
))));
11305 static IRTemp
math_PMADDWD_128 ( IRTemp dV
, IRTemp sV
)
11307 IRTemp sVhi
, sVlo
, dVhi
, dVlo
;
11308 IRTemp resHi
= newTemp(Ity_I64
);
11309 IRTemp resLo
= newTemp(Ity_I64
);
11310 sVhi
= sVlo
= dVhi
= dVlo
= IRTemp_INVALID
;
11311 breakupV128to64s( sV
, &sVhi
, &sVlo
);
11312 breakupV128to64s( dV
, &dVhi
, &dVlo
);
11313 assign( resHi
, mkIRExprCCall(Ity_I64
, 0/*regparms*/,
11314 "amd64g_calculate_mmx_pmaddwd",
11315 &amd64g_calculate_mmx_pmaddwd
,
11316 mkIRExprVec_2( mkexpr(sVhi
), mkexpr(dVhi
))));
11317 assign( resLo
, mkIRExprCCall(Ity_I64
, 0/*regparms*/,
11318 "amd64g_calculate_mmx_pmaddwd",
11319 &amd64g_calculate_mmx_pmaddwd
,
11320 mkIRExprVec_2( mkexpr(sVlo
), mkexpr(dVlo
))));
11321 IRTemp res
= newTemp(Ity_V128
);
11322 assign( res
, binop(Iop_64HLtoV128
, mkexpr(resHi
), mkexpr(resLo
))) ;
11327 static IRTemp
math_PMADDWD_256 ( IRTemp dV
, IRTemp sV
)
11329 IRTemp sHi
, sLo
, dHi
, dLo
;
11330 sHi
= sLo
= dHi
= dLo
= IRTemp_INVALID
;
11331 breakupV256toV128s( dV
, &dHi
, &dLo
);
11332 breakupV256toV128s( sV
, &sHi
, &sLo
);
11333 IRTemp res
= newTemp(Ity_V256
);
11334 assign(res
, binop(Iop_V128HLtoV256
,
11335 mkexpr(math_PMADDWD_128(dHi
, sHi
)),
11336 mkexpr(math_PMADDWD_128(dLo
, sLo
))));
11341 static IRTemp
math_ADDSUBPD_128 ( IRTemp dV
, IRTemp sV
)
11343 IRTemp addV
= newTemp(Ity_V128
);
11344 IRTemp subV
= newTemp(Ity_V128
);
11345 IRTemp a1
= newTemp(Ity_I64
);
11346 IRTemp s0
= newTemp(Ity_I64
);
11347 IRTemp rm
= newTemp(Ity_I32
);
11349 assign( rm
, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11350 assign( addV
, triop(Iop_Add64Fx2
, mkexpr(rm
), mkexpr(dV
), mkexpr(sV
)) );
11351 assign( subV
, triop(Iop_Sub64Fx2
, mkexpr(rm
), mkexpr(dV
), mkexpr(sV
)) );
11353 assign( a1
, unop(Iop_V128HIto64
, mkexpr(addV
) ));
11354 assign( s0
, unop(Iop_V128to64
, mkexpr(subV
) ));
11356 IRTemp res
= newTemp(Ity_V128
);
11357 assign( res
, binop(Iop_64HLtoV128
, mkexpr(a1
), mkexpr(s0
)) );
11362 static IRTemp
math_ADDSUBPD_256 ( IRTemp dV
, IRTemp sV
)
11364 IRTemp a3
, a2
, a1
, a0
, s3
, s2
, s1
, s0
;
11365 IRTemp addV
= newTemp(Ity_V256
);
11366 IRTemp subV
= newTemp(Ity_V256
);
11367 IRTemp rm
= newTemp(Ity_I32
);
11368 a3
= a2
= a1
= a0
= s3
= s2
= s1
= s0
= IRTemp_INVALID
;
11370 assign( rm
, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11371 assign( addV
, triop(Iop_Add64Fx4
, mkexpr(rm
), mkexpr(dV
), mkexpr(sV
)) );
11372 assign( subV
, triop(Iop_Sub64Fx4
, mkexpr(rm
), mkexpr(dV
), mkexpr(sV
)) );
11374 breakupV256to64s( addV
, &a3
, &a2
, &a1
, &a0
);
11375 breakupV256to64s( subV
, &s3
, &s2
, &s1
, &s0
);
11377 IRTemp res
= newTemp(Ity_V256
);
11378 assign( res
, mkV256from64s( a3
, s2
, a1
, s0
) );
11383 static IRTemp
math_ADDSUBPS_128 ( IRTemp dV
, IRTemp sV
)
11385 IRTemp a3
, a2
, a1
, a0
, s3
, s2
, s1
, s0
;
11386 IRTemp addV
= newTemp(Ity_V128
);
11387 IRTemp subV
= newTemp(Ity_V128
);
11388 IRTemp rm
= newTemp(Ity_I32
);
11389 a3
= a2
= a1
= a0
= s3
= s2
= s1
= s0
= IRTemp_INVALID
;
11391 assign( rm
, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11392 assign( addV
, triop(Iop_Add32Fx4
, mkexpr(rm
), mkexpr(dV
), mkexpr(sV
)) );
11393 assign( subV
, triop(Iop_Sub32Fx4
, mkexpr(rm
), mkexpr(dV
), mkexpr(sV
)) );
11395 breakupV128to32s( addV
, &a3
, &a2
, &a1
, &a0
);
11396 breakupV128to32s( subV
, &s3
, &s2
, &s1
, &s0
);
11398 IRTemp res
= newTemp(Ity_V128
);
11399 assign( res
, mkV128from32s( a3
, s2
, a1
, s0
) );
11404 static IRTemp
math_ADDSUBPS_256 ( IRTemp dV
, IRTemp sV
)
11406 IRTemp a7
, a6
, a5
, a4
, a3
, a2
, a1
, a0
;
11407 IRTemp s7
, s6
, s5
, s4
, s3
, s2
, s1
, s0
;
11408 IRTemp addV
= newTemp(Ity_V256
);
11409 IRTemp subV
= newTemp(Ity_V256
);
11410 IRTemp rm
= newTemp(Ity_I32
);
11411 a7
= a6
= a5
= a4
= a3
= a2
= a1
= a0
= IRTemp_INVALID
;
11412 s7
= s6
= s5
= s4
= s3
= s2
= s1
= s0
= IRTemp_INVALID
;
11414 assign( rm
, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11415 assign( addV
, triop(Iop_Add32Fx8
, mkexpr(rm
), mkexpr(dV
), mkexpr(sV
)) );
11416 assign( subV
, triop(Iop_Sub32Fx8
, mkexpr(rm
), mkexpr(dV
), mkexpr(sV
)) );
11418 breakupV256to32s( addV
, &a7
, &a6
, &a5
, &a4
, &a3
, &a2
, &a1
, &a0
);
11419 breakupV256to32s( subV
, &s7
, &s6
, &s5
, &s4
, &s3
, &s2
, &s1
, &s0
);
11421 IRTemp res
= newTemp(Ity_V256
);
11422 assign( res
, mkV256from32s( a7
, s6
, a5
, s4
, a3
, s2
, a1
, s0
) );
11427 /* Handle 128 bit PSHUFLW and PSHUFHW. */
11428 static Long
dis_PSHUFxW_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
11429 Long delta
, Bool isAvx
, Bool xIsH
)
11431 IRTemp addr
= IRTemp_INVALID
;
11434 UChar modrm
= getUChar(delta
);
11435 UInt rG
= gregOfRexRM(pfx
,modrm
);
11437 IRTemp sVmut
, dVmut
, sVcon
, sV
, dV
, s3
, s2
, s1
, s0
;
11438 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
11439 sV
= newTemp(Ity_V128
);
11440 dV
= newTemp(Ity_V128
);
11441 sVmut
= newTemp(Ity_I64
);
11442 dVmut
= newTemp(Ity_I64
);
11443 sVcon
= newTemp(Ity_I64
);
11444 if (epartIsReg(modrm
)) {
11445 UInt rE
= eregOfRexRM(pfx
,modrm
);
11446 assign( sV
, getXMMReg(rE
) );
11447 imm8
= (UInt
)getUChar(delta
+1);
11449 DIP("%spshuf%cw $%u,%s,%s\n",
11450 isAvx
? "v" : "", xIsH
? 'h' : 'l',
11451 imm8
, nameXMMReg(rE
), nameXMMReg(rG
));
11453 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
11454 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
11455 imm8
= (UInt
)getUChar(delta
+alen
);
11457 DIP("%spshuf%cw $%u,%s,%s\n",
11458 isAvx
? "v" : "", xIsH
? 'h' : 'l',
11459 imm8
, dis_buf
, nameXMMReg(rG
));
11462 /* Get the to-be-changed (mut) and unchanging (con) bits of the
11464 assign( sVmut
, unop(xIsH
? Iop_V128HIto64
: Iop_V128to64
, mkexpr(sV
)) );
11465 assign( sVcon
, unop(xIsH
? Iop_V128to64
: Iop_V128HIto64
, mkexpr(sV
)) );
11467 breakup64to16s( sVmut
, &s3
, &s2
, &s1
, &s0
);
11469 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11470 assign(dVmut
, mk64from16s( SEL((imm8
>>6)&3), SEL((imm8
>>4)&3),
11471 SEL((imm8
>>2)&3), SEL((imm8
>>0)&3) ));
11474 assign(dV
, xIsH
? binop(Iop_64HLtoV128
, mkexpr(dVmut
), mkexpr(sVcon
))
11475 : binop(Iop_64HLtoV128
, mkexpr(sVcon
), mkexpr(dVmut
)) );
11477 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)(rG
, mkexpr(dV
));
11482 /* Handle 256 bit PSHUFLW and PSHUFHW. */
11483 static Long
dis_PSHUFxW_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
11484 Long delta
, Bool xIsH
)
11486 IRTemp addr
= IRTemp_INVALID
;
11489 UChar modrm
= getUChar(delta
);
11490 UInt rG
= gregOfRexRM(pfx
,modrm
);
11492 IRTemp sV
, s
[8], sV64
[4], dVhi
, dVlo
;
11493 sV64
[3] = sV64
[2] = sV64
[1] = sV64
[0] = IRTemp_INVALID
;
11494 s
[7] = s
[6] = s
[5] = s
[4] = s
[3] = s
[2] = s
[1] = s
[0] = IRTemp_INVALID
;
11495 sV
= newTemp(Ity_V256
);
11496 dVhi
= newTemp(Ity_I64
);
11497 dVlo
= newTemp(Ity_I64
);
11498 if (epartIsReg(modrm
)) {
11499 UInt rE
= eregOfRexRM(pfx
,modrm
);
11500 assign( sV
, getYMMReg(rE
) );
11501 imm8
= (UInt
)getUChar(delta
+1);
11503 DIP("vpshuf%cw $%u,%s,%s\n", xIsH
? 'h' : 'l',
11504 imm8
, nameYMMReg(rE
), nameYMMReg(rG
));
11506 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
11507 assign( sV
, loadLE(Ity_V256
, mkexpr(addr
)) );
11508 imm8
= (UInt
)getUChar(delta
+alen
);
11510 DIP("vpshuf%cw $%u,%s,%s\n", xIsH
? 'h' : 'l',
11511 imm8
, dis_buf
, nameYMMReg(rG
));
11514 breakupV256to64s( sV
, &sV64
[3], &sV64
[2], &sV64
[1], &sV64
[0] );
11515 breakup64to16s( sV64
[xIsH
? 3 : 2], &s
[7], &s
[6], &s
[5], &s
[4] );
11516 breakup64to16s( sV64
[xIsH
? 1 : 0], &s
[3], &s
[2], &s
[1], &s
[0] );
11518 assign( dVhi
, mk64from16s( s
[4 + ((imm8
>>6)&3)], s
[4 + ((imm8
>>4)&3)],
11519 s
[4 + ((imm8
>>2)&3)], s
[4 + ((imm8
>>0)&3)] ) );
11520 assign( dVlo
, mk64from16s( s
[0 + ((imm8
>>6)&3)], s
[0 + ((imm8
>>4)&3)],
11521 s
[0 + ((imm8
>>2)&3)], s
[0 + ((imm8
>>0)&3)] ) );
11522 putYMMReg( rG
, mkV256from64s( xIsH
? dVhi
: sV64
[3],
11523 xIsH
? sV64
[2] : dVhi
,
11524 xIsH
? dVlo
: sV64
[1],
11525 xIsH
? sV64
[0] : dVlo
) );
11530 static Long
dis_PEXTRW_128_EregOnly_toG ( const VexAbiInfo
* vbi
, Prefix pfx
,
11531 Long delta
, Bool isAvx
)
11533 Long deltaIN
= delta
;
11534 UChar modrm
= getUChar(delta
);
11535 UInt rG
= gregOfRexRM(pfx
,modrm
);
11536 IRTemp sV
= newTemp(Ity_V128
);
11537 IRTemp d16
= newTemp(Ity_I16
);
11539 IRTemp s0
, s1
, s2
, s3
;
11540 if (epartIsReg(modrm
)) {
11541 UInt rE
= eregOfRexRM(pfx
,modrm
);
11542 assign(sV
, getXMMReg(rE
));
11543 imm8
= getUChar(delta
+1) & 7;
11545 DIP("%spextrw $%u,%s,%s\n", isAvx
? "v" : "",
11546 imm8
, nameXMMReg(rE
), nameIReg32(rG
));
11548 /* The memory case is disallowed, apparently. */
11549 return deltaIN
; /* FAIL */
11551 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
11552 breakupV128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
11554 case 0: assign(d16
, unop(Iop_32to16
, mkexpr(s0
))); break;
11555 case 1: assign(d16
, unop(Iop_32HIto16
, mkexpr(s0
))); break;
11556 case 2: assign(d16
, unop(Iop_32to16
, mkexpr(s1
))); break;
11557 case 3: assign(d16
, unop(Iop_32HIto16
, mkexpr(s1
))); break;
11558 case 4: assign(d16
, unop(Iop_32to16
, mkexpr(s2
))); break;
11559 case 5: assign(d16
, unop(Iop_32HIto16
, mkexpr(s2
))); break;
11560 case 6: assign(d16
, unop(Iop_32to16
, mkexpr(s3
))); break;
11561 case 7: assign(d16
, unop(Iop_32HIto16
, mkexpr(s3
))); break;
11562 default: vassert(0);
11564 putIReg32(rG
, unop(Iop_16Uto32
, mkexpr(d16
)));
11569 static Long
dis_CVTDQ2PD_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
11570 Long delta
, Bool isAvx
)
11572 IRTemp addr
= IRTemp_INVALID
;
11575 UChar modrm
= getUChar(delta
);
11576 IRTemp arg64
= newTemp(Ity_I64
);
11577 UInt rG
= gregOfRexRM(pfx
,modrm
);
11578 const HChar
* mbV
= isAvx
? "v" : "";
11579 if (epartIsReg(modrm
)) {
11580 UInt rE
= eregOfRexRM(pfx
,modrm
);
11581 assign( arg64
, getXMMRegLane64(rE
, 0) );
11583 DIP("%scvtdq2pd %s,%s\n", mbV
, nameXMMReg(rE
), nameXMMReg(rG
));
11585 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
11586 assign( arg64
, loadLE(Ity_I64
, mkexpr(addr
)) );
11588 DIP("%scvtdq2pd %s,%s\n", mbV
, dis_buf
, nameXMMReg(rG
) );
11592 unop(Iop_I32StoF64
, unop(Iop_64to32
, mkexpr(arg64
)))
11596 unop(Iop_I32StoF64
, unop(Iop_64HIto32
, mkexpr(arg64
)))
11599 putYMMRegLane128(rG
, 1, mkV128(0));
11604 static Long
dis_STMXCSR ( const VexAbiInfo
* vbi
, Prefix pfx
,
11605 Long delta
, Bool isAvx
)
11607 IRTemp addr
= IRTemp_INVALID
;
11610 UChar modrm
= getUChar(delta
);
11611 vassert(!epartIsReg(modrm
)); /* ensured by caller */
11612 vassert(gregOfRexRM(pfx
,modrm
) == 3); /* ditto */
11614 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
11617 /* Fake up a native SSE mxcsr word. The only thing it depends on
11618 is SSEROUND[1:0], so call a clean helper to cook it up.
11620 /* ULong amd64h_create_mxcsr ( ULong sseround ) */
11621 DIP("%sstmxcsr %s\n", isAvx
? "v" : "", dis_buf
);
11626 Ity_I64
, 0/*regp*/,
11627 "amd64g_create_mxcsr", &amd64g_create_mxcsr
,
11628 mkIRExprVec_1( unop(Iop_32Uto64
,get_sse_roundingmode()) )
11636 static Long
dis_LDMXCSR ( const VexAbiInfo
* vbi
, Prefix pfx
,
11637 Long delta
, Bool isAvx
)
11639 IRTemp addr
= IRTemp_INVALID
;
11642 UChar modrm
= getUChar(delta
);
11643 vassert(!epartIsReg(modrm
)); /* ensured by caller */
11644 vassert(gregOfRexRM(pfx
,modrm
) == 2); /* ditto */
11646 IRTemp t64
= newTemp(Ity_I64
);
11647 IRTemp ew
= newTemp(Ity_I32
);
11649 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
11651 DIP("%sldmxcsr %s\n", isAvx
? "v" : "", dis_buf
);
11653 /* The only thing we observe in %mxcsr is the rounding mode.
11654 Therefore, pass the 32-bit value (SSE native-format control
11655 word) to a clean helper, getting back a 64-bit value, the
11656 lower half of which is the SSEROUND value to store, and the
11657 upper half of which is the emulation-warning token which may
11660 /* ULong amd64h_check_ldmxcsr ( ULong ); */
11661 assign( t64
, mkIRExprCCall(
11662 Ity_I64
, 0/*regparms*/,
11663 "amd64g_check_ldmxcsr",
11664 &amd64g_check_ldmxcsr
,
11667 loadLE(Ity_I32
, mkexpr(addr
))
11673 put_sse_roundingmode( unop(Iop_64to32
, mkexpr(t64
)) );
11674 assign( ew
, unop(Iop_64HIto32
, mkexpr(t64
) ) );
11675 put_emwarn( mkexpr(ew
) );
11676 /* Finally, if an emulation warning was reported, side-exit to
11677 the next insn, reporting the warning, so that Valgrind's
11678 dispatcher sees the warning. */
11681 binop(Iop_CmpNE64
, unop(Iop_32Uto64
,mkexpr(ew
)), mkU64(0)),
11683 IRConst_U64(guest_RIP_bbstart
+delta
),
11691 static void gen_XSAVE_SEQUENCE ( IRTemp addr
, IRTemp rfbm
)
11693 /* ------ rfbm[0] gates the x87 state ------ */
11695 /* Uses dirty helper:
11696 void amd64g_do_XSAVE_COMPONENT_0 ( VexGuestAMD64State*, ULong )
11698 IRDirty
* d0
= unsafeIRDirty_0_N (
11700 "amd64g_dirtyhelper_XSAVE_COMPONENT_0",
11701 &amd64g_dirtyhelper_XSAVE_COMPONENT_0
,
11702 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
11704 d0
->guard
= binop(Iop_CmpEQ64
, binop(Iop_And64
, mkexpr(rfbm
), mkU64(1)),
11707 /* Declare we're writing memory. Really, bytes 24 through 31
11708 (MXCSR and MXCSR_MASK) aren't written, but we can't express more
11709 than 1 memory area here, so just mark the whole thing as
11711 d0
->mFx
= Ifx_Write
;
11712 d0
->mAddr
= mkexpr(addr
);
11715 /* declare we're reading guest state */
11717 vex_bzero(&d0
->fxState
, sizeof(d0
->fxState
));
11719 d0
->fxState
[0].fx
= Ifx_Read
;
11720 d0
->fxState
[0].offset
= OFFB_FTOP
;
11721 d0
->fxState
[0].size
= sizeof(UInt
);
11723 d0
->fxState
[1].fx
= Ifx_Read
;
11724 d0
->fxState
[1].offset
= OFFB_FPREGS
;
11725 d0
->fxState
[1].size
= 8 * sizeof(ULong
);
11727 d0
->fxState
[2].fx
= Ifx_Read
;
11728 d0
->fxState
[2].offset
= OFFB_FPTAGS
;
11729 d0
->fxState
[2].size
= 8 * sizeof(UChar
);
11731 d0
->fxState
[3].fx
= Ifx_Read
;
11732 d0
->fxState
[3].offset
= OFFB_FPROUND
;
11733 d0
->fxState
[3].size
= sizeof(ULong
);
11735 d0
->fxState
[4].fx
= Ifx_Read
;
11736 d0
->fxState
[4].offset
= OFFB_FC3210
;
11737 d0
->fxState
[4].size
= sizeof(ULong
);
11739 stmt( IRStmt_Dirty(d0
) );
11741 /* ------ rfbm[1] gates the SSE state ------ */
11743 IRTemp rfbm_1
= newTemp(Ity_I64
);
11744 IRTemp rfbm_1or2
= newTemp(Ity_I64
);
11745 assign(rfbm_1
, binop(Iop_And64
, mkexpr(rfbm
), mkU64(2)));
11746 assign(rfbm_1or2
, binop(Iop_And64
, mkexpr(rfbm
), mkU64(6)));
11748 IRExpr
* guard_1
= binop(Iop_CmpEQ64
, mkexpr(rfbm_1
), mkU64(2));
11749 IRExpr
* guard_1or2
= binop(Iop_CmpNE64
, mkexpr(rfbm_1or2
), mkU64(0));
11751 /* Uses dirty helper:
11752 void amd64g_do_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS
11753 ( VexGuestAMD64State*, ULong )
11754 This creates only MXCSR and MXCSR_MASK. We need to do this if
11755 either components 1 (SSE) or 2 (AVX) are requested. Hence the
11756 guard condition is a bit more complex.
11758 IRDirty
* d1
= unsafeIRDirty_0_N (
11760 "amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS",
11761 &amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS
,
11762 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
11764 d1
->guard
= guard_1or2
;
11766 /* Declare we're writing memory: MXCSR and MXCSR_MASK. Note that
11767 the code for rbfm[0] just above claims a write of 0 .. 159, so
11768 this duplicates it. But at least correctly connects 24 .. 31 to
11769 the MXCSR guest state representation (SSEROUND field). */
11770 d1
->mFx
= Ifx_Write
;
11771 d1
->mAddr
= binop(Iop_Add64
, mkexpr(addr
), mkU64(24));
11774 /* declare we're reading guest state */
11776 vex_bzero(&d1
->fxState
, sizeof(d1
->fxState
));
11778 d1
->fxState
[0].fx
= Ifx_Read
;
11779 d1
->fxState
[0].offset
= OFFB_SSEROUND
;
11780 d1
->fxState
[0].size
= sizeof(ULong
);
11782 /* Call the helper. This creates MXCSR and MXCSR_MASK but nothing
11783 else. We do the actual register array, XMM[0..15], separately,
11784 in order that any undefinedness in the XMM registers is tracked
11785 separately by Memcheck and does not "infect" the in-memory
11786 shadow for the other parts of the image. */
11787 stmt( IRStmt_Dirty(d1
) );
11789 /* And now the XMMs themselves. */
11791 for (reg
= 0; reg
< 16; reg
++) {
11792 stmt( IRStmt_StoreG(
11794 binop(Iop_Add64
, mkexpr(addr
), mkU64(160 + reg
* 16)),
11800 /* ------ rfbm[2] gates the AVX state ------ */
11801 /* Component 2 is just a bunch of register saves, so we'll do it
11802 inline, just to be simple and to be Memcheck friendly. */
11804 IRTemp rfbm_2
= newTemp(Ity_I64
);
11805 assign(rfbm_2
, binop(Iop_And64
, mkexpr(rfbm
), mkU64(4)));
11807 IRExpr
* guard_2
= binop(Iop_CmpEQ64
, mkexpr(rfbm_2
), mkU64(4));
11809 for (reg
= 0; reg
< 16; reg
++) {
11810 stmt( IRStmt_StoreG(
11812 binop(Iop_Add64
, mkexpr(addr
), mkU64(576 + reg
* 16)),
11813 getYMMRegLane128(reg
,1),
11820 static Long
dis_XSAVE ( const VexAbiInfo
* vbi
,
11821 Prefix pfx
, Long delta
, Int sz
)
11823 /* Note that the presence or absence of REX.W (indicated here by
11824 |sz|) slightly affects the written format: whether the saved FPU
11825 IP and DP pointers are 64 or 32 bits. But the helper function
11826 we call simply writes zero bits in the relevant fields, which
11827 are 64 bits regardless of what REX.W is, and so it's good enough
11828 (iow, equally broken) in both cases. */
11829 IRTemp addr
= IRTemp_INVALID
;
11832 UChar modrm
= getUChar(delta
);
11833 vassert(!epartIsReg(modrm
)); /* ensured by caller */
11834 vassert(sz
== 4 || sz
== 8); /* ditto */
11836 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
11838 gen_SEGV_if_not_64_aligned(addr
);
11840 DIP("%sxsave %s\n", sz
==8 ? "rex64/" : "", dis_buf
);
11842 /* VEX's caller is assumed to have checked this. */
11843 const ULong aSSUMED_XCR0_VALUE
= 7;
11845 IRTemp rfbm
= newTemp(Ity_I64
);
11850 unop(Iop_32Uto64
, getIRegRDX(4)), mkU8(32)),
11851 unop(Iop_32Uto64
, getIRegRAX(4))),
11852 mkU64(aSSUMED_XCR0_VALUE
)));
11854 gen_XSAVE_SEQUENCE(addr
, rfbm
);
11856 /* Finally, we need to update XSTATE_BV in the XSAVE header area, by
11857 OR-ing the RFBM value into it. */
11858 IRTemp addr_plus_512
= newTemp(Ity_I64
);
11859 assign(addr_plus_512
, binop(Iop_Add64
, mkexpr(addr
), mkU64(512)));
11860 storeLE( mkexpr(addr_plus_512
),
11862 unop(Iop_64to8
, mkexpr(rfbm
)),
11863 loadLE(Ity_I8
, mkexpr(addr_plus_512
))) );
11869 static Long
dis_FXSAVE ( const VexAbiInfo
* vbi
,
11870 Prefix pfx
, Long delta
, Int sz
)
11872 /* See comment in dis_XSAVE about the significance of REX.W. */
11873 IRTemp addr
= IRTemp_INVALID
;
11876 UChar modrm
= getUChar(delta
);
11877 vassert(!epartIsReg(modrm
)); /* ensured by caller */
11878 vassert(sz
== 4 || sz
== 8); /* ditto */
11880 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
11882 gen_SEGV_if_not_16_aligned(addr
);
11884 DIP("%sfxsave %s\n", sz
==8 ? "rex64/" : "", dis_buf
);
11886 /* FXSAVE is just XSAVE with components 0 and 1 selected. Set rfbm
11887 to 0b011, generate the XSAVE sequence accordingly, and let iropt
11888 fold out the unused (AVX) parts accordingly. */
11889 IRTemp rfbm
= newTemp(Ity_I64
);
11890 assign(rfbm
, mkU64(3));
11891 gen_XSAVE_SEQUENCE(addr
, rfbm
);
11897 static void gen_XRSTOR_SEQUENCE ( IRTemp addr
, IRTemp xstate_bv
, IRTemp rfbm
)
11899 /* ------ rfbm[0] gates the x87 state ------ */
11901 /* If rfbm[0] == 1, we have to write the x87 state. If
11902 xstate_bv[0] == 1, we will read it from the memory image, else
11903 we'll set it to initial values. Doing this with a helper
11904 function and getting the definedness flow annotations correct is
11905 too difficult, so generate stupid but simple code: first set the
11906 registers to initial values, regardless of xstate_bv[0]. Then,
11907 conditionally restore from the memory image. */
11909 IRTemp rfbm_0
= newTemp(Ity_I64
);
11910 IRTemp xstate_bv_0
= newTemp(Ity_I64
);
11911 IRTemp restore_0
= newTemp(Ity_I64
);
11912 assign(rfbm_0
, binop(Iop_And64
, mkexpr(rfbm
), mkU64(1)));
11913 assign(xstate_bv_0
, binop(Iop_And64
, mkexpr(xstate_bv
), mkU64(1)));
11914 assign(restore_0
, binop(Iop_And64
, mkexpr(rfbm_0
), mkexpr(xstate_bv_0
)));
11916 gen_FINIT_SEQUENCE( binop(Iop_CmpNE64
, mkexpr(rfbm_0
), mkU64(0)) );
11918 /* Uses dirty helper:
11919 void amd64g_do_XRSTOR_COMPONENT_0 ( VexGuestAMD64State*, ULong )
11921 IRDirty
* d0
= unsafeIRDirty_0_N (
11923 "amd64g_dirtyhelper_XRSTOR_COMPONENT_0",
11924 &amd64g_dirtyhelper_XRSTOR_COMPONENT_0
,
11925 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
11927 d0
->guard
= binop(Iop_CmpNE64
, mkexpr(restore_0
), mkU64(0));
11929 /* Declare we're reading memory. Really, bytes 24 through 31
11930 (MXCSR and MXCSR_MASK) aren't read, but we can't express more
11931 than 1 memory area here, so just mark the whole thing as
11933 d0
->mFx
= Ifx_Read
;
11934 d0
->mAddr
= mkexpr(addr
);
11937 /* declare we're writing guest state */
11939 vex_bzero(&d0
->fxState
, sizeof(d0
->fxState
));
11941 d0
->fxState
[0].fx
= Ifx_Write
;
11942 d0
->fxState
[0].offset
= OFFB_FTOP
;
11943 d0
->fxState
[0].size
= sizeof(UInt
);
11945 d0
->fxState
[1].fx
= Ifx_Write
;
11946 d0
->fxState
[1].offset
= OFFB_FPREGS
;
11947 d0
->fxState
[1].size
= 8 * sizeof(ULong
);
11949 d0
->fxState
[2].fx
= Ifx_Write
;
11950 d0
->fxState
[2].offset
= OFFB_FPTAGS
;
11951 d0
->fxState
[2].size
= 8 * sizeof(UChar
);
11953 d0
->fxState
[3].fx
= Ifx_Write
;
11954 d0
->fxState
[3].offset
= OFFB_FPROUND
;
11955 d0
->fxState
[3].size
= sizeof(ULong
);
11957 d0
->fxState
[4].fx
= Ifx_Write
;
11958 d0
->fxState
[4].offset
= OFFB_FC3210
;
11959 d0
->fxState
[4].size
= sizeof(ULong
);
11961 stmt( IRStmt_Dirty(d0
) );
11963 /* ------ rfbm[1] gates the SSE state ------ */
11965 /* Same scheme as component 0: first zero it out, and then possibly
11966 restore from the memory area. */
11967 IRTemp rfbm_1
= newTemp(Ity_I64
);
11968 IRTemp xstate_bv_1
= newTemp(Ity_I64
);
11969 IRTemp restore_1
= newTemp(Ity_I64
);
11970 assign(rfbm_1
, binop(Iop_And64
, mkexpr(rfbm
), mkU64(2)));
11971 assign(xstate_bv_1
, binop(Iop_And64
, mkexpr(xstate_bv
), mkU64(2)));
11972 assign(restore_1
, binop(Iop_And64
, mkexpr(rfbm_1
), mkexpr(xstate_bv_1
)));
11973 IRExpr
* rfbm_1e
= binop(Iop_CmpNE64
, mkexpr(rfbm_1
), mkU64(0));
11974 IRExpr
* restore_1e
= binop(Iop_CmpNE64
, mkexpr(restore_1
), mkU64(0));
11976 IRTemp rfbm_1or2
= newTemp(Ity_I64
);
11977 IRTemp xstate_bv_1or2
= newTemp(Ity_I64
);
11978 IRTemp restore_1or2
= newTemp(Ity_I64
);
11979 assign(rfbm_1or2
, binop(Iop_And64
, mkexpr(rfbm
), mkU64(6)));
11980 assign(xstate_bv_1or2
, binop(Iop_And64
, mkexpr(xstate_bv
), mkU64(6)));
11981 assign(restore_1or2
, binop(Iop_And64
, mkexpr(rfbm_1or2
),
11982 mkexpr(xstate_bv_1or2
)));
11983 IRExpr
* rfbm_1or2e
= binop(Iop_CmpNE64
, mkexpr(rfbm_1or2
), mkU64(0));
11984 IRExpr
* restore_1or2e
= binop(Iop_CmpNE64
, mkexpr(restore_1or2
), mkU64(0));
11986 /* The areas in question are: SSEROUND, and the XMM register array. */
11987 putGuarded(OFFB_SSEROUND
, rfbm_1or2e
, mkU64(Irrm_NEAREST
));
11990 for (reg
= 0; reg
< 16; reg
++) {
11991 putGuarded(xmmGuestRegOffset(reg
), rfbm_1e
, mkV128(0));
11994 /* And now possibly restore from MXCSR/MXCSR_MASK */
11995 /* Uses dirty helper:
11996 void amd64g_do_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS
11997 ( VexGuestAMD64State*, ULong )
11998 This restores from only MXCSR and MXCSR_MASK. We need to do
11999 this if either components 1 (SSE) or 2 (AVX) are requested.
12000 Hence the guard condition is a bit more complex.
12002 IRDirty
* d1
= unsafeIRDirty_0_N (
12004 "amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS",
12005 &amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS
,
12006 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
12008 d1
->guard
= restore_1or2e
;
12010 /* Declare we're reading memory: MXCSR and MXCSR_MASK. Note that
12011 the code for rbfm[0] just above claims a read of 0 .. 159, so
12012 this duplicates it. But at least correctly connects 24 .. 31 to
12013 the MXCSR guest state representation (SSEROUND field). */
12014 d1
->mFx
= Ifx_Read
;
12015 d1
->mAddr
= binop(Iop_Add64
, mkexpr(addr
), mkU64(24));
12018 /* declare we're writing guest state */
12020 vex_bzero(&d1
->fxState
, sizeof(d1
->fxState
));
12022 d1
->fxState
[0].fx
= Ifx_Write
;
12023 d1
->fxState
[0].offset
= OFFB_SSEROUND
;
12024 d1
->fxState
[0].size
= sizeof(ULong
);
12026 /* Call the helper. This creates SSEROUND but nothing
12027 else. We do the actual register array, XMM[0..15], separately,
12028 in order that any undefinedness in the XMM registers is tracked
12029 separately by Memcheck and is not "infected" by the in-memory
12030 shadow for the other parts of the image. */
12031 stmt( IRStmt_Dirty(d1
) );
12033 /* And now the XMMs themselves. For each register, we PUT either
12034 its old value, or the value loaded from memory. One convenient
12035 way to do that is with a conditional load that has its the
12036 default value, the old value of the register. */
12037 for (reg
= 0; reg
< 16; reg
++) {
12038 IRExpr
* ea
= binop(Iop_Add64
, mkexpr(addr
), mkU64(160 + reg
* 16));
12039 IRExpr
* alt
= getXMMReg(reg
);
12040 IRTemp loadedValue
= newTemp(Ity_V128
);
12041 stmt( IRStmt_LoadG(Iend_LE
,
12043 loadedValue
, ea
, alt
, restore_1e
) );
12044 putXMMReg(reg
, mkexpr(loadedValue
));
12047 /* ------ rfbm[2] gates the AVX state ------ */
12048 /* Component 2 is just a bunch of register loads, so we'll do it
12049 inline, just to be simple and to be Memcheck friendly. */
12051 /* Same scheme as component 0: first zero it out, and then possibly
12052 restore from the memory area. */
12053 IRTemp rfbm_2
= newTemp(Ity_I64
);
12054 IRTemp xstate_bv_2
= newTemp(Ity_I64
);
12055 IRTemp restore_2
= newTemp(Ity_I64
);
12056 assign(rfbm_2
, binop(Iop_And64
, mkexpr(rfbm
), mkU64(4)));
12057 assign(xstate_bv_2
, binop(Iop_And64
, mkexpr(xstate_bv
), mkU64(4)));
12058 assign(restore_2
, binop(Iop_And64
, mkexpr(rfbm_2
), mkexpr(xstate_bv_2
)));
12060 IRExpr
* rfbm_2e
= binop(Iop_CmpNE64
, mkexpr(rfbm_2
), mkU64(0));
12061 IRExpr
* restore_2e
= binop(Iop_CmpNE64
, mkexpr(restore_2
), mkU64(0));
12063 for (reg
= 0; reg
< 16; reg
++) {
12064 putGuarded(ymmGuestRegLane128offset(reg
, 1), rfbm_2e
, mkV128(0));
12067 for (reg
= 0; reg
< 16; reg
++) {
12068 IRExpr
* ea
= binop(Iop_Add64
, mkexpr(addr
), mkU64(576 + reg
* 16));
12069 IRExpr
* alt
= getYMMRegLane128(reg
, 1);
12070 IRTemp loadedValue
= newTemp(Ity_V128
);
12071 stmt( IRStmt_LoadG(Iend_LE
,
12073 loadedValue
, ea
, alt
, restore_2e
) );
12074 putYMMRegLane128(reg
, 1, mkexpr(loadedValue
));
/* dis_XRSTOR: decode and translate XRSTOR (memory operand only).
   Restores the guest state components selected by EDX:EAX and the
   XSAVE header from the XSAVE area at the decoded effective address.
   Returns the updated instruction-stream offset (delta). */
12079 static Long
dis_XRSTOR ( const VexAbiInfo
* vbi
,
12080 Prefix pfx
, Long delta
, Int sz
)
12082 /* As with XRSTOR above we ignore the value of REX.W since we're
12083 not bothering with the FPU DP and IP fields. */
12084 IRTemp addr
= IRTemp_INVALID
;
/* Caller guarantees a memory (not register) operand, and an operand
   size of 4 or 8. */
12087 UChar modrm
= getUChar(delta
);
12088 vassert(!epartIsReg(modrm
)); /* ensured by caller */
12089 vassert(sz
== 4 || sz
== 8); /* ditto */
/* Decode the effective address; XRSTOR requires 64-byte alignment,
   so generate a SEGV check on it. */
12091 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12093 gen_SEGV_if_not_64_aligned(addr
);
12095 DIP("%sxrstor %s\n", sz
==8 ? "rex64/" : "", dis_buf
);
/* XCR0 is assumed to be 7 (x87 | SSE | AVX components enabled). */
12097 /* VEX's caller is assumed to have checked this. */
12098 const ULong aSSUMED_XCR0_VALUE
= 7;
/* rfbm: the requested-feature bitmap, built from EDX:EAX masked with
   the assumed XCR0 value.  (The head of this assignment is not
   visible in this fragment -- confirm against the full source.) */
12100 IRTemp rfbm
= newTemp(Ity_I64
);
12105 unop(Iop_32Uto64
, getIRegRDX(4)), mkU8(32)),
12106 unop(Iop_32Uto64
, getIRegRAX(4))),
12107 mkU64(aSSUMED_XCR0_VALUE
)));
/* XSTATE_BV: XSAVE header bytes 7..0, at offset 512 in the area. */
12109 IRTemp xstate_bv
= newTemp(Ity_I64
);
12110 assign(xstate_bv
, loadLE(Ity_I64
,
12111 binop(Iop_Add64
, mkexpr(addr
), mkU64(512+0))));
/* XCOMP_BV: header bytes 15..8. */
12113 IRTemp xcomp_bv
= newTemp(Ity_I64
);
12114 assign(xcomp_bv
, loadLE(Ity_I64
,
12115 binop(Iop_Add64
, mkexpr(addr
), mkU64(512+8))));
/* Header bytes 23..16, which must be zero. */
12117 IRTemp xsavehdr_23_16
= newTemp(Ity_I64
);
12118 assign( xsavehdr_23_16
,
12120 binop(Iop_Add64
, mkexpr(addr
), mkU64(512+16))));
12122 /* We must fault if
12123 * xcomp_bv[63] == 1, since this simulated CPU does not support
12124 the compaction extension.
12125 * xstate_bv sets a bit outside of XCR0 (which we assume to be 7).
12126 * any of the xsave header bytes 23 .. 8 are nonzero. This seems to
12127 imply that xcomp_bv must be zero.
12128 xcomp_bv is header bytes 15 .. 8 and xstate_bv is header bytes 7 .. 0
12130 IRTemp fault_if_nonzero
= newTemp(Ity_I64
);
/* Nonzero iff any of the fault conditions listed above hold. */
12131 assign(fault_if_nonzero
,
12133 binop(Iop_And64
, mkexpr(xstate_bv
), mkU64(~aSSUMED_XCR0_VALUE
)),
12134 binop(Iop_Or64
, mkexpr(xcomp_bv
), mkexpr(xsavehdr_23_16
))));
/* Side-exit at the current instruction if the check fails. */
12135 stmt( IRStmt_Exit(binop(Iop_CmpNE64
, mkexpr(fault_if_nonzero
), mkU64(0)),
12137 IRConst_U64(guest_RIP_curr_instr
),
12141 /* We are guaranteed now that both xstate_bv and rfbm are in the
12142 range 0 .. 7. Generate the restore sequence proper. */
12143 gen_XRSTOR_SEQUENCE(addr
, xstate_bv
, rfbm
);
/* dis_FXRSTOR: decode and translate FXRSTOR (memory operand only).
   Implemented as an XRSTOR with only components 0 (x87) and 1 (SSE)
   selected and marked as present.  Returns the updated delta. */
12149 static Long
dis_FXRSTOR ( const VexAbiInfo
* vbi
,
12150 Prefix pfx
, Long delta
, Int sz
)
12152 /* As with FXSAVE above we ignore the value of REX.W since we're
12153 not bothering with the FPU DP and IP fields. */
12154 IRTemp addr
= IRTemp_INVALID
;
/* Caller guarantees a memory operand and sz of 4 or 8. */
12157 UChar modrm
= getUChar(delta
);
12158 vassert(!epartIsReg(modrm
)); /* ensured by caller */
12159 vassert(sz
== 4 || sz
== 8); /* ditto */
/* FXRSTOR requires only 16-byte alignment (vs 64 for XRSTOR). */
12161 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12163 gen_SEGV_if_not_16_aligned(addr
);
12165 DIP("%sfxrstor %s\n", sz
==8 ? "rex64/" : "", dis_buf
);
12167 /* FXRSTOR is just XRSTOR with components 0 and 1 selected and also
12168 as if components 0 and 1 are set as present in XSTATE_BV in the
12169 XSAVE header. Set both rfbm and xstate_bv to 0b011 therefore,
12170 generate the XRSTOR sequence accordingly, and let iropt fold out
12171 the unused (AVX) parts accordingly. */
12172 IRTemp three
= newTemp(Ity_I64
);
12173 assign(three
, mkU64(3));
12174 gen_XRSTOR_SEQUENCE(addr
, three
/*xstate_bv*/, three
/*rfbm*/);
/* math_PINSRW_128: build the result of PINSRW -- insert the 16-bit
   value u16 into word lane imm8 (0..7) of v128, leaving the other
   seven word lanes unchanged.  Returns a new V128 temp. */
12180 static IRTemp
math_PINSRW_128 ( IRTemp v128
, IRTemp u16
, UInt imm8
)
12182 vassert(imm8
>= 0 && imm8
<= 7);
12184 // Create a V128 value which has the selected word in the
12185 // specified lane, and zeroes everywhere else.
12186 IRTemp tmp128
= newTemp(Ity_V128
);
12187 IRTemp halfshift
= newTemp(Ity_I64
);
// Shift u16 into position within its 64-bit half (word lane imm8&3).
12188 assign(halfshift
, binop(Iop_Shl64
,
12189 unop(Iop_16Uto64
, mkexpr(u16
)),
12190 mkU8(16 * (imm8
& 3))));
// Place the shifted value in either the low or the high 64 bits of
// tmp128.  NOTE(review): the guard choosing between these two
// assignments -- presumably on imm8 bit 2 -- is not visible in this
// fragment; confirm against the full source.
12192 assign(tmp128
, binop(Iop_64HLtoV128
, mkU64(0), mkexpr(halfshift
)));
12194 assign(tmp128
, binop(Iop_64HLtoV128
, mkexpr(halfshift
), mkU64(0)));
// 16-bit byte-mask with zeroes at the two byte positions of lane
// imm8; used (via mkV128) to clear that word in v128 before OR-ing
// in tmp128.
12197 UShort mask
= ~(3 << (imm8
* 2));
12198 IRTemp res
= newTemp(Ity_V128
);
12199 assign( res
, binop(Iop_OrV128
,
12201 binop(Iop_AndV128
, mkexpr(v128
), mkV128(mask
))) );
12206 static IRTemp
math_PSADBW_128 ( IRTemp dV
, IRTemp sV
)
12208 IRTemp s1
, s0
, d1
, d0
;
12209 s1
= s0
= d1
= d0
= IRTemp_INVALID
;
12211 breakupV128to64s( sV
, &s1
, &s0
);
12212 breakupV128to64s( dV
, &d1
, &d0
);
12214 IRTemp res
= newTemp(Ity_V128
);
12216 binop(Iop_64HLtoV128
,
12217 mkIRExprCCall(Ity_I64
, 0/*regparms*/,
12218 "amd64g_calculate_mmx_psadbw",
12219 &amd64g_calculate_mmx_psadbw
,
12220 mkIRExprVec_2( mkexpr(s1
), mkexpr(d1
))),
12221 mkIRExprCCall(Ity_I64
, 0/*regparms*/,
12222 "amd64g_calculate_mmx_psadbw",
12223 &amd64g_calculate_mmx_psadbw
,
12224 mkIRExprVec_2( mkexpr(s0
), mkexpr(d0
)))) );
12229 static IRTemp
math_PSADBW_256 ( IRTemp dV
, IRTemp sV
)
12231 IRTemp sHi
, sLo
, dHi
, dLo
;
12232 sHi
= sLo
= dHi
= dLo
= IRTemp_INVALID
;
12233 breakupV256toV128s( dV
, &dHi
, &dLo
);
12234 breakupV256toV128s( sV
, &sHi
, &sLo
);
12235 IRTemp res
= newTemp(Ity_V256
);
12236 assign(res
, binop(Iop_V128HLtoV256
,
12237 mkexpr(math_PSADBW_128(dHi
, sHi
)),
12238 mkexpr(math_PSADBW_128(dLo
, sLo
))));
/* dis_MASKMOVDQU: decode and translate (V)MASKMOVDQU -- store the
   bytes of XMM register rG to [RDI] only where the corresponding
   byte of XMM register rE has its top bit set.  Implemented here as
   a load/merge/store of all 16 bytes.  Returns the updated delta. */
12243 static Long
dis_MASKMOVDQU ( const VexAbiInfo
* vbi
, Prefix pfx
,
12244 Long delta
, Bool isAvx
)
12246 IRTemp regD
= newTemp(Ity_V128
);
12247 IRTemp mask
= newTemp(Ity_V128
);
12248 IRTemp olddata
= newTemp(Ity_V128
);
12249 IRTemp newdata
= newTemp(Ity_V128
);
12250 IRTemp addr
= newTemp(Ity_I64
);
12251 UChar modrm
= getUChar(delta
);
12252 UInt rG
= gregOfRexRM(pfx
,modrm
);
12253 UInt rE
= eregOfRexRM(pfx
,modrm
);
/* Target address is RDI, subject to segment/address-size overrides. */
12255 assign( addr
, handleAddrOverrides( vbi
, pfx
, getIReg64(R_RDI
) ));
12256 assign( regD
, getXMMReg( rG
));
/* Build the per-byte mask from the top bits of rE's bytes; the
   construction here works on the two 64-bit lanes of rE. */
12258 /* Unfortunately can't do the obvious thing with SarN8x16
12259 here since that can't be re-emitted as SSE2 code - no such
12262 binop(Iop_64HLtoV128
,
12264 getXMMRegLane64( eregOfRexRM(pfx
,modrm
), 1 ),
12267 getXMMRegLane64( eregOfRexRM(pfx
,modrm
), 0 ),
/* Merge regD with the old memory contents under the mask (old data
   is kept where mask bits are clear), then write back.
   NOTE(review): this read-modify-write touches all 16 bytes at the
   target address, not just the selected ones. */
12269 assign( olddata
, loadLE( Ity_V128
, mkexpr(addr
) ));
12270 assign( newdata
, binop(Iop_OrV128
,
12276 unop(Iop_NotV128
, mkexpr(mask
)))) );
12277 storeLE( mkexpr(addr
), mkexpr(newdata
) );
12280 DIP("%smaskmovdqu %s,%s\n", isAvx
? "v" : "",
12281 nameXMMReg(rE
), nameXMMReg(rG
) );
12286 static Long
dis_MOVMSKPS_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
12287 Long delta
, Bool isAvx
)
12289 UChar modrm
= getUChar(delta
);
12290 UInt rG
= gregOfRexRM(pfx
,modrm
);
12291 UInt rE
= eregOfRexRM(pfx
,modrm
);
12292 IRTemp t0
= newTemp(Ity_I32
);
12293 IRTemp t1
= newTemp(Ity_I32
);
12294 IRTemp t2
= newTemp(Ity_I32
);
12295 IRTemp t3
= newTemp(Ity_I32
);
12297 assign( t0
, binop( Iop_And32
,
12298 binop(Iop_Shr32
, getXMMRegLane32(rE
,0), mkU8(31)),
12300 assign( t1
, binop( Iop_And32
,
12301 binop(Iop_Shr32
, getXMMRegLane32(rE
,1), mkU8(30)),
12303 assign( t2
, binop( Iop_And32
,
12304 binop(Iop_Shr32
, getXMMRegLane32(rE
,2), mkU8(29)),
12306 assign( t3
, binop( Iop_And32
,
12307 binop(Iop_Shr32
, getXMMRegLane32(rE
,3), mkU8(28)),
12309 putIReg32( rG
, binop(Iop_Or32
,
12310 binop(Iop_Or32
, mkexpr(t0
), mkexpr(t1
)),
12311 binop(Iop_Or32
, mkexpr(t2
), mkexpr(t3
)) ) );
12312 DIP("%smovmskps %s,%s\n", isAvx
? "v" : "",
12313 nameXMMReg(rE
), nameIReg32(rG
));
12318 static Long
dis_MOVMSKPS_256 ( const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
)
12320 UChar modrm
= getUChar(delta
);
12321 UInt rG
= gregOfRexRM(pfx
,modrm
);
12322 UInt rE
= eregOfRexRM(pfx
,modrm
);
12323 IRTemp t0
= newTemp(Ity_I32
);
12324 IRTemp t1
= newTemp(Ity_I32
);
12325 IRTemp t2
= newTemp(Ity_I32
);
12326 IRTemp t3
= newTemp(Ity_I32
);
12327 IRTemp t4
= newTemp(Ity_I32
);
12328 IRTemp t5
= newTemp(Ity_I32
);
12329 IRTemp t6
= newTemp(Ity_I32
);
12330 IRTemp t7
= newTemp(Ity_I32
);
12332 assign( t0
, binop( Iop_And32
,
12333 binop(Iop_Shr32
, getYMMRegLane32(rE
,0), mkU8(31)),
12335 assign( t1
, binop( Iop_And32
,
12336 binop(Iop_Shr32
, getYMMRegLane32(rE
,1), mkU8(30)),
12338 assign( t2
, binop( Iop_And32
,
12339 binop(Iop_Shr32
, getYMMRegLane32(rE
,2), mkU8(29)),
12341 assign( t3
, binop( Iop_And32
,
12342 binop(Iop_Shr32
, getYMMRegLane32(rE
,3), mkU8(28)),
12344 assign( t4
, binop( Iop_And32
,
12345 binop(Iop_Shr32
, getYMMRegLane32(rE
,4), mkU8(27)),
12347 assign( t5
, binop( Iop_And32
,
12348 binop(Iop_Shr32
, getYMMRegLane32(rE
,5), mkU8(26)),
12350 assign( t6
, binop( Iop_And32
,
12351 binop(Iop_Shr32
, getYMMRegLane32(rE
,6), mkU8(25)),
12353 assign( t7
, binop( Iop_And32
,
12354 binop(Iop_Shr32
, getYMMRegLane32(rE
,7), mkU8(24)),
12356 putIReg32( rG
, binop(Iop_Or32
,
12358 binop(Iop_Or32
, mkexpr(t0
), mkexpr(t1
)),
12359 binop(Iop_Or32
, mkexpr(t2
), mkexpr(t3
)) ),
12361 binop(Iop_Or32
, mkexpr(t4
), mkexpr(t5
)),
12362 binop(Iop_Or32
, mkexpr(t6
), mkexpr(t7
)) ) ) );
12363 DIP("vmovmskps %s,%s\n", nameYMMReg(rE
), nameIReg32(rG
));
12368 static Long
dis_MOVMSKPD_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
12369 Long delta
, Bool isAvx
)
12371 UChar modrm
= getUChar(delta
);
12372 UInt rG
= gregOfRexRM(pfx
,modrm
);
12373 UInt rE
= eregOfRexRM(pfx
,modrm
);
12374 IRTemp t0
= newTemp(Ity_I32
);
12375 IRTemp t1
= newTemp(Ity_I32
);
12377 assign( t0
, binop( Iop_And32
,
12378 binop(Iop_Shr32
, getXMMRegLane32(rE
,1), mkU8(31)),
12380 assign( t1
, binop( Iop_And32
,
12381 binop(Iop_Shr32
, getXMMRegLane32(rE
,3), mkU8(30)),
12383 putIReg32( rG
, binop(Iop_Or32
, mkexpr(t0
), mkexpr(t1
) ) );
12384 DIP("%smovmskpd %s,%s\n", isAvx
? "v" : "",
12385 nameXMMReg(rE
), nameIReg32(rG
));
12390 static Long
dis_MOVMSKPD_256 ( const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
)
12392 UChar modrm
= getUChar(delta
);
12393 UInt rG
= gregOfRexRM(pfx
,modrm
);
12394 UInt rE
= eregOfRexRM(pfx
,modrm
);
12395 IRTemp t0
= newTemp(Ity_I32
);
12396 IRTemp t1
= newTemp(Ity_I32
);
12397 IRTemp t2
= newTemp(Ity_I32
);
12398 IRTemp t3
= newTemp(Ity_I32
);
12400 assign( t0
, binop( Iop_And32
,
12401 binop(Iop_Shr32
, getYMMRegLane32(rE
,1), mkU8(31)),
12403 assign( t1
, binop( Iop_And32
,
12404 binop(Iop_Shr32
, getYMMRegLane32(rE
,3), mkU8(30)),
12406 assign( t2
, binop( Iop_And32
,
12407 binop(Iop_Shr32
, getYMMRegLane32(rE
,5), mkU8(29)),
12409 assign( t3
, binop( Iop_And32
,
12410 binop(Iop_Shr32
, getYMMRegLane32(rE
,7), mkU8(28)),
12412 putIReg32( rG
, binop(Iop_Or32
,
12413 binop(Iop_Or32
, mkexpr(t0
), mkexpr(t1
)),
12414 binop(Iop_Or32
, mkexpr(t2
), mkexpr(t3
)) ) );
12415 DIP("vmovmskps %s,%s\n", nameYMMReg(rE
), nameIReg32(rG
));
12420 /* Note, this also handles SSE(1) insns. */
12421 __attribute__((noinline
))
12423 Long
dis_ESC_0F__SSE2 ( Bool
* decode_OK
,
12424 const VexArchInfo
* archinfo
,
12425 const VexAbiInfo
* vbi
,
12426 Prefix pfx
, Int sz
, Long deltaIN
,
12429 IRTemp addr
= IRTemp_INVALID
;
12430 IRTemp t0
= IRTemp_INVALID
;
12431 IRTemp t1
= IRTemp_INVALID
;
12432 IRTemp t2
= IRTemp_INVALID
;
12433 IRTemp t3
= IRTemp_INVALID
;
12434 IRTemp t4
= IRTemp_INVALID
;
12435 IRTemp t5
= IRTemp_INVALID
;
12436 IRTemp t6
= IRTemp_INVALID
;
12441 *decode_OK
= False
;
12443 Long delta
= deltaIN
;
12444 UChar opc
= getUChar(delta
);
12449 if (have66noF2noF3(pfx
)
12450 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
12451 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
12452 modrm
= getUChar(delta
);
12453 if (epartIsReg(modrm
)) {
12454 putXMMReg( gregOfRexRM(pfx
,modrm
),
12455 getXMMReg( eregOfRexRM(pfx
,modrm
) ));
12456 DIP("movupd %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
12457 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12460 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12461 putXMMReg( gregOfRexRM(pfx
,modrm
),
12462 loadLE(Ity_V128
, mkexpr(addr
)) );
12463 DIP("movupd %s,%s\n", dis_buf
,
12464 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12467 goto decode_success
;
12469 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
12470 G (lo half xmm). If E is mem, upper half of G is zeroed out.
12471 If E is reg, upper half of G is unchanged. */
12472 if (haveF2no66noF3(pfx
)
12473 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8) ) {
12474 modrm
= getUChar(delta
);
12475 if (epartIsReg(modrm
)) {
12476 putXMMRegLane64( gregOfRexRM(pfx
,modrm
), 0,
12477 getXMMRegLane64( eregOfRexRM(pfx
,modrm
), 0 ));
12478 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
12479 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12482 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12483 putXMMReg( gregOfRexRM(pfx
,modrm
), mkV128(0) );
12484 putXMMRegLane64( gregOfRexRM(pfx
,modrm
), 0,
12485 loadLE(Ity_I64
, mkexpr(addr
)) );
12486 DIP("movsd %s,%s\n", dis_buf
,
12487 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12490 goto decode_success
;
12492 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
12493 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
12494 if (haveF3no66noF2(pfx
)
12495 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12496 modrm
= getUChar(delta
);
12497 if (epartIsReg(modrm
)) {
12498 putXMMRegLane32( gregOfRexRM(pfx
,modrm
), 0,
12499 getXMMRegLane32( eregOfRexRM(pfx
,modrm
), 0 ));
12500 DIP("movss %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
12501 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12504 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12505 putXMMReg( gregOfRexRM(pfx
,modrm
), mkV128(0) );
12506 putXMMRegLane32( gregOfRexRM(pfx
,modrm
), 0,
12507 loadLE(Ity_I32
, mkexpr(addr
)) );
12508 DIP("movss %s,%s\n", dis_buf
,
12509 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12512 goto decode_success
;
12514 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
12515 if (haveNo66noF2noF3(pfx
)
12516 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12517 modrm
= getUChar(delta
);
12518 if (epartIsReg(modrm
)) {
12519 putXMMReg( gregOfRexRM(pfx
,modrm
),
12520 getXMMReg( eregOfRexRM(pfx
,modrm
) ));
12521 DIP("movups %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
12522 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12525 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12526 putXMMReg( gregOfRexRM(pfx
,modrm
),
12527 loadLE(Ity_V128
, mkexpr(addr
)) );
12528 DIP("movups %s,%s\n", dis_buf
,
12529 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12532 goto decode_success
;
12537 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
12538 or lo half xmm). */
12539 if (haveF2no66noF3(pfx
)
12540 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12541 modrm
= getUChar(delta
);
12542 if (epartIsReg(modrm
)) {
12543 putXMMRegLane64( eregOfRexRM(pfx
,modrm
), 0,
12544 getXMMRegLane64( gregOfRexRM(pfx
,modrm
), 0 ));
12545 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
12546 nameXMMReg(eregOfRexRM(pfx
,modrm
)));
12549 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12550 storeLE( mkexpr(addr
),
12551 getXMMRegLane64(gregOfRexRM(pfx
,modrm
), 0) );
12552 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
12556 goto decode_success
;
12558 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
12560 if (haveF3no66noF2(pfx
) && sz
== 4) {
12561 modrm
= getUChar(delta
);
12562 if (epartIsReg(modrm
)) {
12563 /* fall through, we don't yet have a test case */
12565 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12566 storeLE( mkexpr(addr
),
12567 getXMMRegLane32(gregOfRexRM(pfx
,modrm
), 0) );
12568 DIP("movss %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
12571 goto decode_success
;
12574 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
12575 if (have66noF2noF3(pfx
)
12576 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
12577 modrm
= getUChar(delta
);
12578 if (epartIsReg(modrm
)) {
12579 putXMMReg( eregOfRexRM(pfx
,modrm
),
12580 getXMMReg( gregOfRexRM(pfx
,modrm
) ) );
12581 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
12582 nameXMMReg(eregOfRexRM(pfx
,modrm
)));
12585 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12586 storeLE( mkexpr(addr
), getXMMReg(gregOfRexRM(pfx
,modrm
)) );
12587 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
12591 goto decode_success
;
12593 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
12594 if (haveNo66noF2noF3(pfx
)
12595 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12596 modrm
= getUChar(delta
);
12597 if (epartIsReg(modrm
)) {
12598 /* fall through; awaiting test case */
12600 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12601 storeLE( mkexpr(addr
), getXMMReg(gregOfRexRM(pfx
,modrm
)) );
12602 DIP("movups %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
12605 goto decode_success
;
12611 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
12612 /* Identical to MOVLPS ? */
12613 if (have66noF2noF3(pfx
)
12614 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
12615 modrm
= getUChar(delta
);
12616 if (epartIsReg(modrm
)) {
12617 /* fall through; apparently reg-reg is not possible */
12619 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12621 putXMMRegLane64( gregOfRexRM(pfx
,modrm
),
12623 loadLE(Ity_I64
, mkexpr(addr
)) );
12624 DIP("movlpd %s, %s\n",
12625 dis_buf
, nameXMMReg( gregOfRexRM(pfx
,modrm
) ));
12626 goto decode_success
;
12629 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
12630 /* OF 12 = MOVHLPS -- from from hi half to lo half of XMM. */
12631 if (haveNo66noF2noF3(pfx
)
12632 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12633 modrm
= getUChar(delta
);
12634 if (epartIsReg(modrm
)) {
12636 putXMMRegLane64( gregOfRexRM(pfx
,modrm
),
12638 getXMMRegLane64( eregOfRexRM(pfx
,modrm
), 1 ));
12639 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
12640 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12642 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12644 putXMMRegLane64( gregOfRexRM(pfx
,modrm
), 0/*lower lane*/,
12645 loadLE(Ity_I64
, mkexpr(addr
)) );
12646 DIP("movlps %s, %s\n",
12647 dis_buf
, nameXMMReg( gregOfRexRM(pfx
,modrm
) ));
12649 goto decode_success
;
12654 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
12655 if (haveNo66noF2noF3(pfx
)
12656 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12657 modrm
= getUChar(delta
);
12658 if (!epartIsReg(modrm
)) {
12659 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12661 storeLE( mkexpr(addr
),
12662 getXMMRegLane64( gregOfRexRM(pfx
,modrm
),
12663 0/*lower lane*/ ) );
12664 DIP("movlps %s, %s\n", nameXMMReg( gregOfRexRM(pfx
,modrm
) ),
12666 goto decode_success
;
12668 /* else fall through */
12670 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
12671 /* Identical to MOVLPS ? */
12672 if (have66noF2noF3(pfx
)
12673 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
12674 modrm
= getUChar(delta
);
12675 if (!epartIsReg(modrm
)) {
12676 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12678 storeLE( mkexpr(addr
),
12679 getXMMRegLane64( gregOfRexRM(pfx
,modrm
),
12680 0/*lower lane*/ ) );
12681 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx
,modrm
) ),
12683 goto decode_success
;
12685 /* else fall through */
12691 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
12692 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
12693 /* These just appear to be special cases of SHUFPS */
12694 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
12695 Bool hi
= toBool(opc
== 0x15);
12696 IRTemp sV
= newTemp(Ity_V128
);
12697 IRTemp dV
= newTemp(Ity_V128
);
12698 modrm
= getUChar(delta
);
12699 UInt rG
= gregOfRexRM(pfx
,modrm
);
12700 assign( dV
, getXMMReg(rG
) );
12701 if (epartIsReg(modrm
)) {
12702 UInt rE
= eregOfRexRM(pfx
,modrm
);
12703 assign( sV
, getXMMReg(rE
) );
12705 DIP("unpck%sps %s,%s\n", hi
? "h" : "l",
12706 nameXMMReg(rE
), nameXMMReg(rG
));
12708 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12709 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
12711 DIP("unpck%sps %s,%s\n", hi
? "h" : "l",
12712 dis_buf
, nameXMMReg(rG
));
12714 IRTemp res
= math_UNPCKxPS_128( sV
, dV
, hi
);
12715 putXMMReg( rG
, mkexpr(res
) );
12716 goto decode_success
;
12718 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
12719 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
12720 /* These just appear to be special cases of SHUFPS */
12721 if (have66noF2noF3(pfx
)
12722 && sz
== 2 /* could be 8 if rex also present */) {
12723 Bool hi
= toBool(opc
== 0x15);
12724 IRTemp sV
= newTemp(Ity_V128
);
12725 IRTemp dV
= newTemp(Ity_V128
);
12726 modrm
= getUChar(delta
);
12727 UInt rG
= gregOfRexRM(pfx
,modrm
);
12728 assign( dV
, getXMMReg(rG
) );
12729 if (epartIsReg(modrm
)) {
12730 UInt rE
= eregOfRexRM(pfx
,modrm
);
12731 assign( sV
, getXMMReg(rE
) );
12733 DIP("unpck%sps %s,%s\n", hi
? "h" : "l",
12734 nameXMMReg(rE
), nameXMMReg(rG
));
12736 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12737 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
12739 DIP("unpck%sps %s,%s\n", hi
? "h" : "l",
12740 dis_buf
, nameXMMReg(rG
));
12742 IRTemp res
= math_UNPCKxPD_128( sV
, dV
, hi
);
12743 putXMMReg( rG
, mkexpr(res
) );
12744 goto decode_success
;
12749 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
12750 /* These seems identical to MOVHPS. This instruction encoding is
12751 completely crazy. */
12752 if (have66noF2noF3(pfx
)
12753 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
12754 modrm
= getUChar(delta
);
12755 if (epartIsReg(modrm
)) {
12756 /* fall through; apparently reg-reg is not possible */
12758 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12760 putXMMRegLane64( gregOfRexRM(pfx
,modrm
), 1/*upper lane*/,
12761 loadLE(Ity_I64
, mkexpr(addr
)) );
12762 DIP("movhpd %s,%s\n", dis_buf
,
12763 nameXMMReg( gregOfRexRM(pfx
,modrm
) ));
12764 goto decode_success
;
12767 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
12768 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
12769 if (haveNo66noF2noF3(pfx
)
12770 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12771 modrm
= getUChar(delta
);
12772 if (epartIsReg(modrm
)) {
12774 putXMMRegLane64( gregOfRexRM(pfx
,modrm
), 1/*upper lane*/,
12775 getXMMRegLane64( eregOfRexRM(pfx
,modrm
), 0 ) );
12776 DIP("movhps %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
12777 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12779 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12781 putXMMRegLane64( gregOfRexRM(pfx
,modrm
), 1/*upper lane*/,
12782 loadLE(Ity_I64
, mkexpr(addr
)) );
12783 DIP("movhps %s,%s\n", dis_buf
,
12784 nameXMMReg( gregOfRexRM(pfx
,modrm
) ));
12786 goto decode_success
;
12791 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
12792 if (haveNo66noF2noF3(pfx
)
12793 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12794 modrm
= getUChar(delta
);
12795 if (!epartIsReg(modrm
)) {
12796 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12798 storeLE( mkexpr(addr
),
12799 getXMMRegLane64( gregOfRexRM(pfx
,modrm
),
12800 1/*upper lane*/ ) );
12801 DIP("movhps %s,%s\n", nameXMMReg( gregOfRexRM(pfx
,modrm
) ),
12803 goto decode_success
;
12805 /* else fall through */
12807 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
12808 /* Again, this seems identical to MOVHPS. */
12809 if (have66noF2noF3(pfx
)
12810 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
12811 modrm
= getUChar(delta
);
12812 if (!epartIsReg(modrm
)) {
12813 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12815 storeLE( mkexpr(addr
),
12816 getXMMRegLane64( gregOfRexRM(pfx
,modrm
),
12817 1/*upper lane*/ ) );
12818 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx
,modrm
) ),
12820 goto decode_success
;
12822 /* else fall through */
12827 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
12828 /* 0F 18 /1 = PREFETCH0 -- with various different hints */
12829 /* 0F 18 /2 = PREFETCH1 */
12830 /* 0F 18 /3 = PREFETCH2 */
12831 if (haveNo66noF2noF3(pfx
)
12832 && !epartIsReg(getUChar(delta
))
12833 && gregLO3ofRM(getUChar(delta
)) >= 0
12834 && gregLO3ofRM(getUChar(delta
)) <= 3) {
12835 const HChar
* hintstr
= "??";
12837 modrm
= getUChar(delta
);
12838 vassert(!epartIsReg(modrm
));
12840 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12843 switch (gregLO3ofRM(modrm
)) {
12844 case 0: hintstr
= "nta"; break;
12845 case 1: hintstr
= "t0"; break;
12846 case 2: hintstr
= "t1"; break;
12847 case 3: hintstr
= "t2"; break;
12848 default: vassert(0);
12851 DIP("prefetch%s %s\n", hintstr
, dis_buf
);
12852 goto decode_success
;
12857 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
12858 if (have66noF2noF3(pfx
)
12859 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
12860 modrm
= getUChar(delta
);
12861 if (epartIsReg(modrm
)) {
12862 putXMMReg( gregOfRexRM(pfx
,modrm
),
12863 getXMMReg( eregOfRexRM(pfx
,modrm
) ));
12864 DIP("movapd %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
12865 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12868 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12869 gen_SEGV_if_not_16_aligned( addr
);
12870 putXMMReg( gregOfRexRM(pfx
,modrm
),
12871 loadLE(Ity_V128
, mkexpr(addr
)) );
12872 DIP("movapd %s,%s\n", dis_buf
,
12873 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12876 goto decode_success
;
12878 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
12879 if (haveNo66noF2noF3(pfx
)
12880 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12881 modrm
= getUChar(delta
);
12882 if (epartIsReg(modrm
)) {
12883 putXMMReg( gregOfRexRM(pfx
,modrm
),
12884 getXMMReg( eregOfRexRM(pfx
,modrm
) ));
12885 DIP("movaps %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
12886 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12889 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12890 gen_SEGV_if_not_16_aligned( addr
);
12891 putXMMReg( gregOfRexRM(pfx
,modrm
),
12892 loadLE(Ity_V128
, mkexpr(addr
)) );
12893 DIP("movaps %s,%s\n", dis_buf
,
12894 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12897 goto decode_success
;
12902 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
12903 if (haveNo66noF2noF3(pfx
)
12904 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12905 modrm
= getUChar(delta
);
12906 if (epartIsReg(modrm
)) {
12907 putXMMReg( eregOfRexRM(pfx
,modrm
),
12908 getXMMReg( gregOfRexRM(pfx
,modrm
) ));
12909 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
12910 nameXMMReg(eregOfRexRM(pfx
,modrm
)));
12913 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12914 gen_SEGV_if_not_16_aligned( addr
);
12915 storeLE( mkexpr(addr
), getXMMReg(gregOfRexRM(pfx
,modrm
)) );
12916 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
12920 goto decode_success
;
12922 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
12923 if (have66noF2noF3(pfx
)
12924 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
12925 modrm
= getUChar(delta
);
12926 if (epartIsReg(modrm
)) {
12927 putXMMReg( eregOfRexRM(pfx
,modrm
),
12928 getXMMReg( gregOfRexRM(pfx
,modrm
) ) );
12929 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
12930 nameXMMReg(eregOfRexRM(pfx
,modrm
)));
12933 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12934 gen_SEGV_if_not_16_aligned( addr
);
12935 storeLE( mkexpr(addr
), getXMMReg(gregOfRexRM(pfx
,modrm
)) );
12936 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
12940 goto decode_success
;
12945 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
12947 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
12948 IRTemp arg64
= newTemp(Ity_I64
);
12949 IRTemp rmode
= newTemp(Ity_I32
);
12951 modrm
= getUChar(delta
);
12952 if (epartIsReg(modrm
)) {
12953 /* Only switch to MMX mode if the source is a MMX register.
12954 See comments on CVTPI2PD for details. Fixes #357059. */
12956 assign( arg64
, getMMXReg(eregLO3ofRM(modrm
)) );
12958 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregLO3ofRM(modrm
)),
12959 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12961 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12962 assign( arg64
, loadLE(Ity_I64
, mkexpr(addr
)) );
12964 DIP("cvtpi2ps %s,%s\n", dis_buf
,
12965 nameXMMReg(gregOfRexRM(pfx
,modrm
)) );
12968 assign( rmode
, get_sse_roundingmode() );
12971 gregOfRexRM(pfx
,modrm
), 0,
12972 binop(Iop_F64toF32
,
12974 unop(Iop_I32StoF64
,
12975 unop(Iop_64to32
, mkexpr(arg64
)) )) );
12978 gregOfRexRM(pfx
,modrm
), 1,
12979 binop(Iop_F64toF32
,
12981 unop(Iop_I32StoF64
,
12982 unop(Iop_64HIto32
, mkexpr(arg64
)) )) );
12984 goto decode_success
;
12986 /* F3 0F 2A = CVTSI2SS
12987 -- sz==4: convert I32 in mem/ireg to F32 in low quarter xmm
12988 -- sz==8: convert I64 in mem/ireg to F32 in low quarter xmm */
12989 if (haveF3no66noF2(pfx
) && (sz
== 4 || sz
== 8)) {
12990 IRTemp rmode
= newTemp(Ity_I32
);
12991 assign( rmode
, get_sse_roundingmode() );
12992 modrm
= getUChar(delta
);
12994 IRTemp arg32
= newTemp(Ity_I32
);
12995 if (epartIsReg(modrm
)) {
12996 assign( arg32
, getIReg32(eregOfRexRM(pfx
,modrm
)) );
12998 DIP("cvtsi2ss %s,%s\n", nameIReg32(eregOfRexRM(pfx
,modrm
)),
12999 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13001 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13002 assign( arg32
, loadLE(Ity_I32
, mkexpr(addr
)) );
13004 DIP("cvtsi2ss %s,%s\n", dis_buf
,
13005 nameXMMReg(gregOfRexRM(pfx
,modrm
)) );
13008 gregOfRexRM(pfx
,modrm
), 0,
13009 binop(Iop_F64toF32
,
13011 unop(Iop_I32StoF64
, mkexpr(arg32
)) ) );
13014 IRTemp arg64
= newTemp(Ity_I64
);
13015 if (epartIsReg(modrm
)) {
13016 assign( arg64
, getIReg64(eregOfRexRM(pfx
,modrm
)) );
13018 DIP("cvtsi2ssq %s,%s\n", nameIReg64(eregOfRexRM(pfx
,modrm
)),
13019 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13021 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13022 assign( arg64
, loadLE(Ity_I64
, mkexpr(addr
)) );
13024 DIP("cvtsi2ssq %s,%s\n", dis_buf
,
13025 nameXMMReg(gregOfRexRM(pfx
,modrm
)) );
13028 gregOfRexRM(pfx
,modrm
), 0,
13029 binop(Iop_F64toF32
,
13031 binop(Iop_I64StoF64
, mkexpr(rmode
), mkexpr(arg64
)) ) );
13033 goto decode_success
;
13035 /* F2 0F 2A = CVTSI2SD
13036 when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm
13037 when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm
13039 if (haveF2no66noF3(pfx
) && (sz
== 4 || sz
== 8)) {
13040 modrm
= getUChar(delta
);
13042 IRTemp arg32
= newTemp(Ity_I32
);
13043 if (epartIsReg(modrm
)) {
13044 assign( arg32
, getIReg32(eregOfRexRM(pfx
,modrm
)) );
13046 DIP("cvtsi2sdl %s,%s\n", nameIReg32(eregOfRexRM(pfx
,modrm
)),
13047 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13049 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13050 assign( arg32
, loadLE(Ity_I32
, mkexpr(addr
)) );
13052 DIP("cvtsi2sdl %s,%s\n", dis_buf
,
13053 nameXMMReg(gregOfRexRM(pfx
,modrm
)) );
13055 putXMMRegLane64F( gregOfRexRM(pfx
,modrm
), 0,
13056 unop(Iop_I32StoF64
, mkexpr(arg32
))
13060 IRTemp arg64
= newTemp(Ity_I64
);
13061 if (epartIsReg(modrm
)) {
13062 assign( arg64
, getIReg64(eregOfRexRM(pfx
,modrm
)) );
13064 DIP("cvtsi2sdq %s,%s\n", nameIReg64(eregOfRexRM(pfx
,modrm
)),
13065 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13067 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13068 assign( arg64
, loadLE(Ity_I64
, mkexpr(addr
)) );
13070 DIP("cvtsi2sdq %s,%s\n", dis_buf
,
13071 nameXMMReg(gregOfRexRM(pfx
,modrm
)) );
13074 gregOfRexRM(pfx
,modrm
),
13076 binop( Iop_I64StoF64
,
13077 get_sse_roundingmode(),
13082 goto decode_success
;
13084 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
13086 if (have66noF2noF3(pfx
) && sz
== 2) {
13087 IRTemp arg64
= newTemp(Ity_I64
);
13089 modrm
= getUChar(delta
);
13090 if (epartIsReg(modrm
)) {
13091 /* Only switch to MMX mode if the source is a MMX register.
13092 This is inconsistent with all other instructions which
13093 convert between XMM and (M64 or MMX), which always switch
13094 to MMX mode even if 64-bit operand is M64 and not MMX. At
13095 least, that's what the Intel docs seem to me to say.
13098 assign( arg64
, getMMXReg(eregLO3ofRM(modrm
)) );
13100 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm
)),
13101 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13103 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13104 assign( arg64
, loadLE(Ity_I64
, mkexpr(addr
)) );
13106 DIP("cvtpi2pd %s,%s\n", dis_buf
,
13107 nameXMMReg(gregOfRexRM(pfx
,modrm
)) );
13111 gregOfRexRM(pfx
,modrm
), 0,
13112 unop(Iop_I32StoF64
, unop(Iop_64to32
, mkexpr(arg64
)) )
13116 gregOfRexRM(pfx
,modrm
), 1,
13117 unop(Iop_I32StoF64
, unop(Iop_64HIto32
, mkexpr(arg64
)) )
13120 goto decode_success
;
13125 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
13126 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
13127 if ( (haveNo66noF2noF3(pfx
) && sz
== 4)
13128 || (have66noF2noF3(pfx
) && sz
== 2) ) {
13129 modrm
= getUChar(delta
);
13130 if (!epartIsReg(modrm
)) {
13131 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13132 gen_SEGV_if_not_16_aligned( addr
);
13133 storeLE( mkexpr(addr
), getXMMReg(gregOfRexRM(pfx
,modrm
)) );
13134 DIP("movntp%s %s,%s\n", sz
==2 ? "d" : "s",
13136 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13138 goto decode_success
;
13140 /* else fall through */
13146 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
13147 I32 in mmx, according to prevailing SSE rounding mode */
13148 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
13149 I32 in mmx, rounding towards zero */
13150 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13151 IRTemp dst64
= newTemp(Ity_I64
);
13152 IRTemp rmode
= newTemp(Ity_I32
);
13153 IRTemp f32lo
= newTemp(Ity_F32
);
13154 IRTemp f32hi
= newTemp(Ity_F32
);
13155 Bool r2zero
= toBool(opc
== 0x2C);
13158 modrm
= getUChar(delta
);
13160 if (epartIsReg(modrm
)) {
13162 assign(f32lo
, getXMMRegLane32F(eregOfRexRM(pfx
,modrm
), 0));
13163 assign(f32hi
, getXMMRegLane32F(eregOfRexRM(pfx
,modrm
), 1));
13164 DIP("cvt%sps2pi %s,%s\n", r2zero
? "t" : "",
13165 nameXMMReg(eregOfRexRM(pfx
,modrm
)),
13166 nameMMXReg(gregLO3ofRM(modrm
)));
13168 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13169 assign(f32lo
, loadLE(Ity_F32
, mkexpr(addr
)));
13170 assign(f32hi
, loadLE(Ity_F32
, binop( Iop_Add64
,
13174 DIP("cvt%sps2pi %s,%s\n", r2zero
? "t" : "",
13176 nameMMXReg(gregLO3ofRM(modrm
)));
13180 assign(rmode
, mkU32((UInt
)Irrm_ZERO
) );
13182 assign( rmode
, get_sse_roundingmode() );
13187 binop( Iop_32HLto64
,
13188 binop( Iop_F64toI32S
,
13190 unop( Iop_F32toF64
, mkexpr(f32hi
) ) ),
13191 binop( Iop_F64toI32S
,
13193 unop( Iop_F32toF64
, mkexpr(f32lo
) ) )
13197 putMMXReg(gregLO3ofRM(modrm
), mkexpr(dst64
));
13198 goto decode_success
;
13200 /* F3 0F 2D = CVTSS2SI
13201 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
13202 according to prevailing SSE rounding mode
13203 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
13204 according to prevailing SSE rounding mode
13206 /* F3 0F 2C = CVTTSS2SI
13207 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
13208 truncating towards zero
13209 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
13210 truncating towards zero
13212 if (haveF3no66noF2(pfx
) && (sz
== 4 || sz
== 8)) {
13213 delta
= dis_CVTxSS2SI( vbi
, pfx
, delta
, False
/*!isAvx*/, opc
, sz
);
13214 goto decode_success
;
13216 /* F2 0F 2D = CVTSD2SI
13217 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
13218 according to prevailing SSE rounding mode
13219 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
13220 according to prevailing SSE rounding mode
13222 /* F2 0F 2C = CVTTSD2SI
13223 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
13224 truncating towards zero
13225 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
13226 truncating towards zero
13228 if (haveF2no66noF3(pfx
) && (sz
== 4 || sz
== 8)) {
13229 delta
= dis_CVTxSD2SI( vbi
, pfx
, delta
, False
/*!isAvx*/, opc
, sz
);
13230 goto decode_success
;
13232 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
13233 I32 in mmx, according to prevailing SSE rounding mode */
13234 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
13235 I32 in mmx, rounding towards zero */
13236 if (have66noF2noF3(pfx
) && sz
== 2) {
13237 IRTemp dst64
= newTemp(Ity_I64
);
13238 IRTemp rmode
= newTemp(Ity_I32
);
13239 IRTemp f64lo
= newTemp(Ity_F64
);
13240 IRTemp f64hi
= newTemp(Ity_F64
);
13241 Bool r2zero
= toBool(opc
== 0x2C);
13244 modrm
= getUChar(delta
);
13246 if (epartIsReg(modrm
)) {
13248 assign(f64lo
, getXMMRegLane64F(eregOfRexRM(pfx
,modrm
), 0));
13249 assign(f64hi
, getXMMRegLane64F(eregOfRexRM(pfx
,modrm
), 1));
13250 DIP("cvt%spd2pi %s,%s\n", r2zero
? "t" : "",
13251 nameXMMReg(eregOfRexRM(pfx
,modrm
)),
13252 nameMMXReg(gregLO3ofRM(modrm
)));
13254 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13255 assign(f64lo
, loadLE(Ity_F64
, mkexpr(addr
)));
13256 assign(f64hi
, loadLE(Ity_F64
, binop( Iop_Add64
,
13260 DIP("cvt%spf2pi %s,%s\n", r2zero
? "t" : "",
13262 nameMMXReg(gregLO3ofRM(modrm
)));
13266 assign(rmode
, mkU32((UInt
)Irrm_ZERO
) );
13268 assign( rmode
, get_sse_roundingmode() );
13273 binop( Iop_32HLto64
,
13274 binop( Iop_F64toI32S
, mkexpr(rmode
), mkexpr(f64hi
) ),
13275 binop( Iop_F64toI32S
, mkexpr(rmode
), mkexpr(f64lo
) )
13279 putMMXReg(gregLO3ofRM(modrm
), mkexpr(dst64
));
13280 goto decode_success
;
13286 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
13287 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */
13288 if (have66noF2noF3(pfx
) && sz
== 2) {
13289 delta
= dis_COMISD( vbi
, pfx
, delta
, False
/*!isAvx*/, opc
);
13290 goto decode_success
;
13292 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
13293 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */
13294 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13295 delta
= dis_COMISS( vbi
, pfx
, delta
, False
/*!isAvx*/, opc
);
13296 goto decode_success
;
13301 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
13302 to 4 lowest bits of ireg(G) */
13303 if (haveNo66noF2noF3(pfx
) && (sz
== 4 || sz
== 8)
13304 && epartIsReg(getUChar(delta
))) {
13305 /* sz == 8 is a kludge to handle insns with REX.W redundantly
13306 set to 1, which has been known to happen:
13308 4c 0f 50 d9 rex64X movmskps %xmm1,%r11d
13310 20071106: Intel docs say that REX.W isn't redundant: when
13311 present, a 64-bit register is written; when not present, only
13312 the 32-bit half is written. However, testing on a Core2
13313 machine suggests the entire 64 bit register is written
13314 irrespective of the status of REX.W. That could be because
13315 of the default rule that says "if the lower half of a 32-bit
13316 register is written, the upper half is zeroed". By using
13317 putIReg32 here we inadvertantly produce the same behaviour as
13318 the Core2, for the same reason -- putIReg32 implements said
13321 AMD docs give no indication that REX.W is even valid for this
13323 delta
= dis_MOVMSKPS_128( vbi
, pfx
, delta
, False
/*!isAvx*/ );
13324 goto decode_success
;
13326 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
13327 2 lowest bits of ireg(G) */
13328 if (have66noF2noF3(pfx
) && (sz
== 2 || sz
== 8)) {
13329 /* sz == 8 is a kludge to handle insns with REX.W redundantly
13330 set to 1, which has been known to happen:
13331 66 4c 0f 50 d9 rex64X movmskpd %xmm1,%r11d
13332 20071106: see further comments on MOVMSKPS implementation above.
13334 delta
= dis_MOVMSKPD_128( vbi
, pfx
, delta
, False
/*!isAvx*/ );
13335 goto decode_success
;
13340 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */
13341 if (haveF3no66noF2(pfx
) && sz
== 4) {
13342 delta
= dis_SSE_E_to_G_unary_lo32( vbi
, pfx
, delta
,
13343 "sqrtss", Iop_Sqrt32F0x4
);
13344 goto decode_success
;
13346 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */
13347 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13348 delta
= dis_SSE_E_to_G_unary_all( vbi
, pfx
, delta
,
13349 "sqrtps", Iop_Sqrt32Fx4
);
13350 goto decode_success
;
13352 /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
13353 if (haveF2no66noF3(pfx
) && sz
== 4) {
13354 delta
= dis_SSE_E_to_G_unary_lo64( vbi
, pfx
, delta
,
13355 "sqrtsd", Iop_Sqrt64F0x2
);
13356 goto decode_success
;
13358 /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
13359 if (have66noF2noF3(pfx
) && sz
== 2) {
13360 delta
= dis_SSE_E_to_G_unary_all( vbi
, pfx
, delta
,
13361 "sqrtpd", Iop_Sqrt64Fx2
);
13362 goto decode_success
;
13367 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
13368 if (haveF3no66noF2(pfx
) && sz
== 4) {
13369 delta
= dis_SSE_E_to_G_unary_lo32( vbi
, pfx
, delta
,
13370 "rsqrtss", Iop_RSqrtEst32F0x4
);
13371 goto decode_success
;
13373 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
13374 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13375 delta
= dis_SSE_E_to_G_unary_all( vbi
, pfx
, delta
,
13376 "rsqrtps", Iop_RSqrtEst32Fx4
);
13377 goto decode_success
;
13382 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
13383 if (haveF3no66noF2(pfx
) && sz
== 4) {
13384 delta
= dis_SSE_E_to_G_unary_lo32( vbi
, pfx
, delta
,
13385 "rcpss", Iop_RecipEst32F0x4
);
13386 goto decode_success
;
13388 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
13389 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13390 delta
= dis_SSE_E_to_G_unary_all( vbi
, pfx
, delta
,
13391 "rcpps", Iop_RecipEst32Fx4
);
13392 goto decode_success
;
13397 /* 0F 54 = ANDPS -- G = G and E */
13398 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13399 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "andps", Iop_AndV128
);
13400 goto decode_success
;
13402 /* 66 0F 54 = ANDPD -- G = G and E */
13403 if (have66noF2noF3(pfx
) && sz
== 2) {
13404 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "andpd", Iop_AndV128
);
13405 goto decode_success
;
13410 /* 0F 55 = ANDNPS -- G = (not G) and E */
13411 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13412 delta
= dis_SSE_E_to_G_all_invG( vbi
, pfx
, delta
, "andnps",
13414 goto decode_success
;
13416 /* 66 0F 55 = ANDNPD -- G = (not G) and E */
13417 if (have66noF2noF3(pfx
) && sz
== 2) {
13418 delta
= dis_SSE_E_to_G_all_invG( vbi
, pfx
, delta
, "andnpd",
13420 goto decode_success
;
13425 /* 0F 56 = ORPS -- G = G and E */
13426 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13427 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "orps", Iop_OrV128
);
13428 goto decode_success
;
13430 /* 66 0F 56 = ORPD -- G = G and E */
13431 if (have66noF2noF3(pfx
) && sz
== 2) {
13432 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "orpd", Iop_OrV128
);
13433 goto decode_success
;
13438 /* 66 0F 57 = XORPD -- G = G xor E */
13439 if (have66noF2noF3(pfx
) && sz
== 2) {
13440 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "xorpd", Iop_XorV128
);
13441 goto decode_success
;
13443 /* 0F 57 = XORPS -- G = G xor E */
13444 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13445 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "xorps", Iop_XorV128
);
13446 goto decode_success
;
13451 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
13452 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13453 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "addps", Iop_Add32Fx4
);
13454 goto decode_success
;
13456 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
13457 if (haveF3no66noF2(pfx
) && sz
== 4) {
13458 delta
= dis_SSE_E_to_G_lo32( vbi
, pfx
, delta
, "addss", Iop_Add32F0x4
);
13459 goto decode_success
;
13461 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
13462 if (haveF2no66noF3(pfx
)
13463 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
13464 delta
= dis_SSE_E_to_G_lo64( vbi
, pfx
, delta
, "addsd", Iop_Add64F0x2
);
13465 goto decode_success
;
13467 /* 66 0F 58 = ADDPD -- add 32Fx4 from R/M to R */
13468 if (have66noF2noF3(pfx
)
13469 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
13470 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "addpd", Iop_Add64Fx2
);
13471 goto decode_success
;
13476 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
13477 if (haveF2no66noF3(pfx
)
13478 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
13479 delta
= dis_SSE_E_to_G_lo64( vbi
, pfx
, delta
, "mulsd", Iop_Mul64F0x2
);
13480 goto decode_success
;
13482 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
13483 if (haveF3no66noF2(pfx
) && sz
== 4) {
13484 delta
= dis_SSE_E_to_G_lo32( vbi
, pfx
, delta
, "mulss", Iop_Mul32F0x4
);
13485 goto decode_success
;
13487 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
13488 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13489 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "mulps", Iop_Mul32Fx4
);
13490 goto decode_success
;
13492 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
13493 if (have66noF2noF3(pfx
)
13494 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
13495 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "mulpd", Iop_Mul64Fx2
);
13496 goto decode_success
;
13501 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
13503 if (haveNo66noF2noF3(pfx
)
13504 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
13505 delta
= dis_CVTPS2PD_128( vbi
, pfx
, delta
, False
/*!isAvx*/ );
13506 goto decode_success
;
13508 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
13510 if (haveF3no66noF2(pfx
) && sz
== 4) {
13511 IRTemp f32lo
= newTemp(Ity_F32
);
13513 modrm
= getUChar(delta
);
13514 if (epartIsReg(modrm
)) {
13516 assign(f32lo
, getXMMRegLane32F(eregOfRexRM(pfx
,modrm
), 0));
13517 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
13518 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13520 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13521 assign(f32lo
, loadLE(Ity_F32
, mkexpr(addr
)));
13523 DIP("cvtss2sd %s,%s\n", dis_buf
,
13524 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13527 putXMMRegLane64F( gregOfRexRM(pfx
,modrm
), 0,
13528 unop( Iop_F32toF64
, mkexpr(f32lo
) ) );
13530 goto decode_success
;
13532 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
13533 low 1/4 xmm(G), according to prevailing SSE rounding mode */
13534 if (haveF2no66noF3(pfx
) && sz
== 4) {
13535 IRTemp rmode
= newTemp(Ity_I32
);
13536 IRTemp f64lo
= newTemp(Ity_F64
);
13538 modrm
= getUChar(delta
);
13539 if (epartIsReg(modrm
)) {
13541 assign(f64lo
, getXMMRegLane64F(eregOfRexRM(pfx
,modrm
), 0));
13542 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
13543 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13545 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13546 assign(f64lo
, loadLE(Ity_F64
, mkexpr(addr
)));
13548 DIP("cvtsd2ss %s,%s\n", dis_buf
,
13549 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13552 assign( rmode
, get_sse_roundingmode() );
13554 gregOfRexRM(pfx
,modrm
), 0,
13555 binop( Iop_F64toF32
, mkexpr(rmode
), mkexpr(f64lo
) )
13558 goto decode_success
;
13560 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
13561 lo half xmm(G), rounding according to prevailing SSE rounding
13562 mode, and zero upper half */
13563 /* Note, this is practically identical to CVTPD2DQ. It would have
13564 be nice to merge them together. */
13565 if (have66noF2noF3(pfx
) && sz
== 2) {
13566 delta
= dis_CVTPD2PS_128( vbi
, pfx
, delta
, False
/*!isAvx*/ );
13567 goto decode_success
;
13572 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
13573 xmm(G), rounding towards zero */
13574 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
13575 xmm(G), as per the prevailing rounding mode */
13576 if ( (have66noF2noF3(pfx
) && sz
== 2)
13577 || (haveF3no66noF2(pfx
) && sz
== 4) ) {
13578 Bool r2zero
= toBool(sz
== 4); // FIXME -- unreliable (???)
13579 delta
= dis_CVTxPS2DQ_128( vbi
, pfx
, delta
, False
/*!isAvx*/, r2zero
);
13580 goto decode_success
;
13582 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
13584 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13585 delta
= dis_CVTDQ2PS_128( vbi
, pfx
, delta
, False
/*!isAvx*/ );
13586 goto decode_success
;
13591 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
13592 if (haveF3no66noF2(pfx
) && sz
== 4) {
13593 delta
= dis_SSE_E_to_G_lo32( vbi
, pfx
, delta
, "subss", Iop_Sub32F0x4
);
13594 goto decode_success
;
13596 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
13597 if (haveF2no66noF3(pfx
)
13598 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
13599 delta
= dis_SSE_E_to_G_lo64( vbi
, pfx
, delta
, "subsd", Iop_Sub64F0x2
);
13600 goto decode_success
;
13602 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
13603 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13604 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "subps", Iop_Sub32Fx4
);
13605 goto decode_success
;
13607 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
13608 if (have66noF2noF3(pfx
) && sz
== 2) {
13609 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "subpd", Iop_Sub64Fx2
);
13610 goto decode_success
;
13615 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
13616 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13617 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "minps", Iop_Min32Fx4
);
13618 goto decode_success
;
13620 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
13621 if (haveF3no66noF2(pfx
) && sz
== 4) {
13622 delta
= dis_SSE_E_to_G_lo32( vbi
, pfx
, delta
, "minss", Iop_Min32F0x4
);
13623 goto decode_success
;
13625 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
13626 if (haveF2no66noF3(pfx
)
13627 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
13628 delta
= dis_SSE_E_to_G_lo64( vbi
, pfx
, delta
, "minsd", Iop_Min64F0x2
);
13629 goto decode_success
;
13631 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
13632 if (have66noF2noF3(pfx
) && sz
== 2) {
13633 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "minpd", Iop_Min64Fx2
);
13634 goto decode_success
;
13639 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
13640 if (haveF2no66noF3(pfx
) && sz
== 4) {
13641 delta
= dis_SSE_E_to_G_lo64( vbi
, pfx
, delta
, "divsd", Iop_Div64F0x2
);
13642 goto decode_success
;
13644 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
13645 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13646 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "divps", Iop_Div32Fx4
);
13647 goto decode_success
;
13649 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
13650 if (haveF3no66noF2(pfx
) && sz
== 4) {
13651 delta
= dis_SSE_E_to_G_lo32( vbi
, pfx
, delta
, "divss", Iop_Div32F0x4
);
13652 goto decode_success
;
13654 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
13655 if (have66noF2noF3(pfx
) && sz
== 2) {
13656 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "divpd", Iop_Div64Fx2
);
13657 goto decode_success
;
13662 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
13663 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13664 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "maxps", Iop_Max32Fx4
);
13665 goto decode_success
;
13667 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
13668 if (haveF3no66noF2(pfx
) && sz
== 4) {
13669 delta
= dis_SSE_E_to_G_lo32( vbi
, pfx
, delta
, "maxss", Iop_Max32F0x4
);
13670 goto decode_success
;
13672 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
13673 if (haveF2no66noF3(pfx
)
13674 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
13675 delta
= dis_SSE_E_to_G_lo64( vbi
, pfx
, delta
, "maxsd", Iop_Max64F0x2
);
13676 goto decode_success
;
13678 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
13679 if (have66noF2noF3(pfx
) && sz
== 2) {
13680 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "maxpd", Iop_Max64Fx2
);
13681 goto decode_success
;
13686 /* 66 0F 60 = PUNPCKLBW */
13687 if (have66noF2noF3(pfx
) && sz
== 2) {
13688 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13690 Iop_InterleaveLO8x16
, True
);
13691 goto decode_success
;
13696 /* 66 0F 61 = PUNPCKLWD */
13697 if (have66noF2noF3(pfx
) && sz
== 2) {
13698 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13700 Iop_InterleaveLO16x8
, True
);
13701 goto decode_success
;
13706 /* 66 0F 62 = PUNPCKLDQ */
13707 if (have66noF2noF3(pfx
) && sz
== 2) {
13708 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13710 Iop_InterleaveLO32x4
, True
);
13711 goto decode_success
;
13716 /* 66 0F 63 = PACKSSWB */
13717 if (have66noF2noF3(pfx
) && sz
== 2) {
13718 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13720 Iop_QNarrowBin16Sto8Sx16
, True
);
13721 goto decode_success
;
13726 /* 66 0F 64 = PCMPGTB */
13727 if (have66noF2noF3(pfx
) && sz
== 2) {
13728 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13729 "pcmpgtb", Iop_CmpGT8Sx16
, False
);
13730 goto decode_success
;
13735 /* 66 0F 65 = PCMPGTW */
13736 if (have66noF2noF3(pfx
) && sz
== 2) {
13737 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13738 "pcmpgtw", Iop_CmpGT16Sx8
, False
);
13739 goto decode_success
;
13744 /* 66 0F 66 = PCMPGTD */
13745 if (have66noF2noF3(pfx
) && sz
== 2) {
13746 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13747 "pcmpgtd", Iop_CmpGT32Sx4
, False
);
13748 goto decode_success
;
13753 /* 66 0F 67 = PACKUSWB */
13754 if (have66noF2noF3(pfx
) && sz
== 2) {
13755 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13757 Iop_QNarrowBin16Sto8Ux16
, True
);
13758 goto decode_success
;
13763 /* 66 0F 68 = PUNPCKHBW */
13764 if (have66noF2noF3(pfx
) && sz
== 2) {
13765 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13767 Iop_InterleaveHI8x16
, True
);
13768 goto decode_success
;
13773 /* 66 0F 69 = PUNPCKHWD */
13774 if (have66noF2noF3(pfx
) && sz
== 2) {
13775 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13777 Iop_InterleaveHI16x8
, True
);
13778 goto decode_success
;
13783 /* 66 0F 6A = PUNPCKHDQ */
13784 if (have66noF2noF3(pfx
) && sz
== 2) {
13785 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13787 Iop_InterleaveHI32x4
, True
);
13788 goto decode_success
;
13793 /* 66 0F 6B = PACKSSDW */
13794 if (have66noF2noF3(pfx
) && sz
== 2) {
13795 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13797 Iop_QNarrowBin32Sto16Sx8
, True
);
13798 goto decode_success
;
13803 /* 66 0F 6C = PUNPCKLQDQ */
13804 if (have66noF2noF3(pfx
) && sz
== 2) {
13805 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13807 Iop_InterleaveLO64x2
, True
);
13808 goto decode_success
;
13813 /* 66 0F 6D = PUNPCKHQDQ */
13814 if (have66noF2noF3(pfx
) && sz
== 2) {
13815 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13817 Iop_InterleaveHI64x2
, True
);
13818 goto decode_success
;
13823 /* 66 0F 6E = MOVD from ireg32/m32 to xmm lo 1/4,
13824 zeroing high 3/4 of xmm. */
13825 /* or from ireg64/m64 to xmm lo 1/2,
13826 zeroing high 1/2 of xmm. */
13827 if (have66noF2noF3(pfx
)) {
13828 vassert(sz
== 2 || sz
== 8);
13829 if (sz
== 2) sz
= 4;
13830 modrm
= getUChar(delta
);
13831 if (epartIsReg(modrm
)) {
13835 gregOfRexRM(pfx
,modrm
),
13836 unop( Iop_32UtoV128
, getIReg32(eregOfRexRM(pfx
,modrm
)) )
13838 DIP("movd %s, %s\n", nameIReg32(eregOfRexRM(pfx
,modrm
)),
13839 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13842 gregOfRexRM(pfx
,modrm
),
13843 unop( Iop_64UtoV128
, getIReg64(eregOfRexRM(pfx
,modrm
)) )
13845 DIP("movq %s, %s\n", nameIReg64(eregOfRexRM(pfx
,modrm
)),
13846 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13849 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13852 gregOfRexRM(pfx
,modrm
),
13854 ? unop( Iop_32UtoV128
,loadLE(Ity_I32
, mkexpr(addr
)) )
13855 : unop( Iop_64UtoV128
,loadLE(Ity_I64
, mkexpr(addr
)) )
13857 DIP("mov%c %s, %s\n", sz
== 4 ? 'd' : 'q', dis_buf
,
13858 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13860 goto decode_success
;
13865 if (have66noF2noF3(pfx
)
13866 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
13867 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
13868 modrm
= getUChar(delta
);
13869 if (epartIsReg(modrm
)) {
13870 putXMMReg( gregOfRexRM(pfx
,modrm
),
13871 getXMMReg( eregOfRexRM(pfx
,modrm
) ));
13872 DIP("movdqa %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
13873 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13876 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13877 gen_SEGV_if_not_16_aligned( addr
);
13878 putXMMReg( gregOfRexRM(pfx
,modrm
),
13879 loadLE(Ity_V128
, mkexpr(addr
)) );
13880 DIP("movdqa %s,%s\n", dis_buf
,
13881 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13884 goto decode_success
;
13886 if (haveF3no66noF2(pfx
) && sz
== 4) {
13887 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
13888 modrm
= getUChar(delta
);
13889 if (epartIsReg(modrm
)) {
13890 putXMMReg( gregOfRexRM(pfx
,modrm
),
13891 getXMMReg( eregOfRexRM(pfx
,modrm
) ));
13892 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
13893 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13896 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13897 putXMMReg( gregOfRexRM(pfx
,modrm
),
13898 loadLE(Ity_V128
, mkexpr(addr
)) );
13899 DIP("movdqu %s,%s\n", dis_buf
,
13900 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13903 goto decode_success
;
13908 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
13909 if (have66noF2noF3(pfx
) && sz
== 2) {
13910 delta
= dis_PSHUFD_32x4( vbi
, pfx
, delta
, False
/*!writesYmm*/);
13911 goto decode_success
;
13913 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
13914 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
13915 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13917 IRTemp sV
, dV
, s3
, s2
, s1
, s0
;
13918 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
13919 sV
= newTemp(Ity_I64
);
13920 dV
= newTemp(Ity_I64
);
13922 modrm
= getUChar(delta
);
13923 if (epartIsReg(modrm
)) {
13924 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
13925 order
= (Int
)getUChar(delta
+1);
13927 DIP("pshufw $%d,%s,%s\n", order
,
13928 nameMMXReg(eregLO3ofRM(modrm
)),
13929 nameMMXReg(gregLO3ofRM(modrm
)));
13931 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
,
13932 1/*extra byte after amode*/ );
13933 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
13934 order
= (Int
)getUChar(delta
+alen
);
13936 DIP("pshufw $%d,%s,%s\n", order
,
13938 nameMMXReg(gregLO3ofRM(modrm
)));
13940 breakup64to16s( sV
, &s3
, &s2
, &s1
, &s0
);
13942 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
13944 mk64from16s( SEL((order
>>6)&3), SEL((order
>>4)&3),
13945 SEL((order
>>2)&3), SEL((order
>>0)&3) )
13947 putMMXReg(gregLO3ofRM(modrm
), mkexpr(dV
));
13949 goto decode_success
;
13951 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
13952 mem) to G(xmm), and copy upper half */
13953 if (haveF2no66noF3(pfx
) && sz
== 4) {
13954 delta
= dis_PSHUFxW_128( vbi
, pfx
, delta
,
13955 False
/*!isAvx*/, False
/*!xIsH*/ );
13956 goto decode_success
;
13958 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
13959 mem) to G(xmm), and copy lower half */
13960 if (haveF3no66noF2(pfx
) && sz
== 4) {
13961 delta
= dis_PSHUFxW_128( vbi
, pfx
, delta
,
13962 False
/*!isAvx*/, True
/*xIsH*/ );
13963 goto decode_success
;
13968 /* 66 0F 71 /2 ib = PSRLW by immediate */
13969 if (have66noF2noF3(pfx
) && sz
== 2
13970 && epartIsReg(getUChar(delta
))
13971 && gregLO3ofRM(getUChar(delta
)) == 2) {
13972 delta
= dis_SSE_shiftE_imm( pfx
, delta
, "psrlw", Iop_ShrN16x8
);
13973 goto decode_success
;
13975 /* 66 0F 71 /4 ib = PSRAW by immediate */
13976 if (have66noF2noF3(pfx
) && sz
== 2
13977 && epartIsReg(getUChar(delta
))
13978 && gregLO3ofRM(getUChar(delta
)) == 4) {
13979 delta
= dis_SSE_shiftE_imm( pfx
, delta
, "psraw", Iop_SarN16x8
);
13980 goto decode_success
;
13982 /* 66 0F 71 /6 ib = PSLLW by immediate */
13983 if (have66noF2noF3(pfx
) && sz
== 2
13984 && epartIsReg(getUChar(delta
))
13985 && gregLO3ofRM(getUChar(delta
)) == 6) {
13986 delta
= dis_SSE_shiftE_imm( pfx
, delta
, "psllw", Iop_ShlN16x8
);
13987 goto decode_success
;
13992 /* 66 0F 72 /2 ib = PSRLD by immediate */
13993 if (have66noF2noF3(pfx
) && sz
== 2
13994 && epartIsReg(getUChar(delta
))
13995 && gregLO3ofRM(getUChar(delta
)) == 2) {
13996 delta
= dis_SSE_shiftE_imm( pfx
, delta
, "psrld", Iop_ShrN32x4
);
13997 goto decode_success
;
13999 /* 66 0F 72 /4 ib = PSRAD by immediate */
14000 if (have66noF2noF3(pfx
) && sz
== 2
14001 && epartIsReg(getUChar(delta
))
14002 && gregLO3ofRM(getUChar(delta
)) == 4) {
14003 delta
= dis_SSE_shiftE_imm( pfx
, delta
, "psrad", Iop_SarN32x4
);
14004 goto decode_success
;
14006 /* 66 0F 72 /6 ib = PSLLD by immediate */
14007 if (have66noF2noF3(pfx
) && sz
== 2
14008 && epartIsReg(getUChar(delta
))
14009 && gregLO3ofRM(getUChar(delta
)) == 6) {
14010 delta
= dis_SSE_shiftE_imm( pfx
, delta
, "pslld", Iop_ShlN32x4
);
14011 goto decode_success
;
14016 /* 66 0F 73 /3 ib = PSRLDQ by immediate */
14017 /* note, if mem case ever filled in, 1 byte after amode */
14018 if (have66noF2noF3(pfx
) && sz
== 2
14019 && epartIsReg(getUChar(delta
))
14020 && gregLO3ofRM(getUChar(delta
)) == 3) {
14021 Int imm
= (Int
)getUChar(delta
+1);
14022 Int reg
= eregOfRexRM(pfx
,getUChar(delta
));
14023 DIP("psrldq $%d,%s\n", imm
, nameXMMReg(reg
));
14025 IRTemp sV
= newTemp(Ity_V128
);
14026 assign( sV
, getXMMReg(reg
) );
14027 putXMMReg(reg
, mkexpr(math_PSRLDQ( sV
, imm
)));
14028 goto decode_success
;
14030 /* 66 0F 73 /7 ib = PSLLDQ by immediate */
14031 /* note, if mem case ever filled in, 1 byte after amode */
14032 if (have66noF2noF3(pfx
) && sz
== 2
14033 && epartIsReg(getUChar(delta
))
14034 && gregLO3ofRM(getUChar(delta
)) == 7) {
14035 Int imm
= (Int
)getUChar(delta
+1);
14036 Int reg
= eregOfRexRM(pfx
,getUChar(delta
));
14037 DIP("pslldq $%d,%s\n", imm
, nameXMMReg(reg
));
14038 vassert(imm
>= 0 && imm
<= 255);
14040 IRTemp sV
= newTemp(Ity_V128
);
14041 assign( sV
, getXMMReg(reg
) );
14042 putXMMReg(reg
, mkexpr(math_PSLLDQ( sV
, imm
)));
14043 goto decode_success
;
14045 /* 66 0F 73 /2 ib = PSRLQ by immediate */
14046 if (have66noF2noF3(pfx
) && sz
== 2
14047 && epartIsReg(getUChar(delta
))
14048 && gregLO3ofRM(getUChar(delta
)) == 2) {
14049 delta
= dis_SSE_shiftE_imm( pfx
, delta
, "psrlq", Iop_ShrN64x2
);
14050 goto decode_success
;
14052 /* 66 0F 73 /6 ib = PSLLQ by immediate */
14053 if (have66noF2noF3(pfx
) && sz
== 2
14054 && epartIsReg(getUChar(delta
))
14055 && gregLO3ofRM(getUChar(delta
)) == 6) {
14056 delta
= dis_SSE_shiftE_imm( pfx
, delta
, "psllq", Iop_ShlN64x2
);
14057 goto decode_success
;
14062 /* 66 0F 74 = PCMPEQB */
14063 if (have66noF2noF3(pfx
) && sz
== 2) {
14064 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14065 "pcmpeqb", Iop_CmpEQ8x16
, False
);
14066 goto decode_success
;
14071 /* 66 0F 75 = PCMPEQW */
14072 if (have66noF2noF3(pfx
) && sz
== 2) {
14073 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14074 "pcmpeqw", Iop_CmpEQ16x8
, False
);
14075 goto decode_success
;
14080 /* 66 0F 76 = PCMPEQD */
14081 if (have66noF2noF3(pfx
) && sz
== 2) {
14082 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14083 "pcmpeqd", Iop_CmpEQ32x4
, False
);
14084 goto decode_success
;
14089 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
14090 G (lo half xmm). Upper half of G is zeroed out. */
14091 if (haveF3no66noF2(pfx
)
14092 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
14093 modrm
= getUChar(delta
);
14094 if (epartIsReg(modrm
)) {
14095 putXMMRegLane64( gregOfRexRM(pfx
,modrm
), 0,
14096 getXMMRegLane64( eregOfRexRM(pfx
,modrm
), 0 ));
14097 /* zero bits 127:64 */
14098 putXMMRegLane64( gregOfRexRM(pfx
,modrm
), 1, mkU64(0) );
14099 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
14100 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
14103 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
14104 putXMMReg( gregOfRexRM(pfx
,modrm
), mkV128(0) );
14105 putXMMRegLane64( gregOfRexRM(pfx
,modrm
), 0,
14106 loadLE(Ity_I64
, mkexpr(addr
)) );
14107 DIP("movsd %s,%s\n", dis_buf
,
14108 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
14111 goto decode_success
;
14113 /* 66 0F 7E = MOVD from xmm low 1/4 to ireg32 or m32. */
14114 /* or from xmm low 1/2 to ireg64 or m64. */
14115 if (have66noF2noF3(pfx
) && (sz
== 2 || sz
== 8)) {
14116 if (sz
== 2) sz
= 4;
14117 modrm
= getUChar(delta
);
14118 if (epartIsReg(modrm
)) {
14121 putIReg32( eregOfRexRM(pfx
,modrm
),
14122 getXMMRegLane32(gregOfRexRM(pfx
,modrm
), 0) );
14123 DIP("movd %s, %s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
14124 nameIReg32(eregOfRexRM(pfx
,modrm
)));
14126 putIReg64( eregOfRexRM(pfx
,modrm
),
14127 getXMMRegLane64(gregOfRexRM(pfx
,modrm
), 0) );
14128 DIP("movq %s, %s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
14129 nameIReg64(eregOfRexRM(pfx
,modrm
)));
14132 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
14134 storeLE( mkexpr(addr
),
14136 ? getXMMRegLane32(gregOfRexRM(pfx
,modrm
),0)
14137 : getXMMRegLane64(gregOfRexRM(pfx
,modrm
),0) );
14138 DIP("mov%c %s, %s\n", sz
== 4 ? 'd' : 'q',
14139 nameXMMReg(gregOfRexRM(pfx
,modrm
)), dis_buf
);
14141 goto decode_success
;
14146 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
14147 if (haveF3no66noF2(pfx
) && sz
== 4) {
14148 modrm
= getUChar(delta
);
14149 if (epartIsReg(modrm
)) {
14150 goto decode_failure
; /* awaiting test case */
14152 putXMMReg( eregOfRexRM(pfx
,modrm
),
14153 getXMMReg(gregOfRexRM(pfx
,modrm
)) );
14154 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
14155 nameXMMReg(eregOfRexRM(pfx
,modrm
)));
14157 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
14159 storeLE( mkexpr(addr
), getXMMReg(gregOfRexRM(pfx
,modrm
)) );
14160 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)), dis_buf
);
14162 goto decode_success
;
14164 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
14165 if (have66noF2noF3(pfx
) && sz
== 2) {
14166 modrm
= getUChar(delta
);
14167 if (epartIsReg(modrm
)) {
14169 putXMMReg( eregOfRexRM(pfx
,modrm
),
14170 getXMMReg(gregOfRexRM(pfx
,modrm
)) );
14171 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
14172 nameXMMReg(eregOfRexRM(pfx
,modrm
)));
14174 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
14175 gen_SEGV_if_not_16_aligned( addr
);
14177 storeLE( mkexpr(addr
), getXMMReg(gregOfRexRM(pfx
,modrm
)) );
14178 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)), dis_buf
);
14180 goto decode_success
;
14185 /* 0F AE /7 = SFENCE -- flush pending operations to memory */
14186 if (haveNo66noF2noF3(pfx
)
14187 && epartIsReg(getUChar(delta
)) && gregLO3ofRM(getUChar(delta
)) == 7
14190 /* Insert a memory fence. It's sometimes important that these
14191 are carried through to the generated code. */
14192 stmt( IRStmt_MBE(Imbe_Fence
) );
14194 goto decode_success
;
14196 /* mindless duplication follows .. */
14197 /* 0F AE /5 = LFENCE -- flush pending operations to memory */
14198 /* 0F AE /6 = MFENCE -- flush pending operations to memory */
14199 if (haveNo66noF2noF3(pfx
)
14200 && epartIsReg(getUChar(delta
))
14201 && (gregLO3ofRM(getUChar(delta
)) == 5
14202 || gregLO3ofRM(getUChar(delta
)) == 6)
14205 /* Insert a memory fence. It's sometimes important that these
14206 are carried through to the generated code. */
14207 stmt( IRStmt_MBE(Imbe_Fence
) );
14208 DIP("%sfence\n", gregLO3ofRM(getUChar(delta
-1))==5 ? "l" : "m");
14209 goto decode_success
;
14212 /* 0F AE /7 = CLFLUSH -- flush cache line */
14213 if (haveNo66noF2noF3(pfx
)
14214 && !epartIsReg(getUChar(delta
)) && gregLO3ofRM(getUChar(delta
)) == 7
14217 /* This is something of a hack. We need to know the size of
14218 the cache line containing addr. Since we don't (easily),
14219 assume 256 on the basis that no real cache would have a
14220 line that big. It's safe to invalidate more stuff than we
14221 need, just inefficient. */
14222 ULong lineszB
= 256ULL;
14224 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
14227 /* Round addr down to the start of the containing block. */
14232 mkU64( ~(lineszB
-1) ))) );
14234 stmt( IRStmt_Put(OFFB_CMLEN
, mkU64(lineszB
) ) );
14236 jmp_lit(dres
, Ijk_InvalICache
, (Addr64
)(guest_RIP_bbstart
+delta
));
14238 DIP("clflush %s\n", dis_buf
);
14239 goto decode_success
;
14242 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
14243 if (haveNo66noF2noF3(pfx
)
14244 && !epartIsReg(getUChar(delta
)) && gregLO3ofRM(getUChar(delta
)) == 3
14246 delta
= dis_STMXCSR(vbi
, pfx
, delta
, False
/*!isAvx*/);
14247 goto decode_success
;
14249 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
14250 if (haveNo66noF2noF3(pfx
)
14251 && !epartIsReg(getUChar(delta
)) && gregLO3ofRM(getUChar(delta
)) == 2
14253 delta
= dis_LDMXCSR(vbi
, pfx
, delta
, False
/*!isAvx*/);
14254 goto decode_success
;
14256 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */
14257 if (haveNo66noF2noF3(pfx
) && (sz
== 4 || sz
== 8)
14258 && !epartIsReg(getUChar(delta
))
14259 && gregOfRexRM(pfx
,getUChar(delta
)) == 0) {
14260 delta
= dis_FXSAVE(vbi
, pfx
, delta
, sz
);
14261 goto decode_success
;
14263 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */
14264 if (haveNo66noF2noF3(pfx
) && (sz
== 4 || sz
== 8)
14265 && !epartIsReg(getUChar(delta
))
14266 && gregOfRexRM(pfx
,getUChar(delta
)) == 1) {
14267 delta
= dis_FXRSTOR(vbi
, pfx
, delta
, sz
);
14268 goto decode_success
;
14270 /* 0F AE /4 = XSAVE mem -- write x87, SSE, AVX state to memory */
14271 if (haveNo66noF2noF3(pfx
) && (sz
== 4 || sz
== 8)
14272 && !epartIsReg(getUChar(delta
))
14273 && gregOfRexRM(pfx
,getUChar(delta
)) == 4
14274 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
14275 delta
= dis_XSAVE(vbi
, pfx
, delta
, sz
);
14276 goto decode_success
;
14278 /* 0F AE /5 = XRSTOR mem -- read x87, SSE, AVX state from memory */
14279 if (haveNo66noF2noF3(pfx
) && (sz
== 4 || sz
== 8)
14280 && !epartIsReg(getUChar(delta
))
14281 && gregOfRexRM(pfx
,getUChar(delta
)) == 5
14282 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
14283 delta
= dis_XRSTOR(vbi
, pfx
, delta
, sz
);
14284 goto decode_success
;
14289 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
14290 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
14291 Long delta0
= delta
;
14292 delta
= dis_SSE_cmp_E_to_G( vbi
, pfx
, delta
, "cmpps", True
, 4 );
14293 if (delta
> delta0
) goto decode_success
;
14295 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
14296 if (haveF3no66noF2(pfx
) && sz
== 4) {
14297 Long delta0
= delta
;
14298 delta
= dis_SSE_cmp_E_to_G( vbi
, pfx
, delta
, "cmpss", False
, 4 );
14299 if (delta
> delta0
) goto decode_success
;
14301 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
14302 if (haveF2no66noF3(pfx
) && sz
== 4) {
14303 Long delta0
= delta
;
14304 delta
= dis_SSE_cmp_E_to_G( vbi
, pfx
, delta
, "cmpsd", False
, 8 );
14305 if (delta
> delta0
) goto decode_success
;
14307 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
14308 if (have66noF2noF3(pfx
) && sz
== 2) {
14309 Long delta0
= delta
;
14310 delta
= dis_SSE_cmp_E_to_G( vbi
, pfx
, delta
, "cmppd", True
, 8 );
14311 if (delta
> delta0
) goto decode_success
;
14316 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
14317 if (haveNo66noF2noF3(pfx
) && (sz
== 4 || sz
== 8)) {
14318 modrm
= getUChar(delta
);
14319 if (!epartIsReg(modrm
)) {
14320 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
14321 storeLE( mkexpr(addr
), getIRegG(sz
, pfx
, modrm
) );
14322 DIP("movnti %s,%s\n", dis_buf
,
14323 nameIRegG(sz
, pfx
, modrm
));
14325 goto decode_success
;
14327 /* else fall through */
14332 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14333 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
14334 put it into the specified lane of mmx(G). */
14335 if (haveNo66noF2noF3(pfx
)
14336 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
14337 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
14338 mmx reg. t4 is the new lane value. t5 is the original
14339 mmx value. t6 is the new mmx value. */
14341 t4
= newTemp(Ity_I16
);
14342 t5
= newTemp(Ity_I64
);
14343 t6
= newTemp(Ity_I64
);
14344 modrm
= getUChar(delta
);
14347 assign(t5
, getMMXReg(gregLO3ofRM(modrm
)));
14348 breakup64to16s( t5
, &t3
, &t2
, &t1
, &t0
);
14350 if (epartIsReg(modrm
)) {
14351 assign(t4
, getIReg16(eregOfRexRM(pfx
,modrm
)));
14353 lane
= getUChar(delta
-1);
14354 DIP("pinsrw $%d,%s,%s\n", lane
,
14355 nameIReg16(eregOfRexRM(pfx
,modrm
)),
14356 nameMMXReg(gregLO3ofRM(modrm
)));
14358 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
14360 lane
= getUChar(delta
-1);
14361 assign(t4
, loadLE(Ity_I16
, mkexpr(addr
)));
14362 DIP("pinsrw $%d,%s,%s\n", lane
,
14364 nameMMXReg(gregLO3ofRM(modrm
)));
14367 switch (lane
& 3) {
14368 case 0: assign(t6
, mk64from16s(t3
,t2
,t1
,t4
)); break;
14369 case 1: assign(t6
, mk64from16s(t3
,t2
,t4
,t0
)); break;
14370 case 2: assign(t6
, mk64from16s(t3
,t4
,t1
,t0
)); break;
14371 case 3: assign(t6
, mk64from16s(t4
,t2
,t1
,t0
)); break;
14372 default: vassert(0);
14374 putMMXReg(gregLO3ofRM(modrm
), mkexpr(t6
));
14375 goto decode_success
;
14377 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
14378 put it into the specified lane of xmm(G). */
14379 if (have66noF2noF3(pfx
)
14380 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
14382 t4
= newTemp(Ity_I16
);
14383 modrm
= getUChar(delta
);
14384 UInt rG
= gregOfRexRM(pfx
,modrm
);
14385 if (epartIsReg(modrm
)) {
14386 UInt rE
= eregOfRexRM(pfx
,modrm
);
14387 assign(t4
, getIReg16(rE
));
14389 lane
= getUChar(delta
-1);
14390 DIP("pinsrw $%d,%s,%s\n",
14391 lane
, nameIReg16(rE
), nameXMMReg(rG
));
14393 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
,
14394 1/*byte after the amode*/ );
14396 lane
= getUChar(delta
-1);
14397 assign(t4
, loadLE(Ity_I16
, mkexpr(addr
)));
14398 DIP("pinsrw $%d,%s,%s\n",
14399 lane
, dis_buf
, nameXMMReg(rG
));
14401 IRTemp src_vec
= newTemp(Ity_V128
);
14402 assign(src_vec
, getXMMReg(rG
));
14403 IRTemp res_vec
= math_PINSRW_128( src_vec
, t4
, lane
& 7);
14404 putXMMReg(rG
, mkexpr(res_vec
));
14405 goto decode_success
;
14410 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14411 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
14412 zero-extend of it in ireg(G). */
14413 if (haveNo66noF2noF3(pfx
) && (sz
== 4 || sz
== 8)) {
14414 modrm
= getUChar(delta
);
14415 if (epartIsReg(modrm
)) {
14416 IRTemp sV
= newTemp(Ity_I64
);
14417 t5
= newTemp(Ity_I16
);
14419 assign(sV
, getMMXReg(eregLO3ofRM(modrm
)));
14420 breakup64to16s( sV
, &t3
, &t2
, &t1
, &t0
);
14421 switch (getUChar(delta
+1) & 3) {
14422 case 0: assign(t5
, mkexpr(t0
)); break;
14423 case 1: assign(t5
, mkexpr(t1
)); break;
14424 case 2: assign(t5
, mkexpr(t2
)); break;
14425 case 3: assign(t5
, mkexpr(t3
)); break;
14426 default: vassert(0);
14429 putIReg64(gregOfRexRM(pfx
,modrm
), unop(Iop_16Uto64
, mkexpr(t5
)));
14431 putIReg32(gregOfRexRM(pfx
,modrm
), unop(Iop_16Uto32
, mkexpr(t5
)));
14432 DIP("pextrw $%d,%s,%s\n",
14433 (Int
)getUChar(delta
+1),
14434 nameMMXReg(eregLO3ofRM(modrm
)),
14435 sz
==8 ? nameIReg64(gregOfRexRM(pfx
,modrm
))
14436 : nameIReg32(gregOfRexRM(pfx
,modrm
))
14439 goto decode_success
;
14441 /* else fall through */
14442 /* note, for anyone filling in the mem case: this insn has one
14443 byte after the amode and therefore you must pass 1 as the
14444 last arg to disAMode */
14446 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put
14447 zero-extend of it in ireg(G). */
14448 if (have66noF2noF3(pfx
)
14449 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
14450 Long delta0
= delta
;
14451 delta
= dis_PEXTRW_128_EregOnly_toG( vbi
, pfx
, delta
,
14453 if (delta
> delta0
) goto decode_success
;
14454 /* else fall through -- decoding has failed */
14459 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
14460 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
14462 IRTemp sV
= newTemp(Ity_V128
);
14463 IRTemp dV
= newTemp(Ity_V128
);
14464 modrm
= getUChar(delta
);
14465 UInt rG
= gregOfRexRM(pfx
,modrm
);
14466 assign( dV
, getXMMReg(rG
) );
14467 if (epartIsReg(modrm
)) {
14468 UInt rE
= eregOfRexRM(pfx
,modrm
);
14469 assign( sV
, getXMMReg(rE
) );
14470 imm8
= (Int
)getUChar(delta
+1);
14472 DIP("shufps $%d,%s,%s\n", imm8
, nameXMMReg(rE
), nameXMMReg(rG
));
14474 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
14475 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
14476 imm8
= (Int
)getUChar(delta
+alen
);
14478 DIP("shufps $%d,%s,%s\n", imm8
, dis_buf
, nameXMMReg(rG
));
14480 IRTemp res
= math_SHUFPS_128( sV
, dV
, imm8
);
14481 putXMMReg( gregOfRexRM(pfx
,modrm
), mkexpr(res
) );
14482 goto decode_success
;
14484 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
14485 if (have66noF2noF3(pfx
) && sz
== 2) {
14487 IRTemp sV
= newTemp(Ity_V128
);
14488 IRTemp dV
= newTemp(Ity_V128
);
14490 modrm
= getUChar(delta
);
14491 assign( dV
, getXMMReg(gregOfRexRM(pfx
,modrm
)) );
14493 if (epartIsReg(modrm
)) {
14494 assign( sV
, getXMMReg(eregOfRexRM(pfx
,modrm
)) );
14495 select
= (Int
)getUChar(delta
+1);
14497 DIP("shufpd $%d,%s,%s\n", select
,
14498 nameXMMReg(eregOfRexRM(pfx
,modrm
)),
14499 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
14501 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
14502 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
14503 select
= getUChar(delta
+alen
);
14505 DIP("shufpd $%d,%s,%s\n", select
,
14507 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
14510 IRTemp res
= math_SHUFPD_128( sV
, dV
, select
);
14511 putXMMReg( gregOfRexRM(pfx
,modrm
), mkexpr(res
) );
14512 goto decode_success
;
14517 /* 66 0F D1 = PSRLW by E */
14518 if (have66noF2noF3(pfx
) && sz
== 2) {
14519 delta
= dis_SSE_shiftG_byE( vbi
, pfx
, delta
, "psrlw", Iop_ShrN16x8
);
14520 goto decode_success
;
14525 /* 66 0F D2 = PSRLD by E */
14526 if (have66noF2noF3(pfx
) && sz
== 2) {
14527 delta
= dis_SSE_shiftG_byE( vbi
, pfx
, delta
, "psrld", Iop_ShrN32x4
);
14528 goto decode_success
;
14533 /* 66 0F D3 = PSRLQ by E */
14534 if (have66noF2noF3(pfx
) && sz
== 2) {
14535 delta
= dis_SSE_shiftG_byE( vbi
, pfx
, delta
, "psrlq", Iop_ShrN64x2
);
14536 goto decode_success
;
14541 /* 66 0F D4 = PADDQ */
14542 if (have66noF2noF3(pfx
) && sz
== 2) {
14543 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14544 "paddq", Iop_Add64x2
, False
);
14545 goto decode_success
;
14547 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
14548 /* 0F D4 = PADDQ -- add 64x1 */
14549 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
14551 delta
= dis_MMXop_regmem_to_reg (
14552 vbi
, pfx
, delta
, opc
, "paddq", False
);
14553 goto decode_success
;
14558 /* 66 0F D5 = PMULLW -- 16x8 multiply */
14559 if (have66noF2noF3(pfx
) && sz
== 2) {
14560 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14561 "pmullw", Iop_Mul16x8
, False
);
14562 goto decode_success
;
14567 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
14569 if (haveF3no66noF2(pfx
) && sz
== 4) {
14570 modrm
= getUChar(delta
);
14571 if (epartIsReg(modrm
)) {
14573 putXMMReg( gregOfRexRM(pfx
,modrm
),
14574 unop(Iop_64UtoV128
, getMMXReg( eregLO3ofRM(modrm
) )) );
14575 DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm
)),
14576 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
14578 goto decode_success
;
14580 /* apparently no mem case for this insn */
14582 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
14583 or lo half xmm). */
14584 if (have66noF2noF3(pfx
)
14585 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
14586 modrm
= getUChar(delta
);
14587 if (epartIsReg(modrm
)) {
14588 /* fall through, awaiting test case */
14589 /* dst: lo half copied, hi half zeroed */
14591 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
14592 storeLE( mkexpr(addr
),
14593 getXMMRegLane64( gregOfRexRM(pfx
,modrm
), 0 ));
14594 DIP("movq %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)), dis_buf
);
14596 goto decode_success
;
14599 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
14600 if (haveF2no66noF3(pfx
) && sz
== 4) {
14601 modrm
= getUChar(delta
);
14602 if (epartIsReg(modrm
)) {
14604 putMMXReg( gregLO3ofRM(modrm
),
14605 getXMMRegLane64( eregOfRexRM(pfx
,modrm
), 0 ));
14606 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
14607 nameMMXReg(gregLO3ofRM(modrm
)));
14609 goto decode_success
;
14611 /* apparently no mem case for this insn */
14616 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16
14617 lanes in xmm(E), turn them into a byte, and put
14618 zero-extend of it in ireg(G). Doing this directly is just
14619 too cumbersome; give up therefore and call a helper. */
14620 if (have66noF2noF3(pfx
)
14621 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)
14622 && epartIsReg(getUChar(delta
))) { /* no memory case, it seems */
14623 delta
= dis_PMOVMSKB_128( vbi
, pfx
, delta
, False
/*!isAvx*/ );
14624 goto decode_success
;
14626 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14627 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
14628 mmx(E), turn them into a byte, and put zero-extend of it in
14630 if (haveNo66noF2noF3(pfx
)
14631 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
14632 modrm
= getUChar(delta
);
14633 if (epartIsReg(modrm
)) {
14635 t0
= newTemp(Ity_I64
);
14636 t1
= newTemp(Ity_I32
);
14637 assign(t0
, getMMXReg(eregLO3ofRM(modrm
)));
14638 assign(t1
, unop(Iop_8Uto32
, unop(Iop_GetMSBs8x8
, mkexpr(t0
))));
14639 putIReg32(gregOfRexRM(pfx
,modrm
), mkexpr(t1
));
14640 DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm
)),
14641 nameIReg32(gregOfRexRM(pfx
,modrm
)));
14643 goto decode_success
;
14645 /* else fall through */
14650 /* 66 0F D8 = PSUBUSB */
14651 if (have66noF2noF3(pfx
) && sz
== 2) {
14652 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14653 "psubusb", Iop_QSub8Ux16
, False
);
14654 goto decode_success
;
14659 /* 66 0F D9 = PSUBUSW */
14660 if (have66noF2noF3(pfx
) && sz
== 2) {
14661 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14662 "psubusw", Iop_QSub16Ux8
, False
);
14663 goto decode_success
;
14668 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14669 /* 0F DA = PMINUB -- 8x8 unsigned min */
14670 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
14672 delta
= dis_MMXop_regmem_to_reg (
14673 vbi
, pfx
, delta
, opc
, "pminub", False
);
14674 goto decode_success
;
14676 /* 66 0F DA = PMINUB -- 8x16 unsigned min */
14677 if (have66noF2noF3(pfx
) && sz
== 2) {
14678 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14679 "pminub", Iop_Min8Ux16
, False
);
14680 goto decode_success
;
14685 /* 66 0F DB = PAND */
14686 if (have66noF2noF3(pfx
) && sz
== 2) {
14687 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "pand", Iop_AndV128
);
14688 goto decode_success
;
14693 /* 66 0F DC = PADDUSB */
14694 if (have66noF2noF3(pfx
) && sz
== 2) {
14695 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14696 "paddusb", Iop_QAdd8Ux16
, False
);
14697 goto decode_success
;
14702 /* 66 0F DD = PADDUSW */
14703 if (have66noF2noF3(pfx
) && sz
== 2) {
14704 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14705 "paddusw", Iop_QAdd16Ux8
, False
);
14706 goto decode_success
;
14711 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14712 /* 0F DE = PMAXUB -- 8x8 unsigned max */
14713 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
14715 delta
= dis_MMXop_regmem_to_reg (
14716 vbi
, pfx
, delta
, opc
, "pmaxub", False
);
14717 goto decode_success
;
14719 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
14720 if (have66noF2noF3(pfx
) && sz
== 2) {
14721 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14722 "pmaxub", Iop_Max8Ux16
, False
);
14723 goto decode_success
;
14728 /* 66 0F DF = PANDN */
14729 if (have66noF2noF3(pfx
) && sz
== 2) {
14730 delta
= dis_SSE_E_to_G_all_invG( vbi
, pfx
, delta
, "pandn", Iop_AndV128
);
14731 goto decode_success
;
14736 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14737 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
14738 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
14740 delta
= dis_MMXop_regmem_to_reg (
14741 vbi
, pfx
, delta
, opc
, "pavgb", False
);
14742 goto decode_success
;
14744 /* 66 0F E0 = PAVGB */
14745 if (have66noF2noF3(pfx
) && sz
== 2) {
14746 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14747 "pavgb", Iop_Avg8Ux16
, False
);
14748 goto decode_success
;
14753 /* 66 0F E1 = PSRAW by E */
14754 if (have66noF2noF3(pfx
) && sz
== 2) {
14755 delta
= dis_SSE_shiftG_byE( vbi
, pfx
, delta
, "psraw", Iop_SarN16x8
);
14756 goto decode_success
;
14761 /* 66 0F E2 = PSRAD by E */
14762 if (have66noF2noF3(pfx
) && sz
== 2) {
14763 delta
= dis_SSE_shiftG_byE( vbi
, pfx
, delta
, "psrad", Iop_SarN32x4
);
14764 goto decode_success
;
14769 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14770 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
14771 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
14773 delta
= dis_MMXop_regmem_to_reg (
14774 vbi
, pfx
, delta
, opc
, "pavgw", False
);
14775 goto decode_success
;
14777 /* 66 0F E3 = PAVGW */
14778 if (have66noF2noF3(pfx
) && sz
== 2) {
14779 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14780 "pavgw", Iop_Avg16Ux8
, False
);
14781 goto decode_success
;
14786 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14787 /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */
14788 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
14790 delta
= dis_MMXop_regmem_to_reg (
14791 vbi
, pfx
, delta
, opc
, "pmuluh", False
);
14792 goto decode_success
;
14794 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
14795 if (have66noF2noF3(pfx
) && sz
== 2) {
14796 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14797 "pmulhuw", Iop_MulHi16Ux8
, False
);
14798 goto decode_success
;
14803 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
14804 if (have66noF2noF3(pfx
) && sz
== 2) {
14805 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14806 "pmulhw", Iop_MulHi16Sx8
, False
);
14807 goto decode_success
;
14812 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
14813 lo half xmm(G), and zero upper half, rounding towards zero */
14814 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
14815 lo half xmm(G), according to prevailing rounding mode, and zero
14817 if ( (haveF2no66noF3(pfx
) && sz
== 4)
14818 || (have66noF2noF3(pfx
) && sz
== 2) ) {
14819 delta
= dis_CVTxPD2DQ_128( vbi
, pfx
, delta
, False
/*!isAvx*/,
14820 toBool(sz
== 2)/*r2zero*/);
14821 goto decode_success
;
14823 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
14825 if (haveF3no66noF2(pfx
) && sz
== 4) {
14826 delta
= dis_CVTDQ2PD_128(vbi
, pfx
, delta
, False
/*!isAvx*/);
14827 goto decode_success
;
14832 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14833 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
14834 Intel manual does not say anything about the usual business of
14835 the FP reg tags getting trashed whenever an MMX insn happens.
14836 So we just leave them alone.
14838 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
14839 modrm
= getUChar(delta
);
14840 if (!epartIsReg(modrm
)) {
14841 /* do_MMX_preamble(); Intel docs don't specify this */
14842 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
14843 storeLE( mkexpr(addr
), getMMXReg(gregLO3ofRM(modrm
)) );
14844 DIP("movntq %s,%s\n", dis_buf
,
14845 nameMMXReg(gregLO3ofRM(modrm
)));
14847 goto decode_success
;
14849 /* else fall through */
14851 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
14852 if (have66noF2noF3(pfx
) && sz
== 2) {
14853 modrm
= getUChar(delta
);
14854 if (!epartIsReg(modrm
)) {
14855 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
14856 gen_SEGV_if_not_16_aligned( addr
);
14857 storeLE( mkexpr(addr
), getXMMReg(gregOfRexRM(pfx
,modrm
)) );
14858 DIP("movntdq %s,%s\n", dis_buf
,
14859 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
14861 goto decode_success
;
14863 /* else fall through */
14868 /* 66 0F E8 = PSUBSB */
14869 if (have66noF2noF3(pfx
) && sz
== 2) {
14870 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14871 "psubsb", Iop_QSub8Sx16
, False
);
14872 goto decode_success
;
14877 /* 66 0F E9 = PSUBSW */
14878 if (have66noF2noF3(pfx
) && sz
== 2) {
14879 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14880 "psubsw", Iop_QSub16Sx8
, False
);
14881 goto decode_success
;
14886 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14887 /* 0F EA = PMINSW -- 16x4 signed min */
14888 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
14890 delta
= dis_MMXop_regmem_to_reg (
14891 vbi
, pfx
, delta
, opc
, "pminsw", False
);
14892 goto decode_success
;
14894 /* 66 0F EA = PMINSW -- 16x8 signed min */
14895 if (have66noF2noF3(pfx
) && sz
== 2) {
14896 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14897 "pminsw", Iop_Min16Sx8
, False
);
14898 goto decode_success
;
14903 /* 66 0F EB = POR */
14904 if (have66noF2noF3(pfx
) && sz
== 2) {
14905 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "por", Iop_OrV128
);
14906 goto decode_success
;
14911 /* 66 0F EC = PADDSB */
14912 if (have66noF2noF3(pfx
) && sz
== 2) {
14913 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14914 "paddsb", Iop_QAdd8Sx16
, False
);
14915 goto decode_success
;
14920 /* 66 0F ED = PADDSW */
14921 if (have66noF2noF3(pfx
) && sz
== 2) {
14922 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14923 "paddsw", Iop_QAdd16Sx8
, False
);
14924 goto decode_success
;
14929 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14930 /* 0F EE = PMAXSW -- 16x4 signed max */
14931 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
14933 delta
= dis_MMXop_regmem_to_reg (
14934 vbi
, pfx
, delta
, opc
, "pmaxsw", False
);
14935 goto decode_success
;
14937 /* 66 0F EE = PMAXSW -- 16x8 signed max */
14938 if (have66noF2noF3(pfx
) && sz
== 2) {
14939 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14940 "pmaxsw", Iop_Max16Sx8
, False
);
14941 goto decode_success
;
14946 /* 66 0F EF = PXOR */
14947 if (have66noF2noF3(pfx
) && sz
== 2) {
14948 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "pxor", Iop_XorV128
);
14949 goto decode_success
;
14954 /* 66 0F F1 = PSLLW by E */
14955 if (have66noF2noF3(pfx
) && sz
== 2) {
14956 delta
= dis_SSE_shiftG_byE( vbi
, pfx
, delta
, "psllw", Iop_ShlN16x8
);
14957 goto decode_success
;
14962 /* 66 0F F2 = PSLLD by E */
14963 if (have66noF2noF3(pfx
) && sz
== 2) {
14964 delta
= dis_SSE_shiftG_byE( vbi
, pfx
, delta
, "pslld", Iop_ShlN32x4
);
14965 goto decode_success
;
14970 /* 66 0F F3 = PSLLQ by E */
14971 if (have66noF2noF3(pfx
) && sz
== 2) {
14972 delta
= dis_SSE_shiftG_byE( vbi
, pfx
, delta
, "psllq", Iop_ShlN64x2
);
14973 goto decode_success
;
14978 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
14979 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
14981 if (have66noF2noF3(pfx
) && sz
== 2) {
14982 IRTemp sV
= newTemp(Ity_V128
);
14983 IRTemp dV
= newTemp(Ity_V128
);
14984 modrm
= getUChar(delta
);
14985 UInt rG
= gregOfRexRM(pfx
,modrm
);
14986 assign( dV
, getXMMReg(rG
) );
14987 if (epartIsReg(modrm
)) {
14988 UInt rE
= eregOfRexRM(pfx
,modrm
);
14989 assign( sV
, getXMMReg(rE
) );
14991 DIP("pmuludq %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
14993 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
14994 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
14996 DIP("pmuludq %s,%s\n", dis_buf
, nameXMMReg(rG
));
14998 putXMMReg( rG
, mkexpr(math_PMULUDQ_128( sV
, dV
)) );
14999 goto decode_success
;
15001 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
15002 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
15003 0 to form 64-bit result */
15004 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
15005 IRTemp sV
= newTemp(Ity_I64
);
15006 IRTemp dV
= newTemp(Ity_I64
);
15007 t1
= newTemp(Ity_I32
);
15008 t0
= newTemp(Ity_I32
);
15009 modrm
= getUChar(delta
);
15012 assign( dV
, getMMXReg(gregLO3ofRM(modrm
)) );
15014 if (epartIsReg(modrm
)) {
15015 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
15017 DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm
)),
15018 nameMMXReg(gregLO3ofRM(modrm
)));
15020 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15021 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
15023 DIP("pmuludq %s,%s\n", dis_buf
,
15024 nameMMXReg(gregLO3ofRM(modrm
)));
15027 assign( t0
, unop(Iop_64to32
, mkexpr(dV
)) );
15028 assign( t1
, unop(Iop_64to32
, mkexpr(sV
)) );
15029 putMMXReg( gregLO3ofRM(modrm
),
15030 binop( Iop_MullU32
, mkexpr(t0
), mkexpr(t1
) ) );
15031 goto decode_success
;
15036 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
15037 E(xmm or mem) to G(xmm) */
15038 if (have66noF2noF3(pfx
) && sz
== 2) {
15039 IRTemp sV
= newTemp(Ity_V128
);
15040 IRTemp dV
= newTemp(Ity_V128
);
15041 modrm
= getUChar(delta
);
15042 UInt rG
= gregOfRexRM(pfx
,modrm
);
15043 if (epartIsReg(modrm
)) {
15044 UInt rE
= eregOfRexRM(pfx
,modrm
);
15045 assign( sV
, getXMMReg(rE
) );
15047 DIP("pmaddwd %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
15049 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15050 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15052 DIP("pmaddwd %s,%s\n", dis_buf
, nameXMMReg(rG
));
15054 assign( dV
, getXMMReg(rG
) );
15055 putXMMReg( rG
, mkexpr(math_PMADDWD_128(dV
, sV
)) );
15056 goto decode_success
;
15061 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
15062 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
15063 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
15065 delta
= dis_MMXop_regmem_to_reg (
15066 vbi
, pfx
, delta
, opc
, "psadbw", False
);
15067 goto decode_success
;
15069 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
15070 from E(xmm or mem) to G(xmm) */
15071 if (have66noF2noF3(pfx
) && sz
== 2) {
15072 IRTemp sV
= newTemp(Ity_V128
);
15073 IRTemp dV
= newTemp(Ity_V128
);
15074 modrm
= getUChar(delta
);
15075 UInt rG
= gregOfRexRM(pfx
,modrm
);
15076 if (epartIsReg(modrm
)) {
15077 UInt rE
= eregOfRexRM(pfx
,modrm
);
15078 assign( sV
, getXMMReg(rE
) );
15080 DIP("psadbw %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
15082 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15083 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15085 DIP("psadbw %s,%s\n", dis_buf
, nameXMMReg(rG
));
15087 assign( dV
, getXMMReg(rG
) );
15088 putXMMReg( rG
, mkexpr( math_PSADBW_128 ( dV
, sV
) ) );
15090 goto decode_success
;
15095 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
15096 /* 0F F7 = MASKMOVQ -- 8x8 masked store */
15097 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
15099 delta
= dis_MMX( &ok
, vbi
, pfx
, sz
, delta
-1 );
15100 if (ok
) goto decode_success
;
15102 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
15103 if (have66noF2noF3(pfx
) && sz
== 2 && epartIsReg(getUChar(delta
))) {
15104 delta
= dis_MASKMOVDQU( vbi
, pfx
, delta
, False
/*!isAvx*/ );
15105 goto decode_success
;
15110 /* 66 0F F8 = PSUBB */
15111 if (have66noF2noF3(pfx
) && sz
== 2) {
15112 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
15113 "psubb", Iop_Sub8x16
, False
);
15114 goto decode_success
;
15119 /* 66 0F F9 = PSUBW */
15120 if (have66noF2noF3(pfx
) && sz
== 2) {
15121 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
15122 "psubw", Iop_Sub16x8
, False
);
15123 goto decode_success
;
15128 /* 66 0F FA = PSUBD */
15129 if (have66noF2noF3(pfx
) && sz
== 2) {
15130 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
15131 "psubd", Iop_Sub32x4
, False
);
15132 goto decode_success
;
15137 /* 66 0F FB = PSUBQ */
15138 if (have66noF2noF3(pfx
) && sz
== 2) {
15139 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
15140 "psubq", Iop_Sub64x2
, False
);
15141 goto decode_success
;
15143 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
15144 /* 0F FB = PSUBQ -- sub 64x1 */
15145 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
15147 delta
= dis_MMXop_regmem_to_reg (
15148 vbi
, pfx
, delta
, opc
, "psubq", False
);
15149 goto decode_success
;
15154 /* 66 0F FC = PADDB */
15155 if (have66noF2noF3(pfx
) && sz
== 2) {
15156 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
15157 "paddb", Iop_Add8x16
, False
);
15158 goto decode_success
;
15163 /* 66 0F FD = PADDW */
15164 if (have66noF2noF3(pfx
) && sz
== 2) {
15165 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
15166 "paddw", Iop_Add16x8
, False
);
15167 goto decode_success
;
15172 /* 66 0F FE = PADDD */
15173 if (have66noF2noF3(pfx
) && sz
== 2) {
15174 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
15175 "paddd", Iop_Add32x4
, False
);
15176 goto decode_success
;
15181 goto decode_failure
;
15186 *decode_OK
= False
;
15195 /*------------------------------------------------------------*/
15197 /*--- Top-level SSE3 (not SupSSE3): dis_ESC_0F__SSE3 ---*/
15199 /*------------------------------------------------------------*/
15201 static Long
dis_MOVDDUP_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
15202 Long delta
, Bool isAvx
)
15204 IRTemp addr
= IRTemp_INVALID
;
15207 IRTemp sV
= newTemp(Ity_V128
);
15208 IRTemp d0
= newTemp(Ity_I64
);
15209 UChar modrm
= getUChar(delta
);
15210 UInt rG
= gregOfRexRM(pfx
,modrm
);
15211 if (epartIsReg(modrm
)) {
15212 UInt rE
= eregOfRexRM(pfx
,modrm
);
15213 assign( sV
, getXMMReg(rE
) );
15214 DIP("%smovddup %s,%s\n",
15215 isAvx
? "v" : "", nameXMMReg(rE
), nameXMMReg(rG
));
15217 assign ( d0
, unop(Iop_V128to64
, mkexpr(sV
)) );
15219 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15220 assign( d0
, loadLE(Ity_I64
, mkexpr(addr
)) );
15221 DIP("%smovddup %s,%s\n",
15222 isAvx
? "v" : "", dis_buf
, nameXMMReg(rG
));
15225 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)
15226 ( rG
, binop(Iop_64HLtoV128
,mkexpr(d0
),mkexpr(d0
)) );
15231 static Long
dis_MOVDDUP_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
15234 IRTemp addr
= IRTemp_INVALID
;
15237 IRTemp d0
= newTemp(Ity_I64
);
15238 IRTemp d1
= newTemp(Ity_I64
);
15239 UChar modrm
= getUChar(delta
);
15240 UInt rG
= gregOfRexRM(pfx
,modrm
);
15241 if (epartIsReg(modrm
)) {
15242 UInt rE
= eregOfRexRM(pfx
,modrm
);
15243 DIP("vmovddup %s,%s\n", nameYMMReg(rE
), nameYMMReg(rG
));
15245 assign ( d0
, getYMMRegLane64(rE
, 0) );
15246 assign ( d1
, getYMMRegLane64(rE
, 2) );
15248 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15249 assign( d0
, loadLE(Ity_I64
, mkexpr(addr
)) );
15250 assign( d1
, loadLE(Ity_I64
, binop(Iop_Add64
,
15251 mkexpr(addr
), mkU64(16))) );
15252 DIP("vmovddup %s,%s\n", dis_buf
, nameYMMReg(rG
));
15255 putYMMRegLane64( rG
, 0, mkexpr(d0
) );
15256 putYMMRegLane64( rG
, 1, mkexpr(d0
) );
15257 putYMMRegLane64( rG
, 2, mkexpr(d1
) );
15258 putYMMRegLane64( rG
, 3, mkexpr(d1
) );
15263 static Long
dis_MOVSxDUP_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
15264 Long delta
, Bool isAvx
, Bool isL
)
15266 IRTemp addr
= IRTemp_INVALID
;
15269 IRTemp sV
= newTemp(Ity_V128
);
15270 UChar modrm
= getUChar(delta
);
15271 UInt rG
= gregOfRexRM(pfx
,modrm
);
15272 IRTemp s3
, s2
, s1
, s0
;
15273 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
15274 if (epartIsReg(modrm
)) {
15275 UInt rE
= eregOfRexRM(pfx
,modrm
);
15276 assign( sV
, getXMMReg(rE
) );
15277 DIP("%smovs%cdup %s,%s\n",
15278 isAvx
? "v" : "", isL
? 'l' : 'h', nameXMMReg(rE
), nameXMMReg(rG
));
15281 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15283 gen_SEGV_if_not_16_aligned( addr
);
15284 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15285 DIP("%smovs%cdup %s,%s\n",
15286 isAvx
? "v" : "", isL
? 'l' : 'h', dis_buf
, nameXMMReg(rG
));
15289 breakupV128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
15290 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)
15291 ( rG
, isL
? mkV128from32s( s2
, s2
, s0
, s0
)
15292 : mkV128from32s( s3
, s3
, s1
, s1
) );
15297 static Long
dis_MOVSxDUP_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
15298 Long delta
, Bool isL
)
15300 IRTemp addr
= IRTemp_INVALID
;
15303 IRTemp sV
= newTemp(Ity_V256
);
15304 UChar modrm
= getUChar(delta
);
15305 UInt rG
= gregOfRexRM(pfx
,modrm
);
15306 IRTemp s7
, s6
, s5
, s4
, s3
, s2
, s1
, s0
;
15307 s7
= s6
= s5
= s4
= s3
= s2
= s1
= s0
= IRTemp_INVALID
;
15308 if (epartIsReg(modrm
)) {
15309 UInt rE
= eregOfRexRM(pfx
,modrm
);
15310 assign( sV
, getYMMReg(rE
) );
15311 DIP("vmovs%cdup %s,%s\n",
15312 isL
? 'l' : 'h', nameYMMReg(rE
), nameYMMReg(rG
));
15315 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15316 assign( sV
, loadLE(Ity_V256
, mkexpr(addr
)) );
15317 DIP("vmovs%cdup %s,%s\n",
15318 isL
? 'l' : 'h', dis_buf
, nameYMMReg(rG
));
15321 breakupV256to32s( sV
, &s7
, &s6
, &s5
, &s4
, &s3
, &s2
, &s1
, &s0
);
15322 putYMMRegLane128( rG
, 1, isL
? mkV128from32s( s6
, s6
, s4
, s4
)
15323 : mkV128from32s( s7
, s7
, s5
, s5
) );
15324 putYMMRegLane128( rG
, 0, isL
? mkV128from32s( s2
, s2
, s0
, s0
)
15325 : mkV128from32s( s3
, s3
, s1
, s1
) );
15330 static IRTemp
math_HADDPS_128 ( IRTemp dV
, IRTemp sV
, Bool isAdd
)
15332 IRTemp s3
, s2
, s1
, s0
, d3
, d2
, d1
, d0
;
15333 IRTemp leftV
= newTemp(Ity_V128
);
15334 IRTemp rightV
= newTemp(Ity_V128
);
15335 IRTemp rm
= newTemp(Ity_I32
);
15336 s3
= s2
= s1
= s0
= d3
= d2
= d1
= d0
= IRTemp_INVALID
;
15338 breakupV128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
15339 breakupV128to32s( dV
, &d3
, &d2
, &d1
, &d0
);
15341 assign( leftV
, mkV128from32s( s2
, s0
, d2
, d0
) );
15342 assign( rightV
, mkV128from32s( s3
, s1
, d3
, d1
) );
15344 IRTemp res
= newTemp(Ity_V128
);
15345 assign( rm
, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
15346 assign( res
, triop(isAdd
? Iop_Add32Fx4
: Iop_Sub32Fx4
,
15347 mkexpr(rm
), mkexpr(leftV
), mkexpr(rightV
) ) );
15352 static IRTemp
math_HADDPD_128 ( IRTemp dV
, IRTemp sV
, Bool isAdd
)
15354 IRTemp s1
, s0
, d1
, d0
;
15355 IRTemp leftV
= newTemp(Ity_V128
);
15356 IRTemp rightV
= newTemp(Ity_V128
);
15357 IRTemp rm
= newTemp(Ity_I32
);
15358 s1
= s0
= d1
= d0
= IRTemp_INVALID
;
15360 breakupV128to64s( sV
, &s1
, &s0
);
15361 breakupV128to64s( dV
, &d1
, &d0
);
15363 assign( leftV
, binop(Iop_64HLtoV128
, mkexpr(s0
), mkexpr(d0
)) );
15364 assign( rightV
, binop(Iop_64HLtoV128
, mkexpr(s1
), mkexpr(d1
)) );
15366 IRTemp res
= newTemp(Ity_V128
);
15367 assign( rm
, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
15368 assign( res
, triop(isAdd
? Iop_Add64Fx2
: Iop_Sub64Fx2
,
15369 mkexpr(rm
), mkexpr(leftV
), mkexpr(rightV
) ) );
15374 __attribute__((noinline
))
15376 Long
dis_ESC_0F__SSE3 ( Bool
* decode_OK
,
15377 const VexAbiInfo
* vbi
,
15378 Prefix pfx
, Int sz
, Long deltaIN
)
15380 IRTemp addr
= IRTemp_INVALID
;
15385 *decode_OK
= False
;
15387 Long delta
= deltaIN
;
15388 UChar opc
= getUChar(delta
);
15393 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
15394 duplicating some lanes (2:2:0:0). */
15395 if (haveF3no66noF2(pfx
) && sz
== 4) {
15396 delta
= dis_MOVSxDUP_128( vbi
, pfx
, delta
, False
/*!isAvx*/,
15398 goto decode_success
;
15400 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
15401 duplicating some lanes (0:1:0:1). */
15402 if (haveF2no66noF3(pfx
)
15403 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
15404 delta
= dis_MOVDDUP_128( vbi
, pfx
, delta
, False
/*!isAvx*/ );
15405 goto decode_success
;
15410 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
15411 duplicating some lanes (3:3:1:1). */
15412 if (haveF3no66noF2(pfx
) && sz
== 4) {
15413 delta
= dis_MOVSxDUP_128( vbi
, pfx
, delta
, False
/*!isAvx*/,
15415 goto decode_success
;
15421 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
15422 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
15423 if (haveF2no66noF3(pfx
) && sz
== 4) {
15424 IRTemp eV
= newTemp(Ity_V128
);
15425 IRTemp gV
= newTemp(Ity_V128
);
15426 Bool isAdd
= opc
== 0x7C;
15427 const HChar
* str
= isAdd
? "add" : "sub";
15428 modrm
= getUChar(delta
);
15429 UInt rG
= gregOfRexRM(pfx
,modrm
);
15430 if (epartIsReg(modrm
)) {
15431 UInt rE
= eregOfRexRM(pfx
,modrm
);
15432 assign( eV
, getXMMReg(rE
) );
15433 DIP("h%sps %s,%s\n", str
, nameXMMReg(rE
), nameXMMReg(rG
));
15436 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15437 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15438 DIP("h%sps %s,%s\n", str
, dis_buf
, nameXMMReg(rG
));
15442 assign( gV
, getXMMReg(rG
) );
15443 putXMMReg( rG
, mkexpr( math_HADDPS_128 ( gV
, eV
, isAdd
) ) );
15444 goto decode_success
;
15446 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
15447 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
15448 if (have66noF2noF3(pfx
) && sz
== 2) {
15449 IRTemp eV
= newTemp(Ity_V128
);
15450 IRTemp gV
= newTemp(Ity_V128
);
15451 Bool isAdd
= opc
== 0x7C;
15452 const HChar
* str
= isAdd
? "add" : "sub";
15453 modrm
= getUChar(delta
);
15454 UInt rG
= gregOfRexRM(pfx
,modrm
);
15455 if (epartIsReg(modrm
)) {
15456 UInt rE
= eregOfRexRM(pfx
,modrm
);
15457 assign( eV
, getXMMReg(rE
) );
15458 DIP("h%spd %s,%s\n", str
, nameXMMReg(rE
), nameXMMReg(rG
));
15461 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15462 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15463 DIP("h%spd %s,%s\n", str
, dis_buf
, nameXMMReg(rG
));
15467 assign( gV
, getXMMReg(rG
) );
15468 putXMMReg( rG
, mkexpr( math_HADDPD_128 ( gV
, eV
, isAdd
) ) );
15469 goto decode_success
;
15474 /* 66 0F D0 = ADDSUBPD -- 64x4 +/- from E (mem or xmm) to G (xmm). */
15475 if (have66noF2noF3(pfx
) && sz
== 2) {
15476 IRTemp eV
= newTemp(Ity_V128
);
15477 IRTemp gV
= newTemp(Ity_V128
);
15478 modrm
= getUChar(delta
);
15479 UInt rG
= gregOfRexRM(pfx
,modrm
);
15480 if (epartIsReg(modrm
)) {
15481 UInt rE
= eregOfRexRM(pfx
,modrm
);
15482 assign( eV
, getXMMReg(rE
) );
15483 DIP("addsubpd %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
15486 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15487 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15488 DIP("addsubpd %s,%s\n", dis_buf
, nameXMMReg(rG
));
15492 assign( gV
, getXMMReg(rG
) );
15493 putXMMReg( rG
, mkexpr( math_ADDSUBPD_128 ( gV
, eV
) ) );
15494 goto decode_success
;
15496 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
15497 if (haveF2no66noF3(pfx
) && sz
== 4) {
15498 IRTemp eV
= newTemp(Ity_V128
);
15499 IRTemp gV
= newTemp(Ity_V128
);
15500 modrm
= getUChar(delta
);
15501 UInt rG
= gregOfRexRM(pfx
,modrm
);
15503 modrm
= getUChar(delta
);
15504 if (epartIsReg(modrm
)) {
15505 UInt rE
= eregOfRexRM(pfx
,modrm
);
15506 assign( eV
, getXMMReg(rE
) );
15507 DIP("addsubps %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
15510 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15511 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15512 DIP("addsubps %s,%s\n", dis_buf
, nameXMMReg(rG
));
15516 assign( gV
, getXMMReg(rG
) );
15517 putXMMReg( rG
, mkexpr( math_ADDSUBPS_128 ( gV
, eV
) ) );
15518 goto decode_success
;
15523 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */
15524 if (haveF2no66noF3(pfx
) && sz
== 4) {
15525 modrm
= getUChar(delta
);
15526 if (epartIsReg(modrm
)) {
15527 goto decode_failure
;
15529 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15530 putXMMReg( gregOfRexRM(pfx
,modrm
),
15531 loadLE(Ity_V128
, mkexpr(addr
)) );
15532 DIP("lddqu %s,%s\n", dis_buf
,
15533 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
15536 goto decode_success
;
15541 goto decode_failure
;
15546 *decode_OK
= False
;
15555 /*------------------------------------------------------------*/
15557 /*--- Top-level SSSE3: dis_ESC_0F38__SupSSE3 ---*/
15559 /*------------------------------------------------------------*/
15562 IRTemp
math_PSHUFB_XMM ( IRTemp dV
/*data to perm*/, IRTemp sV
/*perm*/ )
15564 IRTemp halfMask
= newTemp(Ity_I64
);
15565 assign(halfMask
, mkU64(0x8F8F8F8F8F8F8F8FULL
));
15566 IRExpr
* mask
= binop(Iop_64HLtoV128
, mkexpr(halfMask
), mkexpr(halfMask
));
15567 IRTemp res
= newTemp(Ity_V128
);
15569 binop(Iop_PermOrZero8x16
,
15571 // Mask off bits [6:3] of each source operand lane
15572 binop(Iop_AndV128
, mkexpr(sV
), mask
)
15579 IRTemp
math_PSHUFB_YMM ( IRTemp dV
/*data to perm*/, IRTemp sV
/*perm*/ )
15581 IRTemp sHi
, sLo
, dHi
, dLo
;
15582 sHi
= sLo
= dHi
= dLo
= IRTemp_INVALID
;
15583 breakupV256toV128s( dV
, &dHi
, &dLo
);
15584 breakupV256toV128s( sV
, &sHi
, &sLo
);
15585 IRTemp res
= newTemp(Ity_V256
);
15586 assign(res
, binop(Iop_V128HLtoV256
,
15587 mkexpr(math_PSHUFB_XMM(dHi
, sHi
)),
15588 mkexpr(math_PSHUFB_XMM(dLo
, sLo
))));
15593 static Long
dis_PHADD_128 ( const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
,
15594 Bool isAvx
, UChar opc
)
15596 IRTemp addr
= IRTemp_INVALID
;
15599 const HChar
* str
= "???";
15600 IROp opV64
= Iop_INVALID
;
15601 IROp opCatO
= Iop_CatOddLanes16x4
;
15602 IROp opCatE
= Iop_CatEvenLanes16x4
;
15603 IRTemp sV
= newTemp(Ity_V128
);
15604 IRTemp dV
= newTemp(Ity_V128
);
15605 IRTemp sHi
= newTemp(Ity_I64
);
15606 IRTemp sLo
= newTemp(Ity_I64
);
15607 IRTemp dHi
= newTemp(Ity_I64
);
15608 IRTemp dLo
= newTemp(Ity_I64
);
15609 UChar modrm
= getUChar(delta
);
15610 UInt rG
= gregOfRexRM(pfx
,modrm
);
15611 UInt rV
= isAvx
? getVexNvvvv(pfx
) : rG
;
15614 case 0x01: opV64
= Iop_Add16x4
; str
= "addw"; break;
15615 case 0x02: opV64
= Iop_Add32x2
; str
= "addd"; break;
15616 case 0x03: opV64
= Iop_QAdd16Sx4
; str
= "addsw"; break;
15617 case 0x05: opV64
= Iop_Sub16x4
; str
= "subw"; break;
15618 case 0x06: opV64
= Iop_Sub32x2
; str
= "subd"; break;
15619 case 0x07: opV64
= Iop_QSub16Sx4
; str
= "subsw"; break;
15620 default: vassert(0);
15622 if (opc
== 0x02 || opc
== 0x06) {
15623 opCatO
= Iop_InterleaveHI32x2
;
15624 opCatE
= Iop_InterleaveLO32x2
;
15627 assign( dV
, getXMMReg(rV
) );
15629 if (epartIsReg(modrm
)) {
15630 UInt rE
= eregOfRexRM(pfx
,modrm
);
15631 assign( sV
, getXMMReg(rE
) );
15632 DIP("%sph%s %s,%s\n", isAvx
? "v" : "", str
,
15633 nameXMMReg(rE
), nameXMMReg(rG
));
15636 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15638 gen_SEGV_if_not_16_aligned( addr
);
15639 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15640 DIP("%sph%s %s,%s\n", isAvx
? "v" : "", str
,
15641 dis_buf
, nameXMMReg(rG
));
15645 assign( dHi
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
15646 assign( dLo
, unop(Iop_V128to64
, mkexpr(dV
)) );
15647 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
15648 assign( sLo
, unop(Iop_V128to64
, mkexpr(sV
)) );
15650 /* This isn't a particularly efficient way to compute the
15651 result, but at least it avoids a proliferation of IROps,
15652 hence avoids complication all the backends. */
15654 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)
15656 binop(Iop_64HLtoV128
,
15658 binop(opCatE
,mkexpr(sHi
),mkexpr(sLo
)),
15659 binop(opCatO
,mkexpr(sHi
),mkexpr(sLo
)) ),
15661 binop(opCatE
,mkexpr(dHi
),mkexpr(dLo
)),
15662 binop(opCatO
,mkexpr(dHi
),mkexpr(dLo
)) ) ) );
15667 static Long
dis_PHADD_256 ( const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
,
15670 IRTemp addr
= IRTemp_INVALID
;
15673 const HChar
* str
= "???";
15674 IROp opV64
= Iop_INVALID
;
15675 IROp opCatO
= Iop_CatOddLanes16x4
;
15676 IROp opCatE
= Iop_CatEvenLanes16x4
;
15677 IRTemp sV
= newTemp(Ity_V256
);
15678 IRTemp dV
= newTemp(Ity_V256
);
15679 IRTemp s3
, s2
, s1
, s0
, d3
, d2
, d1
, d0
;
15680 s3
= s2
= s1
= s0
= d3
= d2
= d1
= d0
= IRTemp_INVALID
;
15681 UChar modrm
= getUChar(delta
);
15682 UInt rG
= gregOfRexRM(pfx
,modrm
);
15683 UInt rV
= getVexNvvvv(pfx
);
15686 case 0x01: opV64
= Iop_Add16x4
; str
= "addw"; break;
15687 case 0x02: opV64
= Iop_Add32x2
; str
= "addd"; break;
15688 case 0x03: opV64
= Iop_QAdd16Sx4
; str
= "addsw"; break;
15689 case 0x05: opV64
= Iop_Sub16x4
; str
= "subw"; break;
15690 case 0x06: opV64
= Iop_Sub32x2
; str
= "subd"; break;
15691 case 0x07: opV64
= Iop_QSub16Sx4
; str
= "subsw"; break;
15692 default: vassert(0);
15694 if (opc
== 0x02 || opc
== 0x06) {
15695 opCatO
= Iop_InterleaveHI32x2
;
15696 opCatE
= Iop_InterleaveLO32x2
;
15699 assign( dV
, getYMMReg(rV
) );
15701 if (epartIsReg(modrm
)) {
15702 UInt rE
= eregOfRexRM(pfx
,modrm
);
15703 assign( sV
, getYMMReg(rE
) );
15704 DIP("vph%s %s,%s\n", str
, nameYMMReg(rE
), nameYMMReg(rG
));
15707 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15708 assign( sV
, loadLE(Ity_V256
, mkexpr(addr
)) );
15709 DIP("vph%s %s,%s\n", str
, dis_buf
, nameYMMReg(rG
));
15713 breakupV256to64s( dV
, &d3
, &d2
, &d1
, &d0
);
15714 breakupV256to64s( sV
, &s3
, &s2
, &s1
, &s0
);
15716 /* This isn't a particularly efficient way to compute the
15717 result, but at least it avoids a proliferation of IROps,
15718 hence avoids complication all the backends. */
15721 binop(Iop_V128HLtoV256
,
15722 binop(Iop_64HLtoV128
,
15724 binop(opCatE
,mkexpr(s3
),mkexpr(s2
)),
15725 binop(opCatO
,mkexpr(s3
),mkexpr(s2
)) ),
15727 binop(opCatE
,mkexpr(d3
),mkexpr(d2
)),
15728 binop(opCatO
,mkexpr(d3
),mkexpr(d2
)) ) ),
15729 binop(Iop_64HLtoV128
,
15731 binop(opCatE
,mkexpr(s1
),mkexpr(s0
)),
15732 binop(opCatO
,mkexpr(s1
),mkexpr(s0
)) ),
15734 binop(opCatE
,mkexpr(d1
),mkexpr(d0
)),
15735 binop(opCatO
,mkexpr(d1
),mkexpr(d0
)) ) ) ) );
15740 static IRTemp
math_PMADDUBSW_128 ( IRTemp dV
, IRTemp sV
)
15742 IRTemp res
= newTemp(Ity_V128
);
15743 assign(res
, binop(Iop_PwExtUSMulQAdd8x16
, mkexpr(dV
), mkexpr(sV
)));
15749 IRTemp
math_PMADDUBSW_256 ( IRTemp dV
, IRTemp sV
)
15751 IRTemp sHi
, sLo
, dHi
, dLo
;
15752 sHi
= sLo
= dHi
= dLo
= IRTemp_INVALID
;
15753 breakupV256toV128s( dV
, &dHi
, &dLo
);
15754 breakupV256toV128s( sV
, &sHi
, &sLo
);
15755 IRTemp res
= newTemp(Ity_V256
);
15756 assign(res
, binop(Iop_V128HLtoV256
,
15757 mkexpr(math_PMADDUBSW_128(dHi
, sHi
)),
15758 mkexpr(math_PMADDUBSW_128(dLo
, sLo
))));
15763 __attribute__((noinline
))
15765 Long
dis_ESC_0F38__SupSSE3 ( Bool
* decode_OK
,
15766 const VexAbiInfo
* vbi
,
15767 Prefix pfx
, Int sz
, Long deltaIN
)
15769 IRTemp addr
= IRTemp_INVALID
;
15774 *decode_OK
= False
;
15776 Long delta
= deltaIN
;
15777 UChar opc
= getUChar(delta
);
15782 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
15783 if (have66noF2noF3(pfx
)
15784 && (sz
== 2 || /*redundant REX.W*/ sz
== 8)) {
15785 IRTemp sV
= newTemp(Ity_V128
);
15786 IRTemp dV
= newTemp(Ity_V128
);
15788 modrm
= getUChar(delta
);
15789 assign( dV
, getXMMReg(gregOfRexRM(pfx
,modrm
)) );
15791 if (epartIsReg(modrm
)) {
15792 assign( sV
, getXMMReg(eregOfRexRM(pfx
,modrm
)) );
15794 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
15795 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
15797 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15798 gen_SEGV_if_not_16_aligned( addr
);
15799 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15801 DIP("pshufb %s,%s\n", dis_buf
,
15802 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
15805 IRTemp res
= math_PSHUFB_XMM( dV
, sV
);
15806 putXMMReg(gregOfRexRM(pfx
,modrm
), mkexpr(res
));
15807 goto decode_success
;
15809 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
15810 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
15811 IRTemp sV
= newTemp(Ity_I64
);
15812 IRTemp dV
= newTemp(Ity_I64
);
15814 modrm
= getUChar(delta
);
15816 assign( dV
, getMMXReg(gregLO3ofRM(modrm
)) );
15818 if (epartIsReg(modrm
)) {
15819 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
15821 DIP("pshufb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm
)),
15822 nameMMXReg(gregLO3ofRM(modrm
)));
15824 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15825 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
15827 DIP("pshufb %s,%s\n", dis_buf
,
15828 nameMMXReg(gregLO3ofRM(modrm
)));
15832 gregLO3ofRM(modrm
),
15836 // Mask off bits [6:3] of each source operand lane
15837 binop(Iop_And64
, mkexpr(sV
), mkU64(0x8787878787878787ULL
))
15840 goto decode_success
;
15850 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
15852 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
15854 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
15855 xmm) and G to G (xmm). */
15856 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
15858 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
15860 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
15861 xmm) and G to G (xmm). */
15862 if (have66noF2noF3(pfx
)
15863 && (sz
== 2 || /*redundant REX.W*/ sz
== 8)) {
15864 delta
= dis_PHADD_128( vbi
, pfx
, delta
, False
/*isAvx*/, opc
);
15865 goto decode_success
;
15867 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
15868 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
15870 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
15872 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
15873 mmx) and G to G (mmx). */
15874 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
15876 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
15878 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
15879 mmx) and G to G (mmx). */
15880 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
15881 const HChar
* str
= "???";
15882 IROp opV64
= Iop_INVALID
;
15883 IROp opCatO
= Iop_CatOddLanes16x4
;
15884 IROp opCatE
= Iop_CatEvenLanes16x4
;
15885 IRTemp sV
= newTemp(Ity_I64
);
15886 IRTemp dV
= newTemp(Ity_I64
);
15888 modrm
= getUChar(delta
);
15891 case 0x01: opV64
= Iop_Add16x4
; str
= "addw"; break;
15892 case 0x02: opV64
= Iop_Add32x2
; str
= "addd"; break;
15893 case 0x03: opV64
= Iop_QAdd16Sx4
; str
= "addsw"; break;
15894 case 0x05: opV64
= Iop_Sub16x4
; str
= "subw"; break;
15895 case 0x06: opV64
= Iop_Sub32x2
; str
= "subd"; break;
15896 case 0x07: opV64
= Iop_QSub16Sx4
; str
= "subsw"; break;
15897 default: vassert(0);
15899 if (opc
== 0x02 || opc
== 0x06) {
15900 opCatO
= Iop_InterleaveHI32x2
;
15901 opCatE
= Iop_InterleaveLO32x2
;
15905 assign( dV
, getMMXReg(gregLO3ofRM(modrm
)) );
15907 if (epartIsReg(modrm
)) {
15908 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
15910 DIP("ph%s %s,%s\n", str
, nameMMXReg(eregLO3ofRM(modrm
)),
15911 nameMMXReg(gregLO3ofRM(modrm
)));
15913 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15914 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
15916 DIP("ph%s %s,%s\n", str
, dis_buf
,
15917 nameMMXReg(gregLO3ofRM(modrm
)));
15921 gregLO3ofRM(modrm
),
15923 binop(opCatE
,mkexpr(sV
),mkexpr(dV
)),
15924 binop(opCatO
,mkexpr(sV
),mkexpr(dV
))
15927 goto decode_success
;
15932 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
15933 Unsigned Bytes (XMM) */
15934 if (have66noF2noF3(pfx
)
15935 && (sz
== 2 || /*redundant REX.W*/ sz
== 8)) {
15936 IRTemp sV
= newTemp(Ity_V128
);
15937 IRTemp dV
= newTemp(Ity_V128
);
15938 modrm
= getUChar(delta
);
15939 UInt rG
= gregOfRexRM(pfx
,modrm
);
15941 assign( dV
, getXMMReg(rG
) );
15943 if (epartIsReg(modrm
)) {
15944 UInt rE
= eregOfRexRM(pfx
,modrm
);
15945 assign( sV
, getXMMReg(rE
) );
15947 DIP("pmaddubsw %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
15949 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15950 gen_SEGV_if_not_16_aligned( addr
);
15951 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15953 DIP("pmaddubsw %s,%s\n", dis_buf
, nameXMMReg(rG
));
15956 putXMMReg( rG
, mkexpr( math_PMADDUBSW_128( dV
, sV
) ) );
15957 goto decode_success
;
15959 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
15960 Unsigned Bytes (MMX) */
15961 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
15962 IRTemp sV
= newTemp(Ity_I64
);
15963 IRTemp dV
= newTemp(Ity_I64
);
15964 IRTemp sVoddsSX
= newTemp(Ity_I64
);
15965 IRTemp sVevensSX
= newTemp(Ity_I64
);
15966 IRTemp dVoddsZX
= newTemp(Ity_I64
);
15967 IRTemp dVevensZX
= newTemp(Ity_I64
);
15969 modrm
= getUChar(delta
);
15971 assign( dV
, getMMXReg(gregLO3ofRM(modrm
)) );
15973 if (epartIsReg(modrm
)) {
15974 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
15976 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm
)),
15977 nameMMXReg(gregLO3ofRM(modrm
)));
15979 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15980 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
15982 DIP("pmaddubsw %s,%s\n", dis_buf
,
15983 nameMMXReg(gregLO3ofRM(modrm
)));
15986 /* compute dV unsigned x sV signed */
15988 binop(Iop_SarN16x4
, mkexpr(sV
), mkU8(8)) );
15990 binop(Iop_SarN16x4
,
15991 binop(Iop_ShlN16x4
, mkexpr(sV
), mkU8(8)),
15994 binop(Iop_ShrN16x4
, mkexpr(dV
), mkU8(8)) );
15996 binop(Iop_ShrN16x4
,
15997 binop(Iop_ShlN16x4
, mkexpr(dV
), mkU8(8)),
16001 gregLO3ofRM(modrm
),
16002 binop(Iop_QAdd16Sx4
,
16003 binop(Iop_Mul16x4
, mkexpr(sVoddsSX
), mkexpr(dVoddsZX
)),
16004 binop(Iop_Mul16x4
, mkexpr(sVevensSX
), mkexpr(dVevensZX
))
16007 goto decode_success
;
16014 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
16015 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
16016 /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
16017 if (have66noF2noF3(pfx
)
16018 && (sz
== 2 || /*redundant REX.W*/ sz
== 8)) {
16019 IRTemp sV
= newTemp(Ity_V128
);
16020 IRTemp dV
= newTemp(Ity_V128
);
16021 IRTemp sHi
= newTemp(Ity_I64
);
16022 IRTemp sLo
= newTemp(Ity_I64
);
16023 IRTemp dHi
= newTemp(Ity_I64
);
16024 IRTemp dLo
= newTemp(Ity_I64
);
16025 const HChar
* str
= "???";
16029 case 0x08: laneszB
= 1; str
= "b"; break;
16030 case 0x09: laneszB
= 2; str
= "w"; break;
16031 case 0x0A: laneszB
= 4; str
= "d"; break;
16032 default: vassert(0);
16035 modrm
= getUChar(delta
);
16036 assign( dV
, getXMMReg(gregOfRexRM(pfx
,modrm
)) );
16038 if (epartIsReg(modrm
)) {
16039 assign( sV
, getXMMReg(eregOfRexRM(pfx
,modrm
)) );
16041 DIP("psign%s %s,%s\n", str
, nameXMMReg(eregOfRexRM(pfx
,modrm
)),
16042 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
16044 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16045 gen_SEGV_if_not_16_aligned( addr
);
16046 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
16048 DIP("psign%s %s,%s\n", str
, dis_buf
,
16049 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
16052 assign( dHi
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
16053 assign( dLo
, unop(Iop_V128to64
, mkexpr(dV
)) );
16054 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
16055 assign( sLo
, unop(Iop_V128to64
, mkexpr(sV
)) );
16058 gregOfRexRM(pfx
,modrm
),
16059 binop(Iop_64HLtoV128
,
16060 dis_PSIGN_helper( mkexpr(sHi
), mkexpr(dHi
), laneszB
),
16061 dis_PSIGN_helper( mkexpr(sLo
), mkexpr(dLo
), laneszB
)
16064 goto decode_success
;
16066 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */
16067 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
16068 /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
16069 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
16070 IRTemp sV
= newTemp(Ity_I64
);
16071 IRTemp dV
= newTemp(Ity_I64
);
16072 const HChar
* str
= "???";
16076 case 0x08: laneszB
= 1; str
= "b"; break;
16077 case 0x09: laneszB
= 2; str
= "w"; break;
16078 case 0x0A: laneszB
= 4; str
= "d"; break;
16079 default: vassert(0);
16082 modrm
= getUChar(delta
);
16084 assign( dV
, getMMXReg(gregLO3ofRM(modrm
)) );
16086 if (epartIsReg(modrm
)) {
16087 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
16089 DIP("psign%s %s,%s\n", str
, nameMMXReg(eregLO3ofRM(modrm
)),
16090 nameMMXReg(gregLO3ofRM(modrm
)));
16092 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16093 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
16095 DIP("psign%s %s,%s\n", str
, dis_buf
,
16096 nameMMXReg(gregLO3ofRM(modrm
)));
16100 gregLO3ofRM(modrm
),
16101 dis_PSIGN_helper( mkexpr(sV
), mkexpr(dV
), laneszB
)
16103 goto decode_success
;
16108 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
16110 if (have66noF2noF3(pfx
)
16111 && (sz
== 2 || /*redundant REX.W*/ sz
== 8)) {
16112 IRTemp sV
= newTemp(Ity_V128
);
16113 IRTemp dV
= newTemp(Ity_V128
);
16114 IRTemp sHi
= newTemp(Ity_I64
);
16115 IRTemp sLo
= newTemp(Ity_I64
);
16116 IRTemp dHi
= newTemp(Ity_I64
);
16117 IRTemp dLo
= newTemp(Ity_I64
);
16119 modrm
= getUChar(delta
);
16120 assign( dV
, getXMMReg(gregOfRexRM(pfx
,modrm
)) );
16122 if (epartIsReg(modrm
)) {
16123 assign( sV
, getXMMReg(eregOfRexRM(pfx
,modrm
)) );
16125 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
16126 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
16128 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16129 gen_SEGV_if_not_16_aligned( addr
);
16130 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
16132 DIP("pmulhrsw %s,%s\n", dis_buf
,
16133 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
16136 assign( dHi
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
16137 assign( dLo
, unop(Iop_V128to64
, mkexpr(dV
)) );
16138 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
16139 assign( sLo
, unop(Iop_V128to64
, mkexpr(sV
)) );
16142 gregOfRexRM(pfx
,modrm
),
16143 binop(Iop_64HLtoV128
,
16144 dis_PMULHRSW_helper( mkexpr(sHi
), mkexpr(dHi
) ),
16145 dis_PMULHRSW_helper( mkexpr(sLo
), mkexpr(dLo
) )
16148 goto decode_success
;
16150 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
16152 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
16153 IRTemp sV
= newTemp(Ity_I64
);
16154 IRTemp dV
= newTemp(Ity_I64
);
16156 modrm
= getUChar(delta
);
16158 assign( dV
, getMMXReg(gregLO3ofRM(modrm
)) );
16160 if (epartIsReg(modrm
)) {
16161 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
16163 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm
)),
16164 nameMMXReg(gregLO3ofRM(modrm
)));
16166 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16167 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
16169 DIP("pmulhrsw %s,%s\n", dis_buf
,
16170 nameMMXReg(gregLO3ofRM(modrm
)));
16174 gregLO3ofRM(modrm
),
16175 dis_PMULHRSW_helper( mkexpr(sV
), mkexpr(dV
) )
16177 goto decode_success
;
16184 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
16185 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
16186 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
16187 if (have66noF2noF3(pfx
)
16188 && (sz
== 2 || /*redundant REX.W*/ sz
== 8)) {
16189 IRTemp sV
= newTemp(Ity_V128
);
16190 const HChar
* str
= "???";
16194 case 0x1C: laneszB
= 1; str
= "b"; break;
16195 case 0x1D: laneszB
= 2; str
= "w"; break;
16196 case 0x1E: laneszB
= 4; str
= "d"; break;
16197 default: vassert(0);
16200 modrm
= getUChar(delta
);
16201 if (epartIsReg(modrm
)) {
16202 assign( sV
, getXMMReg(eregOfRexRM(pfx
,modrm
)) );
16204 DIP("pabs%s %s,%s\n", str
, nameXMMReg(eregOfRexRM(pfx
,modrm
)),
16205 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
16207 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16208 gen_SEGV_if_not_16_aligned( addr
);
16209 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
16211 DIP("pabs%s %s,%s\n", str
, dis_buf
,
16212 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
16215 putXMMReg( gregOfRexRM(pfx
,modrm
),
16216 mkexpr(math_PABS_XMM(sV
, laneszB
)) );
16217 goto decode_success
;
16219 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */
16220 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
16221 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
16222 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
16223 IRTemp sV
= newTemp(Ity_I64
);
16224 const HChar
* str
= "???";
16228 case 0x1C: laneszB
= 1; str
= "b"; break;
16229 case 0x1D: laneszB
= 2; str
= "w"; break;
16230 case 0x1E: laneszB
= 4; str
= "d"; break;
16231 default: vassert(0);
16234 modrm
= getUChar(delta
);
16237 if (epartIsReg(modrm
)) {
16238 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
16240 DIP("pabs%s %s,%s\n", str
, nameMMXReg(eregLO3ofRM(modrm
)),
16241 nameMMXReg(gregLO3ofRM(modrm
)));
16243 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16244 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
16246 DIP("pabs%s %s,%s\n", str
, dis_buf
,
16247 nameMMXReg(gregLO3ofRM(modrm
)));
16250 putMMXReg( gregLO3ofRM(modrm
),
16251 mkexpr(math_PABS_MMX( sV
, laneszB
)) );
16252 goto decode_success
;
16262 *decode_OK
= False
;
16271 /*------------------------------------------------------------*/
16273 /*--- Top-level SSSE3: dis_ESC_0F3A__SupSSE3 ---*/
16275 /*------------------------------------------------------------*/
16277 __attribute__((noinline
))
16279 Long
dis_ESC_0F3A__SupSSE3 ( Bool
* decode_OK
,
16280 const VexAbiInfo
* vbi
,
16281 Prefix pfx
, Int sz
, Long deltaIN
)
16284 IRTemp addr
= IRTemp_INVALID
;
16289 *decode_OK
= False
;
16291 Long delta
= deltaIN
;
16292 UChar opc
= getUChar(delta
);
16297 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
16298 if (have66noF2noF3(pfx
)
16299 && (sz
== 2 || /*redundant REX.W*/ sz
== 8)) {
16300 IRTemp sV
= newTemp(Ity_V128
);
16301 IRTemp dV
= newTemp(Ity_V128
);
16303 modrm
= getUChar(delta
);
16304 assign( dV
, getXMMReg(gregOfRexRM(pfx
,modrm
)) );
16306 if (epartIsReg(modrm
)) {
16307 assign( sV
, getXMMReg(eregOfRexRM(pfx
,modrm
)) );
16308 d64
= (Long
)getUChar(delta
+1);
16310 DIP("palignr $%lld,%s,%s\n", d64
,
16311 nameXMMReg(eregOfRexRM(pfx
,modrm
)),
16312 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
16314 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
16315 gen_SEGV_if_not_16_aligned( addr
);
16316 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
16317 d64
= (Long
)getUChar(delta
+alen
);
16319 DIP("palignr $%lld,%s,%s\n", d64
,
16321 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
16324 IRTemp res
= math_PALIGNR_XMM( sV
, dV
, d64
);
16325 putXMMReg( gregOfRexRM(pfx
,modrm
), mkexpr(res
) );
16326 goto decode_success
;
16328 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
16329 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
16330 IRTemp sV
= newTemp(Ity_I64
);
16331 IRTemp dV
= newTemp(Ity_I64
);
16332 IRTemp res
= newTemp(Ity_I64
);
16334 modrm
= getUChar(delta
);
16336 assign( dV
, getMMXReg(gregLO3ofRM(modrm
)) );
16338 if (epartIsReg(modrm
)) {
16339 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
16340 d64
= (Long
)getUChar(delta
+1);
16342 DIP("palignr $%lld,%s,%s\n", d64
,
16343 nameMMXReg(eregLO3ofRM(modrm
)),
16344 nameMMXReg(gregLO3ofRM(modrm
)));
16346 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
16347 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
16348 d64
= (Long
)getUChar(delta
+alen
);
16350 DIP("palignr $%lld%s,%s\n", d64
,
16352 nameMMXReg(gregLO3ofRM(modrm
)));
16356 assign( res
, mkexpr(sV
) );
16358 else if (d64
>= 1 && d64
<= 7) {
16361 binop(Iop_Shr64
, mkexpr(sV
), mkU8(8*d64
)),
16362 binop(Iop_Shl64
, mkexpr(dV
), mkU8(8*(8-d64
))
16365 else if (d64
== 8) {
16366 assign( res
, mkexpr(dV
) );
16368 else if (d64
>= 9 && d64
<= 15) {
16369 assign( res
, binop(Iop_Shr64
, mkexpr(dV
), mkU8(8*(d64
-8))) );
16371 else if (d64
>= 16 && d64
<= 255) {
16372 assign( res
, mkU64(0) );
16377 putMMXReg( gregLO3ofRM(modrm
), mkexpr(res
) );
16378 goto decode_success
;
16388 *decode_OK
= False
;
16397 /*------------------------------------------------------------*/
16399 /*--- Top-level SSE4: dis_ESC_0F__SSE4 ---*/
16401 /*------------------------------------------------------------*/
16403 __attribute__((noinline
))
16405 Long
dis_ESC_0F__SSE4 ( Bool
* decode_OK
,
16406 const VexArchInfo
* archinfo
,
16407 const VexAbiInfo
* vbi
,
16408 Prefix pfx
, Int sz
, Long deltaIN
)
16410 IRTemp addr
= IRTemp_INVALID
;
16411 IRType ty
= Ity_INVALID
;
16416 *decode_OK
= False
;
16418 Long delta
= deltaIN
;
16419 UChar opc
= getUChar(delta
);
16424 /* F3 0F B8 = POPCNT{W,L,Q}
16425 Count the number of 1 bits in a register
16427 if (haveF3noF2(pfx
) /* so both 66 and REX.W are possibilities */
16428 && (sz
== 2 || sz
== 4 || sz
== 8)) {
16429 /*IRType*/ ty
= szToITy(sz
);
16430 IRTemp src
= newTemp(ty
);
16431 modrm
= getUChar(delta
);
16432 if (epartIsReg(modrm
)) {
16433 assign(src
, getIRegE(sz
, pfx
, modrm
));
16435 DIP("popcnt%c %s, %s\n", nameISize(sz
), nameIRegE(sz
, pfx
, modrm
),
16436 nameIRegG(sz
, pfx
, modrm
));
16438 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0);
16439 assign(src
, loadLE(ty
, mkexpr(addr
)));
16441 DIP("popcnt%c %s, %s\n", nameISize(sz
), dis_buf
,
16442 nameIRegG(sz
, pfx
, modrm
));
16445 IRTemp result
= gen_POPCOUNT(ty
, src
);
16446 putIRegG(sz
, pfx
, modrm
, mkexpr(result
));
16448 // Update flags. This is pretty lame .. perhaps can do better
16449 // if this turns out to be performance critical.
16450 // O S A C P are cleared. Z is set if SRC == 0.
16451 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
16452 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
16453 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
16454 stmt( IRStmt_Put( OFFB_CC_DEP1
,
16458 widenUto64(mkexpr(src
)),
16460 mkU8(AMD64G_CC_SHIFT_Z
))));
16462 goto decode_success
;
16467 /* F3 0F BC -- TZCNT (count trailing zeroes. A BMI extension,
16468 which we can only decode if we're sure this is a BMI1 capable cpu
16469 that supports TZCNT, since otherwise it's BSF, which behaves
16470 differently on zero source. */
16471 if (haveF3noF2(pfx
) /* so both 66 and 48 are possibilities */
16472 && (sz
== 2 || sz
== 4 || sz
== 8)
16473 && 0 != (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_BMI
)) {
16474 /*IRType*/ ty
= szToITy(sz
);
16475 IRTemp src
= newTemp(ty
);
16476 modrm
= getUChar(delta
);
16477 if (epartIsReg(modrm
)) {
16478 assign(src
, getIRegE(sz
, pfx
, modrm
));
16480 DIP("tzcnt%c %s, %s\n", nameISize(sz
), nameIRegE(sz
, pfx
, modrm
),
16481 nameIRegG(sz
, pfx
, modrm
));
16483 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0);
16484 assign(src
, loadLE(ty
, mkexpr(addr
)));
16486 DIP("tzcnt%c %s, %s\n", nameISize(sz
), dis_buf
,
16487 nameIRegG(sz
, pfx
, modrm
));
16490 IRTemp res
= gen_TZCNT(ty
, src
);
16491 putIRegG(sz
, pfx
, modrm
, mkexpr(res
));
16493 // Update flags. This is pretty lame .. perhaps can do better
16494 // if this turns out to be performance critical.
16495 // O S A P are cleared. Z is set if RESULT == 0.
16496 // C is set if SRC is zero.
16497 IRTemp src64
= newTemp(Ity_I64
);
16498 IRTemp res64
= newTemp(Ity_I64
);
16499 assign(src64
, widenUto64(mkexpr(src
)));
16500 assign(res64
, widenUto64(mkexpr(res
)));
16502 IRTemp oszacp
= newTemp(Ity_I64
);
16508 binop(Iop_CmpEQ64
, mkexpr(res64
), mkU64(0))),
16509 mkU8(AMD64G_CC_SHIFT_Z
)),
16512 binop(Iop_CmpEQ64
, mkexpr(src64
), mkU64(0))),
16513 mkU8(AMD64G_CC_SHIFT_C
))
16517 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
16518 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
16519 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
16520 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(oszacp
) ));
16522 goto decode_success
;
16527 /* F3 0F BD -- LZCNT (count leading zeroes. An AMD extension,
16528 which we can only decode if we're sure this is an AMD cpu
16529 that supports LZCNT, since otherwise it's BSR, which behaves
16530 differently. Bizarrely, my Sandy Bridge also accepts these
16531 instructions but produces different results. */
16532 if (haveF3noF2(pfx
) /* so both 66 and 48 are possibilities */
16533 && (sz
== 2 || sz
== 4 || sz
== 8)
16534 && 0 != (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_LZCNT
)) {
16535 /*IRType*/ ty
= szToITy(sz
);
16536 IRTemp src
= newTemp(ty
);
16537 modrm
= getUChar(delta
);
16538 if (epartIsReg(modrm
)) {
16539 assign(src
, getIRegE(sz
, pfx
, modrm
));
16541 DIP("lzcnt%c %s, %s\n", nameISize(sz
), nameIRegE(sz
, pfx
, modrm
),
16542 nameIRegG(sz
, pfx
, modrm
));
16544 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0);
16545 assign(src
, loadLE(ty
, mkexpr(addr
)));
16547 DIP("lzcnt%c %s, %s\n", nameISize(sz
), dis_buf
,
16548 nameIRegG(sz
, pfx
, modrm
));
16551 IRTemp res
= gen_LZCNT(ty
, src
);
16552 putIRegG(sz
, pfx
, modrm
, mkexpr(res
));
16554 // Update flags. This is pretty lame .. perhaps can do better
16555 // if this turns out to be performance critical.
16556 // O S A P are cleared. Z is set if RESULT == 0.
16557 // C is set if SRC is zero.
16558 IRTemp src64
= newTemp(Ity_I64
);
16559 IRTemp res64
= newTemp(Ity_I64
);
16560 assign(src64
, widenUto64(mkexpr(src
)));
16561 assign(res64
, widenUto64(mkexpr(res
)));
16563 IRTemp oszacp
= newTemp(Ity_I64
);
16569 binop(Iop_CmpEQ64
, mkexpr(res64
), mkU64(0))),
16570 mkU8(AMD64G_CC_SHIFT_Z
)),
16573 binop(Iop_CmpEQ64
, mkexpr(src64
), mkU64(0))),
16574 mkU8(AMD64G_CC_SHIFT_C
))
16578 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
16579 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
16580 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
16581 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(oszacp
) ));
16583 goto decode_success
;
16593 *decode_OK
= False
;
16602 /*------------------------------------------------------------*/
16604 /*--- Top-level SSE4: dis_ESC_0F38__SSE4 ---*/
16606 /*------------------------------------------------------------*/
16608 static IRTemp
math_PBLENDVB_128 ( IRTemp vecE
, IRTemp vecG
,
16609 IRTemp vec0
/*controlling mask*/,
16610 UInt gran
, IROp opSAR
)
16612 /* The tricky bit is to convert vec0 into a suitable mask, by
16613 copying the most significant bit of each lane into all positions
16615 IRTemp sh
= newTemp(Ity_I8
);
16616 assign(sh
, mkU8(8 * gran
- 1));
16618 IRTemp mask
= newTemp(Ity_V128
);
16619 assign(mask
, binop(opSAR
, mkexpr(vec0
), mkexpr(sh
)));
16621 IRTemp notmask
= newTemp(Ity_V128
);
16622 assign(notmask
, unop(Iop_NotV128
, mkexpr(mask
)));
16624 IRTemp res
= newTemp(Ity_V128
);
16625 assign(res
, binop(Iop_OrV128
,
16626 binop(Iop_AndV128
, mkexpr(vecE
), mkexpr(mask
)),
16627 binop(Iop_AndV128
, mkexpr(vecG
), mkexpr(notmask
))));
16631 static IRTemp
math_PBLENDVB_256 ( IRTemp vecE
, IRTemp vecG
,
16632 IRTemp vec0
/*controlling mask*/,
16633 UInt gran
, IROp opSAR128
)
16635 /* The tricky bit is to convert vec0 into a suitable mask, by
16636 copying the most significant bit of each lane into all positions
16638 IRTemp sh
= newTemp(Ity_I8
);
16639 assign(sh
, mkU8(8 * gran
- 1));
16641 IRTemp vec0Hi
= IRTemp_INVALID
;
16642 IRTemp vec0Lo
= IRTemp_INVALID
;
16643 breakupV256toV128s( vec0
, &vec0Hi
, &vec0Lo
);
16645 IRTemp mask
= newTemp(Ity_V256
);
16646 assign(mask
, binop(Iop_V128HLtoV256
,
16647 binop(opSAR128
, mkexpr(vec0Hi
), mkexpr(sh
)),
16648 binop(opSAR128
, mkexpr(vec0Lo
), mkexpr(sh
))));
16650 IRTemp notmask
= newTemp(Ity_V256
);
16651 assign(notmask
, unop(Iop_NotV256
, mkexpr(mask
)));
16653 IRTemp res
= newTemp(Ity_V256
);
16654 assign(res
, binop(Iop_OrV256
,
16655 binop(Iop_AndV256
, mkexpr(vecE
), mkexpr(mask
)),
16656 binop(Iop_AndV256
, mkexpr(vecG
), mkexpr(notmask
))));
16660 static Long
dis_VBLENDV_128 ( const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
,
16661 const HChar
*name
, UInt gran
, IROp opSAR
)
16663 IRTemp addr
= IRTemp_INVALID
;
16666 UChar modrm
= getUChar(delta
);
16667 UInt rG
= gregOfRexRM(pfx
, modrm
);
16668 UInt rV
= getVexNvvvv(pfx
);
16669 UInt rIS4
= 0xFF; /* invalid */
16670 IRTemp vecE
= newTemp(Ity_V128
);
16671 IRTemp vecV
= newTemp(Ity_V128
);
16672 IRTemp vecIS4
= newTemp(Ity_V128
);
16673 if (epartIsReg(modrm
)) {
16675 UInt rE
= eregOfRexRM(pfx
, modrm
);
16676 assign(vecE
, getXMMReg(rE
));
16677 UChar ib
= getUChar(delta
);
16678 rIS4
= (ib
>> 4) & 0xF;
16679 DIP("%s %s,%s,%s,%s\n",
16680 name
, nameXMMReg(rIS4
), nameXMMReg(rE
),
16681 nameXMMReg(rV
), nameXMMReg(rG
));
16683 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
16685 assign(vecE
, loadLE(Ity_V128
, mkexpr(addr
)));
16686 UChar ib
= getUChar(delta
);
16687 rIS4
= (ib
>> 4) & 0xF;
16688 DIP("%s %s,%s,%s,%s\n",
16689 name
, nameXMMReg(rIS4
), dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
16692 assign(vecV
, getXMMReg(rV
));
16693 assign(vecIS4
, getXMMReg(rIS4
));
16694 IRTemp res
= math_PBLENDVB_128( vecE
, vecV
, vecIS4
, gran
, opSAR
);
16695 putYMMRegLoAndZU( rG
, mkexpr(res
) );
16699 static Long
dis_VBLENDV_256 ( const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
,
16700 const HChar
*name
, UInt gran
, IROp opSAR128
)
16702 IRTemp addr
= IRTemp_INVALID
;
16705 UChar modrm
= getUChar(delta
);
16706 UInt rG
= gregOfRexRM(pfx
, modrm
);
16707 UInt rV
= getVexNvvvv(pfx
);
16708 UInt rIS4
= 0xFF; /* invalid */
16709 IRTemp vecE
= newTemp(Ity_V256
);
16710 IRTemp vecV
= newTemp(Ity_V256
);
16711 IRTemp vecIS4
= newTemp(Ity_V256
);
16712 if (epartIsReg(modrm
)) {
16714 UInt rE
= eregOfRexRM(pfx
, modrm
);
16715 assign(vecE
, getYMMReg(rE
));
16716 UChar ib
= getUChar(delta
);
16717 rIS4
= (ib
>> 4) & 0xF;
16718 DIP("%s %s,%s,%s,%s\n",
16719 name
, nameYMMReg(rIS4
), nameYMMReg(rE
),
16720 nameYMMReg(rV
), nameYMMReg(rG
));
16722 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
16724 assign(vecE
, loadLE(Ity_V256
, mkexpr(addr
)));
16725 UChar ib
= getUChar(delta
);
16726 rIS4
= (ib
>> 4) & 0xF;
16727 DIP("%s %s,%s,%s,%s\n",
16728 name
, nameYMMReg(rIS4
), dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
16731 assign(vecV
, getYMMReg(rV
));
16732 assign(vecIS4
, getYMMReg(rIS4
));
16733 IRTemp res
= math_PBLENDVB_256( vecE
, vecV
, vecIS4
, gran
, opSAR128
);
16734 putYMMReg( rG
, mkexpr(res
) );
16738 static void finish_xTESTy ( IRTemp andV
, IRTemp andnV
, Int sign
)
16740 /* Set Z=1 iff (vecE & vecG) == 0
16741 Set C=1 iff (vecE & not vecG) == 0
16744 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
16746 /* andV resp. andnV, reduced to 64-bit values, by or-ing the top
16747 and bottom 64-bits together. It relies on this trick:
16749 InterleaveLO64x2([a,b],[c,d]) == [b,d] hence
16751 InterleaveLO64x2([a,b],[a,b]) == [b,b] and similarly
16752 InterleaveHI64x2([a,b],[a,b]) == [a,a]
16754 and so the OR of the above 2 exprs produces
16755 [a OR b, a OR b], from which we simply take the lower half.
16757 IRTemp and64
= newTemp(Ity_I64
);
16758 IRTemp andn64
= newTemp(Ity_I64
);
16763 binop(Iop_InterleaveLO64x2
,
16764 mkexpr(andV
), mkexpr(andV
)),
16765 binop(Iop_InterleaveHI64x2
,
16766 mkexpr(andV
), mkexpr(andV
)))));
16771 binop(Iop_InterleaveLO64x2
,
16772 mkexpr(andnV
), mkexpr(andnV
)),
16773 binop(Iop_InterleaveHI64x2
,
16774 mkexpr(andnV
), mkexpr(andnV
)))));
16776 IRTemp z64
= newTemp(Ity_I64
);
16777 IRTemp c64
= newTemp(Ity_I64
);
16779 /* When only interested in the most significant bit, just shift
16780 arithmetically right and negate. */
16783 binop(Iop_Sar64
, mkexpr(and64
), mkU8(63))));
16787 binop(Iop_Sar64
, mkexpr(andn64
), mkU8(63))));
16790 /* When interested in bit 31 and bit 63, mask those bits and
16791 fallthrough into the PTEST handling. */
16792 IRTemp t0
= newTemp(Ity_I64
);
16793 IRTemp t1
= newTemp(Ity_I64
);
16794 IRTemp t2
= newTemp(Ity_I64
);
16795 assign(t0
, mkU64(0x8000000080000000ULL
));
16796 assign(t1
, binop(Iop_And64
, mkexpr(and64
), mkexpr(t0
)));
16797 assign(t2
, binop(Iop_And64
, mkexpr(andn64
), mkexpr(t0
)));
16801 /* Now convert and64, andn64 to all-zeroes or all-1s, so we can
16802 slice out the Z and C bits conveniently. We use the standard
16803 trick all-zeroes -> all-zeroes, anything-else -> all-ones
16804 done by "(x | -x) >>s (word-size - 1)".
16810 binop(Iop_Sub64
, mkU64(0), mkexpr(and64
)),
16811 mkexpr(and64
)), mkU8(63))));
16817 binop(Iop_Sub64
, mkU64(0), mkexpr(andn64
)),
16818 mkexpr(andn64
)), mkU8(63))));
16821 /* And finally, slice out the Z and C flags and set the flags
16822 thunk to COPY for them. OSAP are set to zero. */
16823 IRTemp newOSZACP
= newTemp(Ity_I64
);
16826 binop(Iop_And64
, mkexpr(z64
), mkU64(AMD64G_CC_MASK_Z
)),
16827 binop(Iop_And64
, mkexpr(c64
), mkU64(AMD64G_CC_MASK_C
))));
16829 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(newOSZACP
)));
16830 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
16831 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
16832 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
16836 /* Handles 128 bit versions of PTEST, VTESTPS or VTESTPD.
16837 sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
16838 static Long
dis_xTESTy_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
16839 Long delta
, Bool isAvx
, Int sign
)
16841 IRTemp addr
= IRTemp_INVALID
;
16844 UChar modrm
= getUChar(delta
);
16845 UInt rG
= gregOfRexRM(pfx
, modrm
);
16846 IRTemp vecE
= newTemp(Ity_V128
);
16847 IRTemp vecG
= newTemp(Ity_V128
);
16849 if ( epartIsReg(modrm
) ) {
16850 UInt rE
= eregOfRexRM(pfx
, modrm
);
16851 assign(vecE
, getXMMReg(rE
));
16853 DIP( "%s%stest%s %s,%s\n",
16854 isAvx
? "v" : "", sign
== 0 ? "p" : "",
16855 sign
== 0 ? "" : sign
== 32 ? "ps" : "pd",
16856 nameXMMReg(rE
), nameXMMReg(rG
) );
16858 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16860 gen_SEGV_if_not_16_aligned( addr
);
16861 assign(vecE
, loadLE( Ity_V128
, mkexpr(addr
) ));
16863 DIP( "%s%stest%s %s,%s\n",
16864 isAvx
? "v" : "", sign
== 0 ? "p" : "",
16865 sign
== 0 ? "" : sign
== 32 ? "ps" : "pd",
16866 dis_buf
, nameXMMReg(rG
) );
16869 assign(vecG
, getXMMReg(rG
));
16871 /* Set Z=1 iff (vecE & vecG) == 0
16872 Set C=1 iff (vecE & not vecG) == 0
16875 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
16876 IRTemp andV
= newTemp(Ity_V128
);
16877 IRTemp andnV
= newTemp(Ity_V128
);
16878 assign(andV
, binop(Iop_AndV128
, mkexpr(vecE
), mkexpr(vecG
)));
16879 assign(andnV
, binop(Iop_AndV128
,
16881 binop(Iop_XorV128
, mkexpr(vecG
),
16884 finish_xTESTy ( andV
, andnV
, sign
);
16889 /* Handles 256 bit versions of PTEST, VTESTPS or VTESTPD.
16890 sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
16891 static Long
dis_xTESTy_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
16892 Long delta
, Int sign
)
16894 IRTemp addr
= IRTemp_INVALID
;
16897 UChar modrm
= getUChar(delta
);
16898 UInt rG
= gregOfRexRM(pfx
, modrm
);
16899 IRTemp vecE
= newTemp(Ity_V256
);
16900 IRTemp vecG
= newTemp(Ity_V256
);
16902 if ( epartIsReg(modrm
) ) {
16903 UInt rE
= eregOfRexRM(pfx
, modrm
);
16904 assign(vecE
, getYMMReg(rE
));
16906 DIP( "v%stest%s %s,%s\n", sign
== 0 ? "p" : "",
16907 sign
== 0 ? "" : sign
== 32 ? "ps" : "pd",
16908 nameYMMReg(rE
), nameYMMReg(rG
) );
16910 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16911 assign(vecE
, loadLE( Ity_V256
, mkexpr(addr
) ));
16913 DIP( "v%stest%s %s,%s\n", sign
== 0 ? "p" : "",
16914 sign
== 0 ? "" : sign
== 32 ? "ps" : "pd",
16915 dis_buf
, nameYMMReg(rG
) );
16918 assign(vecG
, getYMMReg(rG
));
16920 /* Set Z=1 iff (vecE & vecG) == 0
16921 Set C=1 iff (vecE & not vecG) == 0
16924 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
16925 IRTemp andV
= newTemp(Ity_V256
);
16926 IRTemp andnV
= newTemp(Ity_V256
);
16927 assign(andV
, binop(Iop_AndV256
, mkexpr(vecE
), mkexpr(vecG
)));
16928 assign(andnV
, binop(Iop_AndV256
,
16929 mkexpr(vecE
), unop(Iop_NotV256
, mkexpr(vecG
))));
16931 IRTemp andVhi
= IRTemp_INVALID
;
16932 IRTemp andVlo
= IRTemp_INVALID
;
16933 IRTemp andnVhi
= IRTemp_INVALID
;
16934 IRTemp andnVlo
= IRTemp_INVALID
;
16935 breakupV256toV128s( andV
, &andVhi
, &andVlo
);
16936 breakupV256toV128s( andnV
, &andnVhi
, &andnVlo
);
16938 IRTemp andV128
= newTemp(Ity_V128
);
16939 IRTemp andnV128
= newTemp(Ity_V128
);
16940 assign( andV128
, binop( Iop_OrV128
, mkexpr(andVhi
), mkexpr(andVlo
) ) );
16941 assign( andnV128
, binop( Iop_OrV128
, mkexpr(andnVhi
), mkexpr(andnVlo
) ) );
16943 finish_xTESTy ( andV128
, andnV128
, sign
);
16948 /* Handles 128 and 256 bit versions of VCVTPH2PS. */
16949 static Long
dis_VCVTPH2PS ( const VexAbiInfo
* vbi
, Prefix pfx
,
16950 Long delta
, Bool is256bit
)
16952 /* This is a width-doubling load or reg-reg move, that does conversion on the
16953 transferred data. */
16954 UChar modrm
= getUChar(delta
);
16955 UInt rG
= gregOfRexRM(pfx
, modrm
);
16956 IRTemp srcE
= newTemp(is256bit
? Ity_V128
: Ity_I64
);
16958 if (epartIsReg(modrm
)) {
16959 UInt rE
= eregOfRexRM(pfx
, modrm
);
16960 assign(srcE
, is256bit
? unop(Iop_V256toV128_0
, getYMMReg(rE
))
16961 : unop(Iop_V128to64
, getXMMReg(rE
)));
16963 DIP("vcvtph2ps %s,%s\n", nameXMMReg(rE
),
16964 (is256bit
? nameYMMReg
: nameXMMReg
)(rG
));
16968 IRTemp addr
= disAMode(&alen
, vbi
, pfx
, delta
, dis_buf
, 0);
16969 // I don't think we need an alignment check here (not 100% sure tho.)
16970 assign(srcE
, loadLE(is256bit
? Ity_V128
: Ity_I64
, mkexpr(addr
)));
16972 DIP( "vcvtph2ps %s,%s\n", dis_buf
,
16973 (is256bit
? nameYMMReg
: nameXMMReg
)(rG
));
16976 IRExpr
* res
= unop(is256bit
? Iop_F16toF32x8
: Iop_F16toF32x4
, mkexpr(srcE
));
16977 (is256bit
? putYMMReg
: putYMMRegLoAndZU
)(rG
, res
);
16983 /* Handles 128 bit versions of PMOVZXBW and PMOVSXBW. */
16984 static Long
dis_PMOVxXBW_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
16985 Long delta
, Bool isAvx
, Bool xIsZ
)
16987 IRTemp addr
= IRTemp_INVALID
;
16990 IRTemp srcVec
= newTemp(Ity_V128
);
16991 UChar modrm
= getUChar(delta
);
16992 const HChar
* mbV
= isAvx
? "v" : "";
16993 const HChar how
= xIsZ
? 'z' : 's';
16994 UInt rG
= gregOfRexRM(pfx
, modrm
);
16995 if ( epartIsReg(modrm
) ) {
16996 UInt rE
= eregOfRexRM(pfx
, modrm
);
16997 assign( srcVec
, getXMMReg(rE
) );
16999 DIP( "%spmov%cxbw %s,%s\n", mbV
, how
, nameXMMReg(rE
), nameXMMReg(rG
) );
17001 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17003 unop( Iop_64UtoV128
, loadLE( Ity_I64
, mkexpr(addr
) ) ) );
17005 DIP( "%spmov%cxbw %s,%s\n", mbV
, how
, dis_buf
, nameXMMReg(rG
) );
17009 = xIsZ
/* do math for either zero or sign extend */
17010 ? binop( Iop_InterleaveLO8x16
,
17011 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec
) )
17012 : binop( Iop_SarN16x8
,
17013 binop( Iop_ShlN16x8
,
17014 binop( Iop_InterleaveLO8x16
,
17015 IRExpr_Const( IRConst_V128(0) ),
17020 (isAvx
? putYMMRegLoAndZU
: putXMMReg
) ( rG
, res
);
17026 /* Handles 256 bit versions of PMOVZXBW and PMOVSXBW. */
17027 static Long
dis_PMOVxXBW_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17028 Long delta
, Bool xIsZ
)
17030 IRTemp addr
= IRTemp_INVALID
;
17033 IRTemp srcVec
= newTemp(Ity_V128
);
17034 UChar modrm
= getUChar(delta
);
17035 UChar how
= xIsZ
? 'z' : 's';
17036 UInt rG
= gregOfRexRM(pfx
, modrm
);
17037 if ( epartIsReg(modrm
) ) {
17038 UInt rE
= eregOfRexRM(pfx
, modrm
);
17039 assign( srcVec
, getXMMReg(rE
) );
17041 DIP( "vpmov%cxbw %s,%s\n", how
, nameXMMReg(rE
), nameYMMReg(rG
) );
17043 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17044 assign( srcVec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
17046 DIP( "vpmov%cxbw %s,%s\n", how
, dis_buf
, nameYMMReg(rG
) );
17049 /* First do zero extend. */
17051 = binop( Iop_V128HLtoV256
,
17052 binop( Iop_InterleaveHI8x16
,
17053 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec
) ),
17054 binop( Iop_InterleaveLO8x16
,
17055 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec
) ) );
17056 /* And if needed sign extension as well. */
17058 res
= binop( Iop_SarN16x16
,
17059 binop( Iop_ShlN16x16
, res
, mkU8(8) ), mkU8(8) );
17061 putYMMReg ( rG
, res
);
17067 static Long
dis_PMOVxXWD_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17068 Long delta
, Bool isAvx
, Bool xIsZ
)
17070 IRTemp addr
= IRTemp_INVALID
;
17073 IRTemp srcVec
= newTemp(Ity_V128
);
17074 UChar modrm
= getUChar(delta
);
17075 const HChar
* mbV
= isAvx
? "v" : "";
17076 const HChar how
= xIsZ
? 'z' : 's';
17077 UInt rG
= gregOfRexRM(pfx
, modrm
);
17079 if ( epartIsReg(modrm
) ) {
17080 UInt rE
= eregOfRexRM(pfx
, modrm
);
17081 assign( srcVec
, getXMMReg(rE
) );
17083 DIP( "%spmov%cxwd %s,%s\n", mbV
, how
, nameXMMReg(rE
), nameXMMReg(rG
) );
17085 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17087 unop( Iop_64UtoV128
, loadLE( Ity_I64
, mkexpr(addr
) ) ) );
17089 DIP( "%spmov%cxwd %s,%s\n", mbV
, how
, dis_buf
, nameXMMReg(rG
) );
17093 = binop( Iop_InterleaveLO16x8
,
17094 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec
) );
17096 res
= binop(Iop_SarN32x4
,
17097 binop(Iop_ShlN32x4
, res
, mkU8(16)), mkU8(16));
17099 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)
17100 ( gregOfRexRM(pfx
, modrm
), res
);
17106 static Long
dis_PMOVxXWD_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17107 Long delta
, Bool xIsZ
)
17109 IRTemp addr
= IRTemp_INVALID
;
17112 IRTemp srcVec
= newTemp(Ity_V128
);
17113 UChar modrm
= getUChar(delta
);
17114 UChar how
= xIsZ
? 'z' : 's';
17115 UInt rG
= gregOfRexRM(pfx
, modrm
);
17117 if ( epartIsReg(modrm
) ) {
17118 UInt rE
= eregOfRexRM(pfx
, modrm
);
17119 assign( srcVec
, getXMMReg(rE
) );
17121 DIP( "vpmov%cxwd %s,%s\n", how
, nameXMMReg(rE
), nameYMMReg(rG
) );
17123 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17124 assign( srcVec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
17126 DIP( "vpmov%cxwd %s,%s\n", how
, dis_buf
, nameYMMReg(rG
) );
17130 = binop( Iop_V128HLtoV256
,
17131 binop( Iop_InterleaveHI16x8
,
17132 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec
) ),
17133 binop( Iop_InterleaveLO16x8
,
17134 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec
) ) );
17136 res
= binop(Iop_SarN32x8
,
17137 binop(Iop_ShlN32x8
, res
, mkU8(16)), mkU8(16));
17139 putYMMReg ( rG
, res
);
17145 static Long
dis_PMOVSXWQ_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17146 Long delta
, Bool isAvx
)
17148 IRTemp addr
= IRTemp_INVALID
;
17151 IRTemp srcBytes
= newTemp(Ity_I32
);
17152 UChar modrm
= getUChar(delta
);
17153 const HChar
* mbV
= isAvx
? "v" : "";
17154 UInt rG
= gregOfRexRM(pfx
, modrm
);
17156 if ( epartIsReg( modrm
) ) {
17157 UInt rE
= eregOfRexRM(pfx
, modrm
);
17158 assign( srcBytes
, getXMMRegLane32( rE
, 0 ) );
17160 DIP( "%spmovsxwq %s,%s\n", mbV
, nameXMMReg(rE
), nameXMMReg(rG
) );
17162 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17163 assign( srcBytes
, loadLE( Ity_I32
, mkexpr(addr
) ) );
17165 DIP( "%spmovsxwq %s,%s\n", mbV
, dis_buf
, nameXMMReg(rG
) );
17168 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)
17169 ( rG
, binop( Iop_64HLtoV128
,
17171 unop( Iop_32HIto16
, mkexpr(srcBytes
) ) ),
17173 unop( Iop_32to16
, mkexpr(srcBytes
) ) ) ) );
17178 static Long
dis_PMOVSXWQ_256 ( const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
)
17180 IRTemp addr
= IRTemp_INVALID
;
17183 IRTemp srcBytes
= newTemp(Ity_I64
);
17184 UChar modrm
= getUChar(delta
);
17185 UInt rG
= gregOfRexRM(pfx
, modrm
);
17186 IRTemp s3
, s2
, s1
, s0
;
17187 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
17189 if ( epartIsReg( modrm
) ) {
17190 UInt rE
= eregOfRexRM(pfx
, modrm
);
17191 assign( srcBytes
, getXMMRegLane64( rE
, 0 ) );
17193 DIP( "vpmovsxwq %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
) );
17195 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17196 assign( srcBytes
, loadLE( Ity_I64
, mkexpr(addr
) ) );
17198 DIP( "vpmovsxwq %s,%s\n", dis_buf
, nameYMMReg(rG
) );
17201 breakup64to16s( srcBytes
, &s3
, &s2
, &s1
, &s0
);
17202 putYMMReg( rG
, binop( Iop_V128HLtoV256
,
17203 binop( Iop_64HLtoV128
,
17204 unop( Iop_16Sto64
, mkexpr(s3
) ),
17205 unop( Iop_16Sto64
, mkexpr(s2
) ) ),
17206 binop( Iop_64HLtoV128
,
17207 unop( Iop_16Sto64
, mkexpr(s1
) ),
17208 unop( Iop_16Sto64
, mkexpr(s0
) ) ) ) );
17213 static Long
dis_PMOVZXWQ_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17214 Long delta
, Bool isAvx
)
17216 IRTemp addr
= IRTemp_INVALID
;
17219 IRTemp srcVec
= newTemp(Ity_V128
);
17220 UChar modrm
= getUChar(delta
);
17221 const HChar
* mbV
= isAvx
? "v" : "";
17222 UInt rG
= gregOfRexRM(pfx
, modrm
);
17224 if ( epartIsReg( modrm
) ) {
17225 UInt rE
= eregOfRexRM(pfx
, modrm
);
17226 assign( srcVec
, getXMMReg(rE
) );
17228 DIP( "%spmovzxwq %s,%s\n", mbV
, nameXMMReg(rE
), nameXMMReg(rG
) );
17230 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17232 unop( Iop_32UtoV128
, loadLE( Ity_I32
, mkexpr(addr
) ) ) );
17234 DIP( "%spmovzxwq %s,%s\n", mbV
, dis_buf
, nameXMMReg(rG
) );
17237 IRTemp zeroVec
= newTemp( Ity_V128
);
17238 assign( zeroVec
, IRExpr_Const( IRConst_V128(0) ) );
17240 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)
17241 ( rG
, binop( Iop_InterleaveLO16x8
,
17243 binop( Iop_InterleaveLO16x8
,
17244 mkexpr(zeroVec
), mkexpr(srcVec
) ) ) );
17249 static Long
dis_PMOVZXWQ_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17252 IRTemp addr
= IRTemp_INVALID
;
17255 IRTemp srcVec
= newTemp(Ity_V128
);
17256 UChar modrm
= getUChar(delta
);
17257 UInt rG
= gregOfRexRM(pfx
, modrm
);
17259 if ( epartIsReg( modrm
) ) {
17260 UInt rE
= eregOfRexRM(pfx
, modrm
);
17261 assign( srcVec
, getXMMReg(rE
) );
17263 DIP( "vpmovzxwq %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
) );
17265 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17267 unop( Iop_64UtoV128
, loadLE( Ity_I64
, mkexpr(addr
) ) ) );
17269 DIP( "vpmovzxwq %s,%s\n", dis_buf
, nameYMMReg(rG
) );
17272 IRTemp zeroVec
= newTemp( Ity_V128
);
17273 assign( zeroVec
, IRExpr_Const( IRConst_V128(0) ) );
17275 putYMMReg( rG
, binop( Iop_V128HLtoV256
,
17276 binop( Iop_InterleaveHI16x8
,
17278 binop( Iop_InterleaveLO16x8
,
17279 mkexpr(zeroVec
), mkexpr(srcVec
) ) ),
17280 binop( Iop_InterleaveLO16x8
,
17282 binop( Iop_InterleaveLO16x8
,
17283 mkexpr(zeroVec
), mkexpr(srcVec
) ) ) ) );
17288 /* Handles 128 bit versions of PMOVZXDQ and PMOVSXDQ. */
17289 static Long
dis_PMOVxXDQ_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17290 Long delta
, Bool isAvx
, Bool xIsZ
)
17292 IRTemp addr
= IRTemp_INVALID
;
17295 IRTemp srcI64
= newTemp(Ity_I64
);
17296 IRTemp srcVec
= newTemp(Ity_V128
);
17297 UChar modrm
= getUChar(delta
);
17298 const HChar
* mbV
= isAvx
? "v" : "";
17299 const HChar how
= xIsZ
? 'z' : 's';
17300 UInt rG
= gregOfRexRM(pfx
, modrm
);
17301 /* Compute both srcI64 -- the value to expand -- and srcVec -- same
17302 thing in a V128, with arbitrary junk in the top 64 bits. Use
17303 one or both of them and let iropt clean up afterwards (as
17305 if ( epartIsReg(modrm
) ) {
17306 UInt rE
= eregOfRexRM(pfx
, modrm
);
17307 assign( srcVec
, getXMMReg(rE
) );
17308 assign( srcI64
, unop(Iop_V128to64
, mkexpr(srcVec
)) );
17310 DIP( "%spmov%cxdq %s,%s\n", mbV
, how
, nameXMMReg(rE
), nameXMMReg(rG
) );
17312 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17313 assign( srcI64
, loadLE(Ity_I64
, mkexpr(addr
)) );
17314 assign( srcVec
, unop( Iop_64UtoV128
, mkexpr(srcI64
)) );
17316 DIP( "%spmov%cxdq %s,%s\n", mbV
, how
, dis_buf
, nameXMMReg(rG
) );
17320 = xIsZ
/* do math for either zero or sign extend */
17321 ? binop( Iop_InterleaveLO32x4
,
17322 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec
) )
17323 : binop( Iop_64HLtoV128
,
17325 unop( Iop_64HIto32
, mkexpr(srcI64
) ) ),
17327 unop( Iop_64to32
, mkexpr(srcI64
) ) ) );
17329 (isAvx
? putYMMRegLoAndZU
: putXMMReg
) ( rG
, res
);
17335 /* Handles 256 bit versions of PMOVZXDQ and PMOVSXDQ. */
17336 static Long
dis_PMOVxXDQ_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17337 Long delta
, Bool xIsZ
)
17339 IRTemp addr
= IRTemp_INVALID
;
17342 IRTemp srcVec
= newTemp(Ity_V128
);
17343 UChar modrm
= getUChar(delta
);
17344 UChar how
= xIsZ
? 'z' : 's';
17345 UInt rG
= gregOfRexRM(pfx
, modrm
);
17346 /* Compute both srcI64 -- the value to expand -- and srcVec -- same
17347 thing in a V128, with arbitrary junk in the top 64 bits. Use
17348 one or both of them and let iropt clean up afterwards (as
17350 if ( epartIsReg(modrm
) ) {
17351 UInt rE
= eregOfRexRM(pfx
, modrm
);
17352 assign( srcVec
, getXMMReg(rE
) );
17354 DIP( "vpmov%cxdq %s,%s\n", how
, nameXMMReg(rE
), nameYMMReg(rG
) );
17356 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17357 assign( srcVec
, loadLE(Ity_V128
, mkexpr(addr
)) );
17359 DIP( "vpmov%cxdq %s,%s\n", how
, dis_buf
, nameYMMReg(rG
) );
17364 res
= binop( Iop_V128HLtoV256
,
17365 binop( Iop_InterleaveHI32x4
,
17366 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec
) ),
17367 binop( Iop_InterleaveLO32x4
,
17368 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec
) ) );
17370 IRTemp s3
, s2
, s1
, s0
;
17371 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
17372 breakupV128to32s( srcVec
, &s3
, &s2
, &s1
, &s0
);
17373 res
= binop( Iop_V128HLtoV256
,
17374 binop( Iop_64HLtoV128
,
17375 unop( Iop_32Sto64
, mkexpr(s3
) ),
17376 unop( Iop_32Sto64
, mkexpr(s2
) ) ),
17377 binop( Iop_64HLtoV128
,
17378 unop( Iop_32Sto64
, mkexpr(s1
) ),
17379 unop( Iop_32Sto64
, mkexpr(s0
) ) ) );
17382 putYMMReg ( rG
, res
);
17388 /* Handles 128 bit versions of PMOVZXBD and PMOVSXBD. */
17389 static Long
dis_PMOVxXBD_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17390 Long delta
, Bool isAvx
, Bool xIsZ
)
17392 IRTemp addr
= IRTemp_INVALID
;
17395 IRTemp srcVec
= newTemp(Ity_V128
);
17396 UChar modrm
= getUChar(delta
);
17397 const HChar
* mbV
= isAvx
? "v" : "";
17398 const HChar how
= xIsZ
? 'z' : 's';
17399 UInt rG
= gregOfRexRM(pfx
, modrm
);
17400 if ( epartIsReg(modrm
) ) {
17401 UInt rE
= eregOfRexRM(pfx
, modrm
);
17402 assign( srcVec
, getXMMReg(rE
) );
17404 DIP( "%spmov%cxbd %s,%s\n", mbV
, how
, nameXMMReg(rE
), nameXMMReg(rG
) );
17406 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17408 unop( Iop_32UtoV128
, loadLE( Ity_I32
, mkexpr(addr
) ) ) );
17410 DIP( "%spmov%cxbd %s,%s\n", mbV
, how
, dis_buf
, nameXMMReg(rG
) );
17413 IRTemp zeroVec
= newTemp(Ity_V128
);
17414 assign( zeroVec
, IRExpr_Const( IRConst_V128(0) ) );
17417 = binop(Iop_InterleaveLO8x16
,
17419 binop(Iop_InterleaveLO8x16
,
17420 mkexpr(zeroVec
), mkexpr(srcVec
)));
17422 res
= binop(Iop_SarN32x4
,
17423 binop(Iop_ShlN32x4
, res
, mkU8(24)), mkU8(24));
17425 (isAvx
? putYMMRegLoAndZU
: putXMMReg
) ( rG
, res
);
17431 /* Handles 256 bit versions of PMOVZXBD and PMOVSXBD. */
17432 static Long
dis_PMOVxXBD_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17433 Long delta
, Bool xIsZ
)
17435 IRTemp addr
= IRTemp_INVALID
;
17438 IRTemp srcVec
= newTemp(Ity_V128
);
17439 UChar modrm
= getUChar(delta
);
17440 UChar how
= xIsZ
? 'z' : 's';
17441 UInt rG
= gregOfRexRM(pfx
, modrm
);
17442 if ( epartIsReg(modrm
) ) {
17443 UInt rE
= eregOfRexRM(pfx
, modrm
);
17444 assign( srcVec
, getXMMReg(rE
) );
17446 DIP( "vpmov%cxbd %s,%s\n", how
, nameXMMReg(rE
), nameYMMReg(rG
) );
17448 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17450 unop( Iop_64UtoV128
, loadLE( Ity_I64
, mkexpr(addr
) ) ) );
17452 DIP( "vpmov%cxbd %s,%s\n", how
, dis_buf
, nameYMMReg(rG
) );
17455 IRTemp zeroVec
= newTemp(Ity_V128
);
17456 assign( zeroVec
, IRExpr_Const( IRConst_V128(0) ) );
17459 = binop( Iop_V128HLtoV256
,
17460 binop(Iop_InterleaveHI8x16
,
17462 binop(Iop_InterleaveLO8x16
,
17463 mkexpr(zeroVec
), mkexpr(srcVec
)) ),
17464 binop(Iop_InterleaveLO8x16
,
17466 binop(Iop_InterleaveLO8x16
,
17467 mkexpr(zeroVec
), mkexpr(srcVec
)) ) );
17469 res
= binop(Iop_SarN32x8
,
17470 binop(Iop_ShlN32x8
, res
, mkU8(24)), mkU8(24));
17472 putYMMReg ( rG
, res
);
/* Handles 128 bit versions of PMOVSXBQ. */
/* Sign-extend the low 2 bytes of xmm/m16 into the two 64-bit lanes of
   the xmm destination.  Returns the updated instruction offset. */
static Long dis_PMOVSXBQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr     = IRTemp_INVALID;
   Int    alen     = 0;
   HChar  dis_buf[50];
   IRTemp srcBytes = newTemp(Ity_I16);
   UChar  modrm    = getUChar(delta);
   const HChar* mbV = isAvx ? "v" : "";
   UInt   rG       = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcBytes, getXMMRegLane16( rE, 0 ) );
      delta += 1;
      DIP( "%spmovsxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcBytes, loadLE( Ity_I16, mkexpr(addr) ) );
      delta += alen;
      DIP( "%spmovsxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
   }

   /* High byte -> high 64-bit lane, low byte -> low lane; for the AVX
      form also zero the upper ymm half. */
   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( rG, binop( Iop_64HLtoV128,
                   unop( Iop_8Sto64,
                         unop( Iop_16HIto8, mkexpr(srcBytes) ) ),
                   unop( Iop_8Sto64,
                         unop( Iop_16to8, mkexpr(srcBytes) ) ) ) );
   return delta;
}
/* Handles 256 bit versions of PMOVSXBQ. */
/* Sign-extend the low 4 bytes of xmm/m32 into the four 64-bit lanes
   of the ymm destination.  Returns the updated instruction offset. */
static Long dis_PMOVSXBQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta )
{
   IRTemp addr     = IRTemp_INVALID;
   Int    alen     = 0;
   HChar  dis_buf[50];
   IRTemp srcBytes = newTemp(Ity_I32);
   UChar  modrm    = getUChar(delta);
   UInt   rG       = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcBytes, getXMMRegLane32( rE, 0 ) );
      delta += 1;
      DIP( "vpmovsxbq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) );
      delta += alen;
      DIP( "vpmovsxbq %s,%s\n", dis_buf, nameYMMReg(rG) );
   }

   /* Peel the four source bytes out of the 32-bit temp (hi half then
      lo half, hi byte then lo byte of each) and sign-extend each one
      to 64 bits.  Byte 3 ends up in the top lane, byte 0 in the
      bottom lane. */
   putYMMReg
      ( rG, binop( Iop_V128HLtoV256,
                   binop( Iop_64HLtoV128,
                          unop( Iop_8Sto64,
                                unop( Iop_16HIto8,
                                      unop( Iop_32HIto16,
                                            mkexpr(srcBytes) ) ) ),
                          unop( Iop_8Sto64,
                                unop( Iop_16to8,
                                      unop( Iop_32HIto16,
                                            mkexpr(srcBytes) ) ) ) ),
                   binop( Iop_64HLtoV128,
                          unop( Iop_8Sto64,
                                unop( Iop_16HIto8,
                                      unop( Iop_32to16,
                                            mkexpr(srcBytes) ) ) ),
                          unop( Iop_8Sto64,
                                unop( Iop_16to8,
                                      unop( Iop_32to16,
                                            mkexpr(srcBytes) ) ) ) ) ) );
   return delta;
}
/* Handles 128 bit versions of PMOVZXBQ. */
/* Zero-extend the low 2 bytes of xmm/m16 into the two 64-bit lanes of
   the xmm destination.  Returns the updated instruction offset. */
static Long dis_PMOVZXBQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   const HChar* mbV = isAvx ? "v" : "";
   UInt   rG     = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      delta += 1;
      DIP( "%spmovzxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      /* Only 16 bits of memory are consumed. */
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec,
              unop( Iop_32UtoV128,
                    unop( Iop_16Uto32, loadLE( Ity_I16, mkexpr(addr) ))));
      delta += alen;
      DIP( "%spmovzxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
   }

   IRTemp zeroVec = newTemp(Ity_V128);
   assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );

   /* Three zero-interleaves widen each source byte 8 -> 64 bits. */
   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( rG, binop( Iop_InterleaveLO8x16,
                   mkexpr(zeroVec),
                   binop( Iop_InterleaveLO8x16,
                          mkexpr(zeroVec),
                          binop( Iop_InterleaveLO8x16,
                                 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) );
   return delta;
}
/* Handles 256 bit versions of PMOVZXBQ. */
/* Zero-extend the low 4 bytes of xmm/m32 into the four 64-bit lanes
   of the ymm destination.  Returns the updated instruction offset. */
static Long dis_PMOVZXBQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      delta += 1;
      DIP( "vpmovzxbq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
   } else {
      /* Only 32 bits of memory are consumed. */
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec,
              unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) )));
      delta += alen;
      DIP( "vpmovzxbq %s,%s\n", dis_buf, nameYMMReg(rG) );
   }

   IRTemp zeroVec = newTemp(Ity_V128);
   assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );

   /* Three zero-interleaves widen each byte 8 -> 64 bits; the HI
      variant at the outermost level selects bytes 2/3 for the upper
      128-bit half, the LO variant bytes 0/1 for the lower half. */
   putYMMReg
      ( rG, binop( Iop_V128HLtoV256,
                   binop( Iop_InterleaveHI8x16,
                          mkexpr(zeroVec),
                          binop( Iop_InterleaveLO8x16,
                                 mkexpr(zeroVec),
                                 binop( Iop_InterleaveLO8x16,
                                        mkexpr(zeroVec),
                                        mkexpr(srcVec) ) ) ),
                   binop( Iop_InterleaveLO8x16,
                          mkexpr(zeroVec),
                          binop( Iop_InterleaveLO8x16,
                                 mkexpr(zeroVec),
                                 binop( Iop_InterleaveLO8x16,
                                        mkexpr(zeroVec),
                                        mkexpr(srcVec) ) ) ) ) );
   return delta;
}
/* Handles 128 bit versions of PHMINPOSUW.  The actual min/position
   computation is done by a clean helper on the two 64-bit halves of
   the source vector. */
static Long dis_PHMINPOSUW_128 ( const VexAbiInfo* vbi, Prefix pfx,
                                 Long delta, Bool isAvx )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   UChar  modrm  = getUChar(delta);
   const HChar* mbV = isAvx ? "v" : "";
   IRTemp sV     = newTemp(Ity_V128);
   IRTemp sHi    = newTemp(Ity_I64);
   IRTemp sLo    = newTemp(Ity_I64);
   IRTemp dLo    = newTemp(Ity_I64);
   UInt   rG     = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      delta += 1;
      DIP("%sphminposuw %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      /* The SSE form requires 16-alignment; the AVX form doesn't. */
      if (!isAvx)
         gen_SEGV_if_not_16_aligned(addr);
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      delta += alen;
      DIP("%sphminposuw %s,%s\n", mbV, dis_buf, nameXMMReg(rG));
   }
   /* Split the vector into two I64s, since a clean helper can't take
      a V128 by value. */
   assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );
   assign( dLo, mkIRExprCCall(
                   Ity_I64, 0/*regparms*/,
                   "amd64g_calculate_sse_phminposuw",
                   &amd64g_calculate_sse_phminposuw,
                   mkIRExprVec_2( mkexpr(sLo), mkexpr(sHi) )
         ));
   /* Result occupies only the low 64 bits; the rest is zeroed. */
   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      (rG, unop(Iop_64UtoV128, mkexpr(dLo)));
   return delta;
}
/* Handles AESENC/AESENCLAST/AESDEC/AESDECLAST/AESIMC (opc selects
   which), both SSE and AVX forms, via a dirty helper that reads the
   operands straight out of the guest state. */
static Long dis_AESx ( const VexAbiInfo* vbi, Prefix pfx,
                       Long delta, Bool isAvx, UChar opc )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx, modrm);
   UInt   regNoL = 0;
   /* For the 3-operand AVX forms (all except AESIMC, 0xDB) the right
      operand comes from the VEX.vvvv field, otherwise it's rG. */
   UInt   regNoR = (isAvx && opc != 0xDB) ? getVexNvvvv(pfx) : rG;

   /* This is a nasty kludge.  We need to pass 2 x V128 to the
      helper.  Since we can't do that, use a dirty
      helper to compute the results directly from the XMM regs in
      the guest state.  That means for the memory case, we need to
      move the left operand into a pseudo-register (XMM16, let's
      call it). */
   if (epartIsReg(modrm)) {
      regNoL = eregOfRexRM(pfx, modrm);
      delta += 1;
   } else {
      regNoL = 16; /* use XMM16 as an intermediary */
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      /* alignment check needed ???? */
      stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
      delta += alen;
   }

   void*  fn = &amd64g_dirtyhelper_AES;
   const HChar* nm = "amd64g_dirtyhelper_AES";

   /* Round up the arguments.  Note that this is a kludge -- the
      use of mkU64 rather than mkIRExpr_HWord implies the
      assumption that the host's word size is 64-bit. */
   UInt gstOffD = ymmGuestRegOffset(rG);
   UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
   UInt gstOffR = ymmGuestRegOffset(regNoR);
   IRExpr*  opc4     = mkU64(opc);
   IRExpr*  gstOffDe = mkU64(gstOffD);
   IRExpr*  gstOffLe = mkU64(gstOffL);
   IRExpr*  gstOffRe = mkU64(gstOffR);
   IRExpr** args
      = mkIRExprVec_5( IRExpr_GSPTR(), opc4, gstOffDe, gstOffLe, gstOffRe );

   IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args );
   /* It's not really a dirty call, but we can't use the clean helper
      mechanism here for the very lame reason that we can't pass 2 x
      V128s by value to a helper.  Hence this roundabout scheme. */
   d->nFxState = 2;
   vex_bzero(&d->fxState, sizeof(d->fxState));
   /* AES{ENC,ENCLAST,DEC,DECLAST} read both registers, and writes
      the second for !isAvx or the third for isAvx.
      AESIMC (0xDB) reads the first register, and writes the second. */
   d->fxState[0].fx     = Ifx_Read;
   d->fxState[0].offset = gstOffL;
   d->fxState[0].size   = sizeof(U128);
   d->fxState[1].offset = gstOffR;
   d->fxState[1].size   = sizeof(U128);
   if (opc == 0xDB)
      d->fxState[1].fx   = Ifx_Write;
   else if (!isAvx || rG == regNoR)
      d->fxState[1].fx   = Ifx_Modify;
   else {
      /* Distinct AVX destination: operand is only read, and a third
         fxState slot describes the written destination. */
      d->fxState[1].fx   = Ifx_Read;
      d->nFxState++;
      d->fxState[2].fx     = Ifx_Write;
      d->fxState[2].offset = gstOffD;
      d->fxState[2].size   = sizeof(U128);
   }

   stmt( IRStmt_Dirty(d) );
   {
      /* NOTE(review): "0XDD" uses a capital X in the original source;
         harmless, but unusual. */
      const HChar* opsuf;
      switch (opc) {
         case 0xDC: opsuf = "enc"; break;
         case 0XDD: opsuf = "enclast"; break;
         case 0xDE: opsuf = "dec"; break;
         case 0xDF: opsuf = "declast"; break;
         case 0xDB: opsuf = "imc"; break;
         default: vassert(0);
      }
      DIP("%saes%s %s,%s%s%s\n", isAvx ? "v" : "", opsuf,
          (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)),
          nameXMMReg(regNoR),
          (isAvx && opc != 0xDB) ? "," : "",
          (isAvx && opc != 0xDB) ? nameXMMReg(rG) : "");
   }
   /* AVX semantics: zero the upper half of the destination ymm. */
   if (isAvx)
      putYMMRegLane128( rG, 1, mkV128(0) );
   return delta;
}
/* Handles AESKEYGENASSIST, SSE and AVX forms, via a dirty helper that
   reads the source and writes the destination directly in the guest
   state (same 2-x-V128 limitation as the other AES insns). */
static Long dis_AESKEYGENASSIST ( const VexAbiInfo* vbi, Prefix pfx,
                                  Long delta, Bool isAvx )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   UChar  modrm  = getUChar(delta);
   UInt   regNoL = 0;
   UInt   regNoR = gregOfRexRM(pfx, modrm);
   UChar  imm    = 0;

   /* This is a nasty kludge.  See AESENC et al. instructions. */
   modrm = getUChar(delta);
   if (epartIsReg(modrm)) {
      regNoL = eregOfRexRM(pfx, modrm);
      imm = getUChar(delta+1);
      delta += 1+1;   /* modrm byte + immediate byte */
   } else {
      regNoL = 16; /* use XMM16 as an intermediary */
      /* The '1' tells disAMode an immediate byte follows the amode. */
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      /* alignment check ???? . */
      stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
      imm = getUChar(delta+alen);
      delta += alen+1;
   }

   /* Who ya gonna call?  Presumably not Ghostbusters. */
   void*  fn = &amd64g_dirtyhelper_AESKEYGENASSIST;
   const HChar* nm = "amd64g_dirtyhelper_AESKEYGENASSIST";

   /* Round up the arguments.  Note that this is a kludge -- the
      use of mkU64 rather than mkIRExpr_HWord implies the
      assumption that the host's word size is 64-bit. */
   UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
   UInt gstOffR = ymmGuestRegOffset(regNoR);

   IRExpr*  imme     = mkU64(imm & 0xFF);
   IRExpr*  gstOffLe = mkU64(gstOffL);
   IRExpr*  gstOffRe = mkU64(gstOffR);
   IRExpr** args
      = mkIRExprVec_4( IRExpr_GSPTR(), imme, gstOffLe, gstOffRe );

   IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args );
   /* It's not really a dirty call, but we can't use the clean helper
      mechanism here for the very lame reason that we can't pass 2 x
      V128s by value to a helper.  Hence this roundabout scheme. */
   d->nFxState = 2;
   vex_bzero(&d->fxState, sizeof(d->fxState));
   d->fxState[0].fx     = Ifx_Read;
   d->fxState[0].offset = gstOffL;
   d->fxState[0].size   = sizeof(U128);
   d->fxState[1].fx     = Ifx_Write;
   d->fxState[1].offset = gstOffR;
   d->fxState[1].size   = sizeof(U128);
   stmt( IRStmt_Dirty(d) );

   DIP("%saeskeygenassist $%x,%s,%s\n", isAvx ? "v" : "", (UInt)imm,
       (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)),
       nameXMMReg(regNoR));
   /* AVX semantics: zero the upper half of the destination ymm. */
   if (isAvx)
      putYMMRegLane128( regNoR, 1, mkV128(0) );
   return delta;
}
/* Top-level decoder for SSE4 instructions in the 0F 38 escape space.
   On success, emits IR, sets *decode_OK to True and returns the
   updated guest offset; on failure, leaves *decode_OK False and
   returns deltaIN unchanged. */
__attribute__((noinline))
static
Long dis_ESC_0F38__SSE4 ( Bool* decode_OK,
                          const VexAbiInfo* vbi,
                          Prefix pfx, Int sz, Long deltaIN )
{
   IRTemp addr  = IRTemp_INVALID;
   UChar  modrm = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];

   *decode_OK = False;

   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);
   delta++;
   switch (opc) {

   case 0x10:
   case 0x14:
   case 0x15:
      /* 66 0F 38 10 /r = PBLENDVB xmm1, xmm2/m128  (byte gran)
         66 0F 38 14 /r = BLENDVPS xmm1, xmm2/m128  (float gran)
         66 0F 38 15 /r = BLENDVPD xmm1, xmm2/m128  (double gran)
         Blend at various granularities, with XMM0 (implicit operand)
         providing the controlling mask.
      */
      if (have66noF2noF3(pfx) && sz == 2) {
         modrm = getUChar(delta);

         const HChar* nm    = NULL;
         UInt   gran  = 0;
         IROp   opSAR = Iop_INVALID;
         switch (opc) {
            case 0x10:
               nm = "pblendvb"; gran = 1; opSAR = Iop_SarN8x16;
               break;
            case 0x14:
               nm = "blendvps"; gran = 4; opSAR = Iop_SarN32x4;
               break;
            case 0x15:
               nm = "blendvpd"; gran = 8; opSAR = Iop_SarN64x2;
               break;
         }
         vassert(nm);

         IRTemp vecE = newTemp(Ity_V128);
         IRTemp vecG = newTemp(Ity_V128);
         IRTemp vec0 = newTemp(Ity_V128);

         if ( epartIsReg(modrm) ) {
            assign(vecE, getXMMReg(eregOfRexRM(pfx, modrm)));
            delta += 1;
            DIP( "%s %s,%s\n", nm,
                 nameXMMReg( eregOfRexRM(pfx, modrm) ),
                 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
            delta += alen;
            DIP( "%s %s,%s\n", nm,
                 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         }

         assign(vecG, getXMMReg(gregOfRexRM(pfx, modrm)));
         assign(vec0, getXMMReg(0));

         IRTemp res = math_PBLENDVB_128( vecE, vecG, vec0, gran, opSAR );
         putXMMReg(gregOfRexRM(pfx, modrm), mkexpr(res));

         goto decode_success;
      }
      break;

   case 0x17:
      /* 66 0F 38 17 /r = PTEST xmm1, xmm2/m128
         Logical compare (set ZF and CF from AND/ANDN of the operands) */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         delta = dis_xTESTy_128( vbi, pfx, delta, False/*!isAvx*/, 0 );
         goto decode_success;
      }
      break;

   case 0x20:
      /* 66 0F 38 20 /r = PMOVSXBW xmm1, xmm2/m64
         Packed Move with Sign Extend from Byte to Word (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVxXBW_128( vbi, pfx, delta,
                                   False/*!isAvx*/, False/*!xIsZ*/ );
         goto decode_success;
      }
      break;

   case 0x21:
      /* 66 0F 38 21 /r = PMOVSXBD xmm1, xmm2/m32
         Packed Move with Sign Extend from Byte to DWord (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVxXBD_128( vbi, pfx, delta,
                                   False/*!isAvx*/, False/*!xIsZ*/ );
         goto decode_success;
      }
      break;

   case 0x22:
      /* 66 0F 38 22 /r = PMOVSXBQ xmm1, xmm2/m16
         Packed Move with Sign Extend from Byte to QWord (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVSXBQ_128( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;

   case 0x23:
      /* 66 0F 38 23 /r = PMOVSXWD xmm1, xmm2/m64
         Packed Move with Sign Extend from Word to DWord (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVxXWD_128(vbi, pfx, delta,
                                  False/*!isAvx*/, False/*!xIsZ*/);
         goto decode_success;
      }
      break;

   case 0x24:
      /* 66 0F 38 24 /r = PMOVSXWQ xmm1, xmm2/m32
         Packed Move with Sign Extend from Word to QWord (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVSXWQ_128( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;

   case 0x25:
      /* 66 0F 38 25 /r = PMOVSXDQ xmm1, xmm2/m64
         Packed Move with Sign Extend from Double Word to Quad Word (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
                                   False/*!isAvx*/, False/*!xIsZ*/ );
         goto decode_success;
      }
      break;

   case 0x28:
      /* 66 0F 38 28 = PMULDQ -- signed widening multiply of 32-lanes
         0 x 0 to form lower 64-bit half and lanes 2 x 2 to form upper
         64-bit half */
      /* This is a really poor translation -- could be improved if
         performance critical.  It's a copy-paste of PMULUDQ, too. */
      if (have66noF2noF3(pfx) && sz == 2) {
         IRTemp sV = newTemp(Ity_V128);
         IRTemp dV = newTemp(Ity_V128);
         modrm = getUChar(delta);
         UInt rG = gregOfRexRM(pfx,modrm);
         assign( dV, getXMMReg(rG) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getXMMReg(rE) );
            delta += 1;
            DIP("pmuldq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("pmuldq %s,%s\n", dis_buf, nameXMMReg(rG));
         }

         putXMMReg( rG, mkexpr(math_PMULDQ_128( dV, sV )) );
         goto decode_success;
      }
      break;

   case 0x29:
      /* 66 0F 38 29 = PCMPEQQ
         64x2 equality comparison */
      if (have66noF2noF3(pfx) && sz == 2) {
         /* FIXME: this needs an alignment check */
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pcmpeqq", Iop_CmpEQ64x2, False );
         goto decode_success;
      }
      break;

   case 0x2A:
      /* 66 0F 38 2A = MOVNTDQA
         "non-temporal" "streaming" load
         Handle like MOVDQA but only memory operand is allowed */
      if (have66noF2noF3(pfx) && sz == 2) {
         modrm = getUChar(delta);
         if (!epartIsReg(modrm)) {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            putXMMReg( gregOfRexRM(pfx,modrm),
                       loadLE(Ity_V128, mkexpr(addr)) );
            DIP("movntdqa %s,%s\n", dis_buf,
                nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += alen;
            goto decode_success;
         }
      }
      break;

   case 0x2B:
      /* 66 0f 38 2B /r = PACKUSDW xmm1, xmm2/m128
         2x 32x4 S->U saturating narrow from xmm2/m128 to xmm1 */
      if (have66noF2noF3(pfx) && sz == 2) {

         modrm = getUChar(delta);

         IRTemp argL = newTemp(Ity_V128);
         IRTemp argR = newTemp(Ity_V128);

         if ( epartIsReg(modrm) ) {
            assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
            delta += 1;
            DIP( "packusdw %s,%s\n",
                 nameXMMReg( eregOfRexRM(pfx, modrm) ),
                 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
            delta += alen;
            DIP( "packusdw %s,%s\n",
                 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         }

         assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));

         putXMMReg( gregOfRexRM(pfx, modrm),
                    binop( Iop_QNarrowBin32Sto16Ux8,
                           mkexpr(argL), mkexpr(argR)) );

         goto decode_success;
      }
      break;

   case 0x30:
      /* 66 0F 38 30 /r = PMOVZXBW xmm1, xmm2/m64
         Packed Move with Zero Extend from Byte to Word (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVxXBW_128( vbi, pfx, delta,
                                   False/*!isAvx*/, True/*xIsZ*/ );
         goto decode_success;
      }
      break;

   case 0x31:
      /* 66 0F 38 31 /r = PMOVZXBD xmm1, xmm2/m32
         Packed Move with Zero Extend from Byte to DWord (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVxXBD_128( vbi, pfx, delta,
                                   False/*!isAvx*/, True/*xIsZ*/ );
         goto decode_success;
      }
      break;

   case 0x32:
      /* 66 0F 38 32 /r = PMOVZXBQ xmm1, xmm2/m16
         Packed Move with Zero Extend from Byte to QWord (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVZXBQ_128( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;

   case 0x33:
      /* 66 0F 38 33 /r = PMOVZXWD xmm1, xmm2/m64
         Packed Move with Zero Extend from Word to DWord (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVxXWD_128( vbi, pfx, delta,
                                   False/*!isAvx*/, True/*xIsZ*/ );
         goto decode_success;
      }
      break;

   case 0x34:
      /* 66 0F 38 34 /r = PMOVZXWQ xmm1, xmm2/m32
         Packed Move with Zero Extend from Word to QWord (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVZXWQ_128( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;

   case 0x35:
      /* 66 0F 38 35 /r = PMOVZXDQ xmm1, xmm2/m64
         Packed Move with Zero Extend from DWord to QWord (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
                                   False/*!isAvx*/, True/*xIsZ*/ );
         goto decode_success;
      }
      break;

   case 0x37:
      /* 66 0F 38 37 = PCMPGTQ
         64x2 comparison (signed, presumably; the Intel docs don't say :-)
      */
      if (have66noF2noF3(pfx) && sz == 2) {
         /* FIXME: this needs an alignment check */
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pcmpgtq", Iop_CmpGT64Sx2, False );
         goto decode_success;
      }
      break;

   case 0x38:
   case 0x3C:
      /* 66 0F 38 38 /r = PMINSB xmm1, xmm2/m128    8Sx16 (signed) min
         66 0F 38 3C /r = PMAXSB xmm1, xmm2/m128    8Sx16 (signed) max
      */
      if (have66noF2noF3(pfx) && sz == 2) {
         /* FIXME: this needs an alignment check */
         Bool isMAX = opc == 0x3C;
         delta = dis_SSEint_E_to_G(
                    vbi, pfx, delta,
                    isMAX ? "pmaxsb" : "pminsb",
                    isMAX ? Iop_Max8Sx16 : Iop_Min8Sx16,
                    False
                 );
         goto decode_success;
      }
      break;

   case 0x39:
   case 0x3D:
      /* 66 0F 38 39 /r = PMINSD xmm1, xmm2/m128
         Minimum of Packed Signed Double Word Integers (XMM)
         66 0F 38 3D /r = PMAXSD xmm1, xmm2/m128
         Maximum of Packed Signed Double Word Integers (XMM)
      */
      if (have66noF2noF3(pfx) && sz == 2) {
         /* FIXME: this needs an alignment check */
         Bool isMAX = opc == 0x3D;
         delta = dis_SSEint_E_to_G(
                    vbi, pfx, delta,
                    isMAX ? "pmaxsd" : "pminsd",
                    isMAX ? Iop_Max32Sx4 : Iop_Min32Sx4,
                    False
                 );
         goto decode_success;
      }
      break;

   case 0x3A:
   case 0x3E:
      /* 66 0F 38 3A /r = PMINUW xmm1, xmm2/m128
         Minimum of Packed Unsigned Word Integers (XMM)
         66 0F 38 3E /r = PMAXUW xmm1, xmm2/m128
         Maximum of Packed Unsigned Word Integers (XMM)
      */
      if (have66noF2noF3(pfx) && sz == 2) {
         /* FIXME: this needs an alignment check */
         Bool isMAX = opc == 0x3E;
         delta = dis_SSEint_E_to_G(
                    vbi, pfx, delta,
                    isMAX ? "pmaxuw" : "pminuw",
                    isMAX ? Iop_Max16Ux8 : Iop_Min16Ux8,
                    False
                 );
         goto decode_success;
      }
      break;

   case 0x3B:
   case 0x3F:
      /* 66 0F 38 3B /r = PMINUD xmm1, xmm2/m128
         Minimum of Packed Unsigned Doubleword Integers (XMM)
         66 0F 38 3F /r = PMAXUD xmm1, xmm2/m128
         Maximum of Packed Unsigned Doubleword Integers (XMM)
      */
      if (have66noF2noF3(pfx) && sz == 2) {
         /* FIXME: this needs an alignment check */
         Bool isMAX = opc == 0x3F;
         delta = dis_SSEint_E_to_G(
                    vbi, pfx, delta,
                    isMAX ? "pmaxud" : "pminud",
                    isMAX ? Iop_Max32Ux4 : Iop_Min32Ux4,
                    False
                 );
         goto decode_success;
      }
      break;

   case 0x40:
      /* 66 0F 38 40 /r = PMULLD xmm1, xmm2/m128
         32x4 integer multiply from xmm2/m128 to xmm1 */
      if (have66noF2noF3(pfx) && sz == 2) {

         modrm = getUChar(delta);

         IRTemp argL = newTemp(Ity_V128);
         IRTemp argR = newTemp(Ity_V128);

         if ( epartIsReg(modrm) ) {
            assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
            delta += 1;
            DIP( "pmulld %s,%s\n",
                 nameXMMReg( eregOfRexRM(pfx, modrm) ),
                 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
            delta += alen;
            DIP( "pmulld %s,%s\n",
                 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         }

         assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));

         putXMMReg( gregOfRexRM(pfx, modrm),
                    binop( Iop_Mul32x4, mkexpr(argL), mkexpr(argR)) );

         goto decode_success;
      }
      break;

   case 0x41:
      /* 66 0F 38 41 /r = PHMINPOSUW xmm1, xmm2/m128
         Packed Horizontal Word Minimum from xmm2/m128 to xmm1 */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PHMINPOSUW_128( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;

   case 0xDB:
   case 0xDC:
   case 0xDD:
   case 0xDE:
   case 0xDF:
      /* 66 0F 38 DC /r = AESENC xmm1, xmm2/m128
                  DD /r = AESENCLAST xmm1, xmm2/m128
                  DE /r = AESDEC xmm1, xmm2/m128
                  DF /r = AESDECLAST xmm1, xmm2/m128

                  DB /r = AESIMC xmm1, xmm2/m128 */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_AESx( vbi, pfx, delta, False/*!isAvx*/, opc );
         goto decode_success;
      }
      break;

   case 0xF0:
   case 0xF1:
      /* F2 0F 38 F0 /r = CRC32 r/m8, r32 (REX.W ok, 66 not ok)
         F2 0F 38 F1 /r = CRC32 r/m{16,32,64}, r32
         The decoding on this is a bit unusual.
      */
      if (haveF2noF3(pfx)
          && (opc == 0xF1 || (opc == 0xF0 && !have66(pfx)))) {
         modrm = getUChar(delta);

         if (opc == 0xF0)
            sz = 1;
         else
            vassert(sz == 2 || sz == 4 || sz == 8);

         IRType tyE = szToITy(sz);
         IRTemp valE = newTemp(tyE);

         /* NOTE(review): the DIPs below always print "crc32b"
            regardless of operand size -- cosmetic only. */
         if (epartIsReg(modrm)) {
            assign(valE, getIRegE(sz, pfx, modrm));
            delta += 1;
            DIP("crc32b %s,%s\n", nameIRegE(sz, pfx, modrm),
                nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign(valE, loadLE(tyE, mkexpr(addr)));
            delta += alen;
            DIP("crc32b %s,%s\n", dis_buf,
                nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm));
         }

         /* Somewhat funny getting/putting of the crc32 value, in order
            to ensure that it turns into 64-bit gets and puts.  However,
            mask off the upper 32 bits so as to not get memcheck false
            +ves around the helper call. */
         IRTemp valG0 = newTemp(Ity_I64);
         assign(valG0, binop(Iop_And64, getIRegG(8, pfx, modrm),
                             mkU64(0xFFFFFFFF)));

         const HChar* nm = NULL;
         void* fn = NULL;
         switch (sz) {
            case 1: nm = "amd64g_calc_crc32b";
                    fn = &amd64g_calc_crc32b; break;
            case 2: nm = "amd64g_calc_crc32w";
                    fn = &amd64g_calc_crc32w; break;
            case 4: nm = "amd64g_calc_crc32l";
                    fn = &amd64g_calc_crc32l; break;
            case 8: nm = "amd64g_calc_crc32q";
                    fn = &amd64g_calc_crc32q; break;
         }
         vassert(nm && fn);
         IRTemp valG1 = newTemp(Ity_I64);
         assign(valG1,
                mkIRExprCCall(Ity_I64, 0/*regparm*/, nm, fn,
                              mkIRExprVec_2(mkexpr(valG0),
                                            widenUto64(mkexpr(valE)))));

         putIRegG(4, pfx, modrm, unop(Iop_64to32, mkexpr(valG1)));
         goto decode_success;
      }
      break;

   default:
      break;

   }

   *decode_OK = False;
   return deltaIN;

  decode_success:
   *decode_OK = True;
   return delta;
}
18361 /*------------------------------------------------------------*/
18363 /*--- Top-level SSE4: dis_ESC_0F3A__SSE4 ---*/
18365 /*------------------------------------------------------------*/
/* Handles the SSE4 (and AVX) forms of PEXTRW that land in the 0F 3A
   space: extract the 16-bit lane selected by imm8[2:0] from the xmm
   source into a 32-bit GPR (zero-extended) or to memory. */
static Long dis_PEXTRW ( const VexAbiInfo* vbi, Prefix pfx,
                         Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   IRTemp t0    = IRTemp_INVALID;
   IRTemp t1    = IRTemp_INVALID;
   IRTemp t2    = IRTemp_INVALID;
   IRTemp t3    = IRTemp_INVALID;
   UChar  modrm = getUChar(delta);
   Int    alen  = 0;
   HChar  dis_buf[50];
   UInt   rG    = gregOfRexRM(pfx,modrm);
   Int    imm8_20;
   IRTemp xmm_vec = newTemp(Ity_V128);
   IRTemp d16     = newTemp(Ity_I16);
   const HChar* mbV = isAvx ? "v" : "";

   vassert(0==getRexW(pfx)); /* ensured by caller */
   assign( xmm_vec, getXMMReg(rG) );
   breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );

   /* Fetch the immediate; it follows either the modrm byte or the
      full amode. */
   if ( epartIsReg( modrm ) ) {
      imm8_20 = (Int)(getUChar(delta+1) & 7);
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      imm8_20 = (Int)(getUChar(delta+alen) & 7);
   }

   /* Select the requested 16-bit half of the relevant 32-bit chunk. */
   switch (imm8_20) {
      case 0:  assign(d16, unop(Iop_32to16,   mkexpr(t0))); break;
      case 1:  assign(d16, unop(Iop_32HIto16, mkexpr(t0))); break;
      case 2:  assign(d16, unop(Iop_32to16,   mkexpr(t1))); break;
      case 3:  assign(d16, unop(Iop_32HIto16, mkexpr(t1))); break;
      case 4:  assign(d16, unop(Iop_32to16,   mkexpr(t2))); break;
      case 5:  assign(d16, unop(Iop_32HIto16, mkexpr(t2))); break;
      case 6:  assign(d16, unop(Iop_32to16,   mkexpr(t3))); break;
      case 7:  assign(d16, unop(Iop_32HIto16, mkexpr(t3))); break;
      default: vassert(0);
   }

   if ( epartIsReg( modrm ) ) {
      UInt rE = eregOfRexRM(pfx,modrm);
      putIReg32( rE, unop(Iop_16Uto32, mkexpr(d16)) );
      delta += 2;   /* modrm + imm8 */
      DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20,
           nameXMMReg( rG ), nameIReg32( rE ) );
   } else {
      storeLE( mkexpr(addr), mkexpr(d16) );
      delta += alen+1;   /* amode + imm8 */
      DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20, nameXMMReg( rG ),
           dis_buf );
   }
   return delta;
}
/* Handles PEXTRD: extract the 32-bit lane selected by imm8[1:0] from
   the xmm source into a 32-bit GPR or to memory. */
static Long dis_PEXTRD ( const VexAbiInfo* vbi, Prefix pfx,
                         Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   IRTemp t0    = IRTemp_INVALID;
   IRTemp t1    = IRTemp_INVALID;
   IRTemp t2    = IRTemp_INVALID;
   IRTemp t3    = IRTemp_INVALID;
   UChar  modrm = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];

   Int    imm8_10;
   IRTemp xmm_vec   = newTemp(Ity_V128);
   IRTemp src_dword = newTemp(Ity_I32);
   const HChar* mbV = isAvx ? "v" : "";

   vassert(0==getRexW(pfx)); /* ensured by caller */
   modrm = getUChar(delta);
   assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
   breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );

   /* Fetch the immediate; it follows either the modrm byte or the
      full amode. */
   if ( epartIsReg( modrm ) ) {
      imm8_10 = (Int)(getUChar(delta+1) & 3);
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      imm8_10 = (Int)(getUChar(delta+alen) & 3);
   }

   switch ( imm8_10 ) {
      case 0:  assign( src_dword, mkexpr(t0) ); break;
      case 1:  assign( src_dword, mkexpr(t1) ); break;
      case 2:  assign( src_dword, mkexpr(t2) ); break;
      case 3:  assign( src_dword, mkexpr(t3) ); break;
      default: vassert(0);
   }

   if ( epartIsReg( modrm ) ) {
      putIReg32( eregOfRexRM(pfx,modrm), mkexpr(src_dword) );
      delta += 2;   /* modrm + imm8 */
      DIP( "%spextrd $%d, %s,%s\n", mbV, imm8_10,
           nameXMMReg( gregOfRexRM(pfx, modrm) ),
           nameIReg32( eregOfRexRM(pfx, modrm) ) );
   } else {
      storeLE( mkexpr(addr), mkexpr(src_dword) );
      delta += alen+1;   /* amode + imm8 */
      DIP( "%spextrd $%d, %s,%s\n", mbV,
           imm8_10, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
   }
   return delta;
}
/* Handles PEXTRQ: extract the 64-bit lane selected by imm8[0] from
   the xmm source into a 64-bit GPR or to memory. */
static Long dis_PEXTRQ ( const VexAbiInfo* vbi, Prefix pfx,
                         Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   UChar  modrm = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];

   Int    imm8_0;
   IRTemp xmm_vec   = newTemp(Ity_V128);
   IRTemp src_qword = newTemp(Ity_I64);
   const HChar* mbV = isAvx ? "v" : "";

   vassert(1==getRexW(pfx)); /* ensured by caller */
   modrm = getUChar(delta);
   assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );

   /* Fetch the immediate; it follows either the modrm byte or the
      full amode. */
   if ( epartIsReg( modrm ) ) {
      imm8_0 = (Int)(getUChar(delta+1) & 1);
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      imm8_0 = (Int)(getUChar(delta+alen) & 1);
   }

   switch ( imm8_0 ) {
      case 0:  assign( src_qword, unop(Iop_V128to64, mkexpr(xmm_vec)) );
               break;
      case 1:  assign( src_qword, unop(Iop_V128HIto64, mkexpr(xmm_vec)) );
               break;
      default: vassert(0);
   }

   if ( epartIsReg( modrm ) ) {
      putIReg64( eregOfRexRM(pfx,modrm), mkexpr(src_qword) );
      delta += 2;   /* modrm + imm8 */
      DIP( "%spextrq $%d, %s,%s\n", mbV, imm8_0,
           nameXMMReg( gregOfRexRM(pfx, modrm) ),
           nameIReg64( eregOfRexRM(pfx, modrm) ) );
   } else {
      storeLE( mkexpr(addr), mkexpr(src_qword) );
      delta += alen+1;   /* amode + imm8 */
      DIP( "%spextrq $%d, %s,%s\n", mbV,
           imm8_0, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
   }
   return delta;
}
18522 static IRExpr
* math_CTZ32(IRExpr
*exp
)
18524 /* Iop_Ctz32 isn't implemented by the amd64 back end, so use Iop_Ctz64. */
18525 return unop(Iop_64to32
, unop(Iop_Ctz64
, unop(Iop_32Uto64
, exp
)));
/* Fast-path translation of PCMPISTRI with imm8 = 0x38 or 0x3A (equal-
   each, polarity = masked-negative, lsb index).  Computes intRes2 and
   ECX/flags inline rather than going via the generic helper.  Returns
   delta unchanged (presumably already advanced by the caller -- TODO
   confirm against the call site). */
static Long dis_PCMPISTRI_3A ( UChar modrm, UInt regNoL, UInt regNoR,
                               Long delta, UChar opc, UChar imm,
                               HChar dis_buf[])
{
   /* We only handle PCMPISTRI for now */
   vassert((opc & 0x03) == 0x03);
   /* And only an immediate byte of 0x38 or 0x3A */
   vassert((imm & ~0x02) == 0x38);

   /* FIXME: Is this correct when RegNoL == 16 ? */
   IRTemp argL = newTemp(Ity_V128);
   assign(argL, getXMMReg(regNoL));
   IRTemp argR = newTemp(Ity_V128);
   assign(argR, getXMMReg(regNoR));

   /* zmaskL/zmaskR: bit i set iff byte i of the operand is zero
      (i.e. marks the string terminator and everything at/after it). */
   IRTemp zmaskL = newTemp(Ity_I32);
   assign(zmaskL, unop(Iop_16Uto32,
                       unop(Iop_GetMSBs8x16,
                            binop(Iop_CmpEQ8x16, mkexpr(argL), mkV128(0)))));
   IRTemp zmaskR = newTemp(Ity_I32);
   assign(zmaskR, unop(Iop_16Uto32,
                       unop(Iop_GetMSBs8x16,
                            binop(Iop_CmpEQ8x16, mkexpr(argR), mkV128(0)))));

   /* We want validL = ~(zmaskL | -zmaskL)

      But this formulation kills memcheck's validity tracking when any
      bits above the first "1" are invalid.  So reformulate as:

      validL = (zmaskL ? (1 << ctz(zmaskL)) : 0) - 1
   */

   IRExpr *ctzL = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskL)));

   /* Generate a bool expression which is zero iff the original is
      zero.  Do this carefully so memcheck can propagate validity bits
      correctly.
    */
   IRTemp zmaskL_zero = newTemp(Ity_I1);
   assign(zmaskL_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskL), mkU32(0)));

   IRTemp validL = newTemp(Ity_I32);
   assign(validL, binop(Iop_Sub32,
                        IRExpr_ITE(mkexpr(zmaskL_zero),
                                   binop(Iop_Shl32, mkU32(1), ctzL),
                                   mkU32(0)),
                        mkU32(1)));

   /* And similarly for validR. */
   IRExpr *ctzR = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskR)));
   IRTemp zmaskR_zero = newTemp(Ity_I1);
   assign(zmaskR_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskR), mkU32(0)));
   IRTemp validR = newTemp(Ity_I32);
   assign(validR, binop(Iop_Sub32,
                        IRExpr_ITE(mkexpr(zmaskR_zero),
                                   binop(Iop_Shl32, mkU32(1), ctzR),
                                   mkU32(0)),
                        mkU32(1)));

   /* Do the actual comparison. */
   IRExpr *boolResII = unop(Iop_16Uto32,
                            unop(Iop_GetMSBs8x16,
                                 binop(Iop_CmpEQ8x16, mkexpr(argL),
                                                      mkexpr(argR))));

   /* Compute boolresII & validL & validR (i.e., if both valid, use
      comparison result) */
   IRExpr *intRes1_a = binop(Iop_And32, boolResII,
                             binop(Iop_And32,
                                   mkexpr(validL), mkexpr(validR)));

   /* Compute ~(validL | validR); i.e., if both invalid, force 1. */
   IRExpr *intRes1_b = unop(Iop_Not32, binop(Iop_Or32,
                                             mkexpr(validL), mkexpr(validR)));
   /* Otherwise, zero. */
   IRExpr *intRes1 = binop(Iop_And32, mkU32(0xFFFF),
                           binop(Iop_Or32, intRes1_a, intRes1_b));

   /* The "0x30" in imm=0x3A means "polarity=3" means XOR validL with
      result. */
   IRTemp intRes2 = newTemp(Ity_I32);
   assign(intRes2, binop(Iop_And32, mkU32(0xFFFF),
                         binop(Iop_Xor32, intRes1, mkexpr(validL))));

   /* If the 0x40 bit were set in imm=0x3A, we would return the index
      of the msb.  Since it is clear, we return the index of the
      lsb.  The OR with 0x10000 guarantees a set bit, so a no-match
      result yields index 16. */
   IRExpr *newECX = math_CTZ32(binop(Iop_Or32,
                                     mkexpr(intRes2), mkU32(0x10000)));

   /* And thats our rcx. */
   putIReg32(R_RCX, newECX);

   /* Now for the condition codes... */

   /* C == 0 iff intRes2 == 0 */
   IRExpr *c_bit = IRExpr_ITE( binop(Iop_ExpCmpNE32, mkexpr(intRes2),
                                     mkU32(0)),
                               mkU32(1 << AMD64G_CC_SHIFT_C),
                               mkU32(0));
   /* Z == 1 iff any in argL is 0 */
   IRExpr *z_bit = IRExpr_ITE( mkexpr(zmaskL_zero),
                               mkU32(1 << AMD64G_CC_SHIFT_Z),
                               mkU32(0));
   /* S == 1 iff any in argR is 0 */
   IRExpr *s_bit = IRExpr_ITE( mkexpr(zmaskR_zero),
                               mkU32(1 << AMD64G_CC_SHIFT_S),
                               mkU32(0));
   /* O == IntRes2[0] */
   IRExpr *o_bit = binop(Iop_Shl32, binop(Iop_And32, mkexpr(intRes2),
                                          mkU32(0x01)),
                         mkU8(AMD64G_CC_SHIFT_O));

   /* Put them all together */
   IRTemp cc = newTemp(Ity_I64);
   assign(cc, widenUto64(binop(Iop_Or32,
                               binop(Iop_Or32, c_bit, z_bit),
                               binop(Iop_Or32, s_bit, o_bit))));
   /* COPY op: DEP1 holds the literal flag bits. */
   stmt(IRStmt_Put(OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY)));
   stmt(IRStmt_Put(OFFB_CC_DEP1, mkexpr(cc)));
   stmt(IRStmt_Put(OFFB_CC_DEP2, mkU64(0)));
   stmt(IRStmt_Put(OFFB_CC_NDEP, mkU64(0)));

   return delta;
}
18654 /* This can fail, in which case it returns the original (unchanged)
18656 static Long
dis_PCMPxSTRx ( const VexAbiInfo
* vbi
, Prefix pfx
,
18657 Long delta
, Bool isAvx
, UChar opc
)
18659 Long delta0
= delta
;
18660 UInt isISTRx
= opc
& 2;
18661 UInt isxSTRM
= (opc
& 1) ^ 1;
18665 IRTemp addr
= IRTemp_INVALID
;
18669 /* This is a nasty kludge. We need to pass 2 x V128 to the helper
18670 (which is clean). Since we can't do that, use a dirty helper to
18671 compute the results directly from the XMM regs in the guest
18672 state. That means for the memory case, we need to move the left
18673 operand into a pseudo-register (XMM16, let's call it). */
18674 UChar modrm
= getUChar(delta
);
18675 if (epartIsReg(modrm
)) {
18676 regNoL
= eregOfRexRM(pfx
, modrm
);
18677 regNoR
= gregOfRexRM(pfx
, modrm
);
18678 imm
= getUChar(delta
+1);
18681 regNoL
= 16; /* use XMM16 as an intermediary */
18682 regNoR
= gregOfRexRM(pfx
, modrm
);
18683 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
18684 /* No alignment check; I guess that makes sense, given that
18685 these insns are for dealing with C style strings. */
18686 stmt( IRStmt_Put( OFFB_YMM16
, loadLE(Ity_V128
, mkexpr(addr
)) ));
18687 imm
= getUChar(delta
+alen
);
18691 /* Print the insn here, since dis_PCMPISTRI_3A doesn't do so
18693 if (regNoL
== 16) {
18694 DIP("%spcmp%cstr%c $%x,%s,%s\n",
18695 isAvx
? "v" : "", isISTRx
? 'i' : 'e', isxSTRM
? 'm' : 'i',
18696 (UInt
)imm
, dis_buf
, nameXMMReg(regNoR
));
18698 DIP("%spcmp%cstr%c $%x,%s,%s\n",
18699 isAvx
? "v" : "", isISTRx
? 'i' : 'e', isxSTRM
? 'm' : 'i',
18700 (UInt
)imm
, nameXMMReg(regNoL
), nameXMMReg(regNoR
));
18703 /* Handle special case(s). */
18704 if (imm
== 0x3A && isISTRx
&& !isxSTRM
) {
18705 return dis_PCMPISTRI_3A ( modrm
, regNoL
, regNoR
, delta
,
18706 opc
, imm
, dis_buf
);
18709 /* Now we know the XMM reg numbers for the operands, and the
18710 immediate byte. Is it one we can actually handle? Throw out any
18711 cases for which the helper function has not been verified. */
18713 case 0x00: case 0x02:
18714 case 0x08: case 0x0A: case 0x0C: case 0x0E:
18715 case 0x10: case 0x12: case 0x14:
18716 case 0x18: case 0x1A:
18717 case 0x30: case 0x34:
18718 case 0x38: case 0x3A:
18719 case 0x40: case 0x42: case 0x44: case 0x46:
18722 case 0x70: case 0x72:
18724 // the 16-bit character versions of the above
18725 case 0x01: case 0x03:
18726 case 0x09: case 0x0B: case 0x0D:
18728 case 0x19: case 0x1B:
18729 case 0x39: case 0x3B:
18730 case 0x41: case 0x45:
18734 return delta0
; /*FAIL*/
18737 /* Who ya gonna call? Presumably not Ghostbusters. */
18738 void* fn
= &amd64g_dirtyhelper_PCMPxSTRx
;
18739 const HChar
* nm
= "amd64g_dirtyhelper_PCMPxSTRx";
18741 /* Round up the arguments. Note that this is a kludge -- the use
18742 of mkU64 rather than mkIRExpr_HWord implies the assumption that
18743 the host's word size is 64-bit. */
18744 UInt gstOffL
= regNoL
== 16 ? OFFB_YMM16
: ymmGuestRegOffset(regNoL
);
18745 UInt gstOffR
= ymmGuestRegOffset(regNoR
);
18747 IRExpr
* opc4_and_imm
= mkU64((opc
<< 8) | (imm
& 0xFF));
18748 IRExpr
* gstOffLe
= mkU64(gstOffL
);
18749 IRExpr
* gstOffRe
= mkU64(gstOffR
);
18750 IRExpr
* edxIN
= isISTRx
? mkU64(0) : getIRegRDX(8);
18751 IRExpr
* eaxIN
= isISTRx
? mkU64(0) : getIRegRAX(8);
18753 = mkIRExprVec_6( IRExpr_GSPTR(),
18754 opc4_and_imm
, gstOffLe
, gstOffRe
, edxIN
, eaxIN
);
18756 IRTemp resT
= newTemp(Ity_I64
);
18757 IRDirty
* d
= unsafeIRDirty_1_N( resT
, 0/*regparms*/, nm
, fn
, args
);
18758 /* It's not really a dirty call, but we can't use the clean helper
18759 mechanism here for the very lame reason that we can't pass 2 x
18760 V128s by value to a helper. Hence this roundabout scheme. */
18762 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
18763 d
->fxState
[0].fx
= Ifx_Read
;
18764 d
->fxState
[0].offset
= gstOffL
;
18765 d
->fxState
[0].size
= sizeof(U128
);
18766 d
->fxState
[1].fx
= Ifx_Read
;
18767 d
->fxState
[1].offset
= gstOffR
;
18768 d
->fxState
[1].size
= sizeof(U128
);
18770 /* Declare that the helper writes XMM0. */
18772 d
->fxState
[2].fx
= Ifx_Write
;
18773 d
->fxState
[2].offset
= ymmGuestRegOffset(0);
18774 d
->fxState
[2].size
= sizeof(U128
);
18777 stmt( IRStmt_Dirty(d
) );
18779 /* Now resT[15:0] holds the new OSZACP values, so the condition
18780 codes must be updated. And for a xSTRI case, resT[31:16] holds
18781 the new ECX value, so stash that too. */
18783 putIReg64(R_RCX
, binop(Iop_And64
,
18784 binop(Iop_Shr64
, mkexpr(resT
), mkU8(16)),
18788 /* Zap the upper half of the dest reg as per AVX conventions. */
18789 if (isxSTRM
&& isAvx
)
18790 putYMMRegLane128(/*YMM*/0, 1, mkV128(0));
18794 binop(Iop_And64
, mkexpr(resT
), mkU64(0xFFFF))
18796 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
18797 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
18798 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
18804 static IRTemp
math_PINSRB_128 ( IRTemp v128
, IRTemp u8
, UInt imm8
)
18806 vassert(imm8
>= 0 && imm8
<= 15);
18808 // Create a V128 value which has the selected byte in the
18809 // specified lane, and zeroes everywhere else.
18810 IRTemp tmp128
= newTemp(Ity_V128
);
18811 IRTemp halfshift
= newTemp(Ity_I64
);
18812 assign(halfshift
, binop(Iop_Shl64
,
18813 unop(Iop_8Uto64
, mkexpr(u8
)),
18814 mkU8(8 * (imm8
& 7))));
18816 assign(tmp128
, binop(Iop_64HLtoV128
, mkU64(0), mkexpr(halfshift
)));
18818 assign(tmp128
, binop(Iop_64HLtoV128
, mkexpr(halfshift
), mkU64(0)));
18821 UShort mask
= ~(1 << imm8
);
18822 IRTemp res
= newTemp(Ity_V128
);
18823 assign( res
, binop(Iop_OrV128
,
18825 binop(Iop_AndV128
, mkexpr(v128
), mkV128(mask
))) );
18830 static IRTemp
math_PINSRD_128 ( IRTemp v128
, IRTemp u32
, UInt imm8
)
18832 IRTemp z32
= newTemp(Ity_I32
);
18833 assign(z32
, mkU32(0));
18835 /* Surround u32 with zeroes as per imm, giving us something we can
18836 OR into a suitably masked-out v128.*/
18837 IRTemp withZs
= newTemp(Ity_V128
);
18840 case 3: mask
= 0x0FFF;
18841 assign(withZs
, mkV128from32s(u32
, z32
, z32
, z32
));
18843 case 2: mask
= 0xF0FF;
18844 assign(withZs
, mkV128from32s(z32
, u32
, z32
, z32
));
18846 case 1: mask
= 0xFF0F;
18847 assign(withZs
, mkV128from32s(z32
, z32
, u32
, z32
));
18849 case 0: mask
= 0xFFF0;
18850 assign(withZs
, mkV128from32s(z32
, z32
, z32
, u32
));
18852 default: vassert(0);
18855 IRTemp res
= newTemp(Ity_V128
);
18856 assign(res
, binop( Iop_OrV128
,
18858 binop( Iop_AndV128
, mkexpr(v128
), mkV128(mask
) ) ) );
18863 static IRTemp
math_PINSRQ_128 ( IRTemp v128
, IRTemp u64
, UInt imm8
)
18865 /* Surround u64 with zeroes as per imm, giving us something we can
18866 OR into a suitably masked-out v128.*/
18867 IRTemp withZs
= newTemp(Ity_V128
);
18871 assign(withZs
, binop(Iop_64HLtoV128
, mkU64(0), mkexpr(u64
)));
18873 vassert(imm8
== 1);
18875 assign( withZs
, binop(Iop_64HLtoV128
, mkexpr(u64
), mkU64(0)));
18878 IRTemp res
= newTemp(Ity_V128
);
18879 assign( res
, binop( Iop_OrV128
,
18881 binop( Iop_AndV128
, mkexpr(v128
), mkV128(mask
) ) ) );
18886 static IRTemp
math_INSERTPS ( IRTemp dstV
, IRTemp toInsertD
, UInt imm8
)
18888 const IRTemp inval
= IRTemp_INVALID
;
18889 IRTemp dstDs
[4] = { inval
, inval
, inval
, inval
};
18890 breakupV128to32s( dstV
, &dstDs
[3], &dstDs
[2], &dstDs
[1], &dstDs
[0] );
18892 vassert(imm8
<= 255);
18893 dstDs
[(imm8
>> 4) & 3] = toInsertD
; /* "imm8_count_d" */
18895 UInt imm8_zmask
= (imm8
& 15);
18896 IRTemp zero_32
= newTemp(Ity_I32
);
18897 assign( zero_32
, mkU32(0) );
18898 IRTemp resV
= newTemp(Ity_V128
);
18899 assign( resV
, mkV128from32s(
18900 ((imm8_zmask
& 8) == 8) ? zero_32
: dstDs
[3],
18901 ((imm8_zmask
& 4) == 4) ? zero_32
: dstDs
[2],
18902 ((imm8_zmask
& 2) == 2) ? zero_32
: dstDs
[1],
18903 ((imm8_zmask
& 1) == 1) ? zero_32
: dstDs
[0]) );
18908 static Long
dis_PEXTRB_128_GtoE ( const VexAbiInfo
* vbi
, Prefix pfx
,
18909 Long delta
, Bool isAvx
)
18911 IRTemp addr
= IRTemp_INVALID
;
18914 IRTemp xmm_vec
= newTemp(Ity_V128
);
18915 IRTemp sel_lane
= newTemp(Ity_I32
);
18916 IRTemp shr_lane
= newTemp(Ity_I32
);
18917 const HChar
* mbV
= isAvx
? "v" : "";
18918 UChar modrm
= getUChar(delta
);
18919 IRTemp t3
, t2
, t1
, t0
;
18921 assign( xmm_vec
, getXMMReg( gregOfRexRM(pfx
,modrm
) ) );
18922 t3
= t2
= t1
= t0
= IRTemp_INVALID
;
18923 breakupV128to32s( xmm_vec
, &t3
, &t2
, &t1
, &t0
);
18925 if ( epartIsReg( modrm
) ) {
18926 imm8
= (Int
)getUChar(delta
+1);
18928 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
18929 imm8
= (Int
)getUChar(delta
+alen
);
18931 switch ( (imm8
>> 2) & 3 ) {
18932 case 0: assign( sel_lane
, mkexpr(t0
) ); break;
18933 case 1: assign( sel_lane
, mkexpr(t1
) ); break;
18934 case 2: assign( sel_lane
, mkexpr(t2
) ); break;
18935 case 3: assign( sel_lane
, mkexpr(t3
) ); break;
18936 default: vassert(0);
18939 binop( Iop_Shr32
, mkexpr(sel_lane
), mkU8(((imm8
& 3)*8)) ) );
18941 if ( epartIsReg( modrm
) ) {
18942 putIReg64( eregOfRexRM(pfx
,modrm
),
18944 binop(Iop_And32
, mkexpr(shr_lane
), mkU32(255)) ) );
18946 DIP( "%spextrb $%d, %s,%s\n", mbV
, imm8
,
18947 nameXMMReg( gregOfRexRM(pfx
, modrm
) ),
18948 nameIReg64( eregOfRexRM(pfx
, modrm
) ) );
18950 storeLE( mkexpr(addr
), unop(Iop_32to8
, mkexpr(shr_lane
) ) );
18952 DIP( "%spextrb $%d,%s,%s\n", mbV
,
18953 imm8
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ), dis_buf
);
18960 static IRTemp
math_DPPD_128 ( IRTemp src_vec
, IRTemp dst_vec
, UInt imm8
)
18962 vassert(imm8
< 256);
18963 UShort imm8_perms
[4] = { 0x0000, 0x00FF, 0xFF00, 0xFFFF };
18964 IRTemp and_vec
= newTemp(Ity_V128
);
18965 IRTemp sum_vec
= newTemp(Ity_V128
);
18966 IRTemp rm
= newTemp(Ity_I32
);
18967 assign( rm
, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
18968 assign( and_vec
, binop( Iop_AndV128
,
18969 triop( Iop_Mul64Fx2
,
18971 mkexpr(dst_vec
), mkexpr(src_vec
) ),
18972 mkV128( imm8_perms
[ ((imm8
>> 4) & 3) ] ) ) );
18974 assign( sum_vec
, binop( Iop_Add64F0x2
,
18975 binop( Iop_InterleaveHI64x2
,
18976 mkexpr(and_vec
), mkexpr(and_vec
) ),
18977 binop( Iop_InterleaveLO64x2
,
18978 mkexpr(and_vec
), mkexpr(and_vec
) ) ) );
18979 IRTemp res
= newTemp(Ity_V128
);
18980 assign(res
, binop( Iop_AndV128
,
18981 binop( Iop_InterleaveLO64x2
,
18982 mkexpr(sum_vec
), mkexpr(sum_vec
) ),
18983 mkV128( imm8_perms
[ (imm8
& 3) ] ) ) );
18988 static IRTemp
math_DPPS_128 ( IRTemp src_vec
, IRTemp dst_vec
, UInt imm8
)
18990 vassert(imm8
< 256);
18991 IRTemp tmp_prod_vec
= newTemp(Ity_V128
);
18992 IRTemp prod_vec
= newTemp(Ity_V128
);
18993 IRTemp sum_vec
= newTemp(Ity_V128
);
18994 IRTemp rm
= newTemp(Ity_I32
);
18995 IRTemp v3
, v2
, v1
, v0
;
18996 v3
= v2
= v1
= v0
= IRTemp_INVALID
;
18997 UShort imm8_perms
[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
18998 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
18999 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
19002 assign( rm
, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
19003 assign( tmp_prod_vec
,
19004 binop( Iop_AndV128
,
19005 triop( Iop_Mul32Fx4
,
19006 mkexpr(rm
), mkexpr(dst_vec
), mkexpr(src_vec
) ),
19007 mkV128( imm8_perms
[((imm8
>> 4)& 15)] ) ) );
19008 breakupV128to32s( tmp_prod_vec
, &v3
, &v2
, &v1
, &v0
);
19009 assign( prod_vec
, mkV128from32s( v3
, v1
, v2
, v0
) );
19011 assign( sum_vec
, triop( Iop_Add32Fx4
,
19013 binop( Iop_InterleaveHI32x4
,
19014 mkexpr(prod_vec
), mkexpr(prod_vec
) ),
19015 binop( Iop_InterleaveLO32x4
,
19016 mkexpr(prod_vec
), mkexpr(prod_vec
) ) ) );
19018 IRTemp res
= newTemp(Ity_V128
);
19019 assign( res
, binop( Iop_AndV128
,
19020 triop( Iop_Add32Fx4
,
19022 binop( Iop_InterleaveHI32x4
,
19023 mkexpr(sum_vec
), mkexpr(sum_vec
) ),
19024 binop( Iop_InterleaveLO32x4
,
19025 mkexpr(sum_vec
), mkexpr(sum_vec
) ) ),
19026 mkV128( imm8_perms
[ (imm8
& 15) ] ) ) );
19031 static IRTemp
math_MPSADBW_128 ( IRTemp dst_vec
, IRTemp src_vec
, UInt imm8
)
19033 /* Mask out bits of the operands we don't need. This isn't
19034 strictly necessary, but it does ensure Memcheck doesn't
19035 give us any false uninitialised value errors as a
19037 UShort src_mask
[4] = { 0x000F, 0x00F0, 0x0F00, 0xF000 };
19038 UShort dst_mask
[2] = { 0x07FF, 0x7FF0 };
19040 IRTemp src_maskV
= newTemp(Ity_V128
);
19041 IRTemp dst_maskV
= newTemp(Ity_V128
);
19042 assign(src_maskV
, mkV128( src_mask
[ imm8
& 3 ] ));
19043 assign(dst_maskV
, mkV128( dst_mask
[ (imm8
>> 2) & 1 ] ));
19045 IRTemp src_masked
= newTemp(Ity_V128
);
19046 IRTemp dst_masked
= newTemp(Ity_V128
);
19047 assign(src_masked
, binop(Iop_AndV128
, mkexpr(src_vec
), mkexpr(src_maskV
)));
19048 assign(dst_masked
, binop(Iop_AndV128
, mkexpr(dst_vec
), mkexpr(dst_maskV
)));
19050 /* Generate 4 64 bit values that we can hand to a clean helper */
19051 IRTemp sHi
= newTemp(Ity_I64
);
19052 IRTemp sLo
= newTemp(Ity_I64
);
19053 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(src_masked
)) );
19054 assign( sLo
, unop(Iop_V128to64
, mkexpr(src_masked
)) );
19056 IRTemp dHi
= newTemp(Ity_I64
);
19057 IRTemp dLo
= newTemp(Ity_I64
);
19058 assign( dHi
, unop(Iop_V128HIto64
, mkexpr(dst_masked
)) );
19059 assign( dLo
, unop(Iop_V128to64
, mkexpr(dst_masked
)) );
19061 /* Compute halves of the result separately */
19062 IRTemp resHi
= newTemp(Ity_I64
);
19063 IRTemp resLo
= newTemp(Ity_I64
);
19066 = mkIRExprVec_5( mkexpr(sHi
), mkexpr(sLo
), mkexpr(dHi
), mkexpr(dLo
),
19067 mkU64( 0x80 | (imm8
& 7) ));
19069 = mkIRExprVec_5( mkexpr(sHi
), mkexpr(sLo
), mkexpr(dHi
), mkexpr(dLo
),
19070 mkU64( 0x00 | (imm8
& 7) ));
19072 assign(resHi
, mkIRExprCCall( Ity_I64
, 0/*regparm*/,
19073 "amd64g_calc_mpsadbw",
19074 &amd64g_calc_mpsadbw
, argsHi
));
19075 assign(resLo
, mkIRExprCCall( Ity_I64
, 0/*regparm*/,
19076 "amd64g_calc_mpsadbw",
19077 &amd64g_calc_mpsadbw
, argsLo
));
19079 IRTemp res
= newTemp(Ity_V128
);
19080 assign(res
, binop(Iop_64HLtoV128
, mkexpr(resHi
), mkexpr(resLo
)));
19084 static Long
dis_EXTRACTPS ( const VexAbiInfo
* vbi
, Prefix pfx
,
19085 Long delta
, Bool isAvx
)
19087 IRTemp addr
= IRTemp_INVALID
;
19090 UChar modrm
= getUChar(delta
);
19092 IRTemp xmm_vec
= newTemp(Ity_V128
);
19093 IRTemp src_dword
= newTemp(Ity_I32
);
19094 UInt rG
= gregOfRexRM(pfx
,modrm
);
19095 IRTemp t3
, t2
, t1
, t0
;
19096 t3
= t2
= t1
= t0
= IRTemp_INVALID
;
19098 assign( xmm_vec
, getXMMReg( rG
) );
19099 breakupV128to32s( xmm_vec
, &t3
, &t2
, &t1
, &t0
);
19101 if ( epartIsReg( modrm
) ) {
19102 imm8_10
= (Int
)(getUChar(delta
+1) & 3);
19104 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19105 imm8_10
= (Int
)(getUChar(delta
+alen
) & 3);
19108 switch ( imm8_10
) {
19109 case 0: assign( src_dword
, mkexpr(t0
) ); break;
19110 case 1: assign( src_dword
, mkexpr(t1
) ); break;
19111 case 2: assign( src_dword
, mkexpr(t2
) ); break;
19112 case 3: assign( src_dword
, mkexpr(t3
) ); break;
19113 default: vassert(0);
19116 if ( epartIsReg( modrm
) ) {
19117 UInt rE
= eregOfRexRM(pfx
,modrm
);
19118 putIReg32( rE
, mkexpr(src_dword
) );
19120 DIP( "%sextractps $%d, %s,%s\n", isAvx
? "v" : "", imm8_10
,
19121 nameXMMReg( rG
), nameIReg32( rE
) );
19123 storeLE( mkexpr(addr
), mkexpr(src_dword
) );
19125 DIP( "%sextractps $%d, %s,%s\n", isAvx
? "v" : "", imm8_10
,
19126 nameXMMReg( rG
), dis_buf
);
19133 static IRTemp
math_PCLMULQDQ( IRTemp dV
, IRTemp sV
, UInt imm8
)
19135 IRTemp t0
= newTemp(Ity_I64
);
19136 IRTemp t1
= newTemp(Ity_I64
);
19137 assign(t0
, unop((imm8
&1)? Iop_V128HIto64
: Iop_V128to64
,
19139 assign(t1
, unop((imm8
&16) ? Iop_V128HIto64
: Iop_V128to64
,
19142 IRTemp t2
= newTemp(Ity_I64
);
19143 IRTemp t3
= newTemp(Ity_I64
);
19147 args
= mkIRExprVec_3(mkexpr(t0
), mkexpr(t1
), mkU64(0));
19148 assign(t2
, mkIRExprCCall(Ity_I64
,0, "amd64g_calculate_pclmul",
19149 &amd64g_calculate_pclmul
, args
));
19150 args
= mkIRExprVec_3(mkexpr(t0
), mkexpr(t1
), mkU64(1));
19151 assign(t3
, mkIRExprCCall(Ity_I64
,0, "amd64g_calculate_pclmul",
19152 &amd64g_calculate_pclmul
, args
));
19154 IRTemp res
= newTemp(Ity_V128
);
19155 assign(res
, binop(Iop_64HLtoV128
, mkexpr(t3
), mkexpr(t2
)));
19160 __attribute__((noinline
))
19162 Long
dis_ESC_0F3A__SSE4 ( Bool
* decode_OK
,
19163 const VexAbiInfo
* vbi
,
19164 Prefix pfx
, Int sz
, Long deltaIN
)
19166 IRTemp addr
= IRTemp_INVALID
;
19171 *decode_OK
= False
;
19173 Long delta
= deltaIN
;
19174 UChar opc
= getUChar(delta
);
19179 /* 66 0F 3A 08 /r ib = ROUNDPS imm8, xmm2/m128, xmm1 */
19180 if (have66noF2noF3(pfx
) && sz
== 2) {
19182 IRTemp src0
= newTemp(Ity_F32
);
19183 IRTemp src1
= newTemp(Ity_F32
);
19184 IRTemp src2
= newTemp(Ity_F32
);
19185 IRTemp src3
= newTemp(Ity_F32
);
19186 IRTemp res0
= newTemp(Ity_F32
);
19187 IRTemp res1
= newTemp(Ity_F32
);
19188 IRTemp res2
= newTemp(Ity_F32
);
19189 IRTemp res3
= newTemp(Ity_F32
);
19190 IRTemp rm
= newTemp(Ity_I32
);
19193 modrm
= getUChar(delta
);
19195 if (epartIsReg(modrm
)) {
19197 getXMMRegLane32F( eregOfRexRM(pfx
, modrm
), 0 ) );
19199 getXMMRegLane32F( eregOfRexRM(pfx
, modrm
), 1 ) );
19201 getXMMRegLane32F( eregOfRexRM(pfx
, modrm
), 2 ) );
19203 getXMMRegLane32F( eregOfRexRM(pfx
, modrm
), 3 ) );
19204 imm
= getUChar(delta
+1);
19205 if (imm
& ~15) goto decode_failure
;
19207 DIP( "roundps $%d,%s,%s\n",
19208 imm
, nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
19209 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19211 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19212 gen_SEGV_if_not_16_aligned(addr
);
19213 assign( src0
, loadLE(Ity_F32
,
19214 binop(Iop_Add64
, mkexpr(addr
), mkU64(0) )));
19215 assign( src1
, loadLE(Ity_F32
,
19216 binop(Iop_Add64
, mkexpr(addr
), mkU64(4) )));
19217 assign( src2
, loadLE(Ity_F32
,
19218 binop(Iop_Add64
, mkexpr(addr
), mkU64(8) )));
19219 assign( src3
, loadLE(Ity_F32
,
19220 binop(Iop_Add64
, mkexpr(addr
), mkU64(12) )));
19221 imm
= getUChar(delta
+alen
);
19222 if (imm
& ~15) goto decode_failure
;
19224 DIP( "roundps $%d,%s,%s\n",
19225 imm
, dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19228 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19229 that encoding is the same as the encoding for IRRoundingMode,
19230 we can use that value directly in the IR as a rounding
19232 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
19234 assign(res0
, binop(Iop_RoundF32toInt
, mkexpr(rm
), mkexpr(src0
)) );
19235 assign(res1
, binop(Iop_RoundF32toInt
, mkexpr(rm
), mkexpr(src1
)) );
19236 assign(res2
, binop(Iop_RoundF32toInt
, mkexpr(rm
), mkexpr(src2
)) );
19237 assign(res3
, binop(Iop_RoundF32toInt
, mkexpr(rm
), mkexpr(src3
)) );
19239 putXMMRegLane32F( gregOfRexRM(pfx
, modrm
), 0, mkexpr(res0
) );
19240 putXMMRegLane32F( gregOfRexRM(pfx
, modrm
), 1, mkexpr(res1
) );
19241 putXMMRegLane32F( gregOfRexRM(pfx
, modrm
), 2, mkexpr(res2
) );
19242 putXMMRegLane32F( gregOfRexRM(pfx
, modrm
), 3, mkexpr(res3
) );
19244 goto decode_success
;
19249 /* 66 0F 3A 09 /r ib = ROUNDPD imm8, xmm2/m128, xmm1 */
19250 if (have66noF2noF3(pfx
) && sz
== 2) {
19252 IRTemp src0
= newTemp(Ity_F64
);
19253 IRTemp src1
= newTemp(Ity_F64
);
19254 IRTemp res0
= newTemp(Ity_F64
);
19255 IRTemp res1
= newTemp(Ity_F64
);
19256 IRTemp rm
= newTemp(Ity_I32
);
19259 modrm
= getUChar(delta
);
19261 if (epartIsReg(modrm
)) {
19263 getXMMRegLane64F( eregOfRexRM(pfx
, modrm
), 0 ) );
19265 getXMMRegLane64F( eregOfRexRM(pfx
, modrm
), 1 ) );
19266 imm
= getUChar(delta
+1);
19267 if (imm
& ~15) goto decode_failure
;
19269 DIP( "roundpd $%d,%s,%s\n",
19270 imm
, nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
19271 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19273 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19274 gen_SEGV_if_not_16_aligned(addr
);
19275 assign( src0
, loadLE(Ity_F64
,
19276 binop(Iop_Add64
, mkexpr(addr
), mkU64(0) )));
19277 assign( src1
, loadLE(Ity_F64
,
19278 binop(Iop_Add64
, mkexpr(addr
), mkU64(8) )));
19279 imm
= getUChar(delta
+alen
);
19280 if (imm
& ~15) goto decode_failure
;
19282 DIP( "roundpd $%d,%s,%s\n",
19283 imm
, dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19286 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19287 that encoding is the same as the encoding for IRRoundingMode,
19288 we can use that value directly in the IR as a rounding
19290 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
19292 assign(res0
, binop(Iop_RoundF64toInt
, mkexpr(rm
), mkexpr(src0
)) );
19293 assign(res1
, binop(Iop_RoundF64toInt
, mkexpr(rm
), mkexpr(src1
)) );
19295 putXMMRegLane64F( gregOfRexRM(pfx
, modrm
), 0, mkexpr(res0
) );
19296 putXMMRegLane64F( gregOfRexRM(pfx
, modrm
), 1, mkexpr(res1
) );
19298 goto decode_success
;
19304 /* 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
19305 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
19307 if (have66noF2noF3(pfx
) && sz
== 2) {
19309 Bool isD
= opc
== 0x0B;
19310 IRTemp src
= newTemp(isD
? Ity_F64
: Ity_F32
);
19311 IRTemp res
= newTemp(isD
? Ity_F64
: Ity_F32
);
19314 modrm
= getUChar(delta
);
19316 if (epartIsReg(modrm
)) {
19318 isD
? getXMMRegLane64F( eregOfRexRM(pfx
, modrm
), 0 )
19319 : getXMMRegLane32F( eregOfRexRM(pfx
, modrm
), 0 ) );
19320 imm
= getUChar(delta
+1);
19321 if (imm
& ~15) goto decode_failure
;
19323 DIP( "rounds%c $%d,%s,%s\n",
19325 imm
, nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
19326 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19328 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19329 assign( src
, loadLE( isD
? Ity_F64
: Ity_F32
, mkexpr(addr
) ));
19330 imm
= getUChar(delta
+alen
);
19331 if (imm
& ~15) goto decode_failure
;
19333 DIP( "rounds%c $%d,%s,%s\n",
19335 imm
, dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19338 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19339 that encoding is the same as the encoding for IRRoundingMode,
19340 we can use that value directly in the IR as a rounding
19342 assign(res
, binop(isD
? Iop_RoundF64toInt
: Iop_RoundF32toInt
,
19343 (imm
& 4) ? get_sse_roundingmode()
19348 putXMMRegLane64F( gregOfRexRM(pfx
, modrm
), 0, mkexpr(res
) );
19350 putXMMRegLane32F( gregOfRexRM(pfx
, modrm
), 0, mkexpr(res
) );
19352 goto decode_success
;
19357 /* 66 0F 3A 0C /r ib = BLENDPS xmm1, xmm2/m128, imm8
19358 Blend Packed Single Precision Floating-Point Values (XMM) */
19359 if (have66noF2noF3(pfx
) && sz
== 2) {
19362 IRTemp dst_vec
= newTemp(Ity_V128
);
19363 IRTemp src_vec
= newTemp(Ity_V128
);
19365 modrm
= getUChar(delta
);
19367 assign( dst_vec
, getXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19369 if ( epartIsReg( modrm
) ) {
19370 imm8
= (Int
)getUChar(delta
+1);
19371 assign( src_vec
, getXMMReg( eregOfRexRM(pfx
, modrm
) ) );
19373 DIP( "blendps $%d, %s,%s\n", imm8
,
19374 nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
19375 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19377 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19378 1/* imm8 is 1 byte after the amode */ );
19379 gen_SEGV_if_not_16_aligned( addr
);
19380 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19381 imm8
= (Int
)getUChar(delta
+alen
);
19383 DIP( "blendps $%d, %s,%s\n",
19384 imm8
, dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19387 putXMMReg( gregOfRexRM(pfx
, modrm
),
19388 mkexpr( math_BLENDPS_128( src_vec
, dst_vec
, imm8
) ) );
19389 goto decode_success
;
19394 /* 66 0F 3A 0D /r ib = BLENDPD xmm1, xmm2/m128, imm8
19395 Blend Packed Double Precision Floating-Point Values (XMM) */
19396 if (have66noF2noF3(pfx
) && sz
== 2) {
19399 IRTemp dst_vec
= newTemp(Ity_V128
);
19400 IRTemp src_vec
= newTemp(Ity_V128
);
19402 modrm
= getUChar(delta
);
19403 assign( dst_vec
, getXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19405 if ( epartIsReg( modrm
) ) {
19406 imm8
= (Int
)getUChar(delta
+1);
19407 assign( src_vec
, getXMMReg( eregOfRexRM(pfx
, modrm
) ) );
19409 DIP( "blendpd $%d, %s,%s\n", imm8
,
19410 nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
19411 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19413 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19414 1/* imm8 is 1 byte after the amode */ );
19415 gen_SEGV_if_not_16_aligned( addr
);
19416 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19417 imm8
= (Int
)getUChar(delta
+alen
);
19419 DIP( "blendpd $%d, %s,%s\n",
19420 imm8
, dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19423 putXMMReg( gregOfRexRM(pfx
, modrm
),
19424 mkexpr( math_BLENDPD_128( src_vec
, dst_vec
, imm8
) ) );
19425 goto decode_success
;
19430 /* 66 0F 3A 0E /r ib = PBLENDW xmm1, xmm2/m128, imm8
19431 Blend Packed Words (XMM) */
19432 if (have66noF2noF3(pfx
) && sz
== 2) {
19435 IRTemp dst_vec
= newTemp(Ity_V128
);
19436 IRTemp src_vec
= newTemp(Ity_V128
);
19438 modrm
= getUChar(delta
);
19440 assign( dst_vec
, getXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19442 if ( epartIsReg( modrm
) ) {
19443 imm8
= (Int
)getUChar(delta
+1);
19444 assign( src_vec
, getXMMReg( eregOfRexRM(pfx
, modrm
) ) );
19446 DIP( "pblendw $%d, %s,%s\n", imm8
,
19447 nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
19448 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19450 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19451 1/* imm8 is 1 byte after the amode */ );
19452 gen_SEGV_if_not_16_aligned( addr
);
19453 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19454 imm8
= (Int
)getUChar(delta
+alen
);
19456 DIP( "pblendw $%d, %s,%s\n",
19457 imm8
, dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19460 putXMMReg( gregOfRexRM(pfx
, modrm
),
19461 mkexpr( math_PBLENDW_128( src_vec
, dst_vec
, imm8
) ) );
19462 goto decode_success
;
19467 /* 66 0F 3A 14 /r ib = PEXTRB r/m16, xmm, imm8
19468 Extract Byte from xmm, store in mem or zero-extend + store in gen.reg.
19470 if (have66noF2noF3(pfx
) && sz
== 2) {
19471 delta
= dis_PEXTRB_128_GtoE( vbi
, pfx
, delta
, False
/*!isAvx*/ );
19472 goto decode_success
;
19477 /* 66 0F 3A 15 /r ib = PEXTRW r/m16, xmm, imm8
19478 Extract Word from xmm, store in mem or zero-extend + store in gen.reg.
19480 if (have66noF2noF3(pfx
) && sz
== 2) {
19481 delta
= dis_PEXTRW( vbi
, pfx
, delta
, False
/*!isAvx*/ );
19482 goto decode_success
;
19487 /* 66 no-REX.W 0F 3A 16 /r ib = PEXTRD reg/mem32, xmm2, imm8
19488 Extract Doubleword int from xmm reg and store in gen.reg or mem. (XMM)
19489 Note that this insn has the same opcodes as PEXTRQ, but
19490 here the REX.W bit is _not_ present */
19491 if (have66noF2noF3(pfx
)
19492 && sz
== 2 /* REX.W is _not_ present */) {
19493 delta
= dis_PEXTRD( vbi
, pfx
, delta
, False
/*!isAvx*/ );
19494 goto decode_success
;
19496 /* 66 REX.W 0F 3A 16 /r ib = PEXTRQ reg/mem64, xmm2, imm8
19497 Extract Quadword int from xmm reg and store in gen.reg or mem. (XMM)
19498 Note that this insn has the same opcodes as PEXTRD, but
19499 here the REX.W bit is present */
19500 if (have66noF2noF3(pfx
)
19501 && sz
== 8 /* REX.W is present */) {
19502 delta
= dis_PEXTRQ( vbi
, pfx
, delta
, False
/*!isAvx*/);
19503 goto decode_success
;
19508 /* 66 0F 3A 17 /r ib = EXTRACTPS reg/mem32, xmm2, imm8 Extract
19509 float from xmm reg and store in gen.reg or mem. This is
19510 identical to PEXTRD, except that REX.W appears to be ignored.
19512 if (have66noF2noF3(pfx
)
19513 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
19514 delta
= dis_EXTRACTPS( vbi
, pfx
, delta
, False
/*!isAvx*/ );
19515 goto decode_success
;
19520 /* 66 0F 3A 20 /r ib = PINSRB xmm1, r32/m8, imm8
19521 Extract byte from r32/m8 and insert into xmm1 */
19522 if (have66noF2noF3(pfx
) && sz
== 2) {
19524 IRTemp new8
= newTemp(Ity_I8
);
19525 modrm
= getUChar(delta
);
19526 UInt rG
= gregOfRexRM(pfx
, modrm
);
19527 if ( epartIsReg( modrm
) ) {
19528 UInt rE
= eregOfRexRM(pfx
,modrm
);
19529 imm8
= (Int
)(getUChar(delta
+1) & 0xF);
19530 assign( new8
, unop(Iop_32to8
, getIReg32(rE
)) );
19532 DIP( "pinsrb $%d,%s,%s\n", imm8
,
19533 nameIReg32(rE
), nameXMMReg(rG
) );
19535 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19536 imm8
= (Int
)(getUChar(delta
+alen
) & 0xF);
19537 assign( new8
, loadLE( Ity_I8
, mkexpr(addr
) ) );
19539 DIP( "pinsrb $%d,%s,%s\n",
19540 imm8
, dis_buf
, nameXMMReg(rG
) );
19542 IRTemp src_vec
= newTemp(Ity_V128
);
19543 assign(src_vec
, getXMMReg( gregOfRexRM(pfx
, modrm
) ));
19544 IRTemp res
= math_PINSRB_128( src_vec
, new8
, imm8
);
19545 putXMMReg( rG
, mkexpr(res
) );
19546 goto decode_success
;
19551 /* 66 0F 3A 21 /r ib = INSERTPS imm8, xmm2/m32, xmm1
19552 Insert Packed Single Precision Floating-Point Value (XMM) */
19553 if (have66noF2noF3(pfx
) && sz
== 2) {
19555 IRTemp d2ins
= newTemp(Ity_I32
); /* comes from the E part */
19556 const IRTemp inval
= IRTemp_INVALID
;
19558 modrm
= getUChar(delta
);
19559 UInt rG
= gregOfRexRM(pfx
, modrm
);
19561 if ( epartIsReg( modrm
) ) {
19562 UInt rE
= eregOfRexRM(pfx
, modrm
);
19563 IRTemp vE
= newTemp(Ity_V128
);
19564 assign( vE
, getXMMReg(rE
) );
19565 IRTemp dsE
[4] = { inval
, inval
, inval
, inval
};
19566 breakupV128to32s( vE
, &dsE
[3], &dsE
[2], &dsE
[1], &dsE
[0] );
19567 imm8
= getUChar(delta
+1);
19568 d2ins
= dsE
[(imm8
>> 6) & 3]; /* "imm8_count_s" */
19570 DIP( "insertps $%u, %s,%s\n",
19571 imm8
, nameXMMReg(rE
), nameXMMReg(rG
) );
19573 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19574 assign( d2ins
, loadLE( Ity_I32
, mkexpr(addr
) ) );
19575 imm8
= getUChar(delta
+alen
);
19577 DIP( "insertps $%u, %s,%s\n",
19578 imm8
, dis_buf
, nameXMMReg(rG
) );
19581 IRTemp vG
= newTemp(Ity_V128
);
19582 assign( vG
, getXMMReg(rG
) );
19584 putXMMReg( rG
, mkexpr(math_INSERTPS( vG
, d2ins
, imm8
)) );
19585 goto decode_success
;
19590 /* 66 no-REX.W 0F 3A 22 /r ib = PINSRD xmm1, r/m32, imm8
19591 Extract Doubleword int from gen.reg/mem32 and insert into xmm1 */
19592 if (have66noF2noF3(pfx
)
19593 && sz
== 2 /* REX.W is NOT present */) {
19595 IRTemp src_u32
= newTemp(Ity_I32
);
19596 modrm
= getUChar(delta
);
19597 UInt rG
= gregOfRexRM(pfx
, modrm
);
19599 if ( epartIsReg( modrm
) ) {
19600 UInt rE
= eregOfRexRM(pfx
,modrm
);
19601 imm8_10
= (Int
)(getUChar(delta
+1) & 3);
19602 assign( src_u32
, getIReg32( rE
) );
19604 DIP( "pinsrd $%d, %s,%s\n",
19605 imm8_10
, nameIReg32(rE
), nameXMMReg(rG
) );
19607 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19608 imm8_10
= (Int
)(getUChar(delta
+alen
) & 3);
19609 assign( src_u32
, loadLE( Ity_I32
, mkexpr(addr
) ) );
19611 DIP( "pinsrd $%d, %s,%s\n",
19612 imm8_10
, dis_buf
, nameXMMReg(rG
) );
19615 IRTemp src_vec
= newTemp(Ity_V128
);
19616 assign(src_vec
, getXMMReg( rG
));
19617 IRTemp res_vec
= math_PINSRD_128( src_vec
, src_u32
, imm8_10
);
19618 putXMMReg( rG
, mkexpr(res_vec
) );
19619 goto decode_success
;
19621 /* 66 REX.W 0F 3A 22 /r ib = PINSRQ xmm1, r/m64, imm8
19622 Extract Quadword int from gen.reg/mem64 and insert into xmm1 */
19623 if (have66noF2noF3(pfx
)
19624 && sz
== 8 /* REX.W is present */) {
19626 IRTemp src_u64
= newTemp(Ity_I64
);
19627 modrm
= getUChar(delta
);
19628 UInt rG
= gregOfRexRM(pfx
, modrm
);
19630 if ( epartIsReg( modrm
) ) {
19631 UInt rE
= eregOfRexRM(pfx
,modrm
);
19632 imm8_0
= (Int
)(getUChar(delta
+1) & 1);
19633 assign( src_u64
, getIReg64( rE
) );
19635 DIP( "pinsrq $%d, %s,%s\n",
19636 imm8_0
, nameIReg64(rE
), nameXMMReg(rG
) );
19638 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19639 imm8_0
= (Int
)(getUChar(delta
+alen
) & 1);
19640 assign( src_u64
, loadLE( Ity_I64
, mkexpr(addr
) ) );
19642 DIP( "pinsrq $%d, %s,%s\n",
19643 imm8_0
, dis_buf
, nameXMMReg(rG
) );
19646 IRTemp src_vec
= newTemp(Ity_V128
);
19647 assign(src_vec
, getXMMReg( rG
));
19648 IRTemp res_vec
= math_PINSRQ_128( src_vec
, src_u64
, imm8_0
);
19649 putXMMReg( rG
, mkexpr(res_vec
) );
19650 goto decode_success
;
19655 /* 66 0F 3A 40 /r ib = DPPS xmm1, xmm2/m128, imm8
19656 Dot Product of Packed Single Precision Floating-Point Values (XMM) */
19657 if (have66noF2noF3(pfx
) && sz
== 2) {
19658 modrm
= getUChar(delta
);
19660 IRTemp src_vec
= newTemp(Ity_V128
);
19661 IRTemp dst_vec
= newTemp(Ity_V128
);
19662 UInt rG
= gregOfRexRM(pfx
, modrm
);
19663 assign( dst_vec
, getXMMReg( rG
) );
19664 if ( epartIsReg( modrm
) ) {
19665 UInt rE
= eregOfRexRM(pfx
, modrm
);
19666 imm8
= (Int
)getUChar(delta
+1);
19667 assign( src_vec
, getXMMReg(rE
) );
19669 DIP( "dpps $%d, %s,%s\n",
19670 imm8
, nameXMMReg(rE
), nameXMMReg(rG
) );
19672 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19673 1/* imm8 is 1 byte after the amode */ );
19674 gen_SEGV_if_not_16_aligned( addr
);
19675 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19676 imm8
= (Int
)getUChar(delta
+alen
);
19678 DIP( "dpps $%d, %s,%s\n",
19679 imm8
, dis_buf
, nameXMMReg(rG
) );
19681 IRTemp res
= math_DPPS_128( src_vec
, dst_vec
, imm8
);
19682 putXMMReg( rG
, mkexpr(res
) );
19683 goto decode_success
;
19688 /* 66 0F 3A 41 /r ib = DPPD xmm1, xmm2/m128, imm8
19689 Dot Product of Packed Double Precision Floating-Point Values (XMM) */
19690 if (have66noF2noF3(pfx
) && sz
== 2) {
19691 modrm
= getUChar(delta
);
19693 IRTemp src_vec
= newTemp(Ity_V128
);
19694 IRTemp dst_vec
= newTemp(Ity_V128
);
19695 UInt rG
= gregOfRexRM(pfx
, modrm
);
19696 assign( dst_vec
, getXMMReg( rG
) );
19697 if ( epartIsReg( modrm
) ) {
19698 UInt rE
= eregOfRexRM(pfx
, modrm
);
19699 imm8
= (Int
)getUChar(delta
+1);
19700 assign( src_vec
, getXMMReg(rE
) );
19702 DIP( "dppd $%d, %s,%s\n",
19703 imm8
, nameXMMReg(rE
), nameXMMReg(rG
) );
19705 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19706 1/* imm8 is 1 byte after the amode */ );
19707 gen_SEGV_if_not_16_aligned( addr
);
19708 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19709 imm8
= (Int
)getUChar(delta
+alen
);
19711 DIP( "dppd $%d, %s,%s\n",
19712 imm8
, dis_buf
, nameXMMReg(rG
) );
19714 IRTemp res
= math_DPPD_128( src_vec
, dst_vec
, imm8
);
19715 putXMMReg( rG
, mkexpr(res
) );
19716 goto decode_success
;
19721 /* 66 0F 3A 42 /r ib = MPSADBW xmm1, xmm2/m128, imm8
19722 Multiple Packed Sums of Absolule Difference (XMM) */
19723 if (have66noF2noF3(pfx
) && sz
== 2) {
19725 IRTemp src_vec
= newTemp(Ity_V128
);
19726 IRTemp dst_vec
= newTemp(Ity_V128
);
19727 modrm
= getUChar(delta
);
19728 UInt rG
= gregOfRexRM(pfx
, modrm
);
19730 assign( dst_vec
, getXMMReg(rG
) );
19732 if ( epartIsReg( modrm
) ) {
19733 UInt rE
= eregOfRexRM(pfx
, modrm
);
19735 imm8
= (Int
)getUChar(delta
+1);
19736 assign( src_vec
, getXMMReg(rE
) );
19738 DIP( "mpsadbw $%d, %s,%s\n", imm8
,
19739 nameXMMReg(rE
), nameXMMReg(rG
) );
19741 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19742 1/* imm8 is 1 byte after the amode */ );
19743 gen_SEGV_if_not_16_aligned( addr
);
19744 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19745 imm8
= (Int
)getUChar(delta
+alen
);
19747 DIP( "mpsadbw $%d, %s,%s\n", imm8
, dis_buf
, nameXMMReg(rG
) );
19750 putXMMReg( rG
, mkexpr( math_MPSADBW_128(dst_vec
, src_vec
, imm8
) ) );
19751 goto decode_success
;
19756 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
19757 * Carry-less multiplication of selected XMM quadwords into XMM
19758 * registers (a.k.a multiplication of polynomials over GF(2))
19760 if (have66noF2noF3(pfx
) && sz
== 2) {
19763 IRTemp svec
= newTemp(Ity_V128
);
19764 IRTemp dvec
= newTemp(Ity_V128
);
19765 modrm
= getUChar(delta
);
19766 UInt rG
= gregOfRexRM(pfx
, modrm
);
19768 assign( dvec
, getXMMReg(rG
) );
19770 if ( epartIsReg( modrm
) ) {
19771 UInt rE
= eregOfRexRM(pfx
, modrm
);
19772 imm8
= (Int
)getUChar(delta
+1);
19773 assign( svec
, getXMMReg(rE
) );
19775 DIP( "pclmulqdq $%d, %s,%s\n", imm8
,
19776 nameXMMReg(rE
), nameXMMReg(rG
) );
19778 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19779 1/* imm8 is 1 byte after the amode */ );
19780 gen_SEGV_if_not_16_aligned( addr
);
19781 assign( svec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19782 imm8
= (Int
)getUChar(delta
+alen
);
19784 DIP( "pclmulqdq $%d, %s,%s\n",
19785 imm8
, dis_buf
, nameXMMReg(rG
) );
19788 putXMMReg( rG
, mkexpr( math_PCLMULQDQ(dvec
, svec
, imm8
) ) );
19789 goto decode_success
;
19797 /* 66 0F 3A 63 /r ib = PCMPISTRI imm8, xmm2/m128, xmm1
19798 66 0F 3A 62 /r ib = PCMPISTRM imm8, xmm2/m128, xmm1
19799 66 0F 3A 61 /r ib = PCMPESTRI imm8, xmm2/m128, xmm1
19800 66 0F 3A 60 /r ib = PCMPESTRM imm8, xmm2/m128, xmm1
19801 (selected special cases that actually occur in glibc,
19802 not by any means a complete implementation.)
19804 if (have66noF2noF3(pfx
) && sz
== 2) {
19805 Long delta0
= delta
;
19806 delta
= dis_PCMPxSTRx( vbi
, pfx
, delta
, False
/*!isAvx*/, opc
);
19807 if (delta
> delta0
) goto decode_success
;
19808 /* else fall though; dis_PCMPxSTRx failed to decode it */
19813 /* 66 0F 3A DF /r ib = AESKEYGENASSIST imm8, xmm2/m128, xmm1 */
19814 if (have66noF2noF3(pfx
) && sz
== 2) {
19815 delta
= dis_AESKEYGENASSIST( vbi
, pfx
, delta
, False
/*!isAvx*/ );
19816 goto decode_success
;
19826 *decode_OK
= False
;
19835 /*------------------------------------------------------------*/
19837 /*--- Top-level post-escape decoders: dis_ESC_NONE ---*/
19839 /*------------------------------------------------------------*/
19841 __attribute__((noinline
))
19843 Long
dis_ESC_NONE (
19844 /*MB_OUT*/DisResult
* dres
,
19845 /*MB_OUT*/Bool
* expect_CAS
,
19846 const VexArchInfo
* archinfo
,
19847 const VexAbiInfo
* vbi
,
19848 Prefix pfx
, Int sz
, Long deltaIN
19853 IRTemp addr
= IRTemp_INVALID
;
19854 IRTemp t1
= IRTemp_INVALID
;
19855 IRTemp t2
= IRTemp_INVALID
;
19856 IRTemp t3
= IRTemp_INVALID
;
19857 IRTemp t4
= IRTemp_INVALID
;
19858 IRTemp t5
= IRTemp_INVALID
;
19859 IRType ty
= Ity_INVALID
;
19866 Long delta
= deltaIN
;
19867 UChar opc
= getUChar(delta
); delta
++;
19869 /* delta now points at the modrm byte. In most of the cases that
19870 follow, neither the F2 nor F3 prefixes are allowed. However,
19871 for some basic arithmetic operations we have to allow F2/XACQ or
19872 F3/XREL in the case where the destination is memory and the LOCK
19873 prefix is also present. Do this check by looking at the modrm
19874 byte but not advancing delta over it. */
19875 /* By default, F2 and F3 are not allowed, so let's start off with
19877 Bool validF2orF3
= haveF2orF3(pfx
) ? False
: True
;
19878 { UChar tmp_modrm
= getUChar(delta
);
19880 case 0x00: /* ADD Gb,Eb */ case 0x01: /* ADD Gv,Ev */
19881 case 0x08: /* OR Gb,Eb */ case 0x09: /* OR Gv,Ev */
19882 case 0x10: /* ADC Gb,Eb */ case 0x11: /* ADC Gv,Ev */
19883 case 0x18: /* SBB Gb,Eb */ case 0x19: /* SBB Gv,Ev */
19884 case 0x20: /* AND Gb,Eb */ case 0x21: /* AND Gv,Ev */
19885 case 0x28: /* SUB Gb,Eb */ case 0x29: /* SUB Gv,Ev */
19886 case 0x30: /* XOR Gb,Eb */ case 0x31: /* XOR Gv,Ev */
19887 if (!epartIsReg(tmp_modrm
)
19888 && haveF2orF3(pfx
) && !haveF2andF3(pfx
) && haveLOCK(pfx
)) {
19889 /* dst is mem, and we have F2 or F3 but not both */
19890 validF2orF3
= True
;
19898 /* Now, in the switch below, for the opc values examined by the
19899 switch above, use validF2orF3 rather than looking at pfx
19903 case 0x00: /* ADD Gb,Eb */
19904 if (!validF2orF3
) goto decode_failure
;
19905 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Add8
, WithFlagNone
, True
, 1, delta
, "add" );
19907 case 0x01: /* ADD Gv,Ev */
19908 if (!validF2orF3
) goto decode_failure
;
19909 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Add8
, WithFlagNone
, True
, sz
, delta
, "add" );
19912 case 0x02: /* ADD Eb,Gb */
19913 if (haveF2orF3(pfx
)) goto decode_failure
;
19914 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Add8
, WithFlagNone
, True
, 1, delta
, "add" );
19916 case 0x03: /* ADD Ev,Gv */
19917 if (haveF2orF3(pfx
)) goto decode_failure
;
19918 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Add8
, WithFlagNone
, True
, sz
, delta
, "add" );
19921 case 0x04: /* ADD Ib, AL */
19922 if (haveF2orF3(pfx
)) goto decode_failure
;
19923 delta
= dis_op_imm_A( 1, False
, Iop_Add8
, True
, delta
, "add" );
19925 case 0x05: /* ADD Iv, eAX */
19926 if (haveF2orF3(pfx
)) goto decode_failure
;
19927 delta
= dis_op_imm_A(sz
, False
, Iop_Add8
, True
, delta
, "add" );
19930 case 0x08: /* OR Gb,Eb */
19931 if (!validF2orF3
) goto decode_failure
;
19932 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Or8
, WithFlagNone
, True
, 1, delta
, "or" );
19934 case 0x09: /* OR Gv,Ev */
19935 if (!validF2orF3
) goto decode_failure
;
19936 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Or8
, WithFlagNone
, True
, sz
, delta
, "or" );
19939 case 0x0A: /* OR Eb,Gb */
19940 if (haveF2orF3(pfx
)) goto decode_failure
;
19941 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Or8
, WithFlagNone
, True
, 1, delta
, "or" );
19943 case 0x0B: /* OR Ev,Gv */
19944 if (haveF2orF3(pfx
)) goto decode_failure
;
19945 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Or8
, WithFlagNone
, True
, sz
, delta
, "or" );
19948 case 0x0C: /* OR Ib, AL */
19949 if (haveF2orF3(pfx
)) goto decode_failure
;
19950 delta
= dis_op_imm_A( 1, False
, Iop_Or8
, True
, delta
, "or" );
19952 case 0x0D: /* OR Iv, eAX */
19953 if (haveF2orF3(pfx
)) goto decode_failure
;
19954 delta
= dis_op_imm_A( sz
, False
, Iop_Or8
, True
, delta
, "or" );
19957 case 0x10: /* ADC Gb,Eb */
19958 if (!validF2orF3
) goto decode_failure
;
19959 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Add8
, WithFlagCarry
, True
, 1, delta
, "adc" );
19961 case 0x11: /* ADC Gv,Ev */
19962 if (!validF2orF3
) goto decode_failure
;
19963 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Add8
, WithFlagCarry
, True
, sz
, delta
, "adc" );
19966 case 0x12: /* ADC Eb,Gb */
19967 if (haveF2orF3(pfx
)) goto decode_failure
;
19968 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Add8
, WithFlagCarry
, True
, 1, delta
, "adc" );
19970 case 0x13: /* ADC Ev,Gv */
19971 if (haveF2orF3(pfx
)) goto decode_failure
;
19972 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Add8
, WithFlagCarry
, True
, sz
, delta
, "adc" );
19975 case 0x14: /* ADC Ib, AL */
19976 if (haveF2orF3(pfx
)) goto decode_failure
;
19977 delta
= dis_op_imm_A( 1, True
, Iop_Add8
, True
, delta
, "adc" );
19979 case 0x15: /* ADC Iv, eAX */
19980 if (haveF2orF3(pfx
)) goto decode_failure
;
19981 delta
= dis_op_imm_A( sz
, True
, Iop_Add8
, True
, delta
, "adc" );
19984 case 0x18: /* SBB Gb,Eb */
19985 if (!validF2orF3
) goto decode_failure
;
19986 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Sub8
, WithFlagCarry
, True
, 1, delta
, "sbb" );
19988 case 0x19: /* SBB Gv,Ev */
19989 if (!validF2orF3
) goto decode_failure
;
19990 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Sub8
, WithFlagCarry
, True
, sz
, delta
, "sbb" );
19993 case 0x1A: /* SBB Eb,Gb */
19994 if (haveF2orF3(pfx
)) goto decode_failure
;
19995 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Sub8
, WithFlagCarry
, True
, 1, delta
, "sbb" );
19997 case 0x1B: /* SBB Ev,Gv */
19998 if (haveF2orF3(pfx
)) goto decode_failure
;
19999 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Sub8
, WithFlagCarry
, True
, sz
, delta
, "sbb" );
20002 case 0x1C: /* SBB Ib, AL */
20003 if (haveF2orF3(pfx
)) goto decode_failure
;
20004 delta
= dis_op_imm_A( 1, True
, Iop_Sub8
, True
, delta
, "sbb" );
20006 case 0x1D: /* SBB Iv, eAX */
20007 if (haveF2orF3(pfx
)) goto decode_failure
;
20008 delta
= dis_op_imm_A( sz
, True
, Iop_Sub8
, True
, delta
, "sbb" );
20011 case 0x20: /* AND Gb,Eb */
20012 if (!validF2orF3
) goto decode_failure
;
20013 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_And8
, WithFlagNone
, True
, 1, delta
, "and" );
20015 case 0x21: /* AND Gv,Ev */
20016 if (!validF2orF3
) goto decode_failure
;
20017 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_And8
, WithFlagNone
, True
, sz
, delta
, "and" );
20020 case 0x22: /* AND Eb,Gb */
20021 if (haveF2orF3(pfx
)) goto decode_failure
;
20022 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_And8
, WithFlagNone
, True
, 1, delta
, "and" );
20024 case 0x23: /* AND Ev,Gv */
20025 if (haveF2orF3(pfx
)) goto decode_failure
;
20026 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_And8
, WithFlagNone
, True
, sz
, delta
, "and" );
20029 case 0x24: /* AND Ib, AL */
20030 if (haveF2orF3(pfx
)) goto decode_failure
;
20031 delta
= dis_op_imm_A( 1, False
, Iop_And8
, True
, delta
, "and" );
20033 case 0x25: /* AND Iv, eAX */
20034 if (haveF2orF3(pfx
)) goto decode_failure
;
20035 delta
= dis_op_imm_A( sz
, False
, Iop_And8
, True
, delta
, "and" );
20038 case 0x28: /* SUB Gb,Eb */
20039 if (!validF2orF3
) goto decode_failure
;
20040 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, True
, 1, delta
, "sub" );
20042 case 0x29: /* SUB Gv,Ev */
20043 if (!validF2orF3
) goto decode_failure
;
20044 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, True
, sz
, delta
, "sub" );
20047 case 0x2A: /* SUB Eb,Gb */
20048 if (haveF2orF3(pfx
)) goto decode_failure
;
20049 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, True
, 1, delta
, "sub" );
20051 case 0x2B: /* SUB Ev,Gv */
20052 if (haveF2orF3(pfx
)) goto decode_failure
;
20053 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, True
, sz
, delta
, "sub" );
20056 case 0x2C: /* SUB Ib, AL */
20057 if (haveF2orF3(pfx
)) goto decode_failure
;
20058 delta
= dis_op_imm_A(1, False
, Iop_Sub8
, True
, delta
, "sub" );
20060 case 0x2D: /* SUB Iv, eAX */
20061 if (haveF2orF3(pfx
)) goto decode_failure
;
20062 delta
= dis_op_imm_A( sz
, False
, Iop_Sub8
, True
, delta
, "sub" );
20065 case 0x30: /* XOR Gb,Eb */
20066 if (!validF2orF3
) goto decode_failure
;
20067 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Xor8
, WithFlagNone
, True
, 1, delta
, "xor" );
20069 case 0x31: /* XOR Gv,Ev */
20070 if (!validF2orF3
) goto decode_failure
;
20071 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Xor8
, WithFlagNone
, True
, sz
, delta
, "xor" );
20074 case 0x32: /* XOR Eb,Gb */
20075 if (haveF2orF3(pfx
)) goto decode_failure
;
20076 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Xor8
, WithFlagNone
, True
, 1, delta
, "xor" );
20078 case 0x33: /* XOR Ev,Gv */
20079 if (haveF2orF3(pfx
)) goto decode_failure
;
20080 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Xor8
, WithFlagNone
, True
, sz
, delta
, "xor" );
20083 case 0x34: /* XOR Ib, AL */
20084 if (haveF2orF3(pfx
)) goto decode_failure
;
20085 delta
= dis_op_imm_A( 1, False
, Iop_Xor8
, True
, delta
, "xor" );
20087 case 0x35: /* XOR Iv, eAX */
20088 if (haveF2orF3(pfx
)) goto decode_failure
;
20089 delta
= dis_op_imm_A( sz
, False
, Iop_Xor8
, True
, delta
, "xor" );
20092 case 0x38: /* CMP Gb,Eb */
20093 if (haveF2orF3(pfx
)) goto decode_failure
;
20094 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, False
, 1, delta
, "cmp" );
20096 case 0x39: /* CMP Gv,Ev */
20097 if (haveF2orF3(pfx
)) goto decode_failure
;
20098 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, False
, sz
, delta
, "cmp" );
20101 case 0x3A: /* CMP Eb,Gb */
20102 if (haveF2orF3(pfx
)) goto decode_failure
;
20103 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, False
, 1, delta
, "cmp" );
20105 case 0x3B: /* CMP Ev,Gv */
20106 if (haveF2orF3(pfx
)) goto decode_failure
;
20107 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, False
, sz
, delta
, "cmp" );
20110 case 0x3C: /* CMP Ib, AL */
20111 if (haveF2orF3(pfx
)) goto decode_failure
;
20112 delta
= dis_op_imm_A( 1, False
, Iop_Sub8
, False
, delta
, "cmp" );
20114 case 0x3D: /* CMP Iv, eAX */
20115 if (haveF2orF3(pfx
)) goto decode_failure
;
20116 delta
= dis_op_imm_A( sz
, False
, Iop_Sub8
, False
, delta
, "cmp" );
20119 case 0x50: /* PUSH eAX */
20120 case 0x51: /* PUSH eCX */
20121 case 0x52: /* PUSH eDX */
20122 case 0x53: /* PUSH eBX */
20123 case 0x55: /* PUSH eBP */
20124 case 0x56: /* PUSH eSI */
20125 case 0x57: /* PUSH eDI */
20126 case 0x54: /* PUSH eSP */
20127 /* This is the Right Way, in that the value to be pushed is
20128 established before %rsp is changed, so that pushq %rsp
20129 correctly pushes the old value. */
20130 if (haveF2orF3(pfx
)) goto decode_failure
;
20131 vassert(sz
== 2 || sz
== 4 || sz
== 8);
20133 sz
= 8; /* there is no encoding for 32-bit push in 64-bit mode */
20134 ty
= sz
==2 ? Ity_I16
: Ity_I64
;
20136 t2
= newTemp(Ity_I64
);
20137 assign(t1
, getIRegRexB(sz
, pfx
, opc
-0x50));
20138 assign(t2
, binop(Iop_Sub64
, getIReg64(R_RSP
), mkU64(sz
)));
20139 putIReg64(R_RSP
, mkexpr(t2
) );
20140 storeLE(mkexpr(t2
),mkexpr(t1
));
20141 DIP("push%c %s\n", nameISize(sz
), nameIRegRexB(sz
,pfx
,opc
-0x50));
20144 case 0x58: /* POP eAX */
20145 case 0x59: /* POP eCX */
20146 case 0x5A: /* POP eDX */
20147 case 0x5B: /* POP eBX */
20148 case 0x5D: /* POP eBP */
20149 case 0x5E: /* POP eSI */
20150 case 0x5F: /* POP eDI */
20151 case 0x5C: /* POP eSP */
20152 if (haveF2orF3(pfx
)) goto decode_failure
;
20153 vassert(sz
== 2 || sz
== 4 || sz
== 8);
20155 sz
= 8; /* there is no encoding for 32-bit pop in 64-bit mode */
20156 t1
= newTemp(szToITy(sz
));
20157 t2
= newTemp(Ity_I64
);
20158 assign(t2
, getIReg64(R_RSP
));
20159 assign(t1
, loadLE(szToITy(sz
),mkexpr(t2
)));
20160 putIReg64(R_RSP
, binop(Iop_Add64
, mkexpr(t2
), mkU64(sz
)));
20161 putIRegRexB(sz
, pfx
, opc
-0x58, mkexpr(t1
));
20162 DIP("pop%c %s\n", nameISize(sz
), nameIRegRexB(sz
,pfx
,opc
-0x58));
20165 case 0x63: /* MOVSX */
20166 if (haveF2orF3(pfx
)) goto decode_failure
;
20167 if (haveREX(pfx
) && 1==getRexW(pfx
)) {
20169 /* movsx r/m32 to r64 */
20170 modrm
= getUChar(delta
);
20171 if (epartIsReg(modrm
)) {
20173 putIRegG(8, pfx
, modrm
,
20175 getIRegE(4, pfx
, modrm
)));
20176 DIP("movslq %s,%s\n",
20177 nameIRegE(4, pfx
, modrm
),
20178 nameIRegG(8, pfx
, modrm
));
20181 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
20183 putIRegG(8, pfx
, modrm
,
20185 loadLE(Ity_I32
, mkexpr(addr
))));
20186 DIP("movslq %s,%s\n", dis_buf
,
20187 nameIRegG(8, pfx
, modrm
));
20191 goto decode_failure
;
20194 case 0x68: /* PUSH Iv */
20195 if (haveF2orF3(pfx
)) goto decode_failure
;
20196 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
20197 if (sz
== 4) sz
= 8;
20198 d64
= getSDisp(imin(4,sz
),delta
);
20199 delta
+= imin(4,sz
);
20202 case 0x69: /* IMUL Iv, Ev, Gv */
20203 if (haveF2orF3(pfx
)) goto decode_failure
;
20204 delta
= dis_imul_I_E_G ( vbi
, pfx
, sz
, delta
, sz
);
20207 case 0x6A: /* PUSH Ib, sign-extended to sz */
20208 if (haveF2orF3(pfx
)) goto decode_failure
;
20209 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
20210 if (sz
== 4) sz
= 8;
20211 d64
= getSDisp8(delta
); delta
+= 1;
20215 t1
= newTemp(Ity_I64
);
20217 assign( t1
, binop(Iop_Sub64
,getIReg64(R_RSP
),mkU64(sz
)) );
20218 putIReg64(R_RSP
, mkexpr(t1
) );
20219 /* stop mkU16 asserting if d32 is a negative 16-bit number
20223 storeLE( mkexpr(t1
), mkU(ty
,d64
) );
20224 DIP("push%c $%lld\n", nameISize(sz
), (Long
)d64
);
20227 case 0x6B: /* IMUL Ib, Ev, Gv */
20228 delta
= dis_imul_I_E_G ( vbi
, pfx
, sz
, delta
, 1 );
20233 case 0x72: /* JBb/JNAEb (jump below) */
20234 case 0x73: /* JNBb/JAEb (jump not below) */
20235 case 0x74: /* JZb/JEb (jump zero) */
20236 case 0x75: /* JNZb/JNEb (jump not zero) */
20237 case 0x76: /* JBEb/JNAb (jump below or equal) */
20238 case 0x77: /* JNBEb/JAb (jump not below or equal) */
20239 case 0x78: /* JSb (jump negative) */
20240 case 0x79: /* JSb (jump not negative) */
20241 case 0x7A: /* JP (jump parity even) */
20242 case 0x7B: /* JNP/JPO (jump parity odd) */
20243 case 0x7C: /* JLb/JNGEb (jump less) */
20244 case 0x7D: /* JGEb/JNLb (jump greater or equal) */
20245 case 0x7E: /* JLEb/JNGb (jump less or equal) */
20246 case 0x7F: { /* JGb/JNLEb (jump greater) */
20248 const HChar
* comment
= "";
20249 if (haveF3(pfx
)) goto decode_failure
;
20250 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
20251 jmpDelta
= getSDisp8(delta
);
20252 vassert(-128 <= jmpDelta
&& jmpDelta
< 128);
20253 d64
= (guest_RIP_bbstart
+delta
+1) + jmpDelta
;
20255 /* End the block at this point. */
20256 jcc_01( dres
, (AMD64Condcode
)(opc
- 0x70),
20257 guest_RIP_bbstart
+delta
, d64
);
20258 vassert(dres
->whatNext
== Dis_StopHere
);
20259 DIP("j%s-8 0x%llx %s\n", name_AMD64Condcode(opc
- 0x70), (ULong
)d64
,
20264 case 0x80: /* Grp1 Ib,Eb */
20265 modrm
= getUChar(delta
);
20266 /* Disallow F2/XACQ and F3/XREL for the non-mem case. Allow
20267 just one for the mem case and also require LOCK in this case.
20268 Note that this erroneously allows XACQ/XREL on CMP since we
20269 don't check the subopcode here. No big deal. */
20270 if (epartIsReg(modrm
) && haveF2orF3(pfx
))
20271 goto decode_failure
;
20272 if (!epartIsReg(modrm
) && haveF2andF3(pfx
))
20273 goto decode_failure
;
20274 if (!epartIsReg(modrm
) && haveF2orF3(pfx
) && !haveLOCK(pfx
))
20275 goto decode_failure
;
20276 am_sz
= lengthAMode(pfx
,delta
);
20279 d64
= getSDisp8(delta
+ am_sz
);
20280 delta
= dis_Grp1 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
, d64
);
20283 case 0x81: /* Grp1 Iv,Ev */
20284 modrm
= getUChar(delta
);
20285 /* Same comment as for case 0x80 just above. */
20286 if (epartIsReg(modrm
) && haveF2orF3(pfx
))
20287 goto decode_failure
;
20288 if (!epartIsReg(modrm
) && haveF2andF3(pfx
))
20289 goto decode_failure
;
20290 if (!epartIsReg(modrm
) && haveF2orF3(pfx
) && !haveLOCK(pfx
))
20291 goto decode_failure
;
20292 am_sz
= lengthAMode(pfx
,delta
);
20294 d64
= getSDisp(d_sz
, delta
+ am_sz
);
20295 delta
= dis_Grp1 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
, d64
);
20298 case 0x83: /* Grp1 Ib,Ev */
20299 if (haveF2orF3(pfx
)) goto decode_failure
;
20300 modrm
= getUChar(delta
);
20301 am_sz
= lengthAMode(pfx
,delta
);
20303 d64
= getSDisp8(delta
+ am_sz
);
20304 delta
= dis_Grp1 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
, d64
);
20307 case 0x84: /* TEST Eb,Gb */
20308 if (haveF2orF3(pfx
)) goto decode_failure
;
20309 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_And8
, WithFlagNone
, False
,
20310 1, delta
, "test" );
20313 case 0x85: /* TEST Ev,Gv */
20314 if (haveF2orF3(pfx
)) goto decode_failure
;
20315 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_And8
, WithFlagNone
, False
,
20316 sz
, delta
, "test" );
20319 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
20320 prefix. Therefore, generate CAS regardless of the presence or
20321 otherwise of a LOCK prefix. */
20322 case 0x86: /* XCHG Gb,Eb */
20324 /* Fall through ... */
20325 case 0x87: /* XCHG Gv,Ev */
20326 modrm
= getUChar(delta
);
20327 /* Check whether F2 or F3 are allowable. For the mem case, one
20328 or the othter but not both are. We don't care about the
20329 presence of LOCK in this case -- XCHG is unusual in this
20331 if (haveF2orF3(pfx
)) {
20332 if (epartIsReg(modrm
)) {
20333 goto decode_failure
;
20335 if (haveF2andF3(pfx
))
20336 goto decode_failure
;
20340 t1
= newTemp(ty
); t2
= newTemp(ty
);
20341 if (epartIsReg(modrm
)) {
20342 assign(t1
, getIRegE(sz
, pfx
, modrm
));
20343 assign(t2
, getIRegG(sz
, pfx
, modrm
));
20344 putIRegG(sz
, pfx
, modrm
, mkexpr(t1
));
20345 putIRegE(sz
, pfx
, modrm
, mkexpr(t2
));
20347 DIP("xchg%c %s, %s\n",
20348 nameISize(sz
), nameIRegG(sz
, pfx
, modrm
),
20349 nameIRegE(sz
, pfx
, modrm
));
20351 *expect_CAS
= True
;
20352 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
20353 assign( t1
, loadLE(ty
, mkexpr(addr
)) );
20354 assign( t2
, getIRegG(sz
, pfx
, modrm
) );
20355 casLE( mkexpr(addr
),
20356 mkexpr(t1
), mkexpr(t2
), guest_RIP_curr_instr
);
20357 putIRegG( sz
, pfx
, modrm
, mkexpr(t1
) );
20359 DIP("xchg%c %s, %s\n", nameISize(sz
),
20360 nameIRegG(sz
, pfx
, modrm
), dis_buf
);
20364 case 0x88: { /* MOV Gb,Eb */
20365 /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */
20367 delta
= dis_mov_G_E(vbi
, pfx
, 1, delta
, &ok
);
20368 if (!ok
) goto decode_failure
;
20372 case 0x89: { /* MOV Gv,Ev */
20373 /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */
20375 delta
= dis_mov_G_E(vbi
, pfx
, sz
, delta
, &ok
);
20376 if (!ok
) goto decode_failure
;
20380 case 0x8A: /* MOV Eb,Gb */
20381 if (haveF2orF3(pfx
)) goto decode_failure
;
20382 delta
= dis_mov_E_G(vbi
, pfx
, 1, delta
);
20385 case 0x8B: /* MOV Ev,Gv */
20386 if (haveF2orF3(pfx
)) goto decode_failure
;
20387 delta
= dis_mov_E_G(vbi
, pfx
, sz
, delta
);
20390 case 0x8C: /* MOV S,E -- MOV from a SEGMENT REGISTER */
20391 if (haveF2orF3(pfx
)) goto decode_failure
;
20392 delta
= dis_mov_S_E(vbi
, pfx
, sz
, delta
);
20395 case 0x8D: /* LEA M,Gv */
20396 if (haveF2orF3(pfx
)) goto decode_failure
;
20397 if (sz
!= 4 && sz
!= 8)
20398 goto decode_failure
;
20399 modrm
= getUChar(delta
);
20400 if (epartIsReg(modrm
))
20401 goto decode_failure
;
20402 /* NOTE! this is the one place where a segment override prefix
20403 has no effect on the address calculation. Therefore we clear
20404 any segment override bits in pfx. */
20405 addr
= disAMode ( &alen
, vbi
, clearSegBits(pfx
), delta
, dis_buf
, 0 );
20407 /* This is a hack. But it isn't clear that really doing the
20408 calculation at 32 bits is really worth it. Hence for leal,
20409 do the full 64-bit calculation and then truncate it. */
20410 putIRegG( sz
, pfx
, modrm
,
20412 ? unop(Iop_64to32
, mkexpr(addr
))
20415 DIP("lea%c %s, %s\n", nameISize(sz
), dis_buf
,
20416 nameIRegG(sz
,pfx
,modrm
));
20419 case 0x8F: { /* POPQ m64 / POPW m16 */
20422 /* There is no encoding for 32-bit pop in 64-bit mode.
20423 So sz==4 actually means sz==8. */
20424 if (haveF2orF3(pfx
)) goto decode_failure
;
20425 vassert(sz
== 2 || sz
== 4
20426 || /* tolerate redundant REX.W, see #210481 */ sz
== 8);
20427 if (sz
== 4) sz
= 8;
20428 if (sz
!= 8) goto decode_failure
; // until we know a sz==2 test case exists
20430 rm
= getUChar(delta
);
20432 /* make sure this instruction is correct POP */
20433 if (epartIsReg(rm
) || gregLO3ofRM(rm
) != 0)
20434 goto decode_failure
;
20435 /* and has correct size */
20438 t1
= newTemp(Ity_I64
);
20439 t3
= newTemp(Ity_I64
);
20440 assign( t1
, getIReg64(R_RSP
) );
20441 assign( t3
, loadLE(Ity_I64
, mkexpr(t1
)) );
20443 /* Increase RSP; must be done before the STORE. Intel manual
20444 says: If the RSP register is used as a base register for
20445 addressing a destination operand in memory, the POP
20446 instruction computes the effective address of the operand
20447 after it increments the RSP register. */
20448 putIReg64(R_RSP
, binop(Iop_Add64
, mkexpr(t1
), mkU64(sz
)) );
20450 addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
20451 storeLE( mkexpr(addr
), mkexpr(t3
) );
20453 DIP("popl %s\n", dis_buf
);
20459 case 0x90: /* XCHG eAX,eAX */
20460 /* detect and handle F3 90 (rep nop) specially */
20461 if (!have66(pfx
) && !haveF2(pfx
) && haveF3(pfx
)) {
20462 DIP("rep nop (P4 pause)\n");
20463 /* "observe" the hint. The Vex client needs to be careful not
20464 to cause very long delays as a result, though. */
20465 jmp_lit(dres
, Ijk_Yield
, guest_RIP_bbstart
+delta
);
20466 vassert(dres
->whatNext
== Dis_StopHere
);
20469 /* detect and handle NOPs specially */
20470 if (/* F2/F3 probably change meaning completely */
20472 /* If REX.B is 1, we're not exchanging rAX with itself */
20473 && getRexB(pfx
)==0 ) {
20477 /* else fall through to normal case. */
20478 case 0x91: /* XCHG rAX,rCX */
20479 case 0x92: /* XCHG rAX,rDX */
20480 case 0x93: /* XCHG rAX,rBX */
20481 case 0x94: /* XCHG rAX,rSP */
20482 case 0x95: /* XCHG rAX,rBP */
20483 case 0x96: /* XCHG rAX,rSI */
20484 case 0x97: /* XCHG rAX,rDI */
20485 /* guard against mutancy */
20486 if (haveF2orF3(pfx
)) goto decode_failure
;
20487 codegen_xchg_rAX_Reg ( pfx
, sz
, opc
- 0x90 );
20490 case 0x98: /* CBW */
20491 if (haveF2orF3(pfx
)) goto decode_failure
;
20493 putIRegRAX( 8, unop(Iop_32Sto64
, getIRegRAX(4)) );
20494 DIP(/*"cdqe\n"*/"cltq");
20498 putIRegRAX( 4, unop(Iop_16Sto32
, getIRegRAX(2)) );
20503 putIRegRAX( 2, unop(Iop_8Sto16
, getIRegRAX(1)) );
20507 goto decode_failure
;
20509 case 0x99: /* CWD/CDQ/CQO */
20510 if (haveF2orF3(pfx
)) goto decode_failure
;
20511 vassert(sz
== 2 || sz
== 4 || sz
== 8);
20514 binop(mkSizedOp(ty
,Iop_Sar8
),
20516 mkU8(sz
== 2 ? 15 : (sz
== 4 ? 31 : 63))) );
20517 DIP(sz
== 2 ? "cwd\n"
20518 : (sz
== 4 ? /*"cdq\n"*/ "cltd\n"
20522 case 0x9B: /* FWAIT (X87 insn) */
20527 case 0x9C: /* PUSHF */ {
20528 /* Note. There is no encoding for a 32-bit pushf in 64-bit
20529 mode. So sz==4 actually means sz==8. */
20530 /* 24 July 06: has also been seen with a redundant REX prefix,
20531 so must also allow sz==8. */
20532 if (haveF2orF3(pfx
)) goto decode_failure
;
20533 vassert(sz
== 2 || sz
== 4 || sz
== 8);
20534 if (sz
== 4) sz
= 8;
20535 if (sz
!= 8) goto decode_failure
; // until we know a sz==2 test case exists
20537 t1
= newTemp(Ity_I64
);
20538 assign( t1
, binop(Iop_Sub64
,getIReg64(R_RSP
),mkU64(sz
)) );
20539 putIReg64(R_RSP
, mkexpr(t1
) );
20541 t2
= newTemp(Ity_I64
);
20542 assign( t2
, mk_amd64g_calculate_rflags_all() );
20544 /* Patch in the D flag. This can simply be a copy of bit 10 of
20545 baseBlock[OFFB_DFLAG]. */
20546 t3
= newTemp(Ity_I64
);
20547 assign( t3
, binop(Iop_Or64
,
20550 IRExpr_Get(OFFB_DFLAG
,Ity_I64
),
20554 /* And patch in the ID flag. */
20555 t4
= newTemp(Ity_I64
);
20556 assign( t4
, binop(Iop_Or64
,
20559 binop(Iop_Shl64
, IRExpr_Get(OFFB_IDFLAG
,Ity_I64
),
20564 /* And patch in the AC flag too. */
20565 t5
= newTemp(Ity_I64
);
20566 assign( t5
, binop(Iop_Or64
,
20569 binop(Iop_Shl64
, IRExpr_Get(OFFB_ACFLAG
,Ity_I64
),
20574 /* if sz==2, the stored value needs to be narrowed. */
20576 storeLE( mkexpr(t1
), unop(Iop_32to16
,
20577 unop(Iop_64to32
,mkexpr(t5
))) );
20579 storeLE( mkexpr(t1
), mkexpr(t5
) );
20581 DIP("pushf%c\n", nameISize(sz
));
20585 case 0x9D: /* POPF */
20586 /* Note. There is no encoding for a 32-bit popf in 64-bit mode.
20587 So sz==4 actually means sz==8. */
20588 if (haveF2orF3(pfx
)) goto decode_failure
;
20589 vassert(sz
== 2 || sz
== 4);
20590 if (sz
== 4) sz
= 8;
20591 if (sz
!= 8) goto decode_failure
; // until we know a sz==2 test case exists
20592 t1
= newTemp(Ity_I64
); t2
= newTemp(Ity_I64
);
20593 assign(t2
, getIReg64(R_RSP
));
20594 assign(t1
, widenUto64(loadLE(szToITy(sz
),mkexpr(t2
))));
20595 putIReg64(R_RSP
, binop(Iop_Add64
, mkexpr(t2
), mkU64(sz
)));
20596 /* t1 is the flag word. Mask out everything except OSZACP and
20597 set the flags thunk to AMD64G_CC_OP_COPY. */
20598 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
20599 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
20600 stmt( IRStmt_Put( OFFB_CC_DEP1
,
20603 mkU64( AMD64G_CC_MASK_C
| AMD64G_CC_MASK_P
20604 | AMD64G_CC_MASK_A
| AMD64G_CC_MASK_Z
20605 | AMD64G_CC_MASK_S
| AMD64G_CC_MASK_O
)
20610 /* Also need to set the D flag, which is held in bit 10 of t1.
20611 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
20617 binop(Iop_Shr64
, mkexpr(t1
), mkU8(10)),
20619 mkU64(0xFFFFFFFFFFFFFFFFULL
),
20623 /* And set the ID flag */
20629 binop(Iop_Shr64
, mkexpr(t1
), mkU8(21)),
20635 /* And set the AC flag too */
20641 binop(Iop_Shr64
, mkexpr(t1
), mkU8(18)),
20647 DIP("popf%c\n", nameISize(sz
));
20650 case 0x9E: /* SAHF */
20655 case 0x9F: /* LAHF */
20660 case 0xA0: /* MOV Ob,AL */
20661 if (have66orF2orF3(pfx
)) goto decode_failure
;
20663 /* Fall through ... */
20664 case 0xA1: /* MOV Ov,eAX */
20665 if (sz
!= 8 && sz
!= 4 && sz
!= 2 && sz
!= 1)
20666 goto decode_failure
;
20667 d64
= getDisp64(delta
);
20670 addr
= newTemp(Ity_I64
);
20671 assign( addr
, handleAddrOverrides(vbi
, pfx
, mkU64(d64
)) );
20672 putIRegRAX(sz
, loadLE( ty
, mkexpr(addr
) ));
20673 DIP("mov%c %s0x%llx, %s\n", nameISize(sz
),
20674 segRegTxt(pfx
), (ULong
)d64
,
20678 case 0xA2: /* MOV AL,Ob */
20679 if (have66orF2orF3(pfx
)) goto decode_failure
;
20681 /* Fall through ... */
20682 case 0xA3: /* MOV eAX,Ov */
20683 if (sz
!= 8 && sz
!= 4 && sz
!= 2 && sz
!= 1)
20684 goto decode_failure
;
20685 d64
= getDisp64(delta
);
20688 addr
= newTemp(Ity_I64
);
20689 assign( addr
, handleAddrOverrides(vbi
, pfx
, mkU64(d64
)) );
20690 storeLE( mkexpr(addr
), getIRegRAX(sz
) );
20691 DIP("mov%c %s, %s0x%llx\n", nameISize(sz
), nameIRegRAX(sz
),
20692 segRegTxt(pfx
), (ULong
)d64
);
20697 /* F3 A4: rep movsb */
20698 if (haveF3(pfx
) && !haveF2(pfx
)) {
20701 dis_REP_op ( dres
, AMD64CondAlways
, dis_MOVS
, sz
,
20702 guest_RIP_curr_instr
,
20703 guest_RIP_bbstart
+delta
, "rep movs", pfx
);
20704 dres
->whatNext
= Dis_StopHere
;
20708 if (!haveF3(pfx
) && !haveF2(pfx
)) {
20711 dis_string_op( dis_MOVS
, sz
, "movs", pfx
);
20714 goto decode_failure
;
20718 /* F3 A6/A7: repe cmps/rep cmps{w,l,q} */
20719 if (haveF3(pfx
) && !haveF2(pfx
)) {
20722 dis_REP_op ( dres
, AMD64CondZ
, dis_CMPS
, sz
,
20723 guest_RIP_curr_instr
,
20724 guest_RIP_bbstart
+delta
, "repe cmps", pfx
);
20725 dres
->whatNext
= Dis_StopHere
;
20728 goto decode_failure
;
20732 /* F3 AA/AB: rep stosb/rep stos{w,l,q} */
20733 if (haveF3(pfx
) && !haveF2(pfx
)) {
20736 dis_REP_op ( dres
, AMD64CondAlways
, dis_STOS
, sz
,
20737 guest_RIP_curr_instr
,
20738 guest_RIP_bbstart
+delta
, "rep stos", pfx
);
20739 vassert(dres
->whatNext
== Dis_StopHere
);
20742 /* AA/AB: stosb/stos{w,l,q} */
20743 if (!haveF3(pfx
) && !haveF2(pfx
)) {
20746 dis_string_op( dis_STOS
, sz
, "stos", pfx
);
20749 goto decode_failure
;
20751 case 0xA8: /* TEST Ib, AL */
20752 if (haveF2orF3(pfx
)) goto decode_failure
;
20753 delta
= dis_op_imm_A( 1, False
, Iop_And8
, False
, delta
, "test" );
20755 case 0xA9: /* TEST Iv, eAX */
20756 if (haveF2orF3(pfx
)) goto decode_failure
;
20757 delta
= dis_op_imm_A( sz
, False
, Iop_And8
, False
, delta
, "test" );
20760 case 0xAC: /* LODS, no REP prefix */
20762 dis_string_op( dis_LODS
, ( opc
== 0xAC ? 1 : sz
), "lods", pfx
);
20767 /* F2 AE/AF: repne scasb/repne scas{w,l,q} */
20768 if (haveF2(pfx
) && !haveF3(pfx
)) {
20771 dis_REP_op ( dres
, AMD64CondNZ
, dis_SCAS
, sz
,
20772 guest_RIP_curr_instr
,
20773 guest_RIP_bbstart
+delta
, "repne scas", pfx
);
20774 vassert(dres
->whatNext
== Dis_StopHere
);
20777 /* F3 AE/AF: repe scasb/repe scas{w,l,q} */
20778 if (!haveF2(pfx
) && haveF3(pfx
)) {
20781 dis_REP_op ( dres
, AMD64CondZ
, dis_SCAS
, sz
,
20782 guest_RIP_curr_instr
,
20783 guest_RIP_bbstart
+delta
, "repe scas", pfx
);
20784 vassert(dres
->whatNext
== Dis_StopHere
);
20787 /* AE/AF: scasb/scas{w,l,q} */
20788 if (!haveF2(pfx
) && !haveF3(pfx
)) {
20791 dis_string_op( dis_SCAS
, sz
, "scas", pfx
);
20794 goto decode_failure
;
20796 /* XXXX be careful here with moves to AH/BH/CH/DH */
20797 case 0xB0: /* MOV imm,AL */
20798 case 0xB1: /* MOV imm,CL */
20799 case 0xB2: /* MOV imm,DL */
20800 case 0xB3: /* MOV imm,BL */
20801 case 0xB4: /* MOV imm,AH */
20802 case 0xB5: /* MOV imm,CH */
20803 case 0xB6: /* MOV imm,DH */
20804 case 0xB7: /* MOV imm,BH */
20805 if (haveF2orF3(pfx
)) goto decode_failure
;
20806 d64
= getUChar(delta
);
20808 putIRegRexB(1, pfx
, opc
-0xB0, mkU8(d64
));
20809 DIP("movb $%lld,%s\n", d64
, nameIRegRexB(1,pfx
,opc
-0xB0));
20812 case 0xB8: /* MOV imm,eAX */
20813 case 0xB9: /* MOV imm,eCX */
20814 case 0xBA: /* MOV imm,eDX */
20815 case 0xBB: /* MOV imm,eBX */
20816 case 0xBC: /* MOV imm,eSP */
20817 case 0xBD: /* MOV imm,eBP */
20818 case 0xBE: /* MOV imm,eSI */
20819 case 0xBF: /* MOV imm,eDI */
20820 /* This is the one-and-only place where 64-bit literals are
20821 allowed in the instruction stream. */
20822 if (haveF2orF3(pfx
)) goto decode_failure
;
20824 d64
= getDisp64(delta
);
20826 putIRegRexB(8, pfx
, opc
-0xB8, mkU64(d64
));
20827 DIP("movabsq $%lld,%s\n", (Long
)d64
,
20828 nameIRegRexB(8,pfx
,opc
-0xB8));
20830 d64
= getSDisp(imin(4,sz
),delta
);
20831 delta
+= imin(4,sz
);
20832 putIRegRexB(sz
, pfx
, opc
-0xB8,
20833 mkU(szToITy(sz
), d64
& mkSizeMask(sz
)));
20834 DIP("mov%c $%lld,%s\n", nameISize(sz
),
20836 nameIRegRexB(sz
,pfx
,opc
-0xB8));
20840 case 0xC0: { /* Grp2 Ib,Eb */
20841 Bool decode_OK
= True
;
20842 if (haveF2orF3(pfx
)) goto decode_failure
;
20843 modrm
= getUChar(delta
);
20844 am_sz
= lengthAMode(pfx
,delta
);
20846 d64
= getUChar(delta
+ am_sz
);
20848 delta
= dis_Grp2 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
,
20849 mkU8(d64
& 0xFF), NULL
, &decode_OK
);
20850 if (!decode_OK
) goto decode_failure
;
20854 case 0xC1: { /* Grp2 Ib,Ev */
20855 Bool decode_OK
= True
;
20856 if (haveF2orF3(pfx
)) goto decode_failure
;
20857 modrm
= getUChar(delta
);
20858 am_sz
= lengthAMode(pfx
,delta
);
20860 d64
= getUChar(delta
+ am_sz
);
20861 delta
= dis_Grp2 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
,
20862 mkU8(d64
& 0xFF), NULL
, &decode_OK
);
20863 if (!decode_OK
) goto decode_failure
;
20867 case 0xC2: /* RET imm16 */
20868 if (have66orF3(pfx
)) goto decode_failure
;
20869 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
20870 d64
= getUDisp16(delta
);
20872 dis_ret(dres
, vbi
, d64
);
20873 DIP("ret $%lld\n", d64
);
20876 case 0xC3: /* RET */
20877 if (have66(pfx
)) goto decode_failure
;
20878 /* F3 is acceptable on AMD. */
20879 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
20880 dis_ret(dres
, vbi
, 0);
20881 DIP(haveF3(pfx
) ? "rep ; ret\n" : "ret\n");
20884 case 0xC6: /* C6 /0 = MOV Ib,Eb */
20886 goto maybe_do_Mov_I_E
;
20887 case 0xC7: /* C7 /0 = MOV Iv,Ev */
20888 goto maybe_do_Mov_I_E
;
20890 modrm
= getUChar(delta
);
20891 if (gregLO3ofRM(modrm
) == 0) {
20892 if (epartIsReg(modrm
)) {
20893 /* Neither F2 nor F3 are allowable. */
20894 if (haveF2orF3(pfx
)) goto decode_failure
;
20895 delta
++; /* mod/rm byte */
20896 d64
= getSDisp(imin(4,sz
),delta
);
20897 delta
+= imin(4,sz
);
20898 putIRegE(sz
, pfx
, modrm
,
20899 mkU(szToITy(sz
), d64
& mkSizeMask(sz
)));
20900 DIP("mov%c $%lld, %s\n", nameISize(sz
),
20902 nameIRegE(sz
,pfx
,modrm
));
20904 if (haveF2(pfx
)) goto decode_failure
;
20905 /* F3(XRELEASE) is allowable here */
20906 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
,
20907 /*xtra*/imin(4,sz
) );
20909 d64
= getSDisp(imin(4,sz
),delta
);
20910 delta
+= imin(4,sz
);
20911 storeLE(mkexpr(addr
),
20912 mkU(szToITy(sz
), d64
& mkSizeMask(sz
)));
20913 DIP("mov%c $%lld, %s\n", nameISize(sz
), (Long
)d64
, dis_buf
);
20917 /* BEGIN HACKY SUPPORT FOR xbegin */
20918 if (opc
== 0xC7 && modrm
== 0xF8 && !have66orF2orF3(pfx
) && sz
== 4
20919 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
20920 delta
++; /* mod/rm byte */
20921 d64
= getSDisp(4,delta
);
20923 guest_RIP_next_mustcheck
= True
;
20924 guest_RIP_next_assumed
= guest_RIP_bbstart
+ delta
;
20925 Addr64 failAddr
= guest_RIP_bbstart
+ delta
+ d64
;
20926 /* EAX contains the failure status code. Bit 3 is "Set if an
20927 internal buffer overflowed", which seems like the
20928 least-bogus choice we can make here. */
20929 putIRegRAX(4, mkU32(1<<3));
20930 /* And jump to the fail address. */
20931 jmp_lit(dres
, Ijk_Boring
, failAddr
);
20932 vassert(dres
->whatNext
== Dis_StopHere
);
20933 DIP("xbeginq 0x%llx\n", failAddr
);
20936 /* END HACKY SUPPORT FOR xbegin */
20937 /* BEGIN HACKY SUPPORT FOR xabort */
20938 if (opc
== 0xC6 && modrm
== 0xF8 && !have66orF2orF3(pfx
) && sz
== 1
20939 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
20940 delta
++; /* mod/rm byte */
20941 abyte
= getUChar(delta
); delta
++;
20942 /* There is never a real transaction in progress, so do nothing. */
20943 DIP("xabort $%d", (Int
)abyte
);
20946 /* END HACKY SUPPORT FOR xabort */
20947 goto decode_failure
;
20949 case 0xC8: /* ENTER */
20950 /* Same comments re operand size as for LEAVE below apply.
20951 Also, only handles the case "enter $imm16, $0"; other cases
20952 for the second operand (nesting depth) are not handled. */
20954 goto decode_failure
;
20955 d64
= getUDisp16(delta
);
20957 vassert(d64
>= 0 && d64
<= 0xFFFF);
20958 if (getUChar(delta
) != 0)
20959 goto decode_failure
;
20961 /* Intel docs seem to suggest:
20967 t1
= newTemp(Ity_I64
);
20968 assign(t1
, getIReg64(R_RBP
));
20969 t2
= newTemp(Ity_I64
);
20970 assign(t2
, binop(Iop_Sub64
, getIReg64(R_RSP
), mkU64(8)));
20971 putIReg64(R_RSP
, mkexpr(t2
));
20972 storeLE(mkexpr(t2
), mkexpr(t1
));
20973 putIReg64(R_RBP
, mkexpr(t2
));
20975 putIReg64(R_RSP
, binop(Iop_Sub64
, mkexpr(t2
), mkU64(d64
)));
20977 DIP("enter $%u, $0\n", (UInt
)d64
);
20980 case 0xC9: /* LEAVE */
20981 /* In 64-bit mode this defaults to a 64-bit operand size. There
20982 is no way to encode a 32-bit variant. Hence sz==4 but we do
20985 goto decode_failure
;
20986 t1
= newTemp(Ity_I64
);
20987 t2
= newTemp(Ity_I64
);
20988 assign(t1
, getIReg64(R_RBP
));
20989 /* First PUT RSP looks redundant, but need it because RSP must
20990 always be up-to-date for Memcheck to work... */
20991 putIReg64(R_RSP
, mkexpr(t1
));
20992 assign(t2
, loadLE(Ity_I64
,mkexpr(t1
)));
20993 putIReg64(R_RBP
, mkexpr(t2
));
20994 putIReg64(R_RSP
, binop(Iop_Add64
, mkexpr(t1
), mkU64(8)) );
20998 case 0xCC: /* INT 3 */
20999 jmp_lit(dres
, Ijk_SigTRAP
, guest_RIP_bbstart
+ delta
);
21000 vassert(dres
->whatNext
== Dis_StopHere
);
21004 case 0xCD: /* INT imm8 */
21005 d64
= getUChar(delta
); delta
++;
21007 /* Handle int $0xD2 (Solaris fasttrap syscalls). */
21009 jmp_lit(dres
, Ijk_Sys_int210
, guest_RIP_bbstart
+ delta
);
21010 vassert(dres
->whatNext
== Dis_StopHere
);
21011 DIP("int $0xD2\n");
21014 goto decode_failure
;
21016 case 0xCF: /* IRET */
21017 /* Note, this is an extremely kludgey and limited implementation of iret
21018 based on the extremely kludgey and limited implementation of iret for x86
21019 popq %RIP; popl %CS; popq %RFLAGS; popq %RSP; popl %SS
21020 %CS and %SS are ignored */
21021 if (sz
!= 8 || have66orF2orF3(pfx
)) goto decode_failure
;
21023 t1
= newTemp(Ity_I64
); /* RSP */
21024 t2
= newTemp(Ity_I64
); /* new RIP */
21025 /* t3 = newTemp(Ity_I32); new CS */
21026 t4
= newTemp(Ity_I64
); /* new RFLAGS */
21027 t5
= newTemp(Ity_I64
); /* new RSP */
21028 /* t6 = newTemp(Ity_I32); new SS */
21030 assign(t1
, getIReg64(R_RSP
));
21031 assign(t2
, loadLE(Ity_I64
, binop(Iop_Add64
,mkexpr(t1
),mkU64(0))));
21032 /* assign(t3, loadLE(Ity_I32, binop(Iop_Add64,mkexpr(t1),mkU64(8)))); */
21033 assign(t4
, loadLE(Ity_I64
, binop(Iop_Add64
,mkexpr(t1
),mkU64(16))));
21034 assign(t5
, loadLE(Ity_I64
, binop(Iop_Add64
,mkexpr(t1
),mkU64(24))));
21035 /* assign(t6, loadLE(Ity_I32, binop(Iop_Add64,mkexpr(t1),mkU64(32)))); */
21038 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
21039 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
21040 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
21041 stmt( IRStmt_Put( OFFB_CC_DEP1
,
21044 mkU64( AMD64G_CC_MASK_C
| AMD64G_CC_MASK_P
21045 | AMD64G_CC_MASK_A
| AMD64G_CC_MASK_Z
21046 | AMD64G_CC_MASK_S
| AMD64G_CC_MASK_O
)
21051 /* Also need to set the D flag, which is held in bit 10 of t4.
21052 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
21058 binop(Iop_Shr64
, mkexpr(t4
), mkU8(10)),
21060 mkU64(0xFFFFFFFFFFFFFFFFULL
),
21064 /* And set the ID flag */
21070 binop(Iop_Shr64
, mkexpr(t4
), mkU8(21)),
21076 /* And set the AC flag too */
21082 binop(Iop_Shr64
, mkexpr(t4
), mkU8(18)),
21089 /* set new stack */
21090 putIReg64(R_RSP
, mkexpr(t5
));
21092 /* goto new RIP value */
21093 jmp_treg(dres
, Ijk_Ret
, t2
);
21094 DIP("iret (very kludgey)\n");
21097 case 0xD0: { /* Grp2 1,Eb */
21098 Bool decode_OK
= True
;
21099 if (haveF2orF3(pfx
)) goto decode_failure
;
21100 modrm
= getUChar(delta
);
21101 am_sz
= lengthAMode(pfx
,delta
);
21105 delta
= dis_Grp2 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
,
21106 mkU8(d64
), NULL
, &decode_OK
);
21107 if (!decode_OK
) goto decode_failure
;
21111 case 0xD1: { /* Grp2 1,Ev */
21112 Bool decode_OK
= True
;
21113 if (haveF2orF3(pfx
)) goto decode_failure
;
21114 modrm
= getUChar(delta
);
21115 am_sz
= lengthAMode(pfx
,delta
);
21118 delta
= dis_Grp2 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
,
21119 mkU8(d64
), NULL
, &decode_OK
);
21120 if (!decode_OK
) goto decode_failure
;
21124 case 0xD2: { /* Grp2 CL,Eb */
21125 Bool decode_OK
= True
;
21126 if (haveF2orF3(pfx
)) goto decode_failure
;
21127 modrm
= getUChar(delta
);
21128 am_sz
= lengthAMode(pfx
,delta
);
21131 delta
= dis_Grp2 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
,
21132 getIRegCL(), "%cl", &decode_OK
);
21133 if (!decode_OK
) goto decode_failure
;
21137 case 0xD3: { /* Grp2 CL,Ev */
21138 Bool decode_OK
= True
;
21139 if (haveF2orF3(pfx
)) goto decode_failure
;
21140 modrm
= getUChar(delta
);
21141 am_sz
= lengthAMode(pfx
,delta
);
21143 delta
= dis_Grp2 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
,
21144 getIRegCL(), "%cl", &decode_OK
);
21145 if (!decode_OK
) goto decode_failure
;
21149 case 0xD8: /* X87 instructions */
21157 Bool redundantREXWok
= False
;
21159 if (haveF2orF3(pfx
))
21160 goto decode_failure
;
21162 /* kludge to tolerate redundant rex.w prefixes (should do this
21163 properly one day) */
21164 /* mono 1.1.18.1 produces 48 D9 FA, which is rex.w fsqrt */
21165 if ( (opc
== 0xD9 && getUChar(delta
+0) == 0xFA)/*fsqrt*/ )
21166 redundantREXWok
= True
;
21168 Bool size_OK
= False
;
21171 else if ( sz
== 8 )
21172 size_OK
= redundantREXWok
;
21173 else if ( sz
== 2 ) {
21174 int mod_rm
= getUChar(delta
+0);
21175 int reg
= gregLO3ofRM(mod_rm
);
21176 /* The HotSpot JVM uses these */
21177 if ( (opc
== 0xDD) && (reg
== 0 /* FLDL */ ||
21178 reg
== 4 /* FNSAVE */ ||
21179 reg
== 6 /* FRSTOR */ ) )
21182 /* AMD manual says 0x66 size override is ignored, except where
21183 it is meaningful */
21185 goto decode_failure
;
21187 Bool decode_OK
= False
;
21188 delta
= dis_FPU ( &decode_OK
, vbi
, pfx
, delta
);
21190 goto decode_failure
;
21195 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
21196 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */
21197 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */
21198 { /* The docs say this uses rCX as a count depending on the
21199 address size override, not the operand one. */
21200 IRExpr
* zbit
= NULL
;
21201 IRExpr
* count
= NULL
;
21202 IRExpr
* cond
= NULL
;
21203 const HChar
* xtra
= NULL
;
21205 if (have66orF2orF3(pfx
) || 1==getRexW(pfx
)) goto decode_failure
;
21206 /* So at this point we've rejected any variants which appear to
21207 be governed by the usual operand-size modifiers. Hence only
21208 the address size prefix can have an effect. It changes the
21209 size from 64 (default) to 32. */
21210 d64
= guest_RIP_bbstart
+delta
+1 + getSDisp8(delta
);
21212 if (haveASO(pfx
)) {
21213 /* 64to32 of 64-bit get is merely a get-put improvement
21215 putIReg32(R_RCX
, binop(Iop_Sub32
,
21216 unop(Iop_64to32
, getIReg64(R_RCX
)),
21219 putIReg64(R_RCX
, binop(Iop_Sub64
, getIReg64(R_RCX
), mkU64(1)));
21222 /* This is correct, both for 32- and 64-bit versions. If we're
21223 doing a 32-bit dec and the result is zero then the default
21224 zero extension rule will cause the upper 32 bits to be zero
21225 too. Hence a 64-bit check against zero is OK. */
21226 count
= getIReg64(R_RCX
);
21227 cond
= binop(Iop_CmpNE64
, count
, mkU64(0));
21234 zbit
= mk_amd64g_calculate_condition( AMD64CondZ
);
21235 cond
= mkAnd1(cond
, zbit
);
21239 zbit
= mk_amd64g_calculate_condition( AMD64CondNZ
);
21240 cond
= mkAnd1(cond
, zbit
);
21245 stmt( IRStmt_Exit(cond
, Ijk_Boring
, IRConst_U64(d64
), OFFB_RIP
) );
21247 DIP("loop%s%s 0x%llx\n", xtra
, haveASO(pfx
) ? "l" : "", (ULong
)d64
);
21252 /* JRCXZ or JECXZ, depending address size override. */
21253 if (have66orF2orF3(pfx
)) goto decode_failure
;
21254 d64
= (guest_RIP_bbstart
+delta
+1) + getSDisp8(delta
);
21256 if (haveASO(pfx
)) {
21258 stmt( IRStmt_Exit( binop(Iop_CmpEQ64
,
21259 unop(Iop_32Uto64
, getIReg32(R_RCX
)),
21265 DIP("jecxz 0x%llx\n", (ULong
)d64
);
21268 stmt( IRStmt_Exit( binop(Iop_CmpEQ64
,
21275 DIP("jrcxz 0x%llx\n", (ULong
)d64
);
21279 case 0xE4: /* IN imm8, AL */
21281 t1
= newTemp(Ity_I64
);
21282 abyte
= getUChar(delta
); delta
++;
21283 assign(t1
, mkU64( abyte
& 0xFF ));
21284 DIP("in%c $%d,%s\n", nameISize(sz
), (Int
)abyte
, nameIRegRAX(sz
));
21286 case 0xE5: /* IN imm8, eAX */
21287 if (!(sz
== 2 || sz
== 4)) goto decode_failure
;
21288 t1
= newTemp(Ity_I64
);
21289 abyte
= getUChar(delta
); delta
++;
21290 assign(t1
, mkU64( abyte
& 0xFF ));
21291 DIP("in%c $%d,%s\n", nameISize(sz
), (Int
)abyte
, nameIRegRAX(sz
));
21293 case 0xEC: /* IN %DX, AL */
21295 t1
= newTemp(Ity_I64
);
21296 assign(t1
, unop(Iop_16Uto64
, getIRegRDX(2)));
21297 DIP("in%c %s,%s\n", nameISize(sz
), nameIRegRDX(2),
21300 case 0xED: /* IN %DX, eAX */
21301 if (!(sz
== 2 || sz
== 4)) goto decode_failure
;
21302 t1
= newTemp(Ity_I64
);
21303 assign(t1
, unop(Iop_16Uto64
, getIRegRDX(2)));
21304 DIP("in%c %s,%s\n", nameISize(sz
), nameIRegRDX(2),
21308 /* At this point, sz indicates the width, and t1 is a 64-bit
21309 value giving port number. */
21311 if (haveF2orF3(pfx
)) goto decode_failure
;
21312 vassert(sz
== 1 || sz
== 2 || sz
== 4);
21314 t2
= newTemp(Ity_I64
);
21315 d
= unsafeIRDirty_1_N(
21318 "amd64g_dirtyhelper_IN",
21319 &amd64g_dirtyhelper_IN
,
21320 mkIRExprVec_2( mkexpr(t1
), mkU64(sz
) )
21322 /* do the call, dumping the result in t2. */
21323 stmt( IRStmt_Dirty(d
) );
21324 putIRegRAX(sz
, narrowTo( ty
, mkexpr(t2
) ) );
21328 case 0xE6: /* OUT AL, imm8 */
21330 t1
= newTemp(Ity_I64
);
21331 abyte
= getUChar(delta
); delta
++;
21332 assign( t1
, mkU64( abyte
& 0xFF ) );
21333 DIP("out%c %s,$%d\n", nameISize(sz
), nameIRegRAX(sz
), (Int
)abyte
);
21335 case 0xE7: /* OUT eAX, imm8 */
21336 if (!(sz
== 2 || sz
== 4)) goto decode_failure
;
21337 t1
= newTemp(Ity_I64
);
21338 abyte
= getUChar(delta
); delta
++;
21339 assign( t1
, mkU64( abyte
& 0xFF ) );
21340 DIP("out%c %s,$%d\n", nameISize(sz
), nameIRegRAX(sz
), (Int
)abyte
);
21342 case 0xEE: /* OUT AL, %DX */
21344 t1
= newTemp(Ity_I64
);
21345 assign( t1
, unop(Iop_16Uto64
, getIRegRDX(2)) );
21346 DIP("out%c %s,%s\n", nameISize(sz
), nameIRegRAX(sz
),
21349 case 0xEF: /* OUT eAX, %DX */
21350 if (!(sz
== 2 || sz
== 4)) goto decode_failure
;
21351 t1
= newTemp(Ity_I64
);
21352 assign( t1
, unop(Iop_16Uto64
, getIRegRDX(2)) );
21353 DIP("out%c %s,%s\n", nameISize(sz
), nameIRegRAX(sz
),
21357 /* At this point, sz indicates the width, and t1 is a 64-bit
21358 value giving port number. */
21360 if (haveF2orF3(pfx
)) goto decode_failure
;
21361 vassert(sz
== 1 || sz
== 2 || sz
== 4);
21363 d
= unsafeIRDirty_0_N(
21365 "amd64g_dirtyhelper_OUT",
21366 &amd64g_dirtyhelper_OUT
,
21367 mkIRExprVec_3( mkexpr(t1
),
21368 widenUto64( getIRegRAX(sz
) ),
21371 stmt( IRStmt_Dirty(d
) );
21375 case 0xE8: /* CALL J4 */
21376 if (haveF3(pfx
)) goto decode_failure
;
21377 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
21378 d64
= getSDisp32(delta
); delta
+= 4;
21379 d64
+= (guest_RIP_bbstart
+delta
);
21380 /* (guest_RIP_bbstart+delta) == return-to addr, d64 == call-to addr */
21381 t1
= newTemp(Ity_I64
);
21382 assign(t1
, binop(Iop_Sub64
, getIReg64(R_RSP
), mkU64(8)));
21383 putIReg64(R_RSP
, mkexpr(t1
));
21384 storeLE( mkexpr(t1
), mkU64(guest_RIP_bbstart
+delta
));
21385 t2
= newTemp(Ity_I64
);
21386 assign(t2
, mkU64((Addr64
)d64
));
21387 make_redzone_AbiHint(vbi
, t1
, t2
/*nia*/, "call-d32");
21388 jmp_lit(dres
, Ijk_Call
, d64
);
21389 vassert(dres
->whatNext
== Dis_StopHere
);
21390 DIP("call 0x%llx\n", (ULong
)d64
);
21393 case 0xE9: /* Jv (jump, 16/32 offset) */
21394 if (haveF3(pfx
)) goto decode_failure
;
21396 goto decode_failure
; /* JRS added 2004 July 11 */
21397 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
21398 d64
= (guest_RIP_bbstart
+delta
+sz
) + getSDisp(sz
,delta
);
21400 jmp_lit(dres
, Ijk_Boring
, d64
);
21401 vassert(dres
->whatNext
== Dis_StopHere
);
21402 DIP("jmp 0x%llx\n", (ULong
)d64
);
21405 case 0xEB: /* Jb (jump, byte offset) */
21406 if (haveF3(pfx
)) goto decode_failure
;
21408 goto decode_failure
; /* JRS added 2004 July 11 */
21409 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
21410 d64
= (guest_RIP_bbstart
+delta
+1) + getSDisp8(delta
);
21412 jmp_lit(dres
, Ijk_Boring
, d64
);
21413 vassert(dres
->whatNext
== Dis_StopHere
);
21414 DIP("jmp-8 0x%llx\n", (ULong
)d64
);
21417 case 0xF5: /* CMC */
21418 case 0xF8: /* CLC */
21419 case 0xF9: /* STC */
21420 t1
= newTemp(Ity_I64
);
21421 t2
= newTemp(Ity_I64
);
21422 assign( t1
, mk_amd64g_calculate_rflags_all() );
21425 assign( t2
, binop(Iop_Xor64
, mkexpr(t1
),
21426 mkU64(AMD64G_CC_MASK_C
)));
21430 assign( t2
, binop(Iop_And64
, mkexpr(t1
),
21431 mkU64(~AMD64G_CC_MASK_C
)));
21435 assign( t2
, binop(Iop_Or64
, mkexpr(t1
),
21436 mkU64(AMD64G_CC_MASK_C
)));
21440 vpanic("disInstr(x64)(cmc/clc/stc)");
21442 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
21443 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
21444 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(t2
) ));
21445 /* Set NDEP even though it isn't used. This makes redundant-PUT
21446 elimination of previous stores to this field work better. */
21447 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
21450 case 0xF6: { /* Grp3 Eb */
21451 Bool decode_OK
= True
;
21452 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21453 /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */
21454 delta
= dis_Grp3 ( vbi
, pfx
, 1, delta
, &decode_OK
);
21455 if (!decode_OK
) goto decode_failure
;
21459 case 0xF7: { /* Grp3 Ev */
21460 Bool decode_OK
= True
;
21461 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21462 /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */
21463 delta
= dis_Grp3 ( vbi
, pfx
, sz
, delta
, &decode_OK
);
21464 if (!decode_OK
) goto decode_failure
;
21468 case 0xFC: /* CLD */
21469 if (haveF2orF3(pfx
)) goto decode_failure
;
21470 stmt( IRStmt_Put( OFFB_DFLAG
, mkU64(1)) );
21474 case 0xFD: /* STD */
21475 if (haveF2orF3(pfx
)) goto decode_failure
;
21476 stmt( IRStmt_Put( OFFB_DFLAG
, mkU64(-1ULL)) );
21480 case 0xFE: { /* Grp4 Eb */
21481 Bool decode_OK
= True
;
21482 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21483 /* We now let dis_Grp4 itself decide if F2 and/or F3 are valid */
21484 delta
= dis_Grp4 ( vbi
, pfx
, delta
, &decode_OK
);
21485 if (!decode_OK
) goto decode_failure
;
21489 case 0xFF: { /* Grp5 Ev */
21490 Bool decode_OK
= True
;
21491 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21492 /* We now let dis_Grp5 itself decide if F2 and/or F3 are valid */
21493 delta
= dis_Grp5 ( vbi
, pfx
, sz
, delta
, dres
, &decode_OK
);
21494 if (!decode_OK
) goto decode_failure
;
21504 return deltaIN
; /* fail */
21508 /*------------------------------------------------------------*/
21510 /*--- Top-level post-escape decoders: dis_ESC_0F ---*/
21512 /*------------------------------------------------------------*/
21514 static IRTemp
math_BSWAP ( IRTemp t1
, IRType ty
)
21516 IRTemp t2
= newTemp(ty
);
21517 if (ty
== Ity_I64
) {
21518 IRTemp m8
= newTemp(Ity_I64
);
21519 IRTemp s8
= newTemp(Ity_I64
);
21520 IRTemp m16
= newTemp(Ity_I64
);
21521 IRTemp s16
= newTemp(Ity_I64
);
21522 IRTemp m32
= newTemp(Ity_I64
);
21523 assign( m8
, mkU64(0xFF00FF00FF00FF00ULL
) );
21527 binop(Iop_And64
,mkexpr(t1
),mkexpr(m8
)),
21530 binop(Iop_Shl64
,mkexpr(t1
),mkU8(8)),
21535 assign( m16
, mkU64(0xFFFF0000FFFF0000ULL
) );
21539 binop(Iop_And64
,mkexpr(s8
),mkexpr(m16
)),
21542 binop(Iop_Shl64
,mkexpr(s8
),mkU8(16)),
21547 assign( m32
, mkU64(0xFFFFFFFF00000000ULL
) );
21551 binop(Iop_And64
,mkexpr(s16
),mkexpr(m32
)),
21554 binop(Iop_Shl64
,mkexpr(s16
),mkU8(32)),
21560 if (ty
== Ity_I32
) {
21564 binop(Iop_Shl32
, mkexpr(t1
), mkU8(24)),
21567 binop(Iop_And32
, binop(Iop_Shl32
, mkexpr(t1
), mkU8(8)),
21568 mkU32(0x00FF0000)),
21570 binop(Iop_And32
, binop(Iop_Shr32
, mkexpr(t1
), mkU8(8)),
21571 mkU32(0x0000FF00)),
21572 binop(Iop_And32
, binop(Iop_Shr32
, mkexpr(t1
), mkU8(24)),
21573 mkU32(0x000000FF) )
21578 if (ty
== Ity_I16
) {
21581 binop(Iop_Shl16
, mkexpr(t1
), mkU8(8)),
21582 binop(Iop_Shr16
, mkexpr(t1
), mkU8(8)) ));
21587 return IRTemp_INVALID
;
21591 __attribute__((noinline
))
21594 /*MB_OUT*/DisResult
* dres
,
21595 /*MB_OUT*/Bool
* expect_CAS
,
21596 const VexArchInfo
* archinfo
,
21597 const VexAbiInfo
* vbi
,
21598 Prefix pfx
, Int sz
, Long deltaIN
21602 IRTemp addr
= IRTemp_INVALID
;
21603 IRTemp t1
= IRTemp_INVALID
;
21604 IRTemp t2
= IRTemp_INVALID
;
21610 /* In the first switch, look for ordinary integer insns. */
21611 Long delta
= deltaIN
;
21612 UChar opc
= getUChar(delta
);
21614 switch (opc
) { /* first switch */
21618 modrm
= getUChar(delta
);
21619 /* 0F 01 /0 -- SGDT */
21620 /* 0F 01 /1 -- SIDT */
21621 if (!epartIsReg(modrm
)
21622 && (gregLO3ofRM(modrm
) == 0 || gregLO3ofRM(modrm
) == 1)) {
21623 /* This is really revolting, but ... since each processor
21624 (core) only has one IDT and one GDT, just let the guest
21625 see it (pass-through semantics). I can't see any way to
21626 construct a faked-up value, so don't bother to try. */
21627 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
21629 switch (gregLO3ofRM(modrm
)) {
21630 case 0: DIP("sgdt %s\n", dis_buf
); break;
21631 case 1: DIP("sidt %s\n", dis_buf
); break;
21632 default: vassert(0); /*NOTREACHED*/
21634 IRDirty
* d
= unsafeIRDirty_0_N (
21636 "amd64g_dirtyhelper_SxDT",
21637 &amd64g_dirtyhelper_SxDT
,
21638 mkIRExprVec_2( mkexpr(addr
),
21639 mkU64(gregLO3ofRM(modrm
)) )
21641 /* declare we're writing memory */
21642 d
->mFx
= Ifx_Write
;
21643 d
->mAddr
= mkexpr(addr
);
21645 stmt( IRStmt_Dirty(d
) );
21648 /* 0F 01 D0 = XGETBV */
21649 if (modrm
== 0xD0 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
21652 /* Fault (SEGV) if ECX isn't zero. Intel docs say #GP and I
21653 am not sure if that translates in to SEGV or to something
21654 else, in user space. */
21655 t1
= newTemp(Ity_I32
);
21656 assign( t1
, getIReg32(R_RCX
) );
21657 stmt( IRStmt_Exit(binop(Iop_CmpNE32
, mkexpr(t1
), mkU32(0)),
21659 IRConst_U64(guest_RIP_curr_instr
),
21662 putIRegRAX(4, mkU32(7));
21663 putIRegRDX(4, mkU32(0));
21666 /* BEGIN HACKY SUPPORT FOR xend */
21667 /* 0F 01 D5 = XEND */
21668 if (modrm
== 0xD5 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
21669 /* We are never in an transaction (xbegin immediately aborts).
21670 So this just always generates a General Protection Fault. */
21672 jmp_lit(dres
, Ijk_SigSEGV
, guest_RIP_bbstart
+ delta
);
21673 vassert(dres
->whatNext
== Dis_StopHere
);
21677 /* END HACKY SUPPORT FOR xend */
21678 /* BEGIN HACKY SUPPORT FOR xtest */
21679 /* 0F 01 D6 = XTEST */
21680 if (modrm
== 0xD6 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
21681 /* Sets ZF because there never is a transaction, and all
21682 CF, OF, SF, PF and AF are always cleared by xtest. */
21685 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
21686 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
21687 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkU64(AMD64G_CC_MASK_Z
) ));
21688 /* Set NDEP even though it isn't used. This makes redundant-PUT
21689 elimination of previous stores to this field work better. */
21690 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
21693 /* END HACKY SUPPORT FOR xtest */
21694 /* 0F 01 F9 = RDTSCP */
21695 if (modrm
== 0xF9 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_RDTSCP
)) {
21697 /* Uses dirty helper:
21698 void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* )
21699 declared to wr rax, rcx, rdx
21701 const HChar
* fName
= "amd64g_dirtyhelper_RDTSCP";
21702 void* fAddr
= &amd64g_dirtyhelper_RDTSCP
;
21704 = unsafeIRDirty_0_N ( 0/*regparms*/,
21705 fName
, fAddr
, mkIRExprVec_1(IRExpr_GSPTR()) );
21706 /* declare guest state effects */
21708 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
21709 d
->fxState
[0].fx
= Ifx_Write
;
21710 d
->fxState
[0].offset
= OFFB_RAX
;
21711 d
->fxState
[0].size
= 8;
21712 d
->fxState
[1].fx
= Ifx_Write
;
21713 d
->fxState
[1].offset
= OFFB_RCX
;
21714 d
->fxState
[1].size
= 8;
21715 d
->fxState
[2].fx
= Ifx_Write
;
21716 d
->fxState
[2].offset
= OFFB_RDX
;
21717 d
->fxState
[2].size
= 8;
21718 /* execute the dirty call, side-effecting guest state */
21719 stmt( IRStmt_Dirty(d
) );
21720 /* RDTSCP is a serialising insn. So, just in case someone is
21721 using it as a memory fence ... */
21722 stmt( IRStmt_MBE(Imbe_Fence
) );
21726 /* else decode failed */
21730 case 0x05: /* SYSCALL */
21731 guest_RIP_next_mustcheck
= True
;
21732 guest_RIP_next_assumed
= guest_RIP_bbstart
+ delta
;
21733 putIReg64( R_RCX
, mkU64(guest_RIP_next_assumed
) );
21734 /* It's important that all guest state is up-to-date
21735 at this point. So we declare an end-of-block here, which
21736 forces any cached guest state to be flushed. */
21737 jmp_lit(dres
, Ijk_Sys_syscall
, guest_RIP_next_assumed
);
21738 vassert(dres
->whatNext
== Dis_StopHere
);
21742 case 0x0B: /* UD2 */
21743 stmt( IRStmt_Put( OFFB_RIP
, mkU64(guest_RIP_curr_instr
) ) );
21744 jmp_lit(dres
, Ijk_NoDecode
, guest_RIP_curr_instr
);
21745 vassert(dres
->whatNext
== Dis_StopHere
);
21749 case 0x0D: /* 0F 0D /0 -- prefetch mem8 */
21750 /* 0F 0D /1 -- prefetchw mem8 */
21751 if (have66orF2orF3(pfx
)) goto decode_failure
;
21752 modrm
= getUChar(delta
);
21753 if (epartIsReg(modrm
)) goto decode_failure
;
21754 if (gregLO3ofRM(modrm
) != 0 && gregLO3ofRM(modrm
) != 1)
21755 goto decode_failure
;
21756 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
21758 switch (gregLO3ofRM(modrm
)) {
21759 case 0: DIP("prefetch %s\n", dis_buf
); break;
21760 case 1: DIP("prefetchw %s\n", dis_buf
); break;
21761 default: vassert(0); /*NOTREACHED*/
21770 // Intel CET instructions can have any prefixes before NOPs
21771 // and can use any ModRM, SIB and disp
21772 modrm
= getUChar(delta
);
21773 if (epartIsReg(modrm
)) {
21775 DIP("nop%c\n", nameISize(sz
));
21777 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
21779 DIP("nop%c %s\n", nameISize(sz
), dis_buf
);
21783 case 0x31: { /* RDTSC */
21784 IRTemp val
= newTemp(Ity_I64
);
21785 IRExpr
** args
= mkIRExprVec_0();
21786 IRDirty
* d
= unsafeIRDirty_1_N (
21789 "amd64g_dirtyhelper_RDTSC",
21790 &amd64g_dirtyhelper_RDTSC
,
21793 if (have66orF2orF3(pfx
)) goto decode_failure
;
21794 /* execute the dirty call, dumping the result in val. */
21795 stmt( IRStmt_Dirty(d
) );
21796 putIRegRDX(4, unop(Iop_64HIto32
, mkexpr(val
)));
21797 putIRegRAX(4, unop(Iop_64to32
, mkexpr(val
)));
21804 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
21805 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
21806 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
21807 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
21808 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
21809 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
21810 case 0x48: /* CMOVSb (cmov negative) */
21811 case 0x49: /* CMOVSb (cmov not negative) */
21812 case 0x4A: /* CMOVP (cmov parity even) */
21813 case 0x4B: /* CMOVNP (cmov parity odd) */
21814 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
21815 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
21816 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
21817 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
21818 if (haveF2orF3(pfx
)) goto decode_failure
;
21819 delta
= dis_cmov_E_G(vbi
, pfx
, sz
, (AMD64Condcode
)(opc
- 0x40), delta
);
21824 case 0x82: /* JBb/JNAEb (jump below) */
21825 case 0x83: /* JNBb/JAEb (jump not below) */
21826 case 0x84: /* JZb/JEb (jump zero) */
21827 case 0x85: /* JNZb/JNEb (jump not zero) */
21828 case 0x86: /* JBEb/JNAb (jump below or equal) */
21829 case 0x87: /* JNBEb/JAb (jump not below or equal) */
21830 case 0x88: /* JSb (jump negative) */
21831 case 0x89: /* JSb (jump not negative) */
21832 case 0x8A: /* JP (jump parity even) */
21833 case 0x8B: /* JNP/JPO (jump parity odd) */
21834 case 0x8C: /* JLb/JNGEb (jump less) */
21835 case 0x8D: /* JGEb/JNLb (jump greater or equal) */
21836 case 0x8E: /* JLEb/JNGb (jump less or equal) */
21837 case 0x8F: { /* JGb/JNLEb (jump greater) */
21839 const HChar
* comment
= "";
21840 if (haveF3(pfx
)) goto decode_failure
;
21841 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
21842 jmpDelta
= getSDisp32(delta
);
21843 d64
= (guest_RIP_bbstart
+delta
+4) + jmpDelta
;
21845 /* End the block at this point. */
21846 jcc_01( dres
, (AMD64Condcode
)(opc
- 0x80),
21847 guest_RIP_bbstart
+delta
, d64
);
21848 vassert(dres
->whatNext
== Dis_StopHere
);
21849 DIP("j%s-32 0x%llx %s\n", name_AMD64Condcode(opc
- 0x80), (ULong
)d64
,
21856 case 0x92: /* set-Bb/set-NAEb (set if below) */
21857 case 0x93: /* set-NBb/set-AEb (set if not below) */
21858 case 0x94: /* set-Zb/set-Eb (set if zero) */
21859 case 0x95: /* set-NZb/set-NEb (set if not zero) */
21860 case 0x96: /* set-BEb/set-NAb (set if below or equal) */
21861 case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */
21862 case 0x98: /* set-Sb (set if negative) */
21863 case 0x99: /* set-Sb (set if not negative) */
21864 case 0x9A: /* set-P (set if parity even) */
21865 case 0x9B: /* set-NP (set if parity odd) */
21866 case 0x9C: /* set-Lb/set-NGEb (set if less) */
21867 case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */
21868 case 0x9E: /* set-LEb/set-NGb (set if less or equal) */
21869 case 0x9F: /* set-Gb/set-NLEb (set if greater) */
21870 if (haveF2orF3(pfx
)) goto decode_failure
;
21871 t1
= newTemp(Ity_I8
);
21872 assign( t1
, unop(Iop_1Uto8
,mk_amd64g_calculate_condition(opc
-0x90)) );
21873 modrm
= getUChar(delta
);
21874 if (epartIsReg(modrm
)) {
21876 putIRegE(1, pfx
, modrm
, mkexpr(t1
));
21877 DIP("set%s %s\n", name_AMD64Condcode(opc
-0x90),
21878 nameIRegE(1,pfx
,modrm
));
21880 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
21882 storeLE( mkexpr(addr
), mkexpr(t1
) );
21883 DIP("set%s %s\n", name_AMD64Condcode(opc
-0x90), dis_buf
);
21888 case 0x1B: { /* Future MPX instructions, currently NOPs.
21889 BNDMK b, m F3 0F 1B
21890 BNDCL b, r/m F3 0F 1A
21891 BNDCU b, r/m F2 0F 1A
21892 BNDCN b, r/m F2 0F 1B
21893 BNDMOV b, b/m 66 0F 1A
21894 BNDMOV b/m, b 66 0F 1B
21895 BNDLDX b, mib 0F 1A
21896 BNDSTX mib, b 0F 1B */
21898 /* All instructions have two operands. One operand is always the
21899 bnd register number (bnd0-bnd3, other register numbers are
21900 ignored when MPX isn't enabled, but should generate an
21901 exception if MPX is enabled) given by gregOfRexRM. The other
21902 operand is either a ModRM:reg, ModRM:r/m or a SIB encoded
21903 address, all of which can be decoded by using either
21904 eregOfRexRM or disAMode. */
21906 modrm
= getUChar(delta
);
21907 int bnd
= gregOfRexRM(pfx
,modrm
);
21909 if (epartIsReg(modrm
)) {
21910 oper
= nameIReg64 (eregOfRexRM(pfx
,modrm
));
21913 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
21918 if (haveF3no66noF2 (pfx
)) {
21920 DIP ("bndmk %s, %%bnd%d\n", oper
, bnd
);
21921 } else /* opc == 0x1A */ {
21922 DIP ("bndcl %s, %%bnd%d\n", oper
, bnd
);
21924 } else if (haveF2no66noF3 (pfx
)) {
21926 DIP ("bndcu %s, %%bnd%d\n", oper
, bnd
);
21927 } else /* opc == 0x1B */ {
21928 DIP ("bndcn %s, %%bnd%d\n", oper
, bnd
);
21930 } else if (have66noF2noF3 (pfx
)) {
21932 DIP ("bndmov %s, %%bnd%d\n", oper
, bnd
);
21933 } else /* opc == 0x1B */ {
21934 DIP ("bndmov %%bnd%d, %s\n", bnd
, oper
);
21936 } else if (haveNo66noF2noF3 (pfx
)) {
21938 DIP ("bndldx %s, %%bnd%d\n", oper
, bnd
);
21939 } else /* opc == 0x1B */ {
21940 DIP ("bndstx %%bnd%d, %s\n", bnd
, oper
);
21942 } else goto decode_failure
;
21947 case 0xA2: { /* CPUID */
21948 /* Uses dirty helper:
21949 void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* )
21950 declared to mod rax, wr rbx, rcx, rdx
21953 const HChar
* fName
= NULL
;
21954 void* fAddr
= NULL
;
21956 if (haveF2orF3(pfx
)) goto decode_failure
;
21958 /* This isn't entirely correct, CPUID should depend on the VEX
21959 capabilities, not on the underlying CPU. See bug #324882. */
21960 if ((archinfo
->hwcaps
& VEX_HWCAPS_AMD64_SSSE3
) &&
21961 (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_CX16
) &&
21962 (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX2
)) {
21963 fName
= "amd64g_dirtyhelper_CPUID_avx2";
21964 fAddr
= &amd64g_dirtyhelper_CPUID_avx2
;
21965 /* This is a Core-i7-4910-like machine */
21967 else if ((archinfo
->hwcaps
& VEX_HWCAPS_AMD64_SSSE3
) &&
21968 (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_CX16
) &&
21969 (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
21970 fName
= "amd64g_dirtyhelper_CPUID_avx_and_cx16";
21971 fAddr
= &amd64g_dirtyhelper_CPUID_avx_and_cx16
;
21972 /* This is a Core-i5-2300-like machine */
21974 else if ((archinfo
->hwcaps
& VEX_HWCAPS_AMD64_SSSE3
) &&
21975 (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_CX16
)) {
21976 fName
= "amd64g_dirtyhelper_CPUID_sse42_and_cx16";
21977 fAddr
= &amd64g_dirtyhelper_CPUID_sse42_and_cx16
;
21978 /* This is a Core-i5-670-like machine */
21981 /* Give a CPUID for at least a baseline machine, SSE2
21982 only, and no CX16 */
21983 fName
= "amd64g_dirtyhelper_CPUID_baseline";
21984 fAddr
= &amd64g_dirtyhelper_CPUID_baseline
;
21987 vassert(fName
); vassert(fAddr
);
21988 IRExpr
** args
= NULL
;
21989 if (fAddr
== &amd64g_dirtyhelper_CPUID_avx2
21990 || fAddr
== &amd64g_dirtyhelper_CPUID_avx_and_cx16
) {
21991 Bool hasF16C
= (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_F16C
) != 0;
21992 Bool hasRDRAND
= (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_RDRAND
) != 0;
21993 args
= mkIRExprVec_3(IRExpr_GSPTR(),
21994 mkIRExpr_HWord(hasF16C
? 1 : 0),
21995 mkIRExpr_HWord(hasRDRAND
? 1 : 0));
21997 args
= mkIRExprVec_1(IRExpr_GSPTR());
21999 d
= unsafeIRDirty_0_N ( 0/*regparms*/, fName
, fAddr
, args
);
22001 /* declare guest state effects */
22003 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
22004 d
->fxState
[0].fx
= Ifx_Modify
;
22005 d
->fxState
[0].offset
= OFFB_RAX
;
22006 d
->fxState
[0].size
= 8;
22007 d
->fxState
[1].fx
= Ifx_Write
;
22008 d
->fxState
[1].offset
= OFFB_RBX
;
22009 d
->fxState
[1].size
= 8;
22010 d
->fxState
[2].fx
= Ifx_Modify
;
22011 d
->fxState
[2].offset
= OFFB_RCX
;
22012 d
->fxState
[2].size
= 8;
22013 d
->fxState
[3].fx
= Ifx_Write
;
22014 d
->fxState
[3].offset
= OFFB_RDX
;
22015 d
->fxState
[3].size
= 8;
22016 /* execute the dirty call, side-effecting guest state */
22017 stmt( IRStmt_Dirty(d
) );
22018 /* CPUID is a serialising insn. So, just in case someone is
22019 using it as a memory fence ... */
22020 stmt( IRStmt_MBE(Imbe_Fence
) );
22025 case 0xA3: { /* BT Gv,Ev */
22026 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22028 if (sz
!= 8 && sz
!= 4 && sz
!= 2) goto decode_failure
;
22029 delta
= dis_bt_G_E ( vbi
, pfx
, sz
, delta
, BtOpNone
, &ok
);
22030 if (!ok
) goto decode_failure
;
22034 case 0xA4: /* SHLDv imm8,Gv,Ev */
22035 modrm
= getUChar(delta
);
22036 d64
= delta
+ lengthAMode(pfx
, delta
);
22037 vex_sprintf(dis_buf
, "$%d", (Int
)getUChar(d64
));
22038 delta
= dis_SHLRD_Gv_Ev (
22039 vbi
, pfx
, delta
, modrm
, sz
,
22040 mkU8(getUChar(d64
)), True
, /* literal */
22041 dis_buf
, True
/* left */ );
22044 case 0xA5: /* SHLDv %cl,Gv,Ev */
22045 modrm
= getUChar(delta
);
22046 delta
= dis_SHLRD_Gv_Ev (
22047 vbi
, pfx
, delta
, modrm
, sz
,
22048 getIRegCL(), False
, /* not literal */
22049 "%cl", True
/* left */ );
22052 case 0xAB: { /* BTS Gv,Ev */
22053 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22055 if (sz
!= 8 && sz
!= 4 && sz
!= 2) goto decode_failure
;
22056 delta
= dis_bt_G_E ( vbi
, pfx
, sz
, delta
, BtOpSet
, &ok
);
22057 if (!ok
) goto decode_failure
;
22061 case 0xAC: /* SHRDv imm8,Gv,Ev */
22062 modrm
= getUChar(delta
);
22063 d64
= delta
+ lengthAMode(pfx
, delta
);
22064 vex_sprintf(dis_buf
, "$%d", (Int
)getUChar(d64
));
22065 delta
= dis_SHLRD_Gv_Ev (
22066 vbi
, pfx
, delta
, modrm
, sz
,
22067 mkU8(getUChar(d64
)), True
, /* literal */
22068 dis_buf
, False
/* right */ );
22071 case 0xAD: /* SHRDv %cl,Gv,Ev */
22072 modrm
= getUChar(delta
);
22073 delta
= dis_SHLRD_Gv_Ev (
22074 vbi
, pfx
, delta
, modrm
, sz
,
22075 getIRegCL(), False
, /* not literal */
22076 "%cl", False
/* right */);
22079 case 0xAF: /* IMUL Ev, Gv */
22080 if (haveF2orF3(pfx
)) goto decode_failure
;
22081 delta
= dis_mul_E_G ( vbi
, pfx
, sz
, delta
);
22084 case 0xB0: { /* CMPXCHG Gb,Eb */
22086 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */
22087 delta
= dis_cmpxchg_G_E ( &ok
, vbi
, pfx
, 1, delta
);
22088 if (!ok
) goto decode_failure
;
22092 case 0xB1: { /* CMPXCHG Gv,Ev (allowed in 16,32,64 bit) */
22094 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */
22095 if (sz
!= 2 && sz
!= 4 && sz
!= 8) goto decode_failure
;
22096 delta
= dis_cmpxchg_G_E ( &ok
, vbi
, pfx
, sz
, delta
);
22097 if (!ok
) goto decode_failure
;
22101 case 0xB3: { /* BTR Gv,Ev */
22102 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22104 if (sz
!= 8 && sz
!= 4 && sz
!= 2) goto decode_failure
;
22105 delta
= dis_bt_G_E ( vbi
, pfx
, sz
, delta
, BtOpReset
, &ok
);
22106 if (!ok
) goto decode_failure
;
22110 case 0xB6: /* MOVZXb Eb,Gv */
22111 if (haveF2orF3(pfx
)) goto decode_failure
;
22112 if (sz
!= 2 && sz
!= 4 && sz
!= 8)
22113 goto decode_failure
;
22114 delta
= dis_movx_E_G ( vbi
, pfx
, delta
, 1, sz
, False
);
22117 case 0xB7: /* MOVZXw Ew,Gv */
22118 if (haveF2orF3(pfx
)) goto decode_failure
;
22119 if (sz
!= 4 && sz
!= 8)
22120 goto decode_failure
;
22121 delta
= dis_movx_E_G ( vbi
, pfx
, delta
, 2, sz
, False
);
22124 case 0xBA: { /* Grp8 Ib,Ev */
22125 /* We let dis_Grp8_Imm decide whether F2 or F3 are allowable. */
22126 Bool decode_OK
= False
;
22127 modrm
= getUChar(delta
);
22128 am_sz
= lengthAMode(pfx
,delta
);
22129 d64
= getSDisp8(delta
+ am_sz
);
22130 delta
= dis_Grp8_Imm ( vbi
, pfx
, delta
, modrm
, am_sz
, sz
, d64
,
22133 goto decode_failure
;
22137 case 0xBB: { /* BTC Gv,Ev */
22138 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22140 if (sz
!= 8 && sz
!= 4 && sz
!= 2) goto decode_failure
;
22141 delta
= dis_bt_G_E ( vbi
, pfx
, sz
, delta
, BtOpComp
, &ok
);
22142 if (!ok
) goto decode_failure
;
22146 case 0xBC: /* BSF Gv,Ev */
22147 if (!haveF2orF3(pfx
)
22148 || (haveF3noF2(pfx
)
22149 && 0 == (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_BMI
))) {
22150 /* no-F2 no-F3 0F BC = BSF
22151 or F3 0F BC = REP; BSF on older CPUs. */
22152 delta
= dis_bs_E_G ( vbi
, pfx
, sz
, delta
, True
);
22155 /* Fall through, since F3 0F BC is TZCNT, and needs to
22156 be handled by dis_ESC_0F__SSE4. */
22159 case 0xBD: /* BSR Gv,Ev */
22160 if (!haveF2orF3(pfx
)
22161 || (haveF3noF2(pfx
)
22162 && 0 == (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_LZCNT
))) {
22163 /* no-F2 no-F3 0F BD = BSR
22164 or F3 0F BD = REP; BSR on older CPUs. */
22165 delta
= dis_bs_E_G ( vbi
, pfx
, sz
, delta
, False
);
22168 /* Fall through, since F3 0F BD is LZCNT, and needs to
22169 be handled by dis_ESC_0F__SSE4. */
22172 case 0xBE: /* MOVSXb Eb,Gv */
22173 if (haveF2orF3(pfx
)) goto decode_failure
;
22174 if (sz
!= 2 && sz
!= 4 && sz
!= 8)
22175 goto decode_failure
;
22176 delta
= dis_movx_E_G ( vbi
, pfx
, delta
, 1, sz
, True
);
22179 case 0xBF: /* MOVSXw Ew,Gv */
22180 if (haveF2orF3(pfx
)) goto decode_failure
;
22181 if (sz
!= 4 && sz
!= 8)
22182 goto decode_failure
;
22183 delta
= dis_movx_E_G ( vbi
, pfx
, delta
, 2, sz
, True
);
22186 case 0xC0: { /* XADD Gb,Eb */
22187 Bool decode_OK
= False
;
22188 delta
= dis_xadd_G_E ( &decode_OK
, vbi
, pfx
, 1, delta
);
22190 goto decode_failure
;
22194 case 0xC1: { /* XADD Gv,Ev */
22195 Bool decode_OK
= False
;
22196 delta
= dis_xadd_G_E ( &decode_OK
, vbi
, pfx
, sz
, delta
);
22198 goto decode_failure
;
22203 modrm
= getUChar(delta
);
22205 // Detecting valid CMPXCHG combinations is pretty complex.
22206 Bool isValidCMPXCHG
= gregLO3ofRM(modrm
) == 1;
22207 if (isValidCMPXCHG
) {
22208 if (have66(pfx
)) isValidCMPXCHG
= False
;
22209 if (sz
!= 4 && sz
!= 8) isValidCMPXCHG
= False
;
22210 if (sz
== 8 && !(archinfo
->hwcaps
& VEX_HWCAPS_AMD64_CX16
))
22211 isValidCMPXCHG
= False
;
22212 if (epartIsReg(modrm
)) isValidCMPXCHG
= False
;
22213 if (haveF2orF3(pfx
)) {
22214 /* Since the e-part is memory only, F2 or F3 (one or the
22215 other) is acceptable if LOCK is also present. But only
22217 if (sz
== 8) isValidCMPXCHG
= False
;
22218 if (haveF2andF3(pfx
) || !haveLOCK(pfx
)) isValidCMPXCHG
= False
;
22222 /* 0F C7 /1 (with qualifications) = CMPXCHG */
22223 if (isValidCMPXCHG
) {
22224 // Note that we've already read the modrm byte by this point, but we
22225 // haven't moved delta past it.
22226 IRType elemTy
= sz
==4 ? Ity_I32
: Ity_I64
;
22227 IRTemp expdHi
= newTemp(elemTy
);
22228 IRTemp expdLo
= newTemp(elemTy
);
22229 IRTemp dataHi
= newTemp(elemTy
);
22230 IRTemp dataLo
= newTemp(elemTy
);
22231 IRTemp oldHi
= newTemp(elemTy
);
22232 IRTemp oldLo
= newTemp(elemTy
);
22233 IRTemp flags_old
= newTemp(Ity_I64
);
22234 IRTemp flags_new
= newTemp(Ity_I64
);
22235 IRTemp success
= newTemp(Ity_I1
);
22236 IROp opOR
= sz
==4 ? Iop_Or32
: Iop_Or64
;
22237 IROp opXOR
= sz
==4 ? Iop_Xor32
: Iop_Xor64
;
22238 IROp opCasCmpEQ
= sz
==4 ? Iop_CasCmpEQ32
: Iop_CasCmpEQ64
;
22239 IRExpr
* zero
= sz
==4 ? mkU32(0) : mkU64(0);
22240 IRTemp expdHi64
= newTemp(Ity_I64
);
22241 IRTemp expdLo64
= newTemp(Ity_I64
);
22243 /* Translate this using a DCAS, even if there is no LOCK
22244 prefix. Life is too short to bother with generating two
22245 different translations for the with/without-LOCK-prefix
22247 *expect_CAS
= True
;
22249 /* Generate address */
22250 vassert(!epartIsReg(modrm
));
22251 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
22254 /* cmpxchg16b requires an alignment check. */
22256 gen_SEGV_if_not_16_aligned( addr
);
22258 /* Get the expected and new values. */
22259 assign( expdHi64
, getIReg64(R_RDX
) );
22260 assign( expdLo64
, getIReg64(R_RAX
) );
22262 /* These are the correctly-sized expected and new values.
22263 However, we also get expdHi64/expdLo64 above as 64-bits
22264 regardless, because we will need them later in the 32-bit
22265 case (paradoxically). */
22266 assign( expdHi
, sz
==4 ? unop(Iop_64to32
, mkexpr(expdHi64
))
22267 : mkexpr(expdHi64
) );
22268 assign( expdLo
, sz
==4 ? unop(Iop_64to32
, mkexpr(expdLo64
))
22269 : mkexpr(expdLo64
) );
22270 assign( dataHi
, sz
==4 ? getIReg32(R_RCX
) : getIReg64(R_RCX
) );
22271 assign( dataLo
, sz
==4 ? getIReg32(R_RBX
) : getIReg64(R_RBX
) );
22275 mkIRCAS( oldHi
, oldLo
,
22276 Iend_LE
, mkexpr(addr
),
22277 mkexpr(expdHi
), mkexpr(expdLo
),
22278 mkexpr(dataHi
), mkexpr(dataLo
)
22281 /* success when oldHi:oldLo == expdHi:expdLo */
22285 binop(opXOR
, mkexpr(oldHi
), mkexpr(expdHi
)),
22286 binop(opXOR
, mkexpr(oldLo
), mkexpr(expdLo
))
22291 /* If the DCAS is successful, that is to say oldHi:oldLo ==
22292 expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX,
22293 which is where they came from originally. Both the actual
22294 contents of these two regs, and any shadow values, are
22295 unchanged. If the DCAS fails then we're putting into
22296 RDX:RAX the value seen in memory. */
22297 /* Now of course there's a complication in the 32-bit case
22298 (bah!): if the DCAS succeeds, we need to leave RDX:RAX
22299 unchanged; but if we use the same scheme as in the 64-bit
22300 case, we get hit by the standard rule that a write to the
22301 bottom 32 bits of an integer register zeros the upper 32
22302 bits. And so the upper halves of RDX and RAX mysteriously
22303 become zero. So we have to stuff back in the original
22304 64-bit values which we previously stashed in
22305 expdHi64:expdLo64, even if we're doing a cmpxchg8b. */
22306 /* It's just _so_ much fun ... */
22308 IRExpr_ITE( mkexpr(success
),
22310 sz
== 4 ? unop(Iop_32Uto64
, mkexpr(oldHi
))
22314 IRExpr_ITE( mkexpr(success
),
22316 sz
== 4 ? unop(Iop_32Uto64
, mkexpr(oldLo
))
22320 /* Copy the success bit into the Z flag and leave the others
22322 assign( flags_old
, widenUto64(mk_amd64g_calculate_rflags_all()));
22326 binop(Iop_And64
, mkexpr(flags_old
),
22327 mkU64(~AMD64G_CC_MASK_Z
)),
22330 unop(Iop_1Uto64
, mkexpr(success
)), mkU64(1)),
22331 mkU8(AMD64G_CC_SHIFT_Z
)) ));
22333 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
22334 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(flags_new
) ));
22335 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
22336 /* Set NDEP even though it isn't used. This makes
22337 redundant-PUT elimination of previous stores to this field
22339 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
22341 /* Sheesh. Aren't you glad it was me and not you that had to
22342 write and validate all this grunge? */
22344 DIP("cmpxchg8b %s\n", dis_buf
);
22346 } // if (isValidCMPXCHG)
22348 /* 0F C7 /6 no-F2-or-F3 = RDRAND */
22349 if (gregLO3ofRM(modrm
) == 6/*RDRAND*/
22350 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_RDRAND
)
22351 && epartIsReg(modrm
) && haveNoF2noF3(pfx
)
22352 && (sz
== 8 || sz
== 4 || sz
== 2)) {
22353 delta
++; // move past modrm
22354 IRType ty
= szToITy(sz
);
22356 // Pull a first 32 bits of randomness, plus C flag, out of the host.
22357 IRTemp pairLO
= newTemp(Ity_I64
);
22359 = unsafeIRDirty_1_N(pairLO
, 0/*regparms*/,
22360 "amd64g_dirtyhelper_RDRAND",
22361 &amd64g_dirtyhelper_RDRAND
, mkIRExprVec_0());
22362 // There are no guest state or memory effects to declare for |dLO|.
22363 stmt( IRStmt_Dirty(dLO
) );
22365 IRTemp randsLO
= newTemp(Ity_I32
);
22366 assign(randsLO
, unop(Iop_64to32
, mkexpr(pairLO
)));
22367 IRTemp cLO
= newTemp(Ity_I64
);
22368 assign(cLO
, binop(Iop_Shr64
, mkexpr(pairLO
), mkU8(32)));
22370 // We'll assemble the final pairing in (cFinal, randsNearlyFinal).
22371 IRTemp randsNearlyFinal
= newTemp(Ity_I64
);
22372 IRTemp cFinal
= newTemp(Ity_I64
);
22374 if (ty
== Ity_I64
) {
22375 // Pull another 32 bits of randomness out of the host.
22376 IRTemp pairHI
= newTemp(Ity_I64
);
22378 = unsafeIRDirty_1_N(pairHI
, 0/*regparms*/,
22379 "amd64g_dirtyhelper_RDRAND",
22380 &amd64g_dirtyhelper_RDRAND
, mkIRExprVec_0());
22381 // There are no guest state or memory effects to declare for |dHI|.
22382 stmt( IRStmt_Dirty(dHI
) );
22384 IRTemp randsHI
= newTemp(Ity_I32
);
22385 assign(randsHI
, unop(Iop_64to32
, mkexpr(pairHI
)));
22386 IRTemp cHI
= newTemp(Ity_I64
);
22387 assign(cHI
, binop(Iop_Shr64
, mkexpr(pairHI
), mkU8(32)));
22388 assign(randsNearlyFinal
, binop(Iop_32HLto64
,
22389 mkexpr(randsHI
), mkexpr(randsLO
)));
22390 assign(cFinal
, binop(Iop_And64
,
22391 binop(Iop_And64
, mkexpr(cHI
), mkexpr(cLO
)),
22394 assign(randsNearlyFinal
, unop(Iop_32Uto64
, mkexpr(randsLO
)));
22395 assign(cFinal
, binop(Iop_And64
, mkexpr(cLO
), mkU64(1)));
22398 /* Now cFinal[0] is the final success/failure flag (cFinal[0] == 1
22399 means success). But there's another twist. If we failed then the
22400 returned value must be forced to zero. Otherwise we could have the
22401 situation, when sz==8, where one of the host calls failed but the
22402 other didn't. This would give cFinal[0] == 0 (correctly) but
22403 randsNearlyFinal not being zero, because it contains the 32 bit
22404 result of the non-failing call. */
22405 IRTemp randsFinal
= newTemp(Ity_I64
);
22408 mkexpr(randsNearlyFinal
),
22410 binop(Iop_Shl64
, mkexpr(cFinal
), mkU8(63)),
22414 // So, finally, update the guest state.
22415 putIRegE(sz
, pfx
, modrm
, narrowTo(ty
, mkexpr(randsFinal
)));
22417 // Set C=<success indication>, O,S,Z,A,P = 0. cFinal has already been
22418 // masked so only the lowest bit remains.
22419 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
22420 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(cFinal
) ));
22421 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
22422 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
22424 DIP("rdrand %s", nameIRegE(sz
, pfx
, modrm
));
22428 goto decode_failure
;
22431 case 0xC8: /* BSWAP %eax */
22438 case 0xCF: /* BSWAP %edi */
22439 if (haveF2orF3(pfx
)) goto decode_failure
;
22440 /* According to the AMD64 docs, this insn can have size 4 or
22443 t1
= newTemp(Ity_I32
);
22444 assign( t1
, getIRegRexB(4, pfx
, opc
-0xC8) );
22445 t2
= math_BSWAP( t1
, Ity_I32
);
22446 putIRegRexB(4, pfx
, opc
-0xC8, mkexpr(t2
));
22447 DIP("bswapl %s\n", nameIRegRexB(4, pfx
, opc
-0xC8));
22451 t1
= newTemp(Ity_I64
);
22452 t2
= newTemp(Ity_I64
);
22453 assign( t1
, getIRegRexB(8, pfx
, opc
-0xC8) );
22454 t2
= math_BSWAP( t1
, Ity_I64
);
22455 putIRegRexB(8, pfx
, opc
-0xC8, mkexpr(t2
));
22456 DIP("bswapq %s\n", nameIRegRexB(8, pfx
, opc
-0xC8));
22459 goto decode_failure
;
22464 } /* first switch */
22467 /* =-=-=-=-=-=-=-=-= MMXery =-=-=-=-=-=-=-=-= */
22468 /* In the second switch, pick off MMX insns. */
22470 if (!have66orF2orF3(pfx
)) {
22471 /* So there's no SIMD prefix. */
22473 vassert(sz
== 4 || sz
== 8);
22475 switch (opc
) { /* second switch */
22479 case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
22481 case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
22482 case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
22483 case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
22484 case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
22488 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
22491 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
22494 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
22498 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
22501 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
22504 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
22506 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
22507 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
22509 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
22513 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
22517 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
22519 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
22520 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
22521 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
22525 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
22529 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
22531 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
22532 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
22533 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
22534 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
22536 case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
22540 case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
22544 case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
22546 Bool decode_OK
= False
;
22547 delta
= dis_MMX ( &decode_OK
, vbi
, pfx
, sz
, deltaIN
);
22550 goto decode_failure
;
22555 } /* second switch */
22559 /* A couple of MMX corner cases */
22560 if (opc
== 0x0E/* FEMMS */ || opc
== 0x77/* EMMS */) {
22562 goto decode_failure
;
22563 do_EMMS_preamble();
22568 /* =-=-=-=-=-=-=-=-= SSE2ery =-=-=-=-=-=-=-=-= */
22569 /* Perhaps it's an SSE or SSE2 instruction. We can try this
22570 without checking the guest hwcaps because SSE2 is a baseline
22571 facility in 64 bit mode. */
22573 Bool decode_OK
= False
;
22574 delta
= dis_ESC_0F__SSE2 ( &decode_OK
,
22575 archinfo
, vbi
, pfx
, sz
, deltaIN
, dres
);
22580 /* =-=-=-=-=-=-=-=-= SSE3ery =-=-=-=-=-=-=-=-= */
22581 /* Perhaps it's a SSE3 instruction. FIXME: check guest hwcaps
22584 Bool decode_OK
= False
;
22585 delta
= dis_ESC_0F__SSE3 ( &decode_OK
, vbi
, pfx
, sz
, deltaIN
);
22590 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22591 /* Perhaps it's a SSE4 instruction. FIXME: check guest hwcaps
22594 Bool decode_OK
= False
;
22595 delta
= dis_ESC_0F__SSE4 ( &decode_OK
,
22596 archinfo
, vbi
, pfx
, sz
, deltaIN
);
22602 return deltaIN
; /* fail */
22606 /*------------------------------------------------------------*/
22608 /*--- Top-level post-escape decoders: dis_ESC_0F38 ---*/
22610 /*------------------------------------------------------------*/
22612 __attribute__((noinline
))
22614 Long
dis_ESC_0F38 (
22615 /*MB_OUT*/DisResult
* dres
,
22616 const VexArchInfo
* archinfo
,
22617 const VexAbiInfo
* vbi
,
22618 Prefix pfx
, Int sz
, Long deltaIN
22621 Long delta
= deltaIN
;
22622 UChar opc
= getUChar(delta
);
22626 case 0xF0: /* 0F 38 F0 = MOVBE m16/32/64(E), r16/32/64(G) */
22627 case 0xF1: { /* 0F 38 F1 = MOVBE r16/32/64(G), m16/32/64(E) */
22628 if (!haveF2orF3(pfx
) && !haveVEX(pfx
)
22629 && (sz
== 2 || sz
== 4 || sz
== 8)) {
22630 IRTemp addr
= IRTemp_INVALID
;
22634 modrm
= getUChar(delta
);
22635 if (epartIsReg(modrm
)) break;
22636 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
22638 IRType ty
= szToITy(sz
);
22639 IRTemp src
= newTemp(ty
);
22640 if (opc
== 0xF0) { /* LOAD */
22641 assign(src
, loadLE(ty
, mkexpr(addr
)));
22642 IRTemp dst
= math_BSWAP(src
, ty
);
22643 putIRegG(sz
, pfx
, modrm
, mkexpr(dst
));
22644 DIP("movbe %s,%s\n", dis_buf
, nameIRegG(sz
, pfx
, modrm
));
22645 } else { /* STORE */
22646 assign(src
, getIRegG(sz
, pfx
, modrm
));
22647 IRTemp dst
= math_BSWAP(src
, ty
);
22648 storeLE(mkexpr(addr
), mkexpr(dst
));
22649 DIP("movbe %s,%s\n", nameIRegG(sz
, pfx
, modrm
), dis_buf
);
22653 /* else fall through; maybe one of the decoders below knows what
22662 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
22663 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps
22664 rather than proceeding indiscriminately. */
22666 Bool decode_OK
= False
;
22667 delta
= dis_ESC_0F38__SupSSE3 ( &decode_OK
, vbi
, pfx
, sz
, deltaIN
);
22672 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22673 /* Perhaps it's an SSE4 instruction. FIXME: consult guest hwcaps
22674 rather than proceeding indiscriminately. */
22676 Bool decode_OK
= False
;
22677 delta
= dis_ESC_0F38__SSE4 ( &decode_OK
, vbi
, pfx
, sz
, deltaIN
);
22682 /* Ignore previous decode attempts and restart from the beginning of
22683 the instruction. */
22685 opc
= getUChar(delta
);
22691 /* 66 0F 38 F6 = ADCX r32/64(G), m32/64(E) */
22692 /* F3 0F 38 F6 = ADOX r32/64(G), m32/64(E) */
22693 /* These were introduced in Broadwell. Gate them on AVX so as to at
22694 least reject them on earlier guests. Has no host requirements. */
22695 if (have66noF2noF3(pfx
) && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
22697 sz
= 4; /* 66 prefix but operand size is 4/8 */
22699 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Add8
, WithFlagCarryX
, True
,
22700 sz
, delta
, "adcx" );
22703 if (haveF3no66noF2(pfx
) && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
22704 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Add8
, WithFlagOverX
, True
,
22705 sz
, delta
, "adox" );
22708 /* else fall through */
22716 /*decode_failure:*/
22717 return deltaIN
; /* fail */
22721 /*------------------------------------------------------------*/
22723 /*--- Top-level post-escape decoders: dis_ESC_0F3A ---*/
22725 /*------------------------------------------------------------*/
/* dis_ESC_0F3A: top-level decoder for AMD64 opcodes reached via the
   0F 3A escape bytes (mostly immediate-carrying SSSE3/SSE4 forms).
   Tries the SupSSE3 decoder, then the SSE4 decoder; returns the
   advanced delta on success, or deltaIN unchanged to signal failure.
   Parameters mirror dis_ESC_0F38 above.
   NOTE(review): extraction has dropped some physical lines here
   (original numbering is discontinuous); code tokens are preserved
   byte-identically, only comments added. */
22727 __attribute__((noinline
))
22729 Long
dis_ESC_0F3A (
22730 /*MB_OUT*/DisResult
* dres
,
22731 const VexArchInfo
* archinfo
,
22732 const VexAbiInfo
* vbi
,
22733 Prefix pfx
, Int sz
, Long deltaIN
22736 Long delta
= deltaIN
;
22737 UChar opc
= getUChar(delta
);
22746 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
22747 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps
22748 rather than proceeding indiscriminately. */
22750 Bool decode_OK
= False
;
/* Sub-decoders restart from deltaIN, not the current delta. */
22751 delta
= dis_ESC_0F3A__SupSSE3 ( &decode_OK
, vbi
, pfx
, sz
, deltaIN
);
22756 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22757 /* Perhaps it's an SSE4 instruction. FIXME: consult guest hwcaps
22758 rather than proceeding indiscriminately. */
22760 Bool decode_OK
= False
;
22761 delta
= dis_ESC_0F3A__SSE4 ( &decode_OK
, vbi
, pfx
, sz
, deltaIN
);
22766 return deltaIN
; /* fail */
22770 /*------------------------------------------------------------*/
22772 /*--- Top-level post-escape decoders: dis_ESC_0F__VEX ---*/
22774 /*------------------------------------------------------------*/
/* dis_VEX_NDS_128_AnySimdPfx_0F_WIG: common handler for 3-operand
   VEX.NDS.128 SIMD insns in the 0F map.  Computes
      rD = op( left  = XMM selected by VEX.vvvv (optionally NOT'd),
               right = XMM register or 128-bit memory (r/m) ),
   optionally with the two args swapped, then writes the 128-bit
   result and zeroes the destination YMM's upper half
   (putYMMRegLoAndZU).  Exactly one of 'op' (a plain IROp) or 'opFn'
   (an IR-building callback) must be supplied — the vasserts below
   enforce this.  Ops that take a rounding mode get a fake one
   (XXXROUNDINGFIXME).  Returns the advanced delta.
   NOTE(review): extraction has dropped some physical lines here
   (numbering is discontinuous — e.g. the *uses_vvvv update and the
   delta advance are not visible); tokens below are byte-identical,
   comments only added. */
22776 /* FIXME: common up with the _256_ version below? */
22778 Long
dis_VEX_NDS_128_AnySimdPfx_0F_WIG (
22779 /*OUT*/Bool
* uses_vvvv
, const VexAbiInfo
* vbi
,
22780 Prefix pfx
, Long delta
, const HChar
* name
,
22781 /* The actual operation. Use either 'op' or 'opfn',
22783 IROp op
, IRTemp(*opFn
)(IRTemp
,IRTemp
),
22784 Bool invertLeftArg
,
22788 UChar modrm
= getUChar(delta
);
/* rD = destination (greg); rSL = left source from VEX.vvvv. */
22789 UInt rD
= gregOfRexRM(pfx
, modrm
);
22790 UInt rSL
= getVexNvvvv(pfx
);
22791 IRTemp tSL
= newTemp(Ity_V128
);
22792 IRTemp tSR
= newTemp(Ity_V128
);
22793 IRTemp addr
= IRTemp_INVALID
;
/* Caller guarantees VEX.L=0 (128-bit) and REX.W ignored (WIG). */
22796 vassert(0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*WIG?*/);
22798 assign(tSL
, invertLeftArg
? unop(Iop_NotV128
, getXMMReg(rSL
))
/* Right source: XMM register, or a 128-bit load from memory. */
22801 if (epartIsReg(modrm
)) {
22802 UInt rSR
= eregOfRexRM(pfx
, modrm
);
22804 assign(tSR
, getXMMReg(rSR
));
22805 DIP("%s %s,%s,%s\n",
22806 name
, nameXMMReg(rSR
), nameXMMReg(rSL
), nameXMMReg(rD
));
22808 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
22810 assign(tSR
, loadLE(Ity_V128
, mkexpr(addr
)));
22811 DIP("%s %s,%s,%s\n",
22812 name
, dis_buf
, nameXMMReg(rSL
), nameXMMReg(rD
));
22815 IRTemp res
= IRTemp_INVALID
;
/* Path 1: a plain IROp was supplied. */
22816 if (op
!= Iop_INVALID
) {
22817 vassert(opFn
== NULL
);
22818 res
= newTemp(Ity_V128
);
22819 if (requiresRMode(op
)) {
22820 IRTemp rm
= newTemp(Ity_I32
);
22821 assign(rm
, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
22822 assign(res
, swapArgs
22823 ? triop(op
, mkexpr(rm
), mkexpr(tSR
), mkexpr(tSL
))
22824 : triop(op
, mkexpr(rm
), mkexpr(tSL
), mkexpr(tSR
)));
22826 assign(res
, swapArgs
22827 ? binop(op
, mkexpr(tSR
), mkexpr(tSL
))
22828 : binop(op
, mkexpr(tSL
), mkexpr(tSR
)));
/* Path 2: an IR-generator callback was supplied instead. */
22831 vassert(opFn
!= NULL
);
22832 res
= swapArgs
? opFn(tSR
, tSL
) : opFn(tSL
, tSR
);
/* Write low 128 bits of the dest and zero the YMM upper half. */
22835 putYMMRegLoAndZU(rD
, mkexpr(res
));
/* Thin convenience wrapper: forwards to the general handler with
   opFn = NULL, invertLeftArg = False, swapArgs = False, i.e. the
   common case of a plain IROp applied to (vvvv, r/m) in order.
   Interface and behavior are identical to the general handler.
   NOTE(review): extraction-mangled text; tokens preserved
   byte-identically, comments only added. */
22842 /* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, with a simple IROp
22843 for the operation, no inversion of the left arg, and no swapping of
22846 Long
dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple (
22847 /*OUT*/Bool
* uses_vvvv
, const VexAbiInfo
* vbi
,
22848 Prefix pfx
, Long delta
, const HChar
* name
,
22852 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
22853 uses_vvvv
, vbi
, pfx
, delta
, name
, op
, NULL
, False
, False
);
/* Thin convenience wrapper: forwards to the general handler with
   op = Iop_INVALID so that the supplied IR-generator callback 'opFn'
   computes the result; no left-arg inversion, no arg swap.
   NOTE(review): extraction-mangled text; tokens preserved
   byte-identically, comments only added. */
22857 /* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, using the given IR
22858 generator to compute the result, no inversion of the left
22859 arg, and no swapping of args. */
22861 Long
dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex (
22862 /*OUT*/Bool
* uses_vvvv
, const VexAbiInfo
* vbi
,
22863 Prefix pfx
, Long delta
, const HChar
* name
,
22864 IRTemp(*opFn
)(IRTemp
,IRTemp
)
22867 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
22868 uses_vvvv
, vbi
, pfx
, delta
, name
,
22869 Iop_INVALID
, opFn
, False
, False
);
/* dis_AVX128_shiftV_byE: AVX 128-bit vector-by-scalar shift.  Shifts
   every lane of the XMM register selected by VEX.vvvv (rV) by the
   amount held in the low 64 bits of E (an XMM register or a 64-bit
   memory load), writing the result to rG's low 128 bits and zeroing
   the YMM upper half.  Per the visible ITE arms: for shl/shr an
   out-of-range amount (amt >= 'size') produces the alternative
   (zero) result; for sar the amount is instead clamped by shifting
   by (size-1), matching x86 arithmetic-shift saturation.
   NOTE(review): 'size = 32' for Iop_ShlN16x8 (orig line 22908) looks
   inconsistent with the 16-bit lane width used for SarN16x8/ShrN16x8
   — confirm against upstream before relying on it.
   NOTE(review): extraction has dropped some physical lines here
   (numbering is discontinuous); tokens below are byte-identical,
   comments only added. */
22873 /* Vector by scalar shift of V by the amount specified at the bottom
22875 static ULong
dis_AVX128_shiftV_byE ( const VexAbiInfo
* vbi
,
22876 Prefix pfx
, Long delta
,
22877 const HChar
* opname
, IROp op
)
22882 Bool shl
, shr
, sar
;
22883 UChar modrm
= getUChar(delta
);
22884 UInt rG
= gregOfRexRM(pfx
,modrm
);
/* Stray second ';' below is an empty statement — harmless. */
22885 UInt rV
= getVexNvvvv(pfx
);;
22886 IRTemp g0
= newTemp(Ity_V128
);
22887 IRTemp g1
= newTemp(Ity_V128
);
/* amt = full 64-bit shift count; amt8 = its low 8 bits for the IR op. */
22888 IRTemp amt
= newTemp(Ity_I64
);
22889 IRTemp amt8
= newTemp(Ity_I8
);
22890 if (epartIsReg(modrm
)) {
22891 UInt rE
= eregOfRexRM(pfx
,modrm
);
22892 assign( amt
, getXMMRegLane64(rE
, 0) );
22893 DIP("%s %s,%s,%s\n", opname
, nameXMMReg(rE
),
22894 nameXMMReg(rV
), nameXMMReg(rG
) );
22897 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
22898 assign( amt
, loadLE(Ity_I64
, mkexpr(addr
)) );
22899 DIP("%s %s,%s,%s\n", opname
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
22902 assign( g0
, getXMMReg(rV
) );
22903 assign( amt8
, unop(Iop_64to8
, mkexpr(amt
)) );
22905 shl
= shr
= sar
= False
;
/* Classify the IROp into shift kind and lane-size threshold. */
22908 case Iop_ShlN16x8
: shl
= True
; size
= 32; break;
22909 case Iop_ShlN32x4
: shl
= True
; size
= 32; break;
22910 case Iop_ShlN64x2
: shl
= True
; size
= 64; break;
22911 case Iop_SarN16x8
: sar
= True
; size
= 16; break;
22912 case Iop_SarN32x4
: sar
= True
; size
= 32; break;
22913 case Iop_ShrN16x8
: shr
= True
; size
= 16; break;
22914 case Iop_ShrN32x4
: shr
= True
; size
= 32; break;
22915 case Iop_ShrN64x2
: shr
= True
; size
= 64; break;
22916 default: vassert(0);
/* shl/shr arm: in-range amount shifts; out-of-range presumably
   selects zero (the alternative ITE operand is not visible here —
   TODO confirm against upstream). */
22923 binop(Iop_CmpLT64U
, mkexpr(amt
), mkU64(size
)),
22924 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
/* sar arm: out-of-range amount is clamped to (size-1). */
22933 binop(Iop_CmpLT64U
, mkexpr(amt
), mkU64(size
)),
22934 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
22935 binop(op
, mkexpr(g0
), mkU8(size
-1))
/* Write result to rG's low 128 bits; zero the YMM upper half. */
22942 putYMMRegLoAndZU( rG
, mkexpr(g1
) );
22947 /* Vector by scalar shift of V by the amount specified at the bottom
22949 static ULong
dis_AVX256_shiftV_byE ( const VexAbiInfo
* vbi
,
22950 Prefix pfx
, Long delta
,
22951 const HChar
* opname
, IROp op
)
22956 Bool shl
, shr
, sar
;
22957 UChar modrm
= getUChar(delta
);
22958 UInt rG
= gregOfRexRM(pfx
,modrm
);
22959 UInt rV
= getVexNvvvv(pfx
);;
22960 IRTemp g0
= newTemp(Ity_V256
);
22961 IRTemp g1
= newTemp(Ity_V256
);
22962 IRTemp amt
= newTemp(Ity_I64
);
22963 IRTemp amt8
= newTemp(Ity_I8
);
22964 if (epartIsReg(modrm
)) {
22965 UInt rE
= eregOfRexRM(pfx
,modrm
);
22966 assign( amt
, getXMMRegLane64(rE
, 0) );
22967 DIP("%s %s,%s,%s\n", opname
, nameXMMReg(rE
),
22968 nameYMMReg(rV
), nameYMMReg(rG
) );
22971 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
22972 assign( amt
, loadLE(Ity_I64
, mkexpr(addr
)) );
22973 DIP("%s %s,%s,%s\n", opname
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
) );
22976 assign( g0
, getYMMReg(rV
) );
22977 assign( amt8
, unop(Iop_64to8
, mkexpr(amt
)) );
22979 shl
= shr
= sar
= False
;
22982 case Iop_ShlN16x16
: shl
= True
; size
= 32; break;
22983 case Iop_ShlN32x8
: shl
= True
; size
= 32; break;
22984 case Iop_ShlN64x4
: shl
= True
; size
= 64; break;
22985 case Iop_SarN16x16
: sar
= True
; size
= 16; break;
22986 case Iop_SarN32x8
: sar
= True
; size
= 32; break;
22987 case Iop_ShrN16x16
: shr
= True
; size
= 16; break;
22988 case Iop_ShrN32x8
: shr
= True
; size
= 32; break;
22989 case Iop_ShrN64x4
: shr
= True
; size
= 64; break;
22990 default: vassert(0);
22997 binop(Iop_CmpLT64U
, mkexpr(amt
), mkU64(size
)),
22998 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
22999 binop(Iop_V128HLtoV256
, mkV128(0), mkV128(0))
23007 binop(Iop_CmpLT64U
, mkexpr(amt
), mkU64(size
)),
23008 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
23009 binop(op
, mkexpr(g0
), mkU8(size
-1))
23016 putYMMReg( rG
, mkexpr(g1
) );
23021 /* Vector by vector shift of V by the amount specified at the bottom
23022 of E. Vector by vector shifts are defined for all shift amounts,
23023 so not using Iop_S*x* here (and SSE2 doesn't support variable shifts
23025 static ULong
dis_AVX_var_shiftV_byE ( const VexAbiInfo
* vbi
,
23026 Prefix pfx
, Long delta
,
23027 const HChar
* opname
, IROp op
, Bool isYMM
)
23032 UChar modrm
= getUChar(delta
);
23033 UInt rG
= gregOfRexRM(pfx
,modrm
);
23034 UInt rV
= getVexNvvvv(pfx
);;
23035 IRTemp sV
= isYMM
? newTemp(Ity_V256
) : newTemp(Ity_V128
);
23036 IRTemp amt
= isYMM
? newTemp(Ity_V256
) : newTemp(Ity_V128
);
23037 IRTemp amts
[8], sVs
[8], res
[8];
23038 if (epartIsReg(modrm
)) {
23039 UInt rE
= eregOfRexRM(pfx
,modrm
);
23040 assign( amt
, isYMM
? getYMMReg(rE
) : getXMMReg(rE
) );
23042 DIP("%s %s,%s,%s\n", opname
, nameYMMReg(rE
),
23043 nameYMMReg(rV
), nameYMMReg(rG
) );
23045 DIP("%s %s,%s,%s\n", opname
, nameXMMReg(rE
),
23046 nameXMMReg(rV
), nameXMMReg(rG
) );
23050 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23051 assign( amt
, loadLE(isYMM
? Ity_V256
: Ity_V128
, mkexpr(addr
)) );
23053 DIP("%s %s,%s,%s\n", opname
, dis_buf
, nameYMMReg(rV
),
23056 DIP("%s %s,%s,%s\n", opname
, dis_buf
, nameXMMReg(rV
),
23061 assign( sV
, isYMM
? getYMMReg(rV
) : getXMMReg(rV
) );
23065 case Iop_Shl32
: size
= 32; break;
23066 case Iop_Shl64
: size
= 64; break;
23067 case Iop_Sar32
: size
= 32; break;
23068 case Iop_Shr32
: size
= 32; break;
23069 case Iop_Shr64
: size
= 64; break;
23070 default: vassert(0);
23073 for (i
= 0; i
< 8; i
++) {
23074 sVs
[i
] = IRTemp_INVALID
;
23075 amts
[i
] = IRTemp_INVALID
;
23080 breakupV256to32s( sV
, &sVs
[7], &sVs
[6], &sVs
[5], &sVs
[4],
23081 &sVs
[3], &sVs
[2], &sVs
[1], &sVs
[0] );
23082 breakupV256to32s( amt
, &amts
[7], &amts
[6], &amts
[5], &amts
[4],
23083 &amts
[3], &amts
[2], &amts
[1], &amts
[0] );
23085 breakupV128to32s( sV
, &sVs
[3], &sVs
[2], &sVs
[1], &sVs
[0] );
23086 breakupV128to32s( amt
, &amts
[3], &amts
[2], &amts
[1], &amts
[0] );
23091 breakupV256to64s( sV
, &sVs
[3], &sVs
[2], &sVs
[1], &sVs
[0] );
23092 breakupV256to64s( amt
, &amts
[3], &amts
[2], &amts
[1], &amts
[0] );
23094 breakupV128to64s( sV
, &sVs
[1], &sVs
[0] );
23095 breakupV128to64s( amt
, &amts
[1], &amts
[0] );
23098 default: vassert(0);
23100 for (i
= 0; i
< 8; i
++)
23101 if (sVs
[i
] != IRTemp_INVALID
) {
23102 res
[i
] = size
== 32 ? newTemp(Ity_I32
) : newTemp(Ity_I64
);
23105 binop(size
== 32 ? Iop_CmpLT32U
: Iop_CmpLT64U
,
23107 size
== 32 ? mkU32(size
) : mkU64(size
)),
23108 binop(op
, mkexpr(sVs
[i
]),
23109 unop(size
== 32 ? Iop_32to8
: Iop_64to8
,
23111 op
== Iop_Sar32
? binop(op
, mkexpr(sVs
[i
]), mkU8(size
-1))
23112 : size
== 32 ? mkU32(0) : mkU64(0)
23117 for (i
= 0; i
< 8; i
++)
23118 putYMMRegLane32( rG
, i
, (i
< 4 || isYMM
)
23119 ? mkexpr(res
[i
]) : mkU32(0) );
23122 for (i
= 0; i
< 4; i
++)
23123 putYMMRegLane64( rG
, i
, (i
< 2 || isYMM
)
23124 ? mkexpr(res
[i
]) : mkU64(0) );
23126 default: vassert(0);
23133 /* Vector by scalar shift of E into V, by an immediate byte. Modified
23134 version of dis_SSE_shiftE_imm. */
23136 Long
dis_AVX128_shiftE_to_V_imm( Prefix pfx
,
23137 Long delta
, const HChar
* opname
, IROp op
)
23139 Bool shl
, shr
, sar
;
23140 UChar rm
= getUChar(delta
);
23141 IRTemp e0
= newTemp(Ity_V128
);
23142 IRTemp e1
= newTemp(Ity_V128
);
23143 UInt rD
= getVexNvvvv(pfx
);
23145 vassert(epartIsReg(rm
));
23146 vassert(gregLO3ofRM(rm
) == 2
23147 || gregLO3ofRM(rm
) == 4 || gregLO3ofRM(rm
) == 6);
23148 amt
= getUChar(delta
+1);
23150 DIP("%s $%d,%s,%s\n", opname
,
23152 nameXMMReg(eregOfRexRM(pfx
,rm
)),
23154 assign( e0
, getXMMReg(eregOfRexRM(pfx
,rm
)) );
23156 shl
= shr
= sar
= False
;
23159 case Iop_ShlN16x8
: shl
= True
; size
= 16; break;
23160 case Iop_ShlN32x4
: shl
= True
; size
= 32; break;
23161 case Iop_ShlN64x2
: shl
= True
; size
= 64; break;
23162 case Iop_SarN16x8
: sar
= True
; size
= 16; break;
23163 case Iop_SarN32x4
: sar
= True
; size
= 32; break;
23164 case Iop_ShrN16x8
: shr
= True
; size
= 16; break;
23165 case Iop_ShrN32x4
: shr
= True
; size
= 32; break;
23166 case Iop_ShrN64x2
: shr
= True
; size
= 64; break;
23167 default: vassert(0);
23171 assign( e1
, amt
>= size
23173 : binop(op
, mkexpr(e0
), mkU8(amt
))
23177 assign( e1
, amt
>= size
23178 ? binop(op
, mkexpr(e0
), mkU8(size
-1))
23179 : binop(op
, mkexpr(e0
), mkU8(amt
))
23185 putYMMRegLoAndZU( rD
, mkexpr(e1
) );
23190 /* Vector by scalar shift of E into V, by an immediate byte. Modified
23191 version of dis_AVX128_shiftE_to_V_imm. */
23193 Long
dis_AVX256_shiftE_to_V_imm( Prefix pfx
,
23194 Long delta
, const HChar
* opname
, IROp op
)
23196 Bool shl
, shr
, sar
;
23197 UChar rm
= getUChar(delta
);
23198 IRTemp e0
= newTemp(Ity_V256
);
23199 IRTemp e1
= newTemp(Ity_V256
);
23200 UInt rD
= getVexNvvvv(pfx
);
23202 vassert(epartIsReg(rm
));
23203 vassert(gregLO3ofRM(rm
) == 2
23204 || gregLO3ofRM(rm
) == 4 || gregLO3ofRM(rm
) == 6);
23205 amt
= getUChar(delta
+1);
23207 DIP("%s $%d,%s,%s\n", opname
,
23209 nameYMMReg(eregOfRexRM(pfx
,rm
)),
23211 assign( e0
, getYMMReg(eregOfRexRM(pfx
,rm
)) );
23213 shl
= shr
= sar
= False
;
23216 case Iop_ShlN16x16
: shl
= True
; size
= 16; break;
23217 case Iop_ShlN32x8
: shl
= True
; size
= 32; break;
23218 case Iop_ShlN64x4
: shl
= True
; size
= 64; break;
23219 case Iop_SarN16x16
: sar
= True
; size
= 16; break;
23220 case Iop_SarN32x8
: sar
= True
; size
= 32; break;
23221 case Iop_ShrN16x16
: shr
= True
; size
= 16; break;
23222 case Iop_ShrN32x8
: shr
= True
; size
= 32; break;
23223 case Iop_ShrN64x4
: shr
= True
; size
= 64; break;
23224 default: vassert(0);
23229 assign( e1
, amt
>= size
23230 ? binop(Iop_V128HLtoV256
, mkV128(0), mkV128(0))
23231 : binop(op
, mkexpr(e0
), mkU8(amt
))
23235 assign( e1
, amt
>= size
23236 ? binop(op
, mkexpr(e0
), mkU8(size
-1))
23237 : binop(op
, mkexpr(e0
), mkU8(amt
))
23243 putYMMReg( rD
, mkexpr(e1
) );
23248 /* Lower 64-bit lane only AVX128 binary operation:
23249 G[63:0] = V[63:0] `op` E[63:0]
23250 G[127:64] = V[127:64]
23252 The specified op must be of the 64F0x2 kind, so that it
23253 copies the upper half of the left operand to the result.
23255 static Long
dis_AVX128_E_V_to_G_lo64 ( /*OUT*/Bool
* uses_vvvv
,
23256 const VexAbiInfo
* vbi
,
23257 Prefix pfx
, Long delta
,
23258 const HChar
* opname
, IROp op
)
23263 UChar rm
= getUChar(delta
);
23264 UInt rG
= gregOfRexRM(pfx
,rm
);
23265 UInt rV
= getVexNvvvv(pfx
);
23266 IRExpr
* vpart
= getXMMReg(rV
);
23267 if (epartIsReg(rm
)) {
23268 UInt rE
= eregOfRexRM(pfx
,rm
);
23269 putXMMReg( rG
, binop(op
, vpart
, getXMMReg(rE
)) );
23270 DIP("%s %s,%s,%s\n", opname
,
23271 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
23274 /* We can only do a 64-bit memory read, so the upper half of the
23275 E operand needs to be made simply of zeroes. */
23276 IRTemp epart
= newTemp(Ity_V128
);
23277 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23278 assign( epart
, unop( Iop_64UtoV128
,
23279 loadLE(Ity_I64
, mkexpr(addr
))) );
23280 putXMMReg( rG
, binop(op
, vpart
, mkexpr(epart
)) );
23281 DIP("%s %s,%s,%s\n", opname
,
23282 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
23283 delta
= delta
+alen
;
23285 putYMMRegLane128( rG
, 1, mkV128(0) );
23291 /* Lower 64-bit lane only AVX128 unary operation:
23292 G[63:0] = op(E[63:0])
23293 G[127:64] = V[127:64]
23295 The specified op must be of the 64F0x2 kind, so that it
23296 copies the upper half of the operand to the result.
23298 static Long
dis_AVX128_E_V_to_G_lo64_unary ( /*OUT*/Bool
* uses_vvvv
,
23299 const VexAbiInfo
* vbi
,
23300 Prefix pfx
, Long delta
,
23301 const HChar
* opname
, IROp op
)
23306 UChar rm
= getUChar(delta
);
23307 UInt rG
= gregOfRexRM(pfx
,rm
);
23308 UInt rV
= getVexNvvvv(pfx
);
23309 IRTemp e64
= newTemp(Ity_I64
);
23311 /* Fetch E[63:0] */
23312 if (epartIsReg(rm
)) {
23313 UInt rE
= eregOfRexRM(pfx
,rm
);
23314 assign(e64
, getXMMRegLane64(rE
, 0));
23315 DIP("%s %s,%s,%s\n", opname
,
23316 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
23319 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23320 assign(e64
, loadLE(Ity_I64
, mkexpr(addr
)));
23321 DIP("%s %s,%s,%s\n", opname
,
23322 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
23326 /* Create a value 'arg' as V[127:64]++E[63:0] */
23327 IRTemp arg
= newTemp(Ity_V128
);
23329 binop(Iop_SetV128lo64
,
23330 getXMMReg(rV
), mkexpr(e64
)));
23331 /* and apply op to it */
23332 putYMMRegLoAndZU( rG
, unop(op
, mkexpr(arg
)) );
23338 /* Lower 32-bit lane only AVX128 unary operation:
23339 G[31:0] = op(E[31:0])
23340 G[127:32] = V[127:32]
23342 The specified op must be of the 32F0x4 kind, so that it
23343 copies the upper 3/4 of the operand to the result.
23345 static Long
dis_AVX128_E_V_to_G_lo32_unary ( /*OUT*/Bool
* uses_vvvv
,
23346 const VexAbiInfo
* vbi
,
23347 Prefix pfx
, Long delta
,
23348 const HChar
* opname
, IROp op
)
23353 UChar rm
= getUChar(delta
);
23354 UInt rG
= gregOfRexRM(pfx
,rm
);
23355 UInt rV
= getVexNvvvv(pfx
);
23356 IRTemp e32
= newTemp(Ity_I32
);
23358 /* Fetch E[31:0] */
23359 if (epartIsReg(rm
)) {
23360 UInt rE
= eregOfRexRM(pfx
,rm
);
23361 assign(e32
, getXMMRegLane32(rE
, 0));
23362 DIP("%s %s,%s,%s\n", opname
,
23363 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
23366 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23367 assign(e32
, loadLE(Ity_I32
, mkexpr(addr
)));
23368 DIP("%s %s,%s,%s\n", opname
,
23369 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
23373 /* Create a value 'arg' as V[127:32]++E[31:0] */
23374 IRTemp arg
= newTemp(Ity_V128
);
23376 binop(Iop_SetV128lo32
,
23377 getXMMReg(rV
), mkexpr(e32
)));
23378 /* and apply op to it */
23379 putYMMRegLoAndZU( rG
, unop(op
, mkexpr(arg
)) );
23385 /* Lower 32-bit lane only AVX128 binary operation:
23386 G[31:0] = V[31:0] `op` E[31:0]
23387 G[127:32] = V[127:32]
23389 The specified op must be of the 32F0x4 kind, so that it
23390 copies the upper 3/4 of the left operand to the result.
23392 static Long
dis_AVX128_E_V_to_G_lo32 ( /*OUT*/Bool
* uses_vvvv
,
23393 const VexAbiInfo
* vbi
,
23394 Prefix pfx
, Long delta
,
23395 const HChar
* opname
, IROp op
)
23400 UChar rm
= getUChar(delta
);
23401 UInt rG
= gregOfRexRM(pfx
,rm
);
23402 UInt rV
= getVexNvvvv(pfx
);
23403 IRExpr
* vpart
= getXMMReg(rV
);
23404 if (epartIsReg(rm
)) {
23405 UInt rE
= eregOfRexRM(pfx
,rm
);
23406 putXMMReg( rG
, binop(op
, vpart
, getXMMReg(rE
)) );
23407 DIP("%s %s,%s,%s\n", opname
,
23408 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
23411 /* We can only do a 32-bit memory read, so the upper 3/4 of the
23412 E operand needs to be made simply of zeroes. */
23413 IRTemp epart
= newTemp(Ity_V128
);
23414 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23415 assign( epart
, unop( Iop_32UtoV128
,
23416 loadLE(Ity_I32
, mkexpr(addr
))) );
23417 putXMMReg( rG
, binop(op
, vpart
, mkexpr(epart
)) );
23418 DIP("%s %s,%s,%s\n", opname
,
23419 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
23420 delta
= delta
+alen
;
23422 putYMMRegLane128( rG
, 1, mkV128(0) );
23428 /* All-lanes AVX128 binary operation:
23429 G[127:0] = V[127:0] `op` E[127:0]
23432 static Long
dis_AVX128_E_V_to_G ( /*OUT*/Bool
* uses_vvvv
,
23433 const VexAbiInfo
* vbi
,
23434 Prefix pfx
, Long delta
,
23435 const HChar
* opname
, IROp op
)
23437 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
23438 uses_vvvv
, vbi
, pfx
, delta
, opname
, op
,
23439 NULL
, False
/*!invertLeftArg*/, False
/*!swapArgs*/
23444 /* Handles AVX128 32F/64F comparisons. A derivative of
23445 dis_SSEcmp_E_to_G. It can fail, in which case it returns the
23446 original delta to indicate failure. */
23448 Long
dis_AVX128_cmp_V_E_to_G ( /*OUT*/Bool
* uses_vvvv
,
23449 const VexAbiInfo
* vbi
,
23450 Prefix pfx
, Long delta
,
23451 const HChar
* opname
, Bool all_lanes
, Int sz
)
23453 vassert(sz
== 4 || sz
== 8);
23454 Long deltaIN
= delta
;
23459 Bool preZero
= False
;
23460 Bool preSwap
= False
;
23461 IROp op
= Iop_INVALID
;
23462 Bool postNot
= False
;
23463 IRTemp plain
= newTemp(Ity_V128
);
23464 UChar rm
= getUChar(delta
);
23465 UInt rG
= gregOfRexRM(pfx
, rm
);
23466 UInt rV
= getVexNvvvv(pfx
);
23467 IRTemp argL
= newTemp(Ity_V128
);
23468 IRTemp argR
= newTemp(Ity_V128
);
23470 assign(argL
, getXMMReg(rV
));
23471 if (epartIsReg(rm
)) {
23472 imm8
= getUChar(delta
+1);
23473 Bool ok
= findSSECmpOp(&preZero
, &preSwap
, &op
, &postNot
,
23474 imm8
, all_lanes
, sz
);
23475 if (!ok
) return deltaIN
; /* FAIL */
23476 UInt rE
= eregOfRexRM(pfx
,rm
);
23477 assign(argR
, getXMMReg(rE
));
23479 DIP("%s $%u,%s,%s,%s\n",
23481 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
23483 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
23484 imm8
= getUChar(delta
+alen
);
23485 Bool ok
= findSSECmpOp(&preZero
, &preSwap
, &op
, &postNot
,
23486 imm8
, all_lanes
, sz
);
23487 if (!ok
) return deltaIN
; /* FAIL */
23489 all_lanes
? loadLE(Ity_V128
, mkexpr(addr
))
23490 : sz
== 8 ? unop( Iop_64UtoV128
, loadLE(Ity_I64
, mkexpr(addr
)))
23491 : /*sz==4*/ unop( Iop_32UtoV128
, loadLE(Ity_I32
, mkexpr(addr
))));
23493 DIP("%s $%u,%s,%s,%s\n",
23494 opname
, imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
23497 IRTemp argMask
= newTemp(Ity_V128
);
23499 // In this case, preSwap is irrelevant, but it's harmless to honour it
23501 assign(argMask
, mkV128(all_lanes
? 0x0000 : (sz
==4 ? 0xFFF0 : 0xFF00)));
23503 assign(argMask
, mkV128(0xFFFF));
23508 preSwap
? binop(op
, binop(Iop_AndV128
, mkexpr(argR
), mkexpr(argMask
)),
23509 binop(Iop_AndV128
, mkexpr(argL
), mkexpr(argMask
)))
23510 : binop(op
, binop(Iop_AndV128
, mkexpr(argL
), mkexpr(argMask
)),
23511 binop(Iop_AndV128
, mkexpr(argR
), mkexpr(argMask
)))
23515 /* This is simple: just invert the result, if necessary, and
23518 putYMMRegLoAndZU( rG
, unop(Iop_NotV128
, mkexpr(plain
)) );
23520 putYMMRegLoAndZU( rG
, mkexpr(plain
) );
23525 /* More complex. It's a one-lane-only, hence need to possibly
23526 invert only that one lane. But at least the other lanes are
23527 correctly "in" the result, having been copied from the left
23530 IRExpr
* mask
= mkV128(sz
==4 ? 0x000F : 0x00FF);
23531 putYMMRegLoAndZU( rG
, binop(Iop_XorV128
, mkexpr(plain
),
23534 putYMMRegLoAndZU( rG
, mkexpr(plain
) );
23538 /* This is the most complex case. One-lane-only, but the args
23539 were swapped. So we have to possibly invert the bottom lane,
23540 and (definitely) we have to copy the upper lane(s) from argL
23541 since, due to the swapping, what's currently there is from
23542 argR, which is not correct. */
23543 IRTemp res
= newTemp(Ity_V128
);
23544 IRTemp mask
= newTemp(Ity_V128
);
23545 IRTemp notMask
= newTemp(Ity_V128
);
23546 assign(mask
, mkV128(sz
==4 ? 0x000F : 0x00FF));
23547 assign(notMask
, mkV128(sz
==4 ? 0xFFF0 : 0xFF00));
23552 unop(Iop_NotV128
, mkexpr(plain
)),
23554 binop(Iop_AndV128
, mkexpr(argL
), mkexpr(notMask
))));
23561 binop(Iop_AndV128
, mkexpr(argL
), mkexpr(notMask
))));
23563 putYMMRegLoAndZU( rG
, mkexpr(res
) );
23571 /* Handles AVX256 32F/64F comparisons. A derivative of
23572 dis_SSEcmp_E_to_G. It can fail, in which case it returns the
23573 original delta to indicate failure. */
23575 Long
dis_AVX256_cmp_V_E_to_G ( /*OUT*/Bool
* uses_vvvv
,
23576 const VexAbiInfo
* vbi
,
23577 Prefix pfx
, Long delta
,
23578 const HChar
* opname
, Int sz
)
23580 vassert(sz
== 4 || sz
== 8);
23581 Long deltaIN
= delta
;
23586 Bool preZero
= False
;
23587 Bool preSwap
= False
;
23588 IROp op
= Iop_INVALID
;
23589 Bool postNot
= False
;
23590 IRTemp plain
= newTemp(Ity_V256
);
23591 UChar rm
= getUChar(delta
);
23592 UInt rG
= gregOfRexRM(pfx
, rm
);
23593 UInt rV
= getVexNvvvv(pfx
);
23594 IRTemp argL
= newTemp(Ity_V256
);
23595 IRTemp argR
= newTemp(Ity_V256
);
23596 IRTemp argLhi
= IRTemp_INVALID
;
23597 IRTemp argLlo
= IRTemp_INVALID
;
23598 IRTemp argRhi
= IRTemp_INVALID
;
23599 IRTemp argRlo
= IRTemp_INVALID
;
23601 assign(argL
, getYMMReg(rV
));
23602 if (epartIsReg(rm
)) {
23603 imm8
= getUChar(delta
+1);
23604 Bool ok
= findSSECmpOp(&preZero
, &preSwap
, &op
, &postNot
, imm8
,
23605 True
/*all_lanes*/, sz
);
23606 if (!ok
) return deltaIN
; /* FAIL */
23607 UInt rE
= eregOfRexRM(pfx
,rm
);
23608 assign(argR
, getYMMReg(rE
));
23610 DIP("%s $%u,%s,%s,%s\n",
23612 nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
23614 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
23615 imm8
= getUChar(delta
+alen
);
23616 Bool ok
= findSSECmpOp(&preZero
, &preSwap
, &op
, &postNot
, imm8
,
23617 True
/*all_lanes*/, sz
);
23618 if (!ok
) return deltaIN
; /* FAIL */
23619 assign(argR
, loadLE(Ity_V256
, mkexpr(addr
)) );
23621 DIP("%s $%u,%s,%s,%s\n",
23622 opname
, imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
23625 breakupV256toV128s( preSwap
? argR
: argL
, &argLhi
, &argLlo
);
23626 breakupV256toV128s( preSwap
? argL
: argR
, &argRhi
, &argRlo
);
23628 IRTemp argMask
= newTemp(Ity_V128
);
23630 // In this case, preSwap is irrelevant, but it's harmless to honour it
23632 assign(argMask
, mkV128(0x0000));
23634 assign(argMask
, mkV128(0xFFFF));
23639 binop( Iop_V128HLtoV256
,
23640 binop(op
, binop(Iop_AndV128
, mkexpr(argLhi
), mkexpr(argMask
)),
23641 binop(Iop_AndV128
, mkexpr(argRhi
), mkexpr(argMask
))),
23642 binop(op
, binop(Iop_AndV128
, mkexpr(argLlo
), mkexpr(argMask
)),
23643 binop(Iop_AndV128
, mkexpr(argRlo
), mkexpr(argMask
))))
23646 /* This is simple: just invert the result, if necessary, and
23649 putYMMReg( rG
, unop(Iop_NotV256
, mkexpr(plain
)) );
23651 putYMMReg( rG
, mkexpr(plain
) );
23659 /* Handles AVX128 unary E-to-G all-lanes operations. */
23661 Long
dis_AVX128_E_to_G_unary ( /*OUT*/Bool
* uses_vvvv
,
23662 const VexAbiInfo
* vbi
,
23663 Prefix pfx
, Long delta
,
23664 const HChar
* opname
,
23665 IRTemp (*opFn
)(IRTemp
) )
23670 IRTemp res
= newTemp(Ity_V128
);
23671 IRTemp arg
= newTemp(Ity_V128
);
23672 UChar rm
= getUChar(delta
);
23673 UInt rG
= gregOfRexRM(pfx
, rm
);
23674 if (epartIsReg(rm
)) {
23675 UInt rE
= eregOfRexRM(pfx
,rm
);
23676 assign(arg
, getXMMReg(rE
));
23678 DIP("%s %s,%s\n", opname
, nameXMMReg(rE
), nameXMMReg(rG
));
23680 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23681 assign(arg
, loadLE(Ity_V128
, mkexpr(addr
)));
23683 DIP("%s %s,%s\n", opname
, dis_buf
, nameXMMReg(rG
));
23686 putYMMRegLoAndZU( rG
, mkexpr(res
) );
23687 *uses_vvvv
= False
;
23692 /* Handles AVX128 unary E-to-G all-lanes operations. */
23694 Long
dis_AVX128_E_to_G_unary_all ( /*OUT*/Bool
* uses_vvvv
,
23695 const VexAbiInfo
* vbi
,
23696 Prefix pfx
, Long delta
,
23697 const HChar
* opname
, IROp op
)
23702 IRTemp arg
= newTemp(Ity_V128
);
23703 UChar rm
= getUChar(delta
);
23704 UInt rG
= gregOfRexRM(pfx
, rm
);
23705 if (epartIsReg(rm
)) {
23706 UInt rE
= eregOfRexRM(pfx
,rm
);
23707 assign(arg
, getXMMReg(rE
));
23709 DIP("%s %s,%s\n", opname
, nameXMMReg(rE
), nameXMMReg(rG
));
23711 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23712 assign(arg
, loadLE(Ity_V128
, mkexpr(addr
)));
23714 DIP("%s %s,%s\n", opname
, dis_buf
, nameXMMReg(rG
));
23716 // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
23717 // up in the usual way.
23718 Bool needsIRRM
= op
== Iop_Sqrt32Fx4
|| op
== Iop_Sqrt64Fx2
;
23719 /* XXXROUNDINGFIXME */
23720 IRExpr
* res
= needsIRRM
? binop(op
, get_FAKE_roundingmode(), mkexpr(arg
))
23721 : unop(op
, mkexpr(arg
));
23722 putYMMRegLoAndZU( rG
, res
);
23723 *uses_vvvv
= False
;
23728 /* FIXME: common up with the _128_ version above? */
23730 Long
dis_VEX_NDS_256_AnySimdPfx_0F_WIG (
23731 /*OUT*/Bool
* uses_vvvv
, const VexAbiInfo
* vbi
,
23732 Prefix pfx
, Long delta
, const HChar
* name
,
23733 /* The actual operation. Use either 'op' or 'opfn',
23735 IROp op
, IRTemp(*opFn
)(IRTemp
,IRTemp
),
23736 Bool invertLeftArg
,
23740 UChar modrm
= getUChar(delta
);
23741 UInt rD
= gregOfRexRM(pfx
, modrm
);
23742 UInt rSL
= getVexNvvvv(pfx
);
23743 IRTemp tSL
= newTemp(Ity_V256
);
23744 IRTemp tSR
= newTemp(Ity_V256
);
23745 IRTemp addr
= IRTemp_INVALID
;
23748 vassert(1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*WIG?*/);
23750 assign(tSL
, invertLeftArg
? unop(Iop_NotV256
, getYMMReg(rSL
))
23753 if (epartIsReg(modrm
)) {
23754 UInt rSR
= eregOfRexRM(pfx
, modrm
);
23756 assign(tSR
, getYMMReg(rSR
));
23757 DIP("%s %s,%s,%s\n",
23758 name
, nameYMMReg(rSR
), nameYMMReg(rSL
), nameYMMReg(rD
));
23760 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23762 assign(tSR
, loadLE(Ity_V256
, mkexpr(addr
)));
23763 DIP("%s %s,%s,%s\n",
23764 name
, dis_buf
, nameYMMReg(rSL
), nameYMMReg(rD
));
23767 IRTemp res
= IRTemp_INVALID
;
23768 if (op
!= Iop_INVALID
) {
23769 vassert(opFn
== NULL
);
23770 res
= newTemp(Ity_V256
);
23771 if (requiresRMode(op
)) {
23772 IRTemp rm
= newTemp(Ity_I32
);
23773 assign(rm
, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
23774 assign(res
, swapArgs
23775 ? triop(op
, mkexpr(rm
), mkexpr(tSR
), mkexpr(tSL
))
23776 : triop(op
, mkexpr(rm
), mkexpr(tSL
), mkexpr(tSR
)));
23778 assign(res
, swapArgs
23779 ? binop(op
, mkexpr(tSR
), mkexpr(tSL
))
23780 : binop(op
, mkexpr(tSL
), mkexpr(tSR
)));
23783 vassert(opFn
!= NULL
);
23784 res
= swapArgs
? opFn(tSR
, tSL
) : opFn(tSL
, tSR
);
23787 putYMMReg(rD
, mkexpr(res
));
23794 /* All-lanes AVX256 binary operation:
23795 G[255:0] = V[255:0] `op` E[255:0]
23797 static Long
dis_AVX256_E_V_to_G ( /*OUT*/Bool
* uses_vvvv
,
23798 const VexAbiInfo
* vbi
,
23799 Prefix pfx
, Long delta
,
23800 const HChar
* opname
, IROp op
)
23802 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23803 uses_vvvv
, vbi
, pfx
, delta
, opname
, op
,
23804 NULL
, False
/*!invertLeftArg*/, False
/*!swapArgs*/
23809 /* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, with a simple IROp
23810 for the operation, no inversion of the left arg, and no swapping of
23813 Long
dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple (
23814 /*OUT*/Bool
* uses_vvvv
, const VexAbiInfo
* vbi
,
23815 Prefix pfx
, Long delta
, const HChar
* name
,
23819 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23820 uses_vvvv
, vbi
, pfx
, delta
, name
, op
, NULL
, False
, False
);
23824 /* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, using the given IR
23825 generator to compute the result, no inversion of the left
23826 arg, and no swapping of args. */
23828 Long
dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex (
23829 /*OUT*/Bool
* uses_vvvv
, const VexAbiInfo
* vbi
,
23830 Prefix pfx
, Long delta
, const HChar
* name
,
23831 IRTemp(*opFn
)(IRTemp
,IRTemp
)
23834 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23835 uses_vvvv
, vbi
, pfx
, delta
, name
,
23836 Iop_INVALID
, opFn
, False
, False
);
23840 /* Handles AVX256 unary E-to-G all-lanes operations. */
23842 Long
dis_AVX256_E_to_G_unary ( /*OUT*/Bool
* uses_vvvv
,
23843 const VexAbiInfo
* vbi
,
23844 Prefix pfx
, Long delta
,
23845 const HChar
* opname
,
23846 IRTemp (*opFn
)(IRTemp
) )
23851 IRTemp res
= newTemp(Ity_V256
);
23852 IRTemp arg
= newTemp(Ity_V256
);
23853 UChar rm
= getUChar(delta
);
23854 UInt rG
= gregOfRexRM(pfx
, rm
);
23855 if (epartIsReg(rm
)) {
23856 UInt rE
= eregOfRexRM(pfx
,rm
);
23857 assign(arg
, getYMMReg(rE
));
23859 DIP("%s %s,%s\n", opname
, nameYMMReg(rE
), nameYMMReg(rG
));
23861 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23862 assign(arg
, loadLE(Ity_V256
, mkexpr(addr
)));
23864 DIP("%s %s,%s\n", opname
, dis_buf
, nameYMMReg(rG
));
23867 putYMMReg( rG
, mkexpr(res
) );
23868 *uses_vvvv
= False
;
23873 /* Handles AVX256 unary E-to-G all-lanes operations. */
23875 Long
dis_AVX256_E_to_G_unary_all ( /*OUT*/Bool
* uses_vvvv
,
23876 const VexAbiInfo
* vbi
,
23877 Prefix pfx
, Long delta
,
23878 const HChar
* opname
, IROp op
)
23883 IRTemp arg
= newTemp(Ity_V256
);
23884 UChar rm
= getUChar(delta
);
23885 UInt rG
= gregOfRexRM(pfx
, rm
);
23886 if (epartIsReg(rm
)) {
23887 UInt rE
= eregOfRexRM(pfx
,rm
);
23888 assign(arg
, getYMMReg(rE
));
23890 DIP("%s %s,%s\n", opname
, nameYMMReg(rE
), nameYMMReg(rG
));
23892 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23893 assign(arg
, loadLE(Ity_V256
, mkexpr(addr
)));
23895 DIP("%s %s,%s\n", opname
, dis_buf
, nameYMMReg(rG
));
23897 putYMMReg( rG
, unop(op
, mkexpr(arg
)) );
23898 *uses_vvvv
= False
;
23903 /* The use of ReinterpF64asI64 is ugly. Surely could do better if we
23904 had a variant of Iop_64x4toV256 that took F64s as args instead. */
23905 static Long
dis_CVTDQ2PD_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
23908 IRTemp addr
= IRTemp_INVALID
;
23911 UChar modrm
= getUChar(delta
);
23912 IRTemp sV
= newTemp(Ity_V128
);
23913 UInt rG
= gregOfRexRM(pfx
,modrm
);
23914 if (epartIsReg(modrm
)) {
23915 UInt rE
= eregOfRexRM(pfx
,modrm
);
23916 assign( sV
, getXMMReg(rE
) );
23918 DIP("vcvtdq2pd %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
));
23920 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23921 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
23923 DIP("vcvtdq2pd %s,%s\n", dis_buf
, nameYMMReg(rG
) );
23925 IRTemp s3
, s2
, s1
, s0
;
23926 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
23927 breakupV128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
23931 unop(Iop_ReinterpF64asI64
, unop(Iop_I32StoF64
, mkexpr(s3
))),
23932 unop(Iop_ReinterpF64asI64
, unop(Iop_I32StoF64
, mkexpr(s2
))),
23933 unop(Iop_ReinterpF64asI64
, unop(Iop_I32StoF64
, mkexpr(s1
))),
23934 unop(Iop_ReinterpF64asI64
, unop(Iop_I32StoF64
, mkexpr(s0
)))
23936 putYMMReg(rG
, res
);
23941 static Long
dis_CVTPD2PS_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
23944 IRTemp addr
= IRTemp_INVALID
;
23947 UChar modrm
= getUChar(delta
);
23948 UInt rG
= gregOfRexRM(pfx
,modrm
);
23949 IRTemp argV
= newTemp(Ity_V256
);
23950 IRTemp rmode
= newTemp(Ity_I32
);
23951 if (epartIsReg(modrm
)) {
23952 UInt rE
= eregOfRexRM(pfx
,modrm
);
23953 assign( argV
, getYMMReg(rE
) );
23955 DIP("vcvtpd2psy %s,%s\n", nameYMMReg(rE
), nameXMMReg(rG
));
23957 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23958 assign( argV
, loadLE(Ity_V256
, mkexpr(addr
)) );
23960 DIP("vcvtpd2psy %s,%s\n", dis_buf
, nameXMMReg(rG
) );
23963 assign( rmode
, get_sse_roundingmode() );
23964 IRTemp t3
, t2
, t1
, t0
;
23965 t3
= t2
= t1
= t0
= IRTemp_INVALID
;
23966 breakupV256to64s( argV
, &t3
, &t2
, &t1
, &t0
);
23967 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), \
23968 unop(Iop_ReinterpI64asF64, mkexpr(_t)) )
23969 putXMMRegLane32F( rG
, 3, CVT(t3
) );
23970 putXMMRegLane32F( rG
, 2, CVT(t2
) );
23971 putXMMRegLane32F( rG
, 1, CVT(t1
) );
23972 putXMMRegLane32F( rG
, 0, CVT(t0
) );
23974 putYMMRegLane128( rG
, 1, mkV128(0) );
23979 static IRTemp
math_VPUNPCK_YMM ( IRTemp tL
, IRType tR
, IROp op
)
23981 IRTemp tLhi
, tLlo
, tRhi
, tRlo
;
23982 tLhi
= tLlo
= tRhi
= tRlo
= IRTemp_INVALID
;
23983 IRTemp res
= newTemp(Ity_V256
);
23984 breakupV256toV128s( tL
, &tLhi
, &tLlo
);
23985 breakupV256toV128s( tR
, &tRhi
, &tRlo
);
23986 assign( res
, binop( Iop_V128HLtoV256
,
23987 binop( op
, mkexpr(tRhi
), mkexpr(tLhi
) ),
23988 binop( op
, mkexpr(tRlo
), mkexpr(tLlo
) ) ) );
23993 static IRTemp
math_VPUNPCKLBW_YMM ( IRTemp tL
, IRTemp tR
)
23995 return math_VPUNPCK_YMM( tL
, tR
, Iop_InterleaveLO8x16
);
23999 static IRTemp
math_VPUNPCKLWD_YMM ( IRTemp tL
, IRTemp tR
)
24001 return math_VPUNPCK_YMM( tL
, tR
, Iop_InterleaveLO16x8
);
24005 static IRTemp
math_VPUNPCKLDQ_YMM ( IRTemp tL
, IRTemp tR
)
24007 return math_VPUNPCK_YMM( tL
, tR
, Iop_InterleaveLO32x4
);
24011 static IRTemp
math_VPUNPCKLQDQ_YMM ( IRTemp tL
, IRTemp tR
)
24013 return math_VPUNPCK_YMM( tL
, tR
, Iop_InterleaveLO64x2
);
24017 static IRTemp
math_VPUNPCKHBW_YMM ( IRTemp tL
, IRTemp tR
)
24019 return math_VPUNPCK_YMM( tL
, tR
, Iop_InterleaveHI8x16
);
24023 static IRTemp
math_VPUNPCKHWD_YMM ( IRTemp tL
, IRTemp tR
)
24025 return math_VPUNPCK_YMM( tL
, tR
, Iop_InterleaveHI16x8
);
24029 static IRTemp
math_VPUNPCKHDQ_YMM ( IRTemp tL
, IRTemp tR
)
24031 return math_VPUNPCK_YMM( tL
, tR
, Iop_InterleaveHI32x4
);
24035 static IRTemp
math_VPUNPCKHQDQ_YMM ( IRTemp tL
, IRTemp tR
)
24037 return math_VPUNPCK_YMM( tL
, tR
, Iop_InterleaveHI64x2
);
24041 static IRTemp
math_VPACKSSWB_YMM ( IRTemp tL
, IRTemp tR
)
24043 return math_VPUNPCK_YMM( tL
, tR
, Iop_QNarrowBin16Sto8Sx16
);
24047 static IRTemp
math_VPACKUSWB_YMM ( IRTemp tL
, IRTemp tR
)
24049 return math_VPUNPCK_YMM( tL
, tR
, Iop_QNarrowBin16Sto8Ux16
);
24053 static IRTemp
math_VPACKSSDW_YMM ( IRTemp tL
, IRTemp tR
)
24055 return math_VPUNPCK_YMM( tL
, tR
, Iop_QNarrowBin32Sto16Sx8
);
24059 static IRTemp
math_VPACKUSDW_YMM ( IRTemp tL
, IRTemp tR
)
24061 return math_VPUNPCK_YMM( tL
, tR
, Iop_QNarrowBin32Sto16Ux8
);
24065 __attribute__((noinline
))
24067 Long
dis_ESC_0F__VEX (
24068 /*MB_OUT*/DisResult
* dres
,
24069 /*OUT*/ Bool
* uses_vvvv
,
24070 const VexArchInfo
* archinfo
,
24071 const VexAbiInfo
* vbi
,
24072 Prefix pfx
, Int sz
, Long deltaIN
24075 IRTemp addr
= IRTemp_INVALID
;
24078 Long delta
= deltaIN
;
24079 UChar opc
= getUChar(delta
);
24081 *uses_vvvv
= False
;
24086 /* VMOVSD m64, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
24087 /* Move 64 bits from E (mem only) to G (lo half xmm).
24088 Bits 255-64 of the dest are zeroed out. */
24089 if (haveF2no66noF3(pfx
) && !epartIsReg(getUChar(delta
))) {
24090 UChar modrm
= getUChar(delta
);
24091 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24092 UInt rG
= gregOfRexRM(pfx
,modrm
);
24093 IRTemp z128
= newTemp(Ity_V128
);
24094 assign(z128
, mkV128(0));
24095 putXMMReg( rG
, mkexpr(z128
) );
24096 /* FIXME: ALIGNMENT CHECK? */
24097 putXMMRegLane64( rG
, 0, loadLE(Ity_I64
, mkexpr(addr
)) );
24098 putYMMRegLane128( rG
, 1, mkexpr(z128
) );
24099 DIP("vmovsd %s,%s\n", dis_buf
, nameXMMReg(rG
));
24101 goto decode_success
;
24103 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
24105 if (haveF2no66noF3(pfx
) && epartIsReg(getUChar(delta
))) {
24106 UChar modrm
= getUChar(delta
);
24107 UInt rG
= gregOfRexRM(pfx
, modrm
);
24108 UInt rE
= eregOfRexRM(pfx
, modrm
);
24109 UInt rV
= getVexNvvvv(pfx
);
24111 DIP("vmovsd %s,%s,%s\n",
24112 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
24113 IRTemp res
= newTemp(Ity_V128
);
24114 assign(res
, binop(Iop_64HLtoV128
,
24115 getXMMRegLane64(rV
, 1),
24116 getXMMRegLane64(rE
, 0)));
24117 putYMMRegLoAndZU(rG
, mkexpr(res
));
24119 goto decode_success
;
24121 /* VMOVSS m32, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */
24122 /* Move 32 bits from E (mem only) to G (lo half xmm).
24123 Bits 255-32 of the dest are zeroed out. */
24124 if (haveF3no66noF2(pfx
) && !epartIsReg(getUChar(delta
))) {
24125 UChar modrm
= getUChar(delta
);
24126 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24127 UInt rG
= gregOfRexRM(pfx
,modrm
);
24128 IRTemp z128
= newTemp(Ity_V128
);
24129 assign(z128
, mkV128(0));
24130 putXMMReg( rG
, mkexpr(z128
) );
24131 /* FIXME: ALIGNMENT CHECK? */
24132 putXMMRegLane32( rG
, 0, loadLE(Ity_I32
, mkexpr(addr
)) );
24133 putYMMRegLane128( rG
, 1, mkexpr(z128
) );
24134 DIP("vmovss %s,%s\n", dis_buf
, nameXMMReg(rG
));
24136 goto decode_success
;
24138 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */
24140 if (haveF3no66noF2(pfx
) && epartIsReg(getUChar(delta
))) {
24141 UChar modrm
= getUChar(delta
);
24142 UInt rG
= gregOfRexRM(pfx
, modrm
);
24143 UInt rE
= eregOfRexRM(pfx
, modrm
);
24144 UInt rV
= getVexNvvvv(pfx
);
24146 DIP("vmovss %s,%s,%s\n",
24147 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
24148 IRTemp res
= newTemp(Ity_V128
);
24149 assign( res
, binop( Iop_64HLtoV128
,
24150 getXMMRegLane64(rV
, 1),
24151 binop(Iop_32HLto64
,
24152 getXMMRegLane32(rV
, 1),
24153 getXMMRegLane32(rE
, 0)) ) );
24154 putYMMRegLoAndZU(rG
, mkexpr(res
));
24156 goto decode_success
;
24158 /* VMOVUPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 10 /r */
24159 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24160 UChar modrm
= getUChar(delta
);
24161 UInt rG
= gregOfRexRM(pfx
, modrm
);
24162 if (epartIsReg(modrm
)) {
24163 UInt rE
= eregOfRexRM(pfx
,modrm
);
24164 putYMMRegLoAndZU( rG
, getXMMReg( rE
));
24165 DIP("vmovupd %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
24168 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24169 putYMMRegLoAndZU( rG
, loadLE(Ity_V128
, mkexpr(addr
)) );
24170 DIP("vmovupd %s,%s\n", dis_buf
, nameXMMReg(rG
));
24173 goto decode_success
;
24175 /* VMOVUPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 10 /r */
24176 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24177 UChar modrm
= getUChar(delta
);
24178 UInt rG
= gregOfRexRM(pfx
, modrm
);
24179 if (epartIsReg(modrm
)) {
24180 UInt rE
= eregOfRexRM(pfx
,modrm
);
24181 putYMMReg( rG
, getYMMReg( rE
));
24182 DIP("vmovupd %s,%s\n", nameYMMReg(rE
), nameYMMReg(rG
));
24185 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24186 putYMMReg( rG
, loadLE(Ity_V256
, mkexpr(addr
)) );
24187 DIP("vmovupd %s,%s\n", dis_buf
, nameYMMReg(rG
));
24190 goto decode_success
;
24192 /* VMOVUPS xmm2/m128, xmm1 = VEX.128.0F.WIG 10 /r */
24193 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24194 UChar modrm
= getUChar(delta
);
24195 UInt rG
= gregOfRexRM(pfx
, modrm
);
24196 if (epartIsReg(modrm
)) {
24197 UInt rE
= eregOfRexRM(pfx
,modrm
);
24198 putYMMRegLoAndZU( rG
, getXMMReg( rE
));
24199 DIP("vmovups %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
24202 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24203 putYMMRegLoAndZU( rG
, loadLE(Ity_V128
, mkexpr(addr
)) );
24204 DIP("vmovups %s,%s\n", dis_buf
, nameXMMReg(rG
));
24207 goto decode_success
;
24209 /* VMOVUPS ymm2/m256, ymm1 = VEX.256.0F.WIG 10 /r */
24210 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24211 UChar modrm
= getUChar(delta
);
24212 UInt rG
= gregOfRexRM(pfx
, modrm
);
24213 if (epartIsReg(modrm
)) {
24214 UInt rE
= eregOfRexRM(pfx
,modrm
);
24215 putYMMReg( rG
, getYMMReg( rE
));
24216 DIP("vmovups %s,%s\n", nameYMMReg(rE
), nameYMMReg(rG
));
24219 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24220 putYMMReg( rG
, loadLE(Ity_V256
, mkexpr(addr
)) );
24221 DIP("vmovups %s,%s\n", dis_buf
, nameYMMReg(rG
));
24224 goto decode_success
;
24229 /* VMOVSD xmm1, m64 = VEX.LIG.F2.0F.WIG 11 /r */
24230 /* Move 64 bits from G (low half xmm) to mem only. */
24231 if (haveF2no66noF3(pfx
) && !epartIsReg(getUChar(delta
))) {
24232 UChar modrm
= getUChar(delta
);
24233 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24234 UInt rG
= gregOfRexRM(pfx
,modrm
);
24235 /* FIXME: ALIGNMENT CHECK? */
24236 storeLE( mkexpr(addr
), getXMMRegLane64(rG
, 0));
24237 DIP("vmovsd %s,%s\n", nameXMMReg(rG
), dis_buf
);
24239 goto decode_success
;
24241 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 11 /r */
24243 if (haveF2no66noF3(pfx
) && epartIsReg(getUChar(delta
))) {
24244 UChar modrm
= getUChar(delta
);
24245 UInt rG
= gregOfRexRM(pfx
, modrm
);
24246 UInt rE
= eregOfRexRM(pfx
, modrm
);
24247 UInt rV
= getVexNvvvv(pfx
);
24249 DIP("vmovsd %s,%s,%s\n",
24250 nameXMMReg(rG
), nameXMMReg(rV
), nameXMMReg(rE
));
24251 IRTemp res
= newTemp(Ity_V128
);
24252 assign(res
, binop(Iop_64HLtoV128
,
24253 getXMMRegLane64(rV
, 1),
24254 getXMMRegLane64(rG
, 0)));
24255 putYMMRegLoAndZU(rE
, mkexpr(res
));
24257 goto decode_success
;
24259 /* VMOVSS xmm1, m64 = VEX.LIG.F3.0F.WIG 11 /r */
24260 /* Move 32 bits from G (low 1/4 xmm) to mem only. */
24261 if (haveF3no66noF2(pfx
) && !epartIsReg(getUChar(delta
))) {
24262 UChar modrm
= getUChar(delta
);
24263 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24264 UInt rG
= gregOfRexRM(pfx
,modrm
);
24265 /* FIXME: ALIGNMENT CHECK? */
24266 storeLE( mkexpr(addr
), getXMMRegLane32(rG
, 0));
24267 DIP("vmovss %s,%s\n", nameXMMReg(rG
), dis_buf
);
24269 goto decode_success
;
24271 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 11 /r */
24273 if (haveF3no66noF2(pfx
) && epartIsReg(getUChar(delta
))) {
24274 UChar modrm
= getUChar(delta
);
24275 UInt rG
= gregOfRexRM(pfx
, modrm
);
24276 UInt rE
= eregOfRexRM(pfx
, modrm
);
24277 UInt rV
= getVexNvvvv(pfx
);
24279 DIP("vmovss %s,%s,%s\n",
24280 nameXMMReg(rG
), nameXMMReg(rV
), nameXMMReg(rE
));
24281 IRTemp res
= newTemp(Ity_V128
);
24282 assign( res
, binop( Iop_64HLtoV128
,
24283 getXMMRegLane64(rV
, 1),
24284 binop(Iop_32HLto64
,
24285 getXMMRegLane32(rV
, 1),
24286 getXMMRegLane32(rG
, 0)) ) );
24287 putYMMRegLoAndZU(rE
, mkexpr(res
));
24289 goto decode_success
;
24291 /* VMOVUPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 11 /r */
24292 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24293 UChar modrm
= getUChar(delta
);
24294 UInt rG
= gregOfRexRM(pfx
,modrm
);
24295 if (epartIsReg(modrm
)) {
24296 UInt rE
= eregOfRexRM(pfx
,modrm
);
24297 putYMMRegLoAndZU( rE
, getXMMReg(rG
) );
24298 DIP("vmovupd %s,%s\n", nameXMMReg(rG
), nameXMMReg(rE
));
24301 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24302 storeLE( mkexpr(addr
), getXMMReg(rG
) );
24303 DIP("vmovupd %s,%s\n", nameXMMReg(rG
), dis_buf
);
24306 goto decode_success
;
24308 /* VMOVUPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 11 /r */
24309 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24310 UChar modrm
= getUChar(delta
);
24311 UInt rG
= gregOfRexRM(pfx
,modrm
);
24312 if (epartIsReg(modrm
)) {
24313 UInt rE
= eregOfRexRM(pfx
,modrm
);
24314 putYMMReg( rE
, getYMMReg(rG
) );
24315 DIP("vmovupd %s,%s\n", nameYMMReg(rG
), nameYMMReg(rE
));
24318 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24319 storeLE( mkexpr(addr
), getYMMReg(rG
) );
24320 DIP("vmovupd %s,%s\n", nameYMMReg(rG
), dis_buf
);
24323 goto decode_success
;
24325 /* VMOVUPS xmm1, xmm2/m128 = VEX.128.0F.WIG 11 /r */
24326 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24327 UChar modrm
= getUChar(delta
);
24328 UInt rG
= gregOfRexRM(pfx
,modrm
);
24329 if (epartIsReg(modrm
)) {
24330 UInt rE
= eregOfRexRM(pfx
,modrm
);
24331 putYMMRegLoAndZU( rE
, getXMMReg(rG
) );
24332 DIP("vmovups %s,%s\n", nameXMMReg(rG
), nameXMMReg(rE
));
24335 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24336 storeLE( mkexpr(addr
), getXMMReg(rG
) );
24337 DIP("vmovups %s,%s\n", nameXMMReg(rG
), dis_buf
);
24340 goto decode_success
;
24342 /* VMOVUPS ymm1, ymm2/m256 = VEX.256.0F.WIG 11 /r */
24343 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24344 UChar modrm
= getUChar(delta
);
24345 UInt rG
= gregOfRexRM(pfx
,modrm
);
24346 if (epartIsReg(modrm
)) {
24347 UInt rE
= eregOfRexRM(pfx
,modrm
);
24348 putYMMReg( rE
, getYMMReg(rG
) );
24349 DIP("vmovups %s,%s\n", nameYMMReg(rG
), nameYMMReg(rE
));
24352 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24353 storeLE( mkexpr(addr
), getYMMReg(rG
) );
24354 DIP("vmovups %s,%s\n", nameYMMReg(rG
), dis_buf
);
24357 goto decode_success
;
24362 /* VMOVDDUP xmm2/m64, xmm1 = VEX.128.F2.0F.WIG /12 r */
24363 if (haveF2no66noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24364 delta
= dis_MOVDDUP_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
24365 goto decode_success
;
24367 /* VMOVDDUP ymm2/m256, ymm1 = VEX.256.F2.0F.WIG /12 r */
24368 if (haveF2no66noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24369 delta
= dis_MOVDDUP_256( vbi
, pfx
, delta
);
24370 goto decode_success
;
24372 /* VMOVHLPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 12 /r */
24373 /* Insn only exists in reg form */
24374 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
24375 && epartIsReg(getUChar(delta
))) {
24376 UChar modrm
= getUChar(delta
);
24377 UInt rG
= gregOfRexRM(pfx
, modrm
);
24378 UInt rE
= eregOfRexRM(pfx
, modrm
);
24379 UInt rV
= getVexNvvvv(pfx
);
24381 DIP("vmovhlps %s,%s,%s\n",
24382 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
24383 IRTemp res
= newTemp(Ity_V128
);
24384 assign(res
, binop(Iop_64HLtoV128
,
24385 getXMMRegLane64(rV
, 1),
24386 getXMMRegLane64(rE
, 1)));
24387 putYMMRegLoAndZU(rG
, mkexpr(res
));
24389 goto decode_success
;
24391 /* VMOVLPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 12 /r */
24392 /* Insn exists only in mem form, it appears. */
24393 /* VMOVLPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 12 /r */
24394 /* Insn exists only in mem form, it appears. */
24395 if ((have66noF2noF3(pfx
) || haveNo66noF2noF3(pfx
))
24396 && 0==getVexL(pfx
)/*128*/ && !epartIsReg(getUChar(delta
))) {
24397 UChar modrm
= getUChar(delta
);
24398 UInt rG
= gregOfRexRM(pfx
, modrm
);
24399 UInt rV
= getVexNvvvv(pfx
);
24400 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24402 DIP("vmovlpd %s,%s,%s\n",
24403 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
24404 IRTemp res
= newTemp(Ity_V128
);
24405 assign(res
, binop(Iop_64HLtoV128
,
24406 getXMMRegLane64(rV
, 1),
24407 loadLE(Ity_I64
, mkexpr(addr
))));
24408 putYMMRegLoAndZU(rG
, mkexpr(res
));
24410 goto decode_success
;
24412 /* VMOVSLDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 12 /r */
24413 if (haveF3no66noF2(pfx
) && 0==getVexL(pfx
)/*128*/) {
24414 delta
= dis_MOVSxDUP_128( vbi
, pfx
, delta
, True
/*isAvx*/,
24416 goto decode_success
;
24418 /* VMOVSLDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 12 /r */
24419 if (haveF3no66noF2(pfx
) && 1==getVexL(pfx
)/*256*/) {
24420 delta
= dis_MOVSxDUP_256( vbi
, pfx
, delta
, True
/*isL*/ );
24421 goto decode_success
;
24426 /* VMOVLPS xmm1, m64 = VEX.128.0F.WIG 13 /r */
24427 /* Insn exists only in mem form, it appears. */
24428 /* VMOVLPD xmm1, m64 = VEX.128.66.0F.WIG 13 /r */
24429 /* Insn exists only in mem form, it appears. */
24430 if ((have66noF2noF3(pfx
) || haveNo66noF2noF3(pfx
))
24431 && 0==getVexL(pfx
)/*128*/ && !epartIsReg(getUChar(delta
))) {
24432 UChar modrm
= getUChar(delta
);
24433 UInt rG
= gregOfRexRM(pfx
, modrm
);
24434 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24436 storeLE( mkexpr(addr
), getXMMRegLane64( rG
, 0));
24437 DIP("vmovlpd %s,%s\n", nameXMMReg(rG
), dis_buf
);
24438 goto decode_success
;
24444 /* VUNPCKLPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 14 /r */
24445 /* VUNPCKHPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 15 /r */
24446 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24447 Bool hi
= opc
== 0x15;
24448 UChar modrm
= getUChar(delta
);
24449 UInt rG
= gregOfRexRM(pfx
,modrm
);
24450 UInt rV
= getVexNvvvv(pfx
);
24451 IRTemp eV
= newTemp(Ity_V128
);
24452 IRTemp vV
= newTemp(Ity_V128
);
24453 assign( vV
, getXMMReg(rV
) );
24454 if (epartIsReg(modrm
)) {
24455 UInt rE
= eregOfRexRM(pfx
,modrm
);
24456 assign( eV
, getXMMReg(rE
) );
24458 DIP("vunpck%sps %s,%s\n", hi
? "h" : "l",
24459 nameXMMReg(rE
), nameXMMReg(rG
));
24461 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24462 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
24464 DIP("vunpck%sps %s,%s\n", hi
? "h" : "l",
24465 dis_buf
, nameXMMReg(rG
));
24467 IRTemp res
= math_UNPCKxPS_128( eV
, vV
, hi
);
24468 putYMMRegLoAndZU( rG
, mkexpr(res
) );
24470 goto decode_success
;
24472 /* VUNPCKLPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 14 /r */
24473 /* VUNPCKHPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 15 /r */
24474 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24475 Bool hi
= opc
== 0x15;
24476 UChar modrm
= getUChar(delta
);
24477 UInt rG
= gregOfRexRM(pfx
,modrm
);
24478 UInt rV
= getVexNvvvv(pfx
);
24479 IRTemp eV
= newTemp(Ity_V256
);
24480 IRTemp vV
= newTemp(Ity_V256
);
24481 assign( vV
, getYMMReg(rV
) );
24482 if (epartIsReg(modrm
)) {
24483 UInt rE
= eregOfRexRM(pfx
,modrm
);
24484 assign( eV
, getYMMReg(rE
) );
24486 DIP("vunpck%sps %s,%s\n", hi
? "h" : "l",
24487 nameYMMReg(rE
), nameYMMReg(rG
));
24489 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24490 assign( eV
, loadLE(Ity_V256
, mkexpr(addr
)) );
24492 DIP("vunpck%sps %s,%s\n", hi
? "h" : "l",
24493 dis_buf
, nameYMMReg(rG
));
24495 IRTemp res
= math_UNPCKxPS_256( eV
, vV
, hi
);
24496 putYMMReg( rG
, mkexpr(res
) );
24498 goto decode_success
;
24500 /* VUNPCKLPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 14 /r */
24501 /* VUNPCKHPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 15 /r */
24502 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24503 Bool hi
= opc
== 0x15;
24504 UChar modrm
= getUChar(delta
);
24505 UInt rG
= gregOfRexRM(pfx
,modrm
);
24506 UInt rV
= getVexNvvvv(pfx
);
24507 IRTemp eV
= newTemp(Ity_V128
);
24508 IRTemp vV
= newTemp(Ity_V128
);
24509 assign( vV
, getXMMReg(rV
) );
24510 if (epartIsReg(modrm
)) {
24511 UInt rE
= eregOfRexRM(pfx
,modrm
);
24512 assign( eV
, getXMMReg(rE
) );
24514 DIP("vunpck%spd %s,%s\n", hi
? "h" : "l",
24515 nameXMMReg(rE
), nameXMMReg(rG
));
24517 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24518 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
24520 DIP("vunpck%spd %s,%s\n", hi
? "h" : "l",
24521 dis_buf
, nameXMMReg(rG
));
24523 IRTemp res
= math_UNPCKxPD_128( eV
, vV
, hi
);
24524 putYMMRegLoAndZU( rG
, mkexpr(res
) );
24526 goto decode_success
;
24528 /* VUNPCKLPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 14 /r */
24529 /* VUNPCKHPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 15 /r */
24530 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24531 Bool hi
= opc
== 0x15;
24532 UChar modrm
= getUChar(delta
);
24533 UInt rG
= gregOfRexRM(pfx
,modrm
);
24534 UInt rV
= getVexNvvvv(pfx
);
24535 IRTemp eV
= newTemp(Ity_V256
);
24536 IRTemp vV
= newTemp(Ity_V256
);
24537 assign( vV
, getYMMReg(rV
) );
24538 if (epartIsReg(modrm
)) {
24539 UInt rE
= eregOfRexRM(pfx
,modrm
);
24540 assign( eV
, getYMMReg(rE
) );
24542 DIP("vunpck%spd %s,%s\n", hi
? "h" : "l",
24543 nameYMMReg(rE
), nameYMMReg(rG
));
24545 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24546 assign( eV
, loadLE(Ity_V256
, mkexpr(addr
)) );
24548 DIP("vunpck%spd %s,%s\n", hi
? "h" : "l",
24549 dis_buf
, nameYMMReg(rG
));
24551 IRTemp res
= math_UNPCKxPD_256( eV
, vV
, hi
);
24552 putYMMReg( rG
, mkexpr(res
) );
24554 goto decode_success
;
24559 /* VMOVLHPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 16 /r */
24560 /* Insn only exists in reg form */
24561 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
24562 && epartIsReg(getUChar(delta
))) {
24563 UChar modrm
= getUChar(delta
);
24564 UInt rG
= gregOfRexRM(pfx
, modrm
);
24565 UInt rE
= eregOfRexRM(pfx
, modrm
);
24566 UInt rV
= getVexNvvvv(pfx
);
24568 DIP("vmovlhps %s,%s,%s\n",
24569 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
24570 IRTemp res
= newTemp(Ity_V128
);
24571 assign(res
, binop(Iop_64HLtoV128
,
24572 getXMMRegLane64(rE
, 0),
24573 getXMMRegLane64(rV
, 0)));
24574 putYMMRegLoAndZU(rG
, mkexpr(res
));
24576 goto decode_success
;
24578 /* VMOVHPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 16 /r */
24579 /* Insn exists only in mem form, it appears. */
24580 /* VMOVHPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 16 /r */
24581 /* Insn exists only in mem form, it appears. */
24582 if ((have66noF2noF3(pfx
) || haveNo66noF2noF3(pfx
))
24583 && 0==getVexL(pfx
)/*128*/ && !epartIsReg(getUChar(delta
))) {
24584 UChar modrm
= getUChar(delta
);
24585 UInt rG
= gregOfRexRM(pfx
, modrm
);
24586 UInt rV
= getVexNvvvv(pfx
);
24587 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24589 DIP("vmovhp%c %s,%s,%s\n", have66(pfx
) ? 'd' : 's',
24590 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
24591 IRTemp res
= newTemp(Ity_V128
);
24592 assign(res
, binop(Iop_64HLtoV128
,
24593 loadLE(Ity_I64
, mkexpr(addr
)),
24594 getXMMRegLane64(rV
, 0)));
24595 putYMMRegLoAndZU(rG
, mkexpr(res
));
24597 goto decode_success
;
24599 /* VMOVSHDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 16 /r */
24600 if (haveF3no66noF2(pfx
) && 0==getVexL(pfx
)/*128*/) {
24601 delta
= dis_MOVSxDUP_128( vbi
, pfx
, delta
, True
/*isAvx*/,
24603 goto decode_success
;
24605 /* VMOVSHDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 16 /r */
24606 if (haveF3no66noF2(pfx
) && 1==getVexL(pfx
)/*256*/) {
24607 delta
= dis_MOVSxDUP_256( vbi
, pfx
, delta
, False
/*!isL*/ );
24608 goto decode_success
;
24613 /* VMOVHPS xmm1, m64 = VEX.128.0F.WIG 17 /r */
24614 /* Insn exists only in mem form, it appears. */
24615 /* VMOVHPD xmm1, m64 = VEX.128.66.0F.WIG 17 /r */
24616 /* Insn exists only in mem form, it appears. */
24617 if ((have66noF2noF3(pfx
) || haveNo66noF2noF3(pfx
))
24618 && 0==getVexL(pfx
)/*128*/ && !epartIsReg(getUChar(delta
))) {
24619 UChar modrm
= getUChar(delta
);
24620 UInt rG
= gregOfRexRM(pfx
, modrm
);
24621 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24623 storeLE( mkexpr(addr
), getXMMRegLane64( rG
, 1));
24624 DIP("vmovhp%c %s,%s\n", have66(pfx
) ? 'd' : 's',
24625 nameXMMReg(rG
), dis_buf
);
24626 goto decode_success
;
24631 /* VMOVAPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 28 /r */
24632 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24633 UChar modrm
= getUChar(delta
);
24634 UInt rG
= gregOfRexRM(pfx
, modrm
);
24635 if (epartIsReg(modrm
)) {
24636 UInt rE
= eregOfRexRM(pfx
,modrm
);
24637 putYMMRegLoAndZU( rG
, getXMMReg( rE
));
24638 DIP("vmovapd %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
24641 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24642 gen_SEGV_if_not_16_aligned( addr
);
24643 putYMMRegLoAndZU( rG
, loadLE(Ity_V128
, mkexpr(addr
)) );
24644 DIP("vmovapd %s,%s\n", dis_buf
, nameXMMReg(rG
));
24647 goto decode_success
;
24649 /* VMOVAPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 28 /r */
24650 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24651 UChar modrm
= getUChar(delta
);
24652 UInt rG
= gregOfRexRM(pfx
, modrm
);
24653 if (epartIsReg(modrm
)) {
24654 UInt rE
= eregOfRexRM(pfx
,modrm
);
24655 putYMMReg( rG
, getYMMReg( rE
));
24656 DIP("vmovapd %s,%s\n", nameYMMReg(rE
), nameYMMReg(rG
));
24659 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24660 gen_SEGV_if_not_32_aligned( addr
);
24661 putYMMReg( rG
, loadLE(Ity_V256
, mkexpr(addr
)) );
24662 DIP("vmovapd %s,%s\n", dis_buf
, nameYMMReg(rG
));
24665 goto decode_success
;
24667 /* VMOVAPS xmm2/m128, xmm1 = VEX.128.0F.WIG 28 /r */
24668 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24669 UChar modrm
= getUChar(delta
);
24670 UInt rG
= gregOfRexRM(pfx
, modrm
);
24671 if (epartIsReg(modrm
)) {
24672 UInt rE
= eregOfRexRM(pfx
,modrm
);
24673 putYMMRegLoAndZU( rG
, getXMMReg( rE
));
24674 DIP("vmovaps %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
24677 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24678 gen_SEGV_if_not_16_aligned( addr
);
24679 putYMMRegLoAndZU( rG
, loadLE(Ity_V128
, mkexpr(addr
)) );
24680 DIP("vmovaps %s,%s\n", dis_buf
, nameXMMReg(rG
));
24683 goto decode_success
;
24685 /* VMOVAPS ymm2/m256, ymm1 = VEX.256.0F.WIG 28 /r */
24686 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24687 UChar modrm
= getUChar(delta
);
24688 UInt rG
= gregOfRexRM(pfx
, modrm
);
24689 if (epartIsReg(modrm
)) {
24690 UInt rE
= eregOfRexRM(pfx
,modrm
);
24691 putYMMReg( rG
, getYMMReg( rE
));
24692 DIP("vmovaps %s,%s\n", nameYMMReg(rE
), nameYMMReg(rG
));
24695 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24696 gen_SEGV_if_not_32_aligned( addr
);
24697 putYMMReg( rG
, loadLE(Ity_V256
, mkexpr(addr
)) );
24698 DIP("vmovaps %s,%s\n", dis_buf
, nameYMMReg(rG
));
24701 goto decode_success
;
24706 /* VMOVAPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 29 /r */
24707 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24708 UChar modrm
= getUChar(delta
);
24709 UInt rG
= gregOfRexRM(pfx
,modrm
);
24710 if (epartIsReg(modrm
)) {
24711 UInt rE
= eregOfRexRM(pfx
,modrm
);
24712 putYMMRegLoAndZU( rE
, getXMMReg(rG
) );
24713 DIP("vmovapd %s,%s\n", nameXMMReg(rG
), nameXMMReg(rE
));
24716 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24717 gen_SEGV_if_not_16_aligned( addr
);
24718 storeLE( mkexpr(addr
), getXMMReg(rG
) );
24719 DIP("vmovapd %s,%s\n", nameXMMReg(rG
), dis_buf
);
24722 goto decode_success
;
24724 /* VMOVAPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 29 /r */
24725 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24726 UChar modrm
= getUChar(delta
);
24727 UInt rG
= gregOfRexRM(pfx
,modrm
);
24728 if (epartIsReg(modrm
)) {
24729 UInt rE
= eregOfRexRM(pfx
,modrm
);
24730 putYMMReg( rE
, getYMMReg(rG
) );
24731 DIP("vmovapd %s,%s\n", nameYMMReg(rG
), nameYMMReg(rE
));
24734 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24735 gen_SEGV_if_not_32_aligned( addr
);
24736 storeLE( mkexpr(addr
), getYMMReg(rG
) );
24737 DIP("vmovapd %s,%s\n", nameYMMReg(rG
), dis_buf
);
24740 goto decode_success
;
24742 /* VMOVAPS xmm1, xmm2/m128 = VEX.128.0F.WIG 29 /r */
24743 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24744 UChar modrm
= getUChar(delta
);
24745 UInt rG
= gregOfRexRM(pfx
,modrm
);
24746 if (epartIsReg(modrm
)) {
24747 UInt rE
= eregOfRexRM(pfx
,modrm
);
24748 putYMMRegLoAndZU( rE
, getXMMReg(rG
) );
24749 DIP("vmovaps %s,%s\n", nameXMMReg(rG
), nameXMMReg(rE
));
24751 goto decode_success
;
24753 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24754 gen_SEGV_if_not_16_aligned( addr
);
24755 storeLE( mkexpr(addr
), getXMMReg(rG
) );
24756 DIP("vmovaps %s,%s\n", nameXMMReg(rG
), dis_buf
);
24758 goto decode_success
;
24761 /* VMOVAPS ymm1, ymm2/m256 = VEX.256.0F.WIG 29 /r */
24762 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24763 UChar modrm
= getUChar(delta
);
24764 UInt rG
= gregOfRexRM(pfx
,modrm
);
24765 if (epartIsReg(modrm
)) {
24766 UInt rE
= eregOfRexRM(pfx
,modrm
);
24767 putYMMReg( rE
, getYMMReg(rG
) );
24768 DIP("vmovaps %s,%s\n", nameYMMReg(rG
), nameYMMReg(rE
));
24770 goto decode_success
;
24772 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24773 gen_SEGV_if_not_32_aligned( addr
);
24774 storeLE( mkexpr(addr
), getYMMReg(rG
) );
24775 DIP("vmovaps %s,%s\n", nameYMMReg(rG
), dis_buf
);
24777 goto decode_success
;
24783 IRTemp rmode
= newTemp(Ity_I32
);
24784 assign( rmode
, get_sse_roundingmode() );
24785 /* VCVTSI2SD r/m32, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W0 2A /r */
24786 if (haveF2no66noF3(pfx
) && 0==getRexW(pfx
)/*W0*/) {
24787 UChar modrm
= getUChar(delta
);
24788 UInt rV
= getVexNvvvv(pfx
);
24789 UInt rD
= gregOfRexRM(pfx
, modrm
);
24790 IRTemp arg32
= newTemp(Ity_I32
);
24791 if (epartIsReg(modrm
)) {
24792 UInt rS
= eregOfRexRM(pfx
,modrm
);
24793 assign( arg32
, getIReg32(rS
) );
24795 DIP("vcvtsi2sdl %s,%s,%s\n",
24796 nameIReg32(rS
), nameXMMReg(rV
), nameXMMReg(rD
));
24798 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24799 assign( arg32
, loadLE(Ity_I32
, mkexpr(addr
)) );
24801 DIP("vcvtsi2sdl %s,%s,%s\n",
24802 dis_buf
, nameXMMReg(rV
), nameXMMReg(rD
));
24804 putXMMRegLane64F( rD
, 0,
24805 unop(Iop_I32StoF64
, mkexpr(arg32
)));
24806 putXMMRegLane64( rD
, 1, getXMMRegLane64( rV
, 1 ));
24807 putYMMRegLane128( rD
, 1, mkV128(0) );
24809 goto decode_success
;
24811 /* VCVTSI2SD r/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W1 2A /r */
24812 if (haveF2no66noF3(pfx
) && 1==getRexW(pfx
)/*W1*/) {
24813 UChar modrm
= getUChar(delta
);
24814 UInt rV
= getVexNvvvv(pfx
);
24815 UInt rD
= gregOfRexRM(pfx
, modrm
);
24816 IRTemp arg64
= newTemp(Ity_I64
);
24817 if (epartIsReg(modrm
)) {
24818 UInt rS
= eregOfRexRM(pfx
,modrm
);
24819 assign( arg64
, getIReg64(rS
) );
24821 DIP("vcvtsi2sdq %s,%s,%s\n",
24822 nameIReg64(rS
), nameXMMReg(rV
), nameXMMReg(rD
));
24824 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24825 assign( arg64
, loadLE(Ity_I64
, mkexpr(addr
)) );
24827 DIP("vcvtsi2sdq %s,%s,%s\n",
24828 dis_buf
, nameXMMReg(rV
), nameXMMReg(rD
));
24830 putXMMRegLane64F( rD
, 0,
24831 binop( Iop_I64StoF64
,
24832 get_sse_roundingmode(),
24834 putXMMRegLane64( rD
, 1, getXMMRegLane64( rV
, 1 ));
24835 putYMMRegLane128( rD
, 1, mkV128(0) );
24837 goto decode_success
;
24839 /* VCVTSI2SS r/m64, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W1 2A /r */
24840 if (haveF3no66noF2(pfx
) && 1==getRexW(pfx
)/*W1*/) {
24841 UChar modrm
= getUChar(delta
);
24842 UInt rV
= getVexNvvvv(pfx
);
24843 UInt rD
= gregOfRexRM(pfx
, modrm
);
24844 IRTemp arg64
= newTemp(Ity_I64
);
24845 if (epartIsReg(modrm
)) {
24846 UInt rS
= eregOfRexRM(pfx
,modrm
);
24847 assign( arg64
, getIReg64(rS
) );
24849 DIP("vcvtsi2ssq %s,%s,%s\n",
24850 nameIReg64(rS
), nameXMMReg(rV
), nameXMMReg(rD
));
24852 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24853 assign( arg64
, loadLE(Ity_I64
, mkexpr(addr
)) );
24855 DIP("vcvtsi2ssq %s,%s,%s\n",
24856 dis_buf
, nameXMMReg(rV
), nameXMMReg(rD
));
24858 putXMMRegLane32F( rD
, 0,
24859 binop(Iop_F64toF32
,
24861 binop(Iop_I64StoF64
, mkexpr(rmode
),
24862 mkexpr(arg64
)) ) );
24863 putXMMRegLane32( rD
, 1, getXMMRegLane32( rV
, 1 ));
24864 putXMMRegLane64( rD
, 1, getXMMRegLane64( rV
, 1 ));
24865 putYMMRegLane128( rD
, 1, mkV128(0) );
24867 goto decode_success
;
24869 /* VCVTSI2SS r/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W0 2A /r */
24870 if (haveF3no66noF2(pfx
) && 0==getRexW(pfx
)/*W0*/) {
24871 UChar modrm
= getUChar(delta
);
24872 UInt rV
= getVexNvvvv(pfx
);
24873 UInt rD
= gregOfRexRM(pfx
, modrm
);
24874 IRTemp arg32
= newTemp(Ity_I32
);
24875 if (epartIsReg(modrm
)) {
24876 UInt rS
= eregOfRexRM(pfx
,modrm
);
24877 assign( arg32
, getIReg32(rS
) );
24879 DIP("vcvtsi2ssl %s,%s,%s\n",
24880 nameIReg32(rS
), nameXMMReg(rV
), nameXMMReg(rD
));
24882 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24883 assign( arg32
, loadLE(Ity_I32
, mkexpr(addr
)) );
24885 DIP("vcvtsi2ssl %s,%s,%s\n",
24886 dis_buf
, nameXMMReg(rV
), nameXMMReg(rD
));
24888 putXMMRegLane32F( rD
, 0,
24889 binop(Iop_F64toF32
,
24891 unop(Iop_I32StoF64
, mkexpr(arg32
)) ) );
24892 putXMMRegLane32( rD
, 1, getXMMRegLane32( rV
, 1 ));
24893 putXMMRegLane64( rD
, 1, getXMMRegLane64( rV
, 1 ));
24894 putYMMRegLane128( rD
, 1, mkV128(0) );
24896 goto decode_success
;
24902 /* VMOVNTPD xmm1, m128 = VEX.128.66.0F.WIG 2B /r */
24903 /* VMOVNTPS xmm1, m128 = VEX.128.0F.WIG 2B /r */
24904 if ((have66noF2noF3(pfx
) || haveNo66noF2noF3(pfx
))
24905 && 0==getVexL(pfx
)/*128*/ && !epartIsReg(getUChar(delta
))) {
24906 UChar modrm
= getUChar(delta
);
24907 UInt rS
= gregOfRexRM(pfx
, modrm
);
24908 IRTemp tS
= newTemp(Ity_V128
);
24909 assign(tS
, getXMMReg(rS
));
24910 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24912 gen_SEGV_if_not_16_aligned(addr
);
24913 storeLE(mkexpr(addr
), mkexpr(tS
));
24914 DIP("vmovntp%c %s,%s\n", have66(pfx
) ? 'd' : 's',
24915 nameXMMReg(rS
), dis_buf
);
24916 goto decode_success
;
24918 /* VMOVNTPD ymm1, m256 = VEX.256.66.0F.WIG 2B /r */
24919 /* VMOVNTPS ymm1, m256 = VEX.256.0F.WIG 2B /r */
24920 if ((have66noF2noF3(pfx
) || haveNo66noF2noF3(pfx
))
24921 && 1==getVexL(pfx
)/*256*/ && !epartIsReg(getUChar(delta
))) {
24922 UChar modrm
= getUChar(delta
);
24923 UInt rS
= gregOfRexRM(pfx
, modrm
);
24924 IRTemp tS
= newTemp(Ity_V256
);
24925 assign(tS
, getYMMReg(rS
));
24926 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24928 gen_SEGV_if_not_32_aligned(addr
);
24929 storeLE(mkexpr(addr
), mkexpr(tS
));
24930 DIP("vmovntp%c %s,%s\n", have66(pfx
) ? 'd' : 's',
24931 nameYMMReg(rS
), dis_buf
);
24932 goto decode_success
;
24937 /* VCVTTSD2SI xmm1/m32, r32 = VEX.LIG.F2.0F.W0 2C /r */
24938 if (haveF2no66noF3(pfx
) && 0==getRexW(pfx
)/*W0*/) {
24939 delta
= dis_CVTxSD2SI( vbi
, pfx
, delta
, True
/*isAvx*/, opc
, 4);
24940 goto decode_success
;
24942 /* VCVTTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2C /r */
24943 if (haveF2no66noF3(pfx
) && 1==getRexW(pfx
)/*W1*/) {
24944 delta
= dis_CVTxSD2SI( vbi
, pfx
, delta
, True
/*isAvx*/, opc
, 8);
24945 goto decode_success
;
24947 /* VCVTTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2C /r */
24948 if (haveF3no66noF2(pfx
) && 0==getRexW(pfx
)/*W0*/) {
24949 delta
= dis_CVTxSS2SI( vbi
, pfx
, delta
, True
/*isAvx*/, opc
, 4);
24950 goto decode_success
;
24952 /* VCVTTSS2SI xmm1/m64, r64 = VEX.LIG.F3.0F.W1 2C /r */
24953 if (haveF3no66noF2(pfx
) && 1==getRexW(pfx
)/*W1*/) {
24954 delta
= dis_CVTxSS2SI( vbi
, pfx
, delta
, True
/*isAvx*/, opc
, 8);
24955 goto decode_success
;
24960 /* VCVTSD2SI xmm1/m32, r32 = VEX.LIG.F2.0F.W0 2D /r */
24961 if (haveF2no66noF3(pfx
) && 0==getRexW(pfx
)/*W0*/) {
24962 delta
= dis_CVTxSD2SI( vbi
, pfx
, delta
, True
/*isAvx*/, opc
, 4);
24963 goto decode_success
;
24965 /* VCVTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2D /r */
24966 if (haveF2no66noF3(pfx
) && 1==getRexW(pfx
)/*W1*/) {
24967 delta
= dis_CVTxSD2SI( vbi
, pfx
, delta
, True
/*isAvx*/, opc
, 8);
24968 goto decode_success
;
24970 /* VCVTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2D /r */
24971 if (haveF3no66noF2(pfx
) && 0==getRexW(pfx
)/*W0*/) {
24972 delta
= dis_CVTxSS2SI( vbi
, pfx
, delta
, True
/*isAvx*/, opc
, 4);
24973 goto decode_success
;
24975 /* VCVTSS2SI xmm1/m64, r64 = VEX.LIG.F3.0F.W1 2D /r */
24976 if (haveF3no66noF2(pfx
) && 1==getRexW(pfx
)/*W1*/) {
24977 delta
= dis_CVTxSS2SI( vbi
, pfx
, delta
, True
/*isAvx*/, opc
, 8);
24978 goto decode_success
;
24984 /* VUCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2E /r */
24985 /* VCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2F /r */
24986 if (have66noF2noF3(pfx
)) {
24987 delta
= dis_COMISD( vbi
, pfx
, delta
, True
/*isAvx*/, opc
);
24988 goto decode_success
;
24990 /* VUCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2E /r */
24991 /* VCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2F /r */
24992 if (haveNo66noF2noF3(pfx
)) {
24993 delta
= dis_COMISS( vbi
, pfx
, delta
, True
/*isAvx*/, opc
);
24994 goto decode_success
;
24999 /* VMOVMSKPD xmm2, r32 = VEX.128.66.0F.WIG 50 /r */
25000 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25001 delta
= dis_MOVMSKPD_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
25002 goto decode_success
;
25004 /* VMOVMSKPD ymm2, r32 = VEX.256.66.0F.WIG 50 /r */
25005 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25006 delta
= dis_MOVMSKPD_256( vbi
, pfx
, delta
);
25007 goto decode_success
;
25009 /* VMOVMSKPS xmm2, r32 = VEX.128.0F.WIG 50 /r */
25010 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25011 delta
= dis_MOVMSKPS_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
25012 goto decode_success
;
25014 /* VMOVMSKPS ymm2, r32 = VEX.256.0F.WIG 50 /r */
25015 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25016 delta
= dis_MOVMSKPS_256( vbi
, pfx
, delta
);
25017 goto decode_success
;
25022 /* VSQRTSS xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 51 /r */
25023 if (haveF3no66noF2(pfx
)) {
25024 delta
= dis_AVX128_E_V_to_G_lo32_unary(
25025 uses_vvvv
, vbi
, pfx
, delta
, "vsqrtss", Iop_Sqrt32F0x4
);
25026 goto decode_success
;
25028 /* VSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 51 /r */
25029 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25030 delta
= dis_AVX128_E_to_G_unary_all(
25031 uses_vvvv
, vbi
, pfx
, delta
, "vsqrtps", Iop_Sqrt32Fx4
);
25032 goto decode_success
;
25034 /* VSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 51 /r */
25035 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25036 delta
= dis_AVX256_E_to_G_unary_all(
25037 uses_vvvv
, vbi
, pfx
, delta
, "vsqrtps", Iop_Sqrt32Fx8
);
25038 goto decode_success
;
25040 /* VSQRTSD xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F2.0F.WIG 51 /r */
25041 if (haveF2no66noF3(pfx
)) {
25042 delta
= dis_AVX128_E_V_to_G_lo64_unary(
25043 uses_vvvv
, vbi
, pfx
, delta
, "vsqrtsd", Iop_Sqrt64F0x2
);
25044 goto decode_success
;
25046 /* VSQRTPD xmm2/m128(E), xmm1(G) = VEX.NDS.128.66.0F.WIG 51 /r */
25047 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25048 delta
= dis_AVX128_E_to_G_unary_all(
25049 uses_vvvv
, vbi
, pfx
, delta
, "vsqrtpd", Iop_Sqrt64Fx2
);
25050 goto decode_success
;
25052 /* VSQRTPD ymm2/m256(E), ymm1(G) = VEX.NDS.256.66.0F.WIG 51 /r */
25053 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25054 delta
= dis_AVX256_E_to_G_unary_all(
25055 uses_vvvv
, vbi
, pfx
, delta
, "vsqrtpd", Iop_Sqrt64Fx4
);
25056 goto decode_success
;
25061 /* VRSQRTSS xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 52 /r */
25062 if (haveF3no66noF2(pfx
)) {
25063 delta
= dis_AVX128_E_V_to_G_lo32_unary(
25064 uses_vvvv
, vbi
, pfx
, delta
, "vrsqrtss",
25065 Iop_RSqrtEst32F0x4
);
25066 goto decode_success
;
25068 /* VRSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 52 /r */
25069 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25070 delta
= dis_AVX128_E_to_G_unary_all(
25071 uses_vvvv
, vbi
, pfx
, delta
, "vrsqrtps", Iop_RSqrtEst32Fx4
);
25072 goto decode_success
;
25074 /* VRSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 52 /r */
25075 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25076 delta
= dis_AVX256_E_to_G_unary_all(
25077 uses_vvvv
, vbi
, pfx
, delta
, "vrsqrtps", Iop_RSqrtEst32Fx8
);
25078 goto decode_success
;
25083 /* VRCPSS xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 53 /r */
25084 if (haveF3no66noF2(pfx
)) {
25085 delta
= dis_AVX128_E_V_to_G_lo32_unary(
25086 uses_vvvv
, vbi
, pfx
, delta
, "vrcpss", Iop_RecipEst32F0x4
);
25087 goto decode_success
;
25089 /* VRCPPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 53 /r */
25090 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25091 delta
= dis_AVX128_E_to_G_unary_all(
25092 uses_vvvv
, vbi
, pfx
, delta
, "vrcpps", Iop_RecipEst32Fx4
);
25093 goto decode_success
;
25095 /* VRCPPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 53 /r */
25096 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25097 delta
= dis_AVX256_E_to_G_unary_all(
25098 uses_vvvv
, vbi
, pfx
, delta
, "vrcpps", Iop_RecipEst32Fx8
);
25099 goto decode_success
;
25104 /* VANDPD r/m, rV, r ::: r = rV & r/m */
25105 /* VANDPD = VEX.NDS.128.66.0F.WIG 54 /r */
25106 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25107 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25108 uses_vvvv
, vbi
, pfx
, delta
, "vandpd", Iop_AndV128
);
25109 goto decode_success
;
25111 /* VANDPD r/m, rV, r ::: r = rV & r/m */
25112 /* VANDPD = VEX.NDS.256.66.0F.WIG 54 /r */
25113 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25114 delta
= dis_AVX256_E_V_to_G(
25115 uses_vvvv
, vbi
, pfx
, delta
, "vandpd", Iop_AndV256
);
25116 goto decode_success
;
25118 /* VANDPS = VEX.NDS.128.0F.WIG 54 /r */
25119 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25120 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25121 uses_vvvv
, vbi
, pfx
, delta
, "vandps", Iop_AndV128
);
25122 goto decode_success
;
25124 /* VANDPS = VEX.NDS.256.0F.WIG 54 /r */
25125 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25126 delta
= dis_AVX256_E_V_to_G(
25127 uses_vvvv
, vbi
, pfx
, delta
, "vandps", Iop_AndV256
);
25128 goto decode_success
;
25133 /* VANDNPD r/m, rV, r ::: r = (not rV) & r/m */
25134 /* VANDNPD = VEX.NDS.128.66.0F.WIG 55 /r */
25135 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25136 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25137 uses_vvvv
, vbi
, pfx
, delta
, "vandpd", Iop_AndV128
,
25138 NULL
, True
/*invertLeftArg*/, False
/*swapArgs*/ );
25139 goto decode_success
;
25141 /* VANDNPD = VEX.NDS.256.66.0F.WIG 55 /r */
25142 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25143 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
25144 uses_vvvv
, vbi
, pfx
, delta
, "vandpd", Iop_AndV256
,
25145 NULL
, True
/*invertLeftArg*/, False
/*swapArgs*/ );
25146 goto decode_success
;
25148 /* VANDNPS = VEX.NDS.128.0F.WIG 55 /r */
25149 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25150 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25151 uses_vvvv
, vbi
, pfx
, delta
, "vandps", Iop_AndV128
,
25152 NULL
, True
/*invertLeftArg*/, False
/*swapArgs*/ );
25153 goto decode_success
;
25155 /* VANDNPS = VEX.NDS.256.0F.WIG 55 /r */
25156 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25157 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
25158 uses_vvvv
, vbi
, pfx
, delta
, "vandps", Iop_AndV256
,
25159 NULL
, True
/*invertLeftArg*/, False
/*swapArgs*/ );
25160 goto decode_success
;
25165 /* VORPD r/m, rV, r ::: r = rV | r/m */
25166 /* VORPD = VEX.NDS.128.66.0F.WIG 56 /r */
25167 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25168 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25169 uses_vvvv
, vbi
, pfx
, delta
, "vorpd", Iop_OrV128
);
25170 goto decode_success
;
25172 /* VORPD r/m, rV, r ::: r = rV | r/m */
25173 /* VORPD = VEX.NDS.256.66.0F.WIG 56 /r */
25174 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25175 delta
= dis_AVX256_E_V_to_G(
25176 uses_vvvv
, vbi
, pfx
, delta
, "vorpd", Iop_OrV256
);
25177 goto decode_success
;
25179 /* VORPS r/m, rV, r ::: r = rV | r/m */
25180 /* VORPS = VEX.NDS.128.0F.WIG 56 /r */
25181 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25182 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25183 uses_vvvv
, vbi
, pfx
, delta
, "vorps", Iop_OrV128
);
25184 goto decode_success
;
25186 /* VORPS r/m, rV, r ::: r = rV | r/m */
25187 /* VORPS = VEX.NDS.256.0F.WIG 56 /r */
25188 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25189 delta
= dis_AVX256_E_V_to_G(
25190 uses_vvvv
, vbi
, pfx
, delta
, "vorps", Iop_OrV256
);
25191 goto decode_success
;
25196 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */
25197 /* VXORPD = VEX.NDS.128.66.0F.WIG 57 /r */
25198 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25199 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25200 uses_vvvv
, vbi
, pfx
, delta
, "vxorpd", Iop_XorV128
);
25201 goto decode_success
;
25203 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */
25204 /* VXORPD = VEX.NDS.256.66.0F.WIG 57 /r */
25205 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25206 delta
= dis_AVX256_E_V_to_G(
25207 uses_vvvv
, vbi
, pfx
, delta
, "vxorpd", Iop_XorV256
);
25208 goto decode_success
;
25210 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */
25211 /* VXORPS = VEX.NDS.128.0F.WIG 57 /r */
25212 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25213 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25214 uses_vvvv
, vbi
, pfx
, delta
, "vxorps", Iop_XorV128
);
25215 goto decode_success
;
25217 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */
25218 /* VXORPS = VEX.NDS.256.0F.WIG 57 /r */
25219 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25220 delta
= dis_AVX256_E_V_to_G(
25221 uses_vvvv
, vbi
, pfx
, delta
, "vxorps", Iop_XorV256
);
25222 goto decode_success
;
25227 /* VADDSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 58 /r */
25228 if (haveF2no66noF3(pfx
)) {
25229 delta
= dis_AVX128_E_V_to_G_lo64(
25230 uses_vvvv
, vbi
, pfx
, delta
, "vaddsd", Iop_Add64F0x2
);
25231 goto decode_success
;
25233 /* VADDSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 58 /r */
25234 if (haveF3no66noF2(pfx
)) {
25235 delta
= dis_AVX128_E_V_to_G_lo32(
25236 uses_vvvv
, vbi
, pfx
, delta
, "vaddss", Iop_Add32F0x4
);
25237 goto decode_success
;
25239 /* VADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 58 /r */
25240 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25241 delta
= dis_AVX128_E_V_to_G(
25242 uses_vvvv
, vbi
, pfx
, delta
, "vaddps", Iop_Add32Fx4
);
25243 goto decode_success
;
25245 /* VADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 58 /r */
25246 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25247 delta
= dis_AVX256_E_V_to_G(
25248 uses_vvvv
, vbi
, pfx
, delta
, "vaddps", Iop_Add32Fx8
);
25249 goto decode_success
;
25251 /* VADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 58 /r */
25252 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25253 delta
= dis_AVX128_E_V_to_G(
25254 uses_vvvv
, vbi
, pfx
, delta
, "vaddpd", Iop_Add64Fx2
);
25255 goto decode_success
;
25257 /* VADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 58 /r */
25258 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25259 delta
= dis_AVX256_E_V_to_G(
25260 uses_vvvv
, vbi
, pfx
, delta
, "vaddpd", Iop_Add64Fx4
);
25261 goto decode_success
;
25266 /* VMULSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 59 /r */
25267 if (haveF2no66noF3(pfx
)) {
25268 delta
= dis_AVX128_E_V_to_G_lo64(
25269 uses_vvvv
, vbi
, pfx
, delta
, "vmulsd", Iop_Mul64F0x2
);
25270 goto decode_success
;
25272 /* VMULSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 59 /r */
25273 if (haveF3no66noF2(pfx
)) {
25274 delta
= dis_AVX128_E_V_to_G_lo32(
25275 uses_vvvv
, vbi
, pfx
, delta
, "vmulss", Iop_Mul32F0x4
);
25276 goto decode_success
;
25278 /* VMULPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 59 /r */
25279 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25280 delta
= dis_AVX128_E_V_to_G(
25281 uses_vvvv
, vbi
, pfx
, delta
, "vmulps", Iop_Mul32Fx4
);
25282 goto decode_success
;
25284 /* VMULPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 59 /r */
25285 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25286 delta
= dis_AVX256_E_V_to_G(
25287 uses_vvvv
, vbi
, pfx
, delta
, "vmulps", Iop_Mul32Fx8
);
25288 goto decode_success
;
25290 /* VMULPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 59 /r */
25291 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25292 delta
= dis_AVX128_E_V_to_G(
25293 uses_vvvv
, vbi
, pfx
, delta
, "vmulpd", Iop_Mul64Fx2
);
25294 goto decode_success
;
25296 /* VMULPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 59 /r */
25297 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25298 delta
= dis_AVX256_E_V_to_G(
25299 uses_vvvv
, vbi
, pfx
, delta
, "vmulpd", Iop_Mul64Fx4
);
25300 goto decode_success
;
25305 /* VCVTPS2PD xmm2/m64, xmm1 = VEX.128.0F.WIG 5A /r */
25306 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25307 delta
= dis_CVTPS2PD_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
25308 goto decode_success
;
25310 /* VCVTPS2PD xmm2/m128, ymm1 = VEX.256.0F.WIG 5A /r */
25311 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25312 delta
= dis_CVTPS2PD_256( vbi
, pfx
, delta
);
25313 goto decode_success
;
25315 /* VCVTPD2PS xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5A /r */
25316 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25317 delta
= dis_CVTPD2PS_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
25318 goto decode_success
;
25320 /* VCVTPD2PS ymm2/m256, xmm1 = VEX.256.66.0F.WIG 5A /r */
25321 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25322 delta
= dis_CVTPD2PS_256( vbi
, pfx
, delta
);
25323 goto decode_success
;
25325 /* VCVTSD2SS xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5A /r */
25326 if (haveF2no66noF3(pfx
)) {
25327 UChar modrm
= getUChar(delta
);
25328 UInt rV
= getVexNvvvv(pfx
);
25329 UInt rD
= gregOfRexRM(pfx
, modrm
);
25330 IRTemp f64lo
= newTemp(Ity_F64
);
25331 IRTemp rmode
= newTemp(Ity_I32
);
25332 assign( rmode
, get_sse_roundingmode() );
25333 if (epartIsReg(modrm
)) {
25334 UInt rS
= eregOfRexRM(pfx
,modrm
);
25335 assign(f64lo
, getXMMRegLane64F(rS
, 0));
25337 DIP("vcvtsd2ss %s,%s,%s\n",
25338 nameXMMReg(rS
), nameXMMReg(rV
), nameXMMReg(rD
));
25340 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
25341 assign(f64lo
, loadLE(Ity_F64
, mkexpr(addr
)) );
25343 DIP("vcvtsd2ss %s,%s,%s\n",
25344 dis_buf
, nameXMMReg(rV
), nameXMMReg(rD
));
25346 putXMMRegLane32F( rD
, 0,
25347 binop( Iop_F64toF32
, mkexpr(rmode
),
25349 putXMMRegLane32( rD
, 1, getXMMRegLane32( rV
, 1 ));
25350 putXMMRegLane64( rD
, 1, getXMMRegLane64( rV
, 1 ));
25351 putYMMRegLane128( rD
, 1, mkV128(0) );
25353 goto decode_success
;
25355 /* VCVTSS2SD xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5A /r */
25356 if (haveF3no66noF2(pfx
)) {
25357 UChar modrm
= getUChar(delta
);
25358 UInt rV
= getVexNvvvv(pfx
);
25359 UInt rD
= gregOfRexRM(pfx
, modrm
);
25360 IRTemp f32lo
= newTemp(Ity_F32
);
25361 if (epartIsReg(modrm
)) {
25362 UInt rS
= eregOfRexRM(pfx
,modrm
);
25363 assign(f32lo
, getXMMRegLane32F(rS
, 0));
25365 DIP("vcvtss2sd %s,%s,%s\n",
25366 nameXMMReg(rS
), nameXMMReg(rV
), nameXMMReg(rD
));
25368 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
25369 assign(f32lo
, loadLE(Ity_F32
, mkexpr(addr
)) );
25371 DIP("vcvtss2sd %s,%s,%s\n",
25372 dis_buf
, nameXMMReg(rV
), nameXMMReg(rD
));
25374 putXMMRegLane64F( rD
, 0,
25375 unop( Iop_F32toF64
, mkexpr(f32lo
)) );
25376 putXMMRegLane64( rD
, 1, getXMMRegLane64( rV
, 1 ));
25377 putYMMRegLane128( rD
, 1, mkV128(0) );
25379 goto decode_success
;
25384 /* VCVTPS2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5B /r */
25385 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25386 delta
= dis_CVTxPS2DQ_128( vbi
, pfx
, delta
,
25387 True
/*isAvx*/, False
/*!r2zero*/ );
25388 goto decode_success
;
25390 /* VCVTPS2DQ ymm2/m256, ymm1 = VEX.256.66.0F.WIG 5B /r */
25391 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25392 delta
= dis_CVTxPS2DQ_256( vbi
, pfx
, delta
,
25393 False
/*!r2zero*/ );
25394 goto decode_success
;
25396 /* VCVTTPS2DQ xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 5B /r */
25397 if (haveF3no66noF2(pfx
) && 0==getVexL(pfx
)/*128*/) {
25398 delta
= dis_CVTxPS2DQ_128( vbi
, pfx
, delta
,
25399 True
/*isAvx*/, True
/*r2zero*/ );
25400 goto decode_success
;
25402 /* VCVTTPS2DQ ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 5B /r */
25403 if (haveF3no66noF2(pfx
) && 1==getVexL(pfx
)/*256*/) {
25404 delta
= dis_CVTxPS2DQ_256( vbi
, pfx
, delta
,
25406 goto decode_success
;
25408 /* VCVTDQ2PS xmm2/m128, xmm1 = VEX.128.0F.WIG 5B /r */
25409 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25410 delta
= dis_CVTDQ2PS_128 ( vbi
, pfx
, delta
, True
/*isAvx*/ );
25411 goto decode_success
;
25413 /* VCVTDQ2PS ymm2/m256, ymm1 = VEX.256.0F.WIG 5B /r */
25414 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25415 delta
= dis_CVTDQ2PS_256 ( vbi
, pfx
, delta
);
25416 goto decode_success
;
25421 /* VSUBSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5C /r */
25422 if (haveF2no66noF3(pfx
)) {
25423 delta
= dis_AVX128_E_V_to_G_lo64(
25424 uses_vvvv
, vbi
, pfx
, delta
, "vsubsd", Iop_Sub64F0x2
);
25425 goto decode_success
;
25427 /* VSUBSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5C /r */
25428 if (haveF3no66noF2(pfx
)) {
25429 delta
= dis_AVX128_E_V_to_G_lo32(
25430 uses_vvvv
, vbi
, pfx
, delta
, "vsubss", Iop_Sub32F0x4
);
25431 goto decode_success
;
25433 /* VSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5C /r */
25434 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25435 delta
= dis_AVX128_E_V_to_G(
25436 uses_vvvv
, vbi
, pfx
, delta
, "vsubps", Iop_Sub32Fx4
);
25437 goto decode_success
;
25439 /* VSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5C /r */
25440 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25441 delta
= dis_AVX256_E_V_to_G(
25442 uses_vvvv
, vbi
, pfx
, delta
, "vsubps", Iop_Sub32Fx8
);
25443 goto decode_success
;
25445 /* VSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5C /r */
25446 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25447 delta
= dis_AVX128_E_V_to_G(
25448 uses_vvvv
, vbi
, pfx
, delta
, "vsubpd", Iop_Sub64Fx2
);
25449 goto decode_success
;
25451 /* VSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5C /r */
25452 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25453 delta
= dis_AVX256_E_V_to_G(
25454 uses_vvvv
, vbi
, pfx
, delta
, "vsubpd", Iop_Sub64Fx4
);
25455 goto decode_success
;
25460 /* VMINSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5D /r */
25461 if (haveF2no66noF3(pfx
)) {
25462 delta
= dis_AVX128_E_V_to_G_lo64(
25463 uses_vvvv
, vbi
, pfx
, delta
, "vminsd", Iop_Min64F0x2
);
25464 goto decode_success
;
25466 /* VMINSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5D /r */
25467 if (haveF3no66noF2(pfx
)) {
25468 delta
= dis_AVX128_E_V_to_G_lo32(
25469 uses_vvvv
, vbi
, pfx
, delta
, "vminss", Iop_Min32F0x4
);
25470 goto decode_success
;
25472 /* VMINPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5D /r */
25473 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25474 delta
= dis_AVX128_E_V_to_G(
25475 uses_vvvv
, vbi
, pfx
, delta
, "vminps", Iop_Min32Fx4
);
25476 goto decode_success
;
25478 /* VMINPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5D /r */
25479 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25480 delta
= dis_AVX256_E_V_to_G(
25481 uses_vvvv
, vbi
, pfx
, delta
, "vminps", Iop_Min32Fx8
);
25482 goto decode_success
;
25484 /* VMINPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5D /r */
25485 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25486 delta
= dis_AVX128_E_V_to_G(
25487 uses_vvvv
, vbi
, pfx
, delta
, "vminpd", Iop_Min64Fx2
);
25488 goto decode_success
;
25490 /* VMINPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5D /r */
25491 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25492 delta
= dis_AVX256_E_V_to_G(
25493 uses_vvvv
, vbi
, pfx
, delta
, "vminpd", Iop_Min64Fx4
);
25494 goto decode_success
;
25499 /* VDIVSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5E /r */
25500 if (haveF2no66noF3(pfx
)) {
25501 delta
= dis_AVX128_E_V_to_G_lo64(
25502 uses_vvvv
, vbi
, pfx
, delta
, "vdivsd", Iop_Div64F0x2
);
25503 goto decode_success
;
25505 /* VDIVSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5E /r */
25506 if (haveF3no66noF2(pfx
)) {
25507 delta
= dis_AVX128_E_V_to_G_lo32(
25508 uses_vvvv
, vbi
, pfx
, delta
, "vdivss", Iop_Div32F0x4
);
25509 goto decode_success
;
25511 /* VDIVPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5E /r */
25512 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25513 delta
= dis_AVX128_E_V_to_G(
25514 uses_vvvv
, vbi
, pfx
, delta
, "vdivps", Iop_Div32Fx4
);
25515 goto decode_success
;
25517 /* VDIVPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5E /r */
25518 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25519 delta
= dis_AVX256_E_V_to_G(
25520 uses_vvvv
, vbi
, pfx
, delta
, "vdivps", Iop_Div32Fx8
);
25521 goto decode_success
;
25523 /* VDIVPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5E /r */
25524 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25525 delta
= dis_AVX128_E_V_to_G(
25526 uses_vvvv
, vbi
, pfx
, delta
, "vdivpd", Iop_Div64Fx2
);
25527 goto decode_success
;
25529 /* VDIVPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5E /r */
25530 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25531 delta
= dis_AVX256_E_V_to_G(
25532 uses_vvvv
, vbi
, pfx
, delta
, "vdivpd", Iop_Div64Fx4
);
25533 goto decode_success
;
25538 /* VMAXSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5F /r */
25539 if (haveF2no66noF3(pfx
)) {
25540 delta
= dis_AVX128_E_V_to_G_lo64(
25541 uses_vvvv
, vbi
, pfx
, delta
, "vmaxsd", Iop_Max64F0x2
);
25542 goto decode_success
;
25544 /* VMAXSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5F /r */
25545 if (haveF3no66noF2(pfx
)) {
25546 delta
= dis_AVX128_E_V_to_G_lo32(
25547 uses_vvvv
, vbi
, pfx
, delta
, "vmaxss", Iop_Max32F0x4
);
25548 goto decode_success
;
25550 /* VMAXPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5F /r */
25551 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25552 delta
= dis_AVX128_E_V_to_G(
25553 uses_vvvv
, vbi
, pfx
, delta
, "vmaxps", Iop_Max32Fx4
);
25554 goto decode_success
;
25556 /* VMAXPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5F /r */
25557 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25558 delta
= dis_AVX256_E_V_to_G(
25559 uses_vvvv
, vbi
, pfx
, delta
, "vmaxps", Iop_Max32Fx8
);
25560 goto decode_success
;
25562 /* VMAXPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5F /r */
25563 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25564 delta
= dis_AVX128_E_V_to_G(
25565 uses_vvvv
, vbi
, pfx
, delta
, "vmaxpd", Iop_Max64Fx2
);
25566 goto decode_success
;
25568 /* VMAXPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5F /r */
25569 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25570 delta
= dis_AVX256_E_V_to_G(
25571 uses_vvvv
, vbi
, pfx
, delta
, "vmaxpd", Iop_Max64Fx4
);
25572 goto decode_success
;
25577 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */
25578 /* VPUNPCKLBW = VEX.NDS.128.66.0F.WIG 60 /r */
25579 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25580 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25581 uses_vvvv
, vbi
, pfx
, delta
, "vpunpcklbw",
25582 Iop_InterleaveLO8x16
, NULL
,
25583 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25584 goto decode_success
;
25586 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */
25587 /* VPUNPCKLBW = VEX.NDS.256.66.0F.WIG 60 /r */
25588 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25589 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25590 uses_vvvv
, vbi
, pfx
, delta
, "vpunpcklbw",
25591 math_VPUNPCKLBW_YMM
);
25592 goto decode_success
;
25597 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */
25598 /* VPUNPCKLWD = VEX.NDS.128.66.0F.WIG 61 /r */
25599 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25600 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25601 uses_vvvv
, vbi
, pfx
, delta
, "vpunpcklwd",
25602 Iop_InterleaveLO16x8
, NULL
,
25603 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25604 goto decode_success
;
25606 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */
25607 /* VPUNPCKLWD = VEX.NDS.256.66.0F.WIG 61 /r */
25608 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25609 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25610 uses_vvvv
, vbi
, pfx
, delta
, "vpunpcklwd",
25611 math_VPUNPCKLWD_YMM
);
25612 goto decode_success
;
25617 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */
25618 /* VPUNPCKLDQ = VEX.NDS.128.66.0F.WIG 62 /r */
25619 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25620 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25621 uses_vvvv
, vbi
, pfx
, delta
, "vpunpckldq",
25622 Iop_InterleaveLO32x4
, NULL
,
25623 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25624 goto decode_success
;
25626 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */
25627 /* VPUNPCKLDQ = VEX.NDS.256.66.0F.WIG 62 /r */
25628 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25629 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25630 uses_vvvv
, vbi
, pfx
, delta
, "vpunpckldq",
25631 math_VPUNPCKLDQ_YMM
);
25632 goto decode_success
;
25637 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */
25638 /* VPACKSSWB = VEX.NDS.128.66.0F.WIG 63 /r */
25639 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25640 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25641 uses_vvvv
, vbi
, pfx
, delta
, "vpacksswb",
25642 Iop_QNarrowBin16Sto8Sx16
, NULL
,
25643 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25644 goto decode_success
;
25646 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */
25647 /* VPACKSSWB = VEX.NDS.256.66.0F.WIG 63 /r */
25648 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25649 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25650 uses_vvvv
, vbi
, pfx
, delta
, "vpacksswb",
25651 math_VPACKSSWB_YMM
);
25652 goto decode_success
;
25657 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */
25658 /* VPCMPGTB = VEX.NDS.128.66.0F.WIG 64 /r */
25659 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25660 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25661 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpgtb", Iop_CmpGT8Sx16
);
25662 goto decode_success
;
25664 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */
25665 /* VPCMPGTB = VEX.NDS.256.66.0F.WIG 64 /r */
25666 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25667 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25668 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpgtb", Iop_CmpGT8Sx32
);
25669 goto decode_success
;
25674 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */
25675 /* VPCMPGTW = VEX.NDS.128.66.0F.WIG 65 /r */
25676 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25677 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25678 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpgtw", Iop_CmpGT16Sx8
);
25679 goto decode_success
;
25681 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */
25682 /* VPCMPGTW = VEX.NDS.256.66.0F.WIG 65 /r */
25683 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25684 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25685 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpgtw", Iop_CmpGT16Sx16
);
25686 goto decode_success
;
25691 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */
25692 /* VPCMPGTD = VEX.NDS.128.66.0F.WIG 66 /r */
25693 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25694 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25695 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpgtd", Iop_CmpGT32Sx4
);
25696 goto decode_success
;
25698 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */
25699 /* VPCMPGTD = VEX.NDS.256.66.0F.WIG 66 /r */
25700 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25701 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25702 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpgtd", Iop_CmpGT32Sx8
);
25703 goto decode_success
;
25708 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */
25709 /* VPACKUSWB = VEX.NDS.128.66.0F.WIG 67 /r */
25710 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25711 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25712 uses_vvvv
, vbi
, pfx
, delta
, "vpackuswb",
25713 Iop_QNarrowBin16Sto8Ux16
, NULL
,
25714 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25715 goto decode_success
;
25717 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */
25718 /* VPACKUSWB = VEX.NDS.256.66.0F.WIG 67 /r */
25719 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25720 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25721 uses_vvvv
, vbi
, pfx
, delta
, "vpackuswb",
25722 math_VPACKUSWB_YMM
);
25723 goto decode_success
;
25728 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */
25729 /* VPUNPCKHBW = VEX.NDS.128.0F.WIG 68 /r */
25730 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25731 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25732 uses_vvvv
, vbi
, pfx
, delta
, "vpunpckhbw",
25733 Iop_InterleaveHI8x16
, NULL
,
25734 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25735 goto decode_success
;
25737 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */
25738 /* VPUNPCKHBW = VEX.NDS.256.0F.WIG 68 /r */
25739 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25740 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25741 uses_vvvv
, vbi
, pfx
, delta
, "vpunpckhbw",
25742 math_VPUNPCKHBW_YMM
);
25743 goto decode_success
;
25748 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */
25749 /* VPUNPCKHWD = VEX.NDS.128.0F.WIG 69 /r */
25750 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25751 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25752 uses_vvvv
, vbi
, pfx
, delta
, "vpunpckhwd",
25753 Iop_InterleaveHI16x8
, NULL
,
25754 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25755 goto decode_success
;
25757 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */
25758 /* VPUNPCKHWD = VEX.NDS.256.0F.WIG 69 /r */
25759 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25760 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25761 uses_vvvv
, vbi
, pfx
, delta
, "vpunpckhwd",
25762 math_VPUNPCKHWD_YMM
);
25763 goto decode_success
;
25768 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */
25769 /* VPUNPCKHDQ = VEX.NDS.128.66.0F.WIG 6A /r */
25770 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25771 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25772 uses_vvvv
, vbi
, pfx
, delta
, "vpunpckhdq",
25773 Iop_InterleaveHI32x4
, NULL
,
25774 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25775 goto decode_success
;
25777 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */
25778 /* VPUNPCKHDQ = VEX.NDS.256.66.0F.WIG 6A /r */
25779 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25780 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25781 uses_vvvv
, vbi
, pfx
, delta
, "vpunpckhdq",
25782 math_VPUNPCKHDQ_YMM
);
25783 goto decode_success
;
25788 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */
25789 /* VPACKSSDW = VEX.NDS.128.66.0F.WIG 6B /r */
25790 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25791 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25792 uses_vvvv
, vbi
, pfx
, delta
, "vpackssdw",
25793 Iop_QNarrowBin32Sto16Sx8
, NULL
,
25794 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25795 goto decode_success
;
25797 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */
25798 /* VPACKSSDW = VEX.NDS.256.66.0F.WIG 6B /r */
25799 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25800 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25801 uses_vvvv
, vbi
, pfx
, delta
, "vpackssdw",
25802 math_VPACKSSDW_YMM
);
25803 goto decode_success
;
25808 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */
25809 /* VPUNPCKLQDQ = VEX.NDS.128.0F.WIG 6C /r */
25810 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25811 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25812 uses_vvvv
, vbi
, pfx
, delta
, "vpunpcklqdq",
25813 Iop_InterleaveLO64x2
, NULL
,
25814 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25815 goto decode_success
;
25817 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */
25818 /* VPUNPCKLQDQ = VEX.NDS.256.0F.WIG 6C /r */
25819 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25820 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25821 uses_vvvv
, vbi
, pfx
, delta
, "vpunpcklqdq",
25822 math_VPUNPCKLQDQ_YMM
);
25823 goto decode_success
;
25828 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */
25829 /* VPUNPCKHQDQ = VEX.NDS.128.0F.WIG 6D /r */
25830 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25831 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25832 uses_vvvv
, vbi
, pfx
, delta
, "vpunpckhqdq",
25833 Iop_InterleaveHI64x2
, NULL
,
25834 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25835 goto decode_success
;
25837 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */
25838 /* VPUNPCKHQDQ = VEX.NDS.256.0F.WIG 6D /r */
25839 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25840 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25841 uses_vvvv
, vbi
, pfx
, delta
, "vpunpckhqdq",
25842 math_VPUNPCKHQDQ_YMM
);
25843 goto decode_success
;
25848 /* VMOVD r32/m32, xmm1 = VEX.128.66.0F.W0 6E */
25849 if (have66noF2noF3(pfx
)
25850 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
25851 vassert(sz
== 2); /* even tho we are transferring 4, not 2. */
25852 UChar modrm
= getUChar(delta
);
25853 if (epartIsReg(modrm
)) {
25856 gregOfRexRM(pfx
,modrm
),
25857 unop( Iop_32UtoV128
, getIReg32(eregOfRexRM(pfx
,modrm
)) )
25859 DIP("vmovd %s, %s\n", nameIReg32(eregOfRexRM(pfx
,modrm
)),
25860 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
25862 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
25865 gregOfRexRM(pfx
,modrm
),
25866 unop( Iop_32UtoV128
,loadLE(Ity_I32
, mkexpr(addr
)))
25868 DIP("vmovd %s, %s\n", dis_buf
,
25869 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
25871 goto decode_success
;
25873 /* VMOVQ r64/m64, xmm1 = VEX.128.66.0F.W1 6E */
25874 if (have66noF2noF3(pfx
)
25875 && 0==getVexL(pfx
)/*128*/ && 1==getRexW(pfx
)/*W1*/) {
25876 vassert(sz
== 2); /* even tho we are transferring 8, not 2. */
25877 UChar modrm
= getUChar(delta
);
25878 if (epartIsReg(modrm
)) {
25881 gregOfRexRM(pfx
,modrm
),
25882 unop( Iop_64UtoV128
, getIReg64(eregOfRexRM(pfx
,modrm
)) )
25884 DIP("vmovq %s, %s\n", nameIReg64(eregOfRexRM(pfx
,modrm
)),
25885 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
25887 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
25890 gregOfRexRM(pfx
,modrm
),
25891 unop( Iop_64UtoV128
,loadLE(Ity_I64
, mkexpr(addr
)))
25893 DIP("vmovq %s, %s\n", dis_buf
,
25894 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
25896 goto decode_success
;
25901 /* VMOVDQA ymm2/m256, ymm1 = VEX.256.66.0F.WIG 6F */
25902 /* VMOVDQU ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 6F */
25903 if ((have66noF2noF3(pfx
) || haveF3no66noF2(pfx
))
25904 && 1==getVexL(pfx
)/*256*/) {
25905 UChar modrm
= getUChar(delta
);
25906 UInt rD
= gregOfRexRM(pfx
, modrm
);
25907 IRTemp tD
= newTemp(Ity_V256
);
25908 Bool isA
= have66noF2noF3(pfx
);
25909 HChar ch
= isA
? 'a' : 'u';
25910 if (epartIsReg(modrm
)) {
25911 UInt rS
= eregOfRexRM(pfx
, modrm
);
25913 assign(tD
, getYMMReg(rS
));
25914 DIP("vmovdq%c %s,%s\n", ch
, nameYMMReg(rS
), nameYMMReg(rD
));
25916 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
25919 gen_SEGV_if_not_32_aligned(addr
);
25920 assign(tD
, loadLE(Ity_V256
, mkexpr(addr
)));
25921 DIP("vmovdq%c %s,%s\n", ch
, dis_buf
, nameYMMReg(rD
));
25923 putYMMReg(rD
, mkexpr(tD
));
25924 goto decode_success
;
25926 /* VMOVDQA xmm2/m128, xmm1 = VEX.128.66.0F.WIG 6F */
25927 /* VMOVDQU xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 6F */
25928 if ((have66noF2noF3(pfx
) || haveF3no66noF2(pfx
))
25929 && 0==getVexL(pfx
)/*128*/) {
25930 UChar modrm
= getUChar(delta
);
25931 UInt rD
= gregOfRexRM(pfx
, modrm
);
25932 IRTemp tD
= newTemp(Ity_V128
);
25933 Bool isA
= have66noF2noF3(pfx
);
25934 HChar ch
= isA
? 'a' : 'u';
25935 if (epartIsReg(modrm
)) {
25936 UInt rS
= eregOfRexRM(pfx
, modrm
);
25938 assign(tD
, getXMMReg(rS
));
25939 DIP("vmovdq%c %s,%s\n", ch
, nameXMMReg(rS
), nameXMMReg(rD
));
25941 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
25944 gen_SEGV_if_not_16_aligned(addr
);
25945 assign(tD
, loadLE(Ity_V128
, mkexpr(addr
)));
25946 DIP("vmovdq%c %s,%s\n", ch
, dis_buf
, nameXMMReg(rD
));
25948 putYMMRegLoAndZU(rD
, mkexpr(tD
));
25949 goto decode_success
;
25954 /* VPSHUFD imm8, xmm2/m128, xmm1 = VEX.128.66.0F.WIG 70 /r ib */
25955 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25956 delta
= dis_PSHUFD_32x4( vbi
, pfx
, delta
, True
/*writesYmm*/);
25957 goto decode_success
;
25959 /* VPSHUFD imm8, ymm2/m256, ymm1 = VEX.256.66.0F.WIG 70 /r ib */
25960 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25961 delta
= dis_PSHUFD_32x8( vbi
, pfx
, delta
);
25962 goto decode_success
;
25964 /* VPSHUFLW imm8, xmm2/m128, xmm1 = VEX.128.F2.0F.WIG 70 /r ib */
25965 if (haveF2no66noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25966 delta
= dis_PSHUFxW_128( vbi
, pfx
, delta
,
25967 True
/*isAvx*/, False
/*!xIsH*/ );
25968 goto decode_success
;
25970 /* VPSHUFLW imm8, ymm2/m256, ymm1 = VEX.256.F2.0F.WIG 70 /r ib */
25971 if (haveF2no66noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25972 delta
= dis_PSHUFxW_256( vbi
, pfx
, delta
, False
/*!xIsH*/ );
25973 goto decode_success
;
25975 /* VPSHUFHW imm8, xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 70 /r ib */
25976 if (haveF3no66noF2(pfx
) && 0==getVexL(pfx
)/*128*/) {
25977 delta
= dis_PSHUFxW_128( vbi
, pfx
, delta
,
25978 True
/*isAvx*/, True
/*xIsH*/ );
25979 goto decode_success
;
25981 /* VPSHUFHW imm8, ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 70 /r ib */
25982 if (haveF3no66noF2(pfx
) && 1==getVexL(pfx
)/*256*/) {
25983 delta
= dis_PSHUFxW_256( vbi
, pfx
, delta
, True
/*xIsH*/ );
25984 goto decode_success
;
25989 /* VPSRLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /2 ib */
25990 /* VPSRAW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /4 ib */
25991 /* VPSLLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /6 ib */
25992 if (have66noF2noF3(pfx
)
25993 && 0==getVexL(pfx
)/*128*/
25994 && epartIsReg(getUChar(delta
))) {
25995 if (gregLO3ofRM(getUChar(delta
)) == 2/*SRL*/) {
25996 delta
= dis_AVX128_shiftE_to_V_imm( pfx
, delta
,
25997 "vpsrlw", Iop_ShrN16x8
);
25999 goto decode_success
;
26001 if (gregLO3ofRM(getUChar(delta
)) == 4/*SRA*/) {
26002 delta
= dis_AVX128_shiftE_to_V_imm( pfx
, delta
,
26003 "vpsraw", Iop_SarN16x8
);
26005 goto decode_success
;
26007 if (gregLO3ofRM(getUChar(delta
)) == 6/*SLL*/) {
26008 delta
= dis_AVX128_shiftE_to_V_imm( pfx
, delta
,
26009 "vpsllw", Iop_ShlN16x8
);
26011 goto decode_success
;
26013 /* else fall through */
26015 /* VPSRLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /2 ib */
26016 /* VPSRAW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /4 ib */
26017 /* VPSLLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /6 ib */
26018 if (have66noF2noF3(pfx
)
26019 && 1==getVexL(pfx
)/*256*/
26020 && epartIsReg(getUChar(delta
))) {
26021 if (gregLO3ofRM(getUChar(delta
)) == 2/*SRL*/) {
26022 delta
= dis_AVX256_shiftE_to_V_imm( pfx
, delta
,
26023 "vpsrlw", Iop_ShrN16x16
);
26025 goto decode_success
;
26027 if (gregLO3ofRM(getUChar(delta
)) == 4/*SRA*/) {
26028 delta
= dis_AVX256_shiftE_to_V_imm( pfx
, delta
,
26029 "vpsraw", Iop_SarN16x16
);
26031 goto decode_success
;
26033 if (gregLO3ofRM(getUChar(delta
)) == 6/*SLL*/) {
26034 delta
= dis_AVX256_shiftE_to_V_imm( pfx
, delta
,
26035 "vpsllw", Iop_ShlN16x16
);
26037 goto decode_success
;
26039 /* else fall through */
26044 /* VPSRLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /2 ib */
26045 /* VPSRAD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /4 ib */
26046 /* VPSLLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /6 ib */
26047 if (have66noF2noF3(pfx
)
26048 && 0==getVexL(pfx
)/*128*/
26049 && epartIsReg(getUChar(delta
))) {
26050 if (gregLO3ofRM(getUChar(delta
)) == 2/*SRL*/) {
26051 delta
= dis_AVX128_shiftE_to_V_imm( pfx
, delta
,
26052 "vpsrld", Iop_ShrN32x4
);
26054 goto decode_success
;
26056 if (gregLO3ofRM(getUChar(delta
)) == 4/*SRA*/) {
26057 delta
= dis_AVX128_shiftE_to_V_imm( pfx
, delta
,
26058 "vpsrad", Iop_SarN32x4
);
26060 goto decode_success
;
26062 if (gregLO3ofRM(getUChar(delta
)) == 6/*SLL*/) {
26063 delta
= dis_AVX128_shiftE_to_V_imm( pfx
, delta
,
26064 "vpslld", Iop_ShlN32x4
);
26066 goto decode_success
;
26068 /* else fall through */
26070 /* VPSRLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /2 ib */
26071 /* VPSRAD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /4 ib */
26072 /* VPSLLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /6 ib */
26073 if (have66noF2noF3(pfx
)
26074 && 1==getVexL(pfx
)/*256*/
26075 && epartIsReg(getUChar(delta
))) {
26076 if (gregLO3ofRM(getUChar(delta
)) == 2/*SRL*/) {
26077 delta
= dis_AVX256_shiftE_to_V_imm( pfx
, delta
,
26078 "vpsrld", Iop_ShrN32x8
);
26080 goto decode_success
;
26082 if (gregLO3ofRM(getUChar(delta
)) == 4/*SRA*/) {
26083 delta
= dis_AVX256_shiftE_to_V_imm( pfx
, delta
,
26084 "vpsrad", Iop_SarN32x8
);
26086 goto decode_success
;
26088 if (gregLO3ofRM(getUChar(delta
)) == 6/*SLL*/) {
26089 delta
= dis_AVX256_shiftE_to_V_imm( pfx
, delta
,
26090 "vpslld", Iop_ShlN32x8
);
26092 goto decode_success
;
26094 /* else fall through */
26099 /* VPSRLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /3 ib */
26100 /* VPSLLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /7 ib */
26101 /* VPSRLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /2 ib */
26102 /* VPSLLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /6 ib */
26103 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
26104 && epartIsReg(getUChar(delta
))) {
26105 Int rS
= eregOfRexRM(pfx
,getUChar(delta
));
26106 Int rD
= getVexNvvvv(pfx
);
26107 IRTemp vecS
= newTemp(Ity_V128
);
26108 if (gregLO3ofRM(getUChar(delta
)) == 3) {
26109 Int imm
= (Int
)getUChar(delta
+1);
26110 DIP("vpsrldq $%d,%s,%s\n", imm
, nameXMMReg(rS
), nameXMMReg(rD
));
26112 assign( vecS
, getXMMReg(rS
) );
26113 putYMMRegLoAndZU(rD
, mkexpr(math_PSRLDQ( vecS
, imm
)));
26115 goto decode_success
;
26117 if (gregLO3ofRM(getUChar(delta
)) == 7) {
26118 Int imm
= (Int
)getUChar(delta
+1);
26119 DIP("vpslldq $%d,%s,%s\n", imm
, nameXMMReg(rS
), nameXMMReg(rD
));
26121 assign( vecS
, getXMMReg(rS
) );
26122 putYMMRegLoAndZU(rD
, mkexpr(math_PSLLDQ( vecS
, imm
)));
26124 goto decode_success
;
26126 if (gregLO3ofRM(getUChar(delta
)) == 2) {
26127 delta
= dis_AVX128_shiftE_to_V_imm( pfx
, delta
,
26128 "vpsrlq", Iop_ShrN64x2
);
26130 goto decode_success
;
26132 if (gregLO3ofRM(getUChar(delta
)) == 6) {
26133 delta
= dis_AVX128_shiftE_to_V_imm( pfx
, delta
,
26134 "vpsllq", Iop_ShlN64x2
);
26136 goto decode_success
;
26138 /* else fall through */
26140 /* VPSRLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /3 ib */
26141 /* VPSLLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /7 ib */
26142 /* VPSRLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /2 ib */
26143 /* VPSLLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /6 ib */
26144 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
26145 && epartIsReg(getUChar(delta
))) {
26146 Int rS
= eregOfRexRM(pfx
,getUChar(delta
));
26147 Int rD
= getVexNvvvv(pfx
);
26148 if (gregLO3ofRM(getUChar(delta
)) == 3) {
26149 IRTemp vecS0
= newTemp(Ity_V128
);
26150 IRTemp vecS1
= newTemp(Ity_V128
);
26151 Int imm
= (Int
)getUChar(delta
+1);
26152 DIP("vpsrldq $%d,%s,%s\n", imm
, nameYMMReg(rS
), nameYMMReg(rD
));
26154 assign( vecS0
, getYMMRegLane128(rS
, 0));
26155 assign( vecS1
, getYMMRegLane128(rS
, 1));
26156 putYMMRegLane128(rD
, 0, mkexpr(math_PSRLDQ( vecS0
, imm
)));
26157 putYMMRegLane128(rD
, 1, mkexpr(math_PSRLDQ( vecS1
, imm
)));
26159 goto decode_success
;
26161 if (gregLO3ofRM(getUChar(delta
)) == 7) {
26162 IRTemp vecS0
= newTemp(Ity_V128
);
26163 IRTemp vecS1
= newTemp(Ity_V128
);
26164 Int imm
= (Int
)getUChar(delta
+1);
26165 DIP("vpslldq $%d,%s,%s\n", imm
, nameYMMReg(rS
), nameYMMReg(rD
));
26167 assign( vecS0
, getYMMRegLane128(rS
, 0));
26168 assign( vecS1
, getYMMRegLane128(rS
, 1));
26169 putYMMRegLane128(rD
, 0, mkexpr(math_PSLLDQ( vecS0
, imm
)));
26170 putYMMRegLane128(rD
, 1, mkexpr(math_PSLLDQ( vecS1
, imm
)));
26172 goto decode_success
;
26174 if (gregLO3ofRM(getUChar(delta
)) == 2) {
26175 delta
= dis_AVX256_shiftE_to_V_imm( pfx
, delta
,
26176 "vpsrlq", Iop_ShrN64x4
);
26178 goto decode_success
;
26180 if (gregLO3ofRM(getUChar(delta
)) == 6) {
26181 delta
= dis_AVX256_shiftE_to_V_imm( pfx
, delta
,
26182 "vpsllq", Iop_ShlN64x4
);
26184 goto decode_success
;
26186 /* else fall through */
26191 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */
26192 /* VPCMPEQB = VEX.NDS.128.66.0F.WIG 74 /r */
26193 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26194 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26195 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpeqb", Iop_CmpEQ8x16
);
26196 goto decode_success
;
26198 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */
26199 /* VPCMPEQB = VEX.NDS.256.66.0F.WIG 74 /r */
26200 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26201 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26202 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpeqb", Iop_CmpEQ8x32
);
26203 goto decode_success
;
26208 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */
26209 /* VPCMPEQW = VEX.NDS.128.66.0F.WIG 75 /r */
26210 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26211 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26212 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpeqw", Iop_CmpEQ16x8
);
26213 goto decode_success
;
26215 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */
26216 /* VPCMPEQW = VEX.NDS.256.66.0F.WIG 75 /r */
26217 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26218 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26219 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpeqw", Iop_CmpEQ16x16
);
26220 goto decode_success
;
26225 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */
26226 /* VPCMPEQD = VEX.NDS.128.66.0F.WIG 76 /r */
26227 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26228 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26229 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpeqd", Iop_CmpEQ32x4
);
26230 goto decode_success
;
26232 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */
26233 /* VPCMPEQD = VEX.NDS.256.66.0F.WIG 76 /r */
26234 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26235 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26236 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpeqd", Iop_CmpEQ32x8
);
26237 goto decode_success
;
26242 /* VZEROUPPER = VEX.128.0F.WIG 77 */
26243 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26245 IRTemp zero128
= newTemp(Ity_V128
);
26246 assign(zero128
, mkV128(0));
26247 for (i
= 0; i
< 16; i
++) {
26248 putYMMRegLane128(i
, 1, mkexpr(zero128
));
26250 DIP("vzeroupper\n");
26251 goto decode_success
;
26253 /* VZEROALL = VEX.256.0F.WIG 77 */
26254 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26256 IRTemp zero128
= newTemp(Ity_V128
);
26257 assign(zero128
, mkV128(0));
26258 for (i
= 0; i
< 16; i
++) {
26259 putYMMRegLoAndZU(i
, mkexpr(zero128
));
26262 goto decode_success
;
26268 /* VHADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7C /r */
26269 /* VHSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7D /r */
26270 if (haveF2no66noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26271 IRTemp sV
= newTemp(Ity_V128
);
26272 IRTemp dV
= newTemp(Ity_V128
);
26273 Bool isAdd
= opc
== 0x7C;
26274 const HChar
* str
= isAdd
? "add" : "sub";
26275 UChar modrm
= getUChar(delta
);
26276 UInt rG
= gregOfRexRM(pfx
,modrm
);
26277 UInt rV
= getVexNvvvv(pfx
);
26278 if (epartIsReg(modrm
)) {
26279 UInt rE
= eregOfRexRM(pfx
,modrm
);
26280 assign( sV
, getXMMReg(rE
) );
26281 DIP("vh%spd %s,%s,%s\n", str
, nameXMMReg(rE
),
26282 nameXMMReg(rV
), nameXMMReg(rG
));
26285 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
26286 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
26287 DIP("vh%spd %s,%s,%s\n", str
, dis_buf
,
26288 nameXMMReg(rV
), nameXMMReg(rG
));
26291 assign( dV
, getXMMReg(rV
) );
26292 putYMMRegLoAndZU( rG
, mkexpr( math_HADDPS_128 ( dV
, sV
, isAdd
) ) );
26294 goto decode_success
;
26296 /* VHADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7C /r */
26297 /* VHSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7D /r */
26298 if (haveF2no66noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26299 IRTemp sV
= newTemp(Ity_V256
);
26300 IRTemp dV
= newTemp(Ity_V256
);
26301 IRTemp s1
, s0
, d1
, d0
;
26302 Bool isAdd
= opc
== 0x7C;
26303 const HChar
* str
= isAdd
? "add" : "sub";
26304 UChar modrm
= getUChar(delta
);
26305 UInt rG
= gregOfRexRM(pfx
,modrm
);
26306 UInt rV
= getVexNvvvv(pfx
);
26307 s1
= s0
= d1
= d0
= IRTemp_INVALID
;
26308 if (epartIsReg(modrm
)) {
26309 UInt rE
= eregOfRexRM(pfx
,modrm
);
26310 assign( sV
, getYMMReg(rE
) );
26311 DIP("vh%spd %s,%s,%s\n", str
, nameYMMReg(rE
),
26312 nameYMMReg(rV
), nameYMMReg(rG
));
26315 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
26316 assign( sV
, loadLE(Ity_V256
, mkexpr(addr
)) );
26317 DIP("vh%spd %s,%s,%s\n", str
, dis_buf
,
26318 nameYMMReg(rV
), nameYMMReg(rG
));
26321 assign( dV
, getYMMReg(rV
) );
26322 breakupV256toV128s( dV
, &d1
, &d0
);
26323 breakupV256toV128s( sV
, &s1
, &s0
);
26324 putYMMReg( rG
, binop(Iop_V128HLtoV256
,
26325 mkexpr( math_HADDPS_128 ( d1
, s1
, isAdd
) ),
26326 mkexpr( math_HADDPS_128 ( d0
, s0
, isAdd
) ) ) );
26328 goto decode_success
;
26330 /* VHADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7C /r */
26331 /* VHSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7D /r */
26332 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26333 IRTemp sV
= newTemp(Ity_V128
);
26334 IRTemp dV
= newTemp(Ity_V128
);
26335 Bool isAdd
= opc
== 0x7C;
26336 const HChar
* str
= isAdd
? "add" : "sub";
26337 UChar modrm
= getUChar(delta
);
26338 UInt rG
= gregOfRexRM(pfx
,modrm
);
26339 UInt rV
= getVexNvvvv(pfx
);
26340 if (epartIsReg(modrm
)) {
26341 UInt rE
= eregOfRexRM(pfx
,modrm
);
26342 assign( sV
, getXMMReg(rE
) );
26343 DIP("vh%spd %s,%s,%s\n", str
, nameXMMReg(rE
),
26344 nameXMMReg(rV
), nameXMMReg(rG
));
26347 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
26348 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
26349 DIP("vh%spd %s,%s,%s\n", str
, dis_buf
,
26350 nameXMMReg(rV
), nameXMMReg(rG
));
26353 assign( dV
, getXMMReg(rV
) );
26354 putYMMRegLoAndZU( rG
, mkexpr( math_HADDPD_128 ( dV
, sV
, isAdd
) ) );
26356 goto decode_success
;
26358 /* VHADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7C /r */
26359 /* VHSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7D /r */
26360 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26361 IRTemp sV
= newTemp(Ity_V256
);
26362 IRTemp dV
= newTemp(Ity_V256
);
26363 IRTemp s1
, s0
, d1
, d0
;
26364 Bool isAdd
= opc
== 0x7C;
26365 const HChar
* str
= isAdd
? "add" : "sub";
26366 UChar modrm
= getUChar(delta
);
26367 UInt rG
= gregOfRexRM(pfx
,modrm
);
26368 UInt rV
= getVexNvvvv(pfx
);
26369 s1
= s0
= d1
= d0
= IRTemp_INVALID
;
26370 if (epartIsReg(modrm
)) {
26371 UInt rE
= eregOfRexRM(pfx
,modrm
);
26372 assign( sV
, getYMMReg(rE
) );
26373 DIP("vh%spd %s,%s,%s\n", str
, nameYMMReg(rE
),
26374 nameYMMReg(rV
), nameYMMReg(rG
));
26377 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
26378 assign( sV
, loadLE(Ity_V256
, mkexpr(addr
)) );
26379 DIP("vh%spd %s,%s,%s\n", str
, dis_buf
,
26380 nameYMMReg(rV
), nameYMMReg(rG
));
26383 assign( dV
, getYMMReg(rV
) );
26384 breakupV256toV128s( dV
, &d1
, &d0
);
26385 breakupV256toV128s( sV
, &s1
, &s0
);
26386 putYMMReg( rG
, binop(Iop_V128HLtoV256
,
26387 mkexpr( math_HADDPD_128 ( d1
, s1
, isAdd
) ),
26388 mkexpr( math_HADDPD_128 ( d0
, s0
, isAdd
) ) ) );
26390 goto decode_success
;
26395 /* Note the Intel docs don't make sense for this. I think they
26396 are wrong. They seem to imply it is a store when in fact I
26397 think it is a load. Also it's unclear whether this is W0, W1
26399 /* VMOVQ xmm2/m64, xmm1 = VEX.128.F3.0F.W0 7E /r */
26400 if (haveF3no66noF2(pfx
)
26401 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
26402 vassert(sz
== 4); /* even tho we are transferring 8, not 4. */
26403 UChar modrm
= getUChar(delta
);
26404 UInt rG
= gregOfRexRM(pfx
,modrm
);
26405 if (epartIsReg(modrm
)) {
26406 UInt rE
= eregOfRexRM(pfx
,modrm
);
26407 putXMMRegLane64( rG
, 0, getXMMRegLane64( rE
, 0 ));
26408 DIP("vmovq %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
26411 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
26412 putXMMRegLane64( rG
, 0, loadLE(Ity_I64
, mkexpr(addr
)) );
26413 DIP("vmovq %s,%s\n", dis_buf
, nameXMMReg(rG
));
26416 /* zero bits 255:64 */
26417 putXMMRegLane64( rG
, 1, mkU64(0) );
26418 putYMMRegLane128( rG
, 1, mkV128(0) );
26419 goto decode_success
;
26421 /* VMOVQ xmm1, r64 = VEX.128.66.0F.W1 7E /r (reg case only) */
26422 /* Moves from G to E, so is a store-form insn */
26423 /* Intel docs list this in the VMOVD entry for some reason. */
26424 if (have66noF2noF3(pfx
)
26425 && 0==getVexL(pfx
)/*128*/ && 1==getRexW(pfx
)/*W1*/) {
26426 UChar modrm
= getUChar(delta
);
26427 UInt rG
= gregOfRexRM(pfx
,modrm
);
26428 if (epartIsReg(modrm
)) {
26429 UInt rE
= eregOfRexRM(pfx
,modrm
);
26430 DIP("vmovq %s,%s\n", nameXMMReg(rG
), nameIReg64(rE
));
26431 putIReg64(rE
, getXMMRegLane64(rG
, 0));
26434 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
26435 storeLE( mkexpr(addr
), getXMMRegLane64(rG
, 0) );
26436 DIP("vmovq %s,%s\n", dis_buf
, nameXMMReg(rG
));
26439 goto decode_success
;
26441 /* VMOVD xmm1, m32/r32 = VEX.128.66.0F.W0 7E /r (reg case only) */
26442 /* Moves from G to E, so is a store-form insn */
26443 if (have66noF2noF3(pfx
)
26444 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
26445 UChar modrm
= getUChar(delta
);
26446 UInt rG
= gregOfRexRM(pfx
,modrm
);
26447 if (epartIsReg(modrm
)) {
26448 UInt rE
= eregOfRexRM(pfx
,modrm
);
26449 DIP("vmovd %s,%s\n", nameXMMReg(rG
), nameIReg32(rE
));
26450 putIReg32(rE
, getXMMRegLane32(rG
, 0));
26453 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
26454 storeLE( mkexpr(addr
), getXMMRegLane32(rG
, 0) );
26455 DIP("vmovd %s,%s\n", dis_buf
, nameXMMReg(rG
));
26458 goto decode_success
;
26463 /* VMOVDQA ymm1, ymm2/m256 = VEX.256.66.0F.WIG 7F */
26464 /* VMOVDQU ymm1, ymm2/m256 = VEX.256.F3.0F.WIG 7F */
26465 if ((have66noF2noF3(pfx
) || haveF3no66noF2(pfx
))
26466 && 1==getVexL(pfx
)/*256*/) {
26467 UChar modrm
= getUChar(delta
);
26468 UInt rS
= gregOfRexRM(pfx
, modrm
);
26469 IRTemp tS
= newTemp(Ity_V256
);
26470 Bool isA
= have66noF2noF3(pfx
);
26471 HChar ch
= isA
? 'a' : 'u';
26472 assign(tS
, getYMMReg(rS
));
26473 if (epartIsReg(modrm
)) {
26474 UInt rD
= eregOfRexRM(pfx
, modrm
);
26476 putYMMReg(rD
, mkexpr(tS
));
26477 DIP("vmovdq%c %s,%s\n", ch
, nameYMMReg(rS
), nameYMMReg(rD
));
26479 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
26482 gen_SEGV_if_not_32_aligned(addr
);
26483 storeLE(mkexpr(addr
), mkexpr(tS
));
26484 DIP("vmovdq%c %s,%s\n", ch
, nameYMMReg(rS
), dis_buf
);
26486 goto decode_success
;
26488 /* VMOVDQA xmm1, xmm2/m128 = VEX.128.66.0F.WIG 7F */
26489 /* VMOVDQU xmm1, xmm2/m128 = VEX.128.F3.0F.WIG 7F */
26490 if ((have66noF2noF3(pfx
) || haveF3no66noF2(pfx
))
26491 && 0==getVexL(pfx
)/*128*/) {
26492 UChar modrm
= getUChar(delta
);
26493 UInt rS
= gregOfRexRM(pfx
, modrm
);
26494 IRTemp tS
= newTemp(Ity_V128
);
26495 Bool isA
= have66noF2noF3(pfx
);
26496 HChar ch
= isA
? 'a' : 'u';
26497 assign(tS
, getXMMReg(rS
));
26498 if (epartIsReg(modrm
)) {
26499 UInt rD
= eregOfRexRM(pfx
, modrm
);
26501 putYMMRegLoAndZU(rD
, mkexpr(tS
));
26502 DIP("vmovdq%c %s,%s\n", ch
, nameXMMReg(rS
), nameXMMReg(rD
));
26504 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
26507 gen_SEGV_if_not_16_aligned(addr
);
26508 storeLE(mkexpr(addr
), mkexpr(tS
));
26509 DIP("vmovdq%c %s,%s\n", ch
, nameXMMReg(rS
), dis_buf
);
26511 goto decode_success
;
26516 /* VSTMXCSR m32 = VEX.LZ.0F.WIG AE /3 */
26517 if (haveNo66noF2noF3(pfx
)
26518 && 0==getVexL(pfx
)/*LZ*/
26519 && 0==getRexW(pfx
) /* be paranoid -- Intel docs don't require this */
26520 && !epartIsReg(getUChar(delta
)) && gregLO3ofRM(getUChar(delta
)) == 3
26522 delta
= dis_STMXCSR(vbi
, pfx
, delta
, True
/*isAvx*/);
26523 goto decode_success
;
26525 /* VLDMXCSR m32 = VEX.LZ.0F.WIG AE /2 */
26526 if (haveNo66noF2noF3(pfx
)
26527 && 0==getVexL(pfx
)/*LZ*/
26528 && 0==getRexW(pfx
) /* be paranoid -- Intel docs don't require this */
26529 && !epartIsReg(getUChar(delta
)) && gregLO3ofRM(getUChar(delta
)) == 2
26531 delta
= dis_LDMXCSR(vbi
, pfx
, delta
, True
/*isAvx*/);
26532 goto decode_success
;
26537 /* VCMPSD xmm3/m64(E=argL), xmm2(V=argR), xmm1(G) */
26538 /* = VEX.NDS.LIG.F2.0F.WIG C2 /r ib */
26539 if (haveF2no66noF3(pfx
)) {
26540 Long delta0
= delta
;
26541 delta
= dis_AVX128_cmp_V_E_to_G( uses_vvvv
, vbi
, pfx
, delta
,
26542 "vcmpsd", False
/*!all_lanes*/,
26544 if (delta
> delta0
) goto decode_success
;
26545 /* else fall through -- decoding has failed */
26547 /* VCMPSS xmm3/m32(E=argL), xmm2(V=argR), xmm1(G) */
26548 /* = VEX.NDS.LIG.F3.0F.WIG C2 /r ib */
26549 if (haveF3no66noF2(pfx
)) {
26550 Long delta0
= delta
;
26551 delta
= dis_AVX128_cmp_V_E_to_G( uses_vvvv
, vbi
, pfx
, delta
,
26552 "vcmpss", False
/*!all_lanes*/,
26554 if (delta
> delta0
) goto decode_success
;
26555 /* else fall through -- decoding has failed */
26557 /* VCMPPD xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
26558 /* = VEX.NDS.128.66.0F.WIG C2 /r ib */
26559 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26560 Long delta0
= delta
;
26561 delta
= dis_AVX128_cmp_V_E_to_G( uses_vvvv
, vbi
, pfx
, delta
,
26562 "vcmppd", True
/*all_lanes*/,
26564 if (delta
> delta0
) goto decode_success
;
26565 /* else fall through -- decoding has failed */
26567 /* VCMPPD ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
26568 /* = VEX.NDS.256.66.0F.WIG C2 /r ib */
26569 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26570 Long delta0
= delta
;
26571 delta
= dis_AVX256_cmp_V_E_to_G( uses_vvvv
, vbi
, pfx
, delta
,
26572 "vcmppd", 8/*sz*/);
26573 if (delta
> delta0
) goto decode_success
;
26574 /* else fall through -- decoding has failed */
26576 /* VCMPPS xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
26577 /* = VEX.NDS.128.0F.WIG C2 /r ib */
26578 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26579 Long delta0
= delta
;
26580 delta
= dis_AVX128_cmp_V_E_to_G( uses_vvvv
, vbi
, pfx
, delta
,
26581 "vcmpps", True
/*all_lanes*/,
26583 if (delta
> delta0
) goto decode_success
;
26584 /* else fall through -- decoding has failed */
26586 /* VCMPPS ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
26587 /* = VEX.NDS.256.0F.WIG C2 /r ib */
26588 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26589 Long delta0
= delta
;
26590 delta
= dis_AVX256_cmp_V_E_to_G( uses_vvvv
, vbi
, pfx
, delta
,
26591 "vcmpps", 4/*sz*/);
26592 if (delta
> delta0
) goto decode_success
;
26593 /* else fall through -- decoding has failed */
26598 /* VPINSRW r32/m16, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG C4 /r ib */
26599 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26600 UChar modrm
= getUChar(delta
);
26601 UInt rG
= gregOfRexRM(pfx
, modrm
);
26602 UInt rV
= getVexNvvvv(pfx
);
26604 IRTemp new16
= newTemp(Ity_I16
);
26606 if ( epartIsReg( modrm
) ) {
26607 imm8
= (Int
)(getUChar(delta
+1) & 7);
26608 assign( new16
, unop(Iop_32to16
,
26609 getIReg32(eregOfRexRM(pfx
,modrm
))) );
26611 DIP( "vpinsrw $%d,%s,%s\n", imm8
,
26612 nameIReg32( eregOfRexRM(pfx
, modrm
) ), nameXMMReg(rG
) );
26614 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
26615 imm8
= (Int
)(getUChar(delta
+alen
) & 7);
26616 assign( new16
, loadLE( Ity_I16
, mkexpr(addr
) ));
26618 DIP( "vpinsrw $%d,%s,%s\n",
26619 imm8
, dis_buf
, nameXMMReg(rG
) );
26622 IRTemp src_vec
= newTemp(Ity_V128
);
26623 assign(src_vec
, getXMMReg( rV
));
26624 IRTemp res_vec
= math_PINSRW_128( src_vec
, new16
, imm8
);
26625 putYMMRegLoAndZU( rG
, mkexpr(res_vec
) );
26627 goto decode_success
;
26632 /* VPEXTRW imm8, xmm1, reg32 = VEX.128.66.0F.W0 C5 /r ib */
26633 if (have66noF2noF3(pfx
)
26634 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
26635 Long delta0
= delta
;
26636 delta
= dis_PEXTRW_128_EregOnly_toG( vbi
, pfx
, delta
,
26638 if (delta
> delta0
) goto decode_success
;
26639 /* else fall through -- decoding has failed */
26644 /* VSHUFPS imm8, xmm3/m128, xmm2, xmm1, xmm2 */
26645 /* = VEX.NDS.128.0F.WIG C6 /r ib */
26646 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26648 IRTemp eV
= newTemp(Ity_V128
);
26649 IRTemp vV
= newTemp(Ity_V128
);
26650 UInt modrm
= getUChar(delta
);
26651 UInt rG
= gregOfRexRM(pfx
,modrm
);
26652 UInt rV
= getVexNvvvv(pfx
);
26653 assign( vV
, getXMMReg(rV
) );
26654 if (epartIsReg(modrm
)) {
26655 UInt rE
= eregOfRexRM(pfx
,modrm
);
26656 assign( eV
, getXMMReg(rE
) );
26657 imm8
= (Int
)getUChar(delta
+1);
26659 DIP("vshufps $%d,%s,%s,%s\n",
26660 imm8
, nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
26662 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
26663 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
26664 imm8
= (Int
)getUChar(delta
+alen
);
26666 DIP("vshufps $%d,%s,%s,%s\n",
26667 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
26669 IRTemp res
= math_SHUFPS_128( eV
, vV
, imm8
);
26670 putYMMRegLoAndZU( rG
, mkexpr(res
) );
26672 goto decode_success
;
26674 /* VSHUFPS imm8, ymm3/m256, ymm2, ymm1, ymm2 */
26675 /* = VEX.NDS.256.0F.WIG C6 /r ib */
26676 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26678 IRTemp eV
= newTemp(Ity_V256
);
26679 IRTemp vV
= newTemp(Ity_V256
);
26680 UInt modrm
= getUChar(delta
);
26681 UInt rG
= gregOfRexRM(pfx
,modrm
);
26682 UInt rV
= getVexNvvvv(pfx
);
26683 assign( vV
, getYMMReg(rV
) );
26684 if (epartIsReg(modrm
)) {
26685 UInt rE
= eregOfRexRM(pfx
,modrm
);
26686 assign( eV
, getYMMReg(rE
) );
26687 imm8
= (Int
)getUChar(delta
+1);
26689 DIP("vshufps $%d,%s,%s,%s\n",
26690 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
26692 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
26693 assign( eV
, loadLE(Ity_V256
, mkexpr(addr
)) );
26694 imm8
= (Int
)getUChar(delta
+alen
);
26696 DIP("vshufps $%d,%s,%s,%s\n",
26697 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
26699 IRTemp res
= math_SHUFPS_256( eV
, vV
, imm8
);
26700 putYMMReg( rG
, mkexpr(res
) );
26702 goto decode_success
;
26704 /* VSHUFPD imm8, xmm3/m128, xmm2, xmm1, xmm2 */
26705 /* = VEX.NDS.128.66.0F.WIG C6 /r ib */
26706 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26708 IRTemp eV
= newTemp(Ity_V128
);
26709 IRTemp vV
= newTemp(Ity_V128
);
26710 UInt modrm
= getUChar(delta
);
26711 UInt rG
= gregOfRexRM(pfx
,modrm
);
26712 UInt rV
= getVexNvvvv(pfx
);
26713 assign( vV
, getXMMReg(rV
) );
26714 if (epartIsReg(modrm
)) {
26715 UInt rE
= eregOfRexRM(pfx
,modrm
);
26716 assign( eV
, getXMMReg(rE
) );
26717 imm8
= (Int
)getUChar(delta
+1);
26719 DIP("vshufpd $%d,%s,%s,%s\n",
26720 imm8
, nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
26722 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
26723 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
26724 imm8
= (Int
)getUChar(delta
+alen
);
26726 DIP("vshufpd $%d,%s,%s,%s\n",
26727 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
26729 IRTemp res
= math_SHUFPD_128( eV
, vV
, imm8
);
26730 putYMMRegLoAndZU( rG
, mkexpr(res
) );
26732 goto decode_success
;
26734 /* VSHUFPD imm8, ymm3/m256, ymm2, ymm1, ymm2 */
26735 /* = VEX.NDS.256.66.0F.WIG C6 /r ib */
26736 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26738 IRTemp eV
= newTemp(Ity_V256
);
26739 IRTemp vV
= newTemp(Ity_V256
);
26740 UInt modrm
= getUChar(delta
);
26741 UInt rG
= gregOfRexRM(pfx
,modrm
);
26742 UInt rV
= getVexNvvvv(pfx
);
26743 assign( vV
, getYMMReg(rV
) );
26744 if (epartIsReg(modrm
)) {
26745 UInt rE
= eregOfRexRM(pfx
,modrm
);
26746 assign( eV
, getYMMReg(rE
) );
26747 imm8
= (Int
)getUChar(delta
+1);
26749 DIP("vshufpd $%d,%s,%s,%s\n",
26750 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
26752 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
26753 assign( eV
, loadLE(Ity_V256
, mkexpr(addr
)) );
26754 imm8
= (Int
)getUChar(delta
+alen
);
26756 DIP("vshufpd $%d,%s,%s,%s\n",
26757 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
26759 IRTemp res
= math_SHUFPD_256( eV
, vV
, imm8
);
26760 putYMMReg( rG
, mkexpr(res
) );
26762 goto decode_success
;
26767 /* VADDSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D0 /r */
26768 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26769 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
26770 uses_vvvv
, vbi
, pfx
, delta
,
26771 "vaddsubpd", math_ADDSUBPD_128
);
26772 goto decode_success
;
26774 /* VADDSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D0 /r */
26775 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26776 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
26777 uses_vvvv
, vbi
, pfx
, delta
,
26778 "vaddsubpd", math_ADDSUBPD_256
);
26779 goto decode_success
;
26781 /* VADDSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG D0 /r */
26782 if (haveF2no66noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26783 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
26784 uses_vvvv
, vbi
, pfx
, delta
,
26785 "vaddsubps", math_ADDSUBPS_128
);
26786 goto decode_success
;
26788 /* VADDSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG D0 /r */
26789 if (haveF2no66noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26790 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
26791 uses_vvvv
, vbi
, pfx
, delta
,
26792 "vaddsubps", math_ADDSUBPS_256
);
26793 goto decode_success
;
26798 /* VPSRLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D1 /r */
26799 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26800 delta
= dis_AVX128_shiftV_byE( vbi
, pfx
, delta
,
26801 "vpsrlw", Iop_ShrN16x8
);
26803 goto decode_success
;
26806 /* VPSRLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D1 /r */
26807 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26808 delta
= dis_AVX256_shiftV_byE( vbi
, pfx
, delta
,
26809 "vpsrlw", Iop_ShrN16x16
);
26811 goto decode_success
;
26817 /* VPSRLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D2 /r */
26818 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26819 delta
= dis_AVX128_shiftV_byE( vbi
, pfx
, delta
,
26820 "vpsrld", Iop_ShrN32x4
);
26822 goto decode_success
;
26824 /* VPSRLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D2 /r */
26825 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26826 delta
= dis_AVX256_shiftV_byE( vbi
, pfx
, delta
,
26827 "vpsrld", Iop_ShrN32x8
);
26829 goto decode_success
;
26834 /* VPSRLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D3 /r */
26835 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26836 delta
= dis_AVX128_shiftV_byE( vbi
, pfx
, delta
,
26837 "vpsrlq", Iop_ShrN64x2
);
26839 goto decode_success
;
26841 /* VPSRLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D3 /r */
26842 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26843 delta
= dis_AVX256_shiftV_byE( vbi
, pfx
, delta
,
26844 "vpsrlq", Iop_ShrN64x4
);
26846 goto decode_success
;
26851 /* VPADDQ r/m, rV, r ::: r = rV + r/m */
26852 /* VPADDQ = VEX.NDS.128.66.0F.WIG D4 /r */
26853 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26854 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26855 uses_vvvv
, vbi
, pfx
, delta
, "vpaddq", Iop_Add64x2
);
26856 goto decode_success
;
26858 /* VPADDQ r/m, rV, r ::: r = rV + r/m */
26859 /* VPADDQ = VEX.NDS.256.66.0F.WIG D4 /r */
26860 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26861 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26862 uses_vvvv
, vbi
, pfx
, delta
, "vpaddq", Iop_Add64x4
);
26863 goto decode_success
;
26868 /* VPMULLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D5 /r */
26869 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26870 delta
= dis_AVX128_E_V_to_G(
26871 uses_vvvv
, vbi
, pfx
, delta
, "vpmullw", Iop_Mul16x8
);
26872 goto decode_success
;
26874 /* VPMULLW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D5 /r */
26875 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26876 delta
= dis_AVX256_E_V_to_G(
26877 uses_vvvv
, vbi
, pfx
, delta
, "vpmullw", Iop_Mul16x16
);
26878 goto decode_success
;
26883 /* I can't even find any Intel docs for this one. */
26884 /* Basically: 66 0F D6 = MOVQ -- move 64 bits from G (lo half
26885 xmm) to E (mem or lo half xmm). Looks like L==0(128), W==0
26887 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
26888 && 0==getRexW(pfx
)/*this might be redundant, dunno*/) {
26889 UChar modrm
= getUChar(delta
);
26890 UInt rG
= gregOfRexRM(pfx
,modrm
);
26891 if (epartIsReg(modrm
)) {
26892 /* fall through, awaiting test case */
26893 /* dst: lo half copied, hi half zeroed */
26895 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
26896 storeLE( mkexpr(addr
), getXMMRegLane64( rG
, 0 ));
26897 DIP("vmovq %s,%s\n", nameXMMReg(rG
), dis_buf
);
26899 goto decode_success
;
26905 /* VEX.128.66.0F.WIG D7 /r = VPMOVMSKB xmm1, r32 */
26906 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26907 delta
= dis_PMOVMSKB_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
26908 goto decode_success
;
26910 /* VEX.128.66.0F.WIG D7 /r = VPMOVMSKB ymm1, r32 */
26911 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26912 delta
= dis_PMOVMSKB_256( vbi
, pfx
, delta
);
26913 goto decode_success
;
26918 /* VPSUBUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D8 /r */
26919 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26920 delta
= dis_AVX128_E_V_to_G(
26921 uses_vvvv
, vbi
, pfx
, delta
, "vpsubusb", Iop_QSub8Ux16
);
26922 goto decode_success
;
26924 /* VPSUBUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D8 /r */
26925 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26926 delta
= dis_AVX256_E_V_to_G(
26927 uses_vvvv
, vbi
, pfx
, delta
, "vpsubusb", Iop_QSub8Ux32
);
26928 goto decode_success
;
26933 /* VPSUBUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D9 /r */
26934 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26935 delta
= dis_AVX128_E_V_to_G(
26936 uses_vvvv
, vbi
, pfx
, delta
, "vpsubusw", Iop_QSub16Ux8
);
26937 goto decode_success
;
26939 /* VPSUBUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D9 /r */
26940 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26941 delta
= dis_AVX256_E_V_to_G(
26942 uses_vvvv
, vbi
, pfx
, delta
, "vpsubusw", Iop_QSub16Ux16
);
26943 goto decode_success
;
26948 /* VPMINUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DA /r */
26949 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26950 delta
= dis_AVX128_E_V_to_G(
26951 uses_vvvv
, vbi
, pfx
, delta
, "vpminub", Iop_Min8Ux16
);
26952 goto decode_success
;
26954 /* VPMINUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DA /r */
26955 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26956 delta
= dis_AVX256_E_V_to_G(
26957 uses_vvvv
, vbi
, pfx
, delta
, "vpminub", Iop_Min8Ux32
);
26958 goto decode_success
;
26963 /* VPAND r/m, rV, r ::: r = rV & r/m */
26964 /* VEX.NDS.128.66.0F.WIG DB /r = VPAND xmm3/m128, xmm2, xmm1 */
26965 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26966 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26967 uses_vvvv
, vbi
, pfx
, delta
, "vpand", Iop_AndV128
);
26968 goto decode_success
;
26970 /* VPAND r/m, rV, r ::: r = rV & r/m */
26971 /* VEX.NDS.256.66.0F.WIG DB /r = VPAND ymm3/m256, ymm2, ymm1 */
26972 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26973 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26974 uses_vvvv
, vbi
, pfx
, delta
, "vpand", Iop_AndV256
);
26975 goto decode_success
;
26980 /* VPADDUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DC /r */
26981 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26982 delta
= dis_AVX128_E_V_to_G(
26983 uses_vvvv
, vbi
, pfx
, delta
, "vpaddusb", Iop_QAdd8Ux16
);
26984 goto decode_success
;
26986 /* VPADDUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DC /r */
26987 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26988 delta
= dis_AVX256_E_V_to_G(
26989 uses_vvvv
, vbi
, pfx
, delta
, "vpaddusb", Iop_QAdd8Ux32
);
26990 goto decode_success
;
26995 /* VPADDUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DD /r */
26996 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26997 delta
= dis_AVX128_E_V_to_G(
26998 uses_vvvv
, vbi
, pfx
, delta
, "vpaddusw", Iop_QAdd16Ux8
);
26999 goto decode_success
;
27001 /* VPADDUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DD /r */
27002 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27003 delta
= dis_AVX256_E_V_to_G(
27004 uses_vvvv
, vbi
, pfx
, delta
, "vpaddusw", Iop_QAdd16Ux16
);
27005 goto decode_success
;
27010 /* VPMAXUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DE /r */
27011 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27012 delta
= dis_AVX128_E_V_to_G(
27013 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxub", Iop_Max8Ux16
);
27014 goto decode_success
;
27016 /* VPMAXUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DE /r */
27017 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27018 delta
= dis_AVX256_E_V_to_G(
27019 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxub", Iop_Max8Ux32
);
27020 goto decode_success
;
27025 /* VPANDN r/m, rV, r ::: r = rV & ~r/m (is that correct, re the ~ ?) */
27026 /* VEX.NDS.128.66.0F.WIG DF /r = VPANDN xmm3/m128, xmm2, xmm1 */
27027 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27028 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
27029 uses_vvvv
, vbi
, pfx
, delta
, "vpandn", Iop_AndV128
,
27030 NULL
, True
/*invertLeftArg*/, False
/*swapArgs*/ );
27031 goto decode_success
;
27033 /* VPANDN r/m, rV, r ::: r = rV & ~r/m (is that correct, re the ~ ?) */
27034 /* VEX.NDS.256.66.0F.WIG DF /r = VPANDN ymm3/m256, ymm2, ymm1 */
27035 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27036 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
27037 uses_vvvv
, vbi
, pfx
, delta
, "vpandn", Iop_AndV256
,
27038 NULL
, True
/*invertLeftArg*/, False
/*swapArgs*/ );
27039 goto decode_success
;
27044 /* VPAVGB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E0 /r */
27045 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27046 delta
= dis_AVX128_E_V_to_G(
27047 uses_vvvv
, vbi
, pfx
, delta
, "vpavgb", Iop_Avg8Ux16
);
27048 goto decode_success
;
27050 /* VPAVGB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E0 /r */
27051 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27052 delta
= dis_AVX256_E_V_to_G(
27053 uses_vvvv
, vbi
, pfx
, delta
, "vpavgb", Iop_Avg8Ux32
);
27054 goto decode_success
;
27059 /* VPSRAW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E1 /r */
27060 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27061 delta
= dis_AVX128_shiftV_byE( vbi
, pfx
, delta
,
27062 "vpsraw", Iop_SarN16x8
);
27064 goto decode_success
;
27066 /* VPSRAW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E1 /r */
27067 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27068 delta
= dis_AVX256_shiftV_byE( vbi
, pfx
, delta
,
27069 "vpsraw", Iop_SarN16x16
);
27071 goto decode_success
;
27076 /* VPSRAD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E2 /r */
27077 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27078 delta
= dis_AVX128_shiftV_byE( vbi
, pfx
, delta
,
27079 "vpsrad", Iop_SarN32x4
);
27081 goto decode_success
;
27083 /* VPSRAD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E2 /r */
27084 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27085 delta
= dis_AVX256_shiftV_byE( vbi
, pfx
, delta
,
27086 "vpsrad", Iop_SarN32x8
);
27088 goto decode_success
;
27093 /* VPAVGW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E3 /r */
27094 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27095 delta
= dis_AVX128_E_V_to_G(
27096 uses_vvvv
, vbi
, pfx
, delta
, "vpavgw", Iop_Avg16Ux8
);
27097 goto decode_success
;
27099 /* VPAVGW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E3 /r */
27100 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27101 delta
= dis_AVX256_E_V_to_G(
27102 uses_vvvv
, vbi
, pfx
, delta
, "vpavgw", Iop_Avg16Ux16
);
27103 goto decode_success
;
27108 /* VPMULHUW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E4 /r */
27109 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27110 delta
= dis_AVX128_E_V_to_G(
27111 uses_vvvv
, vbi
, pfx
, delta
, "vpmulhuw", Iop_MulHi16Ux8
);
27112 goto decode_success
;
27114 /* VPMULHUW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E4 /r */
27115 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27116 delta
= dis_AVX256_E_V_to_G(
27117 uses_vvvv
, vbi
, pfx
, delta
, "vpmulhuw", Iop_MulHi16Ux16
);
27118 goto decode_success
;
27123 /* VPMULHW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E5 /r */
27124 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27125 delta
= dis_AVX128_E_V_to_G(
27126 uses_vvvv
, vbi
, pfx
, delta
, "vpmulhw", Iop_MulHi16Sx8
);
27127 goto decode_success
;
27129 /* VPMULHW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E5 /r */
27130 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27131 delta
= dis_AVX256_E_V_to_G(
27132 uses_vvvv
, vbi
, pfx
, delta
, "vpmulhw", Iop_MulHi16Sx16
);
27133 goto decode_success
;
27138 /* VCVTDQ2PD xmm2/m64, xmm1 = VEX.128.F3.0F.WIG E6 /r */
27139 if (haveF3no66noF2(pfx
) && 0==getVexL(pfx
)/*128*/) {
27140 delta
= dis_CVTDQ2PD_128(vbi
, pfx
, delta
, True
/*isAvx*/);
27141 goto decode_success
;
27143 /* VCVTDQ2PD xmm2/m128, ymm1 = VEX.256.F3.0F.WIG E6 /r */
27144 if (haveF3no66noF2(pfx
) && 1==getVexL(pfx
)/*256*/) {
27145 delta
= dis_CVTDQ2PD_256(vbi
, pfx
, delta
);
27146 goto decode_success
;
27148 /* VCVTTPD2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG E6 /r */
27149 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27150 delta
= dis_CVTxPD2DQ_128(vbi
, pfx
, delta
, True
/*isAvx*/,
27152 goto decode_success
;
27154 /* VCVTTPD2DQ ymm2/m256, xmm1 = VEX.256.66.0F.WIG E6 /r */
27155 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27156 delta
= dis_CVTxPD2DQ_256(vbi
, pfx
, delta
, True
/*r2zero*/);
27157 goto decode_success
;
27159 /* VCVTPD2DQ xmm2/m128, xmm1 = VEX.128.F2.0F.WIG E6 /r */
27160 if (haveF2no66noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27161 delta
= dis_CVTxPD2DQ_128(vbi
, pfx
, delta
, True
/*isAvx*/,
27163 goto decode_success
;
27165 /* VCVTPD2DQ ymm2/m256, xmm1 = VEX.256.F2.0F.WIG E6 /r */
27166 if (haveF2no66noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27167 delta
= dis_CVTxPD2DQ_256(vbi
, pfx
, delta
, False
/*!r2zero*/);
27168 goto decode_success
;
27173 /* VMOVNTDQ xmm1, m128 = VEX.128.66.0F.WIG E7 /r */
27174 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27175 UChar modrm
= getUChar(delta
);
27176 UInt rG
= gregOfRexRM(pfx
,modrm
);
27177 if (!epartIsReg(modrm
)) {
27178 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
27179 gen_SEGV_if_not_16_aligned( addr
);
27180 storeLE( mkexpr(addr
), getXMMReg(rG
) );
27181 DIP("vmovntdq %s,%s\n", dis_buf
, nameXMMReg(rG
));
27183 goto decode_success
;
27185 /* else fall through */
27187 /* VMOVNTDQ ymm1, m256 = VEX.256.66.0F.WIG E7 /r */
27188 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27189 UChar modrm
= getUChar(delta
);
27190 UInt rG
= gregOfRexRM(pfx
,modrm
);
27191 if (!epartIsReg(modrm
)) {
27192 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
27193 gen_SEGV_if_not_32_aligned( addr
);
27194 storeLE( mkexpr(addr
), getYMMReg(rG
) );
27195 DIP("vmovntdq %s,%s\n", dis_buf
, nameYMMReg(rG
));
27197 goto decode_success
;
27199 /* else fall through */
27204 /* VPSUBSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E8 /r */
27205 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27206 delta
= dis_AVX128_E_V_to_G(
27207 uses_vvvv
, vbi
, pfx
, delta
, "vpsubsb", Iop_QSub8Sx16
);
27208 goto decode_success
;
27210 /* VPSUBSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E8 /r */
27211 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27212 delta
= dis_AVX256_E_V_to_G(
27213 uses_vvvv
, vbi
, pfx
, delta
, "vpsubsb", Iop_QSub8Sx32
);
27214 goto decode_success
;
27219 /* VPSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E9 /r */
27220 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27221 delta
= dis_AVX128_E_V_to_G(
27222 uses_vvvv
, vbi
, pfx
, delta
, "vpsubsw", Iop_QSub16Sx8
);
27223 goto decode_success
;
27225 /* VPSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E9 /r */
27226 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27227 delta
= dis_AVX256_E_V_to_G(
27228 uses_vvvv
, vbi
, pfx
, delta
, "vpsubsw", Iop_QSub16Sx16
);
27229 goto decode_success
;
27234 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */
27235 /* VPMINSW = VEX.NDS.128.66.0F.WIG EA /r */
27236 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27237 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27238 uses_vvvv
, vbi
, pfx
, delta
, "vpminsw", Iop_Min16Sx8
);
27239 goto decode_success
;
27241 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */
27242 /* VPMINSW = VEX.NDS.256.66.0F.WIG EA /r */
27243 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27244 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27245 uses_vvvv
, vbi
, pfx
, delta
, "vpminsw", Iop_Min16Sx16
);
27246 goto decode_success
;
27251 /* VPOR r/m, rV, r ::: r = rV | r/m */
27252 /* VPOR = VEX.NDS.128.66.0F.WIG EB /r */
27253 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27254 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27255 uses_vvvv
, vbi
, pfx
, delta
, "vpor", Iop_OrV128
);
27256 goto decode_success
;
27258 /* VPOR r/m, rV, r ::: r = rV | r/m */
27259 /* VPOR = VEX.NDS.256.66.0F.WIG EB /r */
27260 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27261 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27262 uses_vvvv
, vbi
, pfx
, delta
, "vpor", Iop_OrV256
);
27263 goto decode_success
;
27268 /* VPADDSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG EC /r */
27269 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27270 delta
= dis_AVX128_E_V_to_G(
27271 uses_vvvv
, vbi
, pfx
, delta
, "vpaddsb", Iop_QAdd8Sx16
);
27272 goto decode_success
;
27274 /* VPADDSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG EC /r */
27275 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27276 delta
= dis_AVX256_E_V_to_G(
27277 uses_vvvv
, vbi
, pfx
, delta
, "vpaddsb", Iop_QAdd8Sx32
);
27278 goto decode_success
;
27283 /* VPADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG ED /r */
27284 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27285 delta
= dis_AVX128_E_V_to_G(
27286 uses_vvvv
, vbi
, pfx
, delta
, "vpaddsw", Iop_QAdd16Sx8
);
27287 goto decode_success
;
27289 /* VPADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG ED /r */
27290 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27291 delta
= dis_AVX256_E_V_to_G(
27292 uses_vvvv
, vbi
, pfx
, delta
, "vpaddsw", Iop_QAdd16Sx16
);
27293 goto decode_success
;
27298 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */
27299 /* VPMAXSW = VEX.NDS.128.66.0F.WIG EE /r */
27300 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27301 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27302 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxsw", Iop_Max16Sx8
);
27303 goto decode_success
;
27305 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */
27306 /* VPMAXSW = VEX.NDS.256.66.0F.WIG EE /r */
27307 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27308 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27309 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxsw", Iop_Max16Sx16
);
27310 goto decode_success
;
27315 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */
27316 /* VPXOR = VEX.NDS.128.66.0F.WIG EF /r */
27317 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27318 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27319 uses_vvvv
, vbi
, pfx
, delta
, "vpxor", Iop_XorV128
);
27320 goto decode_success
;
27322 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */
27323 /* VPXOR = VEX.NDS.256.66.0F.WIG EF /r */
27324 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27325 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27326 uses_vvvv
, vbi
, pfx
, delta
, "vpxor", Iop_XorV256
);
27327 goto decode_success
;
27332 /* VLDDQU m256, ymm1 = VEX.256.F2.0F.WIG F0 /r */
27333 if (haveF2no66noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27334 UChar modrm
= getUChar(delta
);
27335 UInt rD
= gregOfRexRM(pfx
, modrm
);
27336 IRTemp tD
= newTemp(Ity_V256
);
27337 if (epartIsReg(modrm
)) break;
27338 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
27340 assign(tD
, loadLE(Ity_V256
, mkexpr(addr
)));
27341 DIP("vlddqu %s,%s\n", dis_buf
, nameYMMReg(rD
));
27342 putYMMReg(rD
, mkexpr(tD
));
27343 goto decode_success
;
27345 /* VLDDQU m128, xmm1 = VEX.128.F2.0F.WIG F0 /r */
27346 if (haveF2no66noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27347 UChar modrm
= getUChar(delta
);
27348 UInt rD
= gregOfRexRM(pfx
, modrm
);
27349 IRTemp tD
= newTemp(Ity_V128
);
27350 if (epartIsReg(modrm
)) break;
27351 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
27353 assign(tD
, loadLE(Ity_V128
, mkexpr(addr
)));
27354 DIP("vlddqu %s,%s\n", dis_buf
, nameXMMReg(rD
));
27355 putYMMRegLoAndZU(rD
, mkexpr(tD
));
27356 goto decode_success
;
27361 /* VPSLLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F1 /r */
27362 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27363 delta
= dis_AVX128_shiftV_byE( vbi
, pfx
, delta
,
27364 "vpsllw", Iop_ShlN16x8
);
27366 goto decode_success
;
27369 /* VPSLLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F1 /r */
27370 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27371 delta
= dis_AVX256_shiftV_byE( vbi
, pfx
, delta
,
27372 "vpsllw", Iop_ShlN16x16
);
27374 goto decode_success
;
27380 /* VPSLLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F2 /r */
27381 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27382 delta
= dis_AVX128_shiftV_byE( vbi
, pfx
, delta
,
27383 "vpslld", Iop_ShlN32x4
);
27385 goto decode_success
;
27387 /* VPSLLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F2 /r */
27388 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27389 delta
= dis_AVX256_shiftV_byE( vbi
, pfx
, delta
,
27390 "vpslld", Iop_ShlN32x8
);
27392 goto decode_success
;
27397 /* VPSLLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F3 /r */
27398 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27399 delta
= dis_AVX128_shiftV_byE( vbi
, pfx
, delta
,
27400 "vpsllq", Iop_ShlN64x2
);
27402 goto decode_success
;
27404 /* VPSLLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F3 /r */
27405 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27406 delta
= dis_AVX256_shiftV_byE( vbi
, pfx
, delta
,
27407 "vpsllq", Iop_ShlN64x4
);
27409 goto decode_success
;
27414 /* VPMULUDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F4 /r */
27415 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27416 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27417 uses_vvvv
, vbi
, pfx
, delta
,
27418 "vpmuludq", math_PMULUDQ_128
);
27419 goto decode_success
;
27421 /* VPMULUDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F4 /r */
27422 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27423 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27424 uses_vvvv
, vbi
, pfx
, delta
,
27425 "vpmuludq", math_PMULUDQ_256
);
27426 goto decode_success
;
27431 /* VPMADDWD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F5 /r */
27432 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27433 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27434 uses_vvvv
, vbi
, pfx
, delta
,
27435 "vpmaddwd", math_PMADDWD_128
);
27436 goto decode_success
;
27438 /* VPMADDWD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F5 /r */
27439 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27440 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27441 uses_vvvv
, vbi
, pfx
, delta
,
27442 "vpmaddwd", math_PMADDWD_256
);
27443 goto decode_success
;
27448 /* VPSADBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F6 /r */
27449 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27450 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27451 uses_vvvv
, vbi
, pfx
, delta
,
27452 "vpsadbw", math_PSADBW_128
);
27453 goto decode_success
;
27455 /* VPSADBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F6 /r */
27456 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27457 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27458 uses_vvvv
, vbi
, pfx
, delta
,
27459 "vpsadbw", math_PSADBW_256
);
27460 goto decode_success
;
27465 /* VMASKMOVDQU xmm2, xmm1 = VEX.128.66.0F.WIG F7 /r */
27466 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
27467 && epartIsReg(getUChar(delta
))) {
27468 delta
= dis_MASKMOVDQU( vbi
, pfx
, delta
, True
/*isAvx*/ );
27469 goto decode_success
;
27474 /* VPSUBB r/m, rV, r ::: r = rV - r/m */
27475 /* VPSUBB = VEX.NDS.128.66.0F.WIG F8 /r */
27476 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27477 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27478 uses_vvvv
, vbi
, pfx
, delta
, "vpsubb", Iop_Sub8x16
);
27479 goto decode_success
;
27481 /* VPSUBB r/m, rV, r ::: r = rV - r/m */
27482 /* VPSUBB = VEX.NDS.256.66.0F.WIG F8 /r */
27483 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27484 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27485 uses_vvvv
, vbi
, pfx
, delta
, "vpsubb", Iop_Sub8x32
);
27486 goto decode_success
;
27491 /* VPSUBW r/m, rV, r ::: r = rV - r/m */
27492 /* VPSUBW = VEX.NDS.128.66.0F.WIG F9 /r */
27493 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27494 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27495 uses_vvvv
, vbi
, pfx
, delta
, "vpsubw", Iop_Sub16x8
);
27496 goto decode_success
;
27498 /* VPSUBW r/m, rV, r ::: r = rV - r/m */
27499 /* VPSUBW = VEX.NDS.256.66.0F.WIG F9 /r */
27500 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27501 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27502 uses_vvvv
, vbi
, pfx
, delta
, "vpsubw", Iop_Sub16x16
);
27503 goto decode_success
;
27508 /* VPSUBD r/m, rV, r ::: r = rV - r/m */
27509 /* VPSUBD = VEX.NDS.128.66.0F.WIG FA /r */
27510 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27511 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27512 uses_vvvv
, vbi
, pfx
, delta
, "vpsubd", Iop_Sub32x4
);
27513 goto decode_success
;
27515 /* VPSUBD r/m, rV, r ::: r = rV - r/m */
27516 /* VPSUBD = VEX.NDS.256.66.0F.WIG FA /r */
27517 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27518 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27519 uses_vvvv
, vbi
, pfx
, delta
, "vpsubd", Iop_Sub32x8
);
27520 goto decode_success
;
27525 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */
27526 /* VPSUBQ = VEX.NDS.128.66.0F.WIG FB /r */
27527 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27528 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27529 uses_vvvv
, vbi
, pfx
, delta
, "vpsubq", Iop_Sub64x2
);
27530 goto decode_success
;
27532 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */
27533 /* VPSUBQ = VEX.NDS.256.66.0F.WIG FB /r */
27534 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27535 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27536 uses_vvvv
, vbi
, pfx
, delta
, "vpsubq", Iop_Sub64x4
);
27537 goto decode_success
;
27542 /* VPADDB r/m, rV, r ::: r = rV + r/m */
27543 /* VPADDB = VEX.NDS.128.66.0F.WIG FC /r */
27544 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27545 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27546 uses_vvvv
, vbi
, pfx
, delta
, "vpaddb", Iop_Add8x16
);
27547 goto decode_success
;
27549 /* VPADDB r/m, rV, r ::: r = rV + r/m */
27550 /* VPADDB = VEX.NDS.256.66.0F.WIG FC /r */
27551 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27552 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27553 uses_vvvv
, vbi
, pfx
, delta
, "vpaddb", Iop_Add8x32
);
27554 goto decode_success
;
27559 /* VPADDW r/m, rV, r ::: r = rV + r/m */
27560 /* VPADDW = VEX.NDS.128.66.0F.WIG FD /r */
27561 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27562 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27563 uses_vvvv
, vbi
, pfx
, delta
, "vpaddw", Iop_Add16x8
);
27564 goto decode_success
;
27566 /* VPADDW r/m, rV, r ::: r = rV + r/m */
27567 /* VPADDW = VEX.NDS.256.66.0F.WIG FD /r */
27568 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27569 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27570 uses_vvvv
, vbi
, pfx
, delta
, "vpaddw", Iop_Add16x16
);
27571 goto decode_success
;
27576 /* VPADDD r/m, rV, r ::: r = rV + r/m */
27577 /* VPADDD = VEX.NDS.128.66.0F.WIG FE /r */
27578 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27579 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27580 uses_vvvv
, vbi
, pfx
, delta
, "vpaddd", Iop_Add32x4
);
27581 goto decode_success
;
27583 /* VPADDD r/m, rV, r ::: r = rV + r/m */
27584 /* VPADDD = VEX.NDS.256.66.0F.WIG FE /r */
27585 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27586 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27587 uses_vvvv
, vbi
, pfx
, delta
, "vpaddd", Iop_Add32x8
);
27588 goto decode_success
;
27605 /*------------------------------------------------------------*/
27607 /*--- Top-level post-escape decoders: dis_ESC_0F38__VEX ---*/
27609 /*------------------------------------------------------------*/
27611 static IRTemp
math_PERMILPS_VAR_128 ( IRTemp dataV
, IRTemp ctrlV
)
27613 /* In the control vector, zero out all but the bottom two bits of
27614 each 32-bit lane. */
27615 IRExpr
* cv1
= binop(Iop_ShrN32x4
,
27616 binop(Iop_ShlN32x4
, mkexpr(ctrlV
), mkU8(30)),
27618 /* And use the resulting cleaned-up control vector as steering
27619 in a Perm operation. */
27620 IRTemp res
= newTemp(Ity_V128
);
27621 assign(res
, binop(Iop_Perm32x4
, mkexpr(dataV
), cv1
));
27625 static IRTemp
math_PERMILPS_VAR_256 ( IRTemp dataV
, IRTemp ctrlV
)
27627 IRTemp dHi
, dLo
, cHi
, cLo
;
27628 dHi
= dLo
= cHi
= cLo
= IRTemp_INVALID
;
27629 breakupV256toV128s( dataV
, &dHi
, &dLo
);
27630 breakupV256toV128s( ctrlV
, &cHi
, &cLo
);
27631 IRTemp rHi
= math_PERMILPS_VAR_128( dHi
, cHi
);
27632 IRTemp rLo
= math_PERMILPS_VAR_128( dLo
, cLo
);
27633 IRTemp res
= newTemp(Ity_V256
);
27634 assign(res
, binop(Iop_V128HLtoV256
, mkexpr(rHi
), mkexpr(rLo
)));
27638 static IRTemp
math_PERMILPD_VAR_128 ( IRTemp dataV
, IRTemp ctrlV
)
27640 /* No cleverness here .. */
27641 IRTemp dHi
, dLo
, cHi
, cLo
;
27642 dHi
= dLo
= cHi
= cLo
= IRTemp_INVALID
;
27643 breakupV128to64s( dataV
, &dHi
, &dLo
);
27644 breakupV128to64s( ctrlV
, &cHi
, &cLo
);
27646 = IRExpr_ITE( unop(Iop_64to1
,
27647 binop(Iop_Shr64
, mkexpr(cHi
), mkU8(1))),
27648 mkexpr(dHi
), mkexpr(dLo
) );
27650 = IRExpr_ITE( unop(Iop_64to1
,
27651 binop(Iop_Shr64
, mkexpr(cLo
), mkU8(1))),
27652 mkexpr(dHi
), mkexpr(dLo
) );
27653 IRTemp res
= newTemp(Ity_V128
);
27654 assign(res
, binop(Iop_64HLtoV128
, rHi
, rLo
));
27658 static IRTemp
math_PERMILPD_VAR_256 ( IRTemp dataV
, IRTemp ctrlV
)
27660 IRTemp dHi
, dLo
, cHi
, cLo
;
27661 dHi
= dLo
= cHi
= cLo
= IRTemp_INVALID
;
27662 breakupV256toV128s( dataV
, &dHi
, &dLo
);
27663 breakupV256toV128s( ctrlV
, &cHi
, &cLo
);
27664 IRTemp rHi
= math_PERMILPD_VAR_128( dHi
, cHi
);
27665 IRTemp rLo
= math_PERMILPD_VAR_128( dLo
, cLo
);
27666 IRTemp res
= newTemp(Ity_V256
);
27667 assign(res
, binop(Iop_V128HLtoV256
, mkexpr(rHi
), mkexpr(rLo
)));
27671 static IRTemp
math_VPERMD ( IRTemp ctrlV
, IRTemp dataV
)
27673 /* In the control vector, zero out all but the bottom three bits of
27674 each 32-bit lane. */
27675 IRExpr
* cv1
= binop(Iop_ShrN32x8
,
27676 binop(Iop_ShlN32x8
, mkexpr(ctrlV
), mkU8(29)),
27678 /* And use the resulting cleaned-up control vector as steering
27679 in a Perm operation. */
27680 IRTemp res
= newTemp(Ity_V256
);
27681 assign(res
, binop(Iop_Perm32x8
, mkexpr(dataV
), cv1
));
27685 static Long
dis_SHIFTX ( /*OUT*/Bool
* uses_vvvv
,
27686 const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
,
27687 const HChar
* opname
, IROp op8
)
27691 Int size
= getRexW(pfx
) ? 8 : 4;
27692 IRType ty
= szToITy(size
);
27693 IRTemp src
= newTemp(ty
);
27694 IRTemp amt
= newTemp(ty
);
27695 UChar rm
= getUChar(delta
);
27697 assign( amt
, getIRegV(size
,pfx
) );
27698 if (epartIsReg(rm
)) {
27699 assign( src
, getIRegE(size
,pfx
,rm
) );
27700 DIP("%s %s,%s,%s\n", opname
, nameIRegV(size
,pfx
),
27701 nameIRegE(size
,pfx
,rm
), nameIRegG(size
,pfx
,rm
));
27704 IRTemp addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
27705 assign( src
, loadLE(ty
, mkexpr(addr
)) );
27706 DIP("%s %s,%s,%s\n", opname
, nameIRegV(size
,pfx
), dis_buf
,
27707 nameIRegG(size
,pfx
,rm
));
27711 putIRegG( size
, pfx
, rm
,
27712 binop(mkSizedOp(ty
,op8
), mkexpr(src
),
27713 narrowTo(Ity_I8
, binop(mkSizedOp(ty
,Iop_And8
), mkexpr(amt
),
27714 mkU(ty
,8*size
-1)))) );
27715 /* Flags aren't modified. */
27721 static Long
dis_FMA ( const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
, UChar opc
)
27723 UChar modrm
= getUChar(delta
);
27724 UInt rG
= gregOfRexRM(pfx
, modrm
);
27725 UInt rV
= getVexNvvvv(pfx
);
27726 Bool scalar
= (opc
& 0xF) > 7 && (opc
& 1);
27727 IRType ty
= getRexW(pfx
) ? Ity_F64
: Ity_F32
;
27728 IRType vty
= scalar
? ty
: (getVexL(pfx
) ? Ity_V256
: Ity_V128
);
27729 IRTemp addr
= IRTemp_INVALID
;
27733 const HChar
*suffix
;
27734 const HChar
*order
;
27735 Bool negateRes
= False
;
27736 Bool negateZeven
= False
;
27737 Bool negateZodd
= False
;
27740 switch (opc
& 0xF) {
27741 case 0x6: name
= "addsub"; negateZeven
= True
; break;
27742 case 0x7: name
= "subadd"; negateZodd
= True
; break;
27744 case 0x9: name
= "add"; break;
27746 case 0xB: name
= "sub"; negateZeven
= True
; negateZodd
= True
;
27749 case 0xD: name
= "add"; negateRes
= True
; negateZeven
= True
;
27750 negateZodd
= True
; break;
27752 case 0xF: name
= "sub"; negateRes
= True
; break;
27753 default: vpanic("dis_FMA(amd64)"); break;
27755 switch (opc
& 0xF0) {
27756 case 0x90: order
= "132"; break;
27757 case 0xA0: order
= "213"; break;
27758 case 0xB0: order
= "231"; break;
27759 default: vpanic("dis_FMA(amd64)"); break;
27762 suffix
= ty
== Ity_F64
? "sd" : "ss";
27764 suffix
= ty
== Ity_F64
? "pd" : "ps";
27767 // Figure out |count| (the number of elements) by considering |vty| and |ty|.
27768 count
= sizeofIRType(vty
) / sizeofIRType(ty
);
27769 vassert(count
== 1 || count
== 2 || count
== 4 || count
== 8);
27771 // Fetch operands into the first |count| elements of |sX|, |sY| and |sZ|.
27773 IRExpr
*sX
[8], *sY
[8], *sZ
[8], *res
[8];
27774 for (i
= 0; i
< 8; i
++) sX
[i
] = sY
[i
] = sZ
[i
] = res
[i
] = NULL
;
27776 IRExpr
* (*getYMMRegLane
)(UInt
,Int
)
27777 = ty
== Ity_F32
? getYMMRegLane32F
: getYMMRegLane64F
;
27778 void (*putYMMRegLane
)(UInt
,Int
,IRExpr
*)
27779 = ty
== Ity_F32
? putYMMRegLane32F
: putYMMRegLane64F
;
27781 for (i
= 0; i
< count
; i
++) {
27782 sX
[i
] = getYMMRegLane(rG
, i
);
27783 sZ
[i
] = getYMMRegLane(rV
, i
);
27786 if (epartIsReg(modrm
)) {
27787 UInt rE
= eregOfRexRM(pfx
, modrm
);
27789 for (i
= 0; i
< count
; i
++) {
27790 sY
[i
] = getYMMRegLane(rE
, i
);
27792 if (vty
== Ity_V256
) {
27793 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes
? "n" : "",
27794 name
, order
, suffix
, nameYMMReg(rE
), nameYMMReg(rV
),
27797 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes
? "n" : "",
27798 name
, order
, suffix
, nameXMMReg(rE
), nameXMMReg(rV
),
27802 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
27804 for (i
= 0; i
< count
; i
++) {
27805 sY
[i
] = loadLE(ty
, binop(Iop_Add64
, mkexpr(addr
),
27806 mkU64(i
* sizeofIRType(ty
))));
27808 if (vty
== Ity_V256
) {
27809 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes
? "n" : "",
27810 name
, order
, suffix
, dis_buf
, nameYMMReg(rV
),
27813 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes
? "n" : "",
27814 name
, order
, suffix
, dis_buf
, nameXMMReg(rV
),
27819 /* vX/vY/vZ are now in 132 order. If the instruction requires a different
27820 order, swap them around. */
27822 # define COPY_ARR(_dst, _src) \
27823 do { for (int j = 0; j < 8; j++) { _dst[j] = _src[j]; } } while (0)
27825 if ((opc
& 0xF0) != 0x90) {
27827 COPY_ARR(temp
, sX
);
27828 if ((opc
& 0xF0) == 0xA0) {
27831 COPY_ARR(sY
, temp
);
27834 COPY_ARR(sZ
, temp
);
27840 for (i
= 0; i
< count
; i
++) {
27841 IROp opNEG
= ty
== Ity_F64
? Iop_NegF64
: Iop_NegF32
;
27842 if ((i
& 1) ? negateZodd
: negateZeven
) {
27843 sZ
[i
] = unop(opNEG
, sZ
[i
]);
27845 res
[i
] = IRExpr_Qop(ty
== Ity_F64
? Iop_MAddF64
: Iop_MAddF32
,
27846 get_FAKE_roundingmode(), sX
[i
], sY
[i
], sZ
[i
]);
27848 res
[i
] = unop(opNEG
, res
[i
]);
27852 for (i
= 0; i
< count
; i
++) {
27853 putYMMRegLane(rG
, i
, res
[i
]);
27857 case Ity_F32
: putYMMRegLane32(rG
, 1, mkU32(0)); /*fallthru*/
27858 case Ity_F64
: putYMMRegLane64(rG
, 1, mkU64(0)); /*fallthru*/
27859 case Ity_V128
: putYMMRegLane128(rG
, 1, mkV128(0)); /*fallthru*/
27860 case Ity_V256
: break;
27861 default: vassert(0);
27868 /* Masked load or masked store. */
27869 static ULong
dis_VMASKMOV ( Bool
*uses_vvvv
, const VexAbiInfo
* vbi
,
27870 Prefix pfx
, Long delta
,
27871 const HChar
* opname
, Bool isYMM
, IRType ty
,
27877 UChar modrm
= getUChar(delta
);
27878 UInt rG
= gregOfRexRM(pfx
,modrm
);
27879 UInt rV
= getVexNvvvv(pfx
);
27881 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
27884 /**/ if (isLoad
&& isYMM
) {
27885 DIP("%s %s,%s,%s\n", opname
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
) );
27887 else if (isLoad
&& !isYMM
) {
27888 DIP("%s %s,%s,%s\n", opname
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
27891 else if (!isLoad
&& isYMM
) {
27892 DIP("%s %s,%s,%s\n", opname
, nameYMMReg(rG
), nameYMMReg(rV
), dis_buf
);
27895 vassert(!isLoad
&& !isYMM
);
27896 DIP("%s %s,%s,%s\n", opname
, nameXMMReg(rG
), nameXMMReg(rV
), dis_buf
);
27899 vassert(ty
== Ity_I32
|| ty
== Ity_I64
);
27900 Bool laneIs32
= ty
== Ity_I32
;
27902 Int nLanes
= (isYMM
? 2 : 1) * (laneIs32
? 4 : 2);
27904 for (i
= 0; i
< nLanes
; i
++) {
27905 IRExpr
* shAmt
= laneIs32
? mkU8(31) : mkU8(63);
27906 IRExpr
* one
= laneIs32
? mkU32(1) : mkU64(1);
27907 IROp opSHR
= laneIs32
? Iop_Shr32
: Iop_Shr64
;
27908 IROp opEQ
= laneIs32
? Iop_CmpEQ32
: Iop_CmpEQ64
;
27909 IRExpr
* lane
= (laneIs32
? getYMMRegLane32
: getYMMRegLane64
)( rV
, i
);
27911 IRTemp cond
= newTemp(Ity_I1
);
27912 assign(cond
, binop(opEQ
, binop(opSHR
, lane
, shAmt
), one
));
27914 IRTemp data
= newTemp(ty
);
27915 IRExpr
* ea
= binop(Iop_Add64
, mkexpr(addr
),
27916 mkU64(i
* (laneIs32
? 4 : 8)));
27920 Iend_LE
, laneIs32
? ILGop_Ident32
: ILGop_Ident64
,
27921 data
, ea
, laneIs32
? mkU32(0) : mkU64(0), mkexpr(cond
)
27923 (laneIs32
? putYMMRegLane32
: putYMMRegLane64
)( rG
, i
, mkexpr(data
) );
27925 assign(data
, (laneIs32
? getYMMRegLane32
: getYMMRegLane64
)( rG
, i
));
27926 stmt( IRStmt_StoreG(Iend_LE
, ea
, mkexpr(data
), mkexpr(cond
)) );
27930 if (isLoad
&& !isYMM
)
27931 putYMMRegLane128( rG
, 1, mkV128(0) );
27939 static ULong
dis_VGATHER ( Bool
*uses_vvvv
, const VexAbiInfo
* vbi
,
27940 Prefix pfx
, Long delta
,
27941 const HChar
* opname
, Bool isYMM
,
27942 Bool isVM64x
, IRType ty
)
27945 Int alen
, i
, vscale
, count1
, count2
;
27947 UChar modrm
= getUChar(delta
);
27948 UInt rG
= gregOfRexRM(pfx
,modrm
);
27949 UInt rV
= getVexNvvvv(pfx
);
27951 IRType dstTy
= (isYMM
&& (ty
== Ity_I64
|| !isVM64x
)) ? Ity_V256
: Ity_V128
;
27952 IRType idxTy
= (isYMM
&& (ty
== Ity_I32
|| isVM64x
)) ? Ity_V256
: Ity_V128
;
27954 addr
= disAVSIBMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, &rI
,
27956 if (addr
== IRTemp_INVALID
|| rI
== rG
|| rI
== rV
|| rG
== rV
)
27958 if (dstTy
== Ity_V256
) {
27959 DIP("%s %s,%s,%s\n", opname
, nameYMMReg(rV
), dis_buf
, nameYMMReg(rG
) );
27961 DIP("%s %s,%s,%s\n", opname
, nameXMMReg(rV
), dis_buf
, nameXMMReg(rG
) );
27965 if (ty
== Ity_I32
) {
27966 count1
= isYMM
? 8 : 4;
27967 count2
= isVM64x
? count1
/ 2 : count1
;
27969 count1
= count2
= isYMM
? 4 : 2;
27972 /* First update the mask register to copies of the sign bit. */
27973 if (ty
== Ity_I32
) {
27975 putYMMReg( rV
, binop(Iop_SarN32x8
, getYMMReg( rV
), mkU8(31)) );
27977 putYMMRegLoAndZU( rV
, binop(Iop_SarN32x4
, getXMMReg( rV
), mkU8(31)) );
27979 for (i
= 0; i
< count1
; i
++) {
27980 putYMMRegLane64( rV
, i
, binop(Iop_Sar64
, getYMMRegLane64( rV
, i
),
27985 /* Next gather the individual elements. If any fault occurs, the
27986 corresponding mask element will be set and the loop stops. */
27987 for (i
= 0; i
< count2
; i
++) {
27988 IRExpr
*expr
, *addr_expr
;
27989 cond
= newTemp(Ity_I1
);
27991 binop(ty
== Ity_I32
? Iop_CmpLT32S
: Iop_CmpLT64S
,
27992 ty
== Ity_I32
? getYMMRegLane32( rV
, i
)
27993 : getYMMRegLane64( rV
, i
),
27995 expr
= ty
== Ity_I32
? getYMMRegLane32( rG
, i
)
27996 : getYMMRegLane64( rG
, i
);
27997 addr_expr
= isVM64x
? getYMMRegLane64( rI
, i
)
27998 : unop(Iop_32Sto64
, getYMMRegLane32( rI
, i
));
28000 case 2: addr_expr
= binop(Iop_Shl64
, addr_expr
, mkU8(1)); break;
28001 case 4: addr_expr
= binop(Iop_Shl64
, addr_expr
, mkU8(2)); break;
28002 case 8: addr_expr
= binop(Iop_Shl64
, addr_expr
, mkU8(3)); break;
28005 addr_expr
= binop(Iop_Add64
, mkexpr(addr
), addr_expr
);
28006 addr_expr
= handleAddrOverrides(vbi
, pfx
, addr_expr
);
28007 addr_expr
= IRExpr_ITE(mkexpr(cond
), addr_expr
, getIReg64(R_RSP
));
28008 expr
= IRExpr_ITE(mkexpr(cond
), loadLE(ty
, addr_expr
), expr
);
28009 if (ty
== Ity_I32
) {
28010 putYMMRegLane32( rG
, i
, expr
);
28011 putYMMRegLane32( rV
, i
, mkU32(0) );
28013 putYMMRegLane64( rG
, i
, expr
);
28014 putYMMRegLane64( rV
, i
, mkU64(0) );
28018 if (!isYMM
|| (ty
== Ity_I32
&& isVM64x
)) {
28019 if (ty
== Ity_I64
|| isYMM
)
28020 putYMMRegLane128( rV
, 1, mkV128(0) );
28021 else if (ty
== Ity_I32
&& count2
== 2) {
28022 putYMMRegLane64( rV
, 1, mkU64(0) );
28023 putYMMRegLane64( rG
, 1, mkU64(0) );
28025 putYMMRegLane128( rG
, 1, mkV128(0) );
28033 __attribute__((noinline
))
28035 Long
dis_ESC_0F38__VEX (
28036 /*MB_OUT*/DisResult
* dres
,
28037 /*OUT*/ Bool
* uses_vvvv
,
28038 const VexArchInfo
* archinfo
,
28039 const VexAbiInfo
* vbi
,
28040 Prefix pfx
, Int sz
, Long deltaIN
28043 IRTemp addr
= IRTemp_INVALID
;
28046 Long delta
= deltaIN
;
28047 UChar opc
= getUChar(delta
);
28049 *uses_vvvv
= False
;
28054 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */
28055 /* VPSHUFB = VEX.NDS.128.66.0F38.WIG 00 /r */
28056 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28057 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
28058 uses_vvvv
, vbi
, pfx
, delta
, "vpshufb", math_PSHUFB_XMM
);
28059 goto decode_success
;
28061 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */
28062 /* VPSHUFB = VEX.NDS.256.66.0F38.WIG 00 /r */
28063 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28064 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28065 uses_vvvv
, vbi
, pfx
, delta
, "vpshufb", math_PSHUFB_YMM
);
28066 goto decode_success
;
28073 /* VPHADDW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 01 /r */
28074 /* VPHADDD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 02 /r */
28075 /* VPHADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 03 /r */
28076 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28077 delta
= dis_PHADD_128( vbi
, pfx
, delta
, True
/*isAvx*/, opc
);
28079 goto decode_success
;
28081 /* VPHADDW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 01 /r */
28082 /* VPHADDD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 02 /r */
28083 /* VPHADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 03 /r */
28084 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28085 delta
= dis_PHADD_256( vbi
, pfx
, delta
, opc
);
28087 goto decode_success
;
28092 /* VPMADDUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 04 /r */
28093 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28094 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
28095 uses_vvvv
, vbi
, pfx
, delta
, "vpmaddubsw",
28096 math_PMADDUBSW_128
);
28097 goto decode_success
;
28099 /* VPMADDUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 04 /r */
28100 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28101 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28102 uses_vvvv
, vbi
, pfx
, delta
, "vpmaddubsw",
28103 math_PMADDUBSW_256
);
28104 goto decode_success
;
28111 /* VPHSUBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 05 /r */
28112 /* VPHSUBD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 06 /r */
28113 /* VPHSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 07 /r */
28114 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28115 delta
= dis_PHADD_128( vbi
, pfx
, delta
, True
/*isAvx*/, opc
);
28117 goto decode_success
;
28119 /* VPHSUBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 05 /r */
28120 /* VPHSUBD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 06 /r */
28121 /* VPHSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 07 /r */
28122 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28123 delta
= dis_PHADD_256( vbi
, pfx
, delta
, opc
);
28125 goto decode_success
;
28132 /* VPSIGNB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 08 /r */
28133 /* VPSIGNW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 09 /r */
28134 /* VPSIGND xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0A /r */
28135 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28136 IRTemp sV
= newTemp(Ity_V128
);
28137 IRTemp dV
= newTemp(Ity_V128
);
28138 IRTemp sHi
, sLo
, dHi
, dLo
;
28139 sHi
= sLo
= dHi
= dLo
= IRTemp_INVALID
;
28142 UChar modrm
= getUChar(delta
);
28143 UInt rG
= gregOfRexRM(pfx
,modrm
);
28144 UInt rV
= getVexNvvvv(pfx
);
28147 case 0x08: laneszB
= 1; ch
= 'b'; break;
28148 case 0x09: laneszB
= 2; ch
= 'w'; break;
28149 case 0x0A: laneszB
= 4; ch
= 'd'; break;
28150 default: vassert(0);
28153 assign( dV
, getXMMReg(rV
) );
28155 if (epartIsReg(modrm
)) {
28156 UInt rE
= eregOfRexRM(pfx
,modrm
);
28157 assign( sV
, getXMMReg(rE
) );
28159 DIP("vpsign%c %s,%s,%s\n", ch
, nameXMMReg(rE
),
28160 nameXMMReg(rV
), nameXMMReg(rG
));
28162 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28163 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
28165 DIP("vpsign%c %s,%s,%s\n", ch
, dis_buf
,
28166 nameXMMReg(rV
), nameXMMReg(rG
));
28169 breakupV128to64s( dV
, &dHi
, &dLo
);
28170 breakupV128to64s( sV
, &sHi
, &sLo
);
28174 binop(Iop_64HLtoV128
,
28175 dis_PSIGN_helper( mkexpr(sHi
), mkexpr(dHi
), laneszB
),
28176 dis_PSIGN_helper( mkexpr(sLo
), mkexpr(dLo
), laneszB
)
28180 goto decode_success
;
28182 /* VPSIGNB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 08 /r */
28183 /* VPSIGNW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 09 /r */
28184 /* VPSIGND ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0A /r */
28185 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28186 IRTemp sV
= newTemp(Ity_V256
);
28187 IRTemp dV
= newTemp(Ity_V256
);
28188 IRTemp s3
, s2
, s1
, s0
, d3
, d2
, d1
, d0
;
28189 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
28190 d3
= d2
= d1
= d0
= IRTemp_INVALID
;
28193 UChar modrm
= getUChar(delta
);
28194 UInt rG
= gregOfRexRM(pfx
,modrm
);
28195 UInt rV
= getVexNvvvv(pfx
);
28198 case 0x08: laneszB
= 1; ch
= 'b'; break;
28199 case 0x09: laneszB
= 2; ch
= 'w'; break;
28200 case 0x0A: laneszB
= 4; ch
= 'd'; break;
28201 default: vassert(0);
28204 assign( dV
, getYMMReg(rV
) );
28206 if (epartIsReg(modrm
)) {
28207 UInt rE
= eregOfRexRM(pfx
,modrm
);
28208 assign( sV
, getYMMReg(rE
) );
28210 DIP("vpsign%c %s,%s,%s\n", ch
, nameYMMReg(rE
),
28211 nameYMMReg(rV
), nameYMMReg(rG
));
28213 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28214 assign( sV
, loadLE(Ity_V256
, mkexpr(addr
)) );
28216 DIP("vpsign%c %s,%s,%s\n", ch
, dis_buf
,
28217 nameYMMReg(rV
), nameYMMReg(rG
));
28220 breakupV256to64s( dV
, &d3
, &d2
, &d1
, &d0
);
28221 breakupV256to64s( sV
, &s3
, &s2
, &s1
, &s0
);
28225 binop( Iop_V128HLtoV256
,
28226 binop(Iop_64HLtoV128
,
28227 dis_PSIGN_helper( mkexpr(s3
), mkexpr(d3
), laneszB
),
28228 dis_PSIGN_helper( mkexpr(s2
), mkexpr(d2
), laneszB
)
28230 binop(Iop_64HLtoV128
,
28231 dis_PSIGN_helper( mkexpr(s1
), mkexpr(d1
), laneszB
),
28232 dis_PSIGN_helper( mkexpr(s0
), mkexpr(d0
), laneszB
)
28237 goto decode_success
;
28242 /* VPMULHRSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0B /r */
28243 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28244 IRTemp sV
= newTemp(Ity_V128
);
28245 IRTemp dV
= newTemp(Ity_V128
);
28246 IRTemp sHi
, sLo
, dHi
, dLo
;
28247 sHi
= sLo
= dHi
= dLo
= IRTemp_INVALID
;
28248 UChar modrm
= getUChar(delta
);
28249 UInt rG
= gregOfRexRM(pfx
,modrm
);
28250 UInt rV
= getVexNvvvv(pfx
);
28252 assign( dV
, getXMMReg(rV
) );
28254 if (epartIsReg(modrm
)) {
28255 UInt rE
= eregOfRexRM(pfx
,modrm
);
28256 assign( sV
, getXMMReg(rE
) );
28258 DIP("vpmulhrsw %s,%s,%s\n", nameXMMReg(rE
),
28259 nameXMMReg(rV
), nameXMMReg(rG
));
28261 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28262 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
28264 DIP("vpmulhrsw %s,%s,%s\n", dis_buf
,
28265 nameXMMReg(rV
), nameXMMReg(rG
));
28268 breakupV128to64s( dV
, &dHi
, &dLo
);
28269 breakupV128to64s( sV
, &sHi
, &sLo
);
28273 binop(Iop_64HLtoV128
,
28274 dis_PMULHRSW_helper( mkexpr(sHi
), mkexpr(dHi
) ),
28275 dis_PMULHRSW_helper( mkexpr(sLo
), mkexpr(dLo
) )
28279 goto decode_success
;
28281 /* VPMULHRSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0B /r */
28282 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28283 IRTemp sV
= newTemp(Ity_V256
);
28284 IRTemp dV
= newTemp(Ity_V256
);
28285 IRTemp s3
, s2
, s1
, s0
, d3
, d2
, d1
, d0
;
28286 s3
= s2
= s1
= s0
= d3
= d2
= d1
= d0
= IRTemp_INVALID
;
28287 UChar modrm
= getUChar(delta
);
28288 UInt rG
= gregOfRexRM(pfx
,modrm
);
28289 UInt rV
= getVexNvvvv(pfx
);
28291 assign( dV
, getYMMReg(rV
) );
28293 if (epartIsReg(modrm
)) {
28294 UInt rE
= eregOfRexRM(pfx
,modrm
);
28295 assign( sV
, getYMMReg(rE
) );
28297 DIP("vpmulhrsw %s,%s,%s\n", nameYMMReg(rE
),
28298 nameYMMReg(rV
), nameYMMReg(rG
));
28300 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28301 assign( sV
, loadLE(Ity_V256
, mkexpr(addr
)) );
28303 DIP("vpmulhrsw %s,%s,%s\n", dis_buf
,
28304 nameYMMReg(rV
), nameYMMReg(rG
));
28307 breakupV256to64s( dV
, &d3
, &d2
, &d1
, &d0
);
28308 breakupV256to64s( sV
, &s3
, &s2
, &s1
, &s0
);
28312 binop(Iop_V128HLtoV256
,
28313 binop(Iop_64HLtoV128
,
28314 dis_PMULHRSW_helper( mkexpr(s3
), mkexpr(d3
) ),
28315 dis_PMULHRSW_helper( mkexpr(s2
), mkexpr(d2
) ) ),
28316 binop(Iop_64HLtoV128
,
28317 dis_PMULHRSW_helper( mkexpr(s1
), mkexpr(d1
) ),
28318 dis_PMULHRSW_helper( mkexpr(s0
), mkexpr(d0
) ) )
28322 dres
->hint
= Dis_HintVerbose
;
28323 goto decode_success
;
28328 /* VPERMILPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0C /r */
28329 if (have66noF2noF3(pfx
)
28330 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
28331 UChar modrm
= getUChar(delta
);
28332 UInt rG
= gregOfRexRM(pfx
, modrm
);
28333 UInt rV
= getVexNvvvv(pfx
);
28334 IRTemp ctrlV
= newTemp(Ity_V128
);
28335 if (epartIsReg(modrm
)) {
28336 UInt rE
= eregOfRexRM(pfx
, modrm
);
28338 DIP("vpermilps %s,%s,%s\n",
28339 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
28340 assign(ctrlV
, getXMMReg(rE
));
28342 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28344 DIP("vpermilps %s,%s,%s\n",
28345 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
28346 assign(ctrlV
, loadLE(Ity_V128
, mkexpr(addr
)));
28348 IRTemp dataV
= newTemp(Ity_V128
);
28349 assign(dataV
, getXMMReg(rV
));
28350 IRTemp resV
= math_PERMILPS_VAR_128(dataV
, ctrlV
);
28351 putYMMRegLoAndZU(rG
, mkexpr(resV
));
28353 goto decode_success
;
28355 /* VPERMILPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0C /r */
28356 if (have66noF2noF3(pfx
)
28357 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
28358 UChar modrm
= getUChar(delta
);
28359 UInt rG
= gregOfRexRM(pfx
, modrm
);
28360 UInt rV
= getVexNvvvv(pfx
);
28361 IRTemp ctrlV
= newTemp(Ity_V256
);
28362 if (epartIsReg(modrm
)) {
28363 UInt rE
= eregOfRexRM(pfx
, modrm
);
28365 DIP("vpermilps %s,%s,%s\n",
28366 nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
28367 assign(ctrlV
, getYMMReg(rE
));
28369 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28371 DIP("vpermilps %s,%s,%s\n",
28372 dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
28373 assign(ctrlV
, loadLE(Ity_V256
, mkexpr(addr
)));
28375 IRTemp dataV
= newTemp(Ity_V256
);
28376 assign(dataV
, getYMMReg(rV
));
28377 IRTemp resV
= math_PERMILPS_VAR_256(dataV
, ctrlV
);
28378 putYMMReg(rG
, mkexpr(resV
));
28380 goto decode_success
;
28385 /* VPERMILPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0D /r */
28386 if (have66noF2noF3(pfx
)
28387 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
28388 UChar modrm
= getUChar(delta
);
28389 UInt rG
= gregOfRexRM(pfx
, modrm
);
28390 UInt rV
= getVexNvvvv(pfx
);
28391 IRTemp ctrlV
= newTemp(Ity_V128
);
28392 if (epartIsReg(modrm
)) {
28393 UInt rE
= eregOfRexRM(pfx
, modrm
);
28395 DIP("vpermilpd %s,%s,%s\n",
28396 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
28397 assign(ctrlV
, getXMMReg(rE
));
28399 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28401 DIP("vpermilpd %s,%s,%s\n",
28402 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
28403 assign(ctrlV
, loadLE(Ity_V128
, mkexpr(addr
)));
28405 IRTemp dataV
= newTemp(Ity_V128
);
28406 assign(dataV
, getXMMReg(rV
));
28407 IRTemp resV
= math_PERMILPD_VAR_128(dataV
, ctrlV
);
28408 putYMMRegLoAndZU(rG
, mkexpr(resV
));
28410 goto decode_success
;
28412 /* VPERMILPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0D /r */
28413 if (have66noF2noF3(pfx
)
28414 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
28415 UChar modrm
= getUChar(delta
);
28416 UInt rG
= gregOfRexRM(pfx
, modrm
);
28417 UInt rV
= getVexNvvvv(pfx
);
28418 IRTemp ctrlV
= newTemp(Ity_V256
);
28419 if (epartIsReg(modrm
)) {
28420 UInt rE
= eregOfRexRM(pfx
, modrm
);
28422 DIP("vpermilpd %s,%s,%s\n",
28423 nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
28424 assign(ctrlV
, getYMMReg(rE
));
28426 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28428 DIP("vpermilpd %s,%s,%s\n",
28429 dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
28430 assign(ctrlV
, loadLE(Ity_V256
, mkexpr(addr
)));
28432 IRTemp dataV
= newTemp(Ity_V256
);
28433 assign(dataV
, getYMMReg(rV
));
28434 IRTemp resV
= math_PERMILPD_VAR_256(dataV
, ctrlV
);
28435 putYMMReg(rG
, mkexpr(resV
));
28437 goto decode_success
;
28442 /* VTESTPS xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0E /r */
28443 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28444 delta
= dis_xTESTy_128( vbi
, pfx
, delta
, True
/*isAvx*/, 32 );
28445 goto decode_success
;
28447 /* VTESTPS ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0E /r */
28448 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28449 delta
= dis_xTESTy_256( vbi
, pfx
, delta
, 32 );
28450 goto decode_success
;
28455 /* VTESTPD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0F /r */
28456 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28457 delta
= dis_xTESTy_128( vbi
, pfx
, delta
, True
/*isAvx*/, 64 );
28458 goto decode_success
;
28460 /* VTESTPD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0F /r */
28461 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28462 delta
= dis_xTESTy_256( vbi
, pfx
, delta
, 64 );
28463 goto decode_success
;
28468 /* VCVTPH2PS xmm2/m64, xmm1 = VEX.128.66.0F38.W0 13 /r */
28469 if (have66noF2noF3(pfx
)
28470 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/
28471 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_F16C
)) {
28472 delta
= dis_VCVTPH2PS( vbi
, pfx
, delta
, /*is256bit=*/False
);
28473 goto decode_success
;
28475 /* VCVTPH2PS xmm2/m128, xmm1 = VEX.256.66.0F38.W0 13 /r */
28476 if (have66noF2noF3(pfx
)
28477 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/
28478 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_F16C
)) {
28479 delta
= dis_VCVTPH2PS( vbi
, pfx
, delta
, /*is256bit=*/True
);
28480 goto decode_success
;
28485 /* VPERMPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 16 /r */
28486 if (have66noF2noF3(pfx
)
28487 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
28488 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28489 uses_vvvv
, vbi
, pfx
, delta
, "vpermps", math_VPERMD
);
28490 goto decode_success
;
28495 /* VPTEST xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 17 /r */
28496 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28497 delta
= dis_xTESTy_128( vbi
, pfx
, delta
, True
/*isAvx*/, 0 );
28498 goto decode_success
;
28500 /* VPTEST ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 17 /r */
28501 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28502 delta
= dis_xTESTy_256( vbi
, pfx
, delta
, 0 );
28503 goto decode_success
;
28508 /* VBROADCASTSS m32, xmm1 = VEX.128.66.0F38.WIG 18 /r */
28509 if (have66noF2noF3(pfx
)
28510 && 0==getVexL(pfx
)/*128*/
28511 && !epartIsReg(getUChar(delta
))) {
28512 UChar modrm
= getUChar(delta
);
28513 UInt rG
= gregOfRexRM(pfx
, modrm
);
28514 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28516 DIP("vbroadcastss %s,%s\n", dis_buf
, nameXMMReg(rG
));
28517 IRTemp t32
= newTemp(Ity_I32
);
28518 assign(t32
, loadLE(Ity_I32
, mkexpr(addr
)));
28519 IRTemp t64
= newTemp(Ity_I64
);
28520 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
28521 IRExpr
* res
= binop(Iop_64HLtoV128
, mkexpr(t64
), mkexpr(t64
));
28522 putYMMRegLoAndZU(rG
, res
);
28523 goto decode_success
;
28525 /* VBROADCASTSS m32, ymm1 = VEX.256.66.0F38.WIG 18 /r */
28526 if (have66noF2noF3(pfx
)
28527 && 1==getVexL(pfx
)/*256*/
28528 && !epartIsReg(getUChar(delta
))) {
28529 UChar modrm
= getUChar(delta
);
28530 UInt rG
= gregOfRexRM(pfx
, modrm
);
28531 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28533 DIP("vbroadcastss %s,%s\n", dis_buf
, nameYMMReg(rG
));
28534 IRTemp t32
= newTemp(Ity_I32
);
28535 assign(t32
, loadLE(Ity_I32
, mkexpr(addr
)));
28536 IRTemp t64
= newTemp(Ity_I64
);
28537 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
28538 IRExpr
* res
= IRExpr_Qop(Iop_64x4toV256
, mkexpr(t64
), mkexpr(t64
),
28539 mkexpr(t64
), mkexpr(t64
));
28540 putYMMReg(rG
, res
);
28541 goto decode_success
;
28543 /* VBROADCASTSS xmm2, xmm1 = VEX.128.66.0F38.WIG 18 /r */
28544 if (have66noF2noF3(pfx
)
28545 && 0==getVexL(pfx
)/*128*/
28546 && epartIsReg(getUChar(delta
))) {
28547 UChar modrm
= getUChar(delta
);
28548 UInt rG
= gregOfRexRM(pfx
, modrm
);
28549 UInt rE
= eregOfRexRM(pfx
, modrm
);
28550 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
28551 IRTemp t32
= newTemp(Ity_I32
);
28552 assign(t32
, getXMMRegLane32(rE
, 0));
28553 IRTemp t64
= newTemp(Ity_I64
);
28554 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
28555 IRExpr
* res
= binop(Iop_64HLtoV128
, mkexpr(t64
), mkexpr(t64
));
28556 putYMMRegLoAndZU(rG
, res
);
28558 goto decode_success
;
28560 /* VBROADCASTSS xmm2, ymm1 = VEX.256.66.0F38.WIG 18 /r */
28561 if (have66noF2noF3(pfx
)
28562 && 1==getVexL(pfx
)/*256*/
28563 && epartIsReg(getUChar(delta
))) {
28564 UChar modrm
= getUChar(delta
);
28565 UInt rG
= gregOfRexRM(pfx
, modrm
);
28566 UInt rE
= eregOfRexRM(pfx
, modrm
);
28567 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
));
28568 IRTemp t32
= newTemp(Ity_I32
);
28569 assign(t32
, getXMMRegLane32(rE
, 0));
28570 IRTemp t64
= newTemp(Ity_I64
);
28571 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
28572 IRExpr
* res
= IRExpr_Qop(Iop_64x4toV256
, mkexpr(t64
), mkexpr(t64
),
28573 mkexpr(t64
), mkexpr(t64
));
28574 putYMMReg(rG
, res
);
28576 goto decode_success
;
28581 /* VBROADCASTSD m64, ymm1 = VEX.256.66.0F38.WIG 19 /r */
28582 if (have66noF2noF3(pfx
)
28583 && 1==getVexL(pfx
)/*256*/
28584 && !epartIsReg(getUChar(delta
))) {
28585 UChar modrm
= getUChar(delta
);
28586 UInt rG
= gregOfRexRM(pfx
, modrm
);
28587 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28589 DIP("vbroadcastsd %s,%s\n", dis_buf
, nameYMMReg(rG
));
28590 IRTemp t64
= newTemp(Ity_I64
);
28591 assign(t64
, loadLE(Ity_I64
, mkexpr(addr
)));
28592 IRExpr
* res
= IRExpr_Qop(Iop_64x4toV256
, mkexpr(t64
), mkexpr(t64
),
28593 mkexpr(t64
), mkexpr(t64
));
28594 putYMMReg(rG
, res
);
28595 goto decode_success
;
28597 /* VBROADCASTSD xmm2, ymm1 = VEX.256.66.0F38.WIG 19 /r */
28598 if (have66noF2noF3(pfx
)
28599 && 1==getVexL(pfx
)/*256*/
28600 && epartIsReg(getUChar(delta
))) {
28601 UChar modrm
= getUChar(delta
);
28602 UInt rG
= gregOfRexRM(pfx
, modrm
);
28603 UInt rE
= eregOfRexRM(pfx
, modrm
);
28604 DIP("vbroadcastsd %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
));
28605 IRTemp t64
= newTemp(Ity_I64
);
28606 assign(t64
, getXMMRegLane64(rE
, 0));
28607 IRExpr
* res
= IRExpr_Qop(Iop_64x4toV256
, mkexpr(t64
), mkexpr(t64
),
28608 mkexpr(t64
), mkexpr(t64
));
28609 putYMMReg(rG
, res
);
28611 goto decode_success
;
28616 /* VBROADCASTF128 m128, ymm1 = VEX.256.66.0F38.WIG 1A /r */
28617 if (have66noF2noF3(pfx
)
28618 && 1==getVexL(pfx
)/*256*/
28619 && !epartIsReg(getUChar(delta
))) {
28620 UChar modrm
= getUChar(delta
);
28621 UInt rG
= gregOfRexRM(pfx
, modrm
);
28622 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28624 DIP("vbroadcastf128 %s,%s\n", dis_buf
, nameYMMReg(rG
));
28625 IRTemp t128
= newTemp(Ity_V128
);
28626 assign(t128
, loadLE(Ity_V128
, mkexpr(addr
)));
28627 putYMMReg( rG
, binop(Iop_V128HLtoV256
, mkexpr(t128
), mkexpr(t128
)) );
28628 goto decode_success
;
28633 /* VPABSB xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1C /r */
28634 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28635 delta
= dis_AVX128_E_to_G_unary(
28636 uses_vvvv
, vbi
, pfx
, delta
,
28637 "vpabsb", math_PABS_XMM_pap1
);
28638 goto decode_success
;
28640 /* VPABSB ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1C /r */
28641 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28642 delta
= dis_AVX256_E_to_G_unary(
28643 uses_vvvv
, vbi
, pfx
, delta
,
28644 "vpabsb", math_PABS_YMM_pap1
);
28645 goto decode_success
;
28650 /* VPABSW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1D /r */
28651 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28652 delta
= dis_AVX128_E_to_G_unary(
28653 uses_vvvv
, vbi
, pfx
, delta
,
28654 "vpabsw", math_PABS_XMM_pap2
);
28655 goto decode_success
;
28657 /* VPABSW ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1D /r */
28658 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28659 delta
= dis_AVX256_E_to_G_unary(
28660 uses_vvvv
, vbi
, pfx
, delta
,
28661 "vpabsw", math_PABS_YMM_pap2
);
28662 goto decode_success
;
28667 /* VPABSD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1E /r */
28668 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28669 delta
= dis_AVX128_E_to_G_unary(
28670 uses_vvvv
, vbi
, pfx
, delta
,
28671 "vpabsd", math_PABS_XMM_pap4
);
28672 goto decode_success
;
28674 /* VPABSD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1E /r */
28675 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28676 delta
= dis_AVX256_E_to_G_unary(
28677 uses_vvvv
, vbi
, pfx
, delta
,
28678 "vpabsd", math_PABS_YMM_pap4
);
28679 goto decode_success
;
28684 /* VPMOVSXBW xmm2/m64, xmm1 */
28685 /* VPMOVSXBW = VEX.128.66.0F38.WIG 20 /r */
28686 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28687 delta
= dis_PMOVxXBW_128( vbi
, pfx
, delta
,
28688 True
/*isAvx*/, False
/*!xIsZ*/ );
28689 goto decode_success
;
28691 /* VPMOVSXBW xmm2/m128, ymm1 */
28692 /* VPMOVSXBW = VEX.256.66.0F38.WIG 20 /r */
28693 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28694 delta
= dis_PMOVxXBW_256( vbi
, pfx
, delta
, False
/*!xIsZ*/ );
28695 goto decode_success
;
28700 /* VPMOVSXBD xmm2/m32, xmm1 */
28701 /* VPMOVSXBD = VEX.128.66.0F38.WIG 21 /r */
28702 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28703 delta
= dis_PMOVxXBD_128( vbi
, pfx
, delta
,
28704 True
/*isAvx*/, False
/*!xIsZ*/ );
28705 goto decode_success
;
28707 /* VPMOVSXBD xmm2/m64, ymm1 */
28708 /* VPMOVSXBD = VEX.256.66.0F38.WIG 21 /r */
28709 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28710 delta
= dis_PMOVxXBD_256( vbi
, pfx
, delta
, False
/*!xIsZ*/ );
28711 goto decode_success
;
28716 /* VPMOVSXBQ xmm2/m16, xmm1 */
28717 /* VPMOVSXBQ = VEX.128.66.0F38.WIG 22 /r */
28718 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28719 delta
= dis_PMOVSXBQ_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
28720 goto decode_success
;
28722 /* VPMOVSXBQ xmm2/m32, ymm1 */
28723 /* VPMOVSXBQ = VEX.256.66.0F38.WIG 22 /r */
28724 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28725 delta
= dis_PMOVSXBQ_256( vbi
, pfx
, delta
);
28726 goto decode_success
;
28731 /* VPMOVSXWD xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 23 /r */
28732 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28733 delta
= dis_PMOVxXWD_128( vbi
, pfx
, delta
,
28734 True
/*isAvx*/, False
/*!xIsZ*/ );
28735 goto decode_success
;
28737 /* VPMOVSXWD xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 23 /r */
28738 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28739 delta
= dis_PMOVxXWD_256( vbi
, pfx
, delta
, False
/*!xIsZ*/ );
28740 goto decode_success
;
28745 /* VPMOVSXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 24 /r */
28746 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28747 delta
= dis_PMOVSXWQ_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
28748 goto decode_success
;
28750 /* VPMOVSXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 24 /r */
28751 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28752 delta
= dis_PMOVSXWQ_256( vbi
, pfx
, delta
);
28753 goto decode_success
;
28758 /* VPMOVSXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 25 /r */
28759 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28760 delta
= dis_PMOVxXDQ_128( vbi
, pfx
, delta
,
28761 True
/*isAvx*/, False
/*!xIsZ*/ );
28762 goto decode_success
;
28764 /* VPMOVSXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 25 /r */
28765 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28766 delta
= dis_PMOVxXDQ_256( vbi
, pfx
, delta
, False
/*!xIsZ*/ );
28767 goto decode_success
;
28772 /* VPMULDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 28 /r */
28773 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28774 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
28775 uses_vvvv
, vbi
, pfx
, delta
,
28776 "vpmuldq", math_PMULDQ_128
);
28777 goto decode_success
;
28779 /* VPMULDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 28 /r */
28780 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28781 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28782 uses_vvvv
, vbi
, pfx
, delta
,
28783 "vpmuldq", math_PMULDQ_256
);
28784 goto decode_success
;
28789 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */
28790 /* VPCMPEQQ = VEX.NDS.128.66.0F38.WIG 29 /r */
28791 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28792 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28793 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpeqq", Iop_CmpEQ64x2
);
28794 goto decode_success
;
28796 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */
28797 /* VPCMPEQQ = VEX.NDS.256.66.0F38.WIG 29 /r */
28798 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28799 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28800 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpeqq", Iop_CmpEQ64x4
);
28801 goto decode_success
;
28806 /* VMOVNTDQA m128, xmm1 = VEX.128.66.0F38.WIG 2A /r */
28807 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
28808 && !epartIsReg(getUChar(delta
))) {
28809 UChar modrm
= getUChar(delta
);
28810 UInt rD
= gregOfRexRM(pfx
, modrm
);
28811 IRTemp tD
= newTemp(Ity_V128
);
28812 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28814 gen_SEGV_if_not_16_aligned(addr
);
28815 assign(tD
, loadLE(Ity_V128
, mkexpr(addr
)));
28816 DIP("vmovntdqa %s,%s\n", dis_buf
, nameXMMReg(rD
));
28817 putYMMRegLoAndZU(rD
, mkexpr(tD
));
28818 goto decode_success
;
28820 /* VMOVNTDQA m256, ymm1 = VEX.256.66.0F38.WIG 2A /r */
28821 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
28822 && !epartIsReg(getUChar(delta
))) {
28823 UChar modrm
= getUChar(delta
);
28824 UInt rD
= gregOfRexRM(pfx
, modrm
);
28825 IRTemp tD
= newTemp(Ity_V256
);
28826 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28828 gen_SEGV_if_not_32_aligned(addr
);
28829 assign(tD
, loadLE(Ity_V256
, mkexpr(addr
)));
28830 DIP("vmovntdqa %s,%s\n", dis_buf
, nameYMMReg(rD
));
28831 putYMMReg(rD
, mkexpr(tD
));
28832 goto decode_success
;
28837 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */
28838 /* VPACKUSDW = VEX.NDS.128.66.0F38.WIG 2B /r */
28839 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28840 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
28841 uses_vvvv
, vbi
, pfx
, delta
, "vpackusdw",
28842 Iop_QNarrowBin32Sto16Ux8
, NULL
,
28843 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
28844 goto decode_success
;
28846 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */
28847 /* VPACKUSDW = VEX.NDS.256.66.0F38.WIG 2B /r */
28848 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28849 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28850 uses_vvvv
, vbi
, pfx
, delta
, "vpackusdw",
28851 math_VPACKUSDW_YMM
);
28852 goto decode_success
;
28857 /* VMASKMOVPS m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 2C /r */
28858 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
28859 && 0==getRexW(pfx
)/*W0*/
28860 && !epartIsReg(getUChar(delta
))) {
28861 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vmaskmovps",
28862 /*!isYMM*/False
, Ity_I32
, /*isLoad*/True
);
28863 goto decode_success
;
28865 /* VMASKMOVPS m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 2C /r */
28866 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
28867 && 0==getRexW(pfx
)/*W0*/
28868 && !epartIsReg(getUChar(delta
))) {
28869 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vmaskmovps",
28870 /*isYMM*/True
, Ity_I32
, /*isLoad*/True
);
28871 goto decode_success
;
28876 /* VMASKMOVPD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 2D /r */
28877 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
28878 && 0==getRexW(pfx
)/*W0*/
28879 && !epartIsReg(getUChar(delta
))) {
28880 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vmaskmovpd",
28881 /*!isYMM*/False
, Ity_I64
, /*isLoad*/True
);
28882 goto decode_success
;
28884 /* VMASKMOVPD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 2D /r */
28885 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
28886 && 0==getRexW(pfx
)/*W0*/
28887 && !epartIsReg(getUChar(delta
))) {
28888 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vmaskmovpd",
28889 /*isYMM*/True
, Ity_I64
, /*isLoad*/True
);
28890 goto decode_success
;
28895 /* VMASKMOVPS xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 2E /r */
28896 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
28897 && 0==getRexW(pfx
)/*W0*/
28898 && !epartIsReg(getUChar(delta
))) {
28899 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vmaskmovps",
28900 /*!isYMM*/False
, Ity_I32
, /*!isLoad*/False
);
28901 goto decode_success
;
28903 /* VMASKMOVPS ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 2E /r */
28904 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
28905 && 0==getRexW(pfx
)/*W0*/
28906 && !epartIsReg(getUChar(delta
))) {
28907 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vmaskmovps",
28908 /*isYMM*/True
, Ity_I32
, /*!isLoad*/False
);
28909 goto decode_success
;
28914 /* VMASKMOVPD xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 2F /r */
28915 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
28916 && 0==getRexW(pfx
)/*W0*/
28917 && !epartIsReg(getUChar(delta
))) {
28918 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vmaskmovpd",
28919 /*!isYMM*/False
, Ity_I64
, /*!isLoad*/False
);
28920 goto decode_success
;
28922 /* VMASKMOVPD ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 2F /r */
28923 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
28924 && 0==getRexW(pfx
)/*W0*/
28925 && !epartIsReg(getUChar(delta
))) {
28926 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vmaskmovpd",
28927 /*isYMM*/True
, Ity_I64
, /*!isLoad*/False
);
28928 goto decode_success
;
28933 /* VPMOVZXBW xmm2/m64, xmm1 */
28934 /* VPMOVZXBW = VEX.128.66.0F38.WIG 30 /r */
28935 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28936 delta
= dis_PMOVxXBW_128( vbi
, pfx
, delta
,
28937 True
/*isAvx*/, True
/*xIsZ*/ );
28938 goto decode_success
;
28940 /* VPMOVZXBW xmm2/m128, ymm1 */
28941 /* VPMOVZXBW = VEX.256.66.0F38.WIG 30 /r */
28942 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28943 delta
= dis_PMOVxXBW_256( vbi
, pfx
, delta
, True
/*xIsZ*/ );
28944 goto decode_success
;
28949 /* VPMOVZXBD xmm2/m32, xmm1 */
28950 /* VPMOVZXBD = VEX.128.66.0F38.WIG 31 /r */
28951 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28952 delta
= dis_PMOVxXBD_128( vbi
, pfx
, delta
,
28953 True
/*isAvx*/, True
/*xIsZ*/ );
28954 goto decode_success
;
28956 /* VPMOVZXBD xmm2/m64, ymm1 */
28957 /* VPMOVZXBD = VEX.256.66.0F38.WIG 31 /r */
28958 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28959 delta
= dis_PMOVxXBD_256( vbi
, pfx
, delta
, True
/*xIsZ*/ );
28960 goto decode_success
;
28965 /* VPMOVZXBQ xmm2/m16, xmm1 */
28966 /* VPMOVZXBQ = VEX.128.66.0F38.WIG 32 /r */
28967 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28968 delta
= dis_PMOVZXBQ_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
28969 goto decode_success
;
28971 /* VPMOVZXBQ xmm2/m32, ymm1 */
28972 /* VPMOVZXBQ = VEX.256.66.0F38.WIG 32 /r */
28973 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28974 delta
= dis_PMOVZXBQ_256( vbi
, pfx
, delta
);
28975 goto decode_success
;
28980 /* VPMOVZXWD xmm2/m64, xmm1 */
28981 /* VPMOVZXWD = VEX.128.66.0F38.WIG 33 /r */
28982 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28983 delta
= dis_PMOVxXWD_128( vbi
, pfx
, delta
,
28984 True
/*isAvx*/, True
/*xIsZ*/ );
28985 goto decode_success
;
28987 /* VPMOVZXWD xmm2/m128, ymm1 */
28988 /* VPMOVZXWD = VEX.256.66.0F38.WIG 33 /r */
28989 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28990 delta
= dis_PMOVxXWD_256( vbi
, pfx
, delta
, True
/*xIsZ*/ );
28991 goto decode_success
;
28996 /* VPMOVZXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 34 /r */
28997 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28998 delta
= dis_PMOVZXWQ_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
28999 goto decode_success
;
29001 /* VPMOVZXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 34 /r */
29002 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29003 delta
= dis_PMOVZXWQ_256( vbi
, pfx
, delta
);
29004 goto decode_success
;
29009 /* VPMOVZXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 35 /r */
29010 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29011 delta
= dis_PMOVxXDQ_128( vbi
, pfx
, delta
,
29012 True
/*isAvx*/, True
/*xIsZ*/ );
29013 goto decode_success
;
29015 /* VPMOVZXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 35 /r */
29016 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29017 delta
= dis_PMOVxXDQ_256( vbi
, pfx
, delta
, True
/*xIsZ*/ );
29018 goto decode_success
;
29023 /* VPERMD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 36 /r */
29024 if (have66noF2noF3(pfx
)
29025 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
29026 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
29027 uses_vvvv
, vbi
, pfx
, delta
, "vpermd", math_VPERMD
);
29028 goto decode_success
;
29033 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */
29034 /* VPCMPGTQ = VEX.NDS.128.66.0F38.WIG 37 /r */
29035 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29036 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29037 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpgtq", Iop_CmpGT64Sx2
);
29038 goto decode_success
;
29040 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */
29041 /* VPCMPGTQ = VEX.NDS.256.66.0F38.WIG 37 /r */
29042 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29043 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29044 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpgtq", Iop_CmpGT64Sx4
);
29045 goto decode_success
;
29050 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */
29051 /* VPMINSB = VEX.NDS.128.66.0F38.WIG 38 /r */
29052 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29053 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29054 uses_vvvv
, vbi
, pfx
, delta
, "vpminsb", Iop_Min8Sx16
);
29055 goto decode_success
;
29057 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */
29058 /* VPMINSB = VEX.NDS.256.66.0F38.WIG 38 /r */
29059 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29060 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29061 uses_vvvv
, vbi
, pfx
, delta
, "vpminsb", Iop_Min8Sx32
);
29062 goto decode_success
;
29067 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */
29068 /* VPMINSD = VEX.NDS.128.66.0F38.WIG 39 /r */
29069 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29070 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29071 uses_vvvv
, vbi
, pfx
, delta
, "vpminsd", Iop_Min32Sx4
);
29072 goto decode_success
;
29074 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */
29075 /* VPMINSD = VEX.NDS.256.66.0F38.WIG 39 /r */
29076 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29077 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29078 uses_vvvv
, vbi
, pfx
, delta
, "vpminsd", Iop_Min32Sx8
);
29079 goto decode_success
;
29084 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */
29085 /* VPMINUW = VEX.NDS.128.66.0F38.WIG 3A /r */
29086 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29087 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29088 uses_vvvv
, vbi
, pfx
, delta
, "vpminuw", Iop_Min16Ux8
);
29089 goto decode_success
;
29091 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */
29092 /* VPMINUW = VEX.NDS.256.66.0F38.WIG 3A /r */
29093 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29094 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29095 uses_vvvv
, vbi
, pfx
, delta
, "vpminuw", Iop_Min16Ux16
);
29096 goto decode_success
;
29101 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */
29102 /* VPMINUD = VEX.NDS.128.66.0F38.WIG 3B /r */
29103 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29104 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29105 uses_vvvv
, vbi
, pfx
, delta
, "vpminud", Iop_Min32Ux4
);
29106 goto decode_success
;
29108 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */
29109 /* VPMINUD = VEX.NDS.256.66.0F38.WIG 3B /r */
29110 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29111 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29112 uses_vvvv
, vbi
, pfx
, delta
, "vpminud", Iop_Min32Ux8
);
29113 goto decode_success
;
29118 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */
29119 /* VPMAXSB = VEX.NDS.128.66.0F38.WIG 3C /r */
29120 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29121 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29122 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxsb", Iop_Max8Sx16
);
29123 goto decode_success
;
29125 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */
29126 /* VPMAXSB = VEX.NDS.256.66.0F38.WIG 3C /r */
29127 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29128 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29129 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxsb", Iop_Max8Sx32
);
29130 goto decode_success
;
29135 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */
29136 /* VPMAXSD = VEX.NDS.128.66.0F38.WIG 3D /r */
29137 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29138 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29139 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxsd", Iop_Max32Sx4
);
29140 goto decode_success
;
29142 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */
29143 /* VPMAXSD = VEX.NDS.256.66.0F38.WIG 3D /r */
29144 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29145 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29146 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxsd", Iop_Max32Sx8
);
29147 goto decode_success
;
29152 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */
29153 /* VPMAXUW = VEX.NDS.128.66.0F38.WIG 3E /r */
29154 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29155 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29156 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxuw", Iop_Max16Ux8
);
29157 goto decode_success
;
29159 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */
29160 /* VPMAXUW = VEX.NDS.256.66.0F38.WIG 3E /r */
29161 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29162 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29163 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxuw", Iop_Max16Ux16
);
29164 goto decode_success
;
29169 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */
29170 /* VPMAXUD = VEX.NDS.128.66.0F38.WIG 3F /r */
29171 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29172 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29173 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxud", Iop_Max32Ux4
);
29174 goto decode_success
;
29176 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */
29177 /* VPMAXUD = VEX.NDS.256.66.0F38.WIG 3F /r */
29178 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29179 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29180 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxud", Iop_Max32Ux8
);
29181 goto decode_success
;
29186 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */
29187 /* VPMULLD = VEX.NDS.128.66.0F38.WIG 40 /r */
29188 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29189 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29190 uses_vvvv
, vbi
, pfx
, delta
, "vpmulld", Iop_Mul32x4
);
29191 goto decode_success
;
29193 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */
29194 /* VPMULLD = VEX.NDS.256.66.0F38.WIG 40 /r */
29195 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29196 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29197 uses_vvvv
, vbi
, pfx
, delta
, "vpmulld", Iop_Mul32x8
);
29198 goto decode_success
;
29203 /* VPHMINPOSUW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 41 /r */
29204 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29205 delta
= dis_PHMINPOSUW_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
29206 goto decode_success
;
29211 /* VPSRLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 45 /r */
29212 /* VPSRLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 45 /r */
29213 if (have66noF2noF3(pfx
) && 0==getRexW(pfx
)/*W0*/) {
29214 delta
= dis_AVX_var_shiftV_byE( vbi
, pfx
, delta
, "vpsrlvd",
29215 Iop_Shr32
, 1==getVexL(pfx
) );
29217 goto decode_success
;
29219 /* VPSRLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 45 /r */
29220 /* VPSRLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 45 /r */
29221 if (have66noF2noF3(pfx
) && 1==getRexW(pfx
)/*W1*/) {
29222 delta
= dis_AVX_var_shiftV_byE( vbi
, pfx
, delta
, "vpsrlvq",
29223 Iop_Shr64
, 1==getVexL(pfx
) );
29225 goto decode_success
;
29230 /* VPSRAVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 46 /r */
29231 /* VPSRAVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 46 /r */
29232 if (have66noF2noF3(pfx
) && 0==getRexW(pfx
)/*W0*/) {
29233 delta
= dis_AVX_var_shiftV_byE( vbi
, pfx
, delta
, "vpsravd",
29234 Iop_Sar32
, 1==getVexL(pfx
) );
29236 goto decode_success
;
29241 /* VPSLLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 47 /r */
29242 /* VPSLLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 47 /r */
29243 if (have66noF2noF3(pfx
) && 0==getRexW(pfx
)/*W0*/) {
29244 delta
= dis_AVX_var_shiftV_byE( vbi
, pfx
, delta
, "vpsllvd",
29245 Iop_Shl32
, 1==getVexL(pfx
) );
29247 goto decode_success
;
29249 /* VPSLLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 47 /r */
29250 /* VPSLLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 47 /r */
29251 if (have66noF2noF3(pfx
) && 1==getRexW(pfx
)/*W1*/) {
29252 delta
= dis_AVX_var_shiftV_byE( vbi
, pfx
, delta
, "vpsllvq",
29253 Iop_Shl64
, 1==getVexL(pfx
) );
29255 goto decode_success
;
29260 /* VPBROADCASTD xmm2/m32, xmm1 = VEX.128.66.0F38.W0 58 /r */
29261 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29262 && 0==getRexW(pfx
)/*W0*/) {
29263 UChar modrm
= getUChar(delta
);
29264 UInt rG
= gregOfRexRM(pfx
, modrm
);
29265 IRTemp t32
= newTemp(Ity_I32
);
29266 if (epartIsReg(modrm
)) {
29267 UInt rE
= eregOfRexRM(pfx
, modrm
);
29269 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
29270 assign(t32
, getXMMRegLane32(rE
, 0));
29272 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29274 DIP("vpbroadcastd %s,%s\n", dis_buf
, nameXMMReg(rG
));
29275 assign(t32
, loadLE(Ity_I32
, mkexpr(addr
)));
29277 IRTemp t64
= newTemp(Ity_I64
);
29278 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
29279 IRExpr
* res
= binop(Iop_64HLtoV128
, mkexpr(t64
), mkexpr(t64
));
29280 putYMMRegLoAndZU(rG
, res
);
29281 goto decode_success
;
29283 /* VPBROADCASTD xmm2/m32, ymm1 = VEX.256.66.0F38.W0 58 /r */
29284 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29285 && 0==getRexW(pfx
)/*W0*/) {
29286 UChar modrm
= getUChar(delta
);
29287 UInt rG
= gregOfRexRM(pfx
, modrm
);
29288 IRTemp t32
= newTemp(Ity_I32
);
29289 if (epartIsReg(modrm
)) {
29290 UInt rE
= eregOfRexRM(pfx
, modrm
);
29292 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
));
29293 assign(t32
, getXMMRegLane32(rE
, 0));
29295 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29297 DIP("vpbroadcastd %s,%s\n", dis_buf
, nameYMMReg(rG
));
29298 assign(t32
, loadLE(Ity_I32
, mkexpr(addr
)));
29300 IRTemp t64
= newTemp(Ity_I64
);
29301 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
29302 IRExpr
* res
= IRExpr_Qop(Iop_64x4toV256
, mkexpr(t64
), mkexpr(t64
),
29303 mkexpr(t64
), mkexpr(t64
));
29304 putYMMReg(rG
, res
);
29305 goto decode_success
;
29310 /* VPBROADCASTQ xmm2/m64, xmm1 = VEX.128.66.0F38.W0 59 /r */
29311 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29312 && 0==getRexW(pfx
)/*W0*/) {
29313 UChar modrm
= getUChar(delta
);
29314 UInt rG
= gregOfRexRM(pfx
, modrm
);
29315 IRTemp t64
= newTemp(Ity_I64
);
29316 if (epartIsReg(modrm
)) {
29317 UInt rE
= eregOfRexRM(pfx
, modrm
);
29319 DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
29320 assign(t64
, getXMMRegLane64(rE
, 0));
29322 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29324 DIP("vpbroadcastq %s,%s\n", dis_buf
, nameXMMReg(rG
));
29325 assign(t64
, loadLE(Ity_I64
, mkexpr(addr
)));
29327 IRExpr
* res
= binop(Iop_64HLtoV128
, mkexpr(t64
), mkexpr(t64
));
29328 putYMMRegLoAndZU(rG
, res
);
29329 goto decode_success
;
29331 /* VPBROADCASTQ xmm2/m64, ymm1 = VEX.256.66.0F38.W0 59 /r */
29332 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29333 && 0==getRexW(pfx
)/*W0*/) {
29334 UChar modrm
= getUChar(delta
);
29335 UInt rG
= gregOfRexRM(pfx
, modrm
);
29336 IRTemp t64
= newTemp(Ity_I64
);
29337 if (epartIsReg(modrm
)) {
29338 UInt rE
= eregOfRexRM(pfx
, modrm
);
29340 DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
));
29341 assign(t64
, getXMMRegLane64(rE
, 0));
29343 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29345 DIP("vpbroadcastq %s,%s\n", dis_buf
, nameYMMReg(rG
));
29346 assign(t64
, loadLE(Ity_I64
, mkexpr(addr
)));
29348 IRExpr
* res
= IRExpr_Qop(Iop_64x4toV256
, mkexpr(t64
), mkexpr(t64
),
29349 mkexpr(t64
), mkexpr(t64
));
29350 putYMMReg(rG
, res
);
29351 goto decode_success
;
29356 /* VBROADCASTI128 m128, ymm1 = VEX.256.66.0F38.WIG 5A /r */
29357 if (have66noF2noF3(pfx
)
29358 && 1==getVexL(pfx
)/*256*/
29359 && !epartIsReg(getUChar(delta
))) {
29360 UChar modrm
= getUChar(delta
);
29361 UInt rG
= gregOfRexRM(pfx
, modrm
);
29362 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29364 DIP("vbroadcasti128 %s,%s\n", dis_buf
, nameYMMReg(rG
));
29365 IRTemp t128
= newTemp(Ity_V128
);
29366 assign(t128
, loadLE(Ity_V128
, mkexpr(addr
)));
29367 putYMMReg( rG
, binop(Iop_V128HLtoV256
, mkexpr(t128
), mkexpr(t128
)) );
29368 goto decode_success
;
29373 /* VPBROADCASTB xmm2/m8, xmm1 = VEX.128.66.0F38.W0 78 /r */
29374 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29375 && 0==getRexW(pfx
)/*W0*/) {
29376 UChar modrm
= getUChar(delta
);
29377 UInt rG
= gregOfRexRM(pfx
, modrm
);
29378 IRTemp t8
= newTemp(Ity_I8
);
29379 if (epartIsReg(modrm
)) {
29380 UInt rE
= eregOfRexRM(pfx
, modrm
);
29382 DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
29383 assign(t8
, unop(Iop_32to8
, getXMMRegLane32(rE
, 0)));
29385 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29387 DIP("vpbroadcastb %s,%s\n", dis_buf
, nameXMMReg(rG
));
29388 assign(t8
, loadLE(Ity_I8
, mkexpr(addr
)));
29390 IRTemp t16
= newTemp(Ity_I16
);
29391 assign(t16
, binop(Iop_8HLto16
, mkexpr(t8
), mkexpr(t8
)));
29392 IRTemp t32
= newTemp(Ity_I32
);
29393 assign(t32
, binop(Iop_16HLto32
, mkexpr(t16
), mkexpr(t16
)));
29394 IRTemp t64
= newTemp(Ity_I64
);
29395 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
29396 IRExpr
* res
= binop(Iop_64HLtoV128
, mkexpr(t64
), mkexpr(t64
));
29397 putYMMRegLoAndZU(rG
, res
);
29398 goto decode_success
;
29400 /* VPBROADCASTB xmm2/m8, ymm1 = VEX.256.66.0F38.W0 78 /r */
29401 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29402 && 0==getRexW(pfx
)/*W0*/) {
29403 UChar modrm
= getUChar(delta
);
29404 UInt rG
= gregOfRexRM(pfx
, modrm
);
29405 IRTemp t8
= newTemp(Ity_I8
);
29406 if (epartIsReg(modrm
)) {
29407 UInt rE
= eregOfRexRM(pfx
, modrm
);
29409 DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
));
29410 assign(t8
, unop(Iop_32to8
, getXMMRegLane32(rE
, 0)));
29412 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29414 DIP("vpbroadcastb %s,%s\n", dis_buf
, nameYMMReg(rG
));
29415 assign(t8
, loadLE(Ity_I8
, mkexpr(addr
)));
29417 IRTemp t16
= newTemp(Ity_I16
);
29418 assign(t16
, binop(Iop_8HLto16
, mkexpr(t8
), mkexpr(t8
)));
29419 IRTemp t32
= newTemp(Ity_I32
);
29420 assign(t32
, binop(Iop_16HLto32
, mkexpr(t16
), mkexpr(t16
)));
29421 IRTemp t64
= newTemp(Ity_I64
);
29422 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
29423 IRExpr
* res
= IRExpr_Qop(Iop_64x4toV256
, mkexpr(t64
), mkexpr(t64
),
29424 mkexpr(t64
), mkexpr(t64
));
29425 putYMMReg(rG
, res
);
29426 goto decode_success
;
29431 /* VPBROADCASTW xmm2/m16, xmm1 = VEX.128.66.0F38.W0 79 /r */
29432 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29433 && 0==getRexW(pfx
)/*W0*/) {
29434 UChar modrm
= getUChar(delta
);
29435 UInt rG
= gregOfRexRM(pfx
, modrm
);
29436 IRTemp t16
= newTemp(Ity_I16
);
29437 if (epartIsReg(modrm
)) {
29438 UInt rE
= eregOfRexRM(pfx
, modrm
);
29440 DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
29441 assign(t16
, unop(Iop_32to16
, getXMMRegLane32(rE
, 0)));
29443 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29445 DIP("vpbroadcastw %s,%s\n", dis_buf
, nameXMMReg(rG
));
29446 assign(t16
, loadLE(Ity_I16
, mkexpr(addr
)));
29448 IRTemp t32
= newTemp(Ity_I32
);
29449 assign(t32
, binop(Iop_16HLto32
, mkexpr(t16
), mkexpr(t16
)));
29450 IRTemp t64
= newTemp(Ity_I64
);
29451 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
29452 IRExpr
* res
= binop(Iop_64HLtoV128
, mkexpr(t64
), mkexpr(t64
));
29453 putYMMRegLoAndZU(rG
, res
);
29454 goto decode_success
;
29456 /* VPBROADCASTW xmm2/m16, ymm1 = VEX.256.66.0F38.W0 79 /r */
29457 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29458 && 0==getRexW(pfx
)/*W0*/) {
29459 UChar modrm
= getUChar(delta
);
29460 UInt rG
= gregOfRexRM(pfx
, modrm
);
29461 IRTemp t16
= newTemp(Ity_I16
);
29462 if (epartIsReg(modrm
)) {
29463 UInt rE
= eregOfRexRM(pfx
, modrm
);
29465 DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
));
29466 assign(t16
, unop(Iop_32to16
, getXMMRegLane32(rE
, 0)));
29468 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29470 DIP("vpbroadcastw %s,%s\n", dis_buf
, nameYMMReg(rG
));
29471 assign(t16
, loadLE(Ity_I16
, mkexpr(addr
)));
29473 IRTemp t32
= newTemp(Ity_I32
);
29474 assign(t32
, binop(Iop_16HLto32
, mkexpr(t16
), mkexpr(t16
)));
29475 IRTemp t64
= newTemp(Ity_I64
);
29476 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
29477 IRExpr
* res
= IRExpr_Qop(Iop_64x4toV256
, mkexpr(t64
), mkexpr(t64
),
29478 mkexpr(t64
), mkexpr(t64
));
29479 putYMMReg(rG
, res
);
29480 goto decode_success
;
29485 /* VPMASKMOVD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 8C /r */
29486 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29487 && 0==getRexW(pfx
)/*W0*/ && !epartIsReg(getUChar(delta
))) {
29488 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vpmaskmovd",
29489 /*!isYMM*/False
, Ity_I32
, /*isLoad*/True
);
29490 goto decode_success
;
29492 /* VPMASKMOVD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 8C /r */
29493 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29494 && 0==getRexW(pfx
)/*W0*/ && !epartIsReg(getUChar(delta
))) {
29495 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vpmaskmovd",
29496 /*isYMM*/True
, Ity_I32
, /*isLoad*/True
);
29497 goto decode_success
;
29499 /* VPMASKMOVQ m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 8C /r */
29500 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29501 && 1==getRexW(pfx
)/*W1*/ && !epartIsReg(getUChar(delta
))) {
29502 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vpmaskmovq",
29503 /*!isYMM*/False
, Ity_I64
, /*isLoad*/True
);
29504 goto decode_success
;
29506 /* VPMASKMOVQ m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 8C /r */
29507 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29508 && 1==getRexW(pfx
)/*W1*/ && !epartIsReg(getUChar(delta
))) {
29509 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vpmaskmovq",
29510 /*isYMM*/True
, Ity_I64
, /*isLoad*/True
);
29511 goto decode_success
;
29516 /* VPMASKMOVD xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 8E /r */
29517 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29518 && 0==getRexW(pfx
)/*W0*/ && !epartIsReg(getUChar(delta
))) {
29519 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vpmaskmovd",
29520 /*!isYMM*/False
, Ity_I32
, /*!isLoad*/False
);
29521 goto decode_success
;
29523 /* VPMASKMOVD ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 8E /r */
29524 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29525 && 0==getRexW(pfx
)/*W0*/ && !epartIsReg(getUChar(delta
))) {
29526 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vpmaskmovd",
29527 /*isYMM*/True
, Ity_I32
, /*!isLoad*/False
);
29528 goto decode_success
;
29530 /* VPMASKMOVQ xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W1 8E /r */
29531 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29532 && 1==getRexW(pfx
)/*W1*/ && !epartIsReg(getUChar(delta
))) {
29533 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vpmaskmovq",
29534 /*!isYMM*/False
, Ity_I64
, /*!isLoad*/False
);
29535 goto decode_success
;
29537 /* VPMASKMOVQ ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W1 8E /r */
29538 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29539 && 1==getRexW(pfx
)/*W1*/ && !epartIsReg(getUChar(delta
))) {
29540 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vpmaskmovq",
29541 /*isYMM*/True
, Ity_I64
, /*!isLoad*/False
);
29542 goto decode_success
;
29547 /* VPGATHERDD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 90 /r */
29548 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29549 && 0 == getRexW(pfx
)/*W0*/ && !epartIsReg(getUChar(delta
))) {
29550 Long delta0
= delta
;
29551 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vpgatherdd",
29552 /*!isYMM*/False
, /*!isVM64x*/False
, Ity_I32
);
29553 if (delta
!= delta0
)
29554 goto decode_success
;
29556 /* VPGATHERDD ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 90 /r */
29557 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29558 && 0 == getRexW(pfx
)/*W0*/ && !epartIsReg(getUChar(delta
))) {
29559 Long delta0
= delta
;
29560 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vpgatherdd",
29561 /*isYMM*/True
, /*!isVM64x*/False
, Ity_I32
);
29562 if (delta
!= delta0
)
29563 goto decode_success
;
29565 /* VPGATHERDQ xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 90 /r */
29566 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29567 && 1 == getRexW(pfx
)/*W1*/ && !epartIsReg(getUChar(delta
))) {
29568 Long delta0
= delta
;
29569 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vpgatherdq",
29570 /*!isYMM*/False
, /*!isVM64x*/False
, Ity_I64
);
29571 if (delta
!= delta0
)
29572 goto decode_success
;
29574 /* VPGATHERDQ ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 90 /r */
29575 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29576 && 1 == getRexW(pfx
)/*W1*/ && !epartIsReg(getUChar(delta
))) {
29577 Long delta0
= delta
;
29578 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vpgatherdq",
29579 /*isYMM*/True
, /*!isVM64x*/False
, Ity_I64
);
29580 if (delta
!= delta0
)
29581 goto decode_success
;
29586 /* VPGATHERQD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 91 /r */
29587 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29588 && 0 == getRexW(pfx
)/*W0*/ && !epartIsReg(getUChar(delta
))) {
29589 Long delta0
= delta
;
29590 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vpgatherqd",
29591 /*!isYMM*/False
, /*isVM64x*/True
, Ity_I32
);
29592 if (delta
!= delta0
)
29593 goto decode_success
;
29595 /* VPGATHERQD xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 91 /r */
29596 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29597 && 0 == getRexW(pfx
)/*W0*/ && !epartIsReg(getUChar(delta
))) {
29598 Long delta0
= delta
;
29599 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vpgatherqd",
29600 /*isYMM*/True
, /*isVM64x*/True
, Ity_I32
);
29601 if (delta
!= delta0
)
29602 goto decode_success
;
29604 /* VPGATHERQQ xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 91 /r */
29605 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29606 && 1 == getRexW(pfx
)/*W1*/ && !epartIsReg(getUChar(delta
))) {
29607 Long delta0
= delta
;
29608 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vpgatherqq",
29609 /*!isYMM*/False
, /*isVM64x*/True
, Ity_I64
);
29610 if (delta
!= delta0
)
29611 goto decode_success
;
29613 /* VPGATHERQQ ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 91 /r */
29614 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29615 && 1 == getRexW(pfx
)/*W1*/ && !epartIsReg(getUChar(delta
))) {
29616 Long delta0
= delta
;
29617 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vpgatherqq",
29618 /*isYMM*/True
, /*isVM64x*/True
, Ity_I64
);
29619 if (delta
!= delta0
)
29620 goto decode_success
;
29625 /* VGATHERDPS xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 92 /r */
29626 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29627 && 0 == getRexW(pfx
)/*W0*/ && !epartIsReg(getUChar(delta
))) {
29628 Long delta0
= delta
;
29629 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vgatherdps",
29630 /*!isYMM*/False
, /*!isVM64x*/False
, Ity_I32
);
29631 if (delta
!= delta0
)
29632 goto decode_success
;
29634 /* VGATHERDPS ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 92 /r */
29635 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29636 && 0 == getRexW(pfx
)/*W0*/ && !epartIsReg(getUChar(delta
))) {
29637 Long delta0
= delta
;
29638 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vgatherdps",
29639 /*isYMM*/True
, /*!isVM64x*/False
, Ity_I32
);
29640 if (delta
!= delta0
)
29641 goto decode_success
;
29643 /* VGATHERDPD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 92 /r */
29644 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29645 && 1 == getRexW(pfx
)/*W1*/ && !epartIsReg(getUChar(delta
))) {
29646 Long delta0
= delta
;
29647 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vgatherdpd",
29648 /*!isYMM*/False
, /*!isVM64x*/False
, Ity_I64
);
29649 if (delta
!= delta0
)
29650 goto decode_success
;
29652 /* VGATHERDPD ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 92 /r */
29653 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29654 && 1 == getRexW(pfx
)/*W1*/ && !epartIsReg(getUChar(delta
))) {
29655 Long delta0
= delta
;
29656 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vgatherdpd",
29657 /*isYMM*/True
, /*!isVM64x*/False
, Ity_I64
);
29658 if (delta
!= delta0
)
29659 goto decode_success
;
29664 /* VGATHERQPS xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 93 /r */
29665 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29666 && 0 == getRexW(pfx
)/*W0*/ && !epartIsReg(getUChar(delta
))) {
29667 Long delta0
= delta
;
29668 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vgatherqps",
29669 /*!isYMM*/False
, /*isVM64x*/True
, Ity_I32
);
29670 if (delta
!= delta0
)
29671 goto decode_success
;
29673 /* VGATHERQPS xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 93 /r */
29674 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29675 && 0 == getRexW(pfx
)/*W0*/ && !epartIsReg(getUChar(delta
))) {
29676 Long delta0
= delta
;
29677 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vgatherqps",
29678 /*isYMM*/True
, /*isVM64x*/True
, Ity_I32
);
29679 if (delta
!= delta0
)
29680 goto decode_success
;
29682 /* VGATHERQPD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 93 /r */
29683 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29684 && 1 == getRexW(pfx
)/*W1*/ && !epartIsReg(getUChar(delta
))) {
29685 Long delta0
= delta
;
29686 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vgatherqpd",
29687 /*!isYMM*/False
, /*isVM64x*/True
, Ity_I64
);
29688 if (delta
!= delta0
)
29689 goto decode_success
;
29691 /* VGATHERQPD ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 93 /r */
29692 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29693 && 1 == getRexW(pfx
)/*W1*/ && !epartIsReg(getUChar(delta
))) {
29694 Long delta0
= delta
;
29695 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vgatherqpd",
29696 /*isYMM*/True
, /*isVM64x*/True
, Ity_I64
);
29697 if (delta
!= delta0
)
29698 goto decode_success
;
29702 case 0x96 ... 0x9F:
29703 case 0xA6 ... 0xAF:
29704 case 0xB6 ... 0xBF:
29705 /* VFMADDSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 96 /r */
29706 /* VFMADDSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 96 /r */
29707 /* VFMADDSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 96 /r */
29708 /* VFMADDSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 96 /r */
29709 /* VFMSUBADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 97 /r */
29710 /* VFMSUBADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 97 /r */
29711 /* VFMSUBADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 97 /r */
29712 /* VFMSUBADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 97 /r */
29713 /* VFMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 98 /r */
29714 /* VFMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 98 /r */
29715 /* VFMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 98 /r */
29716 /* VFMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 98 /r */
29717 /* VFMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 99 /r */
29718 /* VFMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 99 /r */
29719 /* VFMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9A /r */
29720 /* VFMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9A /r */
29721 /* VFMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9A /r */
29722 /* VFMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9A /r */
29723 /* VFMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9B /r */
29724 /* VFMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9B /r */
29725 /* VFNMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9C /r */
29726 /* VFNMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9C /r */
29727 /* VFNMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9C /r */
29728 /* VFNMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9C /r */
29729 /* VFNMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9D /r */
29730 /* VFNMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9D /r */
29731 /* VFNMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9E /r */
29732 /* VFNMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9E /r */
29733 /* VFNMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9E /r */
29734 /* VFNMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9E /r */
29735 /* VFNMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9F /r */
29736 /* VFNMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9F /r */
29737 /* VFMADDSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A6 /r */
29738 /* VFMADDSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A6 /r */
29739 /* VFMADDSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A6 /r */
29740 /* VFMADDSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A6 /r */
29741 /* VFMSUBADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A7 /r */
29742 /* VFMSUBADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A7 /r */
29743 /* VFMSUBADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A7 /r */
29744 /* VFMSUBADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A7 /r */
29745 /* VFMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A8 /r */
29746 /* VFMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A8 /r */
29747 /* VFMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A8 /r */
29748 /* VFMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A8 /r */
29749 /* VFMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 A9 /r */
29750 /* VFMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 A9 /r */
29751 /* VFMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AA /r */
29752 /* VFMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AA /r */
29753 /* VFMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AA /r */
29754 /* VFMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AA /r */
29755 /* VFMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AB /r */
29756 /* VFMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AB /r */
29757 /* VFNMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AC /r */
29758 /* VFNMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AC /r */
29759 /* VFNMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AC /r */
29760 /* VFNMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AC /r */
29761 /* VFNMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AD /r */
29762 /* VFNMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AD /r */
29763 /* VFNMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AE /r */
29764 /* VFNMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AE /r */
29765 /* VFNMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AE /r */
29766 /* VFNMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AE /r */
29767 /* VFNMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AF /r */
29768 /* VFNMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AF /r */
29769 /* VFMADDSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B6 /r */
29770 /* VFMADDSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B6 /r */
29771 /* VFMADDSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B6 /r */
29772 /* VFMADDSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B6 /r */
29773 /* VFMSUBADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B7 /r */
29774 /* VFMSUBADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B7 /r */
29775 /* VFMSUBADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B7 /r */
29776 /* VFMSUBADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B7 /r */
29777 /* VFMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B8 /r */
29778 /* VFMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B8 /r */
29779 /* VFMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B8 /r */
29780 /* VFMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B8 /r */
29781 /* VFMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 B9 /r */
29782 /* VFMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 B9 /r */
29783 /* VFMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BA /r */
29784 /* VFMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BA /r */
29785 /* VFMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BA /r */
29786 /* VFMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BA /r */
29787 /* VFMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BB /r */
29788 /* VFMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BB /r */
29789 /* VFNMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BC /r */
29790 /* VFNMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BC /r */
29791 /* VFNMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BC /r */
29792 /* VFNMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BC /r */
29793 /* VFNMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BD /r */
29794 /* VFNMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BD /r */
29795 /* VFNMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BE /r */
29796 /* VFNMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BE /r */
29797 /* VFNMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BE /r */
29798 /* VFNMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BE /r */
29799 /* VFNMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BF /r */
29800 /* VFNMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BF /r */
29801 if (have66noF2noF3(pfx
)) {
29802 delta
= dis_FMA( vbi
, pfx
, delta
, opc
);
29804 dres
->hint
= Dis_HintVerbose
;
29805 goto decode_success
;
29814 /* VAESIMC xmm2/m128, xmm1 = VEX.128.66.0F38.WIG DB /r */
29815 /* VAESENC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DC /r */
29816 /* VAESENCLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DD /r */
29817 /* VAESDEC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DE /r */
29818 /* VAESDECLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DF /r */
29819 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29820 delta
= dis_AESx( vbi
, pfx
, delta
, True
/*!isAvx*/, opc
);
29821 if (opc
!= 0xDB) *uses_vvvv
= True
;
29822 goto decode_success
;
29827 /* ANDN r/m32, r32b, r32a = VEX.NDS.LZ.0F38.W0 F2 /r */
29828 /* ANDN r/m64, r64b, r64a = VEX.NDS.LZ.0F38.W1 F2 /r */
29829 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
29830 Int size
= getRexW(pfx
) ? 8 : 4;
29831 IRType ty
= szToITy(size
);
29832 IRTemp dst
= newTemp(ty
);
29833 IRTemp src1
= newTemp(ty
);
29834 IRTemp src2
= newTemp(ty
);
29835 UChar rm
= getUChar(delta
);
29837 assign( src1
, getIRegV(size
,pfx
) );
29838 if (epartIsReg(rm
)) {
29839 assign( src2
, getIRegE(size
,pfx
,rm
) );
29840 DIP("andn %s,%s,%s\n", nameIRegE(size
,pfx
,rm
),
29841 nameIRegV(size
,pfx
), nameIRegG(size
,pfx
,rm
));
29844 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29845 assign( src2
, loadLE(ty
, mkexpr(addr
)) );
29846 DIP("andn %s,%s,%s\n", dis_buf
, nameIRegV(size
,pfx
),
29847 nameIRegG(size
,pfx
,rm
));
29851 assign( dst
, binop( mkSizedOp(ty
,Iop_And8
),
29852 unop( mkSizedOp(ty
,Iop_Not8
), mkexpr(src1
) ),
29854 putIRegG( size
, pfx
, rm
, mkexpr(dst
) );
29855 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(size
== 8
29856 ? AMD64G_CC_OP_ANDN64
29857 : AMD64G_CC_OP_ANDN32
)) );
29858 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(dst
))) );
29859 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0)) );
29861 goto decode_success
;
29866 /* BLSI r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /3 */
29867 /* BLSI r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /3 */
29868 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/
29869 && !haveREX(pfx
) && gregLO3ofRM(getUChar(delta
)) == 3) {
29870 Int size
= getRexW(pfx
) ? 8 : 4;
29871 IRType ty
= szToITy(size
);
29872 IRTemp src
= newTemp(ty
);
29873 IRTemp dst
= newTemp(ty
);
29874 UChar rm
= getUChar(delta
);
29876 if (epartIsReg(rm
)) {
29877 assign( src
, getIRegE(size
,pfx
,rm
) );
29878 DIP("blsi %s,%s\n", nameIRegE(size
,pfx
,rm
),
29879 nameIRegV(size
,pfx
));
29882 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29883 assign( src
, loadLE(ty
, mkexpr(addr
)) );
29884 DIP("blsi %s,%s\n", dis_buf
, nameIRegV(size
,pfx
));
29888 assign( dst
, binop(mkSizedOp(ty
,Iop_And8
),
29889 binop(mkSizedOp(ty
,Iop_Sub8
), mkU(ty
, 0),
29890 mkexpr(src
)), mkexpr(src
)) );
29891 putIRegV( size
, pfx
, mkexpr(dst
) );
29892 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(size
== 8
29893 ? AMD64G_CC_OP_BLSI64
29894 : AMD64G_CC_OP_BLSI32
)) );
29895 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(dst
))) );
29896 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto64(mkexpr(src
))) );
29898 goto decode_success
;
29900 /* BLSMSK r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /2 */
29901 /* BLSMSK r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /2 */
29902 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/
29903 && !haveREX(pfx
) && gregLO3ofRM(getUChar(delta
)) == 2) {
29904 Int size
= getRexW(pfx
) ? 8 : 4;
29905 IRType ty
= szToITy(size
);
29906 IRTemp src
= newTemp(ty
);
29907 IRTemp dst
= newTemp(ty
);
29908 UChar rm
= getUChar(delta
);
29910 if (epartIsReg(rm
)) {
29911 assign( src
, getIRegE(size
,pfx
,rm
) );
29912 DIP("blsmsk %s,%s\n", nameIRegE(size
,pfx
,rm
),
29913 nameIRegV(size
,pfx
));
29916 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29917 assign( src
, loadLE(ty
, mkexpr(addr
)) );
29918 DIP("blsmsk %s,%s\n", dis_buf
, nameIRegV(size
,pfx
));
29922 assign( dst
, binop(mkSizedOp(ty
,Iop_Xor8
),
29923 binop(mkSizedOp(ty
,Iop_Sub8
), mkexpr(src
),
29924 mkU(ty
, 1)), mkexpr(src
)) );
29925 putIRegV( size
, pfx
, mkexpr(dst
) );
29926 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(size
== 8
29927 ? AMD64G_CC_OP_BLSMSK64
29928 : AMD64G_CC_OP_BLSMSK32
)) );
29929 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(dst
))) );
29930 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto64(mkexpr(src
))) );
29932 goto decode_success
;
29934 /* BLSR r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /1 */
29935 /* BLSR r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /1 */
29936 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/
29937 && !haveREX(pfx
) && gregLO3ofRM(getUChar(delta
)) == 1) {
29938 Int size
= getRexW(pfx
) ? 8 : 4;
29939 IRType ty
= szToITy(size
);
29940 IRTemp src
= newTemp(ty
);
29941 IRTemp dst
= newTemp(ty
);
29942 UChar rm
= getUChar(delta
);
29944 if (epartIsReg(rm
)) {
29945 assign( src
, getIRegE(size
,pfx
,rm
) );
29946 DIP("blsr %s,%s\n", nameIRegE(size
,pfx
,rm
),
29947 nameIRegV(size
,pfx
));
29950 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29951 assign( src
, loadLE(ty
, mkexpr(addr
)) );
29952 DIP("blsr %s,%s\n", dis_buf
, nameIRegV(size
,pfx
));
29956 assign( dst
, binop(mkSizedOp(ty
,Iop_And8
),
29957 binop(mkSizedOp(ty
,Iop_Sub8
), mkexpr(src
),
29958 mkU(ty
, 1)), mkexpr(src
)) );
29959 putIRegV( size
, pfx
, mkexpr(dst
) );
29960 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(size
== 8
29961 ? AMD64G_CC_OP_BLSR64
29962 : AMD64G_CC_OP_BLSR32
)) );
29963 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(dst
))) );
29964 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto64(mkexpr(src
))) );
29966 goto decode_success
;
29971 /* BZHI r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F5 /r */
29972 /* BZHI r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F5 /r */
29973 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
29974 Int size
= getRexW(pfx
) ? 8 : 4;
29975 IRType ty
= szToITy(size
);
29976 IRTemp dst
= newTemp(ty
);
29977 IRTemp src1
= newTemp(ty
);
29978 IRTemp src2
= newTemp(ty
);
29979 IRTemp start
= newTemp(Ity_I8
);
29980 IRTemp cond
= newTemp(Ity_I1
);
29981 UChar rm
= getUChar(delta
);
29983 assign( src2
, getIRegV(size
,pfx
) );
29984 if (epartIsReg(rm
)) {
29985 assign( src1
, getIRegE(size
,pfx
,rm
) );
29986 DIP("bzhi %s,%s,%s\n", nameIRegV(size
,pfx
),
29987 nameIRegE(size
,pfx
,rm
), nameIRegG(size
,pfx
,rm
));
29990 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29991 assign( src1
, loadLE(ty
, mkexpr(addr
)) );
29992 DIP("bzhi %s,%s,%s\n", nameIRegV(size
,pfx
), dis_buf
,
29993 nameIRegG(size
,pfx
,rm
));
29997 assign( start
, narrowTo( Ity_I8
, mkexpr(src2
) ) );
29998 assign( cond
, binop(Iop_CmpLT32U
,
29999 unop(Iop_8Uto32
, mkexpr(start
)),
30001 /* if (start < opsize) {
30005 dst = (src1 << (opsize-start)) u>> (opsize-start);
30013 binop(Iop_CmpEQ8
, mkexpr(start
), mkU8(0)),
30016 mkSizedOp(ty
,Iop_Shr8
),
30018 mkSizedOp(ty
,Iop_Shl8
),
30020 binop(Iop_Sub8
, mkU8(8*size
), mkexpr(start
))
30022 binop(Iop_Sub8
, mkU8(8*size
), mkexpr(start
))
30028 putIRegG( size
, pfx
, rm
, mkexpr(dst
) );
30029 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(size
== 8
30030 ? AMD64G_CC_OP_BLSR64
30031 : AMD64G_CC_OP_BLSR32
)) );
30032 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(dst
))) );
30033 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto64(mkexpr(cond
))) );
30035 goto decode_success
;
30037 /* PDEP r/m32, r32b, r32a = VEX.NDS.LZ.F2.0F38.W0 F5 /r */
30038 /* PDEP r/m64, r64b, r64a = VEX.NDS.LZ.F2.0F38.W1 F5 /r */
30039 if (haveF2no66noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
30040 Int size
= getRexW(pfx
) ? 8 : 4;
30041 IRType ty
= szToITy(size
);
30042 IRTemp src
= newTemp(ty
);
30043 IRTemp mask
= newTemp(ty
);
30044 UChar rm
= getUChar(delta
);
30046 assign( src
, getIRegV(size
,pfx
) );
30047 if (epartIsReg(rm
)) {
30048 assign( mask
, getIRegE(size
,pfx
,rm
) );
30049 DIP("pdep %s,%s,%s\n", nameIRegE(size
,pfx
,rm
),
30050 nameIRegV(size
,pfx
), nameIRegG(size
,pfx
,rm
));
30053 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
30054 assign( mask
, loadLE(ty
, mkexpr(addr
)) );
30055 DIP("pdep %s,%s,%s\n", dis_buf
, nameIRegV(size
,pfx
),
30056 nameIRegG(size
,pfx
,rm
));
30060 IRExpr
** args
= mkIRExprVec_2( widenUto64(mkexpr(src
)),
30061 widenUto64(mkexpr(mask
)) );
30062 putIRegG( size
, pfx
, rm
,
30063 narrowTo(ty
, mkIRExprCCall(Ity_I64
, 0/*regparms*/,
30064 "amd64g_calculate_pdep",
30065 &amd64g_calculate_pdep
, args
)) );
30067 /* Flags aren't modified. */
30068 goto decode_success
;
30070 /* PEXT r/m32, r32b, r32a = VEX.NDS.LZ.F3.0F38.W0 F5 /r */
30071 /* PEXT r/m64, r64b, r64a = VEX.NDS.LZ.F3.0F38.W1 F5 /r */
30072 if (haveF3no66noF2(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
30073 Int size
= getRexW(pfx
) ? 8 : 4;
30074 IRType ty
= szToITy(size
);
30075 IRTemp src
= newTemp(ty
);
30076 IRTemp mask
= newTemp(ty
);
30077 UChar rm
= getUChar(delta
);
30079 assign( src
, getIRegV(size
,pfx
) );
30080 if (epartIsReg(rm
)) {
30081 assign( mask
, getIRegE(size
,pfx
,rm
) );
30082 DIP("pext %s,%s,%s\n", nameIRegE(size
,pfx
,rm
),
30083 nameIRegV(size
,pfx
), nameIRegG(size
,pfx
,rm
));
30086 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
30087 assign( mask
, loadLE(ty
, mkexpr(addr
)) );
30088 DIP("pext %s,%s,%s\n", dis_buf
, nameIRegV(size
,pfx
),
30089 nameIRegG(size
,pfx
,rm
));
30093 /* First mask off bits not set in mask, they are ignored
30094 and it should be fine if they contain undefined values. */
30095 IRExpr
* masked
= binop(mkSizedOp(ty
,Iop_And8
),
30096 mkexpr(src
), mkexpr(mask
));
30097 IRExpr
** args
= mkIRExprVec_2( widenUto64(masked
),
30098 widenUto64(mkexpr(mask
)) );
30099 putIRegG( size
, pfx
, rm
,
30100 narrowTo(ty
, mkIRExprCCall(Ity_I64
, 0/*regparms*/,
30101 "amd64g_calculate_pext",
30102 &amd64g_calculate_pext
, args
)) );
30104 /* Flags aren't modified. */
30105 goto decode_success
;
30110 /* MULX r/m32, r32b, r32a = VEX.NDD.LZ.F2.0F38.W0 F6 /r */
30111 /* MULX r/m64, r64b, r64a = VEX.NDD.LZ.F2.0F38.W1 F6 /r */
30112 if (haveF2no66noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
30113 Int size
= getRexW(pfx
) ? 8 : 4;
30114 IRType ty
= szToITy(size
);
30115 IRTemp src1
= newTemp(ty
);
30116 IRTemp src2
= newTemp(ty
);
30117 IRTemp res
= newTemp(size
== 8 ? Ity_I128
: Ity_I64
);
30118 UChar rm
= getUChar(delta
);
30120 assign( src1
, getIRegRDX(size
) );
30121 if (epartIsReg(rm
)) {
30122 assign( src2
, getIRegE(size
,pfx
,rm
) );
30123 DIP("mulx %s,%s,%s\n", nameIRegE(size
,pfx
,rm
),
30124 nameIRegV(size
,pfx
), nameIRegG(size
,pfx
,rm
));
30127 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
30128 assign( src2
, loadLE(ty
, mkexpr(addr
)) );
30129 DIP("mulx %s,%s,%s\n", dis_buf
, nameIRegV(size
,pfx
),
30130 nameIRegG(size
,pfx
,rm
));
30134 assign( res
, binop(size
== 8 ? Iop_MullU64
: Iop_MullU32
,
30135 mkexpr(src1
), mkexpr(src2
)) );
30136 putIRegV( size
, pfx
,
30137 unop(size
== 8 ? Iop_128to64
: Iop_64to32
, mkexpr(res
)) );
30138 putIRegG( size
, pfx
, rm
,
30139 unop(size
== 8 ? Iop_128HIto64
: Iop_64HIto32
,
30142 /* Flags aren't modified. */
30143 goto decode_success
;
30148 /* SARX r32b, r/m32, r32a = VEX.NDS.LZ.F3.0F38.W0 F7 /r */
30149 /* SARX r64b, r/m64, r64a = VEX.NDS.LZ.F3.0F38.W1 F7 /r */
30150 if (haveF3no66noF2(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
30151 delta
= dis_SHIFTX( uses_vvvv
, vbi
, pfx
, delta
, "sarx", Iop_Sar8
);
30152 goto decode_success
;
30154 /* SHLX r32b, r/m32, r32a = VEX.NDS.LZ.66.0F38.W0 F7 /r */
30155 /* SHLX r64b, r/m64, r64a = VEX.NDS.LZ.66.0F38.W1 F7 /r */
30156 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
30157 delta
= dis_SHIFTX( uses_vvvv
, vbi
, pfx
, delta
, "shlx", Iop_Shl8
);
30158 goto decode_success
;
30160 /* SHRX r32b, r/m32, r32a = VEX.NDS.LZ.F2.0F38.W0 F7 /r */
30161 /* SHRX r64b, r/m64, r64a = VEX.NDS.LZ.F2.0F38.W1 F7 /r */
30162 if (haveF2no66noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
30163 delta
= dis_SHIFTX( uses_vvvv
, vbi
, pfx
, delta
, "shrx", Iop_Shr8
);
30164 goto decode_success
;
30166 /* BEXTR r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F7 /r */
30167 /* BEXTR r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F7 /r */
30168 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
30169 Int size
= getRexW(pfx
) ? 8 : 4;
30170 IRType ty
= szToITy(size
);
30171 IRTemp dst
= newTemp(ty
);
30172 IRTemp src1
= newTemp(ty
);
30173 IRTemp src2
= newTemp(ty
);
30174 IRTemp stle
= newTemp(Ity_I16
);
30175 IRTemp start
= newTemp(Ity_I8
);
30176 IRTemp len
= newTemp(Ity_I8
);
30177 UChar rm
= getUChar(delta
);
30179 assign( src2
, getIRegV(size
,pfx
) );
30180 if (epartIsReg(rm
)) {
30181 assign( src1
, getIRegE(size
,pfx
,rm
) );
30182 DIP("bextr %s,%s,%s\n", nameIRegV(size
,pfx
),
30183 nameIRegE(size
,pfx
,rm
), nameIRegG(size
,pfx
,rm
));
30186 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
30187 assign( src1
, loadLE(ty
, mkexpr(addr
)) );
30188 DIP("bextr %s,%s,%s\n", nameIRegV(size
,pfx
), dis_buf
,
30189 nameIRegG(size
,pfx
,rm
));
30193 assign( stle
, narrowTo( Ity_I16
, mkexpr(src2
) ) );
30194 assign( start
, unop( Iop_16to8
, mkexpr(stle
) ) );
30195 assign( len
, unop( Iop_16HIto8
, mkexpr(stle
) ) );
30196 /* if (start+len < opsize) {
30198 dst = (src1 << (opsize-start-len)) u>> (opsize-len);
30202 if (start < opsize)
30203 dst = src1 u>> start;
30209 binop(Iop_CmpLT32U
,
30211 unop(Iop_8Uto32
, mkexpr(start
)),
30212 unop(Iop_8Uto32
, mkexpr(len
))),
30215 binop(Iop_CmpEQ8
, mkexpr(len
), mkU8(0)),
30217 binop(mkSizedOp(ty
,Iop_Shr8
),
30218 binop(mkSizedOp(ty
,Iop_Shl8
), mkexpr(src1
),
30220 binop(Iop_Sub8
, mkU8(8*size
),
30223 binop(Iop_Sub8
, mkU8(8*size
),
30227 binop(Iop_CmpLT32U
,
30228 unop(Iop_8Uto32
, mkexpr(start
)),
30230 binop(mkSizedOp(ty
,Iop_Shr8
), mkexpr(src1
),
30236 putIRegG( size
, pfx
, rm
, mkexpr(dst
) );
30237 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(size
== 8
30238 ? AMD64G_CC_OP_ANDN64
30239 : AMD64G_CC_OP_ANDN32
)) );
30240 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(dst
))) );
30241 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0)) );
30243 goto decode_success
;
/* Decode the register operands (up to `count`, 2..4) of a VEX-encoded
   instruction into the IRTemp array v[0..3], each an Ity_V128 temp.
   v[0] receives the G (destination) register; the E operand (register
   or memory) lands in v[count-1] or v[count-2] depending on `swap`;
   the vvvv operand and, for 4-operand forms, the is4 byte supply the
   remaining sources.  *dst receives the destination register number.
   NOTE(review): this chunk of the file is a damaged extraction — the
   opening brace, several else-branches and the return statement of
   this function are missing from the visible text; comments below
   describe only what the surviving fragments demonstrate. */
30263 static Long
decode_vregW(Int count
, Long delta
, UChar modrm
, Prefix pfx
,
30264 const VexAbiInfo
* vbi
, IRTemp
*v
, UInt
*dst
, Int swap
)
/* Allocate one 128-bit temp per potential operand slot. */
30266 v
[0] = newTemp(Ity_V128
);
30267 v
[1] = newTemp(Ity_V128
);
30268 v
[2] = newTemp(Ity_V128
);
30269 v
[3] = newTemp(Ity_V128
);
30270 IRTemp addr
= IRTemp_INVALID
;
/* Destination register comes from the ModRM reg field (REX-extended). */
30274 *dst
= gregOfRexRM(pfx
, modrm
);
30275 assign( v
[0], getXMMReg(*dst
) );
/* E operand: either a register ... */
30277 if ( epartIsReg( modrm
) ) {
30278 UInt ereg
= eregOfRexRM(pfx
, modrm
);
/* `swap` selects which source slot the E operand fills. */
30279 assign(swap
? v
[count
-1] : v
[count
-2], getXMMReg(ereg
) );
30280 DIS(dis_buf
, "%s", nameXMMReg(ereg
));
/* ... or a memory operand.  extra_byte accounts for a trailing is4
   immediate byte in 4-operand encodings; presumably the `& 0xF) != 9`
   test distinguishes the 3-operand opcode — TODO confirm against the
   full source. */
30282 Bool extra_byte
= (getUChar(delta
- 3) & 0xF) != 9;
30283 addr
= disAMode(&alen
, vbi
, pfx
, delta
, dis_buf
, extra_byte
);
30284 assign(swap
? v
[count
-1] : v
[count
-2], loadLE(Ity_V128
, mkexpr(addr
)));
/* vvvv operand from the VEX prefix (bit-inverted field, decoded here). */
30288 UInt vvvv
= getVexNvvvv(pfx
);
30291 DIP( "%s,%s", nameXMMReg(*dst
), dis_buf
);
/* 3-operand form: vvvv fills v[1] or v[2] depending on `swap`. */
30294 assign( swap
? v
[1] : v
[2], getXMMReg(vvvv
) );
30295 DIP( "%s,%s,%s", nameXMMReg(*dst
), nameXMMReg(vvvv
), dis_buf
);
/* 4-operand form: vvvv is always v[1]; the fourth source register
   number is the high nibble of the trailing is4 immediate byte. */
30299 assign( v
[1], getXMMReg(vvvv
) );
30300 UInt src2
= getUChar(delta
+ 1) >> 4;
30301 assign( swap
? v
[2] : v
[3], getXMMReg(src2
) );
30302 DIP( "%s,%s,%s,%s", nameXMMReg(*dst
), nameXMMReg(vvvv
),
30303 nameXMMReg(src2
), dis_buf
);
/* Translate an AMD FMA4 instruction (VFMADD/VFMSUB/VFNMADD/VFNMSUB and
   the ADDSUB/SUBADD variants, PS/PD/SS/SD forms) into IR.  The opcode
   byte encodes: bit 0 = element width (0 => F32), high nibble 0x70 =>
   negated product, 0x50 => alternating add/sub, and low-nibble bits
   select scalar forms that zero the upper lanes.  Returns the advanced
   delta.  NOTE(review): this chunk of the file is a damaged extraction —
   local declarations (operand[], dst, src[], i, j), several else-arms,
   closing braces and the return statement are missing from the visible
   text; comments describe only what the surviving fragments show. */
30310 static Long
dis_FMA4 (Prefix pfx
, Long delta
, UChar opc
,
30311 Bool
* uses_vvvv
, const VexAbiInfo
* vbi
)
30316 UChar modrm
= getUChar(delta
);
/* Scalar (SS/SD) forms zero the destination's upper 64 or 96 bits. */
30318 Bool zero_64F
= False
;
30319 Bool zero_96F
= False
;
/* Decode behaviour flags straight from the opcode byte. */
30320 UInt is_F32
= ((opc
& 0x01) == 0x00) ? 1 : 0;
30321 Bool neg
= (opc
& 0xF0) == 0x70;
30322 Bool alt
= (opc
& 0xF0) == 0x50;
30323 Bool sub
= alt
? (opc
& 0x0E) != 0x0E : (opc
& 0x0C) == 0x0C;
/* Low nibble selects the scalar opcodes; `(opc >> 4) != 0x05` exempts
   the 0x5x (ADDSUB/SUBADD) group, which is packed-only. */
30326 switch(opc
& 0xF) {
30327 case 0x0A: zero_96F
= (opc
>> 4) != 0x05; break;
30328 case 0x0B: zero_64F
= (opc
>> 4) != 0x05; break;
30329 case 0x0E: zero_96F
= (opc
>> 4) != 0x05; break;
30330 case 0x0F: zero_64F
= (opc
>> 4) != 0x05; break;
/* Emit the mnemonic piecewise: vfm[n](add|sub)[...](s|p)(s|d). */
30333 DIP("vfm%s", neg
? "n" : "");
30334 if(alt
) DIP("%s", sub
? "add" : "sub");
30335 DIP("%s", sub
? "sub" : "add");
30336 DIP("%c ", (zero_64F
|| zero_96F
) ? 's' : 'p');
30337 DIP("%c ", is_F32
? 's' : 'd');
/* Fetch all four operands; REX.W selects operand ordering (swap). */
30338 delta
= decode_vregW(4, delta
, modrm
, pfx
, vbi
, operand
, &dst
, getRexW(pfx
));
/* Lane-store helpers and narrowing ops, indexed by element width. */
30342 void (*putXMM
[2])(UInt
,Int
,IRExpr
*) = {&putXMMRegLane64F
, &putXMMRegLane32F
};
30344 IROp size_op
[] = {Iop_V128to64
, Iop_V128HIto64
, Iop_64to32
, Iop_64HIto32
};
30345 IROp neg_op
[] = {Iop_NegF64
, Iop_NegF32
};
/* One iteration per destination lane: 2 lanes for F64, 4 for F32. */
30347 for(i
= 0; i
< is_F32
* 2 + 2; i
++) {
/* Extract the three source values for this lane. */
30348 for(j
= 0; j
< 3; j
++) {
30350 src
[j
] = unop(Iop_ReinterpI32asF32
,
30351 unop(size_op
[i
%2+2],
30353 mkexpr(operand
[j
+ 1])
30357 src
[j
] = unop(Iop_ReinterpI64asF64
,
30359 mkexpr(operand
[j
+ 1])
/* Fused multiply-add per lane; `neg` negates the product (src[0]),
   `sub` negates the addend (src[2]).  Rounding mode is the fake
   round-to-nearest used throughout this file for FP ops. */
30363 putXMM
[is_F32
](dst
, i
, IRExpr_Qop(is_F32
? Iop_MAddF32
: Iop_MAddF64
,
30364 get_FAKE_roundingmode(),
30365 neg
? unop(neg_op
[is_F32
], src
[0])
30368 sub
? unop(neg_op
[is_F32
], src
[2])
30376 /* Zero out top bits of ymm/xmm register. */
30377 putYMMRegLane128( dst
, 1, mkV128(0) );
/* Scalar forms additionally clear the upper 64 (SD) or 96 (SS) bits
   of the low 128-bit lane. */
30379 if(zero_64F
|| zero_96F
) {
30380 putXMMRegLane64( dst
, 1, IRExpr_Const(IRConst_U64(0)));
30384 putXMMRegLane32( dst
, 1, IRExpr_Const(IRConst_U32(0)));
30390 /*------------------------------------------------------------*/
30392 /*--- Top-level post-escape decoders: dis_ESC_0F3A__VEX ---*/
30394 /*------------------------------------------------------------*/
/* Build IR for a 128-bit VPERMILPS with an immediate control: each of
   the four 32-bit result lanes is selected from the source lanes by a
   2-bit field of imm8 (bits 7:6 -> lane 3 ... bits 1:0 -> lane 0).
   Returns a new Ity_V128 temp holding the permuted value.
   NOTE(review): this chunk of the file is a damaged extraction — the
   opening brace, `#undef SEL` and the `return res;` that close this
   function are missing from the visible text. */
30396 static IRTemp
math_VPERMILPS_128 ( IRTemp sV
, UInt imm8
)
30398 vassert(imm8
< 256);
/* Split the source vector into its four 32-bit lanes. */
30399 IRTemp s3
, s2
, s1
, s0
;
30400 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
30401 breakupV128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
/* SEL(n) maps a 2-bit selector to the corresponding source lane. */
30402 # define SEL(_nn) (((_nn)==0) ? s0 : ((_nn)==1) ? s1 \
30403 : ((_nn)==2) ? s2 : s3)
30404 IRTemp res
= newTemp(Ity_V128
);
/* Reassemble the result from the imm8-selected lanes, high to low. */
30405 assign(res
, mkV128from32s( SEL((imm8
>> 6) & 3),
30406 SEL((imm8
>> 4) & 3),
30407 SEL((imm8
>> 2) & 3),
30408 SEL((imm8
>> 0) & 3) ));
30413 /* Handles 128 and 256 bit versions of VCVTPS2PH. */
/* Translate VCVTPS2PH (float32 -> float16, 128- and 256-bit forms).
   Converts the G (source) register's F32 lanes to F16 and writes the
   narrowed result to either a register or memory E operand, using the
   rounding mode from the trailing immediate.  Returns the advanced
   delta.  NOTE(review): this chunk of the file is a damaged extraction —
   the opening brace, some declarations (alen, dis_buf), delta
   increments, the else keyword and the closing return are missing from
   the visible text. */
30414 static Long
dis_VCVTPS2PH ( const VexAbiInfo
* vbi
, Prefix pfx
,
30415 Long delta
, Bool is256bit
)
30417 /* This is a width-halving store or reg-reg move, that does conversion on the
30418 transferred data. */
30419 UChar modrm
= getUChar(delta
);
30420 UInt rG
= gregOfRexRM(pfx
, modrm
);
/* rm holds the IR rounding mode for the conversion. */
30421 IRTemp rm
= newTemp(Ity_I32
);
/* 8 lanes for the 256-bit form, 4 for the 128-bit form. */
30422 IROp op
= is256bit
? Iop_F32toF16x8
: Iop_F32toF16x4
;
30423 IRExpr
* srcG
= (is256bit
? getYMMReg
: getXMMReg
)(rG
);
30425 /* (imm & 3) contains an Intel-encoded rounding mode. Because that encoding
30426 is the same as the encoding for IRRoundingMode, we can use that value
30427 directly in the IR as a rounding mode. */
/* Register destination: convert and write to xmm rE, zeroing the
   upper lanes. */
30429 if (epartIsReg(modrm
)) {
30430 UInt rE
= eregOfRexRM(pfx
, modrm
);
/* imm bit 2 set => use the current MXCSR rounding mode instead of the
   statically-encoded one in imm bits 1:0. */
30432 UInt imm
= getUChar(delta
);
30433 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
30434 IRExpr
* res
= binop(op
, mkexpr(rm
), srcG
);
/* 128-bit form yields 64 bits of F16 data; widen to V128 for the
   register write. */
30436 res
= unop(Iop_64UtoV128
, res
);
30437 putYMMRegLoAndZU(rE
, res
);
30438 DIP("vcvtps2ph $%u,%s,%s\n",
30439 imm
, (is256bit
? nameYMMReg
: nameXMMReg
)(rG
), nameXMMReg(rE
));
/* Memory destination: decode the address (1 trailing imm byte) and
   store the converted halves. */
30443 IRTemp addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30445 UInt imm
= getUChar(delta
);
30446 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
30447 IRExpr
* res
= binop(op
, mkexpr(rm
), srcG
);
30448 storeLE(mkexpr(addr
), res
);
30449 DIP("vcvtps2ph $%u,%s,%s\n",
30450 imm
, (is256bit
? nameYMMReg
: nameXMMReg
)(rG
), dis_buf
);
30453 /* doesn't use vvvv */
30457 __attribute__((noinline
))
30459 Long
dis_ESC_0F3A__VEX (
30460 /*MB_OUT*/DisResult
* dres
,
30461 /*OUT*/ Bool
* uses_vvvv
,
30462 const VexArchInfo
* archinfo
,
30463 const VexAbiInfo
* vbi
,
30464 Prefix pfx
, Int sz
, Long deltaIN
30467 IRTemp addr
= IRTemp_INVALID
;
30470 Long delta
= deltaIN
;
30471 UChar opc
= getUChar(delta
);
30473 *uses_vvvv
= False
;
30479 /* VPERMQ imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 00 /r ib */
30480 /* VPERMPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 01 /r ib */
30481 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
30482 && 1==getRexW(pfx
)/*W1*/) {
30483 UChar modrm
= getUChar(delta
);
30485 UInt rG
= gregOfRexRM(pfx
, modrm
);
30486 IRTemp sV
= newTemp(Ity_V256
);
30487 const HChar
*name
= opc
== 0 ? "vpermq" : "vpermpd";
30488 if (epartIsReg(modrm
)) {
30489 UInt rE
= eregOfRexRM(pfx
, modrm
);
30491 imm8
= getUChar(delta
);
30492 DIP("%s $%u,%s,%s\n",
30493 name
, imm8
, nameYMMReg(rE
), nameYMMReg(rG
));
30494 assign(sV
, getYMMReg(rE
));
30496 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30498 imm8
= getUChar(delta
);
30499 DIP("%s $%u,%s,%s\n",
30500 name
, imm8
, dis_buf
, nameYMMReg(rG
));
30501 assign(sV
, loadLE(Ity_V256
, mkexpr(addr
)));
30505 s
[3] = s
[2] = s
[1] = s
[0] = IRTemp_INVALID
;
30506 breakupV256to64s(sV
, &s
[3], &s
[2], &s
[1], &s
[0]);
30507 IRTemp dV
= newTemp(Ity_V256
);
30508 assign(dV
, IRExpr_Qop(Iop_64x4toV256
,
30509 mkexpr(s
[(imm8
>> 6) & 3]),
30510 mkexpr(s
[(imm8
>> 4) & 3]),
30511 mkexpr(s
[(imm8
>> 2) & 3]),
30512 mkexpr(s
[(imm8
>> 0) & 3])));
30513 putYMMReg(rG
, mkexpr(dV
));
30514 goto decode_success
;
30519 /* VPBLENDD imm8, xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 02 /r ib */
30520 if (have66noF2noF3(pfx
)
30521 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
30522 UChar modrm
= getUChar(delta
);
30524 UInt rG
= gregOfRexRM(pfx
, modrm
);
30525 UInt rV
= getVexNvvvv(pfx
);
30526 IRTemp sV
= newTemp(Ity_V128
);
30527 IRTemp dV
= newTemp(Ity_V128
);
30530 assign(sV
, getXMMReg(rV
));
30531 if (epartIsReg(modrm
)) {
30532 UInt rE
= eregOfRexRM(pfx
, modrm
);
30534 imm8
= getUChar(delta
);
30535 DIP("vpblendd $%u,%s,%s,%s\n",
30536 imm8
, nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
30537 assign(dV
, getXMMReg(rE
));
30539 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30541 imm8
= getUChar(delta
);
30542 DIP("vpblendd $%u,%s,%s,%s\n",
30543 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
30544 assign(dV
, loadLE(Ity_V128
, mkexpr(addr
)));
30547 for (i
= 0; i
< 4; i
++) {
30548 s
[i
] = IRTemp_INVALID
;
30549 d
[i
] = IRTemp_INVALID
;
30551 breakupV128to32s( sV
, &s
[3], &s
[2], &s
[1], &s
[0] );
30552 breakupV128to32s( dV
, &d
[3], &d
[2], &d
[1], &d
[0] );
30553 for (i
= 0; i
< 4; i
++)
30554 putYMMRegLane32(rG
, i
, mkexpr((imm8
& (1<<i
)) ? d
[i
] : s
[i
]));
30555 putYMMRegLane128(rG
, 1, mkV128(0));
30557 goto decode_success
;
30559 /* VPBLENDD imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F3A.W0 02 /r ib */
30560 if (have66noF2noF3(pfx
)
30561 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
30562 UChar modrm
= getUChar(delta
);
30564 UInt rG
= gregOfRexRM(pfx
, modrm
);
30565 UInt rV
= getVexNvvvv(pfx
);
30566 IRTemp sV
= newTemp(Ity_V256
);
30567 IRTemp dV
= newTemp(Ity_V256
);
30570 assign(sV
, getYMMReg(rV
));
30571 if (epartIsReg(modrm
)) {
30572 UInt rE
= eregOfRexRM(pfx
, modrm
);
30574 imm8
= getUChar(delta
);
30575 DIP("vpblendd $%u,%s,%s,%s\n",
30576 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
30577 assign(dV
, getYMMReg(rE
));
30579 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30581 imm8
= getUChar(delta
);
30582 DIP("vpblendd $%u,%s,%s,%s\n",
30583 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
30584 assign(dV
, loadLE(Ity_V256
, mkexpr(addr
)));
30587 for (i
= 0; i
< 8; i
++) {
30588 s
[i
] = IRTemp_INVALID
;
30589 d
[i
] = IRTemp_INVALID
;
30591 breakupV256to32s( sV
, &s
[7], &s
[6], &s
[5], &s
[4],
30592 &s
[3], &s
[2], &s
[1], &s
[0] );
30593 breakupV256to32s( dV
, &d
[7], &d
[6], &d
[5], &d
[4],
30594 &d
[3], &d
[2], &d
[1], &d
[0] );
30595 for (i
= 0; i
< 8; i
++)
30596 putYMMRegLane32(rG
, i
, mkexpr((imm8
& (1<<i
)) ? d
[i
] : s
[i
]));
30598 goto decode_success
;
30603 /* VPERMILPS imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 04 /r ib */
30604 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
30605 UChar modrm
= getUChar(delta
);
30607 UInt rG
= gregOfRexRM(pfx
, modrm
);
30608 IRTemp sV
= newTemp(Ity_V256
);
30609 if (epartIsReg(modrm
)) {
30610 UInt rE
= eregOfRexRM(pfx
, modrm
);
30612 imm8
= getUChar(delta
);
30613 DIP("vpermilps $%u,%s,%s\n",
30614 imm8
, nameYMMReg(rE
), nameYMMReg(rG
));
30615 assign(sV
, getYMMReg(rE
));
30617 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30619 imm8
= getUChar(delta
);
30620 DIP("vpermilps $%u,%s,%s\n",
30621 imm8
, dis_buf
, nameYMMReg(rG
));
30622 assign(sV
, loadLE(Ity_V256
, mkexpr(addr
)));
30625 IRTemp sVhi
= IRTemp_INVALID
, sVlo
= IRTemp_INVALID
;
30626 breakupV256toV128s( sV
, &sVhi
, &sVlo
);
30627 IRTemp dVhi
= math_VPERMILPS_128( sVhi
, imm8
);
30628 IRTemp dVlo
= math_VPERMILPS_128( sVlo
, imm8
);
30629 IRExpr
* res
= binop(Iop_V128HLtoV256
, mkexpr(dVhi
), mkexpr(dVlo
));
30630 putYMMReg(rG
, res
);
30631 goto decode_success
;
30633 /* VPERMILPS imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 04 /r ib */
30634 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
30635 UChar modrm
= getUChar(delta
);
30637 UInt rG
= gregOfRexRM(pfx
, modrm
);
30638 IRTemp sV
= newTemp(Ity_V128
);
30639 if (epartIsReg(modrm
)) {
30640 UInt rE
= eregOfRexRM(pfx
, modrm
);
30642 imm8
= getUChar(delta
);
30643 DIP("vpermilps $%u,%s,%s\n",
30644 imm8
, nameXMMReg(rE
), nameXMMReg(rG
));
30645 assign(sV
, getXMMReg(rE
));
30647 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30649 imm8
= getUChar(delta
);
30650 DIP("vpermilps $%u,%s,%s\n",
30651 imm8
, dis_buf
, nameXMMReg(rG
));
30652 assign(sV
, loadLE(Ity_V128
, mkexpr(addr
)));
30655 putYMMRegLoAndZU(rG
, mkexpr ( math_VPERMILPS_128 ( sV
, imm8
) ) );
30656 goto decode_success
;
30661 /* VPERMILPD imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 05 /r ib */
30662 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
30663 UChar modrm
= getUChar(delta
);
30665 UInt rG
= gregOfRexRM(pfx
, modrm
);
30666 IRTemp sV
= newTemp(Ity_V128
);
30667 if (epartIsReg(modrm
)) {
30668 UInt rE
= eregOfRexRM(pfx
, modrm
);
30670 imm8
= getUChar(delta
);
30671 DIP("vpermilpd $%u,%s,%s\n",
30672 imm8
, nameXMMReg(rE
), nameXMMReg(rG
));
30673 assign(sV
, getXMMReg(rE
));
30675 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30677 imm8
= getUChar(delta
);
30678 DIP("vpermilpd $%u,%s,%s\n",
30679 imm8
, dis_buf
, nameXMMReg(rG
));
30680 assign(sV
, loadLE(Ity_V128
, mkexpr(addr
)));
30683 IRTemp s1
= newTemp(Ity_I64
);
30684 IRTemp s0
= newTemp(Ity_I64
);
30685 assign(s1
, unop(Iop_V128HIto64
, mkexpr(sV
)));
30686 assign(s0
, unop(Iop_V128to64
, mkexpr(sV
)));
30687 IRTemp dV
= newTemp(Ity_V128
);
30688 assign(dV
, binop(Iop_64HLtoV128
,
30689 mkexpr((imm8
& (1<<1)) ? s1
: s0
),
30690 mkexpr((imm8
& (1<<0)) ? s1
: s0
)));
30691 putYMMRegLoAndZU(rG
, mkexpr(dV
));
30692 goto decode_success
;
30694 /* VPERMILPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 05 /r ib */
30695 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
30696 UChar modrm
= getUChar(delta
);
30698 UInt rG
= gregOfRexRM(pfx
, modrm
);
30699 IRTemp sV
= newTemp(Ity_V256
);
30700 if (epartIsReg(modrm
)) {
30701 UInt rE
= eregOfRexRM(pfx
, modrm
);
30703 imm8
= getUChar(delta
);
30704 DIP("vpermilpd $%u,%s,%s\n",
30705 imm8
, nameYMMReg(rE
), nameYMMReg(rG
));
30706 assign(sV
, getYMMReg(rE
));
30708 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30710 imm8
= getUChar(delta
);
30711 DIP("vpermilpd $%u,%s,%s\n",
30712 imm8
, dis_buf
, nameYMMReg(rG
));
30713 assign(sV
, loadLE(Ity_V256
, mkexpr(addr
)));
30716 IRTemp s3
, s2
, s1
, s0
;
30717 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
30718 breakupV256to64s(sV
, &s3
, &s2
, &s1
, &s0
);
30719 IRTemp dV
= newTemp(Ity_V256
);
30720 assign(dV
, IRExpr_Qop(Iop_64x4toV256
,
30721 mkexpr((imm8
& (1<<3)) ? s3
: s2
),
30722 mkexpr((imm8
& (1<<2)) ? s3
: s2
),
30723 mkexpr((imm8
& (1<<1)) ? s1
: s0
),
30724 mkexpr((imm8
& (1<<0)) ? s1
: s0
)));
30725 putYMMReg(rG
, mkexpr(dV
));
30726 goto decode_success
;
30731 /* VPERM2F128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.66.0F3A.W0 06 /r ib */
30732 if (have66noF2noF3(pfx
)
30733 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
30734 UChar modrm
= getUChar(delta
);
30736 UInt rG
= gregOfRexRM(pfx
, modrm
);
30737 UInt rV
= getVexNvvvv(pfx
);
30738 IRTemp s00
= newTemp(Ity_V128
);
30739 IRTemp s01
= newTemp(Ity_V128
);
30740 IRTemp s10
= newTemp(Ity_V128
);
30741 IRTemp s11
= newTemp(Ity_V128
);
30742 assign(s00
, getYMMRegLane128(rV
, 0));
30743 assign(s01
, getYMMRegLane128(rV
, 1));
30744 if (epartIsReg(modrm
)) {
30745 UInt rE
= eregOfRexRM(pfx
, modrm
);
30747 imm8
= getUChar(delta
);
30748 DIP("vperm2f128 $%u,%s,%s,%s\n",
30749 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
30750 assign(s10
, getYMMRegLane128(rE
, 0));
30751 assign(s11
, getYMMRegLane128(rE
, 1));
30753 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30755 imm8
= getUChar(delta
);
30756 DIP("vperm2f128 $%u,%s,%s,%s\n",
30757 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
30758 assign(s10
, loadLE(Ity_V128
, binop(Iop_Add64
,
30759 mkexpr(addr
), mkU64(0))));
30760 assign(s11
, loadLE(Ity_V128
, binop(Iop_Add64
,
30761 mkexpr(addr
), mkU64(16))));
30764 # define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \
30765 : ((_nn)==2) ? s10 : s11)
30766 putYMMRegLane128(rG
, 0, mkexpr(SEL((imm8
>> 0) & 3)));
30767 putYMMRegLane128(rG
, 1, mkexpr(SEL((imm8
>> 4) & 3)));
30769 if (imm8
& (1<<3)) putYMMRegLane128(rG
, 0, mkV128(0));
30770 if (imm8
& (1<<7)) putYMMRegLane128(rG
, 1, mkV128(0));
30772 goto decode_success
;
30777 /* VROUNDPS imm8, xmm2/m128, xmm1 */
30778 /* VROUNDPS = VEX.NDS.128.66.0F3A.WIG 08 ib */
30779 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
30780 UChar modrm
= getUChar(delta
);
30781 UInt rG
= gregOfRexRM(pfx
, modrm
);
30782 IRTemp src
= newTemp(Ity_V128
);
30783 IRTemp s0
= IRTemp_INVALID
;
30784 IRTemp s1
= IRTemp_INVALID
;
30785 IRTemp s2
= IRTemp_INVALID
;
30786 IRTemp s3
= IRTemp_INVALID
;
30787 IRTemp rm
= newTemp(Ity_I32
);
30790 modrm
= getUChar(delta
);
30792 if (epartIsReg(modrm
)) {
30793 UInt rE
= eregOfRexRM(pfx
, modrm
);
30794 assign( src
, getXMMReg( rE
) );
30795 imm
= getUChar(delta
+1);
30796 if (imm
& ~15) break;
30798 DIP( "vroundps $%d,%s,%s\n", imm
, nameXMMReg(rE
), nameXMMReg(rG
) );
30800 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30801 assign( src
, loadLE(Ity_V128
, mkexpr(addr
) ) );
30802 imm
= getUChar(delta
+alen
);
30803 if (imm
& ~15) break;
30805 DIP( "vroundps $%d,%s,%s\n", imm
, dis_buf
, nameXMMReg(rG
) );
30808 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30809 that encoding is the same as the encoding for IRRoundingMode,
30810 we can use that value directly in the IR as a rounding
30812 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
30814 breakupV128to32s( src
, &s3
, &s2
, &s1
, &s0
);
30815 putYMMRegLane128( rG
, 1, mkV128(0) );
30816 # define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
30817 unop(Iop_ReinterpI32asF32, mkexpr(s)))
30818 putYMMRegLane32F( rG
, 3, CVT(s3
) );
30819 putYMMRegLane32F( rG
, 2, CVT(s2
) );
30820 putYMMRegLane32F( rG
, 1, CVT(s1
) );
30821 putYMMRegLane32F( rG
, 0, CVT(s0
) );
30823 goto decode_success
;
30825 /* VROUNDPS imm8, ymm2/m256, ymm1 */
30826 /* VROUNDPS = VEX.NDS.256.66.0F3A.WIG 08 ib */
30827 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
30828 UChar modrm
= getUChar(delta
);
30829 UInt rG
= gregOfRexRM(pfx
, modrm
);
30830 IRTemp src
= newTemp(Ity_V256
);
30831 IRTemp s0
= IRTemp_INVALID
;
30832 IRTemp s1
= IRTemp_INVALID
;
30833 IRTemp s2
= IRTemp_INVALID
;
30834 IRTemp s3
= IRTemp_INVALID
;
30835 IRTemp s4
= IRTemp_INVALID
;
30836 IRTemp s5
= IRTemp_INVALID
;
30837 IRTemp s6
= IRTemp_INVALID
;
30838 IRTemp s7
= IRTemp_INVALID
;
30839 IRTemp rm
= newTemp(Ity_I32
);
30842 modrm
= getUChar(delta
);
30844 if (epartIsReg(modrm
)) {
30845 UInt rE
= eregOfRexRM(pfx
, modrm
);
30846 assign( src
, getYMMReg( rE
) );
30847 imm
= getUChar(delta
+1);
30848 if (imm
& ~15) break;
30850 DIP( "vroundps $%d,%s,%s\n", imm
, nameYMMReg(rE
), nameYMMReg(rG
) );
30852 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30853 assign( src
, loadLE(Ity_V256
, mkexpr(addr
) ) );
30854 imm
= getUChar(delta
+alen
);
30855 if (imm
& ~15) break;
30857 DIP( "vroundps $%d,%s,%s\n", imm
, dis_buf
, nameYMMReg(rG
) );
30860 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30861 that encoding is the same as the encoding for IRRoundingMode,
30862 we can use that value directly in the IR as a rounding
30864 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
30866 breakupV256to32s( src
, &s7
, &s6
, &s5
, &s4
, &s3
, &s2
, &s1
, &s0
);
30867 # define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
30868 unop(Iop_ReinterpI32asF32, mkexpr(s)))
30869 putYMMRegLane32F( rG
, 7, CVT(s7
) );
30870 putYMMRegLane32F( rG
, 6, CVT(s6
) );
30871 putYMMRegLane32F( rG
, 5, CVT(s5
) );
30872 putYMMRegLane32F( rG
, 4, CVT(s4
) );
30873 putYMMRegLane32F( rG
, 3, CVT(s3
) );
30874 putYMMRegLane32F( rG
, 2, CVT(s2
) );
30875 putYMMRegLane32F( rG
, 1, CVT(s1
) );
30876 putYMMRegLane32F( rG
, 0, CVT(s0
) );
30878 goto decode_success
;
30883 /* VROUNDPD imm8, xmm2/m128, xmm1 */
30884 /* VROUNDPD = VEX.NDS.128.66.0F3A.WIG 09 ib */
30885 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
30886 UChar modrm
= getUChar(delta
);
30887 UInt rG
= gregOfRexRM(pfx
, modrm
);
30888 IRTemp src
= newTemp(Ity_V128
);
30889 IRTemp s0
= IRTemp_INVALID
;
30890 IRTemp s1
= IRTemp_INVALID
;
30891 IRTemp rm
= newTemp(Ity_I32
);
30894 modrm
= getUChar(delta
);
30896 if (epartIsReg(modrm
)) {
30897 UInt rE
= eregOfRexRM(pfx
, modrm
);
30898 assign( src
, getXMMReg( rE
) );
30899 imm
= getUChar(delta
+1);
30900 if (imm
& ~15) break;
30902 DIP( "vroundpd $%d,%s,%s\n", imm
, nameXMMReg(rE
), nameXMMReg(rG
) );
30904 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30905 assign( src
, loadLE(Ity_V128
, mkexpr(addr
) ) );
30906 imm
= getUChar(delta
+alen
);
30907 if (imm
& ~15) break;
30909 DIP( "vroundpd $%d,%s,%s\n", imm
, dis_buf
, nameXMMReg(rG
) );
30912 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30913 that encoding is the same as the encoding for IRRoundingMode,
30914 we can use that value directly in the IR as a rounding
30916 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
30918 breakupV128to64s( src
, &s1
, &s0
);
30919 putYMMRegLane128( rG
, 1, mkV128(0) );
30920 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
30921 unop(Iop_ReinterpI64asF64, mkexpr(s)))
30922 putYMMRegLane64F( rG
, 1, CVT(s1
) );
30923 putYMMRegLane64F( rG
, 0, CVT(s0
) );
30925 goto decode_success
;
30927 /* VROUNDPD imm8, ymm2/m256, ymm1 */
30928 /* VROUNDPD = VEX.NDS.256.66.0F3A.WIG 09 ib */
30929 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
30930 UChar modrm
= getUChar(delta
);
30931 UInt rG
= gregOfRexRM(pfx
, modrm
);
30932 IRTemp src
= newTemp(Ity_V256
);
30933 IRTemp s0
= IRTemp_INVALID
;
30934 IRTemp s1
= IRTemp_INVALID
;
30935 IRTemp s2
= IRTemp_INVALID
;
30936 IRTemp s3
= IRTemp_INVALID
;
30937 IRTemp rm
= newTemp(Ity_I32
);
30940 modrm
= getUChar(delta
);
30942 if (epartIsReg(modrm
)) {
30943 UInt rE
= eregOfRexRM(pfx
, modrm
);
30944 assign( src
, getYMMReg( rE
) );
30945 imm
= getUChar(delta
+1);
30946 if (imm
& ~15) break;
30948 DIP( "vroundpd $%d,%s,%s\n", imm
, nameYMMReg(rE
), nameYMMReg(rG
) );
30950 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30951 assign( src
, loadLE(Ity_V256
, mkexpr(addr
) ) );
30952 imm
= getUChar(delta
+alen
);
30953 if (imm
& ~15) break;
30955 DIP( "vroundpd $%d,%s,%s\n", imm
, dis_buf
, nameYMMReg(rG
) );
30958 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30959 that encoding is the same as the encoding for IRRoundingMode,
30960 we can use that value directly in the IR as a rounding
30962 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
30964 breakupV256to64s( src
, &s3
, &s2
, &s1
, &s0
);
30965 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
30966 unop(Iop_ReinterpI64asF64, mkexpr(s)))
30967 putYMMRegLane64F( rG
, 3, CVT(s3
) );
30968 putYMMRegLane64F( rG
, 2, CVT(s2
) );
30969 putYMMRegLane64F( rG
, 1, CVT(s1
) );
30970 putYMMRegLane64F( rG
, 0, CVT(s0
) );
30972 goto decode_success
;
30978 /* VROUNDSS imm8, xmm3/m32, xmm2, xmm1 */
30979 /* VROUNDSS = VEX.NDS.128.66.0F3A.WIG 0A ib */
30980 /* VROUNDSD imm8, xmm3/m64, xmm2, xmm1 */
30981 /* VROUNDSD = VEX.NDS.128.66.0F3A.WIG 0B ib */
30982 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
30983 UChar modrm
= getUChar(delta
);
30984 UInt rG
= gregOfRexRM(pfx
, modrm
);
30985 UInt rV
= getVexNvvvv(pfx
);
30986 Bool isD
= opc
== 0x0B;
30987 IRTemp src
= newTemp(isD
? Ity_F64
: Ity_F32
);
30988 IRTemp res
= newTemp(isD
? Ity_F64
: Ity_F32
);
30991 if (epartIsReg(modrm
)) {
30992 UInt rE
= eregOfRexRM(pfx
, modrm
);
30994 isD
? getXMMRegLane64F(rE
, 0) : getXMMRegLane32F(rE
, 0) );
30995 imm
= getUChar(delta
+1);
30996 if (imm
& ~15) break;
30998 DIP( "vrounds%c $%d,%s,%s,%s\n",
31000 imm
, nameXMMReg( rE
), nameXMMReg( rV
), nameXMMReg( rG
) );
31002 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31003 assign( src
, loadLE( isD
? Ity_F64
: Ity_F32
, mkexpr(addr
) ));
31004 imm
= getUChar(delta
+alen
);
31005 if (imm
& ~15) break;
31007 DIP( "vrounds%c $%d,%s,%s,%s\n",
31009 imm
, dis_buf
, nameXMMReg( rV
), nameXMMReg( rG
) );
31012 /* (imm & 3) contains an Intel-encoded rounding mode. Because
31013 that encoding is the same as the encoding for IRRoundingMode,
31014 we can use that value directly in the IR as a rounding
31016 assign(res
, binop(isD
? Iop_RoundF64toInt
: Iop_RoundF32toInt
,
31017 (imm
& 4) ? get_sse_roundingmode()
31022 putXMMRegLane64F( rG
, 0, mkexpr(res
) );
31024 putXMMRegLane32F( rG
, 0, mkexpr(res
) );
31025 putXMMRegLane32F( rG
, 1, getXMMRegLane32F( rV
, 1 ) );
31027 putXMMRegLane64F( rG
, 1, getXMMRegLane64F( rV
, 1 ) );
31028 putYMMRegLane128( rG
, 1, mkV128(0) );
31030 goto decode_success
;
31035 /* VBLENDPS imm8, ymm3/m256, ymm2, ymm1 */
31036 /* VBLENDPS = VEX.NDS.256.66.0F3A.WIG 0C /r ib */
31037 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
31038 UChar modrm
= getUChar(delta
);
31040 UInt rG
= gregOfRexRM(pfx
, modrm
);
31041 UInt rV
= getVexNvvvv(pfx
);
31042 IRTemp sV
= newTemp(Ity_V256
);
31043 IRTemp sE
= newTemp(Ity_V256
);
31044 assign ( sV
, getYMMReg(rV
) );
31045 if (epartIsReg(modrm
)) {
31046 UInt rE
= eregOfRexRM(pfx
, modrm
);
31048 imm8
= getUChar(delta
);
31049 DIP("vblendps $%u,%s,%s,%s\n",
31050 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
31051 assign(sE
, getYMMReg(rE
));
31053 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31055 imm8
= getUChar(delta
);
31056 DIP("vblendps $%u,%s,%s,%s\n",
31057 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
31058 assign(sE
, loadLE(Ity_V256
, mkexpr(addr
)));
31062 mkexpr( math_BLENDPS_256( sE
, sV
, imm8
) ) );
31064 goto decode_success
;
31066 /* VBLENDPS imm8, xmm3/m128, xmm2, xmm1 */
31067 /* VBLENDPS = VEX.NDS.128.66.0F3A.WIG 0C /r ib */
31068 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31069 UChar modrm
= getUChar(delta
);
31071 UInt rG
= gregOfRexRM(pfx
, modrm
);
31072 UInt rV
= getVexNvvvv(pfx
);
31073 IRTemp sV
= newTemp(Ity_V128
);
31074 IRTemp sE
= newTemp(Ity_V128
);
31075 assign ( sV
, getXMMReg(rV
) );
31076 if (epartIsReg(modrm
)) {
31077 UInt rE
= eregOfRexRM(pfx
, modrm
);
31079 imm8
= getUChar(delta
);
31080 DIP("vblendps $%u,%s,%s,%s\n",
31081 imm8
, nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
31082 assign(sE
, getXMMReg(rE
));
31084 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31086 imm8
= getUChar(delta
);
31087 DIP("vblendps $%u,%s,%s,%s\n",
31088 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
31089 assign(sE
, loadLE(Ity_V128
, mkexpr(addr
)));
31092 putYMMRegLoAndZU( rG
,
31093 mkexpr( math_BLENDPS_128( sE
, sV
, imm8
) ) );
31095 goto decode_success
;
31100 /* VBLENDPD imm8, ymm3/m256, ymm2, ymm1 */
31101 /* VBLENDPD = VEX.NDS.256.66.0F3A.WIG 0D /r ib */
31102 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
31103 UChar modrm
= getUChar(delta
);
31105 UInt rG
= gregOfRexRM(pfx
, modrm
);
31106 UInt rV
= getVexNvvvv(pfx
);
31107 IRTemp sV
= newTemp(Ity_V256
);
31108 IRTemp sE
= newTemp(Ity_V256
);
31109 assign ( sV
, getYMMReg(rV
) );
31110 if (epartIsReg(modrm
)) {
31111 UInt rE
= eregOfRexRM(pfx
, modrm
);
31113 imm8
= getUChar(delta
);
31114 DIP("vblendpd $%u,%s,%s,%s\n",
31115 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
31116 assign(sE
, getYMMReg(rE
));
31118 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31120 imm8
= getUChar(delta
);
31121 DIP("vblendpd $%u,%s,%s,%s\n",
31122 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
31123 assign(sE
, loadLE(Ity_V256
, mkexpr(addr
)));
31127 mkexpr( math_BLENDPD_256( sE
, sV
, imm8
) ) );
31129 goto decode_success
;
31131 /* VBLENDPD imm8, xmm3/m128, xmm2, xmm1 */
31132 /* VBLENDPD = VEX.NDS.128.66.0F3A.WIG 0D /r ib */
31133 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31134 UChar modrm
= getUChar(delta
);
31136 UInt rG
= gregOfRexRM(pfx
, modrm
);
31137 UInt rV
= getVexNvvvv(pfx
);
31138 IRTemp sV
= newTemp(Ity_V128
);
31139 IRTemp sE
= newTemp(Ity_V128
);
31140 assign ( sV
, getXMMReg(rV
) );
31141 if (epartIsReg(modrm
)) {
31142 UInt rE
= eregOfRexRM(pfx
, modrm
);
31144 imm8
= getUChar(delta
);
31145 DIP("vblendpd $%u,%s,%s,%s\n",
31146 imm8
, nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
31147 assign(sE
, getXMMReg(rE
));
31149 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31151 imm8
= getUChar(delta
);
31152 DIP("vblendpd $%u,%s,%s,%s\n",
31153 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
31154 assign(sE
, loadLE(Ity_V128
, mkexpr(addr
)));
31157 putYMMRegLoAndZU( rG
,
31158 mkexpr( math_BLENDPD_128( sE
, sV
, imm8
) ) );
31160 goto decode_success
;
31165 /* VPBLENDW imm8, xmm3/m128, xmm2, xmm1 */
31166 /* VPBLENDW = VEX.NDS.128.66.0F3A.WIG 0E /r ib */
31167 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31168 UChar modrm
= getUChar(delta
);
31170 UInt rG
= gregOfRexRM(pfx
, modrm
);
31171 UInt rV
= getVexNvvvv(pfx
);
31172 IRTemp sV
= newTemp(Ity_V128
);
31173 IRTemp sE
= newTemp(Ity_V128
);
31174 assign ( sV
, getXMMReg(rV
) );
31175 if (epartIsReg(modrm
)) {
31176 UInt rE
= eregOfRexRM(pfx
, modrm
);
31178 imm8
= getUChar(delta
);
31179 DIP("vpblendw $%u,%s,%s,%s\n",
31180 imm8
, nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
31181 assign(sE
, getXMMReg(rE
));
31183 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31185 imm8
= getUChar(delta
);
31186 DIP("vpblendw $%u,%s,%s,%s\n",
31187 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
31188 assign(sE
, loadLE(Ity_V128
, mkexpr(addr
)));
31191 putYMMRegLoAndZU( rG
,
31192 mkexpr( math_PBLENDW_128( sE
, sV
, imm8
) ) );
31194 goto decode_success
;
31196 /* VPBLENDW imm8, ymm3/m256, ymm2, ymm1 */
31197 /* VPBLENDW = VEX.NDS.256.66.0F3A.WIG 0E /r ib */
31198 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
31199 UChar modrm
= getUChar(delta
);
31201 UInt rG
= gregOfRexRM(pfx
, modrm
);
31202 UInt rV
= getVexNvvvv(pfx
);
31203 IRTemp sV
= newTemp(Ity_V256
);
31204 IRTemp sE
= newTemp(Ity_V256
);
31205 IRTemp sVhi
, sVlo
, sEhi
, sElo
;
31206 sVhi
= sVlo
= sEhi
= sElo
= IRTemp_INVALID
;
31207 assign ( sV
, getYMMReg(rV
) );
31208 if (epartIsReg(modrm
)) {
31209 UInt rE
= eregOfRexRM(pfx
, modrm
);
31211 imm8
= getUChar(delta
);
31212 DIP("vpblendw $%u,%s,%s,%s\n",
31213 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
31214 assign(sE
, getYMMReg(rE
));
31216 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31218 imm8
= getUChar(delta
);
31219 DIP("vpblendw $%u,%s,%s,%s\n",
31220 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
31221 assign(sE
, loadLE(Ity_V256
, mkexpr(addr
)));
31224 breakupV256toV128s( sV
, &sVhi
, &sVlo
);
31225 breakupV256toV128s( sE
, &sEhi
, &sElo
);
31226 putYMMReg( rG
, binop( Iop_V128HLtoV256
,
31227 mkexpr( math_PBLENDW_128( sEhi
, sVhi
, imm8
) ),
31228 mkexpr( math_PBLENDW_128( sElo
, sVlo
, imm8
) ) ) );
31230 goto decode_success
;
31235 /* VPALIGNR imm8, xmm3/m128, xmm2, xmm1 */
31236 /* VPALIGNR = VEX.NDS.128.66.0F3A.WIG 0F /r ib */
31237 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31238 UChar modrm
= getUChar(delta
);
31239 UInt rG
= gregOfRexRM(pfx
, modrm
);
31240 UInt rV
= getVexNvvvv(pfx
);
31241 IRTemp sV
= newTemp(Ity_V128
);
31242 IRTemp dV
= newTemp(Ity_V128
);
31245 assign( dV
, getXMMReg(rV
) );
31247 if ( epartIsReg( modrm
) ) {
31248 UInt rE
= eregOfRexRM(pfx
, modrm
);
31249 assign( sV
, getXMMReg(rE
) );
31250 imm8
= getUChar(delta
+1);
31252 DIP("vpalignr $%u,%s,%s,%s\n", imm8
, nameXMMReg(rE
),
31253 nameXMMReg(rV
), nameXMMReg(rG
));
31255 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31256 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
31257 imm8
= getUChar(delta
+alen
);
31259 DIP("vpalignr $%u,%s,%s,%s\n", imm8
, dis_buf
,
31260 nameXMMReg(rV
), nameXMMReg(rG
));
31263 IRTemp res
= math_PALIGNR_XMM( sV
, dV
, imm8
);
31264 putYMMRegLoAndZU( rG
, mkexpr(res
) );
31266 goto decode_success
;
31268 /* VPALIGNR imm8, ymm3/m256, ymm2, ymm1 */
31269 /* VPALIGNR = VEX.NDS.256.66.0F3A.WIG 0F /r ib */
31270 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
31271 UChar modrm
= getUChar(delta
);
31272 UInt rG
= gregOfRexRM(pfx
, modrm
);
31273 UInt rV
= getVexNvvvv(pfx
);
31274 IRTemp sV
= newTemp(Ity_V256
);
31275 IRTemp dV
= newTemp(Ity_V256
);
31276 IRTemp sHi
, sLo
, dHi
, dLo
;
31277 sHi
= sLo
= dHi
= dLo
= IRTemp_INVALID
;
31280 assign( dV
, getYMMReg(rV
) );
31282 if ( epartIsReg( modrm
) ) {
31283 UInt rE
= eregOfRexRM(pfx
, modrm
);
31284 assign( sV
, getYMMReg(rE
) );
31285 imm8
= getUChar(delta
+1);
31287 DIP("vpalignr $%u,%s,%s,%s\n", imm8
, nameYMMReg(rE
),
31288 nameYMMReg(rV
), nameYMMReg(rG
));
31290 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31291 assign( sV
, loadLE(Ity_V256
, mkexpr(addr
)) );
31292 imm8
= getUChar(delta
+alen
);
31294 DIP("vpalignr $%u,%s,%s,%s\n", imm8
, dis_buf
,
31295 nameYMMReg(rV
), nameYMMReg(rG
));
31298 breakupV256toV128s( dV
, &dHi
, &dLo
);
31299 breakupV256toV128s( sV
, &sHi
, &sLo
);
31300 putYMMReg( rG
, binop( Iop_V128HLtoV256
,
31301 mkexpr( math_PALIGNR_XMM( sHi
, dHi
, imm8
) ),
31302 mkexpr( math_PALIGNR_XMM( sLo
, dLo
, imm8
) ) )
31305 goto decode_success
;
31310 /* VPEXTRB imm8, xmm2, reg/m8 = VEX.128.66.0F3A.W0 14 /r ib */
31311 if (have66noF2noF3(pfx
)
31312 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
31313 delta
= dis_PEXTRB_128_GtoE( vbi
, pfx
, delta
, False
/*!isAvx*/ );
31314 goto decode_success
;
31319 /* VPEXTRW imm8, reg/m16, xmm2 */
31320 /* VPEXTRW = VEX.128.66.0F3A.W0 15 /r ib */
31321 if (have66noF2noF3(pfx
)
31322 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
31323 delta
= dis_PEXTRW( vbi
, pfx
, delta
, True
/*isAvx*/ );
31324 goto decode_success
;
31329 /* VPEXTRD imm8, r32/m32, xmm2 */
31330 /* VPEXTRD = VEX.128.66.0F3A.W0 16 /r ib */
31331 if (have66noF2noF3(pfx
)
31332 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
31333 delta
= dis_PEXTRD( vbi
, pfx
, delta
, True
/*isAvx*/ );
31334 goto decode_success
;
31336 /* VPEXTRQ = VEX.128.66.0F3A.W1 16 /r ib */
31337 if (have66noF2noF3(pfx
)
31338 && 0==getVexL(pfx
)/*128*/ && 1==getRexW(pfx
)/*W1*/) {
31339 delta
= dis_PEXTRQ( vbi
, pfx
, delta
, True
/*isAvx*/ );
31340 goto decode_success
;
31345 /* VEXTRACTPS imm8, xmm1, r32/m32 = VEX.128.66.0F3A.WIG 17 /r ib */
31346 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31347 delta
= dis_EXTRACTPS( vbi
, pfx
, delta
, True
/*isAvx*/ );
31348 goto decode_success
;
31353 /* VINSERTF128 r/m, rV, rD
31354 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */
31355 /* VINSERTF128 = VEX.NDS.256.66.0F3A.W0 18 /r ib */
31356 if (have66noF2noF3(pfx
)
31357 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
31358 UChar modrm
= getUChar(delta
);
31360 UInt rG
= gregOfRexRM(pfx
, modrm
);
31361 UInt rV
= getVexNvvvv(pfx
);
31362 IRTemp t128
= newTemp(Ity_V128
);
31363 if (epartIsReg(modrm
)) {
31364 UInt rE
= eregOfRexRM(pfx
, modrm
);
31366 assign(t128
, getXMMReg(rE
));
31367 ib
= getUChar(delta
);
31368 DIP("vinsertf128 $%u,%s,%s,%s\n",
31369 ib
, nameXMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
31371 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31372 assign(t128
, loadLE(Ity_V128
, mkexpr(addr
)));
31374 ib
= getUChar(delta
);
31375 DIP("vinsertf128 $%u,%s,%s,%s\n",
31376 ib
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
31379 putYMMRegLane128(rG
, 0, getYMMRegLane128(rV
, 0));
31380 putYMMRegLane128(rG
, 1, getYMMRegLane128(rV
, 1));
31381 putYMMRegLane128(rG
, ib
& 1, mkexpr(t128
));
31383 goto decode_success
;
31388 /* VEXTRACTF128 $lane_no, rS, r/m
31389 ::: r/m:V128 = a lane of rS:V256 (RM format) */
31390 /* VEXTRACTF128 = VEX.256.66.0F3A.W0 19 /r ib */
31391 if (have66noF2noF3(pfx
)
31392 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
31393 UChar modrm
= getUChar(delta
);
31395 UInt rS
= gregOfRexRM(pfx
, modrm
);
31396 IRTemp t128
= newTemp(Ity_V128
);
31397 if (epartIsReg(modrm
)) {
31398 UInt rD
= eregOfRexRM(pfx
, modrm
);
31400 ib
= getUChar(delta
);
31401 assign(t128
, getYMMRegLane128(rS
, ib
& 1));
31402 putYMMRegLoAndZU(rD
, mkexpr(t128
));
31403 DIP("vextractf128 $%u,%s,%s\n",
31404 ib
, nameXMMReg(rS
), nameYMMReg(rD
));
31406 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31408 ib
= getUChar(delta
);
31409 assign(t128
, getYMMRegLane128(rS
, ib
& 1));
31410 storeLE(mkexpr(addr
), mkexpr(t128
));
31411 DIP("vextractf128 $%u,%s,%s\n",
31412 ib
, nameYMMReg(rS
), dis_buf
);
31415 /* doesn't use vvvv */
31416 goto decode_success
;
31421 /* VCVTPS2PH imm8, xmm2, xmm1/m64 = VEX.128.66.0F3A.W0 1D /r ib */
31422 if (have66noF2noF3(pfx
)
31423 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/
31424 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_F16C
)) {
31425 delta
= dis_VCVTPS2PH( vbi
, pfx
, delta
, /*is256bit=*/False
);
31426 goto decode_success
;
31428 /* VCVTPS2PH imm8, ymm2, ymm1/m128 = VEX.256.66.0F3A.W0 1D /r ib */
31429 if (have66noF2noF3(pfx
)
31430 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/
31431 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_F16C
)) {
31432 delta
= dis_VCVTPS2PH( vbi
, pfx
, delta
, /*is256bit=*/True
);
31433 goto decode_success
;
31438 /* VPINSRB r32/m8, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 20 /r ib */
31439 if (have66noF2noF3(pfx
)
31440 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
31441 UChar modrm
= getUChar(delta
);
31442 UInt rG
= gregOfRexRM(pfx
, modrm
);
31443 UInt rV
= getVexNvvvv(pfx
);
31445 IRTemp src_u8
= newTemp(Ity_I8
);
31447 if ( epartIsReg( modrm
) ) {
31448 UInt rE
= eregOfRexRM(pfx
,modrm
);
31449 imm8
= (Int
)(getUChar(delta
+1) & 15);
31450 assign( src_u8
, unop(Iop_32to8
, getIReg32( rE
)) );
31452 DIP( "vpinsrb $%d,%s,%s,%s\n",
31453 imm8
, nameIReg32(rE
), nameXMMReg(rV
), nameXMMReg(rG
) );
31455 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31456 imm8
= (Int
)(getUChar(delta
+alen
) & 15);
31457 assign( src_u8
, loadLE( Ity_I8
, mkexpr(addr
) ) );
31459 DIP( "vpinsrb $%d,%s,%s,%s\n",
31460 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
31463 IRTemp src_vec
= newTemp(Ity_V128
);
31464 assign(src_vec
, getXMMReg( rV
));
31465 IRTemp res_vec
= math_PINSRB_128( src_vec
, src_u8
, imm8
);
31466 putYMMRegLoAndZU( rG
, mkexpr(res_vec
) );
31468 goto decode_success
;
31473 /* VINSERTPS imm8, xmm3/m32, xmm2, xmm1
31474 = VEX.NDS.128.66.0F3A.WIG 21 /r ib */
31475 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31476 UChar modrm
= getUChar(delta
);
31477 UInt rG
= gregOfRexRM(pfx
, modrm
);
31478 UInt rV
= getVexNvvvv(pfx
);
31480 IRTemp d2ins
= newTemp(Ity_I32
); /* comes from the E part */
31481 const IRTemp inval
= IRTemp_INVALID
;
31483 if ( epartIsReg( modrm
) ) {
31484 UInt rE
= eregOfRexRM(pfx
, modrm
);
31485 IRTemp vE
= newTemp(Ity_V128
);
31486 assign( vE
, getXMMReg(rE
) );
31487 IRTemp dsE
[4] = { inval
, inval
, inval
, inval
};
31488 breakupV128to32s( vE
, &dsE
[3], &dsE
[2], &dsE
[1], &dsE
[0] );
31489 imm8
= getUChar(delta
+1);
31490 d2ins
= dsE
[(imm8
>> 6) & 3]; /* "imm8_count_s" */
31492 DIP( "insertps $%u, %s,%s\n",
31493 imm8
, nameXMMReg(rE
), nameXMMReg(rG
) );
31495 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31496 assign( d2ins
, loadLE( Ity_I32
, mkexpr(addr
) ) );
31497 imm8
= getUChar(delta
+alen
);
31499 DIP( "insertps $%u, %s,%s\n",
31500 imm8
, dis_buf
, nameXMMReg(rG
) );
31503 IRTemp vV
= newTemp(Ity_V128
);
31504 assign( vV
, getXMMReg(rV
) );
31506 putYMMRegLoAndZU( rG
, mkexpr(math_INSERTPS( vV
, d2ins
, imm8
)) );
31508 goto decode_success
;
31513 /* VPINSRD r32/m32, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 22 /r ib */
31514 if (have66noF2noF3(pfx
)
31515 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
31516 UChar modrm
= getUChar(delta
);
31517 UInt rG
= gregOfRexRM(pfx
, modrm
);
31518 UInt rV
= getVexNvvvv(pfx
);
31520 IRTemp src_u32
= newTemp(Ity_I32
);
31522 if ( epartIsReg( modrm
) ) {
31523 UInt rE
= eregOfRexRM(pfx
,modrm
);
31524 imm8_10
= (Int
)(getUChar(delta
+1) & 3);
31525 assign( src_u32
, getIReg32( rE
) );
31527 DIP( "vpinsrd $%d,%s,%s,%s\n",
31528 imm8_10
, nameIReg32(rE
), nameXMMReg(rV
), nameXMMReg(rG
) );
31530 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31531 imm8_10
= (Int
)(getUChar(delta
+alen
) & 3);
31532 assign( src_u32
, loadLE( Ity_I32
, mkexpr(addr
) ) );
31534 DIP( "vpinsrd $%d,%s,%s,%s\n",
31535 imm8_10
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
31538 IRTemp src_vec
= newTemp(Ity_V128
);
31539 assign(src_vec
, getXMMReg( rV
));
31540 IRTemp res_vec
= math_PINSRD_128( src_vec
, src_u32
, imm8_10
);
31541 putYMMRegLoAndZU( rG
, mkexpr(res_vec
) );
31543 goto decode_success
;
31545 /* VPINSRQ r64/m64, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W1 22 /r ib */
31546 if (have66noF2noF3(pfx
)
31547 && 0==getVexL(pfx
)/*128*/ && 1==getRexW(pfx
)/*W1*/) {
31548 UChar modrm
= getUChar(delta
);
31549 UInt rG
= gregOfRexRM(pfx
, modrm
);
31550 UInt rV
= getVexNvvvv(pfx
);
31552 IRTemp src_u64
= newTemp(Ity_I64
);
31554 if ( epartIsReg( modrm
) ) {
31555 UInt rE
= eregOfRexRM(pfx
,modrm
);
31556 imm8_0
= (Int
)(getUChar(delta
+1) & 1);
31557 assign( src_u64
, getIReg64( rE
) );
31559 DIP( "vpinsrq $%d,%s,%s,%s\n",
31560 imm8_0
, nameIReg64(rE
), nameXMMReg(rV
), nameXMMReg(rG
) );
31562 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31563 imm8_0
= (Int
)(getUChar(delta
+alen
) & 1);
31564 assign( src_u64
, loadLE( Ity_I64
, mkexpr(addr
) ) );
31566 DIP( "vpinsrq $%d,%s,%s,%s\n",
31567 imm8_0
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
31570 IRTemp src_vec
= newTemp(Ity_V128
);
31571 assign(src_vec
, getXMMReg( rV
));
31572 IRTemp res_vec
= math_PINSRQ_128( src_vec
, src_u64
, imm8_0
);
31573 putYMMRegLoAndZU( rG
, mkexpr(res_vec
) );
31575 goto decode_success
;
31580 /* VINSERTI128 r/m, rV, rD
31581 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */
31582 /* VINSERTI128 = VEX.NDS.256.66.0F3A.W0 38 /r ib */
31583 if (have66noF2noF3(pfx
)
31584 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
31585 UChar modrm
= getUChar(delta
);
31587 UInt rG
= gregOfRexRM(pfx
, modrm
);
31588 UInt rV
= getVexNvvvv(pfx
);
31589 IRTemp t128
= newTemp(Ity_V128
);
31590 if (epartIsReg(modrm
)) {
31591 UInt rE
= eregOfRexRM(pfx
, modrm
);
31593 assign(t128
, getXMMReg(rE
));
31594 ib
= getUChar(delta
);
31595 DIP("vinserti128 $%u,%s,%s,%s\n",
31596 ib
, nameXMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
31598 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31599 assign(t128
, loadLE(Ity_V128
, mkexpr(addr
)));
31601 ib
= getUChar(delta
);
31602 DIP("vinserti128 $%u,%s,%s,%s\n",
31603 ib
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
31606 putYMMRegLane128(rG
, 0, getYMMRegLane128(rV
, 0));
31607 putYMMRegLane128(rG
, 1, getYMMRegLane128(rV
, 1));
31608 putYMMRegLane128(rG
, ib
& 1, mkexpr(t128
));
31610 goto decode_success
;
31615 /* VEXTRACTI128 $lane_no, rS, r/m
31616 ::: r/m:V128 = a lane of rS:V256 (RM format) */
31617 /* VEXTRACTI128 = VEX.256.66.0F3A.W0 39 /r ib */
31618 if (have66noF2noF3(pfx
)
31619 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
31620 UChar modrm
= getUChar(delta
);
31622 UInt rS
= gregOfRexRM(pfx
, modrm
);
31623 IRTemp t128
= newTemp(Ity_V128
);
31624 if (epartIsReg(modrm
)) {
31625 UInt rD
= eregOfRexRM(pfx
, modrm
);
31627 ib
= getUChar(delta
);
31628 assign(t128
, getYMMRegLane128(rS
, ib
& 1));
31629 putYMMRegLoAndZU(rD
, mkexpr(t128
));
31630 DIP("vextracti128 $%u,%s,%s\n",
31631 ib
, nameXMMReg(rS
), nameYMMReg(rD
));
31633 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31635 ib
= getUChar(delta
);
31636 assign(t128
, getYMMRegLane128(rS
, ib
& 1));
31637 storeLE(mkexpr(addr
), mkexpr(t128
));
31638 DIP("vextracti128 $%u,%s,%s\n",
31639 ib
, nameYMMReg(rS
), dis_buf
);
31642 /* doesn't use vvvv */
31643 goto decode_success
;
31648 /* VDPPS imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 40 /r ib */
31649 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31650 UChar modrm
= getUChar(delta
);
31651 UInt rG
= gregOfRexRM(pfx
, modrm
);
31652 UInt rV
= getVexNvvvv(pfx
);
31653 IRTemp dst_vec
= newTemp(Ity_V128
);
31655 if (epartIsReg( modrm
)) {
31656 UInt rE
= eregOfRexRM(pfx
,modrm
);
31657 imm8
= (Int
)getUChar(delta
+1);
31658 assign( dst_vec
, getXMMReg( rE
) );
31660 DIP( "vdpps $%d,%s,%s,%s\n",
31661 imm8
, nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
) );
31663 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31664 imm8
= (Int
)getUChar(delta
+alen
);
31665 assign( dst_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
31667 DIP( "vdpps $%d,%s,%s,%s\n",
31668 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
31671 IRTemp src_vec
= newTemp(Ity_V128
);
31672 assign(src_vec
, getXMMReg( rV
));
31673 IRTemp res_vec
= math_DPPS_128( src_vec
, dst_vec
, imm8
);
31674 putYMMRegLoAndZU( rG
, mkexpr(res_vec
) );
31676 goto decode_success
;
31678 /* VDPPS imm8, ymm3/m128,ymm2,ymm1 = VEX.NDS.256.66.0F3A.WIG 40 /r ib */
31679 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
31680 UChar modrm
= getUChar(delta
);
31681 UInt rG
= gregOfRexRM(pfx
, modrm
);
31682 UInt rV
= getVexNvvvv(pfx
);
31683 IRTemp dst_vec
= newTemp(Ity_V256
);
31685 if (epartIsReg( modrm
)) {
31686 UInt rE
= eregOfRexRM(pfx
,modrm
);
31687 imm8
= (Int
)getUChar(delta
+1);
31688 assign( dst_vec
, getYMMReg( rE
) );
31690 DIP( "vdpps $%d,%s,%s,%s\n",
31691 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
) );
31693 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31694 imm8
= (Int
)getUChar(delta
+alen
);
31695 assign( dst_vec
, loadLE( Ity_V256
, mkexpr(addr
) ) );
31697 DIP( "vdpps $%d,%s,%s,%s\n",
31698 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
) );
31701 IRTemp src_vec
= newTemp(Ity_V256
);
31702 assign(src_vec
, getYMMReg( rV
));
31703 IRTemp s0
, s1
, d0
, d1
;
31704 s0
= s1
= d0
= d1
= IRTemp_INVALID
;
31705 breakupV256toV128s( dst_vec
, &d1
, &d0
);
31706 breakupV256toV128s( src_vec
, &s1
, &s0
);
31707 putYMMReg( rG
, binop( Iop_V128HLtoV256
,
31708 mkexpr( math_DPPS_128(s1
, d1
, imm8
) ),
31709 mkexpr( math_DPPS_128(s0
, d0
, imm8
) ) ) );
31711 goto decode_success
;
31716 /* VDPPD imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 41 /r ib */
31717 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31718 UChar modrm
= getUChar(delta
);
31719 UInt rG
= gregOfRexRM(pfx
, modrm
);
31720 UInt rV
= getVexNvvvv(pfx
);
31721 IRTemp dst_vec
= newTemp(Ity_V128
);
31723 if (epartIsReg( modrm
)) {
31724 UInt rE
= eregOfRexRM(pfx
,modrm
);
31725 imm8
= (Int
)getUChar(delta
+1);
31726 assign( dst_vec
, getXMMReg( rE
) );
31728 DIP( "vdppd $%d,%s,%s,%s\n",
31729 imm8
, nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
) );
31731 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31732 imm8
= (Int
)getUChar(delta
+alen
);
31733 assign( dst_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
31735 DIP( "vdppd $%d,%s,%s,%s\n",
31736 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
31739 IRTemp src_vec
= newTemp(Ity_V128
);
31740 assign(src_vec
, getXMMReg( rV
));
31741 IRTemp res_vec
= math_DPPD_128( src_vec
, dst_vec
, imm8
);
31742 putYMMRegLoAndZU( rG
, mkexpr(res_vec
) );
31744 goto decode_success
;
31749 /* VMPSADBW imm8, xmm3/m128,xmm2,xmm1 */
31750 /* VMPSADBW = VEX.NDS.128.66.0F3A.WIG 42 /r ib */
31751 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31752 UChar modrm
= getUChar(delta
);
31754 IRTemp src_vec
= newTemp(Ity_V128
);
31755 IRTemp dst_vec
= newTemp(Ity_V128
);
31756 UInt rG
= gregOfRexRM(pfx
, modrm
);
31757 UInt rV
= getVexNvvvv(pfx
);
31759 assign( dst_vec
, getXMMReg(rV
) );
31761 if ( epartIsReg( modrm
) ) {
31762 UInt rE
= eregOfRexRM(pfx
, modrm
);
31764 imm8
= (Int
)getUChar(delta
+1);
31765 assign( src_vec
, getXMMReg(rE
) );
31767 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8
,
31768 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
) );
31770 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
31771 1/* imm8 is 1 byte after the amode */ );
31772 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
31773 imm8
= (Int
)getUChar(delta
+alen
);
31775 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8
,
31776 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
31779 putYMMRegLoAndZU( rG
, mkexpr( math_MPSADBW_128(dst_vec
,
31780 src_vec
, imm8
) ) );
31782 goto decode_success
;
31784 /* VMPSADBW imm8, ymm3/m256,ymm2,ymm1 */
31785 /* VMPSADBW = VEX.NDS.256.66.0F3A.WIG 42 /r ib */
31786 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
31787 UChar modrm
= getUChar(delta
);
31789 IRTemp src_vec
= newTemp(Ity_V256
);
31790 IRTemp dst_vec
= newTemp(Ity_V256
);
31791 UInt rG
= gregOfRexRM(pfx
, modrm
);
31792 UInt rV
= getVexNvvvv(pfx
);
31793 IRTemp sHi
, sLo
, dHi
, dLo
;
31794 sHi
= sLo
= dHi
= dLo
= IRTemp_INVALID
;
31796 assign( dst_vec
, getYMMReg(rV
) );
31798 if ( epartIsReg( modrm
) ) {
31799 UInt rE
= eregOfRexRM(pfx
, modrm
);
31801 imm8
= (Int
)getUChar(delta
+1);
31802 assign( src_vec
, getYMMReg(rE
) );
31804 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8
,
31805 nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
) );
31807 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
31808 1/* imm8 is 1 byte after the amode */ );
31809 assign( src_vec
, loadLE( Ity_V256
, mkexpr(addr
) ) );
31810 imm8
= (Int
)getUChar(delta
+alen
);
31812 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8
,
31813 dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
) );
31816 breakupV256toV128s( dst_vec
, &dHi
, &dLo
);
31817 breakupV256toV128s( src_vec
, &sHi
, &sLo
);
31818 putYMMReg( rG
, binop( Iop_V128HLtoV256
,
31819 mkexpr( math_MPSADBW_128(dHi
, sHi
, imm8
>> 3) ),
31820 mkexpr( math_MPSADBW_128(dLo
, sLo
, imm8
) ) ) );
31822 goto decode_success
;
31827 /* VPCLMULQDQ imm8, xmm3/m128,xmm2,xmm1 */
31828 /* VPCLMULQDQ = VEX.NDS.128.66.0F3A.WIG 44 /r ib */
31829 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
31830 * Carry-less multiplication of selected XMM quadwords into XMM
31831 * registers (a.k.a multiplication of polynomials over GF(2))
31833 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31834 UChar modrm
= getUChar(delta
);
31836 IRTemp sV
= newTemp(Ity_V128
);
31837 IRTemp dV
= newTemp(Ity_V128
);
31838 UInt rG
= gregOfRexRM(pfx
, modrm
);
31839 UInt rV
= getVexNvvvv(pfx
);
31841 assign( dV
, getXMMReg(rV
) );
31843 if ( epartIsReg( modrm
) ) {
31844 UInt rE
= eregOfRexRM(pfx
, modrm
);
31845 imm8
= (Int
)getUChar(delta
+1);
31846 assign( sV
, getXMMReg(rE
) );
31848 DIP( "vpclmulqdq $%d, %s,%s,%s\n", imm8
,
31849 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
) );
31851 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
31852 1/* imm8 is 1 byte after the amode */ );
31853 assign( sV
, loadLE( Ity_V128
, mkexpr(addr
) ) );
31854 imm8
= (Int
)getUChar(delta
+alen
);
31856 DIP( "vpclmulqdq $%d, %s,%s,%s\n",
31857 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
31860 putYMMRegLoAndZU( rG
, mkexpr( math_PCLMULQDQ(dV
, sV
, imm8
) ) );
31862 goto decode_success
;
31867 /* VPERM2I128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.66.0F3A.W0 46 /r ib */
31868 if (have66noF2noF3(pfx
)
31869 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
31870 UChar modrm
= getUChar(delta
);
31872 UInt rG
= gregOfRexRM(pfx
, modrm
);
31873 UInt rV
= getVexNvvvv(pfx
);
31874 IRTemp s00
= newTemp(Ity_V128
);
31875 IRTemp s01
= newTemp(Ity_V128
);
31876 IRTemp s10
= newTemp(Ity_V128
);
31877 IRTemp s11
= newTemp(Ity_V128
);
31878 assign(s00
, getYMMRegLane128(rV
, 0));
31879 assign(s01
, getYMMRegLane128(rV
, 1));
31880 if (epartIsReg(modrm
)) {
31881 UInt rE
= eregOfRexRM(pfx
, modrm
);
31883 imm8
= getUChar(delta
);
31884 DIP("vperm2i128 $%u,%s,%s,%s\n",
31885 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
31886 assign(s10
, getYMMRegLane128(rE
, 0));
31887 assign(s11
, getYMMRegLane128(rE
, 1));
31889 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31891 imm8
= getUChar(delta
);
31892 DIP("vperm2i128 $%u,%s,%s,%s\n",
31893 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
31894 assign(s10
, loadLE(Ity_V128
, binop(Iop_Add64
,
31895 mkexpr(addr
), mkU64(0))));
31896 assign(s11
, loadLE(Ity_V128
, binop(Iop_Add64
,
31897 mkexpr(addr
), mkU64(16))));
31900 # define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \
31901 : ((_nn)==2) ? s10 : s11)
31902 putYMMRegLane128(rG
, 0, mkexpr(SEL((imm8
>> 0) & 3)));
31903 putYMMRegLane128(rG
, 1, mkexpr(SEL((imm8
>> 4) & 3)));
31905 if (imm8
& (1<<3)) putYMMRegLane128(rG
, 0, mkV128(0));
31906 if (imm8
& (1<<7)) putYMMRegLane128(rG
, 1, mkV128(0));
31908 goto decode_success
;
31913 /* VBLENDVPS xmmG, xmmE/memE, xmmV, xmmIS4
31914 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
31915 /* VBLENDVPS = VEX.NDS.128.66.0F3A.WIG 4A /r /is4 */
31916 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31917 delta
= dis_VBLENDV_128 ( vbi
, pfx
, delta
,
31918 "vblendvps", 4, Iop_SarN32x4
);
31920 goto decode_success
;
31922 /* VBLENDVPS ymmG, ymmE/memE, ymmV, ymmIS4
31923 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
31924 /* VBLENDVPS = VEX.NDS.256.66.0F3A.WIG 4A /r /is4 */
31925 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
31926 delta
= dis_VBLENDV_256 ( vbi
, pfx
, delta
,
31927 "vblendvps", 4, Iop_SarN32x4
);
31929 goto decode_success
;
31934 /* VBLENDVPD xmmG, xmmE/memE, xmmV, xmmIS4
31935 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
31936 /* VBLENDVPD = VEX.NDS.128.66.0F3A.WIG 4B /r /is4 */
31937 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31938 delta
= dis_VBLENDV_128 ( vbi
, pfx
, delta
,
31939 "vblendvpd", 8, Iop_SarN64x2
);
31941 goto decode_success
;
31943 /* VBLENDVPD ymmG, ymmE/memE, ymmV, ymmIS4
31944 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
31945 /* VBLENDVPD = VEX.NDS.256.66.0F3A.WIG 4B /r /is4 */
31946 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
31947 delta
= dis_VBLENDV_256 ( vbi
, pfx
, delta
,
31948 "vblendvpd", 8, Iop_SarN64x2
);
31950 goto decode_success
;
31955 /* VPBLENDVB xmmG, xmmE/memE, xmmV, xmmIS4
31956 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
31957 /* VPBLENDVB = VEX.NDS.128.66.0F3A.WIG 4C /r /is4 */
31958 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31959 delta
= dis_VBLENDV_128 ( vbi
, pfx
, delta
,
31960 "vpblendvb", 1, Iop_SarN8x16
);
31962 goto decode_success
;
31964 /* VPBLENDVB ymmG, ymmE/memE, ymmV, ymmIS4
31965 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
31966 /* VPBLENDVB = VEX.NDS.256.66.0F3A.WIG 4C /r /is4 */
31967 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
31968 delta
= dis_VBLENDV_256 ( vbi
, pfx
, delta
,
31969 "vpblendvb", 1, Iop_SarN8x16
);
31971 goto decode_success
;
31979 /* VEX.128.66.0F3A.WIG 63 /r ib = VPCMPISTRI imm8, xmm2/m128, xmm1
31980 VEX.128.66.0F3A.WIG 62 /r ib = VPCMPISTRM imm8, xmm2/m128, xmm1
31981 VEX.128.66.0F3A.WIG 61 /r ib = VPCMPESTRI imm8, xmm2/m128, xmm1
31982 VEX.128.66.0F3A.WIG 60 /r ib = VPCMPESTRM imm8, xmm2/m128, xmm1
31983 (selected special cases that actually occur in glibc,
31984 not by any means a complete implementation.)
31986 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31987 Long delta0
= delta
;
31988 delta
= dis_PCMPxSTRx( vbi
, pfx
, delta
, True
/*isAvx*/, opc
);
31989 if (delta
> delta0
) goto decode_success
;
31990 /* else fall though; dis_PCMPxSTRx failed to decode it */
31994 case 0x5C ... 0x5F:
31995 case 0x68 ... 0x6F:
31996 case 0x78 ... 0x7F:
31997 /* FIXME: list the instructions decoded here */
31998 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31999 Long delta0
= delta
;
32000 delta
= dis_FMA4( pfx
, delta
, opc
, uses_vvvv
, vbi
);
32001 if (delta
> delta0
) {
32002 dres
->hint
= Dis_HintVerbose
;
32003 goto decode_success
;
32005 /* else fall though; dis_FMA4 failed to decode it */
32010 /* VAESKEYGENASSIST imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG DF /r */
32011 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
32012 delta
= dis_AESKEYGENASSIST( vbi
, pfx
, delta
, True
/*!isAvx*/ );
32013 goto decode_success
;
32018 /* RORX imm8, r/m32, r32a = VEX.LZ.F2.0F3A.W0 F0 /r /i */
32019 /* RORX imm8, r/m64, r64a = VEX.LZ.F2.0F3A.W1 F0 /r /i */
32020 if (haveF2no66noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
32021 Int size
= getRexW(pfx
) ? 8 : 4;
32022 IRType ty
= szToITy(size
);
32023 IRTemp src
= newTemp(ty
);
32024 UChar rm
= getUChar(delta
);
32027 if (epartIsReg(rm
)) {
32028 imm8
= getUChar(delta
+1);
32029 assign( src
, getIRegE(size
,pfx
,rm
) );
32030 DIP("rorx %d,%s,%s\n", imm8
, nameIRegE(size
,pfx
,rm
),
32031 nameIRegG(size
,pfx
,rm
));
32034 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
32035 imm8
= getUChar(delta
+alen
);
32036 assign( src
, loadLE(ty
, mkexpr(addr
)) );
32037 DIP("rorx %d,%s,%s\n", imm8
, dis_buf
, nameIRegG(size
,pfx
,rm
));
32042 /* dst = (src >>u imm8) | (src << (size-imm8)) */
32043 putIRegG( size
, pfx
, rm
,
32044 imm8
== 0 ? mkexpr(src
)
32045 : binop( mkSizedOp(ty
,Iop_Or8
),
32046 binop( mkSizedOp(ty
,Iop_Shr8
), mkexpr(src
),
32048 binop( mkSizedOp(ty
,Iop_Shl8
), mkexpr(src
),
32049 mkU8(8*size
-imm8
) ) ) );
32050 /* Flags aren't modified. */
32051 goto decode_success
;
32068 /*------------------------------------------------------------*/
32070 /*--- Disassemble a single instruction ---*/
32072 /*------------------------------------------------------------*/
32074 /* Disassemble a single instruction into IR. The instruction is
32075 located in host memory at &guest_code[delta]. */
32078 DisResult
disInstr_AMD64_WRK (
32079 /*OUT*/Bool
* expect_CAS
,
32081 const VexArchInfo
* archinfo
,
32082 const VexAbiInfo
* vbi
,
32091 /* The running delta */
32092 Long delta
= delta64
;
32094 /* Holds eip at the start of the insn, so that we can print
32095 consistent error messages for unimplemented insns. */
32096 Long delta_start
= delta
;
32098 /* sz denotes the nominal data-op size of the insn; we change it to
32099 2 if an 0x66 prefix is seen and 8 if REX.W is 1. In case of
32100 conflict REX.W takes precedence. */
32103 /* pfx holds the summary of prefixes. */
32104 Prefix pfx
= PFX_EMPTY
;
32106 /* Holds the computed opcode-escape indication. */
32107 Escape esc
= ESC_NONE
;
32109 /* Set result defaults. */
32110 dres
.whatNext
= Dis_Continue
;
32112 dres
.jk_StopHere
= Ijk_INVALID
;
32113 dres
.hint
= Dis_HintNone
;
32114 *expect_CAS
= False
;
32116 vassert(guest_RIP_next_assumed
== 0);
32117 vassert(guest_RIP_next_mustcheck
== False
);
32119 t1
= t2
= IRTemp_INVALID
;
32121 DIP("\t0x%llx: ", guest_RIP_bbstart
+delta
);
32123 /* Spot "Special" instructions (see comment at top of file). */
32125 const UChar
* code
= guest_code
+ delta
;
32126 /* Spot the 16-byte preamble:
32127 48C1C703 rolq $3, %rdi
32128 48C1C70D rolq $13, %rdi
32129 48C1C73D rolq $61, %rdi
32130 48C1C733 rolq $51, %rdi
32132 if (code
[ 0] == 0x48 && code
[ 1] == 0xC1 && code
[ 2] == 0xC7
32133 && code
[ 3] == 0x03 &&
32134 code
[ 4] == 0x48 && code
[ 5] == 0xC1 && code
[ 6] == 0xC7
32135 && code
[ 7] == 0x0D &&
32136 code
[ 8] == 0x48 && code
[ 9] == 0xC1 && code
[10] == 0xC7
32137 && code
[11] == 0x3D &&
32138 code
[12] == 0x48 && code
[13] == 0xC1 && code
[14] == 0xC7
32139 && code
[15] == 0x33) {
32140 /* Got a "Special" instruction preamble. Which one is it? */
32141 if (code
[16] == 0x48 && code
[17] == 0x87
32142 && code
[18] == 0xDB /* xchgq %rbx,%rbx */) {
32143 /* %RDX = client_request ( %RAX ) */
32144 DIP("%%rdx = client_request ( %%rax )\n");
32146 jmp_lit(&dres
, Ijk_ClientReq
, guest_RIP_bbstart
+delta
);
32147 vassert(dres
.whatNext
== Dis_StopHere
);
32148 goto decode_success
;
32151 if (code
[16] == 0x48 && code
[17] == 0x87
32152 && code
[18] == 0xC9 /* xchgq %rcx,%rcx */) {
32153 /* %RAX = guest_NRADDR */
32154 DIP("%%rax = guest_NRADDR\n");
32156 putIRegRAX(8, IRExpr_Get( OFFB_NRADDR
, Ity_I64
));
32157 goto decode_success
;
32160 if (code
[16] == 0x48 && code
[17] == 0x87
32161 && code
[18] == 0xD2 /* xchgq %rdx,%rdx */) {
32162 /* call-noredir *%RAX */
32163 DIP("call-noredir *%%rax\n");
32165 t1
= newTemp(Ity_I64
);
32166 assign(t1
, getIRegRAX(8));
32167 t2
= newTemp(Ity_I64
);
32168 assign(t2
, binop(Iop_Sub64
, getIReg64(R_RSP
), mkU64(8)));
32169 putIReg64(R_RSP
, mkexpr(t2
));
32170 storeLE( mkexpr(t2
), mkU64(guest_RIP_bbstart
+delta
));
32171 jmp_treg(&dres
, Ijk_NoRedir
, t1
);
32172 vassert(dres
.whatNext
== Dis_StopHere
);
32173 goto decode_success
;
32176 if (code
[16] == 0x48 && code
[17] == 0x87
32177 && code
[18] == 0xff /* xchgq %rdi,%rdi */) {
32179 DIP("IR injection\n");
32180 vex_inject_ir(irsb
, Iend_LE
);
32182 // Invalidate the current insn. The reason is that the IRop we're
32183 // injecting here can change. In which case the translation has to
32184 // be redone. For ease of handling, we simply invalidate all the
32186 stmt(IRStmt_Put(OFFB_CMSTART
, mkU64(guest_RIP_curr_instr
)));
32187 stmt(IRStmt_Put(OFFB_CMLEN
, mkU64(19)));
32191 stmt( IRStmt_Put( OFFB_RIP
, mkU64(guest_RIP_bbstart
+ delta
) ) );
32192 dres
.whatNext
= Dis_StopHere
;
32193 dres
.jk_StopHere
= Ijk_InvalICache
;
32194 goto decode_success
;
32196 /* We don't know what it is. */
32197 goto decode_failure
;
32202 /* Eat prefixes, summarising the result in pfx and sz, and rejecting
32203 as many invalid combinations as possible. */
32206 if (n_prefixes
> 7) goto decode_failure
;
32207 pre
= getUChar(delta
);
32209 case 0x66: pfx
|= PFX_66
; break;
32210 case 0x67: pfx
|= PFX_ASO
; break;
32211 case 0xF2: pfx
|= PFX_F2
; break;
32212 case 0xF3: pfx
|= PFX_F3
; break;
32213 case 0xF0: pfx
|= PFX_LOCK
; *expect_CAS
= True
; break;
32214 case 0x2E: pfx
|= PFX_CS
; break;
32215 case 0x3E: pfx
|= PFX_DS
; break;
32216 case 0x26: pfx
|= PFX_ES
; break;
32217 case 0x64: pfx
|= PFX_FS
; break;
32218 case 0x65: pfx
|= PFX_GS
; break;
32219 case 0x36: pfx
|= PFX_SS
; break;
32220 case 0x40 ... 0x4F:
32222 if (pre
& (1<<3)) pfx
|= PFX_REXW
;
32223 if (pre
& (1<<2)) pfx
|= PFX_REXR
;
32224 if (pre
& (1<<1)) pfx
|= PFX_REXX
;
32225 if (pre
& (1<<0)) pfx
|= PFX_REXB
;
32228 goto not_a_legacy_prefix
;
32234 not_a_legacy_prefix
:
32235 /* We've used up all the non-VEX prefixes. Parse and validate a
32236 VEX prefix if that's appropriate. */
32237 if (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
) {
32238 /* Used temporarily for holding VEX prefixes. */
32239 UChar vex0
= getUChar(delta
);
32240 if (vex0
== 0xC4) {
32242 UChar vex1
= getUChar(delta
+1);
32243 UChar vex2
= getUChar(delta
+2);
32246 /* Snarf contents of byte 1 */
32247 /* R */ pfx
|= (vex1
& (1<<7)) ? 0 : PFX_REXR
;
32248 /* X */ pfx
|= (vex1
& (1<<6)) ? 0 : PFX_REXX
;
32249 /* B */ pfx
|= (vex1
& (1<<5)) ? 0 : PFX_REXB
;
32251 switch (vex1
& 0x1F) {
32252 case 1: esc
= ESC_0F
; break;
32253 case 2: esc
= ESC_0F38
; break;
32254 case 3: esc
= ESC_0F3A
; break;
32255 /* Any other m-mmmm field will #UD */
32256 default: goto decode_failure
;
32258 /* Snarf contents of byte 2 */
32259 /* W */ pfx
|= (vex2
& (1<<7)) ? PFX_REXW
: 0;
32260 /* ~v3 */ pfx
|= (vex2
& (1<<6)) ? 0 : PFX_VEXnV3
;
32261 /* ~v2 */ pfx
|= (vex2
& (1<<5)) ? 0 : PFX_VEXnV2
;
32262 /* ~v1 */ pfx
|= (vex2
& (1<<4)) ? 0 : PFX_VEXnV1
;
32263 /* ~v0 */ pfx
|= (vex2
& (1<<3)) ? 0 : PFX_VEXnV0
;
32264 /* L */ pfx
|= (vex2
& (1<<2)) ? PFX_VEXL
: 0;
32266 switch (vex2
& 3) {
32268 case 1: pfx
|= PFX_66
; break;
32269 case 2: pfx
|= PFX_F3
; break;
32270 case 3: pfx
|= PFX_F2
; break;
32271 default: vassert(0);
32274 else if (vex0
== 0xC5) {
32276 UChar vex1
= getUChar(delta
+1);
32279 /* Snarf contents of byte 1 */
32280 /* R */ pfx
|= (vex1
& (1<<7)) ? 0 : PFX_REXR
;
32281 /* ~v3 */ pfx
|= (vex1
& (1<<6)) ? 0 : PFX_VEXnV3
;
32282 /* ~v2 */ pfx
|= (vex1
& (1<<5)) ? 0 : PFX_VEXnV2
;
32283 /* ~v1 */ pfx
|= (vex1
& (1<<4)) ? 0 : PFX_VEXnV1
;
32284 /* ~v0 */ pfx
|= (vex1
& (1<<3)) ? 0 : PFX_VEXnV0
;
32285 /* L */ pfx
|= (vex1
& (1<<2)) ? PFX_VEXL
: 0;
32287 switch (vex1
& 3) {
32289 case 1: pfx
|= PFX_66
; break;
32290 case 2: pfx
|= PFX_F3
; break;
32291 case 3: pfx
|= PFX_F2
; break;
32292 default: vassert(0);
32297 /* Can't have both VEX and REX */
32298 if ((pfx
& PFX_VEX
) && (pfx
& PFX_REX
))
32299 goto decode_failure
; /* can't have both */
32302 /* Dump invalid combinations */
32304 if (pfx
& PFX_F2
) n
++;
32305 if (pfx
& PFX_F3
) n
++;
32307 goto decode_failure
; /* can't have both */
32310 if (pfx
& PFX_CS
) n
++;
32311 if (pfx
& PFX_DS
) n
++;
32312 if (pfx
& PFX_ES
) n
++;
32313 if (pfx
& PFX_FS
) n
++;
32314 if (pfx
& PFX_GS
) n
++;
32315 if (pfx
& PFX_SS
) n
++;
32317 goto decode_failure
; /* multiple seg overrides == illegal */
32319 /* We have a %fs prefix. Reject it if there's no evidence in 'vbi'
32320 that we should accept it. */
32321 if ((pfx
& PFX_FS
) && !vbi
->guest_amd64_assume_fs_is_const
)
32322 goto decode_failure
;
32324 /* Ditto for %gs prefixes. */
32325 if ((pfx
& PFX_GS
) && !vbi
->guest_amd64_assume_gs_is_const
)
32326 goto decode_failure
;
32330 if (pfx
& PFX_66
) sz
= 2;
32331 if ((pfx
& PFX_REX
) && (pfx
& PFX_REXW
)) sz
= 8;
32333 /* Now we should be looking at the primary opcode byte or the
32334 leading escapes. Check that any LOCK prefix is actually
32336 if (haveLOCK(pfx
)) {
32337 if (can_be_used_with_LOCK_prefix( &guest_code
[delta
] )) {
32340 *expect_CAS
= False
;
32341 goto decode_failure
;
32345 /* Eat up opcode escape bytes, until we're really looking at the
32346 primary opcode byte. But only if there's no VEX present. */
32347 if (!(pfx
& PFX_VEX
)) {
32348 vassert(esc
== ESC_NONE
);
32349 pre
= getUChar(delta
);
32352 pre
= getUChar(delta
);
32354 case 0x38: esc
= ESC_0F38
; delta
++; break;
32355 case 0x3A: esc
= ESC_0F3A
; delta
++; break;
32356 default: esc
= ESC_0F
; break;
32361 /* So now we're really really looking at the primary opcode
32363 Long delta_at_primary_opcode
= delta
;
32365 if (!(pfx
& PFX_VEX
)) {
32366 /* Handle non-VEX prefixed instructions. "Legacy" (non-VEX) SSE
32367 instructions preserve the upper 128 bits of YMM registers;
32368 iow we can simply ignore the presence of the upper halves of
32369 these registers. */
32372 delta
= dis_ESC_NONE( &dres
, expect_CAS
,
32373 archinfo
, vbi
, pfx
, sz
, delta
);
32376 delta
= dis_ESC_0F ( &dres
, expect_CAS
,
32377 archinfo
, vbi
, pfx
, sz
, delta
);
32380 delta
= dis_ESC_0F38( &dres
,
32381 archinfo
, vbi
, pfx
, sz
, delta
);
32384 delta
= dis_ESC_0F3A( &dres
,
32385 archinfo
, vbi
, pfx
, sz
, delta
);
32391 /* VEX prefixed instruction */
32392 /* Sloppy Intel wording: "An instruction encoded with a VEX.128
32393 prefix that loads a YMM register operand ..." zeroes out bits
32394 128 and above of the register. */
32395 Bool uses_vvvv
= False
;
32398 delta
= dis_ESC_0F__VEX ( &dres
, &uses_vvvv
,
32399 archinfo
, vbi
, pfx
, sz
, delta
);
32402 delta
= dis_ESC_0F38__VEX ( &dres
, &uses_vvvv
,
32403 archinfo
, vbi
, pfx
, sz
, delta
);
32406 delta
= dis_ESC_0F3A__VEX ( &dres
, &uses_vvvv
,
32407 archinfo
, vbi
, pfx
, sz
, delta
);
32410 /* The presence of a VEX prefix, by Intel definition,
32411 always implies at least an 0F escape. */
32412 goto decode_failure
;
32416 /* If the insn doesn't use VEX.vvvv then it must be all ones.
32419 if (getVexNvvvv(pfx
) != 0)
32420 goto decode_failure
;
32424 vassert(delta
- delta_at_primary_opcode
>= 0);
32425 vassert(delta
- delta_at_primary_opcode
< 16/*let's say*/);
32427 /* Use delta == delta_at_primary_opcode to denote decode failure.
32428 This implies that any successful decode must use at least one
32430 if (delta
== delta_at_primary_opcode
)
32431 goto decode_failure
;
32433 goto decode_success
; /* \o/ */
32437 /* All decode failures end up here. */
32439 vex_printf("vex amd64->IR: unhandled instruction bytes: "
32440 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
32441 getUChar(delta_start
+0),
32442 getUChar(delta_start
+1),
32443 getUChar(delta_start
+2),
32444 getUChar(delta_start
+3),
32445 getUChar(delta_start
+4),
32446 getUChar(delta_start
+5),
32447 getUChar(delta_start
+6),
32448 getUChar(delta_start
+7),
32449 getUChar(delta_start
+8),
32450 getUChar(delta_start
+9) );
32451 vex_printf("vex amd64->IR: REX=%d REX.W=%d REX.R=%d REX.X=%d REX.B=%d\n",
32452 haveREX(pfx
) ? 1 : 0, getRexW(pfx
), getRexR(pfx
),
32453 getRexX(pfx
), getRexB(pfx
));
32454 vex_printf("vex amd64->IR: VEX=%d VEX.L=%d VEX.nVVVV=0x%x ESC=%s\n",
32455 haveVEX(pfx
) ? 1 : 0, getVexL(pfx
),
32457 esc
==ESC_NONE
? "NONE" :
32458 esc
==ESC_0F
? "0F" :
32459 esc
==ESC_0F38
? "0F38" :
32460 esc
==ESC_0F3A
? "0F3A" : "???");
32461 vex_printf("vex amd64->IR: PFX.66=%d PFX.F2=%d PFX.F3=%d\n",
32462 have66(pfx
) ? 1 : 0, haveF2(pfx
) ? 1 : 0,
32463 haveF3(pfx
) ? 1 : 0);
32466 /* Tell the dispatcher that this insn cannot be decoded, and so has
32467 not been executed, and (is currently) the next to be executed.
32468 RIP should be up-to-date since it made so at the start of each
32469 insn, but nevertheless be paranoid and update it again right
32471 stmt( IRStmt_Put( OFFB_RIP
, mkU64(guest_RIP_curr_instr
) ) );
32472 jmp_lit(&dres
, Ijk_NoDecode
, guest_RIP_curr_instr
);
32473 vassert(dres
.whatNext
== Dis_StopHere
);
32475 /* We also need to say that a CAS is not expected now, regardless
32476 of what it might have been set to at the start of the function,
32477 since the IR that we've emitted just above (to synthesis a
32478 SIGILL) does not involve any CAS, and presumably no other IR has
32479 been emitted for this (non-decoded) insn. */
32480 *expect_CAS
= False
;
32485 /* All decode successes end up here. */
32486 switch (dres
.whatNext
) {
32488 stmt( IRStmt_Put( OFFB_RIP
, mkU64(guest_RIP_bbstart
+ delta
) ) );
32497 dres
.len
= toUInt(delta
- delta_start
);
32505 /*------------------------------------------------------------*/
32506 /*--- Top-level fn ---*/
32507 /*------------------------------------------------------------*/
32509 /* Disassemble a single instruction into IR. The instruction
32510 is located in host memory at &guest_code[delta]. */
32512 DisResult
disInstr_AMD64 ( IRSB
* irsb_IN
,
32513 const UChar
* guest_code_IN
,
32516 VexArch guest_arch
,
32517 const VexArchInfo
* archinfo
,
32518 const VexAbiInfo
* abiinfo
,
32519 VexEndness host_endness_IN
,
32520 Bool sigill_diag_IN
)
32523 Bool expect_CAS
, has_CAS
;
32526 /* Set globals (see top of this file) */
32527 vassert(guest_arch
== VexArchAMD64
);
32528 guest_code
= guest_code_IN
;
32530 host_endness
= host_endness_IN
;
32531 guest_RIP_curr_instr
= guest_IP
;
32532 guest_RIP_bbstart
= guest_IP
- delta
;
32534 /* We'll consult these after doing disInstr_AMD64_WRK. */
32535 guest_RIP_next_assumed
= 0;
32536 guest_RIP_next_mustcheck
= False
;
32538 x1
= irsb_IN
->stmts_used
;
32539 expect_CAS
= False
;
32540 dres
= disInstr_AMD64_WRK ( &expect_CAS
,
32541 delta
, archinfo
, abiinfo
, sigill_diag_IN
);
32542 x2
= irsb_IN
->stmts_used
;
32545 /* If disInstr_AMD64_WRK tried to figure out the next rip, check it
32546 got it right. Failure of this assertion is serious and denotes
32547 a bug in disInstr. */
32548 if (guest_RIP_next_mustcheck
32549 && guest_RIP_next_assumed
!= guest_RIP_curr_instr
+ dres
.len
) {
32551 vex_printf("assumed next %%rip = 0x%llx\n",
32552 guest_RIP_next_assumed
);
32553 vex_printf(" actual next %%rip = 0x%llx\n",
32554 guest_RIP_curr_instr
+ dres
.len
);
32555 vpanic("disInstr_AMD64: disInstr miscalculated next %rip");
32558 /* See comment at the top of disInstr_AMD64_WRK for meaning of
32559 expect_CAS. Here, we (sanity-)check for the presence/absence of
32560 IRCAS as directed by the returned expect_CAS value. */
32562 for (i
= x1
; i
< x2
; i
++) {
32563 if (irsb_IN
->stmts
[i
]->tag
== Ist_CAS
)
32567 if (expect_CAS
!= has_CAS
) {
32568 /* inconsistency detected. re-disassemble the instruction so as
32569 to generate a useful error message; then assert. */
32570 vex_traceflags
|= VEX_TRACE_FE
;
32571 dres
= disInstr_AMD64_WRK ( &expect_CAS
,
32572 delta
, archinfo
, abiinfo
, sigill_diag_IN
);
32573 for (i
= x1
; i
< x2
; i
++) {
32574 vex_printf("\t\t");
32575 ppIRStmt(irsb_IN
->stmts
[i
]);
32578 /* Failure of this assertion is serious and denotes a bug in
32580 vpanic("disInstr_AMD64: inconsistency in LOCK prefix handling");
32587 /*------------------------------------------------------------*/
32588 /*--- Unused stuff ---*/
32589 /*------------------------------------------------------------*/
32591 // A potentially more Memcheck-friendly version of gen_LZCNT, if
32592 // this should ever be needed.
32594 //static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
32596 // /* Scheme is simple: propagate the most significant 1-bit into all
32597 // lower positions in the word. This gives a word of the form
32598 // 0---01---1. Now invert it, giving a word of the form
32599 // 1---10---0, then do a population-count idiom (to count the 1s,
32600 // which is the number of leading zeroes, or the word size if the
32601 // original word was 0).
32605 // for (i = 0; i < 7; i++) {
32606 // t[i] = newTemp(ty);
32608 // if (ty == Ity_I64) {
32609 // assign(t[0], binop(Iop_Or64, mkexpr(src),
32610 // binop(Iop_Shr64, mkexpr(src), mkU8(1))));
32611 // assign(t[1], binop(Iop_Or64, mkexpr(t[0]),
32612 // binop(Iop_Shr64, mkexpr(t[0]), mkU8(2))));
32613 // assign(t[2], binop(Iop_Or64, mkexpr(t[1]),
32614 // binop(Iop_Shr64, mkexpr(t[1]), mkU8(4))));
32615 // assign(t[3], binop(Iop_Or64, mkexpr(t[2]),
32616 // binop(Iop_Shr64, mkexpr(t[2]), mkU8(8))));
32617 // assign(t[4], binop(Iop_Or64, mkexpr(t[3]),
32618 // binop(Iop_Shr64, mkexpr(t[3]), mkU8(16))));
32619 // assign(t[5], binop(Iop_Or64, mkexpr(t[4]),
32620 // binop(Iop_Shr64, mkexpr(t[4]), mkU8(32))));
32621 // assign(t[6], unop(Iop_Not64, mkexpr(t[5])));
32622 // return gen_POPCOUNT(ty, t[6]);
32624 // if (ty == Ity_I32) {
32625 // assign(t[0], binop(Iop_Or32, mkexpr(src),
32626 // binop(Iop_Shr32, mkexpr(src), mkU8(1))));
32627 // assign(t[1], binop(Iop_Or32, mkexpr(t[0]),
32628 // binop(Iop_Shr32, mkexpr(t[0]), mkU8(2))));
32629 // assign(t[2], binop(Iop_Or32, mkexpr(t[1]),
32630 // binop(Iop_Shr32, mkexpr(t[1]), mkU8(4))));
32631 // assign(t[3], binop(Iop_Or32, mkexpr(t[2]),
32632 // binop(Iop_Shr32, mkexpr(t[2]), mkU8(8))));
32633 // assign(t[4], binop(Iop_Or32, mkexpr(t[3]),
32634 // binop(Iop_Shr32, mkexpr(t[3]), mkU8(16))));
32635 // assign(t[5], unop(Iop_Not32, mkexpr(t[4])));
32636 // return gen_POPCOUNT(ty, t[5]);
32638 // if (ty == Ity_I16) {
32639 // assign(t[0], binop(Iop_Or16, mkexpr(src),
32640 // binop(Iop_Shr16, mkexpr(src), mkU8(1))));
32641 // assign(t[1], binop(Iop_Or16, mkexpr(t[0]),
32642 // binop(Iop_Shr16, mkexpr(t[0]), mkU8(2))));
32643 // assign(t[2], binop(Iop_Or16, mkexpr(t[1]),
32644 // binop(Iop_Shr16, mkexpr(t[1]), mkU8(4))));
32645 // assign(t[3], binop(Iop_Or16, mkexpr(t[2]),
32646 // binop(Iop_Shr16, mkexpr(t[2]), mkU8(8))));
32647 // assign(t[4], unop(Iop_Not16, mkexpr(t[3])));
32648 // return gen_POPCOUNT(ty, t[4]);
32654 /*--------------------------------------------------------------------*/
32655 /*--- end guest_amd64_toIR.c ---*/
32656 /*--------------------------------------------------------------------*/