2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_amd64_toIR.c ---*/
4 /*--------------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
28 Neither the names of the U.S. Department of Energy nor the
29 University of California nor the names of its contributors may be
30 used to endorse or promote products derived from this software
31 without prior written permission.
34 /* Translates AMD64 code to IR. */
36 /* TODO:
38 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
39 to ensure a 64-bit value is being written.
41 x87 FP Limitations:
43 * all arithmetic done at 64 bits
45 * no FP exceptions, except for handling stack over/underflow
47 * FP rounding mode observed only for float->int conversions and
48 int->float conversions which could lose accuracy, and for
49 float-to-float rounding. For all other operations,
50 round-to-nearest is used, regardless.
52 * some of the FCOM cases could do with testing -- not convinced
53 that the args are the right way round.
55 * FSAVE does not re-initialise the FPU; it should do
57 * FINIT not only initialises the FPU environment, it also zeroes
58 all the FP registers. It should leave the registers unchanged.
60 SAHF should cause eflags[1] == 1, and in fact it produces 0. As
61 per Intel docs this bit has no meaning anyway. Since PUSHF is the
62 only way to observe eflags[1], a proper fix would be to make that
63 bit be set by PUSHF.
65 This module uses global variables and so is not MT-safe (if that
66 should ever become relevant).
69 /* Notes re address size overrides (0x67).
71 According to the AMD documentation (24594 Rev 3.09, Sept 2003,
72 "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose
73 and System Instructions"), Section 1.2.3 ("Address-Size Override
74 Prefix"):
76 0x67 applies to all explicit memory references, causing the top
77 32 bits of the effective address to become zero.
79 0x67 has no effect on stack references (push/pop); these always
80 use a 64-bit address.
82 0x67 changes the interpretation of instructions which implicitly
83 reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used
84 instead. These are:
86 cmp{s,sb,sw,sd,sq}
87 in{s,sb,sw,sd}
88 jcxz, jecxz, jrcxz
89 lod{s,sb,sw,sd,sq}
90 loop{,e,bz,be,z}
91 mov{s,sb,sw,sd,sq}
92 out{s,sb,sw,sd}
93 rep{,e,ne,nz}
94 sca{s,sb,sw,sd,sq}
95 sto{s,sb,sw,sd,sq}
96 xlat{,b} */
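/* A minimal illustrative sketch (not the decoder's actual code) of the
   effect described above: when 0x67 is present, the top 32 bits of a
   computed effective address are forced to zero.  It assumes the Prefix
   type and the haveASO/binop/mkU64 helpers defined later in this file;
   the helper name itself is hypothetical. */
#if 0
static IRExpr* maybeZeroTop32ofEA ( Prefix pfx, IRExpr* ea64 )
{
   /* ea64 :: Ity_I64 is an already-computed effective address. */
   if (haveASO(pfx))
      return binop(Iop_And64, ea64, mkU64(0xFFFFFFFFULL));
   return ea64;
}
#endif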
98 /* "Special" instructions.
100 This instruction decoder can decode four special instructions
101 which mean nothing natively (are no-ops as far as regs/mem are
102 concerned) but have meaning for supporting Valgrind. A special
103 instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D
104 48C1C73D 48C1C733 (in the standard interpretation, that means: rolq
105 $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi).
106 Following that, one of the following 4 is allowed (standard
107 interpretation in parentheses):
109 4887DB (xchgq %rbx,%rbx) %RDX = client_request ( %RAX )
110 4887C9 (xchgq %rcx,%rcx) %RAX = guest_NRADDR
111 4887D2 (xchgq %rdx,%rdx) call-noredir *%RAX
112 4887FF (xchgq %rdi,%rdi) IR injection
114 Any other bytes following the 16-byte preamble are illegal and
115 constitute a failure in instruction decoding. This all assumes
116 that the preamble will never occur except in specific code
117 fragments designed for Valgrind to catch.
119 No prefixes may precede a "Special" instruction.
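/* Illustrative sketch only (the helper name is hypothetical): matching
   the 16-byte preamble described above against the instruction stream. */
#if 0
static Bool looksLikeSpecialPreamble ( const UChar* code )
{
   static const UChar preamble[16] = {
      0x48,0xC1,0xC7,0x03,   /* rolq $3,  %rdi */
      0x48,0xC1,0xC7,0x0D,   /* rolq $13, %rdi */
      0x48,0xC1,0xC7,0x3D,   /* rolq $61, %rdi */
      0x48,0xC1,0xC7,0x33    /* rolq $51, %rdi */
   };
   Int i;
   for (i = 0; i < 16; i++)
      if (code[i] != preamble[i])
         return False;
   return True;
}
#endif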
122 /* casLE (implementation of lock-prefixed insns) and rep-prefixed
123 insns: the side-exit back to the start of the insn is done with
124 Ijk_Boring. This is quite wrong, it should be done with
125 Ijk_NoRedir, since otherwise the side exit, which is intended to
126 restart the instruction for whatever reason, could go somewhere
127 entirely else. Doing it right (with Ijk_NoRedir jumps) would make
128 no-redir jumps performance critical, at least for rep-prefixed
129 instructions, since all iterations thereof would involve such a
130 jump. It's not such a big deal with casLE since the side exit is
131 only taken if the CAS fails, that is, the location is contended,
132 which is relatively unlikely.
134 Note also, the test for CAS success vs failure is done using
135 Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
136 Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it
137 shouldn't definedness-check these comparisons. See
138 COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
139 background/rationale.
142 /* LOCK prefixed instructions. These are translated using IR-level
143 CAS statements (IRCAS) and are believed to preserve atomicity, even
144 from the point of view of some other process racing against a
145 simulated one (presumably they communicate via a shared memory
146 segment).
148 Handlers which are aware of LOCK prefixes are:
149 dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
150 dis_cmpxchg_G_E (cmpxchg)
151 dis_Grp1 (add, or, adc, sbb, and, sub, xor)
152 dis_Grp3 (not, neg)
153 dis_Grp4 (inc, dec)
154 dis_Grp5 (inc, dec)
155 dis_Grp8_Imm (bts, btc, btr)
156 dis_bt_G_E (bts, btc, btr)
157 dis_xadd_G_E (xadd)
161 #include "libvex_basictypes.h"
162 #include "libvex_ir.h"
163 #include "libvex.h"
164 #include "libvex_guest_amd64.h"
166 #include "main_util.h"
167 #include "main_globals.h"
168 #include "guest_generic_bb_to_IR.h"
169 #include "guest_generic_x87.h"
170 #include "guest_amd64_defs.h"
173 /*------------------------------------------------------------*/
174 /*--- Globals ---*/
175 /*------------------------------------------------------------*/
177 /* These are set at the start of the translation of an insn, right
178 down in disInstr_AMD64, so that we don't have to pass them around
179 endlessly. They are all constant during the translation of any
180 given insn. */
182 /* These are set at the start of the translation of a BB, so
183 that we don't have to pass them around endlessly. */
185 /* We need to know this to do sub-register accesses correctly. */
186 static VexEndness host_endness;
188 /* Pointer to the guest code area (points to start of BB, not to the
189 insn being processed). */
190 static const UChar* guest_code;
192 /* The guest address corresponding to guest_code[0]. */
193 static Addr64 guest_RIP_bbstart;
195 /* The guest address for the instruction currently being
196 translated. */
197 static Addr64 guest_RIP_curr_instr;
199 /* The IRSB* into which we're generating code. */
200 static IRSB* irsb;
202 /* For ensuring that %rip-relative addressing is done right. A read
203 of %rip generates the address of the next instruction. It may be
204 that we don't conveniently know that inside disAMode(). For sanity
205 checking, if the next insn %rip is needed, we make a guess at what
206 it is, record that guess here, and set the accompanying Bool to
207 indicate that -- after this insn's decode is finished -- that guess
208 needs to be checked. */
210 /* At the start of each insn decode, the pair below is set to (0, False).
211 After the decode, if _mustcheck is now True, _assumed is
212 checked. */
214 static Addr64 guest_RIP_next_assumed;
215 static Bool guest_RIP_next_mustcheck;
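/* An illustrative sketch of the check described above ('delta' stands
   for the guest_code offset just past the decoded insn; the real
   post-decode check differs in detail). */
#if 0
   if (guest_RIP_next_mustcheck
       && guest_RIP_next_assumed != guest_RIP_bbstart + delta) {
      vex_printf("next %%rip guess was wrong\n");
      vassert(0);
   }
#endif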
218 /*------------------------------------------------------------*/
219 /*--- Helpers for constructing IR. ---*/
220 /*------------------------------------------------------------*/
222 /* Generate a new temporary of the given type. */
223 static IRTemp newTemp ( IRType ty )
225 vassert(isPlausibleIRType(ty));
226 return newIRTemp( irsb->tyenv, ty );
229 /* Add a statement to the list held by "irsb". */
230 static void stmt ( IRStmt* st )
232 addStmtToIRSB( irsb, st );
235 /* Generate a statement "dst := e". */
236 static void assign ( IRTemp dst, IRExpr* e )
238 stmt( IRStmt_WrTmp(dst, e) );
241 static IRExpr* unop ( IROp op, IRExpr* a )
243 return IRExpr_Unop(op, a);
246 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
248 return IRExpr_Binop(op, a1, a2);
251 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
253 return IRExpr_Triop(op, a1, a2, a3);
256 static IRExpr* mkexpr ( IRTemp tmp )
258 return IRExpr_RdTmp(tmp);
261 static IRExpr* mkU8 ( ULong i )
263 vassert(i < 256);
264 return IRExpr_Const(IRConst_U8( (UChar)i ));
267 static IRExpr* mkU16 ( ULong i )
269 vassert(i < 0x10000ULL);
270 return IRExpr_Const(IRConst_U16( (UShort)i ));
273 static IRExpr* mkU32 ( ULong i )
275 vassert(i < 0x100000000ULL);
276 return IRExpr_Const(IRConst_U32( (UInt)i ));
279 static IRExpr* mkU64 ( ULong i )
281 return IRExpr_Const(IRConst_U64(i));
284 static IRExpr* mkU ( IRType ty, ULong i )
286 switch (ty) {
287 case Ity_I8: return mkU8(i);
288 case Ity_I16: return mkU16(i);
289 case Ity_I32: return mkU32(i);
290 case Ity_I64: return mkU64(i);
291 default: vpanic("mkU(amd64)");
295 static void storeLE ( IRExpr* addr, IRExpr* data )
297 stmt( IRStmt_Store(Iend_LE, addr, data) );
300 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
302 return IRExpr_Load(Iend_LE, ty, addr);
305 static IROp mkSizedOp ( IRType ty, IROp op8 )
307 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
308 || op8 == Iop_Mul8
309 || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
310 || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
311 || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
312 || op8 == Iop_CasCmpNE8
313 || op8 == Iop_Not8 );
314 switch (ty) {
315 case Ity_I8: return 0 +op8;
316 case Ity_I16: return 1 +op8;
317 case Ity_I32: return 2 +op8;
318 case Ity_I64: return 3 +op8;
319 default: vpanic("mkSizedOp(amd64)");
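/* The "+1/+2/+3" arithmetic above relies on the 8/16/32/64-bit variants
   of each IROp being declared consecutively in libvex_ir.h.  A sketch of
   what that buys us: */
#if 0
   vassert(mkSizedOp(Ity_I32, Iop_Add8) == Iop_Add32);
   vassert(mkSizedOp(Ity_I64, Iop_Xor8) == Iop_Xor64);
   vassert(mkSizedOp(Ity_I16, Iop_Sub8) == Iop_Sub16);
#endif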
323 static
324 IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src )
326 if (szSmall == 1 && szBig == 4) {
327 return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src);
329 if (szSmall == 1 && szBig == 2) {
330 return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src);
332 if (szSmall == 2 && szBig == 4) {
333 return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src);
335 if (szSmall == 1 && szBig == 8 && !signd) {
336 return unop(Iop_8Uto64, src);
338 if (szSmall == 1 && szBig == 8 && signd) {
339 return unop(Iop_8Sto64, src);
341 if (szSmall == 2 && szBig == 8 && !signd) {
342 return unop(Iop_16Uto64, src);
344 if (szSmall == 2 && szBig == 8 && signd) {
345 return unop(Iop_16Sto64, src);
347 vpanic("doScalarWidening(amd64)");
350 static
351 void putGuarded ( Int gstOffB, IRExpr* guard, IRExpr* value )
353 IRType ty = typeOfIRExpr(irsb->tyenv, value);
354 stmt( IRStmt_Put(gstOffB,
355 IRExpr_ITE(guard, value, IRExpr_Get(gstOffB, ty))) );
359 /*------------------------------------------------------------*/
360 /*--- Debugging output ---*/
361 /*------------------------------------------------------------*/
363 /* Bomb out if we can't handle something. */
364 __attribute__ ((noreturn))
365 static void unimplemented ( const HChar* str )
367 vex_printf("amd64toIR: unimplemented feature\n");
368 vpanic(str);
371 #define DIP(format, args...) \
372 if (vex_traceflags & VEX_TRACE_FE) \
373 vex_printf(format, ## args)
375 #define DIS(buf, format, args...) \
376 if (vex_traceflags & VEX_TRACE_FE) \
377 vex_sprintf(buf, format, ## args)
380 /*------------------------------------------------------------*/
381 /*--- Offsets of various parts of the amd64 guest state. ---*/
382 /*------------------------------------------------------------*/
384 #define OFFB_RAX offsetof(VexGuestAMD64State,guest_RAX)
385 #define OFFB_RBX offsetof(VexGuestAMD64State,guest_RBX)
386 #define OFFB_RCX offsetof(VexGuestAMD64State,guest_RCX)
387 #define OFFB_RDX offsetof(VexGuestAMD64State,guest_RDX)
388 #define OFFB_RSP offsetof(VexGuestAMD64State,guest_RSP)
389 #define OFFB_RBP offsetof(VexGuestAMD64State,guest_RBP)
390 #define OFFB_RSI offsetof(VexGuestAMD64State,guest_RSI)
391 #define OFFB_RDI offsetof(VexGuestAMD64State,guest_RDI)
392 #define OFFB_R8 offsetof(VexGuestAMD64State,guest_R8)
393 #define OFFB_R9 offsetof(VexGuestAMD64State,guest_R9)
394 #define OFFB_R10 offsetof(VexGuestAMD64State,guest_R10)
395 #define OFFB_R11 offsetof(VexGuestAMD64State,guest_R11)
396 #define OFFB_R12 offsetof(VexGuestAMD64State,guest_R12)
397 #define OFFB_R13 offsetof(VexGuestAMD64State,guest_R13)
398 #define OFFB_R14 offsetof(VexGuestAMD64State,guest_R14)
399 #define OFFB_R15 offsetof(VexGuestAMD64State,guest_R15)
401 #define OFFB_RIP offsetof(VexGuestAMD64State,guest_RIP)
403 #define OFFB_FS_CONST offsetof(VexGuestAMD64State,guest_FS_CONST)
404 #define OFFB_GS_CONST offsetof(VexGuestAMD64State,guest_GS_CONST)
406 #define OFFB_CC_OP offsetof(VexGuestAMD64State,guest_CC_OP)
407 #define OFFB_CC_DEP1 offsetof(VexGuestAMD64State,guest_CC_DEP1)
408 #define OFFB_CC_DEP2 offsetof(VexGuestAMD64State,guest_CC_DEP2)
409 #define OFFB_CC_NDEP offsetof(VexGuestAMD64State,guest_CC_NDEP)
411 #define OFFB_FPREGS offsetof(VexGuestAMD64State,guest_FPREG[0])
412 #define OFFB_FPTAGS offsetof(VexGuestAMD64State,guest_FPTAG[0])
413 #define OFFB_DFLAG offsetof(VexGuestAMD64State,guest_DFLAG)
414 #define OFFB_ACFLAG offsetof(VexGuestAMD64State,guest_ACFLAG)
415 #define OFFB_IDFLAG offsetof(VexGuestAMD64State,guest_IDFLAG)
416 #define OFFB_FTOP offsetof(VexGuestAMD64State,guest_FTOP)
417 #define OFFB_FC3210 offsetof(VexGuestAMD64State,guest_FC3210)
418 #define OFFB_FPROUND offsetof(VexGuestAMD64State,guest_FPROUND)
420 #define OFFB_SSEROUND offsetof(VexGuestAMD64State,guest_SSEROUND)
421 #define OFFB_YMM0 offsetof(VexGuestAMD64State,guest_YMM0)
422 #define OFFB_YMM1 offsetof(VexGuestAMD64State,guest_YMM1)
423 #define OFFB_YMM2 offsetof(VexGuestAMD64State,guest_YMM2)
424 #define OFFB_YMM3 offsetof(VexGuestAMD64State,guest_YMM3)
425 #define OFFB_YMM4 offsetof(VexGuestAMD64State,guest_YMM4)
426 #define OFFB_YMM5 offsetof(VexGuestAMD64State,guest_YMM5)
427 #define OFFB_YMM6 offsetof(VexGuestAMD64State,guest_YMM6)
428 #define OFFB_YMM7 offsetof(VexGuestAMD64State,guest_YMM7)
429 #define OFFB_YMM8 offsetof(VexGuestAMD64State,guest_YMM8)
430 #define OFFB_YMM9 offsetof(VexGuestAMD64State,guest_YMM9)
431 #define OFFB_YMM10 offsetof(VexGuestAMD64State,guest_YMM10)
432 #define OFFB_YMM11 offsetof(VexGuestAMD64State,guest_YMM11)
433 #define OFFB_YMM12 offsetof(VexGuestAMD64State,guest_YMM12)
434 #define OFFB_YMM13 offsetof(VexGuestAMD64State,guest_YMM13)
435 #define OFFB_YMM14 offsetof(VexGuestAMD64State,guest_YMM14)
436 #define OFFB_YMM15 offsetof(VexGuestAMD64State,guest_YMM15)
437 #define OFFB_YMM16 offsetof(VexGuestAMD64State,guest_YMM16)
439 #define OFFB_EMNOTE offsetof(VexGuestAMD64State,guest_EMNOTE)
440 #define OFFB_CMSTART offsetof(VexGuestAMD64State,guest_CMSTART)
441 #define OFFB_CMLEN offsetof(VexGuestAMD64State,guest_CMLEN)
443 #define OFFB_NRADDR offsetof(VexGuestAMD64State,guest_NRADDR)
446 /*------------------------------------------------------------*/
447 /*--- Helper bits and pieces for deconstructing the ---*/
448 /*--- amd64 insn stream. ---*/
449 /*------------------------------------------------------------*/
451 /* This is the AMD64 register encoding -- integer regs. */
452 #define R_RAX 0
453 #define R_RCX 1
454 #define R_RDX 2
455 #define R_RBX 3
456 #define R_RSP 4
457 #define R_RBP 5
458 #define R_RSI 6
459 #define R_RDI 7
460 #define R_R8 8
461 #define R_R9 9
462 #define R_R10 10
463 #define R_R11 11
464 #define R_R12 12
465 #define R_R13 13
466 #define R_R14 14
467 #define R_R15 15
469 /* This is the Intel register encoding -- segment regs. */
470 #define R_ES 0
471 #define R_CS 1
472 #define R_SS 2
473 #define R_DS 3
474 #define R_FS 4
475 #define R_GS 5
478 /* Various simple conversions */
480 static ULong extend_s_8to64 ( UChar x )
482 return (ULong)((Long)(((ULong)x) << 56) >> 56);
485 static ULong extend_s_16to64 ( UShort x )
487 return (ULong)((Long)(((ULong)x) << 48) >> 48);
490 static ULong extend_s_32to64 ( UInt x )
492 return (ULong)((Long)(((ULong)x) << 32) >> 32);
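/* Worked examples of the sign-extensions above (sketch only): */
#if 0
   vassert(extend_s_8to64 ( 0x80 )        == 0xFFFFFFFFFFFFFF80ULL);
   vassert(extend_s_16to64( 0x7FFF )      == 0x0000000000007FFFULL);
   vassert(extend_s_32to64( 0x80000000U ) == 0xFFFFFFFF80000000ULL);
#endif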
495 /* Figure out whether the mod and rm parts of a modRM byte refer to a
496 register or memory. If so, the byte will have the form 11XXXYYY,
497 where YYY is the register number. */
498 inline
499 static Bool epartIsReg ( UChar mod_reg_rm )
501 return toBool(0xC0 == (mod_reg_rm & 0xC0));
504 /* Extract the 'g' field from a modRM byte. This only produces 3
505 bits, which is not a complete register number. You should avoid
506 this function if at all possible. */
507 inline
508 static Int gregLO3ofRM ( UChar mod_reg_rm )
510 return (Int)( (mod_reg_rm >> 3) & 7 );
513 /* Ditto the 'e' field of a modRM byte. */
514 inline
515 static Int eregLO3ofRM ( UChar mod_reg_rm )
517 return (Int)(mod_reg_rm & 0x7);
520 /* Get an 8/16/32-bit unsigned value out of the insn stream. */
522 static inline UChar getUChar ( Long delta )
524 UChar v = guest_code[delta+0];
525 return v;
528 static UInt getUDisp16 ( Long delta )
530 UInt v = guest_code[delta+1]; v <<= 8;
531 v |= guest_code[delta+0];
532 return v & 0xFFFF;
535 //.. static UInt getUDisp ( Int size, Long delta )
536 //.. {
537 //.. switch (size) {
538 //.. case 4: return getUDisp32(delta);
539 //.. case 2: return getUDisp16(delta);
540 //.. case 1: return getUChar(delta);
541 //.. default: vpanic("getUDisp(x86)");
542 //.. }
543 //.. return 0; /*notreached*/
544 //.. }
547 /* Get a byte value out of the insn stream and sign-extend to 64
548 bits. */
549 static Long getSDisp8 ( Long delta )
551 return extend_s_8to64( guest_code[delta] );
554 /* Get a 16-bit value out of the insn stream and sign-extend to 64
555 bits. */
556 static Long getSDisp16 ( Long delta )
558 UInt v = guest_code[delta+1]; v <<= 8;
559 v |= guest_code[delta+0];
560 return extend_s_16to64( (UShort)v );
563 /* Get a 32-bit value out of the insn stream and sign-extend to 64
564 bits. */
565 static Long getSDisp32 ( Long delta )
567 UInt v = guest_code[delta+3]; v <<= 8;
568 v |= guest_code[delta+2]; v <<= 8;
569 v |= guest_code[delta+1]; v <<= 8;
570 v |= guest_code[delta+0];
571 return extend_s_32to64( v );
574 /* Get a 64-bit value out of the insn stream. */
575 static Long getDisp64 ( Long delta )
577 ULong v = 0;
578 v |= guest_code[delta+7]; v <<= 8;
579 v |= guest_code[delta+6]; v <<= 8;
580 v |= guest_code[delta+5]; v <<= 8;
581 v |= guest_code[delta+4]; v <<= 8;
582 v |= guest_code[delta+3]; v <<= 8;
583 v |= guest_code[delta+2]; v <<= 8;
584 v |= guest_code[delta+1]; v <<= 8;
585 v |= guest_code[delta+0];
586 return v;
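/* Byte-order example (for illustration): if the instruction stream holds
   the bytes 01 02 03 04 05 06 07 08 starting at 'delta' (lowest address
   first), getDisp64 assembles the little-endian value
   0x0807060504030201. */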
589 /* Note: because AMD64 doesn't allow 64-bit literals, it is an error
590 if this is called with size==8. Should not happen. */
591 static Long getSDisp ( Int size, Long delta )
593 switch (size) {
594 case 4: return getSDisp32(delta);
595 case 2: return getSDisp16(delta);
596 case 1: return getSDisp8(delta);
597 default: vpanic("getSDisp(amd64)");
601 static ULong mkSizeMask ( Int sz )
603 switch (sz) {
604 case 1: return 0x00000000000000FFULL;
605 case 2: return 0x000000000000FFFFULL;
606 case 4: return 0x00000000FFFFFFFFULL;
607 case 8: return 0xFFFFFFFFFFFFFFFFULL;
608 default: vpanic("mkSzMask(amd64)");
612 static Int imin ( Int a, Int b )
614 return (a < b) ? a : b;
617 static IRType szToITy ( Int n )
619 switch (n) {
620 case 1: return Ity_I8;
621 case 2: return Ity_I16;
622 case 4: return Ity_I32;
623 case 8: return Ity_I64;
624 default: vex_printf("\nszToITy(%d)\n", n);
625 vpanic("szToITy(amd64)");
630 /*------------------------------------------------------------*/
631 /*--- For dealing with prefixes. ---*/
632 /*------------------------------------------------------------*/
634 /* The idea is to pass around an int holding a bitmask summarising
635 info from the prefixes seen on the current instruction, including
636 info from the REX byte. This info is used in various places, but
637 most especially when making sense of register fields in
638 instructions.
640 The top 8 bits of the prefix are 0x55, just as a hacky way to
641 ensure it really is a valid prefix.
643 Things you can safely assume about a well-formed prefix:
644 * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set.
645 * if REX is not present then REXW,REXR,REXX,REXB will read
646 as zero.
647 * F2 and F3 will not both be 1.
650 typedef UInt Prefix;
652 #define PFX_ASO (1<<0) /* address-size override present (0x67) */
653 #define PFX_66 (1<<1) /* operand-size override-to-16 present (0x66) */
654 #define PFX_REX (1<<2) /* REX byte present (0x40 to 0x4F) */
655 #define PFX_REXW (1<<3) /* REX W bit, if REX present, else 0 */
656 #define PFX_REXR (1<<4) /* REX R bit, if REX present, else 0 */
657 #define PFX_REXX (1<<5) /* REX X bit, if REX present, else 0 */
658 #define PFX_REXB (1<<6) /* REX B bit, if REX present, else 0 */
659 #define PFX_LOCK (1<<7) /* bus LOCK prefix present (0xF0) */
660 #define PFX_F2 (1<<8) /* REPNE/REPNZ prefix present (0xF2) */
661 #define PFX_F3 (1<<9) /* REP/REPE/REPZ prefix present (0xF3) */
662 #define PFX_CS (1<<10) /* CS segment prefix present (0x2E) */
663 #define PFX_DS (1<<11) /* DS segment prefix present (0x3E) */
664 #define PFX_ES (1<<12) /* ES segment prefix present (0x26) */
665 #define PFX_FS (1<<13) /* FS segment prefix present (0x64) */
666 #define PFX_GS (1<<14) /* GS segment prefix present (0x65) */
667 #define PFX_SS (1<<15) /* SS segment prefix present (0x36) */
668 #define PFX_VEX (1<<16) /* VEX prefix present (0xC4 or 0xC5) */
669 #define PFX_VEXL (1<<17) /* VEX L bit, if VEX present, else 0 */
670 /* The extra register field VEX.vvvv is encoded (after not-ing it) as
671 PFX_VEXnV3 .. PFX_VEXnV0, so these must occupy adjacent bit
672 positions. */
673 #define PFX_VEXnV0 (1<<18) /* ~VEX vvvv[0], if VEX present, else 0 */
674 #define PFX_VEXnV1 (1<<19) /* ~VEX vvvv[1], if VEX present, else 0 */
675 #define PFX_VEXnV2 (1<<20) /* ~VEX vvvv[2], if VEX present, else 0 */
676 #define PFX_VEXnV3 (1<<21) /* ~VEX vvvv[3], if VEX present, else 0 */
679 #define PFX_EMPTY 0x55000000
681 static Bool IS_VALID_PFX ( Prefix pfx ) {
682 return toBool((pfx & 0xFF000000) == PFX_EMPTY);
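/* An illustrative sketch (not the decoder's actual prefix-scanning code)
   of how a REX byte 0100WRXB would be folded into a Prefix of the shape
   described above: */
#if 0
   Prefix pfx = PFX_EMPTY;
   UChar  rex = 0x4D;          /* example byte: 0100 1101 = REX.WRB */
   pfx |= PFX_REX;
   if (rex & 8) pfx |= PFX_REXW;
   if (rex & 4) pfx |= PFX_REXR;
   if (rex & 2) pfx |= PFX_REXX;
   if (rex & 1) pfx |= PFX_REXB;
   vassert(IS_VALID_PFX(pfx));
#endif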
685 static Bool haveREX ( Prefix pfx ) {
686 return toBool(pfx & PFX_REX);
689 static Int getRexW ( Prefix pfx ) {
690 return (pfx & PFX_REXW) ? 1 : 0;
692 static Int getRexR ( Prefix pfx ) {
693 return (pfx & PFX_REXR) ? 1 : 0;
695 static Int getRexX ( Prefix pfx ) {
696 return (pfx & PFX_REXX) ? 1 : 0;
698 static Int getRexB ( Prefix pfx ) {
699 return (pfx & PFX_REXB) ? 1 : 0;
702 /* Check a prefix doesn't have F2 or F3 set in it, since usually that
703 completely changes what instruction it really is. */
704 static Bool haveF2orF3 ( Prefix pfx ) {
705 return toBool((pfx & (PFX_F2|PFX_F3)) > 0);
707 static Bool haveF2andF3 ( Prefix pfx ) {
708 return toBool((pfx & (PFX_F2|PFX_F3)) == (PFX_F2|PFX_F3));
710 static Bool haveF2 ( Prefix pfx ) {
711 return toBool((pfx & PFX_F2) > 0);
713 static Bool haveF3 ( Prefix pfx ) {
714 return toBool((pfx & PFX_F3) > 0);
717 static Bool have66 ( Prefix pfx ) {
718 return toBool((pfx & PFX_66) > 0);
720 static Bool haveASO ( Prefix pfx ) {
721 return toBool((pfx & PFX_ASO) > 0);
723 static Bool haveLOCK ( Prefix pfx ) {
724 return toBool((pfx & PFX_LOCK) > 0);
727 /* Return True iff pfx has 66 set and F2 and F3 clear */
728 static Bool have66noF2noF3 ( Prefix pfx )
730 return
731 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66);
734 /* Return True iff pfx has F2 set and 66 and F3 clear */
735 static Bool haveF2no66noF3 ( Prefix pfx )
737 return
738 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F2);
741 /* Return True iff pfx has F3 set and 66 and F2 clear */
742 static Bool haveF3no66noF2 ( Prefix pfx )
744 return
745 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3);
748 /* Return True iff pfx has F3 set and F2 clear */
749 static Bool haveF3noF2 ( Prefix pfx )
751 return
752 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F3);
755 /* Return True iff pfx has F2 set and F3 clear */
756 static Bool haveF2noF3 ( Prefix pfx )
758 return
759 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F2);
762 /* Return True iff pfx has F2 and F3 clear */
763 static Bool haveNoF2noF3 ( Prefix pfx )
765 return
766 toBool((pfx & (PFX_F2|PFX_F3)) == 0);
769 /* Return True iff pfx has 66, F2 and F3 clear */
770 static Bool haveNo66noF2noF3 ( Prefix pfx )
772 return
773 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0);
776 /* Return True iff pfx has any of 66, F2 and F3 set */
777 static Bool have66orF2orF3 ( Prefix pfx )
779 return toBool( ! haveNo66noF2noF3(pfx) );
782 /* Return True iff pfx has 66 or F3 set */
783 static Bool have66orF3 ( Prefix pfx )
785 return toBool((pfx & (PFX_66|PFX_F3)) > 0);
788 /* Clear all the segment-override bits in a prefix. */
789 static Prefix clearSegBits ( Prefix p )
791 return
792 p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS);
795 /* Get the (inverted, hence back to "normal") VEX.vvvv field. */
796 static UInt getVexNvvvv ( Prefix pfx ) {
797 UInt r = (UInt)pfx;
798 r /= (UInt)PFX_VEXnV0; /* pray this turns into a shift */
799 return r & 0xF;
802 static Bool haveVEX ( Prefix pfx ) {
803 return toBool(pfx & PFX_VEX);
806 static Int getVexL ( Prefix pfx ) {
807 return (pfx & PFX_VEXL) ? 1 : 0;
811 /*------------------------------------------------------------*/
812 /*--- For dealing with escapes ---*/
813 /*------------------------------------------------------------*/
816 /* Escapes come after the prefixes, but before the primary opcode
817 byte. They escape the primary opcode byte into a bigger space.
818 The 0xF0000000 isn't significant, except so as to make it not
819 overlap valid Prefix values, for sanity checking.
822 typedef
823 enum {
824 ESC_NONE=0xF0000000, // none
825 ESC_0F, // 0F
826 ESC_0F38, // 0F 38
827 ESC_0F3A // 0F 3A
829 Escape;
832 /*------------------------------------------------------------*/
833 /*--- For dealing with integer registers ---*/
834 /*------------------------------------------------------------*/
836 /* This is somewhat complex. The rules are:
838 For 64, 32 and 16 bit register references, the e or g fields in the
839 modrm bytes supply the low 3 bits of the register number. The
840 fourth (most-significant) bit of the register number is supplied by
841 the REX byte, if it is present; else that bit is taken to be zero.
843 The REX.R bit supplies the high bit corresponding to the g register
844 field, and the REX.B bit supplies the high bit corresponding to the
845 e register field (when the mod part of modrm indicates that modrm's
846 e component refers to a register and not to memory).
848 The REX.X bit supplies a high register bit for certain registers
849 in SIB address modes, and is generally rarely used.
851 For 8 bit register references, the presence of the REX byte itself
852 has significance. If there is no REX present, then the 3-bit
853 number extracted from the modrm e or g field is treated as an index
854 into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the
855 old x86 encoding scheme.
857 But if there is a REX present, the register reference is
858 interpreted in the same way as for 64/32/16-bit references: a high
859 bit is extracted from REX, giving a 4-bit number, and the denoted
860 register is the lowest 8 bits of the 16 integer registers denoted
861 by the number. In particular, values 4 through 7 of this sequence
862 do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of
863 %rsp %rbp %rsi %rdi.
865 The REX.W bit has no bearing at all on register numbers. Instead
866 its presence indicates that the operand size is to be overridden
867 from its default value (32 bits) to 64 bits instead. This is in
868 the same fashion that an 0x66 prefix indicates the operand size is
869 to be overridden from 32 bits down to 16 bits. When both REX.W and
870 0x66 are present there is a conflict, and REX.W takes precedence.
872 Rather than try to handle this complexity using a single huge
873 function, several smaller ones are provided. The aim is to make it
874 as difficult as possible to screw up register decoding in a subtle
875 and hard-to-track-down way.
877 Because these routines fish around in the host's memory (that is,
878 in the guest state area) for sub-parts of guest registers, their
879 correctness depends on the host's endianness. So far these
880 routines only work for little-endian hosts. Those for which
881 endianness is important have assertions to ensure sanity.
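/* A small illustration of the 8-bit rule described above, in terms of
   the nameIReg helper defined below (sketch only): encoding 4 denotes
   %ah when no REX byte is present, but %spl (the low byte of %rsp)
   when any REX byte is present. */
#if 0
   const HChar* noRex   = nameIReg( 1, 4, True/*irregular*/ );  /* "%ah"  */
   const HChar* withRex = nameIReg( 1, 4, False );              /* "%spl" */
#endif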
885 /* About the simplest question you can ask: where do the 64-bit
886 integer registers live (in the guest state) ? */
888 static Int integerGuestReg64Offset ( UInt reg )
890 switch (reg) {
891 case R_RAX: return OFFB_RAX;
892 case R_RCX: return OFFB_RCX;
893 case R_RDX: return OFFB_RDX;
894 case R_RBX: return OFFB_RBX;
895 case R_RSP: return OFFB_RSP;
896 case R_RBP: return OFFB_RBP;
897 case R_RSI: return OFFB_RSI;
898 case R_RDI: return OFFB_RDI;
899 case R_R8: return OFFB_R8;
900 case R_R9: return OFFB_R9;
901 case R_R10: return OFFB_R10;
902 case R_R11: return OFFB_R11;
903 case R_R12: return OFFB_R12;
904 case R_R13: return OFFB_R13;
905 case R_R14: return OFFB_R14;
906 case R_R15: return OFFB_R15;
907 default: vpanic("integerGuestReg64Offset(amd64)");
912 /* Produce the name of an integer register, for printing purposes.
913 reg is a number in the range 0 .. 15 that has been generated from a
914 3-bit reg-field number and a REX extension bit. irregular denotes
915 the case where sz==1 and no REX byte is present. */
917 static
918 const HChar* nameIReg ( Int sz, UInt reg, Bool irregular )
920 static const HChar* ireg64_names[16]
921 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
922 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
923 static const HChar* ireg32_names[16]
924 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
925 "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" };
926 static const HChar* ireg16_names[16]
927 = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di",
928 "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" };
929 static const HChar* ireg8_names[16]
930 = { "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil",
931 "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" };
932 static const HChar* ireg8_irregular[8]
933 = { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh" };
935 vassert(reg < 16);
936 if (sz == 1) {
937 if (irregular)
938 vassert(reg < 8);
939 } else {
940 vassert(irregular == False);
943 switch (sz) {
944 case 8: return ireg64_names[reg];
945 case 4: return ireg32_names[reg];
946 case 2: return ireg16_names[reg];
947 case 1: if (irregular) {
948 return ireg8_irregular[reg];
949 } else {
950 return ireg8_names[reg];
952 default: vpanic("nameIReg(amd64)");
956 /* Using the same argument conventions as nameIReg, produce the
957 guest state offset of an integer register. */
959 static
960 Int offsetIReg ( Int sz, UInt reg, Bool irregular )
962 vassert(reg < 16);
963 if (sz == 1) {
964 if (irregular)
965 vassert(reg < 8);
966 } else {
967 vassert(irregular == False);
970 /* Deal with irregular case -- sz==1 and no REX present */
971 if (sz == 1 && irregular) {
972 switch (reg) {
973 case R_RSP: return 1+ OFFB_RAX;
974 case R_RBP: return 1+ OFFB_RCX;
975 case R_RSI: return 1+ OFFB_RDX;
976 case R_RDI: return 1+ OFFB_RBX;
977 default: break; /* use the normal case */
981 /* Normal case */
982 return integerGuestReg64Offset(reg);
986 /* Read the %CL register :: Ity_I8, for shift/rotate operations. */
988 static IRExpr* getIRegCL ( void )
990 vassert(host_endness == VexEndnessLE);
991 return IRExpr_Get( OFFB_RCX, Ity_I8 );
995 /* Write to the %AH register. */
997 static void putIRegAH ( IRExpr* e )
999 vassert(host_endness == VexEndnessLE);
1000 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
1001 stmt( IRStmt_Put( OFFB_RAX+1, e ) );
1005 /* Read/write various widths of %RAX, as it has various
1006 special-purpose uses. */
1008 static const HChar* nameIRegRAX ( Int sz )
1010 switch (sz) {
1011 case 1: return "%al";
1012 case 2: return "%ax";
1013 case 4: return "%eax";
1014 case 8: return "%rax";
1015 default: vpanic("nameIRegRAX(amd64)");
1019 static IRExpr* getIRegRAX ( Int sz )
1021 vassert(host_endness == VexEndnessLE);
1022 switch (sz) {
1023 case 1: return IRExpr_Get( OFFB_RAX, Ity_I8 );
1024 case 2: return IRExpr_Get( OFFB_RAX, Ity_I16 );
1025 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 ));
1026 case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 );
1027 default: vpanic("getIRegRAX(amd64)");
1031 static void putIRegRAX ( Int sz, IRExpr* e )
1033 IRType ty = typeOfIRExpr(irsb->tyenv, e);
1034 vassert(host_endness == VexEndnessLE);
1035 switch (sz) {
1036 case 8: vassert(ty == Ity_I64);
1037 stmt( IRStmt_Put( OFFB_RAX, e ));
1038 break;
1039 case 4: vassert(ty == Ity_I32);
1040 stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) ));
1041 break;
1042 case 2: vassert(ty == Ity_I16);
1043 stmt( IRStmt_Put( OFFB_RAX, e ));
1044 break;
1045 case 1: vassert(ty == Ity_I8);
1046 stmt( IRStmt_Put( OFFB_RAX, e ));
1047 break;
1048 default: vpanic("putIRegRAX(amd64)");
1053 /* Read/write various widths of %RDX, as it has various
1054 special-purpose uses. */
1056 static const HChar* nameIRegRDX ( Int sz )
1058 switch (sz) {
1059 case 1: return "%dl";
1060 case 2: return "%dx";
1061 case 4: return "%edx";
1062 case 8: return "%rdx";
1063 default: vpanic("nameIRegRDX(amd64)");
1067 static IRExpr* getIRegRDX ( Int sz )
1069 vassert(host_endness == VexEndnessLE);
1070 switch (sz) {
1071 case 1: return IRExpr_Get( OFFB_RDX, Ity_I8 );
1072 case 2: return IRExpr_Get( OFFB_RDX, Ity_I16 );
1073 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 ));
1074 case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 );
1075 default: vpanic("getIRegRDX(amd64)");
1079 static void putIRegRDX ( Int sz, IRExpr* e )
1081 vassert(host_endness == VexEndnessLE);
1082 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
1083 switch (sz) {
1084 case 8: stmt( IRStmt_Put( OFFB_RDX, e ));
1085 break;
1086 case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) ));
1087 break;
1088 case 2: stmt( IRStmt_Put( OFFB_RDX, e ));
1089 break;
1090 case 1: stmt( IRStmt_Put( OFFB_RDX, e ));
1091 break;
1092 default: vpanic("putIRegRDX(amd64)");
1097 /* Simplistic functions to deal with the integer registers as a
1098 straightforward bank of 16 64-bit regs. */
1100 static IRExpr* getIReg64 ( UInt regno )
1102 return IRExpr_Get( integerGuestReg64Offset(regno),
1103 Ity_I64 );
1106 static void putIReg64 ( UInt regno, IRExpr* e )
1108 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
1109 stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) );
1112 static const HChar* nameIReg64 ( UInt regno )
1114 return nameIReg( 8, regno, False );
1118 /* Simplistic functions to deal with the lower halves of integer
1119 registers as a straightforward bank of 16 32-bit regs. */
1121 static IRExpr* getIReg32 ( UInt regno )
1123 vassert(host_endness == VexEndnessLE);
1124 return unop(Iop_64to32,
1125 IRExpr_Get( integerGuestReg64Offset(regno),
1126 Ity_I64 ));
1129 static void putIReg32 ( UInt regno, IRExpr* e )
1131 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
1132 stmt( IRStmt_Put( integerGuestReg64Offset(regno),
1133 unop(Iop_32Uto64,e) ) );
1136 static const HChar* nameIReg32 ( UInt regno )
1138 return nameIReg( 4, regno, False );
1142 /* Simplistic functions to deal with the lower quarters of integer
1143 registers as a straightforward bank of 16 16-bit regs. */
1145 static IRExpr* getIReg16 ( UInt regno )
1147 vassert(host_endness == VexEndnessLE);
1148 return IRExpr_Get( integerGuestReg64Offset(regno),
1149 Ity_I16 );
1152 static void putIReg16 ( UInt regno, IRExpr* e )
1154 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
1155 stmt( IRStmt_Put( integerGuestReg64Offset(regno),
1156 unop(Iop_16Uto64,e) ) );
1159 static const HChar* nameIReg16 ( UInt regno )
1161 return nameIReg( 2, regno, False );
1165 /* Sometimes what we know is a 3-bit register number, a REX byte, and
1166 which field of the REX byte is to be used to extend to a 4-bit
1167 number. These functions cater for that situation.
1169 static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits )
1171 vassert(lo3bits < 8);
1172 vassert(IS_VALID_PFX(pfx));
1173 return getIReg64( lo3bits | (getRexX(pfx) << 3) );
1176 static const HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits )
1178 vassert(lo3bits < 8);
1179 vassert(IS_VALID_PFX(pfx));
1180 return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False );
1183 static const HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
1185 vassert(lo3bits < 8);
1186 vassert(IS_VALID_PFX(pfx));
1187 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1188 return nameIReg( sz, lo3bits | (getRexB(pfx) << 3),
1189 toBool(sz==1 && !haveREX(pfx)) );
1192 static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
1194 vassert(lo3bits < 8);
1195 vassert(IS_VALID_PFX(pfx));
1196 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1197 if (sz == 4) {
1198 sz = 8;
1199 return unop(Iop_64to32,
1200 IRExpr_Get(
1201 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
1202 False/*!irregular*/ ),
1203 szToITy(sz)
1206 } else {
1207 return IRExpr_Get(
1208 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
1209 toBool(sz==1 && !haveREX(pfx)) ),
1210 szToITy(sz)
1215 static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e )
1217 vassert(lo3bits < 8);
1218 vassert(IS_VALID_PFX(pfx));
1219 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1220 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
1221 stmt( IRStmt_Put(
1222 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
1223 toBool(sz==1 && !haveREX(pfx)) ),
1224 sz==4 ? unop(Iop_32Uto64,e) : e
1229 /* Functions for getting register numbers from modrm bytes and REX
1230 when we don't have to consider the complexities of integer subreg
1231 accesses.
1233 /* Extract the g reg field from a modRM byte, and augment it using the
1234 REX.R bit from the supplied REX byte. The R bit usually is
1235 associated with the g register field.
1237 static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
1239 Int reg = (Int)( (mod_reg_rm >> 3) & 7 );
1240 reg += (pfx & PFX_REXR) ? 8 : 0;
1241 return reg;
1244 /* Extract the e reg field from a modRM byte, and augment it using the
1245 REX.B bit from the supplied REX byte. The B bit usually is
1246 associated with the e register field (when modrm indicates e is a
1247 register, that is).
1249 static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
1251 Int rm;
1252 vassert(epartIsReg(mod_reg_rm));
1253 rm = (Int)(mod_reg_rm & 0x7);
1254 rm += (pfx & PFX_REXB) ? 8 : 0;
1255 return rm;
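/* Worked example (sketch only): for the modRM byte 0xD9 (binary
   11 011 001) together with a REX byte whose R bit is set and B bit
   clear, the g field denotes register 3+8 = 11 (%r11) and the e field
   denotes register 1 (%rcx). */
#if 0
   UChar  modrm = 0xD9;
   Prefix pfx   = PFX_EMPTY | PFX_REX | PFX_REXR;
   vassert(gregOfRexRM(pfx, modrm) == 11);
   vassert(eregOfRexRM(pfx, modrm) == 1);
#endif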
1259 /* General functions for dealing with integer register access. */
1261 /* Produce the guest state offset for a reference to the 'g' register
1262 field in a modrm byte, taking into account REX (or its absence),
1263 and the size of the access.
1265 static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1267 UInt reg;
1268 vassert(host_endness == VexEndnessLE);
1269 vassert(IS_VALID_PFX(pfx));
1270 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1271 reg = gregOfRexRM( pfx, mod_reg_rm );
1272 return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
1275 static
1276 IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1278 if (sz == 4) {
1279 sz = 8;
1280 return unop(Iop_64to32,
1281 IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
1282 szToITy(sz) ));
1283 } else {
1284 return IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
1285 szToITy(sz) );
1289 static
1290 void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
1292 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
1293 if (sz == 4) {
1294 e = unop(Iop_32Uto64,e);
1296 stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) );
1299 static
1300 const HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1302 return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm),
1303 toBool(sz==1 && !haveREX(pfx)) );
1307 static
1308 IRExpr* getIRegV ( Int sz, Prefix pfx )
1310 if (sz == 4) {
1311 sz = 8;
1312 return unop(Iop_64to32,
1313 IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ),
1314 szToITy(sz) ));
1315 } else {
1316 return IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ),
1317 szToITy(sz) );
1321 static
1322 void putIRegV ( Int sz, Prefix pfx, IRExpr* e )
1324 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
1325 if (sz == 4) {
1326 e = unop(Iop_32Uto64,e);
1328 stmt( IRStmt_Put( offsetIReg( sz, getVexNvvvv(pfx), False ), e ) );
1331 static
1332 const HChar* nameIRegV ( Int sz, Prefix pfx )
1334 return nameIReg( sz, getVexNvvvv(pfx), False );
1339 /* Produce the guest state offset for a reference to the 'e' register
1340 field in a modrm byte, taking into account REX (or its absence),
1341 and the size of the access. eregOfRexRM will assert if mod_reg_rm
1342 denotes a memory access rather than a register access.
1344 static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1346 UInt reg;
1347 vassert(host_endness == VexEndnessLE);
1348 vassert(IS_VALID_PFX(pfx));
1349 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1350 reg = eregOfRexRM( pfx, mod_reg_rm );
1351 return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
1354 static
1355 IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1357 if (sz == 4) {
1358 sz = 8;
1359 return unop(Iop_64to32,
1360 IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
1361 szToITy(sz) ));
1362 } else {
1363 return IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
1364 szToITy(sz) );
1368 static
1369 void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
1371 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
1372 if (sz == 4) {
1373 e = unop(Iop_32Uto64,e);
1375 stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) );
1378 static
1379 const HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1381 return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm),
1382 toBool(sz==1 && !haveREX(pfx)) );
1386 /*------------------------------------------------------------*/
1387 /*--- For dealing with XMM registers ---*/
1388 /*------------------------------------------------------------*/
1390 static Int ymmGuestRegOffset ( UInt ymmreg )
1392 switch (ymmreg) {
1393 case 0: return OFFB_YMM0;
1394 case 1: return OFFB_YMM1;
1395 case 2: return OFFB_YMM2;
1396 case 3: return OFFB_YMM3;
1397 case 4: return OFFB_YMM4;
1398 case 5: return OFFB_YMM5;
1399 case 6: return OFFB_YMM6;
1400 case 7: return OFFB_YMM7;
1401 case 8: return OFFB_YMM8;
1402 case 9: return OFFB_YMM9;
1403 case 10: return OFFB_YMM10;
1404 case 11: return OFFB_YMM11;
1405 case 12: return OFFB_YMM12;
1406 case 13: return OFFB_YMM13;
1407 case 14: return OFFB_YMM14;
1408 case 15: return OFFB_YMM15;
1409 default: vpanic("ymmGuestRegOffset(amd64)");
1413 static Int xmmGuestRegOffset ( UInt xmmreg )
1415 /* Correct for little-endian host only. */
1416 vassert(host_endness == VexEndnessLE);
1417 return ymmGuestRegOffset( xmmreg );
1420 /* Lanes of vector registers are always numbered from zero being the
1421 least significant lane (rightmost in the register). */
1423 static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
1425 /* Correct for little-endian host only. */
1426 vassert(host_endness == VexEndnessLE);
1427 vassert(laneno >= 0 && laneno < 8);
1428 return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
1431 static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
1433 /* Correct for little-endian host only. */
1434 vassert(host_endness == VexEndnessLE);
1435 vassert(laneno >= 0 && laneno < 4);
1436 return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
1439 static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
1441 /* Correct for little-endian host only. */
1442 vassert(host_endness == VexEndnessLE);
1443 vassert(laneno >= 0 && laneno < 2);
1444 return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
1447 static Int ymmGuestRegLane128offset ( UInt ymmreg, Int laneno )
1449 /* Correct for little-endian host only. */
1450 vassert(host_endness == VexEndnessLE);
1451 vassert(laneno >= 0 && laneno < 2);
1452 return ymmGuestRegOffset( ymmreg ) + 16 * laneno;
1455 static Int ymmGuestRegLane64offset ( UInt ymmreg, Int laneno )
1457 /* Correct for little-endian host only. */
1458 vassert(host_endness == VexEndnessLE);
1459 vassert(laneno >= 0 && laneno < 4);
1460 return ymmGuestRegOffset( ymmreg ) + 8 * laneno;
1463 static Int ymmGuestRegLane32offset ( UInt ymmreg, Int laneno )
1465 /* Correct for little-endian host only. */
1466 vassert(host_endness == VexEndnessLE);
1467 vassert(laneno >= 0 && laneno < 8);
1468 return ymmGuestRegOffset( ymmreg ) + 4 * laneno;
1471 static IRExpr* getXMMReg ( UInt xmmreg )
1473 return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
1476 static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
1478 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
1481 static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
1483 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
1486 static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
1488 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
1491 static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
1493 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
1496 static IRExpr* getXMMRegLane16 ( UInt xmmreg, Int laneno )
1498 return IRExpr_Get( xmmGuestRegLane16offset(xmmreg,laneno), Ity_I16 );
1501 static void putXMMReg ( UInt xmmreg, IRExpr* e )
1503 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
1504 stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
1507 static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
1509 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
1510 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
1513 static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
1515 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
1516 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
1519 static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
1521 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
1522 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
1525 static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
1527 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
1528 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
1531 static IRExpr* getYMMReg ( UInt xmmreg )
1533 return IRExpr_Get( ymmGuestRegOffset(xmmreg), Ity_V256 );
1536 static IRExpr* getYMMRegLane128 ( UInt ymmreg, Int laneno )
1538 return IRExpr_Get( ymmGuestRegLane128offset(ymmreg,laneno), Ity_V128 );
1541 static IRExpr* getYMMRegLane64F ( UInt ymmreg, Int laneno )
1543 return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_F64 );
1546 static IRExpr* getYMMRegLane64 ( UInt ymmreg, Int laneno )
1548 return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_I64 );
1551 static IRExpr* getYMMRegLane32F ( UInt ymmreg, Int laneno )
1553 return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_F32 );
1556 static IRExpr* getYMMRegLane32 ( UInt ymmreg, Int laneno )
1558 return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_I32 );
1561 static void putYMMReg ( UInt ymmreg, IRExpr* e )
1563 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V256);
1564 stmt( IRStmt_Put( ymmGuestRegOffset(ymmreg), e ) );
1567 static void putYMMRegLane128 ( UInt ymmreg, Int laneno, IRExpr* e )
1569 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
1570 stmt( IRStmt_Put( ymmGuestRegLane128offset(ymmreg,laneno), e ) );
1573 static void putYMMRegLane64F ( UInt ymmreg, Int laneno, IRExpr* e )
1575 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
1576 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
1579 static void putYMMRegLane64 ( UInt ymmreg, Int laneno, IRExpr* e )
1581 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
1582 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
1585 static void putYMMRegLane32F ( UInt ymmreg, Int laneno, IRExpr* e )
1587 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
1588 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
1591 static void putYMMRegLane32 ( UInt ymmreg, Int laneno, IRExpr* e )
1593 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
1594 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
1597 static IRExpr* mkV128 ( UShort mask )
1599 return IRExpr_Const(IRConst_V128(mask));
1602 /* Write the low half of a YMM reg and zero out the upper half. */
1603 static void putYMMRegLoAndZU ( UInt ymmreg, IRExpr* e )
1605 putYMMRegLane128( ymmreg, 0, e );
1606 putYMMRegLane128( ymmreg, 1, mkV128(0) );
1609 static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
1611 vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
1612 vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
1613 return unop(Iop_64to1,
1614 binop(Iop_And64,
1615 unop(Iop_1Uto64,x),
1616 unop(Iop_1Uto64,y)));
1619 /* Generate a compare-and-swap operation, operating on memory at
1620 'addr'. The expected value is 'expVal' and the new value is
1621 'newVal'. If the operation fails, then transfer control (with a
1622 no-redir jump (XXX no -- see comment at top of this file)) to
1623 'restart_point', which is presumably the address of the guest
1624 instruction again -- retrying, essentially. */
1625 static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
1626 Addr64 restart_point )
1628 IRCAS* cas;
1629 IRType tyE = typeOfIRExpr(irsb->tyenv, expVal);
1630 IRType tyN = typeOfIRExpr(irsb->tyenv, newVal);
1631 IRTemp oldTmp = newTemp(tyE);
1632 IRTemp expTmp = newTemp(tyE);
1633 vassert(tyE == tyN);
1634 vassert(tyE == Ity_I64 || tyE == Ity_I32
1635 || tyE == Ity_I16 || tyE == Ity_I8);
1636 assign(expTmp, expVal);
1637 cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
1638 NULL, mkexpr(expTmp), NULL, newVal );
1639 stmt( IRStmt_CAS(cas) );
1640 stmt( IRStmt_Exit(
1641 binop( mkSizedOp(tyE,Iop_CasCmpNE8),
1642 mkexpr(oldTmp), mkexpr(expTmp) ),
1643 Ijk_Boring, /*Ijk_NoRedir*/
1644 IRConst_U64( restart_point ),
1645 OFFB_RIP
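/* An illustrative sketch (not lifted from the real handlers) of how a
   LOCK-prefixed "add %reg, mem" can be expressed with casLE.  'ty',
   'addr', 'src' and 'restart' (the guest address of the insn itself)
   are assumed to have been set up by the caller. */
#if 0
   IRTemp oldv = newTemp(ty);
   IRTemp newv = newTemp(ty);
   assign( oldv, loadLE(ty, mkexpr(addr)) );
   assign( newv, binop(mkSizedOp(ty, Iop_Add8),
                       mkexpr(oldv), mkexpr(src)) );
   /* casLE both performs the store and, if memory no longer holds
      'oldv', side-exits so that the insn is retried. */
   casLE( mkexpr(addr), mkexpr(oldv), mkexpr(newv), restart );
#endif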
1650 /*------------------------------------------------------------*/
1651 /*--- Helpers for %rflags. ---*/
1652 /*------------------------------------------------------------*/
1654 /* -------------- Evaluating the flags-thunk. -------------- */
1656 /* Build IR to calculate all the eflags from stored
1657 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1658 Ity_I64. */
1659 static IRExpr* mk_amd64g_calculate_rflags_all ( void )
1661 IRExpr** args
1662 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1663 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1664 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1665 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1666 IRExpr* call
1667 = mkIRExprCCall(
1668 Ity_I64,
1669 0/*regparm*/,
1670 "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all,
1671 args
1673 /* Exclude OP and NDEP from definedness checking. We're only
1674 interested in DEP1 and DEP2. */
1675 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1676 return call;
1679 /* Build IR to calculate some particular condition from stored
1680 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1681 Ity_I1. */
1682 static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond )
1684 IRExpr** args
1685 = mkIRExprVec_5( mkU64(cond),
1686 IRExpr_Get(OFFB_CC_OP, Ity_I64),
1687 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1688 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1689 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1690 IRExpr* call
1691 = mkIRExprCCall(
1692 Ity_I64,
1693 0/*regparm*/,
1694 "amd64g_calculate_condition", &amd64g_calculate_condition,
1695 args
1697 /* Exclude the requested condition, OP and NDEP from definedness
1698 checking. We're only interested in DEP1 and DEP2. */
1699 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
1700 return unop(Iop_64to1, call);
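/* Sketch of a typical use (not the actual Jcc handler): building the
   guard of a conditional side-exit from the flags thunk.  'dst' is an
   assumed jump-target constant. */
#if 0
   stmt( IRStmt_Exit( mk_amd64g_calculate_condition(AMD64CondZ),
                      Ijk_Boring,
                      IRConst_U64(dst),
                      OFFB_RIP ) );
#endif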
1703 /* Build IR to calculate just the carry flag from stored
1704 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I64. */
1705 static IRExpr* mk_amd64g_calculate_rflags_c ( void )
1707 IRExpr** args
1708 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1709 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1710 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1711 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1712 IRExpr* call
1713 = mkIRExprCCall(
1714 Ity_I64,
1715 0/*regparm*/,
1716 "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c,
1717 args
1719 /* Exclude OP and NDEP from definedness checking. We're only
1720 interested in DEP1 and DEP2. */
1721 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1722 return call;
1726 /* -------------- Building the flags-thunk. -------------- */
1728 /* The machinery in this section builds the flag-thunk following a
1729 flag-setting operation. Hence the various setFlags_* functions.
1732 static Bool isAddSub ( IROp op8 )
1734 return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
1737 static Bool isLogic ( IROp op8 )
1739 return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
1742 /* U-widen 1/8/16/32/64 bit int expr to 64. */
1743 static IRExpr* widenUto64 ( IRExpr* e )
1745 switch (typeOfIRExpr(irsb->tyenv,e)) {
1746 case Ity_I64: return e;
1747 case Ity_I32: return unop(Iop_32Uto64, e);
1748 case Ity_I16: return unop(Iop_16Uto64, e);
1749 case Ity_I8: return unop(Iop_8Uto64, e);
1750 case Ity_I1: return unop(Iop_1Uto64, e);
1751 default: vpanic("widenUto64");
1755 /* S-widen 8/16/32/64 bit int expr to 64. */
1756 static IRExpr* widenSto64 ( IRExpr* e )
1758 switch (typeOfIRExpr(irsb->tyenv,e)) {
1759 case Ity_I64: return e;
1760 case Ity_I32: return unop(Iop_32Sto64, e);
1761 case Ity_I16: return unop(Iop_16Sto64, e);
1762 case Ity_I8: return unop(Iop_8Sto64, e);
1763 default: vpanic("widenSto64");
1767 /* Narrow 8/16/32/64 bit int expr to 8/16/32/64. Clearly only some
1768 of these combinations make sense. */
1769 static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
1771 IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
1772 if (src_ty == dst_ty)
1773 return e;
1774 if (src_ty == Ity_I32 && dst_ty == Ity_I16)
1775 return unop(Iop_32to16, e);
1776 if (src_ty == Ity_I32 && dst_ty == Ity_I8)
1777 return unop(Iop_32to8, e);
1778 if (src_ty == Ity_I64 && dst_ty == Ity_I32)
1779 return unop(Iop_64to32, e);
1780 if (src_ty == Ity_I64 && dst_ty == Ity_I16)
1781 return unop(Iop_64to16, e);
1782 if (src_ty == Ity_I64 && dst_ty == Ity_I8)
1783 return unop(Iop_64to8, e);
1785 vex_printf("\nsrc, dst tys are: ");
1786 ppIRType(src_ty);
1787 vex_printf(", ");
1788 ppIRType(dst_ty);
1789 vex_printf("\n");
1790 vpanic("narrowTo(amd64)");
1794 /* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is
1795 auto-sized up to the real op. */
1797 static
1798 void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
1800 Int ccOp = 0;
1801 switch (ty) {
1802 case Ity_I8: ccOp = 0; break;
1803 case Ity_I16: ccOp = 1; break;
1804 case Ity_I32: ccOp = 2; break;
1805 case Ity_I64: ccOp = 3; break;
1806 default: vassert(0);
1808 switch (op8) {
1809 case Iop_Add8: ccOp += AMD64G_CC_OP_ADDB; break;
1810 case Iop_Sub8: ccOp += AMD64G_CC_OP_SUBB; break;
1811 default: ppIROp(op8);
1812 vpanic("setFlags_DEP1_DEP2(amd64)");
1814 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1815 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
1816 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) );
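/* Sketch of typical use: after computing a 32-bit add, prime the thunk
   so the flags can be computed lazily later.  'src1' and 'src2' are
   assumed to be Ity_I32 temporaries set up by the caller. */
#if 0
   IRTemp dst = newTemp(Ity_I32);
   assign( dst, binop(Iop_Add32, mkexpr(src1), mkexpr(src2)) );
   setFlags_DEP1_DEP2( Iop_Add8, src1, src2, Ity_I32 );
#endif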
1820 /* Set the OP and DEP1 fields only, and write zero to DEP2. */
1822 static
1823 void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
1825 Int ccOp = 0;
1826 switch (ty) {
1827 case Ity_I8: ccOp = 0; break;
1828 case Ity_I16: ccOp = 1; break;
1829 case Ity_I32: ccOp = 2; break;
1830 case Ity_I64: ccOp = 3; break;
1831 default: vassert(0);
1833 switch (op8) {
1834 case Iop_Or8:
1835 case Iop_And8:
1836 case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break;
1837 default: ppIROp(op8);
1838 vpanic("setFlags_DEP1(amd64)");
1840 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1841 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
1842 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
1846 /* For shift operations, we put in the result and the undershifted
1847 result. If, however, the shift amount is zero, the thunk is left
1848 unchanged. */
1850 static void setFlags_DEP1_DEP2_shift ( IROp op64,
1851 IRTemp res,
1852 IRTemp resUS,
1853 IRType ty,
1854 IRTemp guard )
1856 Int ccOp = 0;
1857 switch (ty) {
1858 case Ity_I8: ccOp = 0; break;
1859 case Ity_I16: ccOp = 1; break;
1860 case Ity_I32: ccOp = 2; break;
1861 case Ity_I64: ccOp = 3; break;
1862 default: vassert(0);
1865 vassert(guard);
1867 /* Both kinds of right shifts are handled by the same thunk
1868 operation. */
1869 switch (op64) {
1870 case Iop_Shr64:
1871 case Iop_Sar64: ccOp += AMD64G_CC_OP_SHRB; break;
1872 case Iop_Shl64: ccOp += AMD64G_CC_OP_SHLB; break;
1873 default: ppIROp(op64);
1874 vpanic("setFlags_DEP1_DEP2_shift(amd64)");
1877 /* guard :: Ity_I8. We need to convert it to I1. */
1878 IRTemp guardB = newTemp(Ity_I1);
1879 assign( guardB, binop(Iop_CmpNE8, mkexpr(guard), mkU8(0)) );
1881 /* DEP1 contains the result, DEP2 contains the undershifted value. */
1882 stmt( IRStmt_Put( OFFB_CC_OP,
1883 IRExpr_ITE( mkexpr(guardB),
1884 mkU64(ccOp),
1885 IRExpr_Get(OFFB_CC_OP,Ity_I64) ) ));
1886 stmt( IRStmt_Put( OFFB_CC_DEP1,
1887 IRExpr_ITE( mkexpr(guardB),
1888 widenUto64(mkexpr(res)),
1889 IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) ));
1890 stmt( IRStmt_Put( OFFB_CC_DEP2,
1891 IRExpr_ITE( mkexpr(guardB),
1892 widenUto64(mkexpr(resUS)),
1893 IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) ));
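   /* For example, "shll %cl, %eax" with %cl == 0 at run time makes
      guardB false, so each ITE above writes back the old CC_OP,
      CC_DEP1 and CC_DEP2 values: the thunk, and hence the flags, is
      left untouched, matching the architectural rule that a shift by
      zero does not modify the flags. */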
1897 /* For the inc/dec case, we store in DEP1 the result value and in NDEP
1898 the former value of the carry flag, which unfortunately we have to
1899 compute. */
1901 static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
1903 Int ccOp = inc ? AMD64G_CC_OP_INCB : AMD64G_CC_OP_DECB;
1905 switch (ty) {
1906 case Ity_I8: ccOp += 0; break;
1907 case Ity_I16: ccOp += 1; break;
1908 case Ity_I32: ccOp += 2; break;
1909 case Ity_I64: ccOp += 3; break;
1910 default: vassert(0);
1913 /* This has to come first, because calculating the C flag
1914 may require reading all four thunk fields. */
1915 stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) );
1916 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1917 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) );
1918 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
1922 /* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
1923 two arguments. */
1925 static
1926 void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op )
1928 switch (ty) {
1929 case Ity_I8:
1930 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+0) ) );
1931 break;
1932 case Ity_I16:
1933 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+1) ) );
1934 break;
1935 case Ity_I32:
1936 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+2) ) );
1937 break;
1938 case Ity_I64:
1939 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+3) ) );
1940 break;
1941 default:
1942 vpanic("setFlags_MUL(amd64)");
1944 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) ));
1945 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) ));
1949 /* -------------- Condition codes. -------------- */
1951 /* Condition codes, using the AMD encoding. */
1953 static const HChar* name_AMD64Condcode ( AMD64Condcode cond )
1955 switch (cond) {
1956 case AMD64CondO: return "o";
1957 case AMD64CondNO: return "no";
1958 case AMD64CondB: return "b";
1959 case AMD64CondNB: return "ae"; /*"nb";*/
1960 case AMD64CondZ: return "e"; /*"z";*/
1961 case AMD64CondNZ: return "ne"; /*"nz";*/
1962 case AMD64CondBE: return "be";
1963 case AMD64CondNBE: return "a"; /*"nbe";*/
1964 case AMD64CondS: return "s";
1965 case AMD64CondNS: return "ns";
1966 case AMD64CondP: return "p";
1967 case AMD64CondNP: return "np";
1968 case AMD64CondL: return "l";
1969 case AMD64CondNL: return "ge"; /*"nl";*/
1970 case AMD64CondLE: return "le";
1971 case AMD64CondNLE: return "g"; /*"nle";*/
1972 case AMD64CondAlways: return "ALWAYS";
1973 default: vpanic("name_AMD64Condcode");
1977 static
1978 AMD64Condcode positiveIse_AMD64Condcode ( AMD64Condcode cond,
1979 /*OUT*/Bool* needInvert )
1981 vassert(cond >= AMD64CondO && cond <= AMD64CondNLE);
1982 if (cond & 1) {
1983 *needInvert = True;
1984 return cond-1;
1985 } else {
1986 *needInvert = False;
1987 return cond;
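   /* Note: in the AMD/Intel condition-code encoding each negated
      condition is the positive one plus 1 (O=0/NO=1, B=2/NB=3,
      Z=4/NZ=5, and so on), which is why clearing the bottom bit and
      reporting *needInvert is sufficient here. */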
1992 /* -------------- Helpers for ADD/SUB with carry. -------------- */
1994 /* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
1995 appropriately.
1997 Optionally, generate a store for the 'tres' value. This can either
1998 be a normal store, or it can be a cas-with-possible-failure style
1999 store:
2001 if taddr is IRTemp_INVALID, then no store is generated.
2003 if taddr is not IRTemp_INVALID, then a store (using taddr as
2004 the address) is generated:
2006 if texpVal is IRTemp_INVALID then a normal store is
2007 generated, and restart_point must be zero (it is irrelevant).
2009 if texpVal is not IRTemp_INVALID then a cas-style store is
2010 generated. texpVal is the expected value, restart_point
2011 is the restart point if the store fails, and texpVal must
2012 have the same type as tres.
2015 static void helper_ADC ( Int sz,
2016 IRTemp tres, IRTemp ta1, IRTemp ta2,
2017 /* info about optional store: */
2018 IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
2020 UInt thunkOp;
2021 IRType ty = szToITy(sz);
2022 IRTemp oldc = newTemp(Ity_I64);
2023 IRTemp oldcn = newTemp(ty);
2024 IROp plus = mkSizedOp(ty, Iop_Add8);
2025 IROp xor = mkSizedOp(ty, Iop_Xor8);
2027 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
2029 switch (sz) {
2030 case 8: thunkOp = AMD64G_CC_OP_ADCQ; break;
2031 case 4: thunkOp = AMD64G_CC_OP_ADCL; break;
2032 case 2: thunkOp = AMD64G_CC_OP_ADCW; break;
2033 case 1: thunkOp = AMD64G_CC_OP_ADCB; break;
2034 default: vassert(0);
2037 /* oldc = old carry flag, 0 or 1 */
2038 assign( oldc, binop(Iop_And64,
2039 mk_amd64g_calculate_rflags_c(),
2040 mkU64(1)) );
2042 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
2044 assign( tres, binop(plus,
2045 binop(plus,mkexpr(ta1),mkexpr(ta2)),
2046 mkexpr(oldcn)) );
2048 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
2049 start of this function. */
2050 if (taddr != IRTemp_INVALID) {
2051 if (texpVal == IRTemp_INVALID) {
2052 vassert(restart_point == 0);
2053 storeLE( mkexpr(taddr), mkexpr(tres) );
2054 } else {
2055 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
2056 /* .. and hence 'texpVal' has the same type as 'tres'. */
2057 casLE( mkexpr(taddr),
2058 mkexpr(texpVal), mkexpr(tres), restart_point );
2062 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
2063 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
2064 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
2065 mkexpr(oldcn)) )) );
2066 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
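   /* Usage sketch, mirroring the callers later in this file: a
      register-only ADC is generated with
         helper_ADC( size, dst1, dst0, src,
                     IRTemp_INVALID, IRTemp_INVALID, 0 );
      whereas a LOCK-prefixed memory ADC uses
         helper_ADC( size, dst1, dst0, src,
                     addr, dst0 (the expected value),
                     guest_RIP_curr_instr );
      so that the store becomes a compare-and-swap which restarts the
      instruction if memory changed underneath it.  Note also that DEP2
      holds ta2 XOR oldcn rather than ta2; together with NDEP (the old
      carry) the flags helper can recover the original second operand
      as DEP2 ^ NDEP. */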
2070 /* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
2071 appropriately. As with helper_ADC, possibly generate a store of
2072 the result -- see comments on helper_ADC for details.
2074 static void helper_SBB ( Int sz,
2075 IRTemp tres, IRTemp ta1, IRTemp ta2,
2076 /* info about optional store: */
2077 IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
2079 UInt thunkOp;
2080 IRType ty = szToITy(sz);
2081 IRTemp oldc = newTemp(Ity_I64);
2082 IRTemp oldcn = newTemp(ty);
2083 IROp minus = mkSizedOp(ty, Iop_Sub8);
2084 IROp xor = mkSizedOp(ty, Iop_Xor8);
2086 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
2088 switch (sz) {
2089 case 8: thunkOp = AMD64G_CC_OP_SBBQ; break;
2090 case 4: thunkOp = AMD64G_CC_OP_SBBL; break;
2091 case 2: thunkOp = AMD64G_CC_OP_SBBW; break;
2092 case 1: thunkOp = AMD64G_CC_OP_SBBB; break;
2093 default: vassert(0);
2096 /* oldc = old carry flag, 0 or 1 */
2097 assign( oldc, binop(Iop_And64,
2098 mk_amd64g_calculate_rflags_c(),
2099 mkU64(1)) );
2101 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
2103 assign( tres, binop(minus,
2104 binop(minus,mkexpr(ta1),mkexpr(ta2)),
2105 mkexpr(oldcn)) );
2107 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
2108 start of this function. */
2109 if (taddr != IRTemp_INVALID) {
2110 if (texpVal == IRTemp_INVALID) {
2111 vassert(restart_point == 0);
2112 storeLE( mkexpr(taddr), mkexpr(tres) );
2113 } else {
2114 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
2115 /* .. and hence 'texpVal' has the same type as 'tres'. */
2116 casLE( mkexpr(taddr),
2117 mkexpr(texpVal), mkexpr(tres), restart_point );
2121 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
2122 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) );
2123 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
2124 mkexpr(oldcn)) )) );
2125 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
2129 /* Given ta1, ta2 and tres, compute tres = ADCX(ta1,ta2) or tres = ADOX(ta1,ta2)
2130 and set flags appropriately.
2132 static void helper_ADCX_ADOX ( Bool isADCX, Int sz,
2133 IRTemp tres, IRTemp ta1, IRTemp ta2 )
2135 UInt thunkOp;
2136 IRType ty = szToITy(sz);
2137 IRTemp oldflags = newTemp(Ity_I64);
2138 IRTemp oldOC = newTemp(Ity_I64); // old O or C flag
2139 IRTemp oldOCn = newTemp(ty); // old O or C flag, narrowed
2140 IROp plus = mkSizedOp(ty, Iop_Add8);
2141 IROp xor = mkSizedOp(ty, Iop_Xor8);
2143 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
2145 switch (sz) {
2146 case 8: thunkOp = isADCX ? AMD64G_CC_OP_ADCX64
2147 : AMD64G_CC_OP_ADOX64; break;
2148 case 4: thunkOp = isADCX ? AMD64G_CC_OP_ADCX32
2149 : AMD64G_CC_OP_ADOX32; break;
2150 default: vassert(0);
2153 assign( oldflags, mk_amd64g_calculate_rflags_all() );
2155 /* oldOC = old overflow/carry flag, 0 or 1 */
2156 assign( oldOC, binop(Iop_And64,
2157 binop(Iop_Shr64,
2158 mkexpr(oldflags),
2159 mkU8(isADCX ? AMD64G_CC_SHIFT_C
2160 : AMD64G_CC_SHIFT_O)),
2161 mkU64(1)) );
2163 assign( oldOCn, narrowTo(ty, mkexpr(oldOC)) );
2165 assign( tres, binop(plus,
2166 binop(plus,mkexpr(ta1),mkexpr(ta2)),
2167 mkexpr(oldOCn)) );
2169 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
2170 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
2171 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
2172 mkexpr(oldOCn)) )) );
2173 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldflags) ) );
2177 /* -------------- Helpers for disassembly printing. -------------- */
2179 static const HChar* nameGrp1 ( Int opc_aux )
2181 static const HChar* grp1_names[8]
2182 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
2183 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)");
2184 return grp1_names[opc_aux];
2187 static const HChar* nameGrp2 ( Int opc_aux )
2189 static const HChar* grp2_names[8]
2190 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
2191 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)");
2192 return grp2_names[opc_aux];
2195 static const HChar* nameGrp4 ( Int opc_aux )
2197 static const HChar* grp4_names[8]
2198 = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
2199 if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)");
2200 return grp4_names[opc_aux];
2203 static const HChar* nameGrp5 ( Int opc_aux )
2205 static const HChar* grp5_names[8]
2206 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
2207 if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)");
2208 return grp5_names[opc_aux];
2211 static const HChar* nameGrp8 ( Int opc_aux )
2213 static const HChar* grp8_names[8]
2214 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
2215 if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)");
2216 return grp8_names[opc_aux];
2219 static const HChar* nameSReg ( UInt sreg )
2221 switch (sreg) {
2222 case R_ES: return "%es";
2223 case R_CS: return "%cs";
2224 case R_SS: return "%ss";
2225 case R_DS: return "%ds";
2226 case R_FS: return "%fs";
2227 case R_GS: return "%gs";
2228 default: vpanic("nameSReg(amd64)");
2232 static const HChar* nameMMXReg ( Int mmxreg )
2234 static const HChar* mmx_names[8]
2235 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
2236 if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)");
2237 return mmx_names[mmxreg];
2240 static const HChar* nameXMMReg ( Int xmmreg )
2242 static const HChar* xmm_names[16]
2243 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
2244 "%xmm4", "%xmm5", "%xmm6", "%xmm7",
2245 "%xmm8", "%xmm9", "%xmm10", "%xmm11",
2246 "%xmm12", "%xmm13", "%xmm14", "%xmm15" };
2247 if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)");
2248 return xmm_names[xmmreg];
2251 static const HChar* nameMMXGran ( Int gran )
2253 switch (gran) {
2254 case 0: return "b";
2255 case 1: return "w";
2256 case 2: return "d";
2257 case 3: return "q";
2258 default: vpanic("nameMMXGran(amd64,guest)");
2262 static HChar nameISize ( Int size )
2264 switch (size) {
2265 case 8: return 'q';
2266 case 4: return 'l';
2267 case 2: return 'w';
2268 case 1: return 'b';
2269 default: vpanic("nameISize(amd64)");
2273 static const HChar* nameYMMReg ( Int ymmreg )
2275 static const HChar* ymm_names[16]
2276 = { "%ymm0", "%ymm1", "%ymm2", "%ymm3",
2277 "%ymm4", "%ymm5", "%ymm6", "%ymm7",
2278 "%ymm8", "%ymm9", "%ymm10", "%ymm11",
2279 "%ymm12", "%ymm13", "%ymm14", "%ymm15" };
2280 if (ymmreg < 0 || ymmreg > 15) vpanic("nameYMMReg(amd64)");
2281 return ymm_names[ymmreg];
2285 /*------------------------------------------------------------*/
2286 /*--- JMP helpers ---*/
2287 /*------------------------------------------------------------*/
2289 static void jmp_lit( /*MOD*/DisResult* dres,
2290 IRJumpKind kind, Addr64 d64 )
2292 vassert(dres->whatNext == Dis_Continue);
2293 vassert(dres->len == 0);
2294 vassert(dres->continueAt == 0);
2295 vassert(dres->jk_StopHere == Ijk_INVALID);
2296 dres->whatNext = Dis_StopHere;
2297 dres->jk_StopHere = kind;
2298 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64) ) );
2301 static void jmp_treg( /*MOD*/DisResult* dres,
2302 IRJumpKind kind, IRTemp t )
2304 vassert(dres->whatNext == Dis_Continue);
2305 vassert(dres->len == 0);
2306 vassert(dres->continueAt == 0);
2307 vassert(dres->jk_StopHere == Ijk_INVALID);
2308 dres->whatNext = Dis_StopHere;
2309 dres->jk_StopHere = kind;
2310 stmt( IRStmt_Put( OFFB_RIP, mkexpr(t) ) );
2313 static
2314 void jcc_01 ( /*MOD*/DisResult* dres,
2315 AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true )
2317 Bool invert;
2318 AMD64Condcode condPos;
2319 vassert(dres->whatNext == Dis_Continue);
2320 vassert(dres->len == 0);
2321 vassert(dres->continueAt == 0);
2322 vassert(dres->jk_StopHere == Ijk_INVALID);
2323 dres->whatNext = Dis_StopHere;
2324 dres->jk_StopHere = Ijk_Boring;
2325 condPos = positiveIse_AMD64Condcode ( cond, &invert );
2326 if (invert) {
2327 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
2328 Ijk_Boring,
2329 IRConst_U64(d64_false),
2330 OFFB_RIP ) );
2331 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_true) ) );
2332 } else {
2333 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
2334 Ijk_Boring,
2335 IRConst_U64(d64_true),
2336 OFFB_RIP ) );
2337 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_false) ) );
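   /* For example, a "jnz target": cond is AMD64CondNZ, which
      positive-ises to AMD64CondZ with invert == True, so the
      IRStmt_Exit above tests Z and, if it holds (i.e. the jnz is not
      taken), exits to d64_false, the fall-through address; otherwise
      execution continues with RIP set to d64_true, the branch
      target. */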
2341 /* Let new_rsp be the %rsp value after a call/return. Let nia be the
2342 guest address of the next instruction to be executed.
2344 This function generates an AbiHint to say that -128(%rsp)
2345 .. -1(%rsp) should now be regarded as uninitialised.
2347 static
2348 void make_redzone_AbiHint ( const VexAbiInfo* vbi,
2349 IRTemp new_rsp, IRTemp nia, const HChar* who )
2351 Int szB = vbi->guest_stack_redzone_size;
2352 vassert(szB >= 0);
2354    /* A bit of a kludge.  Currently the only ABI we've guested AMD64
2355 for is ELF. So just check it's the expected 128 value
2356 (paranoia). */
2357 vassert(szB == 128);
2359 if (0) vex_printf("AbiHint: %s\n", who);
2360 vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64);
2361 vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64);
2362 if (szB > 0)
2363 stmt( IRStmt_AbiHint(
2364 binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)),
2365 szB,
2366 mkexpr(nia)
2371 /*------------------------------------------------------------*/
2372 /*--- Disassembling addressing modes ---*/
2373 /*------------------------------------------------------------*/
2375 static
2376 const HChar* segRegTxt ( Prefix pfx )
2378 if (pfx & PFX_CS) return "%cs:";
2379 if (pfx & PFX_DS) return "%ds:";
2380 if (pfx & PFX_ES) return "%es:";
2381 if (pfx & PFX_FS) return "%fs:";
2382 if (pfx & PFX_GS) return "%gs:";
2383 if (pfx & PFX_SS) return "%ss:";
2384 return ""; /* no override */
2388 /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a
2389 linear address by adding any required segment override as indicated
2390    by pfx, and also dealing with any address size override
2391 present. */
2392 static
2393 IRExpr* handleAddrOverrides ( const VexAbiInfo* vbi,
2394 Prefix pfx, IRExpr* virtual )
2396 /* --- address size override --- */
2397 if (haveASO(pfx))
2398 virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual));
2400    /* Note that the below are hacks that rely on the assumption
2401 that %fs or %gs are constant.
2402 Typically, %fs is always 0x63 on linux (in the main thread, it
2403 stays at value 0), %gs always 0x60 on Darwin, ... */
2404 /* --- segment overrides --- */
2405 if (pfx & PFX_FS) {
2406 if (vbi->guest_amd64_assume_fs_is_const) {
2407 /* return virtual + guest_FS_CONST. */
2408 virtual = binop(Iop_Add64, virtual,
2409 IRExpr_Get(OFFB_FS_CONST, Ity_I64));
2410 } else {
2411 unimplemented("amd64 %fs segment override");
2415 if (pfx & PFX_GS) {
2416 if (vbi->guest_amd64_assume_gs_is_const) {
2417 /* return virtual + guest_GS_CONST. */
2418 virtual = binop(Iop_Add64, virtual,
2419 IRExpr_Get(OFFB_GS_CONST, Ity_I64));
2420 } else {
2421 unimplemented("amd64 %gs segment override");
2425 /* cs, ds, es and ss are simply ignored in 64-bit mode. */
2427 return virtual;
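   /* Example: for "mov %fs:0x28, %rax" (the typical glibc stack-canary
      load), the amode itself evaluates to the constant 0x28 and, when
      guest_amd64_assume_fs_is_const is set, the code above rewrites it
      to Add64(0x28, GET(OFFB_FS_CONST)); without that assumption the
      translation gives up via unimplemented(). */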
2430 //.. {
2431 //.. Int sreg;
2432 //.. IRType hWordTy;
2433 //.. IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
2434 //..
2435 //.. if (sorb == 0)
2436 //.. /* the common case - no override */
2437 //.. return virtual;
2438 //..
2439 //.. switch (sorb) {
2440 //.. case 0x3E: sreg = R_DS; break;
2441 //.. case 0x26: sreg = R_ES; break;
2442 //.. case 0x64: sreg = R_FS; break;
2443 //.. case 0x65: sreg = R_GS; break;
2444 //.. default: vpanic("handleAddrOverrides(x86,guest)");
2445 //.. }
2446 //..
2447 //.. hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
2448 //..
2449 //.. seg_selector = newTemp(Ity_I32);
2450 //.. ldt_ptr = newTemp(hWordTy);
2451 //.. gdt_ptr = newTemp(hWordTy);
2452 //.. r64 = newTemp(Ity_I64);
2453 //..
2454 //.. assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
2455 //.. assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
2456 //.. assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
2457 //..
2458 //.. /*
2459 //.. Call this to do the translation and limit checks:
2460 //.. ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
2461 //.. UInt seg_selector, UInt virtual_addr )
2462 //.. */
2463 //.. assign(
2464 //.. r64,
2465 //.. mkIRExprCCall(
2466 //.. Ity_I64,
2467 //.. 0/*regparms*/,
2468 //.. "x86g_use_seg_selector",
2469 //.. &x86g_use_seg_selector,
2470 //.. mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
2471 //.. mkexpr(seg_selector), virtual)
2472 //.. )
2473 //.. );
2474 //..
2475 //.. /* If the high 32 of the result are non-zero, there was a
2476 //.. failure in address translation. In which case, make a
2477 //.. quick exit.
2478 //.. */
2479 //.. stmt(
2480 //.. IRStmt_Exit(
2481 //.. binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
2482 //.. Ijk_MapFail,
2483 //.. IRConst_U32( guest_eip_curr_instr )
2484 //.. )
2485 //.. );
2486 //..
2487 //.. /* otherwise, here's the translated result. */
2488 //.. return unop(Iop_64to32, mkexpr(r64));
2489 //.. }
2492 /* Generate IR to calculate an address indicated by a ModRM and
2493 following SIB bytes. The expression, and the number of bytes in
2494 the address mode, are returned (the latter in *len). Note that
2495 this fn should not be called if the R/M part of the address denotes
2496    a register instead of memory.  If front-end tracing (VEX_TRACE_FE)
2497    is enabled, text of the addressing mode is placed in buf.
2499 The computed address is stored in a new tempreg, and the
2500 identity of the tempreg is returned.
2502 extra_bytes holds the number of bytes after the amode, as supplied
2503 by the caller. This is needed to make sense of %rip-relative
2504 addresses. Note that the value that *len is set to is only the
2505 length of the amode itself and does not include the value supplied
2506 in extra_bytes.
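   Worked example: for the bytes 8B 44 8B 10, i.e.
   "movl 0x10(%rbx,%rcx,4), %eax", delta points at the ModRM byte 0x44
   (mod=01, reg=000, rm=100).  rm=100 means a SIB byte follows: 0x8B
   gives scale=2 (so a factor of 4), index=001 (%rcx), base=011 (%rbx),
   and then the 8-bit displacement 0x10.  The function returns a temp
   assigned Add64(Add64(%rbx, Shl64(%rcx, 2)), 0x10) and sets *len to 3
   (ModRM + SIB + disp8).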
2509 static IRTemp disAMode_copy2tmp ( IRExpr* addr64 )
2511 IRTemp tmp = newTemp(Ity_I64);
2512 assign( tmp, addr64 );
2513 return tmp;
2516 static
2517 IRTemp disAMode ( /*OUT*/Int* len,
2518 const VexAbiInfo* vbi, Prefix pfx, Long delta,
2519 /*OUT*/HChar* buf, Int extra_bytes )
2521 UChar mod_reg_rm = getUChar(delta);
2522 delta++;
2524 buf[0] = (UChar)0;
2525 vassert(extra_bytes >= 0 && extra_bytes < 10);
2527 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2528 jump table seems a bit excessive.
2530 mod_reg_rm &= 0xC7; /* is now XX000YYY */
2531 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
2532 /* is now XX0XXYYY */
2533 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
2534 switch (mod_reg_rm) {
2536 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2537 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2539 case 0x00: case 0x01: case 0x02: case 0x03:
2540 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
2541 { UChar rm = toUChar(mod_reg_rm & 7);
2542 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
2543 *len = 1;
2544 return disAMode_copy2tmp(
2545 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,rm)));
2548 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2549 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2551 case 0x08: case 0x09: case 0x0A: case 0x0B:
2552 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
2553 { UChar rm = toUChar(mod_reg_rm & 7);
2554 Long d = getSDisp8(delta);
2555 if (d == 0) {
2556 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
2557 } else {
2558 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
2560 *len = 2;
2561 return disAMode_copy2tmp(
2562 handleAddrOverrides(vbi, pfx,
2563 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
2566 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2567 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2569 case 0x10: case 0x11: case 0x12: case 0x13:
2570 /* ! 14 */ case 0x15: case 0x16: case 0x17:
2571 { UChar rm = toUChar(mod_reg_rm & 7);
2572 Long d = getSDisp32(delta);
2573 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
2574 *len = 5;
2575 return disAMode_copy2tmp(
2576 handleAddrOverrides(vbi, pfx,
2577 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
2580 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2581    /* REX.B==1: a register, %r8 .. %r15.  This shouldn't happen. */
2582 case 0x18: case 0x19: case 0x1A: case 0x1B:
2583 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
2584 vpanic("disAMode(amd64): not an addr!");
2586 /* RIP + disp32. This assumes that guest_RIP_curr_instr is set
2587 correctly at the start of handling each instruction. */
2588 case 0x05:
2589 { Long d = getSDisp32(delta);
2590 *len = 5;
2591 DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d);
2592 /* We need to know the next instruction's start address.
2593 Try and figure out what it is, record the guess, and ask
2594 the top-level driver logic (bbToIR_AMD64) to check we
2595 guessed right, after the instruction is completely
2596 decoded. */
2597 guest_RIP_next_mustcheck = True;
2598 guest_RIP_next_assumed = guest_RIP_bbstart
2599 + delta+4 + extra_bytes;
2600 return disAMode_copy2tmp(
2601 handleAddrOverrides(vbi, pfx,
2602 binop(Iop_Add64, mkU64(guest_RIP_next_assumed),
2603 mkU64(d))));
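      /* Example: for "addl $1, 0x20(%rip)" -- bytes 83 05 20 00 00 00
         01 -- the imm8 follows the amode, so the Grp1 decoder passes 1
         as extra_bytes, and the assumed next-instruction address then
         correctly accounts for that trailing immediate byte. */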
2606 case 0x04: {
2607 /* SIB, with no displacement. Special cases:
2608 -- %rsp cannot act as an index value.
2609 If index_r indicates %rsp, zero is used for the index.
2610 -- when mod is zero and base indicates RBP or R13, base is
2611 instead a 32-bit sign-extended literal.
2612 It's all madness, I tell you. Extract %index, %base and
2613 scale from the SIB byte. The value denoted is then:
2614 | %index == %RSP && (%base == %RBP || %base == %R13)
2615 = d32 following SIB byte
2616 | %index == %RSP && !(%base == %RBP || %base == %R13)
2617 = %base
2618 | %index != %RSP && (%base == %RBP || %base == %R13)
2619 = d32 following SIB byte + (%index << scale)
2620 | %index != %RSP && !(%base == %RBP || %base == %R13)
2621 = %base + (%index << scale)
2623 UChar sib = getUChar(delta);
2624 UChar scale = toUChar((sib >> 6) & 3);
2625 UChar index_r = toUChar((sib >> 3) & 7);
2626 UChar base_r = toUChar(sib & 7);
2627 /* correct since #(R13) == 8 + #(RBP) */
2628 Bool base_is_BPor13 = toBool(base_r == R_RBP);
2629 Bool index_is_SP = toBool(index_r == R_RSP && 0==getRexX(pfx));
2630 delta++;
2632 if ((!index_is_SP) && (!base_is_BPor13)) {
2633 if (scale == 0) {
2634 DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
2635 nameIRegRexB(8,pfx,base_r),
2636 nameIReg64rexX(pfx,index_r));
2637 } else {
2638 DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
2639 nameIRegRexB(8,pfx,base_r),
2640 nameIReg64rexX(pfx,index_r), 1<<scale);
2642 *len = 2;
2643 return
2644 disAMode_copy2tmp(
2645 handleAddrOverrides(vbi, pfx,
2646 binop(Iop_Add64,
2647 getIRegRexB(8,pfx,base_r),
2648 binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
2649 mkU8(scale)))));
2652 if ((!index_is_SP) && base_is_BPor13) {
2653 Long d = getSDisp32(delta);
2654 DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d,
2655 nameIReg64rexX(pfx,index_r), 1<<scale);
2656 *len = 6;
2657 return
2658 disAMode_copy2tmp(
2659 handleAddrOverrides(vbi, pfx,
2660 binop(Iop_Add64,
2661 binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
2662 mkU8(scale)),
2663 mkU64(d))));
2666 if (index_is_SP && (!base_is_BPor13)) {
2667 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r));
2668 *len = 2;
2669 return disAMode_copy2tmp(
2670 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,base_r)));
2673 if (index_is_SP && base_is_BPor13) {
2674 Long d = getSDisp32(delta);
2675 DIS(buf, "%s%lld", segRegTxt(pfx), d);
2676 *len = 6;
2677 return disAMode_copy2tmp(
2678 handleAddrOverrides(vbi, pfx, mkU64(d)));
2681 vassert(0);
2684 /* SIB, with 8-bit displacement. Special cases:
2685         -- %rsp cannot act as an index value.
2686            If index_r indicates %rsp, zero is used for the index.
2687         Denoted value is:
2688            | %index == %RSP
2689            = d8 + %base
2690            | %index != %RSP
2691 = d8 + %base + (%index << scale)
2693 case 0x0C: {
2694 UChar sib = getUChar(delta);
2695 UChar scale = toUChar((sib >> 6) & 3);
2696 UChar index_r = toUChar((sib >> 3) & 7);
2697 UChar base_r = toUChar(sib & 7);
2698 Long d = getSDisp8(delta+1);
2700 if (index_r == R_RSP && 0==getRexX(pfx)) {
2701 DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
2702 d, nameIRegRexB(8,pfx,base_r));
2703 *len = 3;
2704 return disAMode_copy2tmp(
2705 handleAddrOverrides(vbi, pfx,
2706 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
2707 } else {
2708 if (scale == 0) {
2709 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
2710 nameIRegRexB(8,pfx,base_r),
2711 nameIReg64rexX(pfx,index_r));
2712 } else {
2713 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
2714 nameIRegRexB(8,pfx,base_r),
2715 nameIReg64rexX(pfx,index_r), 1<<scale);
2717 *len = 3;
2718 return
2719 disAMode_copy2tmp(
2720 handleAddrOverrides(vbi, pfx,
2721 binop(Iop_Add64,
2722 binop(Iop_Add64,
2723 getIRegRexB(8,pfx,base_r),
2724 binop(Iop_Shl64,
2725 getIReg64rexX(pfx,index_r), mkU8(scale))),
2726 mkU64(d))));
2728 vassert(0); /*NOTREACHED*/
2731 /* SIB, with 32-bit displacement. Special cases:
2732 -- %rsp cannot act as an index value.
2733 If index_r indicates %rsp, zero is used for the index.
2734 Denoted value is:
2735 | %index == %RSP
2736 = d32 + %base
2737 | %index != %RSP
2738 = d32 + %base + (%index << scale)
2740 case 0x14: {
2741 UChar sib = getUChar(delta);
2742 UChar scale = toUChar((sib >> 6) & 3);
2743 UChar index_r = toUChar((sib >> 3) & 7);
2744 UChar base_r = toUChar(sib & 7);
2745 Long d = getSDisp32(delta+1);
2747 if (index_r == R_RSP && 0==getRexX(pfx)) {
2748 DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
2749 d, nameIRegRexB(8,pfx,base_r));
2750 *len = 6;
2751 return disAMode_copy2tmp(
2752 handleAddrOverrides(vbi, pfx,
2753 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
2754 } else {
2755 if (scale == 0) {
2756 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
2757 nameIRegRexB(8,pfx,base_r),
2758 nameIReg64rexX(pfx,index_r));
2759 } else {
2760 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
2761 nameIRegRexB(8,pfx,base_r),
2762 nameIReg64rexX(pfx,index_r), 1<<scale);
2764 *len = 6;
2765 return
2766 disAMode_copy2tmp(
2767 handleAddrOverrides(vbi, pfx,
2768 binop(Iop_Add64,
2769 binop(Iop_Add64,
2770 getIRegRexB(8,pfx,base_r),
2771 binop(Iop_Shl64,
2772 getIReg64rexX(pfx,index_r), mkU8(scale))),
2773 mkU64(d))));
2775 vassert(0); /*NOTREACHED*/
2778 default:
2779 vpanic("disAMode(amd64)");
2780 return 0; /*notreached*/
2785 /* Similarly for VSIB addressing. This returns just the addend,
2786 and fills in *rI and *vscale with the register number of the vector
2787 index and its multiplicand. */
2788 static
2789 IRTemp disAVSIBMode ( /*OUT*/Int* len,
2790 const VexAbiInfo* vbi, Prefix pfx, Long delta,
2791 /*OUT*/HChar* buf, /*OUT*/UInt* rI,
2792 IRType ty, /*OUT*/Int* vscale )
2794 UChar mod_reg_rm = getUChar(delta);
2795 const HChar *vindex;
2797 *len = 0;
2798 *rI = 0;
2799 *vscale = 0;
2800 buf[0] = (UChar)0;
2801 if ((mod_reg_rm & 7) != 4 || epartIsReg(mod_reg_rm))
2802 return IRTemp_INVALID;
2804 UChar sib = getUChar(delta+1);
2805 UChar scale = toUChar((sib >> 6) & 3);
2806 UChar index_r = toUChar((sib >> 3) & 7);
2807 UChar base_r = toUChar(sib & 7);
2808 Long d = 0;
2809 /* correct since #(R13) == 8 + #(RBP) */
2810 Bool base_is_BPor13 = toBool(base_r == R_RBP);
2811 delta += 2;
2812 *len = 2;
2814 *rI = index_r | (getRexX(pfx) << 3);
2815 if (ty == Ity_V128)
2816 vindex = nameXMMReg(*rI);
2817 else
2818 vindex = nameYMMReg(*rI);
2819 *vscale = 1<<scale;
2821 switch (mod_reg_rm >> 6) {
2822 case 0:
2823 if (base_is_BPor13) {
2824 d = getSDisp32(delta);
2825 *len += 4;
2826 if (scale == 0) {
2827 DIS(buf, "%s%lld(,%s)", segRegTxt(pfx), d, vindex);
2828 } else {
2829 DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d, vindex, 1<<scale);
2831 return disAMode_copy2tmp( mkU64(d) );
2832 } else {
2833 if (scale == 0) {
2834 DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
2835 nameIRegRexB(8,pfx,base_r), vindex);
2836 } else {
2837 DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
2838 nameIRegRexB(8,pfx,base_r), vindex, 1<<scale);
2841 break;
2842 case 1:
2843 d = getSDisp8(delta);
2844 *len += 1;
2845 goto have_disp;
2846 case 2:
2847 d = getSDisp32(delta);
2848 *len += 4;
2849 have_disp:
2850 if (scale == 0) {
2851 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
2852 nameIRegRexB(8,pfx,base_r), vindex);
2853 } else {
2854 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
2855 nameIRegRexB(8,pfx,base_r), vindex, 1<<scale);
2857 break;
2860 if (!d)
2861 return disAMode_copy2tmp( getIRegRexB(8,pfx,base_r) );
2862 return disAMode_copy2tmp( binop(Iop_Add64, getIRegRexB(8,pfx,base_r),
2863 mkU64(d)) );
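   /* Illustrative use: for an AVX2 gather such as
      "vgatherdps %ymm2, 0x8(%rax,%ymm1,4), %ymm0" this returns a temp
      holding %rax + 0x8, sets *rI to the register number of %ymm1 and
      *vscale to 4; scaling in the per-lane vector indices is left to
      the gather decoder itself. */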
2867 /* Figure out the number of (insn-stream) bytes constituting the amode
2868 beginning at delta. Is useful for getting hold of literals beyond
2869 the end of the amode before it has been disassembled. */
2871 static UInt lengthAMode ( Prefix pfx, Long delta )
2873 UChar mod_reg_rm = getUChar(delta);
2874 delta++;
2876 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2877 jump table seems a bit excessive.
2879 mod_reg_rm &= 0xC7; /* is now XX000YYY */
2880 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
2881 /* is now XX0XXYYY */
2882 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
2883 switch (mod_reg_rm) {
2885 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2886 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2888 case 0x00: case 0x01: case 0x02: case 0x03:
2889 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
2890 return 1;
2892 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2893 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2895 case 0x08: case 0x09: case 0x0A: case 0x0B:
2896 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
2897 return 2;
2899 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2900 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2902 case 0x10: case 0x11: case 0x12: case 0x13:
2903 /* ! 14 */ case 0x15: case 0x16: case 0x17:
2904 return 5;
2906 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2907    /* REX.B==1: a register, %r8 .. %r15.  This shouldn't happen. */
2908 /* Not an address, but still handled. */
2909 case 0x18: case 0x19: case 0x1A: case 0x1B:
2910 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
2911 return 1;
2913 /* RIP + disp32. */
2914 case 0x05:
2915 return 5;
2917 case 0x04: {
2918 /* SIB, with no displacement. */
2919 UChar sib = getUChar(delta);
2920 UChar base_r = toUChar(sib & 7);
2921 /* correct since #(R13) == 8 + #(RBP) */
2922 Bool base_is_BPor13 = toBool(base_r == R_RBP);
2924 if (base_is_BPor13) {
2925 return 6;
2926 } else {
2927 return 2;
2931 /* SIB, with 8-bit displacement. */
2932 case 0x0C:
2933 return 3;
2935 /* SIB, with 32-bit displacement. */
2936 case 0x14:
2937 return 6;
2939 default:
2940 vpanic("lengthAMode(amd64)");
2941 return 0; /*notreached*/
2946 /*------------------------------------------------------------*/
2947 /*--- Disassembling common idioms ---*/
2948 /*------------------------------------------------------------*/
2950 typedef
2951 enum { WithFlagNone=2, WithFlagCarry, WithFlagCarryX, WithFlagOverX }
2952 WithFlag;
2954 /* Handle binary integer instructions of the form
2955 op E, G meaning
2956 op reg-or-mem, reg
2957    Is passed a ptr to the modRM byte, the actual operation, and the
2958 data size. Returns the address advanced completely over this
2959 instruction.
2961 E(src) is reg-or-mem
2962 G(dst) is reg.
2964 If E is reg, --> GET %G, tmp
2965 OP %E, tmp
2966 PUT tmp, %G
2968 If E is mem and OP is not reversible,
2969 --> (getAddr E) -> tmpa
2970 LD (tmpa), tmpa
2971 GET %G, tmp2
2972 OP tmpa, tmp2
2973 PUT tmp2, %G
2975 If E is mem and OP is reversible
2976 --> (getAddr E) -> tmpa
2977 LD (tmpa), tmpa
2978 OP %G, tmpa
2979 PUT tmpa, %G
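   For instance, the E,G forms of ADD (opcode 0x03) and CMP (opcode
   0x3B) both fit this shape; the natural use for CMP is op8 ==
   Iop_Sub8 with keep == False, so the subtraction feeds only the
   flags thunk and %G is left unmodified.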
2981 static
2982 ULong dis_op2_E_G ( const VexAbiInfo* vbi,
2983 Prefix pfx,
2984 IROp op8,
2985 WithFlag flag,
2986 Bool keep,
2987 Int size,
2988 Long delta0,
2989 const HChar* t_amd64opc )
2991 HChar dis_buf[50];
2992 Int len;
2993 IRType ty = szToITy(size);
2994 IRTemp dst1 = newTemp(ty);
2995 IRTemp src = newTemp(ty);
2996 IRTemp dst0 = newTemp(ty);
2997 UChar rm = getUChar(delta0);
2998 IRTemp addr = IRTemp_INVALID;
3000 /* Stay sane -- check for valid (op8, flag, keep) combinations. */
3001 switch (op8) {
3002 case Iop_Add8:
3003 switch (flag) {
3004 case WithFlagNone: case WithFlagCarry:
3005 case WithFlagCarryX: case WithFlagOverX:
3006 vassert(keep);
3007 break;
3008 default:
3009 vassert(0);
3011 break;
3012 case Iop_Sub8:
3013 vassert(flag == WithFlagNone || flag == WithFlagCarry);
3014 if (flag == WithFlagCarry) vassert(keep);
3015 break;
3016 case Iop_And8:
3017 vassert(flag == WithFlagNone);
3018 break;
3019 case Iop_Or8: case Iop_Xor8:
3020 vassert(flag == WithFlagNone);
3021 vassert(keep);
3022 break;
3023 default:
3024 vassert(0);
3027 if (epartIsReg(rm)) {
3028 /* Specially handle XOR reg,reg, because that doesn't really
3029 depend on reg, and doing the obvious thing potentially
3030 generates a spurious value check failure due to the bogus
3031 dependency. Ditto SUB/SBB reg,reg. */
3032 if ((op8 == Iop_Xor8 || ((op8 == Iop_Sub8) && keep))
3033 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
3034 putIRegG(size,pfx,rm, mkU(ty,0));
3037 assign( dst0, getIRegG(size,pfx,rm) );
3038 assign( src, getIRegE(size,pfx,rm) );
3040 if (op8 == Iop_Add8 && flag == WithFlagCarry) {
3041 helper_ADC( size, dst1, dst0, src,
3042 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3043 putIRegG(size, pfx, rm, mkexpr(dst1));
3044 } else
3045 if (op8 == Iop_Sub8 && flag == WithFlagCarry) {
3046 helper_SBB( size, dst1, dst0, src,
3047 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3048 putIRegG(size, pfx, rm, mkexpr(dst1));
3049 } else
3050 if (op8 == Iop_Add8 && flag == WithFlagCarryX) {
3051 helper_ADCX_ADOX( True/*isADCX*/, size, dst1, dst0, src );
3052 putIRegG(size, pfx, rm, mkexpr(dst1));
3053 } else
3054 if (op8 == Iop_Add8 && flag == WithFlagOverX) {
3055 helper_ADCX_ADOX( False/*!isADCX*/, size, dst1, dst0, src );
3056 putIRegG(size, pfx, rm, mkexpr(dst1));
3057 } else {
3058 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
3059 if (isAddSub(op8))
3060 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3061 else
3062 setFlags_DEP1(op8, dst1, ty);
3063 if (keep)
3064 putIRegG(size, pfx, rm, mkexpr(dst1));
3067 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
3068 nameIRegE(size,pfx,rm),
3069 nameIRegG(size,pfx,rm));
3070 return 1+delta0;
3071 } else {
3072 /* E refers to memory */
3073 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
3074 assign( dst0, getIRegG(size,pfx,rm) );
3075 assign( src, loadLE(szToITy(size), mkexpr(addr)) );
3077 if (op8 == Iop_Add8 && flag == WithFlagCarry) {
3078 helper_ADC( size, dst1, dst0, src,
3079 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3080 putIRegG(size, pfx, rm, mkexpr(dst1));
3081 } else
3082 if (op8 == Iop_Sub8 && flag == WithFlagCarry) {
3083 helper_SBB( size, dst1, dst0, src,
3084 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3085 putIRegG(size, pfx, rm, mkexpr(dst1));
3086 } else
3087 if (op8 == Iop_Add8 && flag == WithFlagCarryX) {
3088 helper_ADCX_ADOX( True/*isADCX*/, size, dst1, dst0, src );
3089 putIRegG(size, pfx, rm, mkexpr(dst1));
3090 } else
3091 if (op8 == Iop_Add8 && flag == WithFlagOverX) {
3092 helper_ADCX_ADOX( False/*!isADCX*/, size, dst1, dst0, src );
3093 putIRegG(size, pfx, rm, mkexpr(dst1));
3094 } else {
3095 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
3096 if (isAddSub(op8))
3097 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3098 else
3099 setFlags_DEP1(op8, dst1, ty);
3100 if (keep)
3101 putIRegG(size, pfx, rm, mkexpr(dst1));
3104 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
3105 dis_buf, nameIRegG(size, pfx, rm));
3106 return len+delta0;
3112 /* Handle binary integer instructions of the form
3113 op G, E meaning
3114 op reg, reg-or-mem
3115    Is passed a ptr to the modRM byte, the actual operation, and the
3116 data size. Returns the address advanced completely over this
3117 instruction.
3119 G(src) is reg.
3120 E(dst) is reg-or-mem
3122 If E is reg, --> GET %E, tmp
3123 OP %G, tmp
3124 PUT tmp, %E
3126 If E is mem, --> (getAddr E) -> tmpa
3127 LD (tmpa), tmpv
3128 OP %G, tmpv
3129 ST tmpv, (tmpa)
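   Note that when E is memory and a LOCK prefix is present, the plain
   store in the scheme above is replaced by a casLE whose expected
   value is the originally loaded dst0 and whose restart point is
   guest_RIP_curr_instr, so that e.g. "lock add %eax, (%rbx)"
   re-executes if the memory word changed between the load and the
   store.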
3131 static
3132 ULong dis_op2_G_E ( const VexAbiInfo* vbi,
3133 Prefix pfx,
3134 IROp op8,
3135 WithFlag flag,
3136 Bool keep,
3137 Int size,
3138 Long delta0,
3139 const HChar* t_amd64opc )
3141 HChar dis_buf[50];
3142 Int len;
3143 IRType ty = szToITy(size);
3144 IRTemp dst1 = newTemp(ty);
3145 IRTemp src = newTemp(ty);
3146 IRTemp dst0 = newTemp(ty);
3147 UChar rm = getUChar(delta0);
3148 IRTemp addr = IRTemp_INVALID;
3150 /* Stay sane -- check for valid (op8, flag, keep) combinations. */
3151 switch (op8) {
3152 case Iop_Add8:
3153 vassert(flag == WithFlagNone || flag == WithFlagCarry);
3154 vassert(keep);
3155 break;
3156 case Iop_Sub8:
3157 vassert(flag == WithFlagNone || flag == WithFlagCarry);
3158 if (flag == WithFlagCarry) vassert(keep);
3159 break;
3160 case Iop_And8: case Iop_Or8: case Iop_Xor8:
3161 vassert(flag == WithFlagNone);
3162 vassert(keep);
3163 break;
3164 default:
3165 vassert(0);
3168 /* flag != WithFlagNone is only allowed for Add and Sub and indicates the
3169 intended operation is add-with-carry or subtract-with-borrow. */
3171 if (epartIsReg(rm)) {
3172 /* Specially handle XOR reg,reg, because that doesn't really
3173 depend on reg, and doing the obvious thing potentially
3174 generates a spurious value check failure due to the bogus
3175 dependency. Ditto SUB/SBB reg,reg. */
3176 if ((op8 == Iop_Xor8 || ((op8 == Iop_Sub8) && keep))
3177 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
3178 putIRegE(size,pfx,rm, mkU(ty,0));
3181 assign(dst0, getIRegE(size,pfx,rm));
3182 assign(src, getIRegG(size,pfx,rm));
3184 if (op8 == Iop_Add8 && flag == WithFlagCarry) {
3185 helper_ADC( size, dst1, dst0, src,
3186 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3187 putIRegE(size, pfx, rm, mkexpr(dst1));
3188 } else
3189 if (op8 == Iop_Sub8 && flag == WithFlagCarry) {
3190 helper_SBB( size, dst1, dst0, src,
3191 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3192 putIRegE(size, pfx, rm, mkexpr(dst1));
3193 } else {
3194 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3195 if (isAddSub(op8))
3196 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3197 else
3198 setFlags_DEP1(op8, dst1, ty);
3199 if (keep)
3200 putIRegE(size, pfx, rm, mkexpr(dst1));
3203 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
3204 nameIRegG(size,pfx,rm),
3205 nameIRegE(size,pfx,rm));
3206 return 1+delta0;
3209 /* E refers to memory */
3211 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
3212 assign(dst0, loadLE(ty,mkexpr(addr)));
3213 assign(src, getIRegG(size,pfx,rm));
3215 if (op8 == Iop_Add8 && flag == WithFlagCarry) {
3216 if (haveLOCK(pfx)) {
3217 /* cas-style store */
3218 helper_ADC( size, dst1, dst0, src,
3219 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3220 } else {
3221 /* normal store */
3222 helper_ADC( size, dst1, dst0, src,
3223 /*store*/addr, IRTemp_INVALID, 0 );
3225 } else
3226 if (op8 == Iop_Sub8 && flag == WithFlagCarry) {
3227 if (haveLOCK(pfx)) {
3228 /* cas-style store */
3229 helper_SBB( size, dst1, dst0, src,
3230 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3231 } else {
3232 /* normal store */
3233 helper_SBB( size, dst1, dst0, src,
3234 /*store*/addr, IRTemp_INVALID, 0 );
3236 } else {
3237 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3238 if (keep) {
3239 if (haveLOCK(pfx)) {
3240 if (0) vex_printf("locked case\n" );
3241 casLE( mkexpr(addr),
3242 mkexpr(dst0)/*expval*/,
3243 mkexpr(dst1)/*newval*/, guest_RIP_curr_instr );
3244 } else {
3245 if (0) vex_printf("nonlocked case\n");
3246 storeLE(mkexpr(addr), mkexpr(dst1));
3249 if (isAddSub(op8))
3250 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3251 else
3252 setFlags_DEP1(op8, dst1, ty);
3255 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
3256 nameIRegG(size,pfx,rm), dis_buf);
3257 return len+delta0;
3262 /* Handle move instructions of the form
3263 mov E, G meaning
3264 mov reg-or-mem, reg
3265    Is passed a ptr to the modRM byte, and the data size.  Returns
3266 the address advanced completely over this instruction.
3268 E(src) is reg-or-mem
3269 G(dst) is reg.
3271 If E is reg, --> GET %E, tmpv
3272 PUT tmpv, %G
3274 If E is mem --> (getAddr E) -> tmpa
3275 LD (tmpa), tmpb
3276 PUT tmpb, %G
3278 static
3279 ULong dis_mov_E_G ( const VexAbiInfo* vbi,
3280 Prefix pfx,
3281 Int size,
3282 Long delta0 )
3284 Int len;
3285 UChar rm = getUChar(delta0);
3286 HChar dis_buf[50];
3288 if (epartIsReg(rm)) {
3289 putIRegG(size, pfx, rm, getIRegE(size, pfx, rm));
3290 DIP("mov%c %s,%s\n", nameISize(size),
3291 nameIRegE(size,pfx,rm),
3292 nameIRegG(size,pfx,rm));
3293 return 1+delta0;
3296 /* E refers to memory */
3298 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
3299 putIRegG(size, pfx, rm, loadLE(szToITy(size), mkexpr(addr)));
3300 DIP("mov%c %s,%s\n", nameISize(size),
3301 dis_buf,
3302 nameIRegG(size,pfx,rm));
3303 return delta0+len;
3308 /* Handle move instructions of the form
3309 mov G, E meaning
3310 mov reg, reg-or-mem
3311    Is passed a ptr to the modRM byte, and the data size.  Returns
3312 the address advanced completely over this instruction.
3313 We have to decide here whether F2 or F3 are acceptable. F2 never is.
3315 G(src) is reg.
3316 E(dst) is reg-or-mem
3318 If E is reg, --> GET %G, tmp
3319 PUT tmp, %E
3321 If E is mem, --> (getAddr E) -> tmpa
3322 GET %G, tmpv
3323 ST tmpv, (tmpa)
3325 static
3326 ULong dis_mov_G_E ( const VexAbiInfo* vbi,
3327 Prefix pfx,
3328 Int size,
3329 Long delta0,
3330 /*OUT*/Bool* ok )
3332 Int len;
3333 UChar rm = getUChar(delta0);
3334 HChar dis_buf[50];
3336 *ok = True;
3338 if (epartIsReg(rm)) {
3339 if (haveF2orF3(pfx)) { *ok = False; return delta0; }
3340 putIRegE(size, pfx, rm, getIRegG(size, pfx, rm));
3341 DIP("mov%c %s,%s\n", nameISize(size),
3342 nameIRegG(size,pfx,rm),
3343 nameIRegE(size,pfx,rm));
3344 return 1+delta0;
3347 /* E refers to memory */
3349 if (haveF2(pfx)) { *ok = False; return delta0; }
3350 /* F3(XRELEASE) is acceptable, though. */
3351 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
3352 storeLE( mkexpr(addr), getIRegG(size, pfx, rm) );
3353 DIP("mov%c %s,%s\n", nameISize(size),
3354 nameIRegG(size,pfx,rm),
3355 dis_buf);
3356 return len+delta0;
3361 /* op $immediate, AL/AX/EAX/RAX. */
3362 static
3363 ULong dis_op_imm_A ( Int size,
3364 Bool carrying,
3365 IROp op8,
3366 Bool keep,
3367 Long delta,
3368 const HChar* t_amd64opc )
3370 Int size4 = imin(size,4);
3371 IRType ty = szToITy(size);
3372 IRTemp dst0 = newTemp(ty);
3373 IRTemp src = newTemp(ty);
3374 IRTemp dst1 = newTemp(ty);
3375 Long lit = getSDisp(size4,delta);
3376 assign(dst0, getIRegRAX(size));
3377 assign(src, mkU(ty,lit & mkSizeMask(size)));
3379 if (isAddSub(op8) && !carrying) {
3380 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
3381 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3383 else
3384 if (isLogic(op8)) {
3385 vassert(!carrying);
3386 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
3387 setFlags_DEP1(op8, dst1, ty);
3389 else
3390 if (op8 == Iop_Add8 && carrying) {
3391 helper_ADC( size, dst1, dst0, src,
3392 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3394 else
3395 if (op8 == Iop_Sub8 && carrying) {
3396 helper_SBB( size, dst1, dst0, src,
3397 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3399 else
3400 vpanic("dis_op_imm_A(amd64,guest)");
3402 if (keep)
3403 putIRegRAX(size, mkexpr(dst1));
3405 DIP("%s%c $%lld, %s\n", t_amd64opc, nameISize(size),
3406 lit, nameIRegRAX(size));
3407 return delta+size4;
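   /* For example, "addq $0x12345678, %rax" (REX.W + 05 id) arrives
      here with size == 8 but size4 == 4: getSDisp reads a 4-byte
      immediate and sign-extends it, matching the architectural rule
      that 64-bit ALU forms take at most a 32-bit immediate. */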
3411 /* Sign- and Zero-extending moves. */
3412 static
3413 ULong dis_movx_E_G ( const VexAbiInfo* vbi,
3414 Prefix pfx,
3415 Long delta, Int szs, Int szd, Bool sign_extend )
3417 UChar rm = getUChar(delta);
3418 if (epartIsReg(rm)) {
3419 putIRegG(szd, pfx, rm,
3420 doScalarWidening(
3421 szs,szd,sign_extend,
3422 getIRegE(szs,pfx,rm)));
3423 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
3424 nameISize(szs),
3425 nameISize(szd),
3426 nameIRegE(szs,pfx,rm),
3427 nameIRegG(szd,pfx,rm));
3428 return 1+delta;
3431 /* E refers to memory */
3433 Int len;
3434 HChar dis_buf[50];
3435 IRTemp addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
3436 putIRegG(szd, pfx, rm,
3437 doScalarWidening(
3438 szs,szd,sign_extend,
3439 loadLE(szToITy(szs),mkexpr(addr))));
3440 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
3441 nameISize(szs),
3442 nameISize(szd),
3443 dis_buf,
3444 nameIRegG(szd,pfx,rm));
3445 return len+delta;
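   /* For example, "movzbl (%rdi), %eax" comes through here with
      szs == 1, szd == 4 and sign_extend == False, so the loaded byte
      is zero-widened by doScalarWidening before being written to the
      32-bit destination (which, as usual on amd64, also clears the
      upper half of %rax). */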
3450 /* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by
3451 the 64 / 32 / 16 / 8 bit quantity in the given IRTemp. */
3452 static
3453 void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
3455 /* special-case the 64-bit case */
3456 if (sz == 8) {
3457 IROp op = signed_divide ? Iop_DivModS128to64
3458 : Iop_DivModU128to64;
3459 IRTemp src128 = newTemp(Ity_I128);
3460 IRTemp dst128 = newTemp(Ity_I128);
3461 assign( src128, binop(Iop_64HLto128,
3462 getIReg64(R_RDX),
3463 getIReg64(R_RAX)) );
3464 assign( dst128, binop(op, mkexpr(src128), mkexpr(t)) );
3465 putIReg64( R_RAX, unop(Iop_128to64,mkexpr(dst128)) );
3466 putIReg64( R_RDX, unop(Iop_128HIto64,mkexpr(dst128)) );
3467 } else {
3468 IROp op = signed_divide ? Iop_DivModS64to32
3469 : Iop_DivModU64to32;
3470 IRTemp src64 = newTemp(Ity_I64);
3471 IRTemp dst64 = newTemp(Ity_I64);
3472 switch (sz) {
3473 case 4:
3474 assign( src64,
3475 binop(Iop_32HLto64, getIRegRDX(4), getIRegRAX(4)) );
3476 assign( dst64,
3477 binop(op, mkexpr(src64), mkexpr(t)) );
3478 putIRegRAX( 4, unop(Iop_64to32,mkexpr(dst64)) );
3479 putIRegRDX( 4, unop(Iop_64HIto32,mkexpr(dst64)) );
3480 break;
3481 case 2: {
3482 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
3483 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
3484 assign( src64, unop(widen3264,
3485 binop(Iop_16HLto32,
3486 getIRegRDX(2),
3487 getIRegRAX(2))) );
3488 assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) );
3489 putIRegRAX( 2, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) );
3490 putIRegRDX( 2, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) );
3491 break;
3493 case 1: {
3494 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
3495 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
3496 IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16;
3497 assign( src64, unop(widen3264,
3498 unop(widen1632, getIRegRAX(2))) );
3499 assign( dst64,
3500 binop(op, mkexpr(src64),
3501 unop(widen1632, unop(widen816, mkexpr(t)))) );
3502 putIRegRAX( 1, unop(Iop_16to8,
3503 unop(Iop_32to16,
3504 unop(Iop_64to32,mkexpr(dst64)))) );
3505 putIRegAH( unop(Iop_16to8,
3506 unop(Iop_32to16,
3507 unop(Iop_64HIto32,mkexpr(dst64)))) );
3508 break;
3510 default:
3511 vpanic("codegen_div(amd64)");
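   /* Example: "divq %rbx" (sz == 8, unsigned) builds the 128-bit
      dividend Iop_64HLto128(%rdx, %rax), divides it by %rbx with
      Iop_DivModU128to64, and then writes the low (quotient) half back
      to %rax and the high (remainder) half to %rdx, per the sz == 8
      branch above. */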
3516 static
3517 ULong dis_Grp1 ( const VexAbiInfo* vbi,
3518 Prefix pfx,
3519 Long delta, UChar modrm,
3520 Int am_sz, Int d_sz, Int sz, Long d64 )
3522 Int len;
3523 HChar dis_buf[50];
3524 IRType ty = szToITy(sz);
3525 IRTemp dst1 = newTemp(ty);
3526 IRTemp src = newTemp(ty);
3527 IRTemp dst0 = newTemp(ty);
3528 IRTemp addr = IRTemp_INVALID;
3529 IROp op8 = Iop_INVALID;
3530 ULong mask = mkSizeMask(sz);
3532 switch (gregLO3ofRM(modrm)) {
3533 case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break;
3534 case 2: break; // ADC
3535 case 3: break; // SBB
3536 case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break;
3537 case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break;
3538 /*NOTREACHED*/
3539 default: vpanic("dis_Grp1(amd64): unhandled case");
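   /* Note: sub-opcode 7 is CMP.  It reuses Iop_Sub8 here, but the
      "gregLO3ofRM(modrm) < 7" tests below suppress the register or
      memory write-back, so only the flags thunk is affected. */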
3542 if (epartIsReg(modrm)) {
3543 vassert(am_sz == 1);
3545 assign(dst0, getIRegE(sz,pfx,modrm));
3546 assign(src, mkU(ty,d64 & mask));
3548 if (gregLO3ofRM(modrm) == 2 /* ADC */) {
3549 helper_ADC( sz, dst1, dst0, src,
3550 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3551 } else
3552 if (gregLO3ofRM(modrm) == 3 /* SBB */) {
3553 helper_SBB( sz, dst1, dst0, src,
3554 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3555 } else {
3556 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3557 if (isAddSub(op8))
3558 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3559 else
3560 setFlags_DEP1(op8, dst1, ty);
3563 if (gregLO3ofRM(modrm) < 7)
3564 putIRegE(sz, pfx, modrm, mkexpr(dst1));
3566 delta += (am_sz + d_sz);
3567 DIP("%s%c $%lld, %s\n",
3568 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), d64,
3569 nameIRegE(sz,pfx,modrm));
3570 } else {
3571 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
3573 assign(dst0, loadLE(ty,mkexpr(addr)));
3574 assign(src, mkU(ty,d64 & mask));
3576 if (gregLO3ofRM(modrm) == 2 /* ADC */) {
3577 if (haveLOCK(pfx)) {
3578 /* cas-style store */
3579 helper_ADC( sz, dst1, dst0, src,
3580 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3581 } else {
3582 /* normal store */
3583 helper_ADC( sz, dst1, dst0, src,
3584 /*store*/addr, IRTemp_INVALID, 0 );
3586 } else
3587 if (gregLO3ofRM(modrm) == 3 /* SBB */) {
3588 if (haveLOCK(pfx)) {
3589 /* cas-style store */
3590 helper_SBB( sz, dst1, dst0, src,
3591 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3592 } else {
3593 /* normal store */
3594 helper_SBB( sz, dst1, dst0, src,
3595 /*store*/addr, IRTemp_INVALID, 0 );
3597 } else {
3598 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3599 if (gregLO3ofRM(modrm) < 7) {
3600 if (haveLOCK(pfx)) {
3601 casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
3602 mkexpr(dst1)/*newVal*/,
3603 guest_RIP_curr_instr );
3604 } else {
3605 storeLE(mkexpr(addr), mkexpr(dst1));
3608 if (isAddSub(op8))
3609 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3610 else
3611 setFlags_DEP1(op8, dst1, ty);
3614 delta += (len+d_sz);
3615 DIP("%s%c $%lld, %s\n",
3616 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz),
3617 d64, dis_buf);
3619 return delta;
3623 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed
3624 expression. */
3626 static
3627 ULong dis_Grp2 ( const VexAbiInfo* vbi,
3628 Prefix pfx,
3629 Long delta, UChar modrm,
3630 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr,
3631 const HChar* shift_expr_txt, Bool* decode_OK )
3633 /* delta on entry points at the modrm byte. */
3634 HChar dis_buf[50];
3635 Int len;
3636 Bool isShift, isRotate, isRotateC;
3637 IRType ty = szToITy(sz);
3638 IRTemp dst0 = newTemp(ty);
3639 IRTemp dst1 = newTemp(ty);
3640 IRTemp addr = IRTemp_INVALID;
3642 *decode_OK = True;
3644 vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
3646 /* Put value to shift/rotate in dst0. */
3647 if (epartIsReg(modrm)) {
3648 assign(dst0, getIRegE(sz, pfx, modrm));
3649 delta += (am_sz + d_sz);
3650 } else {
3651 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
3652 assign(dst0, loadLE(ty,mkexpr(addr)));
3653 delta += len + d_sz;
3656 isShift = False;
3657 switch (gregLO3ofRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; }
3659 isRotate = False;
3660 switch (gregLO3ofRM(modrm)) { case 0: case 1: isRotate = True; }
3662 isRotateC = False;
3663 switch (gregLO3ofRM(modrm)) { case 2: case 3: isRotateC = True; }
3665 if (!isShift && !isRotate && !isRotateC) {
3666 /*NOTREACHED*/
3667 vpanic("dis_Grp2(Reg): unhandled case(amd64)");
3670 if (isRotateC) {
3671 /* Call a helper; this insn is so ridiculous it does not deserve
3672 better. One problem is, the helper has to calculate both the
3673 new value and the new flags. This is more than 64 bits, and
3674 there is no way to return more than 64 bits from the helper.
3675 Hence the crude and obvious solution is to call it twice,
3676 using the sign of the sz field to indicate whether it is the
3677 value or rflags result we want.
3679 Bool left = toBool(gregLO3ofRM(modrm) == 2);
3680 IRExpr** argsVALUE;
3681 IRExpr** argsRFLAGS;
3683 IRTemp new_value = newTemp(Ity_I64);
3684 IRTemp new_rflags = newTemp(Ity_I64);
3685 IRTemp old_rflags = newTemp(Ity_I64);
3687 assign( old_rflags, widenUto64(mk_amd64g_calculate_rflags_all()) );
3689 argsVALUE
3690 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
3691 widenUto64(shift_expr), /* rotate amount */
3692 mkexpr(old_rflags),
3693 mkU64(sz) );
3694 assign( new_value,
3695 mkIRExprCCall(
3696 Ity_I64,
3697 0/*regparm*/,
3698 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3699 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR,
3700 argsVALUE
3704 argsRFLAGS
3705 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
3706 widenUto64(shift_expr), /* rotate amount */
3707 mkexpr(old_rflags),
3708 mkU64(-sz) );
3709 assign( new_rflags,
3710 mkIRExprCCall(
3711 Ity_I64,
3712 0/*regparm*/,
3713 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3714 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR,
3715 argsRFLAGS
3719 assign( dst1, narrowTo(ty, mkexpr(new_value)) );
3720 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
3721 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
3722 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
3723 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
3726 else
3727 if (isShift) {
3729 IRTemp pre64 = newTemp(Ity_I64);
3730 IRTemp res64 = newTemp(Ity_I64);
3731 IRTemp res64ss = newTemp(Ity_I64);
3732 IRTemp shift_amt = newTemp(Ity_I8);
3733 UChar mask = toUChar(sz==8 ? 63 : 31);
3734 IROp op64;
3736 switch (gregLO3ofRM(modrm)) {
3737 case 4: op64 = Iop_Shl64; break;
3738 case 5: op64 = Iop_Shr64; break;
3739 case 6: op64 = Iop_Shl64; break;
3740 case 7: op64 = Iop_Sar64; break;
3741 /*NOTREACHED*/
3742 default: vpanic("dis_Grp2:shift"); break;
3745 /* Widen the value to be shifted to 64 bits, do the shift, and
3746 narrow back down. This seems surprisingly long-winded, but
3747 unfortunately the AMD semantics requires that 8/16/32-bit
3748 shifts give defined results for shift values all the way up
3749 to 32, and this seems the simplest way to do it. It has the
3750 advantage that the only IR level shifts generated are of 64
3751 bit values, and the shift amount is guaranteed to be in the
3752 range 0 .. 63, thereby observing the IR semantics requiring
3753      all shift values to be in the range 0 .. word_size - 1.
3755 Therefore the shift amount is masked with 63 for 64-bit shifts
3756 and 31 for all others.
3758 /* shift_amt = shift_expr & MASK, regardless of operation size */
3759 assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(mask)) );
3761 /* suitably widen the value to be shifted to 64 bits. */
3762 assign( pre64, op64==Iop_Sar64 ? widenSto64(mkexpr(dst0))
3763 : widenUto64(mkexpr(dst0)) );
3765 /* res64 = pre64 `shift` shift_amt */
3766 assign( res64, binop(op64, mkexpr(pre64), mkexpr(shift_amt)) );
3768 /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */
3769 assign( res64ss,
3770 binop(op64,
3771 mkexpr(pre64),
3772 binop(Iop_And8,
3773 binop(Iop_Sub8,
3774 mkexpr(shift_amt), mkU8(1)),
3775 mkU8(mask))) );
3777 /* Build the flags thunk. */
3778 setFlags_DEP1_DEP2_shift(op64, res64, res64ss, ty, shift_amt);
3780 /* Narrow the result back down. */
3781 assign( dst1, narrowTo(ty, mkexpr(res64)) );
3783 } /* if (isShift) */
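      /* Worked example (illustrative only, not from the original source):
         "sarw $5, %ax" with AX=0x8000.  dst0 is sign-widened to
         0xFFFFFFFFFFFF8000, shifted right by shift_amt = 5 & 31 = 5 to give
         0xFFFFFFFFFFFFFC00, and narrowed back to 0xFC00.  res64ss uses a
         shift of 4, so the flags thunk can recover the last bit shifted
         out (0 here, hence CF=0). */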
3785 else
3786 if (isRotate) {
3787 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1
3788 : (ty==Ity_I32 ? 2 : 3));
3789 Bool left = toBool(gregLO3ofRM(modrm) == 0);
3790 IRTemp rot_amt = newTemp(Ity_I8);
3791 IRTemp rot_amt64 = newTemp(Ity_I8);
3792 IRTemp oldFlags = newTemp(Ity_I64);
3793 UChar mask = toUChar(sz==8 ? 63 : 31);
3795 /* rot_amt = shift_expr & mask */
3796 /* By masking the rotate amount thusly, the IR-level Shl/Shr
3797 expressions never shift beyond the word size and thus remain
3798 well defined. */
3799 assign(rot_amt64, binop(Iop_And8, shift_expr, mkU8(mask)));
3801 if (ty == Ity_I64)
3802 assign(rot_amt, mkexpr(rot_amt64));
3803 else
3804 assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt64), mkU8(8*sz-1)));
3806 if (left) {
3808 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
3809 assign(dst1,
3810 binop( mkSizedOp(ty,Iop_Or8),
3811 binop( mkSizedOp(ty,Iop_Shl8),
3812 mkexpr(dst0),
3813 mkexpr(rot_amt)
3815 binop( mkSizedOp(ty,Iop_Shr8),
3816 mkexpr(dst0),
3817 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
3821 ccOp += AMD64G_CC_OP_ROLB;
3823 } else { /* right */
3825 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
3826 assign(dst1,
3827 binop( mkSizedOp(ty,Iop_Or8),
3828 binop( mkSizedOp(ty,Iop_Shr8),
3829 mkexpr(dst0),
3830 mkexpr(rot_amt)
3832 binop( mkSizedOp(ty,Iop_Shl8),
3833 mkexpr(dst0),
3834 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
3838 ccOp += AMD64G_CC_OP_RORB;
3842 /* dst1 now holds the rotated value. Build flag thunk. We
3843 need the resulting value for this, and the previous flags.
3844 Except don't set it if the rotate count is zero. */
3846 assign(oldFlags, mk_amd64g_calculate_rflags_all());
3848 /* rot_amt64 :: Ity_I8. We need to convert it to I1. */
3849 IRTemp rot_amt64b = newTemp(Ity_I1);
3850 assign(rot_amt64b, binop(Iop_CmpNE8, mkexpr(rot_amt64), mkU8(0)) );
3852 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */
3853 stmt( IRStmt_Put( OFFB_CC_OP,
3854 IRExpr_ITE( mkexpr(rot_amt64b),
3855 mkU64(ccOp),
3856 IRExpr_Get(OFFB_CC_OP,Ity_I64) ) ));
3857 stmt( IRStmt_Put( OFFB_CC_DEP1,
3858 IRExpr_ITE( mkexpr(rot_amt64b),
3859 widenUto64(mkexpr(dst1)),
3860 IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) ));
3861 stmt( IRStmt_Put( OFFB_CC_DEP2,
3862 IRExpr_ITE( mkexpr(rot_amt64b),
3863 mkU64(0),
3864 IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) ));
3865 stmt( IRStmt_Put( OFFB_CC_NDEP,
3866 IRExpr_ITE( mkexpr(rot_amt64b),
3867 mkexpr(oldFlags),
3868 IRExpr_Get(OFFB_CC_NDEP,Ity_I64) ) ));
3869 } /* if (isRotate) */
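      /* Worked example (illustrative only, not from the original source):
         "rolw $4, %ax" with AX=0x1234 produces 0x2341.  The CC_* thunk is
         updated only because the masked rotate count (4) is nonzero; a
         rotate count of zero leaves all four CC fields unchanged via the
         ITEs above. */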
3871 /* Save result, and finish up. */
3872 if (epartIsReg(modrm)) {
3873 putIRegE(sz, pfx, modrm, mkexpr(dst1));
3874 if (vex_traceflags & VEX_TRACE_FE) {
3875 vex_printf("%s%c ",
3876 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
3877 if (shift_expr_txt)
3878 vex_printf("%s", shift_expr_txt);
3879 else
3880 ppIRExpr(shift_expr);
3881 vex_printf(", %s\n", nameIRegE(sz,pfx,modrm));
3883 } else {
3884 storeLE(mkexpr(addr), mkexpr(dst1));
3885 if (vex_traceflags & VEX_TRACE_FE) {
3886 vex_printf("%s%c ",
3887 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
3888 if (shift_expr_txt)
3889 vex_printf("%s", shift_expr_txt);
3890 else
3891 ppIRExpr(shift_expr);
3892 vex_printf(", %s\n", dis_buf);
3895 return delta;
3899 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
3900 static
3901 ULong dis_Grp8_Imm ( const VexAbiInfo* vbi,
3902 Prefix pfx,
3903 Long delta, UChar modrm,
3904 Int am_sz, Int sz, ULong src_val,
3905 Bool* decode_OK )
3907 /* src_val denotes a d8.
3908 And delta on entry points at the modrm byte. */
3910 IRType ty = szToITy(sz);
3911 IRTemp t2 = newTemp(Ity_I64);
3912 IRTemp t2m = newTemp(Ity_I64);
3913 IRTemp t_addr = IRTemp_INVALID;
3914 HChar dis_buf[50];
3915 ULong mask;
3917 /* we're optimists :-) */
3918 *decode_OK = True;
3920 /* Check whether F2 or F3 are acceptable. */
3921 if (epartIsReg(modrm)) {
3922 /* F2 or F3 are not allowed in the register case. */
3923 if (haveF2orF3(pfx)) {
3924 *decode_OK = False;
3925 return delta;
3927 } else {
3928 /* F2 or F3 (but not both) are allowable provided LOCK is also
3929 present. */
3930 if (haveF2orF3(pfx)) {
3931 if (haveF2andF3(pfx) || !haveLOCK(pfx)) {
3932 *decode_OK = False;
3933 return delta;
3938 /* Limit src_val -- the bit offset -- to something within a word.
3939 The Intel docs say that literal offsets larger than a word are
3940 masked in this way. */
3941 switch (sz) {
3942 case 2: src_val &= 15; break;
3943 case 4: src_val &= 31; break;
3944 case 8: src_val &= 63; break;
3945 default: *decode_OK = False; return delta;
3948 /* Invent a mask suitable for the operation. */
3949 switch (gregLO3ofRM(modrm)) {
3950 case 4: /* BT */ mask = 0; break;
3951 case 5: /* BTS */ mask = 1ULL << src_val; break;
3952 case 6: /* BTR */ mask = ~(1ULL << src_val); break;
3953 case 7: /* BTC */ mask = 1ULL << src_val; break;
3954 /* If this needs to be extended, probably simplest to make a
3955 new function to handle the other cases (0 .. 3). The
3956 Intel docs do not, however, indicate any use for 0 .. 3, so
3957 we don't expect this to happen. */
3958 default: *decode_OK = False; return delta;
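   /* Illustrative example (not from the original source): for
      "btsw $3, %ax" the masked bit offset is 3, so mask = 1ULL << 3 = 8.
      BTR would instead use ~8 (applied with AND below), BTC uses 8 with
      XOR, and BT keeps mask = 0 since it never writes the value back. */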
3961 /* Fetch the value to be tested and modified into t2, which is
3962 64-bits wide regardless of sz. */
3963 if (epartIsReg(modrm)) {
3964 vassert(am_sz == 1);
3965 assign( t2, widenUto64(getIRegE(sz, pfx, modrm)) );
3966 delta += (am_sz + 1);
3967 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
3968 nameISize(sz),
3969 src_val, nameIRegE(sz,pfx,modrm));
3970 } else {
3971 Int len;
3972 t_addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 1 );
3973 delta += (len+1);
3974 assign( t2, widenUto64(loadLE(ty, mkexpr(t_addr))) );
3975 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
3976 nameISize(sz),
3977 src_val, dis_buf);
3980 /* Compute the new value into t2m, if non-BT. */
3981 switch (gregLO3ofRM(modrm)) {
3982 case 4: /* BT */
3983 break;
3984 case 5: /* BTS */
3985 assign( t2m, binop(Iop_Or64, mkU64(mask), mkexpr(t2)) );
3986 break;
3987 case 6: /* BTR */
3988 assign( t2m, binop(Iop_And64, mkU64(mask), mkexpr(t2)) );
3989 break;
3990 case 7: /* BTC */
3991 assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) );
3992 break;
3993 default:
3994 /*NOTREACHED*/ /*the previous switch guards this*/
3995 vassert(0);
3998 /* Write the result back, if non-BT. */
3999 if (gregLO3ofRM(modrm) != 4 /* BT */) {
4000 if (epartIsReg(modrm)) {
4001 putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m)));
4002 } else {
4003 if (haveLOCK(pfx)) {
4004 casLE( mkexpr(t_addr),
4005 narrowTo(ty, mkexpr(t2))/*expd*/,
4006 narrowTo(ty, mkexpr(t2m))/*new*/,
4007 guest_RIP_curr_instr );
4008 } else {
4009 storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
4014 /* Copy relevant bit from t2 into the carry flag. */
4015 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
4016 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
4017 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
4018 stmt( IRStmt_Put(
4019 OFFB_CC_DEP1,
4020 binop(Iop_And64,
4021 binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)),
4022 mkU64(1))
4024 /* Set NDEP even though it isn't used. This makes redundant-PUT
4025 elimination of previous stores to this field work better. */
4026 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
4028 return delta;
4032 /* Signed/unsigned widening multiply. Generate IR to multiply the
4033 value in RAX/EAX/AX/AL by the given IRTemp, and park the result in
4034 RDX:RAX/EDX:EAX/DX:AX/AX.
4036 static void codegen_mulL_A_D ( Int sz, Bool syned,
4037 IRTemp tmp, const HChar* tmp_txt )
4039 IRType ty = szToITy(sz);
4040 IRTemp t1 = newTemp(ty);
4042 assign( t1, getIRegRAX(sz) );
4044 switch (ty) {
4045 case Ity_I64: {
4046 IRTemp res128 = newTemp(Ity_I128);
4047 IRTemp resHi = newTemp(Ity_I64);
4048 IRTemp resLo = newTemp(Ity_I64);
4049 IROp mulOp = syned ? Iop_MullS64 : Iop_MullU64;
4050 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
4051 setFlags_MUL ( Ity_I64, t1, tmp, tBaseOp );
4052 assign( res128, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
4053 assign( resHi, unop(Iop_128HIto64,mkexpr(res128)));
4054 assign( resLo, unop(Iop_128to64,mkexpr(res128)));
4055 putIReg64(R_RDX, mkexpr(resHi));
4056 putIReg64(R_RAX, mkexpr(resLo));
4057 break;
4059 case Ity_I32: {
4060 IRTemp res64 = newTemp(Ity_I64);
4061 IRTemp resHi = newTemp(Ity_I32);
4062 IRTemp resLo = newTemp(Ity_I32);
4063 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32;
4064 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
4065 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp );
4066 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
4067 assign( resHi, unop(Iop_64HIto32,mkexpr(res64)));
4068 assign( resLo, unop(Iop_64to32,mkexpr(res64)));
4069 putIRegRDX(4, mkexpr(resHi));
4070 putIRegRAX(4, mkexpr(resLo));
4071 break;
4073 case Ity_I16: {
4074 IRTemp res32 = newTemp(Ity_I32);
4075 IRTemp resHi = newTemp(Ity_I16);
4076 IRTemp resLo = newTemp(Ity_I16);
4077 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16;
4078 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
4079 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp );
4080 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
4081 assign( resHi, unop(Iop_32HIto16,mkexpr(res32)));
4082 assign( resLo, unop(Iop_32to16,mkexpr(res32)));
4083 putIRegRDX(2, mkexpr(resHi));
4084 putIRegRAX(2, mkexpr(resLo));
4085 break;
4087 case Ity_I8: {
4088 IRTemp res16 = newTemp(Ity_I16);
4089 IRTemp resHi = newTemp(Ity_I8);
4090 IRTemp resLo = newTemp(Ity_I8);
4091 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8;
4092 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
4093 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp );
4094 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
4095 assign( resHi, unop(Iop_16HIto8,mkexpr(res16)));
4096 assign( resLo, unop(Iop_16to8,mkexpr(res16)));
4097 putIRegRAX(2, mkexpr(res16));
4098 break;
4100 default:
4101 ppIRType(ty);
4102 vpanic("codegen_mulL_A_D(amd64)");
4104 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt);
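   /* Illustrative example (not from the original source): "mulq %rbx" with
      RAX = 0x8000000000000000 and RBX = 2 gives a 128-bit product of 2^64,
      i.e. RDX = 1 and RAX = 0; CF and OF are set because the high half of
      the product is nonzero. */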
4108 /* Group 3 extended opcodes. We have to decide here whether F2 and F3
4109 might be valid. */
4110 static
4111 ULong dis_Grp3 ( const VexAbiInfo* vbi,
4112 Prefix pfx, Int sz, Long delta, Bool* decode_OK )
4114 Long d64;
4115 UChar modrm;
4116 HChar dis_buf[50];
4117 Int len;
4118 IRTemp addr;
4119 IRType ty = szToITy(sz);
4120 IRTemp t1 = newTemp(ty);
4121 IRTemp dst1, src, dst0;
4122 *decode_OK = True;
4123 modrm = getUChar(delta);
4124 if (epartIsReg(modrm)) {
4125 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */
4126 if (haveF2orF3(pfx)) goto unhandled;
4127 switch (gregLO3ofRM(modrm)) {
4128 case 0: { /* TEST */
4129 delta++;
4130 d64 = getSDisp(imin(4,sz), delta);
4131 delta += imin(4,sz);
4132 dst1 = newTemp(ty);
4133 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
4134 getIRegE(sz,pfx,modrm),
4135 mkU(ty, d64 & mkSizeMask(sz))));
4136 setFlags_DEP1( Iop_And8, dst1, ty );
4137 DIP("test%c $%lld, %s\n",
4138 nameISize(sz), d64,
4139 nameIRegE(sz, pfx, modrm));
4140 break;
4142 case 1:
4143 *decode_OK = False;
4144 return delta;
4145 case 2: /* NOT */
4146 delta++;
4147 putIRegE(sz, pfx, modrm,
4148 unop(mkSizedOp(ty,Iop_Not8),
4149 getIRegE(sz, pfx, modrm)));
4150 DIP("not%c %s\n", nameISize(sz),
4151 nameIRegE(sz, pfx, modrm));
4152 break;
4153 case 3: /* NEG */
4154 delta++;
4155 dst0 = newTemp(ty);
4156 src = newTemp(ty);
4157 dst1 = newTemp(ty);
4158 assign(dst0, mkU(ty,0));
4159 assign(src, getIRegE(sz, pfx, modrm));
4160 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
4161 mkexpr(src)));
4162 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
4163 putIRegE(sz, pfx, modrm, mkexpr(dst1));
4164 DIP("neg%c %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm));
4165 break;
4166 case 4: /* MUL (unsigned widening) */
4167 delta++;
4168 src = newTemp(ty);
4169 assign(src, getIRegE(sz,pfx,modrm));
4170 codegen_mulL_A_D ( sz, False, src,
4171 nameIRegE(sz,pfx,modrm) );
4172 break;
4173 case 5: /* IMUL (signed widening) */
4174 delta++;
4175 src = newTemp(ty);
4176 assign(src, getIRegE(sz,pfx,modrm));
4177 codegen_mulL_A_D ( sz, True, src,
4178 nameIRegE(sz,pfx,modrm) );
4179 break;
4180 case 6: /* DIV */
4181 delta++;
4182 assign( t1, getIRegE(sz, pfx, modrm) );
4183 codegen_div ( sz, t1, False );
4184 DIP("div%c %s\n", nameISize(sz),
4185 nameIRegE(sz, pfx, modrm));
4186 break;
4187 case 7: /* IDIV */
4188 delta++;
4189 assign( t1, getIRegE(sz, pfx, modrm) );
4190 codegen_div ( sz, t1, True );
4191 DIP("idiv%c %s\n", nameISize(sz),
4192 nameIRegE(sz, pfx, modrm));
4193 break;
4194 default:
4195 /*NOTREACHED*/
4196 vpanic("Grp3(amd64,R)");
4198 } else {
4199 /* Decide if F2/XACQ or F3/XREL might be valid. */
4200 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
4201 if ((gregLO3ofRM(modrm) == 3/*NEG*/ || gregLO3ofRM(modrm) == 2/*NOT*/)
4202 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
4203 validF2orF3 = True;
4205 if (!validF2orF3) goto unhandled;
4206 /* */
4207 addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
4208 /* we have to inform disAMode of any immediate
4209 bytes used */
4210 gregLO3ofRM(modrm)==0/*TEST*/
4211 ? imin(4,sz)
4214 t1 = newTemp(ty);
4215 delta += len;
4216 assign(t1, loadLE(ty,mkexpr(addr)));
4217 switch (gregLO3ofRM(modrm)) {
4218 case 0: { /* TEST */
4219 d64 = getSDisp(imin(4,sz), delta);
4220 delta += imin(4,sz);
4221 dst1 = newTemp(ty);
4222 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
4223 mkexpr(t1),
4224 mkU(ty, d64 & mkSizeMask(sz))));
4225 setFlags_DEP1( Iop_And8, dst1, ty );
4226 DIP("test%c $%lld, %s\n", nameISize(sz), d64, dis_buf);
4227 break;
4229 case 1:
4230 *decode_OK = False;
4231 return delta;
4232 case 2: /* NOT */
4233 dst1 = newTemp(ty);
4234 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
4235 if (haveLOCK(pfx)) {
4236 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
4237 guest_RIP_curr_instr );
4238 } else {
4239 storeLE( mkexpr(addr), mkexpr(dst1) );
4241 DIP("not%c %s\n", nameISize(sz), dis_buf);
4242 break;
4243 case 3: /* NEG */
4244 dst0 = newTemp(ty);
4245 src = newTemp(ty);
4246 dst1 = newTemp(ty);
4247 assign(dst0, mkU(ty,0));
4248 assign(src, mkexpr(t1));
4249 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
4250 mkexpr(src)));
4251 if (haveLOCK(pfx)) {
4252 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
4253 guest_RIP_curr_instr );
4254 } else {
4255 storeLE( mkexpr(addr), mkexpr(dst1) );
4257 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
4258 DIP("neg%c %s\n", nameISize(sz), dis_buf);
4259 break;
4260 case 4: /* MUL (unsigned widening) */
4261 codegen_mulL_A_D ( sz, False, t1, dis_buf );
4262 break;
4263 case 5: /* IMUL */
4264 codegen_mulL_A_D ( sz, True, t1, dis_buf );
4265 break;
4266 case 6: /* DIV */
4267 codegen_div ( sz, t1, False );
4268 DIP("div%c %s\n", nameISize(sz), dis_buf);
4269 break;
4270 case 7: /* IDIV */
4271 codegen_div ( sz, t1, True );
4272 DIP("idiv%c %s\n", nameISize(sz), dis_buf);
4273 break;
4274 default:
4275 /*NOTREACHED*/
4276 vpanic("Grp3(amd64,M)");
4279 return delta;
4280 unhandled:
4281 *decode_OK = False;
4282 return delta;
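/* Illustrative note (not from the original source): the NEG cases above
   compute 0 - src so that the ordinary Sub flags thunk applies; e.g.
   "negb %al" with AL=0x01 yields 0xFF with CF=1, while AL=0x00 yields
   0x00 with CF=0 (CF is set iff the source was nonzero). */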
4286 /* Group 4 extended opcodes. We have to decide here whether F2 and F3
4287 might be valid. */
4288 static
4289 ULong dis_Grp4 ( const VexAbiInfo* vbi,
4290 Prefix pfx, Long delta, Bool* decode_OK )
4292 Int alen;
4293 UChar modrm;
4294 HChar dis_buf[50];
4295 IRType ty = Ity_I8;
4296 IRTemp t1 = newTemp(ty);
4297 IRTemp t2 = newTemp(ty);
4299 *decode_OK = True;
4301 modrm = getUChar(delta);
4302 if (epartIsReg(modrm)) {
4303 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */
4304 if (haveF2orF3(pfx)) goto unhandled;
4305 assign(t1, getIRegE(1, pfx, modrm));
4306 switch (gregLO3ofRM(modrm)) {
4307 case 0: /* INC */
4308 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
4309 putIRegE(1, pfx, modrm, mkexpr(t2));
4310 setFlags_INC_DEC( True, t2, ty );
4311 break;
4312 case 1: /* DEC */
4313 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
4314 putIRegE(1, pfx, modrm, mkexpr(t2));
4315 setFlags_INC_DEC( False, t2, ty );
4316 break;
4317 default:
4318 *decode_OK = False;
4319 return delta;
4321 delta++;
4322 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)),
4323 nameIRegE(1, pfx, modrm));
4324 } else {
4325 /* Decide if F2/XACQ or F3/XREL might be valid. */
4326 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
4327 if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/)
4328 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
4329 validF2orF3 = True;
4331 if (!validF2orF3) goto unhandled;
4332 /* */
4333 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
4334 assign( t1, loadLE(ty, mkexpr(addr)) );
4335 switch (gregLO3ofRM(modrm)) {
4336 case 0: /* INC */
4337 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
4338 if (haveLOCK(pfx)) {
4339 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
4340 guest_RIP_curr_instr );
4341 } else {
4342 storeLE( mkexpr(addr), mkexpr(t2) );
4344 setFlags_INC_DEC( True, t2, ty );
4345 break;
4346 case 1: /* DEC */
4347 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
4348 if (haveLOCK(pfx)) {
4349 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
4350 guest_RIP_curr_instr );
4351 } else {
4352 storeLE( mkexpr(addr), mkexpr(t2) );
4354 setFlags_INC_DEC( False, t2, ty );
4355 break;
4356 default:
4357 *decode_OK = False;
4358 return delta;
4360 delta += alen;
4361 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), dis_buf);
4363 return delta;
4364 unhandled:
4365 *decode_OK = False;
4366 return delta;
4370 /* Group 5 extended opcodes. We have to decide here whether F2 and F3
4371 might be valid. */
4372 static
4373 ULong dis_Grp5 ( const VexAbiInfo* vbi,
4374 Prefix pfx, Int sz, Long delta,
4375 /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK )
4377 Int len;
4378 UChar modrm;
4379 HChar dis_buf[50];
4380 IRTemp addr = IRTemp_INVALID;
4381 IRType ty = szToITy(sz);
4382 IRTemp t1 = newTemp(ty);
4383 IRTemp t2 = IRTemp_INVALID;
4384 IRTemp t3 = IRTemp_INVALID;
4385 Bool showSz = True;
4387 *decode_OK = True;
4389 modrm = getUChar(delta);
4390 if (epartIsReg(modrm)) {
4391 /* F2/XACQ and F3/XREL are always invalid in the non-mem case.
4392 F2/CALL and F2/JMP may have bnd prefix. */
4393 if (haveF2orF3(pfx)
4394 && ! (haveF2(pfx)
4395 && (gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4)))
4396 goto unhandledR;
4397 assign(t1, getIRegE(sz,pfx,modrm));
4398 switch (gregLO3ofRM(modrm)) {
4399 case 0: /* INC */
4400 t2 = newTemp(ty);
4401 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
4402 mkexpr(t1), mkU(ty,1)));
4403 setFlags_INC_DEC( True, t2, ty );
4404 putIRegE(sz,pfx,modrm, mkexpr(t2));
4405 break;
4406 case 1: /* DEC */
4407 t2 = newTemp(ty);
4408 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
4409 mkexpr(t1), mkU(ty,1)));
4410 setFlags_INC_DEC( False, t2, ty );
4411 putIRegE(sz,pfx,modrm, mkexpr(t2));
4412 break;
4413 case 2: /* call Ev */
4414 /* Ignore any sz value and operate as if sz==8. */
4415 if (!(sz == 4 || sz == 8)) goto unhandledR;
4416 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
4417 sz = 8;
4418 t3 = newTemp(Ity_I64);
4419 assign(t3, getIRegE(sz,pfx,modrm));
4420 t2 = newTemp(Ity_I64);
4421 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
4422 putIReg64(R_RSP, mkexpr(t2));
4423 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1));
4424 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)");
4425 jmp_treg(dres, Ijk_Call, t3);
4426 vassert(dres->whatNext == Dis_StopHere);
4427 showSz = False;
4428 break;
4429 case 4: /* jmp Ev */
4430 /* Ignore any sz value and operate as if sz==8. */
4431 if (!(sz == 4 || sz == 8)) goto unhandledR;
4432 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
4433 sz = 8;
4434 t3 = newTemp(Ity_I64);
4435 assign(t3, getIRegE(sz,pfx,modrm));
4436 jmp_treg(dres, Ijk_Boring, t3);
4437 vassert(dres->whatNext == Dis_StopHere);
4438 showSz = False;
4439 break;
4440 case 6: /* PUSH Ev */
4441 /* There is no encoding for 32-bit operand size; hence ... */
4442 if (sz == 4) sz = 8;
4443 if (sz == 8 || sz == 2) {
4444 ty = szToITy(sz); /* redo it, since sz might have changed */
4445 t3 = newTemp(ty);
4446 assign(t3, getIRegE(sz,pfx,modrm));
4447 t2 = newTemp(Ity_I64);
4448 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
4449 putIReg64(R_RSP, mkexpr(t2) );
4450 storeLE( mkexpr(t2), mkexpr(t3) );
4451 break;
4452 } else {
4453 goto unhandledR; /* awaiting test case */
4455 default:
4456 unhandledR:
4457 *decode_OK = False;
4458 return delta;
4460 delta++;
4461 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
4462 showSz ? nameISize(sz) : ' ',
4463 nameIRegE(sz, pfx, modrm));
4464 } else {
4465 /* Decide if F2/XACQ, F3/XREL, F2/CALL or F2/JMP might be valid. */
4466 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
4467 if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/)
4468 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
4469 validF2orF3 = True;
4470 } else if ((gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4)
4471 && (haveF2(pfx) && !haveF3(pfx))) {
4472 validF2orF3 = True;
4474 if (!validF2orF3) goto unhandledM;
4475 /* */
4476 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
4477 if (gregLO3ofRM(modrm) != 2 && gregLO3ofRM(modrm) != 4
4478 && gregLO3ofRM(modrm) != 6) {
4479 assign(t1, loadLE(ty,mkexpr(addr)));
4481 switch (gregLO3ofRM(modrm)) {
4482 case 0: /* INC */
4483 t2 = newTemp(ty);
4484 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
4485 mkexpr(t1), mkU(ty,1)));
4486 if (haveLOCK(pfx)) {
4487 casLE( mkexpr(addr),
4488 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
4489 } else {
4490 storeLE(mkexpr(addr),mkexpr(t2));
4492 setFlags_INC_DEC( True, t2, ty );
4493 break;
4494 case 1: /* DEC */
4495 t2 = newTemp(ty);
4496 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
4497 mkexpr(t1), mkU(ty,1)));
4498 if (haveLOCK(pfx)) {
4499 casLE( mkexpr(addr),
4500 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
4501 } else {
4502 storeLE(mkexpr(addr),mkexpr(t2));
4504 setFlags_INC_DEC( False, t2, ty );
4505 break;
4506 case 2: /* call Ev */
4507 /* Ignore any sz value and operate as if sz==8. */
4508 if (!(sz == 4 || sz == 8)) goto unhandledM;
4509 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
4510 sz = 8;
4511 t3 = newTemp(Ity_I64);
4512 assign(t3, loadLE(Ity_I64,mkexpr(addr)));
4513 t2 = newTemp(Ity_I64);
4514 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
4515 putIReg64(R_RSP, mkexpr(t2));
4516 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len));
4517 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)");
4518 jmp_treg(dres, Ijk_Call, t3);
4519 vassert(dres->whatNext == Dis_StopHere);
4520 showSz = False;
4521 break;
4522 case 4: /* JMP Ev */
4523 /* Ignore any sz value and operate as if sz==8. */
4524 if (!(sz == 4 || sz == 8)) goto unhandledM;
4525 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
4526 sz = 8;
4527 t3 = newTemp(Ity_I64);
4528 assign(t3, loadLE(Ity_I64,mkexpr(addr)));
4529 jmp_treg(dres, Ijk_Boring, t3);
4530 vassert(dres->whatNext == Dis_StopHere);
4531 showSz = False;
4532 break;
4533 case 6: /* PUSH Ev */
4534 /* There is no encoding for 32-bit operand size; hence ... */
4535 if (sz == 4) sz = 8;
4536 if (sz == 8 || sz == 2) {
4537 ty = szToITy(sz); /* redo it, since sz might have changed */
4538 t3 = newTemp(ty);
4539 assign(t3, loadLE(ty,mkexpr(addr)));
4540 t2 = newTemp(Ity_I64);
4541 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
4542 putIReg64(R_RSP, mkexpr(t2) );
4543 storeLE( mkexpr(t2), mkexpr(t3) );
4544 break;
4545 } else {
4546 goto unhandledM; /* awaiting test case */
4548 default:
4549 unhandledM:
4550 *decode_OK = False;
4551 return delta;
4553 delta += len;
4554 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
4555 showSz ? nameISize(sz) : ' ',
4556 dis_buf);
4558 return delta;
4562 /*------------------------------------------------------------*/
4563 /*--- Disassembling string ops (including REP prefixes) ---*/
4564 /*------------------------------------------------------------*/
4566 /* Code shared by all the string ops */
4567 static
4568 void dis_string_op_increment ( Int sz, IRTemp t_inc )
4570 UChar logSz;
4571 if (sz == 8 || sz == 4 || sz == 2) {
4572 logSz = 1;
4573 if (sz == 4) logSz = 2;
4574 if (sz == 8) logSz = 3;
4575 assign( t_inc,
4576 binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ),
4577 mkU8(logSz) ) );
4578 } else {
4579 assign( t_inc,
4580 IRExpr_Get( OFFB_DFLAG, Ity_I64 ) );
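/* Illustrative note (not from the original source): guest_DFLAG holds 1 or
   -1, so after "std" a 64-bit MOVSQ gets t_inc = (-1) << 3 = -8 and RSI/RDI
   step downwards by 8; after "cld" they step upwards by 8. */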
4584 static
4585 void dis_string_op( void (*dis_OP)( Int, IRTemp, Prefix pfx ),
4586 Int sz, const HChar* name, Prefix pfx )
4588 IRTemp t_inc = newTemp(Ity_I64);
4589 /* Really we ought to inspect the override prefixes, but we don't.
4590 The following assertion catches any resulting silliness. */
4591 vassert(pfx == clearSegBits(pfx));
4592 dis_string_op_increment(sz, t_inc);
4593 dis_OP( sz, t_inc, pfx );
4594 DIP("%s%c\n", name, nameISize(sz));
4597 static
4598 void dis_MOVS ( Int sz, IRTemp t_inc, Prefix pfx )
4600 IRType ty = szToITy(sz);
4601 IRTemp td = newTemp(Ity_I64); /* RDI */
4602 IRTemp ts = newTemp(Ity_I64); /* RSI */
4603 IRExpr *incd, *incs;
4605 if (haveASO(pfx)) {
4606 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4607 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
4608 } else {
4609 assign( td, getIReg64(R_RDI) );
4610 assign( ts, getIReg64(R_RSI) );
4613 storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );
4615 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4616 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
4617 if (haveASO(pfx)) {
4618 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4619 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
4621 putIReg64( R_RDI, incd );
4622 putIReg64( R_RSI, incs );
4625 static
4626 void dis_LODS ( Int sz, IRTemp t_inc, Prefix pfx )
4628 IRType ty = szToITy(sz);
4629 IRTemp ts = newTemp(Ity_I64); /* RSI */
4630 IRExpr *incs;
4632 if (haveASO(pfx))
4633 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
4634 else
4635 assign( ts, getIReg64(R_RSI) );
4637 putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) );
4639 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
4640 if (haveASO(pfx))
4641 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
4642 putIReg64( R_RSI, incs );
4645 static
4646 void dis_STOS ( Int sz, IRTemp t_inc, Prefix pfx )
4648 IRType ty = szToITy(sz);
4649 IRTemp ta = newTemp(ty); /* rAX */
4650 IRTemp td = newTemp(Ity_I64); /* RDI */
4651 IRExpr *incd;
4653 assign( ta, getIRegRAX(sz) );
4655 if (haveASO(pfx))
4656 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4657 else
4658 assign( td, getIReg64(R_RDI) );
4660 storeLE( mkexpr(td), mkexpr(ta) );
4662 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4663 if (haveASO(pfx))
4664 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4665 putIReg64( R_RDI, incd );
4668 static
4669 void dis_CMPS ( Int sz, IRTemp t_inc, Prefix pfx )
4671 IRType ty = szToITy(sz);
4672 IRTemp tdv = newTemp(ty); /* (RDI) */
4673 IRTemp tsv = newTemp(ty); /* (RSI) */
4674 IRTemp td = newTemp(Ity_I64); /* RDI */
4675 IRTemp ts = newTemp(Ity_I64); /* RSI */
4676 IRExpr *incd, *incs;
4678 if (haveASO(pfx)) {
4679 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4680 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
4681 } else {
4682 assign( td, getIReg64(R_RDI) );
4683 assign( ts, getIReg64(R_RSI) );
4686 assign( tdv, loadLE(ty,mkexpr(td)) );
4688 assign( tsv, loadLE(ty,mkexpr(ts)) );
4690 setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );
4692 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4693 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
4694 if (haveASO(pfx)) {
4695 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4696 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
4698 putIReg64( R_RDI, incd );
4699 putIReg64( R_RSI, incs );
4702 static
4703 void dis_SCAS ( Int sz, IRTemp t_inc, Prefix pfx )
4705 IRType ty = szToITy(sz);
4706 IRTemp ta = newTemp(ty); /* rAX */
4707 IRTemp td = newTemp(Ity_I64); /* RDI */
4708 IRTemp tdv = newTemp(ty); /* (RDI) */
4709 IRExpr *incd;
4711 assign( ta, getIRegRAX(sz) );
4713 if (haveASO(pfx))
4714 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4715 else
4716 assign( td, getIReg64(R_RDI) );
4718 assign( tdv, loadLE(ty,mkexpr(td)) );
4720 setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );
4722 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4723 if (haveASO(pfx))
4724 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4725 putIReg64( R_RDI, incd );
4729 /* Wrap the appropriate string op inside a REP/REPE/REPNE. We assume
4730 the insn is the last one in the basic block, and so emit a jump to
4731 the next insn, rather than just falling through. */
4732 static
4733 void dis_REP_op ( /*MOD*/DisResult* dres,
4734 AMD64Condcode cond,
4735 void (*dis_OP)(Int, IRTemp, Prefix),
4736 Int sz, Addr64 rip, Addr64 rip_next, const HChar* name,
4737 Prefix pfx )
4739 IRTemp t_inc = newTemp(Ity_I64);
4740 IRTemp tc;
4741 IRExpr* cmp;
4743 /* Really we ought to inspect the override prefixes, but we don't.
4744 The following assertion catches any resulting silliness. */
4745 vassert(pfx == clearSegBits(pfx));
4747 if (haveASO(pfx)) {
4748 tc = newTemp(Ity_I32); /* ECX */
4749 assign( tc, getIReg32(R_RCX) );
4750 cmp = binop(Iop_CmpEQ32, mkexpr(tc), mkU32(0));
4751 } else {
4752 tc = newTemp(Ity_I64); /* RCX */
4753 assign( tc, getIReg64(R_RCX) );
4754 cmp = binop(Iop_CmpEQ64, mkexpr(tc), mkU64(0));
4757 stmt( IRStmt_Exit( cmp, Ijk_Boring,
4758 IRConst_U64(rip_next), OFFB_RIP ) );
4760 if (haveASO(pfx))
4761 putIReg32(R_RCX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );
4762 else
4763 putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) );
4765 dis_string_op_increment(sz, t_inc);
4766 dis_OP (sz, t_inc, pfx);
4768 if (cond == AMD64CondAlways) {
4769 jmp_lit(dres, Ijk_Boring, rip);
4770 vassert(dres->whatNext == Dis_StopHere);
4771 } else {
4772 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond),
4773 Ijk_Boring,
4774 IRConst_U64(rip),
4775 OFFB_RIP ) );
4776 jmp_lit(dres, Ijk_Boring, rip_next);
4777 vassert(dres->whatNext == Dis_StopHere);
4779 DIP("%s%c\n", name, nameISize(sz));
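/* Illustrative note (not from the original source): for "rep movsb" the
   generated block first side-exits to rip_next when RCX is zero, otherwise
   decrements RCX, copies one byte, and (cond == AMD64CondAlways) jumps back
   to rip so the same instruction is re-executed.  For REPE/REPNE the jump
   back to rip happens only while the supplied condition holds; otherwise
   control falls through to rip_next. */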
4783 /*------------------------------------------------------------*/
4784 /*--- Arithmetic, etc. ---*/
4785 /*------------------------------------------------------------*/
4787 /* IMUL E, G. Supplied rip points to the modR/M byte. */
4788 static
4789 ULong dis_mul_E_G ( const VexAbiInfo* vbi,
4790 Prefix pfx,
4791 Int size,
4792 Long delta0 )
4794 Int alen;
4795 HChar dis_buf[50];
4796 UChar rm = getUChar(delta0);
4797 IRType ty = szToITy(size);
4798 IRTemp te = newTemp(ty);
4799 IRTemp tg = newTemp(ty);
4800 IRTemp resLo = newTemp(ty);
4802 assign( tg, getIRegG(size, pfx, rm) );
4803 if (epartIsReg(rm)) {
4804 assign( te, getIRegE(size, pfx, rm) );
4805 } else {
4806 IRTemp addr = disAMode( &alen, vbi, pfx, delta0, dis_buf, 0 );
4807 assign( te, loadLE(ty,mkexpr(addr)) );
4810 setFlags_MUL ( ty, te, tg, AMD64G_CC_OP_SMULB );
4812 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );
4814 putIRegG(size, pfx, rm, mkexpr(resLo) );
4816 if (epartIsReg(rm)) {
4817 DIP("imul%c %s, %s\n", nameISize(size),
4818 nameIRegE(size,pfx,rm),
4819 nameIRegG(size,pfx,rm));
4820 return 1+delta0;
4821 } else {
4822 DIP("imul%c %s, %s\n", nameISize(size),
4823 dis_buf,
4824 nameIRegG(size,pfx,rm));
4825 return alen+delta0;
4830 /* IMUL I * E -> G. Supplied rip points to the modR/M byte. */
4831 static
4832 ULong dis_imul_I_E_G ( const VexAbiInfo* vbi,
4833 Prefix pfx,
4834 Int size,
4835 Long delta,
4836 Int litsize )
4838 Long d64;
4839 Int alen;
4840 HChar dis_buf[50];
4841 UChar rm = getUChar(delta);
4842 IRType ty = szToITy(size);
4843 IRTemp te = newTemp(ty);
4844 IRTemp tl = newTemp(ty);
4845 IRTemp resLo = newTemp(ty);
4847 vassert(/*size == 1 ||*/ size == 2 || size == 4 || size == 8);
4849 if (epartIsReg(rm)) {
4850 assign(te, getIRegE(size, pfx, rm));
4851 delta++;
4852 } else {
4853 IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
4854 imin(4,litsize) );
4855 assign(te, loadLE(ty, mkexpr(addr)));
4856 delta += alen;
4858 d64 = getSDisp(imin(4,litsize),delta);
4859 delta += imin(4,litsize);
4861 d64 &= mkSizeMask(size);
4862 assign(tl, mkU(ty,d64));
4864 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));
4866 setFlags_MUL ( ty, te, tl, AMD64G_CC_OP_SMULB );
4868 putIRegG(size, pfx, rm, mkexpr(resLo));
4870 DIP("imul%c $%lld, %s, %s\n",
4871 nameISize(size), d64,
4872 ( epartIsReg(rm) ? nameIRegE(size,pfx,rm) : dis_buf ),
4873 nameIRegG(size,pfx,rm) );
4874 return delta;
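/* Illustrative example (not from the original source): "imulw $3, %bx, %ax"
   with BX = 0x5556 produces the truncated 16-bit result 0x0002
   (0x5556 * 3 = 0x10002), and setFlags_MUL arranges CF = OF = 1 because the
   full signed product does not fit in 16 bits. */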
4878 /* Generate an IR sequence to do a popcount operation on the supplied
4879 IRTemp, and return a new IRTemp holding the result. 'ty' may be
4880 Ity_I16, Ity_I32 or Ity_I64 only. */
4881 static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src )
4883 Int i;
4884 if (ty == Ity_I16) {
4885 IRTemp old = IRTemp_INVALID;
4886 IRTemp nyu = IRTemp_INVALID;
4887 IRTemp mask[4], shift[4];
4888 for (i = 0; i < 4; i++) {
4889 mask[i] = newTemp(ty);
4890 shift[i] = 1 << i;
4892 assign(mask[0], mkU16(0x5555));
4893 assign(mask[1], mkU16(0x3333));
4894 assign(mask[2], mkU16(0x0F0F));
4895 assign(mask[3], mkU16(0x00FF));
4896 old = src;
4897 for (i = 0; i < 4; i++) {
4898 nyu = newTemp(ty);
4899 assign(nyu,
4900 binop(Iop_Add16,
4901 binop(Iop_And16,
4902 mkexpr(old),
4903 mkexpr(mask[i])),
4904 binop(Iop_And16,
4905 binop(Iop_Shr16, mkexpr(old), mkU8(shift[i])),
4906 mkexpr(mask[i]))));
4907 old = nyu;
4909 return nyu;
4911 if (ty == Ity_I32) {
4912 IRTemp old = IRTemp_INVALID;
4913 IRTemp nyu = IRTemp_INVALID;
4914 IRTemp mask[5], shift[5];
4915 for (i = 0; i < 5; i++) {
4916 mask[i] = newTemp(ty);
4917 shift[i] = 1 << i;
4919 assign(mask[0], mkU32(0x55555555));
4920 assign(mask[1], mkU32(0x33333333));
4921 assign(mask[2], mkU32(0x0F0F0F0F));
4922 assign(mask[3], mkU32(0x00FF00FF));
4923 assign(mask[4], mkU32(0x0000FFFF));
4924 old = src;
4925 for (i = 0; i < 5; i++) {
4926 nyu = newTemp(ty);
4927 assign(nyu,
4928 binop(Iop_Add32,
4929 binop(Iop_And32,
4930 mkexpr(old),
4931 mkexpr(mask[i])),
4932 binop(Iop_And32,
4933 binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
4934 mkexpr(mask[i]))));
4935 old = nyu;
4937 return nyu;
4939 if (ty == Ity_I64) {
4940 IRTemp old = IRTemp_INVALID;
4941 IRTemp nyu = IRTemp_INVALID;
4942 IRTemp mask[6], shift[6];
4943 for (i = 0; i < 6; i++) {
4944 mask[i] = newTemp(ty);
4945 shift[i] = 1 << i;
4947 assign(mask[0], mkU64(0x5555555555555555ULL));
4948 assign(mask[1], mkU64(0x3333333333333333ULL));
4949 assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL));
4950 assign(mask[3], mkU64(0x00FF00FF00FF00FFULL));
4951 assign(mask[4], mkU64(0x0000FFFF0000FFFFULL));
4952 assign(mask[5], mkU64(0x00000000FFFFFFFFULL));
4953 old = src;
4954 for (i = 0; i < 6; i++) {
4955 nyu = newTemp(ty);
4956 assign(nyu,
4957 binop(Iop_Add64,
4958 binop(Iop_And64,
4959 mkexpr(old),
4960 mkexpr(mask[i])),
4961 binop(Iop_And64,
4962 binop(Iop_Shr64, mkexpr(old), mkU8(shift[i])),
4963 mkexpr(mask[i]))));
4964 old = nyu;
4966 return nyu;
4968 /*NOTREACHED*/
4969 vassert(0);
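/* Illustrative trace of the Ity_I16 case (not from the original source):
   src = 0x00B3 becomes 0x0062 after the 0x5555 step (per-2-bit counts),
   0x0032 after the 0x3333 step (per-nibble counts 3 and 2), 0x0005 after
   the 0x0F0F step, and 0x0005 after the 0x00FF step, i.e.
   popcount(0xB3) = 5. */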
4973 /* Generate an IR sequence to do a count-leading-zeroes operation on
4974 the supplied IRTemp, and return a new IRTemp holding the result.
4975 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where
4976 the argument is zero, return the number of bits in the word (the
4977 natural semantics). */
4978 static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
4980 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);
4982 IRTemp src64 = newTemp(Ity_I64);
4983 assign(src64, widenUto64( mkexpr(src) ));
4985 IRTemp src64x = newTemp(Ity_I64);
4986 assign(src64x,
4987 binop(Iop_Shl64, mkexpr(src64),
4988 mkU8(64 - 8 * sizeofIRType(ty))));
4990 // Clz64 has undefined semantics when its input is zero, so
4991 // special-case around that.
4992 IRTemp res64 = newTemp(Ity_I64);
4993 assign(res64,
4994 IRExpr_ITE(
4995 binop(Iop_CmpEQ64, mkexpr(src64x), mkU64(0)),
4996 mkU64(8 * sizeofIRType(ty)),
4997 unop(Iop_Clz64, mkexpr(src64x))
5000 IRTemp res = newTemp(ty);
5001 assign(res, narrowTo(ty, mkexpr(res64)));
5002 return res;
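/* Illustrative example (not from the original source): a 16-bit LZCNT of
   0x0008 shifts the operand up to 0x0008000000000000, for which Clz64
   returns 12; a zero input bypasses Clz64 and yields the operand width, 16. */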
5006 /* Generate an IR sequence to do a count-trailing-zeroes operation on
5007 the supplied IRTemp, and return a new IRTemp holding the result.
5008 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where
5009 the argument is zero, return the number of bits in the word (the
5010 natural semantics). */
5011 static IRTemp gen_TZCNT ( IRType ty, IRTemp src )
5013 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);
5015 IRTemp src64 = newTemp(Ity_I64);
5016 assign(src64, widenUto64( mkexpr(src) ));
5018 // Ctz64 has undefined semantics when its input is zero, so
5019 // special-case around that.
5020 IRTemp res64 = newTemp(Ity_I64);
5021 assign(res64,
5022 IRExpr_ITE(
5023 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0)),
5024 mkU64(8 * sizeofIRType(ty)),
5025 unop(Iop_Ctz64, mkexpr(src64))
5028 IRTemp res = newTemp(ty);
5029 assign(res, narrowTo(ty, mkexpr(res64)));
5030 return res;
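/* Illustrative example (not from the original source): TZCNT of 0x0008 at
   any width gives Ctz64(8) = 3, while a zero input yields the operand width
   (16, 32 or 64) via the ITE, matching the defined TZCNT result for zero. */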
5034 /*------------------------------------------------------------*/
5035 /*--- ---*/
5036 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/
5037 /*--- ---*/
5038 /*------------------------------------------------------------*/
5040 /* --- Helper functions for dealing with the register stack. --- */
5042 /* --- Set the emulation-warning pseudo-register. --- */
5044 static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
5046 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
5047 stmt( IRStmt_Put( OFFB_EMNOTE, e ) );
5050 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
5052 static IRExpr* mkQNaN64 ( void )
5054 /* QNaN is 0 2047 1 0(51times)
5055 == 0b 0 11111111111 1 0(51times)
5056 == 0x7FF8 0000 0000 0000
5058 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
5061 /* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */
5063 static IRExpr* get_ftop ( void )
5065 return IRExpr_Get( OFFB_FTOP, Ity_I32 );
5068 static void put_ftop ( IRExpr* e )
5070 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
5071 stmt( IRStmt_Put( OFFB_FTOP, e ) );
5074 /* --------- Get/put the C3210 bits. --------- */
5076 static IRExpr* /* :: Ity_I64 */ get_C3210 ( void )
5078 return IRExpr_Get( OFFB_FC3210, Ity_I64 );
5081 static void put_C3210 ( IRExpr* e /* :: Ity_I64 */ )
5083 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
5084 stmt( IRStmt_Put( OFFB_FC3210, e ) );
5087 /* --------- Get/put the FPU rounding mode. --------- */
5088 static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
5090 return unop(Iop_64to32, IRExpr_Get( OFFB_FPROUND, Ity_I64 ));
5093 static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
5095 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
5096 stmt( IRStmt_Put( OFFB_FPROUND, unop(Iop_32Uto64,e) ) );
5100 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */
5101 /* Produces a value in 0 .. 3, which is encoded as per the type
5102 IRRoundingMode. Since the guest_FPROUND value is also encoded as
5103 per IRRoundingMode, we merely need to get it and mask it for
5104 safety.
5106 static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
5108 return binop( Iop_And32, get_fpround(), mkU32(3) );
5111 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
5113 return mkU32(Irrm_NEAREST);
5117 /* --------- Get/set FP register tag bytes. --------- */
5119 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
5121 static void put_ST_TAG ( Int i, IRExpr* value )
5123 IRRegArray* descr;
5124 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
5125 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
5126 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
5129 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be
5130 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */
5132 static IRExpr* get_ST_TAG ( Int i )
5134 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
5135 return IRExpr_GetI( descr, get_ftop(), i );
5139 /* --------- Get/set FP registers. --------- */
5141 /* Given i, and some expression e, emit 'ST(i) = e' and set the
5142 register's tag to indicate the register is full. The previous
5143 state of the register is not checked. */
5145 static void put_ST_UNCHECKED ( Int i, IRExpr* value )
5147 IRRegArray* descr;
5148 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64);
5149 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
5150 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
5151 /* Mark the register as in-use. */
5152 put_ST_TAG(i, mkU8(1));
5155 /* Given i, and some expression e, emit
5156 ST(i) = is_full(i) ? NaN : e
5157 and set the tag accordingly.
5160 static void put_ST ( Int i, IRExpr* value )
5162 put_ST_UNCHECKED(
5164 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
5165 /* non-0 means full */
5166 mkQNaN64(),
5167 /* 0 means empty */
5168 value
5174 /* Given i, generate an expression yielding 'ST(i)'. */
5176 static IRExpr* get_ST_UNCHECKED ( Int i )
5178 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
5179 return IRExpr_GetI( descr, get_ftop(), i );
5183 /* Given i, generate an expression yielding
5184 is_full(i) ? ST(i) : NaN
5187 static IRExpr* get_ST ( Int i )
5189 return
5190 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
5191 /* non-0 means full */
5192 get_ST_UNCHECKED(i),
5193 /* 0 means empty */
5194 mkQNaN64());
5198 /* Given i, and some expression e, and a condition cond, generate IR
5199 which has the same effect as put_ST(i,e) when cond is true and has
5200 no effect when cond is false. Given the lack of proper
5201 if-then-else in the IR, this is pretty tricky.
5204 static void maybe_put_ST ( IRTemp cond, Int i, IRExpr* value )
5206 // new_tag = if cond then FULL else old_tag
5207 // new_val = if cond then (if old_tag==FULL then NaN else val)
5208 // else old_val
5210 IRTemp old_tag = newTemp(Ity_I8);
5211 assign(old_tag, get_ST_TAG(i));
5212 IRTemp new_tag = newTemp(Ity_I8);
5213 assign(new_tag,
5214 IRExpr_ITE(mkexpr(cond), mkU8(1)/*FULL*/, mkexpr(old_tag)));
5216 IRTemp old_val = newTemp(Ity_F64);
5217 assign(old_val, get_ST_UNCHECKED(i));
5218 IRTemp new_val = newTemp(Ity_F64);
5219 assign(new_val,
5220 IRExpr_ITE(mkexpr(cond),
5221 IRExpr_ITE(binop(Iop_CmpNE8, mkexpr(old_tag), mkU8(0)),
5222 /* non-0 means full */
5223 mkQNaN64(),
5224 /* 0 means empty */
5225 value),
5226 mkexpr(old_val)));
5228 put_ST_UNCHECKED(i, mkexpr(new_val));
5229 // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So
5230 // now set it to new_tag instead.
5231 put_ST_TAG(i, mkexpr(new_tag));
5234 /* Adjust FTOP downwards by one register. */
5236 static void fp_push ( void )
5238 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) );
5241 /* Adjust FTOP downwards by one register when COND is 1:I1. Else
5242 don't change it. */
5244 static void maybe_fp_push ( IRTemp cond )
5246 put_ftop( binop(Iop_Sub32, get_ftop(), unop(Iop_1Uto32,mkexpr(cond))) );
5249 /* Adjust FTOP upwards by one register, and mark the vacated register
5250 as empty. */
5252 static void fp_pop ( void )
5254 put_ST_TAG(0, mkU8(0));
5255 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
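/* Illustrative note (not from the original source): the x87 registers and
   tags are accessed with GetI/PutI on 8-element IRRegArrays indexed by
   FTOP+i, which wraps modulo 8.  An FLD is therefore fp_push() (FTOP := FTOP-1)
   followed by put_ST(0, value), and fp_pop() marks the vacated slot Empty
   before incrementing FTOP. */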
5258 /* Set the C2 bit of the FPU status register to e[0]. Assumes that
5259 e[31:1] == 0.
5261 static void set_C2 ( IRExpr* e )
5263 IRExpr* cleared = binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2));
5264 put_C3210( binop(Iop_Or64,
5265 cleared,
5266 binop(Iop_Shl64, e, mkU8(AMD64G_FC_SHIFT_C2))) );
5269 /* Generate code to check that abs(d64) < 2^63 and is finite. This is
5270 used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The
5271 test is simple, but the derivation of it is not so simple.
5273 The exponent field for an IEEE754 double is 11 bits. That means it
5274 can take values 0 through 0x7FF. If the exponent has value 0x7FF,
5275 the number is either a NaN or an Infinity and so is not finite.
5276 Furthermore, a finite value of exactly 2^63 is the smallest value
5277 that has exponent value 0x43E. Hence, what we need to do is
5278 extract the exponent, ignoring the sign bit and mantissa, and check
5279 it is < 0x43E, or <= 0x43D.
5281 To make this easily applicable to 32- and 64-bit targets, a
5282 roundabout approach is used. First the number is converted to I64,
5283 then the top 32 bits are taken. Shifting them right by 20 bits
5284 places the sign bit and exponent in the bottom 12 bits. Anding
5285 with 0x7FF gets rid of the sign bit, leaving just the exponent
5286 available for comparison.
5288 static IRTemp math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64 )
5290 IRTemp i64 = newTemp(Ity_I64);
5291 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(d64)) );
5292 IRTemp exponent = newTemp(Ity_I32);
5293 assign(exponent,
5294 binop(Iop_And32,
5295 binop(Iop_Shr32, unop(Iop_64HIto32, mkexpr(i64)), mkU8(20)),
5296 mkU32(0x7FF)));
5297 IRTemp in_range_and_finite = newTemp(Ity_I1);
5298 assign(in_range_and_finite,
5299 binop(Iop_CmpLE32U, mkexpr(exponent), mkU32(0x43D)));
5300 return in_range_and_finite;
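/* Illustrative check (not from the original source): |d64| = 2^63 has a
   biased exponent of 1023+63 = 0x43E and so fails the <= 0x43D test, while
   the largest double below 2^63 has exponent 0x43D and passes; NaNs and
   infinities (exponent 0x7FF) also fail, as intended. */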
5303 /* Invent a plausible-looking FPU status word value:
5304 ((ftop & 7) << 11) | (c3210 & 0x4700)
5306 static IRExpr* get_FPU_sw ( void )
5308 return
5309 unop(Iop_32to16,
5310 binop(Iop_Or32,
5311 binop(Iop_Shl32,
5312 binop(Iop_And32, get_ftop(), mkU32(7)),
5313 mkU8(11)),
5314 binop(Iop_And32, unop(Iop_64to32, get_C3210()),
5315 mkU32(0x4700))
5320 /* Generate a dirty helper call that initialises the x87 state a la
5321 FINIT. If |guard| is NULL, it is done unconditionally. Otherwise
5322 |guard| is used as a guarding condition.
5324 static void gen_FINIT_SEQUENCE ( IRExpr* guard )
5326 /* Uses dirty helper:
5327 void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* ) */
5328 IRDirty* d = unsafeIRDirty_0_N (
5329 0/*regparms*/,
5330 "amd64g_dirtyhelper_FINIT",
5331 &amd64g_dirtyhelper_FINIT,
5332 mkIRExprVec_1( IRExpr_GSPTR() )
5335 /* declare we're writing guest state */
5336 d->nFxState = 5;
5337 vex_bzero(&d->fxState, sizeof(d->fxState));
5339 d->fxState[0].fx = Ifx_Write;
5340 d->fxState[0].offset = OFFB_FTOP;
5341 d->fxState[0].size = sizeof(UInt);
5343 d->fxState[1].fx = Ifx_Write;
5344 d->fxState[1].offset = OFFB_FPREGS;
5345 d->fxState[1].size = 8 * sizeof(ULong);
5347 d->fxState[2].fx = Ifx_Write;
5348 d->fxState[2].offset = OFFB_FPTAGS;
5349 d->fxState[2].size = 8 * sizeof(UChar);
5351 d->fxState[3].fx = Ifx_Write;
5352 d->fxState[3].offset = OFFB_FPROUND;
5353 d->fxState[3].size = sizeof(ULong);
5355 d->fxState[4].fx = Ifx_Write;
5356 d->fxState[4].offset = OFFB_FC3210;
5357 d->fxState[4].size = sizeof(ULong);
5359 if (guard)
5360 d->guard = guard;
5362 stmt( IRStmt_Dirty(d) );
5366 /* ------------------------------------------------------- */
5367 /* Given all that stack-mangling junk, we can now go ahead
5368 and describe FP instructions.
5371 /* ST(0) = ST(0) `op` mem64/32(addr)
5372 Need to check ST(0)'s tag on read, but not on write.
5374 static
5375 void fp_do_op_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf,
5376 IROp op, Bool dbl )
5378 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
5379 if (dbl) {
5380 put_ST_UNCHECKED(0,
5381 triop( op,
5382 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5383 get_ST(0),
5384 loadLE(Ity_F64,mkexpr(addr))
5386 } else {
5387 put_ST_UNCHECKED(0,
5388 triop( op,
5389 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5390 get_ST(0),
5391 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr)))
5397 /* ST(0) = mem64/32(addr) `op` ST(0)
5398 Need to check ST(0)'s tag on read, but not on write.
5400 static
5401 void fp_do_oprev_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf,
5402 IROp op, Bool dbl )
5404 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
5405 if (dbl) {
5406 put_ST_UNCHECKED(0,
5407 triop( op,
5408 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5409 loadLE(Ity_F64,mkexpr(addr)),
5410 get_ST(0)
5412 } else {
5413 put_ST_UNCHECKED(0,
5414 triop( op,
5415 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5416 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))),
5417 get_ST(0)
5423 /* ST(dst) = ST(dst) `op` ST(src).
5424 Check dst and src tags when reading but not on write.
5426 static
5427 void fp_do_op_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
5428 Bool pop_after )
5430 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
5431 put_ST_UNCHECKED(
5432 st_dst,
5433 triop( op,
5434 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5435 get_ST(st_dst),
5436 get_ST(st_src) )
5438 if (pop_after)
5439 fp_pop();
5442 /* ST(dst) = ST(src) `op` ST(dst).
5443 Check dst and src tags when reading but not on write.
5445 static
5446 void fp_do_oprev_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
5447 Bool pop_after )
5449 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
5450 put_ST_UNCHECKED(
5451 st_dst,
5452 triop( op,
5453 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5454 get_ST(st_src),
5455 get_ST(st_dst) )
5457 if (pop_after)
5458 fp_pop();
5461 /* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */
5462 static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after )
5464 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i);
5465 /* This is a bit of a hack (and isn't really right). It sets
5466 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
5467 documentation implies A and S are unchanged.
5469 /* It's also fishy in that it is used both for COMIP and
5470 UCOMIP, and they aren't the same (although similar). */
5471 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
5472 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
5473 stmt( IRStmt_Put(
5474 OFFB_CC_DEP1,
5475 binop( Iop_And64,
5476 unop( Iop_32Uto64,
5477 binop(Iop_CmpF64, get_ST(0), get_ST(i))),
5478 mkU64(0x45)
5479 )));
5480 if (pop_after)
5481 fp_pop();
5485 /* returns
5486 32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 )
5488 static IRExpr* x87ishly_qnarrow_32_to_16 ( IRExpr* e32 )
5490 IRTemp t32 = newTemp(Ity_I32);
5491 assign( t32, e32 );
5492 return
5493 IRExpr_ITE(
5494 binop(Iop_CmpLT64U,
5495 unop(Iop_32Uto64,
5496 binop(Iop_Add32, mkexpr(t32), mkU32(32768))),
5497 mkU64(65536)),
5498 unop(Iop_32to16, mkexpr(t32)),
5499 mkU16( 0x8000 ) );
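/* Illustrative example (not from the original source): e32 = -5 gives
   -5 + 32768 = 32763 < 65536, so 0xFFFB is returned; e32 = 40000 gives
   72768 >= 65536, so the saturated value 0x8000 is returned, and likewise
   for anything below -32768. */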
5503 static
5504 ULong dis_FPU ( /*OUT*/Bool* decode_ok,
5505 const VexAbiInfo* vbi, Prefix pfx, Long delta )
5507 Int len;
5508 UInt r_src, r_dst;
5509 HChar dis_buf[50];
5510 IRTemp t1, t2;
5512 /* On entry, delta points at the second byte of the insn (the modrm
5513 byte).*/
5514 UChar first_opcode = getUChar(delta-1);
5515 UChar modrm = getUChar(delta+0);
5517 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
5519 if (first_opcode == 0xD8) {
5520 if (modrm < 0xC0) {
5522 /* bits 5,4,3 are an opcode extension, and the modRM also
5523 specifies an address. */
5524 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
5525 delta += len;
5527 switch (gregLO3ofRM(modrm)) {
5529 case 0: /* FADD single-real */
5530 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False );
5531 break;
5533 case 1: /* FMUL single-real */
5534 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False );
5535 break;
5537 case 2: /* FCOM single-real */
5538 DIP("fcoms %s\n", dis_buf);
5539 /* This forces C1 to zero, which isn't right. */
5540 /* The AMD documentation suggests that forcing C1 to
5541 zero is correct (Eliot Moss) */
5542 put_C3210(
5543 unop( Iop_32Uto64,
5544 binop( Iop_And32,
5545 binop(Iop_Shl32,
5546 binop(Iop_CmpF64,
5547 get_ST(0),
5548 unop(Iop_F32toF64,
5549 loadLE(Ity_F32,mkexpr(addr)))),
5550 mkU8(8)),
5551 mkU32(0x4500)
5552 )));
5553 break;
5555 case 3: /* FCOMP single-real */
5556 /* The AMD documentation suggests that forcing C1 to
5557 zero is correct (Eliot Moss) */
5558 DIP("fcomps %s\n", dis_buf);
5559 /* This forces C1 to zero, which isn't right. */
5560 put_C3210(
5561 unop( Iop_32Uto64,
5562 binop( Iop_And32,
5563 binop(Iop_Shl32,
5564 binop(Iop_CmpF64,
5565 get_ST(0),
5566 unop(Iop_F32toF64,
5567 loadLE(Ity_F32,mkexpr(addr)))),
5568 mkU8(8)),
5569 mkU32(0x4500)
5570 )));
5571 fp_pop();
5572 break;
5574 case 4: /* FSUB single-real */
5575 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False );
5576 break;
5578 case 5: /* FSUBR single-real */
5579 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False );
5580 break;
5582 case 6: /* FDIV single-real */
5583 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False );
5584 break;
5586 case 7: /* FDIVR single-real */
5587 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False );
5588 break;
5590 default:
5591 vex_printf("unhandled opc_aux = 0x%2x\n",
5592 (UInt)gregLO3ofRM(modrm));
5593 vex_printf("first_opcode == 0xD8\n");
5594 goto decode_fail;
5596 } else {
5597 delta++;
5598 switch (modrm) {
5600 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
5601 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False );
5602 break;
5604 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
5605 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False );
5606 break;
5608 /* Dunno if this is right */
5609 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
5610 r_dst = (UInt)modrm - 0xD0;
5611 DIP("fcom %%st(0),%%st(%u)\n", r_dst);
5612 /* This forces C1 to zero, which isn't right. */
5613 put_C3210(
5614 unop(Iop_32Uto64,
5615 binop( Iop_And32,
5616 binop(Iop_Shl32,
5617 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5618 mkU8(8)),
5619 mkU32(0x4500)
5620 )));
5621 break;
5623 /* Dunno if this is right */
5624 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
5625 r_dst = (UInt)modrm - 0xD8;
5626 DIP("fcomp %%st(0),%%st(%u)\n", r_dst);
5627 /* This forces C1 to zero, which isn't right. */
5628 put_C3210(
5629 unop(Iop_32Uto64,
5630 binop( Iop_And32,
5631 binop(Iop_Shl32,
5632 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5633 mkU8(8)),
5634 mkU32(0x4500)
5635 )));
5636 fp_pop();
5637 break;
5639 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
5640 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False );
5641 break;
5643 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
5644 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False );
5645 break;
5647 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
5648 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False );
5649 break;
5651 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
5652 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False );
5653 break;
5655 default:
5656 goto decode_fail;
5661 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
5662 else
5663 if (first_opcode == 0xD9) {
5664 if (modrm < 0xC0) {
5666 /* bits 5,4,3 are an opcode extension, and the modRM also
5667 specifies an address. */
5668 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
5669 delta += len;
5671 switch (gregLO3ofRM(modrm)) {
5673 case 0: /* FLD single-real */
5674 DIP("flds %s\n", dis_buf);
5675 fp_push();
5676 put_ST(0, unop(Iop_F32toF64,
5677 loadLE(Ity_F32, mkexpr(addr))));
5678 break;
5680 case 2: /* FST single-real */
5681 DIP("fsts %s\n", dis_buf);
5682 storeLE(mkexpr(addr),
5683 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
5684 break;
5686 case 3: /* FSTP single-real */
5687 DIP("fstps %s\n", dis_buf);
5688 storeLE(mkexpr(addr),
5689 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
5690 fp_pop();
5691 break;
5693 case 4: { /* FLDENV m28 */
5694 /* Uses dirty helper:
5695 VexEmNote amd64g_dirtyhelper_FLDENV ( VexGuestAMD64State*, HWord ) */
5696 IRTemp ew = newTemp(Ity_I32);
5697 IRTemp w64 = newTemp(Ity_I64);
5698 IRDirty* d = unsafeIRDirty_0_N (
5699 0/*regparms*/,
5700 "amd64g_dirtyhelper_FLDENV",
5701 &amd64g_dirtyhelper_FLDENV,
5702 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
5704 d->tmp = w64;
5705 /* declare we're reading memory */
5706 d->mFx = Ifx_Read;
5707 d->mAddr = mkexpr(addr);
5708 d->mSize = 28;
5710 /* declare we're writing guest state */
5711 d->nFxState = 4;
5712 vex_bzero(&d->fxState, sizeof(d->fxState));
5714 d->fxState[0].fx = Ifx_Write;
5715 d->fxState[0].offset = OFFB_FTOP;
5716 d->fxState[0].size = sizeof(UInt);
5718 d->fxState[1].fx = Ifx_Write;
5719 d->fxState[1].offset = OFFB_FPTAGS;
5720 d->fxState[1].size = 8 * sizeof(UChar);
5722 d->fxState[2].fx = Ifx_Write;
5723 d->fxState[2].offset = OFFB_FPROUND;
5724 d->fxState[2].size = sizeof(ULong);
5726 d->fxState[3].fx = Ifx_Write;
5727 d->fxState[3].offset = OFFB_FC3210;
5728 d->fxState[3].size = sizeof(ULong);
5730 stmt( IRStmt_Dirty(d) );
5732 /* ew contains any emulation warning we may need to
5733 issue. If needed, side-exit to the next insn,
5734 reporting the warning, so that Valgrind's dispatcher
5735 sees the warning. */
5736 assign(ew, unop(Iop_64to32,mkexpr(w64)) );
5737 put_emwarn( mkexpr(ew) );
5738 stmt(
5739 IRStmt_Exit(
5740 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
5741 Ijk_EmWarn,
5742 IRConst_U64( guest_RIP_bbstart+delta ),
5743 OFFB_RIP
5747 DIP("fldenv %s\n", dis_buf);
5748 break;
5751 case 5: {/* FLDCW */
5752 /* The only thing we observe in the control word is the
5753 rounding mode. Therefore, pass the 16-bit value
5754 (x87 native-format control word) to a clean helper,
5755 getting back a 64-bit value, the lower half of which
5756 is the FPROUND value to store, and the upper half of
5757 which is the emulation-warning token which may be
5758 generated. */
5760 /* ULong amd64g_check_fldcw ( ULong ); */
5761 IRTemp t64 = newTemp(Ity_I64);
5762 IRTemp ew = newTemp(Ity_I32);
5763 DIP("fldcw %s\n", dis_buf);
5764 assign( t64, mkIRExprCCall(
5765 Ity_I64, 0/*regparms*/,
5766 "amd64g_check_fldcw",
5767 &amd64g_check_fldcw,
5768 mkIRExprVec_1(
5769 unop( Iop_16Uto64,
5770 loadLE(Ity_I16, mkexpr(addr)))
5775 put_fpround( unop(Iop_64to32, mkexpr(t64)) );
5776 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
5777 put_emwarn( mkexpr(ew) );
5778 /* Finally, if an emulation warning was reported,
5779 side-exit to the next insn, reporting the warning,
5780 so that Valgrind's dispatcher sees the warning. */
5781 stmt(
5782 IRStmt_Exit(
5783 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
5784 Ijk_EmWarn,
5785 IRConst_U64( guest_RIP_bbstart+delta ),
5786 OFFB_RIP
5789 break;
5792 case 6: { /* FNSTENV m28 */
5793 /* Uses dirty helper:
5794 void amd64g_dirtyhelper_FSTENV ( VexGuestAMD64State*, HWord ) */
5795 IRDirty* d = unsafeIRDirty_0_N (
5796 0/*regparms*/,
5797 "amd64g_dirtyhelper_FSTENV",
5798 &amd64g_dirtyhelper_FSTENV,
5799 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
5801 /* declare we're writing memory */
5802 d->mFx = Ifx_Write;
5803 d->mAddr = mkexpr(addr);
5804 d->mSize = 28;
5806 /* declare we're reading guest state */
5807 d->nFxState = 4;
5808 vex_bzero(&d->fxState, sizeof(d->fxState));
5810 d->fxState[0].fx = Ifx_Read;
5811 d->fxState[0].offset = OFFB_FTOP;
5812 d->fxState[0].size = sizeof(UInt);
5814 d->fxState[1].fx = Ifx_Read;
5815 d->fxState[1].offset = OFFB_FPTAGS;
5816 d->fxState[1].size = 8 * sizeof(UChar);
5818 d->fxState[2].fx = Ifx_Read;
5819 d->fxState[2].offset = OFFB_FPROUND;
5820 d->fxState[2].size = sizeof(ULong);
5822 d->fxState[3].fx = Ifx_Read;
5823 d->fxState[3].offset = OFFB_FC3210;
5824 d->fxState[3].size = sizeof(ULong);
5826 stmt( IRStmt_Dirty(d) );
5828 DIP("fnstenv %s\n", dis_buf);
5829 break;
5832 case 7: /* FNSTCW */
5833 /* Fake up a native x87 FPU control word. The only
5834 thing it depends on is FPROUND[1:0], so call a clean
5835 helper to cook it up. */
5836 /* ULong amd64g_create_fpucw ( ULong fpround ) */
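/* Presumably the helper simply plants FPROUND into the RC field
   (bits 11:10) of an otherwise-default control word such as 0x037F;
   only the rounding-mode bits are modelled here in any case. */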
5837 DIP("fnstcw %s\n", dis_buf);
5838 storeLE(
5839 mkexpr(addr),
5840 unop( Iop_64to16,
5841 mkIRExprCCall(
5842 Ity_I64, 0/*regp*/,
5843 "amd64g_create_fpucw", &amd64g_create_fpucw,
5844 mkIRExprVec_1( unop(Iop_32Uto64, get_fpround()) )
5848 break;
5850 default:
5851 vex_printf("unhandled opc_aux = 0x%2x\n",
5852 (UInt)gregLO3ofRM(modrm));
5853 vex_printf("first_opcode == 0xD9\n");
5854 goto decode_fail;
5857 } else {
5858 delta++;
5859 switch (modrm) {
5861 case 0xC0 ... 0xC7: /* FLD %st(?) */
5862 r_src = (UInt)modrm - 0xC0;
5863 DIP("fld %%st(%u)\n", r_src);
5864 t1 = newTemp(Ity_F64);
5865 assign(t1, get_ST(r_src));
5866 fp_push();
5867 put_ST(0, mkexpr(t1));
5868 break;
5870 case 0xC8 ... 0xCF: /* FXCH %st(?) */
5871 r_src = (UInt)modrm - 0xC8;
5872 DIP("fxch %%st(%u)\n", r_src);
5873 t1 = newTemp(Ity_F64);
5874 t2 = newTemp(Ity_F64);
5875 assign(t1, get_ST(0));
5876 assign(t2, get_ST(r_src));
5877 put_ST_UNCHECKED(0, mkexpr(t2));
5878 put_ST_UNCHECKED(r_src, mkexpr(t1));
5879 break;
5881 case 0xE0: /* FCHS */
5882 DIP("fchs\n");
5883 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0)));
5884 break;
5886 case 0xE1: /* FABS */
5887 DIP("fabs\n");
5888 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0)));
5889 break;
5891 case 0xE5: { /* FXAM */
5892 /* This is an interesting one. It examines %st(0),
5893 regardless of whether the tag says it's empty or not.
5894 Here, just pass both the tag (in our format) and the
5895 value (as a double, actually a ULong) to a helper
5896 function. */
5897 IRExpr** args
5898 = mkIRExprVec_2( unop(Iop_8Uto64, get_ST_TAG(0)),
5899 unop(Iop_ReinterpF64asI64,
5900 get_ST_UNCHECKED(0)) );
5901 put_C3210(mkIRExprCCall(
5902 Ity_I64,
5903 0/*regparm*/,
5904 "amd64g_calculate_FXAM", &amd64g_calculate_FXAM,
5905 args
5907 DIP("fxam\n");
5908 break;
5911 case 0xE8: /* FLD1 */
5912 DIP("fld1\n");
5913 fp_push();
5914 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
5915 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL)));
5916 break;
5918 case 0xE9: /* FLDL2T */
5919 DIP("fldl2t\n");
5920 fp_push();
5921 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
5922 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL)));
5923 break;
5925 case 0xEA: /* FLDL2E */
5926 DIP("fldl2e\n");
5927 fp_push();
5928 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
5929 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL)));
5930 break;
5932 case 0xEB: /* FLDPI */
5933 DIP("fldpi\n");
5934 fp_push();
5935 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
5936 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL)));
5937 break;
5939 case 0xEC: /* FLDLG2 */
5940 DIP("fldlg2\n");
5941 fp_push();
5942 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
5943 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL)));
5944 break;
5946 case 0xED: /* FLDLN2 */
5947 DIP("fldln2\n");
5948 fp_push();
5949 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
5950 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL)));
5951 break;
5953 case 0xEE: /* FLDZ */
5954 DIP("fldz\n");
5955 fp_push();
5956 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
5957 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL)));
5958 break;
5960 case 0xF0: /* F2XM1 */
5961 DIP("f2xm1\n");
5962 put_ST_UNCHECKED(0,
5963 binop(Iop_2xm1F64,
5964 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5965 get_ST(0)));
5966 break;
5968 case 0xF1: /* FYL2X */
5969 DIP("fyl2x\n");
5970 put_ST_UNCHECKED(1,
5971 triop(Iop_Yl2xF64,
5972 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5973 get_ST(1),
5974 get_ST(0)));
5975 fp_pop();
5976 break;
5978 case 0xF2: { /* FPTAN */
5979 DIP("fptan\n");
5980 IRTemp argD = newTemp(Ity_F64);
5981 assign(argD, get_ST(0));
5982 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
5983 IRTemp resD = newTemp(Ity_F64);
5984 assign(resD,
5985 IRExpr_ITE(
5986 mkexpr(argOK),
5987 binop(Iop_TanF64,
5988 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5989 mkexpr(argD)),
5990 mkexpr(argD))
5992 put_ST_UNCHECKED(0, mkexpr(resD));
5993 /* Conditionally push 1.0 on the stack, if the arg is
5994 in range */
5995 maybe_fp_push(argOK);
5996 maybe_put_ST(argOK, 0,
5997 IRExpr_Const(IRConst_F64(1.0)));
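/* C2 := !argOK below, i.e. C2 is set when the operand was out of
   range and left unreduced, matching the hardware convention. */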
5998 set_C2( binop(Iop_Xor64,
5999 unop(Iop_1Uto64, mkexpr(argOK)),
6000 mkU64(1)) );
6001 break;
6004 case 0xF3: /* FPATAN */
6005 DIP("fpatan\n");
6006 put_ST_UNCHECKED(1,
6007 triop(Iop_AtanF64,
6008 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6009 get_ST(1),
6010 get_ST(0)));
6011 fp_pop();
6012 break;
6014 case 0xF4: { /* FXTRACT */
6015 IRTemp argF = newTemp(Ity_F64);
6016 IRTemp sigF = newTemp(Ity_F64);
6017 IRTemp expF = newTemp(Ity_F64);
6018 IRTemp argI = newTemp(Ity_I64);
6019 IRTemp sigI = newTemp(Ity_I64);
6020 IRTemp expI = newTemp(Ity_I64);
6021 DIP("fxtract\n");
6022 assign( argF, get_ST(0) );
6023 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF)));
6024 assign( sigI,
6025 mkIRExprCCall(
6026 Ity_I64, 0/*regparms*/,
6027 "x86amd64g_calculate_FXTRACT",
6028 &x86amd64g_calculate_FXTRACT,
6029 mkIRExprVec_2( mkexpr(argI),
6030 mkIRExpr_HWord(0)/*sig*/ ))
6032 assign( expI,
6033 mkIRExprCCall(
6034 Ity_I64, 0/*regparms*/,
6035 "x86amd64g_calculate_FXTRACT",
6036 &x86amd64g_calculate_FXTRACT,
6037 mkIRExprVec_2( mkexpr(argI),
6038 mkIRExpr_HWord(1)/*exp*/ ))
6040 assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) );
6041 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) );
6042 /* exponent */
6043 put_ST_UNCHECKED(0, mkexpr(expF) );
6044 fp_push();
6045 /* significand */
6046 put_ST(0, mkexpr(sigF) );
6047 break;
6050 case 0xF5: { /* FPREM1 -- IEEE compliant */
6051 IRTemp a1 = newTemp(Ity_F64);
6052 IRTemp a2 = newTemp(Ity_F64);
6053 DIP("fprem1\n");
6054 /* Do FPREM1 twice, once to get the remainder, and once
6055 to get the C3210 flag values. */
6056 assign( a1, get_ST(0) );
6057 assign( a2, get_ST(1) );
6058 put_ST_UNCHECKED(0,
6059 triop(Iop_PRem1F64,
6060 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6061 mkexpr(a1),
6062 mkexpr(a2)));
6063 put_C3210(
6064 unop(Iop_32Uto64,
6065 triop(Iop_PRem1C3210F64,
6066 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6067 mkexpr(a1),
6068 mkexpr(a2)) ));
6069 break;
6072 case 0xF7: /* FINCSTP */
6073 DIP("fincstp\n");
6074 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
6075 break;
6077 case 0xF8: { /* FPREM -- not IEEE compliant */
6078 IRTemp a1 = newTemp(Ity_F64);
6079 IRTemp a2 = newTemp(Ity_F64);
6080 DIP("fprem\n");
6081 /* Do FPREM twice, once to get the remainder, and once
6082 to get the C3210 flag values. */
6083 assign( a1, get_ST(0) );
6084 assign( a2, get_ST(1) );
6085 put_ST_UNCHECKED(0,
6086 triop(Iop_PRemF64,
6087 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6088 mkexpr(a1),
6089 mkexpr(a2)));
6090 put_C3210(
6091 unop(Iop_32Uto64,
6092 triop(Iop_PRemC3210F64,
6093 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6094 mkexpr(a1),
6095 mkexpr(a2)) ));
6096 break;
6099 case 0xF9: /* FYL2XP1 */
6100 DIP("fyl2xp1\n");
6101 put_ST_UNCHECKED(1,
6102 triop(Iop_Yl2xp1F64,
6103 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6104 get_ST(1),
6105 get_ST(0)));
6106 fp_pop();
6107 break;
6109 case 0xFA: /* FSQRT */
6110 DIP("fsqrt\n");
6111 put_ST_UNCHECKED(0,
6112 binop(Iop_SqrtF64,
6113 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6114 get_ST(0)));
6115 break;
6117 case 0xFB: { /* FSINCOS */
6118 DIP("fsincos\n");
6119 IRTemp argD = newTemp(Ity_F64);
6120 assign(argD, get_ST(0));
6121 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
6122 IRTemp resD = newTemp(Ity_F64);
6123 assign(resD,
6124 IRExpr_ITE(
6125 mkexpr(argOK),
6126 binop(Iop_SinF64,
6127 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6128 mkexpr(argD)),
6129 mkexpr(argD))
6131 put_ST_UNCHECKED(0, mkexpr(resD));
6132 /* Conditionally push the cos value on the stack, if
6133 the arg is in range */
6134 maybe_fp_push(argOK);
6135 maybe_put_ST(argOK, 0,
6136 binop(Iop_CosF64,
6137 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6138 mkexpr(argD)));
6139 set_C2( binop(Iop_Xor64,
6140 unop(Iop_1Uto64, mkexpr(argOK)),
6141 mkU64(1)) );
6142 break;
6145 case 0xFC: /* FRNDINT */
6146 DIP("frndint\n");
6147 put_ST_UNCHECKED(0,
6148 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) );
6149 break;
6151 case 0xFD: /* FSCALE */
6152 DIP("fscale\n");
6153 put_ST_UNCHECKED(0,
6154 triop(Iop_ScaleF64,
6155 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6156 get_ST(0),
6157 get_ST(1)));
6158 break;
6160 case 0xFE: /* FSIN */
6161 case 0xFF: { /* FCOS */
6162 Bool isSIN = modrm == 0xFE;
6163 DIP("%s\n", isSIN ? "fsin" : "fcos");
6164 IRTemp argD = newTemp(Ity_F64);
6165 assign(argD, get_ST(0));
6166 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
6167 IRTemp resD = newTemp(Ity_F64);
6168 assign(resD,
6169 IRExpr_ITE(
6170 mkexpr(argOK),
6171 binop(isSIN ? Iop_SinF64 : Iop_CosF64,
6172 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6173 mkexpr(argD)),
6174 mkexpr(argD))
6176 put_ST_UNCHECKED(0, mkexpr(resD));
6177 set_C2( binop(Iop_Xor64,
6178 unop(Iop_1Uto64, mkexpr(argOK)),
6179 mkU64(1)) );
6180 break;
6183 default:
6184 goto decode_fail;
6189 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
6190 else
6191 if (first_opcode == 0xDA) {
6193 if (modrm < 0xC0) {
6195 /* bits 5,4,3 are an opcode extension, and the modRM also
6196 specifies an address. */
6197 IROp fop;
6198 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6199 delta += len;
6200 switch (gregLO3ofRM(modrm)) {
6202 case 0: /* FIADD m32int */ /* ST(0) += m32int */
6203 DIP("fiaddl %s\n", dis_buf);
6204 fop = Iop_AddF64;
6205 goto do_fop_m32;
6207 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
6208 DIP("fimull %s\n", dis_buf);
6209 fop = Iop_MulF64;
6210 goto do_fop_m32;
6212 case 4: /* FISUB m32int */ /* ST(0) -= m32int */
6213 DIP("fisubl %s\n", dis_buf);
6214 fop = Iop_SubF64;
6215 goto do_fop_m32;
6217 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
6218 DIP("fisubrl %s\n", dis_buf);
6219 fop = Iop_SubF64;
6220 goto do_foprev_m32;
6222 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
6223 DIP("fisubl %s\n", dis_buf);
6224 fop = Iop_DivF64;
6225 goto do_fop_m32;
6227 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
6228 DIP("fidivrl %s\n", dis_buf);
6229 fop = Iop_DivF64;
6230 goto do_foprev_m32;
6232 do_fop_m32:
6233 put_ST_UNCHECKED(0,
6234 triop(fop,
6235 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6236 get_ST(0),
6237 unop(Iop_I32StoF64,
6238 loadLE(Ity_I32, mkexpr(addr)))));
6239 break;
6241 do_foprev_m32:
6242 put_ST_UNCHECKED(0,
6243 triop(fop,
6244 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6245 unop(Iop_I32StoF64,
6246 loadLE(Ity_I32, mkexpr(addr))),
6247 get_ST(0)));
6248 break;
6250 default:
6251 vex_printf("unhandled opc_aux = 0x%2x\n",
6252 (UInt)gregLO3ofRM(modrm));
6253 vex_printf("first_opcode == 0xDA\n");
6254 goto decode_fail;
6257 } else {
6259 delta++;
6260 switch (modrm) {
6262 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
6263 r_src = (UInt)modrm - 0xC0;
6264 DIP("fcmovb %%st(%u), %%st(0)\n", r_src);
6265 put_ST_UNCHECKED(0,
6266 IRExpr_ITE(
6267 mk_amd64g_calculate_condition(AMD64CondB),
6268 get_ST(r_src), get_ST(0)) );
6269 break;
6271 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
6272 r_src = (UInt)modrm - 0xC8;
6273 DIP("fcmovz %%st(%u), %%st(0)\n", r_src);
6274 put_ST_UNCHECKED(0,
6275 IRExpr_ITE(
6276 mk_amd64g_calculate_condition(AMD64CondZ),
6277 get_ST(r_src), get_ST(0)) );
6278 break;
6280 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
6281 r_src = (UInt)modrm - 0xD0;
6282 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src);
6283 put_ST_UNCHECKED(0,
6284 IRExpr_ITE(
6285 mk_amd64g_calculate_condition(AMD64CondBE),
6286 get_ST(r_src), get_ST(0)) );
6287 break;
6289 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
6290 r_src = (UInt)modrm - 0xD8;
6291 DIP("fcmovu %%st(%u), %%st(0)\n", r_src);
6292 put_ST_UNCHECKED(0,
6293 IRExpr_ITE(
6294 mk_amd64g_calculate_condition(AMD64CondP),
6295 get_ST(r_src), get_ST(0)) );
6296 break;
6298 case 0xE9: /* FUCOMPP %st(0),%st(1) */
6299 DIP("fucompp %%st(0),%%st(1)\n");
6300 /* This forces C1 to zero, which isn't right. */
6301 put_C3210(
6302 unop(Iop_32Uto64,
6303 binop( Iop_And32,
6304 binop(Iop_Shl32,
6305 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
6306 mkU8(8)),
6307 mkU32(0x4500)
6308 )));
6309 fp_pop();
6310 fp_pop();
6311 break;
6313 default:
6314 goto decode_fail;
6320 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
6321 else
6322 if (first_opcode == 0xDB) {
6323 if (modrm < 0xC0) {
6325 /* bits 5,4,3 are an opcode extension, and the modRM also
6326 specifies an address. */
6327 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6328 delta += len;
6330 switch (gregLO3ofRM(modrm)) {
6332 case 0: /* FILD m32int */
6333 DIP("fildl %s\n", dis_buf);
6334 fp_push();
6335 put_ST(0, unop(Iop_I32StoF64,
6336 loadLE(Ity_I32, mkexpr(addr))));
6337 break;
6339 case 1: /* FISTTPL m32 (SSE3) */
6340 DIP("fisttpl %s\n", dis_buf);
6341 storeLE( mkexpr(addr),
6342 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );
6343 fp_pop();
6344 break;
6346 case 2: /* FIST m32 */
6347 DIP("fistl %s\n", dis_buf);
6348 storeLE( mkexpr(addr),
6349 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
6350 break;
6352 case 3: /* FISTP m32 */
6353 DIP("fistpl %s\n", dis_buf);
6354 storeLE( mkexpr(addr),
6355 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
6356 fp_pop();
6357 break;
6359 case 5: { /* FLD extended-real */
6360 /* Uses dirty helper:
6361 ULong amd64g_dirtyhelper_loadF80le ( ULong )
6362 addr holds the address. First, do a dirty call to
6363 get hold of the data. */
6364 IRTemp val = newTemp(Ity_I64);
6365 IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );
6367 IRDirty* d = unsafeIRDirty_1_N (
6368 val,
6369 0/*regparms*/,
6370 "amd64g_dirtyhelper_loadF80le",
6371 &amd64g_dirtyhelper_loadF80le,
6372 args
6374 /* declare that we're reading memory */
6375 d->mFx = Ifx_Read;
6376 d->mAddr = mkexpr(addr);
6377 d->mSize = 10;
6379 /* execute the dirty call, dumping the result in val. */
6380 stmt( IRStmt_Dirty(d) );
6381 fp_push();
6382 put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));
6384 DIP("fldt %s\n", dis_buf);
6385 break;
6388 case 7: { /* FSTP extended-real */
6389 /* Uses dirty helper:
6390 void amd64g_dirtyhelper_storeF80le ( ULong addr, ULong data ) */
6392 IRExpr** args
6393 = mkIRExprVec_2( mkexpr(addr),
6394 unop(Iop_ReinterpF64asI64, get_ST(0)) );
6396 IRDirty* d = unsafeIRDirty_0_N (
6397 0/*regparms*/,
6398 "amd64g_dirtyhelper_storeF80le",
6399 &amd64g_dirtyhelper_storeF80le,
6400 args
6402 /* declare we're writing memory */
6403 d->mFx = Ifx_Write;
6404 d->mAddr = mkexpr(addr);
6405 d->mSize = 10;
6407 /* execute the dirty call. */
6408 stmt( IRStmt_Dirty(d) );
6409 fp_pop();
6411 DIP("fstpt\n %s", dis_buf);
6412 break;
6415 default:
6416 vex_printf("unhandled opc_aux = 0x%2x\n",
6417 (UInt)gregLO3ofRM(modrm));
6418 vex_printf("first_opcode == 0xDB\n");
6419 goto decode_fail;
6422 } else {
6424 delta++;
6425 switch (modrm) {
6427 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
6428 r_src = (UInt)modrm - 0xC0;
6429 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src);
6430 put_ST_UNCHECKED(0,
6431 IRExpr_ITE(
6432 mk_amd64g_calculate_condition(AMD64CondNB),
6433 get_ST(r_src), get_ST(0)) );
6434 break;
6436 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
6437 r_src = (UInt)modrm - 0xC8;
6438 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src);
6439 put_ST_UNCHECKED(
6441 IRExpr_ITE(
6442 mk_amd64g_calculate_condition(AMD64CondNZ),
6443 get_ST(r_src),
6444 get_ST(0)
6447 break;
6449 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
6450 r_src = (UInt)modrm - 0xD0;
6451 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src);
6452 put_ST_UNCHECKED(
6454 IRExpr_ITE(
6455 mk_amd64g_calculate_condition(AMD64CondNBE),
6456 get_ST(r_src),
6457 get_ST(0)
6460 break;
6462 case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
6463 r_src = (UInt)modrm - 0xD8;
6464 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src);
6465 put_ST_UNCHECKED(
6467 IRExpr_ITE(
6468 mk_amd64g_calculate_condition(AMD64CondNP),
6469 get_ST(r_src),
6470 get_ST(0)
6473 break;
6475 case 0xE2:
6476 DIP("fnclex\n");
6477 break;
6479 case 0xE3: {
6480 gen_FINIT_SEQUENCE(NULL/*no guarding condition*/);
6481 DIP("fninit\n");
6482 break;
6485 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
6486 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False );
6487 break;
6489 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
6490 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False );
6491 break;
6493 default:
6494 goto decode_fail;
6499 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
6500 else
6501 if (first_opcode == 0xDC) {
6502 if (modrm < 0xC0) {
6504 /* bits 5,4,3 are an opcode extension, and the modRM also
6505 specifies an address. */
6506 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6507 delta += len;
6509 switch (gregLO3ofRM(modrm)) {
6511 case 0: /* FADD double-real */
6512 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True );
6513 break;
6515 case 1: /* FMUL double-real */
6516 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True );
6517 break;
6519 case 2: /* FCOM double-real */
6520 DIP("fcoml %s\n", dis_buf);
6521 /* This forces C1 to zero, which isn't right. */
6522 put_C3210(
6523 unop(Iop_32Uto64,
6524 binop( Iop_And32,
6525 binop(Iop_Shl32,
6526 binop(Iop_CmpF64,
6527 get_ST(0),
6528 loadLE(Ity_F64,mkexpr(addr))),
6529 mkU8(8)),
6530 mkU32(0x4500)
6531 )));
6532 break;
6534 case 3: /* FCOMP double-real */
6535 DIP("fcompl %s\n", dis_buf);
6536 /* This forces C1 to zero, which isn't right. */
6537 put_C3210(
6538 unop(Iop_32Uto64,
6539 binop( Iop_And32,
6540 binop(Iop_Shl32,
6541 binop(Iop_CmpF64,
6542 get_ST(0),
6543 loadLE(Ity_F64,mkexpr(addr))),
6544 mkU8(8)),
6545 mkU32(0x4500)
6546 )));
6547 fp_pop();
6548 break;
6550 case 4: /* FSUB double-real */
6551 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True );
6552 break;
6554 case 5: /* FSUBR double-real */
6555 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True );
6556 break;
6558 case 6: /* FDIV double-real */
6559 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True );
6560 break;
6562 case 7: /* FDIVR double-real */
6563 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True );
6564 break;
6566 default:
6567 vex_printf("unhandled opc_aux = 0x%2x\n",
6568 (UInt)gregLO3ofRM(modrm));
6569 vex_printf("first_opcode == 0xDC\n");
6570 goto decode_fail;
6573 } else {
6575 delta++;
6576 switch (modrm) {
6578 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
6579 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False );
6580 break;
6582 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
6583 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False );
6584 break;
6586 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
6587 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False );
6588 break;
6590 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
6591 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False );
6592 break;
6594 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
6595 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False );
6596 break;
6598 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
6599 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False );
6600 break;
6602 default:
6603 goto decode_fail;
6609 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
6610 else
6611 if (first_opcode == 0xDD) {
6613 if (modrm < 0xC0) {
6615 /* bits 5,4,3 are an opcode extension, and the modRM also
6616 specifies an address. */
6617 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6618 delta += len;
6620 switch (gregLO3ofRM(modrm)) {
6622 case 0: /* FLD double-real */
6623 DIP("fldl %s\n", dis_buf);
6624 fp_push();
6625 put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
6626 break;
6628 case 1: /* FISTTPQ m64 (SSE3) */
6629 DIP("fistppll %s\n", dis_buf);
6630 storeLE( mkexpr(addr),
6631 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );
6632 fp_pop();
6633 break;
6635 case 2: /* FST double-real */
6636 DIP("fstl %s\n", dis_buf);
6637 storeLE(mkexpr(addr), get_ST(0));
6638 break;
6640 case 3: /* FSTP double-real */
6641 DIP("fstpl %s\n", dis_buf);
6642 storeLE(mkexpr(addr), get_ST(0));
6643 fp_pop();
6644 break;
6646 case 4: { /* FRSTOR m94/m108 */
6647 IRTemp ew = newTemp(Ity_I32);
6648 IRTemp w64 = newTemp(Ity_I64);
6649 IRDirty* d;
6650 if ( have66(pfx) ) {
6651 /* Uses dirty helper:
6652 VexEmNote amd64g_dirtyhelper_FRSTORS
6653 ( VexGuestAMD64State*, HWord ) */
6654 d = unsafeIRDirty_0_N (
6655 0/*regparms*/,
6656 "amd64g_dirtyhelper_FRSTORS",
6657 &amd64g_dirtyhelper_FRSTORS,
6658 mkIRExprVec_1( mkexpr(addr) )
6660 d->mSize = 94;
6661 } else {
6662 /* Uses dirty helper:
6663 VexEmNote amd64g_dirtyhelper_FRSTOR
6664 ( VexGuestAMD64State*, HWord ) */
6665 d = unsafeIRDirty_0_N (
6666 0/*regparms*/,
6667 "amd64g_dirtyhelper_FRSTOR",
6668 &amd64g_dirtyhelper_FRSTOR,
6669 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
6671 d->mSize = 108;
6674 d->tmp = w64;
6675 /* declare we're reading memory */
6676 d->mFx = Ifx_Read;
6677 d->mAddr = mkexpr(addr);
6678 /* d->mSize set above */
6680 /* declare we're writing guest state */
6681 d->nFxState = 5;
6682 vex_bzero(&d->fxState, sizeof(d->fxState));
6684 d->fxState[0].fx = Ifx_Write;
6685 d->fxState[0].offset = OFFB_FTOP;
6686 d->fxState[0].size = sizeof(UInt);
6688 d->fxState[1].fx = Ifx_Write;
6689 d->fxState[1].offset = OFFB_FPREGS;
6690 d->fxState[1].size = 8 * sizeof(ULong);
6692 d->fxState[2].fx = Ifx_Write;
6693 d->fxState[2].offset = OFFB_FPTAGS;
6694 d->fxState[2].size = 8 * sizeof(UChar);
6696 d->fxState[3].fx = Ifx_Write;
6697 d->fxState[3].offset = OFFB_FPROUND;
6698 d->fxState[3].size = sizeof(ULong);
6700 d->fxState[4].fx = Ifx_Write;
6701 d->fxState[4].offset = OFFB_FC3210;
6702 d->fxState[4].size = sizeof(ULong);
6704 stmt( IRStmt_Dirty(d) );
6706 /* ew contains any emulation warning we may need to
6707 issue. If needed, side-exit to the next insn,
6708 reporting the warning, so that Valgrind's dispatcher
6709 sees the warning. */
6710 assign(ew, unop(Iop_64to32,mkexpr(w64)) );
6711 put_emwarn( mkexpr(ew) );
6712 stmt(
6713 IRStmt_Exit(
6714 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
6715 Ijk_EmWarn,
6716 IRConst_U64( guest_RIP_bbstart+delta ),
6717 OFFB_RIP
6721 if ( have66(pfx) ) {
6722 DIP("frstors %s\n", dis_buf);
6723 } else {
6724 DIP("frstor %s\n", dis_buf);
6726 break;
6729 case 6: { /* FNSAVE m94/m108 */
6730 IRDirty *d;
6731 if ( have66(pfx) ) {
6732 /* Uses dirty helper:
6733 void amd64g_dirtyhelper_FNSAVES ( VexGuestAMD64State*,
6734 HWord ) */
6735 d = unsafeIRDirty_0_N (
6736 0/*regparms*/,
6737 "amd64g_dirtyhelper_FNSAVES",
6738 &amd64g_dirtyhelper_FNSAVES,
6739 mkIRExprVec_1( mkexpr(addr) )
6741 d->mSize = 94;
6742 } else {
6743 /* Uses dirty helper:
6744 void amd64g_dirtyhelper_FNSAVE ( VexGuestAMD64State*,
6745 HWord ) */
6746 d = unsafeIRDirty_0_N (
6747 0/*regparms*/,
6748 "amd64g_dirtyhelper_FNSAVE",
6749 &amd64g_dirtyhelper_FNSAVE,
6750 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
6752 d->mSize = 108;
6755 /* declare we're writing memory */
6756 d->mFx = Ifx_Write;
6757 d->mAddr = mkexpr(addr);
6758 /* d->mSize set above */
6760 /* declare we're reading guest state */
6761 d->nFxState = 5;
6762 vex_bzero(&d->fxState, sizeof(d->fxState));
6764 d->fxState[0].fx = Ifx_Read;
6765 d->fxState[0].offset = OFFB_FTOP;
6766 d->fxState[0].size = sizeof(UInt);
6768 d->fxState[1].fx = Ifx_Read;
6769 d->fxState[1].offset = OFFB_FPREGS;
6770 d->fxState[1].size = 8 * sizeof(ULong);
6772 d->fxState[2].fx = Ifx_Read;
6773 d->fxState[2].offset = OFFB_FPTAGS;
6774 d->fxState[2].size = 8 * sizeof(UChar);
6776 d->fxState[3].fx = Ifx_Read;
6777 d->fxState[3].offset = OFFB_FPROUND;
6778 d->fxState[3].size = sizeof(ULong);
6780 d->fxState[4].fx = Ifx_Read;
6781 d->fxState[4].offset = OFFB_FC3210;
6782 d->fxState[4].size = sizeof(ULong);
6784 stmt( IRStmt_Dirty(d) );
6786 if ( have66(pfx) ) {
6787 DIP("fnsaves %s\n", dis_buf);
6788 } else {
6789 DIP("fnsave %s\n", dis_buf);
6791 break;
6794 case 7: { /* FNSTSW m16 */
6795 IRExpr* sw = get_FPU_sw();
6796 vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
6797 storeLE( mkexpr(addr), sw );
6798 DIP("fnstsw %s\n", dis_buf);
6799 break;
6802 default:
6803 vex_printf("unhandled opc_aux = 0x%2x\n",
6804 (UInt)gregLO3ofRM(modrm));
6805 vex_printf("first_opcode == 0xDD\n");
6806 goto decode_fail;
6808 } else {
6809 delta++;
6810 switch (modrm) {
6812 case 0xC0 ... 0xC7: /* FFREE %st(?) */
6813 r_dst = (UInt)modrm - 0xC0;
6814 DIP("ffree %%st(%u)\n", r_dst);
6815 put_ST_TAG ( r_dst, mkU8(0) );
6816 break;
6818 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
6819 r_dst = (UInt)modrm - 0xD0;
6820 DIP("fst %%st(0),%%st(%u)\n", r_dst);
6821 /* P4 manual says: "If the destination operand is a
6822 non-empty register, the invalid-operation exception
6823 is not generated." Hence put_ST_UNCHECKED. */
6824 put_ST_UNCHECKED(r_dst, get_ST(0));
6825 break;
6827 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
6828 r_dst = (UInt)modrm - 0xD8;
6829 DIP("fstp %%st(0),%%st(%u)\n", r_dst);
6830 /* P4 manual says: "If the destination operand is a
6831 non-empty register, the invalid-operation exception
6832 is not generated." Hence put_ST_UNCHECKED. */
6833 put_ST_UNCHECKED(r_dst, get_ST(0));
6834 fp_pop();
6835 break;
6837 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
6838 r_dst = (UInt)modrm - 0xE0;
6839 DIP("fucom %%st(0),%%st(%u)\n", r_dst);
6840 /* This forces C1 to zero, which isn't right. */
6841 put_C3210(
6842 unop(Iop_32Uto64,
6843 binop( Iop_And32,
6844 binop(Iop_Shl32,
6845 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
6846 mkU8(8)),
6847 mkU32(0x4500)
6848 )));
6849 break;
6851 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
6852 r_dst = (UInt)modrm - 0xE8;
6853 DIP("fucomp %%st(0),%%st(%u)\n", r_dst);
6854 /* This forces C1 to zero, which isn't right. */
6855 put_C3210(
6856 unop(Iop_32Uto64,
6857 binop( Iop_And32,
6858 binop(Iop_Shl32,
6859 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
6860 mkU8(8)),
6861 mkU32(0x4500)
6862 )));
6863 fp_pop();
6864 break;
6866 default:
6867 goto decode_fail;
6872 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
6873 else
6874 if (first_opcode == 0xDE) {
6876 if (modrm < 0xC0) {
6878 /* bits 5,4,3 are an opcode extension, and the modRM also
6879 specifies an address. */
6880 IROp fop;
6881 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6882 delta += len;
6884 switch (gregLO3ofRM(modrm)) {
6886 case 0: /* FIADD m16int */ /* ST(0) += m16int */
6887 DIP("fiaddw %s\n", dis_buf);
6888 fop = Iop_AddF64;
6889 goto do_fop_m16;
6891 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
6892 DIP("fimulw %s\n", dis_buf);
6893 fop = Iop_MulF64;
6894 goto do_fop_m16;
6896 case 4: /* FISUB m16int */ /* ST(0) -= m16int */
6897 DIP("fisubw %s\n", dis_buf);
6898 fop = Iop_SubF64;
6899 goto do_fop_m16;
6901 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
6902 DIP("fisubrw %s\n", dis_buf);
6903 fop = Iop_SubF64;
6904 goto do_foprev_m16;
6906 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
6907 DIP("fisubw %s\n", dis_buf);
6908 fop = Iop_DivF64;
6909 goto do_fop_m16;
6911 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
6912 DIP("fidivrw %s\n", dis_buf);
6913 fop = Iop_DivF64;
6914 goto do_foprev_m16;
6916 do_fop_m16:
6917 put_ST_UNCHECKED(0,
6918 triop(fop,
6919 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6920 get_ST(0),
6921 unop(Iop_I32StoF64,
6922 unop(Iop_16Sto32,
6923 loadLE(Ity_I16, mkexpr(addr))))));
6924 break;
6926 do_foprev_m16:
6927 put_ST_UNCHECKED(0,
6928 triop(fop,
6929 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6930 unop(Iop_I32StoF64,
6931 unop(Iop_16Sto32,
6932 loadLE(Ity_I16, mkexpr(addr)))),
6933 get_ST(0)));
6934 break;
6936 default:
6937 vex_printf("unhandled opc_aux = 0x%2x\n",
6938 (UInt)gregLO3ofRM(modrm));
6939 vex_printf("first_opcode == 0xDE\n");
6940 goto decode_fail;
6943 } else {
6945 delta++;
6946 switch (modrm) {
6948 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
6949 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );
6950 break;
6952 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
6953 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
6954 break;
6956 case 0xD9: /* FCOMPP %st(0),%st(1) */
6957 DIP("fcompp %%st(0),%%st(1)\n");
6958 /* This forces C1 to zero, which isn't right. */
6959 put_C3210(
6960 unop(Iop_32Uto64,
6961 binop( Iop_And32,
6962 binop(Iop_Shl32,
6963 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
6964 mkU8(8)),
6965 mkU32(0x4500)
6966 )));
6967 fp_pop();
6968 fp_pop();
6969 break;
6971 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
6972 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True );
6973 break;
6975 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
6976 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True );
6977 break;
6979 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
6980 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );
6981 break;
6983 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
6984 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
6985 break;
6987 default:
6988 goto decode_fail;
6994 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
6995 else
6996 if (first_opcode == 0xDF) {
6998 if (modrm < 0xC0) {
7000 /* bits 5,4,3 are an opcode extension, and the modRM also
7001 specifies an address. */
7002 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7003 delta += len;
7005 switch (gregLO3ofRM(modrm)) {
7007 case 0: /* FILD m16int */
7008 DIP("fildw %s\n", dis_buf);
7009 fp_push();
7010 put_ST(0, unop(Iop_I32StoF64,
7011 unop(Iop_16Sto32,
7012 loadLE(Ity_I16, mkexpr(addr)))));
7013 break;
7015 case 1: /* FISTTPS m16 (SSE3) */
7016 DIP("fisttps %s\n", dis_buf);
7017 storeLE( mkexpr(addr),
7018 x87ishly_qnarrow_32_to_16(
7019 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) ));
7020 fp_pop();
7021 break;
7023 case 2: /* FIST m16 */
7024 DIP("fists %s\n", dis_buf);
7025 storeLE( mkexpr(addr),
7026 x87ishly_qnarrow_32_to_16(
7027 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
7028 break;
7030 case 3: /* FISTP m16 */
7031 DIP("fistps %s\n", dis_buf);
7032 storeLE( mkexpr(addr),
7033 x87ishly_qnarrow_32_to_16(
7034 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
7035 fp_pop();
7036 break;
7038 case 5: /* FILD m64 */
7039 DIP("fildll %s\n", dis_buf);
7040 fp_push();
7041 put_ST(0, binop(Iop_I64StoF64,
7042 get_roundingmode(),
7043 loadLE(Ity_I64, mkexpr(addr))));
7044 break;
7046 case 7: /* FISTP m64 */
7047 DIP("fistpll %s\n", dis_buf);
7048 storeLE( mkexpr(addr),
7049 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) );
7050 fp_pop();
7051 break;
7053 default:
7054 vex_printf("unhandled opc_aux = 0x%2x\n",
7055 (UInt)gregLO3ofRM(modrm));
7056 vex_printf("first_opcode == 0xDF\n");
7057 goto decode_fail;
7060 } else {
7062 delta++;
7063 switch (modrm) {
7065 case 0xC0: /* FFREEP %st(0) */
7066 DIP("ffreep %%st(%d)\n", 0);
7067 put_ST_TAG ( 0, mkU8(0) );
7068 fp_pop();
7069 break;
7071 case 0xE0: /* FNSTSW %ax */
7072 DIP("fnstsw %%ax\n");
7073 /* Invent a plausible-looking FPU status word value and
7074 dump it in %AX:
7075 ((ftop & 7) << 11) | (c3210 & 0x4700)
7077 putIRegRAX(
7079 unop(Iop_32to16,
7080 binop(Iop_Or32,
7081 binop(Iop_Shl32,
7082 binop(Iop_And32, get_ftop(), mkU32(7)),
7083 mkU8(11)),
7084 binop(Iop_And32,
7085 unop(Iop_64to32, get_C3210()),
7086 mkU32(0x4700))
7087 )));
7088 break;
7090 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
7091 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
7092 break;
7094 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
7095 /* not really right since COMIP != UCOMIP */
7096 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
7097 break;
7099 default:
7100 goto decode_fail;
7106 else
7107 goto decode_fail;
7109 *decode_ok = True;
7110 return delta;
7112 decode_fail:
7113 *decode_ok = False;
7114 return delta;
7118 /*------------------------------------------------------------*/
7119 /*--- ---*/
7120 /*--- MMX INSTRUCTIONS ---*/
7121 /*--- ---*/
7122 /*------------------------------------------------------------*/
7124 /* Effect of MMX insns on x87 FPU state (table 11-2 of
7125 IA32 arch manual, volume 3):
7127 Read from, or write to, an MMX register (viz, any insn except EMMS):
7128 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
7129 * FP stack pointer set to zero
7131 EMMS:
7132 * All tags set to Invalid (empty) -- FPTAGS[i] := zero
7133 * FP stack pointer set to zero */
7136 static void do_MMX_preamble ( void )
7138 Int i;
7139 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
7140 IRExpr* zero = mkU32(0);
7141 IRExpr* tag1 = mkU8(1);
7142 put_ftop(zero);
7143 for (i = 0; i < 8; i++)
7144 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag1) ) );
7147 static void do_EMMS_preamble ( void )
7149 Int i;
7150 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
7151 IRExpr* zero = mkU32(0);
7152 IRExpr* tag0 = mkU8(0);
7153 put_ftop(zero);
7154 for (i = 0; i < 8; i++)
7155 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag0) ) );
7159 static IRExpr* getMMXReg ( UInt archreg )
7161 vassert(archreg < 8);
7162 return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
7166 static void putMMXReg ( UInt archreg, IRExpr* e )
7168 vassert(archreg < 8);
7169 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
7170 stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
7174 /* Helper for non-shift MMX insns. Note this is incomplete in the
7175 sense that it does not first call do_MMX_preamble() -- that is the
7176 responsibility of its caller. */
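/* Two quirks of the table below: invG (set only for PANDN, 0xDF)
   complements the G operand before the And, and eLeft swaps the
   operand order so that E supplies the left argument for the pack
   and interleave operations. */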
7178 static
7179 ULong dis_MMXop_regmem_to_reg ( const VexAbiInfo* vbi,
7180 Prefix pfx,
7181 Long delta,
7182 UChar opc,
7183 const HChar* name,
7184 Bool show_granularity )
7186 HChar dis_buf[50];
7187 UChar modrm = getUChar(delta);
7188 Bool isReg = epartIsReg(modrm);
7189 IRExpr* argL = NULL;
7190 IRExpr* argR = NULL;
7191 IRExpr* argG = NULL;
7192 IRExpr* argE = NULL;
7193 IRTemp res = newTemp(Ity_I64);
7195 Bool invG = False;
7196 IROp op = Iop_INVALID;
7197 void* hAddr = NULL;
7198 const HChar* hName = NULL;
7199 Bool eLeft = False;
7201 # define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)
7203 switch (opc) {
7204 /* Original MMX ones */
7205 case 0xFC: op = Iop_Add8x8; break;
7206 case 0xFD: op = Iop_Add16x4; break;
7207 case 0xFE: op = Iop_Add32x2; break;
7209 case 0xEC: op = Iop_QAdd8Sx8; break;
7210 case 0xED: op = Iop_QAdd16Sx4; break;
7212 case 0xDC: op = Iop_QAdd8Ux8; break;
7213 case 0xDD: op = Iop_QAdd16Ux4; break;
7215 case 0xF8: op = Iop_Sub8x8; break;
7216 case 0xF9: op = Iop_Sub16x4; break;
7217 case 0xFA: op = Iop_Sub32x2; break;
7219 case 0xE8: op = Iop_QSub8Sx8; break;
7220 case 0xE9: op = Iop_QSub16Sx4; break;
7222 case 0xD8: op = Iop_QSub8Ux8; break;
7223 case 0xD9: op = Iop_QSub16Ux4; break;
7225 case 0xE5: op = Iop_MulHi16Sx4; break;
7226 case 0xD5: op = Iop_Mul16x4; break;
7227 case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break;
7229 case 0x74: op = Iop_CmpEQ8x8; break;
7230 case 0x75: op = Iop_CmpEQ16x4; break;
7231 case 0x76: op = Iop_CmpEQ32x2; break;
7233 case 0x64: op = Iop_CmpGT8Sx8; break;
7234 case 0x65: op = Iop_CmpGT16Sx4; break;
7235 case 0x66: op = Iop_CmpGT32Sx2; break;
7237 case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break;
7238 case 0x63: op = Iop_QNarrowBin16Sto8Sx8; eLeft = True; break;
7239 case 0x67: op = Iop_QNarrowBin16Sto8Ux8; eLeft = True; break;
7241 case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break;
7242 case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
7243 case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;
7245 case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break;
7246 case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
7247 case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;
7249 case 0xDB: op = Iop_And64; break;
7250 case 0xDF: op = Iop_And64; invG = True; break;
7251 case 0xEB: op = Iop_Or64; break;
7252 case 0xEF: /* Possibly do better here if argL and argR are the
7253 same reg */
7254 op = Iop_Xor64; break;
7256 /* Introduced in SSE1 */
7257 case 0xE0: op = Iop_Avg8Ux8; break;
7258 case 0xE3: op = Iop_Avg16Ux4; break;
7259 case 0xEE: op = Iop_Max16Sx4; break;
7260 case 0xDE: op = Iop_Max8Ux8; break;
7261 case 0xEA: op = Iop_Min16Sx4; break;
7262 case 0xDA: op = Iop_Min8Ux8; break;
7263 case 0xE4: op = Iop_MulHi16Ux4; break;
7264 case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break;
7266 /* Introduced in SSE2 */
7267 case 0xD4: op = Iop_Add64; break;
7268 case 0xFB: op = Iop_Sub64; break;
7270 default:
7271 vex_printf("\n0x%x\n", (UInt)opc);
7272 vpanic("dis_MMXop_regmem_to_reg");
7275 # undef XXX
7277 argG = getMMXReg(gregLO3ofRM(modrm));
7278 if (invG)
7279 argG = unop(Iop_Not64, argG);
7281 if (isReg) {
7282 delta++;
7283 argE = getMMXReg(eregLO3ofRM(modrm));
7284 } else {
7285 Int len;
7286 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7287 delta += len;
7288 argE = loadLE(Ity_I64, mkexpr(addr));
7291 if (eLeft) {
7292 argL = argE;
7293 argR = argG;
7294 } else {
7295 argL = argG;
7296 argR = argE;
7299 if (op != Iop_INVALID) {
7300 vassert(hName == NULL);
7301 vassert(hAddr == NULL);
7302 assign(res, binop(op, argL, argR));
7303 } else {
7304 vassert(hName != NULL);
7305 vassert(hAddr != NULL);
7306 assign( res,
7307 mkIRExprCCall(
7308 Ity_I64,
7309 0/*regparms*/, hName, hAddr,
7310 mkIRExprVec_2( argL, argR )
7315 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
7317 DIP("%s%s %s, %s\n",
7318 name, show_granularity ? nameMMXGran(opc & 3) : "",
7319 ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ),
7320 nameMMXReg(gregLO3ofRM(modrm)) );
7322 return delta;
7326 /* Vector by scalar shift of G by the amount specified at the bottom
7327 of E. This is a straight copy of dis_SSE_shiftG_byE. */
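/* Out-of-range handling, as implemented by the CmpLT64U ITEs below:
   a shift amount >= 'size' gives zero for shl/shr, while for sar it
   behaves like a shift by size-1, i.e. every lane becomes a copy of
   its sign bit. */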
7329 static ULong dis_MMX_shiftG_byE ( const VexAbiInfo* vbi,
7330 Prefix pfx, Long delta,
7331 const HChar* opname, IROp op )
7333 HChar dis_buf[50];
7334 Int alen, size;
7335 IRTemp addr;
7336 Bool shl, shr, sar;
7337 UChar rm = getUChar(delta);
7338 IRTemp g0 = newTemp(Ity_I64);
7339 IRTemp g1 = newTemp(Ity_I64);
7340 IRTemp amt = newTemp(Ity_I64);
7341 IRTemp amt8 = newTemp(Ity_I8);
7343 if (epartIsReg(rm)) {
7344 assign( amt, getMMXReg(eregLO3ofRM(rm)) );
7345 DIP("%s %s,%s\n", opname,
7346 nameMMXReg(eregLO3ofRM(rm)),
7347 nameMMXReg(gregLO3ofRM(rm)) );
7348 delta++;
7349 } else {
7350 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
7351 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
7352 DIP("%s %s,%s\n", opname,
7353 dis_buf,
7354 nameMMXReg(gregLO3ofRM(rm)) );
7355 delta += alen;
7357 assign( g0, getMMXReg(gregLO3ofRM(rm)) );
7358 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
7360 shl = shr = sar = False;
7361 size = 0;
7362 switch (op) {
7363 case Iop_ShlN16x4: shl = True; size = 16; break;
7364 case Iop_ShlN32x2: shl = True; size = 32; break;
7365 case Iop_Shl64: shl = True; size = 64; break;
7366 case Iop_ShrN16x4: shr = True; size = 16; break;
7367 case Iop_ShrN32x2: shr = True; size = 32; break;
7368 case Iop_Shr64: shr = True; size = 64; break;
7369 case Iop_SarN16x4: sar = True; size = 16; break;
7370 case Iop_SarN32x2: sar = True; size = 32; break;
7371 default: vassert(0);
7374 if (shl || shr) {
7375 assign(
7377 IRExpr_ITE(
7378 binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)),
7379 binop(op, mkexpr(g0), mkexpr(amt8)),
7380 mkU64(0)
7383 } else
7384 if (sar) {
7385 assign(
7387 IRExpr_ITE(
7388 binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)),
7389 binop(op, mkexpr(g0), mkexpr(amt8)),
7390 binop(op, mkexpr(g0), mkU8(size-1))
7393 } else {
7394 vassert(0);
7397 putMMXReg( gregLO3ofRM(rm), mkexpr(g1) );
7398 return delta;
7402 /* Vector by scalar shift of E by an immediate byte. This is a
7403 straight copy of dis_SSE_shiftE_imm. */
7405 static
7406 ULong dis_MMX_shiftE_imm ( Long delta, const HChar* opname, IROp op )
7408 Bool shl, shr, sar;
7409 UChar rm = getUChar(delta);
7410 IRTemp e0 = newTemp(Ity_I64);
7411 IRTemp e1 = newTemp(Ity_I64);
7412 UChar amt, size;
7413 vassert(epartIsReg(rm));
7414 vassert(gregLO3ofRM(rm) == 2
7415 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
7416 amt = getUChar(delta+1);
7417 delta += 2;
7418 DIP("%s $%d,%s\n", opname,
7419 (Int)amt,
7420 nameMMXReg(eregLO3ofRM(rm)) );
7422 assign( e0, getMMXReg(eregLO3ofRM(rm)) );
7424 shl = shr = sar = False;
7425 size = 0;
7426 switch (op) {
7427 case Iop_ShlN16x4: shl = True; size = 16; break;
7428 case Iop_ShlN32x2: shl = True; size = 32; break;
7429 case Iop_Shl64: shl = True; size = 64; break;
7430 case Iop_SarN16x4: sar = True; size = 16; break;
7431 case Iop_SarN32x2: sar = True; size = 32; break;
7432 case Iop_ShrN16x4: shr = True; size = 16; break;
7433 case Iop_ShrN32x2: shr = True; size = 32; break;
7434 case Iop_Shr64: shr = True; size = 64; break;
7435 default: vassert(0);
7438 if (shl || shr) {
7439 assign( e1, amt >= size
7440 ? mkU64(0)
7441 : binop(op, mkexpr(e0), mkU8(amt))
7443 } else
7444 if (sar) {
7445 assign( e1, amt >= size
7446 ? binop(op, mkexpr(e0), mkU8(size-1))
7447 : binop(op, mkexpr(e0), mkU8(amt))
7449 } else {
7450 vassert(0);
7453 putMMXReg( eregLO3ofRM(rm), mkexpr(e1) );
7454 return delta;
7458 /* Completely handle all MMX instructions except emms. */
7460 static
7461 ULong dis_MMX ( Bool* decode_ok,
7462 const VexAbiInfo* vbi, Prefix pfx, Int sz, Long delta )
7464 Int len;
7465 UChar modrm;
7466 HChar dis_buf[50];
7467 UChar opc = getUChar(delta);
7468 delta++;
7470 /* dis_MMX handles all insns except emms. */
7471 do_MMX_preamble();
7473 switch (opc) {
7475 case 0x6E:
7476 if (sz == 4) {
7477 /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/
7478 modrm = getUChar(delta);
7479 if (epartIsReg(modrm)) {
7480 delta++;
7481 putMMXReg(
7482 gregLO3ofRM(modrm),
7483 binop( Iop_32HLto64,
7484 mkU32(0),
7485 getIReg32(eregOfRexRM(pfx,modrm)) ) );
7486 DIP("movd %s, %s\n",
7487 nameIReg32(eregOfRexRM(pfx,modrm)),
7488 nameMMXReg(gregLO3ofRM(modrm)));
7489 } else {
7490 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7491 delta += len;
7492 putMMXReg(
7493 gregLO3ofRM(modrm),
7494 binop( Iop_32HLto64,
7495 mkU32(0),
7496 loadLE(Ity_I32, mkexpr(addr)) ) );
7497 DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
7500 else
7501 if (sz == 8) {
7502 /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/
7503 modrm = getUChar(delta);
7504 if (epartIsReg(modrm)) {
7505 delta++;
7506 putMMXReg( gregLO3ofRM(modrm),
7507 getIReg64(eregOfRexRM(pfx,modrm)) );
7508 DIP("movd %s, %s\n",
7509 nameIReg64(eregOfRexRM(pfx,modrm)),
7510 nameMMXReg(gregLO3ofRM(modrm)));
7511 } else {
7512 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7513 delta += len;
7514 putMMXReg( gregLO3ofRM(modrm),
7515 loadLE(Ity_I64, mkexpr(addr)) );
7516 DIP("movd{64} %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
7519 else {
7520 goto mmx_decode_failure;
7522 break;
7524 case 0x7E:
7525 if (sz == 4) {
7526 /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */
7527 modrm = getUChar(delta);
7528 if (epartIsReg(modrm)) {
7529 delta++;
7530 putIReg32( eregOfRexRM(pfx,modrm),
7531 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
7532 DIP("movd %s, %s\n",
7533 nameMMXReg(gregLO3ofRM(modrm)),
7534 nameIReg32(eregOfRexRM(pfx,modrm)));
7535 } else {
7536 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7537 delta += len;
7538 storeLE( mkexpr(addr),
7539 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
7540 DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
7543 else
7544 if (sz == 8) {
7545 /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */
7546 modrm = getUChar(delta);
7547 if (epartIsReg(modrm)) {
7548 delta++;
7549 putIReg64( eregOfRexRM(pfx,modrm),
7550 getMMXReg(gregLO3ofRM(modrm)) );
7551 DIP("movd %s, %s\n",
7552 nameMMXReg(gregLO3ofRM(modrm)),
7553 nameIReg64(eregOfRexRM(pfx,modrm)));
7554 } else {
7555 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7556 delta += len;
7557 storeLE( mkexpr(addr),
7558 getMMXReg(gregLO3ofRM(modrm)) );
7559 DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
7561 } else {
7562 goto mmx_decode_failure;
7564 break;
7566 case 0x6F:
7567 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
7568 if (sz != 4
7569 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7570 goto mmx_decode_failure;
7571 modrm = getUChar(delta);
7572 if (epartIsReg(modrm)) {
7573 delta++;
7574 putMMXReg( gregLO3ofRM(modrm), getMMXReg(eregLO3ofRM(modrm)) );
7575 DIP("movq %s, %s\n",
7576 nameMMXReg(eregLO3ofRM(modrm)),
7577 nameMMXReg(gregLO3ofRM(modrm)));
7578 } else {
7579 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7580 delta += len;
7581 putMMXReg( gregLO3ofRM(modrm), loadLE(Ity_I64, mkexpr(addr)) );
7582 DIP("movq %s, %s\n",
7583 dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
7585 break;
7587 case 0x7F:
7588 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
7589 if (sz != 4
7590 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7591 goto mmx_decode_failure;
7592 modrm = getUChar(delta);
7593 if (epartIsReg(modrm)) {
7594 delta++;
7595 putMMXReg( eregLO3ofRM(modrm), getMMXReg(gregLO3ofRM(modrm)) );
7596 DIP("movq %s, %s\n",
7597 nameMMXReg(gregLO3ofRM(modrm)),
7598 nameMMXReg(eregLO3ofRM(modrm)));
7599 } else {
7600 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7601 delta += len;
7602 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
7603 DIP("mov(nt)q %s, %s\n",
7604 nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
7606 break;
7608 case 0xFC:
7609 case 0xFD:
7610 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
7611 if (sz != 4)
7612 goto mmx_decode_failure;
7613 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padd", True );
7614 break;
7616 case 0xEC:
7617 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
7618 if (sz != 4
7619 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7620 goto mmx_decode_failure;
7621 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padds", True );
7622 break;
7624 case 0xDC:
7625 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
7626 if (sz != 4)
7627 goto mmx_decode_failure;
7628 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "paddus", True );
7629 break;
7631 case 0xF8:
7632 case 0xF9:
7633 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
7634 if (sz != 4)
7635 goto mmx_decode_failure;
7636 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psub", True );
7637 break;
7639 case 0xE8:
7640 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
7641 if (sz != 4)
7642 goto mmx_decode_failure;
7643 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubs", True );
7644 break;
7646 case 0xD8:
7647 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
7648 if (sz != 4)
7649 goto mmx_decode_failure;
7650 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubus", True );
7651 break;
7653 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
7654 if (sz != 4)
7655 goto mmx_decode_failure;
7656 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmulhw", False );
7657 break;
7659 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
7660 if (sz != 4)
7661 goto mmx_decode_failure;
7662 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmullw", False );
7663 break;
7665 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
7666 vassert(sz == 4);
7667 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmaddwd", False );
7668 break;
7670 case 0x74:
7671 case 0x75:
7672 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
7673 if (sz != 4)
7674 goto mmx_decode_failure;
7675 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpeq", True );
7676 break;
7678 case 0x64:
7679 case 0x65:
7680 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
7681 if (sz != 4)
7682 goto mmx_decode_failure;
7683 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpgt", True );
7684 break;
7686 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
7687 if (sz != 4)
7688 goto mmx_decode_failure;
7689 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packssdw", False );
7690 break;
7692 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
7693 if (sz != 4)
7694 goto mmx_decode_failure;
7695 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packsswb", False );
7696 break;
7698 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
7699 if (sz != 4)
7700 goto mmx_decode_failure;
7701 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packuswb", False );
7702 break;
7704 case 0x68:
7705 case 0x69:
7706 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
7707 if (sz != 4
7708 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7709 goto mmx_decode_failure;
7710 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckh", True );
7711 break;
7713 case 0x60:
7714 case 0x61:
7715 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
7716 if (sz != 4
7717 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7718 goto mmx_decode_failure;
7719 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckl", True );
7720 break;
7722 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
7723 if (sz != 4)
7724 goto mmx_decode_failure;
7725 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pand", False );
7726 break;
7728 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
7729 if (sz != 4)
7730 goto mmx_decode_failure;
7731 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pandn", False );
7732 break;
7734 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
7735 if (sz != 4)
7736 goto mmx_decode_failure;
7737 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "por", False );
7738 break;
7740 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
7741 if (sz != 4)
7742 goto mmx_decode_failure;
7743 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pxor", False );
7744 break;
7746 # define SHIFT_BY_REG(_name,_op) \
7747 delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op); \
7748 break;
7750 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
7751 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
7752 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
7753 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);
7755 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
7756 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
7757 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
7758 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64);
7760 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
7761 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4);
7762 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2);
7764 # undef SHIFT_BY_REG
7766 case 0x71:
7767 case 0x72:
7768 case 0x73: {
7769 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
7770 UChar byte2, subopc;
7771 if (sz != 4)
7772 goto mmx_decode_failure;
7773 byte2 = getUChar(delta); /* amode / sub-opcode */
7774 subopc = toUChar( (byte2 >> 3) & 7 );
7776 # define SHIFT_BY_IMM(_name,_op) \
7777 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \
7778 } while (0)
7780 if (subopc == 2 /*SRL*/ && opc == 0x71)
7781 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4);
7782 else if (subopc == 2 /*SRL*/ && opc == 0x72)
7783 SHIFT_BY_IMM("psrld", Iop_ShrN32x2);
7784 else if (subopc == 2 /*SRL*/ && opc == 0x73)
7785 SHIFT_BY_IMM("psrlq", Iop_Shr64);
7787 else if (subopc == 4 /*SAR*/ && opc == 0x71)
7788 SHIFT_BY_IMM("psraw", Iop_SarN16x4);
7789 else if (subopc == 4 /*SAR*/ && opc == 0x72)
7790 SHIFT_BY_IMM("psrad", Iop_SarN32x2);
7792 else if (subopc == 6 /*SHL*/ && opc == 0x71)
7793 SHIFT_BY_IMM("psllw", Iop_ShlN16x4);
7794 else if (subopc == 6 /*SHL*/ && opc == 0x72)
7795 SHIFT_BY_IMM("pslld", Iop_ShlN32x2);
7796 else if (subopc == 6 /*SHL*/ && opc == 0x73)
7797 SHIFT_BY_IMM("psllq", Iop_Shl64);
7799 else goto mmx_decode_failure;
7801 # undef SHIFT_BY_IMM
7802 break;
7805 case 0xF7: {
7806 IRTemp addr = newTemp(Ity_I64);
7807 IRTemp regD = newTemp(Ity_I64);
7808 IRTemp regM = newTemp(Ity_I64);
7809 IRTemp mask = newTemp(Ity_I64);
7810 IRTemp olddata = newTemp(Ity_I64);
7811 IRTemp newdata = newTemp(Ity_I64);
7813 modrm = getUChar(delta);
7814 if (sz != 4 || (!epartIsReg(modrm)))
7815 goto mmx_decode_failure;
7816 delta++;
7818 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
7819 assign( regM, getMMXReg( eregLO3ofRM(modrm) ));
7820 assign( regD, getMMXReg( gregLO3ofRM(modrm) ));
7821 assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) );
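/* SarN8x8 by 7 copies each byte's sign bit across the whole byte,
   so 'mask' is 0xFF in every lane of regM whose top bit is set and
   0x00 elsewhere; the merge below stores regD only in those lanes. */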
7822 assign( olddata, loadLE( Ity_I64, mkexpr(addr) ));
7823 assign( newdata,
7824 binop(Iop_Or64,
7825 binop(Iop_And64,
7826 mkexpr(regD),
7827 mkexpr(mask) ),
7828 binop(Iop_And64,
7829 mkexpr(olddata),
7830 unop(Iop_Not64, mkexpr(mask)))) );
7831 storeLE( mkexpr(addr), mkexpr(newdata) );
7832 DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm) ),
7833 nameMMXReg( gregLO3ofRM(modrm) ) );
7834 break;
7837 /* --- MMX decode failure --- */
7838 default:
7839 mmx_decode_failure:
7840 *decode_ok = False;
7841 return delta; /* ignored */
7845 *decode_ok = True;
7846 return delta;
7850 /*------------------------------------------------------------*/
7851 /*--- More misc arithmetic and other obscure insns. ---*/
7852 /*------------------------------------------------------------*/
7854 /* Generate base << amt with vacated places filled with stuff
7855 from xtra. amt guaranteed in 0 .. 63. */
7856 static
7857 IRExpr* shiftL64_with_extras ( IRTemp base, IRTemp xtra, IRTemp amt )
7859 /* if amt == 0
7860 then base
7861 else (base << amt) | (xtra >>u (64-amt))
7863 return
7864 IRExpr_ITE(
7865 binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)),
7866 binop(Iop_Or64,
7867 binop(Iop_Shl64, mkexpr(base), mkexpr(amt)),
7868 binop(Iop_Shr64, mkexpr(xtra),
7869 binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
7871 mkexpr(base)
7875 /* Generate base >>u amt with vacated places filled with stuff
7876 from xtra. amt guaranteed in 0 .. 63. */
7877 static
7878 IRExpr* shiftR64_with_extras ( IRTemp xtra, IRTemp base, IRTemp amt )
7880 /* if amt == 0
7881 then base
7882 else (base >>u amt) | (xtra << (64-amt))
7884 return
7885 IRExpr_ITE(
7886 binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)),
7887 binop(Iop_Or64,
7888 binop(Iop_Shr64, mkexpr(base), mkexpr(amt)),
7889 binop(Iop_Shl64, mkexpr(xtra),
7890 binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
7892 mkexpr(base)
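/* A reference sketch of the two helpers above in plain C (names are
   illustrative only), assuming amt is already in 0 .. 63:

     uint64_t shiftL64_ref ( uint64_t base, uint64_t xtra, unsigned amt ) {
        return amt == 0 ? base
                        : (base << amt) | (xtra >> (64 - amt));
     }
     uint64_t shiftR64_ref ( uint64_t xtra, uint64_t base, unsigned amt ) {
        return amt == 0 ? base
                        : (base >> amt) | (xtra << (64 - amt));
     }

   The amt == 0 special case matches the ITE above and avoids the
   out-of-range shift by (64 - 0) == 64. */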
7896 /* Double length left and right shifts. Apparently only required in
7897 v-size (no b- variant). */
7898 static
7899 ULong dis_SHLRD_Gv_Ev ( const VexAbiInfo* vbi,
7900 Prefix pfx,
7901 Long delta, UChar modrm,
7902 Int sz,
7903 IRExpr* shift_amt,
7904 Bool amt_is_literal,
7905 const HChar* shift_amt_txt,
7906 Bool left_shift )
7908 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used
7909 for printing it. And the guest RIP on entry points at the modrm byte. */
7910 Int len;
7911 HChar dis_buf[50];
7913 IRType ty = szToITy(sz);
7914 IRTemp gsrc = newTemp(ty);
7915 IRTemp esrc = newTemp(ty);
7916 IRTemp addr = IRTemp_INVALID;
7917 IRTemp tmpSH = newTemp(Ity_I8);
7918 IRTemp tmpSS = newTemp(Ity_I8);
7919 IRTemp tmp64 = IRTemp_INVALID;
7920 IRTemp res64 = IRTemp_INVALID;
7921 IRTemp rss64 = IRTemp_INVALID;
7922 IRTemp resTy = IRTemp_INVALID;
7923 IRTemp rssTy = IRTemp_INVALID;
7924 Int mask = sz==8 ? 63 : 31;
7926 vassert(sz == 2 || sz == 4 || sz == 8);
7928 /* The E-part is the destination; this is shifted. The G-part
7929 supplies bits to be shifted into the E-part, but is not
7930 changed.
7932 If shifting left, form a double-length word with E at the top
7933 and G at the bottom, and shift this left. The result is then in
7934 the high part.
7936 If shifting right, form a double-length word with G at the top
7937 and E at the bottom, and shift this right. The result is then
7938 at the bottom. */
7940 /* Fetch the operands. */
7942 assign( gsrc, getIRegG(sz, pfx, modrm) );
7944 if (epartIsReg(modrm)) {
7945 delta++;
7946 assign( esrc, getIRegE(sz, pfx, modrm) );
7947 DIP("sh%cd%c %s, %s, %s\n",
7948 ( left_shift ? 'l' : 'r' ), nameISize(sz),
7949 shift_amt_txt,
7950 nameIRegG(sz, pfx, modrm), nameIRegE(sz, pfx, modrm));
7951 } else {
7952 addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
7953 /* # bytes following amode */
7954 amt_is_literal ? 1 : 0 );
7955 delta += len;
7956 assign( esrc, loadLE(ty, mkexpr(addr)) );
7957 DIP("sh%cd%c %s, %s, %s\n",
7958 ( left_shift ? 'l' : 'r' ), nameISize(sz),
7959 shift_amt_txt,
7960 nameIRegG(sz, pfx, modrm), dis_buf);
7963 /* Calculate the masked shift amount (tmpSH), the masked subshift
7964 amount (tmpSS), the shifted value (res64) and the subshifted
7965 value (rss64). */
7967 assign( tmpSH, binop(Iop_And8, shift_amt, mkU8(mask)) );
7968 assign( tmpSS, binop(Iop_And8,
7969 binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),
7970 mkU8(mask)));
7972 tmp64 = newTemp(Ity_I64);
7973 res64 = newTemp(Ity_I64);
7974 rss64 = newTemp(Ity_I64);
7976 if (sz == 2 || sz == 4) {
7978 /* G is xtra; E is data */
7979 /* what a freaking nightmare: */
7980 if (sz == 4 && left_shift) {
7981 assign( tmp64, binop(Iop_32HLto64, mkexpr(esrc), mkexpr(gsrc)) );
7982 assign( res64,
7983 binop(Iop_Shr64,
7984 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
7985 mkU8(32)) );
7986 assign( rss64,
7987 binop(Iop_Shr64,
7988 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSS)),
7989 mkU8(32)) );
7991 else
7992 if (sz == 4 && !left_shift) {
7993 assign( tmp64, binop(Iop_32HLto64, mkexpr(gsrc), mkexpr(esrc)) );
7994 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
7995 assign( rss64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSS)) );
7997 else
7998 if (sz == 2 && left_shift) {
7999 assign( tmp64,
8000 binop(Iop_32HLto64,
8001 binop(Iop_16HLto32, mkexpr(esrc), mkexpr(gsrc)),
8002 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc))
8004 /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */
8005 assign( res64,
8006 binop(Iop_Shr64,
8007 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
8008 mkU8(48)) );
8009 /* subshift formed by shifting [esrc'0000'0000'0000] */
8010 assign( rss64,
8011 binop(Iop_Shr64,
8012 binop(Iop_Shl64,
8013 binop(Iop_Shl64, unop(Iop_16Uto64, mkexpr(esrc)),
8014 mkU8(48)),
8015 mkexpr(tmpSS)),
8016 mkU8(48)) );
8018 else
8019 if (sz == 2 && !left_shift) {
8020 assign( tmp64,
8021 binop(Iop_32HLto64,
8022 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)),
8023 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(esrc))
8025 /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */
8026 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
8027 /* subshift formed by shifting [0000'0000'0000'esrc] */
8028 assign( rss64, binop(Iop_Shr64,
8029 unop(Iop_16Uto64, mkexpr(esrc)),
8030 mkexpr(tmpSS)) );
8033 } else {
8035 vassert(sz == 8);
8036 if (left_shift) {
8037 assign( res64, shiftL64_with_extras( esrc, gsrc, tmpSH ));
8038 assign( rss64, shiftL64_with_extras( esrc, gsrc, tmpSS ));
8039 } else {
8040 assign( res64, shiftR64_with_extras( gsrc, esrc, tmpSH ));
8041 assign( rss64, shiftR64_with_extras( gsrc, esrc, tmpSS ));
8046 resTy = newTemp(ty);
8047 rssTy = newTemp(ty);
8048 assign( resTy, narrowTo(ty, mkexpr(res64)) );
8049 assign( rssTy, narrowTo(ty, mkexpr(rss64)) );
8051 /* Put result back and write the flags thunk. */
8052 setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl64 : Iop_Sar64,
8053 resTy, rssTy, ty, tmpSH );
8055 if (epartIsReg(modrm)) {
8056 putIRegE(sz, pfx, modrm, mkexpr(resTy));
8057 } else {
8058 storeLE( mkexpr(addr), mkexpr(resTy) );
8061 if (amt_is_literal) delta++;
8062 return delta;
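/* A reference sketch of the 32-bit case above in plain C (names are
   illustrative only); the count is assumed already masked to 0 .. 31:

     uint32_t shld32_ref ( uint32_t e, uint32_t g, unsigned amt ) {
        uint64_t pair = ((uint64_t)e << 32) | g;   // E on top of G
        return (uint32_t)((pair << amt) >> 32);    // keep the high half
     }
     uint32_t shrd32_ref ( uint32_t e, uint32_t g, unsigned amt ) {
        uint64_t pair = ((uint64_t)g << 32) | e;   // G on top of E
        return (uint32_t)(pair >> amt);            // keep the low half
     }

   The 16-bit variants replicate gsrc to pad out the 64-bit
   intermediate, and the 64-bit variants use shiftL64_with_extras /
   shiftR64_with_extras instead of a wider intermediate. */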
8066 /* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not
8067 required. */
8069 typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;
8071 static const HChar* nameBtOp ( BtOp op )
8073 switch (op) {
8074 case BtOpNone: return "";
8075 case BtOpSet: return "s";
8076 case BtOpReset: return "r";
8077 case BtOpComp: return "c";
8078 default: vpanic("nameBtOp(amd64)");
8083 static
8084 ULong dis_bt_G_E ( const VexAbiInfo* vbi,
8085 Prefix pfx, Int sz, Long delta, BtOp op,
8086 /*OUT*/Bool* decode_OK )
8088 HChar dis_buf[50];
8089 UChar modrm;
8090 Int len;
8091 IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
8092 t_addr1, t_rsp, t_mask, t_new;
8094 vassert(sz == 2 || sz == 4 || sz == 8);
8096 t_fetched = t_bitno0 = t_bitno1 = t_bitno2
8097 = t_addr0 = t_addr1 = t_rsp
8098 = t_mask = t_new = IRTemp_INVALID;
8100 t_fetched = newTemp(Ity_I8);
8101 t_new = newTemp(Ity_I8);
8102 t_bitno0 = newTemp(Ity_I64);
8103 t_bitno1 = newTemp(Ity_I64);
8104 t_bitno2 = newTemp(Ity_I8);
8105 t_addr1 = newTemp(Ity_I64);
8106 modrm = getUChar(delta);
8108 *decode_OK = True;
8109 if (epartIsReg(modrm)) {
8110 /* F2 and F3 are never acceptable. */
8111 if (haveF2orF3(pfx)) {
8112 *decode_OK = False;
8113 return delta;
8115 } else {
8116 /* F2 or F3 (but not both) are allowed, provided LOCK is also
8117 present, and only for the BTC/BTS/BTR cases (not BT). */
8118 if (haveF2orF3(pfx)) {
8119 if (haveF2andF3(pfx) || !haveLOCK(pfx) || op == BtOpNone) {
8120 *decode_OK = False;
8121 return delta;
8126 assign( t_bitno0, widenSto64(getIRegG(sz, pfx, modrm)) );
8128 if (epartIsReg(modrm)) {
8129 delta++;
8130 /* Get it onto the client's stack. Oh, this is a horrible
8131 kludge. See https://bugs.kde.org/show_bug.cgi?id=245925.
8132 Because of the ELF ABI stack redzone, there may be live data
8133 up to 128 bytes below %RSP. So we can't just push it on the
8134 stack, else we may wind up trashing live data, and causing
8135 impossible-to-find simulation errors. (Yes, this did
8136 happen.) So we need to drop RSP by at least 128 before
8137 pushing it. That unfortunately means hitting Memcheck's
8138 fast-case painting code. Ideally we should drop more than
8139 128, to reduce the chances of breaking buggy programs that
8140 have live data below -128(%RSP). Memcheck fast-cases moves
8141 of 288 bytes due to the need to handle ppc64-linux quickly,
8142 so let's use 288. Of course the real fix is to get rid of
8143 this kludge entirely. */
8144 t_rsp = newTemp(Ity_I64);
8145 t_addr0 = newTemp(Ity_I64);
8147 vassert(vbi->guest_stack_redzone_size == 128);
8148 assign( t_rsp, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(288)) );
8149 putIReg64(R_RSP, mkexpr(t_rsp));
8151 storeLE( mkexpr(t_rsp), getIRegE(sz, pfx, modrm) );
8153 /* Make t_addr0 point at it. */
8154 assign( t_addr0, mkexpr(t_rsp) );
8156 /* Mask out upper bits of the shift amount, since we're doing a
8157 reg. */
8158 assign( t_bitno1, binop(Iop_And64,
8159 mkexpr(t_bitno0),
8160 mkU64(sz == 8 ? 63 : sz == 4 ? 31 : 15)) );
8162 } else {
8163 t_addr0 = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
8164 delta += len;
8165 assign( t_bitno1, mkexpr(t_bitno0) );
8168 /* At this point: t_addr0 is the address being operated on. If it
8169 was a reg, we will have pushed it onto the client's stack.
8170 t_bitno1 is the bit number, suitably masked in the case of a
8171 reg. */
8173 /* Now the main sequence. */
8174 assign( t_addr1,
8175 binop(Iop_Add64,
8176 mkexpr(t_addr0),
8177 binop(Iop_Sar64, mkexpr(t_bitno1), mkU8(3))) );
8179 /* t_addr1 now holds effective address */
8181 assign( t_bitno2,
8182 unop(Iop_64to8,
8183 binop(Iop_And64, mkexpr(t_bitno1), mkU64(7))) );
8185 /* t_bitno2 contains offset of bit within byte */
8187 if (op != BtOpNone) {
8188 t_mask = newTemp(Ity_I8);
8189 assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) );
8192 /* t_mask is now a suitable byte mask */
8194 assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) );
8196 if (op != BtOpNone) {
8197 switch (op) {
8198 case BtOpSet:
8199 assign( t_new,
8200 binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
8201 break;
8202 case BtOpComp:
8203 assign( t_new,
8204 binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
8205 break;
8206 case BtOpReset:
8207 assign( t_new,
8208 binop(Iop_And8, mkexpr(t_fetched),
8209 unop(Iop_Not8, mkexpr(t_mask))) );
8210 break;
8211 default:
8212 vpanic("dis_bt_G_E(amd64)");
8214 if ((haveLOCK(pfx)) && !epartIsReg(modrm)) {
8215 casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
8216 mkexpr(t_new)/*new*/,
8217 guest_RIP_curr_instr );
8218 } else {
8219 storeLE( mkexpr(t_addr1), mkexpr(t_new) );
8223 /* Side effect done; now get selected bit into Carry flag. The Intel docs
8224 (as of 2015, at least) say that C holds the result, Z is unchanged, and
8225 O,S,A and P are undefined. However, on Skylake it appears that O,S,A,P
8226 are also unchanged, so let's do that. */
8227 const ULong maskC = AMD64G_CC_MASK_C;
8228 const ULong maskOSZAP = AMD64G_CC_MASK_O | AMD64G_CC_MASK_S
8229 | AMD64G_CC_MASK_Z | AMD64G_CC_MASK_A
8230 | AMD64G_CC_MASK_P;
8232 IRTemp old_rflags = newTemp(Ity_I64);
8233 assign(old_rflags, mk_amd64g_calculate_rflags_all());
8235 IRTemp new_rflags = newTemp(Ity_I64);
8236 assign(new_rflags,
8237 binop(Iop_Or64,
8238 binop(Iop_And64, mkexpr(old_rflags), mkU64(maskOSZAP)),
8239 binop(Iop_And64,
8240 binop(Iop_Shr64,
8241 unop(Iop_8Uto64, mkexpr(t_fetched)),
8242 mkexpr(t_bitno2)),
8243 mkU64(maskC))));
8245 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
8246 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
8247 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
8248 /* Set NDEP even though it isn't used. This makes redundant-PUT
8249 elimination of previous stores to this field work better. */
8250 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
8252 /* Move reg operand from stack back to reg */
8253 if (epartIsReg(modrm)) {
8254 /* t_rsp still points at it. */
8255 /* only write the reg if actually modifying it; doing otherwise
8256 zeroes the top half erroneously when doing btl due to
8257 standard zero-extend rule */
8258 if (op != BtOpNone)
8259 putIRegE(sz, pfx, modrm, loadLE(szToITy(sz), mkexpr(t_rsp)) );
8260 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t_rsp), mkU64(288)) );
8263 DIP("bt%s%c %s, %s\n",
8264 nameBtOp(op), nameISize(sz), nameIRegG(sz, pfx, modrm),
8265 ( epartIsReg(modrm) ? nameIRegE(sz, pfx, modrm) : dis_buf ) );
8267 return delta;
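/* A reference sketch of the bit addressing used above, in plain C
   (illustrative only).  For the memory form the 64-bit bit offset may
   be negative, hence the arithmetic (Sar64) shift when forming the
   byte address; for the register form it has already been masked to
   the operand width:

     uint8_t* p   = base + (bitno >> 3);   // signed shift
     unsigned bit = bitno & 7;
     CF           = (*p >> bit) & 1;
     if (op == BtOpSet)   *p |=  (1 << bit);
     if (op == BtOpReset) *p &= ~(1 << bit);
     if (op == BtOpComp)  *p ^=  (1 << bit);
*/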
8272 /* Handle BSF/BSR. Only v-size seems necessary. */
8273 static
8274 ULong dis_bs_E_G ( const VexAbiInfo* vbi,
8275 Prefix pfx, Int sz, Long delta, Bool fwds )
8277 Bool isReg;
8278 UChar modrm;
8279 HChar dis_buf[50];
8281 IRType ty = szToITy(sz);
8282 IRTemp src = newTemp(ty);
8283 IRTemp dst = newTemp(ty);
8284 IRTemp src64 = newTemp(Ity_I64);
8285 IRTemp dst64 = newTemp(Ity_I64);
8286 IRTemp srcB = newTemp(Ity_I1);
8288 vassert(sz == 8 || sz == 4 || sz == 2);
8290 modrm = getUChar(delta);
8291 isReg = epartIsReg(modrm);
8292 if (isReg) {
8293 delta++;
8294 assign( src, getIRegE(sz, pfx, modrm) );
8295 } else {
8296 Int len;
8297 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
8298 delta += len;
8299 assign( src, loadLE(ty, mkexpr(addr)) );
8302 DIP("bs%c%c %s, %s\n",
8303 fwds ? 'f' : 'r', nameISize(sz),
8304 ( isReg ? nameIRegE(sz, pfx, modrm) : dis_buf ),
8305 nameIRegG(sz, pfx, modrm));
8307 /* First, widen src to 64 bits if it is not already. */
8308 assign( src64, widenUto64(mkexpr(src)) );
8310 /* Generate a bool expression which is zero iff the original is
8311 zero, and nonzero otherwise. Ask for a CmpNE version which, if
8312 instrumented by Memcheck, is instrumented expensively, since
8313 this may be used on the output of a preceding movmskb insn,
8314 which has been known to be partially defined, and in need of
8315 careful handling. */
8316 assign( srcB, binop(Iop_ExpCmpNE64, mkexpr(src64), mkU64(0)) );
8318 /* Flags: Z is 1 iff source value is zero. All others
8319 are undefined -- we force them to zero. */
8320 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
8321 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
8322 stmt( IRStmt_Put(
8323 OFFB_CC_DEP1,
8324 IRExpr_ITE( mkexpr(srcB),
8325 /* src!=0 */
8326 mkU64(0),
8327 /* src==0 */
8328 mkU64(AMD64G_CC_MASK_Z)
8331 /* Set NDEP even though it isn't used. This makes redundant-PUT
8332 elimination of previous stores to this field work better. */
8333 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
8335 /* Result: if the source value is zero, we can't use
8336 Iop_Clz64/Iop_Ctz64 as they have no defined result in that case.
8337 But anyway, amd64 semantics say the result is undefined in
8338 such situations. Hence handle the zero case specially. */
8340 /* Bleh. What we compute:
8342 bsf64: if src == 0 then {dst is unchanged}
8343 else Ctz64(src)
8345 bsr64: if src == 0 then {dst is unchanged}
8346 else 63 - Clz64(src)
8348 bsf32: if src == 0 then {dst is unchanged}
8349 else Ctz64(32Uto64(src))
8351 bsr32: if src == 0 then {dst is unchanged}
8352 else 63 - Clz64(32Uto64(src))
8354 bsf16: if src == 0 then {dst is unchanged}
8355 else Ctz64(32Uto64(16Uto32(src)))
8357 bsr16: if src == 0 then {dst is unchanged}
8358 else 63 - Clz64(32Uto64(16Uto32(src)))
8361 /* The main computation, guarding against zero. */
8362 assign( dst64,
8363 IRExpr_ITE(
8364 mkexpr(srcB),
8365 /* src != 0 */
8366 fwds ? unop(Iop_Ctz64, mkexpr(src64))
8367 : binop(Iop_Sub64,
8368 mkU64(63),
8369 unop(Iop_Clz64, mkexpr(src64))),
8370 /* src == 0 -- leave dst unchanged */
8371 widenUto64( getIRegG( sz, pfx, modrm ) )
8375 if (sz == 2)
8376 assign( dst, unop(Iop_64to16, mkexpr(dst64)) );
8377 else
8378 if (sz == 4)
8379 assign( dst, unop(Iop_64to32, mkexpr(dst64)) );
8380 else
8381 assign( dst, mkexpr(dst64) );
8383 /* dump result back */
8384 putIRegG( sz, pfx, modrm, mkexpr(dst) );
8386 return delta;
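/* A reference sketch of the widened computation above, in plain C and
   using GCC-style builtins purely for illustration.  For the 32- and
   16-bit forms src is first zero-extended to 64 bits, and in all
   cases the destination is left unchanged when the source is zero:

     if (src64 != 0)
        dst64 = fwds ? (ULong)__builtin_ctzll(src64)        // BSF
                     : 63 - (ULong)__builtin_clzll(src64);  // BSR
     // ZF = (src64 == 0); the remaining flags are forced to zero
*/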
8390 /* swap rAX with the reg specified by reg and REX.B */
8391 static
8392 void codegen_xchg_rAX_Reg ( Prefix pfx, Int sz, UInt regLo3 )
8394 IRType ty = szToITy(sz);
8395 IRTemp t1 = newTemp(ty);
8396 IRTemp t2 = newTemp(ty);
8397 vassert(sz == 2 || sz == 4 || sz == 8);
8398 vassert(regLo3 < 8);
8399 if (sz == 8) {
8400 assign( t1, getIReg64(R_RAX) );
8401 assign( t2, getIRegRexB(8, pfx, regLo3) );
8402 putIReg64( R_RAX, mkexpr(t2) );
8403 putIRegRexB(8, pfx, regLo3, mkexpr(t1) );
8404 } else if (sz == 4) {
8405 assign( t1, getIReg32(R_RAX) );
8406 assign( t2, getIRegRexB(4, pfx, regLo3) );
8407 putIReg32( R_RAX, mkexpr(t2) );
8408 putIRegRexB(4, pfx, regLo3, mkexpr(t1) );
8409 } else {
8410 assign( t1, getIReg16(R_RAX) );
8411 assign( t2, getIRegRexB(2, pfx, regLo3) );
8412 putIReg16( R_RAX, mkexpr(t2) );
8413 putIRegRexB(2, pfx, regLo3, mkexpr(t1) );
8415 DIP("xchg%c %s, %s\n",
8416 nameISize(sz), nameIRegRAX(sz),
8417 nameIRegRexB(sz,pfx, regLo3));
8421 static
8422 void codegen_SAHF ( void )
8424 /* Set the flags to:
8425 (amd64g_calculate_flags_all() & AMD64G_CC_MASK_O)
8426 -- retain the old O flag
8427 | (%AH & (AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
8428 |AMD64G_CC_MASK_P|AMD64G_CC_MASK_C)
8430 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
8431 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
8432 IRTemp oldflags = newTemp(Ity_I64);
8433 assign( oldflags, mk_amd64g_calculate_rflags_all() );
8434 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
8435 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
8436 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
8437 stmt( IRStmt_Put( OFFB_CC_DEP1,
8438 binop(Iop_Or64,
8439 binop(Iop_And64, mkexpr(oldflags), mkU64(AMD64G_CC_MASK_O)),
8440 binop(Iop_And64,
8441 binop(Iop_Shr64, getIReg64(R_RAX), mkU8(8)),
8442 mkU64(mask_SZACP))
8448 static
8449 void codegen_LAHF ( void )
8451 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
8452 IRExpr* rax_with_hole;
8453 IRExpr* new_byte;
8454 IRExpr* new_rax;
8455 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
8456 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
8458 IRTemp flags = newTemp(Ity_I64);
8459 assign( flags, mk_amd64g_calculate_rflags_all() );
8461 rax_with_hole
8462 = binop(Iop_And64, getIReg64(R_RAX), mkU64(~0xFF00ULL));
8463 new_byte
8464 = binop(Iop_Or64, binop(Iop_And64, mkexpr(flags), mkU64(mask_SZACP)),
8465 mkU64(1<<1));
8466 new_rax
8467 = binop(Iop_Or64, rax_with_hole,
8468 binop(Iop_Shl64, new_byte, mkU8(8)));
8469 putIReg64(R_RAX, new_rax);
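/* A reference sketch of LAHF as built above, in plain C (illustrative
   only).  AH receives SF:ZF:0:AF:0:PF:1:CF, all of which live in the
   low byte of the rflags image, so:

     UChar ah = (rflags & mask_SZACP) | 0x02;           // bit 1 always set
     rax      = (rax & ~0xFF00ULL) | ((ULong)ah << 8);

   SAHF (above) is the inverse: it copies AH back into those five flag
   bits and keeps only the old OF from the previous flags. */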
8473 static
8474 ULong dis_cmpxchg_G_E ( /*OUT*/Bool* ok,
8475 const VexAbiInfo* vbi,
8476 Prefix pfx,
8477 Int size,
8478 Long delta0 )
8480 HChar dis_buf[50];
8481 Int len;
8483 IRType ty = szToITy(size);
8484 IRTemp acc = newTemp(ty);
8485 IRTemp src = newTemp(ty);
8486 IRTemp dest = newTemp(ty);
8487 IRTemp dest2 = newTemp(ty);
8488 IRTemp acc2 = newTemp(ty);
8489 IRTemp cond = newTemp(Ity_I1);
8490 IRTemp addr = IRTemp_INVALID;
8491 UChar rm = getUChar(delta0);
8493 /* There are 3 cases to consider:
8495 reg-reg: ignore any lock prefix, generate sequence based
8496 on ITE
8498 reg-mem, not locked: ignore any lock prefix, generate sequence
8499 based on ITE
8501 reg-mem, locked: use IRCAS
8504 /* Decide whether F2 or F3 are acceptable. Never for register
8505 case, but for the memory case, one or the other is OK provided
8506 LOCK is also present. */
8507 if (epartIsReg(rm)) {
8508 if (haveF2orF3(pfx)) {
8509 *ok = False;
8510 return delta0;
8512 } else {
8513 if (haveF2orF3(pfx)) {
8514 if (haveF2andF3(pfx) || !haveLOCK(pfx)) {
8515 *ok = False;
8516 return delta0;
8521 if (epartIsReg(rm)) {
8522 /* case 1 */
8523 assign( dest, getIRegE(size, pfx, rm) );
8524 delta0++;
8525 assign( src, getIRegG(size, pfx, rm) );
8526 assign( acc, getIRegRAX(size) );
8527 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
8528 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
8529 assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
8530 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
8531 putIRegRAX(size, mkexpr(acc2));
8532 putIRegE(size, pfx, rm, mkexpr(dest2));
8533 DIP("cmpxchg%c %s,%s\n", nameISize(size),
8534 nameIRegG(size,pfx,rm),
8535 nameIRegE(size,pfx,rm) );
8537 else if (!epartIsReg(rm) && !haveLOCK(pfx)) {
8538 /* case 2 */
8539 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8540 assign( dest, loadLE(ty, mkexpr(addr)) );
8541 delta0 += len;
8542 assign( src, getIRegG(size, pfx, rm) );
8543 assign( acc, getIRegRAX(size) );
8544 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
8545 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
8546 assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
8547 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
8548 putIRegRAX(size, mkexpr(acc2));
8549 storeLE( mkexpr(addr), mkexpr(dest2) );
8550 DIP("cmpxchg%c %s,%s\n", nameISize(size),
8551 nameIRegG(size,pfx,rm), dis_buf);
8553 else if (!epartIsReg(rm) && haveLOCK(pfx)) {
8554 /* case 3 */
8555 /* src is new value. acc is expected value. dest is old value.
8556 Compute success from the output of the IRCAS, and steer the
8557 new value for RAX accordingly: in case of success, RAX is
8558 unchanged. */
8559 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8560 delta0 += len;
8561 assign( src, getIRegG(size, pfx, rm) );
8562 assign( acc, getIRegRAX(size) );
8563 stmt( IRStmt_CAS(
8564 mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
8565 NULL, mkexpr(acc), NULL, mkexpr(src) )
8567 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
8568 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
8569 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
8570 putIRegRAX(size, mkexpr(acc2));
8571 DIP("cmpxchg%c %s,%s\n", nameISize(size),
8572 nameIRegG(size,pfx,rm), dis_buf);
8574 else vassert(0);
8576 *ok = True;
8577 return delta0;
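/* A reference sketch of the CMPXCHG semantics implemented above, in
   plain C and ignoring atomicity (illustrative only):

     if (rax_acc == dest) { ZF = 1; dest    = src;  }   // success
     else                 { ZF = 0; rax_acc = dest; }   // failure

   All flags are those of the comparison (rax_acc - dest).  In the
   locked memory case the same steering is driven by an IRCAS so that
   the compare-and-store really is atomic. */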
8581 /* Handle conditional move instructions of the form
8582 cmovcc E(reg-or-mem), G(reg)
8584 E(src) is reg-or-mem
8585 G(dst) is reg.
8587 If E is reg, --> GET %E, tmps
8588 GET %G, tmpd
8589 CMOVcc tmps, tmpd
8590 PUT tmpd, %G
8592 If E is mem --> (getAddr E) -> tmpa
8593 LD (tmpa), tmps
8594 GET %G, tmpd
8595 CMOVcc tmps, tmpd
8596 PUT tmpd, %G
8598 static
8599 ULong dis_cmov_E_G ( const VexAbiInfo* vbi,
8600 Prefix pfx,
8601 Int sz,
8602 AMD64Condcode cond,
8603 Long delta0 )
8605 UChar rm = getUChar(delta0);
8606 HChar dis_buf[50];
8607 Int len;
8609 IRType ty = szToITy(sz);
8610 IRTemp tmps = newTemp(ty);
8611 IRTemp tmpd = newTemp(ty);
8613 if (epartIsReg(rm)) {
8614 assign( tmps, getIRegE(sz, pfx, rm) );
8615 assign( tmpd, getIRegG(sz, pfx, rm) );
8617 putIRegG( sz, pfx, rm,
8618 IRExpr_ITE( mk_amd64g_calculate_condition(cond),
8619 mkexpr(tmps),
8620 mkexpr(tmpd) )
8622 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
8623 nameIRegE(sz,pfx,rm),
8624 nameIRegG(sz,pfx,rm));
8625 return 1+delta0;
8628 /* E refers to memory */
8630 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8631 assign( tmps, loadLE(ty, mkexpr(addr)) );
8632 assign( tmpd, getIRegG(sz, pfx, rm) );
8634 putIRegG( sz, pfx, rm,
8635 IRExpr_ITE( mk_amd64g_calculate_condition(cond),
8636 mkexpr(tmps),
8637 mkexpr(tmpd) )
8640 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
8641 dis_buf,
8642 nameIRegG(sz,pfx,rm));
8643 return len+delta0;
8648 static
8649 ULong dis_xadd_G_E ( /*OUT*/Bool* decode_ok,
8650 const VexAbiInfo* vbi,
8651 Prefix pfx, Int sz, Long delta0 )
8653 Int len;
8654 UChar rm = getUChar(delta0);
8655 HChar dis_buf[50];
8657 IRType ty = szToITy(sz);
8658 IRTemp tmpd = newTemp(ty);
8659 IRTemp tmpt0 = newTemp(ty);
8660 IRTemp tmpt1 = newTemp(ty);
8662 /* There are 3 cases to consider:
8664 reg-reg: ignore any lock prefix,
8665 generate 'naive' (non-atomic) sequence
8667 reg-mem, not locked: ignore any lock prefix, generate 'naive'
8668 (non-atomic) sequence
8670 reg-mem, locked: use IRCAS
8673 if (epartIsReg(rm)) {
8674 /* case 1 */
8675 assign( tmpd, getIRegE(sz, pfx, rm) );
8676 assign( tmpt0, getIRegG(sz, pfx, rm) );
8677 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
8678 mkexpr(tmpd), mkexpr(tmpt0)) );
8679 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
8680 putIRegG(sz, pfx, rm, mkexpr(tmpd));
8681 putIRegE(sz, pfx, rm, mkexpr(tmpt1));
8682 DIP("xadd%c %s, %s\n",
8683 nameISize(sz), nameIRegG(sz,pfx,rm), nameIRegE(sz,pfx,rm));
8684 *decode_ok = True;
8685 return 1+delta0;
8687 else if (!epartIsReg(rm) && !haveLOCK(pfx)) {
8688 /* case 2 */
8689 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8690 assign( tmpd, loadLE(ty, mkexpr(addr)) );
8691 assign( tmpt0, getIRegG(sz, pfx, rm) );
8692 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
8693 mkexpr(tmpd), mkexpr(tmpt0)) );
8694 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
8695 storeLE( mkexpr(addr), mkexpr(tmpt1) );
8696 putIRegG(sz, pfx, rm, mkexpr(tmpd));
8697 DIP("xadd%c %s, %s\n",
8698 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
8699 *decode_ok = True;
8700 return len+delta0;
8702 else if (!epartIsReg(rm) && haveLOCK(pfx)) {
8703 /* case 3 */
8704 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8705 assign( tmpd, loadLE(ty, mkexpr(addr)) );
8706 assign( tmpt0, getIRegG(sz, pfx, rm) );
8707 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
8708 mkexpr(tmpd), mkexpr(tmpt0)) );
8709 casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
8710 mkexpr(tmpt1)/*newVal*/, guest_RIP_curr_instr );
8711 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
8712 putIRegG(sz, pfx, rm, mkexpr(tmpd));
8713 DIP("xadd%c %s, %s\n",
8714 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
8715 *decode_ok = True;
8716 return len+delta0;
8718 /*UNREACHED*/
8719 vassert(0);
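/* A reference sketch of XADD as handled above, in plain C and
   ignoring atomicity (illustrative only):

     tmp = dst + src;
     src = dst;          // the G register receives the old destination
     dst = tmp;          // the E operand receives the sum
     // flags are those of the addition

   The locked memory form routes the store through casLE so that the
   read-modify-write is atomic. */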
8722 //.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
8723 //..
8724 //.. static
8725 //.. UInt dis_mov_Ew_Sw ( UChar sorb, Long delta0 )
8726 //.. {
8727 //.. Int len;
8728 //.. IRTemp addr;
8729 //.. UChar rm = getUChar(delta0);
8730 //.. HChar dis_buf[50];
8731 //..
8732 //.. if (epartIsReg(rm)) {
8733 //.. putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
8734 //.. DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
8735 //.. return 1+delta0;
8736 //.. } else {
8737 //.. addr = disAMode ( &len, sorb, delta0, dis_buf );
8738 //.. putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
8739 //.. DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
8740 //.. return len+delta0;
8741 //.. }
8742 //.. }
8743 //..
8744 //.. /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If
8745 //.. dst is ireg and sz==4, zero out top half of it. */
8746 //..
8747 //.. static
8748 //.. UInt dis_mov_Sw_Ew ( UChar sorb,
8749 //.. Int sz,
8750 //.. UInt delta0 )
8751 //.. {
8752 //.. Int len;
8753 //.. IRTemp addr;
8754 //.. UChar rm = getUChar(delta0);
8755 //.. HChar dis_buf[50];
8756 //..
8757 //.. vassert(sz == 2 || sz == 4);
8758 //..
8759 //.. if (epartIsReg(rm)) {
8760 //.. if (sz == 4)
8761 //.. putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
8762 //.. else
8763 //.. putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));
8764 //..
8765 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
8766 //.. return 1+delta0;
8767 //.. } else {
8768 //.. addr = disAMode ( &len, sorb, delta0, dis_buf );
8769 //.. storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
8770 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
8771 //.. return len+delta0;
8772 //.. }
8773 //.. }
8775 /* Handle move instructions of the form
8776 mov S, E meaning
8777 mov sreg, reg-or-mem
8778 Is passed a ptr to the modRM byte, and the data size. Returns
8779 the address advanced completely over this instruction.
8781 VEX does not currently simulate segment registers on AMD64 which means that
8782 instead of moving a value of a segment register, zero is moved to the
8783 destination. The zero value represents a null (unused) selector. This is
8784 not correct (especially for the %cs, %fs and %gs registers) but it seems to
8785 provide a sufficient simulation for currently seen programs that use this
8786 instruction. If some program actually decides to use the obtained segment
8787 selector for something meaningful then the zero value should be a clear
8788 indicator that there is some problem.
8790 S(src) is sreg.
8791 E(dst) is reg-or-mem
8793 If E is reg, --> PUT $0, %E
8795 If E is mem, --> (getAddr E) -> tmpa
8796 ST $0, (tmpa)
8798 static
8799 ULong dis_mov_S_E ( const VexAbiInfo* vbi,
8800 Prefix pfx,
8801 Int size,
8802 Long delta0 )
8804 Int len;
8805 UChar rm = getUChar(delta0);
8806 HChar dis_buf[50];
8808 if (epartIsReg(rm)) {
8809 putIRegE(size, pfx, rm, mkU(szToITy(size), 0));
8810 DIP("mov %s,%s\n", nameSReg(gregOfRexRM(pfx, rm)),
8811 nameIRegE(size, pfx, rm));
8812 return 1+delta0;
8815 /* E refers to memory */
8817 IRTemp addr = disAMode(&len, vbi, pfx, delta0, dis_buf, 0);
8818 storeLE(mkexpr(addr), mkU16(0));
8819 DIP("mov %s,%s\n", nameSReg(gregOfRexRM(pfx, rm)),
8820 dis_buf);
8821 return len+delta0;
8825 //.. static
8826 //.. void dis_push_segreg ( UInt sreg, Int sz )
8827 //.. {
8828 //.. IRTemp t1 = newTemp(Ity_I16);
8829 //.. IRTemp ta = newTemp(Ity_I32);
8830 //.. vassert(sz == 2 || sz == 4);
8831 //..
8832 //.. assign( t1, getSReg(sreg) );
8833 //.. assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
8834 //.. putIReg(4, R_ESP, mkexpr(ta));
8835 //.. storeLE( mkexpr(ta), mkexpr(t1) );
8836 //..
8837 //.. DIP("pushw %s\n", nameSReg(sreg));
8838 //.. }
8839 //..
8840 //.. static
8841 //.. void dis_pop_segreg ( UInt sreg, Int sz )
8842 //.. {
8843 //.. IRTemp t1 = newTemp(Ity_I16);
8844 //.. IRTemp ta = newTemp(Ity_I32);
8845 //.. vassert(sz == 2 || sz == 4);
8846 //..
8847 //.. assign( ta, getIReg(4, R_ESP) );
8848 //.. assign( t1, loadLE(Ity_I16, mkexpr(ta)) );
8849 //..
8850 //.. putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
8851 //.. putSReg( sreg, mkexpr(t1) );
8852 //.. DIP("pop %s\n", nameSReg(sreg));
8853 //.. }
8855 static
8856 void dis_ret ( /*MOD*/DisResult* dres, const VexAbiInfo* vbi, ULong d64 )
8858 IRTemp t1 = newTemp(Ity_I64);
8859 IRTemp t2 = newTemp(Ity_I64);
8860 IRTemp t3 = newTemp(Ity_I64);
8861 assign(t1, getIReg64(R_RSP));
8862 assign(t2, loadLE(Ity_I64,mkexpr(t1)));
8863 assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64)));
8864 putIReg64(R_RSP, mkexpr(t3));
8865 make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret");
8866 jmp_treg(dres, Ijk_Ret, t2);
8867 vassert(dres->whatNext == Dis_StopHere);
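/* A reference sketch of RET imm16 as translated above, in plain C
   (illustrative only; d64 is 0 for a plain RET):

     rip  = *(ULong*)rsp;     // return address popped from the stack
     rsp += 8 + d64;          // also discard d64 bytes of arguments
*/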
8871 /*------------------------------------------------------------*/
8872 /*--- SSE/SSE2/SSE3 helpers ---*/
8873 /*------------------------------------------------------------*/
8875 /* Indicates whether the op requires a rounding-mode argument. Note
8876 that this covers only vector floating point arithmetic ops, and
8877 omits the scalar ones that need rounding modes. Note also that
8878 inconsistencies here will get picked up later by the IR sanity
8879 checker, so this isn't correctness-critical. */
8880 static Bool requiresRMode ( IROp op )
8882 switch (op) {
8883 /* 128 bit ops */
8884 case Iop_Add32Fx4: case Iop_Sub32Fx4:
8885 case Iop_Mul32Fx4: case Iop_Div32Fx4:
8886 case Iop_Add64Fx2: case Iop_Sub64Fx2:
8887 case Iop_Mul64Fx2: case Iop_Div64Fx2:
8888 /* 256 bit ops */
8889 case Iop_Add32Fx8: case Iop_Sub32Fx8:
8890 case Iop_Mul32Fx8: case Iop_Div32Fx8:
8891 case Iop_Add64Fx4: case Iop_Sub64Fx4:
8892 case Iop_Mul64Fx4: case Iop_Div64Fx4:
8893 return True;
8894 default:
8895 break;
8897 return False;
8901 /* Worker function; do not call directly.
8902 Handles full width G = G `op` E and G = (not G) `op` E.
8905 static ULong dis_SSE_E_to_G_all_wrk (
8906 const VexAbiInfo* vbi,
8907 Prefix pfx, Long delta,
8908 const HChar* opname, IROp op,
8909 Bool invertG
8912 HChar dis_buf[50];
8913 Int alen;
8914 IRTemp addr;
8915 UChar rm = getUChar(delta);
8916 Bool needsRMode = requiresRMode(op);
8917 IRExpr* gpart
8918 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRexRM(pfx,rm)))
8919 : getXMMReg(gregOfRexRM(pfx,rm));
8920 if (epartIsReg(rm)) {
8921 putXMMReg(
8922 gregOfRexRM(pfx,rm),
8923 needsRMode
8924 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
8925 gpart,
8926 getXMMReg(eregOfRexRM(pfx,rm)))
8927 : binop(op, gpart,
8928 getXMMReg(eregOfRexRM(pfx,rm)))
8930 DIP("%s %s,%s\n", opname,
8931 nameXMMReg(eregOfRexRM(pfx,rm)),
8932 nameXMMReg(gregOfRexRM(pfx,rm)) );
8933 return delta+1;
8934 } else {
8935 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
8936 putXMMReg(
8937 gregOfRexRM(pfx,rm),
8938 needsRMode
8939 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
8940 gpart,
8941 loadLE(Ity_V128, mkexpr(addr)))
8942 : binop(op, gpart,
8943 loadLE(Ity_V128, mkexpr(addr)))
8945 DIP("%s %s,%s\n", opname,
8946 dis_buf,
8947 nameXMMReg(gregOfRexRM(pfx,rm)) );
8948 return delta+alen;
8953 /* All lanes SSE binary operation, G = G `op` E. */
8955 static
8956 ULong dis_SSE_E_to_G_all ( const VexAbiInfo* vbi,
8957 Prefix pfx, Long delta,
8958 const HChar* opname, IROp op )
8960 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, False );
8963 /* All lanes SSE binary operation, G = (not G) `op` E. */
8965 static
8966 ULong dis_SSE_E_to_G_all_invG ( const VexAbiInfo* vbi,
8967 Prefix pfx, Long delta,
8968 const HChar* opname, IROp op )
8970 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, True );
8974 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */
8976 static ULong dis_SSE_E_to_G_lo32 ( const VexAbiInfo* vbi,
8977 Prefix pfx, Long delta,
8978 const HChar* opname, IROp op )
8980 HChar dis_buf[50];
8981 Int alen;
8982 IRTemp addr;
8983 UChar rm = getUChar(delta);
8984 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
8985 if (epartIsReg(rm)) {
8986 putXMMReg( gregOfRexRM(pfx,rm),
8987 binop(op, gpart,
8988 getXMMReg(eregOfRexRM(pfx,rm))) );
8989 DIP("%s %s,%s\n", opname,
8990 nameXMMReg(eregOfRexRM(pfx,rm)),
8991 nameXMMReg(gregOfRexRM(pfx,rm)) );
8992 return delta+1;
8993 } else {
8994 /* We can only do a 32-bit memory read, so the upper 3/4 of the
8995 E operand needs to be made simply of zeroes. */
8996 IRTemp epart = newTemp(Ity_V128);
8997 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
8998 assign( epart, unop( Iop_32UtoV128,
8999 loadLE(Ity_I32, mkexpr(addr))) );
9000 putXMMReg( gregOfRexRM(pfx,rm),
9001 binop(op, gpart, mkexpr(epart)) );
9002 DIP("%s %s,%s\n", opname,
9003 dis_buf,
9004 nameXMMReg(gregOfRexRM(pfx,rm)) );
9005 return delta+alen;
9010 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. */
9012 static ULong dis_SSE_E_to_G_lo64 ( const VexAbiInfo* vbi,
9013 Prefix pfx, Long delta,
9014 const HChar* opname, IROp op )
9016 HChar dis_buf[50];
9017 Int alen;
9018 IRTemp addr;
9019 UChar rm = getUChar(delta);
9020 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
9021 if (epartIsReg(rm)) {
9022 putXMMReg( gregOfRexRM(pfx,rm),
9023 binop(op, gpart,
9024 getXMMReg(eregOfRexRM(pfx,rm))) );
9025 DIP("%s %s,%s\n", opname,
9026 nameXMMReg(eregOfRexRM(pfx,rm)),
9027 nameXMMReg(gregOfRexRM(pfx,rm)) );
9028 return delta+1;
9029 } else {
9030 /* We can only do a 64-bit memory read, so the upper half of the
9031 E operand needs to be made simply of zeroes. */
9032 IRTemp epart = newTemp(Ity_V128);
9033 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9034 assign( epart, unop( Iop_64UtoV128,
9035 loadLE(Ity_I64, mkexpr(addr))) );
9036 putXMMReg( gregOfRexRM(pfx,rm),
9037 binop(op, gpart, mkexpr(epart)) );
9038 DIP("%s %s,%s\n", opname,
9039 dis_buf,
9040 nameXMMReg(gregOfRexRM(pfx,rm)) );
9041 return delta+alen;
9046 /* All lanes unary SSE operation, G = op(E). */
9048 static ULong dis_SSE_E_to_G_unary_all (
9049 const VexAbiInfo* vbi,
9050 Prefix pfx, Long delta,
9051 const HChar* opname, IROp op
9054 HChar dis_buf[50];
9055 Int alen;
9056 IRTemp addr;
9057 UChar rm = getUChar(delta);
9058 // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
9059 // up in the usual way.
9060 Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2;
9061 if (epartIsReg(rm)) {
9062 IRExpr* src = getXMMReg(eregOfRexRM(pfx,rm));
9063 /* XXXROUNDINGFIXME */
9064 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
9065 : unop(op, src);
9066 putXMMReg( gregOfRexRM(pfx,rm), res );
9067 DIP("%s %s,%s\n", opname,
9068 nameXMMReg(eregOfRexRM(pfx,rm)),
9069 nameXMMReg(gregOfRexRM(pfx,rm)) );
9070 return delta+1;
9071 } else {
9072 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9073 IRExpr* src = loadLE(Ity_V128, mkexpr(addr));
9074 /* XXXROUNDINGFIXME */
9075 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
9076 : unop(op, src);
9077 putXMMReg( gregOfRexRM(pfx,rm), res );
9078 DIP("%s %s,%s\n", opname,
9079 dis_buf,
9080 nameXMMReg(gregOfRexRM(pfx,rm)) );
9081 return delta+alen;
9086 /* Lowest 32-bit lane only unary SSE operation, G = op(E). */
9088 static ULong dis_SSE_E_to_G_unary_lo32 (
9089 const VexAbiInfo* vbi,
9090 Prefix pfx, Long delta,
9091 const HChar* opname, IROp op
9094 /* First we need to get the old G value and patch the low 32 bits
9095 of the E operand into it. Then apply op and write back to G. */
9096 HChar dis_buf[50];
9097 Int alen;
9098 IRTemp addr;
9099 UChar rm = getUChar(delta);
9100 IRTemp oldG0 = newTemp(Ity_V128);
9101 IRTemp oldG1 = newTemp(Ity_V128);
9103 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
9105 if (epartIsReg(rm)) {
9106 assign( oldG1,
9107 binop( Iop_SetV128lo32,
9108 mkexpr(oldG0),
9109 getXMMRegLane32(eregOfRexRM(pfx,rm), 0)) );
9110 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
9111 DIP("%s %s,%s\n", opname,
9112 nameXMMReg(eregOfRexRM(pfx,rm)),
9113 nameXMMReg(gregOfRexRM(pfx,rm)) );
9114 return delta+1;
9115 } else {
9116 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9117 assign( oldG1,
9118 binop( Iop_SetV128lo32,
9119 mkexpr(oldG0),
9120 loadLE(Ity_I32, mkexpr(addr)) ));
9121 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
9122 DIP("%s %s,%s\n", opname,
9123 dis_buf,
9124 nameXMMReg(gregOfRexRM(pfx,rm)) );
9125 return delta+alen;
9130 /* Lowest 64-bit lane only unary SSE operation, G = op(E). */
9132 static ULong dis_SSE_E_to_G_unary_lo64 (
9133 const VexAbiInfo* vbi,
9134 Prefix pfx, Long delta,
9135 const HChar* opname, IROp op
9138 /* First we need to get the old G value and patch the low 64 bits
9139 of the E operand into it. Then apply op and write back to G. */
9140 HChar dis_buf[50];
9141 Int alen;
9142 IRTemp addr;
9143 UChar rm = getUChar(delta);
9144 IRTemp oldG0 = newTemp(Ity_V128);
9145 IRTemp oldG1 = newTemp(Ity_V128);
9147 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
9149 if (epartIsReg(rm)) {
9150 assign( oldG1,
9151 binop( Iop_SetV128lo64,
9152 mkexpr(oldG0),
9153 getXMMRegLane64(eregOfRexRM(pfx,rm), 0)) );
9154 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
9155 DIP("%s %s,%s\n", opname,
9156 nameXMMReg(eregOfRexRM(pfx,rm)),
9157 nameXMMReg(gregOfRexRM(pfx,rm)) );
9158 return delta+1;
9159 } else {
9160 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9161 assign( oldG1,
9162 binop( Iop_SetV128lo64,
9163 mkexpr(oldG0),
9164 loadLE(Ity_I64, mkexpr(addr)) ));
9165 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
9166 DIP("%s %s,%s\n", opname,
9167 dis_buf,
9168 nameXMMReg(gregOfRexRM(pfx,rm)) );
9169 return delta+alen;
9174 /* SSE integer binary operation:
9175 G = G `op` E (eLeft == False)
9176 G = E `op` G (eLeft == True)
9178 static ULong dis_SSEint_E_to_G(
9179 const VexAbiInfo* vbi,
9180 Prefix pfx, Long delta,
9181 const HChar* opname, IROp op,
9182 Bool eLeft
9185 HChar dis_buf[50];
9186 Int alen;
9187 IRTemp addr;
9188 UChar rm = getUChar(delta);
9189 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
9190 IRExpr* epart = NULL;
9191 if (epartIsReg(rm)) {
9192 epart = getXMMReg(eregOfRexRM(pfx,rm));
9193 DIP("%s %s,%s\n", opname,
9194 nameXMMReg(eregOfRexRM(pfx,rm)),
9195 nameXMMReg(gregOfRexRM(pfx,rm)) );
9196 delta += 1;
9197 } else {
9198 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9199 epart = loadLE(Ity_V128, mkexpr(addr));
9200 DIP("%s %s,%s\n", opname,
9201 dis_buf,
9202 nameXMMReg(gregOfRexRM(pfx,rm)) );
9203 delta += alen;
9205 putXMMReg( gregOfRexRM(pfx,rm),
9206 eLeft ? binop(op, epart, gpart)
9207 : binop(op, gpart, epart) );
9208 return delta;
9212 /* Helper for doing SSE FP comparisons. False return ==> unhandled.
9213 This is all a bit of a kludge in that it ignores the subtleties of
9214 ordered-vs-unordered and signalling-vs-nonsignalling in the Intel
9215 spec. The meaning of the outputs is as follows:
9217 preZeroP: the active lanes of both incoming arguments should be set to zero
9218 before performing the operation. IOW the actual args are to be ignored
9219 and instead zero bits are to be used. This is a bit strange but is needed
9220 to make the constant-false/true variants (FALSE_OQ, TRUE_UQ, FALSE_OS,
9221 TRUE_US) work.
9223 preSwapP: the args should be swapped before performing the operation. Note
9224 that zeroing arg input sections (per preZeroP) and swapping them (per
9225 preSwapP) are allowed to happen in either order; the result is the same.
9227 opP: this returns the actual comparison op to perform.
9229 postNotP: if true, the result(ing vector) of the comparison operation should
9230 be bitwise-not-ed. Note that only the lanes of the output actually
9231 computed by opP should be not-ed.
9233 static Bool findSSECmpOp ( /*OUT*/Bool* preZeroP,
9234 /*OUT*/Bool* preSwapP,
9235 /*OUT*/IROp* opP,
9236 /*OUT*/Bool* postNotP,
9237 UInt imm8, Bool all_lanes, Int sz )
9239 vassert(*preZeroP == False);
9240 vassert(*preSwapP == False);
9241 vassert(*opP == Iop_INVALID);
9242 vassert(*postNotP == False);
9244 if (imm8 >= 32) return False;
9246 /* First, compute a (preZero, preSwap, op, postNot) quad from
9247 the supplied imm8. */
9248 Bool preZero = False;
9249 Bool preSwap = False;
9250 IROp op = Iop_INVALID;
9251 Bool postNot = False;
9253 # define XXX(_preZero, _preSwap, _op, _postNot) \
9254 { preZero = _preZero; preSwap = _preSwap; op = _op; postNot = _postNot; }
9255 // If you add a case here, add a corresponding test for both VCMPSD_128
9256 // and VCMPSS_128 in avx-1.c.
9257 // Cases 0xA and above are
9258 // "Enhanced Comparison Predicate[s] for VEX-Encoded [insns]"
9259 switch (imm8) {
9260 // "O" = ordered, "U" = unordered
9261 // "Q" = non-signalling (quiet), "S" = signalling
9263 // replace active arg lanes in operands with zero
9264 // |
9265 // | swap operands before applying the cmp op?
9266 // | |
9267 // | | cmp op invert active lanes after?
9268 // | | | |
9269 // v v v v
9270 case 0x0: XXX(False, False, Iop_CmpEQ32Fx4, False); break; // EQ_OQ
9271 case 0x8: XXX(False, False, Iop_CmpEQ32Fx4, False); break; // EQ_UQ
9272 case 0x10: XXX(False, False, Iop_CmpEQ32Fx4, False); break; // EQ_OS
9273 case 0x18: XXX(False, False, Iop_CmpEQ32Fx4, False); break; // EQ_US
9275 case 0x1: XXX(False, False, Iop_CmpLT32Fx4, False); break; // LT_OS
9276 case 0x11: XXX(False, False, Iop_CmpLT32Fx4, False); break; // LT_OQ
9278 case 0x2: XXX(False, False, Iop_CmpLE32Fx4, False); break; // LE_OS
9279 case 0x12: XXX(False, False, Iop_CmpLE32Fx4, False); break; // LE_OQ
9281 case 0x3: XXX(False, False, Iop_CmpUN32Fx4, False); break; // UNORD_Q
9282 case 0x13: XXX(False, False, Iop_CmpUN32Fx4, False); break; // UNORD_S
9284 // 0xC: this isn't really right because it returns all-1s when
9285 // either operand is a NaN, and it should return all-0s.
9286 case 0x4: XXX(False, False, Iop_CmpEQ32Fx4, True); break; // NEQ_UQ
9287 case 0xC: XXX(False, False, Iop_CmpEQ32Fx4, True); break; // NEQ_OQ
9288 case 0x14: XXX(False, False, Iop_CmpEQ32Fx4, True); break; // NEQ_US
9289 case 0x1C: XXX(False, False, Iop_CmpEQ32Fx4, True); break; // NEQ_OS
9291 case 0x5: XXX(False, False, Iop_CmpLT32Fx4, True); break; // NLT_US
9292 case 0x15: XXX(False, False, Iop_CmpLT32Fx4, True); break; // NLT_UQ
9294 case 0x6: XXX(False, False, Iop_CmpLE32Fx4, True); break; // NLE_US
9295 case 0x16: XXX(False, False, Iop_CmpLE32Fx4, True); break; // NLE_UQ
9297 case 0x7: XXX(False, False, Iop_CmpUN32Fx4, True); break; // ORD_Q
9298 case 0x17: XXX(False, False, Iop_CmpUN32Fx4, True); break; // ORD_S
9300 case 0x9: XXX(False, True, Iop_CmpLE32Fx4, True); break; // NGE_US
9301 case 0x19: XXX(False, True, Iop_CmpLE32Fx4, True); break; // NGE_UQ
9303 case 0xA: XXX(False, True, Iop_CmpLT32Fx4, True); break; // NGT_US
9304 case 0x1A: XXX(False, True, Iop_CmpLT32Fx4, True); break; // NGT_UQ
9306 case 0xD: XXX(False, True, Iop_CmpLE32Fx4, False); break; // GE_OS
9307 case 0x1D: XXX(False, True, Iop_CmpLE32Fx4, False); break; // GE_OQ
9309 case 0xE: XXX(False, True, Iop_CmpLT32Fx4, False); break; // GT_OS
9310 case 0x1E: XXX(False, True, Iop_CmpLT32Fx4, False); break; // GT_OQ
9311 // Constant-value-result ops
9312 case 0xB: XXX(True, False, Iop_CmpEQ32Fx4, True); break; // FALSE_OQ
9313 case 0xF: XXX(True, False, Iop_CmpEQ32Fx4, False); break; // TRUE_UQ
9314 case 0x1B: XXX(True, False, Iop_CmpEQ32Fx4, True); break; // FALSE_OS
9315 case 0x1F: XXX(True, False, Iop_CmpEQ32Fx4, False); break; // TRUE_US
9316 /* Don't forget to add test cases to VCMPSS_128_<imm8> in
9317 avx-1.c if new cases turn up. */
9318 default: break;
9320 # undef XXX
9321 if (op == Iop_INVALID) return False;
9323 /* Now convert the op into one with the same arithmetic but that is
9324 correct for the width and laneage requirements. */
9326 /**/ if (sz == 4 && all_lanes) {
9327 switch (op) {
9328 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32Fx4; break;
9329 case Iop_CmpLT32Fx4: op = Iop_CmpLT32Fx4; break;
9330 case Iop_CmpLE32Fx4: op = Iop_CmpLE32Fx4; break;
9331 case Iop_CmpUN32Fx4: op = Iop_CmpUN32Fx4; break;
9332 default: vassert(0);
9335 else if (sz == 4 && !all_lanes) {
9336 switch (op) {
9337 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32F0x4; break;
9338 case Iop_CmpLT32Fx4: op = Iop_CmpLT32F0x4; break;
9339 case Iop_CmpLE32Fx4: op = Iop_CmpLE32F0x4; break;
9340 case Iop_CmpUN32Fx4: op = Iop_CmpUN32F0x4; break;
9341 default: vassert(0);
9344 else if (sz == 8 && all_lanes) {
9345 switch (op) {
9346 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64Fx2; break;
9347 case Iop_CmpLT32Fx4: op = Iop_CmpLT64Fx2; break;
9348 case Iop_CmpLE32Fx4: op = Iop_CmpLE64Fx2; break;
9349 case Iop_CmpUN32Fx4: op = Iop_CmpUN64Fx2; break;
9350 default: vassert(0);
9353 else if (sz == 8 && !all_lanes) {
9354 switch (op) {
9355 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64F0x2; break;
9356 case Iop_CmpLT32Fx4: op = Iop_CmpLT64F0x2; break;
9357 case Iop_CmpLE32Fx4: op = Iop_CmpLE64F0x2; break;
9358 case Iop_CmpUN32Fx4: op = Iop_CmpUN64F0x2; break;
9359 default: vassert(0);
9362 else {
9363 vpanic("findSSECmpOp(amd64,guest)");
9366 if (preZero) {
9367 // In this case, preSwap is irrelevant, but assert anyway.
9368 vassert(preSwap == False);
9370 *preZeroP = preZero; *preSwapP = preSwap; *opP = op; *postNotP = postNot;
9371 return True;
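/* Worked example of how callers use the quad returned above: for
   imm8 == 0xD (GE_OS) the table gives (preZero=False, preSwap=True,
   op=CmpLE, postNot=False), i.e. "a >= b" is computed as "b <= a" by
   swapping the operands first.  For imm8 == 0x5 (NLT_US) it gives
   (False, False, CmpLT, True): compute "a < b" and then invert the
   computed lanes.  In general, per computed lane,

     result = postNot ? ~op(x, y) : op(x, y)

   where (x, y) is (b, a) if preSwap else (a, b), and both are zero if
   preZero. */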
9375 /* Handles SSE 32F/64F comparisons. It can fail, in which case it
9376 returns the original delta to indicate failure. */
9378 static Long dis_SSE_cmp_E_to_G ( const VexAbiInfo* vbi,
9379 Prefix pfx, Long delta,
9380 const HChar* opname, Bool all_lanes, Int sz )
9382 Long delta0 = delta;
9383 HChar dis_buf[50];
9384 Int alen;
9385 UInt imm8;
9386 IRTemp addr;
9387 Bool preZero = False;
9388 Bool preSwap = False;
9389 IROp op = Iop_INVALID;
9390 Bool postNot = False;
9391 IRTemp plain = newTemp(Ity_V128);
9392 UChar rm = getUChar(delta);
9393 UShort mask = 0;
9394 vassert(sz == 4 || sz == 8);
9395 if (epartIsReg(rm)) {
9396 imm8 = getUChar(delta+1);
9397 if (imm8 >= 8) return delta0; /* FAIL */
9398 Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot,
9399 imm8, all_lanes, sz);
9400 if (!ok) return delta0; /* FAIL */
9401 vassert(!preZero); /* never needed for imm8 < 8 */
9402 vassert(!preSwap); /* never needed for imm8 < 8 */
9403 assign( plain, binop(op, getXMMReg(gregOfRexRM(pfx,rm)),
9404 getXMMReg(eregOfRexRM(pfx,rm))) );
9405 delta += 2;
9406 DIP("%s $%u,%s,%s\n", opname,
9407 imm8,
9408 nameXMMReg(eregOfRexRM(pfx,rm)),
9409 nameXMMReg(gregOfRexRM(pfx,rm)) );
9410 } else {
9411 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
9412 imm8 = getUChar(delta+alen);
9413 if (imm8 >= 8) return delta0; /* FAIL */
9414 Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot,
9415 imm8, all_lanes, sz);
9416 if (!ok) return delta0; /* FAIL */
9417 vassert(!preZero); /* never needed for imm8 < 8 */
9418 vassert(!preSwap); /* never needed for imm8 < 8 */
9419 assign( plain,
9420 binop(
9422 getXMMReg(gregOfRexRM(pfx,rm)),
9423 all_lanes
9424 ? loadLE(Ity_V128, mkexpr(addr))
9425 : sz == 8
9426 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
9427 : /*sz==4*/
9428 unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))
9431 delta += alen+1;
9432 DIP("%s $%u,%s,%s\n", opname,
9433 imm8,
9434 dis_buf,
9435 nameXMMReg(gregOfRexRM(pfx,rm)) );
9438 if (postNot && all_lanes) {
9439 putXMMReg( gregOfRexRM(pfx,rm),
9440 unop(Iop_NotV128, mkexpr(plain)) );
9442 else
9443 if (postNot && !all_lanes) {
9444 mask = toUShort(sz==4 ? 0x000F : 0x00FF);
9445 putXMMReg( gregOfRexRM(pfx,rm),
9446 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) );
9448 else {
9449 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(plain) );
9452 return delta;
9456 /* Vector by scalar shift of G by the amount specified at the bottom
9457 of E. */
9459 static ULong dis_SSE_shiftG_byE ( const VexAbiInfo* vbi,
9460 Prefix pfx, Long delta,
9461 const HChar* opname, IROp op )
9463 HChar dis_buf[50];
9464 Int alen, size;
9465 IRTemp addr;
9466 Bool shl, shr, sar;
9467 UChar rm = getUChar(delta);
9468 IRTemp g0 = newTemp(Ity_V128);
9469 IRTemp g1 = newTemp(Ity_V128);
9470 IRTemp amt = newTemp(Ity_I64);
9471 IRTemp amt8 = newTemp(Ity_I8);
9472 if (epartIsReg(rm)) {
9473 assign( amt, getXMMRegLane64(eregOfRexRM(pfx,rm), 0) );
9474 DIP("%s %s,%s\n", opname,
9475 nameXMMReg(eregOfRexRM(pfx,rm)),
9476 nameXMMReg(gregOfRexRM(pfx,rm)) );
9477 delta++;
9478 } else {
9479 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9480 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
9481 DIP("%s %s,%s\n", opname,
9482 dis_buf,
9483 nameXMMReg(gregOfRexRM(pfx,rm)) );
9484 delta += alen;
9486 assign( g0, getXMMReg(gregOfRexRM(pfx,rm)) );
9487 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
9489 shl = shr = sar = False;
9490 size = 0;
9491 switch (op) {
9492 case Iop_ShlN16x8: shl = True; size = 16; break;
9493 case Iop_ShlN32x4: shl = True; size = 32; break;
9494 case Iop_ShlN64x2: shl = True; size = 64; break;
9495 case Iop_SarN16x8: sar = True; size = 16; break;
9496 case Iop_SarN32x4: sar = True; size = 32; break;
9497 case Iop_ShrN16x8: shr = True; size = 16; break;
9498 case Iop_ShrN32x4: shr = True; size = 32; break;
9499 case Iop_ShrN64x2: shr = True; size = 64; break;
9500 default: vassert(0);
9503 if (shl || shr) {
9504 assign(
9506 IRExpr_ITE(
9507 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
9508 binop(op, mkexpr(g0), mkexpr(amt8)),
9509 mkV128(0x0000)
9512 } else
9513 if (sar) {
9514 assign(
9516 IRExpr_ITE(
9517 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
9518 binop(op, mkexpr(g0), mkexpr(amt8)),
9519 binop(op, mkexpr(g0), mkU8(size-1))
9522 } else {
9523 vassert(0);
9526 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(g1) );
9527 return delta;
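/* A reference sketch of one 16-bit lane of the arithmetic-shift case
   above, in plain C (names are illustrative only).  The count is the
   whole low 64 bits of E, and an over-large count behaves like a
   shift by (lanesize - 1), i.e. the lane fills with its sign bit:

     Short psraw_lane_ref ( Short x, ULong amt ) {
        unsigned n = amt < 16 ? (unsigned)amt : 15;
        return (Short)(x >> n);
     }

   For the logical shifts an over-large count instead zeroes the lane
   (the mkV128(0x0000) arm of the ITE above). */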
9531 /* Vector by scalar shift of E by an immediate byte. */
9533 static
9534 ULong dis_SSE_shiftE_imm ( Prefix pfx,
9535 Long delta, const HChar* opname, IROp op )
9537 Bool shl, shr, sar;
9538 UChar rm = getUChar(delta);
9539 IRTemp e0 = newTemp(Ity_V128);
9540 IRTemp e1 = newTemp(Ity_V128);
9541 UChar amt, size;
9542 vassert(epartIsReg(rm));
9543 vassert(gregLO3ofRM(rm) == 2
9544 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
9545 amt = getUChar(delta+1);
9546 delta += 2;
9547 DIP("%s $%d,%s\n", opname,
9548 (Int)amt,
9549 nameXMMReg(eregOfRexRM(pfx,rm)) );
9550 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );
9552 shl = shr = sar = False;
9553 size = 0;
9554 switch (op) {
9555 case Iop_ShlN16x8: shl = True; size = 16; break;
9556 case Iop_ShlN32x4: shl = True; size = 32; break;
9557 case Iop_ShlN64x2: shl = True; size = 64; break;
9558 case Iop_SarN16x8: sar = True; size = 16; break;
9559 case Iop_SarN32x4: sar = True; size = 32; break;
9560 case Iop_ShrN16x8: shr = True; size = 16; break;
9561 case Iop_ShrN32x4: shr = True; size = 32; break;
9562 case Iop_ShrN64x2: shr = True; size = 64; break;
9563 default: vassert(0);
9566 if (shl || shr) {
9567 assign( e1, amt >= size
9568 ? mkV128(0x0000)
9569 : binop(op, mkexpr(e0), mkU8(amt))
9571 } else
9572 if (sar) {
9573 assign( e1, amt >= size
9574 ? binop(op, mkexpr(e0), mkU8(size-1))
9575 : binop(op, mkexpr(e0), mkU8(amt))
9577 } else {
9578 vassert(0);
9581 putXMMReg( eregOfRexRM(pfx,rm), mkexpr(e1) );
9582 return delta;
9586 /* Get the current SSE rounding mode. */
9588 static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void )
9590 return
9591 unop( Iop_64to32,
9592 binop( Iop_And64,
9593 IRExpr_Get( OFFB_SSEROUND, Ity_I64 ),
9594 mkU64(3) ));
9597 static void put_sse_roundingmode ( IRExpr* sseround )
9599 vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32);
9600 stmt( IRStmt_Put( OFFB_SSEROUND,
9601 unop(Iop_32Uto64,sseround) ) );
9604 /* Break a V128-bit value up into four 32-bit ints. */
9606 static void breakupV128to32s ( IRTemp t128,
9607 /*OUTs*/
9608 IRTemp* t3, IRTemp* t2,
9609 IRTemp* t1, IRTemp* t0 )
9611 IRTemp hi64 = newTemp(Ity_I64);
9612 IRTemp lo64 = newTemp(Ity_I64);
9613 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
9614 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) );
9616 vassert(t0 && *t0 == IRTemp_INVALID);
9617 vassert(t1 && *t1 == IRTemp_INVALID);
9618 vassert(t2 && *t2 == IRTemp_INVALID);
9619 vassert(t3 && *t3 == IRTemp_INVALID);
9621 *t0 = newTemp(Ity_I32);
9622 *t1 = newTemp(Ity_I32);
9623 *t2 = newTemp(Ity_I32);
9624 *t3 = newTemp(Ity_I32);
9625 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) );
9626 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
9627 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) );
9628 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
9631 /* Construct a V128-bit value from four 32-bit ints. */
9633 static IRExpr* mkV128from32s ( IRTemp t3, IRTemp t2,
9634 IRTemp t1, IRTemp t0 )
9636 return
9637 binop( Iop_64HLtoV128,
9638 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
9639 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
9643 /* Break a 64-bit value up into four 16-bit ints. */
9645 static void breakup64to16s ( IRTemp t64,
9646 /*OUTs*/
9647 IRTemp* t3, IRTemp* t2,
9648 IRTemp* t1, IRTemp* t0 )
9650 IRTemp hi32 = newTemp(Ity_I32);
9651 IRTemp lo32 = newTemp(Ity_I32);
9652 assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
9653 assign( lo32, unop(Iop_64to32, mkexpr(t64)) );
9655 vassert(t0 && *t0 == IRTemp_INVALID);
9656 vassert(t1 && *t1 == IRTemp_INVALID);
9657 vassert(t2 && *t2 == IRTemp_INVALID);
9658 vassert(t3 && *t3 == IRTemp_INVALID);
9660 *t0 = newTemp(Ity_I16);
9661 *t1 = newTemp(Ity_I16);
9662 *t2 = newTemp(Ity_I16);
9663 *t3 = newTemp(Ity_I16);
9664 assign( *t0, unop(Iop_32to16, mkexpr(lo32)) );
9665 assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
9666 assign( *t2, unop(Iop_32to16, mkexpr(hi32)) );
9667 assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
9670 /* Construct a 64-bit value from four 16-bit ints. */
9672 static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
9673 IRTemp t1, IRTemp t0 )
9675 return
9676 binop( Iop_32HLto64,
9677 binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
9678 binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
9682 /* Break a V256-bit value up into four 64-bit ints. */
9684 static void breakupV256to64s ( IRTemp t256,
9685 /*OUTs*/
9686 IRTemp* t3, IRTemp* t2,
9687 IRTemp* t1, IRTemp* t0 )
9689 vassert(t0 && *t0 == IRTemp_INVALID);
9690 vassert(t1 && *t1 == IRTemp_INVALID);
9691 vassert(t2 && *t2 == IRTemp_INVALID);
9692 vassert(t3 && *t3 == IRTemp_INVALID);
9693 *t0 = newTemp(Ity_I64);
9694 *t1 = newTemp(Ity_I64);
9695 *t2 = newTemp(Ity_I64);
9696 *t3 = newTemp(Ity_I64);
9697 assign( *t0, unop(Iop_V256to64_0, mkexpr(t256)) );
9698 assign( *t1, unop(Iop_V256to64_1, mkexpr(t256)) );
9699 assign( *t2, unop(Iop_V256to64_2, mkexpr(t256)) );
9700 assign( *t3, unop(Iop_V256to64_3, mkexpr(t256)) );
9703 /* Break a V256-bit value up into two V128s. */
9705 static void breakupV256toV128s ( IRTemp t256,
9706 /*OUTs*/
9707 IRTemp* t1, IRTemp* t0 )
9709 vassert(t0 && *t0 == IRTemp_INVALID);
9710 vassert(t1 && *t1 == IRTemp_INVALID);
9711 *t0 = newTemp(Ity_V128);
9712 *t1 = newTemp(Ity_V128);
9713 assign(*t1, unop(Iop_V256toV128_1, mkexpr(t256)));
9714 assign(*t0, unop(Iop_V256toV128_0, mkexpr(t256)));
9717 /* Break a V256-bit value up into eight 32-bit ints. */
9719 static void breakupV256to32s ( IRTemp t256,
9720 /*OUTs*/
9721 IRTemp* t7, IRTemp* t6,
9722 IRTemp* t5, IRTemp* t4,
9723 IRTemp* t3, IRTemp* t2,
9724 IRTemp* t1, IRTemp* t0 )
9726 IRTemp t128_1 = IRTemp_INVALID;
9727 IRTemp t128_0 = IRTemp_INVALID;
9728 breakupV256toV128s( t256, &t128_1, &t128_0 );
9729 breakupV128to32s( t128_1, t7, t6, t5, t4 );
9730 breakupV128to32s( t128_0, t3, t2, t1, t0 );
9733 /* Break a V128-bit value up into two 64-bit ints. */
9735 static void breakupV128to64s ( IRTemp t128,
9736 /*OUTs*/
9737 IRTemp* t1, IRTemp* t0 )
9739 vassert(t0 && *t0 == IRTemp_INVALID);
9740 vassert(t1 && *t1 == IRTemp_INVALID);
9741 *t0 = newTemp(Ity_I64);
9742 *t1 = newTemp(Ity_I64);
9743 assign( *t0, unop(Iop_V128to64, mkexpr(t128)) );
9744 assign( *t1, unop(Iop_V128HIto64, mkexpr(t128)) );
9747 /* Construct a V256-bit value from eight 32-bit ints. */
9749 static IRExpr* mkV256from32s ( IRTemp t7, IRTemp t6,
9750 IRTemp t5, IRTemp t4,
9751 IRTemp t3, IRTemp t2,
9752 IRTemp t1, IRTemp t0 )
9754 return
9755 binop( Iop_V128HLtoV256,
9756 binop( Iop_64HLtoV128,
9757 binop(Iop_32HLto64, mkexpr(t7), mkexpr(t6)),
9758 binop(Iop_32HLto64, mkexpr(t5), mkexpr(t4)) ),
9759 binop( Iop_64HLtoV128,
9760 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
9761 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) )
9765 /* Construct a V256-bit value from four 64-bit ints. */
9767 static IRExpr* mkV256from64s ( IRTemp t3, IRTemp t2,
9768 IRTemp t1, IRTemp t0 )
9770 return
9771 binop( Iop_V128HLtoV256,
9772 binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)),
9773 binop(Iop_64HLtoV128, mkexpr(t1), mkexpr(t0))
9777 /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit
9778 values (aa,bb), computes, for each of the 4 16-bit lanes:
9780 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
9782 static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
9784 IRTemp aa = newTemp(Ity_I64);
9785 IRTemp bb = newTemp(Ity_I64);
9786 IRTemp aahi32s = newTemp(Ity_I64);
9787 IRTemp aalo32s = newTemp(Ity_I64);
9788 IRTemp bbhi32s = newTemp(Ity_I64);
9789 IRTemp bblo32s = newTemp(Ity_I64);
9790 IRTemp rHi = newTemp(Ity_I64);
9791 IRTemp rLo = newTemp(Ity_I64);
9792 IRTemp one32x2 = newTemp(Ity_I64);
9793 assign(aa, aax);
9794 assign(bb, bbx);
9795 assign( aahi32s,
9796 binop(Iop_SarN32x2,
9797 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
9798 mkU8(16) ));
9799 assign( aalo32s,
9800 binop(Iop_SarN32x2,
9801 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
9802 mkU8(16) ));
9803 assign( bbhi32s,
9804 binop(Iop_SarN32x2,
9805 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
9806 mkU8(16) ));
9807 assign( bblo32s,
9808 binop(Iop_SarN32x2,
9809 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
9810 mkU8(16) ));
9811 assign(one32x2, mkU64( (1ULL << 32) + 1 ));
9812 assign(
9813 rHi,
9814 binop(
9815 Iop_ShrN32x2,
9816 binop(
9817 Iop_Add32x2,
9818 binop(
9819 Iop_ShrN32x2,
9820 binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)),
9821 mkU8(14)
9823 mkexpr(one32x2)
9825 mkU8(1)
9828 assign(
9829 rLo,
9830 binop(
9831 Iop_ShrN32x2,
9832 binop(
9833 Iop_Add32x2,
9834 binop(
9835 Iop_ShrN32x2,
9836 binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)),
9837 mkU8(14)
9839 mkexpr(one32x2)
9841 mkU8(1)
9844 return
9845 binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo));
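/* Worked example: treating the lanes as Q15 fixed point, aa_lane = 0x4000
   (0.5) and bb_lane = 0x2000 (0.25) give a 32-bit signed product of
   0x08000000; >>u 14 yields 0x2000, adding 1 gives 0x2001, and the final
   >>u 1 gives 0x1000, i.e. 0.125 -- the correctly rounded high half that
   PMULHRSW is defined to produce. */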
9848 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit
9849 values (aa,bb), computes, for each lane:
9851 if aa_lane < 0 then - bb_lane
9852 else if aa_lane > 0 then bb_lane
9853 else 0
9855 static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
9857 IRTemp aa = newTemp(Ity_I64);
9858 IRTemp bb = newTemp(Ity_I64);
9859 IRTemp zero = newTemp(Ity_I64);
9860 IRTemp bbNeg = newTemp(Ity_I64);
9861 IRTemp negMask = newTemp(Ity_I64);
9862 IRTemp posMask = newTemp(Ity_I64);
9863 IROp opSub = Iop_INVALID;
9864 IROp opCmpGTS = Iop_INVALID;
9866 switch (laneszB) {
9867 case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break;
9868 case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
9869 case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
9870 default: vassert(0);
9873 assign( aa, aax );
9874 assign( bb, bbx );
9875 assign( zero, mkU64(0) );
9876 assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) );
9877 assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
9878 assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) );
9880 return
9881 binop(Iop_Or64,
9882 binop(Iop_And64, mkexpr(bb), mkexpr(posMask)),
9883 binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );
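/* Example: for PSIGNB with aa_lane = -5 and bb_lane = 7, negMask is
   all-ones and posMask is zero, so the result lane is -7; with
   aa_lane = 0 both masks are zero and the result lane is 0, as the
   architecture requires. */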
9888 /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit
9889 value aa, computes, for each lane
9891 if aa < 0 then -aa else aa
9893 Note that the result is interpreted as unsigned, so that the
9894 absolute value of the most negative signed input can be
9895 represented.
9897 static IRTemp math_PABS_MMX ( IRTemp aa, Int laneszB )
9899 IRTemp res = newTemp(Ity_I64);
9900 IRTemp zero = newTemp(Ity_I64);
9901 IRTemp aaNeg = newTemp(Ity_I64);
9902 IRTemp negMask = newTemp(Ity_I64);
9903 IRTemp posMask = newTemp(Ity_I64);
9904 IROp opSub = Iop_INVALID;
9905 IROp opSarN = Iop_INVALID;
9907 switch (laneszB) {
9908 case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break;
9909 case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
9910 case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
9911 default: vassert(0);
9914 assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
9915 assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
9916 assign( zero, mkU64(0) );
9917 assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) );
9918 assign( res,
9919 binop(Iop_Or64,
9920 binop(Iop_And64, mkexpr(aa), mkexpr(posMask)),
9921 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) ));
9922 return res;
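/* Example: in the 8-bit case an input lane of 0x80 (-128) yields 0x80
   again, which is the correct answer (128) when the result is read as
   unsigned -- hence the note above about unsigned interpretation. */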
9925 /* XMM version of math_PABS_MMX. */
9926 static IRTemp math_PABS_XMM ( IRTemp aa, Int laneszB )
9928 IRTemp res = newTemp(Ity_V128);
9929 IRTemp aaHi = newTemp(Ity_I64);
9930 IRTemp aaLo = newTemp(Ity_I64);
9931 assign(aaHi, unop(Iop_V128HIto64, mkexpr(aa)));
9932 assign(aaLo, unop(Iop_V128to64, mkexpr(aa)));
9933 assign(res, binop(Iop_64HLtoV128,
9934 mkexpr(math_PABS_MMX(aaHi, laneszB)),
9935 mkexpr(math_PABS_MMX(aaLo, laneszB))));
9936 return res;
9939 /* Specialisations of math_PABS_XMM, since there's no easy way to do
9940 partial applications in C :-( */
9941 static IRTemp math_PABS_XMM_pap4 ( IRTemp aa ) {
9942 return math_PABS_XMM(aa, 4);
9945 static IRTemp math_PABS_XMM_pap2 ( IRTemp aa ) {
9946 return math_PABS_XMM(aa, 2);
9949 static IRTemp math_PABS_XMM_pap1 ( IRTemp aa ) {
9950 return math_PABS_XMM(aa, 1);
9953 /* YMM version of math_PABS_XMM. */
9954 static IRTemp math_PABS_YMM ( IRTemp aa, Int laneszB )
9956 IRTemp res = newTemp(Ity_V256);
9957 IRTemp aaHi = IRTemp_INVALID;
9958 IRTemp aaLo = IRTemp_INVALID;
9959 breakupV256toV128s(aa, &aaHi, &aaLo);
9960 assign(res, binop(Iop_V128HLtoV256,
9961 mkexpr(math_PABS_XMM(aaHi, laneszB)),
9962 mkexpr(math_PABS_XMM(aaLo, laneszB))));
9963 return res;
9966 static IRTemp math_PABS_YMM_pap4 ( IRTemp aa ) {
9967 return math_PABS_YMM(aa, 4);
9970 static IRTemp math_PABS_YMM_pap2 ( IRTemp aa ) {
9971 return math_PABS_YMM(aa, 2);
9974 static IRTemp math_PABS_YMM_pap1 ( IRTemp aa ) {
9975 return math_PABS_YMM(aa, 1);
9978 static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
9979 IRTemp lo64, Long byteShift )
9981 vassert(byteShift >= 1 && byteShift <= 7);
9982 return
9983 binop(Iop_Or64,
9984 binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
9985 binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
9989 static IRTemp math_PALIGNR_XMM ( IRTemp sV, IRTemp dV, UInt imm8 )
9991 IRTemp res = newTemp(Ity_V128);
9992 IRTemp sHi = newTemp(Ity_I64);
9993 IRTemp sLo = newTemp(Ity_I64);
9994 IRTemp dHi = newTemp(Ity_I64);
9995 IRTemp dLo = newTemp(Ity_I64);
9996 IRTemp rHi = newTemp(Ity_I64);
9997 IRTemp rLo = newTemp(Ity_I64);
9999 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
10000 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
10001 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
10002 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
10004 if (imm8 == 0) {
10005 assign( rHi, mkexpr(sHi) );
10006 assign( rLo, mkexpr(sLo) );
10008 else if (imm8 >= 1 && imm8 <= 7) {
10009 assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, imm8) );
10010 assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, imm8) );
10012 else if (imm8 == 8) {
10013 assign( rHi, mkexpr(dLo) );
10014 assign( rLo, mkexpr(sHi) );
10016 else if (imm8 >= 9 && imm8 <= 15) {
10017 assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-8) );
10018 assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, imm8-8) );
10020 else if (imm8 == 16) {
10021 assign( rHi, mkexpr(dHi) );
10022 assign( rLo, mkexpr(dLo) );
10024 else if (imm8 >= 17 && imm8 <= 23) {
10025 assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-16))) );
10026 assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-16) );
10028 else if (imm8 == 24) {
10029 assign( rHi, mkU64(0) );
10030 assign( rLo, mkexpr(dHi) );
10032 else if (imm8 >= 25 && imm8 <= 31) {
10033 assign( rHi, mkU64(0) );
10034 assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-24))) );
10036 else if (imm8 >= 32 && imm8 <= 255) {
10037 assign( rHi, mkU64(0) );
10038 assign( rLo, mkU64(0) );
10040 else
10041 vassert(0);
10043 assign( res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)));
10044 return res;
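/* Worked example: for imm8 == 4 the result is bytes 4..19 of the 32-byte
   concatenation dV:sV (dV most significant), so rLo combines the top four
   bytes of sLo with the bottom four bytes of sHi, and rHi combines the
   top four bytes of sHi with the bottom four bytes of dLo -- the
   "imm8 >= 1 && imm8 <= 7" arm above. */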
10048 /* Generate a SIGSEGV followed by a restart of the current instruction
10049 if effective_addr is not aligned to the boundary implied by |mask|
10050 (mask == alignment - 1). 16-byte alignment is required behaviour for
10051 some SSE3 instructions and all 128-bit SSSE3 instructions. This assumes that guest_RIP_curr_instr is set correctly! */
10052 static
10053 void gen_SEGV_if_not_XX_aligned ( IRTemp effective_addr, ULong mask )
10055 stmt(
10056 IRStmt_Exit(
10057 binop(Iop_CmpNE64,
10058 binop(Iop_And64,mkexpr(effective_addr),mkU64(mask)),
10059 mkU64(0)),
10060 Ijk_SigSEGV,
10061 IRConst_U64(guest_RIP_curr_instr),
10062 OFFB_RIP
10067 static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr ) {
10068 gen_SEGV_if_not_XX_aligned(effective_addr, 16-1);
10071 static void gen_SEGV_if_not_32_aligned ( IRTemp effective_addr ) {
10072 gen_SEGV_if_not_XX_aligned(effective_addr, 32-1);
10075 static void gen_SEGV_if_not_64_aligned ( IRTemp effective_addr ) {
10076 gen_SEGV_if_not_XX_aligned(effective_addr, 64-1);
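/* Typical use: callers invoke one of these immediately after disAMode
   has computed the effective address of a memory operand and before any
   load or store is generated, e.g. gen_SEGV_if_not_16_aligned(addr) for
   an SSE instruction that requires a 16-byte aligned operand. */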
10079 /* Helper for deciding whether a given insn (starting at the opcode
10080 byte) may validly be used with a LOCK prefix. The following insns
10081 may be used with LOCK when their destination operand is in memory.
10082 AFAICS this is exactly the same for both 32-bit and 64-bit mode.
10084 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
10085 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
10086 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
10087 SBB 80 /3, 81 /3, 82 /x, 83 /3, 18, 19
10088 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
10089 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
10090 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
10092 DEC FE /1, FF /1
10093 INC FE /0, FF /0
10095 NEG F6 /3, F7 /3
10096 NOT F6 /2, F7 /2
10098 XCHG 86, 87
10100 BTC 0F BB, 0F BA /7
10101 BTR 0F B3, 0F BA /6
10102 BTS 0F AB, 0F BA /5
10104 CMPXCHG 0F B0, 0F B1
10105 CMPXCHG8B 0F C7 /1
10107 XADD 0F C0, 0F C1
10109 ------------------------------
10111 80 /0 = addb $imm8, rm8
10112 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
10113 82 /0 = addb $imm8, rm8
10114 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
10116 00 = addb r8, rm8
10117 01 = addl r32, rm32 and addw r16, rm16
10119 Same for ADD OR ADC SBB AND SUB XOR
10121 FE /1 = dec rm8
10122 FF /1 = dec rm32 and dec rm16
10124 FE /0 = inc rm8
10125 FF /0 = inc rm32 and inc rm16
10127 F6 /3 = neg rm8
10128 F7 /3 = neg rm32 and neg rm16
10130 F6 /2 = not rm8
10131 F7 /2 = not rm32 and not rm16
10133 0F BB = btcw r16, rm16 and btcl r32, rm32
10134 0F BA /7 = btcw $imm8, rm16 and btcl $imm8, rm32
10136 Same for BTS, BTR
10138 static Bool can_be_used_with_LOCK_prefix ( const UChar* opc )
10140 switch (opc[0]) {
10141 case 0x00: case 0x01: case 0x08: case 0x09:
10142 case 0x10: case 0x11: case 0x18: case 0x19:
10143 case 0x20: case 0x21: case 0x28: case 0x29:
10144 case 0x30: case 0x31:
10145 if (!epartIsReg(opc[1]))
10146 return True;
10147 break;
10149 case 0x80: case 0x81: case 0x82: case 0x83:
10150 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6
10151 && !epartIsReg(opc[1]))
10152 return True;
10153 break;
10155 case 0xFE: case 0xFF:
10156 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1
10157 && !epartIsReg(opc[1]))
10158 return True;
10159 break;
10161 case 0xF6: case 0xF7:
10162 if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3
10163 && !epartIsReg(opc[1]))
10164 return True;
10165 break;
10167 case 0x86: case 0x87:
10168 if (!epartIsReg(opc[1]))
10169 return True;
10170 break;
10172 case 0x0F: {
10173 switch (opc[1]) {
10174 case 0xBB: case 0xB3: case 0xAB:
10175 if (!epartIsReg(opc[2]))
10176 return True;
10177 break;
10178 case 0xBA:
10179 if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7
10180 && !epartIsReg(opc[2]))
10181 return True;
10182 break;
10183 case 0xB0: case 0xB1:
10184 if (!epartIsReg(opc[2]))
10185 return True;
10186 break;
10187 case 0xC7:
10188 if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
10189 return True;
10190 break;
10191 case 0xC0: case 0xC1:
10192 if (!epartIsReg(opc[2]))
10193 return True;
10194 break;
10195 default:
10196 break;
10197 } /* switch (opc[1]) */
10198 break;
10201 default:
10202 break;
10203 } /* switch (opc[0]) */
10205 return False;
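/* Example: the byte sequence F0 FF 00 is "lock incl (%rax)". Here opc
   points at the 0xFF, gregLO3ofRM(opc[1]) == 0 (INC) and the modrm byte
   0x00 selects a memory operand, so the function returns True; the same
   opcode with a register destination would return False. */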
10209 /*------------------------------------------------------------*/
10210 /*--- ---*/
10211 /*--- Top-level SSE/SSE2: dis_ESC_0F__SSE2 ---*/
10212 /*--- ---*/
10213 /*------------------------------------------------------------*/
10215 static Long dis_COMISD ( const VexAbiInfo* vbi, Prefix pfx,
10216 Long delta, Bool isAvx, UChar opc )
10218 vassert(opc == 0x2F/*COMISD*/ || opc == 0x2E/*UCOMISD*/);
10219 Int alen = 0;
10220 HChar dis_buf[50];
10221 IRTemp argL = newTemp(Ity_F64);
10222 IRTemp argR = newTemp(Ity_F64);
10223 UChar modrm = getUChar(delta);
10224 IRTemp addr = IRTemp_INVALID;
10225 if (epartIsReg(modrm)) {
10226 assign( argR, getXMMRegLane64F( eregOfRexRM(pfx,modrm),
10227 0/*lowest lane*/ ) );
10228 delta += 1;
10229 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
10230 opc==0x2E ? "u" : "",
10231 nameXMMReg(eregOfRexRM(pfx,modrm)),
10232 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10233 } else {
10234 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10235 assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
10236 delta += alen;
10237 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
10238 opc==0x2E ? "u" : "",
10239 dis_buf,
10240 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10242 assign( argL, getXMMRegLane64F( gregOfRexRM(pfx,modrm),
10243 0/*lowest lane*/ ) );
10245 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
10246 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
10247 stmt( IRStmt_Put(
10248 OFFB_CC_DEP1,
10249 binop( Iop_And64,
10250 unop( Iop_32Uto64,
10251 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)) ),
10252 mkU64(0x45)
10253 )));
10254 return delta;
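/* Note on the 0x45 mask used above (and in dis_COMISS below): Iop_CmpF64
   evaluates to 0x00 (GT), 0x01 (LT), 0x40 (EQ) or 0x45 (unordered), so
   ANDing with 0x45 leaves exactly the CF (bit 0), PF (bit 2) and ZF
   (bit 6) pattern that (U)COMISD must deposit in RFLAGS, which the
   AMD64G_CC_OP_COPY thunk then copies out unchanged. */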
10258 static Long dis_COMISS ( const VexAbiInfo* vbi, Prefix pfx,
10259 Long delta, Bool isAvx, UChar opc )
10261 vassert(opc == 0x2F/*COMISS*/ || opc == 0x2E/*UCOMISS*/);
10262 Int alen = 0;
10263 HChar dis_buf[50];
10264 IRTemp argL = newTemp(Ity_F32);
10265 IRTemp argR = newTemp(Ity_F32);
10266 UChar modrm = getUChar(delta);
10267 IRTemp addr = IRTemp_INVALID;
10268 if (epartIsReg(modrm)) {
10269 assign( argR, getXMMRegLane32F( eregOfRexRM(pfx,modrm),
10270 0/*lowest lane*/ ) );
10271 delta += 1;
10272 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "",
10273 opc==0x2E ? "u" : "",
10274 nameXMMReg(eregOfRexRM(pfx,modrm)),
10275 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10276 } else {
10277 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10278 assign( argR, loadLE(Ity_F32, mkexpr(addr)) );
10279 delta += alen;
10280 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "",
10281 opc==0x2E ? "u" : "",
10282 dis_buf,
10283 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10285 assign( argL, getXMMRegLane32F( gregOfRexRM(pfx,modrm),
10286 0/*lowest lane*/ ) );
10288 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
10289 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
10290 stmt( IRStmt_Put(
10291 OFFB_CC_DEP1,
10292 binop( Iop_And64,
10293 unop( Iop_32Uto64,
10294 binop(Iop_CmpF64,
10295 unop(Iop_F32toF64,mkexpr(argL)),
10296 unop(Iop_F32toF64,mkexpr(argR)))),
10297 mkU64(0x45)
10298 )));
10299 return delta;
10303 static Long dis_PSHUFD_32x4 ( const VexAbiInfo* vbi, Prefix pfx,
10304 Long delta, Bool writesYmm )
10306 Int order;
10307 Int alen = 0;
10308 HChar dis_buf[50];
10309 IRTemp sV = newTemp(Ity_V128);
10310 UChar modrm = getUChar(delta);
10311 const HChar* strV = writesYmm ? "v" : "";
10312 IRTemp addr = IRTemp_INVALID;
10313 if (epartIsReg(modrm)) {
10314 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
10315 order = (Int)getUChar(delta+1);
10316 delta += 1+1;
10317 DIP("%spshufd $%d,%s,%s\n", strV, order,
10318 nameXMMReg(eregOfRexRM(pfx,modrm)),
10319 nameXMMReg(gregOfRexRM(pfx,modrm)));
10320 } else {
10321 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
10322 1/*byte after the amode*/ );
10323 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10324 order = (Int)getUChar(delta+alen);
10325 delta += alen+1;
10326 DIP("%spshufd $%d,%s,%s\n", strV, order,
10327 dis_buf,
10328 nameXMMReg(gregOfRexRM(pfx,modrm)));
10331 IRTemp s3, s2, s1, s0;
10332 s3 = s2 = s1 = s0 = IRTemp_INVALID;
10333 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
10335 # define SEL(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
10336 IRTemp dV = newTemp(Ity_V128);
10337 assign(dV,
10338 mkV128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
10339 SEL((order>>2)&3), SEL((order>>0)&3) )
10341 # undef SEL
10343 (writesYmm ? putYMMRegLoAndZU : putXMMReg)
10344 (gregOfRexRM(pfx,modrm), mkexpr(dV));
10345 return delta;
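/* Example: "pshufd $0x1b, %xmm1, %xmm0" has order == 0x1B, i.e. the
   2-bit fields select 0,1,2,3 from high to low, so the four 32-bit
   lanes of the source are simply reversed in the destination. */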
10349 static Long dis_PSHUFD_32x8 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
10351 Int order;
10352 Int alen = 0;
10353 HChar dis_buf[50];
10354 IRTemp sV = newTemp(Ity_V256);
10355 UChar modrm = getUChar(delta);
10356 IRTemp addr = IRTemp_INVALID;
10357 UInt rG = gregOfRexRM(pfx,modrm);
10358 if (epartIsReg(modrm)) {
10359 UInt rE = eregOfRexRM(pfx,modrm);
10360 assign( sV, getYMMReg(rE) );
10361 order = (Int)getUChar(delta+1);
10362 delta += 1+1;
10363 DIP("vpshufd $%d,%s,%s\n", order, nameYMMReg(rE), nameYMMReg(rG));
10364 } else {
10365 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
10366 1/*byte after the amode*/ );
10367 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
10368 order = (Int)getUChar(delta+alen);
10369 delta += alen+1;
10370 DIP("vpshufd $%d,%s,%s\n", order, dis_buf, nameYMMReg(rG));
10373 IRTemp s[8];
10374 s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
10375 breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4],
10376 &s[3], &s[2], &s[1], &s[0] );
10378 putYMMReg( rG, mkV256from32s( s[4 + ((order>>6)&3)],
10379 s[4 + ((order>>4)&3)],
10380 s[4 + ((order>>2)&3)],
10381 s[4 + ((order>>0)&3)],
10382 s[0 + ((order>>6)&3)],
10383 s[0 + ((order>>4)&3)],
10384 s[0 + ((order>>2)&3)],
10385 s[0 + ((order>>0)&3)] ) );
10386 return delta;
10390 static IRTemp math_PSRLDQ ( IRTemp sV, Int imm )
10392 IRTemp dV = newTemp(Ity_V128);
10393 IRTemp hi64 = newTemp(Ity_I64);
10394 IRTemp lo64 = newTemp(Ity_I64);
10395 IRTemp hi64r = newTemp(Ity_I64);
10396 IRTemp lo64r = newTemp(Ity_I64);
10398 vassert(imm >= 0 && imm <= 255);
10399 if (imm >= 16) {
10400 assign(dV, mkV128(0x0000));
10401 return dV;
10404 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
10405 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
10407 if (imm == 0) {
10408 assign( lo64r, mkexpr(lo64) );
10409 assign( hi64r, mkexpr(hi64) );
10411 else
10412 if (imm == 8) {
10413 assign( hi64r, mkU64(0) );
10414 assign( lo64r, mkexpr(hi64) );
10416 else
10417 if (imm > 8) {
10418 assign( hi64r, mkU64(0) );
10419 assign( lo64r, binop( Iop_Shr64, mkexpr(hi64), mkU8( 8*(imm-8) ) ));
10420 } else {
10421 assign( hi64r, binop( Iop_Shr64, mkexpr(hi64), mkU8(8 * imm) ));
10422 assign( lo64r,
10423 binop( Iop_Or64,
10424 binop(Iop_Shr64, mkexpr(lo64),
10425 mkU8(8 * imm)),
10426 binop(Iop_Shl64, mkexpr(hi64),
10427 mkU8(8 * (8 - imm)) )
10432 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
10433 return dV;
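/* Example: imm == 3 shifts the whole 128-bit value right by three bytes,
   so the final arm above produces hi64r = hi64 >> 24 and
   lo64r = (lo64 >> 24) | (hi64 << 40), with the vacated top three bytes
   of the result filled with zero. */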
10437 static IRTemp math_PSLLDQ ( IRTemp sV, Int imm )
10439 IRTemp dV = newTemp(Ity_V128);
10440 IRTemp hi64 = newTemp(Ity_I64);
10441 IRTemp lo64 = newTemp(Ity_I64);
10442 IRTemp hi64r = newTemp(Ity_I64);
10443 IRTemp lo64r = newTemp(Ity_I64);
10445 vassert(imm >= 0 && imm <= 255);
10446 if (imm >= 16) {
10447 assign(dV, mkV128(0x0000));
10448 return dV;
10451 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
10452 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
10454 if (imm == 0) {
10455 assign( lo64r, mkexpr(lo64) );
10456 assign( hi64r, mkexpr(hi64) );
10458 else
10459 if (imm == 8) {
10460 assign( lo64r, mkU64(0) );
10461 assign( hi64r, mkexpr(lo64) );
10463 else
10464 if (imm > 8) {
10465 assign( lo64r, mkU64(0) );
10466 assign( hi64r, binop( Iop_Shl64, mkexpr(lo64), mkU8( 8*(imm-8) ) ));
10467 } else {
10468 assign( lo64r, binop( Iop_Shl64, mkexpr(lo64), mkU8(8 * imm) ));
10469 assign( hi64r,
10470 binop( Iop_Or64,
10471 binop(Iop_Shl64, mkexpr(hi64),
10472 mkU8(8 * imm)),
10473 binop(Iop_Shr64, mkexpr(lo64),
10474 mkU8(8 * (8 - imm)) )
10479 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
10480 return dV;
10484 static Long dis_CVTxSD2SI ( const VexAbiInfo* vbi, Prefix pfx,
10485 Long delta, Bool isAvx, UChar opc, Int sz )
10487 vassert(opc == 0x2D/*CVTSD2SI*/ || opc == 0x2C/*CVTTSD2SI*/);
10488 HChar dis_buf[50];
10489 Int alen = 0;
10490 UChar modrm = getUChar(delta);
10491 IRTemp addr = IRTemp_INVALID;
10492 IRTemp rmode = newTemp(Ity_I32);
10493 IRTemp f64lo = newTemp(Ity_F64);
10494 Bool r2zero = toBool(opc == 0x2C);
10496 if (epartIsReg(modrm)) {
10497 delta += 1;
10498 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
10499 DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10500 nameXMMReg(eregOfRexRM(pfx,modrm)),
10501 nameIReg(sz, gregOfRexRM(pfx,modrm),
10502 False));
10503 } else {
10504 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10505 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
10506 delta += alen;
10507 DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10508 dis_buf,
10509 nameIReg(sz, gregOfRexRM(pfx,modrm),
10510 False));
10513 if (r2zero) {
10514 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10515 } else {
10516 assign( rmode, get_sse_roundingmode() );
10519 if (sz == 4) {
10520 putIReg32( gregOfRexRM(pfx,modrm),
10521 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) );
10522 } else {
10523 vassert(sz == 8);
10524 putIReg64( gregOfRexRM(pfx,modrm),
10525 binop( Iop_F64toI64S, mkexpr(rmode), mkexpr(f64lo)) );
10528 return delta;
10532 static Long dis_CVTxSS2SI ( const VexAbiInfo* vbi, Prefix pfx,
10533 Long delta, Bool isAvx, UChar opc, Int sz )
10535 vassert(opc == 0x2D/*CVTSS2SI*/ || opc == 0x2C/*CVTTSS2SI*/);
10536 HChar dis_buf[50];
10537 Int alen = 0;
10538 UChar modrm = getUChar(delta);
10539 IRTemp addr = IRTemp_INVALID;
10540 IRTemp rmode = newTemp(Ity_I32);
10541 IRTemp f32lo = newTemp(Ity_F32);
10542 Bool r2zero = toBool(opc == 0x2C);
10544 if (epartIsReg(modrm)) {
10545 delta += 1;
10546 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
10547 DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10548 nameXMMReg(eregOfRexRM(pfx,modrm)),
10549 nameIReg(sz, gregOfRexRM(pfx,modrm),
10550 False));
10551 } else {
10552 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10553 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
10554 delta += alen;
10555 DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10556 dis_buf,
10557 nameIReg(sz, gregOfRexRM(pfx,modrm),
10558 False));
10561 if (r2zero) {
10562 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10563 } else {
10564 assign( rmode, get_sse_roundingmode() );
10567 if (sz == 4) {
10568 putIReg32( gregOfRexRM(pfx,modrm),
10569 binop( Iop_F64toI32S,
10570 mkexpr(rmode),
10571 unop(Iop_F32toF64, mkexpr(f32lo))) );
10572 } else {
10573 vassert(sz == 8);
10574 putIReg64( gregOfRexRM(pfx,modrm),
10575 binop( Iop_F64toI64S,
10576 mkexpr(rmode),
10577 unop(Iop_F32toF64, mkexpr(f32lo))) );
10580 return delta;
10584 static Long dis_CVTPS2PD_128 ( const VexAbiInfo* vbi, Prefix pfx,
10585 Long delta, Bool isAvx )
10587 IRTemp addr = IRTemp_INVALID;
10588 Int alen = 0;
10589 HChar dis_buf[50];
10590 IRTemp f32lo = newTemp(Ity_F32);
10591 IRTemp f32hi = newTemp(Ity_F32);
10592 UChar modrm = getUChar(delta);
10593 UInt rG = gregOfRexRM(pfx,modrm);
10594 if (epartIsReg(modrm)) {
10595 UInt rE = eregOfRexRM(pfx,modrm);
10596 assign( f32lo, getXMMRegLane32F(rE, 0) );
10597 assign( f32hi, getXMMRegLane32F(rE, 1) );
10598 delta += 1;
10599 DIP("%scvtps2pd %s,%s\n",
10600 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
10601 } else {
10602 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10603 assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
10604 assign( f32hi, loadLE(Ity_F32,
10605 binop(Iop_Add64,mkexpr(addr),mkU64(4))) );
10606 delta += alen;
10607 DIP("%scvtps2pd %s,%s\n",
10608 isAvx ? "v" : "", dis_buf, nameXMMReg(rG));
10611 putXMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32hi)) );
10612 putXMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32lo)) );
10613 if (isAvx)
10614 putYMMRegLane128( rG, 1, mkV128(0));
10615 return delta;
10619 static Long dis_CVTPS2PD_256 ( const VexAbiInfo* vbi, Prefix pfx,
10620 Long delta )
10622 IRTemp addr = IRTemp_INVALID;
10623 Int alen = 0;
10624 HChar dis_buf[50];
10625 IRTemp f32_0 = newTemp(Ity_F32);
10626 IRTemp f32_1 = newTemp(Ity_F32);
10627 IRTemp f32_2 = newTemp(Ity_F32);
10628 IRTemp f32_3 = newTemp(Ity_F32);
10629 UChar modrm = getUChar(delta);
10630 UInt rG = gregOfRexRM(pfx,modrm);
10631 if (epartIsReg(modrm)) {
10632 UInt rE = eregOfRexRM(pfx,modrm);
10633 assign( f32_0, getXMMRegLane32F(rE, 0) );
10634 assign( f32_1, getXMMRegLane32F(rE, 1) );
10635 assign( f32_2, getXMMRegLane32F(rE, 2) );
10636 assign( f32_3, getXMMRegLane32F(rE, 3) );
10637 delta += 1;
10638 DIP("vcvtps2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
10639 } else {
10640 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10641 assign( f32_0, loadLE(Ity_F32, mkexpr(addr)) );
10642 assign( f32_1, loadLE(Ity_F32,
10643 binop(Iop_Add64,mkexpr(addr),mkU64(4))) );
10644 assign( f32_2, loadLE(Ity_F32,
10645 binop(Iop_Add64,mkexpr(addr),mkU64(8))) );
10646 assign( f32_3, loadLE(Ity_F32,
10647 binop(Iop_Add64,mkexpr(addr),mkU64(12))) );
10648 delta += alen;
10649 DIP("vcvtps2pd %s,%s\n", dis_buf, nameYMMReg(rG));
10652 putYMMRegLane64F( rG, 3, unop(Iop_F32toF64, mkexpr(f32_3)) );
10653 putYMMRegLane64F( rG, 2, unop(Iop_F32toF64, mkexpr(f32_2)) );
10654 putYMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32_1)) );
10655 putYMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32_0)) );
10656 return delta;
10660 static Long dis_CVTPD2PS_128 ( const VexAbiInfo* vbi, Prefix pfx,
10661 Long delta, Bool isAvx )
10663 IRTemp addr = IRTemp_INVALID;
10664 Int alen = 0;
10665 HChar dis_buf[50];
10666 UChar modrm = getUChar(delta);
10667 UInt rG = gregOfRexRM(pfx,modrm);
10668 IRTemp argV = newTemp(Ity_V128);
10669 IRTemp rmode = newTemp(Ity_I32);
10670 if (epartIsReg(modrm)) {
10671 UInt rE = eregOfRexRM(pfx,modrm);
10672 assign( argV, getXMMReg(rE) );
10673 delta += 1;
10674 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "",
10675 nameXMMReg(rE), nameXMMReg(rG));
10676 } else {
10677 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10678 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10679 delta += alen;
10680 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "",
10681 dis_buf, nameXMMReg(rG) );
10684 assign( rmode, get_sse_roundingmode() );
10685 IRTemp t0 = newTemp(Ity_F64);
10686 IRTemp t1 = newTemp(Ity_F64);
10687 assign( t0, unop(Iop_ReinterpI64asF64,
10688 unop(Iop_V128to64, mkexpr(argV))) );
10689 assign( t1, unop(Iop_ReinterpI64asF64,
10690 unop(Iop_V128HIto64, mkexpr(argV))) );
10692 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), mkexpr(_t) )
10693 putXMMRegLane32( rG, 3, mkU32(0) );
10694 putXMMRegLane32( rG, 2, mkU32(0) );
10695 putXMMRegLane32F( rG, 1, CVT(t1) );
10696 putXMMRegLane32F( rG, 0, CVT(t0) );
10697 # undef CVT
10698 if (isAvx)
10699 putYMMRegLane128( rG, 1, mkV128(0) );
10701 return delta;
10705 static Long dis_CVTxPS2DQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
10706 Long delta, Bool isAvx, Bool r2zero )
10708 IRTemp addr = IRTemp_INVALID;
10709 Int alen = 0;
10710 HChar dis_buf[50];
10711 UChar modrm = getUChar(delta);
10712 IRTemp argV = newTemp(Ity_V128);
10713 IRTemp rmode = newTemp(Ity_I32);
10714 UInt rG = gregOfRexRM(pfx,modrm);
10716 if (epartIsReg(modrm)) {
10717 UInt rE = eregOfRexRM(pfx,modrm);
10718 assign( argV, getXMMReg(rE) );
10719 delta += 1;
10720 DIP("%scvt%sps2dq %s,%s\n",
10721 isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG));
10722 } else {
10723 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10724 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10725 delta += alen;
10726 DIP("%scvt%sps2dq %s,%s\n",
10727 isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
10730 assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO)
10731 : get_sse_roundingmode() );
10732 putXMMReg( rG, binop(Iop_F32toI32Sx4, mkexpr(rmode), mkexpr(argV)) );
10733 if (isAvx)
10734 putYMMRegLane128( rG, 1, mkV128(0) );
10736 return delta;
10740 static Long dis_CVTxPS2DQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
10741 Long delta, Bool r2zero )
10743 IRTemp addr = IRTemp_INVALID;
10744 Int alen = 0;
10745 HChar dis_buf[50];
10746 UChar modrm = getUChar(delta);
10747 IRTemp argV = newTemp(Ity_V256);
10748 IRTemp rmode = newTemp(Ity_I32);
10749 UInt rG = gregOfRexRM(pfx,modrm);
10751 if (epartIsReg(modrm)) {
10752 UInt rE = eregOfRexRM(pfx,modrm);
10753 assign( argV, getYMMReg(rE) );
10754 delta += 1;
10755 DIP("vcvt%sps2dq %s,%s\n",
10756 r2zero ? "t" : "", nameYMMReg(rE), nameYMMReg(rG));
10757 } else {
10758 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10759 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
10760 delta += alen;
10761 DIP("vcvt%sps2dq %s,%s\n",
10762 r2zero ? "t" : "", dis_buf, nameYMMReg(rG) );
10765 assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO)
10766 : get_sse_roundingmode() );
10767 putYMMReg( rG, binop(Iop_F32toI32Sx8, mkexpr(rmode), mkexpr(argV)) );
10768 return delta;
10772 static Long dis_CVTxPD2DQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
10773 Long delta, Bool isAvx, Bool r2zero )
10775 IRTemp addr = IRTemp_INVALID;
10776 Int alen = 0;
10777 HChar dis_buf[50];
10778 UChar modrm = getUChar(delta);
10779 IRTemp argV = newTemp(Ity_V128);
10780 IRTemp rmode = newTemp(Ity_I32);
10781 UInt rG = gregOfRexRM(pfx,modrm);
10782 IRTemp t0, t1;
10784 if (epartIsReg(modrm)) {
10785 UInt rE = eregOfRexRM(pfx,modrm);
10786 assign( argV, getXMMReg(rE) );
10787 delta += 1;
10788 DIP("%scvt%spd2dq %s,%s\n",
10789 isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG));
10790 } else {
10791 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10792 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10793 delta += alen;
10794 DIP("%scvt%spd2dqx %s,%s\n",
10795 isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
10798 if (r2zero) {
10799 assign(rmode, mkU32((UInt)Irrm_ZERO) );
10800 } else {
10801 assign( rmode, get_sse_roundingmode() );
10804 t0 = newTemp(Ity_F64);
10805 t1 = newTemp(Ity_F64);
10806 assign( t0, unop(Iop_ReinterpI64asF64,
10807 unop(Iop_V128to64, mkexpr(argV))) );
10808 assign( t1, unop(Iop_ReinterpI64asF64,
10809 unop(Iop_V128HIto64, mkexpr(argV))) );
10811 # define CVT(_t) binop( Iop_F64toI32S, \
10812 mkexpr(rmode), \
10813 mkexpr(_t) )
10815 putXMMRegLane32( rG, 3, mkU32(0) );
10816 putXMMRegLane32( rG, 2, mkU32(0) );
10817 putXMMRegLane32( rG, 1, CVT(t1) );
10818 putXMMRegLane32( rG, 0, CVT(t0) );
10819 # undef CVT
10820 if (isAvx)
10821 putYMMRegLane128( rG, 1, mkV128(0) );
10823 return delta;
10827 static Long dis_CVTxPD2DQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
10828 Long delta, Bool r2zero )
10830 IRTemp addr = IRTemp_INVALID;
10831 Int alen = 0;
10832 HChar dis_buf[50];
10833 UChar modrm = getUChar(delta);
10834 IRTemp argV = newTemp(Ity_V256);
10835 IRTemp rmode = newTemp(Ity_I32);
10836 UInt rG = gregOfRexRM(pfx,modrm);
10837 IRTemp t0, t1, t2, t3;
10839 if (epartIsReg(modrm)) {
10840 UInt rE = eregOfRexRM(pfx,modrm);
10841 assign( argV, getYMMReg(rE) );
10842 delta += 1;
10843 DIP("vcvt%spd2dq %s,%s\n",
10844 r2zero ? "t" : "", nameYMMReg(rE), nameXMMReg(rG));
10845 } else {
10846 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10847 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
10848 delta += alen;
10849 DIP("vcvt%spd2dqy %s,%s\n",
10850 r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
10853 if (r2zero) {
10854 assign(rmode, mkU32((UInt)Irrm_ZERO) );
10855 } else {
10856 assign( rmode, get_sse_roundingmode() );
10859 t0 = IRTemp_INVALID;
10860 t1 = IRTemp_INVALID;
10861 t2 = IRTemp_INVALID;
10862 t3 = IRTemp_INVALID;
10863 breakupV256to64s( argV, &t3, &t2, &t1, &t0 );
10865 # define CVT(_t) binop( Iop_F64toI32S, \
10866 mkexpr(rmode), \
10867 unop( Iop_ReinterpI64asF64, \
10868 mkexpr(_t) ) )
10870 putXMMRegLane32( rG, 3, CVT(t3) );
10871 putXMMRegLane32( rG, 2, CVT(t2) );
10872 putXMMRegLane32( rG, 1, CVT(t1) );
10873 putXMMRegLane32( rG, 0, CVT(t0) );
10874 # undef CVT
10875 putYMMRegLane128( rG, 1, mkV128(0) );
10877 return delta;
10881 static Long dis_CVTDQ2PS_128 ( const VexAbiInfo* vbi, Prefix pfx,
10882 Long delta, Bool isAvx )
10884 IRTemp addr = IRTemp_INVALID;
10885 Int alen = 0;
10886 HChar dis_buf[50];
10887 UChar modrm = getUChar(delta);
10888 IRTemp argV = newTemp(Ity_V128);
10889 IRTemp rmode = newTemp(Ity_I32);
10890 UInt rG = gregOfRexRM(pfx,modrm);
10892 if (epartIsReg(modrm)) {
10893 UInt rE = eregOfRexRM(pfx,modrm);
10894 assign( argV, getXMMReg(rE) );
10895 delta += 1;
10896 DIP("%scvtdq2ps %s,%s\n",
10897 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
10898 } else {
10899 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10900 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10901 delta += alen;
10902 DIP("%scvtdq2ps %s,%s\n",
10903 isAvx ? "v" : "", dis_buf, nameXMMReg(rG) );
10906 assign( rmode, get_sse_roundingmode() );
10907 putXMMReg(rG, binop(Iop_I32StoF32x4, mkexpr(rmode), mkexpr(argV)));
10909 if (isAvx)
10910 putYMMRegLane128( rG, 1, mkV128(0) );
10912 return delta;
10915 static Long dis_CVTDQ2PS_256 ( const VexAbiInfo* vbi, Prefix pfx,
10916 Long delta )
10918 IRTemp addr = IRTemp_INVALID;
10919 Int alen = 0;
10920 HChar dis_buf[50];
10921 UChar modrm = getUChar(delta);
10922 IRTemp argV = newTemp(Ity_V256);
10923 IRTemp rmode = newTemp(Ity_I32);
10924 UInt rG = gregOfRexRM(pfx,modrm);
10926 if (epartIsReg(modrm)) {
10927 UInt rE = eregOfRexRM(pfx,modrm);
10928 assign( argV, getYMMReg(rE) );
10929 delta += 1;
10930 DIP("vcvtdq2ps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
10931 } else {
10932 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10933 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
10934 delta += alen;
10935 DIP("vcvtdq2ps %s,%s\n", dis_buf, nameYMMReg(rG) );
10938 assign( rmode, get_sse_roundingmode() );
10939 putYMMReg(rG, binop(Iop_I32StoF32x8, mkexpr(rmode), mkexpr(argV)));
10941 return delta;
10945 static Long dis_PMOVMSKB_128 ( const VexAbiInfo* vbi, Prefix pfx,
10946 Long delta, Bool isAvx )
10948 UChar modrm = getUChar(delta);
10949 vassert(epartIsReg(modrm)); /* ensured by caller */
10950 UInt rE = eregOfRexRM(pfx,modrm);
10951 UInt rG = gregOfRexRM(pfx,modrm);
10952 IRTemp t0 = newTemp(Ity_V128);
10953 IRTemp t1 = newTemp(Ity_I32);
10954 assign(t0, getXMMReg(rE));
10955 assign(t1, unop(Iop_16Uto32, unop(Iop_GetMSBs8x16, mkexpr(t0))));
10956 putIReg32(rG, mkexpr(t1));
10957 DIP("%spmovmskb %s,%s\n", isAvx ? "v" : "", nameXMMReg(rE),
10958 nameIReg32(rG));
10959 delta += 1;
10960 return delta;
10964 static Long dis_PMOVMSKB_256 ( const VexAbiInfo* vbi, Prefix pfx,
10965 Long delta )
10967 UChar modrm = getUChar(delta);
10968 vassert(epartIsReg(modrm)); /* ensured by caller */
10969 UInt rE = eregOfRexRM(pfx,modrm);
10970 UInt rG = gregOfRexRM(pfx,modrm);
10971 IRTemp t0 = newTemp(Ity_V128);
10972 IRTemp t1 = newTemp(Ity_V128);
10973 IRTemp t2 = newTemp(Ity_I16);
10974 IRTemp t3 = newTemp(Ity_I16);
10975 assign(t0, getYMMRegLane128(rE, 0));
10976 assign(t1, getYMMRegLane128(rE, 1));
10977 assign(t2, unop(Iop_GetMSBs8x16, mkexpr(t0)));
10978 assign(t3, unop(Iop_GetMSBs8x16, mkexpr(t1)));
10979 putIReg32(rG, binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)));
10980 DIP("vpmovmskb %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
10981 delta += 1;
10982 return delta;
10986 /* FIXME: why not just use InterleaveLO / InterleaveHI? I think the
10987 relevant ops are "xIsH ? InterleaveHI32x4 : InterleaveLO32x4". */
10988 /* Does the maths for 128 bit versions of UNPCKLPS and UNPCKHPS */
10989 static IRTemp math_UNPCKxPS_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
10991 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
10992 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
10993 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
10994 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
10995 IRTemp res = newTemp(Ity_V128);
10996 assign(res, xIsH ? mkV128from32s( s3, d3, s2, d2 )
10997 : mkV128from32s( s1, d1, s0, d0 ));
10998 return res;
11002 /* FIXME: why not just use InterleaveLO / InterleaveHI ?? */
11003 /* Does the maths for 128 bit versions of UNPCKLPD and UNPCKHPD */
11004 static IRTemp math_UNPCKxPD_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
11006 IRTemp s1 = newTemp(Ity_I64);
11007 IRTemp s0 = newTemp(Ity_I64);
11008 IRTemp d1 = newTemp(Ity_I64);
11009 IRTemp d0 = newTemp(Ity_I64);
11010 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
11011 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
11012 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
11013 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
11014 IRTemp res = newTemp(Ity_V128);
11015 assign(res, xIsH ? binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1))
11016 : binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)));
11017 return res;
11021 /* Does the maths for 256 bit versions of UNPCKLPD and UNPCKHPD.
11022 Doesn't seem like this fits in either of the Iop_Interleave{LO,HI}
11023 or the Iop_Cat{Odd,Even}Lanes idioms, hence just do it the stupid
11024 way. */
11025 static IRTemp math_UNPCKxPD_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
11027 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11028 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11029 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
11030 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
11031 IRTemp res = newTemp(Ity_V256);
11032 assign(res, xIsH
11033 ? IRExpr_Qop(Iop_64x4toV256, mkexpr(s3), mkexpr(d3),
11034 mkexpr(s1), mkexpr(d1))
11035 : IRExpr_Qop(Iop_64x4toV256, mkexpr(s2), mkexpr(d2),
11036 mkexpr(s0), mkexpr(d0)));
11037 return res;
11041 /* FIXME: this is really bad. Surely can do something better here?
11042 One observation is that the steering in the upper and lower 128 bit
11043 halves is the same as with math_UNPCKxPS_128, so we simply split
11044 into two halves, and use that. Consequently any improvement in
11045 math_UNPCKxPS_128 (probably, to use interleave-style primops)
11046 benefits this too. */
11047 static IRTemp math_UNPCKxPS_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
11049 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11050 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11051 breakupV256toV128s( sV, &sVhi, &sVlo );
11052 breakupV256toV128s( dV, &dVhi, &dVlo );
11053 IRTemp rVhi = math_UNPCKxPS_128(sVhi, dVhi, xIsH);
11054 IRTemp rVlo = math_UNPCKxPS_128(sVlo, dVlo, xIsH);
11055 IRTemp rV = newTemp(Ity_V256);
11056 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11057 return rV;
11061 static IRTemp math_SHUFPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11063 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11064 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11065 vassert(imm8 < 256);
11067 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
11068 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11070 # define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
11071 # define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11072 IRTemp res = newTemp(Ity_V128);
11073 assign(res,
11074 mkV128from32s( SELS((imm8>>6)&3), SELS((imm8>>4)&3),
11075 SELD((imm8>>2)&3), SELD((imm8>>0)&3) ) );
11076 # undef SELD
11077 # undef SELS
11078 return res;
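/* Example: imm8 == 0x4E (binary 01 00 11 10) selects d2 and d3 for the
   two low result lanes and s0 and s1 for the two high ones, giving
   { s1, s0, d3, d2 } from most to least significant lane. */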
11082 /* 256-bit SHUFPS appears to steer each of the 128-bit halves
11083 identically. Hence do the clueless thing and use math_SHUFPS_128
11084 twice. */
11085 static IRTemp math_SHUFPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
11087 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11088 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11089 breakupV256toV128s( sV, &sVhi, &sVlo );
11090 breakupV256toV128s( dV, &dVhi, &dVlo );
11091 IRTemp rVhi = math_SHUFPS_128(sVhi, dVhi, imm8);
11092 IRTemp rVlo = math_SHUFPS_128(sVlo, dVlo, imm8);
11093 IRTemp rV = newTemp(Ity_V256);
11094 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11095 return rV;
11099 static IRTemp math_SHUFPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11101 IRTemp s1 = newTemp(Ity_I64);
11102 IRTemp s0 = newTemp(Ity_I64);
11103 IRTemp d1 = newTemp(Ity_I64);
11104 IRTemp d0 = newTemp(Ity_I64);
11106 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
11107 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
11108 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
11109 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
11111 # define SELD(n) mkexpr((n)==0 ? d0 : d1)
11112 # define SELS(n) mkexpr((n)==0 ? s0 : s1)
11114 IRTemp res = newTemp(Ity_V128);
11115 assign(res, binop( Iop_64HLtoV128,
11116 SELS((imm8>>1)&1), SELD((imm8>>0)&1) ) );
11118 # undef SELD
11119 # undef SELS
11120 return res;
11124 static IRTemp math_SHUFPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
11126 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11127 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11128 breakupV256toV128s( sV, &sVhi, &sVlo );
11129 breakupV256toV128s( dV, &dVhi, &dVlo );
11130 IRTemp rVhi = math_SHUFPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
11131 IRTemp rVlo = math_SHUFPD_128(sVlo, dVlo, imm8 & 3);
11132 IRTemp rV = newTemp(Ity_V256);
11133 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11134 return rV;
11138 static IRTemp math_BLENDPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11140 UShort imm8_mask_16;
11141 IRTemp imm8_mask = newTemp(Ity_V128);
11143 switch( imm8 & 3 ) {
11144 case 0: imm8_mask_16 = 0x0000; break;
11145 case 1: imm8_mask_16 = 0x00FF; break;
11146 case 2: imm8_mask_16 = 0xFF00; break;
11147 case 3: imm8_mask_16 = 0xFFFF; break;
11148 default: vassert(0); break;
11150 assign( imm8_mask, mkV128( imm8_mask_16 ) );
11152 IRTemp res = newTemp(Ity_V128);
11153 assign ( res, binop( Iop_OrV128,
11154 binop( Iop_AndV128, mkexpr(sV),
11155 mkexpr(imm8_mask) ),
11156 binop( Iop_AndV128, mkexpr(dV),
11157 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
11158 return res;
11162 static IRTemp math_BLENDPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
11164 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11165 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11166 breakupV256toV128s( sV, &sVhi, &sVlo );
11167 breakupV256toV128s( dV, &dVhi, &dVlo );
11168 IRTemp rVhi = math_BLENDPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
11169 IRTemp rVlo = math_BLENDPD_128(sVlo, dVlo, imm8 & 3);
11170 IRTemp rV = newTemp(Ity_V256);
11171 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11172 return rV;
11176 static IRTemp math_BLENDPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11178 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
11179 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
11180 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
11181 0xFFFF };
11182 IRTemp imm8_mask = newTemp(Ity_V128);
11183 assign( imm8_mask, mkV128( imm8_perms[ (imm8 & 15) ] ) );
11185 IRTemp res = newTemp(Ity_V128);
11186 assign ( res, binop( Iop_OrV128,
11187 binop( Iop_AndV128, mkexpr(sV),
11188 mkexpr(imm8_mask) ),
11189 binop( Iop_AndV128, mkexpr(dV),
11190 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
11191 return res;
11195 static IRTemp math_BLENDPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
11197 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11198 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11199 breakupV256toV128s( sV, &sVhi, &sVlo );
11200 breakupV256toV128s( dV, &dVhi, &dVlo );
11201 IRTemp rVhi = math_BLENDPS_128(sVhi, dVhi, (imm8 >> 4) & 15);
11202 IRTemp rVlo = math_BLENDPS_128(sVlo, dVlo, imm8 & 15);
11203 IRTemp rV = newTemp(Ity_V256);
11204 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11205 return rV;
11209 static IRTemp math_PBLENDW_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11211 /* Make imm16 be a 16-bit version of imm8, formed by duplicating each
11212 bit in imm8. */
11213 Int i;
11214 UShort imm16 = 0;
11215 for (i = 0; i < 8; i++) {
11216 if (imm8 & (1 << i))
11217 imm16 |= (3 << (2*i));
11219 IRTemp imm16_mask = newTemp(Ity_V128);
11220 assign( imm16_mask, mkV128( imm16 ));
11222 IRTemp res = newTemp(Ity_V128);
11223 assign ( res, binop( Iop_OrV128,
11224 binop( Iop_AndV128, mkexpr(sV),
11225 mkexpr(imm16_mask) ),
11226 binop( Iop_AndV128, mkexpr(dV),
11227 unop( Iop_NotV128, mkexpr(imm16_mask) ) ) ) );
11228 return res;
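/* Example: imm8 == 0xA5 (binary 1010 0101) expands to imm16 == 0xCC33,
   so 16-bit lanes 0, 2, 5 and 7 of the result come from the source and
   the remaining lanes from the destination. */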
11232 static IRTemp math_PMULUDQ_128 ( IRTemp sV, IRTemp dV )
11234 /* This is a really poor translation -- could be improved if
11235 performance critical */
11236 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11237 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11238 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
11239 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11240 IRTemp res = newTemp(Ity_V128);
11241 assign(res, binop(Iop_64HLtoV128,
11242 binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)),
11243 binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) ));
11244 return res;
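/* Note: only the even-numbered 32-bit lanes (d0*s0 and d2*s2) take part,
   each producing a full 64-bit unsigned product, which is exactly the
   architectural definition of PMULUDQ. */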
11248 static IRTemp math_PMULUDQ_256 ( IRTemp sV, IRTemp dV )
11250 /* This is a really poor translation -- could be improved if
11251 performance critical */
11252 IRTemp sHi, sLo, dHi, dLo;
11253 sHi = sLo = dHi = dLo = IRTemp_INVALID;
11254 breakupV256toV128s( dV, &dHi, &dLo);
11255 breakupV256toV128s( sV, &sHi, &sLo);
11256 IRTemp res = newTemp(Ity_V256);
11257 assign(res, binop(Iop_V128HLtoV256,
11258 mkexpr(math_PMULUDQ_128(sHi, dHi)),
11259 mkexpr(math_PMULUDQ_128(sLo, dLo))));
11260 return res;
11264 static IRTemp math_PMULDQ_128 ( IRTemp dV, IRTemp sV )
11266 /* This is a really poor translation -- could be improved if
11267 performance critical */
11268 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11269 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11270 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
11271 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11272 IRTemp res = newTemp(Ity_V128);
11273 assign(res, binop(Iop_64HLtoV128,
11274 binop( Iop_MullS32, mkexpr(d2), mkexpr(s2)),
11275 binop( Iop_MullS32, mkexpr(d0), mkexpr(s0)) ));
11276 return res;
11280 static IRTemp math_PMULDQ_256 ( IRTemp sV, IRTemp dV )
11282 /* This is a really poor translation -- could be improved if
11283 performance critical */
11284 IRTemp sHi, sLo, dHi, dLo;
11285 sHi = sLo = dHi = dLo = IRTemp_INVALID;
11286 breakupV256toV128s( dV, &dHi, &dLo);
11287 breakupV256toV128s( sV, &sHi, &sLo);
11288 IRTemp res = newTemp(Ity_V256);
11289 assign(res, binop(Iop_V128HLtoV256,
11290 mkexpr(math_PMULDQ_128(sHi, dHi)),
11291 mkexpr(math_PMULDQ_128(sLo, dLo))));
11292 return res;
11296 static IRTemp math_PMADDWD_128 ( IRTemp dV, IRTemp sV )
11298 IRTemp sVhi, sVlo, dVhi, dVlo;
11299 IRTemp resHi = newTemp(Ity_I64);
11300 IRTemp resLo = newTemp(Ity_I64);
11301 sVhi = sVlo = dVhi = dVlo = IRTemp_INVALID;
11302 breakupV128to64s( sV, &sVhi, &sVlo );
11303 breakupV128to64s( dV, &dVhi, &dVlo );
11304 assign( resHi, mkIRExprCCall(Ity_I64, 0/*regparms*/,
11305 "amd64g_calculate_mmx_pmaddwd",
11306 &amd64g_calculate_mmx_pmaddwd,
11307 mkIRExprVec_2( mkexpr(sVhi), mkexpr(dVhi))));
11308 assign( resLo, mkIRExprCCall(Ity_I64, 0/*regparms*/,
11309 "amd64g_calculate_mmx_pmaddwd",
11310 &amd64g_calculate_mmx_pmaddwd,
11311 mkIRExprVec_2( mkexpr(sVlo), mkexpr(dVlo))));
11312 IRTemp res = newTemp(Ity_V128);
11313 assign( res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo))) ;
11314 return res;
11318 static IRTemp math_PMADDWD_256 ( IRTemp dV, IRTemp sV )
11320 IRTemp sHi, sLo, dHi, dLo;
11321 sHi = sLo = dHi = dLo = IRTemp_INVALID;
11322 breakupV256toV128s( dV, &dHi, &dLo);
11323 breakupV256toV128s( sV, &sHi, &sLo);
11324 IRTemp res = newTemp(Ity_V256);
11325 assign(res, binop(Iop_V128HLtoV256,
11326 mkexpr(math_PMADDWD_128(dHi, sHi)),
11327 mkexpr(math_PMADDWD_128(dLo, sLo))));
11328 return res;
11332 static IRTemp math_ADDSUBPD_128 ( IRTemp dV, IRTemp sV )
11334 IRTemp addV = newTemp(Ity_V128);
11335 IRTemp subV = newTemp(Ity_V128);
11336 IRTemp a1 = newTemp(Ity_I64);
11337 IRTemp s0 = newTemp(Ity_I64);
11338 IRTemp rm = newTemp(Ity_I32);
11340 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11341 assign( addV, triop(Iop_Add64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11342 assign( subV, triop(Iop_Sub64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11344 assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
11345 assign( s0, unop(Iop_V128to64, mkexpr(subV) ));
11347 IRTemp res = newTemp(Ity_V128);
11348 assign( res, binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
11349 return res;
11353 static IRTemp math_ADDSUBPD_256 ( IRTemp dV, IRTemp sV )
11355 IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
11356 IRTemp addV = newTemp(Ity_V256);
11357 IRTemp subV = newTemp(Ity_V256);
11358 IRTemp rm = newTemp(Ity_I32);
11359 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11361 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11362 assign( addV, triop(Iop_Add64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11363 assign( subV, triop(Iop_Sub64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11365 breakupV256to64s( addV, &a3, &a2, &a1, &a0 );
11366 breakupV256to64s( subV, &s3, &s2, &s1, &s0 );
11368 IRTemp res = newTemp(Ity_V256);
11369 assign( res, mkV256from64s( a3, s2, a1, s0 ) );
11370 return res;
11374 static IRTemp math_ADDSUBPS_128 ( IRTemp dV, IRTemp sV )
11376 IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
11377 IRTemp addV = newTemp(Ity_V128);
11378 IRTemp subV = newTemp(Ity_V128);
11379 IRTemp rm = newTemp(Ity_I32);
11380 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11382 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11383 assign( addV, triop(Iop_Add32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11384 assign( subV, triop(Iop_Sub32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11386 breakupV128to32s( addV, &a3, &a2, &a1, &a0 );
11387 breakupV128to32s( subV, &s3, &s2, &s1, &s0 );
11389 IRTemp res = newTemp(Ity_V128);
11390 assign( res, mkV128from32s( a3, s2, a1, s0 ) );
11391 return res;
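/* Note: ADDSUBPS subtracts in the even-numbered lanes and adds in the
   odd-numbered ones, hence the result is assembled as
   { a3, s2, a1, s0 } above. */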
11395 static IRTemp math_ADDSUBPS_256 ( IRTemp dV, IRTemp sV )
11397 IRTemp a7, a6, a5, a4, a3, a2, a1, a0;
11398 IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
11399 IRTemp addV = newTemp(Ity_V256);
11400 IRTemp subV = newTemp(Ity_V256);
11401 IRTemp rm = newTemp(Ity_I32);
11402 a7 = a6 = a5 = a4 = a3 = a2 = a1 = a0 = IRTemp_INVALID;
11403 s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11405 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11406 assign( addV, triop(Iop_Add32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11407 assign( subV, triop(Iop_Sub32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11409 breakupV256to32s( addV, &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0 );
11410 breakupV256to32s( subV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
11412 IRTemp res = newTemp(Ity_V256);
11413 assign( res, mkV256from32s( a7, s6, a5, s4, a3, s2, a1, s0 ) );
11414 return res;
11418 /* Handle 128 bit PSHUFLW and PSHUFHW. */
11419 static Long dis_PSHUFxW_128 ( const VexAbiInfo* vbi, Prefix pfx,
11420 Long delta, Bool isAvx, Bool xIsH )
11422 IRTemp addr = IRTemp_INVALID;
11423 Int alen = 0;
11424 HChar dis_buf[50];
11425 UChar modrm = getUChar(delta);
11426 UInt rG = gregOfRexRM(pfx,modrm);
11427 UInt imm8;
11428 IRTemp sVmut, dVmut, sVcon, sV, dV, s3, s2, s1, s0;
11429 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11430 sV = newTemp(Ity_V128);
11431 dV = newTemp(Ity_V128);
11432 sVmut = newTemp(Ity_I64);
11433 dVmut = newTemp(Ity_I64);
11434 sVcon = newTemp(Ity_I64);
11435 if (epartIsReg(modrm)) {
11436 UInt rE = eregOfRexRM(pfx,modrm);
11437 assign( sV, getXMMReg(rE) );
11438 imm8 = (UInt)getUChar(delta+1);
11439 delta += 1+1;
11440 DIP("%spshuf%cw $%u,%s,%s\n",
11441 isAvx ? "v" : "", xIsH ? 'h' : 'l',
11442 imm8, nameXMMReg(rE), nameXMMReg(rG));
11443 } else {
11444 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
11445 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11446 imm8 = (UInt)getUChar(delta+alen);
11447 delta += alen+1;
11448 DIP("%spshuf%cw $%u,%s,%s\n",
11449 isAvx ? "v" : "", xIsH ? 'h' : 'l',
11450 imm8, dis_buf, nameXMMReg(rG));
11453 /* Get the to-be-changed (mut) and unchanging (con) bits of the
11454 source. */
11455 assign( sVmut, unop(xIsH ? Iop_V128HIto64 : Iop_V128to64, mkexpr(sV)) );
11456 assign( sVcon, unop(xIsH ? Iop_V128to64 : Iop_V128HIto64, mkexpr(sV)) );
11458 breakup64to16s( sVmut, &s3, &s2, &s1, &s0 );
11459 # define SEL(n) \
11460 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11461 assign(dVmut, mk64from16s( SEL((imm8>>6)&3), SEL((imm8>>4)&3),
11462 SEL((imm8>>2)&3), SEL((imm8>>0)&3) ));
11463 # undef SEL
11465 assign(dV, xIsH ? binop(Iop_64HLtoV128, mkexpr(dVmut), mkexpr(sVcon))
11466 : binop(Iop_64HLtoV128, mkexpr(sVcon), mkexpr(dVmut)) );
11468 (isAvx ? putYMMRegLoAndZU : putXMMReg)(rG, mkexpr(dV));
11469 return delta;
11473 /* Handle 256 bit PSHUFLW and PSHUFHW. */
11474 static Long dis_PSHUFxW_256 ( const VexAbiInfo* vbi, Prefix pfx,
11475 Long delta, Bool xIsH )
11477 IRTemp addr = IRTemp_INVALID;
11478 Int alen = 0;
11479 HChar dis_buf[50];
11480 UChar modrm = getUChar(delta);
11481 UInt rG = gregOfRexRM(pfx,modrm);
11482 UInt imm8;
11483 IRTemp sV, s[8], sV64[4], dVhi, dVlo;
11484 sV64[3] = sV64[2] = sV64[1] = sV64[0] = IRTemp_INVALID;
11485 s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
11486 sV = newTemp(Ity_V256);
11487 dVhi = newTemp(Ity_I64);
11488 dVlo = newTemp(Ity_I64);
11489 if (epartIsReg(modrm)) {
11490 UInt rE = eregOfRexRM(pfx,modrm);
11491 assign( sV, getYMMReg(rE) );
11492 imm8 = (UInt)getUChar(delta+1);
11493 delta += 1+1;
11494 DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l',
11495 imm8, nameYMMReg(rE), nameYMMReg(rG));
11496 } else {
11497 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
11498 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
11499 imm8 = (UInt)getUChar(delta+alen);
11500 delta += alen+1;
11501 DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l',
11502 imm8, dis_buf, nameYMMReg(rG));
11505 breakupV256to64s( sV, &sV64[3], &sV64[2], &sV64[1], &sV64[0] );
11506 breakup64to16s( sV64[xIsH ? 3 : 2], &s[7], &s[6], &s[5], &s[4] );
11507 breakup64to16s( sV64[xIsH ? 1 : 0], &s[3], &s[2], &s[1], &s[0] );
11509 assign( dVhi, mk64from16s( s[4 + ((imm8>>6)&3)], s[4 + ((imm8>>4)&3)],
11510 s[4 + ((imm8>>2)&3)], s[4 + ((imm8>>0)&3)] ) );
11511 assign( dVlo, mk64from16s( s[0 + ((imm8>>6)&3)], s[0 + ((imm8>>4)&3)],
11512 s[0 + ((imm8>>2)&3)], s[0 + ((imm8>>0)&3)] ) );
11513 putYMMReg( rG, mkV256from64s( xIsH ? dVhi : sV64[3],
11514 xIsH ? sV64[2] : dVhi,
11515 xIsH ? dVlo : sV64[1],
11516 xIsH ? sV64[0] : dVlo ) );
11517 return delta;
11521 static Long dis_PEXTRW_128_EregOnly_toG ( const VexAbiInfo* vbi, Prefix pfx,
11522 Long delta, Bool isAvx )
11524 Long deltaIN = delta;
11525 UChar modrm = getUChar(delta);
11526 UInt rG = gregOfRexRM(pfx,modrm);
11527 IRTemp sV = newTemp(Ity_V128);
11528 IRTemp d16 = newTemp(Ity_I16);
11529 UInt imm8;
11530 IRTemp s0, s1, s2, s3;
11531 if (epartIsReg(modrm)) {
11532 UInt rE = eregOfRexRM(pfx,modrm);
11533 assign(sV, getXMMReg(rE));
11534 imm8 = getUChar(delta+1) & 7;
11535 delta += 1+1;
11536 DIP("%spextrw $%u,%s,%s\n", isAvx ? "v" : "",
11537 imm8, nameXMMReg(rE), nameIReg32(rG));
11538 } else {
11539 /* The memory case is disallowed, apparently. */
11540 return deltaIN; /* FAIL */
11542 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11543 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11544 switch (imm8) {
11545 case 0: assign(d16, unop(Iop_32to16, mkexpr(s0))); break;
11546 case 1: assign(d16, unop(Iop_32HIto16, mkexpr(s0))); break;
11547 case 2: assign(d16, unop(Iop_32to16, mkexpr(s1))); break;
11548 case 3: assign(d16, unop(Iop_32HIto16, mkexpr(s1))); break;
11549 case 4: assign(d16, unop(Iop_32to16, mkexpr(s2))); break;
11550 case 5: assign(d16, unop(Iop_32HIto16, mkexpr(s2))); break;
11551 case 6: assign(d16, unop(Iop_32to16, mkexpr(s3))); break;
11552 case 7: assign(d16, unop(Iop_32HIto16, mkexpr(s3))); break;
11553 default: vassert(0);
11555 putIReg32(rG, unop(Iop_16Uto32, mkexpr(d16)));
11556 return delta;
11560 static Long dis_CVTDQ2PD_128 ( const VexAbiInfo* vbi, Prefix pfx,
11561 Long delta, Bool isAvx )
11563 IRTemp addr = IRTemp_INVALID;
11564 Int alen = 0;
11565 HChar dis_buf[50];
11566 UChar modrm = getUChar(delta);
11567 IRTemp arg64 = newTemp(Ity_I64);
11568 UInt rG = gregOfRexRM(pfx,modrm);
11569 const HChar* mbV = isAvx ? "v" : "";
11570 if (epartIsReg(modrm)) {
11571 UInt rE = eregOfRexRM(pfx,modrm);
11572 assign( arg64, getXMMRegLane64(rE, 0) );
11573 delta += 1;
11574 DIP("%scvtdq2pd %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG));
11575 } else {
11576 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11577 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
11578 delta += alen;
11579 DIP("%scvtdq2pd %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
11581 putXMMRegLane64F(
11582 rG, 0,
11583 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)))
11585 putXMMRegLane64F(
11586 rG, 1,
11587 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)))
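   /* Note: Iop_I32StoF64 takes no rounding mode, since every signed
      32-bit integer is exactly representable as an F64; only conversions
      that can lose precision (e.g. F64->I32, I64->F64) carry an explicit
      rounding-mode argument. */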
11589 if (isAvx)
11590 putYMMRegLane128(rG, 1, mkV128(0));
11591 return delta;
11595 static Long dis_STMXCSR ( const VexAbiInfo* vbi, Prefix pfx,
11596 Long delta, Bool isAvx )
11598 IRTemp addr = IRTemp_INVALID;
11599 Int alen = 0;
11600 HChar dis_buf[50];
11601 UChar modrm = getUChar(delta);
11602 vassert(!epartIsReg(modrm)); /* ensured by caller */
11603 vassert(gregOfRexRM(pfx,modrm) == 3); /* ditto */
11605 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11606 delta += alen;
11608 /* Fake up a native SSE mxcsr word. The only thing it depends on
11609 is SSEROUND[1:0], so call a clean helper to cook it up.
11611 /* ULong amd64g_create_mxcsr ( ULong sseround ) */
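   /* Rough sketch of what that helper produces (see
      guest_amd64_helpers.c for the authoritative version): roughly
      0x1F80 | (sseround << 13), i.e. all exception-mask bits set and the
      two RC bits taken from SSEROUND, so for example SSEROUND ==
      Irrm_ZERO (3) would yield an in-memory MXCSR of 0x7F80. */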
11612 DIP("%sstmxcsr %s\n", isAvx ? "v" : "", dis_buf);
11613 storeLE(
11614 mkexpr(addr),
11615 unop(Iop_64to32,
11616 mkIRExprCCall(
11617 Ity_I64, 0/*regp*/,
11618 "amd64g_create_mxcsr", &amd64g_create_mxcsr,
11619 mkIRExprVec_1( unop(Iop_32Uto64,get_sse_roundingmode()) )
11623 return delta;
11627 static Long dis_LDMXCSR ( const VexAbiInfo* vbi, Prefix pfx,
11628 Long delta, Bool isAvx )
11630 IRTemp addr = IRTemp_INVALID;
11631 Int alen = 0;
11632 HChar dis_buf[50];
11633 UChar modrm = getUChar(delta);
11634 vassert(!epartIsReg(modrm)); /* ensured by caller */
11635 vassert(gregOfRexRM(pfx,modrm) == 2); /* ditto */
11637 IRTemp t64 = newTemp(Ity_I64);
11638 IRTemp ew = newTemp(Ity_I32);
11640 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11641 delta += alen;
11642 DIP("%sldmxcsr %s\n", isAvx ? "v" : "", dis_buf);
11644 /* The only thing we observe in %mxcsr is the rounding mode.
11645 Therefore, pass the 32-bit value (SSE native-format control
11646 word) to a clean helper, getting back a 64-bit value, the
11647 lower half of which is the SSEROUND value to store, and the
11648 upper half of which is the emulation-warning token which may
11649 be generated.
11651 /* ULong amd64g_check_ldmxcsr ( ULong ); */
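   /* Informal example: a guest MXCSR image of 0x7F80 has RC (bits 14:13)
      == 3, so the helper returns SSEROUND == Irrm_ZERO in the low half
      of t64; if the image requests behaviour we don't model (the helper
      decides exactly what qualifies), the high half carries a nonzero
      emulation-warning token, which the side-exit below reports. */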
11652 assign( t64, mkIRExprCCall(
11653 Ity_I64, 0/*regparms*/,
11654 "amd64g_check_ldmxcsr",
11655 &amd64g_check_ldmxcsr,
11656 mkIRExprVec_1(
11657 unop(Iop_32Uto64,
11658 loadLE(Ity_I32, mkexpr(addr))
11664 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) );
11665 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
11666 put_emwarn( mkexpr(ew) );
11667 /* Finally, if an emulation warning was reported, side-exit to
11668 the next insn, reporting the warning, so that Valgrind's
11669 dispatcher sees the warning. */
11670 stmt(
11671 IRStmt_Exit(
11672 binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)),
11673 Ijk_EmWarn,
11674 IRConst_U64(guest_RIP_bbstart+delta),
11675 OFFB_RIP
11678 return delta;
11682 static void gen_XSAVE_SEQUENCE ( IRTemp addr, IRTemp rfbm )
11684 /* ------ rfbm[0] gates the x87 state ------ */
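   /* Layout assumed by this sequence (informal summary, matching the
      offsets used below): bytes 0..159 hold the x87/FPU part of the
      legacy FXSAVE image (with MXCSR/MXCSR_MASK at 24..31), XMM0..15
      live at 160 + 16*reg, the XSAVE header (XSTATE_BV, XCOMP_BV, ...)
      starts at 512, and the AVX component -- the high 128 bits of
      YMM0..15 -- is stored at 576 + 16*reg. */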
11686 /* Uses dirty helper:
11687 void amd64g_do_XSAVE_COMPONENT_0 ( VexGuestAMD64State*, ULong )
11689 IRDirty* d0 = unsafeIRDirty_0_N (
11690 0/*regparms*/,
11691 "amd64g_dirtyhelper_XSAVE_COMPONENT_0",
11692 &amd64g_dirtyhelper_XSAVE_COMPONENT_0,
11693 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
11695 d0->guard = binop(Iop_CmpEQ64, binop(Iop_And64, mkexpr(rfbm), mkU64(1)),
11696 mkU64(1));
11698 /* Declare we're writing memory. Really, bytes 24 through 31
11699 (MXCSR and MXCSR_MASK) aren't written, but we can't express more
11700 than 1 memory area here, so just mark the whole thing as
11701 written. */
11702 d0->mFx = Ifx_Write;
11703 d0->mAddr = mkexpr(addr);
11704 d0->mSize = 160;
11706 /* declare we're reading guest state */
11707 d0->nFxState = 5;
11708 vex_bzero(&d0->fxState, sizeof(d0->fxState));
11710 d0->fxState[0].fx = Ifx_Read;
11711 d0->fxState[0].offset = OFFB_FTOP;
11712 d0->fxState[0].size = sizeof(UInt);
11714 d0->fxState[1].fx = Ifx_Read;
11715 d0->fxState[1].offset = OFFB_FPREGS;
11716 d0->fxState[1].size = 8 * sizeof(ULong);
11718 d0->fxState[2].fx = Ifx_Read;
11719 d0->fxState[2].offset = OFFB_FPTAGS;
11720 d0->fxState[2].size = 8 * sizeof(UChar);
11722 d0->fxState[3].fx = Ifx_Read;
11723 d0->fxState[3].offset = OFFB_FPROUND;
11724 d0->fxState[3].size = sizeof(ULong);
11726 d0->fxState[4].fx = Ifx_Read;
11727 d0->fxState[4].offset = OFFB_FC3210;
11728 d0->fxState[4].size = sizeof(ULong);
11730 stmt( IRStmt_Dirty(d0) );
11732 /* ------ rfbm[1] gates the SSE state ------ */
11734 IRTemp rfbm_1 = newTemp(Ity_I64);
11735 IRTemp rfbm_1or2 = newTemp(Ity_I64);
11736 assign(rfbm_1, binop(Iop_And64, mkexpr(rfbm), mkU64(2)));
11737 assign(rfbm_1or2, binop(Iop_And64, mkexpr(rfbm), mkU64(6)));
11739 IRExpr* guard_1 = binop(Iop_CmpEQ64, mkexpr(rfbm_1), mkU64(2));
11740 IRExpr* guard_1or2 = binop(Iop_CmpNE64, mkexpr(rfbm_1or2), mkU64(0));
11742 /* Uses dirty helper:
11743 void amd64g_do_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS
11744 ( VexGuestAMD64State*, ULong )
11745 This creates only MXCSR and MXCSR_MASK. We need to do this if
11746 either components 1 (SSE) or 2 (AVX) are requested. Hence the
11747 guard condition is a bit more complex.
11749 IRDirty* d1 = unsafeIRDirty_0_N (
11750 0/*regparms*/,
11751 "amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS",
11752 &amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS,
11753 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
11755 d1->guard = guard_1or2;
11757 /* Declare we're writing memory: MXCSR and MXCSR_MASK. Note that
11758 the code for rfbm[0] just above claims a write of 0 .. 159, so
11759 this duplicates it. But it at least correctly connects 24 .. 31 to
11760 the MXCSR guest state representation (SSEROUND field). */
11761 d1->mFx = Ifx_Write;
11762 d1->mAddr = binop(Iop_Add64, mkexpr(addr), mkU64(24));
11763 d1->mSize = 8;
11765 /* declare we're reading guest state */
11766 d1->nFxState = 1;
11767 vex_bzero(&d1->fxState, sizeof(d1->fxState));
11769 d1->fxState[0].fx = Ifx_Read;
11770 d1->fxState[0].offset = OFFB_SSEROUND;
11771 d1->fxState[0].size = sizeof(ULong);
11773 /* Call the helper. This creates MXCSR and MXCSR_MASK but nothing
11774 else. We do the actual register array, XMM[0..15], separately,
11775 in order that any undefinedness in the XMM registers is tracked
11776 separately by Memcheck and does not "infect" the in-memory
11777 shadow for the other parts of the image. */
11778 stmt( IRStmt_Dirty(d1) );
11780 /* And now the XMMs themselves. */
11781 UInt reg;
11782 for (reg = 0; reg < 16; reg++) {
11783 stmt( IRStmt_StoreG(
11784 Iend_LE,
11785 binop(Iop_Add64, mkexpr(addr), mkU64(160 + reg * 16)),
11786 getXMMReg(reg),
11787 guard_1
11791 /* ------ rfbm[2] gates the AVX state ------ */
11792 /* Component 2 is just a bunch of register saves, so we'll do it
11793 inline, just to be simple and to be Memcheck friendly. */
11795 IRTemp rfbm_2 = newTemp(Ity_I64);
11796 assign(rfbm_2, binop(Iop_And64, mkexpr(rfbm), mkU64(4)));
11798 IRExpr* guard_2 = binop(Iop_CmpEQ64, mkexpr(rfbm_2), mkU64(4));
11800 for (reg = 0; reg < 16; reg++) {
11801 stmt( IRStmt_StoreG(
11802 Iend_LE,
11803 binop(Iop_Add64, mkexpr(addr), mkU64(576 + reg * 16)),
11804 getYMMRegLane128(reg,1),
11805 guard_2
11811 static Long dis_XSAVE ( const VexAbiInfo* vbi,
11812 Prefix pfx, Long delta, Int sz )
11814 /* Note that the presence or absence of REX.W (indicated here by
11815 |sz|) slightly affects the written format: whether the saved FPU
11816 IP and DP pointers are 64 or 32 bits. But the helper function
11817 we call simply writes zero bits in the relevant fields, which
11818 are 64 bits regardless of what REX.W is, and so it's good enough
11819 (iow, equally broken) in both cases. */
11820 IRTemp addr = IRTemp_INVALID;
11821 Int alen = 0;
11822 HChar dis_buf[50];
11823 UChar modrm = getUChar(delta);
11824 vassert(!epartIsReg(modrm)); /* ensured by caller */
11825 vassert(sz == 4 || sz == 8); /* ditto */
11827 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11828 delta += alen;
11829 gen_SEGV_if_not_64_aligned(addr);
11831 DIP("%sxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);
11833 /* VEX's caller is assumed to have checked this. */
11834 const ULong aSSUMED_XCR0_VALUE = 7;
11836 IRTemp rfbm = newTemp(Ity_I64);
11837 assign(rfbm,
11838 binop(Iop_And64,
11839 binop(Iop_Or64,
11840 binop(Iop_Shl64,
11841 unop(Iop_32Uto64, getIRegRDX(4)), mkU8(32)),
11842 unop(Iop_32Uto64, getIRegRAX(4))),
11843 mkU64(aSSUMED_XCR0_VALUE)));
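   /* Informal example: "xsave" executed with %eax == 7 and %edx == 0
      gives rfbm == 7, so all three gated groups (x87, SSE, AVX) are
      written; %eax == 2 would write only the XMM registers plus
      MXCSR/MXCSR_MASK. */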
11845 gen_XSAVE_SEQUENCE(addr, rfbm);
11847 /* Finally, we need to update XSTATE_BV in the XSAVE header area, by
11848 OR-ing the RFBM value into it. */
11849 IRTemp addr_plus_512 = newTemp(Ity_I64);
11850 assign(addr_plus_512, binop(Iop_Add64, mkexpr(addr), mkU64(512)));
11851 storeLE( mkexpr(addr_plus_512),
11852 binop(Iop_Or8,
11853 unop(Iop_64to8, mkexpr(rfbm)),
11854 loadLE(Ity_I8, mkexpr(addr_plus_512))) );
11856 return delta;
11860 static Long dis_FXSAVE ( const VexAbiInfo* vbi,
11861 Prefix pfx, Long delta, Int sz )
11863 /* See comment in dis_XSAVE about the significance of REX.W. */
11864 IRTemp addr = IRTemp_INVALID;
11865 Int alen = 0;
11866 HChar dis_buf[50];
11867 UChar modrm = getUChar(delta);
11868 vassert(!epartIsReg(modrm)); /* ensured by caller */
11869 vassert(sz == 4 || sz == 8); /* ditto */
11871 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11872 delta += alen;
11873 gen_SEGV_if_not_16_aligned(addr);
11875 DIP("%sfxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);
11877 /* FXSAVE is just XSAVE with components 0 and 1 selected. Set rfbm
11878 to 0b011, generate the XSAVE sequence accordingly, and let iropt
11879 fold out the unused (AVX) parts. */
11880 IRTemp rfbm = newTemp(Ity_I64);
11881 assign(rfbm, mkU64(3));
11882 gen_XSAVE_SEQUENCE(addr, rfbm);
11884 return delta;
11888 static void gen_XRSTOR_SEQUENCE ( IRTemp addr, IRTemp xstate_bv, IRTemp rfbm )
11890 /* ------ rfbm[0] gates the x87 state ------ */
11892 /* If rfbm[0] == 1, we have to write the x87 state. If
11893 xstate_bv[0] == 1, we will read it from the memory image, else
11894 we'll set it to initial values. Doing this with a helper
11895 function and getting the definedness flow annotations correct is
11896 too difficult, so generate stupid but simple code: first set the
11897 registers to initial values, regardless of xstate_bv[0]. Then,
11898 conditionally restore from the memory image. */
11900 IRTemp rfbm_0 = newTemp(Ity_I64);
11901 IRTemp xstate_bv_0 = newTemp(Ity_I64);
11902 IRTemp restore_0 = newTemp(Ity_I64);
11903 assign(rfbm_0, binop(Iop_And64, mkexpr(rfbm), mkU64(1)));
11904 assign(xstate_bv_0, binop(Iop_And64, mkexpr(xstate_bv), mkU64(1)));
11905 assign(restore_0, binop(Iop_And64, mkexpr(rfbm_0), mkexpr(xstate_bv_0)));
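   /* So, for component 0: rfbm[0] == 0 leaves the x87 state untouched;
      rfbm[0] == 1 with xstate_bv[0] == 0 merely reinitialises it (the
      FINIT sequence below); rfbm[0] == 1 with xstate_bv[0] == 1
      reinitialises it and then overwrites it from the memory image. */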
11907 gen_FINIT_SEQUENCE( binop(Iop_CmpNE64, mkexpr(rfbm_0), mkU64(0)) );
11909 /* Uses dirty helper:
11910 void amd64g_do_XRSTOR_COMPONENT_0 ( VexGuestAMD64State*, ULong )
11912 IRDirty* d0 = unsafeIRDirty_0_N (
11913 0/*regparms*/,
11914 "amd64g_dirtyhelper_XRSTOR_COMPONENT_0",
11915 &amd64g_dirtyhelper_XRSTOR_COMPONENT_0,
11916 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
11918 d0->guard = binop(Iop_CmpNE64, mkexpr(restore_0), mkU64(0));
11920 /* Declare we're reading memory. Really, bytes 24 through 31
11921 (MXCSR and MXCSR_MASK) aren't read, but we can't express more
11922 than 1 memory area here, so just mark the whole thing as
11923 read. */
11924 d0->mFx = Ifx_Read;
11925 d0->mAddr = mkexpr(addr);
11926 d0->mSize = 160;
11928 /* declare we're writing guest state */
11929 d0->nFxState = 5;
11930 vex_bzero(&d0->fxState, sizeof(d0->fxState));
11932 d0->fxState[0].fx = Ifx_Write;
11933 d0->fxState[0].offset = OFFB_FTOP;
11934 d0->fxState[0].size = sizeof(UInt);
11936 d0->fxState[1].fx = Ifx_Write;
11937 d0->fxState[1].offset = OFFB_FPREGS;
11938 d0->fxState[1].size = 8 * sizeof(ULong);
11940 d0->fxState[2].fx = Ifx_Write;
11941 d0->fxState[2].offset = OFFB_FPTAGS;
11942 d0->fxState[2].size = 8 * sizeof(UChar);
11944 d0->fxState[3].fx = Ifx_Write;
11945 d0->fxState[3].offset = OFFB_FPROUND;
11946 d0->fxState[3].size = sizeof(ULong);
11948 d0->fxState[4].fx = Ifx_Write;
11949 d0->fxState[4].offset = OFFB_FC3210;
11950 d0->fxState[4].size = sizeof(ULong);
11952 stmt( IRStmt_Dirty(d0) );
11954 /* ------ rfbm[1] gates the SSE state ------ */
11956 /* Same scheme as component 0: first zero it out, and then possibly
11957 restore from the memory area. */
11958 IRTemp rfbm_1 = newTemp(Ity_I64);
11959 IRTemp xstate_bv_1 = newTemp(Ity_I64);
11960 IRTemp restore_1 = newTemp(Ity_I64);
11961 assign(rfbm_1, binop(Iop_And64, mkexpr(rfbm), mkU64(2)));
11962 assign(xstate_bv_1, binop(Iop_And64, mkexpr(xstate_bv), mkU64(2)));
11963 assign(restore_1, binop(Iop_And64, mkexpr(rfbm_1), mkexpr(xstate_bv_1)));
11964 IRExpr* rfbm_1e = binop(Iop_CmpNE64, mkexpr(rfbm_1), mkU64(0));
11965 IRExpr* restore_1e = binop(Iop_CmpNE64, mkexpr(restore_1), mkU64(0));
11967 IRTemp rfbm_1or2 = newTemp(Ity_I64);
11968 IRTemp xstate_bv_1or2 = newTemp(Ity_I64);
11969 IRTemp restore_1or2 = newTemp(Ity_I64);
11970 assign(rfbm_1or2, binop(Iop_And64, mkexpr(rfbm), mkU64(6)));
11971 assign(xstate_bv_1or2, binop(Iop_And64, mkexpr(xstate_bv), mkU64(6)));
11972 assign(restore_1or2, binop(Iop_And64, mkexpr(rfbm_1or2),
11973 mkexpr(xstate_bv_1or2)));
11974 IRExpr* rfbm_1or2e = binop(Iop_CmpNE64, mkexpr(rfbm_1or2), mkU64(0));
11975 IRExpr* restore_1or2e = binop(Iop_CmpNE64, mkexpr(restore_1or2), mkU64(0));
11977 /* The areas in question are: SSEROUND, and the XMM register array. */
11978 putGuarded(OFFB_SSEROUND, rfbm_1or2e, mkU64(Irrm_NEAREST));
11980 UInt reg;
11981 for (reg = 0; reg < 16; reg++) {
11982 putGuarded(xmmGuestRegOffset(reg), rfbm_1e, mkV128(0));
11985 /* And now possibly restore from MXCSR/MXCSR_MASK */
11986 /* Uses dirty helper:
11987 void amd64g_do_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS
11988 ( VexGuestAMD64State*, ULong )
11989 This restores from only MXCSR and MXCSR_MASK. We need to do
11990 this if either components 1 (SSE) or 2 (AVX) are requested.
11991 Hence the guard condition is a bit more complex.
11993 IRDirty* d1 = unsafeIRDirty_0_N (
11994 0/*regparms*/,
11995 "amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS",
11996 &amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS,
11997 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
11999 d1->guard = restore_1or2e;
12001 /* Declare we're reading memory: MXCSR and MXCSR_MASK. Note that
12002 the code for rfbm[0] just above claims a read of 0 .. 159, so
12003 this duplicates it. But it at least correctly connects 24 .. 31 to
12004 the MXCSR guest state representation (SSEROUND field). */
12005 d1->mFx = Ifx_Read;
12006 d1->mAddr = binop(Iop_Add64, mkexpr(addr), mkU64(24));
12007 d1->mSize = 8;
12009 /* declare we're writing guest state */
12010 d1->nFxState = 1;
12011 vex_bzero(&d1->fxState, sizeof(d1->fxState));
12013 d1->fxState[0].fx = Ifx_Write;
12014 d1->fxState[0].offset = OFFB_SSEROUND;
12015 d1->fxState[0].size = sizeof(ULong);
12017 /* Call the helper. This creates SSEROUND but nothing
12018 else. We do the actual register array, XMM[0..15], separately,
12019 in order that any undefinedness in the XMM registers is tracked
12020 separately by Memcheck and is not "infected" by the in-memory
12021 shadow for the other parts of the image. */
12022 stmt( IRStmt_Dirty(d1) );
12024 /* And now the XMMs themselves. For each register, we PUT either
12025 its old value, or the value loaded from memory. One convenient
12026 way to do that is with a conditional load whose default value
12027 is the old value of the register. */
12028 for (reg = 0; reg < 16; reg++) {
12029 IRExpr* ea = binop(Iop_Add64, mkexpr(addr), mkU64(160 + reg * 16));
12030 IRExpr* alt = getXMMReg(reg);
12031 IRTemp loadedValue = newTemp(Ity_V128);
12032 stmt( IRStmt_LoadG(Iend_LE,
12033 ILGop_IdentV128,
12034 loadedValue, ea, alt, restore_1e) );
12035 putXMMReg(reg, mkexpr(loadedValue));
12038 /* ------ rfbm[2] gates the AVX state ------ */
12039 /* Component 2 is just a bunch of register loads, so we'll do it
12040 inline, just to be simple and to be Memcheck friendly. */
12042 /* Same scheme as component 0: first zero it out, and then possibly
12043 restore from the memory area. */
12044 IRTemp rfbm_2 = newTemp(Ity_I64);
12045 IRTemp xstate_bv_2 = newTemp(Ity_I64);
12046 IRTemp restore_2 = newTemp(Ity_I64);
12047 assign(rfbm_2, binop(Iop_And64, mkexpr(rfbm), mkU64(4)));
12048 assign(xstate_bv_2, binop(Iop_And64, mkexpr(xstate_bv), mkU64(4)));
12049 assign(restore_2, binop(Iop_And64, mkexpr(rfbm_2), mkexpr(xstate_bv_2)));
12051 IRExpr* rfbm_2e = binop(Iop_CmpNE64, mkexpr(rfbm_2), mkU64(0));
12052 IRExpr* restore_2e = binop(Iop_CmpNE64, mkexpr(restore_2), mkU64(0));
12054 for (reg = 0; reg < 16; reg++) {
12055 putGuarded(ymmGuestRegLane128offset(reg, 1), rfbm_2e, mkV128(0));
12058 for (reg = 0; reg < 16; reg++) {
12059 IRExpr* ea = binop(Iop_Add64, mkexpr(addr), mkU64(576 + reg * 16));
12060 IRExpr* alt = getYMMRegLane128(reg, 1);
12061 IRTemp loadedValue = newTemp(Ity_V128);
12062 stmt( IRStmt_LoadG(Iend_LE,
12063 ILGop_IdentV128,
12064 loadedValue, ea, alt, restore_2e) );
12065 putYMMRegLane128(reg, 1, mkexpr(loadedValue));
12070 static Long dis_XRSTOR ( const VexAbiInfo* vbi,
12071 Prefix pfx, Long delta, Int sz )
12073 /* As with XSAVE above, we ignore the value of REX.W since we're
12074 not bothering with the FPU DP and IP fields. */
12075 IRTemp addr = IRTemp_INVALID;
12076 Int alen = 0;
12077 HChar dis_buf[50];
12078 UChar modrm = getUChar(delta);
12079 vassert(!epartIsReg(modrm)); /* ensured by caller */
12080 vassert(sz == 4 || sz == 8); /* ditto */
12082 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12083 delta += alen;
12084 gen_SEGV_if_not_64_aligned(addr);
12086 DIP("%sxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);
12088 /* VEX's caller is assumed to have checked this. */
12089 const ULong aSSUMED_XCR0_VALUE = 7;
12091 IRTemp rfbm = newTemp(Ity_I64);
12092 assign(rfbm,
12093 binop(Iop_And64,
12094 binop(Iop_Or64,
12095 binop(Iop_Shl64,
12096 unop(Iop_32Uto64, getIRegRDX(4)), mkU8(32)),
12097 unop(Iop_32Uto64, getIRegRAX(4))),
12098 mkU64(aSSUMED_XCR0_VALUE)));
12100 IRTemp xstate_bv = newTemp(Ity_I64);
12101 assign(xstate_bv, loadLE(Ity_I64,
12102 binop(Iop_Add64, mkexpr(addr), mkU64(512+0))));
12104 IRTemp xcomp_bv = newTemp(Ity_I64);
12105 assign(xcomp_bv, loadLE(Ity_I64,
12106 binop(Iop_Add64, mkexpr(addr), mkU64(512+8))));
12108 IRTemp xsavehdr_23_16 = newTemp(Ity_I64);
12109 assign( xsavehdr_23_16,
12110 loadLE(Ity_I64,
12111 binop(Iop_Add64, mkexpr(addr), mkU64(512+16))));
12113 /* We must fault if
12114 * xcomp_bv[63] == 1, since this simulated CPU does not support
12115 the compaction extension.
12116 * xstate_bv sets a bit outside of XCR0 (which we assume to be 7).
12117 * any of the xsave header bytes 23 .. 8 are nonzero. This seems to
12118 imply that xcomp_bv must be zero.
12119 xcomp_bv is header bytes 15 .. 8 and xstate_bv is header bytes 7 .. 0
12121 IRTemp fault_if_nonzero = newTemp(Ity_I64);
12122 assign(fault_if_nonzero,
12123 binop(Iop_Or64,
12124 binop(Iop_And64, mkexpr(xstate_bv), mkU64(~aSSUMED_XCR0_VALUE)),
12125 binop(Iop_Or64, mkexpr(xcomp_bv), mkexpr(xsavehdr_23_16))));
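   /* Informal example: an area written by XSAVEC has xcomp_bv[63] set
      (compacted format), so fault_if_nonzero is nonzero and the exit
      below raises SIGSEGV instead of misinterpreting the compacted
      layout. */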
12126 stmt( IRStmt_Exit(binop(Iop_CmpNE64, mkexpr(fault_if_nonzero), mkU64(0)),
12127 Ijk_SigSEGV,
12128 IRConst_U64(guest_RIP_curr_instr),
12129 OFFB_RIP
12132 /* We are guaranteed now that both xstate_bv and rfbm are in the
12133 range 0 .. 7. Generate the restore sequence proper. */
12134 gen_XRSTOR_SEQUENCE(addr, xstate_bv, rfbm);
12136 return delta;
12140 static Long dis_FXRSTOR ( const VexAbiInfo* vbi,
12141 Prefix pfx, Long delta, Int sz )
12143 /* As with FXSAVE above we ignore the value of REX.W since we're
12144 not bothering with the FPU DP and IP fields. */
12145 IRTemp addr = IRTemp_INVALID;
12146 Int alen = 0;
12147 HChar dis_buf[50];
12148 UChar modrm = getUChar(delta);
12149 vassert(!epartIsReg(modrm)); /* ensured by caller */
12150 vassert(sz == 4 || sz == 8); /* ditto */
12152 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12153 delta += alen;
12154 gen_SEGV_if_not_16_aligned(addr);
12156 DIP("%sfxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);
12158 /* FXRSTOR is just XRSTOR with components 0 and 1 selected and also
12159 as if components 0 and 1 are set as present in XSTATE_BV in the
12160 XSAVE header. Set both rfbm and xstate_bv to 0b011 therefore,
12161 generate the XRSTOR sequence accordingly, and let iropt fold out
12162 the unused (AVX) parts accordingly. */
12163 IRTemp three = newTemp(Ity_I64);
12164 assign(three, mkU64(3));
12165 gen_XRSTOR_SEQUENCE(addr, three/*xstate_bv*/, three/*rfbm*/);
12167 return delta;
12171 static IRTemp math_PINSRW_128 ( IRTemp v128, IRTemp u16, UInt imm8 )
12173 vassert(imm8 >= 0 && imm8 <= 7);
12175 // Create a V128 value which has the selected word in the
12176 // specified lane, and zeroes everywhere else.
12177 IRTemp tmp128 = newTemp(Ity_V128);
12178 IRTemp halfshift = newTemp(Ity_I64);
12179 assign(halfshift, binop(Iop_Shl64,
12180 unop(Iop_16Uto64, mkexpr(u16)),
12181 mkU8(16 * (imm8 & 3))));
12182 if (imm8 < 4) {
12183 assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
12184 } else {
12185 assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
12188 UShort mask = ~(3 << (imm8 * 2));
12189 IRTemp res = newTemp(Ity_V128);
12190 assign( res, binop(Iop_OrV128,
12191 mkexpr(tmp128),
12192 binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
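   /* Worked example (illustrative): with imm8 == 2, halfshift is
      u16 << 32, tmp128 holds that in its low 64 bits, and mask ==
      ~(3 << 4) == 0xFFCF; as a byte mask that clears exactly bytes 4
      and 5 of v128, so the OR drops the new word into 16-bit lane 2. */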
12193 return res;
12197 static IRTemp math_PSADBW_128 ( IRTemp dV, IRTemp sV )
12199 IRTemp s1, s0, d1, d0;
12200 s1 = s0 = d1 = d0 = IRTemp_INVALID;
12202 breakupV128to64s( sV, &s1, &s0 );
12203 breakupV128to64s( dV, &d1, &d0 );
12205 IRTemp res = newTemp(Ity_V128);
12206 assign( res,
12207 binop(Iop_64HLtoV128,
12208 mkIRExprCCall(Ity_I64, 0/*regparms*/,
12209 "amd64g_calculate_mmx_psadbw",
12210 &amd64g_calculate_mmx_psadbw,
12211 mkIRExprVec_2( mkexpr(s1), mkexpr(d1))),
12212 mkIRExprCCall(Ity_I64, 0/*regparms*/,
12213 "amd64g_calculate_mmx_psadbw",
12214 &amd64g_calculate_mmx_psadbw,
12215 mkIRExprVec_2( mkexpr(s0), mkexpr(d0)))) );
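   /* The helper implements the usual PSADBW semantics per 64-bit half:
      the sum of absolute differences of the eight byte pairs, left in
      the low 16 bits of that half (upper bits zero).  E.g. an all-0xFF
      half against an all-0x00 half gives 8 * 255 == 2040 (0x7F8). */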
12216 return res;
12220 static IRTemp math_PSADBW_256 ( IRTemp dV, IRTemp sV )
12222 IRTemp sHi, sLo, dHi, dLo;
12223 sHi = sLo = dHi = dLo = IRTemp_INVALID;
12224 breakupV256toV128s( dV, &dHi, &dLo);
12225 breakupV256toV128s( sV, &sHi, &sLo);
12226 IRTemp res = newTemp(Ity_V256);
12227 assign(res, binop(Iop_V128HLtoV256,
12228 mkexpr(math_PSADBW_128(dHi, sHi)),
12229 mkexpr(math_PSADBW_128(dLo, sLo))));
12230 return res;
12234 static Long dis_MASKMOVDQU ( const VexAbiInfo* vbi, Prefix pfx,
12235 Long delta, Bool isAvx )
12237 IRTemp regD = newTemp(Ity_V128);
12238 IRTemp mask = newTemp(Ity_V128);
12239 IRTemp olddata = newTemp(Ity_V128);
12240 IRTemp newdata = newTemp(Ity_V128);
12241 IRTemp addr = newTemp(Ity_I64);
12242 UChar modrm = getUChar(delta);
12243 UInt rG = gregOfRexRM(pfx,modrm);
12244 UInt rE = eregOfRexRM(pfx,modrm);
12246 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
12247 assign( regD, getXMMReg( rG ));
12249 /* Unfortunately can't do the obvious thing with SarN8x16
12250 here since that can't be re-emitted as SSE2 code - no such
12251 insn. */
12252 assign( mask,
12253 binop(Iop_64HLtoV128,
12254 binop(Iop_SarN8x8,
12255 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ),
12256 mkU8(7) ),
12257 binop(Iop_SarN8x8,
12258 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ),
12259 mkU8(7) ) ));
12260 assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
12261 assign( newdata, binop(Iop_OrV128,
12262 binop(Iop_AndV128,
12263 mkexpr(regD),
12264 mkexpr(mask) ),
12265 binop(Iop_AndV128,
12266 mkexpr(olddata),
12267 unop(Iop_NotV128, mkexpr(mask)))) );
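   /* Net effect: a byte-granular predicated store to [RDI] -- for each
      byte i, if bit 7 of byte i of the mask register is set, the byte
      from rG is written, otherwise the pre-existing memory byte is kept.
      Since this is expressed as a load/merge/store, the guest appears to
      do a 16-byte read of the destination as well as the write. */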
12268 storeLE( mkexpr(addr), mkexpr(newdata) );
12270 delta += 1;
12271 DIP("%smaskmovdqu %s,%s\n", isAvx ? "v" : "",
12272 nameXMMReg(rE), nameXMMReg(rG) );
12273 return delta;
12277 static Long dis_MOVMSKPS_128 ( const VexAbiInfo* vbi, Prefix pfx,
12278 Long delta, Bool isAvx )
12280 UChar modrm = getUChar(delta);
12281 UInt rG = gregOfRexRM(pfx,modrm);
12282 UInt rE = eregOfRexRM(pfx,modrm);
12283 IRTemp t0 = newTemp(Ity_I32);
12284 IRTemp t1 = newTemp(Ity_I32);
12285 IRTemp t2 = newTemp(Ity_I32);
12286 IRTemp t3 = newTemp(Ity_I32);
12287 delta += 1;
12288 assign( t0, binop( Iop_And32,
12289 binop(Iop_Shr32, getXMMRegLane32(rE,0), mkU8(31)),
12290 mkU32(1) ));
12291 assign( t1, binop( Iop_And32,
12292 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(30)),
12293 mkU32(2) ));
12294 assign( t2, binop( Iop_And32,
12295 binop(Iop_Shr32, getXMMRegLane32(rE,2), mkU8(29)),
12296 mkU32(4) ));
12297 assign( t3, binop( Iop_And32,
12298 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(28)),
12299 mkU32(8) ));
12300 putIReg32( rG, binop(Iop_Or32,
12301 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
12302 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) );
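   /* Informal example: if the four F32 lanes of xmm(E) are -1.0, 2.0,
      -3.0, 4.0 (lanes 0..3), their sign bits are 1,0,1,0, so rG receives
      0b0101 == 5, with all higher bits clear. */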
12303 DIP("%smovmskps %s,%s\n", isAvx ? "v" : "",
12304 nameXMMReg(rE), nameIReg32(rG));
12305 return delta;
12309 static Long dis_MOVMSKPS_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
12311 UChar modrm = getUChar(delta);
12312 UInt rG = gregOfRexRM(pfx,modrm);
12313 UInt rE = eregOfRexRM(pfx,modrm);
12314 IRTemp t0 = newTemp(Ity_I32);
12315 IRTemp t1 = newTemp(Ity_I32);
12316 IRTemp t2 = newTemp(Ity_I32);
12317 IRTemp t3 = newTemp(Ity_I32);
12318 IRTemp t4 = newTemp(Ity_I32);
12319 IRTemp t5 = newTemp(Ity_I32);
12320 IRTemp t6 = newTemp(Ity_I32);
12321 IRTemp t7 = newTemp(Ity_I32);
12322 delta += 1;
12323 assign( t0, binop( Iop_And32,
12324 binop(Iop_Shr32, getYMMRegLane32(rE,0), mkU8(31)),
12325 mkU32(1) ));
12326 assign( t1, binop( Iop_And32,
12327 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(30)),
12328 mkU32(2) ));
12329 assign( t2, binop( Iop_And32,
12330 binop(Iop_Shr32, getYMMRegLane32(rE,2), mkU8(29)),
12331 mkU32(4) ));
12332 assign( t3, binop( Iop_And32,
12333 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(28)),
12334 mkU32(8) ));
12335 assign( t4, binop( Iop_And32,
12336 binop(Iop_Shr32, getYMMRegLane32(rE,4), mkU8(27)),
12337 mkU32(16) ));
12338 assign( t5, binop( Iop_And32,
12339 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(26)),
12340 mkU32(32) ));
12341 assign( t6, binop( Iop_And32,
12342 binop(Iop_Shr32, getYMMRegLane32(rE,6), mkU8(25)),
12343 mkU32(64) ));
12344 assign( t7, binop( Iop_And32,
12345 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(24)),
12346 mkU32(128) ));
12347 putIReg32( rG, binop(Iop_Or32,
12348 binop(Iop_Or32,
12349 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
12350 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ),
12351 binop(Iop_Or32,
12352 binop(Iop_Or32, mkexpr(t4), mkexpr(t5)),
12353 binop(Iop_Or32, mkexpr(t6), mkexpr(t7)) ) ) );
12354 DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
12355 return delta;
12359 static Long dis_MOVMSKPD_128 ( const VexAbiInfo* vbi, Prefix pfx,
12360 Long delta, Bool isAvx )
12362 UChar modrm = getUChar(delta);
12363 UInt rG = gregOfRexRM(pfx,modrm);
12364 UInt rE = eregOfRexRM(pfx,modrm);
12365 IRTemp t0 = newTemp(Ity_I32);
12366 IRTemp t1 = newTemp(Ity_I32);
12367 delta += 1;
12368 assign( t0, binop( Iop_And32,
12369 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(31)),
12370 mkU32(1) ));
12371 assign( t1, binop( Iop_And32,
12372 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(30)),
12373 mkU32(2) ));
12374 putIReg32( rG, binop(Iop_Or32, mkexpr(t0), mkexpr(t1) ) );
12375 DIP("%smovmskpd %s,%s\n", isAvx ? "v" : "",
12376 nameXMMReg(rE), nameIReg32(rG));
12377 return delta;
12381 static Long dis_MOVMSKPD_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
12383 UChar modrm = getUChar(delta);
12384 UInt rG = gregOfRexRM(pfx,modrm);
12385 UInt rE = eregOfRexRM(pfx,modrm);
12386 IRTemp t0 = newTemp(Ity_I32);
12387 IRTemp t1 = newTemp(Ity_I32);
12388 IRTemp t2 = newTemp(Ity_I32);
12389 IRTemp t3 = newTemp(Ity_I32);
12390 delta += 1;
12391 assign( t0, binop( Iop_And32,
12392 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(31)),
12393 mkU32(1) ));
12394 assign( t1, binop( Iop_And32,
12395 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(30)),
12396 mkU32(2) ));
12397 assign( t2, binop( Iop_And32,
12398 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(29)),
12399 mkU32(4) ));
12400 assign( t3, binop( Iop_And32,
12401 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(28)),
12402 mkU32(8) ));
12403 putIReg32( rG, binop(Iop_Or32,
12404 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
12405 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) );
12406 DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
12407 return delta;
12411 /* Note, this also handles SSE(1) insns. */
12412 __attribute__((noinline))
12413 static
12414 Long dis_ESC_0F__SSE2 ( Bool* decode_OK,
12415 const VexArchInfo* archinfo,
12416 const VexAbiInfo* vbi,
12417 Prefix pfx, Int sz, Long deltaIN,
12418 DisResult* dres )
12420 IRTemp addr = IRTemp_INVALID;
12421 IRTemp t0 = IRTemp_INVALID;
12422 IRTemp t1 = IRTemp_INVALID;
12423 IRTemp t2 = IRTemp_INVALID;
12424 IRTemp t3 = IRTemp_INVALID;
12425 IRTemp t4 = IRTemp_INVALID;
12426 IRTemp t5 = IRTemp_INVALID;
12427 IRTemp t6 = IRTemp_INVALID;
12428 UChar modrm = 0;
12429 Int alen = 0;
12430 HChar dis_buf[50];
12432 *decode_OK = False;
12434 Long delta = deltaIN;
12435 UChar opc = getUChar(delta);
12436 delta++;
12437 switch (opc) {
12439 case 0x10:
12440 if (have66noF2noF3(pfx)
12441 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12442 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
12443 modrm = getUChar(delta);
12444 if (epartIsReg(modrm)) {
12445 putXMMReg( gregOfRexRM(pfx,modrm),
12446 getXMMReg( eregOfRexRM(pfx,modrm) ));
12447 DIP("movupd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12448 nameXMMReg(gregOfRexRM(pfx,modrm)));
12449 delta += 1;
12450 } else {
12451 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12452 putXMMReg( gregOfRexRM(pfx,modrm),
12453 loadLE(Ity_V128, mkexpr(addr)) );
12454 DIP("movupd %s,%s\n", dis_buf,
12455 nameXMMReg(gregOfRexRM(pfx,modrm)));
12456 delta += alen;
12458 goto decode_success;
12460 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
12461 G (lo half xmm). If E is mem, upper half of G is zeroed out.
12462 If E is reg, upper half of G is unchanged. */
12463 if (haveF2no66noF3(pfx)
12464 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) ) {
12465 modrm = getUChar(delta);
12466 if (epartIsReg(modrm)) {
12467 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
12468 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
12469 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12470 nameXMMReg(gregOfRexRM(pfx,modrm)));
12471 delta += 1;
12472 } else {
12473 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12474 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
12475 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
12476 loadLE(Ity_I64, mkexpr(addr)) );
12477 DIP("movsd %s,%s\n", dis_buf,
12478 nameXMMReg(gregOfRexRM(pfx,modrm)));
12479 delta += alen;
12481 goto decode_success;
12483 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
12484 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
12485 if (haveF3no66noF2(pfx)
12486 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12487 modrm = getUChar(delta);
12488 if (epartIsReg(modrm)) {
12489 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
12490 getXMMRegLane32( eregOfRexRM(pfx,modrm), 0 ));
12491 DIP("movss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12492 nameXMMReg(gregOfRexRM(pfx,modrm)));
12493 delta += 1;
12494 } else {
12495 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12496 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
12497 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
12498 loadLE(Ity_I32, mkexpr(addr)) );
12499 DIP("movss %s,%s\n", dis_buf,
12500 nameXMMReg(gregOfRexRM(pfx,modrm)));
12501 delta += alen;
12503 goto decode_success;
12505 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
12506 if (haveNo66noF2noF3(pfx)
12507 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12508 modrm = getUChar(delta);
12509 if (epartIsReg(modrm)) {
12510 putXMMReg( gregOfRexRM(pfx,modrm),
12511 getXMMReg( eregOfRexRM(pfx,modrm) ));
12512 DIP("movups %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12513 nameXMMReg(gregOfRexRM(pfx,modrm)));
12514 delta += 1;
12515 } else {
12516 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12517 putXMMReg( gregOfRexRM(pfx,modrm),
12518 loadLE(Ity_V128, mkexpr(addr)) );
12519 DIP("movups %s,%s\n", dis_buf,
12520 nameXMMReg(gregOfRexRM(pfx,modrm)));
12521 delta += alen;
12523 goto decode_success;
12525 break;
12527 case 0x11:
12528 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
12529 or lo half xmm). */
12530 if (haveF2no66noF3(pfx)
12531 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12532 modrm = getUChar(delta);
12533 if (epartIsReg(modrm)) {
12534 putXMMRegLane64( eregOfRexRM(pfx,modrm), 0,
12535 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
12536 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12537 nameXMMReg(eregOfRexRM(pfx,modrm)));
12538 delta += 1;
12539 } else {
12540 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12541 storeLE( mkexpr(addr),
12542 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
12543 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12544 dis_buf);
12545 delta += alen;
12547 goto decode_success;
12549 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
12550 or lo 1/4 xmm). */
12551 if (haveF3no66noF2(pfx) && sz == 4) {
12552 modrm = getUChar(delta);
12553 if (epartIsReg(modrm)) {
12554 /* fall through, we don't yet have a test case */
12555 } else {
12556 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12557 storeLE( mkexpr(addr),
12558 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
12559 DIP("movss %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12560 dis_buf);
12561 delta += alen;
12562 goto decode_success;
12565 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
12566 if (have66noF2noF3(pfx)
12567 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12568 modrm = getUChar(delta);
12569 if (epartIsReg(modrm)) {
12570 putXMMReg( eregOfRexRM(pfx,modrm),
12571 getXMMReg( gregOfRexRM(pfx,modrm) ) );
12572 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12573 nameXMMReg(eregOfRexRM(pfx,modrm)));
12574 delta += 1;
12575 } else {
12576 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12577 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
12578 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12579 dis_buf );
12580 delta += alen;
12582 goto decode_success;
12584 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
12585 if (haveNo66noF2noF3(pfx)
12586 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12587 modrm = getUChar(delta);
12588 if (epartIsReg(modrm)) {
12589 /* fall through; awaiting test case */
12590 } else {
12591 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12592 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
12593 DIP("movups %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12594 dis_buf );
12595 delta += alen;
12596 goto decode_success;
12599 break;
12601 case 0x12:
12602 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
12603 /* Identical to MOVLPS ? */
12604 if (have66noF2noF3(pfx)
12605 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12606 modrm = getUChar(delta);
12607 if (epartIsReg(modrm)) {
12608 /* fall through; apparently reg-reg is not possible */
12609 } else {
12610 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12611 delta += alen;
12612 putXMMRegLane64( gregOfRexRM(pfx,modrm),
12613 0/*lower lane*/,
12614 loadLE(Ity_I64, mkexpr(addr)) );
12615 DIP("movlpd %s, %s\n",
12616 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
12617 goto decode_success;
12620 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
12621 /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
12622 if (haveNo66noF2noF3(pfx)
12623 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12624 modrm = getUChar(delta);
12625 if (epartIsReg(modrm)) {
12626 delta += 1;
12627 putXMMRegLane64( gregOfRexRM(pfx,modrm),
12628 0/*lower lane*/,
12629 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ));
12630 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12631 nameXMMReg(gregOfRexRM(pfx,modrm)));
12632 } else {
12633 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12634 delta += alen;
12635 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0/*lower lane*/,
12636 loadLE(Ity_I64, mkexpr(addr)) );
12637 DIP("movlps %s, %s\n",
12638 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
12640 goto decode_success;
12642 break;
12644 case 0x13:
12645 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
12646 if (haveNo66noF2noF3(pfx)
12647 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12648 modrm = getUChar(delta);
12649 if (!epartIsReg(modrm)) {
12650 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12651 delta += alen;
12652 storeLE( mkexpr(addr),
12653 getXMMRegLane64( gregOfRexRM(pfx,modrm),
12654 0/*lower lane*/ ) );
12655 DIP("movlps %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
12656 dis_buf);
12657 goto decode_success;
12659 /* else fall through */
12661 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
12662 /* Identical to MOVLPS ? */
12663 if (have66noF2noF3(pfx)
12664 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12665 modrm = getUChar(delta);
12666 if (!epartIsReg(modrm)) {
12667 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12668 delta += alen;
12669 storeLE( mkexpr(addr),
12670 getXMMRegLane64( gregOfRexRM(pfx,modrm),
12671 0/*lower lane*/ ) );
12672 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
12673 dis_buf);
12674 goto decode_success;
12676 /* else fall through */
12678 break;
12680 case 0x14:
12681 case 0x15:
12682 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
12683 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
12684 /* These just appear to be special cases of SHUFPS */
12685 if (haveNo66noF2noF3(pfx) && sz == 4) {
12686 Bool hi = toBool(opc == 0x15);
12687 IRTemp sV = newTemp(Ity_V128);
12688 IRTemp dV = newTemp(Ity_V128);
12689 modrm = getUChar(delta);
12690 UInt rG = gregOfRexRM(pfx,modrm);
12691 assign( dV, getXMMReg(rG) );
12692 if (epartIsReg(modrm)) {
12693 UInt rE = eregOfRexRM(pfx,modrm);
12694 assign( sV, getXMMReg(rE) );
12695 delta += 1;
12696 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
12697 nameXMMReg(rE), nameXMMReg(rG));
12698 } else {
12699 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12700 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12701 delta += alen;
12702 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
12703 dis_buf, nameXMMReg(rG));
12705 IRTemp res = math_UNPCKxPS_128( sV, dV, hi );
12706 putXMMReg( rG, mkexpr(res) );
12707 goto decode_success;
12709 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
12710 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
12711 /* These just appear to be special cases of SHUFPD */
12712 if (have66noF2noF3(pfx)
12713 && sz == 2 /* could be 8 if rex also present */) {
12714 Bool hi = toBool(opc == 0x15);
12715 IRTemp sV = newTemp(Ity_V128);
12716 IRTemp dV = newTemp(Ity_V128);
12717 modrm = getUChar(delta);
12718 UInt rG = gregOfRexRM(pfx,modrm);
12719 assign( dV, getXMMReg(rG) );
12720 if (epartIsReg(modrm)) {
12721 UInt rE = eregOfRexRM(pfx,modrm);
12722 assign( sV, getXMMReg(rE) );
12723 delta += 1;
12724 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
12725 nameXMMReg(rE), nameXMMReg(rG));
12726 } else {
12727 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12728 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12729 delta += alen;
12730 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
12731 dis_buf, nameXMMReg(rG));
12733 IRTemp res = math_UNPCKxPD_128( sV, dV, hi );
12734 putXMMReg( rG, mkexpr(res) );
12735 goto decode_success;
12737 break;
12739 case 0x16:
12740 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
12741 /* This seems identical to MOVHPS. This instruction encoding is
12742 completely crazy. */
12743 if (have66noF2noF3(pfx)
12744 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12745 modrm = getUChar(delta);
12746 if (epartIsReg(modrm)) {
12747 /* fall through; apparently reg-reg is not possible */
12748 } else {
12749 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12750 delta += alen;
12751 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
12752 loadLE(Ity_I64, mkexpr(addr)) );
12753 DIP("movhpd %s,%s\n", dis_buf,
12754 nameXMMReg( gregOfRexRM(pfx,modrm) ));
12755 goto decode_success;
12758 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
12759 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
12760 if (haveNo66noF2noF3(pfx)
12761 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12762 modrm = getUChar(delta);
12763 if (epartIsReg(modrm)) {
12764 delta += 1;
12765 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
12766 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ) );
12767 DIP("movhps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12768 nameXMMReg(gregOfRexRM(pfx,modrm)));
12769 } else {
12770 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12771 delta += alen;
12772 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
12773 loadLE(Ity_I64, mkexpr(addr)) );
12774 DIP("movhps %s,%s\n", dis_buf,
12775 nameXMMReg( gregOfRexRM(pfx,modrm) ));
12777 goto decode_success;
12779 break;
12781 case 0x17:
12782 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
12783 if (haveNo66noF2noF3(pfx)
12784 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12785 modrm = getUChar(delta);
12786 if (!epartIsReg(modrm)) {
12787 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12788 delta += alen;
12789 storeLE( mkexpr(addr),
12790 getXMMRegLane64( gregOfRexRM(pfx,modrm),
12791 1/*upper lane*/ ) );
12792 DIP("movhps %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
12793 dis_buf);
12794 goto decode_success;
12796 /* else fall through */
12798 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
12799 /* Again, this seems identical to MOVHPS. */
12800 if (have66noF2noF3(pfx)
12801 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12802 modrm = getUChar(delta);
12803 if (!epartIsReg(modrm)) {
12804 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12805 delta += alen;
12806 storeLE( mkexpr(addr),
12807 getXMMRegLane64( gregOfRexRM(pfx,modrm),
12808 1/*upper lane*/ ) );
12809 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
12810 dis_buf);
12811 goto decode_success;
12813 /* else fall through */
12815 break;
12817 case 0x18:
12818 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
12819 /* 0F 18 /1 = PREFETCHT0 -- with various different hints */
12820 /* 0F 18 /2 = PREFETCHT1 */
12821 /* 0F 18 /3 = PREFETCHT2 */
12822 if (haveNo66noF2noF3(pfx)
12823 && !epartIsReg(getUChar(delta))
12824 && gregLO3ofRM(getUChar(delta)) >= 0
12825 && gregLO3ofRM(getUChar(delta)) <= 3) {
12826 const HChar* hintstr = "??";
12828 modrm = getUChar(delta);
12829 vassert(!epartIsReg(modrm));
12831 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12832 delta += alen;
12834 switch (gregLO3ofRM(modrm)) {
12835 case 0: hintstr = "nta"; break;
12836 case 1: hintstr = "t0"; break;
12837 case 2: hintstr = "t1"; break;
12838 case 3: hintstr = "t2"; break;
12839 default: vassert(0);
12842 DIP("prefetch%s %s\n", hintstr, dis_buf);
12843 goto decode_success;
12845 break;
12847 case 0x28:
12848 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
12849 if (have66noF2noF3(pfx)
12850 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12851 modrm = getUChar(delta);
12852 if (epartIsReg(modrm)) {
12853 putXMMReg( gregOfRexRM(pfx,modrm),
12854 getXMMReg( eregOfRexRM(pfx,modrm) ));
12855 DIP("movapd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12856 nameXMMReg(gregOfRexRM(pfx,modrm)));
12857 delta += 1;
12858 } else {
12859 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12860 gen_SEGV_if_not_16_aligned( addr );
12861 putXMMReg( gregOfRexRM(pfx,modrm),
12862 loadLE(Ity_V128, mkexpr(addr)) );
12863 DIP("movapd %s,%s\n", dis_buf,
12864 nameXMMReg(gregOfRexRM(pfx,modrm)));
12865 delta += alen;
12867 goto decode_success;
12869 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
12870 if (haveNo66noF2noF3(pfx)
12871 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12872 modrm = getUChar(delta);
12873 if (epartIsReg(modrm)) {
12874 putXMMReg( gregOfRexRM(pfx,modrm),
12875 getXMMReg( eregOfRexRM(pfx,modrm) ));
12876 DIP("movaps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12877 nameXMMReg(gregOfRexRM(pfx,modrm)));
12878 delta += 1;
12879 } else {
12880 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12881 gen_SEGV_if_not_16_aligned( addr );
12882 putXMMReg( gregOfRexRM(pfx,modrm),
12883 loadLE(Ity_V128, mkexpr(addr)) );
12884 DIP("movaps %s,%s\n", dis_buf,
12885 nameXMMReg(gregOfRexRM(pfx,modrm)));
12886 delta += alen;
12888 goto decode_success;
12890 break;
12892 case 0x29:
12893 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
12894 if (haveNo66noF2noF3(pfx)
12895 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12896 modrm = getUChar(delta);
12897 if (epartIsReg(modrm)) {
12898 putXMMReg( eregOfRexRM(pfx,modrm),
12899 getXMMReg( gregOfRexRM(pfx,modrm) ));
12900 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12901 nameXMMReg(eregOfRexRM(pfx,modrm)));
12902 delta += 1;
12903 } else {
12904 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12905 gen_SEGV_if_not_16_aligned( addr );
12906 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
12907 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12908 dis_buf );
12909 delta += alen;
12911 goto decode_success;
12913 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
12914 if (have66noF2noF3(pfx)
12915 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12916 modrm = getUChar(delta);
12917 if (epartIsReg(modrm)) {
12918 putXMMReg( eregOfRexRM(pfx,modrm),
12919 getXMMReg( gregOfRexRM(pfx,modrm) ) );
12920 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12921 nameXMMReg(eregOfRexRM(pfx,modrm)));
12922 delta += 1;
12923 } else {
12924 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12925 gen_SEGV_if_not_16_aligned( addr );
12926 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
12927 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12928 dis_buf );
12929 delta += alen;
12931 goto decode_success;
12933 break;
12935 case 0x2A:
12936 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
12937 half xmm */
12938 if (haveNo66noF2noF3(pfx) && sz == 4) {
12939 IRTemp arg64 = newTemp(Ity_I64);
12940 IRTemp rmode = newTemp(Ity_I32);
12942 modrm = getUChar(delta);
12943 if (epartIsReg(modrm)) {
12944 /* Only switch to MMX mode if the source is a MMX register.
12945 See comments on CVTPI2PD for details. Fixes #357059. */
12946 do_MMX_preamble();
12947 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
12948 delta += 1;
12949 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
12950 nameXMMReg(gregOfRexRM(pfx,modrm)));
12951 } else {
12952 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12953 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
12954 delta += alen;
12955 DIP("cvtpi2ps %s,%s\n", dis_buf,
12956 nameXMMReg(gregOfRexRM(pfx,modrm)) );
12959 assign( rmode, get_sse_roundingmode() );
12961 putXMMRegLane32F(
12962 gregOfRexRM(pfx,modrm), 0,
12963 binop(Iop_F64toF32,
12964 mkexpr(rmode),
12965 unop(Iop_I32StoF64,
12966 unop(Iop_64to32, mkexpr(arg64)) )) );
12968 putXMMRegLane32F(
12969 gregOfRexRM(pfx,modrm), 1,
12970 binop(Iop_F64toF32,
12971 mkexpr(rmode),
12972 unop(Iop_I32StoF64,
12973 unop(Iop_64HIto32, mkexpr(arg64)) )) );
12975 goto decode_success;
12977 /* F3 0F 2A = CVTSI2SS
12978 -- sz==4: convert I32 in mem/ireg to F32 in low quarter xmm
12979 -- sz==8: convert I64 in mem/ireg to F32 in low quarter xmm */
12980 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) {
12981 IRTemp rmode = newTemp(Ity_I32);
12982 assign( rmode, get_sse_roundingmode() );
12983 modrm = getUChar(delta);
12984 if (sz == 4) {
12985 IRTemp arg32 = newTemp(Ity_I32);
12986 if (epartIsReg(modrm)) {
12987 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
12988 delta += 1;
12989 DIP("cvtsi2ss %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
12990 nameXMMReg(gregOfRexRM(pfx,modrm)));
12991 } else {
12992 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12993 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
12994 delta += alen;
12995 DIP("cvtsi2ss %s,%s\n", dis_buf,
12996 nameXMMReg(gregOfRexRM(pfx,modrm)) );
12998 putXMMRegLane32F(
12999 gregOfRexRM(pfx,modrm), 0,
13000 binop(Iop_F64toF32,
13001 mkexpr(rmode),
13002 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
13003 } else {
13004 /* sz == 8 */
13005 IRTemp arg64 = newTemp(Ity_I64);
13006 if (epartIsReg(modrm)) {
13007 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
13008 delta += 1;
13009 DIP("cvtsi2ssq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
13010 nameXMMReg(gregOfRexRM(pfx,modrm)));
13011 } else {
13012 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13013 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
13014 delta += alen;
13015 DIP("cvtsi2ssq %s,%s\n", dis_buf,
13016 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13018 putXMMRegLane32F(
13019 gregOfRexRM(pfx,modrm), 0,
13020 binop(Iop_F64toF32,
13021 mkexpr(rmode),
13022 binop(Iop_I64StoF64, mkexpr(rmode), mkexpr(arg64)) ) );
13024 goto decode_success;
13026 /* F2 0F 2A = CVTSI2SD
13027 when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm
13028 when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm
13030 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) {
13031 modrm = getUChar(delta);
13032 if (sz == 4) {
13033 IRTemp arg32 = newTemp(Ity_I32);
13034 if (epartIsReg(modrm)) {
13035 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
13036 delta += 1;
13037 DIP("cvtsi2sdl %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
13038 nameXMMReg(gregOfRexRM(pfx,modrm)));
13039 } else {
13040 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13041 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
13042 delta += alen;
13043 DIP("cvtsi2sdl %s,%s\n", dis_buf,
13044 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13046 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
13047 unop(Iop_I32StoF64, mkexpr(arg32))
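            /* No rounding mode is needed in the 32-bit case: any signed
               32-bit integer converts exactly to F64.  The 64-bit case
               below can lose precision, which is why its Iop_I64StoF64
               takes the current SSE rounding mode. */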
13049 } else {
13050 /* sz == 8 */
13051 IRTemp arg64 = newTemp(Ity_I64);
13052 if (epartIsReg(modrm)) {
13053 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
13054 delta += 1;
13055 DIP("cvtsi2sdq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
13056 nameXMMReg(gregOfRexRM(pfx,modrm)));
13057 } else {
13058 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13059 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
13060 delta += alen;
13061 DIP("cvtsi2sdq %s,%s\n", dis_buf,
13062 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13064 putXMMRegLane64F(
13065 gregOfRexRM(pfx,modrm),
13067 binop( Iop_I64StoF64,
13068 get_sse_roundingmode(),
13069 mkexpr(arg64)
13073 goto decode_success;
13075 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
13076 xmm(G) */
13077 if (have66noF2noF3(pfx) && sz == 2) {
13078 IRTemp arg64 = newTemp(Ity_I64);
13080 modrm = getUChar(delta);
13081 if (epartIsReg(modrm)) {
13082 /* Only switch to MMX mode if the source is a MMX register.
13083 This is inconsistent with all other instructions which
13084 convert between XMM and (M64 or MMX), which always switch
13085 to MMX mode even if the 64-bit operand is M64 and not MMX. At
13086 least, that's what the Intel docs seem to me to say.
13087 Fixes #210264. */
13088 do_MMX_preamble();
13089 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
13090 delta += 1;
13091 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
13092 nameXMMReg(gregOfRexRM(pfx,modrm)));
13093 } else {
13094 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13095 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
13096 delta += alen;
13097 DIP("cvtpi2pd %s,%s\n", dis_buf,
13098 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13101 putXMMRegLane64F(
13102 gregOfRexRM(pfx,modrm), 0,
13103 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) )
13106 putXMMRegLane64F(
13107 gregOfRexRM(pfx,modrm), 1,
13108 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) )
13111 goto decode_success;
13113 break;
13115 case 0x2B:
13116 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
13117 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
13118 if ( (haveNo66noF2noF3(pfx) && sz == 4)
13119 || (have66noF2noF3(pfx) && sz == 2) ) {
13120 modrm = getUChar(delta);
13121 if (!epartIsReg(modrm)) {
13122 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13123 gen_SEGV_if_not_16_aligned( addr );
13124 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
13125 DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
13126 dis_buf,
13127 nameXMMReg(gregOfRexRM(pfx,modrm)));
13128 delta += alen;
13129 goto decode_success;
13131 /* else fall through */
13133 break;
13135 case 0x2C:
13136 case 0x2D:
13137 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
13138 I32 in mmx, according to prevailing SSE rounding mode */
13139 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
13140 I32 in mmx, rounding towards zero */
13141 if (haveNo66noF2noF3(pfx) && sz == 4) {
13142 IRTemp dst64 = newTemp(Ity_I64);
13143 IRTemp rmode = newTemp(Ity_I32);
13144 IRTemp f32lo = newTemp(Ity_F32);
13145 IRTemp f32hi = newTemp(Ity_F32);
13146 Bool r2zero = toBool(opc == 0x2C);
13148 do_MMX_preamble();
13149 modrm = getUChar(delta);
13151 if (epartIsReg(modrm)) {
13152 delta += 1;
13153 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
13154 assign(f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1));
13155 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
13156 nameXMMReg(eregOfRexRM(pfx,modrm)),
13157 nameMMXReg(gregLO3ofRM(modrm)));
13158 } else {
13159 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13160 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
13161 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add64,
13162 mkexpr(addr),
13163 mkU64(4) )));
13164 delta += alen;
13165 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
13166 dis_buf,
13167 nameMMXReg(gregLO3ofRM(modrm)));
13170 if (r2zero) {
13171 assign(rmode, mkU32((UInt)Irrm_ZERO) );
13172 } else {
13173 assign( rmode, get_sse_roundingmode() );
13176 assign(
13177 dst64,
13178 binop( Iop_32HLto64,
13179 binop( Iop_F64toI32S,
13180 mkexpr(rmode),
13181 unop( Iop_F32toF64, mkexpr(f32hi) ) ),
13182 binop( Iop_F64toI32S,
13183 mkexpr(rmode),
13184 unop( Iop_F32toF64, mkexpr(f32lo) ) )
13188 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
13189 goto decode_success;
13191 /* F3 0F 2D = CVTSS2SI
13192 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
13193 according to prevailing SSE rounding mode
13194 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
13195 according to prevailing SSE rounding mode
13197 /* F3 0F 2C = CVTTSS2SI
13198 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
13199 truncating towards zero
13200 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
13201 truncating towards zero
13203 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) {
13204 delta = dis_CVTxSS2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz);
13205 goto decode_success;
13207 /* F2 0F 2D = CVTSD2SI
13208 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
13209 according to prevailing SSE rounding mode
13210 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
13211 according to prevailing SSE rounding mode
13213 /* F2 0F 2C = CVTTSD2SI
13214 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
13215 truncating towards zero
13216 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
13217 truncating towards zero
13219 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) {
13220 delta = dis_CVTxSD2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz);
13221 goto decode_success;
13223 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
13224 I32 in mmx, according to prevailing SSE rounding mode */
13225 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
13226 I32 in mmx, rounding towards zero */
13227 if (have66noF2noF3(pfx) && sz == 2) {
13228 IRTemp dst64 = newTemp(Ity_I64);
13229 IRTemp rmode = newTemp(Ity_I32);
13230 IRTemp f64lo = newTemp(Ity_F64);
13231 IRTemp f64hi = newTemp(Ity_F64);
13232 Bool r2zero = toBool(opc == 0x2C);
13234 do_MMX_preamble();
13235 modrm = getUChar(delta);
13237 if (epartIsReg(modrm)) {
13238 delta += 1;
13239 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
13240 assign(f64hi, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 1));
13241 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
13242 nameXMMReg(eregOfRexRM(pfx,modrm)),
13243 nameMMXReg(gregLO3ofRM(modrm)));
13244 } else {
13245 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13246 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
13247 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add64,
13248 mkexpr(addr),
13249 mkU64(8) )));
13250 delta += alen;
13251 DIP("cvt%spf2pi %s,%s\n", r2zero ? "t" : "",
13252 dis_buf,
13253 nameMMXReg(gregLO3ofRM(modrm)));
13256 if (r2zero) {
13257 assign(rmode, mkU32((UInt)Irrm_ZERO) );
13258 } else {
13259 assign( rmode, get_sse_roundingmode() );
13262 assign(
13263 dst64,
13264 binop( Iop_32HLto64,
13265 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
13266 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
13270 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
13271 goto decode_success;
13273 break;
13275 case 0x2E:
13276 case 0x2F:
13277 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
13278 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */
13279 if (have66noF2noF3(pfx) && sz == 2) {
13280 delta = dis_COMISD( vbi, pfx, delta, False/*!isAvx*/, opc );
13281 goto decode_success;
13283 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
13284 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */
13285 if (haveNo66noF2noF3(pfx) && sz == 4) {
13286 delta = dis_COMISS( vbi, pfx, delta, False/*!isAvx*/, opc );
13287 goto decode_success;
13289 break;
13291 case 0x50:
13292 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
13293 to 4 lowest bits of ireg(G) */
13294 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
13295 && epartIsReg(getUChar(delta))) {
13296 /* sz == 8 is a kludge to handle insns with REX.W redundantly
13297 set to 1, which has been known to happen:
13299 4c 0f 50 d9 rex64X movmskps %xmm1,%r11d
13301 20071106: Intel docs say that REX.W isn't redundant: when
13302 present, a 64-bit register is written; when not present, only
13303 the 32-bit half is written. However, testing on a Core2
13304 machine suggests the entire 64 bit register is written
13305 irrespective of the status of REX.W. That could be because
13306 of the default rule that says "if the lower half of a 32-bit
13307 register is written, the upper half is zeroed". By using
13308 putIReg32 here we inadvertently produce the same behaviour as
13309 the Core2, for the same reason -- putIReg32 implements said
13310 rule.
13312 AMD docs give no indication that REX.W is even valid for this
13313 insn. */
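         /* MOVMSKPS gathers the sign bit of each of the four F32 lanes
            of xmm(E) into bits 3:0 of ireg(G) and clears the rest.  For
            example, xmm = { -1.0, 2.0, -3.0, 4.0 } (lane 0 first)
            produces 0b0101 = 5. */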
13314 delta = dis_MOVMSKPS_128( vbi, pfx, delta, False/*!isAvx*/ );
13315 goto decode_success;
13317 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
13318 2 lowest bits of ireg(G) */
13319 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) {
13320 /* sz == 8 is a kludge to handle insns with REX.W redundantly
13321 set to 1, which has been known to happen:
13322 66 4c 0f 50 d9 rex64X movmskpd %xmm1,%r11d
13323 20071106: see further comments on MOVMSKPS implementation above. */
13325 delta = dis_MOVMSKPD_128( vbi, pfx, delta, False/*!isAvx*/ );
13326 goto decode_success;
13328 break;
13330 case 0x51:
13331 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */
13332 if (haveF3no66noF2(pfx) && sz == 4) {
13333 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
13334 "sqrtss", Iop_Sqrt32F0x4 );
13335 goto decode_success;
13337 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */
13338 if (haveNo66noF2noF3(pfx) && sz == 4) {
13339 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
13340 "sqrtps", Iop_Sqrt32Fx4 );
13341 goto decode_success;
13343 /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
13344 if (haveF2no66noF3(pfx) && sz == 4) {
13345 delta = dis_SSE_E_to_G_unary_lo64( vbi, pfx, delta,
13346 "sqrtsd", Iop_Sqrt64F0x2 );
13347 goto decode_success;
13349 /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
13350 if (have66noF2noF3(pfx) && sz == 2) {
13351 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
13352 "sqrtpd", Iop_Sqrt64Fx2 );
13353 goto decode_success;
13355 break;
13357 case 0x52:
13358 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
13359 if (haveF3no66noF2(pfx) && sz == 4) {
13360 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
13361 "rsqrtss", Iop_RSqrtEst32F0x4 );
13362 goto decode_success;
13364 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
13365 if (haveNo66noF2noF3(pfx) && sz == 4) {
13366 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
13367 "rsqrtps", Iop_RSqrtEst32Fx4 );
13368 goto decode_success;
13370 break;
13372 case 0x53:
13373 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
13374 if (haveF3no66noF2(pfx) && sz == 4) {
13375 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
13376 "rcpss", Iop_RecipEst32F0x4 );
13377 goto decode_success;
13379 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
13380 if (haveNo66noF2noF3(pfx) && sz == 4) {
13381 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
13382 "rcpps", Iop_RecipEst32Fx4 );
13383 goto decode_success;
13385 break;
13387 case 0x54:
13388 /* 0F 54 = ANDPS -- G = G and E */
13389 if (haveNo66noF2noF3(pfx) && sz == 4) {
13390 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andps", Iop_AndV128 );
13391 goto decode_success;
13393 /* 66 0F 54 = ANDPD -- G = G and E */
13394 if (have66noF2noF3(pfx) && sz == 2) {
13395 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andpd", Iop_AndV128 );
13396 goto decode_success;
13398 break;
13400 case 0x55:
13401 /* 0F 55 = ANDNPS -- G = (not G) and E */
13402 if (haveNo66noF2noF3(pfx) && sz == 4) {
13403 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnps",
13404 Iop_AndV128 );
13405 goto decode_success;
13407 /* 66 0F 55 = ANDNPD -- G = (not G) and E */
13408 if (have66noF2noF3(pfx) && sz == 2) {
13409 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnpd",
13410 Iop_AndV128 );
13411 goto decode_success;
13413 break;
13415 case 0x56:
13416 /* 0F 56 = ORPS -- G = G or E */
13417 if (haveNo66noF2noF3(pfx) && sz == 4) {
13418 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orps", Iop_OrV128 );
13419 goto decode_success;
13421 /* 66 0F 56 = ORPD -- G = G or E */
13422 if (have66noF2noF3(pfx) && sz == 2) {
13423 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orpd", Iop_OrV128 );
13424 goto decode_success;
13426 break;
13428 case 0x57:
13429 /* 66 0F 57 = XORPD -- G = G xor E */
13430 if (have66noF2noF3(pfx) && sz == 2) {
13431 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorpd", Iop_XorV128 );
13432 goto decode_success;
13434 /* 0F 57 = XORPS -- G = G xor E */
13435 if (haveNo66noF2noF3(pfx) && sz == 4) {
13436 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorps", Iop_XorV128 );
13437 goto decode_success;
13439 break;
13441 case 0x58:
13442 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
13443 if (haveNo66noF2noF3(pfx) && sz == 4) {
13444 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addps", Iop_Add32Fx4 );
13445 goto decode_success;
13447 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
13448 if (haveF3no66noF2(pfx) && sz == 4) {
13449 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "addss", Iop_Add32F0x4 );
13450 goto decode_success;
13452 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
13453 if (haveF2no66noF3(pfx)
13454 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13455 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "addsd", Iop_Add64F0x2 );
13456 goto decode_success;
13458 /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
13459 if (have66noF2noF3(pfx)
13460 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
13461 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addpd", Iop_Add64Fx2 );
13462 goto decode_success;
13464 break;
13466 case 0x59:
13467 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
13468 if (haveF2no66noF3(pfx)
13469 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13470 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "mulsd", Iop_Mul64F0x2 );
13471 goto decode_success;
13473 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
13474 if (haveF3no66noF2(pfx) && sz == 4) {
13475 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "mulss", Iop_Mul32F0x4 );
13476 goto decode_success;
13478 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
13479 if (haveNo66noF2noF3(pfx) && sz == 4) {
13480 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulps", Iop_Mul32Fx4 );
13481 goto decode_success;
13483 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
13484 if (have66noF2noF3(pfx)
13485 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
13486 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulpd", Iop_Mul64Fx2 );
13487 goto decode_success;
13489 break;
13491 case 0x5A:
13492 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
13493 F64 in xmm(G). */
13494 if (haveNo66noF2noF3(pfx)
13495 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13496 delta = dis_CVTPS2PD_128( vbi, pfx, delta, False/*!isAvx*/ );
13497 goto decode_success;
13499 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
13500 low half xmm(G) */
13501 if (haveF3no66noF2(pfx) && sz == 4) {
13502 IRTemp f32lo = newTemp(Ity_F32);
13504 modrm = getUChar(delta);
13505 if (epartIsReg(modrm)) {
13506 delta += 1;
13507 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
13508 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13509 nameXMMReg(gregOfRexRM(pfx,modrm)));
13510 } else {
13511 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13512 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
13513 delta += alen;
13514 DIP("cvtss2sd %s,%s\n", dis_buf,
13515 nameXMMReg(gregOfRexRM(pfx,modrm)));
13518 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
13519 unop( Iop_F32toF64, mkexpr(f32lo) ) );
13521 goto decode_success;
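      /* Note: the F32->F64 widening above is exact, so no rounding mode
         is consulted; the narrowing CVTSD2SS case below can lose
         precision and therefore reads the prevailing mode from MXCSR. */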
13523 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
13524 low 1/4 xmm(G), according to prevailing SSE rounding mode */
13525 if (haveF2no66noF3(pfx) && sz == 4) {
13526 IRTemp rmode = newTemp(Ity_I32);
13527 IRTemp f64lo = newTemp(Ity_F64);
13529 modrm = getUChar(delta);
13530 if (epartIsReg(modrm)) {
13531 delta += 1;
13532 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
13533 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13534 nameXMMReg(gregOfRexRM(pfx,modrm)));
13535 } else {
13536 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13537 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
13538 delta += alen;
13539 DIP("cvtsd2ss %s,%s\n", dis_buf,
13540 nameXMMReg(gregOfRexRM(pfx,modrm)));
13543 assign( rmode, get_sse_roundingmode() );
13544 putXMMRegLane32F(
13545 gregOfRexRM(pfx,modrm), 0,
13546 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) )
13549 goto decode_success;
13551 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
13552 lo half xmm(G), rounding according to prevailing SSE rounding
13553 mode, and zero upper half */
13554 /* Note, this is practically identical to CVTPD2DQ. It would have
13555 been nice to merge them together. */
13556 if (have66noF2noF3(pfx) && sz == 2) {
13557 delta = dis_CVTPD2PS_128( vbi, pfx, delta, False/*!isAvx*/ );
13558 goto decode_success;
13560 break;
13562 case 0x5B:
13563 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
13564 xmm(G), rounding towards zero */
13565 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
13566 xmm(G), as per the prevailing rounding mode */
13567 if ( (have66noF2noF3(pfx) && sz == 2)
13568 || (haveF3no66noF2(pfx) && sz == 4) ) {
13569 Bool r2zero = toBool(sz == 4); // FIXME -- unreliable (???)
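         /* Given the guard above, sz==4 can only arise with the F3
            prefix, which is the truncating CVTTPS2DQ; sz==2 implies the
            66 prefix and hence CVTPS2DQ with the prevailing rounding
            mode. */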
13570 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta, False/*!isAvx*/, r2zero );
13571 goto decode_success;
13573 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
13574 xmm(G) */
13575 if (haveNo66noF2noF3(pfx) && sz == 4) {
13576 delta = dis_CVTDQ2PS_128( vbi, pfx, delta, False/*!isAvx*/ );
13577 goto decode_success;
13579 break;
13581 case 0x5C:
13582 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
13583 if (haveF3no66noF2(pfx) && sz == 4) {
13584 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "subss", Iop_Sub32F0x4 );
13585 goto decode_success;
13587 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
13588 if (haveF2no66noF3(pfx)
13589 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13590 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "subsd", Iop_Sub64F0x2 );
13591 goto decode_success;
13593 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
13594 if (haveNo66noF2noF3(pfx) && sz == 4) {
13595 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subps", Iop_Sub32Fx4 );
13596 goto decode_success;
13598 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
13599 if (have66noF2noF3(pfx) && sz == 2) {
13600 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subpd", Iop_Sub64Fx2 );
13601 goto decode_success;
13603 break;
13605 case 0x5D:
13606 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
13607 if (haveNo66noF2noF3(pfx) && sz == 4) {
13608 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minps", Iop_Min32Fx4 );
13609 goto decode_success;
13611 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
13612 if (haveF3no66noF2(pfx) && sz == 4) {
13613 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "minss", Iop_Min32F0x4 );
13614 goto decode_success;
13616 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
13617 if (haveF2no66noF3(pfx)
13618 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13619 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "minsd", Iop_Min64F0x2 );
13620 goto decode_success;
13622 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
13623 if (have66noF2noF3(pfx) && sz == 2) {
13624 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minpd", Iop_Min64Fx2 );
13625 goto decode_success;
13627 break;
13629 case 0x5E:
13630 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
13631 if (haveF2no66noF3(pfx) && sz == 4) {
13632 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "divsd", Iop_Div64F0x2 );
13633 goto decode_success;
13635 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
13636 if (haveNo66noF2noF3(pfx) && sz == 4) {
13637 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divps", Iop_Div32Fx4 );
13638 goto decode_success;
13640 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
13641 if (haveF3no66noF2(pfx) && sz == 4) {
13642 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "divss", Iop_Div32F0x4 );
13643 goto decode_success;
13645 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
13646 if (have66noF2noF3(pfx) && sz == 2) {
13647 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divpd", Iop_Div64Fx2 );
13648 goto decode_success;
13650 break;
13652 case 0x5F:
13653 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
13654 if (haveNo66noF2noF3(pfx) && sz == 4) {
13655 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxps", Iop_Max32Fx4 );
13656 goto decode_success;
13658 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
13659 if (haveF3no66noF2(pfx) && sz == 4) {
13660 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "maxss", Iop_Max32F0x4 );
13661 goto decode_success;
13663 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
13664 if (haveF2no66noF3(pfx)
13665 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13666 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "maxsd", Iop_Max64F0x2 );
13667 goto decode_success;
13669 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
13670 if (have66noF2noF3(pfx) && sz == 2) {
13671 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxpd", Iop_Max64Fx2 );
13672 goto decode_success;
13674 break;
13676 case 0x60:
13677 /* 66 0F 60 = PUNPCKLBW */
13678 if (have66noF2noF3(pfx) && sz == 2) {
13679 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13680 "punpcklbw",
13681 Iop_InterleaveLO8x16, True );
13682 goto decode_success;
13684 break;
13686 case 0x61:
13687 /* 66 0F 61 = PUNPCKLWD */
13688 if (have66noF2noF3(pfx) && sz == 2) {
13689 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13690 "punpcklwd",
13691 Iop_InterleaveLO16x8, True );
13692 goto decode_success;
13694 break;
13696 case 0x62:
13697 /* 66 0F 62 = PUNPCKLDQ */
13698 if (have66noF2noF3(pfx) && sz == 2) {
13699 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13700 "punpckldq",
13701 Iop_InterleaveLO32x4, True );
13702 goto decode_success;
13704 break;
13706 case 0x63:
13707 /* 66 0F 63 = PACKSSWB */
13708 if (have66noF2noF3(pfx) && sz == 2) {
13709 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13710 "packsswb",
13711 Iop_QNarrowBin16Sto8Sx16, True );
13712 goto decode_success;
13714 break;
13716 case 0x64:
13717 /* 66 0F 64 = PCMPGTB */
13718 if (have66noF2noF3(pfx) && sz == 2) {
13719 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13720 "pcmpgtb", Iop_CmpGT8Sx16, False );
13721 goto decode_success;
13723 break;
13725 case 0x65:
13726 /* 66 0F 65 = PCMPGTW */
13727 if (have66noF2noF3(pfx) && sz == 2) {
13728 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13729 "pcmpgtw", Iop_CmpGT16Sx8, False );
13730 goto decode_success;
13732 break;
13734 case 0x66:
13735 /* 66 0F 66 = PCMPGTD */
13736 if (have66noF2noF3(pfx) && sz == 2) {
13737 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13738 "pcmpgtd", Iop_CmpGT32Sx4, False );
13739 goto decode_success;
13741 break;
13743 case 0x67:
13744 /* 66 0F 67 = PACKUSWB */
13745 if (have66noF2noF3(pfx) && sz == 2) {
13746 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13747 "packuswb",
13748 Iop_QNarrowBin16Sto8Ux16, True );
13749 goto decode_success;
13751 break;
13753 case 0x68:
13754 /* 66 0F 68 = PUNPCKHBW */
13755 if (have66noF2noF3(pfx) && sz == 2) {
13756 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13757 "punpckhbw",
13758 Iop_InterleaveHI8x16, True );
13759 goto decode_success;
13761 break;
13763 case 0x69:
13764 /* 66 0F 69 = PUNPCKHWD */
13765 if (have66noF2noF3(pfx) && sz == 2) {
13766 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13767 "punpckhwd",
13768 Iop_InterleaveHI16x8, True );
13769 goto decode_success;
13771 break;
13773 case 0x6A:
13774 /* 66 0F 6A = PUNPCKHDQ */
13775 if (have66noF2noF3(pfx) && sz == 2) {
13776 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13777 "punpckhdq",
13778 Iop_InterleaveHI32x4, True );
13779 goto decode_success;
13781 break;
13783 case 0x6B:
13784 /* 66 0F 6B = PACKSSDW */
13785 if (have66noF2noF3(pfx) && sz == 2) {
13786 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13787 "packssdw",
13788 Iop_QNarrowBin32Sto16Sx8, True );
13789 goto decode_success;
13791 break;
13793 case 0x6C:
13794 /* 66 0F 6C = PUNPCKLQDQ */
13795 if (have66noF2noF3(pfx) && sz == 2) {
13796 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13797 "punpcklqdq",
13798 Iop_InterleaveLO64x2, True );
13799 goto decode_success;
13801 break;
13803 case 0x6D:
13804 /* 66 0F 6D = PUNPCKHQDQ */
13805 if (have66noF2noF3(pfx) && sz == 2) {
13806 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13807 "punpckhqdq",
13808 Iop_InterleaveHI64x2, True );
13809 goto decode_success;
13811 break;
13813 case 0x6E:
13814 /* 66 0F 6E = MOVD from ireg32/m32 to xmm lo 1/4,
13815 zeroing high 3/4 of xmm. */
13816 /* or from ireg64/m64 to xmm lo 1/2,
13817 zeroing high 1/2 of xmm. */
13818 if (have66noF2noF3(pfx)) {
13819 vassert(sz == 2 || sz == 8);
13820 if (sz == 2) sz = 4;
13821 modrm = getUChar(delta);
13822 if (epartIsReg(modrm)) {
13823 delta += 1;
13824 if (sz == 4) {
13825 putXMMReg(
13826 gregOfRexRM(pfx,modrm),
13827 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) )
13829 DIP("movd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
13830 nameXMMReg(gregOfRexRM(pfx,modrm)));
13831 } else {
13832 putXMMReg(
13833 gregOfRexRM(pfx,modrm),
13834 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) )
13836 DIP("movq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
13837 nameXMMReg(gregOfRexRM(pfx,modrm)));
13839 } else {
13840 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
13841 delta += alen;
13842 putXMMReg(
13843 gregOfRexRM(pfx,modrm),
13844 sz == 4
13845 ? unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) )
13846 : unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)) )
13848 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q', dis_buf,
13849 nameXMMReg(gregOfRexRM(pfx,modrm)));
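         /* In both the register and memory forms, Iop_32UtoV128 /
            Iop_64UtoV128 zero-extend the value into the full 128-bit
            register, which provides the required clearing of the upper
            lanes. */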
13851 goto decode_success;
13853 break;
13855 case 0x6F:
13856 if (have66noF2noF3(pfx)
13857 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
13858 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
13859 modrm = getUChar(delta);
13860 if (epartIsReg(modrm)) {
13861 putXMMReg( gregOfRexRM(pfx,modrm),
13862 getXMMReg( eregOfRexRM(pfx,modrm) ));
13863 DIP("movdqa %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13864 nameXMMReg(gregOfRexRM(pfx,modrm)));
13865 delta += 1;
13866 } else {
13867 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13868 gen_SEGV_if_not_16_aligned( addr );
13869 putXMMReg( gregOfRexRM(pfx,modrm),
13870 loadLE(Ity_V128, mkexpr(addr)) );
13871 DIP("movdqa %s,%s\n", dis_buf,
13872 nameXMMReg(gregOfRexRM(pfx,modrm)));
13873 delta += alen;
13875 goto decode_success;
13877 if (haveF3no66noF2(pfx) && sz == 4) {
13878 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
13879 modrm = getUChar(delta);
13880 if (epartIsReg(modrm)) {
13881 putXMMReg( gregOfRexRM(pfx,modrm),
13882 getXMMReg( eregOfRexRM(pfx,modrm) ));
13883 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13884 nameXMMReg(gregOfRexRM(pfx,modrm)));
13885 delta += 1;
13886 } else {
13887 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13888 putXMMReg( gregOfRexRM(pfx,modrm),
13889 loadLE(Ity_V128, mkexpr(addr)) );
13890 DIP("movdqu %s,%s\n", dis_buf,
13891 nameXMMReg(gregOfRexRM(pfx,modrm)));
13892 delta += alen;
13894 goto decode_success;
13896 break;
13898 case 0x70:
13899 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
13900 if (have66noF2noF3(pfx) && sz == 2) {
13901 delta = dis_PSHUFD_32x4( vbi, pfx, delta, False/*!writesYmm*/);
13902 goto decode_success;
13904 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
13905 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
13906 if (haveNo66noF2noF3(pfx) && sz == 4) {
13907 Int order;
13908 IRTemp sV, dV, s3, s2, s1, s0;
13909 s3 = s2 = s1 = s0 = IRTemp_INVALID;
13910 sV = newTemp(Ity_I64);
13911 dV = newTemp(Ity_I64);
13912 do_MMX_preamble();
13913 modrm = getUChar(delta);
13914 if (epartIsReg(modrm)) {
13915 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
13916 order = (Int)getUChar(delta+1);
13917 delta += 1+1;
13918 DIP("pshufw $%d,%s,%s\n", order,
13919 nameMMXReg(eregLO3ofRM(modrm)),
13920 nameMMXReg(gregLO3ofRM(modrm)));
13921 } else {
13922 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
13923 1/*extra byte after amode*/ );
13924 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
13925 order = (Int)getUChar(delta+alen);
13926 delta += 1+alen;
13927 DIP("pshufw $%d,%s,%s\n", order,
13928 dis_buf,
13929 nameMMXReg(gregLO3ofRM(modrm)));
13931 breakup64to16s( sV, &s3, &s2, &s1, &s0 );
13932 # define SEL(n) \
13933 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
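         /* Each 2-bit field of 'order' selects the source lane for one
            result lane: bits 1:0 choose result lane 0, ..., bits 7:6
            choose result lane 3.  E.g. order == 0x1B (0b00011011)
            reverses the four 16-bit lanes. */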
13934 assign(dV,
13935 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
13936 SEL((order>>2)&3), SEL((order>>0)&3) )
13938 putMMXReg(gregLO3ofRM(modrm), mkexpr(dV));
13939 # undef SEL
13940 goto decode_success;
13942 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
13943 mem) to G(xmm), and copy upper half */
13944 if (haveF2no66noF3(pfx) && sz == 4) {
13945 delta = dis_PSHUFxW_128( vbi, pfx, delta,
13946 False/*!isAvx*/, False/*!xIsH*/ );
13947 goto decode_success;
13949 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
13950 mem) to G(xmm), and copy lower half */
13951 if (haveF3no66noF2(pfx) && sz == 4) {
13952 delta = dis_PSHUFxW_128( vbi, pfx, delta,
13953 False/*!isAvx*/, True/*xIsH*/ );
13954 goto decode_success;
13956 break;
13958 case 0x71:
13959 /* 66 0F 71 /2 ib = PSRLW by immediate */
13960 if (have66noF2noF3(pfx) && sz == 2
13961 && epartIsReg(getUChar(delta))
13962 && gregLO3ofRM(getUChar(delta)) == 2) {
13963 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlw", Iop_ShrN16x8 );
13964 goto decode_success;
13966 /* 66 0F 71 /4 ib = PSRAW by immediate */
13967 if (have66noF2noF3(pfx) && sz == 2
13968 && epartIsReg(getUChar(delta))
13969 && gregLO3ofRM(getUChar(delta)) == 4) {
13970 delta = dis_SSE_shiftE_imm( pfx, delta, "psraw", Iop_SarN16x8 );
13971 goto decode_success;
13973 /* 66 0F 71 /6 ib = PSLLW by immediate */
13974 if (have66noF2noF3(pfx) && sz == 2
13975 && epartIsReg(getUChar(delta))
13976 && gregLO3ofRM(getUChar(delta)) == 6) {
13977 delta = dis_SSE_shiftE_imm( pfx, delta, "psllw", Iop_ShlN16x8 );
13978 goto decode_success;
13980 break;
13982 case 0x72:
13983 /* 66 0F 72 /2 ib = PSRLD by immediate */
13984 if (have66noF2noF3(pfx) && sz == 2
13985 && epartIsReg(getUChar(delta))
13986 && gregLO3ofRM(getUChar(delta)) == 2) {
13987 delta = dis_SSE_shiftE_imm( pfx, delta, "psrld", Iop_ShrN32x4 );
13988 goto decode_success;
13990 /* 66 0F 72 /4 ib = PSRAD by immediate */
13991 if (have66noF2noF3(pfx) && sz == 2
13992 && epartIsReg(getUChar(delta))
13993 && gregLO3ofRM(getUChar(delta)) == 4) {
13994 delta = dis_SSE_shiftE_imm( pfx, delta, "psrad", Iop_SarN32x4 );
13995 goto decode_success;
13997 /* 66 0F 72 /6 ib = PSLLD by immediate */
13998 if (have66noF2noF3(pfx) && sz == 2
13999 && epartIsReg(getUChar(delta))
14000 && gregLO3ofRM(getUChar(delta)) == 6) {
14001 delta = dis_SSE_shiftE_imm( pfx, delta, "pslld", Iop_ShlN32x4 );
14002 goto decode_success;
14004 break;
14006 case 0x73:
14007 /* 66 0F 73 /3 ib = PSRLDQ by immediate */
14008 /* note, if mem case ever filled in, 1 byte after amode */
14009 if (have66noF2noF3(pfx) && sz == 2
14010 && epartIsReg(getUChar(delta))
14011 && gregLO3ofRM(getUChar(delta)) == 3) {
14012 Int imm = (Int)getUChar(delta+1);
14013 Int reg = eregOfRexRM(pfx,getUChar(delta));
14014 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg));
14015 delta += 2;
14016 IRTemp sV = newTemp(Ity_V128);
14017 assign( sV, getXMMReg(reg) );
14018 putXMMReg(reg, mkexpr(math_PSRLDQ( sV, imm )));
14019 goto decode_success;
14021 /* 66 0F 73 /7 ib = PSLLDQ by immediate */
14022 /* note, if mem case ever filled in, 1 byte after amode */
14023 if (have66noF2noF3(pfx) && sz == 2
14024 && epartIsReg(getUChar(delta))
14025 && gregLO3ofRM(getUChar(delta)) == 7) {
14026 Int imm = (Int)getUChar(delta+1);
14027 Int reg = eregOfRexRM(pfx,getUChar(delta));
14028 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg));
14029 vassert(imm >= 0 && imm <= 255);
14030 delta += 2;
14031 IRTemp sV = newTemp(Ity_V128);
14032 assign( sV, getXMMReg(reg) );
14033 putXMMReg(reg, mkexpr(math_PSLLDQ( sV, imm )));
14034 goto decode_success;
14036 /* 66 0F 73 /2 ib = PSRLQ by immediate */
14037 if (have66noF2noF3(pfx) && sz == 2
14038 && epartIsReg(getUChar(delta))
14039 && gregLO3ofRM(getUChar(delta)) == 2) {
14040 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlq", Iop_ShrN64x2 );
14041 goto decode_success;
14043 /* 66 0F 73 /6 ib = PSLLQ by immediate */
14044 if (have66noF2noF3(pfx) && sz == 2
14045 && epartIsReg(getUChar(delta))
14046 && gregLO3ofRM(getUChar(delta)) == 6) {
14047 delta = dis_SSE_shiftE_imm( pfx, delta, "psllq", Iop_ShlN64x2 );
14048 goto decode_success;
14050 break;
14052 case 0x74:
14053 /* 66 0F 74 = PCMPEQB */
14054 if (have66noF2noF3(pfx) && sz == 2) {
14055 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14056 "pcmpeqb", Iop_CmpEQ8x16, False );
14057 goto decode_success;
14059 break;
14061 case 0x75:
14062 /* 66 0F 75 = PCMPEQW */
14063 if (have66noF2noF3(pfx) && sz == 2) {
14064 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14065 "pcmpeqw", Iop_CmpEQ16x8, False );
14066 goto decode_success;
14068 break;
14070 case 0x76:
14071 /* 66 0F 76 = PCMPEQD */
14072 if (have66noF2noF3(pfx) && sz == 2) {
14073 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14074 "pcmpeqd", Iop_CmpEQ32x4, False );
14075 goto decode_success;
14077 break;
14079 case 0x7E:
14080 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
14081 G (lo half xmm). Upper half of G is zeroed out. */
14082 if (haveF3no66noF2(pfx)
14083 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
14084 modrm = getUChar(delta);
14085 if (epartIsReg(modrm)) {
14086 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
14087 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
14088 /* zero bits 127:64 */
14089 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkU64(0) );
14090 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
14091 nameXMMReg(gregOfRexRM(pfx,modrm)));
14092 delta += 1;
14093 } else {
14094 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14095 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
14096 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
14097 loadLE(Ity_I64, mkexpr(addr)) );
14098 DIP("movsd %s,%s\n", dis_buf,
14099 nameXMMReg(gregOfRexRM(pfx,modrm)));
14100 delta += alen;
14102 goto decode_success;
14104 /* 66 0F 7E = MOVD from xmm low 1/4 to ireg32 or m32. */
14105 /* or from xmm low 1/2 to ireg64 or m64. */
14106 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) {
14107 if (sz == 2) sz = 4;
14108 modrm = getUChar(delta);
14109 if (epartIsReg(modrm)) {
14110 delta += 1;
14111 if (sz == 4) {
14112 putIReg32( eregOfRexRM(pfx,modrm),
14113 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
14114 DIP("movd %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
14115 nameIReg32(eregOfRexRM(pfx,modrm)));
14116 } else {
14117 putIReg64( eregOfRexRM(pfx,modrm),
14118 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
14119 DIP("movq %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
14120 nameIReg64(eregOfRexRM(pfx,modrm)));
14122 } else {
14123 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
14124 delta += alen;
14125 storeLE( mkexpr(addr),
14126 sz == 4
14127 ? getXMMRegLane32(gregOfRexRM(pfx,modrm),0)
14128 : getXMMRegLane64(gregOfRexRM(pfx,modrm),0) );
14129 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q',
14130 nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
14132 goto decode_success;
14134 break;
14136 case 0x7F:
14137 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
14138 if (haveF3no66noF2(pfx) && sz == 4) {
14139 modrm = getUChar(delta);
14140 if (epartIsReg(modrm)) {
14141 goto decode_failure; /* awaiting test case */
14142 delta += 1;
14143 putXMMReg( eregOfRexRM(pfx,modrm),
14144 getXMMReg(gregOfRexRM(pfx,modrm)) );
14145 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
14146 nameXMMReg(eregOfRexRM(pfx,modrm)));
14147 } else {
14148 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
14149 delta += alen;
14150 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
14151 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
14153 goto decode_success;
14155 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
14156 if (have66noF2noF3(pfx) && sz == 2) {
14157 modrm = getUChar(delta);
14158 if (epartIsReg(modrm)) {
14159 delta += 1;
14160 putXMMReg( eregOfRexRM(pfx,modrm),
14161 getXMMReg(gregOfRexRM(pfx,modrm)) );
14162 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
14163 nameXMMReg(eregOfRexRM(pfx,modrm)));
14164 } else {
14165 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
14166 gen_SEGV_if_not_16_aligned( addr );
14167 delta += alen;
14168 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
14169 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
14171 goto decode_success;
14173 break;
14175 case 0xAE:
14176 /* 0F AE /7 = SFENCE -- flush pending operations to memory */
14177 if (haveNo66noF2noF3(pfx)
14178 && epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7
14179 && sz == 4) {
14180 delta += 1;
14181 /* Insert a memory fence. It's sometimes important that these
14182 are carried through to the generated code. */
14183 stmt( IRStmt_MBE(Imbe_Fence) );
14184 DIP("sfence\n");
14185 goto decode_success;
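      /* All three fence insns (sfence here, lfence/mfence below) are
         expressed with the same Imbe_Fence event; the weaker store-only
         and load-only orderings are not modelled separately. */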
14187 /* mindless duplication follows .. */
14188 /* 0F AE /5 = LFENCE -- serialise pending loads (load fence) */
14189 /* 0F AE /6 = MFENCE -- serialise all pending memory operations (full fence) */
14190 if (haveNo66noF2noF3(pfx)
14191 && epartIsReg(getUChar(delta))
14192 && (gregLO3ofRM(getUChar(delta)) == 5
14193 || gregLO3ofRM(getUChar(delta)) == 6)
14194 && sz == 4) {
14195 delta += 1;
14196 /* Insert a memory fence. It's sometimes important that these
14197 are carried through to the generated code. */
14198 stmt( IRStmt_MBE(Imbe_Fence) );
14199 DIP("%sfence\n", gregLO3ofRM(getUChar(delta-1))==5 ? "l" : "m");
14200 goto decode_success;
14203 /* 0F AE /7 = CLFLUSH -- flush cache line */
14204 if (haveNo66noF2noF3(pfx)
14205 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7
14206 && sz == 4) {
14208 /* This is something of a hack. We need to know the size of
14209 the cache line containing addr. Since we don't (easily),
14210 assume 256 on the basis that no real cache would have a
14211 line that big. It's safe to invalidate more stuff than we
14212 need, just inefficient. */
14213 ULong lineszB = 256ULL;
14215 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14216 delta += alen;
14218 /* Round addr down to the start of the containing block. */
14219 stmt( IRStmt_Put(
14220 OFFB_CMSTART,
14221 binop( Iop_And64,
14222 mkexpr(addr),
14223 mkU64( ~(lineszB-1) ))) );
14225 stmt( IRStmt_Put(OFFB_CMLEN, mkU64(lineszB) ) );
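         /* CMSTART/CMLEN describe the guest address range whose cached
            translations must be discarded; ending the block with an
            Ijk_InvalICache jump below is what makes the dispatcher act
            on them. */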
14227 jmp_lit(dres, Ijk_InvalICache, (Addr64)(guest_RIP_bbstart+delta));
14229 DIP("clflush %s\n", dis_buf);
14230 goto decode_success;
14233 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
14234 if (haveNo66noF2noF3(pfx)
14235 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3
14236 && sz == 4) {
14237 delta = dis_STMXCSR(vbi, pfx, delta, False/*!isAvx*/);
14238 goto decode_success;
14240 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
14241 if (haveNo66noF2noF3(pfx)
14242 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2
14243 && sz == 4) {
14244 delta = dis_LDMXCSR(vbi, pfx, delta, False/*!isAvx*/);
14245 goto decode_success;
14247 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */
14248 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
14249 && !epartIsReg(getUChar(delta))
14250 && gregOfRexRM(pfx,getUChar(delta)) == 0) {
14251 delta = dis_FXSAVE(vbi, pfx, delta, sz);
14252 goto decode_success;
14254 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */
14255 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
14256 && !epartIsReg(getUChar(delta))
14257 && gregOfRexRM(pfx,getUChar(delta)) == 1) {
14258 delta = dis_FXRSTOR(vbi, pfx, delta, sz);
14259 goto decode_success;
14261 /* 0F AE /4 = XSAVE mem -- write x87, SSE, AVX state to memory */
14262 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
14263 && !epartIsReg(getUChar(delta))
14264 && gregOfRexRM(pfx,getUChar(delta)) == 4
14265 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
14266 delta = dis_XSAVE(vbi, pfx, delta, sz);
14267 goto decode_success;
14269 /* 0F AE /5 = XRSTOR mem -- read x87, SSE, AVX state from memory */
14270 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
14271 && !epartIsReg(getUChar(delta))
14272 && gregOfRexRM(pfx,getUChar(delta)) == 5
14273 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
14274 delta = dis_XRSTOR(vbi, pfx, delta, sz);
14275 goto decode_success;
14277 break;
14279 case 0xC2:
14280 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
14281 if (haveNo66noF2noF3(pfx) && sz == 4) {
14282 Long delta0 = delta;
14283 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpps", True, 4 );
14284 if (delta > delta0) goto decode_success;
14286 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
14287 if (haveF3no66noF2(pfx) && sz == 4) {
14288 Long delta0 = delta;
14289 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpss", False, 4 );
14290 if (delta > delta0) goto decode_success;
14292 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
14293 if (haveF2no66noF3(pfx) && sz == 4) {
14294 Long delta0 = delta;
14295 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpsd", False, 8 );
14296 if (delta > delta0) goto decode_success;
14298 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
14299 if (have66noF2noF3(pfx) && sz == 2) {
14300 Long delta0 = delta;
14301 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmppd", True, 8 );
14302 if (delta > delta0) goto decode_success;
14304 break;
14306 case 0xC3:
14307 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
14308 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) {
14309 modrm = getUChar(delta);
14310 if (!epartIsReg(modrm)) {
14311 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14312 storeLE( mkexpr(addr), getIRegG(sz, pfx, modrm) );
14313 DIP("movnti %s,%s\n", dis_buf,
14314 nameIRegG(sz, pfx, modrm));
14315 delta += alen;
14316 goto decode_success;
14318 /* else fall through */
14320 break;
14322 case 0xC4:
14323 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14324 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
14325 put it into the specified lane of mmx(G). */
14326 if (haveNo66noF2noF3(pfx)
14327 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
14328 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
14329 mmx reg. t4 is the new lane value. t5 is the original
14330 mmx value. t6 is the new mmx value. */
14331 Int lane;
14332 t4 = newTemp(Ity_I16);
14333 t5 = newTemp(Ity_I64);
14334 t6 = newTemp(Ity_I64);
14335 modrm = getUChar(delta);
14336 do_MMX_preamble();
14338 assign(t5, getMMXReg(gregLO3ofRM(modrm)));
14339 breakup64to16s( t5, &t3, &t2, &t1, &t0 );
14341 if (epartIsReg(modrm)) {
14342 assign(t4, getIReg16(eregOfRexRM(pfx,modrm)));
14343 delta += 1+1;
14344 lane = getUChar(delta-1);
14345 DIP("pinsrw $%d,%s,%s\n", lane,
14346 nameIReg16(eregOfRexRM(pfx,modrm)),
14347 nameMMXReg(gregLO3ofRM(modrm)));
14348 } else {
14349 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
14350 delta += 1+alen;
14351 lane = getUChar(delta-1);
14352 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
14353 DIP("pinsrw $%d,%s,%s\n", lane,
14354 dis_buf,
14355 nameMMXReg(gregLO3ofRM(modrm)));
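         /* Rebuild the 64-bit MMX value with the selected 16-bit lane
            replaced by t4: lane 0 is bits 15:0, lane 3 is bits 63:48. */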
14358 switch (lane & 3) {
14359 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break;
14360 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break;
14361 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break;
14362 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break;
14363 default: vassert(0);
14365 putMMXReg(gregLO3ofRM(modrm), mkexpr(t6));
14366 goto decode_success;
14368 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
14369 put it into the specified lane of xmm(G). */
14370 if (have66noF2noF3(pfx)
14371 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
14372 Int lane;
14373 t4 = newTemp(Ity_I16);
14374 modrm = getUChar(delta);
14375 UInt rG = gregOfRexRM(pfx,modrm);
14376 if (epartIsReg(modrm)) {
14377 UInt rE = eregOfRexRM(pfx,modrm);
14378 assign(t4, getIReg16(rE));
14379 delta += 1+1;
14380 lane = getUChar(delta-1);
14381 DIP("pinsrw $%d,%s,%s\n",
14382 lane, nameIReg16(rE), nameXMMReg(rG));
14383 } else {
14384 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
14385 1/*byte after the amode*/ );
14386 delta += 1+alen;
14387 lane = getUChar(delta-1);
14388 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
14389 DIP("pinsrw $%d,%s,%s\n",
14390 lane, dis_buf, nameXMMReg(rG));
14392 IRTemp src_vec = newTemp(Ity_V128);
14393 assign(src_vec, getXMMReg(rG));
14394 IRTemp res_vec = math_PINSRW_128( src_vec, t4, lane & 7);
14395 putXMMReg(rG, mkexpr(res_vec));
14396 goto decode_success;
14398 break;
14400 case 0xC5:
14401 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14402 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
14403 zero-extend of it in ireg(G). */
14404 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) {
14405 modrm = getUChar(delta);
14406 if (epartIsReg(modrm)) {
14407 IRTemp sV = newTemp(Ity_I64);
14408 t5 = newTemp(Ity_I16);
14409 do_MMX_preamble();
14410 assign(sV, getMMXReg(eregLO3ofRM(modrm)));
14411 breakup64to16s( sV, &t3, &t2, &t1, &t0 );
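            /* Bits 1:0 of the immediate select which 16-bit lane is
               extracted; lane 0 is bits 15:0 of the MMX register. */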
14412 switch (getUChar(delta+1) & 3) {
14413 case 0: assign(t5, mkexpr(t0)); break;
14414 case 1: assign(t5, mkexpr(t1)); break;
14415 case 2: assign(t5, mkexpr(t2)); break;
14416 case 3: assign(t5, mkexpr(t3)); break;
14417 default: vassert(0);
14419 if (sz == 8)
14420 putIReg64(gregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(t5)));
14421 else
14422 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t5)));
14423 DIP("pextrw $%d,%s,%s\n",
14424 (Int)getUChar(delta+1),
14425 nameMMXReg(eregLO3ofRM(modrm)),
14426 sz==8 ? nameIReg64(gregOfRexRM(pfx,modrm))
14427 : nameIReg32(gregOfRexRM(pfx,modrm))
14429 delta += 2;
14430 goto decode_success;
14432 /* else fall through */
14433 /* note, for anyone filling in the mem case: this insn has one
14434 byte after the amode and therefore you must pass 1 as the
14435 last arg to disAMode */
14437 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put
14438 zero-extend of it in ireg(G). */
14439 if (have66noF2noF3(pfx)
14440 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
14441 Long delta0 = delta;
14442 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta,
14443 False/*!isAvx*/ );
14444 if (delta > delta0) goto decode_success;
14445 /* else fall through -- decoding has failed */
14447 break;
14449 case 0xC6:
14450 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
14451 if (haveNo66noF2noF3(pfx) && sz == 4) {
14452 Int imm8 = 0;
14453 IRTemp sV = newTemp(Ity_V128);
14454 IRTemp dV = newTemp(Ity_V128);
14455 modrm = getUChar(delta);
14456 UInt rG = gregOfRexRM(pfx,modrm);
14457 assign( dV, getXMMReg(rG) );
14458 if (epartIsReg(modrm)) {
14459 UInt rE = eregOfRexRM(pfx,modrm);
14460 assign( sV, getXMMReg(rE) );
14461 imm8 = (Int)getUChar(delta+1);
14462 delta += 1+1;
14463 DIP("shufps $%d,%s,%s\n", imm8, nameXMMReg(rE), nameXMMReg(rG));
14464 } else {
14465 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
14466 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
14467 imm8 = (Int)getUChar(delta+alen);
14468 delta += 1+alen;
14469 DIP("shufps $%d,%s,%s\n", imm8, dis_buf, nameXMMReg(rG));
14471 IRTemp res = math_SHUFPS_128( sV, dV, imm8 );
14472 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
14473 goto decode_success;
14475 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
14476 if (have66noF2noF3(pfx) && sz == 2) {
14477 Int select;
14478 IRTemp sV = newTemp(Ity_V128);
14479 IRTemp dV = newTemp(Ity_V128);
14481 modrm = getUChar(delta);
14482 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
14484 if (epartIsReg(modrm)) {
14485 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
14486 select = (Int)getUChar(delta+1);
14487 delta += 1+1;
14488 DIP("shufpd $%d,%s,%s\n", select,
14489 nameXMMReg(eregOfRexRM(pfx,modrm)),
14490 nameXMMReg(gregOfRexRM(pfx,modrm)));
14491 } else {
14492 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
14493 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
14494 select = getUChar(delta+alen);
14495 delta += 1+alen;
14496 DIP("shufpd $%d,%s,%s\n", select,
14497 dis_buf,
14498 nameXMMReg(gregOfRexRM(pfx,modrm)));
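         /* Only the low two bits of the immediate matter: bit 0 picks
            which half of G (dV) becomes the result's low half, bit 1
            which half of E (sV) becomes its high half. */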
14501 IRTemp res = math_SHUFPD_128( sV, dV, select );
14502 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
14503 goto decode_success;
14505 break;
14507 case 0xD1:
14508 /* 66 0F D1 = PSRLW by E */
14509 if (have66noF2noF3(pfx) && sz == 2) {
14510 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlw", Iop_ShrN16x8 );
14511 goto decode_success;
14513 break;
14515 case 0xD2:
14516 /* 66 0F D2 = PSRLD by E */
14517 if (have66noF2noF3(pfx) && sz == 2) {
14518 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrld", Iop_ShrN32x4 );
14519 goto decode_success;
14521 break;
14523 case 0xD3:
14524 /* 66 0F D3 = PSRLQ by E */
14525 if (have66noF2noF3(pfx) && sz == 2) {
14526 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlq", Iop_ShrN64x2 );
14527 goto decode_success;
14529 break;
14531 case 0xD4:
14532 /* 66 0F D4 = PADDQ */
14533 if (have66noF2noF3(pfx) && sz == 2) {
14534 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14535 "paddq", Iop_Add64x2, False );
14536 goto decode_success;
14538 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
14539 /* 0F D4 = PADDQ -- add 64x1 */
14540 if (haveNo66noF2noF3(pfx) && sz == 4) {
14541 do_MMX_preamble();
14542 delta = dis_MMXop_regmem_to_reg (
14543 vbi, pfx, delta, opc, "paddq", False );
14544 goto decode_success;
14546 break;
14548 case 0xD5:
14549 /* 66 0F D5 = PMULLW -- 16x8 multiply */
14550 if (have66noF2noF3(pfx) && sz == 2) {
14551 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14552 "pmullw", Iop_Mul16x8, False );
14553 goto decode_success;
14555 break;
14557 case 0xD6:
14558 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
14559 hi half). */
14560 if (haveF3no66noF2(pfx) && sz == 4) {
14561 modrm = getUChar(delta);
14562 if (epartIsReg(modrm)) {
14563 do_MMX_preamble();
14564 putXMMReg( gregOfRexRM(pfx,modrm),
14565 unop(Iop_64UtoV128, getMMXReg( eregLO3ofRM(modrm) )) );
14566 DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
14567 nameXMMReg(gregOfRexRM(pfx,modrm)));
14568 delta += 1;
14569 goto decode_success;
14571 /* apparently no mem case for this insn */
14573 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
14574 or lo half xmm). */
14575 if (have66noF2noF3(pfx)
14576 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
14577 modrm = getUChar(delta);
14578 if (epartIsReg(modrm)) {
14579 /* fall through, awaiting test case */
14580 /* dst: lo half copied, hi half zeroed */
14581 } else {
14582 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14583 storeLE( mkexpr(addr),
14584 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
14585 DIP("movq %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf );
14586 delta += alen;
14587 goto decode_success;
14590 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
14591 if (haveF2no66noF3(pfx) && sz == 4) {
14592 modrm = getUChar(delta);
14593 if (epartIsReg(modrm)) {
14594 do_MMX_preamble();
14595 putMMXReg( gregLO3ofRM(modrm),
14596 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
14597 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
14598 nameMMXReg(gregLO3ofRM(modrm)));
14599 delta += 1;
14600 goto decode_success;
14602 /* apparently no mem case for this insn */
14604 break;
14606 case 0xD7:
14607 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16
14608 lanes in xmm(E), turn them into a byte, and put
14609 zero-extend of it in ireg(G). Doing this directly is just
14610 too cumbersome; give up therefore and call a helper. */
14611 if (have66noF2noF3(pfx)
14612 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
14613 && epartIsReg(getUChar(delta))) { /* no memory case, it seems */
14614 delta = dis_PMOVMSKB_128( vbi, pfx, delta, False/*!isAvx*/ );
14615 goto decode_success;
14617 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14618 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
14619 mmx(E), turn them into a byte, and put zero-extend of it in
14620 ireg(G). */
14621 if (haveNo66noF2noF3(pfx)
14622 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
14623 modrm = getUChar(delta);
14624 if (epartIsReg(modrm)) {
14625 do_MMX_preamble();
14626 t0 = newTemp(Ity_I64);
14627 t1 = newTemp(Ity_I32);
14628 assign(t0, getMMXReg(eregLO3ofRM(modrm)));
14629 assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0))));
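            /* Iop_GetMSBs8x8 collects the top bit of each of the eight
               byte lanes into an 8-bit value, which is then zero-extended
               to 32 bits for the destination ireg. */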
14630 putIReg32(gregOfRexRM(pfx,modrm), mkexpr(t1));
14631 DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
14632 nameIReg32(gregOfRexRM(pfx,modrm)));
14633 delta += 1;
14634 goto decode_success;
14636 /* else fall through */
14638 break;
14640 case 0xD8:
14641 /* 66 0F D8 = PSUBUSB */
14642 if (have66noF2noF3(pfx) && sz == 2) {
14643 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14644 "psubusb", Iop_QSub8Ux16, False );
14645 goto decode_success;
14647 break;
14649 case 0xD9:
14650 /* 66 0F D9 = PSUBUSW */
14651 if (have66noF2noF3(pfx) && sz == 2) {
14652 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14653 "psubusw", Iop_QSub16Ux8, False );
14654 goto decode_success;
14656 break;
14658 case 0xDA:
14659 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14660 /* 0F DA = PMINUB -- 8x8 unsigned min */
14661 if (haveNo66noF2noF3(pfx) && sz == 4) {
14662 do_MMX_preamble();
14663 delta = dis_MMXop_regmem_to_reg (
14664 vbi, pfx, delta, opc, "pminub", False );
14665 goto decode_success;
14667 /* 66 0F DA = PMINUB -- 8x16 unsigned min */
14668 if (have66noF2noF3(pfx) && sz == 2) {
14669 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14670 "pminub", Iop_Min8Ux16, False );
14671 goto decode_success;
14673 break;
14675 case 0xDB:
14676 /* 66 0F DB = PAND */
14677 if (have66noF2noF3(pfx) && sz == 2) {
14678 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pand", Iop_AndV128 );
14679 goto decode_success;
14681 break;
14683 case 0xDC:
14684 /* 66 0F DC = PADDUSB */
14685 if (have66noF2noF3(pfx) && sz == 2) {
14686 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14687 "paddusb", Iop_QAdd8Ux16, False );
14688 goto decode_success;
14690 break;
14692 case 0xDD:
14693 /* 66 0F DD = PADDUSW */
14694 if (have66noF2noF3(pfx) && sz == 2) {
14695 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14696 "paddusw", Iop_QAdd16Ux8, False );
14697 goto decode_success;
14699 break;
14701 case 0xDE:
14702 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14703 /* 0F DE = PMAXUB -- 8x8 unsigned max */
14704 if (haveNo66noF2noF3(pfx) && sz == 4) {
14705 do_MMX_preamble();
14706 delta = dis_MMXop_regmem_to_reg (
14707 vbi, pfx, delta, opc, "pmaxub", False );
14708 goto decode_success;
14710 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
14711 if (have66noF2noF3(pfx) && sz == 2) {
14712 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14713 "pmaxub", Iop_Max8Ux16, False );
14714 goto decode_success;
14716 break;
14718 case 0xDF:
14719 /* 66 0F DF = PANDN */
14720 if (have66noF2noF3(pfx) && sz == 2) {
14721 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "pandn", Iop_AndV128 );
14722 goto decode_success;
14724 break;
14726 case 0xE0:
14727 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14728 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
14729 if (haveNo66noF2noF3(pfx) && sz == 4) {
14730 do_MMX_preamble();
14731 delta = dis_MMXop_regmem_to_reg (
14732 vbi, pfx, delta, opc, "pavgb", False );
14733 goto decode_success;
14735 /* 66 0F E0 = PAVGB */
14736 if (have66noF2noF3(pfx) && sz == 2) {
14737 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14738 "pavgb", Iop_Avg8Ux16, False );
14739 goto decode_success;
14741 break;
14743 case 0xE1:
14744 /* 66 0F E1 = PSRAW by E */
14745 if (have66noF2noF3(pfx) && sz == 2) {
14746 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psraw", Iop_SarN16x8 );
14747 goto decode_success;
14749 break;
14751 case 0xE2:
14752 /* 66 0F E2 = PSRAD by E */
14753 if (have66noF2noF3(pfx) && sz == 2) {
14754 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrad", Iop_SarN32x4 );
14755 goto decode_success;
14757 break;
14759 case 0xE3:
14760 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14761 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
14762 if (haveNo66noF2noF3(pfx) && sz == 4) {
14763 do_MMX_preamble();
14764 delta = dis_MMXop_regmem_to_reg (
14765 vbi, pfx, delta, opc, "pavgw", False );
14766 goto decode_success;
14768 /* 66 0F E3 = PAVGW */
14769 if (have66noF2noF3(pfx) && sz == 2) {
14770 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14771 "pavgw", Iop_Avg16Ux8, False );
14772 goto decode_success;
14774 break;
14776 case 0xE4:
14777 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14778 /* 0F E4 = PMULHUW -- 16x4 hi-half of unsigned widening multiply */
14779 if (haveNo66noF2noF3(pfx) && sz == 4) {
14780 do_MMX_preamble();
14781 delta = dis_MMXop_regmem_to_reg (
14782 vbi, pfx, delta, opc, "pmuluh", False );
14783 goto decode_success;
14785 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
14786 if (have66noF2noF3(pfx) && sz == 2) {
14787 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14788 "pmulhuw", Iop_MulHi16Ux8, False );
14789 goto decode_success;
14791 break;
14793 case 0xE5:
14794 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
14795 if (have66noF2noF3(pfx) && sz == 2) {
14796 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14797 "pmulhw", Iop_MulHi16Sx8, False );
14798 goto decode_success;
14800 break;
14802 case 0xE6:
14803 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
14804 lo half xmm(G), and zero upper half, rounding towards zero */
14805 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
14806 lo half xmm(G), according to prevailing rounding mode, and zero
14807 upper half */
14808 if ( (haveF2no66noF3(pfx) && sz == 4)
14809 || (have66noF2noF3(pfx) && sz == 2) ) {
14810 delta = dis_CVTxPD2DQ_128( vbi, pfx, delta, False/*!isAvx*/,
14811 toBool(sz == 2)/*r2zero*/);
14812 goto decode_success;
14814 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
14815 F64 in xmm(G) */
14816 if (haveF3no66noF2(pfx) && sz == 4) {
14817 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, False/*!isAvx*/);
14818 goto decode_success;
14820 break;
14822 case 0xE7:
14823 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14824 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
14825 Intel manual does not say anything about the usual business of
14826 the FP reg tags getting trashed whenever an MMX insn happens.
14827 So we just leave them alone. */
14829 if (haveNo66noF2noF3(pfx) && sz == 4) {
14830 modrm = getUChar(delta);
14831 if (!epartIsReg(modrm)) {
14832 /* do_MMX_preamble(); Intel docs don't specify this */
14833 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14834 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
14835 DIP("movntq %s,%s\n", dis_buf,
14836 nameMMXReg(gregLO3ofRM(modrm)));
14837 delta += alen;
14838 goto decode_success;
14840 /* else fall through */
14842 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
14843 if (have66noF2noF3(pfx) && sz == 2) {
14844 modrm = getUChar(delta);
14845 if (!epartIsReg(modrm)) {
14846 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14847 gen_SEGV_if_not_16_aligned( addr );
14848 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
14849 DIP("movntdq %s,%s\n", dis_buf,
14850 nameXMMReg(gregOfRexRM(pfx,modrm)));
14851 delta += alen;
14852 goto decode_success;
14854 /* else fall through */
14856 break;
14858 case 0xE8:
14859 /* 66 0F E8 = PSUBSB */
14860 if (have66noF2noF3(pfx) && sz == 2) {
14861 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14862 "psubsb", Iop_QSub8Sx16, False );
14863 goto decode_success;
14865 break;
14867 case 0xE9:
14868 /* 66 0F E9 = PSUBSW */
14869 if (have66noF2noF3(pfx) && sz == 2) {
14870 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14871 "psubsw", Iop_QSub16Sx8, False );
14872 goto decode_success;
14874 break;
14876 case 0xEA:
14877 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14878 /* 0F EA = PMINSW -- 16x4 signed min */
14879 if (haveNo66noF2noF3(pfx) && sz == 4) {
14880 do_MMX_preamble();
14881 delta = dis_MMXop_regmem_to_reg (
14882 vbi, pfx, delta, opc, "pminsw", False );
14883 goto decode_success;
14885 /* 66 0F EA = PMINSW -- 16x8 signed min */
14886 if (have66noF2noF3(pfx) && sz == 2) {
14887 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14888 "pminsw", Iop_Min16Sx8, False );
14889 goto decode_success;
14891 break;
14893 case 0xEB:
14894 /* 66 0F EB = POR */
14895 if (have66noF2noF3(pfx) && sz == 2) {
14896 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "por", Iop_OrV128 );
14897 goto decode_success;
14899 break;
14901 case 0xEC:
14902 /* 66 0F EC = PADDSB */
14903 if (have66noF2noF3(pfx) && sz == 2) {
14904 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14905 "paddsb", Iop_QAdd8Sx16, False );
14906 goto decode_success;
14908 break;
14910 case 0xED:
14911 /* 66 0F ED = PADDSW */
14912 if (have66noF2noF3(pfx) && sz == 2) {
14913 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14914 "paddsw", Iop_QAdd16Sx8, False );
14915 goto decode_success;
14917 break;
14919 case 0xEE:
14920 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14921 /* 0F EE = PMAXSW -- 16x4 signed max */
14922 if (haveNo66noF2noF3(pfx) && sz == 4) {
14923 do_MMX_preamble();
14924 delta = dis_MMXop_regmem_to_reg (
14925 vbi, pfx, delta, opc, "pmaxsw", False );
14926 goto decode_success;
14928 /* 66 0F EE = PMAXSW -- 16x8 signed max */
14929 if (have66noF2noF3(pfx) && sz == 2) {
14930 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14931 "pmaxsw", Iop_Max16Sx8, False );
14932 goto decode_success;
14934 break;
14936 case 0xEF:
14937 /* 66 0F EF = PXOR */
14938 if (have66noF2noF3(pfx) && sz == 2) {
14939 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pxor", Iop_XorV128 );
14940 goto decode_success;
14942 break;
14944 case 0xF1:
14945 /* 66 0F F1 = PSLLW by E */
14946 if (have66noF2noF3(pfx) && sz == 2) {
14947 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllw", Iop_ShlN16x8 );
14948 goto decode_success;
14950 break;
14952 case 0xF2:
14953 /* 66 0F F2 = PSLLD by E */
14954 if (have66noF2noF3(pfx) && sz == 2) {
14955 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "pslld", Iop_ShlN32x4 );
14956 goto decode_success;
14958 break;
14960 case 0xF3:
14961 /* 66 0F F3 = PSLLQ by E */
14962 if (have66noF2noF3(pfx) && sz == 2) {
14963 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllq", Iop_ShlN64x2 );
14964 goto decode_success;
14966 break;
14968 case 0xF4:
14969 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply: 32-bit lane 0 of
14970 each operand forms the lower 64-bit half of the result, and lane 2 of
14971 each operand forms the upper 64-bit half */
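/* Worked example (illustration): if the 32-bit lanes of the dest are
   [d3 d2 d1 d0] and of the source are [s3 s2 s1 s0], the result is
      bits  63:0  = (ULong)d0 * (ULong)s0
      bits 127:64 = (ULong)d2 * (ULong)s2
   -- only the even-numbered lanes take part. */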
14972 if (have66noF2noF3(pfx) && sz == 2) {
14973 IRTemp sV = newTemp(Ity_V128);
14974 IRTemp dV = newTemp(Ity_V128);
14975 modrm = getUChar(delta);
14976 UInt rG = gregOfRexRM(pfx,modrm);
14977 assign( dV, getXMMReg(rG) );
14978 if (epartIsReg(modrm)) {
14979 UInt rE = eregOfRexRM(pfx,modrm);
14980 assign( sV, getXMMReg(rE) );
14981 delta += 1;
14982 DIP("pmuludq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
14983 } else {
14984 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14985 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
14986 delta += alen;
14987 DIP("pmuludq %s,%s\n", dis_buf, nameXMMReg(rG));
14989 putXMMReg( rG, mkexpr(math_PMULUDQ_128( sV, dV )) );
14990 goto decode_success;
14992 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
14993 /* 0F F4 = PMULUDQ -- unsigned widening multiply: 32-bit lane 0 of each
14994 operand forms the 64-bit result */
14995 if (haveNo66noF2noF3(pfx) && sz == 4) {
14996 IRTemp sV = newTemp(Ity_I64);
14997 IRTemp dV = newTemp(Ity_I64);
14998 t1 = newTemp(Ity_I32);
14999 t0 = newTemp(Ity_I32);
15000 modrm = getUChar(delta);
15002 do_MMX_preamble();
15003 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15005 if (epartIsReg(modrm)) {
15006 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15007 delta += 1;
15008 DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
15009 nameMMXReg(gregLO3ofRM(modrm)));
15010 } else {
15011 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15012 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15013 delta += alen;
15014 DIP("pmuludq %s,%s\n", dis_buf,
15015 nameMMXReg(gregLO3ofRM(modrm)));
15018 assign( t0, unop(Iop_64to32, mkexpr(dV)) );
15019 assign( t1, unop(Iop_64to32, mkexpr(sV)) );
15020 putMMXReg( gregLO3ofRM(modrm),
15021 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
15022 goto decode_success;
15024 break;
15026 case 0xF5:
15027 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
15028 E(xmm or mem) to G(xmm) */
15029 if (have66noF2noF3(pfx) && sz == 2) {
15030 IRTemp sV = newTemp(Ity_V128);
15031 IRTemp dV = newTemp(Ity_V128);
15032 modrm = getUChar(delta);
15033 UInt rG = gregOfRexRM(pfx,modrm);
15034 if (epartIsReg(modrm)) {
15035 UInt rE = eregOfRexRM(pfx,modrm);
15036 assign( sV, getXMMReg(rE) );
15037 delta += 1;
15038 DIP("pmaddwd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15039 } else {
15040 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15041 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15042 delta += alen;
15043 DIP("pmaddwd %s,%s\n", dis_buf, nameXMMReg(rG));
15045 assign( dV, getXMMReg(rG) );
15046 putXMMReg( rG, mkexpr(math_PMADDWD_128(dV, sV)) );
15047 goto decode_success;
15049 break;
15051 case 0xF6:
15052 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
15053 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
15054 if (haveNo66noF2noF3(pfx) && sz == 4) {
15055 do_MMX_preamble();
15056 delta = dis_MMXop_regmem_to_reg (
15057 vbi, pfx, delta, opc, "psadbw", False );
15058 goto decode_success;
15060 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
15061 from E(xmm or mem) to G(xmm) */
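/* Worked example (illustration): for each 64-bit half independently,
      sum = |d0-s0| + |d1-s1| + ... + |d7-s7|   (unsigned bytes)
   and that 16-bit sum is zero-extended into the corresponding 64-bit
   half of the result, hence the "48 zeroes ++ u16" shape above. */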
15062 if (have66noF2noF3(pfx) && sz == 2) {
15063 IRTemp sV = newTemp(Ity_V128);
15064 IRTemp dV = newTemp(Ity_V128);
15065 modrm = getUChar(delta);
15066 UInt rG = gregOfRexRM(pfx,modrm);
15067 if (epartIsReg(modrm)) {
15068 UInt rE = eregOfRexRM(pfx,modrm);
15069 assign( sV, getXMMReg(rE) );
15070 delta += 1;
15071 DIP("psadbw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15072 } else {
15073 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15074 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15075 delta += alen;
15076 DIP("psadbw %s,%s\n", dis_buf, nameXMMReg(rG));
15078 assign( dV, getXMMReg(rG) );
15079 putXMMReg( rG, mkexpr( math_PSADBW_128 ( dV, sV ) ) );
15081 goto decode_success;
15083 break;
15085 case 0xF7:
15086 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
15087 /* 0F F7 = MASKMOVQ -- 8x8 masked store */
15088 if (haveNo66noF2noF3(pfx) && sz == 4) {
15089 Bool ok = False;
15090 delta = dis_MMX( &ok, vbi, pfx, sz, delta-1 );
15091 if (ok) goto decode_success;
15093 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
15094 if (have66noF2noF3(pfx) && sz == 2 && epartIsReg(getUChar(delta))) {
15095 delta = dis_MASKMOVDQU( vbi, pfx, delta, False/*!isAvx*/ );
15096 goto decode_success;
15098 break;
15100 case 0xF8:
15101 /* 66 0F F8 = PSUBB */
15102 if (have66noF2noF3(pfx) && sz == 2) {
15103 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15104 "psubb", Iop_Sub8x16, False );
15105 goto decode_success;
15107 break;
15109 case 0xF9:
15110 /* 66 0F F9 = PSUBW */
15111 if (have66noF2noF3(pfx) && sz == 2) {
15112 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15113 "psubw", Iop_Sub16x8, False );
15114 goto decode_success;
15116 break;
15118 case 0xFA:
15119 /* 66 0F FA = PSUBD */
15120 if (have66noF2noF3(pfx) && sz == 2) {
15121 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15122 "psubd", Iop_Sub32x4, False );
15123 goto decode_success;
15125 break;
15127 case 0xFB:
15128 /* 66 0F FB = PSUBQ */
15129 if (have66noF2noF3(pfx) && sz == 2) {
15130 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15131 "psubq", Iop_Sub64x2, False );
15132 goto decode_success;
15134 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
15135 /* 0F FB = PSUBQ -- sub 64x1 */
15136 if (haveNo66noF2noF3(pfx) && sz == 4) {
15137 do_MMX_preamble();
15138 delta = dis_MMXop_regmem_to_reg (
15139 vbi, pfx, delta, opc, "psubq", False );
15140 goto decode_success;
15142 break;
15144 case 0xFC:
15145 /* 66 0F FC = PADDB */
15146 if (have66noF2noF3(pfx) && sz == 2) {
15147 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15148 "paddb", Iop_Add8x16, False );
15149 goto decode_success;
15151 break;
15153 case 0xFD:
15154 /* 66 0F FD = PADDW */
15155 if (have66noF2noF3(pfx) && sz == 2) {
15156 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15157 "paddw", Iop_Add16x8, False );
15158 goto decode_success;
15160 break;
15162 case 0xFE:
15163 /* 66 0F FE = PADDD */
15164 if (have66noF2noF3(pfx) && sz == 2) {
15165 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15166 "paddd", Iop_Add32x4, False );
15167 goto decode_success;
15169 break;
15171 default:
15172 goto decode_failure;
15176 decode_failure:
15177 *decode_OK = False;
15178 return deltaIN;
15180 decode_success:
15181 *decode_OK = True;
15182 return delta;
15186 /*------------------------------------------------------------*/
15187 /*--- ---*/
15188 /*--- Top-level SSE3 (not SupSSE3): dis_ESC_0F__SSE3 ---*/
15189 /*--- ---*/
15190 /*------------------------------------------------------------*/
15192 static Long dis_MOVDDUP_128 ( const VexAbiInfo* vbi, Prefix pfx,
15193 Long delta, Bool isAvx )
15195 IRTemp addr = IRTemp_INVALID;
15196 Int alen = 0;
15197 HChar dis_buf[50];
15198 IRTemp sV = newTemp(Ity_V128);
15199 IRTemp d0 = newTemp(Ity_I64);
15200 UChar modrm = getUChar(delta);
15201 UInt rG = gregOfRexRM(pfx,modrm);
15202 if (epartIsReg(modrm)) {
15203 UInt rE = eregOfRexRM(pfx,modrm);
15204 assign( sV, getXMMReg(rE) );
15205 DIP("%smovddup %s,%s\n",
15206 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
15207 delta += 1;
15208 assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
15209 } else {
15210 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15211 assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
15212 DIP("%smovddup %s,%s\n",
15213 isAvx ? "v" : "", dis_buf, nameXMMReg(rG));
15214 delta += alen;
15216 (isAvx ? putYMMRegLoAndZU : putXMMReg)
15217 ( rG, binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
15218 return delta;
15222 static Long dis_MOVDDUP_256 ( const VexAbiInfo* vbi, Prefix pfx,
15223 Long delta )
15225 IRTemp addr = IRTemp_INVALID;
15226 Int alen = 0;
15227 HChar dis_buf[50];
15228 IRTemp d0 = newTemp(Ity_I64);
15229 IRTemp d1 = newTemp(Ity_I64);
15230 UChar modrm = getUChar(delta);
15231 UInt rG = gregOfRexRM(pfx,modrm);
15232 if (epartIsReg(modrm)) {
15233 UInt rE = eregOfRexRM(pfx,modrm);
15234 DIP("vmovddup %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
15235 delta += 1;
15236 assign ( d0, getYMMRegLane64(rE, 0) );
15237 assign ( d1, getYMMRegLane64(rE, 2) );
15238 } else {
15239 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15240 assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
15241 assign( d1, loadLE(Ity_I64, binop(Iop_Add64,
15242 mkexpr(addr), mkU64(16))) );
15243 DIP("vmovddup %s,%s\n", dis_buf, nameYMMReg(rG));
15244 delta += alen;
15246 putYMMRegLane64( rG, 0, mkexpr(d0) );
15247 putYMMRegLane64( rG, 1, mkexpr(d0) );
15248 putYMMRegLane64( rG, 2, mkexpr(d1) );
15249 putYMMRegLane64( rG, 3, mkexpr(d1) );
15250 return delta;
15254 static Long dis_MOVSxDUP_128 ( const VexAbiInfo* vbi, Prefix pfx,
15255 Long delta, Bool isAvx, Bool isL )
15257 IRTemp addr = IRTemp_INVALID;
15258 Int alen = 0;
15259 HChar dis_buf[50];
15260 IRTemp sV = newTemp(Ity_V128);
15261 UChar modrm = getUChar(delta);
15262 UInt rG = gregOfRexRM(pfx,modrm);
15263 IRTemp s3, s2, s1, s0;
15264 s3 = s2 = s1 = s0 = IRTemp_INVALID;
15265 if (epartIsReg(modrm)) {
15266 UInt rE = eregOfRexRM(pfx,modrm);
15267 assign( sV, getXMMReg(rE) );
15268 DIP("%smovs%cdup %s,%s\n",
15269 isAvx ? "v" : "", isL ? 'l' : 'h', nameXMMReg(rE), nameXMMReg(rG));
15270 delta += 1;
15271 } else {
15272 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15273 if (!isAvx)
15274 gen_SEGV_if_not_16_aligned( addr );
15275 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15276 DIP("%smovs%cdup %s,%s\n",
15277 isAvx ? "v" : "", isL ? 'l' : 'h', dis_buf, nameXMMReg(rG));
15278 delta += alen;
15280 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
15281 (isAvx ? putYMMRegLoAndZU : putXMMReg)
15282 ( rG, isL ? mkV128from32s( s2, s2, s0, s0 )
15283 : mkV128from32s( s3, s3, s1, s1 ) );
15284 return delta;
15288 static Long dis_MOVSxDUP_256 ( const VexAbiInfo* vbi, Prefix pfx,
15289 Long delta, Bool isL )
15291 IRTemp addr = IRTemp_INVALID;
15292 Int alen = 0;
15293 HChar dis_buf[50];
15294 IRTemp sV = newTemp(Ity_V256);
15295 UChar modrm = getUChar(delta);
15296 UInt rG = gregOfRexRM(pfx,modrm);
15297 IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
15298 s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
15299 if (epartIsReg(modrm)) {
15300 UInt rE = eregOfRexRM(pfx,modrm);
15301 assign( sV, getYMMReg(rE) );
15302 DIP("vmovs%cdup %s,%s\n",
15303 isL ? 'l' : 'h', nameYMMReg(rE), nameYMMReg(rG));
15304 delta += 1;
15305 } else {
15306 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15307 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
15308 DIP("vmovs%cdup %s,%s\n",
15309 isL ? 'l' : 'h', dis_buf, nameYMMReg(rG));
15310 delta += alen;
15312 breakupV256to32s( sV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
15313 putYMMRegLane128( rG, 1, isL ? mkV128from32s( s6, s6, s4, s4 )
15314 : mkV128from32s( s7, s7, s5, s5 ) );
15315 putYMMRegLane128( rG, 0, isL ? mkV128from32s( s2, s2, s0, s0 )
15316 : mkV128from32s( s3, s3, s1, s1 ) );
15317 return delta;
15321 static IRTemp math_HADDPS_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
15323 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
15324 IRTemp leftV = newTemp(Ity_V128);
15325 IRTemp rightV = newTemp(Ity_V128);
15326 IRTemp rm = newTemp(Ity_I32);
15327 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
15329 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
15330 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
15332 assign( leftV, mkV128from32s( s2, s0, d2, d0 ) );
15333 assign( rightV, mkV128from32s( s3, s1, d3, d1 ) );
15335 IRTemp res = newTemp(Ity_V128);
15336 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
15337 assign( res, triop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
15338 mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
15339 return res;
15343 static IRTemp math_HADDPD_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
15345 IRTemp s1, s0, d1, d0;
15346 IRTemp leftV = newTemp(Ity_V128);
15347 IRTemp rightV = newTemp(Ity_V128);
15348 IRTemp rm = newTemp(Ity_I32);
15349 s1 = s0 = d1 = d0 = IRTemp_INVALID;
15351 breakupV128to64s( sV, &s1, &s0 );
15352 breakupV128to64s( dV, &d1, &d0 );
15354 assign( leftV, binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
15355 assign( rightV, binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );
15357 IRTemp res = newTemp(Ity_V128);
15358 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
15359 assign( res, triop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
15360 mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
15361 return res;
15365 __attribute__((noinline))
15366 static
15367 Long dis_ESC_0F__SSE3 ( Bool* decode_OK,
15368 const VexAbiInfo* vbi,
15369 Prefix pfx, Int sz, Long deltaIN )
15371 IRTemp addr = IRTemp_INVALID;
15372 UChar modrm = 0;
15373 Int alen = 0;
15374 HChar dis_buf[50];
15376 *decode_OK = False;
15378 Long delta = deltaIN;
15379 UChar opc = getUChar(delta);
15380 delta++;
15381 switch (opc) {
15383 case 0x12:
15384 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
15385 duplicating some lanes (2:2:0:0). */
15386 if (haveF3no66noF2(pfx) && sz == 4) {
15387 delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/,
15388 True/*isL*/ );
15389 goto decode_success;
15391 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
15392 duplicating some lanes (0:1:0:1). */
15393 if (haveF2no66noF3(pfx)
15394 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
15395 delta = dis_MOVDDUP_128( vbi, pfx, delta, False/*!isAvx*/ );
15396 goto decode_success;
15398 break;
15400 case 0x16:
15401 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
15402 duplicating some lanes (3:3:1:1). */
15403 if (haveF3no66noF2(pfx) && sz == 4) {
15404 delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/,
15405 False/*!isL*/ );
15406 goto decode_success;
15408 break;
15410 case 0x7C:
15411 case 0x7D:
15412 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
15413 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
15414 if (haveF2no66noF3(pfx) && sz == 4) {
15415 IRTemp eV = newTemp(Ity_V128);
15416 IRTemp gV = newTemp(Ity_V128);
15417 Bool isAdd = opc == 0x7C;
15418 const HChar* str = isAdd ? "add" : "sub";
15419 modrm = getUChar(delta);
15420 UInt rG = gregOfRexRM(pfx,modrm);
15421 if (epartIsReg(modrm)) {
15422 UInt rE = eregOfRexRM(pfx,modrm);
15423 assign( eV, getXMMReg(rE) );
15424 DIP("h%sps %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG));
15425 delta += 1;
15426 } else {
15427 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15428 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
15429 DIP("h%sps %s,%s\n", str, dis_buf, nameXMMReg(rG));
15430 delta += alen;
15433 assign( gV, getXMMReg(rG) );
15434 putXMMReg( rG, mkexpr( math_HADDPS_128 ( gV, eV, isAdd ) ) );
15435 goto decode_success;
15437 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
15438 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
15439 if (have66noF2noF3(pfx) && sz == 2) {
15440 IRTemp eV = newTemp(Ity_V128);
15441 IRTemp gV = newTemp(Ity_V128);
15442 Bool isAdd = opc == 0x7C;
15443 const HChar* str = isAdd ? "add" : "sub";
15444 modrm = getUChar(delta);
15445 UInt rG = gregOfRexRM(pfx,modrm);
15446 if (epartIsReg(modrm)) {
15447 UInt rE = eregOfRexRM(pfx,modrm);
15448 assign( eV, getXMMReg(rE) );
15449 DIP("h%spd %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG));
15450 delta += 1;
15451 } else {
15452 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15453 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
15454 DIP("h%spd %s,%s\n", str, dis_buf, nameXMMReg(rG));
15455 delta += alen;
15458 assign( gV, getXMMReg(rG) );
15459 putXMMReg( rG, mkexpr( math_HADDPD_128 ( gV, eV, isAdd ) ) );
15460 goto decode_success;
15462 break;
15464 case 0xD0:
15465 /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm). */
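/* Worked example (illustration): on the two 64-bit lanes this computes
      dst[63:0]   = dst[63:0]   - src[63:0]
      dst[127:64] = dst[127:64] + src[127:64]
   i.e. subtract in the low lane, add in the high lane. */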
15466 if (have66noF2noF3(pfx) && sz == 2) {
15467 IRTemp eV = newTemp(Ity_V128);
15468 IRTemp gV = newTemp(Ity_V128);
15469 modrm = getUChar(delta);
15470 UInt rG = gregOfRexRM(pfx,modrm);
15471 if (epartIsReg(modrm)) {
15472 UInt rE = eregOfRexRM(pfx,modrm);
15473 assign( eV, getXMMReg(rE) );
15474 DIP("addsubpd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15475 delta += 1;
15476 } else {
15477 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15478 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
15479 DIP("addsubpd %s,%s\n", dis_buf, nameXMMReg(rG));
15480 delta += alen;
15483 assign( gV, getXMMReg(rG) );
15484 putXMMReg( rG, mkexpr( math_ADDSUBPD_128 ( gV, eV ) ) );
15485 goto decode_success;
15487 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
15488 if (haveF2no66noF3(pfx) && sz == 4) {
15489 IRTemp eV = newTemp(Ity_V128);
15490 IRTemp gV = newTemp(Ity_V128);
15491 modrm = getUChar(delta);
15492 UInt rG = gregOfRexRM(pfx,modrm);
15495 if (epartIsReg(modrm)) {
15496 UInt rE = eregOfRexRM(pfx,modrm);
15497 assign( eV, getXMMReg(rE) );
15498 DIP("addsubps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15499 delta += 1;
15500 } else {
15501 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15502 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
15503 DIP("addsubps %s,%s\n", dis_buf, nameXMMReg(rG));
15504 delta += alen;
15507 assign( gV, getXMMReg(rG) );
15508 putXMMReg( rG, mkexpr( math_ADDSUBPS_128 ( gV, eV ) ) );
15509 goto decode_success;
15511 break;
15513 case 0xF0:
15514 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */
15515 if (haveF2no66noF3(pfx) && sz == 4) {
15516 modrm = getUChar(delta);
15517 if (epartIsReg(modrm)) {
15518 goto decode_failure;
15519 } else {
15520 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15521 putXMMReg( gregOfRexRM(pfx,modrm),
15522 loadLE(Ity_V128, mkexpr(addr)) );
15523 DIP("lddqu %s,%s\n", dis_buf,
15524 nameXMMReg(gregOfRexRM(pfx,modrm)));
15525 delta += alen;
15527 goto decode_success;
15529 break;
15531 default:
15532 goto decode_failure;
15536 decode_failure:
15537 *decode_OK = False;
15538 return deltaIN;
15540 decode_success:
15541 *decode_OK = True;
15542 return delta;
15546 /*------------------------------------------------------------*/
15547 /*--- ---*/
15548 /*--- Top-level SSSE3: dis_ESC_0F38__SupSSE3 ---*/
15549 /*--- ---*/
15550 /*------------------------------------------------------------*/
15552 static
15553 IRTemp math_PSHUFB_XMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ )
15555 IRTemp halfMask = newTemp(Ity_I64);
15556 assign(halfMask, mkU64(0x8F8F8F8F8F8F8F8FULL));
15557 IRExpr* mask = binop(Iop_64HLtoV128, mkexpr(halfMask), mkexpr(halfMask));
15558 IRTemp res = newTemp(Ity_V128);
15559 assign(res,
15560 binop(Iop_PermOrZero8x16,
15561 mkexpr(dV),
15562 // Mask off bits [6:4] of each control byte: only bit 7 and bits [3:0] matter
15563 binop(Iop_AndV128, mkexpr(sV), mask)
15565 return res;
15569 static
15570 IRTemp math_PSHUFB_YMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ )
15572 IRTemp sHi, sLo, dHi, dLo;
15573 sHi = sLo = dHi = dLo = IRTemp_INVALID;
15574 breakupV256toV128s( dV, &dHi, &dLo);
15575 breakupV256toV128s( sV, &sHi, &sLo);
15576 IRTemp res = newTemp(Ity_V256);
15577 assign(res, binop(Iop_V128HLtoV256,
15578 mkexpr(math_PSHUFB_XMM(dHi, sHi)),
15579 mkexpr(math_PSHUFB_XMM(dLo, sLo))));
15580 return res;
15584 static Long dis_PHADD_128 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
15585 Bool isAvx, UChar opc )
15587 IRTemp addr = IRTemp_INVALID;
15588 Int alen = 0;
15589 HChar dis_buf[50];
15590 const HChar* str = "???";
15591 IROp opV64 = Iop_INVALID;
15592 IROp opCatO = Iop_CatOddLanes16x4;
15593 IROp opCatE = Iop_CatEvenLanes16x4;
15594 IRTemp sV = newTemp(Ity_V128);
15595 IRTemp dV = newTemp(Ity_V128);
15596 IRTemp sHi = newTemp(Ity_I64);
15597 IRTemp sLo = newTemp(Ity_I64);
15598 IRTemp dHi = newTemp(Ity_I64);
15599 IRTemp dLo = newTemp(Ity_I64);
15600 UChar modrm = getUChar(delta);
15601 UInt rG = gregOfRexRM(pfx,modrm);
15602 UInt rV = isAvx ? getVexNvvvv(pfx) : rG;
15604 switch (opc) {
15605 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
15606 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
15607 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
15608 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
15609 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
15610 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
15611 default: vassert(0);
15613 if (opc == 0x02 || opc == 0x06) {
15614 opCatO = Iop_InterleaveHI32x2;
15615 opCatE = Iop_InterleaveLO32x2;
15618 assign( dV, getXMMReg(rV) );
15620 if (epartIsReg(modrm)) {
15621 UInt rE = eregOfRexRM(pfx,modrm);
15622 assign( sV, getXMMReg(rE) );
15623 DIP("%sph%s %s,%s\n", isAvx ? "v" : "", str,
15624 nameXMMReg(rE), nameXMMReg(rG));
15625 delta += 1;
15626 } else {
15627 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15628 if (!isAvx)
15629 gen_SEGV_if_not_16_aligned( addr );
15630 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15631 DIP("%sph%s %s,%s\n", isAvx ? "v" : "", str,
15632 dis_buf, nameXMMReg(rG));
15633 delta += alen;
15636 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
15637 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
15638 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
15639 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
15641 /* This isn't a particularly efficient way to compute the
15642 result, but at least it avoids a proliferation of IROps,
15643 hence avoids complicating all the backends. */
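/* Worked example (illustration), for PHADDW with 16-bit source lanes
   sV = [s7..s0] and dest lanes dV = [d7..d0]:
      CatEvenLanes16x4(sHi,sLo) = [s6 s4 s2 s0]
      CatOddLanes16x4 (sHi,sLo) = [s7 s5 s3 s1]
   so the lane-wise add below yields [s7+s6 s5+s4 s3+s2 s1+s0] for the
   upper 64 bits of the result, and likewise [d7+d6 .. d1+d0] for the
   lower 64 bits. */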
15645 (isAvx ? putYMMRegLoAndZU : putXMMReg)
15646 ( rG,
15647 binop(Iop_64HLtoV128,
15648 binop(opV64,
15649 binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
15650 binop(opCatO,mkexpr(sHi),mkexpr(sLo)) ),
15651 binop(opV64,
15652 binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
15653 binop(opCatO,mkexpr(dHi),mkexpr(dLo)) ) ) );
15654 return delta;
15658 static Long dis_PHADD_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
15659 UChar opc )
15661 IRTemp addr = IRTemp_INVALID;
15662 Int alen = 0;
15663 HChar dis_buf[50];
15664 const HChar* str = "???";
15665 IROp opV64 = Iop_INVALID;
15666 IROp opCatO = Iop_CatOddLanes16x4;
15667 IROp opCatE = Iop_CatEvenLanes16x4;
15668 IRTemp sV = newTemp(Ity_V256);
15669 IRTemp dV = newTemp(Ity_V256);
15670 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
15671 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
15672 UChar modrm = getUChar(delta);
15673 UInt rG = gregOfRexRM(pfx,modrm);
15674 UInt rV = getVexNvvvv(pfx);
15676 switch (opc) {
15677 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
15678 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
15679 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
15680 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
15681 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
15682 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
15683 default: vassert(0);
15685 if (opc == 0x02 || opc == 0x06) {
15686 opCatO = Iop_InterleaveHI32x2;
15687 opCatE = Iop_InterleaveLO32x2;
15690 assign( dV, getYMMReg(rV) );
15692 if (epartIsReg(modrm)) {
15693 UInt rE = eregOfRexRM(pfx,modrm);
15694 assign( sV, getYMMReg(rE) );
15695 DIP("vph%s %s,%s\n", str, nameYMMReg(rE), nameYMMReg(rG));
15696 delta += 1;
15697 } else {
15698 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15699 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
15700 DIP("vph%s %s,%s\n", str, dis_buf, nameYMMReg(rG));
15701 delta += alen;
15704 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
15705 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
15707 /* This isn't a particularly efficient way to compute the
15708 result, but at least it avoids a proliferation of IROps,
15709 hence avoids complicating all the backends. */
15711 putYMMReg( rG,
15712 binop(Iop_V128HLtoV256,
15713 binop(Iop_64HLtoV128,
15714 binop(opV64,
15715 binop(opCatE,mkexpr(s3),mkexpr(s2)),
15716 binop(opCatO,mkexpr(s3),mkexpr(s2)) ),
15717 binop(opV64,
15718 binop(opCatE,mkexpr(d3),mkexpr(d2)),
15719 binop(opCatO,mkexpr(d3),mkexpr(d2)) ) ),
15720 binop(Iop_64HLtoV128,
15721 binop(opV64,
15722 binop(opCatE,mkexpr(s1),mkexpr(s0)),
15723 binop(opCatO,mkexpr(s1),mkexpr(s0)) ),
15724 binop(opV64,
15725 binop(opCatE,mkexpr(d1),mkexpr(d0)),
15726 binop(opCatO,mkexpr(d1),mkexpr(d0)) ) ) ) );
15727 return delta;
15731 static IRTemp math_PMADDUBSW_128 ( IRTemp dV, IRTemp sV )
15733 IRTemp res = newTemp(Ity_V128);
15734 assign(res, binop(Iop_PwExtUSMulQAdd8x16, mkexpr(dV), mkexpr(sV)));
15735 return res;
15739 static
15740 IRTemp math_PMADDUBSW_256 ( IRTemp dV, IRTemp sV )
15742 IRTemp sHi, sLo, dHi, dLo;
15743 sHi = sLo = dHi = dLo = IRTemp_INVALID;
15744 breakupV256toV128s( dV, &dHi, &dLo);
15745 breakupV256toV128s( sV, &sHi, &sLo);
15746 IRTemp res = newTemp(Ity_V256);
15747 assign(res, binop(Iop_V128HLtoV256,
15748 mkexpr(math_PMADDUBSW_128(dHi, sHi)),
15749 mkexpr(math_PMADDUBSW_128(dLo, sLo))));
15750 return res;
15754 __attribute__((noinline))
15755 static
15756 Long dis_ESC_0F38__SupSSE3 ( Bool* decode_OK,
15757 const VexAbiInfo* vbi,
15758 Prefix pfx, Int sz, Long deltaIN )
15760 IRTemp addr = IRTemp_INVALID;
15761 UChar modrm = 0;
15762 Int alen = 0;
15763 HChar dis_buf[50];
15765 *decode_OK = False;
15767 Long delta = deltaIN;
15768 UChar opc = getUChar(delta);
15769 delta++;
15770 switch (opc) {
15772 case 0x00:
15773 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
15774 if (have66noF2noF3(pfx)
15775 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
15776 IRTemp sV = newTemp(Ity_V128);
15777 IRTemp dV = newTemp(Ity_V128);
15779 modrm = getUChar(delta);
15780 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
15782 if (epartIsReg(modrm)) {
15783 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
15784 delta += 1;
15785 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
15786 nameXMMReg(gregOfRexRM(pfx,modrm)));
15787 } else {
15788 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15789 gen_SEGV_if_not_16_aligned( addr );
15790 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15791 delta += alen;
15792 DIP("pshufb %s,%s\n", dis_buf,
15793 nameXMMReg(gregOfRexRM(pfx,modrm)));
15796 IRTemp res = math_PSHUFB_XMM( dV, sV );
15797 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(res));
15798 goto decode_success;
15800 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
15801 if (haveNo66noF2noF3(pfx) && sz == 4) {
15802 IRTemp sV = newTemp(Ity_I64);
15803 IRTemp dV = newTemp(Ity_I64);
15805 modrm = getUChar(delta);
15806 do_MMX_preamble();
15807 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15809 if (epartIsReg(modrm)) {
15810 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15811 delta += 1;
15812 DIP("pshufb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
15813 nameMMXReg(gregLO3ofRM(modrm)));
15814 } else {
15815 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15816 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15817 delta += alen;
15818 DIP("pshufb %s,%s\n", dis_buf,
15819 nameMMXReg(gregLO3ofRM(modrm)));
15822 putMMXReg(
15823 gregLO3ofRM(modrm),
15824 binop(
15825 Iop_PermOrZero8x8,
15826 mkexpr(dV),
15827 // Mask off bits [6:3] of each source operand lane
15828 binop(Iop_And64, mkexpr(sV), mkU64(0x8787878787878787ULL))
15831 goto decode_success;
15833 break;
15835 case 0x01:
15836 case 0x02:
15837 case 0x03:
15838 case 0x05:
15839 case 0x06:
15840 case 0x07:
15841 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
15842 G to G (xmm). */
15843 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
15844 G to G (xmm). */
15845 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
15846 xmm) and G to G (xmm). */
15847 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
15848 G to G (xmm). */
15849 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
15850 G to G (xmm). */
15851 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
15852 xmm) and G to G (xmm). */
15853 if (have66noF2noF3(pfx)
15854 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
15855 delta = dis_PHADD_128( vbi, pfx, delta, False/*isAvx*/, opc );
15856 goto decode_success;
15858 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
15859 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
15860 to G (mmx). */
15861 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
15862 to G (mmx). */
15863 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
15864 mmx) and G to G (mmx). */
15865 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
15866 to G (mmx). */
15867 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
15868 to G (mmx). */
15869 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
15870 mmx) and G to G (mmx). */
15871 if (haveNo66noF2noF3(pfx) && sz == 4) {
15872 const HChar* str = "???";
15873 IROp opV64 = Iop_INVALID;
15874 IROp opCatO = Iop_CatOddLanes16x4;
15875 IROp opCatE = Iop_CatEvenLanes16x4;
15876 IRTemp sV = newTemp(Ity_I64);
15877 IRTemp dV = newTemp(Ity_I64);
15879 modrm = getUChar(delta);
15881 switch (opc) {
15882 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
15883 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
15884 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
15885 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
15886 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
15887 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
15888 default: vassert(0);
15890 if (opc == 0x02 || opc == 0x06) {
15891 opCatO = Iop_InterleaveHI32x2;
15892 opCatE = Iop_InterleaveLO32x2;
15895 do_MMX_preamble();
15896 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15898 if (epartIsReg(modrm)) {
15899 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15900 delta += 1;
15901 DIP("ph%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
15902 nameMMXReg(gregLO3ofRM(modrm)));
15903 } else {
15904 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15905 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15906 delta += alen;
15907 DIP("ph%s %s,%s\n", str, dis_buf,
15908 nameMMXReg(gregLO3ofRM(modrm)));
15911 putMMXReg(
15912 gregLO3ofRM(modrm),
15913 binop(opV64,
15914 binop(opCatE,mkexpr(sV),mkexpr(dV)),
15915 binop(opCatO,mkexpr(sV),mkexpr(dV))
15918 goto decode_success;
15920 break;
15922 case 0x04:
15923 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
15924 Unsigned Bytes (XMM) */
15925 if (have66noF2noF3(pfx)
15926 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
15927 IRTemp sV = newTemp(Ity_V128);
15928 IRTemp dV = newTemp(Ity_V128);
15929 modrm = getUChar(delta);
15930 UInt rG = gregOfRexRM(pfx,modrm);
15932 assign( dV, getXMMReg(rG) );
15934 if (epartIsReg(modrm)) {
15935 UInt rE = eregOfRexRM(pfx,modrm);
15936 assign( sV, getXMMReg(rE) );
15937 delta += 1;
15938 DIP("pmaddubsw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15939 } else {
15940 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15941 gen_SEGV_if_not_16_aligned( addr );
15942 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15943 delta += alen;
15944 DIP("pmaddubsw %s,%s\n", dis_buf, nameXMMReg(rG));
15947 putXMMReg( rG, mkexpr( math_PMADDUBSW_128( dV, sV ) ) );
15948 goto decode_success;
15950 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
15951 Unsigned Bytes (MMX) */
15952 if (haveNo66noF2noF3(pfx) && sz == 4) {
15953 IRTemp sV = newTemp(Ity_I64);
15954 IRTemp dV = newTemp(Ity_I64);
15955 IRTemp sVoddsSX = newTemp(Ity_I64);
15956 IRTemp sVevensSX = newTemp(Ity_I64);
15957 IRTemp dVoddsZX = newTemp(Ity_I64);
15958 IRTemp dVevensZX = newTemp(Ity_I64);
15960 modrm = getUChar(delta);
15961 do_MMX_preamble();
15962 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15964 if (epartIsReg(modrm)) {
15965 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15966 delta += 1;
15967 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
15968 nameMMXReg(gregLO3ofRM(modrm)));
15969 } else {
15970 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15971 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15972 delta += alen;
15973 DIP("pmaddubsw %s,%s\n", dis_buf,
15974 nameMMXReg(gregLO3ofRM(modrm)));
15977 /* compute dV unsigned x sV signed */
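/* In each 16-bit lane: SarN16x4(sV,8) gives the sign-extended high
   (odd-numbered) byte of sV, and Shl-then-Sar by 8 gives the
   sign-extended low (even-numbered) byte; the ShrN16x4 forms give the
   corresponding zero-extended bytes of dV.  Matching odd and even
   products are then combined with a signed saturating add, which is
   the PMADDUBSW semantics. */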
15978 assign( sVoddsSX,
15979 binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
15980 assign( sVevensSX,
15981 binop(Iop_SarN16x4,
15982 binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)),
15983 mkU8(8)) );
15984 assign( dVoddsZX,
15985 binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
15986 assign( dVevensZX,
15987 binop(Iop_ShrN16x4,
15988 binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
15989 mkU8(8)) );
15991 putMMXReg(
15992 gregLO3ofRM(modrm),
15993 binop(Iop_QAdd16Sx4,
15994 binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
15995 binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
15998 goto decode_success;
16000 break;
16002 case 0x08:
16003 case 0x09:
16004 case 0x0A:
16005 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
16006 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
16007 /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
16008 if (have66noF2noF3(pfx)
16009 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
16010 IRTemp sV = newTemp(Ity_V128);
16011 IRTemp dV = newTemp(Ity_V128);
16012 IRTemp sHi = newTemp(Ity_I64);
16013 IRTemp sLo = newTemp(Ity_I64);
16014 IRTemp dHi = newTemp(Ity_I64);
16015 IRTemp dLo = newTemp(Ity_I64);
16016 const HChar* str = "???";
16017 Int laneszB = 0;
16019 switch (opc) {
16020 case 0x08: laneszB = 1; str = "b"; break;
16021 case 0x09: laneszB = 2; str = "w"; break;
16022 case 0x0A: laneszB = 4; str = "d"; break;
16023 default: vassert(0);
16026 modrm = getUChar(delta);
16027 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
16029 if (epartIsReg(modrm)) {
16030 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
16031 delta += 1;
16032 DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
16033 nameXMMReg(gregOfRexRM(pfx,modrm)));
16034 } else {
16035 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16036 gen_SEGV_if_not_16_aligned( addr );
16037 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16038 delta += alen;
16039 DIP("psign%s %s,%s\n", str, dis_buf,
16040 nameXMMReg(gregOfRexRM(pfx,modrm)));
16043 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
16044 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
16045 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
16046 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
16048 putXMMReg(
16049 gregOfRexRM(pfx,modrm),
16050 binop(Iop_64HLtoV128,
16051 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
16052 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
16055 goto decode_success;
16057 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */
16058 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
16059 /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
16060 if (haveNo66noF2noF3(pfx) && sz == 4) {
16061 IRTemp sV = newTemp(Ity_I64);
16062 IRTemp dV = newTemp(Ity_I64);
16063 const HChar* str = "???";
16064 Int laneszB = 0;
16066 switch (opc) {
16067 case 0x08: laneszB = 1; str = "b"; break;
16068 case 0x09: laneszB = 2; str = "w"; break;
16069 case 0x0A: laneszB = 4; str = "d"; break;
16070 default: vassert(0);
16073 modrm = getUChar(delta);
16074 do_MMX_preamble();
16075 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
16077 if (epartIsReg(modrm)) {
16078 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
16079 delta += 1;
16080 DIP("psign%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
16081 nameMMXReg(gregLO3ofRM(modrm)));
16082 } else {
16083 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16084 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16085 delta += alen;
16086 DIP("psign%s %s,%s\n", str, dis_buf,
16087 nameMMXReg(gregLO3ofRM(modrm)));
16090 putMMXReg(
16091 gregLO3ofRM(modrm),
16092 dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
16094 goto decode_success;
16096 break;
16098 case 0x0B:
16099 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
16100 Scale (XMM) */
16101 if (have66noF2noF3(pfx)
16102 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
16103 IRTemp sV = newTemp(Ity_V128);
16104 IRTemp dV = newTemp(Ity_V128);
16105 IRTemp sHi = newTemp(Ity_I64);
16106 IRTemp sLo = newTemp(Ity_I64);
16107 IRTemp dHi = newTemp(Ity_I64);
16108 IRTemp dLo = newTemp(Ity_I64);
16110 modrm = getUChar(delta);
16111 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
16113 if (epartIsReg(modrm)) {
16114 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
16115 delta += 1;
16116 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
16117 nameXMMReg(gregOfRexRM(pfx,modrm)));
16118 } else {
16119 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16120 gen_SEGV_if_not_16_aligned( addr );
16121 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16122 delta += alen;
16123 DIP("pmulhrsw %s,%s\n", dis_buf,
16124 nameXMMReg(gregOfRexRM(pfx,modrm)));
16127 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
16128 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
16129 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
16130 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
16132 putXMMReg(
16133 gregOfRexRM(pfx,modrm),
16134 binop(Iop_64HLtoV128,
16135 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
16136 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
16139 goto decode_success;
16141 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
16142 (MMX) */
16143 if (haveNo66noF2noF3(pfx) && sz == 4) {
16144 IRTemp sV = newTemp(Ity_I64);
16145 IRTemp dV = newTemp(Ity_I64);
16147 modrm = getUChar(delta);
16148 do_MMX_preamble();
16149 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
16151 if (epartIsReg(modrm)) {
16152 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
16153 delta += 1;
16154 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
16155 nameMMXReg(gregLO3ofRM(modrm)));
16156 } else {
16157 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16158 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16159 delta += alen;
16160 DIP("pmulhrsw %s,%s\n", dis_buf,
16161 nameMMXReg(gregLO3ofRM(modrm)));
16164 putMMXReg(
16165 gregLO3ofRM(modrm),
16166 dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
16168 goto decode_success;
16170 break;
16172 case 0x1C:
16173 case 0x1D:
16174 case 0x1E:
16175 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
16176 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
16177 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
16178 if (have66noF2noF3(pfx)
16179 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
16180 IRTemp sV = newTemp(Ity_V128);
16181 const HChar* str = "???";
16182 Int laneszB = 0;
16184 switch (opc) {
16185 case 0x1C: laneszB = 1; str = "b"; break;
16186 case 0x1D: laneszB = 2; str = "w"; break;
16187 case 0x1E: laneszB = 4; str = "d"; break;
16188 default: vassert(0);
16191 modrm = getUChar(delta);
16192 if (epartIsReg(modrm)) {
16193 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
16194 delta += 1;
16195 DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
16196 nameXMMReg(gregOfRexRM(pfx,modrm)));
16197 } else {
16198 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16199 gen_SEGV_if_not_16_aligned( addr );
16200 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16201 delta += alen;
16202 DIP("pabs%s %s,%s\n", str, dis_buf,
16203 nameXMMReg(gregOfRexRM(pfx,modrm)));
16206 putXMMReg( gregOfRexRM(pfx,modrm),
16207 mkexpr(math_PABS_XMM(sV, laneszB)) );
16208 goto decode_success;
16210 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */
16211 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
16212 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
16213 if (haveNo66noF2noF3(pfx) && sz == 4) {
16214 IRTemp sV = newTemp(Ity_I64);
16215 const HChar* str = "???";
16216 Int laneszB = 0;
16218 switch (opc) {
16219 case 0x1C: laneszB = 1; str = "b"; break;
16220 case 0x1D: laneszB = 2; str = "w"; break;
16221 case 0x1E: laneszB = 4; str = "d"; break;
16222 default: vassert(0);
16225 modrm = getUChar(delta);
16226 do_MMX_preamble();
16228 if (epartIsReg(modrm)) {
16229 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
16230 delta += 1;
16231 DIP("pabs%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
16232 nameMMXReg(gregLO3ofRM(modrm)));
16233 } else {
16234 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16235 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16236 delta += alen;
16237 DIP("pabs%s %s,%s\n", str, dis_buf,
16238 nameMMXReg(gregLO3ofRM(modrm)));
16241 putMMXReg( gregLO3ofRM(modrm),
16242 mkexpr(math_PABS_MMX( sV, laneszB )) );
16243 goto decode_success;
16245 break;
16247 default:
16248 break;
16252 //decode_failure:
16253 *decode_OK = False;
16254 return deltaIN;
16256 decode_success:
16257 *decode_OK = True;
16258 return delta;
16262 /*------------------------------------------------------------*/
16263 /*--- ---*/
16264 /*--- Top-level SSSE3: dis_ESC_0F3A__SupSSE3 ---*/
16265 /*--- ---*/
16266 /*------------------------------------------------------------*/
16268 __attribute__((noinline))
16269 static
16270 Long dis_ESC_0F3A__SupSSE3 ( Bool* decode_OK,
16271 const VexAbiInfo* vbi,
16272 Prefix pfx, Int sz, Long deltaIN )
16274 Long d64 = 0;
16275 IRTemp addr = IRTemp_INVALID;
16276 UChar modrm = 0;
16277 Int alen = 0;
16278 HChar dis_buf[50];
16280 *decode_OK = False;
16282 Long delta = deltaIN;
16283 UChar opc = getUChar(delta);
16284 delta++;
16285 switch (opc) {
16287 case 0x0F:
16288 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
16289 if (have66noF2noF3(pfx)
16290 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
16291 IRTemp sV = newTemp(Ity_V128);
16292 IRTemp dV = newTemp(Ity_V128);
16294 modrm = getUChar(delta);
16295 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
16297 if (epartIsReg(modrm)) {
16298 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
16299 d64 = (Long)getUChar(delta+1);
16300 delta += 1+1;
16301 DIP("palignr $%lld,%s,%s\n", d64,
16302 nameXMMReg(eregOfRexRM(pfx,modrm)),
16303 nameXMMReg(gregOfRexRM(pfx,modrm)));
16304 } else {
16305 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
16306 gen_SEGV_if_not_16_aligned( addr );
16307 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16308 d64 = (Long)getUChar(delta+alen);
16309 delta += alen+1;
16310 DIP("palignr $%lld,%s,%s\n", d64,
16311 dis_buf,
16312 nameXMMReg(gregOfRexRM(pfx,modrm)));
16315 IRTemp res = math_PALIGNR_XMM( sV, dV, d64 );
16316 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
16317 goto decode_success;
16319 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
16320 if (haveNo66noF2noF3(pfx) && sz == 4) {
16321 IRTemp sV = newTemp(Ity_I64);
16322 IRTemp dV = newTemp(Ity_I64);
16323 IRTemp res = newTemp(Ity_I64);
16325 modrm = getUChar(delta);
16326 do_MMX_preamble();
16327 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
16329 if (epartIsReg(modrm)) {
16330 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
16331 d64 = (Long)getUChar(delta+1);
16332 delta += 1+1;
16333 DIP("palignr $%lld,%s,%s\n", d64,
16334 nameMMXReg(eregLO3ofRM(modrm)),
16335 nameMMXReg(gregLO3ofRM(modrm)));
16336 } else {
16337 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
16338 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16339 d64 = (Long)getUChar(delta+alen);
16340 delta += alen+1;
16341 DIP("palignr $%lld,%s,%s\n", d64,
16342 dis_buf,
16343 nameMMXReg(gregLO3ofRM(modrm)));
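/* The 64-bit result is, in effect, (dV:sV) >> (8*d64) truncated to
   64 bits: for d64 in 1..7 the low bytes come from sV with dV bytes
   shifted in at the top, for 8..15 only (shifted) dV bytes remain,
   and for 16 and above the result is zero. */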
16346 if (d64 == 0) {
16347 assign( res, mkexpr(sV) );
16349 else if (d64 >= 1 && d64 <= 7) {
16350 assign(res,
16351 binop(Iop_Or64,
16352 binop(Iop_Shr64, mkexpr(sV), mkU8(8*d64)),
16353 binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d64))
16354 )));
16356 else if (d64 == 8) {
16357 assign( res, mkexpr(dV) );
16359 else if (d64 >= 9 && d64 <= 15) {
16360 assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d64-8))) );
16362 else if (d64 >= 16 && d64 <= 255) {
16363 assign( res, mkU64(0) );
16365 else
16366 vassert(0);
16368 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
16369 goto decode_success;
16371 break;
16373 default:
16374 break;
16378 //decode_failure:
16379 *decode_OK = False;
16380 return deltaIN;
16382 decode_success:
16383 *decode_OK = True;
16384 return delta;
16388 /*------------------------------------------------------------*/
16389 /*--- ---*/
16390 /*--- Top-level SSE4: dis_ESC_0F__SSE4 ---*/
16391 /*--- ---*/
16392 /*------------------------------------------------------------*/
16394 __attribute__((noinline))
16395 static
16396 Long dis_ESC_0F__SSE4 ( Bool* decode_OK,
16397 const VexArchInfo* archinfo,
16398 const VexAbiInfo* vbi,
16399 Prefix pfx, Int sz, Long deltaIN )
16401 IRTemp addr = IRTemp_INVALID;
16402 IRType ty = Ity_INVALID;
16403 UChar modrm = 0;
16404 Int alen = 0;
16405 HChar dis_buf[50];
16407 *decode_OK = False;
16409 Long delta = deltaIN;
16410 UChar opc = getUChar(delta);
16411 delta++;
16412 switch (opc) {
16414 case 0xB8:
16415 /* F3 0F B8 = POPCNT{W,L,Q}
16416 Count the number of 1 bits in a register */
16418 if (haveF3noF2(pfx) /* so both 66 and REX.W are possibilities */
16419 && (sz == 2 || sz == 4 || sz == 8)) {
16420 /*IRType*/ ty = szToITy(sz);
16421 IRTemp src = newTemp(ty);
16422 modrm = getUChar(delta);
16423 if (epartIsReg(modrm)) {
16424 assign(src, getIRegE(sz, pfx, modrm));
16425 delta += 1;
16426 DIP("popcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
16427 nameIRegG(sz, pfx, modrm));
16428 } else {
16429 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
16430 assign(src, loadLE(ty, mkexpr(addr)));
16431 delta += alen;
16432 DIP("popcnt%c %s, %s\n", nameISize(sz), dis_buf,
16433 nameIRegG(sz, pfx, modrm));
16436 IRTemp result = gen_POPCOUNT(ty, src);
16437 putIRegG(sz, pfx, modrm, mkexpr(result));
16439 // Update flags. This is pretty lame .. perhaps can do better
16440 // if this turns out to be performance critical.
16441 // O S A C P are cleared. Z is set if SRC == 0.
16442 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16443 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16444 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16445 stmt( IRStmt_Put( OFFB_CC_DEP1,
16446 binop(Iop_Shl64,
16447 unop(Iop_1Uto64,
16448 binop(Iop_CmpEQ64,
16449 widenUto64(mkexpr(src)),
16450 mkU64(0))),
16451 mkU8(AMD64G_CC_SHIFT_Z))));
16453 goto decode_success;
16455 break;
16457 case 0xBC:
16458 /* F3 0F BC -- TZCNT (count trailing zeroes).  A BMI extension,
16459 which we can only decode if we're sure this is a BMI1-capable cpu
16460 that supports TZCNT, since otherwise it's BSF, which behaves
16461 differently on zero source. */
16462 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
16463 && (sz == 2 || sz == 4 || sz == 8)
16464 && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_BMI)) {
16465 /*IRType*/ ty = szToITy(sz);
16466 IRTemp src = newTemp(ty);
16467 modrm = getUChar(delta);
16468 if (epartIsReg(modrm)) {
16469 assign(src, getIRegE(sz, pfx, modrm));
16470 delta += 1;
16471 DIP("tzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
16472 nameIRegG(sz, pfx, modrm));
16473 } else {
16474 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
16475 assign(src, loadLE(ty, mkexpr(addr)));
16476 delta += alen;
16477 DIP("tzcnt%c %s, %s\n", nameISize(sz), dis_buf,
16478 nameIRegG(sz, pfx, modrm));
16481 IRTemp res = gen_TZCNT(ty, src);
16482 putIRegG(sz, pfx, modrm, mkexpr(res));
16484 // Update flags. This is pretty lame .. perhaps can do better
16485 // if this turns out to be performance critical.
16486 // O S A P are cleared. Z is set if RESULT == 0.
16487 // C is set if SRC is zero.
16488 IRTemp src64 = newTemp(Ity_I64);
16489 IRTemp res64 = newTemp(Ity_I64);
16490 assign(src64, widenUto64(mkexpr(src)));
16491 assign(res64, widenUto64(mkexpr(res)));
16493 IRTemp oszacp = newTemp(Ity_I64);
16494 assign(
16495 oszacp,
16496 binop(Iop_Or64,
16497 binop(Iop_Shl64,
16498 unop(Iop_1Uto64,
16499 binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))),
16500 mkU8(AMD64G_CC_SHIFT_Z)),
16501 binop(Iop_Shl64,
16502 unop(Iop_1Uto64,
16503 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))),
16504 mkU8(AMD64G_CC_SHIFT_C))
16508 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16509 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16510 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16511 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));
16513 goto decode_success;
16515 break;
16517 case 0xBD:
16518 /* F3 0F BD -- LZCNT (count leading zeroes).  An AMD extension,
16519 which we can only decode if we're sure this is an AMD cpu
16520 that supports LZCNT, since otherwise it's BSR, which behaves
16521 differently. Bizarrely, my Sandy Bridge also accepts these
16522 instructions but produces different results. */
16523 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
16524 && (sz == 2 || sz == 4 || sz == 8)
16525 && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT)) {
16526 /*IRType*/ ty = szToITy(sz);
16527 IRTemp src = newTemp(ty);
16528 modrm = getUChar(delta);
16529 if (epartIsReg(modrm)) {
16530 assign(src, getIRegE(sz, pfx, modrm));
16531 delta += 1;
16532 DIP("lzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
16533 nameIRegG(sz, pfx, modrm));
16534 } else {
16535 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
16536 assign(src, loadLE(ty, mkexpr(addr)));
16537 delta += alen;
16538 DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf,
16539 nameIRegG(sz, pfx, modrm));
16542 IRTemp res = gen_LZCNT(ty, src);
16543 putIRegG(sz, pfx, modrm, mkexpr(res));
16545 // Update flags. This is pretty lame .. perhaps can do better
16546 // if this turns out to be performance critical.
16547 // O S A P are cleared. Z is set if RESULT == 0.
16548 // C is set if SRC is zero.
16549 IRTemp src64 = newTemp(Ity_I64);
16550 IRTemp res64 = newTemp(Ity_I64);
16551 assign(src64, widenUto64(mkexpr(src)));
16552 assign(res64, widenUto64(mkexpr(res)));
16554 IRTemp oszacp = newTemp(Ity_I64);
16555 assign(
16556 oszacp,
16557 binop(Iop_Or64,
16558 binop(Iop_Shl64,
16559 unop(Iop_1Uto64,
16560 binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))),
16561 mkU8(AMD64G_CC_SHIFT_Z)),
16562 binop(Iop_Shl64,
16563 unop(Iop_1Uto64,
16564 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))),
16565 mkU8(AMD64G_CC_SHIFT_C))
16569 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16570 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16571 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16572 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));
16574 goto decode_success;
16576 break;
16578 default:
16579 break;
16583 //decode_failure:
16584 *decode_OK = False;
16585 return deltaIN;
16587 decode_success:
16588 *decode_OK = True;
16589 return delta;
16593 /*------------------------------------------------------------*/
16594 /*--- ---*/
16595 /*--- Top-level SSE4: dis_ESC_0F38__SSE4 ---*/
16596 /*--- ---*/
16597 /*------------------------------------------------------------*/
16599 static IRTemp math_PBLENDVB_128 ( IRTemp vecE, IRTemp vecG,
16600 IRTemp vec0/*controlling mask*/,
16601 UInt gran, IROp opSAR )
16603 /* The tricky bit is to convert vec0 into a suitable mask, by
16604 copying the most significant bit of each lane into all positions
16605 in the lane. */
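/* For example (illustration), with gran == 1 and opSAR == Iop_SarN8x16,
   a control byte 0x80 becomes 0xFF after the arithmetic shift by 7 while
   0x7F becomes 0x00, so a mask lane is all-ones exactly when the control
   lane's MSB is set; the final OR then takes vecE there and vecG
   elsewhere. */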
16606 IRTemp sh = newTemp(Ity_I8);
16607 assign(sh, mkU8(8 * gran - 1));
16609 IRTemp mask = newTemp(Ity_V128);
16610 assign(mask, binop(opSAR, mkexpr(vec0), mkexpr(sh)));
16612 IRTemp notmask = newTemp(Ity_V128);
16613 assign(notmask, unop(Iop_NotV128, mkexpr(mask)));
16615 IRTemp res = newTemp(Ity_V128);
16616 assign(res, binop(Iop_OrV128,
16617 binop(Iop_AndV128, mkexpr(vecE), mkexpr(mask)),
16618 binop(Iop_AndV128, mkexpr(vecG), mkexpr(notmask))));
16619 return res;
16622 static IRTemp math_PBLENDVB_256 ( IRTemp vecE, IRTemp vecG,
16623 IRTemp vec0/*controlling mask*/,
16624 UInt gran, IROp opSAR128 )
16626 /* The tricky bit is to convert vec0 into a suitable mask, by
16627 copying the most significant bit of each lane into all positions
16628 in the lane. */
16629 IRTemp sh = newTemp(Ity_I8);
16630 assign(sh, mkU8(8 * gran - 1));
16632 IRTemp vec0Hi = IRTemp_INVALID;
16633 IRTemp vec0Lo = IRTemp_INVALID;
16634 breakupV256toV128s( vec0, &vec0Hi, &vec0Lo );
16636 IRTemp mask = newTemp(Ity_V256);
16637 assign(mask, binop(Iop_V128HLtoV256,
16638 binop(opSAR128, mkexpr(vec0Hi), mkexpr(sh)),
16639 binop(opSAR128, mkexpr(vec0Lo), mkexpr(sh))));
16641 IRTemp notmask = newTemp(Ity_V256);
16642 assign(notmask, unop(Iop_NotV256, mkexpr(mask)));
16644 IRTemp res = newTemp(Ity_V256);
16645 assign(res, binop(Iop_OrV256,
16646 binop(Iop_AndV256, mkexpr(vecE), mkexpr(mask)),
16647 binop(Iop_AndV256, mkexpr(vecG), mkexpr(notmask))));
16648 return res;
16651 static Long dis_VBLENDV_128 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
16652 const HChar *name, UInt gran, IROp opSAR )
16654 IRTemp addr = IRTemp_INVALID;
16655 Int alen = 0;
16656 HChar dis_buf[50];
16657 UChar modrm = getUChar(delta);
16658 UInt rG = gregOfRexRM(pfx, modrm);
16659 UInt rV = getVexNvvvv(pfx);
16660 UInt rIS4 = 0xFF; /* invalid */
16661 IRTemp vecE = newTemp(Ity_V128);
16662 IRTemp vecV = newTemp(Ity_V128);
16663 IRTemp vecIS4 = newTemp(Ity_V128);
16664 if (epartIsReg(modrm)) {
16665 delta++;
16666 UInt rE = eregOfRexRM(pfx, modrm);
16667 assign(vecE, getXMMReg(rE));
16668 UChar ib = getUChar(delta);
16669 rIS4 = (ib >> 4) & 0xF;
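/* The fourth (is4) source register is encoded in bits 7:4 of the
   trailing immediate byte, extracted here and again in the memory
   case below. */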
16670 DIP("%s %s,%s,%s,%s\n",
16671 name, nameXMMReg(rIS4), nameXMMReg(rE),
16672 nameXMMReg(rV), nameXMMReg(rG));
16673 } else {
16674 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
16675 delta += alen;
16676 assign(vecE, loadLE(Ity_V128, mkexpr(addr)));
16677 UChar ib = getUChar(delta);
16678 rIS4 = (ib >> 4) & 0xF;
16679 DIP("%s %s,%s,%s,%s\n",
16680 name, nameXMMReg(rIS4), dis_buf, nameXMMReg(rV), nameXMMReg(rG));
16682 delta++;
16683 assign(vecV, getXMMReg(rV));
16684 assign(vecIS4, getXMMReg(rIS4));
16685 IRTemp res = math_PBLENDVB_128( vecE, vecV, vecIS4, gran, opSAR );
16686 putYMMRegLoAndZU( rG, mkexpr(res) );
16687 return delta;
16690 static Long dis_VBLENDV_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
16691 const HChar *name, UInt gran, IROp opSAR128 )
16693 IRTemp addr = IRTemp_INVALID;
16694 Int alen = 0;
16695 HChar dis_buf[50];
16696 UChar modrm = getUChar(delta);
16697 UInt rG = gregOfRexRM(pfx, modrm);
16698 UInt rV = getVexNvvvv(pfx);
16699 UInt rIS4 = 0xFF; /* invalid */
16700 IRTemp vecE = newTemp(Ity_V256);
16701 IRTemp vecV = newTemp(Ity_V256);
16702 IRTemp vecIS4 = newTemp(Ity_V256);
16703 if (epartIsReg(modrm)) {
16704 delta++;
16705 UInt rE = eregOfRexRM(pfx, modrm);
16706 assign(vecE, getYMMReg(rE));
16707 UChar ib = getUChar(delta);
16708 rIS4 = (ib >> 4) & 0xF;
16709 DIP("%s %s,%s,%s,%s\n",
16710 name, nameYMMReg(rIS4), nameYMMReg(rE),
16711 nameYMMReg(rV), nameYMMReg(rG));
16712 } else {
16713 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
16714 delta += alen;
16715 assign(vecE, loadLE(Ity_V256, mkexpr(addr)));
16716 UChar ib = getUChar(delta);
16717 rIS4 = (ib >> 4) & 0xF;
16718 DIP("%s %s,%s,%s,%s\n",
16719 name, nameYMMReg(rIS4), dis_buf, nameYMMReg(rV), nameYMMReg(rG));
16721 delta++;
16722 assign(vecV, getYMMReg(rV));
16723 assign(vecIS4, getYMMReg(rIS4));
16724 IRTemp res = math_PBLENDVB_256( vecE, vecV, vecIS4, gran, opSAR128 );
16725 putYMMReg( rG, mkexpr(res) );
16726 return delta;
16729 static void finish_xTESTy ( IRTemp andV, IRTemp andnV, Int sign )
16731 /* Set Z=1 iff (vecE & vecG) == 0
16732 Set C=1 iff (vecE & not vecG) == 0 */
16735 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
16737 /* andV resp. andnV, reduced to 64-bit values, by or-ing the top
16738 and bottom 64-bits together. It relies on this trick:
16740 InterleaveLO64x2([a,b],[c,d]) == [b,d] hence
16742 InterleaveLO64x2([a,b],[a,b]) == [b,b] and similarly
16743 InterleaveHI64x2([a,b],[a,b]) == [a,a]
16745 and so the OR of the above 2 exprs produces
16746 [a OR b, a OR b], from which we simply take the lower half. */
16748 IRTemp and64 = newTemp(Ity_I64);
16749 IRTemp andn64 = newTemp(Ity_I64);
16751 assign(and64,
16752 unop(Iop_V128to64,
16753 binop(Iop_OrV128,
16754 binop(Iop_InterleaveLO64x2,
16755 mkexpr(andV), mkexpr(andV)),
16756 binop(Iop_InterleaveHI64x2,
16757 mkexpr(andV), mkexpr(andV)))));
16759 assign(andn64,
16760 unop(Iop_V128to64,
16761 binop(Iop_OrV128,
16762 binop(Iop_InterleaveLO64x2,
16763 mkexpr(andnV), mkexpr(andnV)),
16764 binop(Iop_InterleaveHI64x2,
16765 mkexpr(andnV), mkexpr(andnV)))));
16767 IRTemp z64 = newTemp(Ity_I64);
16768 IRTemp c64 = newTemp(Ity_I64);
16769 if (sign == 64) {
16770 /* When only interested in the most significant bit, just shift
16771 arithmetically right and negate. */
16772 assign(z64,
16773 unop(Iop_Not64,
16774 binop(Iop_Sar64, mkexpr(and64), mkU8(63))));
16776 assign(c64,
16777 unop(Iop_Not64,
16778 binop(Iop_Sar64, mkexpr(andn64), mkU8(63))));
16779 } else {
16780 if (sign == 32) {
16781 /* When interested in bit 31 and bit 63, mask those bits and
16782 fallthrough into the PTEST handling. */
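/* Since and64/andn64 are the OR of the two 64-bit halves, their
   bits 63 and 31 are the ORs of the sign bits of 32-bit lanes
   {3,1} and {2,0} respectively; masking with 0x8000000080000000
   means the zero tests below check that all four 32-bit sign bits
   are clear, which is the VTESTPS condition. */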
16783 IRTemp t0 = newTemp(Ity_I64);
16784 IRTemp t1 = newTemp(Ity_I64);
16785 IRTemp t2 = newTemp(Ity_I64);
16786 assign(t0, mkU64(0x8000000080000000ULL));
16787 assign(t1, binop(Iop_And64, mkexpr(and64), mkexpr(t0)));
16788 assign(t2, binop(Iop_And64, mkexpr(andn64), mkexpr(t0)));
16789 and64 = t1;
16790 andn64 = t2;
16792 /* Now convert and64, andn64 to all-zeroes or all-1s, so we can
16793 slice out the Z and C bits conveniently. We use the standard
16794 trick all-zeroes -> all-zeroes, anything-else -> all-ones
16795 done by "(x | -x) >>s (word-size - 1)".
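      As a worked check: x = 8 gives x | -x = 0xFFFFFFFFFFFFFFF8, which
      shifted arithmetically right by 63 is all ones, so the Not64 below
      yields 0 (flag clear); x = 0 gives 0 >>s 63 = 0, and Not64 of that
      is all ones (flag set).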
16797 assign(z64,
16798 unop(Iop_Not64,
16799 binop(Iop_Sar64,
16800 binop(Iop_Or64,
16801 binop(Iop_Sub64, mkU64(0), mkexpr(and64)),
16802 mkexpr(and64)), mkU8(63))));
16804 assign(c64,
16805 unop(Iop_Not64,
16806 binop(Iop_Sar64,
16807 binop(Iop_Or64,
16808 binop(Iop_Sub64, mkU64(0), mkexpr(andn64)),
16809 mkexpr(andn64)), mkU8(63))));
16812 /* And finally, slice out the Z and C flags and set the flags
16813 thunk to COPY for them. OSAP are set to zero. */
16814 IRTemp newOSZACP = newTemp(Ity_I64);
16815 assign(newOSZACP,
16816 binop(Iop_Or64,
16817 binop(Iop_And64, mkexpr(z64), mkU64(AMD64G_CC_MASK_Z)),
16818 binop(Iop_And64, mkexpr(c64), mkU64(AMD64G_CC_MASK_C))));
16820 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(newOSZACP)));
16821 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16822 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16823 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16827 /* Handles 128 bit versions of PTEST, VTESTPS or VTESTPD.
16828 sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
16829 static Long dis_xTESTy_128 ( const VexAbiInfo* vbi, Prefix pfx,
16830 Long delta, Bool isAvx, Int sign )
16832 IRTemp addr = IRTemp_INVALID;
16833 Int alen = 0;
16834 HChar dis_buf[50];
16835 UChar modrm = getUChar(delta);
16836 UInt rG = gregOfRexRM(pfx, modrm);
16837 IRTemp vecE = newTemp(Ity_V128);
16838 IRTemp vecG = newTemp(Ity_V128);
16840 if ( epartIsReg(modrm) ) {
16841 UInt rE = eregOfRexRM(pfx, modrm);
16842 assign(vecE, getXMMReg(rE));
16843 delta += 1;
16844 DIP( "%s%stest%s %s,%s\n",
16845 isAvx ? "v" : "", sign == 0 ? "p" : "",
16846 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
16847 nameXMMReg(rE), nameXMMReg(rG) );
16848 } else {
16849 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16850 if (!isAvx)
16851 gen_SEGV_if_not_16_aligned( addr );
16852 assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
16853 delta += alen;
16854 DIP( "%s%stest%s %s,%s\n",
16855 isAvx ? "v" : "", sign == 0 ? "p" : "",
16856 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
16857 dis_buf, nameXMMReg(rG) );
16860 assign(vecG, getXMMReg(rG));
16862 /* Set Z=1 iff (vecE & vecG) == 0
16863 Set C=1 iff (vecE & not vecG) == 0
16866 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
16867 IRTemp andV = newTemp(Ity_V128);
16868 IRTemp andnV = newTemp(Ity_V128);
16869 assign(andV, binop(Iop_AndV128, mkexpr(vecE), mkexpr(vecG)));
16870 assign(andnV, binop(Iop_AndV128,
16871 mkexpr(vecE),
16872 binop(Iop_XorV128, mkexpr(vecG),
16873 mkV128(0xFFFF))));
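   /* Note: mkV128 takes a 16-bit mask with one bit per byte, so
      mkV128(0xFFFF) is the all-ones V128 and the Xor above computes
      not(vecG). */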
16875 finish_xTESTy ( andV, andnV, sign );
16876 return delta;
16880 /* Handles 256 bit versions of PTEST, VTESTPS or VTESTPD.
16881 sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
16882 static Long dis_xTESTy_256 ( const VexAbiInfo* vbi, Prefix pfx,
16883 Long delta, Int sign )
16885 IRTemp addr = IRTemp_INVALID;
16886 Int alen = 0;
16887 HChar dis_buf[50];
16888 UChar modrm = getUChar(delta);
16889 UInt rG = gregOfRexRM(pfx, modrm);
16890 IRTemp vecE = newTemp(Ity_V256);
16891 IRTemp vecG = newTemp(Ity_V256);
16893 if ( epartIsReg(modrm) ) {
16894 UInt rE = eregOfRexRM(pfx, modrm);
16895 assign(vecE, getYMMReg(rE));
16896 delta += 1;
16897 DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
16898 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
16899 nameYMMReg(rE), nameYMMReg(rG) );
16900 } else {
16901 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16902 assign(vecE, loadLE( Ity_V256, mkexpr(addr) ));
16903 delta += alen;
16904 DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
16905 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
16906 dis_buf, nameYMMReg(rG) );
16909 assign(vecG, getYMMReg(rG));
16911 /* Set Z=1 iff (vecE & vecG) == 0
16912 Set C=1 iff (vecE & not vecG) == 0
16915 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
16916 IRTemp andV = newTemp(Ity_V256);
16917 IRTemp andnV = newTemp(Ity_V256);
16918 assign(andV, binop(Iop_AndV256, mkexpr(vecE), mkexpr(vecG)));
16919 assign(andnV, binop(Iop_AndV256,
16920 mkexpr(vecE), unop(Iop_NotV256, mkexpr(vecG))));
16922 IRTemp andVhi = IRTemp_INVALID;
16923 IRTemp andVlo = IRTemp_INVALID;
16924 IRTemp andnVhi = IRTemp_INVALID;
16925 IRTemp andnVlo = IRTemp_INVALID;
16926 breakupV256toV128s( andV, &andVhi, &andVlo );
16927 breakupV256toV128s( andnV, &andnVhi, &andnVlo );
16929 IRTemp andV128 = newTemp(Ity_V128);
16930 IRTemp andnV128 = newTemp(Ity_V128);
16931 assign( andV128, binop( Iop_OrV128, mkexpr(andVhi), mkexpr(andVlo) ) );
16932 assign( andnV128, binop( Iop_OrV128, mkexpr(andnVhi), mkexpr(andnVlo) ) );
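   /* OR-ing the two 128-bit halves together preserves the conditions
      "(vecE & vecG) == 0" and "(vecE & ~vecG) == 0" for the full 256
      bits, so the 128-bit flag computation can simply be reused. */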
16934 finish_xTESTy ( andV128, andnV128, sign );
16935 return delta;
16939 /* Handles 128 and 256 bit versions of VCVTPH2PS. */
16940 static Long dis_VCVTPH2PS ( const VexAbiInfo* vbi, Prefix pfx,
16941 Long delta, Bool is256bit )
16943 /* This is a width-doubling load or reg-reg move that does conversion on the
16944 transferred data. */
16945 UChar modrm = getUChar(delta);
16946 UInt rG = gregOfRexRM(pfx, modrm);
16947 IRTemp srcE = newTemp(is256bit ? Ity_V128 : Ity_I64);
16949 if (epartIsReg(modrm)) {
16950 UInt rE = eregOfRexRM(pfx, modrm);
16951 assign(srcE, is256bit ? unop(Iop_V256toV128_0, getYMMReg(rE))
16952 : unop(Iop_V128to64, getXMMReg(rE)));
16953 delta += 1;
16954 DIP("vcvtph2ps %s,%s\n", nameXMMReg(rE),
16955 (is256bit ? nameYMMReg: nameXMMReg)(rG));
16956 } else {
16957 Int alen = 0;
16958 HChar dis_buf[50];
16959 IRTemp addr = disAMode(&alen, vbi, pfx, delta, dis_buf, 0);
16960 // I don't think we need an alignment check here (not 100% sure, though).
16961 assign(srcE, loadLE(is256bit ? Ity_V128 : Ity_I64, mkexpr(addr)));
16962 delta += alen;
16963 DIP( "vcvtph2ps %s,%s\n", dis_buf,
16964 (is256bit ? nameYMMReg: nameXMMReg)(rG));
16967 IRExpr* res = unop(is256bit ? Iop_F16toF32x8 : Iop_F16toF32x4, mkexpr(srcE));
16968 (is256bit ? putYMMReg : putYMMRegLoAndZU)(rG, res);
16970 return delta;
16974 /* Handles 128 bit versions of PMOVZXBW and PMOVSXBW. */
16975 static Long dis_PMOVxXBW_128 ( const VexAbiInfo* vbi, Prefix pfx,
16976 Long delta, Bool isAvx, Bool xIsZ )
16978 IRTemp addr = IRTemp_INVALID;
16979 Int alen = 0;
16980 HChar dis_buf[50];
16981 IRTemp srcVec = newTemp(Ity_V128);
16982 UChar modrm = getUChar(delta);
16983 const HChar* mbV = isAvx ? "v" : "";
16984 const HChar how = xIsZ ? 'z' : 's';
16985 UInt rG = gregOfRexRM(pfx, modrm);
16986 if ( epartIsReg(modrm) ) {
16987 UInt rE = eregOfRexRM(pfx, modrm);
16988 assign( srcVec, getXMMReg(rE) );
16989 delta += 1;
16990 DIP( "%spmov%cxbw %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
16991 } else {
16992 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16993 assign( srcVec,
16994 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
16995 delta += alen;
16996 DIP( "%spmov%cxbw %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
16999 IRExpr* res
17000 = xIsZ /* do math for either zero or sign extend */
17001 ? binop( Iop_InterleaveLO8x16,
17002 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) )
17003 : binop( Iop_SarN16x8,
17004 binop( Iop_ShlN16x8,
17005 binop( Iop_InterleaveLO8x16,
17006 IRExpr_Const( IRConst_V128(0) ),
17007 mkexpr(srcVec) ),
17008 mkU8(8) ),
17009 mkU8(8) );
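   /* A worked byte for the trick above (sketch): source byte 0x80
      interleaved under a zero byte gives the 16-bit lane 0x0080, which
      is already the zero-extended result; shifting left then
      arithmetically right by 8 turns it into 0xFF80, the sign-extended
      result. */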
17011 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );
17013 return delta;
17017 /* Handles 256 bit versions of PMOVZXBW and PMOVSXBW. */
17018 static Long dis_PMOVxXBW_256 ( const VexAbiInfo* vbi, Prefix pfx,
17019 Long delta, Bool xIsZ )
17021 IRTemp addr = IRTemp_INVALID;
17022 Int alen = 0;
17023 HChar dis_buf[50];
17024 IRTemp srcVec = newTemp(Ity_V128);
17025 UChar modrm = getUChar(delta);
17026 UChar how = xIsZ ? 'z' : 's';
17027 UInt rG = gregOfRexRM(pfx, modrm);
17028 if ( epartIsReg(modrm) ) {
17029 UInt rE = eregOfRexRM(pfx, modrm);
17030 assign( srcVec, getXMMReg(rE) );
17031 delta += 1;
17032 DIP( "vpmov%cxbw %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
17033 } else {
17034 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17035 assign( srcVec, loadLE( Ity_V128, mkexpr(addr) ) );
17036 delta += alen;
17037 DIP( "vpmov%cxbw %s,%s\n", how, dis_buf, nameYMMReg(rG) );
17040 /* First do zero extend. */
17041 IRExpr* res
17042 = binop( Iop_V128HLtoV256,
17043 binop( Iop_InterleaveHI8x16,
17044 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
17045 binop( Iop_InterleaveLO8x16,
17046 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
17047 /* And, if needed, sign extension as well. */
17048 if (!xIsZ)
17049 res = binop( Iop_SarN16x16,
17050 binop( Iop_ShlN16x16, res, mkU8(8) ), mkU8(8) );
17052 putYMMReg ( rG, res );
17054 return delta;
17058 static Long dis_PMOVxXWD_128 ( const VexAbiInfo* vbi, Prefix pfx,
17059 Long delta, Bool isAvx, Bool xIsZ )
17061 IRTemp addr = IRTemp_INVALID;
17062 Int alen = 0;
17063 HChar dis_buf[50];
17064 IRTemp srcVec = newTemp(Ity_V128);
17065 UChar modrm = getUChar(delta);
17066 const HChar* mbV = isAvx ? "v" : "";
17067 const HChar how = xIsZ ? 'z' : 's';
17068 UInt rG = gregOfRexRM(pfx, modrm);
17070 if ( epartIsReg(modrm) ) {
17071 UInt rE = eregOfRexRM(pfx, modrm);
17072 assign( srcVec, getXMMReg(rE) );
17073 delta += 1;
17074 DIP( "%spmov%cxwd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
17075 } else {
17076 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17077 assign( srcVec,
17078 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
17079 delta += alen;
17080 DIP( "%spmov%cxwd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
17083 IRExpr* res
17084 = binop( Iop_InterleaveLO16x8,
17085 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) );
17086 if (!xIsZ)
17087 res = binop(Iop_SarN32x4,
17088 binop(Iop_ShlN32x4, res, mkU8(16)), mkU8(16));
17090 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17091 ( gregOfRexRM(pfx, modrm), res );
17093 return delta;
17097 static Long dis_PMOVxXWD_256 ( const VexAbiInfo* vbi, Prefix pfx,
17098 Long delta, Bool xIsZ )
17100 IRTemp addr = IRTemp_INVALID;
17101 Int alen = 0;
17102 HChar dis_buf[50];
17103 IRTemp srcVec = newTemp(Ity_V128);
17104 UChar modrm = getUChar(delta);
17105 UChar how = xIsZ ? 'z' : 's';
17106 UInt rG = gregOfRexRM(pfx, modrm);
17108 if ( epartIsReg(modrm) ) {
17109 UInt rE = eregOfRexRM(pfx, modrm);
17110 assign( srcVec, getXMMReg(rE) );
17111 delta += 1;
17112 DIP( "vpmov%cxwd %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
17113 } else {
17114 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17115 assign( srcVec, loadLE( Ity_V128, mkexpr(addr) ) );
17116 delta += alen;
17117 DIP( "vpmov%cxwd %s,%s\n", how, dis_buf, nameYMMReg(rG) );
17120 IRExpr* res
17121 = binop( Iop_V128HLtoV256,
17122 binop( Iop_InterleaveHI16x8,
17123 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
17124 binop( Iop_InterleaveLO16x8,
17125 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
17126 if (!xIsZ)
17127 res = binop(Iop_SarN32x8,
17128 binop(Iop_ShlN32x8, res, mkU8(16)), mkU8(16));
17130 putYMMReg ( rG, res );
17132 return delta;
17136 static Long dis_PMOVSXWQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17137 Long delta, Bool isAvx )
17139 IRTemp addr = IRTemp_INVALID;
17140 Int alen = 0;
17141 HChar dis_buf[50];
17142 IRTemp srcBytes = newTemp(Ity_I32);
17143 UChar modrm = getUChar(delta);
17144 const HChar* mbV = isAvx ? "v" : "";
17145 UInt rG = gregOfRexRM(pfx, modrm);
17147 if ( epartIsReg( modrm ) ) {
17148 UInt rE = eregOfRexRM(pfx, modrm);
17149 assign( srcBytes, getXMMRegLane32( rE, 0 ) );
17150 delta += 1;
17151 DIP( "%spmovsxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
17152 } else {
17153 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17154 assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) );
17155 delta += alen;
17156 DIP( "%spmovsxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
17159 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17160 ( rG, binop( Iop_64HLtoV128,
17161 unop( Iop_16Sto64,
17162 unop( Iop_32HIto16, mkexpr(srcBytes) ) ),
17163 unop( Iop_16Sto64,
17164 unop( Iop_32to16, mkexpr(srcBytes) ) ) ) );
17165 return delta;
17169 static Long dis_PMOVSXWQ_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
17171 IRTemp addr = IRTemp_INVALID;
17172 Int alen = 0;
17173 HChar dis_buf[50];
17174 IRTemp srcBytes = newTemp(Ity_I64);
17175 UChar modrm = getUChar(delta);
17176 UInt rG = gregOfRexRM(pfx, modrm);
17177 IRTemp s3, s2, s1, s0;
17178 s3 = s2 = s1 = s0 = IRTemp_INVALID;
17180 if ( epartIsReg( modrm ) ) {
17181 UInt rE = eregOfRexRM(pfx, modrm);
17182 assign( srcBytes, getXMMRegLane64( rE, 0 ) );
17183 delta += 1;
17184 DIP( "vpmovsxwq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
17185 } else {
17186 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17187 assign( srcBytes, loadLE( Ity_I64, mkexpr(addr) ) );
17188 delta += alen;
17189 DIP( "vpmovsxwq %s,%s\n", dis_buf, nameYMMReg(rG) );
17192 breakup64to16s( srcBytes, &s3, &s2, &s1, &s0 );
17193 putYMMReg( rG, binop( Iop_V128HLtoV256,
17194 binop( Iop_64HLtoV128,
17195 unop( Iop_16Sto64, mkexpr(s3) ),
17196 unop( Iop_16Sto64, mkexpr(s2) ) ),
17197 binop( Iop_64HLtoV128,
17198 unop( Iop_16Sto64, mkexpr(s1) ),
17199 unop( Iop_16Sto64, mkexpr(s0) ) ) ) );
17200 return delta;
17204 static Long dis_PMOVZXWQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17205 Long delta, Bool isAvx )
17207 IRTemp addr = IRTemp_INVALID;
17208 Int alen = 0;
17209 HChar dis_buf[50];
17210 IRTemp srcVec = newTemp(Ity_V128);
17211 UChar modrm = getUChar(delta);
17212 const HChar* mbV = isAvx ? "v" : "";
17213 UInt rG = gregOfRexRM(pfx, modrm);
17215 if ( epartIsReg( modrm ) ) {
17216 UInt rE = eregOfRexRM(pfx, modrm);
17217 assign( srcVec, getXMMReg(rE) );
17218 delta += 1;
17219 DIP( "%spmovzxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
17220 } else {
17221 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17222 assign( srcVec,
17223 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
17224 delta += alen;
17225 DIP( "%spmovzxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
17228 IRTemp zeroVec = newTemp( Ity_V128 );
17229 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17231 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17232 ( rG, binop( Iop_InterleaveLO16x8,
17233 mkexpr(zeroVec),
17234 binop( Iop_InterleaveLO16x8,
17235 mkexpr(zeroVec), mkexpr(srcVec) ) ) );
17236 return delta;
17240 static Long dis_PMOVZXWQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
17241 Long delta )
17243 IRTemp addr = IRTemp_INVALID;
17244 Int alen = 0;
17245 HChar dis_buf[50];
17246 IRTemp srcVec = newTemp(Ity_V128);
17247 UChar modrm = getUChar(delta);
17248 UInt rG = gregOfRexRM(pfx, modrm);
17250 if ( epartIsReg( modrm ) ) {
17251 UInt rE = eregOfRexRM(pfx, modrm);
17252 assign( srcVec, getXMMReg(rE) );
17253 delta += 1;
17254 DIP( "vpmovzxwq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
17255 } else {
17256 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17257 assign( srcVec,
17258 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
17259 delta += alen;
17260 DIP( "vpmovzxwq %s,%s\n", dis_buf, nameYMMReg(rG) );
17263 IRTemp zeroVec = newTemp( Ity_V128 );
17264 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17266 putYMMReg( rG, binop( Iop_V128HLtoV256,
17267 binop( Iop_InterleaveHI16x8,
17268 mkexpr(zeroVec),
17269 binop( Iop_InterleaveLO16x8,
17270 mkexpr(zeroVec), mkexpr(srcVec) ) ),
17271 binop( Iop_InterleaveLO16x8,
17272 mkexpr(zeroVec),
17273 binop( Iop_InterleaveLO16x8,
17274 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) );
17275 return delta;
17279 /* Handles 128 bit versions of PMOVZXDQ and PMOVSXDQ. */
17280 static Long dis_PMOVxXDQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17281 Long delta, Bool isAvx, Bool xIsZ )
17283 IRTemp addr = IRTemp_INVALID;
17284 Int alen = 0;
17285 HChar dis_buf[50];
17286 IRTemp srcI64 = newTemp(Ity_I64);
17287 IRTemp srcVec = newTemp(Ity_V128);
17288 UChar modrm = getUChar(delta);
17289 const HChar* mbV = isAvx ? "v" : "";
17290 const HChar how = xIsZ ? 'z' : 's';
17291 UInt rG = gregOfRexRM(pfx, modrm);
17292 /* Compute both srcI64 -- the value to expand -- and srcVec -- same
17293 thing in a V128, with arbitrary junk in the top 64 bits. Use
17294 one or both of them and let iropt clean up afterwards (as
17295 usual). */
17296 if ( epartIsReg(modrm) ) {
17297 UInt rE = eregOfRexRM(pfx, modrm);
17298 assign( srcVec, getXMMReg(rE) );
17299 assign( srcI64, unop(Iop_V128to64, mkexpr(srcVec)) );
17300 delta += 1;
17301 DIP( "%spmov%cxdq %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
17302 } else {
17303 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17304 assign( srcI64, loadLE(Ity_I64, mkexpr(addr)) );
17305 assign( srcVec, unop( Iop_64UtoV128, mkexpr(srcI64)) );
17306 delta += alen;
17307 DIP( "%spmov%cxdq %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
17310 IRExpr* res
17311 = xIsZ /* do math for either zero or sign extend */
17312 ? binop( Iop_InterleaveLO32x4,
17313 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) )
17314 : binop( Iop_64HLtoV128,
17315 unop( Iop_32Sto64,
17316 unop( Iop_64HIto32, mkexpr(srcI64) ) ),
17317 unop( Iop_32Sto64,
17318 unop( Iop_64to32, mkexpr(srcI64) ) ) );
17320 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );
17322 return delta;
17326 /* Handles 256 bit versions of PMOVZXDQ and PMOVSXDQ. */
17327 static Long dis_PMOVxXDQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
17328 Long delta, Bool xIsZ )
17330 IRTemp addr = IRTemp_INVALID;
17331 Int alen = 0;
17332 HChar dis_buf[50];
17333 IRTemp srcVec = newTemp(Ity_V128);
17334 UChar modrm = getUChar(delta);
17335 UChar how = xIsZ ? 'z' : 's';
17336 UInt rG = gregOfRexRM(pfx, modrm);
17337 /* Compute srcVec -- the value to expand -- as a V128. Unlike the
17338 128-bit case, no separate I64 copy is needed here. */
17341 if ( epartIsReg(modrm) ) {
17342 UInt rE = eregOfRexRM(pfx, modrm);
17343 assign( srcVec, getXMMReg(rE) );
17344 delta += 1;
17345 DIP( "vpmov%cxdq %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
17346 } else {
17347 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17348 assign( srcVec, loadLE(Ity_V128, mkexpr(addr)) );
17349 delta += alen;
17350 DIP( "vpmov%cxdq %s,%s\n", how, dis_buf, nameYMMReg(rG) );
17353 IRExpr* res;
17354 if (xIsZ)
17355 res = binop( Iop_V128HLtoV256,
17356 binop( Iop_InterleaveHI32x4,
17357 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
17358 binop( Iop_InterleaveLO32x4,
17359 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
17360 else {
17361 IRTemp s3, s2, s1, s0;
17362 s3 = s2 = s1 = s0 = IRTemp_INVALID;
17363 breakupV128to32s( srcVec, &s3, &s2, &s1, &s0 );
17364 res = binop( Iop_V128HLtoV256,
17365 binop( Iop_64HLtoV128,
17366 unop( Iop_32Sto64, mkexpr(s3) ),
17367 unop( Iop_32Sto64, mkexpr(s2) ) ),
17368 binop( Iop_64HLtoV128,
17369 unop( Iop_32Sto64, mkexpr(s1) ),
17370 unop( Iop_32Sto64, mkexpr(s0) ) ) );
17373 putYMMReg ( rG, res );
17375 return delta;
17379 /* Handles 128 bit versions of PMOVZXBD and PMOVSXBD. */
17380 static Long dis_PMOVxXBD_128 ( const VexAbiInfo* vbi, Prefix pfx,
17381 Long delta, Bool isAvx, Bool xIsZ )
17383 IRTemp addr = IRTemp_INVALID;
17384 Int alen = 0;
17385 HChar dis_buf[50];
17386 IRTemp srcVec = newTemp(Ity_V128);
17387 UChar modrm = getUChar(delta);
17388 const HChar* mbV = isAvx ? "v" : "";
17389 const HChar how = xIsZ ? 'z' : 's';
17390 UInt rG = gregOfRexRM(pfx, modrm);
17391 if ( epartIsReg(modrm) ) {
17392 UInt rE = eregOfRexRM(pfx, modrm);
17393 assign( srcVec, getXMMReg(rE) );
17394 delta += 1;
17395 DIP( "%spmov%cxbd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
17396 } else {
17397 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17398 assign( srcVec,
17399 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
17400 delta += alen;
17401 DIP( "%spmov%cxbd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
17404 IRTemp zeroVec = newTemp(Ity_V128);
17405 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17407 IRExpr* res
17408 = binop(Iop_InterleaveLO8x16,
17409 mkexpr(zeroVec),
17410 binop(Iop_InterleaveLO8x16,
17411 mkexpr(zeroVec), mkexpr(srcVec)));
17412 if (!xIsZ)
17413 res = binop(Iop_SarN32x4,
17414 binop(Iop_ShlN32x4, res, mkU8(24)), mkU8(24));
17416 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );
17418 return delta;
17422 /* Handles 256 bit versions of PMOVZXBD and PMOVSXBD. */
17423 static Long dis_PMOVxXBD_256 ( const VexAbiInfo* vbi, Prefix pfx,
17424 Long delta, Bool xIsZ )
17426 IRTemp addr = IRTemp_INVALID;
17427 Int alen = 0;
17428 HChar dis_buf[50];
17429 IRTemp srcVec = newTemp(Ity_V128);
17430 UChar modrm = getUChar(delta);
17431 UChar how = xIsZ ? 'z' : 's';
17432 UInt rG = gregOfRexRM(pfx, modrm);
17433 if ( epartIsReg(modrm) ) {
17434 UInt rE = eregOfRexRM(pfx, modrm);
17435 assign( srcVec, getXMMReg(rE) );
17436 delta += 1;
17437 DIP( "vpmov%cxbd %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
17438 } else {
17439 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17440 assign( srcVec,
17441 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
17442 delta += alen;
17443 DIP( "vpmov%cxbd %s,%s\n", how, dis_buf, nameYMMReg(rG) );
17446 IRTemp zeroVec = newTemp(Ity_V128);
17447 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17449 IRExpr* res
17450 = binop( Iop_V128HLtoV256,
17451 binop(Iop_InterleaveHI8x16,
17452 mkexpr(zeroVec),
17453 binop(Iop_InterleaveLO8x16,
17454 mkexpr(zeroVec), mkexpr(srcVec)) ),
17455 binop(Iop_InterleaveLO8x16,
17456 mkexpr(zeroVec),
17457 binop(Iop_InterleaveLO8x16,
17458 mkexpr(zeroVec), mkexpr(srcVec)) ) );
17459 if (!xIsZ)
17460 res = binop(Iop_SarN32x8,
17461 binop(Iop_ShlN32x8, res, mkU8(24)), mkU8(24));
17463 putYMMReg ( rG, res );
17465 return delta;
17469 /* Handles 128 bit versions of PMOVSXBQ. */
17470 static Long dis_PMOVSXBQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17471 Long delta, Bool isAvx )
17473 IRTemp addr = IRTemp_INVALID;
17474 Int alen = 0;
17475 HChar dis_buf[50];
17476 IRTemp srcBytes = newTemp(Ity_I16);
17477 UChar modrm = getUChar(delta);
17478 const HChar* mbV = isAvx ? "v" : "";
17479 UInt rG = gregOfRexRM(pfx, modrm);
17480 if ( epartIsReg(modrm) ) {
17481 UInt rE = eregOfRexRM(pfx, modrm);
17482 assign( srcBytes, getXMMRegLane16( rE, 0 ) );
17483 delta += 1;
17484 DIP( "%spmovsxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
17485 } else {
17486 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17487 assign( srcBytes, loadLE( Ity_I16, mkexpr(addr) ) );
17488 delta += alen;
17489 DIP( "%spmovsxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
17492 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17493 ( rG, binop( Iop_64HLtoV128,
17494 unop( Iop_8Sto64,
17495 unop( Iop_16HIto8, mkexpr(srcBytes) ) ),
17496 unop( Iop_8Sto64,
17497 unop( Iop_16to8, mkexpr(srcBytes) ) ) ) );
17498 return delta;
17502 /* Handles 256 bit versions of PMOVSXBQ. */
17503 static Long dis_PMOVSXBQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
17504 Long delta )
17506 IRTemp addr = IRTemp_INVALID;
17507 Int alen = 0;
17508 HChar dis_buf[50];
17509 IRTemp srcBytes = newTemp(Ity_I32);
17510 UChar modrm = getUChar(delta);
17511 UInt rG = gregOfRexRM(pfx, modrm);
17512 if ( epartIsReg(modrm) ) {
17513 UInt rE = eregOfRexRM(pfx, modrm);
17514 assign( srcBytes, getXMMRegLane32( rE, 0 ) );
17515 delta += 1;
17516 DIP( "vpmovsxbq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
17517 } else {
17518 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17519 assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) );
17520 delta += alen;
17521 DIP( "vpmovsxbq %s,%s\n", dis_buf, nameYMMReg(rG) );
17524 putYMMReg
17525 ( rG, binop( Iop_V128HLtoV256,
17526 binop( Iop_64HLtoV128,
17527 unop( Iop_8Sto64,
17528 unop( Iop_16HIto8,
17529 unop( Iop_32HIto16,
17530 mkexpr(srcBytes) ) ) ),
17531 unop( Iop_8Sto64,
17532 unop( Iop_16to8,
17533 unop( Iop_32HIto16,
17534 mkexpr(srcBytes) ) ) ) ),
17535 binop( Iop_64HLtoV128,
17536 unop( Iop_8Sto64,
17537 unop( Iop_16HIto8,
17538 unop( Iop_32to16,
17539 mkexpr(srcBytes) ) ) ),
17540 unop( Iop_8Sto64,
17541 unop( Iop_16to8,
17542 unop( Iop_32to16,
17543 mkexpr(srcBytes) ) ) ) ) ) );
17544 return delta;
17548 /* Handles 128 bit versions of PMOVZXBQ. */
17549 static Long dis_PMOVZXBQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17550 Long delta, Bool isAvx )
17552 IRTemp addr = IRTemp_INVALID;
17553 Int alen = 0;
17554 HChar dis_buf[50];
17555 IRTemp srcVec = newTemp(Ity_V128);
17556 UChar modrm = getUChar(delta);
17557 const HChar* mbV = isAvx ? "v" : "";
17558 UInt rG = gregOfRexRM(pfx, modrm);
17559 if ( epartIsReg(modrm) ) {
17560 UInt rE = eregOfRexRM(pfx, modrm);
17561 assign( srcVec, getXMMReg(rE) );
17562 delta += 1;
17563 DIP( "%spmovzxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
17564 } else {
17565 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17566 assign( srcVec,
17567 unop( Iop_32UtoV128,
17568 unop( Iop_16Uto32, loadLE( Ity_I16, mkexpr(addr) ))));
17569 delta += alen;
17570 DIP( "%spmovzxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
17573 IRTemp zeroVec = newTemp(Ity_V128);
17574 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
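   /* Each InterleaveLO8x16 with the zero vector inserts a zero byte
      above every source byte, so one pass widens bytes to zero-extended
      16-bit lanes, a second pass widens those to 32-bit lanes, and a
      third to 64-bit lanes; only the low two source bytes survive into
      the two resulting quadword lanes. */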
17576 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17577 ( rG, binop( Iop_InterleaveLO8x16,
17578 mkexpr(zeroVec),
17579 binop( Iop_InterleaveLO8x16,
17580 mkexpr(zeroVec),
17581 binop( Iop_InterleaveLO8x16,
17582 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) );
17583 return delta;
17587 /* Handles 256 bit versions of PMOVZXBQ. */
17588 static Long dis_PMOVZXBQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
17589 Long delta )
17591 IRTemp addr = IRTemp_INVALID;
17592 Int alen = 0;
17593 HChar dis_buf[50];
17594 IRTemp srcVec = newTemp(Ity_V128);
17595 UChar modrm = getUChar(delta);
17596 UInt rG = gregOfRexRM(pfx, modrm);
17597 if ( epartIsReg(modrm) ) {
17598 UInt rE = eregOfRexRM(pfx, modrm);
17599 assign( srcVec, getXMMReg(rE) );
17600 delta += 1;
17601 DIP( "vpmovzxbq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
17602 } else {
17603 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17604 assign( srcVec,
17605 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) )));
17606 delta += alen;
17607 DIP( "vpmovzxbq %s,%s\n", dis_buf, nameYMMReg(rG) );
17610 IRTemp zeroVec = newTemp(Ity_V128);
17611 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17613 putYMMReg
17614 ( rG, binop( Iop_V128HLtoV256,
17615 binop( Iop_InterleaveHI8x16,
17616 mkexpr(zeroVec),
17617 binop( Iop_InterleaveLO8x16,
17618 mkexpr(zeroVec),
17619 binop( Iop_InterleaveLO8x16,
17620 mkexpr(zeroVec), mkexpr(srcVec) ) ) ),
17621 binop( Iop_InterleaveLO8x16,
17622 mkexpr(zeroVec),
17623 binop( Iop_InterleaveLO8x16,
17624 mkexpr(zeroVec),
17625 binop( Iop_InterleaveLO8x16,
17626 mkexpr(zeroVec), mkexpr(srcVec) ) ) )
17627 ) );
17628 return delta;
17632 static Long dis_PHMINPOSUW_128 ( const VexAbiInfo* vbi, Prefix pfx,
17633 Long delta, Bool isAvx )
17635 IRTemp addr = IRTemp_INVALID;
17636 Int alen = 0;
17637 HChar dis_buf[50];
17638 UChar modrm = getUChar(delta);
17639 const HChar* mbV = isAvx ? "v" : "";
17640 IRTemp sV = newTemp(Ity_V128);
17641 IRTemp sHi = newTemp(Ity_I64);
17642 IRTemp sLo = newTemp(Ity_I64);
17643 IRTemp dLo = newTemp(Ity_I64);
17644 UInt rG = gregOfRexRM(pfx,modrm);
17645 if (epartIsReg(modrm)) {
17646 UInt rE = eregOfRexRM(pfx,modrm);
17647 assign( sV, getXMMReg(rE) );
17648 delta += 1;
17649 DIP("%sphminposuw %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG));
17650 } else {
17651 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
17652 if (!isAvx)
17653 gen_SEGV_if_not_16_aligned(addr);
17654 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
17655 delta += alen;
17656 DIP("%sphminposuw %s,%s\n", mbV, dis_buf, nameXMMReg(rG));
17658 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
17659 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
17660 assign( dLo, mkIRExprCCall(
17661 Ity_I64, 0/*regparms*/,
17662 "amd64g_calculate_sse_phminposuw",
17663 &amd64g_calculate_sse_phminposuw,
17664 mkIRExprVec_2( mkexpr(sLo), mkexpr(sHi) )
17666 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17667 (rG, unop(Iop_64UtoV128, mkexpr(dLo)));
17668 return delta;
17672 static Long dis_AESx ( const VexAbiInfo* vbi, Prefix pfx,
17673 Long delta, Bool isAvx, UChar opc )
17675 IRTemp addr = IRTemp_INVALID;
17676 Int alen = 0;
17677 HChar dis_buf[50];
17678 UChar modrm = getUChar(delta);
17679 UInt rG = gregOfRexRM(pfx, modrm);
17680 UInt regNoL = 0;
17681 UInt regNoR = (isAvx && opc != 0xDB) ? getVexNvvvv(pfx) : rG;
17683 /* This is a nasty kludge. We need to pass 2 x V128 to the
17684 helper. Since we can't do that, use a dirty
17685 helper to compute the results directly from the XMM regs in
17686 the guest state. That means for the memory case, we need to
17687 move the left operand into a pseudo-register (XMM16, let's
17688 call it). */
17689 if (epartIsReg(modrm)) {
17690 regNoL = eregOfRexRM(pfx, modrm);
17691 delta += 1;
17692 } else {
17693 regNoL = 16; /* use XMM16 as an intermediary */
17694 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17695 /* alignment check needed ???? */
17696 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
17697 delta += alen;
17700 void* fn = &amd64g_dirtyhelper_AES;
17701 const HChar* nm = "amd64g_dirtyhelper_AES";
17703 /* Round up the arguments. Note that this is a kludge -- the
17704 use of mkU64 rather than mkIRExpr_HWord implies the
17705 assumption that the host's word size is 64-bit. */
17706 UInt gstOffD = ymmGuestRegOffset(rG);
17707 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
17708 UInt gstOffR = ymmGuestRegOffset(regNoR);
17709 IRExpr* opc4 = mkU64(opc);
17710 IRExpr* gstOffDe = mkU64(gstOffD);
17711 IRExpr* gstOffLe = mkU64(gstOffL);
17712 IRExpr* gstOffRe = mkU64(gstOffR);
17713 IRExpr** args
17714 = mkIRExprVec_5( IRExpr_GSPTR(), opc4, gstOffDe, gstOffLe, gstOffRe );
17716 IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args );
17717 /* It's not really a dirty call, but we can't use the clean helper
17718 mechanism here for the very lame reason that we can't pass 2 x
17719 V128s by value to a helper. Hence this roundabout scheme. */
17720 d->nFxState = 2;
17721 vex_bzero(&d->fxState, sizeof(d->fxState));
17722 /* AES{ENC,ENCLAST,DEC,DECLAST} read both registers, and write
17723 the second for !isAvx or the third for isAvx.
17724 AESIMC (0xDB) reads the first register, and writes the second. */
17725 d->fxState[0].fx = Ifx_Read;
17726 d->fxState[0].offset = gstOffL;
17727 d->fxState[0].size = sizeof(U128);
17728 d->fxState[1].offset = gstOffR;
17729 d->fxState[1].size = sizeof(U128);
17730 if (opc == 0xDB)
17731 d->fxState[1].fx = Ifx_Write;
17732 else if (!isAvx || rG == regNoR)
17733 d->fxState[1].fx = Ifx_Modify;
17734 else {
17735 d->fxState[1].fx = Ifx_Read;
17736 d->nFxState++;
17737 d->fxState[2].fx = Ifx_Write;
17738 d->fxState[2].offset = gstOffD;
17739 d->fxState[2].size = sizeof(U128);
17742 stmt( IRStmt_Dirty(d) );
17744 const HChar* opsuf;
17745 switch (opc) {
17746 case 0xDC: opsuf = "enc"; break;
17747 case 0xDD: opsuf = "enclast"; break;
17748 case 0xDE: opsuf = "dec"; break;
17749 case 0xDF: opsuf = "declast"; break;
17750 case 0xDB: opsuf = "imc"; break;
17751 default: vassert(0);
17753 DIP("%saes%s %s,%s%s%s\n", isAvx ? "v" : "", opsuf,
17754 (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)),
17755 nameXMMReg(regNoR),
17756 (isAvx && opc != 0xDB) ? "," : "",
17757 (isAvx && opc != 0xDB) ? nameXMMReg(rG) : "");
17759 if (isAvx)
17760 putYMMRegLane128( rG, 1, mkV128(0) );
17761 return delta;
17764 static Long dis_AESKEYGENASSIST ( const VexAbiInfo* vbi, Prefix pfx,
17765 Long delta, Bool isAvx )
17767 IRTemp addr = IRTemp_INVALID;
17768 Int alen = 0;
17769 HChar dis_buf[50];
17770 UChar modrm = getUChar(delta);
17771 UInt regNoL = 0;
17772 UInt regNoR = gregOfRexRM(pfx, modrm);
17773 UChar imm = 0;
17775 /* This is a nasty kludge. See AESENC et al. instructions. */
17776 modrm = getUChar(delta);
17777 if (epartIsReg(modrm)) {
17778 regNoL = eregOfRexRM(pfx, modrm);
17779 imm = getUChar(delta+1);
17780 delta += 1+1;
17781 } else {
17782 regNoL = 16; /* use XMM16 as an intermediary */
17783 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
17784 /* alignment check ???? */
17785 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
17786 imm = getUChar(delta+alen);
17787 delta += alen+1;
17790 /* Who ya gonna call? Presumably not Ghostbusters. */
17791 void* fn = &amd64g_dirtyhelper_AESKEYGENASSIST;
17792 const HChar* nm = "amd64g_dirtyhelper_AESKEYGENASSIST";
17794 /* Round up the arguments. Note that this is a kludge -- the
17795 use of mkU64 rather than mkIRExpr_HWord implies the
17796 assumption that the host's word size is 64-bit. */
17797 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
17798 UInt gstOffR = ymmGuestRegOffset(regNoR);
17800 IRExpr* imme = mkU64(imm & 0xFF);
17801 IRExpr* gstOffLe = mkU64(gstOffL);
17802 IRExpr* gstOffRe = mkU64(gstOffR);
17803 IRExpr** args
17804 = mkIRExprVec_4( IRExpr_GSPTR(), imme, gstOffLe, gstOffRe );
17806 IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args );
17807 /* It's not really a dirty call, but we can't use the clean helper
17808 mechanism here for the very lame reason that we can't pass 2 x
17809 V128s by value to a helper. Hence this roundabout scheme. */
17810 d->nFxState = 2;
17811 vex_bzero(&d->fxState, sizeof(d->fxState));
17812 d->fxState[0].fx = Ifx_Read;
17813 d->fxState[0].offset = gstOffL;
17814 d->fxState[0].size = sizeof(U128);
17815 d->fxState[1].fx = Ifx_Write;
17816 d->fxState[1].offset = gstOffR;
17817 d->fxState[1].size = sizeof(U128);
17818 stmt( IRStmt_Dirty(d) );
17820 DIP("%saeskeygenassist $%x,%s,%s\n", isAvx ? "v" : "", (UInt)imm,
17821 (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)),
17822 nameXMMReg(regNoR));
17823 if (isAvx)
17824 putYMMRegLane128( regNoR, 1, mkV128(0) );
17825 return delta;
17829 __attribute__((noinline))
17830 static
17831 Long dis_ESC_0F38__SSE4 ( Bool* decode_OK,
17832 const VexAbiInfo* vbi,
17833 Prefix pfx, Int sz, Long deltaIN )
17835 IRTemp addr = IRTemp_INVALID;
17836 UChar modrm = 0;
17837 Int alen = 0;
17838 HChar dis_buf[50];
17840 *decode_OK = False;
17842 Long delta = deltaIN;
17843 UChar opc = getUChar(delta);
17844 delta++;
17845 switch (opc) {
17847 case 0x10:
17848 case 0x14:
17849 case 0x15:
17850 /* 66 0F 38 10 /r = PBLENDVB xmm1, xmm2/m128 (byte gran)
17851 66 0F 38 14 /r = BLENDVPS xmm1, xmm2/m128 (float gran)
17852 66 0F 38 15 /r = BLENDVPD xmm1, xmm2/m128 (double gran)
17853 Blend at various granularities, with XMM0 (implicit operand)
17854 providing the controlling mask.
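         For each lane, the top bit of the corresponding XMM0 lane
         selects the E operand (bit set) or keeps the G operand (bit
         clear); math_PBLENDVB_128 presumably broadcasts that bit across
         the lane using opSAR so it can serve as a bitwise select mask.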
17856 if (have66noF2noF3(pfx) && sz == 2) {
17857 modrm = getUChar(delta);
17859 const HChar* nm = NULL;
17860 UInt gran = 0;
17861 IROp opSAR = Iop_INVALID;
17862 switch (opc) {
17863 case 0x10:
17864 nm = "pblendvb"; gran = 1; opSAR = Iop_SarN8x16;
17865 break;
17866 case 0x14:
17867 nm = "blendvps"; gran = 4; opSAR = Iop_SarN32x4;
17868 break;
17869 case 0x15:
17870 nm = "blendvpd"; gran = 8; opSAR = Iop_SarN64x2;
17871 break;
17873 vassert(nm);
17875 IRTemp vecE = newTemp(Ity_V128);
17876 IRTemp vecG = newTemp(Ity_V128);
17877 IRTemp vec0 = newTemp(Ity_V128);
17879 if ( epartIsReg(modrm) ) {
17880 assign(vecE, getXMMReg(eregOfRexRM(pfx, modrm)));
17881 delta += 1;
17882 DIP( "%s %s,%s\n", nm,
17883 nameXMMReg( eregOfRexRM(pfx, modrm) ),
17884 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17885 } else {
17886 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17887 gen_SEGV_if_not_16_aligned( addr );
17888 assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
17889 delta += alen;
17890 DIP( "%s %s,%s\n", nm,
17891 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17894 assign(vecG, getXMMReg(gregOfRexRM(pfx, modrm)));
17895 assign(vec0, getXMMReg(0));
17897 IRTemp res = math_PBLENDVB_128( vecE, vecG, vec0, gran, opSAR );
17898 putXMMReg(gregOfRexRM(pfx, modrm), mkexpr(res));
17900 goto decode_success;
17902 break;
17904 case 0x17:
17905 /* 66 0F 38 17 /r = PTEST xmm1, xmm2/m128
17906 Logical compare (set ZF and CF from AND/ANDN of the operands) */
17907 if (have66noF2noF3(pfx)
17908 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
17909 delta = dis_xTESTy_128( vbi, pfx, delta, False/*!isAvx*/, 0 );
17910 goto decode_success;
17912 break;
17914 case 0x20:
17915 /* 66 0F 38 20 /r = PMOVSXBW xmm1, xmm2/m64
17916 Packed Move with Sign Extend from Byte to Word (XMM) */
17917 if (have66noF2noF3(pfx) && sz == 2) {
17918 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
17919 False/*!isAvx*/, False/*!xIsZ*/ );
17920 goto decode_success;
17922 break;
17924 case 0x21:
17925 /* 66 0F 38 21 /r = PMOVSXBD xmm1, xmm2/m32
17926 Packed Move with Sign Extend from Byte to DWord (XMM) */
17927 if (have66noF2noF3(pfx) && sz == 2) {
17928 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
17929 False/*!isAvx*/, False/*!xIsZ*/ );
17930 goto decode_success;
17932 break;
17934 case 0x22:
17935 /* 66 0F 38 22 /r = PMOVSXBQ xmm1, xmm2/m16
17936 Packed Move with Sign Extend from Byte to QWord (XMM) */
17937 if (have66noF2noF3(pfx) && sz == 2) {
17938 delta = dis_PMOVSXBQ_128( vbi, pfx, delta, False/*!isAvx*/ );
17939 goto decode_success;
17941 break;
17943 case 0x23:
17944 /* 66 0F 38 23 /r = PMOVSXWD xmm1, xmm2/m64
17945 Packed Move with Sign Extend from Word to DWord (XMM) */
17946 if (have66noF2noF3(pfx) && sz == 2) {
17947 delta = dis_PMOVxXWD_128(vbi, pfx, delta,
17948 False/*!isAvx*/, False/*!xIsZ*/);
17949 goto decode_success;
17951 break;
17953 case 0x24:
17954 /* 66 0F 38 24 /r = PMOVSXWQ xmm1, xmm2/m32
17955 Packed Move with Sign Extend from Word to QWord (XMM) */
17956 if (have66noF2noF3(pfx) && sz == 2) {
17957 delta = dis_PMOVSXWQ_128( vbi, pfx, delta, False/*!isAvx*/ );
17958 goto decode_success;
17960 break;
17962 case 0x25:
17963 /* 66 0F 38 25 /r = PMOVSXDQ xmm1, xmm2/m64
17964 Packed Move with Sign Extend from Double Word to Quad Word (XMM) */
17965 if (have66noF2noF3(pfx) && sz == 2) {
17966 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
17967 False/*!isAvx*/, False/*!xIsZ*/ );
17968 goto decode_success;
17970 break;
17972 case 0x28:
17973 /* 66 0F 38 28 = PMULDQ -- signed widening multiply of 32-bit lanes:
17974 lanes 0 x 0 form the lower 64-bit half and lanes 2 x 2 form the
17975 upper 64-bit half */
17976 /* This is a really poor translation -- could be improved if
17977 performance critical. It's a copy-paste of PMULUDQ, too. */
17978 if (have66noF2noF3(pfx) && sz == 2) {
17979 IRTemp sV = newTemp(Ity_V128);
17980 IRTemp dV = newTemp(Ity_V128);
17981 modrm = getUChar(delta);
17982 UInt rG = gregOfRexRM(pfx,modrm);
17983 assign( dV, getXMMReg(rG) );
17984 if (epartIsReg(modrm)) {
17985 UInt rE = eregOfRexRM(pfx,modrm);
17986 assign( sV, getXMMReg(rE) );
17987 delta += 1;
17988 DIP("pmuldq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
17989 } else {
17990 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
17991 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
17992 delta += alen;
17993 DIP("pmuldq %s,%s\n", dis_buf, nameXMMReg(rG));
17996 putXMMReg( rG, mkexpr(math_PMULDQ_128( dV, sV )) );
17997 goto decode_success;
17999 break;
18001 case 0x29:
18002 /* 66 0F 38 29 = PCMPEQQ
18003 64x2 equality comparison */
18004 if (have66noF2noF3(pfx) && sz == 2) {
18005 /* FIXME: this needs an alignment check */
18006 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
18007 "pcmpeqq", Iop_CmpEQ64x2, False );
18008 goto decode_success;
18010 break;
18012 case 0x2A:
18013 /* 66 0F 38 2A = MOVNTDQA
18014 "non-temporal" "streaming" load
18015 Handle like MOVDQA but only a memory operand is allowed */
18016 if (have66noF2noF3(pfx) && sz == 2) {
18017 modrm = getUChar(delta);
18018 if (!epartIsReg(modrm)) {
18019 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
18020 gen_SEGV_if_not_16_aligned( addr );
18021 putXMMReg( gregOfRexRM(pfx,modrm),
18022 loadLE(Ity_V128, mkexpr(addr)) );
18023 DIP("movntdqa %s,%s\n", dis_buf,
18024 nameXMMReg(gregOfRexRM(pfx,modrm)));
18025 delta += alen;
18026 goto decode_success;
18029 break;
18031 case 0x2B:
18032 /* 66 0f 38 2B /r = PACKUSDW xmm1, xmm2/m128
18033 2x 32x4 S->U saturating narrow from xmm2/m128 to xmm1 */
18034 if (have66noF2noF3(pfx) && sz == 2) {
18036 modrm = getUChar(delta);
18038 IRTemp argL = newTemp(Ity_V128);
18039 IRTemp argR = newTemp(Ity_V128);
18041 if ( epartIsReg(modrm) ) {
18042 assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
18043 delta += 1;
18044 DIP( "packusdw %s,%s\n",
18045 nameXMMReg( eregOfRexRM(pfx, modrm) ),
18046 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18047 } else {
18048 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
18049 gen_SEGV_if_not_16_aligned( addr );
18050 assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
18051 delta += alen;
18052 DIP( "packusdw %s,%s\n",
18053 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18056 assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));
18058 putXMMReg( gregOfRexRM(pfx, modrm),
18059 binop( Iop_QNarrowBin32Sto16Ux8,
18060 mkexpr(argL), mkexpr(argR)) );
18062 goto decode_success;
18064 break;
18066 case 0x30:
18067 /* 66 0F 38 30 /r = PMOVZXBW xmm1, xmm2/m64
18068 Packed Move with Zero Extend from Byte to Word (XMM) */
18069 if (have66noF2noF3(pfx) && sz == 2) {
18070 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
18071 False/*!isAvx*/, True/*xIsZ*/ );
18072 goto decode_success;
18074 break;
18076 case 0x31:
18077 /* 66 0F 38 31 /r = PMOVZXBD xmm1, xmm2/m32
18078 Packed Move with Zero Extend from Byte to DWord (XMM) */
18079 if (have66noF2noF3(pfx) && sz == 2) {
18080 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
18081 False/*!isAvx*/, True/*xIsZ*/ );
18082 goto decode_success;
18084 break;
18086 case 0x32:
18087 /* 66 0F 38 32 /r = PMOVZXBQ xmm1, xmm2/m16
18088 Packed Move with Zero Extend from Byte to QWord (XMM) */
18089 if (have66noF2noF3(pfx) && sz == 2) {
18090 delta = dis_PMOVZXBQ_128( vbi, pfx, delta, False/*!isAvx*/ );
18091 goto decode_success;
18093 break;
18095 case 0x33:
18096 /* 66 0F 38 33 /r = PMOVZXWD xmm1, xmm2/m64
18097 Packed Move with Zero Extend from Word to DWord (XMM) */
18098 if (have66noF2noF3(pfx) && sz == 2) {
18099 delta = dis_PMOVxXWD_128( vbi, pfx, delta,
18100 False/*!isAvx*/, True/*xIsZ*/ );
18101 goto decode_success;
18103 break;
18105 case 0x34:
18106 /* 66 0F 38 34 /r = PMOVZXWQ xmm1, xmm2/m32
18107 Packed Move with Zero Extend from Word to QWord (XMM) */
18108 if (have66noF2noF3(pfx) && sz == 2) {
18109 delta = dis_PMOVZXWQ_128( vbi, pfx, delta, False/*!isAvx*/ );
18110 goto decode_success;
18112 break;
18114 case 0x35:
18115 /* 66 0F 38 35 /r = PMOVZXDQ xmm1, xmm2/m64
18116 Packed Move with Zero Extend from DWord to QWord (XMM) */
18117 if (have66noF2noF3(pfx) && sz == 2) {
18118 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
18119 False/*!isAvx*/, True/*xIsZ*/ );
18120 goto decode_success;
18122 break;
18124 case 0x37:
18125 /* 66 0F 38 37 = PCMPGTQ
18126 64x2 comparison (signed, presumably; the Intel docs don't say :-)
18128 if (have66noF2noF3(pfx) && sz == 2) {
18129 /* FIXME: this needs an alignment check */
18130 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
18131 "pcmpgtq", Iop_CmpGT64Sx2, False );
18132 goto decode_success;
18134 break;
18136 case 0x38:
18137 case 0x3C:
18138 /* 66 0F 38 38 /r = PMINSB xmm1, xmm2/m128 8Sx16 (signed) min
18139 66 0F 38 3C /r = PMAXSB xmm1, xmm2/m128 8Sx16 (signed) max
18141 if (have66noF2noF3(pfx) && sz == 2) {
18142 /* FIXME: this needs an alignment check */
18143 Bool isMAX = opc == 0x3C;
18144 delta = dis_SSEint_E_to_G(
18145 vbi, pfx, delta,
18146 isMAX ? "pmaxsb" : "pminsb",
18147 isMAX ? Iop_Max8Sx16 : Iop_Min8Sx16,
18148 False
18150 goto decode_success;
18152 break;
18154 case 0x39:
18155 case 0x3D:
18156 /* 66 0F 38 39 /r = PMINSD xmm1, xmm2/m128
18157 Minimum of Packed Signed Double Word Integers (XMM)
18158 66 0F 38 3D /r = PMAXSD xmm1, xmm2/m128
18159 Maximum of Packed Signed Double Word Integers (XMM)
18161 if (have66noF2noF3(pfx) && sz == 2) {
18162 /* FIXME: this needs an alignment check */
18163 Bool isMAX = opc == 0x3D;
18164 delta = dis_SSEint_E_to_G(
18165 vbi, pfx, delta,
18166 isMAX ? "pmaxsd" : "pminsd",
18167 isMAX ? Iop_Max32Sx4 : Iop_Min32Sx4,
18168 False
18170 goto decode_success;
18172 break;
18174 case 0x3A:
18175 case 0x3E:
18176 /* 66 0F 38 3A /r = PMINUW xmm1, xmm2/m128
18177 Minimum of Packed Unsigned Word Integers (XMM)
18178 66 0F 38 3E /r = PMAXUW xmm1, xmm2/m128
18179 Maximum of Packed Unsigned Word Integers (XMM)
18181 if (have66noF2noF3(pfx) && sz == 2) {
18182 /* FIXME: this needs an alignment check */
18183 Bool isMAX = opc == 0x3E;
18184 delta = dis_SSEint_E_to_G(
18185 vbi, pfx, delta,
18186 isMAX ? "pmaxuw" : "pminuw",
18187 isMAX ? Iop_Max16Ux8 : Iop_Min16Ux8,
18188 False
18190 goto decode_success;
18192 break;
18194 case 0x3B:
18195 case 0x3F:
18196 /* 66 0F 38 3B /r = PMINUD xmm1, xmm2/m128
18197 Minimum of Packed Unsigned Doubleword Integers (XMM)
18198 66 0F 38 3F /r = PMAXUD xmm1, xmm2/m128
18199 Maximum of Packed Unsigned Doubleword Integers (XMM)
18201 if (have66noF2noF3(pfx) && sz == 2) {
18202 /* FIXME: this needs an alignment check */
18203 Bool isMAX = opc == 0x3F;
18204 delta = dis_SSEint_E_to_G(
18205 vbi, pfx, delta,
18206 isMAX ? "pmaxud" : "pminud",
18207 isMAX ? Iop_Max32Ux4 : Iop_Min32Ux4,
18208 False
18210 goto decode_success;
18212 break;
18214 case 0x40:
18215 /* 66 0F 38 40 /r = PMULLD xmm1, xmm2/m128
18216 32x4 integer multiply from xmm2/m128 to xmm1 */
18217 if (have66noF2noF3(pfx) && sz == 2) {
18219 modrm = getUChar(delta);
18221 IRTemp argL = newTemp(Ity_V128);
18222 IRTemp argR = newTemp(Ity_V128);
18224 if ( epartIsReg(modrm) ) {
18225 assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
18226 delta += 1;
18227 DIP( "pmulld %s,%s\n",
18228 nameXMMReg( eregOfRexRM(pfx, modrm) ),
18229 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18230 } else {
18231 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
18232 gen_SEGV_if_not_16_aligned( addr );
18233 assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
18234 delta += alen;
18235 DIP( "pmulld %s,%s\n",
18236 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18239 assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));
18241 putXMMReg( gregOfRexRM(pfx, modrm),
18242 binop( Iop_Mul32x4, mkexpr(argL), mkexpr(argR)) );
18244 goto decode_success;
18246 break;
18248 case 0x41:
18249 /* 66 0F 38 41 /r = PHMINPOSUW xmm1, xmm2/m128
18250 Packed Horizontal Word Minimum from xmm2/m128 to xmm1 */
18251 if (have66noF2noF3(pfx) && sz == 2) {
18252 delta = dis_PHMINPOSUW_128( vbi, pfx, delta, False/*!isAvx*/ );
18253 goto decode_success;
18255 break;
18257 case 0xDC:
18258 case 0xDD:
18259 case 0xDE:
18260 case 0xDF:
18261 case 0xDB:
18262 /* 66 0F 38 DC /r = AESENC xmm1, xmm2/m128
18263 DD /r = AESENCLAST xmm1, xmm2/m128
18264 DE /r = AESDEC xmm1, xmm2/m128
18265 DF /r = AESDECLAST xmm1, xmm2/m128
18267 DB /r = AESIMC xmm1, xmm2/m128 */
18268 if (have66noF2noF3(pfx) && sz == 2) {
18269 delta = dis_AESx( vbi, pfx, delta, False/*!isAvx*/, opc );
18270 goto decode_success;
18272 break;
18274 case 0xF0:
18275 case 0xF1:
18276 /* F2 0F 38 F0 /r = CRC32 r/m8, r32 (REX.W ok, 66 not ok)
18277 F2 0F 38 F1 /r = CRC32 r/m{16,32,64}, r32
18278 The decoding on this is a bit unusual.
18280 if (haveF2noF3(pfx)
18281 && (opc == 0xF1 || (opc == 0xF0 && !have66(pfx)))) {
18282 modrm = getUChar(delta);
18284 if (opc == 0xF0)
18285 sz = 1;
18286 else
18287 vassert(sz == 2 || sz == 4 || sz == 8);
18289 IRType tyE = szToITy(sz);
18290 IRTemp valE = newTemp(tyE);
18292 if (epartIsReg(modrm)) {
18293 assign(valE, getIRegE(sz, pfx, modrm));
18294 delta += 1;
18295 DIP("crc32b %s,%s\n", nameIRegE(sz, pfx, modrm),
18296 nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm));
18297 } else {
18298 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
18299 assign(valE, loadLE(tyE, mkexpr(addr)));
18300 delta += alen;
18301 DIP("crc32b %s,%s\n", dis_buf,
18302 nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm));
18305 /* Somewhat funny getting/putting of the crc32 value, in order
18306 to ensure that it turns into 64-bit gets and puts. However,
18307 mask off the upper 32 bits so as to not get memcheck false
18308 +ves around the helper call. */
18309 IRTemp valG0 = newTemp(Ity_I64);
18310 assign(valG0, binop(Iop_And64, getIRegG(8, pfx, modrm),
18311 mkU64(0xFFFFFFFF)));
18313 const HChar* nm = NULL;
18314 void* fn = NULL;
18315 switch (sz) {
18316 case 1: nm = "amd64g_calc_crc32b";
18317 fn = &amd64g_calc_crc32b; break;
18318 case 2: nm = "amd64g_calc_crc32w";
18319 fn = &amd64g_calc_crc32w; break;
18320 case 4: nm = "amd64g_calc_crc32l";
18321 fn = &amd64g_calc_crc32l; break;
18322 case 8: nm = "amd64g_calc_crc32q";
18323 fn = &amd64g_calc_crc32q; break;
18325 vassert(nm && fn);
18326 IRTemp valG1 = newTemp(Ity_I64);
18327 assign(valG1,
18328 mkIRExprCCall(Ity_I64, 0/*regparm*/, nm, fn,
18329 mkIRExprVec_2(mkexpr(valG0),
18330 widenUto64(mkexpr(valE)))));
18332 putIRegG(4, pfx, modrm, unop(Iop_64to32, mkexpr(valG1)));
18333 goto decode_success;
18335 break;
18337 default:
18338 break;
18342 //decode_failure:
18343 *decode_OK = False;
18344 return deltaIN;
18346 decode_success:
18347 *decode_OK = True;
18348 return delta;
18352 /*------------------------------------------------------------*/
18353 /*--- ---*/
18354 /*--- Top-level SSE4: dis_ESC_0F3A__SSE4 ---*/
18355 /*--- ---*/
18356 /*------------------------------------------------------------*/
18358 static Long dis_PEXTRW ( const VexAbiInfo* vbi, Prefix pfx,
18359 Long delta, Bool isAvx )
18361 IRTemp addr = IRTemp_INVALID;
18362 IRTemp t0 = IRTemp_INVALID;
18363 IRTemp t1 = IRTemp_INVALID;
18364 IRTemp t2 = IRTemp_INVALID;
18365 IRTemp t3 = IRTemp_INVALID;
18366 UChar modrm = getUChar(delta);
18367 Int alen = 0;
18368 HChar dis_buf[50];
18369 UInt rG = gregOfRexRM(pfx,modrm);
18370 Int imm8_20;
18371 IRTemp xmm_vec = newTemp(Ity_V128);
18372 IRTemp d16 = newTemp(Ity_I16);
18373 const HChar* mbV = isAvx ? "v" : "";
18375 vassert(0==getRexW(pfx)); /* ensured by caller */
18376 assign( xmm_vec, getXMMReg(rG) );
18377 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
18379 if ( epartIsReg( modrm ) ) {
18380 imm8_20 = (Int)(getUChar(delta+1) & 7);
18381 } else {
18382 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18383 imm8_20 = (Int)(getUChar(delta+alen) & 7);
18386 switch (imm8_20) {
18387 case 0: assign(d16, unop(Iop_32to16, mkexpr(t0))); break;
18388 case 1: assign(d16, unop(Iop_32HIto16, mkexpr(t0))); break;
18389 case 2: assign(d16, unop(Iop_32to16, mkexpr(t1))); break;
18390 case 3: assign(d16, unop(Iop_32HIto16, mkexpr(t1))); break;
18391 case 4: assign(d16, unop(Iop_32to16, mkexpr(t2))); break;
18392 case 5: assign(d16, unop(Iop_32HIto16, mkexpr(t2))); break;
18393 case 6: assign(d16, unop(Iop_32to16, mkexpr(t3))); break;
18394 case 7: assign(d16, unop(Iop_32HIto16, mkexpr(t3))); break;
18395 default: vassert(0);
18398 if ( epartIsReg( modrm ) ) {
18399 UInt rE = eregOfRexRM(pfx,modrm);
18400 putIReg32( rE, unop(Iop_16Uto32, mkexpr(d16)) );
18401 delta += 1+1;
18402 DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20,
18403 nameXMMReg( rG ), nameIReg32( rE ) );
18404 } else {
18405 storeLE( mkexpr(addr), mkexpr(d16) );
18406 delta += alen+1;
18407 DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20, nameXMMReg( rG ), dis_buf );
18409 return delta;
18413 static Long dis_PEXTRD ( const VexAbiInfo* vbi, Prefix pfx,
18414 Long delta, Bool isAvx )
18416 IRTemp addr = IRTemp_INVALID;
18417 IRTemp t0 = IRTemp_INVALID;
18418 IRTemp t1 = IRTemp_INVALID;
18419 IRTemp t2 = IRTemp_INVALID;
18420 IRTemp t3 = IRTemp_INVALID;
18421 UChar modrm = 0;
18422 Int alen = 0;
18423 HChar dis_buf[50];
18425 Int imm8_10;
18426 IRTemp xmm_vec = newTemp(Ity_V128);
18427 IRTemp src_dword = newTemp(Ity_I32);
18428 const HChar* mbV = isAvx ? "v" : "";
18430 vassert(0==getRexW(pfx)); /* ensured by caller */
18431 modrm = getUChar(delta);
18432 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
18433 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
18435 if ( epartIsReg( modrm ) ) {
18436 imm8_10 = (Int)(getUChar(delta+1) & 3);
18437 } else {
18438 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18439 imm8_10 = (Int)(getUChar(delta+alen) & 3);
18442 switch ( imm8_10 ) {
18443 case 0: assign( src_dword, mkexpr(t0) ); break;
18444 case 1: assign( src_dword, mkexpr(t1) ); break;
18445 case 2: assign( src_dword, mkexpr(t2) ); break;
18446 case 3: assign( src_dword, mkexpr(t3) ); break;
18447 default: vassert(0);
18450 if ( epartIsReg( modrm ) ) {
18451 putIReg32( eregOfRexRM(pfx,modrm), mkexpr(src_dword) );
18452 delta += 1+1;
18453 DIP( "%spextrd $%d, %s,%s\n", mbV, imm8_10,
18454 nameXMMReg( gregOfRexRM(pfx, modrm) ),
18455 nameIReg32( eregOfRexRM(pfx, modrm) ) );
18456 } else {
18457 storeLE( mkexpr(addr), mkexpr(src_dword) );
18458 delta += alen+1;
18459 DIP( "%spextrd $%d, %s,%s\n", mbV,
18460 imm8_10, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
18462 return delta;
18466 static Long dis_PEXTRQ ( const VexAbiInfo* vbi, Prefix pfx,
18467 Long delta, Bool isAvx )
18469 IRTemp addr = IRTemp_INVALID;
18470 UChar modrm = 0;
18471 Int alen = 0;
18472 HChar dis_buf[50];
18474 Int imm8_0;
18475 IRTemp xmm_vec = newTemp(Ity_V128);
18476 IRTemp src_qword = newTemp(Ity_I64);
18477 const HChar* mbV = isAvx ? "v" : "";
18479 vassert(1==getRexW(pfx)); /* ensured by caller */
18480 modrm = getUChar(delta);
18481 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
18483 if ( epartIsReg( modrm ) ) {
18484 imm8_0 = (Int)(getUChar(delta+1) & 1);
18485 } else {
18486 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18487 imm8_0 = (Int)(getUChar(delta+alen) & 1);
18490 switch ( imm8_0 ) {
18491 case 0: assign( src_qword, unop(Iop_V128to64, mkexpr(xmm_vec)) );
18492 break;
18493 case 1: assign( src_qword, unop(Iop_V128HIto64, mkexpr(xmm_vec)) );
18494 break;
18495 default: vassert(0);
18498 if ( epartIsReg( modrm ) ) {
18499 putIReg64( eregOfRexRM(pfx,modrm), mkexpr(src_qword) );
18500 delta += 1+1;
18501 DIP( "%spextrq $%d, %s,%s\n", mbV, imm8_0,
18502 nameXMMReg( gregOfRexRM(pfx, modrm) ),
18503 nameIReg64( eregOfRexRM(pfx, modrm) ) );
18504 } else {
18505 storeLE( mkexpr(addr), mkexpr(src_qword) );
18506 delta += alen+1;
18507 DIP( "%spextrq $%d, %s,%s\n", mbV,
18508 imm8_0, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
18510 return delta;
18513 static IRExpr* math_CTZ32(IRExpr *exp)
18515 /* Iop_Ctz32 isn't implemented by the amd64 back end, so use Iop_Ctz64. */
18516 return unop(Iop_64to32, unop(Iop_Ctz64, unop(Iop_32Uto64, exp)));
18519 static Long dis_PCMPISTRI_3A ( UChar modrm, UInt regNoL, UInt regNoR,
18520 Long delta, UChar opc, UChar imm,
18521 HChar dis_buf[])
18523 /* We only handle PCMPISTRI for now */
18524 vassert((opc & 0x03) == 0x03);
18525 /* And only an immediate byte of 0x38 or 0x3A */
18526 vassert((imm & ~0x02) == 0x38);
18528 /* FIXME: Is this correct when regNoL == 16 ? */
18529 IRTemp argL = newTemp(Ity_V128);
18530 assign(argL, getXMMReg(regNoL));
18531 IRTemp argR = newTemp(Ity_V128);
18532 assign(argR, getXMMReg(regNoR));
18534 IRTemp zmaskL = newTemp(Ity_I32);
18535 assign(zmaskL, unop(Iop_16Uto32,
18536 unop(Iop_GetMSBs8x16,
18537 binop(Iop_CmpEQ8x16, mkexpr(argL), mkV128(0)))));
18538 IRTemp zmaskR = newTemp(Ity_I32);
18539 assign(zmaskR, unop(Iop_16Uto32,
18540 unop(Iop_GetMSBs8x16,
18541 binop(Iop_CmpEQ8x16, mkexpr(argR), mkV128(0)))));
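   /* zmaskL/zmaskR now have bit i set iff byte i of argL/argR is zero,
      i.e. they mark the positions of the terminating zero bytes. */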
18543 /* We want validL = ~(zmaskL | -zmaskL)
18545 But this formulation kills memcheck's validity tracking when any
18546 bits above the first "1" are invalid. So reformulate as:
18548 validL = (zmaskL ? (1 << ctz(zmaskL)) : 0) - 1
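      For example, zmaskL = 0b1000 (first zero byte at position 3) gives
      ctz = 3, so validL = (1 << 3) - 1 = 0b0111: exactly the bytes
      before the terminator are marked valid.  zmaskL = 0 (no zero byte)
      gives 0 - 1 = all ones, i.e. every byte valid.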
18551 IRExpr *ctzL = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskL)));
18553 /* Generate a bool expression which is zero iff the original is
18554 zero. Do this carefully so memcheck can propagate validity bits
18555 correctly.
18557 IRTemp zmaskL_zero = newTemp(Ity_I1);
18558 assign(zmaskL_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskL), mkU32(0)));
18560 IRTemp validL = newTemp(Ity_I32);
18561 assign(validL, binop(Iop_Sub32,
18562 IRExpr_ITE(mkexpr(zmaskL_zero),
18563 binop(Iop_Shl32, mkU32(1), ctzL),
18564 mkU32(0)),
18565 mkU32(1)));
18567 /* And similarly for validR. */
18568 IRExpr *ctzR = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskR)));
18569 IRTemp zmaskR_zero = newTemp(Ity_I1);
18570 assign(zmaskR_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskR), mkU32(0)));
18571 IRTemp validR = newTemp(Ity_I32);
18572 assign(validR, binop(Iop_Sub32,
18573 IRExpr_ITE(mkexpr(zmaskR_zero),
18574 binop(Iop_Shl32, mkU32(1), ctzR),
18575 mkU32(0)),
18576 mkU32(1)));
18578 /* Do the actual comparison. */
18579 IRExpr *boolResII = unop(Iop_16Uto32,
18580 unop(Iop_GetMSBs8x16,
18581 binop(Iop_CmpEQ8x16, mkexpr(argL),
18582 mkexpr(argR))));
18584 /* Compute boolResII & validL & validR (i.e., if both valid, use
18585 comparison result) */
18586 IRExpr *intRes1_a = binop(Iop_And32, boolResII,
18587 binop(Iop_And32,
18588 mkexpr(validL), mkexpr(validR)));
18590 /* Compute ~(validL | validR); i.e., if both invalid, force 1. */
18591 IRExpr *intRes1_b = unop(Iop_Not32, binop(Iop_Or32,
18592 mkexpr(validL), mkexpr(validR)));
18593 /* Otherwise, zero. */
18594 IRExpr *intRes1 = binop(Iop_And32, mkU32(0xFFFF),
18595 binop(Iop_Or32, intRes1_a, intRes1_b));
18597 /* The 0x30 field in imm=0x3A selects "polarity=3", which means:
18598 XOR validL into the result. */
18599 IRTemp intRes2 = newTemp(Ity_I32);
18600 assign(intRes2, binop(Iop_And32, mkU32(0xFFFF),
18601 binop(Iop_Xor32, intRes1, mkexpr(validL))));
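/* Illustration: within the lanes where argL is valid, intRes1 bit i is
   1 iff argR is also valid there and the bytes are equal; XORing with
   validL therefore sets intRes2 bit i exactly where argL is still
   inside its string but the match fails, so the lowest set bit marks
   the first point of difference, as in strcmp. */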
18603 /* If the 0x40 bit were set in imm=0x3A, we would return the index
18604 of the msb. Since it is clear, we return the index of the
18605 lsb. */
18606 IRExpr *newECX = math_CTZ32(binop(Iop_Or32,
18607 mkexpr(intRes2), mkU32(0x10000)));
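/* ORing in 0x10000 plants a guaranteed set bit at position 16, so when
   intRes2 is zero (no difference found) the count-trailing-zeroes
   result is 16, the architected "no index found" value for ECX. */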
18609 /* And that's our rcx. */
18610 putIReg32(R_RCX, newECX);
18612 /* Now for the condition codes... */
18614 /* C == 0 iff intRes2 == 0 */
18615 IRExpr *c_bit = IRExpr_ITE( binop(Iop_ExpCmpNE32, mkexpr(intRes2),
18616 mkU32(0)),
18617 mkU32(1 << AMD64G_CC_SHIFT_C),
18618 mkU32(0));
18619 /* Z == 1 iff any in argL is 0 */
18620 IRExpr *z_bit = IRExpr_ITE( mkexpr(zmaskL_zero),
18621 mkU32(1 << AMD64G_CC_SHIFT_Z),
18622 mkU32(0));
18623 /* S == 1 iff any in argR is 0 */
18624 IRExpr *s_bit = IRExpr_ITE( mkexpr(zmaskR_zero),
18625 mkU32(1 << AMD64G_CC_SHIFT_S),
18626 mkU32(0));
18627 /* O == IntRes2[0] */
18628 IRExpr *o_bit = binop(Iop_Shl32, binop(Iop_And32, mkexpr(intRes2),
18629 mkU32(0x01)),
18630 mkU8(AMD64G_CC_SHIFT_O));
18632 /* Put them all together */
18633 IRTemp cc = newTemp(Ity_I64);
18634 assign(cc, widenUto64(binop(Iop_Or32,
18635 binop(Iop_Or32, c_bit, z_bit),
18636 binop(Iop_Or32, s_bit, o_bit))));
18637 stmt(IRStmt_Put(OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY)));
18638 stmt(IRStmt_Put(OFFB_CC_DEP1, mkexpr(cc)));
18639 stmt(IRStmt_Put(OFFB_CC_DEP2, mkU64(0)));
18640 stmt(IRStmt_Put(OFFB_CC_NDEP, mkU64(0)));
18642 return delta;
18645 /* This can fail, in which case it returns the original (unchanged)
18646 delta. */
18647 static Long dis_PCMPxSTRx ( const VexAbiInfo* vbi, Prefix pfx,
18648 Long delta, Bool isAvx, UChar opc )
18650 Long delta0 = delta;
18651 UInt isISTRx = opc & 2;
18652 UInt isxSTRM = (opc & 1) ^ 1;
18653 UInt regNoL = 0;
18654 UInt regNoR = 0;
18655 UChar imm = 0;
18656 IRTemp addr = IRTemp_INVALID;
18657 Int alen = 0;
18658 HChar dis_buf[50];
18660 /* This is a nasty kludge. We need to pass 2 x V128 to the helper
18661 (which is clean). Since we can't do that, use a dirty helper to
18662 compute the results directly from the XMM regs in the guest
18663 state. That means for the memory case, we need to move the left
18664 operand into a pseudo-register (XMM16, let's call it). */
18665 UChar modrm = getUChar(delta);
18666 if (epartIsReg(modrm)) {
18667 regNoL = eregOfRexRM(pfx, modrm);
18668 regNoR = gregOfRexRM(pfx, modrm);
18669 imm = getUChar(delta+1);
18670 delta += 1+1;
18671 } else {
18672 regNoL = 16; /* use XMM16 as an intermediary */
18673 regNoR = gregOfRexRM(pfx, modrm);
18674 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18675 /* No alignment check; I guess that makes sense, given that
18676 these insns are for dealing with C-style strings. */
18677 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
18678 imm = getUChar(delta+alen);
18679 delta += alen+1;
18682 /* Print the insn here, since dis_PCMPISTRI_3A doesn't do so
18683 itself. */
18684 if (regNoL == 16) {
18685 DIP("%spcmp%cstr%c $%x,%s,%s\n",
18686 isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
18687 (UInt)imm, dis_buf, nameXMMReg(regNoR));
18688 } else {
18689 DIP("%spcmp%cstr%c $%x,%s,%s\n",
18690 isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
18691 (UInt)imm, nameXMMReg(regNoL), nameXMMReg(regNoR));
18694 /* Handle special case(s). */
18695 if (imm == 0x3A && isISTRx && !isxSTRM) {
18696 return dis_PCMPISTRI_3A ( modrm, regNoL, regNoR, delta,
18697 opc, imm, dis_buf);
18700 /* Now we know the XMM reg numbers for the operands, and the
18701 immediate byte. Is it one we can actually handle? Throw out any
18702 cases for which the helper function has not been verified. */
18703 switch (imm) {
18704 case 0x00: case 0x02:
18705 case 0x08: case 0x0A: case 0x0C: case 0x0E:
18706 case 0x10: case 0x12: case 0x14:
18707 case 0x18: case 0x1A:
18708 case 0x30: case 0x34:
18709 case 0x38: case 0x3A:
18710 case 0x40: case 0x42: case 0x44: case 0x46:
18711 case 0x4A:
18712 case 0x62:
18713 case 0x70: case 0x72:
18714 break;
18715 // the 16-bit character versions of the above
18716 case 0x01: case 0x03:
18717 case 0x09: case 0x0B: case 0x0D:
18718 case 0x13:
18719 case 0x19: case 0x1B:
18720 case 0x39: case 0x3B:
18721 case 0x41: case 0x45:
18722 case 0x4B:
18723 break;
18724 default:
18725 return delta0; /*FAIL*/
18728 /* Who ya gonna call? Presumably not Ghostbusters. */
18729 void* fn = &amd64g_dirtyhelper_PCMPxSTRx;
18730 const HChar* nm = "amd64g_dirtyhelper_PCMPxSTRx";
18732 /* Round up the arguments. Note that this is a kludge -- the use
18733 of mkU64 rather than mkIRExpr_HWord implies the assumption that
18734 the host's word size is 64-bit. */
18735 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
18736 UInt gstOffR = ymmGuestRegOffset(regNoR);
18738 IRExpr* opc4_and_imm = mkU64((opc << 8) | (imm & 0xFF));
18739 IRExpr* gstOffLe = mkU64(gstOffL);
18740 IRExpr* gstOffRe = mkU64(gstOffR);
18741 IRExpr* edxIN = isISTRx ? mkU64(0) : getIRegRDX(8);
18742 IRExpr* eaxIN = isISTRx ? mkU64(0) : getIRegRAX(8);
18743 IRExpr** args
18744 = mkIRExprVec_6( IRExpr_GSPTR(),
18745 opc4_and_imm, gstOffLe, gstOffRe, edxIN, eaxIN );
18747 IRTemp resT = newTemp(Ity_I64);
18748 IRDirty* d = unsafeIRDirty_1_N( resT, 0/*regparms*/, nm, fn, args );
18749 /* It's not really a dirty call, but we can't use the clean helper
18750 mechanism here for the very lame reason that we can't pass 2 x
18751 V128s by value to a helper. Hence this roundabout scheme. */
18752 d->nFxState = 2;
18753 vex_bzero(&d->fxState, sizeof(d->fxState));
18754 d->fxState[0].fx = Ifx_Read;
18755 d->fxState[0].offset = gstOffL;
18756 d->fxState[0].size = sizeof(U128);
18757 d->fxState[1].fx = Ifx_Read;
18758 d->fxState[1].offset = gstOffR;
18759 d->fxState[1].size = sizeof(U128);
18760 if (isxSTRM) {
18761 /* Declare that the helper writes XMM0. */
18762 d->nFxState = 3;
18763 d->fxState[2].fx = Ifx_Write;
18764 d->fxState[2].offset = ymmGuestRegOffset(0);
18765 d->fxState[2].size = sizeof(U128);
18768 stmt( IRStmt_Dirty(d) );
18770 /* Now resT[15:0] holds the new OSZACP values, so the condition
18771 codes must be updated. And for an xSTRI case, resT[31:16] holds
18772 the new ECX value, so stash that too. */
18773 if (!isxSTRM) {
18774 putIReg64(R_RCX, binop(Iop_And64,
18775 binop(Iop_Shr64, mkexpr(resT), mkU8(16)),
18776 mkU64(0xFFFF)));
18779 /* Zap the upper half of the dest reg as per AVX conventions. */
18780 if (isxSTRM && isAvx)
18781 putYMMRegLane128(/*YMM*/0, 1, mkV128(0));
18783 stmt( IRStmt_Put(
18784 OFFB_CC_DEP1,
18785 binop(Iop_And64, mkexpr(resT), mkU64(0xFFFF))
18787 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
18788 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
18789 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
18791 return delta;
18795 static IRTemp math_PINSRB_128 ( IRTemp v128, IRTemp u8, UInt imm8 )
18797 vassert(imm8 >= 0 && imm8 <= 15);
18799 // Create a V128 value which has the selected byte in the
18800 // specified lane, and zeroes everywhere else.
18801 IRTemp tmp128 = newTemp(Ity_V128);
18802 IRTemp halfshift = newTemp(Ity_I64);
18803 assign(halfshift, binop(Iop_Shl64,
18804 unop(Iop_8Uto64, mkexpr(u8)),
18805 mkU8(8 * (imm8 & 7))));
18806 if (imm8 < 8) {
18807 assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
18808 } else {
18809 assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
18812 UShort mask = ~(1 << imm8);
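/* Illustration: mkV128 expands a 16-bit immediate so that bit i
   controls byte lane i (0x00 or 0xFF); e.g. imm8 == 5 gives
   mask = 0xFFDF, which clears exactly byte 5 of v128 before the new
   byte is ORed in. */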
18813 IRTemp res = newTemp(Ity_V128);
18814 assign( res, binop(Iop_OrV128,
18815 mkexpr(tmp128),
18816 binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
18817 return res;
18821 static IRTemp math_PINSRD_128 ( IRTemp v128, IRTemp u32, UInt imm8 )
18823 IRTemp z32 = newTemp(Ity_I32);
18824 assign(z32, mkU32(0));
18826 /* Surround u32 with zeroes as per imm, giving us something we can
18827 OR into a suitably masked-out v128.*/
18828 IRTemp withZs = newTemp(Ity_V128);
18829 UShort mask = 0;
18830 switch (imm8) {
18831 case 3: mask = 0x0FFF;
18832 assign(withZs, mkV128from32s(u32, z32, z32, z32));
18833 break;
18834 case 2: mask = 0xF0FF;
18835 assign(withZs, mkV128from32s(z32, u32, z32, z32));
18836 break;
18837 case 1: mask = 0xFF0F;
18838 assign(withZs, mkV128from32s(z32, z32, u32, z32));
18839 break;
18840 case 0: mask = 0xFFF0;
18841 assign(withZs, mkV128from32s(z32, z32, z32, u32));
18842 break;
18843 default: vassert(0);
18846 IRTemp res = newTemp(Ity_V128);
18847 assign(res, binop( Iop_OrV128,
18848 mkexpr(withZs),
18849 binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
18850 return res;
18854 static IRTemp math_PINSRQ_128 ( IRTemp v128, IRTemp u64, UInt imm8 )
18856 /* Surround u64 with zeroes as per imm, giving us something we can
18857 OR into a suitably masked-out v128.*/
18858 IRTemp withZs = newTemp(Ity_V128);
18859 UShort mask = 0;
18860 if (imm8 == 0) {
18861 mask = 0xFF00;
18862 assign(withZs, binop(Iop_64HLtoV128, mkU64(0), mkexpr(u64)));
18863 } else {
18864 vassert(imm8 == 1);
18865 mask = 0x00FF;
18866 assign( withZs, binop(Iop_64HLtoV128, mkexpr(u64), mkU64(0)));
18869 IRTemp res = newTemp(Ity_V128);
18870 assign( res, binop( Iop_OrV128,
18871 mkexpr(withZs),
18872 binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
18873 return res;
18877 static IRTemp math_INSERTPS ( IRTemp dstV, IRTemp toInsertD, UInt imm8 )
18879 const IRTemp inval = IRTemp_INVALID;
18880 IRTemp dstDs[4] = { inval, inval, inval, inval };
18881 breakupV128to32s( dstV, &dstDs[3], &dstDs[2], &dstDs[1], &dstDs[0] );
18883 vassert(imm8 <= 255);
18884 dstDs[(imm8 >> 4) & 3] = toInsertD; /* "imm8_count_d" */
18886 UInt imm8_zmask = (imm8 & 15);
18887 IRTemp zero_32 = newTemp(Ity_I32);
18888 assign( zero_32, mkU32(0) );
18889 IRTemp resV = newTemp(Ity_V128);
18890 assign( resV, mkV128from32s(
18891 ((imm8_zmask & 8) == 8) ? zero_32 : dstDs[3],
18892 ((imm8_zmask & 4) == 4) ? zero_32 : dstDs[2],
18893 ((imm8_zmask & 2) == 2) ? zero_32 : dstDs[1],
18894 ((imm8_zmask & 1) == 1) ? zero_32 : dstDs[0]) );
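/* Worked example (illustrative): imm8 = 0x1C has
   count_d = (imm8 >> 4) & 3 = 1 and zmask = imm8 & 15 = 0xC, so lane 1
   receives toInsertD, lanes 2 and 3 are zeroed, and lane 0 is kept
   from dstV. */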
18895 return resV;
18899 static Long dis_PEXTRB_128_GtoE ( const VexAbiInfo* vbi, Prefix pfx,
18900 Long delta, Bool isAvx )
18902 IRTemp addr = IRTemp_INVALID;
18903 Int alen = 0;
18904 HChar dis_buf[50];
18905 IRTemp xmm_vec = newTemp(Ity_V128);
18906 IRTemp sel_lane = newTemp(Ity_I32);
18907 IRTemp shr_lane = newTemp(Ity_I32);
18908 const HChar* mbV = isAvx ? "v" : "";
18909 UChar modrm = getUChar(delta);
18910 IRTemp t3, t2, t1, t0;
18911 Int imm8;
18912 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
18913 t3 = t2 = t1 = t0 = IRTemp_INVALID;
18914 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
18916 if ( epartIsReg( modrm ) ) {
18917 imm8 = (Int)getUChar(delta+1);
18918 } else {
18919 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18920 imm8 = (Int)getUChar(delta+alen);
18922 switch ( (imm8 >> 2) & 3 ) {
18923 case 0: assign( sel_lane, mkexpr(t0) ); break;
18924 case 1: assign( sel_lane, mkexpr(t1) ); break;
18925 case 2: assign( sel_lane, mkexpr(t2) ); break;
18926 case 3: assign( sel_lane, mkexpr(t3) ); break;
18927 default: vassert(0);
18929 assign( shr_lane,
18930 binop( Iop_Shr32, mkexpr(sel_lane), mkU8(((imm8 & 3)*8)) ) );
18932 if ( epartIsReg( modrm ) ) {
18933 putIReg64( eregOfRexRM(pfx,modrm),
18934 unop( Iop_32Uto64,
18935 binop(Iop_And32, mkexpr(shr_lane), mkU32(255)) ) );
18936 delta += 1+1;
18937 DIP( "%spextrb $%d, %s,%s\n", mbV, imm8,
18938 nameXMMReg( gregOfRexRM(pfx, modrm) ),
18939 nameIReg64( eregOfRexRM(pfx, modrm) ) );
18940 } else {
18941 storeLE( mkexpr(addr), unop(Iop_32to8, mkexpr(shr_lane) ) );
18942 delta += alen+1;
18943 DIP( "%spextrb $%d,%s,%s\n", mbV,
18944 imm8, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
18947 return delta;
18951 static IRTemp math_DPPD_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 )
18953 vassert(imm8 < 256);
18954 UShort imm8_perms[4] = { 0x0000, 0x00FF, 0xFF00, 0xFFFF };
18955 IRTemp and_vec = newTemp(Ity_V128);
18956 IRTemp sum_vec = newTemp(Ity_V128);
18957 IRTemp rm = newTemp(Ity_I32);
18958 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
18959 assign( and_vec, binop( Iop_AndV128,
18960 triop( Iop_Mul64Fx2,
18961 mkexpr(rm),
18962 mkexpr(dst_vec), mkexpr(src_vec) ),
18963 mkV128( imm8_perms[ ((imm8 >> 4) & 3) ] ) ) );
18965 assign( sum_vec, binop( Iop_Add64F0x2,
18966 binop( Iop_InterleaveHI64x2,
18967 mkexpr(and_vec), mkexpr(and_vec) ),
18968 binop( Iop_InterleaveLO64x2,
18969 mkexpr(and_vec), mkexpr(and_vec) ) ) );
18970 IRTemp res = newTemp(Ity_V128);
18971 assign(res, binop( Iop_AndV128,
18972 binop( Iop_InterleaveLO64x2,
18973 mkexpr(sum_vec), mkexpr(sum_vec) ),
18974 mkV128( imm8_perms[ (imm8 & 3) ] ) ) );
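/* Illustration: imm8 = 0x31 selects both products for the sum
   (imm8[5:4] = 3 -> mask 0xFFFF) and writes that sum only to the low
   64-bit lane of the result (imm8[1:0] = 1 -> mask 0x00FF), zeroing
   the high lane. */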
18975 return res;
18979 static IRTemp math_DPPS_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 )
18981 vassert(imm8 < 256);
18982 IRTemp tmp_prod_vec = newTemp(Ity_V128);
18983 IRTemp prod_vec = newTemp(Ity_V128);
18984 IRTemp sum_vec = newTemp(Ity_V128);
18985 IRTemp rm = newTemp(Ity_I32);
18986 IRTemp v3, v2, v1, v0;
18987 v3 = v2 = v1 = v0 = IRTemp_INVALID;
18988 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
18989 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
18990 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
18991 0xFFFF };
18993 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
18994 assign( tmp_prod_vec,
18995 binop( Iop_AndV128,
18996 triop( Iop_Mul32Fx4,
18997 mkexpr(rm), mkexpr(dst_vec), mkexpr(src_vec) ),
18998 mkV128( imm8_perms[((imm8 >> 4)& 15)] ) ) );
18999 breakupV128to32s( tmp_prod_vec, &v3, &v2, &v1, &v0 );
19000 assign( prod_vec, mkV128from32s( v3, v1, v2, v0 ) );
19002 assign( sum_vec, triop( Iop_Add32Fx4,
19003 mkexpr(rm),
19004 binop( Iop_InterleaveHI32x4,
19005 mkexpr(prod_vec), mkexpr(prod_vec) ),
19006 binop( Iop_InterleaveLO32x4,
19007 mkexpr(prod_vec), mkexpr(prod_vec) ) ) );
19009 IRTemp res = newTemp(Ity_V128);
19010 assign( res, binop( Iop_AndV128,
19011 triop( Iop_Add32Fx4,
19012 mkexpr(rm),
19013 binop( Iop_InterleaveHI32x4,
19014 mkexpr(sum_vec), mkexpr(sum_vec) ),
19015 binop( Iop_InterleaveLO32x4,
19016 mkexpr(sum_vec), mkexpr(sum_vec) ) ),
19017 mkV128( imm8_perms[ (imm8 & 15) ] ) ) );
19018 return res;
19022 static IRTemp math_MPSADBW_128 ( IRTemp dst_vec, IRTemp src_vec, UInt imm8 )
19024 /* Mask out bits of the operands we don't need. This isn't
19025 strictly necessary, but it does ensure Memcheck doesn't
19026 give us any false uninitialised value errors as a
19027 result. */
19028 UShort src_mask[4] = { 0x000F, 0x00F0, 0x0F00, 0xF000 };
19029 UShort dst_mask[2] = { 0x07FF, 0x7FF0 };
19031 IRTemp src_maskV = newTemp(Ity_V128);
19032 IRTemp dst_maskV = newTemp(Ity_V128);
19033 assign(src_maskV, mkV128( src_mask[ imm8 & 3 ] ));
19034 assign(dst_maskV, mkV128( dst_mask[ (imm8 >> 2) & 1 ] ));
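/* Illustration: imm8 & 3 picks the 4-byte block of src that supplies
   the reference quadruplet (e.g. imm8 & 3 == 1 keeps bytes 4..7), and
   (imm8 >> 2) & 1 picks whether the 11-byte sliding window of dst
   starts at byte 0 or byte 4. */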
19036 IRTemp src_masked = newTemp(Ity_V128);
19037 IRTemp dst_masked = newTemp(Ity_V128);
19038 assign(src_masked, binop(Iop_AndV128, mkexpr(src_vec), mkexpr(src_maskV)));
19039 assign(dst_masked, binop(Iop_AndV128, mkexpr(dst_vec), mkexpr(dst_maskV)));
19041 /* Generate 4 x 64-bit values that we can hand to a clean helper */
19042 IRTemp sHi = newTemp(Ity_I64);
19043 IRTemp sLo = newTemp(Ity_I64);
19044 assign( sHi, unop(Iop_V128HIto64, mkexpr(src_masked)) );
19045 assign( sLo, unop(Iop_V128to64, mkexpr(src_masked)) );
19047 IRTemp dHi = newTemp(Ity_I64);
19048 IRTemp dLo = newTemp(Ity_I64);
19049 assign( dHi, unop(Iop_V128HIto64, mkexpr(dst_masked)) );
19050 assign( dLo, unop(Iop_V128to64, mkexpr(dst_masked)) );
19052 /* Compute halves of the result separately */
19053 IRTemp resHi = newTemp(Ity_I64);
19054 IRTemp resLo = newTemp(Ity_I64);
19056 IRExpr** argsHi
19057 = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo),
19058 mkU64( 0x80 | (imm8 & 7) ));
19059 IRExpr** argsLo
19060 = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo),
19061 mkU64( 0x00 | (imm8 & 7) ));
19063 assign(resHi, mkIRExprCCall( Ity_I64, 0/*regparm*/,
19064 "amd64g_calc_mpsadbw",
19065 &amd64g_calc_mpsadbw, argsHi ));
19066 assign(resLo, mkIRExprCCall( Ity_I64, 0/*regparm*/,
19067 "amd64g_calc_mpsadbw",
19068 &amd64g_calc_mpsadbw, argsLo ));
19070 IRTemp res = newTemp(Ity_V128);
19071 assign(res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo)));
19072 return res;
19075 static Long dis_EXTRACTPS ( const VexAbiInfo* vbi, Prefix pfx,
19076 Long delta, Bool isAvx )
19078 IRTemp addr = IRTemp_INVALID;
19079 Int alen = 0;
19080 HChar dis_buf[50];
19081 UChar modrm = getUChar(delta);
19082 Int imm8_10;
19083 IRTemp xmm_vec = newTemp(Ity_V128);
19084 IRTemp src_dword = newTemp(Ity_I32);
19085 UInt rG = gregOfRexRM(pfx,modrm);
19086 IRTemp t3, t2, t1, t0;
19087 t3 = t2 = t1 = t0 = IRTemp_INVALID;
19089 assign( xmm_vec, getXMMReg( rG ) );
19090 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
19092 if ( epartIsReg( modrm ) ) {
19093 imm8_10 = (Int)(getUChar(delta+1) & 3);
19094 } else {
19095 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19096 imm8_10 = (Int)(getUChar(delta+alen) & 3);
19099 switch ( imm8_10 ) {
19100 case 0: assign( src_dword, mkexpr(t0) ); break;
19101 case 1: assign( src_dword, mkexpr(t1) ); break;
19102 case 2: assign( src_dword, mkexpr(t2) ); break;
19103 case 3: assign( src_dword, mkexpr(t3) ); break;
19104 default: vassert(0);
19107 if ( epartIsReg( modrm ) ) {
19108 UInt rE = eregOfRexRM(pfx,modrm);
19109 putIReg32( rE, mkexpr(src_dword) );
19110 delta += 1+1;
19111 DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10,
19112 nameXMMReg( rG ), nameIReg32( rE ) );
19113 } else {
19114 storeLE( mkexpr(addr), mkexpr(src_dword) );
19115 delta += alen+1;
19116 DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10,
19117 nameXMMReg( rG ), dis_buf );
19120 return delta;
19124 static IRTemp math_PCLMULQDQ( IRTemp dV, IRTemp sV, UInt imm8 )
19126 IRTemp t0 = newTemp(Ity_I64);
19127 IRTemp t1 = newTemp(Ity_I64);
19128 assign(t0, unop((imm8&1)? Iop_V128HIto64 : Iop_V128to64,
19129 mkexpr(dV)));
19130 assign(t1, unop((imm8&16) ? Iop_V128HIto64 : Iop_V128to64,
19131 mkexpr(sV)));
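/* Example: imm8 bit 0 selects the dV qword and bit 4 the sV qword, so
   imm8 = 0x10 multiplies the low 64 bits of dV by the high 64 bits of
   sV. */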
19133 IRTemp t2 = newTemp(Ity_I64);
19134 IRTemp t3 = newTemp(Ity_I64);
19136 IRExpr** args;
19138 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(0));
19139 assign(t2, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
19140 &amd64g_calculate_pclmul, args));
19141 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(1));
19142 assign(t3, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
19143 &amd64g_calculate_pclmul, args));
19145 IRTemp res = newTemp(Ity_V128);
19146 assign(res, binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)));
19147 return res;
19151 __attribute__((noinline))
19152 static
19153 Long dis_ESC_0F3A__SSE4 ( Bool* decode_OK,
19154 const VexAbiInfo* vbi,
19155 Prefix pfx, Int sz, Long deltaIN )
19157 IRTemp addr = IRTemp_INVALID;
19158 UChar modrm = 0;
19159 Int alen = 0;
19160 HChar dis_buf[50];
19162 *decode_OK = False;
19164 Long delta = deltaIN;
19165 UChar opc = getUChar(delta);
19166 delta++;
19167 switch (opc) {
19169 case 0x08:
19170 /* 66 0F 3A 08 /r ib = ROUNDPS imm8, xmm2/m128, xmm1 */
19171 if (have66noF2noF3(pfx) && sz == 2) {
19173 IRTemp src0 = newTemp(Ity_F32);
19174 IRTemp src1 = newTemp(Ity_F32);
19175 IRTemp src2 = newTemp(Ity_F32);
19176 IRTemp src3 = newTemp(Ity_F32);
19177 IRTemp res0 = newTemp(Ity_F32);
19178 IRTemp res1 = newTemp(Ity_F32);
19179 IRTemp res2 = newTemp(Ity_F32);
19180 IRTemp res3 = newTemp(Ity_F32);
19181 IRTemp rm = newTemp(Ity_I32);
19182 Int imm = 0;
19184 modrm = getUChar(delta);
19186 if (epartIsReg(modrm)) {
19187 assign( src0,
19188 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
19189 assign( src1,
19190 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 1 ) );
19191 assign( src2,
19192 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 2 ) );
19193 assign( src3,
19194 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 3 ) );
19195 imm = getUChar(delta+1);
19196 if (imm & ~15) goto decode_failure;
19197 delta += 1+1;
19198 DIP( "roundps $%d,%s,%s\n",
19199 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
19200 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19201 } else {
19202 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19203 gen_SEGV_if_not_16_aligned(addr);
19204 assign( src0, loadLE(Ity_F32,
19205 binop(Iop_Add64, mkexpr(addr), mkU64(0) )));
19206 assign( src1, loadLE(Ity_F32,
19207 binop(Iop_Add64, mkexpr(addr), mkU64(4) )));
19208 assign( src2, loadLE(Ity_F32,
19209 binop(Iop_Add64, mkexpr(addr), mkU64(8) )));
19210 assign( src3, loadLE(Ity_F32,
19211 binop(Iop_Add64, mkexpr(addr), mkU64(12) )));
19212 imm = getUChar(delta+alen);
19213 if (imm & ~15) goto decode_failure;
19214 delta += alen+1;
19215 DIP( "roundps $%d,%s,%s\n",
19216 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19219 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19220 that encoding is the same as the encoding for IRRoundingMode,
19221 we can use that value directly in the IR as a rounding
19222 mode. */
19223 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
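/* Example: imm = 4 rounds using the current SSE (MXCSR) mode, while
   imm = 0..3 forces nearest / down / up / toward-zero respectively,
   which is exactly IRRoundingMode's encoding. */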
19225 assign(res0, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src0)) );
19226 assign(res1, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src1)) );
19227 assign(res2, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src2)) );
19228 assign(res3, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src3)) );
19230 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
19231 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );
19232 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 2, mkexpr(res2) );
19233 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 3, mkexpr(res3) );
19235 goto decode_success;
19237 break;
19239 case 0x09:
19240 /* 66 0F 3A 09 /r ib = ROUNDPD imm8, xmm2/m128, xmm1 */
19241 if (have66noF2noF3(pfx) && sz == 2) {
19243 IRTemp src0 = newTemp(Ity_F64);
19244 IRTemp src1 = newTemp(Ity_F64);
19245 IRTemp res0 = newTemp(Ity_F64);
19246 IRTemp res1 = newTemp(Ity_F64);
19247 IRTemp rm = newTemp(Ity_I32);
19248 Int imm = 0;
19250 modrm = getUChar(delta);
19252 if (epartIsReg(modrm)) {
19253 assign( src0,
19254 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) );
19255 assign( src1,
19256 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 1 ) );
19257 imm = getUChar(delta+1);
19258 if (imm & ~15) goto decode_failure;
19259 delta += 1+1;
19260 DIP( "roundpd $%d,%s,%s\n",
19261 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
19262 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19263 } else {
19264 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19265 gen_SEGV_if_not_16_aligned(addr);
19266 assign( src0, loadLE(Ity_F64,
19267 binop(Iop_Add64, mkexpr(addr), mkU64(0) )));
19268 assign( src1, loadLE(Ity_F64,
19269 binop(Iop_Add64, mkexpr(addr), mkU64(8) )));
19270 imm = getUChar(delta+alen);
19271 if (imm & ~15) goto decode_failure;
19272 delta += alen+1;
19273 DIP( "roundpd $%d,%s,%s\n",
19274 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19277 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19278 that encoding is the same as the encoding for IRRoundingMode,
19279 we can use that value directly in the IR as a rounding
19280 mode. */
19281 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
19283 assign(res0, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src0)) );
19284 assign(res1, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src1)) );
19286 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
19287 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );
19289 goto decode_success;
19291 break;
19293 case 0x0A:
19294 case 0x0B:
19295 /* 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
19296 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
19298 if (have66noF2noF3(pfx) && sz == 2) {
19300 Bool isD = opc == 0x0B;
19301 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
19302 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
19303 Int imm = 0;
19305 modrm = getUChar(delta);
19307 if (epartIsReg(modrm)) {
19308 assign( src,
19309 isD ? getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 )
19310 : getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
19311 imm = getUChar(delta+1);
19312 if (imm & ~15) goto decode_failure;
19313 delta += 1+1;
19314 DIP( "rounds%c $%d,%s,%s\n",
19315 isD ? 'd' : 's',
19316 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
19317 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19318 } else {
19319 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19320 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
19321 imm = getUChar(delta+alen);
19322 if (imm & ~15) goto decode_failure;
19323 delta += alen+1;
19324 DIP( "rounds%c $%d,%s,%s\n",
19325 isD ? 'd' : 's',
19326 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19329 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19330 that encoding is the same as the encoding for IRRoundingMode,
19331 we can use that value directly in the IR as a rounding
19332 mode. */
19333 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
19334 (imm & 4) ? get_sse_roundingmode()
19335 : mkU32(imm & 3),
19336 mkexpr(src)) );
19338 if (isD)
19339 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
19340 else
19341 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
19343 goto decode_success;
19345 break;
19347 case 0x0C:
19348 /* 66 0F 3A 0C /r ib = BLENDPS xmm1, xmm2/m128, imm8
19349 Blend Packed Single Precision Floating-Point Values (XMM) */
19350 if (have66noF2noF3(pfx) && sz == 2) {
19352 Int imm8;
19353 IRTemp dst_vec = newTemp(Ity_V128);
19354 IRTemp src_vec = newTemp(Ity_V128);
19356 modrm = getUChar(delta);
19358 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
19360 if ( epartIsReg( modrm ) ) {
19361 imm8 = (Int)getUChar(delta+1);
19362 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
19363 delta += 1+1;
19364 DIP( "blendps $%d, %s,%s\n", imm8,
19365 nameXMMReg( eregOfRexRM(pfx, modrm) ),
19366 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19367 } else {
19368 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19369 1/* imm8 is 1 byte after the amode */ );
19370 gen_SEGV_if_not_16_aligned( addr );
19371 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19372 imm8 = (Int)getUChar(delta+alen);
19373 delta += alen+1;
19374 DIP( "blendps $%d, %s,%s\n",
19375 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19378 putXMMReg( gregOfRexRM(pfx, modrm),
19379 mkexpr( math_BLENDPS_128( src_vec, dst_vec, imm8) ) );
19380 goto decode_success;
19382 break;
19384 case 0x0D:
19385 /* 66 0F 3A 0D /r ib = BLENDPD xmm1, xmm2/m128, imm8
19386 Blend Packed Double Precision Floating-Point Values (XMM) */
19387 if (have66noF2noF3(pfx) && sz == 2) {
19389 Int imm8;
19390 IRTemp dst_vec = newTemp(Ity_V128);
19391 IRTemp src_vec = newTemp(Ity_V128);
19393 modrm = getUChar(delta);
19394 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
19396 if ( epartIsReg( modrm ) ) {
19397 imm8 = (Int)getUChar(delta+1);
19398 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
19399 delta += 1+1;
19400 DIP( "blendpd $%d, %s,%s\n", imm8,
19401 nameXMMReg( eregOfRexRM(pfx, modrm) ),
19402 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19403 } else {
19404 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19405 1/* imm8 is 1 byte after the amode */ );
19406 gen_SEGV_if_not_16_aligned( addr );
19407 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19408 imm8 = (Int)getUChar(delta+alen);
19409 delta += alen+1;
19410 DIP( "blendpd $%d, %s,%s\n",
19411 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19414 putXMMReg( gregOfRexRM(pfx, modrm),
19415 mkexpr( math_BLENDPD_128( src_vec, dst_vec, imm8) ) );
19416 goto decode_success;
19418 break;
19420 case 0x0E:
19421 /* 66 0F 3A 0E /r ib = PBLENDW xmm1, xmm2/m128, imm8
19422 Blend Packed Words (XMM) */
19423 if (have66noF2noF3(pfx) && sz == 2) {
19425 Int imm8;
19426 IRTemp dst_vec = newTemp(Ity_V128);
19427 IRTemp src_vec = newTemp(Ity_V128);
19429 modrm = getUChar(delta);
19431 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
19433 if ( epartIsReg( modrm ) ) {
19434 imm8 = (Int)getUChar(delta+1);
19435 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
19436 delta += 1+1;
19437 DIP( "pblendw $%d, %s,%s\n", imm8,
19438 nameXMMReg( eregOfRexRM(pfx, modrm) ),
19439 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19440 } else {
19441 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19442 1/* imm8 is 1 byte after the amode */ );
19443 gen_SEGV_if_not_16_aligned( addr );
19444 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19445 imm8 = (Int)getUChar(delta+alen);
19446 delta += alen+1;
19447 DIP( "pblendw $%d, %s,%s\n",
19448 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19451 putXMMReg( gregOfRexRM(pfx, modrm),
19452 mkexpr( math_PBLENDW_128( src_vec, dst_vec, imm8) ) );
19453 goto decode_success;
19455 break;
19457 case 0x14:
19458 /* 66 0F 3A 14 /r ib = PEXTRB reg/m8, xmm, imm8
19459 Extract Byte from xmm, store in mem or zero-extend + store in gen.reg.
19460 (XMM) */
19461 if (have66noF2noF3(pfx) && sz == 2) {
19462 delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ );
19463 goto decode_success;
19465 break;
19467 case 0x15:
19468 /* 66 0F 3A 15 /r ib = PEXTRW r/m16, xmm, imm8
19469 Extract Word from xmm, store in mem or zero-extend + store in gen.reg.
19470 (XMM) */
19471 if (have66noF2noF3(pfx) && sz == 2) {
19472 delta = dis_PEXTRW( vbi, pfx, delta, False/*!isAvx*/ );
19473 goto decode_success;
19475 break;
19477 case 0x16:
19478 /* 66 no-REX.W 0F 3A 16 /r ib = PEXTRD reg/mem32, xmm2, imm8
19479 Extract Doubleword int from xmm reg and store in gen.reg or mem. (XMM)
19480 Note that this insn has the same opcodes as PEXTRQ, but
19481 here the REX.W bit is _not_ present */
19482 if (have66noF2noF3(pfx)
19483 && sz == 2 /* REX.W is _not_ present */) {
19484 delta = dis_PEXTRD( vbi, pfx, delta, False/*!isAvx*/ );
19485 goto decode_success;
19487 /* 66 REX.W 0F 3A 16 /r ib = PEXTRQ reg/mem64, xmm2, imm8
19488 Extract Quadword int from xmm reg and store in gen.reg or mem. (XMM)
19489 Note that this insn has the same opcodes as PEXTRD, but
19490 here the REX.W bit is present */
19491 if (have66noF2noF3(pfx)
19492 && sz == 8 /* REX.W is present */) {
19493 delta = dis_PEXTRQ( vbi, pfx, delta, False/*!isAvx*/);
19494 goto decode_success;
19496 break;
19498 case 0x17:
19499 /* 66 0F 3A 17 /r ib = EXTRACTPS reg/mem32, xmm2, imm8 Extract
19500 float from xmm reg and store in gen.reg or mem. This is
19501 identical to PEXTRD, except that REX.W appears to be ignored.
19503 if (have66noF2noF3(pfx)
19504 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
19505 delta = dis_EXTRACTPS( vbi, pfx, delta, False/*!isAvx*/ );
19506 goto decode_success;
19508 break;
19510 case 0x20:
19511 /* 66 0F 3A 20 /r ib = PINSRB xmm1, r32/m8, imm8
19512 Extract byte from r32/m8 and insert into xmm1 */
19513 if (have66noF2noF3(pfx) && sz == 2) {
19514 Int imm8;
19515 IRTemp new8 = newTemp(Ity_I8);
19516 modrm = getUChar(delta);
19517 UInt rG = gregOfRexRM(pfx, modrm);
19518 if ( epartIsReg( modrm ) ) {
19519 UInt rE = eregOfRexRM(pfx,modrm);
19520 imm8 = (Int)(getUChar(delta+1) & 0xF);
19521 assign( new8, unop(Iop_32to8, getIReg32(rE)) );
19522 delta += 1+1;
19523 DIP( "pinsrb $%d,%s,%s\n", imm8,
19524 nameIReg32(rE), nameXMMReg(rG) );
19525 } else {
19526 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19527 imm8 = (Int)(getUChar(delta+alen) & 0xF);
19528 assign( new8, loadLE( Ity_I8, mkexpr(addr) ) );
19529 delta += alen+1;
19530 DIP( "pinsrb $%d,%s,%s\n",
19531 imm8, dis_buf, nameXMMReg(rG) );
19533 IRTemp src_vec = newTemp(Ity_V128);
19534 assign(src_vec, getXMMReg( gregOfRexRM(pfx, modrm) ));
19535 IRTemp res = math_PINSRB_128( src_vec, new8, imm8 );
19536 putXMMReg( rG, mkexpr(res) );
19537 goto decode_success;
19539 break;
19541 case 0x21:
19542 /* 66 0F 3A 21 /r ib = INSERTPS imm8, xmm2/m32, xmm1
19543 Insert Packed Single Precision Floating-Point Value (XMM) */
19544 if (have66noF2noF3(pfx) && sz == 2) {
19545 UInt imm8;
19546 IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */
19547 const IRTemp inval = IRTemp_INVALID;
19549 modrm = getUChar(delta);
19550 UInt rG = gregOfRexRM(pfx, modrm);
19552 if ( epartIsReg( modrm ) ) {
19553 UInt rE = eregOfRexRM(pfx, modrm);
19554 IRTemp vE = newTemp(Ity_V128);
19555 assign( vE, getXMMReg(rE) );
19556 IRTemp dsE[4] = { inval, inval, inval, inval };
19557 breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] );
19558 imm8 = getUChar(delta+1);
19559 d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */
19560 delta += 1+1;
19561 DIP( "insertps $%u, %s,%s\n",
19562 imm8, nameXMMReg(rE), nameXMMReg(rG) );
19563 } else {
19564 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19565 assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) );
19566 imm8 = getUChar(delta+alen);
19567 delta += alen+1;
19568 DIP( "insertps $%u, %s,%s\n",
19569 imm8, dis_buf, nameXMMReg(rG) );
19572 IRTemp vG = newTemp(Ity_V128);
19573 assign( vG, getXMMReg(rG) );
19575 putXMMReg( rG, mkexpr(math_INSERTPS( vG, d2ins, imm8 )) );
19576 goto decode_success;
19578 break;
19580 case 0x22:
19581 /* 66 no-REX.W 0F 3A 22 /r ib = PINSRD xmm1, r/m32, imm8
19582 Extract Doubleword int from gen.reg/mem32 and insert into xmm1 */
19583 if (have66noF2noF3(pfx)
19584 && sz == 2 /* REX.W is NOT present */) {
19585 Int imm8_10;
19586 IRTemp src_u32 = newTemp(Ity_I32);
19587 modrm = getUChar(delta);
19588 UInt rG = gregOfRexRM(pfx, modrm);
19590 if ( epartIsReg( modrm ) ) {
19591 UInt rE = eregOfRexRM(pfx,modrm);
19592 imm8_10 = (Int)(getUChar(delta+1) & 3);
19593 assign( src_u32, getIReg32( rE ) );
19594 delta += 1+1;
19595 DIP( "pinsrd $%d, %s,%s\n",
19596 imm8_10, nameIReg32(rE), nameXMMReg(rG) );
19597 } else {
19598 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19599 imm8_10 = (Int)(getUChar(delta+alen) & 3);
19600 assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) );
19601 delta += alen+1;
19602 DIP( "pinsrd $%d, %s,%s\n",
19603 imm8_10, dis_buf, nameXMMReg(rG) );
19606 IRTemp src_vec = newTemp(Ity_V128);
19607 assign(src_vec, getXMMReg( rG ));
19608 IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 );
19609 putXMMReg( rG, mkexpr(res_vec) );
19610 goto decode_success;
19612 /* 66 REX.W 0F 3A 22 /r ib = PINSRQ xmm1, r/m64, imm8
19613 Extract Quadword int from gen.reg/mem64 and insert into xmm1 */
19614 if (have66noF2noF3(pfx)
19615 && sz == 8 /* REX.W is present */) {
19616 Int imm8_0;
19617 IRTemp src_u64 = newTemp(Ity_I64);
19618 modrm = getUChar(delta);
19619 UInt rG = gregOfRexRM(pfx, modrm);
19621 if ( epartIsReg( modrm ) ) {
19622 UInt rE = eregOfRexRM(pfx,modrm);
19623 imm8_0 = (Int)(getUChar(delta+1) & 1);
19624 assign( src_u64, getIReg64( rE ) );
19625 delta += 1+1;
19626 DIP( "pinsrq $%d, %s,%s\n",
19627 imm8_0, nameIReg64(rE), nameXMMReg(rG) );
19628 } else {
19629 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19630 imm8_0 = (Int)(getUChar(delta+alen) & 1);
19631 assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) );
19632 delta += alen+1;
19633 DIP( "pinsrq $%d, %s,%s\n",
19634 imm8_0, dis_buf, nameXMMReg(rG) );
19637 IRTemp src_vec = newTemp(Ity_V128);
19638 assign(src_vec, getXMMReg( rG ));
19639 IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 );
19640 putXMMReg( rG, mkexpr(res_vec) );
19641 goto decode_success;
19643 break;
19645 case 0x40:
19646 /* 66 0F 3A 40 /r ib = DPPS xmm1, xmm2/m128, imm8
19647 Dot Product of Packed Single Precision Floating-Point Values (XMM) */
19648 if (have66noF2noF3(pfx) && sz == 2) {
19649 modrm = getUChar(delta);
19650 Int imm8;
19651 IRTemp src_vec = newTemp(Ity_V128);
19652 IRTemp dst_vec = newTemp(Ity_V128);
19653 UInt rG = gregOfRexRM(pfx, modrm);
19654 assign( dst_vec, getXMMReg( rG ) );
19655 if ( epartIsReg( modrm ) ) {
19656 UInt rE = eregOfRexRM(pfx, modrm);
19657 imm8 = (Int)getUChar(delta+1);
19658 assign( src_vec, getXMMReg(rE) );
19659 delta += 1+1;
19660 DIP( "dpps $%d, %s,%s\n",
19661 imm8, nameXMMReg(rE), nameXMMReg(rG) );
19662 } else {
19663 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19664 1/* imm8 is 1 byte after the amode */ );
19665 gen_SEGV_if_not_16_aligned( addr );
19666 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19667 imm8 = (Int)getUChar(delta+alen);
19668 delta += alen+1;
19669 DIP( "dpps $%d, %s,%s\n",
19670 imm8, dis_buf, nameXMMReg(rG) );
19672 IRTemp res = math_DPPS_128( src_vec, dst_vec, imm8 );
19673 putXMMReg( rG, mkexpr(res) );
19674 goto decode_success;
19676 break;
19678 case 0x41:
19679 /* 66 0F 3A 41 /r ib = DPPD xmm1, xmm2/m128, imm8
19680 Dot Product of Packed Double Precision Floating-Point Values (XMM) */
19681 if (have66noF2noF3(pfx) && sz == 2) {
19682 modrm = getUChar(delta);
19683 Int imm8;
19684 IRTemp src_vec = newTemp(Ity_V128);
19685 IRTemp dst_vec = newTemp(Ity_V128);
19686 UInt rG = gregOfRexRM(pfx, modrm);
19687 assign( dst_vec, getXMMReg( rG ) );
19688 if ( epartIsReg( modrm ) ) {
19689 UInt rE = eregOfRexRM(pfx, modrm);
19690 imm8 = (Int)getUChar(delta+1);
19691 assign( src_vec, getXMMReg(rE) );
19692 delta += 1+1;
19693 DIP( "dppd $%d, %s,%s\n",
19694 imm8, nameXMMReg(rE), nameXMMReg(rG) );
19695 } else {
19696 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19697 1/* imm8 is 1 byte after the amode */ );
19698 gen_SEGV_if_not_16_aligned( addr );
19699 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19700 imm8 = (Int)getUChar(delta+alen);
19701 delta += alen+1;
19702 DIP( "dppd $%d, %s,%s\n",
19703 imm8, dis_buf, nameXMMReg(rG) );
19705 IRTemp res = math_DPPD_128( src_vec, dst_vec, imm8 );
19706 putXMMReg( rG, mkexpr(res) );
19707 goto decode_success;
19709 break;
19711 case 0x42:
19712 /* 66 0F 3A 42 /r ib = MPSADBW xmm1, xmm2/m128, imm8
19713 Multiple Packed Sums of Absolute Difference (XMM) */
19714 if (have66noF2noF3(pfx) && sz == 2) {
19715 Int imm8;
19716 IRTemp src_vec = newTemp(Ity_V128);
19717 IRTemp dst_vec = newTemp(Ity_V128);
19718 modrm = getUChar(delta);
19719 UInt rG = gregOfRexRM(pfx, modrm);
19721 assign( dst_vec, getXMMReg(rG) );
19723 if ( epartIsReg( modrm ) ) {
19724 UInt rE = eregOfRexRM(pfx, modrm);
19726 imm8 = (Int)getUChar(delta+1);
19727 assign( src_vec, getXMMReg(rE) );
19728 delta += 1+1;
19729 DIP( "mpsadbw $%d, %s,%s\n", imm8,
19730 nameXMMReg(rE), nameXMMReg(rG) );
19731 } else {
19732 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19733 1/* imm8 is 1 byte after the amode */ );
19734 gen_SEGV_if_not_16_aligned( addr );
19735 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19736 imm8 = (Int)getUChar(delta+alen);
19737 delta += alen+1;
19738 DIP( "mpsadbw $%d, %s,%s\n", imm8, dis_buf, nameXMMReg(rG) );
19741 putXMMReg( rG, mkexpr( math_MPSADBW_128(dst_vec, src_vec, imm8) ) );
19742 goto decode_success;
19744 break;
19746 case 0x44:
19747 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
19748 * Carry-less multiplication of selected XMM quadwords into XMM
19749 * registers (a.k.a. multiplication of polynomials over GF(2))
19751 if (have66noF2noF3(pfx) && sz == 2) {
19753 Int imm8;
19754 IRTemp svec = newTemp(Ity_V128);
19755 IRTemp dvec = newTemp(Ity_V128);
19756 modrm = getUChar(delta);
19757 UInt rG = gregOfRexRM(pfx, modrm);
19759 assign( dvec, getXMMReg(rG) );
19761 if ( epartIsReg( modrm ) ) {
19762 UInt rE = eregOfRexRM(pfx, modrm);
19763 imm8 = (Int)getUChar(delta+1);
19764 assign( svec, getXMMReg(rE) );
19765 delta += 1+1;
19766 DIP( "pclmulqdq $%d, %s,%s\n", imm8,
19767 nameXMMReg(rE), nameXMMReg(rG) );
19768 } else {
19769 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19770 1/* imm8 is 1 byte after the amode */ );
19771 gen_SEGV_if_not_16_aligned( addr );
19772 assign( svec, loadLE( Ity_V128, mkexpr(addr) ) );
19773 imm8 = (Int)getUChar(delta+alen);
19774 delta += alen+1;
19775 DIP( "pclmulqdq $%d, %s,%s\n",
19776 imm8, dis_buf, nameXMMReg(rG) );
19779 putXMMReg( rG, mkexpr( math_PCLMULQDQ(dvec, svec, imm8) ) );
19780 goto decode_success;
19782 break;
19784 case 0x60:
19785 case 0x61:
19786 case 0x62:
19787 case 0x63:
19788 /* 66 0F 3A 63 /r ib = PCMPISTRI imm8, xmm2/m128, xmm1
19789 66 0F 3A 62 /r ib = PCMPISTRM imm8, xmm2/m128, xmm1
19790 66 0F 3A 61 /r ib = PCMPESTRI imm8, xmm2/m128, xmm1
19791 66 0F 3A 60 /r ib = PCMPESTRM imm8, xmm2/m128, xmm1
19792 (selected special cases that actually occur in glibc,
19793 not by any means a complete implementation.)
19795 if (have66noF2noF3(pfx) && sz == 2) {
19796 Long delta0 = delta;
19797 delta = dis_PCMPxSTRx( vbi, pfx, delta, False/*!isAvx*/, opc );
19798 if (delta > delta0) goto decode_success;
19799 /* else fall though; dis_PCMPxSTRx failed to decode it */
19801 break;
19803 case 0xDF:
19804 /* 66 0F 3A DF /r ib = AESKEYGENASSIST imm8, xmm2/m128, xmm1 */
19805 if (have66noF2noF3(pfx) && sz == 2) {
19806 delta = dis_AESKEYGENASSIST( vbi, pfx, delta, False/*!isAvx*/ );
19807 goto decode_success;
19809 break;
19811 default:
19812 break;
19816 decode_failure:
19817 *decode_OK = False;
19818 return deltaIN;
19820 decode_success:
19821 *decode_OK = True;
19822 return delta;
19826 /*------------------------------------------------------------*/
19827 /*--- ---*/
19828 /*--- Top-level post-escape decoders: dis_ESC_NONE ---*/
19829 /*--- ---*/
19830 /*------------------------------------------------------------*/
19832 __attribute__((noinline))
19833 static
19834 Long dis_ESC_NONE (
19835 /*MB_OUT*/DisResult* dres,
19836 /*MB_OUT*/Bool* expect_CAS,
19837 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
19838 Bool resteerCisOk,
19839 void* callback_opaque,
19840 const VexArchInfo* archinfo,
19841 const VexAbiInfo* vbi,
19842 Prefix pfx, Int sz, Long deltaIN
19845 Long d64 = 0;
19846 UChar abyte = 0;
19847 IRTemp addr = IRTemp_INVALID;
19848 IRTemp t1 = IRTemp_INVALID;
19849 IRTemp t2 = IRTemp_INVALID;
19850 IRTemp t3 = IRTemp_INVALID;
19851 IRTemp t4 = IRTemp_INVALID;
19852 IRTemp t5 = IRTemp_INVALID;
19853 IRType ty = Ity_INVALID;
19854 UChar modrm = 0;
19855 Int am_sz = 0;
19856 Int d_sz = 0;
19857 Int alen = 0;
19858 HChar dis_buf[50];
19860 Long delta = deltaIN;
19861 UChar opc = getUChar(delta); delta++;
19863 /* delta now points at the modrm byte. In most of the cases that
19864 follow, neither the F2 nor F3 prefixes are allowed. However,
19865 for some basic arithmetic operations we have to allow F2/XACQ or
19866 F3/XREL in the case where the destination is memory and the LOCK
19867 prefix is also present. Do this check by looking at the modrm
19868 byte but not advancing delta over it. */
19869 /* By default, F2 and F3 are not allowed, so let's start off with
19870 that setting. */
19871 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
19872 { UChar tmp_modrm = getUChar(delta);
19873 switch (opc) {
19874 case 0x00: /* ADD Gb,Eb */ case 0x01: /* ADD Gv,Ev */
19875 case 0x08: /* OR Gb,Eb */ case 0x09: /* OR Gv,Ev */
19876 case 0x10: /* ADC Gb,Eb */ case 0x11: /* ADC Gv,Ev */
19877 case 0x18: /* SBB Gb,Eb */ case 0x19: /* SBB Gv,Ev */
19878 case 0x20: /* AND Gb,Eb */ case 0x21: /* AND Gv,Ev */
19879 case 0x28: /* SUB Gb,Eb */ case 0x29: /* SUB Gv,Ev */
19880 case 0x30: /* XOR Gb,Eb */ case 0x31: /* XOR Gv,Ev */
19881 if (!epartIsReg(tmp_modrm)
19882 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
19883 /* dst is mem, and we have F2 or F3 but not both */
19884 validF2orF3 = True;
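/* This covers the HLE idiom where F2 (XACQUIRE) or F3 (XRELEASE)
   precedes a LOCKed read-modify-write on memory, e.g. something like
   "xacquire lock addl $1, (%rdi)". */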
19886 break;
19887 default:
19888 break;
19892 /* Now, in the switch below, for the opc values examined by the
19893 switch above, use validF2orF3 rather than looking at pfx
19894 directly. */
19895 switch (opc) {
19897 case 0x00: /* ADD Gb,Eb */
19898 if (!validF2orF3) goto decode_failure;
19899 delta = dis_op2_G_E ( vbi, pfx, Iop_Add8, WithFlagNone, True, 1, delta, "add" );
19900 return delta;
19901 case 0x01: /* ADD Gv,Ev */
19902 if (!validF2orF3) goto decode_failure;
19903 delta = dis_op2_G_E ( vbi, pfx, Iop_Add8, WithFlagNone, True, sz, delta, "add" );
19904 return delta;
19906 case 0x02: /* ADD Eb,Gb */
19907 if (haveF2orF3(pfx)) goto decode_failure;
19908 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagNone, True, 1, delta, "add" );
19909 return delta;
19910 case 0x03: /* ADD Ev,Gv */
19911 if (haveF2orF3(pfx)) goto decode_failure;
19912 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagNone, True, sz, delta, "add" );
19913 return delta;
19915 case 0x04: /* ADD Ib, AL */
19916 if (haveF2orF3(pfx)) goto decode_failure;
19917 delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" );
19918 return delta;
19919 case 0x05: /* ADD Iv, eAX */
19920 if (haveF2orF3(pfx)) goto decode_failure;
19921 delta = dis_op_imm_A(sz, False, Iop_Add8, True, delta, "add" );
19922 return delta;
19924 case 0x08: /* OR Gb,Eb */
19925 if (!validF2orF3) goto decode_failure;
19926 delta = dis_op2_G_E ( vbi, pfx, Iop_Or8, WithFlagNone, True, 1, delta, "or" );
19927 return delta;
19928 case 0x09: /* OR Gv,Ev */
19929 if (!validF2orF3) goto decode_failure;
19930 delta = dis_op2_G_E ( vbi, pfx, Iop_Or8, WithFlagNone, True, sz, delta, "or" );
19931 return delta;
19933 case 0x0A: /* OR Eb,Gb */
19934 if (haveF2orF3(pfx)) goto decode_failure;
19935 delta = dis_op2_E_G ( vbi, pfx, Iop_Or8, WithFlagNone, True, 1, delta, "or" );
19936 return delta;
19937 case 0x0B: /* OR Ev,Gv */
19938 if (haveF2orF3(pfx)) goto decode_failure;
19939 delta = dis_op2_E_G ( vbi, pfx, Iop_Or8, WithFlagNone, True, sz, delta, "or" );
19940 return delta;
19942 case 0x0C: /* OR Ib, AL */
19943 if (haveF2orF3(pfx)) goto decode_failure;
19944 delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" );
19945 return delta;
19946 case 0x0D: /* OR Iv, eAX */
19947 if (haveF2orF3(pfx)) goto decode_failure;
19948 delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" );
19949 return delta;
19951 case 0x10: /* ADC Gb,Eb */
19952 if (!validF2orF3) goto decode_failure;
19953 delta = dis_op2_G_E ( vbi, pfx, Iop_Add8, WithFlagCarry, True, 1, delta, "adc" );
19954 return delta;
19955 case 0x11: /* ADC Gv,Ev */
19956 if (!validF2orF3) goto decode_failure;
19957 delta = dis_op2_G_E ( vbi, pfx, Iop_Add8, WithFlagCarry, True, sz, delta, "adc" );
19958 return delta;
19960 case 0x12: /* ADC Eb,Gb */
19961 if (haveF2orF3(pfx)) goto decode_failure;
19962 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagCarry, True, 1, delta, "adc" );
19963 return delta;
19964 case 0x13: /* ADC Ev,Gv */
19965 if (haveF2orF3(pfx)) goto decode_failure;
19966 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagCarry, True, sz, delta, "adc" );
19967 return delta;
19969 case 0x14: /* ADC Ib, AL */
19970 if (haveF2orF3(pfx)) goto decode_failure;
19971 delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" );
19972 return delta;
19973 case 0x15: /* ADC Iv, eAX */
19974 if (haveF2orF3(pfx)) goto decode_failure;
19975 delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" );
19976 return delta;
19978 case 0x18: /* SBB Gb,Eb */
19979 if (!validF2orF3) goto decode_failure;
19980 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagCarry, True, 1, delta, "sbb" );
19981 return delta;
19982 case 0x19: /* SBB Gv,Ev */
19983 if (!validF2orF3) goto decode_failure;
19984 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagCarry, True, sz, delta, "sbb" );
19985 return delta;
19987 case 0x1A: /* SBB Eb,Gb */
19988 if (haveF2orF3(pfx)) goto decode_failure;
19989 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagCarry, True, 1, delta, "sbb" );
19990 return delta;
19991 case 0x1B: /* SBB Ev,Gv */
19992 if (haveF2orF3(pfx)) goto decode_failure;
19993 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagCarry, True, sz, delta, "sbb" );
19994 return delta;
19996 case 0x1C: /* SBB Ib, AL */
19997 if (haveF2orF3(pfx)) goto decode_failure;
19998 delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" );
19999 return delta;
20000 case 0x1D: /* SBB Iv, eAX */
20001 if (haveF2orF3(pfx)) goto decode_failure;
20002 delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" );
20003 return delta;
20005 case 0x20: /* AND Gb,Eb */
20006 if (!validF2orF3) goto decode_failure;
20007 delta = dis_op2_G_E ( vbi, pfx, Iop_And8, WithFlagNone, True, 1, delta, "and" );
20008 return delta;
20009 case 0x21: /* AND Gv,Ev */
20010 if (!validF2orF3) goto decode_failure;
20011 delta = dis_op2_G_E ( vbi, pfx, Iop_And8, WithFlagNone, True, sz, delta, "and" );
20012 return delta;
20014 case 0x22: /* AND Eb,Gb */
20015 if (haveF2orF3(pfx)) goto decode_failure;
20016 delta = dis_op2_E_G ( vbi, pfx, Iop_And8, WithFlagNone, True, 1, delta, "and" );
20017 return delta;
20018 case 0x23: /* AND Ev,Gv */
20019 if (haveF2orF3(pfx)) goto decode_failure;
20020 delta = dis_op2_E_G ( vbi, pfx, Iop_And8, WithFlagNone, True, sz, delta, "and" );
20021 return delta;
20023 case 0x24: /* AND Ib, AL */
20024 if (haveF2orF3(pfx)) goto decode_failure;
20025 delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" );
20026 return delta;
20027 case 0x25: /* AND Iv, eAX */
20028 if (haveF2orF3(pfx)) goto decode_failure;
20029 delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" );
20030 return delta;
20032 case 0x28: /* SUB Gb,Eb */
20033 if (!validF2orF3) goto decode_failure;
20034 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagNone, True, 1, delta, "sub" );
20035 return delta;
20036 case 0x29: /* SUB Gv,Ev */
20037 if (!validF2orF3) goto decode_failure;
20038 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagNone, True, sz, delta, "sub" );
20039 return delta;
20041 case 0x2A: /* SUB Eb,Gb */
20042 if (haveF2orF3(pfx)) goto decode_failure;
20043 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagNone, True, 1, delta, "sub" );
20044 return delta;
20045 case 0x2B: /* SUB Ev,Gv */
20046 if (haveF2orF3(pfx)) goto decode_failure;
20047 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagNone, True, sz, delta, "sub" );
20048 return delta;
20050 case 0x2C: /* SUB Ib, AL */
20051 if (haveF2orF3(pfx)) goto decode_failure;
20052 delta = dis_op_imm_A(1, False, Iop_Sub8, True, delta, "sub" );
20053 return delta;
20054 case 0x2D: /* SUB Iv, eAX */
20055 if (haveF2orF3(pfx)) goto decode_failure;
20056 delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" );
20057 return delta;
20059 case 0x30: /* XOR Gb,Eb */
20060 if (!validF2orF3) goto decode_failure;
20061 delta = dis_op2_G_E ( vbi, pfx, Iop_Xor8, WithFlagNone, True, 1, delta, "xor" );
20062 return delta;
20063 case 0x31: /* XOR Gv,Ev */
20064 if (!validF2orF3) goto decode_failure;
20065 delta = dis_op2_G_E ( vbi, pfx, Iop_Xor8, WithFlagNone, True, sz, delta, "xor" );
20066 return delta;
20068 case 0x32: /* XOR Eb,Gb */
20069 if (haveF2orF3(pfx)) goto decode_failure;
20070 delta = dis_op2_E_G ( vbi, pfx, Iop_Xor8, WithFlagNone, True, 1, delta, "xor" );
20071 return delta;
20072 case 0x33: /* XOR Ev,Gv */
20073 if (haveF2orF3(pfx)) goto decode_failure;
20074 delta = dis_op2_E_G ( vbi, pfx, Iop_Xor8, WithFlagNone, True, sz, delta, "xor" );
20075 return delta;
20077 case 0x34: /* XOR Ib, AL */
20078 if (haveF2orF3(pfx)) goto decode_failure;
20079 delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" );
20080 return delta;
20081 case 0x35: /* XOR Iv, eAX */
20082 if (haveF2orF3(pfx)) goto decode_failure;
20083 delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" );
20084 return delta;
20086 case 0x38: /* CMP Gb,Eb */
20087 if (haveF2orF3(pfx)) goto decode_failure;
20088 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagNone, False, 1, delta, "cmp" );
20089 return delta;
20090 case 0x39: /* CMP Gv,Ev */
20091 if (haveF2orF3(pfx)) goto decode_failure;
20092 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagNone, False, sz, delta, "cmp" );
20093 return delta;
20095 case 0x3A: /* CMP Eb,Gb */
20096 if (haveF2orF3(pfx)) goto decode_failure;
20097 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagNone, False, 1, delta, "cmp" );
20098 return delta;
20099 case 0x3B: /* CMP Ev,Gv */
20100 if (haveF2orF3(pfx)) goto decode_failure;
20101 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagNone, False, sz, delta, "cmp" );
20102 return delta;
20104 case 0x3C: /* CMP Ib, AL */
20105 if (haveF2orF3(pfx)) goto decode_failure;
20106 delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" );
20107 return delta;
20108 case 0x3D: /* CMP Iv, eAX */
20109 if (haveF2orF3(pfx)) goto decode_failure;
20110 delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" );
20111 return delta;
20113 case 0x50: /* PUSH eAX */
20114 case 0x51: /* PUSH eCX */
20115 case 0x52: /* PUSH eDX */
20116 case 0x53: /* PUSH eBX */
20117 case 0x55: /* PUSH eBP */
20118 case 0x56: /* PUSH eSI */
20119 case 0x57: /* PUSH eDI */
20120 case 0x54: /* PUSH eSP */
20121 /* This is the Right Way, in that the value to be pushed is
20122 established before %rsp is changed, so that pushq %rsp
20123 correctly pushes the old value. */
20124 if (haveF2orF3(pfx)) goto decode_failure;
20125 vassert(sz == 2 || sz == 4 || sz == 8);
20126 if (sz == 4)
20127 sz = 8; /* there is no encoding for 32-bit push in 64-bit mode */
20128 ty = sz==2 ? Ity_I16 : Ity_I64;
20129 t1 = newTemp(ty);
20130 t2 = newTemp(Ity_I64);
20131 assign(t1, getIRegRexB(sz, pfx, opc-0x50));
20132 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(sz)));
20133 putIReg64(R_RSP, mkexpr(t2) );
20134 storeLE(mkexpr(t2),mkexpr(t1));
20135 DIP("push%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x50));
20136 return delta;
20138 case 0x58: /* POP eAX */
20139 case 0x59: /* POP eCX */
20140 case 0x5A: /* POP eDX */
20141 case 0x5B: /* POP eBX */
20142 case 0x5D: /* POP eBP */
20143 case 0x5E: /* POP eSI */
20144 case 0x5F: /* POP eDI */
20145 case 0x5C: /* POP eSP */
20146 if (haveF2orF3(pfx)) goto decode_failure;
20147 vassert(sz == 2 || sz == 4 || sz == 8);
20148 if (sz == 4)
20149 sz = 8; /* there is no encoding for 32-bit pop in 64-bit mode */
20150 t1 = newTemp(szToITy(sz));
20151 t2 = newTemp(Ity_I64);
20152 assign(t2, getIReg64(R_RSP));
20153 assign(t1, loadLE(szToITy(sz),mkexpr(t2)));
20154 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz)));
20155 putIRegRexB(sz, pfx, opc-0x58, mkexpr(t1));
20156 DIP("pop%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x58));
20157 return delta;
20159 case 0x63: /* MOVSX */
20160 if (haveF2orF3(pfx)) goto decode_failure;
20161 if (haveREX(pfx) && 1==getRexW(pfx)) {
20162 vassert(sz == 8);
20163 /* movsx r/m32 to r64 */
20164 modrm = getUChar(delta);
20165 if (epartIsReg(modrm)) {
20166 delta++;
20167 putIRegG(8, pfx, modrm,
20168 unop(Iop_32Sto64,
20169 getIRegE(4, pfx, modrm)));
20170 DIP("movslq %s,%s\n",
20171 nameIRegE(4, pfx, modrm),
20172 nameIRegG(8, pfx, modrm));
20173 return delta;
20174 } else {
20175 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
20176 delta += alen;
20177 putIRegG(8, pfx, modrm,
20178 unop(Iop_32Sto64,
20179 loadLE(Ity_I32, mkexpr(addr))));
20180 DIP("movslq %s,%s\n", dis_buf,
20181 nameIRegG(8, pfx, modrm));
20182 return delta;
20184 } else {
20185 goto decode_failure;
20188 case 0x68: /* PUSH Iv */
20189 if (haveF2orF3(pfx)) goto decode_failure;
20190 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
20191 if (sz == 4) sz = 8;
20192 d64 = getSDisp(imin(4,sz),delta);
20193 delta += imin(4,sz);
20194 goto do_push_I;
20196 case 0x69: /* IMUL Iv, Ev, Gv */
20197 if (haveF2orF3(pfx)) goto decode_failure;
20198 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, sz );
20199 return delta;
20201 case 0x6A: /* PUSH Ib, sign-extended to sz */
20202 if (haveF2orF3(pfx)) goto decode_failure;
20203 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
20204 if (sz == 4) sz = 8;
20205 d64 = getSDisp8(delta); delta += 1;
20206 goto do_push_I;
20207 do_push_I:
20208 ty = szToITy(sz);
20209 t1 = newTemp(Ity_I64);
20210 t2 = newTemp(ty);
20211 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
20212 putIReg64(R_RSP, mkexpr(t1) );
20213 /* stop mkU16 asserting if d64 is a negative 16-bit number
20214 (bug #132813) */
20215 if (ty == Ity_I16)
20216 d64 &= 0xFFFF;
20217 storeLE( mkexpr(t1), mkU(ty,d64) );
20218 DIP("push%c $%lld\n", nameISize(sz), (Long)d64);
20219 return delta;
20221 case 0x6B: /* IMUL Ib, Ev, Gv */
20222 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, 1 );
20223 return delta;
20225 case 0x70:
20226 case 0x71:
20227 case 0x72: /* JBb/JNAEb (jump below) */
20228 case 0x73: /* JNBb/JAEb (jump not below) */
20229 case 0x74: /* JZb/JEb (jump zero) */
20230 case 0x75: /* JNZb/JNEb (jump not zero) */
20231 case 0x76: /* JBEb/JNAb (jump below or equal) */
20232 case 0x77: /* JNBEb/JAb (jump not below or equal) */
20233 case 0x78: /* JSb (jump negative) */
20234 case 0x79: /* JNSb (jump not negative) */
20235 case 0x7A: /* JP (jump parity even) */
20236 case 0x7B: /* JNP/JPO (jump parity odd) */
20237 case 0x7C: /* JLb/JNGEb (jump less) */
20238 case 0x7D: /* JGEb/JNLb (jump greater or equal) */
20239 case 0x7E: /* JLEb/JNGb (jump less or equal) */
20240 case 0x7F: { /* JGb/JNLEb (jump greater) */
20241 Long jmpDelta;
20242 const HChar* comment = "";
20243 if (haveF3(pfx)) goto decode_failure;
20244 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
20245 jmpDelta = getSDisp8(delta);
20246 vassert(-128 <= jmpDelta && jmpDelta < 128);
20247 d64 = (guest_RIP_bbstart+delta+1) + jmpDelta;
20248 delta++;
20249 if (resteerCisOk
20250 && vex_control.guest_chase_cond
20251 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
20252 && jmpDelta < 0
20253 && resteerOkFn( callback_opaque, (Addr64)d64) ) {
20254 /* Speculation: assume this backward branch is taken. So we
20255 need to emit a side-exit to the insn following this one,
20256 on the negation of the condition, and continue at the
20257 branch target address (d64). If we wind up back at the
20258 first instruction of the trace, just stop; it's better to
20259 let the IR loop unroller handle that case. */
20260 stmt( IRStmt_Exit(
20261 mk_amd64g_calculate_condition(
20262 (AMD64Condcode)(1 ^ (opc - 0x70))),
20263 Ijk_Boring,
20264 IRConst_U64(guest_RIP_bbstart+delta),
20265 OFFB_RIP ) );
20266 dres->whatNext = Dis_ResteerC;
20267 dres->continueAt = d64;
20268 comment = "(assumed taken)";
20270 else
20271 if (resteerCisOk
20272 && vex_control.guest_chase_cond
20273 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
20274 && jmpDelta >= 0
20275 && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) {
20276 /* Speculation: assume this forward branch is not taken. So
20277 we need to emit a side-exit to d64 (the dest) and continue
20278 disassembling at the insn immediately following this
20279 one. */
20280 stmt( IRStmt_Exit(
20281 mk_amd64g_calculate_condition((AMD64Condcode)(opc - 0x70)),
20282 Ijk_Boring,
20283 IRConst_U64(d64),
20284 OFFB_RIP ) );
20285 dres->whatNext = Dis_ResteerC;
20286 dres->continueAt = guest_RIP_bbstart+delta;
20287 comment = "(assumed not taken)";
20289 else {
20290 /* Conservative default translation - end the block at this
20291 point. */
20292 jcc_01( dres, (AMD64Condcode)(opc - 0x70),
20293 guest_RIP_bbstart+delta, d64 );
20294 vassert(dres->whatNext == Dis_StopHere);
20296 DIP("j%s-8 0x%llx %s\n", name_AMD64Condcode(opc - 0x70), (ULong)d64,
20297 comment);
20298 return delta;
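/* Editor's note: for the byte-form Jccs above, the destination is
   next-insn-RIP plus the sign-extended disp8.  Worked example with a
   hypothetical address: "74 FC" (jz) whose displacement byte ends at guest
   address 0x1002 gives d64 = 0x1002 + (-4) = 0xFFE; a negative jmpDelta
   like that is what selects the "assumed taken" backward-branch
   speculation path. */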
20301 case 0x80: /* Grp1 Ib,Eb */
20302 modrm = getUChar(delta);
20303 /* Disallow F2/XACQ and F3/XREL for the non-mem case. Allow
20304 just one for the mem case and also require LOCK in this case.
20305 Note that this erroneously allows XACQ/XREL on CMP since we
20306 don't check the subopcode here. No big deal. */
20307 if (epartIsReg(modrm) && haveF2orF3(pfx))
20308 goto decode_failure;
20309 if (!epartIsReg(modrm) && haveF2andF3(pfx))
20310 goto decode_failure;
20311 if (!epartIsReg(modrm) && haveF2orF3(pfx) && !haveLOCK(pfx))
20312 goto decode_failure;
20313 am_sz = lengthAMode(pfx,delta);
20314 sz = 1;
20315 d_sz = 1;
20316 d64 = getSDisp8(delta + am_sz);
20317 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
20318 return delta;
20320 case 0x81: /* Grp1 Iv,Ev */
20321 modrm = getUChar(delta);
20322 /* Same comment as for case 0x80 just above. */
20323 if (epartIsReg(modrm) && haveF2orF3(pfx))
20324 goto decode_failure;
20325 if (!epartIsReg(modrm) && haveF2andF3(pfx))
20326 goto decode_failure;
20327 if (!epartIsReg(modrm) && haveF2orF3(pfx) && !haveLOCK(pfx))
20328 goto decode_failure;
20329 am_sz = lengthAMode(pfx,delta);
20330 d_sz = imin(sz,4);
20331 d64 = getSDisp(d_sz, delta + am_sz);
20332 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
20333 return delta;
20335 case 0x83: /* Grp1 Ib,Ev */
20336 if (haveF2orF3(pfx)) goto decode_failure;
20337 modrm = getUChar(delta);
20338 am_sz = lengthAMode(pfx,delta);
20339 d_sz = 1;
20340 d64 = getSDisp8(delta + am_sz);
20341 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
20342 return delta;
20344 case 0x84: /* TEST Eb,Gb */
20345 if (haveF2orF3(pfx)) goto decode_failure;
20346 delta = dis_op2_E_G ( vbi, pfx, Iop_And8, WithFlagNone, False,
20347 1, delta, "test" );
20348 return delta;
20350 case 0x85: /* TEST Ev,Gv */
20351 if (haveF2orF3(pfx)) goto decode_failure;
20352 delta = dis_op2_E_G ( vbi, pfx, Iop_And8, WithFlagNone, False,
20353 sz, delta, "test" );
20354 return delta;
20356 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
20357 prefix. Therefore, generate CAS regardless of the presence or
20358 otherwise of a LOCK prefix. */
20359 case 0x86: /* XCHG Gb,Eb */
20360 sz = 1;
20361 /* Fall through ... */
20362 case 0x87: /* XCHG Gv,Ev */
20363 modrm = getUChar(delta);
20364 /* Check whether F2 or F3 are allowable. For the mem case, one
20365 or the other but not both are. We don't care about the
20366 presence of LOCK in this case -- XCHG is unusual in this
20367 respect. */
20368 if (haveF2orF3(pfx)) {
20369 if (epartIsReg(modrm)) {
20370 goto decode_failure;
20371 } else {
20372 if (haveF2andF3(pfx))
20373 goto decode_failure;
20376 ty = szToITy(sz);
20377 t1 = newTemp(ty); t2 = newTemp(ty);
20378 if (epartIsReg(modrm)) {
20379 assign(t1, getIRegE(sz, pfx, modrm));
20380 assign(t2, getIRegG(sz, pfx, modrm));
20381 putIRegG(sz, pfx, modrm, mkexpr(t1));
20382 putIRegE(sz, pfx, modrm, mkexpr(t2));
20383 delta++;
20384 DIP("xchg%c %s, %s\n",
20385 nameISize(sz), nameIRegG(sz, pfx, modrm),
20386 nameIRegE(sz, pfx, modrm));
20387 } else {
20388 *expect_CAS = True;
20389 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
20390 assign( t1, loadLE(ty, mkexpr(addr)) );
20391 assign( t2, getIRegG(sz, pfx, modrm) );
20392 casLE( mkexpr(addr),
20393 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
20394 putIRegG( sz, pfx, modrm, mkexpr(t1) );
20395 delta += alen;
20396 DIP("xchg%c %s, %s\n", nameISize(sz),
20397 nameIRegG(sz, pfx, modrm), dis_buf);
20399 return delta;
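/* Editor's sketch (illustrative, not VEX code): the memory form above is
   expressed as a CAS whose expected value is the value just loaded, which
   is how VEX models an unconditional atomic exchange.  The guest-visible
   effect, in plain C using GCC/Clang builtins, is roughly:

      unsigned long long old = __atomic_exchange_n(p, reg, __ATOMIC_SEQ_CST);
      reg = old;    // the G register receives the old memory contents
*/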
20401 case 0x88: { /* MOV Gb,Eb */
20402 /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */
20403 Bool ok = True;
20404 delta = dis_mov_G_E(vbi, pfx, 1, delta, &ok);
20405 if (!ok) goto decode_failure;
20406 return delta;
20409 case 0x89: { /* MOV Gv,Ev */
20410 /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */
20411 Bool ok = True;
20412 delta = dis_mov_G_E(vbi, pfx, sz, delta, &ok);
20413 if (!ok) goto decode_failure;
20414 return delta;
20417 case 0x8A: /* MOV Eb,Gb */
20418 if (haveF2orF3(pfx)) goto decode_failure;
20419 delta = dis_mov_E_G(vbi, pfx, 1, delta);
20420 return delta;
20422 case 0x8B: /* MOV Ev,Gv */
20423 if (haveF2orF3(pfx)) goto decode_failure;
20424 delta = dis_mov_E_G(vbi, pfx, sz, delta);
20425 return delta;
20427 case 0x8C: /* MOV S,E -- MOV from a SEGMENT REGISTER */
20428 if (haveF2orF3(pfx)) goto decode_failure;
20429 delta = dis_mov_S_E(vbi, pfx, sz, delta);
20430 return delta;
20432 case 0x8D: /* LEA M,Gv */
20433 if (haveF2orF3(pfx)) goto decode_failure;
20434 if (sz != 4 && sz != 8)
20435 goto decode_failure;
20436 modrm = getUChar(delta);
20437 if (epartIsReg(modrm))
20438 goto decode_failure;
20439 /* NOTE! this is the one place where a segment override prefix
20440 has no effect on the address calculation. Therefore we clear
20441 any segment override bits in pfx. */
20442 addr = disAMode ( &alen, vbi, clearSegBits(pfx), delta, dis_buf, 0 );
20443 delta += alen;
20444 /* This is a hack, but it isn't clear that doing the
20445 calculation at 32 bits is really worth it. Hence for leal,
20446 do the full 64-bit calculation and then truncate it. */
20447 putIRegG( sz, pfx, modrm,
20448 sz == 4
20449 ? unop(Iop_64to32, mkexpr(addr))
20450 : mkexpr(addr)
20452 DIP("lea%c %s, %s\n", nameISize(sz), dis_buf,
20453 nameIRegG(sz,pfx,modrm));
20454 return delta;
20456 case 0x8F: { /* POPQ m64 / POPW m16 */
20457 Int len;
20458 UChar rm;
20459 /* There is no encoding for 32-bit pop in 64-bit mode.
20460 So sz==4 actually means sz==8. */
20461 if (haveF2orF3(pfx)) goto decode_failure;
20462 vassert(sz == 2 || sz == 4
20463 || /* tolerate redundant REX.W, see #210481 */ sz == 8);
20464 if (sz == 4) sz = 8;
20465 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
20467 rm = getUChar(delta);
20470 /* make sure this really is a POP to memory (/0 subopcode) */
20470 if (epartIsReg(rm) || gregLO3ofRM(rm) != 0)
20471 goto decode_failure;
20472 /* and has correct size */
20473 vassert(sz == 8);
20475 t1 = newTemp(Ity_I64);
20476 t3 = newTemp(Ity_I64);
20477 assign( t1, getIReg64(R_RSP) );
20478 assign( t3, loadLE(Ity_I64, mkexpr(t1)) );
20480 /* Increase RSP; must be done before the STORE. Intel manual
20481 says: If the RSP register is used as a base register for
20482 addressing a destination operand in memory, the POP
20483 instruction computes the effective address of the operand
20484 after it increments the RSP register. */
20485 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(sz)) );
20487 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
20488 storeLE( mkexpr(addr), mkexpr(t3) );
20490 DIP("popl %s\n", dis_buf);
20492 delta += len;
20493 return delta;
20496 case 0x90: /* XCHG eAX,eAX */
20497 /* detect and handle F3 90 (rep nop) specially */
20498 if (!have66(pfx) && !haveF2(pfx) && haveF3(pfx)) {
20499 DIP("rep nop (P4 pause)\n");
20500 /* "observe" the hint. The Vex client needs to be careful not
20501 to cause very long delays as a result, though. */
20502 jmp_lit(dres, Ijk_Yield, guest_RIP_bbstart+delta);
20503 vassert(dres->whatNext == Dis_StopHere);
20504 return delta;
20506 /* detect and handle NOPs specially */
20507 if (/* F2/F3 probably change meaning completely */
20508 !haveF2orF3(pfx)
20509 /* If REX.B is 1, we're not exchanging rAX with itself */
20510 && getRexB(pfx)==0 ) {
20511 DIP("nop\n");
20512 return delta;
20514 /* else fall through to normal case. */
20515 case 0x91: /* XCHG rAX,rCX */
20516 case 0x92: /* XCHG rAX,rDX */
20517 case 0x93: /* XCHG rAX,rBX */
20518 case 0x94: /* XCHG rAX,rSP */
20519 case 0x95: /* XCHG rAX,rBP */
20520 case 0x96: /* XCHG rAX,rSI */
20521 case 0x97: /* XCHG rAX,rDI */
20522 /* guard against mutancy */
20523 if (haveF2orF3(pfx)) goto decode_failure;
20524 codegen_xchg_rAX_Reg ( pfx, sz, opc - 0x90 );
20525 return delta;
20527 case 0x98: /* CBW */
20528 if (haveF2orF3(pfx)) goto decode_failure;
20529 if (sz == 8) {
20530 putIRegRAX( 8, unop(Iop_32Sto64, getIRegRAX(4)) );
20531 DIP(/*"cdqe\n"*/"cltq\n");
20532 return delta;
20534 if (sz == 4) {
20535 putIRegRAX( 4, unop(Iop_16Sto32, getIRegRAX(2)) );
20536 DIP("cwtl\n");
20537 return delta;
20539 if (sz == 2) {
20540 putIRegRAX( 2, unop(Iop_8Sto16, getIRegRAX(1)) );
20541 DIP("cbw\n");
20542 return delta;
20544 goto decode_failure;
20546 case 0x99: /* CWD/CDQ/CQO */
20547 if (haveF2orF3(pfx)) goto decode_failure;
20548 vassert(sz == 2 || sz == 4 || sz == 8);
20549 ty = szToITy(sz);
20550 putIRegRDX( sz,
20551 binop(mkSizedOp(ty,Iop_Sar8),
20552 getIRegRAX(sz),
20553 mkU8(sz == 2 ? 15 : (sz == 4 ? 31 : 63))) );
20554 DIP(sz == 2 ? "cwd\n"
20555 : (sz == 4 ? /*"cdq\n"*/ "cltd\n"
20556 : "cqo\n"));
20557 return delta;
20559 case 0x9B: /* FWAIT (X87 insn) */
20560 /* ignore? */
20561 DIP("fwait\n");
20562 return delta;
20564 case 0x9C: /* PUSHF */ {
20565 /* Note. There is no encoding for a 32-bit pushf in 64-bit
20566 mode. So sz==4 actually means sz==8. */
20567 /* 24 July 06: has also been seen with a redundant REX prefix,
20568 so must also allow sz==8. */
20569 if (haveF2orF3(pfx)) goto decode_failure;
20570 vassert(sz == 2 || sz == 4 || sz == 8);
20571 if (sz == 4) sz = 8;
20572 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
20574 t1 = newTemp(Ity_I64);
20575 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
20576 putIReg64(R_RSP, mkexpr(t1) );
20578 t2 = newTemp(Ity_I64);
20579 assign( t2, mk_amd64g_calculate_rflags_all() );
20581 /* Patch in the D flag. This can simply be a copy of bit 10 of
20582 baseBlock[OFFB_DFLAG]. */
20583 t3 = newTemp(Ity_I64);
20584 assign( t3, binop(Iop_Or64,
20585 mkexpr(t2),
20586 binop(Iop_And64,
20587 IRExpr_Get(OFFB_DFLAG,Ity_I64),
20588 mkU64(1<<10)))
20591 /* And patch in the ID flag. */
20592 t4 = newTemp(Ity_I64);
20593 assign( t4, binop(Iop_Or64,
20594 mkexpr(t3),
20595 binop(Iop_And64,
20596 binop(Iop_Shl64, IRExpr_Get(OFFB_IDFLAG,Ity_I64),
20597 mkU8(21)),
20598 mkU64(1<<21)))
20601 /* And patch in the AC flag too. */
20602 t5 = newTemp(Ity_I64);
20603 assign( t5, binop(Iop_Or64,
20604 mkexpr(t4),
20605 binop(Iop_And64,
20606 binop(Iop_Shl64, IRExpr_Get(OFFB_ACFLAG,Ity_I64),
20607 mkU8(18)),
20608 mkU64(1<<18)))
20611 /* if sz==2, the stored value needs to be narrowed. */
20612 if (sz == 2)
20613 storeLE( mkexpr(t1), unop(Iop_32to16,
20614 unop(Iop_64to32,mkexpr(t5))) );
20615 else
20616 storeLE( mkexpr(t1), mkexpr(t5) );
20618 DIP("pushf%c\n", nameISize(sz));
20619 return delta;
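/* Editor's note: the three ORs above graft DF, AC and ID into the OSZACP
   word at their architectural RFLAGS bit positions (DF = bit 10, AC =
   bit 18, ID = bit 21).  Worked example, assuming OSZACP = 0x44 (ZF|PF)
   and the guest DF set:

      0x44 | (1 << 10) = 0x444      // the value that gets pushed
*/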
20622 case 0x9D: /* POPF */
20623 /* Note. There is no encoding for a 32-bit popf in 64-bit mode.
20624 So sz==4 actually means sz==8. */
20625 if (haveF2orF3(pfx)) goto decode_failure;
20626 vassert(sz == 2 || sz == 4);
20627 if (sz == 4) sz = 8;
20628 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
20629 t1 = newTemp(Ity_I64); t2 = newTemp(Ity_I64);
20630 assign(t2, getIReg64(R_RSP));
20631 assign(t1, widenUto64(loadLE(szToITy(sz),mkexpr(t2))));
20632 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz)));
20633 /* t1 is the flag word. Mask out everything except OSZACP and
20634 set the flags thunk to AMD64G_CC_OP_COPY. */
20635 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
20636 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
20637 stmt( IRStmt_Put( OFFB_CC_DEP1,
20638 binop(Iop_And64,
20639 mkexpr(t1),
20640 mkU64( AMD64G_CC_MASK_C | AMD64G_CC_MASK_P
20641 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_Z
20642 | AMD64G_CC_MASK_S| AMD64G_CC_MASK_O )
20647 /* Also need to set the D flag, which is held in bit 10 of t1.
20648 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
20649 stmt( IRStmt_Put(
20650 OFFB_DFLAG,
20651 IRExpr_ITE(
20652 unop(Iop_64to1,
20653 binop(Iop_And64,
20654 binop(Iop_Shr64, mkexpr(t1), mkU8(10)),
20655 mkU64(1))),
20656 mkU64(0xFFFFFFFFFFFFFFFFULL),
20657 mkU64(1)))
20660 /* And set the ID flag */
20661 stmt( IRStmt_Put(
20662 OFFB_IDFLAG,
20663 IRExpr_ITE(
20664 unop(Iop_64to1,
20665 binop(Iop_And64,
20666 binop(Iop_Shr64, mkexpr(t1), mkU8(21)),
20667 mkU64(1))),
20668 mkU64(1),
20669 mkU64(0)))
20672 /* And set the AC flag too */
20673 stmt( IRStmt_Put(
20674 OFFB_ACFLAG,
20675 IRExpr_ITE(
20676 unop(Iop_64to1,
20677 binop(Iop_And64,
20678 binop(Iop_Shr64, mkexpr(t1), mkU8(18)),
20679 mkU64(1))),
20680 mkU64(1),
20681 mkU64(0)))
20684 DIP("popf%c\n", nameISize(sz));
20685 return delta;
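/* Editor's note: the guest state does not hold DF as a single bit;
   OFFB_DFLAG stores +1 (forward) or -1 (backward) so the string insns can
   add it straight to RSI/RDI.  Hence the first ITE above, which in plain C
   is simply (illustrative only):

      long long dflag = ((rflags >> 10) & 1) ? -1LL : +1LL;
*/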
20687 case 0x9E: /* SAHF */
20688 codegen_SAHF();
20689 DIP("sahf\n");
20690 return delta;
20692 case 0x9F: /* LAHF */
20693 codegen_LAHF();
20694 DIP("lahf\n");
20695 return delta;
20697 case 0xA0: /* MOV Ob,AL */
20698 if (have66orF2orF3(pfx)) goto decode_failure;
20699 sz = 1;
20700 /* Fall through ... */
20701 case 0xA1: /* MOV Ov,eAX */
20702 if (sz != 8 && sz != 4 && sz != 2 && sz != 1)
20703 goto decode_failure;
20704 d64 = getDisp64(delta);
20705 delta += 8;
20706 ty = szToITy(sz);
20707 addr = newTemp(Ity_I64);
20708 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) );
20709 putIRegRAX(sz, loadLE( ty, mkexpr(addr) ));
20710 DIP("mov%c %s0x%llx, %s\n", nameISize(sz),
20711 segRegTxt(pfx), (ULong)d64,
20712 nameIRegRAX(sz));
20713 return delta;
20715 case 0xA2: /* MOV AL,Ob */
20716 if (have66orF2orF3(pfx)) goto decode_failure;
20717 sz = 1;
20718 /* Fall through ... */
20719 case 0xA3: /* MOV eAX,Ov */
20720 if (sz != 8 && sz != 4 && sz != 2 && sz != 1)
20721 goto decode_failure;
20722 d64 = getDisp64(delta);
20723 delta += 8;
20724 ty = szToITy(sz);
20725 addr = newTemp(Ity_I64);
20726 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) );
20727 storeLE( mkexpr(addr), getIRegRAX(sz) );
20728 DIP("mov%c %s, %s0x%llx\n", nameISize(sz), nameIRegRAX(sz),
20729 segRegTxt(pfx), (ULong)d64);
20730 return delta;
20732 case 0xA4:
20733 case 0xA5:
20734 /* F3 A4: rep movsb */
20735 if (haveF3(pfx) && !haveF2(pfx)) {
20736 if (opc == 0xA4)
20737 sz = 1;
20738 dis_REP_op ( dres, AMD64CondAlways, dis_MOVS, sz,
20739 guest_RIP_curr_instr,
20740 guest_RIP_bbstart+delta, "rep movs", pfx );
20741 dres->whatNext = Dis_StopHere;
20742 return delta;
20744 /* A4: movsb */
20745 if (!haveF3(pfx) && !haveF2(pfx)) {
20746 if (opc == 0xA4)
20747 sz = 1;
20748 dis_string_op( dis_MOVS, sz, "movs", pfx );
20749 return delta;
20751 goto decode_failure;
20753 case 0xA6:
20754 case 0xA7:
20755 /* F3 A6/A7: repe cmps/rep cmps{w,l,q} */
20756 if (haveF3(pfx) && !haveF2(pfx)) {
20757 if (opc == 0xA6)
20758 sz = 1;
20759 dis_REP_op ( dres, AMD64CondZ, dis_CMPS, sz,
20760 guest_RIP_curr_instr,
20761 guest_RIP_bbstart+delta, "repe cmps", pfx );
20762 dres->whatNext = Dis_StopHere;
20763 return delta;
20765 goto decode_failure;
20767 case 0xAA:
20768 case 0xAB:
20769 /* F3 AA/AB: rep stosb/rep stos{w,l,q} */
20770 if (haveF3(pfx) && !haveF2(pfx)) {
20771 if (opc == 0xAA)
20772 sz = 1;
20773 dis_REP_op ( dres, AMD64CondAlways, dis_STOS, sz,
20774 guest_RIP_curr_instr,
20775 guest_RIP_bbstart+delta, "rep stos", pfx );
20776 vassert(dres->whatNext == Dis_StopHere);
20777 return delta;
20779 /* AA/AB: stosb/stos{w,l,q} */
20780 if (!haveF3(pfx) && !haveF2(pfx)) {
20781 if (opc == 0xAA)
20782 sz = 1;
20783 dis_string_op( dis_STOS, sz, "stos", pfx );
20784 return delta;
20786 goto decode_failure;
20788 case 0xA8: /* TEST Ib, AL */
20789 if (haveF2orF3(pfx)) goto decode_failure;
20790 delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" );
20791 return delta;
20792 case 0xA9: /* TEST Iv, eAX */
20793 if (haveF2orF3(pfx)) goto decode_failure;
20794 delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" );
20795 return delta;
20797 case 0xAC: /* LODS, no REP prefix */
20798 case 0xAD:
20799 dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", pfx );
20800 return delta;
20802 case 0xAE:
20803 case 0xAF:
20804 /* F2 AE/AF: repne scasb/repne scas{w,l,q} */
20805 if (haveF2(pfx) && !haveF3(pfx)) {
20806 if (opc == 0xAE)
20807 sz = 1;
20808 dis_REP_op ( dres, AMD64CondNZ, dis_SCAS, sz,
20809 guest_RIP_curr_instr,
20810 guest_RIP_bbstart+delta, "repne scas", pfx );
20811 vassert(dres->whatNext == Dis_StopHere);
20812 return delta;
20814 /* F3 AE/AF: repe scasb/repe scas{w,l,q} */
20815 if (!haveF2(pfx) && haveF3(pfx)) {
20816 if (opc == 0xAE)
20817 sz = 1;
20818 dis_REP_op ( dres, AMD64CondZ, dis_SCAS, sz,
20819 guest_RIP_curr_instr,
20820 guest_RIP_bbstart+delta, "repe scas", pfx );
20821 vassert(dres->whatNext == Dis_StopHere);
20822 return delta;
20824 /* AE/AF: scasb/scas{w,l,q} */
20825 if (!haveF2(pfx) && !haveF3(pfx)) {
20826 if (opc == 0xAE)
20827 sz = 1;
20828 dis_string_op( dis_SCAS, sz, "scas", pfx );
20829 return delta;
20831 goto decode_failure;
20833 /* XXXX be careful here with moves to AH/BH/CH/DH */
20834 case 0xB0: /* MOV imm,AL */
20835 case 0xB1: /* MOV imm,CL */
20836 case 0xB2: /* MOV imm,DL */
20837 case 0xB3: /* MOV imm,BL */
20838 case 0xB4: /* MOV imm,AH */
20839 case 0xB5: /* MOV imm,CH */
20840 case 0xB6: /* MOV imm,DH */
20841 case 0xB7: /* MOV imm,BH */
20842 if (haveF2orF3(pfx)) goto decode_failure;
20843 d64 = getUChar(delta);
20844 delta += 1;
20845 putIRegRexB(1, pfx, opc-0xB0, mkU8(d64));
20846 DIP("movb $%lld,%s\n", d64, nameIRegRexB(1,pfx,opc-0xB0));
20847 return delta;
20849 case 0xB8: /* MOV imm,eAX */
20850 case 0xB9: /* MOV imm,eCX */
20851 case 0xBA: /* MOV imm,eDX */
20852 case 0xBB: /* MOV imm,eBX */
20853 case 0xBC: /* MOV imm,eSP */
20854 case 0xBD: /* MOV imm,eBP */
20855 case 0xBE: /* MOV imm,eSI */
20856 case 0xBF: /* MOV imm,eDI */
20857 /* This is the one-and-only place where 64-bit literals are
20858 allowed in the instruction stream. */
20859 if (haveF2orF3(pfx)) goto decode_failure;
20860 if (sz == 8) {
20861 d64 = getDisp64(delta);
20862 delta += 8;
20863 putIRegRexB(8, pfx, opc-0xB8, mkU64(d64));
20864 DIP("movabsq $%lld,%s\n", (Long)d64,
20865 nameIRegRexB(8,pfx,opc-0xB8));
20866 } else {
20867 d64 = getSDisp(imin(4,sz),delta);
20868 delta += imin(4,sz);
20869 putIRegRexB(sz, pfx, opc-0xB8,
20870 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
20871 DIP("mov%c $%lld,%s\n", nameISize(sz),
20872 (Long)d64,
20873 nameIRegRexB(sz,pfx,opc-0xB8));
20875 return delta;
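/* Editor's note: a concrete (hypothetical) instance of the 64-bit literal
   form handled above:

      48 B8 F0 DE BC 9A 78 56 34 12    movabsq $0x123456789abcdef0, %rax

   REX.W (0x48) selects sz == 8, and the eight immediate bytes follow the
   opcode in little-endian order. */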
20877 case 0xC0: { /* Grp2 Ib,Eb */
20878 Bool decode_OK = True;
20879 if (haveF2orF3(pfx)) goto decode_failure;
20880 modrm = getUChar(delta);
20881 am_sz = lengthAMode(pfx,delta);
20882 d_sz = 1;
20883 d64 = getUChar(delta + am_sz);
20884 sz = 1;
20885 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
20886 mkU8(d64 & 0xFF), NULL, &decode_OK );
20887 if (!decode_OK) goto decode_failure;
20888 return delta;
20891 case 0xC1: { /* Grp2 Ib,Ev */
20892 Bool decode_OK = True;
20893 if (haveF2orF3(pfx)) goto decode_failure;
20894 modrm = getUChar(delta);
20895 am_sz = lengthAMode(pfx,delta);
20896 d_sz = 1;
20897 d64 = getUChar(delta + am_sz);
20898 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
20899 mkU8(d64 & 0xFF), NULL, &decode_OK );
20900 if (!decode_OK) goto decode_failure;
20901 return delta;
20904 case 0xC2: /* RET imm16 */
20905 if (have66orF3(pfx)) goto decode_failure;
20906 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
20907 d64 = getUDisp16(delta);
20908 delta += 2;
20909 dis_ret(dres, vbi, d64);
20910 DIP("ret $%lld\n", d64);
20911 return delta;
20913 case 0xC3: /* RET */
20914 if (have66(pfx)) goto decode_failure;
20915 /* F3 is acceptable on AMD. */
20916 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
20917 dis_ret(dres, vbi, 0);
20918 DIP(haveF3(pfx) ? "rep ; ret\n" : "ret\n");
20919 return delta;
20921 case 0xC6: /* C6 /0 = MOV Ib,Eb */
20922 sz = 1;
20923 goto maybe_do_Mov_I_E;
20924 case 0xC7: /* C7 /0 = MOV Iv,Ev */
20925 goto maybe_do_Mov_I_E;
20926 maybe_do_Mov_I_E:
20927 modrm = getUChar(delta);
20928 if (gregLO3ofRM(modrm) == 0) {
20929 if (epartIsReg(modrm)) {
20930 /* Neither F2 nor F3 are allowable. */
20931 if (haveF2orF3(pfx)) goto decode_failure;
20932 delta++; /* mod/rm byte */
20933 d64 = getSDisp(imin(4,sz),delta);
20934 delta += imin(4,sz);
20935 putIRegE(sz, pfx, modrm,
20936 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
20937 DIP("mov%c $%lld, %s\n", nameISize(sz),
20938 (Long)d64,
20939 nameIRegE(sz,pfx,modrm));
20940 } else {
20941 if (haveF2(pfx)) goto decode_failure;
20942 /* F3(XRELEASE) is allowable here */
20943 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
20944 /*xtra*/imin(4,sz) );
20945 delta += alen;
20946 d64 = getSDisp(imin(4,sz),delta);
20947 delta += imin(4,sz);
20948 storeLE(mkexpr(addr),
20949 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
20950 DIP("mov%c $%lld, %s\n", nameISize(sz), (Long)d64, dis_buf);
20952 return delta;
20954 /* BEGIN HACKY SUPPORT FOR xbegin */
20955 if (opc == 0xC7 && modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 4
20956 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
20957 delta++; /* mod/rm byte */
20958 d64 = getSDisp(4,delta);
20959 delta += 4;
20960 guest_RIP_next_mustcheck = True;
20961 guest_RIP_next_assumed = guest_RIP_bbstart + delta;
20962 Addr64 failAddr = guest_RIP_bbstart + delta + d64;
20963 /* EAX contains the failure status code. Bit 3 is "Set if an
20964 internal buffer overflowed", which seems like the
20965 least-bogus choice we can make here. */
20966 putIRegRAX(4, mkU32(1<<3));
20967 /* And jump to the fail address. */
20968 jmp_lit(dres, Ijk_Boring, failAddr);
20969 vassert(dres->whatNext == Dis_StopHere);
20970 DIP("xbeginq 0x%llx\n", failAddr);
20971 return delta;
20973 /* END HACKY SUPPORT FOR xbegin */
20974 /* BEGIN HACKY SUPPORT FOR xabort */
20975 if (opc == 0xC6 && modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 1
20976 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
20977 delta++; /* mod/rm byte */
20978 abyte = getUChar(delta); delta++;
20979 /* There is never a real transaction in progress, so do nothing. */
20980 DIP("xabort $%d", (Int)abyte);
20981 return delta;
20983 /* END HACKY SUPPORT FOR xabort */
20984 goto decode_failure;
20986 case 0xC8: /* ENTER */
20987 /* Same comments re operand size as for LEAVE below apply.
20988 Also, only handles the case "enter $imm16, $0"; other cases
20989 for the second operand (nesting depth) are not handled. */
20990 if (sz != 4)
20991 goto decode_failure;
20992 d64 = getUDisp16(delta);
20993 delta += 2;
20994 vassert(d64 >= 0 && d64 <= 0xFFFF);
20995 if (getUChar(delta) != 0)
20996 goto decode_failure;
20997 delta++;
20998 /* Intel docs seem to suggest:
20999 push rbp
21000 temp = rsp
21001 rbp = temp
21002 rsp = rsp - imm16
21004 t1 = newTemp(Ity_I64);
21005 assign(t1, getIReg64(R_RBP));
21006 t2 = newTemp(Ity_I64);
21007 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
21008 putIReg64(R_RSP, mkexpr(t2));
21009 storeLE(mkexpr(t2), mkexpr(t1));
21010 putIReg64(R_RBP, mkexpr(t2));
21011 if (d64 > 0) {
21012 putIReg64(R_RSP, binop(Iop_Sub64, mkexpr(t2), mkU64(d64)));
21014 DIP("enter $%u, $0\n", (UInt)d64);
21015 return delta;
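/* Editor's sketch (not VEX code): "enter $imm16, $0" as handled above is,
   in guest-register terms,

      rsp -= 8;                // push the old frame pointer
      store64(rsp, rbp);
      rbp  = rsp;              // establish the new frame
      rsp -= imm16;            // reserve the local area

   where store64 stands for a hypothetical little-endian 64-bit store. */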
21017 case 0xC9: /* LEAVE */
21018 /* In 64-bit mode this defaults to a 64-bit operand size. There
21019 is no way to encode a 32-bit variant. Hence sz==4 but we do
21020 it as if sz=8. */
21021 if (sz != 4)
21022 goto decode_failure;
21023 t1 = newTemp(Ity_I64);
21024 t2 = newTemp(Ity_I64);
21025 assign(t1, getIReg64(R_RBP));
21026 /* First PUT RSP looks redundant, but we need it because RSP must
21027 always be up-to-date for Memcheck to work... */
21028 putIReg64(R_RSP, mkexpr(t1));
21029 assign(t2, loadLE(Ity_I64,mkexpr(t1)));
21030 putIReg64(R_RBP, mkexpr(t2));
21031 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(8)) );
21032 DIP("leave\n");
21033 return delta;
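/* Editor's sketch: LEAVE as handled above is, in guest-register terms,

      rsp = rbp;
      rbp = load64(rsp);       // pop the saved frame pointer
      rsp += 8;

   with load64 standing for a hypothetical little-endian 64-bit load. */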
21035 case 0xCC: /* INT 3 */
21036 jmp_lit(dres, Ijk_SigTRAP, guest_RIP_bbstart + delta);
21037 vassert(dres->whatNext == Dis_StopHere);
21038 DIP("int $0x3\n");
21039 return delta;
21041 case 0xCD: /* INT imm8 */
21042 d64 = getUChar(delta); delta++;
21044 /* Handle int $0xD2 (Solaris fasttrap syscalls). */
21045 if (d64 == 0xD2) {
21046 jmp_lit(dres, Ijk_Sys_int210, guest_RIP_bbstart + delta);
21047 vassert(dres->whatNext == Dis_StopHere);
21048 DIP("int $0xD2\n");
21049 return delta;
21051 goto decode_failure;
21053 case 0xCF: /* IRET */
21054 /* Note: this is an extremely kludgey and limited implementation of IRET,
21055 based on the equally kludgey and limited x86 implementation:
21056 popq %RIP; popl %CS; popq %RFLAGS; popq %RSP; popl %SS
21057 %CS and %SS are ignored */
21058 if (sz != 8 || have66orF2orF3(pfx)) goto decode_failure;
21060 t1 = newTemp(Ity_I64); /* RSP */
21061 t2 = newTemp(Ity_I64); /* new RIP */
21062 /* t3 = newTemp(Ity_I32); new CS */
21063 t4 = newTemp(Ity_I64); /* new RFLAGS */
21064 t5 = newTemp(Ity_I64); /* new RSP */
21065 /* t6 = newTemp(Ity_I32); new SS */
21067 assign(t1, getIReg64(R_RSP));
21068 assign(t2, loadLE(Ity_I64, binop(Iop_Add64,mkexpr(t1),mkU64(0))));
21069 /* assign(t3, loadLE(Ity_I32, binop(Iop_Add64,mkexpr(t1),mkU64(8)))); */
21070 assign(t4, loadLE(Ity_I64, binop(Iop_Add64,mkexpr(t1),mkU64(16))));
21071 assign(t5, loadLE(Ity_I64, binop(Iop_Add64,mkexpr(t1),mkU64(24))));
21072 /* assign(t6, loadLE(Ity_I32, binop(Iop_Add64,mkexpr(t1),mkU64(32)))); */
21074 /* set %RFLAGS */
21075 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
21076 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
21077 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
21078 stmt( IRStmt_Put( OFFB_CC_DEP1,
21079 binop(Iop_And64,
21080 mkexpr(t4),
21081 mkU64( AMD64G_CC_MASK_C | AMD64G_CC_MASK_P
21082 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_Z
21083 | AMD64G_CC_MASK_S| AMD64G_CC_MASK_O )
21088 /* Also need to set the D flag, which is held in bit 10 of t4.
21089 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
21090 stmt( IRStmt_Put(
21091 OFFB_DFLAG,
21092 IRExpr_ITE(
21093 unop(Iop_64to1,
21094 binop(Iop_And64,
21095 binop(Iop_Shr64, mkexpr(t4), mkU8(10)),
21096 mkU64(1))),
21097 mkU64(0xFFFFFFFFFFFFFFFFULL),
21098 mkU64(1)))
21101 /* And set the ID flag */
21102 stmt( IRStmt_Put(
21103 OFFB_IDFLAG,
21104 IRExpr_ITE(
21105 unop(Iop_64to1,
21106 binop(Iop_And64,
21107 binop(Iop_Shr64, mkexpr(t4), mkU8(21)),
21108 mkU64(1))),
21109 mkU64(1),
21110 mkU64(0)))
21113 /* And set the AC flag too */
21114 stmt( IRStmt_Put(
21115 OFFB_ACFLAG,
21116 IRExpr_ITE(
21117 unop(Iop_64to1,
21118 binop(Iop_And64,
21119 binop(Iop_Shr64, mkexpr(t4), mkU8(18)),
21120 mkU64(1))),
21121 mkU64(1),
21122 mkU64(0)))
21126 /* set new stack */
21127 putIReg64(R_RSP, mkexpr(t5));
21129 /* goto new RIP value */
21130 jmp_treg(dres, Ijk_Ret, t2);
21131 DIP("iret (very kludgey)\n");
21132 return delta;
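/* Editor's note: the limited IRET above assumes the 64-bit interrupt frame
   layout and reads five quadword slots upwards from RSP:

      [rsp +  0]   new RIP     (used)
      [rsp +  8]   CS          (ignored)
      [rsp + 16]   new RFLAGS  (used, via the flag-thunk patching above)
      [rsp + 24]   new RSP     (used)
      [rsp + 32]   SS          (ignored)
*/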
21134 case 0xD0: { /* Grp2 1,Eb */
21135 Bool decode_OK = True;
21136 if (haveF2orF3(pfx)) goto decode_failure;
21137 modrm = getUChar(delta);
21138 am_sz = lengthAMode(pfx,delta);
21139 d_sz = 0;
21140 d64 = 1;
21141 sz = 1;
21142 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
21143 mkU8(d64), NULL, &decode_OK );
21144 if (!decode_OK) goto decode_failure;
21145 return delta;
21148 case 0xD1: { /* Grp2 1,Ev */
21149 Bool decode_OK = True;
21150 if (haveF2orF3(pfx)) goto decode_failure;
21151 modrm = getUChar(delta);
21152 am_sz = lengthAMode(pfx,delta);
21153 d_sz = 0;
21154 d64 = 1;
21155 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
21156 mkU8(d64), NULL, &decode_OK );
21157 if (!decode_OK) goto decode_failure;
21158 return delta;
21161 case 0xD2: { /* Grp2 CL,Eb */
21162 Bool decode_OK = True;
21163 if (haveF2orF3(pfx)) goto decode_failure;
21164 modrm = getUChar(delta);
21165 am_sz = lengthAMode(pfx,delta);
21166 d_sz = 0;
21167 sz = 1;
21168 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
21169 getIRegCL(), "%cl", &decode_OK );
21170 if (!decode_OK) goto decode_failure;
21171 return delta;
21174 case 0xD3: { /* Grp2 CL,Ev */
21175 Bool decode_OK = True;
21176 if (haveF2orF3(pfx)) goto decode_failure;
21177 modrm = getUChar(delta);
21178 am_sz = lengthAMode(pfx,delta);
21179 d_sz = 0;
21180 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
21181 getIRegCL(), "%cl", &decode_OK );
21182 if (!decode_OK) goto decode_failure;
21183 return delta;
21186 case 0xD8: /* X87 instructions */
21187 case 0xD9:
21188 case 0xDA:
21189 case 0xDB:
21190 case 0xDC:
21191 case 0xDD:
21192 case 0xDE:
21193 case 0xDF: {
21194 Bool redundantREXWok = False;
21196 if (haveF2orF3(pfx))
21197 goto decode_failure;
21199 /* kludge to tolerate redundant rex.w prefixes (should do this
21200 properly one day) */
21201 /* mono 1.1.18.1 produces 48 D9 FA, which is rex.w fsqrt */
21202 if ( (opc == 0xD9 && getUChar(delta+0) == 0xFA)/*fsqrt*/ )
21203 redundantREXWok = True;
21205 Bool size_OK = False;
21206 if ( sz == 4 )
21207 size_OK = True;
21208 else if ( sz == 8 )
21209 size_OK = redundantREXWok;
21210 else if ( sz == 2 ) {
21211 int mod_rm = getUChar(delta+0);
21212 int reg = gregLO3ofRM(mod_rm);
21213 /* The HotSpot JVM uses these */
21214 if ( (opc == 0xDD) && (reg == 0 /* FLDL */ ||
21215 reg == 4 /* FNSAVE */ ||
21216 reg == 6 /* FRSTOR */ ) )
21217 size_OK = True;
21219 /* AMD manual says 0x66 size override is ignored, except where
21220 it is meaningful */
21221 if (!size_OK)
21222 goto decode_failure;
21224 Bool decode_OK = False;
21225 delta = dis_FPU ( &decode_OK, vbi, pfx, delta );
21226 if (!decode_OK)
21227 goto decode_failure;
21229 return delta;
21232 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
21233 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */
21234 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */
21235 { /* The docs say this uses rCX as a count depending on the
21236 address size override, not the operand one. */
21237 IRExpr* zbit = NULL;
21238 IRExpr* count = NULL;
21239 IRExpr* cond = NULL;
21240 const HChar* xtra = NULL;
21242 if (have66orF2orF3(pfx) || 1==getRexW(pfx)) goto decode_failure;
21243 /* So at this point we've rejected any variants which appear to
21244 be governed by the usual operand-size modifiers. Hence only
21245 the address size prefix can have an effect. It changes the
21246 size from 64 (default) to 32. */
21247 d64 = guest_RIP_bbstart+delta+1 + getSDisp8(delta);
21248 delta++;
21249 if (haveASO(pfx)) {
21250 /* 64to32 of 64-bit get is merely a get-put improvement
21251 trick. */
21252 putIReg32(R_RCX, binop(Iop_Sub32,
21253 unop(Iop_64to32, getIReg64(R_RCX)),
21254 mkU32(1)));
21255 } else {
21256 putIReg64(R_RCX, binop(Iop_Sub64, getIReg64(R_RCX), mkU64(1)));
21259 /* This is correct, both for 32- and 64-bit versions. If we're
21260 doing a 32-bit dec and the result is zero then the default
21261 zero extension rule will cause the upper 32 bits to be zero
21262 too. Hence a 64-bit check against zero is OK. */
21263 count = getIReg64(R_RCX);
21264 cond = binop(Iop_CmpNE64, count, mkU64(0));
21265 switch (opc) {
21266 case 0xE2:
21267 xtra = "";
21268 break;
21269 case 0xE1:
21270 xtra = "e";
21271 zbit = mk_amd64g_calculate_condition( AMD64CondZ );
21272 cond = mkAnd1(cond, zbit);
21273 break;
21274 case 0xE0:
21275 xtra = "ne";
21276 zbit = mk_amd64g_calculate_condition( AMD64CondNZ );
21277 cond = mkAnd1(cond, zbit);
21278 break;
21279 default:
21280 vassert(0);
21282 stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64), OFFB_RIP) );
21284 DIP("loop%s%s 0x%llx\n", xtra, haveASO(pfx) ? "l" : "", (ULong)d64);
21285 return delta;
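/* Editor's sketch (illustrative C, not VEX code): with no address-size
   override the cases above behave like

      rcx -= 1;
      if (rcx != 0 && extra_cond)      // extra_cond: true for loop,
         rip = rip_next + disp8;       // ZF==1 for loope, ZF==0 for loopne
*/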
21288 case 0xE3:
21289 /* JRCXZ or JECXZ, depending on the address size override. */
21290 if (have66orF2orF3(pfx)) goto decode_failure;
21291 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
21292 delta++;
21293 if (haveASO(pfx)) {
21294 /* 32-bit */
21295 stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
21296 unop(Iop_32Uto64, getIReg32(R_RCX)),
21297 mkU64(0)),
21298 Ijk_Boring,
21299 IRConst_U64(d64),
21300 OFFB_RIP
21302 DIP("jecxz 0x%llx\n", (ULong)d64);
21303 } else {
21304 /* 64-bit */
21305 stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
21306 getIReg64(R_RCX),
21307 mkU64(0)),
21308 Ijk_Boring,
21309 IRConst_U64(d64),
21310 OFFB_RIP
21312 DIP("jrcxz 0x%llx\n", (ULong)d64);
21314 return delta;
21316 case 0xE4: /* IN imm8, AL */
21317 sz = 1;
21318 t1 = newTemp(Ity_I64);
21319 abyte = getUChar(delta); delta++;
21320 assign(t1, mkU64( abyte & 0xFF ));
21321 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz));
21322 goto do_IN;
21323 case 0xE5: /* IN imm8, eAX */
21324 if (!(sz == 2 || sz == 4)) goto decode_failure;
21325 t1 = newTemp(Ity_I64);
21326 abyte = getUChar(delta); delta++;
21327 assign(t1, mkU64( abyte & 0xFF ));
21328 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz));
21329 goto do_IN;
21330 case 0xEC: /* IN %DX, AL */
21331 sz = 1;
21332 t1 = newTemp(Ity_I64);
21333 assign(t1, unop(Iop_16Uto64, getIRegRDX(2)));
21334 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2),
21335 nameIRegRAX(sz));
21336 goto do_IN;
21337 case 0xED: /* IN %DX, eAX */
21338 if (!(sz == 2 || sz == 4)) goto decode_failure;
21339 t1 = newTemp(Ity_I64);
21340 assign(t1, unop(Iop_16Uto64, getIRegRDX(2)));
21341 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2),
21342 nameIRegRAX(sz));
21343 goto do_IN;
21344 do_IN: {
21345 /* At this point, sz indicates the width, and t1 is a 64-bit
21346 value giving the port number. */
21347 IRDirty* d;
21348 if (haveF2orF3(pfx)) goto decode_failure;
21349 vassert(sz == 1 || sz == 2 || sz == 4);
21350 ty = szToITy(sz);
21351 t2 = newTemp(Ity_I64);
21352 d = unsafeIRDirty_1_N(
21354 0/*regparms*/,
21355 "amd64g_dirtyhelper_IN",
21356 &amd64g_dirtyhelper_IN,
21357 mkIRExprVec_2( mkexpr(t1), mkU64(sz) )
21359 /* do the call, dumping the result in t2. */
21360 stmt( IRStmt_Dirty(d) );
21361 putIRegRAX(sz, narrowTo( ty, mkexpr(t2) ) );
21362 return delta;
21365 case 0xE6: /* OUT AL, imm8 */
21366 sz = 1;
21367 t1 = newTemp(Ity_I64);
21368 abyte = getUChar(delta); delta++;
21369 assign( t1, mkU64( abyte & 0xFF ) );
21370 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte);
21371 goto do_OUT;
21372 case 0xE7: /* OUT eAX, imm8 */
21373 if (!(sz == 2 || sz == 4)) goto decode_failure;
21374 t1 = newTemp(Ity_I64);
21375 abyte = getUChar(delta); delta++;
21376 assign( t1, mkU64( abyte & 0xFF ) );
21377 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte);
21378 goto do_OUT;
21379 case 0xEE: /* OUT AL, %DX */
21380 sz = 1;
21381 t1 = newTemp(Ity_I64);
21382 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) );
21383 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz),
21384 nameIRegRDX(2));
21385 goto do_OUT;
21386 case 0xEF: /* OUT eAX, %DX */
21387 if (!(sz == 2 || sz == 4)) goto decode_failure;
21388 t1 = newTemp(Ity_I64);
21389 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) );
21390 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz),
21391 nameIRegRDX(2));
21392 goto do_OUT;
21393 do_OUT: {
21394 /* At this point, sz indicates the width, and t1 is a 64-bit
21395 value giving the port number. */
21396 IRDirty* d;
21397 if (haveF2orF3(pfx)) goto decode_failure;
21398 vassert(sz == 1 || sz == 2 || sz == 4);
21399 ty = szToITy(sz);
21400 d = unsafeIRDirty_0_N(
21401 0/*regparms*/,
21402 "amd64g_dirtyhelper_OUT",
21403 &amd64g_dirtyhelper_OUT,
21404 mkIRExprVec_3( mkexpr(t1),
21405 widenUto64( getIRegRAX(sz) ),
21406 mkU64(sz) )
21408 stmt( IRStmt_Dirty(d) );
21409 return delta;
21412 case 0xE8: /* CALL J4 */
21413 if (haveF3(pfx)) goto decode_failure;
21414 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
21415 d64 = getSDisp32(delta); delta += 4;
21416 d64 += (guest_RIP_bbstart+delta);
21417 /* (guest_RIP_bbstart+delta) == return-to addr, d64 == call-to addr */
21418 t1 = newTemp(Ity_I64);
21419 assign(t1, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
21420 putIReg64(R_RSP, mkexpr(t1));
21421 storeLE( mkexpr(t1), mkU64(guest_RIP_bbstart+delta));
21422 t2 = newTemp(Ity_I64);
21423 assign(t2, mkU64((Addr64)d64));
21424 make_redzone_AbiHint(vbi, t1, t2/*nia*/, "call-d32");
21425 if (resteerOkFn( callback_opaque, (Addr64)d64) ) {
21426 /* follow into the call target. */
21427 dres->whatNext = Dis_ResteerU;
21428 dres->continueAt = d64;
21429 } else {
21430 jmp_lit(dres, Ijk_Call, d64);
21431 vassert(dres->whatNext == Dis_StopHere);
21433 DIP("call 0x%llx\n", (ULong)d64);
21434 return delta;
21436 case 0xE9: /* Jv (jump, 16/32 offset) */
21437 if (haveF3(pfx)) goto decode_failure;
21438 if (sz != 4)
21439 goto decode_failure; /* JRS added 2004 July 11 */
21440 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
21441 d64 = (guest_RIP_bbstart+delta+sz) + getSDisp(sz,delta);
21442 delta += sz;
21443 if (resteerOkFn(callback_opaque, (Addr64)d64)) {
21444 dres->whatNext = Dis_ResteerU;
21445 dres->continueAt = d64;
21446 } else {
21447 jmp_lit(dres, Ijk_Boring, d64);
21448 vassert(dres->whatNext == Dis_StopHere);
21450 DIP("jmp 0x%llx\n", (ULong)d64);
21451 return delta;
21453 case 0xEB: /* Jb (jump, byte offset) */
21454 if (haveF3(pfx)) goto decode_failure;
21455 if (sz != 4)
21456 goto decode_failure; /* JRS added 2004 July 11 */
21457 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
21458 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
21459 delta++;
21460 if (resteerOkFn(callback_opaque, (Addr64)d64)) {
21461 dres->whatNext = Dis_ResteerU;
21462 dres->continueAt = d64;
21463 } else {
21464 jmp_lit(dres, Ijk_Boring, d64);
21465 vassert(dres->whatNext == Dis_StopHere);
21467 DIP("jmp-8 0x%llx\n", (ULong)d64);
21468 return delta;
21470 case 0xF5: /* CMC */
21471 case 0xF8: /* CLC */
21472 case 0xF9: /* STC */
21473 t1 = newTemp(Ity_I64);
21474 t2 = newTemp(Ity_I64);
21475 assign( t1, mk_amd64g_calculate_rflags_all() );
21476 switch (opc) {
21477 case 0xF5:
21478 assign( t2, binop(Iop_Xor64, mkexpr(t1),
21479 mkU64(AMD64G_CC_MASK_C)));
21480 DIP("cmc\n");
21481 break;
21482 case 0xF8:
21483 assign( t2, binop(Iop_And64, mkexpr(t1),
21484 mkU64(~AMD64G_CC_MASK_C)));
21485 DIP("clc\n");
21486 break;
21487 case 0xF9:
21488 assign( t2, binop(Iop_Or64, mkexpr(t1),
21489 mkU64(AMD64G_CC_MASK_C)));
21490 DIP("stc\n");
21491 break;
21492 default:
21493 vpanic("disInstr(x64)(cmc/clc/stc)");
21495 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
21496 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
21497 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t2) ));
21498 /* Set NDEP even though it isn't used. This makes redundant-PUT
21499 elimination of previous stores to this field work better. */
21500 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
21501 return delta;
21503 case 0xF6: { /* Grp3 Eb */
21504 Bool decode_OK = True;
21505 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21506 /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */
21507 delta = dis_Grp3 ( vbi, pfx, 1, delta, &decode_OK );
21508 if (!decode_OK) goto decode_failure;
21509 return delta;
21512 case 0xF7: { /* Grp3 Ev */
21513 Bool decode_OK = True;
21514 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21515 /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */
21516 delta = dis_Grp3 ( vbi, pfx, sz, delta, &decode_OK );
21517 if (!decode_OK) goto decode_failure;
21518 return delta;
21521 case 0xFC: /* CLD */
21522 if (haveF2orF3(pfx)) goto decode_failure;
21523 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(1)) );
21524 DIP("cld\n");
21525 return delta;
21527 case 0xFD: /* STD */
21528 if (haveF2orF3(pfx)) goto decode_failure;
21529 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(-1ULL)) );
21530 DIP("std\n");
21531 return delta;
21533 case 0xFE: { /* Grp4 Eb */
21534 Bool decode_OK = True;
21535 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21536 /* We now let dis_Grp4 itself decide if F2 and/or F3 are valid */
21537 delta = dis_Grp4 ( vbi, pfx, delta, &decode_OK );
21538 if (!decode_OK) goto decode_failure;
21539 return delta;
21542 case 0xFF: { /* Grp5 Ev */
21543 Bool decode_OK = True;
21544 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21545 /* We now let dis_Grp5 itself decide if F2 and/or F3 are valid */
21546 delta = dis_Grp5 ( vbi, pfx, sz, delta, dres, &decode_OK );
21547 if (!decode_OK) goto decode_failure;
21548 return delta;
21551 default:
21552 break;
21556 decode_failure:
21557 return deltaIN; /* fail */
21561 /*------------------------------------------------------------*/
21562 /*--- ---*/
21563 /*--- Top-level post-escape decoders: dis_ESC_0F ---*/
21564 /*--- ---*/
21565 /*------------------------------------------------------------*/
21567 static IRTemp math_BSWAP ( IRTemp t1, IRType ty )
21569 IRTemp t2 = newTemp(ty);
21570 if (ty == Ity_I64) {
21571 IRTemp m8 = newTemp(Ity_I64);
21572 IRTemp s8 = newTemp(Ity_I64);
21573 IRTemp m16 = newTemp(Ity_I64);
21574 IRTemp s16 = newTemp(Ity_I64);
21575 IRTemp m32 = newTemp(Ity_I64);
21576 assign( m8, mkU64(0xFF00FF00FF00FF00ULL) );
21577 assign( s8,
21578 binop(Iop_Or64,
21579 binop(Iop_Shr64,
21580 binop(Iop_And64,mkexpr(t1),mkexpr(m8)),
21581 mkU8(8)),
21582 binop(Iop_And64,
21583 binop(Iop_Shl64,mkexpr(t1),mkU8(8)),
21584 mkexpr(m8))
21588 assign( m16, mkU64(0xFFFF0000FFFF0000ULL) );
21589 assign( s16,
21590 binop(Iop_Or64,
21591 binop(Iop_Shr64,
21592 binop(Iop_And64,mkexpr(s8),mkexpr(m16)),
21593 mkU8(16)),
21594 binop(Iop_And64,
21595 binop(Iop_Shl64,mkexpr(s8),mkU8(16)),
21596 mkexpr(m16))
21600 assign( m32, mkU64(0xFFFFFFFF00000000ULL) );
21601 assign( t2,
21602 binop(Iop_Or64,
21603 binop(Iop_Shr64,
21604 binop(Iop_And64,mkexpr(s16),mkexpr(m32)),
21605 mkU8(32)),
21606 binop(Iop_And64,
21607 binop(Iop_Shl64,mkexpr(s16),mkU8(32)),
21608 mkexpr(m32))
21611 return t2;
21613 if (ty == Ity_I32) {
21614 assign( t2,
21615 binop(
21616 Iop_Or32,
21617 binop(Iop_Shl32, mkexpr(t1), mkU8(24)),
21618 binop(
21619 Iop_Or32,
21620 binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
21621 mkU32(0x00FF0000)),
21622 binop(Iop_Or32,
21623 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
21624 mkU32(0x0000FF00)),
21625 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)),
21626 mkU32(0x000000FF) )
21629 return t2;
21631 if (ty == Ity_I16) {
21632 assign(t2,
21633 binop(Iop_Or16,
21634 binop(Iop_Shl16, mkexpr(t1), mkU8(8)),
21635 binop(Iop_Shr16, mkexpr(t1), mkU8(8)) ));
21636 return t2;
21638 vassert(0);
21639 /*NOTREACHED*/
21640 return IRTemp_INVALID;
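/* Editor's sketch (not part of VEX): the Ity_I64 case above is the usual
   three-pass byte reversal.  An equivalent, self-contained C model:

      static unsigned long long model_bswap64 ( unsigned long long x )
      {
         x = ((x & 0xFF00FF00FF00FF00ULL) >> 8)
             | ((x << 8)  & 0xFF00FF00FF00FF00ULL);  // swap bytes in 16-bit lanes
         x = ((x & 0xFFFF0000FFFF0000ULL) >> 16)
             | ((x << 16) & 0xFFFF0000FFFF0000ULL);  // swap 16-bit halves in 32-bit lanes
         x = ((x & 0xFFFFFFFF00000000ULL) >> 32)
             | ((x << 32) & 0xFFFFFFFF00000000ULL);  // swap the 32-bit halves
         return x;   // e.g. 0x0102030405060708 -> 0x0807060504030201
      }
*/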
21644 __attribute__((noinline))
21645 static
21646 Long dis_ESC_0F (
21647 /*MB_OUT*/DisResult* dres,
21648 /*MB_OUT*/Bool* expect_CAS,
21649 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
21650 Bool resteerCisOk,
21651 void* callback_opaque,
21652 const VexArchInfo* archinfo,
21653 const VexAbiInfo* vbi,
21654 Prefix pfx, Int sz, Long deltaIN
21657 Long d64 = 0;
21658 IRTemp addr = IRTemp_INVALID;
21659 IRTemp t1 = IRTemp_INVALID;
21660 IRTemp t2 = IRTemp_INVALID;
21661 UChar modrm = 0;
21662 Int am_sz = 0;
21663 Int alen = 0;
21664 HChar dis_buf[50];
21666 /* In the first switch, look for ordinary integer insns. */
21667 Long delta = deltaIN;
21668 UChar opc = getUChar(delta);
21669 delta++;
21670 switch (opc) { /* first switch */
21672 case 0x01:
21674 modrm = getUChar(delta);
21675 /* 0F 01 /0 -- SGDT */
21676 /* 0F 01 /1 -- SIDT */
21677 if (!epartIsReg(modrm)
21678 && (gregLO3ofRM(modrm) == 0 || gregLO3ofRM(modrm) == 1)) {
21679 /* This is really revolting, but ... since each processor
21680 (core) only has one IDT and one GDT, just let the guest
21681 see it (pass-through semantics). I can't see any way to
21682 construct a faked-up value, so don't bother to try. */
21683 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21684 delta += alen;
21685 switch (gregLO3ofRM(modrm)) {
21686 case 0: DIP("sgdt %s\n", dis_buf); break;
21687 case 1: DIP("sidt %s\n", dis_buf); break;
21688 default: vassert(0); /*NOTREACHED*/
21690 IRDirty* d = unsafeIRDirty_0_N (
21691 0/*regparms*/,
21692 "amd64g_dirtyhelper_SxDT",
21693 &amd64g_dirtyhelper_SxDT,
21694 mkIRExprVec_2( mkexpr(addr),
21695 mkU64(gregLO3ofRM(modrm)) )
21697 /* declare we're writing memory */
21698 d->mFx = Ifx_Write;
21699 d->mAddr = mkexpr(addr);
21700 d->mSize = 6;
21701 stmt( IRStmt_Dirty(d) );
21702 return delta;
21704 /* 0F 01 D0 = XGETBV */
21705 if (modrm == 0xD0 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21706 delta += 1;
21707 DIP("xgetbv\n");
21708 /* Fault (SEGV) if ECX isn't zero. Intel docs say #GP and I
21709 am not sure whether that translates into SEGV or something
21710 else in user space. */
21711 t1 = newTemp(Ity_I32);
21712 assign( t1, getIReg32(R_RCX) );
21713 stmt( IRStmt_Exit(binop(Iop_CmpNE32, mkexpr(t1), mkU32(0)),
21714 Ijk_SigSEGV,
21715 IRConst_U64(guest_RIP_curr_instr),
21716 OFFB_RIP
21718 putIRegRAX(4, mkU32(7));
21719 putIRegRDX(4, mkU32(0));
21720 return delta;
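/* Editor's note: returning EDX:EAX = 0:7 above reports XCR0 = 0x7, i.e.
   x87, SSE and AVX state all enabled, which matches the AVX capability
   gate on this decode path. */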
21722 /* BEGIN HACKY SUPPORT FOR xend */
21723 /* 0F 01 D5 = XEND */
21724 if (modrm == 0xD5 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21725 /* We are never in a transaction (xbegin immediately aborts).
21726 So this just always generates a General Protection Fault. */
21727 delta += 1;
21728 jmp_lit(dres, Ijk_SigSEGV, guest_RIP_bbstart + delta);
21729 vassert(dres->whatNext == Dis_StopHere);
21730 DIP("xend\n");
21731 return delta;
21733 /* END HACKY SUPPORT FOR xend */
21734 /* BEGIN HACKY SUPPORT FOR xtest */
21735 /* 0F 01 D6 = XTEST */
21736 if (modrm == 0xD6 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21737 /* Sets ZF because there never is a transaction, and all
21738 CF, OF, SF, PF and AF are always cleared by xtest. */
21739 delta += 1;
21740 DIP("xtest\n");
21741 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
21742 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
21743 stmt( IRStmt_Put( OFFB_CC_DEP1, mkU64(AMD64G_CC_MASK_Z) ));
21744 /* Set NDEP even though it isn't used. This makes redundant-PUT
21745 elimination of previous stores to this field work better. */
21746 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
21747 return delta;
21749 /* END HACKY SUPPORT FOR xtest */
21750 /* 0F 01 F9 = RDTSCP */
21751 if (modrm == 0xF9 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDTSCP)) {
21752 delta += 1;
21753 /* Uses dirty helper:
21754 void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* )
21755 declared to wr rax, rcx, rdx
21757 const HChar* fName = "amd64g_dirtyhelper_RDTSCP";
21758 void* fAddr = &amd64g_dirtyhelper_RDTSCP;
21759 IRDirty* d
21760 = unsafeIRDirty_0_N ( 0/*regparms*/,
21761 fName, fAddr, mkIRExprVec_1(IRExpr_GSPTR()) );
21762 /* declare guest state effects */
21763 d->nFxState = 3;
21764 vex_bzero(&d->fxState, sizeof(d->fxState));
21765 d->fxState[0].fx = Ifx_Write;
21766 d->fxState[0].offset = OFFB_RAX;
21767 d->fxState[0].size = 8;
21768 d->fxState[1].fx = Ifx_Write;
21769 d->fxState[1].offset = OFFB_RCX;
21770 d->fxState[1].size = 8;
21771 d->fxState[2].fx = Ifx_Write;
21772 d->fxState[2].offset = OFFB_RDX;
21773 d->fxState[2].size = 8;
21774 /* execute the dirty call, side-effecting guest state */
21775 stmt( IRStmt_Dirty(d) );
21776 /* RDTSCP is a serialising insn. So, just in case someone is
21777 using it as a memory fence ... */
21778 stmt( IRStmt_MBE(Imbe_Fence) );
21779 DIP("rdtscp\n");
21780 return delta;
21782 /* else decode failed */
21783 break;
21786 case 0x05: /* SYSCALL */
21787 guest_RIP_next_mustcheck = True;
21788 guest_RIP_next_assumed = guest_RIP_bbstart + delta;
21789 putIReg64( R_RCX, mkU64(guest_RIP_next_assumed) );
21790 /* It's important that all guest state is up-to-date
21791 at this point. So we declare an end-of-block here, which
21792 forces any cached guest state to be flushed. */
21793 jmp_lit(dres, Ijk_Sys_syscall, guest_RIP_next_assumed);
21794 vassert(dres->whatNext == Dis_StopHere);
21795 DIP("syscall\n");
21796 return delta;
21798 case 0x0B: /* UD2 */
21799 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
21800 jmp_lit(dres, Ijk_NoDecode, guest_RIP_curr_instr);
21801 vassert(dres->whatNext == Dis_StopHere);
21802 DIP("ud2\n");
21803 return delta;
21805 case 0x0D: /* 0F 0D /0 -- prefetch mem8 */
21806 /* 0F 0D /1 -- prefetchw mem8 */
21807 if (have66orF2orF3(pfx)) goto decode_failure;
21808 modrm = getUChar(delta);
21809 if (epartIsReg(modrm)) goto decode_failure;
21810 if (gregLO3ofRM(modrm) != 0 && gregLO3ofRM(modrm) != 1)
21811 goto decode_failure;
21812 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21813 delta += alen;
21814 switch (gregLO3ofRM(modrm)) {
21815 case 0: DIP("prefetch %s\n", dis_buf); break;
21816 case 1: DIP("prefetchw %s\n", dis_buf); break;
21817 default: vassert(0); /*NOTREACHED*/
21819 return delta;
21821 case 0x19:
21822 case 0x1C:
21823 case 0x1D:
21824 case 0x1E:
21825 case 0x1F:
21826 // Intel CET instructions can have any prefixes before NOPs
21827 // and can use any ModRM, SIB and disp
21828 modrm = getUChar(delta);
21829 if (epartIsReg(modrm)) {
21830 delta += 1;
21831 DIP("nop%c\n", nameISize(sz));
21832 } else {
21833 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21834 delta += alen;
21835 DIP("nop%c %s\n", nameISize(sz), dis_buf);
21837 return delta;
21839 case 0x31: { /* RDTSC */
21840 IRTemp val = newTemp(Ity_I64);
21841 IRExpr** args = mkIRExprVec_0();
21842 IRDirty* d = unsafeIRDirty_1_N (
21843 val,
21844 0/*regparms*/,
21845 "amd64g_dirtyhelper_RDTSC",
21846 &amd64g_dirtyhelper_RDTSC,
21847 args
21849 if (have66orF2orF3(pfx)) goto decode_failure;
21850 /* execute the dirty call, dumping the result in val. */
21851 stmt( IRStmt_Dirty(d) );
21852 putIRegRDX(4, unop(Iop_64HIto32, mkexpr(val)));
21853 putIRegRAX(4, unop(Iop_64to32, mkexpr(val)));
21854 DIP("rdtsc\n");
21855 return delta;
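/* Editor's note: the dirty helper above yields the full 64-bit timestamp;
   the two puts then split it architecturally, i.e. in C terms
   (illustrative):

      rdx = (unsigned)(tsc >> 32);     // high half
      rax = (unsigned) tsc;            // low half
*/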
21858 case 0x40:
21859 case 0x41:
21860 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
21861 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
21862 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
21863 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
21864 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
21865 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
21866 case 0x48: /* CMOVSb (cmov negative) */
21867 case 0x49: /* CMOVNSb (cmov not negative) */
21868 case 0x4A: /* CMOVP (cmov parity even) */
21869 case 0x4B: /* CMOVNP (cmov parity odd) */
21870 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
21871 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
21872 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
21873 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
21874 if (haveF2orF3(pfx)) goto decode_failure;
21875 delta = dis_cmov_E_G(vbi, pfx, sz, (AMD64Condcode)(opc - 0x40), delta);
21876 return delta;
21878 case 0x80:
21879 case 0x81:
21880 case 0x82: /* JBb/JNAEb (jump below) */
21881 case 0x83: /* JNBb/JAEb (jump not below) */
21882 case 0x84: /* JZb/JEb (jump zero) */
21883 case 0x85: /* JNZb/JNEb (jump not zero) */
21884 case 0x86: /* JBEb/JNAb (jump below or equal) */
21885 case 0x87: /* JNBEb/JAb (jump not below or equal) */
21886 case 0x88: /* JSb (jump negative) */
21887 case 0x89: /* JNSb (jump not negative) */
21888 case 0x8A: /* JP (jump parity even) */
21889 case 0x8B: /* JNP/JPO (jump parity odd) */
21890 case 0x8C: /* JLb/JNGEb (jump less) */
21891 case 0x8D: /* JGEb/JNLb (jump greater or equal) */
21892 case 0x8E: /* JLEb/JNGb (jump less or equal) */
21893 case 0x8F: { /* JGb/JNLEb (jump greater) */
21894 Long jmpDelta;
21895 const HChar* comment = "";
21896 if (haveF3(pfx)) goto decode_failure;
21897 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
21898 jmpDelta = getSDisp32(delta);
21899 d64 = (guest_RIP_bbstart+delta+4) + jmpDelta;
21900 delta += 4;
21901 if (resteerCisOk
21902 && vex_control.guest_chase_cond
21903 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
21904 && jmpDelta < 0
21905 && resteerOkFn( callback_opaque, (Addr64)d64) ) {
21906 /* Speculation: assume this backward branch is taken. So
21907 we need to emit a side-exit to the insn following this
21908 one, on the negation of the condition, and continue at
21909 the branch target address (d64). If we wind up back at
21910 the first instruction of the trace, just stop; it's
21911 better to let the IR loop unroller handle that case. */
21912 stmt( IRStmt_Exit(
21913 mk_amd64g_calculate_condition(
21914 (AMD64Condcode)(1 ^ (opc - 0x80))),
21915 Ijk_Boring,
21916 IRConst_U64(guest_RIP_bbstart+delta),
21917 OFFB_RIP
21919 dres->whatNext = Dis_ResteerC;
21920 dres->continueAt = d64;
21921 comment = "(assumed taken)";
21923 else
21924 if (resteerCisOk
21925 && vex_control.guest_chase_cond
21926 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
21927 && jmpDelta >= 0
21928 && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) {
21929 /* Speculation: assume this forward branch is not taken.
21930 So we need to emit a side-exit to d64 (the dest) and
21931 continue disassembling at the insn immediately
21932 following this one. */
21933 stmt( IRStmt_Exit(
21934 mk_amd64g_calculate_condition((AMD64Condcode)
21935 (opc - 0x80)),
21936 Ijk_Boring,
21937 IRConst_U64(d64),
21938 OFFB_RIP
21940 dres->whatNext = Dis_ResteerC;
21941 dres->continueAt = guest_RIP_bbstart+delta;
21942 comment = "(assumed not taken)";
21944 else {
21945 /* Conservative default translation - end the block at
21946 this point. */
21947 jcc_01( dres, (AMD64Condcode)(opc - 0x80),
21948 guest_RIP_bbstart+delta, d64 );
21949 vassert(dres->whatNext == Dis_StopHere);
21951 DIP("j%s-32 0x%llx %s\n", name_AMD64Condcode(opc - 0x80), (ULong)d64,
21952 comment);
21953 return delta;
21956 case 0x90:
21957 case 0x91:
21958 case 0x92: /* set-Bb/set-NAEb (set if below) */
21959 case 0x93: /* set-NBb/set-AEb (set if not below) */
21960 case 0x94: /* set-Zb/set-Eb (set if zero) */
21961 case 0x95: /* set-NZb/set-NEb (set if not zero) */
21962 case 0x96: /* set-BEb/set-NAb (set if below or equal) */
21963 case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */
21964 case 0x98: /* set-Sb (set if negative) */
21965 case 0x99: /* set-NSb (set if not negative) */
21966 case 0x9A: /* set-P (set if parity even) */
21967 case 0x9B: /* set-NP (set if parity odd) */
21968 case 0x9C: /* set-Lb/set-NGEb (set if less) */
21969 case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */
21970 case 0x9E: /* set-LEb/set-NGb (set if less or equal) */
21971 case 0x9F: /* set-Gb/set-NLEb (set if greater) */
21972 if (haveF2orF3(pfx)) goto decode_failure;
21973 t1 = newTemp(Ity_I8);
21974 assign( t1, unop(Iop_1Uto8,mk_amd64g_calculate_condition(opc-0x90)) );
21975 modrm = getUChar(delta);
21976 if (epartIsReg(modrm)) {
21977 delta++;
21978 putIRegE(1, pfx, modrm, mkexpr(t1));
21979 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90),
21980 nameIRegE(1,pfx,modrm));
21981 } else {
21982 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21983 delta += alen;
21984 storeLE( mkexpr(addr), mkexpr(t1) );
21985 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90), dis_buf);
21987 return delta;
21989 case 0x1A:
21990 case 0x1B: { /* Future MPX instructions, currently NOPs.
21991 BNDMK b, m F3 0F 1B
21992 BNDCL b, r/m F3 0F 1A
21993 BNDCU b, r/m F2 0F 1A
21994 BNDCN b, r/m F2 0F 1B
21995 BNDMOV b, b/m 66 0F 1A
21996 BNDMOV b/m, b 66 0F 1B
21997 BNDLDX b, mib 0F 1A
21998 BNDSTX mib, b 0F 1B */
22000 /* All instructions have two operands. One operand is always the
22001 bnd register number (bnd0-bnd3, other register numbers are
22002 ignored when MPX isn't enabled, but should generate an
22003 exception if MPX is enabled) given by gregOfRexRM. The other
22004 operand is either a ModRM:reg, ModRM:r/m or a SIB encoded
22005 address, all of which can be decoded by using either
22006 eregOfRexRM or disAMode. */
22008 modrm = getUChar(delta);
22009 int bnd = gregOfRexRM(pfx,modrm);
22010 const HChar *oper;
22011 if (epartIsReg(modrm)) {
22012 oper = nameIReg64 (eregOfRexRM(pfx,modrm));
22013 delta += 1;
22014 } else {
22015 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22016 delta += alen;
22017 oper = dis_buf;
22020 if (haveF3no66noF2 (pfx)) {
22021 if (opc == 0x1B) {
22022 DIP ("bndmk %s, %%bnd%d\n", oper, bnd);
22023 } else /* opc == 0x1A */ {
22024 DIP ("bndcl %s, %%bnd%d\n", oper, bnd);
22026 } else if (haveF2no66noF3 (pfx)) {
22027 if (opc == 0x1A) {
22028 DIP ("bndcu %s, %%bnd%d\n", oper, bnd);
22029 } else /* opc == 0x1B */ {
22030 DIP ("bndcn %s, %%bnd%d\n", oper, bnd);
22032 } else if (have66noF2noF3 (pfx)) {
22033 if (opc == 0x1A) {
22034 DIP ("bndmov %s, %%bnd%d\n", oper, bnd);
22035 } else /* opc == 0x1B */ {
22036 DIP ("bndmov %%bnd%d, %s\n", bnd, oper);
22038 } else if (haveNo66noF2noF3 (pfx)) {
22039 if (opc == 0x1A) {
22040 DIP ("bndldx %s, %%bnd%d\n", oper, bnd);
22041 } else /* opc == 0x1B */ {
22042 DIP ("bndstx %%bnd%d, %s\n", bnd, oper);
22044 } else goto decode_failure;
22046 return delta;
22049 case 0xA2: { /* CPUID */
22050 /* Uses dirty helper:
22051 void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* )
22052 declared to mod rax, wr rbx, rcx, rdx
22054 IRDirty* d = NULL;
22055 const HChar* fName = NULL;
22056 void* fAddr = NULL;
22058 if (haveF2orF3(pfx)) goto decode_failure;
22060 /* This isn't entirely correct: CPUID should depend on the VEX
22061 capabilities, not on the underlying CPU. See bug #324882. */
22062 if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSSE3) &&
22063 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16) &&
22064 (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX2)) {
22065 fName = "amd64g_dirtyhelper_CPUID_avx2";
22066 fAddr = &amd64g_dirtyhelper_CPUID_avx2;
22067 /* This is a Core-i7-4910-like machine */
22069 else if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSSE3) &&
22070 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16) &&
22071 (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
22072 fName = "amd64g_dirtyhelper_CPUID_avx_and_cx16";
22073 fAddr = &amd64g_dirtyhelper_CPUID_avx_and_cx16;
22074 /* This is a Core-i5-2300-like machine */
22076 else if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSSE3) &&
22077 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16)) {
22078 fName = "amd64g_dirtyhelper_CPUID_sse42_and_cx16";
22079 fAddr = &amd64g_dirtyhelper_CPUID_sse42_and_cx16;
22080 /* This is a Core-i5-670-like machine */
22082 else {
22083 /* Give a CPUID for at least a baseline machine, SSE2
22084 only, and no CX16 */
22085 fName = "amd64g_dirtyhelper_CPUID_baseline";
22086 fAddr = &amd64g_dirtyhelper_CPUID_baseline;
22089 vassert(fName); vassert(fAddr);
22090 IRExpr** args = NULL;
22091 if (fAddr == &amd64g_dirtyhelper_CPUID_avx2
22092 || fAddr == &amd64g_dirtyhelper_CPUID_avx_and_cx16) {
22093 Bool hasF16C = (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C) != 0;
22094 Bool hasRDRAND = (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDRAND) != 0;
22095 args = mkIRExprVec_3(IRExpr_GSPTR(),
22096 mkIRExpr_HWord(hasF16C ? 1 : 0),
22097 mkIRExpr_HWord(hasRDRAND ? 1 : 0));
22098 } else {
22099 args = mkIRExprVec_1(IRExpr_GSPTR());
22101 d = unsafeIRDirty_0_N ( 0/*regparms*/, fName, fAddr, args );
22103 /* declare guest state effects */
22104 d->nFxState = 4;
22105 vex_bzero(&d->fxState, sizeof(d->fxState));
22106 d->fxState[0].fx = Ifx_Modify;
22107 d->fxState[0].offset = OFFB_RAX;
22108 d->fxState[0].size = 8;
22109 d->fxState[1].fx = Ifx_Write;
22110 d->fxState[1].offset = OFFB_RBX;
22111 d->fxState[1].size = 8;
22112 d->fxState[2].fx = Ifx_Modify;
22113 d->fxState[2].offset = OFFB_RCX;
22114 d->fxState[2].size = 8;
22115 d->fxState[3].fx = Ifx_Write;
22116 d->fxState[3].offset = OFFB_RDX;
22117 d->fxState[3].size = 8;
22118 /* execute the dirty call, side-effecting guest state */
22119 stmt( IRStmt_Dirty(d) );
22120 /* CPUID is a serialising insn. So, just in case someone is
22121 using it as a memory fence ... */
22122 stmt( IRStmt_MBE(Imbe_Fence) );
22123 DIP("cpuid\n");
22124 return delta;
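/* Rough C-level model of the effects declared above (a sketch, not the
   actual helper; field names are those of VexGuestAMD64State):

      void cpuid_model ( VexGuestAMD64State* st ) {
         ULong leaf    = st->guest_RAX;   // read, then overwritten -> Ifx_Modify
         ULong subleaf = st->guest_RCX;   // read, then overwritten -> Ifx_Modify
         st->guest_RAX = ...;             // outputs
         st->guest_RBX = ...;             // write-only             -> Ifx_Write
         st->guest_RCX = ...;
         st->guest_RDX = ...;             // write-only             -> Ifx_Write
      }
*/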
22127 case 0xA3: { /* BT Gv,Ev */
22128 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22129 Bool ok = True;
22130 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
22131 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpNone, &ok );
22132 if (!ok) goto decode_failure;
22133 return delta;
22136 case 0xA4: /* SHLDv imm8,Gv,Ev */
22137 modrm = getUChar(delta);
22138 d64 = delta + lengthAMode(pfx, delta);
22139 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
22140 delta = dis_SHLRD_Gv_Ev (
22141 vbi, pfx, delta, modrm, sz,
22142 mkU8(getUChar(d64)), True, /* literal */
22143 dis_buf, True /* left */ );
22144 return delta;
22146 case 0xA5: /* SHLDv %cl,Gv,Ev */
22147 modrm = getUChar(delta);
22148 delta = dis_SHLRD_Gv_Ev (
22149 vbi, pfx, delta, modrm, sz,
22150 getIRegCL(), False, /* not literal */
22151 "%cl", True /* left */ );
22152 return delta;
22154 case 0xAB: { /* BTS Gv,Ev */
22155 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22156 Bool ok = True;
22157 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
22158 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpSet, &ok );
22159 if (!ok) goto decode_failure;
22160 return delta;
22163 case 0xAC: /* SHRDv imm8,Gv,Ev */
22164 modrm = getUChar(delta);
22165 d64 = delta + lengthAMode(pfx, delta);
22166 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
22167 delta = dis_SHLRD_Gv_Ev (
22168 vbi, pfx, delta, modrm, sz,
22169 mkU8(getUChar(d64)), True, /* literal */
22170 dis_buf, False /* right */ );
22171 return delta;
22173 case 0xAD: /* SHRDv %cl,Gv,Ev */
22174 modrm = getUChar(delta);
22175 delta = dis_SHLRD_Gv_Ev (
22176 vbi, pfx, delta, modrm, sz,
22177 getIRegCL(), False, /* not literal */
22178 "%cl", False /* right */);
22179 return delta;
22181 case 0xAF: /* IMUL Ev, Gv */
22182 if (haveF2orF3(pfx)) goto decode_failure;
22183 delta = dis_mul_E_G ( vbi, pfx, sz, delta );
22184 return delta;
22186 case 0xB0: { /* CMPXCHG Gb,Eb */
22187 Bool ok = True;
22188 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */
22189 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, 1, delta );
22190 if (!ok) goto decode_failure;
22191 return delta;
22194 case 0xB1: { /* CMPXCHG Gv,Ev (allowed in 16,32,64 bit) */
22195 Bool ok = True;
22196 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */
22197 if (sz != 2 && sz != 4 && sz != 8) goto decode_failure;
22198 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, sz, delta );
22199 if (!ok) goto decode_failure;
22200 return delta;
22203 case 0xB3: { /* BTR Gv,Ev */
22204 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22205 Bool ok = True;
22206 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
22207 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpReset, &ok );
22208 if (!ok) goto decode_failure;
22209 return delta;
22212 case 0xB6: /* MOVZXb Eb,Gv */
22213 if (haveF2orF3(pfx)) goto decode_failure;
22214 if (sz != 2 && sz != 4 && sz != 8)
22215 goto decode_failure;
22216 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, False );
22217 return delta;
22219 case 0xB7: /* MOVZXw Ew,Gv */
22220 if (haveF2orF3(pfx)) goto decode_failure;
22221 if (sz != 4 && sz != 8)
22222 goto decode_failure;
22223 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, False );
22224 return delta;
22226 case 0xBA: { /* Grp8 Ib,Ev */
22227 /* We let dis_Grp8_Imm decide whether F2 or F3 are allowable. */
22228 Bool decode_OK = False;
22229 modrm = getUChar(delta);
22230 am_sz = lengthAMode(pfx,delta);
22231 d64 = getSDisp8(delta + am_sz);
22232 delta = dis_Grp8_Imm ( vbi, pfx, delta, modrm, am_sz, sz, d64,
22233 &decode_OK );
22234 if (!decode_OK)
22235 goto decode_failure;
22236 return delta;
22239 case 0xBB: { /* BTC Gv,Ev */
22240 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22241 Bool ok = False;
22242 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
22243 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpComp, &ok );
22244 if (!ok) goto decode_failure;
22245 return delta;
22248 case 0xBC: /* BSF Gv,Ev */
22249 if (!haveF2orF3(pfx)
22250 || (haveF3noF2(pfx)
22251 && 0 == (archinfo->hwcaps & VEX_HWCAPS_AMD64_BMI))) {
22252 /* no-F2 no-F3 0F BC = BSF
22253 or F3 0F BC = REP; BSF on older CPUs. */
22254 delta = dis_bs_E_G ( vbi, pfx, sz, delta, True );
22255 return delta;
22257 /* Fall through, since F3 0F BC is TZCNT, and needs to
22258 be handled by dis_ESC_0F__SSE4. */
22259 break;
22261 case 0xBD: /* BSR Gv,Ev */
22262 if (!haveF2orF3(pfx)
22263 || (haveF3noF2(pfx)
22264 && 0 == (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT))) {
22265 /* no-F2 no-F3 0F BD = BSR
22266 or F3 0F BD = REP; BSR on older CPUs. */
22267 delta = dis_bs_E_G ( vbi, pfx, sz, delta, False );
22268 return delta;
22270 /* Fall through, since F3 0F BD is LZCNT, and needs to
22271 be handled by dis_ESC_0F__SSE4. */
22272 break;
22274 case 0xBE: /* MOVSXb Eb,Gv */
22275 if (haveF2orF3(pfx)) goto decode_failure;
22276 if (sz != 2 && sz != 4 && sz != 8)
22277 goto decode_failure;
22278 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, True );
22279 return delta;
22281 case 0xBF: /* MOVSXw Ew,Gv */
22282 if (haveF2orF3(pfx)) goto decode_failure;
22283 if (sz != 4 && sz != 8)
22284 goto decode_failure;
22285 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, True );
22286 return delta;
22288 case 0xC0: { /* XADD Gb,Eb */
22289 Bool decode_OK = False;
22290 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, 1, delta );
22291 if (!decode_OK)
22292 goto decode_failure;
22293 return delta;
22296 case 0xC1: { /* XADD Gv,Ev */
22297 Bool decode_OK = False;
22298 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, sz, delta );
22299 if (!decode_OK)
22300 goto decode_failure;
22301 return delta;
22304 case 0xC7: {
22305 modrm = getUChar(delta);
22307 // Detecting valid CMPXCHG combinations is pretty complex.
22308 Bool isValidCMPXCHG = gregLO3ofRM(modrm) == 1;
22309 if (isValidCMPXCHG) {
22310 if (have66(pfx)) isValidCMPXCHG = False;
22311 if (sz != 4 && sz != 8) isValidCMPXCHG = False;
22312 if (sz == 8 && !(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16))
22313 isValidCMPXCHG = False;
22314 if (epartIsReg(modrm)) isValidCMPXCHG = False;
22315 if (haveF2orF3(pfx)) {
22316 /* Since the e-part is memory only, F2 or F3 (one or the
22317 other) is acceptable if LOCK is also present. But only
22318 for cmpxchg8b. */
22319 if (sz == 8) isValidCMPXCHG = False;
22320 if (haveF2andF3(pfx) || !haveLOCK(pfx)) isValidCMPXCHG = False;
22324 /* 0F C7 /1 (with qualifications) = CMPXCHG */
22325 if (isValidCMPXCHG) {
22326 // Note that we've already read the modrm byte by this point, but we
22327 // haven't moved delta past it.
22328 IRType elemTy = sz==4 ? Ity_I32 : Ity_I64;
22329 IRTemp expdHi = newTemp(elemTy);
22330 IRTemp expdLo = newTemp(elemTy);
22331 IRTemp dataHi = newTemp(elemTy);
22332 IRTemp dataLo = newTemp(elemTy);
22333 IRTemp oldHi = newTemp(elemTy);
22334 IRTemp oldLo = newTemp(elemTy);
22335 IRTemp flags_old = newTemp(Ity_I64);
22336 IRTemp flags_new = newTemp(Ity_I64);
22337 IRTemp success = newTemp(Ity_I1);
22338 IROp opOR = sz==4 ? Iop_Or32 : Iop_Or64;
22339 IROp opXOR = sz==4 ? Iop_Xor32 : Iop_Xor64;
22340 IROp opCasCmpEQ = sz==4 ? Iop_CasCmpEQ32 : Iop_CasCmpEQ64;
22341 IRExpr* zero = sz==4 ? mkU32(0) : mkU64(0);
22342 IRTemp expdHi64 = newTemp(Ity_I64);
22343 IRTemp expdLo64 = newTemp(Ity_I64);
22345 /* Translate this using a DCAS, even if there is no LOCK
22346 prefix. Life is too short to bother with generating two
22347 different translations for the with/without-LOCK-prefix
22348 cases. */
22349 *expect_CAS = True;
22351 /* Generate address */
22352 vassert(!epartIsReg(modrm));
22353 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22354 delta += alen;
22356 /* cmpxchg16b requires an alignment check. */
22357 if (sz == 8)
22358 gen_SEGV_if_not_16_aligned( addr );
22360 /* Get the expected and new values. */
22361 assign( expdHi64, getIReg64(R_RDX) );
22362 assign( expdLo64, getIReg64(R_RAX) );
22364 /* These are the correctly-sized expected and new values.
22365 However, we also get expdHi64/expdLo64 above as 64-bits
22366 regardless, because we will need them later in the 32-bit
22367 case (paradoxically). */
22368 assign( expdHi, sz==4 ? unop(Iop_64to32, mkexpr(expdHi64))
22369 : mkexpr(expdHi64) );
22370 assign( expdLo, sz==4 ? unop(Iop_64to32, mkexpr(expdLo64))
22371 : mkexpr(expdLo64) );
22372 assign( dataHi, sz==4 ? getIReg32(R_RCX) : getIReg64(R_RCX) );
22373 assign( dataLo, sz==4 ? getIReg32(R_RBX) : getIReg64(R_RBX) );
22375 /* Do the DCAS */
22376 stmt( IRStmt_CAS(
22377 mkIRCAS( oldHi, oldLo,
22378 Iend_LE, mkexpr(addr),
22379 mkexpr(expdHi), mkexpr(expdLo),
22380 mkexpr(dataHi), mkexpr(dataLo)
22381 )));
22383 /* success when oldHi:oldLo == expdHi:expdLo */
22384 assign( success,
22385 binop(opCasCmpEQ,
22386 binop(opOR,
22387 binop(opXOR, mkexpr(oldHi), mkexpr(expdHi)),
22388 binop(opXOR, mkexpr(oldLo), mkexpr(expdLo))
22390 zero
22393 /* If the DCAS is successful, that is to say oldHi:oldLo ==
22394 expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX,
22395 which is where they came from originally. Both the actual
22396 contents of these two regs, and any shadow values, are
22397 unchanged. If the DCAS fails then we're putting into
22398 RDX:RAX the value seen in memory. */
22399 /* Now of course there's a complication in the 32-bit case
22400 (bah!): if the DCAS succeeds, we need to leave RDX:RAX
22401 unchanged; but if we use the same scheme as in the 64-bit
22402 case, we get hit by the standard rule that a write to the
22403 bottom 32 bits of an integer register zeros the upper 32
22404 bits. And so the upper halves of RDX and RAX mysteriously
22405 become zero. So we have to stuff back in the original
22406 64-bit values which we previously stashed in
22407 expdHi64:expdLo64, even if we're doing a cmpxchg8b. */
22408 /* It's just _so_ much fun ... */
22409 putIRegRDX( 8,
22410 IRExpr_ITE( mkexpr(success),
22411 mkexpr(expdHi64),
22412 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldHi))
22413 : mkexpr(oldHi)
22415 putIRegRAX( 8,
22416 IRExpr_ITE( mkexpr(success),
22417 mkexpr(expdLo64),
22418 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldLo))
22419 : mkexpr(oldLo)
22422 /* Copy the success bit into the Z flag and leave the others
22423 unchanged */
22424 assign( flags_old, widenUto64(mk_amd64g_calculate_rflags_all()));
22425 assign(
22426 flags_new,
22427 binop(Iop_Or64,
22428 binop(Iop_And64, mkexpr(flags_old),
22429 mkU64(~AMD64G_CC_MASK_Z)),
22430 binop(Iop_Shl64,
22431 binop(Iop_And64,
22432 unop(Iop_1Uto64, mkexpr(success)), mkU64(1)),
22433 mkU8(AMD64G_CC_SHIFT_Z)) ));
22435 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
22436 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
22437 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
22438 /* Set NDEP even though it isn't used. This makes
22439 redundant-PUT elimination of previous stores to this field
22440 work better. */
22441 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
22443 /* Sheesh. Aren't you glad it was me and not you that had to
22444 write and validate all this grunge? */
22446 DIP("cmpxchg8b %s\n", dis_buf);
22447 return delta;
22448 } // if (isValidCMPXCHG)
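/* Worked example (illustrative): with AMD64G_CC_SHIFT_Z == 6 and
   AMD64G_CC_MASK_Z == 0x40, an old rflags value of 0x246 becomes
      success: (0x246 & ~0x40) | (1 << 6) == 0x246
      failure: (0x246 & ~0x40) | (0 << 6) == 0x206
   i.e. only the Z bit is spliced in; everything else is preserved. */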
22450 /* 0F C7 /6 no-F2-or-F3 = RDRAND */
22451 if (gregLO3ofRM(modrm) == 6/*RDRAND*/
22452 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDRAND)
22453 && epartIsReg(modrm) && haveNoF2noF3(pfx)
22454 && (sz == 8 || sz == 4 || sz == 2)) {
22455 delta++; // move past modrm
22456 IRType ty = szToITy(sz);
22458 // Pull the first 32 bits of randomness, plus the C flag, out of the host.
22459 IRTemp pairLO = newTemp(Ity_I64);
22460 IRDirty* dLO
22461 = unsafeIRDirty_1_N(pairLO, 0/*regparms*/,
22462 "amd64g_dirtyhelper_RDRAND",
22463 &amd64g_dirtyhelper_RDRAND, mkIRExprVec_0());
22464 // There are no guest state or memory effects to declare for |dLO|.
22465 stmt( IRStmt_Dirty(dLO) );
22467 IRTemp randsLO = newTemp(Ity_I32);
22468 assign(randsLO, unop(Iop_64to32, mkexpr(pairLO)));
22469 IRTemp cLO = newTemp(Ity_I64);
22470 assign(cLO, binop(Iop_Shr64, mkexpr(pairLO), mkU8(32)));
22472 // We'll assemble the final pairing in (cFinal, randsNearlyFinal).
22473 IRTemp randsNearlyFinal = newTemp(Ity_I64);
22474 IRTemp cFinal = newTemp(Ity_I64);
22476 if (ty == Ity_I64) {
22477 // Pull another 32 bits of randomness out of the host.
22478 IRTemp pairHI = newTemp(Ity_I64);
22479 IRDirty* dHI
22480 = unsafeIRDirty_1_N(pairHI, 0/*regparms*/,
22481 "amd64g_dirtyhelper_RDRAND",
22482 &amd64g_dirtyhelper_RDRAND, mkIRExprVec_0());
22483 // There are no guest state or memory effects to declare for |dHI|.
22484 stmt( IRStmt_Dirty(dHI) );
22486 IRTemp randsHI = newTemp(Ity_I32);
22487 assign(randsHI, unop(Iop_64to32, mkexpr(pairHI)));
22488 IRTemp cHI = newTemp(Ity_I64);
22489 assign(cHI, binop(Iop_Shr64, mkexpr(pairHI), mkU8(32)));
22490 assign(randsNearlyFinal, binop(Iop_32HLto64,
22491 mkexpr(randsHI), mkexpr(randsLO)));
22492 assign(cFinal, binop(Iop_And64,
22493 binop(Iop_And64, mkexpr(cHI), mkexpr(cLO)),
22494 mkU64(1)));
22495 } else {
22496 assign(randsNearlyFinal, unop(Iop_32Uto64, mkexpr(randsLO)));
22497 assign(cFinal, binop(Iop_And64, mkexpr(cLO), mkU64(1)));
22500 /* Now cFinal[0] is the final success/failure flag (cFinal[0] == 1
22501 means success). But there's another twist. If we failed then the
22502 returned value must be forced to zero. Otherwise we could have the
22503 situation, when sz==8, where one of the host calls failed but the
22504 other didn't. This would give cFinal[0] == 0 (correctly) but
22505 randsNearlyFinal not being zero, because it contains the 32 bit
22506 result of the non-failing call. */
22507 IRTemp randsFinal = newTemp(Ity_I64);
22508 assign(randsFinal,
22509 binop(Iop_And64,
22510 mkexpr(randsNearlyFinal),
22511 binop(Iop_Sar64,
22512 binop(Iop_Shl64, mkexpr(cFinal), mkU8(63)),
22513 mkU8(63))
22516 // So, finally, update the guest state.
22517 putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(randsFinal)));
22519 // Set C=<success indication>, O,S,Z,A,P = 0. cFinal has already been
22520 // masked so only the lowest bit remains.
22521 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
22522 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(cFinal) ));
22523 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
22524 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
22526 DIP("rdrand %s\n", nameIRegE(sz, pfx, modrm));
22527 return delta;
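/* Illustrative arithmetic for the masking above: cFinal is 0 or 1, so
      (1 << 63) >>s 63 == 0xFFFFFFFFFFFFFFFF   (success: keep the value)
      (0 << 63) >>s 63 == 0                    (failure: force it to zero)
   where >>s is the arithmetic (Iop_Sar64) shift. */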
22530 goto decode_failure;
22533 case 0xC8: /* BSWAP %eax */
22534 case 0xC9:
22535 case 0xCA:
22536 case 0xCB:
22537 case 0xCC:
22538 case 0xCD:
22539 case 0xCE:
22540 case 0xCF: /* BSWAP %edi */
22541 if (haveF2orF3(pfx)) goto decode_failure;
22542 /* According to the AMD64 docs, this insn can have size 4 or
22543 8. */
22544 if (sz == 4) {
22545 t1 = newTemp(Ity_I32);
22546 assign( t1, getIRegRexB(4, pfx, opc-0xC8) );
22547 t2 = math_BSWAP( t1, Ity_I32 );
22548 putIRegRexB(4, pfx, opc-0xC8, mkexpr(t2));
22549 DIP("bswapl %s\n", nameIRegRexB(4, pfx, opc-0xC8));
22550 return delta;
22552 if (sz == 8) {
22553 t1 = newTemp(Ity_I64);
22554 t2 = newTemp(Ity_I64);
22555 assign( t1, getIRegRexB(8, pfx, opc-0xC8) );
22556 t2 = math_BSWAP( t1, Ity_I64 );
22557 putIRegRexB(8, pfx, opc-0xC8, mkexpr(t2));
22558 DIP("bswapq %s\n", nameIRegRexB(8, pfx, opc-0xC8));
22559 return delta;
22561 goto decode_failure;
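/* Worked example (illustrative): "bswapl %eax" with EAX = 0x11223344
   leaves EAX = 0x44332211; "bswapq %rax" with RAX = 0x1122334455667788
   leaves RAX = 0x8877665544332211. */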
22563 default:
22564 break;
22566 } /* first switch */
22569 /* =-=-=-=-=-=-=-=-= MMXery =-=-=-=-=-=-=-=-= */
22570 /* In the second switch, pick off MMX insns. */
22572 if (!have66orF2orF3(pfx)) {
22573 /* So there's no SIMD prefix. */
22575 vassert(sz == 4 || sz == 8);
22577 switch (opc) { /* second switch */
22579 case 0x71:
22580 case 0x72:
22581 case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
22583 case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
22584 case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
22585 case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
22586 case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
22588 case 0xFC:
22589 case 0xFD:
22590 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
22592 case 0xEC:
22593 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
22595 case 0xDC:
22596 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
22598 case 0xF8:
22599 case 0xF9:
22600 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
22602 case 0xE8:
22603 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
22605 case 0xD8:
22606 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
22608 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
22609 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
22611 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
22613 case 0x74:
22614 case 0x75:
22615 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
22617 case 0x64:
22618 case 0x65:
22619 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
22621 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
22622 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
22623 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
22625 case 0x68:
22626 case 0x69:
22627 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
22629 case 0x60:
22630 case 0x61:
22631 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
22633 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
22634 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
22635 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
22636 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
22638 case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
22639 case 0xF2:
22640 case 0xF3:
22642 case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
22643 case 0xD2:
22644 case 0xD3:
22646 case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
22647 case 0xE2: {
22648 Bool decode_OK = False;
22649 delta = dis_MMX ( &decode_OK, vbi, pfx, sz, deltaIN );
22650 if (decode_OK)
22651 return delta;
22652 goto decode_failure;
22655 default:
22656 break;
22657 } /* second switch */
22661 /* A couple of MMX corner cases */
22662 if (opc == 0x0E/* FEMMS */ || opc == 0x77/* EMMS */) {
22663 if (sz != 4)
22664 goto decode_failure;
22665 do_EMMS_preamble();
22666 DIP("{f}emms\n");
22667 return delta;
22670 /* =-=-=-=-=-=-=-=-= SSE2ery =-=-=-=-=-=-=-=-= */
22671 /* Perhaps it's an SSE or SSE2 instruction. We can try this
22672 without checking the guest hwcaps because SSE2 is a baseline
22673 facility in 64 bit mode. */
22675 Bool decode_OK = False;
22676 delta = dis_ESC_0F__SSE2 ( &decode_OK,
22677 archinfo, vbi, pfx, sz, deltaIN, dres );
22678 if (decode_OK)
22679 return delta;
22682 /* =-=-=-=-=-=-=-=-= SSE3ery =-=-=-=-=-=-=-=-= */
22683 /* Perhaps it's an SSE3 instruction. FIXME: check guest hwcaps
22684 first. */
22686 Bool decode_OK = False;
22687 delta = dis_ESC_0F__SSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
22688 if (decode_OK)
22689 return delta;
22692 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22693 /* Perhaps it's an SSE4 instruction. FIXME: check guest hwcaps
22694 first. */
22696 Bool decode_OK = False;
22697 delta = dis_ESC_0F__SSE4 ( &decode_OK,
22698 archinfo, vbi, pfx, sz, deltaIN );
22699 if (decode_OK)
22700 return delta;
22703 decode_failure:
22704 return deltaIN; /* fail */
22708 /*------------------------------------------------------------*/
22709 /*--- ---*/
22710 /*--- Top-level post-escape decoders: dis_ESC_0F38 ---*/
22711 /*--- ---*/
22712 /*------------------------------------------------------------*/
22714 __attribute__((noinline))
22715 static
22716 Long dis_ESC_0F38 (
22717 /*MB_OUT*/DisResult* dres,
22718 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
22719 Bool resteerCisOk,
22720 void* callback_opaque,
22721 const VexArchInfo* archinfo,
22722 const VexAbiInfo* vbi,
22723 Prefix pfx, Int sz, Long deltaIN
22726 Long delta = deltaIN;
22727 UChar opc = getUChar(delta);
22728 delta++;
22729 switch (opc) {
22731 case 0xF0: /* 0F 38 F0 = MOVBE m16/32/64(E), r16/32/64(G) */
22732 case 0xF1: { /* 0F 38 F1 = MOVBE r16/32/64(G), m16/32/64(E) */
22733 if (!haveF2orF3(pfx) && !haveVEX(pfx)
22734 && (sz == 2 || sz == 4 || sz == 8)) {
22735 IRTemp addr = IRTemp_INVALID;
22736 UChar modrm = 0;
22737 Int alen = 0;
22738 HChar dis_buf[50];
22739 modrm = getUChar(delta);
22740 if (epartIsReg(modrm)) break;
22741 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22742 delta += alen;
22743 IRType ty = szToITy(sz);
22744 IRTemp src = newTemp(ty);
22745 if (opc == 0xF0) { /* LOAD */
22746 assign(src, loadLE(ty, mkexpr(addr)));
22747 IRTemp dst = math_BSWAP(src, ty);
22748 putIRegG(sz, pfx, modrm, mkexpr(dst));
22749 DIP("movbe %s,%s\n", dis_buf, nameIRegG(sz, pfx, modrm));
22750 } else { /* STORE */
22751 assign(src, getIRegG(sz, pfx, modrm));
22752 IRTemp dst = math_BSWAP(src, ty);
22753 storeLE(mkexpr(addr), mkexpr(dst));
22754 DIP("movbe %s,%s\n", nameIRegG(sz, pfx, modrm), dis_buf);
22756 return delta;
22758 /* else fall through; maybe one of the decoders below knows what
22759 it is. */
22760 break;
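/* Worked example (illustrative): if memory holds the bytes
   11 22 33 44 55 66 77 88 (lowest address first), a plain 64-bit
   little-endian load yields 0x8877665544332211, and MOVBE's extra
   byte swap turns that into 0x1122334455667788 -- i.e. the value is
   read (or written) big-endian. */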
22763 default:
22764 break;
22767 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
22768 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps
22769 rather than proceeding indiscriminately. */
22771 Bool decode_OK = False;
22772 delta = dis_ESC_0F38__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
22773 if (decode_OK)
22774 return delta;
22777 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22778 /* Perhaps it's an SSE4 instruction. FIXME: consult guest hwcaps
22779 rather than proceeding indiscriminately. */
22781 Bool decode_OK = False;
22782 delta = dis_ESC_0F38__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN );
22783 if (decode_OK)
22784 return delta;
22787 /* Ignore previous decode attempts and restart from the beginning of
22788 the instruction. */
22789 delta = deltaIN;
22790 opc = getUChar(delta);
22791 delta++;
22793 switch (opc) {
22795 case 0xF6: {
22796 /* 66 0F 38 F6 = ADCX r32/64(G), m32/64(E) */
22797 /* F3 0F 38 F6 = ADOX r32/64(G), m32/64(E) */
22798 /* These were introduced in Broadwell. Gate them on AVX so as to at
22799 least reject them on earlier guests. Has no host requirements. */
22800 if (have66noF2noF3(pfx) && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
22801 if (sz == 2) {
22802 sz = 4; /* 66 prefix but operand size is 4/8 */
22804 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagCarryX, True,
22805 sz, delta, "adcx" );
22806 return delta;
22808 if (haveF3no66noF2(pfx) && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
22809 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagOverX, True,
22810 sz, delta, "adox" );
22811 return delta;
22813 /* else fall through */
22814 break;
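/* Background note (illustrative): ADCX is an unsigned add whose carry-in
   comes from CF and whose carry-out goes to CF only; ADOX does the same
   through OF only.  Keeping the two carry chains in separate flags lets
   multi-precision code interleave them, which is why both map onto the
   same dis_op2_E_G path, distinguished by WithFlagCarryX vs WithFlagOverX. */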
22817 default:
22818 break;
22821 /*decode_failure:*/
22822 return deltaIN; /* fail */
22826 /*------------------------------------------------------------*/
22827 /*--- ---*/
22828 /*--- Top-level post-escape decoders: dis_ESC_0F3A ---*/
22829 /*--- ---*/
22830 /*------------------------------------------------------------*/
22832 __attribute__((noinline))
22833 static
22834 Long dis_ESC_0F3A (
22835 /*MB_OUT*/DisResult* dres,
22836 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
22837 Bool resteerCisOk,
22838 void* callback_opaque,
22839 const VexArchInfo* archinfo,
22840 const VexAbiInfo* vbi,
22841 Prefix pfx, Int sz, Long deltaIN
22844 Long delta = deltaIN;
22845 UChar opc = getUChar(delta);
22846 delta++;
22847 switch (opc) {
22849 default:
22850 break;
22854 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
22855 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps
22856 rather than proceeding indiscriminately. */
22858 Bool decode_OK = False;
22859 delta = dis_ESC_0F3A__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
22860 if (decode_OK)
22861 return delta;
22864 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22865 /* Perhaps it's an SSE4 instruction. FIXME: consult guest hwcaps
22866 rather than proceeding indiscriminately. */
22868 Bool decode_OK = False;
22869 delta = dis_ESC_0F3A__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN );
22870 if (decode_OK)
22871 return delta;
22874 return deltaIN; /* fail */
22878 /*------------------------------------------------------------*/
22879 /*--- ---*/
22880 /*--- Top-level post-escape decoders: dis_ESC_0F__VEX ---*/
22881 /*--- ---*/
22882 /*------------------------------------------------------------*/
22884 /* FIXME: common up with the _256_ version below? */
22885 static
22886 Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG (
22887 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
22888 Prefix pfx, Long delta, const HChar* name,
22889 /* The actual operation. Use either 'op' or 'opfn',
22890 but not both. */
22891 IROp op, IRTemp(*opFn)(IRTemp,IRTemp),
22892 Bool invertLeftArg,
22893 Bool swapArgs
22896 UChar modrm = getUChar(delta);
22897 UInt rD = gregOfRexRM(pfx, modrm);
22898 UInt rSL = getVexNvvvv(pfx);
22899 IRTemp tSL = newTemp(Ity_V128);
22900 IRTemp tSR = newTemp(Ity_V128);
22901 IRTemp addr = IRTemp_INVALID;
22902 HChar dis_buf[50];
22903 Int alen = 0;
22904 vassert(0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*WIG?*/);
22906 assign(tSL, invertLeftArg ? unop(Iop_NotV128, getXMMReg(rSL))
22907 : getXMMReg(rSL));
22909 if (epartIsReg(modrm)) {
22910 UInt rSR = eregOfRexRM(pfx, modrm);
22911 delta += 1;
22912 assign(tSR, getXMMReg(rSR));
22913 DIP("%s %s,%s,%s\n",
22914 name, nameXMMReg(rSR), nameXMMReg(rSL), nameXMMReg(rD));
22915 } else {
22916 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
22917 delta += alen;
22918 assign(tSR, loadLE(Ity_V128, mkexpr(addr)));
22919 DIP("%s %s,%s,%s\n",
22920 name, dis_buf, nameXMMReg(rSL), nameXMMReg(rD));
22923 IRTemp res = IRTemp_INVALID;
22924 if (op != Iop_INVALID) {
22925 vassert(opFn == NULL);
22926 res = newTemp(Ity_V128);
22927 if (requiresRMode(op)) {
22928 IRTemp rm = newTemp(Ity_I32);
22929 assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
22930 assign(res, swapArgs
22931 ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL))
22932 : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR)));
22933 } else {
22934 assign(res, swapArgs
22935 ? binop(op, mkexpr(tSR), mkexpr(tSL))
22936 : binop(op, mkexpr(tSL), mkexpr(tSR)));
22938 } else {
22939 vassert(opFn != NULL);
22940 res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR);
22943 putYMMRegLoAndZU(rD, mkexpr(res));
22945 *uses_vvvv = True;
22946 return delta;
22950 /* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, with a simple IROp
22951 for the operation, no inversion of the left arg, and no swapping of
22952 args. */
22953 static
22954 Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple (
22955 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
22956 Prefix pfx, Long delta, const HChar* name,
22957 IROp op
22960 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
22961 uses_vvvv, vbi, pfx, delta, name, op, NULL, False, False);
22965 /* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, using the given IR
22966 generator to compute the result, no inversion of the left
22967 arg, and no swapping of args. */
22968 static
22969 Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex (
22970 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
22971 Prefix pfx, Long delta, const HChar* name,
22972 IRTemp(*opFn)(IRTemp,IRTemp)
22975 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
22976 uses_vvvv, vbi, pfx, delta, name,
22977 Iop_INVALID, opFn, False, False );
22981 /* Vector by scalar shift of V by the amount specified at the bottom
22982 of E. */
22983 static ULong dis_AVX128_shiftV_byE ( const VexAbiInfo* vbi,
22984 Prefix pfx, Long delta,
22985 const HChar* opname, IROp op )
22987 HChar dis_buf[50];
22988 Int alen, size;
22989 IRTemp addr;
22990 Bool shl, shr, sar;
22991 UChar modrm = getUChar(delta);
22992 UInt rG = gregOfRexRM(pfx,modrm);
22993 UInt rV = getVexNvvvv(pfx);
22994 IRTemp g0 = newTemp(Ity_V128);
22995 IRTemp g1 = newTemp(Ity_V128);
22996 IRTemp amt = newTemp(Ity_I64);
22997 IRTemp amt8 = newTemp(Ity_I8);
22998 if (epartIsReg(modrm)) {
22999 UInt rE = eregOfRexRM(pfx,modrm);
23000 assign( amt, getXMMRegLane64(rE, 0) );
23001 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
23002 nameXMMReg(rV), nameXMMReg(rG) );
23003 delta++;
23004 } else {
23005 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23006 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
23007 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
23008 delta += alen;
23010 assign( g0, getXMMReg(rV) );
23011 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
23013 shl = shr = sar = False;
23014 size = 0;
23015 switch (op) {
23016 case Iop_ShlN16x8: shl = True; size = 16; break;
23017 case Iop_ShlN32x4: shl = True; size = 32; break;
23018 case Iop_ShlN64x2: shl = True; size = 64; break;
23019 case Iop_SarN16x8: sar = True; size = 16; break;
23020 case Iop_SarN32x4: sar = True; size = 32; break;
23021 case Iop_ShrN16x8: shr = True; size = 16; break;
23022 case Iop_ShrN32x4: shr = True; size = 32; break;
23023 case Iop_ShrN64x2: shr = True; size = 64; break;
23024 default: vassert(0);
23027 if (shl || shr) {
23028 assign(
23030 IRExpr_ITE(
23031 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
23032 binop(op, mkexpr(g0), mkexpr(amt8)),
23033 mkV128(0x0000)
23036 } else
23037 if (sar) {
23038 assign(
23040 IRExpr_ITE(
23041 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
23042 binop(op, mkexpr(g0), mkexpr(amt8)),
23043 binop(op, mkexpr(g0), mkU8(size-1))
23046 } else {
23047 vassert(0);
23050 putYMMRegLoAndZU( rG, mkexpr(g1) );
23051 return delta;
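/* Behavioural sketch (illustrative): for vpsrld, a scalar amount >= 32 in
   the low 64 bits of E zeroes every lane; for vpsraw/vpsrad the amount is
   instead clamped by shifting each lane by (size-1), replicating the sign
   bit, which matches the ITE cases above. */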
23055 /* Vector by scalar shift of V by the amount specified at the bottom
23056 of E. */
23057 static ULong dis_AVX256_shiftV_byE ( const VexAbiInfo* vbi,
23058 Prefix pfx, Long delta,
23059 const HChar* opname, IROp op )
23061 HChar dis_buf[50];
23062 Int alen, size;
23063 IRTemp addr;
23064 Bool shl, shr, sar;
23065 UChar modrm = getUChar(delta);
23066 UInt rG = gregOfRexRM(pfx,modrm);
23067 UInt rV = getVexNvvvv(pfx);
23068 IRTemp g0 = newTemp(Ity_V256);
23069 IRTemp g1 = newTemp(Ity_V256);
23070 IRTemp amt = newTemp(Ity_I64);
23071 IRTemp amt8 = newTemp(Ity_I8);
23072 if (epartIsReg(modrm)) {
23073 UInt rE = eregOfRexRM(pfx,modrm);
23074 assign( amt, getXMMRegLane64(rE, 0) );
23075 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
23076 nameYMMReg(rV), nameYMMReg(rG) );
23077 delta++;
23078 } else {
23079 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23080 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
23081 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
23082 delta += alen;
23084 assign( g0, getYMMReg(rV) );
23085 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
23087 shl = shr = sar = False;
23088 size = 0;
23089 switch (op) {
23090 case Iop_ShlN16x16: shl = True; size = 16; break;
23091 case Iop_ShlN32x8: shl = True; size = 32; break;
23092 case Iop_ShlN64x4: shl = True; size = 64; break;
23093 case Iop_SarN16x16: sar = True; size = 16; break;
23094 case Iop_SarN32x8: sar = True; size = 32; break;
23095 case Iop_ShrN16x16: shr = True; size = 16; break;
23096 case Iop_ShrN32x8: shr = True; size = 32; break;
23097 case Iop_ShrN64x4: shr = True; size = 64; break;
23098 default: vassert(0);
23101 if (shl || shr) {
23102 assign(
23104 IRExpr_ITE(
23105 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
23106 binop(op, mkexpr(g0), mkexpr(amt8)),
23107 binop(Iop_V128HLtoV256, mkV128(0), mkV128(0))
23110 } else
23111 if (sar) {
23112 assign(
23114 IRExpr_ITE(
23115 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
23116 binop(op, mkexpr(g0), mkexpr(amt8)),
23117 binop(op, mkexpr(g0), mkU8(size-1))
23120 } else {
23121 vassert(0);
23124 putYMMReg( rG, mkexpr(g1) );
23125 return delta;
23129 /* Vector by vector shift of V, with per-lane shift amounts taken from
23130 the corresponding lanes of E. Such shifts are defined for all shift
23131 amounts, so not using Iop_S*x* here (and SSE2 doesn't support variable
23132 shifts anyway). */
23133 static ULong dis_AVX_var_shiftV_byE ( const VexAbiInfo* vbi,
23134 Prefix pfx, Long delta,
23135 const HChar* opname, IROp op, Bool isYMM )
23137 HChar dis_buf[50];
23138 Int alen, size, i;
23139 IRTemp addr;
23140 UChar modrm = getUChar(delta);
23141 UInt rG = gregOfRexRM(pfx,modrm);
23142 UInt rV = getVexNvvvv(pfx);
23143 IRTemp sV = isYMM ? newTemp(Ity_V256) : newTemp(Ity_V128);
23144 IRTemp amt = isYMM ? newTemp(Ity_V256) : newTemp(Ity_V128);
23145 IRTemp amts[8], sVs[8], res[8];
23146 if (epartIsReg(modrm)) {
23147 UInt rE = eregOfRexRM(pfx,modrm);
23148 assign( amt, isYMM ? getYMMReg(rE) : getXMMReg(rE) );
23149 if (isYMM) {
23150 DIP("%s %s,%s,%s\n", opname, nameYMMReg(rE),
23151 nameYMMReg(rV), nameYMMReg(rG) );
23152 } else {
23153 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
23154 nameXMMReg(rV), nameXMMReg(rG) );
23156 delta++;
23157 } else {
23158 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23159 assign( amt, loadLE(isYMM ? Ity_V256 : Ity_V128, mkexpr(addr)) );
23160 if (isYMM) {
23161 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV),
23162 nameYMMReg(rG) );
23163 } else {
23164 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV),
23165 nameXMMReg(rG) );
23167 delta += alen;
23169 assign( sV, isYMM ? getYMMReg(rV) : getXMMReg(rV) );
23171 size = 0;
23172 switch (op) {
23173 case Iop_Shl32: size = 32; break;
23174 case Iop_Shl64: size = 64; break;
23175 case Iop_Sar32: size = 32; break;
23176 case Iop_Shr32: size = 32; break;
23177 case Iop_Shr64: size = 64; break;
23178 default: vassert(0);
23181 for (i = 0; i < 8; i++) {
23182 sVs[i] = IRTemp_INVALID;
23183 amts[i] = IRTemp_INVALID;
23185 switch (size) {
23186 case 32:
23187 if (isYMM) {
23188 breakupV256to32s( sV, &sVs[7], &sVs[6], &sVs[5], &sVs[4],
23189 &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
23190 breakupV256to32s( amt, &amts[7], &amts[6], &amts[5], &amts[4],
23191 &amts[3], &amts[2], &amts[1], &amts[0] );
23192 } else {
23193 breakupV128to32s( sV, &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
23194 breakupV128to32s( amt, &amts[3], &amts[2], &amts[1], &amts[0] );
23196 break;
23197 case 64:
23198 if (isYMM) {
23199 breakupV256to64s( sV, &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
23200 breakupV256to64s( amt, &amts[3], &amts[2], &amts[1], &amts[0] );
23201 } else {
23202 breakupV128to64s( sV, &sVs[1], &sVs[0] );
23203 breakupV128to64s( amt, &amts[1], &amts[0] );
23205 break;
23206 default: vassert(0);
23208 for (i = 0; i < 8; i++)
23209 if (sVs[i] != IRTemp_INVALID) {
23210 res[i] = size == 32 ? newTemp(Ity_I32) : newTemp(Ity_I64);
23211 assign( res[i],
23212 IRExpr_ITE(
23213 binop(size == 32 ? Iop_CmpLT32U : Iop_CmpLT64U,
23214 mkexpr(amts[i]),
23215 size == 32 ? mkU32(size) : mkU64(size)),
23216 binop(op, mkexpr(sVs[i]),
23217 unop(size == 32 ? Iop_32to8 : Iop_64to8,
23218 mkexpr(amts[i]))),
23219 op == Iop_Sar32 ? binop(op, mkexpr(sVs[i]), mkU8(size-1))
23220 : size == 32 ? mkU32(0) : mkU64(0)
23223 switch (size) {
23224 case 32:
23225 for (i = 0; i < 8; i++)
23226 putYMMRegLane32( rG, i, (i < 4 || isYMM)
23227 ? mkexpr(res[i]) : mkU32(0) );
23228 break;
23229 case 64:
23230 for (i = 0; i < 4; i++)
23231 putYMMRegLane64( rG, i, (i < 2 || isYMM)
23232 ? mkexpr(res[i]) : mkU64(0) );
23233 break;
23234 default: vassert(0);
23237 return delta;
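/* Behavioural sketch (illustrative): for vpsrlvd each 32-bit lane of V is
   shifted by the corresponding lane of E; amounts >= 32 give 0, except for
   vpsravd where an out-of-range amount behaves like a shift by 31 (the lane
   fills with its sign bit), as per the per-lane ITE above. */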
23241 /* Vector by scalar shift of E into V, by an immediate byte. Modified
23242 version of dis_SSE_shiftE_imm. */
23243 static
23244 Long dis_AVX128_shiftE_to_V_imm( Prefix pfx,
23245 Long delta, const HChar* opname, IROp op )
23247 Bool shl, shr, sar;
23248 UChar rm = getUChar(delta);
23249 IRTemp e0 = newTemp(Ity_V128);
23250 IRTemp e1 = newTemp(Ity_V128);
23251 UInt rD = getVexNvvvv(pfx);
23252 UChar amt, size;
23253 vassert(epartIsReg(rm));
23254 vassert(gregLO3ofRM(rm) == 2
23255 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
23256 amt = getUChar(delta+1);
23257 delta += 2;
23258 DIP("%s $%d,%s,%s\n", opname,
23259 (Int)amt,
23260 nameXMMReg(eregOfRexRM(pfx,rm)),
23261 nameXMMReg(rD));
23262 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );
23264 shl = shr = sar = False;
23265 size = 0;
23266 switch (op) {
23267 case Iop_ShlN16x8: shl = True; size = 16; break;
23268 case Iop_ShlN32x4: shl = True; size = 32; break;
23269 case Iop_ShlN64x2: shl = True; size = 64; break;
23270 case Iop_SarN16x8: sar = True; size = 16; break;
23271 case Iop_SarN32x4: sar = True; size = 32; break;
23272 case Iop_ShrN16x8: shr = True; size = 16; break;
23273 case Iop_ShrN32x4: shr = True; size = 32; break;
23274 case Iop_ShrN64x2: shr = True; size = 64; break;
23275 default: vassert(0);
23278 if (shl || shr) {
23279 assign( e1, amt >= size
23280 ? mkV128(0x0000)
23281 : binop(op, mkexpr(e0), mkU8(amt))
23283 } else
23284 if (sar) {
23285 assign( e1, amt >= size
23286 ? binop(op, mkexpr(e0), mkU8(size-1))
23287 : binop(op, mkexpr(e0), mkU8(amt))
23289 } else {
23290 vassert(0);
23293 putYMMRegLoAndZU( rD, mkexpr(e1) );
23294 return delta;
23298 /* Vector by scalar shift of E into V, by an immediate byte. Modified
23299 version of dis_AVX128_shiftE_to_V_imm. */
23300 static
23301 Long dis_AVX256_shiftE_to_V_imm( Prefix pfx,
23302 Long delta, const HChar* opname, IROp op )
23304 Bool shl, shr, sar;
23305 UChar rm = getUChar(delta);
23306 IRTemp e0 = newTemp(Ity_V256);
23307 IRTemp e1 = newTemp(Ity_V256);
23308 UInt rD = getVexNvvvv(pfx);
23309 UChar amt, size;
23310 vassert(epartIsReg(rm));
23311 vassert(gregLO3ofRM(rm) == 2
23312 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
23313 amt = getUChar(delta+1);
23314 delta += 2;
23315 DIP("%s $%d,%s,%s\n", opname,
23316 (Int)amt,
23317 nameYMMReg(eregOfRexRM(pfx,rm)),
23318 nameYMMReg(rD));
23319 assign( e0, getYMMReg(eregOfRexRM(pfx,rm)) );
23321 shl = shr = sar = False;
23322 size = 0;
23323 switch (op) {
23324 case Iop_ShlN16x16: shl = True; size = 16; break;
23325 case Iop_ShlN32x8: shl = True; size = 32; break;
23326 case Iop_ShlN64x4: shl = True; size = 64; break;
23327 case Iop_SarN16x16: sar = True; size = 16; break;
23328 case Iop_SarN32x8: sar = True; size = 32; break;
23329 case Iop_ShrN16x16: shr = True; size = 16; break;
23330 case Iop_ShrN32x8: shr = True; size = 32; break;
23331 case Iop_ShrN64x4: shr = True; size = 64; break;
23332 default: vassert(0);
23336 if (shl || shr) {
23337 assign( e1, amt >= size
23338 ? binop(Iop_V128HLtoV256, mkV128(0), mkV128(0))
23339 : binop(op, mkexpr(e0), mkU8(amt))
23341 } else
23342 if (sar) {
23343 assign( e1, amt >= size
23344 ? binop(op, mkexpr(e0), mkU8(size-1))
23345 : binop(op, mkexpr(e0), mkU8(amt))
23347 } else {
23348 vassert(0);
23351 putYMMReg( rD, mkexpr(e1) );
23352 return delta;
23356 /* Lower 64-bit lane only AVX128 binary operation:
23357 G[63:0] = V[63:0] `op` E[63:0]
23358 G[127:64] = V[127:64]
23359 G[255:128] = 0.
23360 The specified op must be of the 64F0x2 kind, so that it
23361 copies the upper half of the left operand to the result.
23363 static Long dis_AVX128_E_V_to_G_lo64 ( /*OUT*/Bool* uses_vvvv,
23364 const VexAbiInfo* vbi,
23365 Prefix pfx, Long delta,
23366 const HChar* opname, IROp op )
23368 HChar dis_buf[50];
23369 Int alen;
23370 IRTemp addr;
23371 UChar rm = getUChar(delta);
23372 UInt rG = gregOfRexRM(pfx,rm);
23373 UInt rV = getVexNvvvv(pfx);
23374 IRExpr* vpart = getXMMReg(rV);
23375 if (epartIsReg(rm)) {
23376 UInt rE = eregOfRexRM(pfx,rm);
23377 putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) );
23378 DIP("%s %s,%s,%s\n", opname,
23379 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23380 delta = delta+1;
23381 } else {
23382 /* We can only do a 64-bit memory read, so the upper half of the
23383 E operand needs to be made simply of zeroes. */
23384 IRTemp epart = newTemp(Ity_V128);
23385 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23386 assign( epart, unop( Iop_64UtoV128,
23387 loadLE(Ity_I64, mkexpr(addr))) );
23388 putXMMReg( rG, binop(op, vpart, mkexpr(epart)) );
23389 DIP("%s %s,%s,%s\n", opname,
23390 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23391 delta = delta+alen;
23393 putYMMRegLane128( rG, 1, mkV128(0) );
23394 *uses_vvvv = True;
23395 return delta;
23399 /* Lower 64-bit lane only AVX128 unary operation:
23400 G[63:0] = op(E[63:0])
23401 G[127:64] = V[127:64]
23402 G[255:128] = 0
23403 The specified op must be of the 64F0x2 kind, so that it
23404 copies the upper half of the operand to the result.
23406 static Long dis_AVX128_E_V_to_G_lo64_unary ( /*OUT*/Bool* uses_vvvv,
23407 const VexAbiInfo* vbi,
23408 Prefix pfx, Long delta,
23409 const HChar* opname, IROp op )
23411 HChar dis_buf[50];
23412 Int alen;
23413 IRTemp addr;
23414 UChar rm = getUChar(delta);
23415 UInt rG = gregOfRexRM(pfx,rm);
23416 UInt rV = getVexNvvvv(pfx);
23417 IRTemp e64 = newTemp(Ity_I64);
23419 /* Fetch E[63:0] */
23420 if (epartIsReg(rm)) {
23421 UInt rE = eregOfRexRM(pfx,rm);
23422 assign(e64, getXMMRegLane64(rE, 0));
23423 DIP("%s %s,%s,%s\n", opname,
23424 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23425 delta += 1;
23426 } else {
23427 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23428 assign(e64, loadLE(Ity_I64, mkexpr(addr)));
23429 DIP("%s %s,%s,%s\n", opname,
23430 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23431 delta += alen;
23434 /* Create a value 'arg' as V[127:64]++E[63:0] */
23435 IRTemp arg = newTemp(Ity_V128);
23436 assign(arg,
23437 binop(Iop_SetV128lo64,
23438 getXMMReg(rV), mkexpr(e64)));
23439 /* and apply op to it */
23440 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
23441 *uses_vvvv = True;
23442 return delta;
23446 /* Lower 32-bit lane only AVX128 unary operation:
23447 G[31:0] = op(E[31:0])
23448 G[127:32] = V[127:32]
23449 G[255:128] = 0
23450 The specified op must be of the 32F0x4 kind, so that it
23451 copies the upper 3/4 of the operand to the result.
23453 static Long dis_AVX128_E_V_to_G_lo32_unary ( /*OUT*/Bool* uses_vvvv,
23454 const VexAbiInfo* vbi,
23455 Prefix pfx, Long delta,
23456 const HChar* opname, IROp op )
23458 HChar dis_buf[50];
23459 Int alen;
23460 IRTemp addr;
23461 UChar rm = getUChar(delta);
23462 UInt rG = gregOfRexRM(pfx,rm);
23463 UInt rV = getVexNvvvv(pfx);
23464 IRTemp e32 = newTemp(Ity_I32);
23466 /* Fetch E[31:0] */
23467 if (epartIsReg(rm)) {
23468 UInt rE = eregOfRexRM(pfx,rm);
23469 assign(e32, getXMMRegLane32(rE, 0));
23470 DIP("%s %s,%s,%s\n", opname,
23471 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23472 delta += 1;
23473 } else {
23474 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23475 assign(e32, loadLE(Ity_I32, mkexpr(addr)));
23476 DIP("%s %s,%s,%s\n", opname,
23477 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23478 delta += alen;
23481 /* Create a value 'arg' as V[127:32]++E[31:0] */
23482 IRTemp arg = newTemp(Ity_V128);
23483 assign(arg,
23484 binop(Iop_SetV128lo32,
23485 getXMMReg(rV), mkexpr(e32)));
23486 /* and apply op to it */
23487 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
23488 *uses_vvvv = True;
23489 return delta;
23493 /* Lower 32-bit lane only AVX128 binary operation:
23494 G[31:0] = V[31:0] `op` E[31:0]
23495 G[127:32] = V[127:32]
23496 G[255:128] = 0.
23497 The specified op must be of the 32F0x4 kind, so that it
23498 copies the upper 3/4 of the left operand to the result.
23500 static Long dis_AVX128_E_V_to_G_lo32 ( /*OUT*/Bool* uses_vvvv,
23501 const VexAbiInfo* vbi,
23502 Prefix pfx, Long delta,
23503 const HChar* opname, IROp op )
23505 HChar dis_buf[50];
23506 Int alen;
23507 IRTemp addr;
23508 UChar rm = getUChar(delta);
23509 UInt rG = gregOfRexRM(pfx,rm);
23510 UInt rV = getVexNvvvv(pfx);
23511 IRExpr* vpart = getXMMReg(rV);
23512 if (epartIsReg(rm)) {
23513 UInt rE = eregOfRexRM(pfx,rm);
23514 putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) );
23515 DIP("%s %s,%s,%s\n", opname,
23516 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23517 delta = delta+1;
23518 } else {
23519 /* We can only do a 32-bit memory read, so the upper 3/4 of the
23520 E operand needs to be made simply of zeroes. */
23521 IRTemp epart = newTemp(Ity_V128);
23522 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23523 assign( epart, unop( Iop_32UtoV128,
23524 loadLE(Ity_I32, mkexpr(addr))) );
23525 putXMMReg( rG, binop(op, vpart, mkexpr(epart)) );
23526 DIP("%s %s,%s,%s\n", opname,
23527 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23528 delta = delta+alen;
23530 putYMMRegLane128( rG, 1, mkV128(0) );
23531 *uses_vvvv = True;
23532 return delta;
23536 /* All-lanes AVX128 binary operation:
23537 G[127:0] = V[127:0] `op` E[127:0]
23538 G[255:128] = 0.
23540 static Long dis_AVX128_E_V_to_G ( /*OUT*/Bool* uses_vvvv,
23541 const VexAbiInfo* vbi,
23542 Prefix pfx, Long delta,
23543 const HChar* opname, IROp op )
23545 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
23546 uses_vvvv, vbi, pfx, delta, opname, op,
23547 NULL, False/*!invertLeftArg*/, False/*!swapArgs*/
23552 /* Handles AVX128 32F/64F comparisons. A derivative of
23553 dis_SSEcmp_E_to_G. It can fail, in which case it returns the
23554 original delta to indicate failure. */
23555 static
23556 Long dis_AVX128_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv,
23557 const VexAbiInfo* vbi,
23558 Prefix pfx, Long delta,
23559 const HChar* opname, Bool all_lanes, Int sz )
23561 vassert(sz == 4 || sz == 8);
23562 Long deltaIN = delta;
23563 HChar dis_buf[50];
23564 Int alen;
23565 UInt imm8;
23566 IRTemp addr;
23567 Bool preZero = False;
23568 Bool preSwap = False;
23569 IROp op = Iop_INVALID;
23570 Bool postNot = False;
23571 IRTemp plain = newTemp(Ity_V128);
23572 UChar rm = getUChar(delta);
23573 UInt rG = gregOfRexRM(pfx, rm);
23574 UInt rV = getVexNvvvv(pfx);
23575 IRTemp argL = newTemp(Ity_V128);
23576 IRTemp argR = newTemp(Ity_V128);
23578 assign(argL, getXMMReg(rV));
23579 if (epartIsReg(rm)) {
23580 imm8 = getUChar(delta+1);
23581 Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot,
23582 imm8, all_lanes, sz);
23583 if (!ok) return deltaIN; /* FAIL */
23584 UInt rE = eregOfRexRM(pfx,rm);
23585 assign(argR, getXMMReg(rE));
23586 delta += 1+1;
23587 DIP("%s $%u,%s,%s,%s\n",
23588 opname, imm8,
23589 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23590 } else {
23591 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
23592 imm8 = getUChar(delta+alen);
23593 Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot,
23594 imm8, all_lanes, sz);
23595 if (!ok) return deltaIN; /* FAIL */
23596 assign(argR,
23597 all_lanes ? loadLE(Ity_V128, mkexpr(addr))
23598 : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
23599 : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr))));
23600 delta += alen+1;
23601 DIP("%s $%u,%s,%s,%s\n",
23602 opname, imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23605 IRTemp argMask = newTemp(Ity_V128);
23606 if (preZero) {
23607 // In this case, preSwap is irrelevant, but it's harmless to honour it
23608 // anyway.
23609 assign(argMask, mkV128(all_lanes ? 0x0000 : (sz==4 ? 0xFFF0 : 0xFF00)));
23610 } else {
23611 assign(argMask, mkV128(0xFFFF));
23614 assign(
23615 plain,
23616 preSwap ? binop(op, binop(Iop_AndV128, mkexpr(argR), mkexpr(argMask)),
23617 binop(Iop_AndV128, mkexpr(argL), mkexpr(argMask)))
23618 : binop(op, binop(Iop_AndV128, mkexpr(argL), mkexpr(argMask)),
23619 binop(Iop_AndV128, mkexpr(argR), mkexpr(argMask)))
23622 if (all_lanes) {
23623 /* This is simple: just invert the result, if necessary, and
23624 have done. */
23625 if (postNot) {
23626 putYMMRegLoAndZU( rG, unop(Iop_NotV128, mkexpr(plain)) );
23627 } else {
23628 putYMMRegLoAndZU( rG, mkexpr(plain) );
23631 else
23632 if (!preSwap) {
23633 /* More complex. It's a one-lane-only, hence need to possibly
23634 invert only that one lane. But at least the other lanes are
23635 correctly "in" the result, having been copied from the left
23636 operand (argL). */
23637 if (postNot) {
23638 IRExpr* mask = mkV128(sz==4 ? 0x000F : 0x00FF);
23639 putYMMRegLoAndZU( rG, binop(Iop_XorV128, mkexpr(plain),
23640 mask) );
23641 } else {
23642 putYMMRegLoAndZU( rG, mkexpr(plain) );
23645 else {
23646 /* This is the most complex case. One-lane-only, but the args
23647 were swapped. So we have to possibly invert the bottom lane,
23648 and (definitely) we have to copy the upper lane(s) from argL
23649 since, due to the swapping, what's currently there is from
23650 argR, which is not correct. */
23651 IRTemp res = newTemp(Ity_V128);
23652 IRTemp mask = newTemp(Ity_V128);
23653 IRTemp notMask = newTemp(Ity_V128);
23654 assign(mask, mkV128(sz==4 ? 0x000F : 0x00FF));
23655 assign(notMask, mkV128(sz==4 ? 0xFFF0 : 0xFF00));
23656 if (postNot) {
23657 assign(res,
23658 binop(Iop_OrV128,
23659 binop(Iop_AndV128,
23660 unop(Iop_NotV128, mkexpr(plain)),
23661 mkexpr(mask)),
23662 binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask))));
23663 } else {
23664 assign(res,
23665 binop(Iop_OrV128,
23666 binop(Iop_AndV128,
23667 mkexpr(plain),
23668 mkexpr(mask)),
23669 binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask))));
23671 putYMMRegLoAndZU( rG, mkexpr(res) );
23674 *uses_vvvv = True;
23675 return delta;
23679 /* Handles AVX256 32F/64F comparisons. A derivative of
23680 dis_SSEcmp_E_to_G. It can fail, in which case it returns the
23681 original delta to indicate failure. */
23682 static
23683 Long dis_AVX256_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv,
23684 const VexAbiInfo* vbi,
23685 Prefix pfx, Long delta,
23686 const HChar* opname, Int sz )
23688 vassert(sz == 4 || sz == 8);
23689 Long deltaIN = delta;
23690 HChar dis_buf[50];
23691 Int alen;
23692 UInt imm8;
23693 IRTemp addr;
23694 Bool preZero = False;
23695 Bool preSwap = False;
23696 IROp op = Iop_INVALID;
23697 Bool postNot = False;
23698 IRTemp plain = newTemp(Ity_V256);
23699 UChar rm = getUChar(delta);
23700 UInt rG = gregOfRexRM(pfx, rm);
23701 UInt rV = getVexNvvvv(pfx);
23702 IRTemp argL = newTemp(Ity_V256);
23703 IRTemp argR = newTemp(Ity_V256);
23704 IRTemp argLhi = IRTemp_INVALID;
23705 IRTemp argLlo = IRTemp_INVALID;
23706 IRTemp argRhi = IRTemp_INVALID;
23707 IRTemp argRlo = IRTemp_INVALID;
23709 assign(argL, getYMMReg(rV));
23710 if (epartIsReg(rm)) {
23711 imm8 = getUChar(delta+1);
23712 Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot, imm8,
23713 True/*all_lanes*/, sz);
23714 if (!ok) return deltaIN; /* FAIL */
23715 UInt rE = eregOfRexRM(pfx,rm);
23716 assign(argR, getYMMReg(rE));
23717 delta += 1+1;
23718 DIP("%s $%u,%s,%s,%s\n",
23719 opname, imm8,
23720 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
23721 } else {
23722 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
23723 imm8 = getUChar(delta+alen);
23724 Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot, imm8,
23725 True/*all_lanes*/, sz);
23726 if (!ok) return deltaIN; /* FAIL */
23727 assign(argR, loadLE(Ity_V256, mkexpr(addr)) );
23728 delta += alen+1;
23729 DIP("%s $%u,%s,%s,%s\n",
23730 opname, imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
23733 breakupV256toV128s( preSwap ? argR : argL, &argLhi, &argLlo );
23734 breakupV256toV128s( preSwap ? argL : argR, &argRhi, &argRlo );
23736 IRTemp argMask = newTemp(Ity_V128);
23737 if (preZero) {
23738 // In this case, preSwap is irrelevant, but it's harmless to honour it
23739 // anyway.
23740 assign(argMask, mkV128(0x0000));
23741 } else {
23742 assign(argMask, mkV128(0xFFFF));
23745 assign(
23746 plain,
23747 binop( Iop_V128HLtoV256,
23748 binop(op, binop(Iop_AndV128, mkexpr(argLhi), mkexpr(argMask)),
23749 binop(Iop_AndV128, mkexpr(argRhi), mkexpr(argMask))),
23750 binop(op, binop(Iop_AndV128, mkexpr(argLlo), mkexpr(argMask)),
23751 binop(Iop_AndV128, mkexpr(argRlo), mkexpr(argMask))))
23754 /* This is simple: just invert the result, if necessary, and
23755 have done. */
23756 if (postNot) {
23757 putYMMReg( rG, unop(Iop_NotV256, mkexpr(plain)) );
23758 } else {
23759 putYMMReg( rG, mkexpr(plain) );
23762 *uses_vvvv = True;
23763 return delta;
23767 /* Handles AVX128 unary E-to-G all-lanes operations. */
23768 static
23769 Long dis_AVX128_E_to_G_unary ( /*OUT*/Bool* uses_vvvv,
23770 const VexAbiInfo* vbi,
23771 Prefix pfx, Long delta,
23772 const HChar* opname,
23773 IRTemp (*opFn)(IRTemp) )
23775 HChar dis_buf[50];
23776 Int alen;
23777 IRTemp addr;
23778 IRTemp res = newTemp(Ity_V128);
23779 IRTemp arg = newTemp(Ity_V128);
23780 UChar rm = getUChar(delta);
23781 UInt rG = gregOfRexRM(pfx, rm);
23782 if (epartIsReg(rm)) {
23783 UInt rE = eregOfRexRM(pfx,rm);
23784 assign(arg, getXMMReg(rE));
23785 delta += 1;
23786 DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG));
23787 } else {
23788 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23789 assign(arg, loadLE(Ity_V128, mkexpr(addr)));
23790 delta += alen;
23791 DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG));
23793 res = opFn(arg);
23794 putYMMRegLoAndZU( rG, mkexpr(res) );
23795 *uses_vvvv = False;
23796 return delta;
23800 /* Handles AVX128 unary E-to-G all-lanes operations. */
23801 static
23802 Long dis_AVX128_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv,
23803 const VexAbiInfo* vbi,
23804 Prefix pfx, Long delta,
23805 const HChar* opname, IROp op )
23807 HChar dis_buf[50];
23808 Int alen;
23809 IRTemp addr;
23810 IRTemp arg = newTemp(Ity_V128);
23811 UChar rm = getUChar(delta);
23812 UInt rG = gregOfRexRM(pfx, rm);
23813 if (epartIsReg(rm)) {
23814 UInt rE = eregOfRexRM(pfx,rm);
23815 assign(arg, getXMMReg(rE));
23816 delta += 1;
23817 DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG));
23818 } else {
23819 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23820 assign(arg, loadLE(Ity_V128, mkexpr(addr)));
23821 delta += alen;
23822 DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG));
23824 // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
23825 // up in the usual way.
23826 Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2;
23827 /* XXXROUNDINGFIXME */
23828 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), mkexpr(arg))
23829 : unop(op, mkexpr(arg));
23830 putYMMRegLoAndZU( rG, res );
23831 *uses_vvvv = False;
23832 return delta;
23836 /* FIXME: common up with the _128_ version above? */
23837 static
23838 Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG (
23839 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
23840 Prefix pfx, Long delta, const HChar* name,
23841 /* The actual operation. Use either 'op' or 'opfn',
23842 but not both. */
23843 IROp op, IRTemp(*opFn)(IRTemp,IRTemp),
23844 Bool invertLeftArg,
23845 Bool swapArgs
23848 UChar modrm = getUChar(delta);
23849 UInt rD = gregOfRexRM(pfx, modrm);
23850 UInt rSL = getVexNvvvv(pfx);
23851 IRTemp tSL = newTemp(Ity_V256);
23852 IRTemp tSR = newTemp(Ity_V256);
23853 IRTemp addr = IRTemp_INVALID;
23854 HChar dis_buf[50];
23855 Int alen = 0;
23856 vassert(1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*WIG?*/);
23858 assign(tSL, invertLeftArg ? unop(Iop_NotV256, getYMMReg(rSL))
23859 : getYMMReg(rSL));
23861 if (epartIsReg(modrm)) {
23862 UInt rSR = eregOfRexRM(pfx, modrm);
23863 delta += 1;
23864 assign(tSR, getYMMReg(rSR));
23865 DIP("%s %s,%s,%s\n",
23866 name, nameYMMReg(rSR), nameYMMReg(rSL), nameYMMReg(rD));
23867 } else {
23868 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
23869 delta += alen;
23870 assign(tSR, loadLE(Ity_V256, mkexpr(addr)));
23871 DIP("%s %s,%s,%s\n",
23872 name, dis_buf, nameYMMReg(rSL), nameYMMReg(rD));
23875 IRTemp res = IRTemp_INVALID;
23876 if (op != Iop_INVALID) {
23877 vassert(opFn == NULL);
23878 res = newTemp(Ity_V256);
23879 if (requiresRMode(op)) {
23880 IRTemp rm = newTemp(Ity_I32);
23881 assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
23882 assign(res, swapArgs
23883 ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL))
23884 : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR)));
23885 } else {
23886 assign(res, swapArgs
23887 ? binop(op, mkexpr(tSR), mkexpr(tSL))
23888 : binop(op, mkexpr(tSL), mkexpr(tSR)));
23890 } else {
23891 vassert(opFn != NULL);
23892 res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR);
23895 putYMMReg(rD, mkexpr(res));
23897 *uses_vvvv = True;
23898 return delta;
23902 /* All-lanes AVX256 binary operation:
23903 G[255:0] = V[255:0] `op` E[255:0]
23905 static Long dis_AVX256_E_V_to_G ( /*OUT*/Bool* uses_vvvv,
23906 const VexAbiInfo* vbi,
23907 Prefix pfx, Long delta,
23908 const HChar* opname, IROp op )
23910 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23911 uses_vvvv, vbi, pfx, delta, opname, op,
23912 NULL, False/*!invertLeftArg*/, False/*!swapArgs*/
23917 /* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, with a simple IROp
23918 for the operation, no inversion of the left arg, and no swapping of
23919 args. */
23920 static
23921 Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple (
23922 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
23923 Prefix pfx, Long delta, const HChar* name,
23924 IROp op
23927 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23928 uses_vvvv, vbi, pfx, delta, name, op, NULL, False, False);
23932 /* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, using the given IR
23933 generator to compute the result, no inversion of the left
23934 arg, and no swapping of args. */
23935 static
23936 Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex (
23937 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
23938 Prefix pfx, Long delta, const HChar* name,
23939 IRTemp(*opFn)(IRTemp,IRTemp)
23942 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23943 uses_vvvv, vbi, pfx, delta, name,
23944 Iop_INVALID, opFn, False, False );
23948 /* Handles AVX256 unary E-to-G all-lanes operations, with the operation supplied as an IR generator function. */
23949 static
23950 Long dis_AVX256_E_to_G_unary ( /*OUT*/Bool* uses_vvvv,
23951 const VexAbiInfo* vbi,
23952 Prefix pfx, Long delta,
23953 const HChar* opname,
23954 IRTemp (*opFn)(IRTemp) )
23956 HChar dis_buf[50];
23957 Int alen;
23958 IRTemp addr;
23959 IRTemp res = newTemp(Ity_V256);
23960 IRTemp arg = newTemp(Ity_V256);
23961 UChar rm = getUChar(delta);
23962 UInt rG = gregOfRexRM(pfx, rm);
23963 if (epartIsReg(rm)) {
23964 UInt rE = eregOfRexRM(pfx,rm);
23965 assign(arg, getYMMReg(rE));
23966 delta += 1;
23967 DIP("%s %s,%s\n", opname, nameYMMReg(rE), nameYMMReg(rG));
23968 } else {
23969 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23970 assign(arg, loadLE(Ity_V256, mkexpr(addr)));
23971 delta += alen;
23972 DIP("%s %s,%s\n", opname, dis_buf, nameYMMReg(rG));
23974 res = opFn(arg);
23975 putYMMReg( rG, mkexpr(res) );
23976 *uses_vvvv = False;
23977 return delta;
23981 /* Handles AVX256 unary E-to-G all-lanes operations, with the operation supplied as a single IROp. */
23982 static
23983 Long dis_AVX256_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv,
23984 const VexAbiInfo* vbi,
23985 Prefix pfx, Long delta,
23986 const HChar* opname, IROp op )
23988 HChar dis_buf[50];
23989 Int alen;
23990 IRTemp addr;
23991 IRTemp arg = newTemp(Ity_V256);
23992 UChar rm = getUChar(delta);
23993 UInt rG = gregOfRexRM(pfx, rm);
23994 if (epartIsReg(rm)) {
23995 UInt rE = eregOfRexRM(pfx,rm);
23996 assign(arg, getYMMReg(rE));
23997 delta += 1;
23998 DIP("%s %s,%s\n", opname, nameYMMReg(rE), nameYMMReg(rG));
23999 } else {
24000 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24001 assign(arg, loadLE(Ity_V256, mkexpr(addr)));
24002 delta += alen;
24003 DIP("%s %s,%s\n", opname, dis_buf, nameYMMReg(rG));
24005 putYMMReg( rG, unop(op, mkexpr(arg)) );
24006 *uses_vvvv = False;
24007 return delta;
24011 /* The use of ReinterpF64asI64 is ugly. Surely could do better if we
24012 had a variant of Iop_64x4toV256 that took F64s as args instead. */
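/* VCVTDQ2PD ymm: widens the four signed 32-bit lanes of the xmm/m128
   source into four F64 lanes of the ymm destination.  I32 -> F64 is
   exact, so Iop_I32StoF64 needs no rounding mode. */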
24013 static Long dis_CVTDQ2PD_256 ( const VexAbiInfo* vbi, Prefix pfx,
24014 Long delta )
24016 IRTemp addr = IRTemp_INVALID;
24017 Int alen = 0;
24018 HChar dis_buf[50];
24019 UChar modrm = getUChar(delta);
24020 IRTemp sV = newTemp(Ity_V128);
24021 UInt rG = gregOfRexRM(pfx,modrm);
24022 if (epartIsReg(modrm)) {
24023 UInt rE = eregOfRexRM(pfx,modrm);
24024 assign( sV, getXMMReg(rE) );
24025 delta += 1;
24026 DIP("vcvtdq2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
24027 } else {
24028 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24029 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
24030 delta += alen;
24031 DIP("vcvtdq2pd %s,%s\n", dis_buf, nameYMMReg(rG) );
24033 IRTemp s3, s2, s1, s0;
24034 s3 = s2 = s1 = s0 = IRTemp_INVALID;
24035 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
24036 IRExpr* res
24037 = IRExpr_Qop(
24038 Iop_64x4toV256,
24039 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s3))),
24040 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s2))),
24041 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s1))),
24042 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s0)))
24044 putYMMReg(rG, res);
24045 return delta;
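/* VCVTPD2PS ymm->xmm: narrows the four F64 lanes of the ymm/m256 source
   into four F32 lanes in the low half of the destination, rounding per
   the current SSE rounding mode, and zeroes the upper 128 bits. */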
24049 static Long dis_CVTPD2PS_256 ( const VexAbiInfo* vbi, Prefix pfx,
24050 Long delta )
24052 IRTemp addr = IRTemp_INVALID;
24053 Int alen = 0;
24054 HChar dis_buf[50];
24055 UChar modrm = getUChar(delta);
24056 UInt rG = gregOfRexRM(pfx,modrm);
24057 IRTemp argV = newTemp(Ity_V256);
24058 IRTemp rmode = newTemp(Ity_I32);
24059 if (epartIsReg(modrm)) {
24060 UInt rE = eregOfRexRM(pfx,modrm);
24061 assign( argV, getYMMReg(rE) );
24062 delta += 1;
24063 DIP("vcvtpd2psy %s,%s\n", nameYMMReg(rE), nameXMMReg(rG));
24064 } else {
24065 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24066 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
24067 delta += alen;
24068 DIP("vcvtpd2psy %s,%s\n", dis_buf, nameXMMReg(rG) );
24071 assign( rmode, get_sse_roundingmode() );
24072 IRTemp t3, t2, t1, t0;
24073 t3 = t2 = t1 = t0 = IRTemp_INVALID;
24074 breakupV256to64s( argV, &t3, &t2, &t1, &t0 );
24075 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), \
24076 unop(Iop_ReinterpI64asF64, mkexpr(_t)) )
24077 putXMMRegLane32F( rG, 3, CVT(t3) );
24078 putXMMRegLane32F( rG, 2, CVT(t2) );
24079 putXMMRegLane32F( rG, 1, CVT(t1) );
24080 putXMMRegLane32F( rG, 0, CVT(t0) );
24081 # undef CVT
24082 putYMMRegLane128( rG, 1, mkV128(0) );
24083 return delta;
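/* Helper for the 256-bit VPUNPCK{L,H}* and VPACK* families: split each
   256-bit input into 128-bit halves and apply the given 128-bit op to
   corresponding halves, since these instructions operate on each 128-bit
   lane independently.  Note the op is applied as op(tR, tL), i.e. with
   the arguments swapped relative to the parameter order. */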
24087 static IRTemp math_VPUNPCK_YMM ( IRTemp tL, IRTemp tR, IROp op )
24089 IRTemp tLhi, tLlo, tRhi, tRlo;
24090 tLhi = tLlo = tRhi = tRlo = IRTemp_INVALID;
24091 IRTemp res = newTemp(Ity_V256);
24092 breakupV256toV128s( tL, &tLhi, &tLlo );
24093 breakupV256toV128s( tR, &tRhi, &tRlo );
24094 assign( res, binop( Iop_V128HLtoV256,
24095 binop( op, mkexpr(tRhi), mkexpr(tLhi) ),
24096 binop( op, mkexpr(tRlo), mkexpr(tLlo) ) ) );
24097 return res;
24101 static IRTemp math_VPUNPCKLBW_YMM ( IRTemp tL, IRTemp tR )
24103 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO8x16 );
24107 static IRTemp math_VPUNPCKLWD_YMM ( IRTemp tL, IRTemp tR )
24109 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO16x8 );
24113 static IRTemp math_VPUNPCKLDQ_YMM ( IRTemp tL, IRTemp tR )
24115 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO32x4 );
24119 static IRTemp math_VPUNPCKLQDQ_YMM ( IRTemp tL, IRTemp tR )
24121 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO64x2 );
24125 static IRTemp math_VPUNPCKHBW_YMM ( IRTemp tL, IRTemp tR )
24127 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI8x16 );
24131 static IRTemp math_VPUNPCKHWD_YMM ( IRTemp tL, IRTemp tR )
24133 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI16x8 );
24137 static IRTemp math_VPUNPCKHDQ_YMM ( IRTemp tL, IRTemp tR )
24139 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI32x4 );
24143 static IRTemp math_VPUNPCKHQDQ_YMM ( IRTemp tL, IRTemp tR )
24145 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI64x2 );
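/* The VPACK* helpers below reuse math_VPUNPCK_YMM: packing, like
   unpacking, is just a per-128-bit-lane binary op, so only the IROp
   differs. */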
24149 static IRTemp math_VPACKSSWB_YMM ( IRTemp tL, IRTemp tR )
24151 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin16Sto8Sx16 );
24155 static IRTemp math_VPACKUSWB_YMM ( IRTemp tL, IRTemp tR )
24157 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin16Sto8Ux16 );
24161 static IRTemp math_VPACKSSDW_YMM ( IRTemp tL, IRTemp tR )
24163 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin32Sto16Sx8 );
24167 static IRTemp math_VPACKUSDW_YMM ( IRTemp tL, IRTemp tR )
24169 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin32Sto16Ux8 );
24173 __attribute__((noinline))
24174 static
24175 Long dis_ESC_0F__VEX (
24176 /*MB_OUT*/DisResult* dres,
24177 /*OUT*/ Bool* uses_vvvv,
24178 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
24179 Bool resteerCisOk,
24180 void* callback_opaque,
24181 const VexArchInfo* archinfo,
24182 const VexAbiInfo* vbi,
24183 Prefix pfx, Int sz, Long deltaIN
24186 IRTemp addr = IRTemp_INVALID;
24187 Int alen = 0;
24188 HChar dis_buf[50];
24189 Long delta = deltaIN;
24190 UChar opc = getUChar(delta);
24191 delta++;
24192 *uses_vvvv = False;
24194 switch (opc) {
24196 case 0x10:
24197 /* VMOVSD m64, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
24198 /* Move 64 bits from E (mem only) to G (lo half xmm).
24199 Bits 255-64 of the dest are zeroed out. */
24200 if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) {
24201 UChar modrm = getUChar(delta);
24202 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24203 UInt rG = gregOfRexRM(pfx,modrm);
24204 IRTemp z128 = newTemp(Ity_V128);
24205 assign(z128, mkV128(0));
24206 putXMMReg( rG, mkexpr(z128) );
24207 /* FIXME: ALIGNMENT CHECK? */
24208 putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) );
24209 putYMMRegLane128( rG, 1, mkexpr(z128) );
24210 DIP("vmovsd %s,%s\n", dis_buf, nameXMMReg(rG));
24211 delta += alen;
24212 goto decode_success;
24214 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
24215 /* Reg form. */
24216 if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) {
24217 UChar modrm = getUChar(delta);
24218 UInt rG = gregOfRexRM(pfx, modrm);
24219 UInt rE = eregOfRexRM(pfx, modrm);
24220 UInt rV = getVexNvvvv(pfx);
24221 delta++;
24222 DIP("vmovsd %s,%s,%s\n",
24223 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
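/* Merge: result[127:64] = rV[127:64], result[63:0] = rE[63:0]; the
   upper 128 bits of the destination ymm are zeroed by
   putYMMRegLoAndZU. */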
24224 IRTemp res = newTemp(Ity_V128);
24225 assign(res, binop(Iop_64HLtoV128,
24226 getXMMRegLane64(rV, 1),
24227 getXMMRegLane64(rE, 0)));
24228 putYMMRegLoAndZU(rG, mkexpr(res));
24229 *uses_vvvv = True;
24230 goto decode_success;
24232 /* VMOVSS m32, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */
24233 /* Move 32 bits from E (mem only) to G (lo 1/4 xmm).
24234 Bits 255-32 of the dest are zeroed out. */
24235 if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) {
24236 UChar modrm = getUChar(delta);
24237 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24238 UInt rG = gregOfRexRM(pfx,modrm);
24239 IRTemp z128 = newTemp(Ity_V128);
24240 assign(z128, mkV128(0));
24241 putXMMReg( rG, mkexpr(z128) );
24242 /* FIXME: ALIGNMENT CHECK? */
24243 putXMMRegLane32( rG, 0, loadLE(Ity_I32, mkexpr(addr)) );
24244 putYMMRegLane128( rG, 1, mkexpr(z128) );
24245 DIP("vmovss %s,%s\n", dis_buf, nameXMMReg(rG));
24246 delta += alen;
24247 goto decode_success;
24249 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */
24250 /* Reg form. */
24251 if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) {
24252 UChar modrm = getUChar(delta);
24253 UInt rG = gregOfRexRM(pfx, modrm);
24254 UInt rE = eregOfRexRM(pfx, modrm);
24255 UInt rV = getVexNvvvv(pfx);
24256 delta++;
24257 DIP("vmovss %s,%s,%s\n",
24258 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
24259 IRTemp res = newTemp(Ity_V128);
24260 assign( res, binop( Iop_64HLtoV128,
24261 getXMMRegLane64(rV, 1),
24262 binop(Iop_32HLto64,
24263 getXMMRegLane32(rV, 1),
24264 getXMMRegLane32(rE, 0)) ) );
24265 putYMMRegLoAndZU(rG, mkexpr(res));
24266 *uses_vvvv = True;
24267 goto decode_success;
24269 /* VMOVUPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 10 /r */
24270 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24271 UChar modrm = getUChar(delta);
24272 UInt rG = gregOfRexRM(pfx, modrm);
24273 if (epartIsReg(modrm)) {
24274 UInt rE = eregOfRexRM(pfx,modrm);
24275 putYMMRegLoAndZU( rG, getXMMReg( rE ));
24276 DIP("vmovupd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
24277 delta += 1;
24278 } else {
24279 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24280 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
24281 DIP("vmovupd %s,%s\n", dis_buf, nameXMMReg(rG));
24282 delta += alen;
24284 goto decode_success;
24286 /* VMOVUPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 10 /r */
24287 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24288 UChar modrm = getUChar(delta);
24289 UInt rG = gregOfRexRM(pfx, modrm);
24290 if (epartIsReg(modrm)) {
24291 UInt rE = eregOfRexRM(pfx,modrm);
24292 putYMMReg( rG, getYMMReg( rE ));
24293 DIP("vmovupd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
24294 delta += 1;
24295 } else {
24296 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24297 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
24298 DIP("vmovupd %s,%s\n", dis_buf, nameYMMReg(rG));
24299 delta += alen;
24301 goto decode_success;
24303 /* VMOVUPS xmm2/m128, xmm1 = VEX.128.0F.WIG 10 /r */
24304 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24305 UChar modrm = getUChar(delta);
24306 UInt rG = gregOfRexRM(pfx, modrm);
24307 if (epartIsReg(modrm)) {
24308 UInt rE = eregOfRexRM(pfx,modrm);
24309 putYMMRegLoAndZU( rG, getXMMReg( rE ));
24310 DIP("vmovups %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
24311 delta += 1;
24312 } else {
24313 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24314 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
24315 DIP("vmovups %s,%s\n", dis_buf, nameXMMReg(rG));
24316 delta += alen;
24318 goto decode_success;
24320 /* VMOVUPS ymm2/m256, ymm1 = VEX.256.0F.WIG 10 /r */
24321 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24322 UChar modrm = getUChar(delta);
24323 UInt rG = gregOfRexRM(pfx, modrm);
24324 if (epartIsReg(modrm)) {
24325 UInt rE = eregOfRexRM(pfx,modrm);
24326 putYMMReg( rG, getYMMReg( rE ));
24327 DIP("vmovups %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
24328 delta += 1;
24329 } else {
24330 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24331 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
24332 DIP("vmovups %s,%s\n", dis_buf, nameYMMReg(rG));
24333 delta += alen;
24335 goto decode_success;
24337 break;
24339 case 0x11:
24340 /* VMOVSD xmm1, m64 = VEX.LIG.F2.0F.WIG 11 /r */
24341 /* Move 64 bits from G (low half xmm) to mem only. */
24342 if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) {
24343 UChar modrm = getUChar(delta);
24344 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24345 UInt rG = gregOfRexRM(pfx,modrm);
24346 /* FIXME: ALIGNMENT CHECK? */
24347 storeLE( mkexpr(addr), getXMMRegLane64(rG, 0));
24348 DIP("vmovsd %s,%s\n", nameXMMReg(rG), dis_buf);
24349 delta += alen;
24350 goto decode_success;
24352 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 11 /r */
24353 /* Reg form. */
24354 if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) {
24355 UChar modrm = getUChar(delta);
24356 UInt rG = gregOfRexRM(pfx, modrm);
24357 UInt rE = eregOfRexRM(pfx, modrm);
24358 UInt rV = getVexNvvvv(pfx);
24359 delta++;
24360 DIP("vmovsd %s,%s,%s\n",
24361 nameXMMReg(rG), nameXMMReg(rV), nameXMMReg(rE));
24362 IRTemp res = newTemp(Ity_V128);
24363 assign(res, binop(Iop_64HLtoV128,
24364 getXMMRegLane64(rV, 1),
24365 getXMMRegLane64(rG, 0)));
24366 putYMMRegLoAndZU(rE, mkexpr(res));
24367 *uses_vvvv = True;
24368 goto decode_success;
24370 /* VMOVSS xmm1, m32 = VEX.LIG.F3.0F.WIG 11 /r */
24371 /* Move 32 bits from G (low 1/4 xmm) to mem only. */
24372 if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) {
24373 UChar modrm = getUChar(delta);
24374 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24375 UInt rG = gregOfRexRM(pfx,modrm);
24376 /* FIXME: ALIGNMENT CHECK? */
24377 storeLE( mkexpr(addr), getXMMRegLane32(rG, 0));
24378 DIP("vmovss %s,%s\n", nameXMMReg(rG), dis_buf);
24379 delta += alen;
24380 goto decode_success;
24382 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 11 /r */
24383 /* Reg form. */
24384 if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) {
24385 UChar modrm = getUChar(delta);
24386 UInt rG = gregOfRexRM(pfx, modrm);
24387 UInt rE = eregOfRexRM(pfx, modrm);
24388 UInt rV = getVexNvvvv(pfx);
24389 delta++;
24390 DIP("vmovss %s,%s,%s\n",
24391 nameXMMReg(rG), nameXMMReg(rV), nameXMMReg(rE));
24392 IRTemp res = newTemp(Ity_V128);
24393 assign( res, binop( Iop_64HLtoV128,
24394 getXMMRegLane64(rV, 1),
24395 binop(Iop_32HLto64,
24396 getXMMRegLane32(rV, 1),
24397 getXMMRegLane32(rG, 0)) ) );
24398 putYMMRegLoAndZU(rE, mkexpr(res));
24399 *uses_vvvv = True;
24400 goto decode_success;
24402 /* VMOVUPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 11 /r */
24403 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24404 UChar modrm = getUChar(delta);
24405 UInt rG = gregOfRexRM(pfx,modrm);
24406 if (epartIsReg(modrm)) {
24407 UInt rE = eregOfRexRM(pfx,modrm);
24408 putYMMRegLoAndZU( rE, getXMMReg(rG) );
24409 DIP("vmovupd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
24410 delta += 1;
24411 } else {
24412 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24413 storeLE( mkexpr(addr), getXMMReg(rG) );
24414 DIP("vmovupd %s,%s\n", nameXMMReg(rG), dis_buf);
24415 delta += alen;
24417 goto decode_success;
24419 /* VMOVUPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 11 /r */
24420 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24421 UChar modrm = getUChar(delta);
24422 UInt rG = gregOfRexRM(pfx,modrm);
24423 if (epartIsReg(modrm)) {
24424 UInt rE = eregOfRexRM(pfx,modrm);
24425 putYMMReg( rE, getYMMReg(rG) );
24426 DIP("vmovupd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
24427 delta += 1;
24428 } else {
24429 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24430 storeLE( mkexpr(addr), getYMMReg(rG) );
24431 DIP("vmovupd %s,%s\n", nameYMMReg(rG), dis_buf);
24432 delta += alen;
24434 goto decode_success;
24436 /* VMOVUPS xmm1, xmm2/m128 = VEX.128.0F.WIG 11 /r */
24437 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24438 UChar modrm = getUChar(delta);
24439 UInt rG = gregOfRexRM(pfx,modrm);
24440 if (epartIsReg(modrm)) {
24441 UInt rE = eregOfRexRM(pfx,modrm);
24442 putYMMRegLoAndZU( rE, getXMMReg(rG) );
24443 DIP("vmovups %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
24444 delta += 1;
24445 } else {
24446 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24447 storeLE( mkexpr(addr), getXMMReg(rG) );
24448 DIP("vmovups %s,%s\n", nameXMMReg(rG), dis_buf);
24449 delta += alen;
24451 goto decode_success;
24453 /* VMOVUPS ymm1, ymm2/m256 = VEX.256.0F.WIG 11 /r */
24454 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24455 UChar modrm = getUChar(delta);
24456 UInt rG = gregOfRexRM(pfx,modrm);
24457 if (epartIsReg(modrm)) {
24458 UInt rE = eregOfRexRM(pfx,modrm);
24459 putYMMReg( rE, getYMMReg(rG) );
24460 DIP("vmovups %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
24461 delta += 1;
24462 } else {
24463 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24464 storeLE( mkexpr(addr), getYMMReg(rG) );
24465 DIP("vmovups %s,%s\n", nameYMMReg(rG), dis_buf);
24466 delta += alen;
24468 goto decode_success;
24470 break;
24472 case 0x12:
24473 /* VMOVDDUP xmm2/m64, xmm1 = VEX.128.F2.0F.WIG 12 /r */
24474 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24475 delta = dis_MOVDDUP_128( vbi, pfx, delta, True/*isAvx*/ );
24476 goto decode_success;
24478 /* VMOVDDUP ymm2/m256, ymm1 = VEX.256.F2.0F.WIG 12 /r */
24479 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24480 delta = dis_MOVDDUP_256( vbi, pfx, delta );
24481 goto decode_success;
24483 /* VMOVHLPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 12 /r */
24484 /* Insn only exists in reg form */
24485 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
24486 && epartIsReg(getUChar(delta))) {
24487 UChar modrm = getUChar(delta);
24488 UInt rG = gregOfRexRM(pfx, modrm);
24489 UInt rE = eregOfRexRM(pfx, modrm);
24490 UInt rV = getVexNvvvv(pfx);
24491 delta++;
24492 DIP("vmovhlps %s,%s,%s\n",
24493 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
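/* result[127:64] = rV[127:64], result[63:0] = rE[127:64]; the upper
   128 bits of the destination are zeroed. */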
24494 IRTemp res = newTemp(Ity_V128);
24495 assign(res, binop(Iop_64HLtoV128,
24496 getXMMRegLane64(rV, 1),
24497 getXMMRegLane64(rE, 1)));
24498 putYMMRegLoAndZU(rG, mkexpr(res));
24499 *uses_vvvv = True;
24500 goto decode_success;
24502 /* VMOVLPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 12 /r */
24503 /* Insn exists only in mem form, it appears. */
24504 /* VMOVLPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 12 /r */
24505 /* Insn exists only in mem form, it appears. */
24506 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24507 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
24508 UChar modrm = getUChar(delta);
24509 UInt rG = gregOfRexRM(pfx, modrm);
24510 UInt rV = getVexNvvvv(pfx);
24511 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24512 delta += alen;
24513 DIP("vmovlpd %s,%s,%s\n",
24514 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
24515 IRTemp res = newTemp(Ity_V128);
24516 assign(res, binop(Iop_64HLtoV128,
24517 getXMMRegLane64(rV, 1),
24518 loadLE(Ity_I64, mkexpr(addr))));
24519 putYMMRegLoAndZU(rG, mkexpr(res));
24520 *uses_vvvv = True;
24521 goto decode_success;
24523 /* VMOVSLDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 12 /r */
24524 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
24525 delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/,
24526 True/*isL*/ );
24527 goto decode_success;
24529 /* VMOVSLDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 12 /r */
24530 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
24531 delta = dis_MOVSxDUP_256( vbi, pfx, delta, True/*isL*/ );
24532 goto decode_success;
24534 break;
24536 case 0x13:
24537 /* VMOVLPS xmm1, m64 = VEX.128.0F.WIG 13 /r */
24538 /* Insn exists only in mem form, it appears. */
24539 /* VMOVLPD xmm1, m64 = VEX.128.66.0F.WIG 13 /r */
24540 /* Insn exists only in mem form, it appears. */
24541 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24542 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
24543 UChar modrm = getUChar(delta);
24544 UInt rG = gregOfRexRM(pfx, modrm);
24545 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24546 delta += alen;
24547 storeLE( mkexpr(addr), getXMMRegLane64( rG, 0));
24548 DIP("vmovlpd %s,%s\n", nameXMMReg(rG), dis_buf);
24549 goto decode_success;
24551 break;
24553 case 0x14:
24554 case 0x15:
24555 /* VUNPCKLPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 14 /r */
24556 /* VUNPCKHPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 15 /r */
24557 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24558 Bool hi = opc == 0x15;
24559 UChar modrm = getUChar(delta);
24560 UInt rG = gregOfRexRM(pfx,modrm);
24561 UInt rV = getVexNvvvv(pfx);
24562 IRTemp eV = newTemp(Ity_V128);
24563 IRTemp vV = newTemp(Ity_V128);
24564 assign( vV, getXMMReg(rV) );
24565 if (epartIsReg(modrm)) {
24566 UInt rE = eregOfRexRM(pfx,modrm);
24567 assign( eV, getXMMReg(rE) );
24568 delta += 1;
24569 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
24570 nameXMMReg(rE), nameXMMReg(rG));
24571 } else {
24572 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24573 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
24574 delta += alen;
24575 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
24576 dis_buf, nameXMMReg(rG));
24578 IRTemp res = math_UNPCKxPS_128( eV, vV, hi );
24579 putYMMRegLoAndZU( rG, mkexpr(res) );
24580 *uses_vvvv = True;
24581 goto decode_success;
24583 /* VUNPCKLPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 14 /r */
24584 /* VUNPCKHPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 15 /r */
24585 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24586 Bool hi = opc == 0x15;
24587 UChar modrm = getUChar(delta);
24588 UInt rG = gregOfRexRM(pfx,modrm);
24589 UInt rV = getVexNvvvv(pfx);
24590 IRTemp eV = newTemp(Ity_V256);
24591 IRTemp vV = newTemp(Ity_V256);
24592 assign( vV, getYMMReg(rV) );
24593 if (epartIsReg(modrm)) {
24594 UInt rE = eregOfRexRM(pfx,modrm);
24595 assign( eV, getYMMReg(rE) );
24596 delta += 1;
24597 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
24598 nameYMMReg(rE), nameYMMReg(rG));
24599 } else {
24600 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24601 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
24602 delta += alen;
24603 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
24604 dis_buf, nameYMMReg(rG));
24606 IRTemp res = math_UNPCKxPS_256( eV, vV, hi );
24607 putYMMReg( rG, mkexpr(res) );
24608 *uses_vvvv = True;
24609 goto decode_success;
24611 /* VUNPCKLPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 14 /r */
24612 /* VUNPCKHPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 15 /r */
24613 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24614 Bool hi = opc == 0x15;
24615 UChar modrm = getUChar(delta);
24616 UInt rG = gregOfRexRM(pfx,modrm);
24617 UInt rV = getVexNvvvv(pfx);
24618 IRTemp eV = newTemp(Ity_V128);
24619 IRTemp vV = newTemp(Ity_V128);
24620 assign( vV, getXMMReg(rV) );
24621 if (epartIsReg(modrm)) {
24622 UInt rE = eregOfRexRM(pfx,modrm);
24623 assign( eV, getXMMReg(rE) );
24624 delta += 1;
24625 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
24626 nameXMMReg(rE), nameXMMReg(rG));
24627 } else {
24628 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24629 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
24630 delta += alen;
24631 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
24632 dis_buf, nameXMMReg(rG));
24634 IRTemp res = math_UNPCKxPD_128( eV, vV, hi );
24635 putYMMRegLoAndZU( rG, mkexpr(res) );
24636 *uses_vvvv = True;
24637 goto decode_success;
24639 /* VUNPCKLPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 14 /r */
24640 /* VUNPCKHPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 15 /r */
24641 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24642 Bool hi = opc == 0x15;
24643 UChar modrm = getUChar(delta);
24644 UInt rG = gregOfRexRM(pfx,modrm);
24645 UInt rV = getVexNvvvv(pfx);
24646 IRTemp eV = newTemp(Ity_V256);
24647 IRTemp vV = newTemp(Ity_V256);
24648 assign( vV, getYMMReg(rV) );
24649 if (epartIsReg(modrm)) {
24650 UInt rE = eregOfRexRM(pfx,modrm);
24651 assign( eV, getYMMReg(rE) );
24652 delta += 1;
24653 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
24654 nameYMMReg(rE), nameYMMReg(rG));
24655 } else {
24656 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24657 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
24658 delta += alen;
24659 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
24660 dis_buf, nameYMMReg(rG));
24662 IRTemp res = math_UNPCKxPD_256( eV, vV, hi );
24663 putYMMReg( rG, mkexpr(res) );
24664 *uses_vvvv = True;
24665 goto decode_success;
24667 break;
24669 case 0x16:
24670 /* VMOVLHPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 16 /r */
24671 /* Insn only exists in reg form */
24672 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
24673 && epartIsReg(getUChar(delta))) {
24674 UChar modrm = getUChar(delta);
24675 UInt rG = gregOfRexRM(pfx, modrm);
24676 UInt rE = eregOfRexRM(pfx, modrm);
24677 UInt rV = getVexNvvvv(pfx);
24678 delta++;
24679 DIP("vmovlhps %s,%s,%s\n",
24680 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
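/* result[127:64] = rE[63:0], result[63:0] = rV[63:0]; the upper 128
   bits of the destination are zeroed. */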
24681 IRTemp res = newTemp(Ity_V128);
24682 assign(res, binop(Iop_64HLtoV128,
24683 getXMMRegLane64(rE, 0),
24684 getXMMRegLane64(rV, 0)));
24685 putYMMRegLoAndZU(rG, mkexpr(res));
24686 *uses_vvvv = True;
24687 goto decode_success;
24689 /* VMOVHPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 16 /r */
24690 /* Insn exists only in mem form, it appears. */
24691 /* VMOVHPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 16 /r */
24692 /* Insn exists only in mem form, it appears. */
24693 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24694 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
24695 UChar modrm = getUChar(delta);
24696 UInt rG = gregOfRexRM(pfx, modrm);
24697 UInt rV = getVexNvvvv(pfx);
24698 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24699 delta += alen;
24700 DIP("vmovhp%c %s,%s,%s\n", have66(pfx) ? 'd' : 's',
24701 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
24702 IRTemp res = newTemp(Ity_V128);
24703 assign(res, binop(Iop_64HLtoV128,
24704 loadLE(Ity_I64, mkexpr(addr)),
24705 getXMMRegLane64(rV, 0)));
24706 putYMMRegLoAndZU(rG, mkexpr(res));
24707 *uses_vvvv = True;
24708 goto decode_success;
24710 /* VMOVSHDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 16 /r */
24711 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
24712 delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/,
24713 False/*!isL*/ );
24714 goto decode_success;
24716 /* VMOVSHDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 16 /r */
24717 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
24718 delta = dis_MOVSxDUP_256( vbi, pfx, delta, False/*!isL*/ );
24719 goto decode_success;
24721 break;
24723 case 0x17:
24724 /* VMOVHPS xmm1, m64 = VEX.128.0F.WIG 17 /r */
24725 /* Insn exists only in mem form, it appears. */
24726 /* VMOVHPD xmm1, m64 = VEX.128.66.0F.WIG 17 /r */
24727 /* Insn exists only in mem form, it appears. */
24728 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24729 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
24730 UChar modrm = getUChar(delta);
24731 UInt rG = gregOfRexRM(pfx, modrm);
24732 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24733 delta += alen;
24734 storeLE( mkexpr(addr), getXMMRegLane64( rG, 1));
24735 DIP("vmovhp%c %s,%s\n", have66(pfx) ? 'd' : 's',
24736 nameXMMReg(rG), dis_buf);
24737 goto decode_success;
24739 break;
24741 case 0x28:
24742 /* VMOVAPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 28 /r */
24743 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24744 UChar modrm = getUChar(delta);
24745 UInt rG = gregOfRexRM(pfx, modrm);
24746 if (epartIsReg(modrm)) {
24747 UInt rE = eregOfRexRM(pfx,modrm);
24748 putYMMRegLoAndZU( rG, getXMMReg( rE ));
24749 DIP("vmovapd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
24750 delta += 1;
24751 } else {
24752 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24753 gen_SEGV_if_not_16_aligned( addr );
24754 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
24755 DIP("vmovapd %s,%s\n", dis_buf, nameXMMReg(rG));
24756 delta += alen;
24758 goto decode_success;
24760 /* VMOVAPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 28 /r */
24761 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24762 UChar modrm = getUChar(delta);
24763 UInt rG = gregOfRexRM(pfx, modrm);
24764 if (epartIsReg(modrm)) {
24765 UInt rE = eregOfRexRM(pfx,modrm);
24766 putYMMReg( rG, getYMMReg( rE ));
24767 DIP("vmovapd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
24768 delta += 1;
24769 } else {
24770 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24771 gen_SEGV_if_not_32_aligned( addr );
24772 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
24773 DIP("vmovapd %s,%s\n", dis_buf, nameYMMReg(rG));
24774 delta += alen;
24776 goto decode_success;
24778 /* VMOVAPS xmm2/m128, xmm1 = VEX.128.0F.WIG 28 /r */
24779 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24780 UChar modrm = getUChar(delta);
24781 UInt rG = gregOfRexRM(pfx, modrm);
24782 if (epartIsReg(modrm)) {
24783 UInt rE = eregOfRexRM(pfx,modrm);
24784 putYMMRegLoAndZU( rG, getXMMReg( rE ));
24785 DIP("vmovaps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
24786 delta += 1;
24787 } else {
24788 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24789 gen_SEGV_if_not_16_aligned( addr );
24790 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
24791 DIP("vmovaps %s,%s\n", dis_buf, nameXMMReg(rG));
24792 delta += alen;
24794 goto decode_success;
24796 /* VMOVAPS ymm2/m256, ymm1 = VEX.256.0F.WIG 28 /r */
24797 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24798 UChar modrm = getUChar(delta);
24799 UInt rG = gregOfRexRM(pfx, modrm);
24800 if (epartIsReg(modrm)) {
24801 UInt rE = eregOfRexRM(pfx,modrm);
24802 putYMMReg( rG, getYMMReg( rE ));
24803 DIP("vmovaps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
24804 delta += 1;
24805 } else {
24806 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24807 gen_SEGV_if_not_32_aligned( addr );
24808 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
24809 DIP("vmovaps %s,%s\n", dis_buf, nameYMMReg(rG));
24810 delta += alen;
24812 goto decode_success;
24814 break;
24816 case 0x29:
24817 /* VMOVAPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 29 /r */
24818 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24819 UChar modrm = getUChar(delta);
24820 UInt rG = gregOfRexRM(pfx,modrm);
24821 if (epartIsReg(modrm)) {
24822 UInt rE = eregOfRexRM(pfx,modrm);
24823 putYMMRegLoAndZU( rE, getXMMReg(rG) );
24824 DIP("vmovapd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
24825 delta += 1;
24826 } else {
24827 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24828 gen_SEGV_if_not_16_aligned( addr );
24829 storeLE( mkexpr(addr), getXMMReg(rG) );
24830 DIP("vmovapd %s,%s\n", nameXMMReg(rG), dis_buf );
24831 delta += alen;
24833 goto decode_success;
24835 /* VMOVAPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 29 /r */
24836 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24837 UChar modrm = getUChar(delta);
24838 UInt rG = gregOfRexRM(pfx,modrm);
24839 if (epartIsReg(modrm)) {
24840 UInt rE = eregOfRexRM(pfx,modrm);
24841 putYMMReg( rE, getYMMReg(rG) );
24842 DIP("vmovapd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
24843 delta += 1;
24844 } else {
24845 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24846 gen_SEGV_if_not_32_aligned( addr );
24847 storeLE( mkexpr(addr), getYMMReg(rG) );
24848 DIP("vmovapd %s,%s\n", nameYMMReg(rG), dis_buf );
24849 delta += alen;
24851 goto decode_success;
24853 /* VMOVAPS xmm1, xmm2/m128 = VEX.128.0F.WIG 29 /r */
24854 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24855 UChar modrm = getUChar(delta);
24856 UInt rG = gregOfRexRM(pfx,modrm);
24857 if (epartIsReg(modrm)) {
24858 UInt rE = eregOfRexRM(pfx,modrm);
24859 putYMMRegLoAndZU( rE, getXMMReg(rG) );
24860 DIP("vmovaps %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
24861 delta += 1;
24862 goto decode_success;
24863 } else {
24864 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24865 gen_SEGV_if_not_16_aligned( addr );
24866 storeLE( mkexpr(addr), getXMMReg(rG) );
24867 DIP("vmovaps %s,%s\n", nameXMMReg(rG), dis_buf );
24868 delta += alen;
24869 goto decode_success;
24872 /* VMOVAPS ymm1, ymm2/m256 = VEX.256.0F.WIG 29 /r */
24873 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24874 UChar modrm = getUChar(delta);
24875 UInt rG = gregOfRexRM(pfx,modrm);
24876 if (epartIsReg(modrm)) {
24877 UInt rE = eregOfRexRM(pfx,modrm);
24878 putYMMReg( rE, getYMMReg(rG) );
24879 DIP("vmovaps %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
24880 delta += 1;
24881 goto decode_success;
24882 } else {
24883 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24884 gen_SEGV_if_not_32_aligned( addr );
24885 storeLE( mkexpr(addr), getYMMReg(rG) );
24886 DIP("vmovaps %s,%s\n", nameYMMReg(rG), dis_buf );
24887 delta += alen;
24888 goto decode_success;
24891 break;
24893 case 0x2A: {
24894 IRTemp rmode = newTemp(Ity_I32);
24895 assign( rmode, get_sse_roundingmode() );
24896 /* VCVTSI2SD r/m32, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W0 2A /r */
24897 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
24898 UChar modrm = getUChar(delta);
24899 UInt rV = getVexNvvvv(pfx);
24900 UInt rD = gregOfRexRM(pfx, modrm);
24901 IRTemp arg32 = newTemp(Ity_I32);
24902 if (epartIsReg(modrm)) {
24903 UInt rS = eregOfRexRM(pfx,modrm);
24904 assign( arg32, getIReg32(rS) );
24905 delta += 1;
24906 DIP("vcvtsi2sdl %s,%s,%s\n",
24907 nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD));
24908 } else {
24909 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24910 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
24911 delta += alen;
24912 DIP("vcvtsi2sdl %s,%s,%s\n",
24913 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24915 putXMMRegLane64F( rD, 0,
24916 unop(Iop_I32StoF64, mkexpr(arg32)));
24917 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24918 putYMMRegLane128( rD, 1, mkV128(0) );
24919 *uses_vvvv = True;
24920 goto decode_success;
24922 /* VCVTSI2SD r/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W1 2A /r */
24923 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
24924 UChar modrm = getUChar(delta);
24925 UInt rV = getVexNvvvv(pfx);
24926 UInt rD = gregOfRexRM(pfx, modrm);
24927 IRTemp arg64 = newTemp(Ity_I64);
24928 if (epartIsReg(modrm)) {
24929 UInt rS = eregOfRexRM(pfx,modrm);
24930 assign( arg64, getIReg64(rS) );
24931 delta += 1;
24932 DIP("vcvtsi2sdq %s,%s,%s\n",
24933 nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD));
24934 } else {
24935 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24936 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
24937 delta += alen;
24938 DIP("vcvtsi2sdq %s,%s,%s\n",
24939 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24941 putXMMRegLane64F( rD, 0,
24942 binop( Iop_I64StoF64,
24943 get_sse_roundingmode(),
24944 mkexpr(arg64)) );
24945 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24946 putYMMRegLane128( rD, 1, mkV128(0) );
24947 *uses_vvvv = True;
24948 goto decode_success;
24950 /* VCVTSI2SS r/m64, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W1 2A /r */
24951 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
24952 UChar modrm = getUChar(delta);
24953 UInt rV = getVexNvvvv(pfx);
24954 UInt rD = gregOfRexRM(pfx, modrm);
24955 IRTemp arg64 = newTemp(Ity_I64);
24956 if (epartIsReg(modrm)) {
24957 UInt rS = eregOfRexRM(pfx,modrm);
24958 assign( arg64, getIReg64(rS) );
24959 delta += 1;
24960 DIP("vcvtsi2ssq %s,%s,%s\n",
24961 nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD));
24962 } else {
24963 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24964 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
24965 delta += alen;
24966 DIP("vcvtsi2ssq %s,%s,%s\n",
24967 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
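/* The conversion goes I64 -> F64 -> F32, both steps using the current
   SSE rounding mode.  In principle this could double-round relative to
   a single direct I64 -> F32 conversion. */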
24969 putXMMRegLane32F( rD, 0,
24970 binop(Iop_F64toF32,
24971 mkexpr(rmode),
24972 binop(Iop_I64StoF64, mkexpr(rmode),
24973 mkexpr(arg64)) ) );
24974 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
24975 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24976 putYMMRegLane128( rD, 1, mkV128(0) );
24977 *uses_vvvv = True;
24978 goto decode_success;
24980 /* VCVTSI2SS r/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W0 2A /r */
24981 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
24982 UChar modrm = getUChar(delta);
24983 UInt rV = getVexNvvvv(pfx);
24984 UInt rD = gregOfRexRM(pfx, modrm);
24985 IRTemp arg32 = newTemp(Ity_I32);
24986 if (epartIsReg(modrm)) {
24987 UInt rS = eregOfRexRM(pfx,modrm);
24988 assign( arg32, getIReg32(rS) );
24989 delta += 1;
24990 DIP("vcvtsi2ssl %s,%s,%s\n",
24991 nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD));
24992 } else {
24993 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24994 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
24995 delta += alen;
24996 DIP("vcvtsi2ssl %s,%s,%s\n",
24997 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24999 putXMMRegLane32F( rD, 0,
25000 binop(Iop_F64toF32,
25001 mkexpr(rmode),
25002 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
25003 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
25004 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
25005 putYMMRegLane128( rD, 1, mkV128(0) );
25006 *uses_vvvv = True;
25007 goto decode_success;
25009 break;
25012 case 0x2B:
25013 /* VMOVNTPD xmm1, m128 = VEX.128.66.0F.WIG 2B /r */
25014 /* VMOVNTPS xmm1, m128 = VEX.128.0F.WIG 2B /r */
25015 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
25016 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
25017 UChar modrm = getUChar(delta);
25018 UInt rS = gregOfRexRM(pfx, modrm);
25019 IRTemp tS = newTemp(Ity_V128);
25020 assign(tS, getXMMReg(rS));
25021 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25022 delta += alen;
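/* The non-temporal hint is not modelled in IR: apart from the
   alignment check this is a normal store. */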
25023 gen_SEGV_if_not_16_aligned(addr);
25024 storeLE(mkexpr(addr), mkexpr(tS));
25025 DIP("vmovntp%c %s,%s\n", have66(pfx) ? 'd' : 's',
25026 nameXMMReg(rS), dis_buf);
25027 goto decode_success;
25029 /* VMOVNTPD ymm1, m256 = VEX.256.66.0F.WIG 2B /r */
25030 /* VMOVNTPS ymm1, m256 = VEX.256.0F.WIG 2B /r */
25031 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
25032 && 1==getVexL(pfx)/*256*/ && !epartIsReg(getUChar(delta))) {
25033 UChar modrm = getUChar(delta);
25034 UInt rS = gregOfRexRM(pfx, modrm);
25035 IRTemp tS = newTemp(Ity_V256);
25036 assign(tS, getYMMReg(rS));
25037 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25038 delta += alen;
25039 gen_SEGV_if_not_32_aligned(addr);
25040 storeLE(mkexpr(addr), mkexpr(tS));
25041 DIP("vmovntp%c %s,%s\n", have66(pfx) ? 'd' : 's',
25042 nameYMMReg(rS), dis_buf);
25043 goto decode_success;
25045 break;
25047 case 0x2C:
25048 /* VCVTTSD2SI xmm1/m64, r32 = VEX.LIG.F2.0F.W0 2C /r */
25049 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
25050 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
25051 goto decode_success;
25053 /* VCVTTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2C /r */
25054 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
25055 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
25056 goto decode_success;
25058 /* VCVTTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2C /r */
25059 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
25060 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
25061 goto decode_success;
25063 /* VCVTTSS2SI xmm1/m32, r64 = VEX.LIG.F3.0F.W1 2C /r */
25064 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
25065 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
25066 goto decode_success;
25068 break;
25070 case 0x2D:
25071 /* VCVTSD2SI xmm1/m64, r32 = VEX.LIG.F2.0F.W0 2D /r */
25072 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
25073 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
25074 goto decode_success;
25076 /* VCVTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2D /r */
25077 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
25078 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
25079 goto decode_success;
25081 /* VCVTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2D /r */
25082 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
25083 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
25084 goto decode_success;
25087 /* VCVTSS2SI xmm1/m32, r64 = VEX.LIG.F3.0F.W1 2D /r */
25087 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
25088 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
25089 goto decode_success;
25091 break;
25093 case 0x2E:
25094 case 0x2F:
25095 /* VUCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2E /r */
25096 /* VCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2F /r */
25097 if (have66noF2noF3(pfx)) {
25098 delta = dis_COMISD( vbi, pfx, delta, True/*isAvx*/, opc );
25099 goto decode_success;
25101 /* VUCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2E /r */
25102 /* VCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2F /r */
25103 if (haveNo66noF2noF3(pfx)) {
25104 delta = dis_COMISS( vbi, pfx, delta, True/*isAvx*/, opc );
25105 goto decode_success;
25107 break;
25109 case 0x50:
25110 /* VMOVMSKPD xmm2, r32 = VEX.128.66.0F.WIG 50 /r */
25111 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25112 delta = dis_MOVMSKPD_128( vbi, pfx, delta, True/*isAvx*/ );
25113 goto decode_success;
25115 /* VMOVMSKPD ymm2, r32 = VEX.256.66.0F.WIG 50 /r */
25116 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25117 delta = dis_MOVMSKPD_256( vbi, pfx, delta );
25118 goto decode_success;
25120 /* VMOVMSKPS xmm2, r32 = VEX.128.0F.WIG 50 /r */
25121 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25122 delta = dis_MOVMSKPS_128( vbi, pfx, delta, True/*isAvx*/ );
25123 goto decode_success;
25125 /* VMOVMSKPS ymm2, r32 = VEX.256.0F.WIG 50 /r */
25126 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25127 delta = dis_MOVMSKPS_256( vbi, pfx, delta );
25128 goto decode_success;
25130 break;
25132 case 0x51:
25133 /* VSQRTSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 51 /r */
25134 if (haveF3no66noF2(pfx)) {
25135 delta = dis_AVX128_E_V_to_G_lo32_unary(
25136 uses_vvvv, vbi, pfx, delta, "vsqrtss", Iop_Sqrt32F0x4 );
25137 goto decode_success;
25139 /* VSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 51 /r */
25140 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25141 delta = dis_AVX128_E_to_G_unary_all(
25142 uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx4 );
25143 goto decode_success;
25145 /* VSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 51 /r */
25146 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25147 delta = dis_AVX256_E_to_G_unary_all(
25148 uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx8 );
25149 goto decode_success;
25151 /* VSQRTSD xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F2.0F.WIG 51 /r */
25152 if (haveF2no66noF3(pfx)) {
25153 delta = dis_AVX128_E_V_to_G_lo64_unary(
25154 uses_vvvv, vbi, pfx, delta, "vsqrtsd", Iop_Sqrt64F0x2 );
25155 goto decode_success;
25157 /* VSQRTPD xmm2/m128(E), xmm1(G) = VEX.NDS.128.66.0F.WIG 51 /r */
25158 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25159 delta = dis_AVX128_E_to_G_unary_all(
25160 uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx2 );
25161 goto decode_success;
25163 /* VSQRTPD ymm2/m256(E), ymm1(G) = VEX.NDS.256.66.0F.WIG 51 /r */
25164 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25165 delta = dis_AVX256_E_to_G_unary_all(
25166 uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx4 );
25167 goto decode_success;
25169 break;
25171 case 0x52:
25172 /* VRSQRTSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 52 /r */
25173 if (haveF3no66noF2(pfx)) {
25174 delta = dis_AVX128_E_V_to_G_lo32_unary(
25175 uses_vvvv, vbi, pfx, delta, "vrsqrtss",
25176 Iop_RSqrtEst32F0x4 );
25177 goto decode_success;
25179 /* VRSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 52 /r */
25180 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25181 delta = dis_AVX128_E_to_G_unary_all(
25182 uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrtEst32Fx4 );
25183 goto decode_success;
25185 /* VRSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 52 /r */
25186 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25187 delta = dis_AVX256_E_to_G_unary_all(
25188 uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrtEst32Fx8 );
25189 goto decode_success;
25191 break;
25193 case 0x53:
25194 /* VRCPSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 53 /r */
25195 if (haveF3no66noF2(pfx)) {
25196 delta = dis_AVX128_E_V_to_G_lo32_unary(
25197 uses_vvvv, vbi, pfx, delta, "vrcpss", Iop_RecipEst32F0x4 );
25198 goto decode_success;
25200 /* VRCPPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 53 /r */
25201 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25202 delta = dis_AVX128_E_to_G_unary_all(
25203 uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_RecipEst32Fx4 );
25204 goto decode_success;
25206 /* VRCPPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 53 /r */
25207 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25208 delta = dis_AVX256_E_to_G_unary_all(
25209 uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_RecipEst32Fx8 );
25210 goto decode_success;
25212 break;
25214 case 0x54:
25215 /* VANDPD r/m, rV, r ::: r = rV & r/m */
25216 /* VANDPD = VEX.NDS.128.66.0F.WIG 54 /r */
25217 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25218 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25219 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV128 );
25220 goto decode_success;
25222 /* VANDPD r/m, rV, r ::: r = rV & r/m */
25223 /* VANDPD = VEX.NDS.256.66.0F.WIG 54 /r */
25224 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25225 delta = dis_AVX256_E_V_to_G(
25226 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV256 );
25227 goto decode_success;
25229 /* VANDPS = VEX.NDS.128.0F.WIG 54 /r */
25230 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25231 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25232 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV128 );
25233 goto decode_success;
25235 /* VANDPS = VEX.NDS.256.0F.WIG 54 /r */
25236 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25237 delta = dis_AVX256_E_V_to_G(
25238 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV256 );
25239 goto decode_success;
25241 break;
25243 case 0x55:
25244 /* VANDNPD r/m, rV, r ::: r = (not rV) & r/m */
25245 /* VANDNPD = VEX.NDS.128.66.0F.WIG 55 /r */
25246 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25247 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25248 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV128,
25249 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
25250 goto decode_success;
25252 /* VANDNPD = VEX.NDS.256.66.0F.WIG 55 /r */
25253 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25254 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
25255 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV256,
25256 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
25257 goto decode_success;
25259 /* VANDNPS = VEX.NDS.128.0F.WIG 55 /r */
25260 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25261 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25262 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV128,
25263 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
25264 goto decode_success;
25266 /* VANDNPS = VEX.NDS.256.0F.WIG 55 /r */
25267 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25268 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
25269 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV256,
25270 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
25271 goto decode_success;
25273 break;
25275 case 0x56:
25276 /* VORPD r/m, rV, r ::: r = rV | r/m */
25277 /* VORPD = VEX.NDS.128.66.0F.WIG 56 /r */
25278 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25279 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25280 uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV128 );
25281 goto decode_success;
25283 /* VORPD r/m, rV, r ::: r = rV | r/m */
25284 /* VORPD = VEX.NDS.256.66.0F.WIG 56 /r */
25285 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25286 delta = dis_AVX256_E_V_to_G(
25287 uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV256 );
25288 goto decode_success;
25290 /* VORPS r/m, rV, r ::: r = rV | r/m */
25291 /* VORPS = VEX.NDS.128.0F.WIG 56 /r */
25292 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25293 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25294 uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV128 );
25295 goto decode_success;
25297 /* VORPS r/m, rV, r ::: r = rV | r/m */
25298 /* VORPS = VEX.NDS.256.0F.WIG 56 /r */
25299 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25300 delta = dis_AVX256_E_V_to_G(
25301 uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV256 );
25302 goto decode_success;
25304 break;
25306 case 0x57:
25307 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */
25308 /* VXORPD = VEX.NDS.128.66.0F.WIG 57 /r */
25309 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25310 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25311 uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV128 );
25312 goto decode_success;
25314 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */
25315 /* VXORPD = VEX.NDS.256.66.0F.WIG 57 /r */
25316 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25317 delta = dis_AVX256_E_V_to_G(
25318 uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV256 );
25319 goto decode_success;
25321 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */
25322 /* VXORPS = VEX.NDS.128.0F.WIG 57 /r */
25323 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25324 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25325 uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV128 );
25326 goto decode_success;
25328 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */
25329 /* VXORPS = VEX.NDS.256.0F.WIG 57 /r */
25330 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25331 delta = dis_AVX256_E_V_to_G(
25332 uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV256 );
25333 goto decode_success;
25335 break;
25337 case 0x58:
25338 /* VADDSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 58 /r */
25339 if (haveF2no66noF3(pfx)) {
25340 delta = dis_AVX128_E_V_to_G_lo64(
25341 uses_vvvv, vbi, pfx, delta, "vaddsd", Iop_Add64F0x2 );
25342 goto decode_success;
25344 /* VADDSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 58 /r */
25345 if (haveF3no66noF2(pfx)) {
25346 delta = dis_AVX128_E_V_to_G_lo32(
25347 uses_vvvv, vbi, pfx, delta, "vaddss", Iop_Add32F0x4 );
25348 goto decode_success;
25350 /* VADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 58 /r */
25351 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25352 delta = dis_AVX128_E_V_to_G(
25353 uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx4 );
25354 goto decode_success;
25356 /* VADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 58 /r */
25357 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25358 delta = dis_AVX256_E_V_to_G(
25359 uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx8 );
25360 goto decode_success;
25362 /* VADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 58 /r */
25363 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25364 delta = dis_AVX128_E_V_to_G(
25365 uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx2 );
25366 goto decode_success;
25368 /* VADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 58 /r */
25369 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25370 delta = dis_AVX256_E_V_to_G(
25371 uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx4 );
25372 goto decode_success;
25374 break;
25376 case 0x59:
25377 /* VMULSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 59 /r */
25378 if (haveF2no66noF3(pfx)) {
25379 delta = dis_AVX128_E_V_to_G_lo64(
25380 uses_vvvv, vbi, pfx, delta, "vmulsd", Iop_Mul64F0x2 );
25381 goto decode_success;
25383 /* VMULSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 59 /r */
25384 if (haveF3no66noF2(pfx)) {
25385 delta = dis_AVX128_E_V_to_G_lo32(
25386 uses_vvvv, vbi, pfx, delta, "vmulss", Iop_Mul32F0x4 );
25387 goto decode_success;
25389 /* VMULPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 59 /r */
25390 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25391 delta = dis_AVX128_E_V_to_G(
25392 uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx4 );
25393 goto decode_success;
25395 /* VMULPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 59 /r */
25396 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25397 delta = dis_AVX256_E_V_to_G(
25398 uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx8 );
25399 goto decode_success;
25401 /* VMULPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 59 /r */
25402 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25403 delta = dis_AVX128_E_V_to_G(
25404 uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx2 );
25405 goto decode_success;
25407 /* VMULPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 59 /r */
25408 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25409 delta = dis_AVX256_E_V_to_G(
25410 uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx4 );
25411 goto decode_success;
25413 break;
25415 case 0x5A:
25416 /* VCVTPS2PD xmm2/m64, xmm1 = VEX.128.0F.WIG 5A /r */
25417 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25418 delta = dis_CVTPS2PD_128( vbi, pfx, delta, True/*isAvx*/ );
25419 goto decode_success;
25421 /* VCVTPS2PD xmm2/m128, ymm1 = VEX.256.0F.WIG 5A /r */
25422 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25423 delta = dis_CVTPS2PD_256( vbi, pfx, delta );
25424 goto decode_success;
25426 /* VCVTPD2PS xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5A /r */
25427 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25428 delta = dis_CVTPD2PS_128( vbi, pfx, delta, True/*isAvx*/ );
25429 goto decode_success;
25431 /* VCVTPD2PS ymm2/m256, xmm1 = VEX.256.66.0F.WIG 5A /r */
25432 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25433 delta = dis_CVTPD2PS_256( vbi, pfx, delta );
25434 goto decode_success;
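/* Overview of the two scalar conversions below (VCVTSD2SS and
   VCVTSS2SD): they are decoded by hand rather than via the
   dis_AVX128_* helpers.  Roughly, the low lane of E is converted and
   written to lane 0 of the destination, the remaining bits up to 127
   are copied from the vvvv register, and bits 255:128 are zeroed, per
   the usual VEX convention.  For the narrowing F64->F32 case the
   rounding mode comes from get_sse_roundingmode(), which models
   MXCSR.RC. */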
25436 /* VCVTSD2SS xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5A /r */
25437 if (haveF2no66noF3(pfx)) {
25438 UChar modrm = getUChar(delta);
25439 UInt rV = getVexNvvvv(pfx);
25440 UInt rD = gregOfRexRM(pfx, modrm);
25441 IRTemp f64lo = newTemp(Ity_F64);
25442 IRTemp rmode = newTemp(Ity_I32);
25443 assign( rmode, get_sse_roundingmode() );
25444 if (epartIsReg(modrm)) {
25445 UInt rS = eregOfRexRM(pfx,modrm);
25446 assign(f64lo, getXMMRegLane64F(rS, 0));
25447 delta += 1;
25448 DIP("vcvtsd2ss %s,%s,%s\n",
25449 nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD));
25450 } else {
25451 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25452 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)) );
25453 delta += alen;
25454 DIP("vcvtsd2ss %s,%s,%s\n",
25455 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
25457 putXMMRegLane32F( rD, 0,
25458 binop( Iop_F64toF32, mkexpr(rmode),
25459 mkexpr(f64lo)) );
25460 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
25461 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
25462 putYMMRegLane128( rD, 1, mkV128(0) );
25463 *uses_vvvv = True;
25464 goto decode_success;
25466 /* VCVTSS2SD xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5A /r */
25467 if (haveF3no66noF2(pfx)) {
25468 UChar modrm = getUChar(delta);
25469 UInt rV = getVexNvvvv(pfx);
25470 UInt rD = gregOfRexRM(pfx, modrm);
25471 IRTemp f32lo = newTemp(Ity_F32);
25472 if (epartIsReg(modrm)) {
25473 UInt rS = eregOfRexRM(pfx,modrm);
25474 assign(f32lo, getXMMRegLane32F(rS, 0));
25475 delta += 1;
25476 DIP("vcvtss2sd %s,%s,%s\n",
25477 nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD));
25478 } else {
25479 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25480 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)) );
25481 delta += alen;
25482 DIP("vcvtss2sd %s,%s,%s\n",
25483 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
25485 putXMMRegLane64F( rD, 0,
25486 unop( Iop_F32toF64, mkexpr(f32lo)) );
25487 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
25488 putYMMRegLane128( rD, 1, mkV128(0) );
25489 *uses_vvvv = True;
25490 goto decode_success;
25492 break;
25494 case 0x5B:
25495 /* VCVTPS2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5B /r */
25496 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25497 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta,
25498 True/*isAvx*/, False/*!r2zero*/ );
25499 goto decode_success;
25501 /* VCVTPS2DQ ymm2/m256, ymm1 = VEX.256.66.0F.WIG 5B /r */
25502 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25503 delta = dis_CVTxPS2DQ_256( vbi, pfx, delta,
25504 False/*!r2zero*/ );
25505 goto decode_success;
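/* Note: the r2zero flag distinguishes the two encodings handled by
   dis_CVTxPS2DQ_*: False gives VCVTPS2DQ, which rounds using the
   current SSE rounding mode, while True gives VCVTTPS2DQ, which
   truncates (rounds towards zero). */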
25507 /* VCVTTPS2DQ xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 5B /r */
25508 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
25509 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta,
25510 True/*isAvx*/, True/*r2zero*/ );
25511 goto decode_success;
25513 /* VCVTTPS2DQ ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 5B /r */
25514 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
25515 delta = dis_CVTxPS2DQ_256( vbi, pfx, delta,
25516 True/*r2zero*/ );
25517 goto decode_success;
25519 /* VCVTDQ2PS xmm2/m128, xmm1 = VEX.128.0F.WIG 5B /r */
25520 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25521 delta = dis_CVTDQ2PS_128 ( vbi, pfx, delta, True/*isAvx*/ );
25522 goto decode_success;
25524 /* VCVTDQ2PS ymm2/m256, ymm1 = VEX.256.0F.WIG 5B /r */
25525 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25526 delta = dis_CVTDQ2PS_256 ( vbi, pfx, delta );
25527 goto decode_success;
25529 break;
25531 case 0x5C:
25532 /* VSUBSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5C /r */
25533 if (haveF2no66noF3(pfx)) {
25534 delta = dis_AVX128_E_V_to_G_lo64(
25535 uses_vvvv, vbi, pfx, delta, "vsubsd", Iop_Sub64F0x2 );
25536 goto decode_success;
25538 /* VSUBSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5C /r */
25539 if (haveF3no66noF2(pfx)) {
25540 delta = dis_AVX128_E_V_to_G_lo32(
25541 uses_vvvv, vbi, pfx, delta, "vsubss", Iop_Sub32F0x4 );
25542 goto decode_success;
25544 /* VSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5C /r */
25545 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25546 delta = dis_AVX128_E_V_to_G(
25547 uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx4 );
25548 goto decode_success;
25550 /* VSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5C /r */
25551 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25552 delta = dis_AVX256_E_V_to_G(
25553 uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx8 );
25554 goto decode_success;
25556 /* VSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5C /r */
25557 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25558 delta = dis_AVX128_E_V_to_G(
25559 uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx2 );
25560 goto decode_success;
25562 /* VSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5C /r */
25563 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25564 delta = dis_AVX256_E_V_to_G(
25565 uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx4 );
25566 goto decode_success;
25568 break;
25570 case 0x5D:
25571 /* VMINSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5D /r */
25572 if (haveF2no66noF3(pfx)) {
25573 delta = dis_AVX128_E_V_to_G_lo64(
25574 uses_vvvv, vbi, pfx, delta, "vminsd", Iop_Min64F0x2 );
25575 goto decode_success;
25577 /* VMINSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5D /r */
25578 if (haveF3no66noF2(pfx)) {
25579 delta = dis_AVX128_E_V_to_G_lo32(
25580 uses_vvvv, vbi, pfx, delta, "vminss", Iop_Min32F0x4 );
25581 goto decode_success;
25583 /* VMINPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5D /r */
25584 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25585 delta = dis_AVX128_E_V_to_G(
25586 uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx4 );
25587 goto decode_success;
25589 /* VMINPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5D /r */
25590 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25591 delta = dis_AVX256_E_V_to_G(
25592 uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx8 );
25593 goto decode_success;
25595 /* VMINPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5D /r */
25596 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25597 delta = dis_AVX128_E_V_to_G(
25598 uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx2 );
25599 goto decode_success;
25601 /* VMINPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5D /r */
25602 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25603 delta = dis_AVX256_E_V_to_G(
25604 uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx4 );
25605 goto decode_success;
25607 break;
25609 case 0x5E:
25610 /* VDIVSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5E /r */
25611 if (haveF2no66noF3(pfx)) {
25612 delta = dis_AVX128_E_V_to_G_lo64(
25613 uses_vvvv, vbi, pfx, delta, "vdivsd", Iop_Div64F0x2 );
25614 goto decode_success;
25616 /* VDIVSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5E /r */
25617 if (haveF3no66noF2(pfx)) {
25618 delta = dis_AVX128_E_V_to_G_lo32(
25619 uses_vvvv, vbi, pfx, delta, "vdivss", Iop_Div32F0x4 );
25620 goto decode_success;
25622 /* VDIVPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5E /r */
25623 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25624 delta = dis_AVX128_E_V_to_G(
25625 uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx4 );
25626 goto decode_success;
25628 /* VDIVPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5E /r */
25629 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25630 delta = dis_AVX256_E_V_to_G(
25631 uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx8 );
25632 goto decode_success;
25634 /* VDIVPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5E /r */
25635 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25636 delta = dis_AVX128_E_V_to_G(
25637 uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx2 );
25638 goto decode_success;
25640 /* VDIVPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5E /r */
25641 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25642 delta = dis_AVX256_E_V_to_G(
25643 uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx4 );
25644 goto decode_success;
25646 break;
25648 case 0x5F:
25649 /* VMAXSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5F /r */
25650 if (haveF2no66noF3(pfx)) {
25651 delta = dis_AVX128_E_V_to_G_lo64(
25652 uses_vvvv, vbi, pfx, delta, "vmaxsd", Iop_Max64F0x2 );
25653 goto decode_success;
25655 /* VMAXSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5F /r */
25656 if (haveF3no66noF2(pfx)) {
25657 delta = dis_AVX128_E_V_to_G_lo32(
25658 uses_vvvv, vbi, pfx, delta, "vmaxss", Iop_Max32F0x4 );
25659 goto decode_success;
25661 /* VMAXPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5F /r */
25662 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25663 delta = dis_AVX128_E_V_to_G(
25664 uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx4 );
25665 goto decode_success;
25667 /* VMAXPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5F /r */
25668 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25669 delta = dis_AVX256_E_V_to_G(
25670 uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx8 );
25671 goto decode_success;
25673 /* VMAXPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5F /r */
25674 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25675 delta = dis_AVX128_E_V_to_G(
25676 uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx2 );
25677 goto decode_success;
25679 /* VMAXPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5F /r */
25680 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25681 delta = dis_AVX256_E_V_to_G(
25682 uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx4 );
25683 goto decode_success;
25685 break;
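/* Cases 0x60 .. 0x6D below are the integer interleave/pack family.
   As a rough illustration of the interleave-lo semantics, for 128-bit
   VPUNPCKLBW with V = vvvv and E = r/m the result bytes, low first,
   are { V.b0, E.b0, V.b1, E.b1, ..., V.b7, E.b7 }.  swapArgs is
   passed so the IR Interleave ops see their arguments in the order
   that produces this layout, and the 256-bit forms go through the
   math_VPUNPCK*_YMM helpers because the AVX2 versions interleave
   within each 128-bit lane independently. */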
25687 case 0x60:
25688 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */
25689 /* VPUNPCKLBW = VEX.NDS.128.66.0F.WIG 60 /r */
25690 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25691 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25692 uses_vvvv, vbi, pfx, delta, "vpunpcklbw",
25693 Iop_InterleaveLO8x16, NULL,
25694 False/*!invertLeftArg*/, True/*swapArgs*/ );
25695 goto decode_success;
25697 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */
25698 /* VPUNPCKLBW = VEX.NDS.256.66.0F.WIG 60 /r */
25699 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25700 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25701 uses_vvvv, vbi, pfx, delta, "vpunpcklbw",
25702 math_VPUNPCKLBW_YMM );
25703 goto decode_success;
25705 break;
25707 case 0x61:
25708 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */
25709 /* VPUNPCKLWD = VEX.NDS.128.66.0F.WIG 61 /r */
25710 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25711 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25712 uses_vvvv, vbi, pfx, delta, "vpunpcklwd",
25713 Iop_InterleaveLO16x8, NULL,
25714 False/*!invertLeftArg*/, True/*swapArgs*/ );
25715 goto decode_success;
25717 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */
25718 /* VPUNPCKLWD = VEX.NDS.256.66.0F.WIG 61 /r */
25719 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25720 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25721 uses_vvvv, vbi, pfx, delta, "vpunpcklwd",
25722 math_VPUNPCKLWD_YMM );
25723 goto decode_success;
25725 break;
25727 case 0x62:
25728 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */
25729 /* VPUNPCKLDQ = VEX.NDS.128.66.0F.WIG 62 /r */
25730 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25731 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25732 uses_vvvv, vbi, pfx, delta, "vpunpckldq",
25733 Iop_InterleaveLO32x4, NULL,
25734 False/*!invertLeftArg*/, True/*swapArgs*/ );
25735 goto decode_success;
25737 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */
25738 /* VPUNPCKLDQ = VEX.NDS.256.66.0F.WIG 62 /r */
25739 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25740 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25741 uses_vvvv, vbi, pfx, delta, "vpunpckldq",
25742 math_VPUNPCKLDQ_YMM );
25743 goto decode_success;
25745 break;
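/* The pack cases (0x63 VPACKSSWB, 0x67 VPACKUSWB, 0x6B VPACKSSDW)
   narrow each source element with saturation: 16-bit signed to 8-bit
   signed, 16-bit signed to 8-bit unsigned, and 32-bit signed to
   16-bit signed respectively, which is what the Iop_QNarrowBin*
   names indicate.  The 256-bit forms again work on each 128-bit lane
   separately, hence the math_VPACK*_YMM helpers. */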
25747 case 0x63:
25748 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */
25749 /* VPACKSSWB = VEX.NDS.128.66.0F.WIG 63 /r */
25750 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25751 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25752 uses_vvvv, vbi, pfx, delta, "vpacksswb",
25753 Iop_QNarrowBin16Sto8Sx16, NULL,
25754 False/*!invertLeftArg*/, True/*swapArgs*/ );
25755 goto decode_success;
25757 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */
25758 /* VPACKSSWB = VEX.NDS.256.66.0F.WIG 63 /r */
25759 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25760 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25761 uses_vvvv, vbi, pfx, delta, "vpacksswb",
25762 math_VPACKSSWB_YMM );
25763 goto decode_success;
25765 break;
25767 case 0x64:
25768 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */
25769 /* VPCMPGTB = VEX.NDS.128.66.0F.WIG 64 /r */
25770 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25771 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25772 uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx16 );
25773 goto decode_success;
25775 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */
25776 /* VPCMPGTB = VEX.NDS.256.66.0F.WIG 64 /r */
25777 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25778 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25779 uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx32 );
25780 goto decode_success;
25782 break;
25784 case 0x65:
25785 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */
25786 /* VPCMPGTW = VEX.NDS.128.66.0F.WIG 65 /r */
25787 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25788 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25789 uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx8 );
25790 goto decode_success;
25792 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */
25793 /* VPCMPGTW = VEX.NDS.256.66.0F.WIG 65 /r */
25794 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25795 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25796 uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx16 );
25797 goto decode_success;
25799 break;
25801 case 0x66:
25802 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */
25803 /* VPCMPGTD = VEX.NDS.128.66.0F.WIG 66 /r */
25804 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25805 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25806 uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx4 );
25807 goto decode_success;
25809 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */
25810 /* VPCMPGTD = VEX.NDS.256.66.0F.WIG 66 /r */
25811 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25812 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25813 uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx8 );
25814 goto decode_success;
25816 break;
25818 case 0x67:
25819 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */
25820 /* VPACKUSWB = VEX.NDS.128.66.0F.WIG 67 /r */
25821 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25822 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25823 uses_vvvv, vbi, pfx, delta, "vpackuswb",
25824 Iop_QNarrowBin16Sto8Ux16, NULL,
25825 False/*!invertLeftArg*/, True/*swapArgs*/ );
25826 goto decode_success;
25828 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */
25829 /* VPACKUSWB = VEX.NDS.256.66.0F.WIG 67 /r */
25830 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25831 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25832 uses_vvvv, vbi, pfx, delta, "vpackuswb",
25833 math_VPACKUSWB_YMM );
25834 goto decode_success;
25836 break;
25838 case 0x68:
25839 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */
25840 /* VPUNPCKHBW = VEX.NDS.128.66.0F.WIG 68 /r */
25841 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25842 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25843 uses_vvvv, vbi, pfx, delta, "vpunpckhbw",
25844 Iop_InterleaveHI8x16, NULL,
25845 False/*!invertLeftArg*/, True/*swapArgs*/ );
25846 goto decode_success;
25848 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */
25849 /* VPUNPCKHBW = VEX.NDS.256.66.0F.WIG 68 /r */
25850 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25851 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25852 uses_vvvv, vbi, pfx, delta, "vpunpckhbw",
25853 math_VPUNPCKHBW_YMM );
25854 goto decode_success;
25856 break;
25858 case 0x69:
25859 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */
25860 /* VPUNPCKHWD = VEX.NDS.128.66.0F.WIG 69 /r */
25861 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25862 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25863 uses_vvvv, vbi, pfx, delta, "vpunpckhwd",
25864 Iop_InterleaveHI16x8, NULL,
25865 False/*!invertLeftArg*/, True/*swapArgs*/ );
25866 goto decode_success;
25868 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */
25869 /* VPUNPCKHWD = VEX.NDS.256.66.0F.WIG 69 /r */
25870 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25871 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25872 uses_vvvv, vbi, pfx, delta, "vpunpckhwd",
25873 math_VPUNPCKHWD_YMM );
25874 goto decode_success;
25876 break;
25878 case 0x6A:
25879 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */
25880 /* VPUNPCKHDQ = VEX.NDS.128.66.0F.WIG 6A /r */
25881 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25882 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25883 uses_vvvv, vbi, pfx, delta, "vpunpckhdq",
25884 Iop_InterleaveHI32x4, NULL,
25885 False/*!invertLeftArg*/, True/*swapArgs*/ );
25886 goto decode_success;
25888 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */
25889 /* VPUNPCKHDQ = VEX.NDS.256.66.0F.WIG 6A /r */
25890 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25891 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25892 uses_vvvv, vbi, pfx, delta, "vpunpckhdq",
25893 math_VPUNPCKHDQ_YMM );
25894 goto decode_success;
25896 break;
25898 case 0x6B:
25899 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */
25900 /* VPACKSSDW = VEX.NDS.128.66.0F.WIG 6B /r */
25901 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25902 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25903 uses_vvvv, vbi, pfx, delta, "vpackssdw",
25904 Iop_QNarrowBin32Sto16Sx8, NULL,
25905 False/*!invertLeftArg*/, True/*swapArgs*/ );
25906 goto decode_success;
25908 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */
25909 /* VPACKSSDW = VEX.NDS.256.66.0F.WIG 6B /r */
25910 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25911 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25912 uses_vvvv, vbi, pfx, delta, "vpackssdw",
25913 math_VPACKSSDW_YMM );
25914 goto decode_success;
25916 break;
25918 case 0x6C:
25919 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */
25920 /* VPUNPCKLQDQ = VEX.NDS.128.66.0F.WIG 6C /r */
25921 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25922 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25923 uses_vvvv, vbi, pfx, delta, "vpunpcklqdq",
25924 Iop_InterleaveLO64x2, NULL,
25925 False/*!invertLeftArg*/, True/*swapArgs*/ );
25926 goto decode_success;
25928 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */
25929 /* VPUNPCKLQDQ = VEX.NDS.256.66.0F.WIG 6C /r */
25930 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25931 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25932 uses_vvvv, vbi, pfx, delta, "vpunpcklqdq",
25933 math_VPUNPCKLQDQ_YMM );
25934 goto decode_success;
25936 break;
25938 case 0x6D:
25939 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */
25940 /* VPUNPCKHQDQ = VEX.NDS.128.66.0F.WIG 6D /r */
25941 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25942 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25943 uses_vvvv, vbi, pfx, delta, "vpunpckhqdq",
25944 Iop_InterleaveHI64x2, NULL,
25945 False/*!invertLeftArg*/, True/*swapArgs*/ );
25946 goto decode_success;
25948 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */
25949 /* VPUNPCKHQDQ = VEX.NDS.256.66.0F.WIG 6D /r */
25950 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25951 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25952 uses_vvvv, vbi, pfx, delta, "vpunpckhqdq",
25953 math_VPUNPCKHQDQ_YMM );
25954 goto decode_success;
25956 break;
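/* For the GPR-to-XMM moves below (VMOVD/VMOVQ, opcode 6E), the scalar
   is zero-extended into a V128 via Iop_32UtoV128 / Iop_64UtoV128 and
   then written with putYMMRegLoAndZU, which also zeroes bits 255:128
   of the destination, as VEX-encoded writes to an xmm register
   require. */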
25958 case 0x6E:
25959 /* VMOVD r32/m32, xmm1 = VEX.128.66.0F.W0 6E */
25960 if (have66noF2noF3(pfx)
25961 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
25962 vassert(sz == 2); /* even tho we are transferring 4, not 2. */
25963 UChar modrm = getUChar(delta);
25964 if (epartIsReg(modrm)) {
25965 delta += 1;
25966 putYMMRegLoAndZU(
25967 gregOfRexRM(pfx,modrm),
25968 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) )
25970 DIP("vmovd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
25971 nameXMMReg(gregOfRexRM(pfx,modrm)));
25972 } else {
25973 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25974 delta += alen;
25975 putYMMRegLoAndZU(
25976 gregOfRexRM(pfx,modrm),
25977 unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)))
25979 DIP("vmovd %s, %s\n", dis_buf,
25980 nameXMMReg(gregOfRexRM(pfx,modrm)));
25982 goto decode_success;
25984 /* VMOVQ r64/m64, xmm1 = VEX.128.66.0F.W1 6E */
25985 if (have66noF2noF3(pfx)
25986 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
25987 vassert(sz == 2); /* even tho we are transferring 8, not 2. */
25988 UChar modrm = getUChar(delta);
25989 if (epartIsReg(modrm)) {
25990 delta += 1;
25991 putYMMRegLoAndZU(
25992 gregOfRexRM(pfx,modrm),
25993 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) )
25995 DIP("vmovq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
25996 nameXMMReg(gregOfRexRM(pfx,modrm)));
25997 } else {
25998 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25999 delta += alen;
26000 putYMMRegLoAndZU(
26001 gregOfRexRM(pfx,modrm),
26002 unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)))
26004 DIP("vmovq %s, %s\n", dis_buf,
26005 nameXMMReg(gregOfRexRM(pfx,modrm)));
26007 goto decode_success;
26009 break;
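/* VMOVDQA vs VMOVDQU (opcode 6F, and the store form at 7F): the only
   behavioural difference modelled here is that the aligned ('a')
   variant plants a gen_SEGV_if_not_16/32_aligned check on memory
   operands; the data movement itself is identical. */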
26011 case 0x6F:
26012 /* VMOVDQA ymm2/m256, ymm1 = VEX.256.66.0F.WIG 6F */
26013 /* VMOVDQU ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 6F */
26014 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
26015 && 1==getVexL(pfx)/*256*/) {
26016 UChar modrm = getUChar(delta);
26017 UInt rD = gregOfRexRM(pfx, modrm);
26018 IRTemp tD = newTemp(Ity_V256);
26019 Bool isA = have66noF2noF3(pfx);
26020 HChar ch = isA ? 'a' : 'u';
26021 if (epartIsReg(modrm)) {
26022 UInt rS = eregOfRexRM(pfx, modrm);
26023 delta += 1;
26024 assign(tD, getYMMReg(rS));
26025 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD));
26026 } else {
26027 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
26028 delta += alen;
26029 if (isA)
26030 gen_SEGV_if_not_32_aligned(addr);
26031 assign(tD, loadLE(Ity_V256, mkexpr(addr)));
26032 DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameYMMReg(rD));
26034 putYMMReg(rD, mkexpr(tD));
26035 goto decode_success;
26037 /* VMOVDQA xmm2/m128, xmm1 = VEX.128.66.0F.WIG 6F */
26038 /* VMOVDQU xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 6F */
26039 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
26040 && 0==getVexL(pfx)/*128*/) {
26041 UChar modrm = getUChar(delta);
26042 UInt rD = gregOfRexRM(pfx, modrm);
26043 IRTemp tD = newTemp(Ity_V128);
26044 Bool isA = have66noF2noF3(pfx);
26045 HChar ch = isA ? 'a' : 'u';
26046 if (epartIsReg(modrm)) {
26047 UInt rS = eregOfRexRM(pfx, modrm);
26048 delta += 1;
26049 assign(tD, getXMMReg(rS));
26050 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD));
26051 } else {
26052 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
26053 delta += alen;
26054 if (isA)
26055 gen_SEGV_if_not_16_aligned(addr);
26056 assign(tD, loadLE(Ity_V128, mkexpr(addr)));
26057 DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameXMMReg(rD));
26059 putYMMRegLoAndZU(rD, mkexpr(tD));
26060 goto decode_success;
26062 break;
26064 case 0x70:
26065 /* VPSHUFD imm8, xmm2/m128, xmm1 = VEX.128.66.0F.WIG 70 /r ib */
26066 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26067 delta = dis_PSHUFD_32x4( vbi, pfx, delta, True/*writesYmm*/);
26068 goto decode_success;
26070 /* VPSHUFD imm8, ymm2/m256, ymm1 = VEX.256.66.0F.WIG 70 /r ib */
26071 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26072 delta = dis_PSHUFD_32x8( vbi, pfx, delta);
26073 goto decode_success;
26075 /* VPSHUFLW imm8, xmm2/m128, xmm1 = VEX.128.F2.0F.WIG 70 /r ib */
26076 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26077 delta = dis_PSHUFxW_128( vbi, pfx, delta,
26078 True/*isAvx*/, False/*!xIsH*/ );
26079 goto decode_success;
26081 /* VPSHUFLW imm8, ymm2/m256, ymm1 = VEX.256.F2.0F.WIG 70 /r ib */
26082 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26083 delta = dis_PSHUFxW_256( vbi, pfx, delta, False/*!xIsH*/ );
26084 goto decode_success;
26086 /* VPSHUFHW imm8, xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 70 /r ib */
26087 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
26088 delta = dis_PSHUFxW_128( vbi, pfx, delta,
26089 True/*isAvx*/, True/*xIsH*/ );
26090 goto decode_success;
26092 /* VPSHUFHW imm8, ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 70 /r ib */
26093 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
26094 delta = dis_PSHUFxW_256( vbi, pfx, delta, True/*xIsH*/ );
26095 goto decode_success;
26097 break;
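/* Opcodes 71/72/73 are the immediate-count shift groups.  The
   operation is selected by the reg field of the modrm byte
   (gregLO3ofRM): /2 = logical right, /4 = arithmetic right, /6 =
   logical left.  These are VEX.NDD encodings, so the destination
   register comes from vvvv and the source register from modrm.rm; a
   memory E operand is not accepted and falls through to decode
   failure. */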
26099 case 0x71:
26100 /* VPSRLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /2 ib */
26101 /* VPSRAW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /4 ib */
26102 /* VPSLLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /6 ib */
26103 if (have66noF2noF3(pfx)
26104 && 0==getVexL(pfx)/*128*/
26105 && epartIsReg(getUChar(delta))) {
26106 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
26107 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26108 "vpsrlw", Iop_ShrN16x8 );
26109 *uses_vvvv = True;
26110 goto decode_success;
26112 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
26113 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26114 "vpsraw", Iop_SarN16x8 );
26115 *uses_vvvv = True;
26116 goto decode_success;
26118 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
26119 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26120 "vpsllw", Iop_ShlN16x8 );
26121 *uses_vvvv = True;
26122 goto decode_success;
26124 /* else fall through */
26126 /* VPSRLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /2 ib */
26127 /* VPSRAW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /4 ib */
26128 /* VPSLLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /6 ib */
26129 if (have66noF2noF3(pfx)
26130 && 1==getVexL(pfx)/*256*/
26131 && epartIsReg(getUChar(delta))) {
26132 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
26133 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26134 "vpsrlw", Iop_ShrN16x16 );
26135 *uses_vvvv = True;
26136 goto decode_success;
26138 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
26139 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26140 "vpsraw", Iop_SarN16x16 );
26141 *uses_vvvv = True;
26142 goto decode_success;
26144 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
26145 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26146 "vpsllw", Iop_ShlN16x16 );
26147 *uses_vvvv = True;
26148 goto decode_success;
26150 /* else fall through */
26152 break;
26154 case 0x72:
26155 /* VPSRLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /2 ib */
26156 /* VPSRAD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /4 ib */
26157 /* VPSLLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /6 ib */
26158 if (have66noF2noF3(pfx)
26159 && 0==getVexL(pfx)/*128*/
26160 && epartIsReg(getUChar(delta))) {
26161 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
26162 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26163 "vpsrld", Iop_ShrN32x4 );
26164 *uses_vvvv = True;
26165 goto decode_success;
26167 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
26168 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26169 "vpsrad", Iop_SarN32x4 );
26170 *uses_vvvv = True;
26171 goto decode_success;
26173 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
26174 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26175 "vpslld", Iop_ShlN32x4 );
26176 *uses_vvvv = True;
26177 goto decode_success;
26179 /* else fall through */
26181 /* VPSRLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /2 ib */
26182 /* VPSRAD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /4 ib */
26183 /* VPSLLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /6 ib */
26184 if (have66noF2noF3(pfx)
26185 && 1==getVexL(pfx)/*256*/
26186 && epartIsReg(getUChar(delta))) {
26187 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
26188 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26189 "vpsrld", Iop_ShrN32x8 );
26190 *uses_vvvv = True;
26191 goto decode_success;
26193 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
26194 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26195 "vpsrad", Iop_SarN32x8 );
26196 *uses_vvvv = True;
26197 goto decode_success;
26199 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
26200 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26201 "vpslld", Iop_ShlN32x8 );
26202 *uses_vvvv = True;
26203 goto decode_success;
26205 /* else fall through */
26207 break;
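/* Opcode 73 additionally has the whole-register byte shifts, /3
   (VPSRLDQ) and /7 (VPSLLDQ), which are expanded inline using
   math_PSRLDQ / math_PSLLDQ.  For the 256-bit forms each 128-bit lane
   is shifted independently, matching the AVX2 definition. */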
26209 case 0x73:
26210 /* VPSRLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /3 ib */
26211 /* VPSLLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /7 ib */
26212 /* VPSRLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /2 ib */
26213 /* VPSLLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /6 ib */
26214 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
26215 && epartIsReg(getUChar(delta))) {
26216 Int rS = eregOfRexRM(pfx,getUChar(delta));
26217 Int rD = getVexNvvvv(pfx);
26218 IRTemp vecS = newTemp(Ity_V128);
26219 if (gregLO3ofRM(getUChar(delta)) == 3) {
26220 Int imm = (Int)getUChar(delta+1);
26221 DIP("vpsrldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD));
26222 delta += 2;
26223 assign( vecS, getXMMReg(rS) );
26224 putYMMRegLoAndZU(rD, mkexpr(math_PSRLDQ( vecS, imm )));
26225 *uses_vvvv = True;
26226 goto decode_success;
26228 if (gregLO3ofRM(getUChar(delta)) == 7) {
26229 Int imm = (Int)getUChar(delta+1);
26230 DIP("vpslldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD));
26231 delta += 2;
26232 assign( vecS, getXMMReg(rS) );
26233 putYMMRegLoAndZU(rD, mkexpr(math_PSLLDQ( vecS, imm )));
26234 *uses_vvvv = True;
26235 goto decode_success;
26237 if (gregLO3ofRM(getUChar(delta)) == 2) {
26238 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26239 "vpsrlq", Iop_ShrN64x2 );
26240 *uses_vvvv = True;
26241 goto decode_success;
26243 if (gregLO3ofRM(getUChar(delta)) == 6) {
26244 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26245 "vpsllq", Iop_ShlN64x2 );
26246 *uses_vvvv = True;
26247 goto decode_success;
26249 /* else fall through */
26251 /* VPSRLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /3 ib */
26252 /* VPSLLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /7 ib */
26253 /* VPSRLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /2 ib */
26254 /* VPSLLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /6 ib */
26255 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
26256 && epartIsReg(getUChar(delta))) {
26257 Int rS = eregOfRexRM(pfx,getUChar(delta));
26258 Int rD = getVexNvvvv(pfx);
26259 if (gregLO3ofRM(getUChar(delta)) == 3) {
26260 IRTemp vecS0 = newTemp(Ity_V128);
26261 IRTemp vecS1 = newTemp(Ity_V128);
26262 Int imm = (Int)getUChar(delta+1);
26263 DIP("vpsrldq $%d,%s,%s\n", imm, nameYMMReg(rS), nameYMMReg(rD));
26264 delta += 2;
26265 assign( vecS0, getYMMRegLane128(rS, 0));
26266 assign( vecS1, getYMMRegLane128(rS, 1));
26267 putYMMRegLane128(rD, 0, mkexpr(math_PSRLDQ( vecS0, imm )));
26268 putYMMRegLane128(rD, 1, mkexpr(math_PSRLDQ( vecS1, imm )));
26269 *uses_vvvv = True;
26270 goto decode_success;
26272 if (gregLO3ofRM(getUChar(delta)) == 7) {
26273 IRTemp vecS0 = newTemp(Ity_V128);
26274 IRTemp vecS1 = newTemp(Ity_V128);
26275 Int imm = (Int)getUChar(delta+1);
26276 DIP("vpslldq $%d,%s,%s\n", imm, nameYMMReg(rS), nameYMMReg(rD));
26277 delta += 2;
26278 assign( vecS0, getYMMRegLane128(rS, 0));
26279 assign( vecS1, getYMMRegLane128(rS, 1));
26280 putYMMRegLane128(rD, 0, mkexpr(math_PSLLDQ( vecS0, imm )));
26281 putYMMRegLane128(rD, 1, mkexpr(math_PSLLDQ( vecS1, imm )));
26282 *uses_vvvv = True;
26283 goto decode_success;
26285 if (gregLO3ofRM(getUChar(delta)) == 2) {
26286 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26287 "vpsrlq", Iop_ShrN64x4 );
26288 *uses_vvvv = True;
26289 goto decode_success;
26291 if (gregLO3ofRM(getUChar(delta)) == 6) {
26292 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26293 "vpsllq", Iop_ShlN64x4 );
26294 *uses_vvvv = True;
26295 goto decode_success;
26297 /* else fall through */
26299 break;
26301 case 0x74:
26302 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */
26303 /* VPCMPEQB = VEX.NDS.128.66.0F.WIG 74 /r */
26304 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26305 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26306 uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x16 );
26307 goto decode_success;
26309 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */
26310 /* VPCMPEQB = VEX.NDS.256.66.0F.WIG 74 /r */
26311 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26312 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26313 uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x32 );
26314 goto decode_success;
26316 break;
26318 case 0x75:
26319 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */
26320 /* VPCMPEQW = VEX.NDS.128.66.0F.WIG 75 /r */
26321 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26322 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26323 uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x8 );
26324 goto decode_success;
26326 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */
26327 /* VPCMPEQW = VEX.NDS.256.66.0F.WIG 75 /r */
26328 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26329 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26330 uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x16 );
26331 goto decode_success;
26333 break;
26335 case 0x76:
26336 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */
26337 /* VPCMPEQD = VEX.NDS.128.66.0F.WIG 76 /r */
26338 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26339 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26340 uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x4 );
26341 goto decode_success;
26343 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */
26344 /* VPCMPEQD = VEX.NDS.256.66.0F.WIG 76 /r */
26345 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26346 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26347 uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x8 );
26348 goto decode_success;
26350 break;
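/* Opcode 77: with VEX.L == 0 this is VZEROUPPER, which clears bits
   255:128 of ymm0..ymm15; with VEX.L == 1 it is VZEROALL, which
   clears the registers entirely (done here via putYMMRegLoAndZU of a
   zero V128). */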
26352 case 0x77:
26353 /* VZEROUPPER = VEX.128.0F.WIG 77 */
26354 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26355 Int i;
26356 IRTemp zero128 = newTemp(Ity_V128);
26357 assign(zero128, mkV128(0));
26358 for (i = 0; i < 16; i++) {
26359 putYMMRegLane128(i, 1, mkexpr(zero128));
26361 DIP("vzeroupper\n");
26362 goto decode_success;
26364 /* VZEROALL = VEX.256.0F.WIG 77 */
26365 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26366 Int i;
26367 IRTemp zero128 = newTemp(Ity_V128);
26368 assign(zero128, mkV128(0));
26369 for (i = 0; i < 16; i++) {
26370 putYMMRegLoAndZU(i, mkexpr(zero128));
26372 DIP("vzeroall\n");
26373 goto decode_success;
26375 break;
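/* Opcodes 7C/7D are the horizontal add/subtract family.  The 128-bit
   work is done by math_HADDPS_128 / math_HADDPD_128; for the 256-bit
   forms the operands are split with breakupV256toV128s and the helper
   is applied to each 128-bit lane pair, since the AVX versions do not
   mix data across lanes. */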
26377 case 0x7C:
26378 case 0x7D:
26379 /* VHADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7C /r */
26380 /* VHSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7D /r */
26381 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26382 IRTemp sV = newTemp(Ity_V128);
26383 IRTemp dV = newTemp(Ity_V128);
26384 Bool isAdd = opc == 0x7C;
26385 const HChar* str = isAdd ? "add" : "sub";
26386 UChar modrm = getUChar(delta);
26387 UInt rG = gregOfRexRM(pfx,modrm);
26388 UInt rV = getVexNvvvv(pfx);
26389 if (epartIsReg(modrm)) {
26390 UInt rE = eregOfRexRM(pfx,modrm);
26391 assign( sV, getXMMReg(rE) );
26392 DIP("vh%sps %s,%s,%s\n", str, nameXMMReg(rE),
26393 nameXMMReg(rV), nameXMMReg(rG));
26394 delta += 1;
26395 } else {
26396 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26397 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
26398 DIP("vh%sps %s,%s,%s\n", str, dis_buf,
26399 nameXMMReg(rV), nameXMMReg(rG));
26400 delta += alen;
26402 assign( dV, getXMMReg(rV) );
26403 putYMMRegLoAndZU( rG, mkexpr( math_HADDPS_128 ( dV, sV, isAdd ) ) );
26404 *uses_vvvv = True;
26405 goto decode_success;
26407 /* VHADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7C /r */
26408 /* VHSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7D /r */
26409 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26410 IRTemp sV = newTemp(Ity_V256);
26411 IRTemp dV = newTemp(Ity_V256);
26412 IRTemp s1, s0, d1, d0;
26413 Bool isAdd = opc == 0x7C;
26414 const HChar* str = isAdd ? "add" : "sub";
26415 UChar modrm = getUChar(delta);
26416 UInt rG = gregOfRexRM(pfx,modrm);
26417 UInt rV = getVexNvvvv(pfx);
26418 s1 = s0 = d1 = d0 = IRTemp_INVALID;
26419 if (epartIsReg(modrm)) {
26420 UInt rE = eregOfRexRM(pfx,modrm);
26421 assign( sV, getYMMReg(rE) );
26422 DIP("vh%sps %s,%s,%s\n", str, nameYMMReg(rE),
26423 nameYMMReg(rV), nameYMMReg(rG));
26424 delta += 1;
26425 } else {
26426 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26427 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
26428 DIP("vh%sps %s,%s,%s\n", str, dis_buf,
26429 nameYMMReg(rV), nameYMMReg(rG));
26430 delta += alen;
26432 assign( dV, getYMMReg(rV) );
26433 breakupV256toV128s( dV, &d1, &d0 );
26434 breakupV256toV128s( sV, &s1, &s0 );
26435 putYMMReg( rG, binop(Iop_V128HLtoV256,
26436 mkexpr( math_HADDPS_128 ( d1, s1, isAdd ) ),
26437 mkexpr( math_HADDPS_128 ( d0, s0, isAdd ) ) ) );
26438 *uses_vvvv = True;
26439 goto decode_success;
26441 /* VHADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7C /r */
26442 /* VHSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7D /r */
26443 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26444 IRTemp sV = newTemp(Ity_V128);
26445 IRTemp dV = newTemp(Ity_V128);
26446 Bool isAdd = opc == 0x7C;
26447 const HChar* str = isAdd ? "add" : "sub";
26448 UChar modrm = getUChar(delta);
26449 UInt rG = gregOfRexRM(pfx,modrm);
26450 UInt rV = getVexNvvvv(pfx);
26451 if (epartIsReg(modrm)) {
26452 UInt rE = eregOfRexRM(pfx,modrm);
26453 assign( sV, getXMMReg(rE) );
26454 DIP("vh%spd %s,%s,%s\n", str, nameXMMReg(rE),
26455 nameXMMReg(rV), nameXMMReg(rG));
26456 delta += 1;
26457 } else {
26458 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26459 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
26460 DIP("vh%spd %s,%s,%s\n", str, dis_buf,
26461 nameXMMReg(rV), nameXMMReg(rG));
26462 delta += alen;
26464 assign( dV, getXMMReg(rV) );
26465 putYMMRegLoAndZU( rG, mkexpr( math_HADDPD_128 ( dV, sV, isAdd ) ) );
26466 *uses_vvvv = True;
26467 goto decode_success;
26469 /* VHADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7C /r */
26470 /* VHSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7D /r */
26471 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26472 IRTemp sV = newTemp(Ity_V256);
26473 IRTemp dV = newTemp(Ity_V256);
26474 IRTemp s1, s0, d1, d0;
26475 Bool isAdd = opc == 0x7C;
26476 const HChar* str = isAdd ? "add" : "sub";
26477 UChar modrm = getUChar(delta);
26478 UInt rG = gregOfRexRM(pfx,modrm);
26479 UInt rV = getVexNvvvv(pfx);
26480 s1 = s0 = d1 = d0 = IRTemp_INVALID;
26481 if (epartIsReg(modrm)) {
26482 UInt rE = eregOfRexRM(pfx,modrm);
26483 assign( sV, getYMMReg(rE) );
26484 DIP("vh%spd %s,%s,%s\n", str, nameYMMReg(rE),
26485 nameYMMReg(rV), nameYMMReg(rG));
26486 delta += 1;
26487 } else {
26488 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26489 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
26490 DIP("vh%spd %s,%s,%s\n", str, dis_buf,
26491 nameYMMReg(rV), nameYMMReg(rG));
26492 delta += alen;
26494 assign( dV, getYMMReg(rV) );
26495 breakupV256toV128s( dV, &d1, &d0 );
26496 breakupV256toV128s( sV, &s1, &s0 );
26497 putYMMReg( rG, binop(Iop_V128HLtoV256,
26498 mkexpr( math_HADDPD_128 ( d1, s1, isAdd ) ),
26499 mkexpr( math_HADDPD_128 ( d0, s0, isAdd ) ) ) );
26500 *uses_vvvv = True;
26501 goto decode_success;
26503 break;
26505 case 0x7E:
26506 /* Note the Intel docs don't make sense for this. I think they
26507 are wrong. They seem to imply it is a store when in fact I
26508 think it is a load. Also it's unclear whether this is W0, W1
26509 or WIG. */
26510 /* VMOVQ xmm2/m64, xmm1 = VEX.128.F3.0F.W0 7E /r */
26511 if (haveF3no66noF2(pfx)
26512 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
26513 vassert(sz == 4); /* even tho we are transferring 8, not 4. */
26514 UChar modrm = getUChar(delta);
26515 UInt rG = gregOfRexRM(pfx,modrm);
26516 if (epartIsReg(modrm)) {
26517 UInt rE = eregOfRexRM(pfx,modrm);
26518 putXMMRegLane64( rG, 0, getXMMRegLane64( rE, 0 ));
26519 DIP("vmovq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
26520 delta += 1;
26521 } else {
26522 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26523 putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) );
26524 DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG));
26525 delta += alen;
26527 /* zero bits 255:64 */
26528 putXMMRegLane64( rG, 1, mkU64(0) );
26529 putYMMRegLane128( rG, 1, mkV128(0) );
26530 goto decode_success;
26532 /* VMOVQ xmm1, r64 = VEX.128.66.0F.W1 7E /r (reg case only) */
26533 /* Moves from G to E, so is a store-form insn */
26534 /* Intel docs list this in the VMOVD entry for some reason. */
26535 if (have66noF2noF3(pfx)
26536 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
26537 UChar modrm = getUChar(delta);
26538 UInt rG = gregOfRexRM(pfx,modrm);
26539 if (epartIsReg(modrm)) {
26540 UInt rE = eregOfRexRM(pfx,modrm);
26541 DIP("vmovq %s,%s\n", nameXMMReg(rG), nameIReg64(rE));
26542 putIReg64(rE, getXMMRegLane64(rG, 0));
26543 delta += 1;
26544 } else {
26545 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26546 storeLE( mkexpr(addr), getXMMRegLane64(rG, 0) );
26547 DIP("vmovq %s,%s\n", nameXMMReg(rG), dis_buf);
26548 delta += alen;
26550 goto decode_success;
26552 /* VMOVD xmm1, m32/r32 = VEX.128.66.0F.W0 7E /r (reg case only) */
26553 /* Moves from G to E, so is a store-form insn */
26554 if (have66noF2noF3(pfx)
26555 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
26556 UChar modrm = getUChar(delta);
26557 UInt rG = gregOfRexRM(pfx,modrm);
26558 if (epartIsReg(modrm)) {
26559 UInt rE = eregOfRexRM(pfx,modrm);
26560 DIP("vmovd %s,%s\n", nameXMMReg(rG), nameIReg32(rE));
26561 putIReg32(rE, getXMMRegLane32(rG, 0));
26562 delta += 1;
26563 } else {
26564 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26565 storeLE( mkexpr(addr), getXMMRegLane32(rG, 0) );
26566 DIP("vmovd %s,%s\n", nameXMMReg(rG), dis_buf);
26567 delta += alen;
26569 goto decode_success;
26571 break;
26573 case 0x7F:
26574 /* VMOVDQA ymm1, ymm2/m256 = VEX.256.66.0F.WIG 7F */
26575 /* VMOVDQU ymm1, ymm2/m256 = VEX.256.F3.0F.WIG 7F */
26576 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
26577 && 1==getVexL(pfx)/*256*/) {
26578 UChar modrm = getUChar(delta);
26579 UInt rS = gregOfRexRM(pfx, modrm);
26580 IRTemp tS = newTemp(Ity_V256);
26581 Bool isA = have66noF2noF3(pfx);
26582 HChar ch = isA ? 'a' : 'u';
26583 assign(tS, getYMMReg(rS));
26584 if (epartIsReg(modrm)) {
26585 UInt rD = eregOfRexRM(pfx, modrm);
26586 delta += 1;
26587 putYMMReg(rD, mkexpr(tS));
26588 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD));
26589 } else {
26590 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
26591 delta += alen;
26592 if (isA)
26593 gen_SEGV_if_not_32_aligned(addr);
26594 storeLE(mkexpr(addr), mkexpr(tS));
26595 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), dis_buf);
26597 goto decode_success;
26599 /* VMOVDQA xmm1, xmm2/m128 = VEX.128.66.0F.WIG 7F */
26600 /* VMOVDQU xmm1, xmm2/m128 = VEX.128.F3.0F.WIG 7F */
26601 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
26602 && 0==getVexL(pfx)/*128*/) {
26603 UChar modrm = getUChar(delta);
26604 UInt rS = gregOfRexRM(pfx, modrm);
26605 IRTemp tS = newTemp(Ity_V128);
26606 Bool isA = have66noF2noF3(pfx);
26607 HChar ch = isA ? 'a' : 'u';
26608 assign(tS, getXMMReg(rS));
26609 if (epartIsReg(modrm)) {
26610 UInt rD = eregOfRexRM(pfx, modrm);
26611 delta += 1;
26612 putYMMRegLoAndZU(rD, mkexpr(tS));
26613 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD));
26614 } else {
26615 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
26616 delta += alen;
26617 if (isA)
26618 gen_SEGV_if_not_16_aligned(addr);
26619 storeLE(mkexpr(addr), mkexpr(tS));
26620 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), dis_buf);
26622 goto decode_success;
26624 break;
26626 case 0xAE:
26627 /* VSTMXCSR m32 = VEX.LZ.0F.WIG AE /3 */
26628 if (haveNo66noF2noF3(pfx)
26629 && 0==getVexL(pfx)/*LZ*/
26630 && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */
26631 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3
26632 && sz == 4) {
26633 delta = dis_STMXCSR(vbi, pfx, delta, True/*isAvx*/);
26634 goto decode_success;
26636 /* VLDMXCSR m32 = VEX.LZ.0F.WIG AE /2 */
26637 if (haveNo66noF2noF3(pfx)
26638 && 0==getVexL(pfx)/*LZ*/
26639 && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */
26640 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2
26641 && sz == 4) {
26642 delta = dis_LDMXCSR(vbi, pfx, delta, True/*isAvx*/);
26643 goto decode_success;
26645 break;
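/* The VCMPxx decoders below return an unchanged delta when they
   cannot handle the encoding (presumably an unsupported
   condition-code immediate), so success is detected by comparing
   against the saved delta0 rather than by an unconditional goto. */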
26647 case 0xC2:
26648 /* VCMPSD xmm3/m64(E=argL), xmm2(V=argR), xmm1(G) */
26649 /* = VEX.NDS.LIG.F2.0F.WIG C2 /r ib */
26650 if (haveF2no66noF3(pfx)) {
26651 Long delta0 = delta;
26652 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26653 "vcmpsd", False/*!all_lanes*/,
26654 8/*sz*/);
26655 if (delta > delta0) goto decode_success;
26656 /* else fall through -- decoding has failed */
26658 /* VCMPSS xmm3/m32(E=argL), xmm2(V=argR), xmm1(G) */
26659 /* = VEX.NDS.LIG.F3.0F.WIG C2 /r ib */
26660 if (haveF3no66noF2(pfx)) {
26661 Long delta0 = delta;
26662 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26663 "vcmpss", False/*!all_lanes*/,
26664 4/*sz*/);
26665 if (delta > delta0) goto decode_success;
26666 /* else fall through -- decoding has failed */
26668 /* VCMPPD xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
26669 /* = VEX.NDS.128.66.0F.WIG C2 /r ib */
26670 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26671 Long delta0 = delta;
26672 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26673 "vcmppd", True/*all_lanes*/,
26674 8/*sz*/);
26675 if (delta > delta0) goto decode_success;
26676 /* else fall through -- decoding has failed */
26678 /* VCMPPD ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
26679 /* = VEX.NDS.256.66.0F.WIG C2 /r ib */
26680 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26681 Long delta0 = delta;
26682 delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26683 "vcmppd", 8/*sz*/);
26684 if (delta > delta0) goto decode_success;
26685 /* else fall through -- decoding has failed */
26687 /* VCMPPS xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
26688 /* = VEX.NDS.128.0F.WIG C2 /r ib */
26689 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26690 Long delta0 = delta;
26691 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26692 "vcmpps", True/*all_lanes*/,
26693 4/*sz*/);
26694 if (delta > delta0) goto decode_success;
26695 /* else fall through -- decoding has failed */
26697 /* VCMPPS ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
26698 /* = VEX.NDS.256.0F.WIG C2 /r ib */
26699 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26700 Long delta0 = delta;
26701 delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26702 "vcmpps", 4/*sz*/);
26703 if (delta > delta0) goto decode_success;
26704 /* else fall through -- decoding has failed */
26706 break;
26708 case 0xC4:
26709 /* VPINSRW r32/m16, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG C4 /r ib */
26710 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26711 UChar modrm = getUChar(delta);
26712 UInt rG = gregOfRexRM(pfx, modrm);
26713 UInt rV = getVexNvvvv(pfx);
26714 Int imm8;
26715 IRTemp new16 = newTemp(Ity_I16);
26717 if ( epartIsReg( modrm ) ) {
26718 imm8 = (Int)(getUChar(delta+1) & 7);
26719 assign( new16, unop(Iop_32to16,
26720 getIReg32(eregOfRexRM(pfx,modrm))) );
26721 delta += 1+1;
26722 DIP( "vpinsrw $%d,%s,%s\n", imm8,
26723 nameIReg32( eregOfRexRM(pfx, modrm) ), nameXMMReg(rG) );
26724 } else {
26725 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
26726 imm8 = (Int)(getUChar(delta+alen) & 7);
26727 assign( new16, loadLE( Ity_I16, mkexpr(addr) ));
26728 delta += alen+1;
26729 DIP( "vpinsrw $%d,%s,%s\n",
26730 imm8, dis_buf, nameXMMReg(rG) );
26733 IRTemp src_vec = newTemp(Ity_V128);
26734 assign(src_vec, getXMMReg( rV ));
26735 IRTemp res_vec = math_PINSRW_128( src_vec, new16, imm8 );
26736 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
26737 *uses_vvvv = True;
26738 goto decode_success;
26740 break;
26742 case 0xC5:
26743 /* VPEXTRW imm8, xmm1, reg32 = VEX.128.66.0F.W0 C5 /r ib */
26744 if (have66noF2noF3(pfx)
26745 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
26746 Long delta0 = delta;
26747 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta,
26748 True/*isAvx*/ );
26749 if (delta > delta0) goto decode_success;
26750 /* else fall through -- decoding has failed */
26752 break;
26754 case 0xC6:
26755 /* VSHUFPS imm8, xmm3/m128, xmm2, xmm1 */
26756 /* = VEX.NDS.128.0F.WIG C6 /r ib */
26757 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26758 Int imm8 = 0;
26759 IRTemp eV = newTemp(Ity_V128);
26760 IRTemp vV = newTemp(Ity_V128);
26761 UInt modrm = getUChar(delta);
26762 UInt rG = gregOfRexRM(pfx,modrm);
26763 UInt rV = getVexNvvvv(pfx);
26764 assign( vV, getXMMReg(rV) );
26765 if (epartIsReg(modrm)) {
26766 UInt rE = eregOfRexRM(pfx,modrm);
26767 assign( eV, getXMMReg(rE) );
26768 imm8 = (Int)getUChar(delta+1);
26769 delta += 1+1;
26770 DIP("vshufps $%d,%s,%s,%s\n",
26771 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
26772 } else {
26773 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26774 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
26775 imm8 = (Int)getUChar(delta+alen);
26776 delta += 1+alen;
26777 DIP("vshufps $%d,%s,%s,%s\n",
26778 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
26780 IRTemp res = math_SHUFPS_128( eV, vV, imm8 );
26781 putYMMRegLoAndZU( rG, mkexpr(res) );
26782 *uses_vvvv = True;
26783 goto decode_success;
26785 /* VSHUFPS imm8, ymm3/m256, ymm2, ymm1 */
26786 /* = VEX.NDS.256.0F.WIG C6 /r ib */
26787 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26788 Int imm8 = 0;
26789 IRTemp eV = newTemp(Ity_V256);
26790 IRTemp vV = newTemp(Ity_V256);
26791 UInt modrm = getUChar(delta);
26792 UInt rG = gregOfRexRM(pfx,modrm);
26793 UInt rV = getVexNvvvv(pfx);
26794 assign( vV, getYMMReg(rV) );
26795 if (epartIsReg(modrm)) {
26796 UInt rE = eregOfRexRM(pfx,modrm);
26797 assign( eV, getYMMReg(rE) );
26798 imm8 = (Int)getUChar(delta+1);
26799 delta += 1+1;
26800 DIP("vshufps $%d,%s,%s,%s\n",
26801 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
26802 } else {
26803 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26804 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
26805 imm8 = (Int)getUChar(delta+alen);
26806 delta += 1+alen;
26807 DIP("vshufps $%d,%s,%s,%s\n",
26808 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
26810 IRTemp res = math_SHUFPS_256( eV, vV, imm8 );
26811 putYMMReg( rG, mkexpr(res) );
26812 *uses_vvvv = True;
26813 goto decode_success;
26815 /* VSHUFPD imm8, xmm3/m128, xmm2, xmm1 */
26816 /* = VEX.NDS.128.66.0F.WIG C6 /r ib */
26817 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26818 Int imm8 = 0;
26819 IRTemp eV = newTemp(Ity_V128);
26820 IRTemp vV = newTemp(Ity_V128);
26821 UInt modrm = getUChar(delta);
26822 UInt rG = gregOfRexRM(pfx,modrm);
26823 UInt rV = getVexNvvvv(pfx);
26824 assign( vV, getXMMReg(rV) );
26825 if (epartIsReg(modrm)) {
26826 UInt rE = eregOfRexRM(pfx,modrm);
26827 assign( eV, getXMMReg(rE) );
26828 imm8 = (Int)getUChar(delta+1);
26829 delta += 1+1;
26830 DIP("vshufpd $%d,%s,%s,%s\n",
26831 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
26832 } else {
26833 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26834 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
26835 imm8 = (Int)getUChar(delta+alen);
26836 delta += 1+alen;
26837 DIP("vshufpd $%d,%s,%s,%s\n",
26838 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
26840 IRTemp res = math_SHUFPD_128( eV, vV, imm8 );
26841 putYMMRegLoAndZU( rG, mkexpr(res) );
26842 *uses_vvvv = True;
26843 goto decode_success;
26845 /* VSHUFPD imm8, ymm3/m256, ymm2, ymm1 */
26846 /* = VEX.NDS.256.66.0F.WIG C6 /r ib */
26847 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26848 Int imm8 = 0;
26849 IRTemp eV = newTemp(Ity_V256);
26850 IRTemp vV = newTemp(Ity_V256);
26851 UInt modrm = getUChar(delta);
26852 UInt rG = gregOfRexRM(pfx,modrm);
26853 UInt rV = getVexNvvvv(pfx);
26854 assign( vV, getYMMReg(rV) );
26855 if (epartIsReg(modrm)) {
26856 UInt rE = eregOfRexRM(pfx,modrm);
26857 assign( eV, getYMMReg(rE) );
26858 imm8 = (Int)getUChar(delta+1);
26859 delta += 1+1;
26860 DIP("vshufpd $%d,%s,%s,%s\n",
26861 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
26862 } else {
26863 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26864 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
26865 imm8 = (Int)getUChar(delta+alen);
26866 delta += 1+alen;
26867 DIP("vshufpd $%d,%s,%s,%s\n",
26868 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
26870 IRTemp res = math_SHUFPD_256( eV, vV, imm8 );
26871 putYMMReg( rG, mkexpr(res) );
26872 *uses_vvvv = True;
26873 goto decode_success;
26875 break;
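/* Opcode D0 is ADDSUB: the even-numbered lanes are subtracted and the
   odd-numbered lanes added, so for the PD form lane 0 is V - E and
   lane 1 is V + E (V being the vvvv register), which is what the
   math_ADDSUBPD/PS helpers compute. */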
26877 case 0xD0:
26878 /* VADDSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D0 /r */
26879 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26880 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
26881 uses_vvvv, vbi, pfx, delta,
26882 "vaddsubpd", math_ADDSUBPD_128 );
26883 goto decode_success;
26885 /* VADDSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D0 /r */
26886 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26887 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
26888 uses_vvvv, vbi, pfx, delta,
26889 "vaddsubpd", math_ADDSUBPD_256 );
26890 goto decode_success;
26892 /* VADDSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG D0 /r */
26893 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26894 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
26895 uses_vvvv, vbi, pfx, delta,
26896 "vaddsubps", math_ADDSUBPS_128 );
26897 goto decode_success;
26899 /* VADDSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG D0 /r */
26900 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26901 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
26902 uses_vvvv, vbi, pfx, delta,
26903 "vaddsubps", math_ADDSUBPS_256 );
26904 goto decode_success;
26906 break;
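/* Opcodes D1..D3 are the variable-count logical right shifts.  As in
   the SSE forms, the shift amount is taken from the low 64 bits of
   the E operand, which stays xmm/m128 even for the 256-bit encodings;
   this is handled by the dis_AVX128_shiftV_byE /
   dis_AVX256_shiftV_byE helpers. */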
26908 case 0xD1:
26909 /* VPSRLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D1 /r */
26910 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26911 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26912 "vpsrlw", Iop_ShrN16x8 );
26913 *uses_vvvv = True;
26914 goto decode_success;
26917 /* VPSRLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D1 /r */
26918 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26919 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26920 "vpsrlw", Iop_ShrN16x16 );
26921 *uses_vvvv = True;
26922 goto decode_success;
26925 break;
26927 case 0xD2:
26928 /* VPSRLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D2 /r */
26929 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26930 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26931 "vpsrld", Iop_ShrN32x4 );
26932 *uses_vvvv = True;
26933 goto decode_success;
26935 /* VPSRLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D2 /r */
26936 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26937 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26938 "vpsrld", Iop_ShrN32x8 );
26939 *uses_vvvv = True;
26940 goto decode_success;
26942 break;
26944 case 0xD3:
26945 /* VPSRLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D3 /r */
26946 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26947 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26948 "vpsrlq", Iop_ShrN64x2 );
26949 *uses_vvvv = True;
26950 goto decode_success;
26952 /* VPSRLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D3 /r */
26953 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26954 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26955 "vpsrlq", Iop_ShrN64x4 );
26956 *uses_vvvv = True;
26957 goto decode_success;
26959 break;
26961 case 0xD4:
26962 /* VPADDQ r/m, rV, r ::: r = rV + r/m */
26963 /* VPADDQ = VEX.NDS.128.66.0F.WIG D4 /r */
26964 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26965 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26966 uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x2 );
26967 goto decode_success;
26969 /* VPADDQ r/m, rV, r ::: r = rV + r/m */
26970 /* VPADDQ = VEX.NDS.256.66.0F.WIG D4 /r */
26971 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26972 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26973 uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x4 );
26974 goto decode_success;
26976 break;
26978 case 0xD5:
26979 /* VPMULLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D5 /r */
26980 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26981 delta = dis_AVX128_E_V_to_G(
26982 uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x8 );
26983 goto decode_success;
26985 /* VPMULLW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D5 /r */
26986 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26987 delta = dis_AVX256_E_V_to_G(
26988 uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x16 );
26989 goto decode_success;
26991 break;
26993 case 0xD6:
26994 /* Intel documents this as VMOVQ xmm1/m64, xmm2 = VEX.128.66.0F.WIG D6 /r. */
26995 /* That is: move 64 bits from G (lo half xmm) to E (mem or lo half
26996 xmm); a register destination gets its upper half zeroed.  Only L==0 (128)
26997 and W==0 (WIG, per the docs) are accepted here. */
26998 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
26999 && 0==getRexW(pfx)/*this might be redundant, dunno*/) {
27000 UChar modrm = getUChar(delta);
27001 UInt rG = gregOfRexRM(pfx,modrm);
27002 if (epartIsReg(modrm)) {
27003 /* fall through, awaiting test case */
27004 /* dst: lo half copied, hi half zeroed */
27005 } else {
27006 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27007 storeLE( mkexpr(addr), getXMMRegLane64( rG, 0 ));
27008 DIP("vmovq %s,%s\n", nameXMMReg(rG), dis_buf );
27009 delta += alen;
27010 goto decode_success;
27013 break;
27015 case 0xD7:
27016 /* VEX.128.66.0F.WIG D7 /r = VPMOVMSKB xmm1, r32 */
27017 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27018 delta = dis_PMOVMSKB_128( vbi, pfx, delta, True/*isAvx*/ );
27019 goto decode_success;
27021 /* VEX.256.66.0F.WIG D7 /r = VPMOVMSKB ymm1, r32 */
27022 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27023 delta = dis_PMOVMSKB_256( vbi, pfx, delta );
27024 goto decode_success;
27026 break;
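/* PMOVMSKB collects the most significant bit of each byte lane into the
   low bits of a general purpose register (16 bits for the xmm form, 32
   bits for the ymm form) and zeroes the rest of the destination. */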
27028 case 0xD8:
27029 /* VPSUBUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D8 /r */
27030 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27031 delta = dis_AVX128_E_V_to_G(
27032 uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux16 );
27033 goto decode_success;
27035 /* VPSUBUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D8 /r */
27036 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27037 delta = dis_AVX256_E_V_to_G(
27038 uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux32 );
27039 goto decode_success;
27041 break;
27043 case 0xD9:
27044 /* VPSUBUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D9 /r */
27045 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27046 delta = dis_AVX128_E_V_to_G(
27047 uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux8 );
27048 goto decode_success;
27050 /* VPSUBUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D9 /r */
27051 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27052 delta = dis_AVX256_E_V_to_G(
27053 uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux16 );
27054 goto decode_success;
27056 break;
27058 case 0xDA:
27059 /* VPMINUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DA /r */
27060 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27061 delta = dis_AVX128_E_V_to_G(
27062 uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux16 );
27063 goto decode_success;
27065 /* VPMINUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DA /r */
27066 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27067 delta = dis_AVX256_E_V_to_G(
27068 uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux32 );
27069 goto decode_success;
27071 break;
27073 case 0xDB:
27074 /* VPAND r/m, rV, r ::: r = rV & r/m */
27075 /* VEX.NDS.128.66.0F.WIG DB /r = VPAND xmm3/m128, xmm2, xmm1 */
27076 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27077 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27078 uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV128 );
27079 goto decode_success;
27081 /* VPAND r/m, rV, r ::: r = rV & r/m */
27082 /* VEX.NDS.256.66.0F.WIG DB /r = VPAND ymm3/m256, ymm2, ymm1 */
27083 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27084 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27085 uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV256 );
27086 goto decode_success;
27088 break;
27090 case 0xDC:
27091 /* VPADDUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DC /r */
27092 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27093 delta = dis_AVX128_E_V_to_G(
27094 uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux16 );
27095 goto decode_success;
27097 /* VPADDUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DC /r */
27098 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27099 delta = dis_AVX256_E_V_to_G(
27100 uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux32 );
27101 goto decode_success;
27103 break;
27105 case 0xDD:
27106 /* VPADDUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DD /r */
27107 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27108 delta = dis_AVX128_E_V_to_G(
27109 uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux8 );
27110 goto decode_success;
27112 /* VPADDUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DD /r */
27113 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27114 delta = dis_AVX256_E_V_to_G(
27115 uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux16 );
27116 goto decode_success;
27118 break;
27120 case 0xDE:
27121 /* VPMAXUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DE /r */
27122 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27123 delta = dis_AVX128_E_V_to_G(
27124 uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux16 );
27125 goto decode_success;
27127 /* VPMAXUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DE /r */
27128 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27129 delta = dis_AVX256_E_V_to_G(
27130 uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux32 );
27131 goto decode_success;
27133 break;
27135 case 0xDF:
27136 /* VPANDN r/m, rV, r ::: r = ~rV & r/m (the vvvv operand is the one complemented) */
27137 /* VEX.NDS.128.66.0F.WIG DF /r = VPANDN xmm3/m128, xmm2, xmm1 */
27138 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27139 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
27140 uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV128,
27141 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
27142 goto decode_success;
27144 /* VPANDN r/m, rV, r ::: r = ~rV & r/m (the vvvv operand is the one complemented) */
27145 /* VEX.NDS.256.66.0F.WIG DF /r = VPANDN ymm3/m256, ymm2, ymm1 */
27146 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27147 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
27148 uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV256,
27149 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
27150 goto decode_success;
27152 break;
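/* Worked example for VPANDN: with an rV lane of 0b1100 and an r/m lane
   of 0b1010 the result lane is (~0b1100) & 0b1010 = 0b0010.  It is the
   vvvv operand that gets complemented, which is what the
   invertLeftArg=True argument above requests. */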
27154 case 0xE0:
27155 /* VPAVGB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E0 /r */
27156 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27157 delta = dis_AVX128_E_V_to_G(
27158 uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux16 );
27159 goto decode_success;
27161 /* VPAVGB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E0 /r */
27162 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27163 delta = dis_AVX256_E_V_to_G(
27164 uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux32 );
27165 goto decode_success;
27167 break;
27169 case 0xE1:
27170 /* VPSRAW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E1 /r */
27171 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27172 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27173 "vpsraw", Iop_SarN16x8 );
27174 *uses_vvvv = True;
27175 goto decode_success;
27177 /* VPSRAW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E1 /r */
27178 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27179 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27180 "vpsraw", Iop_SarN16x16 );
27181 *uses_vvvv = True;
27182 goto decode_success;
27184 break;
27186 case 0xE2:
27187 /* VPSRAD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E2 /r */
27188 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27189 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27190 "vpsrad", Iop_SarN32x4 );
27191 *uses_vvvv = True;
27192 goto decode_success;
27194 /* VPSRAD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E2 /r */
27195 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27196 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27197 "vpsrad", Iop_SarN32x8 );
27198 *uses_vvvv = True;
27199 goto decode_success;
27201 break;
27203 case 0xE3:
27204 /* VPAVGW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E3 /r */
27205 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27206 delta = dis_AVX128_E_V_to_G(
27207 uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux8 );
27208 goto decode_success;
27210 /* VPAVGW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E3 /r */
27211 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27212 delta = dis_AVX256_E_V_to_G(
27213 uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux16 );
27214 goto decode_success;
27216 break;
27218 case 0xE4:
27219 /* VPMULHUW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E4 /r */
27220 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27221 delta = dis_AVX128_E_V_to_G(
27222 uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux8 );
27223 goto decode_success;
27225 /* VPMULHUW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E4 /r */
27226 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27227 delta = dis_AVX256_E_V_to_G(
27228 uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux16 );
27229 goto decode_success;
27231 break;
27233 case 0xE5:
27234 /* VPMULHW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E5 /r */
27235 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27236 delta = dis_AVX128_E_V_to_G(
27237 uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx8 );
27238 goto decode_success;
27240 /* VPMULHW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E5 /r */
27241 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27242 delta = dis_AVX256_E_V_to_G(
27243 uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx16 );
27244 goto decode_success;
27246 break;
27248 case 0xE6:
27249 /* VCVTDQ2PD xmm2/m64, xmm1 = VEX.128.F3.0F.WIG E6 /r */
27250 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
27251 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, True/*isAvx*/);
27252 goto decode_success;
27254 /* VCVTDQ2PD xmm2/m128, ymm1 = VEX.256.F3.0F.WIG E6 /r */
27255 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
27256 delta = dis_CVTDQ2PD_256(vbi, pfx, delta);
27257 goto decode_success;
27259 /* VCVTTPD2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG E6 /r */
27260 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27261 delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/,
27262 True/*r2zero*/);
27263 goto decode_success;
27265 /* VCVTTPD2DQ ymm2/m256, xmm1 = VEX.256.66.0F.WIG E6 /r */
27266 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27267 delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, True/*r2zero*/);
27268 goto decode_success;
27270 /* VCVTPD2DQ xmm2/m128, xmm1 = VEX.128.F2.0F.WIG E6 /r */
27271 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27272 delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/,
27273 False/*!r2zero*/);
27274 goto decode_success;
27276 /* VCVTPD2DQ ymm2/m256, xmm1 = VEX.256.F2.0F.WIG E6 /r */
27277 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27278 delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, False/*!r2zero*/);
27279 goto decode_success;
27281 break;
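/* In the 0xE6 group above, r2zero=True selects the truncating
   (round-towards-zero) VCVTTPD2DQ conversion, while r2zero=False
   (VCVTPD2DQ) rounds according to the current MXCSR rounding mode.
   Both 256-bit forms produce only an xmm result, since four doubles
   narrow to four 32-bit integers. */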
27283 case 0xE7:
27284 /* VMOVNTDQ xmm1, m128 = VEX.128.66.0F.WIG E7 /r */
27285 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27286 UChar modrm = getUChar(delta);
27287 UInt rG = gregOfRexRM(pfx,modrm);
27288 if (!epartIsReg(modrm)) {
27289 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27290 gen_SEGV_if_not_16_aligned( addr );
27291 storeLE( mkexpr(addr), getXMMReg(rG) );
27292 DIP("vmovntdq %s,%s\n", dis_buf, nameXMMReg(rG));
27293 delta += alen;
27294 goto decode_success;
27296 /* else fall through */
27298 /* VMOVNTDQ ymm1, m256 = VEX.256.66.0F.WIG E7 /r */
27299 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27300 UChar modrm = getUChar(delta);
27301 UInt rG = gregOfRexRM(pfx,modrm);
27302 if (!epartIsReg(modrm)) {
27303 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27304 gen_SEGV_if_not_32_aligned( addr );
27305 storeLE( mkexpr(addr), getYMMReg(rG) );
27306 DIP("vmovntdq %s,%s\n", dis_buf, nameYMMReg(rG));
27307 delta += alen;
27308 goto decode_success;
27310 /* else fall through */
27312 break;
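/* VMOVNTDQ is a non-temporal store and architecturally requires a 16-
   (xmm) or 32-byte (ymm) aligned destination, hence the explicit
   gen_SEGV_if_not_*_aligned checks.  The non-temporal hint itself has
   no IR representation, so the store is emitted as an ordinary
   little-endian store. */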
27314 case 0xE8:
27315 /* VPSUBSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E8 /r */
27316 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27317 delta = dis_AVX128_E_V_to_G(
27318 uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx16 );
27319 goto decode_success;
27321 /* VPSUBSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E8 /r */
27322 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27323 delta = dis_AVX256_E_V_to_G(
27324 uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx32 );
27325 goto decode_success;
27327 break;
27329 case 0xE9:
27330 /* VPSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E9 /r */
27331 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27332 delta = dis_AVX128_E_V_to_G(
27333 uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx8 );
27334 goto decode_success;
27336 /* VPSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E9 /r */
27337 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27338 delta = dis_AVX256_E_V_to_G(
27339 uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx16 );
27340 goto decode_success;
27342 break;
27344 case 0xEA:
27345 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */
27346 /* VPMINSW = VEX.NDS.128.66.0F.WIG EA /r */
27347 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27348 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27349 uses_vvvv, vbi, pfx, delta, "vpminsw", Iop_Min16Sx8 );
27350 goto decode_success;
27352 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */
27353 /* VPMINSW = VEX.NDS.256.66.0F.WIG EA /r */
27354 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27355 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27356 uses_vvvv, vbi, pfx, delta, "vpminsw", Iop_Min16Sx16 );
27357 goto decode_success;
27359 break;
27361 case 0xEB:
27362 /* VPOR r/m, rV, r ::: r = rV | r/m */
27363 /* VPOR = VEX.NDS.128.66.0F.WIG EB /r */
27364 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27365 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27366 uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV128 );
27367 goto decode_success;
27369 /* VPOR r/m, rV, r ::: r = rV | r/m */
27370 /* VPOR = VEX.NDS.256.66.0F.WIG EB /r */
27371 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27372 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27373 uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV256 );
27374 goto decode_success;
27376 break;
27378 case 0xEC:
27379 /* VPADDSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG EC /r */
27380 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27381 delta = dis_AVX128_E_V_to_G(
27382 uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx16 );
27383 goto decode_success;
27385 /* VPADDSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG EC /r */
27386 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27387 delta = dis_AVX256_E_V_to_G(
27388 uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx32 );
27389 goto decode_success;
27391 break;
27393 case 0xED:
27394 /* VPADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG ED /r */
27395 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27396 delta = dis_AVX128_E_V_to_G(
27397 uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx8 );
27398 goto decode_success;
27400 /* VPADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG ED /r */
27401 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27402 delta = dis_AVX256_E_V_to_G(
27403 uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx16 );
27404 goto decode_success;
27406 break;
27408 case 0xEE:
27409 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */
27410 /* VPMAXSW = VEX.NDS.128.66.0F.WIG EE /r */
27411 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27412 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27413 uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx8 );
27414 goto decode_success;
27416 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */
27417 /* VPMAXSW = VEX.NDS.256.66.0F.WIG EE /r */
27418 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27419 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27420 uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx16 );
27421 goto decode_success;
27423 break;
27425 case 0xEF:
27426 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */
27427 /* VPXOR = VEX.NDS.128.66.0F.WIG EF /r */
27428 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27429 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27430 uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV128 );
27431 goto decode_success;
27433 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */
27434 /* VPXOR = VEX.NDS.256.66.0F.WIG EF /r */
27435 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27436 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27437 uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV256 );
27438 goto decode_success;
27440 break;
27442 case 0xF0:
27443 /* VLDDQU m256, ymm1 = VEX.256.F2.0F.WIG F0 /r */
27444 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27445 UChar modrm = getUChar(delta);
27446 UInt rD = gregOfRexRM(pfx, modrm);
27447 IRTemp tD = newTemp(Ity_V256);
27448 if (epartIsReg(modrm)) break;
27449 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27450 delta += alen;
27451 assign(tD, loadLE(Ity_V256, mkexpr(addr)));
27452 DIP("vlddqu %s,%s\n", dis_buf, nameYMMReg(rD));
27453 putYMMReg(rD, mkexpr(tD));
27454 goto decode_success;
27456 /* VLDDQU m128, xmm1 = VEX.128.F2.0F.WIG F0 /r */
27457 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27458 UChar modrm = getUChar(delta);
27459 UInt rD = gregOfRexRM(pfx, modrm);
27460 IRTemp tD = newTemp(Ity_V128);
27461 if (epartIsReg(modrm)) break;
27462 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27463 delta += alen;
27464 assign(tD, loadLE(Ity_V128, mkexpr(addr)));
27465 DIP("vlddqu %s,%s\n", dis_buf, nameXMMReg(rD));
27466 putYMMRegLoAndZU(rD, mkexpr(tD));
27467 goto decode_success;
27469 break;
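/* VLDDQU accepts only a memory source (the register form is rejected
   above) and performs an unaligned 128- or 256-bit load, so unlike
   VMOVNTDQ no alignment check is generated. */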
27471 case 0xF1:
27472 /* VPSLLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F1 /r */
27473 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27474 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27475 "vpsllw", Iop_ShlN16x8 );
27476 *uses_vvvv = True;
27477 goto decode_success;
27480 /* VPSLLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F1 /r */
27481 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27482 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27483 "vpsllw", Iop_ShlN16x16 );
27484 *uses_vvvv = True;
27485 goto decode_success;
27488 break;
27490 case 0xF2:
27491 /* VPSLLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F2 /r */
27492 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27493 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27494 "vpslld", Iop_ShlN32x4 );
27495 *uses_vvvv = True;
27496 goto decode_success;
27498 /* VPSLLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F2 /r */
27499 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27500 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27501 "vpslld", Iop_ShlN32x8 );
27502 *uses_vvvv = True;
27503 goto decode_success;
27505 break;
27507 case 0xF3:
27508 /* VPSLLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F3 /r */
27509 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27510 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27511 "vpsllq", Iop_ShlN64x2 );
27512 *uses_vvvv = True;
27513 goto decode_success;
27515 /* VPSLLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F3 /r */
27516 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27517 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27518 "vpsllq", Iop_ShlN64x4 );
27519 *uses_vvvv = True;
27520 goto decode_success;
27522 break;
27524 case 0xF4:
27525 /* VPMULUDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F4 /r */
27526 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27527 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27528 uses_vvvv, vbi, pfx, delta,
27529 "vpmuludq", math_PMULUDQ_128 );
27530 goto decode_success;
27532 /* VPMULUDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F4 /r */
27533 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27534 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27535 uses_vvvv, vbi, pfx, delta,
27536 "vpmuludq", math_PMULUDQ_256 );
27537 goto decode_success;
27539 break;
27541 case 0xF5:
27542 /* VPMADDWD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F5 /r */
27543 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27544 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27545 uses_vvvv, vbi, pfx, delta,
27546 "vpmaddwd", math_PMADDWD_128 );
27547 goto decode_success;
27549 /* VPMADDWD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F5 /r */
27550 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27551 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27552 uses_vvvv, vbi, pfx, delta,
27553 "vpmaddwd", math_PMADDWD_256 );
27554 goto decode_success;
27556 break;
27558 case 0xF6:
27559 /* VPSADBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F6 /r */
27560 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27561 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27562 uses_vvvv, vbi, pfx, delta,
27563 "vpsadbw", math_PSADBW_128 );
27564 goto decode_success;
27566 /* VPSADBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F6 /r */
27567 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27568 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27569 uses_vvvv, vbi, pfx, delta,
27570 "vpsadbw", math_PSADBW_256 );
27571 goto decode_success;
27573 break;
27575 case 0xF7:
27576 /* VMASKMOVDQU xmm2, xmm1 = VEX.128.66.0F.WIG F7 /r */
27577 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
27578 && epartIsReg(getUChar(delta))) {
27579 delta = dis_MASKMOVDQU( vbi, pfx, delta, True/*isAvx*/ );
27580 goto decode_success;
27582 break;
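/* VMASKMOVDQU has only a register-register form: the byte mask comes
   from the second xmm register and the selected bytes are stored to the
   address implicitly held in RDI, which is why only the epartIsReg case
   is accepted here. */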
27584 case 0xF8:
27585 /* VPSUBB r/m, rV, r ::: r = rV - r/m */
27586 /* VPSUBB = VEX.NDS.128.66.0F.WIG F8 /r */
27587 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27588 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27589 uses_vvvv, vbi, pfx, delta, "vpsubb", Iop_Sub8x16 );
27590 goto decode_success;
27592 /* VPSUBB r/m, rV, r ::: r = rV - r/m */
27593 /* VPSUBB = VEX.NDS.256.66.0F.WIG F8 /r */
27594 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27595 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27596 uses_vvvv, vbi, pfx, delta, "vpsubb", Iop_Sub8x32 );
27597 goto decode_success;
27599 break;
27601 case 0xF9:
27602 /* VPSUBW r/m, rV, r ::: r = rV - r/m */
27603 /* VPSUBW = VEX.NDS.128.66.0F.WIG F9 /r */
27604 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27605 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27606 uses_vvvv, vbi, pfx, delta, "vpsubw", Iop_Sub16x8 );
27607 goto decode_success;
27609 /* VPSUBW r/m, rV, r ::: r = rV - r/m */
27610 /* VPSUBW = VEX.NDS.256.66.0F.WIG F9 /r */
27611 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27612 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27613 uses_vvvv, vbi, pfx, delta, "vpsubw", Iop_Sub16x16 );
27614 goto decode_success;
27616 break;
27618 case 0xFA:
27619 /* VPSUBD r/m, rV, r ::: r = rV - r/m */
27620 /* VPSUBD = VEX.NDS.128.66.0F.WIG FA /r */
27621 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27622 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27623 uses_vvvv, vbi, pfx, delta, "vpsubd", Iop_Sub32x4 );
27624 goto decode_success;
27626 /* VPSUBD r/m, rV, r ::: r = rV - r/m */
27627 /* VPSUBD = VEX.NDS.256.66.0F.WIG FA /r */
27628 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27629 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27630 uses_vvvv, vbi, pfx, delta, "vpsubd", Iop_Sub32x8 );
27631 goto decode_success;
27633 break;
27635 case 0xFB:
27636 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */
27637 /* VPSUBQ = VEX.NDS.128.66.0F.WIG FB /r */
27638 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27639 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27640 uses_vvvv, vbi, pfx, delta, "vpsubq", Iop_Sub64x2 );
27641 goto decode_success;
27643 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */
27644 /* VPSUBQ = VEX.NDS.256.66.0F.WIG FB /r */
27645 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27646 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27647 uses_vvvv, vbi, pfx, delta, "vpsubq", Iop_Sub64x4 );
27648 goto decode_success;
27650 break;
27652 case 0xFC:
27653 /* VPADDB r/m, rV, r ::: r = rV + r/m */
27654 /* VPADDB = VEX.NDS.128.66.0F.WIG FC /r */
27655 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27656 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27657 uses_vvvv, vbi, pfx, delta, "vpaddb", Iop_Add8x16 );
27658 goto decode_success;
27660 /* VPADDB r/m, rV, r ::: r = rV + r/m */
27661 /* VPADDB = VEX.NDS.256.66.0F.WIG FC /r */
27662 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27663 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27664 uses_vvvv, vbi, pfx, delta, "vpaddb", Iop_Add8x32 );
27665 goto decode_success;
27667 break;
27669 case 0xFD:
27670 /* VPADDW r/m, rV, r ::: r = rV + r/m */
27671 /* VPADDW = VEX.NDS.128.66.0F.WIG FD /r */
27672 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27673 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27674 uses_vvvv, vbi, pfx, delta, "vpaddw", Iop_Add16x8 );
27675 goto decode_success;
27677 /* VPADDW r/m, rV, r ::: r = rV + r/m */
27678 /* VPADDW = VEX.NDS.256.66.0F.WIG FD /r */
27679 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27680 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27681 uses_vvvv, vbi, pfx, delta, "vpaddw", Iop_Add16x16 );
27682 goto decode_success;
27684 break;
27686 case 0xFE:
27687 /* VPADDD r/m, rV, r ::: r = rV + r/m */
27688 /* VPADDD = VEX.NDS.128.66.0F.WIG FE /r */
27689 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27690 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27691 uses_vvvv, vbi, pfx, delta, "vpaddd", Iop_Add32x4 );
27692 goto decode_success;
27694 /* VPADDD r/m, rV, r ::: r = rV + r/m */
27695 /* VPADDD = VEX.NDS.256.66.0F.WIG FE /r */
27696 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27697 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27698 uses_vvvv, vbi, pfx, delta, "vpaddd", Iop_Add32x8 );
27699 goto decode_success;
27701 break;
27703 default:
27704 break;
27708 //decode_failure:
27709 return deltaIN;
27711 decode_success:
27712 return delta;
27716 /*------------------------------------------------------------*/
27717 /*--- ---*/
27718 /*--- Top-level post-escape decoders: dis_ESC_0F38__VEX ---*/
27719 /*--- ---*/
27720 /*------------------------------------------------------------*/
27722 static IRTemp math_PERMILPS_VAR_128 ( IRTemp dataV, IRTemp ctrlV )
27724 /* In the control vector, zero out all but the bottom two bits of
27725 each 32-bit lane. */
27726 IRExpr* cv1 = binop(Iop_ShrN32x4,
27727 binop(Iop_ShlN32x4, mkexpr(ctrlV), mkU8(30)),
27728 mkU8(30));
27729 /* And use the resulting cleaned-up control vector as steering
27730 in a Perm operation. */
27731 IRTemp res = newTemp(Ity_V128);
27732 assign(res, binop(Iop_Perm32x4, mkexpr(dataV), cv1));
27733 return res;
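/* The shift-left-then-shift-right pair above is just a lane-wise mask:
   (x << 30) >> 30 keeps only bits [1:0] of each 32-bit lane, so a
   control lane of 0x00000007 becomes 3.  The same trick with a shift
   amount of 29 appears below in math_VPERMD to keep bits [2:0]. */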
27736 static IRTemp math_PERMILPS_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
27738 IRTemp dHi, dLo, cHi, cLo;
27739 dHi = dLo = cHi = cLo = IRTemp_INVALID;
27740 breakupV256toV128s( dataV, &dHi, &dLo );
27741 breakupV256toV128s( ctrlV, &cHi, &cLo );
27742 IRTemp rHi = math_PERMILPS_VAR_128( dHi, cHi );
27743 IRTemp rLo = math_PERMILPS_VAR_128( dLo, cLo );
27744 IRTemp res = newTemp(Ity_V256);
27745 assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo)));
27746 return res;
27749 static IRTemp math_PERMILPD_VAR_128 ( IRTemp dataV, IRTemp ctrlV )
27751 /* No cleverness here .. */
27752 IRTemp dHi, dLo, cHi, cLo;
27753 dHi = dLo = cHi = cLo = IRTemp_INVALID;
27754 breakupV128to64s( dataV, &dHi, &dLo );
27755 breakupV128to64s( ctrlV, &cHi, &cLo );
27756 IRExpr* rHi
27757 = IRExpr_ITE( unop(Iop_64to1,
27758 binop(Iop_Shr64, mkexpr(cHi), mkU8(1))),
27759 mkexpr(dHi), mkexpr(dLo) );
27760 IRExpr* rLo
27761 = IRExpr_ITE( unop(Iop_64to1,
27762 binop(Iop_Shr64, mkexpr(cLo), mkU8(1))),
27763 mkexpr(dHi), mkexpr(dLo) );
27764 IRTemp res = newTemp(Ity_V128);
27765 assign(res, binop(Iop_64HLtoV128, rHi, rLo));
27766 return res;
27769 static IRTemp math_PERMILPD_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
27771 IRTemp dHi, dLo, cHi, cLo;
27772 dHi = dLo = cHi = cLo = IRTemp_INVALID;
27773 breakupV256toV128s( dataV, &dHi, &dLo );
27774 breakupV256toV128s( ctrlV, &cHi, &cLo );
27775 IRTemp rHi = math_PERMILPD_VAR_128( dHi, cHi );
27776 IRTemp rLo = math_PERMILPD_VAR_128( dLo, cLo );
27777 IRTemp res = newTemp(Ity_V256);
27778 assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo)));
27779 return res;
27782 static IRTemp math_VPERMD ( IRTemp ctrlV, IRTemp dataV )
27784 /* In the control vector, zero out all but the bottom three bits of
27785 each 32-bit lane. */
27786 IRExpr* cv1 = binop(Iop_ShrN32x8,
27787 binop(Iop_ShlN32x8, mkexpr(ctrlV), mkU8(29)),
27788 mkU8(29));
27789 /* And use the resulting cleaned-up control vector as steering
27790 in a Perm operation. */
27791 IRTemp res = newTemp(Ity_V256);
27792 assign(res, binop(Iop_Perm32x8, mkexpr(dataV), cv1));
27793 return res;
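/* VPERMD/VPERMPS semantics: each 32-bit result lane i is
   data[ctrl[i] & 7], so any of the eight source dwords can be routed to
   any destination lane; that is exactly what Iop_Perm32x8 with the
   masked control vector above expresses. */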
27796 static Long dis_SHIFTX ( /*OUT*/Bool* uses_vvvv,
27797 const VexAbiInfo* vbi, Prefix pfx, Long delta,
27798 const HChar* opname, IROp op8 )
27800 HChar dis_buf[50];
27801 Int alen;
27802 Int size = getRexW(pfx) ? 8 : 4;
27803 IRType ty = szToITy(size);
27804 IRTemp src = newTemp(ty);
27805 IRTemp amt = newTemp(ty);
27806 UChar rm = getUChar(delta);
27808 assign( amt, getIRegV(size,pfx) );
27809 if (epartIsReg(rm)) {
27810 assign( src, getIRegE(size,pfx,rm) );
27811 DIP("%s %s,%s,%s\n", opname, nameIRegV(size,pfx),
27812 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
27813 delta++;
27814 } else {
27815 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27816 assign( src, loadLE(ty, mkexpr(addr)) );
27817 DIP("%s %s,%s,%s\n", opname, nameIRegV(size,pfx), dis_buf,
27818 nameIRegG(size,pfx,rm));
27819 delta += alen;
27822 putIRegG( size, pfx, rm,
27823 binop(mkSizedOp(ty,op8), mkexpr(src),
27824 narrowTo(Ity_I8, binop(mkSizedOp(ty,Iop_And8), mkexpr(amt),
27825 mkU(ty,8*size-1)))) );
27826 /* Flags aren't modified. */
27827 *uses_vvvv = True;
27828 return delta;
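/* BMI2 SARX/SHLX/SHRX sketch: the shift amount (taken from the vvvv
   register) is masked to the operand width minus one and the flags are
   left untouched, so for example a 64-bit SHLX by 66 actually shifts by
   66 & 63 = 2.  The And/narrowTo sequence above implements that
   masking. */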
27832 static Long dis_FMA ( const VexAbiInfo* vbi, Prefix pfx, Long delta, UChar opc )
27834 UChar modrm = getUChar(delta);
27835 UInt rG = gregOfRexRM(pfx, modrm);
27836 UInt rV = getVexNvvvv(pfx);
27837 Bool scalar = (opc & 0xF) > 7 && (opc & 1);
27838 IRType ty = getRexW(pfx) ? Ity_F64 : Ity_F32;
27839 IRType vty = scalar ? ty : (getVexL(pfx) ? Ity_V256 : Ity_V128);
27840 IRTemp addr = IRTemp_INVALID;
27841 HChar dis_buf[50];
27842 Int alen = 0;
27843 const HChar *name;
27844 const HChar *suffix;
27845 const HChar *order;
27846 Bool negateRes = False;
27847 Bool negateZeven = False;
27848 Bool negateZodd = False;
27849 UInt count = 0;
27851 switch (opc & 0xF) {
27852 case 0x6: name = "addsub"; negateZeven = True; break;
27853 case 0x7: name = "subadd"; negateZodd = True; break;
27854 case 0x8:
27855 case 0x9: name = "add"; break;
27856 case 0xA:
27857 case 0xB: name = "sub"; negateZeven = True; negateZodd = True;
27858 break;
27859 case 0xC:
27860 case 0xD: name = "add"; negateRes = True; negateZeven = True;
27861 negateZodd = True; break;
27862 case 0xE:
27863 case 0xF: name = "sub"; negateRes = True; break;
27864 default: vpanic("dis_FMA(amd64)"); break;
27866 switch (opc & 0xF0) {
27867 case 0x90: order = "132"; break;
27868 case 0xA0: order = "213"; break;
27869 case 0xB0: order = "231"; break;
27870 default: vpanic("dis_FMA(amd64)"); break;
27872 if (scalar) {
27873 suffix = ty == Ity_F64 ? "sd" : "ss";
27874 } else {
27875 suffix = ty == Ity_F64 ? "pd" : "ps";
27878 // Figure out |count| (the number of elements) by considering |vty| and |ty|.
27879 count = sizeofIRType(vty) / sizeofIRType(ty);
27880 vassert(count == 1 || count == 2 || count == 4 || count == 8);
27882 // Fetch operands into the first |count| elements of |sX|, |sY| and |sZ|.
27883 UInt i;
27884 IRExpr *sX[8], *sY[8], *sZ[8], *res[8];
27885 for (i = 0; i < 8; i++) sX[i] = sY[i] = sZ[i] = res[i] = NULL;
27887 IRExpr* (*getYMMRegLane)(UInt,Int)
27888 = ty == Ity_F32 ? getYMMRegLane32F : getYMMRegLane64F;
27889 void (*putYMMRegLane)(UInt,Int,IRExpr*)
27890 = ty == Ity_F32 ? putYMMRegLane32F : putYMMRegLane64F;
27892 for (i = 0; i < count; i++) {
27893 sX[i] = getYMMRegLane(rG, i);
27894 sZ[i] = getYMMRegLane(rV, i);
27897 if (epartIsReg(modrm)) {
27898 UInt rE = eregOfRexRM(pfx, modrm);
27899 delta += 1;
27900 for (i = 0; i < count; i++) {
27901 sY[i] = getYMMRegLane(rE, i);
27903 if (vty == Ity_V256) {
27904 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27905 name, order, suffix, nameYMMReg(rE), nameYMMReg(rV),
27906 nameYMMReg(rG));
27907 } else {
27908 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27909 name, order, suffix, nameXMMReg(rE), nameXMMReg(rV),
27910 nameXMMReg(rG));
27912 } else {
27913 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27914 delta += alen;
27915 for (i = 0; i < count; i++) {
27916 sY[i] = loadLE(ty, binop(Iop_Add64, mkexpr(addr),
27917 mkU64(i * sizeofIRType(ty))));
27919 if (vty == Ity_V256) {
27920 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27921 name, order, suffix, dis_buf, nameYMMReg(rV),
27922 nameYMMReg(rG));
27923 } else {
27924 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27925 name, order, suffix, dis_buf, nameXMMReg(rV),
27926 nameXMMReg(rG));
27930 /* sX/sY/sZ are now in 132 order. If the instruction requires a different
27931 order, swap them around. */
27933 # define COPY_ARR(_dst, _src) \
27934 do { for (int j = 0; j < 8; j++) { _dst[j] = _src[j]; } } while (0)
27936 if ((opc & 0xF0) != 0x90) {
27937 IRExpr* temp[8];
27938 COPY_ARR(temp, sX);
27939 if ((opc & 0xF0) == 0xA0) {
27940 COPY_ARR(sX, sZ);
27941 COPY_ARR(sZ, sY);
27942 COPY_ARR(sY, temp);
27943 } else {
27944 COPY_ARR(sX, sZ);
27945 COPY_ARR(sZ, temp);
27949 # undef COPY_ARR
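   /* After the reordering above, the computation below is always
      sX * sY + sZ.  In the incoming 132 layout sX is operand 1 (the
      destination), sY is operand 3 (r/m) and sZ is operand 2 (vvvv),
      giving op1*op3 + op2; the 0xA0 swap yields op2*op1 + op3 (the 213
      form) and the 0xB0 swap yields op2*op3 + op1 (the 231 form),
      matching the Intel FMA naming convention. */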
27951 for (i = 0; i < count; i++) {
27952 IROp opNEG = ty == Ity_F64 ? Iop_NegF64 : Iop_NegF32;
27953 if ((i & 1) ? negateZodd : negateZeven) {
27954 sZ[i] = unop(opNEG, sZ[i]);
27956 res[i] = IRExpr_Qop(ty == Ity_F64 ? Iop_MAddF64 : Iop_MAddF32,
27957 get_FAKE_roundingmode(), sX[i], sY[i], sZ[i]);
27958 if (negateRes) {
27959 res[i] = unop(opNEG, res[i]);
27963 for (i = 0; i < count; i++) {
27964 putYMMRegLane(rG, i, res[i]);
27967 switch (vty) {
27968 case Ity_F32: putYMMRegLane32(rG, 1, mkU32(0)); /*fallthru*/
27969 case Ity_F64: putYMMRegLane64(rG, 1, mkU64(0)); /*fallthru*/
27970 case Ity_V128: putYMMRegLane128(rG, 1, mkV128(0)); /*fallthru*/
27971 case Ity_V256: break;
27972 default: vassert(0);
27975 return delta;
27979 /* Masked load or masked store. */
27980 static ULong dis_VMASKMOV ( Bool *uses_vvvv, const VexAbiInfo* vbi,
27981 Prefix pfx, Long delta,
27982 const HChar* opname, Bool isYMM, IRType ty,
27983 Bool isLoad )
27985 HChar dis_buf[50];
27986 Int alen, i;
27987 IRTemp addr;
27988 UChar modrm = getUChar(delta);
27989 UInt rG = gregOfRexRM(pfx,modrm);
27990 UInt rV = getVexNvvvv(pfx);
27992 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27993 delta += alen;
27995 /**/ if (isLoad && isYMM) {
27996 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
27998 else if (isLoad && !isYMM) {
27999 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
28002 else if (!isLoad && isYMM) {
28003 DIP("%s %s,%s,%s\n", opname, nameYMMReg(rG), nameYMMReg(rV), dis_buf );
28005 else {
28006 vassert(!isLoad && !isYMM);
28007 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rG), nameXMMReg(rV), dis_buf );
28010 vassert(ty == Ity_I32 || ty == Ity_I64);
28011 Bool laneIs32 = ty == Ity_I32;
28013 Int nLanes = (isYMM ? 2 : 1) * (laneIs32 ? 4 : 2);
28015 for (i = 0; i < nLanes; i++) {
28016 IRExpr* shAmt = laneIs32 ? mkU8(31) : mkU8(63);
28017 IRExpr* one = laneIs32 ? mkU32(1) : mkU64(1);
28018 IROp opSHR = laneIs32 ? Iop_Shr32 : Iop_Shr64;
28019 IROp opEQ = laneIs32 ? Iop_CmpEQ32 : Iop_CmpEQ64;
28020 IRExpr* lane = (laneIs32 ? getYMMRegLane32 : getYMMRegLane64)( rV, i );
28022 IRTemp cond = newTemp(Ity_I1);
28023 assign(cond, binop(opEQ, binop(opSHR, lane, shAmt), one));
28025 IRTemp data = newTemp(ty);
28026 IRExpr* ea = binop(Iop_Add64, mkexpr(addr),
28027 mkU64(i * (laneIs32 ? 4 : 8)));
28028 if (isLoad) {
28029 stmt(
28030 IRStmt_LoadG(
28031 Iend_LE, laneIs32 ? ILGop_Ident32 : ILGop_Ident64,
28032 data, ea, laneIs32 ? mkU32(0) : mkU64(0), mkexpr(cond)
28034 (laneIs32 ? putYMMRegLane32 : putYMMRegLane64)( rG, i, mkexpr(data) );
28035 } else {
28036 assign(data, (laneIs32 ? getYMMRegLane32 : getYMMRegLane64)( rG, i ));
28037 stmt( IRStmt_StoreG(Iend_LE, ea, mkexpr(data), mkexpr(cond)) );
28041 if (isLoad && !isYMM)
28042 putYMMRegLane128( rG, 1, mkV128(0) );
28044 *uses_vvvv = True;
28045 return delta;
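/* The per-lane guard above is simply the sign bit of the corresponding
   mask lane, i.e. cond = (lane >> 31) == 1 (or >> 63 for 64-bit lanes).
   Using guarded IR accesses (IRStmt_LoadG/StoreG) means lanes whose mask
   bit is clear perform no memory access at all, which mirrors VMASKMOV's
   architectural behaviour of suppressing faults on masked-off
   elements. */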
28049 /* Gather. */
28050 static ULong dis_VGATHER ( Bool *uses_vvvv, const VexAbiInfo* vbi,
28051 Prefix pfx, Long delta,
28052 const HChar* opname, Bool isYMM,
28053 Bool isVM64x, IRType ty )
28055 HChar dis_buf[50];
28056 Int alen, i, vscale, count1, count2;
28057 IRTemp addr;
28058 UChar modrm = getUChar(delta);
28059 UInt rG = gregOfRexRM(pfx,modrm);
28060 UInt rV = getVexNvvvv(pfx);
28061 UInt rI;
28062 IRType dstTy = (isYMM && (ty == Ity_I64 || !isVM64x)) ? Ity_V256 : Ity_V128;
28063 IRType idxTy = (isYMM && (ty == Ity_I32 || isVM64x)) ? Ity_V256 : Ity_V128;
28064 IRTemp cond;
28065 addr = disAVSIBMode ( &alen, vbi, pfx, delta, dis_buf, &rI,
28066 idxTy, &vscale );
28067 if (addr == IRTemp_INVALID || rI == rG || rI == rV || rG == rV)
28068 return delta;
28069 if (dstTy == Ity_V256) {
28070 DIP("%s %s,%s,%s\n", opname, nameYMMReg(rV), dis_buf, nameYMMReg(rG) );
28071 } else {
28072 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rV), dis_buf, nameXMMReg(rG) );
28074 delta += alen;
28076 if (ty == Ity_I32) {
28077 count1 = isYMM ? 8 : 4;
28078 count2 = isVM64x ? count1 / 2 : count1;
28079 } else {
28080 count1 = count2 = isYMM ? 4 : 2;
28083 /* First update the mask register to copies of the sign bit. */
28084 if (ty == Ity_I32) {
28085 if (isYMM)
28086 putYMMReg( rV, binop(Iop_SarN32x8, getYMMReg( rV ), mkU8(31)) );
28087 else
28088 putYMMRegLoAndZU( rV, binop(Iop_SarN32x4, getXMMReg( rV ), mkU8(31)) );
28089 } else {
28090 for (i = 0; i < count1; i++) {
28091 putYMMRegLane64( rV, i, binop(Iop_Sar64, getYMMRegLane64( rV, i ),
28092 mkU8(63)) );
28096 /* Next gather the individual elements. If any fault occurs, the
28097 corresponding mask element will be set and the loop stops. */
28098 for (i = 0; i < count2; i++) {
28099 IRExpr *expr, *addr_expr;
28100 cond = newTemp(Ity_I1);
28101 assign( cond,
28102 binop(ty == Ity_I32 ? Iop_CmpLT32S : Iop_CmpLT64S,
28103 ty == Ity_I32 ? getYMMRegLane32( rV, i )
28104 : getYMMRegLane64( rV, i ),
28105 mkU(ty, 0)) );
28106 expr = ty == Ity_I32 ? getYMMRegLane32( rG, i )
28107 : getYMMRegLane64( rG, i );
28108 addr_expr = isVM64x ? getYMMRegLane64( rI, i )
28109 : unop(Iop_32Sto64, getYMMRegLane32( rI, i ));
28110 switch (vscale) {
28111 case 2: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(1)); break;
28112 case 4: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(2)); break;
28113 case 8: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(3)); break;
28114 default: break;
28116 addr_expr = binop(Iop_Add64, mkexpr(addr), addr_expr);
28117 addr_expr = handleAddrOverrides(vbi, pfx, addr_expr);
28118 addr_expr = IRExpr_ITE(mkexpr(cond), addr_expr, getIReg64(R_RSP));
28119 expr = IRExpr_ITE(mkexpr(cond), loadLE(ty, addr_expr), expr);
28120 if (ty == Ity_I32) {
28121 putYMMRegLane32( rG, i, expr );
28122 putYMMRegLane32( rV, i, mkU32(0) );
28123 } else {
28124 putYMMRegLane64( rG, i, expr);
28125 putYMMRegLane64( rV, i, mkU64(0) );
28129 if (!isYMM || (ty == Ity_I32 && isVM64x)) {
28130 if (ty == Ity_I64 || isYMM)
28131 putYMMRegLane128( rV, 1, mkV128(0) );
28132 else if (ty == Ity_I32 && count2 == 2) {
28133 putYMMRegLane64( rV, 1, mkU64(0) );
28134 putYMMRegLane64( rG, 1, mkU64(0) );
28136 putYMMRegLane128( rG, 1, mkV128(0) );
28139 *uses_vvvv = True;
28140 return delta;
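/* Two details of the gather sequence above are worth noting.  First,
   when an element's mask bit is clear, the load address is redirected to
   RSP, so the access whose result the ITE discards cannot fault on a
   wild index.  Second, each mask lane is cleared immediately after its
   element is written, which approximates the architectural restart rule:
   if a gather faults part way through, already-completed elements have
   their mask bits cleared. */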
28144 __attribute__((noinline))
28145 static
28146 Long dis_ESC_0F38__VEX (
28147 /*MB_OUT*/DisResult* dres,
28148 /*OUT*/ Bool* uses_vvvv,
28149 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
28150 Bool resteerCisOk,
28151 void* callback_opaque,
28152 const VexArchInfo* archinfo,
28153 const VexAbiInfo* vbi,
28154 Prefix pfx, Int sz, Long deltaIN
28157 IRTemp addr = IRTemp_INVALID;
28158 Int alen = 0;
28159 HChar dis_buf[50];
28160 Long delta = deltaIN;
28161 UChar opc = getUChar(delta);
28162 delta++;
28163 *uses_vvvv = False;
28165 switch (opc) {
28167 case 0x00:
28168 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */
28169 /* VPSHUFB = VEX.NDS.128.66.0F38.WIG 00 /r */
28170 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28171 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
28172 uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_XMM );
28173 goto decode_success;
28175 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */
28176 /* VPSHUFB = VEX.NDS.256.66.0F38.WIG 00 /r */
28177 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28178 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28179 uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_YMM );
28180 goto decode_success;
28182 break;
28184 case 0x01:
28185 case 0x02:
28186 case 0x03:
28187 /* VPHADDW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 01 /r */
28188 /* VPHADDD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 02 /r */
28189 /* VPHADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 03 /r */
28190 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28191 delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc );
28192 *uses_vvvv = True;
28193 goto decode_success;
28195 /* VPHADDW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 01 /r */
28196 /* VPHADDD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 02 /r */
28197 /* VPHADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 03 /r */
28198 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28199 delta = dis_PHADD_256( vbi, pfx, delta, opc );
28200 *uses_vvvv = True;
28201 goto decode_success;
28203 break;
28205 case 0x04:
28206 /* VPMADDUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 04 /r */
28207 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28208 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
28209 uses_vvvv, vbi, pfx, delta, "vpmaddubsw",
28210 math_PMADDUBSW_128 );
28211 goto decode_success;
28213 /* VPMADDUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 04 /r */
28214 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28215 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28216 uses_vvvv, vbi, pfx, delta, "vpmaddubsw",
28217 math_PMADDUBSW_256 );
28218 goto decode_success;
28220 break;
28222 case 0x05:
28223 case 0x06:
28224 case 0x07:
28225 /* VPHSUBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 05 /r */
28226 /* VPHSUBD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 06 /r */
28227 /* VPHSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 07 /r */
28228 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28229 delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc );
28230 *uses_vvvv = True;
28231 goto decode_success;
28233 /* VPHSUBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 05 /r */
28234 /* VPHSUBD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 06 /r */
28235 /* VPHSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 07 /r */
28236 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28237 delta = dis_PHADD_256( vbi, pfx, delta, opc );
28238 *uses_vvvv = True;
28239 goto decode_success;
28241 break;
28243 case 0x08:
28244 case 0x09:
28245 case 0x0A:
28246 /* VPSIGNB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 08 /r */
28247 /* VPSIGNW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 09 /r */
28248 /* VPSIGND xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0A /r */
28249 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28250 IRTemp sV = newTemp(Ity_V128);
28251 IRTemp dV = newTemp(Ity_V128);
28252 IRTemp sHi, sLo, dHi, dLo;
28253 sHi = sLo = dHi = dLo = IRTemp_INVALID;
28254 HChar ch = '?';
28255 Int laneszB = 0;
28256 UChar modrm = getUChar(delta);
28257 UInt rG = gregOfRexRM(pfx,modrm);
28258 UInt rV = getVexNvvvv(pfx);
28260 switch (opc) {
28261 case 0x08: laneszB = 1; ch = 'b'; break;
28262 case 0x09: laneszB = 2; ch = 'w'; break;
28263 case 0x0A: laneszB = 4; ch = 'd'; break;
28264 default: vassert(0);
28267 assign( dV, getXMMReg(rV) );
28269 if (epartIsReg(modrm)) {
28270 UInt rE = eregOfRexRM(pfx,modrm);
28271 assign( sV, getXMMReg(rE) );
28272 delta += 1;
28273 DIP("vpsign%c %s,%s,%s\n", ch, nameXMMReg(rE),
28274 nameXMMReg(rV), nameXMMReg(rG));
28275 } else {
28276 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
28277 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
28278 delta += alen;
28279 DIP("vpsign%c %s,%s,%s\n", ch, dis_buf,
28280 nameXMMReg(rV), nameXMMReg(rG));
28283 breakupV128to64s( dV, &dHi, &dLo );
28284 breakupV128to64s( sV, &sHi, &sLo );
28286 putYMMRegLoAndZU(
28288 binop(Iop_64HLtoV128,
28289 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
28290 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
28293 *uses_vvvv = True;
28294 goto decode_success;
28296 /* VPSIGNB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 08 /r */
28297 /* VPSIGNW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 09 /r */
28298 /* VPSIGND ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0A /r */
28299 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28300 IRTemp sV = newTemp(Ity_V256);
28301 IRTemp dV = newTemp(Ity_V256);
28302 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
28303 s3 = s2 = s1 = s0 = IRTemp_INVALID;
28304 d3 = d2 = d1 = d0 = IRTemp_INVALID;
28305 UChar ch = '?';
28306 Int laneszB = 0;
28307 UChar modrm = getUChar(delta);
28308 UInt rG = gregOfRexRM(pfx,modrm);
28309 UInt rV = getVexNvvvv(pfx);
28311 switch (opc) {
28312 case 0x08: laneszB = 1; ch = 'b'; break;
28313 case 0x09: laneszB = 2; ch = 'w'; break;
28314 case 0x0A: laneszB = 4; ch = 'd'; break;
28315 default: vassert(0);
28318 assign( dV, getYMMReg(rV) );
28320 if (epartIsReg(modrm)) {
28321 UInt rE = eregOfRexRM(pfx,modrm);
28322 assign( sV, getYMMReg(rE) );
28323 delta += 1;
28324 DIP("vpsign%c %s,%s,%s\n", ch, nameYMMReg(rE),
28325 nameYMMReg(rV), nameYMMReg(rG));
28326 } else {
28327 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
28328 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
28329 delta += alen;
28330 DIP("vpsign%c %s,%s,%s\n", ch, dis_buf,
28331 nameYMMReg(rV), nameYMMReg(rG));
28334 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
28335 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
28337 putYMMReg(
28339 binop( Iop_V128HLtoV256,
28340 binop(Iop_64HLtoV128,
28341 dis_PSIGN_helper( mkexpr(s3), mkexpr(d3), laneszB ),
28342 dis_PSIGN_helper( mkexpr(s2), mkexpr(d2), laneszB )
28344 binop(Iop_64HLtoV128,
28345 dis_PSIGN_helper( mkexpr(s1), mkexpr(d1), laneszB ),
28346 dis_PSIGN_helper( mkexpr(s0), mkexpr(d0), laneszB )
28350 *uses_vvvv = True;
28351 goto decode_success;
28353 break;
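/* PSIGNB/W/D semantics, per lane: if the source lane is negative the
   destination lane is negated, if it is zero the destination lane is
   zeroed, otherwise it passes through unchanged.  dis_PSIGN_helper is
   presumably implementing that rule on each 64-bit chunk. */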
28355 case 0x0B:
28356 /* VPMULHRSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0B /r */
28357 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28358 IRTemp sV = newTemp(Ity_V128);
28359 IRTemp dV = newTemp(Ity_V128);
28360 IRTemp sHi, sLo, dHi, dLo;
28361 sHi = sLo = dHi = dLo = IRTemp_INVALID;
28362 UChar modrm = getUChar(delta);
28363 UInt rG = gregOfRexRM(pfx,modrm);
28364 UInt rV = getVexNvvvv(pfx);
28366 assign( dV, getXMMReg(rV) );
28368 if (epartIsReg(modrm)) {
28369 UInt rE = eregOfRexRM(pfx,modrm);
28370 assign( sV, getXMMReg(rE) );
28371 delta += 1;
28372 DIP("vpmulhrsw %s,%s,%s\n", nameXMMReg(rE),
28373 nameXMMReg(rV), nameXMMReg(rG));
28374 } else {
28375 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
28376 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
28377 delta += alen;
28378 DIP("vpmulhrsw %s,%s,%s\n", dis_buf,
28379 nameXMMReg(rV), nameXMMReg(rG));
28382 breakupV128to64s( dV, &dHi, &dLo );
28383 breakupV128to64s( sV, &sHi, &sLo );
28385 putYMMRegLoAndZU(
28387 binop(Iop_64HLtoV128,
28388 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
28389 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
28392 *uses_vvvv = True;
28393 goto decode_success;
28395 /* VPMULHRSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0B /r */
28396 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28397 IRTemp sV = newTemp(Ity_V256);
28398 IRTemp dV = newTemp(Ity_V256);
28399 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
28400 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
28401 UChar modrm = getUChar(delta);
28402 UInt rG = gregOfRexRM(pfx,modrm);
28403 UInt rV = getVexNvvvv(pfx);
28405 assign( dV, getYMMReg(rV) );
28407 if (epartIsReg(modrm)) {
28408 UInt rE = eregOfRexRM(pfx,modrm);
28409 assign( sV, getYMMReg(rE) );
28410 delta += 1;
28411 DIP("vpmulhrsw %s,%s,%s\n", nameYMMReg(rE),
28412 nameYMMReg(rV), nameYMMReg(rG));
28413 } else {
28414 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
28415 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
28416 delta += alen;
28417 DIP("vpmulhrsw %s,%s,%s\n", dis_buf,
28418 nameYMMReg(rV), nameYMMReg(rG));
28421 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
28422 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
28424 putYMMReg(
28426 binop(Iop_V128HLtoV256,
28427 binop(Iop_64HLtoV128,
28428 dis_PMULHRSW_helper( mkexpr(s3), mkexpr(d3) ),
28429 dis_PMULHRSW_helper( mkexpr(s2), mkexpr(d2) ) ),
28430 binop(Iop_64HLtoV128,
28431 dis_PMULHRSW_helper( mkexpr(s1), mkexpr(d1) ),
28432 dis_PMULHRSW_helper( mkexpr(s0), mkexpr(d0) ) )
28435 *uses_vvvv = True;
28437 goto decode_success;
28439 break;
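/* PMULHRSW, per 16-bit lane: form the full 32-bit signed product, then
   compute ((product >> 14) + 1) >> 1, i.e. a Q15 fixed-point multiply
   with round-to-nearest.  Example: 0x4000 * 0x4000 gives product
   0x10000000, then (0x4000 + 1) >> 1 = 0x2000. */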
28441 case 0x0C:
28442 /* VPERMILPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0C /r */
28443 if (have66noF2noF3(pfx)
28444 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
28445 UChar modrm = getUChar(delta);
28446 UInt rG = gregOfRexRM(pfx, modrm);
28447 UInt rV = getVexNvvvv(pfx);
28448 IRTemp ctrlV = newTemp(Ity_V128);
28449 if (epartIsReg(modrm)) {
28450 UInt rE = eregOfRexRM(pfx, modrm);
28451 delta += 1;
28452 DIP("vpermilps %s,%s,%s\n",
28453 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
28454 assign(ctrlV, getXMMReg(rE));
28455 } else {
28456 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28457 delta += alen;
28458 DIP("vpermilps %s,%s,%s\n",
28459 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
28460 assign(ctrlV, loadLE(Ity_V128, mkexpr(addr)));
28462 IRTemp dataV = newTemp(Ity_V128);
28463 assign(dataV, getXMMReg(rV));
28464 IRTemp resV = math_PERMILPS_VAR_128(dataV, ctrlV);
28465 putYMMRegLoAndZU(rG, mkexpr(resV));
28466 *uses_vvvv = True;
28467 goto decode_success;
28469 /* VPERMILPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0C /r */
28470 if (have66noF2noF3(pfx)
28471 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
28472 UChar modrm = getUChar(delta);
28473 UInt rG = gregOfRexRM(pfx, modrm);
28474 UInt rV = getVexNvvvv(pfx);
28475 IRTemp ctrlV = newTemp(Ity_V256);
28476 if (epartIsReg(modrm)) {
28477 UInt rE = eregOfRexRM(pfx, modrm);
28478 delta += 1;
28479 DIP("vpermilps %s,%s,%s\n",
28480 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
28481 assign(ctrlV, getYMMReg(rE));
28482 } else {
28483 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28484 delta += alen;
28485 DIP("vpermilps %s,%s,%s\n",
28486 dis_buf, nameYMMReg(rV), nameYMMReg(rG));
28487 assign(ctrlV, loadLE(Ity_V256, mkexpr(addr)));
28489 IRTemp dataV = newTemp(Ity_V256);
28490 assign(dataV, getYMMReg(rV));
28491 IRTemp resV = math_PERMILPS_VAR_256(dataV, ctrlV);
28492 putYMMReg(rG, mkexpr(resV));
28493 *uses_vvvv = True;
28494 goto decode_success;
28496 break;
28498 case 0x0D:
28499 /* VPERMILPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0D /r */
28500 if (have66noF2noF3(pfx)
28501 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
28502 UChar modrm = getUChar(delta);
28503 UInt rG = gregOfRexRM(pfx, modrm);
28504 UInt rV = getVexNvvvv(pfx);
28505 IRTemp ctrlV = newTemp(Ity_V128);
28506 if (epartIsReg(modrm)) {
28507 UInt rE = eregOfRexRM(pfx, modrm);
28508 delta += 1;
28509 DIP("vpermilpd %s,%s,%s\n",
28510 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
28511 assign(ctrlV, getXMMReg(rE));
28512 } else {
28513 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28514 delta += alen;
28515 DIP("vpermilpd %s,%s,%s\n",
28516 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
28517 assign(ctrlV, loadLE(Ity_V128, mkexpr(addr)));
28519 IRTemp dataV = newTemp(Ity_V128);
28520 assign(dataV, getXMMReg(rV));
28521 IRTemp resV = math_PERMILPD_VAR_128(dataV, ctrlV);
28522 putYMMRegLoAndZU(rG, mkexpr(resV));
28523 *uses_vvvv = True;
28524 goto decode_success;
28526 /* VPERMILPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0D /r */
28527 if (have66noF2noF3(pfx)
28528 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
28529 UChar modrm = getUChar(delta);
28530 UInt rG = gregOfRexRM(pfx, modrm);
28531 UInt rV = getVexNvvvv(pfx);
28532 IRTemp ctrlV = newTemp(Ity_V256);
28533 if (epartIsReg(modrm)) {
28534 UInt rE = eregOfRexRM(pfx, modrm);
28535 delta += 1;
28536 DIP("vpermilpd %s,%s,%s\n",
28537 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
28538 assign(ctrlV, getYMMReg(rE));
28539 } else {
28540 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28541 delta += alen;
28542 DIP("vpermilpd %s,%s,%s\n",
28543 dis_buf, nameYMMReg(rV), nameYMMReg(rG));
28544 assign(ctrlV, loadLE(Ity_V256, mkexpr(addr)));
28546 IRTemp dataV = newTemp(Ity_V256);
28547 assign(dataV, getYMMReg(rV));
28548 IRTemp resV = math_PERMILPD_VAR_256(dataV, ctrlV);
28549 putYMMReg(rG, mkexpr(resV));
28550 *uses_vvvv = True;
28551 goto decode_success;
28553 break;
28555 case 0x0E:
28556 /* VTESTPS xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0E /r */
28557 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28558 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 32 );
28559 goto decode_success;
28561 /* VTESTPS ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0E /r */
28562 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28563 delta = dis_xTESTy_256( vbi, pfx, delta, 32 );
28564 goto decode_success;
28566 break;
28568 case 0x0F:
28569 /* VTESTPD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0F /r */
28570 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28571 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 64 );
28572 goto decode_success;
28574 /* VTESTPD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0F /r */
28575 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28576 delta = dis_xTESTy_256( vbi, pfx, delta, 64 );
28577 goto decode_success;
28579 break;
28581 case 0x13:
28582 /* VCVTPH2PS xmm2/m64, xmm1 = VEX.128.66.0F38.W0 13 /r */
28583 if (have66noF2noF3(pfx)
28584 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/
28585 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
28586 delta = dis_VCVTPH2PS( vbi, pfx, delta, /*is256bit=*/False );
28587 goto decode_success;
28589 /* VCVTPH2PS xmm2/m128, ymm1 = VEX.256.66.0F38.W0 13 /r */
28590 if (have66noF2noF3(pfx)
28591 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/
28592 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
28593 delta = dis_VCVTPH2PS( vbi, pfx, delta, /*is256bit=*/True );
28594 goto decode_success;
28596 break;
28598 case 0x16:
28599 /* VPERMPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 16 /r */
28600 if (have66noF2noF3(pfx)
28601 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
28602 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28603 uses_vvvv, vbi, pfx, delta, "vpermps", math_VPERMD );
28604 goto decode_success;
28606 break;
28608 case 0x17:
28609 /* VPTEST xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 17 /r */
28610 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28611 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 0 );
28612 goto decode_success;
28614 /* VPTEST ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 17 /r */
28615 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28616 delta = dis_xTESTy_256( vbi, pfx, delta, 0 );
28617 goto decode_success;
28619 break;
28621 case 0x18:
28622 /* VBROADCASTSS m32, xmm1 = VEX.128.66.0F38.WIG 18 /r */
28623 if (have66noF2noF3(pfx)
28624 && 0==getVexL(pfx)/*128*/
28625 && !epartIsReg(getUChar(delta))) {
28626 UChar modrm = getUChar(delta);
28627 UInt rG = gregOfRexRM(pfx, modrm);
28628 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28629 delta += alen;
28630 DIP("vbroadcastss %s,%s\n", dis_buf, nameXMMReg(rG));
28631 IRTemp t32 = newTemp(Ity_I32);
28632 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
28633 IRTemp t64 = newTemp(Ity_I64);
28634 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28635 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
28636 putYMMRegLoAndZU(rG, res);
28637 goto decode_success;
28639 /* VBROADCASTSS m32, ymm1 = VEX.256.66.0F38.WIG 18 /r */
28640 if (have66noF2noF3(pfx)
28641 && 1==getVexL(pfx)/*256*/
28642 && !epartIsReg(getUChar(delta))) {
28643 UChar modrm = getUChar(delta);
28644 UInt rG = gregOfRexRM(pfx, modrm);
28645 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28646 delta += alen;
28647 DIP("vbroadcastss %s,%s\n", dis_buf, nameYMMReg(rG));
28648 IRTemp t32 = newTemp(Ity_I32);
28649 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
28650 IRTemp t64 = newTemp(Ity_I64);
28651 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28652 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28653 mkexpr(t64), mkexpr(t64));
28654 putYMMReg(rG, res);
28655 goto decode_success;
28657 /* VBROADCASTSS xmm2, xmm1 = VEX.128.66.0F38.WIG 18 /r */
28658 if (have66noF2noF3(pfx)
28659 && 0==getVexL(pfx)/*128*/
28660 && epartIsReg(getUChar(delta))) {
28661 UChar modrm = getUChar(delta);
28662 UInt rG = gregOfRexRM(pfx, modrm);
28663 UInt rE = eregOfRexRM(pfx, modrm);
28664 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
28665 IRTemp t32 = newTemp(Ity_I32);
28666 assign(t32, getXMMRegLane32(rE, 0));
28667 IRTemp t64 = newTemp(Ity_I64);
28668 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28669 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
28670 putYMMRegLoAndZU(rG, res);
28671 delta++;
28672 goto decode_success;
28674 /* VBROADCASTSS xmm2, ymm1 = VEX.256.66.0F38.WIG 18 /r */
28675 if (have66noF2noF3(pfx)
28676 && 1==getVexL(pfx)/*256*/
28677 && epartIsReg(getUChar(delta))) {
28678 UChar modrm = getUChar(delta);
28679 UInt rG = gregOfRexRM(pfx, modrm);
28680 UInt rE = eregOfRexRM(pfx, modrm);
28681 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
28682 IRTemp t32 = newTemp(Ity_I32);
28683 assign(t32, getXMMRegLane32(rE, 0));
28684 IRTemp t64 = newTemp(Ity_I64);
28685 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28686 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28687 mkexpr(t64), mkexpr(t64));
28688 putYMMReg(rG, res);
28689 delta++;
28690 goto decode_success;
28692 break;
28694 case 0x19:
28695 /* VBROADCASTSD m64, ymm1 = VEX.256.66.0F38.WIG 19 /r */
28696 if (have66noF2noF3(pfx)
28697 && 1==getVexL(pfx)/*256*/
28698 && !epartIsReg(getUChar(delta))) {
28699 UChar modrm = getUChar(delta);
28700 UInt rG = gregOfRexRM(pfx, modrm);
28701 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28702 delta += alen;
28703 DIP("vbroadcastsd %s,%s\n", dis_buf, nameYMMReg(rG));
28704 IRTemp t64 = newTemp(Ity_I64);
28705 assign(t64, loadLE(Ity_I64, mkexpr(addr)));
28706 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28707 mkexpr(t64), mkexpr(t64));
28708 putYMMReg(rG, res);
28709 goto decode_success;
28711 /* VBROADCASTSD xmm2, ymm1 = VEX.256.66.0F38.WIG 19 /r */
28712 if (have66noF2noF3(pfx)
28713 && 1==getVexL(pfx)/*256*/
28714 && epartIsReg(getUChar(delta))) {
28715 UChar modrm = getUChar(delta);
28716 UInt rG = gregOfRexRM(pfx, modrm);
28717 UInt rE = eregOfRexRM(pfx, modrm);
28718 DIP("vbroadcastsd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
28719 IRTemp t64 = newTemp(Ity_I64);
28720 assign(t64, getXMMRegLane64(rE, 0));
28721 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28722 mkexpr(t64), mkexpr(t64));
28723 putYMMReg(rG, res);
28724 delta++;
28725 goto decode_success;
28727 break;
28729 case 0x1A:
28730 /* VBROADCASTF128 m128, ymm1 = VEX.256.66.0F38.WIG 1A /r */
28731 if (have66noF2noF3(pfx)
28732 && 1==getVexL(pfx)/*256*/
28733 && !epartIsReg(getUChar(delta))) {
28734 UChar modrm = getUChar(delta);
28735 UInt rG = gregOfRexRM(pfx, modrm);
28736 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28737 delta += alen;
28738 DIP("vbroadcastf128 %s,%s\n", dis_buf, nameYMMReg(rG));
28739 IRTemp t128 = newTemp(Ity_V128);
28740 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
28741 putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) );
28742 goto decode_success;
28744 break;
28746 case 0x1C:
28747 /* VPABSB xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1C /r */
28748 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28749 delta = dis_AVX128_E_to_G_unary(
28750 uses_vvvv, vbi, pfx, delta,
28751 "vpabsb", math_PABS_XMM_pap1 );
28752 goto decode_success;
28754 /* VPABSB ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1C /r */
28755 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28756 delta = dis_AVX256_E_to_G_unary(
28757 uses_vvvv, vbi, pfx, delta,
28758 "vpabsb", math_PABS_YMM_pap1 );
28759 goto decode_success;
28761 break;
28763 case 0x1D:
28764 /* VPABSW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1D /r */
28765 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28766 delta = dis_AVX128_E_to_G_unary(
28767 uses_vvvv, vbi, pfx, delta,
28768 "vpabsw", math_PABS_XMM_pap2 );
28769 goto decode_success;
28771 /* VPABSW ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1D /r */
28772 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28773 delta = dis_AVX256_E_to_G_unary(
28774 uses_vvvv, vbi, pfx, delta,
28775 "vpabsw", math_PABS_YMM_pap2 );
28776 goto decode_success;
28778 break;
28780 case 0x1E:
28781 /* VPABSD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1E /r */
28782 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28783 delta = dis_AVX128_E_to_G_unary(
28784 uses_vvvv, vbi, pfx, delta,
28785 "vpabsd", math_PABS_XMM_pap4 );
28786 goto decode_success;
28788 /* VPABSD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1E /r */
28789 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28790 delta = dis_AVX256_E_to_G_unary(
28791 uses_vvvv, vbi, pfx, delta,
28792 "vpabsd", math_PABS_YMM_pap4 );
28793 goto decode_success;
28795 break;
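      /* Opcodes 0x20..0x25 below are the sign-extending VPMOVSX* forms;
         opcodes 0x30..0x35 further down are the zero-extending VPMOVZX*
         counterparts.  Most widths share the dis_PMOVxX{BW,BD,WD,DQ}
         helpers, with the xIsZ flag choosing zero- versus sign-extension;
         the BQ and WQ widths use dedicated SX/ZX helpers. */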
28797 case 0x20:
28798 /* VPMOVSXBW xmm2/m64, xmm1 */
28799 /* VPMOVSXBW = VEX.128.66.0F38.WIG 20 /r */
28800 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28801 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
28802 True/*isAvx*/, False/*!xIsZ*/ );
28803 goto decode_success;
28805 /* VPMOVSXBW xmm2/m128, ymm1 */
28806 /* VPMOVSXBW = VEX.256.66.0F38.WIG 20 /r */
28807 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28808 delta = dis_PMOVxXBW_256( vbi, pfx, delta, False/*!xIsZ*/ );
28809 goto decode_success;
28811 break;
28813 case 0x21:
28814 /* VPMOVSXBD xmm2/m32, xmm1 */
28815 /* VPMOVSXBD = VEX.128.66.0F38.WIG 21 /r */
28816 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28817 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
28818 True/*isAvx*/, False/*!xIsZ*/ );
28819 goto decode_success;
28821 /* VPMOVSXBD xmm2/m64, ymm1 */
28822 /* VPMOVSXBD = VEX.256.66.0F38.WIG 21 /r */
28823 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28824 delta = dis_PMOVxXBD_256( vbi, pfx, delta, False/*!xIsZ*/ );
28825 goto decode_success;
28827 break;
28829 case 0x22:
28830 /* VPMOVSXBQ xmm2/m16, xmm1 */
28831 /* VPMOVSXBQ = VEX.128.66.0F38.WIG 22 /r */
28832 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28833 delta = dis_PMOVSXBQ_128( vbi, pfx, delta, True/*isAvx*/ );
28834 goto decode_success;
28836 /* VPMOVSXBQ xmm2/m32, ymm1 */
28837 /* VPMOVSXBQ = VEX.256.66.0F38.WIG 22 /r */
28838 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28839 delta = dis_PMOVSXBQ_256( vbi, pfx, delta );
28840 goto decode_success;
28842 break;
28844 case 0x23:
28845 /* VPMOVSXWD xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 23 /r */
28846 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28847 delta = dis_PMOVxXWD_128( vbi, pfx, delta,
28848 True/*isAvx*/, False/*!xIsZ*/ );
28849 goto decode_success;
28851 /* VPMOVSXWD xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 23 /r */
28852 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28853 delta = dis_PMOVxXWD_256( vbi, pfx, delta, False/*!xIsZ*/ );
28854 goto decode_success;
28856 break;
28858 case 0x24:
28859 /* VPMOVSXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 24 /r */
28860 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28861 delta = dis_PMOVSXWQ_128( vbi, pfx, delta, True/*isAvx*/ );
28862 goto decode_success;
28864 /* VPMOVSXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 24 /r */
28865 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28866 delta = dis_PMOVSXWQ_256( vbi, pfx, delta );
28867 goto decode_success;
28869 break;
28871 case 0x25:
28872 /* VPMOVSXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 25 /r */
28873 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28874 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
28875 True/*isAvx*/, False/*!xIsZ*/ );
28876 goto decode_success;
28878 /* VPMOVSXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 25 /r */
28879 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28880 delta = dis_PMOVxXDQ_256( vbi, pfx, delta, False/*!xIsZ*/ );
28881 goto decode_success;
28883 break;
28885 case 0x28:
28886 /* VPMULDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 28 /r */
28887 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28888 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
28889 uses_vvvv, vbi, pfx, delta,
28890 "vpmuldq", math_PMULDQ_128 );
28891 goto decode_success;
28893 /* VPMULDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 28 /r */
28894 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28895 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28896 uses_vvvv, vbi, pfx, delta,
28897 "vpmuldq", math_PMULDQ_256 );
28898 goto decode_success;
28900 break;
28902 case 0x29:
28903 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */
28904 /* VPCMPEQQ = VEX.NDS.128.66.0F38.WIG 29 /r */
28905 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28906 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28907 uses_vvvv, vbi, pfx, delta, "vpcmpeqq", Iop_CmpEQ64x2 );
28908 goto decode_success;
28910 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */
28911 /* VPCMPEQQ = VEX.NDS.256.66.0F38.WIG 29 /r */
28912 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28913 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28914 uses_vvvv, vbi, pfx, delta, "vpcmpeqq", Iop_CmpEQ64x4 );
28915 goto decode_success;
28917 break;
28919 case 0x2A:
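      /* VMOVNTDQA is an aligned, non-temporal load.  The IR below checks
         the 16- or 32-byte alignment (gen_SEGV_if_not_*_aligned) but
         otherwise performs an ordinary load: the non-temporal hint has
         no observable effect at the IR level. */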
28920 /* VMOVNTDQA m128, xmm1 = VEX.128.66.0F38.WIG 2A /r */
28921 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28922 && !epartIsReg(getUChar(delta))) {
28923 UChar modrm = getUChar(delta);
28924 UInt rD = gregOfRexRM(pfx, modrm);
28925 IRTemp tD = newTemp(Ity_V128);
28926 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28927 delta += alen;
28928 gen_SEGV_if_not_16_aligned(addr);
28929 assign(tD, loadLE(Ity_V128, mkexpr(addr)));
28930 DIP("vmovntdqa %s,%s\n", dis_buf, nameXMMReg(rD));
28931 putYMMRegLoAndZU(rD, mkexpr(tD));
28932 goto decode_success;
28934 /* VMOVNTDQA m256, ymm1 = VEX.256.66.0F38.WIG 2A /r */
28935 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28936 && !epartIsReg(getUChar(delta))) {
28937 UChar modrm = getUChar(delta);
28938 UInt rD = gregOfRexRM(pfx, modrm);
28939 IRTemp tD = newTemp(Ity_V256);
28940 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28941 delta += alen;
28942 gen_SEGV_if_not_32_aligned(addr);
28943 assign(tD, loadLE(Ity_V256, mkexpr(addr)));
28944 DIP("vmovntdqa %s,%s\n", dis_buf, nameYMMReg(rD));
28945 putYMMReg(rD, mkexpr(tD));
28946 goto decode_success;
28948 break;
28950 case 0x2B:
28951 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */
28952 /* VPACKUSDW = VEX.NDS.128.66.0F38.WIG 2B /r */
28953 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28954 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
28955 uses_vvvv, vbi, pfx, delta, "vpackusdw",
28956 Iop_QNarrowBin32Sto16Ux8, NULL,
28957 False/*!invertLeftArg*/, True/*swapArgs*/ );
28958 goto decode_success;
28960 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */
28961 /* VPACKUSDW = VEX.NDS.256.66.0F38.WIG 2B /r */
28962 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28963 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28964 uses_vvvv, vbi, pfx, delta, "vpackusdw",
28965 math_VPACKUSDW_YMM );
28966 goto decode_success;
28968 break;
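      /* Opcodes 0x2C..0x2F below are VMASKMOVPS/VMASKMOVPD in both load
         and store directions.  All four delegate to dis_VMASKMOV, which
         is parameterised on the lane type (Ity_I32 for PS, Ity_I64 for
         PD), the vector width (isYMM) and the direction (isLoad).
         Architecturally, masked-off load lanes read as zero and
         masked-off store lanes leave memory untouched. */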
28970 case 0x2C:
28971 /* VMASKMOVPS m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 2C /r */
28972 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28973 && 0==getRexW(pfx)/*W0*/
28974 && !epartIsReg(getUChar(delta))) {
28975 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
28976 /*!isYMM*/False, Ity_I32, /*isLoad*/True );
28977 goto decode_success;
28979 /* VMASKMOVPS m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 2C /r */
28980 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28981 && 0==getRexW(pfx)/*W0*/
28982 && !epartIsReg(getUChar(delta))) {
28983 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
28984 /*isYMM*/True, Ity_I32, /*isLoad*/True );
28985 goto decode_success;
28987 break;
28989 case 0x2D:
28990 /* VMASKMOVPD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 2D /r */
28991 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28992 && 0==getRexW(pfx)/*W0*/
28993 && !epartIsReg(getUChar(delta))) {
28994 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
28995 /*!isYMM*/False, Ity_I64, /*isLoad*/True );
28996 goto decode_success;
28998 /* VMASKMOVPD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 2D /r */
28999 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29000 && 0==getRexW(pfx)/*W0*/
29001 && !epartIsReg(getUChar(delta))) {
29002 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
29003 /*isYMM*/True, Ity_I64, /*isLoad*/True );
29004 goto decode_success;
29006 break;
29008 case 0x2E:
29009 /* VMASKMOVPS xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 2E /r */
29010 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29011 && 0==getRexW(pfx)/*W0*/
29012 && !epartIsReg(getUChar(delta))) {
29013 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
29014 /*!isYMM*/False, Ity_I32, /*!isLoad*/False );
29015 goto decode_success;
29017 /* VMASKMOVPS ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 2E /r */
29018 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29019 && 0==getRexW(pfx)/*W0*/
29020 && !epartIsReg(getUChar(delta))) {
29021 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
29022 /*isYMM*/True, Ity_I32, /*!isLoad*/False );
29023 goto decode_success;
29025 break;
29027 case 0x2F:
29028 /* VMASKMOVPD xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 2F /r */
29029 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29030 && 0==getRexW(pfx)/*W0*/
29031 && !epartIsReg(getUChar(delta))) {
29032 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
29033 /*!isYMM*/False, Ity_I64, /*!isLoad*/False );
29034 goto decode_success;
29036 /* VMASKMOVPD ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 2F /r */
29037 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29038 && 0==getRexW(pfx)/*W0*/
29039 && !epartIsReg(getUChar(delta))) {
29040 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
29041 /*isYMM*/True, Ity_I64, /*!isLoad*/False );
29042 goto decode_success;
29044 break;
29046 case 0x30:
29047 /* VPMOVZXBW xmm2/m64, xmm1 */
29048 /* VPMOVZXBW = VEX.128.66.0F38.WIG 30 /r */
29049 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29050 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
29051 True/*isAvx*/, True/*xIsZ*/ );
29052 goto decode_success;
29054 /* VPMOVZXBW xmm2/m128, ymm1 */
29055 /* VPMOVZXBW = VEX.256.66.0F38.WIG 30 /r */
29056 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29057 delta = dis_PMOVxXBW_256( vbi, pfx, delta, True/*xIsZ*/ );
29058 goto decode_success;
29060 break;
29062 case 0x31:
29063 /* VPMOVZXBD xmm2/m32, xmm1 */
29064 /* VPMOVZXBD = VEX.128.66.0F38.WIG 31 /r */
29065 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29066 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
29067 True/*isAvx*/, True/*xIsZ*/ );
29068 goto decode_success;
29070 /* VPMOVZXBD xmm2/m64, ymm1 */
29071 /* VPMOVZXBD = VEX.256.66.0F38.WIG 31 /r */
29072 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29073 delta = dis_PMOVxXBD_256( vbi, pfx, delta, True/*xIsZ*/ );
29074 goto decode_success;
29076 break;
29078 case 0x32:
29079 /* VPMOVZXBQ xmm2/m16, xmm1 */
29080 /* VPMOVZXBQ = VEX.128.66.0F38.WIG 32 /r */
29081 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29082 delta = dis_PMOVZXBQ_128( vbi, pfx, delta, True/*isAvx*/ );
29083 goto decode_success;
29085 /* VPMOVZXBQ xmm2/m32, ymm1 */
29086 /* VPMOVZXBQ = VEX.256.66.0F38.WIG 32 /r */
29087 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29088 delta = dis_PMOVZXBQ_256( vbi, pfx, delta );
29089 goto decode_success;
29091 break;
29093 case 0x33:
29094 /* VPMOVZXWD xmm2/m64, xmm1 */
29095 /* VPMOVZXWD = VEX.128.66.0F38.WIG 33 /r */
29096 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29097 delta = dis_PMOVxXWD_128( vbi, pfx, delta,
29098 True/*isAvx*/, True/*xIsZ*/ );
29099 goto decode_success;
29101 /* VPMOVZXWD xmm2/m128, ymm1 */
29102 /* VPMOVZXWD = VEX.256.66.0F38.WIG 33 /r */
29103 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29104 delta = dis_PMOVxXWD_256( vbi, pfx, delta, True/*xIsZ*/ );
29105 goto decode_success;
29107 break;
29109 case 0x34:
29110 /* VPMOVZXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 34 /r */
29111 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29112 delta = dis_PMOVZXWQ_128( vbi, pfx, delta, True/*isAvx*/ );
29113 goto decode_success;
29115 /* VPMOVZXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 34 /r */
29116 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29117 delta = dis_PMOVZXWQ_256( vbi, pfx, delta );
29118 goto decode_success;
29120 break;
29122 case 0x35:
29123 /* VPMOVZXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 35 /r */
29124 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29125 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
29126 True/*isAvx*/, True/*xIsZ*/ );
29127 goto decode_success;
29129 /* VPMOVZXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 35 /r */
29130 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29131 delta = dis_PMOVxXDQ_256( vbi, pfx, delta, True/*xIsZ*/ );
29132 goto decode_success;
29134 break;
29136 case 0x36:
29137 /* VPERMD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 36 /r */
29138 if (have66noF2noF3(pfx)
29139 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
29140 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
29141 uses_vvvv, vbi, pfx, delta, "vpermd", math_VPERMD );
29142 goto decode_success;
29144 break;
29146 case 0x37:
29147 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */
29148 /* VPCMPGTQ = VEX.NDS.128.66.0F38.WIG 37 /r */
29149 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29150 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29151 uses_vvvv, vbi, pfx, delta, "vpcmpgtq", Iop_CmpGT64Sx2 );
29152 goto decode_success;
29154 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */
29155 /* VPCMPGTQ = VEX.NDS.256.66.0F38.WIG 37 /r */
29156 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29157 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29158 uses_vvvv, vbi, pfx, delta, "vpcmpgtq", Iop_CmpGT64Sx4 );
29159 goto decode_success;
29161 break;
29163 case 0x38:
29164 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */
29165 /* VPMINSB = VEX.NDS.128.66.0F38.WIG 38 /r */
29166 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29167 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29168 uses_vvvv, vbi, pfx, delta, "vpminsb", Iop_Min8Sx16 );
29169 goto decode_success;
29171 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */
29172 /* VPMINSB = VEX.NDS.256.66.0F38.WIG 38 /r */
29173 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29174 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29175 uses_vvvv, vbi, pfx, delta, "vpminsb", Iop_Min8Sx32 );
29176 goto decode_success;
29178 break;
29180 case 0x39:
29181 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */
29182 /* VPMINSD = VEX.NDS.128.66.0F38.WIG 39 /r */
29183 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29184 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29185 uses_vvvv, vbi, pfx, delta, "vpminsd", Iop_Min32Sx4 );
29186 goto decode_success;
29188 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */
29189 /* VPMINSD = VEX.NDS.256.66.0F38.WIG 39 /r */
29190 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29191 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29192 uses_vvvv, vbi, pfx, delta, "vpminsd", Iop_Min32Sx8 );
29193 goto decode_success;
29195 break;
29197 case 0x3A:
29198 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */
29199 /* VPMINUW = VEX.NDS.128.66.0F38.WIG 3A /r */
29200 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29201 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29202 uses_vvvv, vbi, pfx, delta, "vpminuw", Iop_Min16Ux8 );
29203 goto decode_success;
29205 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */
29206 /* VPMINUW = VEX.NDS.256.66.0F38.WIG 3A /r */
29207 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29208 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29209 uses_vvvv, vbi, pfx, delta, "vpminuw", Iop_Min16Ux16 );
29210 goto decode_success;
29212 break;
29214 case 0x3B:
29215 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */
29216 /* VPMINUD = VEX.NDS.128.66.0F38.WIG 3B /r */
29217 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29218 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29219 uses_vvvv, vbi, pfx, delta, "vpminud", Iop_Min32Ux4 );
29220 goto decode_success;
29222 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */
29223 /* VPMINUD = VEX.NDS.256.66.0F38.WIG 3B /r */
29224 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29225 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29226 uses_vvvv, vbi, pfx, delta, "vpminud", Iop_Min32Ux8 );
29227 goto decode_success;
29229 break;
29231 case 0x3C:
29232 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */
29233 /* VPMAXSB = VEX.NDS.128.66.0F38.WIG 3C /r */
29234 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29235 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29236 uses_vvvv, vbi, pfx, delta, "vpmaxsb", Iop_Max8Sx16 );
29237 goto decode_success;
29239 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */
29240 /* VPMAXSB = VEX.NDS.256.66.0F38.WIG 3C /r */
29241 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29242 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29243 uses_vvvv, vbi, pfx, delta, "vpmaxsb", Iop_Max8Sx32 );
29244 goto decode_success;
29246 break;
29248 case 0x3D:
29249 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */
29250 /* VPMAXSD = VEX.NDS.128.66.0F38.WIG 3D /r */
29251 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29252 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29253 uses_vvvv, vbi, pfx, delta, "vpmaxsd", Iop_Max32Sx4 );
29254 goto decode_success;
29256 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */
29257 /* VPMAXSD = VEX.NDS.256.66.0F38.WIG 3D /r */
29258 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29259 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29260 uses_vvvv, vbi, pfx, delta, "vpmaxsd", Iop_Max32Sx8 );
29261 goto decode_success;
29263 break;
29265 case 0x3E:
29266 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */
29267 /* VPMAXUW = VEX.NDS.128.66.0F38.WIG 3E /r */
29268 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29269 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29270 uses_vvvv, vbi, pfx, delta, "vpmaxuw", Iop_Max16Ux8 );
29271 goto decode_success;
29273 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */
29274 /* VPMAXUW = VEX.NDS.256.66.0F38.WIG 3E /r */
29275 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29276 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29277 uses_vvvv, vbi, pfx, delta, "vpmaxuw", Iop_Max16Ux16 );
29278 goto decode_success;
29280 break;
29282 case 0x3F:
29283 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */
29284 /* VPMAXUD = VEX.NDS.128.66.0F38.WIG 3F /r */
29285 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29286 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29287 uses_vvvv, vbi, pfx, delta, "vpmaxud", Iop_Max32Ux4 );
29288 goto decode_success;
29290 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */
29291 /* VPMAXUD = VEX.NDS.256.66.0F38.WIG 3F /r */
29292 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29293 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29294 uses_vvvv, vbi, pfx, delta, "vpmaxud", Iop_Max32Ux8 );
29295 goto decode_success;
29297 break;
29299 case 0x40:
29300 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */
29301 /* VPMULLD = VEX.NDS.128.66.0F38.WIG 40 /r */
29302 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29303 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29304 uses_vvvv, vbi, pfx, delta, "vpmulld", Iop_Mul32x4 );
29305 goto decode_success;
29307 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */
29308 /* VPMULLD = VEX.NDS.256.66.0F38.WIG 40 /r */
29309 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29310 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29311 uses_vvvv, vbi, pfx, delta, "vpmulld", Iop_Mul32x8 );
29312 goto decode_success;
29314 break;
29316 case 0x41:
29317 /* VPHMINPOSUW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 41 /r */
29318 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29319 delta = dis_PHMINPOSUW_128( vbi, pfx, delta, True/*isAvx*/ );
29320 goto decode_success;
29322 break;
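      /* Opcodes 0x45..0x47 below are the AVX2 per-element variable
         shifts (VPSRLVD/Q, VPSRAVD, VPSLLVD/Q).  dis_AVX_var_shiftV_byE
         shifts each lane by the count held in the corresponding lane of
         the third operand; architecturally, counts >= the lane width
         produce 0 for the logical forms and all-sign-bits for VPSRAVD. */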
29324 case 0x45:
29325 /* VPSRLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 45 /r */
29326 /* VPSRLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 45 /r */
29327 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
29328 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsrlvd",
29329 Iop_Shr32, 1==getVexL(pfx) );
29330 *uses_vvvv = True;
29331 goto decode_success;
29333 /* VPSRLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 45 /r */
29334 /* VPSRLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 45 /r */
29335 if (have66noF2noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
29336 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsrlvq",
29337 Iop_Shr64, 1==getVexL(pfx) );
29338 *uses_vvvv = True;
29339 goto decode_success;
29341 break;
29343 case 0x46:
29344 /* VPSRAVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 46 /r */
29345 /* VPSRAVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 46 /r */
29346 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
29347 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsravd",
29348 Iop_Sar32, 1==getVexL(pfx) );
29349 *uses_vvvv = True;
29350 goto decode_success;
29352 break;
29354 case 0x47:
29355 /* VPSLLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 47 /r */
29356 /* VPSLLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 47 /r */
29357 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
29358 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsllvd",
29359 Iop_Shl32, 1==getVexL(pfx) );
29360 *uses_vvvv = True;
29361 goto decode_success;
29363 /* VPSLLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 47 /r */
29364 /* VPSLLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 47 /r */
29365 if (have66noF2noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
29366 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsllvq",
29367 Iop_Shl64, 1==getVexL(pfx) );
29368 *uses_vvvv = True;
29369 goto decode_success;
29371 break;
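      /* Opcodes 0x58, 0x59, 0x5A, 0x78 and 0x79 below are the AVX2
         integer broadcasts (VPBROADCASTD/Q, VBROADCASTI128,
         VPBROADCASTB/W).  They reuse the pairing idiom of the
         VBROADCASTSS/SD cases above, widening the scalar step by step
         with Iop_8HLto16/Iop_16HLto32/Iop_32HLto64 and finishing with
         Iop_64HLtoV128 or Iop_64x4toV256. */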
29373 case 0x58:
29374 /* VPBROADCASTD xmm2/m32, xmm1 = VEX.128.66.0F38.W0 58 /r */
29375 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29376 && 0==getRexW(pfx)/*W0*/) {
29377 UChar modrm = getUChar(delta);
29378 UInt rG = gregOfRexRM(pfx, modrm);
29379 IRTemp t32 = newTemp(Ity_I32);
29380 if (epartIsReg(modrm)) {
29381 UInt rE = eregOfRexRM(pfx, modrm);
29382 delta++;
29383 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
29384 assign(t32, getXMMRegLane32(rE, 0));
29385 } else {
29386 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29387 delta += alen;
29388 DIP("vpbroadcastd %s,%s\n", dis_buf, nameXMMReg(rG));
29389 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
29390 }
29391 IRTemp t64 = newTemp(Ity_I64);
29392 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29393 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
29394 putYMMRegLoAndZU(rG, res);
29395 goto decode_success;
29396 }
29397 /* VPBROADCASTD xmm2/m32, ymm1 = VEX.256.66.0F38.W0 58 /r */
29398 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29399 && 0==getRexW(pfx)/*W0*/) {
29400 UChar modrm = getUChar(delta);
29401 UInt rG = gregOfRexRM(pfx, modrm);
29402 IRTemp t32 = newTemp(Ity_I32);
29403 if (epartIsReg(modrm)) {
29404 UInt rE = eregOfRexRM(pfx, modrm);
29405 delta++;
29406 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
29407 assign(t32, getXMMRegLane32(rE, 0));
29408 } else {
29409 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29410 delta += alen;
29411 DIP("vpbroadcastd %s,%s\n", dis_buf, nameYMMReg(rG));
29412 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
29413 }
29414 IRTemp t64 = newTemp(Ity_I64);
29415 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29416 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
29417 mkexpr(t64), mkexpr(t64));
29418 putYMMReg(rG, res);
29419 goto decode_success;
29420 }
29421 break;
29423 case 0x59:
29424 /* VPBROADCASTQ xmm2/m64, xmm1 = VEX.128.66.0F38.W0 59 /r */
29425 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29426 && 0==getRexW(pfx)/*W0*/) {
29427 UChar modrm = getUChar(delta);
29428 UInt rG = gregOfRexRM(pfx, modrm);
29429 IRTemp t64 = newTemp(Ity_I64);
29430 if (epartIsReg(modrm)) {
29431 UInt rE = eregOfRexRM(pfx, modrm);
29432 delta++;
29433 DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
29434 assign(t64, getXMMRegLane64(rE, 0));
29435 } else {
29436 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29437 delta += alen;
29438 DIP("vpbroadcastq %s,%s\n", dis_buf, nameXMMReg(rG));
29439 assign(t64, loadLE(Ity_I64, mkexpr(addr)));
29440 }
29441 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
29442 putYMMRegLoAndZU(rG, res);
29443 goto decode_success;
29444 }
29445 /* VPBROADCASTQ xmm2/m64, ymm1 = VEX.256.66.0F38.W0 59 /r */
29446 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29447 && 0==getRexW(pfx)/*W0*/) {
29448 UChar modrm = getUChar(delta);
29449 UInt rG = gregOfRexRM(pfx, modrm);
29450 IRTemp t64 = newTemp(Ity_I64);
29451 if (epartIsReg(modrm)) {
29452 UInt rE = eregOfRexRM(pfx, modrm);
29453 delta++;
29454 DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
29455 assign(t64, getXMMRegLane64(rE, 0));
29456 } else {
29457 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29458 delta += alen;
29459 DIP("vpbroadcastq %s,%s\n", dis_buf, nameYMMReg(rG));
29460 assign(t64, loadLE(Ity_I64, mkexpr(addr)));
29461 }
29462 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
29463 mkexpr(t64), mkexpr(t64));
29464 putYMMReg(rG, res);
29465 goto decode_success;
29466 }
29467 break;
29469 case 0x5A:
29470 /* VBROADCASTI128 m128, ymm1 = VEX.256.66.0F38.WIG 5A /r */
29471 if (have66noF2noF3(pfx)
29472 && 1==getVexL(pfx)/*256*/
29473 && !epartIsReg(getUChar(delta))) {
29474 UChar modrm = getUChar(delta);
29475 UInt rG = gregOfRexRM(pfx, modrm);
29476 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29477 delta += alen;
29478 DIP("vbroadcasti128 %s,%s\n", dis_buf, nameYMMReg(rG));
29479 IRTemp t128 = newTemp(Ity_V128);
29480 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
29481 putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) );
29482 goto decode_success;
29484 break;
29486 case 0x78:
29487 /* VPBROADCASTB xmm2/m8, xmm1 = VEX.128.66.0F38.W0 78 /r */
29488 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29489 && 0==getRexW(pfx)/*W0*/) {
29490 UChar modrm = getUChar(delta);
29491 UInt rG = gregOfRexRM(pfx, modrm);
29492 IRTemp t8 = newTemp(Ity_I8);
29493 if (epartIsReg(modrm)) {
29494 UInt rE = eregOfRexRM(pfx, modrm);
29495 delta++;
29496 DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
29497 assign(t8, unop(Iop_32to8, getXMMRegLane32(rE, 0)));
29498 } else {
29499 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29500 delta += alen;
29501 DIP("vpbroadcastb %s,%s\n", dis_buf, nameXMMReg(rG));
29502 assign(t8, loadLE(Ity_I8, mkexpr(addr)));
29503 }
29504 IRTemp t16 = newTemp(Ity_I16);
29505 assign(t16, binop(Iop_8HLto16, mkexpr(t8), mkexpr(t8)));
29506 IRTemp t32 = newTemp(Ity_I32);
29507 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
29508 IRTemp t64 = newTemp(Ity_I64);
29509 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29510 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
29511 putYMMRegLoAndZU(rG, res);
29512 goto decode_success;
29513 }
29514 /* VPBROADCASTB xmm2/m8, ymm1 = VEX.256.66.0F38.W0 78 /r */
29515 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29516 && 0==getRexW(pfx)/*W0*/) {
29517 UChar modrm = getUChar(delta);
29518 UInt rG = gregOfRexRM(pfx, modrm);
29519 IRTemp t8 = newTemp(Ity_I8);
29520 if (epartIsReg(modrm)) {
29521 UInt rE = eregOfRexRM(pfx, modrm);
29522 delta++;
29523 DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
29524 assign(t8, unop(Iop_32to8, getXMMRegLane32(rE, 0)));
29525 } else {
29526 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29527 delta += alen;
29528 DIP("vpbroadcastb %s,%s\n", dis_buf, nameYMMReg(rG));
29529 assign(t8, loadLE(Ity_I8, mkexpr(addr)));
29530 }
29531 IRTemp t16 = newTemp(Ity_I16);
29532 assign(t16, binop(Iop_8HLto16, mkexpr(t8), mkexpr(t8)));
29533 IRTemp t32 = newTemp(Ity_I32);
29534 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
29535 IRTemp t64 = newTemp(Ity_I64);
29536 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29537 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
29538 mkexpr(t64), mkexpr(t64));
29539 putYMMReg(rG, res);
29540 goto decode_success;
29541 }
29542 break;
29544 case 0x79:
29545 /* VPBROADCASTW xmm2/m16, xmm1 = VEX.128.66.0F38.W0 79 /r */
29546 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29547 && 0==getRexW(pfx)/*W0*/) {
29548 UChar modrm = getUChar(delta);
29549 UInt rG = gregOfRexRM(pfx, modrm);
29550 IRTemp t16 = newTemp(Ity_I16);
29551 if (epartIsReg(modrm)) {
29552 UInt rE = eregOfRexRM(pfx, modrm);
29553 delta++;
29554 DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
29555 assign(t16, unop(Iop_32to16, getXMMRegLane32(rE, 0)));
29556 } else {
29557 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29558 delta += alen;
29559 DIP("vpbroadcastw %s,%s\n", dis_buf, nameXMMReg(rG));
29560 assign(t16, loadLE(Ity_I16, mkexpr(addr)));
29561 }
29562 IRTemp t32 = newTemp(Ity_I32);
29563 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
29564 IRTemp t64 = newTemp(Ity_I64);
29565 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29566 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
29567 putYMMRegLoAndZU(rG, res);
29568 goto decode_success;
29569 }
29570 /* VPBROADCASTW xmm2/m16, ymm1 = VEX.256.66.0F38.W0 79 /r */
29571 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29572 && 0==getRexW(pfx)/*W0*/) {
29573 UChar modrm = getUChar(delta);
29574 UInt rG = gregOfRexRM(pfx, modrm);
29575 IRTemp t16 = newTemp(Ity_I16);
29576 if (epartIsReg(modrm)) {
29577 UInt rE = eregOfRexRM(pfx, modrm);
29578 delta++;
29579 DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
29580 assign(t16, unop(Iop_32to16, getXMMRegLane32(rE, 0)));
29581 } else {
29582 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29583 delta += alen;
29584 DIP("vpbroadcastw %s,%s\n", dis_buf, nameYMMReg(rG));
29585 assign(t16, loadLE(Ity_I16, mkexpr(addr)));
29586 }
29587 IRTemp t32 = newTemp(Ity_I32);
29588 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
29589 IRTemp t64 = newTemp(Ity_I64);
29590 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29591 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
29592 mkexpr(t64), mkexpr(t64));
29593 putYMMReg(rG, res);
29594 goto decode_success;
29595 }
29596 break;
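      /* Opcodes 0x8C and 0x8E below are the AVX2 integer masked moves
         (VPMASKMOVD/VPMASKMOVQ); they reuse dis_VMASKMOV exactly as the
         VMASKMOVPS/PD cases at 0x2C..0x2F do. */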
29598 case 0x8C:
29599 /* VPMASKMOVD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 8C /r */
29600 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29601 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29602 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
29603 /*!isYMM*/False, Ity_I32, /*isLoad*/True );
29604 goto decode_success;
29606 /* VPMASKMOVD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 8C /r */
29607 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29608 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29609 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
29610 /*isYMM*/True, Ity_I32, /*isLoad*/True );
29611 goto decode_success;
29613 /* VPMASKMOVQ m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 8C /r */
29614 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29615 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29616 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
29617 /*!isYMM*/False, Ity_I64, /*isLoad*/True );
29618 goto decode_success;
29620 /* VPMASKMOVQ m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 8C /r */
29621 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29622 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29623 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
29624 /*isYMM*/True, Ity_I64, /*isLoad*/True );
29625 goto decode_success;
29627 break;
29629 case 0x8E:
29630 /* VPMASKMOVD xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 8E /r */
29631 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29632 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29633 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
29634 /*!isYMM*/False, Ity_I32, /*!isLoad*/False );
29635 goto decode_success;
29637 /* VPMASKMOVD ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 8E /r */
29638 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29639 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29640 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
29641 /*isYMM*/True, Ity_I32, /*!isLoad*/False );
29642 goto decode_success;
29644 /* VPMASKMOVQ xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W1 8E /r */
29645 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29646 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29647 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
29648 /*!isYMM*/False, Ity_I64, /*!isLoad*/False );
29649 goto decode_success;
29651 /* VPMASKMOVQ ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W1 8E /r */
29652 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29653 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29654 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
29655 /*isYMM*/True, Ity_I64, /*!isLoad*/False );
29656 goto decode_success;
29658 break;
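      /* Opcodes 0x90..0x93 below are the AVX2 gathers.  dis_VGATHER
         signals a refused decode by returning delta unchanged
         (presumably, e.g., when the destination, index and mask
         registers are not all distinct, as the ISA requires), hence the
         'if (delta != delta0)' guards: only a successful decode jumps
         to decode_success, otherwise control falls out of the switch to
         the failure path. */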
29660 case 0x90:
29661 /* VPGATHERDD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 90 /r */
29662 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29663 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29664 Long delta0 = delta;
29665 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdd",
29666 /*!isYMM*/False, /*!isVM64x*/False, Ity_I32 );
29667 if (delta != delta0)
29668 goto decode_success;
29670 /* VPGATHERDD ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 90 /r */
29671 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29672 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29673 Long delta0 = delta;
29674 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdd",
29675 /*isYMM*/True, /*!isVM64x*/False, Ity_I32 );
29676 if (delta != delta0)
29677 goto decode_success;
29679 /* VPGATHERDQ xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 90 /r */
29680 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29681 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29682 Long delta0 = delta;
29683 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdq",
29684 /*!isYMM*/False, /*!isVM64x*/False, Ity_I64 );
29685 if (delta != delta0)
29686 goto decode_success;
29688 /* VPGATHERDQ ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 90 /r */
29689 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29690 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29691 Long delta0 = delta;
29692 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdq",
29693 /*isYMM*/True, /*!isVM64x*/False, Ity_I64 );
29694 if (delta != delta0)
29695 goto decode_success;
29697 break;
29699 case 0x91:
29700 /* VPGATHERQD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 91 /r */
29701 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29702 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29703 Long delta0 = delta;
29704 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqd",
29705 /*!isYMM*/False, /*isVM64x*/True, Ity_I32 );
29706 if (delta != delta0)
29707 goto decode_success;
29709 /* VPGATHERQD xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 91 /r */
29710 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29711 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29712 Long delta0 = delta;
29713 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqd",
29714 /*isYMM*/True, /*isVM64x*/True, Ity_I32 );
29715 if (delta != delta0)
29716 goto decode_success;
29718 /* VPGATHERQQ xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 91 /r */
29719 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29720 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29721 Long delta0 = delta;
29722 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqq",
29723 /*!isYMM*/False, /*isVM64x*/True, Ity_I64 );
29724 if (delta != delta0)
29725 goto decode_success;
29727 /* VPGATHERQQ ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 91 /r */
29728 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29729 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29730 Long delta0 = delta;
29731 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqq",
29732 /*isYMM*/True, /*isVM64x*/True, Ity_I64 );
29733 if (delta != delta0)
29734 goto decode_success;
29736 break;
29738 case 0x92:
29739 /* VGATHERDPS xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 92 /r */
29740 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29741 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29742 Long delta0 = delta;
29743 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdps",
29744 /*!isYMM*/False, /*!isVM64x*/False, Ity_I32 );
29745 if (delta != delta0)
29746 goto decode_success;
29748 /* VGATHERDPS ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 92 /r */
29749 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29750 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29751 Long delta0 = delta;
29752 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdps",
29753 /*isYMM*/True, /*!isVM64x*/False, Ity_I32 );
29754 if (delta != delta0)
29755 goto decode_success;
29757 /* VGATHERDPD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 92 /r */
29758 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29759 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29760 Long delta0 = delta;
29761 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdpd",
29762 /*!isYMM*/False, /*!isVM64x*/False, Ity_I64 );
29763 if (delta != delta0)
29764 goto decode_success;
29766 /* VGATHERDPD ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 92 /r */
29767 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29768 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29769 Long delta0 = delta;
29770 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdpd",
29771 /*isYMM*/True, /*!isVM64x*/False, Ity_I64 );
29772 if (delta != delta0)
29773 goto decode_success;
29775 break;
29777 case 0x93:
29778 /* VGATHERQPS xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 93 /r */
29779 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29780 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29781 Long delta0 = delta;
29782 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqps",
29783 /*!isYMM*/False, /*isVM64x*/True, Ity_I32 );
29784 if (delta != delta0)
29785 goto decode_success;
29787 /* VGATHERQPS xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 93 /r */
29788 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29789 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29790 Long delta0 = delta;
29791 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqps",
29792 /*isYMM*/True, /*isVM64x*/True, Ity_I32 );
29793 if (delta != delta0)
29794 goto decode_success;
29796 /* VGATHERQPD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 93 /r */
29797 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29798 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29799 Long delta0 = delta;
29800 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqpd",
29801 /*!isYMM*/False, /*isVM64x*/True, Ity_I64 );
29802 if (delta != delta0)
29803 goto decode_success;
29805 /* VGATHERQPD ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 93 /r */
29806 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29807 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29808 Long delta0 = delta;
29809 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqpd",
29810 /*isYMM*/True, /*isVM64x*/True, Ity_I64 );
29811 if (delta != delta0)
29812 goto decode_success;
29814 break;
29816 case 0x96 ... 0x9F:
29817 case 0xA6 ... 0xAF:
29818 case 0xB6 ... 0xBF:
29819 /* VFMADDSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 96 /r */
29820 /* VFMADDSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 96 /r */
29821 /* VFMADDSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 96 /r */
29822 /* VFMADDSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 96 /r */
29823 /* VFMSUBADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 97 /r */
29824 /* VFMSUBADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 97 /r */
29825 /* VFMSUBADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 97 /r */
29826 /* VFMSUBADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 97 /r */
29827 /* VFMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 98 /r */
29828 /* VFMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 98 /r */
29829 /* VFMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 98 /r */
29830 /* VFMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 98 /r */
29831 /* VFMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 99 /r */
29832 /* VFMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 99 /r */
29833 /* VFMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9A /r */
29834 /* VFMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9A /r */
29835 /* VFMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9A /r */
29836 /* VFMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9A /r */
29837 /* VFMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9B /r */
29838 /* VFMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9B /r */
29839 /* VFNMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9C /r */
29840 /* VFNMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9C /r */
29841 /* VFNMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9C /r */
29842 /* VFNMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9C /r */
29843 /* VFNMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9D /r */
29844 /* VFNMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9D /r */
29845 /* VFNMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9E /r */
29846 /* VFNMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9E /r */
29847 /* VFNMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9E /r */
29848 /* VFNMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9E /r */
29849 /* VFNMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9F /r */
29850 /* VFNMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9F /r */
29851 /* VFMADDSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A6 /r */
29852 /* VFMADDSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A6 /r */
29853 /* VFMADDSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A6 /r */
29854 /* VFMADDSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A6 /r */
29855 /* VFMSUBADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A7 /r */
29856 /* VFMSUBADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A7 /r */
29857 /* VFMSUBADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A7 /r */
29858 /* VFMSUBADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A7 /r */
29859 /* VFMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A8 /r */
29860 /* VFMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A8 /r */
29861 /* VFMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A8 /r */
29862 /* VFMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A8 /r */
29863 /* VFMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 A9 /r */
29864 /* VFMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 A9 /r */
29865 /* VFMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AA /r */
29866 /* VFMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AA /r */
29867 /* VFMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AA /r */
29868 /* VFMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AA /r */
29869 /* VFMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AB /r */
29870 /* VFMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AB /r */
29871 /* VFNMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AC /r */
29872 /* VFNMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AC /r */
29873 /* VFNMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AC /r */
29874 /* VFNMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AC /r */
29875 /* VFNMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AD /r */
29876 /* VFNMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AD /r */
29877 /* VFNMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AE /r */
29878 /* VFNMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AE /r */
29879 /* VFNMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AE /r */
29880 /* VFNMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AE /r */
29881 /* VFNMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AF /r */
29882 /* VFNMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AF /r */
29883 /* VFMADDSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B6 /r */
29884 /* VFMADDSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B6 /r */
29885 /* VFMADDSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B6 /r */
29886 /* VFMADDSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B6 /r */
29887 /* VFMSUBADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B7 /r */
29888 /* VFMSUBADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B7 /r */
29889 /* VFMSUBADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B7 /r */
29890 /* VFMSUBADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B7 /r */
29891 /* VFMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B8 /r */
29892 /* VFMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B8 /r */
29893 /* VFMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B8 /r */
29894 /* VFMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B8 /r */
29895 /* VFMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 B9 /r */
29896 /* VFMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 B9 /r */
29897 /* VFMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BA /r */
29898 /* VFMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BA /r */
29899 /* VFMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BA /r */
29900 /* VFMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BA /r */
29901 /* VFMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BB /r */
29902 /* VFMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BB /r */
29903 /* VFNMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BC /r */
29904 /* VFNMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BC /r */
29905 /* VFNMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BC /r */
29906 /* VFNMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BC /r */
29907 /* VFNMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BD /r */
29908 /* VFNMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BD /r */
29909 /* VFNMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BE /r */
29910 /* VFNMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BE /r */
29911 /* VFNMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BE /r */
29912 /* VFNMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BE /r */
29913 /* VFNMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BF /r */
29914 /* VFNMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BF /r */
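      /* The digits in the FMA mnemonics name the operand order, with
         operand 1 = destination, 2 = vvvv, 3 = r/m.  For example,
         VFMADD132 computes dst = dst*src3 + src2, VFMADD213 computes
         dst = src2*dst + src3, and VFMADD231 computes dst = src2*src3
         + dst.  dis_FMA recovers the ordering and the add/sub/addsub
         variant from opc, and the lane width from VEX.W. */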
29915 if (have66noF2noF3(pfx)) {
29916 delta = dis_FMA( vbi, pfx, delta, opc );
29917 *uses_vvvv = True;
29918 dres->hint = Dis_HintVerbose;
29919 goto decode_success;
29921 break;
29923 case 0xDB:
29924 case 0xDC:
29925 case 0xDD:
29926 case 0xDE:
29927 case 0xDF:
29928 /* VAESIMC xmm2/m128, xmm1 = VEX.128.66.0F38.WIG DB /r */
29929 /* VAESENC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DC /r */
29930 /* VAESENCLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DD /r */
29931 /* VAESDEC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DE /r */
29932 /* VAESDECLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DF /r */
29933 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29934 delta = dis_AESx( vbi, pfx, delta, True/*isAvx*/, opc );
29935 if (opc != 0xDB) *uses_vvvv = True;
29936 goto decode_success;
29938 break;
29940 case 0xF2:
29941 /* ANDN r/m32, r32b, r32a = VEX.NDS.LZ.0F38.W0 F2 /r */
29942 /* ANDN r/m64, r64b, r64a = VEX.NDS.LZ.0F38.W1 F2 /r */
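      /* ANDN computes dst = ~src1 & src2, where src1 is the vvvv
         register and src2 is r/m.  Per BMI1, SF and ZF are set from the
         result and OF/CF are cleared; the AMD64G_CC_OP_ANDN{32,64}
         thunk below encodes this with the result in CC_DEP1. */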
29943 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29944 Int size = getRexW(pfx) ? 8 : 4;
29945 IRType ty = szToITy(size);
29946 IRTemp dst = newTemp(ty);
29947 IRTemp src1 = newTemp(ty);
29948 IRTemp src2 = newTemp(ty);
29949 UChar rm = getUChar(delta);
29951 assign( src1, getIRegV(size,pfx) );
29952 if (epartIsReg(rm)) {
29953 assign( src2, getIRegE(size,pfx,rm) );
29954 DIP("andn %s,%s,%s\n", nameIRegE(size,pfx,rm),
29955 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
29956 delta++;
29957 } else {
29958 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29959 assign( src2, loadLE(ty, mkexpr(addr)) );
29960 DIP("andn %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
29961 nameIRegG(size,pfx,rm));
29962 delta += alen;
29963 }
29965 assign( dst, binop( mkSizedOp(ty,Iop_And8),
29966 unop( mkSizedOp(ty,Iop_Not8), mkexpr(src1) ),
29967 mkexpr(src2) ) );
29968 putIRegG( size, pfx, rm, mkexpr(dst) );
29969 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
29970 ? AMD64G_CC_OP_ANDN64
29971 : AMD64G_CC_OP_ANDN32)) );
29972 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
29973 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
29974 *uses_vvvv = True;
29975 goto decode_success;
29977 break;
29979 case 0xF3:
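      /* Opcode F3 is the BMI1 BLSI/BLSMSK/BLSR group, distinguished by
         the reg field of the modrm byte (/3, /2, /1).  The bit tricks
         used below: BLSI isolates the lowest set bit, dst = src & -src;
         BLSMSK builds a mask up to and including it,
         dst = src ^ (src - 1); BLSR clears it, dst = src & (src - 1).
         Each writes the vvvv register (NDD form) and sets the matching
         BLSI/BLSMSK/BLSR flag thunk. */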
29980 /* BLSI r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /3 */
29981 /* BLSI r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /3 */
29982 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/
29983 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 3) {
29984 Int size = getRexW(pfx) ? 8 : 4;
29985 IRType ty = szToITy(size);
29986 IRTemp src = newTemp(ty);
29987 IRTemp dst = newTemp(ty);
29988 UChar rm = getUChar(delta);
29990 if (epartIsReg(rm)) {
29991 assign( src, getIRegE(size,pfx,rm) );
29992 DIP("blsi %s,%s\n", nameIRegE(size,pfx,rm),
29993 nameIRegV(size,pfx));
29994 delta++;
29995 } else {
29996 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29997 assign( src, loadLE(ty, mkexpr(addr)) );
29998 DIP("blsi %s,%s\n", dis_buf, nameIRegV(size,pfx));
29999 delta += alen;
30000 }
30002 assign( dst, binop(mkSizedOp(ty,Iop_And8),
30003 binop(mkSizedOp(ty,Iop_Sub8), mkU(ty, 0),
30004 mkexpr(src)), mkexpr(src)) );
30005 putIRegV( size, pfx, mkexpr(dst) );
30006 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
30007 ? AMD64G_CC_OP_BLSI64
30008 : AMD64G_CC_OP_BLSI32)) );
30009 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
30010 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) );
30011 *uses_vvvv = True;
30012 goto decode_success;
30014 /* BLSMSK r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /2 */
30015 /* BLSMSK r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /2 */
30016 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/
30017 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 2) {
30018 Int size = getRexW(pfx) ? 8 : 4;
30019 IRType ty = szToITy(size);
30020 IRTemp src = newTemp(ty);
30021 IRTemp dst = newTemp(ty);
30022 UChar rm = getUChar(delta);
30024 if (epartIsReg(rm)) {
30025 assign( src, getIRegE(size,pfx,rm) );
30026 DIP("blsmsk %s,%s\n", nameIRegE(size,pfx,rm),
30027 nameIRegV(size,pfx));
30028 delta++;
30029 } else {
30030 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30031 assign( src, loadLE(ty, mkexpr(addr)) );
30032 DIP("blsmsk %s,%s\n", dis_buf, nameIRegV(size,pfx));
30033 delta += alen;
30034 }
30036 assign( dst, binop(mkSizedOp(ty,Iop_Xor8),
30037 binop(mkSizedOp(ty,Iop_Sub8), mkexpr(src),
30038 mkU(ty, 1)), mkexpr(src)) );
30039 putIRegV( size, pfx, mkexpr(dst) );
30040 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
30041 ? AMD64G_CC_OP_BLSMSK64
30042 : AMD64G_CC_OP_BLSMSK32)) );
30043 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
30044 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) );
30045 *uses_vvvv = True;
30046 goto decode_success;
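/* BLSMSK builds a mask up to and including the lowest set bit:
   dst = (src - 1) ^ src, e.g. src = 0b101100 gives dst = 0b000111.
   As with BLSI, the source value goes into CC_DEP2 for the flag helper. */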
30048 /* BLSR r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /1 */
30049 /* BLSR r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /1 */
30050 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/
30051 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 1) {
30052 Int size = getRexW(pfx) ? 8 : 4;
30053 IRType ty = szToITy(size);
30054 IRTemp src = newTemp(ty);
30055 IRTemp dst = newTemp(ty);
30056 UChar rm = getUChar(delta);
30058 if (epartIsReg(rm)) {
30059 assign( src, getIRegE(size,pfx,rm) );
30060 DIP("blsr %s,%s\n", nameIRegE(size,pfx,rm),
30061 nameIRegV(size,pfx));
30062 delta++;
30063 } else {
30064 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30065 assign( src, loadLE(ty, mkexpr(addr)) );
30066 DIP("blsr %s,%s\n", dis_buf, nameIRegV(size,pfx));
30067 delta += alen;
30070 assign( dst, binop(mkSizedOp(ty,Iop_And8),
30071 binop(mkSizedOp(ty,Iop_Sub8), mkexpr(src),
30072 mkU(ty, 1)), mkexpr(src)) );
30073 putIRegV( size, pfx, mkexpr(dst) );
30074 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
30075 ? AMD64G_CC_OP_BLSR64
30076 : AMD64G_CC_OP_BLSR32)) );
30077 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
30078 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) );
30079 *uses_vvvv = True;
30080 goto decode_success;
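/* BLSR clears the lowest set bit: dst = (src - 1) & src, e.g.
   src = 0b101100 gives dst = 0b101000 (an all-zero src stays zero). */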
30082 break;
30084 case 0xF5:
30085 /* BZHI r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F5 /r */
30086 /* BZHI r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F5 /r */
30087 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30088 Int size = getRexW(pfx) ? 8 : 4;
30089 IRType ty = szToITy(size);
30090 IRTemp dst = newTemp(ty);
30091 IRTemp src1 = newTemp(ty);
30092 IRTemp src2 = newTemp(ty);
30093 IRTemp start = newTemp(Ity_I8);
30094 IRTemp cond = newTemp(Ity_I1);
30095 UChar rm = getUChar(delta);
30097 assign( src2, getIRegV(size,pfx) );
30098 if (epartIsReg(rm)) {
30099 assign( src1, getIRegE(size,pfx,rm) );
30100 DIP("bzhi %s,%s,%s\n", nameIRegV(size,pfx),
30101 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
30102 delta++;
30103 } else {
30104 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30105 assign( src1, loadLE(ty, mkexpr(addr)) );
30106 DIP("bzhi %s,%s,%s\n", nameIRegV(size,pfx), dis_buf,
30107 nameIRegG(size,pfx,rm));
30108 delta += alen;
30111 assign( start, narrowTo( Ity_I8, mkexpr(src2) ) );
30112 assign( cond, binop(Iop_CmpLT32U,
30113 unop(Iop_8Uto32, mkexpr(start)),
30114 mkU32(8*size)) );
30115 /* if (start < opsize) {
30116 if (start == 0)
30117 dst = 0;
30118 else
30119 dst = (src1 << (opsize-start)) u>> (opsize-start);
30120 } else {
30121 dst = src1;
30122 } */
30123 assign( dst,
30124 IRExpr_ITE(
30125 mkexpr(cond),
30126 IRExpr_ITE(
30127 binop(Iop_CmpEQ8, mkexpr(start), mkU8(0)),
30128 mkU(ty, 0),
30129 binop(
30130 mkSizedOp(ty,Iop_Shr8),
30131 binop(
30132 mkSizedOp(ty,Iop_Shl8),
30133 mkexpr(src1),
30134 binop(Iop_Sub8, mkU8(8*size), mkexpr(start))
30135 ),
30136 binop(Iop_Sub8, mkU8(8*size), mkexpr(start))
30137 )
30138 ),
30139 mkexpr(src1)
30140 )
30141 );
30142 putIRegG( size, pfx, rm, mkexpr(dst) );
30143 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
30144 ? AMD64G_CC_OP_BLSR64
30145 : AMD64G_CC_OP_BLSR32)) );
30146 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
30147 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(cond))) );
30148 *uses_vvvv = True;
30149 goto decode_success;
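/* BZHI keeps the low 'start' bits of src1 and zeroes the rest, where
   'start' is the low byte of the vvvv register; e.g. src1 = 0xFF with
   start = 3 gives dst = 0x07.  If start >= the operand width, src1 is
   passed through unchanged.  The BLSR thunk encoding is reused here,
   with the start-in-range condition parked in DEP2. */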
30151 /* PDEP r/m32, r32b, r32a = VEX.NDS.LZ.F2.0F38.W0 F5 /r */
30152 /* PDEP r/m64, r64b, r64a = VEX.NDS.LZ.F2.0F38.W1 F5 /r */
30153 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30154 Int size = getRexW(pfx) ? 8 : 4;
30155 IRType ty = szToITy(size);
30156 IRTemp src = newTemp(ty);
30157 IRTemp mask = newTemp(ty);
30158 UChar rm = getUChar(delta);
30160 assign( src, getIRegV(size,pfx) );
30161 if (epartIsReg(rm)) {
30162 assign( mask, getIRegE(size,pfx,rm) );
30163 DIP("pdep %s,%s,%s\n", nameIRegE(size,pfx,rm),
30164 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
30165 delta++;
30166 } else {
30167 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30168 assign( mask, loadLE(ty, mkexpr(addr)) );
30169 DIP("pdep %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
30170 nameIRegG(size,pfx,rm));
30171 delta += alen;
30174 IRExpr** args = mkIRExprVec_2( widenUto64(mkexpr(src)),
30175 widenUto64(mkexpr(mask)) );
30176 putIRegG( size, pfx, rm,
30177 narrowTo(ty, mkIRExprCCall(Ity_I64, 0/*regparms*/,
30178 "amd64g_calculate_pdep",
30179 &amd64g_calculate_pdep, args)) );
30180 *uses_vvvv = True;
30181 /* Flags aren't modified. */
30182 goto decode_success;
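/* PDEP scatters the low-order bits of src into the bit positions that
   are set in mask, e.g. src = 0b101, mask = 0b11010 gives 0b10010.
   The work is done by the clean helper amd64g_calculate_pdep; as noted
   above, rFLAGS are not touched. */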
30184 /* PEXT r/m32, r32b, r32a = VEX.NDS.LZ.F3.0F38.W0 F5 /r */
30185 /* PEXT r/m64, r64b, r64a = VEX.NDS.LZ.F3.0F38.W1 F5 /r */
30186 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30187 Int size = getRexW(pfx) ? 8 : 4;
30188 IRType ty = szToITy(size);
30189 IRTemp src = newTemp(ty);
30190 IRTemp mask = newTemp(ty);
30191 UChar rm = getUChar(delta);
30193 assign( src, getIRegV(size,pfx) );
30194 if (epartIsReg(rm)) {
30195 assign( mask, getIRegE(size,pfx,rm) );
30196 DIP("pext %s,%s,%s\n", nameIRegE(size,pfx,rm),
30197 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
30198 delta++;
30199 } else {
30200 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30201 assign( mask, loadLE(ty, mkexpr(addr)) );
30202 DIP("pext %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
30203 nameIRegG(size,pfx,rm));
30204 delta += alen;
30207 /* First mask off the bits of src that are not set in mask; those bits
30208 are ignored by PEXT, so it is fine if they contain undefined values. */
30209 IRExpr* masked = binop(mkSizedOp(ty,Iop_And8),
30210 mkexpr(src), mkexpr(mask));
30211 IRExpr** args = mkIRExprVec_2( widenUto64(masked),
30212 widenUto64(mkexpr(mask)) );
30213 putIRegG( size, pfx, rm,
30214 narrowTo(ty, mkIRExprCCall(Ity_I64, 0/*regparms*/,
30215 "amd64g_calculate_pext",
30216 &amd64g_calculate_pext, args)) );
30217 *uses_vvvv = True;
30218 /* Flags aren't modified. */
30219 goto decode_success;
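/* PEXT is the inverse gather: the bits of src selected by mask are packed
   into the low-order bits of the result, e.g. src = 0b10110,
   mask = 0b11010 gives 0b101.  Pre-ANDing src with mask (above) means the
   ignored bit positions cannot influence the helper's arguments, so
   undefined values there are harmless. */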
30221 break;
30223 case 0xF6:
30224 /* MULX r/m32, r32b, r32a = VEX.NDD.LZ.F2.0F38.W0 F6 /r */
30225 /* MULX r/m64, r64b, r64a = VEX.NDD.LZ.F2.0F38.W1 F6 /r */
30226 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30227 Int size = getRexW(pfx) ? 8 : 4;
30228 IRType ty = szToITy(size);
30229 IRTemp src1 = newTemp(ty);
30230 IRTemp src2 = newTemp(ty);
30231 IRTemp res = newTemp(size == 8 ? Ity_I128 : Ity_I64);
30232 UChar rm = getUChar(delta);
30234 assign( src1, getIRegRDX(size) );
30235 if (epartIsReg(rm)) {
30236 assign( src2, getIRegE(size,pfx,rm) );
30237 DIP("mulx %s,%s,%s\n", nameIRegE(size,pfx,rm),
30238 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
30239 delta++;
30240 } else {
30241 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30242 assign( src2, loadLE(ty, mkexpr(addr)) );
30243 DIP("mulx %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
30244 nameIRegG(size,pfx,rm));
30245 delta += alen;
30248 assign( res, binop(size == 8 ? Iop_MullU64 : Iop_MullU32,
30249 mkexpr(src1), mkexpr(src2)) );
30250 putIRegV( size, pfx,
30251 unop(size == 8 ? Iop_128to64 : Iop_64to32, mkexpr(res)) );
30252 putIRegG( size, pfx, rm,
30253 unop(size == 8 ? Iop_128HIto64 : Iop_64HIto32,
30254 mkexpr(res)) );
30255 *uses_vvvv = True;
30256 /* Flags aren't modified. */
30257 goto decode_success;
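/* MULX is a flag-free unsigned widening multiply of RDX/EDX by the r/m
   operand: the low half of the double-width product goes to the vvvv
   register and the high half to the ModRM reg register.  E.g. for the
   32-bit form, EDX = 0x80000000 times 4 gives high = 2, low = 0. */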
30259 break;
30261 case 0xF7:
30262 /* SARX r32b, r/m32, r32a = VEX.NDS.LZ.F3.0F38.W0 F7 /r */
30263 /* SARX r64b, r/m64, r64a = VEX.NDS.LZ.F3.0F38.W1 F7 /r */
30264 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30265 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "sarx", Iop_Sar8 );
30266 goto decode_success;
30268 /* SHLX r32b, r/m32, r32a = VEX.NDS.LZ.66.0F38.W0 F7 /r */
30269 /* SHLX r64b, r/m64, r64a = VEX.NDS.LZ.66.0F38.W1 F7 /r */
30270 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30271 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "shlx", Iop_Shl8 );
30272 goto decode_success;
30274 /* SHRX r32b, r/m32, r32a = VEX.NDS.LZ.F2.0F38.W0 F7 /r */
30275 /* SHRX r64b, r/m64, r64a = VEX.NDS.LZ.F2.0F38.W1 F7 /r */
30276 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30277 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "shrx", Iop_Shr8 );
30278 goto decode_success;
30280 /* BEXTR r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F7 /r */
30281 /* BEXTR r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F7 /r */
30282 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30283 Int size = getRexW(pfx) ? 8 : 4;
30284 IRType ty = szToITy(size);
30285 IRTemp dst = newTemp(ty);
30286 IRTemp src1 = newTemp(ty);
30287 IRTemp src2 = newTemp(ty);
30288 IRTemp stle = newTemp(Ity_I16);
30289 IRTemp start = newTemp(Ity_I8);
30290 IRTemp len = newTemp(Ity_I8);
30291 UChar rm = getUChar(delta);
30293 assign( src2, getIRegV(size,pfx) );
30294 if (epartIsReg(rm)) {
30295 assign( src1, getIRegE(size,pfx,rm) );
30296 DIP("bextr %s,%s,%s\n", nameIRegV(size,pfx),
30297 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
30298 delta++;
30299 } else {
30300 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30301 assign( src1, loadLE(ty, mkexpr(addr)) );
30302 DIP("bextr %s,%s,%s\n", nameIRegV(size,pfx), dis_buf,
30303 nameIRegG(size,pfx,rm));
30304 delta += alen;
30307 assign( stle, narrowTo( Ity_I16, mkexpr(src2) ) );
30308 assign( start, unop( Iop_16to8, mkexpr(stle) ) );
30309 assign( len, unop( Iop_16HIto8, mkexpr(stle) ) );
30310 /* if (start+len < opsize) {
30311 if (len != 0)
30312 dst = (src1 << (opsize-start-len)) u>> (opsize-len);
30313 else
30314 dst = 0;
30315 } else {
30316 if (start < opsize)
30317 dst = src1 u>> start;
30318 else
30319 dst = 0;
30320 } */
30321 assign( dst,
30322 IRExpr_ITE(
30323 binop(Iop_CmpLT32U,
30324 binop(Iop_Add32,
30325 unop(Iop_8Uto32, mkexpr(start)),
30326 unop(Iop_8Uto32, mkexpr(len))),
30327 mkU32(8*size)),
30328 IRExpr_ITE(
30329 binop(Iop_CmpEQ8, mkexpr(len), mkU8(0)),
30330 mkU(ty, 0),
30331 binop(mkSizedOp(ty,Iop_Shr8),
30332 binop(mkSizedOp(ty,Iop_Shl8), mkexpr(src1),
30333 binop(Iop_Sub8,
30334 binop(Iop_Sub8, mkU8(8*size),
30335 mkexpr(start)),
30336 mkexpr(len))),
30337 binop(Iop_Sub8, mkU8(8*size),
30338 mkexpr(len)))
30339 ),
30340 IRExpr_ITE(
30341 binop(Iop_CmpLT32U,
30342 unop(Iop_8Uto32, mkexpr(start)),
30343 mkU32(8*size)),
30344 binop(mkSizedOp(ty,Iop_Shr8), mkexpr(src1),
30345 mkexpr(start)),
30346 mkU(ty, 0)
30347 )
30348 )
30349 );
30350 putIRegG( size, pfx, rm, mkexpr(dst) );
30351 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
30352 ? AMD64G_CC_OP_ANDN64
30353 : AMD64G_CC_OP_ANDN32)) );
30354 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
30355 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
30356 *uses_vvvv = True;
30357 goto decode_success;
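/* BEXTR extracts a bit field: the low byte of vvvv gives the start bit
   and the next byte the length, so dst = (src1 >> start) & ((1 << len) - 1),
   with the out-of-range cases handled as in the pseudocode above.  E.g.
   src1 = 0x12345678, start = 8, len = 8 gives dst = 0x56. */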
30359 break;
30361 default:
30362 break;
30366 //decode_failure:
30367 return deltaIN;
30369 decode_success:
30370 return delta;
30373 /* operand format:
30374 * [0] = dst
30375 * [n] = srcn
30376 */
30377 static Long decode_vregW(Int count, Long delta, UChar modrm, Prefix pfx,
30378 const VexAbiInfo* vbi, IRTemp *v, UInt *dst, Int swap)
30380 v[0] = newTemp(Ity_V128);
30381 v[1] = newTemp(Ity_V128);
30382 v[2] = newTemp(Ity_V128);
30383 v[3] = newTemp(Ity_V128);
30384 IRTemp addr = IRTemp_INVALID;
30385 Int alen = 0;
30386 HChar dis_buf[50];
30388 *dst = gregOfRexRM(pfx, modrm);
30389 assign( v[0], getXMMReg(*dst) );
30391 if ( epartIsReg( modrm ) ) {
30392 UInt ereg = eregOfRexRM(pfx, modrm);
30393 assign(swap ? v[count-1] : v[count-2], getXMMReg(ereg) );
30394 DIS(dis_buf, "%s", nameXMMReg(ereg));
30395 } else {
30396 Bool extra_byte = (getUChar(delta - 3) & 0xF) != 9;
30397 addr = disAMode(&alen, vbi, pfx, delta, dis_buf, extra_byte);
30398 assign(swap ? v[count-1] : v[count-2], loadLE(Ity_V128, mkexpr(addr)));
30399 delta += alen - 1;
30402 UInt vvvv = getVexNvvvv(pfx);
30403 switch(count) {
30404 case 2:
30405 DIP( "%s,%s", nameXMMReg(*dst), dis_buf );
30406 break;
30407 case 3:
30408 assign( swap ? v[1] : v[2], getXMMReg(vvvv) );
30409 DIP( "%s,%s,%s", nameXMMReg(*dst), nameXMMReg(vvvv), dis_buf );
30410 break;
30411 case 4:
30413 assign( v[1], getXMMReg(vvvv) );
30414 UInt src2 = getUChar(delta + 1) >> 4;
30415 assign( swap ? v[2] : v[3], getXMMReg(src2) );
30416 DIP( "%s,%s,%s,%s", nameXMMReg(*dst), nameXMMReg(vvvv),
30417 nameXMMReg(src2), dis_buf );
30419 break;
30421 return delta + 1;
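/* FMA4 note: decode_vregW above gathers up to four V128 operands for the
   4-operand AMD FMA4 forms; the fourth register number lives in the top
   nibble of the trailing imm8 byte.  In dis_FMA4 below, bits of the opcode
   select negation of the product (the vfnm* forms), the alternating
   add/sub forms, scalar vs packed, and float vs double. */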
30424 static Long dis_FMA4 (Prefix pfx, Long delta, UChar opc,
30425 Bool* uses_vvvv, const VexAbiInfo* vbi )
30427 UInt dst;
30428 *uses_vvvv = True;
30430 UChar modrm = getUChar(delta);
30432 Bool zero_64F = False;
30433 Bool zero_96F = False;
30434 UInt is_F32 = ((opc & 0x01) == 0x00) ? 1 : 0;
30435 Bool neg = (opc & 0xF0) == 0x70;
30436 Bool alt = (opc & 0xF0) == 0x50;
30437 Bool sub = alt ? (opc & 0x0E) != 0x0E : (opc & 0x0C) == 0x0C;
30439 IRTemp operand[4];
30440 switch(opc & 0xF) {
30441 case 0x0A: zero_96F = (opc >> 4) != 0x05; break;
30442 case 0x0B: zero_64F = (opc >> 4) != 0x05; break;
30443 case 0x0E: zero_96F = (opc >> 4) != 0x05; break;
30444 case 0x0F: zero_64F = (opc >> 4) != 0x05; break;
30445 default: break;
30447 DIP("vfm%s", neg ? "n" : "");
30448 if(alt) DIP("%s", sub ? "add" : "sub");
30449 DIP("%s", sub ? "sub" : "add");
30450 DIP("%c ", (zero_64F || zero_96F) ? 's' : 'p');
30451 DIP("%c ", is_F32 ? 's' : 'd');
30452 delta = decode_vregW(4, delta, modrm, pfx, vbi, operand, &dst, getRexW(pfx));
30453 DIP("\n");
30454 IRExpr *src[3];
30456 void (*putXMM[2])(UInt,Int,IRExpr*) = {&putXMMRegLane64F, &putXMMRegLane32F};
30458 IROp size_op[] = {Iop_V128to64, Iop_V128HIto64, Iop_64to32, Iop_64HIto32};
30459 IROp neg_op[] = {Iop_NegF64, Iop_NegF32};
30460 int i, j;
30461 for(i = 0; i < is_F32 * 2 + 2; i++) {
30462 for(j = 0; j < 3; j++) {
30463 if(is_F32) {
30464 src[j] = unop(Iop_ReinterpI32asF32,
30465 unop(size_op[i%2+2],
30466 unop(size_op[i/2],
30467 mkexpr(operand[j + 1])
30468 )
30469 ));
30470 } else {
30471 src[j] = unop(Iop_ReinterpI64asF64,
30472 unop(size_op[i%2],
30473 mkexpr(operand[j + 1])
30474 ));
30475 }
30476 }
30477 putXMM[is_F32](dst, i, IRExpr_Qop(is_F32 ? Iop_MAddF32 : Iop_MAddF64,
30478 get_FAKE_roundingmode(),
30479 neg ? unop(neg_op[is_F32], src[0])
30480 : src[0],
30481 src[1],
30482 sub ? unop(neg_op[is_F32], src[2])
30483 : src[2]
30484 ));
30485 if(alt) {
30486 sub = !sub;
30487 }
30488 }
30490 /* Zero out top bits of ymm/xmm register. */
30491 putYMMRegLane128( dst, 1, mkV128(0) );
30493 if(zero_64F || zero_96F) {
30494 putXMMRegLane64( dst, 1, IRExpr_Const(IRConst_U64(0)));
30497 if(zero_96F) {
30498 putXMMRegLane32( dst, 1, IRExpr_Const(IRConst_U32(0)));
30501 return delta+1;
30504 /*------------------------------------------------------------*/
30505 /*--- ---*/
30506 /*--- Top-level post-escape decoders: dis_ESC_0F3A__VEX ---*/
30507 /*--- ---*/
30508 /*------------------------------------------------------------*/
30510 static IRTemp math_VPERMILPS_128 ( IRTemp sV, UInt imm8 )
30512 vassert(imm8 < 256);
30513 IRTemp s3, s2, s1, s0;
30514 s3 = s2 = s1 = s0 = IRTemp_INVALID;
30515 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
30516 # define SEL(_nn) (((_nn)==0) ? s0 : ((_nn)==1) ? s1 \
30517 : ((_nn)==2) ? s2 : s3)
30518 IRTemp res = newTemp(Ity_V128);
30519 assign(res, mkV128from32s( SEL((imm8 >> 6) & 3),
30520 SEL((imm8 >> 4) & 3),
30521 SEL((imm8 >> 2) & 3),
30522 SEL((imm8 >> 0) & 3) ));
30523 # undef SEL
30524 return res;
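/* Each 2-bit field of imm8 picks one of the four 32-bit source lanes for
   the corresponding result lane, so e.g. imm8 = 0x1B (0b00011011) reverses
   the lane order and imm8 = 0x00 broadcasts lane 0. */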
30527 /* Handles 128 and 256 bit versions of VCVTPS2PH. */
30528 static Long dis_VCVTPS2PH ( const VexAbiInfo* vbi, Prefix pfx,
30529 Long delta, Bool is256bit )
30531 /* This is a width-halving store or reg-reg move that performs the F32->F16
30532 conversion on the transferred data. */
30533 UChar modrm = getUChar(delta);
30534 UInt rG = gregOfRexRM(pfx, modrm);
30535 IRTemp rm = newTemp(Ity_I32);
30536 IROp op = is256bit ? Iop_F32toF16x8 : Iop_F32toF16x4;
30537 IRExpr* srcG = (is256bit ? getYMMReg : getXMMReg)(rG);
30539 /* (imm & 3) contains an Intel-encoded rounding mode. Because that encoding
30540 is the same as the encoding for IRRoundingMode, we can use that value
30541 directly in the IR as a rounding mode. */
30543 if (epartIsReg(modrm)) {
30544 UInt rE = eregOfRexRM(pfx, modrm);
30545 delta += 1;
30546 UInt imm = getUChar(delta);
30547 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
30548 IRExpr* res = binop(op, mkexpr(rm), srcG);
30549 if (!is256bit)
30550 res = unop(Iop_64UtoV128, res);
30551 putYMMRegLoAndZU(rE, res);
30552 DIP("vcvtps2ph $%u,%s,%s\n",
30553 imm, (is256bit ? nameYMMReg : nameXMMReg)(rG), nameXMMReg(rE));
30554 } else {
30555 Int alen = 0;
30556 HChar dis_buf[50];
30557 IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30558 delta += alen;
30559 UInt imm = getUChar(delta);
30560 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
30561 IRExpr* res = binop(op, mkexpr(rm), srcG);
30562 storeLE(mkexpr(addr), res);
30563 DIP("vcvtps2ph $%u,%s,%s\n",
30564 imm, (is256bit ? nameYMMReg : nameXMMReg)(rG), dis_buf);
30566 delta++;
30567 /* doesn't use vvvv */
30568 return delta;
30571 __attribute__((noinline))
30572 static
30573 Long dis_ESC_0F3A__VEX (
30574 /*MB_OUT*/DisResult* dres,
30575 /*OUT*/ Bool* uses_vvvv,
30576 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
30577 Bool resteerCisOk,
30578 void* callback_opaque,
30579 const VexArchInfo* archinfo,
30580 const VexAbiInfo* vbi,
30581 Prefix pfx, Int sz, Long deltaIN
30584 IRTemp addr = IRTemp_INVALID;
30585 Int alen = 0;
30586 HChar dis_buf[50];
30587 Long delta = deltaIN;
30588 UChar opc = getUChar(delta);
30589 delta++;
30590 *uses_vvvv = False;
30592 switch (opc) {
30594 case 0x00:
30595 case 0x01:
30596 /* VPERMQ imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 00 /r ib */
30597 /* VPERMPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 01 /r ib */
30598 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
30599 && 1==getRexW(pfx)/*W1*/) {
30600 UChar modrm = getUChar(delta);
30601 UInt imm8 = 0;
30602 UInt rG = gregOfRexRM(pfx, modrm);
30603 IRTemp sV = newTemp(Ity_V256);
30604 const HChar *name = opc == 0 ? "vpermq" : "vpermpd";
30605 if (epartIsReg(modrm)) {
30606 UInt rE = eregOfRexRM(pfx, modrm);
30607 delta += 1;
30608 imm8 = getUChar(delta);
30609 DIP("%s $%u,%s,%s\n",
30610 name, imm8, nameYMMReg(rE), nameYMMReg(rG));
30611 assign(sV, getYMMReg(rE));
30612 } else {
30613 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30614 delta += alen;
30615 imm8 = getUChar(delta);
30616 DIP("%s $%u,%s,%s\n",
30617 name, imm8, dis_buf, nameYMMReg(rG));
30618 assign(sV, loadLE(Ity_V256, mkexpr(addr)));
30620 delta++;
30621 IRTemp s[4];
30622 s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
30623 breakupV256to64s(sV, &s[3], &s[2], &s[1], &s[0]);
30624 IRTemp dV = newTemp(Ity_V256);
30625 assign(dV, IRExpr_Qop(Iop_64x4toV256,
30626 mkexpr(s[(imm8 >> 6) & 3]),
30627 mkexpr(s[(imm8 >> 4) & 3]),
30628 mkexpr(s[(imm8 >> 2) & 3]),
30629 mkexpr(s[(imm8 >> 0) & 3])));
30630 putYMMReg(rG, mkexpr(dV));
30631 goto decode_success;
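/* Same selector scheme as VPERMILPS, but applied to the four 64-bit lanes
   of a ymm register: each 2-bit field of imm8 picks a source quadword, so
   imm8 = 0x4E (0b01001110) swaps the two 128-bit halves. */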
30633 break;
30635 case 0x02:
30636 /* VPBLENDD imm8, xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 02 /r ib */
30637 if (have66noF2noF3(pfx)
30638 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
30639 UChar modrm = getUChar(delta);
30640 UInt imm8 = 0;
30641 UInt rG = gregOfRexRM(pfx, modrm);
30642 UInt rV = getVexNvvvv(pfx);
30643 IRTemp sV = newTemp(Ity_V128);
30644 IRTemp dV = newTemp(Ity_V128);
30645 UInt i;
30646 IRTemp s[4], d[4];
30647 assign(sV, getXMMReg(rV));
30648 if (epartIsReg(modrm)) {
30649 UInt rE = eregOfRexRM(pfx, modrm);
30650 delta += 1;
30651 imm8 = getUChar(delta);
30652 DIP("vpblendd $%u,%s,%s,%s\n",
30653 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
30654 assign(dV, getXMMReg(rE));
30655 } else {
30656 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30657 delta += alen;
30658 imm8 = getUChar(delta);
30659 DIP("vpblendd $%u,%s,%s,%s\n",
30660 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
30661 assign(dV, loadLE(Ity_V128, mkexpr(addr)));
30663 delta++;
30664 for (i = 0; i < 4; i++) {
30665 s[i] = IRTemp_INVALID;
30666 d[i] = IRTemp_INVALID;
30668 breakupV128to32s( sV, &s[3], &s[2], &s[1], &s[0] );
30669 breakupV128to32s( dV, &d[3], &d[2], &d[1], &d[0] );
30670 for (i = 0; i < 4; i++)
30671 putYMMRegLane32(rG, i, mkexpr((imm8 & (1<<i)) ? d[i] : s[i]));
30672 putYMMRegLane128(rG, 1, mkV128(0));
30673 *uses_vvvv = True;
30674 goto decode_success;
30676 /* VPBLENDD imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F3A.W0 02 /r ib */
30677 if (have66noF2noF3(pfx)
30678 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
30679 UChar modrm = getUChar(delta);
30680 UInt imm8 = 0;
30681 UInt rG = gregOfRexRM(pfx, modrm);
30682 UInt rV = getVexNvvvv(pfx);
30683 IRTemp sV = newTemp(Ity_V256);
30684 IRTemp dV = newTemp(Ity_V256);
30685 UInt i;
30686 IRTemp s[8], d[8];
30687 assign(sV, getYMMReg(rV));
30688 if (epartIsReg(modrm)) {
30689 UInt rE = eregOfRexRM(pfx, modrm);
30690 delta += 1;
30691 imm8 = getUChar(delta);
30692 DIP("vpblendd $%u,%s,%s,%s\n",
30693 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
30694 assign(dV, getYMMReg(rE));
30695 } else {
30696 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30697 delta += alen;
30698 imm8 = getUChar(delta);
30699 DIP("vpblendd $%u,%s,%s,%s\n",
30700 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
30701 assign(dV, loadLE(Ity_V256, mkexpr(addr)));
30703 delta++;
30704 for (i = 0; i < 8; i++) {
30705 s[i] = IRTemp_INVALID;
30706 d[i] = IRTemp_INVALID;
30708 breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4],
30709 &s[3], &s[2], &s[1], &s[0] );
30710 breakupV256to32s( dV, &d[7], &d[6], &d[5], &d[4],
30711 &d[3], &d[2], &d[1], &d[0] );
30712 for (i = 0; i < 8; i++)
30713 putYMMRegLane32(rG, i, mkexpr((imm8 & (1<<i)) ? d[i] : s[i]));
30714 *uses_vvvv = True;
30715 goto decode_success;
30717 break;
30719 case 0x04:
30720 /* VPERMILPS imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 04 /r ib */
30721 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30722 UChar modrm = getUChar(delta);
30723 UInt imm8 = 0;
30724 UInt rG = gregOfRexRM(pfx, modrm);
30725 IRTemp sV = newTemp(Ity_V256);
30726 if (epartIsReg(modrm)) {
30727 UInt rE = eregOfRexRM(pfx, modrm);
30728 delta += 1;
30729 imm8 = getUChar(delta);
30730 DIP("vpermilps $%u,%s,%s\n",
30731 imm8, nameYMMReg(rE), nameYMMReg(rG));
30732 assign(sV, getYMMReg(rE));
30733 } else {
30734 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30735 delta += alen;
30736 imm8 = getUChar(delta);
30737 DIP("vpermilps $%u,%s,%s\n",
30738 imm8, dis_buf, nameYMMReg(rG));
30739 assign(sV, loadLE(Ity_V256, mkexpr(addr)));
30741 delta++;
30742 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
30743 breakupV256toV128s( sV, &sVhi, &sVlo );
30744 IRTemp dVhi = math_VPERMILPS_128( sVhi, imm8 );
30745 IRTemp dVlo = math_VPERMILPS_128( sVlo, imm8 );
30746 IRExpr* res = binop(Iop_V128HLtoV256, mkexpr(dVhi), mkexpr(dVlo));
30747 putYMMReg(rG, res);
30748 goto decode_success;
30750 /* VPERMILPS imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 04 /r ib */
30751 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30752 UChar modrm = getUChar(delta);
30753 UInt imm8 = 0;
30754 UInt rG = gregOfRexRM(pfx, modrm);
30755 IRTemp sV = newTemp(Ity_V128);
30756 if (epartIsReg(modrm)) {
30757 UInt rE = eregOfRexRM(pfx, modrm);
30758 delta += 1;
30759 imm8 = getUChar(delta);
30760 DIP("vpermilps $%u,%s,%s\n",
30761 imm8, nameXMMReg(rE), nameXMMReg(rG));
30762 assign(sV, getXMMReg(rE));
30763 } else {
30764 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30765 delta += alen;
30766 imm8 = getUChar(delta);
30767 DIP("vpermilps $%u,%s,%s\n",
30768 imm8, dis_buf, nameXMMReg(rG));
30769 assign(sV, loadLE(Ity_V128, mkexpr(addr)));
30771 delta++;
30772 putYMMRegLoAndZU(rG, mkexpr ( math_VPERMILPS_128 ( sV, imm8 ) ) );
30773 goto decode_success;
30775 break;
30777 case 0x05:
30778 /* VPERMILPD imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 05 /r ib */
30779 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30780 UChar modrm = getUChar(delta);
30781 UInt imm8 = 0;
30782 UInt rG = gregOfRexRM(pfx, modrm);
30783 IRTemp sV = newTemp(Ity_V128);
30784 if (epartIsReg(modrm)) {
30785 UInt rE = eregOfRexRM(pfx, modrm);
30786 delta += 1;
30787 imm8 = getUChar(delta);
30788 DIP("vpermilpd $%u,%s,%s\n",
30789 imm8, nameXMMReg(rE), nameXMMReg(rG));
30790 assign(sV, getXMMReg(rE));
30791 } else {
30792 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30793 delta += alen;
30794 imm8 = getUChar(delta);
30795 DIP("vpermilpd $%u,%s,%s\n",
30796 imm8, dis_buf, nameXMMReg(rG));
30797 assign(sV, loadLE(Ity_V128, mkexpr(addr)));
30799 delta++;
30800 IRTemp s1 = newTemp(Ity_I64);
30801 IRTemp s0 = newTemp(Ity_I64);
30802 assign(s1, unop(Iop_V128HIto64, mkexpr(sV)));
30803 assign(s0, unop(Iop_V128to64, mkexpr(sV)));
30804 IRTemp dV = newTemp(Ity_V128);
30805 assign(dV, binop(Iop_64HLtoV128,
30806 mkexpr((imm8 & (1<<1)) ? s1 : s0),
30807 mkexpr((imm8 & (1<<0)) ? s1 : s0)));
30808 putYMMRegLoAndZU(rG, mkexpr(dV));
30809 goto decode_success;
30811 /* VPERMILPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 05 /r ib */
30812 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30813 UChar modrm = getUChar(delta);
30814 UInt imm8 = 0;
30815 UInt rG = gregOfRexRM(pfx, modrm);
30816 IRTemp sV = newTemp(Ity_V256);
30817 if (epartIsReg(modrm)) {
30818 UInt rE = eregOfRexRM(pfx, modrm);
30819 delta += 1;
30820 imm8 = getUChar(delta);
30821 DIP("vpermilpd $%u,%s,%s\n",
30822 imm8, nameYMMReg(rE), nameYMMReg(rG));
30823 assign(sV, getYMMReg(rE));
30824 } else {
30825 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30826 delta += alen;
30827 imm8 = getUChar(delta);
30828 DIP("vpermilpd $%u,%s,%s\n",
30829 imm8, dis_buf, nameYMMReg(rG));
30830 assign(sV, loadLE(Ity_V256, mkexpr(addr)));
30832 delta++;
30833 IRTemp s3, s2, s1, s0;
30834 s3 = s2 = s1 = s0 = IRTemp_INVALID;
30835 breakupV256to64s(sV, &s3, &s2, &s1, &s0);
30836 IRTemp dV = newTemp(Ity_V256);
30837 assign(dV, IRExpr_Qop(Iop_64x4toV256,
30838 mkexpr((imm8 & (1<<3)) ? s3 : s2),
30839 mkexpr((imm8 & (1<<2)) ? s3 : s2),
30840 mkexpr((imm8 & (1<<1)) ? s1 : s0),
30841 mkexpr((imm8 & (1<<0)) ? s1 : s0)));
30842 putYMMReg(rG, mkexpr(dV));
30843 goto decode_success;
30845 break;
30847 case 0x06:
30848 /* VPERM2F128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.66.0F3A.W0 06 /r ib */
30849 if (have66noF2noF3(pfx)
30850 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
30851 UChar modrm = getUChar(delta);
30852 UInt imm8 = 0;
30853 UInt rG = gregOfRexRM(pfx, modrm);
30854 UInt rV = getVexNvvvv(pfx);
30855 IRTemp s00 = newTemp(Ity_V128);
30856 IRTemp s01 = newTemp(Ity_V128);
30857 IRTemp s10 = newTemp(Ity_V128);
30858 IRTemp s11 = newTemp(Ity_V128);
30859 assign(s00, getYMMRegLane128(rV, 0));
30860 assign(s01, getYMMRegLane128(rV, 1));
30861 if (epartIsReg(modrm)) {
30862 UInt rE = eregOfRexRM(pfx, modrm);
30863 delta += 1;
30864 imm8 = getUChar(delta);
30865 DIP("vperm2f128 $%u,%s,%s,%s\n",
30866 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
30867 assign(s10, getYMMRegLane128(rE, 0));
30868 assign(s11, getYMMRegLane128(rE, 1));
30869 } else {
30870 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30871 delta += alen;
30872 imm8 = getUChar(delta);
30873 DIP("vperm2f128 $%u,%s,%s,%s\n",
30874 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
30875 assign(s10, loadLE(Ity_V128, binop(Iop_Add64,
30876 mkexpr(addr), mkU64(0))));
30877 assign(s11, loadLE(Ity_V128, binop(Iop_Add64,
30878 mkexpr(addr), mkU64(16))));
30880 delta++;
30881 # define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \
30882 : ((_nn)==2) ? s10 : s11)
30883 putYMMRegLane128(rG, 0, mkexpr(SEL((imm8 >> 0) & 3)));
30884 putYMMRegLane128(rG, 1, mkexpr(SEL((imm8 >> 4) & 3)));
30885 # undef SEL
30886 if (imm8 & (1<<3)) putYMMRegLane128(rG, 0, mkV128(0));
30887 if (imm8 & (1<<7)) putYMMRegLane128(rG, 1, mkV128(0));
30888 *uses_vvvv = True;
30889 goto decode_success;
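/* For VPERM2F128, imm8[1:0] selects which 128-bit lane (vvvv lo/hi, then
   r/m lo/hi) goes to the low half of the result and imm8[5:4] which goes
   to the high half; bits 3 and 7 instead force the corresponding half to
   zero.  E.g. imm8 = 0x20 concatenates the low halves of the two sources. */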
30891 break;
30893 case 0x08:
30894 /* VROUNDPS imm8, xmm2/m128, xmm1 */
30895 /* VROUNDPS = VEX.NDS.128.66.0F3A.WIG 08 ib */
30896 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30897 UChar modrm = getUChar(delta);
30898 UInt rG = gregOfRexRM(pfx, modrm);
30899 IRTemp src = newTemp(Ity_V128);
30900 IRTemp s0 = IRTemp_INVALID;
30901 IRTemp s1 = IRTemp_INVALID;
30902 IRTemp s2 = IRTemp_INVALID;
30903 IRTemp s3 = IRTemp_INVALID;
30904 IRTemp rm = newTemp(Ity_I32);
30905 Int imm = 0;
30907 modrm = getUChar(delta);
30909 if (epartIsReg(modrm)) {
30910 UInt rE = eregOfRexRM(pfx, modrm);
30911 assign( src, getXMMReg( rE ) );
30912 imm = getUChar(delta+1);
30913 if (imm & ~15) break;
30914 delta += 1+1;
30915 DIP( "vroundps $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) );
30916 } else {
30917 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30918 assign( src, loadLE(Ity_V128, mkexpr(addr) ) );
30919 imm = getUChar(delta+alen);
30920 if (imm & ~15) break;
30921 delta += alen+1;
30922 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) );
30925 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30926 that encoding is the same as the encoding for IRRoundingMode,
30927 we can use that value directly in the IR as a rounding
30928 mode. */
30929 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
30931 breakupV128to32s( src, &s3, &s2, &s1, &s0 );
30932 putYMMRegLane128( rG, 1, mkV128(0) );
30933 # define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
30934 unop(Iop_ReinterpI32asF32, mkexpr(s)))
30935 putYMMRegLane32F( rG, 3, CVT(s3) );
30936 putYMMRegLane32F( rG, 2, CVT(s2) );
30937 putYMMRegLane32F( rG, 1, CVT(s1) );
30938 putYMMRegLane32F( rG, 0, CVT(s0) );
30939 # undef CVT
30940 goto decode_success;
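/* Iop_RoundF32toInt rounds each lane to an integral value that is still
   held in F32 format, i.e. this is FP rounding, not an FP-to-integer
   conversion; the rounding mode is the imm8-selected one set up in 'rm'
   above. */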
30942 /* VROUNDPS imm8, ymm2/m256, ymm1 */
30943 /* VROUNDPS = VEX.NDS.256.66.0F3A.WIG 08 ib */
30944 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30945 UChar modrm = getUChar(delta);
30946 UInt rG = gregOfRexRM(pfx, modrm);
30947 IRTemp src = newTemp(Ity_V256);
30948 IRTemp s0 = IRTemp_INVALID;
30949 IRTemp s1 = IRTemp_INVALID;
30950 IRTemp s2 = IRTemp_INVALID;
30951 IRTemp s3 = IRTemp_INVALID;
30952 IRTemp s4 = IRTemp_INVALID;
30953 IRTemp s5 = IRTemp_INVALID;
30954 IRTemp s6 = IRTemp_INVALID;
30955 IRTemp s7 = IRTemp_INVALID;
30956 IRTemp rm = newTemp(Ity_I32);
30957 Int imm = 0;
30959 modrm = getUChar(delta);
30961 if (epartIsReg(modrm)) {
30962 UInt rE = eregOfRexRM(pfx, modrm);
30963 assign( src, getYMMReg( rE ) );
30964 imm = getUChar(delta+1);
30965 if (imm & ~15) break;
30966 delta += 1+1;
30967 DIP( "vroundps $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) );
30968 } else {
30969 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30970 assign( src, loadLE(Ity_V256, mkexpr(addr) ) );
30971 imm = getUChar(delta+alen);
30972 if (imm & ~15) break;
30973 delta += alen+1;
30974 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) );
30977 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30978 that encoding is the same as the encoding for IRRoundingMode,
30979 we can use that value directly in the IR as a rounding
30980 mode. */
30981 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
30983 breakupV256to32s( src, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
30984 # define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
30985 unop(Iop_ReinterpI32asF32, mkexpr(s)))
30986 putYMMRegLane32F( rG, 7, CVT(s7) );
30987 putYMMRegLane32F( rG, 6, CVT(s6) );
30988 putYMMRegLane32F( rG, 5, CVT(s5) );
30989 putYMMRegLane32F( rG, 4, CVT(s4) );
30990 putYMMRegLane32F( rG, 3, CVT(s3) );
30991 putYMMRegLane32F( rG, 2, CVT(s2) );
30992 putYMMRegLane32F( rG, 1, CVT(s1) );
30993 putYMMRegLane32F( rG, 0, CVT(s0) );
30994 # undef CVT
30995 goto decode_success;
30997 break;
30999 case 0x09:
31000 /* VROUNDPD imm8, xmm2/m128, xmm1 */
31001 /* VROUNDPD = VEX.NDS.128.66.0F3A.WIG 09 ib */
31002 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31003 UChar modrm = getUChar(delta);
31004 UInt rG = gregOfRexRM(pfx, modrm);
31005 IRTemp src = newTemp(Ity_V128);
31006 IRTemp s0 = IRTemp_INVALID;
31007 IRTemp s1 = IRTemp_INVALID;
31008 IRTemp rm = newTemp(Ity_I32);
31009 Int imm = 0;
31011 modrm = getUChar(delta);
31013 if (epartIsReg(modrm)) {
31014 UInt rE = eregOfRexRM(pfx, modrm);
31015 assign( src, getXMMReg( rE ) );
31016 imm = getUChar(delta+1);
31017 if (imm & ~15) break;
31018 delta += 1+1;
31019 DIP( "vroundpd $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) );
31020 } else {
31021 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31022 assign( src, loadLE(Ity_V128, mkexpr(addr) ) );
31023 imm = getUChar(delta+alen);
31024 if (imm & ~15) break;
31025 delta += alen+1;
31026 DIP( "vroundpd $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) );
31029 /* (imm & 3) contains an Intel-encoded rounding mode. Because
31030 that encoding is the same as the encoding for IRRoundingMode,
31031 we can use that value directly in the IR as a rounding
31032 mode. */
31033 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
31035 breakupV128to64s( src, &s1, &s0 );
31036 putYMMRegLane128( rG, 1, mkV128(0) );
31037 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
31038 unop(Iop_ReinterpI64asF64, mkexpr(s)))
31039 putYMMRegLane64F( rG, 1, CVT(s1) );
31040 putYMMRegLane64F( rG, 0, CVT(s0) );
31041 # undef CVT
31042 goto decode_success;
31044 /* VROUNDPD imm8, ymm2/m256, ymm1 */
31045 /* VROUNDPD = VEX.NDS.256.66.0F3A.WIG 09 ib */
31046 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31047 UChar modrm = getUChar(delta);
31048 UInt rG = gregOfRexRM(pfx, modrm);
31049 IRTemp src = newTemp(Ity_V256);
31050 IRTemp s0 = IRTemp_INVALID;
31051 IRTemp s1 = IRTemp_INVALID;
31052 IRTemp s2 = IRTemp_INVALID;
31053 IRTemp s3 = IRTemp_INVALID;
31054 IRTemp rm = newTemp(Ity_I32);
31055 Int imm = 0;
31057 modrm = getUChar(delta);
31059 if (epartIsReg(modrm)) {
31060 UInt rE = eregOfRexRM(pfx, modrm);
31061 assign( src, getYMMReg( rE ) );
31062 imm = getUChar(delta+1);
31063 if (imm & ~15) break;
31064 delta += 1+1;
31065 DIP( "vroundpd $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) );
31066 } else {
31067 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31068 assign( src, loadLE(Ity_V256, mkexpr(addr) ) );
31069 imm = getUChar(delta+alen);
31070 if (imm & ~15) break;
31071 delta += alen+1;
31072 DIP( "vroundpd $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) );
31075 /* (imm & 3) contains an Intel-encoded rounding mode. Because
31076 that encoding is the same as the encoding for IRRoundingMode,
31077 we can use that value directly in the IR as a rounding
31078 mode. */
31079 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
31081 breakupV256to64s( src, &s3, &s2, &s1, &s0 );
31082 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
31083 unop(Iop_ReinterpI64asF64, mkexpr(s)))
31084 putYMMRegLane64F( rG, 3, CVT(s3) );
31085 putYMMRegLane64F( rG, 2, CVT(s2) );
31086 putYMMRegLane64F( rG, 1, CVT(s1) );
31087 putYMMRegLane64F( rG, 0, CVT(s0) );
31088 # undef CVT
31089 goto decode_success;
31091 break;
31093 case 0x0A:
31094 case 0x0B:
31095 /* VROUNDSS imm8, xmm3/m32, xmm2, xmm1 */
31096 /* VROUNDSS = VEX.NDS.128.66.0F3A.WIG 0A ib */
31097 /* VROUNDSD imm8, xmm3/m64, xmm2, xmm1 */
31098 /* VROUNDSD = VEX.NDS.128.66.0F3A.WIG 0B ib */
31099 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31100 UChar modrm = getUChar(delta);
31101 UInt rG = gregOfRexRM(pfx, modrm);
31102 UInt rV = getVexNvvvv(pfx);
31103 Bool isD = opc == 0x0B;
31104 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
31105 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
31106 Int imm = 0;
31108 if (epartIsReg(modrm)) {
31109 UInt rE = eregOfRexRM(pfx, modrm);
31110 assign( src,
31111 isD ? getXMMRegLane64F(rE, 0) : getXMMRegLane32F(rE, 0) );
31112 imm = getUChar(delta+1);
31113 if (imm & ~15) break;
31114 delta += 1+1;
31115 DIP( "vrounds%c $%d,%s,%s,%s\n",
31116 isD ? 'd' : 's',
31117 imm, nameXMMReg( rE ), nameXMMReg( rV ), nameXMMReg( rG ) );
31118 } else {
31119 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31120 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
31121 imm = getUChar(delta+alen);
31122 if (imm & ~15) break;
31123 delta += alen+1;
31124 DIP( "vrounds%c $%d,%s,%s,%s\n",
31125 isD ? 'd' : 's',
31126 imm, dis_buf, nameXMMReg( rV ), nameXMMReg( rG ) );
31129 /* (imm & 3) contains an Intel-encoded rounding mode. Because
31130 that encoding is the same as the encoding for IRRoundingMode,
31131 we can use that value directly in the IR as a rounding
31132 mode. */
31133 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
31134 (imm & 4) ? get_sse_roundingmode()
31135 : mkU32(imm & 3),
31136 mkexpr(src)) );
31138 if (isD)
31139 putXMMRegLane64F( rG, 0, mkexpr(res) );
31140 else {
31141 putXMMRegLane32F( rG, 0, mkexpr(res) );
31142 putXMMRegLane32F( rG, 1, getXMMRegLane32F( rV, 1 ) );
31144 putXMMRegLane64F( rG, 1, getXMMRegLane64F( rV, 1 ) );
31145 putYMMRegLane128( rG, 1, mkV128(0) );
31146 *uses_vvvv = True;
31147 goto decode_success;
31149 break;
31151 case 0x0C:
31152 /* VBLENDPS imm8, ymm3/m256, ymm2, ymm1 */
31153 /* VBLENDPS = VEX.NDS.256.66.0F3A.WIG 0C /r ib */
31154 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31155 UChar modrm = getUChar(delta);
31156 UInt imm8;
31157 UInt rG = gregOfRexRM(pfx, modrm);
31158 UInt rV = getVexNvvvv(pfx);
31159 IRTemp sV = newTemp(Ity_V256);
31160 IRTemp sE = newTemp(Ity_V256);
31161 assign ( sV, getYMMReg(rV) );
31162 if (epartIsReg(modrm)) {
31163 UInt rE = eregOfRexRM(pfx, modrm);
31164 delta += 1;
31165 imm8 = getUChar(delta);
31166 DIP("vblendps $%u,%s,%s,%s\n",
31167 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31168 assign(sE, getYMMReg(rE));
31169 } else {
31170 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31171 delta += alen;
31172 imm8 = getUChar(delta);
31173 DIP("vblendps $%u,%s,%s,%s\n",
31174 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31175 assign(sE, loadLE(Ity_V256, mkexpr(addr)));
31177 delta++;
31178 putYMMReg( rG,
31179 mkexpr( math_BLENDPS_256( sE, sV, imm8) ) );
31180 *uses_vvvv = True;
31181 goto decode_success;
31183 /* VBLENDPS imm8, xmm3/m128, xmm2, xmm1 */
31184 /* VBLENDPS = VEX.NDS.128.66.0F3A.WIG 0C /r ib */
31185 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31186 UChar modrm = getUChar(delta);
31187 UInt imm8;
31188 UInt rG = gregOfRexRM(pfx, modrm);
31189 UInt rV = getVexNvvvv(pfx);
31190 IRTemp sV = newTemp(Ity_V128);
31191 IRTemp sE = newTemp(Ity_V128);
31192 assign ( sV, getXMMReg(rV) );
31193 if (epartIsReg(modrm)) {
31194 UInt rE = eregOfRexRM(pfx, modrm);
31195 delta += 1;
31196 imm8 = getUChar(delta);
31197 DIP("vblendps $%u,%s,%s,%s\n",
31198 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
31199 assign(sE, getXMMReg(rE));
31200 } else {
31201 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31202 delta += alen;
31203 imm8 = getUChar(delta);
31204 DIP("vblendps $%u,%s,%s,%s\n",
31205 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
31206 assign(sE, loadLE(Ity_V128, mkexpr(addr)));
31208 delta++;
31209 putYMMRegLoAndZU( rG,
31210 mkexpr( math_BLENDPS_128( sE, sV, imm8) ) );
31211 *uses_vvvv = True;
31212 goto decode_success;
31214 break;
31216 case 0x0D:
31217 /* VBLENDPD imm8, ymm3/m256, ymm2, ymm1 */
31218 /* VBLENDPD = VEX.NDS.256.66.0F3A.WIG 0D /r ib */
31219 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31220 UChar modrm = getUChar(delta);
31221 UInt imm8;
31222 UInt rG = gregOfRexRM(pfx, modrm);
31223 UInt rV = getVexNvvvv(pfx);
31224 IRTemp sV = newTemp(Ity_V256);
31225 IRTemp sE = newTemp(Ity_V256);
31226 assign ( sV, getYMMReg(rV) );
31227 if (epartIsReg(modrm)) {
31228 UInt rE = eregOfRexRM(pfx, modrm);
31229 delta += 1;
31230 imm8 = getUChar(delta);
31231 DIP("vblendpd $%u,%s,%s,%s\n",
31232 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31233 assign(sE, getYMMReg(rE));
31234 } else {
31235 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31236 delta += alen;
31237 imm8 = getUChar(delta);
31238 DIP("vblendpd $%u,%s,%s,%s\n",
31239 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31240 assign(sE, loadLE(Ity_V256, mkexpr(addr)));
31242 delta++;
31243 putYMMReg( rG,
31244 mkexpr( math_BLENDPD_256( sE, sV, imm8) ) );
31245 *uses_vvvv = True;
31246 goto decode_success;
31248 /* VBLENDPD imm8, xmm3/m128, xmm2, xmm1 */
31249 /* VBLENDPD = VEX.NDS.128.66.0F3A.WIG 0D /r ib */
31250 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31251 UChar modrm = getUChar(delta);
31252 UInt imm8;
31253 UInt rG = gregOfRexRM(pfx, modrm);
31254 UInt rV = getVexNvvvv(pfx);
31255 IRTemp sV = newTemp(Ity_V128);
31256 IRTemp sE = newTemp(Ity_V128);
31257 assign ( sV, getXMMReg(rV) );
31258 if (epartIsReg(modrm)) {
31259 UInt rE = eregOfRexRM(pfx, modrm);
31260 delta += 1;
31261 imm8 = getUChar(delta);
31262 DIP("vblendpd $%u,%s,%s,%s\n",
31263 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
31264 assign(sE, getXMMReg(rE));
31265 } else {
31266 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31267 delta += alen;
31268 imm8 = getUChar(delta);
31269 DIP("vblendpd $%u,%s,%s,%s\n",
31270 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
31271 assign(sE, loadLE(Ity_V128, mkexpr(addr)));
31273 delta++;
31274 putYMMRegLoAndZU( rG,
31275 mkexpr( math_BLENDPD_128( sE, sV, imm8) ) );
31276 *uses_vvvv = True;
31277 goto decode_success;
31279 break;
31281 case 0x0E:
31282 /* VPBLENDW imm8, xmm3/m128, xmm2, xmm1 */
31283 /* VPBLENDW = VEX.NDS.128.66.0F3A.WIG 0E /r ib */
31284 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31285 UChar modrm = getUChar(delta);
31286 UInt imm8;
31287 UInt rG = gregOfRexRM(pfx, modrm);
31288 UInt rV = getVexNvvvv(pfx);
31289 IRTemp sV = newTemp(Ity_V128);
31290 IRTemp sE = newTemp(Ity_V128);
31291 assign ( sV, getXMMReg(rV) );
31292 if (epartIsReg(modrm)) {
31293 UInt rE = eregOfRexRM(pfx, modrm);
31294 delta += 1;
31295 imm8 = getUChar(delta);
31296 DIP("vpblendw $%u,%s,%s,%s\n",
31297 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
31298 assign(sE, getXMMReg(rE));
31299 } else {
31300 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31301 delta += alen;
31302 imm8 = getUChar(delta);
31303 DIP("vpblendw $%u,%s,%s,%s\n",
31304 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
31305 assign(sE, loadLE(Ity_V128, mkexpr(addr)));
31307 delta++;
31308 putYMMRegLoAndZU( rG,
31309 mkexpr( math_PBLENDW_128( sE, sV, imm8) ) );
31310 *uses_vvvv = True;
31311 goto decode_success;
31313 /* VPBLENDW imm8, ymm3/m256, ymm2, ymm1 */
31314 /* VPBLENDW = VEX.NDS.256.66.0F3A.WIG 0E /r ib */
31315 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31316 UChar modrm = getUChar(delta);
31317 UInt imm8;
31318 UInt rG = gregOfRexRM(pfx, modrm);
31319 UInt rV = getVexNvvvv(pfx);
31320 IRTemp sV = newTemp(Ity_V256);
31321 IRTemp sE = newTemp(Ity_V256);
31322 IRTemp sVhi, sVlo, sEhi, sElo;
31323 sVhi = sVlo = sEhi = sElo = IRTemp_INVALID;
31324 assign ( sV, getYMMReg(rV) );
31325 if (epartIsReg(modrm)) {
31326 UInt rE = eregOfRexRM(pfx, modrm);
31327 delta += 1;
31328 imm8 = getUChar(delta);
31329 DIP("vpblendw $%u,%s,%s,%s\n",
31330 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31331 assign(sE, getYMMReg(rE));
31332 } else {
31333 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31334 delta += alen;
31335 imm8 = getUChar(delta);
31336 DIP("vpblendw $%u,%s,%s,%s\n",
31337 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31338 assign(sE, loadLE(Ity_V256, mkexpr(addr)));
31340 delta++;
31341 breakupV256toV128s( sV, &sVhi, &sVlo );
31342 breakupV256toV128s( sE, &sEhi, &sElo );
31343 putYMMReg( rG, binop( Iop_V128HLtoV256,
31344 mkexpr( math_PBLENDW_128( sEhi, sVhi, imm8) ),
31345 mkexpr( math_PBLENDW_128( sElo, sVlo, imm8) ) ) );
31346 *uses_vvvv = True;
31347 goto decode_success;
31349 break;
31351 case 0x0F:
31352 /* VPALIGNR imm8, xmm3/m128, xmm2, xmm1 */
31353 /* VPALIGNR = VEX.NDS.128.66.0F3A.WIG 0F /r ib */
31354 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31355 UChar modrm = getUChar(delta);
31356 UInt rG = gregOfRexRM(pfx, modrm);
31357 UInt rV = getVexNvvvv(pfx);
31358 IRTemp sV = newTemp(Ity_V128);
31359 IRTemp dV = newTemp(Ity_V128);
31360 UInt imm8;
31362 assign( dV, getXMMReg(rV) );
31364 if ( epartIsReg( modrm ) ) {
31365 UInt rE = eregOfRexRM(pfx, modrm);
31366 assign( sV, getXMMReg(rE) );
31367 imm8 = getUChar(delta+1);
31368 delta += 1+1;
31369 DIP("vpalignr $%u,%s,%s,%s\n", imm8, nameXMMReg(rE),
31370 nameXMMReg(rV), nameXMMReg(rG));
31371 } else {
31372 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31373 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
31374 imm8 = getUChar(delta+alen);
31375 delta += alen+1;
31376 DIP("vpalignr $%u,%s,%s,%s\n", imm8, dis_buf,
31377 nameXMMReg(rV), nameXMMReg(rG));
31380 IRTemp res = math_PALIGNR_XMM( sV, dV, imm8 );
31381 putYMMRegLoAndZU( rG, mkexpr(res) );
31382 *uses_vvvv = True;
31383 goto decode_success;
31385 /* VPALIGNR imm8, ymm3/m256, ymm2, ymm1 */
31386 /* VPALIGNR = VEX.NDS.256.66.0F3A.WIG 0F /r ib */
31387 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31388 UChar modrm = getUChar(delta);
31389 UInt rG = gregOfRexRM(pfx, modrm);
31390 UInt rV = getVexNvvvv(pfx);
31391 IRTemp sV = newTemp(Ity_V256);
31392 IRTemp dV = newTemp(Ity_V256);
31393 IRTemp sHi, sLo, dHi, dLo;
31394 sHi = sLo = dHi = dLo = IRTemp_INVALID;
31395 UInt imm8;
31397 assign( dV, getYMMReg(rV) );
31399 if ( epartIsReg( modrm ) ) {
31400 UInt rE = eregOfRexRM(pfx, modrm);
31401 assign( sV, getYMMReg(rE) );
31402 imm8 = getUChar(delta+1);
31403 delta += 1+1;
31404 DIP("vpalignr $%u,%s,%s,%s\n", imm8, nameYMMReg(rE),
31405 nameYMMReg(rV), nameYMMReg(rG));
31406 } else {
31407 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31408 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
31409 imm8 = getUChar(delta+alen);
31410 delta += alen+1;
31411 DIP("vpalignr $%u,%s,%s,%s\n", imm8, dis_buf,
31412 nameYMMReg(rV), nameYMMReg(rG));
31415 breakupV256toV128s( dV, &dHi, &dLo );
31416 breakupV256toV128s( sV, &sHi, &sLo );
31417 putYMMReg( rG, binop( Iop_V128HLtoV256,
31418 mkexpr( math_PALIGNR_XMM( sHi, dHi, imm8 ) ),
31419 mkexpr( math_PALIGNR_XMM( sLo, dLo, imm8 ) ) )
31421 *uses_vvvv = True;
31422 goto decode_success;
31424 break;
31426 case 0x14:
31427 /* VPEXTRB imm8, xmm2, reg/m8 = VEX.128.66.0F3A.W0 14 /r ib */
31428 if (have66noF2noF3(pfx)
31429 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31430 delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ );
31431 goto decode_success;
31433 break;
31435 case 0x15:
31436 /* VPEXTRW imm8, reg/m16, xmm2 */
31437 /* VPEXTRW = VEX.128.66.0F3A.W0 15 /r ib */
31438 if (have66noF2noF3(pfx)
31439 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31440 delta = dis_PEXTRW( vbi, pfx, delta, True/*isAvx*/ );
31441 goto decode_success;
31443 break;
31445 case 0x16:
31446 /* VPEXTRD imm8, r32/m32, xmm2 */
31447 /* VPEXTRD = VEX.128.66.0F3A.W0 16 /r ib */
31448 if (have66noF2noF3(pfx)
31449 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31450 delta = dis_PEXTRD( vbi, pfx, delta, True/*isAvx*/ );
31451 goto decode_success;
31453 /* VPEXTRQ = VEX.128.66.0F3A.W1 16 /r ib */
31454 if (have66noF2noF3(pfx)
31455 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
31456 delta = dis_PEXTRQ( vbi, pfx, delta, True/*isAvx*/ );
31457 goto decode_success;
31459 break;
31461 case 0x17:
31462 /* VEXTRACTPS imm8, xmm1, r32/m32 = VEX.128.66.0F3A.WIG 17 /r ib */
31463 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31464 delta = dis_EXTRACTPS( vbi, pfx, delta, True/*isAvx*/ );
31465 goto decode_success;
31467 break;
31469 case 0x18:
31470 /* VINSERTF128 r/m, rV, rD
31471 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */
31472 /* VINSERTF128 = VEX.NDS.256.66.0F3A.W0 18 /r ib */
31473 if (have66noF2noF3(pfx)
31474 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
31475 UChar modrm = getUChar(delta);
31476 UInt ib = 0;
31477 UInt rG = gregOfRexRM(pfx, modrm);
31478 UInt rV = getVexNvvvv(pfx);
31479 IRTemp t128 = newTemp(Ity_V128);
31480 if (epartIsReg(modrm)) {
31481 UInt rE = eregOfRexRM(pfx, modrm);
31482 delta += 1;
31483 assign(t128, getXMMReg(rE));
31484 ib = getUChar(delta);
31485 DIP("vinsertf128 $%u,%s,%s,%s\n",
31486 ib, nameXMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31487 } else {
31488 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31489 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
31490 delta += alen;
31491 ib = getUChar(delta);
31492 DIP("vinsertf128 $%u,%s,%s,%s\n",
31493 ib, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31495 delta++;
31496 putYMMRegLane128(rG, 0, getYMMRegLane128(rV, 0));
31497 putYMMRegLane128(rG, 1, getYMMRegLane128(rV, 1));
31498 putYMMRegLane128(rG, ib & 1, mkexpr(t128));
31499 *uses_vvvv = True;
31500 goto decode_success;
31502 break;
31504 case 0x19:
31505 /* VEXTRACTF128 $lane_no, rS, r/m
31506 ::: r/m:V128 = a lane of rS:V256 (RM format) */
31507 /* VEXTRACTF128 = VEX.256.66.0F3A.W0 19 /r ib */
31508 if (have66noF2noF3(pfx)
31509 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
31510 UChar modrm = getUChar(delta);
31511 UInt ib = 0;
31512 UInt rS = gregOfRexRM(pfx, modrm);
31513 IRTemp t128 = newTemp(Ity_V128);
31514 if (epartIsReg(modrm)) {
31515 UInt rD = eregOfRexRM(pfx, modrm);
31516 delta += 1;
31517 ib = getUChar(delta);
31518 assign(t128, getYMMRegLane128(rS, ib & 1));
31519 putYMMRegLoAndZU(rD, mkexpr(t128));
31520 DIP("vextractf128 $%u,%s,%s\n",
31521 ib, nameXMMReg(rS), nameYMMReg(rD));
31522 } else {
31523 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31524 delta += alen;
31525 ib = getUChar(delta);
31526 assign(t128, getYMMRegLane128(rS, ib & 1));
31527 storeLE(mkexpr(addr), mkexpr(t128));
31528 DIP("vextractf128 $%u,%s,%s\n",
31529 ib, nameYMMReg(rS), dis_buf);
31531 delta++;
31532 /* doesn't use vvvv */
31533 goto decode_success;
31535 break;
31537 case 0x1D:
31538 /* VCVTPS2PH imm8, xmm2, xmm1/m64 = VEX.128.66.0F3A.W0 1D /r ib */
31539 if (have66noF2noF3(pfx)
31540 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/
31541 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
31542 delta = dis_VCVTPS2PH( vbi, pfx, delta, /*is256bit=*/False );
31543 goto decode_success;
31545 /* VCVTPS2PH imm8, ymm2, ymm1/m128 = VEX.256.66.0F3A.W0 1D /r ib */
31546 if (have66noF2noF3(pfx)
31547 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/
31548 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
31549 delta = dis_VCVTPS2PH( vbi, pfx, delta, /*is256bit=*/True );
31550 goto decode_success;
31552 break;
31554 case 0x20:
31555 /* VPINSRB r32/m8, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 20 /r ib */
31556 if (have66noF2noF3(pfx)
31557 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31558 UChar modrm = getUChar(delta);
31559 UInt rG = gregOfRexRM(pfx, modrm);
31560 UInt rV = getVexNvvvv(pfx);
31561 Int imm8;
31562 IRTemp src_u8 = newTemp(Ity_I8);
31564 if ( epartIsReg( modrm ) ) {
31565 UInt rE = eregOfRexRM(pfx,modrm);
31566 imm8 = (Int)(getUChar(delta+1) & 15);
31567 assign( src_u8, unop(Iop_32to8, getIReg32( rE )) );
31568 delta += 1+1;
31569 DIP( "vpinsrb $%d,%s,%s,%s\n",
31570 imm8, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) );
31571 } else {
31572 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31573 imm8 = (Int)(getUChar(delta+alen) & 15);
31574 assign( src_u8, loadLE( Ity_I8, mkexpr(addr) ) );
31575 delta += alen+1;
31576 DIP( "vpinsrb $%d,%s,%s,%s\n",
31577 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31580 IRTemp src_vec = newTemp(Ity_V128);
31581 assign(src_vec, getXMMReg( rV ));
31582 IRTemp res_vec = math_PINSRB_128( src_vec, src_u8, imm8 );
31583 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31584 *uses_vvvv = True;
31585 goto decode_success;
31587 break;
31589 case 0x21:
31590 /* VINSERTPS imm8, xmm3/m32, xmm2, xmm1
31591 = VEX.NDS.128.66.0F3A.WIG 21 /r ib */
31592 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31593 UChar modrm = getUChar(delta);
31594 UInt rG = gregOfRexRM(pfx, modrm);
31595 UInt rV = getVexNvvvv(pfx);
31596 UInt imm8;
31597 IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */
31598 const IRTemp inval = IRTemp_INVALID;
31600 if ( epartIsReg( modrm ) ) {
31601 UInt rE = eregOfRexRM(pfx, modrm);
31602 IRTemp vE = newTemp(Ity_V128);
31603 assign( vE, getXMMReg(rE) );
31604 IRTemp dsE[4] = { inval, inval, inval, inval };
31605 breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] );
31606 imm8 = getUChar(delta+1);
31607 d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */
31608 delta += 1+1;
31609 DIP( "insertps $%u, %s,%s\n",
31610 imm8, nameXMMReg(rE), nameXMMReg(rG) );
31611 } else {
31612 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31613 assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) );
31614 imm8 = getUChar(delta+alen);
31615 delta += alen+1;
31616 DIP( "insertps $%u, %s,%s\n",
31617 imm8, dis_buf, nameXMMReg(rG) );
31620 IRTemp vV = newTemp(Ity_V128);
31621 assign( vV, getXMMReg(rV) );
31623 putYMMRegLoAndZU( rG, mkexpr(math_INSERTPS( vV, d2ins, imm8 )) );
31624 *uses_vvvv = True;
31625 goto decode_success;
31627 break;
31629 case 0x22:
31630 /* VPINSRD r32/m32, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 22 /r ib */
31631 if (have66noF2noF3(pfx)
31632 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31633 UChar modrm = getUChar(delta);
31634 UInt rG = gregOfRexRM(pfx, modrm);
31635 UInt rV = getVexNvvvv(pfx);
31636 Int imm8_10;
31637 IRTemp src_u32 = newTemp(Ity_I32);
31639 if ( epartIsReg( modrm ) ) {
31640 UInt rE = eregOfRexRM(pfx,modrm);
31641 imm8_10 = (Int)(getUChar(delta+1) & 3);
31642 assign( src_u32, getIReg32( rE ) );
31643 delta += 1+1;
31644 DIP( "vpinsrd $%d,%s,%s,%s\n",
31645 imm8_10, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) );
31646 } else {
31647 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31648 imm8_10 = (Int)(getUChar(delta+alen) & 3);
31649 assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) );
31650 delta += alen+1;
31651 DIP( "vpinsrd $%d,%s,%s,%s\n",
31652 imm8_10, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31655 IRTemp src_vec = newTemp(Ity_V128);
31656 assign(src_vec, getXMMReg( rV ));
31657 IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 );
31658 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31659 *uses_vvvv = True;
31660 goto decode_success;
31662 /* VPINSRQ r64/m64, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W1 22 /r ib */
31663 if (have66noF2noF3(pfx)
31664 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
31665 UChar modrm = getUChar(delta);
31666 UInt rG = gregOfRexRM(pfx, modrm);
31667 UInt rV = getVexNvvvv(pfx);
31668 Int imm8_0;
31669 IRTemp src_u64 = newTemp(Ity_I64);
31671 if ( epartIsReg( modrm ) ) {
31672 UInt rE = eregOfRexRM(pfx,modrm);
31673 imm8_0 = (Int)(getUChar(delta+1) & 1);
31674 assign( src_u64, getIReg64( rE ) );
31675 delta += 1+1;
31676 DIP( "vpinsrq $%d,%s,%s,%s\n",
31677 imm8_0, nameIReg64(rE), nameXMMReg(rV), nameXMMReg(rG) );
31678 } else {
31679 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31680 imm8_0 = (Int)(getUChar(delta+alen) & 1);
31681 assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) );
31682 delta += alen+1;
31683 DIP( "vpinsrq $%d,%s,%s,%s\n",
31684 imm8_0, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31687 IRTemp src_vec = newTemp(Ity_V128);
31688 assign(src_vec, getXMMReg( rV ));
31689 IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 );
31690 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31691 *uses_vvvv = True;
31692 goto decode_success;
31694 break;
31696 case 0x38:
31697 /* VINSERTI128 r/m, rV, rD
31698 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */
31699 /* VINSERTI128 = VEX.NDS.256.66.0F3A.W0 38 /r ib */
31700 if (have66noF2noF3(pfx)
31701 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
31702 UChar modrm = getUChar(delta);
31703 UInt ib = 0;
31704 UInt rG = gregOfRexRM(pfx, modrm);
31705 UInt rV = getVexNvvvv(pfx);
31706 IRTemp t128 = newTemp(Ity_V128);
31707 if (epartIsReg(modrm)) {
31708 UInt rE = eregOfRexRM(pfx, modrm);
31709 delta += 1;
31710 assign(t128, getXMMReg(rE));
31711 ib = getUChar(delta);
31712 DIP("vinserti128 $%u,%s,%s,%s\n",
31713 ib, nameXMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31714 } else {
31715 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31716 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
31717 delta += alen;
31718 ib = getUChar(delta);
31719 DIP("vinserti128 $%u,%s,%s,%s\n",
31720 ib, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31722 delta++;
31723 putYMMRegLane128(rG, 0, getYMMRegLane128(rV, 0));
31724 putYMMRegLane128(rG, 1, getYMMRegLane128(rV, 1));
31725 putYMMRegLane128(rG, ib & 1, mkexpr(t128));
31726 *uses_vvvv = True;
31727 goto decode_success;
31729 break;
31731 case 0x39:
31732 /* VEXTRACTI128 $lane_no, rS, r/m
31733 ::: r/m:V128 = a lane of rS:V256 (RM format) */
31734 /* VEXTRACTI128 = VEX.256.66.0F3A.W0 39 /r ib */
31735 if (have66noF2noF3(pfx)
31736 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
31737 UChar modrm = getUChar(delta);
31738 UInt ib = 0;
31739 UInt rS = gregOfRexRM(pfx, modrm);
31740 IRTemp t128 = newTemp(Ity_V128);
31741 if (epartIsReg(modrm)) {
31742 UInt rD = eregOfRexRM(pfx, modrm);
31743 delta += 1;
31744 ib = getUChar(delta);
31745 assign(t128, getYMMRegLane128(rS, ib & 1));
31746 putYMMRegLoAndZU(rD, mkexpr(t128));
31747 DIP("vextracti128 $%u,%s,%s\n",
31748                 ib, nameYMMReg(rS), nameXMMReg(rD));

31749 } else {
31750 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31751 delta += alen;
31752 ib = getUChar(delta);
31753 assign(t128, getYMMRegLane128(rS, ib & 1));
31754 storeLE(mkexpr(addr), mkexpr(t128));
31755 DIP("vextracti128 $%u,%s,%s\n",
31756 ib, nameYMMReg(rS), dis_buf);
31758 delta++;
31759 /* doesn't use vvvv */
31760 goto decode_success;
31762 break;
31764 case 0x40:
31765 /* VDPPS imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 40 /r ib */
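      /* Editor's note, for orientation (per the SSE4.1/AVX DPPS
         definition): imm8 bits 7:4 choose which of the four float lanes
         enter the dot product, and bits 3:0 choose which destination
         lanes receive the resulting sum (the others are set to zero).
         math_DPPS_128 is assumed to implement exactly that; the 256-bit
         form below applies the same imm8 to each 128-bit lane. */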
31766 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31767 UChar modrm = getUChar(delta);
31768 UInt rG = gregOfRexRM(pfx, modrm);
31769 UInt rV = getVexNvvvv(pfx);
31770 IRTemp dst_vec = newTemp(Ity_V128);
31771 Int imm8;
31772 if (epartIsReg( modrm )) {
31773 UInt rE = eregOfRexRM(pfx,modrm);
31774 imm8 = (Int)getUChar(delta+1);
31775 assign( dst_vec, getXMMReg( rE ) );
31776 delta += 1+1;
31777 DIP( "vdpps $%d,%s,%s,%s\n",
31778 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
31779 } else {
31780 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31781 imm8 = (Int)getUChar(delta+alen);
31782 assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) );
31783 delta += alen+1;
31784 DIP( "vdpps $%d,%s,%s,%s\n",
31785 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31788 IRTemp src_vec = newTemp(Ity_V128);
31789 assign(src_vec, getXMMReg( rV ));
31790 IRTemp res_vec = math_DPPS_128( src_vec, dst_vec, imm8 );
31791 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31792 *uses_vvvv = True;
31793 goto decode_success;
31795       /* VDPPS imm8, ymm3/m256,ymm2,ymm1 = VEX.NDS.256.66.0F3A.WIG 40 /r ib */
31796 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31797 UChar modrm = getUChar(delta);
31798 UInt rG = gregOfRexRM(pfx, modrm);
31799 UInt rV = getVexNvvvv(pfx);
31800 IRTemp dst_vec = newTemp(Ity_V256);
31801 Int imm8;
31802 if (epartIsReg( modrm )) {
31803 UInt rE = eregOfRexRM(pfx,modrm);
31804 imm8 = (Int)getUChar(delta+1);
31805 assign( dst_vec, getYMMReg( rE ) );
31806 delta += 1+1;
31807 DIP( "vdpps $%d,%s,%s,%s\n",
31808 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG) );
31809 } else {
31810 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31811 imm8 = (Int)getUChar(delta+alen);
31812 assign( dst_vec, loadLE( Ity_V256, mkexpr(addr) ) );
31813 delta += alen+1;
31814 DIP( "vdpps $%d,%s,%s,%s\n",
31815 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
31818 IRTemp src_vec = newTemp(Ity_V256);
31819 assign(src_vec, getYMMReg( rV ));
31820 IRTemp s0, s1, d0, d1;
31821 s0 = s1 = d0 = d1 = IRTemp_INVALID;
31822 breakupV256toV128s( dst_vec, &d1, &d0 );
31823 breakupV256toV128s( src_vec, &s1, &s0 );
31824 putYMMReg( rG, binop( Iop_V128HLtoV256,
31825 mkexpr( math_DPPS_128(s1, d1, imm8) ),
31826 mkexpr( math_DPPS_128(s0, d0, imm8) ) ) );
31827 *uses_vvvv = True;
31828 goto decode_success;
31830 break;
31832 case 0x41:
31833 /* VDPPD imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 41 /r ib */
31834 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31835 UChar modrm = getUChar(delta);
31836 UInt rG = gregOfRexRM(pfx, modrm);
31837 UInt rV = getVexNvvvv(pfx);
31838 IRTemp dst_vec = newTemp(Ity_V128);
31839 Int imm8;
31840 if (epartIsReg( modrm )) {
31841 UInt rE = eregOfRexRM(pfx,modrm);
31842 imm8 = (Int)getUChar(delta+1);
31843 assign( dst_vec, getXMMReg( rE ) );
31844 delta += 1+1;
31845 DIP( "vdppd $%d,%s,%s,%s\n",
31846 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
31847 } else {
31848 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31849 imm8 = (Int)getUChar(delta+alen);
31850 assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) );
31851 delta += alen+1;
31852 DIP( "vdppd $%d,%s,%s,%s\n",
31853 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31856 IRTemp src_vec = newTemp(Ity_V128);
31857 assign(src_vec, getXMMReg( rV ));
31858 IRTemp res_vec = math_DPPD_128( src_vec, dst_vec, imm8 );
31859 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31860 *uses_vvvv = True;
31861 goto decode_success;
31863 break;
31865 case 0x42:
31866 /* VMPSADBW imm8, xmm3/m128,xmm2,xmm1 */
31867 /* VMPSADBW = VEX.NDS.128.66.0F3A.WIG 42 /r ib */
31868 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31869 UChar modrm = getUChar(delta);
31870 Int imm8;
31871 IRTemp src_vec = newTemp(Ity_V128);
31872 IRTemp dst_vec = newTemp(Ity_V128);
31873 UInt rG = gregOfRexRM(pfx, modrm);
31874 UInt rV = getVexNvvvv(pfx);
31876 assign( dst_vec, getXMMReg(rV) );
31878 if ( epartIsReg( modrm ) ) {
31879 UInt rE = eregOfRexRM(pfx, modrm);
31881 imm8 = (Int)getUChar(delta+1);
31882 assign( src_vec, getXMMReg(rE) );
31883 delta += 1+1;
31884 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
31885 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
31886 } else {
31887 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
31888 1/* imm8 is 1 byte after the amode */ );
31889 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
31890 imm8 = (Int)getUChar(delta+alen);
31891 delta += alen+1;
31892 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
31893 dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31896 putYMMRegLoAndZU( rG, mkexpr( math_MPSADBW_128(dst_vec,
31897 src_vec, imm8) ) );
31898 *uses_vvvv = True;
31899 goto decode_success;
31901 /* VMPSADBW imm8, ymm3/m256,ymm2,ymm1 */
31902 /* VMPSADBW = VEX.NDS.256.66.0F3A.WIG 42 /r ib */
31903 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31904 UChar modrm = getUChar(delta);
31905 Int imm8;
31906 IRTemp src_vec = newTemp(Ity_V256);
31907 IRTemp dst_vec = newTemp(Ity_V256);
31908 UInt rG = gregOfRexRM(pfx, modrm);
31909 UInt rV = getVexNvvvv(pfx);
31910 IRTemp sHi, sLo, dHi, dLo;
31911 sHi = sLo = dHi = dLo = IRTemp_INVALID;
31913 assign( dst_vec, getYMMReg(rV) );
31915 if ( epartIsReg( modrm ) ) {
31916 UInt rE = eregOfRexRM(pfx, modrm);
31918 imm8 = (Int)getUChar(delta+1);
31919 assign( src_vec, getYMMReg(rE) );
31920 delta += 1+1;
31921 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
31922 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG) );
31923 } else {
31924 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
31925 1/* imm8 is 1 byte after the amode */ );
31926 assign( src_vec, loadLE( Ity_V256, mkexpr(addr) ) );
31927 imm8 = (Int)getUChar(delta+alen);
31928 delta += alen+1;
31929 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
31930 dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
31933 breakupV256toV128s( dst_vec, &dHi, &dLo );
31934 breakupV256toV128s( src_vec, &sHi, &sLo );
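         /* Editor's note: per the AVX2 definition of VMPSADBW, imm8 bits
            2:0 control the lower 128-bit lane and bits 5:3 the upper
            lane, so the upper half is handed imm8 >> 3 to put its
            control bits where the 128-bit helper expects them. */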
31935 putYMMReg( rG, binop( Iop_V128HLtoV256,
31936 mkexpr( math_MPSADBW_128(dHi, sHi, imm8 >> 3) ),
31937 mkexpr( math_MPSADBW_128(dLo, sLo, imm8) ) ) );
31938 *uses_vvvv = True;
31939 goto decode_success;
31941 break;
31943 case 0x44:
31944 /* VPCLMULQDQ imm8, xmm3/m128,xmm2,xmm1 */
31945 /* VPCLMULQDQ = VEX.NDS.128.66.0F3A.WIG 44 /r ib */
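      /* Editor's note (a reminder of the immediate's architectural
         meaning; math_PCLMULQDQ is assumed to follow it): imm8 bit 0
         selects the low or high quadword of the first source (the vvvv
         register) and bit 4 selects the quadword of the second source
         (reg/mem); e.g. imm8 == 0x11 multiplies the two high quadwords.
         This is the primitive used for GF(2^128) multiplication in
         AES-GCM style code. */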
31946 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
31947 * Carry-less multiplication of selected XMM quadwords into XMM
31948        * registers (a.k.a. multiplication of polynomials over GF(2))
31949        */
31950 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31951 UChar modrm = getUChar(delta);
31952 Int imm8;
31953 IRTemp sV = newTemp(Ity_V128);
31954 IRTemp dV = newTemp(Ity_V128);
31955 UInt rG = gregOfRexRM(pfx, modrm);
31956 UInt rV = getVexNvvvv(pfx);
31958 assign( dV, getXMMReg(rV) );
31960 if ( epartIsReg( modrm ) ) {
31961 UInt rE = eregOfRexRM(pfx, modrm);
31962 imm8 = (Int)getUChar(delta+1);
31963 assign( sV, getXMMReg(rE) );
31964 delta += 1+1;
31965 DIP( "vpclmulqdq $%d, %s,%s,%s\n", imm8,
31966 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
31967 } else {
31968 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
31969 1/* imm8 is 1 byte after the amode */ );
31970 assign( sV, loadLE( Ity_V128, mkexpr(addr) ) );
31971 imm8 = (Int)getUChar(delta+alen);
31972 delta += alen+1;
31973 DIP( "vpclmulqdq $%d, %s,%s,%s\n",
31974 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31977 putYMMRegLoAndZU( rG, mkexpr( math_PCLMULQDQ(dV, sV, imm8) ) );
31978 *uses_vvvv = True;
31979 goto decode_success;
31981 break;
31983 case 0x46:
31984       /* VPERM2I128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F3A.W0 46 /r ib */
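      /* Editor's note on the immediate (it matches the SEL logic below):
         imm8 bits 1:0 pick the result's lower 128-bit lane and bits 5:4
         the upper lane, where selectors 0/1 are the lanes of the vvvv
         source and 2/3 are the lanes of the reg/mem source; bits 3 and 7
         force the corresponding result lane to zero.  For example
         imm8 == 0x20 concatenates the two sources' lower lanes. */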
31985 if (have66noF2noF3(pfx)
31986 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
31987 UChar modrm = getUChar(delta);
31988 UInt imm8 = 0;
31989 UInt rG = gregOfRexRM(pfx, modrm);
31990 UInt rV = getVexNvvvv(pfx);
31991 IRTemp s00 = newTemp(Ity_V128);
31992 IRTemp s01 = newTemp(Ity_V128);
31993 IRTemp s10 = newTemp(Ity_V128);
31994 IRTemp s11 = newTemp(Ity_V128);
31995 assign(s00, getYMMRegLane128(rV, 0));
31996 assign(s01, getYMMRegLane128(rV, 1));
31997 if (epartIsReg(modrm)) {
31998 UInt rE = eregOfRexRM(pfx, modrm);
31999 delta += 1;
32000 imm8 = getUChar(delta);
32001 DIP("vperm2i128 $%u,%s,%s,%s\n",
32002 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
32003 assign(s10, getYMMRegLane128(rE, 0));
32004 assign(s11, getYMMRegLane128(rE, 1));
32005 } else {
32006 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
32007 delta += alen;
32008 imm8 = getUChar(delta);
32009 DIP("vperm2i128 $%u,%s,%s,%s\n",
32010 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
32011 assign(s10, loadLE(Ity_V128, binop(Iop_Add64,
32012 mkexpr(addr), mkU64(0))));
32013 assign(s11, loadLE(Ity_V128, binop(Iop_Add64,
32014 mkexpr(addr), mkU64(16))));
32016 delta++;
32017 # define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \
32018 : ((_nn)==2) ? s10 : s11)
32019 putYMMRegLane128(rG, 0, mkexpr(SEL((imm8 >> 0) & 3)));
32020 putYMMRegLane128(rG, 1, mkexpr(SEL((imm8 >> 4) & 3)));
32021 # undef SEL
32022 if (imm8 & (1<<3)) putYMMRegLane128(rG, 0, mkV128(0));
32023 if (imm8 & (1<<7)) putYMMRegLane128(rG, 1, mkV128(0));
32024 *uses_vvvv = True;
32025 goto decode_success;
32027 break;
32029 case 0x4A:
32030 /* VBLENDVPS xmmG, xmmE/memE, xmmV, xmmIS4
32031 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
32032 /* VBLENDVPS = VEX.NDS.128.66.0F3A.WIG 4A /r /is4 */
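      /* Editor's note (an assumption about the shared helper, not stated
         here): the final XMM operand's per-element sign bit selects
         between the two sources.  The element size in bytes (4 here) and
         the matching arithmetic-shift op are presumably used by
         dis_VBLENDV_128 to splat each sign bit into a full-width mask. */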
32033 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
32034 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
32035 "vblendvps", 4, Iop_SarN32x4 );
32036 *uses_vvvv = True;
32037 goto decode_success;
32039 /* VBLENDVPS ymmG, ymmE/memE, ymmV, ymmIS4
32040 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
32041 /* VBLENDVPS = VEX.NDS.256.66.0F3A.WIG 4A /r /is4 */
32042 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
32043 delta = dis_VBLENDV_256 ( vbi, pfx, delta,
32044 "vblendvps", 4, Iop_SarN32x4 );
32045 *uses_vvvv = True;
32046 goto decode_success;
32048 break;
32050 case 0x4B:
32051 /* VBLENDVPD xmmG, xmmE/memE, xmmV, xmmIS4
32052 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
32053 /* VBLENDVPD = VEX.NDS.128.66.0F3A.WIG 4B /r /is4 */
32054 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
32055 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
32056 "vblendvpd", 8, Iop_SarN64x2 );
32057 *uses_vvvv = True;
32058 goto decode_success;
32060 /* VBLENDVPD ymmG, ymmE/memE, ymmV, ymmIS4
32061 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
32062 /* VBLENDVPD = VEX.NDS.256.66.0F3A.WIG 4B /r /is4 */
32063 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
32064 delta = dis_VBLENDV_256 ( vbi, pfx, delta,
32065 "vblendvpd", 8, Iop_SarN64x2 );
32066 *uses_vvvv = True;
32067 goto decode_success;
32069 break;
32071 case 0x4C:
32072 /* VPBLENDVB xmmG, xmmE/memE, xmmV, xmmIS4
32073 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
32074 /* VPBLENDVB = VEX.NDS.128.66.0F3A.WIG 4C /r /is4 */
32075 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
32076 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
32077 "vpblendvb", 1, Iop_SarN8x16 );
32078 *uses_vvvv = True;
32079 goto decode_success;
32081 /* VPBLENDVB ymmG, ymmE/memE, ymmV, ymmIS4
32082 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
32083 /* VPBLENDVB = VEX.NDS.256.66.0F3A.WIG 4C /r /is4 */
32084 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
32085 delta = dis_VBLENDV_256 ( vbi, pfx, delta,
32086 "vpblendvb", 1, Iop_SarN8x16 );
32087 *uses_vvvv = True;
32088 goto decode_success;
32090 break;
32092 case 0x60:
32093 case 0x61:
32094 case 0x62:
32095 case 0x63:
32096 /* VEX.128.66.0F3A.WIG 63 /r ib = VPCMPISTRI imm8, xmm2/m128, xmm1
32097 VEX.128.66.0F3A.WIG 62 /r ib = VPCMPISTRM imm8, xmm2/m128, xmm1
32098 VEX.128.66.0F3A.WIG 61 /r ib = VPCMPESTRI imm8, xmm2/m128, xmm1
32099 VEX.128.66.0F3A.WIG 60 /r ib = VPCMPESTRM imm8, xmm2/m128, xmm1
32100 (selected special cases that actually occur in glibc,
32101           not by any means a complete implementation.)
32102        */
32103 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
32104 Long delta0 = delta;
32105 delta = dis_PCMPxSTRx( vbi, pfx, delta, True/*isAvx*/, opc );
32106 if (delta > delta0) goto decode_success;
32107         /* else fall through; dis_PCMPxSTRx failed to decode it */
32109 break;
32111 case 0x5C ... 0x5F:
32112 case 0x68 ... 0x6F:
32113 case 0x78 ... 0x7F:
32114 /* FIXME: list the instructions decoded here */
32115 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
32116 Long delta0 = delta;
32117 delta = dis_FMA4( pfx, delta, opc, uses_vvvv, vbi );
32118 if (delta > delta0) {
32119 dres->hint = Dis_HintVerbose;
32120 goto decode_success;
32122         /* else fall through; dis_FMA4 failed to decode it */
32124 break;
32126 case 0xDF:
32127 /* VAESKEYGENASSIST imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG DF /r */
32128 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
32129          delta = dis_AESKEYGENASSIST( vbi, pfx, delta, True/*isAvx*/ );
32130 goto decode_success;
32132 break;
32134 case 0xF0:
32135 /* RORX imm8, r/m32, r32a = VEX.LZ.F2.0F3A.W0 F0 /r /i */
32136 /* RORX imm8, r/m64, r64a = VEX.LZ.F2.0F3A.W1 F0 /r /i */
32137 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
32138 Int size = getRexW(pfx) ? 8 : 4;
32139 IRType ty = szToITy(size);
32140 IRTemp src = newTemp(ty);
32141 UChar rm = getUChar(delta);
32142 UChar imm8;
32144 if (epartIsReg(rm)) {
32145 imm8 = getUChar(delta+1);
32146 assign( src, getIRegE(size,pfx,rm) );
32147 DIP("rorx %d,%s,%s\n", imm8, nameIRegE(size,pfx,rm),
32148 nameIRegG(size,pfx,rm));
32149 delta += 2;
32150 } else {
32151 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
32152 imm8 = getUChar(delta+alen);
32153 assign( src, loadLE(ty, mkexpr(addr)) );
32154 DIP("rorx %d,%s,%s\n", imm8, dis_buf, nameIRegG(size,pfx,rm));
32155 delta += alen + 1;
32157 imm8 &= 8*size-1;
32159          /* dst = (src >>u imm8) | (src << (8*size-imm8)) */
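         /* Worked example (editor's addition): for the 32-bit form with
            src = 0x11223344 and imm8 = 8,
               (0x11223344 >>u 8) | (0x11223344 << 24) = 0x44112233,
            i.e. a right-rotate by 8; imm8 has already been masked to the
            operand width above. */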
32160 putIRegG( size, pfx, rm,
32161 imm8 == 0 ? mkexpr(src)
32162 : binop( mkSizedOp(ty,Iop_Or8),
32163 binop( mkSizedOp(ty,Iop_Shr8), mkexpr(src),
32164 mkU8(imm8) ),
32165 binop( mkSizedOp(ty,Iop_Shl8), mkexpr(src),
32166 mkU8(8*size-imm8) ) ) );
32167 /* Flags aren't modified. */
32168 goto decode_success;
32170 break;
32172 default:
32173 break;
32177 //decode_failure:
32178 return deltaIN;
32180 decode_success:
32181 return delta;
32185 /*------------------------------------------------------------*/
32186 /*--- ---*/
32187 /*--- Disassemble a single instruction ---*/
32188 /*--- ---*/
32189 /*------------------------------------------------------------*/
32191 /* Disassemble a single instruction into IR. The instruction is
32192 located in host memory at &guest_code[delta]. */
32194 static
32195 DisResult disInstr_AMD64_WRK (
32196 /*OUT*/Bool* expect_CAS,
32197 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
32198 Bool resteerCisOk,
32199 void* callback_opaque,
32200 Long delta64,
32201 const VexArchInfo* archinfo,
32202 const VexAbiInfo* vbi,
32203 Bool sigill_diag
32206 IRTemp t1, t2;
32207 UChar pre;
32208 Int n, n_prefixes;
32209 DisResult dres;
32211 /* The running delta */
32212 Long delta = delta64;
32214 /* Holds eip at the start of the insn, so that we can print
32215 consistent error messages for unimplemented insns. */
32216 Long delta_start = delta;
32218 /* sz denotes the nominal data-op size of the insn; we change it to
32219 2 if an 0x66 prefix is seen and 8 if REX.W is 1. In case of
32220 conflict REX.W takes precedence. */
32221 Int sz = 4;
32223 /* pfx holds the summary of prefixes. */
32224 Prefix pfx = PFX_EMPTY;
32226 /* Holds the computed opcode-escape indication. */
32227 Escape esc = ESC_NONE;
32229 /* Set result defaults. */
32230 dres.whatNext = Dis_Continue;
32231 dres.len = 0;
32232 dres.continueAt = 0;
32233 dres.jk_StopHere = Ijk_INVALID;
32234 dres.hint = Dis_HintNone;
32235 *expect_CAS = False;
32237 vassert(guest_RIP_next_assumed == 0);
32238 vassert(guest_RIP_next_mustcheck == False);
32240 t1 = t2 = IRTemp_INVALID;
32242 DIP("\t0x%llx: ", guest_RIP_bbstart+delta);
32244 /* Spot "Special" instructions (see comment at top of file). */
32246 const UChar* code = guest_code + delta;
32247 /* Spot the 16-byte preamble:
32248 48C1C703 rolq $3, %rdi
32249 48C1C70D rolq $13, %rdi
32250 48C1C73D rolq $61, %rdi
32251          48C1C733   rolq $51, %rdi
32252    */
32253 if (code[ 0] == 0x48 && code[ 1] == 0xC1 && code[ 2] == 0xC7
32254 && code[ 3] == 0x03 &&
32255 code[ 4] == 0x48 && code[ 5] == 0xC1 && code[ 6] == 0xC7
32256 && code[ 7] == 0x0D &&
32257 code[ 8] == 0x48 && code[ 9] == 0xC1 && code[10] == 0xC7
32258 && code[11] == 0x3D &&
32259 code[12] == 0x48 && code[13] == 0xC1 && code[14] == 0xC7
32260 && code[15] == 0x33) {
32261 /* Got a "Special" instruction preamble. Which one is it? */
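      /* Editor's note: the four rotate amounts sum to 3+13+61+51 = 128,
         i.e. two full 64-bit rotations, so the preamble leaves %rdi
         unchanged; it merely serves as an unmistakable marker. */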
32262 if (code[16] == 0x48 && code[17] == 0x87
32263 && code[18] == 0xDB /* xchgq %rbx,%rbx */) {
32264 /* %RDX = client_request ( %RAX ) */
32265 DIP("%%rdx = client_request ( %%rax )\n");
32266 delta += 19;
32267 jmp_lit(&dres, Ijk_ClientReq, guest_RIP_bbstart+delta);
32268 vassert(dres.whatNext == Dis_StopHere);
32269 goto decode_success;
32271 else
32272 if (code[16] == 0x48 && code[17] == 0x87
32273 && code[18] == 0xC9 /* xchgq %rcx,%rcx */) {
32274 /* %RAX = guest_NRADDR */
32275 DIP("%%rax = guest_NRADDR\n");
32276 delta += 19;
32277 putIRegRAX(8, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
32278 goto decode_success;
32280 else
32281 if (code[16] == 0x48 && code[17] == 0x87
32282 && code[18] == 0xD2 /* xchgq %rdx,%rdx */) {
32283 /* call-noredir *%RAX */
32284 DIP("call-noredir *%%rax\n");
32285 delta += 19;
32286 t1 = newTemp(Ity_I64);
32287 assign(t1, getIRegRAX(8));
32288 t2 = newTemp(Ity_I64);
32289 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
32290 putIReg64(R_RSP, mkexpr(t2));
32291 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta));
32292 jmp_treg(&dres, Ijk_NoRedir, t1);
32293 vassert(dres.whatNext == Dis_StopHere);
32294 goto decode_success;
32296 else
32297 if (code[16] == 0x48 && code[17] == 0x87
32298 && code[18] == 0xff /* xchgq %rdi,%rdi */) {
32299 /* IR injection */
32300 DIP("IR injection\n");
32301 vex_inject_ir(irsb, Iend_LE);
32303 // Invalidate the current insn. The reason is that the IRop we're
32304    // injecting here can change, in which case the translation has to
32305    // be redone.  For ease of handling, we simply invalidate it every
32306    // time.
32307 stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_RIP_curr_instr)));
32308 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(19)));
32310 delta += 19;
32312 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) );
32313 dres.whatNext = Dis_StopHere;
32314 dres.jk_StopHere = Ijk_InvalICache;
32315 goto decode_success;
32317 /* We don't know what it is. */
32318 goto decode_failure;
32319 /*NOTREACHED*/
32323 /* Eat prefixes, summarising the result in pfx and sz, and rejecting
32324 as many invalid combinations as possible. */
32325 n_prefixes = 0;
32326 while (True) {
32327 if (n_prefixes > 7) goto decode_failure;
32328 pre = getUChar(delta);
32329 switch (pre) {
32330 case 0x66: pfx |= PFX_66; break;
32331 case 0x67: pfx |= PFX_ASO; break;
32332 case 0xF2: pfx |= PFX_F2; break;
32333 case 0xF3: pfx |= PFX_F3; break;
32334 case 0xF0: pfx |= PFX_LOCK; *expect_CAS = True; break;
32335 case 0x2E: pfx |= PFX_CS; break;
32336 case 0x3E: pfx |= PFX_DS; break;
32337 case 0x26: pfx |= PFX_ES; break;
32338 case 0x64: pfx |= PFX_FS; break;
32339 case 0x65: pfx |= PFX_GS; break;
32340 case 0x36: pfx |= PFX_SS; break;
32341 case 0x40 ... 0x4F:
32342 pfx |= PFX_REX;
32343 if (pre & (1<<3)) pfx |= PFX_REXW;
32344 if (pre & (1<<2)) pfx |= PFX_REXR;
32345 if (pre & (1<<1)) pfx |= PFX_REXX;
32346 if (pre & (1<<0)) pfx |= PFX_REXB;
32347 break;
32348 default:
32349 goto not_a_legacy_prefix;
32351 n_prefixes++;
32352 delta++;
32355 not_a_legacy_prefix:
32356 /* We've used up all the non-VEX prefixes. Parse and validate a
32357 VEX prefix if that's appropriate. */
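   /* Quick reference (editor's addition), matching the field extraction
      below:
        3-byte VEX:  C4  |~R ~X ~B m-mmmm|  |W ~v3 ~v2 ~v1 ~v0 L pp|
                         (m-mmmm: 1 = 0F, 2 = 0F38, 3 = 0F3A)
        2-byte VEX:  C5  |~R ~v3 ~v2 ~v1 ~v0 L pp|   (implies a 0F escape)
        pp: 0 = none, 1 = 66, 2 = F3, 3 = F2; R/X/B/vvvv are stored
        inverted. */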
32358 if (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX) {
32359 /* Used temporarily for holding VEX prefixes. */
32360 UChar vex0 = getUChar(delta);
32361 if (vex0 == 0xC4) {
32362 /* 3-byte VEX */
32363 UChar vex1 = getUChar(delta+1);
32364 UChar vex2 = getUChar(delta+2);
32365 delta += 3;
32366 pfx |= PFX_VEX;
32367 /* Snarf contents of byte 1 */
32368 /* R */ pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR;
32369 /* X */ pfx |= (vex1 & (1<<6)) ? 0 : PFX_REXX;
32370 /* B */ pfx |= (vex1 & (1<<5)) ? 0 : PFX_REXB;
32371 /* m-mmmm */
32372 switch (vex1 & 0x1F) {
32373 case 1: esc = ESC_0F; break;
32374 case 2: esc = ESC_0F38; break;
32375 case 3: esc = ESC_0F3A; break;
32376 /* Any other m-mmmm field will #UD */
32377 default: goto decode_failure;
32379 /* Snarf contents of byte 2 */
32380 /* W */ pfx |= (vex2 & (1<<7)) ? PFX_REXW : 0;
32381 /* ~v3 */ pfx |= (vex2 & (1<<6)) ? 0 : PFX_VEXnV3;
32382 /* ~v2 */ pfx |= (vex2 & (1<<5)) ? 0 : PFX_VEXnV2;
32383 /* ~v1 */ pfx |= (vex2 & (1<<4)) ? 0 : PFX_VEXnV1;
32384 /* ~v0 */ pfx |= (vex2 & (1<<3)) ? 0 : PFX_VEXnV0;
32385 /* L */ pfx |= (vex2 & (1<<2)) ? PFX_VEXL : 0;
32386 /* pp */
32387 switch (vex2 & 3) {
32388 case 0: break;
32389 case 1: pfx |= PFX_66; break;
32390 case 2: pfx |= PFX_F3; break;
32391 case 3: pfx |= PFX_F2; break;
32392 default: vassert(0);
32395 else if (vex0 == 0xC5) {
32396 /* 2-byte VEX */
32397 UChar vex1 = getUChar(delta+1);
32398 delta += 2;
32399 pfx |= PFX_VEX;
32400 /* Snarf contents of byte 1 */
32401 /* R */ pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR;
32402 /* ~v3 */ pfx |= (vex1 & (1<<6)) ? 0 : PFX_VEXnV3;
32403 /* ~v2 */ pfx |= (vex1 & (1<<5)) ? 0 : PFX_VEXnV2;
32404 /* ~v1 */ pfx |= (vex1 & (1<<4)) ? 0 : PFX_VEXnV1;
32405 /* ~v0 */ pfx |= (vex1 & (1<<3)) ? 0 : PFX_VEXnV0;
32406 /* L */ pfx |= (vex1 & (1<<2)) ? PFX_VEXL : 0;
32407 /* pp */
32408 switch (vex1 & 3) {
32409 case 0: break;
32410 case 1: pfx |= PFX_66; break;
32411 case 2: pfx |= PFX_F3; break;
32412 case 3: pfx |= PFX_F2; break;
32413 default: vassert(0);
32415 /* implied: */
32416 esc = ESC_0F;
32418 /* Can't have both VEX and REX */
32419 if ((pfx & PFX_VEX) && (pfx & PFX_REX))
32420 goto decode_failure; /* can't have both */
32423 /* Dump invalid combinations */
32424 n = 0;
32425 if (pfx & PFX_F2) n++;
32426 if (pfx & PFX_F3) n++;
32427 if (n > 1)
32428 goto decode_failure; /* can't have both */
32430 n = 0;
32431 if (pfx & PFX_CS) n++;
32432 if (pfx & PFX_DS) n++;
32433 if (pfx & PFX_ES) n++;
32434 if (pfx & PFX_FS) n++;
32435 if (pfx & PFX_GS) n++;
32436 if (pfx & PFX_SS) n++;
32437 if (n > 1)
32438 goto decode_failure; /* multiple seg overrides == illegal */
32440 /* We have a %fs prefix. Reject it if there's no evidence in 'vbi'
32441 that we should accept it. */
32442 if ((pfx & PFX_FS) && !vbi->guest_amd64_assume_fs_is_const)
32443 goto decode_failure;
32445 /* Ditto for %gs prefixes. */
32446 if ((pfx & PFX_GS) && !vbi->guest_amd64_assume_gs_is_const)
32447 goto decode_failure;
32449 /* Set up sz. */
32450 sz = 4;
32451 if (pfx & PFX_66) sz = 2;
32452 if ((pfx & PFX_REX) && (pfx & PFX_REXW)) sz = 8;
32454 /* Now we should be looking at the primary opcode byte or the
32455 leading escapes. Check that any LOCK prefix is actually
32456 allowed. */
32457 if (haveLOCK(pfx)) {
32458 if (can_be_used_with_LOCK_prefix( &guest_code[delta] )) {
32459 DIP("lock ");
32460 } else {
32461 *expect_CAS = False;
32462 goto decode_failure;
32466 /* Eat up opcode escape bytes, until we're really looking at the
32467 primary opcode byte. But only if there's no VEX present. */
32468 if (!(pfx & PFX_VEX)) {
32469 vassert(esc == ESC_NONE);
32470 pre = getUChar(delta);
32471 if (pre == 0x0F) {
32472 delta++;
32473 pre = getUChar(delta);
32474 switch (pre) {
32475 case 0x38: esc = ESC_0F38; delta++; break;
32476 case 0x3A: esc = ESC_0F3A; delta++; break;
32477 default: esc = ESC_0F; break;
32482 /* So now we're really really looking at the primary opcode
32483 byte. */
32484 Long delta_at_primary_opcode = delta;
32486 if (!(pfx & PFX_VEX)) {
32487 /* Handle non-VEX prefixed instructions. "Legacy" (non-VEX) SSE
32488 instructions preserve the upper 128 bits of YMM registers;
32489 iow we can simply ignore the presence of the upper halves of
32490 these registers. */
32491 switch (esc) {
32492 case ESC_NONE:
32493 delta = dis_ESC_NONE( &dres, expect_CAS,
32494 resteerOkFn, resteerCisOk, callback_opaque,
32495 archinfo, vbi, pfx, sz, delta );
32496 break;
32497 case ESC_0F:
32498 delta = dis_ESC_0F ( &dres, expect_CAS,
32499 resteerOkFn, resteerCisOk, callback_opaque,
32500 archinfo, vbi, pfx, sz, delta );
32501 break;
32502 case ESC_0F38:
32503 delta = dis_ESC_0F38( &dres,
32504 resteerOkFn, resteerCisOk, callback_opaque,
32505 archinfo, vbi, pfx, sz, delta );
32506 break;
32507 case ESC_0F3A:
32508 delta = dis_ESC_0F3A( &dres,
32509 resteerOkFn, resteerCisOk, callback_opaque,
32510 archinfo, vbi, pfx, sz, delta );
32511 break;
32512 default:
32513 vassert(0);
32515 } else {
32516 /* VEX prefixed instruction */
32517 /* Sloppy Intel wording: "An instruction encoded with a VEX.128
32518 prefix that loads a YMM register operand ..." zeroes out bits
32519 128 and above of the register. */
32520 Bool uses_vvvv = False;
32521 switch (esc) {
32522 case ESC_0F:
32523 delta = dis_ESC_0F__VEX ( &dres, &uses_vvvv,
32524 resteerOkFn, resteerCisOk,
32525 callback_opaque,
32526 archinfo, vbi, pfx, sz, delta );
32527 break;
32528 case ESC_0F38:
32529 delta = dis_ESC_0F38__VEX ( &dres, &uses_vvvv,
32530 resteerOkFn, resteerCisOk,
32531 callback_opaque,
32532 archinfo, vbi, pfx, sz, delta );
32533 break;
32534 case ESC_0F3A:
32535 delta = dis_ESC_0F3A__VEX ( &dres, &uses_vvvv,
32536 resteerOkFn, resteerCisOk,
32537 callback_opaque,
32538 archinfo, vbi, pfx, sz, delta );
32539 break;
32540 case ESC_NONE:
32541 /* The presence of a VEX prefix, by Intel definition,
32542 always implies at least an 0F escape. */
32543 goto decode_failure;
32544 default:
32545 vassert(0);
32547 /* If the insn doesn't use VEX.vvvv then it must be all ones.
32548 Check this. */
32549 if (!uses_vvvv) {
32550 if (getVexNvvvv(pfx) != 0)
32551 goto decode_failure;
32555 vassert(delta - delta_at_primary_opcode >= 0);
32556 vassert(delta - delta_at_primary_opcode < 16/*let's say*/);
32558 /* Use delta == delta_at_primary_opcode to denote decode failure.
32559      This implies that any successful decode must use up at least
32560      one byte. */
32561 if (delta == delta_at_primary_opcode)
32562 goto decode_failure;
32563 else
32564 goto decode_success; /* \o/ */
32567 decode_failure:
32568 /* All decode failures end up here. */
32569 if (sigill_diag) {
32570 vex_printf("vex amd64->IR: unhandled instruction bytes: "
32571 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
32572 getUChar(delta_start+0),
32573 getUChar(delta_start+1),
32574 getUChar(delta_start+2),
32575 getUChar(delta_start+3),
32576 getUChar(delta_start+4),
32577 getUChar(delta_start+5),
32578 getUChar(delta_start+6),
32579 getUChar(delta_start+7),
32580 getUChar(delta_start+8),
32581 getUChar(delta_start+9) );
32582 vex_printf("vex amd64->IR: REX=%d REX.W=%d REX.R=%d REX.X=%d REX.B=%d\n",
32583 haveREX(pfx) ? 1 : 0, getRexW(pfx), getRexR(pfx),
32584 getRexX(pfx), getRexB(pfx));
32585 vex_printf("vex amd64->IR: VEX=%d VEX.L=%d VEX.nVVVV=0x%x ESC=%s\n",
32586 haveVEX(pfx) ? 1 : 0, getVexL(pfx),
32587 getVexNvvvv(pfx),
32588 esc==ESC_NONE ? "NONE" :
32589 esc==ESC_0F ? "0F" :
32590 esc==ESC_0F38 ? "0F38" :
32591 esc==ESC_0F3A ? "0F3A" : "???");
32592 vex_printf("vex amd64->IR: PFX.66=%d PFX.F2=%d PFX.F3=%d\n",
32593 have66(pfx) ? 1 : 0, haveF2(pfx) ? 1 : 0,
32594 haveF3(pfx) ? 1 : 0);
32597 /* Tell the dispatcher that this insn cannot be decoded, and so has
32598 not been executed, and (is currently) the next to be executed.
32599       RIP should be up-to-date since it was made so at the start of each
32600 insn, but nevertheless be paranoid and update it again right
32601 now. */
32602 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
32603 jmp_lit(&dres, Ijk_NoDecode, guest_RIP_curr_instr);
32604 vassert(dres.whatNext == Dis_StopHere);
32605 dres.len = 0;
32606 /* We also need to say that a CAS is not expected now, regardless
32607 of what it might have been set to at the start of the function,
32608       since the IR that we've emitted just above (to synthesise a
32609 SIGILL) does not involve any CAS, and presumably no other IR has
32610 been emitted for this (non-decoded) insn. */
32611 *expect_CAS = False;
32612 return dres;
32615 decode_success:
32616 /* All decode successes end up here. */
32617 switch (dres.whatNext) {
32618 case Dis_Continue:
32619 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) );
32620 break;
32621 case Dis_ResteerU:
32622 case Dis_ResteerC:
32623 stmt( IRStmt_Put( OFFB_RIP, mkU64(dres.continueAt) ) );
32624 break;
32625 case Dis_StopHere:
32626 break;
32627 default:
32628 vassert(0);
32631 DIP("\n");
32632 dres.len = toUInt(delta - delta_start);
32633 return dres;
32636 #undef DIP
32637 #undef DIS
32640 /*------------------------------------------------------------*/
32641 /*--- Top-level fn ---*/
32642 /*------------------------------------------------------------*/
32644 /* Disassemble a single instruction into IR. The instruction
32645 is located in host memory at &guest_code[delta]. */
32647 DisResult disInstr_AMD64 ( IRSB* irsb_IN,
32648 Bool (*resteerOkFn) ( void*, Addr ),
32649 Bool resteerCisOk,
32650 void* callback_opaque,
32651 const UChar* guest_code_IN,
32652 Long delta,
32653 Addr guest_IP,
32654 VexArch guest_arch,
32655 const VexArchInfo* archinfo,
32656 const VexAbiInfo* abiinfo,
32657 VexEndness host_endness_IN,
32658 Bool sigill_diag_IN )
32660 Int i, x1, x2;
32661 Bool expect_CAS, has_CAS;
32662 DisResult dres;
32664 /* Set globals (see top of this file) */
32665 vassert(guest_arch == VexArchAMD64);
32666 guest_code = guest_code_IN;
32667 irsb = irsb_IN;
32668 host_endness = host_endness_IN;
32669 guest_RIP_curr_instr = guest_IP;
32670 guest_RIP_bbstart = guest_IP - delta;
32672 /* We'll consult these after doing disInstr_AMD64_WRK. */
32673 guest_RIP_next_assumed = 0;
32674 guest_RIP_next_mustcheck = False;
32676 x1 = irsb_IN->stmts_used;
32677 expect_CAS = False;
32678 dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn,
32679 resteerCisOk,
32680 callback_opaque,
32681 delta, archinfo, abiinfo, sigill_diag_IN );
32682 x2 = irsb_IN->stmts_used;
32683 vassert(x2 >= x1);
32685 /* If disInstr_AMD64_WRK tried to figure out the next rip, check it
32686 got it right. Failure of this assertion is serious and denotes
32687 a bug in disInstr. */
32688 if (guest_RIP_next_mustcheck
32689 && guest_RIP_next_assumed != guest_RIP_curr_instr + dres.len) {
32690 vex_printf("\n");
32691 vex_printf("assumed next %%rip = 0x%llx\n",
32692 guest_RIP_next_assumed );
32693 vex_printf(" actual next %%rip = 0x%llx\n",
32694 guest_RIP_curr_instr + dres.len );
32695 vpanic("disInstr_AMD64: disInstr miscalculated next %rip");
32698 /* See comment at the top of disInstr_AMD64_WRK for meaning of
32699 expect_CAS. Here, we (sanity-)check for the presence/absence of
32700 IRCAS as directed by the returned expect_CAS value. */
32701 has_CAS = False;
32702 for (i = x1; i < x2; i++) {
32703 if (irsb_IN->stmts[i]->tag == Ist_CAS)
32704 has_CAS = True;
32707 if (expect_CAS != has_CAS) {
32708 /* inconsistency detected. re-disassemble the instruction so as
32709 to generate a useful error message; then assert. */
32710 vex_traceflags |= VEX_TRACE_FE;
32711 dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn,
32712 resteerCisOk,
32713 callback_opaque,
32714 delta, archinfo, abiinfo, sigill_diag_IN );
32715 for (i = x1; i < x2; i++) {
32716 vex_printf("\t\t");
32717 ppIRStmt(irsb_IN->stmts[i]);
32718 vex_printf("\n");
32720 /* Failure of this assertion is serious and denotes a bug in
32721 disInstr. */
32722 vpanic("disInstr_AMD64: inconsistency in LOCK prefix handling");
32725 return dres;
32729 /*------------------------------------------------------------*/
32730 /*--- Unused stuff ---*/
32731 /*------------------------------------------------------------*/
32733 // A potentially more Memcheck-friendly version of gen_LZCNT, if
32734 // this should ever be needed.
32736 //static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
32738 // /* Scheme is simple: propagate the most significant 1-bit into all
32739 // lower positions in the word. This gives a word of the form
32740 // 0---01---1. Now invert it, giving a word of the form
32741 // 1---10---0, then do a population-count idiom (to count the 1s,
32742 // which is the number of leading zeroes, or the word size if the
32743 //   original word was 0.)
32744 // */
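// Worked example (editor's addition): with a 16-bit src of 0x0008 the
// propagate steps produce 0x000F, inverting gives 0xFFF0, and its
// popcount is 12, the number of leading zeroes in 0x0008.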
32745 // Int i;
32746 // IRTemp t[7];
32747 // for (i = 0; i < 7; i++) {
32748 // t[i] = newTemp(ty);
32749 // }
32750 // if (ty == Ity_I64) {
32751 // assign(t[0], binop(Iop_Or64, mkexpr(src),
32752 // binop(Iop_Shr64, mkexpr(src), mkU8(1))));
32753 // assign(t[1], binop(Iop_Or64, mkexpr(t[0]),
32754 // binop(Iop_Shr64, mkexpr(t[0]), mkU8(2))));
32755 // assign(t[2], binop(Iop_Or64, mkexpr(t[1]),
32756 // binop(Iop_Shr64, mkexpr(t[1]), mkU8(4))));
32757 // assign(t[3], binop(Iop_Or64, mkexpr(t[2]),
32758 // binop(Iop_Shr64, mkexpr(t[2]), mkU8(8))));
32759 // assign(t[4], binop(Iop_Or64, mkexpr(t[3]),
32760 // binop(Iop_Shr64, mkexpr(t[3]), mkU8(16))));
32761 // assign(t[5], binop(Iop_Or64, mkexpr(t[4]),
32762 // binop(Iop_Shr64, mkexpr(t[4]), mkU8(32))));
32763 // assign(t[6], unop(Iop_Not64, mkexpr(t[5])));
32764 // return gen_POPCOUNT(ty, t[6]);
32765 // }
32766 // if (ty == Ity_I32) {
32767 // assign(t[0], binop(Iop_Or32, mkexpr(src),
32768 // binop(Iop_Shr32, mkexpr(src), mkU8(1))));
32769 // assign(t[1], binop(Iop_Or32, mkexpr(t[0]),
32770 // binop(Iop_Shr32, mkexpr(t[0]), mkU8(2))));
32771 // assign(t[2], binop(Iop_Or32, mkexpr(t[1]),
32772 // binop(Iop_Shr32, mkexpr(t[1]), mkU8(4))));
32773 // assign(t[3], binop(Iop_Or32, mkexpr(t[2]),
32774 // binop(Iop_Shr32, mkexpr(t[2]), mkU8(8))));
32775 // assign(t[4], binop(Iop_Or32, mkexpr(t[3]),
32776 // binop(Iop_Shr32, mkexpr(t[3]), mkU8(16))));
32777 // assign(t[5], unop(Iop_Not32, mkexpr(t[4])));
32778 // return gen_POPCOUNT(ty, t[5]);
32779 // }
32780 // if (ty == Ity_I16) {
32781 // assign(t[0], binop(Iop_Or16, mkexpr(src),
32782 // binop(Iop_Shr16, mkexpr(src), mkU8(1))));
32783 // assign(t[1], binop(Iop_Or16, mkexpr(t[0]),
32784 // binop(Iop_Shr16, mkexpr(t[0]), mkU8(2))));
32785 // assign(t[2], binop(Iop_Or16, mkexpr(t[1]),
32786 // binop(Iop_Shr16, mkexpr(t[1]), mkU8(4))));
32787 // assign(t[3], binop(Iop_Or16, mkexpr(t[2]),
32788 // binop(Iop_Shr16, mkexpr(t[2]), mkU8(8))));
32789 // assign(t[4], unop(Iop_Not16, mkexpr(t[3])));
32790 // return gen_POPCOUNT(ty, t[4]);
32791 // }
32792 // vassert(0);
32796 /*--------------------------------------------------------------------*/
32797 /*--- end guest_amd64_toIR.c ---*/
32798 /*--------------------------------------------------------------------*/