2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_amd64_toIR.c ---*/
4 /*--------------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
28 Neither the names of the U.S. Department of Energy nor the
29 University of California nor the names of its contributors may be
30 used to endorse or promote products derived from this software
31 without prior written permission.
34 /* Translates AMD64 code to IR. */
36 /* TODO:
38 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
39 to ensure a 64-bit value is being written.
41 x87 FP Limitations:
43 * all arithmetic done at 64 bits
45 * no FP exceptions, except for handling stack over/underflow
47 * FP rounding mode observed only for float->int conversions and
48 int->float conversions which could lose accuracy, and for
49 float-to-float rounding. For all other operations,
50 round-to-nearest is used, regardless.
52 * some of the FCOM cases could do with testing -- not convinced
53 that the args are the right way round.
55 * FSAVE does not re-initialise the FPU; it should do so
57 * FINIT not only initialises the FPU environment, it also zeroes
58 all the FP registers. It should leave the registers unchanged.
60 SAHF should cause eflags[1] == 1, and in fact it produces 0. As
61 per Intel docs this bit has no meaning anyway. Since PUSHF is the
62 only way to observe eflags[1], a proper fix would be to make that
63 bit be set by PUSHF.
65 This module uses global variables and so is not MT-safe (if that
66 should ever become relevant).
69 /* Notes re address size overrides (0x67).
71 According to the AMD documentation (24594 Rev 3.09, Sept 2003,
72 "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose
73 and System Instructions"), Section 1.2.3 ("Address-Size Override
74 Prefix"):
76 0x67 applies to all explicit memory references, causing the top
77 32 bits of the effective address to become zero.
79 0x67 has no effect on stack references (push/pop); these always
80 use a 64-bit address.
82 0x67 changes the interpretation of instructions which implicitly
83 reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used
84 instead. These are:
86 cmp{s,sb,sw,sd,sq}
87 in{s,sb,sw,sd}
88 jcxz, jecxz, jrcxz
89 lod{s,sb,sw,sd,sq}
90 loop{,e,bz,be,z}
91 mov{s,sb,sw,sd,sq}
92 out{s,sb,sw,sd}
93 rep{,e,ne,nz}
94 sca{s,sb,sw,sd,sq}
95 sto{s,sb,sw,sd,sq}
96 xlat{,b} */
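/* Illustrative example (not taken from the AMD documentation): the bytes
   8B 07 decode as "movl (%rdi), %eax".  With an address-size override,
   67 8B 07, the effective address is instead computed from %edi -- that
   is, %rdi with its upper 32 bits forced to zero, as described above. */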
98 /* "Special" instructions.
100 This instruction decoder can decode three special instructions
101 which mean nothing natively (are no-ops as far as regs/mem are
102 concerned) but have meaning for supporting Valgrind. A special
103 instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D
104 48C1C73D 48C1C733 (in the standard interpretation, that means: rolq
105 $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi).
106 Following that, one of the following 4 is allowed (standard
107 interpretation in parentheses):
109 4887DB (xchgq %rbx,%rbx) %RDX = client_request ( %RAX )
110 4887C9 (xchgq %rcx,%rcx) %RAX = guest_NRADDR
111 4887D2 (xchgq %rdx,%rdx) call-noredir *%RAX
112 4887F6 (xchgq %rsi,%rsi) IR injection
114 Any other bytes following the 16-byte preamble are illegal and
115 constitute a failure in instruction decoding. This all assumes
116 that the preamble will never occur except in specific code
117 fragments designed for Valgrind to catch.
119 No prefixes may precede a "Special" instruction.
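/* Aside: the four rotate amounts in the preamble sum to 128 == 2 * 64,
   so the preamble really does leave %rdi (and everything else) unchanged.
   A client-side sketch of the client_request variant might look like the
   following (illustrative only; the real macros live in valgrind.h, and
   'req'/'result' are placeholder names -- the request goes in %rax, the
   reply comes back in %rdx):

      __asm__ __volatile__(
         "rolq $3,  %%rdi ; rolq $13, %%rdi\n\t"
         "rolq $61, %%rdi ; rolq $51, %%rdi\n\t"
         "xchgq %%rbx, %%rbx"
         : "=d" (result) : "a" (req) : "cc", "memory" );
*/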
122 /* casLE (implementation of lock-prefixed insns) and rep-prefixed
123 insns: the side-exit back to the start of the insn is done with
124 Ijk_Boring. This is quite wrong, it should be done with
125 Ijk_NoRedir, since otherwise the side exit, which is intended to
126 restart the instruction for whatever reason, could go somewhere
127 entirely else. Doing it right (with Ijk_NoRedir jumps) would make
128 no-redir jumps performance critical, at least for rep-prefixed
129 instructions, since all iterations thereof would involve such a
130 jump. It's not such a big deal with casLE since the side exit is
131 only taken if the CAS fails, that is, the location is contended,
132 which is relatively unlikely.
134 Note also, the test for CAS success vs failure is done using
135 Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
136 Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it
137 shouldn't definedness-check these comparisons. See
138 COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
139 background/rationale.
142 /* LOCK prefixed instructions. These are translated using IR-level
143 CAS statements (IRCAS) and are believed to preserve atomicity, even
144 from the point of view of some other process racing against a
145 simulated one (presumably they communicate via a shared memory
146 segment).
148 Handlers which are aware of LOCK prefixes are:
149 dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
150 dis_cmpxchg_G_E (cmpxchg)
151 dis_Grp1 (add, or, adc, sbb, and, sub, xor)
152 dis_Grp3 (not, neg)
153 dis_Grp4 (inc, dec)
154 dis_Grp5 (inc, dec)
155 dis_Grp8_Imm (bts, btc, btr)
156 dis_bt_G_E (bts, btc, btr)
157 dis_xadd_G_E (xadd)
161 #include "libvex_basictypes.h"
162 #include "libvex_ir.h"
163 #include "libvex.h"
164 #include "libvex_guest_amd64.h"
166 #include "main_util.h"
167 #include "main_globals.h"
168 #include "guest_generic_bb_to_IR.h"
169 #include "guest_generic_x87.h"
170 #include "guest_amd64_defs.h"
173 /*------------------------------------------------------------*/
174 /*--- Globals ---*/
175 /*------------------------------------------------------------*/
177 /* These are set at the start of the translation of an insn, right
178 down in disInstr_AMD64, so that we don't have to pass them around
179 endlessly. They are all constant during the translation of any
180 given insn. */
182 /* These are set at the start of the translation of a BB, so
183 that we don't have to pass them around endlessly. */
185 /* We need to know this to do sub-register accesses correctly. */
186 static VexEndness host_endness;
188 /* Pointer to the guest code area (points to start of BB, not to the
189 insn being processed). */
190 static const UChar* guest_code;
192 /* The guest address corresponding to guest_code[0]. */
193 static Addr64 guest_RIP_bbstart;
195 /* The guest address for the instruction currently being
196 translated. */
197 static Addr64 guest_RIP_curr_instr;
199 /* The IRSB* into which we're generating code. */
200 static IRSB* irsb;
202 /* For ensuring that %rip-relative addressing is done right. A read
203 of %rip generates the address of the next instruction. It may be
204 that we don't conveniently know that inside disAMode(). For sanity
205 checking, if the next insn %rip is needed, we make a guess at what
206 it is, record that guess here, and set the accompanying Bool to
207 indicate that -- after this insn's decode is finished -- that guess
208 needs to be checked. */
210 /* At the start of each insn decode, is set to (0, False).
211 After the decode, if _mustcheck is now True, _assumed is
212 checked. */
214 static Addr64 guest_RIP_next_assumed;
215 static Bool guest_RIP_next_mustcheck;
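/* Example of why this matters: for "lea 0x10(%rip), %rax" the operand is
   (address of the *next* instruction) + 0x10, so disAMode() needs the
   length of the instruction it is still in the middle of decoding --
   hence the guess-and-check scheme above. */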
218 /*------------------------------------------------------------*/
219 /*--- Helpers for constructing IR. ---*/
220 /*------------------------------------------------------------*/
222 /* Generate a new temporary of the given type. */
223 static IRTemp newTemp ( IRType ty )
225 vassert(isPlausibleIRType(ty));
226 return newIRTemp( irsb->tyenv, ty );
229 /* Add a statement to the list held by "irsb". */
230 static void stmt ( IRStmt* st )
232 addStmtToIRSB( irsb, st );
235 /* Generate a statement "dst := e". */
236 static void assign ( IRTemp dst, IRExpr* e )
238 stmt( IRStmt_WrTmp(dst, e) );
241 static IRExpr* unop ( IROp op, IRExpr* a )
243 return IRExpr_Unop(op, a);
246 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
248 return IRExpr_Binop(op, a1, a2);
251 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
253 return IRExpr_Triop(op, a1, a2, a3);
256 static IRExpr* mkexpr ( IRTemp tmp )
258 return IRExpr_RdTmp(tmp);
261 static IRExpr* mkU8 ( ULong i )
263 vassert(i < 256);
264 return IRExpr_Const(IRConst_U8( (UChar)i ));
267 static IRExpr* mkU16 ( ULong i )
269 vassert(i < 0x10000ULL);
270 return IRExpr_Const(IRConst_U16( (UShort)i ));
273 static IRExpr* mkU32 ( ULong i )
275 vassert(i < 0x100000000ULL);
276 return IRExpr_Const(IRConst_U32( (UInt)i ));
279 static IRExpr* mkU64 ( ULong i )
281 return IRExpr_Const(IRConst_U64(i));
284 static IRExpr* mkU ( IRType ty, ULong i )
286 switch (ty) {
287 case Ity_I8: return mkU8(i);
288 case Ity_I16: return mkU16(i);
289 case Ity_I32: return mkU32(i);
290 case Ity_I64: return mkU64(i);
291 default: vpanic("mkU(amd64)");
295 static void storeLE ( IRExpr* addr, IRExpr* data )
297 stmt( IRStmt_Store(Iend_LE, addr, data) );
300 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
302 return IRExpr_Load(Iend_LE, ty, addr);
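/* A small illustrative fragment (not taken from any handler below)
   showing how these helpers compose.  To generate IR that loads a 64-bit
   value from the address in temporary 'addr', masks off all but the low
   byte, and stores it back ('addr' and 't' are placeholder names):

      IRTemp t = newTemp(Ity_I64);
      assign(t, binop(Iop_And64, loadLE(Ity_I64, mkexpr(addr)), mkU64(0xFF)));
      storeLE(mkexpr(addr), mkexpr(t));
*/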
305 static IROp mkSizedOp ( IRType ty, IROp op8 )
307 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
308 || op8 == Iop_Mul8
309 || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
310 || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
311 || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
312 || op8 == Iop_CasCmpNE8
313 || op8 == Iop_Not8 );
314 switch (ty) {
315 case Ity_I8: return 0 +op8;
316 case Ity_I16: return 1 +op8;
317 case Ity_I32: return 2 +op8;
318 case Ity_I64: return 3 +op8;
319 default: vpanic("mkSizedOp(amd64)");
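/* For example, mkSizedOp(Ity_I32, Iop_Add8) yields Iop_Add32.  This
   relies on the 8/16/32/64-bit variants of each operation being declared
   consecutively in IROp, which is what the +0/+1/+2/+3 arithmetic above
   assumes. */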
323 static
324 IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src )
326 if (szSmall == 1 && szBig == 4) {
327 return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src);
329 if (szSmall == 1 && szBig == 2) {
330 return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src);
332 if (szSmall == 2 && szBig == 4) {
333 return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src);
335 if (szSmall == 1 && szBig == 8 && !signd) {
336 return unop(Iop_8Uto64, src);
338 if (szSmall == 1 && szBig == 8 && signd) {
339 return unop(Iop_8Sto64, src);
341 if (szSmall == 2 && szBig == 8 && !signd) {
342 return unop(Iop_16Uto64, src);
344 if (szSmall == 2 && szBig == 8 && signd) {
345 return unop(Iop_16Sto64, src);
347 vpanic("doScalarWidening(amd64)");
350 static
351 void putGuarded ( Int gstOffB, IRExpr* guard, IRExpr* value )
353 IRType ty = typeOfIRExpr(irsb->tyenv, value);
354 stmt( IRStmt_Put(gstOffB,
355 IRExpr_ITE(guard, value, IRExpr_Get(gstOffB, ty))) );
359 /*------------------------------------------------------------*/
360 /*--- Debugging output ---*/
361 /*------------------------------------------------------------*/
363 /* Bomb out if we can't handle something. */
364 __attribute__ ((noreturn))
365 static void unimplemented ( const HChar* str )
367 vex_printf("amd64toIR: unimplemented feature\n");
368 vpanic(str);
371 #define DIP(format, args...) \
372 if (vex_traceflags & VEX_TRACE_FE) \
373 vex_printf(format, ## args)
375 #define DIS(buf, format, args...) \
376 if (vex_traceflags & VEX_TRACE_FE) \
377 vex_sprintf(buf, format, ## args)
380 /*------------------------------------------------------------*/
381 /*--- Offsets of various parts of the amd64 guest state. ---*/
382 /*------------------------------------------------------------*/
384 #define OFFB_RAX offsetof(VexGuestAMD64State,guest_RAX)
385 #define OFFB_RBX offsetof(VexGuestAMD64State,guest_RBX)
386 #define OFFB_RCX offsetof(VexGuestAMD64State,guest_RCX)
387 #define OFFB_RDX offsetof(VexGuestAMD64State,guest_RDX)
388 #define OFFB_RSP offsetof(VexGuestAMD64State,guest_RSP)
389 #define OFFB_RBP offsetof(VexGuestAMD64State,guest_RBP)
390 #define OFFB_RSI offsetof(VexGuestAMD64State,guest_RSI)
391 #define OFFB_RDI offsetof(VexGuestAMD64State,guest_RDI)
392 #define OFFB_R8 offsetof(VexGuestAMD64State,guest_R8)
393 #define OFFB_R9 offsetof(VexGuestAMD64State,guest_R9)
394 #define OFFB_R10 offsetof(VexGuestAMD64State,guest_R10)
395 #define OFFB_R11 offsetof(VexGuestAMD64State,guest_R11)
396 #define OFFB_R12 offsetof(VexGuestAMD64State,guest_R12)
397 #define OFFB_R13 offsetof(VexGuestAMD64State,guest_R13)
398 #define OFFB_R14 offsetof(VexGuestAMD64State,guest_R14)
399 #define OFFB_R15 offsetof(VexGuestAMD64State,guest_R15)
401 #define OFFB_RIP offsetof(VexGuestAMD64State,guest_RIP)
403 #define OFFB_FS_CONST offsetof(VexGuestAMD64State,guest_FS_CONST)
404 #define OFFB_GS_CONST offsetof(VexGuestAMD64State,guest_GS_CONST)
406 #define OFFB_CC_OP offsetof(VexGuestAMD64State,guest_CC_OP)
407 #define OFFB_CC_DEP1 offsetof(VexGuestAMD64State,guest_CC_DEP1)
408 #define OFFB_CC_DEP2 offsetof(VexGuestAMD64State,guest_CC_DEP2)
409 #define OFFB_CC_NDEP offsetof(VexGuestAMD64State,guest_CC_NDEP)
411 #define OFFB_FPREGS offsetof(VexGuestAMD64State,guest_FPREG[0])
412 #define OFFB_FPTAGS offsetof(VexGuestAMD64State,guest_FPTAG[0])
413 #define OFFB_DFLAG offsetof(VexGuestAMD64State,guest_DFLAG)
414 #define OFFB_ACFLAG offsetof(VexGuestAMD64State,guest_ACFLAG)
415 #define OFFB_IDFLAG offsetof(VexGuestAMD64State,guest_IDFLAG)
416 #define OFFB_FTOP offsetof(VexGuestAMD64State,guest_FTOP)
417 #define OFFB_FC3210 offsetof(VexGuestAMD64State,guest_FC3210)
418 #define OFFB_FPROUND offsetof(VexGuestAMD64State,guest_FPROUND)
420 #define OFFB_SSEROUND offsetof(VexGuestAMD64State,guest_SSEROUND)
421 #define OFFB_YMM0 offsetof(VexGuestAMD64State,guest_YMM0)
422 #define OFFB_YMM1 offsetof(VexGuestAMD64State,guest_YMM1)
423 #define OFFB_YMM2 offsetof(VexGuestAMD64State,guest_YMM2)
424 #define OFFB_YMM3 offsetof(VexGuestAMD64State,guest_YMM3)
425 #define OFFB_YMM4 offsetof(VexGuestAMD64State,guest_YMM4)
426 #define OFFB_YMM5 offsetof(VexGuestAMD64State,guest_YMM5)
427 #define OFFB_YMM6 offsetof(VexGuestAMD64State,guest_YMM6)
428 #define OFFB_YMM7 offsetof(VexGuestAMD64State,guest_YMM7)
429 #define OFFB_YMM8 offsetof(VexGuestAMD64State,guest_YMM8)
430 #define OFFB_YMM9 offsetof(VexGuestAMD64State,guest_YMM9)
431 #define OFFB_YMM10 offsetof(VexGuestAMD64State,guest_YMM10)
432 #define OFFB_YMM11 offsetof(VexGuestAMD64State,guest_YMM11)
433 #define OFFB_YMM12 offsetof(VexGuestAMD64State,guest_YMM12)
434 #define OFFB_YMM13 offsetof(VexGuestAMD64State,guest_YMM13)
435 #define OFFB_YMM14 offsetof(VexGuestAMD64State,guest_YMM14)
436 #define OFFB_YMM15 offsetof(VexGuestAMD64State,guest_YMM15)
437 #define OFFB_YMM16 offsetof(VexGuestAMD64State,guest_YMM16)
439 #define OFFB_EMNOTE offsetof(VexGuestAMD64State,guest_EMNOTE)
440 #define OFFB_CMSTART offsetof(VexGuestAMD64State,guest_CMSTART)
441 #define OFFB_CMLEN offsetof(VexGuestAMD64State,guest_CMLEN)
443 #define OFFB_NRADDR offsetof(VexGuestAMD64State,guest_NRADDR)
446 /*------------------------------------------------------------*/
447 /*--- Helper bits and pieces for deconstructing the ---*/
448 /*--- amd64 insn stream. ---*/
449 /*------------------------------------------------------------*/
451 /* This is the AMD64 register encoding -- integer regs. */
452 #define R_RAX 0
453 #define R_RCX 1
454 #define R_RDX 2
455 #define R_RBX 3
456 #define R_RSP 4
457 #define R_RBP 5
458 #define R_RSI 6
459 #define R_RDI 7
460 #define R_R8 8
461 #define R_R9 9
462 #define R_R10 10
463 #define R_R11 11
464 #define R_R12 12
465 #define R_R13 13
466 #define R_R14 14
467 #define R_R15 15
469 /* This is the Intel register encoding -- segment regs. */
470 #define R_ES 0
471 #define R_CS 1
472 #define R_SS 2
473 #define R_DS 3
474 #define R_FS 4
475 #define R_GS 5
478 /* Various simple conversions */
480 static ULong extend_s_8to64 ( UChar x )
482 return (ULong)((Long)(((ULong)x) << 56) >> 56);
485 static ULong extend_s_16to64 ( UShort x )
487 return (ULong)((Long)(((ULong)x) << 48) >> 48);
490 static ULong extend_s_32to64 ( UInt x )
492 return (ULong)((Long)(((ULong)x) << 32) >> 32);
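/* For example, extend_s_8to64(0x7F) == 0x7F, whereas
   extend_s_8to64(0x80) == 0xFFFFFFFFFFFFFF80, since bit 7 is the sign
   bit of the 8-bit value. */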
495 /* Figure out whether the mod and rm parts of a modRM byte refer to a
496 register or memory. If so, the byte will have the form 11XXXYYY,
497 where YYY is the register number. */
498 inline
499 static Bool epartIsReg ( UChar mod_reg_rm )
501 return toBool(0xC0 == (mod_reg_rm & 0xC0));
504 /* Extract the 'g' field from a modRM byte. This only produces 3
505 bits, which is not a complete register number. You should avoid
506 this function if at all possible. */
507 inline
508 static Int gregLO3ofRM ( UChar mod_reg_rm )
510 return (Int)( (mod_reg_rm >> 3) & 7 );
513 /* Ditto the 'e' field of a modRM byte. */
514 inline
515 static Int eregLO3ofRM ( UChar mod_reg_rm )
517 return (Int)(mod_reg_rm & 0x7);
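/* Worked example: for the modRM byte 0xD9 (binary 11 011 001),
   epartIsReg() is True, gregLO3ofRM() is 3 and eregLO3ofRM() is 1.
   Which architectural registers those numbers denote still depends on
   the REX bits; see the integer-register section below. */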
520 /* Get an 8/16/32-bit unsigned value out of the insn stream. */
522 static inline UChar getUChar ( Long delta )
524 UChar v = guest_code[delta+0];
525 return v;
528 static UInt getUDisp16 ( Long delta )
530 UInt v = guest_code[delta+1]; v <<= 8;
531 v |= guest_code[delta+0];
532 return v & 0xFFFF;
535 //.. static UInt getUDisp ( Int size, Long delta )
536 //.. {
537 //.. switch (size) {
538 //.. case 4: return getUDisp32(delta);
539 //.. case 2: return getUDisp16(delta);
540 //.. case 1: return getUChar(delta);
541 //.. default: vpanic("getUDisp(x86)");
542 //.. }
543 //.. return 0; /*notreached*/
544 //.. }
547 /* Get a byte value out of the insn stream and sign-extend to 64
548 bits. */
549 static Long getSDisp8 ( Long delta )
551 return extend_s_8to64( guest_code[delta] );
554 /* Get a 16-bit value out of the insn stream and sign-extend to 64
555 bits. */
556 static Long getSDisp16 ( Long delta )
558 UInt v = guest_code[delta+1]; v <<= 8;
559 v |= guest_code[delta+0];
560 return extend_s_16to64( (UShort)v );
563 /* Get a 32-bit value out of the insn stream and sign-extend to 64
564 bits. */
565 static Long getSDisp32 ( Long delta )
567 UInt v = guest_code[delta+3]; v <<= 8;
568 v |= guest_code[delta+2]; v <<= 8;
569 v |= guest_code[delta+1]; v <<= 8;
570 v |= guest_code[delta+0];
571 return extend_s_32to64( v );
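/* For example, if the next four bytes in the insn stream are 78 56 34 12
   then getSDisp32 returns 0x12345678, and for F0 FF FF FF it returns
   -16 (0xFFFFFFFFFFFFFFF0), since displacements are stored little-endian
   and sign-extended to 64 bits. */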
574 /* Get a 64-bit value out of the insn stream. */
575 static Long getDisp64 ( Long delta )
577 ULong v = 0;
578 v |= guest_code[delta+7]; v <<= 8;
579 v |= guest_code[delta+6]; v <<= 8;
580 v |= guest_code[delta+5]; v <<= 8;
581 v |= guest_code[delta+4]; v <<= 8;
582 v |= guest_code[delta+3]; v <<= 8;
583 v |= guest_code[delta+2]; v <<= 8;
584 v |= guest_code[delta+1]; v <<= 8;
585 v |= guest_code[delta+0];
586 return v;
589 /* Note: because AMD64 doesn't allow 64-bit literals, it is an error
590 if this is called with size==8. Should not happen. */
591 static Long getSDisp ( Int size, Long delta )
593 switch (size) {
594 case 4: return getSDisp32(delta);
595 case 2: return getSDisp16(delta);
596 case 1: return getSDisp8(delta);
597 default: vpanic("getSDisp(amd64)");
601 static ULong mkSizeMask ( Int sz )
603 switch (sz) {
604 case 1: return 0x00000000000000FFULL;
605 case 2: return 0x000000000000FFFFULL;
606 case 4: return 0x00000000FFFFFFFFULL;
607 case 8: return 0xFFFFFFFFFFFFFFFFULL;
608 default: vpanic("mkSizeMask(amd64)");
612 static Int imin ( Int a, Int b )
614 return (a < b) ? a : b;
617 static IRType szToITy ( Int n )
619 switch (n) {
620 case 1: return Ity_I8;
621 case 2: return Ity_I16;
622 case 4: return Ity_I32;
623 case 8: return Ity_I64;
624 default: vex_printf("\nszToITy(%d)\n", n);
625 vpanic("szToITy(amd64)");
630 /*------------------------------------------------------------*/
631 /*--- For dealing with prefixes. ---*/
632 /*------------------------------------------------------------*/
634 /* The idea is to pass around an int holding a bitmask summarising
635 info from the prefixes seen on the current instruction, including
636 info from the REX byte. This info is used in various places, but
637 most especially when making sense of register fields in
638 instructions.
640 The top 8 bits of the prefix are 0x55, just as a hacky way to
641 ensure it really is a valid prefix.
643 Things you can safely assume about a well-formed prefix:
644 * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set.
645 * if REX is not present then REXW,REXR,REXX,REXB will read
646 as zero.
647 * F2 and F3 will not both be 1.
650 typedef UInt Prefix;
652 #define PFX_ASO (1<<0) /* address-size override present (0x67) */
653 #define PFX_66 (1<<1) /* operand-size override-to-16 present (0x66) */
654 #define PFX_REX (1<<2) /* REX byte present (0x40 to 0x4F) */
655 #define PFX_REXW (1<<3) /* REX W bit, if REX present, else 0 */
656 #define PFX_REXR (1<<4) /* REX R bit, if REX present, else 0 */
657 #define PFX_REXX (1<<5) /* REX X bit, if REX present, else 0 */
658 #define PFX_REXB (1<<6) /* REX B bit, if REX present, else 0 */
659 #define PFX_LOCK (1<<7) /* bus LOCK prefix present (0xF0) */
660 #define PFX_F2 (1<<8) /* REPNE/REPNZ prefix present (0xF2) */
661 #define PFX_F3 (1<<9) /* REP/REPE/REPZ prefix present (0xF3) */
662 #define PFX_CS (1<<10) /* CS segment prefix present (0x2E) */
663 #define PFX_DS (1<<11) /* DS segment prefix present (0x3E) */
664 #define PFX_ES (1<<12) /* ES segment prefix present (0x26) */
665 #define PFX_FS (1<<13) /* FS segment prefix present (0x64) */
666 #define PFX_GS (1<<14) /* GS segment prefix present (0x65) */
667 #define PFX_SS (1<<15) /* SS segment prefix present (0x36) */
668 #define PFX_VEX (1<<16) /* VEX prefix present (0xC4 or 0xC5) */
669 #define PFX_VEXL (1<<17) /* VEX L bit, if VEX present, else 0 */
670 /* The extra register field VEX.vvvv is encoded (after not-ing it) as
671 PFX_VEXnV3 .. PFX_VEXnV0, so these must occupy adjacent bit
672 positions. */
673 #define PFX_VEXnV0 (1<<18) /* ~VEX vvvv[0], if VEX present, else 0 */
674 #define PFX_VEXnV1 (1<<19) /* ~VEX vvvv[1], if VEX present, else 0 */
675 #define PFX_VEXnV2 (1<<20) /* ~VEX vvvv[2], if VEX present, else 0 */
676 #define PFX_VEXnV3 (1<<21) /* ~VEX vvvv[3], if VEX present, else 0 */
679 #define PFX_EMPTY 0x55000000
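/* Worked example (illustrative): an instruction seen with both a 0x66
   operand-size override and a REX.W byte would be summarised as
   PFX_EMPTY | PFX_66 | PFX_REX | PFX_REXW, i.e. 0x5500000E, for which
   IS_VALID_PFX() holds, have66() is True and getRexW() returns 1. */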
681 static Bool IS_VALID_PFX ( Prefix pfx ) {
682 return toBool((pfx & 0xFF000000) == PFX_EMPTY);
685 static Bool haveREX ( Prefix pfx ) {
686 return toBool(pfx & PFX_REX);
689 static Int getRexW ( Prefix pfx ) {
690 return (pfx & PFX_REXW) ? 1 : 0;
692 static Int getRexR ( Prefix pfx ) {
693 return (pfx & PFX_REXR) ? 1 : 0;
695 static Int getRexX ( Prefix pfx ) {
696 return (pfx & PFX_REXX) ? 1 : 0;
698 static Int getRexB ( Prefix pfx ) {
699 return (pfx & PFX_REXB) ? 1 : 0;
702 /* Check a prefix doesn't have F2 or F3 set in it, since usually that
703 completely changes what instruction it really is. */
704 static Bool haveF2orF3 ( Prefix pfx ) {
705 return toBool((pfx & (PFX_F2|PFX_F3)) > 0);
707 static Bool haveF2andF3 ( Prefix pfx ) {
708 return toBool((pfx & (PFX_F2|PFX_F3)) == (PFX_F2|PFX_F3));
710 static Bool haveF2 ( Prefix pfx ) {
711 return toBool((pfx & PFX_F2) > 0);
713 static Bool haveF3 ( Prefix pfx ) {
714 return toBool((pfx & PFX_F3) > 0);
717 static Bool have66 ( Prefix pfx ) {
718 return toBool((pfx & PFX_66) > 0);
720 static Bool haveASO ( Prefix pfx ) {
721 return toBool((pfx & PFX_ASO) > 0);
723 static Bool haveLOCK ( Prefix pfx ) {
724 return toBool((pfx & PFX_LOCK) > 0);
727 /* Return True iff pfx has 66 set and F2 and F3 clear */
728 static Bool have66noF2noF3 ( Prefix pfx )
730 return
731 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66);
734 /* Return True iff pfx has F2 set and 66 and F3 clear */
735 static Bool haveF2no66noF3 ( Prefix pfx )
737 return
738 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F2);
741 /* Return True iff pfx has F3 set and 66 and F2 clear */
742 static Bool haveF3no66noF2 ( Prefix pfx )
744 return
745 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3);
748 /* Return True iff pfx has F3 set and F2 clear */
749 static Bool haveF3noF2 ( Prefix pfx )
751 return
752 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F3);
755 /* Return True iff pfx has F2 set and F3 clear */
756 static Bool haveF2noF3 ( Prefix pfx )
758 return
759 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F2);
762 /* Return True iff pfx has F2 and F3 clear */
763 static Bool haveNoF2noF3 ( Prefix pfx )
765 return
766 toBool((pfx & (PFX_F2|PFX_F3)) == 0);
769 /* Return True iff pfx has 66, F2 and F3 clear */
770 static Bool haveNo66noF2noF3 ( Prefix pfx )
772 return
773 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0);
776 /* Return True iff pfx has any of 66, F2 and F3 set */
777 static Bool have66orF2orF3 ( Prefix pfx )
779 return toBool( ! haveNo66noF2noF3(pfx) );
782 /* Return True iff pfx has 66 or F3 set */
783 static Bool have66orF3 ( Prefix pfx )
785 return toBool((pfx & (PFX_66|PFX_F3)) > 0);
788 /* Clear all the segment-override bits in a prefix. */
789 static Prefix clearSegBits ( Prefix p )
791 return
792 p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS);
795 /* Get the (inverted, hence back to "normal") VEX.vvvv field. */
796 static UInt getVexNvvvv ( Prefix pfx ) {
797 UInt r = (UInt)pfx;
798 r /= (UInt)PFX_VEXnV0; /* pray this turns into a shift */
799 return r & 0xF;
802 static Bool haveVEX ( Prefix pfx ) {
803 return toBool(pfx & PFX_VEX);
806 static Int getVexL ( Prefix pfx ) {
807 return (pfx & PFX_VEXL) ? 1 : 0;
811 /*------------------------------------------------------------*/
812 /*--- For dealing with escapes ---*/
813 /*------------------------------------------------------------*/
816 /* Escapes come after the prefixes, but before the primary opcode
817 byte. They escape the primary opcode byte into a bigger space.
818 The 0xF0000000 isn't significant, except so as to make it not
819 overlap valid Prefix values, for sanity checking.
822 typedef
823 enum {
824 ESC_NONE=0xF0000000, // none
825 ESC_0F, // 0F
826 ESC_0F38, // 0F 38
827 ESC_0F3A // 0F 3A
829 Escape;
832 /*------------------------------------------------------------*/
833 /*--- For dealing with integer registers ---*/
834 /*------------------------------------------------------------*/
836 /* This is somewhat complex. The rules are:
838 For 64, 32 and 16 bit register references, the e or g fields in the
839 modrm bytes supply the low 3 bits of the register number. The
840 fourth (most-significant) bit of the register number is supplied by
841 the REX byte, if it is present; else that bit is taken to be zero.
843 The REX.R bit supplies the high bit corresponding to the g register
844 field, and the REX.B bit supplies the high bit corresponding to the
845 e register field (when the mod part of modrm indicates that modrm's
846 e component refers to a register and not to memory).
848 The REX.X bit supplies a high register bit for certain registers
849 in SIB address modes, and is generally rarely used.
851 For 8 bit register references, the presence of the REX byte itself
852 has significance. If there is no REX present, then the 3-bit
853 number extracted from the modrm e or g field is treated as an index
854 into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the
855 old x86 encoding scheme.
857 But if there is a REX present, the register reference is
858 interpreted in the same way as for 64/32/16-bit references: a high
859 bit is extracted from REX, giving a 4-bit number, and the denoted
860 register is the lowest 8 bits of the 16 integer registers denoted
861 by the number. In particular, values 4 through 7 of this sequence
862 do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of
863 %rsp %rbp %rsi %rdi.
865 The REX.W bit has no bearing at all on register numbers. Instead
866 its presence indicates that the operand size is to be overridden
867 from its default value (32 bits) to 64 bits instead. This is in
868 the same fashion that an 0x66 prefix indicates the operand size is
869 to be overridden from 32 bits down to 16 bits. When both REX.W and
870 0x66 are present there is a conflict, and REX.W takes precedence.
872 Rather than try to handle this complexity using a single huge
873 function, several smaller ones are provided. The aim is to make it
874 as difficult as possible to screw up register decoding in a subtle
875 and hard-to-track-down way.
877 Because these routines fish around in the host's memory (that is,
878 in the guest state area) for sub-parts of guest registers, their
879 correctness depends on the host's endianness. So far these
880 routines only work for little-endian hosts. Those for which
881 endianness is important have assertions to ensure sanity.
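/* Worked example (illustrative): with REX.B == 1 and an e field of 101,
   the full register number is 1101 == 13, i.e. %r13 (or %r13d/%r13w/%r13b
   depending on the operand size).  With no REX byte and sz == 1, the same
   e field of 101 instead denotes %ch under the old x86 scheme described
   above. */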
885 /* About the simplest question you can ask: where do the 64-bit
886 integer registers live (in the guest state) ? */
888 static Int integerGuestReg64Offset ( UInt reg )
890 switch (reg) {
891 case R_RAX: return OFFB_RAX;
892 case R_RCX: return OFFB_RCX;
893 case R_RDX: return OFFB_RDX;
894 case R_RBX: return OFFB_RBX;
895 case R_RSP: return OFFB_RSP;
896 case R_RBP: return OFFB_RBP;
897 case R_RSI: return OFFB_RSI;
898 case R_RDI: return OFFB_RDI;
899 case R_R8: return OFFB_R8;
900 case R_R9: return OFFB_R9;
901 case R_R10: return OFFB_R10;
902 case R_R11: return OFFB_R11;
903 case R_R12: return OFFB_R12;
904 case R_R13: return OFFB_R13;
905 case R_R14: return OFFB_R14;
906 case R_R15: return OFFB_R15;
907 default: vpanic("integerGuestReg64Offset(amd64)");
912 /* Produce the name of an integer register, for printing purposes.
913 reg is a number in the range 0 .. 15 that has been generated from a
914 3-bit reg-field number and a REX extension bit. irregular denotes
915 the case where sz==1 and no REX byte is present. */
917 static
918 const HChar* nameIReg ( Int sz, UInt reg, Bool irregular )
920 static const HChar* ireg64_names[16]
921 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
922 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
923 static const HChar* ireg32_names[16]
924 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
925 "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" };
926 static const HChar* ireg16_names[16]
927 = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di",
928 "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" };
929 static const HChar* ireg8_names[16]
930 = { "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil",
931 "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" };
932 static const HChar* ireg8_irregular[8]
933 = { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh" };
935 vassert(reg < 16);
936 if (sz == 1) {
937 if (irregular)
938 vassert(reg < 8);
939 } else {
940 vassert(irregular == False);
943 switch (sz) {
944 case 8: return ireg64_names[reg];
945 case 4: return ireg32_names[reg];
946 case 2: return ireg16_names[reg];
947 case 1: if (irregular) {
948 return ireg8_irregular[reg];
949 } else {
950 return ireg8_names[reg];
952 default: vpanic("nameIReg(amd64)");
956 /* Using the same argument conventions as nameIReg, produce the
957 guest state offset of an integer register. */
959 static
960 Int offsetIReg ( Int sz, UInt reg, Bool irregular )
962 vassert(reg < 16);
963 if (sz == 1) {
964 if (irregular)
965 vassert(reg < 8);
966 } else {
967 vassert(irregular == False);
970 /* Deal with irregular case -- sz==1 and no REX present */
971 if (sz == 1 && irregular) {
972 switch (reg) {
973 case R_RSP: return 1+ OFFB_RAX;
974 case R_RBP: return 1+ OFFB_RCX;
975 case R_RSI: return 1+ OFFB_RDX;
976 case R_RDI: return 1+ OFFB_RBX;
977 default: break; /* use the normal case */
981 /* Normal case */
982 return integerGuestReg64Offset(reg);
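/* For instance, offsetIReg(1, R_RBP, True) is 1 + OFFB_RCX, i.e. the
   byte holding %ch, whereas offsetIReg(1, R_RBP, False) is OFFB_RBP,
   i.e. the low byte of %rbp (%bpl).  Little-endian layout is assumed
   throughout, as per the assertions elsewhere in this section. */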
986 /* Read the %CL register :: Ity_I8, for shift/rotate operations. */
988 static IRExpr* getIRegCL ( void )
990 vassert(host_endness == VexEndnessLE);
991 return IRExpr_Get( OFFB_RCX, Ity_I8 );
995 /* Write to the %AH register. */
997 static void putIRegAH ( IRExpr* e )
999 vassert(host_endness == VexEndnessLE);
1000 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
1001 stmt( IRStmt_Put( OFFB_RAX+1, e ) );
1005 /* Read/write various widths of %RAX, as it has various
1006 special-purpose uses. */
1008 static const HChar* nameIRegRAX ( Int sz )
1010 switch (sz) {
1011 case 1: return "%al";
1012 case 2: return "%ax";
1013 case 4: return "%eax";
1014 case 8: return "%rax";
1015 default: vpanic("nameIRegRAX(amd64)");
1019 static IRExpr* getIRegRAX ( Int sz )
1021 vassert(host_endness == VexEndnessLE);
1022 switch (sz) {
1023 case 1: return IRExpr_Get( OFFB_RAX, Ity_I8 );
1024 case 2: return IRExpr_Get( OFFB_RAX, Ity_I16 );
1025 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 ));
1026 case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 );
1027 default: vpanic("getIRegRAX(amd64)");
1031 static void putIRegRAX ( Int sz, IRExpr* e )
1033 IRType ty = typeOfIRExpr(irsb->tyenv, e);
1034 vassert(host_endness == VexEndnessLE);
1035 switch (sz) {
1036 case 8: vassert(ty == Ity_I64);
1037 stmt( IRStmt_Put( OFFB_RAX, e ));
1038 break;
1039 case 4: vassert(ty == Ity_I32);
1040 stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) ));
1041 break;
1042 case 2: vassert(ty == Ity_I16);
1043 stmt( IRStmt_Put( OFFB_RAX, e ));
1044 break;
1045 case 1: vassert(ty == Ity_I8);
1046 stmt( IRStmt_Put( OFFB_RAX, e ));
1047 break;
1048 default: vpanic("putIRegRAX(amd64)");
1053 /* Read/write various widths of %RDX, as it has various
1054 special-purpose uses. */
1056 static const HChar* nameIRegRDX ( Int sz )
1058 switch (sz) {
1059 case 1: return "%dl";
1060 case 2: return "%dx";
1061 case 4: return "%edx";
1062 case 8: return "%rdx";
1063 default: vpanic("nameIRegRDX(amd64)");
1067 static IRExpr* getIRegRDX ( Int sz )
1069 vassert(host_endness == VexEndnessLE);
1070 switch (sz) {
1071 case 1: return IRExpr_Get( OFFB_RDX, Ity_I8 );
1072 case 2: return IRExpr_Get( OFFB_RDX, Ity_I16 );
1073 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 ));
1074 case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 );
1075 default: vpanic("getIRegRDX(amd64)");
1079 static void putIRegRDX ( Int sz, IRExpr* e )
1081 vassert(host_endness == VexEndnessLE);
1082 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
1083 switch (sz) {
1084 case 8: stmt( IRStmt_Put( OFFB_RDX, e ));
1085 break;
1086 case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) ));
1087 break;
1088 case 2: stmt( IRStmt_Put( OFFB_RDX, e ));
1089 break;
1090 case 1: stmt( IRStmt_Put( OFFB_RDX, e ));
1091 break;
1092 default: vpanic("putIRegRDX(amd64)");
1097 /* Simplistic functions to deal with the integer registers as a
1098 straightforward bank of 16 64-bit regs. */
1100 static IRExpr* getIReg64 ( UInt regno )
1102 return IRExpr_Get( integerGuestReg64Offset(regno),
1103 Ity_I64 );
1106 static void putIReg64 ( UInt regno, IRExpr* e )
1108 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
1109 stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) );
1112 static const HChar* nameIReg64 ( UInt regno )
1114 return nameIReg( 8, regno, False );
1118 /* Simplistic functions to deal with the lower halves of integer
1119 registers as a straightforward bank of 16 32-bit regs. */
1121 static IRExpr* getIReg32 ( UInt regno )
1123 vassert(host_endness == VexEndnessLE);
1124 return unop(Iop_64to32,
1125 IRExpr_Get( integerGuestReg64Offset(regno),
1126 Ity_I64 ));
1129 static void putIReg32 ( UInt regno, IRExpr* e )
1131 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
1132 stmt( IRStmt_Put( integerGuestReg64Offset(regno),
1133 unop(Iop_32Uto64,e) ) );
1136 static const HChar* nameIReg32 ( UInt regno )
1138 return nameIReg( 4, regno, False );
1142 /* Simplistic functions to deal with the lower quarters of integer
1143 registers as a straightforward bank of 16 16-bit regs. */
1145 static IRExpr* getIReg16 ( UInt regno )
1147 vassert(host_endness == VexEndnessLE);
1148 return IRExpr_Get( integerGuestReg64Offset(regno),
1149 Ity_I16 );
1152 static void putIReg16 ( UInt regno, IRExpr* e )
1154 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
1155 stmt( IRStmt_Put( integerGuestReg64Offset(regno),
1156 unop(Iop_16Uto64,e) ) );
1159 static const HChar* nameIReg16 ( UInt regno )
1161 return nameIReg( 2, regno, False );
1165 /* Sometimes what we know is a 3-bit register number, a REX byte, and
1166 which field of the REX byte is to be used to extend to a 4-bit
1167 number. These functions cater for that situation.
1169 static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits )
1171 vassert(lo3bits < 8);
1172 vassert(IS_VALID_PFX(pfx));
1173 return getIReg64( lo3bits | (getRexX(pfx) << 3) );
1176 static const HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits )
1178 vassert(lo3bits < 8);
1179 vassert(IS_VALID_PFX(pfx));
1180 return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False );
1183 static const HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
1185 vassert(lo3bits < 8);
1186 vassert(IS_VALID_PFX(pfx));
1187 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1188 return nameIReg( sz, lo3bits | (getRexB(pfx) << 3),
1189 toBool(sz==1 && !haveREX(pfx)) );
1192 static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
1194 vassert(lo3bits < 8);
1195 vassert(IS_VALID_PFX(pfx));
1196 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1197 if (sz == 4) {
1198 sz = 8;
1199 return unop(Iop_64to32,
1200 IRExpr_Get(
1201 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
1202 False/*!irregular*/ ),
1203 szToITy(sz)
1206 } else {
1207 return IRExpr_Get(
1208 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
1209 toBool(sz==1 && !haveREX(pfx)) ),
1210 szToITy(sz)
1215 static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e )
1217 vassert(lo3bits < 8);
1218 vassert(IS_VALID_PFX(pfx));
1219 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1220 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
1221 stmt( IRStmt_Put(
1222 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
1223 toBool(sz==1 && !haveREX(pfx)) ),
1224 sz==4 ? unop(Iop_32Uto64,e) : e
1229 /* Functions for getting register numbers from modrm bytes and REX
1230 when we don't have to consider the complexities of integer subreg
1231 accesses.
1233 /* Extract the g reg field from a modRM byte, and augment it using the
1234 REX.R bit from the supplied REX byte. The R bit usually is
1235 associated with the g register field.
1237 static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
1239 Int reg = (Int)( (mod_reg_rm >> 3) & 7 );
1240 reg += (pfx & PFX_REXR) ? 8 : 0;
1241 return reg;
1244 /* Extract the e reg field from a modRM byte, and augment it using the
1245 REX.B bit from the supplied REX byte. The B bit usually is
1246 associated with the e register field (when modrm indicates e is a
1247 register, that is).
1249 static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
1251 Int rm;
1252 vassert(epartIsReg(mod_reg_rm));
1253 rm = (Int)(mod_reg_rm & 0x7);
1254 rm += (pfx & PFX_REXB) ? 8 : 0;
1255 return rm;
1259 /* General functions for dealing with integer register access. */
1261 /* Produce the guest state offset for a reference to the 'g' register
1262 field in a modrm byte, taking into account REX (or its absence),
1263 and the size of the access.
1265 static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1267 UInt reg;
1268 vassert(host_endness == VexEndnessLE);
1269 vassert(IS_VALID_PFX(pfx));
1270 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1271 reg = gregOfRexRM( pfx, mod_reg_rm );
1272 return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
1275 static
1276 IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1278 if (sz == 4) {
1279 sz = 8;
1280 return unop(Iop_64to32,
1281 IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
1282 szToITy(sz) ));
1283 } else {
1284 return IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
1285 szToITy(sz) );
1289 static
1290 void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
1292 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
1293 if (sz == 4) {
1294 e = unop(Iop_32Uto64,e);
1296 stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) );
1299 static
1300 const HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1302 return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm),
1303 toBool(sz==1 && !haveREX(pfx)) );
1307 static
1308 IRExpr* getIRegV ( Int sz, Prefix pfx )
1310 if (sz == 4) {
1311 sz = 8;
1312 return unop(Iop_64to32,
1313 IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ),
1314 szToITy(sz) ));
1315 } else {
1316 return IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ),
1317 szToITy(sz) );
1321 static
1322 void putIRegV ( Int sz, Prefix pfx, IRExpr* e )
1324 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
1325 if (sz == 4) {
1326 e = unop(Iop_32Uto64,e);
1328 stmt( IRStmt_Put( offsetIReg( sz, getVexNvvvv(pfx), False ), e ) );
1331 static
1332 const HChar* nameIRegV ( Int sz, Prefix pfx )
1334 return nameIReg( sz, getVexNvvvv(pfx), False );
1339 /* Produce the guest state offset for a reference to the 'e' register
1340 field in a modrm byte, taking into account REX (or its absence),
1341 and the size of the access. eregOfRexRM will assert if mod_reg_rm
1342 denotes a memory access rather than a register access.
1344 static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1346 UInt reg;
1347 vassert(host_endness == VexEndnessLE);
1348 vassert(IS_VALID_PFX(pfx));
1349 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1350 reg = eregOfRexRM( pfx, mod_reg_rm );
1351 return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
1354 static
1355 IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1357 if (sz == 4) {
1358 sz = 8;
1359 return unop(Iop_64to32,
1360 IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
1361 szToITy(sz) ));
1362 } else {
1363 return IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
1364 szToITy(sz) );
1368 static
1369 void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
1371 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
1372 if (sz == 4) {
1373 e = unop(Iop_32Uto64,e);
1375 stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) );
1378 static
1379 const HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1381 return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm),
1382 toBool(sz==1 && !haveREX(pfx)) );
1386 /*------------------------------------------------------------*/
1387 /*--- For dealing with XMM registers ---*/
1388 /*------------------------------------------------------------*/
1390 static Int ymmGuestRegOffset ( UInt ymmreg )
1392 switch (ymmreg) {
1393 case 0: return OFFB_YMM0;
1394 case 1: return OFFB_YMM1;
1395 case 2: return OFFB_YMM2;
1396 case 3: return OFFB_YMM3;
1397 case 4: return OFFB_YMM4;
1398 case 5: return OFFB_YMM5;
1399 case 6: return OFFB_YMM6;
1400 case 7: return OFFB_YMM7;
1401 case 8: return OFFB_YMM8;
1402 case 9: return OFFB_YMM9;
1403 case 10: return OFFB_YMM10;
1404 case 11: return OFFB_YMM11;
1405 case 12: return OFFB_YMM12;
1406 case 13: return OFFB_YMM13;
1407 case 14: return OFFB_YMM14;
1408 case 15: return OFFB_YMM15;
1409 default: vpanic("ymmGuestRegOffset(amd64)");
1413 static Int xmmGuestRegOffset ( UInt xmmreg )
1415 /* Correct for little-endian host only. */
1416 vassert(host_endness == VexEndnessLE);
1417 return ymmGuestRegOffset( xmmreg );
1420 /* Lanes of vector registers are always numbered from zero being the
1421 least significant lane (rightmost in the register). */
1423 static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
1425 /* Correct for little-endian host only. */
1426 vassert(host_endness == VexEndnessLE);
1427 vassert(laneno >= 0 && laneno < 8);
1428 return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
1431 static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
1433 /* Correct for little-endian host only. */
1434 vassert(host_endness == VexEndnessLE);
1435 vassert(laneno >= 0 && laneno < 4);
1436 return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
1439 static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
1441 /* Correct for little-endian host only. */
1442 vassert(host_endness == VexEndnessLE);
1443 vassert(laneno >= 0 && laneno < 2);
1444 return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
1447 static Int ymmGuestRegLane128offset ( UInt ymmreg, Int laneno )
1449 /* Correct for little-endian host only. */
1450 vassert(host_endness == VexEndnessLE);
1451 vassert(laneno >= 0 && laneno < 2);
1452 return ymmGuestRegOffset( ymmreg ) + 16 * laneno;
1455 static Int ymmGuestRegLane64offset ( UInt ymmreg, Int laneno )
1457 /* Correct for little-endian host only. */
1458 vassert(host_endness == VexEndnessLE);
1459 vassert(laneno >= 0 && laneno < 4);
1460 return ymmGuestRegOffset( ymmreg ) + 8 * laneno;
1463 static Int ymmGuestRegLane32offset ( UInt ymmreg, Int laneno )
1465 /* Correct for little-endian host only. */
1466 vassert(host_endness == VexEndnessLE);
1467 vassert(laneno >= 0 && laneno < 8);
1468 return ymmGuestRegOffset( ymmreg ) + 4 * laneno;
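/* These lane offsets are plain byte offsets into the guest state, so for
   example xmmGuestRegLane64offset(3, 1) is OFFB_YMM3 + 8, i.e. the upper
   64 bits of %xmm3, and ymmGuestRegLane128offset(3, 1) is OFFB_YMM3 + 16,
   i.e. the upper half of %ymm3. */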
1471 static IRExpr* getXMMReg ( UInt xmmreg )
1473 return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
1476 static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
1478 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
1481 static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
1483 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
1486 static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
1488 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
1491 static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
1493 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
1496 static IRExpr* getXMMRegLane16 ( UInt xmmreg, Int laneno )
1498 return IRExpr_Get( xmmGuestRegLane16offset(xmmreg,laneno), Ity_I16 );
1501 static void putXMMReg ( UInt xmmreg, IRExpr* e )
1503 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
1504 stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
1507 static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
1509 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
1510 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
1513 static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
1515 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
1516 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
1519 static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
1521 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
1522 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
1525 static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
1527 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
1528 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
1531 static IRExpr* getYMMReg ( UInt xmmreg )
1533 return IRExpr_Get( ymmGuestRegOffset(xmmreg), Ity_V256 );
1536 static IRExpr* getYMMRegLane128 ( UInt ymmreg, Int laneno )
1538 return IRExpr_Get( ymmGuestRegLane128offset(ymmreg,laneno), Ity_V128 );
1541 static IRExpr* getYMMRegLane64F ( UInt ymmreg, Int laneno )
1543 return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_F64 );
1546 static IRExpr* getYMMRegLane64 ( UInt ymmreg, Int laneno )
1548 return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_I64 );
1551 static IRExpr* getYMMRegLane32F ( UInt ymmreg, Int laneno )
1553 return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_F32 );
1556 static IRExpr* getYMMRegLane32 ( UInt ymmreg, Int laneno )
1558 return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_I32 );
1561 static void putYMMReg ( UInt ymmreg, IRExpr* e )
1563 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V256);
1564 stmt( IRStmt_Put( ymmGuestRegOffset(ymmreg), e ) );
1567 static void putYMMRegLane128 ( UInt ymmreg, Int laneno, IRExpr* e )
1569 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
1570 stmt( IRStmt_Put( ymmGuestRegLane128offset(ymmreg,laneno), e ) );
1573 static void putYMMRegLane64F ( UInt ymmreg, Int laneno, IRExpr* e )
1575 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
1576 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
1579 static void putYMMRegLane64 ( UInt ymmreg, Int laneno, IRExpr* e )
1581 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
1582 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
1585 static void putYMMRegLane32F ( UInt ymmreg, Int laneno, IRExpr* e )
1587 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
1588 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
1591 static void putYMMRegLane32 ( UInt ymmreg, Int laneno, IRExpr* e )
1593 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
1594 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
1597 static IRExpr* mkV128 ( UShort mask )
1599 return IRExpr_Const(IRConst_V128(mask));
1602 /* Write the low half of a YMM reg and zero out the upper half. */
1603 static void putYMMRegLoAndZU ( UInt ymmreg, IRExpr* e )
1605 putYMMRegLane128( ymmreg, 0, e );
1606 putYMMRegLane128( ymmreg, 1, mkV128(0) );
1609 static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
1611 vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
1612 vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
1613 return unop(Iop_64to1,
1614 binop(Iop_And64,
1615 unop(Iop_1Uto64,x),
1616 unop(Iop_1Uto64,y)));
1619 /* Generate a compare-and-swap operation, operating on memory at
1620 'addr'. The expected value is 'expVal' and the new value is
1621 'newVal'. If the operation fails, then transfer control (with a
1622 no-redir jump (XXX no -- see comment at top of this file)) to
1623 'restart_point', which is presumably the address of the guest
1624 instruction again -- retrying, essentially. */
1625 static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
1626 Addr64 restart_point )
1628 IRCAS* cas;
1629 IRType tyE = typeOfIRExpr(irsb->tyenv, expVal);
1630 IRType tyN = typeOfIRExpr(irsb->tyenv, newVal);
1631 IRTemp oldTmp = newTemp(tyE);
1632 IRTemp expTmp = newTemp(tyE);
1633 vassert(tyE == tyN);
1634 vassert(tyE == Ity_I64 || tyE == Ity_I32
1635 || tyE == Ity_I16 || tyE == Ity_I8);
1636 assign(expTmp, expVal);
1637 cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
1638 NULL, mkexpr(expTmp), NULL, newVal );
1639 stmt( IRStmt_CAS(cas) );
1640 stmt( IRStmt_Exit(
1641 binop( mkSizedOp(tyE,Iop_CasCmpNE8),
1642 mkexpr(oldTmp), mkexpr(expTmp) ),
1643 Ijk_Boring, /*Ijk_NoRedir*/
1644 IRConst_U64( restart_point ),
1645 OFFB_RIP
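/* Sketch of how a LOCK-aware handler typically uses casLE (illustrative
   only -- the real callers are dis_op2_G_E and the other handlers listed
   in the LOCK comment near the top of this file).  For a locked
   read-modify-write of a 64-bit memory operand:

      IRTemp addr = newTemp(Ity_I64);   // assumed to hold the EA
      IRTemp oldv = newTemp(Ity_I64);
      IRTemp newv = newTemp(Ity_I64);
      assign(oldv, loadLE(Ity_I64, mkexpr(addr)));
      assign(newv, binop(Iop_Add64, mkexpr(oldv), mkU64(1)));
      casLE(mkexpr(addr), mkexpr(oldv), mkexpr(newv),
            guest_RIP_curr_instr);

   If the CAS fails (the location changed under us) the generated IR
   side-exits back to the start of the instruction and retries. */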
1650 /*------------------------------------------------------------*/
1651 /*--- Helpers for %rflags. ---*/
1652 /*------------------------------------------------------------*/
1654 /* -------------- Evaluating the flags-thunk. -------------- */
1656 /* Build IR to calculate all the eflags from stored
1657 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1658 Ity_I64. */
1659 static IRExpr* mk_amd64g_calculate_rflags_all ( void )
1661 IRExpr** args
1662 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1663 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1664 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1665 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1666 IRExpr* call
1667 = mkIRExprCCall(
1668 Ity_I64,
1669 0/*regparm*/,
1670 "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all,
1671 args
1673 /* Exclude OP and NDEP from definedness checking. We're only
1674 interested in DEP1 and DEP2. */
1675 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1676 return call;
1679 /* Build IR to calculate some particular condition from stored
1680 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1681 Ity_I1. */
1682 static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond )
1684 IRExpr** args
1685 = mkIRExprVec_5( mkU64(cond),
1686 IRExpr_Get(OFFB_CC_OP, Ity_I64),
1687 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1688 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1689 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1690 IRExpr* call
1691 = mkIRExprCCall(
1692 Ity_I64,
1693 0/*regparm*/,
1694 "amd64g_calculate_condition", &amd64g_calculate_condition,
1695 args
1697 /* Exclude the requested condition, OP and NDEP from definedness
1698 checking. We're only interested in DEP1 and DEP2. */
1699 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
1700 return unop(Iop_64to1, call);
1703 /* Build IR to calculate just the carry flag from stored
1704 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I64. */
1705 static IRExpr* mk_amd64g_calculate_rflags_c ( void )
1707 IRExpr** args
1708 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1709 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1710 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1711 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1712 IRExpr* call
1713 = mkIRExprCCall(
1714 Ity_I64,
1715 0/*regparm*/,
1716 "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c,
1717 args
1719 /* Exclude OP and NDEP from definedness checking. We're only
1720 interested in DEP1 and DEP2. */
1721 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1722 return call;
1726 /* -------------- Building the flags-thunk. -------------- */
1728 /* The machinery in this section builds the flag-thunk following a
1729 flag-setting operation. Hence the various setFlags_* functions.
1732 static Bool isAddSub ( IROp op8 )
1734 return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
1737 static Bool isLogic ( IROp op8 )
1739 return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
1742 /* U-widen 1/8/16/32/64 bit int expr to 64. */
1743 static IRExpr* widenUto64 ( IRExpr* e )
1745 switch (typeOfIRExpr(irsb->tyenv,e)) {
1746 case Ity_I64: return e;
1747 case Ity_I32: return unop(Iop_32Uto64, e);
1748 case Ity_I16: return unop(Iop_16Uto64, e);
1749 case Ity_I8: return unop(Iop_8Uto64, e);
1750 case Ity_I1: return unop(Iop_1Uto64, e);
1751 default: vpanic("widenUto64");
1755 /* S-widen 8/16/32/64 bit int expr to 64. */
1756 static IRExpr* widenSto64 ( IRExpr* e )
1758 switch (typeOfIRExpr(irsb->tyenv,e)) {
1759 case Ity_I64: return e;
1760 case Ity_I32: return unop(Iop_32Sto64, e);
1761 case Ity_I16: return unop(Iop_16Sto64, e);
1762 case Ity_I8: return unop(Iop_8Sto64, e);
1763 default: vpanic("widenSto64");
1767 /* Narrow 8/16/32/64 bit int expr to 8/16/32/64. Clearly only some
1768 of these combinations make sense. */
1769 static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
1771 IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
1772 if (src_ty == dst_ty)
1773 return e;
1774 if (src_ty == Ity_I32 && dst_ty == Ity_I16)
1775 return unop(Iop_32to16, e);
1776 if (src_ty == Ity_I32 && dst_ty == Ity_I8)
1777 return unop(Iop_32to8, e);
1778 if (src_ty == Ity_I64 && dst_ty == Ity_I32)
1779 return unop(Iop_64to32, e);
1780 if (src_ty == Ity_I64 && dst_ty == Ity_I16)
1781 return unop(Iop_64to16, e);
1782 if (src_ty == Ity_I64 && dst_ty == Ity_I8)
1783 return unop(Iop_64to8, e);
1785 vex_printf("\nsrc, dst tys are: ");
1786 ppIRType(src_ty);
1787 vex_printf(", ");
1788 ppIRType(dst_ty);
1789 vex_printf("\n");
1790 vpanic("narrowTo(amd64)");
1794 /* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is
1795 auto-sized up to the real op. */
1797 static
1798 void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
1800 Int ccOp = 0;
1801 switch (ty) {
1802 case Ity_I8: ccOp = 0; break;
1803 case Ity_I16: ccOp = 1; break;
1804 case Ity_I32: ccOp = 2; break;
1805 case Ity_I64: ccOp = 3; break;
1806 default: vassert(0);
1808 switch (op8) {
1809 case Iop_Add8: ccOp += AMD64G_CC_OP_ADDB; break;
1810 case Iop_Sub8: ccOp += AMD64G_CC_OP_SUBB; break;
1811 default: ppIROp(op8);
1812 vpanic("setFlags_DEP1_DEP2(amd64)");
1814 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1815 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
1816 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) );
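/* So, for example, a 32-bit ADD passes op8 == Iop_Add8 and ty == Ity_I32
   here, giving CC_OP = AMD64G_CC_OP_ADDB + 2.  Given that the B/W/L/Q
   variants of each CC_OP constant are declared consecutively (which the
   arithmetic above assumes), that is the 32-bit ADD thunk operation, with
   the two addends already widened into DEP1 and DEP2. */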
1820 /* Set the OP and DEP1 fields only, and write zero to DEP2. */
1822 static
1823 void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
1825 Int ccOp = 0;
1826 switch (ty) {
1827 case Ity_I8: ccOp = 0; break;
1828 case Ity_I16: ccOp = 1; break;
1829 case Ity_I32: ccOp = 2; break;
1830 case Ity_I64: ccOp = 3; break;
1831 default: vassert(0);
1833 switch (op8) {
1834 case Iop_Or8:
1835 case Iop_And8:
1836 case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break;
1837 default: ppIROp(op8);
1838 vpanic("setFlags_DEP1(amd64)");
1840 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1841 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
1842 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
1846 /* For shift operations, we put in the result and the undershifted
1847 result. However, if the shift amount is zero, the thunk is left
1848 unchanged. */
1850 static void setFlags_DEP1_DEP2_shift ( IROp op64,
1851 IRTemp res,
1852 IRTemp resUS,
1853 IRType ty,
1854 IRTemp guard )
1856 Int ccOp = 0;
1857 switch (ty) {
1858 case Ity_I8: ccOp = 0; break;
1859 case Ity_I16: ccOp = 1; break;
1860 case Ity_I32: ccOp = 2; break;
1861 case Ity_I64: ccOp = 3; break;
1862 default: vassert(0);
1865 vassert(guard);
1867 /* Both kinds of right shifts are handled by the same thunk
1868 operation. */
1869 switch (op64) {
1870 case Iop_Shr64:
1871 case Iop_Sar64: ccOp += AMD64G_CC_OP_SHRB; break;
1872 case Iop_Shl64: ccOp += AMD64G_CC_OP_SHLB; break;
1873 default: ppIROp(op64);
1874 vpanic("setFlags_DEP1_DEP2_shift(amd64)");
1877 /* guard :: Ity_I8. We need to convert it to I1. */
1878 IRTemp guardB = newTemp(Ity_I1);
1879 assign( guardB, binop(Iop_CmpNE8, mkexpr(guard), mkU8(0)) );
1881 /* DEP1 contains the result, DEP2 contains the undershifted value. */
1882 stmt( IRStmt_Put( OFFB_CC_OP,
1883 IRExpr_ITE( mkexpr(guardB),
1884 mkU64(ccOp),
1885 IRExpr_Get(OFFB_CC_OP,Ity_I64) ) ));
1886 stmt( IRStmt_Put( OFFB_CC_DEP1,
1887 IRExpr_ITE( mkexpr(guardB),
1888 widenUto64(mkexpr(res)),
1889 IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) ));
1890 stmt( IRStmt_Put( OFFB_CC_DEP2,
1891 IRExpr_ITE( mkexpr(guardB),
1892 widenUto64(mkexpr(resUS)),
1893 IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) ));
1897 /* For the inc/dec case, we store in DEP1 the result value and in NDEP
1898 the former value of the carry flag, which unfortunately we have to
1899 compute. */
1901 static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
1903 Int ccOp = inc ? AMD64G_CC_OP_INCB : AMD64G_CC_OP_DECB;
1905 switch (ty) {
1906 case Ity_I8: ccOp += 0; break;
1907 case Ity_I16: ccOp += 1; break;
1908 case Ity_I32: ccOp += 2; break;
1909 case Ity_I64: ccOp += 3; break;
1910 default: vassert(0);
1913 /* This has to come first, because calculating the C flag
1914 may require reading all four thunk fields. */
1915 stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) );
1916 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1917 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) );
1918 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
1922 /* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
1923 two arguments. */
1925 static
1926 void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op )
1928 switch (ty) {
1929 case Ity_I8:
1930 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+0) ) );
1931 break;
1932 case Ity_I16:
1933 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+1) ) );
1934 break;
1935 case Ity_I32:
1936 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+2) ) );
1937 break;
1938 case Ity_I64:
1939 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+3) ) );
1940 break;
1941 default:
1942 vpanic("setFlags_MUL(amd64)");
1944 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) ));
1945 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) ));
1949 /* -------------- Condition codes. -------------- */
1951 /* Condition codes, using the AMD encoding. */
1953 static const HChar* name_AMD64Condcode ( AMD64Condcode cond )
1955 switch (cond) {
1956 case AMD64CondO: return "o";
1957 case AMD64CondNO: return "no";
1958 case AMD64CondB: return "b";
1959 case AMD64CondNB: return "ae"; /*"nb";*/
1960 case AMD64CondZ: return "e"; /*"z";*/
1961 case AMD64CondNZ: return "ne"; /*"nz";*/
1962 case AMD64CondBE: return "be";
1963 case AMD64CondNBE: return "a"; /*"nbe";*/
1964 case AMD64CondS: return "s";
1965 case AMD64CondNS: return "ns";
1966 case AMD64CondP: return "p";
1967 case AMD64CondNP: return "np";
1968 case AMD64CondL: return "l";
1969 case AMD64CondNL: return "ge"; /*"nl";*/
1970 case AMD64CondLE: return "le";
1971 case AMD64CondNLE: return "g"; /*"nle";*/
1972 case AMD64CondAlways: return "ALWAYS";
1973 default: vpanic("name_AMD64Condcode");
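/* Strip the negation bit from a condition code: return the even-numbered
   member of the pair, and set *needInvert if the caller must invert the
   computed condition.  This works because the AMD encoding places each
   condition and its complement at adjacent values (cond and cond^1). */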
1977 static
1978 AMD64Condcode positiveIse_AMD64Condcode ( AMD64Condcode cond,
1979 /*OUT*/Bool* needInvert )
1981 vassert(cond >= AMD64CondO && cond <= AMD64CondNLE);
1982 if (cond & 1) {
1983 *needInvert = True;
1984 return cond-1;
1985 } else {
1986 *needInvert = False;
1987 return cond;
1992 /* -------------- Helpers for ADD/SUB with carry. -------------- */
1994 /* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
1995 appropriately.
1997 Optionally, generate a store for the 'tres' value. This can either
1998 be a normal store, or it can be a cas-with-possible-failure style
1999 store:
2001 if taddr is IRTemp_INVALID, then no store is generated.
2003 if taddr is not IRTemp_INVALID, then a store (using taddr as
2004 the address) is generated:
2006 if texpVal is IRTemp_INVALID then a normal store is
2007 generated, and restart_point must be zero (it is irrelevant).
2009 if texpVal is not IRTemp_INVALID then a cas-style store is
2010 generated. texpVal is the expected value, restart_point
2011 is the restart point if the store fails, and texpVal must
2012 have the same type as tres.
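   Note on the thunk encoding used below (and in helper_SBB): DEP1 holds
   ta1, DEP2 holds ta2 ^ old_carry, and NDEP holds the old carry itself,
   the idea being that the flags-calculation helper can recover the
   original ta2 by xor'ing DEP2 with the carry bit in NDEP.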
2015 static void helper_ADC ( Int sz,
2016 IRTemp tres, IRTemp ta1, IRTemp ta2,
2017 /* info about optional store: */
2018 IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
2020 UInt thunkOp;
2021 IRType ty = szToITy(sz);
2022 IRTemp oldc = newTemp(Ity_I64);
2023 IRTemp oldcn = newTemp(ty);
2024 IROp plus = mkSizedOp(ty, Iop_Add8);
2025 IROp xor = mkSizedOp(ty, Iop_Xor8);
2027 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
2029 switch (sz) {
2030 case 8: thunkOp = AMD64G_CC_OP_ADCQ; break;
2031 case 4: thunkOp = AMD64G_CC_OP_ADCL; break;
2032 case 2: thunkOp = AMD64G_CC_OP_ADCW; break;
2033 case 1: thunkOp = AMD64G_CC_OP_ADCB; break;
2034 default: vassert(0);
2037 /* oldc = old carry flag, 0 or 1 */
2038 assign( oldc, binop(Iop_And64,
2039 mk_amd64g_calculate_rflags_c(),
2040 mkU64(1)) );
2042 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
2044 assign( tres, binop(plus,
2045 binop(plus,mkexpr(ta1),mkexpr(ta2)),
2046 mkexpr(oldcn)) );
2048 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
2049 start of this function. */
2050 if (taddr != IRTemp_INVALID) {
2051 if (texpVal == IRTemp_INVALID) {
2052 vassert(restart_point == 0);
2053 storeLE( mkexpr(taddr), mkexpr(tres) );
2054 } else {
2055 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
2056 /* .. and hence 'texpVal' has the same type as 'tres'. */
2057 casLE( mkexpr(taddr),
2058 mkexpr(texpVal), mkexpr(tres), restart_point );
2062 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
2063 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
2064 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
2065 mkexpr(oldcn)) )) );
2066 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
2070 /* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
2071 appropriately. As with helper_ADC, possibly generate a store of
2072 the result -- see comments on helper_ADC for details.
2074 static void helper_SBB ( Int sz,
2075 IRTemp tres, IRTemp ta1, IRTemp ta2,
2076 /* info about optional store: */
2077 IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
2079 UInt thunkOp;
2080 IRType ty = szToITy(sz);
2081 IRTemp oldc = newTemp(Ity_I64);
2082 IRTemp oldcn = newTemp(ty);
2083 IROp minus = mkSizedOp(ty, Iop_Sub8);
2084 IROp xor = mkSizedOp(ty, Iop_Xor8);
2086 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
2088 switch (sz) {
2089 case 8: thunkOp = AMD64G_CC_OP_SBBQ; break;
2090 case 4: thunkOp = AMD64G_CC_OP_SBBL; break;
2091 case 2: thunkOp = AMD64G_CC_OP_SBBW; break;
2092 case 1: thunkOp = AMD64G_CC_OP_SBBB; break;
2093 default: vassert(0);
2096 /* oldc = old carry flag, 0 or 1 */
2097 assign( oldc, binop(Iop_And64,
2098 mk_amd64g_calculate_rflags_c(),
2099 mkU64(1)) );
2101 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
2103 assign( tres, binop(minus,
2104 binop(minus,mkexpr(ta1),mkexpr(ta2)),
2105 mkexpr(oldcn)) );
2107 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
2108 start of this function. */
2109 if (taddr != IRTemp_INVALID) {
2110 if (texpVal == IRTemp_INVALID) {
2111 vassert(restart_point == 0);
2112 storeLE( mkexpr(taddr), mkexpr(tres) );
2113 } else {
2114 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
2115 /* .. and hence 'texpVal' has the same type as 'tres'. */
2116 casLE( mkexpr(taddr),
2117 mkexpr(texpVal), mkexpr(tres), restart_point );
2121 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
2122 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) );
2123 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
2124 mkexpr(oldcn)) )) );
2125 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
2129 /* Given ta1, ta2 and tres, compute tres = ADCX(ta1,ta2) or tres = ADOX(ta1,ta2)
2130 and set flags appropriately.
2132 static void helper_ADCX_ADOX ( Bool isADCX, Int sz,
2133 IRTemp tres, IRTemp ta1, IRTemp ta2 )
2135 UInt thunkOp;
2136 IRType ty = szToITy(sz);
2137 IRTemp oldflags = newTemp(Ity_I64);
2138 IRTemp oldOC = newTemp(Ity_I64); // old O or C flag
2139 IRTemp oldOCn = newTemp(ty); // old O or C flag, narrowed
2140 IROp plus = mkSizedOp(ty, Iop_Add8);
2141 IROp xor = mkSizedOp(ty, Iop_Xor8);
2143 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
2145 switch (sz) {
2146 case 8: thunkOp = isADCX ? AMD64G_CC_OP_ADCX64
2147 : AMD64G_CC_OP_ADOX64; break;
2148 case 4: thunkOp = isADCX ? AMD64G_CC_OP_ADCX32
2149 : AMD64G_CC_OP_ADOX32; break;
2150 default: vassert(0);
2153 assign( oldflags, mk_amd64g_calculate_rflags_all() );
2155 /* oldOC = old overflow/carry flag, 0 or 1 */
2156 assign( oldOC, binop(Iop_And64,
2157 binop(Iop_Shr64,
2158 mkexpr(oldflags),
2159 mkU8(isADCX ? AMD64G_CC_SHIFT_C
2160 : AMD64G_CC_SHIFT_O)),
2161 mkU64(1)) );
2163 assign( oldOCn, narrowTo(ty, mkexpr(oldOC)) );
2165 assign( tres, binop(plus,
2166 binop(plus,mkexpr(ta1),mkexpr(ta2)),
2167 mkexpr(oldOCn)) );
2169 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
2170 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
2171 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
2172 mkexpr(oldOCn)) )) );
2173 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldflags) ) );
2177 /* -------------- Helpers for disassembly printing. -------------- */
2179 static const HChar* nameGrp1 ( Int opc_aux )
2181 static const HChar* grp1_names[8]
2182 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
2183 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)");
2184 return grp1_names[opc_aux];
2187 static const HChar* nameGrp2 ( Int opc_aux )
2189 static const HChar* grp2_names[8]
2190 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
2191 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)");
2192 return grp2_names[opc_aux];
2195 static const HChar* nameGrp4 ( Int opc_aux )
2197 static const HChar* grp4_names[8]
2198 = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
2199 if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)");
2200 return grp4_names[opc_aux];
2203 static const HChar* nameGrp5 ( Int opc_aux )
2205 static const HChar* grp5_names[8]
2206 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
2207 if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)");
2208 return grp5_names[opc_aux];
2211 static const HChar* nameGrp8 ( Int opc_aux )
2213 static const HChar* grp8_names[8]
2214 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
2215 if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)");
2216 return grp8_names[opc_aux];
2219 static const HChar* nameSReg ( UInt sreg )
2221 switch (sreg) {
2222 case R_ES: return "%es";
2223 case R_CS: return "%cs";
2224 case R_SS: return "%ss";
2225 case R_DS: return "%ds";
2226 case R_FS: return "%fs";
2227 case R_GS: return "%gs";
2228 default: vpanic("nameSReg(amd64)");
2232 static const HChar* nameMMXReg ( Int mmxreg )
2234 static const HChar* mmx_names[8]
2235 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
2236 if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)");
2237 return mmx_names[mmxreg];
2240 static const HChar* nameXMMReg ( Int xmmreg )
2242 static const HChar* xmm_names[16]
2243 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
2244 "%xmm4", "%xmm5", "%xmm6", "%xmm7",
2245 "%xmm8", "%xmm9", "%xmm10", "%xmm11",
2246 "%xmm12", "%xmm13", "%xmm14", "%xmm15" };
2247 if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)");
2248 return xmm_names[xmmreg];
2251 static const HChar* nameMMXGran ( Int gran )
2253 switch (gran) {
2254 case 0: return "b";
2255 case 1: return "w";
2256 case 2: return "d";
2257 case 3: return "q";
2258 default: vpanic("nameMMXGran(amd64,guest)");
2262 static HChar nameISize ( Int size )
2264 switch (size) {
2265 case 8: return 'q';
2266 case 4: return 'l';
2267 case 2: return 'w';
2268 case 1: return 'b';
2269 default: vpanic("nameISize(amd64)");
2273 static const HChar* nameYMMReg ( Int ymmreg )
2275 static const HChar* ymm_names[16]
2276 = { "%ymm0", "%ymm1", "%ymm2", "%ymm3",
2277 "%ymm4", "%ymm5", "%ymm6", "%ymm7",
2278 "%ymm8", "%ymm9", "%ymm10", "%ymm11",
2279 "%ymm12", "%ymm13", "%ymm14", "%ymm15" };
2280 if (ymmreg < 0 || ymmreg > 15) vpanic("nameYMMReg(amd64)");
2281 return ymm_names[ymmreg];
2285 /*------------------------------------------------------------*/
2286 /*--- JMP helpers ---*/
2287 /*------------------------------------------------------------*/
2289 static void jmp_lit( /*MOD*/DisResult* dres,
2290 IRJumpKind kind, Addr64 d64 )
2292 vassert(dres->whatNext == Dis_Continue);
2293 vassert(dres->len == 0);
2294 vassert(dres->jk_StopHere == Ijk_INVALID);
2295 dres->whatNext = Dis_StopHere;
2296 dres->jk_StopHere = kind;
2297 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64) ) );
2300 static void jmp_treg( /*MOD*/DisResult* dres,
2301 IRJumpKind kind, IRTemp t )
2303 vassert(dres->whatNext == Dis_Continue);
2304 vassert(dres->len == 0);
2305 vassert(dres->jk_StopHere == Ijk_INVALID);
2306 dres->whatNext = Dis_StopHere;
2307 dres->jk_StopHere = kind;
2308 stmt( IRStmt_Put( OFFB_RIP, mkexpr(t) ) );
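/* Generate a conditional branch: a side-exit to d64_true when 'cond'
   holds, otherwise a fall-through to d64_false.  The condition is first
   positive-ised; if that requires inversion, the exit and fall-through
   targets are simply swapped. */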
2311 static
2312 void jcc_01 ( /*MOD*/DisResult* dres,
2313 AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true )
2315 Bool invert;
2316 AMD64Condcode condPos;
2317 vassert(dres->whatNext == Dis_Continue);
2318 vassert(dres->len == 0);
2319 vassert(dres->jk_StopHere == Ijk_INVALID);
2320 dres->whatNext = Dis_StopHere;
2321 dres->jk_StopHere = Ijk_Boring;
2322 condPos = positiveIse_AMD64Condcode ( cond, &invert );
2323 if (invert) {
2324 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
2325 Ijk_Boring,
2326 IRConst_U64(d64_false),
2327 OFFB_RIP ) );
2328 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_true) ) );
2329 } else {
2330 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
2331 Ijk_Boring,
2332 IRConst_U64(d64_true),
2333 OFFB_RIP ) );
2334 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_false) ) );
2338 /* Let new_rsp be the %rsp value after a call/return. Let nia be the
2339 guest address of the next instruction to be executed.
2341 This function generates an AbiHint to say that -128(%rsp)
2342 .. -1(%rsp) should now be regarded as uninitialised.
2344 static
2345 void make_redzone_AbiHint ( const VexAbiInfo* vbi,
2346 IRTemp new_rsp, IRTemp nia, const HChar* who )
2348 Int szB = vbi->guest_stack_redzone_size;
2349 vassert(szB >= 0);
2351    /* A bit of a kludge.  Currently the only ABI we've guested AMD64
2352 for is ELF. So just check it's the expected 128 value
2353 (paranoia). */
2354 vassert(szB == 128);
2356 if (0) vex_printf("AbiHint: %s\n", who);
2357 vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64);
2358 vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64);
2359 if (szB > 0)
2360 stmt( IRStmt_AbiHint(
2361 binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)),
2362 szB,
2363 mkexpr(nia)
2368 /*------------------------------------------------------------*/
2369 /*--- Disassembling addressing modes ---*/
2370 /*------------------------------------------------------------*/
2372 static
2373 const HChar* segRegTxt ( Prefix pfx )
2375 if (pfx & PFX_CS) return "%cs:";
2376 if (pfx & PFX_DS) return "%ds:";
2377 if (pfx & PFX_ES) return "%es:";
2378 if (pfx & PFX_FS) return "%fs:";
2379 if (pfx & PFX_GS) return "%gs:";
2380 if (pfx & PFX_SS) return "%ss:";
2381 return ""; /* no override */
2385 /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a
2386    linear address by adding any required segment override as indicated
2387    by the prefix, and also dealing with any address size override
2388    present. */
2389 static
2390 IRExpr* handleAddrOverrides ( const VexAbiInfo* vbi,
2391 Prefix pfx, IRExpr* virtual )
2393 /* --- address size override --- */
2394 if (haveASO(pfx))
2395 virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual));
2397    /* Note that the below are hacks that rely on the assumption
2398 that %fs or %gs are constant.
2399 Typically, %fs is always 0x63 on linux (in the main thread, it
2400 stays at value 0), %gs always 0x60 on Darwin, ... */
2401 /* --- segment overrides --- */
2402 if (pfx & PFX_FS) {
2403 if (vbi->guest_amd64_assume_fs_is_const) {
2404 /* return virtual + guest_FS_CONST. */
2405 virtual = binop(Iop_Add64, virtual,
2406 IRExpr_Get(OFFB_FS_CONST, Ity_I64));
2407 } else {
2408 unimplemented("amd64 %fs segment override");
2412 if (pfx & PFX_GS) {
2413 if (vbi->guest_amd64_assume_gs_is_const) {
2414 /* return virtual + guest_GS_CONST. */
2415 virtual = binop(Iop_Add64, virtual,
2416 IRExpr_Get(OFFB_GS_CONST, Ity_I64));
2417 } else {
2418 unimplemented("amd64 %gs segment override");
2422 /* cs, ds, es and ss are simply ignored in 64-bit mode. */
2424 return virtual;
2427 //.. {
2428 //.. Int sreg;
2429 //.. IRType hWordTy;
2430 //.. IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
2431 //..
2432 //.. if (sorb == 0)
2433 //.. /* the common case - no override */
2434 //.. return virtual;
2435 //..
2436 //.. switch (sorb) {
2437 //.. case 0x3E: sreg = R_DS; break;
2438 //.. case 0x26: sreg = R_ES; break;
2439 //.. case 0x64: sreg = R_FS; break;
2440 //.. case 0x65: sreg = R_GS; break;
2441 //.. default: vpanic("handleAddrOverrides(x86,guest)");
2442 //.. }
2443 //..
2444 //.. hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
2445 //..
2446 //.. seg_selector = newTemp(Ity_I32);
2447 //.. ldt_ptr = newTemp(hWordTy);
2448 //.. gdt_ptr = newTemp(hWordTy);
2449 //.. r64 = newTemp(Ity_I64);
2450 //..
2451 //.. assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
2452 //.. assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
2453 //.. assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
2454 //..
2455 //.. /*
2456 //.. Call this to do the translation and limit checks:
2457 //.. ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
2458 //.. UInt seg_selector, UInt virtual_addr )
2459 //.. */
2460 //.. assign(
2461 //.. r64,
2462 //.. mkIRExprCCall(
2463 //.. Ity_I64,
2464 //.. 0/*regparms*/,
2465 //.. "x86g_use_seg_selector",
2466 //.. &x86g_use_seg_selector,
2467 //.. mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
2468 //.. mkexpr(seg_selector), virtual)
2469 //.. )
2470 //.. );
2471 //..
2472 //.. /* If the high 32 of the result are non-zero, there was a
2473 //.. failure in address translation. In which case, make a
2474 //.. quick exit.
2475 //.. */
2476 //.. stmt(
2477 //.. IRStmt_Exit(
2478 //.. binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
2479 //.. Ijk_MapFail,
2480 //.. IRConst_U32( guest_eip_curr_instr )
2481 //.. )
2482 //.. );
2483 //..
2484 //.. /* otherwise, here's the translated result. */
2485 //.. return unop(Iop_64to32, mkexpr(r64));
2486 //.. }
2489 /* Generate IR to calculate an address indicated by a ModRM and
2490 following SIB bytes. The expression, and the number of bytes in
2491 the address mode, are returned (the latter in *len). Note that
2492 this fn should not be called if the R/M part of the address denotes
2493    a register instead of memory.  The text of the addressing mode is
2494    placed in buf.
2496 The computed address is stored in a new tempreg, and the
2497 identity of the tempreg is returned.
2499 extra_bytes holds the number of bytes after the amode, as supplied
2500 by the caller. This is needed to make sense of %rip-relative
2501 addresses. Note that the value that *len is set to is only the
2502 length of the amode itself and does not include the value supplied
2503 in extra_bytes.
2506 static IRTemp disAMode_copy2tmp ( IRExpr* addr64 )
2508 IRTemp tmp = newTemp(Ity_I64);
2509 assign( tmp, addr64 );
2510 return tmp;
2513 static
2514 IRTemp disAMode ( /*OUT*/Int* len,
2515 const VexAbiInfo* vbi, Prefix pfx, Long delta,
2516 /*OUT*/HChar* buf, Int extra_bytes )
2518 UChar mod_reg_rm = getUChar(delta);
2519 delta++;
2521 buf[0] = (UChar)0;
2522 vassert(extra_bytes >= 0 && extra_bytes < 10);
2524 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2525 jump table seems a bit excessive.
2527 mod_reg_rm &= 0xC7; /* is now XX000YYY */
2528 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
2529 /* is now XX0XXYYY */
2530 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
2531 switch (mod_reg_rm) {
2533 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2534 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2536 case 0x00: case 0x01: case 0x02: case 0x03:
2537 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
2538 { UChar rm = toUChar(mod_reg_rm & 7);
2539 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
2540 *len = 1;
2541 return disAMode_copy2tmp(
2542 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,rm)));
2545 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2546 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2548 case 0x08: case 0x09: case 0x0A: case 0x0B:
2549 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
2550 { UChar rm = toUChar(mod_reg_rm & 7);
2551 Long d = getSDisp8(delta);
2552 if (d == 0) {
2553 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
2554 } else {
2555 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
2557 *len = 2;
2558 return disAMode_copy2tmp(
2559 handleAddrOverrides(vbi, pfx,
2560 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
2563 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2564 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2566 case 0x10: case 0x11: case 0x12: case 0x13:
2567 /* ! 14 */ case 0x15: case 0x16: case 0x17:
2568 { UChar rm = toUChar(mod_reg_rm & 7);
2569 Long d = getSDisp32(delta);
2570 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
2571 *len = 5;
2572 return disAMode_copy2tmp(
2573 handleAddrOverrides(vbi, pfx,
2574 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
2577 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2578    /* REX.B==1: a register, %r8 .. %r15.  This shouldn't happen. */
2579 case 0x18: case 0x19: case 0x1A: case 0x1B:
2580 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
2581 vpanic("disAMode(amd64): not an addr!");
2583 /* RIP + disp32. This assumes that guest_RIP_curr_instr is set
2584 correctly at the start of handling each instruction. */
2585 case 0x05:
2586 { Long d = getSDisp32(delta);
2587 *len = 5;
2588 DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d);
2589 /* We need to know the next instruction's start address.
2590 Try and figure out what it is, record the guess, and ask
2591 the top-level driver logic (bbToIR_AMD64) to check we
2592 guessed right, after the instruction is completely
2593 decoded. */
2594 guest_RIP_next_mustcheck = True;
2595 guest_RIP_next_assumed = guest_RIP_bbstart
2596 + delta+4 + extra_bytes;
2597 return disAMode_copy2tmp(
2598 handleAddrOverrides(vbi, pfx,
2599 binop(Iop_Add64, mkU64(guest_RIP_next_assumed),
2600 mkU64(d))));
2603 case 0x04: {
2604 /* SIB, with no displacement. Special cases:
2605 -- %rsp cannot act as an index value.
2606 If index_r indicates %rsp, zero is used for the index.
2607 -- when mod is zero and base indicates RBP or R13, base is
2608 instead a 32-bit sign-extended literal.
2609 It's all madness, I tell you. Extract %index, %base and
2610 scale from the SIB byte. The value denoted is then:
2611 | %index == %RSP && (%base == %RBP || %base == %R13)
2612 = d32 following SIB byte
2613 | %index == %RSP && !(%base == %RBP || %base == %R13)
2614 = %base
2615 | %index != %RSP && (%base == %RBP || %base == %R13)
2616 = d32 following SIB byte + (%index << scale)
2617 | %index != %RSP && !(%base == %RBP || %base == %R13)
2618 = %base + (%index << scale)
2620 UChar sib = getUChar(delta);
2621 UChar scale = toUChar((sib >> 6) & 3);
2622 UChar index_r = toUChar((sib >> 3) & 7);
2623 UChar base_r = toUChar(sib & 7);
2624 /* correct since #(R13) == 8 + #(RBP) */
2625 Bool base_is_BPor13 = toBool(base_r == R_RBP);
2626 Bool index_is_SP = toBool(index_r == R_RSP && 0==getRexX(pfx));
2627 delta++;
2629 if ((!index_is_SP) && (!base_is_BPor13)) {
2630 if (scale == 0) {
2631 DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
2632 nameIRegRexB(8,pfx,base_r),
2633 nameIReg64rexX(pfx,index_r));
2634 } else {
2635 DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
2636 nameIRegRexB(8,pfx,base_r),
2637 nameIReg64rexX(pfx,index_r), 1<<scale);
2639 *len = 2;
2640 return
2641 disAMode_copy2tmp(
2642 handleAddrOverrides(vbi, pfx,
2643 binop(Iop_Add64,
2644 getIRegRexB(8,pfx,base_r),
2645 binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
2646 mkU8(scale)))));
2649 if ((!index_is_SP) && base_is_BPor13) {
2650 Long d = getSDisp32(delta);
2651 DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d,
2652 nameIReg64rexX(pfx,index_r), 1<<scale);
2653 *len = 6;
2654 return
2655 disAMode_copy2tmp(
2656 handleAddrOverrides(vbi, pfx,
2657 binop(Iop_Add64,
2658 binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
2659 mkU8(scale)),
2660 mkU64(d))));
2663 if (index_is_SP && (!base_is_BPor13)) {
2664 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r));
2665 *len = 2;
2666 return disAMode_copy2tmp(
2667 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,base_r)));
2670 if (index_is_SP && base_is_BPor13) {
2671 Long d = getSDisp32(delta);
2672 DIS(buf, "%s%lld", segRegTxt(pfx), d);
2673 *len = 6;
2674 return disAMode_copy2tmp(
2675 handleAddrOverrides(vbi, pfx, mkU64(d)));
2678 vassert(0);
2681 /* SIB, with 8-bit displacement. Special cases:
2682       -- %rsp cannot act as an index value.
2683          If index_r indicates %rsp, zero is used for the index.
2684       Denoted value is:
2685       | %index == %RSP
2686       = d8 + %base
2687       | %index != %RSP
2688       = d8 + %base + (%index << scale)
2690 case 0x0C: {
2691 UChar sib = getUChar(delta);
2692 UChar scale = toUChar((sib >> 6) & 3);
2693 UChar index_r = toUChar((sib >> 3) & 7);
2694 UChar base_r = toUChar(sib & 7);
2695 Long d = getSDisp8(delta+1);
2697 if (index_r == R_RSP && 0==getRexX(pfx)) {
2698 DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
2699 d, nameIRegRexB(8,pfx,base_r));
2700 *len = 3;
2701 return disAMode_copy2tmp(
2702 handleAddrOverrides(vbi, pfx,
2703 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
2704 } else {
2705 if (scale == 0) {
2706 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
2707 nameIRegRexB(8,pfx,base_r),
2708 nameIReg64rexX(pfx,index_r));
2709 } else {
2710 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
2711 nameIRegRexB(8,pfx,base_r),
2712 nameIReg64rexX(pfx,index_r), 1<<scale);
2714 *len = 3;
2715 return
2716 disAMode_copy2tmp(
2717 handleAddrOverrides(vbi, pfx,
2718 binop(Iop_Add64,
2719 binop(Iop_Add64,
2720 getIRegRexB(8,pfx,base_r),
2721 binop(Iop_Shl64,
2722 getIReg64rexX(pfx,index_r), mkU8(scale))),
2723 mkU64(d))));
2725 vassert(0); /*NOTREACHED*/
2728 /* SIB, with 32-bit displacement. Special cases:
2729 -- %rsp cannot act as an index value.
2730 If index_r indicates %rsp, zero is used for the index.
2731 Denoted value is:
2732 | %index == %RSP
2733 = d32 + %base
2734 | %index != %RSP
2735 = d32 + %base + (%index << scale)
2737 case 0x14: {
2738 UChar sib = getUChar(delta);
2739 UChar scale = toUChar((sib >> 6) & 3);
2740 UChar index_r = toUChar((sib >> 3) & 7);
2741 UChar base_r = toUChar(sib & 7);
2742 Long d = getSDisp32(delta+1);
2744 if (index_r == R_RSP && 0==getRexX(pfx)) {
2745 DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
2746 d, nameIRegRexB(8,pfx,base_r));
2747 *len = 6;
2748 return disAMode_copy2tmp(
2749 handleAddrOverrides(vbi, pfx,
2750 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
2751 } else {
2752 if (scale == 0) {
2753 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
2754 nameIRegRexB(8,pfx,base_r),
2755 nameIReg64rexX(pfx,index_r));
2756 } else {
2757 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
2758 nameIRegRexB(8,pfx,base_r),
2759 nameIReg64rexX(pfx,index_r), 1<<scale);
2761 *len = 6;
2762 return
2763 disAMode_copy2tmp(
2764 handleAddrOverrides(vbi, pfx,
2765 binop(Iop_Add64,
2766 binop(Iop_Add64,
2767 getIRegRexB(8,pfx,base_r),
2768 binop(Iop_Shl64,
2769 getIReg64rexX(pfx,index_r), mkU8(scale))),
2770 mkU64(d))));
2772 vassert(0); /*NOTREACHED*/
2775 default:
2776 vpanic("disAMode(amd64)");
2777 return 0; /*notreached*/
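/* Worked example (added, illustrative only).  Suppose the insn stream at
   'delta' contains the bytes 0x44 0x98 0x08: ModRM 0x44 has mod=01 and
   rm=100, so a SIB byte plus a disp8 follow; SIB 0x98 gives scale=4,
   index=%rbx, base=%rax; the disp8 is 8.  Assuming no REX.B/REX.X bits and
   no segment override in pfx, a call shaped like the one sketched here
   would return a temp holding %rax + %rbx*4 + 8, set the length to 3, and
   write "8(%rax,%rbx,4)" into the buffer. */
#if 0
   Int    alen;
   HChar  dis_buf[50];
   IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
#endif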
2782 /* Similarly for VSIB addressing. This returns just the addend,
2783 and fills in *rI and *vscale with the register number of the vector
2784 index and its multiplicand. */
2785 static
2786 IRTemp disAVSIBMode ( /*OUT*/Int* len,
2787 const VexAbiInfo* vbi, Prefix pfx, Long delta,
2788 /*OUT*/HChar* buf, /*OUT*/UInt* rI,
2789 IRType ty, /*OUT*/Int* vscale )
2791 UChar mod_reg_rm = getUChar(delta);
2792 const HChar *vindex;
2794 *len = 0;
2795 *rI = 0;
2796 *vscale = 0;
2797 buf[0] = (UChar)0;
2798 if ((mod_reg_rm & 7) != 4 || epartIsReg(mod_reg_rm))
2799 return IRTemp_INVALID;
2801 UChar sib = getUChar(delta+1);
2802 UChar scale = toUChar((sib >> 6) & 3);
2803 UChar index_r = toUChar((sib >> 3) & 7);
2804 UChar base_r = toUChar(sib & 7);
2805 Long d = 0;
2806 /* correct since #(R13) == 8 + #(RBP) */
2807 Bool base_is_BPor13 = toBool(base_r == R_RBP);
2808 delta += 2;
2809 *len = 2;
2811 *rI = index_r | (getRexX(pfx) << 3);
2812 if (ty == Ity_V128)
2813 vindex = nameXMMReg(*rI);
2814 else
2815 vindex = nameYMMReg(*rI);
2816 *vscale = 1<<scale;
2818 switch (mod_reg_rm >> 6) {
2819 case 0:
2820 if (base_is_BPor13) {
2821 d = getSDisp32(delta);
2822 *len += 4;
2823 if (scale == 0) {
2824 DIS(buf, "%s%lld(,%s)", segRegTxt(pfx), d, vindex);
2825 } else {
2826 DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d, vindex, 1<<scale);
2828 return disAMode_copy2tmp( mkU64(d) );
2829 } else {
2830 if (scale == 0) {
2831 DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
2832 nameIRegRexB(8,pfx,base_r), vindex);
2833 } else {
2834 DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
2835 nameIRegRexB(8,pfx,base_r), vindex, 1<<scale);
2838 break;
2839 case 1:
2840 d = getSDisp8(delta);
2841 *len += 1;
2842 goto have_disp;
2843 case 2:
2844 d = getSDisp32(delta);
2845 *len += 4;
2846 have_disp:
2847 if (scale == 0) {
2848 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
2849 nameIRegRexB(8,pfx,base_r), vindex);
2850 } else {
2851 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
2852 nameIRegRexB(8,pfx,base_r), vindex, 1<<scale);
2854 break;
2857 if (!d)
2858 return disAMode_copy2tmp( getIRegRexB(8,pfx,base_r) );
2859 return disAMode_copy2tmp( binop(Iop_Add64, getIRegRexB(8,pfx,base_r),
2860 mkU64(d)) );
2864 /* Figure out the number of (insn-stream) bytes constituting the amode
2865    beginning at delta.  This is useful for getting hold of literals beyond
2866 the end of the amode before it has been disassembled. */
2868 static UInt lengthAMode ( Prefix pfx, Long delta )
2870 UChar mod_reg_rm = getUChar(delta);
2871 delta++;
2873 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2874 jump table seems a bit excessive.
2876 mod_reg_rm &= 0xC7; /* is now XX000YYY */
2877 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
2878 /* is now XX0XXYYY */
2879 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
2880 switch (mod_reg_rm) {
2882 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2883 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2885 case 0x00: case 0x01: case 0x02: case 0x03:
2886 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
2887 return 1;
2889 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2890 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2892 case 0x08: case 0x09: case 0x0A: case 0x0B:
2893 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
2894 return 2;
2896 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2897 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2899 case 0x10: case 0x11: case 0x12: case 0x13:
2900 /* ! 14 */ case 0x15: case 0x16: case 0x17:
2901 return 5;
2903 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2904    /* REX.B==1: a register, %r8 .. %r15.  This shouldn't happen. */
2905 /* Not an address, but still handled. */
2906 case 0x18: case 0x19: case 0x1A: case 0x1B:
2907 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
2908 return 1;
2910 /* RIP + disp32. */
2911 case 0x05:
2912 return 5;
2914 case 0x04: {
2915 /* SIB, with no displacement. */
2916 UChar sib = getUChar(delta);
2917 UChar base_r = toUChar(sib & 7);
2918 /* correct since #(R13) == 8 + #(RBP) */
2919 Bool base_is_BPor13 = toBool(base_r == R_RBP);
2921 if (base_is_BPor13) {
2922 return 6;
2923 } else {
2924 return 2;
2928 /* SIB, with 8-bit displacement. */
2929 case 0x0C:
2930 return 3;
2932 /* SIB, with 32-bit displacement. */
2933 case 0x14:
2934 return 6;
2936 default:
2937 vpanic("lengthAMode(amd64)");
2938 return 0; /*notreached*/
2943 /*------------------------------------------------------------*/
2944 /*--- Disassembling common idioms ---*/
2945 /*------------------------------------------------------------*/
2947 typedef
2948 enum { WithFlagNone=2, WithFlagCarry, WithFlagCarryX, WithFlagOverX }
2949 WithFlag;
2951 /* Handle binary integer instructions of the form
2952 op E, G meaning
2953 op reg-or-mem, reg
2954    Is passed a pointer to the modRM byte, the actual operation, and the
2955 data size. Returns the address advanced completely over this
2956 instruction.
2958 E(src) is reg-or-mem
2959 G(dst) is reg.
2961 If E is reg, --> GET %G, tmp
2962 OP %E, tmp
2963 PUT tmp, %G
2965 If E is mem and OP is not reversible,
2966 --> (getAddr E) -> tmpa
2967 LD (tmpa), tmpa
2968 GET %G, tmp2
2969 OP tmpa, tmp2
2970 PUT tmp2, %G
2972 If E is mem and OP is reversible
2973 --> (getAddr E) -> tmpa
2974 LD (tmpa), tmpa
2975 OP %G, tmpa
2976 PUT tmpa, %G
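/* Hypothetical call site (added, illustrative only): this is the shape in
   which the top-level opcode decoder would be expected to invoke the
   function below for a plain reg-or-mem-to-reg ADD of the prevailing
   operand size 'sz' at offset 'delta'. */
#if 0
   delta = dis_op2_E_G( vbi, pfx, Iop_Add8, WithFlagNone, True/*keep*/,
                        sz, delta, "add" );
#endif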
2978 static
2979 ULong dis_op2_E_G ( const VexAbiInfo* vbi,
2980 Prefix pfx,
2981 IROp op8,
2982 WithFlag flag,
2983 Bool keep,
2984 Int size,
2985 Long delta0,
2986 const HChar* t_amd64opc )
2988 HChar dis_buf[50];
2989 Int len;
2990 IRType ty = szToITy(size);
2991 IRTemp dst1 = newTemp(ty);
2992 IRTemp src = newTemp(ty);
2993 IRTemp dst0 = newTemp(ty);
2994 UChar rm = getUChar(delta0);
2995 IRTemp addr = IRTemp_INVALID;
2997 /* Stay sane -- check for valid (op8, flag, keep) combinations. */
2998 switch (op8) {
2999 case Iop_Add8:
3000 switch (flag) {
3001 case WithFlagNone: case WithFlagCarry:
3002 case WithFlagCarryX: case WithFlagOverX:
3003 vassert(keep);
3004 break;
3005 default:
3006 vassert(0);
3008 break;
3009 case Iop_Sub8:
3010 vassert(flag == WithFlagNone || flag == WithFlagCarry);
3011 if (flag == WithFlagCarry) vassert(keep);
3012 break;
3013 case Iop_And8:
3014 vassert(flag == WithFlagNone);
3015 break;
3016 case Iop_Or8: case Iop_Xor8:
3017 vassert(flag == WithFlagNone);
3018 vassert(keep);
3019 break;
3020 default:
3021 vassert(0);
3024 if (epartIsReg(rm)) {
3025 /* Specially handle XOR reg,reg, because that doesn't really
3026 depend on reg, and doing the obvious thing potentially
3027 generates a spurious value check failure due to the bogus
3028 dependency. Ditto SUB/SBB reg,reg. */
3029 if ((op8 == Iop_Xor8 || ((op8 == Iop_Sub8) && keep))
3030 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
3031 putIRegG(size,pfx,rm, mkU(ty,0));
3034 assign( dst0, getIRegG(size,pfx,rm) );
3035 assign( src, getIRegE(size,pfx,rm) );
3037 if (op8 == Iop_Add8 && flag == WithFlagCarry) {
3038 helper_ADC( size, dst1, dst0, src,
3039 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3040 putIRegG(size, pfx, rm, mkexpr(dst1));
3041 } else
3042 if (op8 == Iop_Sub8 && flag == WithFlagCarry) {
3043 helper_SBB( size, dst1, dst0, src,
3044 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3045 putIRegG(size, pfx, rm, mkexpr(dst1));
3046 } else
3047 if (op8 == Iop_Add8 && flag == WithFlagCarryX) {
3048 helper_ADCX_ADOX( True/*isADCX*/, size, dst1, dst0, src );
3049 putIRegG(size, pfx, rm, mkexpr(dst1));
3050 } else
3051 if (op8 == Iop_Add8 && flag == WithFlagOverX) {
3052 helper_ADCX_ADOX( False/*!isADCX*/, size, dst1, dst0, src );
3053 putIRegG(size, pfx, rm, mkexpr(dst1));
3054 } else {
3055 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
3056 if (isAddSub(op8))
3057 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3058 else
3059 setFlags_DEP1(op8, dst1, ty);
3060 if (keep)
3061 putIRegG(size, pfx, rm, mkexpr(dst1));
3064 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
3065 nameIRegE(size,pfx,rm),
3066 nameIRegG(size,pfx,rm));
3067 return 1+delta0;
3068 } else {
3069 /* E refers to memory */
3070 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
3071 assign( dst0, getIRegG(size,pfx,rm) );
3072 assign( src, loadLE(szToITy(size), mkexpr(addr)) );
3074 if (op8 == Iop_Add8 && flag == WithFlagCarry) {
3075 helper_ADC( size, dst1, dst0, src,
3076 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3077 putIRegG(size, pfx, rm, mkexpr(dst1));
3078 } else
3079 if (op8 == Iop_Sub8 && flag == WithFlagCarry) {
3080 helper_SBB( size, dst1, dst0, src,
3081 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3082 putIRegG(size, pfx, rm, mkexpr(dst1));
3083 } else
3084 if (op8 == Iop_Add8 && flag == WithFlagCarryX) {
3085 helper_ADCX_ADOX( True/*isADCX*/, size, dst1, dst0, src );
3086 putIRegG(size, pfx, rm, mkexpr(dst1));
3087 } else
3088 if (op8 == Iop_Add8 && flag == WithFlagOverX) {
3089 helper_ADCX_ADOX( False/*!isADCX*/, size, dst1, dst0, src );
3090 putIRegG(size, pfx, rm, mkexpr(dst1));
3091 } else {
3092 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
3093 if (isAddSub(op8))
3094 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3095 else
3096 setFlags_DEP1(op8, dst1, ty);
3097 if (keep)
3098 putIRegG(size, pfx, rm, mkexpr(dst1));
3101 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
3102 dis_buf, nameIRegG(size, pfx, rm));
3103 return len+delta0;
3109 /* Handle binary integer instructions of the form
3110 op G, E meaning
3111 op reg, reg-or-mem
3112    Is passed a pointer to the modRM byte, the actual operation, and the
3113 data size. Returns the address advanced completely over this
3114 instruction.
3116 G(src) is reg.
3117 E(dst) is reg-or-mem
3119 If E is reg, --> GET %E, tmp
3120 OP %G, tmp
3121 PUT tmp, %E
3123 If E is mem, --> (getAddr E) -> tmpa
3124 LD (tmpa), tmpv
3125 OP %G, tmpv
3126 ST tmpv, (tmpa)
3128 static
3129 ULong dis_op2_G_E ( const VexAbiInfo* vbi,
3130 Prefix pfx,
3131 IROp op8,
3132 WithFlag flag,
3133 Bool keep,
3134 Int size,
3135 Long delta0,
3136 const HChar* t_amd64opc )
3138 HChar dis_buf[50];
3139 Int len;
3140 IRType ty = szToITy(size);
3141 IRTemp dst1 = newTemp(ty);
3142 IRTemp src = newTemp(ty);
3143 IRTemp dst0 = newTemp(ty);
3144 UChar rm = getUChar(delta0);
3145 IRTemp addr = IRTemp_INVALID;
3147 /* Stay sane -- check for valid (op8, flag, keep) combinations. */
3148 switch (op8) {
3149 case Iop_Add8:
3150 vassert(flag == WithFlagNone || flag == WithFlagCarry);
3151 vassert(keep);
3152 break;
3153 case Iop_Sub8:
3154 vassert(flag == WithFlagNone || flag == WithFlagCarry);
3155 if (flag == WithFlagCarry) vassert(keep);
3156 break;
3157 case Iop_And8: case Iop_Or8: case Iop_Xor8:
3158 vassert(flag == WithFlagNone);
3159 vassert(keep);
3160 break;
3161 default:
3162 vassert(0);
3165 /* flag != WithFlagNone is only allowed for Add and Sub and indicates the
3166 intended operation is add-with-carry or subtract-with-borrow. */
3168 if (epartIsReg(rm)) {
3169 /* Specially handle XOR reg,reg, because that doesn't really
3170 depend on reg, and doing the obvious thing potentially
3171 generates a spurious value check failure due to the bogus
3172 dependency. Ditto SUB/SBB reg,reg. */
3173 if ((op8 == Iop_Xor8 || ((op8 == Iop_Sub8) && keep))
3174 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
3175 putIRegE(size,pfx,rm, mkU(ty,0));
3178 assign(dst0, getIRegE(size,pfx,rm));
3179 assign(src, getIRegG(size,pfx,rm));
3181 if (op8 == Iop_Add8 && flag == WithFlagCarry) {
3182 helper_ADC( size, dst1, dst0, src,
3183 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3184 putIRegE(size, pfx, rm, mkexpr(dst1));
3185 } else
3186 if (op8 == Iop_Sub8 && flag == WithFlagCarry) {
3187 helper_SBB( size, dst1, dst0, src,
3188 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3189 putIRegE(size, pfx, rm, mkexpr(dst1));
3190 } else {
3191 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3192 if (isAddSub(op8))
3193 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3194 else
3195 setFlags_DEP1(op8, dst1, ty);
3196 if (keep)
3197 putIRegE(size, pfx, rm, mkexpr(dst1));
3200 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
3201 nameIRegG(size,pfx,rm),
3202 nameIRegE(size,pfx,rm));
3203 return 1+delta0;
3206 /* E refers to memory */
3208 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
3209 assign(dst0, loadLE(ty,mkexpr(addr)));
3210 assign(src, getIRegG(size,pfx,rm));
3212 if (op8 == Iop_Add8 && flag == WithFlagCarry) {
3213 if (haveLOCK(pfx)) {
3214 /* cas-style store */
3215 helper_ADC( size, dst1, dst0, src,
3216 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3217 } else {
3218 /* normal store */
3219 helper_ADC( size, dst1, dst0, src,
3220 /*store*/addr, IRTemp_INVALID, 0 );
3222 } else
3223 if (op8 == Iop_Sub8 && flag == WithFlagCarry) {
3224 if (haveLOCK(pfx)) {
3225 /* cas-style store */
3226 helper_SBB( size, dst1, dst0, src,
3227 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3228 } else {
3229 /* normal store */
3230 helper_SBB( size, dst1, dst0, src,
3231 /*store*/addr, IRTemp_INVALID, 0 );
3233 } else {
3234 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3235 if (keep) {
3236 if (haveLOCK(pfx)) {
3237 if (0) vex_printf("locked case\n" );
3238 casLE( mkexpr(addr),
3239 mkexpr(dst0)/*expval*/,
3240 mkexpr(dst1)/*newval*/, guest_RIP_curr_instr );
3241 } else {
3242 if (0) vex_printf("nonlocked case\n");
3243 storeLE(mkexpr(addr), mkexpr(dst1));
3246 if (isAddSub(op8))
3247 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3248 else
3249 setFlags_DEP1(op8, dst1, ty);
3252 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
3253 nameIRegG(size,pfx,rm), dis_buf);
3254 return len+delta0;
3259 /* Handle move instructions of the form
3260 mov E, G meaning
3261 mov reg-or-mem, reg
3262    Is passed a pointer to the modRM byte, and the data size.  Returns
3263 the address advanced completely over this instruction.
3265 E(src) is reg-or-mem
3266 G(dst) is reg.
3268 If E is reg, --> GET %E, tmpv
3269 PUT tmpv, %G
3271 If E is mem --> (getAddr E) -> tmpa
3272 LD (tmpa), tmpb
3273 PUT tmpb, %G
3275 static
3276 ULong dis_mov_E_G ( const VexAbiInfo* vbi,
3277 Prefix pfx,
3278 Int size,
3279 Long delta0 )
3281 Int len;
3282 UChar rm = getUChar(delta0);
3283 HChar dis_buf[50];
3285 if (epartIsReg(rm)) {
3286 putIRegG(size, pfx, rm, getIRegE(size, pfx, rm));
3287 DIP("mov%c %s,%s\n", nameISize(size),
3288 nameIRegE(size,pfx,rm),
3289 nameIRegG(size,pfx,rm));
3290 return 1+delta0;
3293 /* E refers to memory */
3295 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
3296 putIRegG(size, pfx, rm, loadLE(szToITy(size), mkexpr(addr)));
3297 DIP("mov%c %s,%s\n", nameISize(size),
3298 dis_buf,
3299 nameIRegG(size,pfx,rm));
3300 return delta0+len;
3305 /* Handle move instructions of the form
3306 mov G, E meaning
3307 mov reg, reg-or-mem
3308    Is passed a pointer to the modRM byte, and the data size.  Returns
3309 the address advanced completely over this instruction.
3310 We have to decide here whether F2 or F3 are acceptable. F2 never is.
3312 G(src) is reg.
3313 E(dst) is reg-or-mem
3315 If E is reg, --> GET %G, tmp
3316 PUT tmp, %E
3318 If E is mem, --> (getAddr E) -> tmpa
3319 GET %G, tmpv
3320 ST tmpv, (tmpa)
3322 static
3323 ULong dis_mov_G_E ( const VexAbiInfo* vbi,
3324 Prefix pfx,
3325 Int size,
3326 Long delta0,
3327 /*OUT*/Bool* ok )
3329 Int len;
3330 UChar rm = getUChar(delta0);
3331 HChar dis_buf[50];
3333 *ok = True;
3335 if (epartIsReg(rm)) {
3336 if (haveF2orF3(pfx)) { *ok = False; return delta0; }
3337 putIRegE(size, pfx, rm, getIRegG(size, pfx, rm));
3338 DIP("mov%c %s,%s\n", nameISize(size),
3339 nameIRegG(size,pfx,rm),
3340 nameIRegE(size,pfx,rm));
3341 return 1+delta0;
3344 /* E refers to memory */
3346 if (haveF2(pfx)) { *ok = False; return delta0; }
3347 /* F3(XRELEASE) is acceptable, though. */
3348 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
3349 storeLE( mkexpr(addr), getIRegG(size, pfx, rm) );
3350 DIP("mov%c %s,%s\n", nameISize(size),
3351 nameIRegG(size,pfx,rm),
3352 dis_buf);
3353 return len+delta0;
3358 /* op $immediate, AL/AX/EAX/RAX. */
3359 static
3360 ULong dis_op_imm_A ( Int size,
3361 Bool carrying,
3362 IROp op8,
3363 Bool keep,
3364 Long delta,
3365 const HChar* t_amd64opc )
3367 Int size4 = imin(size,4);
3368 IRType ty = szToITy(size);
3369 IRTemp dst0 = newTemp(ty);
3370 IRTemp src = newTemp(ty);
3371 IRTemp dst1 = newTemp(ty);
3372 Long lit = getSDisp(size4,delta);
3373 assign(dst0, getIRegRAX(size));
3374 assign(src, mkU(ty,lit & mkSizeMask(size)));
3376 if (isAddSub(op8) && !carrying) {
3377 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
3378 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3380 else
3381 if (isLogic(op8)) {
3382 vassert(!carrying);
3383 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
3384 setFlags_DEP1(op8, dst1, ty);
3386 else
3387 if (op8 == Iop_Add8 && carrying) {
3388 helper_ADC( size, dst1, dst0, src,
3389 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3391 else
3392 if (op8 == Iop_Sub8 && carrying) {
3393 helper_SBB( size, dst1, dst0, src,
3394 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3396 else
3397 vpanic("dis_op_imm_A(amd64,guest)");
3399 if (keep)
3400 putIRegRAX(size, mkexpr(dst1));
3402 DIP("%s%c $%lld, %s\n", t_amd64opc, nameISize(size),
3403 lit, nameIRegRAX(size));
3404 return delta+size4;
3408 /* Sign- and Zero-extending moves. */
3409 static
3410 ULong dis_movx_E_G ( const VexAbiInfo* vbi,
3411 Prefix pfx,
3412 Long delta, Int szs, Int szd, Bool sign_extend )
3414 UChar rm = getUChar(delta);
3415 if (epartIsReg(rm)) {
3416 putIRegG(szd, pfx, rm,
3417 doScalarWidening(
3418 szs,szd,sign_extend,
3419 getIRegE(szs,pfx,rm)));
3420 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
3421 nameISize(szs),
3422 nameISize(szd),
3423 nameIRegE(szs,pfx,rm),
3424 nameIRegG(szd,pfx,rm));
3425 return 1+delta;
3428 /* E refers to memory */
3430 Int len;
3431 HChar dis_buf[50];
3432 IRTemp addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
3433 putIRegG(szd, pfx, rm,
3434 doScalarWidening(
3435 szs,szd,sign_extend,
3436 loadLE(szToITy(szs),mkexpr(addr))));
3437 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
3438 nameISize(szs),
3439 nameISize(szd),
3440 dis_buf,
3441 nameIRegG(szd,pfx,rm));
3442 return len+delta;
3447 /* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by
3448 the 64 / 32 / 16 / 8 bit quantity in the given IRTemp. */
3449 static
3450 void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
3452 /* special-case the 64-bit case */
3453 if (sz == 8) {
3454 IROp op = signed_divide ? Iop_DivModS128to64
3455 : Iop_DivModU128to64;
3456 IRTemp src128 = newTemp(Ity_I128);
3457 IRTemp dst128 = newTemp(Ity_I128);
3458 assign( src128, binop(Iop_64HLto128,
3459 getIReg64(R_RDX),
3460 getIReg64(R_RAX)) );
3461 assign( dst128, binop(op, mkexpr(src128), mkexpr(t)) );
3462 putIReg64( R_RAX, unop(Iop_128to64,mkexpr(dst128)) );
3463 putIReg64( R_RDX, unop(Iop_128HIto64,mkexpr(dst128)) );
3464 } else {
3465 IROp op = signed_divide ? Iop_DivModS64to32
3466 : Iop_DivModU64to32;
3467 IRTemp src64 = newTemp(Ity_I64);
3468 IRTemp dst64 = newTemp(Ity_I64);
3469 switch (sz) {
3470 case 4:
3471 assign( src64,
3472 binop(Iop_32HLto64, getIRegRDX(4), getIRegRAX(4)) );
3473 assign( dst64,
3474 binop(op, mkexpr(src64), mkexpr(t)) );
3475 putIRegRAX( 4, unop(Iop_64to32,mkexpr(dst64)) );
3476 putIRegRDX( 4, unop(Iop_64HIto32,mkexpr(dst64)) );
3477 break;
3478 case 2: {
3479 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
3480 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
3481 assign( src64, unop(widen3264,
3482 binop(Iop_16HLto32,
3483 getIRegRDX(2),
3484 getIRegRAX(2))) );
3485 assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) );
3486 putIRegRAX( 2, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) );
3487 putIRegRDX( 2, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) );
3488 break;
3490 case 1: {
3491 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
3492 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
3493 IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16;
3494 assign( src64, unop(widen3264,
3495 unop(widen1632, getIRegRAX(2))) );
3496 assign( dst64,
3497 binop(op, mkexpr(src64),
3498 unop(widen1632, unop(widen816, mkexpr(t)))) );
3499 putIRegRAX( 1, unop(Iop_16to8,
3500 unop(Iop_32to16,
3501 unop(Iop_64to32,mkexpr(dst64)))) );
3502 putIRegAH( unop(Iop_16to8,
3503 unop(Iop_32to16,
3504 unop(Iop_64HIto32,mkexpr(dst64)))) );
3505 break;
3507 default:
3508 vpanic("codegen_div(amd64)");
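/* Hypothetical usage sketch (added, illustrative only): a 32-bit unsigned
   DIV by the value held in an Ity_I32 temp 't32' would be generated as
   below; the quotient lands in %eax and the remainder in %edx, per the
   function above. */
#if 0
   codegen_div( 4, t32, False/*unsigned*/ );
#endif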
3513 static
3514 ULong dis_Grp1 ( const VexAbiInfo* vbi,
3515 Prefix pfx,
3516 Long delta, UChar modrm,
3517 Int am_sz, Int d_sz, Int sz, Long d64 )
3519 Int len;
3520 HChar dis_buf[50];
3521 IRType ty = szToITy(sz);
3522 IRTemp dst1 = newTemp(ty);
3523 IRTemp src = newTemp(ty);
3524 IRTemp dst0 = newTemp(ty);
3525 IRTemp addr = IRTemp_INVALID;
3526 IROp op8 = Iop_INVALID;
3527 ULong mask = mkSizeMask(sz);
3529 switch (gregLO3ofRM(modrm)) {
3530 case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break;
3531 case 2: break; // ADC
3532 case 3: break; // SBB
3533 case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break;
3534 case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break;
3535 /*NOTREACHED*/
3536 default: vpanic("dis_Grp1(amd64): unhandled case");
3539 if (epartIsReg(modrm)) {
3540 vassert(am_sz == 1);
3542 assign(dst0, getIRegE(sz,pfx,modrm));
3543 assign(src, mkU(ty,d64 & mask));
3545 if (gregLO3ofRM(modrm) == 2 /* ADC */) {
3546 helper_ADC( sz, dst1, dst0, src,
3547 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3548 } else
3549 if (gregLO3ofRM(modrm) == 3 /* SBB */) {
3550 helper_SBB( sz, dst1, dst0, src,
3551 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3552 } else {
3553 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3554 if (isAddSub(op8))
3555 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3556 else
3557 setFlags_DEP1(op8, dst1, ty);
3560 if (gregLO3ofRM(modrm) < 7)
3561 putIRegE(sz, pfx, modrm, mkexpr(dst1));
3563 delta += (am_sz + d_sz);
3564 DIP("%s%c $%lld, %s\n",
3565 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), d64,
3566 nameIRegE(sz,pfx,modrm));
3567 } else {
3568 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
3570 assign(dst0, loadLE(ty,mkexpr(addr)));
3571 assign(src, mkU(ty,d64 & mask));
3573 if (gregLO3ofRM(modrm) == 2 /* ADC */) {
3574 if (haveLOCK(pfx)) {
3575 /* cas-style store */
3576 helper_ADC( sz, dst1, dst0, src,
3577 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3578 } else {
3579 /* normal store */
3580 helper_ADC( sz, dst1, dst0, src,
3581 /*store*/addr, IRTemp_INVALID, 0 );
3583 } else
3584 if (gregLO3ofRM(modrm) == 3 /* SBB */) {
3585 if (haveLOCK(pfx)) {
3586 /* cas-style store */
3587 helper_SBB( sz, dst1, dst0, src,
3588 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3589 } else {
3590 /* normal store */
3591 helper_SBB( sz, dst1, dst0, src,
3592 /*store*/addr, IRTemp_INVALID, 0 );
3594 } else {
3595 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3596 if (gregLO3ofRM(modrm) < 7) {
3597 if (haveLOCK(pfx)) {
3598 casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
3599 mkexpr(dst1)/*newVal*/,
3600 guest_RIP_curr_instr );
3601 } else {
3602 storeLE(mkexpr(addr), mkexpr(dst1));
3605 if (isAddSub(op8))
3606 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3607 else
3608 setFlags_DEP1(op8, dst1, ty);
3611 delta += (len+d_sz);
3612 DIP("%s%c $%lld, %s\n",
3613 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz),
3614 d64, dis_buf);
3616 return delta;
3620 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed
3621 expression. */
3623 static
3624 ULong dis_Grp2 ( const VexAbiInfo* vbi,
3625 Prefix pfx,
3626 Long delta, UChar modrm,
3627 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr,
3628 const HChar* shift_expr_txt, Bool* decode_OK )
3630 /* delta on entry points at the modrm byte. */
3631 HChar dis_buf[50];
3632 Int len;
3633 Bool isShift, isRotate, isRotateC;
3634 IRType ty = szToITy(sz);
3635 IRTemp dst0 = newTemp(ty);
3636 IRTemp dst1 = newTemp(ty);
3637 IRTemp addr = IRTemp_INVALID;
3639 *decode_OK = True;
3641 vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
3643 /* Put value to shift/rotate in dst0. */
3644 if (epartIsReg(modrm)) {
3645 assign(dst0, getIRegE(sz, pfx, modrm));
3646 delta += (am_sz + d_sz);
3647 } else {
3648 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
3649 assign(dst0, loadLE(ty,mkexpr(addr)));
3650 delta += len + d_sz;
3653 isShift = False;
3654 switch (gregLO3ofRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; }
3656 isRotate = False;
3657 switch (gregLO3ofRM(modrm)) { case 0: case 1: isRotate = True; }
3659 isRotateC = False;
3660 switch (gregLO3ofRM(modrm)) { case 2: case 3: isRotateC = True; }
3662 if (!isShift && !isRotate && !isRotateC) {
3663 /*NOTREACHED*/
3664 vpanic("dis_Grp2(Reg): unhandled case(amd64)");
3667 if (isRotateC) {
3668 /* Call a helper; this insn is so ridiculous it does not deserve
3669 better. One problem is, the helper has to calculate both the
3670 new value and the new flags. This is more than 64 bits, and
3671 there is no way to return more than 64 bits from the helper.
3672 Hence the crude and obvious solution is to call it twice,
3673 using the sign of the sz field to indicate whether it is the
3674 value or rflags result we want.
3676 Bool left = toBool(gregLO3ofRM(modrm) == 2);
3677 IRExpr** argsVALUE;
3678 IRExpr** argsRFLAGS;
3680 IRTemp new_value = newTemp(Ity_I64);
3681 IRTemp new_rflags = newTemp(Ity_I64);
3682 IRTemp old_rflags = newTemp(Ity_I64);
3684 assign( old_rflags, widenUto64(mk_amd64g_calculate_rflags_all()) );
3686 argsVALUE
3687 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
3688 widenUto64(shift_expr), /* rotate amount */
3689 mkexpr(old_rflags),
3690 mkU64(sz) );
3691 assign( new_value,
3692 mkIRExprCCall(
3693 Ity_I64,
3694 0/*regparm*/,
3695 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3696 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR,
3697 argsVALUE
3701 argsRFLAGS
3702 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
3703 widenUto64(shift_expr), /* rotate amount */
3704 mkexpr(old_rflags),
3705 mkU64(-sz) );
3706 assign( new_rflags,
3707 mkIRExprCCall(
3708 Ity_I64,
3709 0/*regparm*/,
3710 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3711 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR,
3712 argsRFLAGS
3716 assign( dst1, narrowTo(ty, mkexpr(new_value)) );
3717 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
3718 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
3719 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
3720 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
3723 else
3724 if (isShift) {
3726 IRTemp pre64 = newTemp(Ity_I64);
3727 IRTemp res64 = newTemp(Ity_I64);
3728 IRTemp res64ss = newTemp(Ity_I64);
3729 IRTemp shift_amt = newTemp(Ity_I8);
3730 UChar mask = toUChar(sz==8 ? 63 : 31);
3731 IROp op64;
3733 switch (gregLO3ofRM(modrm)) {
3734 case 4: op64 = Iop_Shl64; break;
3735 case 5: op64 = Iop_Shr64; break;
3736 case 6: op64 = Iop_Shl64; break;
3737 case 7: op64 = Iop_Sar64; break;
3738 /*NOTREACHED*/
3739 default: vpanic("dis_Grp2:shift"); break;
3742 /* Widen the value to be shifted to 64 bits, do the shift, and
3743 narrow back down. This seems surprisingly long-winded, but
3744 unfortunately the AMD semantics requires that 8/16/32-bit
3745 shifts give defined results for shift values all the way up
3746 to 32, and this seems the simplest way to do it. It has the
3747 advantage that the only IR level shifts generated are of 64
3748 bit values, and the shift amount is guaranteed to be in the
3749 range 0 .. 63, thereby observing the IR semantics requiring
3750 all shift values to be in the range 0 .. 2^word_size-1.
3752 Therefore the shift amount is masked with 63 for 64-bit shifts
3753 and 31 for all others.
3755 /* shift_amt = shift_expr & MASK, regardless of operation size */
3756 assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(mask)) );
3758 /* suitably widen the value to be shifted to 64 bits. */
3759 assign( pre64, op64==Iop_Sar64 ? widenSto64(mkexpr(dst0))
3760 : widenUto64(mkexpr(dst0)) );
3762 /* res64 = pre64 `shift` shift_amt */
3763 assign( res64, binop(op64, mkexpr(pre64), mkexpr(shift_amt)) );
3765 /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */
3766 assign( res64ss,
3767 binop(op64,
3768 mkexpr(pre64),
3769 binop(Iop_And8,
3770 binop(Iop_Sub8,
3771 mkexpr(shift_amt), mkU8(1)),
3772 mkU8(mask))) );
3774 /* Build the flags thunk. */
3775 setFlags_DEP1_DEP2_shift(op64, res64, res64ss, ty, shift_amt);
3777 /* Narrow the result back down. */
3778 assign( dst1, narrowTo(ty, mkexpr(res64)) );
3780 } /* if (isShift) */
3782 else
3783 if (isRotate) {
3784 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1
3785 : (ty==Ity_I32 ? 2 : 3));
3786 Bool left = toBool(gregLO3ofRM(modrm) == 0);
3787 IRTemp rot_amt = newTemp(Ity_I8);
3788 IRTemp rot_amt64 = newTemp(Ity_I8);
3789 IRTemp oldFlags = newTemp(Ity_I64);
3790 UChar mask = toUChar(sz==8 ? 63 : 31);
3792 /* rot_amt = shift_expr & mask */
3793 /* By masking the rotate amount thusly, the IR-level Shl/Shr
3794 expressions never shift beyond the word size and thus remain
3795 well defined. */
3796 assign(rot_amt64, binop(Iop_And8, shift_expr, mkU8(mask)));
3798 if (ty == Ity_I64)
3799 assign(rot_amt, mkexpr(rot_amt64));
3800 else
3801 assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt64), mkU8(8*sz-1)));
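      /* Illustrative case: a 16-bit ROL with a count byte of 20 gives
         rot_amt64 == 20 & 31 == 20 and, since ty != Ity_I64, an
         effective rot_amt of 20 & 15 == 4; the rotate below uses the 4,
         while the zero-test that guards the flag update is made on the
         raw masked count in rot_amt64. */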
3803 if (left) {
3805 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
3806 assign(dst1,
3807 binop( mkSizedOp(ty,Iop_Or8),
3808 binop( mkSizedOp(ty,Iop_Shl8),
3809 mkexpr(dst0),
3810 mkexpr(rot_amt)
3812 binop( mkSizedOp(ty,Iop_Shr8),
3813 mkexpr(dst0),
3814 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
3818 ccOp += AMD64G_CC_OP_ROLB;
3820 } else { /* right */
3822 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
3823 assign(dst1,
3824 binop( mkSizedOp(ty,Iop_Or8),
3825 binop( mkSizedOp(ty,Iop_Shr8),
3826 mkexpr(dst0),
3827 mkexpr(rot_amt)
3829 binop( mkSizedOp(ty,Iop_Shl8),
3830 mkexpr(dst0),
3831 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
3835 ccOp += AMD64G_CC_OP_RORB;
3839 /* dst1 now holds the rotated value. Build flag thunk. We
3840 need the resulting value for this, and the previous flags.
3841 Except don't set it if the rotate count is zero. */
3843 assign(oldFlags, mk_amd64g_calculate_rflags_all());
3845 /* rot_amt64 :: Ity_I8. We need to convert it to I1. */
3846 IRTemp rot_amt64b = newTemp(Ity_I1);
3847 assign(rot_amt64b, binop(Iop_CmpNE8, mkexpr(rot_amt64), mkU8(0)) );
3849 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */
3850 stmt( IRStmt_Put( OFFB_CC_OP,
3851 IRExpr_ITE( mkexpr(rot_amt64b),
3852 mkU64(ccOp),
3853 IRExpr_Get(OFFB_CC_OP,Ity_I64) ) ));
3854 stmt( IRStmt_Put( OFFB_CC_DEP1,
3855 IRExpr_ITE( mkexpr(rot_amt64b),
3856 widenUto64(mkexpr(dst1)),
3857 IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) ));
3858 stmt( IRStmt_Put( OFFB_CC_DEP2,
3859 IRExpr_ITE( mkexpr(rot_amt64b),
3860 mkU64(0),
3861 IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) ));
3862 stmt( IRStmt_Put( OFFB_CC_NDEP,
3863 IRExpr_ITE( mkexpr(rot_amt64b),
3864 mkexpr(oldFlags),
3865 IRExpr_Get(OFFB_CC_NDEP,Ity_I64) ) ));
3866 } /* if (isRotate) */
3868 /* Save result, and finish up. */
3869 if (epartIsReg(modrm)) {
3870 putIRegE(sz, pfx, modrm, mkexpr(dst1));
3871 if (vex_traceflags & VEX_TRACE_FE) {
3872 vex_printf("%s%c ",
3873 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
3874 if (shift_expr_txt)
3875 vex_printf("%s", shift_expr_txt);
3876 else
3877 ppIRExpr(shift_expr);
3878 vex_printf(", %s\n", nameIRegE(sz,pfx,modrm));
3880 } else {
3881 storeLE(mkexpr(addr), mkexpr(dst1));
3882 if (vex_traceflags & VEX_TRACE_FE) {
3883 vex_printf("%s%c ",
3884 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
3885 if (shift_expr_txt)
3886 vex_printf("%s", shift_expr_txt);
3887 else
3888 ppIRExpr(shift_expr);
3889 vex_printf(", %s\n", dis_buf);
3892 return delta;
3896 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
3897 static
3898 ULong dis_Grp8_Imm ( const VexAbiInfo* vbi,
3899 Prefix pfx,
3900 Long delta, UChar modrm,
3901 Int am_sz, Int sz, ULong src_val,
3902 Bool* decode_OK )
3904    /* src_val denotes a d8; delta on entry points at the modrm
3905       byte. */
3907 IRType ty = szToITy(sz);
3908 IRTemp t2 = newTemp(Ity_I64);
3909 IRTemp t2m = newTemp(Ity_I64);
3910 IRTemp t_addr = IRTemp_INVALID;
3911 HChar dis_buf[50];
3912 ULong mask;
3914 /* we're optimists :-) */
3915 *decode_OK = True;
3917 /* Check whether F2 or F3 are acceptable. */
3918 if (epartIsReg(modrm)) {
3919 /* F2 or F3 are not allowed in the register case. */
3920 if (haveF2orF3(pfx)) {
3921 *decode_OK = False;
3922 return delta;
3924 } else {
3925 /* F2 or F3 (but not both) are allowable provided LOCK is also
3926 present. */
3927 if (haveF2orF3(pfx)) {
3928 if (haveF2andF3(pfx) || !haveLOCK(pfx)) {
3929 *decode_OK = False;
3930 return delta;
3935 /* Limit src_val -- the bit offset -- to something within a word.
3936 The Intel docs say that literal offsets larger than a word are
3937 masked in this way. */
3938 switch (sz) {
3939 case 2: src_val &= 15; break;
3940 case 4: src_val &= 31; break;
3941 case 8: src_val &= 63; break;
3942 default: *decode_OK = False; return delta;
3945 /* Invent a mask suitable for the operation. */
3946 switch (gregLO3ofRM(modrm)) {
3947 case 4: /* BT */ mask = 0; break;
3948 case 5: /* BTS */ mask = 1ULL << src_val; break;
3949 case 6: /* BTR */ mask = ~(1ULL << src_val); break;
3950 case 7: /* BTC */ mask = 1ULL << src_val; break;
3951 /* If this needs to be extended, probably simplest to make a
3952 new function to handle the other cases (0 .. 3). The
3953          Intel docs do not, however, indicate any use for 0 .. 3, so
3954 we don't expect this to happen. */
3955 default: *decode_OK = False; return delta;
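   /* Worked example: BTS with src_val == 5 gives mask == 1 << 5 == 0x20,
      so the updated value computed below is t2 | 0x20; for BTR the mask
      is ~0x20 and the update is t2 & ~0x20; BT writes nothing back and
      only the carry flag is derived from bit 5. */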
3958 /* Fetch the value to be tested and modified into t2, which is
3959 64-bits wide regardless of sz. */
3960 if (epartIsReg(modrm)) {
3961 vassert(am_sz == 1);
3962 assign( t2, widenUto64(getIRegE(sz, pfx, modrm)) );
3963 delta += (am_sz + 1);
3964 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
3965 nameISize(sz),
3966 src_val, nameIRegE(sz,pfx,modrm));
3967 } else {
3968 Int len;
3969 t_addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 1 );
3970 delta += (len+1);
3971 assign( t2, widenUto64(loadLE(ty, mkexpr(t_addr))) );
3972 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
3973 nameISize(sz),
3974 src_val, dis_buf);
3977 /* Compute the new value into t2m, if non-BT. */
3978 switch (gregLO3ofRM(modrm)) {
3979 case 4: /* BT */
3980 break;
3981 case 5: /* BTS */
3982 assign( t2m, binop(Iop_Or64, mkU64(mask), mkexpr(t2)) );
3983 break;
3984 case 6: /* BTR */
3985 assign( t2m, binop(Iop_And64, mkU64(mask), mkexpr(t2)) );
3986 break;
3987 case 7: /* BTC */
3988 assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) );
3989 break;
3990 default:
3991 /*NOTREACHED*/ /*the previous switch guards this*/
3992 vassert(0);
3995 /* Write the result back, if non-BT. */
3996 if (gregLO3ofRM(modrm) != 4 /* BT */) {
3997 if (epartIsReg(modrm)) {
3998 putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m)));
3999 } else {
4000 if (haveLOCK(pfx)) {
4001 casLE( mkexpr(t_addr),
4002 narrowTo(ty, mkexpr(t2))/*expd*/,
4003 narrowTo(ty, mkexpr(t2m))/*new*/,
4004 guest_RIP_curr_instr );
4005 } else {
4006 storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
4011 /* Copy relevant bit from t2 into the carry flag. */
4012 /* Flags: C=selected bit, O,S,A,P undefined, Z unchanged */
4013 /* so let's also keep O,S,A,P unchanged */
4014 const ULong maskC = AMD64G_CC_MASK_C;
4015 const ULong maskOSZAP = AMD64G_CC_MASK_O | AMD64G_CC_MASK_S
4016 | AMD64G_CC_MASK_Z | AMD64G_CC_MASK_A
4017 | AMD64G_CC_MASK_P;
4019 IRTemp old_rflags = newTemp(Ity_I64);
4020 assign(old_rflags, mk_amd64g_calculate_rflags_all());
4022 IRTemp new_rflags = newTemp(Ity_I64);
4023 assign(new_rflags,
4024 binop(Iop_Or64,
4025 binop(Iop_And64, mkexpr(old_rflags), mkU64(maskOSZAP)),
4026 binop(Iop_And64,
4027 binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)),
4028 mkU64(maskC)) ));
4030 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
4031 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
4032 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
4033 /* Set NDEP even though it isn't used. This makes redundant-PUT
4034 elimination of previous stores to this field work better. */
4035 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
4037 return delta;
4041 /* Signed/unsigned widening multiply. Generate IR to multiply the
4042 value in RAX/EAX/AX/AL by the given IRTemp, and park the result in
4043 RDX:RAX/EDX:EAX/DX:AX/AX.
4045 static void codegen_mulL_A_D ( Int sz, Bool syned,
4046 IRTemp tmp, const HChar* tmp_txt )
4048 IRType ty = szToITy(sz);
4049 IRTemp t1 = newTemp(ty);
4051 assign( t1, getIRegRAX(sz) );
4053 switch (ty) {
4054 case Ity_I64: {
4055 IRTemp res128 = newTemp(Ity_I128);
4056 IRTemp resHi = newTemp(Ity_I64);
4057 IRTemp resLo = newTemp(Ity_I64);
4058 IROp mulOp = syned ? Iop_MullS64 : Iop_MullU64;
4059 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
4060 setFlags_MUL ( Ity_I64, t1, tmp, tBaseOp );
4061 assign( res128, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
4062 assign( resHi, unop(Iop_128HIto64,mkexpr(res128)));
4063 assign( resLo, unop(Iop_128to64,mkexpr(res128)));
4064 putIReg64(R_RDX, mkexpr(resHi));
4065 putIReg64(R_RAX, mkexpr(resLo));
4066 break;
4068 case Ity_I32: {
4069 IRTemp res64 = newTemp(Ity_I64);
4070 IRTemp resHi = newTemp(Ity_I32);
4071 IRTemp resLo = newTemp(Ity_I32);
4072 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32;
4073 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
4074 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp );
4075 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
4076 assign( resHi, unop(Iop_64HIto32,mkexpr(res64)));
4077 assign( resLo, unop(Iop_64to32,mkexpr(res64)));
4078 putIRegRDX(4, mkexpr(resHi));
4079 putIRegRAX(4, mkexpr(resLo));
4080 break;
4082 case Ity_I16: {
4083 IRTemp res32 = newTemp(Ity_I32);
4084 IRTemp resHi = newTemp(Ity_I16);
4085 IRTemp resLo = newTemp(Ity_I16);
4086 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16;
4087 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
4088 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp );
4089 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
4090 assign( resHi, unop(Iop_32HIto16,mkexpr(res32)));
4091 assign( resLo, unop(Iop_32to16,mkexpr(res32)));
4092 putIRegRDX(2, mkexpr(resHi));
4093 putIRegRAX(2, mkexpr(resLo));
4094 break;
4096 case Ity_I8: {
4097 IRTemp res16 = newTemp(Ity_I16);
4098 IRTemp resHi = newTemp(Ity_I8);
4099 IRTemp resLo = newTemp(Ity_I8);
4100 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8;
4101 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
4102 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp );
4103 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
4104 assign( resHi, unop(Iop_16HIto8,mkexpr(res16)));
4105 assign( resLo, unop(Iop_16to8,mkexpr(res16)));
4106 putIRegRAX(2, mkexpr(res16));
4107 break;
4109 default:
4110 ppIRType(ty);
4111 vpanic("codegen_mulL_A_D(amd64)");
4113 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt);
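/* Worked example for the Ity_I32 case above: with EAX == 0x80000000 and
   an unsigned source operand of 2, the 32x32->64 multiply gives
   0x100000000, so EDX is written with 1 and EAX with 0, and the flag
   thunk is seeded from the two operands via setFlags_MUL. */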
4117 /* Group 3 extended opcodes. We have to decide here whether F2 and F3
4118    might be valid. */
4119 static
4120 ULong dis_Grp3 ( const VexAbiInfo* vbi,
4121 Prefix pfx, Int sz, Long delta, Bool* decode_OK )
4123 Long d64;
4124 UChar modrm;
4125 HChar dis_buf[50];
4126 Int len;
4127 IRTemp addr;
4128 IRType ty = szToITy(sz);
4129 IRTemp t1 = newTemp(ty);
4130 IRTemp dst1, src, dst0;
4131 *decode_OK = True;
4132 modrm = getUChar(delta);
4133 if (epartIsReg(modrm)) {
4134 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */
4135 if (haveF2orF3(pfx)) goto unhandled;
4136 switch (gregLO3ofRM(modrm)) {
4137 case 0: { /* TEST */
4138 delta++;
4139 d64 = getSDisp(imin(4,sz), delta);
4140 delta += imin(4,sz);
4141 dst1 = newTemp(ty);
4142 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
4143 getIRegE(sz,pfx,modrm),
4144 mkU(ty, d64 & mkSizeMask(sz))));
4145 setFlags_DEP1( Iop_And8, dst1, ty );
4146 DIP("test%c $%lld, %s\n",
4147 nameISize(sz), d64,
4148 nameIRegE(sz, pfx, modrm));
4149 break;
4151 case 1:
4152 *decode_OK = False;
4153 return delta;
4154 case 2: /* NOT */
4155 delta++;
4156 putIRegE(sz, pfx, modrm,
4157 unop(mkSizedOp(ty,Iop_Not8),
4158 getIRegE(sz, pfx, modrm)));
4159 DIP("not%c %s\n", nameISize(sz),
4160 nameIRegE(sz, pfx, modrm));
4161 break;
4162 case 3: /* NEG */
4163 delta++;
4164 dst0 = newTemp(ty);
4165 src = newTemp(ty);
4166 dst1 = newTemp(ty);
4167 assign(dst0, mkU(ty,0));
4168 assign(src, getIRegE(sz, pfx, modrm));
4169 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
4170 mkexpr(src)));
4171 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
4172 putIRegE(sz, pfx, modrm, mkexpr(dst1));
4173 DIP("neg%c %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm));
4174 break;
4175 case 4: /* MUL (unsigned widening) */
4176 delta++;
4177 src = newTemp(ty);
4178 assign(src, getIRegE(sz,pfx,modrm));
4179 codegen_mulL_A_D ( sz, False, src,
4180 nameIRegE(sz,pfx,modrm) );
4181 break;
4182 case 5: /* IMUL (signed widening) */
4183 delta++;
4184 src = newTemp(ty);
4185 assign(src, getIRegE(sz,pfx,modrm));
4186 codegen_mulL_A_D ( sz, True, src,
4187 nameIRegE(sz,pfx,modrm) );
4188 break;
4189 case 6: /* DIV */
4190 delta++;
4191 assign( t1, getIRegE(sz, pfx, modrm) );
4192 codegen_div ( sz, t1, False );
4193 DIP("div%c %s\n", nameISize(sz),
4194 nameIRegE(sz, pfx, modrm));
4195 break;
4196 case 7: /* IDIV */
4197 delta++;
4198 assign( t1, getIRegE(sz, pfx, modrm) );
4199 codegen_div ( sz, t1, True );
4200 DIP("idiv%c %s\n", nameISize(sz),
4201 nameIRegE(sz, pfx, modrm));
4202 break;
4203 default:
4204 /*NOTREACHED*/
4205 vpanic("Grp3(amd64,R)");
4207 } else {
4208 /* Decide if F2/XACQ or F3/XREL might be valid. */
4209 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
4210 if ((gregLO3ofRM(modrm) == 3/*NEG*/ || gregLO3ofRM(modrm) == 2/*NOT*/)
4211 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
4212 validF2orF3 = True;
4214 if (!validF2orF3) goto unhandled;
4215 /* */
4216 addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
4217 /* we have to inform disAMode of any immediate
4218 bytes used */
4219 gregLO3ofRM(modrm)==0/*TEST*/
4220 ? imin(4,sz)
4223 t1 = newTemp(ty);
4224 delta += len;
4225 assign(t1, loadLE(ty,mkexpr(addr)));
4226 switch (gregLO3ofRM(modrm)) {
4227 case 0: { /* TEST */
4228 d64 = getSDisp(imin(4,sz), delta);
4229 delta += imin(4,sz);
4230 dst1 = newTemp(ty);
4231 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
4232 mkexpr(t1),
4233 mkU(ty, d64 & mkSizeMask(sz))));
4234 setFlags_DEP1( Iop_And8, dst1, ty );
4235 DIP("test%c $%lld, %s\n", nameISize(sz), d64, dis_buf);
4236 break;
4238 case 1:
4239 *decode_OK = False;
4240 return delta;
4241 case 2: /* NOT */
4242 dst1 = newTemp(ty);
4243 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
4244 if (haveLOCK(pfx)) {
4245 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
4246 guest_RIP_curr_instr );
4247 } else {
4248 storeLE( mkexpr(addr), mkexpr(dst1) );
4250 DIP("not%c %s\n", nameISize(sz), dis_buf);
4251 break;
4252 case 3: /* NEG */
4253 dst0 = newTemp(ty);
4254 src = newTemp(ty);
4255 dst1 = newTemp(ty);
4256 assign(dst0, mkU(ty,0));
4257 assign(src, mkexpr(t1));
4258 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
4259 mkexpr(src)));
4260 if (haveLOCK(pfx)) {
4261 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
4262 guest_RIP_curr_instr );
4263 } else {
4264 storeLE( mkexpr(addr), mkexpr(dst1) );
4266 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
4267 DIP("neg%c %s\n", nameISize(sz), dis_buf);
4268 break;
4269 case 4: /* MUL (unsigned widening) */
4270 codegen_mulL_A_D ( sz, False, t1, dis_buf );
4271 break;
4272 case 5: /* IMUL */
4273 codegen_mulL_A_D ( sz, True, t1, dis_buf );
4274 break;
4275 case 6: /* DIV */
4276 codegen_div ( sz, t1, False );
4277 DIP("div%c %s\n", nameISize(sz), dis_buf);
4278 break;
4279 case 7: /* IDIV */
4280 codegen_div ( sz, t1, True );
4281 DIP("idiv%c %s\n", nameISize(sz), dis_buf);
4282 break;
4283 default:
4284 /*NOTREACHED*/
4285 vpanic("Grp3(amd64,M)");
4288 return delta;
4289 unhandled:
4290 *decode_OK = False;
4291 return delta;
4295 /* Group 4 extended opcodes. We have to decide here whether F2 and F3
4296 might be valid. */
4297 static
4298 ULong dis_Grp4 ( const VexAbiInfo* vbi,
4299 Prefix pfx, Long delta, Bool* decode_OK )
4301 Int alen;
4302 UChar modrm;
4303 HChar dis_buf[50];
4304 IRType ty = Ity_I8;
4305 IRTemp t1 = newTemp(ty);
4306 IRTemp t2 = newTemp(ty);
4308 *decode_OK = True;
4310 modrm = getUChar(delta);
4311 if (epartIsReg(modrm)) {
4312 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */
4313 if (haveF2orF3(pfx)) goto unhandled;
4314 assign(t1, getIRegE(1, pfx, modrm));
4315 switch (gregLO3ofRM(modrm)) {
4316 case 0: /* INC */
4317 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
4318 putIRegE(1, pfx, modrm, mkexpr(t2));
4319 setFlags_INC_DEC( True, t2, ty );
4320 break;
4321 case 1: /* DEC */
4322 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
4323 putIRegE(1, pfx, modrm, mkexpr(t2));
4324 setFlags_INC_DEC( False, t2, ty );
4325 break;
4326 default:
4327 *decode_OK = False;
4328 return delta;
4330 delta++;
4331 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)),
4332 nameIRegE(1, pfx, modrm));
4333 } else {
4334 /* Decide if F2/XACQ or F3/XREL might be valid. */
4335 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
4336 if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/)
4337 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
4338 validF2orF3 = True;
4340 if (!validF2orF3) goto unhandled;
4341 /* */
4342 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
4343 assign( t1, loadLE(ty, mkexpr(addr)) );
4344 switch (gregLO3ofRM(modrm)) {
4345 case 0: /* INC */
4346 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
4347 if (haveLOCK(pfx)) {
4348 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
4349 guest_RIP_curr_instr );
4350 } else {
4351 storeLE( mkexpr(addr), mkexpr(t2) );
4353 setFlags_INC_DEC( True, t2, ty );
4354 break;
4355 case 1: /* DEC */
4356 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
4357 if (haveLOCK(pfx)) {
4358 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
4359 guest_RIP_curr_instr );
4360 } else {
4361 storeLE( mkexpr(addr), mkexpr(t2) );
4363 setFlags_INC_DEC( False, t2, ty );
4364 break;
4365 default:
4366 *decode_OK = False;
4367 return delta;
4369 delta += alen;
4370 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), dis_buf);
4372 return delta;
4373 unhandled:
4374 *decode_OK = False;
4375 return delta;
4379 /* Group 5 extended opcodes. We have to decide here whether F2 and F3
4380 might be valid. */
4381 static
4382 ULong dis_Grp5 ( const VexAbiInfo* vbi,
4383 Prefix pfx, Int sz, Long delta,
4384 /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK )
4386 Int len;
4387 UChar modrm;
4388 HChar dis_buf[50];
4389 IRTemp addr = IRTemp_INVALID;
4390 IRType ty = szToITy(sz);
4391 IRTemp t1 = newTemp(ty);
4392 IRTemp t2 = IRTemp_INVALID;
4393 IRTemp t3 = IRTemp_INVALID;
4394 Bool showSz = True;
4396 *decode_OK = True;
4398 modrm = getUChar(delta);
4399 if (epartIsReg(modrm)) {
4400 /* F2/XACQ and F3/XREL are always invalid in the non-mem case.
4401 F2/CALL and F2/JMP may have bnd prefix. */
4402 if (haveF2orF3(pfx)
4403 && ! (haveF2(pfx)
4404 && (gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4)))
4405 goto unhandledR;
4406 assign(t1, getIRegE(sz,pfx,modrm));
4407 switch (gregLO3ofRM(modrm)) {
4408 case 0: /* INC */
4409 t2 = newTemp(ty);
4410 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
4411 mkexpr(t1), mkU(ty,1)));
4412 setFlags_INC_DEC( True, t2, ty );
4413 putIRegE(sz,pfx,modrm, mkexpr(t2));
4414 break;
4415 case 1: /* DEC */
4416 t2 = newTemp(ty);
4417 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
4418 mkexpr(t1), mkU(ty,1)));
4419 setFlags_INC_DEC( False, t2, ty );
4420 putIRegE(sz,pfx,modrm, mkexpr(t2));
4421 break;
4422 case 2: /* call Ev */
4423 /* Ignore any sz value and operate as if sz==8. */
4424 if (!(sz == 4 || sz == 8)) goto unhandledR;
4425 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
4426 sz = 8;
4427 t3 = newTemp(Ity_I64);
4428 assign(t3, getIRegE(sz,pfx,modrm));
4429 t2 = newTemp(Ity_I64);
4430 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
4431 putIReg64(R_RSP, mkexpr(t2));
4432 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1));
4433 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)");
4434 jmp_treg(dres, Ijk_Call, t3);
4435 vassert(dres->whatNext == Dis_StopHere);
4436 showSz = False;
4437 break;
4438 case 4: /* jmp Ev */
4439 /* Ignore any sz value and operate as if sz==8. */
4440 if (!(sz == 4 || sz == 8)) goto unhandledR;
4441 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
4442 sz = 8;
4443 t3 = newTemp(Ity_I64);
4444 assign(t3, getIRegE(sz,pfx,modrm));
4445 jmp_treg(dres, Ijk_Boring, t3);
4446 vassert(dres->whatNext == Dis_StopHere);
4447 showSz = False;
4448 break;
4449 case 6: /* PUSH Ev */
4450 /* There is no encoding for 32-bit operand size; hence ... */
4451 if (sz == 4) sz = 8;
4452 if (sz == 8 || sz == 2) {
4453 ty = szToITy(sz); /* redo it, since sz might have changed */
4454 t3 = newTemp(ty);
4455 assign(t3, getIRegE(sz,pfx,modrm));
4456 t2 = newTemp(Ity_I64);
4457 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
4458 putIReg64(R_RSP, mkexpr(t2) );
4459 storeLE( mkexpr(t2), mkexpr(t3) );
4460 break;
4461 } else {
4462 goto unhandledR; /* awaiting test case */
4464 default:
4465 unhandledR:
4466 *decode_OK = False;
4467 return delta;
4469 delta++;
4470 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
4471 showSz ? nameISize(sz) : ' ',
4472 nameIRegE(sz, pfx, modrm));
4473 } else {
4474 /* Decide if F2/XACQ, F3/XREL, F2/CALL or F2/JMP might be valid. */
4475 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
4476 if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/)
4477 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
4478 validF2orF3 = True;
4479 } else if ((gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4)
4480 && (haveF2(pfx) && !haveF3(pfx))) {
4481 validF2orF3 = True;
4483 if (!validF2orF3) goto unhandledM;
4484 /* */
4485 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
4486 if (gregLO3ofRM(modrm) != 2 && gregLO3ofRM(modrm) != 4
4487 && gregLO3ofRM(modrm) != 6) {
4488 assign(t1, loadLE(ty,mkexpr(addr)));
4490 switch (gregLO3ofRM(modrm)) {
4491 case 0: /* INC */
4492 t2 = newTemp(ty);
4493 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
4494 mkexpr(t1), mkU(ty,1)));
4495 if (haveLOCK(pfx)) {
4496 casLE( mkexpr(addr),
4497 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
4498 } else {
4499 storeLE(mkexpr(addr),mkexpr(t2));
4501 setFlags_INC_DEC( True, t2, ty );
4502 break;
4503 case 1: /* DEC */
4504 t2 = newTemp(ty);
4505 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
4506 mkexpr(t1), mkU(ty,1)));
4507 if (haveLOCK(pfx)) {
4508 casLE( mkexpr(addr),
4509 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
4510 } else {
4511 storeLE(mkexpr(addr),mkexpr(t2));
4513 setFlags_INC_DEC( False, t2, ty );
4514 break;
4515 case 2: /* call Ev */
4516 /* Ignore any sz value and operate as if sz==8. */
4517 if (!(sz == 4 || sz == 8)) goto unhandledM;
4518 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
4519 sz = 8;
4520 t3 = newTemp(Ity_I64);
4521 assign(t3, loadLE(Ity_I64,mkexpr(addr)));
4522 t2 = newTemp(Ity_I64);
4523 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
4524 putIReg64(R_RSP, mkexpr(t2));
4525 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len));
4526 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)");
4527 jmp_treg(dres, Ijk_Call, t3);
4528 vassert(dres->whatNext == Dis_StopHere);
4529 showSz = False;
4530 break;
4531 case 4: /* JMP Ev */
4532 /* Ignore any sz value and operate as if sz==8. */
4533 if (!(sz == 4 || sz == 8)) goto unhandledM;
4534 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
4535 sz = 8;
4536 t3 = newTemp(Ity_I64);
4537 assign(t3, loadLE(Ity_I64,mkexpr(addr)));
4538 jmp_treg(dres, Ijk_Boring, t3);
4539 vassert(dres->whatNext == Dis_StopHere);
4540 showSz = False;
4541 break;
4542 case 6: /* PUSH Ev */
4543 /* There is no encoding for 32-bit operand size; hence ... */
4544 if (sz == 4) sz = 8;
4545 if (sz == 8 || sz == 2) {
4546 ty = szToITy(sz); /* redo it, since sz might have changed */
4547 t3 = newTemp(ty);
4548 assign(t3, loadLE(ty,mkexpr(addr)));
4549 t2 = newTemp(Ity_I64);
4550 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
4551 putIReg64(R_RSP, mkexpr(t2) );
4552 storeLE( mkexpr(t2), mkexpr(t3) );
4553 break;
4554 } else {
4555 goto unhandledM; /* awaiting test case */
4557 default:
4558 unhandledM:
4559 *decode_OK = False;
4560 return delta;
4562 delta += len;
4563 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
4564 showSz ? nameISize(sz) : ' ',
4565 dis_buf);
4567 return delta;
4571 /*------------------------------------------------------------*/
4572 /*--- Disassembling string ops (including REP prefixes) ---*/
4573 /*------------------------------------------------------------*/
4575 /* Code shared by all the string ops */
4576 static
4577 void dis_string_op_increment ( Int sz, IRTemp t_inc )
4579 UChar logSz;
4580 if (sz == 8 || sz == 4 || sz == 2) {
4581 logSz = 1;
4582 if (sz == 4) logSz = 2;
4583 if (sz == 8) logSz = 3;
4584 assign( t_inc,
4585 binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ),
4586 mkU8(logSz) ) );
4587 } else {
4588 assign( t_inc,
4589 IRExpr_Get( OFFB_DFLAG, Ity_I64 ) );
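/* Illustrative values: guest_DFLAG is assumed to hold +1 or -1, so for a
   4-byte string op t_inc becomes (+1 << 2) == +4 when DF is clear and
   (-1 << 2) == -4 when DF is set, i.e. RSI/RDI advance by one element in
   the appropriate direction. */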
4593 static
4594 void dis_string_op( void (*dis_OP)( Int, IRTemp, Prefix pfx ),
4595 Int sz, const HChar* name, Prefix pfx )
4597 IRTemp t_inc = newTemp(Ity_I64);
4598 /* Really we ought to inspect the override prefixes, but we don't.
4599       The following assertion catches any resulting silliness. */
4600 vassert(pfx == clearSegBits(pfx));
4601 dis_string_op_increment(sz, t_inc);
4602 dis_OP( sz, t_inc, pfx );
4603 DIP("%s%c\n", name, nameISize(sz));
4606 static
4607 void dis_MOVS ( Int sz, IRTemp t_inc, Prefix pfx )
4609 IRType ty = szToITy(sz);
4610 IRTemp td = newTemp(Ity_I64); /* RDI */
4611 IRTemp ts = newTemp(Ity_I64); /* RSI */
4612 IRExpr *incd, *incs;
4614 if (haveASO(pfx)) {
4615 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4616 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
4617 } else {
4618 assign( td, getIReg64(R_RDI) );
4619 assign( ts, getIReg64(R_RSI) );
4622 storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );
4624 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4625 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
4626 if (haveASO(pfx)) {
4627 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4628 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
4630 putIReg64( R_RDI, incd );
4631 putIReg64( R_RSI, incs );
4634 static
4635 void dis_LODS ( Int sz, IRTemp t_inc, Prefix pfx )
4637 IRType ty = szToITy(sz);
4638 IRTemp ts = newTemp(Ity_I64); /* RSI */
4639 IRExpr *incs;
4641 if (haveASO(pfx))
4642 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
4643 else
4644 assign( ts, getIReg64(R_RSI) );
4646 putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) );
4648 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
4649 if (haveASO(pfx))
4650 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
4651 putIReg64( R_RSI, incs );
4654 static
4655 void dis_STOS ( Int sz, IRTemp t_inc, Prefix pfx )
4657 IRType ty = szToITy(sz);
4658 IRTemp ta = newTemp(ty); /* rAX */
4659 IRTemp td = newTemp(Ity_I64); /* RDI */
4660 IRExpr *incd;
4662 assign( ta, getIRegRAX(sz) );
4664 if (haveASO(pfx))
4665 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4666 else
4667 assign( td, getIReg64(R_RDI) );
4669 storeLE( mkexpr(td), mkexpr(ta) );
4671 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4672 if (haveASO(pfx))
4673 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4674 putIReg64( R_RDI, incd );
4677 static
4678 void dis_CMPS ( Int sz, IRTemp t_inc, Prefix pfx )
4680 IRType ty = szToITy(sz);
4681 IRTemp tdv = newTemp(ty); /* (RDI) */
4682 IRTemp tsv = newTemp(ty); /* (RSI) */
4683 IRTemp td = newTemp(Ity_I64); /* RDI */
4684 IRTemp ts = newTemp(Ity_I64); /* RSI */
4685 IRExpr *incd, *incs;
4687 if (haveASO(pfx)) {
4688 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4689 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
4690 } else {
4691 assign( td, getIReg64(R_RDI) );
4692 assign( ts, getIReg64(R_RSI) );
4695 assign( tdv, loadLE(ty,mkexpr(td)) );
4697 assign( tsv, loadLE(ty,mkexpr(ts)) );
4699 setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );
4701 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4702 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
4703 if (haveASO(pfx)) {
4704 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4705 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
4707 putIReg64( R_RDI, incd );
4708 putIReg64( R_RSI, incs );
4711 static
4712 void dis_SCAS ( Int sz, IRTemp t_inc, Prefix pfx )
4714 IRType ty = szToITy(sz);
4715 IRTemp ta = newTemp(ty); /* rAX */
4716 IRTemp td = newTemp(Ity_I64); /* RDI */
4717 IRTemp tdv = newTemp(ty); /* (RDI) */
4718 IRExpr *incd;
4720 assign( ta, getIRegRAX(sz) );
4722 if (haveASO(pfx))
4723 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4724 else
4725 assign( td, getIReg64(R_RDI) );
4727 assign( tdv, loadLE(ty,mkexpr(td)) );
4729 setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );
4731 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4732 if (haveASO(pfx))
4733 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4734 putIReg64( R_RDI, incd );
4738 /* Wrap the appropriate string op inside a REP/REPE/REPNE. We assume
4739 the insn is the last one in the basic block, and so emit a jump to
4740 the next insn, rather than just falling through. */
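/* Sketch of the generated control flow for a conditional variant such as
   REPE CMPSB (assuming the caller passes the Z condition): exit to
   rip_next if RCX (or ECX under an address-size override) is zero,
   decrement the counter, do one CMPSB step, then jump back to rip while
   the condition holds, otherwise fall out to rip_next -- so each
   iteration becomes its own translated block. */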
4741 static
4742 void dis_REP_op ( /*MOD*/DisResult* dres,
4743 AMD64Condcode cond,
4744 void (*dis_OP)(Int, IRTemp, Prefix),
4745 Int sz, Addr64 rip, Addr64 rip_next, const HChar* name,
4746 Prefix pfx )
4748 IRTemp t_inc = newTemp(Ity_I64);
4749 IRTemp tc;
4750 IRExpr* cmp;
4752 /* Really we ought to inspect the override prefixes, but we don't.
4753       The following assertion catches any resulting silliness. */
4754 vassert(pfx == clearSegBits(pfx));
4756 if (haveASO(pfx)) {
4757 tc = newTemp(Ity_I32); /* ECX */
4758 assign( tc, getIReg32(R_RCX) );
4759 cmp = binop(Iop_CmpEQ32, mkexpr(tc), mkU32(0));
4760 } else {
4761 tc = newTemp(Ity_I64); /* RCX */
4762 assign( tc, getIReg64(R_RCX) );
4763 cmp = binop(Iop_CmpEQ64, mkexpr(tc), mkU64(0));
4766 stmt( IRStmt_Exit( cmp, Ijk_Boring,
4767 IRConst_U64(rip_next), OFFB_RIP ) );
4769 if (haveASO(pfx))
4770 putIReg32(R_RCX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );
4771 else
4772 putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) );
4774 dis_string_op_increment(sz, t_inc);
4775 dis_OP (sz, t_inc, pfx);
4777 if (cond == AMD64CondAlways) {
4778 jmp_lit(dres, Ijk_Boring, rip);
4779 vassert(dres->whatNext == Dis_StopHere);
4780 } else {
4781 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond),
4782 Ijk_Boring,
4783 IRConst_U64(rip),
4784 OFFB_RIP ) );
4785 jmp_lit(dres, Ijk_Boring, rip_next);
4786 vassert(dres->whatNext == Dis_StopHere);
4788 DIP("%s%c\n", name, nameISize(sz));
4792 /*------------------------------------------------------------*/
4793 /*--- Arithmetic, etc. ---*/
4794 /*------------------------------------------------------------*/
4796 /* IMUL E, G. Supplied rip points to the modR/M byte. */
4797 static
4798 ULong dis_mul_E_G ( const VexAbiInfo* vbi,
4799 Prefix pfx,
4800 Int size,
4801 Long delta0 )
4803 Int alen;
4804 HChar dis_buf[50];
4805 UChar rm = getUChar(delta0);
4806 IRType ty = szToITy(size);
4807 IRTemp te = newTemp(ty);
4808 IRTemp tg = newTemp(ty);
4809 IRTemp resLo = newTemp(ty);
4811 assign( tg, getIRegG(size, pfx, rm) );
4812 if (epartIsReg(rm)) {
4813 assign( te, getIRegE(size, pfx, rm) );
4814 } else {
4815 IRTemp addr = disAMode( &alen, vbi, pfx, delta0, dis_buf, 0 );
4816 assign( te, loadLE(ty,mkexpr(addr)) );
4819 setFlags_MUL ( ty, te, tg, AMD64G_CC_OP_SMULB );
4821 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );
4823 putIRegG(size, pfx, rm, mkexpr(resLo) );
4825 if (epartIsReg(rm)) {
4826 DIP("imul%c %s, %s\n", nameISize(size),
4827 nameIRegE(size,pfx,rm),
4828 nameIRegG(size,pfx,rm));
4829 return 1+delta0;
4830 } else {
4831 DIP("imul%c %s, %s\n", nameISize(size),
4832 dis_buf,
4833 nameIRegG(size,pfx,rm));
4834 return alen+delta0;
4839 /* IMUL I * E -> G. Supplied rip points to the modR/M byte. */
4840 static
4841 ULong dis_imul_I_E_G ( const VexAbiInfo* vbi,
4842 Prefix pfx,
4843 Int size,
4844 Long delta,
4845 Int litsize )
4847 Long d64;
4848 Int alen;
4849 HChar dis_buf[50];
4850 UChar rm = getUChar(delta);
4851 IRType ty = szToITy(size);
4852 IRTemp te = newTemp(ty);
4853 IRTemp tl = newTemp(ty);
4854 IRTemp resLo = newTemp(ty);
4856 vassert(/*size == 1 ||*/ size == 2 || size == 4 || size == 8);
4858 if (epartIsReg(rm)) {
4859 assign(te, getIRegE(size, pfx, rm));
4860 delta++;
4861 } else {
4862 IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
4863 imin(4,litsize) );
4864 assign(te, loadLE(ty, mkexpr(addr)));
4865 delta += alen;
4867 d64 = getSDisp(imin(4,litsize),delta);
4868 delta += imin(4,litsize);
4870 d64 &= mkSizeMask(size);
4871 assign(tl, mkU(ty,d64));
4873 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));
4875 setFlags_MUL ( ty, te, tl, AMD64G_CC_OP_SMULB );
4877 putIRegG(size, pfx, rm, mkexpr(resLo));
4879 DIP("imul%c $%lld, %s, %s\n",
4880 nameISize(size), d64,
4881 ( epartIsReg(rm) ? nameIRegE(size,pfx,rm) : dis_buf ),
4882 nameIRegG(size,pfx,rm) );
4883 return delta;
4887 /* Generate an IR sequence to do a popcount operation on the supplied
4888 IRTemp, and return a new IRTemp holding the result. 'ty' may be
4889 Ity_I16, Ity_I32 or Ity_I64 only. */
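/* Worked example of the mask-and-add ladder for Ity_I16 on 0xFFFF: the
   running value becomes 0xAAAA, then 0x4444, then 0x0808, and finally
   0x0010 == 16, the population count; step i sums adjacent fields of
   width 2^i selected by mask[i]. */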
4890 static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src )
4892 Int i;
4893 if (ty == Ity_I16) {
4894 IRTemp old = IRTemp_INVALID;
4895 IRTemp nyu = IRTemp_INVALID;
4896 IRTemp mask[4], shift[4];
4897 for (i = 0; i < 4; i++) {
4898 mask[i] = newTemp(ty);
4899 shift[i] = 1 << i;
4901 assign(mask[0], mkU16(0x5555));
4902 assign(mask[1], mkU16(0x3333));
4903 assign(mask[2], mkU16(0x0F0F));
4904 assign(mask[3], mkU16(0x00FF));
4905 old = src;
4906 for (i = 0; i < 4; i++) {
4907 nyu = newTemp(ty);
4908 assign(nyu,
4909 binop(Iop_Add16,
4910 binop(Iop_And16,
4911 mkexpr(old),
4912 mkexpr(mask[i])),
4913 binop(Iop_And16,
4914 binop(Iop_Shr16, mkexpr(old), mkU8(shift[i])),
4915 mkexpr(mask[i]))));
4916 old = nyu;
4918 return nyu;
4920 if (ty == Ity_I32) {
4921 IRTemp old = IRTemp_INVALID;
4922 IRTemp nyu = IRTemp_INVALID;
4923 IRTemp mask[5], shift[5];
4924 for (i = 0; i < 5; i++) {
4925 mask[i] = newTemp(ty);
4926 shift[i] = 1 << i;
4928 assign(mask[0], mkU32(0x55555555));
4929 assign(mask[1], mkU32(0x33333333));
4930 assign(mask[2], mkU32(0x0F0F0F0F));
4931 assign(mask[3], mkU32(0x00FF00FF));
4932 assign(mask[4], mkU32(0x0000FFFF));
4933 old = src;
4934 for (i = 0; i < 5; i++) {
4935 nyu = newTemp(ty);
4936 assign(nyu,
4937 binop(Iop_Add32,
4938 binop(Iop_And32,
4939 mkexpr(old),
4940 mkexpr(mask[i])),
4941 binop(Iop_And32,
4942 binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
4943 mkexpr(mask[i]))));
4944 old = nyu;
4946 return nyu;
4948 if (ty == Ity_I64) {
4949 IRTemp old = IRTemp_INVALID;
4950 IRTemp nyu = IRTemp_INVALID;
4951 IRTemp mask[6], shift[6];
4952 for (i = 0; i < 6; i++) {
4953 mask[i] = newTemp(ty);
4954 shift[i] = 1 << i;
4956 assign(mask[0], mkU64(0x5555555555555555ULL));
4957 assign(mask[1], mkU64(0x3333333333333333ULL));
4958 assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL));
4959 assign(mask[3], mkU64(0x00FF00FF00FF00FFULL));
4960 assign(mask[4], mkU64(0x0000FFFF0000FFFFULL));
4961 assign(mask[5], mkU64(0x00000000FFFFFFFFULL));
4962 old = src;
4963 for (i = 0; i < 6; i++) {
4964 nyu = newTemp(ty);
4965 assign(nyu,
4966 binop(Iop_Add64,
4967 binop(Iop_And64,
4968 mkexpr(old),
4969 mkexpr(mask[i])),
4970 binop(Iop_And64,
4971 binop(Iop_Shr64, mkexpr(old), mkU8(shift[i])),
4972 mkexpr(mask[i]))));
4973 old = nyu;
4975 return nyu;
4977 /*NOTREACHED*/
4978 vassert(0);
4982 /* Generate an IR sequence to do a count-leading-zeroes operation on
4983 the supplied IRTemp, and return a new IRTemp holding the result.
4984 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where
4985 the argument is zero, return the number of bits in the word (the
4986 natural semantics). */
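/* Illustrative case: for Ity_I16 and src == 0x0001, src64x is the value
   shifted left by 48, so Clz64 yields 15, the expected LZCNT result; a
   zero source takes the ITE branch and yields 16 instead. */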
4987 static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
4989 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);
4991 IRTemp src64 = newTemp(Ity_I64);
4992 assign(src64, widenUto64( mkexpr(src) ));
4994 IRTemp src64x = newTemp(Ity_I64);
4995 assign(src64x,
4996 binop(Iop_Shl64, mkexpr(src64),
4997 mkU8(64 - 8 * sizeofIRType(ty))));
4999 // Clz64 has undefined semantics when its input is zero, so
5000 // special-case around that.
5001 IRTemp res64 = newTemp(Ity_I64);
5002 assign(res64,
5003 IRExpr_ITE(
5004 binop(Iop_CmpEQ64, mkexpr(src64x), mkU64(0)),
5005 mkU64(8 * sizeofIRType(ty)),
5006 unop(Iop_Clz64, mkexpr(src64x))
5009 IRTemp res = newTemp(ty);
5010 assign(res, narrowTo(ty, mkexpr(res64)));
5011 return res;
5015 /* Generate an IR sequence to do a count-trailing-zeroes operation on
5016 the supplied IRTemp, and return a new IRTemp holding the result.
5017 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where
5018 the argument is zero, return the number of bits in the word (the
5019 natural semantics). */
5020 static IRTemp gen_TZCNT ( IRType ty, IRTemp src )
5022 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);
5024 IRTemp src64 = newTemp(Ity_I64);
5025 assign(src64, widenUto64( mkexpr(src) ));
5027 // Ctz64 has undefined semantics when its input is zero, so
5028 // special-case around that.
5029 IRTemp res64 = newTemp(Ity_I64);
5030 assign(res64,
5031 IRExpr_ITE(
5032 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0)),
5033 mkU64(8 * sizeofIRType(ty)),
5034 unop(Iop_Ctz64, mkexpr(src64))
5037 IRTemp res = newTemp(ty);
5038 assign(res, narrowTo(ty, mkexpr(res64)));
5039 return res;
5043 /*------------------------------------------------------------*/
5044 /*--- ---*/
5045 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/
5046 /*--- ---*/
5047 /*------------------------------------------------------------*/
5049 /* --- Helper functions for dealing with the register stack. --- */
5051 /* --- Set the emulation-warning pseudo-register. --- */
5053 static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
5055 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
5056 stmt( IRStmt_Put( OFFB_EMNOTE, e ) );
5059 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
5061 static IRExpr* mkQNaN64 ( void )
5063    /* QNaN is: sign 0, exponent 2047 (0b11111111111), then a 1
5064       followed by 51 zeros
5065       == 0x7FF8 0000 0000 0000
5067 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
5070 /* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */
5072 static IRExpr* get_ftop ( void )
5074 return IRExpr_Get( OFFB_FTOP, Ity_I32 );
5077 static void put_ftop ( IRExpr* e )
5079 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
5080 stmt( IRStmt_Put( OFFB_FTOP, e ) );
5083 /* --------- Get/put the C3210 bits. --------- */
5085 static IRExpr* /* :: Ity_I64 */ get_C3210 ( void )
5087 return IRExpr_Get( OFFB_FC3210, Ity_I64 );
5090 static void put_C3210 ( IRExpr* e /* :: Ity_I64 */ )
5092 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
5093 stmt( IRStmt_Put( OFFB_FC3210, e ) );
5096 /* --------- Get/put the FPU rounding mode. --------- */
5097 static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
5099 return unop(Iop_64to32, IRExpr_Get( OFFB_FPROUND, Ity_I64 ));
5102 static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
5104 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
5105 stmt( IRStmt_Put( OFFB_FPROUND, unop(Iop_32Uto64,e) ) );
5109 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */
5110 /* Produces a value in 0 .. 3, which is encoded as per the type
5111 IRRoundingMode. Since the guest_FPROUND value is also encoded as
5112 per IRRoundingMode, we merely need to get it and mask it for
5113 safety.
5115 static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
5117 return binop( Iop_And32, get_fpround(), mkU32(3) );
5120 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
5122 return mkU32(Irrm_NEAREST);
5126 /* --------- Get/set FP register tag bytes. --------- */
5128 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
5130 static void put_ST_TAG ( Int i, IRExpr* value )
5132 IRRegArray* descr;
5133 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
5134 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
5135 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
5138 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be
5139 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */
5141 static IRExpr* get_ST_TAG ( Int i )
5143 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
5144 return IRExpr_GetI( descr, get_ftop(), i );
5148 /* --------- Get/set FP registers. --------- */
5150 /* Given i, and some expression e, emit 'ST(i) = e' and set the
5151 register's tag to indicate the register is full. The previous
5152 state of the register is not checked. */
5154 static void put_ST_UNCHECKED ( Int i, IRExpr* value )
5156 IRRegArray* descr;
5157 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64);
5158 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
5159 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
5160 /* Mark the register as in-use. */
5161 put_ST_TAG(i, mkU8(1));
5164 /* Given i, and some expression e, emit
5165 ST(i) = is_full(i) ? NaN : e
5166 and set the tag accordingly.
5169 static void put_ST ( Int i, IRExpr* value )
5171 put_ST_UNCHECKED(
5173 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
5174 /* non-0 means full */
5175 mkQNaN64(),
5176 /* 0 means empty */
5177 value
5183 /* Given i, generate an expression yielding 'ST(i)'. */
5185 static IRExpr* get_ST_UNCHECKED ( Int i )
5187 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
5188 return IRExpr_GetI( descr, get_ftop(), i );
5192 /* Given i, generate an expression yielding
5193 is_full(i) ? ST(i) : NaN
5196 static IRExpr* get_ST ( Int i )
5198 return
5199 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
5200 /* non-0 means full */
5201 get_ST_UNCHECKED(i),
5202 /* 0 means empty */
5203 mkQNaN64());
5207 /* Given i, and some expression e, and a condition cond, generate IR
5208 which has the same effect as put_ST(i,e) when cond is true and has
5209 no effect when cond is false. Given the lack of proper
5210 if-then-else in the IR, this is pretty tricky.
5213 static void maybe_put_ST ( IRTemp cond, Int i, IRExpr* value )
5215 // new_tag = if cond then FULL else old_tag
5216 // new_val = if cond then (if old_tag==FULL then NaN else val)
5217 // else old_val
5219 IRTemp old_tag = newTemp(Ity_I8);
5220 assign(old_tag, get_ST_TAG(i));
5221 IRTemp new_tag = newTemp(Ity_I8);
5222 assign(new_tag,
5223 IRExpr_ITE(mkexpr(cond), mkU8(1)/*FULL*/, mkexpr(old_tag)));
5225 IRTemp old_val = newTemp(Ity_F64);
5226 assign(old_val, get_ST_UNCHECKED(i));
5227 IRTemp new_val = newTemp(Ity_F64);
5228 assign(new_val,
5229 IRExpr_ITE(mkexpr(cond),
5230 IRExpr_ITE(binop(Iop_CmpNE8, mkexpr(old_tag), mkU8(0)),
5231 /* non-0 means full */
5232 mkQNaN64(),
5233 /* 0 means empty */
5234 value),
5235 mkexpr(old_val)));
5237 put_ST_UNCHECKED(i, mkexpr(new_val));
5238 // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So
5239 // now set it to new_tag instead.
5240 put_ST_TAG(i, mkexpr(new_tag));
5243 /* Adjust FTOP downwards by one register. */
5245 static void fp_push ( void )
5247 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) );
5250 /* Adjust FTOP downwards by one register when COND is 1:I1. Else
5251 don't change it. */
5253 static void maybe_fp_push ( IRTemp cond )
5255 put_ftop( binop(Iop_Sub32, get_ftop(), unop(Iop_1Uto32,mkexpr(cond))) );
5258 /* Adjust FTOP upwards by one register, and mark the vacated register
5259 as empty. */
5261 static void fp_pop ( void )
5263 put_ST_TAG(0, mkU8(0));
5264 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
5267 /* Set the C2 bit of the FPU status register to e[0]. Assumes that
5268 e[31:1] == 0.
5270 static void set_C2 ( IRExpr* e )
5272 IRExpr* cleared = binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2));
5273 put_C3210( binop(Iop_Or64,
5274 cleared,
5275 binop(Iop_Shl64, e, mkU8(AMD64G_FC_SHIFT_C2))) );
5278 /* Generate code to check that abs(d64) < 2^63 and is finite. This is
5279 used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The
5280 test is simple, but the derivation of it is not so simple.
5282 The exponent field for an IEEE754 double is 11 bits. That means it
5283 can take values 0 through 0x7FF. If the exponent has value 0x7FF,
5284 the number is either a NaN or an Infinity and so is not finite.
5285 Furthermore, a finite value of exactly 2^63 is the smallest value
5286 that has exponent value 0x43E. Hence, what we need to do is
5287 extract the exponent, ignoring the sign bit and mantissa, and check
5288 it is < 0x43E, or <= 0x43D.
5290 To make this easily applicable to 32- and 64-bit targets, a
5291 roundabout approach is used. First the number is converted to I64,
5292 then the top 32 bits are taken. Shifting them right by 20 bits
5293 places the sign bit and exponent in the bottom 12 bits. Anding
5294 with 0x7FF gets rid of the sign bit, leaving just the exponent
5295 available for comparison.
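   /* Concretely: 2^63 has biased exponent 1023 + 63 == 0x43E, and NaNs
      and Infinities have exponent 0x7FF, so accepting only exponents
      <= 0x43D admits exactly the finite values of magnitude below 2^63. */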
5297 static IRTemp math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64 )
5299 IRTemp i64 = newTemp(Ity_I64);
5300 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(d64)) );
5301 IRTemp exponent = newTemp(Ity_I32);
5302 assign(exponent,
5303 binop(Iop_And32,
5304 binop(Iop_Shr32, unop(Iop_64HIto32, mkexpr(i64)), mkU8(20)),
5305 mkU32(0x7FF)));
5306 IRTemp in_range_and_finite = newTemp(Ity_I1);
5307 assign(in_range_and_finite,
5308 binop(Iop_CmpLE32U, mkexpr(exponent), mkU32(0x43D)));
5309 return in_range_and_finite;
5312 /* Invent a plausible-looking FPU status word value:
5313 ((ftop & 7) << 11) | (c3210 & 0x4700)
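/* For example, with FTOP == 5 and only C3 set in C3210 (bit 0x4000, as
   the 0x4700 mask suggests), the synthesised word is
   (5 << 11) | 0x4000 == 0x6800. */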
5315 static IRExpr* get_FPU_sw ( void )
5317 return
5318 unop(Iop_32to16,
5319 binop(Iop_Or32,
5320 binop(Iop_Shl32,
5321 binop(Iop_And32, get_ftop(), mkU32(7)),
5322 mkU8(11)),
5323 binop(Iop_And32, unop(Iop_64to32, get_C3210()),
5324 mkU32(0x4700))
5329 /* Generate a dirty helper call that initialises the x87 state a la
5330 FINIT. If |guard| is NULL, it is done unconditionally. Otherwise
5331 |guard| is used as a guarding condition.
5333 static void gen_FINIT_SEQUENCE ( IRExpr* guard )
5335 /* Uses dirty helper:
5336 void amd64g_do_FINIT ( VexGuestAMD64State* ) */
5337 IRDirty* d = unsafeIRDirty_0_N (
5338 0/*regparms*/,
5339 "amd64g_dirtyhelper_FINIT",
5340 &amd64g_dirtyhelper_FINIT,
5341 mkIRExprVec_1( IRExpr_GSPTR() )
5344 /* declare we're writing guest state */
5345 d->nFxState = 5;
5346 vex_bzero(&d->fxState, sizeof(d->fxState));
5348 d->fxState[0].fx = Ifx_Write;
5349 d->fxState[0].offset = OFFB_FTOP;
5350 d->fxState[0].size = sizeof(UInt);
5352 d->fxState[1].fx = Ifx_Write;
5353 d->fxState[1].offset = OFFB_FPREGS;
5354 d->fxState[1].size = 8 * sizeof(ULong);
5356 d->fxState[2].fx = Ifx_Write;
5357 d->fxState[2].offset = OFFB_FPTAGS;
5358 d->fxState[2].size = 8 * sizeof(UChar);
5360 d->fxState[3].fx = Ifx_Write;
5361 d->fxState[3].offset = OFFB_FPROUND;
5362 d->fxState[3].size = sizeof(ULong);
5364 d->fxState[4].fx = Ifx_Write;
5365 d->fxState[4].offset = OFFB_FC3210;
5366 d->fxState[4].size = sizeof(ULong);
5368 if (guard)
5369 d->guard = guard;
5371 stmt( IRStmt_Dirty(d) );
5375 /* ------------------------------------------------------- */
5376 /* Given all that stack-mangling junk, we can now go ahead
5377 and describe FP instructions.
5380 /* ST(0) = ST(0) `op` mem64/32(addr)
5381 Need to check ST(0)'s tag on read, but not on write.
5383 static
5384 void fp_do_op_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf,
5385 IROp op, Bool dbl )
5387 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
5388 if (dbl) {
5389 put_ST_UNCHECKED(0,
5390 triop( op,
5391 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5392 get_ST(0),
5393 loadLE(Ity_F64,mkexpr(addr))
5395 } else {
5396 put_ST_UNCHECKED(0,
5397 triop( op,
5398 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5399 get_ST(0),
5400 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr)))
5406 /* ST(0) = mem64/32(addr) `op` ST(0)
5407 Need to check ST(0)'s tag on read, but not on write.
5409 static
5410 void fp_do_oprev_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf,
5411 IROp op, Bool dbl )
5413 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
5414 if (dbl) {
5415 put_ST_UNCHECKED(0,
5416 triop( op,
5417 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5418 loadLE(Ity_F64,mkexpr(addr)),
5419 get_ST(0)
5421 } else {
5422 put_ST_UNCHECKED(0,
5423 triop( op,
5424 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5425 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))),
5426 get_ST(0)
5432 /* ST(dst) = ST(dst) `op` ST(src).
5433 Check dst and src tags when reading but not on write.
5435 static
5436 void fp_do_op_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
5437 Bool pop_after )
5439 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
5440 put_ST_UNCHECKED(
5441 st_dst,
5442 triop( op,
5443 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5444 get_ST(st_dst),
5445 get_ST(st_src) )
5447 if (pop_after)
5448 fp_pop();
5451 /* ST(dst) = ST(src) `op` ST(dst).
5452 Check dst and src tags when reading but not on write.
5454 static
5455 void fp_do_oprev_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
5456 Bool pop_after )
5458 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
5459 put_ST_UNCHECKED(
5460 st_dst,
5461 triop( op,
5462 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5463 get_ST(st_src),
5464 get_ST(st_dst) )
5466 if (pop_after)
5467 fp_pop();
5470 /* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */
5471 static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after )
5473 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i);
5474 /* This is a bit of a hack (and isn't really right). It sets
5475 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
5476 documentation implies A and S are unchanged.
5478 /* It's also fishy in that it is used both for COMIP and
5479 UCOMIP, and they aren't the same (although similar). */
5480 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
5481 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
5482 stmt( IRStmt_Put(
5483 OFFB_CC_DEP1,
5484 binop( Iop_And64,
5485 unop( Iop_32Uto64,
5486 binop(Iop_CmpF64, get_ST(0), get_ST(i))),
5487 mkU64(0x45)
5488 )));
5489 if (pop_after)
5490 fp_pop();
5494 /* returns
5495 32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 )
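   /* For instance e32 == 40000: 40000 + 32768 == 72768 >= 65536, so the
      saturated value 0x8000 is returned; e32 == -5 gives 32763 < 65536
      and hence the plain truncation 0xFFFB. */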
5497 static IRExpr* x87ishly_qnarrow_32_to_16 ( IRExpr* e32 )
5499 IRTemp t32 = newTemp(Ity_I32);
5500 assign( t32, e32 );
5501 return
5502 IRExpr_ITE(
5503 binop(Iop_CmpLT64U,
5504 unop(Iop_32Uto64,
5505 binop(Iop_Add32, mkexpr(t32), mkU32(32768))),
5506 mkU64(65536)),
5507 unop(Iop_32to16, mkexpr(t32)),
5508 mkU16( 0x8000 ) );
5512 static
5513 ULong dis_FPU ( /*OUT*/Bool* decode_ok,
5514 const VexAbiInfo* vbi, Prefix pfx, Long delta )
5516 Int len;
5517 UInt r_src, r_dst;
5518 HChar dis_buf[50];
5519 IRTemp t1, t2;
5521 /* On entry, delta points at the second byte of the insn (the modrm
5522 byte).*/
5523 UChar first_opcode = getUChar(delta-1);
5524 UChar modrm = getUChar(delta+0);
5526 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
5528 if (first_opcode == 0xD8) {
5529 if (modrm < 0xC0) {
5531 /* bits 5,4,3 are an opcode extension, and the modRM also
5532 specifies an address. */
5533 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
5534 delta += len;
5536 switch (gregLO3ofRM(modrm)) {
5538 case 0: /* FADD single-real */
5539 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False );
5540 break;
5542 case 1: /* FMUL single-real */
5543 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False );
5544 break;
5546 case 2: /* FCOM single-real */
5547 DIP("fcoms %s\n", dis_buf);
5548 /* This forces C1 to zero, which isn't right. */
5549 /* The AMD documentation suggests that forcing C1 to
5550 zero is correct (Eliot Moss) */
5551 put_C3210(
5552 unop( Iop_32Uto64,
5553 binop( Iop_And32,
5554 binop(Iop_Shl32,
5555 binop(Iop_CmpF64,
5556 get_ST(0),
5557 unop(Iop_F32toF64,
5558 loadLE(Ity_F32,mkexpr(addr)))),
5559 mkU8(8)),
5560 mkU32(0x4500)
5561 )));
5562 break;
5564 case 3: /* FCOMP single-real */
5565 /* The AMD documentation suggests that forcing C1 to
5566 zero is correct (Eliot Moss) */
5567 DIP("fcomps %s\n", dis_buf);
5568 /* This forces C1 to zero, which isn't right. */
5569 put_C3210(
5570 unop( Iop_32Uto64,
5571 binop( Iop_And32,
5572 binop(Iop_Shl32,
5573 binop(Iop_CmpF64,
5574 get_ST(0),
5575 unop(Iop_F32toF64,
5576 loadLE(Ity_F32,mkexpr(addr)))),
5577 mkU8(8)),
5578 mkU32(0x4500)
5579 )));
5580 fp_pop();
5581 break;
5583 case 4: /* FSUB single-real */
5584 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False );
5585 break;
5587 case 5: /* FSUBR single-real */
5588 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False );
5589 break;
5591 case 6: /* FDIV single-real */
5592 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False );
5593 break;
5595 case 7: /* FDIVR single-real */
5596 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False );
5597 break;
5599 default:
5600 vex_printf("unhandled opc_aux = 0x%2x\n",
5601 (UInt)gregLO3ofRM(modrm));
5602 vex_printf("first_opcode == 0xD8\n");
5603 goto decode_fail;
5605 } else {
5606 delta++;
5607 switch (modrm) {
5609 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
5610 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False );
5611 break;
5613 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
5614 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False );
5615 break;
5617 /* Dunno if this is right */
5618 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
5619 r_dst = (UInt)modrm - 0xD0;
5620 DIP("fcom %%st(0),%%st(%u)\n", r_dst);
5621 /* This forces C1 to zero, which isn't right. */
5622 put_C3210(
5623 unop(Iop_32Uto64,
5624 binop( Iop_And32,
5625 binop(Iop_Shl32,
5626 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5627 mkU8(8)),
5628 mkU32(0x4500)
5629 )));
5630 break;
5632 /* Dunno if this is right */
5633 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
5634 r_dst = (UInt)modrm - 0xD8;
5635 DIP("fcomp %%st(0),%%st(%u)\n", r_dst);
5636 /* This forces C1 to zero, which isn't right. */
5637 put_C3210(
5638 unop(Iop_32Uto64,
5639 binop( Iop_And32,
5640 binop(Iop_Shl32,
5641 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5642 mkU8(8)),
5643 mkU32(0x4500)
5644 )));
5645 fp_pop();
5646 break;
5648 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
5649 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False );
5650 break;
5652 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
5653 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False );
5654 break;
5656 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
5657 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False );
5658 break;
5660 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
5661 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False );
5662 break;
5664 default:
5665 goto decode_fail;
5670 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
5671 else
5672 if (first_opcode == 0xD9) {
5673 if (modrm < 0xC0) {
5675 /* bits 5,4,3 are an opcode extension, and the modRM also
5676 specifies an address. */
5677 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
5678 delta += len;
5680 switch (gregLO3ofRM(modrm)) {
5682 case 0: /* FLD single-real */
5683 DIP("flds %s\n", dis_buf);
5684 fp_push();
5685 put_ST(0, unop(Iop_F32toF64,
5686 loadLE(Ity_F32, mkexpr(addr))));
5687 break;
5689 case 2: /* FST single-real */
5690 DIP("fsts %s\n", dis_buf);
5691 storeLE(mkexpr(addr),
5692 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
5693 break;
5695 case 3: /* FSTP single-real */
5696 DIP("fstps %s\n", dis_buf);
5697 storeLE(mkexpr(addr),
5698 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
5699 fp_pop();
5700 break;
5702 case 4: { /* FLDENV m28 */
5703 /* Uses dirty helper:
5704                  VexEmNote amd64g_do_FLDENV ( VexGuestAMD64State*, HWord ) */
5705 IRTemp ew = newTemp(Ity_I32);
5706 IRTemp w64 = newTemp(Ity_I64);
5707 IRDirty* d = unsafeIRDirty_0_N (
5708 0/*regparms*/,
5709 "amd64g_dirtyhelper_FLDENV",
5710 &amd64g_dirtyhelper_FLDENV,
5711 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
5713 d->tmp = w64;
5714 /* declare we're reading memory */
5715 d->mFx = Ifx_Read;
5716 d->mAddr = mkexpr(addr);
5717 d->mSize = 28;
5719 /* declare we're writing guest state */
5720 d->nFxState = 4;
5721 vex_bzero(&d->fxState, sizeof(d->fxState));
5723 d->fxState[0].fx = Ifx_Write;
5724 d->fxState[0].offset = OFFB_FTOP;
5725 d->fxState[0].size = sizeof(UInt);
5727 d->fxState[1].fx = Ifx_Write;
5728 d->fxState[1].offset = OFFB_FPTAGS;
5729 d->fxState[1].size = 8 * sizeof(UChar);
5731 d->fxState[2].fx = Ifx_Write;
5732 d->fxState[2].offset = OFFB_FPROUND;
5733 d->fxState[2].size = sizeof(ULong);
5735 d->fxState[3].fx = Ifx_Write;
5736 d->fxState[3].offset = OFFB_FC3210;
5737 d->fxState[3].size = sizeof(ULong);
5739 stmt( IRStmt_Dirty(d) );
5741 /* ew contains any emulation warning we may need to
5742 issue. If needed, side-exit to the next insn,
5743 reporting the warning, so that Valgrind's dispatcher
5744 sees the warning. */
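/* Note the exit target is guest_RIP_bbstart+delta, i.e. the
   instruction following this one, so once the dispatcher has noted
   the warning, guest execution simply resumes at the next insn. */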
5745 assign(ew, unop(Iop_64to32,mkexpr(w64)) );
5746 put_emwarn( mkexpr(ew) );
5747 stmt(
5748 IRStmt_Exit(
5749 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
5750 Ijk_EmWarn,
5751 IRConst_U64( guest_RIP_bbstart+delta ),
5752 OFFB_RIP
5756 DIP("fldenv %s\n", dis_buf);
5757 break;
5760 case 5: {/* FLDCW */
5761 /* The only thing we observe in the control word is the
5762 rounding mode. Therefore, pass the 16-bit value
5763 (x87 native-format control word) to a clean helper,
5764 getting back a 64-bit value, the lower half of which
5765 is the FPROUND value to store, and the upper half of
5766 which is the emulation-warning token which may be
5767 generated. */
5769 /* ULong amd64g_check_fldcw ( ULong ); */
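/* In other words (as the unpacking below relies on), the helper
   packs its results as (emulation-warning << 32) | new-FPROUND,
   which Iop_64to32 / Iop_64HIto32 then split apart. */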
5770 IRTemp t64 = newTemp(Ity_I64);
5771 IRTemp ew = newTemp(Ity_I32);
5772 DIP("fldcw %s\n", dis_buf);
5773 assign( t64, mkIRExprCCall(
5774 Ity_I64, 0/*regparms*/,
5775 "amd64g_check_fldcw",
5776 &amd64g_check_fldcw,
5777 mkIRExprVec_1(
5778 unop( Iop_16Uto64,
5779 loadLE(Ity_I16, mkexpr(addr)))
5784 put_fpround( unop(Iop_64to32, mkexpr(t64)) );
5785 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
5786 put_emwarn( mkexpr(ew) );
5787 /* Finally, if an emulation warning was reported,
5788 side-exit to the next insn, reporting the warning,
5789 so that Valgrind's dispatcher sees the warning. */
5790 stmt(
5791 IRStmt_Exit(
5792 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
5793 Ijk_EmWarn,
5794 IRConst_U64( guest_RIP_bbstart+delta ),
5795 OFFB_RIP
5798 break;
5801 case 6: { /* FNSTENV m28 */
5802 /* Uses dirty helper:
5803 void amd64g_dirtyhelper_FSTENV ( VexGuestAMD64State*, HWord ) */
5804 IRDirty* d = unsafeIRDirty_0_N (
5805 0/*regparms*/,
5806 "amd64g_dirtyhelper_FSTENV",
5807 &amd64g_dirtyhelper_FSTENV,
5808 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
5810 /* declare we're writing memory */
5811 d->mFx = Ifx_Write;
5812 d->mAddr = mkexpr(addr);
5813 d->mSize = 28;
5815 /* declare we're reading guest state */
5816 d->nFxState = 4;
5817 vex_bzero(&d->fxState, sizeof(d->fxState));
5819 d->fxState[0].fx = Ifx_Read;
5820 d->fxState[0].offset = OFFB_FTOP;
5821 d->fxState[0].size = sizeof(UInt);
5823 d->fxState[1].fx = Ifx_Read;
5824 d->fxState[1].offset = OFFB_FPTAGS;
5825 d->fxState[1].size = 8 * sizeof(UChar);
5827 d->fxState[2].fx = Ifx_Read;
5828 d->fxState[2].offset = OFFB_FPROUND;
5829 d->fxState[2].size = sizeof(ULong);
5831 d->fxState[3].fx = Ifx_Read;
5832 d->fxState[3].offset = OFFB_FC3210;
5833 d->fxState[3].size = sizeof(ULong);
5835 stmt( IRStmt_Dirty(d) );
5837 DIP("fnstenv %s\n", dis_buf);
5838 break;
5841 case 7: /* FNSTCW */
5842 /* Fake up a native x87 FPU control word. The only
5843 thing it depends on is FPROUND[1:0], so call a clean
5844 helper to cook it up. */
5845 /* ULong amd64g_create_fpucw ( ULong fpround ) */
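/* Only the rounding-control field (bits 11:10 of the native x87
   control word) comes from real guest state (FPROUND); the other
   control-word fields are not tracked here, so the helper has to
   invent plausible values for them. */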
5846 DIP("fnstcw %s\n", dis_buf);
5847 storeLE(
5848 mkexpr(addr),
5849 unop( Iop_64to16,
5850 mkIRExprCCall(
5851 Ity_I64, 0/*regp*/,
5852 "amd64g_create_fpucw", &amd64g_create_fpucw,
5853 mkIRExprVec_1( unop(Iop_32Uto64, get_fpround()) )
5857 break;
5859 default:
5860 vex_printf("unhandled opc_aux = 0x%2x\n",
5861 (UInt)gregLO3ofRM(modrm));
5862 vex_printf("first_opcode == 0xD9\n");
5863 goto decode_fail;
5866 } else {
5867 delta++;
5868 switch (modrm) {
5870 case 0xC0 ... 0xC7: /* FLD %st(?) */
5871 r_src = (UInt)modrm - 0xC0;
5872 DIP("fld %%st(%u)\n", r_src);
5873 t1 = newTemp(Ity_F64);
5874 assign(t1, get_ST(r_src));
5875 fp_push();
5876 put_ST(0, mkexpr(t1));
5877 break;
5879 case 0xC8 ... 0xCF: /* FXCH %st(?) */
5880 r_src = (UInt)modrm - 0xC8;
5881 DIP("fxch %%st(%u)\n", r_src);
5882 t1 = newTemp(Ity_F64);
5883 t2 = newTemp(Ity_F64);
5884 assign(t1, get_ST(0));
5885 assign(t2, get_ST(r_src));
5886 put_ST_UNCHECKED(0, mkexpr(t2));
5887 put_ST_UNCHECKED(r_src, mkexpr(t1));
5888 break;
5890 case 0xE0: /* FCHS */
5891 DIP("fchs\n");
5892 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0)));
5893 break;
5895 case 0xE1: /* FABS */
5896 DIP("fabs\n");
5897 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0)));
5898 break;
5900 case 0xE5: { /* FXAM */
5901 /* This is an interesting one. It examines %st(0),
5902 regardless of whether the tag says it's empty or not.
5903 Here, just pass both the tag (in our format) and the
5904 value (as a double, actually a ULong) to a helper
5905 function. */
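/* For reference: FXAM reports a classification of %st(0) (zero,
   NaN, infinity, normal, denormal, empty, ...) in C3:C2:C0 and
   puts the sign bit in C1; the helper returns the complete C3210
   mask, ready to be put back into the guest state. */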
5906 IRExpr** args
5907 = mkIRExprVec_2( unop(Iop_8Uto64, get_ST_TAG(0)),
5908 unop(Iop_ReinterpF64asI64,
5909 get_ST_UNCHECKED(0)) );
5910 put_C3210(mkIRExprCCall(
5911 Ity_I64,
5912 0/*regparm*/,
5913 "amd64g_calculate_FXAM", &amd64g_calculate_FXAM,
5914 args
5916 DIP("fxam\n");
5917 break;
5920 case 0xE8: /* FLD1 */
5921 DIP("fld1\n");
5922 fp_push();
5923 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
5924 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL)));
5925 break;
5927 case 0xE9: /* FLDL2T */
5928 DIP("fldl2t\n");
5929 fp_push();
5930 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
5931 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL)));
5932 break;
5934 case 0xEA: /* FLDL2E */
5935 DIP("fldl2e\n");
5936 fp_push();
5937 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
5938 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL)));
5939 break;
5941 case 0xEB: /* FLDPI */
5942 DIP("fldpi\n");
5943 fp_push();
5944 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
5945 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL)));
5946 break;
5948 case 0xEC: /* FLDLG2 */
5949 DIP("fldlg2\n");
5950 fp_push();
5951 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
5952 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL)));
5953 break;
5955 case 0xED: /* FLDLN2 */
5956 DIP("fldln2\n");
5957 fp_push();
5958 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
5959 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL)));
5960 break;
5962 case 0xEE: /* FLDZ */
5963 DIP("fldz\n");
5964 fp_push();
5965 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
5966 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL)));
5967 break;
5969 case 0xF0: /* F2XM1 */
5970 DIP("f2xm1\n");
5971 put_ST_UNCHECKED(0,
5972 binop(Iop_2xm1F64,
5973 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5974 get_ST(0)));
5975 break;
5977 case 0xF1: /* FYL2X */
5978 DIP("fyl2x\n");
5979 put_ST_UNCHECKED(1,
5980 triop(Iop_Yl2xF64,
5981 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5982 get_ST(1),
5983 get_ST(0)));
5984 fp_pop();
5985 break;
5987 case 0xF2: { /* FPTAN */
5988 DIP("fptan\n");
5989 IRTemp argD = newTemp(Ity_F64);
5990 assign(argD, get_ST(0));
5991 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
5992 IRTemp resD = newTemp(Ity_F64);
5993 assign(resD,
5994 IRExpr_ITE(
5995 mkexpr(argOK),
5996 binop(Iop_TanF64,
5997 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5998 mkexpr(argD)),
5999 mkexpr(argD))
6001 put_ST_UNCHECKED(0, mkexpr(resD));
6002 /* Conditionally push 1.0 on the stack, if the arg is
6003 in range */
6004 maybe_fp_push(argOK);
6005 maybe_put_ST(argOK, 0,
6006 IRExpr_Const(IRConst_F64(1.0)));
6007 set_C2( binop(Iop_Xor64,
6008 unop(Iop_1Uto64, mkexpr(argOK)),
6009 mkU64(1)) );
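/* x87 rule being modelled: if the operand is outside the legal
   trig range (roughly |x| >= 2^63), FPTAN leaves %st(0) alone,
   skips the push of 1.0, and sets C2=1 to say "reduction not
   done".  Hence the Xor above: C2 = !argOK. */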
6010 break;
6013 case 0xF3: /* FPATAN */
6014 DIP("fpatan\n");
6015 put_ST_UNCHECKED(1,
6016 triop(Iop_AtanF64,
6017 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6018 get_ST(1),
6019 get_ST(0)));
6020 fp_pop();
6021 break;
6023 case 0xF4: { /* FXTRACT */
6024 IRTemp argF = newTemp(Ity_F64);
6025 IRTemp sigF = newTemp(Ity_F64);
6026 IRTemp expF = newTemp(Ity_F64);
6027 IRTemp argI = newTemp(Ity_I64);
6028 IRTemp sigI = newTemp(Ity_I64);
6029 IRTemp expI = newTemp(Ity_I64);
6030 DIP("fxtract\n");
6031 assign( argF, get_ST(0) );
6032 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF)));
6033 assign( sigI,
6034 mkIRExprCCall(
6035 Ity_I64, 0/*regparms*/,
6036 "x86amd64g_calculate_FXTRACT",
6037 &x86amd64g_calculate_FXTRACT,
6038 mkIRExprVec_2( mkexpr(argI),
6039 mkIRExpr_HWord(0)/*sig*/ ))
6041 assign( expI,
6042 mkIRExprCCall(
6043 Ity_I64, 0/*regparms*/,
6044 "x86amd64g_calculate_FXTRACT",
6045 &x86amd64g_calculate_FXTRACT,
6046 mkIRExprVec_2( mkexpr(argI),
6047 mkIRExpr_HWord(1)/*exp*/ ))
6049 assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) );
6050 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) );
6051 /* exponent */
6052 put_ST_UNCHECKED(0, mkexpr(expF) );
6053 fp_push();
6054 /* significand */
6055 put_ST(0, mkexpr(sigF) );
6056 break;
6059 case 0xF5: { /* FPREM1 -- IEEE compliant */
6060 IRTemp a1 = newTemp(Ity_F64);
6061 IRTemp a2 = newTemp(Ity_F64);
6062 DIP("fprem1\n");
6063 /* Do FPREM1 twice, once to get the remainder, and once
6064 to get the C3210 flag values. */
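/* For FPREM1 the interesting status bits are: C2=1 if the
   reduction was incomplete; otherwise C2=0 and C0,C3,C1 hold
   bits 2,1,0 of the quotient.  Iop_PRem1C3210F64 computes just
   those flag bits, hence the operation is done twice. */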
6065 assign( a1, get_ST(0) );
6066 assign( a2, get_ST(1) );
6067 put_ST_UNCHECKED(0,
6068 triop(Iop_PRem1F64,
6069 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6070 mkexpr(a1),
6071 mkexpr(a2)));
6072 put_C3210(
6073 unop(Iop_32Uto64,
6074 triop(Iop_PRem1C3210F64,
6075 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6076 mkexpr(a1),
6077 mkexpr(a2)) ));
6078 break;
6081 case 0xF7: /* FINCSTP */
6082 DIP("fincstp\n");
6083 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
6084 break;
6086 case 0xF8: { /* FPREM -- not IEEE compliant */
6087 IRTemp a1 = newTemp(Ity_F64);
6088 IRTemp a2 = newTemp(Ity_F64);
6089 DIP("fprem\n");
6090 /* Do FPREM twice, once to get the remainder, and once
6091 to get the C3210 flag values. */
6092 assign( a1, get_ST(0) );
6093 assign( a2, get_ST(1) );
6094 put_ST_UNCHECKED(0,
6095 triop(Iop_PRemF64,
6096 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6097 mkexpr(a1),
6098 mkexpr(a2)));
6099 put_C3210(
6100 unop(Iop_32Uto64,
6101 triop(Iop_PRemC3210F64,
6102 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6103 mkexpr(a1),
6104 mkexpr(a2)) ));
6105 break;
6108 case 0xF9: /* FYL2XP1 */
6109 DIP("fyl2xp1\n");
6110 put_ST_UNCHECKED(1,
6111 triop(Iop_Yl2xp1F64,
6112 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6113 get_ST(1),
6114 get_ST(0)));
6115 fp_pop();
6116 break;
6118 case 0xFA: /* FSQRT */
6119 DIP("fsqrt\n");
6120 put_ST_UNCHECKED(0,
6121 binop(Iop_SqrtF64,
6122 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6123 get_ST(0)));
6124 break;
6126 case 0xFB: { /* FSINCOS */
6127 DIP("fsincos\n");
6128 IRTemp argD = newTemp(Ity_F64);
6129 assign(argD, get_ST(0));
6130 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
6131 IRTemp resD = newTemp(Ity_F64);
6132 assign(resD,
6133 IRExpr_ITE(
6134 mkexpr(argOK),
6135 binop(Iop_SinF64,
6136 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6137 mkexpr(argD)),
6138 mkexpr(argD))
6140 put_ST_UNCHECKED(0, mkexpr(resD));
6141 /* Conditionally push the cos value on the stack, if
6142 the arg is in range */
6143 maybe_fp_push(argOK);
6144 maybe_put_ST(argOK, 0,
6145 binop(Iop_CosF64,
6146 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6147 mkexpr(argD)));
6148 set_C2( binop(Iop_Xor64,
6149 unop(Iop_1Uto64, mkexpr(argOK)),
6150 mkU64(1)) );
6151 break;
6154 case 0xFC: /* FRNDINT */
6155 DIP("frndint\n");
6156 put_ST_UNCHECKED(0,
6157 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) );
6158 break;
6160 case 0xFD: /* FSCALE */
6161 DIP("fscale\n");
6162 put_ST_UNCHECKED(0,
6163 triop(Iop_ScaleF64,
6164 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6165 get_ST(0),
6166 get_ST(1)));
6167 break;
6169 case 0xFE: /* FSIN */
6170 case 0xFF: { /* FCOS */
6171 Bool isSIN = modrm == 0xFE;
6172 DIP("%s\n", isSIN ? "fsin" : "fcos");
6173 IRTemp argD = newTemp(Ity_F64);
6174 assign(argD, get_ST(0));
6175 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
6176 IRTemp resD = newTemp(Ity_F64);
6177 assign(resD,
6178 IRExpr_ITE(
6179 mkexpr(argOK),
6180 binop(isSIN ? Iop_SinF64 : Iop_CosF64,
6181 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6182 mkexpr(argD)),
6183 mkexpr(argD))
6185 put_ST_UNCHECKED(0, mkexpr(resD));
6186 set_C2( binop(Iop_Xor64,
6187 unop(Iop_1Uto64, mkexpr(argOK)),
6188 mkU64(1)) );
6189 break;
6192 default:
6193 goto decode_fail;
6198 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
6199 else
6200 if (first_opcode == 0xDA) {
6202 if (modrm < 0xC0) {
6204 /* bits 5,4,3 are an opcode extension, and the modRM also
6205 specifies an address. */
6206 IROp fop;
6207 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6208 delta += len;
6209 switch (gregLO3ofRM(modrm)) {
6211 case 0: /* FIADD m32int */ /* ST(0) += m32int */
6212 DIP("fiaddl %s\n", dis_buf);
6213 fop = Iop_AddF64;
6214 goto do_fop_m32;
6216 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
6217 DIP("fimull %s\n", dis_buf);
6218 fop = Iop_MulF64;
6219 goto do_fop_m32;
6221 case 4: /* FISUB m32int */ /* ST(0) -= m32int */
6222 DIP("fisubl %s\n", dis_buf);
6223 fop = Iop_SubF64;
6224 goto do_fop_m32;
6226 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
6227 DIP("fisubrl %s\n", dis_buf);
6228 fop = Iop_SubF64;
6229 goto do_foprev_m32;
6231 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
6232 DIP("fisubl %s\n", dis_buf);
6233 fop = Iop_DivF64;
6234 goto do_fop_m32;
6236 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
6237 DIP("fidivrl %s\n", dis_buf);
6238 fop = Iop_DivF64;
6239 goto do_foprev_m32;
6241 do_fop_m32:
6242 put_ST_UNCHECKED(0,
6243 triop(fop,
6244 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6245 get_ST(0),
6246 unop(Iop_I32StoF64,
6247 loadLE(Ity_I32, mkexpr(addr)))));
6248 break;
6250 do_foprev_m32:
6251 put_ST_UNCHECKED(0,
6252 triop(fop,
6253 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6254 unop(Iop_I32StoF64,
6255 loadLE(Ity_I32, mkexpr(addr))),
6256 get_ST(0)));
6257 break;
6259 default:
6260 vex_printf("unhandled opc_aux = 0x%2x\n",
6261 (UInt)gregLO3ofRM(modrm));
6262 vex_printf("first_opcode == 0xDA\n");
6263 goto decode_fail;
6266 } else {
6268 delta++;
6269 switch (modrm) {
6271 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
6272 r_src = (UInt)modrm - 0xC0;
6273 DIP("fcmovb %%st(%u), %%st(0)\n", r_src);
6274 put_ST_UNCHECKED(0,
6275 IRExpr_ITE(
6276 mk_amd64g_calculate_condition(AMD64CondB),
6277 get_ST(r_src), get_ST(0)) );
6278 break;
6280 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
6281 r_src = (UInt)modrm - 0xC8;
6282 DIP("fcmovz %%st(%u), %%st(0)\n", r_src);
6283 put_ST_UNCHECKED(0,
6284 IRExpr_ITE(
6285 mk_amd64g_calculate_condition(AMD64CondZ),
6286 get_ST(r_src), get_ST(0)) );
6287 break;
6289 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
6290 r_src = (UInt)modrm - 0xD0;
6291 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src);
6292 put_ST_UNCHECKED(0,
6293 IRExpr_ITE(
6294 mk_amd64g_calculate_condition(AMD64CondBE),
6295 get_ST(r_src), get_ST(0)) );
6296 break;
6298 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
6299 r_src = (UInt)modrm - 0xD8;
6300 DIP("fcmovu %%st(%u), %%st(0)\n", r_src);
6301 put_ST_UNCHECKED(0,
6302 IRExpr_ITE(
6303 mk_amd64g_calculate_condition(AMD64CondP),
6304 get_ST(r_src), get_ST(0)) );
6305 break;
6307 case 0xE9: /* FUCOMPP %st(0),%st(1) */
6308 DIP("fucompp %%st(0),%%st(1)\n");
6309 /* This forces C1 to zero, which isn't right. */
6310 put_C3210(
6311 unop(Iop_32Uto64,
6312 binop( Iop_And32,
6313 binop(Iop_Shl32,
6314 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
6315 mkU8(8)),
6316 mkU32(0x4500)
6317 )));
6318 fp_pop();
6319 fp_pop();
6320 break;
6322 default:
6323 goto decode_fail;
6329 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
6330 else
6331 if (first_opcode == 0xDB) {
6332 if (modrm < 0xC0) {
6334 /* bits 5,4,3 are an opcode extension, and the modRM also
6335 specifies an address. */
6336 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6337 delta += len;
6339 switch (gregLO3ofRM(modrm)) {
6341 case 0: /* FILD m32int */
6342 DIP("fildl %s\n", dis_buf);
6343 fp_push();
6344 put_ST(0, unop(Iop_I32StoF64,
6345 loadLE(Ity_I32, mkexpr(addr))));
6346 break;
6348 case 1: /* FISTTPL m32 (SSE3) */
6349 DIP("fisttpl %s\n", dis_buf);
6350 storeLE( mkexpr(addr),
6351 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );
6352 fp_pop();
6353 break;
6355 case 2: /* FIST m32 */
6356 DIP("fistl %s\n", dis_buf);
6357 storeLE( mkexpr(addr),
6358 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
6359 break;
6361 case 3: /* FISTP m32 */
6362 DIP("fistpl %s\n", dis_buf);
6363 storeLE( mkexpr(addr),
6364 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
6365 fp_pop();
6366 break;
6368 case 5: { /* FLD extended-real */
6369 /* Uses dirty helper:
6370 ULong amd64g_dirtyhelper_loadF80le ( ULong )
6371 addr holds the address. First, do a dirty call to
6372 get hold of the data. */
6373 IRTemp val = newTemp(Ity_I64);
6374 IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );
6376 IRDirty* d = unsafeIRDirty_1_N (
6377 val,
6378 0/*regparms*/,
6379 "amd64g_dirtyhelper_loadF80le",
6380 &amd64g_dirtyhelper_loadF80le,
6381 args
6383 /* declare that we're reading memory */
6384 d->mFx = Ifx_Read;
6385 d->mAddr = mkexpr(addr);
6386 d->mSize = 10;
6388 /* execute the dirty call, dumping the result in val. */
6389 stmt( IRStmt_Dirty(d) );
6390 fp_push();
6391 put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));
6393 DIP("fldt %s\n", dis_buf);
6394 break;
6397 case 7: { /* FSTP extended-real */
6398 /* Uses dirty helper:
6399 void amd64g_dirtyhelper_storeF80le ( ULong addr, ULong data ) */
6401 IRExpr** args
6402 = mkIRExprVec_2( mkexpr(addr),
6403 unop(Iop_ReinterpF64asI64, get_ST(0)) );
6405 IRDirty* d = unsafeIRDirty_0_N (
6406 0/*regparms*/,
6407 "amd64g_dirtyhelper_storeF80le",
6408 &amd64g_dirtyhelper_storeF80le,
6409 args
6411 /* declare we're writing memory */
6412 d->mFx = Ifx_Write;
6413 d->mAddr = mkexpr(addr);
6414 d->mSize = 10;
6416 /* execute the dirty call. */
6417 stmt( IRStmt_Dirty(d) );
6418 fp_pop();
6420 DIP("fstpt\n %s", dis_buf);
6421 break;
6424 default:
6425 vex_printf("unhandled opc_aux = 0x%2x\n",
6426 (UInt)gregLO3ofRM(modrm));
6427 vex_printf("first_opcode == 0xDB\n");
6428 goto decode_fail;
6431 } else {
6433 delta++;
6434 switch (modrm) {
6436 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
6437 r_src = (UInt)modrm - 0xC0;
6438 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src);
6439 put_ST_UNCHECKED(0,
6440 IRExpr_ITE(
6441 mk_amd64g_calculate_condition(AMD64CondNB),
6442 get_ST(r_src), get_ST(0)) );
6443 break;
6445 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
6446 r_src = (UInt)modrm - 0xC8;
6447 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src);
6448 put_ST_UNCHECKED( 0,
6450 IRExpr_ITE(
6451 mk_amd64g_calculate_condition(AMD64CondNZ),
6452 get_ST(r_src),
6453 get_ST(0)
6456 break;
6458 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
6459 r_src = (UInt)modrm - 0xD0;
6460 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src);
6461 put_ST_UNCHECKED( 0,
6463 IRExpr_ITE(
6464 mk_amd64g_calculate_condition(AMD64CondNBE),
6465 get_ST(r_src),
6466 get_ST(0)
6469 break;
6471 case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
6472 r_src = (UInt)modrm - 0xD8;
6473 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src);
6474 put_ST_UNCHECKED( 0,
6476 IRExpr_ITE(
6477 mk_amd64g_calculate_condition(AMD64CondNP),
6478 get_ST(r_src),
6479 get_ST(0)
6482 break;
6484 case 0xE2:
6485 DIP("fnclex\n");
6486 break;
6488 case 0xE3: {
6489 gen_FINIT_SEQUENCE(NULL/*no guarding condition*/);
6490 DIP("fninit\n");
6491 break;
6494 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
6495 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False );
6496 break;
6498 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
6499 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False );
6500 break;
6502 default:
6503 goto decode_fail;
6508 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
6509 else
6510 if (first_opcode == 0xDC) {
6511 if (modrm < 0xC0) {
6513 /* bits 5,4,3 are an opcode extension, and the modRM also
6514 specifies an address. */
6515 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6516 delta += len;
6518 switch (gregLO3ofRM(modrm)) {
6520 case 0: /* FADD double-real */
6521 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True );
6522 break;
6524 case 1: /* FMUL double-real */
6525 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True );
6526 break;
6528 case 2: /* FCOM double-real */
6529 DIP("fcoml %s\n", dis_buf);
6530 /* This forces C1 to zero, which isn't right. */
6531 put_C3210(
6532 unop(Iop_32Uto64,
6533 binop( Iop_And32,
6534 binop(Iop_Shl32,
6535 binop(Iop_CmpF64,
6536 get_ST(0),
6537 loadLE(Ity_F64,mkexpr(addr))),
6538 mkU8(8)),
6539 mkU32(0x4500)
6540 )));
6541 break;
6543 case 3: /* FCOMP double-real */
6544 DIP("fcompl %s\n", dis_buf);
6545 /* This forces C1 to zero, which isn't right. */
6546 put_C3210(
6547 unop(Iop_32Uto64,
6548 binop( Iop_And32,
6549 binop(Iop_Shl32,
6550 binop(Iop_CmpF64,
6551 get_ST(0),
6552 loadLE(Ity_F64,mkexpr(addr))),
6553 mkU8(8)),
6554 mkU32(0x4500)
6555 )));
6556 fp_pop();
6557 break;
6559 case 4: /* FSUB double-real */
6560 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True );
6561 break;
6563 case 5: /* FSUBR double-real */
6564 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True );
6565 break;
6567 case 6: /* FDIV double-real */
6568 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True );
6569 break;
6571 case 7: /* FDIVR double-real */
6572 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True );
6573 break;
6575 default:
6576 vex_printf("unhandled opc_aux = 0x%2x\n",
6577 (UInt)gregLO3ofRM(modrm));
6578 vex_printf("first_opcode == 0xDC\n");
6579 goto decode_fail;
6582 } else {
6584 delta++;
6585 switch (modrm) {
6587 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
6588 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False );
6589 break;
6591 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
6592 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False );
6593 break;
6595 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
6596 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False );
6597 break;
6599 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
6600 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False );
6601 break;
6603 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
6604 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False );
6605 break;
6607 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
6608 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False );
6609 break;
6611 default:
6612 goto decode_fail;
6618 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
6619 else
6620 if (first_opcode == 0xDD) {
6622 if (modrm < 0xC0) {
6624 /* bits 5,4,3 are an opcode extension, and the modRM also
6625 specifies an address. */
6626 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6627 delta += len;
6629 switch (gregLO3ofRM(modrm)) {
6631 case 0: /* FLD double-real */
6632 DIP("fldl %s\n", dis_buf);
6633 fp_push();
6634 put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
6635 break;
6637 case 1: /* FISTTPQ m64 (SSE3) */
6638 DIP("fistppll %s\n", dis_buf);
6639 storeLE( mkexpr(addr),
6640 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );
6641 fp_pop();
6642 break;
6644 case 2: /* FST double-real */
6645 DIP("fstl %s\n", dis_buf);
6646 storeLE(mkexpr(addr), get_ST(0));
6647 break;
6649 case 3: /* FSTP double-real */
6650 DIP("fstpl %s\n", dis_buf);
6651 storeLE(mkexpr(addr), get_ST(0));
6652 fp_pop();
6653 break;
6655 case 4: { /* FRSTOR m94/m108 */
6656 IRTemp ew = newTemp(Ity_I32);
6657 IRTemp w64 = newTemp(Ity_I64);
6658 IRDirty* d;
6659 if ( have66(pfx) ) {
6660 /* Uses dirty helper:
6661 VexEmNote amd64g_dirtyhelper_FRSTORS
6662 ( VexGuestAMD64State*, HWord ) */
6663 d = unsafeIRDirty_0_N (
6664 0/*regparms*/,
6665 "amd64g_dirtyhelper_FRSTORS",
6666 &amd64g_dirtyhelper_FRSTORS,
6667 mkIRExprVec_1( mkexpr(addr) )
6669 d->mSize = 94;
6670 } else {
6671 /* Uses dirty helper:
6672 VexEmNote amd64g_dirtyhelper_FRSTOR
6673 ( VexGuestAMD64State*, HWord ) */
6674 d = unsafeIRDirty_0_N (
6675 0/*regparms*/,
6676 "amd64g_dirtyhelper_FRSTOR",
6677 &amd64g_dirtyhelper_FRSTOR,
6678 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
6680 d->mSize = 108;
6683 d->tmp = w64;
6684 /* declare we're reading memory */
6685 d->mFx = Ifx_Read;
6686 d->mAddr = mkexpr(addr);
6687 /* d->mSize set above */
6689 /* declare we're writing guest state */
6690 d->nFxState = 5;
6691 vex_bzero(&d->fxState, sizeof(d->fxState));
6693 d->fxState[0].fx = Ifx_Write;
6694 d->fxState[0].offset = OFFB_FTOP;
6695 d->fxState[0].size = sizeof(UInt);
6697 d->fxState[1].fx = Ifx_Write;
6698 d->fxState[1].offset = OFFB_FPREGS;
6699 d->fxState[1].size = 8 * sizeof(ULong);
6701 d->fxState[2].fx = Ifx_Write;
6702 d->fxState[2].offset = OFFB_FPTAGS;
6703 d->fxState[2].size = 8 * sizeof(UChar);
6705 d->fxState[3].fx = Ifx_Write;
6706 d->fxState[3].offset = OFFB_FPROUND;
6707 d->fxState[3].size = sizeof(ULong);
6709 d->fxState[4].fx = Ifx_Write;
6710 d->fxState[4].offset = OFFB_FC3210;
6711 d->fxState[4].size = sizeof(ULong);
6713 stmt( IRStmt_Dirty(d) );
6715 /* ew contains any emulation warning we may need to
6716 issue. If needed, side-exit to the next insn,
6717 reporting the warning, so that Valgrind's dispatcher
6718 sees the warning. */
6719 assign(ew, unop(Iop_64to32,mkexpr(w64)) );
6720 put_emwarn( mkexpr(ew) );
6721 stmt(
6722 IRStmt_Exit(
6723 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
6724 Ijk_EmWarn,
6725 IRConst_U64( guest_RIP_bbstart+delta ),
6726 OFFB_RIP
6730 if ( have66(pfx) ) {
6731 DIP("frstors %s\n", dis_buf);
6732 } else {
6733 DIP("frstor %s\n", dis_buf);
6735 break;
6738 case 6: { /* FNSAVE m94/m108 */
6739 IRDirty *d;
6740 if ( have66(pfx) ) {
6741 /* Uses dirty helper:
6742 void amd64g_dirtyhelper_FNSAVES ( VexGuestAMD64State*,
6743 HWord ) */
6744 d = unsafeIRDirty_0_N (
6745 0/*regparms*/,
6746 "amd64g_dirtyhelper_FNSAVES",
6747 &amd64g_dirtyhelper_FNSAVES,
6748 mkIRExprVec_1( mkexpr(addr) )
6750 d->mSize = 94;
6751 } else {
6752 /* Uses dirty helper:
6753 void amd64g_dirtyhelper_FNSAVE ( VexGuestAMD64State*,
6754 HWord ) */
6755 d = unsafeIRDirty_0_N (
6756 0/*regparms*/,
6757 "amd64g_dirtyhelper_FNSAVE",
6758 &amd64g_dirtyhelper_FNSAVE,
6759 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
6761 d->mSize = 108;
6764 /* declare we're writing memory */
6765 d->mFx = Ifx_Write;
6766 d->mAddr = mkexpr(addr);
6767 /* d->mSize set above */
6769 /* declare we're reading guest state */
6770 d->nFxState = 5;
6771 vex_bzero(&d->fxState, sizeof(d->fxState));
6773 d->fxState[0].fx = Ifx_Read;
6774 d->fxState[0].offset = OFFB_FTOP;
6775 d->fxState[0].size = sizeof(UInt);
6777 d->fxState[1].fx = Ifx_Read;
6778 d->fxState[1].offset = OFFB_FPREGS;
6779 d->fxState[1].size = 8 * sizeof(ULong);
6781 d->fxState[2].fx = Ifx_Read;
6782 d->fxState[2].offset = OFFB_FPTAGS;
6783 d->fxState[2].size = 8 * sizeof(UChar);
6785 d->fxState[3].fx = Ifx_Read;
6786 d->fxState[3].offset = OFFB_FPROUND;
6787 d->fxState[3].size = sizeof(ULong);
6789 d->fxState[4].fx = Ifx_Read;
6790 d->fxState[4].offset = OFFB_FC3210;
6791 d->fxState[4].size = sizeof(ULong);
6793 stmt( IRStmt_Dirty(d) );
6795 if ( have66(pfx) ) {
6796 DIP("fnsaves %s\n", dis_buf);
6797 } else {
6798 DIP("fnsave %s\n", dis_buf);
6800 break;
6803 case 7: { /* FNSTSW m16 */
6804 IRExpr* sw = get_FPU_sw();
6805 vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
6806 storeLE( mkexpr(addr), sw );
6807 DIP("fnstsw %s\n", dis_buf);
6808 break;
6811 default:
6812 vex_printf("unhandled opc_aux = 0x%2x\n",
6813 (UInt)gregLO3ofRM(modrm));
6814 vex_printf("first_opcode == 0xDD\n");
6815 goto decode_fail;
6817 } else {
6818 delta++;
6819 switch (modrm) {
6821 case 0xC0 ... 0xC7: /* FFREE %st(?) */
6822 r_dst = (UInt)modrm - 0xC0;
6823 DIP("ffree %%st(%u)\n", r_dst);
6824 put_ST_TAG ( r_dst, mkU8(0) );
6825 break;
6827 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
6828 r_dst = (UInt)modrm - 0xD0;
6829 DIP("fst %%st(0),%%st(%u)\n", r_dst);
6830 /* P4 manual says: "If the destination operand is a
6831 non-empty register, the invalid-operation exception
6832 is not generated."  Hence put_ST_UNCHECKED. */
6833 put_ST_UNCHECKED(r_dst, get_ST(0));
6834 break;
6836 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
6837 r_dst = (UInt)modrm - 0xD8;
6838 DIP("fstp %%st(0),%%st(%u)\n", r_dst);
6839 /* P4 manual says: "If the destination operand is a
6840 non-empty register, the invalid-operation exception
6841 is not generated."  Hence put_ST_UNCHECKED. */
6842 put_ST_UNCHECKED(r_dst, get_ST(0));
6843 fp_pop();
6844 break;
6846 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
6847 r_dst = (UInt)modrm - 0xE0;
6848 DIP("fucom %%st(0),%%st(%u)\n", r_dst);
6849 /* This forces C1 to zero, which isn't right. */
6850 put_C3210(
6851 unop(Iop_32Uto64,
6852 binop( Iop_And32,
6853 binop(Iop_Shl32,
6854 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
6855 mkU8(8)),
6856 mkU32(0x4500)
6857 )));
6858 break;
6860 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
6861 r_dst = (UInt)modrm - 0xE8;
6862 DIP("fucomp %%st(0),%%st(%u)\n", r_dst);
6863 /* This forces C1 to zero, which isn't right. */
6864 put_C3210(
6865 unop(Iop_32Uto64,
6866 binop( Iop_And32,
6867 binop(Iop_Shl32,
6868 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
6869 mkU8(8)),
6870 mkU32(0x4500)
6871 )));
6872 fp_pop();
6873 break;
6875 default:
6876 goto decode_fail;
6881 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
6882 else
6883 if (first_opcode == 0xDE) {
6885 if (modrm < 0xC0) {
6887 /* bits 5,4,3 are an opcode extension, and the modRM also
6888 specifies an address. */
6889 IROp fop;
6890 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6891 delta += len;
6893 switch (gregLO3ofRM(modrm)) {
6895 case 0: /* FIADD m16int */ /* ST(0) += m16int */
6896 DIP("fiaddw %s\n", dis_buf);
6897 fop = Iop_AddF64;
6898 goto do_fop_m16;
6900 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
6901 DIP("fimulw %s\n", dis_buf);
6902 fop = Iop_MulF64;
6903 goto do_fop_m16;
6905 case 4: /* FISUB m16int */ /* ST(0) -= m16int */
6906 DIP("fisubw %s\n", dis_buf);
6907 fop = Iop_SubF64;
6908 goto do_fop_m16;
6910 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
6911 DIP("fisubrw %s\n", dis_buf);
6912 fop = Iop_SubF64;
6913 goto do_foprev_m16;
6915 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
6916 DIP("fisubw %s\n", dis_buf);
6917 fop = Iop_DivF64;
6918 goto do_fop_m16;
6920 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
6921 DIP("fidivrw %s\n", dis_buf);
6922 fop = Iop_DivF64;
6923 goto do_foprev_m16;
6925 do_fop_m16:
6926 put_ST_UNCHECKED(0,
6927 triop(fop,
6928 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6929 get_ST(0),
6930 unop(Iop_I32StoF64,
6931 unop(Iop_16Sto32,
6932 loadLE(Ity_I16, mkexpr(addr))))));
6933 break;
6935 do_foprev_m16:
6936 put_ST_UNCHECKED(0,
6937 triop(fop,
6938 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6939 unop(Iop_I32StoF64,
6940 unop(Iop_16Sto32,
6941 loadLE(Ity_I16, mkexpr(addr)))),
6942 get_ST(0)));
6943 break;
6945 default:
6946 vex_printf("unhandled opc_aux = 0x%2x\n",
6947 (UInt)gregLO3ofRM(modrm));
6948 vex_printf("first_opcode == 0xDE\n");
6949 goto decode_fail;
6952 } else {
6954 delta++;
6955 switch (modrm) {
6957 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
6958 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );
6959 break;
6961 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
6962 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
6963 break;
6965 case 0xD9: /* FCOMPP %st(0),%st(1) */
6966 DIP("fcompp %%st(0),%%st(1)\n");
6967 /* This forces C1 to zero, which isn't right. */
6968 put_C3210(
6969 unop(Iop_32Uto64,
6970 binop( Iop_And32,
6971 binop(Iop_Shl32,
6972 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
6973 mkU8(8)),
6974 mkU32(0x4500)
6975 )));
6976 fp_pop();
6977 fp_pop();
6978 break;
6980 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
6981 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True );
6982 break;
6984 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
6985 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True );
6986 break;
6988 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
6989 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );
6990 break;
6992 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
6993 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
6994 break;
6996 default:
6997 goto decode_fail;
7003 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
7004 else
7005 if (first_opcode == 0xDF) {
7007 if (modrm < 0xC0) {
7009 /* bits 5,4,3 are an opcode extension, and the modRM also
7010 specifies an address. */
7011 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7012 delta += len;
7014 switch (gregLO3ofRM(modrm)) {
7016 case 0: /* FILD m16int */
7017 DIP("fildw %s\n", dis_buf);
7018 fp_push();
7019 put_ST(0, unop(Iop_I32StoF64,
7020 unop(Iop_16Sto32,
7021 loadLE(Ity_I16, mkexpr(addr)))));
7022 break;
7024 case 1: /* FISTTPS m16 (SSE3) */
7025 DIP("fisttps %s\n", dis_buf);
7026 storeLE( mkexpr(addr),
7027 x87ishly_qnarrow_32_to_16(
7028 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) ));
7029 fp_pop();
7030 break;
7032 case 2: /* FIST m16 */
7033 DIP("fists %s\n", dis_buf);
7034 storeLE( mkexpr(addr),
7035 x87ishly_qnarrow_32_to_16(
7036 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
7037 break;
7039 case 3: /* FISTP m16 */
7040 DIP("fistps %s\n", dis_buf);
7041 storeLE( mkexpr(addr),
7042 x87ishly_qnarrow_32_to_16(
7043 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
7044 fp_pop();
7045 break;
7047 case 5: /* FILD m64 */
7048 DIP("fildll %s\n", dis_buf);
7049 fp_push();
7050 put_ST(0, binop(Iop_I64StoF64,
7051 get_roundingmode(),
7052 loadLE(Ity_I64, mkexpr(addr))));
7053 break;
7055 case 7: /* FISTP m64 */
7056 DIP("fistpll %s\n", dis_buf);
7057 storeLE( mkexpr(addr),
7058 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) );
7059 fp_pop();
7060 break;
7062 default:
7063 vex_printf("unhandled opc_aux = 0x%2x\n",
7064 (UInt)gregLO3ofRM(modrm));
7065 vex_printf("first_opcode == 0xDF\n");
7066 goto decode_fail;
7069 } else {
7071 delta++;
7072 switch (modrm) {
7074 case 0xC0: /* FFREEP %st(0) */
7075 DIP("ffreep %%st(%d)\n", 0);
7076 put_ST_TAG ( 0, mkU8(0) );
7077 fp_pop();
7078 break;
7080 case 0xE0: /* FNSTSW %ax */
7081 DIP("fnstsw %%ax\n");
7082 /* Invent a plausible-looking FPU status word value and
7083 dump it in %AX:
7084 ((ftop & 7) << 11) | (c3210 & 0x4700) */
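/* Worked example: with FTOP = 5 and C3210 = 0x4500 this yields
   (5 << 11) | 0x4500 = 0x6D00.  The 0x4700 mask keeps C0 (bit 8),
   C1 (bit 9), C2 (bit 10) and C3 (bit 14); bits 13:11 carry TOP. */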
7086 putIRegRAX( 2,
7088 unop(Iop_32to16,
7089 binop(Iop_Or32,
7090 binop(Iop_Shl32,
7091 binop(Iop_And32, get_ftop(), mkU32(7)),
7092 mkU8(11)),
7093 binop(Iop_And32,
7094 unop(Iop_64to32, get_C3210()),
7095 mkU32(0x4700))
7096 )));
7097 break;
7099 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
7100 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
7101 break;
7103 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
7104 /* not really right since COMIP != UCOMIP */
7105 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
7106 break;
7108 default:
7109 goto decode_fail;
7115 else
7116 goto decode_fail;
7118 *decode_ok = True;
7119 return delta;
7121 decode_fail:
7122 *decode_ok = False;
7123 return delta;
7127 /*------------------------------------------------------------*/
7128 /*--- ---*/
7129 /*--- MMX INSTRUCTIONS ---*/
7130 /*--- ---*/
7131 /*------------------------------------------------------------*/
7133 /* Effect of MMX insns on x87 FPU state (table 11-2 of
7134 IA32 arch manual, volume 3):
7136 Read from, or write to MMX register (viz, any insn except EMMS):
7137 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
7138 * FP stack pointer set to zero
7140 EMMS:
7141 * All tags set to Invalid (empty) -- FPTAGS[i] := zero
7142 * FP stack pointer set to zero */
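/* The two preambles below differ only in the tag value written:
   do_MMX_preamble marks all eight registers valid (tag 1), while
   do_EMMS_preamble marks them all empty (tag 0); both reset the
   stack pointer (FTOP) to zero. */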
7145 static void do_MMX_preamble ( void )
7147 Int i;
7148 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
7149 IRExpr* zero = mkU32(0);
7150 IRExpr* tag1 = mkU8(1);
7151 put_ftop(zero);
7152 for (i = 0; i < 8; i++)
7153 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag1) ) );
7156 static void do_EMMS_preamble ( void )
7158 Int i;
7159 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
7160 IRExpr* zero = mkU32(0);
7161 IRExpr* tag0 = mkU8(0);
7162 put_ftop(zero);
7163 for (i = 0; i < 8; i++)
7164 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag0) ) );
7168 static IRExpr* getMMXReg ( UInt archreg )
7170 vassert(archreg < 8);
7171 return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
7175 static void putMMXReg ( UInt archreg, IRExpr* e )
7177 vassert(archreg < 8);
7178 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
7179 stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
7183 /* Helper for non-shift MMX insns. Note this is incomplete in the
7184 sense that it does not first call do_MMX_preamble() -- that is the
7185 responsibility of its caller. */
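/* Rough shape of the dispatch below: most opcodes map onto a
   single 64-bit SIMD IROp applied to (G, E).  pmaddwd (0xF5) and
   psadbw (0xF6) instead record a clean-helper name/address via the
   XXX macro and become an IR CCall.  eLeft flags the pack/unpack/
   interleave cases whose E operand must appear as the left
   argument, and invG (pandn) complements G before use. */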
7187 static
7188 ULong dis_MMXop_regmem_to_reg ( const VexAbiInfo* vbi,
7189 Prefix pfx,
7190 Long delta,
7191 UChar opc,
7192 const HChar* name,
7193 Bool show_granularity )
7195 HChar dis_buf[50];
7196 UChar modrm = getUChar(delta);
7197 Bool isReg = epartIsReg(modrm);
7198 IRExpr* argL = NULL;
7199 IRExpr* argR = NULL;
7200 IRExpr* argG = NULL;
7201 IRExpr* argE = NULL;
7202 IRTemp res = newTemp(Ity_I64);
7204 Bool invG = False;
7205 IROp op = Iop_INVALID;
7206 void* hAddr = NULL;
7207 const HChar* hName = NULL;
7208 Bool eLeft = False;
7210 # define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)
7212 switch (opc) {
7213 /* Original MMX ones */
7214 case 0xFC: op = Iop_Add8x8; break;
7215 case 0xFD: op = Iop_Add16x4; break;
7216 case 0xFE: op = Iop_Add32x2; break;
7218 case 0xEC: op = Iop_QAdd8Sx8; break;
7219 case 0xED: op = Iop_QAdd16Sx4; break;
7221 case 0xDC: op = Iop_QAdd8Ux8; break;
7222 case 0xDD: op = Iop_QAdd16Ux4; break;
7224 case 0xF8: op = Iop_Sub8x8; break;
7225 case 0xF9: op = Iop_Sub16x4; break;
7226 case 0xFA: op = Iop_Sub32x2; break;
7228 case 0xE8: op = Iop_QSub8Sx8; break;
7229 case 0xE9: op = Iop_QSub16Sx4; break;
7231 case 0xD8: op = Iop_QSub8Ux8; break;
7232 case 0xD9: op = Iop_QSub16Ux4; break;
7234 case 0xE5: op = Iop_MulHi16Sx4; break;
7235 case 0xD5: op = Iop_Mul16x4; break;
7236 case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break;
7238 case 0x74: op = Iop_CmpEQ8x8; break;
7239 case 0x75: op = Iop_CmpEQ16x4; break;
7240 case 0x76: op = Iop_CmpEQ32x2; break;
7242 case 0x64: op = Iop_CmpGT8Sx8; break;
7243 case 0x65: op = Iop_CmpGT16Sx4; break;
7244 case 0x66: op = Iop_CmpGT32Sx2; break;
7246 case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break;
7247 case 0x63: op = Iop_QNarrowBin16Sto8Sx8; eLeft = True; break;
7248 case 0x67: op = Iop_QNarrowBin16Sto8Ux8; eLeft = True; break;
7250 case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break;
7251 case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
7252 case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;
7254 case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break;
7255 case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
7256 case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;
7258 case 0xDB: op = Iop_And64; break;
7259 case 0xDF: op = Iop_And64; invG = True; break;
7260 case 0xEB: op = Iop_Or64; break;
7261 case 0xEF: /* Possibly do better here if argL and argR are the
7262 same reg */
7263 op = Iop_Xor64; break;
7265 /* Introduced in SSE1 */
7266 case 0xE0: op = Iop_Avg8Ux8; break;
7267 case 0xE3: op = Iop_Avg16Ux4; break;
7268 case 0xEE: op = Iop_Max16Sx4; break;
7269 case 0xDE: op = Iop_Max8Ux8; break;
7270 case 0xEA: op = Iop_Min16Sx4; break;
7271 case 0xDA: op = Iop_Min8Ux8; break;
7272 case 0xE4: op = Iop_MulHi16Ux4; break;
7273 case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break;
7275 /* Introduced in SSE2 */
7276 case 0xD4: op = Iop_Add64; break;
7277 case 0xFB: op = Iop_Sub64; break;
7279 default:
7280 vex_printf("\n0x%x\n", (UInt)opc);
7281 vpanic("dis_MMXop_regmem_to_reg");
7284 # undef XXX
7286 argG = getMMXReg(gregLO3ofRM(modrm));
7287 if (invG)
7288 argG = unop(Iop_Not64, argG);
7290 if (isReg) {
7291 delta++;
7292 argE = getMMXReg(eregLO3ofRM(modrm));
7293 } else {
7294 Int len;
7295 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7296 delta += len;
7297 argE = loadLE(Ity_I64, mkexpr(addr));
7300 if (eLeft) {
7301 argL = argE;
7302 argR = argG;
7303 } else {
7304 argL = argG;
7305 argR = argE;
7308 if (op != Iop_INVALID) {
7309 vassert(hName == NULL);
7310 vassert(hAddr == NULL);
7311 assign(res, binop(op, argL, argR));
7312 } else {
7313 vassert(hName != NULL);
7314 vassert(hAddr != NULL);
7315 assign( res,
7316 mkIRExprCCall(
7317 Ity_I64,
7318 0/*regparms*/, hName, hAddr,
7319 mkIRExprVec_2( argL, argR )
7324 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
7326 DIP("%s%s %s, %s\n",
7327 name, show_granularity ? nameMMXGran(opc & 3) : "",
7328 ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ),
7329 nameMMXReg(gregLO3ofRM(modrm)) );
7331 return delta;
7335 /* Vector by scalar shift of G by the amount specified at the bottom
7336 of E. This is a straight copy of dis_SSE_shiftG_byE. */
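/* The shift amount lives in the low bits of a full 64-bit value,
   so it cannot simply be truncated: amounts >= the lane size must
   give zero for logical shifts and an all-sign-bits result (shift
   by size-1) for arithmetic shifts.  The ITEs below implement that
   by comparing the whole 64-bit amount against `size`. */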
7338 static ULong dis_MMX_shiftG_byE ( const VexAbiInfo* vbi,
7339 Prefix pfx, Long delta,
7340 const HChar* opname, IROp op )
7342 HChar dis_buf[50];
7343 Int alen, size;
7344 IRTemp addr;
7345 Bool shl, shr, sar;
7346 UChar rm = getUChar(delta);
7347 IRTemp g0 = newTemp(Ity_I64);
7348 IRTemp g1 = newTemp(Ity_I64);
7349 IRTemp amt = newTemp(Ity_I64);
7350 IRTemp amt8 = newTemp(Ity_I8);
7352 if (epartIsReg(rm)) {
7353 assign( amt, getMMXReg(eregLO3ofRM(rm)) );
7354 DIP("%s %s,%s\n", opname,
7355 nameMMXReg(eregLO3ofRM(rm)),
7356 nameMMXReg(gregLO3ofRM(rm)) );
7357 delta++;
7358 } else {
7359 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
7360 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
7361 DIP("%s %s,%s\n", opname,
7362 dis_buf,
7363 nameMMXReg(gregLO3ofRM(rm)) );
7364 delta += alen;
7366 assign( g0, getMMXReg(gregLO3ofRM(rm)) );
7367 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
7369 shl = shr = sar = False;
7370 size = 0;
7371 switch (op) {
7372 case Iop_ShlN16x4: shl = True; size = 16; break;
7373 case Iop_ShlN32x2: shl = True; size = 32; break;
7374 case Iop_Shl64: shl = True; size = 64; break;
7375 case Iop_ShrN16x4: shr = True; size = 16; break;
7376 case Iop_ShrN32x2: shr = True; size = 32; break;
7377 case Iop_Shr64: shr = True; size = 64; break;
7378 case Iop_SarN16x4: sar = True; size = 16; break;
7379 case Iop_SarN32x2: sar = True; size = 32; break;
7380 default: vassert(0);
7383 if (shl || shr) {
7384 assign( g1,
7386 IRExpr_ITE(
7387 binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)),
7388 binop(op, mkexpr(g0), mkexpr(amt8)),
7389 mkU64(0)
7392 } else
7393 if (sar) {
7394 assign( g1,
7396 IRExpr_ITE(
7397 binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)),
7398 binop(op, mkexpr(g0), mkexpr(amt8)),
7399 binop(op, mkexpr(g0), mkU8(size-1))
7402 } else {
7403 vassert(0);
7406 putMMXReg( gregLO3ofRM(rm), mkexpr(g1) );
7407 return delta;
7411 /* Vector by scalar shift of E by an immediate byte. This is a
7412 straight copy of dis_SSE_shiftE_imm. */
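/* Same out-of-range rule as the by-register form, but since the
   amount is an immediate it can be resolved at decode time:
   amt >= size selects a constant zero (logical) or a shift by
   size-1 (arithmetic) instead of emitting an ITE. */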
7414 static
7415 ULong dis_MMX_shiftE_imm ( Long delta, const HChar* opname, IROp op )
7417 Bool shl, shr, sar;
7418 UChar rm = getUChar(delta);
7419 IRTemp e0 = newTemp(Ity_I64);
7420 IRTemp e1 = newTemp(Ity_I64);
7421 UChar amt, size;
7422 vassert(epartIsReg(rm));
7423 vassert(gregLO3ofRM(rm) == 2
7424 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
7425 amt = getUChar(delta+1);
7426 delta += 2;
7427 DIP("%s $%d,%s\n", opname,
7428 (Int)amt,
7429 nameMMXReg(eregLO3ofRM(rm)) );
7431 assign( e0, getMMXReg(eregLO3ofRM(rm)) );
7433 shl = shr = sar = False;
7434 size = 0;
7435 switch (op) {
7436 case Iop_ShlN16x4: shl = True; size = 16; break;
7437 case Iop_ShlN32x2: shl = True; size = 32; break;
7438 case Iop_Shl64: shl = True; size = 64; break;
7439 case Iop_SarN16x4: sar = True; size = 16; break;
7440 case Iop_SarN32x2: sar = True; size = 32; break;
7441 case Iop_ShrN16x4: shr = True; size = 16; break;
7442 case Iop_ShrN32x2: shr = True; size = 32; break;
7443 case Iop_Shr64: shr = True; size = 64; break;
7444 default: vassert(0);
7447 if (shl || shr) {
7448 assign( e1, amt >= size
7449 ? mkU64(0)
7450 : binop(op, mkexpr(e0), mkU8(amt))
7452 } else
7453 if (sar) {
7454 assign( e1, amt >= size
7455 ? binop(op, mkexpr(e0), mkU8(size-1))
7456 : binop(op, mkexpr(e0), mkU8(amt))
7458 } else {
7459 vassert(0);
7462 putMMXReg( eregLO3ofRM(rm), mkexpr(e1) );
7463 return delta;
7467 /* Completely handle all MMX instructions except emms. */
7469 static
7470 ULong dis_MMX ( Bool* decode_ok,
7471 const VexAbiInfo* vbi, Prefix pfx, Int sz, Long delta )
7473 Int len;
7474 UChar modrm;
7475 HChar dis_buf[50];
7476 UChar opc = getUChar(delta);
7477 delta++;
7479 /* dis_MMX handles all insns except emms. */
7480 do_MMX_preamble();
7482 switch (opc) {
7484 case 0x6E:
7485 if (sz == 4) {
7486 /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/
7487 modrm = getUChar(delta);
7488 if (epartIsReg(modrm)) {
7489 delta++;
7490 putMMXReg(
7491 gregLO3ofRM(modrm),
7492 binop( Iop_32HLto64,
7493 mkU32(0),
7494 getIReg32(eregOfRexRM(pfx,modrm)) ) );
7495 DIP("movd %s, %s\n",
7496 nameIReg32(eregOfRexRM(pfx,modrm)),
7497 nameMMXReg(gregLO3ofRM(modrm)));
7498 } else {
7499 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7500 delta += len;
7501 putMMXReg(
7502 gregLO3ofRM(modrm),
7503 binop( Iop_32HLto64,
7504 mkU32(0),
7505 loadLE(Ity_I32, mkexpr(addr)) ) );
7506 DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
7509 else
7510 if (sz == 8) {
7511 /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/
7512 modrm = getUChar(delta);
7513 if (epartIsReg(modrm)) {
7514 delta++;
7515 putMMXReg( gregLO3ofRM(modrm),
7516 getIReg64(eregOfRexRM(pfx,modrm)) );
7517 DIP("movd %s, %s\n",
7518 nameIReg64(eregOfRexRM(pfx,modrm)),
7519 nameMMXReg(gregLO3ofRM(modrm)));
7520 } else {
7521 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7522 delta += len;
7523 putMMXReg( gregLO3ofRM(modrm),
7524 loadLE(Ity_I64, mkexpr(addr)) );
7525 DIP("movd{64} %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
7528 else {
7529 goto mmx_decode_failure;
7531 break;
7533 case 0x7E:
7534 if (sz == 4) {
7535 /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */
7536 modrm = getUChar(delta);
7537 if (epartIsReg(modrm)) {
7538 delta++;
7539 putIReg32( eregOfRexRM(pfx,modrm),
7540 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
7541 DIP("movd %s, %s\n",
7542 nameMMXReg(gregLO3ofRM(modrm)),
7543 nameIReg32(eregOfRexRM(pfx,modrm)));
7544 } else {
7545 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7546 delta += len;
7547 storeLE( mkexpr(addr),
7548 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
7549 DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
7552 else
7553 if (sz == 8) {
7554 /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */
7555 modrm = getUChar(delta);
7556 if (epartIsReg(modrm)) {
7557 delta++;
7558 putIReg64( eregOfRexRM(pfx,modrm),
7559 getMMXReg(gregLO3ofRM(modrm)) );
7560 DIP("movd %s, %s\n",
7561 nameMMXReg(gregLO3ofRM(modrm)),
7562 nameIReg64(eregOfRexRM(pfx,modrm)));
7563 } else {
7564 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7565 delta += len;
7566 storeLE( mkexpr(addr),
7567 getMMXReg(gregLO3ofRM(modrm)) );
7568 DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
7570 } else {
7571 goto mmx_decode_failure;
7573 break;
7575 case 0x6F:
7576 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
7577 if (sz != 4
7578 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7579 goto mmx_decode_failure;
7580 modrm = getUChar(delta);
7581 if (epartIsReg(modrm)) {
7582 delta++;
7583 putMMXReg( gregLO3ofRM(modrm), getMMXReg(eregLO3ofRM(modrm)) );
7584 DIP("movq %s, %s\n",
7585 nameMMXReg(eregLO3ofRM(modrm)),
7586 nameMMXReg(gregLO3ofRM(modrm)));
7587 } else {
7588 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7589 delta += len;
7590 putMMXReg( gregLO3ofRM(modrm), loadLE(Ity_I64, mkexpr(addr)) );
7591 DIP("movq %s, %s\n",
7592 dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
7594 break;
7596 case 0x7F:
7597 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
7598 if (sz != 4
7599 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7600 goto mmx_decode_failure;
7601 modrm = getUChar(delta);
7602 if (epartIsReg(modrm)) {
7603 delta++;
7604 putMMXReg( eregLO3ofRM(modrm), getMMXReg(gregLO3ofRM(modrm)) );
7605 DIP("movq %s, %s\n",
7606 nameMMXReg(gregLO3ofRM(modrm)),
7607 nameMMXReg(eregLO3ofRM(modrm)));
7608 } else {
7609 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7610 delta += len;
7611 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
7612 DIP("mov(nt)q %s, %s\n",
7613 nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
7615 break;
7617 case 0xFC:
7618 case 0xFD:
7619 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
7620 if (sz != 4)
7621 goto mmx_decode_failure;
7622 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padd", True );
7623 break;
7625 case 0xEC:
7626 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
7627 if (sz != 4
7628 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7629 goto mmx_decode_failure;
7630 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padds", True );
7631 break;
7633 case 0xDC:
7634 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
7635 if (sz != 4)
7636 goto mmx_decode_failure;
7637 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "paddus", True );
7638 break;
7640 case 0xF8:
7641 case 0xF9:
7642 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
7643 if (sz != 4)
7644 goto mmx_decode_failure;
7645 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psub", True );
7646 break;
7648 case 0xE8:
7649 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
7650 if (sz != 4)
7651 goto mmx_decode_failure;
7652 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubs", True );
7653 break;
7655 case 0xD8:
7656 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
7657 if (sz != 4)
7658 goto mmx_decode_failure;
7659 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubus", True );
7660 break;
7662 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
7663 if (sz != 4)
7664 goto mmx_decode_failure;
7665 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmulhw", False );
7666 break;
7668 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
7669 if (sz != 4)
7670 goto mmx_decode_failure;
7671 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmullw", False );
7672 break;
7674 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
7675 vassert(sz == 4);
7676 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmaddwd", False );
7677 break;
7679 case 0x74:
7680 case 0x75:
7681 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
7682 if (sz != 4)
7683 goto mmx_decode_failure;
7684 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpeq", True );
7685 break;
7687 case 0x64:
7688 case 0x65:
7689 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
7690 if (sz != 4)
7691 goto mmx_decode_failure;
7692 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpgt", True );
7693 break;
7695 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
7696 if (sz != 4)
7697 goto mmx_decode_failure;
7698 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packssdw", False );
7699 break;
7701 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
7702 if (sz != 4)
7703 goto mmx_decode_failure;
7704 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packsswb", False );
7705 break;
7707 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
7708 if (sz != 4)
7709 goto mmx_decode_failure;
7710 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packuswb", False );
7711 break;
7713 case 0x68:
7714 case 0x69:
7715 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
7716 if (sz != 4
7717 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7718 goto mmx_decode_failure;
7719 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckh", True );
7720 break;
7722 case 0x60:
7723 case 0x61:
7724 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
7725 if (sz != 4
7726 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7727 goto mmx_decode_failure;
7728 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckl", True );
7729 break;
7731 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
7732 if (sz != 4)
7733 goto mmx_decode_failure;
7734 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pand", False );
7735 break;
7737 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
7738 if (sz != 4)
7739 goto mmx_decode_failure;
7740 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pandn", False );
7741 break;
7743 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
7744 if (sz != 4)
7745 goto mmx_decode_failure;
7746 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "por", False );
7747 break;
7749 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
7750 if (sz != 4)
7751 goto mmx_decode_failure;
7752 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pxor", False );
7753 break;
7755 # define SHIFT_BY_REG(_name,_op) \
7756 delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op); \
7757 break;
7759 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
7760 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
7761 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
7762 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);
7764 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
7765 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
7766 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
7767 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64);
7769 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
7770 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4);
7771 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2);
7773 # undef SHIFT_BY_REG
7775 case 0x71:
7776 case 0x72:
7777 case 0x73: {
7778 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
7779 UChar byte2, subopc;
7780 if (sz != 4)
7781 goto mmx_decode_failure;
7782 byte2 = getUChar(delta); /* amode / sub-opcode */
7783 subopc = toUChar( (byte2 >> 3) & 7 );
7785 # define SHIFT_BY_IMM(_name,_op) \
7786 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \
7787 } while (0)
7789 if (subopc == 2 /*SRL*/ && opc == 0x71)
7790 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4);
7791 else if (subopc == 2 /*SRL*/ && opc == 0x72)
7792 SHIFT_BY_IMM("psrld", Iop_ShrN32x2);
7793 else if (subopc == 2 /*SRL*/ && opc == 0x73)
7794 SHIFT_BY_IMM("psrlq", Iop_Shr64);
7796 else if (subopc == 4 /*SAR*/ && opc == 0x71)
7797 SHIFT_BY_IMM("psraw", Iop_SarN16x4);
7798 else if (subopc == 4 /*SAR*/ && opc == 0x72)
7799 SHIFT_BY_IMM("psrad", Iop_SarN32x2);
7801 else if (subopc == 6 /*SHL*/ && opc == 0x71)
7802 SHIFT_BY_IMM("psllw", Iop_ShlN16x4);
7803 else if (subopc == 6 /*SHL*/ && opc == 0x72)
7804 SHIFT_BY_IMM("pslld", Iop_ShlN32x2);
7805 else if (subopc == 6 /*SHL*/ && opc == 0x73)
7806 SHIFT_BY_IMM("psllq", Iop_Shl64);
7808 else goto mmx_decode_failure;
7810 # undef SHIFT_BY_IMM
7811 break;
7814 case 0xF7: {
7815 IRTemp addr = newTemp(Ity_I64);
7816 IRTemp regD = newTemp(Ity_I64);
7817 IRTemp regM = newTemp(Ity_I64);
7818 IRTemp mask = newTemp(Ity_I64);
7819 IRTemp olddata = newTemp(Ity_I64);
7820 IRTemp newdata = newTemp(Ity_I64);
7822 modrm = getUChar(delta);
7823 if (sz != 4 || (!epartIsReg(modrm)))
7824 goto mmx_decode_failure;
7825 delta++;
7827 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
7828 assign( regM, getMMXReg( eregLO3ofRM(modrm) ));
7829 assign( regD, getMMXReg( gregLO3ofRM(modrm) ));
7830 assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) );
7831 assign( olddata, loadLE( Ity_I64, mkexpr(addr) ));
7832 assign( newdata,
7833 binop(Iop_Or64,
7834 binop(Iop_And64,
7835 mkexpr(regD),
7836 mkexpr(mask) ),
7837 binop(Iop_And64,
7838 mkexpr(olddata),
7839 unop(Iop_Not64, mkexpr(mask)))) );
7840 storeLE( mkexpr(addr), mkexpr(newdata) );
7841 DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm) ),
7842 nameMMXReg( gregLO3ofRM(modrm) ) );
7843 break;
7846 /* --- MMX decode failure --- */
7847 default:
7848 mmx_decode_failure:
7849 *decode_ok = False;
7850 return delta; /* ignored */
7854 *decode_ok = True;
7855 return delta;
7859 /*------------------------------------------------------------*/
7860 /*--- More misc arithmetic and other obscure insns. ---*/
7861 /*------------------------------------------------------------*/
7863 /* Generate base << amt with vacated places filled with stuff
7864 from xtra. amt guaranteed in 0 .. 63. */
7865 static
7866 IRExpr* shiftL64_with_extras ( IRTemp base, IRTemp xtra, IRTemp amt )
7868 /* if amt == 0
7869 then base
7870 else (base << amt) | (xtra >>u (64-amt))
7872 return
7873 IRExpr_ITE(
7874 binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)),
7875 binop(Iop_Or64,
7876 binop(Iop_Shl64, mkexpr(base), mkexpr(amt)),
7877 binop(Iop_Shr64, mkexpr(xtra),
7878 binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
7880 mkexpr(base)
7884 /* Generate base >>u amt with vacated places filled with stuff
7885 from xtra. amt guaranteed in 0 .. 63. */
7886 static
7887 IRExpr* shiftR64_with_extras ( IRTemp xtra, IRTemp base, IRTemp amt )
7889 /* if amt == 0
7890 then base
7891 else (base >>u amt) | (xtra << (64-amt))
7893 return
7894 IRExpr_ITE(
7895 binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)),
7896 binop(Iop_Or64,
7897 binop(Iop_Shr64, mkexpr(base), mkexpr(amt)),
7898 binop(Iop_Shl64, mkexpr(xtra),
7899 binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
7901 mkexpr(base)
7905 /* Double length left and right shifts. Apparently only required in
7906 v-size (no b- variant). */
7907 static
7908 ULong dis_SHLRD_Gv_Ev ( const VexAbiInfo* vbi,
7909 Prefix pfx,
7910 Long delta, UChar modrm,
7911 Int sz,
7912 IRExpr* shift_amt,
7913 Bool amt_is_literal,
7914 const HChar* shift_amt_txt,
7915 Bool left_shift )
7917 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used
7918 for printing it. And delta, on entry, points at the modrm byte. */
7919 Int len;
7920 HChar dis_buf[50];
7922 IRType ty = szToITy(sz);
7923 IRTemp gsrc = newTemp(ty);
7924 IRTemp esrc = newTemp(ty);
7925 IRTemp addr = IRTemp_INVALID;
7926 IRTemp tmpSH = newTemp(Ity_I8);
7927 IRTemp tmpSS = newTemp(Ity_I8);
7928 IRTemp tmp64 = IRTemp_INVALID;
7929 IRTemp res64 = IRTemp_INVALID;
7930 IRTemp rss64 = IRTemp_INVALID;
7931 IRTemp resTy = IRTemp_INVALID;
7932 IRTemp rssTy = IRTemp_INVALID;
7933 Int mask = sz==8 ? 63 : 31;
7935 vassert(sz == 2 || sz == 4 || sz == 8);
7937 /* The E-part is the destination; this is shifted. The G-part
7938 supplies bits to be shifted into the E-part, but is not
7939 changed.
7941 If shifting left, form a double-length word with E at the top
7942 and G at the bottom, and shift this left. The result is then in
7943 the high part.
7945 If shifting right, form a double-length word with G at the top
7946 and E at the bottom, and shift this right. The result is then
7947 at the bottom. */
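/* For illustration (sample values only): a 32-bit left shift-double
   with E = 0x12345678, G = 0xDEADBEEF and a masked amount of 8 forms
   the double-length word 0x12345678DEADBEEF, shifts it left by 8 and
   keeps the high 32 bits, giving 0x345678DE as the new E value. */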
7949 /* Fetch the operands. */
7951 assign( gsrc, getIRegG(sz, pfx, modrm) );
7953 if (epartIsReg(modrm)) {
7954 delta++;
7955 assign( esrc, getIRegE(sz, pfx, modrm) );
7956 DIP("sh%cd%c %s, %s, %s\n",
7957 ( left_shift ? 'l' : 'r' ), nameISize(sz),
7958 shift_amt_txt,
7959 nameIRegG(sz, pfx, modrm), nameIRegE(sz, pfx, modrm));
7960 } else {
7961 addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
7962 /* # bytes following amode */
7963 amt_is_literal ? 1 : 0 );
7964 delta += len;
7965 assign( esrc, loadLE(ty, mkexpr(addr)) );
7966 DIP("sh%cd%c %s, %s, %s\n",
7967 ( left_shift ? 'l' : 'r' ), nameISize(sz),
7968 shift_amt_txt,
7969 nameIRegG(sz, pfx, modrm), dis_buf);
7972 /* Calculate the masked shift amount (tmpSH), the masked subshift
7973 amount (tmpSS), the shifted value (res64) and the subshifted
7974 value (rss64). */
7976 assign( tmpSH, binop(Iop_And8, shift_amt, mkU8(mask)) );
7977 assign( tmpSS, binop(Iop_And8,
7978 binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),
7979 mkU8(mask)));
7981 tmp64 = newTemp(Ity_I64);
7982 res64 = newTemp(Ity_I64);
7983 rss64 = newTemp(Ity_I64);
7985 if (sz == 2 || sz == 4) {
7987 /* G is xtra; E is data */
7988 /* what a freaking nightmare: */
7989 if (sz == 4 && left_shift) {
7990 assign( tmp64, binop(Iop_32HLto64, mkexpr(esrc), mkexpr(gsrc)) );
7991 assign( res64,
7992 binop(Iop_Shr64,
7993 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
7994 mkU8(32)) );
7995 assign( rss64,
7996 binop(Iop_Shr64,
7997 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSS)),
7998 mkU8(32)) );
8000 else
8001 if (sz == 4 && !left_shift) {
8002 assign( tmp64, binop(Iop_32HLto64, mkexpr(gsrc), mkexpr(esrc)) );
8003 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
8004 assign( rss64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSS)) );
8006 else
8007 if (sz == 2 && left_shift) {
8008 assign( tmp64,
8009 binop(Iop_32HLto64,
8010 binop(Iop_16HLto32, mkexpr(esrc), mkexpr(gsrc)),
8011 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc))
8013 /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */
8014 assign( res64,
8015 binop(Iop_Shr64,
8016 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
8017 mkU8(48)) );
8018 /* subshift formed by shifting [esrc'0000'0000'0000] */
8019 assign( rss64,
8020 binop(Iop_Shr64,
8021 binop(Iop_Shl64,
8022 binop(Iop_Shl64, unop(Iop_16Uto64, mkexpr(esrc)),
8023 mkU8(48)),
8024 mkexpr(tmpSS)),
8025 mkU8(48)) );
8027 else
8028 if (sz == 2 && !left_shift) {
8029 assign( tmp64,
8030 binop(Iop_32HLto64,
8031 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)),
8032 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(esrc))
8034 /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */
8035 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
8036 /* subshift formed by shifting [0000'0000'0000'esrc] */
8037 assign( rss64, binop(Iop_Shr64,
8038 unop(Iop_16Uto64, mkexpr(esrc)),
8039 mkexpr(tmpSS)) );
8042 } else {
8044 vassert(sz == 8);
8045 if (left_shift) {
8046 assign( res64, shiftL64_with_extras( esrc, gsrc, tmpSH ));
8047 assign( rss64, shiftL64_with_extras( esrc, gsrc, tmpSS ));
8048 } else {
8049 assign( res64, shiftR64_with_extras( gsrc, esrc, tmpSH ));
8050 assign( rss64, shiftR64_with_extras( gsrc, esrc, tmpSS ));
8055 resTy = newTemp(ty);
8056 rssTy = newTemp(ty);
8057 assign( resTy, narrowTo(ty, mkexpr(res64)) );
8058 assign( rssTy, narrowTo(ty, mkexpr(rss64)) );
8060 /* Put result back and write the flags thunk. */
8061 setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl64 : Iop_Sar64,
8062 resTy, rssTy, ty, tmpSH );
8064 if (epartIsReg(modrm)) {
8065 putIRegE(sz, pfx, modrm, mkexpr(resTy));
8066 } else {
8067 storeLE( mkexpr(addr), mkexpr(resTy) );
8070 if (amt_is_literal) delta++;
8071 return delta;
8075 /* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not
8076 required. */
8078 typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;
8080 static const HChar* nameBtOp ( BtOp op )
8082 switch (op) {
8083 case BtOpNone: return "";
8084 case BtOpSet: return "s";
8085 case BtOpReset: return "r";
8086 case BtOpComp: return "c";
8087 default: vpanic("nameBtOp(amd64)");
8092 static
8093 ULong dis_bt_G_E ( const VexAbiInfo* vbi,
8094 Prefix pfx, Int sz, Long delta, BtOp op,
8095 /*OUT*/Bool* decode_OK )
8097 HChar dis_buf[50];
8098 UChar modrm;
8099 Int len;
8100 IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
8101 t_addr1, t_rsp, t_mask, t_new;
8103 vassert(sz == 2 || sz == 4 || sz == 8);
8105 t_fetched = t_bitno0 = t_bitno1 = t_bitno2
8106 = t_addr0 = t_addr1 = t_rsp
8107 = t_mask = t_new = IRTemp_INVALID;
8109 t_fetched = newTemp(Ity_I8);
8110 t_new = newTemp(Ity_I8);
8111 t_bitno0 = newTemp(Ity_I64);
8112 t_bitno1 = newTemp(Ity_I64);
8113 t_bitno2 = newTemp(Ity_I8);
8114 t_addr1 = newTemp(Ity_I64);
8115 modrm = getUChar(delta);
8117 *decode_OK = True;
8118 if (epartIsReg(modrm)) {
8119 /* F2 and F3 are never acceptable. */
8120 if (haveF2orF3(pfx)) {
8121 *decode_OK = False;
8122 return delta;
8124 } else {
8125 /* F2 or F3 (but not both) are allowed, provided LOCK is also
8126 present, and only for the BTC/BTS/BTR cases (not BT). */
8127 if (haveF2orF3(pfx)) {
8128 if (haveF2andF3(pfx) || !haveLOCK(pfx) || op == BtOpNone) {
8129 *decode_OK = False;
8130 return delta;
8135 assign( t_bitno0, widenSto64(getIRegG(sz, pfx, modrm)) );
8137 if (epartIsReg(modrm)) {
8138 delta++;
8139 /* Get it onto the client's stack. Oh, this is a horrible
8140 kludge. See https://bugs.kde.org/show_bug.cgi?id=245925.
8141 Because of the ELF ABI stack redzone, there may be live data
8142 up to 128 bytes below %RSP. So we can't just push it on the
8143 stack, else we may wind up trashing live data, and causing
8144 impossible-to-find simulation errors. (Yes, this did
8145 happen.) So we need to drop RSP by at least 128 before
8146 pushing it. That unfortunately means hitting Memcheck's
8147 fast-case painting code. Ideally we should drop more than
8148 128, to reduce the chances of breaking buggy programs that
8149 have live data below -128(%RSP). Memcheck fast-cases moves
8150 of 288 bytes due to the need to handle ppc64-linux quickly,
8151 so let's use 288. Of course the real fix is to get rid of
8152 this kludge entirely. */
8153 t_rsp = newTemp(Ity_I64);
8154 t_addr0 = newTemp(Ity_I64);
8156 vassert(vbi->guest_stack_redzone_size == 128);
8157 assign( t_rsp, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(288)) );
8158 putIReg64(R_RSP, mkexpr(t_rsp));
8160 storeLE( mkexpr(t_rsp), getIRegE(sz, pfx, modrm) );
8162 /* Make t_addr0 point at it. */
8163 assign( t_addr0, mkexpr(t_rsp) );
8165 /* Mask out the upper bits of the bit number, since the operand is a
8166 register. */
8167 assign( t_bitno1, binop(Iop_And64,
8168 mkexpr(t_bitno0),
8169 mkU64(sz == 8 ? 63 : sz == 4 ? 31 : 15)) );
8171 } else {
8172 t_addr0 = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
8173 delta += len;
8174 assign( t_bitno1, mkexpr(t_bitno0) );
8177 /* At this point: t_addr0 is the address being operated on. If it
8178 was a reg, we will have pushed it onto the client's stack.
8179 t_bitno1 is the bit number, suitably masked in the case of a
8180 reg. */
8182 /* Now the main sequence. */
8183 assign( t_addr1,
8184 binop(Iop_Add64,
8185 mkexpr(t_addr0),
8186 binop(Iop_Sar64, mkexpr(t_bitno1), mkU8(3))) );
8188 /* t_addr1 now holds effective address */
8190 assign( t_bitno2,
8191 unop(Iop_64to8,
8192 binop(Iop_And64, mkexpr(t_bitno1), mkU64(7))) );
8194 /* t_bitno2 contains offset of bit within byte */
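/* For illustration: a bit number of 35 yields t_addr1 = t_addr0 + 4
   (35 >> 3) and t_bitno2 = 3 (35 & 7), so the byte at t_addr0+4 is
   fetched and bit 3 of it is tested/updated. */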
8196 if (op != BtOpNone) {
8197 t_mask = newTemp(Ity_I8);
8198 assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) );
8201 /* t_mask is now a suitable byte mask */
8203 assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) );
8205 if (op != BtOpNone) {
8206 switch (op) {
8207 case BtOpSet:
8208 assign( t_new,
8209 binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
8210 break;
8211 case BtOpComp:
8212 assign( t_new,
8213 binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
8214 break;
8215 case BtOpReset:
8216 assign( t_new,
8217 binop(Iop_And8, mkexpr(t_fetched),
8218 unop(Iop_Not8, mkexpr(t_mask))) );
8219 break;
8220 default:
8221 vpanic("dis_bt_G_E(amd64)");
8223 if ((haveLOCK(pfx)) && !epartIsReg(modrm)) {
8224 casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
8225 mkexpr(t_new)/*new*/,
8226 guest_RIP_curr_instr );
8227 } else {
8228 storeLE( mkexpr(t_addr1), mkexpr(t_new) );
8232 /* Side effect done; now get selected bit into Carry flag. The Intel docs
8233 (as of 2015, at least) say that C holds the result, Z is unchanged, and
8234 O,S,A and P are undefined. However, on Skylake it appears that O,S,A,P
8235 are also unchanged, so let's do that. */
8236 const ULong maskC = AMD64G_CC_MASK_C;
8237 const ULong maskOSZAP = AMD64G_CC_MASK_O | AMD64G_CC_MASK_S
8238 | AMD64G_CC_MASK_Z | AMD64G_CC_MASK_A
8239 | AMD64G_CC_MASK_P;
8241 IRTemp old_rflags = newTemp(Ity_I64);
8242 assign(old_rflags, mk_amd64g_calculate_rflags_all());
8244 IRTemp new_rflags = newTemp(Ity_I64);
8245 assign(new_rflags,
8246 binop(Iop_Or64,
8247 binop(Iop_And64, mkexpr(old_rflags), mkU64(maskOSZAP)),
8248 binop(Iop_And64,
8249 binop(Iop_Shr64,
8250 unop(Iop_8Uto64, mkexpr(t_fetched)),
8251 mkexpr(t_bitno2)),
8252 mkU64(maskC))));
8254 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
8255 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
8256 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
8257 /* Set NDEP even though it isn't used. This makes redundant-PUT
8258 elimination of previous stores to this field work better. */
8259 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
8261 /* Move reg operand from stack back to reg */
8262 if (epartIsReg(modrm)) {
8263 /* t_rsp still points at it. */
8264 /* only write the reg if actually modifying it; doing otherwise
8265 zeroes the top half erroneously when doing btl due to
8266 standard zero-extend rule */
8267 if (op != BtOpNone)
8268 putIRegE(sz, pfx, modrm, loadLE(szToITy(sz), mkexpr(t_rsp)) );
8269 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t_rsp), mkU64(288)) );
8272 DIP("bt%s%c %s, %s\n",
8273 nameBtOp(op), nameISize(sz), nameIRegG(sz, pfx, modrm),
8274 ( epartIsReg(modrm) ? nameIRegE(sz, pfx, modrm) : dis_buf ) );
8276 return delta;
8281 /* Handle BSF/BSR. Only v-size seems necessary. */
8282 static
8283 ULong dis_bs_E_G ( const VexAbiInfo* vbi,
8284 Prefix pfx, Int sz, Long delta, Bool fwds )
8286 Bool isReg;
8287 UChar modrm;
8288 HChar dis_buf[50];
8290 IRType ty = szToITy(sz);
8291 IRTemp src = newTemp(ty);
8292 IRTemp dst = newTemp(ty);
8293 IRTemp src64 = newTemp(Ity_I64);
8294 IRTemp dst64 = newTemp(Ity_I64);
8295 IRTemp srcB = newTemp(Ity_I1);
8297 vassert(sz == 8 || sz == 4 || sz == 2);
8299 modrm = getUChar(delta);
8300 isReg = epartIsReg(modrm);
8301 if (isReg) {
8302 delta++;
8303 assign( src, getIRegE(sz, pfx, modrm) );
8304 } else {
8305 Int len;
8306 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
8307 delta += len;
8308 assign( src, loadLE(ty, mkexpr(addr)) );
8311 DIP("bs%c%c %s, %s\n",
8312 fwds ? 'f' : 'r', nameISize(sz),
8313 ( isReg ? nameIRegE(sz, pfx, modrm) : dis_buf ),
8314 nameIRegG(sz, pfx, modrm));
8316 /* First, widen src to 64 bits if it is not already. */
8317 assign( src64, widenUto64(mkexpr(src)) );
8319 /* Generate a bool expression which is zero iff the original is
8320 zero, and nonzero otherwise. Ask for a CmpNE version which, if
8321 instrumented by Memcheck, is instrumented expensively, since
8322 this may be used on the output of a preceding movmskb insn,
8323 which has been known to be partially defined, and in need of
8324 careful handling. */
8325 assign( srcB, binop(Iop_ExpCmpNE64, mkexpr(src64), mkU64(0)) );
8327 /* Flags: Z is 1 iff source value is zero. All others
8328 are undefined -- we force them to zero. */
8329 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
8330 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
8331 stmt( IRStmt_Put(
8332 OFFB_CC_DEP1,
8333 IRExpr_ITE( mkexpr(srcB),
8334 /* src!=0 */
8335 mkU64(0),
8336 /* src==0 */
8337 mkU64(AMD64G_CC_MASK_Z)
8340 /* Set NDEP even though it isn't used. This makes redundant-PUT
8341 elimination of previous stores to this field work better. */
8342 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
8344 /* Result: if the source value is zero, we can't use
8345 Iop_Clz64/Iop_Ctz64 as they have no defined result in that case.
8346 But anyway, amd64 semantics say the result is undefined in
8347 such situations. Hence handle the zero case specially. */
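/* For illustration: with a 32-bit source of 0x1000 (only bit 12 set),
   bsf computes Ctz64(0x1000) = 12 and bsr computes
   63 - Clz64(0x1000) = 63 - 51 = 12, both as expected. */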
8349 /* Bleh. What we compute:
8351 bsf64: if src == 0 then {dst is unchanged}
8352 else Ctz64(src)
8354 bsr64: if src == 0 then {dst is unchanged}
8355 else 63 - Clz64(src)
8357 bsf32: if src == 0 then {dst is unchanged}
8358 else Ctz64(32Uto64(src))
8360 bsr32: if src == 0 then {dst is unchanged}
8361 else 63 - Clz64(32Uto64(src))
8363 bsf16: if src == 0 then {dst is unchanged}
8364 else Ctz64(32Uto64(16Uto32(src)))
8366 bsr16: if src == 0 then {dst is unchanged}
8367 else 63 - Clz64(32Uto64(16Uto32(src)))
8370 /* The main computation, guarding against zero. */
8371 assign( dst64,
8372 IRExpr_ITE(
8373 mkexpr(srcB),
8374 /* src != 0 */
8375 fwds ? unop(Iop_Ctz64, mkexpr(src64))
8376 : binop(Iop_Sub64,
8377 mkU64(63),
8378 unop(Iop_Clz64, mkexpr(src64))),
8379 /* src == 0 -- leave dst unchanged */
8380 widenUto64( getIRegG( sz, pfx, modrm ) )
8384 if (sz == 2)
8385 assign( dst, unop(Iop_64to16, mkexpr(dst64)) );
8386 else
8387 if (sz == 4)
8388 assign( dst, unop(Iop_64to32, mkexpr(dst64)) );
8389 else
8390 assign( dst, mkexpr(dst64) );
8392 /* dump result back */
8393 putIRegG( sz, pfx, modrm, mkexpr(dst) );
8395 return delta;
8399 /* swap rAX with the reg specified by reg and REX.B */
8400 static
8401 void codegen_xchg_rAX_Reg ( Prefix pfx, Int sz, UInt regLo3 )
8403 IRType ty = szToITy(sz);
8404 IRTemp t1 = newTemp(ty);
8405 IRTemp t2 = newTemp(ty);
8406 vassert(sz == 2 || sz == 4 || sz == 8);
8407 vassert(regLo3 < 8);
8408 if (sz == 8) {
8409 assign( t1, getIReg64(R_RAX) );
8410 assign( t2, getIRegRexB(8, pfx, regLo3) );
8411 putIReg64( R_RAX, mkexpr(t2) );
8412 putIRegRexB(8, pfx, regLo3, mkexpr(t1) );
8413 } else if (sz == 4) {
8414 assign( t1, getIReg32(R_RAX) );
8415 assign( t2, getIRegRexB(4, pfx, regLo3) );
8416 putIReg32( R_RAX, mkexpr(t2) );
8417 putIRegRexB(4, pfx, regLo3, mkexpr(t1) );
8418 } else {
8419 assign( t1, getIReg16(R_RAX) );
8420 assign( t2, getIRegRexB(2, pfx, regLo3) );
8421 putIReg16( R_RAX, mkexpr(t2) );
8422 putIRegRexB(2, pfx, regLo3, mkexpr(t1) );
8424 DIP("xchg%c %s, %s\n",
8425 nameISize(sz), nameIRegRAX(sz),
8426 nameIRegRexB(sz,pfx, regLo3));
8430 static
8431 void codegen_SAHF ( void )
8433 /* Set the flags to:
8434 (amd64g_calculate_flags_all() & AMD64G_CC_MASK_O)
8435 -- retain the old O flag
8436 | (%AH & (AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
8437 |AMD64G_CC_MASK_P|AMD64G_CC_MASK_C)
8439 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
8440 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
8441 IRTemp oldflags = newTemp(Ity_I64);
8442 assign( oldflags, mk_amd64g_calculate_rflags_all() );
8443 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
8444 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
8445 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
8446 stmt( IRStmt_Put( OFFB_CC_DEP1,
8447 binop(Iop_Or64,
8448 binop(Iop_And64, mkexpr(oldflags), mkU64(AMD64G_CC_MASK_O)),
8449 binop(Iop_And64,
8450 binop(Iop_Shr64, getIReg64(R_RAX), mkU8(8)),
8451 mkU64(mask_SZACP))
8457 static
8458 void codegen_LAHF ( void )
8460 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
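/* For reference: in the low byte of RFLAGS, CF is bit 0, bit 1 always
   reads as 1, PF is bit 2, AF is bit 4, ZF is bit 6 and SF is bit 7;
   bits 3 and 5 read as 0.  The mkU64(1<<1) below forces the
   always-one bit. */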
8461 IRExpr* rax_with_hole;
8462 IRExpr* new_byte;
8463 IRExpr* new_rax;
8464 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
8465 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
8467 IRTemp flags = newTemp(Ity_I64);
8468 assign( flags, mk_amd64g_calculate_rflags_all() );
8470 rax_with_hole
8471 = binop(Iop_And64, getIReg64(R_RAX), mkU64(~0xFF00ULL));
8472 new_byte
8473 = binop(Iop_Or64, binop(Iop_And64, mkexpr(flags), mkU64(mask_SZACP)),
8474 mkU64(1<<1));
8475 new_rax
8476 = binop(Iop_Or64, rax_with_hole,
8477 binop(Iop_Shl64, new_byte, mkU8(8)));
8478 putIReg64(R_RAX, new_rax);
8482 static
8483 ULong dis_cmpxchg_G_E ( /*OUT*/Bool* ok,
8484 const VexAbiInfo* vbi,
8485 Prefix pfx,
8486 Int size,
8487 Long delta0 )
8489 HChar dis_buf[50];
8490 Int len;
8492 IRType ty = szToITy(size);
8493 IRTemp acc = newTemp(ty);
8494 IRTemp src = newTemp(ty);
8495 IRTemp dest = newTemp(ty);
8496 IRTemp dest2 = newTemp(ty);
8497 IRTemp acc2 = newTemp(ty);
8498 IRTemp cond = newTemp(Ity_I1);
8499 IRTemp addr = IRTemp_INVALID;
8500 UChar rm = getUChar(delta0);
8502 /* There are 3 cases to consider:
8504 reg-reg: ignore any lock prefix, generate sequence based
8505 on ITE
8507 reg-mem, not locked: ignore any lock prefix, generate sequence
8508 based on ITE
8510 reg-mem, locked: use IRCAS
8513 /* Decide whether F2 or F3 are acceptable. Never for register
8514 case, but for the memory case, one or the other is OK provided
8515 LOCK is also present. */
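/* Reminder of the semantics being modelled: if rAX == dest then ZF=1
   and dest := src, else ZF=0 and rAX := dest; the other flags are set
   as for CMP rAX,dest.  The three cases below differ only in how dest
   is read and written back. */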
8516 if (epartIsReg(rm)) {
8517 if (haveF2orF3(pfx)) {
8518 *ok = False;
8519 return delta0;
8521 } else {
8522 if (haveF2orF3(pfx)) {
8523 if (haveF2andF3(pfx) || !haveLOCK(pfx)) {
8524 *ok = False;
8525 return delta0;
8530 if (epartIsReg(rm)) {
8531 /* case 1 */
8532 assign( dest, getIRegE(size, pfx, rm) );
8533 delta0++;
8534 assign( src, getIRegG(size, pfx, rm) );
8535 assign( acc, getIRegRAX(size) );
8536 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
8537 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
8538 assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
8539 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
8540 putIRegRAX(size, mkexpr(acc2));
8541 putIRegE(size, pfx, rm, mkexpr(dest2));
8542 DIP("cmpxchg%c %s,%s\n", nameISize(size),
8543 nameIRegG(size,pfx,rm),
8544 nameIRegE(size,pfx,rm) );
8546 else if (!epartIsReg(rm) && !haveLOCK(pfx)) {
8547 /* case 2 */
8548 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8549 assign( dest, loadLE(ty, mkexpr(addr)) );
8550 delta0 += len;
8551 assign( src, getIRegG(size, pfx, rm) );
8552 assign( acc, getIRegRAX(size) );
8553 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
8554 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
8555 assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
8556 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
8557 putIRegRAX(size, mkexpr(acc2));
8558 storeLE( mkexpr(addr), mkexpr(dest2) );
8559 DIP("cmpxchg%c %s,%s\n", nameISize(size),
8560 nameIRegG(size,pfx,rm), dis_buf);
8562 else if (!epartIsReg(rm) && haveLOCK(pfx)) {
8563 /* case 3 */
8564 /* src is new value. acc is expected value. dest is old value.
8565 Compute success from the output of the IRCAS, and steer the
8566 new value for RAX accordingly: in case of success, RAX is
8567 unchanged. */
8568 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8569 delta0 += len;
8570 assign( src, getIRegG(size, pfx, rm) );
8571 assign( acc, getIRegRAX(size) );
8572 stmt( IRStmt_CAS(
8573 mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
8574 NULL, mkexpr(acc), NULL, mkexpr(src) )
8576 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
8577 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
8578 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
8579 putIRegRAX(size, mkexpr(acc2));
8580 DIP("cmpxchg%c %s,%s\n", nameISize(size),
8581 nameIRegG(size,pfx,rm), dis_buf);
8583 else vassert(0);
8585 *ok = True;
8586 return delta0;
8590 /* Handle conditional move instructions of the form
8591 cmovcc E(reg-or-mem), G(reg)
8593 E(src) is reg-or-mem
8594 G(dst) is reg.
8596 If E is reg, --> GET %E, tmps
8597 GET %G, tmpd
8598 CMOVcc tmps, tmpd
8599 PUT tmpd, %G
8601 If E is mem --> (getAddr E) -> tmpa
8602 LD (tmpa), tmps
8603 GET %G, tmpd
8604 CMOVcc tmps, tmpd
8605 PUT tmpd, %G
8607 static
8608 ULong dis_cmov_E_G ( const VexAbiInfo* vbi,
8609 Prefix pfx,
8610 Int sz,
8611 AMD64Condcode cond,
8612 Long delta0 )
8614 UChar rm = getUChar(delta0);
8615 HChar dis_buf[50];
8616 Int len;
8618 IRType ty = szToITy(sz);
8619 IRTemp tmps = newTemp(ty);
8620 IRTemp tmpd = newTemp(ty);
8622 if (epartIsReg(rm)) {
8623 assign( tmps, getIRegE(sz, pfx, rm) );
8624 assign( tmpd, getIRegG(sz, pfx, rm) );
8626 putIRegG( sz, pfx, rm,
8627 IRExpr_ITE( mk_amd64g_calculate_condition(cond),
8628 mkexpr(tmps),
8629 mkexpr(tmpd) )
8631 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
8632 nameIRegE(sz,pfx,rm),
8633 nameIRegG(sz,pfx,rm));
8634 return 1+delta0;
8637 /* E refers to memory */
8639 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8640 assign( tmps, loadLE(ty, mkexpr(addr)) );
8641 assign( tmpd, getIRegG(sz, pfx, rm) );
8643 putIRegG( sz, pfx, rm,
8644 IRExpr_ITE( mk_amd64g_calculate_condition(cond),
8645 mkexpr(tmps),
8646 mkexpr(tmpd) )
8649 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
8650 dis_buf,
8651 nameIRegG(sz,pfx,rm));
8652 return len+delta0;
8657 static
8658 ULong dis_xadd_G_E ( /*OUT*/Bool* decode_ok,
8659 const VexAbiInfo* vbi,
8660 Prefix pfx, Int sz, Long delta0 )
8662 Int len;
8663 UChar rm = getUChar(delta0);
8664 HChar dis_buf[50];
8666 IRType ty = szToITy(sz);
8667 IRTemp tmpd = newTemp(ty);
8668 IRTemp tmpt0 = newTemp(ty);
8669 IRTemp tmpt1 = newTemp(ty);
8671 /* There are 3 cases to consider:
8673 reg-reg: ignore any lock prefix,
8674 generate 'naive' (non-atomic) sequence
8676 reg-mem, not locked: ignore any lock prefix, generate 'naive'
8677 (non-atomic) sequence
8679 reg-mem, locked: use IRCAS
8682 if (epartIsReg(rm)) {
8683 /* case 1 */
8684 assign( tmpd, getIRegE(sz, pfx, rm) );
8685 assign( tmpt0, getIRegG(sz, pfx, rm) );
8686 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
8687 mkexpr(tmpd), mkexpr(tmpt0)) );
8688 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
8689 putIRegG(sz, pfx, rm, mkexpr(tmpd));
8690 putIRegE(sz, pfx, rm, mkexpr(tmpt1));
8691 DIP("xadd%c %s, %s\n",
8692 nameISize(sz), nameIRegG(sz,pfx,rm), nameIRegE(sz,pfx,rm));
8693 *decode_ok = True;
8694 return 1+delta0;
8696 else if (!epartIsReg(rm) && !haveLOCK(pfx)) {
8697 /* case 2 */
8698 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8699 assign( tmpd, loadLE(ty, mkexpr(addr)) );
8700 assign( tmpt0, getIRegG(sz, pfx, rm) );
8701 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
8702 mkexpr(tmpd), mkexpr(tmpt0)) );
8703 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
8704 storeLE( mkexpr(addr), mkexpr(tmpt1) );
8705 putIRegG(sz, pfx, rm, mkexpr(tmpd));
8706 DIP("xadd%c %s, %s\n",
8707 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
8708 *decode_ok = True;
8709 return len+delta0;
8711 else if (!epartIsReg(rm) && haveLOCK(pfx)) {
8712 /* case 3 */
8713 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8714 assign( tmpd, loadLE(ty, mkexpr(addr)) );
8715 assign( tmpt0, getIRegG(sz, pfx, rm) );
8716 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
8717 mkexpr(tmpd), mkexpr(tmpt0)) );
8718 casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
8719 mkexpr(tmpt1)/*newVal*/, guest_RIP_curr_instr );
8720 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
8721 putIRegG(sz, pfx, rm, mkexpr(tmpd));
8722 DIP("xadd%c %s, %s\n",
8723 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
8724 *decode_ok = True;
8725 return len+delta0;
8727 /*UNREACHED*/
8728 vassert(0);
8731 //.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
8732 //..
8733 //.. static
8734 //.. UInt dis_mov_Ew_Sw ( UChar sorb, Long delta0 )
8735 //.. {
8736 //.. Int len;
8737 //.. IRTemp addr;
8738 //.. UChar rm = getUChar(delta0);
8739 //.. HChar dis_buf[50];
8740 //..
8741 //.. if (epartIsReg(rm)) {
8742 //.. putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
8743 //.. DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
8744 //.. return 1+delta0;
8745 //.. } else {
8746 //.. addr = disAMode ( &len, sorb, delta0, dis_buf );
8747 //.. putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
8748 //.. DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
8749 //.. return len+delta0;
8750 //.. }
8751 //.. }
8752 //..
8753 //.. /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If
8754 //.. dst is ireg and sz==4, zero out top half of it. */
8755 //..
8756 //.. static
8757 //.. UInt dis_mov_Sw_Ew ( UChar sorb,
8758 //.. Int sz,
8759 //.. UInt delta0 )
8760 //.. {
8761 //.. Int len;
8762 //.. IRTemp addr;
8763 //.. UChar rm = getUChar(delta0);
8764 //.. HChar dis_buf[50];
8765 //..
8766 //.. vassert(sz == 2 || sz == 4);
8767 //..
8768 //.. if (epartIsReg(rm)) {
8769 //.. if (sz == 4)
8770 //.. putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
8771 //.. else
8772 //.. putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));
8773 //..
8774 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
8775 //.. return 1+delta0;
8776 //.. } else {
8777 //.. addr = disAMode ( &len, sorb, delta0, dis_buf );
8778 //.. storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
8779 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
8780 //.. return len+delta0;
8781 //.. }
8782 //.. }
8784 /* Handle move instructions of the form
8785 mov S, E meaning
8786 mov sreg, reg-or-mem
8787 Is passed a ptr to the modRM byte, and the data size. Returns
8788 the address advanced completely over this instruction.
8790 VEX does not currently simulate segment registers on AMD64, which means that
8791 instead of moving the value of the segment register, zero is moved to the
8792 destination. The zero value represents a null (unused) selector. This is
8793 not correct (especially for the %cs, %fs and %gs registers) but it seems to
8794 provide a sufficient simulation for currently seen programs that use this
8795 instruction. If some program actually decides to use the obtained segment
8796 selector for something meaningful then the zero value should be a clear
8797 indicator that there is some problem.
8799 S(src) is sreg.
8800 E(dst) is reg-or-mem
8802 If E is reg, --> PUT $0, %E
8804 If E is mem, --> (getAddr E) -> tmpa
8805 ST $0, (tmpa)
8807 static
8808 ULong dis_mov_S_E ( const VexAbiInfo* vbi,
8809 Prefix pfx,
8810 Int size,
8811 Long delta0 )
8813 Int len;
8814 UChar rm = getUChar(delta0);
8815 HChar dis_buf[50];
8817 if (epartIsReg(rm)) {
8818 putIRegE(size, pfx, rm, mkU(szToITy(size), 0));
8819 DIP("mov %s,%s\n", nameSReg(gregOfRexRM(pfx, rm)),
8820 nameIRegE(size, pfx, rm));
8821 return 1+delta0;
8824 /* E refers to memory */
8826 IRTemp addr = disAMode(&len, vbi, pfx, delta0, dis_buf, 0);
8827 storeLE(mkexpr(addr), mkU16(0));
8828 DIP("mov %s,%s\n", nameSReg(gregOfRexRM(pfx, rm)),
8829 dis_buf);
8830 return len+delta0;
8834 //.. static
8835 //.. void dis_push_segreg ( UInt sreg, Int sz )
8836 //.. {
8837 //.. IRTemp t1 = newTemp(Ity_I16);
8838 //.. IRTemp ta = newTemp(Ity_I32);
8839 //.. vassert(sz == 2 || sz == 4);
8840 //..
8841 //.. assign( t1, getSReg(sreg) );
8842 //.. assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
8843 //.. putIReg(4, R_ESP, mkexpr(ta));
8844 //.. storeLE( mkexpr(ta), mkexpr(t1) );
8845 //..
8846 //.. DIP("pushw %s\n", nameSReg(sreg));
8847 //.. }
8848 //..
8849 //.. static
8850 //.. void dis_pop_segreg ( UInt sreg, Int sz )
8851 //.. {
8852 //.. IRTemp t1 = newTemp(Ity_I16);
8853 //.. IRTemp ta = newTemp(Ity_I32);
8854 //.. vassert(sz == 2 || sz == 4);
8855 //..
8856 //.. assign( ta, getIReg(4, R_ESP) );
8857 //.. assign( t1, loadLE(Ity_I16, mkexpr(ta)) );
8858 //..
8859 //.. putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
8860 //.. putSReg( sreg, mkexpr(t1) );
8861 //.. DIP("pop %s\n", nameSReg(sreg));
8862 //.. }
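/* Handle RET and RET $imm16: pop the 64-bit return address, advance
   RSP by 8 plus the optional immediate d64 (0 for a plain RET), emit
   the redzone ABI hint, and jump to the popped address. */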
8864 static
8865 void dis_ret ( /*MOD*/DisResult* dres, const VexAbiInfo* vbi, ULong d64 )
8867 IRTemp t1 = newTemp(Ity_I64);
8868 IRTemp t2 = newTemp(Ity_I64);
8869 IRTemp t3 = newTemp(Ity_I64);
8870 assign(t1, getIReg64(R_RSP));
8871 assign(t2, loadLE(Ity_I64,mkexpr(t1)));
8872 assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64)));
8873 putIReg64(R_RSP, mkexpr(t3));
8874 make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret");
8875 jmp_treg(dres, Ijk_Ret, t2);
8876 vassert(dres->whatNext == Dis_StopHere);
8880 /*------------------------------------------------------------*/
8881 /*--- SSE/SSE2/SSE3 helpers ---*/
8882 /*------------------------------------------------------------*/
8884 /* Indicates whether the op requires a rounding-mode argument. Note
8885 that this covers only vector floating point arithmetic ops, and
8886 omits the scalar ones that need rounding modes. Note also that
8887 inconsistencies here will get picked up later by the IR sanity
8888 checker, so this isn't correctness-critical. */
8889 static Bool requiresRMode ( IROp op )
8891 switch (op) {
8892 /* 128 bit ops */
8893 case Iop_Add32Fx4: case Iop_Sub32Fx4:
8894 case Iop_Mul32Fx4: case Iop_Div32Fx4:
8895 case Iop_Add64Fx2: case Iop_Sub64Fx2:
8896 case Iop_Mul64Fx2: case Iop_Div64Fx2:
8897 /* 256 bit ops */
8898 case Iop_Add32Fx8: case Iop_Sub32Fx8:
8899 case Iop_Mul32Fx8: case Iop_Div32Fx8:
8900 case Iop_Add64Fx4: case Iop_Sub64Fx4:
8901 case Iop_Mul64Fx4: case Iop_Div64Fx4:
8902 return True;
8903 default:
8904 break;
8906 return False;
8910 /* Worker function; do not call directly.
8911 Handles full width G = G `op` E and G = (not G) `op` E.
8914 static ULong dis_SSE_E_to_G_all_wrk (
8915 const VexAbiInfo* vbi,
8916 Prefix pfx, Long delta,
8917 const HChar* opname, IROp op,
8918 Bool invertG
8921 HChar dis_buf[50];
8922 Int alen;
8923 IRTemp addr;
8924 UChar rm = getUChar(delta);
8925 Bool needsRMode = requiresRMode(op);
8926 IRExpr* gpart
8927 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRexRM(pfx,rm)))
8928 : getXMMReg(gregOfRexRM(pfx,rm));
8929 if (epartIsReg(rm)) {
8930 putXMMReg(
8931 gregOfRexRM(pfx,rm),
8932 needsRMode
8933 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
8934 gpart,
8935 getXMMReg(eregOfRexRM(pfx,rm)))
8936 : binop(op, gpart,
8937 getXMMReg(eregOfRexRM(pfx,rm)))
8939 DIP("%s %s,%s\n", opname,
8940 nameXMMReg(eregOfRexRM(pfx,rm)),
8941 nameXMMReg(gregOfRexRM(pfx,rm)) );
8942 return delta+1;
8943 } else {
8944 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
8945 putXMMReg(
8946 gregOfRexRM(pfx,rm),
8947 needsRMode
8948 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
8949 gpart,
8950 loadLE(Ity_V128, mkexpr(addr)))
8951 : binop(op, gpart,
8952 loadLE(Ity_V128, mkexpr(addr)))
8954 DIP("%s %s,%s\n", opname,
8955 dis_buf,
8956 nameXMMReg(gregOfRexRM(pfx,rm)) );
8957 return delta+alen;
8962 /* All lanes SSE binary operation, G = G `op` E. */
8964 static
8965 ULong dis_SSE_E_to_G_all ( const VexAbiInfo* vbi,
8966 Prefix pfx, Long delta,
8967 const HChar* opname, IROp op )
8969 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, False );
8972 /* All lanes SSE binary operation, G = (not G) `op` E. */
8974 static
8975 ULong dis_SSE_E_to_G_all_invG ( const VexAbiInfo* vbi,
8976 Prefix pfx, Long delta,
8977 const HChar* opname, IROp op )
8979 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, True );
8983 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */
8985 static ULong dis_SSE_E_to_G_lo32 ( const VexAbiInfo* vbi,
8986 Prefix pfx, Long delta,
8987 const HChar* opname, IROp op )
8989 HChar dis_buf[50];
8990 Int alen;
8991 IRTemp addr;
8992 UChar rm = getUChar(delta);
8993 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
8994 if (epartIsReg(rm)) {
8995 putXMMReg( gregOfRexRM(pfx,rm),
8996 binop(op, gpart,
8997 getXMMReg(eregOfRexRM(pfx,rm))) );
8998 DIP("%s %s,%s\n", opname,
8999 nameXMMReg(eregOfRexRM(pfx,rm)),
9000 nameXMMReg(gregOfRexRM(pfx,rm)) );
9001 return delta+1;
9002 } else {
9003 /* We can only do a 32-bit memory read, so the upper 3/4 of the
9004 E operand needs to be made simply of zeroes. */
9005 IRTemp epart = newTemp(Ity_V128);
9006 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9007 assign( epart, unop( Iop_32UtoV128,
9008 loadLE(Ity_I32, mkexpr(addr))) );
9009 putXMMReg( gregOfRexRM(pfx,rm),
9010 binop(op, gpart, mkexpr(epart)) );
9011 DIP("%s %s,%s\n", opname,
9012 dis_buf,
9013 nameXMMReg(gregOfRexRM(pfx,rm)) );
9014 return delta+alen;
9019 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. */
9021 static ULong dis_SSE_E_to_G_lo64 ( const VexAbiInfo* vbi,
9022 Prefix pfx, Long delta,
9023 const HChar* opname, IROp op )
9025 HChar dis_buf[50];
9026 Int alen;
9027 IRTemp addr;
9028 UChar rm = getUChar(delta);
9029 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
9030 if (epartIsReg(rm)) {
9031 putXMMReg( gregOfRexRM(pfx,rm),
9032 binop(op, gpart,
9033 getXMMReg(eregOfRexRM(pfx,rm))) );
9034 DIP("%s %s,%s\n", opname,
9035 nameXMMReg(eregOfRexRM(pfx,rm)),
9036 nameXMMReg(gregOfRexRM(pfx,rm)) );
9037 return delta+1;
9038 } else {
9039 /* We can only do a 64-bit memory read, so the upper half of the
9040 E operand needs to be made simply of zeroes. */
9041 IRTemp epart = newTemp(Ity_V128);
9042 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9043 assign( epart, unop( Iop_64UtoV128,
9044 loadLE(Ity_I64, mkexpr(addr))) );
9045 putXMMReg( gregOfRexRM(pfx,rm),
9046 binop(op, gpart, mkexpr(epart)) );
9047 DIP("%s %s,%s\n", opname,
9048 dis_buf,
9049 nameXMMReg(gregOfRexRM(pfx,rm)) );
9050 return delta+alen;
9055 /* All lanes unary SSE operation, G = op(E). */
9057 static ULong dis_SSE_E_to_G_unary_all (
9058 const VexAbiInfo* vbi,
9059 Prefix pfx, Long delta,
9060 const HChar* opname, IROp op
9063 HChar dis_buf[50];
9064 Int alen;
9065 IRTemp addr;
9066 UChar rm = getUChar(delta);
9067 // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
9068 // up in the usual way.
9069 Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2;
9070 if (epartIsReg(rm)) {
9071 IRExpr* src = getXMMReg(eregOfRexRM(pfx,rm));
9072 /* XXXROUNDINGFIXME */
9073 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
9074 : unop(op, src);
9075 putXMMReg( gregOfRexRM(pfx,rm), res );
9076 DIP("%s %s,%s\n", opname,
9077 nameXMMReg(eregOfRexRM(pfx,rm)),
9078 nameXMMReg(gregOfRexRM(pfx,rm)) );
9079 return delta+1;
9080 } else {
9081 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9082 IRExpr* src = loadLE(Ity_V128, mkexpr(addr));
9083 /* XXXROUNDINGFIXME */
9084 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
9085 : unop(op, src);
9086 putXMMReg( gregOfRexRM(pfx,rm), res );
9087 DIP("%s %s,%s\n", opname,
9088 dis_buf,
9089 nameXMMReg(gregOfRexRM(pfx,rm)) );
9090 return delta+alen;
9095 /* Lowest 32-bit lane only unary SSE operation, G = op(E). */
9097 static ULong dis_SSE_E_to_G_unary_lo32 (
9098 const VexAbiInfo* vbi,
9099 Prefix pfx, Long delta,
9100 const HChar* opname, IROp op
9103 /* First we need to get the old G value and patch the low 32 bits
9104 of the E operand into it. Then apply op and write back to G. */
9105 HChar dis_buf[50];
9106 Int alen;
9107 IRTemp addr;
9108 UChar rm = getUChar(delta);
9109 IRTemp oldG0 = newTemp(Ity_V128);
9110 IRTemp oldG1 = newTemp(Ity_V128);
9112 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
9114 if (epartIsReg(rm)) {
9115 assign( oldG1,
9116 binop( Iop_SetV128lo32,
9117 mkexpr(oldG0),
9118 getXMMRegLane32(eregOfRexRM(pfx,rm), 0)) );
9119 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
9120 DIP("%s %s,%s\n", opname,
9121 nameXMMReg(eregOfRexRM(pfx,rm)),
9122 nameXMMReg(gregOfRexRM(pfx,rm)) );
9123 return delta+1;
9124 } else {
9125 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9126 assign( oldG1,
9127 binop( Iop_SetV128lo32,
9128 mkexpr(oldG0),
9129 loadLE(Ity_I32, mkexpr(addr)) ));
9130 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
9131 DIP("%s %s,%s\n", opname,
9132 dis_buf,
9133 nameXMMReg(gregOfRexRM(pfx,rm)) );
9134 return delta+alen;
9139 /* Lowest 64-bit lane only unary SSE operation, G = op(E). */
9141 static ULong dis_SSE_E_to_G_unary_lo64 (
9142 const VexAbiInfo* vbi,
9143 Prefix pfx, Long delta,
9144 const HChar* opname, IROp op
9147 /* First we need to get the old G value and patch the low 64 bits
9148 of the E operand into it. Then apply op and write back to G. */
9149 HChar dis_buf[50];
9150 Int alen;
9151 IRTemp addr;
9152 UChar rm = getUChar(delta);
9153 IRTemp oldG0 = newTemp(Ity_V128);
9154 IRTemp oldG1 = newTemp(Ity_V128);
9156 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
9158 if (epartIsReg(rm)) {
9159 assign( oldG1,
9160 binop( Iop_SetV128lo64,
9161 mkexpr(oldG0),
9162 getXMMRegLane64(eregOfRexRM(pfx,rm), 0)) );
9163 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
9164 DIP("%s %s,%s\n", opname,
9165 nameXMMReg(eregOfRexRM(pfx,rm)),
9166 nameXMMReg(gregOfRexRM(pfx,rm)) );
9167 return delta+1;
9168 } else {
9169 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9170 assign( oldG1,
9171 binop( Iop_SetV128lo64,
9172 mkexpr(oldG0),
9173 loadLE(Ity_I64, mkexpr(addr)) ));
9174 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
9175 DIP("%s %s,%s\n", opname,
9176 dis_buf,
9177 nameXMMReg(gregOfRexRM(pfx,rm)) );
9178 return delta+alen;
9183 /* SSE integer binary operation:
9184 G = G `op` E (eLeft == False)
9185 G = E `op` G (eLeft == True)
9187 static ULong dis_SSEint_E_to_G(
9188 const VexAbiInfo* vbi,
9189 Prefix pfx, Long delta,
9190 const HChar* opname, IROp op,
9191 Bool eLeft
9194 HChar dis_buf[50];
9195 Int alen;
9196 IRTemp addr;
9197 UChar rm = getUChar(delta);
9198 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
9199 IRExpr* epart = NULL;
9200 if (epartIsReg(rm)) {
9201 epart = getXMMReg(eregOfRexRM(pfx,rm));
9202 DIP("%s %s,%s\n", opname,
9203 nameXMMReg(eregOfRexRM(pfx,rm)),
9204 nameXMMReg(gregOfRexRM(pfx,rm)) );
9205 delta += 1;
9206 } else {
9207 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9208 epart = loadLE(Ity_V128, mkexpr(addr));
9209 DIP("%s %s,%s\n", opname,
9210 dis_buf,
9211 nameXMMReg(gregOfRexRM(pfx,rm)) );
9212 delta += alen;
9214 putXMMReg( gregOfRexRM(pfx,rm),
9215 eLeft ? binop(op, epart, gpart)
9216 : binop(op, gpart, epart) );
9217 return delta;
9221 /* Helper for doing SSE FP comparisons. False return ==> unhandled.
9222 This is all a bit of a kludge in that it ignores the subtleties of
9223 ordered-vs-unordered and signalling-vs-nonsignalling in the Intel
9224 spec. The meaning of the outputs is as follows:
9226 preZeroP: the active lanes of both incoming arguments should be set to zero
9227 before performing the operation. IOW the actual args are to be ignored
9228 and instead zero bits are to be used. This is a bit strange but is needed
9229 to make the constant-false/true variants (FALSE_OQ, TRUE_UQ, FALSE_OS,
9230 TRUE_US) work.
9232 preSwapP: the args should be swapped before performing the operation. Note
9233 that zeroing arg input sections (per preZeroP) and swapping them (per
9234 preSwapP) are allowed to happen in either order; the result is the same.
9236 opP: this returns the actual comparison op to perform.
9238 postNotP: if true, the result(ing vector) of the comparison operation should
9239 be bitwise-not-ed. Note that only the lanes of the output actually
9240 computed by opP should be not-ed.
9242 static Bool findSSECmpOp ( /*OUT*/Bool* preZeroP,
9243 /*OUT*/Bool* preSwapP,
9244 /*OUT*/IROp* opP,
9245 /*OUT*/Bool* postNotP,
9246 UInt imm8, Bool all_lanes, Int sz )
9248 vassert(*preZeroP == False);
9249 vassert(*preSwapP == False);
9250 vassert(*opP == Iop_INVALID);
9251 vassert(*postNotP == False);
9253 if (imm8 >= 32) return False;
9255 /* First, compute a (preZero, preSwap, op, postNot) quad from
9256 the supplied imm8. */
9257 Bool preZero = False;
9258 Bool preSwap = False;
9259 IROp op = Iop_INVALID;
9260 Bool postNot = False;
9262 # define XXX(_preZero, _preSwap, _op, _postNot) \
9263 { preZero = _preZero; preSwap = _preSwap; op = _op; postNot = _postNot; }
9264 // If you add a case here, add a corresponding test for both VCMPSD_128
9265 // and VCMPSS_128 in avx-1.c.
9266 // Cases 0xA and above are
9267 // "Enhanced Comparison Predicate[s] for VEX-Encoded [insns]"
9268 switch (imm8) {
9269 // "O" = ordered, "U" = unordered
9270 // "Q" = non-signalling (quiet), "S" = signalling
9272 // replace active arg lanes in operands with zero
9273 // |
9274 // | swap operands before applying the cmp op?
9275 // | |
9276 // | | cmp op invert active lanes after?
9277 // | | | |
9278 // v v v v
9279 case 0x0: XXX(False, False, Iop_CmpEQ32Fx4, False); break; // EQ_OQ
9280 case 0x8: XXX(False, False, Iop_CmpEQ32Fx4, False); break; // EQ_UQ
9281 case 0x10: XXX(False, False, Iop_CmpEQ32Fx4, False); break; // EQ_OS
9282 case 0x18: XXX(False, False, Iop_CmpEQ32Fx4, False); break; // EQ_US
9284 case 0x1: XXX(False, False, Iop_CmpLT32Fx4, False); break; // LT_OS
9285 case 0x11: XXX(False, False, Iop_CmpLT32Fx4, False); break; // LT_OQ
9287 case 0x2: XXX(False, False, Iop_CmpLE32Fx4, False); break; // LE_OS
9288 case 0x12: XXX(False, False, Iop_CmpLE32Fx4, False); break; // LE_OQ
9290 case 0x3: XXX(False, False, Iop_CmpUN32Fx4, False); break; // UNORD_Q
9291 case 0x13: XXX(False, False, Iop_CmpUN32Fx4, False); break; // UNORD_S
9293 // 0xC: this isn't really right because it returns all-1s when
9294 // either operand is a NaN, and it should return all-0s.
9295 case 0x4: XXX(False, False, Iop_CmpEQ32Fx4, True); break; // NEQ_UQ
9296 case 0xC: XXX(False, False, Iop_CmpEQ32Fx4, True); break; // NEQ_OQ
9297 case 0x14: XXX(False, False, Iop_CmpEQ32Fx4, True); break; // NEQ_US
9298 case 0x1C: XXX(False, False, Iop_CmpEQ32Fx4, True); break; // NEQ_OS
9300 case 0x5: XXX(False, False, Iop_CmpLT32Fx4, True); break; // NLT_US
9301 case 0x15: XXX(False, False, Iop_CmpLT32Fx4, True); break; // NLT_UQ
9303 case 0x6: XXX(False, False, Iop_CmpLE32Fx4, True); break; // NLE_US
9304 case 0x16: XXX(False, False, Iop_CmpLE32Fx4, True); break; // NLE_UQ
9306 case 0x7: XXX(False, False, Iop_CmpUN32Fx4, True); break; // ORD_Q
9307 case 0x17: XXX(False, False, Iop_CmpUN32Fx4, True); break; // ORD_S
9309 case 0x9: XXX(False, True, Iop_CmpLE32Fx4, True); break; // NGE_US
9310 case 0x19: XXX(False, True, Iop_CmpLE32Fx4, True); break; // NGE_UQ
9312 case 0xA: XXX(False, True, Iop_CmpLT32Fx4, True); break; // NGT_US
9313 case 0x1A: XXX(False, True, Iop_CmpLT32Fx4, True); break; // NGT_UQ
9315 case 0xD: XXX(False, True, Iop_CmpLE32Fx4, False); break; // GE_OS
9316 case 0x1D: XXX(False, True, Iop_CmpLE32Fx4, False); break; // GE_OQ
9318 case 0xE: XXX(False, True, Iop_CmpLT32Fx4, False); break; // GT_OS
9319 case 0x1E: XXX(False, True, Iop_CmpLT32Fx4, False); break; // GT_OQ
9320 // Constant-value-result ops
9321 case 0xB: XXX(True, False, Iop_CmpEQ32Fx4, True); break; // FALSE_OQ
9322 case 0xF: XXX(True, False, Iop_CmpEQ32Fx4, False); break; // TRUE_UQ
9323 case 0x1B: XXX(True, False, Iop_CmpEQ32Fx4, True); break; // FALSE_OS
9324 case 0x1F: XXX(True, False, Iop_CmpEQ32Fx4, False); break; // TRUE_US
9325 /* Don't forget to add test cases to VCMPSS_128_<imm8> in
9326 avx-1.c if new cases turn up. */
9327 default: break;
9329 # undef XXX
9330 if (op == Iop_INVALID) return False;
9332 /* Now convert the op into one with the same arithmetic but that is
9333 correct for the width and laneage requirements. */
9335 /**/ if (sz == 4 && all_lanes) {
9336 switch (op) {
9337 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32Fx4; break;
9338 case Iop_CmpLT32Fx4: op = Iop_CmpLT32Fx4; break;
9339 case Iop_CmpLE32Fx4: op = Iop_CmpLE32Fx4; break;
9340 case Iop_CmpUN32Fx4: op = Iop_CmpUN32Fx4; break;
9341 default: vassert(0);
9344 else if (sz == 4 && !all_lanes) {
9345 switch (op) {
9346 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32F0x4; break;
9347 case Iop_CmpLT32Fx4: op = Iop_CmpLT32F0x4; break;
9348 case Iop_CmpLE32Fx4: op = Iop_CmpLE32F0x4; break;
9349 case Iop_CmpUN32Fx4: op = Iop_CmpUN32F0x4; break;
9350 default: vassert(0);
9353 else if (sz == 8 && all_lanes) {
9354 switch (op) {
9355 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64Fx2; break;
9356 case Iop_CmpLT32Fx4: op = Iop_CmpLT64Fx2; break;
9357 case Iop_CmpLE32Fx4: op = Iop_CmpLE64Fx2; break;
9358 case Iop_CmpUN32Fx4: op = Iop_CmpUN64Fx2; break;
9359 default: vassert(0);
9362 else if (sz == 8 && !all_lanes) {
9363 switch (op) {
9364 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64F0x2; break;
9365 case Iop_CmpLT32Fx4: op = Iop_CmpLT64F0x2; break;
9366 case Iop_CmpLE32Fx4: op = Iop_CmpLE64F0x2; break;
9367 case Iop_CmpUN32Fx4: op = Iop_CmpUN64F0x2; break;
9368 default: vassert(0);
9371 else {
9372 vpanic("findSSECmpOp(amd64,guest)");
9375 if (preZero) {
9376 // In this case, preSwap is irrelevant, but assert anyway.
9377 vassert(preSwap == False);
9379 *preZeroP = preZero; *preSwapP = preSwap; *opP = op; *postNotP = postNot;
9380 return True;
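/* Decode example (for illustration): imm8 = 0x9 (NGE_US) yields
   preSwap=True, a CmpLE op and postNot=True, so the lanes compute
   NOT(E <= G), i.e. (G < E) or unordered -- exactly "not
   greater-or-equal" for the operands in their original order. */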
9384 /* Handles SSE 32F/64F comparisons. It can fail, in which case it
9385 returns the original delta to indicate failure. */
9387 static Long dis_SSE_cmp_E_to_G ( const VexAbiInfo* vbi,
9388 Prefix pfx, Long delta,
9389 const HChar* opname, Bool all_lanes, Int sz )
9391 Long delta0 = delta;
9392 HChar dis_buf[50];
9393 Int alen;
9394 UInt imm8;
9395 IRTemp addr;
9396 Bool preZero = False;
9397 Bool preSwap = False;
9398 IROp op = Iop_INVALID;
9399 Bool postNot = False;
9400 IRTemp plain = newTemp(Ity_V128);
9401 UChar rm = getUChar(delta);
9402 UShort mask = 0;
9403 vassert(sz == 4 || sz == 8);
9404 if (epartIsReg(rm)) {
9405 imm8 = getUChar(delta+1);
9406 if (imm8 >= 8) return delta0; /* FAIL */
9407 Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot,
9408 imm8, all_lanes, sz);
9409 if (!ok) return delta0; /* FAIL */
9410 vassert(!preZero); /* never needed for imm8 < 8 */
9411 vassert(!preSwap); /* never needed for imm8 < 8 */
9412 assign( plain, binop(op, getXMMReg(gregOfRexRM(pfx,rm)),
9413 getXMMReg(eregOfRexRM(pfx,rm))) );
9414 delta += 2;
9415 DIP("%s $%u,%s,%s\n", opname,
9416 imm8,
9417 nameXMMReg(eregOfRexRM(pfx,rm)),
9418 nameXMMReg(gregOfRexRM(pfx,rm)) );
9419 } else {
9420 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
9421 imm8 = getUChar(delta+alen);
9422 if (imm8 >= 8) return delta0; /* FAIL */
9423 Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot,
9424 imm8, all_lanes, sz);
9425 if (!ok) return delta0; /* FAIL */
9426 vassert(!preZero); /* never needed for imm8 < 8 */
9427 vassert(!preSwap); /* never needed for imm8 < 8 */
9428 assign( plain,
9429 binop(
9431 getXMMReg(gregOfRexRM(pfx,rm)),
9432 all_lanes
9433 ? loadLE(Ity_V128, mkexpr(addr))
9434 : sz == 8
9435 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
9436 : /*sz==4*/
9437 unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))
9440 delta += alen+1;
9441 DIP("%s $%u,%s,%s\n", opname,
9442 imm8,
9443 dis_buf,
9444 nameXMMReg(gregOfRexRM(pfx,rm)) );
9447 if (postNot && all_lanes) {
9448 putXMMReg( gregOfRexRM(pfx,rm),
9449 unop(Iop_NotV128, mkexpr(plain)) );
9451 else
9452 if (postNot && !all_lanes) {
9453 mask = toUShort(sz==4 ? 0x000F : 0x00FF);
9454 putXMMReg( gregOfRexRM(pfx,rm),
9455 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) );
9457 else {
9458 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(plain) );
9461 return delta;
9465 /* Vector by scalar shift of G by the amount specified at the bottom
9466 of E. */
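/* Note on out-of-range counts, as modelled below: for logical shifts
   a count >= the lane width zeroes the result, while for arithmetic
   right shifts it behaves like a shift by (lane width - 1), filling
   each lane with its sign bit -- matching PSLL/PSRL/PSRA hardware
   behaviour for large counts. */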
9468 static ULong dis_SSE_shiftG_byE ( const VexAbiInfo* vbi,
9469 Prefix pfx, Long delta,
9470 const HChar* opname, IROp op )
9472 HChar dis_buf[50];
9473 Int alen, size;
9474 IRTemp addr;
9475 Bool shl, shr, sar;
9476 UChar rm = getUChar(delta);
9477 IRTemp g0 = newTemp(Ity_V128);
9478 IRTemp g1 = newTemp(Ity_V128);
9479 IRTemp amt = newTemp(Ity_I64);
9480 IRTemp amt8 = newTemp(Ity_I8);
9481 if (epartIsReg(rm)) {
9482 assign( amt, getXMMRegLane64(eregOfRexRM(pfx,rm), 0) );
9483 DIP("%s %s,%s\n", opname,
9484 nameXMMReg(eregOfRexRM(pfx,rm)),
9485 nameXMMReg(gregOfRexRM(pfx,rm)) );
9486 delta++;
9487 } else {
9488 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9489 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
9490 DIP("%s %s,%s\n", opname,
9491 dis_buf,
9492 nameXMMReg(gregOfRexRM(pfx,rm)) );
9493 delta += alen;
9495 assign( g0, getXMMReg(gregOfRexRM(pfx,rm)) );
9496 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
9498 shl = shr = sar = False;
9499 size = 0;
9500 switch (op) {
9501 case Iop_ShlN16x8: shl = True; size = 16; break;
9502 case Iop_ShlN32x4: shl = True; size = 32; break;
9503 case Iop_ShlN64x2: shl = True; size = 64; break;
9504 case Iop_SarN16x8: sar = True; size = 16; break;
9505 case Iop_SarN32x4: sar = True; size = 32; break;
9506 case Iop_ShrN16x8: shr = True; size = 16; break;
9507 case Iop_ShrN32x4: shr = True; size = 32; break;
9508 case Iop_ShrN64x2: shr = True; size = 64; break;
9509 default: vassert(0);
9512 if (shl || shr) {
9513 assign(
9515 IRExpr_ITE(
9516 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
9517 binop(op, mkexpr(g0), mkexpr(amt8)),
9518 mkV128(0x0000)
9521 } else
9522 if (sar) {
9523 assign(
9525 IRExpr_ITE(
9526 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
9527 binop(op, mkexpr(g0), mkexpr(amt8)),
9528 binop(op, mkexpr(g0), mkU8(size-1))
9531 } else {
9532 vassert(0);
9535 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(g1) );
9536 return delta;
9540 /* Vector by scalar shift of E by an immediate byte. */
9542 static
9543 ULong dis_SSE_shiftE_imm ( Prefix pfx,
9544 Long delta, const HChar* opname, IROp op )
9546 Bool shl, shr, sar;
9547 UChar rm = getUChar(delta);
9548 IRTemp e0 = newTemp(Ity_V128);
9549 IRTemp e1 = newTemp(Ity_V128);
9550 UChar amt, size;
9551 vassert(epartIsReg(rm));
9552 vassert(gregLO3ofRM(rm) == 2
9553 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
9554 amt = getUChar(delta+1);
9555 delta += 2;
9556 DIP("%s $%d,%s\n", opname,
9557 (Int)amt,
9558 nameXMMReg(eregOfRexRM(pfx,rm)) );
9559 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );
9561 shl = shr = sar = False;
9562 size = 0;
9563 switch (op) {
9564 case Iop_ShlN16x8: shl = True; size = 16; break;
9565 case Iop_ShlN32x4: shl = True; size = 32; break;
9566 case Iop_ShlN64x2: shl = True; size = 64; break;
9567 case Iop_SarN16x8: sar = True; size = 16; break;
9568 case Iop_SarN32x4: sar = True; size = 32; break;
9569 case Iop_ShrN16x8: shr = True; size = 16; break;
9570 case Iop_ShrN32x4: shr = True; size = 32; break;
9571 case Iop_ShrN64x2: shr = True; size = 64; break;
9572 default: vassert(0);
9575 if (shl || shr) {
9576 assign( e1, amt >= size
9577 ? mkV128(0x0000)
9578 : binop(op, mkexpr(e0), mkU8(amt))
9580 } else
9581 if (sar) {
9582 assign( e1, amt >= size
9583 ? binop(op, mkexpr(e0), mkU8(size-1))
9584 : binop(op, mkexpr(e0), mkU8(amt))
9586 } else {
9587 vassert(0);
9590 putXMMReg( eregOfRexRM(pfx,rm), mkexpr(e1) );
9591 return delta;
9595 /* Get the current SSE rounding mode. */
9597 static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void )
9599 return
9600 unop( Iop_64to32,
9601 binop( Iop_And64,
9602 IRExpr_Get( OFFB_SSEROUND, Ity_I64 ),
9603 mkU64(3) ));
9606 static void put_sse_roundingmode ( IRExpr* sseround )
9608 vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32);
9609 stmt( IRStmt_Put( OFFB_SSEROUND,
9610 unop(Iop_32Uto64,sseround) ) );
9613 /* Break a V128-bit value up into four 32-bit ints. */
9615 static void breakupV128to32s ( IRTemp t128,
9616 /*OUTs*/
9617 IRTemp* t3, IRTemp* t2,
9618 IRTemp* t1, IRTemp* t0 )
9620 IRTemp hi64 = newTemp(Ity_I64);
9621 IRTemp lo64 = newTemp(Ity_I64);
9622 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
9623 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) );
9625 vassert(t0 && *t0 == IRTemp_INVALID);
9626 vassert(t1 && *t1 == IRTemp_INVALID);
9627 vassert(t2 && *t2 == IRTemp_INVALID);
9628 vassert(t3 && *t3 == IRTemp_INVALID);
9630 *t0 = newTemp(Ity_I32);
9631 *t1 = newTemp(Ity_I32);
9632 *t2 = newTemp(Ity_I32);
9633 *t3 = newTemp(Ity_I32);
9634 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) );
9635 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
9636 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) );
9637 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
9640 /* Construct a V128-bit value from four 32-bit ints. */
9642 static IRExpr* mkV128from32s ( IRTemp t3, IRTemp t2,
9643 IRTemp t1, IRTemp t0 )
9645 return
9646 binop( Iop_64HLtoV128,
9647 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
9648 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
9652 /* Break a 64-bit value up into four 16-bit ints. */
9654 static void breakup64to16s ( IRTemp t64,
9655 /*OUTs*/
9656 IRTemp* t3, IRTemp* t2,
9657 IRTemp* t1, IRTemp* t0 )
9659 IRTemp hi32 = newTemp(Ity_I32);
9660 IRTemp lo32 = newTemp(Ity_I32);
9661 assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
9662 assign( lo32, unop(Iop_64to32, mkexpr(t64)) );
9664 vassert(t0 && *t0 == IRTemp_INVALID);
9665 vassert(t1 && *t1 == IRTemp_INVALID);
9666 vassert(t2 && *t2 == IRTemp_INVALID);
9667 vassert(t3 && *t3 == IRTemp_INVALID);
9669 *t0 = newTemp(Ity_I16);
9670 *t1 = newTemp(Ity_I16);
9671 *t2 = newTemp(Ity_I16);
9672 *t3 = newTemp(Ity_I16);
9673 assign( *t0, unop(Iop_32to16, mkexpr(lo32)) );
9674 assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
9675 assign( *t2, unop(Iop_32to16, mkexpr(hi32)) );
9676 assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
9679 /* Construct a 64-bit value from four 16-bit ints. */
9681 static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
9682 IRTemp t1, IRTemp t0 )
9684 return
9685 binop( Iop_32HLto64,
9686 binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
9687 binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
9691 /* Break a V256-bit value up into four 64-bit ints. */
9693 static void breakupV256to64s ( IRTemp t256,
9694 /*OUTs*/
9695 IRTemp* t3, IRTemp* t2,
9696 IRTemp* t1, IRTemp* t0 )
9698 vassert(t0 && *t0 == IRTemp_INVALID);
9699 vassert(t1 && *t1 == IRTemp_INVALID);
9700 vassert(t2 && *t2 == IRTemp_INVALID);
9701 vassert(t3 && *t3 == IRTemp_INVALID);
9702 *t0 = newTemp(Ity_I64);
9703 *t1 = newTemp(Ity_I64);
9704 *t2 = newTemp(Ity_I64);
9705 *t3 = newTemp(Ity_I64);
9706 assign( *t0, unop(Iop_V256to64_0, mkexpr(t256)) );
9707 assign( *t1, unop(Iop_V256to64_1, mkexpr(t256)) );
9708 assign( *t2, unop(Iop_V256to64_2, mkexpr(t256)) );
9709 assign( *t3, unop(Iop_V256to64_3, mkexpr(t256)) );
9712 /* Break a V256-bit value up into two V128s. */
9714 static void breakupV256toV128s ( IRTemp t256,
9715 /*OUTs*/
9716 IRTemp* t1, IRTemp* t0 )
9718 vassert(t0 && *t0 == IRTemp_INVALID);
9719 vassert(t1 && *t1 == IRTemp_INVALID);
9720 *t0 = newTemp(Ity_V128);
9721 *t1 = newTemp(Ity_V128);
9722 assign(*t1, unop(Iop_V256toV128_1, mkexpr(t256)));
9723 assign(*t0, unop(Iop_V256toV128_0, mkexpr(t256)));
9726 /* Break a V256-bit value up into eight 32-bit ints. */
9728 static void breakupV256to32s ( IRTemp t256,
9729 /*OUTs*/
9730 IRTemp* t7, IRTemp* t6,
9731 IRTemp* t5, IRTemp* t4,
9732 IRTemp* t3, IRTemp* t2,
9733 IRTemp* t1, IRTemp* t0 )
9735 IRTemp t128_1 = IRTemp_INVALID;
9736 IRTemp t128_0 = IRTemp_INVALID;
9737 breakupV256toV128s( t256, &t128_1, &t128_0 );
9738 breakupV128to32s( t128_1, t7, t6, t5, t4 );
9739 breakupV128to32s( t128_0, t3, t2, t1, t0 );
9742 /* Break a V128-bit value up into two 64-bit ints. */
9744 static void breakupV128to64s ( IRTemp t128,
9745 /*OUTs*/
9746 IRTemp* t1, IRTemp* t0 )
9748 vassert(t0 && *t0 == IRTemp_INVALID);
9749 vassert(t1 && *t1 == IRTemp_INVALID);
9750 *t0 = newTemp(Ity_I64);
9751 *t1 = newTemp(Ity_I64);
9752 assign( *t0, unop(Iop_V128to64, mkexpr(t128)) );
9753 assign( *t1, unop(Iop_V128HIto64, mkexpr(t128)) );
9756 /* Construct a V256-bit value from eight 32-bit ints. */
9758 static IRExpr* mkV256from32s ( IRTemp t7, IRTemp t6,
9759 IRTemp t5, IRTemp t4,
9760 IRTemp t3, IRTemp t2,
9761 IRTemp t1, IRTemp t0 )
9763 return
9764 binop( Iop_V128HLtoV256,
9765 binop( Iop_64HLtoV128,
9766 binop(Iop_32HLto64, mkexpr(t7), mkexpr(t6)),
9767 binop(Iop_32HLto64, mkexpr(t5), mkexpr(t4)) ),
9768 binop( Iop_64HLtoV128,
9769 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
9770 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) )
9774 /* Construct a V256-bit value from four 64-bit ints. */
9776 static IRExpr* mkV256from64s ( IRTemp t3, IRTemp t2,
9777 IRTemp t1, IRTemp t0 )
9779 return
9780 binop( Iop_V128HLtoV256,
9781 binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)),
9782 binop(Iop_64HLtoV128, mkexpr(t1), mkexpr(t0))
9786 /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit
9787 values (aa,bb), computes, for each of the 4 16-bit lanes:
9789 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
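/* A worked example of the lane computation above (illustrative only):
   with aa_lane = 0x4000 and bb_lane = 0x4000 the signed 32-bit product
   is 0x10000000; >>u 14 gives 0x4000, +1 gives 0x4001, >>u 1 gives
   0x2000.  With aa_lane = bb_lane = 0x7FFF the result is 0x7FFE.  A
   scalar reference version of the same formula, using the VEX basic
   types and kept compiled-out purely as documentation (the name is
   made up), is: */
#if 0
static UShort ref_pmulhrsw_lane ( Short a, Short b )
{
   UInt prod = (UInt)(((Int)a) * ((Int)b));     /* signed product, as bits */
   return (UShort)( ((prod >> 14) + 1) >> 1 );  /* >>u 14, +1, >>u 1       */
}
#endif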
9791 static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
9793 IRTemp aa = newTemp(Ity_I64);
9794 IRTemp bb = newTemp(Ity_I64);
9795 IRTemp aahi32s = newTemp(Ity_I64);
9796 IRTemp aalo32s = newTemp(Ity_I64);
9797 IRTemp bbhi32s = newTemp(Ity_I64);
9798 IRTemp bblo32s = newTemp(Ity_I64);
9799 IRTemp rHi = newTemp(Ity_I64);
9800 IRTemp rLo = newTemp(Ity_I64);
9801 IRTemp one32x2 = newTemp(Ity_I64);
9802 assign(aa, aax);
9803 assign(bb, bbx);
9804 assign( aahi32s,
9805 binop(Iop_SarN32x2,
9806 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
9807 mkU8(16) ));
9808 assign( aalo32s,
9809 binop(Iop_SarN32x2,
9810 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
9811 mkU8(16) ));
9812 assign( bbhi32s,
9813 binop(Iop_SarN32x2,
9814 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
9815 mkU8(16) ));
9816 assign( bblo32s,
9817 binop(Iop_SarN32x2,
9818 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
9819 mkU8(16) ));
9820 assign(one32x2, mkU64( (1ULL << 32) + 1 ));
9821 assign(
9822 rHi,
9823 binop(
9824 Iop_ShrN32x2,
9825 binop(
9826 Iop_Add32x2,
9827 binop(
9828 Iop_ShrN32x2,
9829 binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)),
9830 mkU8(14)
9832 mkexpr(one32x2)
9834 mkU8(1)
9837 assign(
9838 rLo,
9839 binop(
9840 Iop_ShrN32x2,
9841 binop(
9842 Iop_Add32x2,
9843 binop(
9844 Iop_ShrN32x2,
9845 binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)),
9846 mkU8(14)
9848 mkexpr(one32x2)
9850 mkU8(1)
9853 return
9854 binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo));
9857 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit
9858 values (aa,bb), computes, for each lane:
9860 if aa_lane < 0 then - bb_lane
9861 else if aa_lane > 0 then bb_lane
9862 else 0
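/* For example, with laneszB == 2: an aa lane of 0xFFFF (-1) selects the
   negated bb lane (0 - bb), an aa lane of 0 gives 0, and any positive
   aa lane passes bb through unchanged.  The negation is plain
   wrap-around subtraction (no saturation), so a bb lane of 0x8000 stays
   0x8000 when the corresponding aa lane is negative. */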
9864 static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
9866 IRTemp aa = newTemp(Ity_I64);
9867 IRTemp bb = newTemp(Ity_I64);
9868 IRTemp zero = newTemp(Ity_I64);
9869 IRTemp bbNeg = newTemp(Ity_I64);
9870 IRTemp negMask = newTemp(Ity_I64);
9871 IRTemp posMask = newTemp(Ity_I64);
9872 IROp opSub = Iop_INVALID;
9873 IROp opCmpGTS = Iop_INVALID;
9875 switch (laneszB) {
9876 case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break;
9877 case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
9878 case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
9879 default: vassert(0);
9882 assign( aa, aax );
9883 assign( bb, bbx );
9884 assign( zero, mkU64(0) );
9885 assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) );
9886 assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
9887 assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) );
9889 return
9890 binop(Iop_Or64,
9891 binop(Iop_And64, mkexpr(bb), mkexpr(posMask)),
9892 binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );
9897 /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit
9898 value aa, computes, for each lane
9900 if aa < 0 then -aa else aa
9902 Note that the result is interpreted as unsigned, so that the
9903 absolute value of the most negative signed input can be
9904 represented.
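/* For example, with laneszB == 1 a lane holding 0x80 (-128) yields 0x80,
   which is 128 when read as unsigned -- hence the note above that the
   result must be viewed as unsigned rather than signed. */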
9906 static IRTemp math_PABS_MMX ( IRTemp aa, Int laneszB )
9908 IRTemp res = newTemp(Ity_I64);
9909 IRTemp zero = newTemp(Ity_I64);
9910 IRTemp aaNeg = newTemp(Ity_I64);
9911 IRTemp negMask = newTemp(Ity_I64);
9912 IRTemp posMask = newTemp(Ity_I64);
9913 IROp opSub = Iop_INVALID;
9914 IROp opSarN = Iop_INVALID;
9916 switch (laneszB) {
9917 case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break;
9918 case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
9919 case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
9920 default: vassert(0);
9923 assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
9924 assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
9925 assign( zero, mkU64(0) );
9926 assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) );
9927 assign( res,
9928 binop(Iop_Or64,
9929 binop(Iop_And64, mkexpr(aa), mkexpr(posMask)),
9930 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) ));
9931 return res;
9934 /* XMM version of math_PABS_MMX. */
9935 static IRTemp math_PABS_XMM ( IRTemp aa, Int laneszB )
9937 IRTemp res = newTemp(Ity_V128);
9938 IRTemp aaHi = newTemp(Ity_I64);
9939 IRTemp aaLo = newTemp(Ity_I64);
9940 assign(aaHi, unop(Iop_V128HIto64, mkexpr(aa)));
9941 assign(aaLo, unop(Iop_V128to64, mkexpr(aa)));
9942 assign(res, binop(Iop_64HLtoV128,
9943 mkexpr(math_PABS_MMX(aaHi, laneszB)),
9944 mkexpr(math_PABS_MMX(aaLo, laneszB))));
9945 return res;
9948 /* Specialisations of math_PABS_XMM, since there's no easy way to do
9949 partial applications in C :-( */
9950 static IRTemp math_PABS_XMM_pap4 ( IRTemp aa ) {
9951 return math_PABS_XMM(aa, 4);
9954 static IRTemp math_PABS_XMM_pap2 ( IRTemp aa ) {
9955 return math_PABS_XMM(aa, 2);
9958 static IRTemp math_PABS_XMM_pap1 ( IRTemp aa ) {
9959 return math_PABS_XMM(aa, 1);
9962 /* YMM version of math_PABS_XMM. */
9963 static IRTemp math_PABS_YMM ( IRTemp aa, Int laneszB )
9965 IRTemp res = newTemp(Ity_V256);
9966 IRTemp aaHi = IRTemp_INVALID;
9967 IRTemp aaLo = IRTemp_INVALID;
9968 breakupV256toV128s(aa, &aaHi, &aaLo);
9969 assign(res, binop(Iop_V128HLtoV256,
9970 mkexpr(math_PABS_XMM(aaHi, laneszB)),
9971 mkexpr(math_PABS_XMM(aaLo, laneszB))));
9972 return res;
9975 static IRTemp math_PABS_YMM_pap4 ( IRTemp aa ) {
9976 return math_PABS_YMM(aa, 4);
9979 static IRTemp math_PABS_YMM_pap2 ( IRTemp aa ) {
9980 return math_PABS_YMM(aa, 2);
9983 static IRTemp math_PABS_YMM_pap1 ( IRTemp aa ) {
9984 return math_PABS_YMM(aa, 1);
9987 static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
9988 IRTemp lo64, Long byteShift )
9990 vassert(byteShift >= 1 && byteShift <= 7);
9991 return
9992 binop(Iop_Or64,
9993 binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
9994 binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
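/* math_PALIGNR_XMM below computes the low 128 bits of the 256-bit
   concatenation dV:sV shifted right by imm8 bytes, handled case-by-case
   over the four 64-bit halves.  For example, imm8 == 4 produces a result
   whose low 12 bytes are bytes 15..4 of sV and whose top 4 bytes are
   bytes 3..0 of dV; imm8 == 0 returns sV and imm8 == 16 returns dV. */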
9998 static IRTemp math_PALIGNR_XMM ( IRTemp sV, IRTemp dV, UInt imm8 )
10000 IRTemp res = newTemp(Ity_V128);
10001 IRTemp sHi = newTemp(Ity_I64);
10002 IRTemp sLo = newTemp(Ity_I64);
10003 IRTemp dHi = newTemp(Ity_I64);
10004 IRTemp dLo = newTemp(Ity_I64);
10005 IRTemp rHi = newTemp(Ity_I64);
10006 IRTemp rLo = newTemp(Ity_I64);
10008 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
10009 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
10010 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
10011 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
10013 if (imm8 == 0) {
10014 assign( rHi, mkexpr(sHi) );
10015 assign( rLo, mkexpr(sLo) );
10017 else if (imm8 >= 1 && imm8 <= 7) {
10018 assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, imm8) );
10019 assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, imm8) );
10021 else if (imm8 == 8) {
10022 assign( rHi, mkexpr(dLo) );
10023 assign( rLo, mkexpr(sHi) );
10025 else if (imm8 >= 9 && imm8 <= 15) {
10026 assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-8) );
10027 assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, imm8-8) );
10029 else if (imm8 == 16) {
10030 assign( rHi, mkexpr(dHi) );
10031 assign( rLo, mkexpr(dLo) );
10033 else if (imm8 >= 17 && imm8 <= 23) {
10034 assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-16))) );
10035 assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-16) );
10037 else if (imm8 == 24) {
10038 assign( rHi, mkU64(0) );
10039 assign( rLo, mkexpr(dHi) );
10041 else if (imm8 >= 25 && imm8 <= 31) {
10042 assign( rHi, mkU64(0) );
10043 assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-24))) );
10045 else if (imm8 >= 32 && imm8 <= 255) {
10046 assign( rHi, mkU64(0) );
10047 assign( rLo, mkU64(0) );
10049 else
10050 vassert(0);
10052 assign( res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)));
10053 return res;
10057 /* Generate a SIGSEGV followed by a restart of the current instruction
10058 if effective_addr is not 16-aligned. This is required behaviour
10059 for some SSE3 instructions and all 128-bit SSSE3 instructions.
10060 This assumes that guest_RIP_curr_instr is set correctly! */
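/* The generalised XX variant below takes mask == alignment-1.  For
   example, with mask 0xF (16-byte alignment) an effective address of
   0x1008 has a nonzero low nibble, so the guard fires and we side-exit
   with SIGSEGV at the current instruction, as the aligned-access
   SSE/SSSE3 instructions require. */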
10061 static
10062 void gen_SEGV_if_not_XX_aligned ( IRTemp effective_addr, ULong mask )
10064 stmt(
10065 IRStmt_Exit(
10066 binop(Iop_CmpNE64,
10067 binop(Iop_And64,mkexpr(effective_addr),mkU64(mask)),
10068 mkU64(0)),
10069 Ijk_SigSEGV,
10070 IRConst_U64(guest_RIP_curr_instr),
10071 OFFB_RIP
10076 static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr ) {
10077 gen_SEGV_if_not_XX_aligned(effective_addr, 16-1);
10080 static void gen_SEGV_if_not_32_aligned ( IRTemp effective_addr ) {
10081 gen_SEGV_if_not_XX_aligned(effective_addr, 32-1);
10084 static void gen_SEGV_if_not_64_aligned ( IRTemp effective_addr ) {
10085 gen_SEGV_if_not_XX_aligned(effective_addr, 64-1);
10088 /* Helper for deciding whether a given insn (starting at the opcode
10089 byte) may validly be used with a LOCK prefix. The following insns
10090 may be used with LOCK when their destination operand is in memory.
10091 AFAICS this is exactly the same for both 32-bit and 64-bit mode.
10093 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
10094 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
10095 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
10096 SBB 80 /3, 81 /3, 82 /x, 83 /3, 18, 19
10097 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
10098 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
10099 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
10101 DEC FE /1, FF /1
10102 INC FE /0, FF /0
10104 NEG F6 /3, F7 /3
10105 NOT F6 /2, F7 /2
10107 XCHG 86, 87
10109 BTC 0F BB, 0F BA /7
10110 BTR 0F B3, 0F BA /6
10111 BTS 0F AB, 0F BA /5
10113 CMPXCHG 0F B0, 0F B1
10114 CMPXCHG8B 0F C7 /1
10116 XADD 0F C0, 0F C1
10118 ------------------------------
10120 80 /0 = addb $imm8, rm8
10121 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
10122 82 /0 = addb $imm8, rm8
10123 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
10125 00 = addb r8, rm8
10126 01 = addl r32, rm32 and addw r16, rm16
10128 Same for ADD OR ADC SBB AND SUB XOR
10130 FE /1 = dec rm8
10131 FF /1 = dec rm32 and dec rm16
10133 FE /0 = inc rm8
10134 FF /0 = inc rm32 and inc rm16
10136 F6 /3 = neg rm8
10137 F7 /3 = neg rm32 and neg rm16
10139 F6 /2 = not rm8
10140 F7 /2 = not rm32 and not rm16
10142 0F BB = btcw r16, rm16 and btcl r32, rm32
10143 0F BA /7 = btcw $imm8, rm16 and btcl $imm8, rm32
10145 Same for BTS, BTR
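/* As a concrete illustration of the rule implemented below: 'lock incl
   (%rax)' encodes as F0 FF /0 with a memory E operand, so it is accepted,
   whereas 'lock incl %eax' has a register E operand and is rejected
   (hardware raises #UD in that case). */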
10147 static Bool can_be_used_with_LOCK_prefix ( const UChar* opc )
10149 switch (opc[0]) {
10150 case 0x00: case 0x01: case 0x08: case 0x09:
10151 case 0x10: case 0x11: case 0x18: case 0x19:
10152 case 0x20: case 0x21: case 0x28: case 0x29:
10153 case 0x30: case 0x31:
10154 if (!epartIsReg(opc[1]))
10155 return True;
10156 break;
10158 case 0x80: case 0x81: case 0x82: case 0x83:
10159 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6
10160 && !epartIsReg(opc[1]))
10161 return True;
10162 break;
10164 case 0xFE: case 0xFF:
10165 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1
10166 && !epartIsReg(opc[1]))
10167 return True;
10168 break;
10170 case 0xF6: case 0xF7:
10171 if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3
10172 && !epartIsReg(opc[1]))
10173 return True;
10174 break;
10176 case 0x86: case 0x87:
10177 if (!epartIsReg(opc[1]))
10178 return True;
10179 break;
10181 case 0x0F: {
10182 switch (opc[1]) {
10183 case 0xBB: case 0xB3: case 0xAB:
10184 if (!epartIsReg(opc[2]))
10185 return True;
10186 break;
10187 case 0xBA:
10188 if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7
10189 && !epartIsReg(opc[2]))
10190 return True;
10191 break;
10192 case 0xB0: case 0xB1:
10193 if (!epartIsReg(opc[2]))
10194 return True;
10195 break;
10196 case 0xC7:
10197 if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
10198 return True;
10199 break;
10200 case 0xC0: case 0xC1:
10201 if (!epartIsReg(opc[2]))
10202 return True;
10203 break;
10204 default:
10205 break;
10206 } /* switch (opc[1]) */
10207 break;
10210 default:
10211 break;
10212 } /* switch (opc[0]) */
10214 return False;
10218 /*------------------------------------------------------------*/
10219 /*--- ---*/
10220 /*--- Top-level SSE/SSE2: dis_ESC_0F__SSE2 ---*/
10221 /*--- ---*/
10222 /*------------------------------------------------------------*/
10224 static Long dis_COMISD ( const VexAbiInfo* vbi, Prefix pfx,
10225 Long delta, Bool isAvx, UChar opc )
10227 vassert(opc == 0x2F/*COMISD*/ || opc == 0x2E/*UCOMISD*/);
10228 Int alen = 0;
10229 HChar dis_buf[50];
10230 IRTemp argL = newTemp(Ity_F64);
10231 IRTemp argR = newTemp(Ity_F64);
10232 UChar modrm = getUChar(delta);
10233 IRTemp addr = IRTemp_INVALID;
10234 if (epartIsReg(modrm)) {
10235 assign( argR, getXMMRegLane64F( eregOfRexRM(pfx,modrm),
10236 0/*lowest lane*/ ) );
10237 delta += 1;
10238 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
10239 opc==0x2E ? "u" : "",
10240 nameXMMReg(eregOfRexRM(pfx,modrm)),
10241 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10242 } else {
10243 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10244 assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
10245 delta += alen;
10246 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
10247 opc==0x2E ? "u" : "",
10248 dis_buf,
10249 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10251 assign( argL, getXMMRegLane64F( gregOfRexRM(pfx,modrm),
10252 0/*lowest lane*/ ) );
10254 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
10255 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
10256 stmt( IRStmt_Put(
10257 OFFB_CC_DEP1,
10258 binop( Iop_And64,
10259 unop( Iop_32Uto64,
10260 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)) ),
10261 mkU64(0x45)
10262 )));
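/* The 0x45 mask keeps only bits 6, 2 and 0 (ZF, PF and CF).  The
   IRCmpF64Result encoding (0x45 unordered, 0x40 equal, 0x01 less,
   0x00 greater; see libvex_ir.h) is arranged so that masking with 0x45
   yields exactly the flag triple (U)COMISD defines: unordered sets all
   three, equal sets only ZF, less sets only CF, greater clears all. */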
10263 return delta;
10267 static Long dis_COMISS ( const VexAbiInfo* vbi, Prefix pfx,
10268 Long delta, Bool isAvx, UChar opc )
10270 vassert(opc == 0x2F/*COMISS*/ || opc == 0x2E/*UCOMISS*/);
10271 Int alen = 0;
10272 HChar dis_buf[50];
10273 IRTemp argL = newTemp(Ity_F32);
10274 IRTemp argR = newTemp(Ity_F32);
10275 UChar modrm = getUChar(delta);
10276 IRTemp addr = IRTemp_INVALID;
10277 if (epartIsReg(modrm)) {
10278 assign( argR, getXMMRegLane32F( eregOfRexRM(pfx,modrm),
10279 0/*lowest lane*/ ) );
10280 delta += 1;
10281 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "",
10282 opc==0x2E ? "u" : "",
10283 nameXMMReg(eregOfRexRM(pfx,modrm)),
10284 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10285 } else {
10286 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10287 assign( argR, loadLE(Ity_F32, mkexpr(addr)) );
10288 delta += alen;
10289 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "",
10290 opc==0x2E ? "u" : "",
10291 dis_buf,
10292 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10294 assign( argL, getXMMRegLane32F( gregOfRexRM(pfx,modrm),
10295 0/*lowest lane*/ ) );
10297 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
10298 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
10299 stmt( IRStmt_Put(
10300 OFFB_CC_DEP1,
10301 binop( Iop_And64,
10302 unop( Iop_32Uto64,
10303 binop(Iop_CmpF64,
10304 unop(Iop_F32toF64,mkexpr(argL)),
10305 unop(Iop_F32toF64,mkexpr(argR)))),
10306 mkU64(0x45)
10307 )));
10308 return delta;
10312 static Long dis_PSHUFD_32x4 ( const VexAbiInfo* vbi, Prefix pfx,
10313 Long delta, Bool writesYmm )
10315 Int order;
10316 Int alen = 0;
10317 HChar dis_buf[50];
10318 IRTemp sV = newTemp(Ity_V128);
10319 UChar modrm = getUChar(delta);
10320 const HChar* strV = writesYmm ? "v" : "";
10321 IRTemp addr = IRTemp_INVALID;
10322 if (epartIsReg(modrm)) {
10323 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
10324 order = (Int)getUChar(delta+1);
10325 delta += 1+1;
10326 DIP("%spshufd $%d,%s,%s\n", strV, order,
10327 nameXMMReg(eregOfRexRM(pfx,modrm)),
10328 nameXMMReg(gregOfRexRM(pfx,modrm)));
10329 } else {
10330 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
10331 1/*byte after the amode*/ );
10332 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10333 order = (Int)getUChar(delta+alen);
10334 delta += alen+1;
10335 DIP("%spshufd $%d,%s,%s\n", strV, order,
10336 dis_buf,
10337 nameXMMReg(gregOfRexRM(pfx,modrm)));
10340 IRTemp s3, s2, s1, s0;
10341 s3 = s2 = s1 = s0 = IRTemp_INVALID;
10342 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
10344 # define SEL(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
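/* Each 2-bit field of the immediate selects a source lane for one result
   lane, lowest field first.  For example order == 0x1B reverses the four
   32-bit lanes, while order == 0x00 broadcasts lane 0 to all four. */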
10345 IRTemp dV = newTemp(Ity_V128);
10346 assign(dV,
10347 mkV128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
10348 SEL((order>>2)&3), SEL((order>>0)&3) )
10350 # undef SEL
10352 (writesYmm ? putYMMRegLoAndZU : putXMMReg)
10353 (gregOfRexRM(pfx,modrm), mkexpr(dV));
10354 return delta;
10358 static Long dis_PSHUFD_32x8 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
10360 Int order;
10361 Int alen = 0;
10362 HChar dis_buf[50];
10363 IRTemp sV = newTemp(Ity_V256);
10364 UChar modrm = getUChar(delta);
10365 IRTemp addr = IRTemp_INVALID;
10366 UInt rG = gregOfRexRM(pfx,modrm);
10367 if (epartIsReg(modrm)) {
10368 UInt rE = eregOfRexRM(pfx,modrm);
10369 assign( sV, getYMMReg(rE) );
10370 order = (Int)getUChar(delta+1);
10371 delta += 1+1;
10372 DIP("vpshufd $%d,%s,%s\n", order, nameYMMReg(rE), nameYMMReg(rG));
10373 } else {
10374 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
10375 1/*byte after the amode*/ );
10376 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
10377 order = (Int)getUChar(delta+alen);
10378 delta += alen+1;
10379 DIP("vpshufd $%d,%s,%s\n", order, dis_buf, nameYMMReg(rG));
10382 IRTemp s[8];
10383 s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
10384 breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4],
10385 &s[3], &s[2], &s[1], &s[0] );
10387 putYMMReg( rG, mkV256from32s( s[4 + ((order>>6)&3)],
10388 s[4 + ((order>>4)&3)],
10389 s[4 + ((order>>2)&3)],
10390 s[4 + ((order>>0)&3)],
10391 s[0 + ((order>>6)&3)],
10392 s[0 + ((order>>4)&3)],
10393 s[0 + ((order>>2)&3)],
10394 s[0 + ((order>>0)&3)] ) );
10395 return delta;
10399 static IRTemp math_PSRLDQ ( IRTemp sV, Int imm )
10401 IRTemp dV = newTemp(Ity_V128);
10402 IRTemp hi64 = newTemp(Ity_I64);
10403 IRTemp lo64 = newTemp(Ity_I64);
10404 IRTemp hi64r = newTemp(Ity_I64);
10405 IRTemp lo64r = newTemp(Ity_I64);
10407 vassert(imm >= 0 && imm <= 255);
10408 if (imm >= 16) {
10409 assign(dV, mkV128(0x0000));
10410 return dV;
10413 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
10414 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
10416 if (imm == 0) {
10417 assign( lo64r, mkexpr(lo64) );
10418 assign( hi64r, mkexpr(hi64) );
10420 else
10421 if (imm == 8) {
10422 assign( hi64r, mkU64(0) );
10423 assign( lo64r, mkexpr(hi64) );
10425 else
10426 if (imm > 8) {
10427 assign( hi64r, mkU64(0) );
10428 assign( lo64r, binop( Iop_Shr64, mkexpr(hi64), mkU8( 8*(imm-8) ) ));
10429 } else {
10430 assign( hi64r, binop( Iop_Shr64, mkexpr(hi64), mkU8(8 * imm) ));
10431 assign( lo64r,
10432 binop( Iop_Or64,
10433 binop(Iop_Shr64, mkexpr(lo64),
10434 mkU8(8 * imm)),
10435 binop(Iop_Shl64, mkexpr(hi64),
10436 mkU8(8 * (8 - imm)) )
10441 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
10442 return dV;
10446 static IRTemp math_PSLLDQ ( IRTemp sV, Int imm )
10448 IRTemp dV = newTemp(Ity_V128);
10449 IRTemp hi64 = newTemp(Ity_I64);
10450 IRTemp lo64 = newTemp(Ity_I64);
10451 IRTemp hi64r = newTemp(Ity_I64);
10452 IRTemp lo64r = newTemp(Ity_I64);
10454 vassert(imm >= 0 && imm <= 255);
10455 if (imm >= 16) {
10456 assign(dV, mkV128(0x0000));
10457 return dV;
10460 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
10461 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
10463 if (imm == 0) {
10464 assign( lo64r, mkexpr(lo64) );
10465 assign( hi64r, mkexpr(hi64) );
10467 else
10468 if (imm == 8) {
10469 assign( lo64r, mkU64(0) );
10470 assign( hi64r, mkexpr(lo64) );
10472 else
10473 if (imm > 8) {
10474 assign( lo64r, mkU64(0) );
10475 assign( hi64r, binop( Iop_Shl64, mkexpr(lo64), mkU8( 8*(imm-8) ) ));
10476 } else {
10477 assign( lo64r, binop( Iop_Shl64, mkexpr(lo64), mkU8(8 * imm) ));
10478 assign( hi64r,
10479 binop( Iop_Or64,
10480 binop(Iop_Shl64, mkexpr(hi64),
10481 mkU8(8 * imm)),
10482 binop(Iop_Shr64, mkexpr(lo64),
10483 mkU8(8 * (8 - imm)) )
10488 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
10489 return dV;
10493 static Long dis_CVTxSD2SI ( const VexAbiInfo* vbi, Prefix pfx,
10494 Long delta, Bool isAvx, UChar opc, Int sz )
10496 vassert(opc == 0x2D/*CVTSD2SI*/ || opc == 0x2C/*CVTTSD2SI*/);
10497 HChar dis_buf[50];
10498 Int alen = 0;
10499 UChar modrm = getUChar(delta);
10500 IRTemp addr = IRTemp_INVALID;
10501 IRTemp rmode = newTemp(Ity_I32);
10502 IRTemp f64lo = newTemp(Ity_F64);
10503 Bool r2zero = toBool(opc == 0x2C);
10505 if (epartIsReg(modrm)) {
10506 delta += 1;
10507 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
10508 DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10509 nameXMMReg(eregOfRexRM(pfx,modrm)),
10510 nameIReg(sz, gregOfRexRM(pfx,modrm),
10511 False));
10512 } else {
10513 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10514 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
10515 delta += alen;
10516 DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10517 dis_buf,
10518 nameIReg(sz, gregOfRexRM(pfx,modrm),
10519 False));
10522 if (r2zero) {
10523 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10524 } else {
10525 assign( rmode, get_sse_roundingmode() );
10528 if (sz == 4) {
10529 putIReg32( gregOfRexRM(pfx,modrm),
10530 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) );
10531 } else {
10532 vassert(sz == 8);
10533 putIReg64( gregOfRexRM(pfx,modrm),
10534 binop( Iop_F64toI64S, mkexpr(rmode), mkexpr(f64lo)) );
10537 return delta;
10541 static Long dis_CVTxSS2SI ( const VexAbiInfo* vbi, Prefix pfx,
10542 Long delta, Bool isAvx, UChar opc, Int sz )
10544 vassert(opc == 0x2D/*CVTSS2SI*/ || opc == 0x2C/*CVTTSS2SI*/);
10545 HChar dis_buf[50];
10546 Int alen = 0;
10547 UChar modrm = getUChar(delta);
10548 IRTemp addr = IRTemp_INVALID;
10549 IRTemp rmode = newTemp(Ity_I32);
10550 IRTemp f32lo = newTemp(Ity_F32);
10551 Bool r2zero = toBool(opc == 0x2C);
10553 if (epartIsReg(modrm)) {
10554 delta += 1;
10555 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
10556 DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10557 nameXMMReg(eregOfRexRM(pfx,modrm)),
10558 nameIReg(sz, gregOfRexRM(pfx,modrm),
10559 False));
10560 } else {
10561 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10562 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
10563 delta += alen;
10564 DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10565 dis_buf,
10566 nameIReg(sz, gregOfRexRM(pfx,modrm),
10567 False));
10570 if (r2zero) {
10571 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10572 } else {
10573 assign( rmode, get_sse_roundingmode() );
10576 if (sz == 4) {
10577 putIReg32( gregOfRexRM(pfx,modrm),
10578 binop( Iop_F64toI32S,
10579 mkexpr(rmode),
10580 unop(Iop_F32toF64, mkexpr(f32lo))) );
10581 } else {
10582 vassert(sz == 8);
10583 putIReg64( gregOfRexRM(pfx,modrm),
10584 binop( Iop_F64toI64S,
10585 mkexpr(rmode),
10586 unop(Iop_F32toF64, mkexpr(f32lo))) );
10589 return delta;
10593 static Long dis_CVTPS2PD_128 ( const VexAbiInfo* vbi, Prefix pfx,
10594 Long delta, Bool isAvx )
10596 IRTemp addr = IRTemp_INVALID;
10597 Int alen = 0;
10598 HChar dis_buf[50];
10599 IRTemp f32lo = newTemp(Ity_F32);
10600 IRTemp f32hi = newTemp(Ity_F32);
10601 UChar modrm = getUChar(delta);
10602 UInt rG = gregOfRexRM(pfx,modrm);
10603 if (epartIsReg(modrm)) {
10604 UInt rE = eregOfRexRM(pfx,modrm);
10605 assign( f32lo, getXMMRegLane32F(rE, 0) );
10606 assign( f32hi, getXMMRegLane32F(rE, 1) );
10607 delta += 1;
10608 DIP("%scvtps2pd %s,%s\n",
10609 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
10610 } else {
10611 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10612 assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
10613 assign( f32hi, loadLE(Ity_F32,
10614 binop(Iop_Add64,mkexpr(addr),mkU64(4))) );
10615 delta += alen;
10616 DIP("%scvtps2pd %s,%s\n",
10617 isAvx ? "v" : "", dis_buf, nameXMMReg(rG));
10620 putXMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32hi)) );
10621 putXMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32lo)) );
10622 if (isAvx)
10623 putYMMRegLane128( rG, 1, mkV128(0));
10624 return delta;
10628 static Long dis_CVTPS2PD_256 ( const VexAbiInfo* vbi, Prefix pfx,
10629 Long delta )
10631 IRTemp addr = IRTemp_INVALID;
10632 Int alen = 0;
10633 HChar dis_buf[50];
10634 IRTemp f32_0 = newTemp(Ity_F32);
10635 IRTemp f32_1 = newTemp(Ity_F32);
10636 IRTemp f32_2 = newTemp(Ity_F32);
10637 IRTemp f32_3 = newTemp(Ity_F32);
10638 UChar modrm = getUChar(delta);
10639 UInt rG = gregOfRexRM(pfx,modrm);
10640 if (epartIsReg(modrm)) {
10641 UInt rE = eregOfRexRM(pfx,modrm);
10642 assign( f32_0, getXMMRegLane32F(rE, 0) );
10643 assign( f32_1, getXMMRegLane32F(rE, 1) );
10644 assign( f32_2, getXMMRegLane32F(rE, 2) );
10645 assign( f32_3, getXMMRegLane32F(rE, 3) );
10646 delta += 1;
10647 DIP("vcvtps2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
10648 } else {
10649 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10650 assign( f32_0, loadLE(Ity_F32, mkexpr(addr)) );
10651 assign( f32_1, loadLE(Ity_F32,
10652 binop(Iop_Add64,mkexpr(addr),mkU64(4))) );
10653 assign( f32_2, loadLE(Ity_F32,
10654 binop(Iop_Add64,mkexpr(addr),mkU64(8))) );
10655 assign( f32_3, loadLE(Ity_F32,
10656 binop(Iop_Add64,mkexpr(addr),mkU64(12))) );
10657 delta += alen;
10658 DIP("vcvtps2pd %s,%s\n", dis_buf, nameYMMReg(rG));
10661 putYMMRegLane64F( rG, 3, unop(Iop_F32toF64, mkexpr(f32_3)) );
10662 putYMMRegLane64F( rG, 2, unop(Iop_F32toF64, mkexpr(f32_2)) );
10663 putYMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32_1)) );
10664 putYMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32_0)) );
10665 return delta;
10669 static Long dis_CVTPD2PS_128 ( const VexAbiInfo* vbi, Prefix pfx,
10670 Long delta, Bool isAvx )
10672 IRTemp addr = IRTemp_INVALID;
10673 Int alen = 0;
10674 HChar dis_buf[50];
10675 UChar modrm = getUChar(delta);
10676 UInt rG = gregOfRexRM(pfx,modrm);
10677 IRTemp argV = newTemp(Ity_V128);
10678 IRTemp rmode = newTemp(Ity_I32);
10679 if (epartIsReg(modrm)) {
10680 UInt rE = eregOfRexRM(pfx,modrm);
10681 assign( argV, getXMMReg(rE) );
10682 delta += 1;
10683 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "",
10684 nameXMMReg(rE), nameXMMReg(rG));
10685 } else {
10686 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10687 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10688 delta += alen;
10689 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "",
10690 dis_buf, nameXMMReg(rG) );
10693 assign( rmode, get_sse_roundingmode() );
10694 IRTemp t0 = newTemp(Ity_F64);
10695 IRTemp t1 = newTemp(Ity_F64);
10696 assign( t0, unop(Iop_ReinterpI64asF64,
10697 unop(Iop_V128to64, mkexpr(argV))) );
10698 assign( t1, unop(Iop_ReinterpI64asF64,
10699 unop(Iop_V128HIto64, mkexpr(argV))) );
10701 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), mkexpr(_t) )
10702 putXMMRegLane32( rG, 3, mkU32(0) );
10703 putXMMRegLane32( rG, 2, mkU32(0) );
10704 putXMMRegLane32F( rG, 1, CVT(t1) );
10705 putXMMRegLane32F( rG, 0, CVT(t0) );
10706 # undef CVT
10707 if (isAvx)
10708 putYMMRegLane128( rG, 1, mkV128(0) );
10710 return delta;
10714 static Long dis_CVTxPS2DQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
10715 Long delta, Bool isAvx, Bool r2zero )
10717 IRTemp addr = IRTemp_INVALID;
10718 Int alen = 0;
10719 HChar dis_buf[50];
10720 UChar modrm = getUChar(delta);
10721 IRTemp argV = newTemp(Ity_V128);
10722 IRTemp rmode = newTemp(Ity_I32);
10723 UInt rG = gregOfRexRM(pfx,modrm);
10725 if (epartIsReg(modrm)) {
10726 UInt rE = eregOfRexRM(pfx,modrm);
10727 assign( argV, getXMMReg(rE) );
10728 delta += 1;
10729 DIP("%scvt%sps2dq %s,%s\n",
10730 isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG));
10731 } else {
10732 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10733 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10734 delta += alen;
10735 DIP("%scvt%sps2dq %s,%s\n",
10736 isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
10739 assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO)
10740 : get_sse_roundingmode() );
10741 putXMMReg( rG, binop(Iop_F32toI32Sx4, mkexpr(rmode), mkexpr(argV)) );
10742 if (isAvx)
10743 putYMMRegLane128( rG, 1, mkV128(0) );
10745 return delta;
10749 static Long dis_CVTxPS2DQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
10750 Long delta, Bool r2zero )
10752 IRTemp addr = IRTemp_INVALID;
10753 Int alen = 0;
10754 HChar dis_buf[50];
10755 UChar modrm = getUChar(delta);
10756 IRTemp argV = newTemp(Ity_V256);
10757 IRTemp rmode = newTemp(Ity_I32);
10758 UInt rG = gregOfRexRM(pfx,modrm);
10760 if (epartIsReg(modrm)) {
10761 UInt rE = eregOfRexRM(pfx,modrm);
10762 assign( argV, getYMMReg(rE) );
10763 delta += 1;
10764 DIP("vcvt%sps2dq %s,%s\n",
10765 r2zero ? "t" : "", nameYMMReg(rE), nameYMMReg(rG));
10766 } else {
10767 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10768 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
10769 delta += alen;
10770 DIP("vcvt%sps2dq %s,%s\n",
10771 r2zero ? "t" : "", dis_buf, nameYMMReg(rG) );
10774 assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO)
10775 : get_sse_roundingmode() );
10776 putYMMReg( rG, binop(Iop_F32toI32Sx8, mkexpr(rmode), mkexpr(argV)) );
10777 return delta;
10781 static Long dis_CVTxPD2DQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
10782 Long delta, Bool isAvx, Bool r2zero )
10784 IRTemp addr = IRTemp_INVALID;
10785 Int alen = 0;
10786 HChar dis_buf[50];
10787 UChar modrm = getUChar(delta);
10788 IRTemp argV = newTemp(Ity_V128);
10789 IRTemp rmode = newTemp(Ity_I32);
10790 UInt rG = gregOfRexRM(pfx,modrm);
10791 IRTemp t0, t1;
10793 if (epartIsReg(modrm)) {
10794 UInt rE = eregOfRexRM(pfx,modrm);
10795 assign( argV, getXMMReg(rE) );
10796 delta += 1;
10797 DIP("%scvt%spd2dq %s,%s\n",
10798 isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG));
10799 } else {
10800 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10801 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10802 delta += alen;
10803 DIP("%scvt%spd2dqx %s,%s\n",
10804 isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
10807 if (r2zero) {
10808 assign(rmode, mkU32((UInt)Irrm_ZERO) );
10809 } else {
10810 assign( rmode, get_sse_roundingmode() );
10813 t0 = newTemp(Ity_F64);
10814 t1 = newTemp(Ity_F64);
10815 assign( t0, unop(Iop_ReinterpI64asF64,
10816 unop(Iop_V128to64, mkexpr(argV))) );
10817 assign( t1, unop(Iop_ReinterpI64asF64,
10818 unop(Iop_V128HIto64, mkexpr(argV))) );
10820 # define CVT(_t) binop( Iop_F64toI32S, \
10821 mkexpr(rmode), \
10822 mkexpr(_t) )
10824 putXMMRegLane32( rG, 3, mkU32(0) );
10825 putXMMRegLane32( rG, 2, mkU32(0) );
10826 putXMMRegLane32( rG, 1, CVT(t1) );
10827 putXMMRegLane32( rG, 0, CVT(t0) );
10828 # undef CVT
10829 if (isAvx)
10830 putYMMRegLane128( rG, 1, mkV128(0) );
10832 return delta;
10836 static Long dis_CVTxPD2DQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
10837 Long delta, Bool r2zero )
10839 IRTemp addr = IRTemp_INVALID;
10840 Int alen = 0;
10841 HChar dis_buf[50];
10842 UChar modrm = getUChar(delta);
10843 IRTemp argV = newTemp(Ity_V256);
10844 IRTemp rmode = newTemp(Ity_I32);
10845 UInt rG = gregOfRexRM(pfx,modrm);
10846 IRTemp t0, t1, t2, t3;
10848 if (epartIsReg(modrm)) {
10849 UInt rE = eregOfRexRM(pfx,modrm);
10850 assign( argV, getYMMReg(rE) );
10851 delta += 1;
10852 DIP("vcvt%spd2dq %s,%s\n",
10853 r2zero ? "t" : "", nameYMMReg(rE), nameXMMReg(rG));
10854 } else {
10855 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10856 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
10857 delta += alen;
10858 DIP("vcvt%spd2dqy %s,%s\n",
10859 r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
10862 if (r2zero) {
10863 assign(rmode, mkU32((UInt)Irrm_ZERO) );
10864 } else {
10865 assign( rmode, get_sse_roundingmode() );
10868 t0 = IRTemp_INVALID;
10869 t1 = IRTemp_INVALID;
10870 t2 = IRTemp_INVALID;
10871 t3 = IRTemp_INVALID;
10872 breakupV256to64s( argV, &t3, &t2, &t1, &t0 );
10874 # define CVT(_t) binop( Iop_F64toI32S, \
10875 mkexpr(rmode), \
10876 unop( Iop_ReinterpI64asF64, \
10877 mkexpr(_t) ) )
10879 putXMMRegLane32( rG, 3, CVT(t3) );
10880 putXMMRegLane32( rG, 2, CVT(t2) );
10881 putXMMRegLane32( rG, 1, CVT(t1) );
10882 putXMMRegLane32( rG, 0, CVT(t0) );
10883 # undef CVT
10884 putYMMRegLane128( rG, 1, mkV128(0) );
10886 return delta;
10890 static Long dis_CVTDQ2PS_128 ( const VexAbiInfo* vbi, Prefix pfx,
10891 Long delta, Bool isAvx )
10893 IRTemp addr = IRTemp_INVALID;
10894 Int alen = 0;
10895 HChar dis_buf[50];
10896 UChar modrm = getUChar(delta);
10897 IRTemp argV = newTemp(Ity_V128);
10898 IRTemp rmode = newTemp(Ity_I32);
10899 UInt rG = gregOfRexRM(pfx,modrm);
10901 if (epartIsReg(modrm)) {
10902 UInt rE = eregOfRexRM(pfx,modrm);
10903 assign( argV, getXMMReg(rE) );
10904 delta += 1;
10905 DIP("%scvtdq2ps %s,%s\n",
10906 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
10907 } else {
10908 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10909 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10910 delta += alen;
10911 DIP("%scvtdq2ps %s,%s\n",
10912 isAvx ? "v" : "", dis_buf, nameXMMReg(rG) );
10915 assign( rmode, get_sse_roundingmode() );
10916 putXMMReg(rG, binop(Iop_I32StoF32x4, mkexpr(rmode), mkexpr(argV)));
10918 if (isAvx)
10919 putYMMRegLane128( rG, 1, mkV128(0) );
10921 return delta;
10924 static Long dis_CVTDQ2PS_256 ( const VexAbiInfo* vbi, Prefix pfx,
10925 Long delta )
10927 IRTemp addr = IRTemp_INVALID;
10928 Int alen = 0;
10929 HChar dis_buf[50];
10930 UChar modrm = getUChar(delta);
10931 IRTemp argV = newTemp(Ity_V256);
10932 IRTemp rmode = newTemp(Ity_I32);
10933 UInt rG = gregOfRexRM(pfx,modrm);
10935 if (epartIsReg(modrm)) {
10936 UInt rE = eregOfRexRM(pfx,modrm);
10937 assign( argV, getYMMReg(rE) );
10938 delta += 1;
10939 DIP("vcvtdq2ps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
10940 } else {
10941 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10942 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
10943 delta += alen;
10944 DIP("vcvtdq2ps %s,%s\n", dis_buf, nameYMMReg(rG) );
10947 assign( rmode, get_sse_roundingmode() );
10948 putYMMReg(rG, binop(Iop_I32StoF32x8, mkexpr(rmode), mkexpr(argV)));
10950 return delta;
10954 static Long dis_PMOVMSKB_128 ( const VexAbiInfo* vbi, Prefix pfx,
10955 Long delta, Bool isAvx )
10957 UChar modrm = getUChar(delta);
10958 vassert(epartIsReg(modrm)); /* ensured by caller */
10959 UInt rE = eregOfRexRM(pfx,modrm);
10960 UInt rG = gregOfRexRM(pfx,modrm);
10961 IRTemp t0 = newTemp(Ity_V128);
10962 IRTemp t1 = newTemp(Ity_I32);
10963 assign(t0, getXMMReg(rE));
10964 assign(t1, unop(Iop_16Uto32, unop(Iop_GetMSBs8x16, mkexpr(t0))));
10965 putIReg32(rG, mkexpr(t1));
10966 DIP("%spmovmskb %s,%s\n", isAvx ? "v" : "", nameXMMReg(rE),
10967 nameIReg32(rG));
10968 delta += 1;
10969 return delta;
10973 static Long dis_PMOVMSKB_256 ( const VexAbiInfo* vbi, Prefix pfx,
10974 Long delta )
10976 UChar modrm = getUChar(delta);
10977 vassert(epartIsReg(modrm)); /* ensured by caller */
10978 UInt rE = eregOfRexRM(pfx,modrm);
10979 UInt rG = gregOfRexRM(pfx,modrm);
10980 IRTemp t0 = newTemp(Ity_V128);
10981 IRTemp t1 = newTemp(Ity_V128);
10982 IRTemp t2 = newTemp(Ity_I16);
10983 IRTemp t3 = newTemp(Ity_I16);
10984 assign(t0, getYMMRegLane128(rE, 0));
10985 assign(t1, getYMMRegLane128(rE, 1));
10986 assign(t2, unop(Iop_GetMSBs8x16, mkexpr(t0)));
10987 assign(t3, unop(Iop_GetMSBs8x16, mkexpr(t1)));
10988 putIReg32(rG, binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)));
10989 DIP("vpmovmskb %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
10990 delta += 1;
10991 return delta;
10995 /* FIXME: why not just use InterleaveLO / InterleaveHI? I think the
10996 relevant ops are "xIsH ? InterleaveHI32x4 : InterleaveLO32x4". */
10997 /* Does the maths for 128 bit versions of UNPCKLPS and UNPCKHPS */
10998 static IRTemp math_UNPCKxPS_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
11000 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11001 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11002 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
11003 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11004 IRTemp res = newTemp(Ity_V128);
11005 assign(res, xIsH ? mkV128from32s( s3, d3, s2, d2 )
11006 : mkV128from32s( s1, d1, s0, d0 ));
11007 return res;
11011 /* FIXME: why not just use InterleaveLO / InterleaveHI ?? */
11012 /* Does the maths for 128 bit versions of UNPCKLPD and UNPCKHPD */
11013 static IRTemp math_UNPCKxPD_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
11015 IRTemp s1 = newTemp(Ity_I64);
11016 IRTemp s0 = newTemp(Ity_I64);
11017 IRTemp d1 = newTemp(Ity_I64);
11018 IRTemp d0 = newTemp(Ity_I64);
11019 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
11020 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
11021 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
11022 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
11023 IRTemp res = newTemp(Ity_V128);
11024 assign(res, xIsH ? binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1))
11025 : binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)));
11026 return res;
11030 /* Does the maths for 256 bit versions of UNPCKLPD and UNPCKHPD.
11031 Doesn't seem like this fits in either of the Iop_Interleave{LO,HI}
11032 or the Iop_Cat{Odd,Even}Lanes idioms, hence just do it the stupid
11033 way. */
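/* Assuming Iop_64x4toV256 takes its arguments most-significant first,
   the xIsH == False (UNPCKLPD) case yields 64-bit lanes (3..0) =
   s2, d2, s0, d0 and the xIsH == True (UNPCKHPD) case yields
   s3, d3, s1, d1, i.e. the interleave is done independently within
   each 128-bit half. */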
11034 static IRTemp math_UNPCKxPD_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
11036 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11037 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11038 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
11039 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
11040 IRTemp res = newTemp(Ity_V256);
11041 assign(res, xIsH
11042 ? IRExpr_Qop(Iop_64x4toV256, mkexpr(s3), mkexpr(d3),
11043 mkexpr(s1), mkexpr(d1))
11044 : IRExpr_Qop(Iop_64x4toV256, mkexpr(s2), mkexpr(d2),
11045 mkexpr(s0), mkexpr(d0)));
11046 return res;
11050 /* FIXME: this is really bad. Surely can do something better here?
11051 One observation is that the steering in the upper and lower 128 bit
11052 halves is the same as with math_UNPCKxPS_128, so we simply split
11053 into two halves, and use that. Consequently any improvement in
11054 math_UNPCKxPS_128 (probably, to use interleave-style primops)
11055 benefits this too. */
11056 static IRTemp math_UNPCKxPS_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
11058 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11059 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11060 breakupV256toV128s( sV, &sVhi, &sVlo );
11061 breakupV256toV128s( dV, &dVhi, &dVlo );
11062 IRTemp rVhi = math_UNPCKxPS_128(sVhi, dVhi, xIsH);
11063 IRTemp rVlo = math_UNPCKxPS_128(sVlo, dVlo, xIsH);
11064 IRTemp rV = newTemp(Ity_V256);
11065 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11066 return rV;
11070 static IRTemp math_SHUFPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11072 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11073 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11074 vassert(imm8 < 256);
11076 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
11077 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11079 # define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
11080 # define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11081 IRTemp res = newTemp(Ity_V128);
11082 assign(res,
11083 mkV128from32s( SELS((imm8>>6)&3), SELS((imm8>>4)&3),
11084 SELD((imm8>>2)&3), SELD((imm8>>0)&3) ) );
11085 # undef SELD
11086 # undef SELS
11087 return res;
11091 /* 256-bit SHUFPS appears to steer each of the 128-bit halves
11092 identically. Hence do the clueless thing and use math_SHUFPS_128
11093 twice. */
11094 static IRTemp math_SHUFPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
11096 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11097 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11098 breakupV256toV128s( sV, &sVhi, &sVlo );
11099 breakupV256toV128s( dV, &dVhi, &dVlo );
11100 IRTemp rVhi = math_SHUFPS_128(sVhi, dVhi, imm8);
11101 IRTemp rVlo = math_SHUFPS_128(sVlo, dVlo, imm8);
11102 IRTemp rV = newTemp(Ity_V256);
11103 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11104 return rV;
11108 static IRTemp math_SHUFPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11110 IRTemp s1 = newTemp(Ity_I64);
11111 IRTemp s0 = newTemp(Ity_I64);
11112 IRTemp d1 = newTemp(Ity_I64);
11113 IRTemp d0 = newTemp(Ity_I64);
11115 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
11116 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
11117 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
11118 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
11120 # define SELD(n) mkexpr((n)==0 ? d0 : d1)
11121 # define SELS(n) mkexpr((n)==0 ? s0 : s1)
11123 IRTemp res = newTemp(Ity_V128);
11124 assign(res, binop( Iop_64HLtoV128,
11125 SELS((imm8>>1)&1), SELD((imm8>>0)&1) ) );
11127 # undef SELD
11128 # undef SELS
11129 return res;
11133 static IRTemp math_SHUFPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
11135 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11136 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11137 breakupV256toV128s( sV, &sVhi, &sVlo );
11138 breakupV256toV128s( dV, &dVhi, &dVlo );
11139 IRTemp rVhi = math_SHUFPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
11140 IRTemp rVlo = math_SHUFPD_128(sVlo, dVlo, imm8 & 3);
11141 IRTemp rV = newTemp(Ity_V256);
11142 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11143 return rV;
11147 static IRTemp math_BLENDPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11149 UShort imm8_mask_16;
11150 IRTemp imm8_mask = newTemp(Ity_V128);
11152 switch( imm8 & 3 ) {
11153 case 0: imm8_mask_16 = 0x0000; break;
11154 case 1: imm8_mask_16 = 0x00FF; break;
11155 case 2: imm8_mask_16 = 0xFF00; break;
11156 case 3: imm8_mask_16 = 0xFFFF; break;
11157 default: vassert(0); break;
11159 assign( imm8_mask, mkV128( imm8_mask_16 ) );
11161 IRTemp res = newTemp(Ity_V128);
11162 assign ( res, binop( Iop_OrV128,
11163 binop( Iop_AndV128, mkexpr(sV),
11164 mkexpr(imm8_mask) ),
11165 binop( Iop_AndV128, mkexpr(dV),
11166 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
11167 return res;
11171 static IRTemp math_BLENDPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
11173 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11174 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11175 breakupV256toV128s( sV, &sVhi, &sVlo );
11176 breakupV256toV128s( dV, &dVhi, &dVlo );
11177 IRTemp rVhi = math_BLENDPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
11178 IRTemp rVlo = math_BLENDPD_128(sVlo, dVlo, imm8 & 3);
11179 IRTemp rV = newTemp(Ity_V256);
11180 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11181 return rV;
11185 static IRTemp math_BLENDPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11187 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
11188 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
11189 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
11190 0xFFFF };
11191 IRTemp imm8_mask = newTemp(Ity_V128);
11192 assign( imm8_mask, mkV128( imm8_perms[ (imm8 & 15) ] ) );
11194 IRTemp res = newTemp(Ity_V128);
11195 assign ( res, binop( Iop_OrV128,
11196 binop( Iop_AndV128, mkexpr(sV),
11197 mkexpr(imm8_mask) ),
11198 binop( Iop_AndV128, mkexpr(dV),
11199 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
11200 return res;
11204 static IRTemp math_BLENDPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
11206 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11207 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11208 breakupV256toV128s( sV, &sVhi, &sVlo );
11209 breakupV256toV128s( dV, &dVhi, &dVlo );
11210 IRTemp rVhi = math_BLENDPS_128(sVhi, dVhi, (imm8 >> 4) & 15);
11211 IRTemp rVlo = math_BLENDPS_128(sVlo, dVlo, imm8 & 15);
11212 IRTemp rV = newTemp(Ity_V256);
11213 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11214 return rV;
11218 static IRTemp math_PBLENDW_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11220 /* Make imm16 be a 16-bit version of imm8, formed by duplicating each
11221 bit in imm8. */
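/* For example imm8 == 0xA5 (bits 7, 5, 2 and 0 set) expands to
   imm16 == 0xCC33, so the result takes 16-bit lanes 7, 5, 2 and 0 from
   sV and the remaining lanes from dV. */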
11222 Int i;
11223 UShort imm16 = 0;
11224 for (i = 0; i < 8; i++) {
11225 if (imm8 & (1 << i))
11226 imm16 |= (3 << (2*i));
11228 IRTemp imm16_mask = newTemp(Ity_V128);
11229 assign( imm16_mask, mkV128( imm16 ));
11231 IRTemp res = newTemp(Ity_V128);
11232 assign ( res, binop( Iop_OrV128,
11233 binop( Iop_AndV128, mkexpr(sV),
11234 mkexpr(imm16_mask) ),
11235 binop( Iop_AndV128, mkexpr(dV),
11236 unop( Iop_NotV128, mkexpr(imm16_mask) ) ) ) );
11237 return res;
11241 static IRTemp math_PMULUDQ_128 ( IRTemp sV, IRTemp dV )
11243 /* This is a really poor translation -- could be improved if
11244 performance critical */
11245 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11246 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11247 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
11248 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11249 IRTemp res = newTemp(Ity_V128);
11250 assign(res, binop(Iop_64HLtoV128,
11251 binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)),
11252 binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) ));
11253 return res;
11257 static IRTemp math_PMULUDQ_256 ( IRTemp sV, IRTemp dV )
11259 /* This is a really poor translation -- could be improved if
11260 performance critical */
11261 IRTemp sHi, sLo, dHi, dLo;
11262 sHi = sLo = dHi = dLo = IRTemp_INVALID;
11263 breakupV256toV128s( dV, &dHi, &dLo);
11264 breakupV256toV128s( sV, &sHi, &sLo);
11265 IRTemp res = newTemp(Ity_V256);
11266 assign(res, binop(Iop_V128HLtoV256,
11267 mkexpr(math_PMULUDQ_128(sHi, dHi)),
11268 mkexpr(math_PMULUDQ_128(sLo, dLo))));
11269 return res;
11273 static IRTemp math_PMULDQ_128 ( IRTemp dV, IRTemp sV )
11275 /* This is a really poor translation -- could be improved if
11276 performance critical */
11277 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11278 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11279 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
11280 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11281 IRTemp res = newTemp(Ity_V128);
11282 assign(res, binop(Iop_64HLtoV128,
11283 binop( Iop_MullS32, mkexpr(d2), mkexpr(s2)),
11284 binop( Iop_MullS32, mkexpr(d0), mkexpr(s0)) ));
11285 return res;
11289 static IRTemp math_PMULDQ_256 ( IRTemp sV, IRTemp dV )
11291 /* This is a really poor translation -- could be improved if
11292 performance critical */
11293 IRTemp sHi, sLo, dHi, dLo;
11294 sHi = sLo = dHi = dLo = IRTemp_INVALID;
11295 breakupV256toV128s( dV, &dHi, &dLo);
11296 breakupV256toV128s( sV, &sHi, &sLo);
11297 IRTemp res = newTemp(Ity_V256);
11298 assign(res, binop(Iop_V128HLtoV256,
11299 mkexpr(math_PMULDQ_128(sHi, dHi)),
11300 mkexpr(math_PMULDQ_128(sLo, dLo))));
11301 return res;
11305 static IRTemp math_PMADDWD_128 ( IRTemp dV, IRTemp sV )
11307 IRTemp sVhi, sVlo, dVhi, dVlo;
11308 IRTemp resHi = newTemp(Ity_I64);
11309 IRTemp resLo = newTemp(Ity_I64);
11310 sVhi = sVlo = dVhi = dVlo = IRTemp_INVALID;
11311 breakupV128to64s( sV, &sVhi, &sVlo );
11312 breakupV128to64s( dV, &dVhi, &dVlo );
11313 assign( resHi, mkIRExprCCall(Ity_I64, 0/*regparms*/,
11314 "amd64g_calculate_mmx_pmaddwd",
11315 &amd64g_calculate_mmx_pmaddwd,
11316 mkIRExprVec_2( mkexpr(sVhi), mkexpr(dVhi))));
11317 assign( resLo, mkIRExprCCall(Ity_I64, 0/*regparms*/,
11318 "amd64g_calculate_mmx_pmaddwd",
11319 &amd64g_calculate_mmx_pmaddwd,
11320 mkIRExprVec_2( mkexpr(sVlo), mkexpr(dVlo))));
11321 IRTemp res = newTemp(Ity_V128);
11322 assign( res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo))) ;
11323 return res;
11327 static IRTemp math_PMADDWD_256 ( IRTemp dV, IRTemp sV )
11329 IRTemp sHi, sLo, dHi, dLo;
11330 sHi = sLo = dHi = dLo = IRTemp_INVALID;
11331 breakupV256toV128s( dV, &dHi, &dLo);
11332 breakupV256toV128s( sV, &sHi, &sLo);
11333 IRTemp res = newTemp(Ity_V256);
11334 assign(res, binop(Iop_V128HLtoV256,
11335 mkexpr(math_PMADDWD_128(dHi, sHi)),
11336 mkexpr(math_PMADDWD_128(dLo, sLo))));
11337 return res;
11341 static IRTemp math_ADDSUBPD_128 ( IRTemp dV, IRTemp sV )
11343 IRTemp addV = newTemp(Ity_V128);
11344 IRTemp subV = newTemp(Ity_V128);
11345 IRTemp a1 = newTemp(Ity_I64);
11346 IRTemp s0 = newTemp(Ity_I64);
11347 IRTemp rm = newTemp(Ity_I32);
11349 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11350 assign( addV, triop(Iop_Add64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11351 assign( subV, triop(Iop_Sub64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11353 assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
11354 assign( s0, unop(Iop_V128to64, mkexpr(subV) ));
11356 IRTemp res = newTemp(Ity_V128);
11357 assign( res, binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
11358 return res;
11362 static IRTemp math_ADDSUBPD_256 ( IRTemp dV, IRTemp sV )
11364 IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
11365 IRTemp addV = newTemp(Ity_V256);
11366 IRTemp subV = newTemp(Ity_V256);
11367 IRTemp rm = newTemp(Ity_I32);
11368 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11370 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11371 assign( addV, triop(Iop_Add64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11372 assign( subV, triop(Iop_Sub64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11374 breakupV256to64s( addV, &a3, &a2, &a1, &a0 );
11375 breakupV256to64s( subV, &s3, &s2, &s1, &s0 );
11377 IRTemp res = newTemp(Ity_V256);
11378 assign( res, mkV256from64s( a3, s2, a1, s0 ) );
11379 return res;
11383 static IRTemp math_ADDSUBPS_128 ( IRTemp dV, IRTemp sV )
11385 IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
11386 IRTemp addV = newTemp(Ity_V128);
11387 IRTemp subV = newTemp(Ity_V128);
11388 IRTemp rm = newTemp(Ity_I32);
11389 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11391 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11392 assign( addV, triop(Iop_Add32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11393 assign( subV, triop(Iop_Sub32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11395 breakupV128to32s( addV, &a3, &a2, &a1, &a0 );
11396 breakupV128to32s( subV, &s3, &s2, &s1, &s0 );
11398 IRTemp res = newTemp(Ity_V128);
11399 assign( res, mkV128from32s( a3, s2, a1, s0 ) );
11400 return res;
11404 static IRTemp math_ADDSUBPS_256 ( IRTemp dV, IRTemp sV )
11406 IRTemp a7, a6, a5, a4, a3, a2, a1, a0;
11407 IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
11408 IRTemp addV = newTemp(Ity_V256);
11409 IRTemp subV = newTemp(Ity_V256);
11410 IRTemp rm = newTemp(Ity_I32);
11411 a7 = a6 = a5 = a4 = a3 = a2 = a1 = a0 = IRTemp_INVALID;
11412 s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11414 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11415 assign( addV, triop(Iop_Add32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11416 assign( subV, triop(Iop_Sub32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11418 breakupV256to32s( addV, &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0 );
11419 breakupV256to32s( subV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
11421 IRTemp res = newTemp(Ity_V256);
11422 assign( res, mkV256from32s( a7, s6, a5, s4, a3, s2, a1, s0 ) );
11423 return res;
11427 /* Handle 128 bit PSHUFLW and PSHUFHW. */
11428 static Long dis_PSHUFxW_128 ( const VexAbiInfo* vbi, Prefix pfx,
11429 Long delta, Bool isAvx, Bool xIsH )
11431 IRTemp addr = IRTemp_INVALID;
11432 Int alen = 0;
11433 HChar dis_buf[50];
11434 UChar modrm = getUChar(delta);
11435 UInt rG = gregOfRexRM(pfx,modrm);
11436 UInt imm8;
11437 IRTemp sVmut, dVmut, sVcon, sV, dV, s3, s2, s1, s0;
11438 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11439 sV = newTemp(Ity_V128);
11440 dV = newTemp(Ity_V128);
11441 sVmut = newTemp(Ity_I64);
11442 dVmut = newTemp(Ity_I64);
11443 sVcon = newTemp(Ity_I64);
11444 if (epartIsReg(modrm)) {
11445 UInt rE = eregOfRexRM(pfx,modrm);
11446 assign( sV, getXMMReg(rE) );
11447 imm8 = (UInt)getUChar(delta+1);
11448 delta += 1+1;
11449 DIP("%spshuf%cw $%u,%s,%s\n",
11450 isAvx ? "v" : "", xIsH ? 'h' : 'l',
11451 imm8, nameXMMReg(rE), nameXMMReg(rG));
11452 } else {
11453 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
11454 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11455 imm8 = (UInt)getUChar(delta+alen);
11456 delta += alen+1;
11457 DIP("%spshuf%cw $%u,%s,%s\n",
11458 isAvx ? "v" : "", xIsH ? 'h' : 'l',
11459 imm8, dis_buf, nameXMMReg(rG));
11462 /* Get the to-be-changed (mut) and unchanging (con) bits of the
11463 source. */
11464 assign( sVmut, unop(xIsH ? Iop_V128HIto64 : Iop_V128to64, mkexpr(sV)) );
11465 assign( sVcon, unop(xIsH ? Iop_V128to64 : Iop_V128HIto64, mkexpr(sV)) );
11467 breakup64to16s( sVmut, &s3, &s2, &s1, &s0 );
11468 # define SEL(n) \
11469 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11470 assign(dVmut, mk64from16s( SEL((imm8>>6)&3), SEL((imm8>>4)&3),
11471 SEL((imm8>>2)&3), SEL((imm8>>0)&3) ));
11472 # undef SEL
11474 assign(dV, xIsH ? binop(Iop_64HLtoV128, mkexpr(dVmut), mkexpr(sVcon))
11475 : binop(Iop_64HLtoV128, mkexpr(sVcon), mkexpr(dVmut)) );
11477 (isAvx ? putYMMRegLoAndZU : putXMMReg)(rG, mkexpr(dV));
11478 return delta;
11482 /* Handle 256 bit PSHUFLW and PSHUFHW. */
11483 static Long dis_PSHUFxW_256 ( const VexAbiInfo* vbi, Prefix pfx,
11484 Long delta, Bool xIsH )
11486 IRTemp addr = IRTemp_INVALID;
11487 Int alen = 0;
11488 HChar dis_buf[50];
11489 UChar modrm = getUChar(delta);
11490 UInt rG = gregOfRexRM(pfx,modrm);
11491 UInt imm8;
11492 IRTemp sV, s[8], sV64[4], dVhi, dVlo;
11493 sV64[3] = sV64[2] = sV64[1] = sV64[0] = IRTemp_INVALID;
11494 s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
11495 sV = newTemp(Ity_V256);
11496 dVhi = newTemp(Ity_I64);
11497 dVlo = newTemp(Ity_I64);
11498 if (epartIsReg(modrm)) {
11499 UInt rE = eregOfRexRM(pfx,modrm);
11500 assign( sV, getYMMReg(rE) );
11501 imm8 = (UInt)getUChar(delta+1);
11502 delta += 1+1;
11503 DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l',
11504 imm8, nameYMMReg(rE), nameYMMReg(rG));
11505 } else {
11506 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
11507 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
11508 imm8 = (UInt)getUChar(delta+alen);
11509 delta += alen+1;
11510 DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l',
11511 imm8, dis_buf, nameYMMReg(rG));
11514 breakupV256to64s( sV, &sV64[3], &sV64[2], &sV64[1], &sV64[0] );
11515 breakup64to16s( sV64[xIsH ? 3 : 2], &s[7], &s[6], &s[5], &s[4] );
11516 breakup64to16s( sV64[xIsH ? 1 : 0], &s[3], &s[2], &s[1], &s[0] );
11518 assign( dVhi, mk64from16s( s[4 + ((imm8>>6)&3)], s[4 + ((imm8>>4)&3)],
11519 s[4 + ((imm8>>2)&3)], s[4 + ((imm8>>0)&3)] ) );
11520 assign( dVlo, mk64from16s( s[0 + ((imm8>>6)&3)], s[0 + ((imm8>>4)&3)],
11521 s[0 + ((imm8>>2)&3)], s[0 + ((imm8>>0)&3)] ) );
11522 putYMMReg( rG, mkV256from64s( xIsH ? dVhi : sV64[3],
11523 xIsH ? sV64[2] : dVhi,
11524 xIsH ? dVlo : sV64[1],
11525 xIsH ? sV64[0] : dVlo ) );
11526 return delta;
11530 static Long dis_PEXTRW_128_EregOnly_toG ( const VexAbiInfo* vbi, Prefix pfx,
11531 Long delta, Bool isAvx )
11533 Long deltaIN = delta;
11534 UChar modrm = getUChar(delta);
11535 UInt rG = gregOfRexRM(pfx,modrm);
11536 IRTemp sV = newTemp(Ity_V128);
11537 IRTemp d16 = newTemp(Ity_I16);
11538 UInt imm8;
11539 IRTemp s0, s1, s2, s3;
11540 if (epartIsReg(modrm)) {
11541 UInt rE = eregOfRexRM(pfx,modrm);
11542 assign(sV, getXMMReg(rE));
11543 imm8 = getUChar(delta+1) & 7;
11544 delta += 1+1;
11545 DIP("%spextrw $%u,%s,%s\n", isAvx ? "v" : "",
11546 imm8, nameXMMReg(rE), nameIReg32(rG));
11547 } else {
11548 /* The memory case is disallowed, apparently. */
11549 return deltaIN; /* FAIL */
11551 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11552 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11553 switch (imm8) {
11554 case 0: assign(d16, unop(Iop_32to16, mkexpr(s0))); break;
11555 case 1: assign(d16, unop(Iop_32HIto16, mkexpr(s0))); break;
11556 case 2: assign(d16, unop(Iop_32to16, mkexpr(s1))); break;
11557 case 3: assign(d16, unop(Iop_32HIto16, mkexpr(s1))); break;
11558 case 4: assign(d16, unop(Iop_32to16, mkexpr(s2))); break;
11559 case 5: assign(d16, unop(Iop_32HIto16, mkexpr(s2))); break;
11560 case 6: assign(d16, unop(Iop_32to16, mkexpr(s3))); break;
11561 case 7: assign(d16, unop(Iop_32HIto16, mkexpr(s3))); break;
11562 default: vassert(0);
11564 putIReg32(rG, unop(Iop_16Uto32, mkexpr(d16)));
11565 return delta;
11569 static Long dis_CVTDQ2PD_128 ( const VexAbiInfo* vbi, Prefix pfx,
11570 Long delta, Bool isAvx )
11572 IRTemp addr = IRTemp_INVALID;
11573 Int alen = 0;
11574 HChar dis_buf[50];
11575 UChar modrm = getUChar(delta);
11576 IRTemp arg64 = newTemp(Ity_I64);
11577 UInt rG = gregOfRexRM(pfx,modrm);
11578 const HChar* mbV = isAvx ? "v" : "";
11579 if (epartIsReg(modrm)) {
11580 UInt rE = eregOfRexRM(pfx,modrm);
11581 assign( arg64, getXMMRegLane64(rE, 0) );
11582 delta += 1;
11583 DIP("%scvtdq2pd %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG));
11584 } else {
11585 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11586 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
11587 delta += alen;
11588 DIP("%scvtdq2pd %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
11590 putXMMRegLane64F(
11591 rG, 0,
11592 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)))
11594 putXMMRegLane64F(
11595 rG, 1,
11596 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)))
11598 if (isAvx)
11599 putYMMRegLane128(rG, 1, mkV128(0));
11600 return delta;
11604 static Long dis_STMXCSR ( const VexAbiInfo* vbi, Prefix pfx,
11605 Long delta, Bool isAvx )
11607 IRTemp addr = IRTemp_INVALID;
11608 Int alen = 0;
11609 HChar dis_buf[50];
11610 UChar modrm = getUChar(delta);
11611 vassert(!epartIsReg(modrm)); /* ensured by caller */
11612 vassert(gregOfRexRM(pfx,modrm) == 3); /* ditto */
11614 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11615 delta += alen;
11617 /* Fake up a native SSE mxcsr word. The only thing it depends on
11618 is SSEROUND[1:0], so call a clean helper to cook it up.
11620 /* ULong amd64g_create_mxcsr ( ULong sseround ) */

11621 DIP("%sstmxcsr %s\n", isAvx ? "v" : "", dis_buf);
11622 storeLE(
11623 mkexpr(addr),
11624 unop(Iop_64to32,
11625 mkIRExprCCall(
11626 Ity_I64, 0/*regp*/,
11627 "amd64g_create_mxcsr", &amd64g_create_mxcsr,
11628 mkIRExprVec_1( unop(Iop_32Uto64,get_sse_roundingmode()) )
11632 return delta;
11636 static Long dis_LDMXCSR ( const VexAbiInfo* vbi, Prefix pfx,
11637 Long delta, Bool isAvx )
11639 IRTemp addr = IRTemp_INVALID;
11640 Int alen = 0;
11641 HChar dis_buf[50];
11642 UChar modrm = getUChar(delta);
11643 vassert(!epartIsReg(modrm)); /* ensured by caller */
11644 vassert(gregOfRexRM(pfx,modrm) == 2); /* ditto */
11646 IRTemp t64 = newTemp(Ity_I64);
11647 IRTemp ew = newTemp(Ity_I32);
11649 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11650 delta += alen;
11651 DIP("%sldmxcsr %s\n", isAvx ? "v" : "", dis_buf);
11653 /* The only thing we observe in %mxcsr is the rounding mode.
11654 Therefore, pass the 32-bit value (SSE native-format control
11655 word) to a clean helper, getting back a 64-bit value, the
11656 lower half of which is the SSEROUND value to store, and the
11657 upper half of which is the emulation-warning token which may
11658 be generated.
11660 /* ULong amd64g_check_ldmxcsr ( ULong ); */
11661 assign( t64, mkIRExprCCall(
11662 Ity_I64, 0/*regparms*/,
11663 "amd64g_check_ldmxcsr",
11664 &amd64g_check_ldmxcsr,
11665 mkIRExprVec_1(
11666 unop(Iop_32Uto64,
11667 loadLE(Ity_I32, mkexpr(addr))
11673 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) );
11674 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
11675 put_emwarn( mkexpr(ew) );
11676 /* Finally, if an emulation warning was reported, side-exit to
11677 the next insn, reporting the warning, so that Valgrind's
11678 dispatcher sees the warning. */
11679 stmt(
11680 IRStmt_Exit(
11681 binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)),
11682 Ijk_EmWarn,
11683 IRConst_U64(guest_RIP_bbstart+delta),
11684 OFFB_RIP
11687 return delta;
11691 static void gen_XSAVE_SEQUENCE ( IRTemp addr, IRTemp rfbm )
11693 /* ------ rfbm[0] gates the x87 state ------ */
11695 /* Uses dirty helper:
11696 void amd64g_do_XSAVE_COMPONENT_0 ( VexGuestAMD64State*, ULong )
11698 IRDirty* d0 = unsafeIRDirty_0_N (
11699 0/*regparms*/,
11700 "amd64g_dirtyhelper_XSAVE_COMPONENT_0",
11701 &amd64g_dirtyhelper_XSAVE_COMPONENT_0,
11702 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
11704 d0->guard = binop(Iop_CmpEQ64, binop(Iop_And64, mkexpr(rfbm), mkU64(1)),
11705 mkU64(1));
11707 /* Declare we're writing memory. Really, bytes 24 through 31
11708 (MXCSR and MXCSR_MASK) aren't written, but we can't express more
11709 than 1 memory area here, so just mark the whole thing as
11710 written. */
11711 d0->mFx = Ifx_Write;
11712 d0->mAddr = mkexpr(addr);
11713 d0->mSize = 160;
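   /* 160 bytes covers the legacy FXSAVE header (bytes 0 .. 31) plus the
      eight 16-byte x87 register slots (bytes 32 .. 159). */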
11715 /* declare we're reading guest state */
11716 d0->nFxState = 5;
11717 vex_bzero(&d0->fxState, sizeof(d0->fxState));
11719 d0->fxState[0].fx = Ifx_Read;
11720 d0->fxState[0].offset = OFFB_FTOP;
11721 d0->fxState[0].size = sizeof(UInt);
11723 d0->fxState[1].fx = Ifx_Read;
11724 d0->fxState[1].offset = OFFB_FPREGS;
11725 d0->fxState[1].size = 8 * sizeof(ULong);
11727 d0->fxState[2].fx = Ifx_Read;
11728 d0->fxState[2].offset = OFFB_FPTAGS;
11729 d0->fxState[2].size = 8 * sizeof(UChar);
11731 d0->fxState[3].fx = Ifx_Read;
11732 d0->fxState[3].offset = OFFB_FPROUND;
11733 d0->fxState[3].size = sizeof(ULong);
11735 d0->fxState[4].fx = Ifx_Read;
11736 d0->fxState[4].offset = OFFB_FC3210;
11737 d0->fxState[4].size = sizeof(ULong);
11739 stmt( IRStmt_Dirty(d0) );
11741 /* ------ rfbm[1] gates the SSE state ------ */
11743 IRTemp rfbm_1 = newTemp(Ity_I64);
11744 IRTemp rfbm_1or2 = newTemp(Ity_I64);
11745 assign(rfbm_1, binop(Iop_And64, mkexpr(rfbm), mkU64(2)));
11746 assign(rfbm_1or2, binop(Iop_And64, mkexpr(rfbm), mkU64(6)));
11748 IRExpr* guard_1 = binop(Iop_CmpEQ64, mkexpr(rfbm_1), mkU64(2));
11749 IRExpr* guard_1or2 = binop(Iop_CmpNE64, mkexpr(rfbm_1or2), mkU64(0));
11751 /* Uses dirty helper:
11752 void amd64g_do_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS
11753 ( VexGuestAMD64State*, ULong )
11754 This creates only MXCSR and MXCSR_MASK. We need to do this if
11755 either components 1 (SSE) or 2 (AVX) are requested. Hence the
11756 guard condition is a bit more complex.
11758 IRDirty* d1 = unsafeIRDirty_0_N (
11759 0/*regparms*/,
11760 "amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS",
11761 &amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS,
11762 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
11764 d1->guard = guard_1or2;
11766 /* Declare we're writing memory: MXCSR and MXCSR_MASK. Note that
11767 the code for rfbm[0] just above claims a write of 0 .. 159, so
11768 this duplicates it. But at least correctly connects 24 .. 31 to
11769 the MXCSR guest state representation (SSEROUND field). */
11770 d1->mFx = Ifx_Write;
11771 d1->mAddr = binop(Iop_Add64, mkexpr(addr), mkU64(24));
11772 d1->mSize = 8;
11774 /* declare we're reading guest state */
11775 d1->nFxState = 1;
11776 vex_bzero(&d1->fxState, sizeof(d1->fxState));
11778 d1->fxState[0].fx = Ifx_Read;
11779 d1->fxState[0].offset = OFFB_SSEROUND;
11780 d1->fxState[0].size = sizeof(ULong);
11782 /* Call the helper. This creates MXCSR and MXCSR_MASK but nothing
11783 else. We do the actual register array, XMM[0..15], separately,
11784 in order that any undefinedness in the XMM registers is tracked
11785 separately by Memcheck and does not "infect" the in-memory
11786 shadow for the other parts of the image. */
11787 stmt( IRStmt_Dirty(d1) );
11789 /* And now the XMMs themselves. */
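   /* In the standard (non-compacted) XSAVE layout assumed here, the XMM
      register array occupies bytes 160 .. 415 of the area, i.e. XMM[reg]
      is at offset 160 + reg * 16. */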
11790 UInt reg;
11791 for (reg = 0; reg < 16; reg++) {
11792 stmt( IRStmt_StoreG(
11793 Iend_LE,
11794 binop(Iop_Add64, mkexpr(addr), mkU64(160 + reg * 16)),
11795 getXMMReg(reg),
11796 guard_1
11800 /* ------ rfbm[2] gates the AVX state ------ */
11801 /* Component 2 is just a bunch of register saves, so we'll do it
11802 inline, just to be simple and to be Memcheck friendly. */
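   /* With the assumed XCR0 value of 7, the standard layout places the
      upper 128-bit halves of YMM0..15 (component 2) at offset 576
      onwards, 16 bytes per register. */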
11804 IRTemp rfbm_2 = newTemp(Ity_I64);
11805 assign(rfbm_2, binop(Iop_And64, mkexpr(rfbm), mkU64(4)));
11807 IRExpr* guard_2 = binop(Iop_CmpEQ64, mkexpr(rfbm_2), mkU64(4));
11809 for (reg = 0; reg < 16; reg++) {
11810 stmt( IRStmt_StoreG(
11811 Iend_LE,
11812 binop(Iop_Add64, mkexpr(addr), mkU64(576 + reg * 16)),
11813 getYMMRegLane128(reg,1),
11814 guard_2
11820 static Long dis_XSAVE ( const VexAbiInfo* vbi,
11821 Prefix pfx, Long delta, Int sz )
11823 /* Note that the presence or absence of REX.W (indicated here by
11824 |sz|) slightly affects the written format: whether the saved FPU
11825 IP and DP pointers are 64 or 32 bits. But the helper function
11826 we call simply writes zero bits in the relevant fields, which
11827 are 64 bits regardless of what REX.W is, and so it's good enough
11828 (iow, equally broken) in both cases. */
11829 IRTemp addr = IRTemp_INVALID;
11830 Int alen = 0;
11831 HChar dis_buf[50];
11832 UChar modrm = getUChar(delta);
11833 vassert(!epartIsReg(modrm)); /* ensured by caller */
11834 vassert(sz == 4 || sz == 8); /* ditto */
11836 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11837 delta += alen;
11838 gen_SEGV_if_not_64_aligned(addr);
11840 DIP("%sxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);
11842 /* VEX's caller is assumed to have checked this. */
11843 const ULong aSSUMED_XCR0_VALUE = 7;
11845 IRTemp rfbm = newTemp(Ity_I64);
11846 assign(rfbm,
11847 binop(Iop_And64,
11848 binop(Iop_Or64,
11849 binop(Iop_Shl64,
11850 unop(Iop_32Uto64, getIRegRDX(4)), mkU8(32)),
11851 unop(Iop_32Uto64, getIRegRAX(4))),
11852 mkU64(aSSUMED_XCR0_VALUE)));
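   /* So RFBM = (RDX:RAX) & XCR0, with XCR0 assumed to be 7 (x87, SSE and
      AVX enabled); hence only the low three bits of rfbm can be set. */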
11854 gen_XSAVE_SEQUENCE(addr, rfbm);
11856 /* Finally, we need to update XSTATE_BV in the XSAVE header area, by
11857 OR-ing the RFBM value into it. */
11858 IRTemp addr_plus_512 = newTemp(Ity_I64);
11859 assign(addr_plus_512, binop(Iop_Add64, mkexpr(addr), mkU64(512)));
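   /* Since rfbm is at most 7, updating just the low byte of XSTATE_BV
      (the first byte of the XSAVE header, at offset 512) suffices. */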
11860 storeLE( mkexpr(addr_plus_512),
11861 binop(Iop_Or8,
11862 unop(Iop_64to8, mkexpr(rfbm)),
11863 loadLE(Ity_I8, mkexpr(addr_plus_512))) );
11865 return delta;
11869 static Long dis_FXSAVE ( const VexAbiInfo* vbi,
11870 Prefix pfx, Long delta, Int sz )
11872 /* See comment in dis_XSAVE about the significance of REX.W. */
11873 IRTemp addr = IRTemp_INVALID;
11874 Int alen = 0;
11875 HChar dis_buf[50];
11876 UChar modrm = getUChar(delta);
11877 vassert(!epartIsReg(modrm)); /* ensured by caller */
11878 vassert(sz == 4 || sz == 8); /* ditto */
11880 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11881 delta += alen;
11882 gen_SEGV_if_not_16_aligned(addr);
11884 DIP("%sfxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);
11886 /* FXSAVE is just XSAVE with components 0 and 1 selected. Set rfbm
11887 to 0b011, generate the XSAVE sequence accordingly, and let iropt
11888 fold out the unused (AVX) parts. */
11889 IRTemp rfbm = newTemp(Ity_I64);
11890 assign(rfbm, mkU64(3));
11891 gen_XSAVE_SEQUENCE(addr, rfbm);
11893 return delta;
11897 static void gen_XRSTOR_SEQUENCE ( IRTemp addr, IRTemp xstate_bv, IRTemp rfbm )
11899 /* ------ rfbm[0] gates the x87 state ------ */
11901 /* If rfbm[0] == 1, we have to write the x87 state. If
11902 xstate_bv[0] == 1, we will read it from the memory image, else
11903 we'll set it to initial values. Doing this with a helper
11904 function and getting the definedness flow annotations correct is
11905 too difficult, so generate stupid but simple code: first set the
11906 registers to initial values, regardless of xstate_bv[0]. Then,
11907 conditionally restore from the memory image. */
11909 IRTemp rfbm_0 = newTemp(Ity_I64);
11910 IRTemp xstate_bv_0 = newTemp(Ity_I64);
11911 IRTemp restore_0 = newTemp(Ity_I64);
11912 assign(rfbm_0, binop(Iop_And64, mkexpr(rfbm), mkU64(1)));
11913 assign(xstate_bv_0, binop(Iop_And64, mkexpr(xstate_bv), mkU64(1)));
11914 assign(restore_0, binop(Iop_And64, mkexpr(rfbm_0), mkexpr(xstate_bv_0)));
11916 gen_FINIT_SEQUENCE( binop(Iop_CmpNE64, mkexpr(rfbm_0), mkU64(0)) );
11918 /* Uses dirty helper:
11919 void amd64g_do_XRSTOR_COMPONENT_0 ( VexGuestAMD64State*, ULong )
11921 IRDirty* d0 = unsafeIRDirty_0_N (
11922 0/*regparms*/,
11923 "amd64g_dirtyhelper_XRSTOR_COMPONENT_0",
11924 &amd64g_dirtyhelper_XRSTOR_COMPONENT_0,
11925 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
11927 d0->guard = binop(Iop_CmpNE64, mkexpr(restore_0), mkU64(0));
11929 /* Declare we're reading memory. Really, bytes 24 through 31
11930 (MXCSR and MXCSR_MASK) aren't read, but we can't express more
11931 than 1 memory area here, so just mark the whole thing as
11932 read. */
11933 d0->mFx = Ifx_Read;
11934 d0->mAddr = mkexpr(addr);
11935 d0->mSize = 160;
11937 /* declare we're writing guest state */
11938 d0->nFxState = 5;
11939 vex_bzero(&d0->fxState, sizeof(d0->fxState));
11941 d0->fxState[0].fx = Ifx_Write;
11942 d0->fxState[0].offset = OFFB_FTOP;
11943 d0->fxState[0].size = sizeof(UInt);
11945 d0->fxState[1].fx = Ifx_Write;
11946 d0->fxState[1].offset = OFFB_FPREGS;
11947 d0->fxState[1].size = 8 * sizeof(ULong);
11949 d0->fxState[2].fx = Ifx_Write;
11950 d0->fxState[2].offset = OFFB_FPTAGS;
11951 d0->fxState[2].size = 8 * sizeof(UChar);
11953 d0->fxState[3].fx = Ifx_Write;
11954 d0->fxState[3].offset = OFFB_FPROUND;
11955 d0->fxState[3].size = sizeof(ULong);
11957 d0->fxState[4].fx = Ifx_Write;
11958 d0->fxState[4].offset = OFFB_FC3210;
11959 d0->fxState[4].size = sizeof(ULong);
11961 stmt( IRStmt_Dirty(d0) );
11963 /* ------ rfbm[1] gates the SSE state ------ */
11965 /* Same scheme as component 0: first zero it out, and then possibly
11966 restore from the memory area. */
11967 IRTemp rfbm_1 = newTemp(Ity_I64);
11968 IRTemp xstate_bv_1 = newTemp(Ity_I64);
11969 IRTemp restore_1 = newTemp(Ity_I64);
11970 assign(rfbm_1, binop(Iop_And64, mkexpr(rfbm), mkU64(2)));
11971 assign(xstate_bv_1, binop(Iop_And64, mkexpr(xstate_bv), mkU64(2)));
11972 assign(restore_1, binop(Iop_And64, mkexpr(rfbm_1), mkexpr(xstate_bv_1)));
11973 IRExpr* rfbm_1e = binop(Iop_CmpNE64, mkexpr(rfbm_1), mkU64(0));
11974 IRExpr* restore_1e = binop(Iop_CmpNE64, mkexpr(restore_1), mkU64(0));
11976 IRTemp rfbm_1or2 = newTemp(Ity_I64);
11977 IRTemp xstate_bv_1or2 = newTemp(Ity_I64);
11978 IRTemp restore_1or2 = newTemp(Ity_I64);
11979 assign(rfbm_1or2, binop(Iop_And64, mkexpr(rfbm), mkU64(6)));
11980 assign(xstate_bv_1or2, binop(Iop_And64, mkexpr(xstate_bv), mkU64(6)));
11981 assign(restore_1or2, binop(Iop_And64, mkexpr(rfbm_1or2),
11982 mkexpr(xstate_bv_1or2)));
11983 IRExpr* rfbm_1or2e = binop(Iop_CmpNE64, mkexpr(rfbm_1or2), mkU64(0));
11984 IRExpr* restore_1or2e = binop(Iop_CmpNE64, mkexpr(restore_1or2), mkU64(0));
11986 /* The areas in question are: SSEROUND, and the XMM register array. */
11987 putGuarded(OFFB_SSEROUND, rfbm_1or2e, mkU64(Irrm_NEAREST));
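   /* SSEROUND is reset whenever SSE or AVX state is requested
      (rfbm_1or2e), since MXCSR belongs to component 1 but must also be
      handled for component-2-only requests; the XMM registers themselves
      are reset only when component 1 (rfbm_1e) is requested. */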
11989 UInt reg;
11990 for (reg = 0; reg < 16; reg++) {
11991 putGuarded(xmmGuestRegOffset(reg), rfbm_1e, mkV128(0));
11994 /* And now possibly restore from MXCSR/MXCSR_MASK */
11995 /* Uses dirty helper:
11996 void amd64g_do_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS
11997 ( VexGuestAMD64State*, ULong )
11998 This restores from only MXCSR and MXCSR_MASK. We need to do
11999 this if either components 1 (SSE) or 2 (AVX) are requested.
12000 Hence the guard condition is a bit more complex.
12002 IRDirty* d1 = unsafeIRDirty_0_N (
12003 0/*regparms*/,
12004 "amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS",
12005 &amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS,
12006 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
12008 d1->guard = restore_1or2e;
12010 /* Declare we're reading memory: MXCSR and MXCSR_MASK. Note that
12011 the code for rfbm[0] just above claims a read of 0 .. 159, so
12012 this duplicates it. But at least correctly connects 24 .. 31 to
12013 the MXCSR guest state representation (SSEROUND field). */
12014 d1->mFx = Ifx_Read;
12015 d1->mAddr = binop(Iop_Add64, mkexpr(addr), mkU64(24));
12016 d1->mSize = 8;
12018 /* declare we're writing guest state */
12019 d1->nFxState = 1;
12020 vex_bzero(&d1->fxState, sizeof(d1->fxState));
12022 d1->fxState[0].fx = Ifx_Write;
12023 d1->fxState[0].offset = OFFB_SSEROUND;
12024 d1->fxState[0].size = sizeof(ULong);
12026 /* Call the helper. This creates SSEROUND but nothing
12027 else. We do the actual register array, XMM[0..15], separately,
12028 in order that any undefinedness in the XMM registers is tracked
12029 separately by Memcheck and is not "infected" by the in-memory
12030 shadow for the other parts of the image. */
12031 stmt( IRStmt_Dirty(d1) );
12033 /* And now the XMMs themselves. For each register, we PUT either
12034 its old value, or the value loaded from memory. One convenient
12035 way to do that is with a conditional load that has, as its
12036 default value, the old value of the register. */
12037 for (reg = 0; reg < 16; reg++) {
12038 IRExpr* ea = binop(Iop_Add64, mkexpr(addr), mkU64(160 + reg * 16));
12039 IRExpr* alt = getXMMReg(reg);
12040 IRTemp loadedValue = newTemp(Ity_V128);
12041 stmt( IRStmt_LoadG(Iend_LE,
12042 ILGop_IdentV128,
12043 loadedValue, ea, alt, restore_1e) );
12044 putXMMReg(reg, mkexpr(loadedValue));
12047 /* ------ rfbm[2] gates the AVX state ------ */
12048 /* Component 2 is just a bunch of register loads, so we'll do it
12049 inline, just to be simple and to be Memcheck friendly. */
12051 /* Same scheme as component 0: first zero it out, and then possibly
12052 restore from the memory area. */
12053 IRTemp rfbm_2 = newTemp(Ity_I64);
12054 IRTemp xstate_bv_2 = newTemp(Ity_I64);
12055 IRTemp restore_2 = newTemp(Ity_I64);
12056 assign(rfbm_2, binop(Iop_And64, mkexpr(rfbm), mkU64(4)));
12057 assign(xstate_bv_2, binop(Iop_And64, mkexpr(xstate_bv), mkU64(4)));
12058 assign(restore_2, binop(Iop_And64, mkexpr(rfbm_2), mkexpr(xstate_bv_2)));
12060 IRExpr* rfbm_2e = binop(Iop_CmpNE64, mkexpr(rfbm_2), mkU64(0));
12061 IRExpr* restore_2e = binop(Iop_CmpNE64, mkexpr(restore_2), mkU64(0));
12063 for (reg = 0; reg < 16; reg++) {
12064 putGuarded(ymmGuestRegLane128offset(reg, 1), rfbm_2e, mkV128(0));
12067 for (reg = 0; reg < 16; reg++) {
12068 IRExpr* ea = binop(Iop_Add64, mkexpr(addr), mkU64(576 + reg * 16));
12069 IRExpr* alt = getYMMRegLane128(reg, 1);
12070 IRTemp loadedValue = newTemp(Ity_V128);
12071 stmt( IRStmt_LoadG(Iend_LE,
12072 ILGop_IdentV128,
12073 loadedValue, ea, alt, restore_2e) );
12074 putYMMRegLane128(reg, 1, mkexpr(loadedValue));
12079 static Long dis_XRSTOR ( const VexAbiInfo* vbi,
12080 Prefix pfx, Long delta, Int sz )
12082 /* As with XSAVE above we ignore the value of REX.W since we're
12083 not bothering with the FPU DP and IP fields. */
12084 IRTemp addr = IRTemp_INVALID;
12085 Int alen = 0;
12086 HChar dis_buf[50];
12087 UChar modrm = getUChar(delta);
12088 vassert(!epartIsReg(modrm)); /* ensured by caller */
12089 vassert(sz == 4 || sz == 8); /* ditto */
12091 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12092 delta += alen;
12093 gen_SEGV_if_not_64_aligned(addr);
12095 DIP("%sxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);
12097 /* VEX's caller is assumed to have checked this. */
12098 const ULong aSSUMED_XCR0_VALUE = 7;
12100 IRTemp rfbm = newTemp(Ity_I64);
12101 assign(rfbm,
12102 binop(Iop_And64,
12103 binop(Iop_Or64,
12104 binop(Iop_Shl64,
12105 unop(Iop_32Uto64, getIRegRDX(4)), mkU8(32)),
12106 unop(Iop_32Uto64, getIRegRAX(4))),
12107 mkU64(aSSUMED_XCR0_VALUE)));
12109 IRTemp xstate_bv = newTemp(Ity_I64);
12110 assign(xstate_bv, loadLE(Ity_I64,
12111 binop(Iop_Add64, mkexpr(addr), mkU64(512+0))));
12113 IRTemp xcomp_bv = newTemp(Ity_I64);
12114 assign(xcomp_bv, loadLE(Ity_I64,
12115 binop(Iop_Add64, mkexpr(addr), mkU64(512+8))));
12117 IRTemp xsavehdr_23_16 = newTemp(Ity_I64);
12118 assign( xsavehdr_23_16,
12119 loadLE(Ity_I64,
12120 binop(Iop_Add64, mkexpr(addr), mkU64(512+16))));
12122 /* We must fault if
12123 * xcomp_bv[63] == 1, since this simulated CPU does not support
12124 the compaction extension.
12125 * xstate_bv sets a bit outside of XCR0 (which we assume to be 7).
12126 * any of the xsave header bytes 23 .. 8 are nonzero. This seems to
12127 imply that xcomp_bv must be zero.
12128 xcomp_bv is header bytes 15 .. 8 and xstate_bv is header bytes 7 .. 0
12130 IRTemp fault_if_nonzero = newTemp(Ity_I64);
12131 assign(fault_if_nonzero,
12132 binop(Iop_Or64,
12133 binop(Iop_And64, mkexpr(xstate_bv), mkU64(~aSSUMED_XCR0_VALUE)),
12134 binop(Iop_Or64, mkexpr(xcomp_bv), mkexpr(xsavehdr_23_16))));
12135 stmt( IRStmt_Exit(binop(Iop_CmpNE64, mkexpr(fault_if_nonzero), mkU64(0)),
12136 Ijk_SigSEGV,
12137 IRConst_U64(guest_RIP_curr_instr),
12138 OFFB_RIP
12141 /* We are guaranteed now that both xstate_bv and rfbm are in the
12142 range 0 .. 7. Generate the restore sequence proper. */
12143 gen_XRSTOR_SEQUENCE(addr, xstate_bv, rfbm);
12145 return delta;
12149 static Long dis_FXRSTOR ( const VexAbiInfo* vbi,
12150 Prefix pfx, Long delta, Int sz )
12152 /* As with FXSAVE above we ignore the value of REX.W since we're
12153 not bothering with the FPU DP and IP fields. */
12154 IRTemp addr = IRTemp_INVALID;
12155 Int alen = 0;
12156 HChar dis_buf[50];
12157 UChar modrm = getUChar(delta);
12158 vassert(!epartIsReg(modrm)); /* ensured by caller */
12159 vassert(sz == 4 || sz == 8); /* ditto */
12161 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12162 delta += alen;
12163 gen_SEGV_if_not_16_aligned(addr);
12165 DIP("%sfxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);
12167 /* FXRSTOR is just XRSTOR with components 0 and 1 selected and also
12168 as if components 0 and 1 are set as present in XSTATE_BV in the
12169 XSAVE header. Set both rfbm and xstate_bv to 0b011 therefore,
12170 generate the XRSTOR sequence accordingly, and let iropt fold out
12171 the unused (AVX) parts. */
12172 IRTemp three = newTemp(Ity_I64);
12173 assign(three, mkU64(3));
12174 gen_XRSTOR_SEQUENCE(addr, three/*xstate_bv*/, three/*rfbm*/);
12176 return delta;
12180 static IRTemp math_PINSRW_128 ( IRTemp v128, IRTemp u16, UInt imm8 )
12182 vassert(imm8 >= 0 && imm8 <= 7);
12184 // Create a V128 value which has the selected word in the
12185 // specified lane, and zeroes everywhere else.
12186 IRTemp tmp128 = newTemp(Ity_V128);
12187 IRTemp halfshift = newTemp(Ity_I64);
12188 assign(halfshift, binop(Iop_Shl64,
12189 unop(Iop_16Uto64, mkexpr(u16)),
12190 mkU8(16 * (imm8 & 3))));
12191 if (imm8 < 4) {
12192 assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
12193 } else {
12194 assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
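   /* mkV128 takes a 16-bit mask in which bit i selects 0xFF or 0x00 for
      byte i of the constant; clearing the two bytes of word imm8 lets
      the shifted-up u16 be OR'd into place below. */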
12197 UShort mask = ~(3 << (imm8 * 2));
12198 IRTemp res = newTemp(Ity_V128);
12199 assign( res, binop(Iop_OrV128,
12200 mkexpr(tmp128),
12201 binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
12202 return res;
12206 static IRTemp math_PSADBW_128 ( IRTemp dV, IRTemp sV )
12208 IRTemp s1, s0, d1, d0;
12209 s1 = s0 = d1 = d0 = IRTemp_INVALID;
12211 breakupV128to64s( sV, &s1, &s0 );
12212 breakupV128to64s( dV, &d1, &d0 );
12214 IRTemp res = newTemp(Ity_V128);
12215 assign( res,
12216 binop(Iop_64HLtoV128,
12217 mkIRExprCCall(Ity_I64, 0/*regparms*/,
12218 "amd64g_calculate_mmx_psadbw",
12219 &amd64g_calculate_mmx_psadbw,
12220 mkIRExprVec_2( mkexpr(s1), mkexpr(d1))),
12221 mkIRExprCCall(Ity_I64, 0/*regparms*/,
12222 "amd64g_calculate_mmx_psadbw",
12223 &amd64g_calculate_mmx_psadbw,
12224 mkIRExprVec_2( mkexpr(s0), mkexpr(d0)))) );
12225 return res;
12229 static IRTemp math_PSADBW_256 ( IRTemp dV, IRTemp sV )
12231 IRTemp sHi, sLo, dHi, dLo;
12232 sHi = sLo = dHi = dLo = IRTemp_INVALID;
12233 breakupV256toV128s( dV, &dHi, &dLo);
12234 breakupV256toV128s( sV, &sHi, &sLo);
12235 IRTemp res = newTemp(Ity_V256);
12236 assign(res, binop(Iop_V128HLtoV256,
12237 mkexpr(math_PSADBW_128(dHi, sHi)),
12238 mkexpr(math_PSADBW_128(dLo, sLo))));
12239 return res;
12243 static Long dis_MASKMOVDQU ( const VexAbiInfo* vbi, Prefix pfx,
12244 Long delta, Bool isAvx )
12246 IRTemp regD = newTemp(Ity_V128);
12247 IRTemp mask = newTemp(Ity_V128);
12248 IRTemp olddata = newTemp(Ity_V128);
12249 IRTemp newdata = newTemp(Ity_V128);
12250 IRTemp addr = newTemp(Ity_I64);
12251 UChar modrm = getUChar(delta);
12252 UInt rG = gregOfRexRM(pfx,modrm);
12253 UInt rE = eregOfRexRM(pfx,modrm);
12255 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
12256 assign( regD, getXMMReg( rG ));
12258 /* Unfortunately can't do the obvious thing with SarN8x16
12259 here since that can't be re-emitted as SSE2 code - no such
12260 insn. */
12261 assign( mask,
12262 binop(Iop_64HLtoV128,
12263 binop(Iop_SarN8x8,
12264 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ),
12265 mkU8(7) ),
12266 binop(Iop_SarN8x8,
12267 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ),
12268 mkU8(7) ) ));
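   /* Each byte of |mask| is now 0x00 or 0xFF, replicated from the sign
      bit of the corresponding byte of xmm(E), computed one 64-bit half
      at a time with the MMX-style SarN8x8. */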
12269 assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
12270 assign( newdata, binop(Iop_OrV128,
12271 binop(Iop_AndV128,
12272 mkexpr(regD),
12273 mkexpr(mask) ),
12274 binop(Iop_AndV128,
12275 mkexpr(olddata),
12276 unop(Iop_NotV128, mkexpr(mask)))) );
12277 storeLE( mkexpr(addr), mkexpr(newdata) );
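   /* Note this read-modify-write of all 16 bytes produces the same final
      memory contents as the hardware's byte-granular masked store,
      although the access pattern differs. */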
12279 delta += 1;
12280 DIP("%smaskmovdqu %s,%s\n", isAvx ? "v" : "",
12281 nameXMMReg(rE), nameXMMReg(rG) );
12282 return delta;
12286 static Long dis_MOVMSKPS_128 ( const VexAbiInfo* vbi, Prefix pfx,
12287 Long delta, Bool isAvx )
12289 UChar modrm = getUChar(delta);
12290 UInt rG = gregOfRexRM(pfx,modrm);
12291 UInt rE = eregOfRexRM(pfx,modrm);
12292 IRTemp t0 = newTemp(Ity_I32);
12293 IRTemp t1 = newTemp(Ity_I32);
12294 IRTemp t2 = newTemp(Ity_I32);
12295 IRTemp t3 = newTemp(Ity_I32);
12296 delta += 1;
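   /* For lane i, shift the F32's sign bit (bit 31) down to bit position
      i and mask it out, then OR the four results together to form the
      4-bit value written to ireg(G). */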
12297 assign( t0, binop( Iop_And32,
12298 binop(Iop_Shr32, getXMMRegLane32(rE,0), mkU8(31)),
12299 mkU32(1) ));
12300 assign( t1, binop( Iop_And32,
12301 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(30)),
12302 mkU32(2) ));
12303 assign( t2, binop( Iop_And32,
12304 binop(Iop_Shr32, getXMMRegLane32(rE,2), mkU8(29)),
12305 mkU32(4) ));
12306 assign( t3, binop( Iop_And32,
12307 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(28)),
12308 mkU32(8) ));
12309 putIReg32( rG, binop(Iop_Or32,
12310 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
12311 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) );
12312 DIP("%smovmskps %s,%s\n", isAvx ? "v" : "",
12313 nameXMMReg(rE), nameIReg32(rG));
12314 return delta;
12318 static Long dis_MOVMSKPS_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
12320 UChar modrm = getUChar(delta);
12321 UInt rG = gregOfRexRM(pfx,modrm);
12322 UInt rE = eregOfRexRM(pfx,modrm);
12323 IRTemp t0 = newTemp(Ity_I32);
12324 IRTemp t1 = newTemp(Ity_I32);
12325 IRTemp t2 = newTemp(Ity_I32);
12326 IRTemp t3 = newTemp(Ity_I32);
12327 IRTemp t4 = newTemp(Ity_I32);
12328 IRTemp t5 = newTemp(Ity_I32);
12329 IRTemp t6 = newTemp(Ity_I32);
12330 IRTemp t7 = newTemp(Ity_I32);
12331 delta += 1;
12332 assign( t0, binop( Iop_And32,
12333 binop(Iop_Shr32, getYMMRegLane32(rE,0), mkU8(31)),
12334 mkU32(1) ));
12335 assign( t1, binop( Iop_And32,
12336 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(30)),
12337 mkU32(2) ));
12338 assign( t2, binop( Iop_And32,
12339 binop(Iop_Shr32, getYMMRegLane32(rE,2), mkU8(29)),
12340 mkU32(4) ));
12341 assign( t3, binop( Iop_And32,
12342 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(28)),
12343 mkU32(8) ));
12344 assign( t4, binop( Iop_And32,
12345 binop(Iop_Shr32, getYMMRegLane32(rE,4), mkU8(27)),
12346 mkU32(16) ));
12347 assign( t5, binop( Iop_And32,
12348 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(26)),
12349 mkU32(32) ));
12350 assign( t6, binop( Iop_And32,
12351 binop(Iop_Shr32, getYMMRegLane32(rE,6), mkU8(25)),
12352 mkU32(64) ));
12353 assign( t7, binop( Iop_And32,
12354 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(24)),
12355 mkU32(128) ));
12356 putIReg32( rG, binop(Iop_Or32,
12357 binop(Iop_Or32,
12358 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
12359 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ),
12360 binop(Iop_Or32,
12361 binop(Iop_Or32, mkexpr(t4), mkexpr(t5)),
12362 binop(Iop_Or32, mkexpr(t6), mkexpr(t7)) ) ) );
12363 DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
12364 return delta;
12368 static Long dis_MOVMSKPD_128 ( const VexAbiInfo* vbi, Prefix pfx,
12369 Long delta, Bool isAvx )
12371 UChar modrm = getUChar(delta);
12372 UInt rG = gregOfRexRM(pfx,modrm);
12373 UInt rE = eregOfRexRM(pfx,modrm);
12374 IRTemp t0 = newTemp(Ity_I32);
12375 IRTemp t1 = newTemp(Ity_I32);
12376 delta += 1;
12377 assign( t0, binop( Iop_And32,
12378 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(31)),
12379 mkU32(1) ));
12380 assign( t1, binop( Iop_And32,
12381 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(30)),
12382 mkU32(2) ));
12383 putIReg32( rG, binop(Iop_Or32, mkexpr(t0), mkexpr(t1) ) );
12384 DIP("%smovmskpd %s,%s\n", isAvx ? "v" : "",
12385 nameXMMReg(rE), nameIReg32(rG));
12386 return delta;
12390 static Long dis_MOVMSKPD_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
12392 UChar modrm = getUChar(delta);
12393 UInt rG = gregOfRexRM(pfx,modrm);
12394 UInt rE = eregOfRexRM(pfx,modrm);
12395 IRTemp t0 = newTemp(Ity_I32);
12396 IRTemp t1 = newTemp(Ity_I32);
12397 IRTemp t2 = newTemp(Ity_I32);
12398 IRTemp t3 = newTemp(Ity_I32);
12399 delta += 1;
12400 assign( t0, binop( Iop_And32,
12401 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(31)),
12402 mkU32(1) ));
12403 assign( t1, binop( Iop_And32,
12404 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(30)),
12405 mkU32(2) ));
12406 assign( t2, binop( Iop_And32,
12407 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(29)),
12408 mkU32(4) ));
12409 assign( t3, binop( Iop_And32,
12410 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(28)),
12411 mkU32(8) ));
12412 putIReg32( rG, binop(Iop_Or32,
12413 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
12414 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) );
12415 DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
12416 return delta;
12420 /* Note, this also handles SSE(1) insns. */
12421 __attribute__((noinline))
12422 static
12423 Long dis_ESC_0F__SSE2 ( Bool* decode_OK,
12424 const VexArchInfo* archinfo,
12425 const VexAbiInfo* vbi,
12426 Prefix pfx, Int sz, Long deltaIN,
12427 DisResult* dres )
12429 IRTemp addr = IRTemp_INVALID;
12430 IRTemp t0 = IRTemp_INVALID;
12431 IRTemp t1 = IRTemp_INVALID;
12432 IRTemp t2 = IRTemp_INVALID;
12433 IRTemp t3 = IRTemp_INVALID;
12434 IRTemp t4 = IRTemp_INVALID;
12435 IRTemp t5 = IRTemp_INVALID;
12436 IRTemp t6 = IRTemp_INVALID;
12437 UChar modrm = 0;
12438 Int alen = 0;
12439 HChar dis_buf[50];
12441 *decode_OK = False;
12443 Long delta = deltaIN;
12444 UChar opc = getUChar(delta);
12445 delta++;
12446 switch (opc) {
12448 case 0x10:
12449 if (have66noF2noF3(pfx)
12450 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12451 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
12452 modrm = getUChar(delta);
12453 if (epartIsReg(modrm)) {
12454 putXMMReg( gregOfRexRM(pfx,modrm),
12455 getXMMReg( eregOfRexRM(pfx,modrm) ));
12456 DIP("movupd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12457 nameXMMReg(gregOfRexRM(pfx,modrm)));
12458 delta += 1;
12459 } else {
12460 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12461 putXMMReg( gregOfRexRM(pfx,modrm),
12462 loadLE(Ity_V128, mkexpr(addr)) );
12463 DIP("movupd %s,%s\n", dis_buf,
12464 nameXMMReg(gregOfRexRM(pfx,modrm)));
12465 delta += alen;
12467 goto decode_success;
12469 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
12470 G (lo half xmm). If E is mem, upper half of G is zeroed out.
12471 If E is reg, upper half of G is unchanged. */
12472 if (haveF2no66noF3(pfx)
12473 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) ) {
12474 modrm = getUChar(delta);
12475 if (epartIsReg(modrm)) {
12476 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
12477 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
12478 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12479 nameXMMReg(gregOfRexRM(pfx,modrm)));
12480 delta += 1;
12481 } else {
12482 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12483 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
12484 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
12485 loadLE(Ity_I64, mkexpr(addr)) );
12486 DIP("movsd %s,%s\n", dis_buf,
12487 nameXMMReg(gregOfRexRM(pfx,modrm)));
12488 delta += alen;
12490 goto decode_success;
12492 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
12493 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
12494 if (haveF3no66noF2(pfx)
12495 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12496 modrm = getUChar(delta);
12497 if (epartIsReg(modrm)) {
12498 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
12499 getXMMRegLane32( eregOfRexRM(pfx,modrm), 0 ));
12500 DIP("movss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12501 nameXMMReg(gregOfRexRM(pfx,modrm)));
12502 delta += 1;
12503 } else {
12504 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12505 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
12506 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
12507 loadLE(Ity_I32, mkexpr(addr)) );
12508 DIP("movss %s,%s\n", dis_buf,
12509 nameXMMReg(gregOfRexRM(pfx,modrm)));
12510 delta += alen;
12512 goto decode_success;
12514 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
12515 if (haveNo66noF2noF3(pfx)
12516 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12517 modrm = getUChar(delta);
12518 if (epartIsReg(modrm)) {
12519 putXMMReg( gregOfRexRM(pfx,modrm),
12520 getXMMReg( eregOfRexRM(pfx,modrm) ));
12521 DIP("movups %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12522 nameXMMReg(gregOfRexRM(pfx,modrm)));
12523 delta += 1;
12524 } else {
12525 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12526 putXMMReg( gregOfRexRM(pfx,modrm),
12527 loadLE(Ity_V128, mkexpr(addr)) );
12528 DIP("movups %s,%s\n", dis_buf,
12529 nameXMMReg(gregOfRexRM(pfx,modrm)));
12530 delta += alen;
12532 goto decode_success;
12534 break;
12536 case 0x11:
12537 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
12538 or lo half xmm). */
12539 if (haveF2no66noF3(pfx)
12540 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12541 modrm = getUChar(delta);
12542 if (epartIsReg(modrm)) {
12543 putXMMRegLane64( eregOfRexRM(pfx,modrm), 0,
12544 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
12545 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12546 nameXMMReg(eregOfRexRM(pfx,modrm)));
12547 delta += 1;
12548 } else {
12549 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12550 storeLE( mkexpr(addr),
12551 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
12552 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12553 dis_buf);
12554 delta += alen;
12556 goto decode_success;
12558 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
12559 or lo 1/4 xmm). */
12560 if (haveF3no66noF2(pfx) && sz == 4) {
12561 modrm = getUChar(delta);
12562 if (epartIsReg(modrm)) {
12563 /* fall through, we don't yet have a test case */
12564 } else {
12565 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12566 storeLE( mkexpr(addr),
12567 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
12568 DIP("movss %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12569 dis_buf);
12570 delta += alen;
12571 goto decode_success;
12574 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
12575 if (have66noF2noF3(pfx)
12576 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12577 modrm = getUChar(delta);
12578 if (epartIsReg(modrm)) {
12579 putXMMReg( eregOfRexRM(pfx,modrm),
12580 getXMMReg( gregOfRexRM(pfx,modrm) ) );
12581 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12582 nameXMMReg(eregOfRexRM(pfx,modrm)));
12583 delta += 1;
12584 } else {
12585 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12586 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
12587 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12588 dis_buf );
12589 delta += alen;
12591 goto decode_success;
12593 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
12594 if (haveNo66noF2noF3(pfx)
12595 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12596 modrm = getUChar(delta);
12597 if (epartIsReg(modrm)) {
12598 /* fall through; awaiting test case */
12599 } else {
12600 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12601 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
12602 DIP("movups %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12603 dis_buf );
12604 delta += alen;
12605 goto decode_success;
12608 break;
12610 case 0x12:
12611 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
12612 /* Identical to MOVLPS ? */
12613 if (have66noF2noF3(pfx)
12614 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12615 modrm = getUChar(delta);
12616 if (epartIsReg(modrm)) {
12617 /* fall through; apparently reg-reg is not possible */
12618 } else {
12619 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12620 delta += alen;
12621 putXMMRegLane64( gregOfRexRM(pfx,modrm),
12622 0/*lower lane*/,
12623 loadLE(Ity_I64, mkexpr(addr)) );
12624 DIP("movlpd %s, %s\n",
12625 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
12626 goto decode_success;
12629 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
12630 /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
12631 if (haveNo66noF2noF3(pfx)
12632 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12633 modrm = getUChar(delta);
12634 if (epartIsReg(modrm)) {
12635 delta += 1;
12636 putXMMRegLane64( gregOfRexRM(pfx,modrm),
12637 0/*lower lane*/,
12638 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ));
12639 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12640 nameXMMReg(gregOfRexRM(pfx,modrm)));
12641 } else {
12642 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12643 delta += alen;
12644 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0/*lower lane*/,
12645 loadLE(Ity_I64, mkexpr(addr)) );
12646 DIP("movlps %s, %s\n",
12647 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
12649 goto decode_success;
12651 break;
12653 case 0x13:
12654 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
12655 if (haveNo66noF2noF3(pfx)
12656 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12657 modrm = getUChar(delta);
12658 if (!epartIsReg(modrm)) {
12659 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12660 delta += alen;
12661 storeLE( mkexpr(addr),
12662 getXMMRegLane64( gregOfRexRM(pfx,modrm),
12663 0/*lower lane*/ ) );
12664 DIP("movlps %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
12665 dis_buf);
12666 goto decode_success;
12668 /* else fall through */
12670 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
12671 /* Identical to MOVLPS ? */
12672 if (have66noF2noF3(pfx)
12673 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12674 modrm = getUChar(delta);
12675 if (!epartIsReg(modrm)) {
12676 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12677 delta += alen;
12678 storeLE( mkexpr(addr),
12679 getXMMRegLane64( gregOfRexRM(pfx,modrm),
12680 0/*lower lane*/ ) );
12681 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
12682 dis_buf);
12683 goto decode_success;
12685 /* else fall through */
12687 break;
12689 case 0x14:
12690 case 0x15:
12691 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
12692 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
12693 /* These just appear to be special cases of SHUFPS */
12694 if (haveNo66noF2noF3(pfx) && sz == 4) {
12695 Bool hi = toBool(opc == 0x15);
12696 IRTemp sV = newTemp(Ity_V128);
12697 IRTemp dV = newTemp(Ity_V128);
12698 modrm = getUChar(delta);
12699 UInt rG = gregOfRexRM(pfx,modrm);
12700 assign( dV, getXMMReg(rG) );
12701 if (epartIsReg(modrm)) {
12702 UInt rE = eregOfRexRM(pfx,modrm);
12703 assign( sV, getXMMReg(rE) );
12704 delta += 1;
12705 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
12706 nameXMMReg(rE), nameXMMReg(rG));
12707 } else {
12708 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12709 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12710 delta += alen;
12711 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
12712 dis_buf, nameXMMReg(rG));
12714 IRTemp res = math_UNPCKxPS_128( sV, dV, hi );
12715 putXMMReg( rG, mkexpr(res) );
12716 goto decode_success;
12718 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
12719 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
12720 /* These just appear to be special cases of SHUFPD */
12721 if (have66noF2noF3(pfx)
12722 && sz == 2 /* could be 8 if rex also present */) {
12723 Bool hi = toBool(opc == 0x15);
12724 IRTemp sV = newTemp(Ity_V128);
12725 IRTemp dV = newTemp(Ity_V128);
12726 modrm = getUChar(delta);
12727 UInt rG = gregOfRexRM(pfx,modrm);
12728 assign( dV, getXMMReg(rG) );
12729 if (epartIsReg(modrm)) {
12730 UInt rE = eregOfRexRM(pfx,modrm);
12731 assign( sV, getXMMReg(rE) );
12732 delta += 1;
12733 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
12734 nameXMMReg(rE), nameXMMReg(rG));
12735 } else {
12736 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12737 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12738 delta += alen;
12739 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
12740 dis_buf, nameXMMReg(rG));
12742 IRTemp res = math_UNPCKxPD_128( sV, dV, hi );
12743 putXMMReg( rG, mkexpr(res) );
12744 goto decode_success;
12746 break;
12748 case 0x16:
12749 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
12750 /* This seems identical to MOVHPS. This instruction encoding is
12751 completely crazy. */
12752 if (have66noF2noF3(pfx)
12753 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12754 modrm = getUChar(delta);
12755 if (epartIsReg(modrm)) {
12756 /* fall through; apparently reg-reg is not possible */
12757 } else {
12758 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12759 delta += alen;
12760 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
12761 loadLE(Ity_I64, mkexpr(addr)) );
12762 DIP("movhpd %s,%s\n", dis_buf,
12763 nameXMMReg( gregOfRexRM(pfx,modrm) ));
12764 goto decode_success;
12767 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
12768 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
12769 if (haveNo66noF2noF3(pfx)
12770 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12771 modrm = getUChar(delta);
12772 if (epartIsReg(modrm)) {
12773 delta += 1;
12774 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
12775 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ) );
12776 DIP("movhps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12777 nameXMMReg(gregOfRexRM(pfx,modrm)));
12778 } else {
12779 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12780 delta += alen;
12781 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
12782 loadLE(Ity_I64, mkexpr(addr)) );
12783 DIP("movhps %s,%s\n", dis_buf,
12784 nameXMMReg( gregOfRexRM(pfx,modrm) ));
12786 goto decode_success;
12788 break;
12790 case 0x17:
12791 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
12792 if (haveNo66noF2noF3(pfx)
12793 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12794 modrm = getUChar(delta);
12795 if (!epartIsReg(modrm)) {
12796 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12797 delta += alen;
12798 storeLE( mkexpr(addr),
12799 getXMMRegLane64( gregOfRexRM(pfx,modrm),
12800 1/*upper lane*/ ) );
12801 DIP("movhps %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
12802 dis_buf);
12803 goto decode_success;
12805 /* else fall through */
12807 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
12808 /* Again, this seems identical to MOVHPS. */
12809 if (have66noF2noF3(pfx)
12810 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12811 modrm = getUChar(delta);
12812 if (!epartIsReg(modrm)) {
12813 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12814 delta += alen;
12815 storeLE( mkexpr(addr),
12816 getXMMRegLane64( gregOfRexRM(pfx,modrm),
12817 1/*upper lane*/ ) );
12818 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
12819 dis_buf);
12820 goto decode_success;
12822 /* else fall through */
12824 break;
12826 case 0x18:
12827 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
12828 /* 0F 18 /1 = PREFETCHT0 -- with various different hints */
12829 /* 0F 18 /2 = PREFETCHT1 */
12830 /* 0F 18 /3 = PREFETCHT2 */
12831 if (haveNo66noF2noF3(pfx)
12832 && !epartIsReg(getUChar(delta))
12833 && gregLO3ofRM(getUChar(delta)) >= 0
12834 && gregLO3ofRM(getUChar(delta)) <= 3) {
12835 const HChar* hintstr = "??";
12837 modrm = getUChar(delta);
12838 vassert(!epartIsReg(modrm));
12840 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12841 delta += alen;
12843 switch (gregLO3ofRM(modrm)) {
12844 case 0: hintstr = "nta"; break;
12845 case 1: hintstr = "t0"; break;
12846 case 2: hintstr = "t1"; break;
12847 case 3: hintstr = "t2"; break;
12848 default: vassert(0);
12851 DIP("prefetch%s %s\n", hintstr, dis_buf);
12852 goto decode_success;
12854 break;
12856 case 0x28:
12857 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
12858 if (have66noF2noF3(pfx)
12859 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12860 modrm = getUChar(delta);
12861 if (epartIsReg(modrm)) {
12862 putXMMReg( gregOfRexRM(pfx,modrm),
12863 getXMMReg( eregOfRexRM(pfx,modrm) ));
12864 DIP("movapd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12865 nameXMMReg(gregOfRexRM(pfx,modrm)));
12866 delta += 1;
12867 } else {
12868 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12869 gen_SEGV_if_not_16_aligned( addr );
12870 putXMMReg( gregOfRexRM(pfx,modrm),
12871 loadLE(Ity_V128, mkexpr(addr)) );
12872 DIP("movapd %s,%s\n", dis_buf,
12873 nameXMMReg(gregOfRexRM(pfx,modrm)));
12874 delta += alen;
12876 goto decode_success;
12878 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
12879 if (haveNo66noF2noF3(pfx)
12880 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12881 modrm = getUChar(delta);
12882 if (epartIsReg(modrm)) {
12883 putXMMReg( gregOfRexRM(pfx,modrm),
12884 getXMMReg( eregOfRexRM(pfx,modrm) ));
12885 DIP("movaps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12886 nameXMMReg(gregOfRexRM(pfx,modrm)));
12887 delta += 1;
12888 } else {
12889 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12890 gen_SEGV_if_not_16_aligned( addr );
12891 putXMMReg( gregOfRexRM(pfx,modrm),
12892 loadLE(Ity_V128, mkexpr(addr)) );
12893 DIP("movaps %s,%s\n", dis_buf,
12894 nameXMMReg(gregOfRexRM(pfx,modrm)));
12895 delta += alen;
12897 goto decode_success;
12899 break;
12901 case 0x29:
12902 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
12903 if (haveNo66noF2noF3(pfx)
12904 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12905 modrm = getUChar(delta);
12906 if (epartIsReg(modrm)) {
12907 putXMMReg( eregOfRexRM(pfx,modrm),
12908 getXMMReg( gregOfRexRM(pfx,modrm) ));
12909 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12910 nameXMMReg(eregOfRexRM(pfx,modrm)));
12911 delta += 1;
12912 } else {
12913 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12914 gen_SEGV_if_not_16_aligned( addr );
12915 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
12916 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12917 dis_buf );
12918 delta += alen;
12920 goto decode_success;
12922 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
12923 if (have66noF2noF3(pfx)
12924 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12925 modrm = getUChar(delta);
12926 if (epartIsReg(modrm)) {
12927 putXMMReg( eregOfRexRM(pfx,modrm),
12928 getXMMReg( gregOfRexRM(pfx,modrm) ) );
12929 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12930 nameXMMReg(eregOfRexRM(pfx,modrm)));
12931 delta += 1;
12932 } else {
12933 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12934 gen_SEGV_if_not_16_aligned( addr );
12935 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
12936 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12937 dis_buf );
12938 delta += alen;
12940 goto decode_success;
12942 break;
12944 case 0x2A:
12945 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
12946 half xmm */
12947 if (haveNo66noF2noF3(pfx) && sz == 4) {
12948 IRTemp arg64 = newTemp(Ity_I64);
12949 IRTemp rmode = newTemp(Ity_I32);
12951 modrm = getUChar(delta);
12952 if (epartIsReg(modrm)) {
12953 /* Only switch to MMX mode if the source is a MMX register.
12954 See comments on CVTPI2PD for details. Fixes #357059. */
12955 do_MMX_preamble();
12956 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
12957 delta += 1;
12958 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
12959 nameXMMReg(gregOfRexRM(pfx,modrm)));
12960 } else {
12961 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12962 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
12963 delta += alen;
12964 DIP("cvtpi2ps %s,%s\n", dis_buf,
12965 nameXMMReg(gregOfRexRM(pfx,modrm)) );
12968 assign( rmode, get_sse_roundingmode() );
12970 putXMMRegLane32F(
12971 gregOfRexRM(pfx,modrm), 0,
12972 binop(Iop_F64toF32,
12973 mkexpr(rmode),
12974 unop(Iop_I32StoF64,
12975 unop(Iop_64to32, mkexpr(arg64)) )) );
12977 putXMMRegLane32F(
12978 gregOfRexRM(pfx,modrm), 1,
12979 binop(Iop_F64toF32,
12980 mkexpr(rmode),
12981 unop(Iop_I32StoF64,
12982 unop(Iop_64HIto32, mkexpr(arg64)) )) );
12984 goto decode_success;
12986 /* F3 0F 2A = CVTSI2SS
12987 -- sz==4: convert I32 in mem/ireg to F32 in low quarter xmm
12988 -- sz==8: convert I64 in mem/ireg to F32 in low quarter xmm */
12989 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) {
12990 IRTemp rmode = newTemp(Ity_I32);
12991 assign( rmode, get_sse_roundingmode() );
12992 modrm = getUChar(delta);
12993 if (sz == 4) {
12994 IRTemp arg32 = newTemp(Ity_I32);
12995 if (epartIsReg(modrm)) {
12996 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
12997 delta += 1;
12998 DIP("cvtsi2ss %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
12999 nameXMMReg(gregOfRexRM(pfx,modrm)));
13000 } else {
13001 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13002 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
13003 delta += alen;
13004 DIP("cvtsi2ss %s,%s\n", dis_buf,
13005 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13007 putXMMRegLane32F(
13008 gregOfRexRM(pfx,modrm), 0,
13009 binop(Iop_F64toF32,
13010 mkexpr(rmode),
13011 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
13012 } else {
13013 /* sz == 8 */
13014 IRTemp arg64 = newTemp(Ity_I64);
13015 if (epartIsReg(modrm)) {
13016 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
13017 delta += 1;
13018 DIP("cvtsi2ssq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
13019 nameXMMReg(gregOfRexRM(pfx,modrm)));
13020 } else {
13021 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13022 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
13023 delta += alen;
13024 DIP("cvtsi2ssq %s,%s\n", dis_buf,
13025 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13027 putXMMRegLane32F(
13028 gregOfRexRM(pfx,modrm), 0,
13029 binop(Iop_F64toF32,
13030 mkexpr(rmode),
13031 binop(Iop_I64StoF64, mkexpr(rmode), mkexpr(arg64)) ) );
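            /* Unlike I32->F64, an I64->F64 conversion can be inexact,
               so Iop_I64StoF64 takes the rounding mode; the outer
               F64toF32 then rounds again to single precision. */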
13033 goto decode_success;
13035 /* F2 0F 2A = CVTSI2SD
13036 when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm
13037 when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm
13039 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) {
13040 modrm = getUChar(delta);
13041 if (sz == 4) {
13042 IRTemp arg32 = newTemp(Ity_I32);
13043 if (epartIsReg(modrm)) {
13044 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
13045 delta += 1;
13046 DIP("cvtsi2sdl %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
13047 nameXMMReg(gregOfRexRM(pfx,modrm)));
13048 } else {
13049 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13050 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
13051 delta += alen;
13052 DIP("cvtsi2sdl %s,%s\n", dis_buf,
13053 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13055 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
13056 unop(Iop_I32StoF64, mkexpr(arg32))
13058 } else {
13059 /* sz == 8 */
13060 IRTemp arg64 = newTemp(Ity_I64);
13061 if (epartIsReg(modrm)) {
13062 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
13063 delta += 1;
13064 DIP("cvtsi2sdq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
13065 nameXMMReg(gregOfRexRM(pfx,modrm)));
13066 } else {
13067 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13068 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
13069 delta += alen;
13070 DIP("cvtsi2sdq %s,%s\n", dis_buf,
13071 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13073 putXMMRegLane64F(
13074 gregOfRexRM(pfx,modrm),
13076 binop( Iop_I64StoF64,
13077 get_sse_roundingmode(),
13078 mkexpr(arg64)
13082 goto decode_success;
13084 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
13085 xmm(G) */
13086 if (have66noF2noF3(pfx) && sz == 2) {
13087 IRTemp arg64 = newTemp(Ity_I64);
13089 modrm = getUChar(delta);
13090 if (epartIsReg(modrm)) {
13091 /* Only switch to MMX mode if the source is a MMX register.
13092 This is inconsistent with all other instructions which
13093 convert between XMM and (M64 or MMX), which always switch
13094 to MMX mode even if the 64-bit operand is M64 and not MMX. At
13095 least, that's what the Intel docs seem to me to say.
13096 Fixes #210264. */
13097 do_MMX_preamble();
13098 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
13099 delta += 1;
13100 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
13101 nameXMMReg(gregOfRexRM(pfx,modrm)));
13102 } else {
13103 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13104 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
13105 delta += alen;
13106 DIP("cvtpi2pd %s,%s\n", dis_buf,
13107 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13110 putXMMRegLane64F(
13111 gregOfRexRM(pfx,modrm), 0,
13112 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) )
13115 putXMMRegLane64F(
13116 gregOfRexRM(pfx,modrm), 1,
13117 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) )
13120 goto decode_success;
13122 break;
13124 case 0x2B:
13125 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
13126 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
13127 if ( (haveNo66noF2noF3(pfx) && sz == 4)
13128 || (have66noF2noF3(pfx) && sz == 2) ) {
13129 modrm = getUChar(delta);
13130 if (!epartIsReg(modrm)) {
13131 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13132 gen_SEGV_if_not_16_aligned( addr );
13133 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
13134 DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
13135 dis_buf,
13136 nameXMMReg(gregOfRexRM(pfx,modrm)));
13137 delta += alen;
13138 goto decode_success;
13140 /* else fall through */
13142 break;
13144 case 0x2C:
13145 case 0x2D:
13146 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
13147 I32 in mmx, according to prevailing SSE rounding mode */
13148 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
13149 I32 in mmx, rounding towards zero */
13150 if (haveNo66noF2noF3(pfx) && sz == 4) {
13151 IRTemp dst64 = newTemp(Ity_I64);
13152 IRTemp rmode = newTemp(Ity_I32);
13153 IRTemp f32lo = newTemp(Ity_F32);
13154 IRTemp f32hi = newTemp(Ity_F32);
13155 Bool r2zero = toBool(opc == 0x2C);
13157 do_MMX_preamble();
13158 modrm = getUChar(delta);
13160 if (epartIsReg(modrm)) {
13161 delta += 1;
13162 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
13163 assign(f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1));
13164 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
13165 nameXMMReg(eregOfRexRM(pfx,modrm)),
13166 nameMMXReg(gregLO3ofRM(modrm)));
13167 } else {
13168 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13169 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
13170 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add64,
13171 mkexpr(addr),
13172 mkU64(4) )));
13173 delta += alen;
13174 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
13175 dis_buf,
13176 nameMMXReg(gregLO3ofRM(modrm)));
13179 if (r2zero) {
13180 assign(rmode, mkU32((UInt)Irrm_ZERO) );
13181 } else {
13182 assign( rmode, get_sse_roundingmode() );
13185 assign(
13186 dst64,
13187 binop( Iop_32HLto64,
13188 binop( Iop_F64toI32S,
13189 mkexpr(rmode),
13190 unop( Iop_F32toF64, mkexpr(f32hi) ) ),
13191 binop( Iop_F64toI32S,
13192 mkexpr(rmode),
13193 unop( Iop_F32toF64, mkexpr(f32lo) ) )
13197 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
13198 goto decode_success;
13200 /* F3 0F 2D = CVTSS2SI
13201 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
13202 according to prevailing SSE rounding mode
13203 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
13204 according to prevailing SSE rounding mode
13206 /* F3 0F 2C = CVTTSS2SI
13207 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
13208 truncating towards zero
13209 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
13210 truncating towards zero
13212 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) {
13213 delta = dis_CVTxSS2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz);
13214 goto decode_success;
13216 /* F2 0F 2D = CVTSD2SI
13217 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
13218 according to prevailing SSE rounding mode
13219 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
13220 according to prevailing SSE rounding mode
13222 /* F2 0F 2C = CVTTSD2SI
13223 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
13224 truncating towards zero
13225 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
13226 truncating towards zero
13228 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) {
13229 delta = dis_CVTxSD2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz);
13230 goto decode_success;
13232 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
13233 I32 in mmx, according to prevailing SSE rounding mode */
13234 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
13235 I32 in mmx, rounding towards zero */
13236 if (have66noF2noF3(pfx) && sz == 2) {
13237 IRTemp dst64 = newTemp(Ity_I64);
13238 IRTemp rmode = newTemp(Ity_I32);
13239 IRTemp f64lo = newTemp(Ity_F64);
13240 IRTemp f64hi = newTemp(Ity_F64);
13241 Bool r2zero = toBool(opc == 0x2C);
13243 do_MMX_preamble();
13244 modrm = getUChar(delta);
13246 if (epartIsReg(modrm)) {
13247 delta += 1;
13248 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
13249 assign(f64hi, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 1));
13250 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
13251 nameXMMReg(eregOfRexRM(pfx,modrm)),
13252 nameMMXReg(gregLO3ofRM(modrm)));
13253 } else {
13254 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13255 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
13256 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add64,
13257 mkexpr(addr),
13258 mkU64(8) )));
13259 delta += alen;
13260 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
13261 dis_buf,
13262 nameMMXReg(gregLO3ofRM(modrm)));
13265 if (r2zero) {
13266 assign(rmode, mkU32((UInt)Irrm_ZERO) );
13267 } else {
13268 assign( rmode, get_sse_roundingmode() );
13271 assign(
13272 dst64,
13273 binop( Iop_32HLto64,
13274 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
13275 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
13279 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
13280 goto decode_success;
13282 break;
13284 case 0x2E:
13285 case 0x2F:
13286 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
13287 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */
13288 if (have66noF2noF3(pfx) && sz == 2) {
13289 delta = dis_COMISD( vbi, pfx, delta, False/*!isAvx*/, opc );
13290 goto decode_success;
13292 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
13293 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */
13294 if (haveNo66noF2noF3(pfx) && sz == 4) {
13295 delta = dis_COMISS( vbi, pfx, delta, False/*!isAvx*/, opc );
13296 goto decode_success;
13298 break;
13300 case 0x50:
13301 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
13302 to 4 lowest bits of ireg(G) */
13303 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
13304 && epartIsReg(getUChar(delta))) {
13305 /* sz == 8 is a kludge to handle insns with REX.W redundantly
13306 set to 1, which has been known to happen:
13308 4c 0f 50 d9 rex64X movmskps %xmm1,%r11d
13310 20071106: Intel docs say that REX.W isn't redundant: when
13311 present, a 64-bit register is written; when not present, only
13312 the 32-bit half is written. However, testing on a Core2
13313 machine suggests the entire 64 bit register is written
13314 irrespective of the status of REX.W. That could be because
13315 of the default rule that says "if the lower half of a 32-bit
13316 register is written, the upper half is zeroed". By using
13317 putIReg32 here we inadvertently produce the same behaviour as
13318 the Core2, for the same reason -- putIReg32 implements said
13319 rule.
13321 AMD docs give no indication that REX.W is even valid for this
13322 insn. */
13323 delta = dis_MOVMSKPS_128( vbi, pfx, delta, False/*!isAvx*/ );
13324 goto decode_success;
13326 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
13327 2 lowest bits of ireg(G) */
13328 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) {
13329 /* sz == 8 is a kludge to handle insns with REX.W redundantly
13330 set to 1, which has been known to happen:
13331 66 4c 0f 50 d9 rex64X movmskpd %xmm1,%r11d
13332 20071106: see further comments on MOVMSKPS implementation above.
13334 delta = dis_MOVMSKPD_128( vbi, pfx, delta, False/*!isAvx*/ );
13335 goto decode_success;
13337 break;
13339 case 0x51:
13340 /* F3 0F 51 = SQRTSS -- sqrt 32F0x4 from R/M to R */
13341 if (haveF3no66noF2(pfx) && sz == 4) {
13342 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
13343 "sqrtss", Iop_Sqrt32F0x4 );
13344 goto decode_success;
13346 /* 0F 51 = SQRTPS -- sqrt 32Fx4 from R/M to R */
13347 if (haveNo66noF2noF3(pfx) && sz == 4) {
13348 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
13349 "sqrtps", Iop_Sqrt32Fx4 );
13350 goto decode_success;
13352 /* F2 0F 51 = SQRTSD -- sqrt 64F0x2 from R/M to R */
13353 if (haveF2no66noF3(pfx) && sz == 4) {
13354 delta = dis_SSE_E_to_G_unary_lo64( vbi, pfx, delta,
13355 "sqrtsd", Iop_Sqrt64F0x2 );
13356 goto decode_success;
13358 /* 66 0F 51 = SQRTPD -- sqrt 64Fx2 from R/M to R */
13359 if (have66noF2noF3(pfx) && sz == 2) {
13360 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
13361 "sqrtpd", Iop_Sqrt64Fx2 );
13362 goto decode_success;
13364 break;
13366 case 0x52:
13367 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
13368 if (haveF3no66noF2(pfx) && sz == 4) {
13369 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
13370 "rsqrtss", Iop_RSqrtEst32F0x4 );
13371 goto decode_success;
13373 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
13374 if (haveNo66noF2noF3(pfx) && sz == 4) {
13375 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
13376 "rsqrtps", Iop_RSqrtEst32Fx4 );
13377 goto decode_success;
13379 break;
13381 case 0x53:
13382 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
13383 if (haveF3no66noF2(pfx) && sz == 4) {
13384 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
13385 "rcpss", Iop_RecipEst32F0x4 );
13386 goto decode_success;
13388 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
13389 if (haveNo66noF2noF3(pfx) && sz == 4) {
13390 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
13391 "rcpps", Iop_RecipEst32Fx4 );
13392 goto decode_success;
13394 break;
13396 case 0x54:
13397 /* 0F 54 = ANDPS -- G = G and E */
13398 if (haveNo66noF2noF3(pfx) && sz == 4) {
13399 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andps", Iop_AndV128 );
13400 goto decode_success;
13402 /* 66 0F 54 = ANDPD -- G = G and E */
13403 if (have66noF2noF3(pfx) && sz == 2) {
13404 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andpd", Iop_AndV128 );
13405 goto decode_success;
13407 break;
13409 case 0x55:
13410 /* 0F 55 = ANDNPS -- G = (not G) and E */
13411 if (haveNo66noF2noF3(pfx) && sz == 4) {
13412 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnps",
13413 Iop_AndV128 );
13414 goto decode_success;
13416 /* 66 0F 55 = ANDNPD -- G = (not G) and E */
13417 if (have66noF2noF3(pfx) && sz == 2) {
13418 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnpd",
13419 Iop_AndV128 );
13420 goto decode_success;
13422 break;
13424 case 0x56:
13425 /* 0F 56 = ORPS -- G = G or E */
13426 if (haveNo66noF2noF3(pfx) && sz == 4) {
13427 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orps", Iop_OrV128 );
13428 goto decode_success;
13430 /* 66 0F 56 = ORPD -- G = G or E */
13431 if (have66noF2noF3(pfx) && sz == 2) {
13432 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orpd", Iop_OrV128 );
13433 goto decode_success;
13435 break;
13437 case 0x57:
13438 /* 66 0F 57 = XORPD -- G = G xor E */
13439 if (have66noF2noF3(pfx) && sz == 2) {
13440 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorpd", Iop_XorV128 );
13441 goto decode_success;
13443 /* 0F 57 = XORPS -- G = G xor E */
13444 if (haveNo66noF2noF3(pfx) && sz == 4) {
13445 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorps", Iop_XorV128 );
13446 goto decode_success;
13448 break;
13450 case 0x58:
13451 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
13452 if (haveNo66noF2noF3(pfx) && sz == 4) {
13453 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addps", Iop_Add32Fx4 );
13454 goto decode_success;
13456 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
13457 if (haveF3no66noF2(pfx) && sz == 4) {
13458 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "addss", Iop_Add32F0x4 );
13459 goto decode_success;
13461 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
13462 if (haveF2no66noF3(pfx)
13463 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13464 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "addsd", Iop_Add64F0x2 );
13465 goto decode_success;
13467 /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
13468 if (have66noF2noF3(pfx)
13469 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
13470 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addpd", Iop_Add64Fx2 );
13471 goto decode_success;
13473 break;
13475 case 0x59:
13476 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
13477 if (haveF2no66noF3(pfx)
13478 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13479 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "mulsd", Iop_Mul64F0x2 );
13480 goto decode_success;
13482 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
13483 if (haveF3no66noF2(pfx) && sz == 4) {
13484 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "mulss", Iop_Mul32F0x4 );
13485 goto decode_success;
13487 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
13488 if (haveNo66noF2noF3(pfx) && sz == 4) {
13489 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulps", Iop_Mul32Fx4 );
13490 goto decode_success;
13492 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
13493 if (have66noF2noF3(pfx)
13494 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
13495 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulpd", Iop_Mul64Fx2 );
13496 goto decode_success;
13498 break;
13500 case 0x5A:
13501 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
13502 F64 in xmm(G). */
13503 if (haveNo66noF2noF3(pfx)
13504 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13505 delta = dis_CVTPS2PD_128( vbi, pfx, delta, False/*!isAvx*/ );
13506 goto decode_success;
13508 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
13509 low half xmm(G) */
13510 if (haveF3no66noF2(pfx) && sz == 4) {
13511 IRTemp f32lo = newTemp(Ity_F32);
13513 modrm = getUChar(delta);
13514 if (epartIsReg(modrm)) {
13515 delta += 1;
13516 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
13517 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13518 nameXMMReg(gregOfRexRM(pfx,modrm)));
13519 } else {
13520 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13521 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
13522 delta += alen;
13523 DIP("cvtss2sd %s,%s\n", dis_buf,
13524 nameXMMReg(gregOfRexRM(pfx,modrm)));
13527 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
13528 unop( Iop_F32toF64, mkexpr(f32lo) ) );
13530 goto decode_success;
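   /* Added note: no rounding mode is consulted here because widening
      F32 -> F64 is exact; contrast CVTSD2SS below, which must narrow
      and therefore reads the prevailing SSE rounding mode. */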
13532 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
13533 low 1/4 xmm(G), according to prevailing SSE rounding mode */
13534 if (haveF2no66noF3(pfx) && sz == 4) {
13535 IRTemp rmode = newTemp(Ity_I32);
13536 IRTemp f64lo = newTemp(Ity_F64);
13538 modrm = getUChar(delta);
13539 if (epartIsReg(modrm)) {
13540 delta += 1;
13541 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
13542 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13543 nameXMMReg(gregOfRexRM(pfx,modrm)));
13544 } else {
13545 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13546 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
13547 delta += alen;
13548 DIP("cvtsd2ss %s,%s\n", dis_buf,
13549 nameXMMReg(gregOfRexRM(pfx,modrm)));
13552 assign( rmode, get_sse_roundingmode() );
13553 putXMMRegLane32F(
13554 gregOfRexRM(pfx,modrm), 0,
13555 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) )
13558 goto decode_success;
13560 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
13561 lo half xmm(G), rounding according to prevailing SSE rounding
13562 mode, and zero upper half */
13563 /* Note, this is practically identical to CVTPD2DQ. It would be
13564 nice to merge the two. */
13565 if (have66noF2noF3(pfx) && sz == 2) {
13566 delta = dis_CVTPD2PS_128( vbi, pfx, delta, False/*!isAvx*/ );
13567 goto decode_success;
13569 break;
13571 case 0x5B:
13572 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
13573 xmm(G), rounding towards zero */
13574 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
13575 xmm(G), as per the prevailing rounding mode */
13576 if ( (have66noF2noF3(pfx) && sz == 2)
13577 || (haveF3no66noF2(pfx) && sz == 4) ) {
13578 Bool r2zero = toBool(sz == 4); // FIXME -- unreliable (???)
13579 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta, False/*!isAvx*/, r2zero );
13580 goto decode_success;
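   /* Added note: within the guard above, sz == 4 can only arise on the
      F3 (CVTTPS2DQ) path, since the 66 path forces sz == 2; hence
      r2zero.  A redundant REX.W would change sz and defeat this test,
      which is presumably what the FIXME refers to. */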
13582 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
13583 xmm(G) */
13584 if (haveNo66noF2noF3(pfx) && sz == 4) {
13585 delta = dis_CVTDQ2PS_128( vbi, pfx, delta, False/*!isAvx*/ );
13586 goto decode_success;
13588 break;
13590 case 0x5C:
13591 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
13592 if (haveF3no66noF2(pfx) && sz == 4) {
13593 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "subss", Iop_Sub32F0x4 );
13594 goto decode_success;
13596 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
13597 if (haveF2no66noF3(pfx)
13598 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13599 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "subsd", Iop_Sub64F0x2 );
13600 goto decode_success;
13602 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
13603 if (haveNo66noF2noF3(pfx) && sz == 4) {
13604 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subps", Iop_Sub32Fx4 );
13605 goto decode_success;
13607 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
13608 if (have66noF2noF3(pfx) && sz == 2) {
13609 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subpd", Iop_Sub64Fx2 );
13610 goto decode_success;
13612 break;
13614 case 0x5D:
13615 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
13616 if (haveNo66noF2noF3(pfx) && sz == 4) {
13617 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minps", Iop_Min32Fx4 );
13618 goto decode_success;
13620 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
13621 if (haveF3no66noF2(pfx) && sz == 4) {
13622 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "minss", Iop_Min32F0x4 );
13623 goto decode_success;
13625 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
13626 if (haveF2no66noF3(pfx)
13627 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13628 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "minsd", Iop_Min64F0x2 );
13629 goto decode_success;
13631 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
13632 if (have66noF2noF3(pfx) && sz == 2) {
13633 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minpd", Iop_Min64Fx2 );
13634 goto decode_success;
13636 break;
13638 case 0x5E:
13639 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
13640 if (haveF2no66noF3(pfx) && sz == 4) {
13641 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "divsd", Iop_Div64F0x2 );
13642 goto decode_success;
13644 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
13645 if (haveNo66noF2noF3(pfx) && sz == 4) {
13646 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divps", Iop_Div32Fx4 );
13647 goto decode_success;
13649 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
13650 if (haveF3no66noF2(pfx) && sz == 4) {
13651 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "divss", Iop_Div32F0x4 );
13652 goto decode_success;
13654 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
13655 if (have66noF2noF3(pfx) && sz == 2) {
13656 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divpd", Iop_Div64Fx2 );
13657 goto decode_success;
13659 break;
13661 case 0x5F:
13662 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
13663 if (haveNo66noF2noF3(pfx) && sz == 4) {
13664 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxps", Iop_Max32Fx4 );
13665 goto decode_success;
13667 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
13668 if (haveF3no66noF2(pfx) && sz == 4) {
13669 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "maxss", Iop_Max32F0x4 );
13670 goto decode_success;
13672 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
13673 if (haveF2no66noF3(pfx)
13674 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13675 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "maxsd", Iop_Max64F0x2 );
13676 goto decode_success;
13678 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
13679 if (have66noF2noF3(pfx) && sz == 2) {
13680 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxpd", Iop_Max64Fx2 );
13681 goto decode_success;
13683 break;
13685 case 0x60:
13686 /* 66 0F 60 = PUNPCKLBW */
13687 if (have66noF2noF3(pfx) && sz == 2) {
13688 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13689 "punpcklbw",
13690 Iop_InterleaveLO8x16, True );
13691 goto decode_success;
13693 break;
13695 case 0x61:
13696 /* 66 0F 61 = PUNPCKLWD */
13697 if (have66noF2noF3(pfx) && sz == 2) {
13698 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13699 "punpcklwd",
13700 Iop_InterleaveLO16x8, True );
13701 goto decode_success;
13703 break;
13705 case 0x62:
13706 /* 66 0F 62 = PUNPCKLDQ */
13707 if (have66noF2noF3(pfx) && sz == 2) {
13708 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13709 "punpckldq",
13710 Iop_InterleaveLO32x4, True );
13711 goto decode_success;
13713 break;
13715 case 0x63:
13716 /* 66 0F 63 = PACKSSWB */
13717 if (have66noF2noF3(pfx) && sz == 2) {
13718 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13719 "packsswb",
13720 Iop_QNarrowBin16Sto8Sx16, True );
13721 goto decode_success;
13723 break;
13725 case 0x64:
13726 /* 66 0F 64 = PCMPGTB */
13727 if (have66noF2noF3(pfx) && sz == 2) {
13728 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13729 "pcmpgtb", Iop_CmpGT8Sx16, False );
13730 goto decode_success;
13732 break;
13734 case 0x65:
13735 /* 66 0F 65 = PCMPGTW */
13736 if (have66noF2noF3(pfx) && sz == 2) {
13737 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13738 "pcmpgtw", Iop_CmpGT16Sx8, False );
13739 goto decode_success;
13741 break;
13743 case 0x66:
13744 /* 66 0F 66 = PCMPGTD */
13745 if (have66noF2noF3(pfx) && sz == 2) {
13746 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13747 "pcmpgtd", Iop_CmpGT32Sx4, False );
13748 goto decode_success;
13750 break;
13752 case 0x67:
13753 /* 66 0F 67 = PACKUSWB */
13754 if (have66noF2noF3(pfx) && sz == 2) {
13755 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13756 "packuswb",
13757 Iop_QNarrowBin16Sto8Ux16, True );
13758 goto decode_success;
13760 break;
13762 case 0x68:
13763 /* 66 0F 68 = PUNPCKHBW */
13764 if (have66noF2noF3(pfx) && sz == 2) {
13765 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13766 "punpckhbw",
13767 Iop_InterleaveHI8x16, True );
13768 goto decode_success;
13770 break;
13772 case 0x69:
13773 /* 66 0F 69 = PUNPCKHWD */
13774 if (have66noF2noF3(pfx) && sz == 2) {
13775 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13776 "punpckhwd",
13777 Iop_InterleaveHI16x8, True );
13778 goto decode_success;
13780 break;
13782 case 0x6A:
13783 /* 66 0F 6A = PUNPCKHDQ */
13784 if (have66noF2noF3(pfx) && sz == 2) {
13785 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13786 "punpckhdq",
13787 Iop_InterleaveHI32x4, True );
13788 goto decode_success;
13790 break;
13792 case 0x6B:
13793 /* 66 0F 6B = PACKSSDW */
13794 if (have66noF2noF3(pfx) && sz == 2) {
13795 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13796 "packssdw",
13797 Iop_QNarrowBin32Sto16Sx8, True );
13798 goto decode_success;
13800 break;
13802 case 0x6C:
13803 /* 66 0F 6C = PUNPCKLQDQ */
13804 if (have66noF2noF3(pfx) && sz == 2) {
13805 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13806 "punpcklqdq",
13807 Iop_InterleaveLO64x2, True );
13808 goto decode_success;
13810 break;
13812 case 0x6D:
13813 /* 66 0F 6D = PUNPCKHQDQ */
13814 if (have66noF2noF3(pfx) && sz == 2) {
13815 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13816 "punpckhqdq",
13817 Iop_InterleaveHI64x2, True );
13818 goto decode_success;
13820 break;
13822 case 0x6E:
13823 /* 66 0F 6E = MOVD from ireg32/m32 to xmm lo 1/4,
13824 zeroing high 3/4 of xmm. */
13825 /* or from ireg64/m64 to xmm lo 1/2,
13826 zeroing high 1/2 of xmm. */
13827 if (have66noF2noF3(pfx)) {
13828 vassert(sz == 2 || sz == 8);
13829 if (sz == 2) sz = 4;
13830 modrm = getUChar(delta);
13831 if (epartIsReg(modrm)) {
13832 delta += 1;
13833 if (sz == 4) {
13834 putXMMReg(
13835 gregOfRexRM(pfx,modrm),
13836 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) )
13838 DIP("movd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
13839 nameXMMReg(gregOfRexRM(pfx,modrm)));
13840 } else {
13841 putXMMReg(
13842 gregOfRexRM(pfx,modrm),
13843 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) )
13845 DIP("movq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
13846 nameXMMReg(gregOfRexRM(pfx,modrm)));
13848 } else {
13849 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
13850 delta += alen;
13851 putXMMReg(
13852 gregOfRexRM(pfx,modrm),
13853 sz == 4
13854 ? unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) )
13855 : unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)) )
13857 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q', dis_buf,
13858 nameXMMReg(gregOfRexRM(pfx,modrm)));
13860 goto decode_success;
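   /* Illustrative example (not decoder logic): "movd %eax,%xmm3" writes
      EAX into xmm3[31:0] and zeroes xmm3[127:32]; Iop_32UtoV128 and
      Iop_64UtoV128 express exactly that zero-extension to 128 bits. */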
13862 break;
13864 case 0x6F:
13865 if (have66noF2noF3(pfx)
13866 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
13867 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
13868 modrm = getUChar(delta);
13869 if (epartIsReg(modrm)) {
13870 putXMMReg( gregOfRexRM(pfx,modrm),
13871 getXMMReg( eregOfRexRM(pfx,modrm) ));
13872 DIP("movdqa %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13873 nameXMMReg(gregOfRexRM(pfx,modrm)));
13874 delta += 1;
13875 } else {
13876 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13877 gen_SEGV_if_not_16_aligned( addr );
13878 putXMMReg( gregOfRexRM(pfx,modrm),
13879 loadLE(Ity_V128, mkexpr(addr)) );
13880 DIP("movdqa %s,%s\n", dis_buf,
13881 nameXMMReg(gregOfRexRM(pfx,modrm)));
13882 delta += alen;
13884 goto decode_success;
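   /* Added note: the gen_SEGV_if_not_16_aligned call means a misaligned
      memory operand faults, as the architecture requires for MOVDQA;
      the MOVDQU case below performs the same move without the check. */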
13886 if (haveF3no66noF2(pfx) && sz == 4) {
13887 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
13888 modrm = getUChar(delta);
13889 if (epartIsReg(modrm)) {
13890 putXMMReg( gregOfRexRM(pfx,modrm),
13891 getXMMReg( eregOfRexRM(pfx,modrm) ));
13892 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13893 nameXMMReg(gregOfRexRM(pfx,modrm)));
13894 delta += 1;
13895 } else {
13896 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13897 putXMMReg( gregOfRexRM(pfx,modrm),
13898 loadLE(Ity_V128, mkexpr(addr)) );
13899 DIP("movdqu %s,%s\n", dis_buf,
13900 nameXMMReg(gregOfRexRM(pfx,modrm)));
13901 delta += alen;
13903 goto decode_success;
13905 break;
13907 case 0x70:
13908 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
13909 if (have66noF2noF3(pfx) && sz == 2) {
13910 delta = dis_PSHUFD_32x4( vbi, pfx, delta, False/*!writesYmm*/);
13911 goto decode_success;
13913 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
13914 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
13915 if (haveNo66noF2noF3(pfx) && sz == 4) {
13916 Int order;
13917 IRTemp sV, dV, s3, s2, s1, s0;
13918 s3 = s2 = s1 = s0 = IRTemp_INVALID;
13919 sV = newTemp(Ity_I64);
13920 dV = newTemp(Ity_I64);
13921 do_MMX_preamble();
13922 modrm = getUChar(delta);
13923 if (epartIsReg(modrm)) {
13924 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
13925 order = (Int)getUChar(delta+1);
13926 delta += 1+1;
13927 DIP("pshufw $%d,%s,%s\n", order,
13928 nameMMXReg(eregLO3ofRM(modrm)),
13929 nameMMXReg(gregLO3ofRM(modrm)));
13930 } else {
13931 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
13932 1/*extra byte after amode*/ );
13933 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
13934 order = (Int)getUChar(delta+alen);
13935 delta += 1+alen;
13936 DIP("pshufw $%d,%s,%s\n", order,
13937 dis_buf,
13938 nameMMXReg(gregLO3ofRM(modrm)));
13940 breakup64to16s( sV, &s3, &s2, &s1, &s0 );
13941 # define SEL(n) \
13942 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
13943 assign(dV,
13944 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
13945 SEL((order>>2)&3), SEL((order>>0)&3) )
13947 putMMXReg(gregLO3ofRM(modrm), mkexpr(dV));
13948 # undef SEL
13949 goto decode_success;
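   /* Illustrative example: for PSHUFW with order byte 0x1B (binary
      00 01 10 11), SEL picks s0 for lane 3, s1 for lane 2, s2 for
      lane 1 and s3 for lane 0 -- i.e. a full 16-bit lane reversal. */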
13951 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
13952 mem) to G(xmm), and copy upper half */
13953 if (haveF2no66noF3(pfx) && sz == 4) {
13954 delta = dis_PSHUFxW_128( vbi, pfx, delta,
13955 False/*!isAvx*/, False/*!xIsH*/ );
13956 goto decode_success;
13958 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
13959 mem) to G(xmm), and copy lower half */
13960 if (haveF3no66noF2(pfx) && sz == 4) {
13961 delta = dis_PSHUFxW_128( vbi, pfx, delta,
13962 False/*!isAvx*/, True/*xIsH*/ );
13963 goto decode_success;
13965 break;
13967 case 0x71:
13968 /* 66 0F 71 /2 ib = PSRLW by immediate */
13969 if (have66noF2noF3(pfx) && sz == 2
13970 && epartIsReg(getUChar(delta))
13971 && gregLO3ofRM(getUChar(delta)) == 2) {
13972 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlw", Iop_ShrN16x8 );
13973 goto decode_success;
13975 /* 66 0F 71 /4 ib = PSRAW by immediate */
13976 if (have66noF2noF3(pfx) && sz == 2
13977 && epartIsReg(getUChar(delta))
13978 && gregLO3ofRM(getUChar(delta)) == 4) {
13979 delta = dis_SSE_shiftE_imm( pfx, delta, "psraw", Iop_SarN16x8 );
13980 goto decode_success;
13982 /* 66 0F 71 /6 ib = PSLLW by immediate */
13983 if (have66noF2noF3(pfx) && sz == 2
13984 && epartIsReg(getUChar(delta))
13985 && gregLO3ofRM(getUChar(delta)) == 6) {
13986 delta = dis_SSE_shiftE_imm( pfx, delta, "psllw", Iop_ShlN16x8 );
13987 goto decode_success;
13989 break;
13991 case 0x72:
13992 /* 66 0F 72 /2 ib = PSRLD by immediate */
13993 if (have66noF2noF3(pfx) && sz == 2
13994 && epartIsReg(getUChar(delta))
13995 && gregLO3ofRM(getUChar(delta)) == 2) {
13996 delta = dis_SSE_shiftE_imm( pfx, delta, "psrld", Iop_ShrN32x4 );
13997 goto decode_success;
13999 /* 66 0F 72 /4 ib = PSRAD by immediate */
14000 if (have66noF2noF3(pfx) && sz == 2
14001 && epartIsReg(getUChar(delta))
14002 && gregLO3ofRM(getUChar(delta)) == 4) {
14003 delta = dis_SSE_shiftE_imm( pfx, delta, "psrad", Iop_SarN32x4 );
14004 goto decode_success;
14006 /* 66 0F 72 /6 ib = PSLLD by immediate */
14007 if (have66noF2noF3(pfx) && sz == 2
14008 && epartIsReg(getUChar(delta))
14009 && gregLO3ofRM(getUChar(delta)) == 6) {
14010 delta = dis_SSE_shiftE_imm( pfx, delta, "pslld", Iop_ShlN32x4 );
14011 goto decode_success;
14013 break;
14015 case 0x73:
14016 /* 66 0F 73 /3 ib = PSRLDQ by immediate */
14017 /* note, if mem case ever filled in, 1 byte after amode */
14018 if (have66noF2noF3(pfx) && sz == 2
14019 && epartIsReg(getUChar(delta))
14020 && gregLO3ofRM(getUChar(delta)) == 3) {
14021 Int imm = (Int)getUChar(delta+1);
14022 Int reg = eregOfRexRM(pfx,getUChar(delta));
14023 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg));
14024 delta += 2;
14025 IRTemp sV = newTemp(Ity_V128);
14026 assign( sV, getXMMReg(reg) );
14027 putXMMReg(reg, mkexpr(math_PSRLDQ( sV, imm )));
14028 goto decode_success;
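   /* Illustrative example: "psrldq $4,%xmm2" shifts the whole 128-bit
      value right by 4 *bytes* (32 bits), filling with zeroes at the
      top; architecturally, counts of 16 or more give an all-zero
      result. */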
14030 /* 66 0F 73 /7 ib = PSLLDQ by immediate */
14031 /* note, if mem case ever filled in, 1 byte after amode */
14032 if (have66noF2noF3(pfx) && sz == 2
14033 && epartIsReg(getUChar(delta))
14034 && gregLO3ofRM(getUChar(delta)) == 7) {
14035 Int imm = (Int)getUChar(delta+1);
14036 Int reg = eregOfRexRM(pfx,getUChar(delta));
14037 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg));
14038 vassert(imm >= 0 && imm <= 255);
14039 delta += 2;
14040 IRTemp sV = newTemp(Ity_V128);
14041 assign( sV, getXMMReg(reg) );
14042 putXMMReg(reg, mkexpr(math_PSLLDQ( sV, imm )));
14043 goto decode_success;
14045 /* 66 0F 73 /2 ib = PSRLQ by immediate */
14046 if (have66noF2noF3(pfx) && sz == 2
14047 && epartIsReg(getUChar(delta))
14048 && gregLO3ofRM(getUChar(delta)) == 2) {
14049 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlq", Iop_ShrN64x2 );
14050 goto decode_success;
14052 /* 66 0F 73 /6 ib = PSLLQ by immediate */
14053 if (have66noF2noF3(pfx) && sz == 2
14054 && epartIsReg(getUChar(delta))
14055 && gregLO3ofRM(getUChar(delta)) == 6) {
14056 delta = dis_SSE_shiftE_imm( pfx, delta, "psllq", Iop_ShlN64x2 );
14057 goto decode_success;
14059 break;
14061 case 0x74:
14062 /* 66 0F 74 = PCMPEQB */
14063 if (have66noF2noF3(pfx) && sz == 2) {
14064 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14065 "pcmpeqb", Iop_CmpEQ8x16, False );
14066 goto decode_success;
14068 break;
14070 case 0x75:
14071 /* 66 0F 75 = PCMPEQW */
14072 if (have66noF2noF3(pfx) && sz == 2) {
14073 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14074 "pcmpeqw", Iop_CmpEQ16x8, False );
14075 goto decode_success;
14077 break;
14079 case 0x76:
14080 /* 66 0F 76 = PCMPEQD */
14081 if (have66noF2noF3(pfx) && sz == 2) {
14082 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14083 "pcmpeqd", Iop_CmpEQ32x4, False );
14084 goto decode_success;
14086 break;
14088 case 0x7E:
14089 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
14090 G (lo half xmm). Upper half of G is zeroed out. */
14091 if (haveF3no66noF2(pfx)
14092 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
14093 modrm = getUChar(delta);
14094 if (epartIsReg(modrm)) {
14095 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
14096 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
14097 /* zero bits 127:64 */
14098 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkU64(0) );
14099 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
14100 nameXMMReg(gregOfRexRM(pfx,modrm)));
14101 delta += 1;
14102 } else {
14103 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14104 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
14105 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
14106 loadLE(Ity_I64, mkexpr(addr)) );
14107 DIP("movsd %s,%s\n", dis_buf,
14108 nameXMMReg(gregOfRexRM(pfx,modrm)));
14109 delta += alen;
14111 goto decode_success;
14113 /* 66 0F 7E = MOVD from xmm low 1/4 to ireg32 or m32. */
14114 /* or from xmm low 1/2 to ireg64 or m64. */
14115 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) {
14116 if (sz == 2) sz = 4;
14117 modrm = getUChar(delta);
14118 if (epartIsReg(modrm)) {
14119 delta += 1;
14120 if (sz == 4) {
14121 putIReg32( eregOfRexRM(pfx,modrm),
14122 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
14123 DIP("movd %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
14124 nameIReg32(eregOfRexRM(pfx,modrm)));
14125 } else {
14126 putIReg64( eregOfRexRM(pfx,modrm),
14127 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
14128 DIP("movq %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
14129 nameIReg64(eregOfRexRM(pfx,modrm)));
14131 } else {
14132 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
14133 delta += alen;
14134 storeLE( mkexpr(addr),
14135 sz == 4
14136 ? getXMMRegLane32(gregOfRexRM(pfx,modrm),0)
14137 : getXMMRegLane64(gregOfRexRM(pfx,modrm),0) );
14138 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q',
14139 nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
14141 goto decode_success;
14143 break;
14145 case 0x7F:
14146 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
14147 if (haveF3no66noF2(pfx) && sz == 4) {
14148 modrm = getUChar(delta);
14149 if (epartIsReg(modrm)) {
14150 goto decode_failure; /* awaiting test case */
14151 delta += 1;
14152 putXMMReg( eregOfRexRM(pfx,modrm),
14153 getXMMReg(gregOfRexRM(pfx,modrm)) );
14154 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
14155 nameXMMReg(eregOfRexRM(pfx,modrm)));
14156 } else {
14157 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
14158 delta += alen;
14159 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
14160 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
14162 goto decode_success;
14164 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
14165 if (have66noF2noF3(pfx) && sz == 2) {
14166 modrm = getUChar(delta);
14167 if (epartIsReg(modrm)) {
14168 delta += 1;
14169 putXMMReg( eregOfRexRM(pfx,modrm),
14170 getXMMReg(gregOfRexRM(pfx,modrm)) );
14171 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
14172 nameXMMReg(eregOfRexRM(pfx,modrm)));
14173 } else {
14174 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
14175 gen_SEGV_if_not_16_aligned( addr );
14176 delta += alen;
14177 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
14178 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
14180 goto decode_success;
14182 break;
14184 case 0xAE:
14185 /* 0F AE /7 = SFENCE -- flush pending operations to memory */
14186 if (haveNo66noF2noF3(pfx)
14187 && epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7
14188 && sz == 4) {
14189 delta += 1;
14190 /* Insert a memory fence. It's sometimes important that these
14191 are carried through to the generated code. */
14192 stmt( IRStmt_MBE(Imbe_Fence) );
14193 DIP("sfence\n");
14194 goto decode_success;
14196 /* mindless duplication follows .. */
14197 /* 0F AE /5 = LFENCE -- flush pending operations to memory */
14198 /* 0F AE /6 = MFENCE -- flush pending operations to memory */
14199 if (haveNo66noF2noF3(pfx)
14200 && epartIsReg(getUChar(delta))
14201 && (gregLO3ofRM(getUChar(delta)) == 5
14202 || gregLO3ofRM(getUChar(delta)) == 6)
14203 && sz == 4) {
14204 delta += 1;
14205 /* Insert a memory fence. It's sometimes important that these
14206 are carried through to the generated code. */
14207 stmt( IRStmt_MBE(Imbe_Fence) );
14208 DIP("%sfence\n", gregLO3ofRM(getUChar(delta-1))==5 ? "l" : "m");
14209 goto decode_success;
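   /* Added note: SFENCE, LFENCE and MFENCE all lower to the same
      Imbe_Fence statement; the IR does not distinguish load-only,
      store-only and full barriers. */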
14212 /* 0F AE /7 = CLFLUSH -- flush cache line */
14213 if (haveNo66noF2noF3(pfx)
14214 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7
14215 && sz == 4) {
14217 /* This is something of a hack. We need to know the size of
14218 the cache line containing addr. Since we can't easily find out,
14219 assume 256 on the basis that no real cache would have a
14220 line that big. It's safe to invalidate more stuff than we
14221 need, just inefficient. */
14222 ULong lineszB = 256ULL;
14224 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14225 delta += alen;
14227 /* Round addr down to the start of the containing block. */
14228 stmt( IRStmt_Put(
14229 OFFB_CMSTART,
14230 binop( Iop_And64,
14231 mkexpr(addr),
14232 mkU64( ~(lineszB-1) ))) );
14234 stmt( IRStmt_Put(OFFB_CMLEN, mkU64(lineszB) ) );
14236 jmp_lit(dres, Ijk_InvalICache, (Addr64)(guest_RIP_bbstart+delta));
14238 DIP("clflush %s\n", dis_buf);
14239 goto decode_success;
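   /* Added note: CMSTART/CMLEN plus the Ijk_InvalICache exit ask the
      run-time to discard any translations overlapping the given range.
      For example, with lineszB = 256, an addr of 0x4123 is rounded
      down to 0x4100 and 256 bytes are invalidated. */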
14242 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
14243 if (haveNo66noF2noF3(pfx)
14244 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3
14245 && sz == 4) {
14246 delta = dis_STMXCSR(vbi, pfx, delta, False/*!isAvx*/);
14247 goto decode_success;
14249 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
14250 if (haveNo66noF2noF3(pfx)
14251 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2
14252 && sz == 4) {
14253 delta = dis_LDMXCSR(vbi, pfx, delta, False/*!isAvx*/);
14254 goto decode_success;
14256 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */
14257 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
14258 && !epartIsReg(getUChar(delta))
14259 && gregOfRexRM(pfx,getUChar(delta)) == 0) {
14260 delta = dis_FXSAVE(vbi, pfx, delta, sz);
14261 goto decode_success;
14263 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */
14264 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
14265 && !epartIsReg(getUChar(delta))
14266 && gregOfRexRM(pfx,getUChar(delta)) == 1) {
14267 delta = dis_FXRSTOR(vbi, pfx, delta, sz);
14268 goto decode_success;
14270 /* 0F AE /4 = XSAVE mem -- write x87, SSE, AVX state to memory */
14271 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
14272 && !epartIsReg(getUChar(delta))
14273 && gregOfRexRM(pfx,getUChar(delta)) == 4
14274 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
14275 delta = dis_XSAVE(vbi, pfx, delta, sz);
14276 goto decode_success;
14278 /* 0F AE /5 = XRSTOR mem -- read x87, SSE, AVX state from memory */
14279 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
14280 && !epartIsReg(getUChar(delta))
14281 && gregOfRexRM(pfx,getUChar(delta)) == 5
14282 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
14283 delta = dis_XRSTOR(vbi, pfx, delta, sz);
14284 goto decode_success;
14286 break;
14288 case 0xC2:
14289 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
14290 if (haveNo66noF2noF3(pfx) && sz == 4) {
14291 Long delta0 = delta;
14292 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpps", True, 4 );
14293 if (delta > delta0) goto decode_success;
14295 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
14296 if (haveF3no66noF2(pfx) && sz == 4) {
14297 Long delta0 = delta;
14298 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpss", False, 4 );
14299 if (delta > delta0) goto decode_success;
14301 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
14302 if (haveF2no66noF3(pfx) && sz == 4) {
14303 Long delta0 = delta;
14304 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpsd", False, 8 );
14305 if (delta > delta0) goto decode_success;
14307 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
14308 if (have66noF2noF3(pfx) && sz == 2) {
14309 Long delta0 = delta;
14310 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmppd", True, 8 );
14311 if (delta > delta0) goto decode_success;
14313 break;
14315 case 0xC3:
14316 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
14317 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) {
14318 modrm = getUChar(delta);
14319 if (!epartIsReg(modrm)) {
14320 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14321 storeLE( mkexpr(addr), getIRegG(sz, pfx, modrm) );
14322 DIP("movnti %s,%s\n", dis_buf,
14323 nameIRegG(sz, pfx, modrm));
14324 delta += alen;
14325 goto decode_success;
14327 /* else fall through */
14329 break;
14331 case 0xC4:
14332 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14333 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
14334 put it into the specified lane of mmx(G). */
14335 if (haveNo66noF2noF3(pfx)
14336 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
14337 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
14338 mmx reg. t4 is the new lane value. t5 is the original
14339 mmx value. t6 is the new mmx value. */
14340 Int lane;
14341 t4 = newTemp(Ity_I16);
14342 t5 = newTemp(Ity_I64);
14343 t6 = newTemp(Ity_I64);
14344 modrm = getUChar(delta);
14345 do_MMX_preamble();
14347 assign(t5, getMMXReg(gregLO3ofRM(modrm)));
14348 breakup64to16s( t5, &t3, &t2, &t1, &t0 );
14350 if (epartIsReg(modrm)) {
14351 assign(t4, getIReg16(eregOfRexRM(pfx,modrm)));
14352 delta += 1+1;
14353 lane = getUChar(delta-1);
14354 DIP("pinsrw $%d,%s,%s\n", lane,
14355 nameIReg16(eregOfRexRM(pfx,modrm)),
14356 nameMMXReg(gregLO3ofRM(modrm)));
14357 } else {
14358 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
14359 delta += 1+alen;
14360 lane = getUChar(delta-1);
14361 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
14362 DIP("pinsrw $%d,%s,%s\n", lane,
14363 dis_buf,
14364 nameMMXReg(gregLO3ofRM(modrm)));
14367 switch (lane & 3) {
14368 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break;
14369 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break;
14370 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break;
14371 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break;
14372 default: vassert(0);
14374 putMMXReg(gregLO3ofRM(modrm), mkexpr(t6));
14375 goto decode_success;
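   /* Illustrative example: "pinsrw $2,%ax,%mm5" replaces bits 47:32 of
      mm5 with AX, which is what the "case 2" arm above
      (mk64from16s(t3,t4,t1,t0)) expresses. */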
14377 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
14378 put it into the specified lane of xmm(G). */
14379 if (have66noF2noF3(pfx)
14380 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
14381 Int lane;
14382 t4 = newTemp(Ity_I16);
14383 modrm = getUChar(delta);
14384 UInt rG = gregOfRexRM(pfx,modrm);
14385 if (epartIsReg(modrm)) {
14386 UInt rE = eregOfRexRM(pfx,modrm);
14387 assign(t4, getIReg16(rE));
14388 delta += 1+1;
14389 lane = getUChar(delta-1);
14390 DIP("pinsrw $%d,%s,%s\n",
14391 lane, nameIReg16(rE), nameXMMReg(rG));
14392 } else {
14393 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
14394 1/*byte after the amode*/ );
14395 delta += 1+alen;
14396 lane = getUChar(delta-1);
14397 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
14398 DIP("pinsrw $%d,%s,%s\n",
14399 lane, dis_buf, nameXMMReg(rG));
14401 IRTemp src_vec = newTemp(Ity_V128);
14402 assign(src_vec, getXMMReg(rG));
14403 IRTemp res_vec = math_PINSRW_128( src_vec, t4, lane & 7);
14404 putXMMReg(rG, mkexpr(res_vec));
14405 goto decode_success;
14407 break;
14409 case 0xC5:
14410 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14411 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
14412 zero-extend of it in ireg(G). */
14413 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) {
14414 modrm = getUChar(delta);
14415 if (epartIsReg(modrm)) {
14416 IRTemp sV = newTemp(Ity_I64);
14417 t5 = newTemp(Ity_I16);
14418 do_MMX_preamble();
14419 assign(sV, getMMXReg(eregLO3ofRM(modrm)));
14420 breakup64to16s( sV, &t3, &t2, &t1, &t0 );
14421 switch (getUChar(delta+1) & 3) {
14422 case 0: assign(t5, mkexpr(t0)); break;
14423 case 1: assign(t5, mkexpr(t1)); break;
14424 case 2: assign(t5, mkexpr(t2)); break;
14425 case 3: assign(t5, mkexpr(t3)); break;
14426 default: vassert(0);
14428 if (sz == 8)
14429 putIReg64(gregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(t5)));
14430 else
14431 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t5)));
14432 DIP("pextrw $%d,%s,%s\n",
14433 (Int)getUChar(delta+1),
14434 nameMMXReg(eregLO3ofRM(modrm)),
14435 sz==8 ? nameIReg64(gregOfRexRM(pfx,modrm))
14436 : nameIReg32(gregOfRexRM(pfx,modrm))
14438 delta += 2;
14439 goto decode_success;
14441 /* else fall through */
14442 /* note, for anyone filling in the mem case: this insn has one
14443 byte after the amode and therefore you must pass 1 as the
14444 last arg to disAMode */
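   /* Illustrative example: "pextrw $1,%mm3,%ecx" copies bits 31:16 of
      mm3, zero-extended, into ECX; only the low two bits of the
      immediate are significant (hence the "& 3" above). */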
14446 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put
14447 zero-extend of it in ireg(G). */
14448 if (have66noF2noF3(pfx)
14449 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
14450 Long delta0 = delta;
14451 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta,
14452 False/*!isAvx*/ );
14453 if (delta > delta0) goto decode_success;
14454 /* else fall through -- decoding has failed */
14456 break;
14458 case 0xC6:
14459 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
14460 if (haveNo66noF2noF3(pfx) && sz == 4) {
14461 Int imm8 = 0;
14462 IRTemp sV = newTemp(Ity_V128);
14463 IRTemp dV = newTemp(Ity_V128);
14464 modrm = getUChar(delta);
14465 UInt rG = gregOfRexRM(pfx,modrm);
14466 assign( dV, getXMMReg(rG) );
14467 if (epartIsReg(modrm)) {
14468 UInt rE = eregOfRexRM(pfx,modrm);
14469 assign( sV, getXMMReg(rE) );
14470 imm8 = (Int)getUChar(delta+1);
14471 delta += 1+1;
14472 DIP("shufps $%d,%s,%s\n", imm8, nameXMMReg(rE), nameXMMReg(rG));
14473 } else {
14474 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
14475 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
14476 imm8 = (Int)getUChar(delta+alen);
14477 delta += 1+alen;
14478 DIP("shufps $%d,%s,%s\n", imm8, dis_buf, nameXMMReg(rG));
14480 IRTemp res = math_SHUFPS_128( sV, dV, imm8 );
14481 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
14482 goto decode_success;
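   /* Added note: architecturally, SHUFPS builds result lanes 0 and 1
      from the destination (dV here), selected by imm8 bits 1:0 and
      3:2, and lanes 2 and 3 from the source (sV), selected by bits 5:4
      and 7:6; imm8 = 0xE4 selects lane i for result lane i. */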
14484 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
14485 if (have66noF2noF3(pfx) && sz == 2) {
14486 Int select;
14487 IRTemp sV = newTemp(Ity_V128);
14488 IRTemp dV = newTemp(Ity_V128);
14490 modrm = getUChar(delta);
14491 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
14493 if (epartIsReg(modrm)) {
14494 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
14495 select = (Int)getUChar(delta+1);
14496 delta += 1+1;
14497 DIP("shufpd $%d,%s,%s\n", select,
14498 nameXMMReg(eregOfRexRM(pfx,modrm)),
14499 nameXMMReg(gregOfRexRM(pfx,modrm)));
14500 } else {
14501 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
14502 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
14503 select = getUChar(delta+alen);
14504 delta += 1+alen;
14505 DIP("shufpd $%d,%s,%s\n", select,
14506 dis_buf,
14507 nameXMMReg(gregOfRexRM(pfx,modrm)));
14510 IRTemp res = math_SHUFPD_128( sV, dV, select );
14511 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
14512 goto decode_success;
14514 break;
14516 case 0xD1:
14517 /* 66 0F D1 = PSRLW by E */
14518 if (have66noF2noF3(pfx) && sz == 2) {
14519 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlw", Iop_ShrN16x8 );
14520 goto decode_success;
14522 break;
14524 case 0xD2:
14525 /* 66 0F D2 = PSRLD by E */
14526 if (have66noF2noF3(pfx) && sz == 2) {
14527 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrld", Iop_ShrN32x4 );
14528 goto decode_success;
14530 break;
14532 case 0xD3:
14533 /* 66 0F D3 = PSRLQ by E */
14534 if (have66noF2noF3(pfx) && sz == 2) {
14535 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlq", Iop_ShrN64x2 );
14536 goto decode_success;
14538 break;
14540 case 0xD4:
14541 /* 66 0F D4 = PADDQ */
14542 if (have66noF2noF3(pfx) && sz == 2) {
14543 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14544 "paddq", Iop_Add64x2, False );
14545 goto decode_success;
14547 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
14548 /* 0F D4 = PADDQ -- add 64x1 */
14549 if (haveNo66noF2noF3(pfx) && sz == 4) {
14550 do_MMX_preamble();
14551 delta = dis_MMXop_regmem_to_reg (
14552 vbi, pfx, delta, opc, "paddq", False );
14553 goto decode_success;
14555 break;
14557 case 0xD5:
14558 /* 66 0F D5 = PMULLW -- 16x8 multiply */
14559 if (have66noF2noF3(pfx) && sz == 2) {
14560 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14561 "pmullw", Iop_Mul16x8, False );
14562 goto decode_success;
14564 break;
14566 case 0xD6:
14567 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
14568 hi half). */
14569 if (haveF3no66noF2(pfx) && sz == 4) {
14570 modrm = getUChar(delta);
14571 if (epartIsReg(modrm)) {
14572 do_MMX_preamble();
14573 putXMMReg( gregOfRexRM(pfx,modrm),
14574 unop(Iop_64UtoV128, getMMXReg( eregLO3ofRM(modrm) )) );
14575 DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
14576 nameXMMReg(gregOfRexRM(pfx,modrm)));
14577 delta += 1;
14578 goto decode_success;
14580 /* apparently no mem case for this insn */
14582 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
14583 or lo half xmm). */
14584 if (have66noF2noF3(pfx)
14585 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
14586 modrm = getUChar(delta);
14587 if (epartIsReg(modrm)) {
14588 /* fall through, awaiting test case */
14589 /* dst: lo half copied, hi half zeroed */
14590 } else {
14591 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14592 storeLE( mkexpr(addr),
14593 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
14594 DIP("movq %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf );
14595 delta += alen;
14596 goto decode_success;
14599 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
14600 if (haveF2no66noF3(pfx) && sz == 4) {
14601 modrm = getUChar(delta);
14602 if (epartIsReg(modrm)) {
14603 do_MMX_preamble();
14604 putMMXReg( gregLO3ofRM(modrm),
14605 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
14606 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
14607 nameMMXReg(gregLO3ofRM(modrm)));
14608 delta += 1;
14609 goto decode_success;
14611 /* apparently no mem case for this insn */
14613 break;
14615 case 0xD7:
14616 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16
14617 lanes in xmm(E), turn them into a 16-bit value, and put
14618 zero-extend of it in ireg(G). Doing this directly is just
14619 too cumbersome; give up therefore and call a helper. */
14620 if (have66noF2noF3(pfx)
14621 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
14622 && epartIsReg(getUChar(delta))) { /* no memory case, it seems */
14623 delta = dis_PMOVMSKB_128( vbi, pfx, delta, False/*!isAvx*/ );
14624 goto decode_success;
14626 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14627 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
14628 mmx(E), turn them into a byte, and put zero-extend of it in
14629 ireg(G). */
14630 if (haveNo66noF2noF3(pfx)
14631 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
14632 modrm = getUChar(delta);
14633 if (epartIsReg(modrm)) {
14634 do_MMX_preamble();
14635 t0 = newTemp(Ity_I64);
14636 t1 = newTemp(Ity_I32);
14637 assign(t0, getMMXReg(eregLO3ofRM(modrm)));
14638 assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0))));
14639 putIReg32(gregOfRexRM(pfx,modrm), mkexpr(t1));
14640 DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
14641 nameIReg32(gregOfRexRM(pfx,modrm)));
14642 delta += 1;
14643 goto decode_success;
14645 /* else fall through */
14647 break;
14649 case 0xD8:
14650 /* 66 0F D8 = PSUBUSB */
14651 if (have66noF2noF3(pfx) && sz == 2) {
14652 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14653 "psubusb", Iop_QSub8Ux16, False );
14654 goto decode_success;
14656 break;
14658 case 0xD9:
14659 /* 66 0F D9 = PSUBUSW */
14660 if (have66noF2noF3(pfx) && sz == 2) {
14661 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14662 "psubusw", Iop_QSub16Ux8, False );
14663 goto decode_success;
14665 break;
14667 case 0xDA:
14668 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14669 /* 0F DA = PMINUB -- 8x8 unsigned min */
14670 if (haveNo66noF2noF3(pfx) && sz == 4) {
14671 do_MMX_preamble();
14672 delta = dis_MMXop_regmem_to_reg (
14673 vbi, pfx, delta, opc, "pminub", False );
14674 goto decode_success;
14676 /* 66 0F DA = PMINUB -- 8x16 unsigned min */
14677 if (have66noF2noF3(pfx) && sz == 2) {
14678 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14679 "pminub", Iop_Min8Ux16, False );
14680 goto decode_success;
14682 break;
14684 case 0xDB:
14685 /* 66 0F DB = PAND */
14686 if (have66noF2noF3(pfx) && sz == 2) {
14687 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pand", Iop_AndV128 );
14688 goto decode_success;
14690 break;
14692 case 0xDC:
14693 /* 66 0F DC = PADDUSB */
14694 if (have66noF2noF3(pfx) && sz == 2) {
14695 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14696 "paddusb", Iop_QAdd8Ux16, False );
14697 goto decode_success;
14699 break;
14701 case 0xDD:
14702 /* 66 0F DD = PADDUSW */
14703 if (have66noF2noF3(pfx) && sz == 2) {
14704 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14705 "paddusw", Iop_QAdd16Ux8, False );
14706 goto decode_success;
14708 break;
14710 case 0xDE:
14711 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14712 /* 0F DE = PMAXUB -- 8x8 unsigned max */
14713 if (haveNo66noF2noF3(pfx) && sz == 4) {
14714 do_MMX_preamble();
14715 delta = dis_MMXop_regmem_to_reg (
14716 vbi, pfx, delta, opc, "pmaxub", False );
14717 goto decode_success;
14719 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
14720 if (have66noF2noF3(pfx) && sz == 2) {
14721 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14722 "pmaxub", Iop_Max8Ux16, False );
14723 goto decode_success;
14725 break;
14727 case 0xDF:
14728 /* 66 0F DF = PANDN */
14729 if (have66noF2noF3(pfx) && sz == 2) {
14730 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "pandn", Iop_AndV128 );
14731 goto decode_success;
14733 break;
14735 case 0xE0:
14736 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14737 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
14738 if (haveNo66noF2noF3(pfx) && sz == 4) {
14739 do_MMX_preamble();
14740 delta = dis_MMXop_regmem_to_reg (
14741 vbi, pfx, delta, opc, "pavgb", False );
14742 goto decode_success;
14744 /* 66 0F E0 = PAVGB */
14745 if (have66noF2noF3(pfx) && sz == 2) {
14746 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14747 "pavgb", Iop_Avg8Ux16, False );
14748 goto decode_success;
14750 break;
14752 case 0xE1:
14753 /* 66 0F E1 = PSRAW by E */
14754 if (have66noF2noF3(pfx) && sz == 2) {
14755 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psraw", Iop_SarN16x8 );
14756 goto decode_success;
14758 break;
14760 case 0xE2:
14761 /* 66 0F E2 = PSRAD by E */
14762 if (have66noF2noF3(pfx) && sz == 2) {
14763 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrad", Iop_SarN32x4 );
14764 goto decode_success;
14766 break;
14768 case 0xE3:
14769 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14770 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
14771 if (haveNo66noF2noF3(pfx) && sz == 4) {
14772 do_MMX_preamble();
14773 delta = dis_MMXop_regmem_to_reg (
14774 vbi, pfx, delta, opc, "pavgw", False );
14775 goto decode_success;
14777 /* 66 0F E3 = PAVGW */
14778 if (have66noF2noF3(pfx) && sz == 2) {
14779 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14780 "pavgw", Iop_Avg16Ux8, False );
14781 goto decode_success;
14783 break;
14785 case 0xE4:
14786 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14787 /* 0F E4 = PMULHUW -- 16x4 hi-half of unsigned widening multiply */
14788 if (haveNo66noF2noF3(pfx) && sz == 4) {
14789 do_MMX_preamble();
14790 delta = dis_MMXop_regmem_to_reg (
14791 vbi, pfx, delta, opc, "pmulhuw", False );
14792 goto decode_success;
14794 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
14795 if (have66noF2noF3(pfx) && sz == 2) {
14796 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14797 "pmulhuw", Iop_MulHi16Ux8, False );
14798 goto decode_success;
14800 break;
14802 case 0xE5:
14803 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
14804 if (have66noF2noF3(pfx) && sz == 2) {
14805 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14806 "pmulhw", Iop_MulHi16Sx8, False );
14807 goto decode_success;
14809 break;
14811 case 0xE6:
14812 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
14813 lo half xmm(G), and zero upper half, rounding towards zero */
14814 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
14815 lo half xmm(G), according to prevailing rounding mode, and zero
14816 upper half */
14817 if ( (haveF2no66noF3(pfx) && sz == 4)
14818 || (have66noF2noF3(pfx) && sz == 2) ) {
14819 delta = dis_CVTxPD2DQ_128( vbi, pfx, delta, False/*!isAvx*/,
14820 toBool(sz == 2)/*r2zero*/);
14821 goto decode_success;
14823 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
14824 F64 in xmm(G) */
14825 if (haveF3no66noF2(pfx) && sz == 4) {
14826 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, False/*!isAvx*/);
14827 goto decode_success;
14829 break;
14831 case 0xE7:
14832 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14833 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
14834 Intel manual does not say anything about the usual business of
14835 the FP reg tags getting trashed whenever an MMX insn happens.
14836 So we just leave them alone.
14838 if (haveNo66noF2noF3(pfx) && sz == 4) {
14839 modrm = getUChar(delta);
14840 if (!epartIsReg(modrm)) {
14841 /* do_MMX_preamble(); Intel docs don't specify this */
14842 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14843 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
14844 DIP("movntq %s,%s\n", dis_buf,
14845 nameMMXReg(gregLO3ofRM(modrm)));
14846 delta += alen;
14847 goto decode_success;
14849 /* else fall through */
14851 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
14852 if (have66noF2noF3(pfx) && sz == 2) {
14853 modrm = getUChar(delta);
14854 if (!epartIsReg(modrm)) {
14855 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14856 gen_SEGV_if_not_16_aligned( addr );
14857 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
14858 DIP("movntdq %s,%s\n", dis_buf,
14859 nameXMMReg(gregOfRexRM(pfx,modrm)));
14860 delta += alen;
14861 goto decode_success;
14863 /* else fall through */
14865 break;
14867 case 0xE8:
14868 /* 66 0F E8 = PSUBSB */
14869 if (have66noF2noF3(pfx) && sz == 2) {
14870 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14871 "psubsb", Iop_QSub8Sx16, False );
14872 goto decode_success;
14874 break;
14876 case 0xE9:
14877 /* 66 0F E9 = PSUBSW */
14878 if (have66noF2noF3(pfx) && sz == 2) {
14879 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14880 "psubsw", Iop_QSub16Sx8, False );
14881 goto decode_success;
14883 break;
14885 case 0xEA:
14886 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14887 /* 0F EA = PMINSW -- 16x4 signed min */
14888 if (haveNo66noF2noF3(pfx) && sz == 4) {
14889 do_MMX_preamble();
14890 delta = dis_MMXop_regmem_to_reg (
14891 vbi, pfx, delta, opc, "pminsw", False );
14892 goto decode_success;
14894 /* 66 0F EA = PMINSW -- 16x8 signed min */
14895 if (have66noF2noF3(pfx) && sz == 2) {
14896 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14897 "pminsw", Iop_Min16Sx8, False );
14898 goto decode_success;
14900 break;
14902 case 0xEB:
14903 /* 66 0F EB = POR */
14904 if (have66noF2noF3(pfx) && sz == 2) {
14905 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "por", Iop_OrV128 );
14906 goto decode_success;
14908 break;
14910 case 0xEC:
14911 /* 66 0F EC = PADDSB */
14912 if (have66noF2noF3(pfx) && sz == 2) {
14913 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14914 "paddsb", Iop_QAdd8Sx16, False );
14915 goto decode_success;
14917 break;
14919 case 0xED:
14920 /* 66 0F ED = PADDSW */
14921 if (have66noF2noF3(pfx) && sz == 2) {
14922 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14923 "paddsw", Iop_QAdd16Sx8, False );
14924 goto decode_success;
14926 break;
14928 case 0xEE:
14929 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14930 /* 0F EE = PMAXSW -- 16x4 signed max */
14931 if (haveNo66noF2noF3(pfx) && sz == 4) {
14932 do_MMX_preamble();
14933 delta = dis_MMXop_regmem_to_reg (
14934 vbi, pfx, delta, opc, "pmaxsw", False );
14935 goto decode_success;
14937 /* 66 0F EE = PMAXSW -- 16x8 signed max */
14938 if (have66noF2noF3(pfx) && sz == 2) {
14939 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14940 "pmaxsw", Iop_Max16Sx8, False );
14941 goto decode_success;
14943 break;
14945 case 0xEF:
14946 /* 66 0F EF = PXOR */
14947 if (have66noF2noF3(pfx) && sz == 2) {
14948 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pxor", Iop_XorV128 );
14949 goto decode_success;
14951 break;
14953 case 0xF1:
14954 /* 66 0F F1 = PSLLW by E */
14955 if (have66noF2noF3(pfx) && sz == 2) {
14956 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllw", Iop_ShlN16x8 );
14957 goto decode_success;
14959 break;
14961 case 0xF2:
14962 /* 66 0F F2 = PSLLD by E */
14963 if (have66noF2noF3(pfx) && sz == 2) {
14964 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "pslld", Iop_ShlN32x4 );
14965 goto decode_success;
14967 break;
14969 case 0xF3:
14970 /* 66 0F F3 = PSLLQ by E */
14971 if (have66noF2noF3(pfx) && sz == 2) {
14972 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllq", Iop_ShlN64x2 );
14973 goto decode_success;
14975 break;
14977 case 0xF4:
14978 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply: 32-bit lanes 0 x 0
14979 form the lower 64-bit half and lanes 2 x 2 form the upper 64-bit
14980 half */
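/* Illustration: with 32-bit lanes d3:d2:d1:d0 and s3:s2:s1:s0, the result
   is the pair of unsigned 64-bit products (d2*s2) : (d0*s0). */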
14981 if (have66noF2noF3(pfx) && sz == 2) {
14982 IRTemp sV = newTemp(Ity_V128);
14983 IRTemp dV = newTemp(Ity_V128);
14984 modrm = getUChar(delta);
14985 UInt rG = gregOfRexRM(pfx,modrm);
14986 assign( dV, getXMMReg(rG) );
14987 if (epartIsReg(modrm)) {
14988 UInt rE = eregOfRexRM(pfx,modrm);
14989 assign( sV, getXMMReg(rE) );
14990 delta += 1;
14991 DIP("pmuludq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
14992 } else {
14993 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14994 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
14995 delta += alen;
14996 DIP("pmuludq %s,%s\n", dis_buf, nameXMMReg(rG));
14998 putXMMReg( rG, mkexpr(math_PMULUDQ_128( sV, dV )) );
14999 goto decode_success;
15001 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
15002 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
15003 0 to form 64-bit result */
15004 if (haveNo66noF2noF3(pfx) && sz == 4) {
15005 IRTemp sV = newTemp(Ity_I64);
15006 IRTemp dV = newTemp(Ity_I64);
15007 t1 = newTemp(Ity_I32);
15008 t0 = newTemp(Ity_I32);
15009 modrm = getUChar(delta);
15011 do_MMX_preamble();
15012 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15014 if (epartIsReg(modrm)) {
15015 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15016 delta += 1;
15017 DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
15018 nameMMXReg(gregLO3ofRM(modrm)));
15019 } else {
15020 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15021 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15022 delta += alen;
15023 DIP("pmuludq %s,%s\n", dis_buf,
15024 nameMMXReg(gregLO3ofRM(modrm)));
15027 assign( t0, unop(Iop_64to32, mkexpr(dV)) );
15028 assign( t1, unop(Iop_64to32, mkexpr(sV)) );
15029 putMMXReg( gregLO3ofRM(modrm),
15030 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
15031 goto decode_success;
15033 break;
15035 case 0xF5:
15036 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
15037 E(xmm or mem) to G(xmm) */
15038 if (have66noF2noF3(pfx) && sz == 2) {
15039 IRTemp sV = newTemp(Ity_V128);
15040 IRTemp dV = newTemp(Ity_V128);
15041 modrm = getUChar(delta);
15042 UInt rG = gregOfRexRM(pfx,modrm);
15043 if (epartIsReg(modrm)) {
15044 UInt rE = eregOfRexRM(pfx,modrm);
15045 assign( sV, getXMMReg(rE) );
15046 delta += 1;
15047 DIP("pmaddwd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15048 } else {
15049 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15050 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15051 delta += alen;
15052 DIP("pmaddwd %s,%s\n", dis_buf, nameXMMReg(rG));
15054 assign( dV, getXMMReg(rG) );
15055 putXMMReg( rG, mkexpr(math_PMADDWD_128(dV, sV)) );
15056 goto decode_success;
15058 break;
15060 case 0xF6:
15061 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
15062 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
15063 if (haveNo66noF2noF3(pfx) && sz == 4) {
15064 do_MMX_preamble();
15065 delta = dis_MMXop_regmem_to_reg (
15066 vbi, pfx, delta, opc, "psadbw", False );
15067 goto decode_success;
15069 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
15070 from E(xmm or mem) to G(xmm) */
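/* Each 64-bit half is handled independently: the eight absolute byte
   differences within that half are summed to a 16-bit value which lands in
   the low 16 bits of the half, the upper 48 bits being zero. */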
15071 if (have66noF2noF3(pfx) && sz == 2) {
15072 IRTemp sV = newTemp(Ity_V128);
15073 IRTemp dV = newTemp(Ity_V128);
15074 modrm = getUChar(delta);
15075 UInt rG = gregOfRexRM(pfx,modrm);
15076 if (epartIsReg(modrm)) {
15077 UInt rE = eregOfRexRM(pfx,modrm);
15078 assign( sV, getXMMReg(rE) );
15079 delta += 1;
15080 DIP("psadbw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15081 } else {
15082 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15083 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15084 delta += alen;
15085 DIP("psadbw %s,%s\n", dis_buf, nameXMMReg(rG));
15087 assign( dV, getXMMReg(rG) );
15088 putXMMReg( rG, mkexpr( math_PSADBW_128 ( dV, sV ) ) );
15090 goto decode_success;
15092 break;
15094 case 0xF7:
15095 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
15096 /* 0F F7 = MASKMOVQ -- 8x8 masked store */
15097 if (haveNo66noF2noF3(pfx) && sz == 4) {
15098 Bool ok = False;
15099 delta = dis_MMX( &ok, vbi, pfx, sz, delta-1 );
15100 if (ok) goto decode_success;
15102 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
15103 if (have66noF2noF3(pfx) && sz == 2 && epartIsReg(getUChar(delta))) {
15104 delta = dis_MASKMOVDQU( vbi, pfx, delta, False/*!isAvx*/ );
15105 goto decode_success;
15107 break;
15109 case 0xF8:
15110 /* 66 0F F8 = PSUBB */
15111 if (have66noF2noF3(pfx) && sz == 2) {
15112 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15113 "psubb", Iop_Sub8x16, False );
15114 goto decode_success;
15116 break;
15118 case 0xF9:
15119 /* 66 0F F9 = PSUBW */
15120 if (have66noF2noF3(pfx) && sz == 2) {
15121 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15122 "psubw", Iop_Sub16x8, False );
15123 goto decode_success;
15125 break;
15127 case 0xFA:
15128 /* 66 0F FA = PSUBD */
15129 if (have66noF2noF3(pfx) && sz == 2) {
15130 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15131 "psubd", Iop_Sub32x4, False );
15132 goto decode_success;
15134 break;
15136 case 0xFB:
15137 /* 66 0F FB = PSUBQ */
15138 if (have66noF2noF3(pfx) && sz == 2) {
15139 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15140 "psubq", Iop_Sub64x2, False );
15141 goto decode_success;
15143 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
15144 /* 0F FB = PSUBQ -- sub 64x1 */
15145 if (haveNo66noF2noF3(pfx) && sz == 4) {
15146 do_MMX_preamble();
15147 delta = dis_MMXop_regmem_to_reg (
15148 vbi, pfx, delta, opc, "psubq", False );
15149 goto decode_success;
15151 break;
15153 case 0xFC:
15154 /* 66 0F FC = PADDB */
15155 if (have66noF2noF3(pfx) && sz == 2) {
15156 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15157 "paddb", Iop_Add8x16, False );
15158 goto decode_success;
15160 break;
15162 case 0xFD:
15163 /* 66 0F FD = PADDW */
15164 if (have66noF2noF3(pfx) && sz == 2) {
15165 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15166 "paddw", Iop_Add16x8, False );
15167 goto decode_success;
15169 break;
15171 case 0xFE:
15172 /* 66 0F FE = PADDD */
15173 if (have66noF2noF3(pfx) && sz == 2) {
15174 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15175 "paddd", Iop_Add32x4, False );
15176 goto decode_success;
15178 break;
15180 default:
15181 goto decode_failure;
15185 decode_failure:
15186 *decode_OK = False;
15187 return deltaIN;
15189 decode_success:
15190 *decode_OK = True;
15191 return delta;
15195 /*------------------------------------------------------------*/
15196 /*--- ---*/
15197 /*--- Top-level SSE3 (not SupSSE3): dis_ESC_0F__SSE3 ---*/
15198 /*--- ---*/
15199 /*------------------------------------------------------------*/
15201 static Long dis_MOVDDUP_128 ( const VexAbiInfo* vbi, Prefix pfx,
15202 Long delta, Bool isAvx )
15204 IRTemp addr = IRTemp_INVALID;
15205 Int alen = 0;
15206 HChar dis_buf[50];
15207 IRTemp sV = newTemp(Ity_V128);
15208 IRTemp d0 = newTemp(Ity_I64);
15209 UChar modrm = getUChar(delta);
15210 UInt rG = gregOfRexRM(pfx,modrm);
15211 if (epartIsReg(modrm)) {
15212 UInt rE = eregOfRexRM(pfx,modrm);
15213 assign( sV, getXMMReg(rE) );
15214 DIP("%smovddup %s,%s\n",
15215 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
15216 delta += 1;
15217 assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
15218 } else {
15219 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15220 assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
15221 DIP("%smovddup %s,%s\n",
15222 isAvx ? "v" : "", dis_buf, nameXMMReg(rG));
15223 delta += alen;
15225 (isAvx ? putYMMRegLoAndZU : putXMMReg)
15226 ( rG, binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
15227 return delta;
15231 static Long dis_MOVDDUP_256 ( const VexAbiInfo* vbi, Prefix pfx,
15232 Long delta )
15234 IRTemp addr = IRTemp_INVALID;
15235 Int alen = 0;
15236 HChar dis_buf[50];
15237 IRTemp d0 = newTemp(Ity_I64);
15238 IRTemp d1 = newTemp(Ity_I64);
15239 UChar modrm = getUChar(delta);
15240 UInt rG = gregOfRexRM(pfx,modrm);
15241 if (epartIsReg(modrm)) {
15242 UInt rE = eregOfRexRM(pfx,modrm);
15243 DIP("vmovddup %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
15244 delta += 1;
15245 assign ( d0, getYMMRegLane64(rE, 0) );
15246 assign ( d1, getYMMRegLane64(rE, 2) );
15247 } else {
15248 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15249 assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
15250 assign( d1, loadLE(Ity_I64, binop(Iop_Add64,
15251 mkexpr(addr), mkU64(16))) );
15252 DIP("vmovddup %s,%s\n", dis_buf, nameYMMReg(rG));
15253 delta += alen;
15255 putYMMRegLane64( rG, 0, mkexpr(d0) );
15256 putYMMRegLane64( rG, 1, mkexpr(d0) );
15257 putYMMRegLane64( rG, 2, mkexpr(d1) );
15258 putYMMRegLane64( rG, 3, mkexpr(d1) );
15259 return delta;
15263 static Long dis_MOVSxDUP_128 ( const VexAbiInfo* vbi, Prefix pfx,
15264 Long delta, Bool isAvx, Bool isL )
15266 IRTemp addr = IRTemp_INVALID;
15267 Int alen = 0;
15268 HChar dis_buf[50];
15269 IRTemp sV = newTemp(Ity_V128);
15270 UChar modrm = getUChar(delta);
15271 UInt rG = gregOfRexRM(pfx,modrm);
15272 IRTemp s3, s2, s1, s0;
15273 s3 = s2 = s1 = s0 = IRTemp_INVALID;
15274 if (epartIsReg(modrm)) {
15275 UInt rE = eregOfRexRM(pfx,modrm);
15276 assign( sV, getXMMReg(rE) );
15277 DIP("%smovs%cdup %s,%s\n",
15278 isAvx ? "v" : "", isL ? 'l' : 'h', nameXMMReg(rE), nameXMMReg(rG));
15279 delta += 1;
15280 } else {
15281 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15282 if (!isAvx)
15283 gen_SEGV_if_not_16_aligned( addr );
15284 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15285 DIP("%smovs%cdup %s,%s\n",
15286 isAvx ? "v" : "", isL ? 'l' : 'h', dis_buf, nameXMMReg(rG));
15287 delta += alen;
15289 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
15290 (isAvx ? putYMMRegLoAndZU : putXMMReg)
15291 ( rG, isL ? mkV128from32s( s2, s2, s0, s0 )
15292 : mkV128from32s( s3, s3, s1, s1 ) );
15293 return delta;
15297 static Long dis_MOVSxDUP_256 ( const VexAbiInfo* vbi, Prefix pfx,
15298 Long delta, Bool isL )
15300 IRTemp addr = IRTemp_INVALID;
15301 Int alen = 0;
15302 HChar dis_buf[50];
15303 IRTemp sV = newTemp(Ity_V256);
15304 UChar modrm = getUChar(delta);
15305 UInt rG = gregOfRexRM(pfx,modrm);
15306 IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
15307 s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
15308 if (epartIsReg(modrm)) {
15309 UInt rE = eregOfRexRM(pfx,modrm);
15310 assign( sV, getYMMReg(rE) );
15311 DIP("vmovs%cdup %s,%s\n",
15312 isL ? 'l' : 'h', nameYMMReg(rE), nameYMMReg(rG));
15313 delta += 1;
15314 } else {
15315 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15316 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
15317 DIP("vmovs%cdup %s,%s\n",
15318 isL ? 'l' : 'h', dis_buf, nameYMMReg(rG));
15319 delta += alen;
15321 breakupV256to32s( sV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
15322 putYMMRegLane128( rG, 1, isL ? mkV128from32s( s6, s6, s4, s4 )
15323 : mkV128from32s( s7, s7, s5, s5 ) );
15324 putYMMRegLane128( rG, 0, isL ? mkV128from32s( s2, s2, s0, s0 )
15325 : mkV128from32s( s3, s3, s1, s1 ) );
15326 return delta;
15330 static IRTemp math_HADDPS_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
15332 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
15333 IRTemp leftV = newTemp(Ity_V128);
15334 IRTemp rightV = newTemp(Ity_V128);
15335 IRTemp rm = newTemp(Ity_I32);
15336 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
15338 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
15339 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
15341 assign( leftV, mkV128from32s( s2, s0, d2, d0 ) );
15342 assign( rightV, mkV128from32s( s3, s1, d3, d1 ) );
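/* leftV holds the even-numbered (lower) element of each adjacent pair and
   rightV the odd-numbered one, so the lane-wise op below produces, from high
   to low lane, s3+s2 : s1+s0 : d3+d2 : d1+d0 for the add case, and the
   corresponding even-minus-odd differences for HSUBPS. */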
15344 IRTemp res = newTemp(Ity_V128);
15345 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
15346 assign( res, triop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
15347 mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
15348 return res;
15352 static IRTemp math_HADDPD_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
15354 IRTemp s1, s0, d1, d0;
15355 IRTemp leftV = newTemp(Ity_V128);
15356 IRTemp rightV = newTemp(Ity_V128);
15357 IRTemp rm = newTemp(Ity_I32);
15358 s1 = s0 = d1 = d0 = IRTemp_INVALID;
15360 breakupV128to64s( sV, &s1, &s0 );
15361 breakupV128to64s( dV, &d1, &d0 );
15363 assign( leftV, binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
15364 assign( rightV, binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );
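/* Here the high lane of the result is s1+s0 (or s0-s1 for HSUBPD) and the
   low lane is d1+d0 (or d0-d1). */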
15366 IRTemp res = newTemp(Ity_V128);
15367 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
15368 assign( res, triop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
15369 mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
15370 return res;
15374 __attribute__((noinline))
15375 static
15376 Long dis_ESC_0F__SSE3 ( Bool* decode_OK,
15377 const VexAbiInfo* vbi,
15378 Prefix pfx, Int sz, Long deltaIN )
15380 IRTemp addr = IRTemp_INVALID;
15381 UChar modrm = 0;
15382 Int alen = 0;
15383 HChar dis_buf[50];
15385 *decode_OK = False;
15387 Long delta = deltaIN;
15388 UChar opc = getUChar(delta);
15389 delta++;
15390 switch (opc) {
15392 case 0x12:
15393 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
15394 duplicating some lanes (2:2:0:0). */
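/* i.e. with 32-bit source lanes s3:s2:s1:s0, the result is s2:s2:s0:s0 --
   each even-numbered lane is copied into the odd-numbered lane above it. */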
15395 if (haveF3no66noF2(pfx) && sz == 4) {
15396 delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/,
15397 True/*isL*/ );
15398 goto decode_success;
15400 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
15401 duplicating some lanes (0:1:0:1). */
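/* Viewed as 64-bit lanes: both halves of the destination receive the low
   64 bits of the source. */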
15402 if (haveF2no66noF3(pfx)
15403 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
15404 delta = dis_MOVDDUP_128( vbi, pfx, delta, False/*!isAvx*/ );
15405 goto decode_success;
15407 break;
15409 case 0x16:
15410 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
15411 duplicating some lanes (3:3:1:1). */
15412 if (haveF3no66noF2(pfx) && sz == 4) {
15413 delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/,
15414 False/*!isL*/ );
15415 goto decode_success;
15417 break;
15419 case 0x7C:
15420 case 0x7D:
15421 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
15422 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
15423 if (haveF2no66noF3(pfx) && sz == 4) {
15424 IRTemp eV = newTemp(Ity_V128);
15425 IRTemp gV = newTemp(Ity_V128);
15426 Bool isAdd = opc == 0x7C;
15427 const HChar* str = isAdd ? "add" : "sub";
15428 modrm = getUChar(delta);
15429 UInt rG = gregOfRexRM(pfx,modrm);
15430 if (epartIsReg(modrm)) {
15431 UInt rE = eregOfRexRM(pfx,modrm);
15432 assign( eV, getXMMReg(rE) );
15433 DIP("h%sps %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG));
15434 delta += 1;
15435 } else {
15436 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15437 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
15438 DIP("h%sps %s,%s\n", str, dis_buf, nameXMMReg(rG));
15439 delta += alen;
15442 assign( gV, getXMMReg(rG) );
15443 putXMMReg( rG, mkexpr( math_HADDPS_128 ( gV, eV, isAdd ) ) );
15444 goto decode_success;
15446 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
15447 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
15448 if (have66noF2noF3(pfx) && sz == 2) {
15449 IRTemp eV = newTemp(Ity_V128);
15450 IRTemp gV = newTemp(Ity_V128);
15451 Bool isAdd = opc == 0x7C;
15452 const HChar* str = isAdd ? "add" : "sub";
15453 modrm = getUChar(delta);
15454 UInt rG = gregOfRexRM(pfx,modrm);
15455 if (epartIsReg(modrm)) {
15456 UInt rE = eregOfRexRM(pfx,modrm);
15457 assign( eV, getXMMReg(rE) );
15458 DIP("h%spd %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG));
15459 delta += 1;
15460 } else {
15461 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15462 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
15463 DIP("h%spd %s,%s\n", str, dis_buf, nameXMMReg(rG));
15464 delta += alen;
15467 assign( gV, getXMMReg(rG) );
15468 putXMMReg( rG, mkexpr( math_HADDPD_128 ( gV, eV, isAdd ) ) );
15469 goto decode_success;
15471 break;
15473 case 0xD0:
15474 /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm). */
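/* The result's low lane is g0 - e0 and its high lane is g1 + e1, where g is
   the G (destination) register and e the E operand. */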
15475 if (have66noF2noF3(pfx) && sz == 2) {
15476 IRTemp eV = newTemp(Ity_V128);
15477 IRTemp gV = newTemp(Ity_V128);
15478 modrm = getUChar(delta);
15479 UInt rG = gregOfRexRM(pfx,modrm);
15480 if (epartIsReg(modrm)) {
15481 UInt rE = eregOfRexRM(pfx,modrm);
15482 assign( eV, getXMMReg(rE) );
15483 DIP("addsubpd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15484 delta += 1;
15485 } else {
15486 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15487 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
15488 DIP("addsubpd %s,%s\n", dis_buf, nameXMMReg(rG));
15489 delta += alen;
15492 assign( gV, getXMMReg(rG) );
15493 putXMMReg( rG, mkexpr( math_ADDSUBPD_128 ( gV, eV ) ) );
15494 goto decode_success;
15496 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
15497 if (haveF2no66noF3(pfx) && sz == 4) {
15498 IRTemp eV = newTemp(Ity_V128);
15499 IRTemp gV = newTemp(Ity_V128);
15500 modrm = getUChar(delta);
15501 UInt rG = gregOfRexRM(pfx,modrm);
15503 modrm = getUChar(delta);
15504 if (epartIsReg(modrm)) {
15505 UInt rE = eregOfRexRM(pfx,modrm);
15506 assign( eV, getXMMReg(rE) );
15507 DIP("addsubps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15508 delta += 1;
15509 } else {
15510 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15511 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
15512 DIP("addsubps %s,%s\n", dis_buf, nameXMMReg(rG));
15513 delta += alen;
15516 assign( gV, getXMMReg(rG) );
15517 putXMMReg( rG, mkexpr( math_ADDSUBPS_128 ( gV, eV ) ) );
15518 goto decode_success;
15520 break;
15522 case 0xF0:
15523 /* F2 0F F0 = LDDQU -- move from E (mem only) to G (xmm). */
15524 if (haveF2no66noF3(pfx) && sz == 4) {
15525 modrm = getUChar(delta);
15526 if (epartIsReg(modrm)) {
15527 goto decode_failure;
15528 } else {
15529 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15530 putXMMReg( gregOfRexRM(pfx,modrm),
15531 loadLE(Ity_V128, mkexpr(addr)) );
15532 DIP("lddqu %s,%s\n", dis_buf,
15533 nameXMMReg(gregOfRexRM(pfx,modrm)));
15534 delta += alen;
15536 goto decode_success;
15538 break;
15540 default:
15541 goto decode_failure;
15545 decode_failure:
15546 *decode_OK = False;
15547 return deltaIN;
15549 decode_success:
15550 *decode_OK = True;
15551 return delta;
15555 /*------------------------------------------------------------*/
15556 /*--- ---*/
15557 /*--- Top-level SSSE3: dis_ESC_0F38__SupSSE3 ---*/
15558 /*--- ---*/
15559 /*------------------------------------------------------------*/
15561 static
15562 IRTemp math_PSHUFB_XMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ )
15564 IRTemp halfMask = newTemp(Ity_I64);
15565 assign(halfMask, mkU64(0x8F8F8F8F8F8F8F8FULL));
15566 IRExpr* mask = binop(Iop_64HLtoV128, mkexpr(halfMask), mkexpr(halfMask));
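/* After the AND below, each control byte is either 0..15 (a byte index into
   dV) or has bit 7 set; Iop_PermOrZero8x16 produces the selected dV byte in
   the first case and zero in the second, which is exactly what PSHUFB
   requires. */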
15567 IRTemp res = newTemp(Ity_V128);
15568 assign(res,
15569 binop(Iop_PermOrZero8x16,
15570 mkexpr(dV),
15571 // Mask off bits [6:4] of each source operand lane
15572 binop(Iop_AndV128, mkexpr(sV), mask)
15574 return res;
15578 static
15579 IRTemp math_PSHUFB_YMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ )
15581 IRTemp sHi, sLo, dHi, dLo;
15582 sHi = sLo = dHi = dLo = IRTemp_INVALID;
15583 breakupV256toV128s( dV, &dHi, &dLo);
15584 breakupV256toV128s( sV, &sHi, &sLo);
15585 IRTemp res = newTemp(Ity_V256);
15586 assign(res, binop(Iop_V128HLtoV256,
15587 mkexpr(math_PSHUFB_XMM(dHi, sHi)),
15588 mkexpr(math_PSHUFB_XMM(dLo, sLo))));
15589 return res;
15593 static Long dis_PHADD_128 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
15594 Bool isAvx, UChar opc )
15596 IRTemp addr = IRTemp_INVALID;
15597 Int alen = 0;
15598 HChar dis_buf[50];
15599 const HChar* str = "???";
15600 IROp opV64 = Iop_INVALID;
15601 IROp opCatO = Iop_CatOddLanes16x4;
15602 IROp opCatE = Iop_CatEvenLanes16x4;
15603 IRTemp sV = newTemp(Ity_V128);
15604 IRTemp dV = newTemp(Ity_V128);
15605 IRTemp sHi = newTemp(Ity_I64);
15606 IRTemp sLo = newTemp(Ity_I64);
15607 IRTemp dHi = newTemp(Ity_I64);
15608 IRTemp dLo = newTemp(Ity_I64);
15609 UChar modrm = getUChar(delta);
15610 UInt rG = gregOfRexRM(pfx,modrm);
15611 UInt rV = isAvx ? getVexNvvvv(pfx) : rG;
15613 switch (opc) {
15614 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
15615 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
15616 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
15617 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
15618 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
15619 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
15620 default: vassert(0);
15622 if (opc == 0x02 || opc == 0x06) {
15623 opCatO = Iop_InterleaveHI32x2;
15624 opCatE = Iop_InterleaveLO32x2;
15627 assign( dV, getXMMReg(rV) );
15629 if (epartIsReg(modrm)) {
15630 UInt rE = eregOfRexRM(pfx,modrm);
15631 assign( sV, getXMMReg(rE) );
15632 DIP("%sph%s %s,%s\n", isAvx ? "v" : "", str,
15633 nameXMMReg(rE), nameXMMReg(rG));
15634 delta += 1;
15635 } else {
15636 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15637 if (!isAvx)
15638 gen_SEGV_if_not_16_aligned( addr );
15639 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15640 DIP("%sph%s %s,%s\n", isAvx ? "v" : "", str,
15641 dis_buf, nameXMMReg(rG));
15642 delta += alen;
15645 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
15646 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
15647 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
15648 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
15650 /* This isn't a particularly efficient way to compute the
15651 result, but at least it avoids a proliferation of IROps,
15652 hence avoids complicating all the backends. */
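/* CatEvenLanes/CatOddLanes split each 64-bit half into its even- and
   odd-numbered lanes, so each opV64 below combines adjacent pairs; the pairs
   taken from the source operand form the high 64 bits of the result and
   those from the destination the low 64 bits, as PHADDx/PHSUBx require. */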
15654 (isAvx ? putYMMRegLoAndZU : putXMMReg)
15655 ( rG,
15656 binop(Iop_64HLtoV128,
15657 binop(opV64,
15658 binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
15659 binop(opCatO,mkexpr(sHi),mkexpr(sLo)) ),
15660 binop(opV64,
15661 binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
15662 binop(opCatO,mkexpr(dHi),mkexpr(dLo)) ) ) );
15663 return delta;
15667 static Long dis_PHADD_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
15668 UChar opc )
15670 IRTemp addr = IRTemp_INVALID;
15671 Int alen = 0;
15672 HChar dis_buf[50];
15673 const HChar* str = "???";
15674 IROp opV64 = Iop_INVALID;
15675 IROp opCatO = Iop_CatOddLanes16x4;
15676 IROp opCatE = Iop_CatEvenLanes16x4;
15677 IRTemp sV = newTemp(Ity_V256);
15678 IRTemp dV = newTemp(Ity_V256);
15679 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
15680 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
15681 UChar modrm = getUChar(delta);
15682 UInt rG = gregOfRexRM(pfx,modrm);
15683 UInt rV = getVexNvvvv(pfx);
15685 switch (opc) {
15686 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
15687 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
15688 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
15689 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
15690 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
15691 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
15692 default: vassert(0);
15694 if (opc == 0x02 || opc == 0x06) {
15695 opCatO = Iop_InterleaveHI32x2;
15696 opCatE = Iop_InterleaveLO32x2;
15699 assign( dV, getYMMReg(rV) );
15701 if (epartIsReg(modrm)) {
15702 UInt rE = eregOfRexRM(pfx,modrm);
15703 assign( sV, getYMMReg(rE) );
15704 DIP("vph%s %s,%s\n", str, nameYMMReg(rE), nameYMMReg(rG));
15705 delta += 1;
15706 } else {
15707 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15708 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
15709 DIP("vph%s %s,%s\n", str, dis_buf, nameYMMReg(rG));
15710 delta += alen;
15713 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
15714 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
15716 /* This isn't a particularly efficient way to compute the
15717 result, but at least it avoids a proliferation of IROps,
15718 hence avoids complicating all the backends. */
15720 putYMMReg( rG,
15721 binop(Iop_V128HLtoV256,
15722 binop(Iop_64HLtoV128,
15723 binop(opV64,
15724 binop(opCatE,mkexpr(s3),mkexpr(s2)),
15725 binop(opCatO,mkexpr(s3),mkexpr(s2)) ),
15726 binop(opV64,
15727 binop(opCatE,mkexpr(d3),mkexpr(d2)),
15728 binop(opCatO,mkexpr(d3),mkexpr(d2)) ) ),
15729 binop(Iop_64HLtoV128,
15730 binop(opV64,
15731 binop(opCatE,mkexpr(s1),mkexpr(s0)),
15732 binop(opCatO,mkexpr(s1),mkexpr(s0)) ),
15733 binop(opV64,
15734 binop(opCatE,mkexpr(d1),mkexpr(d0)),
15735 binop(opCatO,mkexpr(d1),mkexpr(d0)) ) ) ) );
15736 return delta;
15740 static IRTemp math_PMADDUBSW_128 ( IRTemp dV, IRTemp sV )
15742 IRTemp res = newTemp(Ity_V128);
15743 assign(res, binop(Iop_PwExtUSMulQAdd8x16, mkexpr(dV), mkexpr(sV)));
15744 return res;
15748 static
15749 IRTemp math_PMADDUBSW_256 ( IRTemp dV, IRTemp sV )
15751 IRTemp sHi, sLo, dHi, dLo;
15752 sHi = sLo = dHi = dLo = IRTemp_INVALID;
15753 breakupV256toV128s( dV, &dHi, &dLo);
15754 breakupV256toV128s( sV, &sHi, &sLo);
15755 IRTemp res = newTemp(Ity_V256);
15756 assign(res, binop(Iop_V128HLtoV256,
15757 mkexpr(math_PMADDUBSW_128(dHi, sHi)),
15758 mkexpr(math_PMADDUBSW_128(dLo, sLo))));
15759 return res;
15763 __attribute__((noinline))
15764 static
15765 Long dis_ESC_0F38__SupSSE3 ( Bool* decode_OK,
15766 const VexAbiInfo* vbi,
15767 Prefix pfx, Int sz, Long deltaIN )
15769 IRTemp addr = IRTemp_INVALID;
15770 UChar modrm = 0;
15771 Int alen = 0;
15772 HChar dis_buf[50];
15774 *decode_OK = False;
15776 Long delta = deltaIN;
15777 UChar opc = getUChar(delta);
15778 delta++;
15779 switch (opc) {
15781 case 0x00:
15782 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
15783 if (have66noF2noF3(pfx)
15784 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
15785 IRTemp sV = newTemp(Ity_V128);
15786 IRTemp dV = newTemp(Ity_V128);
15788 modrm = getUChar(delta);
15789 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
15791 if (epartIsReg(modrm)) {
15792 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
15793 delta += 1;
15794 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
15795 nameXMMReg(gregOfRexRM(pfx,modrm)));
15796 } else {
15797 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15798 gen_SEGV_if_not_16_aligned( addr );
15799 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15800 delta += alen;
15801 DIP("pshufb %s,%s\n", dis_buf,
15802 nameXMMReg(gregOfRexRM(pfx,modrm)));
15805 IRTemp res = math_PSHUFB_XMM( dV, sV );
15806 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(res));
15807 goto decode_success;
15809 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
15810 if (haveNo66noF2noF3(pfx) && sz == 4) {
15811 IRTemp sV = newTemp(Ity_I64);
15812 IRTemp dV = newTemp(Ity_I64);
15814 modrm = getUChar(delta);
15815 do_MMX_preamble();
15816 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15818 if (epartIsReg(modrm)) {
15819 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15820 delta += 1;
15821 DIP("pshufb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
15822 nameMMXReg(gregLO3ofRM(modrm)));
15823 } else {
15824 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15825 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15826 delta += alen;
15827 DIP("pshufb %s,%s\n", dis_buf,
15828 nameMMXReg(gregLO3ofRM(modrm)));
15831 putMMXReg(
15832 gregLO3ofRM(modrm),
15833 binop(
15834 Iop_PermOrZero8x8,
15835 mkexpr(dV),
15836 // Mask off bits [6:3] of each source operand lane
15837 binop(Iop_And64, mkexpr(sV), mkU64(0x8787878787878787ULL))
15840 goto decode_success;
15842 break;
15844 case 0x01:
15845 case 0x02:
15846 case 0x03:
15847 case 0x05:
15848 case 0x06:
15849 case 0x07:
15850 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
15851 G to G (xmm). */
15852 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
15853 G to G (xmm). */
15854 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
15855 xmm) and G to G (xmm). */
15856 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
15857 G to G (xmm). */
15858 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
15859 G to G (xmm). */
15860 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
15861 xmm) and G to G (xmm). */
15862 if (have66noF2noF3(pfx)
15863 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
15864 delta = dis_PHADD_128( vbi, pfx, delta, False/*isAvx*/, opc );
15865 goto decode_success;
15867 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
15868 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
15869 to G (mmx). */
15870 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
15871 to G (mmx). */
15872 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
15873 mmx) and G to G (mmx). */
15874 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
15875 to G (mmx). */
15876 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
15877 to G (mmx). */
15878 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
15879 mmx) and G to G (mmx). */
15880 if (haveNo66noF2noF3(pfx) && sz == 4) {
15881 const HChar* str = "???";
15882 IROp opV64 = Iop_INVALID;
15883 IROp opCatO = Iop_CatOddLanes16x4;
15884 IROp opCatE = Iop_CatEvenLanes16x4;
15885 IRTemp sV = newTemp(Ity_I64);
15886 IRTemp dV = newTemp(Ity_I64);
15888 modrm = getUChar(delta);
15890 switch (opc) {
15891 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
15892 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
15893 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
15894 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
15895 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
15896 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
15897 default: vassert(0);
15899 if (opc == 0x02 || opc == 0x06) {
15900 opCatO = Iop_InterleaveHI32x2;
15901 opCatE = Iop_InterleaveLO32x2;
15904 do_MMX_preamble();
15905 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15907 if (epartIsReg(modrm)) {
15908 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15909 delta += 1;
15910 DIP("ph%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
15911 nameMMXReg(gregLO3ofRM(modrm)));
15912 } else {
15913 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15914 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15915 delta += alen;
15916 DIP("ph%s %s,%s\n", str, dis_buf,
15917 nameMMXReg(gregLO3ofRM(modrm)));
15920 putMMXReg(
15921 gregLO3ofRM(modrm),
15922 binop(opV64,
15923 binop(opCatE,mkexpr(sV),mkexpr(dV)),
15924 binop(opCatO,mkexpr(sV),mkexpr(dV))
15927 goto decode_success;
15929 break;
15931 case 0x04:
15932 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
15933 Unsigned Bytes (XMM) */
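/* For each 16-bit result lane i this computes
     satS16( u8(d[2i])*s8(s[2i]) + u8(d[2i+1])*s8(s[2i+1]) ),
   where d (the G register) supplies the unsigned bytes and s (the E operand)
   the signed ones. */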
15934 if (have66noF2noF3(pfx)
15935 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
15936 IRTemp sV = newTemp(Ity_V128);
15937 IRTemp dV = newTemp(Ity_V128);
15938 modrm = getUChar(delta);
15939 UInt rG = gregOfRexRM(pfx,modrm);
15941 assign( dV, getXMMReg(rG) );
15943 if (epartIsReg(modrm)) {
15944 UInt rE = eregOfRexRM(pfx,modrm);
15945 assign( sV, getXMMReg(rE) );
15946 delta += 1;
15947 DIP("pmaddubsw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15948 } else {
15949 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15950 gen_SEGV_if_not_16_aligned( addr );
15951 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15952 delta += alen;
15953 DIP("pmaddubsw %s,%s\n", dis_buf, nameXMMReg(rG));
15956 putXMMReg( rG, mkexpr( math_PMADDUBSW_128( dV, sV ) ) );
15957 goto decode_success;
15959 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
15960 Unsigned Bytes (MMX) */
15961 if (haveNo66noF2noF3(pfx) && sz == 4) {
15962 IRTemp sV = newTemp(Ity_I64);
15963 IRTemp dV = newTemp(Ity_I64);
15964 IRTemp sVoddsSX = newTemp(Ity_I64);
15965 IRTemp sVevensSX = newTemp(Ity_I64);
15966 IRTemp dVoddsZX = newTemp(Ity_I64);
15967 IRTemp dVevensZX = newTemp(Ity_I64);
15969 modrm = getUChar(delta);
15970 do_MMX_preamble();
15971 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15973 if (epartIsReg(modrm)) {
15974 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15975 delta += 1;
15976 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
15977 nameMMXReg(gregLO3ofRM(modrm)));
15978 } else {
15979 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15980 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15981 delta += alen;
15982 DIP("pmaddubsw %s,%s\n", dis_buf,
15983 nameMMXReg(gregLO3ofRM(modrm)));
15986 /* compute dV unsigned x sV signed */
15987 assign( sVoddsSX,
15988 binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
15989 assign( sVevensSX,
15990 binop(Iop_SarN16x4,
15991 binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)),
15992 mkU8(8)) );
15993 assign( dVoddsZX,
15994 binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
15995 assign( dVevensZX,
15996 binop(Iop_ShrN16x4,
15997 binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
15998 mkU8(8)) );
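/* The shift pairs above pull the two bytes out of each 16-bit lane: Sar by 8
   sign-extends the high (odd-numbered) byte of sV, Shl-then-Sar the low
   (even-numbered) byte, while the Shr forms do the same for dV with zero
   extension.  The two per-lane products are then combined below with a
   signed saturating add. */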
16000 putMMXReg(
16001 gregLO3ofRM(modrm),
16002 binop(Iop_QAdd16Sx4,
16003 binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
16004 binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
16007 goto decode_success;
16009 break;
16011 case 0x08:
16012 case 0x09:
16013 case 0x0A:
16014 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
16015 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
16016 /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
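/* In each lane, a negative s lane negates the corresponding d lane, a zero
   s lane zeroes it, and a positive s lane leaves it unchanged (see
   dis_PSIGN_helper). */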
16017 if (have66noF2noF3(pfx)
16018 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
16019 IRTemp sV = newTemp(Ity_V128);
16020 IRTemp dV = newTemp(Ity_V128);
16021 IRTemp sHi = newTemp(Ity_I64);
16022 IRTemp sLo = newTemp(Ity_I64);
16023 IRTemp dHi = newTemp(Ity_I64);
16024 IRTemp dLo = newTemp(Ity_I64);
16025 const HChar* str = "???";
16026 Int laneszB = 0;
16028 switch (opc) {
16029 case 0x08: laneszB = 1; str = "b"; break;
16030 case 0x09: laneszB = 2; str = "w"; break;
16031 case 0x0A: laneszB = 4; str = "d"; break;
16032 default: vassert(0);
16035 modrm = getUChar(delta);
16036 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
16038 if (epartIsReg(modrm)) {
16039 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
16040 delta += 1;
16041 DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
16042 nameXMMReg(gregOfRexRM(pfx,modrm)));
16043 } else {
16044 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16045 gen_SEGV_if_not_16_aligned( addr );
16046 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16047 delta += alen;
16048 DIP("psign%s %s,%s\n", str, dis_buf,
16049 nameXMMReg(gregOfRexRM(pfx,modrm)));
16052 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
16053 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
16054 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
16055 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
16057 putXMMReg(
16058 gregOfRexRM(pfx,modrm),
16059 binop(Iop_64HLtoV128,
16060 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
16061 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
16064 goto decode_success;
16066 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */
16067 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
16068 /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
16069 if (haveNo66noF2noF3(pfx) && sz == 4) {
16070 IRTemp sV = newTemp(Ity_I64);
16071 IRTemp dV = newTemp(Ity_I64);
16072 const HChar* str = "???";
16073 Int laneszB = 0;
16075 switch (opc) {
16076 case 0x08: laneszB = 1; str = "b"; break;
16077 case 0x09: laneszB = 2; str = "w"; break;
16078 case 0x0A: laneszB = 4; str = "d"; break;
16079 default: vassert(0);
16082 modrm = getUChar(delta);
16083 do_MMX_preamble();
16084 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
16086 if (epartIsReg(modrm)) {
16087 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
16088 delta += 1;
16089 DIP("psign%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
16090 nameMMXReg(gregLO3ofRM(modrm)));
16091 } else {
16092 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16093 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16094 delta += alen;
16095 DIP("psign%s %s,%s\n", str, dis_buf,
16096 nameMMXReg(gregLO3ofRM(modrm)));
16099 putMMXReg(
16100 gregLO3ofRM(modrm),
16101 dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
16103 goto decode_success;
16105 break;
16107 case 0x0B:
16108 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
16109 Scale (XMM) */
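/* Per 16-bit lane the signed 32-bit product is rounded and scaled:
   (((a*b) >> 14) + 1) >> 1, i.e. (a*b + 0x4000) >> 15. */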
16110 if (have66noF2noF3(pfx)
16111 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
16112 IRTemp sV = newTemp(Ity_V128);
16113 IRTemp dV = newTemp(Ity_V128);
16114 IRTemp sHi = newTemp(Ity_I64);
16115 IRTemp sLo = newTemp(Ity_I64);
16116 IRTemp dHi = newTemp(Ity_I64);
16117 IRTemp dLo = newTemp(Ity_I64);
16119 modrm = getUChar(delta);
16120 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
16122 if (epartIsReg(modrm)) {
16123 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
16124 delta += 1;
16125 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
16126 nameXMMReg(gregOfRexRM(pfx,modrm)));
16127 } else {
16128 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16129 gen_SEGV_if_not_16_aligned( addr );
16130 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16131 delta += alen;
16132 DIP("pmulhrsw %s,%s\n", dis_buf,
16133 nameXMMReg(gregOfRexRM(pfx,modrm)));
16136 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
16137 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
16138 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
16139 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
16141 putXMMReg(
16142 gregOfRexRM(pfx,modrm),
16143 binop(Iop_64HLtoV128,
16144 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
16145 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
16148 goto decode_success;
16150 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
16151 (MMX) */
16152 if (haveNo66noF2noF3(pfx) && sz == 4) {
16153 IRTemp sV = newTemp(Ity_I64);
16154 IRTemp dV = newTemp(Ity_I64);
16156 modrm = getUChar(delta);
16157 do_MMX_preamble();
16158 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
16160 if (epartIsReg(modrm)) {
16161 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
16162 delta += 1;
16163 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
16164 nameMMXReg(gregLO3ofRM(modrm)));
16165 } else {
16166 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16167 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16168 delta += alen;
16169 DIP("pmulhrsw %s,%s\n", dis_buf,
16170 nameMMXReg(gregLO3ofRM(modrm)));
16173 putMMXReg(
16174 gregLO3ofRM(modrm),
16175 dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
16177 goto decode_success;
16179 break;
16181 case 0x1C:
16182 case 0x1D:
16183 case 0x1E:
16184 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
16185 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
16186 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
16187 if (have66noF2noF3(pfx)
16188 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
16189 IRTemp sV = newTemp(Ity_V128);
16190 const HChar* str = "???";
16191 Int laneszB = 0;
16193 switch (opc) {
16194 case 0x1C: laneszB = 1; str = "b"; break;
16195 case 0x1D: laneszB = 2; str = "w"; break;
16196 case 0x1E: laneszB = 4; str = "d"; break;
16197 default: vassert(0);
16200 modrm = getUChar(delta);
16201 if (epartIsReg(modrm)) {
16202 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
16203 delta += 1;
16204 DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
16205 nameXMMReg(gregOfRexRM(pfx,modrm)));
16206 } else {
16207 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16208 gen_SEGV_if_not_16_aligned( addr );
16209 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16210 delta += alen;
16211 DIP("pabs%s %s,%s\n", str, dis_buf,
16212 nameXMMReg(gregOfRexRM(pfx,modrm)));
16215 putXMMReg( gregOfRexRM(pfx,modrm),
16216 mkexpr(math_PABS_XMM(sV, laneszB)) );
16217 goto decode_success;
16219 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */
16220 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
16221 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
16222 if (haveNo66noF2noF3(pfx) && sz == 4) {
16223 IRTemp sV = newTemp(Ity_I64);
16224 const HChar* str = "???";
16225 Int laneszB = 0;
16227 switch (opc) {
16228 case 0x1C: laneszB = 1; str = "b"; break;
16229 case 0x1D: laneszB = 2; str = "w"; break;
16230 case 0x1E: laneszB = 4; str = "d"; break;
16231 default: vassert(0);
16234 modrm = getUChar(delta);
16235 do_MMX_preamble();
16237 if (epartIsReg(modrm)) {
16238 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
16239 delta += 1;
16240 DIP("pabs%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
16241 nameMMXReg(gregLO3ofRM(modrm)));
16242 } else {
16243 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16244 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16245 delta += alen;
16246 DIP("pabs%s %s,%s\n", str, dis_buf,
16247 nameMMXReg(gregLO3ofRM(modrm)));
16250 putMMXReg( gregLO3ofRM(modrm),
16251 mkexpr(math_PABS_MMX( sV, laneszB )) );
16252 goto decode_success;
16254 break;
16256 default:
16257 break;
16261 //decode_failure:
16262 *decode_OK = False;
16263 return deltaIN;
16265 decode_success:
16266 *decode_OK = True;
16267 return delta;
16271 /*------------------------------------------------------------*/
16272 /*--- ---*/
16273 /*--- Top-level SSSE3: dis_ESC_0F3A__SupSSE3 ---*/
16274 /*--- ---*/
16275 /*------------------------------------------------------------*/
16277 __attribute__((noinline))
16278 static
16279 Long dis_ESC_0F3A__SupSSE3 ( Bool* decode_OK,
16280 const VexAbiInfo* vbi,
16281 Prefix pfx, Int sz, Long deltaIN )
16283 Long d64 = 0;
16284 IRTemp addr = IRTemp_INVALID;
16285 UChar modrm = 0;
16286 Int alen = 0;
16287 HChar dis_buf[50];
16289 *decode_OK = False;
16291 Long delta = deltaIN;
16292 UChar opc = getUChar(delta);
16293 delta++;
16294 switch (opc) {
16296 case 0x0F:
16297 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
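/* The two operands are treated as the 32-byte value dV:sV (dV in the high
   half); that value is shifted right by imm8 bytes and the low 16 bytes of
   what remains form the result. */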
16298 if (have66noF2noF3(pfx)
16299 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
16300 IRTemp sV = newTemp(Ity_V128);
16301 IRTemp dV = newTemp(Ity_V128);
16303 modrm = getUChar(delta);
16304 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
16306 if (epartIsReg(modrm)) {
16307 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
16308 d64 = (Long)getUChar(delta+1);
16309 delta += 1+1;
16310 DIP("palignr $%lld,%s,%s\n", d64,
16311 nameXMMReg(eregOfRexRM(pfx,modrm)),
16312 nameXMMReg(gregOfRexRM(pfx,modrm)));
16313 } else {
16314 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
16315 gen_SEGV_if_not_16_aligned( addr );
16316 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16317 d64 = (Long)getUChar(delta+alen);
16318 delta += alen+1;
16319 DIP("palignr $%lld,%s,%s\n", d64,
16320 dis_buf,
16321 nameXMMReg(gregOfRexRM(pfx,modrm)));
16324 IRTemp res = math_PALIGNR_XMM( sV, dV, d64 );
16325 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
16326 goto decode_success;
16328 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
16329 if (haveNo66noF2noF3(pfx) && sz == 4) {
16330 IRTemp sV = newTemp(Ity_I64);
16331 IRTemp dV = newTemp(Ity_I64);
16332 IRTemp res = newTemp(Ity_I64);
16334 modrm = getUChar(delta);
16335 do_MMX_preamble();
16336 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
16338 if (epartIsReg(modrm)) {
16339 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
16340 d64 = (Long)getUChar(delta+1);
16341 delta += 1+1;
16342 DIP("palignr $%lld,%s,%s\n", d64,
16343 nameMMXReg(eregLO3ofRM(modrm)),
16344 nameMMXReg(gregLO3ofRM(modrm)));
16345 } else {
16346 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
16347 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16348 d64 = (Long)getUChar(delta+alen);
16349 delta += alen+1;
16350 DIP("palignr $%lld%s,%s\n", d64,
16351 dis_buf,
16352 nameMMXReg(gregLO3ofRM(modrm)));
16355 if (d64 == 0) {
16356 assign( res, mkexpr(sV) );
16358 else if (d64 >= 1 && d64 <= 7) {
16359 assign(res,
16360 binop(Iop_Or64,
16361 binop(Iop_Shr64, mkexpr(sV), mkU8(8*d64)),
16362 binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d64))
16363 )));
16365 else if (d64 == 8) {
16366 assign( res, mkexpr(dV) );
16368 else if (d64 >= 9 && d64 <= 15) {
16369 assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d64-8))) );
16371 else if (d64 >= 16 && d64 <= 255) {
16372 assign( res, mkU64(0) );
16374 else
16375 vassert(0);
16377 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
16378 goto decode_success;
16380 break;
16382 default:
16383 break;
16387 //decode_failure:
16388 *decode_OK = False;
16389 return deltaIN;
16391 decode_success:
16392 *decode_OK = True;
16393 return delta;
16397 /*------------------------------------------------------------*/
16398 /*--- ---*/
16399 /*--- Top-level SSE4: dis_ESC_0F__SSE4 ---*/
16400 /*--- ---*/
16401 /*------------------------------------------------------------*/
16403 __attribute__((noinline))
16404 static
16405 Long dis_ESC_0F__SSE4 ( Bool* decode_OK,
16406 const VexArchInfo* archinfo,
16407 const VexAbiInfo* vbi,
16408 Prefix pfx, Int sz, Long deltaIN )
16410 IRTemp addr = IRTemp_INVALID;
16411 IRType ty = Ity_INVALID;
16412 UChar modrm = 0;
16413 Int alen = 0;
16414 HChar dis_buf[50];
16416 *decode_OK = False;
16418 Long delta = deltaIN;
16419 UChar opc = getUChar(delta);
16420 delta++;
16421 switch (opc) {
16423 case 0xB8:
16424 /* F3 0F B8 = POPCNT{W,L,Q}
16425 Count the number of 1 bits in a register
16427 if (haveF3noF2(pfx) /* so both 66 and REX.W are possibilities */
16428 && (sz == 2 || sz == 4 || sz == 8)) {
16429 /*IRType*/ ty = szToITy(sz);
16430 IRTemp src = newTemp(ty);
16431 modrm = getUChar(delta);
16432 if (epartIsReg(modrm)) {
16433 assign(src, getIRegE(sz, pfx, modrm));
16434 delta += 1;
16435 DIP("popcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
16436 nameIRegG(sz, pfx, modrm));
16437 } else {
16438 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
16439 assign(src, loadLE(ty, mkexpr(addr)));
16440 delta += alen;
16441 DIP("popcnt%c %s, %s\n", nameISize(sz), dis_buf,
16442 nameIRegG(sz, pfx, modrm));
16445 IRTemp result = gen_POPCOUNT(ty, src);
16446 putIRegG(sz, pfx, modrm, mkexpr(result));
16448 // Update flags. This is pretty lame .. perhaps can do better
16449 // if this turns out to be performance critical.
16450 // O S A C P are cleared. Z is set if SRC == 0.
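// With CC_OP set to AMD64G_CC_OP_COPY, DEP1 is taken as the literal OSZACP
// flag bits, so shifting the Z condition into position AMD64G_CC_SHIFT_Z
// (and leaving everything else zero) yields exactly the flag state above.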
16451 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16452 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16453 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16454 stmt( IRStmt_Put( OFFB_CC_DEP1,
16455 binop(Iop_Shl64,
16456 unop(Iop_1Uto64,
16457 binop(Iop_CmpEQ64,
16458 widenUto64(mkexpr(src)),
16459 mkU64(0))),
16460 mkU8(AMD64G_CC_SHIFT_Z))));
16462 goto decode_success;
16464 break;
16466 case 0xBC:
16467 /* F3 0F BC -- TZCNT (count trailing zeroes). A BMI extension,
16468 which we can only decode if we're sure this is a BMI1 capable cpu
16469 that supports TZCNT, since otherwise it's BSF, which behaves
16470 differently on zero source. */
16471 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
16472 && (sz == 2 || sz == 4 || sz == 8)
16473 && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_BMI)) {
16474 /*IRType*/ ty = szToITy(sz);
16475 IRTemp src = newTemp(ty);
16476 modrm = getUChar(delta);
16477 if (epartIsReg(modrm)) {
16478 assign(src, getIRegE(sz, pfx, modrm));
16479 delta += 1;
16480 DIP("tzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
16481 nameIRegG(sz, pfx, modrm));
16482 } else {
16483 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
16484 assign(src, loadLE(ty, mkexpr(addr)));
16485 delta += alen;
16486 DIP("tzcnt%c %s, %s\n", nameISize(sz), dis_buf,
16487 nameIRegG(sz, pfx, modrm));
16490 IRTemp res = gen_TZCNT(ty, src);
16491 putIRegG(sz, pfx, modrm, mkexpr(res));
16493 // Update flags. This is pretty lame .. perhaps can do better
16494 // if this turns out to be performance critical.
16495 // O S A P are cleared. Z is set if RESULT == 0.
16496 // C is set if SRC is zero.
16497 IRTemp src64 = newTemp(Ity_I64);
16498 IRTemp res64 = newTemp(Ity_I64);
16499 assign(src64, widenUto64(mkexpr(src)));
16500 assign(res64, widenUto64(mkexpr(res)));
16502 IRTemp oszacp = newTemp(Ity_I64);
16503 assign(
16504 oszacp,
16505 binop(Iop_Or64,
16506 binop(Iop_Shl64,
16507 unop(Iop_1Uto64,
16508 binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))),
16509 mkU8(AMD64G_CC_SHIFT_Z)),
16510 binop(Iop_Shl64,
16511 unop(Iop_1Uto64,
16512 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))),
16513 mkU8(AMD64G_CC_SHIFT_C))
16517 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16518 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16519 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16520 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));
16522 goto decode_success;
16524 break;
16526 case 0xBD:
16527 /* F3 0F BD -- LZCNT (count leading zeroes). An AMD extension,
16528 which we can only decode if we're sure this is an AMD cpu
16529 that supports LZCNT, since otherwise it's BSR, which behaves
16530 differently. Bizarrely, my Sandy Bridge also accepts these
16531 instructions but produces different results. */
16532 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
16533 && (sz == 2 || sz == 4 || sz == 8)
16534 && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT)) {
16535 /*IRType*/ ty = szToITy(sz);
16536 IRTemp src = newTemp(ty);
16537 modrm = getUChar(delta);
16538 if (epartIsReg(modrm)) {
16539 assign(src, getIRegE(sz, pfx, modrm));
16540 delta += 1;
16541 DIP("lzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
16542 nameIRegG(sz, pfx, modrm));
16543 } else {
16544 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
16545 assign(src, loadLE(ty, mkexpr(addr)));
16546 delta += alen;
16547 DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf,
16548 nameIRegG(sz, pfx, modrm));
16551 IRTemp res = gen_LZCNT(ty, src);
16552 putIRegG(sz, pfx, modrm, mkexpr(res));
16554 // Update flags. This is pretty lame .. perhaps can do better
16555 // if this turns out to be performance critical.
16556 // O S A P are cleared. Z is set if RESULT == 0.
16557 // C is set if SRC is zero.
16558 IRTemp src64 = newTemp(Ity_I64);
16559 IRTemp res64 = newTemp(Ity_I64);
16560 assign(src64, widenUto64(mkexpr(src)));
16561 assign(res64, widenUto64(mkexpr(res)));
16563 IRTemp oszacp = newTemp(Ity_I64);
16564 assign(
16565 oszacp,
16566 binop(Iop_Or64,
16567 binop(Iop_Shl64,
16568 unop(Iop_1Uto64,
16569 binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))),
16570 mkU8(AMD64G_CC_SHIFT_Z)),
16571 binop(Iop_Shl64,
16572 unop(Iop_1Uto64,
16573 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))),
16574 mkU8(AMD64G_CC_SHIFT_C))
16578 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16579 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16580 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16581 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));
16583 goto decode_success;
16585 break;
16587 default:
16588 break;
16592 //decode_failure:
16593 *decode_OK = False;
16594 return deltaIN;
16596 decode_success:
16597 *decode_OK = True;
16598 return delta;
16602 /*------------------------------------------------------------*/
16603 /*--- ---*/
16604 /*--- Top-level SSE4: dis_ESC_0F38__SSE4 ---*/
16605 /*--- ---*/
16606 /*------------------------------------------------------------*/
16608 static IRTemp math_PBLENDVB_128 ( IRTemp vecE, IRTemp vecG,
16609 IRTemp vec0/*controlling mask*/,
16610 UInt gran, IROp opSAR )
16612 /* The tricky bit is to convert vec0 into a suitable mask, by
16613 copying the most significant bit of each lane into all positions
16614 in the lane. */
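/* The arithmetic right shift by (lane width - 1) bits smears each lane's
   sign bit across the whole lane, so mask is all-ones where the control
   lane's MSB was set and all-zeroes otherwise; the blend is then
   (vecE & mask) | (vecG & ~mask). */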
16615 IRTemp sh = newTemp(Ity_I8);
16616 assign(sh, mkU8(8 * gran - 1));
16618 IRTemp mask = newTemp(Ity_V128);
16619 assign(mask, binop(opSAR, mkexpr(vec0), mkexpr(sh)));
16621 IRTemp notmask = newTemp(Ity_V128);
16622 assign(notmask, unop(Iop_NotV128, mkexpr(mask)));
16624 IRTemp res = newTemp(Ity_V128);
16625 assign(res, binop(Iop_OrV128,
16626 binop(Iop_AndV128, mkexpr(vecE), mkexpr(mask)),
16627 binop(Iop_AndV128, mkexpr(vecG), mkexpr(notmask))));
16628 return res;
16631 static IRTemp math_PBLENDVB_256 ( IRTemp vecE, IRTemp vecG,
16632 IRTemp vec0/*controlling mask*/,
16633 UInt gran, IROp opSAR128 )
16635 /* The tricky bit is to convert vec0 into a suitable mask, by
16636 copying the most significant bit of each lane into all positions
16637 in the lane. */
16638 IRTemp sh = newTemp(Ity_I8);
16639 assign(sh, mkU8(8 * gran - 1));
16641 IRTemp vec0Hi = IRTemp_INVALID;
16642 IRTemp vec0Lo = IRTemp_INVALID;
16643 breakupV256toV128s( vec0, &vec0Hi, &vec0Lo );
16645 IRTemp mask = newTemp(Ity_V256);
16646 assign(mask, binop(Iop_V128HLtoV256,
16647 binop(opSAR128, mkexpr(vec0Hi), mkexpr(sh)),
16648 binop(opSAR128, mkexpr(vec0Lo), mkexpr(sh))));
16650 IRTemp notmask = newTemp(Ity_V256);
16651 assign(notmask, unop(Iop_NotV256, mkexpr(mask)));
16653 IRTemp res = newTemp(Ity_V256);
16654 assign(res, binop(Iop_OrV256,
16655 binop(Iop_AndV256, mkexpr(vecE), mkexpr(mask)),
16656 binop(Iop_AndV256, mkexpr(vecG), mkexpr(notmask))));
16657 return res;
16660 static Long dis_VBLENDV_128 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
16661 const HChar *name, UInt gran, IROp opSAR )
16663 IRTemp addr = IRTemp_INVALID;
16664 Int alen = 0;
16665 HChar dis_buf[50];
16666 UChar modrm = getUChar(delta);
16667 UInt rG = gregOfRexRM(pfx, modrm);
16668 UInt rV = getVexNvvvv(pfx);
16669 UInt rIS4 = 0xFF; /* invalid */
16670 IRTemp vecE = newTemp(Ity_V128);
16671 IRTemp vecV = newTemp(Ity_V128);
16672 IRTemp vecIS4 = newTemp(Ity_V128);
16673 if (epartIsReg(modrm)) {
16674 delta++;
16675 UInt rE = eregOfRexRM(pfx, modrm);
16676 assign(vecE, getXMMReg(rE));
16677 UChar ib = getUChar(delta);
16678 rIS4 = (ib >> 4) & 0xF;
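/* The mask register is the is4 field: the top four bits of the immediate
   byte that follows the operands. */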
16679 DIP("%s %s,%s,%s,%s\n",
16680 name, nameXMMReg(rIS4), nameXMMReg(rE),
16681 nameXMMReg(rV), nameXMMReg(rG));
16682 } else {
16683 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
16684 delta += alen;
16685 assign(vecE, loadLE(Ity_V128, mkexpr(addr)));
16686 UChar ib = getUChar(delta);
16687 rIS4 = (ib >> 4) & 0xF;
16688 DIP("%s %s,%s,%s,%s\n",
16689 name, nameXMMReg(rIS4), dis_buf, nameXMMReg(rV), nameXMMReg(rG));
16691 delta++;
16692 assign(vecV, getXMMReg(rV));
16693 assign(vecIS4, getXMMReg(rIS4));
16694 IRTemp res = math_PBLENDVB_128( vecE, vecV, vecIS4, gran, opSAR );
16695 putYMMRegLoAndZU( rG, mkexpr(res) );
16696 return delta;
16699 static Long dis_VBLENDV_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
16700 const HChar *name, UInt gran, IROp opSAR128 )
16702 IRTemp addr = IRTemp_INVALID;
16703 Int alen = 0;
16704 HChar dis_buf[50];
16705 UChar modrm = getUChar(delta);
16706 UInt rG = gregOfRexRM(pfx, modrm);
16707 UInt rV = getVexNvvvv(pfx);
16708 UInt rIS4 = 0xFF; /* invalid */
16709 IRTemp vecE = newTemp(Ity_V256);
16710 IRTemp vecV = newTemp(Ity_V256);
16711 IRTemp vecIS4 = newTemp(Ity_V256);
16712 if (epartIsReg(modrm)) {
16713 delta++;
16714 UInt rE = eregOfRexRM(pfx, modrm);
16715 assign(vecE, getYMMReg(rE));
16716 UChar ib = getUChar(delta);
16717 rIS4 = (ib >> 4) & 0xF;
16718 DIP("%s %s,%s,%s,%s\n",
16719 name, nameYMMReg(rIS4), nameYMMReg(rE),
16720 nameYMMReg(rV), nameYMMReg(rG));
16721 } else {
16722 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
16723 delta += alen;
16724 assign(vecE, loadLE(Ity_V256, mkexpr(addr)));
16725 UChar ib = getUChar(delta);
16726 rIS4 = (ib >> 4) & 0xF;
16727 DIP("%s %s,%s,%s,%s\n",
16728 name, nameYMMReg(rIS4), dis_buf, nameYMMReg(rV), nameYMMReg(rG));
16730 delta++;
16731 assign(vecV, getYMMReg(rV));
16732 assign(vecIS4, getYMMReg(rIS4));
16733 IRTemp res = math_PBLENDVB_256( vecE, vecV, vecIS4, gran, opSAR128 );
16734 putYMMReg( rG, mkexpr(res) );
16735 return delta;
16738 static void finish_xTESTy ( IRTemp andV, IRTemp andnV, Int sign )
16740 /* Set Z=1 iff (vecE & vecG) == 0
16741 Set C=1 iff (vecE & not vecG) == 0
16744 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
16746 /* andV resp. andnV, reduced to 64-bit values, by or-ing the top
16747 and bottom 64-bits together. It relies on this trick:
16749 InterleaveLO64x2([a,b],[c,d]) == [b,d] hence
16751 InterleaveLO64x2([a,b],[a,b]) == [b,b] and similarly
16752 InterleaveHI64x2([a,b],[a,b]) == [a,a]
16754 and so the OR of the above 2 exprs produces
16755 [a OR b, a OR b], from which we simply take the lower half.
16757 IRTemp and64 = newTemp(Ity_I64);
16758 IRTemp andn64 = newTemp(Ity_I64);
16760 assign(and64,
16761 unop(Iop_V128to64,
16762 binop(Iop_OrV128,
16763 binop(Iop_InterleaveLO64x2,
16764 mkexpr(andV), mkexpr(andV)),
16765 binop(Iop_InterleaveHI64x2,
16766 mkexpr(andV), mkexpr(andV)))));
16768 assign(andn64,
16769 unop(Iop_V128to64,
16770 binop(Iop_OrV128,
16771 binop(Iop_InterleaveLO64x2,
16772 mkexpr(andnV), mkexpr(andnV)),
16773 binop(Iop_InterleaveHI64x2,
16774 mkexpr(andnV), mkexpr(andnV)))));
16776 IRTemp z64 = newTemp(Ity_I64);
16777 IRTemp c64 = newTemp(Ity_I64);
16778 if (sign == 64) {
16779 /* When only interested in the most significant bit, just shift
16780 arithmetically right and negate. */
16781 assign(z64,
16782 unop(Iop_Not64,
16783 binop(Iop_Sar64, mkexpr(and64), mkU8(63))));
16785 assign(c64,
16786 unop(Iop_Not64,
16787 binop(Iop_Sar64, mkexpr(andn64), mkU8(63))));
16788 } else {
16789 if (sign == 32) {
16790          /* When interested only in bits 31 and 63, mask those bits and
16791             fall through into the PTEST handling. */
16792 IRTemp t0 = newTemp(Ity_I64);
16793 IRTemp t1 = newTemp(Ity_I64);
16794 IRTemp t2 = newTemp(Ity_I64);
16795 assign(t0, mkU64(0x8000000080000000ULL));
16796 assign(t1, binop(Iop_And64, mkexpr(and64), mkexpr(t0)));
16797 assign(t2, binop(Iop_And64, mkexpr(andn64), mkexpr(t0)));
16798 and64 = t1;
16799 andn64 = t2;
16801 /* Now convert and64, andn64 to all-zeroes or all-1s, so we can
16802 slice out the Z and C bits conveniently. We use the standard
16803 trick all-zeroes -> all-zeroes, anything-else -> all-ones
16804 done by "(x | -x) >>s (word-size - 1)".
16806 assign(z64,
16807 unop(Iop_Not64,
16808 binop(Iop_Sar64,
16809 binop(Iop_Or64,
16810 binop(Iop_Sub64, mkU64(0), mkexpr(and64)),
16811 mkexpr(and64)), mkU8(63))));
16813 assign(c64,
16814 unop(Iop_Not64,
16815 binop(Iop_Sar64,
16816 binop(Iop_Or64,
16817 binop(Iop_Sub64, mkU64(0), mkexpr(andn64)),
16818 mkexpr(andn64)), mkU8(63))));
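      /* E.g. for and64 == 0: 0 | -0 is 0, the arithmetic shift gives 0,
         and the Not64 gives all-ones, so the Z mask ends up set.  For any
         non-zero and64, (x | -x) has bit 63 set, the shift gives all-ones,
         and the Not64 gives zero, so the mask ends up clear. */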
16821    /* And finally, slice out the Z and C flags and set the flags
16822       thunk to COPY for them.  O, S, A and P are set to zero. */
16823 IRTemp newOSZACP = newTemp(Ity_I64);
16824 assign(newOSZACP,
16825 binop(Iop_Or64,
16826 binop(Iop_And64, mkexpr(z64), mkU64(AMD64G_CC_MASK_Z)),
16827 binop(Iop_And64, mkexpr(c64), mkU64(AMD64G_CC_MASK_C))));
16829 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(newOSZACP)));
16830 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16831 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16832 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16836 /* Handles 128 bit versions of PTEST, VTESTPS or VTESTPD.
16837 sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
16838 static Long dis_xTESTy_128 ( const VexAbiInfo* vbi, Prefix pfx,
16839 Long delta, Bool isAvx, Int sign )
16841 IRTemp addr = IRTemp_INVALID;
16842 Int alen = 0;
16843 HChar dis_buf[50];
16844 UChar modrm = getUChar(delta);
16845 UInt rG = gregOfRexRM(pfx, modrm);
16846 IRTemp vecE = newTemp(Ity_V128);
16847 IRTemp vecG = newTemp(Ity_V128);
16849 if ( epartIsReg(modrm) ) {
16850 UInt rE = eregOfRexRM(pfx, modrm);
16851 assign(vecE, getXMMReg(rE));
16852 delta += 1;
16853 DIP( "%s%stest%s %s,%s\n",
16854 isAvx ? "v" : "", sign == 0 ? "p" : "",
16855 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
16856 nameXMMReg(rE), nameXMMReg(rG) );
16857 } else {
16858 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16859 if (!isAvx)
16860 gen_SEGV_if_not_16_aligned( addr );
16861 assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
16862 delta += alen;
16863 DIP( "%s%stest%s %s,%s\n",
16864 isAvx ? "v" : "", sign == 0 ? "p" : "",
16865 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
16866 dis_buf, nameXMMReg(rG) );
16869 assign(vecG, getXMMReg(rG));
16871 /* Set Z=1 iff (vecE & vecG) == 0
16872 Set C=1 iff (vecE & not vecG) == 0
16875 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
16876 IRTemp andV = newTemp(Ity_V128);
16877 IRTemp andnV = newTemp(Ity_V128);
16878 assign(andV, binop(Iop_AndV128, mkexpr(vecE), mkexpr(vecG)));
16879 assign(andnV, binop(Iop_AndV128,
16880 mkexpr(vecE),
16881 binop(Iop_XorV128, mkexpr(vecG),
16882 mkV128(0xFFFF))));
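   /* mkV128(0xFFFF) is the all-ones 128-bit constant (each bit of the
      16-bit immediate expands to one 0xFF byte), so the Xor above computes
      ~vecG and hence andnV = vecE & ~vecG. */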
16884 finish_xTESTy ( andV, andnV, sign );
16885 return delta;
16889 /* Handles 256 bit versions of PTEST, VTESTPS or VTESTPD.
16890 sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
16891 static Long dis_xTESTy_256 ( const VexAbiInfo* vbi, Prefix pfx,
16892 Long delta, Int sign )
16894 IRTemp addr = IRTemp_INVALID;
16895 Int alen = 0;
16896 HChar dis_buf[50];
16897 UChar modrm = getUChar(delta);
16898 UInt rG = gregOfRexRM(pfx, modrm);
16899 IRTemp vecE = newTemp(Ity_V256);
16900 IRTemp vecG = newTemp(Ity_V256);
16902 if ( epartIsReg(modrm) ) {
16903 UInt rE = eregOfRexRM(pfx, modrm);
16904 assign(vecE, getYMMReg(rE));
16905 delta += 1;
16906 DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
16907 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
16908 nameYMMReg(rE), nameYMMReg(rG) );
16909 } else {
16910 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16911 assign(vecE, loadLE( Ity_V256, mkexpr(addr) ));
16912 delta += alen;
16913 DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
16914 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
16915 dis_buf, nameYMMReg(rG) );
16918 assign(vecG, getYMMReg(rG));
16920 /* Set Z=1 iff (vecE & vecG) == 0
16921 Set C=1 iff (vecE & not vecG) == 0
16924 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
16925 IRTemp andV = newTemp(Ity_V256);
16926 IRTemp andnV = newTemp(Ity_V256);
16927 assign(andV, binop(Iop_AndV256, mkexpr(vecE), mkexpr(vecG)));
16928 assign(andnV, binop(Iop_AndV256,
16929 mkexpr(vecE), unop(Iop_NotV256, mkexpr(vecG))));
16931 IRTemp andVhi = IRTemp_INVALID;
16932 IRTemp andVlo = IRTemp_INVALID;
16933 IRTemp andnVhi = IRTemp_INVALID;
16934 IRTemp andnVlo = IRTemp_INVALID;
16935 breakupV256toV128s( andV, &andVhi, &andVlo );
16936 breakupV256toV128s( andnV, &andnVhi, &andnVlo );
16938 IRTemp andV128 = newTemp(Ity_V128);
16939 IRTemp andnV128 = newTemp(Ity_V128);
16940 assign( andV128, binop( Iop_OrV128, mkexpr(andVhi), mkexpr(andVlo) ) );
16941 assign( andnV128, binop( Iop_OrV128, mkexpr(andnVhi), mkexpr(andnVlo) ) );
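   /* OR-ing the two 128-bit halves together preserves "is the whole
      256-bit value zero?", which is all finish_xTESTy needs, so the
      128-bit finisher can be reused unchanged. */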
16943 finish_xTESTy ( andV128, andnV128, sign );
16944 return delta;
16948 /* Handles 128 and 256 bit versions of VCVTPH2PS. */
16949 static Long dis_VCVTPH2PS ( const VexAbiInfo* vbi, Prefix pfx,
16950 Long delta, Bool is256bit )
16952    /* This is a width-doubling load or reg-reg move that converts the
16953       transferred data (F16 -> F32) on the way through. */
16954 UChar modrm = getUChar(delta);
16955 UInt rG = gregOfRexRM(pfx, modrm);
16956 IRTemp srcE = newTemp(is256bit ? Ity_V128 : Ity_I64);
16958 if (epartIsReg(modrm)) {
16959 UInt rE = eregOfRexRM(pfx, modrm);
16960 assign(srcE, is256bit ? unop(Iop_V256toV128_0, getYMMReg(rE))
16961 : unop(Iop_V128to64, getXMMReg(rE)));
16962 delta += 1;
16963 DIP("vcvtph2ps %s,%s\n", nameXMMReg(rE),
16964 (is256bit ? nameYMMReg: nameXMMReg)(rG));
16965 } else {
16966 Int alen = 0;
16967 HChar dis_buf[50];
16968 IRTemp addr = disAMode(&alen, vbi, pfx, delta, dis_buf, 0);
16969       // I don't think we need an alignment check here (not 100% sure, though).
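      // (Per the SDM, VEX-encoded loads other than the explicitly-aligned
      // MOVA*/MOVNT* forms do not fault on misalignment, so omitting the
      // check looks right.)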
16970 assign(srcE, loadLE(is256bit ? Ity_V128 : Ity_I64, mkexpr(addr)));
16971 delta += alen;
16972 DIP( "vcvtph2ps %s,%s\n", dis_buf,
16973 (is256bit ? nameYMMReg: nameXMMReg)(rG));
16976 IRExpr* res = unop(is256bit ? Iop_F16toF32x8 : Iop_F16toF32x4, mkexpr(srcE));
16977 (is256bit ? putYMMReg : putYMMRegLoAndZU)(rG, res);
16979 return delta;
16983 /* Handles 128 bit versions of PMOVZXBW and PMOVSXBW. */
16984 static Long dis_PMOVxXBW_128 ( const VexAbiInfo* vbi, Prefix pfx,
16985 Long delta, Bool isAvx, Bool xIsZ )
16987 IRTemp addr = IRTemp_INVALID;
16988 Int alen = 0;
16989 HChar dis_buf[50];
16990 IRTemp srcVec = newTemp(Ity_V128);
16991 UChar modrm = getUChar(delta);
16992 const HChar* mbV = isAvx ? "v" : "";
16993 const HChar how = xIsZ ? 'z' : 's';
16994 UInt rG = gregOfRexRM(pfx, modrm);
16995 if ( epartIsReg(modrm) ) {
16996 UInt rE = eregOfRexRM(pfx, modrm);
16997 assign( srcVec, getXMMReg(rE) );
16998 delta += 1;
16999 DIP( "%spmov%cxbw %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
17000 } else {
17001 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17002 assign( srcVec,
17003 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
17004 delta += alen;
17005 DIP( "%spmov%cxbw %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
17008 IRExpr* res
17009 = xIsZ /* do math for either zero or sign extend */
17010 ? binop( Iop_InterleaveLO8x16,
17011 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) )
17012 : binop( Iop_SarN16x8,
17013 binop( Iop_ShlN16x8,
17014 binop( Iop_InterleaveLO8x16,
17015 IRExpr_Const( IRConst_V128(0) ),
17016 mkexpr(srcVec) ),
17017 mkU8(8) ),
17018 mkU8(8) );
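   /* A concrete view: interleaving zeroes with the low 8 source bytes gives
      16-bit lanes of the form 0x00bb, which is already the zero-extension;
      the Shl/Sar-by-8 pair then replicates each byte's sign bit across the
      top half of its lane, turning it into a sign-extension. */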
17020 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );
17022 return delta;
17026 /* Handles 256 bit versions of PMOVZXBW and PMOVSXBW. */
17027 static Long dis_PMOVxXBW_256 ( const VexAbiInfo* vbi, Prefix pfx,
17028 Long delta, Bool xIsZ )
17030 IRTemp addr = IRTemp_INVALID;
17031 Int alen = 0;
17032 HChar dis_buf[50];
17033 IRTemp srcVec = newTemp(Ity_V128);
17034 UChar modrm = getUChar(delta);
17035 UChar how = xIsZ ? 'z' : 's';
17036 UInt rG = gregOfRexRM(pfx, modrm);
17037 if ( epartIsReg(modrm) ) {
17038 UInt rE = eregOfRexRM(pfx, modrm);
17039 assign( srcVec, getXMMReg(rE) );
17040 delta += 1;
17041 DIP( "vpmov%cxbw %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
17042 } else {
17043 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17044 assign( srcVec, loadLE( Ity_V128, mkexpr(addr) ) );
17045 delta += alen;
17046 DIP( "vpmov%cxbw %s,%s\n", how, dis_buf, nameYMMReg(rG) );
17049 /* First do zero extend. */
17050 IRExpr* res
17051 = binop( Iop_V128HLtoV256,
17052 binop( Iop_InterleaveHI8x16,
17053 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
17054 binop( Iop_InterleaveLO8x16,
17055 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
17056    /* And, if needed, sign extension as well. */
17057 if (!xIsZ)
17058 res = binop( Iop_SarN16x16,
17059 binop( Iop_ShlN16x16, res, mkU8(8) ), mkU8(8) );
17061 putYMMReg ( rG, res );
17063 return delta;
17067 static Long dis_PMOVxXWD_128 ( const VexAbiInfo* vbi, Prefix pfx,
17068 Long delta, Bool isAvx, Bool xIsZ )
17070 IRTemp addr = IRTemp_INVALID;
17071 Int alen = 0;
17072 HChar dis_buf[50];
17073 IRTemp srcVec = newTemp(Ity_V128);
17074 UChar modrm = getUChar(delta);
17075 const HChar* mbV = isAvx ? "v" : "";
17076 const HChar how = xIsZ ? 'z' : 's';
17077 UInt rG = gregOfRexRM(pfx, modrm);
17079 if ( epartIsReg(modrm) ) {
17080 UInt rE = eregOfRexRM(pfx, modrm);
17081 assign( srcVec, getXMMReg(rE) );
17082 delta += 1;
17083 DIP( "%spmov%cxwd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
17084 } else {
17085 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17086 assign( srcVec,
17087 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
17088 delta += alen;
17089 DIP( "%spmov%cxwd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
17092 IRExpr* res
17093 = binop( Iop_InterleaveLO16x8,
17094 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) );
17095 if (!xIsZ)
17096 res = binop(Iop_SarN32x4,
17097 binop(Iop_ShlN32x4, res, mkU8(16)), mkU8(16));
17099    (isAvx ? putYMMRegLoAndZU : putXMMReg)
17100       ( rG, res );
17102 return delta;
17106 static Long dis_PMOVxXWD_256 ( const VexAbiInfo* vbi, Prefix pfx,
17107 Long delta, Bool xIsZ )
17109 IRTemp addr = IRTemp_INVALID;
17110 Int alen = 0;
17111 HChar dis_buf[50];
17112 IRTemp srcVec = newTemp(Ity_V128);
17113 UChar modrm = getUChar(delta);
17114 UChar how = xIsZ ? 'z' : 's';
17115 UInt rG = gregOfRexRM(pfx, modrm);
17117 if ( epartIsReg(modrm) ) {
17118 UInt rE = eregOfRexRM(pfx, modrm);
17119 assign( srcVec, getXMMReg(rE) );
17120 delta += 1;
17121 DIP( "vpmov%cxwd %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
17122 } else {
17123 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17124 assign( srcVec, loadLE( Ity_V128, mkexpr(addr) ) );
17125 delta += alen;
17126 DIP( "vpmov%cxwd %s,%s\n", how, dis_buf, nameYMMReg(rG) );
17129 IRExpr* res
17130 = binop( Iop_V128HLtoV256,
17131 binop( Iop_InterleaveHI16x8,
17132 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
17133 binop( Iop_InterleaveLO16x8,
17134 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
17135 if (!xIsZ)
17136 res = binop(Iop_SarN32x8,
17137 binop(Iop_ShlN32x8, res, mkU8(16)), mkU8(16));
17139 putYMMReg ( rG, res );
17141 return delta;
17145 static Long dis_PMOVSXWQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17146 Long delta, Bool isAvx )
17148 IRTemp addr = IRTemp_INVALID;
17149 Int alen = 0;
17150 HChar dis_buf[50];
17151 IRTemp srcBytes = newTemp(Ity_I32);
17152 UChar modrm = getUChar(delta);
17153 const HChar* mbV = isAvx ? "v" : "";
17154 UInt rG = gregOfRexRM(pfx, modrm);
17156 if ( epartIsReg( modrm ) ) {
17157 UInt rE = eregOfRexRM(pfx, modrm);
17158 assign( srcBytes, getXMMRegLane32( rE, 0 ) );
17159 delta += 1;
17160 DIP( "%spmovsxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
17161 } else {
17162 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17163 assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) );
17164 delta += alen;
17165 DIP( "%spmovsxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
17168 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17169 ( rG, binop( Iop_64HLtoV128,
17170 unop( Iop_16Sto64,
17171 unop( Iop_32HIto16, mkexpr(srcBytes) ) ),
17172 unop( Iop_16Sto64,
17173 unop( Iop_32to16, mkexpr(srcBytes) ) ) ) );
17174 return delta;
17178 static Long dis_PMOVSXWQ_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
17180 IRTemp addr = IRTemp_INVALID;
17181 Int alen = 0;
17182 HChar dis_buf[50];
17183 IRTemp srcBytes = newTemp(Ity_I64);
17184 UChar modrm = getUChar(delta);
17185 UInt rG = gregOfRexRM(pfx, modrm);
17186 IRTemp s3, s2, s1, s0;
17187 s3 = s2 = s1 = s0 = IRTemp_INVALID;
17189 if ( epartIsReg( modrm ) ) {
17190 UInt rE = eregOfRexRM(pfx, modrm);
17191 assign( srcBytes, getXMMRegLane64( rE, 0 ) );
17192 delta += 1;
17193 DIP( "vpmovsxwq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
17194 } else {
17195 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17196 assign( srcBytes, loadLE( Ity_I64, mkexpr(addr) ) );
17197 delta += alen;
17198 DIP( "vpmovsxwq %s,%s\n", dis_buf, nameYMMReg(rG) );
17201 breakup64to16s( srcBytes, &s3, &s2, &s1, &s0 );
17202 putYMMReg( rG, binop( Iop_V128HLtoV256,
17203 binop( Iop_64HLtoV128,
17204 unop( Iop_16Sto64, mkexpr(s3) ),
17205 unop( Iop_16Sto64, mkexpr(s2) ) ),
17206 binop( Iop_64HLtoV128,
17207 unop( Iop_16Sto64, mkexpr(s1) ),
17208 unop( Iop_16Sto64, mkexpr(s0) ) ) ) );
17209 return delta;
17213 static Long dis_PMOVZXWQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17214 Long delta, Bool isAvx )
17216 IRTemp addr = IRTemp_INVALID;
17217 Int alen = 0;
17218 HChar dis_buf[50];
17219 IRTemp srcVec = newTemp(Ity_V128);
17220 UChar modrm = getUChar(delta);
17221 const HChar* mbV = isAvx ? "v" : "";
17222 UInt rG = gregOfRexRM(pfx, modrm);
17224 if ( epartIsReg( modrm ) ) {
17225 UInt rE = eregOfRexRM(pfx, modrm);
17226 assign( srcVec, getXMMReg(rE) );
17227 delta += 1;
17228 DIP( "%spmovzxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
17229 } else {
17230 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17231 assign( srcVec,
17232 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
17233 delta += alen;
17234 DIP( "%spmovzxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
17237 IRTemp zeroVec = newTemp( Ity_V128 );
17238 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17240 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17241 ( rG, binop( Iop_InterleaveLO16x8,
17242 mkexpr(zeroVec),
17243 binop( Iop_InterleaveLO16x8,
17244 mkexpr(zeroVec), mkexpr(srcVec) ) ) );
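   /* Two successive interleaves against zero: the inner one zero-extends
      each 16-bit element to 32 bits; the outer one then zero-extends those
      32-bit elements to 64 bits (which works because their upper halves are
      already zero), yielding the two zero-extended quadword lanes. */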
17245 return delta;
17249 static Long dis_PMOVZXWQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
17250 Long delta )
17252 IRTemp addr = IRTemp_INVALID;
17253 Int alen = 0;
17254 HChar dis_buf[50];
17255 IRTemp srcVec = newTemp(Ity_V128);
17256 UChar modrm = getUChar(delta);
17257 UInt rG = gregOfRexRM(pfx, modrm);
17259 if ( epartIsReg( modrm ) ) {
17260 UInt rE = eregOfRexRM(pfx, modrm);
17261 assign( srcVec, getXMMReg(rE) );
17262 delta += 1;
17263 DIP( "vpmovzxwq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
17264 } else {
17265 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17266 assign( srcVec,
17267 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
17268 delta += alen;
17269 DIP( "vpmovzxwq %s,%s\n", dis_buf, nameYMMReg(rG) );
17272 IRTemp zeroVec = newTemp( Ity_V128 );
17273 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17275 putYMMReg( rG, binop( Iop_V128HLtoV256,
17276 binop( Iop_InterleaveHI16x8,
17277 mkexpr(zeroVec),
17278 binop( Iop_InterleaveLO16x8,
17279 mkexpr(zeroVec), mkexpr(srcVec) ) ),
17280 binop( Iop_InterleaveLO16x8,
17281 mkexpr(zeroVec),
17282 binop( Iop_InterleaveLO16x8,
17283 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) );
17284 return delta;
17288 /* Handles 128 bit versions of PMOVZXDQ and PMOVSXDQ. */
17289 static Long dis_PMOVxXDQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17290 Long delta, Bool isAvx, Bool xIsZ )
17292 IRTemp addr = IRTemp_INVALID;
17293 Int alen = 0;
17294 HChar dis_buf[50];
17295 IRTemp srcI64 = newTemp(Ity_I64);
17296 IRTemp srcVec = newTemp(Ity_V128);
17297 UChar modrm = getUChar(delta);
17298 const HChar* mbV = isAvx ? "v" : "";
17299 const HChar how = xIsZ ? 'z' : 's';
17300 UInt rG = gregOfRexRM(pfx, modrm);
17301 /* Compute both srcI64 -- the value to expand -- and srcVec -- same
17302 thing in a V128, with arbitrary junk in the top 64 bits. Use
17303 one or both of them and let iropt clean up afterwards (as
17304 usual). */
17305 if ( epartIsReg(modrm) ) {
17306 UInt rE = eregOfRexRM(pfx, modrm);
17307 assign( srcVec, getXMMReg(rE) );
17308 assign( srcI64, unop(Iop_V128to64, mkexpr(srcVec)) );
17309 delta += 1;
17310 DIP( "%spmov%cxdq %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
17311 } else {
17312 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17313 assign( srcI64, loadLE(Ity_I64, mkexpr(addr)) );
17314 assign( srcVec, unop( Iop_64UtoV128, mkexpr(srcI64)) );
17315 delta += alen;
17316 DIP( "%spmov%cxdq %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
17319 IRExpr* res
17320 = xIsZ /* do math for either zero or sign extend */
17321 ? binop( Iop_InterleaveLO32x4,
17322 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) )
17323 : binop( Iop_64HLtoV128,
17324 unop( Iop_32Sto64,
17325 unop( Iop_64HIto32, mkexpr(srcI64) ) ),
17326 unop( Iop_32Sto64,
17327 unop( Iop_64to32, mkexpr(srcI64) ) ) );
17329 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );
17331 return delta;
17335 /* Handles 256 bit versions of PMOVZXDQ and PMOVSXDQ. */
17336 static Long dis_PMOVxXDQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
17337 Long delta, Bool xIsZ )
17339 IRTemp addr = IRTemp_INVALID;
17340 Int alen = 0;
17341 HChar dis_buf[50];
17342 IRTemp srcVec = newTemp(Ity_V128);
17343 UChar modrm = getUChar(delta);
17344 UChar how = xIsZ ? 'z' : 's';
17345 UInt rG = gregOfRexRM(pfx, modrm);
17346    /* Compute srcVec, the 128-bit value to expand.  Unlike the 128-bit
17347       case there is no separate srcI64 here: the zero-extending path
17348       interleaves srcVec with zeroes directly, and the sign-extending
17349       path breaks srcVec into its four 32-bit lanes below. */
17350 if ( epartIsReg(modrm) ) {
17351 UInt rE = eregOfRexRM(pfx, modrm);
17352 assign( srcVec, getXMMReg(rE) );
17353 delta += 1;
17354 DIP( "vpmov%cxdq %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
17355 } else {
17356 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17357 assign( srcVec, loadLE(Ity_V128, mkexpr(addr)) );
17358 delta += alen;
17359 DIP( "vpmov%cxdq %s,%s\n", how, dis_buf, nameYMMReg(rG) );
17362 IRExpr* res;
17363 if (xIsZ)
17364 res = binop( Iop_V128HLtoV256,
17365 binop( Iop_InterleaveHI32x4,
17366 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
17367 binop( Iop_InterleaveLO32x4,
17368 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
17369 else {
17370 IRTemp s3, s2, s1, s0;
17371 s3 = s2 = s1 = s0 = IRTemp_INVALID;
17372 breakupV128to32s( srcVec, &s3, &s2, &s1, &s0 );
17373 res = binop( Iop_V128HLtoV256,
17374 binop( Iop_64HLtoV128,
17375 unop( Iop_32Sto64, mkexpr(s3) ),
17376 unop( Iop_32Sto64, mkexpr(s2) ) ),
17377 binop( Iop_64HLtoV128,
17378 unop( Iop_32Sto64, mkexpr(s1) ),
17379 unop( Iop_32Sto64, mkexpr(s0) ) ) );
17382 putYMMReg ( rG, res );
17384 return delta;
17388 /* Handles 128 bit versions of PMOVZXBD and PMOVSXBD. */
17389 static Long dis_PMOVxXBD_128 ( const VexAbiInfo* vbi, Prefix pfx,
17390 Long delta, Bool isAvx, Bool xIsZ )
17392 IRTemp addr = IRTemp_INVALID;
17393 Int alen = 0;
17394 HChar dis_buf[50];
17395 IRTemp srcVec = newTemp(Ity_V128);
17396 UChar modrm = getUChar(delta);
17397 const HChar* mbV = isAvx ? "v" : "";
17398 const HChar how = xIsZ ? 'z' : 's';
17399 UInt rG = gregOfRexRM(pfx, modrm);
17400 if ( epartIsReg(modrm) ) {
17401 UInt rE = eregOfRexRM(pfx, modrm);
17402 assign( srcVec, getXMMReg(rE) );
17403 delta += 1;
17404 DIP( "%spmov%cxbd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
17405 } else {
17406 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17407 assign( srcVec,
17408 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
17409 delta += alen;
17410 DIP( "%spmov%cxbd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
17413 IRTemp zeroVec = newTemp(Ity_V128);
17414 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17416 IRExpr* res
17417 = binop(Iop_InterleaveLO8x16,
17418 mkexpr(zeroVec),
17419 binop(Iop_InterleaveLO8x16,
17420 mkexpr(zeroVec), mkexpr(srcVec)));
17421 if (!xIsZ)
17422 res = binop(Iop_SarN32x4,
17423 binop(Iop_ShlN32x4, res, mkU8(24)), mkU8(24));
17425 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );
17427 return delta;
17431 /* Handles 256 bit versions of PMOVZXBD and PMOVSXBD. */
17432 static Long dis_PMOVxXBD_256 ( const VexAbiInfo* vbi, Prefix pfx,
17433 Long delta, Bool xIsZ )
17435 IRTemp addr = IRTemp_INVALID;
17436 Int alen = 0;
17437 HChar dis_buf[50];
17438 IRTemp srcVec = newTemp(Ity_V128);
17439 UChar modrm = getUChar(delta);
17440 UChar how = xIsZ ? 'z' : 's';
17441 UInt rG = gregOfRexRM(pfx, modrm);
17442 if ( epartIsReg(modrm) ) {
17443 UInt rE = eregOfRexRM(pfx, modrm);
17444 assign( srcVec, getXMMReg(rE) );
17445 delta += 1;
17446 DIP( "vpmov%cxbd %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
17447 } else {
17448 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17449 assign( srcVec,
17450 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
17451 delta += alen;
17452 DIP( "vpmov%cxbd %s,%s\n", how, dis_buf, nameYMMReg(rG) );
17455 IRTemp zeroVec = newTemp(Ity_V128);
17456 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17458 IRExpr* res
17459 = binop( Iop_V128HLtoV256,
17460 binop(Iop_InterleaveHI8x16,
17461 mkexpr(zeroVec),
17462 binop(Iop_InterleaveLO8x16,
17463 mkexpr(zeroVec), mkexpr(srcVec)) ),
17464 binop(Iop_InterleaveLO8x16,
17465 mkexpr(zeroVec),
17466 binop(Iop_InterleaveLO8x16,
17467 mkexpr(zeroVec), mkexpr(srcVec)) ) );
17468 if (!xIsZ)
17469 res = binop(Iop_SarN32x8,
17470 binop(Iop_ShlN32x8, res, mkU8(24)), mkU8(24));
17472 putYMMReg ( rG, res );
17474 return delta;
17478 /* Handles 128 bit versions of PMOVSXBQ. */
17479 static Long dis_PMOVSXBQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17480 Long delta, Bool isAvx )
17482 IRTemp addr = IRTemp_INVALID;
17483 Int alen = 0;
17484 HChar dis_buf[50];
17485 IRTemp srcBytes = newTemp(Ity_I16);
17486 UChar modrm = getUChar(delta);
17487 const HChar* mbV = isAvx ? "v" : "";
17488 UInt rG = gregOfRexRM(pfx, modrm);
17489 if ( epartIsReg(modrm) ) {
17490 UInt rE = eregOfRexRM(pfx, modrm);
17491 assign( srcBytes, getXMMRegLane16( rE, 0 ) );
17492 delta += 1;
17493 DIP( "%spmovsxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
17494 } else {
17495 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17496 assign( srcBytes, loadLE( Ity_I16, mkexpr(addr) ) );
17497 delta += alen;
17498 DIP( "%spmovsxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
17501 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17502 ( rG, binop( Iop_64HLtoV128,
17503 unop( Iop_8Sto64,
17504 unop( Iop_16HIto8, mkexpr(srcBytes) ) ),
17505 unop( Iop_8Sto64,
17506 unop( Iop_16to8, mkexpr(srcBytes) ) ) ) );
17507 return delta;
17511 /* Handles 256 bit versions of PMOVSXBQ. */
17512 static Long dis_PMOVSXBQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
17513 Long delta )
17515 IRTemp addr = IRTemp_INVALID;
17516 Int alen = 0;
17517 HChar dis_buf[50];
17518 IRTemp srcBytes = newTemp(Ity_I32);
17519 UChar modrm = getUChar(delta);
17520 UInt rG = gregOfRexRM(pfx, modrm);
17521 if ( epartIsReg(modrm) ) {
17522 UInt rE = eregOfRexRM(pfx, modrm);
17523 assign( srcBytes, getXMMRegLane32( rE, 0 ) );
17524 delta += 1;
17525 DIP( "vpmovsxbq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
17526 } else {
17527 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17528 assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) );
17529 delta += alen;
17530 DIP( "vpmovsxbq %s,%s\n", dis_buf, nameYMMReg(rG) );
17533 putYMMReg
17534 ( rG, binop( Iop_V128HLtoV256,
17535 binop( Iop_64HLtoV128,
17536 unop( Iop_8Sto64,
17537 unop( Iop_16HIto8,
17538 unop( Iop_32HIto16,
17539 mkexpr(srcBytes) ) ) ),
17540 unop( Iop_8Sto64,
17541 unop( Iop_16to8,
17542 unop( Iop_32HIto16,
17543 mkexpr(srcBytes) ) ) ) ),
17544 binop( Iop_64HLtoV128,
17545 unop( Iop_8Sto64,
17546 unop( Iop_16HIto8,
17547 unop( Iop_32to16,
17548 mkexpr(srcBytes) ) ) ),
17549 unop( Iop_8Sto64,
17550 unop( Iop_16to8,
17551 unop( Iop_32to16,
17552 mkexpr(srcBytes) ) ) ) ) ) );
17553 return delta;
17557 /* Handles 128 bit versions of PMOVZXBQ. */
17558 static Long dis_PMOVZXBQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17559 Long delta, Bool isAvx )
17561 IRTemp addr = IRTemp_INVALID;
17562 Int alen = 0;
17563 HChar dis_buf[50];
17564 IRTemp srcVec = newTemp(Ity_V128);
17565 UChar modrm = getUChar(delta);
17566 const HChar* mbV = isAvx ? "v" : "";
17567 UInt rG = gregOfRexRM(pfx, modrm);
17568 if ( epartIsReg(modrm) ) {
17569 UInt rE = eregOfRexRM(pfx, modrm);
17570 assign( srcVec, getXMMReg(rE) );
17571 delta += 1;
17572 DIP( "%spmovzxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
17573 } else {
17574 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17575 assign( srcVec,
17576 unop( Iop_32UtoV128,
17577 unop( Iop_16Uto32, loadLE( Ity_I16, mkexpr(addr) ))));
17578 delta += alen;
17579 DIP( "%spmovzxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
17582 IRTemp zeroVec = newTemp(Ity_V128);
17583 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17585 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17586 ( rG, binop( Iop_InterleaveLO8x16,
17587 mkexpr(zeroVec),
17588 binop( Iop_InterleaveLO8x16,
17589 mkexpr(zeroVec),
17590 binop( Iop_InterleaveLO8x16,
17591 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) );
17592 return delta;
17596 /* Handles 256 bit versions of PMOVZXBQ. */
17597 static Long dis_PMOVZXBQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
17598 Long delta )
17600 IRTemp addr = IRTemp_INVALID;
17601 Int alen = 0;
17602 HChar dis_buf[50];
17603 IRTemp srcVec = newTemp(Ity_V128);
17604 UChar modrm = getUChar(delta);
17605 UInt rG = gregOfRexRM(pfx, modrm);
17606 if ( epartIsReg(modrm) ) {
17607 UInt rE = eregOfRexRM(pfx, modrm);
17608 assign( srcVec, getXMMReg(rE) );
17609 delta += 1;
17610 DIP( "vpmovzxbq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
17611 } else {
17612 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17613 assign( srcVec,
17614 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) )));
17615 delta += alen;
17616 DIP( "vpmovzxbq %s,%s\n", dis_buf, nameYMMReg(rG) );
17619 IRTemp zeroVec = newTemp(Ity_V128);
17620 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17622 putYMMReg
17623 ( rG, binop( Iop_V128HLtoV256,
17624 binop( Iop_InterleaveHI8x16,
17625 mkexpr(zeroVec),
17626 binop( Iop_InterleaveLO8x16,
17627 mkexpr(zeroVec),
17628 binop( Iop_InterleaveLO8x16,
17629 mkexpr(zeroVec), mkexpr(srcVec) ) ) ),
17630 binop( Iop_InterleaveLO8x16,
17631 mkexpr(zeroVec),
17632 binop( Iop_InterleaveLO8x16,
17633 mkexpr(zeroVec),
17634 binop( Iop_InterleaveLO8x16,
17635 mkexpr(zeroVec), mkexpr(srcVec) ) ) )
17636 ) );
17637 return delta;
17641 static Long dis_PHMINPOSUW_128 ( const VexAbiInfo* vbi, Prefix pfx,
17642 Long delta, Bool isAvx )
17644 IRTemp addr = IRTemp_INVALID;
17645 Int alen = 0;
17646 HChar dis_buf[50];
17647 UChar modrm = getUChar(delta);
17648 const HChar* mbV = isAvx ? "v" : "";
17649 IRTemp sV = newTemp(Ity_V128);
17650 IRTemp sHi = newTemp(Ity_I64);
17651 IRTemp sLo = newTemp(Ity_I64);
17652 IRTemp dLo = newTemp(Ity_I64);
17653 UInt rG = gregOfRexRM(pfx,modrm);
17654 if (epartIsReg(modrm)) {
17655 UInt rE = eregOfRexRM(pfx,modrm);
17656 assign( sV, getXMMReg(rE) );
17657 delta += 1;
17658 DIP("%sphminposuw %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG));
17659 } else {
17660 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
17661 if (!isAvx)
17662 gen_SEGV_if_not_16_aligned(addr);
17663 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
17664 delta += alen;
17665 DIP("%sphminposuw %s,%s\n", mbV, dis_buf, nameXMMReg(rG));
17667 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
17668 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
17669 assign( dLo, mkIRExprCCall(
17670 Ity_I64, 0/*regparms*/,
17671 "amd64g_calculate_sse_phminposuw",
17672 &amd64g_calculate_sse_phminposuw,
17673 mkIRExprVec_2( mkexpr(sLo), mkexpr(sHi) )
17675 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17676 (rG, unop(Iop_64UtoV128, mkexpr(dLo)));
17677 return delta;
17681 static Long dis_AESx ( const VexAbiInfo* vbi, Prefix pfx,
17682 Long delta, Bool isAvx, UChar opc )
17684 IRTemp addr = IRTemp_INVALID;
17685 Int alen = 0;
17686 HChar dis_buf[50];
17687 UChar modrm = getUChar(delta);
17688 UInt rG = gregOfRexRM(pfx, modrm);
17689 UInt regNoL = 0;
17690 UInt regNoR = (isAvx && opc != 0xDB) ? getVexNvvvv(pfx) : rG;
17692 /* This is a nasty kludge. We need to pass 2 x V128 to the
17693 helper. Since we can't do that, use a dirty
17694 helper to compute the results directly from the XMM regs in
17695 the guest state. That means for the memory case, we need to
17696 move the left operand into a pseudo-register (XMM16, let's
17697 call it). */
17698 if (epartIsReg(modrm)) {
17699 regNoL = eregOfRexRM(pfx, modrm);
17700 delta += 1;
17701 } else {
17702 regNoL = 16; /* use XMM16 as an intermediary */
17703 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17704 /* alignment check needed ???? */
17705 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
17706 delta += alen;
17709 void* fn = &amd64g_dirtyhelper_AES;
17710 const HChar* nm = "amd64g_dirtyhelper_AES";
17712 /* Round up the arguments. Note that this is a kludge -- the
17713 use of mkU64 rather than mkIRExpr_HWord implies the
17714 assumption that the host's word size is 64-bit. */
17715 UInt gstOffD = ymmGuestRegOffset(rG);
17716 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
17717 UInt gstOffR = ymmGuestRegOffset(regNoR);
17718 IRExpr* opc4 = mkU64(opc);
17719 IRExpr* gstOffDe = mkU64(gstOffD);
17720 IRExpr* gstOffLe = mkU64(gstOffL);
17721 IRExpr* gstOffRe = mkU64(gstOffR);
17722 IRExpr** args
17723 = mkIRExprVec_5( IRExpr_GSPTR(), opc4, gstOffDe, gstOffLe, gstOffRe );
17725 IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args );
17726 /* It's not really a dirty call, but we can't use the clean helper
17727 mechanism here for the very lame reason that we can't pass 2 x
17728 V128s by value to a helper. Hence this roundabout scheme. */
17729 d->nFxState = 2;
17730 vex_bzero(&d->fxState, sizeof(d->fxState));
17731    /* AES{ENC,ENCLAST,DEC,DECLAST} read both registers and write
17732       the second (for !isAvx) or the third (for isAvx).
17733       AESIMC (0xDB) reads the first register and writes the second. */
17734 d->fxState[0].fx = Ifx_Read;
17735 d->fxState[0].offset = gstOffL;
17736 d->fxState[0].size = sizeof(U128);
17737 d->fxState[1].offset = gstOffR;
17738 d->fxState[1].size = sizeof(U128);
17739 if (opc == 0xDB)
17740 d->fxState[1].fx = Ifx_Write;
17741 else if (!isAvx || rG == regNoR)
17742 d->fxState[1].fx = Ifx_Modify;
17743 else {
17744 d->fxState[1].fx = Ifx_Read;
17745 d->nFxState++;
17746 d->fxState[2].fx = Ifx_Write;
17747 d->fxState[2].offset = gstOffD;
17748 d->fxState[2].size = sizeof(U128);
17751 stmt( IRStmt_Dirty(d) );
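   /* Summarising the effect annotations above: AESIMC reads L and writes R;
      the other four read L and either modify R in place (!isAvx, or AVX with
      rG == regNoR) or read R and write the separate destination G (the
      remaining AVX cases). */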
17753 const HChar* opsuf;
17754 switch (opc) {
17755 case 0xDC: opsuf = "enc"; break;
17756       case 0xDD: opsuf = "enclast"; break;
17757 case 0xDE: opsuf = "dec"; break;
17758 case 0xDF: opsuf = "declast"; break;
17759 case 0xDB: opsuf = "imc"; break;
17760 default: vassert(0);
17762 DIP("%saes%s %s,%s%s%s\n", isAvx ? "v" : "", opsuf,
17763 (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)),
17764 nameXMMReg(regNoR),
17765 (isAvx && opc != 0xDB) ? "," : "",
17766 (isAvx && opc != 0xDB) ? nameXMMReg(rG) : "");
17768 if (isAvx)
17769 putYMMRegLane128( rG, 1, mkV128(0) );
17770 return delta;
17773 static Long dis_AESKEYGENASSIST ( const VexAbiInfo* vbi, Prefix pfx,
17774 Long delta, Bool isAvx )
17776 IRTemp addr = IRTemp_INVALID;
17777 Int alen = 0;
17778 HChar dis_buf[50];
17779 UChar modrm = getUChar(delta);
17780 UInt regNoL = 0;
17781 UInt regNoR = gregOfRexRM(pfx, modrm);
17782 UChar imm = 0;
17784 /* This is a nasty kludge. See AESENC et al. instructions. */
17785 modrm = getUChar(delta);
17786 if (epartIsReg(modrm)) {
17787 regNoL = eregOfRexRM(pfx, modrm);
17788 imm = getUChar(delta+1);
17789 delta += 1+1;
17790 } else {
17791 regNoL = 16; /* use XMM16 as an intermediary */
17792 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
17793       /* alignment check ???? */
17794 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
17795 imm = getUChar(delta+alen);
17796 delta += alen+1;
17799 /* Who ya gonna call? Presumably not Ghostbusters. */
17800 void* fn = &amd64g_dirtyhelper_AESKEYGENASSIST;
17801 const HChar* nm = "amd64g_dirtyhelper_AESKEYGENASSIST";
17803 /* Round up the arguments. Note that this is a kludge -- the
17804 use of mkU64 rather than mkIRExpr_HWord implies the
17805 assumption that the host's word size is 64-bit. */
17806 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
17807 UInt gstOffR = ymmGuestRegOffset(regNoR);
17809 IRExpr* imme = mkU64(imm & 0xFF);
17810 IRExpr* gstOffLe = mkU64(gstOffL);
17811 IRExpr* gstOffRe = mkU64(gstOffR);
17812 IRExpr** args
17813 = mkIRExprVec_4( IRExpr_GSPTR(), imme, gstOffLe, gstOffRe );
17815 IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args );
17816 /* It's not really a dirty call, but we can't use the clean helper
17817 mechanism here for the very lame reason that we can't pass 2 x
17818 V128s by value to a helper. Hence this roundabout scheme. */
17819 d->nFxState = 2;
17820 vex_bzero(&d->fxState, sizeof(d->fxState));
17821 d->fxState[0].fx = Ifx_Read;
17822 d->fxState[0].offset = gstOffL;
17823 d->fxState[0].size = sizeof(U128);
17824 d->fxState[1].fx = Ifx_Write;
17825 d->fxState[1].offset = gstOffR;
17826 d->fxState[1].size = sizeof(U128);
17827 stmt( IRStmt_Dirty(d) );
17829 DIP("%saeskeygenassist $%x,%s,%s\n", isAvx ? "v" : "", (UInt)imm,
17830 (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)),
17831 nameXMMReg(regNoR));
17832 if (isAvx)
17833 putYMMRegLane128( regNoR, 1, mkV128(0) );
17834 return delta;
17838 __attribute__((noinline))
17839 static
17840 Long dis_ESC_0F38__SSE4 ( Bool* decode_OK,
17841 const VexAbiInfo* vbi,
17842 Prefix pfx, Int sz, Long deltaIN )
17844 IRTemp addr = IRTemp_INVALID;
17845 UChar modrm = 0;
17846 Int alen = 0;
17847 HChar dis_buf[50];
17849 *decode_OK = False;
17851 Long delta = deltaIN;
17852 UChar opc = getUChar(delta);
17853 delta++;
17854 switch (opc) {
17856 case 0x10:
17857 case 0x14:
17858 case 0x15:
17859 /* 66 0F 38 10 /r = PBLENDVB xmm1, xmm2/m128 (byte gran)
17860 66 0F 38 14 /r = BLENDVPS xmm1, xmm2/m128 (float gran)
17861 66 0F 38 15 /r = BLENDVPD xmm1, xmm2/m128 (double gran)
17862 Blend at various granularities, with XMM0 (implicit operand)
17863 providing the controlling mask.
17865 if (have66noF2noF3(pfx) && sz == 2) {
17866 modrm = getUChar(delta);
17868 const HChar* nm = NULL;
17869 UInt gran = 0;
17870 IROp opSAR = Iop_INVALID;
17871 switch (opc) {
17872 case 0x10:
17873 nm = "pblendvb"; gran = 1; opSAR = Iop_SarN8x16;
17874 break;
17875 case 0x14:
17876 nm = "blendvps"; gran = 4; opSAR = Iop_SarN32x4;
17877 break;
17878 case 0x15:
17879 nm = "blendvpd"; gran = 8; opSAR = Iop_SarN64x2;
17880 break;
17882 vassert(nm);
17884 IRTemp vecE = newTemp(Ity_V128);
17885 IRTemp vecG = newTemp(Ity_V128);
17886 IRTemp vec0 = newTemp(Ity_V128);
17888 if ( epartIsReg(modrm) ) {
17889 assign(vecE, getXMMReg(eregOfRexRM(pfx, modrm)));
17890 delta += 1;
17891 DIP( "%s %s,%s\n", nm,
17892 nameXMMReg( eregOfRexRM(pfx, modrm) ),
17893 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17894 } else {
17895 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17896 gen_SEGV_if_not_16_aligned( addr );
17897 assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
17898 delta += alen;
17899 DIP( "%s %s,%s\n", nm,
17900 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17903 assign(vecG, getXMMReg(gregOfRexRM(pfx, modrm)));
17904 assign(vec0, getXMMReg(0));
17906 IRTemp res = math_PBLENDVB_128( vecE, vecG, vec0, gran, opSAR );
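            /* (math_PBLENDVB_128 presumably uses opSAR to smear each mask
               lane's sign bit across the whole lane, turning XMM0 into an
               all-ones / all-zeroes per-lane selector -- hence the
               granularity-specific shift op chosen above.) */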
17907 putXMMReg(gregOfRexRM(pfx, modrm), mkexpr(res));
17909 goto decode_success;
17911 break;
17913 case 0x17:
17914 /* 66 0F 38 17 /r = PTEST xmm1, xmm2/m128
17915 Logical compare (set ZF and CF from AND/ANDN of the operands) */
17916 if (have66noF2noF3(pfx)
17917 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
17918 delta = dis_xTESTy_128( vbi, pfx, delta, False/*!isAvx*/, 0 );
17919 goto decode_success;
17921 break;
17923 case 0x20:
17924 /* 66 0F 38 20 /r = PMOVSXBW xmm1, xmm2/m64
17925 Packed Move with Sign Extend from Byte to Word (XMM) */
17926 if (have66noF2noF3(pfx) && sz == 2) {
17927 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
17928 False/*!isAvx*/, False/*!xIsZ*/ );
17929 goto decode_success;
17931 break;
17933 case 0x21:
17934 /* 66 0F 38 21 /r = PMOVSXBD xmm1, xmm2/m32
17935 Packed Move with Sign Extend from Byte to DWord (XMM) */
17936 if (have66noF2noF3(pfx) && sz == 2) {
17937 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
17938 False/*!isAvx*/, False/*!xIsZ*/ );
17939 goto decode_success;
17941 break;
17943 case 0x22:
17944 /* 66 0F 38 22 /r = PMOVSXBQ xmm1, xmm2/m16
17945 Packed Move with Sign Extend from Byte to QWord (XMM) */
17946 if (have66noF2noF3(pfx) && sz == 2) {
17947 delta = dis_PMOVSXBQ_128( vbi, pfx, delta, False/*!isAvx*/ );
17948 goto decode_success;
17950 break;
17952 case 0x23:
17953 /* 66 0F 38 23 /r = PMOVSXWD xmm1, xmm2/m64
17954 Packed Move with Sign Extend from Word to DWord (XMM) */
17955 if (have66noF2noF3(pfx) && sz == 2) {
17956 delta = dis_PMOVxXWD_128(vbi, pfx, delta,
17957 False/*!isAvx*/, False/*!xIsZ*/);
17958 goto decode_success;
17960 break;
17962 case 0x24:
17963 /* 66 0F 38 24 /r = PMOVSXWQ xmm1, xmm2/m32
17964 Packed Move with Sign Extend from Word to QWord (XMM) */
17965 if (have66noF2noF3(pfx) && sz == 2) {
17966 delta = dis_PMOVSXWQ_128( vbi, pfx, delta, False/*!isAvx*/ );
17967 goto decode_success;
17969 break;
17971 case 0x25:
17972 /* 66 0F 38 25 /r = PMOVSXDQ xmm1, xmm2/m64
17973 Packed Move with Sign Extend from Double Word to Quad Word (XMM) */
17974 if (have66noF2noF3(pfx) && sz == 2) {
17975 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
17976 False/*!isAvx*/, False/*!xIsZ*/ );
17977 goto decode_success;
17979 break;
17981 case 0x28:
17982 /* 66 0F 38 28 = PMULDQ -- signed widening multiply of 32-lanes
17983 0 x 0 to form lower 64-bit half and lanes 2 x 2 to form upper
17984 64-bit half */
17985 /* This is a really poor translation -- could be improved if
17986 performance critical. It's a copy-paste of PMULUDQ, too. */
17987 if (have66noF2noF3(pfx) && sz == 2) {
17988 IRTemp sV = newTemp(Ity_V128);
17989 IRTemp dV = newTemp(Ity_V128);
17990 modrm = getUChar(delta);
17991 UInt rG = gregOfRexRM(pfx,modrm);
17992 assign( dV, getXMMReg(rG) );
17993 if (epartIsReg(modrm)) {
17994 UInt rE = eregOfRexRM(pfx,modrm);
17995 assign( sV, getXMMReg(rE) );
17996 delta += 1;
17997 DIP("pmuldq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
17998 } else {
17999 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
18000 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
18001 delta += alen;
18002 DIP("pmuldq %s,%s\n", dis_buf, nameXMMReg(rG));
18005 putXMMReg( rG, mkexpr(math_PMULDQ_128( dV, sV )) );
18006 goto decode_success;
18008 break;
18010 case 0x29:
18011 /* 66 0F 38 29 = PCMPEQQ
18012 64x2 equality comparison */
18013 if (have66noF2noF3(pfx) && sz == 2) {
18014 /* FIXME: this needs an alignment check */
18015 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
18016 "pcmpeqq", Iop_CmpEQ64x2, False );
18017 goto decode_success;
18019 break;
18021 case 0x2A:
18022 /* 66 0F 38 2A = MOVNTDQA
18023 "non-temporal" "streaming" load
18024          Handle like MOVDQA but only a memory operand is allowed */
18025 if (have66noF2noF3(pfx) && sz == 2) {
18026 modrm = getUChar(delta);
18027 if (!epartIsReg(modrm)) {
18028 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
18029 gen_SEGV_if_not_16_aligned( addr );
18030 putXMMReg( gregOfRexRM(pfx,modrm),
18031 loadLE(Ity_V128, mkexpr(addr)) );
18032 DIP("movntdqa %s,%s\n", dis_buf,
18033 nameXMMReg(gregOfRexRM(pfx,modrm)));
18034 delta += alen;
18035 goto decode_success;
18038 break;
18040 case 0x2B:
18041 /* 66 0f 38 2B /r = PACKUSDW xmm1, xmm2/m128
18042 2x 32x4 S->U saturating narrow from xmm2/m128 to xmm1 */
18043 if (have66noF2noF3(pfx) && sz == 2) {
18045 modrm = getUChar(delta);
18047 IRTemp argL = newTemp(Ity_V128);
18048 IRTemp argR = newTemp(Ity_V128);
18050 if ( epartIsReg(modrm) ) {
18051 assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
18052 delta += 1;
18053 DIP( "packusdw %s,%s\n",
18054 nameXMMReg( eregOfRexRM(pfx, modrm) ),
18055 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18056 } else {
18057 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
18058 gen_SEGV_if_not_16_aligned( addr );
18059 assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
18060 delta += alen;
18061 DIP( "packusdw %s,%s\n",
18062 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18065 assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));
18067 putXMMReg( gregOfRexRM(pfx, modrm),
18068 binop( Iop_QNarrowBin32Sto16Ux8,
18069 mkexpr(argL), mkexpr(argR)) );
18071 goto decode_success;
18073 break;
18075 case 0x30:
18076 /* 66 0F 38 30 /r = PMOVZXBW xmm1, xmm2/m64
18077 Packed Move with Zero Extend from Byte to Word (XMM) */
18078 if (have66noF2noF3(pfx) && sz == 2) {
18079 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
18080 False/*!isAvx*/, True/*xIsZ*/ );
18081 goto decode_success;
18083 break;
18085 case 0x31:
18086 /* 66 0F 38 31 /r = PMOVZXBD xmm1, xmm2/m32
18087 Packed Move with Zero Extend from Byte to DWord (XMM) */
18088 if (have66noF2noF3(pfx) && sz == 2) {
18089 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
18090 False/*!isAvx*/, True/*xIsZ*/ );
18091 goto decode_success;
18093 break;
18095 case 0x32:
18096 /* 66 0F 38 32 /r = PMOVZXBQ xmm1, xmm2/m16
18097 Packed Move with Zero Extend from Byte to QWord (XMM) */
18098 if (have66noF2noF3(pfx) && sz == 2) {
18099 delta = dis_PMOVZXBQ_128( vbi, pfx, delta, False/*!isAvx*/ );
18100 goto decode_success;
18102 break;
18104 case 0x33:
18105 /* 66 0F 38 33 /r = PMOVZXWD xmm1, xmm2/m64
18106 Packed Move with Zero Extend from Word to DWord (XMM) */
18107 if (have66noF2noF3(pfx) && sz == 2) {
18108 delta = dis_PMOVxXWD_128( vbi, pfx, delta,
18109 False/*!isAvx*/, True/*xIsZ*/ );
18110 goto decode_success;
18112 break;
18114 case 0x34:
18115 /* 66 0F 38 34 /r = PMOVZXWQ xmm1, xmm2/m32
18116 Packed Move with Zero Extend from Word to QWord (XMM) */
18117 if (have66noF2noF3(pfx) && sz == 2) {
18118 delta = dis_PMOVZXWQ_128( vbi, pfx, delta, False/*!isAvx*/ );
18119 goto decode_success;
18121 break;
18123 case 0x35:
18124 /* 66 0F 38 35 /r = PMOVZXDQ xmm1, xmm2/m64
18125 Packed Move with Zero Extend from DWord to QWord (XMM) */
18126 if (have66noF2noF3(pfx) && sz == 2) {
18127 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
18128 False/*!isAvx*/, True/*xIsZ*/ );
18129 goto decode_success;
18131 break;
18133 case 0x37:
18134 /* 66 0F 38 37 = PCMPGTQ
18135 64x2 comparison (signed, presumably; the Intel docs don't say :-)
18137 if (have66noF2noF3(pfx) && sz == 2) {
18138 /* FIXME: this needs an alignment check */
18139 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
18140 "pcmpgtq", Iop_CmpGT64Sx2, False );
18141 goto decode_success;
18143 break;
18145 case 0x38:
18146 case 0x3C:
18147 /* 66 0F 38 38 /r = PMINSB xmm1, xmm2/m128 8Sx16 (signed) min
18148 66 0F 38 3C /r = PMAXSB xmm1, xmm2/m128 8Sx16 (signed) max
18150 if (have66noF2noF3(pfx) && sz == 2) {
18151 /* FIXME: this needs an alignment check */
18152 Bool isMAX = opc == 0x3C;
18153 delta = dis_SSEint_E_to_G(
18154 vbi, pfx, delta,
18155 isMAX ? "pmaxsb" : "pminsb",
18156 isMAX ? Iop_Max8Sx16 : Iop_Min8Sx16,
18157 False
18159 goto decode_success;
18161 break;
18163 case 0x39:
18164 case 0x3D:
18165 /* 66 0F 38 39 /r = PMINSD xmm1, xmm2/m128
18166 Minimum of Packed Signed Double Word Integers (XMM)
18167 66 0F 38 3D /r = PMAXSD xmm1, xmm2/m128
18168 Maximum of Packed Signed Double Word Integers (XMM)
18170 if (have66noF2noF3(pfx) && sz == 2) {
18171 /* FIXME: this needs an alignment check */
18172 Bool isMAX = opc == 0x3D;
18173 delta = dis_SSEint_E_to_G(
18174 vbi, pfx, delta,
18175 isMAX ? "pmaxsd" : "pminsd",
18176 isMAX ? Iop_Max32Sx4 : Iop_Min32Sx4,
18177 False
18179 goto decode_success;
18181 break;
18183 case 0x3A:
18184 case 0x3E:
18185 /* 66 0F 38 3A /r = PMINUW xmm1, xmm2/m128
18186 Minimum of Packed Unsigned Word Integers (XMM)
18187 66 0F 38 3E /r = PMAXUW xmm1, xmm2/m128
18188 Maximum of Packed Unsigned Word Integers (XMM)
18190 if (have66noF2noF3(pfx) && sz == 2) {
18191 /* FIXME: this needs an alignment check */
18192 Bool isMAX = opc == 0x3E;
18193 delta = dis_SSEint_E_to_G(
18194 vbi, pfx, delta,
18195 isMAX ? "pmaxuw" : "pminuw",
18196 isMAX ? Iop_Max16Ux8 : Iop_Min16Ux8,
18197 False
18199 goto decode_success;
18201 break;
18203 case 0x3B:
18204 case 0x3F:
18205 /* 66 0F 38 3B /r = PMINUD xmm1, xmm2/m128
18206 Minimum of Packed Unsigned Doubleword Integers (XMM)
18207 66 0F 38 3F /r = PMAXUD xmm1, xmm2/m128
18208 Maximum of Packed Unsigned Doubleword Integers (XMM)
18210 if (have66noF2noF3(pfx) && sz == 2) {
18211 /* FIXME: this needs an alignment check */
18212 Bool isMAX = opc == 0x3F;
18213 delta = dis_SSEint_E_to_G(
18214 vbi, pfx, delta,
18215 isMAX ? "pmaxud" : "pminud",
18216 isMAX ? Iop_Max32Ux4 : Iop_Min32Ux4,
18217 False
18219 goto decode_success;
18221 break;
18223 case 0x40:
18224 /* 66 0F 38 40 /r = PMULLD xmm1, xmm2/m128
18225 32x4 integer multiply from xmm2/m128 to xmm1 */
18226 if (have66noF2noF3(pfx) && sz == 2) {
18228 modrm = getUChar(delta);
18230 IRTemp argL = newTemp(Ity_V128);
18231 IRTemp argR = newTemp(Ity_V128);
18233 if ( epartIsReg(modrm) ) {
18234 assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
18235 delta += 1;
18236 DIP( "pmulld %s,%s\n",
18237 nameXMMReg( eregOfRexRM(pfx, modrm) ),
18238 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18239 } else {
18240 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
18241 gen_SEGV_if_not_16_aligned( addr );
18242 assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
18243 delta += alen;
18244 DIP( "pmulld %s,%s\n",
18245 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18248 assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));
18250 putXMMReg( gregOfRexRM(pfx, modrm),
18251 binop( Iop_Mul32x4, mkexpr(argL), mkexpr(argR)) );
18253 goto decode_success;
18255 break;
18257 case 0x41:
18258 /* 66 0F 38 41 /r = PHMINPOSUW xmm1, xmm2/m128
18259 Packed Horizontal Word Minimum from xmm2/m128 to xmm1 */
18260 if (have66noF2noF3(pfx) && sz == 2) {
18261 delta = dis_PHMINPOSUW_128( vbi, pfx, delta, False/*!isAvx*/ );
18262 goto decode_success;
18264 break;
18266 case 0xDC:
18267 case 0xDD:
18268 case 0xDE:
18269 case 0xDF:
18270 case 0xDB:
18271 /* 66 0F 38 DC /r = AESENC xmm1, xmm2/m128
18272 DD /r = AESENCLAST xmm1, xmm2/m128
18273 DE /r = AESDEC xmm1, xmm2/m128
18274 DF /r = AESDECLAST xmm1, xmm2/m128
18276 DB /r = AESIMC xmm1, xmm2/m128 */
18277 if (have66noF2noF3(pfx) && sz == 2) {
18278 delta = dis_AESx( vbi, pfx, delta, False/*!isAvx*/, opc );
18279 goto decode_success;
18281 break;
18283 case 0xF0:
18284 case 0xF1:
18285 /* F2 0F 38 F0 /r = CRC32 r/m8, r32 (REX.W ok, 66 not ok)
18286 F2 0F 38 F1 /r = CRC32 r/m{16,32,64}, r32
18287 The decoding on this is a bit unusual.
18289 if (haveF2noF3(pfx)
18290 && (opc == 0xF1 || (opc == 0xF0 && !have66(pfx)))) {
18291 modrm = getUChar(delta);
18293 if (opc == 0xF0)
18294 sz = 1;
18295 else
18296 vassert(sz == 2 || sz == 4 || sz == 8);
18298 IRType tyE = szToITy(sz);
18299 IRTemp valE = newTemp(tyE);
18301 if (epartIsReg(modrm)) {
18302 assign(valE, getIRegE(sz, pfx, modrm));
18303 delta += 1;
18304             DIP("crc32%c %s,%s\n", sz==1 ? 'b' : sz==2 ? 'w' : sz==4 ? 'l' : 'q',
18305                 nameIRegE(sz, pfx, modrm),
18306                 nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm));
18306 } else {
18307 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
18308 assign(valE, loadLE(tyE, mkexpr(addr)));
18309 delta += alen;
18310             DIP("crc32%c %s,%s\n", sz==1 ? 'b' : sz==2 ? 'w' : sz==4 ? 'l' : 'q',
18311                 dis_buf,
18312                 nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm));
18314 /* Somewhat funny getting/putting of the crc32 value, in order
18315 to ensure that it turns into 64-bit gets and puts. However,
18316 mask off the upper 32 bits so as to not get memcheck false
18317 +ves around the helper call. */
18318 IRTemp valG0 = newTemp(Ity_I64);
18319 assign(valG0, binop(Iop_And64, getIRegG(8, pfx, modrm),
18320 mkU64(0xFFFFFFFF)));
18322 const HChar* nm = NULL;
18323 void* fn = NULL;
18324 switch (sz) {
18325 case 1: nm = "amd64g_calc_crc32b";
18326 fn = &amd64g_calc_crc32b; break;
18327 case 2: nm = "amd64g_calc_crc32w";
18328 fn = &amd64g_calc_crc32w; break;
18329 case 4: nm = "amd64g_calc_crc32l";
18330 fn = &amd64g_calc_crc32l; break;
18331 case 8: nm = "amd64g_calc_crc32q";
18332 fn = &amd64g_calc_crc32q; break;
18334 vassert(nm && fn);
18335 IRTemp valG1 = newTemp(Ity_I64);
18336 assign(valG1,
18337 mkIRExprCCall(Ity_I64, 0/*regparm*/, nm, fn,
18338 mkIRExprVec_2(mkexpr(valG0),
18339 widenUto64(mkexpr(valE)))));
18341 putIRegG(4, pfx, modrm, unop(Iop_64to32, mkexpr(valG1)));
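            /* The 4-byte putIRegG zero-widens the value to 64 bits before
               writing, so the upper half of the destination register is
               cleared, matching the architected behaviour of 32-bit
               destination writes on amd64. */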
18342 goto decode_success;
18344 break;
18346 default:
18347 break;
18351 //decode_failure:
18352 *decode_OK = False;
18353 return deltaIN;
18355 decode_success:
18356 *decode_OK = True;
18357 return delta;
18361 /*------------------------------------------------------------*/
18362 /*--- ---*/
18363 /*--- Top-level SSE4: dis_ESC_0F3A__SSE4 ---*/
18364 /*--- ---*/
18365 /*------------------------------------------------------------*/
18367 static Long dis_PEXTRW ( const VexAbiInfo* vbi, Prefix pfx,
18368 Long delta, Bool isAvx )
18370 IRTemp addr = IRTemp_INVALID;
18371 IRTemp t0 = IRTemp_INVALID;
18372 IRTemp t1 = IRTemp_INVALID;
18373 IRTemp t2 = IRTemp_INVALID;
18374 IRTemp t3 = IRTemp_INVALID;
18375 UChar modrm = getUChar(delta);
18376 Int alen = 0;
18377 HChar dis_buf[50];
18378 UInt rG = gregOfRexRM(pfx,modrm);
18379 Int imm8_20;
18380 IRTemp xmm_vec = newTemp(Ity_V128);
18381 IRTemp d16 = newTemp(Ity_I16);
18382 const HChar* mbV = isAvx ? "v" : "";
18384 vassert(0==getRexW(pfx)); /* ensured by caller */
18385 assign( xmm_vec, getXMMReg(rG) );
18386 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
18388 if ( epartIsReg( modrm ) ) {
18389 imm8_20 = (Int)(getUChar(delta+1) & 7);
18390 } else {
18391 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18392 imm8_20 = (Int)(getUChar(delta+alen) & 7);
18395 switch (imm8_20) {
18396 case 0: assign(d16, unop(Iop_32to16, mkexpr(t0))); break;
18397 case 1: assign(d16, unop(Iop_32HIto16, mkexpr(t0))); break;
18398 case 2: assign(d16, unop(Iop_32to16, mkexpr(t1))); break;
18399 case 3: assign(d16, unop(Iop_32HIto16, mkexpr(t1))); break;
18400 case 4: assign(d16, unop(Iop_32to16, mkexpr(t2))); break;
18401 case 5: assign(d16, unop(Iop_32HIto16, mkexpr(t2))); break;
18402 case 6: assign(d16, unop(Iop_32to16, mkexpr(t3))); break;
18403 case 7: assign(d16, unop(Iop_32HIto16, mkexpr(t3))); break;
18404 default: vassert(0);
18407 if ( epartIsReg( modrm ) ) {
18408 UInt rE = eregOfRexRM(pfx,modrm);
18409 putIReg32( rE, unop(Iop_16Uto32, mkexpr(d16)) );
18410 delta += 1+1;
18411 DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20,
18412 nameXMMReg( rG ), nameIReg32( rE ) );
18413 } else {
18414 storeLE( mkexpr(addr), mkexpr(d16) );
18415 delta += alen+1;
18416 DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20, nameXMMReg( rG ), dis_buf );
18418 return delta;
18422 static Long dis_PEXTRD ( const VexAbiInfo* vbi, Prefix pfx,
18423 Long delta, Bool isAvx )
18425 IRTemp addr = IRTemp_INVALID;
18426 IRTemp t0 = IRTemp_INVALID;
18427 IRTemp t1 = IRTemp_INVALID;
18428 IRTemp t2 = IRTemp_INVALID;
18429 IRTemp t3 = IRTemp_INVALID;
18430 UChar modrm = 0;
18431 Int alen = 0;
18432 HChar dis_buf[50];
18434 Int imm8_10;
18435 IRTemp xmm_vec = newTemp(Ity_V128);
18436 IRTemp src_dword = newTemp(Ity_I32);
18437 const HChar* mbV = isAvx ? "v" : "";
18439 vassert(0==getRexW(pfx)); /* ensured by caller */
18440 modrm = getUChar(delta);
18441 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
18442 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
18444 if ( epartIsReg( modrm ) ) {
18445 imm8_10 = (Int)(getUChar(delta+1) & 3);
18446 } else {
18447 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18448 imm8_10 = (Int)(getUChar(delta+alen) & 3);
18451 switch ( imm8_10 ) {
18452 case 0: assign( src_dword, mkexpr(t0) ); break;
18453 case 1: assign( src_dword, mkexpr(t1) ); break;
18454 case 2: assign( src_dword, mkexpr(t2) ); break;
18455 case 3: assign( src_dword, mkexpr(t3) ); break;
18456 default: vassert(0);
18459 if ( epartIsReg( modrm ) ) {
18460 putIReg32( eregOfRexRM(pfx,modrm), mkexpr(src_dword) );
18461 delta += 1+1;
18462 DIP( "%spextrd $%d, %s,%s\n", mbV, imm8_10,
18463 nameXMMReg( gregOfRexRM(pfx, modrm) ),
18464 nameIReg32( eregOfRexRM(pfx, modrm) ) );
18465 } else {
18466 storeLE( mkexpr(addr), mkexpr(src_dword) );
18467 delta += alen+1;
18468 DIP( "%spextrd $%d, %s,%s\n", mbV,
18469 imm8_10, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
18471 return delta;
18475 static Long dis_PEXTRQ ( const VexAbiInfo* vbi, Prefix pfx,
18476 Long delta, Bool isAvx )
18478 IRTemp addr = IRTemp_INVALID;
18479 UChar modrm = 0;
18480 Int alen = 0;
18481 HChar dis_buf[50];
18483 Int imm8_0;
18484 IRTemp xmm_vec = newTemp(Ity_V128);
18485 IRTemp src_qword = newTemp(Ity_I64);
18486 const HChar* mbV = isAvx ? "v" : "";
18488 vassert(1==getRexW(pfx)); /* ensured by caller */
18489 modrm = getUChar(delta);
18490 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
18492 if ( epartIsReg( modrm ) ) {
18493 imm8_0 = (Int)(getUChar(delta+1) & 1);
18494 } else {
18495 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18496 imm8_0 = (Int)(getUChar(delta+alen) & 1);
18499 switch ( imm8_0 ) {
18500 case 0: assign( src_qword, unop(Iop_V128to64, mkexpr(xmm_vec)) );
18501 break;
18502 case 1: assign( src_qword, unop(Iop_V128HIto64, mkexpr(xmm_vec)) );
18503 break;
18504 default: vassert(0);
18507 if ( epartIsReg( modrm ) ) {
18508 putIReg64( eregOfRexRM(pfx,modrm), mkexpr(src_qword) );
18509 delta += 1+1;
18510 DIP( "%spextrq $%d, %s,%s\n", mbV, imm8_0,
18511 nameXMMReg( gregOfRexRM(pfx, modrm) ),
18512 nameIReg64( eregOfRexRM(pfx, modrm) ) );
18513 } else {
18514 storeLE( mkexpr(addr), mkexpr(src_qword) );
18515 delta += alen+1;
18516 DIP( "%spextrq $%d, %s,%s\n", mbV,
18517 imm8_0, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
18519 return delta;
18522 static IRExpr* math_CTZ32(IRExpr *exp)
18524 /* Iop_Ctz32 isn't implemented by the amd64 back end, so use Iop_Ctz64. */
18525 return unop(Iop_64to32, unop(Iop_Ctz64, unop(Iop_32Uto64, exp)));
18528 static Long dis_PCMPISTRI_3A ( UChar modrm, UInt regNoL, UInt regNoR,
18529 Long delta, UChar opc, UChar imm,
18530 HChar dis_buf[])
18532 /* We only handle PCMPISTRI for now */
18533 vassert((opc & 0x03) == 0x03);
18534 /* And only an immediate byte of 0x38 or 0x3A */
18535 vassert((imm & ~0x02) == 0x38);
18537 /* FIXME: Is this correct when RegNoL == 16 ? */
18538 IRTemp argL = newTemp(Ity_V128);
18539 assign(argL, getXMMReg(regNoL));
18540 IRTemp argR = newTemp(Ity_V128);
18541 assign(argR, getXMMReg(regNoR));
18543 IRTemp zmaskL = newTemp(Ity_I32);
18544 assign(zmaskL, unop(Iop_16Uto32,
18545 unop(Iop_GetMSBs8x16,
18546 binop(Iop_CmpEQ8x16, mkexpr(argL), mkV128(0)))));
18547 IRTemp zmaskR = newTemp(Ity_I32);
18548 assign(zmaskR, unop(Iop_16Uto32,
18549 unop(Iop_GetMSBs8x16,
18550 binop(Iop_CmpEQ8x16, mkexpr(argR), mkV128(0)))));
18552 /* We want validL = ~(zmaskL | -zmaskL)
18554 But this formulation kills memcheck's validity tracking when any
18555 bits above the first "1" are invalid. So reformulate as:
18557 validL = (zmaskL ? (1 << ctz(zmaskL)) : 0) - 1
18560 IRExpr *ctzL = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskL)));
18562 /* Generate a bool expression which is zero iff the original is
18563 zero. Do this carefully so memcheck can propagate validity bits
18564 correctly.
18566 IRTemp zmaskL_zero = newTemp(Ity_I1);
18567 assign(zmaskL_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskL), mkU32(0)));
18569 IRTemp validL = newTemp(Ity_I32);
18570 assign(validL, binop(Iop_Sub32,
18571 IRExpr_ITE(mkexpr(zmaskL_zero),
18572 binop(Iop_Shl32, mkU32(1), ctzL),
18573 mkU32(0)),
18574 mkU32(1)));
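   /* Worked example: if the first zero byte of argL is at index 3, then
      zmaskL has bit 3 set (possibly more above it), ctz is 3, and
      validL = (1 << 3) - 1 = 0x7 -- exactly the bytes before the
      terminator are marked valid. */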
18576 /* And similarly for validR. */
18577 IRExpr *ctzR = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskR)));
18578 IRTemp zmaskR_zero = newTemp(Ity_I1);
18579 assign(zmaskR_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskR), mkU32(0)));
18580 IRTemp validR = newTemp(Ity_I32);
18581 assign(validR, binop(Iop_Sub32,
18582 IRExpr_ITE(mkexpr(zmaskR_zero),
18583 binop(Iop_Shl32, mkU32(1), ctzR),
18584 mkU32(0)),
18585 mkU32(1)));
18587 /* Do the actual comparison. */
18588 IRExpr *boolResII = unop(Iop_16Uto32,
18589 unop(Iop_GetMSBs8x16,
18590 binop(Iop_CmpEQ8x16, mkexpr(argL),
18591 mkexpr(argR))));
18593 /* Compute boolResII & validL & validR (i.e., if both valid, use
18594 comparison result) */
18595 IRExpr *intRes1_a = binop(Iop_And32, boolResII,
18596 binop(Iop_And32,
18597 mkexpr(validL), mkexpr(validR)));
18599 /* Compute ~(validL | validR); i.e., if both invalid, force 1. */
18600 IRExpr *intRes1_b = unop(Iop_Not32, binop(Iop_Or32,
18601 mkexpr(validL), mkexpr(validR)));
18602 /* Otherwise, zero. */
18603 IRExpr *intRes1 = binop(Iop_And32, mkU32(0xFFFF),
18604 binop(Iop_Or32, intRes1_a, intRes1_b));
18606 /* The "0x30" in imm=0x3A means "polarity=3" means XOR validL with
18607 result. */
18608 IRTemp intRes2 = newTemp(Ity_I32);
18609 assign(intRes2, binop(Iop_And32, mkU32(0xFFFF),
18610 binop(Iop_Xor32, intRes1, mkexpr(validL))));
18612 /* If the 0x40 bit were set in imm=0x3A, we would return the index
18613 of the msb. Since it is clear, we return the index of the
18614 lsb. */
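   /* ORing in 0x10000 plants a guaranteed set bit at position 16, so when
      intRes2 is zero the count comes out as 16 -- the architected
      "no match" result for ECX when operating on byte elements. */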
18615 IRExpr *newECX = math_CTZ32(binop(Iop_Or32,
18616 mkexpr(intRes2), mkU32(0x10000)));
18618 /* And that's our rcx. */
18619 putIReg32(R_RCX, newECX);
18621 /* Now for the condition codes... */
18623 /* C == 0 iff intRes2 == 0 */
18624 IRExpr *c_bit = IRExpr_ITE( binop(Iop_ExpCmpNE32, mkexpr(intRes2),
18625 mkU32(0)),
18626 mkU32(1 << AMD64G_CC_SHIFT_C),
18627 mkU32(0));
18628 /* Z == 1 iff any in argL is 0 */
18629 IRExpr *z_bit = IRExpr_ITE( mkexpr(zmaskL_zero),
18630 mkU32(1 << AMD64G_CC_SHIFT_Z),
18631 mkU32(0));
18632 /* S == 1 iff any in argR is 0 */
18633 IRExpr *s_bit = IRExpr_ITE( mkexpr(zmaskR_zero),
18634 mkU32(1 << AMD64G_CC_SHIFT_S),
18635 mkU32(0));
18636 /* O == IntRes2[0] */
18637 IRExpr *o_bit = binop(Iop_Shl32, binop(Iop_And32, mkexpr(intRes2),
18638 mkU32(0x01)),
18639 mkU8(AMD64G_CC_SHIFT_O));
18641 /* Put them all together */
18642 IRTemp cc = newTemp(Ity_I64);
18643 assign(cc, widenUto64(binop(Iop_Or32,
18644 binop(Iop_Or32, c_bit, z_bit),
18645 binop(Iop_Or32, s_bit, o_bit))));
18646 stmt(IRStmt_Put(OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY)));
18647 stmt(IRStmt_Put(OFFB_CC_DEP1, mkexpr(cc)));
18648 stmt(IRStmt_Put(OFFB_CC_DEP2, mkU64(0)));
18649 stmt(IRStmt_Put(OFFB_CC_NDEP, mkU64(0)));
18651 return delta;
18654 /* This can fail, in which case it returns the original (unchanged)
18655 delta. */
18656 static Long dis_PCMPxSTRx ( const VexAbiInfo* vbi, Prefix pfx,
18657 Long delta, Bool isAvx, UChar opc )
18659 Long delta0 = delta;
18660 UInt isISTRx = opc & 2;
18661 UInt isxSTRM = (opc & 1) ^ 1;
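   /* Opcode bits: 0x60..0x63 are PCMPESTRM, PCMPESTRI, PCMPISTRM and
      PCMPISTRI respectively.  Bit 1 selects implicit-length (I) versus
      explicit-length (E) strings, and bit 0 selects an index result
      (xSTRI, delivered in ECX) versus a mask result (xSTRM, in XMM0). */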
18662 UInt regNoL = 0;
18663 UInt regNoR = 0;
18664 UChar imm = 0;
18665 IRTemp addr = IRTemp_INVALID;
18666 Int alen = 0;
18667 HChar dis_buf[50];
18669 /* This is a nasty kludge. We need to pass 2 x V128 to the helper
18670 (which is clean). Since we can't do that, use a dirty helper to
18671 compute the results directly from the XMM regs in the guest
18672 state. That means for the memory case, we need to move the left
18673 operand into a pseudo-register (XMM16, let's call it). */
18674 UChar modrm = getUChar(delta);
18675 if (epartIsReg(modrm)) {
18676 regNoL = eregOfRexRM(pfx, modrm);
18677 regNoR = gregOfRexRM(pfx, modrm);
18678 imm = getUChar(delta+1);
18679 delta += 1+1;
18680 } else {
18681 regNoL = 16; /* use XMM16 as an intermediary */
18682 regNoR = gregOfRexRM(pfx, modrm);
18683 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18684 /* No alignment check; I guess that makes sense, given that
18685 these insns are for dealing with C style strings. */
18686 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
18687 imm = getUChar(delta+alen);
18688 delta += alen+1;
18691 /* Print the insn here, since dis_PCMPISTRI_3A doesn't do so
18692 itself. */
18693 if (regNoL == 16) {
18694 DIP("%spcmp%cstr%c $%x,%s,%s\n",
18695 isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
18696 (UInt)imm, dis_buf, nameXMMReg(regNoR));
18697 } else {
18698 DIP("%spcmp%cstr%c $%x,%s,%s\n",
18699 isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
18700 (UInt)imm, nameXMMReg(regNoL), nameXMMReg(regNoR));
18703 /* Handle special case(s). */
18704 if (imm == 0x3A && isISTRx && !isxSTRM) {
18705 return dis_PCMPISTRI_3A ( modrm, regNoL, regNoR, delta,
18706 opc, imm, dis_buf);
18709 /* Now we know the XMM reg numbers for the operands, and the
18710 immediate byte. Is it one we can actually handle? Throw out any
18711 cases for which the helper function has not been verified. */
18712 switch (imm) {
18713 case 0x00: case 0x02:
18714 case 0x08: case 0x0A: case 0x0C: case 0x0E:
18715 case 0x10: case 0x12: case 0x14:
18716 case 0x18: case 0x1A:
18717 case 0x30: case 0x34:
18718 case 0x38: case 0x3A:
18719 case 0x40: case 0x42: case 0x44: case 0x46:
18720 case 0x4A:
18721 case 0x62:
18722 case 0x70: case 0x72:
18723 break;
18724 // the 16-bit character versions of the above
18725 case 0x01: case 0x03:
18726 case 0x09: case 0x0B: case 0x0D:
18727 case 0x13:
18728 case 0x19: case 0x1B:
18729 case 0x39: case 0x3B:
18730 case 0x41: case 0x45:
18731 case 0x4B:
18732 break;
18733 default:
18734 return delta0; /*FAIL*/
18737 /* Who ya gonna call? Presumably not Ghostbusters. */
18738 void* fn = &amd64g_dirtyhelper_PCMPxSTRx;
18739 const HChar* nm = "amd64g_dirtyhelper_PCMPxSTRx";
18741 /* Round up the arguments. Note that this is a kludge -- the use
18742 of mkU64 rather than mkIRExpr_HWord implies the assumption that
18743 the host's word size is 64-bit. */
18744 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
18745 UInt gstOffR = ymmGuestRegOffset(regNoR);
18747 IRExpr* opc4_and_imm = mkU64((opc << 8) | (imm & 0xFF));
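   /* The low byte carries imm8 and the byte above it carries the opcode;
      packing them into one scalar lets the helper recover both and hence
      tell the four PCMPxSTRx variants apart. */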
18748 IRExpr* gstOffLe = mkU64(gstOffL);
18749 IRExpr* gstOffRe = mkU64(gstOffR);
18750 IRExpr* edxIN = isISTRx ? mkU64(0) : getIRegRDX(8);
18751 IRExpr* eaxIN = isISTRx ? mkU64(0) : getIRegRAX(8);
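   /* The PCMPESTRx variants take explicit string lengths in RAX and RDX,
      hence those registers are forwarded to the helper; for the
      implicit-length PCMPISTRx variants zeroes are passed instead. */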
18752 IRExpr** args
18753 = mkIRExprVec_6( IRExpr_GSPTR(),
18754 opc4_and_imm, gstOffLe, gstOffRe, edxIN, eaxIN );
18756 IRTemp resT = newTemp(Ity_I64);
18757 IRDirty* d = unsafeIRDirty_1_N( resT, 0/*regparms*/, nm, fn, args );
18758 /* It's not really a dirty call, but we can't use the clean helper
18759 mechanism here for the very lame reason that we can't pass 2 x
18760 V128s by value to a helper. Hence this roundabout scheme. */
18761 d->nFxState = 2;
18762 vex_bzero(&d->fxState, sizeof(d->fxState));
18763 d->fxState[0].fx = Ifx_Read;
18764 d->fxState[0].offset = gstOffL;
18765 d->fxState[0].size = sizeof(U128);
18766 d->fxState[1].fx = Ifx_Read;
18767 d->fxState[1].offset = gstOffR;
18768 d->fxState[1].size = sizeof(U128);
18769 if (isxSTRM) {
18770 /* Declare that the helper writes XMM0. */
18771 d->nFxState = 3;
18772 d->fxState[2].fx = Ifx_Write;
18773 d->fxState[2].offset = ymmGuestRegOffset(0);
18774 d->fxState[2].size = sizeof(U128);
18777 stmt( IRStmt_Dirty(d) );
18779 /* Now resT[15:0] holds the new OSZACP values, so the condition
18780 codes must be updated. And for an xSTRI case, resT[31:16] holds
18781 the new ECX value, so stash that too. */
18782 if (!isxSTRM) {
18783 putIReg64(R_RCX, binop(Iop_And64,
18784 binop(Iop_Shr64, mkexpr(resT), mkU8(16)),
18785 mkU64(0xFFFF)));
18788 /* Zap the upper half of the dest reg as per AVX conventions. */
18789 if (isxSTRM && isAvx)
18790 putYMMRegLane128(/*YMM*/0, 1, mkV128(0));
18792 stmt( IRStmt_Put(
18793 OFFB_CC_DEP1,
18794 binop(Iop_And64, mkexpr(resT), mkU64(0xFFFF))
18796 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
18797 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
18798 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
18800 return delta;
18804 static IRTemp math_PINSRB_128 ( IRTemp v128, IRTemp u8, UInt imm8 )
18806 vassert(imm8 >= 0 && imm8 <= 15);
18808 // Create a V128 value which has the selected byte in the
18809 // specified lane, and zeroes everywhere else.
18810 IRTemp tmp128 = newTemp(Ity_V128);
18811 IRTemp halfshift = newTemp(Ity_I64);
18812 assign(halfshift, binop(Iop_Shl64,
18813 unop(Iop_8Uto64, mkexpr(u8)),
18814 mkU8(8 * (imm8 & 7))));
18815 if (imm8 < 8) {
18816 assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
18817 } else {
18818 assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
18821 UShort mask = ~(1 << imm8);
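   /* mkV128 expands its 16-bit immediate one bit per byte lane
      (0 -> 0x00, 1 -> 0xFF); e.g. for imm8 == 5, mask == 0xFFDF, which
      clears byte lane 5 of v128 before the new byte is ORed in. */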
18822 IRTemp res = newTemp(Ity_V128);
18823 assign( res, binop(Iop_OrV128,
18824 mkexpr(tmp128),
18825 binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
18826 return res;
18830 static IRTemp math_PINSRD_128 ( IRTemp v128, IRTemp u32, UInt imm8 )
18832 IRTemp z32 = newTemp(Ity_I32);
18833 assign(z32, mkU32(0));
18835 /* Surround u32 with zeroes as per imm, giving us something we can
18836 OR into a suitably masked-out v128.*/
18837 IRTemp withZs = newTemp(Ity_V128);
18838 UShort mask = 0;
18839 switch (imm8) {
18840 case 3: mask = 0x0FFF;
18841 assign(withZs, mkV128from32s(u32, z32, z32, z32));
18842 break;
18843 case 2: mask = 0xF0FF;
18844 assign(withZs, mkV128from32s(z32, u32, z32, z32));
18845 break;
18846 case 1: mask = 0xFF0F;
18847 assign(withZs, mkV128from32s(z32, z32, u32, z32));
18848 break;
18849 case 0: mask = 0xFFF0;
18850 assign(withZs, mkV128from32s(z32, z32, z32, u32));
18851 break;
18852 default: vassert(0);
18855 IRTemp res = newTemp(Ity_V128);
18856 assign(res, binop( Iop_OrV128,
18857 mkexpr(withZs),
18858 binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
18859 return res;
18863 static IRTemp math_PINSRQ_128 ( IRTemp v128, IRTemp u64, UInt imm8 )
18865 /* Surround u64 with zeroes as per imm, giving us something we can
18866 OR into a suitably masked-out v128.*/
18867 IRTemp withZs = newTemp(Ity_V128);
18868 UShort mask = 0;
18869 if (imm8 == 0) {
18870 mask = 0xFF00;
18871 assign(withZs, binop(Iop_64HLtoV128, mkU64(0), mkexpr(u64)));
18872 } else {
18873 vassert(imm8 == 1);
18874 mask = 0x00FF;
18875 assign( withZs, binop(Iop_64HLtoV128, mkexpr(u64), mkU64(0)));
18878 IRTemp res = newTemp(Ity_V128);
18879 assign( res, binop( Iop_OrV128,
18880 mkexpr(withZs),
18881 binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
18882 return res;
18886 static IRTemp math_INSERTPS ( IRTemp dstV, IRTemp toInsertD, UInt imm8 )
18888 const IRTemp inval = IRTemp_INVALID;
18889 IRTemp dstDs[4] = { inval, inval, inval, inval };
18890 breakupV128to32s( dstV, &dstDs[3], &dstDs[2], &dstDs[1], &dstDs[0] );
18892 vassert(imm8 <= 255);
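   /* imm8 layout for INSERTPS: bits 7:6 (COUNT_S) select the source lane
      and are dealt with by the caller when it produces toInsertD; bits 5:4
      (COUNT_D) select the destination lane; bits 3:0 (ZMASK) force the
      corresponding result lanes to zero. */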
18893 dstDs[(imm8 >> 4) & 3] = toInsertD; /* "imm8_count_d" */
18895 UInt imm8_zmask = (imm8 & 15);
18896 IRTemp zero_32 = newTemp(Ity_I32);
18897 assign( zero_32, mkU32(0) );
18898 IRTemp resV = newTemp(Ity_V128);
18899 assign( resV, mkV128from32s(
18900 ((imm8_zmask & 8) == 8) ? zero_32 : dstDs[3],
18901 ((imm8_zmask & 4) == 4) ? zero_32 : dstDs[2],
18902 ((imm8_zmask & 2) == 2) ? zero_32 : dstDs[1],
18903 ((imm8_zmask & 1) == 1) ? zero_32 : dstDs[0]) );
18904 return resV;
18908 static Long dis_PEXTRB_128_GtoE ( const VexAbiInfo* vbi, Prefix pfx,
18909 Long delta, Bool isAvx )
18911 IRTemp addr = IRTemp_INVALID;
18912 Int alen = 0;
18913 HChar dis_buf[50];
18914 IRTemp xmm_vec = newTemp(Ity_V128);
18915 IRTemp sel_lane = newTemp(Ity_I32);
18916 IRTemp shr_lane = newTemp(Ity_I32);
18917 const HChar* mbV = isAvx ? "v" : "";
18918 UChar modrm = getUChar(delta);
18919 IRTemp t3, t2, t1, t0;
18920 Int imm8;
18921 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
18922 t3 = t2 = t1 = t0 = IRTemp_INVALID;
18923 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
18925 if ( epartIsReg( modrm ) ) {
18926 imm8 = (Int)getUChar(delta+1);
18927 } else {
18928 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18929 imm8 = (Int)getUChar(delta+alen);
18931 switch ( (imm8 >> 2) & 3 ) {
18932 case 0: assign( sel_lane, mkexpr(t0) ); break;
18933 case 1: assign( sel_lane, mkexpr(t1) ); break;
18934 case 2: assign( sel_lane, mkexpr(t2) ); break;
18935 case 3: assign( sel_lane, mkexpr(t3) ); break;
18936 default: vassert(0);
18938 assign( shr_lane,
18939 binop( Iop_Shr32, mkexpr(sel_lane), mkU8(((imm8 & 3)*8)) ) );
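   /* The byte index in imm8[3:0] is decomposed: bits 3:2 chose the 32-bit
      lane above, and bits 1:0 now shift the wanted byte down to bits 7:0
      of shr_lane. */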
18941 if ( epartIsReg( modrm ) ) {
18942 putIReg64( eregOfRexRM(pfx,modrm),
18943 unop( Iop_32Uto64,
18944 binop(Iop_And32, mkexpr(shr_lane), mkU32(255)) ) );
18945 delta += 1+1;
18946 DIP( "%spextrb $%d, %s,%s\n", mbV, imm8,
18947 nameXMMReg( gregOfRexRM(pfx, modrm) ),
18948 nameIReg64( eregOfRexRM(pfx, modrm) ) );
18949 } else {
18950 storeLE( mkexpr(addr), unop(Iop_32to8, mkexpr(shr_lane) ) );
18951 delta += alen+1;
18952 DIP( "%spextrb $%d,%s,%s\n", mbV,
18953 imm8, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
18956 return delta;
18960 static IRTemp math_DPPD_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 )
18962 vassert(imm8 < 256);
18963 UShort imm8_perms[4] = { 0x0000, 0x00FF, 0xFF00, 0xFFFF };
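   /* imm8 semantics for DPPD: bits 5:4 choose which of the two products
      contribute to the sum, and bits 1:0 choose which result lanes receive
      the sum (the rest are zeroed).  imm8_perms maps each 2-bit field to a
      byte-level V128 mask; e.g. imm8 == 0x31 keeps both products and
      writes only the low lane. */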
18964 IRTemp and_vec = newTemp(Ity_V128);
18965 IRTemp sum_vec = newTemp(Ity_V128);
18966 IRTemp rm = newTemp(Ity_I32);
18967 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
18968 assign( and_vec, binop( Iop_AndV128,
18969 triop( Iop_Mul64Fx2,
18970 mkexpr(rm),
18971 mkexpr(dst_vec), mkexpr(src_vec) ),
18972 mkV128( imm8_perms[ ((imm8 >> 4) & 3) ] ) ) );
18974 assign( sum_vec, binop( Iop_Add64F0x2,
18975 binop( Iop_InterleaveHI64x2,
18976 mkexpr(and_vec), mkexpr(and_vec) ),
18977 binop( Iop_InterleaveLO64x2,
18978 mkexpr(and_vec), mkexpr(and_vec) ) ) );
18979 IRTemp res = newTemp(Ity_V128);
18980 assign(res, binop( Iop_AndV128,
18981 binop( Iop_InterleaveLO64x2,
18982 mkexpr(sum_vec), mkexpr(sum_vec) ),
18983 mkV128( imm8_perms[ (imm8 & 3) ] ) ) );
18984 return res;
18988 static IRTemp math_DPPS_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 )
18990 vassert(imm8 < 256);
18991 IRTemp tmp_prod_vec = newTemp(Ity_V128);
18992 IRTemp prod_vec = newTemp(Ity_V128);
18993 IRTemp sum_vec = newTemp(Ity_V128);
18994 IRTemp rm = newTemp(Ity_I32);
18995 IRTemp v3, v2, v1, v0;
18996 v3 = v2 = v1 = v0 = IRTemp_INVALID;
18997 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
18998 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
18999 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
19000 0xFFFF };
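   /* For DPPS, bits 7:4 of imm8 select which of the four products enter
      the sum and bits 3:0 select which result lanes are written;
      imm8_perms maps each 4-bit field to the corresponding byte-level
      V128 mask. */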
19002 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
19003 assign( tmp_prod_vec,
19004 binop( Iop_AndV128,
19005 triop( Iop_Mul32Fx4,
19006 mkexpr(rm), mkexpr(dst_vec), mkexpr(src_vec) ),
19007 mkV128( imm8_perms[((imm8 >> 4)& 15)] ) ) );
19008 breakupV128to32s( tmp_prod_vec, &v3, &v2, &v1, &v0 );
19009 assign( prod_vec, mkV128from32s( v3, v1, v2, v0 ) );
19011 assign( sum_vec, triop( Iop_Add32Fx4,
19012 mkexpr(rm),
19013 binop( Iop_InterleaveHI32x4,
19014 mkexpr(prod_vec), mkexpr(prod_vec) ),
19015 binop( Iop_InterleaveLO32x4,
19016 mkexpr(prod_vec), mkexpr(prod_vec) ) ) );
19018 IRTemp res = newTemp(Ity_V128);
19019 assign( res, binop( Iop_AndV128,
19020 triop( Iop_Add32Fx4,
19021 mkexpr(rm),
19022 binop( Iop_InterleaveHI32x4,
19023 mkexpr(sum_vec), mkexpr(sum_vec) ),
19024 binop( Iop_InterleaveLO32x4,
19025 mkexpr(sum_vec), mkexpr(sum_vec) ) ),
19026 mkV128( imm8_perms[ (imm8 & 15) ] ) ) );
19027 return res;
19031 static IRTemp math_MPSADBW_128 ( IRTemp dst_vec, IRTemp src_vec, UInt imm8 )
19033 /* Mask out bits of the operands we don't need. This isn't
19034 strictly necessary, but it does ensure Memcheck doesn't
19035 give us any false uninitialised value errors as a
19036 result. */
19037 UShort src_mask[4] = { 0x000F, 0x00F0, 0x0F00, 0xF000 };
19038 UShort dst_mask[2] = { 0x07FF, 0x7FF0 };
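   /* imm8 bits 1:0 pick the 4-byte block of src that is compared against
      every position of an 11-byte window of dst, and imm8 bit 2 picks
      whether that window starts at byte 0 (mask 0x07FF) or byte 4
      (mask 0x7FF0).  The masks above keep exactly those bytes. */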
19040 IRTemp src_maskV = newTemp(Ity_V128);
19041 IRTemp dst_maskV = newTemp(Ity_V128);
19042 assign(src_maskV, mkV128( src_mask[ imm8 & 3 ] ));
19043 assign(dst_maskV, mkV128( dst_mask[ (imm8 >> 2) & 1 ] ));
19045 IRTemp src_masked = newTemp(Ity_V128);
19046 IRTemp dst_masked = newTemp(Ity_V128);
19047 assign(src_masked, binop(Iop_AndV128, mkexpr(src_vec), mkexpr(src_maskV)));
19048 assign(dst_masked, binop(Iop_AndV128, mkexpr(dst_vec), mkexpr(dst_maskV)));
19050 /* Generate four 64-bit values that we can hand to a clean helper */
19051 IRTemp sHi = newTemp(Ity_I64);
19052 IRTemp sLo = newTemp(Ity_I64);
19053 assign( sHi, unop(Iop_V128HIto64, mkexpr(src_masked)) );
19054 assign( sLo, unop(Iop_V128to64, mkexpr(src_masked)) );
19056 IRTemp dHi = newTemp(Ity_I64);
19057 IRTemp dLo = newTemp(Ity_I64);
19058 assign( dHi, unop(Iop_V128HIto64, mkexpr(dst_masked)) );
19059 assign( dLo, unop(Iop_V128to64, mkexpr(dst_masked)) );
19061 /* Compute halves of the result separately */
19062 IRTemp resHi = newTemp(Ity_I64);
19063 IRTemp resLo = newTemp(Ity_I64);
19065 IRExpr** argsHi
19066 = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo),
19067 mkU64( 0x80 | (imm8 & 7) ));
19068 IRExpr** argsLo
19069 = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo),
19070 mkU64( 0x00 | (imm8 & 7) ));
19072 assign(resHi, mkIRExprCCall( Ity_I64, 0/*regparm*/,
19073 "amd64g_calc_mpsadbw",
19074 &amd64g_calc_mpsadbw, argsHi ));
19075 assign(resLo, mkIRExprCCall( Ity_I64, 0/*regparm*/,
19076 "amd64g_calc_mpsadbw",
19077 &amd64g_calc_mpsadbw, argsLo ));
19079 IRTemp res = newTemp(Ity_V128);
19080 assign(res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo)));
19081 return res;
19084 static Long dis_EXTRACTPS ( const VexAbiInfo* vbi, Prefix pfx,
19085 Long delta, Bool isAvx )
19087 IRTemp addr = IRTemp_INVALID;
19088 Int alen = 0;
19089 HChar dis_buf[50];
19090 UChar modrm = getUChar(delta);
19091 Int imm8_10;
19092 IRTemp xmm_vec = newTemp(Ity_V128);
19093 IRTemp src_dword = newTemp(Ity_I32);
19094 UInt rG = gregOfRexRM(pfx,modrm);
19095 IRTemp t3, t2, t1, t0;
19096 t3 = t2 = t1 = t0 = IRTemp_INVALID;
19098 assign( xmm_vec, getXMMReg( rG ) );
19099 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
19101 if ( epartIsReg( modrm ) ) {
19102 imm8_10 = (Int)(getUChar(delta+1) & 3);
19103 } else {
19104 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19105 imm8_10 = (Int)(getUChar(delta+alen) & 3);
19108 switch ( imm8_10 ) {
19109 case 0: assign( src_dword, mkexpr(t0) ); break;
19110 case 1: assign( src_dword, mkexpr(t1) ); break;
19111 case 2: assign( src_dword, mkexpr(t2) ); break;
19112 case 3: assign( src_dword, mkexpr(t3) ); break;
19113 default: vassert(0);
19116 if ( epartIsReg( modrm ) ) {
19117 UInt rE = eregOfRexRM(pfx,modrm);
19118 putIReg32( rE, mkexpr(src_dword) );
19119 delta += 1+1;
19120 DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10,
19121 nameXMMReg( rG ), nameIReg32( rE ) );
19122 } else {
19123 storeLE( mkexpr(addr), mkexpr(src_dword) );
19124 delta += alen+1;
19125 DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10,
19126 nameXMMReg( rG ), dis_buf );
19129 return delta;
19133 static IRTemp math_PCLMULQDQ( IRTemp dV, IRTemp sV, UInt imm8 )
19135 IRTemp t0 = newTemp(Ity_I64);
19136 IRTemp t1 = newTemp(Ity_I64);
19137 assign(t0, unop((imm8&1)? Iop_V128HIto64 : Iop_V128to64,
19138 mkexpr(dV)));
19139 assign(t1, unop((imm8&16) ? Iop_V128HIto64 : Iop_V128to64,
19140 mkexpr(sV)));
19142 IRTemp t2 = newTemp(Ity_I64);
19143 IRTemp t3 = newTemp(Ity_I64);
19145 IRExpr** args;
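   /* The 128-bit carry-less product is obtained in two 64-bit pieces:
      going by how the results are recombined below, the helper's third
      argument selects which half it returns (0 = low, 1 = high). */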
19147 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(0));
19148 assign(t2, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
19149 &amd64g_calculate_pclmul, args));
19150 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(1));
19151 assign(t3, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
19152 &amd64g_calculate_pclmul, args));
19154 IRTemp res = newTemp(Ity_V128);
19155 assign(res, binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)));
19156 return res;
19160 __attribute__((noinline))
19161 static
19162 Long dis_ESC_0F3A__SSE4 ( Bool* decode_OK,
19163 const VexAbiInfo* vbi,
19164 Prefix pfx, Int sz, Long deltaIN )
19166 IRTemp addr = IRTemp_INVALID;
19167 UChar modrm = 0;
19168 Int alen = 0;
19169 HChar dis_buf[50];
19171 *decode_OK = False;
19173 Long delta = deltaIN;
19174 UChar opc = getUChar(delta);
19175 delta++;
19176 switch (opc) {
19178 case 0x08:
19179 /* 66 0F 3A 08 /r ib = ROUNDPS imm8, xmm2/m128, xmm1 */
19180 if (have66noF2noF3(pfx) && sz == 2) {
19182 IRTemp src0 = newTemp(Ity_F32);
19183 IRTemp src1 = newTemp(Ity_F32);
19184 IRTemp src2 = newTemp(Ity_F32);
19185 IRTemp src3 = newTemp(Ity_F32);
19186 IRTemp res0 = newTemp(Ity_F32);
19187 IRTemp res1 = newTemp(Ity_F32);
19188 IRTemp res2 = newTemp(Ity_F32);
19189 IRTemp res3 = newTemp(Ity_F32);
19190 IRTemp rm = newTemp(Ity_I32);
19191 Int imm = 0;
19193 modrm = getUChar(delta);
19195 if (epartIsReg(modrm)) {
19196 assign( src0,
19197 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
19198 assign( src1,
19199 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 1 ) );
19200 assign( src2,
19201 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 2 ) );
19202 assign( src3,
19203 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 3 ) );
19204 imm = getUChar(delta+1);
19205 if (imm & ~15) goto decode_failure;
19206 delta += 1+1;
19207 DIP( "roundps $%d,%s,%s\n",
19208 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
19209 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19210 } else {
19211 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19212 gen_SEGV_if_not_16_aligned(addr);
19213 assign( src0, loadLE(Ity_F32,
19214 binop(Iop_Add64, mkexpr(addr), mkU64(0) )));
19215 assign( src1, loadLE(Ity_F32,
19216 binop(Iop_Add64, mkexpr(addr), mkU64(4) )));
19217 assign( src2, loadLE(Ity_F32,
19218 binop(Iop_Add64, mkexpr(addr), mkU64(8) )));
19219 assign( src3, loadLE(Ity_F32,
19220 binop(Iop_Add64, mkexpr(addr), mkU64(12) )));
19221 imm = getUChar(delta+alen);
19222 if (imm & ~15) goto decode_failure;
19223 delta += alen+1;
19224 DIP( "roundps $%d,%s,%s\n",
19225 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19228 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19229 that encoding is the same as the encoding for IRRoundingMode,
19230 we can use that value directly in the IR as a rounding
19231 mode. */
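         /* Specifically: 00 = round to nearest even, 01 = toward -inf,
            10 = toward +inf, 11 = toward zero; bit 2 of imm means "use the
            rounding mode currently in MXCSR" instead. */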
19232 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
19234 assign(res0, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src0)) );
19235 assign(res1, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src1)) );
19236 assign(res2, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src2)) );
19237 assign(res3, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src3)) );
19239 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
19240 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );
19241 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 2, mkexpr(res2) );
19242 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 3, mkexpr(res3) );
19244 goto decode_success;
19246 break;
19248 case 0x09:
19249 /* 66 0F 3A 09 /r ib = ROUNDPD imm8, xmm2/m128, xmm1 */
19250 if (have66noF2noF3(pfx) && sz == 2) {
19252 IRTemp src0 = newTemp(Ity_F64);
19253 IRTemp src1 = newTemp(Ity_F64);
19254 IRTemp res0 = newTemp(Ity_F64);
19255 IRTemp res1 = newTemp(Ity_F64);
19256 IRTemp rm = newTemp(Ity_I32);
19257 Int imm = 0;
19259 modrm = getUChar(delta);
19261 if (epartIsReg(modrm)) {
19262 assign( src0,
19263 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) );
19264 assign( src1,
19265 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 1 ) );
19266 imm = getUChar(delta+1);
19267 if (imm & ~15) goto decode_failure;
19268 delta += 1+1;
19269 DIP( "roundpd $%d,%s,%s\n",
19270 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
19271 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19272 } else {
19273 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19274 gen_SEGV_if_not_16_aligned(addr);
19275 assign( src0, loadLE(Ity_F64,
19276 binop(Iop_Add64, mkexpr(addr), mkU64(0) )));
19277 assign( src1, loadLE(Ity_F64,
19278 binop(Iop_Add64, mkexpr(addr), mkU64(8) )));
19279 imm = getUChar(delta+alen);
19280 if (imm & ~15) goto decode_failure;
19281 delta += alen+1;
19282 DIP( "roundpd $%d,%s,%s\n",
19283 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19286 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19287 that encoding is the same as the encoding for IRRoundingMode,
19288 we can use that value directly in the IR as a rounding
19289 mode. */
19290 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
19292 assign(res0, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src0)) );
19293 assign(res1, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src1)) );
19295 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
19296 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );
19298 goto decode_success;
19300 break;
19302 case 0x0A:
19303 case 0x0B:
19304 /* 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
19305 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1 */
19307 if (have66noF2noF3(pfx) && sz == 2) {
19309 Bool isD = opc == 0x0B;
19310 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
19311 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
19312 Int imm = 0;
19314 modrm = getUChar(delta);
19316 if (epartIsReg(modrm)) {
19317 assign( src,
19318 isD ? getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 )
19319 : getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
19320 imm = getUChar(delta+1);
19321 if (imm & ~15) goto decode_failure;
19322 delta += 1+1;
19323 DIP( "rounds%c $%d,%s,%s\n",
19324 isD ? 'd' : 's',
19325 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
19326 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19327 } else {
19328 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19329 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
19330 imm = getUChar(delta+alen);
19331 if (imm & ~15) goto decode_failure;
19332 delta += alen+1;
19333 DIP( "rounds%c $%d,%s,%s\n",
19334 isD ? 'd' : 's',
19335 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19338 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19339 that encoding is the same as the encoding for IRRoundingMode,
19340 we can use that value directly in the IR as a rounding
19341 mode. */
19342 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
19343 (imm & 4) ? get_sse_roundingmode()
19344 : mkU32(imm & 3),
19345 mkexpr(src)) );
19347 if (isD)
19348 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
19349 else
19350 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
19352 goto decode_success;
19354 break;
19356 case 0x0C:
19357 /* 66 0F 3A 0C /r ib = BLENDPS xmm1, xmm2/m128, imm8
19358 Blend Packed Single Precision Floating-Point Values (XMM) */
19359 if (have66noF2noF3(pfx) && sz == 2) {
19361 Int imm8;
19362 IRTemp dst_vec = newTemp(Ity_V128);
19363 IRTemp src_vec = newTemp(Ity_V128);
19365 modrm = getUChar(delta);
19367 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
19369 if ( epartIsReg( modrm ) ) {
19370 imm8 = (Int)getUChar(delta+1);
19371 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
19372 delta += 1+1;
19373 DIP( "blendps $%d, %s,%s\n", imm8,
19374 nameXMMReg( eregOfRexRM(pfx, modrm) ),
19375 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19376 } else {
19377 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19378 1/* imm8 is 1 byte after the amode */ );
19379 gen_SEGV_if_not_16_aligned( addr );
19380 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19381 imm8 = (Int)getUChar(delta+alen);
19382 delta += alen+1;
19383 DIP( "blendps $%d, %s,%s\n",
19384 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19387 putXMMReg( gregOfRexRM(pfx, modrm),
19388 mkexpr( math_BLENDPS_128( src_vec, dst_vec, imm8) ) );
19389 goto decode_success;
19391 break;
19393 case 0x0D:
19394 /* 66 0F 3A 0D /r ib = BLENDPD xmm1, xmm2/m128, imm8
19395 Blend Packed Double Precision Floating-Point Values (XMM) */
19396 if (have66noF2noF3(pfx) && sz == 2) {
19398 Int imm8;
19399 IRTemp dst_vec = newTemp(Ity_V128);
19400 IRTemp src_vec = newTemp(Ity_V128);
19402 modrm = getUChar(delta);
19403 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
19405 if ( epartIsReg( modrm ) ) {
19406 imm8 = (Int)getUChar(delta+1);
19407 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
19408 delta += 1+1;
19409 DIP( "blendpd $%d, %s,%s\n", imm8,
19410 nameXMMReg( eregOfRexRM(pfx, modrm) ),
19411 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19412 } else {
19413 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19414 1/* imm8 is 1 byte after the amode */ );
19415 gen_SEGV_if_not_16_aligned( addr );
19416 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19417 imm8 = (Int)getUChar(delta+alen);
19418 delta += alen+1;
19419 DIP( "blendpd $%d, %s,%s\n",
19420 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19423 putXMMReg( gregOfRexRM(pfx, modrm),
19424 mkexpr( math_BLENDPD_128( src_vec, dst_vec, imm8) ) );
19425 goto decode_success;
19427 break;
19429 case 0x0E:
19430 /* 66 0F 3A 0E /r ib = PBLENDW xmm1, xmm2/m128, imm8
19431 Blend Packed Words (XMM) */
19432 if (have66noF2noF3(pfx) && sz == 2) {
19434 Int imm8;
19435 IRTemp dst_vec = newTemp(Ity_V128);
19436 IRTemp src_vec = newTemp(Ity_V128);
19438 modrm = getUChar(delta);
19440 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
19442 if ( epartIsReg( modrm ) ) {
19443 imm8 = (Int)getUChar(delta+1);
19444 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
19445 delta += 1+1;
19446 DIP( "pblendw $%d, %s,%s\n", imm8,
19447 nameXMMReg( eregOfRexRM(pfx, modrm) ),
19448 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19449 } else {
19450 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19451 1/* imm8 is 1 byte after the amode */ );
19452 gen_SEGV_if_not_16_aligned( addr );
19453 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19454 imm8 = (Int)getUChar(delta+alen);
19455 delta += alen+1;
19456 DIP( "pblendw $%d, %s,%s\n",
19457 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19460 putXMMReg( gregOfRexRM(pfx, modrm),
19461 mkexpr( math_PBLENDW_128( src_vec, dst_vec, imm8) ) );
19462 goto decode_success;
19464 break;
19466 case 0x14:
19467 /* 66 0F 3A 14 /r ib = PEXTRB r/m16, xmm, imm8
19468 Extract Byte from xmm, store in mem or zero-extend + store in gen.reg.
19469 (XMM) */
19470 if (have66noF2noF3(pfx) && sz == 2) {
19471 delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ );
19472 goto decode_success;
19474 break;
19476 case 0x15:
19477 /* 66 0F 3A 15 /r ib = PEXTRW r/m16, xmm, imm8
19478 Extract Word from xmm, store in mem or zero-extend + store in gen.reg.
19479 (XMM) */
19480 if (have66noF2noF3(pfx) && sz == 2) {
19481 delta = dis_PEXTRW( vbi, pfx, delta, False/*!isAvx*/ );
19482 goto decode_success;
19484 break;
19486 case 0x16:
19487 /* 66 no-REX.W 0F 3A 16 /r ib = PEXTRD reg/mem32, xmm2, imm8
19488 Extract Doubleword int from xmm reg and store in gen.reg or mem. (XMM)
19489 Note that this insn has the same opcodes as PEXTRQ, but
19490 here the REX.W bit is _not_ present */
19491 if (have66noF2noF3(pfx)
19492 && sz == 2 /* REX.W is _not_ present */) {
19493 delta = dis_PEXTRD( vbi, pfx, delta, False/*!isAvx*/ );
19494 goto decode_success;
19496 /* 66 REX.W 0F 3A 16 /r ib = PEXTRQ reg/mem64, xmm2, imm8
19497 Extract Quadword int from xmm reg and store in gen.reg or mem. (XMM)
19498 Note that this insn has the same opcodes as PEXTRD, but
19499 here the REX.W bit is present */
19500 if (have66noF2noF3(pfx)
19501 && sz == 8 /* REX.W is present */) {
19502 delta = dis_PEXTRQ( vbi, pfx, delta, False/*!isAvx*/);
19503 goto decode_success;
19505 break;
19507 case 0x17:
19508 /* 66 0F 3A 17 /r ib = EXTRACTPS reg/mem32, xmm2, imm8 Extract
19509 float from xmm reg and store in gen.reg or mem. This is
19510 identical to PEXTRD, except that REX.W appears to be ignored. */
19512 if (have66noF2noF3(pfx)
19513 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
19514 delta = dis_EXTRACTPS( vbi, pfx, delta, False/*!isAvx*/ );
19515 goto decode_success;
19517 break;
19519 case 0x20:
19520 /* 66 0F 3A 20 /r ib = PINSRB xmm1, r32/m8, imm8
19521 Extract byte from r32/m8 and insert into xmm1 */
19522 if (have66noF2noF3(pfx) && sz == 2) {
19523 Int imm8;
19524 IRTemp new8 = newTemp(Ity_I8);
19525 modrm = getUChar(delta);
19526 UInt rG = gregOfRexRM(pfx, modrm);
19527 if ( epartIsReg( modrm ) ) {
19528 UInt rE = eregOfRexRM(pfx,modrm);
19529 imm8 = (Int)(getUChar(delta+1) & 0xF);
19530 assign( new8, unop(Iop_32to8, getIReg32(rE)) );
19531 delta += 1+1;
19532 DIP( "pinsrb $%d,%s,%s\n", imm8,
19533 nameIReg32(rE), nameXMMReg(rG) );
19534 } else {
19535 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19536 imm8 = (Int)(getUChar(delta+alen) & 0xF);
19537 assign( new8, loadLE( Ity_I8, mkexpr(addr) ) );
19538 delta += alen+1;
19539 DIP( "pinsrb $%d,%s,%s\n",
19540 imm8, dis_buf, nameXMMReg(rG) );
19542 IRTemp src_vec = newTemp(Ity_V128);
19543 assign(src_vec, getXMMReg( gregOfRexRM(pfx, modrm) ));
19544 IRTemp res = math_PINSRB_128( src_vec, new8, imm8 );
19545 putXMMReg( rG, mkexpr(res) );
19546 goto decode_success;
19548 break;
19550 case 0x21:
19551 /* 66 0F 3A 21 /r ib = INSERTPS imm8, xmm2/m32, xmm1
19552 Insert Packed Single Precision Floating-Point Value (XMM) */
19553 if (have66noF2noF3(pfx) && sz == 2) {
19554 UInt imm8;
19555 IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */
19556 const IRTemp inval = IRTemp_INVALID;
19558 modrm = getUChar(delta);
19559 UInt rG = gregOfRexRM(pfx, modrm);
19561 if ( epartIsReg( modrm ) ) {
19562 UInt rE = eregOfRexRM(pfx, modrm);
19563 IRTemp vE = newTemp(Ity_V128);
19564 assign( vE, getXMMReg(rE) );
19565 IRTemp dsE[4] = { inval, inval, inval, inval };
19566 breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] );
19567 imm8 = getUChar(delta+1);
19568 d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */
19569 delta += 1+1;
19570 DIP( "insertps $%u, %s,%s\n",
19571 imm8, nameXMMReg(rE), nameXMMReg(rG) );
19572 } else {
19573 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19574 assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) );
19575 imm8 = getUChar(delta+alen);
19576 delta += alen+1;
19577 DIP( "insertps $%u, %s,%s\n",
19578 imm8, dis_buf, nameXMMReg(rG) );
19581 IRTemp vG = newTemp(Ity_V128);
19582 assign( vG, getXMMReg(rG) );
19584 putXMMReg( rG, mkexpr(math_INSERTPS( vG, d2ins, imm8 )) );
19585 goto decode_success;
19587 break;
19589 case 0x22:
19590 /* 66 no-REX.W 0F 3A 22 /r ib = PINSRD xmm1, r/m32, imm8
19591 Extract Doubleword int from gen.reg/mem32 and insert into xmm1 */
19592 if (have66noF2noF3(pfx)
19593 && sz == 2 /* REX.W is NOT present */) {
19594 Int imm8_10;
19595 IRTemp src_u32 = newTemp(Ity_I32);
19596 modrm = getUChar(delta);
19597 UInt rG = gregOfRexRM(pfx, modrm);
19599 if ( epartIsReg( modrm ) ) {
19600 UInt rE = eregOfRexRM(pfx,modrm);
19601 imm8_10 = (Int)(getUChar(delta+1) & 3);
19602 assign( src_u32, getIReg32( rE ) );
19603 delta += 1+1;
19604 DIP( "pinsrd $%d, %s,%s\n",
19605 imm8_10, nameIReg32(rE), nameXMMReg(rG) );
19606 } else {
19607 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19608 imm8_10 = (Int)(getUChar(delta+alen) & 3);
19609 assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) );
19610 delta += alen+1;
19611 DIP( "pinsrd $%d, %s,%s\n",
19612 imm8_10, dis_buf, nameXMMReg(rG) );
19615 IRTemp src_vec = newTemp(Ity_V128);
19616 assign(src_vec, getXMMReg( rG ));
19617 IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 );
19618 putXMMReg( rG, mkexpr(res_vec) );
19619 goto decode_success;
19621 /* 66 REX.W 0F 3A 22 /r ib = PINSRQ xmm1, r/m64, imm8
19622 Extract Quadword int from gen.reg/mem64 and insert into xmm1 */
19623 if (have66noF2noF3(pfx)
19624 && sz == 8 /* REX.W is present */) {
19625 Int imm8_0;
19626 IRTemp src_u64 = newTemp(Ity_I64);
19627 modrm = getUChar(delta);
19628 UInt rG = gregOfRexRM(pfx, modrm);
19630 if ( epartIsReg( modrm ) ) {
19631 UInt rE = eregOfRexRM(pfx,modrm);
19632 imm8_0 = (Int)(getUChar(delta+1) & 1);
19633 assign( src_u64, getIReg64( rE ) );
19634 delta += 1+1;
19635 DIP( "pinsrq $%d, %s,%s\n",
19636 imm8_0, nameIReg64(rE), nameXMMReg(rG) );
19637 } else {
19638 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19639 imm8_0 = (Int)(getUChar(delta+alen) & 1);
19640 assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) );
19641 delta += alen+1;
19642 DIP( "pinsrq $%d, %s,%s\n",
19643 imm8_0, dis_buf, nameXMMReg(rG) );
19646 IRTemp src_vec = newTemp(Ity_V128);
19647 assign(src_vec, getXMMReg( rG ));
19648 IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 );
19649 putXMMReg( rG, mkexpr(res_vec) );
19650 goto decode_success;
19652 break;
19654 case 0x40:
19655 /* 66 0F 3A 40 /r ib = DPPS xmm1, xmm2/m128, imm8
19656 Dot Product of Packed Single Precision Floating-Point Values (XMM) */
19657 if (have66noF2noF3(pfx) && sz == 2) {
19658 modrm = getUChar(delta);
19659 Int imm8;
19660 IRTemp src_vec = newTemp(Ity_V128);
19661 IRTemp dst_vec = newTemp(Ity_V128);
19662 UInt rG = gregOfRexRM(pfx, modrm);
19663 assign( dst_vec, getXMMReg( rG ) );
19664 if ( epartIsReg( modrm ) ) {
19665 UInt rE = eregOfRexRM(pfx, modrm);
19666 imm8 = (Int)getUChar(delta+1);
19667 assign( src_vec, getXMMReg(rE) );
19668 delta += 1+1;
19669 DIP( "dpps $%d, %s,%s\n",
19670 imm8, nameXMMReg(rE), nameXMMReg(rG) );
19671 } else {
19672 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19673 1/* imm8 is 1 byte after the amode */ );
19674 gen_SEGV_if_not_16_aligned( addr );
19675 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19676 imm8 = (Int)getUChar(delta+alen);
19677 delta += alen+1;
19678 DIP( "dpps $%d, %s,%s\n",
19679 imm8, dis_buf, nameXMMReg(rG) );
19681 IRTemp res = math_DPPS_128( src_vec, dst_vec, imm8 );
19682 putXMMReg( rG, mkexpr(res) );
19683 goto decode_success;
19685 break;
19687 case 0x41:
19688 /* 66 0F 3A 41 /r ib = DPPD xmm1, xmm2/m128, imm8
19689 Dot Product of Packed Double Precision Floating-Point Values (XMM) */
19690 if (have66noF2noF3(pfx) && sz == 2) {
19691 modrm = getUChar(delta);
19692 Int imm8;
19693 IRTemp src_vec = newTemp(Ity_V128);
19694 IRTemp dst_vec = newTemp(Ity_V128);
19695 UInt rG = gregOfRexRM(pfx, modrm);
19696 assign( dst_vec, getXMMReg( rG ) );
19697 if ( epartIsReg( modrm ) ) {
19698 UInt rE = eregOfRexRM(pfx, modrm);
19699 imm8 = (Int)getUChar(delta+1);
19700 assign( src_vec, getXMMReg(rE) );
19701 delta += 1+1;
19702 DIP( "dppd $%d, %s,%s\n",
19703 imm8, nameXMMReg(rE), nameXMMReg(rG) );
19704 } else {
19705 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19706 1/* imm8 is 1 byte after the amode */ );
19707 gen_SEGV_if_not_16_aligned( addr );
19708 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19709 imm8 = (Int)getUChar(delta+alen);
19710 delta += alen+1;
19711 DIP( "dppd $%d, %s,%s\n",
19712 imm8, dis_buf, nameXMMReg(rG) );
19714 IRTemp res = math_DPPD_128( src_vec, dst_vec, imm8 );
19715 putXMMReg( rG, mkexpr(res) );
19716 goto decode_success;
19718 break;
19720 case 0x42:
19721 /* 66 0F 3A 42 /r ib = MPSADBW xmm1, xmm2/m128, imm8
19722 Multiple Packed Sums of Absolute Difference (XMM) */
19723 if (have66noF2noF3(pfx) && sz == 2) {
19724 Int imm8;
19725 IRTemp src_vec = newTemp(Ity_V128);
19726 IRTemp dst_vec = newTemp(Ity_V128);
19727 modrm = getUChar(delta);
19728 UInt rG = gregOfRexRM(pfx, modrm);
19730 assign( dst_vec, getXMMReg(rG) );
19732 if ( epartIsReg( modrm ) ) {
19733 UInt rE = eregOfRexRM(pfx, modrm);
19735 imm8 = (Int)getUChar(delta+1);
19736 assign( src_vec, getXMMReg(rE) );
19737 delta += 1+1;
19738 DIP( "mpsadbw $%d, %s,%s\n", imm8,
19739 nameXMMReg(rE), nameXMMReg(rG) );
19740 } else {
19741 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19742 1/* imm8 is 1 byte after the amode */ );
19743 gen_SEGV_if_not_16_aligned( addr );
19744 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19745 imm8 = (Int)getUChar(delta+alen);
19746 delta += alen+1;
19747 DIP( "mpsadbw $%d, %s,%s\n", imm8, dis_buf, nameXMMReg(rG) );
19750 putXMMReg( rG, mkexpr( math_MPSADBW_128(dst_vec, src_vec, imm8) ) );
19751 goto decode_success;
19753 break;
19755 case 0x44:
19756 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
19757 * Carry-less multiplication of selected XMM quadwords into XMM
19758 * registers (a.k.a. multiplication of polynomials over GF(2)) */
19760 if (have66noF2noF3(pfx) && sz == 2) {
19762 Int imm8;
19763 IRTemp svec = newTemp(Ity_V128);
19764 IRTemp dvec = newTemp(Ity_V128);
19765 modrm = getUChar(delta);
19766 UInt rG = gregOfRexRM(pfx, modrm);
19768 assign( dvec, getXMMReg(rG) );
19770 if ( epartIsReg( modrm ) ) {
19771 UInt rE = eregOfRexRM(pfx, modrm);
19772 imm8 = (Int)getUChar(delta+1);
19773 assign( svec, getXMMReg(rE) );
19774 delta += 1+1;
19775 DIP( "pclmulqdq $%d, %s,%s\n", imm8,
19776 nameXMMReg(rE), nameXMMReg(rG) );
19777 } else {
19778 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19779 1/* imm8 is 1 byte after the amode */ );
19780 gen_SEGV_if_not_16_aligned( addr );
19781 assign( svec, loadLE( Ity_V128, mkexpr(addr) ) );
19782 imm8 = (Int)getUChar(delta+alen);
19783 delta += alen+1;
19784 DIP( "pclmulqdq $%d, %s,%s\n",
19785 imm8, dis_buf, nameXMMReg(rG) );
19788 putXMMReg( rG, mkexpr( math_PCLMULQDQ(dvec, svec, imm8) ) );
19789 goto decode_success;
19791 break;
19793 case 0x60:
19794 case 0x61:
19795 case 0x62:
19796 case 0x63:
19797 /* 66 0F 3A 63 /r ib = PCMPISTRI imm8, xmm2/m128, xmm1
19798 66 0F 3A 62 /r ib = PCMPISTRM imm8, xmm2/m128, xmm1
19799 66 0F 3A 61 /r ib = PCMPESTRI imm8, xmm2/m128, xmm1
19800 66 0F 3A 60 /r ib = PCMPESTRM imm8, xmm2/m128, xmm1
19801 (selected special cases that actually occur in glibc,
19802 not by any means a complete implementation.) */
19804 if (have66noF2noF3(pfx) && sz == 2) {
19805 Long delta0 = delta;
19806 delta = dis_PCMPxSTRx( vbi, pfx, delta, False/*!isAvx*/, opc );
19807 if (delta > delta0) goto decode_success;
19808 /* else fall through; dis_PCMPxSTRx failed to decode it */
19810 break;
19812 case 0xDF:
19813 /* 66 0F 3A DF /r ib = AESKEYGENASSIST imm8, xmm2/m128, xmm1 */
19814 if (have66noF2noF3(pfx) && sz == 2) {
19815 delta = dis_AESKEYGENASSIST( vbi, pfx, delta, False/*!isAvx*/ );
19816 goto decode_success;
19818 break;
19820 default:
19821 break;
19825 decode_failure:
19826 *decode_OK = False;
19827 return deltaIN;
19829 decode_success:
19830 *decode_OK = True;
19831 return delta;
19835 /*------------------------------------------------------------*/
19836 /*--- ---*/
19837 /*--- Top-level post-escape decoders: dis_ESC_NONE ---*/
19838 /*--- ---*/
19839 /*------------------------------------------------------------*/
19841 __attribute__((noinline))
19842 static
19843 Long dis_ESC_NONE (
19844 /*MB_OUT*/DisResult* dres,
19845 /*MB_OUT*/Bool* expect_CAS,
19846 const VexArchInfo* archinfo,
19847 const VexAbiInfo* vbi,
19848 Prefix pfx, Int sz, Long deltaIN
19851 Long d64 = 0;
19852 UChar abyte = 0;
19853 IRTemp addr = IRTemp_INVALID;
19854 IRTemp t1 = IRTemp_INVALID;
19855 IRTemp t2 = IRTemp_INVALID;
19856 IRTemp t3 = IRTemp_INVALID;
19857 IRTemp t4 = IRTemp_INVALID;
19858 IRTemp t5 = IRTemp_INVALID;
19859 IRType ty = Ity_INVALID;
19860 UChar modrm = 0;
19861 Int am_sz = 0;
19862 Int d_sz = 0;
19863 Int alen = 0;
19864 HChar dis_buf[50];
19866 Long delta = deltaIN;
19867 UChar opc = getUChar(delta); delta++;
19869 /* delta now points at the modrm byte. In most of the cases that
19870 follow, neither the F2 nor F3 prefixes are allowed. However,
19871 for some basic arithmetic operations we have to allow F2/XACQ or
19872 F3/XREL in the case where the destination is memory and the LOCK
19873 prefix is also present. Do this check by looking at the modrm
19874 byte but not advancing delta over it. */
19875 /* By default, F2 and F3 are not allowed, so let's start off with
19876 that setting. */
19877 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
19878 { UChar tmp_modrm = getUChar(delta);
19879 switch (opc) {
19880 case 0x00: /* ADD Gb,Eb */ case 0x01: /* ADD Gv,Ev */
19881 case 0x08: /* OR Gb,Eb */ case 0x09: /* OR Gv,Ev */
19882 case 0x10: /* ADC Gb,Eb */ case 0x11: /* ADC Gv,Ev */
19883 case 0x18: /* SBB Gb,Eb */ case 0x19: /* SBB Gv,Ev */
19884 case 0x20: /* AND Gb,Eb */ case 0x21: /* AND Gv,Ev */
19885 case 0x28: /* SUB Gb,Eb */ case 0x29: /* SUB Gv,Ev */
19886 case 0x30: /* XOR Gb,Eb */ case 0x31: /* XOR Gv,Ev */
19887 if (!epartIsReg(tmp_modrm)
19888 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
19889 /* dst is mem, and we have F2 or F3 but not both */
19890 validF2orF3 = True;
19892 break;
19893 default:
19894 break;
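   /* So, for example, "xacquire lock addl %ebx,(%rax)" -- an F2 prefix on
      a LOCKed, memory-destination ADD (opcode 0x01) -- is accepted here,
      whereas F2 or F3 on the register-to-register form still fails to
      decode. */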
19898 /* Now, in the switch below, for the opc values examined by the
19899 switch above, use validF2orF3 rather than looking at pfx
19900 directly. */
19901 switch (opc) {
19903 case 0x00: /* ADD Gb,Eb */
19904 if (!validF2orF3) goto decode_failure;
19905 delta = dis_op2_G_E ( vbi, pfx, Iop_Add8, WithFlagNone, True, 1, delta, "add" );
19906 return delta;
19907 case 0x01: /* ADD Gv,Ev */
19908 if (!validF2orF3) goto decode_failure;
19909 delta = dis_op2_G_E ( vbi, pfx, Iop_Add8, WithFlagNone, True, sz, delta, "add" );
19910 return delta;
19912 case 0x02: /* ADD Eb,Gb */
19913 if (haveF2orF3(pfx)) goto decode_failure;
19914 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagNone, True, 1, delta, "add" );
19915 return delta;
19916 case 0x03: /* ADD Ev,Gv */
19917 if (haveF2orF3(pfx)) goto decode_failure;
19918 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagNone, True, sz, delta, "add" );
19919 return delta;
19921 case 0x04: /* ADD Ib, AL */
19922 if (haveF2orF3(pfx)) goto decode_failure;
19923 delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" );
19924 return delta;
19925 case 0x05: /* ADD Iv, eAX */
19926 if (haveF2orF3(pfx)) goto decode_failure;
19927 delta = dis_op_imm_A(sz, False, Iop_Add8, True, delta, "add" );
19928 return delta;
19930 case 0x08: /* OR Gb,Eb */
19931 if (!validF2orF3) goto decode_failure;
19932 delta = dis_op2_G_E ( vbi, pfx, Iop_Or8, WithFlagNone, True, 1, delta, "or" );
19933 return delta;
19934 case 0x09: /* OR Gv,Ev */
19935 if (!validF2orF3) goto decode_failure;
19936 delta = dis_op2_G_E ( vbi, pfx, Iop_Or8, WithFlagNone, True, sz, delta, "or" );
19937 return delta;
19939 case 0x0A: /* OR Eb,Gb */
19940 if (haveF2orF3(pfx)) goto decode_failure;
19941 delta = dis_op2_E_G ( vbi, pfx, Iop_Or8, WithFlagNone, True, 1, delta, "or" );
19942 return delta;
19943 case 0x0B: /* OR Ev,Gv */
19944 if (haveF2orF3(pfx)) goto decode_failure;
19945 delta = dis_op2_E_G ( vbi, pfx, Iop_Or8, WithFlagNone, True, sz, delta, "or" );
19946 return delta;
19948 case 0x0C: /* OR Ib, AL */
19949 if (haveF2orF3(pfx)) goto decode_failure;
19950 delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" );
19951 return delta;
19952 case 0x0D: /* OR Iv, eAX */
19953 if (haveF2orF3(pfx)) goto decode_failure;
19954 delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" );
19955 return delta;
19957 case 0x10: /* ADC Gb,Eb */
19958 if (!validF2orF3) goto decode_failure;
19959 delta = dis_op2_G_E ( vbi, pfx, Iop_Add8, WithFlagCarry, True, 1, delta, "adc" );
19960 return delta;
19961 case 0x11: /* ADC Gv,Ev */
19962 if (!validF2orF3) goto decode_failure;
19963 delta = dis_op2_G_E ( vbi, pfx, Iop_Add8, WithFlagCarry, True, sz, delta, "adc" );
19964 return delta;
19966 case 0x12: /* ADC Eb,Gb */
19967 if (haveF2orF3(pfx)) goto decode_failure;
19968 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagCarry, True, 1, delta, "adc" );
19969 return delta;
19970 case 0x13: /* ADC Ev,Gv */
19971 if (haveF2orF3(pfx)) goto decode_failure;
19972 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagCarry, True, sz, delta, "adc" );
19973 return delta;
19975 case 0x14: /* ADC Ib, AL */
19976 if (haveF2orF3(pfx)) goto decode_failure;
19977 delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" );
19978 return delta;
19979 case 0x15: /* ADC Iv, eAX */
19980 if (haveF2orF3(pfx)) goto decode_failure;
19981 delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" );
19982 return delta;
19984 case 0x18: /* SBB Gb,Eb */
19985 if (!validF2orF3) goto decode_failure;
19986 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagCarry, True, 1, delta, "sbb" );
19987 return delta;
19988 case 0x19: /* SBB Gv,Ev */
19989 if (!validF2orF3) goto decode_failure;
19990 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagCarry, True, sz, delta, "sbb" );
19991 return delta;
19993 case 0x1A: /* SBB Eb,Gb */
19994 if (haveF2orF3(pfx)) goto decode_failure;
19995 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagCarry, True, 1, delta, "sbb" );
19996 return delta;
19997 case 0x1B: /* SBB Ev,Gv */
19998 if (haveF2orF3(pfx)) goto decode_failure;
19999 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagCarry, True, sz, delta, "sbb" );
20000 return delta;
20002 case 0x1C: /* SBB Ib, AL */
20003 if (haveF2orF3(pfx)) goto decode_failure;
20004 delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" );
20005 return delta;
20006 case 0x1D: /* SBB Iv, eAX */
20007 if (haveF2orF3(pfx)) goto decode_failure;
20008 delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" );
20009 return delta;
20011 case 0x20: /* AND Gb,Eb */
20012 if (!validF2orF3) goto decode_failure;
20013 delta = dis_op2_G_E ( vbi, pfx, Iop_And8, WithFlagNone, True, 1, delta, "and" );
20014 return delta;
20015 case 0x21: /* AND Gv,Ev */
20016 if (!validF2orF3) goto decode_failure;
20017 delta = dis_op2_G_E ( vbi, pfx, Iop_And8, WithFlagNone, True, sz, delta, "and" );
20018 return delta;
20020 case 0x22: /* AND Eb,Gb */
20021 if (haveF2orF3(pfx)) goto decode_failure;
20022 delta = dis_op2_E_G ( vbi, pfx, Iop_And8, WithFlagNone, True, 1, delta, "and" );
20023 return delta;
20024 case 0x23: /* AND Ev,Gv */
20025 if (haveF2orF3(pfx)) goto decode_failure;
20026 delta = dis_op2_E_G ( vbi, pfx, Iop_And8, WithFlagNone, True, sz, delta, "and" );
20027 return delta;
20029 case 0x24: /* AND Ib, AL */
20030 if (haveF2orF3(pfx)) goto decode_failure;
20031 delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" );
20032 return delta;
20033 case 0x25: /* AND Iv, eAX */
20034 if (haveF2orF3(pfx)) goto decode_failure;
20035 delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" );
20036 return delta;
20038 case 0x28: /* SUB Gb,Eb */
20039 if (!validF2orF3) goto decode_failure;
20040 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagNone, True, 1, delta, "sub" );
20041 return delta;
20042 case 0x29: /* SUB Gv,Ev */
20043 if (!validF2orF3) goto decode_failure;
20044 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagNone, True, sz, delta, "sub" );
20045 return delta;
20047 case 0x2A: /* SUB Eb,Gb */
20048 if (haveF2orF3(pfx)) goto decode_failure;
20049 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagNone, True, 1, delta, "sub" );
20050 return delta;
20051 case 0x2B: /* SUB Ev,Gv */
20052 if (haveF2orF3(pfx)) goto decode_failure;
20053 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagNone, True, sz, delta, "sub" );
20054 return delta;
20056 case 0x2C: /* SUB Ib, AL */
20057 if (haveF2orF3(pfx)) goto decode_failure;
20058 delta = dis_op_imm_A(1, False, Iop_Sub8, True, delta, "sub" );
20059 return delta;
20060 case 0x2D: /* SUB Iv, eAX */
20061 if (haveF2orF3(pfx)) goto decode_failure;
20062 delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" );
20063 return delta;
20065 case 0x30: /* XOR Gb,Eb */
20066 if (!validF2orF3) goto decode_failure;
20067 delta = dis_op2_G_E ( vbi, pfx, Iop_Xor8, WithFlagNone, True, 1, delta, "xor" );
20068 return delta;
20069 case 0x31: /* XOR Gv,Ev */
20070 if (!validF2orF3) goto decode_failure;
20071 delta = dis_op2_G_E ( vbi, pfx, Iop_Xor8, WithFlagNone, True, sz, delta, "xor" );
20072 return delta;
20074 case 0x32: /* XOR Eb,Gb */
20075 if (haveF2orF3(pfx)) goto decode_failure;
20076 delta = dis_op2_E_G ( vbi, pfx, Iop_Xor8, WithFlagNone, True, 1, delta, "xor" );
20077 return delta;
20078 case 0x33: /* XOR Ev,Gv */
20079 if (haveF2orF3(pfx)) goto decode_failure;
20080 delta = dis_op2_E_G ( vbi, pfx, Iop_Xor8, WithFlagNone, True, sz, delta, "xor" );
20081 return delta;
20083 case 0x34: /* XOR Ib, AL */
20084 if (haveF2orF3(pfx)) goto decode_failure;
20085 delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" );
20086 return delta;
20087 case 0x35: /* XOR Iv, eAX */
20088 if (haveF2orF3(pfx)) goto decode_failure;
20089 delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" );
20090 return delta;
20092 case 0x38: /* CMP Gb,Eb */
20093 if (haveF2orF3(pfx)) goto decode_failure;
20094 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagNone, False, 1, delta, "cmp" );
20095 return delta;
20096 case 0x39: /* CMP Gv,Ev */
20097 if (haveF2orF3(pfx)) goto decode_failure;
20098 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagNone, False, sz, delta, "cmp" );
20099 return delta;
20101 case 0x3A: /* CMP Eb,Gb */
20102 if (haveF2orF3(pfx)) goto decode_failure;
20103 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagNone, False, 1, delta, "cmp" );
20104 return delta;
20105 case 0x3B: /* CMP Ev,Gv */
20106 if (haveF2orF3(pfx)) goto decode_failure;
20107 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagNone, False, sz, delta, "cmp" );
20108 return delta;
20110 case 0x3C: /* CMP Ib, AL */
20111 if (haveF2orF3(pfx)) goto decode_failure;
20112 delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" );
20113 return delta;
20114 case 0x3D: /* CMP Iv, eAX */
20115 if (haveF2orF3(pfx)) goto decode_failure;
20116 delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" );
20117 return delta;
20119 case 0x50: /* PUSH eAX */
20120 case 0x51: /* PUSH eCX */
20121 case 0x52: /* PUSH eDX */
20122 case 0x53: /* PUSH eBX */
20123 case 0x55: /* PUSH eBP */
20124 case 0x56: /* PUSH eSI */
20125 case 0x57: /* PUSH eDI */
20126 case 0x54: /* PUSH eSP */
20127 /* This is the Right Way, in that the value to be pushed is
20128 established before %rsp is changed, so that pushq %rsp
20129 correctly pushes the old value. */
20130 if (haveF2orF3(pfx)) goto decode_failure;
20131 vassert(sz == 2 || sz == 4 || sz == 8);
20132 if (sz == 4)
20133 sz = 8; /* there is no encoding for 32-bit push in 64-bit mode */
20134 ty = sz==2 ? Ity_I16 : Ity_I64;
20135 t1 = newTemp(ty);
20136 t2 = newTemp(Ity_I64);
20137 assign(t1, getIRegRexB(sz, pfx, opc-0x50));
20138 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(sz)));
20139 putIReg64(R_RSP, mkexpr(t2) );
20140 storeLE(mkexpr(t2),mkexpr(t1));
20141 DIP("push%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x50));
20142 return delta;
20144 case 0x58: /* POP eAX */
20145 case 0x59: /* POP eCX */
20146 case 0x5A: /* POP eDX */
20147 case 0x5B: /* POP eBX */
20148 case 0x5D: /* POP eBP */
20149 case 0x5E: /* POP eSI */
20150 case 0x5F: /* POP eDI */
20151 case 0x5C: /* POP eSP */
20152 if (haveF2orF3(pfx)) goto decode_failure;
20153 vassert(sz == 2 || sz == 4 || sz == 8);
20154 if (sz == 4)
20155 sz = 8; /* there is no encoding for 32-bit pop in 64-bit mode */
20156 t1 = newTemp(szToITy(sz));
20157 t2 = newTemp(Ity_I64);
20158 assign(t2, getIReg64(R_RSP));
20159 assign(t1, loadLE(szToITy(sz),mkexpr(t2)));
20160 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz)));
20161 putIRegRexB(sz, pfx, opc-0x58, mkexpr(t1));
20162 DIP("pop%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x58));
20163 return delta;
20165 case 0x63: /* MOVSX */
20166 if (haveF2orF3(pfx)) goto decode_failure;
20167 if (haveREX(pfx) && 1==getRexW(pfx)) {
20168 vassert(sz == 8);
20169 /* movsx r/m32 to r64 */
20170 modrm = getUChar(delta);
20171 if (epartIsReg(modrm)) {
20172 delta++;
20173 putIRegG(8, pfx, modrm,
20174 unop(Iop_32Sto64,
20175 getIRegE(4, pfx, modrm)));
20176 DIP("movslq %s,%s\n",
20177 nameIRegE(4, pfx, modrm),
20178 nameIRegG(8, pfx, modrm));
20179 return delta;
20180 } else {
20181 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
20182 delta += alen;
20183 putIRegG(8, pfx, modrm,
20184 unop(Iop_32Sto64,
20185 loadLE(Ity_I32, mkexpr(addr))));
20186 DIP("movslq %s,%s\n", dis_buf,
20187 nameIRegG(8, pfx, modrm));
20188 return delta;
20190 } else {
20191 goto decode_failure;
20194 case 0x68: /* PUSH Iv */
20195 if (haveF2orF3(pfx)) goto decode_failure;
20196 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
20197 if (sz == 4) sz = 8;
20198 d64 = getSDisp(imin(4,sz),delta);
20199 delta += imin(4,sz);
20200 goto do_push_I;
20202 case 0x69: /* IMUL Iv, Ev, Gv */
20203 if (haveF2orF3(pfx)) goto decode_failure;
20204 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, sz );
20205 return delta;
20207 case 0x6A: /* PUSH Ib, sign-extended to sz */
20208 if (haveF2orF3(pfx)) goto decode_failure;
20209 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
20210 if (sz == 4) sz = 8;
20211 d64 = getSDisp8(delta); delta += 1;
20212 goto do_push_I;
20213 do_push_I:
20214 ty = szToITy(sz);
20215 t1 = newTemp(Ity_I64);
20216 t2 = newTemp(ty);
20217 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
20218 putIReg64(R_RSP, mkexpr(t1) );
20219 /* stop mkU16 asserting if d64 is a negative 16-bit number
20220 (bug #132813) */
20221 if (ty == Ity_I16)
20222 d64 &= 0xFFFF;
20223 storeLE( mkexpr(t1), mkU(ty,d64) );
20224 DIP("push%c $%lld\n", nameISize(sz), (Long)d64);
20225 return delta;
20227 case 0x6B: /* IMUL Ib, Ev, Gv */
20228 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, 1 );
20229 return delta;
20231 case 0x70: /* JOb (jump overflow) */
20232 case 0x71: /* JNOb (jump no overflow) */
20233 case 0x72: /* JBb/JNAEb (jump below) */
20234 case 0x73: /* JNBb/JAEb (jump not below) */
20235 case 0x74: /* JZb/JEb (jump zero) */
20236 case 0x75: /* JNZb/JNEb (jump not zero) */
20237 case 0x76: /* JBEb/JNAb (jump below or equal) */
20238 case 0x77: /* JNBEb/JAb (jump not below or equal) */
20239 case 0x78: /* JSb (jump negative) */
20240 case 0x79: /* JNSb (jump not negative) */
20241 case 0x7A: /* JP (jump parity even) */
20242 case 0x7B: /* JNP/JPO (jump parity odd) */
20243 case 0x7C: /* JLb/JNGEb (jump less) */
20244 case 0x7D: /* JGEb/JNLb (jump greater or equal) */
20245 case 0x7E: /* JLEb/JNGb (jump less or equal) */
20246 case 0x7F: { /* JGb/JNLEb (jump greater) */
20247 Long jmpDelta;
20248 const HChar* comment = "";
20249 if (haveF3(pfx)) goto decode_failure;
20250 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
20251 jmpDelta = getSDisp8(delta);
20252 vassert(-128 <= jmpDelta && jmpDelta < 128);
20253 d64 = (guest_RIP_bbstart+delta+1) + jmpDelta;
20254 delta++;
20255 /* End the block at this point. */
20256 jcc_01( dres, (AMD64Condcode)(opc - 0x70),
20257 guest_RIP_bbstart+delta, d64 );
20258 vassert(dres->whatNext == Dis_StopHere);
20259 DIP("j%s-8 0x%llx %s\n", name_AMD64Condcode(opc - 0x70), (ULong)d64,
20260 comment);
20261 return delta;
20264 case 0x80: /* Grp1 Ib,Eb */
20265 modrm = getUChar(delta);
20266 /* Disallow F2/XACQ and F3/XREL for the non-mem case. Allow
20267 just one for the mem case and also require LOCK in this case.
20268 Note that this erroneously allows XACQ/XREL on CMP since we
20269 don't check the subopcode here. No big deal. */
20270 if (epartIsReg(modrm) && haveF2orF3(pfx))
20271 goto decode_failure;
20272 if (!epartIsReg(modrm) && haveF2andF3(pfx))
20273 goto decode_failure;
20274 if (!epartIsReg(modrm) && haveF2orF3(pfx) && !haveLOCK(pfx))
20275 goto decode_failure;
20276 am_sz = lengthAMode(pfx,delta);
20277 sz = 1;
20278 d_sz = 1;
20279 d64 = getSDisp8(delta + am_sz);
20280 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
20281 return delta;
20283 case 0x81: /* Grp1 Iv,Ev */
20284 modrm = getUChar(delta);
20285 /* Same comment as for case 0x80 just above. */
20286 if (epartIsReg(modrm) && haveF2orF3(pfx))
20287 goto decode_failure;
20288 if (!epartIsReg(modrm) && haveF2andF3(pfx))
20289 goto decode_failure;
20290 if (!epartIsReg(modrm) && haveF2orF3(pfx) && !haveLOCK(pfx))
20291 goto decode_failure;
20292 am_sz = lengthAMode(pfx,delta);
20293 d_sz = imin(sz,4);
20294 d64 = getSDisp(d_sz, delta + am_sz);
20295 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
20296 return delta;
20298 case 0x83: /* Grp1 Ib,Ev */
20299 if (haveF2orF3(pfx)) goto decode_failure;
20300 modrm = getUChar(delta);
20301 am_sz = lengthAMode(pfx,delta);
20302 d_sz = 1;
20303 d64 = getSDisp8(delta + am_sz);
20304 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
20305 return delta;
20307 case 0x84: /* TEST Eb,Gb */
20308 if (haveF2orF3(pfx)) goto decode_failure;
20309 delta = dis_op2_E_G ( vbi, pfx, Iop_And8, WithFlagNone, False,
20310 1, delta, "test" );
20311 return delta;
20313 case 0x85: /* TEST Ev,Gv */
20314 if (haveF2orF3(pfx)) goto decode_failure;
20315 delta = dis_op2_E_G ( vbi, pfx, Iop_And8, WithFlagNone, False,
20316 sz, delta, "test" );
20317 return delta;
20319 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
20320 prefix. Therefore, generate CAS regardless of the presence or
20321 otherwise of a LOCK prefix. */
20322 case 0x86: /* XCHG Gb,Eb */
20323 sz = 1;
20324 /* Fall through ... */
20325 case 0x87: /* XCHG Gv,Ev */
20326 modrm = getUChar(delta);
20327 /* Check whether F2 or F3 are allowable. For the mem case, one
20328 or the other but not both are. We don't care about the
20329 presence of LOCK in this case -- XCHG is unusual in this
20330 respect. */
20331 if (haveF2orF3(pfx)) {
20332 if (epartIsReg(modrm)) {
20333 goto decode_failure;
20334 } else {
20335 if (haveF2andF3(pfx))
20336 goto decode_failure;
20339 ty = szToITy(sz);
20340 t1 = newTemp(ty); t2 = newTemp(ty);
20341 if (epartIsReg(modrm)) {
20342 assign(t1, getIRegE(sz, pfx, modrm));
20343 assign(t2, getIRegG(sz, pfx, modrm));
20344 putIRegG(sz, pfx, modrm, mkexpr(t1));
20345 putIRegE(sz, pfx, modrm, mkexpr(t2));
20346 delta++;
20347 DIP("xchg%c %s, %s\n",
20348 nameISize(sz), nameIRegG(sz, pfx, modrm),
20349 nameIRegE(sz, pfx, modrm));
20350 } else {
20351 *expect_CAS = True;
20352 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
20353 assign( t1, loadLE(ty, mkexpr(addr)) );
20354 assign( t2, getIRegG(sz, pfx, modrm) );
20355 casLE( mkexpr(addr),
20356 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
20357 putIRegG( sz, pfx, modrm, mkexpr(t1) );
20358 delta += alen;
20359 DIP("xchg%c %s, %s\n", nameISize(sz),
20360 nameIRegG(sz, pfx, modrm), dis_buf);
20362 return delta;
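      /* Sketch of the atomic idiom used above for the memory form
         (illustration, in terms of a hypothetical CAS() primitive): the
         expected value handed to casLE is the value just loaded, so the
         effect is roughly

            old = *p;                          // t1
            if (!CAS(p, old, reg))             // memory changed meanwhile?
               restart_this_instruction();     // re-execute from RIP
            reg = old;                         // G register gets old value

         which models XCHG's implicit-LOCK semantics; a plain load/store
         pair would not be atomic. */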
20364 case 0x88: { /* MOV Gb,Eb */
20365 /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */
20366 Bool ok = True;
20367 delta = dis_mov_G_E(vbi, pfx, 1, delta, &ok);
20368 if (!ok) goto decode_failure;
20369 return delta;
20372 case 0x89: { /* MOV Gv,Ev */
20373 /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */
20374 Bool ok = True;
20375 delta = dis_mov_G_E(vbi, pfx, sz, delta, &ok);
20376 if (!ok) goto decode_failure;
20377 return delta;
20380 case 0x8A: /* MOV Eb,Gb */
20381 if (haveF2orF3(pfx)) goto decode_failure;
20382 delta = dis_mov_E_G(vbi, pfx, 1, delta);
20383 return delta;
20385 case 0x8B: /* MOV Ev,Gv */
20386 if (haveF2orF3(pfx)) goto decode_failure;
20387 delta = dis_mov_E_G(vbi, pfx, sz, delta);
20388 return delta;
20390 case 0x8C: /* MOV S,E -- MOV from a SEGMENT REGISTER */
20391 if (haveF2orF3(pfx)) goto decode_failure;
20392 delta = dis_mov_S_E(vbi, pfx, sz, delta);
20393 return delta;
20395 case 0x8D: /* LEA M,Gv */
20396 if (haveF2orF3(pfx)) goto decode_failure;
20397 if (sz != 4 && sz != 8)
20398 goto decode_failure;
20399 modrm = getUChar(delta);
20400 if (epartIsReg(modrm))
20401 goto decode_failure;
20402 /* NOTE! this is the one place where a segment override prefix
20403 has no effect on the address calculation. Therefore we clear
20404 any segment override bits in pfx. */
20405 addr = disAMode ( &alen, vbi, clearSegBits(pfx), delta, dis_buf, 0 );
20406 delta += alen;
20407 /* This is a hack. But it isn't clear that really doing the
20408 calculation at 32 bits is really worth it. Hence for leal,
20409 do the full 64-bit calculation and then truncate it. */
20410 putIRegG( sz, pfx, modrm,
20411 sz == 4
20412 ? unop(Iop_64to32, mkexpr(addr))
20413 : mkexpr(addr)
20415 DIP("lea%c %s, %s\n", nameISize(sz), dis_buf,
20416 nameIRegG(sz,pfx,modrm));
20417 return delta;
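      /* Why the truncation hack is sound (sketch): for e.g.
         "leal 4(%rax,%rbx,2), %ecx" the full 64-bit sum rax + 2*rbx + 4 is
         computed and then narrowed with Iop_64to32.  Modular arithmetic
         gives

            (UInt)(a + b) == (UInt)((UInt)a + (UInt)b)

         so truncating the 64-bit effective address yields exactly the
         32-bit result the architecture specifies for leal. */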
20419 case 0x8F: { /* POPQ m64 / POPW m16 */
20420 Int len;
20421 UChar rm;
20422 /* There is no encoding for 32-bit pop in 64-bit mode.
20423 So sz==4 actually means sz==8. */
20424 if (haveF2orF3(pfx)) goto decode_failure;
20425 vassert(sz == 2 || sz == 4
20426 || /* tolerate redundant REX.W, see #210481 */ sz == 8);
20427 if (sz == 4) sz = 8;
20428 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
20430 rm = getUChar(delta);
20432 /* make sure this instruction is correct POP */
20433 if (epartIsReg(rm) || gregLO3ofRM(rm) != 0)
20434 goto decode_failure;
20435 /* and has correct size */
20436 vassert(sz == 8);
20438 t1 = newTemp(Ity_I64);
20439 t3 = newTemp(Ity_I64);
20440 assign( t1, getIReg64(R_RSP) );
20441 assign( t3, loadLE(Ity_I64, mkexpr(t1)) );
20443 /* Increase RSP; must be done before the STORE. Intel manual
20444 says: If the RSP register is used as a base register for
20445 addressing a destination operand in memory, the POP
20446 instruction computes the effective address of the operand
20447 after it increments the RSP register. */
20448 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(sz)) );
20450 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
20451 storeLE( mkexpr(addr), mkexpr(t3) );
20453 DIP("popq %s\n", dis_buf);
20455 delta += len;
20456 return delta;
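      /* Worked example (sketch, not decoder code): for "popq 8(%rsp)" the
         translation above loads the value at the old RSP, then bumps RSP,
         and only then runs disAMode, so the destination address uses the
         updated RSP -- exactly the Intel rule quoted above.  Roughly:

            val  = load64(old_rsp);            // t3
            rsp  = old_rsp + 8;                // RSP updated first
            store64(rsp + 8, val);             // address computed afterwards

         load64/store64 are stand-ins for the little-endian IR accesses. */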
20459 case 0x90: /* XCHG eAX,eAX */
20460 /* detect and handle F3 90 (rep nop) specially */
20461 if (!have66(pfx) && !haveF2(pfx) && haveF3(pfx)) {
20462 DIP("rep nop (P4 pause)\n");
20463 /* "observe" the hint. The Vex client needs to be careful not
20464 to cause very long delays as a result, though. */
20465 jmp_lit(dres, Ijk_Yield, guest_RIP_bbstart+delta);
20466 vassert(dres->whatNext == Dis_StopHere);
20467 return delta;
20469 /* detect and handle NOPs specially */
20470 if (/* F2/F3 probably change meaning completely */
20471 !haveF2orF3(pfx)
20472 /* If REX.B is 1, we're not exchanging rAX with itself */
20473 && getRexB(pfx)==0 ) {
20474 DIP("nop\n");
20475 return delta;
20477 /* else fall through to normal case. */
20478 case 0x91: /* XCHG rAX,rCX */
20479 case 0x92: /* XCHG rAX,rDX */
20480 case 0x93: /* XCHG rAX,rBX */
20481 case 0x94: /* XCHG rAX,rSP */
20482 case 0x95: /* XCHG rAX,rBP */
20483 case 0x96: /* XCHG rAX,rSI */
20484 case 0x97: /* XCHG rAX,rDI */
20485 /* guard against mutancy */
20486 if (haveF2orF3(pfx)) goto decode_failure;
20487 codegen_xchg_rAX_Reg ( pfx, sz, opc - 0x90 );
20488 return delta;
20490 case 0x98: /* CBW */
20491 if (haveF2orF3(pfx)) goto decode_failure;
20492 if (sz == 8) {
20493 putIRegRAX( 8, unop(Iop_32Sto64, getIRegRAX(4)) );
20494 DIP(/*"cdqe\n"*/"cltq\n");
20495 return delta;
20497 if (sz == 4) {
20498 putIRegRAX( 4, unop(Iop_16Sto32, getIRegRAX(2)) );
20499 DIP("cwtl\n");
20500 return delta;
20502 if (sz == 2) {
20503 putIRegRAX( 2, unop(Iop_8Sto16, getIRegRAX(1)) );
20504 DIP("cbw\n");
20505 return delta;
20507 goto decode_failure;
20509 case 0x99: /* CWD/CDQ/CQO */
20510 if (haveF2orF3(pfx)) goto decode_failure;
20511 vassert(sz == 2 || sz == 4 || sz == 8);
20512 ty = szToITy(sz);
20513 putIRegRDX( sz,
20514 binop(mkSizedOp(ty,Iop_Sar8),
20515 getIRegRAX(sz),
20516 mkU8(sz == 2 ? 15 : (sz == 4 ? 31 : 63))) );
20517 DIP(sz == 2 ? "cwd\n"
20518 : (sz == 4 ? /*"cdq\n"*/ "cltd\n"
20519 : "cqo\n"));
20520 return delta;
20522 case 0x9B: /* FWAIT (X87 insn) */
20523 /* ignore? */
20524 DIP("fwait\n");
20525 return delta;
20527 case 0x9C: /* PUSHF */ {
20528 /* Note. There is no encoding for a 32-bit pushf in 64-bit
20529 mode. So sz==4 actually means sz==8. */
20530 /* 24 July 06: has also been seen with a redundant REX prefix,
20531 so must also allow sz==8. */
20532 if (haveF2orF3(pfx)) goto decode_failure;
20533 vassert(sz == 2 || sz == 4 || sz == 8);
20534 if (sz == 4) sz = 8;
20535 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
20537 t1 = newTemp(Ity_I64);
20538 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
20539 putIReg64(R_RSP, mkexpr(t1) );
20541 t2 = newTemp(Ity_I64);
20542 assign( t2, mk_amd64g_calculate_rflags_all() );
20544 /* Patch in the D flag. This can simply be a copy of bit 10 of
20545 baseBlock[OFFB_DFLAG]. */
20546 t3 = newTemp(Ity_I64);
20547 assign( t3, binop(Iop_Or64,
20548 mkexpr(t2),
20549 binop(Iop_And64,
20550 IRExpr_Get(OFFB_DFLAG,Ity_I64),
20551 mkU64(1<<10)))
20554 /* And patch in the ID flag. */
20555 t4 = newTemp(Ity_I64);
20556 assign( t4, binop(Iop_Or64,
20557 mkexpr(t3),
20558 binop(Iop_And64,
20559 binop(Iop_Shl64, IRExpr_Get(OFFB_IDFLAG,Ity_I64),
20560 mkU8(21)),
20561 mkU64(1<<21)))
20564 /* And patch in the AC flag too. */
20565 t5 = newTemp(Ity_I64);
20566 assign( t5, binop(Iop_Or64,
20567 mkexpr(t4),
20568 binop(Iop_And64,
20569 binop(Iop_Shl64, IRExpr_Get(OFFB_ACFLAG,Ity_I64),
20570 mkU8(18)),
20571 mkU64(1<<18)))
20574 /* if sz==2, the stored value needs to be narrowed. */
20575 if (sz == 2)
20576 storeLE( mkexpr(t1), unop(Iop_32to16,
20577 unop(Iop_64to32,mkexpr(t5))) );
20578 else
20579 storeLE( mkexpr(t1), mkexpr(t5) );
20581 DIP("pushf%c\n", nameISize(sz));
20582 return delta;
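      /* Sketch of the flag word assembled above, assuming the usual RFLAGS
         bit layout.  On top of the OSZACP bits from
         mk_amd64g_calculate_rflags_all(), the code ORs in:

            bit 10 : DF -- bit 10 of the guest DFLAG word (which is 1 or -1,
                           so its bit 10 is 0 or 1 respectively)
            bit 18 : AC -- the 0/1 ACFLAG shifted left by 18
            bit 21 : ID -- the 0/1 IDFLAG shifted left by 21

         e.g. with ZF=1, DF=1 (backwards) and ID=1 the stored word has bits
         6, 10 and 21 set. */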
20585 case 0x9D: /* POPF */
20586 /* Note. There is no encoding for a 32-bit popf in 64-bit mode.
20587 So sz==4 actually means sz==8. */
20588 if (haveF2orF3(pfx)) goto decode_failure;
20589 vassert(sz == 2 || sz == 4);
20590 if (sz == 4) sz = 8;
20591 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
20592 t1 = newTemp(Ity_I64); t2 = newTemp(Ity_I64);
20593 assign(t2, getIReg64(R_RSP));
20594 assign(t1, widenUto64(loadLE(szToITy(sz),mkexpr(t2))));
20595 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz)));
20596 /* t1 is the flag word. Mask out everything except OSZACP and
20597 set the flags thunk to AMD64G_CC_OP_COPY. */
20598 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
20599 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
20600 stmt( IRStmt_Put( OFFB_CC_DEP1,
20601 binop(Iop_And64,
20602 mkexpr(t1),
20603 mkU64( AMD64G_CC_MASK_C | AMD64G_CC_MASK_P
20604 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_Z
20605 | AMD64G_CC_MASK_S| AMD64G_CC_MASK_O )
20610 /* Also need to set the D flag, which is held in bit 10 of t1.
20611 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
20612 stmt( IRStmt_Put(
20613 OFFB_DFLAG,
20614 IRExpr_ITE(
20615 unop(Iop_64to1,
20616 binop(Iop_And64,
20617 binop(Iop_Shr64, mkexpr(t1), mkU8(10)),
20618 mkU64(1))),
20619 mkU64(0xFFFFFFFFFFFFFFFFULL),
20620 mkU64(1)))
20623 /* And set the ID flag */
20624 stmt( IRStmt_Put(
20625 OFFB_IDFLAG,
20626 IRExpr_ITE(
20627 unop(Iop_64to1,
20628 binop(Iop_And64,
20629 binop(Iop_Shr64, mkexpr(t1), mkU8(21)),
20630 mkU64(1))),
20631 mkU64(1),
20632 mkU64(0)))
20635 /* And set the AC flag too */
20636 stmt( IRStmt_Put(
20637 OFFB_ACFLAG,
20638 IRExpr_ITE(
20639 unop(Iop_64to1,
20640 binop(Iop_And64,
20641 binop(Iop_Shr64, mkexpr(t1), mkU8(18)),
20642 mkU64(1))),
20643 mkU64(1),
20644 mkU64(0)))
20647 DIP("popf%c\n", nameISize(sz));
20648 return delta;
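      /* Conversely for POPF (sketch): each special flag is recovered by
         isolating its bit in the popped word and mapping it back onto the
         guest-state encoding, e.g. for DF

            df_bit = (flags >> 10) & 1;
            DFLAG  = df_bit ? -1LL : +1LL;     // backwards : forwards

         which is what the IRExpr_ITE over Iop_Shr64/Iop_And64 above says;
         ID and AC map to plain 1/0 values instead. */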
20650 case 0x9E: /* SAHF */
20651 codegen_SAHF();
20652 DIP("sahf\n");
20653 return delta;
20655 case 0x9F: /* LAHF */
20656 codegen_LAHF();
20657 DIP("lahf\n");
20658 return delta;
20660 case 0xA0: /* MOV Ob,AL */
20661 if (have66orF2orF3(pfx)) goto decode_failure;
20662 sz = 1;
20663 /* Fall through ... */
20664 case 0xA1: /* MOV Ov,eAX */
20665 if (sz != 8 && sz != 4 && sz != 2 && sz != 1)
20666 goto decode_failure;
20667 d64 = getDisp64(delta);
20668 delta += 8;
20669 ty = szToITy(sz);
20670 addr = newTemp(Ity_I64);
20671 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) );
20672 putIRegRAX(sz, loadLE( ty, mkexpr(addr) ));
20673 DIP("mov%c %s0x%llx, %s\n", nameISize(sz),
20674 segRegTxt(pfx), (ULong)d64,
20675 nameIRegRAX(sz));
20676 return delta;
20678 case 0xA2: /* MOV AL,Ob */
20679 if (have66orF2orF3(pfx)) goto decode_failure;
20680 sz = 1;
20681 /* Fall through ... */
20682 case 0xA3: /* MOV eAX,Ov */
20683 if (sz != 8 && sz != 4 && sz != 2 && sz != 1)
20684 goto decode_failure;
20685 d64 = getDisp64(delta);
20686 delta += 8;
20687 ty = szToITy(sz);
20688 addr = newTemp(Ity_I64);
20689 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) );
20690 storeLE( mkexpr(addr), getIRegRAX(sz) );
20691 DIP("mov%c %s, %s0x%llx\n", nameISize(sz), nameIRegRAX(sz),
20692 segRegTxt(pfx), (ULong)d64);
20693 return delta;
20695 case 0xA4:
20696 case 0xA5:
20697 /* F3 A4: rep movsb */
20698 if (haveF3(pfx) && !haveF2(pfx)) {
20699 if (opc == 0xA4)
20700 sz = 1;
20701 dis_REP_op ( dres, AMD64CondAlways, dis_MOVS, sz,
20702 guest_RIP_curr_instr,
20703 guest_RIP_bbstart+delta, "rep movs", pfx );
20704 dres->whatNext = Dis_StopHere;
20705 return delta;
20707 /* A4: movsb */
20708 if (!haveF3(pfx) && !haveF2(pfx)) {
20709 if (opc == 0xA4)
20710 sz = 1;
20711 dis_string_op( dis_MOVS, sz, "movs", pfx );
20712 return delta;
20714 goto decode_failure;
20716 case 0xA6:
20717 case 0xA7:
20718 /* F3 A6/A7: repe cmps/rep cmps{w,l,q} */
20719 if (haveF3(pfx) && !haveF2(pfx)) {
20720 if (opc == 0xA6)
20721 sz = 1;
20722 dis_REP_op ( dres, AMD64CondZ, dis_CMPS, sz,
20723 guest_RIP_curr_instr,
20724 guest_RIP_bbstart+delta, "repe cmps", pfx );
20725 dres->whatNext = Dis_StopHere;
20726 return delta;
20728 goto decode_failure;
20730 case 0xAA:
20731 case 0xAB:
20732 /* F3 AA/AB: rep stosb/rep stos{w,l,q} */
20733 if (haveF3(pfx) && !haveF2(pfx)) {
20734 if (opc == 0xAA)
20735 sz = 1;
20736 dis_REP_op ( dres, AMD64CondAlways, dis_STOS, sz,
20737 guest_RIP_curr_instr,
20738 guest_RIP_bbstart+delta, "rep stos", pfx );
20739 vassert(dres->whatNext == Dis_StopHere);
20740 return delta;
20742 /* AA/AB: stosb/stos{w,l,q} */
20743 if (!haveF3(pfx) && !haveF2(pfx)) {
20744 if (opc == 0xAA)
20745 sz = 1;
20746 dis_string_op( dis_STOS, sz, "stos", pfx );
20747 return delta;
20749 goto decode_failure;
20751 case 0xA8: /* TEST Ib, AL */
20752 if (haveF2orF3(pfx)) goto decode_failure;
20753 delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" );
20754 return delta;
20755 case 0xA9: /* TEST Iv, eAX */
20756 if (haveF2orF3(pfx)) goto decode_failure;
20757 delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" );
20758 return delta;
20760 case 0xAC: /* LODS, no REP prefix */
20761 case 0xAD:
20762 dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", pfx );
20763 return delta;
20765 case 0xAE:
20766 case 0xAF:
20767 /* F2 AE/AF: repne scasb/repne scas{w,l,q} */
20768 if (haveF2(pfx) && !haveF3(pfx)) {
20769 if (opc == 0xAE)
20770 sz = 1;
20771 dis_REP_op ( dres, AMD64CondNZ, dis_SCAS, sz,
20772 guest_RIP_curr_instr,
20773 guest_RIP_bbstart+delta, "repne scas", pfx );
20774 vassert(dres->whatNext == Dis_StopHere);
20775 return delta;
20777 /* F3 AE/AF: repe scasb/repe scas{w,l,q} */
20778 if (!haveF2(pfx) && haveF3(pfx)) {
20779 if (opc == 0xAE)
20780 sz = 1;
20781 dis_REP_op ( dres, AMD64CondZ, dis_SCAS, sz,
20782 guest_RIP_curr_instr,
20783 guest_RIP_bbstart+delta, "repe scas", pfx );
20784 vassert(dres->whatNext == Dis_StopHere);
20785 return delta;
20787 /* AE/AF: scasb/scas{w,l,q} */
20788 if (!haveF2(pfx) && !haveF3(pfx)) {
20789 if (opc == 0xAE)
20790 sz = 1;
20791 dis_string_op( dis_SCAS, sz, "scas", pfx );
20792 return delta;
20794 goto decode_failure;
20796 /* XXXX be careful here with moves to AH/BH/CH/DH */
20797 case 0xB0: /* MOV imm,AL */
20798 case 0xB1: /* MOV imm,CL */
20799 case 0xB2: /* MOV imm,DL */
20800 case 0xB3: /* MOV imm,BL */
20801 case 0xB4: /* MOV imm,AH */
20802 case 0xB5: /* MOV imm,CH */
20803 case 0xB6: /* MOV imm,DH */
20804 case 0xB7: /* MOV imm,BH */
20805 if (haveF2orF3(pfx)) goto decode_failure;
20806 d64 = getUChar(delta);
20807 delta += 1;
20808 putIRegRexB(1, pfx, opc-0xB0, mkU8(d64));
20809 DIP("movb $%lld,%s\n", d64, nameIRegRexB(1,pfx,opc-0xB0));
20810 return delta;
20812 case 0xB8: /* MOV imm,eAX */
20813 case 0xB9: /* MOV imm,eCX */
20814 case 0xBA: /* MOV imm,eDX */
20815 case 0xBB: /* MOV imm,eBX */
20816 case 0xBC: /* MOV imm,eSP */
20817 case 0xBD: /* MOV imm,eBP */
20818 case 0xBE: /* MOV imm,eSI */
20819 case 0xBF: /* MOV imm,eDI */
20820 /* This is the one-and-only place where 64-bit literals are
20821 allowed in the instruction stream. */
20822 if (haveF2orF3(pfx)) goto decode_failure;
20823 if (sz == 8) {
20824 d64 = getDisp64(delta);
20825 delta += 8;
20826 putIRegRexB(8, pfx, opc-0xB8, mkU64(d64));
20827 DIP("movabsq $%lld,%s\n", (Long)d64,
20828 nameIRegRexB(8,pfx,opc-0xB8));
20829 } else {
20830 d64 = getSDisp(imin(4,sz),delta);
20831 delta += imin(4,sz);
20832 putIRegRexB(sz, pfx, opc-0xB8,
20833 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
20834 DIP("mov%c $%lld,%s\n", nameISize(sz),
20835 (Long)d64,
20836 nameIRegRexB(sz,pfx,opc-0xB8));
20838 return delta;
20840 case 0xC0: { /* Grp2 Ib,Eb */
20841 Bool decode_OK = True;
20842 if (haveF2orF3(pfx)) goto decode_failure;
20843 modrm = getUChar(delta);
20844 am_sz = lengthAMode(pfx,delta);
20845 d_sz = 1;
20846 d64 = getUChar(delta + am_sz);
20847 sz = 1;
20848 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
20849 mkU8(d64 & 0xFF), NULL, &decode_OK );
20850 if (!decode_OK) goto decode_failure;
20851 return delta;
20854 case 0xC1: { /* Grp2 Ib,Ev */
20855 Bool decode_OK = True;
20856 if (haveF2orF3(pfx)) goto decode_failure;
20857 modrm = getUChar(delta);
20858 am_sz = lengthAMode(pfx,delta);
20859 d_sz = 1;
20860 d64 = getUChar(delta + am_sz);
20861 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
20862 mkU8(d64 & 0xFF), NULL, &decode_OK );
20863 if (!decode_OK) goto decode_failure;
20864 return delta;
20867 case 0xC2: /* RET imm16 */
20868 if (have66orF3(pfx)) goto decode_failure;
20869 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
20870 d64 = getUDisp16(delta);
20871 delta += 2;
20872 dis_ret(dres, vbi, d64);
20873 DIP("ret $%lld\n", d64);
20874 return delta;
20876 case 0xC3: /* RET */
20877 if (have66(pfx)) goto decode_failure;
20878 /* F3 is acceptable on AMD. */
20879 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
20880 dis_ret(dres, vbi, 0);
20881 DIP(haveF3(pfx) ? "rep ; ret\n" : "ret\n");
20882 return delta;
20884 case 0xC6: /* C6 /0 = MOV Ib,Eb */
20885 sz = 1;
20886 goto maybe_do_Mov_I_E;
20887 case 0xC7: /* C7 /0 = MOV Iv,Ev */
20888 goto maybe_do_Mov_I_E;
20889 maybe_do_Mov_I_E:
20890 modrm = getUChar(delta);
20891 if (gregLO3ofRM(modrm) == 0) {
20892 if (epartIsReg(modrm)) {
20893 /* Neither F2 nor F3 are allowable. */
20894 if (haveF2orF3(pfx)) goto decode_failure;
20895 delta++; /* mod/rm byte */
20896 d64 = getSDisp(imin(4,sz),delta);
20897 delta += imin(4,sz);
20898 putIRegE(sz, pfx, modrm,
20899 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
20900 DIP("mov%c $%lld, %s\n", nameISize(sz),
20901 (Long)d64,
20902 nameIRegE(sz,pfx,modrm));
20903 } else {
20904 if (haveF2(pfx)) goto decode_failure;
20905 /* F3(XRELEASE) is allowable here */
20906 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
20907 /*xtra*/imin(4,sz) );
20908 delta += alen;
20909 d64 = getSDisp(imin(4,sz),delta);
20910 delta += imin(4,sz);
20911 storeLE(mkexpr(addr),
20912 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
20913 DIP("mov%c $%lld, %s\n", nameISize(sz), (Long)d64, dis_buf);
20915 return delta;
20917 /* BEGIN HACKY SUPPORT FOR xbegin */
20918 if (opc == 0xC7 && modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 4
20919 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
20920 delta++; /* mod/rm byte */
20921 d64 = getSDisp(4,delta);
20922 delta += 4;
20923 guest_RIP_next_mustcheck = True;
20924 guest_RIP_next_assumed = guest_RIP_bbstart + delta;
20925 Addr64 failAddr = guest_RIP_bbstart + delta + d64;
20926 /* EAX contains the failure status code. Bit 3 is "Set if an
20927 internal buffer overflowed", which seems like the
20928 least-bogus choice we can make here. */
20929 putIRegRAX(4, mkU32(1<<3));
20930 /* And jump to the fail address. */
20931 jmp_lit(dres, Ijk_Boring, failAddr);
20932 vassert(dres->whatNext == Dis_StopHere);
20933 DIP("xbeginq 0x%llx\n", failAddr);
20934 return delta;
20936 /* END HACKY SUPPORT FOR xbegin */
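      /* Net effect of the hack (sketch): every "xbeginq L" is translated
         as an immediately-aborting transaction, roughly

            eax = (1 << 3);     // abort status: "internal buffer overflowed"
            goto L;             // jump straight to the fallback label

         so guest code that falls back to a lock-based path on abort still
         behaves correctly; it just never sees a successful transaction. */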
20937 /* BEGIN HACKY SUPPORT FOR xabort */
20938 if (opc == 0xC6 && modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 1
20939 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
20940 delta++; /* mod/rm byte */
20941 abyte = getUChar(delta); delta++;
20942 /* There is never a real transaction in progress, so do nothing. */
20943 DIP("xabort $%d\n", (Int)abyte);
20944 return delta;
20946 /* END HACKY SUPPORT FOR xabort */
20947 goto decode_failure;
20949 case 0xC8: /* ENTER */
20950 /* Same comments re operand size as for LEAVE below apply.
20951 Also, only handles the case "enter $imm16, $0"; other cases
20952 for the second operand (nesting depth) are not handled. */
20953 if (sz != 4)
20954 goto decode_failure;
20955 d64 = getUDisp16(delta);
20956 delta += 2;
20957 vassert(d64 >= 0 && d64 <= 0xFFFF);
20958 if (getUChar(delta) != 0)
20959 goto decode_failure;
20960 delta++;
20961 /* Intel docs seem to suggest:
20962 push rbp
20963 temp = rsp
20964 rbp = temp
20965 rsp = rsp - imm16
20967 t1 = newTemp(Ity_I64);
20968 assign(t1, getIReg64(R_RBP));
20969 t2 = newTemp(Ity_I64);
20970 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
20971 putIReg64(R_RSP, mkexpr(t2));
20972 storeLE(mkexpr(t2), mkexpr(t1));
20973 putIReg64(R_RBP, mkexpr(t2));
20974 if (d64 > 0) {
20975 putIReg64(R_RSP, binop(Iop_Sub64, mkexpr(t2), mkU64(d64)));
20977 DIP("enter $%u, $0\n", (UInt)d64);
20978 return delta;
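      /* Concrete example (sketch): with RSP=0x1000 and RBP=0x2000,
         "enter $16, $0" as translated above leaves

            mem[0x0FF8..0x0FFF] = 0x2000     // old RBP pushed
            RBP = 0x0FF8                     // points at the saved slot
            RSP = 0x0FF8 - 16 = 0x0FE8       // imm16 bytes of frame space

         matching the push-rbp / rbp=rsp / rsp-=imm16 reading of the Intel
         pseudo-code quoted above. */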
20980 case 0xC9: /* LEAVE */
20981 /* In 64-bit mode this defaults to a 64-bit operand size. There
20982 is no way to encode a 32-bit variant. Hence sz==4 but we do
20983 it as if sz=8. */
20984 if (sz != 4)
20985 goto decode_failure;
20986 t1 = newTemp(Ity_I64);
20987 t2 = newTemp(Ity_I64);
20988 assign(t1, getIReg64(R_RBP));
20989 /* First PUT RSP looks redundant, but need it because RSP must
20990 always be up-to-date for Memcheck to work... */
20991 putIReg64(R_RSP, mkexpr(t1));
20992 assign(t2, loadLE(Ity_I64,mkexpr(t1)));
20993 putIReg64(R_RBP, mkexpr(t2));
20994 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(8)) );
20995 DIP("leave\n");
20996 return delta;
20998 case 0xCC: /* INT 3 */
20999 jmp_lit(dres, Ijk_SigTRAP, guest_RIP_bbstart + delta);
21000 vassert(dres->whatNext == Dis_StopHere);
21001 DIP("int $0x3\n");
21002 return delta;
21004 case 0xCD: /* INT imm8 */
21005 d64 = getUChar(delta); delta++;
21007 /* Handle int $0xD2 (Solaris fasttrap syscalls). */
21008 if (d64 == 0xD2) {
21009 jmp_lit(dres, Ijk_Sys_int210, guest_RIP_bbstart + delta);
21010 vassert(dres->whatNext == Dis_StopHere);
21011 DIP("int $0xD2\n");
21012 return delta;
21014 goto decode_failure;
21016 case 0xCF: /* IRET */
21017 /* Note, this is an extremely kludgey and limited implementation of iret
21018 based on the extremely kludgey and limited implementation of iret for x86
21019 popq %RIP; popl %CS; popq %RFLAGS; popq %RSP; popl %SS
21020 %CS and %SS are ignored */
21021 if (sz != 8 || have66orF2orF3(pfx)) goto decode_failure;
21023 t1 = newTemp(Ity_I64); /* RSP */
21024 t2 = newTemp(Ity_I64); /* new RIP */
21025 /* t3 = newTemp(Ity_I32); new CS */
21026 t4 = newTemp(Ity_I64); /* new RFLAGS */
21027 t5 = newTemp(Ity_I64); /* new RSP */
21028 /* t6 = newTemp(Ity_I32); new SS */
21030 assign(t1, getIReg64(R_RSP));
21031 assign(t2, loadLE(Ity_I64, binop(Iop_Add64,mkexpr(t1),mkU64(0))));
21032 /* assign(t3, loadLE(Ity_I32, binop(Iop_Add64,mkexpr(t1),mkU64(8)))); */
21033 assign(t4, loadLE(Ity_I64, binop(Iop_Add64,mkexpr(t1),mkU64(16))));
21034 assign(t5, loadLE(Ity_I64, binop(Iop_Add64,mkexpr(t1),mkU64(24))));
21035 /* assign(t6, loadLE(Ity_I32, binop(Iop_Add64,mkexpr(t1),mkU64(32)))); */
21037 /* set %RFLAGS */
21038 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
21039 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
21040 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
21041 stmt( IRStmt_Put( OFFB_CC_DEP1,
21042 binop(Iop_And64,
21043 mkexpr(t4),
21044 mkU64( AMD64G_CC_MASK_C | AMD64G_CC_MASK_P
21045 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_Z
21046 | AMD64G_CC_MASK_S| AMD64G_CC_MASK_O )
21051 /* Also need to set the D flag, which is held in bit 10 of t4.
21052 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
21053 stmt( IRStmt_Put(
21054 OFFB_DFLAG,
21055 IRExpr_ITE(
21056 unop(Iop_64to1,
21057 binop(Iop_And64,
21058 binop(Iop_Shr64, mkexpr(t4), mkU8(10)),
21059 mkU64(1))),
21060 mkU64(0xFFFFFFFFFFFFFFFFULL),
21061 mkU64(1)))
21064 /* And set the ID flag */
21065 stmt( IRStmt_Put(
21066 OFFB_IDFLAG,
21067 IRExpr_ITE(
21068 unop(Iop_64to1,
21069 binop(Iop_And64,
21070 binop(Iop_Shr64, mkexpr(t4), mkU8(21)),
21071 mkU64(1))),
21072 mkU64(1),
21073 mkU64(0)))
21076 /* And set the AC flag too */
21077 stmt( IRStmt_Put(
21078 OFFB_ACFLAG,
21079 IRExpr_ITE(
21080 unop(Iop_64to1,
21081 binop(Iop_And64,
21082 binop(Iop_Shr64, mkexpr(t4), mkU8(18)),
21083 mkU64(1))),
21084 mkU64(1),
21085 mkU64(0)))
21089 /* set new stack */
21090 putIReg64(R_RSP, mkexpr(t5));
21092 /* goto new RIP value */
21093 jmp_treg(dres, Ijk_Ret, t2);
21094 DIP("iret (very kludgey)\n");
21095 return delta;
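      /* Stack frame consumed by this kludged IRET (sketch); every slot is
         8 bytes wide in 64-bit mode:

            RSP +  0 : new RIP     (t2, jumped to)
            RSP +  8 : new CS      (ignored)
            RSP + 16 : new RFLAGS  (t4, copied into the flags thunk)
            RSP + 24 : new RSP     (t5, installed last)
            RSP + 32 : new SS      (ignored)
      */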
21097 case 0xD0: { /* Grp2 1,Eb */
21098 Bool decode_OK = True;
21099 if (haveF2orF3(pfx)) goto decode_failure;
21100 modrm = getUChar(delta);
21101 am_sz = lengthAMode(pfx,delta);
21102 d_sz = 0;
21103 d64 = 1;
21104 sz = 1;
21105 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
21106 mkU8(d64), NULL, &decode_OK );
21107 if (!decode_OK) goto decode_failure;
21108 return delta;
21111 case 0xD1: { /* Grp2 1,Ev */
21112 Bool decode_OK = True;
21113 if (haveF2orF3(pfx)) goto decode_failure;
21114 modrm = getUChar(delta);
21115 am_sz = lengthAMode(pfx,delta);
21116 d_sz = 0;
21117 d64 = 1;
21118 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
21119 mkU8(d64), NULL, &decode_OK );
21120 if (!decode_OK) goto decode_failure;
21121 return delta;
21124 case 0xD2: { /* Grp2 CL,Eb */
21125 Bool decode_OK = True;
21126 if (haveF2orF3(pfx)) goto decode_failure;
21127 modrm = getUChar(delta);
21128 am_sz = lengthAMode(pfx,delta);
21129 d_sz = 0;
21130 sz = 1;
21131 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
21132 getIRegCL(), "%cl", &decode_OK );
21133 if (!decode_OK) goto decode_failure;
21134 return delta;
21137 case 0xD3: { /* Grp2 CL,Ev */
21138 Bool decode_OK = True;
21139 if (haveF2orF3(pfx)) goto decode_failure;
21140 modrm = getUChar(delta);
21141 am_sz = lengthAMode(pfx,delta);
21142 d_sz = 0;
21143 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
21144 getIRegCL(), "%cl", &decode_OK );
21145 if (!decode_OK) goto decode_failure;
21146 return delta;
21149 case 0xD8: /* X87 instructions */
21150 case 0xD9:
21151 case 0xDA:
21152 case 0xDB:
21153 case 0xDC:
21154 case 0xDD:
21155 case 0xDE:
21156 case 0xDF: {
21157 Bool redundantREXWok = False;
21159 if (haveF2orF3(pfx))
21160 goto decode_failure;
21162 /* kludge to tolerate redundant rex.w prefixes (should do this
21163 properly one day) */
21164 /* mono 1.1.18.1 produces 48 D9 FA, which is rex.w fsqrt */
21165 if ( (opc == 0xD9 && getUChar(delta+0) == 0xFA)/*fsqrt*/ )
21166 redundantREXWok = True;
21168 Bool size_OK = False;
21169 if ( sz == 4 )
21170 size_OK = True;
21171 else if ( sz == 8 )
21172 size_OK = redundantREXWok;
21173 else if ( sz == 2 ) {
21174 int mod_rm = getUChar(delta+0);
21175 int reg = gregLO3ofRM(mod_rm);
21176 /* The HotSpot JVM uses these */
21177 if ( (opc == 0xDD) && (reg == 0 /* FLDL */ ||
21178 reg == 4 /* FNSAVE */ ||
21179 reg == 6 /* FRSTOR */ ) )
21180 size_OK = True;
21182 /* AMD manual says 0x66 size override is ignored, except where
21183 it is meaningful */
21184 if (!size_OK)
21185 goto decode_failure;
21187 Bool decode_OK = False;
21188 delta = dis_FPU ( &decode_OK, vbi, pfx, delta );
21189 if (!decode_OK)
21190 goto decode_failure;
21192 return delta;
21195 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
21196 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */
21197 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */
21198 { /* The docs say this uses rCX as a count depending on the
21199 address size override, not the operand one. */
21200 IRExpr* zbit = NULL;
21201 IRExpr* count = NULL;
21202 IRExpr* cond = NULL;
21203 const HChar* xtra = NULL;
21205 if (have66orF2orF3(pfx) || 1==getRexW(pfx)) goto decode_failure;
21206 /* So at this point we've rejected any variants which appear to
21207 be governed by the usual operand-size modifiers. Hence only
21208 the address size prefix can have an effect. It changes the
21209 size from 64 (default) to 32. */
21210 d64 = guest_RIP_bbstart+delta+1 + getSDisp8(delta);
21211 delta++;
21212 if (haveASO(pfx)) {
21213 /* 64to32 of 64-bit get is merely a get-put improvement
21214 trick. */
21215 putIReg32(R_RCX, binop(Iop_Sub32,
21216 unop(Iop_64to32, getIReg64(R_RCX)),
21217 mkU32(1)));
21218 } else {
21219 putIReg64(R_RCX, binop(Iop_Sub64, getIReg64(R_RCX), mkU64(1)));
21222 /* This is correct, both for 32- and 64-bit versions. If we're
21223 doing a 32-bit dec and the result is zero then the default
21224 zero extension rule will cause the upper 32 bits to be zero
21225 too. Hence a 64-bit check against zero is OK. */
21226 count = getIReg64(R_RCX);
21227 cond = binop(Iop_CmpNE64, count, mkU64(0));
21228 switch (opc) {
21229 case 0xE2:
21230 xtra = "";
21231 break;
21232 case 0xE1:
21233 xtra = "e";
21234 zbit = mk_amd64g_calculate_condition( AMD64CondZ );
21235 cond = mkAnd1(cond, zbit);
21236 break;
21237 case 0xE0:
21238 xtra = "ne";
21239 zbit = mk_amd64g_calculate_condition( AMD64CondNZ );
21240 cond = mkAnd1(cond, zbit);
21241 break;
21242 default:
21243 vassert(0);
21245 stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64), OFFB_RIP) );
21247 DIP("loop%s%s 0x%llx\n", xtra, haveASO(pfx) ? "l" : "", (ULong)d64);
21248 return delta;
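      /* Why the single 64-bit test above suffices (sketch): with the 0x67
         prefix the decrement is done on ECX and written back through
         putIReg32, which zero-extends into RCX, i.e.

            RCX = (ULong)(UInt)(ECX - 1);

         so ECX==0 implies RCX==0, and CmpNE64(RCX,0) is the right test for
         both the 32-bit and 64-bit forms of LOOP/LOOPE/LOOPNE. */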
21251 case 0xE3:
21252 /* JRCXZ or JECXZ, depending on the address size override. */
21253 if (have66orF2orF3(pfx)) goto decode_failure;
21254 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
21255 delta++;
21256 if (haveASO(pfx)) {
21257 /* 32-bit */
21258 stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
21259 unop(Iop_32Uto64, getIReg32(R_RCX)),
21260 mkU64(0)),
21261 Ijk_Boring,
21262 IRConst_U64(d64),
21263 OFFB_RIP
21265 DIP("jecxz 0x%llx\n", (ULong)d64);
21266 } else {
21267 /* 64-bit */
21268 stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
21269 getIReg64(R_RCX),
21270 mkU64(0)),
21271 Ijk_Boring,
21272 IRConst_U64(d64),
21273 OFFB_RIP
21275 DIP("jrcxz 0x%llx\n", (ULong)d64);
21277 return delta;
21279 case 0xE4: /* IN imm8, AL */
21280 sz = 1;
21281 t1 = newTemp(Ity_I64);
21282 abyte = getUChar(delta); delta++;
21283 assign(t1, mkU64( abyte & 0xFF ));
21284 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz));
21285 goto do_IN;
21286 case 0xE5: /* IN imm8, eAX */
21287 if (!(sz == 2 || sz == 4)) goto decode_failure;
21288 t1 = newTemp(Ity_I64);
21289 abyte = getUChar(delta); delta++;
21290 assign(t1, mkU64( abyte & 0xFF ));
21291 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz));
21292 goto do_IN;
21293 case 0xEC: /* IN %DX, AL */
21294 sz = 1;
21295 t1 = newTemp(Ity_I64);
21296 assign(t1, unop(Iop_16Uto64, getIRegRDX(2)));
21297 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2),
21298 nameIRegRAX(sz));
21299 goto do_IN;
21300 case 0xED: /* IN %DX, eAX */
21301 if (!(sz == 2 || sz == 4)) goto decode_failure;
21302 t1 = newTemp(Ity_I64);
21303 assign(t1, unop(Iop_16Uto64, getIRegRDX(2)));
21304 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2),
21305 nameIRegRAX(sz));
21306 goto do_IN;
21307 do_IN: {
21308 /* At this point, sz indicates the width, and t1 is a 64-bit
21309 value giving port number. */
21310 IRDirty* d;
21311 if (haveF2orF3(pfx)) goto decode_failure;
21312 vassert(sz == 1 || sz == 2 || sz == 4);
21313 ty = szToITy(sz);
21314 t2 = newTemp(Ity_I64);
21315 d = unsafeIRDirty_1_N(
21317 0/*regparms*/,
21318 "amd64g_dirtyhelper_IN",
21319 &amd64g_dirtyhelper_IN,
21320 mkIRExprVec_2( mkexpr(t1), mkU64(sz) )
21322 /* do the call, dumping the result in t2. */
21323 stmt( IRStmt_Dirty(d) );
21324 putIRegRAX(sz, narrowTo( ty, mkexpr(t2) ) );
21325 return delta;
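      /* For reference, the helper called above is assumed to have the shape

            ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz );

         (see the amd64 guest helpers); the 64-bit port number travels in
         t1, the result lands in t2 and is then narrowed into AL/AX/EAX. */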
21328 case 0xE6: /* OUT AL, imm8 */
21329 sz = 1;
21330 t1 = newTemp(Ity_I64);
21331 abyte = getUChar(delta); delta++;
21332 assign( t1, mkU64( abyte & 0xFF ) );
21333 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte);
21334 goto do_OUT;
21335 case 0xE7: /* OUT eAX, imm8 */
21336 if (!(sz == 2 || sz == 4)) goto decode_failure;
21337 t1 = newTemp(Ity_I64);
21338 abyte = getUChar(delta); delta++;
21339 assign( t1, mkU64( abyte & 0xFF ) );
21340 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte);
21341 goto do_OUT;
21342 case 0xEE: /* OUT AL, %DX */
21343 sz = 1;
21344 t1 = newTemp(Ity_I64);
21345 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) );
21346 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz),
21347 nameIRegRDX(2));
21348 goto do_OUT;
21349 case 0xEF: /* OUT eAX, %DX */
21350 if (!(sz == 2 || sz == 4)) goto decode_failure;
21351 t1 = newTemp(Ity_I64);
21352 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) );
21353 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz),
21354 nameIRegRDX(2));
21355 goto do_OUT;
21356 do_OUT: {
21357 /* At this point, sz indicates the width, and t1 is a 64-bit
21358 value giving port number. */
21359 IRDirty* d;
21360 if (haveF2orF3(pfx)) goto decode_failure;
21361 vassert(sz == 1 || sz == 2 || sz == 4);
21362 ty = szToITy(sz);
21363 d = unsafeIRDirty_0_N(
21364 0/*regparms*/,
21365 "amd64g_dirtyhelper_OUT",
21366 &amd64g_dirtyhelper_OUT,
21367 mkIRExprVec_3( mkexpr(t1),
21368 widenUto64( getIRegRAX(sz) ),
21369 mkU64(sz) )
21371 stmt( IRStmt_Dirty(d) );
21372 return delta;
21375 case 0xE8: /* CALL J4 */
21376 if (haveF3(pfx)) goto decode_failure;
21377 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
21378 d64 = getSDisp32(delta); delta += 4;
21379 d64 += (guest_RIP_bbstart+delta);
21380 /* (guest_RIP_bbstart+delta) == return-to addr, d64 == call-to addr */
21381 t1 = newTemp(Ity_I64);
21382 assign(t1, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
21383 putIReg64(R_RSP, mkexpr(t1));
21384 storeLE( mkexpr(t1), mkU64(guest_RIP_bbstart+delta));
21385 t2 = newTemp(Ity_I64);
21386 assign(t2, mkU64((Addr64)d64));
21387 make_redzone_AbiHint(vbi, t1, t2/*nia*/, "call-d32");
21388 jmp_lit(dres, Ijk_Call, d64);
21389 vassert(dres->whatNext == Dis_StopHere);
21390 DIP("call 0x%llx\n", (ULong)d64);
21391 return delta;
21393 case 0xE9: /* Jv (jump, 16/32 offset) */
21394 if (haveF3(pfx)) goto decode_failure;
21395 if (sz != 4)
21396 goto decode_failure; /* JRS added 2004 July 11 */
21397 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
21398 d64 = (guest_RIP_bbstart+delta+sz) + getSDisp(sz,delta);
21399 delta += sz;
21400 jmp_lit(dres, Ijk_Boring, d64);
21401 vassert(dres->whatNext == Dis_StopHere);
21402 DIP("jmp 0x%llx\n", (ULong)d64);
21403 return delta;
21405 case 0xEB: /* Jb (jump, byte offset) */
21406 if (haveF3(pfx)) goto decode_failure;
21407 if (sz != 4)
21408 goto decode_failure; /* JRS added 2004 July 11 */
21409 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
21410 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
21411 delta++;
21412 jmp_lit(dres, Ijk_Boring, d64);
21413 vassert(dres->whatNext == Dis_StopHere);
21414 DIP("jmp-8 0x%llx\n", (ULong)d64);
21415 return delta;
21417 case 0xF5: /* CMC */
21418 case 0xF8: /* CLC */
21419 case 0xF9: /* STC */
21420 t1 = newTemp(Ity_I64);
21421 t2 = newTemp(Ity_I64);
21422 assign( t1, mk_amd64g_calculate_rflags_all() );
21423 switch (opc) {
21424 case 0xF5:
21425 assign( t2, binop(Iop_Xor64, mkexpr(t1),
21426 mkU64(AMD64G_CC_MASK_C)));
21427 DIP("cmc\n");
21428 break;
21429 case 0xF8:
21430 assign( t2, binop(Iop_And64, mkexpr(t1),
21431 mkU64(~AMD64G_CC_MASK_C)));
21432 DIP("clc\n");
21433 break;
21434 case 0xF9:
21435 assign( t2, binop(Iop_Or64, mkexpr(t1),
21436 mkU64(AMD64G_CC_MASK_C)));
21437 DIP("stc\n");
21438 break;
21439 default:
21440 vpanic("disInstr(x64)(cmc/clc/stc)");
21442 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
21443 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
21444 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t2) ));
21445 /* Set NDEP even though it isn't used. This makes redundant-PUT
21446 elimination of previous stores to this field work better. */
21447 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
21448 return delta;
21450 case 0xF6: { /* Grp3 Eb */
21451 Bool decode_OK = True;
21452 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21453 /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */
21454 delta = dis_Grp3 ( vbi, pfx, 1, delta, &decode_OK );
21455 if (!decode_OK) goto decode_failure;
21456 return delta;
21459 case 0xF7: { /* Grp3 Ev */
21460 Bool decode_OK = True;
21461 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21462 /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */
21463 delta = dis_Grp3 ( vbi, pfx, sz, delta, &decode_OK );
21464 if (!decode_OK) goto decode_failure;
21465 return delta;
21468 case 0xFC: /* CLD */
21469 if (haveF2orF3(pfx)) goto decode_failure;
21470 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(1)) );
21471 DIP("cld\n");
21472 return delta;
21474 case 0xFD: /* STD */
21475 if (haveF2orF3(pfx)) goto decode_failure;
21476 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(-1ULL)) );
21477 DIP("std\n");
21478 return delta;
21480 case 0xFE: { /* Grp4 Eb */
21481 Bool decode_OK = True;
21482 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21483 /* We now let dis_Grp4 itself decide if F2 and/or F3 are valid */
21484 delta = dis_Grp4 ( vbi, pfx, delta, &decode_OK );
21485 if (!decode_OK) goto decode_failure;
21486 return delta;
21489 case 0xFF: { /* Grp5 Ev */
21490 Bool decode_OK = True;
21491 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21492 /* We now let dis_Grp5 itself decide if F2 and/or F3 are valid */
21493 delta = dis_Grp5 ( vbi, pfx, sz, delta, dres, &decode_OK );
21494 if (!decode_OK) goto decode_failure;
21495 return delta;
21498 default:
21499 break;
21503 decode_failure:
21504 return deltaIN; /* fail */
21508 /*------------------------------------------------------------*/
21509 /*--- ---*/
21510 /*--- Top-level post-escape decoders: dis_ESC_0F ---*/
21511 /*--- ---*/
21512 /*------------------------------------------------------------*/
21514 static IRTemp math_BSWAP ( IRTemp t1, IRType ty )
21516 IRTemp t2 = newTemp(ty);
21517 if (ty == Ity_I64) {
21518 IRTemp m8 = newTemp(Ity_I64);
21519 IRTemp s8 = newTemp(Ity_I64);
21520 IRTemp m16 = newTemp(Ity_I64);
21521 IRTemp s16 = newTemp(Ity_I64);
21522 IRTemp m32 = newTemp(Ity_I64);
21523 assign( m8, mkU64(0xFF00FF00FF00FF00ULL) );
21524 assign( s8,
21525 binop(Iop_Or64,
21526 binop(Iop_Shr64,
21527 binop(Iop_And64,mkexpr(t1),mkexpr(m8)),
21528 mkU8(8)),
21529 binop(Iop_And64,
21530 binop(Iop_Shl64,mkexpr(t1),mkU8(8)),
21531 mkexpr(m8))
21535 assign( m16, mkU64(0xFFFF0000FFFF0000ULL) );
21536 assign( s16,
21537 binop(Iop_Or64,
21538 binop(Iop_Shr64,
21539 binop(Iop_And64,mkexpr(s8),mkexpr(m16)),
21540 mkU8(16)),
21541 binop(Iop_And64,
21542 binop(Iop_Shl64,mkexpr(s8),mkU8(16)),
21543 mkexpr(m16))
21547 assign( m32, mkU64(0xFFFFFFFF00000000ULL) );
21548 assign( t2,
21549 binop(Iop_Or64,
21550 binop(Iop_Shr64,
21551 binop(Iop_And64,mkexpr(s16),mkexpr(m32)),
21552 mkU8(32)),
21553 binop(Iop_And64,
21554 binop(Iop_Shl64,mkexpr(s16),mkU8(32)),
21555 mkexpr(m32))
21558 return t2;
21560 if (ty == Ity_I32) {
21561 assign( t2,
21562 binop(
21563 Iop_Or32,
21564 binop(Iop_Shl32, mkexpr(t1), mkU8(24)),
21565 binop(
21566 Iop_Or32,
21567 binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
21568 mkU32(0x00FF0000)),
21569 binop(Iop_Or32,
21570 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
21571 mkU32(0x0000FF00)),
21572 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)),
21573 mkU32(0x000000FF) )
21576 return t2;
21578 if (ty == Ity_I16) {
21579 assign(t2,
21580 binop(Iop_Or16,
21581 binop(Iop_Shl16, mkexpr(t1), mkU8(8)),
21582 binop(Iop_Shr16, mkexpr(t1), mkU8(8)) ));
21583 return t2;
21585 vassert(0);
21586 /*NOTREACHED*/
21587 return IRTemp_INVALID;
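   /* The Ity_I64 case above is the classic logarithmic byte swap: swap
      adjacent bytes, then adjacent 16-bit halves, then the two 32-bit
      halves.  A plain-C sketch of the same idiom (illustration only, not
      used by the decoder):

         static inline unsigned long long bswap64_sketch ( unsigned long long x )
         {
            x = ((x & 0xFF00FF00FF00FF00ULL) >> 8)
                | ((x << 8) & 0xFF00FF00FF00FF00ULL);
            x = ((x & 0xFFFF0000FFFF0000ULL) >> 16)
                | ((x << 16) & 0xFFFF0000FFFF0000ULL);
            x = ((x & 0xFFFFFFFF00000000ULL) >> 32)
                | ((x << 32) & 0xFFFFFFFF00000000ULL);
            return x;
         }

      e.g. 0x0123456789ABCDEF becomes 0xEFCDAB8967452301. */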
21591 __attribute__((noinline))
21592 static
21593 Long dis_ESC_0F (
21594 /*MB_OUT*/DisResult* dres,
21595 /*MB_OUT*/Bool* expect_CAS,
21596 const VexArchInfo* archinfo,
21597 const VexAbiInfo* vbi,
21598 Prefix pfx, Int sz, Long deltaIN
21601 Long d64 = 0;
21602 IRTemp addr = IRTemp_INVALID;
21603 IRTemp t1 = IRTemp_INVALID;
21604 IRTemp t2 = IRTemp_INVALID;
21605 UChar modrm = 0;
21606 Int am_sz = 0;
21607 Int alen = 0;
21608 HChar dis_buf[50];
21610 /* In the first switch, look for ordinary integer insns. */
21611 Long delta = deltaIN;
21612 UChar opc = getUChar(delta);
21613 delta++;
21614 switch (opc) { /* first switch */
21616 case 0x01:
21618 modrm = getUChar(delta);
21619 /* 0F 01 /0 -- SGDT */
21620 /* 0F 01 /1 -- SIDT */
21621 if (!epartIsReg(modrm)
21622 && (gregLO3ofRM(modrm) == 0 || gregLO3ofRM(modrm) == 1)) {
21623 /* This is really revolting, but ... since each processor
21624 (core) only has one IDT and one GDT, just let the guest
21625 see it (pass-through semantics). I can't see any way to
21626 construct a faked-up value, so don't bother to try. */
21627 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21628 delta += alen;
21629 switch (gregLO3ofRM(modrm)) {
21630 case 0: DIP("sgdt %s\n", dis_buf); break;
21631 case 1: DIP("sidt %s\n", dis_buf); break;
21632 default: vassert(0); /*NOTREACHED*/
21634 IRDirty* d = unsafeIRDirty_0_N (
21635 0/*regparms*/,
21636 "amd64g_dirtyhelper_SxDT",
21637 &amd64g_dirtyhelper_SxDT,
21638 mkIRExprVec_2( mkexpr(addr),
21639 mkU64(gregLO3ofRM(modrm)) )
21641 /* declare we're writing memory */
21642 d->mFx = Ifx_Write;
21643 d->mAddr = mkexpr(addr);
21644 d->mSize = 6;
21645 stmt( IRStmt_Dirty(d) );
21646 return delta;
21648 /* 0F 01 D0 = XGETBV */
21649 if (modrm == 0xD0 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21650 delta += 1;
21651 DIP("xgetbv\n");
21652 /* Fault (SEGV) if ECX isn't zero. Intel docs say #GP and I
21653 am not sure if that translates into SEGV or to something
21654 else, in user space. */
21655 t1 = newTemp(Ity_I32);
21656 assign( t1, getIReg32(R_RCX) );
21657 stmt( IRStmt_Exit(binop(Iop_CmpNE32, mkexpr(t1), mkU32(0)),
21658 Ijk_SigSEGV,
21659 IRConst_U64(guest_RIP_curr_instr),
21660 OFFB_RIP
21662 putIRegRAX(4, mkU32(7));
21663 putIRegRDX(4, mkU32(0));
21664 return delta;
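      /* i.e. the guest always reports XCR0 = 0x7 in EDX:EAX (sketch):

            eax = 7;    // bit0 x87, bit1 SSE/XMM, bit2 AVX/YMM state enabled
            edx = 0;

         assuming ECX was 0; any other ECX takes the SIGSEGV exit above. */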
21666 /* BEGIN HACKY SUPPORT FOR xend */
21667 /* 0F 01 D5 = XEND */
21668 if (modrm == 0xD5 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21669 /* We are never in a transaction (xbegin immediately aborts).
21670 So this just always generates a General Protection Fault. */
21671 delta += 1;
21672 jmp_lit(dres, Ijk_SigSEGV, guest_RIP_bbstart + delta);
21673 vassert(dres->whatNext == Dis_StopHere);
21674 DIP("xend\n");
21675 return delta;
21677 /* END HACKY SUPPORT FOR xend */
21678 /* BEGIN HACKY SUPPORT FOR xtest */
21679 /* 0F 01 D6 = XTEST */
21680 if (modrm == 0xD6 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21681 /* Sets ZF because there never is a transaction, and all
21682 CF, OF, SF, PF and AF are always cleared by xtest. */
21683 delta += 1;
21684 DIP("xtest\n");
21685 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
21686 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
21687 stmt( IRStmt_Put( OFFB_CC_DEP1, mkU64(AMD64G_CC_MASK_Z) ));
21688 /* Set NDEP even though it isn't used. This makes redundant-PUT
21689 elimination of previous stores to this field work better. */
21690 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
21691 return delta;
21693 /* END HACKY SUPPORT FOR xtest */
21694 /* 0F 01 F9 = RDTSCP */
21695 if (modrm == 0xF9 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDTSCP)) {
21696 delta += 1;
21697 /* Uses dirty helper:
21698 void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* )
21699 declared to wr rax, rcx, rdx
21701 const HChar* fName = "amd64g_dirtyhelper_RDTSCP";
21702 void* fAddr = &amd64g_dirtyhelper_RDTSCP;
21703 IRDirty* d
21704 = unsafeIRDirty_0_N ( 0/*regparms*/,
21705 fName, fAddr, mkIRExprVec_1(IRExpr_GSPTR()) );
21706 /* declare guest state effects */
21707 d->nFxState = 3;
21708 vex_bzero(&d->fxState, sizeof(d->fxState));
21709 d->fxState[0].fx = Ifx_Write;
21710 d->fxState[0].offset = OFFB_RAX;
21711 d->fxState[0].size = 8;
21712 d->fxState[1].fx = Ifx_Write;
21713 d->fxState[1].offset = OFFB_RCX;
21714 d->fxState[1].size = 8;
21715 d->fxState[2].fx = Ifx_Write;
21716 d->fxState[2].offset = OFFB_RDX;
21717 d->fxState[2].size = 8;
21718 /* execute the dirty call, side-effecting guest state */
21719 stmt( IRStmt_Dirty(d) );
21720 /* RDTSCP is a serialising insn. So, just in case someone is
21721 using it as a memory fence ... */
21722 stmt( IRStmt_MBE(Imbe_Fence) );
21723 DIP("rdtscp\n");
21724 return delta;
21726 /* else decode failed */
21727 break;
21730 case 0x05: /* SYSCALL */
21731 guest_RIP_next_mustcheck = True;
21732 guest_RIP_next_assumed = guest_RIP_bbstart + delta;
21733 putIReg64( R_RCX, mkU64(guest_RIP_next_assumed) );
21734 /* It's important that all guest state is up-to-date
21735 at this point. So we declare an end-of-block here, which
21736 forces any cached guest state to be flushed. */
21737 jmp_lit(dres, Ijk_Sys_syscall, guest_RIP_next_assumed);
21738 vassert(dres->whatNext == Dis_StopHere);
21739 DIP("syscall\n");
21740 return delta;
21742 case 0x0B: /* UD2 */
21743 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
21744 jmp_lit(dres, Ijk_NoDecode, guest_RIP_curr_instr);
21745 vassert(dres->whatNext == Dis_StopHere);
21746 DIP("ud2\n");
21747 return delta;
21749 case 0x0D: /* 0F 0D /0 -- prefetch mem8 */
21750 /* 0F 0D /1 -- prefetchw mem8 */
21751 if (have66orF2orF3(pfx)) goto decode_failure;
21752 modrm = getUChar(delta);
21753 if (epartIsReg(modrm)) goto decode_failure;
21754 if (gregLO3ofRM(modrm) != 0 && gregLO3ofRM(modrm) != 1)
21755 goto decode_failure;
21756 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21757 delta += alen;
21758 switch (gregLO3ofRM(modrm)) {
21759 case 0: DIP("prefetch %s\n", dis_buf); break;
21760 case 1: DIP("prefetchw %s\n", dis_buf); break;
21761 default: vassert(0); /*NOTREACHED*/
21763 return delta;
21765 case 0x19:
21766 case 0x1C:
21767 case 0x1D:
21768 case 0x1E:
21769 case 0x1F:
21770 // Intel CET instructions can have any prefixes before NOPs
21771 // and can use any ModRM, SIB and disp
21772 modrm = getUChar(delta);
21773 if (epartIsReg(modrm)) {
21774 delta += 1;
21775 DIP("nop%c\n", nameISize(sz));
21776 } else {
21777 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21778 delta += alen;
21779 DIP("nop%c %s\n", nameISize(sz), dis_buf);
21781 return delta;
21783 case 0x31: { /* RDTSC */
21784 IRTemp val = newTemp(Ity_I64);
21785 IRExpr** args = mkIRExprVec_0();
21786 IRDirty* d = unsafeIRDirty_1_N (
21787 val,
21788 0/*regparms*/,
21789 "amd64g_dirtyhelper_RDTSC",
21790 &amd64g_dirtyhelper_RDTSC,
21791 args
21793 if (have66orF2orF3(pfx)) goto decode_failure;
21794 /* execute the dirty call, dumping the result in val. */
21795 stmt( IRStmt_Dirty(d) );
21796 putIRegRDX(4, unop(Iop_64HIto32, mkexpr(val)));
21797 putIRegRAX(4, unop(Iop_64to32, mkexpr(val)));
21798 DIP("rdtsc\n");
21799 return delta;
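      /* Sketch: the helper returns the full 64-bit counter in `val`, which
         is split architecturally as

            edx = (UInt)(val >> 32);
            eax = (UInt)val;

         mirroring the Iop_64HIto32 / Iop_64to32 pair above. */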
21802 case 0x40:
21803 case 0x41:
21804 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
21805 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
21806 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
21807 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
21808 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
21809 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
21810 case 0x48: /* CMOVSb (cmov negative) */
21811 case 0x49: /* CMOVNSb (cmov not negative) */
21812 case 0x4A: /* CMOVP (cmov parity even) */
21813 case 0x4B: /* CMOVNP (cmov parity odd) */
21814 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
21815 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
21816 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
21817 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
21818 if (haveF2orF3(pfx)) goto decode_failure;
21819 delta = dis_cmov_E_G(vbi, pfx, sz, (AMD64Condcode)(opc - 0x40), delta);
21820 return delta;
21822 case 0x80:
21823 case 0x81:
21824 case 0x82: /* JBb/JNAEb (jump below) */
21825 case 0x83: /* JNBb/JAEb (jump not below) */
21826 case 0x84: /* JZb/JEb (jump zero) */
21827 case 0x85: /* JNZb/JNEb (jump not zero) */
21828 case 0x86: /* JBEb/JNAb (jump below or equal) */
21829 case 0x87: /* JNBEb/JAb (jump not below or equal) */
21830 case 0x88: /* JSb (jump negative) */
21831 case 0x89: /* JNSb (jump not negative) */
21832 case 0x8A: /* JP (jump parity even) */
21833 case 0x8B: /* JNP/JPO (jump parity odd) */
21834 case 0x8C: /* JLb/JNGEb (jump less) */
21835 case 0x8D: /* JGEb/JNLb (jump greater or equal) */
21836 case 0x8E: /* JLEb/JNGb (jump less or equal) */
21837 case 0x8F: { /* JGb/JNLEb (jump greater) */
21838 Long jmpDelta;
21839 const HChar* comment = "";
21840 if (haveF3(pfx)) goto decode_failure;
21841 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
21842 jmpDelta = getSDisp32(delta);
21843 d64 = (guest_RIP_bbstart+delta+4) + jmpDelta;
21844 delta += 4;
21845 /* End the block at this point. */
21846 jcc_01( dres, (AMD64Condcode)(opc - 0x80),
21847 guest_RIP_bbstart+delta, d64 );
21848 vassert(dres->whatNext == Dis_StopHere);
21849 DIP("j%s-32 0x%llx %s\n", name_AMD64Condcode(opc - 0x80), (ULong)d64,
21850 comment);
21851 return delta;
21854 case 0x90:
21855 case 0x91:
21856 case 0x92: /* set-Bb/set-NAEb (set if below) */
21857 case 0x93: /* set-NBb/set-AEb (set if not below) */
21858 case 0x94: /* set-Zb/set-Eb (set if zero) */
21859 case 0x95: /* set-NZb/set-NEb (set if not zero) */
21860 case 0x96: /* set-BEb/set-NAb (set if below or equal) */
21861 case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */
21862 case 0x98: /* set-Sb (set if negative) */
21863 case 0x99: /* set-NSb (set if not negative) */
21864 case 0x9A: /* set-P (set if parity even) */
21865 case 0x9B: /* set-NP (set if parity odd) */
21866 case 0x9C: /* set-Lb/set-NGEb (set if less) */
21867 case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */
21868 case 0x9E: /* set-LEb/set-NGb (set if less or equal) */
21869 case 0x9F: /* set-Gb/set-NLEb (set if greater) */
21870 if (haveF2orF3(pfx)) goto decode_failure;
21871 t1 = newTemp(Ity_I8);
21872 assign( t1, unop(Iop_1Uto8,mk_amd64g_calculate_condition(opc-0x90)) );
21873 modrm = getUChar(delta);
21874 if (epartIsReg(modrm)) {
21875 delta++;
21876 putIRegE(1, pfx, modrm, mkexpr(t1));
21877 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90),
21878 nameIRegE(1,pfx,modrm));
21879 } else {
21880 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21881 delta += alen;
21882 storeLE( mkexpr(addr), mkexpr(t1) );
21883 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90), dis_buf);
21885 return delta;
21887 case 0x1A:
21888 case 0x1B: { /* Future MPX instructions, currently NOPs.
21889 BNDMK b, m F3 0F 1B
21890 BNDCL b, r/m F3 0F 1A
21891 BNDCU b, r/m F2 0F 1A
21892 BNDCN b, r/m F2 0F 1B
21893 BNDMOV b, b/m 66 0F 1A
21894 BNDMOV b/m, b 66 0F 1B
21895 BNDLDX b, mib 0F 1A
21896 BNDSTX mib, b 0F 1B */
21898 /* All instructions have two operands. One operand is always the
21899 bnd register number (bnd0-bnd3, other register numbers are
21900 ignored when MPX isn't enabled, but should generate an
21901 exception if MPX is enabled) given by gregOfRexRM. The other
21902 operand is either a ModRM:reg, ModRM:r/m or a SIB encoded
21903 address, all of which can be decoded by using either
21904 eregOfRexRM or disAMode. */
21906 modrm = getUChar(delta);
21907 int bnd = gregOfRexRM(pfx,modrm);
21908 const HChar *oper;
21909 if (epartIsReg(modrm)) {
21910 oper = nameIReg64 (eregOfRexRM(pfx,modrm));
21911 delta += 1;
21912 } else {
21913 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21914 delta += alen;
21915 oper = dis_buf;
21918 if (haveF3no66noF2 (pfx)) {
21919 if (opc == 0x1B) {
21920 DIP ("bndmk %s, %%bnd%d\n", oper, bnd);
21921 } else /* opc == 0x1A */ {
21922 DIP ("bndcl %s, %%bnd%d\n", oper, bnd);
21924 } else if (haveF2no66noF3 (pfx)) {
21925 if (opc == 0x1A) {
21926 DIP ("bndcu %s, %%bnd%d\n", oper, bnd);
21927 } else /* opc == 0x1B */ {
21928 DIP ("bndcn %s, %%bnd%d\n", oper, bnd);
21930 } else if (have66noF2noF3 (pfx)) {
21931 if (opc == 0x1A) {
21932 DIP ("bndmov %s, %%bnd%d\n", oper, bnd);
21933 } else /* opc == 0x1B */ {
21934 DIP ("bndmov %%bnd%d, %s\n", bnd, oper);
21936 } else if (haveNo66noF2noF3 (pfx)) {
21937 if (opc == 0x1A) {
21938 DIP ("bndldx %s, %%bnd%d\n", oper, bnd);
21939 } else /* opc == 0x1B */ {
21940 DIP ("bndstx %%bnd%d, %s\n", bnd, oper);
21942 } else goto decode_failure;
21944 return delta;
21947 case 0xA2: { /* CPUID */
21948 /* Uses dirty helper:
21949 void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* )
21950 declared to mod rax, wr rbx, rcx, rdx
21952 IRDirty* d = NULL;
21953 const HChar* fName = NULL;
21954 void* fAddr = NULL;
21956 if (haveF2orF3(pfx)) goto decode_failure;
21958 /* This isn't entirely correct, CPUID should depend on the VEX
21959 capabilities, not on the underlying CPU. See bug #324882. */
21960 if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSSE3) &&
21961 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16) &&
21962 (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX2)) {
21963 fName = "amd64g_dirtyhelper_CPUID_avx2";
21964 fAddr = &amd64g_dirtyhelper_CPUID_avx2;
21965 /* This is a Core-i7-4910-like machine */
21967 else if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSSE3) &&
21968 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16) &&
21969 (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21970 fName = "amd64g_dirtyhelper_CPUID_avx_and_cx16";
21971 fAddr = &amd64g_dirtyhelper_CPUID_avx_and_cx16;
21972 /* This is a Core-i5-2300-like machine */
21974 else if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSSE3) &&
21975 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16)) {
21976 fName = "amd64g_dirtyhelper_CPUID_sse42_and_cx16";
21977 fAddr = &amd64g_dirtyhelper_CPUID_sse42_and_cx16;
21978 /* This is a Core-i5-670-like machine */
21980 else {
21981 /* Give a CPUID for at least a baseline machine, SSE2
21982 only, and no CX16 */
21983 fName = "amd64g_dirtyhelper_CPUID_baseline";
21984 fAddr = &amd64g_dirtyhelper_CPUID_baseline;
21987 vassert(fName); vassert(fAddr);
21988 IRExpr** args = NULL;
21989 if (fAddr == &amd64g_dirtyhelper_CPUID_avx2
21990 || fAddr == &amd64g_dirtyhelper_CPUID_avx_and_cx16) {
21991 Bool hasF16C = (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C) != 0;
21992 Bool hasRDRAND = (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDRAND) != 0;
21993 args = mkIRExprVec_3(IRExpr_GSPTR(),
21994 mkIRExpr_HWord(hasF16C ? 1 : 0),
21995 mkIRExpr_HWord(hasRDRAND ? 1 : 0));
21996 } else {
21997 args = mkIRExprVec_1(IRExpr_GSPTR());
21999 d = unsafeIRDirty_0_N ( 0/*regparms*/, fName, fAddr, args );
22001 /* declare guest state effects */
22002 d->nFxState = 4;
22003 vex_bzero(&d->fxState, sizeof(d->fxState));
22004 d->fxState[0].fx = Ifx_Modify;
22005 d->fxState[0].offset = OFFB_RAX;
22006 d->fxState[0].size = 8;
22007 d->fxState[1].fx = Ifx_Write;
22008 d->fxState[1].offset = OFFB_RBX;
22009 d->fxState[1].size = 8;
22010 d->fxState[2].fx = Ifx_Modify;
22011 d->fxState[2].offset = OFFB_RCX;
22012 d->fxState[2].size = 8;
22013 d->fxState[3].fx = Ifx_Write;
22014 d->fxState[3].offset = OFFB_RDX;
22015 d->fxState[3].size = 8;
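         /* For reference: this mirrors CPUID's architectural behaviour --
            RAX (leaf number) and RCX (subleaf number) are read and then
            overwritten, hence Ifx_Modify, whereas RBX and RDX are pure
            outputs, hence Ifx_Write. */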
22016 /* execute the dirty call, side-effecting guest state */
22017 stmt( IRStmt_Dirty(d) );
22018 /* CPUID is a serialising insn. So, just in case someone is
22019 using it as a memory fence ... */
22020 stmt( IRStmt_MBE(Imbe_Fence) );
22021 DIP("cpuid\n");
22022 return delta;
22025 case 0xA3: { /* BT Gv,Ev */
22026 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22027 Bool ok = True;
22028 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
22029 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpNone, &ok );
22030 if (!ok) goto decode_failure;
22031 return delta;
22034 case 0xA4: /* SHLDv imm8,Gv,Ev */
22035 modrm = getUChar(delta);
22036 d64 = delta + lengthAMode(pfx, delta);
22037 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
22038 delta = dis_SHLRD_Gv_Ev (
22039 vbi, pfx, delta, modrm, sz,
22040 mkU8(getUChar(d64)), True, /* literal */
22041 dis_buf, True /* left */ );
22042 return delta;
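      /* Encoding note: 0F A4 is "shld Ev,Gv,imm8", laid out as
         modrm [sib] [disp] imm8.  Hence the peek at getUChar(d64)
         above, where d64 = delta + lengthAMode(..): it reads the
         immediate byte that follows the addressing mode, before
         dis_SHLRD_Gv_Ev consumes the modrm/amode bytes. */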
22044 case 0xA5: /* SHLDv %cl,Gv,Ev */
22045 modrm = getUChar(delta);
22046 delta = dis_SHLRD_Gv_Ev (
22047 vbi, pfx, delta, modrm, sz,
22048 getIRegCL(), False, /* not literal */
22049 "%cl", True /* left */ );
22050 return delta;
22052 case 0xAB: { /* BTS Gv,Ev */
22053 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22054 Bool ok = True;
22055 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
22056 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpSet, &ok );
22057 if (!ok) goto decode_failure;
22058 return delta;
22061 case 0xAC: /* SHRDv imm8,Gv,Ev */
22062 modrm = getUChar(delta);
22063 d64 = delta + lengthAMode(pfx, delta);
22064 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
22065 delta = dis_SHLRD_Gv_Ev (
22066 vbi, pfx, delta, modrm, sz,
22067 mkU8(getUChar(d64)), True, /* literal */
22068 dis_buf, False /* right */ );
22069 return delta;
22071 case 0xAD: /* SHRDv %cl,Gv,Ev */
22072 modrm = getUChar(delta);
22073 delta = dis_SHLRD_Gv_Ev (
22074 vbi, pfx, delta, modrm, sz,
22075 getIRegCL(), False, /* not literal */
22076 "%cl", False /* right */);
22077 return delta;
22079 case 0xAF: /* IMUL Ev, Gv */
22080 if (haveF2orF3(pfx)) goto decode_failure;
22081 delta = dis_mul_E_G ( vbi, pfx, sz, delta );
22082 return delta;
22084 case 0xB0: { /* CMPXCHG Gb,Eb */
22085 Bool ok = True;
22086 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */
22087 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, 1, delta );
22088 if (!ok) goto decode_failure;
22089 return delta;
22092 case 0xB1: { /* CMPXCHG Gv,Ev (allowed in 16,32,64 bit) */
22093 Bool ok = True;
22094 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */
22095 if (sz != 2 && sz != 4 && sz != 8) goto decode_failure;
22096 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, sz, delta );
22097 if (!ok) goto decode_failure;
22098 return delta;
22101 case 0xB3: { /* BTR Gv,Ev */
22102 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22103 Bool ok = True;
22104 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
22105 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpReset, &ok );
22106 if (!ok) goto decode_failure;
22107 return delta;
22110 case 0xB6: /* MOVZXb Eb,Gv */
22111 if (haveF2orF3(pfx)) goto decode_failure;
22112 if (sz != 2 && sz != 4 && sz != 8)
22113 goto decode_failure;
22114 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, False );
22115 return delta;
22117 case 0xB7: /* MOVZXw Ew,Gv */
22118 if (haveF2orF3(pfx)) goto decode_failure;
22119 if (sz != 4 && sz != 8)
22120 goto decode_failure;
22121 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, False );
22122 return delta;
22124 case 0xBA: { /* Grp8 Ib,Ev */
22125 /* We let dis_Grp8_Imm decide whether F2 or F3 are allowable. */
22126 Bool decode_OK = False;
22127 modrm = getUChar(delta);
22128 am_sz = lengthAMode(pfx,delta);
22129 d64 = getSDisp8(delta + am_sz);
22130 delta = dis_Grp8_Imm ( vbi, pfx, delta, modrm, am_sz, sz, d64,
22131 &decode_OK );
22132 if (!decode_OK)
22133 goto decode_failure;
22134 return delta;
22137 case 0xBB: { /* BTC Gv,Ev */
22138 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22139      Bool ok = True;
22140 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
22141 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpComp, &ok );
22142 if (!ok) goto decode_failure;
22143 return delta;
22146 case 0xBC: /* BSF Gv,Ev */
22147 if (!haveF2orF3(pfx)
22148 || (haveF3noF2(pfx)
22149 && 0 == (archinfo->hwcaps & VEX_HWCAPS_AMD64_BMI))) {
22150 /* no-F2 no-F3 0F BC = BSF
22151 or F3 0F BC = REP; BSF on older CPUs. */
22152 delta = dis_bs_E_G ( vbi, pfx, sz, delta, True );
22153 return delta;
22155 /* Fall through, since F3 0F BC is TZCNT, and needs to
22156 be handled by dis_ESC_0F__SSE4. */
22157 break;
22159 case 0xBD: /* BSR Gv,Ev */
22160 if (!haveF2orF3(pfx)
22161 || (haveF3noF2(pfx)
22162 && 0 == (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT))) {
22163 /* no-F2 no-F3 0F BD = BSR
22164 or F3 0F BD = REP; BSR on older CPUs. */
22165 delta = dis_bs_E_G ( vbi, pfx, sz, delta, False );
22166 return delta;
22168 /* Fall through, since F3 0F BD is LZCNT, and needs to
22169 be handled by dis_ESC_0F__SSE4. */
22170 break;
22172 case 0xBE: /* MOVSXb Eb,Gv */
22173 if (haveF2orF3(pfx)) goto decode_failure;
22174 if (sz != 2 && sz != 4 && sz != 8)
22175 goto decode_failure;
22176 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, True );
22177 return delta;
22179 case 0xBF: /* MOVSXw Ew,Gv */
22180 if (haveF2orF3(pfx)) goto decode_failure;
22181 if (sz != 4 && sz != 8)
22182 goto decode_failure;
22183 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, True );
22184 return delta;
22186 case 0xC0: { /* XADD Gb,Eb */
22187 Bool decode_OK = False;
22188 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, 1, delta );
22189 if (!decode_OK)
22190 goto decode_failure;
22191 return delta;
22194 case 0xC1: { /* XADD Gv,Ev */
22195 Bool decode_OK = False;
22196 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, sz, delta );
22197 if (!decode_OK)
22198 goto decode_failure;
22199 return delta;
22202 case 0xC7: {
22203 modrm = getUChar(delta);
22205 // Detecting valid CMPXCHG combinations is pretty complex.
22206 Bool isValidCMPXCHG = gregLO3ofRM(modrm) == 1;
22207 if (isValidCMPXCHG) {
22208 if (have66(pfx)) isValidCMPXCHG = False;
22209 if (sz != 4 && sz != 8) isValidCMPXCHG = False;
22210 if (sz == 8 && !(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16))
22211 isValidCMPXCHG = False;
22212 if (epartIsReg(modrm)) isValidCMPXCHG = False;
22213 if (haveF2orF3(pfx)) {
22214 /* Since the e-part is memory only, F2 or F3 (one or the
22215 other) is acceptable if LOCK is also present. But only
22216 for cmpxchg8b. */
22217 if (sz == 8) isValidCMPXCHG = False;
22218 if (haveF2andF3(pfx) || !haveLOCK(pfx)) isValidCMPXCHG = False;
22222 /* 0F C7 /1 (with qualifications) = CMPXCHG */
22223 if (isValidCMPXCHG) {
22224 // Note that we've already read the modrm byte by this point, but we
22225 // haven't moved delta past it.
22226 IRType elemTy = sz==4 ? Ity_I32 : Ity_I64;
22227 IRTemp expdHi = newTemp(elemTy);
22228 IRTemp expdLo = newTemp(elemTy);
22229 IRTemp dataHi = newTemp(elemTy);
22230 IRTemp dataLo = newTemp(elemTy);
22231 IRTemp oldHi = newTemp(elemTy);
22232 IRTemp oldLo = newTemp(elemTy);
22233 IRTemp flags_old = newTemp(Ity_I64);
22234 IRTemp flags_new = newTemp(Ity_I64);
22235 IRTemp success = newTemp(Ity_I1);
22236 IROp opOR = sz==4 ? Iop_Or32 : Iop_Or64;
22237 IROp opXOR = sz==4 ? Iop_Xor32 : Iop_Xor64;
22238 IROp opCasCmpEQ = sz==4 ? Iop_CasCmpEQ32 : Iop_CasCmpEQ64;
22239 IRExpr* zero = sz==4 ? mkU32(0) : mkU64(0);
22240 IRTemp expdHi64 = newTemp(Ity_I64);
22241 IRTemp expdLo64 = newTemp(Ity_I64);
22243 /* Translate this using a DCAS, even if there is no LOCK
22244 prefix. Life is too short to bother with generating two
22245 different translations for the with/without-LOCK-prefix
22246 cases. */
22247 *expect_CAS = True;
22249 /* Generate address */
22250 vassert(!epartIsReg(modrm));
22251 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22252 delta += alen;
22254 /* cmpxchg16b requires an alignment check. */
22255 if (sz == 8)
22256 gen_SEGV_if_not_16_aligned( addr );
22258 /* Get the expected and new values. */
22259 assign( expdHi64, getIReg64(R_RDX) );
22260 assign( expdLo64, getIReg64(R_RAX) );
22262 /* These are the correctly-sized expected and new values.
22263 However, we also get expdHi64/expdLo64 above as 64-bits
22264 regardless, because we will need them later in the 32-bit
22265 case (paradoxically). */
22266 assign( expdHi, sz==4 ? unop(Iop_64to32, mkexpr(expdHi64))
22267 : mkexpr(expdHi64) );
22268 assign( expdLo, sz==4 ? unop(Iop_64to32, mkexpr(expdLo64))
22269 : mkexpr(expdLo64) );
22270 assign( dataHi, sz==4 ? getIReg32(R_RCX) : getIReg64(R_RCX) );
22271 assign( dataLo, sz==4 ? getIReg32(R_RBX) : getIReg64(R_RBX) );
22273 /* Do the DCAS */
22274 stmt( IRStmt_CAS(
22275 mkIRCAS( oldHi, oldLo,
22276 Iend_LE, mkexpr(addr),
22277 mkexpr(expdHi), mkexpr(expdLo),
22278 mkexpr(dataHi), mkexpr(dataLo)
22279 )));
22281 /* success when oldHi:oldLo == expdHi:expdLo */
22282 assign( success,
22283 binop(opCasCmpEQ,
22284 binop(opOR,
22285 binop(opXOR, mkexpr(oldHi), mkexpr(expdHi)),
22286 binop(opXOR, mkexpr(oldLo), mkexpr(expdLo))
22288 zero
22291 /* If the DCAS is successful, that is to say oldHi:oldLo ==
22292 expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX,
22293 which is where they came from originally. Both the actual
22294 contents of these two regs, and any shadow values, are
22295 unchanged. If the DCAS fails then we're putting into
22296 RDX:RAX the value seen in memory. */
22297 /* Now of course there's a complication in the 32-bit case
22298 (bah!): if the DCAS succeeds, we need to leave RDX:RAX
22299 unchanged; but if we use the same scheme as in the 64-bit
22300 case, we get hit by the standard rule that a write to the
22301 bottom 32 bits of an integer register zeros the upper 32
22302 bits. And so the upper halves of RDX and RAX mysteriously
22303 become zero. So we have to stuff back in the original
22304 64-bit values which we previously stashed in
22305 expdHi64:expdLo64, even if we're doing a cmpxchg8b. */
22306 /* It's just _so_ much fun ... */
22307 putIRegRDX( 8,
22308 IRExpr_ITE( mkexpr(success),
22309 mkexpr(expdHi64),
22310 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldHi))
22311 : mkexpr(oldHi)
22313 putIRegRAX( 8,
22314 IRExpr_ITE( mkexpr(success),
22315 mkexpr(expdLo64),
22316 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldLo))
22317 : mkexpr(oldLo)
22320 /* Copy the success bit into the Z flag and leave the others
22321 unchanged */
22322 assign( flags_old, widenUto64(mk_amd64g_calculate_rflags_all()));
22323 assign(
22324 flags_new,
22325 binop(Iop_Or64,
22326 binop(Iop_And64, mkexpr(flags_old),
22327 mkU64(~AMD64G_CC_MASK_Z)),
22328 binop(Iop_Shl64,
22329 binop(Iop_And64,
22330 unop(Iop_1Uto64, mkexpr(success)), mkU64(1)),
22331 mkU8(AMD64G_CC_SHIFT_Z)) ));
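         /* Equivalently:
               flags_new = (flags_old & ~AMD64G_CC_MASK_Z)
                           | ((success ? 1 : 0) << AMD64G_CC_SHIFT_Z)
            so only the Z bit of the flags thunk value changes. */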
22333 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
22334 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
22335 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
22336 /* Set NDEP even though it isn't used. This makes
22337 redundant-PUT elimination of previous stores to this field
22338 work better. */
22339 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
22341 /* Sheesh. Aren't you glad it was me and not you that had to
22342 write and validate all this grunge? */
22344         DIP("cmpxchg%db %s\n", sz == 4 ? 8 : 16, dis_buf);
22345 return delta;
22346 } // if (isValidCMPXCHG)
22348 /* 0F C7 /6 no-F2-or-F3 = RDRAND */
22349 if (gregLO3ofRM(modrm) == 6/*RDRAND*/
22350 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDRAND)
22351 && epartIsReg(modrm) && haveNoF2noF3(pfx)
22352 && (sz == 8 || sz == 4 || sz == 2)) {
22353 delta++; // move past modrm
22354 IRType ty = szToITy(sz);
22356 // Pull a first 32 bits of randomness, plus C flag, out of the host.
22357 IRTemp pairLO = newTemp(Ity_I64);
22358 IRDirty* dLO
22359 = unsafeIRDirty_1_N(pairLO, 0/*regparms*/,
22360 "amd64g_dirtyhelper_RDRAND",
22361 &amd64g_dirtyhelper_RDRAND, mkIRExprVec_0());
22362 // There are no guest state or memory effects to declare for |dLO|.
22363 stmt( IRStmt_Dirty(dLO) );
22365 IRTemp randsLO = newTemp(Ity_I32);
22366 assign(randsLO, unop(Iop_64to32, mkexpr(pairLO)));
22367 IRTemp cLO = newTemp(Ity_I64);
22368 assign(cLO, binop(Iop_Shr64, mkexpr(pairLO), mkU8(32)));
22370 // We'll assemble the final pairing in (cFinal, randsNearlyFinal).
22371 IRTemp randsNearlyFinal = newTemp(Ity_I64);
22372 IRTemp cFinal = newTemp(Ity_I64);
22374 if (ty == Ity_I64) {
22375 // Pull another 32 bits of randomness out of the host.
22376 IRTemp pairHI = newTemp(Ity_I64);
22377 IRDirty* dHI
22378 = unsafeIRDirty_1_N(pairHI, 0/*regparms*/,
22379 "amd64g_dirtyhelper_RDRAND",
22380 &amd64g_dirtyhelper_RDRAND, mkIRExprVec_0());
22381 // There are no guest state or memory effects to declare for |dHI|.
22382 stmt( IRStmt_Dirty(dHI) );
22384 IRTemp randsHI = newTemp(Ity_I32);
22385 assign(randsHI, unop(Iop_64to32, mkexpr(pairHI)));
22386 IRTemp cHI = newTemp(Ity_I64);
22387 assign(cHI, binop(Iop_Shr64, mkexpr(pairHI), mkU8(32)));
22388 assign(randsNearlyFinal, binop(Iop_32HLto64,
22389 mkexpr(randsHI), mkexpr(randsLO)));
22390 assign(cFinal, binop(Iop_And64,
22391 binop(Iop_And64, mkexpr(cHI), mkexpr(cLO)),
22392 mkU64(1)));
22393 } else {
22394 assign(randsNearlyFinal, unop(Iop_32Uto64, mkexpr(randsLO)));
22395 assign(cFinal, binop(Iop_And64, mkexpr(cLO), mkU64(1)));
22398 /* Now cFinal[0] is the final success/failure flag (cFinal[0] == 1
22399 means success). But there's another twist. If we failed then the
22400 returned value must be forced to zero. Otherwise we could have the
22401 situation, when sz==8, where one of the host calls failed but the
22402 other didn't. This would give cFinal[0] == 0 (correctly) but
22403 randsNearlyFinal not being zero, because it contains the 32 bit
22404 result of the non-failing call. */
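         /* The Shl-by-63 / Sar-by-63 combination below turns cFinal (0 or 1)
            into an all-zeroes or all-ones 64-bit mask (1 -> 0xFFFFFFFFFFFFFFFF,
            0 -> 0); ANDing randsNearlyFinal with it therefore forces the
            returned data to zero whenever the RDRAND failed. */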
22405 IRTemp randsFinal = newTemp(Ity_I64);
22406 assign(randsFinal,
22407 binop(Iop_And64,
22408 mkexpr(randsNearlyFinal),
22409 binop(Iop_Sar64,
22410 binop(Iop_Shl64, mkexpr(cFinal), mkU8(63)),
22411 mkU8(63))
22414 // So, finally, update the guest state.
22415 putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(randsFinal)));
22417 // Set C=<success indication>, O,S,Z,A,P = 0. cFinal has already been
22418 // masked so only the lowest bit remains.
22419 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
22420 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(cFinal) ));
22421 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
22422 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
22424         DIP("rdrand %s\n", nameIRegE(sz, pfx, modrm));
22425 return delta;
22428 goto decode_failure;
22431 case 0xC8: /* BSWAP %eax */
22432 case 0xC9:
22433 case 0xCA:
22434 case 0xCB:
22435 case 0xCC:
22436 case 0xCD:
22437 case 0xCE:
22438 case 0xCF: /* BSWAP %edi */
22439 if (haveF2orF3(pfx)) goto decode_failure;
22440 /* According to the AMD64 docs, this insn can have size 4 or
22441 8. */
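      /* For example, with sz == 4, bswap of 0x11223344 yields
         0x44332211; the sz == 8 case reverses all eight bytes in the
         same way. */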
22442 if (sz == 4) {
22443 t1 = newTemp(Ity_I32);
22444 assign( t1, getIRegRexB(4, pfx, opc-0xC8) );
22445 t2 = math_BSWAP( t1, Ity_I32 );
22446 putIRegRexB(4, pfx, opc-0xC8, mkexpr(t2));
22447 DIP("bswapl %s\n", nameIRegRexB(4, pfx, opc-0xC8));
22448 return delta;
22450 if (sz == 8) {
22451 t1 = newTemp(Ity_I64);
22452 t2 = newTemp(Ity_I64);
22453 assign( t1, getIRegRexB(8, pfx, opc-0xC8) );
22454 t2 = math_BSWAP( t1, Ity_I64 );
22455 putIRegRexB(8, pfx, opc-0xC8, mkexpr(t2));
22456 DIP("bswapq %s\n", nameIRegRexB(8, pfx, opc-0xC8));
22457 return delta;
22459 goto decode_failure;
22461 default:
22462 break;
22464 } /* first switch */
22467 /* =-=-=-=-=-=-=-=-= MMXery =-=-=-=-=-=-=-=-= */
22468 /* In the second switch, pick off MMX insns. */
22470 if (!have66orF2orF3(pfx)) {
22471 /* So there's no SIMD prefix. */
22473 vassert(sz == 4 || sz == 8);
22475 switch (opc) { /* second switch */
22477 case 0x71:
22478 case 0x72:
22479 case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
22481 case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
22482 case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
22483 case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
22484 case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
22486 case 0xFC:
22487 case 0xFD:
22488 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
22490 case 0xEC:
22491 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
22493 case 0xDC:
22494 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
22496 case 0xF8:
22497 case 0xF9:
22498 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
22500 case 0xE8:
22501 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
22503 case 0xD8:
22504 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
22506 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
22507 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
22509 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
22511 case 0x74:
22512 case 0x75:
22513 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
22515 case 0x64:
22516 case 0x65:
22517 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
22519 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
22520 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
22521 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
22523 case 0x68:
22524 case 0x69:
22525 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
22527 case 0x60:
22528 case 0x61:
22529 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
22531 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
22532 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
22533 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
22534 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
22536 case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
22537 case 0xF2:
22538 case 0xF3:
22540 case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
22541 case 0xD2:
22542 case 0xD3:
22544 case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
22545 case 0xE2: {
22546 Bool decode_OK = False;
22547 delta = dis_MMX ( &decode_OK, vbi, pfx, sz, deltaIN );
22548 if (decode_OK)
22549 return delta;
22550 goto decode_failure;
22553 default:
22554 break;
22555 } /* second switch */
22559 /* A couple of MMX corner cases */
22560 if (opc == 0x0E/* FEMMS */ || opc == 0x77/* EMMS */) {
22561 if (sz != 4)
22562 goto decode_failure;
22563 do_EMMS_preamble();
22564 DIP("{f}emms\n");
22565 return delta;
22568 /* =-=-=-=-=-=-=-=-= SSE2ery =-=-=-=-=-=-=-=-= */
22569 /* Perhaps it's an SSE or SSE2 instruction. We can try this
22570 without checking the guest hwcaps because SSE2 is a baseline
22571 facility in 64 bit mode. */
22573 Bool decode_OK = False;
22574 delta = dis_ESC_0F__SSE2 ( &decode_OK,
22575 archinfo, vbi, pfx, sz, deltaIN, dres );
22576 if (decode_OK)
22577 return delta;
22580 /* =-=-=-=-=-=-=-=-= SSE3ery =-=-=-=-=-=-=-=-= */
22581 /* Perhaps it's a SSE3 instruction. FIXME: check guest hwcaps
22582 first. */
22584 Bool decode_OK = False;
22585 delta = dis_ESC_0F__SSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
22586 if (decode_OK)
22587 return delta;
22590 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22591 /* Perhaps it's a SSE4 instruction. FIXME: check guest hwcaps
22592 first. */
22594 Bool decode_OK = False;
22595 delta = dis_ESC_0F__SSE4 ( &decode_OK,
22596 archinfo, vbi, pfx, sz, deltaIN );
22597 if (decode_OK)
22598 return delta;
22601 decode_failure:
22602 return deltaIN; /* fail */
22606 /*------------------------------------------------------------*/
22607 /*--- ---*/
22608 /*--- Top-level post-escape decoders: dis_ESC_0F38 ---*/
22609 /*--- ---*/
22610 /*------------------------------------------------------------*/
22612 __attribute__((noinline))
22613 static
22614 Long dis_ESC_0F38 (
22615 /*MB_OUT*/DisResult* dres,
22616 const VexArchInfo* archinfo,
22617 const VexAbiInfo* vbi,
22618 Prefix pfx, Int sz, Long deltaIN
22621 Long delta = deltaIN;
22622 UChar opc = getUChar(delta);
22623 delta++;
22624 switch (opc) {
22626 case 0xF0: /* 0F 38 F0 = MOVBE m16/32/64(E), r16/32/64(G) */
22627 case 0xF1: { /* 0F 38 F1 = MOVBE r16/32/64(G), m16/32/64(E) */
22628 if (!haveF2orF3(pfx) && !haveVEX(pfx)
22629 && (sz == 2 || sz == 4 || sz == 8)) {
22630 IRTemp addr = IRTemp_INVALID;
22631 UChar modrm = 0;
22632 Int alen = 0;
22633 HChar dis_buf[50];
22634 modrm = getUChar(delta);
22635 if (epartIsReg(modrm)) break;
22636 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22637 delta += alen;
22638 IRType ty = szToITy(sz);
22639 IRTemp src = newTemp(ty);
22640 if (opc == 0xF0) { /* LOAD */
22641 assign(src, loadLE(ty, mkexpr(addr)));
22642 IRTemp dst = math_BSWAP(src, ty);
22643 putIRegG(sz, pfx, modrm, mkexpr(dst));
22644 DIP("movbe %s,%s\n", dis_buf, nameIRegG(sz, pfx, modrm));
22645 } else { /* STORE */
22646 assign(src, getIRegG(sz, pfx, modrm));
22647 IRTemp dst = math_BSWAP(src, ty);
22648 storeLE(mkexpr(addr), mkexpr(dst));
22649 DIP("movbe %s,%s\n", nameIRegG(sz, pfx, modrm), dis_buf);
22651 return delta;
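            /* Either way this is just a load or store combined with a
               byte swap: a 32-bit MOVBE load of the memory bytes
               11 22 33 44 (in increasing address order) puts 0x11223344
               in the destination register, where a plain little-endian
               load would give 0x44332211. */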
22653 /* else fall through; maybe one of the decoders below knows what
22654 it is. */
22655 break;
22658 default:
22659 break;
22662 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
22663 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps
22664 rather than proceeding indiscriminately. */
22666 Bool decode_OK = False;
22667 delta = dis_ESC_0F38__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
22668 if (decode_OK)
22669 return delta;
22672 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22673 /* Perhaps it's an SSE4 instruction. FIXME: consult guest hwcaps
22674 rather than proceeding indiscriminately. */
22676 Bool decode_OK = False;
22677 delta = dis_ESC_0F38__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN );
22678 if (decode_OK)
22679 return delta;
22682 /* Ignore previous decode attempts and restart from the beginning of
22683 the instruction. */
22684 delta = deltaIN;
22685 opc = getUChar(delta);
22686 delta++;
22688 switch (opc) {
22690 case 0xF6: {
22691 /* 66 0F 38 F6 = ADCX r32/64(G), m32/64(E) */
22692 /* F3 0F 38 F6 = ADOX r32/64(G), m32/64(E) */
22693 /* These were introduced in Broadwell. Gate them on AVX so as to at
22694 least reject them on earlier guests. Has no host requirements. */
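      /* Unlike plain ADC, ADCX reads and writes only the carry flag and
         ADOX only the overflow flag; the WithFlagCarryX/WithFlagOverX
         selectors passed to dis_op2_E_G below are presumably how that
         restricted flag behaviour is requested. */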
22695 if (have66noF2noF3(pfx) && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
22696 if (sz == 2) {
22697 sz = 4; /* 66 prefix but operand size is 4/8 */
22699 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagCarryX, True,
22700 sz, delta, "adcx" );
22701 return delta;
22703 if (haveF3no66noF2(pfx) && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
22704 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagOverX, True,
22705 sz, delta, "adox" );
22706 return delta;
22708 /* else fall through */
22709 break;
22712 default:
22713 break;
22716 /*decode_failure:*/
22717 return deltaIN; /* fail */
22721 /*------------------------------------------------------------*/
22722 /*--- ---*/
22723 /*--- Top-level post-escape decoders: dis_ESC_0F3A ---*/
22724 /*--- ---*/
22725 /*------------------------------------------------------------*/
22727 __attribute__((noinline))
22728 static
22729 Long dis_ESC_0F3A (
22730 /*MB_OUT*/DisResult* dres,
22731 const VexArchInfo* archinfo,
22732 const VexAbiInfo* vbi,
22733 Prefix pfx, Int sz, Long deltaIN
22736 Long delta = deltaIN;
22737 UChar opc = getUChar(delta);
22738 delta++;
22739 switch (opc) {
22741 default:
22742 break;
22746 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
22747 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps
22748 rather than proceeding indiscriminately. */
22750 Bool decode_OK = False;
22751 delta = dis_ESC_0F3A__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
22752 if (decode_OK)
22753 return delta;
22756 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22757 /* Perhaps it's an SSE4 instruction. FIXME: consult guest hwcaps
22758 rather than proceeding indiscriminately. */
22760 Bool decode_OK = False;
22761 delta = dis_ESC_0F3A__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN );
22762 if (decode_OK)
22763 return delta;
22766 return deltaIN; /* fail */
22770 /*------------------------------------------------------------*/
22771 /*--- ---*/
22772 /*--- Top-level post-escape decoders: dis_ESC_0F__VEX ---*/
22773 /*--- ---*/
22774 /*------------------------------------------------------------*/
22776 /* FIXME: common up with the _256_ version below? */
22777 static
22778 Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG (
22779 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
22780 Prefix pfx, Long delta, const HChar* name,
22781    /* The actual operation.  Use either 'op' or 'opFn',
22782 but not both. */
22783 IROp op, IRTemp(*opFn)(IRTemp,IRTemp),
22784 Bool invertLeftArg,
22785 Bool swapArgs
22788 UChar modrm = getUChar(delta);
22789 UInt rD = gregOfRexRM(pfx, modrm);
22790 UInt rSL = getVexNvvvv(pfx);
22791 IRTemp tSL = newTemp(Ity_V128);
22792 IRTemp tSR = newTemp(Ity_V128);
22793 IRTemp addr = IRTemp_INVALID;
22794 HChar dis_buf[50];
22795 Int alen = 0;
22796 vassert(0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*WIG?*/);
22798 assign(tSL, invertLeftArg ? unop(Iop_NotV128, getXMMReg(rSL))
22799 : getXMMReg(rSL));
22801 if (epartIsReg(modrm)) {
22802 UInt rSR = eregOfRexRM(pfx, modrm);
22803 delta += 1;
22804 assign(tSR, getXMMReg(rSR));
22805 DIP("%s %s,%s,%s\n",
22806 name, nameXMMReg(rSR), nameXMMReg(rSL), nameXMMReg(rD));
22807 } else {
22808 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
22809 delta += alen;
22810 assign(tSR, loadLE(Ity_V128, mkexpr(addr)));
22811 DIP("%s %s,%s,%s\n",
22812 name, dis_buf, nameXMMReg(rSL), nameXMMReg(rD));
22815 IRTemp res = IRTemp_INVALID;
22816 if (op != Iop_INVALID) {
22817 vassert(opFn == NULL);
22818 res = newTemp(Ity_V128);
22819 if (requiresRMode(op)) {
22820 IRTemp rm = newTemp(Ity_I32);
22821 assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
22822 assign(res, swapArgs
22823 ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL))
22824 : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR)));
22825 } else {
22826 assign(res, swapArgs
22827 ? binop(op, mkexpr(tSR), mkexpr(tSL))
22828 : binop(op, mkexpr(tSL), mkexpr(tSR)));
22830 } else {
22831 vassert(opFn != NULL);
22832 res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR);
22835 putYMMRegLoAndZU(rD, mkexpr(res));
22837 *uses_vvvv = True;
22838 return delta;
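/* The function above is the generic worker for 3-operand VEX.128
   instructions of the form  dst = vvvv `op` r/m.  tSL comes from the
   vvvv field (optionally complemented, which is how ANDN-style
   operations can be expressed), tSR from the r/m operand, and the
   result goes to the low 128 bits of the destination with the upper
   128 bits zeroed, as VEX.128 encodings require. */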
22842 /* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, with a simple IROp
22843 for the operation, no inversion of the left arg, and no swapping of
22844 args. */
22845 static
22846 Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple (
22847 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
22848 Prefix pfx, Long delta, const HChar* name,
22849 IROp op
22852 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
22853 uses_vvvv, vbi, pfx, delta, name, op, NULL, False, False);
22857 /* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, using the given IR
22858 generator to compute the result, no inversion of the left
22859 arg, and no swapping of args. */
22860 static
22861 Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex (
22862 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
22863 Prefix pfx, Long delta, const HChar* name,
22864 IRTemp(*opFn)(IRTemp,IRTemp)
22867 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
22868 uses_vvvv, vbi, pfx, delta, name,
22869 Iop_INVALID, opFn, False, False );
22873 /* Vector by scalar shift of V by the amount specified at the bottom
22874 of E. */
22875 static ULong dis_AVX128_shiftV_byE ( const VexAbiInfo* vbi,
22876 Prefix pfx, Long delta,
22877 const HChar* opname, IROp op )
22879 HChar dis_buf[50];
22880 Int alen, size;
22881 IRTemp addr;
22882 Bool shl, shr, sar;
22883 UChar modrm = getUChar(delta);
22884 UInt rG = gregOfRexRM(pfx,modrm);
22885    UInt   rV      = getVexNvvvv(pfx);
22886 IRTemp g0 = newTemp(Ity_V128);
22887 IRTemp g1 = newTemp(Ity_V128);
22888 IRTemp amt = newTemp(Ity_I64);
22889 IRTemp amt8 = newTemp(Ity_I8);
22890 if (epartIsReg(modrm)) {
22891 UInt rE = eregOfRexRM(pfx,modrm);
22892 assign( amt, getXMMRegLane64(rE, 0) );
22893 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
22894 nameXMMReg(rV), nameXMMReg(rG) );
22895 delta++;
22896 } else {
22897 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22898 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
22899 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
22900 delta += alen;
22902 assign( g0, getXMMReg(rV) );
22903 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
22905 shl = shr = sar = False;
22906 size = 0;
22907 switch (op) {
22908       case Iop_ShlN16x8: shl = True; size = 16; break;
22909 case Iop_ShlN32x4: shl = True; size = 32; break;
22910 case Iop_ShlN64x2: shl = True; size = 64; break;
22911 case Iop_SarN16x8: sar = True; size = 16; break;
22912 case Iop_SarN32x4: sar = True; size = 32; break;
22913 case Iop_ShrN16x8: shr = True; size = 16; break;
22914 case Iop_ShrN32x4: shr = True; size = 32; break;
22915 case Iop_ShrN64x2: shr = True; size = 64; break;
22916 default: vassert(0);
22919 if (shl || shr) {
22920 assign(
22922 IRExpr_ITE(
22923 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
22924 binop(op, mkexpr(g0), mkexpr(amt8)),
22925 mkV128(0x0000)
22928 } else
22929 if (sar) {
22930 assign(
22932 IRExpr_ITE(
22933 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
22934 binop(op, mkexpr(g0), mkexpr(amt8)),
22935 binop(op, mkexpr(g0), mkU8(size-1))
22938 } else {
22939 vassert(0);
22942 putYMMRegLoAndZU( rG, mkexpr(g1) );
22943 return delta;
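/* Note the saturation behaviour of the ITE above: if the 64-bit shift
   amount is >= the lane width, a logical shift (shl/shr) produces
   all-zero lanes, whereas an arithmetic shift (sar) behaves as a shift
   by lane-width minus 1, so every lane is filled with its sign bit.
   For example, vpsraw by 40 leaves 0x0000 or 0xFFFF in each 16-bit
   lane, depending on the lane's sign. */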
22947 /* Vector by scalar shift of V by the amount specified at the bottom
22948 of E. */
22949 static ULong dis_AVX256_shiftV_byE ( const VexAbiInfo* vbi,
22950 Prefix pfx, Long delta,
22951 const HChar* opname, IROp op )
22953 HChar dis_buf[50];
22954 Int alen, size;
22955 IRTemp addr;
22956 Bool shl, shr, sar;
22957 UChar modrm = getUChar(delta);
22958 UInt rG = gregOfRexRM(pfx,modrm);
22959    UInt   rV      = getVexNvvvv(pfx);
22960 IRTemp g0 = newTemp(Ity_V256);
22961 IRTemp g1 = newTemp(Ity_V256);
22962 IRTemp amt = newTemp(Ity_I64);
22963 IRTemp amt8 = newTemp(Ity_I8);
22964 if (epartIsReg(modrm)) {
22965 UInt rE = eregOfRexRM(pfx,modrm);
22966 assign( amt, getXMMRegLane64(rE, 0) );
22967 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
22968 nameYMMReg(rV), nameYMMReg(rG) );
22969 delta++;
22970 } else {
22971 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22972 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
22973 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
22974 delta += alen;
22976 assign( g0, getYMMReg(rV) );
22977 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
22979 shl = shr = sar = False;
22980 size = 0;
22981 switch (op) {
22982       case Iop_ShlN16x16: shl = True; size = 16; break;
22983 case Iop_ShlN32x8: shl = True; size = 32; break;
22984 case Iop_ShlN64x4: shl = True; size = 64; break;
22985 case Iop_SarN16x16: sar = True; size = 16; break;
22986 case Iop_SarN32x8: sar = True; size = 32; break;
22987 case Iop_ShrN16x16: shr = True; size = 16; break;
22988 case Iop_ShrN32x8: shr = True; size = 32; break;
22989 case Iop_ShrN64x4: shr = True; size = 64; break;
22990 default: vassert(0);
22993 if (shl || shr) {
22994 assign(
22996 IRExpr_ITE(
22997 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
22998 binop(op, mkexpr(g0), mkexpr(amt8)),
22999 binop(Iop_V128HLtoV256, mkV128(0), mkV128(0))
23002 } else
23003 if (sar) {
23004 assign(
23006 IRExpr_ITE(
23007 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
23008 binop(op, mkexpr(g0), mkexpr(amt8)),
23009 binop(op, mkexpr(g0), mkU8(size-1))
23012 } else {
23013 vassert(0);
23016 putYMMReg( rG, mkexpr(g1) );
23017 return delta;
23021 /* Vector by vector shift of V by the amount specified at the bottom
23022 of E. Vector by vector shifts are defined for all shift amounts,
23023 so not using Iop_S*x* here (and SSE2 doesn't support variable shifts
23024 anyway). */
23025 static ULong dis_AVX_var_shiftV_byE ( const VexAbiInfo* vbi,
23026 Prefix pfx, Long delta,
23027 const HChar* opname, IROp op, Bool isYMM )
23029 HChar dis_buf[50];
23030 Int alen, size, i;
23031 IRTemp addr;
23032 UChar modrm = getUChar(delta);
23033 UInt rG = gregOfRexRM(pfx,modrm);
23034    UInt   rV      = getVexNvvvv(pfx);
23035 IRTemp sV = isYMM ? newTemp(Ity_V256) : newTemp(Ity_V128);
23036 IRTemp amt = isYMM ? newTemp(Ity_V256) : newTemp(Ity_V128);
23037 IRTemp amts[8], sVs[8], res[8];
23038 if (epartIsReg(modrm)) {
23039 UInt rE = eregOfRexRM(pfx,modrm);
23040 assign( amt, isYMM ? getYMMReg(rE) : getXMMReg(rE) );
23041 if (isYMM) {
23042 DIP("%s %s,%s,%s\n", opname, nameYMMReg(rE),
23043 nameYMMReg(rV), nameYMMReg(rG) );
23044 } else {
23045 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
23046 nameXMMReg(rV), nameXMMReg(rG) );
23048 delta++;
23049 } else {
23050 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23051 assign( amt, loadLE(isYMM ? Ity_V256 : Ity_V128, mkexpr(addr)) );
23052 if (isYMM) {
23053 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV),
23054 nameYMMReg(rG) );
23055 } else {
23056 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV),
23057 nameXMMReg(rG) );
23059 delta += alen;
23061 assign( sV, isYMM ? getYMMReg(rV) : getXMMReg(rV) );
23063 size = 0;
23064 switch (op) {
23065 case Iop_Shl32: size = 32; break;
23066 case Iop_Shl64: size = 64; break;
23067 case Iop_Sar32: size = 32; break;
23068 case Iop_Shr32: size = 32; break;
23069 case Iop_Shr64: size = 64; break;
23070 default: vassert(0);
23073 for (i = 0; i < 8; i++) {
23074 sVs[i] = IRTemp_INVALID;
23075 amts[i] = IRTemp_INVALID;
23077 switch (size) {
23078 case 32:
23079 if (isYMM) {
23080 breakupV256to32s( sV, &sVs[7], &sVs[6], &sVs[5], &sVs[4],
23081 &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
23082 breakupV256to32s( amt, &amts[7], &amts[6], &amts[5], &amts[4],
23083 &amts[3], &amts[2], &amts[1], &amts[0] );
23084 } else {
23085 breakupV128to32s( sV, &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
23086 breakupV128to32s( amt, &amts[3], &amts[2], &amts[1], &amts[0] );
23088 break;
23089 case 64:
23090 if (isYMM) {
23091 breakupV256to64s( sV, &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
23092 breakupV256to64s( amt, &amts[3], &amts[2], &amts[1], &amts[0] );
23093 } else {
23094 breakupV128to64s( sV, &sVs[1], &sVs[0] );
23095 breakupV128to64s( amt, &amts[1], &amts[0] );
23097 break;
23098 default: vassert(0);
23100 for (i = 0; i < 8; i++)
23101 if (sVs[i] != IRTemp_INVALID) {
23102 res[i] = size == 32 ? newTemp(Ity_I32) : newTemp(Ity_I64);
23103 assign( res[i],
23104 IRExpr_ITE(
23105 binop(size == 32 ? Iop_CmpLT32U : Iop_CmpLT64U,
23106 mkexpr(amts[i]),
23107 size == 32 ? mkU32(size) : mkU64(size)),
23108 binop(op, mkexpr(sVs[i]),
23109 unop(size == 32 ? Iop_32to8 : Iop_64to8,
23110 mkexpr(amts[i]))),
23111 op == Iop_Sar32 ? binop(op, mkexpr(sVs[i]), mkU8(size-1))
23112 : size == 32 ? mkU32(0) : mkU64(0)
23115 switch (size) {
23116 case 32:
23117 for (i = 0; i < 8; i++)
23118 putYMMRegLane32( rG, i, (i < 4 || isYMM)
23119 ? mkexpr(res[i]) : mkU32(0) );
23120 break;
23121 case 64:
23122 for (i = 0; i < 4; i++)
23123 putYMMRegLane64( rG, i, (i < 2 || isYMM)
23124 ? mkexpr(res[i]) : mkU64(0) );
23125 break;
23126 default: vassert(0);
23129 return delta;
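/* The function above implements the per-lane variable shifts
   (presumably the VPSLLV{D,Q}, VPSRLV{D,Q}, VPSRAVD family): each lane
   is shifted by the amount held in the corresponding lane of the
   second source.  Out-of-range amounts (>= lane size) give 0 for the
   logical shifts and, for the arithmetic Iop_Sar32 case, act as a
   shift by size-1, matching the hardware's saturating behaviour. */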
23133 /* Vector by scalar shift of E into V, by an immediate byte. Modified
23134 version of dis_SSE_shiftE_imm. */
23135 static
23136 Long dis_AVX128_shiftE_to_V_imm( Prefix pfx,
23137 Long delta, const HChar* opname, IROp op )
23139 Bool shl, shr, sar;
23140 UChar rm = getUChar(delta);
23141 IRTemp e0 = newTemp(Ity_V128);
23142 IRTemp e1 = newTemp(Ity_V128);
23143 UInt rD = getVexNvvvv(pfx);
23144 UChar amt, size;
23145 vassert(epartIsReg(rm));
23146 vassert(gregLO3ofRM(rm) == 2
23147 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
23148 amt = getUChar(delta+1);
23149 delta += 2;
23150 DIP("%s $%d,%s,%s\n", opname,
23151 (Int)amt,
23152 nameXMMReg(eregOfRexRM(pfx,rm)),
23153 nameXMMReg(rD));
23154 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );
23156 shl = shr = sar = False;
23157 size = 0;
23158 switch (op) {
23159 case Iop_ShlN16x8: shl = True; size = 16; break;
23160 case Iop_ShlN32x4: shl = True; size = 32; break;
23161 case Iop_ShlN64x2: shl = True; size = 64; break;
23162 case Iop_SarN16x8: sar = True; size = 16; break;
23163 case Iop_SarN32x4: sar = True; size = 32; break;
23164 case Iop_ShrN16x8: shr = True; size = 16; break;
23165 case Iop_ShrN32x4: shr = True; size = 32; break;
23166 case Iop_ShrN64x2: shr = True; size = 64; break;
23167 default: vassert(0);
23170 if (shl || shr) {
23171 assign( e1, amt >= size
23172 ? mkV128(0x0000)
23173 : binop(op, mkexpr(e0), mkU8(amt))
23175 } else
23176 if (sar) {
23177 assign( e1, amt >= size
23178 ? binop(op, mkexpr(e0), mkU8(size-1))
23179 : binop(op, mkexpr(e0), mkU8(amt))
23181 } else {
23182 vassert(0);
23185 putYMMRegLoAndZU( rD, mkexpr(e1) );
23186 return delta;
23190 /* Vector by scalar shift of E into V, by an immediate byte. Modified
23191 version of dis_AVX128_shiftE_to_V_imm. */
23192 static
23193 Long dis_AVX256_shiftE_to_V_imm( Prefix pfx,
23194 Long delta, const HChar* opname, IROp op )
23196 Bool shl, shr, sar;
23197 UChar rm = getUChar(delta);
23198 IRTemp e0 = newTemp(Ity_V256);
23199 IRTemp e1 = newTemp(Ity_V256);
23200 UInt rD = getVexNvvvv(pfx);
23201 UChar amt, size;
23202 vassert(epartIsReg(rm));
23203 vassert(gregLO3ofRM(rm) == 2
23204 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
23205 amt = getUChar(delta+1);
23206 delta += 2;
23207 DIP("%s $%d,%s,%s\n", opname,
23208 (Int)amt,
23209 nameYMMReg(eregOfRexRM(pfx,rm)),
23210 nameYMMReg(rD));
23211 assign( e0, getYMMReg(eregOfRexRM(pfx,rm)) );
23213 shl = shr = sar = False;
23214 size = 0;
23215 switch (op) {
23216 case Iop_ShlN16x16: shl = True; size = 16; break;
23217 case Iop_ShlN32x8: shl = True; size = 32; break;
23218 case Iop_ShlN64x4: shl = True; size = 64; break;
23219 case Iop_SarN16x16: sar = True; size = 16; break;
23220 case Iop_SarN32x8: sar = True; size = 32; break;
23221 case Iop_ShrN16x16: shr = True; size = 16; break;
23222 case Iop_ShrN32x8: shr = True; size = 32; break;
23223 case Iop_ShrN64x4: shr = True; size = 64; break;
23224 default: vassert(0);
23228 if (shl || shr) {
23229 assign( e1, amt >= size
23230 ? binop(Iop_V128HLtoV256, mkV128(0), mkV128(0))
23231 : binop(op, mkexpr(e0), mkU8(amt))
23233 } else
23234 if (sar) {
23235 assign( e1, amt >= size
23236 ? binop(op, mkexpr(e0), mkU8(size-1))
23237 : binop(op, mkexpr(e0), mkU8(amt))
23239 } else {
23240 vassert(0);
23243 putYMMReg( rD, mkexpr(e1) );
23244 return delta;
23248 /* Lower 64-bit lane only AVX128 binary operation:
23249 G[63:0] = V[63:0] `op` E[63:0]
23250 G[127:64] = V[127:64]
23251 G[255:128] = 0.
23252 The specified op must be of the 64F0x2 kind, so that it
23253 copies the upper half of the left operand to the result.
23255 static Long dis_AVX128_E_V_to_G_lo64 ( /*OUT*/Bool* uses_vvvv,
23256 const VexAbiInfo* vbi,
23257 Prefix pfx, Long delta,
23258 const HChar* opname, IROp op )
23260 HChar dis_buf[50];
23261 Int alen;
23262 IRTemp addr;
23263 UChar rm = getUChar(delta);
23264 UInt rG = gregOfRexRM(pfx,rm);
23265 UInt rV = getVexNvvvv(pfx);
23266 IRExpr* vpart = getXMMReg(rV);
23267 if (epartIsReg(rm)) {
23268 UInt rE = eregOfRexRM(pfx,rm);
23269 putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) );
23270 DIP("%s %s,%s,%s\n", opname,
23271 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23272 delta = delta+1;
23273 } else {
23274 /* We can only do a 64-bit memory read, so the upper half of the
23275 E operand needs to be made simply of zeroes. */
23276 IRTemp epart = newTemp(Ity_V128);
23277 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23278 assign( epart, unop( Iop_64UtoV128,
23279 loadLE(Ity_I64, mkexpr(addr))) );
23280 putXMMReg( rG, binop(op, vpart, mkexpr(epart)) );
23281 DIP("%s %s,%s,%s\n", opname,
23282 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23283 delta = delta+alen;
23285 putYMMRegLane128( rG, 1, mkV128(0) );
23286 *uses_vvvv = True;
23287 return delta;
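/* As with the other VEX-encoded 128-bit operations here, the final
   putYMMRegLane128(rG, 1, mkV128(0)) zeroes bits 255:128 of the
   destination; this is what distinguishes, say, vaddsd from the
   legacy-SSE addsd, which leaves the upper half of the YMM register
   untouched. */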
23291 /* Lower 64-bit lane only AVX128 unary operation:
23292 G[63:0] = op(E[63:0])
23293 G[127:64] = V[127:64]
23294 G[255:128] = 0
23295 The specified op must be of the 64F0x2 kind, so that it
23296 copies the upper half of the operand to the result.
23298 static Long dis_AVX128_E_V_to_G_lo64_unary ( /*OUT*/Bool* uses_vvvv,
23299 const VexAbiInfo* vbi,
23300 Prefix pfx, Long delta,
23301 const HChar* opname, IROp op )
23303 HChar dis_buf[50];
23304 Int alen;
23305 IRTemp addr;
23306 UChar rm = getUChar(delta);
23307 UInt rG = gregOfRexRM(pfx,rm);
23308 UInt rV = getVexNvvvv(pfx);
23309 IRTemp e64 = newTemp(Ity_I64);
23311 /* Fetch E[63:0] */
23312 if (epartIsReg(rm)) {
23313 UInt rE = eregOfRexRM(pfx,rm);
23314 assign(e64, getXMMRegLane64(rE, 0));
23315 DIP("%s %s,%s,%s\n", opname,
23316 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23317 delta += 1;
23318 } else {
23319 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23320 assign(e64, loadLE(Ity_I64, mkexpr(addr)));
23321 DIP("%s %s,%s,%s\n", opname,
23322 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23323 delta += alen;
23326 /* Create a value 'arg' as V[127:64]++E[63:0] */
23327 IRTemp arg = newTemp(Ity_V128);
23328 assign(arg,
23329 binop(Iop_SetV128lo64,
23330 getXMMReg(rV), mkexpr(e64)));
23331 /* and apply op to it */
23332 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
23333 *uses_vvvv = True;
23334 return delta;
23338 /* Lower 32-bit lane only AVX128 unary operation:
23339 G[31:0] = op(E[31:0])
23340 G[127:32] = V[127:32]
23341 G[255:128] = 0
23342 The specified op must be of the 32F0x4 kind, so that it
23343 copies the upper 3/4 of the operand to the result.
23345 static Long dis_AVX128_E_V_to_G_lo32_unary ( /*OUT*/Bool* uses_vvvv,
23346 const VexAbiInfo* vbi,
23347 Prefix pfx, Long delta,
23348 const HChar* opname, IROp op )
23350 HChar dis_buf[50];
23351 Int alen;
23352 IRTemp addr;
23353 UChar rm = getUChar(delta);
23354 UInt rG = gregOfRexRM(pfx,rm);
23355 UInt rV = getVexNvvvv(pfx);
23356 IRTemp e32 = newTemp(Ity_I32);
23358 /* Fetch E[31:0] */
23359 if (epartIsReg(rm)) {
23360 UInt rE = eregOfRexRM(pfx,rm);
23361 assign(e32, getXMMRegLane32(rE, 0));
23362 DIP("%s %s,%s,%s\n", opname,
23363 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23364 delta += 1;
23365 } else {
23366 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23367 assign(e32, loadLE(Ity_I32, mkexpr(addr)));
23368 DIP("%s %s,%s,%s\n", opname,
23369 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23370 delta += alen;
23373 /* Create a value 'arg' as V[127:32]++E[31:0] */
23374 IRTemp arg = newTemp(Ity_V128);
23375 assign(arg,
23376 binop(Iop_SetV128lo32,
23377 getXMMReg(rV), mkexpr(e32)));
23378 /* and apply op to it */
23379 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
23380 *uses_vvvv = True;
23381 return delta;
23385 /* Lower 32-bit lane only AVX128 binary operation:
23386 G[31:0] = V[31:0] `op` E[31:0]
23387 G[127:32] = V[127:32]
23388 G[255:128] = 0.
23389 The specified op must be of the 32F0x4 kind, so that it
23390 copies the upper 3/4 of the left operand to the result.
23392 static Long dis_AVX128_E_V_to_G_lo32 ( /*OUT*/Bool* uses_vvvv,
23393 const VexAbiInfo* vbi,
23394 Prefix pfx, Long delta,
23395 const HChar* opname, IROp op )
23397 HChar dis_buf[50];
23398 Int alen;
23399 IRTemp addr;
23400 UChar rm = getUChar(delta);
23401 UInt rG = gregOfRexRM(pfx,rm);
23402 UInt rV = getVexNvvvv(pfx);
23403 IRExpr* vpart = getXMMReg(rV);
23404 if (epartIsReg(rm)) {
23405 UInt rE = eregOfRexRM(pfx,rm);
23406 putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) );
23407 DIP("%s %s,%s,%s\n", opname,
23408 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23409 delta = delta+1;
23410 } else {
23411 /* We can only do a 32-bit memory read, so the upper 3/4 of the
23412 E operand needs to be made simply of zeroes. */
23413 IRTemp epart = newTemp(Ity_V128);
23414 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23415 assign( epart, unop( Iop_32UtoV128,
23416 loadLE(Ity_I32, mkexpr(addr))) );
23417 putXMMReg( rG, binop(op, vpart, mkexpr(epart)) );
23418 DIP("%s %s,%s,%s\n", opname,
23419 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23420 delta = delta+alen;
23422 putYMMRegLane128( rG, 1, mkV128(0) );
23423 *uses_vvvv = True;
23424 return delta;
23428 /* All-lanes AVX128 binary operation:
23429 G[127:0] = V[127:0] `op` E[127:0]
23430 G[255:128] = 0.
23432 static Long dis_AVX128_E_V_to_G ( /*OUT*/Bool* uses_vvvv,
23433 const VexAbiInfo* vbi,
23434 Prefix pfx, Long delta,
23435 const HChar* opname, IROp op )
23437 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
23438 uses_vvvv, vbi, pfx, delta, opname, op,
23439 NULL, False/*!invertLeftArg*/, False/*!swapArgs*/
23444 /* Handles AVX128 32F/64F comparisons. A derivative of
23445 dis_SSEcmp_E_to_G. It can fail, in which case it returns the
23446 original delta to indicate failure. */
23447 static
23448 Long dis_AVX128_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv,
23449 const VexAbiInfo* vbi,
23450 Prefix pfx, Long delta,
23451 const HChar* opname, Bool all_lanes, Int sz )
23453 vassert(sz == 4 || sz == 8);
23454 Long deltaIN = delta;
23455 HChar dis_buf[50];
23456 Int alen;
23457 UInt imm8;
23458 IRTemp addr;
23459 Bool preZero = False;
23460 Bool preSwap = False;
23461 IROp op = Iop_INVALID;
23462 Bool postNot = False;
23463 IRTemp plain = newTemp(Ity_V128);
23464 UChar rm = getUChar(delta);
23465 UInt rG = gregOfRexRM(pfx, rm);
23466 UInt rV = getVexNvvvv(pfx);
23467 IRTemp argL = newTemp(Ity_V128);
23468 IRTemp argR = newTemp(Ity_V128);
23470 assign(argL, getXMMReg(rV));
23471 if (epartIsReg(rm)) {
23472 imm8 = getUChar(delta+1);
23473 Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot,
23474 imm8, all_lanes, sz);
23475 if (!ok) return deltaIN; /* FAIL */
23476 UInt rE = eregOfRexRM(pfx,rm);
23477 assign(argR, getXMMReg(rE));
23478 delta += 1+1;
23479 DIP("%s $%u,%s,%s,%s\n",
23480 opname, imm8,
23481 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23482 } else {
23483 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
23484 imm8 = getUChar(delta+alen);
23485 Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot,
23486 imm8, all_lanes, sz);
23487 if (!ok) return deltaIN; /* FAIL */
23488 assign(argR,
23489 all_lanes ? loadLE(Ity_V128, mkexpr(addr))
23490 : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
23491 : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr))));
23492 delta += alen+1;
23493 DIP("%s $%u,%s,%s,%s\n",
23494 opname, imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23497 IRTemp argMask = newTemp(Ity_V128);
23498 if (preZero) {
23499 // In this case, preSwap is irrelevant, but it's harmless to honour it
23500 // anyway.
23501 assign(argMask, mkV128(all_lanes ? 0x0000 : (sz==4 ? 0xFFF0 : 0xFF00)));
23502 } else {
23503 assign(argMask, mkV128(0xFFFF));
23506 assign(
23507 plain,
23508 preSwap ? binop(op, binop(Iop_AndV128, mkexpr(argR), mkexpr(argMask)),
23509 binop(Iop_AndV128, mkexpr(argL), mkexpr(argMask)))
23510 : binop(op, binop(Iop_AndV128, mkexpr(argL), mkexpr(argMask)),
23511 binop(Iop_AndV128, mkexpr(argR), mkexpr(argMask)))
23514 if (all_lanes) {
23515 /* This is simple: just invert the result, if necessary, and
23516 have done. */
23517 if (postNot) {
23518 putYMMRegLoAndZU( rG, unop(Iop_NotV128, mkexpr(plain)) );
23519 } else {
23520 putYMMRegLoAndZU( rG, mkexpr(plain) );
23523 else
23524 if (!preSwap) {
23525 /* More complex. It's a one-lane-only, hence need to possibly
23526 invert only that one lane. But at least the other lanes are
23527 correctly "in" the result, having been copied from the left
23528 operand (argL). */
23529 if (postNot) {
23530 IRExpr* mask = mkV128(sz==4 ? 0x000F : 0x00FF);
23531 putYMMRegLoAndZU( rG, binop(Iop_XorV128, mkexpr(plain),
23532 mask) );
23533 } else {
23534 putYMMRegLoAndZU( rG, mkexpr(plain) );
23537 else {
23538 /* This is the most complex case. One-lane-only, but the args
23539 were swapped. So we have to possibly invert the bottom lane,
23540 and (definitely) we have to copy the upper lane(s) from argL
23541 since, due to the swapping, what's currently there is from
23542 argR, which is not correct. */
23543 IRTemp res = newTemp(Ity_V128);
23544 IRTemp mask = newTemp(Ity_V128);
23545 IRTemp notMask = newTemp(Ity_V128);
23546 assign(mask, mkV128(sz==4 ? 0x000F : 0x00FF));
23547 assign(notMask, mkV128(sz==4 ? 0xFFF0 : 0xFF00));
23548 if (postNot) {
23549 assign(res,
23550 binop(Iop_OrV128,
23551 binop(Iop_AndV128,
23552 unop(Iop_NotV128, mkexpr(plain)),
23553 mkexpr(mask)),
23554 binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask))));
23555 } else {
23556 assign(res,
23557 binop(Iop_OrV128,
23558 binop(Iop_AndV128,
23559 mkexpr(plain),
23560 mkexpr(mask)),
23561 binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask))));
23563 putYMMRegLoAndZU( rG, mkexpr(res) );
23566 *uses_vvvv = True;
23567 return delta;
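/* Summary of the imm8 handling above: findSSECmpOp maps the comparison
   predicate in imm8 onto a basic IR comparison plus up to three
   fixups -- preSwap (swap the operands, e.g. to get GT/GE from LT/LE),
   postNot (complement the result, for the negated forms) and preZero
   (force both inputs to zero, which yields the constant FALSE/TRUE
   predicates).  The one-lane ss/sd cases then need the extra masking
   so that only lane 0 of the result is affected. */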
23571 /* Handles AVX256 32F/64F comparisons. A derivative of
23572 dis_SSEcmp_E_to_G. It can fail, in which case it returns the
23573 original delta to indicate failure. */
23574 static
23575 Long dis_AVX256_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv,
23576 const VexAbiInfo* vbi,
23577 Prefix pfx, Long delta,
23578 const HChar* opname, Int sz )
23580 vassert(sz == 4 || sz == 8);
23581 Long deltaIN = delta;
23582 HChar dis_buf[50];
23583 Int alen;
23584 UInt imm8;
23585 IRTemp addr;
23586 Bool preZero = False;
23587 Bool preSwap = False;
23588 IROp op = Iop_INVALID;
23589 Bool postNot = False;
23590 IRTemp plain = newTemp(Ity_V256);
23591 UChar rm = getUChar(delta);
23592 UInt rG = gregOfRexRM(pfx, rm);
23593 UInt rV = getVexNvvvv(pfx);
23594 IRTemp argL = newTemp(Ity_V256);
23595 IRTemp argR = newTemp(Ity_V256);
23596 IRTemp argLhi = IRTemp_INVALID;
23597 IRTemp argLlo = IRTemp_INVALID;
23598 IRTemp argRhi = IRTemp_INVALID;
23599 IRTemp argRlo = IRTemp_INVALID;
23601 assign(argL, getYMMReg(rV));
23602 if (epartIsReg(rm)) {
23603 imm8 = getUChar(delta+1);
23604 Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot, imm8,
23605 True/*all_lanes*/, sz);
23606 if (!ok) return deltaIN; /* FAIL */
23607 UInt rE = eregOfRexRM(pfx,rm);
23608 assign(argR, getYMMReg(rE));
23609 delta += 1+1;
23610 DIP("%s $%u,%s,%s,%s\n",
23611 opname, imm8,
23612 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
23613 } else {
23614 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
23615 imm8 = getUChar(delta+alen);
23616 Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot, imm8,
23617 True/*all_lanes*/, sz);
23618 if (!ok) return deltaIN; /* FAIL */
23619 assign(argR, loadLE(Ity_V256, mkexpr(addr)) );
23620 delta += alen+1;
23621 DIP("%s $%u,%s,%s,%s\n",
23622 opname, imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
23625 breakupV256toV128s( preSwap ? argR : argL, &argLhi, &argLlo );
23626 breakupV256toV128s( preSwap ? argL : argR, &argRhi, &argRlo );
23628 IRTemp argMask = newTemp(Ity_V128);
23629 if (preZero) {
23630 // In this case, preSwap is irrelevant, but it's harmless to honour it
23631 // anyway.
23632 assign(argMask, mkV128(0x0000));
23633 } else {
23634 assign(argMask, mkV128(0xFFFF));
23637 assign(
23638 plain,
23639 binop( Iop_V128HLtoV256,
23640 binop(op, binop(Iop_AndV128, mkexpr(argLhi), mkexpr(argMask)),
23641 binop(Iop_AndV128, mkexpr(argRhi), mkexpr(argMask))),
23642 binop(op, binop(Iop_AndV128, mkexpr(argLlo), mkexpr(argMask)),
23643 binop(Iop_AndV128, mkexpr(argRlo), mkexpr(argMask))))
23646 /* This is simple: just invert the result, if necessary, and
23647 have done. */
23648 if (postNot) {
23649 putYMMReg( rG, unop(Iop_NotV256, mkexpr(plain)) );
23650 } else {
23651 putYMMReg( rG, mkexpr(plain) );
23654 *uses_vvvv = True;
23655 return delta;
23659 /* Handles AVX128 unary E-to-G all-lanes operations. */
23660 static
23661 Long dis_AVX128_E_to_G_unary ( /*OUT*/Bool* uses_vvvv,
23662 const VexAbiInfo* vbi,
23663 Prefix pfx, Long delta,
23664 const HChar* opname,
23665 IRTemp (*opFn)(IRTemp) )
23667 HChar dis_buf[50];
23668 Int alen;
23669 IRTemp addr;
23670 IRTemp res = newTemp(Ity_V128);
23671 IRTemp arg = newTemp(Ity_V128);
23672 UChar rm = getUChar(delta);
23673 UInt rG = gregOfRexRM(pfx, rm);
23674 if (epartIsReg(rm)) {
23675 UInt rE = eregOfRexRM(pfx,rm);
23676 assign(arg, getXMMReg(rE));
23677 delta += 1;
23678 DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG));
23679 } else {
23680 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23681 assign(arg, loadLE(Ity_V128, mkexpr(addr)));
23682 delta += alen;
23683 DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG));
23685 res = opFn(arg);
23686 putYMMRegLoAndZU( rG, mkexpr(res) );
23687 *uses_vvvv = False;
23688 return delta;
23692 /* Handles AVX128 unary E-to-G all-lanes operations. */
23693 static
23694 Long dis_AVX128_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv,
23695 const VexAbiInfo* vbi,
23696 Prefix pfx, Long delta,
23697 const HChar* opname, IROp op )
23699 HChar dis_buf[50];
23700 Int alen;
23701 IRTemp addr;
23702 IRTemp arg = newTemp(Ity_V128);
23703 UChar rm = getUChar(delta);
23704 UInt rG = gregOfRexRM(pfx, rm);
23705 if (epartIsReg(rm)) {
23706 UInt rE = eregOfRexRM(pfx,rm);
23707 assign(arg, getXMMReg(rE));
23708 delta += 1;
23709 DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG));
23710 } else {
23711 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23712 assign(arg, loadLE(Ity_V128, mkexpr(addr)));
23713 delta += alen;
23714 DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG));
23716 // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
23717 // up in the usual way.
23718 Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2;
23719 /* XXXROUNDINGFIXME */
23720 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), mkexpr(arg))
23721 : unop(op, mkexpr(arg));
23722 putYMMRegLoAndZU( rG, res );
23723 *uses_vvvv = False;
23724 return delta;
23728 /* FIXME: common up with the _128_ version above? */
23729 static
23730 Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG (
23731 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
23732 Prefix pfx, Long delta, const HChar* name,
23733    /* The actual operation.  Use either 'op' or 'opFn',
23734 but not both. */
23735 IROp op, IRTemp(*opFn)(IRTemp,IRTemp),
23736 Bool invertLeftArg,
23737 Bool swapArgs
23740 UChar modrm = getUChar(delta);
23741 UInt rD = gregOfRexRM(pfx, modrm);
23742 UInt rSL = getVexNvvvv(pfx);
23743 IRTemp tSL = newTemp(Ity_V256);
23744 IRTemp tSR = newTemp(Ity_V256);
23745 IRTemp addr = IRTemp_INVALID;
23746 HChar dis_buf[50];
23747 Int alen = 0;
23748 vassert(1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*WIG?*/);
23750 assign(tSL, invertLeftArg ? unop(Iop_NotV256, getYMMReg(rSL))
23751 : getYMMReg(rSL));
23753 if (epartIsReg(modrm)) {
23754 UInt rSR = eregOfRexRM(pfx, modrm);
23755 delta += 1;
23756 assign(tSR, getYMMReg(rSR));
23757 DIP("%s %s,%s,%s\n",
23758 name, nameYMMReg(rSR), nameYMMReg(rSL), nameYMMReg(rD));
23759 } else {
23760 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
23761 delta += alen;
23762 assign(tSR, loadLE(Ity_V256, mkexpr(addr)));
23763 DIP("%s %s,%s,%s\n",
23764 name, dis_buf, nameYMMReg(rSL), nameYMMReg(rD));
23767 IRTemp res = IRTemp_INVALID;
23768 if (op != Iop_INVALID) {
23769 vassert(opFn == NULL);
23770 res = newTemp(Ity_V256);
23771 if (requiresRMode(op)) {
23772 IRTemp rm = newTemp(Ity_I32);
23773 assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
23774 assign(res, swapArgs
23775 ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL))
23776 : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR)));
23777 } else {
23778 assign(res, swapArgs
23779 ? binop(op, mkexpr(tSR), mkexpr(tSL))
23780 : binop(op, mkexpr(tSL), mkexpr(tSR)));
23782 } else {
23783 vassert(opFn != NULL);
23784 res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR);
23787 putYMMReg(rD, mkexpr(res));
23789 *uses_vvvv = True;
23790 return delta;
23794 /* All-lanes AVX256 binary operation:
23795 G[255:0] = V[255:0] `op` E[255:0]
23797 static Long dis_AVX256_E_V_to_G ( /*OUT*/Bool* uses_vvvv,
23798 const VexAbiInfo* vbi,
23799 Prefix pfx, Long delta,
23800 const HChar* opname, IROp op )
23802 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23803 uses_vvvv, vbi, pfx, delta, opname, op,
23804 NULL, False/*!invertLeftArg*/, False/*!swapArgs*/
23809 /* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, with a simple IROp
23810 for the operation, no inversion of the left arg, and no swapping of
23811 args. */
23812 static
23813 Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple (
23814 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
23815 Prefix pfx, Long delta, const HChar* name,
23816 IROp op
23819 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23820 uses_vvvv, vbi, pfx, delta, name, op, NULL, False, False);
23824 /* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, using the given IR
23825 generator to compute the result, no inversion of the left
23826 arg, and no swapping of args. */
23827 static
23828 Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex (
23829 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
23830 Prefix pfx, Long delta, const HChar* name,
23831 IRTemp(*opFn)(IRTemp,IRTemp)
23834 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23835 uses_vvvv, vbi, pfx, delta, name,
23836 Iop_INVALID, opFn, False, False );
23840 /* Handles AVX256 unary E-to-G all-lanes operations, using an IR generator (opFn) to compute the result. */
23841 static
23842 Long dis_AVX256_E_to_G_unary ( /*OUT*/Bool* uses_vvvv,
23843 const VexAbiInfo* vbi,
23844 Prefix pfx, Long delta,
23845 const HChar* opname,
23846 IRTemp (*opFn)(IRTemp) )
23848 HChar dis_buf[50];
23849 Int alen;
23850 IRTemp addr;
23851 IRTemp res = newTemp(Ity_V256);
23852 IRTemp arg = newTemp(Ity_V256);
23853 UChar rm = getUChar(delta);
23854 UInt rG = gregOfRexRM(pfx, rm);
23855 if (epartIsReg(rm)) {
23856 UInt rE = eregOfRexRM(pfx,rm);
23857 assign(arg, getYMMReg(rE));
23858 delta += 1;
23859 DIP("%s %s,%s\n", opname, nameYMMReg(rE), nameYMMReg(rG));
23860 } else {
23861 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23862 assign(arg, loadLE(Ity_V256, mkexpr(addr)));
23863 delta += alen;
23864 DIP("%s %s,%s\n", opname, dis_buf, nameYMMReg(rG));
23866 res = opFn(arg);
23867 putYMMReg( rG, mkexpr(res) );
23868 *uses_vvvv = False;
23869 return delta;
23873 /* Handles AVX256 unary E-to-G all-lanes operations, where the result is a single IROp applied to the whole vector. */
23874 static
23875 Long dis_AVX256_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv,
23876 const VexAbiInfo* vbi,
23877 Prefix pfx, Long delta,
23878 const HChar* opname, IROp op )
23880 HChar dis_buf[50];
23881 Int alen;
23882 IRTemp addr;
23883 IRTemp arg = newTemp(Ity_V256);
23884 UChar rm = getUChar(delta);
23885 UInt rG = gregOfRexRM(pfx, rm);
23886 if (epartIsReg(rm)) {
23887 UInt rE = eregOfRexRM(pfx,rm);
23888 assign(arg, getYMMReg(rE));
23889 delta += 1;
23890 DIP("%s %s,%s\n", opname, nameYMMReg(rE), nameYMMReg(rG));
23891 } else {
23892 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23893 assign(arg, loadLE(Ity_V256, mkexpr(addr)));
23894 delta += alen;
23895 DIP("%s %s,%s\n", opname, dis_buf, nameYMMReg(rG));
23897 putYMMReg( rG, unop(op, mkexpr(arg)) );
23898 *uses_vvvv = False;
23899 return delta;
23903 /* The use of ReinterpF64asI64 is ugly. Surely could do better if we
23904 had a variant of Iop_64x4toV256 that took F64s as args instead. */
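/* VCVTDQ2PD (256-bit form): converts four 32-bit signed ints in the XMM
   source to four F64s in the YMM destination.  Each lane uses
   Iop_I32StoF64, which is exact, so no rounding mode is needed. */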
23905 static Long dis_CVTDQ2PD_256 ( const VexAbiInfo* vbi, Prefix pfx,
23906 Long delta )
23908 IRTemp addr = IRTemp_INVALID;
23909 Int alen = 0;
23910 HChar dis_buf[50];
23911 UChar modrm = getUChar(delta);
23912 IRTemp sV = newTemp(Ity_V128);
23913 UInt rG = gregOfRexRM(pfx,modrm);
23914 if (epartIsReg(modrm)) {
23915 UInt rE = eregOfRexRM(pfx,modrm);
23916 assign( sV, getXMMReg(rE) );
23917 delta += 1;
23918 DIP("vcvtdq2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
23919 } else {
23920 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23921 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
23922 delta += alen;
23923 DIP("vcvtdq2pd %s,%s\n", dis_buf, nameYMMReg(rG) );
23925 IRTemp s3, s2, s1, s0;
23926 s3 = s2 = s1 = s0 = IRTemp_INVALID;
23927 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
23928 IRExpr* res
23929 = IRExpr_Qop(
23930 Iop_64x4toV256,
23931 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s3))),
23932 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s2))),
23933 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s1))),
23934 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s0)))
23936 putYMMReg(rG, res);
23937 return delta;
23941 static Long dis_CVTPD2PS_256 ( const VexAbiInfo* vbi, Prefix pfx,
23942 Long delta )
23944 IRTemp addr = IRTemp_INVALID;
23945 Int alen = 0;
23946 HChar dis_buf[50];
23947 UChar modrm = getUChar(delta);
23948 UInt rG = gregOfRexRM(pfx,modrm);
23949 IRTemp argV = newTemp(Ity_V256);
23950 IRTemp rmode = newTemp(Ity_I32);
23951 if (epartIsReg(modrm)) {
23952 UInt rE = eregOfRexRM(pfx,modrm);
23953 assign( argV, getYMMReg(rE) );
23954 delta += 1;
23955 DIP("vcvtpd2psy %s,%s\n", nameYMMReg(rE), nameXMMReg(rG));
23956 } else {
23957 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23958 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
23959 delta += alen;
23960 DIP("vcvtpd2psy %s,%s\n", dis_buf, nameXMMReg(rG) );
23963 assign( rmode, get_sse_roundingmode() );
23964 IRTemp t3, t2, t1, t0;
23965 t3 = t2 = t1 = t0 = IRTemp_INVALID;
23966 breakupV256to64s( argV, &t3, &t2, &t1, &t0 );
23967 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), \
23968 unop(Iop_ReinterpI64asF64, mkexpr(_t)) )
23969 putXMMRegLane32F( rG, 3, CVT(t3) );
23970 putXMMRegLane32F( rG, 2, CVT(t2) );
23971 putXMMRegLane32F( rG, 1, CVT(t1) );
23972 putXMMRegLane32F( rG, 0, CVT(t0) );
23973 # undef CVT
23974 putYMMRegLane128( rG, 1, mkV128(0) );
23975 return delta;
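/* Applies a 128-bit binary op independently to the high and low 128-bit
   halves of two 256-bit values.  Despite the name this also serves the
   VPACK* helpers below, since both instruction families operate within
   128-bit lanes. */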
23979 static IRTemp math_VPUNPCK_YMM ( IRTemp tL, IRTemp tR, IROp op )
23981 IRTemp tLhi, tLlo, tRhi, tRlo;
23982 tLhi = tLlo = tRhi = tRlo = IRTemp_INVALID;
23983 IRTemp res = newTemp(Ity_V256);
23984 breakupV256toV128s( tL, &tLhi, &tLlo );
23985 breakupV256toV128s( tR, &tRhi, &tRlo );
23986 assign( res, binop( Iop_V128HLtoV256,
23987 binop( op, mkexpr(tRhi), mkexpr(tLhi) ),
23988 binop( op, mkexpr(tRlo), mkexpr(tLlo) ) ) );
23989 return res;
23993 static IRTemp math_VPUNPCKLBW_YMM ( IRTemp tL, IRTemp tR )
23995 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO8x16 );
23999 static IRTemp math_VPUNPCKLWD_YMM ( IRTemp tL, IRTemp tR )
24001 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO16x8 );
24005 static IRTemp math_VPUNPCKLDQ_YMM ( IRTemp tL, IRTemp tR )
24007 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO32x4 );
24011 static IRTemp math_VPUNPCKLQDQ_YMM ( IRTemp tL, IRTemp tR )
24013 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO64x2 );
24017 static IRTemp math_VPUNPCKHBW_YMM ( IRTemp tL, IRTemp tR )
24019 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI8x16 );
24023 static IRTemp math_VPUNPCKHWD_YMM ( IRTemp tL, IRTemp tR )
24025 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI16x8 );
24029 static IRTemp math_VPUNPCKHDQ_YMM ( IRTemp tL, IRTemp tR )
24031 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI32x4 );
24035 static IRTemp math_VPUNPCKHQDQ_YMM ( IRTemp tL, IRTemp tR )
24037 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI64x2 );
24041 static IRTemp math_VPACKSSWB_YMM ( IRTemp tL, IRTemp tR )
24043 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin16Sto8Sx16 );
24047 static IRTemp math_VPACKUSWB_YMM ( IRTemp tL, IRTemp tR )
24049 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin16Sto8Ux16 );
24053 static IRTemp math_VPACKSSDW_YMM ( IRTemp tL, IRTemp tR )
24055 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin32Sto16Sx8 );
24059 static IRTemp math_VPACKUSDW_YMM ( IRTemp tL, IRTemp tR )
24061 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin32Sto16Ux8 );
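/* Top-level decoder for VEX-encoded instructions in the 0F opcode map.
   Dispatches on the opcode byte; each case checks the required mandatory
   prefix (none/66/F2/F3) and, where relevant, VEX.L and VEX.W before
   decoding, and jumps to decode_success when it accepts the instruction.
   *uses_vvvv is set to True by cases that actually consume the VEX.vvvv
   register field. */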
24065 __attribute__((noinline))
24066 static
24067 Long dis_ESC_0F__VEX (
24068 /*MB_OUT*/DisResult* dres,
24069 /*OUT*/ Bool* uses_vvvv,
24070 const VexArchInfo* archinfo,
24071 const VexAbiInfo* vbi,
24072 Prefix pfx, Int sz, Long deltaIN
24075 IRTemp addr = IRTemp_INVALID;
24076 Int alen = 0;
24077 HChar dis_buf[50];
24078 Long delta = deltaIN;
24079 UChar opc = getUChar(delta);
24080 delta++;
24081 *uses_vvvv = False;
24083 switch (opc) {
24085 case 0x10:
24086 /* VMOVSD m64, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
24087 /* Move 64 bits from E (mem only) to G (lo half xmm).
24088 Bits 255-64 of the dest are zeroed out. */
24089 if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) {
24090 UChar modrm = getUChar(delta);
24091 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24092 UInt rG = gregOfRexRM(pfx,modrm);
24093 IRTemp z128 = newTemp(Ity_V128);
24094 assign(z128, mkV128(0));
24095 putXMMReg( rG, mkexpr(z128) );
24096 /* FIXME: ALIGNMENT CHECK? */
24097 putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) );
24098 putYMMRegLane128( rG, 1, mkexpr(z128) );
24099 DIP("vmovsd %s,%s\n", dis_buf, nameXMMReg(rG));
24100 delta += alen;
24101 goto decode_success;
24103 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
24104 /* Reg form. */
24105 if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) {
24106 UChar modrm = getUChar(delta);
24107 UInt rG = gregOfRexRM(pfx, modrm);
24108 UInt rE = eregOfRexRM(pfx, modrm);
24109 UInt rV = getVexNvvvv(pfx);
24110 delta++;
24111 DIP("vmovsd %s,%s,%s\n",
24112 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
24113 IRTemp res = newTemp(Ity_V128);
24114 assign(res, binop(Iop_64HLtoV128,
24115 getXMMRegLane64(rV, 1),
24116 getXMMRegLane64(rE, 0)));
24117 putYMMRegLoAndZU(rG, mkexpr(res));
24118 *uses_vvvv = True;
24119 goto decode_success;
24121 /* VMOVSS m32, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */
24122 /* Move 32 bits from E (mem only) to G (lo half xmm).
24123 Bits 255-32 of the dest are zeroed out. */
24124 if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) {
24125 UChar modrm = getUChar(delta);
24126 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24127 UInt rG = gregOfRexRM(pfx,modrm);
24128 IRTemp z128 = newTemp(Ity_V128);
24129 assign(z128, mkV128(0));
24130 putXMMReg( rG, mkexpr(z128) );
24131 /* FIXME: ALIGNMENT CHECK? */
24132 putXMMRegLane32( rG, 0, loadLE(Ity_I32, mkexpr(addr)) );
24133 putYMMRegLane128( rG, 1, mkexpr(z128) );
24134 DIP("vmovss %s,%s\n", dis_buf, nameXMMReg(rG));
24135 delta += alen;
24136 goto decode_success;
24138 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */
24139 /* Reg form. */
24140 if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) {
24141 UChar modrm = getUChar(delta);
24142 UInt rG = gregOfRexRM(pfx, modrm);
24143 UInt rE = eregOfRexRM(pfx, modrm);
24144 UInt rV = getVexNvvvv(pfx);
24145 delta++;
24146 DIP("vmovss %s,%s,%s\n",
24147 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
24148 IRTemp res = newTemp(Ity_V128);
24149 assign( res, binop( Iop_64HLtoV128,
24150 getXMMRegLane64(rV, 1),
24151 binop(Iop_32HLto64,
24152 getXMMRegLane32(rV, 1),
24153 getXMMRegLane32(rE, 0)) ) );
24154 putYMMRegLoAndZU(rG, mkexpr(res));
24155 *uses_vvvv = True;
24156 goto decode_success;
24158 /* VMOVUPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 10 /r */
24159 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24160 UChar modrm = getUChar(delta);
24161 UInt rG = gregOfRexRM(pfx, modrm);
24162 if (epartIsReg(modrm)) {
24163 UInt rE = eregOfRexRM(pfx,modrm);
24164 putYMMRegLoAndZU( rG, getXMMReg( rE ));
24165 DIP("vmovupd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
24166 delta += 1;
24167 } else {
24168 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24169 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
24170 DIP("vmovupd %s,%s\n", dis_buf, nameXMMReg(rG));
24171 delta += alen;
24173 goto decode_success;
24175 /* VMOVUPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 10 /r */
24176 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24177 UChar modrm = getUChar(delta);
24178 UInt rG = gregOfRexRM(pfx, modrm);
24179 if (epartIsReg(modrm)) {
24180 UInt rE = eregOfRexRM(pfx,modrm);
24181 putYMMReg( rG, getYMMReg( rE ));
24182 DIP("vmovupd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
24183 delta += 1;
24184 } else {
24185 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24186 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
24187 DIP("vmovupd %s,%s\n", dis_buf, nameYMMReg(rG));
24188 delta += alen;
24190 goto decode_success;
24192 /* VMOVUPS xmm2/m128, xmm1 = VEX.128.0F.WIG 10 /r */
24193 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24194 UChar modrm = getUChar(delta);
24195 UInt rG = gregOfRexRM(pfx, modrm);
24196 if (epartIsReg(modrm)) {
24197 UInt rE = eregOfRexRM(pfx,modrm);
24198 putYMMRegLoAndZU( rG, getXMMReg( rE ));
24199 DIP("vmovups %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
24200 delta += 1;
24201 } else {
24202 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24203 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
24204 DIP("vmovups %s,%s\n", dis_buf, nameXMMReg(rG));
24205 delta += alen;
24207 goto decode_success;
24209 /* VMOVUPS ymm2/m256, ymm1 = VEX.256.0F.WIG 10 /r */
24210 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24211 UChar modrm = getUChar(delta);
24212 UInt rG = gregOfRexRM(pfx, modrm);
24213 if (epartIsReg(modrm)) {
24214 UInt rE = eregOfRexRM(pfx,modrm);
24215 putYMMReg( rG, getYMMReg( rE ));
24216 DIP("vmovups %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
24217 delta += 1;
24218 } else {
24219 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24220 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
24221 DIP("vmovups %s,%s\n", dis_buf, nameYMMReg(rG));
24222 delta += alen;
24224 goto decode_success;
24226 break;
24228 case 0x11:
24229 /* VMOVSD xmm1, m64 = VEX.LIG.F2.0F.WIG 11 /r */
24230 /* Move 64 bits from G (low half xmm) to mem only. */
24231 if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) {
24232 UChar modrm = getUChar(delta);
24233 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24234 UInt rG = gregOfRexRM(pfx,modrm);
24235 /* FIXME: ALIGNMENT CHECK? */
24236 storeLE( mkexpr(addr), getXMMRegLane64(rG, 0));
24237 DIP("vmovsd %s,%s\n", nameXMMReg(rG), dis_buf);
24238 delta += alen;
24239 goto decode_success;
24241 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 11 /r */
24242 /* Reg form. */
24243 if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) {
24244 UChar modrm = getUChar(delta);
24245 UInt rG = gregOfRexRM(pfx, modrm);
24246 UInt rE = eregOfRexRM(pfx, modrm);
24247 UInt rV = getVexNvvvv(pfx);
24248 delta++;
24249 DIP("vmovsd %s,%s,%s\n",
24250 nameXMMReg(rG), nameXMMReg(rV), nameXMMReg(rE));
24251 IRTemp res = newTemp(Ity_V128);
24252 assign(res, binop(Iop_64HLtoV128,
24253 getXMMRegLane64(rV, 1),
24254 getXMMRegLane64(rG, 0)));
24255 putYMMRegLoAndZU(rE, mkexpr(res));
24256 *uses_vvvv = True;
24257 goto decode_success;
24259 /* VMOVSS xmm1, m32 = VEX.LIG.F3.0F.WIG 11 /r */
24260 /* Move 32 bits from G (low 1/4 xmm) to mem only. */
24261 if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) {
24262 UChar modrm = getUChar(delta);
24263 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24264 UInt rG = gregOfRexRM(pfx,modrm);
24265 /* FIXME: ALIGNMENT CHECK? */
24266 storeLE( mkexpr(addr), getXMMRegLane32(rG, 0));
24267 DIP("vmovss %s,%s\n", nameXMMReg(rG), dis_buf);
24268 delta += alen;
24269 goto decode_success;
24271 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 11 /r */
24272 /* Reg form. */
24273 if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) {
24274 UChar modrm = getUChar(delta);
24275 UInt rG = gregOfRexRM(pfx, modrm);
24276 UInt rE = eregOfRexRM(pfx, modrm);
24277 UInt rV = getVexNvvvv(pfx);
24278 delta++;
24279 DIP("vmovss %s,%s,%s\n",
24280 nameXMMReg(rG), nameXMMReg(rV), nameXMMReg(rE));
24281 IRTemp res = newTemp(Ity_V128);
24282 assign( res, binop( Iop_64HLtoV128,
24283 getXMMRegLane64(rV, 1),
24284 binop(Iop_32HLto64,
24285 getXMMRegLane32(rV, 1),
24286 getXMMRegLane32(rG, 0)) ) );
24287 putYMMRegLoAndZU(rE, mkexpr(res));
24288 *uses_vvvv = True;
24289 goto decode_success;
24291 /* VMOVUPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 11 /r */
24292 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24293 UChar modrm = getUChar(delta);
24294 UInt rG = gregOfRexRM(pfx,modrm);
24295 if (epartIsReg(modrm)) {
24296 UInt rE = eregOfRexRM(pfx,modrm);
24297 putYMMRegLoAndZU( rE, getXMMReg(rG) );
24298 DIP("vmovupd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
24299 delta += 1;
24300 } else {
24301 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24302 storeLE( mkexpr(addr), getXMMReg(rG) );
24303 DIP("vmovupd %s,%s\n", nameXMMReg(rG), dis_buf);
24304 delta += alen;
24306 goto decode_success;
24308 /* VMOVUPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 11 /r */
24309 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24310 UChar modrm = getUChar(delta);
24311 UInt rG = gregOfRexRM(pfx,modrm);
24312 if (epartIsReg(modrm)) {
24313 UInt rE = eregOfRexRM(pfx,modrm);
24314 putYMMReg( rE, getYMMReg(rG) );
24315 DIP("vmovupd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
24316 delta += 1;
24317 } else {
24318 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24319 storeLE( mkexpr(addr), getYMMReg(rG) );
24320 DIP("vmovupd %s,%s\n", nameYMMReg(rG), dis_buf);
24321 delta += alen;
24323 goto decode_success;
24325 /* VMOVUPS xmm1, xmm2/m128 = VEX.128.0F.WIG 11 /r */
24326 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24327 UChar modrm = getUChar(delta);
24328 UInt rG = gregOfRexRM(pfx,modrm);
24329 if (epartIsReg(modrm)) {
24330 UInt rE = eregOfRexRM(pfx,modrm);
24331 putYMMRegLoAndZU( rE, getXMMReg(rG) );
24332 DIP("vmovups %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
24333 delta += 1;
24334 } else {
24335 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24336 storeLE( mkexpr(addr), getXMMReg(rG) );
24337 DIP("vmovups %s,%s\n", nameXMMReg(rG), dis_buf);
24338 delta += alen;
24340 goto decode_success;
24342 /* VMOVUPS ymm1, ymm2/m256 = VEX.256.0F.WIG 11 /r */
24343 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24344 UChar modrm = getUChar(delta);
24345 UInt rG = gregOfRexRM(pfx,modrm);
24346 if (epartIsReg(modrm)) {
24347 UInt rE = eregOfRexRM(pfx,modrm);
24348 putYMMReg( rE, getYMMReg(rG) );
24349 DIP("vmovups %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
24350 delta += 1;
24351 } else {
24352 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24353 storeLE( mkexpr(addr), getYMMReg(rG) );
24354 DIP("vmovups %s,%s\n", nameYMMReg(rG), dis_buf);
24355 delta += alen;
24357 goto decode_success;
24359 break;
24361 case 0x12:
24362 /* VMOVDDUP xmm2/m64, xmm1 = VEX.128.F2.0F.WIG 12 /r */
24363 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24364 delta = dis_MOVDDUP_128( vbi, pfx, delta, True/*isAvx*/ );
24365 goto decode_success;
24367 /* VMOVDDUP ymm2/m256, ymm1 = VEX.256.F2.0F.WIG 12 /r */
24368 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24369 delta = dis_MOVDDUP_256( vbi, pfx, delta );
24370 goto decode_success;
24372 /* VMOVHLPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 12 /r */
24373 /* Insn only exists in reg form */
24374 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
24375 && epartIsReg(getUChar(delta))) {
24376 UChar modrm = getUChar(delta);
24377 UInt rG = gregOfRexRM(pfx, modrm);
24378 UInt rE = eregOfRexRM(pfx, modrm);
24379 UInt rV = getVexNvvvv(pfx);
24380 delta++;
24381 DIP("vmovhlps %s,%s,%s\n",
24382 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
24383 IRTemp res = newTemp(Ity_V128);
24384 assign(res, binop(Iop_64HLtoV128,
24385 getXMMRegLane64(rV, 1),
24386 getXMMRegLane64(rE, 1)));
24387 putYMMRegLoAndZU(rG, mkexpr(res));
24388 *uses_vvvv = True;
24389 goto decode_success;
24391 /* VMOVLPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 12 /r */
24392 /* Insn exists only in mem form, it appears. */
24393 /* VMOVLPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 12 /r */
24394 /* Insn exists only in mem form, it appears. */
24395 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24396 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
24397 UChar modrm = getUChar(delta);
24398 UInt rG = gregOfRexRM(pfx, modrm);
24399 UInt rV = getVexNvvvv(pfx);
24400 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24401 delta += alen;
24402 DIP("vmovlpd %s,%s,%s\n",
24403 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
24404 IRTemp res = newTemp(Ity_V128);
24405 assign(res, binop(Iop_64HLtoV128,
24406 getXMMRegLane64(rV, 1),
24407 loadLE(Ity_I64, mkexpr(addr))));
24408 putYMMRegLoAndZU(rG, mkexpr(res));
24409 *uses_vvvv = True;
24410 goto decode_success;
24412 /* VMOVSLDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 12 /r */
24413 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
24414 delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/,
24415 True/*isL*/ );
24416 goto decode_success;
24418 /* VMOVSLDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 12 /r */
24419 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
24420 delta = dis_MOVSxDUP_256( vbi, pfx, delta, True/*isL*/ );
24421 goto decode_success;
24423 break;
24425 case 0x13:
24426 /* VMOVLPS xmm1, m64 = VEX.128.0F.WIG 13 /r */
24427 /* Insn exists only in mem form, it appears. */
24428 /* VMOVLPD xmm1, m64 = VEX.128.66.0F.WIG 13 /r */
24429 /* Insn exists only in mem form, it appears. */
24430 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24431 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
24432 UChar modrm = getUChar(delta);
24433 UInt rG = gregOfRexRM(pfx, modrm);
24434 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24435 delta += alen;
24436 storeLE( mkexpr(addr), getXMMRegLane64( rG, 0));
24437 DIP("vmovlpd %s,%s\n", nameXMMReg(rG), dis_buf);
24438 goto decode_success;
24440 break;
24442 case 0x14:
24443 case 0x15:
24444 /* VUNPCKLPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 14 /r */
24445 /* VUNPCKHPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 15 /r */
24446 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24447 Bool hi = opc == 0x15;
24448 UChar modrm = getUChar(delta);
24449 UInt rG = gregOfRexRM(pfx,modrm);
24450 UInt rV = getVexNvvvv(pfx);
24451 IRTemp eV = newTemp(Ity_V128);
24452 IRTemp vV = newTemp(Ity_V128);
24453 assign( vV, getXMMReg(rV) );
24454 if (epartIsReg(modrm)) {
24455 UInt rE = eregOfRexRM(pfx,modrm);
24456 assign( eV, getXMMReg(rE) );
24457 delta += 1;
24458 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
24459 nameXMMReg(rE), nameXMMReg(rG));
24460 } else {
24461 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24462 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
24463 delta += alen;
24464 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
24465 dis_buf, nameXMMReg(rG));
24467 IRTemp res = math_UNPCKxPS_128( eV, vV, hi );
24468 putYMMRegLoAndZU( rG, mkexpr(res) );
24469 *uses_vvvv = True;
24470 goto decode_success;
24472 /* VUNPCKLPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 14 /r */
24473 /* VUNPCKHPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 15 /r */
24474 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24475 Bool hi = opc == 0x15;
24476 UChar modrm = getUChar(delta);
24477 UInt rG = gregOfRexRM(pfx,modrm);
24478 UInt rV = getVexNvvvv(pfx);
24479 IRTemp eV = newTemp(Ity_V256);
24480 IRTemp vV = newTemp(Ity_V256);
24481 assign( vV, getYMMReg(rV) );
24482 if (epartIsReg(modrm)) {
24483 UInt rE = eregOfRexRM(pfx,modrm);
24484 assign( eV, getYMMReg(rE) );
24485 delta += 1;
24486 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
24487 nameYMMReg(rE), nameYMMReg(rG));
24488 } else {
24489 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24490 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
24491 delta += alen;
24492 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
24493 dis_buf, nameYMMReg(rG));
24495 IRTemp res = math_UNPCKxPS_256( eV, vV, hi );
24496 putYMMReg( rG, mkexpr(res) );
24497 *uses_vvvv = True;
24498 goto decode_success;
24500 /* VUNPCKLPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 14 /r */
24501 /* VUNPCKHPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 15 /r */
24502 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24503 Bool hi = opc == 0x15;
24504 UChar modrm = getUChar(delta);
24505 UInt rG = gregOfRexRM(pfx,modrm);
24506 UInt rV = getVexNvvvv(pfx);
24507 IRTemp eV = newTemp(Ity_V128);
24508 IRTemp vV = newTemp(Ity_V128);
24509 assign( vV, getXMMReg(rV) );
24510 if (epartIsReg(modrm)) {
24511 UInt rE = eregOfRexRM(pfx,modrm);
24512 assign( eV, getXMMReg(rE) );
24513 delta += 1;
24514 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
24515 nameXMMReg(rE), nameXMMReg(rG));
24516 } else {
24517 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24518 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
24519 delta += alen;
24520 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
24521 dis_buf, nameXMMReg(rG));
24523 IRTemp res = math_UNPCKxPD_128( eV, vV, hi );
24524 putYMMRegLoAndZU( rG, mkexpr(res) );
24525 *uses_vvvv = True;
24526 goto decode_success;
24528 /* VUNPCKLPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 14 /r */
24529 /* VUNPCKHPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 15 /r */
24530 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24531 Bool hi = opc == 0x15;
24532 UChar modrm = getUChar(delta);
24533 UInt rG = gregOfRexRM(pfx,modrm);
24534 UInt rV = getVexNvvvv(pfx);
24535 IRTemp eV = newTemp(Ity_V256);
24536 IRTemp vV = newTemp(Ity_V256);
24537 assign( vV, getYMMReg(rV) );
24538 if (epartIsReg(modrm)) {
24539 UInt rE = eregOfRexRM(pfx,modrm);
24540 assign( eV, getYMMReg(rE) );
24541 delta += 1;
24542 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
24543 nameYMMReg(rE), nameYMMReg(rG));
24544 } else {
24545 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24546 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
24547 delta += alen;
24548 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
24549 dis_buf, nameYMMReg(rG));
24551 IRTemp res = math_UNPCKxPD_256( eV, vV, hi );
24552 putYMMReg( rG, mkexpr(res) );
24553 *uses_vvvv = True;
24554 goto decode_success;
24556 break;
24558 case 0x16:
24559 /* VMOVLHPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 16 /r */
24560 /* Insn only exists in reg form */
24561 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
24562 && epartIsReg(getUChar(delta))) {
24563 UChar modrm = getUChar(delta);
24564 UInt rG = gregOfRexRM(pfx, modrm);
24565 UInt rE = eregOfRexRM(pfx, modrm);
24566 UInt rV = getVexNvvvv(pfx);
24567 delta++;
24568 DIP("vmovlhps %s,%s,%s\n",
24569 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
24570 IRTemp res = newTemp(Ity_V128);
24571 assign(res, binop(Iop_64HLtoV128,
24572 getXMMRegLane64(rE, 0),
24573 getXMMRegLane64(rV, 0)));
24574 putYMMRegLoAndZU(rG, mkexpr(res));
24575 *uses_vvvv = True;
24576 goto decode_success;
24578 /* VMOVHPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 16 /r */
24579 /* Insn exists only in mem form, it appears. */
24580 /* VMOVHPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 16 /r */
24581 /* Insn exists only in mem form, it appears. */
24582 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24583 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
24584 UChar modrm = getUChar(delta);
24585 UInt rG = gregOfRexRM(pfx, modrm);
24586 UInt rV = getVexNvvvv(pfx);
24587 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24588 delta += alen;
24589 DIP("vmovhp%c %s,%s,%s\n", have66(pfx) ? 'd' : 's',
24590 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
24591 IRTemp res = newTemp(Ity_V128);
24592 assign(res, binop(Iop_64HLtoV128,
24593 loadLE(Ity_I64, mkexpr(addr)),
24594 getXMMRegLane64(rV, 0)));
24595 putYMMRegLoAndZU(rG, mkexpr(res));
24596 *uses_vvvv = True;
24597 goto decode_success;
24599 /* VMOVSHDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 16 /r */
24600 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
24601 delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/,
24602 False/*!isL*/ );
24603 goto decode_success;
24605 /* VMOVSHDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 16 /r */
24606 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
24607 delta = dis_MOVSxDUP_256( vbi, pfx, delta, False/*!isL*/ );
24608 goto decode_success;
24610 break;
24612 case 0x17:
24613 /* VMOVHPS xmm1, m64 = VEX.128.0F.WIG 17 /r */
24614 /* Insn exists only in mem form, it appears. */
24615 /* VMOVHPD xmm1, m64 = VEX.128.66.0F.WIG 17 /r */
24616 /* Insn exists only in mem form, it appears. */
24617 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24618 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
24619 UChar modrm = getUChar(delta);
24620 UInt rG = gregOfRexRM(pfx, modrm);
24621 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24622 delta += alen;
24623 storeLE( mkexpr(addr), getXMMRegLane64( rG, 1));
24624 DIP("vmovhp%c %s,%s\n", have66(pfx) ? 'd' : 's',
24625 nameXMMReg(rG), dis_buf);
24626 goto decode_success;
24628 break;
24630 case 0x28:
24631 /* VMOVAPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 28 /r */
24632 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24633 UChar modrm = getUChar(delta);
24634 UInt rG = gregOfRexRM(pfx, modrm);
24635 if (epartIsReg(modrm)) {
24636 UInt rE = eregOfRexRM(pfx,modrm);
24637 putYMMRegLoAndZU( rG, getXMMReg( rE ));
24638 DIP("vmovapd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
24639 delta += 1;
24640 } else {
24641 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24642 gen_SEGV_if_not_16_aligned( addr );
24643 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
24644 DIP("vmovapd %s,%s\n", dis_buf, nameXMMReg(rG));
24645 delta += alen;
24647 goto decode_success;
24649 /* VMOVAPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 28 /r */
24650 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24651 UChar modrm = getUChar(delta);
24652 UInt rG = gregOfRexRM(pfx, modrm);
24653 if (epartIsReg(modrm)) {
24654 UInt rE = eregOfRexRM(pfx,modrm);
24655 putYMMReg( rG, getYMMReg( rE ));
24656 DIP("vmovapd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
24657 delta += 1;
24658 } else {
24659 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24660 gen_SEGV_if_not_32_aligned( addr );
24661 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
24662 DIP("vmovapd %s,%s\n", dis_buf, nameYMMReg(rG));
24663 delta += alen;
24665 goto decode_success;
24667 /* VMOVAPS xmm2/m128, xmm1 = VEX.128.0F.WIG 28 /r */
24668 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24669 UChar modrm = getUChar(delta);
24670 UInt rG = gregOfRexRM(pfx, modrm);
24671 if (epartIsReg(modrm)) {
24672 UInt rE = eregOfRexRM(pfx,modrm);
24673 putYMMRegLoAndZU( rG, getXMMReg( rE ));
24674 DIP("vmovaps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
24675 delta += 1;
24676 } else {
24677 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24678 gen_SEGV_if_not_16_aligned( addr );
24679 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
24680 DIP("vmovaps %s,%s\n", dis_buf, nameXMMReg(rG));
24681 delta += alen;
24683 goto decode_success;
24685 /* VMOVAPS ymm2/m256, ymm1 = VEX.256.0F.WIG 28 /r */
24686 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24687 UChar modrm = getUChar(delta);
24688 UInt rG = gregOfRexRM(pfx, modrm);
24689 if (epartIsReg(modrm)) {
24690 UInt rE = eregOfRexRM(pfx,modrm);
24691 putYMMReg( rG, getYMMReg( rE ));
24692 DIP("vmovaps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
24693 delta += 1;
24694 } else {
24695 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24696 gen_SEGV_if_not_32_aligned( addr );
24697 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
24698 DIP("vmovaps %s,%s\n", dis_buf, nameYMMReg(rG));
24699 delta += alen;
24701 goto decode_success;
24703 break;
24705 case 0x29:
24706 /* VMOVAPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 29 /r */
24707 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24708 UChar modrm = getUChar(delta);
24709 UInt rG = gregOfRexRM(pfx,modrm);
24710 if (epartIsReg(modrm)) {
24711 UInt rE = eregOfRexRM(pfx,modrm);
24712 putYMMRegLoAndZU( rE, getXMMReg(rG) );
24713 DIP("vmovapd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
24714 delta += 1;
24715 } else {
24716 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24717 gen_SEGV_if_not_16_aligned( addr );
24718 storeLE( mkexpr(addr), getXMMReg(rG) );
24719 DIP("vmovapd %s,%s\n", nameXMMReg(rG), dis_buf );
24720 delta += alen;
24722 goto decode_success;
24724 /* VMOVAPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 29 /r */
24725 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24726 UChar modrm = getUChar(delta);
24727 UInt rG = gregOfRexRM(pfx,modrm);
24728 if (epartIsReg(modrm)) {
24729 UInt rE = eregOfRexRM(pfx,modrm);
24730 putYMMReg( rE, getYMMReg(rG) );
24731 DIP("vmovapd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
24732 delta += 1;
24733 } else {
24734 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24735 gen_SEGV_if_not_32_aligned( addr );
24736 storeLE( mkexpr(addr), getYMMReg(rG) );
24737 DIP("vmovapd %s,%s\n", nameYMMReg(rG), dis_buf );
24738 delta += alen;
24740 goto decode_success;
24742 /* VMOVAPS xmm1, xmm2/m128 = VEX.128.0F.WIG 29 /r */
24743 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24744 UChar modrm = getUChar(delta);
24745 UInt rG = gregOfRexRM(pfx,modrm);
24746 if (epartIsReg(modrm)) {
24747 UInt rE = eregOfRexRM(pfx,modrm);
24748 putYMMRegLoAndZU( rE, getXMMReg(rG) );
24749 DIP("vmovaps %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
24750 delta += 1;
24751 goto decode_success;
24752 } else {
24753 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24754 gen_SEGV_if_not_16_aligned( addr );
24755 storeLE( mkexpr(addr), getXMMReg(rG) );
24756 DIP("vmovaps %s,%s\n", nameXMMReg(rG), dis_buf );
24757 delta += alen;
24758 goto decode_success;
24761 /* VMOVAPS ymm1, ymm2/m256 = VEX.256.0F.WIG 29 /r */
24762 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24763 UChar modrm = getUChar(delta);
24764 UInt rG = gregOfRexRM(pfx,modrm);
24765 if (epartIsReg(modrm)) {
24766 UInt rE = eregOfRexRM(pfx,modrm);
24767 putYMMReg( rE, getYMMReg(rG) );
24768 DIP("vmovaps %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
24769 delta += 1;
24770 goto decode_success;
24771 } else {
24772 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24773 gen_SEGV_if_not_32_aligned( addr );
24774 storeLE( mkexpr(addr), getYMMReg(rG) );
24775 DIP("vmovaps %s,%s\n", nameYMMReg(rG), dis_buf );
24776 delta += alen;
24777 goto decode_success;
24780 break;
24782 case 0x2A: {
24783 IRTemp rmode = newTemp(Ity_I32);
24784 assign( rmode, get_sse_roundingmode() );
24785 /* VCVTSI2SD r/m32, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W0 2A /r */
24786 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
24787 UChar modrm = getUChar(delta);
24788 UInt rV = getVexNvvvv(pfx);
24789 UInt rD = gregOfRexRM(pfx, modrm);
24790 IRTemp arg32 = newTemp(Ity_I32);
24791 if (epartIsReg(modrm)) {
24792 UInt rS = eregOfRexRM(pfx,modrm);
24793 assign( arg32, getIReg32(rS) );
24794 delta += 1;
24795 DIP("vcvtsi2sdl %s,%s,%s\n",
24796 nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD));
24797 } else {
24798 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24799 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
24800 delta += alen;
24801 DIP("vcvtsi2sdl %s,%s,%s\n",
24802 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
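/* I32 -> F64 conversion is exact, so no rounding mode is needed here
   (unlike the 64-bit source and the SS variants below). */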
24804 putXMMRegLane64F( rD, 0,
24805 unop(Iop_I32StoF64, mkexpr(arg32)));
24806 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24807 putYMMRegLane128( rD, 1, mkV128(0) );
24808 *uses_vvvv = True;
24809 goto decode_success;
24811 /* VCVTSI2SD r/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W1 2A /r */
24812 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
24813 UChar modrm = getUChar(delta);
24814 UInt rV = getVexNvvvv(pfx);
24815 UInt rD = gregOfRexRM(pfx, modrm);
24816 IRTemp arg64 = newTemp(Ity_I64);
24817 if (epartIsReg(modrm)) {
24818 UInt rS = eregOfRexRM(pfx,modrm);
24819 assign( arg64, getIReg64(rS) );
24820 delta += 1;
24821 DIP("vcvtsi2sdq %s,%s,%s\n",
24822 nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD));
24823 } else {
24824 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24825 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
24826 delta += alen;
24827 DIP("vcvtsi2sdq %s,%s,%s\n",
24828 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24830 putXMMRegLane64F( rD, 0,
24831 binop( Iop_I64StoF64,
24832 get_sse_roundingmode(),
24833 mkexpr(arg64)) );
24834 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24835 putYMMRegLane128( rD, 1, mkV128(0) );
24836 *uses_vvvv = True;
24837 goto decode_success;
24839 /* VCVTSI2SS r/m64, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W1 2A /r */
24840 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
24841 UChar modrm = getUChar(delta);
24842 UInt rV = getVexNvvvv(pfx);
24843 UInt rD = gregOfRexRM(pfx, modrm);
24844 IRTemp arg64 = newTemp(Ity_I64);
24845 if (epartIsReg(modrm)) {
24846 UInt rS = eregOfRexRM(pfx,modrm);
24847 assign( arg64, getIReg64(rS) );
24848 delta += 1;
24849 DIP("vcvtsi2ssq %s,%s,%s\n",
24850 nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD));
24851 } else {
24852 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24853 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
24854 delta += alen;
24855 DIP("vcvtsi2ssq %s,%s,%s\n",
24856 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24858 putXMMRegLane32F( rD, 0,
24859 binop(Iop_F64toF32,
24860 mkexpr(rmode),
24861 binop(Iop_I64StoF64, mkexpr(rmode),
24862 mkexpr(arg64)) ) );
24863 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
24864 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24865 putYMMRegLane128( rD, 1, mkV128(0) );
24866 *uses_vvvv = True;
24867 goto decode_success;
24869 /* VCVTSI2SS r/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W0 2A /r */
24870 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
24871 UChar modrm = getUChar(delta);
24872 UInt rV = getVexNvvvv(pfx);
24873 UInt rD = gregOfRexRM(pfx, modrm);
24874 IRTemp arg32 = newTemp(Ity_I32);
24875 if (epartIsReg(modrm)) {
24876 UInt rS = eregOfRexRM(pfx,modrm);
24877 assign( arg32, getIReg32(rS) );
24878 delta += 1;
24879 DIP("vcvtsi2ssl %s,%s,%s\n",
24880 nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD));
24881 } else {
24882 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24883 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
24884 delta += alen;
24885 DIP("vcvtsi2ssl %s,%s,%s\n",
24886 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24888 putXMMRegLane32F( rD, 0,
24889 binop(Iop_F64toF32,
24890 mkexpr(rmode),
24891 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
24892 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
24893 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24894 putYMMRegLane128( rD, 1, mkV128(0) );
24895 *uses_vvvv = True;
24896 goto decode_success;
24898 break;
24901 case 0x2B:
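/* Note: the non-temporal hint is not modelled; these are handled as
   ordinary aligned stores. */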
24902 /* VMOVNTPD xmm1, m128 = VEX.128.66.0F.WIG 2B /r */
24903 /* VMOVNTPS xmm1, m128 = VEX.128.0F.WIG 2B /r */
24904 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24905 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
24906 UChar modrm = getUChar(delta);
24907 UInt rS = gregOfRexRM(pfx, modrm);
24908 IRTemp tS = newTemp(Ity_V128);
24909 assign(tS, getXMMReg(rS));
24910 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
24911 delta += alen;
24912 gen_SEGV_if_not_16_aligned(addr);
24913 storeLE(mkexpr(addr), mkexpr(tS));
24914 DIP("vmovntp%c %s,%s\n", have66(pfx) ? 'd' : 's',
24915 nameXMMReg(rS), dis_buf);
24916 goto decode_success;
24918 /* VMOVNTPD ymm1, m256 = VEX.256.66.0F.WIG 2B /r */
24919 /* VMOVNTPS ymm1, m256 = VEX.256.0F.WIG 2B /r */
24920 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24921 && 1==getVexL(pfx)/*256*/ && !epartIsReg(getUChar(delta))) {
24922 UChar modrm = getUChar(delta);
24923 UInt rS = gregOfRexRM(pfx, modrm);
24924 IRTemp tS = newTemp(Ity_V256);
24925 assign(tS, getYMMReg(rS));
24926 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
24927 delta += alen;
24928 gen_SEGV_if_not_32_aligned(addr);
24929 storeLE(mkexpr(addr), mkexpr(tS));
24930 DIP("vmovntp%c %s,%s\n", have66(pfx) ? 'd' : 's',
24931 nameYMMReg(rS), dis_buf);
24932 goto decode_success;
24934 break;
24936 case 0x2C:
24937 /* VCVTTSD2SI xmm1/m64, r32 = VEX.LIG.F2.0F.W0 2C /r */
24938 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
24939 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
24940 goto decode_success;
24942 /* VCVTTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2C /r */
24943 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
24944 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
24945 goto decode_success;
24947 /* VCVTTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2C /r */
24948 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
24949 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
24950 goto decode_success;
24952 /* VCVTTSS2SI xmm1/m32, r64 = VEX.LIG.F3.0F.W1 2C /r */
24953 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
24954 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
24955 goto decode_success;
24957 break;
24959 case 0x2D:
24960 /* VCVTSD2SI xmm1/m64, r32 = VEX.LIG.F2.0F.W0 2D /r */
24961 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
24962 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
24963 goto decode_success;
24965 /* VCVTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2D /r */
24966 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
24967 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
24968 goto decode_success;
24970 /* VCVTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2D /r */
24971 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
24972 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
24973 goto decode_success;
24975 /* VCVTSS2SI xmm1/m32, r64 = VEX.LIG.F3.0F.W1 2D /r */
24976 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
24977 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
24978 goto decode_success;
24980 break;
24982 case 0x2E:
24983 case 0x2F:
24984 /* VUCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2E /r */
24985 /* VCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2F /r */
24986 if (have66noF2noF3(pfx)) {
24987 delta = dis_COMISD( vbi, pfx, delta, True/*isAvx*/, opc );
24988 goto decode_success;
24990 /* VUCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2E /r */
24991 /* VCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2F /r */
24992 if (haveNo66noF2noF3(pfx)) {
24993 delta = dis_COMISS( vbi, pfx, delta, True/*isAvx*/, opc );
24994 goto decode_success;
24996 break;
24998 case 0x50:
24999 /* VMOVMSKPD xmm2, r32 = VEX.128.66.0F.WIG 50 /r */
25000 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25001 delta = dis_MOVMSKPD_128( vbi, pfx, delta, True/*isAvx*/ );
25002 goto decode_success;
25004 /* VMOVMSKPD ymm2, r32 = VEX.256.66.0F.WIG 50 /r */
25005 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25006 delta = dis_MOVMSKPD_256( vbi, pfx, delta );
25007 goto decode_success;
25009 /* VMOVMSKPS xmm2, r32 = VEX.128.0F.WIG 50 /r */
25010 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25011 delta = dis_MOVMSKPS_128( vbi, pfx, delta, True/*isAvx*/ );
25012 goto decode_success;
25014 /* VMOVMSKPS ymm2, r32 = VEX.256.0F.WIG 50 /r */
25015 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25016 delta = dis_MOVMSKPS_256( vbi, pfx, delta );
25017 goto decode_success;
25019 break;
25021 case 0x51:
25022 /* VSQRTSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 51 /r */
25023 if (haveF3no66noF2(pfx)) {
25024 delta = dis_AVX128_E_V_to_G_lo32_unary(
25025 uses_vvvv, vbi, pfx, delta, "vsqrtss", Iop_Sqrt32F0x4 );
25026 goto decode_success;
25028 /* VSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 51 /r */
25029 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25030 delta = dis_AVX128_E_to_G_unary_all(
25031 uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx4 );
25032 goto decode_success;
25034 /* VSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 51 /r */
25035 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25036 delta = dis_AVX256_E_to_G_unary_all(
25037 uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx8 );
25038 goto decode_success;
25040 /* VSQRTSD xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F2.0F.WIG 51 /r */
25041 if (haveF2no66noF3(pfx)) {
25042 delta = dis_AVX128_E_V_to_G_lo64_unary(
25043 uses_vvvv, vbi, pfx, delta, "vsqrtsd", Iop_Sqrt64F0x2 );
25044 goto decode_success;
25046 /* VSQRTPD xmm2/m128(E), xmm1(G) = VEX.NDS.128.66.0F.WIG 51 /r */
25047 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25048 delta = dis_AVX128_E_to_G_unary_all(
25049 uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx2 );
25050 goto decode_success;
25052 /* VSQRTPD ymm2/m256(E), ymm1(G) = VEX.NDS.256.66.0F.WIG 51 /r */
25053 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25054 delta = dis_AVX256_E_to_G_unary_all(
25055 uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx4 );
25056 goto decode_success;
25058 break;
25060 case 0x52:
25061 /* VRSQRTSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 52 /r */
25062 if (haveF3no66noF2(pfx)) {
25063 delta = dis_AVX128_E_V_to_G_lo32_unary(
25064 uses_vvvv, vbi, pfx, delta, "vrsqrtss",
25065 Iop_RSqrtEst32F0x4 );
25066 goto decode_success;
25068 /* VRSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 52 /r */
25069 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25070 delta = dis_AVX128_E_to_G_unary_all(
25071 uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrtEst32Fx4 );
25072 goto decode_success;
25074 /* VRSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 52 /r */
25075 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25076 delta = dis_AVX256_E_to_G_unary_all(
25077 uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrtEst32Fx8 );
25078 goto decode_success;
25080 break;
25082 case 0x53:
25083 /* VRCPSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 53 /r */
25084 if (haveF3no66noF2(pfx)) {
25085 delta = dis_AVX128_E_V_to_G_lo32_unary(
25086 uses_vvvv, vbi, pfx, delta, "vrcpss", Iop_RecipEst32F0x4 );
25087 goto decode_success;
25089 /* VRCPPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 53 /r */
25090 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25091 delta = dis_AVX128_E_to_G_unary_all(
25092 uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_RecipEst32Fx4 );
25093 goto decode_success;
25095 /* VRCPPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 53 /r */
25096 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25097 delta = dis_AVX256_E_to_G_unary_all(
25098 uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_RecipEst32Fx8 );
25099 goto decode_success;
25101 break;
25103 case 0x54:
25104 /* VANDPD r/m, rV, r ::: r = rV & r/m */
25105 /* VANDPD = VEX.NDS.128.66.0F.WIG 54 /r */
25106 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25107 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25108 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV128 );
25109 goto decode_success;
25111 /* VANDPD r/m, rV, r ::: r = rV & r/m */
25112 /* VANDPD = VEX.NDS.256.66.0F.WIG 54 /r */
25113 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25114 delta = dis_AVX256_E_V_to_G(
25115 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV256 );
25116 goto decode_success;
25118 /* VANDPS = VEX.NDS.128.0F.WIG 54 /r */
25119 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25120 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25121 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV128 );
25122 goto decode_success;
25124 /* VANDPS = VEX.NDS.256.0F.WIG 54 /r */
25125 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25126 delta = dis_AVX256_E_V_to_G(
25127 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV256 );
25128 goto decode_success;
25130 break;
25132 case 0x55:
25133 /* VANDNPD r/m, rV, r ::: r = (not rV) & r/m */
25134 /* VANDNPD = VEX.NDS.128.66.0F.WIG 55 /r */
25135 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25136 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25137 uses_vvvv, vbi, pfx, delta, "vandnpd", Iop_AndV128,
25138 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
25139 goto decode_success;
25141 /* VANDNPD = VEX.NDS.256.66.0F.WIG 55 /r */
25142 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25143 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
25144 uses_vvvv, vbi, pfx, delta, "vandnpd", Iop_AndV256,
25145 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
25146 goto decode_success;
25148 /* VANDNPS = VEX.NDS.128.0F.WIG 55 /r */
25149 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25150 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25151 uses_vvvv, vbi, pfx, delta, "vandnps", Iop_AndV128,
25152 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
25153 goto decode_success;
25155 /* VANDNPS = VEX.NDS.256.0F.WIG 55 /r */
25156 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25157 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
25158 uses_vvvv, vbi, pfx, delta, "vandnps", Iop_AndV256,
25159 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
25160 goto decode_success;
25162 break;
25164 case 0x56:
25165 /* VORPD r/m, rV, r ::: r = rV | r/m */
25166 /* VORPD = VEX.NDS.128.66.0F.WIG 56 /r */
25167 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25168 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25169 uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV128 );
25170 goto decode_success;
25172 /* VORPD r/m, rV, r ::: r = rV | r/m */
25173 /* VORPD = VEX.NDS.256.66.0F.WIG 56 /r */
25174 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25175 delta = dis_AVX256_E_V_to_G(
25176 uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV256 );
25177 goto decode_success;
25179 /* VORPS r/m, rV, r ::: r = rV | r/m */
25180 /* VORPS = VEX.NDS.128.0F.WIG 56 /r */
25181 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25182 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25183 uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV128 );
25184 goto decode_success;
25186 /* VORPS r/m, rV, r ::: r = rV | r/m */
25187 /* VORPS = VEX.NDS.256.0F.WIG 56 /r */
25188 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25189 delta = dis_AVX256_E_V_to_G(
25190 uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV256 );
25191 goto decode_success;
25193 break;
25195 case 0x57:
25196 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */
25197 /* VXORPD = VEX.NDS.128.66.0F.WIG 57 /r */
25198 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25199 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25200 uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV128 );
25201 goto decode_success;
25203 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */
25204 /* VXORPD = VEX.NDS.256.66.0F.WIG 57 /r */
25205 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25206 delta = dis_AVX256_E_V_to_G(
25207 uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV256 );
25208 goto decode_success;
25210 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */
25211 /* VXORPS = VEX.NDS.128.0F.WIG 57 /r */
25212 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25213 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25214 uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV128 );
25215 goto decode_success;
25217 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */
25218 /* VXORPS = VEX.NDS.256.0F.WIG 57 /r */
25219 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25220 delta = dis_AVX256_E_V_to_G(
25221 uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV256 );
25222 goto decode_success;
25224 break;
25226 case 0x58:
25227 /* VADDSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 58 /r */
25228 if (haveF2no66noF3(pfx)) {
25229 delta = dis_AVX128_E_V_to_G_lo64(
25230 uses_vvvv, vbi, pfx, delta, "vaddsd", Iop_Add64F0x2 );
25231 goto decode_success;
25233 /* VADDSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 58 /r */
25234 if (haveF3no66noF2(pfx)) {
25235 delta = dis_AVX128_E_V_to_G_lo32(
25236 uses_vvvv, vbi, pfx, delta, "vaddss", Iop_Add32F0x4 );
25237 goto decode_success;
25239 /* VADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 58 /r */
25240 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25241 delta = dis_AVX128_E_V_to_G(
25242 uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx4 );
25243 goto decode_success;
25245 /* VADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 58 /r */
25246 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25247 delta = dis_AVX256_E_V_to_G(
25248 uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx8 );
25249 goto decode_success;
25251 /* VADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 58 /r */
25252 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25253 delta = dis_AVX128_E_V_to_G(
25254 uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx2 );
25255 goto decode_success;
25257 /* VADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 58 /r */
25258 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25259 delta = dis_AVX256_E_V_to_G(
25260 uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx4 );
25261 goto decode_success;
25263 break;
25265 case 0x59:
25266 /* VMULSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 59 /r */
25267 if (haveF2no66noF3(pfx)) {
25268 delta = dis_AVX128_E_V_to_G_lo64(
25269 uses_vvvv, vbi, pfx, delta, "vmulsd", Iop_Mul64F0x2 );
25270 goto decode_success;
25272 /* VMULSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 59 /r */
25273 if (haveF3no66noF2(pfx)) {
25274 delta = dis_AVX128_E_V_to_G_lo32(
25275 uses_vvvv, vbi, pfx, delta, "vmulss", Iop_Mul32F0x4 );
25276 goto decode_success;
25278 /* VMULPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 59 /r */
25279 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25280 delta = dis_AVX128_E_V_to_G(
25281 uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx4 );
25282 goto decode_success;
25284 /* VMULPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 59 /r */
25285 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25286 delta = dis_AVX256_E_V_to_G(
25287 uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx8 );
25288 goto decode_success;
25290 /* VMULPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 59 /r */
25291 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25292 delta = dis_AVX128_E_V_to_G(
25293 uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx2 );
25294 goto decode_success;
25296 /* VMULPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 59 /r */
25297 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25298 delta = dis_AVX256_E_V_to_G(
25299 uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx4 );
25300 goto decode_success;
25302 break;
25304 case 0x5A:
25305 /* VCVTPS2PD xmm2/m64, xmm1 = VEX.128.0F.WIG 5A /r */
25306 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25307 delta = dis_CVTPS2PD_128( vbi, pfx, delta, True/*isAvx*/ );
25308 goto decode_success;
25310 /* VCVTPS2PD xmm2/m128, ymm1 = VEX.256.0F.WIG 5A /r */
25311 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25312 delta = dis_CVTPS2PD_256( vbi, pfx, delta );
25313 goto decode_success;
25315 /* VCVTPD2PS xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5A /r */
25316 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25317 delta = dis_CVTPD2PS_128( vbi, pfx, delta, True/*isAvx*/ );
25318 goto decode_success;
25320 /* VCVTPD2PS ymm2/m256, xmm1 = VEX.256.66.0F.WIG 5A /r */
25321 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25322 delta = dis_CVTPD2PS_256( vbi, pfx, delta );
25323 goto decode_success;
25325 /* VCVTSD2SS xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5A /r */
25326 if (haveF2no66noF3(pfx)) {
25327 UChar modrm = getUChar(delta);
25328 UInt rV = getVexNvvvv(pfx);
25329 UInt rD = gregOfRexRM(pfx, modrm);
25330 IRTemp f64lo = newTemp(Ity_F64);
25331 IRTemp rmode = newTemp(Ity_I32);
25332 assign( rmode, get_sse_roundingmode() );
25333 if (epartIsReg(modrm)) {
25334 UInt rS = eregOfRexRM(pfx,modrm);
25335 assign(f64lo, getXMMRegLane64F(rS, 0));
25336 delta += 1;
25337 DIP("vcvtsd2ss %s,%s,%s\n",
25338 nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD));
25339 } else {
25340 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25341 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)) );
25342 delta += alen;
25343 DIP("vcvtsd2ss %s,%s,%s\n",
25344 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
25346 putXMMRegLane32F( rD, 0,
25347 binop( Iop_F64toF32, mkexpr(rmode),
25348 mkexpr(f64lo)) );
25349 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
25350 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
25351 putYMMRegLane128( rD, 1, mkV128(0) );
25352 *uses_vvvv = True;
25353 goto decode_success;
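/* The F32 -> F64 widening below is exact, hence no rounding mode,
   in contrast to the F64 -> F32 narrowing above. */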
25355 /* VCVTSS2SD xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5A /r */
25356 if (haveF3no66noF2(pfx)) {
25357 UChar modrm = getUChar(delta);
25358 UInt rV = getVexNvvvv(pfx);
25359 UInt rD = gregOfRexRM(pfx, modrm);
25360 IRTemp f32lo = newTemp(Ity_F32);
25361 if (epartIsReg(modrm)) {
25362 UInt rS = eregOfRexRM(pfx,modrm);
25363 assign(f32lo, getXMMRegLane32F(rS, 0));
25364 delta += 1;
25365 DIP("vcvtss2sd %s,%s,%s\n",
25366 nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD));
25367 } else {
25368 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25369 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)) );
25370 delta += alen;
25371 DIP("vcvtss2sd %s,%s,%s\n",
25372 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
25374 putXMMRegLane64F( rD, 0,
25375 unop( Iop_F32toF64, mkexpr(f32lo)) );
25376 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
25377 putYMMRegLane128( rD, 1, mkV128(0) );
25378 *uses_vvvv = True;
25379 goto decode_success;
25381 break;
25383 case 0x5B:
25384 /* VCVTPS2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5B /r */
25385 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25386 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta,
25387 True/*isAvx*/, False/*!r2zero*/ );
25388 goto decode_success;
25390 /* VCVTPS2DQ ymm2/m256, ymm1 = VEX.256.66.0F.WIG 5B /r */
25391 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25392 delta = dis_CVTxPS2DQ_256( vbi, pfx, delta,
25393 False/*!r2zero*/ );
25394 goto decode_success;
25396 /* VCVTTPS2DQ xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 5B /r */
25397 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
25398 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta,
25399 True/*isAvx*/, True/*r2zero*/ );
25400 goto decode_success;
25402 /* VCVTTPS2DQ ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 5B /r */
25403 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
25404 delta = dis_CVTxPS2DQ_256( vbi, pfx, delta,
25405 True/*r2zero*/ );
25406 goto decode_success;
25408 /* VCVTDQ2PS xmm2/m128, xmm1 = VEX.128.0F.WIG 5B /r */
25409 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25410 delta = dis_CVTDQ2PS_128 ( vbi, pfx, delta, True/*isAvx*/ );
25411 goto decode_success;
25413 /* VCVTDQ2PS ymm2/m256, ymm1 = VEX.256.0F.WIG 5B /r */
25414 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25415 delta = dis_CVTDQ2PS_256 ( vbi, pfx, delta );
25416 goto decode_success;
25418 break;
25420 case 0x5C:
25421 /* VSUBSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5C /r */
25422 if (haveF2no66noF3(pfx)) {
25423 delta = dis_AVX128_E_V_to_G_lo64(
25424 uses_vvvv, vbi, pfx, delta, "vsubsd", Iop_Sub64F0x2 );
25425 goto decode_success;
25427 /* VSUBSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5C /r */
25428 if (haveF3no66noF2(pfx)) {
25429 delta = dis_AVX128_E_V_to_G_lo32(
25430 uses_vvvv, vbi, pfx, delta, "vsubss", Iop_Sub32F0x4 );
25431 goto decode_success;
25433 /* VSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5C /r */
25434 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25435 delta = dis_AVX128_E_V_to_G(
25436 uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx4 );
25437 goto decode_success;
25439 /* VSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5C /r */
25440 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25441 delta = dis_AVX256_E_V_to_G(
25442 uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx8 );
25443 goto decode_success;
25445 /* VSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5C /r */
25446 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25447 delta = dis_AVX128_E_V_to_G(
25448 uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx2 );
25449 goto decode_success;
25451 /* VSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5C /r */
25452 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25453 delta = dis_AVX256_E_V_to_G(
25454 uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx4 );
25455 goto decode_success;
25457 break;
25459 case 0x5D:
25460 /* VMINSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5D /r */
25461 if (haveF2no66noF3(pfx)) {
25462 delta = dis_AVX128_E_V_to_G_lo64(
25463 uses_vvvv, vbi, pfx, delta, "vminsd", Iop_Min64F0x2 );
25464 goto decode_success;
25466 /* VMINSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5D /r */
25467 if (haveF3no66noF2(pfx)) {
25468 delta = dis_AVX128_E_V_to_G_lo32(
25469 uses_vvvv, vbi, pfx, delta, "vminss", Iop_Min32F0x4 );
25470 goto decode_success;
25472 /* VMINPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5D /r */
25473 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25474 delta = dis_AVX128_E_V_to_G(
25475 uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx4 );
25476 goto decode_success;
25478 /* VMINPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5D /r */
25479 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25480 delta = dis_AVX256_E_V_to_G(
25481 uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx8 );
25482 goto decode_success;
25484 /* VMINPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5D /r */
25485 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25486 delta = dis_AVX128_E_V_to_G(
25487 uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx2 );
25488 goto decode_success;
25490 /* VMINPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5D /r */
25491 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25492 delta = dis_AVX256_E_V_to_G(
25493 uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx4 );
25494 goto decode_success;
25496 break;
25498 case 0x5E:
25499 /* VDIVSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5E /r */
25500 if (haveF2no66noF3(pfx)) {
25501 delta = dis_AVX128_E_V_to_G_lo64(
25502 uses_vvvv, vbi, pfx, delta, "vdivsd", Iop_Div64F0x2 );
25503 goto decode_success;
25505 /* VDIVSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5E /r */
25506 if (haveF3no66noF2(pfx)) {
25507 delta = dis_AVX128_E_V_to_G_lo32(
25508 uses_vvvv, vbi, pfx, delta, "vdivss", Iop_Div32F0x4 );
25509 goto decode_success;
25511 /* VDIVPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5E /r */
25512 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25513 delta = dis_AVX128_E_V_to_G(
25514 uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx4 );
25515 goto decode_success;
25517 /* VDIVPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5E /r */
25518 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25519 delta = dis_AVX256_E_V_to_G(
25520 uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx8 );
25521 goto decode_success;
25523 /* VDIVPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5E /r */
25524 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25525 delta = dis_AVX128_E_V_to_G(
25526 uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx2 );
25527 goto decode_success;
25529 /* VDIVPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5E /r */
25530 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25531 delta = dis_AVX256_E_V_to_G(
25532 uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx4 );
25533 goto decode_success;
25535 break;
25537 case 0x5F:
25538 /* VMAXSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5F /r */
25539 if (haveF2no66noF3(pfx)) {
25540 delta = dis_AVX128_E_V_to_G_lo64(
25541 uses_vvvv, vbi, pfx, delta, "vmaxsd", Iop_Max64F0x2 );
25542 goto decode_success;
25544 /* VMAXSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5F /r */
25545 if (haveF3no66noF2(pfx)) {
25546 delta = dis_AVX128_E_V_to_G_lo32(
25547 uses_vvvv, vbi, pfx, delta, "vmaxss", Iop_Max32F0x4 );
25548 goto decode_success;
25550 /* VMAXPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5F /r */
25551 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25552 delta = dis_AVX128_E_V_to_G(
25553 uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx4 );
25554 goto decode_success;
25556 /* VMAXPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5F /r */
25557 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25558 delta = dis_AVX256_E_V_to_G(
25559 uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx8 );
25560 goto decode_success;
25562 /* VMAXPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5F /r */
25563 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25564 delta = dis_AVX128_E_V_to_G(
25565 uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx2 );
25566 goto decode_success;
25568 /* VMAXPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5F /r */
25569 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25570 delta = dis_AVX256_E_V_to_G(
25571 uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx4 );
25572 goto decode_success;
25574 break;
25576 case 0x60:
25577 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */
25578 /* VPUNPCKLBW = VEX.NDS.128.66.0F.WIG 60 /r */
25579 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25580 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25581 uses_vvvv, vbi, pfx, delta, "vpunpcklbw",
25582 Iop_InterleaveLO8x16, NULL,
25583 False/*!invertLeftArg*/, True/*swapArgs*/ );
25584 goto decode_success;
25586 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */
25587 /* VPUNPCKLBW = VEX.NDS.256.66.0F.WIG 60 /r */
25588 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25589 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25590 uses_vvvv, vbi, pfx, delta, "vpunpcklbw",
25591 math_VPUNPCKLBW_YMM );
25592 goto decode_success;
25594 break;
25596 case 0x61:
25597 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */
25598 /* VPUNPCKLWD = VEX.NDS.128.66.0F.WIG 61 /r */
25599 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25600 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25601 uses_vvvv, vbi, pfx, delta, "vpunpcklwd",
25602 Iop_InterleaveLO16x8, NULL,
25603 False/*!invertLeftArg*/, True/*swapArgs*/ );
25604 goto decode_success;
25606 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */
25607 /* VPUNPCKLWD = VEX.NDS.256.66.0F.WIG 61 /r */
25608 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25609 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25610 uses_vvvv, vbi, pfx, delta, "vpunpcklwd",
25611 math_VPUNPCKLWD_YMM );
25612 goto decode_success;
25614 break;
25616 case 0x62:
25617 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */
25618 /* VPUNPCKLDQ = VEX.NDS.128.66.0F.WIG 62 /r */
25619 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25620 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25621 uses_vvvv, vbi, pfx, delta, "vpunpckldq",
25622 Iop_InterleaveLO32x4, NULL,
25623 False/*!invertLeftArg*/, True/*swapArgs*/ );
25624 goto decode_success;
25626 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */
25627 /* VPUNPCKLDQ = VEX.NDS.256.66.0F.WIG 62 /r */
25628 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25629 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25630 uses_vvvv, vbi, pfx, delta, "vpunpckldq",
25631 math_VPUNPCKLDQ_YMM );
25632 goto decode_success;
25634 break;
25636 case 0x63:
25637 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */
25638 /* VPACKSSWB = VEX.NDS.128.66.0F.WIG 63 /r */
25639 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25640 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25641 uses_vvvv, vbi, pfx, delta, "vpacksswb",
25642 Iop_QNarrowBin16Sto8Sx16, NULL,
25643 False/*!invertLeftArg*/, True/*swapArgs*/ );
25644 goto decode_success;
25646 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */
25647 /* VPACKSSWB = VEX.NDS.256.66.0F.WIG 63 /r */
25648 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25649 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25650 uses_vvvv, vbi, pfx, delta, "vpacksswb",
25651 math_VPACKSSWB_YMM );
25652 goto decode_success;
25654 break;
25656 case 0x64:
25657 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */
25658 /* VPCMPGTB = VEX.NDS.128.66.0F.WIG 64 /r */
25659 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25660 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25661 uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx16 );
25662 goto decode_success;
25664 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */
25665 /* VPCMPGTB = VEX.NDS.256.66.0F.WIG 64 /r */
25666 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25667 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25668 uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx32 );
25669 goto decode_success;
25671 break;
25673 case 0x65:
25674 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */
25675 /* VPCMPGTW = VEX.NDS.128.66.0F.WIG 65 /r */
25676 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25677 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25678 uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx8 );
25679 goto decode_success;
25681 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */
25682 /* VPCMPGTW = VEX.NDS.256.66.0F.WIG 65 /r */
25683 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25684 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25685 uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx16 );
25686 goto decode_success;
25688 break;
25690 case 0x66:
25691 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */
25692 /* VPCMPGTD = VEX.NDS.128.66.0F.WIG 66 /r */
25693 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25694 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25695 uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx4 );
25696 goto decode_success;
25698 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */
25699 /* VPCMPGTD = VEX.NDS.256.66.0F.WIG 66 /r */
25700 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25701 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25702 uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx8 );
25703 goto decode_success;
25705 break;
25707 case 0x67:
25708 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */
25709 /* VPACKUSWB = VEX.NDS.128.66.0F.WIG 67 /r */
25710 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25711 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25712 uses_vvvv, vbi, pfx, delta, "vpackuswb",
25713 Iop_QNarrowBin16Sto8Ux16, NULL,
25714 False/*!invertLeftArg*/, True/*swapArgs*/ );
25715 goto decode_success;
25717 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */
25718 /* VPACKUSWB = VEX.NDS.256.66.0F.WIG 67 /r */
25719 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25720 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25721 uses_vvvv, vbi, pfx, delta, "vpackuswb",
25722 math_VPACKUSWB_YMM );
25723 goto decode_success;
25725 break;
25727 case 0x68:
25728 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */
25729 /* VPUNPCKHBW = VEX.NDS.128.66.0F.WIG 68 /r */
25730 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25731 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25732 uses_vvvv, vbi, pfx, delta, "vpunpckhbw",
25733 Iop_InterleaveHI8x16, NULL,
25734 False/*!invertLeftArg*/, True/*swapArgs*/ );
25735 goto decode_success;
25737 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */
25738 /* VPUNPCKHBW = VEX.NDS.256.66.0F.WIG 68 /r */
25739 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25740 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25741 uses_vvvv, vbi, pfx, delta, "vpunpckhbw",
25742 math_VPUNPCKHBW_YMM );
25743 goto decode_success;
25745 break;
25747 case 0x69:
25748 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */
25749 /* VPUNPCKHWD = VEX.NDS.128.66.0F.WIG 69 /r */
25750 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25751 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25752 uses_vvvv, vbi, pfx, delta, "vpunpckhwd",
25753 Iop_InterleaveHI16x8, NULL,
25754 False/*!invertLeftArg*/, True/*swapArgs*/ );
25755 goto decode_success;
25757 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */
25758 /* VPUNPCKHWD = VEX.NDS.256.66.0F.WIG 69 /r */
25759 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25760 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25761 uses_vvvv, vbi, pfx, delta, "vpunpckhwd",
25762 math_VPUNPCKHWD_YMM );
25763 goto decode_success;
25765 break;
25767 case 0x6A:
25768 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */
25769 /* VPUNPCKHDQ = VEX.NDS.128.66.0F.WIG 6A /r */
25770 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25771 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25772 uses_vvvv, vbi, pfx, delta, "vpunpckhdq",
25773 Iop_InterleaveHI32x4, NULL,
25774 False/*!invertLeftArg*/, True/*swapArgs*/ );
25775 goto decode_success;
25777 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */
25778 /* VPUNPCKHDQ = VEX.NDS.256.66.0F.WIG 6A /r */
25779 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25780 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25781 uses_vvvv, vbi, pfx, delta, "vpunpckhdq",
25782 math_VPUNPCKHDQ_YMM );
25783 goto decode_success;
25785 break;
25787 case 0x6B:
25788 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */
25789 /* VPACKSSDW = VEX.NDS.128.66.0F.WIG 6B /r */
25790 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25791 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25792 uses_vvvv, vbi, pfx, delta, "vpackssdw",
25793 Iop_QNarrowBin32Sto16Sx8, NULL,
25794 False/*!invertLeftArg*/, True/*swapArgs*/ );
25795 goto decode_success;
25797 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */
25798 /* VPACKSSDW = VEX.NDS.256.66.0F.WIG 6B /r */
25799 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25800 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25801 uses_vvvv, vbi, pfx, delta, "vpackssdw",
25802 math_VPACKSSDW_YMM );
25803 goto decode_success;
25805 break;
25807 case 0x6C:
25808 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */
25809 /* VPUNPCKLQDQ = VEX.NDS.128.66.0F.WIG 6C /r */
25810 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25811 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25812 uses_vvvv, vbi, pfx, delta, "vpunpcklqdq",
25813 Iop_InterleaveLO64x2, NULL,
25814 False/*!invertLeftArg*/, True/*swapArgs*/ );
25815 goto decode_success;
25817 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */
25818 /* VPUNPCKLQDQ = VEX.NDS.256.66.0F.WIG 6C /r */
25819 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25820 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25821 uses_vvvv, vbi, pfx, delta, "vpunpcklqdq",
25822 math_VPUNPCKLQDQ_YMM );
25823 goto decode_success;
25825 break;
25827 case 0x6D:
25828 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */
25829 /* VPUNPCKHQDQ = VEX.NDS.128.66.0F.WIG 6D /r */
25830 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25831 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25832 uses_vvvv, vbi, pfx, delta, "vpunpckhqdq",
25833 Iop_InterleaveHI64x2, NULL,
25834 False/*!invertLeftArg*/, True/*swapArgs*/ );
25835 goto decode_success;
25837 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */
25838 /* VPUNPCKHQDQ = VEX.NDS.256.66.0F.WIG 6D /r */
25839 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25840 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25841 uses_vvvv, vbi, pfx, delta, "vpunpckhqdq",
25842 math_VPUNPCKHQDQ_YMM );
25843 goto decode_success;
25845 break;
25847 case 0x6E:
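/* VEX.W selects the GPR width here: W0 is VMOVD (32-bit source),
   W1 is VMOVQ (64-bit source).  In both cases the value is
   zero-extended into the full YMM register via putYMMRegLoAndZU. */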
25848 /* VMOVD r32/m32, xmm1 = VEX.128.66.0F.W0 6E */
25849 if (have66noF2noF3(pfx)
25850 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
25851 vassert(sz == 2); /* even tho we are transferring 4, not 2. */
25852 UChar modrm = getUChar(delta);
25853 if (epartIsReg(modrm)) {
25854 delta += 1;
25855 putYMMRegLoAndZU(
25856 gregOfRexRM(pfx,modrm),
25857 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) )
25859 DIP("vmovd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
25860 nameXMMReg(gregOfRexRM(pfx,modrm)));
25861 } else {
25862 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25863 delta += alen;
25864 putYMMRegLoAndZU(
25865 gregOfRexRM(pfx,modrm),
25866 unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)))
25868 DIP("vmovd %s, %s\n", dis_buf,
25869 nameXMMReg(gregOfRexRM(pfx,modrm)));
25871 goto decode_success;
25873 /* VMOVQ r64/m64, xmm1 = VEX.128.66.0F.W1 6E */
25874 if (have66noF2noF3(pfx)
25875 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
25876 vassert(sz == 2); /* even tho we are transferring 8, not 2. */
25877 UChar modrm = getUChar(delta);
25878 if (epartIsReg(modrm)) {
25879 delta += 1;
25880 putYMMRegLoAndZU(
25881 gregOfRexRM(pfx,modrm),
25882 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) )
25884 DIP("vmovq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
25885 nameXMMReg(gregOfRexRM(pfx,modrm)));
25886 } else {
25887 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25888 delta += alen;
25889 putYMMRegLoAndZU(
25890 gregOfRexRM(pfx,modrm),
25891 unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)))
25893 DIP("vmovq %s, %s\n", dis_buf,
25894 nameXMMReg(gregOfRexRM(pfx,modrm)));
25896 goto decode_success;
25898 break;
25900 case 0x6F:
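/* Load forms of VMOVDQA/VMOVDQU.  The only semantic difference is
   that the aligned (VMOVDQA) form must fault on a misaligned memory
   operand, hence the gen_SEGV_if_not_{16,32}_aligned calls on the
   memory paths; the 128-bit forms additionally zero the upper YMM
   lane via putYMMRegLoAndZU. */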
25901 /* VMOVDQA ymm2/m256, ymm1 = VEX.256.66.0F.WIG 6F */
25902 /* VMOVDQU ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 6F */
25903 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
25904 && 1==getVexL(pfx)/*256*/) {
25905 UChar modrm = getUChar(delta);
25906 UInt rD = gregOfRexRM(pfx, modrm);
25907 IRTemp tD = newTemp(Ity_V256);
25908 Bool isA = have66noF2noF3(pfx);
25909 HChar ch = isA ? 'a' : 'u';
25910 if (epartIsReg(modrm)) {
25911 UInt rS = eregOfRexRM(pfx, modrm);
25912 delta += 1;
25913 assign(tD, getYMMReg(rS));
25914 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD));
25915 } else {
25916 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25917 delta += alen;
25918 if (isA)
25919 gen_SEGV_if_not_32_aligned(addr);
25920 assign(tD, loadLE(Ity_V256, mkexpr(addr)));
25921 DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameYMMReg(rD));
25923 putYMMReg(rD, mkexpr(tD));
25924 goto decode_success;
25926 /* VMOVDQA xmm2/m128, xmm1 = VEX.128.66.0F.WIG 6F */
25927 /* VMOVDQU xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 6F */
25928 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
25929 && 0==getVexL(pfx)/*128*/) {
25930 UChar modrm = getUChar(delta);
25931 UInt rD = gregOfRexRM(pfx, modrm);
25932 IRTemp tD = newTemp(Ity_V128);
25933 Bool isA = have66noF2noF3(pfx);
25934 HChar ch = isA ? 'a' : 'u';
25935 if (epartIsReg(modrm)) {
25936 UInt rS = eregOfRexRM(pfx, modrm);
25937 delta += 1;
25938 assign(tD, getXMMReg(rS));
25939 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD));
25940 } else {
25941 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25942 delta += alen;
25943 if (isA)
25944 gen_SEGV_if_not_16_aligned(addr);
25945 assign(tD, loadLE(Ity_V128, mkexpr(addr)));
25946 DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameXMMReg(rD));
25948 putYMMRegLoAndZU(rD, mkexpr(tD));
25949 goto decode_success;
25951 break;
25953 case 0x70:
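/* 0F 70 is prefix-disambiguated: 66 = VPSHUFD (32-bit elements),
   F2 = VPSHUFLW (shuffles the four low-order words of each 128-bit
   lane, leaving the high quadword intact), F3 = VPSHUFHW (the
   converse); the xIsH flag below selects which half. */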
25954 /* VPSHUFD imm8, xmm2/m128, xmm1 = VEX.128.66.0F.WIG 70 /r ib */
25955 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25956 delta = dis_PSHUFD_32x4( vbi, pfx, delta, True/*writesYmm*/);
25957 goto decode_success;
25959 /* VPSHUFD imm8, ymm2/m256, ymm1 = VEX.256.66.0F.WIG 70 /r ib */
25960 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25961 delta = dis_PSHUFD_32x8( vbi, pfx, delta);
25962 goto decode_success;
25964 /* VPSHUFLW imm8, xmm2/m128, xmm1 = VEX.128.F2.0F.WIG 70 /r ib */
25965 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25966 delta = dis_PSHUFxW_128( vbi, pfx, delta,
25967 True/*isAvx*/, False/*!xIsH*/ );
25968 goto decode_success;
25970 /* VPSHUFLW imm8, ymm2/m256, ymm1 = VEX.256.F2.0F.WIG 70 /r ib */
25971 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25972 delta = dis_PSHUFxW_256( vbi, pfx, delta, False/*!xIsH*/ );
25973 goto decode_success;
25975 /* VPSHUFHW imm8, xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 70 /r ib */
25976 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
25977 delta = dis_PSHUFxW_128( vbi, pfx, delta,
25978 True/*isAvx*/, True/*xIsH*/ );
25979 goto decode_success;
25981 /* VPSHUFHW imm8, ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 70 /r ib */
25982 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
25983 delta = dis_PSHUFxW_256( vbi, pfx, delta, True/*xIsH*/ );
25984 goto decode_success;
25986 break;
25988 case 0x71:
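/* Shift-by-immediate group.  These are NDD-encoded: the register
   being shifted is the r/m field and the destination is vvvv, with
   the modrm.reg field acting as an opcode extension (/2 = shift
   right logical, /4 = shift right arithmetic, /6 = shift left).
   There is no memory form of these, hence the epartIsReg guard. */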
25989 /* VPSRLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /2 ib */
25990 /* VPSRAW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /4 ib */
25991 /* VPSLLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /6 ib */
25992 if (have66noF2noF3(pfx)
25993 && 0==getVexL(pfx)/*128*/
25994 && epartIsReg(getUChar(delta))) {
25995 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
25996 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
25997 "vpsrlw", Iop_ShrN16x8 );
25998 *uses_vvvv = True;
25999 goto decode_success;
26001 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
26002 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26003 "vpsraw", Iop_SarN16x8 );
26004 *uses_vvvv = True;
26005 goto decode_success;
26007 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
26008 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26009 "vpsllw", Iop_ShlN16x8 );
26010 *uses_vvvv = True;
26011 goto decode_success;
26013 /* else fall through */
26015 /* VPSRLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /2 ib */
26016 /* VPSRAW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /4 ib */
26017 /* VPSLLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /6 ib */
26018 if (have66noF2noF3(pfx)
26019 && 1==getVexL(pfx)/*256*/
26020 && epartIsReg(getUChar(delta))) {
26021 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
26022 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26023 "vpsrlw", Iop_ShrN16x16 );
26024 *uses_vvvv = True;
26025 goto decode_success;
26027 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
26028 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26029 "vpsraw", Iop_SarN16x16 );
26030 *uses_vvvv = True;
26031 goto decode_success;
26033 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
26034 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26035 "vpsllw", Iop_ShlN16x16 );
26036 *uses_vvvv = True;
26037 goto decode_success;
26039 /* else fall through */
26041 break;
26043 case 0x72:
26044 /* VPSRLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /2 ib */
26045 /* VPSRAD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /4 ib */
26046 /* VPSLLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /6 ib */
26047 if (have66noF2noF3(pfx)
26048 && 0==getVexL(pfx)/*128*/
26049 && epartIsReg(getUChar(delta))) {
26050 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
26051 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26052 "vpsrld", Iop_ShrN32x4 );
26053 *uses_vvvv = True;
26054 goto decode_success;
26056 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
26057 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26058 "vpsrad", Iop_SarN32x4 );
26059 *uses_vvvv = True;
26060 goto decode_success;
26062 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
26063 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26064 "vpslld", Iop_ShlN32x4 );
26065 *uses_vvvv = True;
26066 goto decode_success;
26068 /* else fall through */
26070 /* VPSRLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /2 ib */
26071 /* VPSRAD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /4 ib */
26072 /* VPSLLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /6 ib */
26073 if (have66noF2noF3(pfx)
26074 && 1==getVexL(pfx)/*256*/
26075 && epartIsReg(getUChar(delta))) {
26076 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
26077 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26078 "vpsrld", Iop_ShrN32x8 );
26079 *uses_vvvv = True;
26080 goto decode_success;
26082 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
26083 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26084 "vpsrad", Iop_SarN32x8 );
26085 *uses_vvvv = True;
26086 goto decode_success;
26088 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
26089 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26090 "vpslld", Iop_ShlN32x8 );
26091 *uses_vvvv = True;
26092 goto decode_success;
26094 /* else fall through */
26096 break;
26098 case 0x73:
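/* This group mixes 64-bit element shifts (/2, /6) with whole-lane
   byte shifts (/3 = VPSRLDQ, /7 = VPSLLDQ).  Roughly, for an
   immediate n, VPSRLDQ computes
      dst[127:0] = src[127:0] >> (8*n)
   and VPSLLDQ the corresponding left shift, with n clamped to 16
   (giving zero); the 256-bit forms apply this independently to each
   128-bit lane, via the per-lane math_PSRLDQ / math_PSLLDQ calls. */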
26099 /* VPSRLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /3 ib */
26100 /* VPSLLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /7 ib */
26101 /* VPSRLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /2 ib */
26102 /* VPSLLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /6 ib */
26103 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
26104 && epartIsReg(getUChar(delta))) {
26105 Int rS = eregOfRexRM(pfx,getUChar(delta));
26106 Int rD = getVexNvvvv(pfx);
26107 IRTemp vecS = newTemp(Ity_V128);
26108 if (gregLO3ofRM(getUChar(delta)) == 3) {
26109 Int imm = (Int)getUChar(delta+1);
26110 DIP("vpsrldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD));
26111 delta += 2;
26112 assign( vecS, getXMMReg(rS) );
26113 putYMMRegLoAndZU(rD, mkexpr(math_PSRLDQ( vecS, imm )));
26114 *uses_vvvv = True;
26115 goto decode_success;
26117 if (gregLO3ofRM(getUChar(delta)) == 7) {
26118 Int imm = (Int)getUChar(delta+1);
26119 DIP("vpslldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD));
26120 delta += 2;
26121 assign( vecS, getXMMReg(rS) );
26122 putYMMRegLoAndZU(rD, mkexpr(math_PSLLDQ( vecS, imm )));
26123 *uses_vvvv = True;
26124 goto decode_success;
26126 if (gregLO3ofRM(getUChar(delta)) == 2) {
26127 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26128 "vpsrlq", Iop_ShrN64x2 );
26129 *uses_vvvv = True;
26130 goto decode_success;
26132 if (gregLO3ofRM(getUChar(delta)) == 6) {
26133 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26134 "vpsllq", Iop_ShlN64x2 );
26135 *uses_vvvv = True;
26136 goto decode_success;
26138 /* else fall through */
26140 /* VPSRLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /3 ib */
26141 /* VPSLLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /7 ib */
26142 /* VPSRLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /2 ib */
26143 /* VPSLLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /6 ib */
26144 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
26145 && epartIsReg(getUChar(delta))) {
26146 Int rS = eregOfRexRM(pfx,getUChar(delta));
26147 Int rD = getVexNvvvv(pfx);
26148 if (gregLO3ofRM(getUChar(delta)) == 3) {
26149 IRTemp vecS0 = newTemp(Ity_V128);
26150 IRTemp vecS1 = newTemp(Ity_V128);
26151 Int imm = (Int)getUChar(delta+1);
26152 DIP("vpsrldq $%d,%s,%s\n", imm, nameYMMReg(rS), nameYMMReg(rD));
26153 delta += 2;
26154 assign( vecS0, getYMMRegLane128(rS, 0));
26155 assign( vecS1, getYMMRegLane128(rS, 1));
26156 putYMMRegLane128(rD, 0, mkexpr(math_PSRLDQ( vecS0, imm )));
26157 putYMMRegLane128(rD, 1, mkexpr(math_PSRLDQ( vecS1, imm )));
26158 *uses_vvvv = True;
26159 goto decode_success;
26161 if (gregLO3ofRM(getUChar(delta)) == 7) {
26162 IRTemp vecS0 = newTemp(Ity_V128);
26163 IRTemp vecS1 = newTemp(Ity_V128);
26164 Int imm = (Int)getUChar(delta+1);
26165 DIP("vpslldq $%d,%s,%s\n", imm, nameYMMReg(rS), nameYMMReg(rD));
26166 delta += 2;
26167 assign( vecS0, getYMMRegLane128(rS, 0));
26168 assign( vecS1, getYMMRegLane128(rS, 1));
26169 putYMMRegLane128(rD, 0, mkexpr(math_PSLLDQ( vecS0, imm )));
26170 putYMMRegLane128(rD, 1, mkexpr(math_PSLLDQ( vecS1, imm )));
26171 *uses_vvvv = True;
26172 goto decode_success;
26174 if (gregLO3ofRM(getUChar(delta)) == 2) {
26175 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26176 "vpsrlq", Iop_ShrN64x4 );
26177 *uses_vvvv = True;
26178 goto decode_success;
26180 if (gregLO3ofRM(getUChar(delta)) == 6) {
26181 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26182 "vpsllq", Iop_ShlN64x4 );
26183 *uses_vvvv = True;
26184 goto decode_success;
26186 /* else fall through */
26188 break;
26190 case 0x74:
26191 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */
26192 /* VPCMPEQB = VEX.NDS.128.66.0F.WIG 74 /r */
26193 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26194 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26195 uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x16 );
26196 goto decode_success;
26198 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */
26199 /* VPCMPEQB = VEX.NDS.256.66.0F.WIG 74 /r */
26200 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26201 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26202 uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x32 );
26203 goto decode_success;
26205 break;
26207 case 0x75:
26208 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */
26209 /* VPCMPEQW = VEX.NDS.128.66.0F.WIG 75 /r */
26210 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26211 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26212 uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x8 );
26213 goto decode_success;
26215 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */
26216 /* VPCMPEQW = VEX.NDS.256.66.0F.WIG 75 /r */
26217 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26218 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26219 uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x16 );
26220 goto decode_success;
26222 break;
26224 case 0x76:
26225 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */
26226 /* VPCMPEQD = VEX.NDS.128.66.0F.WIG 76 /r */
26227 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26228 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26229 uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x4 );
26230 goto decode_success;
26232 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */
26233 /* VPCMPEQD = VEX.NDS.256.66.0F.WIG 76 /r */
26234 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26235 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26236 uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x8 );
26237 goto decode_success;
26239 break;
26241 case 0x77:
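/* 0F 77: VEX.L distinguishes VZEROUPPER (L=0, clear bits 255:128 of
   every YMM register) from VZEROALL (L=1, clear all 256 bits of
   every YMM register).  Both loop over the 16 architectural YMM
   registers. */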
26242 /* VZEROUPPER = VEX.128.0F.WIG 77 */
26243 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26244 Int i;
26245 IRTemp zero128 = newTemp(Ity_V128);
26246 assign(zero128, mkV128(0));
26247 for (i = 0; i < 16; i++) {
26248 putYMMRegLane128(i, 1, mkexpr(zero128));
26250 DIP("vzeroupper\n");
26251 goto decode_success;
26253 /* VZEROALL = VEX.256.0F.WIG 77 */
26254 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26255 Int i;
26256 IRTemp zero128 = newTemp(Ity_V128);
26257 assign(zero128, mkV128(0));
26258 for (i = 0; i < 16; i++) {
26259 putYMMRegLoAndZU(i, mkexpr(zero128));
26261 DIP("vzeroall\n");
26262 goto decode_success;
26264 break;
26266 case 0x7C:
26267 case 0x7D:
26268 /* VHADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7C /r */
26269 /* VHSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7D /r */
26270 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26271 IRTemp sV = newTemp(Ity_V128);
26272 IRTemp dV = newTemp(Ity_V128);
26273 Bool isAdd = opc == 0x7C;
26274 const HChar* str = isAdd ? "add" : "sub";
26275 UChar modrm = getUChar(delta);
26276 UInt rG = gregOfRexRM(pfx,modrm);
26277 UInt rV = getVexNvvvv(pfx);
26278 if (epartIsReg(modrm)) {
26279 UInt rE = eregOfRexRM(pfx,modrm);
26280 assign( sV, getXMMReg(rE) );
26281 DIP("vh%spd %s,%s,%s\n", str, nameXMMReg(rE),
26282 nameXMMReg(rV), nameXMMReg(rG));
26283 delta += 1;
26284 } else {
26285 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26286 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
26287 DIP("vh%spd %s,%s,%s\n", str, dis_buf,
26288 nameXMMReg(rV), nameXMMReg(rG));
26289 delta += alen;
26291 assign( dV, getXMMReg(rV) );
26292 putYMMRegLoAndZU( rG, mkexpr( math_HADDPS_128 ( dV, sV, isAdd ) ) );
26293 *uses_vvvv = True;
26294 goto decode_success;
26296 /* VHADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7C /r */
26297 /* VHSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7D /r */
26298 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26299 IRTemp sV = newTemp(Ity_V256);
26300 IRTemp dV = newTemp(Ity_V256);
26301 IRTemp s1, s0, d1, d0;
26302 Bool isAdd = opc == 0x7C;
26303 const HChar* str = isAdd ? "add" : "sub";
26304 UChar modrm = getUChar(delta);
26305 UInt rG = gregOfRexRM(pfx,modrm);
26306 UInt rV = getVexNvvvv(pfx);
26307 s1 = s0 = d1 = d0 = IRTemp_INVALID;
26308 if (epartIsReg(modrm)) {
26309 UInt rE = eregOfRexRM(pfx,modrm);
26310 assign( sV, getYMMReg(rE) );
26311 DIP("vh%spd %s,%s,%s\n", str, nameYMMReg(rE),
26312 nameYMMReg(rV), nameYMMReg(rG));
26313 delta += 1;
26314 } else {
26315 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26316 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
26317 DIP("vh%spd %s,%s,%s\n", str, dis_buf,
26318 nameYMMReg(rV), nameYMMReg(rG));
26319 delta += alen;
26321 assign( dV, getYMMReg(rV) );
26322 breakupV256toV128s( dV, &d1, &d0 );
26323 breakupV256toV128s( sV, &s1, &s0 );
26324 putYMMReg( rG, binop(Iop_V128HLtoV256,
26325 mkexpr( math_HADDPS_128 ( d1, s1, isAdd ) ),
26326 mkexpr( math_HADDPS_128 ( d0, s0, isAdd ) ) ) );
26327 *uses_vvvv = True;
26328 goto decode_success;
26330 /* VHADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7C /r */
26331 /* VHSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7D /r */
26332 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26333 IRTemp sV = newTemp(Ity_V128);
26334 IRTemp dV = newTemp(Ity_V128);
26335 Bool isAdd = opc == 0x7C;
26336 const HChar* str = isAdd ? "add" : "sub";
26337 UChar modrm = getUChar(delta);
26338 UInt rG = gregOfRexRM(pfx,modrm);
26339 UInt rV = getVexNvvvv(pfx);
26340 if (epartIsReg(modrm)) {
26341 UInt rE = eregOfRexRM(pfx,modrm);
26342 assign( sV, getXMMReg(rE) );
26343 DIP("vh%spd %s,%s,%s\n", str, nameXMMReg(rE),
26344 nameXMMReg(rV), nameXMMReg(rG));
26345 delta += 1;
26346 } else {
26347 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26348 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
26349 DIP("vh%spd %s,%s,%s\n", str, dis_buf,
26350 nameXMMReg(rV), nameXMMReg(rG));
26351 delta += alen;
26353 assign( dV, getXMMReg(rV) );
26354 putYMMRegLoAndZU( rG, mkexpr( math_HADDPD_128 ( dV, sV, isAdd ) ) );
26355 *uses_vvvv = True;
26356 goto decode_success;
26358 /* VHADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7C /r */
26359 /* VHSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7D /r */
26360 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26361 IRTemp sV = newTemp(Ity_V256);
26362 IRTemp dV = newTemp(Ity_V256);
26363 IRTemp s1, s0, d1, d0;
26364 Bool isAdd = opc == 0x7C;
26365 const HChar* str = isAdd ? "add" : "sub";
26366 UChar modrm = getUChar(delta);
26367 UInt rG = gregOfRexRM(pfx,modrm);
26368 UInt rV = getVexNvvvv(pfx);
26369 s1 = s0 = d1 = d0 = IRTemp_INVALID;
26370 if (epartIsReg(modrm)) {
26371 UInt rE = eregOfRexRM(pfx,modrm);
26372 assign( sV, getYMMReg(rE) );
26373 DIP("vh%spd %s,%s,%s\n", str, nameYMMReg(rE),
26374 nameYMMReg(rV), nameYMMReg(rG));
26375 delta += 1;
26376 } else {
26377 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26378 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
26379 DIP("vh%spd %s,%s,%s\n", str, dis_buf,
26380 nameYMMReg(rV), nameYMMReg(rG));
26381 delta += alen;
26383 assign( dV, getYMMReg(rV) );
26384 breakupV256toV128s( dV, &d1, &d0 );
26385 breakupV256toV128s( sV, &s1, &s0 );
26386 putYMMReg( rG, binop(Iop_V128HLtoV256,
26387 mkexpr( math_HADDPD_128 ( d1, s1, isAdd ) ),
26388 mkexpr( math_HADDPD_128 ( d0, s0, isAdd ) ) ) );
26389 *uses_vvvv = True;
26390 goto decode_success;
26392 break;
26394 case 0x7E:
26395 /* Note the Intel docs don't make sense for this. I think they
26396 are wrong. They seem to imply it is a store when in fact I
26397 think it is a load. Also it's unclear whether this is W0, W1
26398 or WIG. */
26399 /* VMOVQ xmm2/m64, xmm1 = VEX.128.F3.0F.W0 7E /r */
26400 if (haveF3no66noF2(pfx)
26401 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
26402 vassert(sz == 4); /* even tho we are transferring 8, not 4. */
26403 UChar modrm = getUChar(delta);
26404 UInt rG = gregOfRexRM(pfx,modrm);
26405 if (epartIsReg(modrm)) {
26406 UInt rE = eregOfRexRM(pfx,modrm);
26407 putXMMRegLane64( rG, 0, getXMMRegLane64( rE, 0 ));
26408 DIP("vmovq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
26409 delta += 1;
26410 } else {
26411 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26412 putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) );
26413 DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG));
26414 delta += alen;
26416 /* zero bits 255:64 */
26417 putXMMRegLane64( rG, 1, mkU64(0) );
26418 putYMMRegLane128( rG, 1, mkV128(0) );
26419 goto decode_success;
26421 /* VMOVQ xmm1, r64/m64 = VEX.128.66.0F.W1 7E /r */
26422 /* Moves from G to E, so is a store-form insn */
26423 /* Intel docs list this in the VMOVD entry for some reason. */
26424 if (have66noF2noF3(pfx)
26425 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
26426 UChar modrm = getUChar(delta);
26427 UInt rG = gregOfRexRM(pfx,modrm);
26428 if (epartIsReg(modrm)) {
26429 UInt rE = eregOfRexRM(pfx,modrm);
26430 DIP("vmovq %s,%s\n", nameXMMReg(rG), nameIReg64(rE));
26431 putIReg64(rE, getXMMRegLane64(rG, 0));
26432 delta += 1;
26433 } else {
26434 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26435 storeLE( mkexpr(addr), getXMMRegLane64(rG, 0) );
26436 DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG));
26437 delta += alen;
26439 goto decode_success;
26441 /* VMOVD xmm1, r32/m32 = VEX.128.66.0F.W0 7E /r */
26442 /* Moves from G to E, so is a store-form insn */
26443 if (have66noF2noF3(pfx)
26444 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
26445 UChar modrm = getUChar(delta);
26446 UInt rG = gregOfRexRM(pfx,modrm);
26447 if (epartIsReg(modrm)) {
26448 UInt rE = eregOfRexRM(pfx,modrm);
26449 DIP("vmovd %s,%s\n", nameXMMReg(rG), nameIReg32(rE));
26450 putIReg32(rE, getXMMRegLane32(rG, 0));
26451 delta += 1;
26452 } else {
26453 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26454 storeLE( mkexpr(addr), getXMMRegLane32(rG, 0) );
26455 DIP("vmovd %s,%s\n", dis_buf, nameXMMReg(rG));
26456 delta += alen;
26458 goto decode_success;
26460 break;
26462 case 0x7F:
26463 /* VMOVDQA ymm1, ymm2/m256 = VEX.256.66.0F.WIG 7F */
26464 /* VMOVDQU ymm1, ymm2/m256 = VEX.256.F3.0F.WIG 7F */
26465 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
26466 && 1==getVexL(pfx)/*256*/) {
26467 UChar modrm = getUChar(delta);
26468 UInt rS = gregOfRexRM(pfx, modrm);
26469 IRTemp tS = newTemp(Ity_V256);
26470 Bool isA = have66noF2noF3(pfx);
26471 HChar ch = isA ? 'a' : 'u';
26472 assign(tS, getYMMReg(rS));
26473 if (epartIsReg(modrm)) {
26474 UInt rD = eregOfRexRM(pfx, modrm);
26475 delta += 1;
26476 putYMMReg(rD, mkexpr(tS));
26477 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD));
26478 } else {
26479 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
26480 delta += alen;
26481 if (isA)
26482 gen_SEGV_if_not_32_aligned(addr);
26483 storeLE(mkexpr(addr), mkexpr(tS));
26484 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), dis_buf);
26486 goto decode_success;
26488 /* VMOVDQA xmm1, xmm2/m128 = VEX.128.66.0F.WIG 7F */
26489 /* VMOVDQU xmm1, xmm2/m128 = VEX.128.F3.0F.WIG 7F */
26490 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
26491 && 0==getVexL(pfx)/*128*/) {
26492 UChar modrm = getUChar(delta);
26493 UInt rS = gregOfRexRM(pfx, modrm);
26494 IRTemp tS = newTemp(Ity_V128);
26495 Bool isA = have66noF2noF3(pfx);
26496 HChar ch = isA ? 'a' : 'u';
26497 assign(tS, getXMMReg(rS));
26498 if (epartIsReg(modrm)) {
26499 UInt rD = eregOfRexRM(pfx, modrm);
26500 delta += 1;
26501 putYMMRegLoAndZU(rD, mkexpr(tS));
26502 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD));
26503 } else {
26504 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
26505 delta += alen;
26506 if (isA)
26507 gen_SEGV_if_not_16_aligned(addr);
26508 storeLE(mkexpr(addr), mkexpr(tS));
26509 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), dis_buf);
26511 goto decode_success;
26513 break;
26515 case 0xAE:
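/* VEX.L must be zero here (LZ); both forms take only a memory
   operand, with modrm.reg selecting /2 = VLDMXCSR (load MXCSR from
   m32) and /3 = VSTMXCSR (store MXCSR to m32). */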
26516 /* VSTMXCSR m32 = VEX.LZ.0F.WIG AE /3 */
26517 if (haveNo66noF2noF3(pfx)
26518 && 0==getVexL(pfx)/*LZ*/
26519 && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */
26520 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3
26521 && sz == 4) {
26522 delta = dis_STMXCSR(vbi, pfx, delta, True/*isAvx*/);
26523 goto decode_success;
26525 /* VLDMXCSR m32 = VEX.LZ.0F.WIG AE /2 */
26526 if (haveNo66noF2noF3(pfx)
26527 && 0==getVexL(pfx)/*LZ*/
26528 && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */
26529 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2
26530 && sz == 4) {
26531 delta = dis_LDMXCSR(vbi, pfx, delta, True/*isAvx*/);
26532 goto decode_success;
26534 break;
26536 case 0xC2:
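/* For the VCMPxx family the trailing imm8 selects the comparison
   predicate; the helper signals a predicate it does not handle by
   returning delta unchanged, hence the delta0 snapshot and the
   "delta > delta0" test before declaring success. */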
26537 /* VCMPSD xmm3/m64(E=argL), xmm2(V=argR), xmm1(G) */
26538 /* = VEX.NDS.LIG.F2.0F.WIG C2 /r ib */
26539 if (haveF2no66noF3(pfx)) {
26540 Long delta0 = delta;
26541 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26542 "vcmpsd", False/*!all_lanes*/,
26543 8/*sz*/);
26544 if (delta > delta0) goto decode_success;
26545 /* else fall through -- decoding has failed */
26547 /* VCMPSS xmm3/m32(E=argL), xmm2(V=argR), xmm1(G) */
26548 /* = VEX.NDS.LIG.F3.0F.WIG C2 /r ib */
26549 if (haveF3no66noF2(pfx)) {
26550 Long delta0 = delta;
26551 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26552 "vcmpss", False/*!all_lanes*/,
26553 4/*sz*/);
26554 if (delta > delta0) goto decode_success;
26555 /* else fall through -- decoding has failed */
26557 /* VCMPPD xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
26558 /* = VEX.NDS.128.66.0F.WIG C2 /r ib */
26559 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26560 Long delta0 = delta;
26561 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26562 "vcmppd", True/*all_lanes*/,
26563 8/*sz*/);
26564 if (delta > delta0) goto decode_success;
26565 /* else fall through -- decoding has failed */
26567 /* VCMPPD ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
26568 /* = VEX.NDS.256.66.0F.WIG C2 /r ib */
26569 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26570 Long delta0 = delta;
26571 delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26572 "vcmppd", 8/*sz*/);
26573 if (delta > delta0) goto decode_success;
26574 /* else fall through -- decoding has failed */
26576 /* VCMPPS xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
26577 /* = VEX.NDS.128.0F.WIG C2 /r ib */
26578 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26579 Long delta0 = delta;
26580 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26581 "vcmpps", True/*all_lanes*/,
26582 4/*sz*/);
26583 if (delta > delta0) goto decode_success;
26584 /* else fall through -- decoding has failed */
26586 /* VCMPPS ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
26587 /* = VEX.NDS.256.0F.WIG C2 /r ib */
26588 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26589 Long delta0 = delta;
26590 delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26591 "vcmpps", 4/*sz*/);
26592 if (delta > delta0) goto decode_success;
26593 /* else fall through -- decoding has failed */
26595 break;
26597 case 0xC4:
26598 /* VPINSRW r32/m16, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG C4 /r ib */
26599 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26600 UChar modrm = getUChar(delta);
26601 UInt rG = gregOfRexRM(pfx, modrm);
26602 UInt rV = getVexNvvvv(pfx);
26603 Int imm8;
26604 IRTemp new16 = newTemp(Ity_I16);
26606 if ( epartIsReg( modrm ) ) {
26607 imm8 = (Int)(getUChar(delta+1) & 7);
26608 assign( new16, unop(Iop_32to16,
26609 getIReg32(eregOfRexRM(pfx,modrm))) );
26610 delta += 1+1;
26611 DIP( "vpinsrw $%d,%s,%s\n", imm8,
26612 nameIReg32( eregOfRexRM(pfx, modrm) ), nameXMMReg(rG) );
26613 } else {
26614 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
26615 imm8 = (Int)(getUChar(delta+alen) & 7);
26616 assign( new16, loadLE( Ity_I16, mkexpr(addr) ));
26617 delta += alen+1;
26618 DIP( "vpinsrw $%d,%s,%s\n",
26619 imm8, dis_buf, nameXMMReg(rG) );
26622 IRTemp src_vec = newTemp(Ity_V128);
26623 assign(src_vec, getXMMReg( rV ));
26624 IRTemp res_vec = math_PINSRW_128( src_vec, new16, imm8 );
26625 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
26626 *uses_vvvv = True;
26627 goto decode_success;
26629 break;
26631 case 0xC5:
26632 /* VPEXTRW imm8, xmm1, reg32 = VEX.128.66.0F.W0 C5 /r ib */
26633 if (have66noF2noF3(pfx)
26634 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
26635 Long delta0 = delta;
26636 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta,
26637 True/*isAvx*/ );
26638 if (delta > delta0) goto decode_success;
26639 /* else fall through -- decoding has failed */
26641 break;
26643 case 0xC6:
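/* VSHUFPS/VSHUFPD: imm8 is consumed two bits per destination
   element (PS) or one bit per destination element (PD); the actual
   selection lives in math_SHUFPS_128/256 and math_SHUFPD_128/256. */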
26644 /* VSHUFPS imm8, xmm3/m128, xmm2, xmm1 */
26645 /* = VEX.NDS.128.0F.WIG C6 /r ib */
26646 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26647 Int imm8 = 0;
26648 IRTemp eV = newTemp(Ity_V128);
26649 IRTemp vV = newTemp(Ity_V128);
26650 UInt modrm = getUChar(delta);
26651 UInt rG = gregOfRexRM(pfx,modrm);
26652 UInt rV = getVexNvvvv(pfx);
26653 assign( vV, getXMMReg(rV) );
26654 if (epartIsReg(modrm)) {
26655 UInt rE = eregOfRexRM(pfx,modrm);
26656 assign( eV, getXMMReg(rE) );
26657 imm8 = (Int)getUChar(delta+1);
26658 delta += 1+1;
26659 DIP("vshufps $%d,%s,%s,%s\n",
26660 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
26661 } else {
26662 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26663 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
26664 imm8 = (Int)getUChar(delta+alen);
26665 delta += 1+alen;
26666 DIP("vshufps $%d,%s,%s,%s\n",
26667 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
26669 IRTemp res = math_SHUFPS_128( eV, vV, imm8 );
26670 putYMMRegLoAndZU( rG, mkexpr(res) );
26671 *uses_vvvv = True;
26672 goto decode_success;
26674 /* VSHUFPS imm8, ymm3/m256, ymm2, ymm1 */
26675 /* = VEX.NDS.256.0F.WIG C6 /r ib */
26676 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26677 Int imm8 = 0;
26678 IRTemp eV = newTemp(Ity_V256);
26679 IRTemp vV = newTemp(Ity_V256);
26680 UInt modrm = getUChar(delta);
26681 UInt rG = gregOfRexRM(pfx,modrm);
26682 UInt rV = getVexNvvvv(pfx);
26683 assign( vV, getYMMReg(rV) );
26684 if (epartIsReg(modrm)) {
26685 UInt rE = eregOfRexRM(pfx,modrm);
26686 assign( eV, getYMMReg(rE) );
26687 imm8 = (Int)getUChar(delta+1);
26688 delta += 1+1;
26689 DIP("vshufps $%d,%s,%s,%s\n",
26690 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
26691 } else {
26692 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26693 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
26694 imm8 = (Int)getUChar(delta+alen);
26695 delta += 1+alen;
26696 DIP("vshufps $%d,%s,%s,%s\n",
26697 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
26699 IRTemp res = math_SHUFPS_256( eV, vV, imm8 );
26700 putYMMReg( rG, mkexpr(res) );
26701 *uses_vvvv = True;
26702 goto decode_success;
26704 /* VSHUFPD imm8, xmm3/m128, xmm2, xmm1 */
26705 /* = VEX.NDS.128.66.0F.WIG C6 /r ib */
26706 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26707 Int imm8 = 0;
26708 IRTemp eV = newTemp(Ity_V128);
26709 IRTemp vV = newTemp(Ity_V128);
26710 UInt modrm = getUChar(delta);
26711 UInt rG = gregOfRexRM(pfx,modrm);
26712 UInt rV = getVexNvvvv(pfx);
26713 assign( vV, getXMMReg(rV) );
26714 if (epartIsReg(modrm)) {
26715 UInt rE = eregOfRexRM(pfx,modrm);
26716 assign( eV, getXMMReg(rE) );
26717 imm8 = (Int)getUChar(delta+1);
26718 delta += 1+1;
26719 DIP("vshufpd $%d,%s,%s,%s\n",
26720 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
26721 } else {
26722 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26723 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
26724 imm8 = (Int)getUChar(delta+alen);
26725 delta += 1+alen;
26726 DIP("vshufpd $%d,%s,%s,%s\n",
26727 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
26729 IRTemp res = math_SHUFPD_128( eV, vV, imm8 );
26730 putYMMRegLoAndZU( rG, mkexpr(res) );
26731 *uses_vvvv = True;
26732 goto decode_success;
26734 /* VSHUFPD imm8, ymm3/m256, ymm2, ymm1 */
26735 /* = VEX.NDS.256.66.0F.WIG C6 /r ib */
26736 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26737 Int imm8 = 0;
26738 IRTemp eV = newTemp(Ity_V256);
26739 IRTemp vV = newTemp(Ity_V256);
26740 UInt modrm = getUChar(delta);
26741 UInt rG = gregOfRexRM(pfx,modrm);
26742 UInt rV = getVexNvvvv(pfx);
26743 assign( vV, getYMMReg(rV) );
26744 if (epartIsReg(modrm)) {
26745 UInt rE = eregOfRexRM(pfx,modrm);
26746 assign( eV, getYMMReg(rE) );
26747 imm8 = (Int)getUChar(delta+1);
26748 delta += 1+1;
26749 DIP("vshufpd $%d,%s,%s,%s\n",
26750 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
26751 } else {
26752 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26753 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
26754 imm8 = (Int)getUChar(delta+alen);
26755 delta += 1+alen;
26756 DIP("vshufpd $%d,%s,%s,%s\n",
26757 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
26759 IRTemp res = math_SHUFPD_256( eV, vV, imm8 );
26760 putYMMReg( rG, mkexpr(res) );
26761 *uses_vvvv = True;
26762 goto decode_success;
26764 break;
26766 case 0xD0:
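/* VADDSUBPD/VADDSUBPS: subtract in the even-numbered lanes and add
   in the odd-numbered lanes (lane 0 = least significant), as
   implemented by the math_ADDSUBPD/math_ADDSUBPS helpers. */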
26767 /* VADDSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D0 /r */
26768 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26769 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
26770 uses_vvvv, vbi, pfx, delta,
26771 "vaddsubpd", math_ADDSUBPD_128 );
26772 goto decode_success;
26774 /* VADDSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D0 /r */
26775 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26776 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
26777 uses_vvvv, vbi, pfx, delta,
26778 "vaddsubpd", math_ADDSUBPD_256 );
26779 goto decode_success;
26781 /* VADDSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG D0 /r */
26782 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26783 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
26784 uses_vvvv, vbi, pfx, delta,
26785 "vaddsubps", math_ADDSUBPS_128 );
26786 goto decode_success;
26788 /* VADDSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG D0 /r */
26789 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26790 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
26791 uses_vvvv, vbi, pfx, delta,
26792 "vaddsubps", math_ADDSUBPS_256 );
26793 goto decode_success;
26795 break;
26797 case 0xD1:
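/* D1/D2/D3 are the shift-right-logical-by-vector forms (16-, 32-
   and 64-bit elements).  The shift count is taken from the low 64
   bits of the E operand, and even for the 256-bit destinations the
   count operand stays an xmm/m128, as the comments note. */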
26798 /* VPSRLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D1 /r */
26799 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26800 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26801 "vpsrlw", Iop_ShrN16x8 );
26802 *uses_vvvv = True;
26803 goto decode_success;
26806 /* VPSRLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D1 /r */
26807 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26808 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26809 "vpsrlw", Iop_ShrN16x16 );
26810 *uses_vvvv = True;
26811 goto decode_success;
26814 break;
26816 case 0xD2:
26817 /* VPSRLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D2 /r */
26818 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26819 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26820 "vpsrld", Iop_ShrN32x4 );
26821 *uses_vvvv = True;
26822 goto decode_success;
26824 /* VPSRLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D2 /r */
26825 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26826 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26827 "vpsrld", Iop_ShrN32x8 );
26828 *uses_vvvv = True;
26829 goto decode_success;
26831 break;
26833 case 0xD3:
26834 /* VPSRLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D3 /r */
26835 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26836 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26837 "vpsrlq", Iop_ShrN64x2 );
26838 *uses_vvvv = True;
26839 goto decode_success;
26841 /* VPSRLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D3 /r */
26842 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26843 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26844 "vpsrlq", Iop_ShrN64x4 );
26845 *uses_vvvv = True;
26846 goto decode_success;
26848 break;
26850 case 0xD4:
26851 /* VPADDQ r/m, rV, r ::: r = rV + r/m */
26852 /* VPADDQ = VEX.NDS.128.66.0F.WIG D4 /r */
26853 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26854 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26855 uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x2 );
26856 goto decode_success;
26858 /* VPADDQ r/m, rV, r ::: r = rV + r/m */
26859 /* VPADDQ = VEX.NDS.256.66.0F.WIG D4 /r */
26860 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26861 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26862 uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x4 );
26863 goto decode_success;
26865 break;
26867 case 0xD5:
26868 /* VPMULLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D5 /r */
26869 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26870 delta = dis_AVX128_E_V_to_G(
26871 uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x8 );
26872 goto decode_success;
26874 /* VPMULLW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D5 /r */
26875 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26876 delta = dis_AVX256_E_V_to_G(
26877 uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x16 );
26878 goto decode_success;
26880 break;
26882 case 0xD6:
26883 /* VMOVQ xmm1, xmm2/m64 = VEX.128.66.0F.WIG D6 /r */
26884 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half
26885 xmm) to E (mem or lo half xmm); a store-form insn.
26886 Intel documents it under the MOVD/MOVQ entry. */
26887 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
26888 && 0==getRexW(pfx)/*this might be redundant, dunno*/) {
26889 UChar modrm = getUChar(delta);
26890 UInt rG = gregOfRexRM(pfx,modrm);
26891 if (epartIsReg(modrm)) {
26892 /* fall through, awaiting test case */
26893 /* dst: lo half copied, hi half zeroed */
26894 } else {
26895 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26896 storeLE( mkexpr(addr), getXMMRegLane64( rG, 0 ));
26897 DIP("vmovq %s,%s\n", nameXMMReg(rG), dis_buf );
26898 delta += alen;
26899 goto decode_success;
26902 break;
26904 case 0xD7:
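/* VPMOVMSKB: gather the most significant bit of each byte of the
   source vector into the low 16 (xmm) or 32 (ymm) bits of a 32-bit
   GPR, zeroing the remaining bits. */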
26905 /* VEX.128.66.0F.WIG D7 /r = VPMOVMSKB xmm1, r32 */
26906 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26907 delta = dis_PMOVMSKB_128( vbi, pfx, delta, True/*isAvx*/ );
26908 goto decode_success;
26910 /* VEX.256.66.0F.WIG D7 /r = VPMOVMSKB ymm1, r32 */
26911 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26912 delta = dis_PMOVMSKB_256( vbi, pfx, delta );
26913 goto decode_success;
26915 break;
26917 case 0xD8:
26918 /* VPSUBUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D8 /r */
26919 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26920 delta = dis_AVX128_E_V_to_G(
26921 uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux16 );
26922 goto decode_success;
26924 /* VPSUBUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D8 /r */
26925 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26926 delta = dis_AVX256_E_V_to_G(
26927 uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux32 );
26928 goto decode_success;
26930 break;
26932 case 0xD9:
26933 /* VPSUBUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D9 /r */
26934 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26935 delta = dis_AVX128_E_V_to_G(
26936 uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux8 );
26937 goto decode_success;
26939 /* VPSUBUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D9 /r */
26940 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26941 delta = dis_AVX256_E_V_to_G(
26942 uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux16 );
26943 goto decode_success;
26945 break;
26947 case 0xDA:
26948 /* VPMINUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DA /r */
26949 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26950 delta = dis_AVX128_E_V_to_G(
26951 uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux16 );
26952 goto decode_success;
26954 /* VPMINUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DA /r */
26955 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26956 delta = dis_AVX256_E_V_to_G(
26957 uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux32 );
26958 goto decode_success;
26960 break;
26962 case 0xDB:
26963 /* VPAND r/m, rV, r ::: r = rV & r/m */
26964 /* VEX.NDS.128.66.0F.WIG DB /r = VPAND xmm3/m128, xmm2, xmm1 */
26965 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26966 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26967 uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV128 );
26968 goto decode_success;
26970 /* VPAND r/m, rV, r ::: r = rV & r/m */
26971 /* VEX.NDS.256.66.0F.WIG DB /r = VPAND ymm3/m256, ymm2, ymm1 */
26972 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26973 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26974 uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV256 );
26975 goto decode_success;
26977 break;
26979 case 0xDC:
26980 /* VPADDUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DC /r */
26981 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26982 delta = dis_AVX128_E_V_to_G(
26983 uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux16 );
26984 goto decode_success;
26986 /* VPADDUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DC /r */
26987 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26988 delta = dis_AVX256_E_V_to_G(
26989 uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux32 );
26990 goto decode_success;
26992 break;
26994 case 0xDD:
26995 /* VPADDUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DD /r */
26996 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26997 delta = dis_AVX128_E_V_to_G(
26998 uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux8 );
26999 goto decode_success;
27001 /* VPADDUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DD /r */
27002 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27003 delta = dis_AVX256_E_V_to_G(
27004 uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux16 );
27005 goto decode_success;
27007 break;
27009 case 0xDE:
27010 /* VPMAXUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DE /r */
27011 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27012 delta = dis_AVX128_E_V_to_G(
27013 uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux16 );
27014 goto decode_success;
27016 /* VPMAXUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DE /r */
27017 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27018 delta = dis_AVX256_E_V_to_G(
27019 uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux32 );
27020 goto decode_success;
27022 break;
27024 case 0xDF:
27025 /* VPANDN r/m, rV, r ::: r = ~rV & r/m */
27026 /* VEX.NDS.128.66.0F.WIG DF /r = VPANDN xmm3/m128, xmm2, xmm1 */
27027 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27028 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
27029 uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV128,
27030 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
27031 goto decode_success;
27033 /* VPANDN r/m, rV, r ::: r = ~rV & r/m */
27034 /* VEX.NDS.256.66.0F.WIG DF /r = VPANDN ymm3/m256, ymm2, ymm1 */
27035 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27036 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
27037 uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV256,
27038 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
27039 goto decode_success;
27041 break;
27043 case 0xE0:
27044 /* VPAVGB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E0 /r */
27045 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27046 delta = dis_AVX128_E_V_to_G(
27047 uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux16 );
27048 goto decode_success;
27050 /* VPAVGB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E0 /r */
27051 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27052 delta = dis_AVX256_E_V_to_G(
27053 uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux32 );
27054 goto decode_success;
27056 break;
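      /* Note on the shift-by-vector cases that follow (VPSRAW, VPSRAD,
         and later VPSLLW/VPSLLD/VPSLLQ): for both the 128- and 256-bit
         data forms the shift-count operand is always xmm3/m128, so the
         "xmm3/m128, ymm2, ymm1" wording in the 256-bit comments below
         is intentional, not a copy-paste slip. */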
27058 case 0xE1:
27059 /* VPSRAW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E1 /r */
27060 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27061 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27062 "vpsraw", Iop_SarN16x8 );
27063 *uses_vvvv = True;
27064 goto decode_success;
27066 /* VPSRAW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E1 /r */
27067 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27068 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27069 "vpsraw", Iop_SarN16x16 );
27070 *uses_vvvv = True;
27071 goto decode_success;
27073 break;
27075 case 0xE2:
27076 /* VPSRAD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E2 /r */
27077 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27078 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27079 "vpsrad", Iop_SarN32x4 );
27080 *uses_vvvv = True;
27081 goto decode_success;
27083 /* VPSRAD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E2 /r */
27084 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27085 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27086 "vpsrad", Iop_SarN32x8 );
27087 *uses_vvvv = True;
27088 goto decode_success;
27090 break;
27092 case 0xE3:
27093 /* VPAVGW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E3 /r */
27094 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27095 delta = dis_AVX128_E_V_to_G(
27096 uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux8 );
27097 goto decode_success;
27099 /* VPAVGW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E3 /r */
27100 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27101 delta = dis_AVX256_E_V_to_G(
27102 uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux16 );
27103 goto decode_success;
27105 break;
27107 case 0xE4:
27108 /* VPMULHUW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E4 /r */
27109 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27110 delta = dis_AVX128_E_V_to_G(
27111 uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux8 );
27112 goto decode_success;
27114 /* VPMULHUW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E4 /r */
27115 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27116 delta = dis_AVX256_E_V_to_G(
27117 uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux16 );
27118 goto decode_success;
27120 break;
27122 case 0xE5:
27123 /* VPMULHW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E5 /r */
27124 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27125 delta = dis_AVX128_E_V_to_G(
27126 uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx8 );
27127 goto decode_success;
27129 /* VPMULHW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E5 /r */
27130 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27131 delta = dis_AVX256_E_V_to_G(
27132 uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx16 );
27133 goto decode_success;
27135 break;
27137 case 0xE6:
27138 /* VCVTDQ2PD xmm2/m64, xmm1 = VEX.128.F3.0F.WIG E6 /r */
27139 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
27140 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, True/*isAvx*/);
27141 goto decode_success;
27143 /* VCVTDQ2PD xmm2/m128, ymm1 = VEX.256.F3.0F.WIG E6 /r */
27144 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
27145 delta = dis_CVTDQ2PD_256(vbi, pfx, delta);
27146 goto decode_success;
27148 /* VCVTTPD2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG E6 /r */
27149 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27150 delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/,
27151 True/*r2zero*/);
27152 goto decode_success;
27154 /* VCVTTPD2DQ ymm2/m256, xmm1 = VEX.256.66.0F.WIG E6 /r */
27155 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27156 delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, True/*r2zero*/);
27157 goto decode_success;
27159 /* VCVTPD2DQ xmm2/m128, xmm1 = VEX.128.F2.0F.WIG E6 /r */
27160 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27161 delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/,
27162 False/*!r2zero*/);
27163 goto decode_success;
27165 /* VCVTPD2DQ ymm2/m256, xmm1 = VEX.256.F2.0F.WIG E6 /r */
27166 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27167 delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, False/*!r2zero*/);
27168 goto decode_success;
27170 break;
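      /* In the 0xE6 case above, the r2zero flag picks between the
         truncating conversions (VCVTTPD2DQ, round toward zero) and the
         ones that honour the current MXCSR rounding mode (VCVTPD2DQ).
         A rough scalar analogy in C, assuming round-to-nearest is the
         current mode (illustration only, not used by the decoder):

            double d = 2.7;
            int t = (int)d;          // truncating: 2   (the "T" variants)
            int r = (int)lrint(d);   // current rounding mode: 3

         (lrint is declared in <math.h>.) */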
27172 case 0xE7:
27173 /* VMOVNTDQ xmm1, m128 = VEX.128.66.0F.WIG E7 /r */
27174 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27175 UChar modrm = getUChar(delta);
27176 UInt rG = gregOfRexRM(pfx,modrm);
27177 if (!epartIsReg(modrm)) {
27178 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27179 gen_SEGV_if_not_16_aligned( addr );
27180 storeLE( mkexpr(addr), getXMMReg(rG) );
27181 DIP("vmovntdq %s,%s\n", dis_buf, nameXMMReg(rG));
27182 delta += alen;
27183 goto decode_success;
27185 /* else fall through */
27187 /* VMOVNTDQ ymm1, m256 = VEX.256.66.0F.WIG E7 /r */
27188 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27189 UChar modrm = getUChar(delta);
27190 UInt rG = gregOfRexRM(pfx,modrm);
27191 if (!epartIsReg(modrm)) {
27192 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27193 gen_SEGV_if_not_32_aligned( addr );
27194 storeLE( mkexpr(addr), getYMMReg(rG) );
27195 DIP("vmovntdq %s,%s\n", dis_buf, nameYMMReg(rG));
27196 delta += alen;
27197 goto decode_success;
27199 /* else fall through */
27201 break;
27203 case 0xE8:
27204 /* VPSUBSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E8 /r */
27205 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27206 delta = dis_AVX128_E_V_to_G(
27207 uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx16 );
27208 goto decode_success;
27210 /* VPSUBSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E8 /r */
27211 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27212 delta = dis_AVX256_E_V_to_G(
27213 uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx32 );
27214 goto decode_success;
27216 break;
27218 case 0xE9:
27219 /* VPSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E9 /r */
27220 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27221 delta = dis_AVX128_E_V_to_G(
27222 uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx8 );
27223 goto decode_success;
27225 /* VPSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E9 /r */
27226 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27227 delta = dis_AVX256_E_V_to_G(
27228 uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx16 );
27229 goto decode_success;
27231 break;
27233 case 0xEA:
27234 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */
27235 /* VPMINSW = VEX.NDS.128.66.0F.WIG EA /r */
27236 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27237 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27238 uses_vvvv, vbi, pfx, delta, "vpminsw", Iop_Min16Sx8 );
27239 goto decode_success;
27241 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */
27242 /* VPMINSW = VEX.NDS.256.66.0F.WIG EA /r */
27243 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27244 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27245 uses_vvvv, vbi, pfx, delta, "vpminsw", Iop_Min16Sx16 );
27246 goto decode_success;
27248 break;
27250 case 0xEB:
27251 /* VPOR r/m, rV, r ::: r = rV | r/m */
27252 /* VPOR = VEX.NDS.128.66.0F.WIG EB /r */
27253 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27254 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27255 uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV128 );
27256 goto decode_success;
27258 /* VPOR r/m, rV, r ::: r = rV | r/m */
27259 /* VPOR = VEX.NDS.256.66.0F.WIG EB /r */
27260 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27261 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27262 uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV256 );
27263 goto decode_success;
27265 break;
27267 case 0xEC:
27268 /* VPADDSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG EC /r */
27269 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27270 delta = dis_AVX128_E_V_to_G(
27271 uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx16 );
27272 goto decode_success;
27274 /* VPADDSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG EC /r */
27275 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27276 delta = dis_AVX256_E_V_to_G(
27277 uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx32 );
27278 goto decode_success;
27280 break;
27282 case 0xED:
27283 /* VPADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG ED /r */
27284 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27285 delta = dis_AVX128_E_V_to_G(
27286 uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx8 );
27287 goto decode_success;
27289 /* VPADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG ED /r */
27290 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27291 delta = dis_AVX256_E_V_to_G(
27292 uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx16 );
27293 goto decode_success;
27295 break;
27297 case 0xEE:
27298 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */
27299 /* VPMAXSW = VEX.NDS.128.66.0F.WIG EE /r */
27300 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27301 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27302 uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx8 );
27303 goto decode_success;
27305 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */
27306 /* VPMAXSW = VEX.NDS.256.66.0F.WIG EE /r */
27307 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27308 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27309 uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx16 );
27310 goto decode_success;
27312 break;
27314 case 0xEF:
27315 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */
27316 /* VPXOR = VEX.NDS.128.66.0F.WIG EF /r */
27317 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27318 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27319 uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV128 );
27320 goto decode_success;
27322 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */
27323 /* VPXOR = VEX.NDS.256.66.0F.WIG EF /r */
27324 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27325 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27326 uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV256 );
27327 goto decode_success;
27329 break;
27331 case 0xF0:
27332 /* VLDDQU m256, ymm1 = VEX.256.F2.0F.WIG F0 /r */
27333 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27334 UChar modrm = getUChar(delta);
27335 UInt rD = gregOfRexRM(pfx, modrm);
27336 IRTemp tD = newTemp(Ity_V256);
27337 if (epartIsReg(modrm)) break;
27338 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27339 delta += alen;
27340 assign(tD, loadLE(Ity_V256, mkexpr(addr)));
27341 DIP("vlddqu %s,%s\n", dis_buf, nameYMMReg(rD));
27342 putYMMReg(rD, mkexpr(tD));
27343 goto decode_success;
27345 /* VLDDQU m128, xmm1 = VEX.128.F2.0F.WIG F0 /r */
27346 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27347 UChar modrm = getUChar(delta);
27348 UInt rD = gregOfRexRM(pfx, modrm);
27349 IRTemp tD = newTemp(Ity_V128);
27350 if (epartIsReg(modrm)) break;
27351 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27352 delta += alen;
27353 assign(tD, loadLE(Ity_V128, mkexpr(addr)));
27354 DIP("vlddqu %s,%s\n", dis_buf, nameXMMReg(rD));
27355 putYMMRegLoAndZU(rD, mkexpr(tD));
27356 goto decode_success;
27358 break;
27360 case 0xF1:
27361 /* VPSLLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F1 /r */
27362 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27363 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27364 "vpsllw", Iop_ShlN16x8 );
27365 *uses_vvvv = True;
27366 goto decode_success;
27369 /* VPSLLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F1 /r */
27370 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27371 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27372 "vpsllw", Iop_ShlN16x16 );
27373 *uses_vvvv = True;
27374 goto decode_success;
27377 break;
27379 case 0xF2:
27380 /* VPSLLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F2 /r */
27381 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27382 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27383 "vpslld", Iop_ShlN32x4 );
27384 *uses_vvvv = True;
27385 goto decode_success;
27387 /* VPSLLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F2 /r */
27388 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27389 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27390 "vpslld", Iop_ShlN32x8 );
27391 *uses_vvvv = True;
27392 goto decode_success;
27394 break;
27396 case 0xF3:
27397 /* VPSLLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F3 /r */
27398 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27399 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27400 "vpsllq", Iop_ShlN64x2 );
27401 *uses_vvvv = True;
27402 goto decode_success;
27404 /* VPSLLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F3 /r */
27405 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27406 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27407 "vpsllq", Iop_ShlN64x4 );
27408 *uses_vvvv = True;
27409 goto decode_success;
27411 break;
27413 case 0xF4:
27414 /* VPMULUDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F4 /r */
27415 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27416 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27417 uses_vvvv, vbi, pfx, delta,
27418 "vpmuludq", math_PMULUDQ_128 );
27419 goto decode_success;
27421 /* VPMULUDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F4 /r */
27422 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27423 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27424 uses_vvvv, vbi, pfx, delta,
27425 "vpmuludq", math_PMULUDQ_256 );
27426 goto decode_success;
27428 break;
27430 case 0xF5:
27431 /* VPMADDWD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F5 /r */
27432 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27433 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27434 uses_vvvv, vbi, pfx, delta,
27435 "vpmaddwd", math_PMADDWD_128 );
27436 goto decode_success;
27438 /* VPMADDWD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F5 /r */
27439 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27440 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27441 uses_vvvv, vbi, pfx, delta,
27442 "vpmaddwd", math_PMADDWD_256 );
27443 goto decode_success;
27445 break;
27447 case 0xF6:
27448 /* VPSADBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F6 /r */
27449 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27450 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27451 uses_vvvv, vbi, pfx, delta,
27452 "vpsadbw", math_PSADBW_128 );
27453 goto decode_success;
27455 /* VPSADBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F6 /r */
27456 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27457 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27458 uses_vvvv, vbi, pfx, delta,
27459 "vpsadbw", math_PSADBW_256 );
27460 goto decode_success;
27462 break;
27464 case 0xF7:
27465 /* VMASKMOVDQU xmm2, xmm1 = VEX.128.66.0F.WIG F7 /r */
27466 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
27467 && epartIsReg(getUChar(delta))) {
27468 delta = dis_MASKMOVDQU( vbi, pfx, delta, True/*isAvx*/ );
27469 goto decode_success;
27471 break;
27473 case 0xF8:
27474 /* VPSUBB r/m, rV, r ::: r = rV - r/m */
27475 /* VPSUBB = VEX.NDS.128.66.0F.WIG F8 /r */
27476 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27477 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27478 uses_vvvv, vbi, pfx, delta, "vpsubb", Iop_Sub8x16 );
27479 goto decode_success;
27481 /* VPSUBB r/m, rV, r ::: r = rV - r/m */
27482 /* VPSUBB = VEX.NDS.256.66.0F.WIG F8 /r */
27483 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27484 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27485 uses_vvvv, vbi, pfx, delta, "vpsubb", Iop_Sub8x32 );
27486 goto decode_success;
27488 break;
27490 case 0xF9:
27491 /* VPSUBW r/m, rV, r ::: r = rV - r/m */
27492 /* VPSUBW = VEX.NDS.128.66.0F.WIG F9 /r */
27493 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27494 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27495 uses_vvvv, vbi, pfx, delta, "vpsubw", Iop_Sub16x8 );
27496 goto decode_success;
27498 /* VPSUBW r/m, rV, r ::: r = rV - r/m */
27499 /* VPSUBW = VEX.NDS.256.66.0F.WIG F9 /r */
27500 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27501 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27502 uses_vvvv, vbi, pfx, delta, "vpsubw", Iop_Sub16x16 );
27503 goto decode_success;
27505 break;
27507 case 0xFA:
27508 /* VPSUBD r/m, rV, r ::: r = rV - r/m */
27509 /* VPSUBD = VEX.NDS.128.66.0F.WIG FA /r */
27510 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27511 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27512 uses_vvvv, vbi, pfx, delta, "vpsubd", Iop_Sub32x4 );
27513 goto decode_success;
27515 /* VPSUBD r/m, rV, r ::: r = rV - r/m */
27516 /* VPSUBD = VEX.NDS.256.66.0F.WIG FA /r */
27517 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27518 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27519 uses_vvvv, vbi, pfx, delta, "vpsubd", Iop_Sub32x8 );
27520 goto decode_success;
27522 break;
27524 case 0xFB:
27525 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */
27526 /* VPSUBQ = VEX.NDS.128.66.0F.WIG FB /r */
27527 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27528 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27529 uses_vvvv, vbi, pfx, delta, "vpsubq", Iop_Sub64x2 );
27530 goto decode_success;
27532 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */
27533 /* VPSUBQ = VEX.NDS.256.66.0F.WIG FB /r */
27534 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27535 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27536 uses_vvvv, vbi, pfx, delta, "vpsubq", Iop_Sub64x4 );
27537 goto decode_success;
27539 break;
27541 case 0xFC:
27542 /* VPADDB r/m, rV, r ::: r = rV + r/m */
27543 /* VPADDB = VEX.NDS.128.66.0F.WIG FC /r */
27544 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27545 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27546 uses_vvvv, vbi, pfx, delta, "vpaddb", Iop_Add8x16 );
27547 goto decode_success;
27549 /* VPADDB r/m, rV, r ::: r = rV + r/m */
27550 /* VPADDB = VEX.NDS.256.66.0F.WIG FC /r */
27551 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27552 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27553 uses_vvvv, vbi, pfx, delta, "vpaddb", Iop_Add8x32 );
27554 goto decode_success;
27556 break;
27558 case 0xFD:
27559 /* VPADDW r/m, rV, r ::: r = rV + r/m */
27560 /* VPADDW = VEX.NDS.128.66.0F.WIG FD /r */
27561 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27562 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27563 uses_vvvv, vbi, pfx, delta, "vpaddw", Iop_Add16x8 );
27564 goto decode_success;
27566 /* VPADDW r/m, rV, r ::: r = rV + r/m */
27567 /* VPADDW = VEX.NDS.256.66.0F.WIG FD /r */
27568 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27569 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27570 uses_vvvv, vbi, pfx, delta, "vpaddw", Iop_Add16x16 );
27571 goto decode_success;
27573 break;
27575 case 0xFE:
27576 /* VPADDD r/m, rV, r ::: r = rV + r/m */
27577 /* VPADDD = VEX.NDS.128.66.0F.WIG FE /r */
27578 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27579 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27580 uses_vvvv, vbi, pfx, delta, "vpaddd", Iop_Add32x4 );
27581 goto decode_success;
27583 /* VPADDD r/m, rV, r ::: r = rV + r/m */
27584 /* VPADDD = VEX.NDS.256.66.0F.WIG FE /r */
27585 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27586 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27587 uses_vvvv, vbi, pfx, delta, "vpaddd", Iop_Add32x8 );
27588 goto decode_success;
27590 break;
27592 default:
27593 break;
27597 //decode_failure:
27598 return deltaIN;
27600 decode_success:
27601 return delta;
27605 /*------------------------------------------------------------*/
27606 /*--- ---*/
27607 /*--- Top-level post-escape decoders: dis_ESC_0F38__VEX ---*/
27608 /*--- ---*/
27609 /*------------------------------------------------------------*/
27611 static IRTemp math_PERMILPS_VAR_128 ( IRTemp dataV, IRTemp ctrlV )
27613 /* In the control vector, zero out all but the bottom two bits of
27614 each 32-bit lane. */
27615 IRExpr* cv1 = binop(Iop_ShrN32x4,
27616 binop(Iop_ShlN32x4, mkexpr(ctrlV), mkU8(30)),
27617 mkU8(30));
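/* That shift-left-then-right pair is just a lane-wise way of computing
   "ctrl & 3" for every 32-bit lane without materialising a constant
   mask vector: shifting a lane left by 30 and then logically right by
   30 leaves only its bottom two bits, which is all the steering index
   of Iop_Perm32x4 needs.  Minimal scalar sketch of the same masking
   (illustration only, not used by the translator):

      static inline unsigned keep_lo2 ( unsigned ctrl )
      {
         return (ctrl << 30) >> 30;   // same as ctrl & 3 for 32-bit unsigned
      }
*/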
27618 /* And use the resulting cleaned-up control vector as steering
27619 in a Perm operation. */
27620 IRTemp res = newTemp(Ity_V128);
27621 assign(res, binop(Iop_Perm32x4, mkexpr(dataV), cv1));
27622 return res;
27625 static IRTemp math_PERMILPS_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
27627 IRTemp dHi, dLo, cHi, cLo;
27628 dHi = dLo = cHi = cLo = IRTemp_INVALID;
27629 breakupV256toV128s( dataV, &dHi, &dLo );
27630 breakupV256toV128s( ctrlV, &cHi, &cLo );
27631 IRTemp rHi = math_PERMILPS_VAR_128( dHi, cHi );
27632 IRTemp rLo = math_PERMILPS_VAR_128( dLo, cLo );
27633 IRTemp res = newTemp(Ity_V256);
27634 assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo)));
27635 return res;
27638 static IRTemp math_PERMILPD_VAR_128 ( IRTemp dataV, IRTemp ctrlV )
27640 /* No cleverness here .. */
27641 IRTemp dHi, dLo, cHi, cLo;
27642 dHi = dLo = cHi = cLo = IRTemp_INVALID;
27643 breakupV128to64s( dataV, &dHi, &dLo );
27644 breakupV128to64s( ctrlV, &cHi, &cLo );
27645 IRExpr* rHi
27646 = IRExpr_ITE( unop(Iop_64to1,
27647 binop(Iop_Shr64, mkexpr(cHi), mkU8(1))),
27648 mkexpr(dHi), mkexpr(dLo) );
27649 IRExpr* rLo
27650 = IRExpr_ITE( unop(Iop_64to1,
27651 binop(Iop_Shr64, mkexpr(cLo), mkU8(1))),
27652 mkexpr(dHi), mkexpr(dLo) );
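/* Each 64-bit result lane is thus chosen by bit 1 of the corresponding
   control element: if set, take the high data element of that 128-bit
   half, otherwise the low one.  Per-lane scalar sketch (illustration
   only, not used by the translator):

      // hi/lo are the two 64-bit data elements of one 128-bit half
      static inline unsigned long long permilpd_lane
         ( unsigned long long ctrl,
           unsigned long long hi, unsigned long long lo )
      {
         return (ctrl & 2) ? hi : lo;
      }
*/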
27653 IRTemp res = newTemp(Ity_V128);
27654 assign(res, binop(Iop_64HLtoV128, rHi, rLo));
27655 return res;
27658 static IRTemp math_PERMILPD_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
27660 IRTemp dHi, dLo, cHi, cLo;
27661 dHi = dLo = cHi = cLo = IRTemp_INVALID;
27662 breakupV256toV128s( dataV, &dHi, &dLo );
27663 breakupV256toV128s( ctrlV, &cHi, &cLo );
27664 IRTemp rHi = math_PERMILPD_VAR_128( dHi, cHi );
27665 IRTemp rLo = math_PERMILPD_VAR_128( dLo, cLo );
27666 IRTemp res = newTemp(Ity_V256);
27667 assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo)));
27668 return res;
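/* math_VPERMD uses the same masking trick as math_PERMILPS_VAR_128,
   but keeps three bits per lane (indices 0..7) and permutes across the
   whole 256-bit value: VPERMD/VPERMPS may move elements between the two
   128-bit halves, so a single cross-lane Iop_Perm32x8 is used instead
   of two per-half permutes. */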
27671 static IRTemp math_VPERMD ( IRTemp ctrlV, IRTemp dataV )
27673 /* In the control vector, zero out all but the bottom three bits of
27674 each 32-bit lane. */
27675 IRExpr* cv1 = binop(Iop_ShrN32x8,
27676 binop(Iop_ShlN32x8, mkexpr(ctrlV), mkU8(29)),
27677 mkU8(29));
27678 /* And use the resulting cleaned-up control vector as steering
27679 in a Perm operation. */
27680 IRTemp res = newTemp(Ity_V256);
27681 assign(res, binop(Iop_Perm32x8, mkexpr(dataV), cv1));
27682 return res;
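/* dis_SHIFTX covers the BMI2 SHLX/SARX/SHRX group: the shift amount is
   taken from the vvvv register, masked down to the operand width (low 5
   or 6 bits), and, unlike the classic SHL/SAR/SHR forms, no flags are
   written.  Minimal scalar model of the 64-bit SHLX case (illustration
   only, not used by the translator):

      static inline unsigned long long shlx64
         ( unsigned long long src, unsigned long long amt )
      {
         return src << (amt & 63);   // count taken mod 64; flags untouched
      }
*/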
27685 static Long dis_SHIFTX ( /*OUT*/Bool* uses_vvvv,
27686 const VexAbiInfo* vbi, Prefix pfx, Long delta,
27687 const HChar* opname, IROp op8 )
27689 HChar dis_buf[50];
27690 Int alen;
27691 Int size = getRexW(pfx) ? 8 : 4;
27692 IRType ty = szToITy(size);
27693 IRTemp src = newTemp(ty);
27694 IRTemp amt = newTemp(ty);
27695 UChar rm = getUChar(delta);
27697 assign( amt, getIRegV(size,pfx) );
27698 if (epartIsReg(rm)) {
27699 assign( src, getIRegE(size,pfx,rm) );
27700 DIP("%s %s,%s,%s\n", opname, nameIRegV(size,pfx),
27701 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
27702 delta++;
27703 } else {
27704 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27705 assign( src, loadLE(ty, mkexpr(addr)) );
27706 DIP("%s %s,%s,%s\n", opname, nameIRegV(size,pfx), dis_buf,
27707 nameIRegG(size,pfx,rm));
27708 delta += alen;
27711 putIRegG( size, pfx, rm,
27712 binop(mkSizedOp(ty,op8), mkexpr(src),
27713 narrowTo(Ity_I8, binop(mkSizedOp(ty,Iop_And8), mkexpr(amt),
27714 mkU(ty,8*size-1)))) );
27715 /* Flags aren't modified. */
27716 *uses_vvvv = True;
27717 return delta;
27721 static Long dis_FMA ( const VexAbiInfo* vbi, Prefix pfx, Long delta, UChar opc )
27723 UChar modrm = getUChar(delta);
27724 UInt rG = gregOfRexRM(pfx, modrm);
27725 UInt rV = getVexNvvvv(pfx);
27726 Bool scalar = (opc & 0xF) > 7 && (opc & 1);
27727 IRType ty = getRexW(pfx) ? Ity_F64 : Ity_F32;
27728 IRType vty = scalar ? ty : (getVexL(pfx) ? Ity_V256 : Ity_V128);
27729 IRTemp addr = IRTemp_INVALID;
27730 HChar dis_buf[50];
27731 Int alen = 0;
27732 const HChar *name;
27733 const HChar *suffix;
27734 const HChar *order;
27735 Bool negateRes = False;
27736 Bool negateZeven = False;
27737 Bool negateZodd = False;
27738 UInt count = 0;
27740 switch (opc & 0xF) {
27741 case 0x6: name = "addsub"; negateZeven = True; break;
27742 case 0x7: name = "subadd"; negateZodd = True; break;
27743 case 0x8:
27744 case 0x9: name = "add"; break;
27745 case 0xA:
27746 case 0xB: name = "sub"; negateZeven = True; negateZodd = True;
27747 break;
27748 case 0xC:
27749 case 0xD: name = "add"; negateRes = True; negateZeven = True;
27750 negateZodd = True; break;
27751 case 0xE:
27752 case 0xF: name = "sub"; negateRes = True; break;
27753 default: vpanic("dis_FMA(amd64)"); break;
27755 switch (opc & 0xF0) {
27756 case 0x90: order = "132"; break;
27757 case 0xA0: order = "213"; break;
27758 case 0xB0: order = "231"; break;
27759 default: vpanic("dis_FMA(amd64)"); break;
27761 if (scalar) {
27762 suffix = ty == Ity_F64 ? "sd" : "ss";
27763 } else {
27764 suffix = ty == Ity_F64 ? "pd" : "ps";
27767 // Figure out |count| (the number of elements) by considering |vty| and |ty|.
27768 count = sizeofIRType(vty) / sizeofIRType(ty);
27769 vassert(count == 1 || count == 2 || count == 4 || count == 8);
27771 // Fetch operands into the first |count| elements of |sX|, |sY| and |sZ|.
27772 UInt i;
27773 IRExpr *sX[8], *sY[8], *sZ[8], *res[8];
27774 for (i = 0; i < 8; i++) sX[i] = sY[i] = sZ[i] = res[i] = NULL;
27776 IRExpr* (*getYMMRegLane)(UInt,Int)
27777 = ty == Ity_F32 ? getYMMRegLane32F : getYMMRegLane64F;
27778 void (*putYMMRegLane)(UInt,Int,IRExpr*)
27779 = ty == Ity_F32 ? putYMMRegLane32F : putYMMRegLane64F;
27781 for (i = 0; i < count; i++) {
27782 sX[i] = getYMMRegLane(rG, i);
27783 sZ[i] = getYMMRegLane(rV, i);
27786 if (epartIsReg(modrm)) {
27787 UInt rE = eregOfRexRM(pfx, modrm);
27788 delta += 1;
27789 for (i = 0; i < count; i++) {
27790 sY[i] = getYMMRegLane(rE, i);
27792 if (vty == Ity_V256) {
27793 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27794 name, order, suffix, nameYMMReg(rE), nameYMMReg(rV),
27795 nameYMMReg(rG));
27796 } else {
27797 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27798 name, order, suffix, nameXMMReg(rE), nameXMMReg(rV),
27799 nameXMMReg(rG));
27801 } else {
27802 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27803 delta += alen;
27804 for (i = 0; i < count; i++) {
27805 sY[i] = loadLE(ty, binop(Iop_Add64, mkexpr(addr),
27806 mkU64(i * sizeofIRType(ty))));
27808 if (vty == Ity_V256) {
27809 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27810 name, order, suffix, dis_buf, nameYMMReg(rV),
27811 nameYMMReg(rG));
27812 } else {
27813 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27814 name, order, suffix, dis_buf, nameXMMReg(rV),
27815 nameXMMReg(rG));
27819 /* sX/sY/sZ are now in 132 order. If the instruction requires a different
27820 order, swap them around. */
27822 # define COPY_ARR(_dst, _src) \
27823 do { for (int j = 0; j < 8; j++) { _dst[j] = _src[j]; } } while (0)
27825 if ((opc & 0xF0) != 0x90) {
27826 IRExpr* temp[8];
27827 COPY_ARR(temp, sX);
27828 if ((opc & 0xF0) == 0xA0) {
27829 COPY_ARR(sX, sZ);
27830 COPY_ARR(sZ, sY);
27831 COPY_ARR(sY, temp);
27832 } else {
27833 COPY_ARR(sX, sZ);
27834 COPY_ARR(sZ, temp);
27838 # undef COPY_ARR
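   /* The 132/213/231 digits in an FMA mnemonic name the roles of
      (dst, vvvv, r/m) in the multiply-add.  With X = dst, Y = r/m and
      Z = vvvv as fetched above (the "132" arrangement, computing
      X*Y + Z), the swaps just performed reduce the other two orders to
      the same X*Y + Z shape.  For the plain vfmadd forms (the
      nmadd/nsub/addsub variants only layer negations on top):

         vfmadd132:  dst = dst  * r/m + vvvv    (no swap needed)
         vfmadd213:  dst = vvvv * dst + r/m     (rotate: X<-Z, Z<-Y, Y<-X)
         vfmadd231:  dst = vvvv * r/m + dst     (swap X and Z)
   */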
27840 for (i = 0; i < count; i++) {
27841 IROp opNEG = ty == Ity_F64 ? Iop_NegF64 : Iop_NegF32;
27842 if ((i & 1) ? negateZodd : negateZeven) {
27843 sZ[i] = unop(opNEG, sZ[i]);
27845 res[i] = IRExpr_Qop(ty == Ity_F64 ? Iop_MAddF64 : Iop_MAddF32,
27846 get_FAKE_roundingmode(), sX[i], sY[i], sZ[i]);
27847 if (negateRes) {
27848 res[i] = unop(opNEG, res[i]);
27852 for (i = 0; i < count; i++) {
27853 putYMMRegLane(rG, i, res[i]);
27856 switch (vty) {
27857 case Ity_F32: putYMMRegLane32(rG, 1, mkU32(0)); /*fallthru*/
27858 case Ity_F64: putYMMRegLane64(rG, 1, mkU64(0)); /*fallthru*/
27859 case Ity_V128: putYMMRegLane128(rG, 1, mkV128(0)); /*fallthru*/
27860 case Ity_V256: break;
27861 default: vassert(0);
27864 return delta;
27868 /* Masked load or masked store. */
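/* Only the sign bit of each mask lane (in the vvvv register) matters:
   if it is set the lane is transferred; if it is clear, a masked load
   writes 0 to the destination lane (via the guard's alternative value)
   and a masked store leaves memory untouched, and neither can fault on
   that lane.  Per-lane scalar sketch (illustration only, not used by
   the translator):

      // one 32-bit lane of VMASKMOV-style behaviour
      static void maskmov_lane32 ( unsigned mask, unsigned* mem,
                                   unsigned* dst, int isLoad )
      {
         int active = (mask >> 31) & 1;
         if (isLoad)
            *dst = active ? *mem : 0;
         else if (active)
            *mem = *dst;
      }
*/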
27869 static ULong dis_VMASKMOV ( Bool *uses_vvvv, const VexAbiInfo* vbi,
27870 Prefix pfx, Long delta,
27871 const HChar* opname, Bool isYMM, IRType ty,
27872 Bool isLoad )
27874 HChar dis_buf[50];
27875 Int alen, i;
27876 IRTemp addr;
27877 UChar modrm = getUChar(delta);
27878 UInt rG = gregOfRexRM(pfx,modrm);
27879 UInt rV = getVexNvvvv(pfx);
27881 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27882 delta += alen;
27884 /**/ if (isLoad && isYMM) {
27885 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
27887 else if (isLoad && !isYMM) {
27888 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
27891 else if (!isLoad && isYMM) {
27892 DIP("%s %s,%s,%s\n", opname, nameYMMReg(rG), nameYMMReg(rV), dis_buf );
27894 else {
27895 vassert(!isLoad && !isYMM);
27896 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rG), nameXMMReg(rV), dis_buf );
27899 vassert(ty == Ity_I32 || ty == Ity_I64);
27900 Bool laneIs32 = ty == Ity_I32;
27902 Int nLanes = (isYMM ? 2 : 1) * (laneIs32 ? 4 : 2);
27904 for (i = 0; i < nLanes; i++) {
27905 IRExpr* shAmt = laneIs32 ? mkU8(31) : mkU8(63);
27906 IRExpr* one = laneIs32 ? mkU32(1) : mkU64(1);
27907 IROp opSHR = laneIs32 ? Iop_Shr32 : Iop_Shr64;
27908 IROp opEQ = laneIs32 ? Iop_CmpEQ32 : Iop_CmpEQ64;
27909 IRExpr* lane = (laneIs32 ? getYMMRegLane32 : getYMMRegLane64)( rV, i );
27911 IRTemp cond = newTemp(Ity_I1);
27912 assign(cond, binop(opEQ, binop(opSHR, lane, shAmt), one));
27914 IRTemp data = newTemp(ty);
27915 IRExpr* ea = binop(Iop_Add64, mkexpr(addr),
27916 mkU64(i * (laneIs32 ? 4 : 8)));
27917 if (isLoad) {
27918 stmt(
27919 IRStmt_LoadG(
27920 Iend_LE, laneIs32 ? ILGop_Ident32 : ILGop_Ident64,
27921 data, ea, laneIs32 ? mkU32(0) : mkU64(0), mkexpr(cond)
27923 (laneIs32 ? putYMMRegLane32 : putYMMRegLane64)( rG, i, mkexpr(data) );
27924 } else {
27925 assign(data, (laneIs32 ? getYMMRegLane32 : getYMMRegLane64)( rG, i ));
27926 stmt( IRStmt_StoreG(Iend_LE, ea, mkexpr(data), mkexpr(cond)) );
27930 if (isLoad && !isYMM)
27931 putYMMRegLane128( rG, 1, mkV128(0) );
27933 *uses_vvvv = True;
27934 return delta;
27938 /* Gather. */
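/* Per-element view of the gather: an element participates only if the
   sign bit of its mask lane is set; a participating element is loaded
   from base + index*scale, and every processed element then has its
   mask lane cleared, while non-participating elements keep their old
   destination value.  (The ITE on the computed address redirects
   non-participating lanes to RSP, so that any load the IR still
   performs touches a valid address.)  Scalar sketch for one 32-bit
   element (illustration only, not used by the translator):

      static void gather_elem32 ( int* mask, unsigned* dst,
                                  const char* base, long idx, int scale )
      {
         if (*mask < 0)   // sign bit set => element is active
            *dst = *(const unsigned*)(base + idx * scale);
         *mask = 0;       // mask lane is cleared once the element is done
      }
*/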
27939 static ULong dis_VGATHER ( Bool *uses_vvvv, const VexAbiInfo* vbi,
27940 Prefix pfx, Long delta,
27941 const HChar* opname, Bool isYMM,
27942 Bool isVM64x, IRType ty )
27944 HChar dis_buf[50];
27945 Int alen, i, vscale, count1, count2;
27946 IRTemp addr;
27947 UChar modrm = getUChar(delta);
27948 UInt rG = gregOfRexRM(pfx,modrm);
27949 UInt rV = getVexNvvvv(pfx);
27950 UInt rI;
27951 IRType dstTy = (isYMM && (ty == Ity_I64 || !isVM64x)) ? Ity_V256 : Ity_V128;
27952 IRType idxTy = (isYMM && (ty == Ity_I32 || isVM64x)) ? Ity_V256 : Ity_V128;
27953 IRTemp cond;
27954 addr = disAVSIBMode ( &alen, vbi, pfx, delta, dis_buf, &rI,
27955 idxTy, &vscale );
27956 if (addr == IRTemp_INVALID || rI == rG || rI == rV || rG == rV)
27957 return delta;
27958 if (dstTy == Ity_V256) {
27959 DIP("%s %s,%s,%s\n", opname, nameYMMReg(rV), dis_buf, nameYMMReg(rG) );
27960 } else {
27961 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rV), dis_buf, nameXMMReg(rG) );
27963 delta += alen;
27965 if (ty == Ity_I32) {
27966 count1 = isYMM ? 8 : 4;
27967 count2 = isVM64x ? count1 / 2 : count1;
27968 } else {
27969 count1 = count2 = isYMM ? 4 : 2;
27972 /* First update the mask register to copies of the sign bit. */
27973 if (ty == Ity_I32) {
27974 if (isYMM)
27975 putYMMReg( rV, binop(Iop_SarN32x8, getYMMReg( rV ), mkU8(31)) );
27976 else
27977 putYMMRegLoAndZU( rV, binop(Iop_SarN32x4, getXMMReg( rV ), mkU8(31)) );
27978 } else {
27979 for (i = 0; i < count1; i++) {
27980 putYMMRegLane64( rV, i, binop(Iop_Sar64, getYMMRegLane64( rV, i ),
27981 mkU8(63)) );
27985 /* Next gather the individual elements. If any fault occurs, the
27986 corresponding mask element will be set and the loop stops. */
27987 for (i = 0; i < count2; i++) {
27988 IRExpr *expr, *addr_expr;
27989 cond = newTemp(Ity_I1);
27990 assign( cond,
27991 binop(ty == Ity_I32 ? Iop_CmpLT32S : Iop_CmpLT64S,
27992 ty == Ity_I32 ? getYMMRegLane32( rV, i )
27993 : getYMMRegLane64( rV, i ),
27994 mkU(ty, 0)) );
27995 expr = ty == Ity_I32 ? getYMMRegLane32( rG, i )
27996 : getYMMRegLane64( rG, i );
27997 addr_expr = isVM64x ? getYMMRegLane64( rI, i )
27998 : unop(Iop_32Sto64, getYMMRegLane32( rI, i ));
27999 switch (vscale) {
28000 case 2: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(1)); break;
28001 case 4: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(2)); break;
28002 case 8: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(3)); break;
28003 default: break;
28005 addr_expr = binop(Iop_Add64, mkexpr(addr), addr_expr);
28006 addr_expr = handleAddrOverrides(vbi, pfx, addr_expr);
28007 addr_expr = IRExpr_ITE(mkexpr(cond), addr_expr, getIReg64(R_RSP));
28008 expr = IRExpr_ITE(mkexpr(cond), loadLE(ty, addr_expr), expr);
28009 if (ty == Ity_I32) {
28010 putYMMRegLane32( rG, i, expr );
28011 putYMMRegLane32( rV, i, mkU32(0) );
28012 } else {
28013 putYMMRegLane64( rG, i, expr);
28014 putYMMRegLane64( rV, i, mkU64(0) );
28018 if (!isYMM || (ty == Ity_I32 && isVM64x)) {
28019 if (ty == Ity_I64 || isYMM)
28020 putYMMRegLane128( rV, 1, mkV128(0) );
28021 else if (ty == Ity_I32 && count2 == 2) {
28022 putYMMRegLane64( rV, 1, mkU64(0) );
28023 putYMMRegLane64( rG, 1, mkU64(0) );
28025 putYMMRegLane128( rG, 1, mkV128(0) );
28028 *uses_vvvv = True;
28029 return delta;
28033 __attribute__((noinline))
28034 static
28035 Long dis_ESC_0F38__VEX (
28036 /*MB_OUT*/DisResult* dres,
28037 /*OUT*/ Bool* uses_vvvv,
28038 const VexArchInfo* archinfo,
28039 const VexAbiInfo* vbi,
28040 Prefix pfx, Int sz, Long deltaIN
28043 IRTemp addr = IRTemp_INVALID;
28044 Int alen = 0;
28045 HChar dis_buf[50];
28046 Long delta = deltaIN;
28047 UChar opc = getUChar(delta);
28048 delta++;
28049 *uses_vvvv = False;
28051 switch (opc) {
28053 case 0x00:
28054 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */
28055 /* VPSHUFB = VEX.NDS.128.66.0F38.WIG 00 /r */
28056 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28057 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
28058 uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_XMM );
28059 goto decode_success;
28061 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */
28062 /* VPSHUFB = VEX.NDS.256.66.0F38.WIG 00 /r */
28063 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28064 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28065 uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_YMM );
28066 goto decode_success;
28068 break;
28070 case 0x01:
28071 case 0x02:
28072 case 0x03:
28073 /* VPHADDW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 01 /r */
28074 /* VPHADDD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 02 /r */
28075 /* VPHADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 03 /r */
28076 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28077 delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc );
28078 *uses_vvvv = True;
28079 goto decode_success;
28081 /* VPHADDW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 01 /r */
28082 /* VPHADDD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 02 /r */
28083 /* VPHADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 03 /r */
28084 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28085 delta = dis_PHADD_256( vbi, pfx, delta, opc );
28086 *uses_vvvv = True;
28087 goto decode_success;
28089 break;
28091 case 0x04:
28092 /* VPMADDUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 04 /r */
28093 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28094 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
28095 uses_vvvv, vbi, pfx, delta, "vpmaddubsw",
28096 math_PMADDUBSW_128 );
28097 goto decode_success;
28099 /* VPMADDUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 04 /r */
28100 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28101 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28102 uses_vvvv, vbi, pfx, delta, "vpmaddubsw",
28103 math_PMADDUBSW_256 );
28104 goto decode_success;
28106 break;
28108 case 0x05:
28109 case 0x06:
28110 case 0x07:
28111 /* VPHSUBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 05 /r */
28112 /* VPHSUBD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 06 /r */
28113 /* VPHSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 07 /r */
28114 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28115 delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc );
28116 *uses_vvvv = True;
28117 goto decode_success;
28119 /* VPHSUBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 05 /r */
28120 /* VPHSUBD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 06 /r */
28121 /* VPHSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 07 /r */
28122 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28123 delta = dis_PHADD_256( vbi, pfx, delta, opc );
28124 *uses_vvvv = True;
28125 goto decode_success;
28127 break;
28129 case 0x08:
28130 case 0x09:
28131 case 0x0A:
28132 /* VPSIGNB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 08 /r */
28133 /* VPSIGNW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 09 /r */
28134 /* VPSIGND xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0A /r */
28135 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28136 IRTemp sV = newTemp(Ity_V128);
28137 IRTemp dV = newTemp(Ity_V128);
28138 IRTemp sHi, sLo, dHi, dLo;
28139 sHi = sLo = dHi = dLo = IRTemp_INVALID;
28140 HChar ch = '?';
28141 Int laneszB = 0;
28142 UChar modrm = getUChar(delta);
28143 UInt rG = gregOfRexRM(pfx,modrm);
28144 UInt rV = getVexNvvvv(pfx);
28146 switch (opc) {
28147 case 0x08: laneszB = 1; ch = 'b'; break;
28148 case 0x09: laneszB = 2; ch = 'w'; break;
28149 case 0x0A: laneszB = 4; ch = 'd'; break;
28150 default: vassert(0);
28153 assign( dV, getXMMReg(rV) );
28155 if (epartIsReg(modrm)) {
28156 UInt rE = eregOfRexRM(pfx,modrm);
28157 assign( sV, getXMMReg(rE) );
28158 delta += 1;
28159 DIP("vpsign%c %s,%s,%s\n", ch, nameXMMReg(rE),
28160 nameXMMReg(rV), nameXMMReg(rG));
28161 } else {
28162 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
28163 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
28164 delta += alen;
28165 DIP("vpsign%c %s,%s,%s\n", ch, dis_buf,
28166 nameXMMReg(rV), nameXMMReg(rG));
28169 breakupV128to64s( dV, &dHi, &dLo );
28170 breakupV128to64s( sV, &sHi, &sLo );
28172 putYMMRegLoAndZU(
28174 binop(Iop_64HLtoV128,
28175 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
28176 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
28179 *uses_vvvv = True;
28180 goto decode_success;
28182 /* VPSIGNB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 08 /r */
28183 /* VPSIGNW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 09 /r */
28184 /* VPSIGND ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0A /r */
28185 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28186 IRTemp sV = newTemp(Ity_V256);
28187 IRTemp dV = newTemp(Ity_V256);
28188 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
28189 s3 = s2 = s1 = s0 = IRTemp_INVALID;
28190 d3 = d2 = d1 = d0 = IRTemp_INVALID;
28191 UChar ch = '?';
28192 Int laneszB = 0;
28193 UChar modrm = getUChar(delta);
28194 UInt rG = gregOfRexRM(pfx,modrm);
28195 UInt rV = getVexNvvvv(pfx);
28197 switch (opc) {
28198 case 0x08: laneszB = 1; ch = 'b'; break;
28199 case 0x09: laneszB = 2; ch = 'w'; break;
28200 case 0x0A: laneszB = 4; ch = 'd'; break;
28201 default: vassert(0);
28204 assign( dV, getYMMReg(rV) );
28206 if (epartIsReg(modrm)) {
28207 UInt rE = eregOfRexRM(pfx,modrm);
28208 assign( sV, getYMMReg(rE) );
28209 delta += 1;
28210 DIP("vpsign%c %s,%s,%s\n", ch, nameYMMReg(rE),
28211 nameYMMReg(rV), nameYMMReg(rG));
28212 } else {
28213 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
28214 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
28215 delta += alen;
28216 DIP("vpsign%c %s,%s,%s\n", ch, dis_buf,
28217 nameYMMReg(rV), nameYMMReg(rG));
28220 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
28221 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
28223 putYMMReg(
28225 binop( Iop_V128HLtoV256,
28226 binop(Iop_64HLtoV128,
28227 dis_PSIGN_helper( mkexpr(s3), mkexpr(d3), laneszB ),
28228 dis_PSIGN_helper( mkexpr(s2), mkexpr(d2), laneszB )
28230 binop(Iop_64HLtoV128,
28231 dis_PSIGN_helper( mkexpr(s1), mkexpr(d1), laneszB ),
28232 dis_PSIGN_helper( mkexpr(s0), mkexpr(d0), laneszB )
28236 *uses_vvvv = True;
28237 goto decode_success;
28239 break;
28241 case 0x0B:
28242 /* VPMULHRSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0B /r */
28243 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28244 IRTemp sV = newTemp(Ity_V128);
28245 IRTemp dV = newTemp(Ity_V128);
28246 IRTemp sHi, sLo, dHi, dLo;
28247 sHi = sLo = dHi = dLo = IRTemp_INVALID;
28248 UChar modrm = getUChar(delta);
28249 UInt rG = gregOfRexRM(pfx,modrm);
28250 UInt rV = getVexNvvvv(pfx);
28252 assign( dV, getXMMReg(rV) );
28254 if (epartIsReg(modrm)) {
28255 UInt rE = eregOfRexRM(pfx,modrm);
28256 assign( sV, getXMMReg(rE) );
28257 delta += 1;
28258 DIP("vpmulhrsw %s,%s,%s\n", nameXMMReg(rE),
28259 nameXMMReg(rV), nameXMMReg(rG));
28260 } else {
28261 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
28262 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
28263 delta += alen;
28264 DIP("vpmulhrsw %s,%s,%s\n", dis_buf,
28265 nameXMMReg(rV), nameXMMReg(rG));
28268 breakupV128to64s( dV, &dHi, &dLo );
28269 breakupV128to64s( sV, &sHi, &sLo );
28271 putYMMRegLoAndZU(
28273 binop(Iop_64HLtoV128,
28274 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
28275 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
28278 *uses_vvvv = True;
28279 goto decode_success;
28281 /* VPMULHRSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0B /r */
28282 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28283 IRTemp sV = newTemp(Ity_V256);
28284 IRTemp dV = newTemp(Ity_V256);
28285 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
28286 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
28287 UChar modrm = getUChar(delta);
28288 UInt rG = gregOfRexRM(pfx,modrm);
28289 UInt rV = getVexNvvvv(pfx);
28291 assign( dV, getYMMReg(rV) );
28293 if (epartIsReg(modrm)) {
28294 UInt rE = eregOfRexRM(pfx,modrm);
28295 assign( sV, getYMMReg(rE) );
28296 delta += 1;
28297 DIP("vpmulhrsw %s,%s,%s\n", nameYMMReg(rE),
28298 nameYMMReg(rV), nameYMMReg(rG));
28299 } else {
28300 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
28301 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
28302 delta += alen;
28303 DIP("vpmulhrsw %s,%s,%s\n", dis_buf,
28304 nameYMMReg(rV), nameYMMReg(rG));
28307 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
28308 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
28310 putYMMReg(
28312 binop(Iop_V128HLtoV256,
28313 binop(Iop_64HLtoV128,
28314 dis_PMULHRSW_helper( mkexpr(s3), mkexpr(d3) ),
28315 dis_PMULHRSW_helper( mkexpr(s2), mkexpr(d2) ) ),
28316 binop(Iop_64HLtoV128,
28317 dis_PMULHRSW_helper( mkexpr(s1), mkexpr(d1) ),
28318 dis_PMULHRSW_helper( mkexpr(s0), mkexpr(d0) ) )
28321 *uses_vvvv = True;
28322 dres->hint = Dis_HintVerbose;
28323 goto decode_success;
28325 break;
28327 case 0x0C:
28328 /* VPERMILPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0C /r */
28329 if (have66noF2noF3(pfx)
28330 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
28331 UChar modrm = getUChar(delta);
28332 UInt rG = gregOfRexRM(pfx, modrm);
28333 UInt rV = getVexNvvvv(pfx);
28334 IRTemp ctrlV = newTemp(Ity_V128);
28335 if (epartIsReg(modrm)) {
28336 UInt rE = eregOfRexRM(pfx, modrm);
28337 delta += 1;
28338 DIP("vpermilps %s,%s,%s\n",
28339 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
28340 assign(ctrlV, getXMMReg(rE));
28341 } else {
28342 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28343 delta += alen;
28344 DIP("vpermilps %s,%s,%s\n",
28345 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
28346 assign(ctrlV, loadLE(Ity_V128, mkexpr(addr)));
28348 IRTemp dataV = newTemp(Ity_V128);
28349 assign(dataV, getXMMReg(rV));
28350 IRTemp resV = math_PERMILPS_VAR_128(dataV, ctrlV);
28351 putYMMRegLoAndZU(rG, mkexpr(resV));
28352 *uses_vvvv = True;
28353 goto decode_success;
28355 /* VPERMILPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0C /r */
28356 if (have66noF2noF3(pfx)
28357 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
28358 UChar modrm = getUChar(delta);
28359 UInt rG = gregOfRexRM(pfx, modrm);
28360 UInt rV = getVexNvvvv(pfx);
28361 IRTemp ctrlV = newTemp(Ity_V256);
28362 if (epartIsReg(modrm)) {
28363 UInt rE = eregOfRexRM(pfx, modrm);
28364 delta += 1;
28365 DIP("vpermilps %s,%s,%s\n",
28366 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
28367 assign(ctrlV, getYMMReg(rE));
28368 } else {
28369 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28370 delta += alen;
28371 DIP("vpermilps %s,%s,%s\n",
28372 dis_buf, nameYMMReg(rV), nameYMMReg(rG));
28373 assign(ctrlV, loadLE(Ity_V256, mkexpr(addr)));
28375 IRTemp dataV = newTemp(Ity_V256);
28376 assign(dataV, getYMMReg(rV));
28377 IRTemp resV = math_PERMILPS_VAR_256(dataV, ctrlV);
28378 putYMMReg(rG, mkexpr(resV));
28379 *uses_vvvv = True;
28380 goto decode_success;
28382 break;
28384 case 0x0D:
28385 /* VPERMILPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0D /r */
28386 if (have66noF2noF3(pfx)
28387 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
28388 UChar modrm = getUChar(delta);
28389 UInt rG = gregOfRexRM(pfx, modrm);
28390 UInt rV = getVexNvvvv(pfx);
28391 IRTemp ctrlV = newTemp(Ity_V128);
28392 if (epartIsReg(modrm)) {
28393 UInt rE = eregOfRexRM(pfx, modrm);
28394 delta += 1;
28395 DIP("vpermilpd %s,%s,%s\n",
28396 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
28397 assign(ctrlV, getXMMReg(rE));
28398 } else {
28399 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28400 delta += alen;
28401 DIP("vpermilpd %s,%s,%s\n",
28402 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
28403 assign(ctrlV, loadLE(Ity_V128, mkexpr(addr)));
28405 IRTemp dataV = newTemp(Ity_V128);
28406 assign(dataV, getXMMReg(rV));
28407 IRTemp resV = math_PERMILPD_VAR_128(dataV, ctrlV);
28408 putYMMRegLoAndZU(rG, mkexpr(resV));
28409 *uses_vvvv = True;
28410 goto decode_success;
28412 /* VPERMILPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0D /r */
28413 if (have66noF2noF3(pfx)
28414 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
28415 UChar modrm = getUChar(delta);
28416 UInt rG = gregOfRexRM(pfx, modrm);
28417 UInt rV = getVexNvvvv(pfx);
28418 IRTemp ctrlV = newTemp(Ity_V256);
28419 if (epartIsReg(modrm)) {
28420 UInt rE = eregOfRexRM(pfx, modrm);
28421 delta += 1;
28422 DIP("vpermilpd %s,%s,%s\n",
28423 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
28424 assign(ctrlV, getYMMReg(rE));
28425 } else {
28426 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28427 delta += alen;
28428 DIP("vpermilpd %s,%s,%s\n",
28429 dis_buf, nameYMMReg(rV), nameYMMReg(rG));
28430 assign(ctrlV, loadLE(Ity_V256, mkexpr(addr)));
28432 IRTemp dataV = newTemp(Ity_V256);
28433 assign(dataV, getYMMReg(rV));
28434 IRTemp resV = math_PERMILPD_VAR_256(dataV, ctrlV);
28435 putYMMReg(rG, mkexpr(resV));
28436 *uses_vvvv = True;
28437 goto decode_success;
28439 break;
28441 case 0x0E:
28442 /* VTESTPS xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0E /r */
28443 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28444 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 32 );
28445 goto decode_success;
28447 /* VTESTPS ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0E /r */
28448 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28449 delta = dis_xTESTy_256( vbi, pfx, delta, 32 );
28450 goto decode_success;
28452 break;
28454 case 0x0F:
28455 /* VTESTPD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0F /r */
28456 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28457 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 64 );
28458 goto decode_success;
28460 /* VTESTPD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0F /r */
28461 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28462 delta = dis_xTESTy_256( vbi, pfx, delta, 64 );
28463 goto decode_success;
28465 break;
28467 case 0x13:
28468 /* VCVTPH2PS xmm2/m64, xmm1 = VEX.128.66.0F38.W0 13 /r */
28469 if (have66noF2noF3(pfx)
28470 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/
28471 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
28472 delta = dis_VCVTPH2PS( vbi, pfx, delta, /*is256bit=*/False );
28473 goto decode_success;
28475 /* VCVTPH2PS xmm2/m128, ymm1 = VEX.256.66.0F38.W0 13 /r */
28476 if (have66noF2noF3(pfx)
28477 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/
28478 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
28479 delta = dis_VCVTPH2PS( vbi, pfx, delta, /*is256bit=*/True );
28480 goto decode_success;
28482 break;
28484 case 0x16:
28485 /* VPERMPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 16 /r */
28486 if (have66noF2noF3(pfx)
28487 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
28488 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28489 uses_vvvv, vbi, pfx, delta, "vpermps", math_VPERMD );
28490 goto decode_success;
28492 break;
28494 case 0x17:
28495 /* VPTEST xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 17 /r */
28496 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28497 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 0 );
28498 goto decode_success;
28500 /* VPTEST ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 17 /r */
28501 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28502 delta = dis_xTESTy_256( vbi, pfx, delta, 0 );
28503 goto decode_success;
28505 break;
28507 case 0x18:
28508 /* VBROADCASTSS m32, xmm1 = VEX.128.66.0F38.WIG 18 /r */
28509 if (have66noF2noF3(pfx)
28510 && 0==getVexL(pfx)/*128*/
28511 && !epartIsReg(getUChar(delta))) {
28512 UChar modrm = getUChar(delta);
28513 UInt rG = gregOfRexRM(pfx, modrm);
28514 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28515 delta += alen;
28516 DIP("vbroadcastss %s,%s\n", dis_buf, nameXMMReg(rG));
28517 IRTemp t32 = newTemp(Ity_I32);
28518 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
28519 IRTemp t64 = newTemp(Ity_I64);
28520 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28521 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
28522 putYMMRegLoAndZU(rG, res);
28523 goto decode_success;
28525 /* VBROADCASTSS m32, ymm1 = VEX.256.66.0F38.WIG 18 /r */
28526 if (have66noF2noF3(pfx)
28527 && 1==getVexL(pfx)/*256*/
28528 && !epartIsReg(getUChar(delta))) {
28529 UChar modrm = getUChar(delta);
28530 UInt rG = gregOfRexRM(pfx, modrm);
28531 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28532 delta += alen;
28533 DIP("vbroadcastss %s,%s\n", dis_buf, nameYMMReg(rG));
28534 IRTemp t32 = newTemp(Ity_I32);
28535 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
28536 IRTemp t64 = newTemp(Ity_I64);
28537 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28538 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28539 mkexpr(t64), mkexpr(t64));
28540 putYMMReg(rG, res);
28541 goto decode_success;
28543 /* VBROADCASTSS xmm2, xmm1 = VEX.128.66.0F38.WIG 18 /r */
28544 if (have66noF2noF3(pfx)
28545 && 0==getVexL(pfx)/*128*/
28546 && epartIsReg(getUChar(delta))) {
28547 UChar modrm = getUChar(delta);
28548 UInt rG = gregOfRexRM(pfx, modrm);
28549 UInt rE = eregOfRexRM(pfx, modrm);
28550 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
28551 IRTemp t32 = newTemp(Ity_I32);
28552 assign(t32, getXMMRegLane32(rE, 0));
28553 IRTemp t64 = newTemp(Ity_I64);
28554 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28555 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
28556 putYMMRegLoAndZU(rG, res);
28557 delta++;
28558 goto decode_success;
28560 /* VBROADCASTSS xmm2, ymm1 = VEX.256.66.0F38.WIG 18 /r */
28561 if (have66noF2noF3(pfx)
28562 && 1==getVexL(pfx)/*256*/
28563 && epartIsReg(getUChar(delta))) {
28564 UChar modrm = getUChar(delta);
28565 UInt rG = gregOfRexRM(pfx, modrm);
28566 UInt rE = eregOfRexRM(pfx, modrm);
28567 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
28568 IRTemp t32 = newTemp(Ity_I32);
28569 assign(t32, getXMMRegLane32(rE, 0));
28570 IRTemp t64 = newTemp(Ity_I64);
28571 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28572 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28573 mkexpr(t64), mkexpr(t64));
28574 putYMMReg(rG, res);
28575 delta++;
28576 goto decode_success;
28578 break;
28580 case 0x19:
28581 /* VBROADCASTSD m64, ymm1 = VEX.256.66.0F38.WIG 19 /r */
28582 if (have66noF2noF3(pfx)
28583 && 1==getVexL(pfx)/*256*/
28584 && !epartIsReg(getUChar(delta))) {
28585 UChar modrm = getUChar(delta);
28586 UInt rG = gregOfRexRM(pfx, modrm);
28587 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28588 delta += alen;
28589 DIP("vbroadcastsd %s,%s\n", dis_buf, nameYMMReg(rG));
28590 IRTemp t64 = newTemp(Ity_I64);
28591 assign(t64, loadLE(Ity_I64, mkexpr(addr)));
28592 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28593 mkexpr(t64), mkexpr(t64));
28594 putYMMReg(rG, res);
28595 goto decode_success;
28597 /* VBROADCASTSD xmm2, ymm1 = VEX.256.66.0F38.WIG 19 /r */
28598 if (have66noF2noF3(pfx)
28599 && 1==getVexL(pfx)/*256*/
28600 && epartIsReg(getUChar(delta))) {
28601 UChar modrm = getUChar(delta);
28602 UInt rG = gregOfRexRM(pfx, modrm);
28603 UInt rE = eregOfRexRM(pfx, modrm);
28604 DIP("vbroadcastsd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
28605 IRTemp t64 = newTemp(Ity_I64);
28606 assign(t64, getXMMRegLane64(rE, 0));
28607 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28608 mkexpr(t64), mkexpr(t64));
28609 putYMMReg(rG, res);
28610 delta++;
28611 goto decode_success;
28613 break;
28615 case 0x1A:
28616 /* VBROADCASTF128 m128, ymm1 = VEX.256.66.0F38.WIG 1A /r */
28617 if (have66noF2noF3(pfx)
28618 && 1==getVexL(pfx)/*256*/
28619 && !epartIsReg(getUChar(delta))) {
28620 UChar modrm = getUChar(delta);
28621 UInt rG = gregOfRexRM(pfx, modrm);
28622 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28623 delta += alen;
28624 DIP("vbroadcastf128 %s,%s\n", dis_buf, nameYMMReg(rG));
28625 IRTemp t128 = newTemp(Ity_V128);
28626 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
28627 putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) );
28628 goto decode_success;
28630 break;
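      /* The VPABSB/VPABSW/VPABSD cases below all use the shared unary
         E-to-G framing and differ only in the math helper passed in; the
         pap1/pap2/pap4 suffixes presumably select 1-, 2- and 4-byte lane
         widths. */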
28632 case 0x1C:
28633 /* VPABSB xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1C /r */
28634 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28635 delta = dis_AVX128_E_to_G_unary(
28636 uses_vvvv, vbi, pfx, delta,
28637 "vpabsb", math_PABS_XMM_pap1 );
28638 goto decode_success;
28640 /* VPABSB ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1C /r */
28641 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28642 delta = dis_AVX256_E_to_G_unary(
28643 uses_vvvv, vbi, pfx, delta,
28644 "vpabsb", math_PABS_YMM_pap1 );
28645 goto decode_success;
28647 break;
28649 case 0x1D:
28650 /* VPABSW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1D /r */
28651 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28652 delta = dis_AVX128_E_to_G_unary(
28653 uses_vvvv, vbi, pfx, delta,
28654 "vpabsw", math_PABS_XMM_pap2 );
28655 goto decode_success;
28657 /* VPABSW ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1D /r */
28658 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28659 delta = dis_AVX256_E_to_G_unary(
28660 uses_vvvv, vbi, pfx, delta,
28661 "vpabsw", math_PABS_YMM_pap2 );
28662 goto decode_success;
28664 break;
28666 case 0x1E:
28667 /* VPABSD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1E /r */
28668 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28669 delta = dis_AVX128_E_to_G_unary(
28670 uses_vvvv, vbi, pfx, delta,
28671 "vpabsd", math_PABS_XMM_pap4 );
28672 goto decode_success;
28674 /* VPABSD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1E /r */
28675 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28676 delta = dis_AVX256_E_to_G_unary(
28677 uses_vvvv, vbi, pfx, delta,
28678 "vpabsd", math_PABS_YMM_pap4 );
28679 goto decode_success;
28681 break;
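      /* Cases 0x20..0x25 (VPMOVSX..) and 0x30..0x35 (VPMOVZX..) mostly
         share the dis_PMOVxX* helpers, with the xIsZ flag choosing zero-
         versus sign-extension; only the BQ and WQ widenings have separate
         SX/ZX helpers. */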
28683 case 0x20:
28684 /* VPMOVSXBW xmm2/m64, xmm1 */
28685 /* VPMOVSXBW = VEX.128.66.0F38.WIG 20 /r */
28686 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28687 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
28688 True/*isAvx*/, False/*!xIsZ*/ );
28689 goto decode_success;
28691 /* VPMOVSXBW xmm2/m128, ymm1 */
28692 /* VPMOVSXBW = VEX.256.66.0F38.WIG 20 /r */
28693 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28694 delta = dis_PMOVxXBW_256( vbi, pfx, delta, False/*!xIsZ*/ );
28695 goto decode_success;
28697 break;
28699 case 0x21:
28700 /* VPMOVSXBD xmm2/m32, xmm1 */
28701 /* VPMOVSXBD = VEX.128.66.0F38.WIG 21 /r */
28702 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28703 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
28704 True/*isAvx*/, False/*!xIsZ*/ );
28705 goto decode_success;
28707 /* VPMOVSXBD xmm2/m64, ymm1 */
28708 /* VPMOVSXBD = VEX.256.66.0F38.WIG 21 /r */
28709 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28710 delta = dis_PMOVxXBD_256( vbi, pfx, delta, False/*!xIsZ*/ );
28711 goto decode_success;
28713 break;
28715 case 0x22:
28716 /* VPMOVSXBQ xmm2/m16, xmm1 */
28717 /* VPMOVSXBQ = VEX.128.66.0F38.WIG 22 /r */
28718 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28719 delta = dis_PMOVSXBQ_128( vbi, pfx, delta, True/*isAvx*/ );
28720 goto decode_success;
28722 /* VPMOVSXBQ xmm2/m32, ymm1 */
28723 /* VPMOVSXBQ = VEX.256.66.0F38.WIG 22 /r */
28724 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28725 delta = dis_PMOVSXBQ_256( vbi, pfx, delta );
28726 goto decode_success;
28728 break;
28730 case 0x23:
28731 /* VPMOVSXWD xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 23 /r */
28732 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28733 delta = dis_PMOVxXWD_128( vbi, pfx, delta,
28734 True/*isAvx*/, False/*!xIsZ*/ );
28735 goto decode_success;
28737 /* VPMOVSXWD xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 23 /r */
28738 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28739 delta = dis_PMOVxXWD_256( vbi, pfx, delta, False/*!xIsZ*/ );
28740 goto decode_success;
28742 break;
28744 case 0x24:
28745 /* VPMOVSXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 24 /r */
28746 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28747 delta = dis_PMOVSXWQ_128( vbi, pfx, delta, True/*isAvx*/ );
28748 goto decode_success;
28750 /* VPMOVSXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 24 /r */
28751 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28752 delta = dis_PMOVSXWQ_256( vbi, pfx, delta );
28753 goto decode_success;
28755 break;
28757 case 0x25:
28758 /* VPMOVSXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 25 /r */
28759 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28760 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
28761 True/*isAvx*/, False/*!xIsZ*/ );
28762 goto decode_success;
28764 /* VPMOVSXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 25 /r */
28765 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28766 delta = dis_PMOVxXDQ_256( vbi, pfx, delta, False/*!xIsZ*/ );
28767 goto decode_success;
28769 break;
28771 case 0x28:
28772 /* VPMULDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 28 /r */
28773 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28774 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
28775 uses_vvvv, vbi, pfx, delta,
28776 "vpmuldq", math_PMULDQ_128 );
28777 goto decode_success;
28779 /* VPMULDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 28 /r */
28780 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28781 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28782 uses_vvvv, vbi, pfx, delta,
28783 "vpmuldq", math_PMULDQ_256 );
28784 goto decode_success;
28786 break;
28788 case 0x29:
28789 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */
28790 /* VPCMPEQQ = VEX.NDS.128.66.0F38.WIG 29 /r */
28791 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28792 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28793 uses_vvvv, vbi, pfx, delta, "vpcmpeqq", Iop_CmpEQ64x2 );
28794 goto decode_success;
28796 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */
28797 /* VPCMPEQQ = VEX.NDS.256.66.0F38.WIG 29 /r */
28798 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28799 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28800 uses_vvvv, vbi, pfx, delta, "vpcmpeqq", Iop_CmpEQ64x4 );
28801 goto decode_success;
28803 break;
28805 case 0x2A:
28806 /* VMOVNTDQA m128, xmm1 = VEX.128.66.0F38.WIG 2A /r */
28807 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28808 && !epartIsReg(getUChar(delta))) {
28809 UChar modrm = getUChar(delta);
28810 UInt rD = gregOfRexRM(pfx, modrm);
28811 IRTemp tD = newTemp(Ity_V128);
28812 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28813 delta += alen;
28814 gen_SEGV_if_not_16_aligned(addr);
28815 assign(tD, loadLE(Ity_V128, mkexpr(addr)));
28816 DIP("vmovntdqa %s,%s\n", dis_buf, nameXMMReg(rD));
28817 putYMMRegLoAndZU(rD, mkexpr(tD));
28818 goto decode_success;
28820 /* VMOVNTDQA m256, ymm1 = VEX.256.66.0F38.WIG 2A /r */
28821 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28822 && !epartIsReg(getUChar(delta))) {
28823 UChar modrm = getUChar(delta);
28824 UInt rD = gregOfRexRM(pfx, modrm);
28825 IRTemp tD = newTemp(Ity_V256);
28826 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28827 delta += alen;
28828 gen_SEGV_if_not_32_aligned(addr);
28829 assign(tD, loadLE(Ity_V256, mkexpr(addr)));
28830 DIP("vmovntdqa %s,%s\n", dis_buf, nameYMMReg(rD));
28831 putYMMReg(rD, mkexpr(tD));
28832 goto decode_success;
28834 break;
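      /* For VMOVNTDQA the non-temporal hint is not modelled: the insn is
         translated as a plain load, with gen_SEGV_if_not_16_aligned /
         gen_SEGV_if_not_32_aligned enforcing the architectural alignment
         requirement on the memory operand. */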
28836 case 0x2B:
28837 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */
28838 /* VPACKUSDW = VEX.NDS.128.66.0F38.WIG 2B /r */
28839 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28840 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
28841 uses_vvvv, vbi, pfx, delta, "vpackusdw",
28842 Iop_QNarrowBin32Sto16Ux8, NULL,
28843 False/*!invertLeftArg*/, True/*swapArgs*/ );
28844 goto decode_success;
28846 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */
28847 /* VPACKUSDW = VEX.NDS.256.66.0F38.WIG 2B /r */
28848 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28849 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28850 uses_vvvv, vbi, pfx, delta, "vpackusdw",
28851 math_VPACKUSDW_YMM );
28852 goto decode_success;
28854 break;
28856 case 0x2C:
28857 /* VMASKMOVPS m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 2C /r */
28858 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28859 && 0==getRexW(pfx)/*W0*/
28860 && !epartIsReg(getUChar(delta))) {
28861 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
28862 /*!isYMM*/False, Ity_I32, /*isLoad*/True );
28863 goto decode_success;
28865 /* VMASKMOVPS m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 2C /r */
28866 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28867 && 0==getRexW(pfx)/*W0*/
28868 && !epartIsReg(getUChar(delta))) {
28869 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
28870 /*isYMM*/True, Ity_I32, /*isLoad*/True );
28871 goto decode_success;
28873 break;
28875 case 0x2D:
28876 /* VMASKMOVPD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 2D /r */
28877 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28878 && 0==getRexW(pfx)/*W0*/
28879 && !epartIsReg(getUChar(delta))) {
28880 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
28881 /*!isYMM*/False, Ity_I64, /*isLoad*/True );
28882 goto decode_success;
28884 /* VMASKMOVPD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 2D /r */
28885 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28886 && 0==getRexW(pfx)/*W0*/
28887 && !epartIsReg(getUChar(delta))) {
28888 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
28889 /*isYMM*/True, Ity_I64, /*isLoad*/True );
28890 goto decode_success;
28892 break;
28894 case 0x2E:
28895 /* VMASKMOVPS xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 2E /r */
28896 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28897 && 0==getRexW(pfx)/*W0*/
28898 && !epartIsReg(getUChar(delta))) {
28899 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
28900 /*!isYMM*/False, Ity_I32, /*!isLoad*/False );
28901 goto decode_success;
28903 /* VMASKMOVPS ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 2E /r */
28904 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28905 && 0==getRexW(pfx)/*W0*/
28906 && !epartIsReg(getUChar(delta))) {
28907 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
28908 /*isYMM*/True, Ity_I32, /*!isLoad*/False );
28909 goto decode_success;
28911 break;
28913 case 0x2F:
28914 /* VMASKMOVPD xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 2F /r */
28915 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28916 && 0==getRexW(pfx)/*W0*/
28917 && !epartIsReg(getUChar(delta))) {
28918 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
28919 /*!isYMM*/False, Ity_I64, /*!isLoad*/False );
28920 goto decode_success;
28922 /* VMASKMOVPD ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 2F /r */
28923 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28924 && 0==getRexW(pfx)/*W0*/
28925 && !epartIsReg(getUChar(delta))) {
28926 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
28927 /*isYMM*/True, Ity_I64, /*!isLoad*/False );
28928 goto decode_success;
28930 break;
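      /* Cases 0x2C..0x2F all route to dis_VMASKMOV, parameterised by
         vector width (isYMM), lane type (Ity_I32 for PS, Ity_I64 for PD)
         and direction (isLoad).  Only memory forms are decoded
         (!epartIsReg), matching the ISA, which defines VMASKMOVPS/PD only
         with a memory operand. */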
28932 case 0x30:
28933 /* VPMOVZXBW xmm2/m64, xmm1 */
28934 /* VPMOVZXBW = VEX.128.66.0F38.WIG 30 /r */
28935 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28936 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
28937 True/*isAvx*/, True/*xIsZ*/ );
28938 goto decode_success;
28940 /* VPMOVZXBW xmm2/m128, ymm1 */
28941 /* VPMOVZXBW = VEX.256.66.0F38.WIG 30 /r */
28942 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28943 delta = dis_PMOVxXBW_256( vbi, pfx, delta, True/*xIsZ*/ );
28944 goto decode_success;
28946 break;
28948 case 0x31:
28949 /* VPMOVZXBD xmm2/m32, xmm1 */
28950 /* VPMOVZXBD = VEX.128.66.0F38.WIG 31 /r */
28951 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28952 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
28953 True/*isAvx*/, True/*xIsZ*/ );
28954 goto decode_success;
28956 /* VPMOVZXBD xmm2/m64, ymm1 */
28957 /* VPMOVZXBD = VEX.256.66.0F38.WIG 31 /r */
28958 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28959 delta = dis_PMOVxXBD_256( vbi, pfx, delta, True/*xIsZ*/ );
28960 goto decode_success;
28962 break;
28964 case 0x32:
28965 /* VPMOVZXBQ xmm2/m16, xmm1 */
28966 /* VPMOVZXBQ = VEX.128.66.0F38.WIG 32 /r */
28967 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28968 delta = dis_PMOVZXBQ_128( vbi, pfx, delta, True/*isAvx*/ );
28969 goto decode_success;
28971 /* VPMOVZXBQ xmm2/m32, ymm1 */
28972 /* VPMOVZXBQ = VEX.256.66.0F38.WIG 32 /r */
28973 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28974 delta = dis_PMOVZXBQ_256( vbi, pfx, delta );
28975 goto decode_success;
28977 break;
28979 case 0x33:
28980 /* VPMOVZXWD xmm2/m64, xmm1 */
28981 /* VPMOVZXWD = VEX.128.66.0F38.WIG 33 /r */
28982 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28983 delta = dis_PMOVxXWD_128( vbi, pfx, delta,
28984 True/*isAvx*/, True/*xIsZ*/ );
28985 goto decode_success;
28987 /* VPMOVZXWD xmm2/m128, ymm1 */
28988 /* VPMOVZXWD = VEX.256.66.0F38.WIG 33 /r */
28989 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28990 delta = dis_PMOVxXWD_256( vbi, pfx, delta, True/*xIsZ*/ );
28991 goto decode_success;
28993 break;
28995 case 0x34:
28996 /* VPMOVZXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 34 /r */
28997 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28998 delta = dis_PMOVZXWQ_128( vbi, pfx, delta, True/*isAvx*/ );
28999 goto decode_success;
29001 /* VPMOVZXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 34 /r */
29002 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29003 delta = dis_PMOVZXWQ_256( vbi, pfx, delta );
29004 goto decode_success;
29006 break;
29008 case 0x35:
29009 /* VPMOVZXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 35 /r */
29010 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29011 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
29012 True/*isAvx*/, True/*xIsZ*/ );
29013 goto decode_success;
29015 /* VPMOVZXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 35 /r */
29016 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29017 delta = dis_PMOVxXDQ_256( vbi, pfx, delta, True/*xIsZ*/ );
29018 goto decode_success;
29020 break;
29022 case 0x36:
29023 /* VPERMD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 36 /r */
29024 if (have66noF2noF3(pfx)
29025 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
29026 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
29027 uses_vvvv, vbi, pfx, delta, "vpermd", math_VPERMD );
29028 goto decode_success;
29030 break;
29032 case 0x37:
29033 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */
29034 /* VPCMPGTQ = VEX.NDS.128.66.0F38.WIG 37 /r */
29035 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29036 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29037 uses_vvvv, vbi, pfx, delta, "vpcmpgtq", Iop_CmpGT64Sx2 );
29038 goto decode_success;
29040 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */
29041 /* VPCMPGTQ = VEX.NDS.256.66.0F38.WIG 37 /r */
29042 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29043 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29044 uses_vvvv, vbi, pfx, delta, "vpcmpgtq", Iop_CmpGT64Sx4 );
29045 goto decode_success;
29047 break;
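      /* Cases 0x38..0x40 (the VPMIN/VPMAX family plus VPMULLD) are plain
         lane-wise binary ops: each maps onto a single IROp through the
         _simple NDS framing, in both 128- and 256-bit variants. */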
29049 case 0x38:
29050 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */
29051 /* VPMINSB = VEX.NDS.128.66.0F38.WIG 38 /r */
29052 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29053 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29054 uses_vvvv, vbi, pfx, delta, "vpminsb", Iop_Min8Sx16 );
29055 goto decode_success;
29057 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */
29058 /* VPMINSB = VEX.NDS.256.66.0F38.WIG 38 /r */
29059 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29060 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29061 uses_vvvv, vbi, pfx, delta, "vpminsb", Iop_Min8Sx32 );
29062 goto decode_success;
29064 break;
29066 case 0x39:
29067 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */
29068 /* VPMINSD = VEX.NDS.128.66.0F38.WIG 39 /r */
29069 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29070 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29071 uses_vvvv, vbi, pfx, delta, "vpminsd", Iop_Min32Sx4 );
29072 goto decode_success;
29074 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */
29075 /* VPMINSD = VEX.NDS.256.66.0F38.WIG 39 /r */
29076 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29077 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29078 uses_vvvv, vbi, pfx, delta, "vpminsd", Iop_Min32Sx8 );
29079 goto decode_success;
29081 break;
29083 case 0x3A:
29084 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */
29085 /* VPMINUW = VEX.NDS.128.66.0F38.WIG 3A /r */
29086 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29087 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29088 uses_vvvv, vbi, pfx, delta, "vpminuw", Iop_Min16Ux8 );
29089 goto decode_success;
29091 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */
29092 /* VPMINUW = VEX.NDS.256.66.0F38.WIG 3A /r */
29093 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29094 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29095 uses_vvvv, vbi, pfx, delta, "vpminuw", Iop_Min16Ux16 );
29096 goto decode_success;
29098 break;
29100 case 0x3B:
29101 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */
29102 /* VPMINUD = VEX.NDS.128.66.0F38.WIG 3B /r */
29103 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29104 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29105 uses_vvvv, vbi, pfx, delta, "vpminud", Iop_Min32Ux4 );
29106 goto decode_success;
29108 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */
29109 /* VPMINUD = VEX.NDS.256.66.0F38.WIG 3B /r */
29110 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29111 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29112 uses_vvvv, vbi, pfx, delta, "vpminud", Iop_Min32Ux8 );
29113 goto decode_success;
29115 break;
29117 case 0x3C:
29118 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */
29119 /* VPMAXSB = VEX.NDS.128.66.0F38.WIG 3C /r */
29120 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29121 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29122 uses_vvvv, vbi, pfx, delta, "vpmaxsb", Iop_Max8Sx16 );
29123 goto decode_success;
29125 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */
29126 /* VPMAXSB = VEX.NDS.256.66.0F38.WIG 3C /r */
29127 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29128 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29129 uses_vvvv, vbi, pfx, delta, "vpmaxsb", Iop_Max8Sx32 );
29130 goto decode_success;
29132 break;
29134 case 0x3D:
29135 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */
29136 /* VPMAXSD = VEX.NDS.128.66.0F38.WIG 3D /r */
29137 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29138 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29139 uses_vvvv, vbi, pfx, delta, "vpmaxsd", Iop_Max32Sx4 );
29140 goto decode_success;
29142 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */
29143 /* VPMAXSD = VEX.NDS.256.66.0F38.WIG 3D /r */
29144 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29145 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29146 uses_vvvv, vbi, pfx, delta, "vpmaxsd", Iop_Max32Sx8 );
29147 goto decode_success;
29149 break;
29151 case 0x3E:
29152 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */
29153 /* VPMAXUW = VEX.NDS.128.66.0F38.WIG 3E /r */
29154 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29155 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29156 uses_vvvv, vbi, pfx, delta, "vpmaxuw", Iop_Max16Ux8 );
29157 goto decode_success;
29159 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */
29160 /* VPMAXUW = VEX.NDS.256.66.0F38.WIG 3E /r */
29161 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29162 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29163 uses_vvvv, vbi, pfx, delta, "vpmaxuw", Iop_Max16Ux16 );
29164 goto decode_success;
29166 break;
29168 case 0x3F:
29169 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */
29170 /* VPMAXUD = VEX.NDS.128.66.0F38.WIG 3F /r */
29171 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29172 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29173 uses_vvvv, vbi, pfx, delta, "vpmaxud", Iop_Max32Ux4 );
29174 goto decode_success;
29176 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */
29177 /* VPMAXUD = VEX.NDS.256.66.0F38.WIG 3F /r */
29178 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29179 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29180 uses_vvvv, vbi, pfx, delta, "vpmaxud", Iop_Max32Ux8 );
29181 goto decode_success;
29183 break;
29185 case 0x40:
29186 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */
29187 /* VPMULLD = VEX.NDS.128.66.0F38.WIG 40 /r */
29188 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29189 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29190 uses_vvvv, vbi, pfx, delta, "vpmulld", Iop_Mul32x4 );
29191 goto decode_success;
29193 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */
29194 /* VPMULLD = VEX.NDS.256.66.0F38.WIG 40 /r */
29195 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29196 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29197 uses_vvvv, vbi, pfx, delta, "vpmulld", Iop_Mul32x8 );
29198 goto decode_success;
29200 break;
29202 case 0x41:
29203 /* VPHMINPOSUW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 41 /r */
29204 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29205 delta = dis_PHMINPOSUW_128( vbi, pfx, delta, True/*isAvx*/ );
29206 goto decode_success;
29208 break;
29210 case 0x45:
29211 /* VPSRLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 45 /r */
29212 /* VPSRLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 45 /r */
29213 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
29214 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsrlvd",
29215 Iop_Shr32, 1==getVexL(pfx) );
29216 *uses_vvvv = True;
29217 goto decode_success;
29219 /* VPSRLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 45 /r */
29220 /* VPSRLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 45 /r */
29221 if (have66noF2noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
29222 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsrlvq",
29223 Iop_Shr64, 1==getVexL(pfx) );
29224 *uses_vvvv = True;
29225 goto decode_success;
29227 break;
29229 case 0x46:
29230 /* VPSRAVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 46 /r */
29231 /* VPSRAVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 46 /r */
29232 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
29233 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsravd",
29234 Iop_Sar32, 1==getVexL(pfx) );
29235 *uses_vvvv = True;
29236 goto decode_success;
29238 break;
29240 case 0x47:
29241 /* VPSLLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 47 /r */
29242 /* VPSLLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 47 /r */
29243 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
29244 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsllvd",
29245 Iop_Shl32, 1==getVexL(pfx) );
29246 *uses_vvvv = True;
29247 goto decode_success;
29249 /* VPSLLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 47 /r */
29250 /* VPSLLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 47 /r */
29251 if (have66noF2noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
29252 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsllvq",
29253 Iop_Shl64, 1==getVexL(pfx) );
29254 *uses_vvvv = True;
29255 goto decode_success;
29257 break;
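      /* The variable-shift cases 0x45..0x47 share dis_AVX_var_shiftV_byE;
         VEX.W picks 32- vs 64-bit lanes and VEX.L picks the 128- vs
         256-bit form.  There is no W1 case for 0x46 since AVX2 defines no
         VPSRAVQ (64-bit arithmetic right shifts only arrive with AVX-512). */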
29259 case 0x58:
29260 /* VPBROADCASTD xmm2/m32, xmm1 = VEX.128.66.0F38.W0 58 /r */
29261 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29262 && 0==getRexW(pfx)/*W0*/) {
29263 UChar modrm = getUChar(delta);
29264 UInt rG = gregOfRexRM(pfx, modrm);
29265 IRTemp t32 = newTemp(Ity_I32);
29266 if (epartIsReg(modrm)) {
29267 UInt rE = eregOfRexRM(pfx, modrm);
29268 delta++;
29269 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
29270 assign(t32, getXMMRegLane32(rE, 0));
29271 } else {
29272 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29273 delta += alen;
29274 DIP("vpbroadcastd %s,%s\n", dis_buf, nameXMMReg(rG));
29275            assign(t32, loadLE(Ity_I32, mkexpr(addr)));
29276         }
29277 IRTemp t64 = newTemp(Ity_I64);
29278 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29279 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
29280 putYMMRegLoAndZU(rG, res);
29281 goto decode_success;
29283 /* VPBROADCASTD xmm2/m32, ymm1 = VEX.256.66.0F38.W0 58 /r */
29284 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29285 && 0==getRexW(pfx)/*W0*/) {
29286 UChar modrm = getUChar(delta);
29287 UInt rG = gregOfRexRM(pfx, modrm);
29288 IRTemp t32 = newTemp(Ity_I32);
29289 if (epartIsReg(modrm)) {
29290 UInt rE = eregOfRexRM(pfx, modrm);
29291 delta++;
29292 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
29293 assign(t32, getXMMRegLane32(rE, 0));
29294 } else {
29295 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29296 delta += alen;
29297 DIP("vpbroadcastd %s,%s\n", dis_buf, nameYMMReg(rG));
29298            assign(t32, loadLE(Ity_I32, mkexpr(addr)));
29299         }
29300 IRTemp t64 = newTemp(Ity_I64);
29301 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29302 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
29303 mkexpr(t64), mkexpr(t64));
29304 putYMMReg(rG, res);
29305 goto decode_success;
29307 break;
29309 case 0x59:
29310 /* VPBROADCASTQ xmm2/m64, xmm1 = VEX.128.66.0F38.W0 59 /r */
29311 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29312 && 0==getRexW(pfx)/*W0*/) {
29313 UChar modrm = getUChar(delta);
29314 UInt rG = gregOfRexRM(pfx, modrm);
29315 IRTemp t64 = newTemp(Ity_I64);
29316 if (epartIsReg(modrm)) {
29317 UInt rE = eregOfRexRM(pfx, modrm);
29318 delta++;
29319 DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
29320 assign(t64, getXMMRegLane64(rE, 0));
29321 } else {
29322 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29323 delta += alen;
29324 DIP("vpbroadcastq %s,%s\n", dis_buf, nameXMMReg(rG));
29325            assign(t64, loadLE(Ity_I64, mkexpr(addr)));
29326         }
29327 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
29328 putYMMRegLoAndZU(rG, res);
29329 goto decode_success;
29331 /* VPBROADCASTQ xmm2/m64, ymm1 = VEX.256.66.0F38.W0 59 /r */
29332 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29333 && 0==getRexW(pfx)/*W0*/) {
29334 UChar modrm = getUChar(delta);
29335 UInt rG = gregOfRexRM(pfx, modrm);
29336 IRTemp t64 = newTemp(Ity_I64);
29337 if (epartIsReg(modrm)) {
29338 UInt rE = eregOfRexRM(pfx, modrm);
29339 delta++;
29340 DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
29341 assign(t64, getXMMRegLane64(rE, 0));
29342 } else {
29343 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29344 delta += alen;
29345 DIP("vpbroadcastq %s,%s\n", dis_buf, nameYMMReg(rG));
29346            assign(t64, loadLE(Ity_I64, mkexpr(addr)));
29347         }
29348 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
29349 mkexpr(t64), mkexpr(t64));
29350 putYMMReg(rG, res);
29351 goto decode_success;
29353 break;
29355 case 0x5A:
29356 /* VBROADCASTI128 m128, ymm1 = VEX.256.66.0F38.WIG 5A /r */
29357 if (have66noF2noF3(pfx)
29358 && 1==getVexL(pfx)/*256*/
29359 && !epartIsReg(getUChar(delta))) {
29360 UChar modrm = getUChar(delta);
29361 UInt rG = gregOfRexRM(pfx, modrm);
29362 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29363 delta += alen;
29364 DIP("vbroadcasti128 %s,%s\n", dis_buf, nameYMMReg(rG));
29365 IRTemp t128 = newTemp(Ity_V128);
29366 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
29367 putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) );
29368 goto decode_success;
29370 break;
29372 case 0x78:
29373 /* VPBROADCASTB xmm2/m8, xmm1 = VEX.128.66.0F38.W0 78 /r */
29374 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29375 && 0==getRexW(pfx)/*W0*/) {
29376 UChar modrm = getUChar(delta);
29377 UInt rG = gregOfRexRM(pfx, modrm);
29378 IRTemp t8 = newTemp(Ity_I8);
29379 if (epartIsReg(modrm)) {
29380 UInt rE = eregOfRexRM(pfx, modrm);
29381 delta++;
29382 DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
29383 assign(t8, unop(Iop_32to8, getXMMRegLane32(rE, 0)));
29384 } else {
29385 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29386 delta += alen;
29387 DIP("vpbroadcastb %s,%s\n", dis_buf, nameXMMReg(rG));
29388            assign(t8, loadLE(Ity_I8, mkexpr(addr)));
29389         }
29390 IRTemp t16 = newTemp(Ity_I16);
29391 assign(t16, binop(Iop_8HLto16, mkexpr(t8), mkexpr(t8)));
29392 IRTemp t32 = newTemp(Ity_I32);
29393 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
29394 IRTemp t64 = newTemp(Ity_I64);
29395 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29396 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
29397 putYMMRegLoAndZU(rG, res);
29398 goto decode_success;
29400 /* VPBROADCASTB xmm2/m8, ymm1 = VEX.256.66.0F38.W0 78 /r */
29401 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29402 && 0==getRexW(pfx)/*W0*/) {
29403 UChar modrm = getUChar(delta);
29404 UInt rG = gregOfRexRM(pfx, modrm);
29405 IRTemp t8 = newTemp(Ity_I8);
29406 if (epartIsReg(modrm)) {
29407 UInt rE = eregOfRexRM(pfx, modrm);
29408 delta++;
29409 DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
29410 assign(t8, unop(Iop_32to8, getXMMRegLane32(rE, 0)));
29411 } else {
29412 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29413 delta += alen;
29414 DIP("vpbroadcastb %s,%s\n", dis_buf, nameYMMReg(rG));
29415            assign(t8, loadLE(Ity_I8, mkexpr(addr)));
29416         }
29417 IRTemp t16 = newTemp(Ity_I16);
29418 assign(t16, binop(Iop_8HLto16, mkexpr(t8), mkexpr(t8)));
29419 IRTemp t32 = newTemp(Ity_I32);
29420 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
29421 IRTemp t64 = newTemp(Ity_I64);
29422 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29423 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
29424 mkexpr(t64), mkexpr(t64));
29425 putYMMReg(rG, res);
29426 goto decode_success;
29428 break;
29430 case 0x79:
29431 /* VPBROADCASTW xmm2/m16, xmm1 = VEX.128.66.0F38.W0 79 /r */
29432 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29433 && 0==getRexW(pfx)/*W0*/) {
29434 UChar modrm = getUChar(delta);
29435 UInt rG = gregOfRexRM(pfx, modrm);
29436 IRTemp t16 = newTemp(Ity_I16);
29437 if (epartIsReg(modrm)) {
29438 UInt rE = eregOfRexRM(pfx, modrm);
29439 delta++;
29440 DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
29441 assign(t16, unop(Iop_32to16, getXMMRegLane32(rE, 0)));
29442 } else {
29443 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29444 delta += alen;
29445 DIP("vpbroadcastw %s,%s\n", dis_buf, nameXMMReg(rG));
29446            assign(t16, loadLE(Ity_I16, mkexpr(addr)));
29447         }
29448 IRTemp t32 = newTemp(Ity_I32);
29449 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
29450 IRTemp t64 = newTemp(Ity_I64);
29451 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29452 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
29453 putYMMRegLoAndZU(rG, res);
29454 goto decode_success;
29456 /* VPBROADCASTW xmm2/m16, ymm1 = VEX.256.66.0F38.W0 79 /r */
29457 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29458 && 0==getRexW(pfx)/*W0*/) {
29459 UChar modrm = getUChar(delta);
29460 UInt rG = gregOfRexRM(pfx, modrm);
29461 IRTemp t16 = newTemp(Ity_I16);
29462 if (epartIsReg(modrm)) {
29463 UInt rE = eregOfRexRM(pfx, modrm);
29464 delta++;
29465 DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
29466 assign(t16, unop(Iop_32to16, getXMMRegLane32(rE, 0)));
29467 } else {
29468 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29469 delta += alen;
29470 DIP("vpbroadcastw %s,%s\n", dis_buf, nameYMMReg(rG));
29471            assign(t16, loadLE(Ity_I16, mkexpr(addr)));
29472         }
29473 IRTemp t32 = newTemp(Ity_I32);
29474 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
29475 IRTemp t64 = newTemp(Ity_I64);
29476 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29477 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
29478 mkexpr(t64), mkexpr(t64));
29479 putYMMReg(rG, res);
29480 goto decode_success;
29482 break;
29484 case 0x8C:
29485 /* VPMASKMOVD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 8C /r */
29486 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29487 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29488 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
29489 /*!isYMM*/False, Ity_I32, /*isLoad*/True );
29490 goto decode_success;
29492 /* VPMASKMOVD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 8C /r */
29493 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29494 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29495 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
29496 /*isYMM*/True, Ity_I32, /*isLoad*/True );
29497 goto decode_success;
29499 /* VPMASKMOVQ m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 8C /r */
29500 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29501 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29502 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
29503 /*!isYMM*/False, Ity_I64, /*isLoad*/True );
29504 goto decode_success;
29506 /* VPMASKMOVQ m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 8C /r */
29507 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29508 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29509 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
29510 /*isYMM*/True, Ity_I64, /*isLoad*/True );
29511 goto decode_success;
29513 break;
29515 case 0x8E:
29516 /* VPMASKMOVD xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 8E /r */
29517 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29518 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29519 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
29520 /*!isYMM*/False, Ity_I32, /*!isLoad*/False );
29521 goto decode_success;
29523 /* VPMASKMOVD ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 8E /r */
29524 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29525 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29526 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
29527 /*isYMM*/True, Ity_I32, /*!isLoad*/False );
29528 goto decode_success;
29530 /* VPMASKMOVQ xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W1 8E /r */
29531 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29532 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29533 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
29534 /*!isYMM*/False, Ity_I64, /*!isLoad*/False );
29535 goto decode_success;
29537 /* VPMASKMOVQ ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W1 8E /r */
29538 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29539 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29540 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
29541 /*isYMM*/True, Ity_I64, /*!isLoad*/False );
29542 goto decode_success;
29544 break;
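      /* The gather cases 0x90..0x93 all go through dis_VGATHER, which is
         expected to leave delta unchanged when it declines to decode
         (presumably when the operand constraints, e.g. distinctness of the
         index, mask and destination registers, are violated).  The
         `delta != delta0' guard therefore only takes decode_success when
         the helper really consumed the instruction. */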
29546 case 0x90:
29547 /* VPGATHERDD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 90 /r */
29548 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29549 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29550 Long delta0 = delta;
29551 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdd",
29552 /*!isYMM*/False, /*!isVM64x*/False, Ity_I32 );
29553 if (delta != delta0)
29554 goto decode_success;
29556 /* VPGATHERDD ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 90 /r */
29557 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29558 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29559 Long delta0 = delta;
29560 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdd",
29561 /*isYMM*/True, /*!isVM64x*/False, Ity_I32 );
29562 if (delta != delta0)
29563 goto decode_success;
29565 /* VPGATHERDQ xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 90 /r */
29566 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29567 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29568 Long delta0 = delta;
29569 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdq",
29570 /*!isYMM*/False, /*!isVM64x*/False, Ity_I64 );
29571 if (delta != delta0)
29572 goto decode_success;
29574 /* VPGATHERDQ ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 90 /r */
29575 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29576 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29577 Long delta0 = delta;
29578 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdq",
29579 /*isYMM*/True, /*!isVM64x*/False, Ity_I64 );
29580 if (delta != delta0)
29581 goto decode_success;
29583 break;
29585 case 0x91:
29586 /* VPGATHERQD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 91 /r */
29587 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29588 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29589 Long delta0 = delta;
29590 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqd",
29591 /*!isYMM*/False, /*isVM64x*/True, Ity_I32 );
29592 if (delta != delta0)
29593 goto decode_success;
29595 /* VPGATHERQD xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 91 /r */
29596 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29597 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29598 Long delta0 = delta;
29599 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqd",
29600 /*isYMM*/True, /*isVM64x*/True, Ity_I32 );
29601 if (delta != delta0)
29602 goto decode_success;
29604 /* VPGATHERQQ xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 91 /r */
29605 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29606 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29607 Long delta0 = delta;
29608 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqq",
29609 /*!isYMM*/False, /*isVM64x*/True, Ity_I64 );
29610 if (delta != delta0)
29611 goto decode_success;
29613 /* VPGATHERQQ ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 91 /r */
29614 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29615 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29616 Long delta0 = delta;
29617 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqq",
29618 /*isYMM*/True, /*isVM64x*/True, Ity_I64 );
29619 if (delta != delta0)
29620 goto decode_success;
29622 break;
29624 case 0x92:
29625 /* VGATHERDPS xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 92 /r */
29626 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29627 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29628 Long delta0 = delta;
29629 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdps",
29630 /*!isYMM*/False, /*!isVM64x*/False, Ity_I32 );
29631 if (delta != delta0)
29632 goto decode_success;
29634 /* VGATHERDPS ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 92 /r */
29635 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29636 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29637 Long delta0 = delta;
29638 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdps",
29639 /*isYMM*/True, /*!isVM64x*/False, Ity_I32 );
29640 if (delta != delta0)
29641 goto decode_success;
29643 /* VGATHERDPD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 92 /r */
29644 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29645 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29646 Long delta0 = delta;
29647 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdpd",
29648 /*!isYMM*/False, /*!isVM64x*/False, Ity_I64 );
29649 if (delta != delta0)
29650 goto decode_success;
29652 /* VGATHERDPD ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 92 /r */
29653 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29654 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29655 Long delta0 = delta;
29656 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdpd",
29657 /*isYMM*/True, /*!isVM64x*/False, Ity_I64 );
29658 if (delta != delta0)
29659 goto decode_success;
29661 break;
29663 case 0x93:
29664 /* VGATHERQPS xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 93 /r */
29665 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29666 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29667 Long delta0 = delta;
29668 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqps",
29669 /*!isYMM*/False, /*isVM64x*/True, Ity_I32 );
29670 if (delta != delta0)
29671 goto decode_success;
29673 /* VGATHERQPS xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 93 /r */
29674 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29675 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29676 Long delta0 = delta;
29677 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqps",
29678 /*isYMM*/True, /*isVM64x*/True, Ity_I32 );
29679 if (delta != delta0)
29680 goto decode_success;
29682 /* VGATHERQPD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 93 /r */
29683 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29684 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29685 Long delta0 = delta;
29686 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqpd",
29687 /*!isYMM*/False, /*isVM64x*/True, Ity_I64 );
29688 if (delta != delta0)
29689 goto decode_success;
29691 /* VGATHERQPD ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 93 /r */
29692 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29693 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29694 Long delta0 = delta;
29695 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqpd",
29696 /*isYMM*/True, /*isVM64x*/True, Ity_I64 );
29697 if (delta != delta0)
29698 goto decode_success;
29700 break;
29702 case 0x96 ... 0x9F:
29703 case 0xA6 ... 0xAF:
29704 case 0xB6 ... 0xBF:
29705 /* VFMADDSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 96 /r */
29706 /* VFMADDSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 96 /r */
29707 /* VFMADDSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 96 /r */
29708 /* VFMADDSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 96 /r */
29709 /* VFMSUBADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 97 /r */
29710 /* VFMSUBADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 97 /r */
29711 /* VFMSUBADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 97 /r */
29712 /* VFMSUBADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 97 /r */
29713 /* VFMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 98 /r */
29714 /* VFMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 98 /r */
29715 /* VFMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 98 /r */
29716 /* VFMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 98 /r */
29717 /* VFMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 99 /r */
29718 /* VFMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 99 /r */
29719 /* VFMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9A /r */
29720 /* VFMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9A /r */
29721 /* VFMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9A /r */
29722 /* VFMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9A /r */
29723 /* VFMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9B /r */
29724 /* VFMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9B /r */
29725 /* VFNMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9C /r */
29726 /* VFNMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9C /r */
29727 /* VFNMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9C /r */
29728 /* VFNMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9C /r */
29729 /* VFNMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9D /r */
29730 /* VFNMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9D /r */
29731 /* VFNMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9E /r */
29732 /* VFNMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9E /r */
29733 /* VFNMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9E /r */
29734 /* VFNMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9E /r */
29735 /* VFNMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9F /r */
29736 /* VFNMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9F /r */
29737 /* VFMADDSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A6 /r */
29738 /* VFMADDSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A6 /r */
29739 /* VFMADDSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A6 /r */
29740 /* VFMADDSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A6 /r */
29741 /* VFMSUBADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A7 /r */
29742 /* VFMSUBADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A7 /r */
29743 /* VFMSUBADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A7 /r */
29744 /* VFMSUBADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A7 /r */
29745 /* VFMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A8 /r */
29746 /* VFMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A8 /r */
29747 /* VFMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A8 /r */
29748 /* VFMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A8 /r */
29749 /* VFMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 A9 /r */
29750 /* VFMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 A9 /r */
29751 /* VFMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AA /r */
29752 /* VFMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AA /r */
29753 /* VFMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AA /r */
29754 /* VFMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AA /r */
29755 /* VFMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AB /r */
29756 /* VFMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AB /r */
29757 /* VFNMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AC /r */
29758 /* VFNMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AC /r */
29759 /* VFNMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AC /r */
29760 /* VFNMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AC /r */
29761 /* VFNMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AD /r */
29762 /* VFNMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AD /r */
29763 /* VFNMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AE /r */
29764 /* VFNMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AE /r */
29765 /* VFNMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AE /r */
29766 /* VFNMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AE /r */
29767 /* VFNMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AF /r */
29768 /* VFNMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AF /r */
29769 /* VFMADDSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B6 /r */
29770 /* VFMADDSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B6 /r */
29771 /* VFMADDSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B6 /r */
29772 /* VFMADDSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B6 /r */
29773 /* VFMSUBADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B7 /r */
29774 /* VFMSUBADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B7 /r */
29775 /* VFMSUBADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B7 /r */
29776 /* VFMSUBADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B7 /r */
29777 /* VFMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B8 /r */
29778 /* VFMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B8 /r */
29779 /* VFMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B8 /r */
29780 /* VFMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B8 /r */
29781 /* VFMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 B9 /r */
29782 /* VFMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 B9 /r */
29783 /* VFMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BA /r */
29784 /* VFMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BA /r */
29785 /* VFMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BA /r */
29786 /* VFMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BA /r */
29787 /* VFMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BB /r */
29788 /* VFMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BB /r */
29789 /* VFNMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BC /r */
29790 /* VFNMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BC /r */
29791 /* VFNMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BC /r */
29792 /* VFNMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BC /r */
29793 /* VFNMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BD /r */
29794 /* VFNMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BD /r */
29795 /* VFNMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BE /r */
29796 /* VFNMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BE /r */
29797 /* VFNMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BE /r */
29798 /* VFNMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BE /r */
29799 /* VFNMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BF /r */
29800 /* VFNMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BF /r */
29801 if (have66noF2noF3(pfx)) {
29802 delta = dis_FMA( vbi, pfx, delta, opc );
29803 *uses_vvvv = True;
29804 dres->hint = Dis_HintVerbose;
29805 goto decode_success;
29807 break;
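      /* All of the 0x96..0x9F, 0xA6..0xAF and 0xB6..0xBF FMA variants
         funnel into dis_FMA, which derives the form (132/213/231), the
         operation and the element width from the opcode byte and the VEX
         prefix state. */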
29809 case 0xDB:
29810 case 0xDC:
29811 case 0xDD:
29812 case 0xDE:
29813 case 0xDF:
29814 /* VAESIMC xmm2/m128, xmm1 = VEX.128.66.0F38.WIG DB /r */
29815 /* VAESENC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DC /r */
29816 /* VAESENCLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DD /r */
29817 /* VAESDEC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DE /r */
29818 /* VAESDECLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DF /r */
29819 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29820            delta = dis_AESx( vbi, pfx, delta, True/*isAvx*/, opc );
29821 if (opc != 0xDB) *uses_vvvv = True;
29822 goto decode_success;
29824 break;
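      /* dis_AESx dispatches on opc to select the AES primitive.  VAESIMC
         (0xDB) is the only two-operand form here, hence *uses_vvvv is set
         only for the other four opcodes. */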
29826 case 0xF2:
29827 /* ANDN r/m32, r32b, r32a = VEX.NDS.LZ.0F38.W0 F2 /r */
29828 /* ANDN r/m64, r64b, r64a = VEX.NDS.LZ.0F38.W1 F2 /r */
29829 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29830 Int size = getRexW(pfx) ? 8 : 4;
29831 IRType ty = szToITy(size);
29832 IRTemp dst = newTemp(ty);
29833 IRTemp src1 = newTemp(ty);
29834 IRTemp src2 = newTemp(ty);
29835 UChar rm = getUChar(delta);
29837 assign( src1, getIRegV(size,pfx) );
29838 if (epartIsReg(rm)) {
29839 assign( src2, getIRegE(size,pfx,rm) );
29840 DIP("andn %s,%s,%s\n", nameIRegE(size,pfx,rm),
29841 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
29842 delta++;
29843 } else {
29844 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29845 assign( src2, loadLE(ty, mkexpr(addr)) );
29846 DIP("andn %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
29847 nameIRegG(size,pfx,rm));
29848            delta += alen;
29849         }
29851 assign( dst, binop( mkSizedOp(ty,Iop_And8),
29852 unop( mkSizedOp(ty,Iop_Not8), mkexpr(src1) ),
29853 mkexpr(src2) ) );
29854 putIRegG( size, pfx, rm, mkexpr(dst) );
29855 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
29856 ? AMD64G_CC_OP_ANDN64
29857 : AMD64G_CC_OP_ANDN32)) );
29858 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
29859 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
29860 *uses_vvvv = True;
29861 goto decode_success;
29863 break;
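      /* The three BMI1 forms under 0xF3 are distinguished by the reg field
         of the modrm byte (/1 BLSR, /2 BLSMSK, /3 BLSI).  They compute
         src & (src-1), src ^ (src-1) and src & (-src) respectively and,
         being VEX "NDD" encodings, write their result to the vvvv register
         rather than to the modrm reg field. */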
29865 case 0xF3:
29866 /* BLSI r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /3 */
29867 /* BLSI r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /3 */
29868 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/
29869 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 3) {
29870 Int size = getRexW(pfx) ? 8 : 4;
29871 IRType ty = szToITy(size);
29872 IRTemp src = newTemp(ty);
29873 IRTemp dst = newTemp(ty);
29874 UChar rm = getUChar(delta);
29876 if (epartIsReg(rm)) {
29877 assign( src, getIRegE(size,pfx,rm) );
29878 DIP("blsi %s,%s\n", nameIRegE(size,pfx,rm),
29879 nameIRegV(size,pfx));
29880 delta++;
29881 } else {
29882 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29883 assign( src, loadLE(ty, mkexpr(addr)) );
29884 DIP("blsi %s,%s\n", dis_buf, nameIRegV(size,pfx));
29885            delta += alen;
29886         }
29888 assign( dst, binop(mkSizedOp(ty,Iop_And8),
29889 binop(mkSizedOp(ty,Iop_Sub8), mkU(ty, 0),
29890 mkexpr(src)), mkexpr(src)) );
29891 putIRegV( size, pfx, mkexpr(dst) );
29892 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
29893 ? AMD64G_CC_OP_BLSI64
29894 : AMD64G_CC_OP_BLSI32)) );
29895 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
29896 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) );
29897 *uses_vvvv = True;
29898 goto decode_success;
29900 /* BLSMSK r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /2 */
29901 /* BLSMSK r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /2 */
29902 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/
29903 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 2) {
29904 Int size = getRexW(pfx) ? 8 : 4;
29905 IRType ty = szToITy(size);
29906 IRTemp src = newTemp(ty);
29907 IRTemp dst = newTemp(ty);
29908 UChar rm = getUChar(delta);
29910 if (epartIsReg(rm)) {
29911 assign( src, getIRegE(size,pfx,rm) );
29912 DIP("blsmsk %s,%s\n", nameIRegE(size,pfx,rm),
29913 nameIRegV(size,pfx));
29914 delta++;
29915 } else {
29916 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29917 assign( src, loadLE(ty, mkexpr(addr)) );
29918 DIP("blsmsk %s,%s\n", dis_buf, nameIRegV(size,pfx));
29919            delta += alen;
29920         }
29922 assign( dst, binop(mkSizedOp(ty,Iop_Xor8),
29923 binop(mkSizedOp(ty,Iop_Sub8), mkexpr(src),
29924 mkU(ty, 1)), mkexpr(src)) );
29925 putIRegV( size, pfx, mkexpr(dst) );
29926 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
29927 ? AMD64G_CC_OP_BLSMSK64
29928 : AMD64G_CC_OP_BLSMSK32)) );
29929 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
29930 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) );
29931 *uses_vvvv = True;
29932 goto decode_success;
29934 /* BLSR r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /1 */
29935 /* BLSR r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /1 */
29936 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/
29937 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 1) {
29938 Int size = getRexW(pfx) ? 8 : 4;
29939 IRType ty = szToITy(size);
29940 IRTemp src = newTemp(ty);
29941 IRTemp dst = newTemp(ty);
29942 UChar rm = getUChar(delta);
29944 if (epartIsReg(rm)) {
29945 assign( src, getIRegE(size,pfx,rm) );
29946 DIP("blsr %s,%s\n", nameIRegE(size,pfx,rm),
29947 nameIRegV(size,pfx));
29948 delta++;
29949 } else {
29950 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29951 assign( src, loadLE(ty, mkexpr(addr)) );
29952 DIP("blsr %s,%s\n", dis_buf, nameIRegV(size,pfx));
29953            delta += alen;
29954         }
29956 assign( dst, binop(mkSizedOp(ty,Iop_And8),
29957 binop(mkSizedOp(ty,Iop_Sub8), mkexpr(src),
29958 mkU(ty, 1)), mkexpr(src)) );
29959 putIRegV( size, pfx, mkexpr(dst) );
29960 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
29961 ? AMD64G_CC_OP_BLSR64
29962 : AMD64G_CC_OP_BLSR32)) );
29963 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
29964 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) );
29965 *uses_vvvv = True;
29966 goto decode_success;
29968 break;
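      /* Under 0xF5, BZHI is computed inline with an ITE tree (see the
         pseudo-code comment below), whereas PDEP and PEXT are handed off
         to the clean helpers amd64g_calculate_pdep / amd64g_calculate_pext,
         presumably because arbitrary bit scatter/gather is awkward to
         express directly in IR.  PDEP and PEXT leave the flags untouched. */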
29970 case 0xF5:
29971 /* BZHI r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F5 /r */
29972 /* BZHI r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F5 /r */
29973 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29974 Int size = getRexW(pfx) ? 8 : 4;
29975 IRType ty = szToITy(size);
29976 IRTemp dst = newTemp(ty);
29977 IRTemp src1 = newTemp(ty);
29978 IRTemp src2 = newTemp(ty);
29979 IRTemp start = newTemp(Ity_I8);
29980 IRTemp cond = newTemp(Ity_I1);
29981 UChar rm = getUChar(delta);
29983 assign( src2, getIRegV(size,pfx) );
29984 if (epartIsReg(rm)) {
29985 assign( src1, getIRegE(size,pfx,rm) );
29986 DIP("bzhi %s,%s,%s\n", nameIRegV(size,pfx),
29987 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
29988 delta++;
29989 } else {
29990 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29991 assign( src1, loadLE(ty, mkexpr(addr)) );
29992 DIP("bzhi %s,%s,%s\n", nameIRegV(size,pfx), dis_buf,
29993 nameIRegG(size,pfx,rm));
29994            delta += alen;
29995         }
29997 assign( start, narrowTo( Ity_I8, mkexpr(src2) ) );
29998 assign( cond, binop(Iop_CmpLT32U,
29999 unop(Iop_8Uto32, mkexpr(start)),
30000 mkU32(8*size)) );
30001 /* if (start < opsize) {
30002 if (start == 0)
30003 dst = 0;
30004 else
30005 dst = (src1 << (opsize-start)) u>> (opsize-start);
30006 } else {
30007 dst = src1;
30008 } */
30009 assign( dst,
30010 IRExpr_ITE(
30011 mkexpr(cond),
30012 IRExpr_ITE(
30013 binop(Iop_CmpEQ8, mkexpr(start), mkU8(0)),
30014 mkU(ty, 0),
30015 binop(
30016 mkSizedOp(ty,Iop_Shr8),
30017 binop(
30018 mkSizedOp(ty,Iop_Shl8),
30019 mkexpr(src1),
30020                         binop(Iop_Sub8, mkU8(8*size), mkexpr(start))
30021                       ),
30022                       binop(Iop_Sub8, mkU8(8*size), mkexpr(start))
30023                     )
30024                   ),
30025                   mkexpr(src1)
30026                 )
30027               );
30028 putIRegG( size, pfx, rm, mkexpr(dst) );
30029 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
30030 ? AMD64G_CC_OP_BLSR64
30031 : AMD64G_CC_OP_BLSR32)) );
30032 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
30033 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(cond))) );
30034 *uses_vvvv = True;
30035 goto decode_success;
30037 /* PDEP r/m32, r32b, r32a = VEX.NDS.LZ.F2.0F38.W0 F5 /r */
30038 /* PDEP r/m64, r64b, r64a = VEX.NDS.LZ.F2.0F38.W1 F5 /r */
30039 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30040 Int size = getRexW(pfx) ? 8 : 4;
30041 IRType ty = szToITy(size);
30042 IRTemp src = newTemp(ty);
30043 IRTemp mask = newTemp(ty);
30044 UChar rm = getUChar(delta);
30046 assign( src, getIRegV(size,pfx) );
30047 if (epartIsReg(rm)) {
30048 assign( mask, getIRegE(size,pfx,rm) );
30049 DIP("pdep %s,%s,%s\n", nameIRegE(size,pfx,rm),
30050 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
30051 delta++;
30052 } else {
30053 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30054 assign( mask, loadLE(ty, mkexpr(addr)) );
30055 DIP("pdep %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
30056 nameIRegG(size,pfx,rm));
30057 delta += alen;
30060 IRExpr** args = mkIRExprVec_2( widenUto64(mkexpr(src)),
30061 widenUto64(mkexpr(mask)) );
30062 putIRegG( size, pfx, rm,
30063 narrowTo(ty, mkIRExprCCall(Ity_I64, 0/*regparms*/,
30064 "amd64g_calculate_pdep",
30065 &amd64g_calculate_pdep, args)) );
30066 *uses_vvvv = True;
30067 /* Flags aren't modified. */
30068 goto decode_success;
30070 /* PEXT r/m32, r32b, r32a = VEX.NDS.LZ.F3.0F38.W0 F5 /r */
30071 /* PEXT r/m64, r64b, r64a = VEX.NDS.LZ.F3.0F38.W1 F5 /r */
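/* amd64g_calculate_pext gathers the bits of src selected by mask into the
   low-order bits of the result.  A rough C reference (illustrative sketch,
   not the helper itself):

      ULong pext(ULong src, ULong mask) {
         ULong res = 0, bit = 1;
         while (mask) {
            if (src & (mask & -mask)) res |= bit;
            mask &= mask - 1;
            bit <<= 1;
         }
         return res;
      }
*/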
30072 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30073 Int size = getRexW(pfx) ? 8 : 4;
30074 IRType ty = szToITy(size);
30075 IRTemp src = newTemp(ty);
30076 IRTemp mask = newTemp(ty);
30077 UChar rm = getUChar(delta);
30079 assign( src, getIRegV(size,pfx) );
30080 if (epartIsReg(rm)) {
30081 assign( mask, getIRegE(size,pfx,rm) );
30082 DIP("pext %s,%s,%s\n", nameIRegE(size,pfx,rm),
30083 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
30084 delta++;
30085 } else {
30086 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30087 assign( mask, loadLE(ty, mkexpr(addr)) );
30088 DIP("pext %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
30089 nameIRegG(size,pfx,rm));
30090 delta += alen;
30093 /* First mask off the bits of src that are not set in mask; PEXT ignores
30094 them, so it is fine if they hold undefined values. */
30095 IRExpr* masked = binop(mkSizedOp(ty,Iop_And8),
30096 mkexpr(src), mkexpr(mask));
30097 IRExpr** args = mkIRExprVec_2( widenUto64(masked),
30098 widenUto64(mkexpr(mask)) );
30099 putIRegG( size, pfx, rm,
30100 narrowTo(ty, mkIRExprCCall(Ity_I64, 0/*regparms*/,
30101 "amd64g_calculate_pext",
30102 &amd64g_calculate_pext, args)) );
30103 *uses_vvvv = True;
30104 /* Flags aren't modified. */
30105 goto decode_success;
30107 break;
30109 case 0xF6:
30110 /* MULX r/m32, r32b, r32a = VEX.NDD.LZ.F2.0F38.W0 F6 /r */
30111 /* MULX r/m64, r64b, r64a = VEX.NDD.LZ.F2.0F38.W1 F6 /r */
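/* MULX is a flag-preserving widening unsigned multiply of rDX by the r/m
   operand.  For the 64-bit form the computation amounts to (illustrative
   sketch, assuming a compiler with unsigned __int128):

      unsigned __int128 p = (unsigned __int128)rdx * src2;
      dst_v = (ULong)p;          // vvvv register gets the low half
      dst_g = (ULong)(p >> 64);  // modrm.reg register gets the high half
*/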
30112 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30113 Int size = getRexW(pfx) ? 8 : 4;
30114 IRType ty = szToITy(size);
30115 IRTemp src1 = newTemp(ty);
30116 IRTemp src2 = newTemp(ty);
30117 IRTemp res = newTemp(size == 8 ? Ity_I128 : Ity_I64);
30118 UChar rm = getUChar(delta);
30120 assign( src1, getIRegRDX(size) );
30121 if (epartIsReg(rm)) {
30122 assign( src2, getIRegE(size,pfx,rm) );
30123 DIP("mulx %s,%s,%s\n", nameIRegE(size,pfx,rm),
30124 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
30125 delta++;
30126 } else {
30127 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30128 assign( src2, loadLE(ty, mkexpr(addr)) );
30129 DIP("mulx %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
30130 nameIRegG(size,pfx,rm));
30131 delta += alen;
30134 assign( res, binop(size == 8 ? Iop_MullU64 : Iop_MullU32,
30135 mkexpr(src1), mkexpr(src2)) );
30136 putIRegV( size, pfx,
30137 unop(size == 8 ? Iop_128to64 : Iop_64to32, mkexpr(res)) );
30138 putIRegG( size, pfx, rm,
30139 unop(size == 8 ? Iop_128HIto64 : Iop_64HIto32,
30140 mkexpr(res)) );
30141 *uses_vvvv = True;
30142 /* Flags aren't modified. */
30143 goto decode_success;
30145 break;
30147 case 0xF7:
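/* SARX/SHLX/SHRX below are flag-preserving shifts: the count comes from the
   vvvv register and, as with ordinary shifts, only its low 5 (32-bit form)
   or 6 (64-bit form) bits are used.  Roughly (illustrative sketch):

      count = src2 & (8*size - 1);
      dst   = shift(src1, count);   // sar, shl or shr; EFLAGS untouched
*/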
30148 /* SARX r32b, r/m32, r32a = VEX.NDS.LZ.F3.0F38.W0 F7 /r */
30149 /* SARX r64b, r/m64, r64a = VEX.NDS.LZ.F3.0F38.W1 F7 /r */
30150 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30151 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "sarx", Iop_Sar8 );
30152 goto decode_success;
30154 /* SHLX r32b, r/m32, r32a = VEX.NDS.LZ.66.0F38.W0 F7 /r */
30155 /* SHLX r64b, r/m64, r64a = VEX.NDS.LZ.66.0F38.W1 F7 /r */
30156 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30157 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "shlx", Iop_Shl8 );
30158 goto decode_success;
30160 /* SHRX r32b, r/m32, r32a = VEX.NDS.LZ.F2.0F38.W0 F7 /r */
30161 /* SHRX r64b, r/m64, r64a = VEX.NDS.LZ.F2.0F38.W1 F7 /r */
30162 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30163 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "shrx", Iop_Shr8 );
30164 goto decode_success;
30166 /* BEXTR r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F7 /r */
30167 /* BEXTR r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F7 /r */
30168 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30169 Int size = getRexW(pfx) ? 8 : 4;
30170 IRType ty = szToITy(size);
30171 IRTemp dst = newTemp(ty);
30172 IRTemp src1 = newTemp(ty);
30173 IRTemp src2 = newTemp(ty);
30174 IRTemp stle = newTemp(Ity_I16);
30175 IRTemp start = newTemp(Ity_I8);
30176 IRTemp len = newTemp(Ity_I8);
30177 UChar rm = getUChar(delta);
30179 assign( src2, getIRegV(size,pfx) );
30180 if (epartIsReg(rm)) {
30181 assign( src1, getIRegE(size,pfx,rm) );
30182 DIP("bextr %s,%s,%s\n", nameIRegV(size,pfx),
30183 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
30184 delta++;
30185 } else {
30186 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30187 assign( src1, loadLE(ty, mkexpr(addr)) );
30188 DIP("bextr %s,%s,%s\n", nameIRegV(size,pfx), dis_buf,
30189 nameIRegG(size,pfx,rm));
30190 delta += alen;
30193 assign( stle, narrowTo( Ity_I16, mkexpr(src2) ) );
30194 assign( start, unop( Iop_16to8, mkexpr(stle) ) );
30195 assign( len, unop( Iop_16HIto8, mkexpr(stle) ) );
30196 /* if (start+len < opsize) {
30197 if (len != 0)
30198 dst = (src1 << (opsize-start-len)) u>> (opsize-len);
30199 else
30200 dst = 0;
30201 } else {
30202 if (start < opsize)
30203 dst = src1 u>> start;
30204 else
30205 dst = 0;
30206 } */
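/* Or, as a compact C sketch of the same thing (illustrative only):

      UInt  start = src2 & 0xFF, len = (src2 >> 8) & 0xFF;
      ULong field = (start >= 8*size) ? 0     : src1 >> start;
      ULong mask  = (len   >= 8*size) ? ~0ULL : (1ULL << len) - 1;
      dst = field & mask;
*/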
30207 assign( dst,
30208 IRExpr_ITE(
30209 binop(Iop_CmpLT32U,
30210 binop(Iop_Add32,
30211 unop(Iop_8Uto32, mkexpr(start)),
30212 unop(Iop_8Uto32, mkexpr(len))),
30213 mkU32(8*size)),
30214 IRExpr_ITE(
30215 binop(Iop_CmpEQ8, mkexpr(len), mkU8(0)),
30216 mkU(ty, 0),
30217 binop(mkSizedOp(ty,Iop_Shr8),
30218 binop(mkSizedOp(ty,Iop_Shl8), mkexpr(src1),
30219 binop(Iop_Sub8,
30220 binop(Iop_Sub8, mkU8(8*size),
30221 mkexpr(start)),
30222 mkexpr(len))),
30223 binop(Iop_Sub8, mkU8(8*size),
30224 mkexpr(len)))
30225 ),
30226 IRExpr_ITE(
30227 binop(Iop_CmpLT32U,
30228 unop(Iop_8Uto32, mkexpr(start)),
30229 mkU32(8*size)),
30230 binop(mkSizedOp(ty,Iop_Shr8), mkexpr(src1),
30231 mkexpr(start)),
30232 mkU(ty, 0)
30233 )
30234 )
30235 );
30236 putIRegG( size, pfx, rm, mkexpr(dst) );
30237 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
30238 ? AMD64G_CC_OP_ANDN64
30239 : AMD64G_CC_OP_ANDN32)) );
30240 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
30241 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
30242 *uses_vvvv = True;
30243 goto decode_success;
30245 break;
30247 default:
30248 break;
30252 //decode_failure:
30253 return deltaIN;
30255 decode_success:
30256 return delta;
30257 }
30259 /* operand format:
30260 * [0] = dst
30261 * [n] = srcn
30262 */
30263 static Long decode_vregW(Int count, Long delta, UChar modrm, Prefix pfx,
30264 const VexAbiInfo* vbi, IRTemp *v, UInt *dst, Int swap)
30265 {
30266 v[0] = newTemp(Ity_V128);
30267 v[1] = newTemp(Ity_V128);
30268 v[2] = newTemp(Ity_V128);
30269 v[3] = newTemp(Ity_V128);
30270 IRTemp addr = IRTemp_INVALID;
30271 Int alen = 0;
30272 HChar dis_buf[50];
30274 *dst = gregOfRexRM(pfx, modrm);
30275 assign( v[0], getXMMReg(*dst) );
30277 if ( epartIsReg( modrm ) ) {
30278 UInt ereg = eregOfRexRM(pfx, modrm);
30279 assign(swap ? v[count-1] : v[count-2], getXMMReg(ereg) );
30280 DIS(dis_buf, "%s", nameXMMReg(ereg));
30281 } else {
30282 Bool extra_byte = (getUChar(delta - 3) & 0xF) != 9;
30283 addr = disAMode(&alen, vbi, pfx, delta, dis_buf, extra_byte);
30284 assign(swap ? v[count-1] : v[count-2], loadLE(Ity_V128, mkexpr(addr)));
30285 delta += alen - 1;
30286 }
30288 UInt vvvv = getVexNvvvv(pfx);
30289 switch(count) {
30290 case 2:
30291 DIP( "%s,%s", nameXMMReg(*dst), dis_buf );
30292 break;
30293 case 3:
30294 assign( swap ? v[1] : v[2], getXMMReg(vvvv) );
30295 DIP( "%s,%s,%s", nameXMMReg(*dst), nameXMMReg(vvvv), dis_buf );
30296 break;
30297 case 4:
30298 {
30299 assign( v[1], getXMMReg(vvvv) );
30300 UInt src2 = getUChar(delta + 1) >> 4;
30301 assign( swap ? v[2] : v[3], getXMMReg(src2) );
30302 DIP( "%s,%s,%s,%s", nameXMMReg(*dst), nameXMMReg(vvvv),
30303 nameXMMReg(src2), dis_buf );
30304 }
30305 break;
30306 }
30307 return delta + 1;
30308 }
30310 static Long dis_FMA4 (Prefix pfx, Long delta, UChar opc,
30311 Bool* uses_vvvv, const VexAbiInfo* vbi )
30313 UInt dst;
30314 *uses_vvvv = True;
30316 UChar modrm = getUChar(delta);
30318 Bool zero_64F = False;
30319 Bool zero_96F = False;
30320 UInt is_F32 = ((opc & 0x01) == 0x00) ? 1 : 0;
30321 Bool neg = (opc & 0xF0) == 0x70;
30322 Bool alt = (opc & 0xF0) == 0x50;
30323 Bool sub = alt ? (opc & 0x0E) != 0x0E : (opc & 0x0C) == 0x0C;
30325 IRTemp operand[4];
30326 switch(opc & 0xF) {
30327 case 0x0A: zero_96F = (opc >> 4) != 0x05; break;
30328 case 0x0B: zero_64F = (opc >> 4) != 0x05; break;
30329 case 0x0E: zero_96F = (opc >> 4) != 0x05; break;
30330 case 0x0F: zero_64F = (opc >> 4) != 0x05; break;
30331 default: break;
30333 DIP("vfm%s", neg ? "n" : "");
30334 if(alt) DIP("%s", sub ? "add" : "sub");
30335 DIP("%s", sub ? "sub" : "add");
30336 DIP("%c ", (zero_64F || zero_96F) ? 's' : 'p');
30337 DIP("%c ", is_F32 ? 's' : 'd');
30338 delta = decode_vregW(4, delta, modrm, pfx, vbi, operand, &dst, getRexW(pfx));
30339 DIP("\n");
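/* Each lane of the FMA4 result is a fused multiply-add with optional
   negations; per lane the loop below effectively computes (illustrative):

      res = (neg ? -a : a) * b + (sub ? -c : c)

   where a, b, c come from operand[1..3] and, for the *addsub/*subadd
   forms (alt), 'sub' is toggled between lanes. */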
30340 IRExpr *src[3];
30342 void (*putXMM[2])(UInt,Int,IRExpr*) = {&putXMMRegLane64F, &putXMMRegLane32F};
30344 IROp size_op[] = {Iop_V128to64, Iop_V128HIto64, Iop_64to32, Iop_64HIto32};
30345 IROp neg_op[] = {Iop_NegF64, Iop_NegF32};
30346 int i, j;
30347 for(i = 0; i < is_F32 * 2 + 2; i++) {
30348 for(j = 0; j < 3; j++) {
30349 if(is_F32) {
30350 src[j] = unop(Iop_ReinterpI32asF32,
30351 unop(size_op[i%2+2],
30352 unop(size_op[i/2],
30353 mkexpr(operand[j + 1])
30354 )
30355 ));
30356 } else {
30357 src[j] = unop(Iop_ReinterpI64asF64,
30358 unop(size_op[i%2],
30359 mkexpr(operand[j + 1])
30360 ));
30361 }
30362 }
30363 putXMM[is_F32](dst, i, IRExpr_Qop(is_F32 ? Iop_MAddF32 : Iop_MAddF64,
30364 get_FAKE_roundingmode(),
30365 neg ? unop(neg_op[is_F32], src[0])
30366 : src[0],
30367 src[1],
30368 sub ? unop(neg_op[is_F32], src[2])
30369 : src[2]
30370 ));
30371 if(alt) {
30372 sub = !sub;
30373 }
30374 }
30376 /* Zero out top bits of ymm/xmm register. */
30377 putYMMRegLane128( dst, 1, mkV128(0) );
30379 if(zero_64F || zero_96F) {
30380 putXMMRegLane64( dst, 1, IRExpr_Const(IRConst_U64(0)));
30381 }
30383 if(zero_96F) {
30384 putXMMRegLane32( dst, 1, IRExpr_Const(IRConst_U32(0)));
30385 }
30387 return delta+1;
30388 }
30390 /*------------------------------------------------------------*/
30391 /*--- ---*/
30392 /*--- Top-level post-escape decoders: dis_ESC_0F3A__VEX ---*/
30393 /*--- ---*/
30394 /*------------------------------------------------------------*/
30396 static IRTemp math_VPERMILPS_128 ( IRTemp sV, UInt imm8 )
30398 vassert(imm8 < 256);
30399 IRTemp s3, s2, s1, s0;
30400 s3 = s2 = s1 = s0 = IRTemp_INVALID;
30401 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
30402 # define SEL(_nn) (((_nn)==0) ? s0 : ((_nn)==1) ? s1 \
30403 : ((_nn)==2) ? s2 : s3)
30404 IRTemp res = newTemp(Ity_V128);
30405 assign(res, mkV128from32s( SEL((imm8 >> 6) & 3),
30406 SEL((imm8 >> 4) & 3),
30407 SEL((imm8 >> 2) & 3),
30408 SEL((imm8 >> 0) & 3) ));
30409 # undef SEL
30410 return res;
30413 /* Handles 128 and 256 bit versions of VCVTPS2PH. */
30414 static Long dis_VCVTPS2PH ( const VexAbiInfo* vbi, Prefix pfx,
30415 Long delta, Bool is256bit )
30417 /* This is a width-halving store or reg-reg move that converts the data
30418 as it is transferred. */
30419 UChar modrm = getUChar(delta);
30420 UInt rG = gregOfRexRM(pfx, modrm);
30421 IRTemp rm = newTemp(Ity_I32);
30422 IROp op = is256bit ? Iop_F32toF16x8 : Iop_F32toF16x4;
30423 IRExpr* srcG = (is256bit ? getYMMReg : getXMMReg)(rG);
30425 /* (imm & 3) contains an Intel-encoded rounding mode. Because that encoding
30426 is the same as the encoding for IRRoundingMode, we can use that value
30427 directly in the IR as a rounding mode. */
30429 if (epartIsReg(modrm)) {
30430 UInt rE = eregOfRexRM(pfx, modrm);
30431 delta += 1;
30432 UInt imm = getUChar(delta);
30433 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
30434 IRExpr* res = binop(op, mkexpr(rm), srcG);
30435 if (!is256bit)
30436 res = unop(Iop_64UtoV128, res);
30437 putYMMRegLoAndZU(rE, res);
30438 DIP("vcvtps2ph $%u,%s,%s\n",
30439 imm, (is256bit ? nameYMMReg : nameXMMReg)(rG), nameXMMReg(rE));
30440 } else {
30441 Int alen = 0;
30442 HChar dis_buf[50];
30443 IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30444 delta += alen;
30445 UInt imm = getUChar(delta);
30446 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
30447 IRExpr* res = binop(op, mkexpr(rm), srcG);
30448 storeLE(mkexpr(addr), res);
30449 DIP("vcvtps2ph $%u,%s,%s\n",
30450 imm, (is256bit ? nameYMMReg : nameXMMReg)(rG), dis_buf);
30452 delta++;
30453 /* doesn't use vvvv */
30454 return delta;
30457 __attribute__((noinline))
30458 static
30459 Long dis_ESC_0F3A__VEX (
30460 /*MB_OUT*/DisResult* dres,
30461 /*OUT*/ Bool* uses_vvvv,
30462 const VexArchInfo* archinfo,
30463 const VexAbiInfo* vbi,
30464 Prefix pfx, Int sz, Long deltaIN
30467 IRTemp addr = IRTemp_INVALID;
30468 Int alen = 0;
30469 HChar dis_buf[50];
30470 Long delta = deltaIN;
30471 UChar opc = getUChar(delta);
30472 delta++;
30473 *uses_vvvv = False;
30475 switch (opc) {
30477 case 0x00:
30478 case 0x01:
30479 /* VPERMQ imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 00 /r ib */
30480 /* VPERMPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 01 /r ib */
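/* Both instructions permute the four 64-bit lanes of the source, two imm8
   bits per destination lane; roughly (illustrative sketch):

      for (i = 0; i < 4; i++)
         dst.qword[i] = src.qword[(imm8 >> (2*i)) & 3];
*/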
30481 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
30482 && 1==getRexW(pfx)/*W1*/) {
30483 UChar modrm = getUChar(delta);
30484 UInt imm8 = 0;
30485 UInt rG = gregOfRexRM(pfx, modrm);
30486 IRTemp sV = newTemp(Ity_V256);
30487 const HChar *name = opc == 0 ? "vpermq" : "vpermpd";
30488 if (epartIsReg(modrm)) {
30489 UInt rE = eregOfRexRM(pfx, modrm);
30490 delta += 1;
30491 imm8 = getUChar(delta);
30492 DIP("%s $%u,%s,%s\n",
30493 name, imm8, nameYMMReg(rE), nameYMMReg(rG));
30494 assign(sV, getYMMReg(rE));
30495 } else {
30496 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30497 delta += alen;
30498 imm8 = getUChar(delta);
30499 DIP("%s $%u,%s,%s\n",
30500 name, imm8, dis_buf, nameYMMReg(rG));
30501 assign(sV, loadLE(Ity_V256, mkexpr(addr)));
30503 delta++;
30504 IRTemp s[4];
30505 s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
30506 breakupV256to64s(sV, &s[3], &s[2], &s[1], &s[0]);
30507 IRTemp dV = newTemp(Ity_V256);
30508 assign(dV, IRExpr_Qop(Iop_64x4toV256,
30509 mkexpr(s[(imm8 >> 6) & 3]),
30510 mkexpr(s[(imm8 >> 4) & 3]),
30511 mkexpr(s[(imm8 >> 2) & 3]),
30512 mkexpr(s[(imm8 >> 0) & 3])));
30513 putYMMReg(rG, mkexpr(dV));
30514 goto decode_success;
30516 break;
30518 case 0x02:
30519 /* VPBLENDD imm8, xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 02 /r ib */
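/* VPBLENDD picks each 32-bit lane from one of the two sources according to
   imm8; a set bit selects the xmm3/m128 operand (illustrative sketch):

      for (i = 0; i < 4; i++)
         dst.dword[i] = ((imm8 >> i) & 1) ? e.dword[i]   // xmm3/m128
                                          : v.dword[i];  // vvvv register
*/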
30520 if (have66noF2noF3(pfx)
30521 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
30522 UChar modrm = getUChar(delta);
30523 UInt imm8 = 0;
30524 UInt rG = gregOfRexRM(pfx, modrm);
30525 UInt rV = getVexNvvvv(pfx);
30526 IRTemp sV = newTemp(Ity_V128);
30527 IRTemp dV = newTemp(Ity_V128);
30528 UInt i;
30529 IRTemp s[4], d[4];
30530 assign(sV, getXMMReg(rV));
30531 if (epartIsReg(modrm)) {
30532 UInt rE = eregOfRexRM(pfx, modrm);
30533 delta += 1;
30534 imm8 = getUChar(delta);
30535 DIP("vpblendd $%u,%s,%s,%s\n",
30536 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
30537 assign(dV, getXMMReg(rE));
30538 } else {
30539 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30540 delta += alen;
30541 imm8 = getUChar(delta);
30542 DIP("vpblendd $%u,%s,%s,%s\n",
30543 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
30544 assign(dV, loadLE(Ity_V128, mkexpr(addr)));
30546 delta++;
30547 for (i = 0; i < 4; i++) {
30548 s[i] = IRTemp_INVALID;
30549 d[i] = IRTemp_INVALID;
30551 breakupV128to32s( sV, &s[3], &s[2], &s[1], &s[0] );
30552 breakupV128to32s( dV, &d[3], &d[2], &d[1], &d[0] );
30553 for (i = 0; i < 4; i++)
30554 putYMMRegLane32(rG, i, mkexpr((imm8 & (1<<i)) ? d[i] : s[i]));
30555 putYMMRegLane128(rG, 1, mkV128(0));
30556 *uses_vvvv = True;
30557 goto decode_success;
30559 /* VPBLENDD imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F3A.W0 02 /r ib */
30560 if (have66noF2noF3(pfx)
30561 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
30562 UChar modrm = getUChar(delta);
30563 UInt imm8 = 0;
30564 UInt rG = gregOfRexRM(pfx, modrm);
30565 UInt rV = getVexNvvvv(pfx);
30566 IRTemp sV = newTemp(Ity_V256);
30567 IRTemp dV = newTemp(Ity_V256);
30568 UInt i;
30569 IRTemp s[8], d[8];
30570 assign(sV, getYMMReg(rV));
30571 if (epartIsReg(modrm)) {
30572 UInt rE = eregOfRexRM(pfx, modrm);
30573 delta += 1;
30574 imm8 = getUChar(delta);
30575 DIP("vpblendd $%u,%s,%s,%s\n",
30576 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
30577 assign(dV, getYMMReg(rE));
30578 } else {
30579 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30580 delta += alen;
30581 imm8 = getUChar(delta);
30582 DIP("vpblendd $%u,%s,%s,%s\n",
30583 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
30584 assign(dV, loadLE(Ity_V256, mkexpr(addr)));
30586 delta++;
30587 for (i = 0; i < 8; i++) {
30588 s[i] = IRTemp_INVALID;
30589 d[i] = IRTemp_INVALID;
30591 breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4],
30592 &s[3], &s[2], &s[1], &s[0] );
30593 breakupV256to32s( dV, &d[7], &d[6], &d[5], &d[4],
30594 &d[3], &d[2], &d[1], &d[0] );
30595 for (i = 0; i < 8; i++)
30596 putYMMRegLane32(rG, i, mkexpr((imm8 & (1<<i)) ? d[i] : s[i]));
30597 *uses_vvvv = True;
30598 goto decode_success;
30600 break;
30602 case 0x04:
30603 /* VPERMILPS imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 04 /r ib */
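/* The 256-bit form applies the same four 2-bit selectors to each 128-bit
   lane independently (illustrative sketch):

      for (i = 0; i < 8; i++)
         dst.f32[i] = src.f32[(i & 4) | ((imm8 >> (2 * (i & 3))) & 3)];
*/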
30604 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30605 UChar modrm = getUChar(delta);
30606 UInt imm8 = 0;
30607 UInt rG = gregOfRexRM(pfx, modrm);
30608 IRTemp sV = newTemp(Ity_V256);
30609 if (epartIsReg(modrm)) {
30610 UInt rE = eregOfRexRM(pfx, modrm);
30611 delta += 1;
30612 imm8 = getUChar(delta);
30613 DIP("vpermilps $%u,%s,%s\n",
30614 imm8, nameYMMReg(rE), nameYMMReg(rG));
30615 assign(sV, getYMMReg(rE));
30616 } else {
30617 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30618 delta += alen;
30619 imm8 = getUChar(delta);
30620 DIP("vpermilps $%u,%s,%s\n",
30621 imm8, dis_buf, nameYMMReg(rG));
30622 assign(sV, loadLE(Ity_V256, mkexpr(addr)));
30624 delta++;
30625 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
30626 breakupV256toV128s( sV, &sVhi, &sVlo );
30627 IRTemp dVhi = math_VPERMILPS_128( sVhi, imm8 );
30628 IRTemp dVlo = math_VPERMILPS_128( sVlo, imm8 );
30629 IRExpr* res = binop(Iop_V128HLtoV256, mkexpr(dVhi), mkexpr(dVlo));
30630 putYMMReg(rG, res);
30631 goto decode_success;
30633 /* VPERMILPS imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 04 /r ib */
30634 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30635 UChar modrm = getUChar(delta);
30636 UInt imm8 = 0;
30637 UInt rG = gregOfRexRM(pfx, modrm);
30638 IRTemp sV = newTemp(Ity_V128);
30639 if (epartIsReg(modrm)) {
30640 UInt rE = eregOfRexRM(pfx, modrm);
30641 delta += 1;
30642 imm8 = getUChar(delta);
30643 DIP("vpermilps $%u,%s,%s\n",
30644 imm8, nameXMMReg(rE), nameXMMReg(rG));
30645 assign(sV, getXMMReg(rE));
30646 } else {
30647 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30648 delta += alen;
30649 imm8 = getUChar(delta);
30650 DIP("vpermilps $%u,%s,%s\n",
30651 imm8, dis_buf, nameXMMReg(rG));
30652 assign(sV, loadLE(Ity_V128, mkexpr(addr)));
30654 delta++;
30655 putYMMRegLoAndZU(rG, mkexpr ( math_VPERMILPS_128 ( sV, imm8 ) ) );
30656 goto decode_success;
30658 break;
30660 case 0x05:
30661 /* VPERMILPD imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 05 /r ib */
30662 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30663 UChar modrm = getUChar(delta);
30664 UInt imm8 = 0;
30665 UInt rG = gregOfRexRM(pfx, modrm);
30666 IRTemp sV = newTemp(Ity_V128);
30667 if (epartIsReg(modrm)) {
30668 UInt rE = eregOfRexRM(pfx, modrm);
30669 delta += 1;
30670 imm8 = getUChar(delta);
30671 DIP("vpermilpd $%u,%s,%s\n",
30672 imm8, nameXMMReg(rE), nameXMMReg(rG));
30673 assign(sV, getXMMReg(rE));
30674 } else {
30675 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30676 delta += alen;
30677 imm8 = getUChar(delta);
30678 DIP("vpermilpd $%u,%s,%s\n",
30679 imm8, dis_buf, nameXMMReg(rG));
30680 assign(sV, loadLE(Ity_V128, mkexpr(addr)));
30682 delta++;
30683 IRTemp s1 = newTemp(Ity_I64);
30684 IRTemp s0 = newTemp(Ity_I64);
30685 assign(s1, unop(Iop_V128HIto64, mkexpr(sV)));
30686 assign(s0, unop(Iop_V128to64, mkexpr(sV)));
30687 IRTemp dV = newTemp(Ity_V128);
30688 assign(dV, binop(Iop_64HLtoV128,
30689 mkexpr((imm8 & (1<<1)) ? s1 : s0),
30690 mkexpr((imm8 & (1<<0)) ? s1 : s0)));
30691 putYMMRegLoAndZU(rG, mkexpr(dV));
30692 goto decode_success;
30694 /* VPERMILPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 05 /r ib */
30695 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30696 UChar modrm = getUChar(delta);
30697 UInt imm8 = 0;
30698 UInt rG = gregOfRexRM(pfx, modrm);
30699 IRTemp sV = newTemp(Ity_V256);
30700 if (epartIsReg(modrm)) {
30701 UInt rE = eregOfRexRM(pfx, modrm);
30702 delta += 1;
30703 imm8 = getUChar(delta);
30704 DIP("vpermilpd $%u,%s,%s\n",
30705 imm8, nameYMMReg(rE), nameYMMReg(rG));
30706 assign(sV, getYMMReg(rE));
30707 } else {
30708 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30709 delta += alen;
30710 imm8 = getUChar(delta);
30711 DIP("vpermilpd $%u,%s,%s\n",
30712 imm8, dis_buf, nameYMMReg(rG));
30713 assign(sV, loadLE(Ity_V256, mkexpr(addr)));
30715 delta++;
30716 IRTemp s3, s2, s1, s0;
30717 s3 = s2 = s1 = s0 = IRTemp_INVALID;
30718 breakupV256to64s(sV, &s3, &s2, &s1, &s0);
30719 IRTemp dV = newTemp(Ity_V256);
30720 assign(dV, IRExpr_Qop(Iop_64x4toV256,
30721 mkexpr((imm8 & (1<<3)) ? s3 : s2),
30722 mkexpr((imm8 & (1<<2)) ? s3 : s2),
30723 mkexpr((imm8 & (1<<1)) ? s1 : s0),
30724 mkexpr((imm8 & (1<<0)) ? s1 : s0)));
30725 putYMMReg(rG, mkexpr(dV));
30726 goto decode_success;
30728 break;
30730 case 0x06:
30731 /* VPERM2F128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.66.0F3A.W0 06 /r ib */
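/* VPERM2F128 builds each 128-bit half of the destination from one of the
   four source halves, with optional zeroing (illustrative sketch):

      src128[4] = { vvvv.lo, vvvv.hi, rm.lo, rm.hi };
      dst.lo = (imm8 & 0x08) ? 0 : src128[imm8 & 3];
      dst.hi = (imm8 & 0x80) ? 0 : src128[(imm8 >> 4) & 3];
*/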
30732 if (have66noF2noF3(pfx)
30733 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
30734 UChar modrm = getUChar(delta);
30735 UInt imm8 = 0;
30736 UInt rG = gregOfRexRM(pfx, modrm);
30737 UInt rV = getVexNvvvv(pfx);
30738 IRTemp s00 = newTemp(Ity_V128);
30739 IRTemp s01 = newTemp(Ity_V128);
30740 IRTemp s10 = newTemp(Ity_V128);
30741 IRTemp s11 = newTemp(Ity_V128);
30742 assign(s00, getYMMRegLane128(rV, 0));
30743 assign(s01, getYMMRegLane128(rV, 1));
30744 if (epartIsReg(modrm)) {
30745 UInt rE = eregOfRexRM(pfx, modrm);
30746 delta += 1;
30747 imm8 = getUChar(delta);
30748 DIP("vperm2f128 $%u,%s,%s,%s\n",
30749 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
30750 assign(s10, getYMMRegLane128(rE, 0));
30751 assign(s11, getYMMRegLane128(rE, 1));
30752 } else {
30753 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30754 delta += alen;
30755 imm8 = getUChar(delta);
30756 DIP("vperm2f128 $%u,%s,%s,%s\n",
30757 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
30758 assign(s10, loadLE(Ity_V128, binop(Iop_Add64,
30759 mkexpr(addr), mkU64(0))));
30760 assign(s11, loadLE(Ity_V128, binop(Iop_Add64,
30761 mkexpr(addr), mkU64(16))));
30763 delta++;
30764 # define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \
30765 : ((_nn)==2) ? s10 : s11)
30766 putYMMRegLane128(rG, 0, mkexpr(SEL((imm8 >> 0) & 3)));
30767 putYMMRegLane128(rG, 1, mkexpr(SEL((imm8 >> 4) & 3)));
30768 # undef SEL
30769 if (imm8 & (1<<3)) putYMMRegLane128(rG, 0, mkV128(0));
30770 if (imm8 & (1<<7)) putYMMRegLane128(rG, 1, mkV128(0));
30771 *uses_vvvv = True;
30772 goto decode_success;
30774 break;
30776 case 0x08:
30777 /* VROUNDPS imm8, xmm2/m128, xmm1 */
30778 /* VROUNDPS = VEX.NDS.128.66.0F3A.WIG 08 ib */
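/* The rounding-control byte is handled the same way for all VROUND*
   variants: imm8 bits 7:4 must be zero (the decoder punts otherwise),
   bit 3 (precision-exception suppression) is accepted and ignored, bit 2
   selects the current MXCSR rounding mode, and bits 1:0 give an explicit
   mode.  Per lane the operation is then just (illustrative):

      dst.f32[i] = RoundF32toInt(rm, src.f32[i]);
*/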
30779 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30780 UChar modrm = getUChar(delta);
30781 UInt rG = gregOfRexRM(pfx, modrm);
30782 IRTemp src = newTemp(Ity_V128);
30783 IRTemp s0 = IRTemp_INVALID;
30784 IRTemp s1 = IRTemp_INVALID;
30785 IRTemp s2 = IRTemp_INVALID;
30786 IRTemp s3 = IRTemp_INVALID;
30787 IRTemp rm = newTemp(Ity_I32);
30788 Int imm = 0;
30790 modrm = getUChar(delta);
30792 if (epartIsReg(modrm)) {
30793 UInt rE = eregOfRexRM(pfx, modrm);
30794 assign( src, getXMMReg( rE ) );
30795 imm = getUChar(delta+1);
30796 if (imm & ~15) break;
30797 delta += 1+1;
30798 DIP( "vroundps $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) );
30799 } else {
30800 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30801 assign( src, loadLE(Ity_V128, mkexpr(addr) ) );
30802 imm = getUChar(delta+alen);
30803 if (imm & ~15) break;
30804 delta += alen+1;
30805 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) );
30808 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30809 that encoding is the same as the encoding for IRRoundingMode,
30810 we can use that value directly in the IR as a rounding
30811 mode. */
30812 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
30814 breakupV128to32s( src, &s3, &s2, &s1, &s0 );
30815 putYMMRegLane128( rG, 1, mkV128(0) );
30816 # define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
30817 unop(Iop_ReinterpI32asF32, mkexpr(s)))
30818 putYMMRegLane32F( rG, 3, CVT(s3) );
30819 putYMMRegLane32F( rG, 2, CVT(s2) );
30820 putYMMRegLane32F( rG, 1, CVT(s1) );
30821 putYMMRegLane32F( rG, 0, CVT(s0) );
30822 # undef CVT
30823 goto decode_success;
30825 /* VROUNDPS imm8, ymm2/m256, ymm1 */
30826 /* VROUNDPS = VEX.NDS.256.66.0F3A.WIG 08 ib */
30827 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30828 UChar modrm = getUChar(delta);
30829 UInt rG = gregOfRexRM(pfx, modrm);
30830 IRTemp src = newTemp(Ity_V256);
30831 IRTemp s0 = IRTemp_INVALID;
30832 IRTemp s1 = IRTemp_INVALID;
30833 IRTemp s2 = IRTemp_INVALID;
30834 IRTemp s3 = IRTemp_INVALID;
30835 IRTemp s4 = IRTemp_INVALID;
30836 IRTemp s5 = IRTemp_INVALID;
30837 IRTemp s6 = IRTemp_INVALID;
30838 IRTemp s7 = IRTemp_INVALID;
30839 IRTemp rm = newTemp(Ity_I32);
30840 Int imm = 0;
30842 modrm = getUChar(delta);
30844 if (epartIsReg(modrm)) {
30845 UInt rE = eregOfRexRM(pfx, modrm);
30846 assign( src, getYMMReg( rE ) );
30847 imm = getUChar(delta+1);
30848 if (imm & ~15) break;
30849 delta += 1+1;
30850 DIP( "vroundps $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) );
30851 } else {
30852 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30853 assign( src, loadLE(Ity_V256, mkexpr(addr) ) );
30854 imm = getUChar(delta+alen);
30855 if (imm & ~15) break;
30856 delta += alen+1;
30857 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) );
30860 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30861 that encoding is the same as the encoding for IRRoundingMode,
30862 we can use that value directly in the IR as a rounding
30863 mode. */
30864 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
30866 breakupV256to32s( src, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
30867 # define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
30868 unop(Iop_ReinterpI32asF32, mkexpr(s)))
30869 putYMMRegLane32F( rG, 7, CVT(s7) );
30870 putYMMRegLane32F( rG, 6, CVT(s6) );
30871 putYMMRegLane32F( rG, 5, CVT(s5) );
30872 putYMMRegLane32F( rG, 4, CVT(s4) );
30873 putYMMRegLane32F( rG, 3, CVT(s3) );
30874 putYMMRegLane32F( rG, 2, CVT(s2) );
30875 putYMMRegLane32F( rG, 1, CVT(s1) );
30876 putYMMRegLane32F( rG, 0, CVT(s0) );
30877 # undef CVT
30878 goto decode_success;
30880 break;
30882 case 0x09:
30883 /* VROUNDPD imm8, xmm2/m128, xmm1 */
30884 /* VROUNDPD = VEX.NDS.128.66.0F3A.WIG 09 ib */
30885 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30886 UChar modrm = getUChar(delta);
30887 UInt rG = gregOfRexRM(pfx, modrm);
30888 IRTemp src = newTemp(Ity_V128);
30889 IRTemp s0 = IRTemp_INVALID;
30890 IRTemp s1 = IRTemp_INVALID;
30891 IRTemp rm = newTemp(Ity_I32);
30892 Int imm = 0;
30894 modrm = getUChar(delta);
30896 if (epartIsReg(modrm)) {
30897 UInt rE = eregOfRexRM(pfx, modrm);
30898 assign( src, getXMMReg( rE ) );
30899 imm = getUChar(delta+1);
30900 if (imm & ~15) break;
30901 delta += 1+1;
30902 DIP( "vroundpd $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) );
30903 } else {
30904 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30905 assign( src, loadLE(Ity_V128, mkexpr(addr) ) );
30906 imm = getUChar(delta+alen);
30907 if (imm & ~15) break;
30908 delta += alen+1;
30909 DIP( "vroundpd $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) );
30912 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30913 that encoding is the same as the encoding for IRRoundingMode,
30914 we can use that value directly in the IR as a rounding
30915 mode. */
30916 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
30918 breakupV128to64s( src, &s1, &s0 );
30919 putYMMRegLane128( rG, 1, mkV128(0) );
30920 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
30921 unop(Iop_ReinterpI64asF64, mkexpr(s)))
30922 putYMMRegLane64F( rG, 1, CVT(s1) );
30923 putYMMRegLane64F( rG, 0, CVT(s0) );
30924 # undef CVT
30925 goto decode_success;
30927 /* VROUNDPD imm8, ymm2/m256, ymm1 */
30928 /* VROUNDPD = VEX.NDS.256.66.0F3A.WIG 09 ib */
30929 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30930 UChar modrm = getUChar(delta);
30931 UInt rG = gregOfRexRM(pfx, modrm);
30932 IRTemp src = newTemp(Ity_V256);
30933 IRTemp s0 = IRTemp_INVALID;
30934 IRTemp s1 = IRTemp_INVALID;
30935 IRTemp s2 = IRTemp_INVALID;
30936 IRTemp s3 = IRTemp_INVALID;
30937 IRTemp rm = newTemp(Ity_I32);
30938 Int imm = 0;
30940 modrm = getUChar(delta);
30942 if (epartIsReg(modrm)) {
30943 UInt rE = eregOfRexRM(pfx, modrm);
30944 assign( src, getYMMReg( rE ) );
30945 imm = getUChar(delta+1);
30946 if (imm & ~15) break;
30947 delta += 1+1;
30948 DIP( "vroundpd $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) );
30949 } else {
30950 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30951 assign( src, loadLE(Ity_V256, mkexpr(addr) ) );
30952 imm = getUChar(delta+alen);
30953 if (imm & ~15) break;
30954 delta += alen+1;
30955 DIP( "vroundpd $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) );
30958 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30959 that encoding is the same as the encoding for IRRoundingMode,
30960 we can use that value directly in the IR as a rounding
30961 mode. */
30962 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
30964 breakupV256to64s( src, &s3, &s2, &s1, &s0 );
30965 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
30966 unop(Iop_ReinterpI64asF64, mkexpr(s)))
30967 putYMMRegLane64F( rG, 3, CVT(s3) );
30968 putYMMRegLane64F( rG, 2, CVT(s2) );
30969 putYMMRegLane64F( rG, 1, CVT(s1) );
30970 putYMMRegLane64F( rG, 0, CVT(s0) );
30971 # undef CVT
30972 goto decode_success;
30974 break;
30976 case 0x0A:
30977 case 0x0B:
30978 /* VROUNDSS imm8, xmm3/m32, xmm2, xmm1 */
30979 /* VROUNDSS = VEX.NDS.128.66.0F3A.WIG 0A ib */
30980 /* VROUNDSD imm8, xmm3/m64, xmm2, xmm1 */
30981 /* VROUNDSD = VEX.NDS.128.66.0F3A.WIG 0B ib */
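/* Only the lowest element is rounded; the other elements of the low 128
   bits come from the vvvv register and the upper half is zeroed.  For the
   SD case, roughly (illustrative sketch):

      dst.f64[0]  = RoundF64toInt(rm, e.f64[0]);   // xmm3/m64
      dst.f64[1]  = v.f64[1];                      // vvvv register
      dst.v128[1] = 0;
*/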
30982 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30983 UChar modrm = getUChar(delta);
30984 UInt rG = gregOfRexRM(pfx, modrm);
30985 UInt rV = getVexNvvvv(pfx);
30986 Bool isD = opc == 0x0B;
30987 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
30988 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
30989 Int imm = 0;
30991 if (epartIsReg(modrm)) {
30992 UInt rE = eregOfRexRM(pfx, modrm);
30993 assign( src,
30994 isD ? getXMMRegLane64F(rE, 0) : getXMMRegLane32F(rE, 0) );
30995 imm = getUChar(delta+1);
30996 if (imm & ~15) break;
30997 delta += 1+1;
30998 DIP( "vrounds%c $%d,%s,%s,%s\n",
30999 isD ? 'd' : 's',
31000 imm, nameXMMReg( rE ), nameXMMReg( rV ), nameXMMReg( rG ) );
31001 } else {
31002 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31003 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
31004 imm = getUChar(delta+alen);
31005 if (imm & ~15) break;
31006 delta += alen+1;
31007 DIP( "vrounds%c $%d,%s,%s,%s\n",
31008 isD ? 'd' : 's',
31009 imm, dis_buf, nameXMMReg( rV ), nameXMMReg( rG ) );
31012 /* (imm & 3) contains an Intel-encoded rounding mode. Because
31013 that encoding is the same as the encoding for IRRoundingMode,
31014 we can use that value directly in the IR as a rounding
31015 mode. */
31016 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
31017 (imm & 4) ? get_sse_roundingmode()
31018 : mkU32(imm & 3),
31019 mkexpr(src)) );
31021 if (isD)
31022 putXMMRegLane64F( rG, 0, mkexpr(res) );
31023 else {
31024 putXMMRegLane32F( rG, 0, mkexpr(res) );
31025 putXMMRegLane32F( rG, 1, getXMMRegLane32F( rV, 1 ) );
31027 putXMMRegLane64F( rG, 1, getXMMRegLane64F( rV, 1 ) );
31028 putYMMRegLane128( rG, 1, mkV128(0) );
31029 *uses_vvvv = True;
31030 goto decode_success;
31032 break;
31034 case 0x0C:
31035 /* VBLENDPS imm8, ymm3/m256, ymm2, ymm1 */
31036 /* VBLENDPS = VEX.NDS.256.66.0F3A.WIG 0C /r ib */
31037 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31038 UChar modrm = getUChar(delta);
31039 UInt imm8;
31040 UInt rG = gregOfRexRM(pfx, modrm);
31041 UInt rV = getVexNvvvv(pfx);
31042 IRTemp sV = newTemp(Ity_V256);
31043 IRTemp sE = newTemp(Ity_V256);
31044 assign ( sV, getYMMReg(rV) );
31045 if (epartIsReg(modrm)) {
31046 UInt rE = eregOfRexRM(pfx, modrm);
31047 delta += 1;
31048 imm8 = getUChar(delta);
31049 DIP("vblendps $%u,%s,%s,%s\n",
31050 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31051 assign(sE, getYMMReg(rE));
31052 } else {
31053 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31054 delta += alen;
31055 imm8 = getUChar(delta);
31056 DIP("vblendps $%u,%s,%s,%s\n",
31057 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31058 assign(sE, loadLE(Ity_V256, mkexpr(addr)));
31060 delta++;
31061 putYMMReg( rG,
31062 mkexpr( math_BLENDPS_256( sE, sV, imm8) ) );
31063 *uses_vvvv = True;
31064 goto decode_success;
31066 /* VBLENDPS imm8, xmm3/m128, xmm2, xmm1 */
31067 /* VBLENDPS = VEX.NDS.128.66.0F3A.WIG 0C /r ib */
31068 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31069 UChar modrm = getUChar(delta);
31070 UInt imm8;
31071 UInt rG = gregOfRexRM(pfx, modrm);
31072 UInt rV = getVexNvvvv(pfx);
31073 IRTemp sV = newTemp(Ity_V128);
31074 IRTemp sE = newTemp(Ity_V128);
31075 assign ( sV, getXMMReg(rV) );
31076 if (epartIsReg(modrm)) {
31077 UInt rE = eregOfRexRM(pfx, modrm);
31078 delta += 1;
31079 imm8 = getUChar(delta);
31080 DIP("vblendps $%u,%s,%s,%s\n",
31081 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
31082 assign(sE, getXMMReg(rE));
31083 } else {
31084 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31085 delta += alen;
31086 imm8 = getUChar(delta);
31087 DIP("vblendps $%u,%s,%s,%s\n",
31088 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
31089 assign(sE, loadLE(Ity_V128, mkexpr(addr)));
31091 delta++;
31092 putYMMRegLoAndZU( rG,
31093 mkexpr( math_BLENDPS_128( sE, sV, imm8) ) );
31094 *uses_vvvv = True;
31095 goto decode_success;
31097 break;
31099 case 0x0D:
31100 /* VBLENDPD imm8, ymm3/m256, ymm2, ymm1 */
31101 /* VBLENDPD = VEX.NDS.256.66.0F3A.WIG 0D /r ib */
31102 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31103 UChar modrm = getUChar(delta);
31104 UInt imm8;
31105 UInt rG = gregOfRexRM(pfx, modrm);
31106 UInt rV = getVexNvvvv(pfx);
31107 IRTemp sV = newTemp(Ity_V256);
31108 IRTemp sE = newTemp(Ity_V256);
31109 assign ( sV, getYMMReg(rV) );
31110 if (epartIsReg(modrm)) {
31111 UInt rE = eregOfRexRM(pfx, modrm);
31112 delta += 1;
31113 imm8 = getUChar(delta);
31114 DIP("vblendpd $%u,%s,%s,%s\n",
31115 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31116 assign(sE, getYMMReg(rE));
31117 } else {
31118 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31119 delta += alen;
31120 imm8 = getUChar(delta);
31121 DIP("vblendpd $%u,%s,%s,%s\n",
31122 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31123 assign(sE, loadLE(Ity_V256, mkexpr(addr)));
31125 delta++;
31126 putYMMReg( rG,
31127 mkexpr( math_BLENDPD_256( sE, sV, imm8) ) );
31128 *uses_vvvv = True;
31129 goto decode_success;
31131 /* VBLENDPD imm8, xmm3/m128, xmm2, xmm1 */
31132 /* VBLENDPD = VEX.NDS.128.66.0F3A.WIG 0D /r ib */
31133 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31134 UChar modrm = getUChar(delta);
31135 UInt imm8;
31136 UInt rG = gregOfRexRM(pfx, modrm);
31137 UInt rV = getVexNvvvv(pfx);
31138 IRTemp sV = newTemp(Ity_V128);
31139 IRTemp sE = newTemp(Ity_V128);
31140 assign ( sV, getXMMReg(rV) );
31141 if (epartIsReg(modrm)) {
31142 UInt rE = eregOfRexRM(pfx, modrm);
31143 delta += 1;
31144 imm8 = getUChar(delta);
31145 DIP("vblendpd $%u,%s,%s,%s\n",
31146 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
31147 assign(sE, getXMMReg(rE));
31148 } else {
31149 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31150 delta += alen;
31151 imm8 = getUChar(delta);
31152 DIP("vblendpd $%u,%s,%s,%s\n",
31153 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
31154 assign(sE, loadLE(Ity_V128, mkexpr(addr)));
31156 delta++;
31157 putYMMRegLoAndZU( rG,
31158 mkexpr( math_BLENDPD_128( sE, sV, imm8) ) );
31159 *uses_vvvv = True;
31160 goto decode_success;
31162 break;
31164 case 0x0E:
31165 /* VPBLENDW imm8, xmm3/m128, xmm2, xmm1 */
31166 /* VPBLENDW = VEX.NDS.128.66.0F3A.WIG 0E /r ib */
31167 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31168 UChar modrm = getUChar(delta);
31169 UInt imm8;
31170 UInt rG = gregOfRexRM(pfx, modrm);
31171 UInt rV = getVexNvvvv(pfx);
31172 IRTemp sV = newTemp(Ity_V128);
31173 IRTemp sE = newTemp(Ity_V128);
31174 assign ( sV, getXMMReg(rV) );
31175 if (epartIsReg(modrm)) {
31176 UInt rE = eregOfRexRM(pfx, modrm);
31177 delta += 1;
31178 imm8 = getUChar(delta);
31179 DIP("vpblendw $%u,%s,%s,%s\n",
31180 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
31181 assign(sE, getXMMReg(rE));
31182 } else {
31183 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31184 delta += alen;
31185 imm8 = getUChar(delta);
31186 DIP("vpblendw $%u,%s,%s,%s\n",
31187 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
31188 assign(sE, loadLE(Ity_V128, mkexpr(addr)));
31190 delta++;
31191 putYMMRegLoAndZU( rG,
31192 mkexpr( math_PBLENDW_128( sE, sV, imm8) ) );
31193 *uses_vvvv = True;
31194 goto decode_success;
31196 /* VPBLENDW imm8, ymm3/m256, ymm2, ymm1 */
31197 /* VPBLENDW = VEX.NDS.256.66.0F3A.WIG 0E /r ib */
31198 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31199 UChar modrm = getUChar(delta);
31200 UInt imm8;
31201 UInt rG = gregOfRexRM(pfx, modrm);
31202 UInt rV = getVexNvvvv(pfx);
31203 IRTemp sV = newTemp(Ity_V256);
31204 IRTemp sE = newTemp(Ity_V256);
31205 IRTemp sVhi, sVlo, sEhi, sElo;
31206 sVhi = sVlo = sEhi = sElo = IRTemp_INVALID;
31207 assign ( sV, getYMMReg(rV) );
31208 if (epartIsReg(modrm)) {
31209 UInt rE = eregOfRexRM(pfx, modrm);
31210 delta += 1;
31211 imm8 = getUChar(delta);
31212 DIP("vpblendw $%u,%s,%s,%s\n",
31213 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31214 assign(sE, getYMMReg(rE));
31215 } else {
31216 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31217 delta += alen;
31218 imm8 = getUChar(delta);
31219 DIP("vpblendw $%u,%s,%s,%s\n",
31220 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31221 assign(sE, loadLE(Ity_V256, mkexpr(addr)));
31223 delta++;
31224 breakupV256toV128s( sV, &sVhi, &sVlo );
31225 breakupV256toV128s( sE, &sEhi, &sElo );
31226 putYMMReg( rG, binop( Iop_V128HLtoV256,
31227 mkexpr( math_PBLENDW_128( sEhi, sVhi, imm8) ),
31228 mkexpr( math_PBLENDW_128( sElo, sVlo, imm8) ) ) );
31229 *uses_vvvv = True;
31230 goto decode_success;
31232 break;
31234 case 0x0F:
31235 /* VPALIGNR imm8, xmm3/m128, xmm2, xmm1 */
31236 /* VPALIGNR = VEX.NDS.128.66.0F3A.WIG 0F /r ib */
31237 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31238 UChar modrm = getUChar(delta);
31239 UInt rG = gregOfRexRM(pfx, modrm);
31240 UInt rV = getVexNvvvv(pfx);
31241 IRTemp sV = newTemp(Ity_V128);
31242 IRTemp dV = newTemp(Ity_V128);
31243 UInt imm8;
31245 assign( dV, getXMMReg(rV) );
31247 if ( epartIsReg( modrm ) ) {
31248 UInt rE = eregOfRexRM(pfx, modrm);
31249 assign( sV, getXMMReg(rE) );
31250 imm8 = getUChar(delta+1);
31251 delta += 1+1;
31252 DIP("vpalignr $%u,%s,%s,%s\n", imm8, nameXMMReg(rE),
31253 nameXMMReg(rV), nameXMMReg(rG));
31254 } else {
31255 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31256 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
31257 imm8 = getUChar(delta+alen);
31258 delta += alen+1;
31259 DIP("vpalignr $%u,%s,%s,%s\n", imm8, dis_buf,
31260 nameXMMReg(rV), nameXMMReg(rG));
31263 IRTemp res = math_PALIGNR_XMM( sV, dV, imm8 );
31264 putYMMRegLoAndZU( rG, mkexpr(res) );
31265 *uses_vvvv = True;
31266 goto decode_success;
31268 /* VPALIGNR imm8, ymm3/m256, ymm2, ymm1 */
31269 /* VPALIGNR = VEX.NDS.256.66.0F3A.WIG 0F /r ib */
31270 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31271 UChar modrm = getUChar(delta);
31272 UInt rG = gregOfRexRM(pfx, modrm);
31273 UInt rV = getVexNvvvv(pfx);
31274 IRTemp sV = newTemp(Ity_V256);
31275 IRTemp dV = newTemp(Ity_V256);
31276 IRTemp sHi, sLo, dHi, dLo;
31277 sHi = sLo = dHi = dLo = IRTemp_INVALID;
31278 UInt imm8;
31280 assign( dV, getYMMReg(rV) );
31282 if ( epartIsReg( modrm ) ) {
31283 UInt rE = eregOfRexRM(pfx, modrm);
31284 assign( sV, getYMMReg(rE) );
31285 imm8 = getUChar(delta+1);
31286 delta += 1+1;
31287 DIP("vpalignr $%u,%s,%s,%s\n", imm8, nameYMMReg(rE),
31288 nameYMMReg(rV), nameYMMReg(rG));
31289 } else {
31290 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31291 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
31292 imm8 = getUChar(delta+alen);
31293 delta += alen+1;
31294 DIP("vpalignr $%u,%s,%s,%s\n", imm8, dis_buf,
31295 nameYMMReg(rV), nameYMMReg(rG));
31298 breakupV256toV128s( dV, &dHi, &dLo );
31299 breakupV256toV128s( sV, &sHi, &sLo );
31300 putYMMReg( rG, binop( Iop_V128HLtoV256,
31301 mkexpr( math_PALIGNR_XMM( sHi, dHi, imm8 ) ),
31302 mkexpr( math_PALIGNR_XMM( sLo, dLo, imm8 ) ) )
31304 *uses_vvvv = True;
31305 goto decode_success;
31307 break;
31309 case 0x14:
31310 /* VPEXTRB imm8, xmm2, reg/m8 = VEX.128.66.0F3A.W0 14 /r ib */
31311 if (have66noF2noF3(pfx)
31312 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31313 delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ );
31314 goto decode_success;
31316 break;
31318 case 0x15:
31319 /* VPEXTRW imm8, reg/m16, xmm2 */
31320 /* VPEXTRW = VEX.128.66.0F3A.W0 15 /r ib */
31321 if (have66noF2noF3(pfx)
31322 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31323 delta = dis_PEXTRW( vbi, pfx, delta, True/*isAvx*/ );
31324 goto decode_success;
31326 break;
31328 case 0x16:
31329 /* VPEXTRD imm8, r32/m32, xmm2 */
31330 /* VPEXTRD = VEX.128.66.0F3A.W0 16 /r ib */
31331 if (have66noF2noF3(pfx)
31332 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31333 delta = dis_PEXTRD( vbi, pfx, delta, True/*isAvx*/ );
31334 goto decode_success;
31336 /* VPEXTRQ = VEX.128.66.0F3A.W1 16 /r ib */
31337 if (have66noF2noF3(pfx)
31338 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
31339 delta = dis_PEXTRQ( vbi, pfx, delta, True/*isAvx*/ );
31340 goto decode_success;
31342 break;
31344 case 0x17:
31345 /* VEXTRACTPS imm8, xmm1, r32/m32 = VEX.128.66.0F3A.WIG 17 /r ib */
31346 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31347 delta = dis_EXTRACTPS( vbi, pfx, delta, True/*isAvx*/ );
31348 goto decode_success;
31350 break;
31352 case 0x18:
31353 /* VINSERTF128 r/m, rV, rD
31354 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */
31355 /* VINSERTF128 = VEX.NDS.256.66.0F3A.W0 18 /r ib */
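/* Semantics sketch (illustrative): the destination starts as a copy of the
   rV register and then has one 128-bit half replaced:

      dst = rV;                    // full 256 bits
      dst.v128[ib & 1] = xmm/m128; // the inserted lane
*/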
31356 if (have66noF2noF3(pfx)
31357 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
31358 UChar modrm = getUChar(delta);
31359 UInt ib = 0;
31360 UInt rG = gregOfRexRM(pfx, modrm);
31361 UInt rV = getVexNvvvv(pfx);
31362 IRTemp t128 = newTemp(Ity_V128);
31363 if (epartIsReg(modrm)) {
31364 UInt rE = eregOfRexRM(pfx, modrm);
31365 delta += 1;
31366 assign(t128, getXMMReg(rE));
31367 ib = getUChar(delta);
31368 DIP("vinsertf128 $%u,%s,%s,%s\n",
31369 ib, nameXMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31370 } else {
31371 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31372 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
31373 delta += alen;
31374 ib = getUChar(delta);
31375 DIP("vinsertf128 $%u,%s,%s,%s\n",
31376 ib, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31378 delta++;
31379 putYMMRegLane128(rG, 0, getYMMRegLane128(rV, 0));
31380 putYMMRegLane128(rG, 1, getYMMRegLane128(rV, 1));
31381 putYMMRegLane128(rG, ib & 1, mkexpr(t128));
31382 *uses_vvvv = True;
31383 goto decode_success;
31385 break;
31387 case 0x19:
31388 /* VEXTRACTF128 $lane_no, rS, r/m
31389 ::: r/m:V128 = a lane of rS:V256 (RM format) */
31390 /* VEXTRACTF128 = VEX.256.66.0F3A.W0 19 /r ib */
31391 if (have66noF2noF3(pfx)
31392 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
31393 UChar modrm = getUChar(delta);
31394 UInt ib = 0;
31395 UInt rS = gregOfRexRM(pfx, modrm);
31396 IRTemp t128 = newTemp(Ity_V128);
31397 if (epartIsReg(modrm)) {
31398 UInt rD = eregOfRexRM(pfx, modrm);
31399 delta += 1;
31400 ib = getUChar(delta);
31401 assign(t128, getYMMRegLane128(rS, ib & 1));
31402 putYMMRegLoAndZU(rD, mkexpr(t128));
31403 DIP("vextractf128 $%u,%s,%s\n",
31404 ib, nameXMMReg(rS), nameYMMReg(rD));
31405 } else {
31406 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31407 delta += alen;
31408 ib = getUChar(delta);
31409 assign(t128, getYMMRegLane128(rS, ib & 1));
31410 storeLE(mkexpr(addr), mkexpr(t128));
31411 DIP("vextractf128 $%u,%s,%s\n",
31412 ib, nameYMMReg(rS), dis_buf);
31414 delta++;
31415 /* doesn't use vvvv */
31416 goto decode_success;
31418 break;
31420 case 0x1D:
31421 /* VCVTPS2PH imm8, xmm2, xmm1/m64 = VEX.128.66.0F3A.W0 1D /r ib */
31422 if (have66noF2noF3(pfx)
31423 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/
31424 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
31425 delta = dis_VCVTPS2PH( vbi, pfx, delta, /*is256bit=*/False );
31426 goto decode_success;
31428 /* VCVTPS2PH imm8, ymm2, ymm1/m128 = VEX.256.66.0F3A.W0 1D /r ib */
31429 if (have66noF2noF3(pfx)
31430 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/
31431 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
31432 delta = dis_VCVTPS2PH( vbi, pfx, delta, /*is256bit=*/True );
31433 goto decode_success;
31435 break;
31437 case 0x20:
31438 /* VPINSRB r32/m8, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 20 /r ib */
31439 if (have66noF2noF3(pfx)
31440 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31441 UChar modrm = getUChar(delta);
31442 UInt rG = gregOfRexRM(pfx, modrm);
31443 UInt rV = getVexNvvvv(pfx);
31444 Int imm8;
31445 IRTemp src_u8 = newTemp(Ity_I8);
31447 if ( epartIsReg( modrm ) ) {
31448 UInt rE = eregOfRexRM(pfx,modrm);
31449 imm8 = (Int)(getUChar(delta+1) & 15);
31450 assign( src_u8, unop(Iop_32to8, getIReg32( rE )) );
31451 delta += 1+1;
31452 DIP( "vpinsrb $%d,%s,%s,%s\n",
31453 imm8, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) );
31454 } else {
31455 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31456 imm8 = (Int)(getUChar(delta+alen) & 15);
31457 assign( src_u8, loadLE( Ity_I8, mkexpr(addr) ) );
31458 delta += alen+1;
31459 DIP( "vpinsrb $%d,%s,%s,%s\n",
31460 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31463 IRTemp src_vec = newTemp(Ity_V128);
31464 assign(src_vec, getXMMReg( rV ));
31465 IRTemp res_vec = math_PINSRB_128( src_vec, src_u8, imm8 );
31466 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31467 *uses_vvvv = True;
31468 goto decode_success;
31470 break;
31472 case 0x21:
31473 /* VINSERTPS imm8, xmm3/m32, xmm2, xmm1
31474 = VEX.NDS.128.66.0F3A.WIG 21 /r ib */
31475 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31476 UChar modrm = getUChar(delta);
31477 UInt rG = gregOfRexRM(pfx, modrm);
31478 UInt rV = getVexNvvvv(pfx);
31479 UInt imm8;
31480 IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */
31481 const IRTemp inval = IRTemp_INVALID;
31483 if ( epartIsReg( modrm ) ) {
31484 UInt rE = eregOfRexRM(pfx, modrm);
31485 IRTemp vE = newTemp(Ity_V128);
31486 assign( vE, getXMMReg(rE) );
31487 IRTemp dsE[4] = { inval, inval, inval, inval };
31488 breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] );
31489 imm8 = getUChar(delta+1);
31490 d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */
31491 delta += 1+1;
31492 DIP( "insertps $%u, %s,%s\n",
31493 imm8, nameXMMReg(rE), nameXMMReg(rG) );
31494 } else {
31495 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31496 assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) );
31497 imm8 = getUChar(delta+alen);
31498 delta += alen+1;
31499 DIP( "insertps $%u, %s,%s\n",
31500 imm8, dis_buf, nameXMMReg(rG) );
31503 IRTemp vV = newTemp(Ity_V128);
31504 assign( vV, getXMMReg(rV) );
31506 putYMMRegLoAndZU( rG, mkexpr(math_INSERTPS( vV, d2ins, imm8 )) );
31507 *uses_vvvv = True;
31508 goto decode_success;
31510 break;
31512 case 0x22:
31513 /* VPINSRD r32/m32, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 22 /r ib */
31514 if (have66noF2noF3(pfx)
31515 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31516 UChar modrm = getUChar(delta);
31517 UInt rG = gregOfRexRM(pfx, modrm);
31518 UInt rV = getVexNvvvv(pfx);
31519 Int imm8_10;
31520 IRTemp src_u32 = newTemp(Ity_I32);
31522 if ( epartIsReg( modrm ) ) {
31523 UInt rE = eregOfRexRM(pfx,modrm);
31524 imm8_10 = (Int)(getUChar(delta+1) & 3);
31525 assign( src_u32, getIReg32( rE ) );
31526 delta += 1+1;
31527 DIP( "vpinsrd $%d,%s,%s,%s\n",
31528 imm8_10, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) );
31529 } else {
31530 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31531 imm8_10 = (Int)(getUChar(delta+alen) & 3);
31532 assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) );
31533 delta += alen+1;
31534 DIP( "vpinsrd $%d,%s,%s,%s\n",
31535 imm8_10, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31538 IRTemp src_vec = newTemp(Ity_V128);
31539 assign(src_vec, getXMMReg( rV ));
31540 IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 );
31541 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31542 *uses_vvvv = True;
31543 goto decode_success;
31545 /* VPINSRQ r64/m64, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W1 22 /r ib */
31546 if (have66noF2noF3(pfx)
31547 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
31548 UChar modrm = getUChar(delta);
31549 UInt rG = gregOfRexRM(pfx, modrm);
31550 UInt rV = getVexNvvvv(pfx);
31551 Int imm8_0;
31552 IRTemp src_u64 = newTemp(Ity_I64);
31554 if ( epartIsReg( modrm ) ) {
31555 UInt rE = eregOfRexRM(pfx,modrm);
31556 imm8_0 = (Int)(getUChar(delta+1) & 1);
31557 assign( src_u64, getIReg64( rE ) );
31558 delta += 1+1;
31559 DIP( "vpinsrq $%d,%s,%s,%s\n",
31560 imm8_0, nameIReg64(rE), nameXMMReg(rV), nameXMMReg(rG) );
31561 } else {
31562 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31563 imm8_0 = (Int)(getUChar(delta+alen) & 1);
31564 assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) );
31565 delta += alen+1;
31566 DIP( "vpinsrq $%d,%s,%s,%s\n",
31567 imm8_0, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31570 IRTemp src_vec = newTemp(Ity_V128);
31571 assign(src_vec, getXMMReg( rV ));
31572 IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 );
31573 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31574 *uses_vvvv = True;
31575 goto decode_success;
31577 break;
31579 case 0x38:
31580 /* VINSERTI128 r/m, rV, rD
31581 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */
31582 /* VINSERTI128 = VEX.NDS.256.66.0F3A.W0 38 /r ib */
31583 if (have66noF2noF3(pfx)
31584 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
31585 UChar modrm = getUChar(delta);
31586 UInt ib = 0;
31587 UInt rG = gregOfRexRM(pfx, modrm);
31588 UInt rV = getVexNvvvv(pfx);
31589 IRTemp t128 = newTemp(Ity_V128);
31590 if (epartIsReg(modrm)) {
31591 UInt rE = eregOfRexRM(pfx, modrm);
31592 delta += 1;
31593 assign(t128, getXMMReg(rE));
31594 ib = getUChar(delta);
31595 DIP("vinserti128 $%u,%s,%s,%s\n",
31596 ib, nameXMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31597 } else {
31598 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31599 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
31600 delta += alen;
31601 ib = getUChar(delta);
31602 DIP("vinserti128 $%u,%s,%s,%s\n",
31603 ib, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31605 delta++;
31606 putYMMRegLane128(rG, 0, getYMMRegLane128(rV, 0));
31607 putYMMRegLane128(rG, 1, getYMMRegLane128(rV, 1));
31608 putYMMRegLane128(rG, ib & 1, mkexpr(t128));
31609 *uses_vvvv = True;
31610 goto decode_success;
31612 break;
31614 case 0x39:
31615 /* VEXTRACTI128 $lane_no, rS, r/m
31616 ::: r/m:V128 = a lane of rS:V256 (RM format) */
31617 /* VEXTRACTI128 = VEX.256.66.0F3A.W0 39 /r ib */
31618 if (have66noF2noF3(pfx)
31619 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
31620 UChar modrm = getUChar(delta);
31621 UInt ib = 0;
31622 UInt rS = gregOfRexRM(pfx, modrm);
31623 IRTemp t128 = newTemp(Ity_V128);
31624 if (epartIsReg(modrm)) {
31625 UInt rD = eregOfRexRM(pfx, modrm);
31626 delta += 1;
31627 ib = getUChar(delta);
31628 assign(t128, getYMMRegLane128(rS, ib & 1));
31629 putYMMRegLoAndZU(rD, mkexpr(t128));
31630 DIP("vextracti128 $%u,%s,%s\n",
31631 ib, nameXMMReg(rS), nameYMMReg(rD));
31632 } else {
31633 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31634 delta += alen;
31635 ib = getUChar(delta);
31636 assign(t128, getYMMRegLane128(rS, ib & 1));
31637 storeLE(mkexpr(addr), mkexpr(t128));
31638 DIP("vextracti128 $%u,%s,%s\n",
31639 ib, nameYMMReg(rS), dis_buf);
31641 delta++;
31642 /* doesn't use vvvv */
31643 goto decode_success;
31645 break;
31647 case 0x40:
31648 /* VDPPS imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 40 /r ib */
31649 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31650 UChar modrm = getUChar(delta);
31651 UInt rG = gregOfRexRM(pfx, modrm);
31652 UInt rV = getVexNvvvv(pfx);
31653 IRTemp dst_vec = newTemp(Ity_V128);
31654 Int imm8;
31655 if (epartIsReg( modrm )) {
31656 UInt rE = eregOfRexRM(pfx,modrm);
31657 imm8 = (Int)getUChar(delta+1);
31658 assign( dst_vec, getXMMReg( rE ) );
31659 delta += 1+1;
31660 DIP( "vdpps $%d,%s,%s,%s\n",
31661 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
31662 } else {
31663 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31664 imm8 = (Int)getUChar(delta+alen);
31665 assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) );
31666 delta += alen+1;
31667 DIP( "vdpps $%d,%s,%s,%s\n",
31668 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31671 IRTemp src_vec = newTemp(Ity_V128);
31672 assign(src_vec, getXMMReg( rV ));
31673 IRTemp res_vec = math_DPPS_128( src_vec, dst_vec, imm8 );
31674 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31675 *uses_vvvv = True;
31676 goto decode_success;
31678 /* VDPPS imm8, ymm3/m128,ymm2,ymm1 = VEX.NDS.256.66.0F3A.WIG 40 /r ib */
31679 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31680 UChar modrm = getUChar(delta);
31681 UInt rG = gregOfRexRM(pfx, modrm);
31682 UInt rV = getVexNvvvv(pfx);
31683 IRTemp dst_vec = newTemp(Ity_V256);
31684 Int imm8;
31685 if (epartIsReg( modrm )) {
31686 UInt rE = eregOfRexRM(pfx,modrm);
31687 imm8 = (Int)getUChar(delta+1);
31688 assign( dst_vec, getYMMReg( rE ) );
31689 delta += 1+1;
31690 DIP( "vdpps $%d,%s,%s,%s\n",
31691 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG) );
31692 } else {
31693 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31694 imm8 = (Int)getUChar(delta+alen);
31695 assign( dst_vec, loadLE( Ity_V256, mkexpr(addr) ) );
31696 delta += alen+1;
31697 DIP( "vdpps $%d,%s,%s,%s\n",
31698 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
31701 IRTemp src_vec = newTemp(Ity_V256);
31702 assign(src_vec, getYMMReg( rV ));
31703 IRTemp s0, s1, d0, d1;
31704 s0 = s1 = d0 = d1 = IRTemp_INVALID;
31705 breakupV256toV128s( dst_vec, &d1, &d0 );
31706 breakupV256toV128s( src_vec, &s1, &s0 );
31707 putYMMReg( rG, binop( Iop_V128HLtoV256,
31708 mkexpr( math_DPPS_128(s1, d1, imm8) ),
31709 mkexpr( math_DPPS_128(s0, d0, imm8) ) ) );
31710 *uses_vvvv = True;
31711 goto decode_success;
31713 break;
31715 case 0x41:
31716 /* VDPPD imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 41 /r ib */
31717 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31718 UChar modrm = getUChar(delta);
31719 UInt rG = gregOfRexRM(pfx, modrm);
31720 UInt rV = getVexNvvvv(pfx);
31721 IRTemp dst_vec = newTemp(Ity_V128);
31722 Int imm8;
31723 if (epartIsReg( modrm )) {
31724 UInt rE = eregOfRexRM(pfx,modrm);
31725 imm8 = (Int)getUChar(delta+1);
31726 assign( dst_vec, getXMMReg( rE ) );
31727 delta += 1+1;
31728 DIP( "vdppd $%d,%s,%s,%s\n",
31729 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
31730 } else {
31731 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31732 imm8 = (Int)getUChar(delta+alen);
31733 assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) );
31734 delta += alen+1;
31735 DIP( "vdppd $%d,%s,%s,%s\n",
31736 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31739 IRTemp src_vec = newTemp(Ity_V128);
31740 assign(src_vec, getXMMReg( rV ));
31741 IRTemp res_vec = math_DPPD_128( src_vec, dst_vec, imm8 );
31742 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31743 *uses_vvvv = True;
31744 goto decode_success;
31745 }
31746 break;
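/* A rough guide to the VMPSADBW cases below: MPSADBW forms eight
   16-bit sums of absolute differences between a fixed 4-byte block of
   one operand and eight sliding 4-byte windows of the other, with
   imm8 bits [2:0] choosing the block and window offsets.  In the
   256-bit form imm8 bits [5:3] play the same role for the upper lane,
   which is why the helper is re-applied with (imm8 >> 3) for the high
   128 bits.  math_MPSADBW_128 has the exact definition. */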
31748 case 0x42:
31749 /* VMPSADBW imm8, xmm3/m128,xmm2,xmm1 */
31750 /* VMPSADBW = VEX.NDS.128.66.0F3A.WIG 42 /r ib */
31751 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31752 UChar modrm = getUChar(delta);
31753 Int imm8;
31754 IRTemp src_vec = newTemp(Ity_V128);
31755 IRTemp dst_vec = newTemp(Ity_V128);
31756 UInt rG = gregOfRexRM(pfx, modrm);
31757 UInt rV = getVexNvvvv(pfx);
31759 assign( dst_vec, getXMMReg(rV) );
31761 if ( epartIsReg( modrm ) ) {
31762 UInt rE = eregOfRexRM(pfx, modrm);
31764 imm8 = (Int)getUChar(delta+1);
31765 assign( src_vec, getXMMReg(rE) );
31766 delta += 1+1;
31767 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
31768 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
31769 } else {
31770 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
31771 1/* imm8 is 1 byte after the amode */ );
31772 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
31773 imm8 = (Int)getUChar(delta+alen);
31774 delta += alen+1;
31775 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
31776 dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31777 }
31779 putYMMRegLoAndZU( rG, mkexpr( math_MPSADBW_128(dst_vec,
31780 src_vec, imm8) ) );
31781 *uses_vvvv = True;
31782 goto decode_success;
31783 }
31784 /* VMPSADBW imm8, ymm3/m256,ymm2,ymm1 */
31785 /* VMPSADBW = VEX.NDS.256.66.0F3A.WIG 42 /r ib */
31786 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31787 UChar modrm = getUChar(delta);
31788 Int imm8;
31789 IRTemp src_vec = newTemp(Ity_V256);
31790 IRTemp dst_vec = newTemp(Ity_V256);
31791 UInt rG = gregOfRexRM(pfx, modrm);
31792 UInt rV = getVexNvvvv(pfx);
31793 IRTemp sHi, sLo, dHi, dLo;
31794 sHi = sLo = dHi = dLo = IRTemp_INVALID;
31796 assign( dst_vec, getYMMReg(rV) );
31798 if ( epartIsReg( modrm ) ) {
31799 UInt rE = eregOfRexRM(pfx, modrm);
31801 imm8 = (Int)getUChar(delta+1);
31802 assign( src_vec, getYMMReg(rE) );
31803 delta += 1+1;
31804 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
31805 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG) );
31806 } else {
31807 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
31808 1/* imm8 is 1 byte after the amode */ );
31809 assign( src_vec, loadLE( Ity_V256, mkexpr(addr) ) );
31810 imm8 = (Int)getUChar(delta+alen);
31811 delta += alen+1;
31812 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
31813 dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
31814 }
31816 breakupV256toV128s( dst_vec, &dHi, &dLo );
31817 breakupV256toV128s( src_vec, &sHi, &sLo );
31818 putYMMReg( rG, binop( Iop_V128HLtoV256,
31819 mkexpr( math_MPSADBW_128(dHi, sHi, imm8 >> 3) ),
31820 mkexpr( math_MPSADBW_128(dLo, sLo, imm8) ) ) );
31821 *uses_vvvv = True;
31822 goto decode_success;
31823 }
31824 break;
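/* A rough guide to the VPCLMULQDQ case below: imm8 bit 0 selects the
   low or high quadword of one source and bit 4 does the same for the
   other, and the two selected 64-bit values are multiplied carry-lessly
   to give a 128-bit product.  So imm8 = 0x00 multiplies the two low
   quadwords and imm8 = 0x11 the two high ones; math_PCLMULQDQ does the
   actual work. */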
31826 case 0x44:
31827 /* VPCLMULQDQ imm8, xmm3/m128,xmm2,xmm1 */
31828 /* VPCLMULQDQ = VEX.NDS.128.66.0F3A.WIG 44 /r ib */
31829 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
31830 * Carry-less multiplication of selected XMM quadwords into XMM
31831 * registers (a.k.a. multiplication of polynomials over GF(2))
31832 */
31833 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31834 UChar modrm = getUChar(delta);
31835 Int imm8;
31836 IRTemp sV = newTemp(Ity_V128);
31837 IRTemp dV = newTemp(Ity_V128);
31838 UInt rG = gregOfRexRM(pfx, modrm);
31839 UInt rV = getVexNvvvv(pfx);
31841 assign( dV, getXMMReg(rV) );
31843 if ( epartIsReg( modrm ) ) {
31844 UInt rE = eregOfRexRM(pfx, modrm);
31845 imm8 = (Int)getUChar(delta+1);
31846 assign( sV, getXMMReg(rE) );
31847 delta += 1+1;
31848 DIP( "vpclmulqdq $%d, %s,%s,%s\n", imm8,
31849 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
31850 } else {
31851 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
31852 1/* imm8 is 1 byte after the amode */ );
31853 assign( sV, loadLE( Ity_V128, mkexpr(addr) ) );
31854 imm8 = (Int)getUChar(delta+alen);
31855 delta += alen+1;
31856 DIP( "vpclmulqdq $%d, %s,%s,%s\n",
31857 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31858 }
31860 putYMMRegLoAndZU( rG, mkexpr( math_PCLMULQDQ(dV, sV, imm8) ) );
31861 *uses_vvvv = True;
31862 goto decode_success;
31863 }
31864 break;
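/* A rough guide to the VPERM2I128 case below: imm8[1:0] picks which of
   the four source 128-bit lanes (vvvv.lo, vvvv.hi, rm.lo, rm.hi) lands
   in the destination's low lane, imm8[5:4] does the same for the high
   lane, and bits 3 and 7 zero the corresponding lane instead.  For
   example imm8 = 0x20 gives dst.lo = vvvv.lo and dst.hi = rm.lo. */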
31866 case 0x46:
31867 /* VPERM2I128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F3A.W0 46 /r ib */
31868 if (have66noF2noF3(pfx)
31869 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
31870 UChar modrm = getUChar(delta);
31871 UInt imm8 = 0;
31872 UInt rG = gregOfRexRM(pfx, modrm);
31873 UInt rV = getVexNvvvv(pfx);
31874 IRTemp s00 = newTemp(Ity_V128);
31875 IRTemp s01 = newTemp(Ity_V128);
31876 IRTemp s10 = newTemp(Ity_V128);
31877 IRTemp s11 = newTemp(Ity_V128);
31878 assign(s00, getYMMRegLane128(rV, 0));
31879 assign(s01, getYMMRegLane128(rV, 1));
31880 if (epartIsReg(modrm)) {
31881 UInt rE = eregOfRexRM(pfx, modrm);
31882 delta += 1;
31883 imm8 = getUChar(delta);
31884 DIP("vperm2i128 $%u,%s,%s,%s\n",
31885 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31886 assign(s10, getYMMRegLane128(rE, 0));
31887 assign(s11, getYMMRegLane128(rE, 1));
31888 } else {
31889 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31890 delta += alen;
31891 imm8 = getUChar(delta);
31892 DIP("vperm2i128 $%u,%s,%s,%s\n",
31893 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31894 assign(s10, loadLE(Ity_V128, binop(Iop_Add64,
31895 mkexpr(addr), mkU64(0))));
31896 assign(s11, loadLE(Ity_V128, binop(Iop_Add64,
31897 mkexpr(addr), mkU64(16))));
31898 }
31899 delta++;
31900 # define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \
31901 : ((_nn)==2) ? s10 : s11)
31902 putYMMRegLane128(rG, 0, mkexpr(SEL((imm8 >> 0) & 3)));
31903 putYMMRegLane128(rG, 1, mkexpr(SEL((imm8 >> 4) & 3)));
31904 # undef SEL
31905 if (imm8 & (1<<3)) putYMMRegLane128(rG, 0, mkV128(0));
31906 if (imm8 & (1<<7)) putYMMRegLane128(rG, 1, mkV128(0));
31907 *uses_vvvv = True;
31908 goto decode_success;
31909 }
31910 break;
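/* For the variable-blend cases below (4A, 4B, 4C), the final argument
   handed to dis_VBLENDV_128/256 is presumably used to arithmetic-shift
   each element of the is4 register so that its sign bit becomes an
   all-ones or all-zeroes mask (hence Iop_SarN32x4, SarN64x2 and
   SarN8x16 matching the 4-, 8- and 1-byte granularities), which then
   selects between the two data sources lane by lane. */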
31912 case 0x4A:
31913 /* VBLENDVPS xmmG, xmmE/memE, xmmV, xmmIS4
31914 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
31915 /* VBLENDVPS = VEX.NDS.128.66.0F3A.WIG 4A /r /is4 */
31916 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31917 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
31918 "vblendvps", 4, Iop_SarN32x4 );
31919 *uses_vvvv = True;
31920 goto decode_success;
31921 }
31922 /* VBLENDVPS ymmG, ymmE/memE, ymmV, ymmIS4
31923 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
31924 /* VBLENDVPS = VEX.NDS.256.66.0F3A.WIG 4A /r /is4 */
31925 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31926 delta = dis_VBLENDV_256 ( vbi, pfx, delta,
31927 "vblendvps", 4, Iop_SarN32x4 );
31928 *uses_vvvv = True;
31929 goto decode_success;
31930 }
31931 break;
31933 case 0x4B:
31934 /* VBLENDVPD xmmG, xmmE/memE, xmmV, xmmIS4
31935 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
31936 /* VBLENDVPD = VEX.NDS.128.66.0F3A.WIG 4B /r /is4 */
31937 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31938 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
31939 "vblendvpd", 8, Iop_SarN64x2 );
31940 *uses_vvvv = True;
31941 goto decode_success;
31942 }
31943 /* VBLENDVPD ymmG, ymmE/memE, ymmV, ymmIS4
31944 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
31945 /* VBLENDVPD = VEX.NDS.256.66.0F3A.WIG 4B /r /is4 */
31946 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31947 delta = dis_VBLENDV_256 ( vbi, pfx, delta,
31948 "vblendvpd", 8, Iop_SarN64x2 );
31949 *uses_vvvv = True;
31950 goto decode_success;
31951 }
31952 break;
31954 case 0x4C:
31955 /* VPBLENDVB xmmG, xmmE/memE, xmmV, xmmIS4
31956 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
31957 /* VPBLENDVB = VEX.NDS.128.66.0F3A.WIG 4C /r /is4 */
31958 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31959 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
31960 "vpblendvb", 1, Iop_SarN8x16 );
31961 *uses_vvvv = True;
31962 goto decode_success;
31963 }
31964 /* VPBLENDVB ymmG, ymmE/memE, ymmV, ymmIS4
31965 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
31966 /* VPBLENDVB = VEX.NDS.256.66.0F3A.WIG 4C /r /is4 */
31967 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31968 delta = dis_VBLENDV_256 ( vbi, pfx, delta,
31969 "vpblendvb", 1, Iop_SarN8x16 );
31970 *uses_vvvv = True;
31971 goto decode_success;
31972 }
31973 break;
31975 case 0x60:
31976 case 0x61:
31977 case 0x62:
31978 case 0x63:
31979 /* VEX.128.66.0F3A.WIG 63 /r ib = VPCMPISTRI imm8, xmm2/m128, xmm1
31980 VEX.128.66.0F3A.WIG 62 /r ib = VPCMPISTRM imm8, xmm2/m128, xmm1
31981 VEX.128.66.0F3A.WIG 61 /r ib = VPCMPESTRI imm8, xmm2/m128, xmm1
31982 VEX.128.66.0F3A.WIG 60 /r ib = VPCMPESTRM imm8, xmm2/m128, xmm1
31983 (selected special cases that actually occur in glibc,
31984 not by any means a complete implementation.)
31985 */
31986 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31987 Long delta0 = delta;
31988 delta = dis_PCMPxSTRx( vbi, pfx, delta, True/*isAvx*/, opc );
31989 if (delta > delta0) goto decode_success;
31990 /* else fall through; dis_PCMPxSTRx failed to decode it */
31991 }
31992 break;
31994 case 0x5C ... 0x5F:
31995 case 0x68 ... 0x6F:
31996 case 0x78 ... 0x7F:
31997 /* FIXME: list the instructions decoded here */
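/* Broadly these opcodes are AMD's FMA4 family -- VFMADD*, VFMSUB*,
   VFMADDSUB*, VFMSUBADD*, VFNMADD* and VFNMSUB* in PS/PD/SS/SD
   forms -- but dis_FMA4 is the authority on exactly which of them
   are accepted. */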
31998 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31999 Long delta0 = delta;
32000 delta = dis_FMA4( pfx, delta, opc, uses_vvvv, vbi );
32001 if (delta > delta0) {
32002 dres->hint = Dis_HintVerbose;
32003 goto decode_success;
32004 }
32005 /* else fall through; dis_FMA4 failed to decode it */
32006 }
32007 break;
32009 case 0xDF:
32010 /* VAESKEYGENASSIST imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG DF /r */
32011 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
32012 delta = dis_AESKEYGENASSIST( vbi, pfx, delta, True/*isAvx*/ );
32013 goto decode_success;
32014 }
32015 break;
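/* A note on the RORX case below: RORX is BMI2's rotate-right that
   leaves the flags untouched.  As a worked example, rorx $8, %eax, %ebx
   with %eax = 0x12345678 should leave %ebx = 0x78123456 and rflags
   unchanged. */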
32017 case 0xF0:
32018 /* RORX imm8, r/m32, r32a = VEX.LZ.F2.0F3A.W0 F0 /r /i */
32019 /* RORX imm8, r/m64, r64a = VEX.LZ.F2.0F3A.W1 F0 /r /i */
32020 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
32021 Int size = getRexW(pfx) ? 8 : 4;
32022 IRType ty = szToITy(size);
32023 IRTemp src = newTemp(ty);
32024 UChar rm = getUChar(delta);
32025 UChar imm8;
32027 if (epartIsReg(rm)) {
32028 imm8 = getUChar(delta+1);
32029 assign( src, getIRegE(size,pfx,rm) );
32030 DIP("rorx %d,%s,%s\n", imm8, nameIRegE(size,pfx,rm),
32031 nameIRegG(size,pfx,rm));
32032 delta += 2;
32033 } else {
32034 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
32035 imm8 = getUChar(delta+alen);
32036 assign( src, loadLE(ty, mkexpr(addr)) );
32037 DIP("rorx %d,%s,%s\n", imm8, dis_buf, nameIRegG(size,pfx,rm));
32038 delta += alen + 1;
32039 }
32040 imm8 &= 8*size-1;
32042 /* dst = (src >>u imm8) | (src << (size-imm8)) */
32043 putIRegG( size, pfx, rm,
32044 imm8 == 0 ? mkexpr(src)
32045 : binop( mkSizedOp(ty,Iop_Or8),
32046 binop( mkSizedOp(ty,Iop_Shr8), mkexpr(src),
32047 mkU8(imm8) ),
32048 binop( mkSizedOp(ty,Iop_Shl8), mkexpr(src),
32049 mkU8(8*size-imm8) ) ) );
32050 /* Flags aren't modified. */
32051 goto decode_success;
32052 }
32053 break;
32055 default:
32056 break;
32058 }
32060 //decode_failure:
32061 return deltaIN;
32063 decode_success:
32064 return delta;
32065 }
32068 /*------------------------------------------------------------*/
32069 /*--- ---*/
32070 /*--- Disassemble a single instruction ---*/
32071 /*--- ---*/
32072 /*------------------------------------------------------------*/
32074 /* Disassemble a single instruction into IR. The instruction is
32075 located in host memory at &guest_code[delta]. */
32077 static
32078 DisResult disInstr_AMD64_WRK (
32079 /*OUT*/Bool* expect_CAS,
32080 Long delta64,
32081 const VexArchInfo* archinfo,
32082 const VexAbiInfo* vbi,
32083 Bool sigill_diag
32084 )
32085 {
32086 IRTemp t1, t2;
32087 UChar pre;
32088 Int n, n_prefixes;
32089 DisResult dres;
32091 /* The running delta */
32092 Long delta = delta64;
32094 /* Holds the delta at the start of the insn, so that we can print
32095 consistent error messages for unimplemented insns. */
32096 Long delta_start = delta;
32098 /* sz denotes the nominal data-op size of the insn; we change it to
32099 2 if an 0x66 prefix is seen and 8 if REX.W is 1. In case of
32100 conflict REX.W takes precedence. */
32101 Int sz = 4;
32103 /* pfx holds the summary of prefixes. */
32104 Prefix pfx = PFX_EMPTY;
32106 /* Holds the computed opcode-escape indication. */
32107 Escape esc = ESC_NONE;
32109 /* Set result defaults. */
32110 dres.whatNext = Dis_Continue;
32111 dres.len = 0;
32112 dres.jk_StopHere = Ijk_INVALID;
32113 dres.hint = Dis_HintNone;
32114 *expect_CAS = False;
32116 vassert(guest_RIP_next_assumed == 0);
32117 vassert(guest_RIP_next_mustcheck == False);
32119 t1 = t2 = IRTemp_INVALID;
32121 DIP("\t0x%llx: ", guest_RIP_bbstart+delta);
32123 /* Spot "Special" instructions (see comment at top of file). */
32124 {
32125 const UChar* code = guest_code + delta;
32126 /* Spot the 16-byte preamble:
32127 48C1C703 rolq $3, %rdi
32128 48C1C70D rolq $13, %rdi
32129 48C1C73D rolq $61, %rdi
32130 48C1C733 rolq $51, %rdi
32131 */
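/* The four rotate amounts sum to 3+13+61+51 = 128, a multiple of 64,
   so the preamble leaves %rdi unchanged and is harmless if it is ever
   executed natively. */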
32132 if (code[ 0] == 0x48 && code[ 1] == 0xC1 && code[ 2] == 0xC7
32133 && code[ 3] == 0x03 &&
32134 code[ 4] == 0x48 && code[ 5] == 0xC1 && code[ 6] == 0xC7
32135 && code[ 7] == 0x0D &&
32136 code[ 8] == 0x48 && code[ 9] == 0xC1 && code[10] == 0xC7
32137 && code[11] == 0x3D &&
32138 code[12] == 0x48 && code[13] == 0xC1 && code[14] == 0xC7
32139 && code[15] == 0x33) {
32140 /* Got a "Special" instruction preamble. Which one is it? */
32141 if (code[16] == 0x48 && code[17] == 0x87
32142 && code[18] == 0xDB /* xchgq %rbx,%rbx */) {
32143 /* %RDX = client_request ( %RAX ) */
32144 DIP("%%rdx = client_request ( %%rax )\n");
32145 delta += 19;
32146 jmp_lit(&dres, Ijk_ClientReq, guest_RIP_bbstart+delta);
32147 vassert(dres.whatNext == Dis_StopHere);
32148 goto decode_success;
32150 else
32151 if (code[16] == 0x48 && code[17] == 0x87
32152 && code[18] == 0xC9 /* xchgq %rcx,%rcx */) {
32153 /* %RAX = guest_NRADDR */
32154 DIP("%%rax = guest_NRADDR\n");
32155 delta += 19;
32156 putIRegRAX(8, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
32157 goto decode_success;
32159 else
32160 if (code[16] == 0x48 && code[17] == 0x87
32161 && code[18] == 0xD2 /* xchgq %rdx,%rdx */) {
32162 /* call-noredir *%RAX */
32163 DIP("call-noredir *%%rax\n");
32164 delta += 19;
32165 t1 = newTemp(Ity_I64);
32166 assign(t1, getIRegRAX(8));
32167 t2 = newTemp(Ity_I64);
32168 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
32169 putIReg64(R_RSP, mkexpr(t2));
32170 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta));
32171 jmp_treg(&dres, Ijk_NoRedir, t1);
32172 vassert(dres.whatNext == Dis_StopHere);
32173 goto decode_success;
32175 else
32176 if (code[16] == 0x48 && code[17] == 0x87
32177 && code[18] == 0xff /* xchgq %rdi,%rdi */) {
32178 /* IR injection */
32179 DIP("IR injection\n");
32180 vex_inject_ir(irsb, Iend_LE);
32182 // Invalidate the current insn. The reason is that the IRop we're
32183 // injecting here can change, in which case the translation has to
32184 // be redone. For ease of handling, we simply invalidate all the
32185 // time.
32186 stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_RIP_curr_instr)));
32187 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(19)));
32189 delta += 19;
32191 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) );
32192 dres.whatNext = Dis_StopHere;
32193 dres.jk_StopHere = Ijk_InvalICache;
32194 goto decode_success;
32196 /* We don't know what it is. */
32197 goto decode_failure;
32198 /*NOTREACHED*/
32199 }
32200 }
32202 /* Eat prefixes, summarising the result in pfx and sz, and rejecting
32203 as many invalid combinations as possible. */
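/* Reminder of the REX layout for the 0x40..0x4F case below: the low
   nibble is the W, R, X and B bits in that order, so for instance 0x48
   sets only REX.W and 0x41 sets only REX.B. */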
32204 n_prefixes = 0;
32205 while (True) {
32206 if (n_prefixes > 7) goto decode_failure;
32207 pre = getUChar(delta);
32208 switch (pre) {
32209 case 0x66: pfx |= PFX_66; break;
32210 case 0x67: pfx |= PFX_ASO; break;
32211 case 0xF2: pfx |= PFX_F2; break;
32212 case 0xF3: pfx |= PFX_F3; break;
32213 case 0xF0: pfx |= PFX_LOCK; *expect_CAS = True; break;
32214 case 0x2E: pfx |= PFX_CS; break;
32215 case 0x3E: pfx |= PFX_DS; break;
32216 case 0x26: pfx |= PFX_ES; break;
32217 case 0x64: pfx |= PFX_FS; break;
32218 case 0x65: pfx |= PFX_GS; break;
32219 case 0x36: pfx |= PFX_SS; break;
32220 case 0x40 ... 0x4F:
32221 pfx |= PFX_REX;
32222 if (pre & (1<<3)) pfx |= PFX_REXW;
32223 if (pre & (1<<2)) pfx |= PFX_REXR;
32224 if (pre & (1<<1)) pfx |= PFX_REXX;
32225 if (pre & (1<<0)) pfx |= PFX_REXB;
32226 break;
32227 default:
32228 goto not_a_legacy_prefix;
32229 }
32230 n_prefixes++;
32231 delta++;
32232 }
32234 not_a_legacy_prefix:
32235 /* We've used up all the non-VEX prefixes. Parse and validate a
32236 VEX prefix if that's appropriate. */
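/* Reminder of the VEX layouts decoded below: a 3-byte VEX is
   C4 [~R ~X ~B m-mmmm] [W ~v3..~v0 L pp] and a 2-byte VEX is
   C5 [~R ~v3..~v0 L pp].  R, X, B and vvvv are stored inverted, which
   is why the code ORs in the PFX_REXR/X/B and PFX_VEXnV bits when the
   corresponding VEX bit is zero. */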
32237 if (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX) {
32238 /* Used temporarily for holding VEX prefixes. */
32239 UChar vex0 = getUChar(delta);
32240 if (vex0 == 0xC4) {
32241 /* 3-byte VEX */
32242 UChar vex1 = getUChar(delta+1);
32243 UChar vex2 = getUChar(delta+2);
32244 delta += 3;
32245 pfx |= PFX_VEX;
32246 /* Snarf contents of byte 1 */
32247 /* R */ pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR;
32248 /* X */ pfx |= (vex1 & (1<<6)) ? 0 : PFX_REXX;
32249 /* B */ pfx |= (vex1 & (1<<5)) ? 0 : PFX_REXB;
32250 /* m-mmmm */
32251 switch (vex1 & 0x1F) {
32252 case 1: esc = ESC_0F; break;
32253 case 2: esc = ESC_0F38; break;
32254 case 3: esc = ESC_0F3A; break;
32255 /* Any other m-mmmm field will #UD */
32256 default: goto decode_failure;
32258 /* Snarf contents of byte 2 */
32259 /* W */ pfx |= (vex2 & (1<<7)) ? PFX_REXW : 0;
32260 /* ~v3 */ pfx |= (vex2 & (1<<6)) ? 0 : PFX_VEXnV3;
32261 /* ~v2 */ pfx |= (vex2 & (1<<5)) ? 0 : PFX_VEXnV2;
32262 /* ~v1 */ pfx |= (vex2 & (1<<4)) ? 0 : PFX_VEXnV1;
32263 /* ~v0 */ pfx |= (vex2 & (1<<3)) ? 0 : PFX_VEXnV0;
32264 /* L */ pfx |= (vex2 & (1<<2)) ? PFX_VEXL : 0;
32265 /* pp */
32266 switch (vex2 & 3) {
32267 case 0: break;
32268 case 1: pfx |= PFX_66; break;
32269 case 2: pfx |= PFX_F3; break;
32270 case 3: pfx |= PFX_F2; break;
32271 default: vassert(0);
32272 }
32273 }
32274 else if (vex0 == 0xC5) {
32275 /* 2-byte VEX */
32276 UChar vex1 = getUChar(delta+1);
32277 delta += 2;
32278 pfx |= PFX_VEX;
32279 /* Snarf contents of byte 1 */
32280 /* R */ pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR;
32281 /* ~v3 */ pfx |= (vex1 & (1<<6)) ? 0 : PFX_VEXnV3;
32282 /* ~v2 */ pfx |= (vex1 & (1<<5)) ? 0 : PFX_VEXnV2;
32283 /* ~v1 */ pfx |= (vex1 & (1<<4)) ? 0 : PFX_VEXnV1;
32284 /* ~v0 */ pfx |= (vex1 & (1<<3)) ? 0 : PFX_VEXnV0;
32285 /* L */ pfx |= (vex1 & (1<<2)) ? PFX_VEXL : 0;
32286 /* pp */
32287 switch (vex1 & 3) {
32288 case 0: break;
32289 case 1: pfx |= PFX_66; break;
32290 case 2: pfx |= PFX_F3; break;
32291 case 3: pfx |= PFX_F2; break;
32292 default: vassert(0);
32293 }
32294 /* implied: */
32295 esc = ESC_0F;
32296 }
32297 /* Can't have both VEX and REX */
32298 if ((pfx & PFX_VEX) && (pfx & PFX_REX))
32299 goto decode_failure; /* can't have both */
32300 }
32302 /* Dump invalid combinations */
32303 n = 0;
32304 if (pfx & PFX_F2) n++;
32305 if (pfx & PFX_F3) n++;
32306 if (n > 1)
32307 goto decode_failure; /* can't have both */
32309 n = 0;
32310 if (pfx & PFX_CS) n++;
32311 if (pfx & PFX_DS) n++;
32312 if (pfx & PFX_ES) n++;
32313 if (pfx & PFX_FS) n++;
32314 if (pfx & PFX_GS) n++;
32315 if (pfx & PFX_SS) n++;
32316 if (n > 1)
32317 goto decode_failure; /* multiple seg overrides == illegal */
32319 /* We have a %fs prefix. Reject it if there's no evidence in 'vbi'
32320 that we should accept it. */
32321 if ((pfx & PFX_FS) && !vbi->guest_amd64_assume_fs_is_const)
32322 goto decode_failure;
32324 /* Ditto for %gs prefixes. */
32325 if ((pfx & PFX_GS) && !vbi->guest_amd64_assume_gs_is_const)
32326 goto decode_failure;
32328 /* Set up sz. */
32329 sz = 4;
32330 if (pfx & PFX_66) sz = 2;
32331 if ((pfx & PFX_REX) && (pfx & PFX_REXW)) sz = 8;
32333 /* Now we should be looking at the primary opcode byte or the
32334 leading escapes. Check that any LOCK prefix is actually
32335 allowed. */
32336 if (haveLOCK(pfx)) {
32337 if (can_be_used_with_LOCK_prefix( &guest_code[delta] )) {
32338 DIP("lock ");
32339 } else {
32340 *expect_CAS = False;
32341 goto decode_failure;
32342 }
32343 }
32345 /* Eat up opcode escape bytes, until we're really looking at the
32346 primary opcode byte. But only if there's no VEX present. */
32347 if (!(pfx & PFX_VEX)) {
32348 vassert(esc == ESC_NONE);
32349 pre = getUChar(delta);
32350 if (pre == 0x0F) {
32351 delta++;
32352 pre = getUChar(delta);
32353 switch (pre) {
32354 case 0x38: esc = ESC_0F38; delta++; break;
32355 case 0x3A: esc = ESC_0F3A; delta++; break;
32356 default: esc = ESC_0F; break;
32357 }
32358 }
32359 }
32361 /* So now we're really really looking at the primary opcode
32362 byte. */
32363 Long delta_at_primary_opcode = delta;
32365 if (!(pfx & PFX_VEX)) {
32366 /* Handle non-VEX prefixed instructions. "Legacy" (non-VEX) SSE
32367 instructions preserve the upper 128 bits of YMM registers;
32368 iow we can simply ignore the presence of the upper halves of
32369 these registers. */
32370 switch (esc) {
32371 case ESC_NONE:
32372 delta = dis_ESC_NONE( &dres, expect_CAS,
32373 archinfo, vbi, pfx, sz, delta );
32374 break;
32375 case ESC_0F:
32376 delta = dis_ESC_0F ( &dres, expect_CAS,
32377 archinfo, vbi, pfx, sz, delta );
32378 break;
32379 case ESC_0F38:
32380 delta = dis_ESC_0F38( &dres,
32381 archinfo, vbi, pfx, sz, delta );
32382 break;
32383 case ESC_0F3A:
32384 delta = dis_ESC_0F3A( &dres,
32385 archinfo, vbi, pfx, sz, delta );
32386 break;
32387 default:
32388 vassert(0);
32389 }
32390 } else {
32391 /* VEX prefixed instruction */
32392 /* Sloppy Intel wording: "An instruction encoded with a VEX.128
32393 prefix that loads a YMM register operand ..." zeroes out bits
32394 128 and above of the register. */
32395 Bool uses_vvvv = False;
32396 switch (esc) {
32397 case ESC_0F:
32398 delta = dis_ESC_0F__VEX ( &dres, &uses_vvvv,
32399 archinfo, vbi, pfx, sz, delta );
32400 break;
32401 case ESC_0F38:
32402 delta = dis_ESC_0F38__VEX ( &dres, &uses_vvvv,
32403 archinfo, vbi, pfx, sz, delta );
32404 break;
32405 case ESC_0F3A:
32406 delta = dis_ESC_0F3A__VEX ( &dres, &uses_vvvv,
32407 archinfo, vbi, pfx, sz, delta );
32408 break;
32409 case ESC_NONE:
32410 /* The presence of a VEX prefix, by Intel definition,
32411 always implies at least an 0F escape. */
32412 goto decode_failure;
32413 default:
32414 vassert(0);
32415 }
32416 /* If the insn doesn't use VEX.vvvv then it must be all ones.
32417 Check this. */
32418 if (!uses_vvvv) {
32419 if (getVexNvvvv(pfx) != 0)
32420 goto decode_failure;
32421 }
32422 }
32424 vassert(delta - delta_at_primary_opcode >= 0);
32425 vassert(delta - delta_at_primary_opcode < 16/*let's say*/);
32427 /* Use delta == delta_at_primary_opcode to denote decode failure.
32428 This implies that any successful decode must use at least one
32429 byte up. */
32430 if (delta == delta_at_primary_opcode)
32431 goto decode_failure;
32432 else
32433 goto decode_success; /* \o/ */
32436 decode_failure:
32437 /* All decode failures end up here. */
32438 if (sigill_diag) {
32439 vex_printf("vex amd64->IR: unhandled instruction bytes: "
32440 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
32441 getUChar(delta_start+0),
32442 getUChar(delta_start+1),
32443 getUChar(delta_start+2),
32444 getUChar(delta_start+3),
32445 getUChar(delta_start+4),
32446 getUChar(delta_start+5),
32447 getUChar(delta_start+6),
32448 getUChar(delta_start+7),
32449 getUChar(delta_start+8),
32450 getUChar(delta_start+9) );
32451 vex_printf("vex amd64->IR: REX=%d REX.W=%d REX.R=%d REX.X=%d REX.B=%d\n",
32452 haveREX(pfx) ? 1 : 0, getRexW(pfx), getRexR(pfx),
32453 getRexX(pfx), getRexB(pfx));
32454 vex_printf("vex amd64->IR: VEX=%d VEX.L=%d VEX.nVVVV=0x%x ESC=%s\n",
32455 haveVEX(pfx) ? 1 : 0, getVexL(pfx),
32456 getVexNvvvv(pfx),
32457 esc==ESC_NONE ? "NONE" :
32458 esc==ESC_0F ? "0F" :
32459 esc==ESC_0F38 ? "0F38" :
32460 esc==ESC_0F3A ? "0F3A" : "???");
32461 vex_printf("vex amd64->IR: PFX.66=%d PFX.F2=%d PFX.F3=%d\n",
32462 have66(pfx) ? 1 : 0, haveF2(pfx) ? 1 : 0,
32463 haveF3(pfx) ? 1 : 0);
32464 }
32466 /* Tell the dispatcher that this insn cannot be decoded, and so has
32467 not been executed, and (is currently) the next to be executed.
32468 RIP should be up-to-date since it was made so at the start of each
32469 insn, but nevertheless be paranoid and update it again right
32470 now. */
32471 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
32472 jmp_lit(&dres, Ijk_NoDecode, guest_RIP_curr_instr);
32473 vassert(dres.whatNext == Dis_StopHere);
32474 dres.len = 0;
32475 /* We also need to say that a CAS is not expected now, regardless
32476 of what it might have been set to at the start of the function,
32477 since the IR that we've emitted just above (to synthesise a
32478 SIGILL) does not involve any CAS, and presumably no other IR has
32479 been emitted for this (non-decoded) insn. */
32480 *expect_CAS = False;
32481 return dres;
32484 decode_success:
32485 /* All decode successes end up here. */
32486 switch (dres.whatNext) {
32487 case Dis_Continue:
32488 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) );
32489 break;
32490 case Dis_StopHere:
32491 break;
32492 default:
32493 vassert(0);
32494 }
32496 DIP("\n");
32497 dres.len = toUInt(delta - delta_start);
32498 return dres;
32499 }
32501 #undef DIP
32502 #undef DIS
32505 /*------------------------------------------------------------*/
32506 /*--- Top-level fn ---*/
32507 /*------------------------------------------------------------*/
32509 /* Disassemble a single instruction into IR. The instruction
32510 is located in host memory at &guest_code[delta]. */
32512 DisResult disInstr_AMD64 ( IRSB* irsb_IN,
32513 const UChar* guest_code_IN,
32514 Long delta,
32515 Addr guest_IP,
32516 VexArch guest_arch,
32517 const VexArchInfo* archinfo,
32518 const VexAbiInfo* abiinfo,
32519 VexEndness host_endness_IN,
32520 Bool sigill_diag_IN )
32521 {
32522 Int i, x1, x2;
32523 Bool expect_CAS, has_CAS;
32524 DisResult dres;
32526 /* Set globals (see top of this file) */
32527 vassert(guest_arch == VexArchAMD64);
32528 guest_code = guest_code_IN;
32529 irsb = irsb_IN;
32530 host_endness = host_endness_IN;
32531 guest_RIP_curr_instr = guest_IP;
32532 guest_RIP_bbstart = guest_IP - delta;
32534 /* We'll consult these after doing disInstr_AMD64_WRK. */
32535 guest_RIP_next_assumed = 0;
32536 guest_RIP_next_mustcheck = False;
32538 x1 = irsb_IN->stmts_used;
32539 expect_CAS = False;
32540 dres = disInstr_AMD64_WRK ( &expect_CAS,
32541 delta, archinfo, abiinfo, sigill_diag_IN );
32542 x2 = irsb_IN->stmts_used;
32543 vassert(x2 >= x1);
32545 /* If disInstr_AMD64_WRK tried to figure out the next rip, check it
32546 got it right. Failure of this assertion is serious and denotes
32547 a bug in disInstr. */
32548 if (guest_RIP_next_mustcheck
32549 && guest_RIP_next_assumed != guest_RIP_curr_instr + dres.len) {
32550 vex_printf("\n");
32551 vex_printf("assumed next %%rip = 0x%llx\n",
32552 guest_RIP_next_assumed );
32553 vex_printf(" actual next %%rip = 0x%llx\n",
32554 guest_RIP_curr_instr + dres.len );
32555 vpanic("disInstr_AMD64: disInstr miscalculated next %rip");
32556 }
32558 /* See comment at the top of disInstr_AMD64_WRK for meaning of
32559 expect_CAS. Here, we (sanity-)check for the presence/absence of
32560 IRCAS as directed by the returned expect_CAS value. */
32561 has_CAS = False;
32562 for (i = x1; i < x2; i++) {
32563 if (irsb_IN->stmts[i]->tag == Ist_CAS)
32564 has_CAS = True;
32565 }
32567 if (expect_CAS != has_CAS) {
32568 /* inconsistency detected. re-disassemble the instruction so as
32569 to generate a useful error message; then assert. */
32570 vex_traceflags |= VEX_TRACE_FE;
32571 dres = disInstr_AMD64_WRK ( &expect_CAS,
32572 delta, archinfo, abiinfo, sigill_diag_IN );
32573 for (i = x1; i < x2; i++) {
32574 vex_printf("\t\t");
32575 ppIRStmt(irsb_IN->stmts[i]);
32576 vex_printf("\n");
32577 }
32578 /* Failure of this assertion is serious and denotes a bug in
32579 disInstr. */
32580 vpanic("disInstr_AMD64: inconsistency in LOCK prefix handling");
32581 }
32583 return dres;
32584 }
32587 /*------------------------------------------------------------*/
32588 /*--- Unused stuff ---*/
32589 /*------------------------------------------------------------*/
32591 // A potentially more Memcheck-friendly version of gen_LZCNT, if
32592 // this should ever be needed.
32594 //static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
32596 // /* Scheme is simple: propagate the most significant 1-bit into all
32597 // lower positions in the word. This gives a word of the form
32598 // 0---01---1. Now invert it, giving a word of the form
32599 // 1---10---0, then do a population-count idiom (to count the 1s,
32600 // which is the number of leading zeroes, or the word size if the
32601 // original word was 0).
32602 // */
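// As a worked example of the 32-bit case: for src = 0x00080000 the
// smearing steps produce 0x000FFFFF, the Not gives 0xFFF00000, and the
// popcount of that is 12, which is indeed the leading-zero count of
// 0x00080000.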
32603 // Int i;
32604 // IRTemp t[7];
32605 // for (i = 0; i < 7; i++) {
32606 // t[i] = newTemp(ty);
32607 // }
32608 // if (ty == Ity_I64) {
32609 // assign(t[0], binop(Iop_Or64, mkexpr(src),
32610 // binop(Iop_Shr64, mkexpr(src), mkU8(1))));
32611 // assign(t[1], binop(Iop_Or64, mkexpr(t[0]),
32612 // binop(Iop_Shr64, mkexpr(t[0]), mkU8(2))));
32613 // assign(t[2], binop(Iop_Or64, mkexpr(t[1]),
32614 // binop(Iop_Shr64, mkexpr(t[1]), mkU8(4))));
32615 // assign(t[3], binop(Iop_Or64, mkexpr(t[2]),
32616 // binop(Iop_Shr64, mkexpr(t[2]), mkU8(8))));
32617 // assign(t[4], binop(Iop_Or64, mkexpr(t[3]),
32618 // binop(Iop_Shr64, mkexpr(t[3]), mkU8(16))));
32619 // assign(t[5], binop(Iop_Or64, mkexpr(t[4]),
32620 // binop(Iop_Shr64, mkexpr(t[4]), mkU8(32))));
32621 // assign(t[6], unop(Iop_Not64, mkexpr(t[5])));
32622 // return gen_POPCOUNT(ty, t[6]);
32623 // }
32624 // if (ty == Ity_I32) {
32625 // assign(t[0], binop(Iop_Or32, mkexpr(src),
32626 // binop(Iop_Shr32, mkexpr(src), mkU8(1))));
32627 // assign(t[1], binop(Iop_Or32, mkexpr(t[0]),
32628 // binop(Iop_Shr32, mkexpr(t[0]), mkU8(2))));
32629 // assign(t[2], binop(Iop_Or32, mkexpr(t[1]),
32630 // binop(Iop_Shr32, mkexpr(t[1]), mkU8(4))));
32631 // assign(t[3], binop(Iop_Or32, mkexpr(t[2]),
32632 // binop(Iop_Shr32, mkexpr(t[2]), mkU8(8))));
32633 // assign(t[4], binop(Iop_Or32, mkexpr(t[3]),
32634 // binop(Iop_Shr32, mkexpr(t[3]), mkU8(16))));
32635 // assign(t[5], unop(Iop_Not32, mkexpr(t[4])));
32636 // return gen_POPCOUNT(ty, t[5]);
32637 // }
32638 // if (ty == Ity_I16) {
32639 // assign(t[0], binop(Iop_Or16, mkexpr(src),
32640 // binop(Iop_Shr16, mkexpr(src), mkU8(1))));
32641 // assign(t[1], binop(Iop_Or16, mkexpr(t[0]),
32642 // binop(Iop_Shr16, mkexpr(t[0]), mkU8(2))));
32643 // assign(t[2], binop(Iop_Or16, mkexpr(t[1]),
32644 // binop(Iop_Shr16, mkexpr(t[1]), mkU8(4))));
32645 // assign(t[3], binop(Iop_Or16, mkexpr(t[2]),
32646 // binop(Iop_Shr16, mkexpr(t[2]), mkU8(8))));
32647 // assign(t[4], unop(Iop_Not16, mkexpr(t[3])));
32648 // return gen_POPCOUNT(ty, t[4]);
32649 // }
32650 // vassert(0);
32654 /*--------------------------------------------------------------------*/
32655 /*--- end guest_amd64_toIR.c ---*/
32656 /*--------------------------------------------------------------------*/