2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_amd64_toIR.c ---*/
4 /*--------------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
28 Neither the names of the U.S. Department of Energy nor the
29 University of California nor the names of its contributors may be
30 used to endorse or promote products derived from this software
31 without prior written permission.
34 /* Translates AMD64 code to IR. */
36 /* TODO:
38 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
39 to ensure a 64-bit value is being written.
41 x87 FP Limitations:
43 * all arithmetic done at 64 bits
45 * no FP exceptions, except for handling stack over/underflow
47 * FP rounding mode observed only for float->int conversions and
48 int->float conversions which could lose accuracy, and for
49 float-to-float rounding. For all other operations,
50 round-to-nearest is used, regardless.
52 * some of the FCOM cases could do with testing -- not convinced
53 that the args are the right way round.
55 * FSAVE does not re-initialise the FPU; it should do
57 * FINIT not only initialises the FPU environment, it also zeroes
58 all the FP registers. It should leave the registers unchanged.
60 SAHF should cause eflags[1] == 1, and in fact it produces 0. As
61 per Intel docs this bit has no meaning anyway. Since PUSHF is the
62 only way to observe eflags[1], a proper fix would be to make that
63 bit be set by PUSHF.
65 This module uses global variables and so is not MT-safe (if that
66 should ever become relevant).
69 /* Notes re address size overrides (0x67).
71 According to the AMD documentation (24594 Rev 3.09, Sept 2003,
72 "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose
73 and System Instructions"), Section 1.2.3 ("Address-Size Override
74 Prefix"):
76 0x67 applies to all explicit memory references, causing the top
77 32 bits of the effective address to become zero.
79 0x67 has no effect on stack references (push/pop); these always
80 use a 64-bit address.
82 0x67 changes the interpretation of instructions which implicitly
83 reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used
84 instead. These are:
86 cmp{s,sb,sw,sd,sq}
87 in{s,sb,sw,sd}
88 jcxz, jecxz, jrcxz
89 lod{s,sb,sw,sd,sq}
90 loop{,e,bz,be,z}
91 mov{s,sb,sw,sd,sq}
92 out{s,sb,sw,sd}
93 rep{,e,ne,nz}
94 sca{s,sb,sw,sd,sq}
95 sto{s,sb,sw,sd,sq}
96 xlat{,b} */
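/* A minimal sketch (illustrative only, hypothetical helper name, not
   part of this decoder) of the 0x67 effect described above: the top
   32 bits of the computed effective address are simply forced to
   zero. */
static unsigned long long sketch_apply_addr_size_override
   ( unsigned long long ea )
{
   return ea & 0xFFFFFFFFULL;   /* keep only the low 32 bits */
}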
98 /* "Special" instructions.
100 This instruction decoder can decode three special instructions
101 which mean nothing natively (are no-ops as far as regs/mem are
102 concerned) but have meaning for supporting Valgrind. A special
103 instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D
104 48C1C73D 48C1C733 (in the standard interpretation, that means: rolq
105 $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi).
 106    Following that, one of the following 4 is allowed (standard
107 interpretation in parentheses):
109 4887DB (xchgq %rbx,%rbx) %RDX = client_request ( %RAX )
110 4887C9 (xchgq %rcx,%rcx) %RAX = guest_NRADDR
111 4887D2 (xchgq %rdx,%rdx) call-noredir *%RAX
112 4887F6 (xchgq %rdi,%rdi) IR injection
114 Any other bytes following the 16-byte preamble are illegal and
115 constitute a failure in instruction decoding. This all assumes
116 that the preamble will never occur except in specific code
117 fragments designed for Valgrind to catch.
119 No prefixes may precede a "Special" instruction.
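/* A minimal sketch (illustrative only, hypothetical helper name) of
   how the 16-byte preamble quoted above could be recognised, assuming
   'code' points at the candidate bytes. */
static int sketch_is_special_preamble ( const unsigned char* code )
{
   static const unsigned char preamble[16]
      = { 0x48,0xC1,0xC7,0x03,  0x48,0xC1,0xC7,0x0D,
          0x48,0xC1,0xC7,0x3D,  0x48,0xC1,0xC7,0x33 };
   int i;
   for (i = 0; i < 16; i++)
      if (code[i] != preamble[i]) return 0;
   return 1;
}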
122 /* casLE (implementation of lock-prefixed insns) and rep-prefixed
123 insns: the side-exit back to the start of the insn is done with
124 Ijk_Boring. This is quite wrong, it should be done with
125 Ijk_NoRedir, since otherwise the side exit, which is intended to
126 restart the instruction for whatever reason, could go somewhere
127 entirely else. Doing it right (with Ijk_NoRedir jumps) would make
128 no-redir jumps performance critical, at least for rep-prefixed
129 instructions, since all iterations thereof would involve such a
130 jump. It's not such a big deal with casLE since the side exit is
131 only taken if the CAS fails, that is, the location is contended,
132 which is relatively unlikely.
134 Note also, the test for CAS success vs failure is done using
135 Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
136 Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it
137 shouldn't definedness-check these comparisons. See
138 COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
139 background/rationale.
142 /* LOCK prefixed instructions. These are translated using IR-level
143 CAS statements (IRCAS) and are believed to preserve atomicity, even
144 from the point of view of some other process racing against a
145 simulated one (presumably they communicate via a shared memory
146 segment).
148 Handlers which are aware of LOCK prefixes are:
149 dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
150 dis_cmpxchg_G_E (cmpxchg)
151 dis_Grp1 (add, or, adc, sbb, and, sub, xor)
152 dis_Grp3 (not, neg)
153 dis_Grp4 (inc, dec)
154 dis_Grp5 (inc, dec)
155 dis_Grp8_Imm (bts, btc, btr)
156 dis_bt_G_E (bts, btc, btr)
157 dis_xadd_G_E (xadd)
161 #include "libvex_basictypes.h"
162 #include "libvex_ir.h"
163 #include "libvex.h"
164 #include "libvex_guest_amd64.h"
166 #include "main_util.h"
167 #include "main_globals.h"
168 #include "guest_generic_bb_to_IR.h"
169 #include "guest_generic_x87.h"
170 #include "guest_amd64_defs.h"
173 /*------------------------------------------------------------*/
174 /*--- Globals ---*/
175 /*------------------------------------------------------------*/
177 /* These are set at the start of the translation of an insn, right
178 down in disInstr_AMD64, so that we don't have to pass them around
179 endlessly. They are all constant during the translation of any
180 given insn. */
182 /* These are set at the start of the translation of a BB, so
183 that we don't have to pass them around endlessly. */
185 /* We need to know this to do sub-register accesses correctly. */
186 static VexEndness host_endness;
188 /* Pointer to the guest code area (points to start of BB, not to the
189 insn being processed). */
190 static const UChar* guest_code;
192 /* The guest address corresponding to guest_code[0]. */
193 static Addr64 guest_RIP_bbstart;
195 /* The guest address for the instruction currently being
196 translated. */
197 static Addr64 guest_RIP_curr_instr;
199 /* The IRSB* into which we're generating code. */
200 static IRSB* irsb;
202 /* For ensuring that %rip-relative addressing is done right. A read
203 of %rip generates the address of the next instruction. It may be
204 that we don't conveniently know that inside disAMode(). For sanity
205 checking, if the next insn %rip is needed, we make a guess at what
206 it is, record that guess here, and set the accompanying Bool to
207 indicate that -- after this insn's decode is finished -- that guess
208 needs to be checked. */
 210 /* At the start of each insn decode, this pair is set to (0, False).
211 After the decode, if _mustcheck is now True, _assumed is
212 checked. */
214 static Addr64 guest_RIP_next_assumed;
215 static Bool guest_RIP_next_mustcheck;
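/* A minimal sketch (illustrative only, hypothetical helper name) of
   the post-decode check described above: once the length of the insn
   just decoded is known, verify the guessed next %rip against the
   real one. */
static void sketch_check_rip_next_guess ( Addr64 actual_next_rip )
{
   if (guest_RIP_next_mustcheck)
      vassert(guest_RIP_next_assumed == actual_next_rip);
}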
218 /*------------------------------------------------------------*/
219 /*--- Helpers for constructing IR. ---*/
220 /*------------------------------------------------------------*/
222 /* Generate a new temporary of the given type. */
223 static IRTemp newTemp ( IRType ty )
225 vassert(isPlausibleIRType(ty));
226 return newIRTemp( irsb->tyenv, ty );
229 /* Add a statement to the list held by "irsb". */
230 static void stmt ( IRStmt* st )
232 addStmtToIRSB( irsb, st );
235 /* Generate a statement "dst := e". */
236 static void assign ( IRTemp dst, IRExpr* e )
238 stmt( IRStmt_WrTmp(dst, e) );
241 static IRExpr* unop ( IROp op, IRExpr* a )
243 return IRExpr_Unop(op, a);
246 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
248 return IRExpr_Binop(op, a1, a2);
251 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
253 return IRExpr_Triop(op, a1, a2, a3);
256 static IRExpr* mkexpr ( IRTemp tmp )
258 return IRExpr_RdTmp(tmp);
261 static IRExpr* mkU8 ( ULong i )
263 vassert(i < 256);
264 return IRExpr_Const(IRConst_U8( (UChar)i ));
267 static IRExpr* mkU16 ( ULong i )
269 vassert(i < 0x10000ULL);
270 return IRExpr_Const(IRConst_U16( (UShort)i ));
273 static IRExpr* mkU32 ( ULong i )
275 vassert(i < 0x100000000ULL);
276 return IRExpr_Const(IRConst_U32( (UInt)i ));
279 static IRExpr* mkU64 ( ULong i )
281 return IRExpr_Const(IRConst_U64(i));
284 static IRExpr* mkU ( IRType ty, ULong i )
286 switch (ty) {
287 case Ity_I8: return mkU8(i);
288 case Ity_I16: return mkU16(i);
289 case Ity_I32: return mkU32(i);
290 case Ity_I64: return mkU64(i);
291 default: vpanic("mkU(amd64)");
295 static void storeLE ( IRExpr* addr, IRExpr* data )
297 stmt( IRStmt_Store(Iend_LE, addr, data) );
300 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
302 return IRExpr_Load(Iend_LE, ty, addr);
305 static IROp mkSizedOp ( IRType ty, IROp op8 )
307 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
308 || op8 == Iop_Mul8
309 || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
310 || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
311 || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
312 || op8 == Iop_CasCmpNE8
313 || op8 == Iop_Not8 );
314 switch (ty) {
315 case Ity_I8: return 0 +op8;
316 case Ity_I16: return 1 +op8;
317 case Ity_I32: return 2 +op8;
318 case Ity_I64: return 3 +op8;
319 default: vpanic("mkSizedOp(amd64)");
323 static
324 IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src )
326 if (szSmall == 1 && szBig == 4) {
327 return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src);
329 if (szSmall == 1 && szBig == 2) {
330 return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src);
332 if (szSmall == 2 && szBig == 4) {
333 return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src);
335 if (szSmall == 1 && szBig == 8 && !signd) {
336 return unop(Iop_8Uto64, src);
338 if (szSmall == 1 && szBig == 8 && signd) {
339 return unop(Iop_8Sto64, src);
341 if (szSmall == 2 && szBig == 8 && !signd) {
342 return unop(Iop_16Uto64, src);
344 if (szSmall == 2 && szBig == 8 && signd) {
345 return unop(Iop_16Sto64, src);
347 vpanic("doScalarWidening(amd64)");
350 static
351 void putGuarded ( Int gstOffB, IRExpr* guard, IRExpr* value )
353 IRType ty = typeOfIRExpr(irsb->tyenv, value);
354 stmt( IRStmt_Put(gstOffB,
355 IRExpr_ITE(guard, value, IRExpr_Get(gstOffB, ty))) );
359 /*------------------------------------------------------------*/
360 /*--- Debugging output ---*/
361 /*------------------------------------------------------------*/
363 /* Bomb out if we can't handle something. */
364 __attribute__ ((noreturn))
365 static void unimplemented ( const HChar* str )
367 vex_printf("amd64toIR: unimplemented feature\n");
368 vpanic(str);
371 #define DIP(format, args...) \
372 if (vex_traceflags & VEX_TRACE_FE) \
373 vex_printf(format, ## args)
375 #define DIS(buf, format, args...) \
376 if (vex_traceflags & VEX_TRACE_FE) \
377 vex_sprintf(buf, format, ## args)
380 /*------------------------------------------------------------*/
381 /*--- Offsets of various parts of the amd64 guest state. ---*/
382 /*------------------------------------------------------------*/
384 #define OFFB_RAX offsetof(VexGuestAMD64State,guest_RAX)
385 #define OFFB_RBX offsetof(VexGuestAMD64State,guest_RBX)
386 #define OFFB_RCX offsetof(VexGuestAMD64State,guest_RCX)
387 #define OFFB_RDX offsetof(VexGuestAMD64State,guest_RDX)
388 #define OFFB_RSP offsetof(VexGuestAMD64State,guest_RSP)
389 #define OFFB_RBP offsetof(VexGuestAMD64State,guest_RBP)
390 #define OFFB_RSI offsetof(VexGuestAMD64State,guest_RSI)
391 #define OFFB_RDI offsetof(VexGuestAMD64State,guest_RDI)
392 #define OFFB_R8 offsetof(VexGuestAMD64State,guest_R8)
393 #define OFFB_R9 offsetof(VexGuestAMD64State,guest_R9)
394 #define OFFB_R10 offsetof(VexGuestAMD64State,guest_R10)
395 #define OFFB_R11 offsetof(VexGuestAMD64State,guest_R11)
396 #define OFFB_R12 offsetof(VexGuestAMD64State,guest_R12)
397 #define OFFB_R13 offsetof(VexGuestAMD64State,guest_R13)
398 #define OFFB_R14 offsetof(VexGuestAMD64State,guest_R14)
399 #define OFFB_R15 offsetof(VexGuestAMD64State,guest_R15)
401 #define OFFB_RIP offsetof(VexGuestAMD64State,guest_RIP)
403 #define OFFB_FS_CONST offsetof(VexGuestAMD64State,guest_FS_CONST)
404 #define OFFB_GS_CONST offsetof(VexGuestAMD64State,guest_GS_CONST)
406 #define OFFB_CC_OP offsetof(VexGuestAMD64State,guest_CC_OP)
407 #define OFFB_CC_DEP1 offsetof(VexGuestAMD64State,guest_CC_DEP1)
408 #define OFFB_CC_DEP2 offsetof(VexGuestAMD64State,guest_CC_DEP2)
409 #define OFFB_CC_NDEP offsetof(VexGuestAMD64State,guest_CC_NDEP)
411 #define OFFB_FPREGS offsetof(VexGuestAMD64State,guest_FPREG[0])
412 #define OFFB_FPTAGS offsetof(VexGuestAMD64State,guest_FPTAG[0])
413 #define OFFB_DFLAG offsetof(VexGuestAMD64State,guest_DFLAG)
414 #define OFFB_ACFLAG offsetof(VexGuestAMD64State,guest_ACFLAG)
415 #define OFFB_IDFLAG offsetof(VexGuestAMD64State,guest_IDFLAG)
416 #define OFFB_FTOP offsetof(VexGuestAMD64State,guest_FTOP)
417 #define OFFB_FC3210 offsetof(VexGuestAMD64State,guest_FC3210)
418 #define OFFB_FPROUND offsetof(VexGuestAMD64State,guest_FPROUND)
420 #define OFFB_SSEROUND offsetof(VexGuestAMD64State,guest_SSEROUND)
421 #define OFFB_YMM0 offsetof(VexGuestAMD64State,guest_YMM0)
422 #define OFFB_YMM1 offsetof(VexGuestAMD64State,guest_YMM1)
423 #define OFFB_YMM2 offsetof(VexGuestAMD64State,guest_YMM2)
424 #define OFFB_YMM3 offsetof(VexGuestAMD64State,guest_YMM3)
425 #define OFFB_YMM4 offsetof(VexGuestAMD64State,guest_YMM4)
426 #define OFFB_YMM5 offsetof(VexGuestAMD64State,guest_YMM5)
427 #define OFFB_YMM6 offsetof(VexGuestAMD64State,guest_YMM6)
428 #define OFFB_YMM7 offsetof(VexGuestAMD64State,guest_YMM7)
429 #define OFFB_YMM8 offsetof(VexGuestAMD64State,guest_YMM8)
430 #define OFFB_YMM9 offsetof(VexGuestAMD64State,guest_YMM9)
431 #define OFFB_YMM10 offsetof(VexGuestAMD64State,guest_YMM10)
432 #define OFFB_YMM11 offsetof(VexGuestAMD64State,guest_YMM11)
433 #define OFFB_YMM12 offsetof(VexGuestAMD64State,guest_YMM12)
434 #define OFFB_YMM13 offsetof(VexGuestAMD64State,guest_YMM13)
435 #define OFFB_YMM14 offsetof(VexGuestAMD64State,guest_YMM14)
436 #define OFFB_YMM15 offsetof(VexGuestAMD64State,guest_YMM15)
437 #define OFFB_YMM16 offsetof(VexGuestAMD64State,guest_YMM16)
439 #define OFFB_EMNOTE offsetof(VexGuestAMD64State,guest_EMNOTE)
440 #define OFFB_CMSTART offsetof(VexGuestAMD64State,guest_CMSTART)
441 #define OFFB_CMLEN offsetof(VexGuestAMD64State,guest_CMLEN)
443 #define OFFB_NRADDR offsetof(VexGuestAMD64State,guest_NRADDR)
446 /*------------------------------------------------------------*/
447 /*--- Helper bits and pieces for deconstructing the ---*/
448 /*--- amd64 insn stream. ---*/
449 /*------------------------------------------------------------*/
451 /* This is the AMD64 register encoding -- integer regs. */
452 #define R_RAX 0
453 #define R_RCX 1
454 #define R_RDX 2
455 #define R_RBX 3
456 #define R_RSP 4
457 #define R_RBP 5
458 #define R_RSI 6
459 #define R_RDI 7
460 #define R_R8 8
461 #define R_R9 9
462 #define R_R10 10
463 #define R_R11 11
464 #define R_R12 12
465 #define R_R13 13
466 #define R_R14 14
467 #define R_R15 15
469 /* This is the Intel register encoding -- segment regs. */
470 #define R_ES 0
471 #define R_CS 1
472 #define R_SS 2
473 #define R_DS 3
474 #define R_FS 4
475 #define R_GS 5
478 /* Various simple conversions */
480 static ULong extend_s_8to64 ( UChar x )
482 return (ULong)((Long)(((ULong)x) << 56) >> 56);
485 static ULong extend_s_16to64 ( UShort x )
487 return (ULong)((Long)(((ULong)x) << 48) >> 48);
490 static ULong extend_s_32to64 ( UInt x )
492 return (ULong)((Long)(((ULong)x) << 32) >> 32);
495 /* Figure out whether the mod and rm parts of a modRM byte refer to a
496 register or memory. If so, the byte will have the form 11XXXYYY,
497 where YYY is the register number. */
498 inline
499 static Bool epartIsReg ( UChar mod_reg_rm )
501 return toBool(0xC0 == (mod_reg_rm & 0xC0));
504 /* Extract the 'g' field from a modRM byte. This only produces 3
505 bits, which is not a complete register number. You should avoid
506 this function if at all possible. */
507 inline
508 static Int gregLO3ofRM ( UChar mod_reg_rm )
510 return (Int)( (mod_reg_rm >> 3) & 7 );
513 /* Ditto the 'e' field of a modRM byte. */
514 inline
515 static Int eregLO3ofRM ( UChar mod_reg_rm )
517 return (Int)(mod_reg_rm & 0x7);
 520 /* Get an 8/16/32-bit unsigned value out of the insn stream. */
522 static inline UChar getUChar ( Long delta )
524 UChar v = guest_code[delta+0];
525 return v;
528 static UInt getUDisp16 ( Long delta )
530 UInt v = guest_code[delta+1]; v <<= 8;
531 v |= guest_code[delta+0];
532 return v & 0xFFFF;
535 //.. static UInt getUDisp ( Int size, Long delta )
536 //.. {
537 //.. switch (size) {
538 //.. case 4: return getUDisp32(delta);
539 //.. case 2: return getUDisp16(delta);
540 //.. case 1: return getUChar(delta);
541 //.. default: vpanic("getUDisp(x86)");
542 //.. }
543 //.. return 0; /*notreached*/
544 //.. }
547 /* Get a byte value out of the insn stream and sign-extend to 64
548 bits. */
549 static Long getSDisp8 ( Long delta )
551 return extend_s_8to64( guest_code[delta] );
554 /* Get a 16-bit value out of the insn stream and sign-extend to 64
555 bits. */
556 static Long getSDisp16 ( Long delta )
558 UInt v = guest_code[delta+1]; v <<= 8;
559 v |= guest_code[delta+0];
560 return extend_s_16to64( (UShort)v );
563 /* Get a 32-bit value out of the insn stream and sign-extend to 64
564 bits. */
565 static Long getSDisp32 ( Long delta )
567 UInt v = guest_code[delta+3]; v <<= 8;
568 v |= guest_code[delta+2]; v <<= 8;
569 v |= guest_code[delta+1]; v <<= 8;
570 v |= guest_code[delta+0];
571 return extend_s_32to64( v );
574 /* Get a 64-bit value out of the insn stream. */
575 static Long getDisp64 ( Long delta )
577 ULong v = 0;
578 v |= guest_code[delta+7]; v <<= 8;
579 v |= guest_code[delta+6]; v <<= 8;
580 v |= guest_code[delta+5]; v <<= 8;
581 v |= guest_code[delta+4]; v <<= 8;
582 v |= guest_code[delta+3]; v <<= 8;
583 v |= guest_code[delta+2]; v <<= 8;
584 v |= guest_code[delta+1]; v <<= 8;
585 v |= guest_code[delta+0];
586 return v;
589 /* Note: because AMD64 doesn't allow 64-bit literals, it is an error
590 if this is called with size==8. Should not happen. */
591 static Long getSDisp ( Int size, Long delta )
593 switch (size) {
594 case 4: return getSDisp32(delta);
595 case 2: return getSDisp16(delta);
596 case 1: return getSDisp8(delta);
597 default: vpanic("getSDisp(amd64)");
601 static ULong mkSizeMask ( Int sz )
603 switch (sz) {
604 case 1: return 0x00000000000000FFULL;
605 case 2: return 0x000000000000FFFFULL;
606 case 4: return 0x00000000FFFFFFFFULL;
607 case 8: return 0xFFFFFFFFFFFFFFFFULL;
608 default: vpanic("mkSzMask(amd64)");
612 static Int imin ( Int a, Int b )
614 return (a < b) ? a : b;
617 static IRType szToITy ( Int n )
619 switch (n) {
620 case 1: return Ity_I8;
621 case 2: return Ity_I16;
622 case 4: return Ity_I32;
623 case 8: return Ity_I64;
624 default: vex_printf("\nszToITy(%d)\n", n);
625 vpanic("szToITy(amd64)");
630 /*------------------------------------------------------------*/
631 /*--- For dealing with prefixes. ---*/
632 /*------------------------------------------------------------*/
634 /* The idea is to pass around an int holding a bitmask summarising
635 info from the prefixes seen on the current instruction, including
636 info from the REX byte. This info is used in various places, but
637 most especially when making sense of register fields in
638 instructions.
640 The top 8 bits of the prefix are 0x55, just as a hacky way to
641 ensure it really is a valid prefix.
643 Things you can safely assume about a well-formed prefix:
644 * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set.
645 * if REX is not present then REXW,REXR,REXX,REXB will read
646 as zero.
647 * F2 and F3 will not both be 1.
650 typedef UInt Prefix;
652 #define PFX_ASO (1<<0) /* address-size override present (0x67) */
653 #define PFX_66 (1<<1) /* operand-size override-to-16 present (0x66) */
654 #define PFX_REX (1<<2) /* REX byte present (0x40 to 0x4F) */
655 #define PFX_REXW (1<<3) /* REX W bit, if REX present, else 0 */
656 #define PFX_REXR (1<<4) /* REX R bit, if REX present, else 0 */
657 #define PFX_REXX (1<<5) /* REX X bit, if REX present, else 0 */
658 #define PFX_REXB (1<<6) /* REX B bit, if REX present, else 0 */
659 #define PFX_LOCK (1<<7) /* bus LOCK prefix present (0xF0) */
 660 #define PFX_F2    (1<<8)    /* REPNE/REPNZ prefix present (0xF2) */
 661 #define PFX_F3    (1<<9)    /* REP/REPE/REPZ prefix present (0xF3) */
662 #define PFX_CS (1<<10) /* CS segment prefix present (0x2E) */
663 #define PFX_DS (1<<11) /* DS segment prefix present (0x3E) */
664 #define PFX_ES (1<<12) /* ES segment prefix present (0x26) */
665 #define PFX_FS (1<<13) /* FS segment prefix present (0x64) */
666 #define PFX_GS (1<<14) /* GS segment prefix present (0x65) */
667 #define PFX_SS (1<<15) /* SS segment prefix present (0x36) */
668 #define PFX_VEX (1<<16) /* VEX prefix present (0xC4 or 0xC5) */
669 #define PFX_VEXL (1<<17) /* VEX L bit, if VEX present, else 0 */
670 /* The extra register field VEX.vvvv is encoded (after not-ing it) as
671 PFX_VEXnV3 .. PFX_VEXnV0, so these must occupy adjacent bit
672 positions. */
673 #define PFX_VEXnV0 (1<<18) /* ~VEX vvvv[0], if VEX present, else 0 */
674 #define PFX_VEXnV1 (1<<19) /* ~VEX vvvv[1], if VEX present, else 0 */
675 #define PFX_VEXnV2 (1<<20) /* ~VEX vvvv[2], if VEX present, else 0 */
676 #define PFX_VEXnV3 (1<<21) /* ~VEX vvvv[3], if VEX present, else 0 */
679 #define PFX_EMPTY 0x55000000
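/* A minimal sketch (illustrative only, hypothetical helper name) of a
   well-formed Prefix word: an insn carrying an operand-size prefix
   (0x66) and a REX.W byte is summarised as the 0x55 marker in the top
   bits plus the individual flag bits. */
static Prefix sketch_pfx_66_rexW ( void )
{
   return PFX_EMPTY | PFX_66 | PFX_REX | PFX_REXW;
}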
681 static Bool IS_VALID_PFX ( Prefix pfx ) {
682 return toBool((pfx & 0xFF000000) == PFX_EMPTY);
685 static Bool haveREX ( Prefix pfx ) {
686 return toBool(pfx & PFX_REX);
689 static Int getRexW ( Prefix pfx ) {
690 return (pfx & PFX_REXW) ? 1 : 0;
692 static Int getRexR ( Prefix pfx ) {
693 return (pfx & PFX_REXR) ? 1 : 0;
695 static Int getRexX ( Prefix pfx ) {
696 return (pfx & PFX_REXX) ? 1 : 0;
698 static Int getRexB ( Prefix pfx ) {
699 return (pfx & PFX_REXB) ? 1 : 0;
702 /* Check a prefix doesn't have F2 or F3 set in it, since usually that
703 completely changes what instruction it really is. */
704 static Bool haveF2orF3 ( Prefix pfx ) {
705 return toBool((pfx & (PFX_F2|PFX_F3)) > 0);
707 static Bool haveF2andF3 ( Prefix pfx ) {
708 return toBool((pfx & (PFX_F2|PFX_F3)) == (PFX_F2|PFX_F3));
710 static Bool haveF2 ( Prefix pfx ) {
711 return toBool((pfx & PFX_F2) > 0);
713 static Bool haveF3 ( Prefix pfx ) {
714 return toBool((pfx & PFX_F3) > 0);
717 static Bool have66 ( Prefix pfx ) {
718 return toBool((pfx & PFX_66) > 0);
720 static Bool haveASO ( Prefix pfx ) {
721 return toBool((pfx & PFX_ASO) > 0);
723 static Bool haveLOCK ( Prefix pfx ) {
724 return toBool((pfx & PFX_LOCK) > 0);
727 /* Return True iff pfx has 66 set and F2 and F3 clear */
728 static Bool have66noF2noF3 ( Prefix pfx )
730 return
731 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66);
734 /* Return True iff pfx has F2 set and 66 and F3 clear */
735 static Bool haveF2no66noF3 ( Prefix pfx )
737 return
738 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F2);
741 /* Return True iff pfx has F3 set and 66 and F2 clear */
742 static Bool haveF3no66noF2 ( Prefix pfx )
744 return
745 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3);
748 /* Return True iff pfx has F3 set and F2 clear */
749 static Bool haveF3noF2 ( Prefix pfx )
751 return
752 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F3);
755 /* Return True iff pfx has F2 set and F3 clear */
756 static Bool haveF2noF3 ( Prefix pfx )
758 return
759 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F2);
762 /* Return True iff pfx has F2 and F3 clear */
763 static Bool haveNoF2noF3 ( Prefix pfx )
765 return
766 toBool((pfx & (PFX_F2|PFX_F3)) == 0);
769 /* Return True iff pfx has 66, F2 and F3 clear */
770 static Bool haveNo66noF2noF3 ( Prefix pfx )
772 return
773 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0);
776 /* Return True iff pfx has any of 66, F2 and F3 set */
777 static Bool have66orF2orF3 ( Prefix pfx )
779 return toBool( ! haveNo66noF2noF3(pfx) );
782 /* Return True iff pfx has 66 or F3 set */
783 static Bool have66orF3 ( Prefix pfx )
785 return toBool((pfx & (PFX_66|PFX_F3)) > 0);
788 /* Clear all the segment-override bits in a prefix. */
789 static Prefix clearSegBits ( Prefix p )
791 return
792 p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS);
795 /* Get the (inverted, hence back to "normal") VEX.vvvv field. */
796 static UInt getVexNvvvv ( Prefix pfx ) {
797 UInt r = (UInt)pfx;
798 r /= (UInt)PFX_VEXnV0; /* pray this turns into a shift */
799 return r & 0xF;
802 static Bool haveVEX ( Prefix pfx ) {
803 return toBool(pfx & PFX_VEX);
806 static Int getVexL ( Prefix pfx ) {
807 return (pfx & PFX_VEXL) ? 1 : 0;
811 /*------------------------------------------------------------*/
812 /*--- For dealing with escapes ---*/
813 /*------------------------------------------------------------*/
816 /* Escapes come after the prefixes, but before the primary opcode
817 byte. They escape the primary opcode byte into a bigger space.
818 The 0xF0000000 isn't significant, except so as to make it not
819 overlap valid Prefix values, for sanity checking.
822 typedef
823 enum {
824 ESC_NONE=0xF0000000, // none
825 ESC_0F, // 0F
826 ESC_0F38, // 0F 38
827 ESC_0F3A // 0F 3A
829 Escape;
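/* A minimal sketch (illustrative only, hypothetical helper name) of
   how the escape is chosen from the byte(s) following the prefixes:
   0F 38 and 0F 3A select the two-byte escapes, a lone 0F selects
   ESC_0F, and anything else is ESC_NONE. */
static Escape sketch_escape_for ( UChar b1, UChar b2 )
{
   if (b1 != 0x0F) return ESC_NONE;
   if (b2 == 0x38) return ESC_0F38;
   if (b2 == 0x3A) return ESC_0F3A;
   return ESC_0F;
}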
832 /*------------------------------------------------------------*/
833 /*--- For dealing with integer registers ---*/
834 /*------------------------------------------------------------*/
836 /* This is somewhat complex. The rules are:
838 For 64, 32 and 16 bit register references, the e or g fields in the
839 modrm bytes supply the low 3 bits of the register number. The
840 fourth (most-significant) bit of the register number is supplied by
841 the REX byte, if it is present; else that bit is taken to be zero.
843 The REX.R bit supplies the high bit corresponding to the g register
844 field, and the REX.B bit supplies the high bit corresponding to the
845 e register field (when the mod part of modrm indicates that modrm's
846 e component refers to a register and not to memory).
848 The REX.X bit supplies a high register bit for certain registers
849 in SIB address modes, and is generally rarely used.
851 For 8 bit register references, the presence of the REX byte itself
852 has significance. If there is no REX present, then the 3-bit
853 number extracted from the modrm e or g field is treated as an index
854 into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the
855 old x86 encoding scheme.
857 But if there is a REX present, the register reference is
858 interpreted in the same way as for 64/32/16-bit references: a high
859 bit is extracted from REX, giving a 4-bit number, and the denoted
860 register is the lowest 8 bits of the 16 integer registers denoted
861 by the number. In particular, values 3 through 7 of this sequence
862 do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of
863 %rsp %rbp %rsi %rdi.
865 The REX.W bit has no bearing at all on register numbers. Instead
866 its presence indicates that the operand size is to be overridden
867 from its default value (32 bits) to 64 bits instead. This is in
868 the same fashion that an 0x66 prefix indicates the operand size is
869 to be overridden from 32 bits down to 16 bits. When both REX.W and
870 0x66 are present there is a conflict, and REX.W takes precedence.
872 Rather than try to handle this complexity using a single huge
873 function, several smaller ones are provided. The aim is to make it
874 as difficult as possible to screw up register decoding in a subtle
875 and hard-to-track-down way.
877 Because these routines fish around in the host's memory (that is,
878 in the guest state area) for sub-parts of guest registers, their
879 correctness depends on the host's endianness. So far these
880 routines only work for little-endian hosts. Those for which
881 endianness is important have assertions to ensure sanity.
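/* A worked example (illustrative only, hypothetical helper name) of
   the 8-bit rule above: register number 5 with a one-byte access
   denotes %ch (bits 15:8 of %rcx) when no REX byte is present, but
   %bpl (bits 7:0 of %rbp) when any REX byte -- even a plain 0x40 --
   is present. */
static const HChar* sketch_name_reg5_byte ( Bool rex_present )
{
   return rex_present ? "%bpl" : "%ch";
}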
885 /* About the simplest question you can ask: where do the 64-bit
886 integer registers live (in the guest state) ? */
888 static Int integerGuestReg64Offset ( UInt reg )
890 switch (reg) {
891 case R_RAX: return OFFB_RAX;
892 case R_RCX: return OFFB_RCX;
893 case R_RDX: return OFFB_RDX;
894 case R_RBX: return OFFB_RBX;
895 case R_RSP: return OFFB_RSP;
896 case R_RBP: return OFFB_RBP;
897 case R_RSI: return OFFB_RSI;
898 case R_RDI: return OFFB_RDI;
899 case R_R8: return OFFB_R8;
900 case R_R9: return OFFB_R9;
901 case R_R10: return OFFB_R10;
902 case R_R11: return OFFB_R11;
903 case R_R12: return OFFB_R12;
904 case R_R13: return OFFB_R13;
905 case R_R14: return OFFB_R14;
906 case R_R15: return OFFB_R15;
907 default: vpanic("integerGuestReg64Offset(amd64)");
912 /* Produce the name of an integer register, for printing purposes.
913 reg is a number in the range 0 .. 15 that has been generated from a
914 3-bit reg-field number and a REX extension bit. irregular denotes
915 the case where sz==1 and no REX byte is present and where the denoted
916 sub-register is bits 15:8 of the containing 64-bit register. */
918 static
919 const HChar* nameIReg ( Int sz, UInt reg, Bool irregular )
921 static const HChar* ireg64_names[16]
922 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
923 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
924 static const HChar* ireg32_names[16]
925 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
926 "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" };
927 static const HChar* ireg16_names[16]
928 = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di",
929 "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" };
930 static const HChar* ireg8_names[16]
931 = { "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil",
932 "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" };
933 static const HChar* ireg8_irregular[4]
934 = { "%ah", "%ch", "%dh", "%bh" };
936 vassert(reg < 16);
937 if (sz == 1) {
938 if (irregular)
939 vassert(reg >= 4 && reg < 8);
940 } else {
941 vassert(irregular == False);
944 switch (sz) {
945 case 8: return ireg64_names[reg];
946 case 4: return ireg32_names[reg];
947 case 2: return ireg16_names[reg];
948 case 1: if (irregular) {
949 vassert(reg >= 4 && reg < 8);
950 return ireg8_irregular[reg - 4];
951 } else {
952 return ireg8_names[reg];
954 default: vpanic("nameIReg(amd64)");
958 /* Using the same argument conventions as nameIReg, produce the
959 guest state offset of an integer register. */
961 static
962 Int offsetIReg ( Int sz, UInt reg, Bool irregular )
964 vassert(reg < 16);
965 if (sz == 1) {
966 if (irregular)
967 vassert(reg >= 4 && reg < 8);
968 } else {
969 vassert(irregular == False);
972 /* Deal with irregular case -- sz==1 and no REX present */
973 if (sz == 1 && irregular) {
974 switch (reg) {
975 case R_RSP: return 1+ OFFB_RAX;
976 case R_RBP: return 1+ OFFB_RCX;
977 case R_RSI: return 1+ OFFB_RDX;
978 case R_RDI: return 1+ OFFB_RBX;
979 default: break; /* use the normal case */
983 /* Normal case */
984 return integerGuestReg64Offset(reg);
988 /* Read the %CL register :: Ity_I8, for shift/rotate operations. */
990 static IRExpr* getIRegCL ( void )
992 vassert(host_endness == VexEndnessLE);
993 return unop(Iop_64to8, IRExpr_Get( OFFB_RCX, Ity_I64 ));
997 /* Write to the %AH register. */
999 static void putIRegAH ( IRExpr* e )
1001 vassert(host_endness == VexEndnessLE);
1002 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
1003 stmt( IRStmt_Put( OFFB_RAX+1, e ) );
1007 /* Read/write various widths of %RAX, as it has various
1008 special-purpose uses. */
1010 static const HChar* nameIRegRAX ( Int sz )
1012 switch (sz) {
1013 case 1: return "%al";
1014 case 2: return "%ax";
1015 case 4: return "%eax";
1016 case 8: return "%rax";
1017 default: vpanic("nameIRegRAX(amd64)");
1021 static IRExpr* getIRegRAX ( Int sz )
1023 vassert(host_endness == VexEndnessLE);
1024 switch (sz) {
1025 case 1: return unop(Iop_64to8, IRExpr_Get( OFFB_RAX, Ity_I64 ));
1026 case 2: return unop(Iop_64to16, IRExpr_Get( OFFB_RAX, Ity_I64 ));
1027 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 ));
1028 case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 );
1029 default: vpanic("getIRegRAX(amd64)");
1033 static void putIRegRAX ( Int sz, IRExpr* e )
1035 IRType ty = typeOfIRExpr(irsb->tyenv, e);
1036 vassert(host_endness == VexEndnessLE);
1037 switch (sz) {
1038 case 8: vassert(ty == Ity_I64);
1039 stmt( IRStmt_Put( OFFB_RAX, e ));
1040 break;
1041 case 4: vassert(ty == Ity_I32);
1042 stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) ));
1043 break;
1044 case 2: vassert(ty == Ity_I16);
1045 stmt( IRStmt_Put( OFFB_RAX, e ));
1046 break;
1047 case 1: vassert(ty == Ity_I8);
1048 stmt( IRStmt_Put( OFFB_RAX, e ));
1049 break;
1050 default: vpanic("putIRegRAX(amd64)");
1055 /* Read/write various widths of %RDX, as it has various
1056 special-purpose uses. */
1058 static const HChar* nameIRegRDX ( Int sz )
1060 switch (sz) {
1061 case 1: return "%dl";
1062 case 2: return "%dx";
1063 case 4: return "%edx";
1064 case 8: return "%rdx";
1065 default: vpanic("nameIRegRDX(amd64)");
1069 static IRExpr* getIRegRDX ( Int sz )
1071 vassert(host_endness == VexEndnessLE);
1072 switch (sz) {
1073 case 1: return unop(Iop_64to8, IRExpr_Get( OFFB_RDX, Ity_I64 ));
1074 case 2: return unop(Iop_64to16, IRExpr_Get( OFFB_RDX, Ity_I64 ));
1075 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 ));
1076 case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 );
1077 default: vpanic("getIRegRDX(amd64)");
1081 static void putIRegRDX ( Int sz, IRExpr* e )
1083 vassert(host_endness == VexEndnessLE);
1084 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
1085 switch (sz) {
1086 case 8: stmt( IRStmt_Put( OFFB_RDX, e ));
1087 break;
1088 case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) ));
1089 break;
1090 case 2: stmt( IRStmt_Put( OFFB_RDX, e ));
1091 break;
1092 case 1: stmt( IRStmt_Put( OFFB_RDX, e ));
1093 break;
1094 default: vpanic("putIRegRDX(amd64)");
1099 /* Simplistic functions to deal with the integer registers as a
1100 straightforward bank of 16 64-bit regs. */
1102 static IRExpr* getIReg64 ( UInt regno )
1104 return IRExpr_Get( integerGuestReg64Offset(regno),
1105 Ity_I64 );
1108 static void putIReg64 ( UInt regno, IRExpr* e )
1110 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
1111 stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) );
1114 static const HChar* nameIReg64 ( UInt regno )
1116 return nameIReg( 8, regno, False );
1120 /* Simplistic functions to deal with the lower halves of integer
1121 registers as a straightforward bank of 16 32-bit regs. */
1123 static IRExpr* getIReg32 ( UInt regno )
1125 vassert(host_endness == VexEndnessLE);
1126 return unop(Iop_64to32,
1127 IRExpr_Get( integerGuestReg64Offset(regno),
1128 Ity_I64 ));
1131 static void putIReg32 ( UInt regno, IRExpr* e )
1133 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
1134 stmt( IRStmt_Put( integerGuestReg64Offset(regno),
1135 unop(Iop_32Uto64,e) ) );
1138 static const HChar* nameIReg32 ( UInt regno )
1140 return nameIReg( 4, regno, False );
1144 /* Simplistic functions to deal with the lower quarters of integer
1145 registers as a straightforward bank of 16 16-bit regs. */
1147 static IRExpr* getIReg16 ( UInt regno )
1149 vassert(host_endness == VexEndnessLE);
1150 return unop(Iop_64to16,
1151 IRExpr_Get( integerGuestReg64Offset(regno),
1152 Ity_I64 ));
1155 static void putIReg16 ( UInt regno, IRExpr* e )
1157 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
1158 stmt( IRStmt_Put( integerGuestReg64Offset(regno),
1159 unop(Iop_16Uto64,e) ) );
1162 static const HChar* nameIReg16 ( UInt regno )
1164 return nameIReg( 2, regno, False );
1168 /* Sometimes what we know is a 3-bit register number, a REX byte, and
1169 which field of the REX byte is to be used to extend to a 4-bit
1170 number. These functions cater for that situation.
1172 static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits )
1174 vassert(lo3bits < 8);
1175 vassert(IS_VALID_PFX(pfx));
1176 return getIReg64( lo3bits | (getRexX(pfx) << 3) );
1179 static const HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits )
1181 vassert(lo3bits < 8);
1182 vassert(IS_VALID_PFX(pfx));
1183 return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False );
1186 static const HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
1188 vassert(lo3bits < 8);
1189 vassert(IS_VALID_PFX(pfx));
1190 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1191 UInt regNo = lo3bits | (getRexB(pfx) << 3);
1192 return nameIReg( sz, regNo,
1193 toBool(sz==1 && !haveREX(pfx) && regNo >= 4 && regNo < 8));
1196 static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
1198 vassert(lo3bits < 8);
1199 vassert(IS_VALID_PFX(pfx));
1200 UInt regNo = (getRexB(pfx) << 3) | lo3bits;
1201 switch (sz) {
1202 case 8: {
1203 return IRExpr_Get(
1204 offsetIReg( 8, regNo, False/*!irregular*/ ),
1205 Ity_I64
1208 case 4: {
1209 return unop(Iop_64to32,
1210 IRExpr_Get(
1211 offsetIReg( 8, regNo, False/*!irregular*/ ),
1212 Ity_I64
1215 case 2: {
1216 return unop(Iop_64to16,
1217 IRExpr_Get(
1218 offsetIReg( 8, regNo, False/*!irregular*/ ),
1219 Ity_I64
1222 case 1: {
1223 Bool irregular = !haveREX(pfx) && regNo >= 4 && regNo < 8;
1224 if (irregular) {
1225 return IRExpr_Get(
1226 offsetIReg( 1, regNo, True/*irregular*/ ),
1227 Ity_I8
1229 } else {
1230 return unop(Iop_64to8,
1231 IRExpr_Get(
1232 offsetIReg( 8, regNo, False/*!irregular*/ ),
1233 Ity_I64
1237 default: {
1238 vpanic("getIRegRexB");
1243 static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e )
1245 vassert(lo3bits < 8);
1246 vassert(IS_VALID_PFX(pfx));
1247 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1248 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
1249 Bool irregular = sz == 1 && !haveREX(pfx) && lo3bits >= 4 && lo3bits < 8;
1250 stmt( IRStmt_Put(
1251 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3), irregular ),
1252 sz==4 ? unop(Iop_32Uto64,e) : e
1257 /* Functions for getting register numbers from modrm bytes and REX
1258 when we don't have to consider the complexities of integer subreg
1259 accesses.
1261 /* Extract the g reg field from a modRM byte, and augment it using the
1262 REX.R bit from the supplied REX byte. The R bit usually is
1263 associated with the g register field.
1265 static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
1267 Int reg = (Int)( (mod_reg_rm >> 3) & 7 );
1268 reg += (pfx & PFX_REXR) ? 8 : 0;
1269 return reg;
1272 /* Extract the e reg field from a modRM byte, and augment it using the
1273 REX.B bit from the supplied REX byte. The B bit usually is
1274 associated with the e register field (when modrm indicates e is a
1275 register, that is).
1277 static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
1279 Int rm;
1280 vassert(epartIsReg(mod_reg_rm));
1281 rm = (Int)(mod_reg_rm & 0x7);
1282 rm += (pfx & PFX_REXB) ? 8 : 0;
1283 return rm;
1287 /* General functions for dealing with integer register access. */
1289 /* Produce the guest state offset for a reference to the 'g' register
1290 field in a modrm byte, taking into account REX (or its absence),
1291 and the size of the access.
1293 static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1295 UInt reg;
1296 vassert(host_endness == VexEndnessLE);
1297 vassert(IS_VALID_PFX(pfx));
1298 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1299 reg = gregOfRexRM( pfx, mod_reg_rm );
1300 Bool irregular = sz == 1 && !haveREX(pfx) && reg >= 4 && reg < 8;
1301 return offsetIReg( sz, reg, irregular );
1304 static
1305 IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1307 switch (sz) {
1308 case 8: {
1309 return IRExpr_Get( offsetIRegG( 8, pfx, mod_reg_rm ), Ity_I64 );
1311 case 4: {
1312 return unop(Iop_64to32,
1313 IRExpr_Get( offsetIRegG( 8, pfx, mod_reg_rm ), Ity_I64 ));
1315 case 2: {
1316 return unop(Iop_64to16,
1317 IRExpr_Get( offsetIRegG( 8, pfx, mod_reg_rm ), Ity_I64 ));
1319 case 1: {
1320 UInt regNo = gregOfRexRM( pfx, mod_reg_rm );
1321 Bool irregular = !haveREX(pfx) && regNo >= 4 && regNo < 8;
1322 if (irregular) {
1323 return IRExpr_Get( offsetIRegG( 1, pfx, mod_reg_rm ), Ity_I8 );
1324 } else {
1325 return unop(Iop_64to8,
1326 IRExpr_Get( offsetIRegG( 8, pfx, mod_reg_rm ),
1327 Ity_I64 ));
1330 default: {
1331 vpanic("getIRegG");
1336 static
1337 void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
1339 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
1340 if (sz == 4) {
1341 e = unop(Iop_32Uto64,e);
1343 stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) );
1346 static
1347 const HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1349 UInt regNo = gregOfRexRM( pfx, mod_reg_rm );
1350 Bool irregular = sz == 1 && !haveREX(pfx) && regNo >= 4 && regNo < 8;
1351 return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm), irregular );
1355 static
1356 IRExpr* getIRegV ( Int sz, Prefix pfx )
1358    vassert(sz == 8 || sz == 4 || sz == 2);
1359 if (sz == 4) {
1360 return unop(Iop_64to32,
1361 IRExpr_Get( offsetIReg( 8, getVexNvvvv(pfx), False ),
1362 Ity_I64 ));
1363 } else if (sz == 2) {
1364 return unop(Iop_64to16,
1365 IRExpr_Get( offsetIReg( 8, getVexNvvvv(pfx), False ),
1366 Ity_I64 ));
1367 } else {
1368 return IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ),
1369 szToITy(sz) );
1373 static
1374 void putIRegV ( Int sz, Prefix pfx, IRExpr* e )
1376 vassert(sz == 8 || sz == 4);
1377 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
1378 if (sz == 4) {
1379 e = unop(Iop_32Uto64,e);
1381 stmt( IRStmt_Put( offsetIReg( sz, getVexNvvvv(pfx), False ), e ) );
1384 static
1385 const HChar* nameIRegV ( Int sz, Prefix pfx )
1387 vassert(sz == 8 || sz == 4);
1388 return nameIReg( sz, getVexNvvvv(pfx), False );
1393 /* Produce the guest state offset for a reference to the 'e' register
1394 field in a modrm byte, taking into account REX (or its absence),
1395 and the size of the access. eregOfRexRM will assert if mod_reg_rm
1396 denotes a memory access rather than a register access.
1398 static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1400 UInt reg;
1401 vassert(host_endness == VexEndnessLE);
1402 vassert(IS_VALID_PFX(pfx));
1403 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1404 reg = eregOfRexRM( pfx, mod_reg_rm );
1405 Bool irregular = sz == 1 && !haveREX(pfx) && (reg >= 4 && reg < 8);
1406 return offsetIReg( sz, reg, irregular );
1409 static
1410 IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1412 switch (sz) {
1413 case 8: {
1414 return IRExpr_Get( offsetIRegE( 8, pfx, mod_reg_rm ), Ity_I64 );
1416 case 4: {
1417 return unop(Iop_64to32,
1418 IRExpr_Get( offsetIRegE( 8, pfx, mod_reg_rm ), Ity_I64 ));
1420 case 2: {
1421 return unop(Iop_64to16,
1422 IRExpr_Get( offsetIRegE( 8, pfx, mod_reg_rm ), Ity_I64 ));
1424 case 1: {
1425 UInt regNo = eregOfRexRM( pfx, mod_reg_rm );
1426 Bool irregular = !haveREX(pfx) && regNo >= 4 && regNo < 8;
1427 if (irregular) {
1428 return IRExpr_Get( offsetIRegE( 1, pfx, mod_reg_rm ), Ity_I8 );
1429 } else {
1430 return unop(Iop_64to8,
1431 IRExpr_Get( offsetIRegE( 8, pfx, mod_reg_rm ),
1432 Ity_I64 ));
1435 default: {
1436 vpanic("getIRegE");
1441 static
1442 void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
1444 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
1445 if (sz == 4) {
1446 e = unop(Iop_32Uto64,e);
1448 stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) );
1451 static
1452 const HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1454 UInt regNo = eregOfRexRM( pfx, mod_reg_rm );
1455 Bool irregular = sz == 1 && !haveREX(pfx) && regNo >= 4 && regNo < 8;
1456 return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm), irregular );
1460 /*------------------------------------------------------------*/
1461 /*--- For dealing with XMM registers ---*/
1462 /*------------------------------------------------------------*/
1464 static Int ymmGuestRegOffset ( UInt ymmreg )
1466 switch (ymmreg) {
1467 case 0: return OFFB_YMM0;
1468 case 1: return OFFB_YMM1;
1469 case 2: return OFFB_YMM2;
1470 case 3: return OFFB_YMM3;
1471 case 4: return OFFB_YMM4;
1472 case 5: return OFFB_YMM5;
1473 case 6: return OFFB_YMM6;
1474 case 7: return OFFB_YMM7;
1475 case 8: return OFFB_YMM8;
1476 case 9: return OFFB_YMM9;
1477 case 10: return OFFB_YMM10;
1478 case 11: return OFFB_YMM11;
1479 case 12: return OFFB_YMM12;
1480 case 13: return OFFB_YMM13;
1481 case 14: return OFFB_YMM14;
1482 case 15: return OFFB_YMM15;
1483 default: vpanic("ymmGuestRegOffset(amd64)");
1487 static Int xmmGuestRegOffset ( UInt xmmreg )
1489 /* Correct for little-endian host only. */
1490 vassert(host_endness == VexEndnessLE);
1491 return ymmGuestRegOffset( xmmreg );
1494 /* Lanes of vector registers are always numbered from zero, with zero being the
1495 least significant lane (rightmost in the register). */
1497 static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
1499 /* Correct for little-endian host only. */
1500 vassert(host_endness == VexEndnessLE);
1501 vassert(laneno >= 0 && laneno < 8);
1502 return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
1505 static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
1507 /* Correct for little-endian host only. */
1508 vassert(host_endness == VexEndnessLE);
1509 vassert(laneno >= 0 && laneno < 4);
1510 return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
1513 static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
1515 /* Correct for little-endian host only. */
1516 vassert(host_endness == VexEndnessLE);
1517 vassert(laneno >= 0 && laneno < 2);
1518 return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
1521 static Int ymmGuestRegLane128offset ( UInt ymmreg, Int laneno )
1523 /* Correct for little-endian host only. */
1524 vassert(host_endness == VexEndnessLE);
1525 vassert(laneno >= 0 && laneno < 2);
1526 return ymmGuestRegOffset( ymmreg ) + 16 * laneno;
1529 static Int ymmGuestRegLane64offset ( UInt ymmreg, Int laneno )
1531 /* Correct for little-endian host only. */
1532 vassert(host_endness == VexEndnessLE);
1533 vassert(laneno >= 0 && laneno < 4);
1534 return ymmGuestRegOffset( ymmreg ) + 8 * laneno;
1537 static Int ymmGuestRegLane32offset ( UInt ymmreg, Int laneno )
1539 /* Correct for little-endian host only. */
1540 vassert(host_endness == VexEndnessLE);
1541 vassert(laneno >= 0 && laneno < 8);
1542 return ymmGuestRegOffset( ymmreg ) + 4 * laneno;
1545 static IRExpr* getXMMReg ( UInt xmmreg )
1547 return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
1550 static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
1552 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
1555 static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
1557 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
1560 static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
1562 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
1565 static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
1567 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
1570 static IRExpr* getXMMRegLane16 ( UInt xmmreg, Int laneno )
1572 return IRExpr_Get( xmmGuestRegLane16offset(xmmreg,laneno), Ity_I16 );
1575 static void putXMMReg ( UInt xmmreg, IRExpr* e )
1577 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
1578 stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
1581 static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
1583 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
1584 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
1587 static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
1589 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
1590 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
1593 static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
1595 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
1596 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
1599 static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
1601 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
1602 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
1605 static IRExpr* getYMMReg ( UInt xmmreg )
1607 return IRExpr_Get( ymmGuestRegOffset(xmmreg), Ity_V256 );
1610 static IRExpr* getYMMRegLane128 ( UInt ymmreg, Int laneno )
1612 return IRExpr_Get( ymmGuestRegLane128offset(ymmreg,laneno), Ity_V128 );
1615 static IRExpr* getYMMRegLane64F ( UInt ymmreg, Int laneno )
1617 return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_F64 );
1620 static IRExpr* getYMMRegLane64 ( UInt ymmreg, Int laneno )
1622 return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_I64 );
1625 static IRExpr* getYMMRegLane32F ( UInt ymmreg, Int laneno )
1627 return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_F32 );
1630 static IRExpr* getYMMRegLane32 ( UInt ymmreg, Int laneno )
1632 return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_I32 );
1635 static void putYMMReg ( UInt ymmreg, IRExpr* e )
1637 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V256);
1638 stmt( IRStmt_Put( ymmGuestRegOffset(ymmreg), e ) );
1641 static void putYMMRegLane128 ( UInt ymmreg, Int laneno, IRExpr* e )
1643 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
1644 stmt( IRStmt_Put( ymmGuestRegLane128offset(ymmreg,laneno), e ) );
1647 static void putYMMRegLane64F ( UInt ymmreg, Int laneno, IRExpr* e )
1649 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
1650 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
1653 static void putYMMRegLane64 ( UInt ymmreg, Int laneno, IRExpr* e )
1655 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
1656 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
1659 static void putYMMRegLane32F ( UInt ymmreg, Int laneno, IRExpr* e )
1661 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
1662 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
1665 static void putYMMRegLane32 ( UInt ymmreg, Int laneno, IRExpr* e )
1667 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
1668 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
1671 static IRExpr* mkV128 ( UShort mask )
1673 return IRExpr_Const(IRConst_V128(mask));
1676 /* Write the low half of a YMM reg and zero out the upper half. */
1677 static void putYMMRegLoAndZU ( UInt ymmreg, IRExpr* e )
1679 putYMMRegLane128( ymmreg, 0, e );
1680 putYMMRegLane128( ymmreg, 1, mkV128(0) );
1683 static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
1685 vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
1686 vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
1687 return unop(Iop_64to1,
1688 binop(Iop_And64,
1689 unop(Iop_1Uto64,x),
1690 unop(Iop_1Uto64,y)));
1693 /* Generate a compare-and-swap operation, operating on memory at
1694 'addr'. The expected value is 'expVal' and the new value is
1695 'newVal'. If the operation fails, then transfer control (with a
1696 no-redir jump (XXX no -- see comment at top of this file)) to
1697 'restart_point', which is presumably the address of the guest
1698 instruction again -- retrying, essentially. */
1699 static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
1700 Addr64 restart_point )
1702 IRCAS* cas;
1703 IRType tyE = typeOfIRExpr(irsb->tyenv, expVal);
1704 IRType tyN = typeOfIRExpr(irsb->tyenv, newVal);
1705 IRTemp oldTmp = newTemp(tyE);
1706 IRTemp expTmp = newTemp(tyE);
1707 vassert(tyE == tyN);
1708 vassert(tyE == Ity_I64 || tyE == Ity_I32
1709 || tyE == Ity_I16 || tyE == Ity_I8);
1710 assign(expTmp, expVal);
1711 cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
1712 NULL, mkexpr(expTmp), NULL, newVal );
1713 stmt( IRStmt_CAS(cas) );
1714 stmt( IRStmt_Exit(
1715 binop( mkSizedOp(tyE,Iop_CasCmpNE8),
1716 mkexpr(oldTmp), mkexpr(expTmp) ),
1717 Ijk_Boring, /*Ijk_NoRedir*/
1718 IRConst_U64( restart_point ),
1719 OFFB_RIP
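/* A minimal usage sketch (illustrative only, hypothetical helper
   name): how a LOCK-prefixed 64-bit ADD could use casLE -- read the
   old value, compute the new one, and let casLE either commit it
   atomically or side-exit back to 'restart' to retry. */
static void sketch_lock_add64 ( IRExpr* addr, IRExpr* src,
                                Addr64 restart )
{
   IRTemp oldv = newTemp(Ity_I64);
   IRTemp newv = newTemp(Ity_I64);
   assign( oldv, loadLE(Ity_I64, addr) );
   assign( newv, binop(Iop_Add64, mkexpr(oldv), src) );
   casLE( addr, mkexpr(oldv), mkexpr(newv), restart );
   /* a real handler would also update the rflags thunk here */
}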
1724 /*------------------------------------------------------------*/
1725 /*--- Helpers for %rflags. ---*/
1726 /*------------------------------------------------------------*/
1728 /* -------------- Evaluating the flags-thunk. -------------- */
1730 /* Build IR to calculate all the eflags from stored
1731 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1732 Ity_I64. */
1733 static IRExpr* mk_amd64g_calculate_rflags_all ( void )
1735 IRExpr** args
1736 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1737 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1738 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1739 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1740 IRExpr* call
1741 = mkIRExprCCall(
1742 Ity_I64,
1743 0/*regparm*/,
1744 "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all,
1745 args
1747 /* Exclude OP and NDEP from definedness checking. We're only
1748 interested in DEP1 and DEP2. */
1749 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1750 return call;
1753 /* Build IR to calculate some particular condition from stored
1754 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1755 Ity_Bit. */
1756 static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond )
1758 IRExpr** args
1759 = mkIRExprVec_5( mkU64(cond),
1760 IRExpr_Get(OFFB_CC_OP, Ity_I64),
1761 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1762 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1763 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1764 IRExpr* call
1765 = mkIRExprCCall(
1766 Ity_I64,
1767 0/*regparm*/,
1768 "amd64g_calculate_condition", &amd64g_calculate_condition,
1769 args
1771 /* Exclude the requested condition, OP and NDEP from definedness
1772 checking. We're only interested in DEP1 and DEP2. */
1773 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
1774 return unop(Iop_64to1, call);
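/* A minimal usage sketch (illustrative only, hypothetical helper
   name): a conditional-branch handler can test any condition straight
   from the thunk and emit a guarded side-exit to 'dst'. */
static void sketch_cond_exit ( AMD64Condcode cond, Addr64 dst )
{
   stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond),
                      Ijk_Boring,
                      IRConst_U64(dst),
                      OFFB_RIP ) );
}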
1777 /* Build IR to calculate just the carry flag from stored
1778 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I64. */
1779 static IRExpr* mk_amd64g_calculate_rflags_c ( void )
1781 IRExpr** args
1782 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1783 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1784 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1785 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1786 IRExpr* call
1787 = mkIRExprCCall(
1788 Ity_I64,
1789 0/*regparm*/,
1790 "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c,
1791 args
1793 /* Exclude OP and NDEP from definedness checking. We're only
1794 interested in DEP1 and DEP2. */
1795 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1796 return call;
1800 /* -------------- Building the flags-thunk. -------------- */
1802 /* The machinery in this section builds the flag-thunk following a
1803 flag-setting operation. Hence the various setFlags_* functions.
1806 static Bool isAddSub ( IROp op8 )
1808 return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
1811 static Bool isLogic ( IROp op8 )
1813 return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
1816 /* U-widen 1/8/16/32/64 bit int expr to 64. */
1817 static IRExpr* widenUto64 ( IRExpr* e )
1819 switch (typeOfIRExpr(irsb->tyenv,e)) {
1820 case Ity_I64: return e;
1821 case Ity_I32: return unop(Iop_32Uto64, e);
1822 case Ity_I16: return unop(Iop_16Uto64, e);
1823 case Ity_I8: return unop(Iop_8Uto64, e);
1824 case Ity_I1: return unop(Iop_1Uto64, e);
1825 default: vpanic("widenUto64");
1829 /* S-widen 8/16/32/64 bit int expr to 64. */
1830 static IRExpr* widenSto64 ( IRExpr* e )
1832 switch (typeOfIRExpr(irsb->tyenv,e)) {
1833 case Ity_I64: return e;
1834 case Ity_I32: return unop(Iop_32Sto64, e);
1835 case Ity_I16: return unop(Iop_16Sto64, e);
1836 case Ity_I8: return unop(Iop_8Sto64, e);
1837 default: vpanic("widenSto64");
1841 /* Narrow 8/16/32/64 bit int expr to 8/16/32/64. Clearly only some
1842 of these combinations make sense. */
1843 static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
1845 IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
1846 if (src_ty == dst_ty)
1847 return e;
1848 if (src_ty == Ity_I32 && dst_ty == Ity_I16)
1849 return unop(Iop_32to16, e);
1850 if (src_ty == Ity_I32 && dst_ty == Ity_I8)
1851 return unop(Iop_32to8, e);
1852 if (src_ty == Ity_I64 && dst_ty == Ity_I32)
1853 return unop(Iop_64to32, e);
1854 if (src_ty == Ity_I64 && dst_ty == Ity_I16)
1855 return unop(Iop_64to16, e);
1856 if (src_ty == Ity_I64 && dst_ty == Ity_I8)
1857 return unop(Iop_64to8, e);
1859 vex_printf("\nsrc, dst tys are: ");
1860 ppIRType(src_ty);
1861 vex_printf(", ");
1862 ppIRType(dst_ty);
1863 vex_printf("\n");
1864 vpanic("narrowTo(amd64)");
1868 /* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is
1869 auto-sized up to the real op. */
1871 static
1872 void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
1874 Int ccOp = 0;
1875 switch (ty) {
1876 case Ity_I8: ccOp = 0; break;
1877 case Ity_I16: ccOp = 1; break;
1878 case Ity_I32: ccOp = 2; break;
1879 case Ity_I64: ccOp = 3; break;
1880 default: vassert(0);
1882 switch (op8) {
1883 case Iop_Add8: ccOp += AMD64G_CC_OP_ADDB; break;
1884 case Iop_Sub8: ccOp += AMD64G_CC_OP_SUBB; break;
1885 default: ppIROp(op8);
1886 vpanic("setFlags_DEP1_DEP2(amd64)");
1888 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1889 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
1890 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) );
1891 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
1895 /* Set the OP and DEP1 fields only, and write zero to DEP2. */
1897 static
1898 void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
1900 Int ccOp = 0;
1901 switch (ty) {
1902 case Ity_I8: ccOp = 0; break;
1903 case Ity_I16: ccOp = 1; break;
1904 case Ity_I32: ccOp = 2; break;
1905 case Ity_I64: ccOp = 3; break;
1906 default: vassert(0);
1908 switch (op8) {
1909 case Iop_Or8:
1910 case Iop_And8:
1911 case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break;
1912 default: ppIROp(op8);
1913 vpanic("setFlags_DEP1(amd64)");
1915 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1916 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
1917 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
1918 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
1922 /* For shift operations, we put in both the result and the undershifted
1923 result.  However, if the shift amount is zero, the thunk is left
1924 unchanged. */
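/* Concretely: DEP1 ends up holding the shifted result and DEP2 the
   "undershifted" result, i.e. the operand shifted by one place fewer.
   The flag helpers can then read the last bit shifted out -- the new
   carry -- off DEP2 without needing the shift amount itself. */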
1926 static void setFlags_DEP1_DEP2_shift ( IROp op64,
1927 IRTemp res,
1928 IRTemp resUS,
1929 IRType ty,
1930 IRTemp guard )
1932 Int ccOp = 0;
1933 switch (ty) {
1934 case Ity_I8: ccOp = 0; break;
1935 case Ity_I16: ccOp = 1; break;
1936 case Ity_I32: ccOp = 2; break;
1937 case Ity_I64: ccOp = 3; break;
1938 default: vassert(0);
1941 vassert(guard);
1943 /* Both kinds of right shifts are handled by the same thunk
1944 operation. */
1945 switch (op64) {
1946 case Iop_Shr64:
1947 case Iop_Sar64: ccOp += AMD64G_CC_OP_SHRB; break;
1948 case Iop_Shl64: ccOp += AMD64G_CC_OP_SHLB; break;
1949 default: ppIROp(op64);
1950 vpanic("setFlags_DEP1_DEP2_shift(amd64)");
1953 /* guard :: Ity_I8. We need to convert it to I1. */
1954 IRTemp guardB = newTemp(Ity_I1);
1955 assign( guardB, binop(Iop_CmpNE8, mkexpr(guard), mkU8(0)) );
1957 /* DEP1 contains the result, DEP2 contains the undershifted value. */
1958 stmt( IRStmt_Put( OFFB_CC_OP,
1959 IRExpr_ITE( mkexpr(guardB),
1960 mkU64(ccOp),
1961 IRExpr_Get(OFFB_CC_OP,Ity_I64) ) ));
1962 stmt( IRStmt_Put( OFFB_CC_DEP1,
1963 IRExpr_ITE( mkexpr(guardB),
1964 widenUto64(mkexpr(res)),
1965 IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) ));
1966 stmt( IRStmt_Put( OFFB_CC_DEP2,
1967 IRExpr_ITE( mkexpr(guardB),
1968 widenUto64(mkexpr(resUS)),
1969 IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) ));
1970 stmt( IRStmt_Put( OFFB_CC_NDEP,
1971 mkU64(0) ));
1975 /* For the inc/dec case, we store in DEP1 the result value and in NDEP
1976 the former value of the carry flag, which unfortunately we have to
1977 compute. */
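/* Rationale: INC and DEC leave the carry flag unchanged, so the
   pre-existing C has to be computed up front and parked in NDEP;
   the flag helpers merge it back in whenever the flags are next
   read. */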
1979 static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
1981 Int ccOp = inc ? AMD64G_CC_OP_INCB : AMD64G_CC_OP_DECB;
1983 switch (ty) {
1984 case Ity_I8: ccOp += 0; break;
1985 case Ity_I16: ccOp += 1; break;
1986 case Ity_I32: ccOp += 2; break;
1987 case Ity_I64: ccOp += 3; break;
1988 default: vassert(0);
1991 /* This has to come first, because calculating the C flag
1992 may require reading all four thunk fields. */
1993 stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) );
1994 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1995 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) );
1996 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
2000 /* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
2001 two arguments. */
2003 static
2004 void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op )
2006 switch (ty) {
2007 case Ity_I8:
2008 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+0) ) );
2009 break;
2010 case Ity_I16:
2011 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+1) ) );
2012 break;
2013 case Ity_I32:
2014 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+2) ) );
2015 break;
2016 case Ity_I64:
2017 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+3) ) );
2018 break;
2019 default:
2020 vpanic("setFlags_MUL(amd64)");
2022 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) ));
2023 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) ));
2024 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
2028 /* -------------- Condition codes. -------------- */
2030 /* Condition codes, using the AMD encoding. */
2032 static const HChar* name_AMD64Condcode ( AMD64Condcode cond )
2034 switch (cond) {
2035 case AMD64CondO: return "o";
2036 case AMD64CondNO: return "no";
2037 case AMD64CondB: return "b";
2038 case AMD64CondNB: return "ae"; /*"nb";*/
2039 case AMD64CondZ: return "e"; /*"z";*/
2040 case AMD64CondNZ: return "ne"; /*"nz";*/
2041 case AMD64CondBE: return "be";
2042 case AMD64CondNBE: return "a"; /*"nbe";*/
2043 case AMD64CondS: return "s";
2044 case AMD64CondNS: return "ns";
2045 case AMD64CondP: return "p";
2046 case AMD64CondNP: return "np";
2047 case AMD64CondL: return "l";
2048 case AMD64CondNL: return "ge"; /*"nl";*/
2049 case AMD64CondLE: return "le";
2050 case AMD64CondNLE: return "g"; /*"nle";*/
2051 case AMD64CondAlways: return "ALWAYS";
2052 default: vpanic("name_AMD64Condcode");
2056 static
2057 AMD64Condcode positiveIse_AMD64Condcode ( AMD64Condcode cond,
2058 /*OUT*/Bool* needInvert )
2060 vassert(cond >= AMD64CondO && cond <= AMD64CondNLE);
2061 if (cond & 1) {
2062 *needInvert = True;
2063 return cond-1;
2064 } else {
2065 *needInvert = False;
2066 return cond;
2071 /* -------------- Helpers for ADD/SUB with carry. -------------- */
2073 /* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
2074 appropriately.
2076 Optionally, generate a store for the 'tres' value. This can either
2077 be a normal store, or it can be a cas-with-possible-failure style
2078 store:
2080 if taddr is IRTemp_INVALID, then no store is generated.
2082 if taddr is not IRTemp_INVALID, then a store (using taddr as
2083 the address) is generated:
2085 if texpVal is IRTemp_INVALID then a normal store is
2086 generated, and restart_point must be zero (it is irrelevant).
2088 if texpVal is not IRTemp_INVALID then a cas-style store is
2089 generated. texpVal is the expected value, restart_point
2090 is the restart point if the store fails, and texpVal must
2091 have the same type as tres.
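/* In outline: tres = ta1 + ta2 + oldC, where oldC is the current
   carry bit.  The thunk records DEP1 = ta1, DEP2 = ta2 ^ oldC and
   NDEP = oldC, which is enough for the ADC flag helpers to
   reconstruct both operands and the carry-in afterwards. */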
2094 static void helper_ADC ( Int sz,
2095 IRTemp tres, IRTemp ta1, IRTemp ta2,
2096 /* info about optional store: */
2097 IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
2099 UInt thunkOp;
2100 IRType ty = szToITy(sz);
2101 IRTemp oldc = newTemp(Ity_I64);
2102 IRTemp oldcn = newTemp(ty);
2103 IROp plus = mkSizedOp(ty, Iop_Add8);
2104 IROp xor = mkSizedOp(ty, Iop_Xor8);
2106 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
2108 switch (sz) {
2109 case 8: thunkOp = AMD64G_CC_OP_ADCQ; break;
2110 case 4: thunkOp = AMD64G_CC_OP_ADCL; break;
2111 case 2: thunkOp = AMD64G_CC_OP_ADCW; break;
2112 case 1: thunkOp = AMD64G_CC_OP_ADCB; break;
2113 default: vassert(0);
2116 /* oldc = old carry flag, 0 or 1 */
2117 assign( oldc, binop(Iop_And64,
2118 mk_amd64g_calculate_rflags_c(),
2119 mkU64(1)) );
2121 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
2123 assign( tres, binop(plus,
2124 binop(plus,mkexpr(ta1),mkexpr(ta2)),
2125 mkexpr(oldcn)) );
2127 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
2128 start of this function. */
2129 if (taddr != IRTemp_INVALID) {
2130 if (texpVal == IRTemp_INVALID) {
2131 vassert(restart_point == 0);
2132 storeLE( mkexpr(taddr), mkexpr(tres) );
2133 } else {
2134 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
2135 /* .. and hence 'texpVal' has the same type as 'tres'. */
2136 casLE( mkexpr(taddr),
2137 mkexpr(texpVal), mkexpr(tres), restart_point );
2141 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
2142 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
2143 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
2144 mkexpr(oldcn)) )) );
2145 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
2149 /* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
2150 appropriately. As with helper_ADC, possibly generate a store of
2151 the result -- see comments on helper_ADC for details.
2153 static void helper_SBB ( Int sz,
2154 IRTemp tres, IRTemp ta1, IRTemp ta2,
2155 /* info about optional store: */
2156 IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
2158 UInt thunkOp;
2159 IRType ty = szToITy(sz);
2160 IRTemp oldc = newTemp(Ity_I64);
2161 IRTemp oldcn = newTemp(ty);
2162 IROp minus = mkSizedOp(ty, Iop_Sub8);
2163 IROp xor = mkSizedOp(ty, Iop_Xor8);
2165 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
2167 switch (sz) {
2168 case 8: thunkOp = AMD64G_CC_OP_SBBQ; break;
2169 case 4: thunkOp = AMD64G_CC_OP_SBBL; break;
2170 case 2: thunkOp = AMD64G_CC_OP_SBBW; break;
2171 case 1: thunkOp = AMD64G_CC_OP_SBBB; break;
2172 default: vassert(0);
2175 /* oldc = old carry flag, 0 or 1 */
2176 assign( oldc, binop(Iop_And64,
2177 mk_amd64g_calculate_rflags_c(),
2178 mkU64(1)) );
2180 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
2182 assign( tres, binop(minus,
2183 binop(minus,mkexpr(ta1),mkexpr(ta2)),
2184 mkexpr(oldcn)) );
2186 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
2187 start of this function. */
2188 if (taddr != IRTemp_INVALID) {
2189 if (texpVal == IRTemp_INVALID) {
2190 vassert(restart_point == 0);
2191 storeLE( mkexpr(taddr), mkexpr(tres) );
2192 } else {
2193 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
2194 /* .. and hence 'texpVal' has the same type as 'tres'. */
2195 casLE( mkexpr(taddr),
2196 mkexpr(texpVal), mkexpr(tres), restart_point );
2200 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
2201 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) );
2202 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
2203 mkexpr(oldcn)) )) );
2204 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
2208 /* Given ta1, ta2 and tres, compute tres = ADCX(ta1,ta2) or tres = ADOX(ta1,ta2)
2209 and set flags appropriately.
2211 static void helper_ADCX_ADOX ( Bool isADCX, Int sz,
2212 IRTemp tres, IRTemp ta1, IRTemp ta2 )
2214 UInt thunkOp;
2215 IRType ty = szToITy(sz);
2216 IRTemp oldflags = newTemp(Ity_I64);
2217 IRTemp oldOC = newTemp(Ity_I64); // old O or C flag
2218 IRTemp oldOCn = newTemp(ty); // old O or C flag, narrowed
2219 IROp plus = mkSizedOp(ty, Iop_Add8);
2220 IROp xor = mkSizedOp(ty, Iop_Xor8);
2222 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
2224 switch (sz) {
2225 case 8: thunkOp = isADCX ? AMD64G_CC_OP_ADCX64
2226 : AMD64G_CC_OP_ADOX64; break;
2227 case 4: thunkOp = isADCX ? AMD64G_CC_OP_ADCX32
2228 : AMD64G_CC_OP_ADOX32; break;
2229 default: vassert(0);
2232 assign( oldflags, mk_amd64g_calculate_rflags_all() );
2234 /* oldOC = old overflow/carry flag, 0 or 1 */
2235 assign( oldOC, binop(Iop_And64,
2236 binop(Iop_Shr64,
2237 mkexpr(oldflags),
2238 mkU8(isADCX ? AMD64G_CC_SHIFT_C
2239 : AMD64G_CC_SHIFT_O)),
2240 mkU64(1)) );
2242 assign( oldOCn, narrowTo(ty, mkexpr(oldOC)) );
2244 assign( tres, binop(plus,
2245 binop(plus,mkexpr(ta1),mkexpr(ta2)),
2246 mkexpr(oldOCn)) );
2248 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
2249 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
2250 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
2251 mkexpr(oldOCn)) )) );
2252 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldflags) ) );
2256 /* -------------- Helpers for disassembly printing. -------------- */
2258 static const HChar* nameGrp1 ( Int opc_aux )
2260 static const HChar* grp1_names[8]
2261 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
2262 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)");
2263 return grp1_names[opc_aux];
2266 static const HChar* nameGrp2 ( Int opc_aux )
2268 static const HChar* grp2_names[8]
2269 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
2270 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)");
2271 return grp2_names[opc_aux];
2274 static const HChar* nameGrp4 ( Int opc_aux )
2276 static const HChar* grp4_names[8]
2277 = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
2278 if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)");
2279 return grp4_names[opc_aux];
2282 static const HChar* nameGrp5 ( Int opc_aux )
2284 static const HChar* grp5_names[8]
2285 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
2286 if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)");
2287 return grp5_names[opc_aux];
2290 static const HChar* nameGrp8 ( Int opc_aux )
2292 static const HChar* grp8_names[8]
2293 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
2294 if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)");
2295 return grp8_names[opc_aux];
2298 static const HChar* nameSReg ( UInt sreg )
2300 switch (sreg) {
2301 case R_ES: return "%es";
2302 case R_CS: return "%cs";
2303 case R_SS: return "%ss";
2304 case R_DS: return "%ds";
2305 case R_FS: return "%fs";
2306 case R_GS: return "%gs";
2307 default: vpanic("nameSReg(amd64)");
2311 static const HChar* nameMMXReg ( Int mmxreg )
2313 static const HChar* mmx_names[8]
2314 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
2315 if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)");
2316 return mmx_names[mmxreg];
2319 static const HChar* nameXMMReg ( Int xmmreg )
2321 static const HChar* xmm_names[16]
2322 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
2323 "%xmm4", "%xmm5", "%xmm6", "%xmm7",
2324 "%xmm8", "%xmm9", "%xmm10", "%xmm11",
2325 "%xmm12", "%xmm13", "%xmm14", "%xmm15" };
2326 if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)");
2327 return xmm_names[xmmreg];
2330 static const HChar* nameMMXGran ( Int gran )
2332 switch (gran) {
2333 case 0: return "b";
2334 case 1: return "w";
2335 case 2: return "d";
2336 case 3: return "q";
2337 default: vpanic("nameMMXGran(amd64,guest)");
2341 static HChar nameISize ( Int size )
2343 switch (size) {
2344 case 8: return 'q';
2345 case 4: return 'l';
2346 case 2: return 'w';
2347 case 1: return 'b';
2348 default: vpanic("nameISize(amd64)");
2352 static const HChar* nameYMMReg ( Int ymmreg )
2354 static const HChar* ymm_names[16]
2355 = { "%ymm0", "%ymm1", "%ymm2", "%ymm3",
2356 "%ymm4", "%ymm5", "%ymm6", "%ymm7",
2357 "%ymm8", "%ymm9", "%ymm10", "%ymm11",
2358 "%ymm12", "%ymm13", "%ymm14", "%ymm15" };
2359 if (ymmreg < 0 || ymmreg > 15) vpanic("nameYMMReg(amd64)");
2360 return ymm_names[ymmreg];
2364 /*------------------------------------------------------------*/
2365 /*--- JMP helpers ---*/
2366 /*------------------------------------------------------------*/
2368 static void jmp_lit( /*MOD*/DisResult* dres,
2369 IRJumpKind kind, Addr64 d64 )
2371 vassert(dres->whatNext == Dis_Continue);
2372 vassert(dres->len == 0);
2373 vassert(dres->jk_StopHere == Ijk_INVALID);
2374 dres->whatNext = Dis_StopHere;
2375 dres->jk_StopHere = kind;
2376 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64) ) );
2379 static void jmp_treg( /*MOD*/DisResult* dres,
2380 IRJumpKind kind, IRTemp t )
2382 vassert(dres->whatNext == Dis_Continue);
2383 vassert(dres->len == 0);
2384 vassert(dres->jk_StopHere == Ijk_INVALID);
2385 dres->whatNext = Dis_StopHere;
2386 dres->jk_StopHere = kind;
2387 stmt( IRStmt_Put( OFFB_RIP, mkexpr(t) ) );
2390 static
2391 void jcc_01 ( /*MOD*/DisResult* dres,
2392 AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true )
2394 Bool invert;
2395 AMD64Condcode condPos;
2396 vassert(dres->whatNext == Dis_Continue);
2397 vassert(dres->len == 0);
2398 vassert(dres->jk_StopHere == Ijk_INVALID);
2399 dres->whatNext = Dis_StopHere;
2400 dres->jk_StopHere = Ijk_Boring;
2401 condPos = positiveIse_AMD64Condcode ( cond, &invert );
2402 if (invert) {
2403 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
2404 Ijk_Boring,
2405 IRConst_U64(d64_false),
2406 OFFB_RIP ) );
2407 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_true) ) );
2408 } else {
2409 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
2410 Ijk_Boring,
2411 IRConst_U64(d64_true),
2412 OFFB_RIP ) );
2413 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_false) ) );
2417 /* Let new_rsp be the %rsp value after a call/return. Let nia be the
2418 guest address of the next instruction to be executed.
2420 This function generates an AbiHint to say that -128(%rsp)
2421 .. -1(%rsp) should now be regarded as uninitialised.
2423 static
2424 void make_redzone_AbiHint ( const VexAbiInfo* vbi,
2425 IRTemp new_rsp, IRTemp nia, const HChar* who )
2427 Int szB = vbi->guest_stack_redzone_size;
2428 vassert(szB >= 0);
2430 /* A bit of a kludge.  Currently the only ABI for which we have
2431 guested AMD64 is ELF, so just check that the redzone size is the
2432 expected 128 bytes (paranoia). */
2433 vassert(szB == 128);
2435 if (0) vex_printf("AbiHint: %s\n", who);
2436 vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64);
2437 vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64);
2438 if (szB > 0)
2439 stmt( IRStmt_AbiHint(
2440 binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)),
2441 szB,
2442 mkexpr(nia)
2447 /*------------------------------------------------------------*/
2448 /*--- Disassembling addressing modes ---*/
2449 /*------------------------------------------------------------*/
2451 static
2452 const HChar* segRegTxt ( Prefix pfx )
2454 if (pfx & PFX_CS) return "%cs:";
2455 if (pfx & PFX_DS) return "%ds:";
2456 if (pfx & PFX_ES) return "%es:";
2457 if (pfx & PFX_FS) return "%fs:";
2458 if (pfx & PFX_GS) return "%gs:";
2459 if (pfx & PFX_SS) return "%ss:";
2460 return ""; /* no override */
2464 /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a
2465 linear address by adding any required segment override as indicated
2466 by sorb, and also dealing with any address size override
2467 present. */
2468 static
2469 IRExpr* handleAddrOverrides ( const VexAbiInfo* vbi,
2470 Prefix pfx, IRExpr* virtual )
2472 /* --- address size override --- */
2473 if (haveASO(pfx))
2474 virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual));
2476 /* Note that the handling below is a hack that relies on the
2477 assumption that %fs or %gs is constant.
2478 Typically, %fs is always 0x63 on linux (in the main thread, it
2479 stays at value 0), %gs always 0x60 on Darwin, ... */
2480 /* --- segment overrides --- */
2481 if (pfx & PFX_FS) {
2482 if (vbi->guest_amd64_assume_fs_is_const) {
2483 /* return virtual + guest_FS_CONST. */
2484 virtual = binop(Iop_Add64, virtual,
2485 IRExpr_Get(OFFB_FS_CONST, Ity_I64));
2486 } else {
2487 unimplemented("amd64 %fs segment override");
2491 if (pfx & PFX_GS) {
2492 if (vbi->guest_amd64_assume_gs_is_const) {
2493 /* return virtual + guest_GS_CONST. */
2494 virtual = binop(Iop_Add64, virtual,
2495 IRExpr_Get(OFFB_GS_CONST, Ity_I64));
2496 } else {
2497 unimplemented("amd64 %gs segment override");
2501 /* cs, ds, es and ss are simply ignored in 64-bit mode. */
2503 return virtual;
2506 //.. {
2507 //.. Int sreg;
2508 //.. IRType hWordTy;
2509 //.. IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
2510 //..
2511 //.. if (sorb == 0)
2512 //.. /* the common case - no override */
2513 //.. return virtual;
2514 //..
2515 //.. switch (sorb) {
2516 //.. case 0x3E: sreg = R_DS; break;
2517 //.. case 0x26: sreg = R_ES; break;
2518 //.. case 0x64: sreg = R_FS; break;
2519 //.. case 0x65: sreg = R_GS; break;
2520 //.. default: vpanic("handleAddrOverrides(x86,guest)");
2521 //.. }
2522 //..
2523 //.. hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
2524 //..
2525 //.. seg_selector = newTemp(Ity_I32);
2526 //.. ldt_ptr = newTemp(hWordTy);
2527 //.. gdt_ptr = newTemp(hWordTy);
2528 //.. r64 = newTemp(Ity_I64);
2529 //..
2530 //.. assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
2531 //.. assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
2532 //.. assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
2533 //..
2534 //.. /*
2535 //.. Call this to do the translation and limit checks:
2536 //.. ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
2537 //.. UInt seg_selector, UInt virtual_addr )
2538 //.. */
2539 //.. assign(
2540 //.. r64,
2541 //.. mkIRExprCCall(
2542 //.. Ity_I64,
2543 //.. 0/*regparms*/,
2544 //.. "x86g_use_seg_selector",
2545 //.. &x86g_use_seg_selector,
2546 //.. mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
2547 //.. mkexpr(seg_selector), virtual)
2548 //.. )
2549 //.. );
2550 //..
2551 //.. /* If the high 32 of the result are non-zero, there was a
2552 //.. failure in address translation. In which case, make a
2553 //.. quick exit.
2554 //.. */
2555 //.. stmt(
2556 //.. IRStmt_Exit(
2557 //.. binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
2558 //.. Ijk_MapFail,
2559 //.. IRConst_U32( guest_eip_curr_instr )
2560 //.. )
2561 //.. );
2562 //..
2563 //.. /* otherwise, here's the translated result. */
2564 //.. return unop(Iop_64to32, mkexpr(r64));
2565 //.. }
2568 /* Generate IR to calculate an address indicated by a ModRM and
2569 following SIB bytes. The expression, and the number of bytes in
2570 the address mode, are returned (the latter in *len). Note that
2571 this fn should not be called if the R/M part of the address denotes
2572 a register instead of memory.  Text of the addressing mode is
2573 placed in buf.
2575 The computed address is stored in a new tempreg, and the
2576 identity of the tempreg is returned.
2578 extra_bytes holds the number of bytes after the amode, as supplied
2579 by the caller. This is needed to make sense of %rip-relative
2580 addresses. Note that the value that *len is set to is only the
2581 length of the amode itself and does not include the value supplied
2582 in extra_bytes.
2585 static IRTemp disAMode_copy2tmp ( IRExpr* addr64 )
2587 IRTemp tmp = newTemp(Ity_I64);
2588 assign( tmp, addr64 );
2589 return tmp;
2592 static
2593 IRTemp disAMode ( /*OUT*/Int* len,
2594 const VexAbiInfo* vbi, Prefix pfx, Long delta,
2595 /*OUT*/HChar* buf, Int extra_bytes )
2597 UChar mod_reg_rm = getUChar(delta);
2598 delta++;
2600 buf[0] = (UChar)0;
2601 vassert(extra_bytes >= 0 && extra_bytes < 10);
2603 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2604 jump table seems a bit excessive.
2606 mod_reg_rm &= 0xC7; /* is now XX000YYY */
2607 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
2608 /* is now XX0XXYYY */
2609 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
2610 switch (mod_reg_rm) {
2612 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2613 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2615 case 0x00: case 0x01: case 0x02: case 0x03:
2616 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
2617 { UChar rm = toUChar(mod_reg_rm & 7);
2618 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
2619 *len = 1;
2620 return disAMode_copy2tmp(
2621 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,rm)));
2624 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2625 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2627 case 0x08: case 0x09: case 0x0A: case 0x0B:
2628 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
2629 { UChar rm = toUChar(mod_reg_rm & 7);
2630 Long d = getSDisp8(delta);
2631 if (d == 0) {
2632 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
2633 } else {
2634 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
2636 *len = 2;
2637 return disAMode_copy2tmp(
2638 handleAddrOverrides(vbi, pfx,
2639 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
2642 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2643 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2645 case 0x10: case 0x11: case 0x12: case 0x13:
2646 /* ! 14 */ case 0x15: case 0x16: case 0x17:
2647 { UChar rm = toUChar(mod_reg_rm & 7);
2648 Long d = getSDisp32(delta);
2649 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
2650 *len = 5;
2651 return disAMode_copy2tmp(
2652 handleAddrOverrides(vbi, pfx,
2653 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
2656 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2657 /* REX.B==1: a register, %r8 .. %r15. This shouldn't happen. */
2658 case 0x18: case 0x19: case 0x1A: case 0x1B:
2659 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
2660 vpanic("disAMode(amd64): not an addr!");
2662 /* RIP + disp32. This assumes that guest_RIP_curr_instr is set
2663 correctly at the start of handling each instruction. */
2664 case 0x05:
2665 { Long d = getSDisp32(delta);
2666 *len = 5;
2667 DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d);
2668 /* We need to know the next instruction's start address.
2669 Try and figure out what it is, record the guess, and ask
2670 the top-level driver logic (bbToIR_AMD64) to check we
2671 guessed right, after the instruction is completely
2672 decoded. */
2673 guest_RIP_next_mustcheck = True;
2674 guest_RIP_next_assumed = guest_RIP_bbstart
2675 + delta+4 + extra_bytes;
2676 return disAMode_copy2tmp(
2677 handleAddrOverrides(vbi, pfx,
2678 binop(Iop_Add64, mkU64(guest_RIP_next_assumed),
2679 mkU64(d))));
2682 case 0x04: {
2683 /* SIB, with no displacement. Special cases:
2684 -- %rsp cannot act as an index value.
2685 If index_r indicates %rsp, zero is used for the index.
2686 -- when mod is zero and base indicates RBP or R13, base is
2687 instead a 32-bit sign-extended literal.
2688 It's all madness, I tell you. Extract %index, %base and
2689 scale from the SIB byte. The value denoted is then:
2690 | %index == %RSP && (%base == %RBP || %base == %R13)
2691 = d32 following SIB byte
2692 | %index == %RSP && !(%base == %RBP || %base == %R13)
2693 = %base
2694 | %index != %RSP && (%base == %RBP || %base == %R13)
2695 = d32 following SIB byte + (%index << scale)
2696 | %index != %RSP && !(%base == %RBP || %base == %R13)
2697 = %base + (%index << scale)
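/* Worked example (no REX, mod == 0): SIB byte 0x98 gives scale = 2,
   index = 3 (%rbx), base = 0 (%rax); since the index is not %rsp and
   the base is not %rbp/%r13, the denoted address is
   %rax + (%rbx << 2), printed as "(%rax,%rbx,4)". */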
2699 UChar sib = getUChar(delta);
2700 UChar scale = toUChar((sib >> 6) & 3);
2701 UChar index_r = toUChar((sib >> 3) & 7);
2702 UChar base_r = toUChar(sib & 7);
2703 /* correct since #(R13) == 8 + #(RBP) */
2704 Bool base_is_BPor13 = toBool(base_r == R_RBP);
2705 Bool index_is_SP = toBool(index_r == R_RSP && 0==getRexX(pfx));
2706 delta++;
2708 if ((!index_is_SP) && (!base_is_BPor13)) {
2709 if (scale == 0) {
2710 DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
2711 nameIRegRexB(8,pfx,base_r),
2712 nameIReg64rexX(pfx,index_r));
2713 } else {
2714 DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
2715 nameIRegRexB(8,pfx,base_r),
2716 nameIReg64rexX(pfx,index_r), 1<<scale);
2718 *len = 2;
2719 return
2720 disAMode_copy2tmp(
2721 handleAddrOverrides(vbi, pfx,
2722 binop(Iop_Add64,
2723 getIRegRexB(8,pfx,base_r),
2724 binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
2725 mkU8(scale)))));
2728 if ((!index_is_SP) && base_is_BPor13) {
2729 Long d = getSDisp32(delta);
2730 DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d,
2731 nameIReg64rexX(pfx,index_r), 1<<scale);
2732 *len = 6;
2733 return
2734 disAMode_copy2tmp(
2735 handleAddrOverrides(vbi, pfx,
2736 binop(Iop_Add64,
2737 binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
2738 mkU8(scale)),
2739 mkU64(d))));
2742 if (index_is_SP && (!base_is_BPor13)) {
2743 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r));
2744 *len = 2;
2745 return disAMode_copy2tmp(
2746 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,base_r)));
2749 if (index_is_SP && base_is_BPor13) {
2750 Long d = getSDisp32(delta);
2751 DIS(buf, "%s%lld", segRegTxt(pfx), d);
2752 *len = 6;
2753 return disAMode_copy2tmp(
2754 handleAddrOverrides(vbi, pfx, mkU64(d)));
2757 vassert(0);
2760 /* SIB, with 8-bit displacement. Special cases:
2761 -- %rsp cannot act as an index value.
2762 If index_r indicates %rsp, zero is used for the index.
2763 Denoted value is:
2764 | %index == %RSP
2765 = d8 + %base
2766 | %index != %RSP
2767 = d8 + %base + (%index << scale)
2769 case 0x0C: {
2770 UChar sib = getUChar(delta);
2771 UChar scale = toUChar((sib >> 6) & 3);
2772 UChar index_r = toUChar((sib >> 3) & 7);
2773 UChar base_r = toUChar(sib & 7);
2774 Long d = getSDisp8(delta+1);
2776 if (index_r == R_RSP && 0==getRexX(pfx)) {
2777 DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
2778 d, nameIRegRexB(8,pfx,base_r));
2779 *len = 3;
2780 return disAMode_copy2tmp(
2781 handleAddrOverrides(vbi, pfx,
2782 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
2783 } else {
2784 if (scale == 0) {
2785 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
2786 nameIRegRexB(8,pfx,base_r),
2787 nameIReg64rexX(pfx,index_r));
2788 } else {
2789 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
2790 nameIRegRexB(8,pfx,base_r),
2791 nameIReg64rexX(pfx,index_r), 1<<scale);
2793 *len = 3;
2794 return
2795 disAMode_copy2tmp(
2796 handleAddrOverrides(vbi, pfx,
2797 binop(Iop_Add64,
2798 binop(Iop_Add64,
2799 getIRegRexB(8,pfx,base_r),
2800 binop(Iop_Shl64,
2801 getIReg64rexX(pfx,index_r), mkU8(scale))),
2802 mkU64(d))));
2804 vassert(0); /*NOTREACHED*/
2807 /* SIB, with 32-bit displacement. Special cases:
2808 -- %rsp cannot act as an index value.
2809 If index_r indicates %rsp, zero is used for the index.
2810 Denoted value is:
2811 | %index == %RSP
2812 = d32 + %base
2813 | %index != %RSP
2814 = d32 + %base + (%index << scale)
2816 case 0x14: {
2817 UChar sib = getUChar(delta);
2818 UChar scale = toUChar((sib >> 6) & 3);
2819 UChar index_r = toUChar((sib >> 3) & 7);
2820 UChar base_r = toUChar(sib & 7);
2821 Long d = getSDisp32(delta+1);
2823 if (index_r == R_RSP && 0==getRexX(pfx)) {
2824 DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
2825 d, nameIRegRexB(8,pfx,base_r));
2826 *len = 6;
2827 return disAMode_copy2tmp(
2828 handleAddrOverrides(vbi, pfx,
2829 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
2830 } else {
2831 if (scale == 0) {
2832 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
2833 nameIRegRexB(8,pfx,base_r),
2834 nameIReg64rexX(pfx,index_r));
2835 } else {
2836 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
2837 nameIRegRexB(8,pfx,base_r),
2838 nameIReg64rexX(pfx,index_r), 1<<scale);
2840 *len = 6;
2841 return
2842 disAMode_copy2tmp(
2843 handleAddrOverrides(vbi, pfx,
2844 binop(Iop_Add64,
2845 binop(Iop_Add64,
2846 getIRegRexB(8,pfx,base_r),
2847 binop(Iop_Shl64,
2848 getIReg64rexX(pfx,index_r), mkU8(scale))),
2849 mkU64(d))));
2851 vassert(0); /*NOTREACHED*/
2854 default:
2855 vpanic("disAMode(amd64)");
2856 return 0; /*notreached*/
2861 /* Similarly for VSIB addressing. This returns just the addend,
2862 and fills in *rI and *vscale with the register number of the vector
2863 index and its multiplicand. */
2864 static
2865 IRTemp disAVSIBMode ( /*OUT*/Int* len,
2866 const VexAbiInfo* vbi, Prefix pfx, Long delta,
2867 /*OUT*/HChar* buf, /*OUT*/UInt* rI,
2868 IRType ty, /*OUT*/Int* vscale )
2870 UChar mod_reg_rm = getUChar(delta);
2871 const HChar *vindex;
2873 *len = 0;
2874 *rI = 0;
2875 *vscale = 0;
2876 buf[0] = (UChar)0;
2877 if ((mod_reg_rm & 7) != 4 || epartIsReg(mod_reg_rm))
2878 return IRTemp_INVALID;
2880 UChar sib = getUChar(delta+1);
2881 UChar scale = toUChar((sib >> 6) & 3);
2882 UChar index_r = toUChar((sib >> 3) & 7);
2883 UChar base_r = toUChar(sib & 7);
2884 Long d = 0;
2885 /* correct since #(R13) == 8 + #(RBP) */
2886 Bool base_is_BPor13 = toBool(base_r == R_RBP);
2887 delta += 2;
2888 *len = 2;
2890 *rI = index_r | (getRexX(pfx) << 3);
2891 if (ty == Ity_V128)
2892 vindex = nameXMMReg(*rI);
2893 else
2894 vindex = nameYMMReg(*rI);
2895 *vscale = 1<<scale;
2897 switch (mod_reg_rm >> 6) {
2898 case 0:
2899 if (base_is_BPor13) {
2900 d = getSDisp32(delta);
2901 *len += 4;
2902 if (scale == 0) {
2903 DIS(buf, "%s%lld(,%s)", segRegTxt(pfx), d, vindex);
2904 } else {
2905 DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d, vindex, 1<<scale);
2907 return disAMode_copy2tmp( mkU64(d) );
2908 } else {
2909 if (scale == 0) {
2910 DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
2911 nameIRegRexB(8,pfx,base_r), vindex);
2912 } else {
2913 DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
2914 nameIRegRexB(8,pfx,base_r), vindex, 1<<scale);
2917 break;
2918 case 1:
2919 d = getSDisp8(delta);
2920 *len += 1;
2921 goto have_disp;
2922 case 2:
2923 d = getSDisp32(delta);
2924 *len += 4;
2925 have_disp:
2926 if (scale == 0) {
2927 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
2928 nameIRegRexB(8,pfx,base_r), vindex);
2929 } else {
2930 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
2931 nameIRegRexB(8,pfx,base_r), vindex, 1<<scale);
2933 break;
2936 if (!d)
2937 return disAMode_copy2tmp( getIRegRexB(8,pfx,base_r) );
2938 return disAMode_copy2tmp( binop(Iop_Add64, getIRegRexB(8,pfx,base_r),
2939 mkU64(d)) );
2943 /* Figure out the number of (insn-stream) bytes constituting the amode
2944 beginning at delta.  This is useful for getting hold of literals beyond
2945 the end of the amode before it has been disassembled. */
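/* For example, a Grp1 byte-immediate form such as 0x80 /0 ib carries
   an 8-bit immediate after the amode; knowing the amode's length lets
   the decoder fetch that immediate before disAMode has been run. */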
2947 static UInt lengthAMode ( Prefix pfx, Long delta )
2949 UChar mod_reg_rm = getUChar(delta);
2950 delta++;
2952 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2953 jump table seems a bit excessive.
2955 mod_reg_rm &= 0xC7; /* is now XX000YYY */
2956 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
2957 /* is now XX0XXYYY */
2958 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
2959 switch (mod_reg_rm) {
2961 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2962 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2964 case 0x00: case 0x01: case 0x02: case 0x03:
2965 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
2966 return 1;
2968 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2969 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2971 case 0x08: case 0x09: case 0x0A: case 0x0B:
2972 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
2973 return 2;
2975 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2976 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2978 case 0x10: case 0x11: case 0x12: case 0x13:
2979 /* ! 14 */ case 0x15: case 0x16: case 0x17:
2980 return 5;
2982 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2983 /* REX.B==1: a register, %r8 .. %r15. This shouldn't happen. */
2984 /* Not an address, but still handled. */
2985 case 0x18: case 0x19: case 0x1A: case 0x1B:
2986 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
2987 return 1;
2989 /* RIP + disp32. */
2990 case 0x05:
2991 return 5;
2993 case 0x04: {
2994 /* SIB, with no displacement. */
2995 UChar sib = getUChar(delta);
2996 UChar base_r = toUChar(sib & 7);
2997 /* correct since #(R13) == 8 + #(RBP) */
2998 Bool base_is_BPor13 = toBool(base_r == R_RBP);
3000 if (base_is_BPor13) {
3001 return 6;
3002 } else {
3003 return 2;
3007 /* SIB, with 8-bit displacement. */
3008 case 0x0C:
3009 return 3;
3011 /* SIB, with 32-bit displacement. */
3012 case 0x14:
3013 return 6;
3015 default:
3016 vpanic("lengthAMode(amd64)");
3017 return 0; /*notreached*/
3022 /*------------------------------------------------------------*/
3023 /*--- Disassembling common idioms ---*/
3024 /*------------------------------------------------------------*/
3026 typedef
3027 enum { WithFlagNone=2, WithFlagCarry, WithFlagCarryX, WithFlagOverX }
3028 WithFlag;
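/* Roughly: WithFlagNone covers the plain ops, WithFlagCarry selects
   the ADC/SBB variants, and WithFlagCarryX / WithFlagOverX select
   ADCX / ADOX, which update only the C flag, respectively only the
   O flag. */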
3030 /* Handle binary integer instructions of the form
3031 op E, G meaning
3032 op reg-or-mem, reg
3033 Is passed a ptr to the modRM byte, the actual operation, and the
3034 data size. Returns the address advanced completely over this
3035 instruction.
3037 E(src) is reg-or-mem
3038 G(dst) is reg.
3040 If E is reg, --> GET %G, tmp
3041 OP %E, tmp
3042 PUT tmp, %G
3044 If E is mem and OP is not reversible,
3045 --> (getAddr E) -> tmpa
3046 LD (tmpa), tmpa
3047 GET %G, tmp2
3048 OP tmpa, tmp2
3049 PUT tmp2, %G
3051 If E is mem and OP is reversible
3052 --> (getAddr E) -> tmpa
3053 LD (tmpa), tmpa
3054 OP %G, tmpa
3055 PUT tmpa, %G
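/* For instance, "add 8(%rsi),%rcx" is an E,G form: the memory operand
   is E (the source) and %rcx is G (the destination). */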
3057 static
3058 ULong dis_op2_E_G ( const VexAbiInfo* vbi,
3059 Prefix pfx,
3060 IROp op8,
3061 WithFlag flag,
3062 Bool keep,
3063 Int size,
3064 Long delta0,
3065 const HChar* t_amd64opc )
3067 HChar dis_buf[50];
3068 Int len;
3069 IRType ty = szToITy(size);
3070 IRTemp dst1 = newTemp(ty);
3071 IRTemp src = newTemp(ty);
3072 IRTemp dst0 = newTemp(ty);
3073 UChar rm = getUChar(delta0);
3074 IRTemp addr = IRTemp_INVALID;
3076 /* Stay sane -- check for valid (op8, flag, keep) combinations. */
3077 switch (op8) {
3078 case Iop_Add8:
3079 switch (flag) {
3080 case WithFlagNone: case WithFlagCarry:
3081 case WithFlagCarryX: case WithFlagOverX:
3082 vassert(keep);
3083 break;
3084 default:
3085 vassert(0);
3087 break;
3088 case Iop_Sub8:
3089 vassert(flag == WithFlagNone || flag == WithFlagCarry);
3090 if (flag == WithFlagCarry) vassert(keep);
3091 break;
3092 case Iop_And8:
3093 vassert(flag == WithFlagNone);
3094 break;
3095 case Iop_Or8: case Iop_Xor8:
3096 vassert(flag == WithFlagNone);
3097 vassert(keep);
3098 break;
3099 default:
3100 vassert(0);
3103 if (epartIsReg(rm)) {
3104 /* Specially handle XOR reg,reg, because that doesn't really
3105 depend on reg, and doing the obvious thing potentially
3106 generates a spurious value check failure due to the bogus
3107 dependency. Ditto SUB/SBB reg,reg. */
3108 if ((op8 == Iop_Xor8 || ((op8 == Iop_Sub8) && keep))
3109 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
3110 putIRegG(size,pfx,rm, mkU(ty,0));
3113 assign( dst0, getIRegG(size,pfx,rm) );
3114 assign( src, getIRegE(size,pfx,rm) );
3116 if (op8 == Iop_Add8 && flag == WithFlagCarry) {
3117 helper_ADC( size, dst1, dst0, src,
3118 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3119 putIRegG(size, pfx, rm, mkexpr(dst1));
3120 } else
3121 if (op8 == Iop_Sub8 && flag == WithFlagCarry) {
3122 helper_SBB( size, dst1, dst0, src,
3123 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3124 putIRegG(size, pfx, rm, mkexpr(dst1));
3125 } else
3126 if (op8 == Iop_Add8 && flag == WithFlagCarryX) {
3127 helper_ADCX_ADOX( True/*isADCX*/, size, dst1, dst0, src );
3128 putIRegG(size, pfx, rm, mkexpr(dst1));
3129 } else
3130 if (op8 == Iop_Add8 && flag == WithFlagOverX) {
3131 helper_ADCX_ADOX( False/*!isADCX*/, size, dst1, dst0, src );
3132 putIRegG(size, pfx, rm, mkexpr(dst1));
3133 } else {
3134 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
3135 if (isAddSub(op8))
3136 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3137 else
3138 setFlags_DEP1(op8, dst1, ty);
3139 if (keep)
3140 putIRegG(size, pfx, rm, mkexpr(dst1));
3143 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
3144 nameIRegE(size,pfx,rm),
3145 nameIRegG(size,pfx,rm));
3146 return 1+delta0;
3147 } else {
3148 /* E refers to memory */
3149 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
3150 assign( dst0, getIRegG(size,pfx,rm) );
3151 assign( src, loadLE(szToITy(size), mkexpr(addr)) );
3153 if (op8 == Iop_Add8 && flag == WithFlagCarry) {
3154 helper_ADC( size, dst1, dst0, src,
3155 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3156 putIRegG(size, pfx, rm, mkexpr(dst1));
3157 } else
3158 if (op8 == Iop_Sub8 && flag == WithFlagCarry) {
3159 helper_SBB( size, dst1, dst0, src,
3160 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3161 putIRegG(size, pfx, rm, mkexpr(dst1));
3162 } else
3163 if (op8 == Iop_Add8 && flag == WithFlagCarryX) {
3164 helper_ADCX_ADOX( True/*isADCX*/, size, dst1, dst0, src );
3165 putIRegG(size, pfx, rm, mkexpr(dst1));
3166 } else
3167 if (op8 == Iop_Add8 && flag == WithFlagOverX) {
3168 helper_ADCX_ADOX( False/*!isADCX*/, size, dst1, dst0, src );
3169 putIRegG(size, pfx, rm, mkexpr(dst1));
3170 } else {
3171 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
3172 if (isAddSub(op8))
3173 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3174 else
3175 setFlags_DEP1(op8, dst1, ty);
3176 if (keep)
3177 putIRegG(size, pfx, rm, mkexpr(dst1));
3180 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
3181 dis_buf, nameIRegG(size, pfx, rm));
3182 return len+delta0;
3188 /* Handle binary integer instructions of the form
3189 op G, E meaning
3190 op reg, reg-or-mem
3191 Is passed a ptr to the modRM byte, the actual operation, and the
3192 data size. Returns the address advanced completely over this
3193 instruction.
3195 G(src) is reg.
3196 E(dst) is reg-or-mem
3198 If E is reg, --> GET %E, tmp
3199 OP %G, tmp
3200 PUT tmp, %E
3202 If E is mem, --> (getAddr E) -> tmpa
3203 LD (tmpa), tmpv
3204 OP %G, tmpv
3205 ST tmpv, (tmpa)
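/* For instance, "add %rcx,8(%rsi)" is a G,E form: %rcx is G (the
   source) and the memory operand is E (the destination). */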
3207 static
3208 ULong dis_op2_G_E ( const VexAbiInfo* vbi,
3209 Prefix pfx,
3210 IROp op8,
3211 WithFlag flag,
3212 Bool keep,
3213 Int size,
3214 Long delta0,
3215 const HChar* t_amd64opc )
3217 HChar dis_buf[50];
3218 Int len;
3219 IRType ty = szToITy(size);
3220 IRTemp dst1 = newTemp(ty);
3221 IRTemp src = newTemp(ty);
3222 IRTemp dst0 = newTemp(ty);
3223 UChar rm = getUChar(delta0);
3224 IRTemp addr = IRTemp_INVALID;
3226 /* Stay sane -- check for valid (op8, flag, keep) combinations. */
3227 switch (op8) {
3228 case Iop_Add8:
3229 vassert(flag == WithFlagNone || flag == WithFlagCarry);
3230 vassert(keep);
3231 break;
3232 case Iop_Sub8:
3233 vassert(flag == WithFlagNone || flag == WithFlagCarry);
3234 if (flag == WithFlagCarry) vassert(keep);
3235 break;
3236 case Iop_And8: case Iop_Or8: case Iop_Xor8:
3237 vassert(flag == WithFlagNone);
3238 vassert(keep);
3239 break;
3240 default:
3241 vassert(0);
3244 /* flag != WithFlagNone is only allowed for Add and Sub and indicates the
3245 intended operation is add-with-carry or subtract-with-borrow. */
3247 if (epartIsReg(rm)) {
3248 /* Specially handle XOR reg,reg, because that doesn't really
3249 depend on reg, and doing the obvious thing potentially
3250 generates a spurious value check failure due to the bogus
3251 dependency. Ditto SUB/SBB reg,reg. */
3252 if ((op8 == Iop_Xor8 || ((op8 == Iop_Sub8) && keep))
3253 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
3254 putIRegE(size,pfx,rm, mkU(ty,0));
3257 assign(dst0, getIRegE(size,pfx,rm));
3258 assign(src, getIRegG(size,pfx,rm));
3260 if (op8 == Iop_Add8 && flag == WithFlagCarry) {
3261 helper_ADC( size, dst1, dst0, src,
3262 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3263 putIRegE(size, pfx, rm, mkexpr(dst1));
3264 } else
3265 if (op8 == Iop_Sub8 && flag == WithFlagCarry) {
3266 helper_SBB( size, dst1, dst0, src,
3267 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3268 putIRegE(size, pfx, rm, mkexpr(dst1));
3269 } else {
3270 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3271 if (isAddSub(op8))
3272 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3273 else
3274 setFlags_DEP1(op8, dst1, ty);
3275 if (keep)
3276 putIRegE(size, pfx, rm, mkexpr(dst1));
3279 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
3280 nameIRegG(size,pfx,rm),
3281 nameIRegE(size,pfx,rm));
3282 return 1+delta0;
3285 /* E refers to memory */
3287 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
3288 assign(dst0, loadLE(ty,mkexpr(addr)));
3289 assign(src, getIRegG(size,pfx,rm));
3291 if (op8 == Iop_Add8 && flag == WithFlagCarry) {
3292 if (haveLOCK(pfx)) {
3293 /* cas-style store */
3294 helper_ADC( size, dst1, dst0, src,
3295 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3296 } else {
3297 /* normal store */
3298 helper_ADC( size, dst1, dst0, src,
3299 /*store*/addr, IRTemp_INVALID, 0 );
3301 } else
3302 if (op8 == Iop_Sub8 && flag == WithFlagCarry) {
3303 if (haveLOCK(pfx)) {
3304 /* cas-style store */
3305 helper_SBB( size, dst1, dst0, src,
3306 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3307 } else {
3308 /* normal store */
3309 helper_SBB( size, dst1, dst0, src,
3310 /*store*/addr, IRTemp_INVALID, 0 );
3312 } else {
3313 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3314 if (keep) {
3315 if (haveLOCK(pfx)) {
3316 if (0) vex_printf("locked case\n" );
3317 casLE( mkexpr(addr),
3318 mkexpr(dst0)/*expval*/,
3319 mkexpr(dst1)/*newval*/, guest_RIP_curr_instr );
3320 } else {
3321 if (0) vex_printf("nonlocked case\n");
3322 storeLE(mkexpr(addr), mkexpr(dst1));
3325 if (isAddSub(op8))
3326 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3327 else
3328 setFlags_DEP1(op8, dst1, ty);
3331 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
3332 nameIRegG(size,pfx,rm), dis_buf);
3333 return len+delta0;
3338 /* Handle move instructions of the form
3339 mov E, G meaning
3340 mov reg-or-mem, reg
3341 Is passed a ptr to the modRM byte, and the data size. Returns
3342 the address advanced completely over this instruction.
3344 E(src) is reg-or-mem
3345 G(dst) is reg.
3347 If E is reg, --> GET %E, tmpv
3348 PUT tmpv, %G
3350 If E is mem --> (getAddr E) -> tmpa
3351 LD (tmpa), tmpb
3352 PUT tmpb, %G
3354 static
3355 ULong dis_mov_E_G ( const VexAbiInfo* vbi,
3356 Prefix pfx,
3357 Int size,
3358 Long delta0 )
3360 Int len;
3361 UChar rm = getUChar(delta0);
3362 HChar dis_buf[50];
3364 if (epartIsReg(rm)) {
3365 putIRegG(size, pfx, rm, getIRegE(size, pfx, rm));
3366 DIP("mov%c %s,%s\n", nameISize(size),
3367 nameIRegE(size,pfx,rm),
3368 nameIRegG(size,pfx,rm));
3369 return 1+delta0;
3372 /* E refers to memory */
3374 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
3375 putIRegG(size, pfx, rm, loadLE(szToITy(size), mkexpr(addr)));
3376 DIP("mov%c %s,%s\n", nameISize(size),
3377 dis_buf,
3378 nameIRegG(size,pfx,rm));
3379 return delta0+len;
3384 /* Handle move instructions of the form
3385 mov G, E meaning
3386 mov reg, reg-or-mem
3387 Is passed a ptr to the modRM byte, and the data size. Returns
3388 the address advanced completely over this instruction.
3389 We have to decide here whether an F2 or F3 prefix is acceptable.  F2 never is.
3391 G(src) is reg.
3392 E(dst) is reg-or-mem
3394 If E is reg, --> GET %G, tmp
3395 PUT tmp, %E
3397 If E is mem, --> (getAddr E) -> tmpa
3398 GET %G, tmpv
3399 ST tmpv, (tmpa)
3401 static
3402 ULong dis_mov_G_E ( const VexAbiInfo* vbi,
3403 Prefix pfx,
3404 Int size,
3405 Long delta0,
3406 /*OUT*/Bool* ok )
3408 Int len;
3409 UChar rm = getUChar(delta0);
3410 HChar dis_buf[50];
3412 *ok = True;
3414 if (epartIsReg(rm)) {
3415 if (haveF2orF3(pfx)) { *ok = False; return delta0; }
3416 putIRegE(size, pfx, rm, getIRegG(size, pfx, rm));
3417 DIP("mov%c %s,%s\n", nameISize(size),
3418 nameIRegG(size,pfx,rm),
3419 nameIRegE(size,pfx,rm));
3420 return 1+delta0;
3423 /* E refers to memory */
3425 if (haveF2(pfx)) { *ok = False; return delta0; }
3426 /* F3(XRELEASE) is acceptable, though. */
3427 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
3428 storeLE( mkexpr(addr), getIRegG(size, pfx, rm) );
3429 DIP("mov%c %s,%s\n", nameISize(size),
3430 nameIRegG(size,pfx,rm),
3431 dis_buf);
3432 return len+delta0;
3437 /* op $immediate, AL/AX/EAX/RAX. */
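/* E.g. "add $0x12345678,%rax": even at 64-bit operand size the
   encoded immediate is only 32 bits and is sign-extended, hence the
   size4 = imin(size,4) below. */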
3438 static
3439 ULong dis_op_imm_A ( Int size,
3440 Bool carrying,
3441 IROp op8,
3442 Bool keep,
3443 Long delta,
3444 const HChar* t_amd64opc )
3446 Int size4 = imin(size,4);
3447 IRType ty = szToITy(size);
3448 IRTemp dst0 = newTemp(ty);
3449 IRTemp src = newTemp(ty);
3450 IRTemp dst1 = newTemp(ty);
3451 Long lit = getSDisp(size4,delta);
3452 assign(dst0, getIRegRAX(size));
3453 assign(src, mkU(ty,lit & mkSizeMask(size)));
3455 if (isAddSub(op8) && !carrying) {
3456 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
3457 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3459 else
3460 if (isLogic(op8)) {
3461 vassert(!carrying);
3462 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
3463 setFlags_DEP1(op8, dst1, ty);
3465 else
3466 if (op8 == Iop_Add8 && carrying) {
3467 helper_ADC( size, dst1, dst0, src,
3468 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3470 else
3471 if (op8 == Iop_Sub8 && carrying) {
3472 helper_SBB( size, dst1, dst0, src,
3473 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3475 else
3476 vpanic("dis_op_imm_A(amd64,guest)");
3478 if (keep)
3479 putIRegRAX(size, mkexpr(dst1));
3481 DIP("%s%c $%lld, %s\n", t_amd64opc, nameISize(size),
3482 lit, nameIRegRAX(size));
3483 return delta+size4;
3487 /* Sign- and Zero-extending moves. */
3488 static
3489 ULong dis_movx_E_G ( const VexAbiInfo* vbi,
3490 Prefix pfx,
3491 Long delta, Int szs, Int szd, Bool sign_extend )
3493 UChar rm = getUChar(delta);
3494 if (epartIsReg(rm)) {
3495 putIRegG(szd, pfx, rm,
3496 doScalarWidening(
3497 szs,szd,sign_extend,
3498 getIRegE(szs,pfx,rm)));
3499 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
3500 nameISize(szs),
3501 nameISize(szd),
3502 nameIRegE(szs,pfx,rm),
3503 nameIRegG(szd,pfx,rm));
3504 return 1+delta;
3507 /* E refers to memory */
3509 Int len;
3510 HChar dis_buf[50];
3511 IRTemp addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
3512 putIRegG(szd, pfx, rm,
3513 doScalarWidening(
3514 szs,szd,sign_extend,
3515 loadLE(szToITy(szs),mkexpr(addr))));
3516 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
3517 nameISize(szs),
3518 nameISize(szd),
3519 dis_buf,
3520 nameIRegG(szd,pfx,rm));
3521 return len+delta;
3526 /* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by
3527 the 64 / 32 / 16 / 8 bit quantity in the given IRTemp. */
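/* For example, with sz == 4 the dividend is EDX:EAX and the divisor
   is the 32-bit value in t; the quotient lands in EAX and the
   remainder in EDX.  With sz == 1 the dividend is AX, the quotient
   goes to AL and the remainder to AH. */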
3528 static
3529 void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
3531 /* special-case the 64-bit case */
3532 if (sz == 8) {
3533 IROp op = signed_divide ? Iop_DivModS128to64
3534 : Iop_DivModU128to64;
3535 IRTemp src128 = newTemp(Ity_I128);
3536 IRTemp dst128 = newTemp(Ity_I128);
3537 assign( src128, binop(Iop_64HLto128,
3538 getIReg64(R_RDX),
3539 getIReg64(R_RAX)) );
3540 assign( dst128, binop(op, mkexpr(src128), mkexpr(t)) );
3541 putIReg64( R_RAX, unop(Iop_128to64,mkexpr(dst128)) );
3542 putIReg64( R_RDX, unop(Iop_128HIto64,mkexpr(dst128)) );
3543 } else {
3544 IROp op = signed_divide ? Iop_DivModS64to32
3545 : Iop_DivModU64to32;
3546 IRTemp src64 = newTemp(Ity_I64);
3547 IRTemp dst64 = newTemp(Ity_I64);
3548 switch (sz) {
3549 case 4:
3550 assign( src64,
3551 binop(Iop_32HLto64, getIRegRDX(4), getIRegRAX(4)) );
3552 assign( dst64,
3553 binop(op, mkexpr(src64), mkexpr(t)) );
3554 putIRegRAX( 4, unop(Iop_64to32,mkexpr(dst64)) );
3555 putIRegRDX( 4, unop(Iop_64HIto32,mkexpr(dst64)) );
3556 break;
3557 case 2: {
3558 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
3559 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
3560 assign( src64, unop(widen3264,
3561 binop(Iop_16HLto32,
3562 getIRegRDX(2),
3563 getIRegRAX(2))) );
3564 assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) );
3565 putIRegRAX( 2, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) );
3566 putIRegRDX( 2, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) );
3567 break;
3569 case 1: {
3570 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
3571 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
3572 IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16;
3573 assign( src64, unop(widen3264,
3574 unop(widen1632, getIRegRAX(2))) );
3575 assign( dst64,
3576 binop(op, mkexpr(src64),
3577 unop(widen1632, unop(widen816, mkexpr(t)))) );
3578 putIRegRAX( 1, unop(Iop_16to8,
3579 unop(Iop_32to16,
3580 unop(Iop_64to32,mkexpr(dst64)))) );
3581 putIRegAH( unop(Iop_16to8,
3582 unop(Iop_32to16,
3583 unop(Iop_64HIto32,mkexpr(dst64)))) );
3584 break;
3586 default:
3587 vpanic("codegen_div(amd64)");
3592 static
3593 ULong dis_Grp1 ( const VexAbiInfo* vbi,
3594 Prefix pfx,
3595 Long delta, UChar modrm,
3596 Int am_sz, Int d_sz, Int sz, Long d64 )
3598 Int len;
3599 HChar dis_buf[50];
3600 IRType ty = szToITy(sz);
3601 IRTemp dst1 = newTemp(ty);
3602 IRTemp src = newTemp(ty);
3603 IRTemp dst0 = newTemp(ty);
3604 IRTemp addr = IRTemp_INVALID;
3605 IROp op8 = Iop_INVALID;
3606 ULong mask = mkSizeMask(sz);
3608 switch (gregLO3ofRM(modrm)) {
3609 case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break;
3610 case 2: break; // ADC
3611 case 3: break; // SBB
3612 case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break;
3613 case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break;
3614 /*NOTREACHED*/
3615 default: vpanic("dis_Grp1(amd64): unhandled case");
3618 if (epartIsReg(modrm)) {
3619 vassert(am_sz == 1);
3621 assign(dst0, getIRegE(sz,pfx,modrm));
3622 assign(src, mkU(ty,d64 & mask));
3624 if (gregLO3ofRM(modrm) == 2 /* ADC */) {
3625 helper_ADC( sz, dst1, dst0, src,
3626 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3627 } else
3628 if (gregLO3ofRM(modrm) == 3 /* SBB */) {
3629 helper_SBB( sz, dst1, dst0, src,
3630 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3631 } else {
3632 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3633 if (isAddSub(op8))
3634 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3635 else
3636 setFlags_DEP1(op8, dst1, ty);
3639 if (gregLO3ofRM(modrm) < 7)
3640 putIRegE(sz, pfx, modrm, mkexpr(dst1));
3642 delta += (am_sz + d_sz);
3643 DIP("%s%c $%lld, %s\n",
3644 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), d64,
3645 nameIRegE(sz,pfx,modrm));
3646 } else {
3647 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
3649 assign(dst0, loadLE(ty,mkexpr(addr)));
3650 assign(src, mkU(ty,d64 & mask));
3652 if (gregLO3ofRM(modrm) == 2 /* ADC */) {
3653 if (haveLOCK(pfx)) {
3654 /* cas-style store */
3655 helper_ADC( sz, dst1, dst0, src,
3656 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3657 } else {
3658 /* normal store */
3659 helper_ADC( sz, dst1, dst0, src,
3660 /*store*/addr, IRTemp_INVALID, 0 );
3662 } else
3663 if (gregLO3ofRM(modrm) == 3 /* SBB */) {
3664 if (haveLOCK(pfx)) {
3665 /* cas-style store */
3666 helper_SBB( sz, dst1, dst0, src,
3667 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3668 } else {
3669 /* normal store */
3670 helper_SBB( sz, dst1, dst0, src,
3671 /*store*/addr, IRTemp_INVALID, 0 );
3673 } else {
3674 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3675 if (gregLO3ofRM(modrm) < 7) {
3676 if (haveLOCK(pfx)) {
3677 casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
3678 mkexpr(dst1)/*newVal*/,
3679 guest_RIP_curr_instr );
3680 } else {
3681 storeLE(mkexpr(addr), mkexpr(dst1));
3684 if (isAddSub(op8))
3685 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3686 else
3687 setFlags_DEP1(op8, dst1, ty);
3690 delta += (len+d_sz);
3691 DIP("%s%c $%lld, %s\n",
3692 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz),
3693 d64, dis_buf);
3695 return delta;
3699 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed
3700 expression. */
3702 static
3703 ULong dis_Grp2 ( const VexAbiInfo* vbi,
3704 Prefix pfx,
3705 Long delta, UChar modrm,
3706 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr,
3707 const HChar* shift_expr_txt, Bool* decode_OK )
3709 /* delta on entry points at the modrm byte. */
3710 HChar dis_buf[50];
3711 Int len;
3712 Bool isShift, isRotate, isRotateC;
3713 IRType ty = szToITy(sz);
3714 IRTemp dst0 = newTemp(ty);
3715 IRTemp dst1 = newTemp(ty);
3716 IRTemp addr = IRTemp_INVALID;
3718 *decode_OK = True;
3720 vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
3722 /* Put value to shift/rotate in dst0. */
3723 if (epartIsReg(modrm)) {
3724 assign(dst0, getIRegE(sz, pfx, modrm));
3725 delta += (am_sz + d_sz);
3726 } else {
3727 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
3728 assign(dst0, loadLE(ty,mkexpr(addr)));
3729 delta += len + d_sz;
3732 isShift = False;
3733 switch (gregLO3ofRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; }
3735 isRotate = False;
3736 switch (gregLO3ofRM(modrm)) { case 0: case 1: isRotate = True; }
3738 isRotateC = False;
3739 switch (gregLO3ofRM(modrm)) { case 2: case 3: isRotateC = True; }
3741 if (!isShift && !isRotate && !isRotateC) {
3742 /*NOTREACHED*/
3743 vpanic("dis_Grp2(Reg): unhandled case(amd64)");
3746 if (isRotateC) {
3747 /* Call a helper; this insn is so ridiculous it does not deserve
3748 better. One problem is, the helper has to calculate both the
3749 new value and the new flags. This is more than 64 bits, and
3750 there is no way to return more than 64 bits from the helper.
3751 Hence the crude and obvious solution is to call it twice,
3752 using the sign of the sz field to indicate whether it is the
3753 value or rflags result we want.
3755 Bool left = toBool(gregLO3ofRM(modrm) == 2);
3756 IRExpr** argsVALUE;
3757 IRExpr** argsRFLAGS;
3759 IRTemp new_value = newTemp(Ity_I64);
3760 IRTemp new_rflags = newTemp(Ity_I64);
3761 IRTemp old_rflags = newTemp(Ity_I64);
3763 assign( old_rflags, widenUto64(mk_amd64g_calculate_rflags_all()) );
3765 argsVALUE
3766 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
3767 widenUto64(shift_expr), /* rotate amount */
3768 mkexpr(old_rflags),
3769 mkU64(sz) );
3770 assign( new_value,
3771 mkIRExprCCall(
3772 Ity_I64,
3773 0/*regparm*/,
3774 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3775 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR,
3776 argsVALUE
3780 argsRFLAGS
3781 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
3782 widenUto64(shift_expr), /* rotate amount */
3783 mkexpr(old_rflags),
3784 mkU64(-sz) );
3785 assign( new_rflags,
3786 mkIRExprCCall(
3787 Ity_I64,
3788 0/*regparm*/,
3789 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3790 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR,
3791 argsRFLAGS
3795 assign( dst1, narrowTo(ty, mkexpr(new_value)) );
3796 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
3797 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
3798 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
3799 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
3802 else
3803 if (isShift) {
3805 IRTemp pre64 = newTemp(Ity_I64);
3806 IRTemp res64 = newTemp(Ity_I64);
3807 IRTemp res64ss = newTemp(Ity_I64);
3808 IRTemp shift_amt = newTemp(Ity_I8);
3809 UChar mask = toUChar(sz==8 ? 63 : 31);
3810 IROp op64;
3812 switch (gregLO3ofRM(modrm)) {
3813 case 4: op64 = Iop_Shl64; break;
3814 case 5: op64 = Iop_Shr64; break;
3815 case 6: op64 = Iop_Shl64; break;
3816 case 7: op64 = Iop_Sar64; break;
3817 /*NOTREACHED*/
3818 default: vpanic("dis_Grp2:shift"); break;
3821 /* Widen the value to be shifted to 64 bits, do the shift, and
3822 narrow back down. This seems surprisingly long-winded, but
3823 unfortunately the AMD semantics requires that 8/16/32-bit
3824 shifts give defined results for shift values all the way up
3825 to 32, and this seems the simplest way to do it. It has the
3826 advantage that the only IR level shifts generated are of 64
3827 bit values, and the shift amount is guaranteed to be in the
3828 range 0 .. 63, thereby observing the IR semantics requiring
3829 all shift values to be in the range 0 .. 2^word_size-1.
3831 Therefore the shift amount is masked with 63 for 64-bit shifts
3832         and 31 for all others. */
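      /* Editorial worked example (not from the original source): an
         8-bit SHL of 0xC3 by 2 under this scheme:
            shift_amt = 2 & 31           = 2
            pre64     = 0x00000000000000C3
            res64     = pre64 << 2       = 0x30C  -> dst1 = 0x0C
            res64ss   = pre64 << (2-1)   = 0x186  -> bit 7 = 1 -> CF = 1
         res64ss exists only so the flags thunk below can recover the
         last bit shifted out of the narrow operand. */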
3834 /* shift_amt = shift_expr & MASK, regardless of operation size */
3835 assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(mask)) );
3837 /* suitably widen the value to be shifted to 64 bits. */
3838 assign( pre64, op64==Iop_Sar64 ? widenSto64(mkexpr(dst0))
3839 : widenUto64(mkexpr(dst0)) );
3841 /* res64 = pre64 `shift` shift_amt */
3842 assign( res64, binop(op64, mkexpr(pre64), mkexpr(shift_amt)) );
3844 /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */
3845 assign( res64ss,
3846 binop(op64,
3847 mkexpr(pre64),
3848 binop(Iop_And8,
3849 binop(Iop_Sub8,
3850 mkexpr(shift_amt), mkU8(1)),
3851 mkU8(mask))) );
3853 /* Build the flags thunk. */
3854 setFlags_DEP1_DEP2_shift(op64, res64, res64ss, ty, shift_amt);
3856 /* Narrow the result back down. */
3857 assign( dst1, narrowTo(ty, mkexpr(res64)) );
3859 } /* if (isShift) */
3861 else
3862 if (isRotate) {
3863 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1
3864 : (ty==Ity_I32 ? 2 : 3));
3865 Bool left = toBool(gregLO3ofRM(modrm) == 0);
3866 IRTemp rot_amt = newTemp(Ity_I8);
3867 IRTemp rot_amt64 = newTemp(Ity_I8);
3868 IRTemp oldFlags = newTemp(Ity_I64);
3869 UChar mask = toUChar(sz==8 ? 63 : 31);
3871 /* rot_amt = shift_expr & mask */
3872 /* By masking the rotate amount thusly, the IR-level Shl/Shr
3873 expressions never shift beyond the word size and thus remain
3874 well defined. */
3875 assign(rot_amt64, binop(Iop_And8, shift_expr, mkU8(mask)));
3877 if (ty == Ity_I64)
3878 assign(rot_amt, mkexpr(rot_amt64));
3879 else
3880 assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt64), mkU8(8*sz-1)));
3882 if (left) {
3884 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
3885 assign(dst1,
3886 binop( mkSizedOp(ty,Iop_Or8),
3887 binop( mkSizedOp(ty,Iop_Shl8),
3888 mkexpr(dst0),
3889 mkexpr(rot_amt)
3891 binop( mkSizedOp(ty,Iop_Shr8),
3892 mkexpr(dst0),
3893 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
3897 ccOp += AMD64G_CC_OP_ROLB;
3899 } else { /* right */
3901 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
3902 assign(dst1,
3903 binop( mkSizedOp(ty,Iop_Or8),
3904 binop( mkSizedOp(ty,Iop_Shr8),
3905 mkexpr(dst0),
3906 mkexpr(rot_amt)
3908 binop( mkSizedOp(ty,Iop_Shl8),
3909 mkexpr(dst0),
3910 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
3914 ccOp += AMD64G_CC_OP_RORB;
3918 /* dst1 now holds the rotated value. Build flag thunk. We
3919 need the resulting value for this, and the previous flags.
3920 Except don't set it if the rotate count is zero. */
3922 assign(oldFlags, mk_amd64g_calculate_rflags_all());
3924 /* rot_amt64 :: Ity_I8. We need to convert it to I1. */
3925 IRTemp rot_amt64b = newTemp(Ity_I1);
3926 assign(rot_amt64b, binop(Iop_CmpNE8, mkexpr(rot_amt64), mkU8(0)) );
3928 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */
3929 stmt( IRStmt_Put( OFFB_CC_OP,
3930 IRExpr_ITE( mkexpr(rot_amt64b),
3931 mkU64(ccOp),
3932 IRExpr_Get(OFFB_CC_OP,Ity_I64) ) ));
3933 stmt( IRStmt_Put( OFFB_CC_DEP1,
3934 IRExpr_ITE( mkexpr(rot_amt64b),
3935 widenUto64(mkexpr(dst1)),
3936 IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) ));
3937 stmt( IRStmt_Put( OFFB_CC_DEP2,
3938 IRExpr_ITE( mkexpr(rot_amt64b),
3939 mkU64(0),
3940 IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) ));
3941 stmt( IRStmt_Put( OFFB_CC_NDEP,
3942 IRExpr_ITE( mkexpr(rot_amt64b),
3943 mkexpr(oldFlags),
3944 IRExpr_Get(OFFB_CC_NDEP,Ity_I64) ) ));
3945 } /* if (isRotate) */
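   /* Editorial worked example (not from the original source): a 32-bit
      ROL of 0x80000001 by 1 under the scheme above:
         dst1 = (0x80000001 << 1) | (0x80000001 >>u 31) = 0x00000003
      and because rot_amt64 is non-zero, the ITEs just above commit the
      new CC_* values; a rotate count of zero leaves all four CC fields
      untouched. */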
3947 /* Save result, and finish up. */
3948 if (epartIsReg(modrm)) {
3949 putIRegE(sz, pfx, modrm, mkexpr(dst1));
3950 if (vex_traceflags & VEX_TRACE_FE) {
3951 vex_printf("%s%c ",
3952 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
3953 if (shift_expr_txt)
3954 vex_printf("%s", shift_expr_txt);
3955 else
3956 ppIRExpr(shift_expr);
3957 vex_printf(", %s\n", nameIRegE(sz,pfx,modrm));
3959 } else {
3960 storeLE(mkexpr(addr), mkexpr(dst1));
3961 if (vex_traceflags & VEX_TRACE_FE) {
3962 vex_printf("%s%c ",
3963 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
3964 if (shift_expr_txt)
3965 vex_printf("%s", shift_expr_txt);
3966 else
3967 ppIRExpr(shift_expr);
3968 vex_printf(", %s\n", dis_buf);
3971 return delta;
3975 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
3976 static
3977 ULong dis_Grp8_Imm ( const VexAbiInfo* vbi,
3978 Prefix pfx,
3979 Long delta, UChar modrm,
3980 Int am_sz, Int sz, ULong src_val,
3981 Bool* decode_OK )
3983 /* src_val denotes a d8.
3984 And delta on entry points at the modrm byte. */
3986 IRType ty = szToITy(sz);
3987 IRTemp t2 = newTemp(Ity_I64);
3988 IRTemp t2m = newTemp(Ity_I64);
3989 IRTemp t_addr = IRTemp_INVALID;
3990 HChar dis_buf[50];
3991 ULong mask;
3993 /* we're optimists :-) */
3994 *decode_OK = True;
3996 /* Check whether F2 or F3 are acceptable. */
3997 if (epartIsReg(modrm)) {
3998 /* F2 or F3 are not allowed in the register case. */
3999 if (haveF2orF3(pfx)) {
4000 *decode_OK = False;
4001 return delta;
4003 } else {
4004 /* F2 or F3 (but not both) are allowable provided LOCK is also
4005 present. */
4006 if (haveF2orF3(pfx)) {
4007 if (haveF2andF3(pfx) || !haveLOCK(pfx)) {
4008 *decode_OK = False;
4009 return delta;
4014 /* Limit src_val -- the bit offset -- to something within a word.
4015 The Intel docs say that literal offsets larger than a word are
4016 masked in this way. */
4017 switch (sz) {
4018 case 2: src_val &= 15; break;
4019 case 4: src_val &= 31; break;
4020 case 8: src_val &= 63; break;
4021 default: *decode_OK = False; return delta;
4024 /* Invent a mask suitable for the operation. */
4025 switch (gregLO3ofRM(modrm)) {
4026 case 4: /* BT */ mask = 0; break;
4027 case 5: /* BTS */ mask = 1ULL << src_val; break;
4028 case 6: /* BTR */ mask = ~(1ULL << src_val); break;
4029 case 7: /* BTC */ mask = 1ULL << src_val; break;
4030 /* If this needs to be extended, probably simplest to make a
4031 new function to handle the other cases (0 .. 3). The
4032          Intel docs do not, however, indicate any use for 0 .. 3, so
4033 we don't expect this to happen. */
4034 default: *decode_OK = False; return delta;
4037 /* Fetch the value to be tested and modified into t2, which is
4038 64-bits wide regardless of sz. */
4039 if (epartIsReg(modrm)) {
4040 vassert(am_sz == 1);
4041 assign( t2, widenUto64(getIRegE(sz, pfx, modrm)) );
4042 delta += (am_sz + 1);
4043 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
4044 nameISize(sz),
4045 src_val, nameIRegE(sz,pfx,modrm));
4046 } else {
4047 Int len;
4048 t_addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 1 );
4049 delta += (len+1);
4050 assign( t2, widenUto64(loadLE(ty, mkexpr(t_addr))) );
4051 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
4052 nameISize(sz),
4053 src_val, dis_buf);
4056 /* Compute the new value into t2m, if non-BT. */
4057 switch (gregLO3ofRM(modrm)) {
4058 case 4: /* BT */
4059 break;
4060 case 5: /* BTS */
4061 assign( t2m, binop(Iop_Or64, mkU64(mask), mkexpr(t2)) );
4062 break;
4063 case 6: /* BTR */
4064 assign( t2m, binop(Iop_And64, mkU64(mask), mkexpr(t2)) );
4065 break;
4066 case 7: /* BTC */
4067 assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) );
4068 break;
4069 default:
4070 /*NOTREACHED*/ /*the previous switch guards this*/
4071 vassert(0);
4074 /* Write the result back, if non-BT. */
4075 if (gregLO3ofRM(modrm) != 4 /* BT */) {
4076 if (epartIsReg(modrm)) {
4077 putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m)));
4078 } else {
4079 if (haveLOCK(pfx)) {
4080 casLE( mkexpr(t_addr),
4081 narrowTo(ty, mkexpr(t2))/*expd*/,
4082 narrowTo(ty, mkexpr(t2m))/*new*/,
4083 guest_RIP_curr_instr );
4084 } else {
4085 storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
4090 /* Copy relevant bit from t2 into the carry flag. */
4091 /* Flags: C=selected bit, O,S,A,P undefined, Z unchanged */
4092 /* so let's also keep O,S,A,P unchanged */
4093 const ULong maskC = AMD64G_CC_MASK_C;
4094 const ULong maskOSZAP = AMD64G_CC_MASK_O | AMD64G_CC_MASK_S
4095 | AMD64G_CC_MASK_Z | AMD64G_CC_MASK_A
4096 | AMD64G_CC_MASK_P;
4098 IRTemp old_rflags = newTemp(Ity_I64);
4099 assign(old_rflags, mk_amd64g_calculate_rflags_all());
4101 IRTemp new_rflags = newTemp(Ity_I64);
4102 assign(new_rflags,
4103 binop(Iop_Or64,
4104 binop(Iop_And64, mkexpr(old_rflags), mkU64(maskOSZAP)),
4105 binop(Iop_And64,
4106 binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)),
4107 mkU64(maskC)) ));
4109 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
4110 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
4111 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
4112 /* Set NDEP even though it isn't used. This makes redundant-PUT
4113 elimination of previous stores to this field work better. */
4114 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
4116 return delta;
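/* Editorial sketch (not part of this file): a plain C model of what the
   immediate-form BT/BTS/BTR/BTC decoding above implements, for a 32-bit
   operand.  The literal bit offset is masked to the operand width, the
   carry flag becomes the selected bit, and only BTS/BTR/BTC write a new
   value back.  Names are illustrative only. */
#include <stdint.h>

typedef enum { MODEL_BT, MODEL_BTS, MODEL_BTR, MODEL_BTC } BtModelOp;

static uint32_t bt_imm_model32 ( BtModelOp op, uint32_t val,
                                 unsigned imm8, int* cf )
{
   unsigned bit  = imm8 & 31;        /* literal offsets are masked to the word */
   uint32_t mask = 1u << bit;
   *cf = (val >> bit) & 1;           /* C = the tested bit                     */
   switch (op) {
      case MODEL_BT:  return val;          /* test only, no write-back */
      case MODEL_BTS: return val |  mask;  /* set the bit              */
      case MODEL_BTR: return val & ~mask;  /* clear the bit            */
      case MODEL_BTC: return val ^  mask;  /* complement the bit       */
   }
   return val;
}
/* e.g. bt_imm_model32(MODEL_BTS, 0x10, 36, &cf) masks 36 down to 4, so
   cf == 1 and the value is unchanged (bit 4 was already set). */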
4120 /* Signed/unsigned widening multiply. Generate IR to multiply the
4121 value in RAX/EAX/AX/AL by the given IRTemp, and park the result in
4122    RDX:RAX/EDX:EAX/DX:AX/AX. */
4124 static void codegen_mulL_A_D ( Int sz, Bool syned,
4125 IRTemp tmp, const HChar* tmp_txt )
4127 IRType ty = szToITy(sz);
4128 IRTemp t1 = newTemp(ty);
4130 assign( t1, getIRegRAX(sz) );
4132 switch (ty) {
4133 case Ity_I64: {
4134 IRTemp res128 = newTemp(Ity_I128);
4135 IRTemp resHi = newTemp(Ity_I64);
4136 IRTemp resLo = newTemp(Ity_I64);
4137 IROp mulOp = syned ? Iop_MullS64 : Iop_MullU64;
4138 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
4139 setFlags_MUL ( Ity_I64, t1, tmp, tBaseOp );
4140 assign( res128, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
4141 assign( resHi, unop(Iop_128HIto64,mkexpr(res128)));
4142 assign( resLo, unop(Iop_128to64,mkexpr(res128)));
4143 putIReg64(R_RDX, mkexpr(resHi));
4144 putIReg64(R_RAX, mkexpr(resLo));
4145 break;
4147 case Ity_I32: {
4148 IRTemp res64 = newTemp(Ity_I64);
4149 IRTemp resHi = newTemp(Ity_I32);
4150 IRTemp resLo = newTemp(Ity_I32);
4151 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32;
4152 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
4153 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp );
4154 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
4155 assign( resHi, unop(Iop_64HIto32,mkexpr(res64)));
4156 assign( resLo, unop(Iop_64to32,mkexpr(res64)));
4157 putIRegRDX(4, mkexpr(resHi));
4158 putIRegRAX(4, mkexpr(resLo));
4159 break;
4161 case Ity_I16: {
4162 IRTemp res32 = newTemp(Ity_I32);
4163 IRTemp resHi = newTemp(Ity_I16);
4164 IRTemp resLo = newTemp(Ity_I16);
4165 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16;
4166 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
4167 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp );
4168 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
4169 assign( resHi, unop(Iop_32HIto16,mkexpr(res32)));
4170 assign( resLo, unop(Iop_32to16,mkexpr(res32)));
4171 putIRegRDX(2, mkexpr(resHi));
4172 putIRegRAX(2, mkexpr(resLo));
4173 break;
4175 case Ity_I8: {
4176 IRTemp res16 = newTemp(Ity_I16);
4177 IRTemp resHi = newTemp(Ity_I8);
4178 IRTemp resLo = newTemp(Ity_I8);
4179 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8;
4180 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
4181 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp );
4182 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
4183 assign( resHi, unop(Iop_16HIto8,mkexpr(res16)));
4184 assign( resLo, unop(Iop_16to8,mkexpr(res16)));
4185 putIRegRAX(2, mkexpr(res16));
4186 break;
4188 default:
4189 ppIRType(ty);
4190 vpanic("codegen_mulL_A_D(amd64)");
4192 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt);
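/* Editorial sketch (not part of this file): the 32-bit case of the
   widening multiply above, in plain C.  EAX * src produces a 64-bit
   product; the high half goes to EDX and the low half back to EAX.
   The signed form (imul) has the same shape, except both operands are
   sign-extended to 64 bits before multiplying.  Illustrative only. */
#include <stdint.h>

static void mul32_widening_model ( uint32_t eax, uint32_t src,
                                   uint32_t* edx_out, uint32_t* eax_out )
{
   uint64_t prod = (uint64_t)eax * (uint64_t)src;  /* full 64-bit product */
   *edx_out = (uint32_t)(prod >> 32);              /* high half -> EDX    */
   *eax_out = (uint32_t)prod;                      /* low half  -> EAX    */
}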
4196 /* Group 3 extended opcodes. We have to decide here whether F2 and F3
4197 might be valid.*/
4198 static
4199 ULong dis_Grp3 ( const VexAbiInfo* vbi,
4200 Prefix pfx, Int sz, Long delta, Bool* decode_OK )
4202 Long d64;
4203 UChar modrm;
4204 HChar dis_buf[50];
4205 Int len;
4206 IRTemp addr;
4207 IRType ty = szToITy(sz);
4208 IRTemp t1 = newTemp(ty);
4209 IRTemp dst1, src, dst0;
4210 *decode_OK = True;
4211 modrm = getUChar(delta);
4212 if (epartIsReg(modrm)) {
4213 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */
4214 if (haveF2orF3(pfx)) goto unhandled;
4215 switch (gregLO3ofRM(modrm)) {
4216 case 0: { /* TEST */
4217 delta++;
4218 d64 = getSDisp(imin(4,sz), delta);
4219 delta += imin(4,sz);
4220 dst1 = newTemp(ty);
4221 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
4222 getIRegE(sz,pfx,modrm),
4223 mkU(ty, d64 & mkSizeMask(sz))));
4224 setFlags_DEP1( Iop_And8, dst1, ty );
4225 DIP("test%c $%lld, %s\n",
4226 nameISize(sz), d64,
4227 nameIRegE(sz, pfx, modrm));
4228 break;
4230 case 1:
4231 *decode_OK = False;
4232 return delta;
4233 case 2: /* NOT */
4234 delta++;
4235 putIRegE(sz, pfx, modrm,
4236 unop(mkSizedOp(ty,Iop_Not8),
4237 getIRegE(sz, pfx, modrm)));
4238 DIP("not%c %s\n", nameISize(sz),
4239 nameIRegE(sz, pfx, modrm));
4240 break;
4241 case 3: /* NEG */
4242 delta++;
4243 dst0 = newTemp(ty);
4244 src = newTemp(ty);
4245 dst1 = newTemp(ty);
4246 assign(dst0, mkU(ty,0));
4247 assign(src, getIRegE(sz, pfx, modrm));
4248 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
4249 mkexpr(src)));
4250 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
4251 putIRegE(sz, pfx, modrm, mkexpr(dst1));
4252 DIP("neg%c %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm));
4253 break;
4254 case 4: /* MUL (unsigned widening) */
4255 delta++;
4256 src = newTemp(ty);
4257 assign(src, getIRegE(sz,pfx,modrm));
4258 codegen_mulL_A_D ( sz, False, src,
4259 nameIRegE(sz,pfx,modrm) );
4260 break;
4261 case 5: /* IMUL (signed widening) */
4262 delta++;
4263 src = newTemp(ty);
4264 assign(src, getIRegE(sz,pfx,modrm));
4265 codegen_mulL_A_D ( sz, True, src,
4266 nameIRegE(sz,pfx,modrm) );
4267 break;
4268 case 6: /* DIV */
4269 delta++;
4270 assign( t1, getIRegE(sz, pfx, modrm) );
4271 codegen_div ( sz, t1, False );
4272 DIP("div%c %s\n", nameISize(sz),
4273 nameIRegE(sz, pfx, modrm));
4274 break;
4275 case 7: /* IDIV */
4276 delta++;
4277 assign( t1, getIRegE(sz, pfx, modrm) );
4278 codegen_div ( sz, t1, True );
4279 DIP("idiv%c %s\n", nameISize(sz),
4280 nameIRegE(sz, pfx, modrm));
4281 break;
4282 default:
4283 /*NOTREACHED*/
4284 vpanic("Grp3(amd64,R)");
4286 } else {
4287 /* Decide if F2/XACQ or F3/XREL might be valid. */
4288 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
4289 if ((gregLO3ofRM(modrm) == 3/*NEG*/ || gregLO3ofRM(modrm) == 2/*NOT*/)
4290 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
4291 validF2orF3 = True;
4293 if (!validF2orF3) goto unhandled;
4294 /* */
4295 addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
4296 /* we have to inform disAMode of any immediate
4297 bytes used */
4298 gregLO3ofRM(modrm)==0/*TEST*/
4299 ? imin(4,sz)
4302 t1 = newTemp(ty);
4303 delta += len;
4304 assign(t1, loadLE(ty,mkexpr(addr)));
4305 switch (gregLO3ofRM(modrm)) {
4306 case 0: { /* TEST */
4307 d64 = getSDisp(imin(4,sz), delta);
4308 delta += imin(4,sz);
4309 dst1 = newTemp(ty);
4310 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
4311 mkexpr(t1),
4312 mkU(ty, d64 & mkSizeMask(sz))));
4313 setFlags_DEP1( Iop_And8, dst1, ty );
4314 DIP("test%c $%lld, %s\n", nameISize(sz), d64, dis_buf);
4315 break;
4317 case 1:
4318 *decode_OK = False;
4319 return delta;
4320 case 2: /* NOT */
4321 dst1 = newTemp(ty);
4322 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
4323 if (haveLOCK(pfx)) {
4324 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
4325 guest_RIP_curr_instr );
4326 } else {
4327 storeLE( mkexpr(addr), mkexpr(dst1) );
4329 DIP("not%c %s\n", nameISize(sz), dis_buf);
4330 break;
4331 case 3: /* NEG */
4332 dst0 = newTemp(ty);
4333 src = newTemp(ty);
4334 dst1 = newTemp(ty);
4335 assign(dst0, mkU(ty,0));
4336 assign(src, mkexpr(t1));
4337 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
4338 mkexpr(src)));
4339 if (haveLOCK(pfx)) {
4340 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
4341 guest_RIP_curr_instr );
4342 } else {
4343 storeLE( mkexpr(addr), mkexpr(dst1) );
4345 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
4346 DIP("neg%c %s\n", nameISize(sz), dis_buf);
4347 break;
4348 case 4: /* MUL (unsigned widening) */
4349 codegen_mulL_A_D ( sz, False, t1, dis_buf );
4350 break;
4351 case 5: /* IMUL */
4352 codegen_mulL_A_D ( sz, True, t1, dis_buf );
4353 break;
4354 case 6: /* DIV */
4355 codegen_div ( sz, t1, False );
4356 DIP("div%c %s\n", nameISize(sz), dis_buf);
4357 break;
4358 case 7: /* IDIV */
4359 codegen_div ( sz, t1, True );
4360 DIP("idiv%c %s\n", nameISize(sz), dis_buf);
4361 break;
4362 default:
4363 /*NOTREACHED*/
4364 vpanic("Grp3(amd64,M)");
4367 return delta;
4368 unhandled:
4369 *decode_OK = False;
4370 return delta;
4374 /* Group 4 extended opcodes. We have to decide here whether F2 and F3
4375 might be valid. */
4376 static
4377 ULong dis_Grp4 ( const VexAbiInfo* vbi,
4378 Prefix pfx, Long delta, Bool* decode_OK )
4380 Int alen;
4381 UChar modrm;
4382 HChar dis_buf[50];
4383 IRType ty = Ity_I8;
4384 IRTemp t1 = newTemp(ty);
4385 IRTemp t2 = newTemp(ty);
4387 *decode_OK = True;
4389 modrm = getUChar(delta);
4390 if (epartIsReg(modrm)) {
4391 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */
4392 if (haveF2orF3(pfx)) goto unhandled;
4393 assign(t1, getIRegE(1, pfx, modrm));
4394 switch (gregLO3ofRM(modrm)) {
4395 case 0: /* INC */
4396 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
4397 putIRegE(1, pfx, modrm, mkexpr(t2));
4398 setFlags_INC_DEC( True, t2, ty );
4399 break;
4400 case 1: /* DEC */
4401 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
4402 putIRegE(1, pfx, modrm, mkexpr(t2));
4403 setFlags_INC_DEC( False, t2, ty );
4404 break;
4405 default:
4406 *decode_OK = False;
4407 return delta;
4409 delta++;
4410 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)),
4411 nameIRegE(1, pfx, modrm));
4412 } else {
4413 /* Decide if F2/XACQ or F3/XREL might be valid. */
4414 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
4415 if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/)
4416 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
4417 validF2orF3 = True;
4419 if (!validF2orF3) goto unhandled;
4420 /* */
4421 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
4422 assign( t1, loadLE(ty, mkexpr(addr)) );
4423 switch (gregLO3ofRM(modrm)) {
4424 case 0: /* INC */
4425 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
4426 if (haveLOCK(pfx)) {
4427 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
4428 guest_RIP_curr_instr );
4429 } else {
4430 storeLE( mkexpr(addr), mkexpr(t2) );
4432 setFlags_INC_DEC( True, t2, ty );
4433 break;
4434 case 1: /* DEC */
4435 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
4436 if (haveLOCK(pfx)) {
4437 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
4438 guest_RIP_curr_instr );
4439 } else {
4440 storeLE( mkexpr(addr), mkexpr(t2) );
4442 setFlags_INC_DEC( False, t2, ty );
4443 break;
4444 default:
4445 *decode_OK = False;
4446 return delta;
4448 delta += alen;
4449 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), dis_buf);
4451 return delta;
4452 unhandled:
4453 *decode_OK = False;
4454 return delta;
4458 /* Group 5 extended opcodes. We have to decide here whether F2 and F3
4459 might be valid. */
4460 static
4461 ULong dis_Grp5 ( const VexAbiInfo* vbi,
4462 Prefix pfx, Int sz, Long delta,
4463 /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK )
4465 Int len;
4466 UChar modrm;
4467 HChar dis_buf[50];
4468 IRTemp addr = IRTemp_INVALID;
4469 IRType ty = szToITy(sz);
4470 IRTemp t1 = newTemp(ty);
4471 IRTemp t2 = IRTemp_INVALID;
4472 IRTemp t3 = IRTemp_INVALID;
4473 Bool showSz = True;
4475 *decode_OK = True;
4477 modrm = getUChar(delta);
4478 if (epartIsReg(modrm)) {
4479 /* F2/XACQ and F3/XREL are always invalid in the non-mem case.
4480 F2/CALL and F2/JMP may have bnd prefix. */
4481 if (haveF2orF3(pfx)
4482 && ! (haveF2(pfx)
4483 && (gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4)))
4484 goto unhandledR;
4485 assign(t1, getIRegE(sz,pfx,modrm));
4486 switch (gregLO3ofRM(modrm)) {
4487 case 0: /* INC */
4488 t2 = newTemp(ty);
4489 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
4490 mkexpr(t1), mkU(ty,1)));
4491 setFlags_INC_DEC( True, t2, ty );
4492 putIRegE(sz,pfx,modrm, mkexpr(t2));
4493 break;
4494 case 1: /* DEC */
4495 t2 = newTemp(ty);
4496 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
4497 mkexpr(t1), mkU(ty,1)));
4498 setFlags_INC_DEC( False, t2, ty );
4499 putIRegE(sz,pfx,modrm, mkexpr(t2));
4500 break;
4501 case 2: /* call Ev */
4502 /* Ignore any sz value and operate as if sz==8. */
4503 if (!(sz == 4 || sz == 8)) goto unhandledR;
4504 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
4505 sz = 8;
4506 t3 = newTemp(Ity_I64);
4507 assign(t3, getIRegE(sz,pfx,modrm));
4508 t2 = newTemp(Ity_I64);
4509 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
4510 putIReg64(R_RSP, mkexpr(t2));
4511 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1));
4512 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)");
4513 jmp_treg(dres, Ijk_Call, t3);
4514 vassert(dres->whatNext == Dis_StopHere);
4515 showSz = False;
4516 break;
4517 case 4: /* jmp Ev */
4518 /* Ignore any sz value and operate as if sz==8. */
4519 if (!(sz == 4 || sz == 8)) goto unhandledR;
4520 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
4521 sz = 8;
4522 t3 = newTemp(Ity_I64);
4523 assign(t3, getIRegE(sz,pfx,modrm));
4524 jmp_treg(dres, Ijk_Boring, t3);
4525 vassert(dres->whatNext == Dis_StopHere);
4526 showSz = False;
4527 break;
4528 case 6: /* PUSH Ev */
4529 /* There is no encoding for 32-bit operand size; hence ... */
4530 if (sz == 4) sz = 8;
4531 if (sz == 8 || sz == 2) {
4532 ty = szToITy(sz); /* redo it, since sz might have changed */
4533 t3 = newTemp(ty);
4534 assign(t3, getIRegE(sz,pfx,modrm));
4535 t2 = newTemp(Ity_I64);
4536 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
4537 putIReg64(R_RSP, mkexpr(t2) );
4538 storeLE( mkexpr(t2), mkexpr(t3) );
4539 break;
4540 } else {
4541 goto unhandledR; /* awaiting test case */
4543 default:
4544 unhandledR:
4545 *decode_OK = False;
4546 return delta;
4548 delta++;
4549 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
4550 showSz ? nameISize(sz) : ' ',
4551 nameIRegE(sz, pfx, modrm));
4552 } else {
4553 /* Decide if F2/XACQ, F3/XREL, F2/CALL or F2/JMP might be valid. */
4554 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
4555 if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/)
4556 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
4557 validF2orF3 = True;
4558 } else if ((gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4)
4559 && (haveF2(pfx) && !haveF3(pfx))) {
4560 validF2orF3 = True;
4562 if (!validF2orF3) goto unhandledM;
4563 /* */
4564 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
4565 if (gregLO3ofRM(modrm) != 2 && gregLO3ofRM(modrm) != 4
4566 && gregLO3ofRM(modrm) != 6) {
4567 assign(t1, loadLE(ty,mkexpr(addr)));
4569 switch (gregLO3ofRM(modrm)) {
4570 case 0: /* INC */
4571 t2 = newTemp(ty);
4572 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
4573 mkexpr(t1), mkU(ty,1)));
4574 if (haveLOCK(pfx)) {
4575 casLE( mkexpr(addr),
4576 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
4577 } else {
4578 storeLE(mkexpr(addr),mkexpr(t2));
4580 setFlags_INC_DEC( True, t2, ty );
4581 break;
4582 case 1: /* DEC */
4583 t2 = newTemp(ty);
4584 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
4585 mkexpr(t1), mkU(ty,1)));
4586 if (haveLOCK(pfx)) {
4587 casLE( mkexpr(addr),
4588 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
4589 } else {
4590 storeLE(mkexpr(addr),mkexpr(t2));
4592 setFlags_INC_DEC( False, t2, ty );
4593 break;
4594 case 2: /* call Ev */
4595 /* Ignore any sz value and operate as if sz==8. */
4596 if (!(sz == 4 || sz == 8)) goto unhandledM;
4597 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
4598 sz = 8;
4599 t3 = newTemp(Ity_I64);
4600 assign(t3, loadLE(Ity_I64,mkexpr(addr)));
4601 t2 = newTemp(Ity_I64);
4602 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
4603 putIReg64(R_RSP, mkexpr(t2));
4604 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len));
4605 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)");
4606 jmp_treg(dres, Ijk_Call, t3);
4607 vassert(dres->whatNext == Dis_StopHere);
4608 showSz = False;
4609 break;
4610 case 4: /* JMP Ev */
4611 /* Ignore any sz value and operate as if sz==8. */
4612 if (!(sz == 4 || sz == 8)) goto unhandledM;
4613 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
4614 sz = 8;
4615 t3 = newTemp(Ity_I64);
4616 assign(t3, loadLE(Ity_I64,mkexpr(addr)));
4617 jmp_treg(dres, Ijk_Boring, t3);
4618 vassert(dres->whatNext == Dis_StopHere);
4619 showSz = False;
4620 break;
4621 case 6: /* PUSH Ev */
4622 /* There is no encoding for 32-bit operand size; hence ... */
4623 if (sz == 4) sz = 8;
4624 if (sz == 8 || sz == 2) {
4625 ty = szToITy(sz); /* redo it, since sz might have changed */
4626 t3 = newTemp(ty);
4627 assign(t3, loadLE(ty,mkexpr(addr)));
4628 t2 = newTemp(Ity_I64);
4629 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
4630 putIReg64(R_RSP, mkexpr(t2) );
4631 storeLE( mkexpr(t2), mkexpr(t3) );
4632 break;
4633 } else {
4634 goto unhandledM; /* awaiting test case */
4636 default:
4637 unhandledM:
4638 *decode_OK = False;
4639 return delta;
4641 delta += len;
4642 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
4643 showSz ? nameISize(sz) : ' ',
4644 dis_buf);
4646 return delta;
4650 /*------------------------------------------------------------*/
4651 /*--- Disassembling string ops (including REP prefixes) ---*/
4652 /*------------------------------------------------------------*/
4654 /* Code shared by all the string ops */
4655 static
4656 void dis_string_op_increment ( Int sz, IRTemp t_inc )
4658 UChar logSz;
4659 if (sz == 8 || sz == 4 || sz == 2) {
4660 logSz = 1;
4661 if (sz == 4) logSz = 2;
4662 if (sz == 8) logSz = 3;
4663 assign( t_inc,
4664 binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ),
4665 mkU8(logSz) ) );
4666 } else {
4667 assign( t_inc,
4668 IRExpr_Get( OFFB_DFLAG, Ity_I64 ) );
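/* Editorial sketch (not part of this file): the increment computed
   above, stated directly.  The guest DFLAG field holds +1 or -1, so
   shifting it left by log2(sz) yields +sz or -sz, the per-element step
   applied to RSI/RDI.  The multiply below is the arithmetic equivalent
   of that shift; the name is illustrative only. */
#include <stdint.h>

static int64_t string_increment_model ( int64_t dflag /* +1 or -1 */, int sz )
{
   /* sz is 1, 2, 4 or 8; DFLAG is +1 (DF clear) or -1 (DF set). */
   return dflag * (int64_t)sz;
}
/* e.g. with DF set and sz == 4, the step is -4: RSI/RDI walk downwards. */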
4672 static
4673 void dis_string_op( void (*dis_OP)( Int, IRTemp, Prefix pfx ),
4674 Int sz, const HChar* name, Prefix pfx )
4676 IRTemp t_inc = newTemp(Ity_I64);
4677 /* Really we ought to inspect the override prefixes, but we don't.
4678       The following assertion catches any resulting silliness. */
4679 vassert(pfx == clearSegBits(pfx));
4680 dis_string_op_increment(sz, t_inc);
4681 dis_OP( sz, t_inc, pfx );
4682 DIP("%s%c\n", name, nameISize(sz));
4685 static
4686 void dis_MOVS ( Int sz, IRTemp t_inc, Prefix pfx )
4688 IRType ty = szToITy(sz);
4689 IRTemp td = newTemp(Ity_I64); /* RDI */
4690 IRTemp ts = newTemp(Ity_I64); /* RSI */
4691 IRExpr *incd, *incs;
4693 if (haveASO(pfx)) {
4694 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4695 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
4696 } else {
4697 assign( td, getIReg64(R_RDI) );
4698 assign( ts, getIReg64(R_RSI) );
4701 storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );
4703 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4704 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
4705 if (haveASO(pfx)) {
4706 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4707 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
4709 putIReg64( R_RDI, incd );
4710 putIReg64( R_RSI, incs );
4713 static
4714 void dis_LODS ( Int sz, IRTemp t_inc, Prefix pfx )
4716 IRType ty = szToITy(sz);
4717 IRTemp ts = newTemp(Ity_I64); /* RSI */
4718 IRExpr *incs;
4720 if (haveASO(pfx))
4721 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
4722 else
4723 assign( ts, getIReg64(R_RSI) );
4725 putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) );
4727 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
4728 if (haveASO(pfx))
4729 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
4730 putIReg64( R_RSI, incs );
4733 static
4734 void dis_STOS ( Int sz, IRTemp t_inc, Prefix pfx )
4736 IRType ty = szToITy(sz);
4737 IRTemp ta = newTemp(ty); /* rAX */
4738 IRTemp td = newTemp(Ity_I64); /* RDI */
4739 IRExpr *incd;
4741 assign( ta, getIRegRAX(sz) );
4743 if (haveASO(pfx))
4744 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4745 else
4746 assign( td, getIReg64(R_RDI) );
4748 storeLE( mkexpr(td), mkexpr(ta) );
4750 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4751 if (haveASO(pfx))
4752 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4753 putIReg64( R_RDI, incd );
4756 static
4757 void dis_CMPS ( Int sz, IRTemp t_inc, Prefix pfx )
4759 IRType ty = szToITy(sz);
4760 IRTemp tdv = newTemp(ty); /* (RDI) */
4761 IRTemp tsv = newTemp(ty); /* (RSI) */
4762 IRTemp td = newTemp(Ity_I64); /* RDI */
4763 IRTemp ts = newTemp(Ity_I64); /* RSI */
4764 IRExpr *incd, *incs;
4766 if (haveASO(pfx)) {
4767 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4768 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
4769 } else {
4770 assign( td, getIReg64(R_RDI) );
4771 assign( ts, getIReg64(R_RSI) );
4774 assign( tdv, loadLE(ty,mkexpr(td)) );
4776 assign( tsv, loadLE(ty,mkexpr(ts)) );
4778 setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );
4780 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4781 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
4782 if (haveASO(pfx)) {
4783 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4784 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
4786 putIReg64( R_RDI, incd );
4787 putIReg64( R_RSI, incs );
4790 static
4791 void dis_SCAS ( Int sz, IRTemp t_inc, Prefix pfx )
4793 IRType ty = szToITy(sz);
4794 IRTemp ta = newTemp(ty); /* rAX */
4795 IRTemp td = newTemp(Ity_I64); /* RDI */
4796 IRTemp tdv = newTemp(ty); /* (RDI) */
4797 IRExpr *incd;
4799 assign( ta, getIRegRAX(sz) );
4801 if (haveASO(pfx))
4802 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4803 else
4804 assign( td, getIReg64(R_RDI) );
4806 assign( tdv, loadLE(ty,mkexpr(td)) );
4808 setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );
4810 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4811 if (haveASO(pfx))
4812 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4813 putIReg64( R_RDI, incd );
4817 /* Wrap the appropriate string op inside a REP/REPE/REPNE. We assume
4818 the insn is the last one in the basic block, and so emit a jump to
4819 the next insn, rather than just falling through. */
4820 static
4821 void dis_REP_op ( /*MOD*/DisResult* dres,
4822 AMD64Condcode cond,
4823 void (*dis_OP)(Int, IRTemp, Prefix),
4824 Int sz, Addr64 rip, Addr64 rip_next, const HChar* name,
4825 Prefix pfx )
4827 IRTemp t_inc = newTemp(Ity_I64);
4828 IRTemp tc;
4829 IRExpr* cmp;
4831 /* Really we ought to inspect the override prefixes, but we don't.
4832       The following assertion catches any resulting silliness. */
4833 vassert(pfx == clearSegBits(pfx));
4835 if (haveASO(pfx)) {
4836 tc = newTemp(Ity_I32); /* ECX */
4837 assign( tc, getIReg32(R_RCX) );
4838 cmp = binop(Iop_CmpEQ32, mkexpr(tc), mkU32(0));
4839 } else {
4840 tc = newTemp(Ity_I64); /* RCX */
4841 assign( tc, getIReg64(R_RCX) );
4842 cmp = binop(Iop_CmpEQ64, mkexpr(tc), mkU64(0));
4845 stmt( IRStmt_Exit( cmp, Ijk_Boring,
4846 IRConst_U64(rip_next), OFFB_RIP ) );
4848 if (haveASO(pfx))
4849 putIReg32(R_RCX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );
4850 else
4851 putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) );
4853 dis_string_op_increment(sz, t_inc);
4854 dis_OP (sz, t_inc, pfx);
4856 if (cond == AMD64CondAlways) {
4857 jmp_lit(dres, Ijk_Boring, rip);
4858 vassert(dres->whatNext == Dis_StopHere);
4859 } else {
4860 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond),
4861 Ijk_Boring,
4862 IRConst_U64(rip),
4863 OFFB_RIP ) );
4864 jmp_lit(dres, Ijk_Boring, rip_next);
4865 vassert(dres->whatNext == Dis_StopHere);
4867 DIP("%s%c\n", name, nameISize(sz));
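/* Editorial sketch (not part of this file): the guest-visible behaviour
   the REP wrapper above produces for "rep movsb" with DF clear, written
   as a C loop.  The IR emits a single iteration guarded by exits and
   re-enters the same instruction, but the net effect is this loop; REPE
   and REPNE additionally test ZF after each iteration.  Names are
   illustrative only. */
#include <stdint.h>

static void rep_movsb_model ( uint8_t* rdi, const uint8_t* rsi, uint64_t rcx )
{
   while (rcx != 0) {       /* exit immediately if the count is already 0  */
      rcx--;                /* the count is decremented every iteration    */
      *rdi++ = *rsi++;      /* one MOVSB step (DF clear, pointers move up) */
   }
}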
4871 /*------------------------------------------------------------*/
4872 /*--- Arithmetic, etc. ---*/
4873 /*------------------------------------------------------------*/
4875 /* IMUL E, G. Supplied rip points to the modR/M byte. */
4876 static
4877 ULong dis_mul_E_G ( const VexAbiInfo* vbi,
4878 Prefix pfx,
4879 Int size,
4880 Long delta0 )
4882 Int alen;
4883 HChar dis_buf[50];
4884 UChar rm = getUChar(delta0);
4885 IRType ty = szToITy(size);
4886 IRTemp te = newTemp(ty);
4887 IRTemp tg = newTemp(ty);
4888 IRTemp resLo = newTemp(ty);
4890 assign( tg, getIRegG(size, pfx, rm) );
4891 if (epartIsReg(rm)) {
4892 assign( te, getIRegE(size, pfx, rm) );
4893 } else {
4894 IRTemp addr = disAMode( &alen, vbi, pfx, delta0, dis_buf, 0 );
4895 assign( te, loadLE(ty,mkexpr(addr)) );
4898 setFlags_MUL ( ty, te, tg, AMD64G_CC_OP_SMULB );
4900 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );
4902 putIRegG(size, pfx, rm, mkexpr(resLo) );
4904 if (epartIsReg(rm)) {
4905 DIP("imul%c %s, %s\n", nameISize(size),
4906 nameIRegE(size,pfx,rm),
4907 nameIRegG(size,pfx,rm));
4908 return 1+delta0;
4909 } else {
4910 DIP("imul%c %s, %s\n", nameISize(size),
4911 dis_buf,
4912 nameIRegG(size,pfx,rm));
4913 return alen+delta0;
4918 /* IMUL I * E -> G. Supplied rip points to the modR/M byte. */
4919 static
4920 ULong dis_imul_I_E_G ( const VexAbiInfo* vbi,
4921 Prefix pfx,
4922 Int size,
4923 Long delta,
4924 Int litsize )
4926 Long d64;
4927 Int alen;
4928 HChar dis_buf[50];
4929 UChar rm = getUChar(delta);
4930 IRType ty = szToITy(size);
4931 IRTemp te = newTemp(ty);
4932 IRTemp tl = newTemp(ty);
4933 IRTemp resLo = newTemp(ty);
4935 vassert(/*size == 1 ||*/ size == 2 || size == 4 || size == 8);
4937 if (epartIsReg(rm)) {
4938 assign(te, getIRegE(size, pfx, rm));
4939 delta++;
4940 } else {
4941 IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
4942 imin(4,litsize) );
4943 assign(te, loadLE(ty, mkexpr(addr)));
4944 delta += alen;
4946 d64 = getSDisp(imin(4,litsize),delta);
4947 delta += imin(4,litsize);
4949 d64 &= mkSizeMask(size);
4950 assign(tl, mkU(ty,d64));
4952 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));
4954 setFlags_MUL ( ty, te, tl, AMD64G_CC_OP_SMULB );
4956 putIRegG(size, pfx, rm, mkexpr(resLo));
4958 DIP("imul%c $%lld, %s, %s\n",
4959 nameISize(size), d64,
4960 ( epartIsReg(rm) ? nameIRegE(size,pfx,rm) : dis_buf ),
4961 nameIRegG(size,pfx,rm) );
4962 return delta;
4966 /* Generate an IR sequence to do a popcount operation on the supplied
4967 IRTemp, and return a new IRTemp holding the result. 'ty' may be
4968 Ity_I16, Ity_I32 or Ity_I64 only. */
4969 static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src )
4971 Int i;
4972 if (ty == Ity_I16) {
4973 IRTemp old = IRTemp_INVALID;
4974 IRTemp nyu = IRTemp_INVALID;
4975 IRTemp mask[4], shift[4];
4976 for (i = 0; i < 4; i++) {
4977 mask[i] = newTemp(ty);
4978 shift[i] = 1 << i;
4980 assign(mask[0], mkU16(0x5555));
4981 assign(mask[1], mkU16(0x3333));
4982 assign(mask[2], mkU16(0x0F0F));
4983 assign(mask[3], mkU16(0x00FF));
4984 old = src;
4985 for (i = 0; i < 4; i++) {
4986 nyu = newTemp(ty);
4987 assign(nyu,
4988 binop(Iop_Add16,
4989 binop(Iop_And16,
4990 mkexpr(old),
4991 mkexpr(mask[i])),
4992 binop(Iop_And16,
4993 binop(Iop_Shr16, mkexpr(old), mkU8(shift[i])),
4994 mkexpr(mask[i]))));
4995 old = nyu;
4997 return nyu;
4999 if (ty == Ity_I32) {
5000 IRTemp old = IRTemp_INVALID;
5001 IRTemp nyu = IRTemp_INVALID;
5002 IRTemp mask[5], shift[5];
5003 for (i = 0; i < 5; i++) {
5004 mask[i] = newTemp(ty);
5005 shift[i] = 1 << i;
5007 assign(mask[0], mkU32(0x55555555));
5008 assign(mask[1], mkU32(0x33333333));
5009 assign(mask[2], mkU32(0x0F0F0F0F));
5010 assign(mask[3], mkU32(0x00FF00FF));
5011 assign(mask[4], mkU32(0x0000FFFF));
5012 old = src;
5013 for (i = 0; i < 5; i++) {
5014 nyu = newTemp(ty);
5015 assign(nyu,
5016 binop(Iop_Add32,
5017 binop(Iop_And32,
5018 mkexpr(old),
5019 mkexpr(mask[i])),
5020 binop(Iop_And32,
5021 binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
5022 mkexpr(mask[i]))));
5023 old = nyu;
5025 return nyu;
5027 if (ty == Ity_I64) {
5028 IRTemp old = IRTemp_INVALID;
5029 IRTemp nyu = IRTemp_INVALID;
5030 IRTemp mask[6], shift[6];
5031 for (i = 0; i < 6; i++) {
5032 mask[i] = newTemp(ty);
5033 shift[i] = 1 << i;
5035 assign(mask[0], mkU64(0x5555555555555555ULL));
5036 assign(mask[1], mkU64(0x3333333333333333ULL));
5037 assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL));
5038 assign(mask[3], mkU64(0x00FF00FF00FF00FFULL));
5039 assign(mask[4], mkU64(0x0000FFFF0000FFFFULL));
5040 assign(mask[5], mkU64(0x00000000FFFFFFFFULL));
5041 old = src;
5042 for (i = 0; i < 6; i++) {
5043 nyu = newTemp(ty);
5044 assign(nyu,
5045 binop(Iop_Add64,
5046 binop(Iop_And64,
5047 mkexpr(old),
5048 mkexpr(mask[i])),
5049 binop(Iop_And64,
5050 binop(Iop_Shr64, mkexpr(old), mkU8(shift[i])),
5051 mkexpr(mask[i]))));
5052 old = nyu;
5054 return nyu;
5056 /*NOTREACHED*/
5057 vassert(0);
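/* Editorial sketch (not part of this file): the same divide-and-conquer
   popcount as generated above, written directly in C for the 32-bit
   case.  Each round adds together pairs of 1-, 2-, 4-, 8- and then
   16-bit partial counts, using the same masks as the IR.  Illustrative
   only. */
#include <stdint.h>

static uint32_t popcount32_model ( uint32_t x )
{
   x = (x & 0x55555555u) + ((x >> 1)  & 0x55555555u); /* 16 x 2-bit counts  */
   x = (x & 0x33333333u) + ((x >> 2)  & 0x33333333u); /*  8 x 4-bit counts  */
   x = (x & 0x0F0F0F0Fu) + ((x >> 4)  & 0x0F0F0F0Fu); /*  4 x 8-bit counts  */
   x = (x & 0x00FF00FFu) + ((x >> 8)  & 0x00FF00FFu); /*  2 x 16-bit counts */
   x = (x & 0x0000FFFFu) + ((x >> 16) & 0x0000FFFFu); /* final 32-bit count */
   return x;
}
/* e.g. popcount32_model(0xF0F01234u) == 13 */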
5061 /* Generate an IR sequence to do a count-leading-zeroes operation on
5062 the supplied IRTemp, and return a new IRTemp holding the result.
5063 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where
5064 the argument is zero, return the number of bits in the word (the
5065 natural semantics). */
5066 static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
5068 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);
5070 IRTemp src64 = newTemp(Ity_I64);
5071 assign(src64, widenUto64( mkexpr(src) ));
5073 IRTemp src64x = newTemp(Ity_I64);
5074 assign(src64x,
5075 binop(Iop_Shl64, mkexpr(src64),
5076 mkU8(64 - 8 * sizeofIRType(ty))));
5078 // Clz64 has undefined semantics when its input is zero, so
5079 // special-case around that.
5080 IRTemp res64 = newTemp(Ity_I64);
5081 assign(res64,
5082 IRExpr_ITE(
5083 binop(Iop_CmpEQ64, mkexpr(src64x), mkU64(0)),
5084 mkU64(8 * sizeofIRType(ty)),
5085 unop(Iop_Clz64, mkexpr(src64x))
5088 IRTemp res = newTemp(ty);
5089 assign(res, narrowTo(ty, mkexpr(res64)));
5090 return res;
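/* Editorial sketch (not part of this file): the LZCNT recipe above in
   plain C for a 16-bit operand: shift the value into the top of a
   64-bit word, count leading zeroes there, and special-case zero so the
   answer is the operand width (the "natural" LZCNT result).
   __builtin_clzll is assumed to be available (GCC/Clang); it is
   undefined for zero input, hence the explicit check, mirroring the ITE
   in the IR above. */
#include <stdint.h>

static unsigned lzcnt16_model ( uint16_t src )
{
   if (src == 0) return 16;                       /* zero input -> width  */
   uint64_t shifted = (uint64_t)src << (64 - 16); /* park in the top bits */
   return (unsigned)__builtin_clzll(shifted);     /* count leading zeroes */
}
/* e.g. lzcnt16_model(0x0001) == 15, lzcnt16_model(0x8000) == 0 */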
5094 /* Generate an IR sequence to do a count-trailing-zeroes operation on
5095 the supplied IRTemp, and return a new IRTemp holding the result.
5096 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where
5097 the argument is zero, return the number of bits in the word (the
5098 natural semantics). */
5099 static IRTemp gen_TZCNT ( IRType ty, IRTemp src )
5101 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);
5103 IRTemp src64 = newTemp(Ity_I64);
5104 assign(src64, widenUto64( mkexpr(src) ));
5106 // Ctz64 has undefined semantics when its input is zero, so
5107 // special-case around that.
5108 IRTemp res64 = newTemp(Ity_I64);
5109 assign(res64,
5110 IRExpr_ITE(
5111 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0)),
5112 mkU64(8 * sizeofIRType(ty)),
5113 unop(Iop_Ctz64, mkexpr(src64))
5116 IRTemp res = newTemp(ty);
5117 assign(res, narrowTo(ty, mkexpr(res64)));
5118 return res;
5122 /*------------------------------------------------------------*/
5123 /*--- ---*/
5124 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/
5125 /*--- ---*/
5126 /*------------------------------------------------------------*/
5128 /* --- Helper functions for dealing with the register stack. --- */
5130 /* --- Set the emulation-warning pseudo-register. --- */
5132 static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
5134 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
5135 stmt( IRStmt_Put( OFFB_EMNOTE, e ) );
5138 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
5140 static IRExpr* mkQNaN64 ( void )
5142 /* QNaN is 0 2047 1 0(51times)
5143 == 0b 11111111111b 1 0(51times)
5144      == 0x7FF8 0000 0000 0000 */
5146 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
5149 /* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */
5151 static IRExpr* get_ftop ( void )
5153 return IRExpr_Get( OFFB_FTOP, Ity_I32 );
5156 static void put_ftop ( IRExpr* e )
5158 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
5159 stmt( IRStmt_Put( OFFB_FTOP, e ) );
5162 /* --------- Get/put the C3210 bits. --------- */
5164 static IRExpr* /* :: Ity_I64 */ get_C3210 ( void )
5166 return IRExpr_Get( OFFB_FC3210, Ity_I64 );
5169 static void put_C3210 ( IRExpr* e /* :: Ity_I64 */ )
5171 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
5172 stmt( IRStmt_Put( OFFB_FC3210, e ) );
5175 /* --------- Get/put the FPU rounding mode. --------- */
5176 static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
5178 return unop(Iop_64to32, IRExpr_Get( OFFB_FPROUND, Ity_I64 ));
5181 static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
5183 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
5184 stmt( IRStmt_Put( OFFB_FPROUND, unop(Iop_32Uto64,e) ) );
5188 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */
5189 /* Produces a value in 0 .. 3, which is encoded as per the type
5190 IRRoundingMode. Since the guest_FPROUND value is also encoded as
5191 per IRRoundingMode, we merely need to get it and mask it for
5192      safety. */
5194 static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
5196 return binop( Iop_And32, get_fpround(), mkU32(3) );
5199 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
5201 return mkU32(Irrm_NEAREST);
5205 /* --------- Get/set FP register tag bytes. --------- */
5207 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
5209 static void put_ST_TAG ( Int i, IRExpr* value )
5211 IRRegArray* descr;
5212 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
5213 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
5214 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
5217 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be
5218 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */
5220 static IRExpr* get_ST_TAG ( Int i )
5222 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
5223 return IRExpr_GetI( descr, get_ftop(), i );
5227 /* --------- Get/set FP registers. --------- */
5229 /* Given i, and some expression e, emit 'ST(i) = e' and set the
5230 register's tag to indicate the register is full. The previous
5231 state of the register is not checked. */
5233 static void put_ST_UNCHECKED ( Int i, IRExpr* value )
5235 IRRegArray* descr;
5236 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64);
5237 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
5238 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
5239 /* Mark the register as in-use. */
5240 put_ST_TAG(i, mkU8(1));
5243 /* Given i, and some expression e, emit
5244 ST(i) = is_full(i) ? NaN : e
5245      and set the tag accordingly. */
5248 static void put_ST ( Int i, IRExpr* value )
5250 put_ST_UNCHECKED(
5252 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
5253 /* non-0 means full */
5254 mkQNaN64(),
5255 /* 0 means empty */
5256 value
5262 /* Given i, generate an expression yielding 'ST(i)'. */
5264 static IRExpr* get_ST_UNCHECKED ( Int i )
5266 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
5267 return IRExpr_GetI( descr, get_ftop(), i );
5271 /* Given i, generate an expression yielding
5272      is_full(i) ? ST(i) : NaN */
5275 static IRExpr* get_ST ( Int i )
5277 return
5278 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
5279 /* non-0 means full */
5280 get_ST_UNCHECKED(i),
5281 /* 0 means empty */
5282 mkQNaN64());
5286 /* Given i, and some expression e, and a condition cond, generate IR
5287 which has the same effect as put_ST(i,e) when cond is true and has
5288 no effect when cond is false. Given the lack of proper
5289      if-then-else in the IR, this is pretty tricky. */
5292 static void maybe_put_ST ( IRTemp cond, Int i, IRExpr* value )
5294 // new_tag = if cond then FULL else old_tag
5295 // new_val = if cond then (if old_tag==FULL then NaN else val)
5296 // else old_val
5298 IRTemp old_tag = newTemp(Ity_I8);
5299 assign(old_tag, get_ST_TAG(i));
5300 IRTemp new_tag = newTemp(Ity_I8);
5301 assign(new_tag,
5302 IRExpr_ITE(mkexpr(cond), mkU8(1)/*FULL*/, mkexpr(old_tag)));
5304 IRTemp old_val = newTemp(Ity_F64);
5305 assign(old_val, get_ST_UNCHECKED(i));
5306 IRTemp new_val = newTemp(Ity_F64);
5307 assign(new_val,
5308 IRExpr_ITE(mkexpr(cond),
5309 IRExpr_ITE(binop(Iop_CmpNE8, mkexpr(old_tag), mkU8(0)),
5310 /* non-0 means full */
5311 mkQNaN64(),
5312 /* 0 means empty */
5313 value),
5314 mkexpr(old_val)));
5316 put_ST_UNCHECKED(i, mkexpr(new_val));
5317 // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So
5318 // now set it to new_tag instead.
5319 put_ST_TAG(i, mkexpr(new_tag));
5322 /* Adjust FTOP downwards by one register. */
5324 static void fp_push ( void )
5326 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) );
5329 /* Adjust FTOP downwards by one register when COND is 1:I1. Else
5330 don't change it. */
5332 static void maybe_fp_push ( IRTemp cond )
5334 put_ftop( binop(Iop_Sub32, get_ftop(), unop(Iop_1Uto32,mkexpr(cond))) );
5337 /* Adjust FTOP upwards by one register, and mark the vacated register
5338 as empty. */
5340 static void fp_pop ( void )
5342 put_ST_TAG(0, mkU8(0));
5343 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
5346 /* Set the C2 bit of the FPU status register to e[0]. Assumes that
5347      e[31:1] == 0. */
5349 static void set_C2 ( IRExpr* e )
5351 IRExpr* cleared = binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2));
5352 put_C3210( binop(Iop_Or64,
5353 cleared,
5354 binop(Iop_Shl64, e, mkU8(AMD64G_FC_SHIFT_C2))) );
5357 /* Generate code to check that abs(d64) < 2^63 and is finite. This is
5358 used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The
5359 test is simple, but the derivation of it is not so simple.
5361 The exponent field for an IEEE754 double is 11 bits. That means it
5362 can take values 0 through 0x7FF. If the exponent has value 0x7FF,
5363 the number is either a NaN or an Infinity and so is not finite.
5364 Furthermore, a finite value of exactly 2^63 is the smallest value
5365 that has exponent value 0x43E. Hence, what we need to do is
5366 extract the exponent, ignoring the sign bit and mantissa, and check
5367 it is < 0x43E, or <= 0x43D.
5369 To make this easily applicable to 32- and 64-bit targets, a
5370 roundabout approach is used. First the number is converted to I64,
5371 then the top 32 bits are taken. Shifting them right by 20 bits
5372 places the sign bit and exponent in the bottom 12 bits. Anding
5373 with 0x7FF gets rid of the sign bit, leaving just the exponent
5374      available for comparison. */
5376 static IRTemp math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64 )
5378 IRTemp i64 = newTemp(Ity_I64);
5379 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(d64)) );
5380 IRTemp exponent = newTemp(Ity_I32);
5381 assign(exponent,
5382 binop(Iop_And32,
5383 binop(Iop_Shr32, unop(Iop_64HIto32, mkexpr(i64)), mkU8(20)),
5384 mkU32(0x7FF)));
5385 IRTemp in_range_and_finite = newTemp(Ity_I1);
5386 assign(in_range_and_finite,
5387 binop(Iop_CmpLE32U, mkexpr(exponent), mkU32(0x43D)));
5388 return in_range_and_finite;
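/* Editorial sketch (not part of this file): the range/finiteness test
   above, done directly in C.  Reinterpret the double as an integer,
   pull out the 11-bit exponent, and accept only exponents <= 0x43D,
   which covers exactly the finite values with |d| < 2^63.  Names are
   illustrative only. */
#include <stdint.h>
#include <string.h>

static int trig_arg_ok_model ( double d )
{
   uint64_t bits;
   memcpy(&bits, &d, sizeof bits);                       /* ReinterpF64asI64    */
   uint32_t exponent = (uint32_t)((bits >> 52) & 0x7FF); /* drop sign, mantissa */
   return exponent <= 0x43D;                             /* finite and < 2^63   */
}
/* e.g. trig_arg_ok_model(1.0) == 1, trig_arg_ok_model(1e30) == 0 */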
5391 /* Invent a plausible-looking FPU status word value:
5392       ((ftop & 7) << 11) | (c3210 & 0x4700) */
5394 static IRExpr* get_FPU_sw ( void )
5396 return
5397 unop(Iop_32to16,
5398 binop(Iop_Or32,
5399 binop(Iop_Shl32,
5400 binop(Iop_And32, get_ftop(), mkU32(7)),
5401 mkU8(11)),
5402 binop(Iop_And32, unop(Iop_64to32, get_C3210()),
5403 mkU32(0x4700))
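/* Editorial sketch (not part of this file): the synthesised FPU status
   word above, as one C expression.  The 3-bit top-of-stack index goes
   in bits 13..11, and C3,C2,C1,C0 keep their usual positions (the
   0x4700 mask).  Illustrative only. */
#include <stdint.h>

static uint16_t fpu_sw_model ( uint32_t ftop, uint32_t c3210 )
{
   return (uint16_t)( ((ftop & 7u) << 11) | (c3210 & 0x4700u) );
}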
5408 /* Generate a dirty helper call that initialises the x87 state a la
5409 FINIT. If |guard| is NULL, it is done unconditionally. Otherwise
5410    |guard| is used as a guarding condition. */
5412 static void gen_FINIT_SEQUENCE ( IRExpr* guard )
5414 /* Uses dirty helper:
5415          void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* ) */
5416 IRDirty* d = unsafeIRDirty_0_N (
5417 0/*regparms*/,
5418 "amd64g_dirtyhelper_FINIT",
5419 &amd64g_dirtyhelper_FINIT,
5420 mkIRExprVec_1( IRExpr_GSPTR() )
5423 /* declare we're writing guest state */
5424 d->nFxState = 5;
5425 vex_bzero(&d->fxState, sizeof(d->fxState));
5427 d->fxState[0].fx = Ifx_Write;
5428 d->fxState[0].offset = OFFB_FTOP;
5429 d->fxState[0].size = sizeof(UInt);
5431 d->fxState[1].fx = Ifx_Write;
5432 d->fxState[1].offset = OFFB_FPREGS;
5433 d->fxState[1].size = 8 * sizeof(ULong);
5435 d->fxState[2].fx = Ifx_Write;
5436 d->fxState[2].offset = OFFB_FPTAGS;
5437 d->fxState[2].size = 8 * sizeof(UChar);
5439 d->fxState[3].fx = Ifx_Write;
5440 d->fxState[3].offset = OFFB_FPROUND;
5441 d->fxState[3].size = sizeof(ULong);
5443 d->fxState[4].fx = Ifx_Write;
5444 d->fxState[4].offset = OFFB_FC3210;
5445 d->fxState[4].size = sizeof(ULong);
5447 if (guard)
5448 d->guard = guard;
5450 stmt( IRStmt_Dirty(d) );
5454 /* ------------------------------------------------------- */
5455 /* Given all that stack-mangling junk, we can now go ahead
5456    and describe FP instructions. */
5459 /* ST(0) = ST(0) `op` mem64/32(addr)
5460    Need to check ST(0)'s tag on read, but not on write. */
5462 static
5463 void fp_do_op_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf,
5464 IROp op, Bool dbl )
5466 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
5467 if (dbl) {
5468 put_ST_UNCHECKED(0,
5469 triop( op,
5470 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5471 get_ST(0),
5472 loadLE(Ity_F64,mkexpr(addr))
5474 } else {
5475 put_ST_UNCHECKED(0,
5476 triop( op,
5477 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5478 get_ST(0),
5479 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr)))
5485 /* ST(0) = mem64/32(addr) `op` ST(0)
5486    Need to check ST(0)'s tag on read, but not on write. */
5488 static
5489 void fp_do_oprev_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf,
5490 IROp op, Bool dbl )
5492 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
5493 if (dbl) {
5494 put_ST_UNCHECKED(0,
5495 triop( op,
5496 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5497 loadLE(Ity_F64,mkexpr(addr)),
5498 get_ST(0)
5500 } else {
5501 put_ST_UNCHECKED(0,
5502 triop( op,
5503 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5504 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))),
5505 get_ST(0)
5511 /* ST(dst) = ST(dst) `op` ST(src).
5512    Check dst and src tags when reading but not on write. */
5514 static
5515 void fp_do_op_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
5516 Bool pop_after )
5518 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
5519 put_ST_UNCHECKED(
5520 st_dst,
5521 triop( op,
5522 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5523 get_ST(st_dst),
5524 get_ST(st_src) )
5526 if (pop_after)
5527 fp_pop();
5530 /* ST(dst) = ST(src) `op` ST(dst).
5531    Check dst and src tags when reading but not on write. */
5533 static
5534 void fp_do_oprev_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
5535 Bool pop_after )
5537 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
5538 put_ST_UNCHECKED(
5539 st_dst,
5540 triop( op,
5541 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5542 get_ST(st_src),
5543 get_ST(st_dst) )
5545 if (pop_after)
5546 fp_pop();
5549 /* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */
5550 static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after )
5552 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i);
5553 /* This is a bit of a hack (and isn't really right). It sets
5554 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
5555      documentation implies A and S are unchanged. */
5557 /* It's also fishy in that it is used both for COMIP and
5558 UCOMIP, and they aren't the same (although similar). */
5559 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
5560 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
5561 stmt( IRStmt_Put(
5562 OFFB_CC_DEP1,
5563 binop( Iop_And64,
5564 unop( Iop_32Uto64,
5565 binop(Iop_CmpF64, get_ST(0), get_ST(i))),
5566 mkU64(0x45)
5567 )));
5568 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
5569 if (pop_after)
5570 fp_pop();
5574 /* returns
5575    32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 ) */
5577 static IRExpr* x87ishly_qnarrow_32_to_16 ( IRExpr* e32 )
5579 IRTemp t32 = newTemp(Ity_I32);
5580 assign( t32, e32 );
5581 return
5582 IRExpr_ITE(
5583 binop(Iop_CmpLT64U,
5584 unop(Iop_32Uto64,
5585 binop(Iop_Add32, mkexpr(t32), mkU32(32768))),
5586 mkU64(65536)),
5587 unop(Iop_32to16, mkexpr(t32)),
5588 mkU16( 0x8000 ) );
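/* Editorial sketch (not part of this file): the saturating 32->16
   narrow above as a C model.  Adding 32768 and doing one unsigned
   compare against 65536 is a branch-free way of asking "is the value in
   -32768..32767?"; out-of-range values collapse to 0x8000, the x87
   indefinite-integer result.  Name is illustrative only. */
#include <stdint.h>

static uint16_t qnarrow_32_to_16_model ( int32_t e32 )
{
   /* (uint32)(e32 + 32768) < 65536  <=>  -32768 <= e32 <= 32767 */
   uint32_t biased = (uint32_t)e32 + 32768u;
   if (biased < 65536u)
      return (uint16_t)e32;       /* in range: plain truncation       */
   return 0x8000;                 /* out of range: indefinite integer */
}
/* e.g. qnarrow_32_to_16_model(40000) == 0x8000,
        qnarrow_32_to_16_model(-5)    == 0xFFFB */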
5592 static
5593 ULong dis_FPU ( /*OUT*/Bool* decode_ok,
5594 const VexAbiInfo* vbi, Prefix pfx, Long delta )
5596 Int len;
5597 UInt r_src, r_dst;
5598 HChar dis_buf[50];
5599 IRTemp t1, t2;
5601 /* On entry, delta points at the second byte of the insn (the modrm
5602 byte).*/
5603 UChar first_opcode = getUChar(delta-1);
5604 UChar modrm = getUChar(delta+0);
5606 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
5608 if (first_opcode == 0xD8) {
5609 if (modrm < 0xC0) {
5611 /* bits 5,4,3 are an opcode extension, and the modRM also
5612 specifies an address. */
5613 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
5614 delta += len;
5616 switch (gregLO3ofRM(modrm)) {
5618 case 0: /* FADD single-real */
5619 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False );
5620 break;
5622 case 1: /* FMUL single-real */
5623 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False );
5624 break;
5626 case 2: /* FCOM single-real */
5627 DIP("fcoms %s\n", dis_buf);
5628 /* This forces C1 to zero, which isn't right. */
5629 /* The AMD documentation suggests that forcing C1 to
5630 zero is correct (Eliot Moss) */
5631 put_C3210(
5632 unop( Iop_32Uto64,
5633 binop( Iop_And32,
5634 binop(Iop_Shl32,
5635 binop(Iop_CmpF64,
5636 get_ST(0),
5637 unop(Iop_F32toF64,
5638 loadLE(Ity_F32,mkexpr(addr)))),
5639 mkU8(8)),
5640 mkU32(0x4500)
5641 )));
5642 break;
5644 case 3: /* FCOMP single-real */
5645 /* The AMD documentation suggests that forcing C1 to
5646 zero is correct (Eliot Moss) */
5647 DIP("fcomps %s\n", dis_buf);
5648 /* This forces C1 to zero, which isn't right. */
5649 put_C3210(
5650 unop( Iop_32Uto64,
5651 binop( Iop_And32,
5652 binop(Iop_Shl32,
5653 binop(Iop_CmpF64,
5654 get_ST(0),
5655 unop(Iop_F32toF64,
5656 loadLE(Ity_F32,mkexpr(addr)))),
5657 mkU8(8)),
5658 mkU32(0x4500)
5659 )));
5660 fp_pop();
5661 break;
5663 case 4: /* FSUB single-real */
5664 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False );
5665 break;
5667 case 5: /* FSUBR single-real */
5668 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False );
5669 break;
5671 case 6: /* FDIV single-real */
5672 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False );
5673 break;
5675 case 7: /* FDIVR single-real */
5676 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False );
5677 break;
5679 default:
5680 vex_printf("unhandled opc_aux = 0x%2x\n",
5681 (UInt)gregLO3ofRM(modrm));
5682 vex_printf("first_opcode == 0xD8\n");
5683 goto decode_fail;
5685 } else {
5686 delta++;
5687 switch (modrm) {
5689 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
5690 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False );
5691 break;
5693 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
5694 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False );
5695 break;
5697 /* Dunno if this is right */
5698 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
5699 r_dst = (UInt)modrm - 0xD0;
5700 DIP("fcom %%st(0),%%st(%u)\n", r_dst);
5701 /* This forces C1 to zero, which isn't right. */
5702 put_C3210(
5703 unop(Iop_32Uto64,
5704 binop( Iop_And32,
5705 binop(Iop_Shl32,
5706 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5707 mkU8(8)),
5708 mkU32(0x4500)
5709 )));
5710 break;
5712 /* Dunno if this is right */
5713 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
5714 r_dst = (UInt)modrm - 0xD8;
5715 DIP("fcomp %%st(0),%%st(%u)\n", r_dst);
5716 /* This forces C1 to zero, which isn't right. */
5717 put_C3210(
5718 unop(Iop_32Uto64,
5719 binop( Iop_And32,
5720 binop(Iop_Shl32,
5721 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5722 mkU8(8)),
5723 mkU32(0x4500)
5724 )));
5725 fp_pop();
5726 break;
5728 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
5729 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False );
5730 break;
5732 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
5733 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False );
5734 break;
5736 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
5737 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False );
5738 break;
5740 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
5741 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False );
5742 break;
5744 default:
5745 goto decode_fail;
5750 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
5751 else
5752 if (first_opcode == 0xD9) {
5753 if (modrm < 0xC0) {
5755 /* bits 5,4,3 are an opcode extension, and the modRM also
5756 specifies an address. */
5757 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
5758 delta += len;
5760 switch (gregLO3ofRM(modrm)) {
5762 case 0: /* FLD single-real */
5763 DIP("flds %s\n", dis_buf);
5764 fp_push();
5765 put_ST(0, unop(Iop_F32toF64,
5766 loadLE(Ity_F32, mkexpr(addr))));
5767 break;
5769 case 2: /* FST single-real */
5770 DIP("fsts %s\n", dis_buf);
5771 storeLE(mkexpr(addr),
5772 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
5773 break;
5775 case 3: /* FSTP single-real */
5776 DIP("fstps %s\n", dis_buf);
5777 storeLE(mkexpr(addr),
5778 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
5779 fp_pop();
5780 break;
5782 case 4: { /* FLDENV m28 */
5783 /* Uses dirty helper:
5784                  VexEmNote amd64g_dirtyhelper_FLDENV ( VexGuestAMD64State*, HWord ) */
5785 IRTemp ew = newTemp(Ity_I32);
5786 IRTemp w64 = newTemp(Ity_I64);
5787 IRDirty* d = unsafeIRDirty_0_N (
5788 0/*regparms*/,
5789 "amd64g_dirtyhelper_FLDENV",
5790 &amd64g_dirtyhelper_FLDENV,
5791 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
5793 d->tmp = w64;
5794 /* declare we're reading memory */
5795 d->mFx = Ifx_Read;
5796 d->mAddr = mkexpr(addr);
5797 d->mSize = 28;
5799 /* declare we're writing guest state */
5800 d->nFxState = 4;
5801 vex_bzero(&d->fxState, sizeof(d->fxState));
5803 d->fxState[0].fx = Ifx_Write;
5804 d->fxState[0].offset = OFFB_FTOP;
5805 d->fxState[0].size = sizeof(UInt);
5807 d->fxState[1].fx = Ifx_Write;
5808 d->fxState[1].offset = OFFB_FPTAGS;
5809 d->fxState[1].size = 8 * sizeof(UChar);
5811 d->fxState[2].fx = Ifx_Write;
5812 d->fxState[2].offset = OFFB_FPROUND;
5813 d->fxState[2].size = sizeof(ULong);
5815 d->fxState[3].fx = Ifx_Write;
5816 d->fxState[3].offset = OFFB_FC3210;
5817 d->fxState[3].size = sizeof(ULong);
5819 stmt( IRStmt_Dirty(d) );
5821 /* ew contains any emulation warning we may need to
5822 issue. If needed, side-exit to the next insn,
5823 reporting the warning, so that Valgrind's dispatcher
5824 sees the warning. */
5825 assign(ew, unop(Iop_64to32,mkexpr(w64)) );
5826 put_emwarn( mkexpr(ew) );
5827 stmt(
5828 IRStmt_Exit(
5829 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
5830 Ijk_EmWarn,
5831 IRConst_U64( guest_RIP_bbstart+delta ),
5832 OFFB_RIP
5836 DIP("fldenv %s\n", dis_buf);
5837 break;
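                  /* For orientation: the 28-byte mSize above is the
                     32-bit-operand protected-mode FP environment layout,
                     i.e. seven 32-bit slots (FCW, FSW, FTW, FIP,
                     FCS+opcode, FDP, FDS), 7 * 4 == 28 bytes. */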
5840 case 5: {/* FLDCW */
5841 /* The only thing we observe in the control word is the
5842 rounding mode. Therefore, pass the 16-bit value
5843 (x87 native-format control word) to a clean helper,
5844 getting back a 64-bit value, the lower half of which
5845 is the FPROUND value to store, and the upper half of
5846 which is the emulation-warning token which may be
5847                  generated.  */
5849               /* ULong amd64g_check_fldcw ( ULong ); */
5850 IRTemp t64 = newTemp(Ity_I64);
5851 IRTemp ew = newTemp(Ity_I32);
5852 DIP("fldcw %s\n", dis_buf);
5853 assign( t64, mkIRExprCCall(
5854 Ity_I64, 0/*regparms*/,
5855 "amd64g_check_fldcw",
5856 &amd64g_check_fldcw,
5857 mkIRExprVec_1(
5858 unop( Iop_16Uto64,
5859 loadLE(Ity_I16, mkexpr(addr)))
5864 put_fpround( unop(Iop_64to32, mkexpr(t64)) );
5865 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
5866 put_emwarn( mkexpr(ew) );
5867 /* Finally, if an emulation warning was reported,
5868 side-exit to the next insn, reporting the warning,
5869 so that Valgrind's dispatcher sees the warning. */
5870 stmt(
5871 IRStmt_Exit(
5872 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
5873 Ijk_EmWarn,
5874 IRConst_U64( guest_RIP_bbstart+delta ),
5875 OFFB_RIP
5878 break;
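                  /* A sketch of what the checking helper is assumed to do
                     (this is an assumption, not taken from its definition):
                     the x87 rounding control is bits 11:10 of the control
                     word, and its encoding (00=nearest, 01=-inf, 10=+inf,
                     11=zero) coincides with the Irrm_* values, so roughly
                        fpround = (cw >> 10) & 3;
                     with any other observable control-word setting reported
                     via the emulation-warning half of the result. */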
5881 case 6: { /* FNSTENV m28 */
5882 /* Uses dirty helper:
5883                  void amd64g_dirtyhelper_FSTENV ( VexGuestAMD64State*, HWord ) */
5884 IRDirty* d = unsafeIRDirty_0_N (
5885 0/*regparms*/,
5886 "amd64g_dirtyhelper_FSTENV",
5887 &amd64g_dirtyhelper_FSTENV,
5888 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
5890 /* declare we're writing memory */
5891 d->mFx = Ifx_Write;
5892 d->mAddr = mkexpr(addr);
5893 d->mSize = 28;
5895 /* declare we're reading guest state */
5896 d->nFxState = 4;
5897 vex_bzero(&d->fxState, sizeof(d->fxState));
5899 d->fxState[0].fx = Ifx_Read;
5900 d->fxState[0].offset = OFFB_FTOP;
5901 d->fxState[0].size = sizeof(UInt);
5903 d->fxState[1].fx = Ifx_Read;
5904 d->fxState[1].offset = OFFB_FPTAGS;
5905 d->fxState[1].size = 8 * sizeof(UChar);
5907 d->fxState[2].fx = Ifx_Read;
5908 d->fxState[2].offset = OFFB_FPROUND;
5909 d->fxState[2].size = sizeof(ULong);
5911 d->fxState[3].fx = Ifx_Read;
5912 d->fxState[3].offset = OFFB_FC3210;
5913 d->fxState[3].size = sizeof(ULong);
5915 stmt( IRStmt_Dirty(d) );
5917 DIP("fnstenv %s\n", dis_buf);
5918 break;
5921 case 7: /* FNSTCW */
5922 /* Fake up a native x87 FPU control word. The only
5923 thing it depends on is FPROUND[1:0], so call a clean
5924 helper to cook it up. */
5925 /* ULong amd64g_create_fpucw ( ULong fpround ) */
5926 DIP("fnstcw %s\n", dis_buf);
5927 storeLE(
5928 mkexpr(addr),
5929 unop( Iop_64to16,
5930 mkIRExprCCall(
5931 Ity_I64, 0/*regp*/,
5932 "amd64g_create_fpucw", &amd64g_create_fpucw,
5933 mkIRExprVec_1( unop(Iop_32Uto64, get_fpround()) )
5937 break;
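                  /* A guess at what amd64g_create_fpucw computes (offered
                     only as orientation, not taken from its definition):
                     start from the power-on default control word 0x037F and
                     overwrite the RC field, roughly
                        cw = 0x037F | ((fpround & 3) << 10);
                     so e.g. fpround == Irrm_ZERO (3) gives 0x0F7F. */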
5939 default:
5940 vex_printf("unhandled opc_aux = 0x%2x\n",
5941 (UInt)gregLO3ofRM(modrm));
5942 vex_printf("first_opcode == 0xD9\n");
5943 goto decode_fail;
5946 } else {
5947 delta++;
5948 switch (modrm) {
5950 case 0xC0 ... 0xC7: /* FLD %st(?) */
5951 r_src = (UInt)modrm - 0xC0;
5952 DIP("fld %%st(%u)\n", r_src);
5953 t1 = newTemp(Ity_F64);
5954 assign(t1, get_ST(r_src));
5955 fp_push();
5956 put_ST(0, mkexpr(t1));
5957 break;
5959 case 0xC8 ... 0xCF: /* FXCH %st(?) */
5960 r_src = (UInt)modrm - 0xC8;
5961 DIP("fxch %%st(%u)\n", r_src);
5962 t1 = newTemp(Ity_F64);
5963 t2 = newTemp(Ity_F64);
5964 assign(t1, get_ST(0));
5965 assign(t2, get_ST(r_src));
5966 put_ST_UNCHECKED(0, mkexpr(t2));
5967 put_ST_UNCHECKED(r_src, mkexpr(t1));
5968 break;
5970 case 0xE0: /* FCHS */
5971 DIP("fchs\n");
5972 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0)));
5973 break;
5975 case 0xE1: /* FABS */
5976 DIP("fabs\n");
5977 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0)));
5978 break;
5980 case 0xE5: { /* FXAM */
5981 /* This is an interesting one. It examines %st(0),
5982 regardless of whether the tag says it's empty or not.
5983 Here, just pass both the tag (in our format) and the
5984 value (as a double, actually a ULong) to a helper
5985 function. */
5986 IRExpr** args
5987 = mkIRExprVec_2( unop(Iop_8Uto64, get_ST_TAG(0)),
5988 unop(Iop_ReinterpF64asI64,
5989 get_ST_UNCHECKED(0)) );
5990 put_C3210(mkIRExprCCall(
5991 Ity_I64,
5992 0/*regparm*/,
5993 "amd64g_calculate_FXAM", &amd64g_calculate_FXAM,
5994 args
5996 DIP("fxam\n");
5997 break;
6000 case 0xE8: /* FLD1 */
6001 DIP("fld1\n");
6002 fp_push();
6003 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
6004 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL)));
6005 break;
6007 case 0xE9: /* FLDL2T */
6008 DIP("fldl2t\n");
6009 fp_push();
6010 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
6011 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL)));
6012 break;
6014 case 0xEA: /* FLDL2E */
6015 DIP("fldl2e\n");
6016 fp_push();
6017 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
6018 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL)));
6019 break;
6021 case 0xEB: /* FLDPI */
6022 DIP("fldpi\n");
6023 fp_push();
6024 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
6025 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL)));
6026 break;
6028 case 0xEC: /* FLDLG2 */
6029 DIP("fldlg2\n");
6030 fp_push();
6031 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
6032 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL)));
6033 break;
6035 case 0xED: /* FLDLN2 */
6036 DIP("fldln2\n");
6037 fp_push();
6038 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
6039 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL)));
6040 break;
6042 case 0xEE: /* FLDZ */
6043 DIP("fldz\n");
6044 fp_push();
6045 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
6046 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL)));
6047 break;
6049 case 0xF0: /* F2XM1 */
6050 DIP("f2xm1\n");
6051 put_ST_UNCHECKED(0,
6052 binop(Iop_2xm1F64,
6053 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6054 get_ST(0)));
6055 break;
6057 case 0xF1: /* FYL2X */
6058 DIP("fyl2x\n");
6059 put_ST_UNCHECKED(1,
6060 triop(Iop_Yl2xF64,
6061 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6062 get_ST(1),
6063 get_ST(0)));
6064 fp_pop();
6065 break;
6067 case 0xF2: { /* FPTAN */
6068 DIP("fptan\n");
6069 IRTemp argD = newTemp(Ity_F64);
6070 assign(argD, get_ST(0));
6071 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
6072 IRTemp resD = newTemp(Ity_F64);
6073 assign(resD,
6074 IRExpr_ITE(
6075 mkexpr(argOK),
6076 binop(Iop_TanF64,
6077 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6078 mkexpr(argD)),
6079 mkexpr(argD))
6081 put_ST_UNCHECKED(0, mkexpr(resD));
6082 /* Conditionally push 1.0 on the stack, if the arg is
6083 in range */
6084 maybe_fp_push(argOK);
6085 maybe_put_ST(argOK, 0,
6086 IRExpr_Const(IRConst_F64(1.0)));
6087 set_C2( binop(Iop_Xor64,
6088 unop(Iop_1Uto64, mkexpr(argOK)),
6089 mkU64(1)) );
6090 break;
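                  /* This mirrors hardware FPTAN behaviour: for an in-range
                     argument (|x| < 2^63) the tangent is computed, 1.0 is
                     pushed and C2 is cleared; otherwise C2 is set, ST(0) is
                     left unchanged and nothing is pushed.  Non-finite
                     arguments are folded into the out-of-range case here. */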
6093 case 0xF3: /* FPATAN */
6094 DIP("fpatan\n");
6095 put_ST_UNCHECKED(1,
6096 triop(Iop_AtanF64,
6097 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6098 get_ST(1),
6099 get_ST(0)));
6100 fp_pop();
6101 break;
6103 case 0xF4: { /* FXTRACT */
6104 IRTemp argF = newTemp(Ity_F64);
6105 IRTemp sigF = newTemp(Ity_F64);
6106 IRTemp expF = newTemp(Ity_F64);
6107 IRTemp argI = newTemp(Ity_I64);
6108 IRTemp sigI = newTemp(Ity_I64);
6109 IRTemp expI = newTemp(Ity_I64);
6110 DIP("fxtract\n");
6111 assign( argF, get_ST(0) );
6112 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF)));
6113 assign( sigI,
6114 mkIRExprCCall(
6115 Ity_I64, 0/*regparms*/,
6116 "x86amd64g_calculate_FXTRACT",
6117 &x86amd64g_calculate_FXTRACT,
6118 mkIRExprVec_2( mkexpr(argI),
6119 mkIRExpr_HWord(0)/*sig*/ ))
6121 assign( expI,
6122 mkIRExprCCall(
6123 Ity_I64, 0/*regparms*/,
6124 "x86amd64g_calculate_FXTRACT",
6125 &x86amd64g_calculate_FXTRACT,
6126 mkIRExprVec_2( mkexpr(argI),
6127 mkIRExpr_HWord(1)/*exp*/ ))
6129 assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) );
6130 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) );
6131 /* exponent */
6132 put_ST_UNCHECKED(0, mkexpr(expF) );
6133 fp_push();
6134 /* significand */
6135 put_ST(0, mkexpr(sigF) );
6136 break;
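                  /* Worked example: FXTRACT splits ST(0) into an unbiased
                     exponent and a significand in [1.0, 2.0).  For
                     ST(0) == 12.0 == 1.5 * 2^3, the code above leaves the
                     exponent 3.0 in what becomes ST(1) and pushes the
                     significand 1.5 as the new ST(0). */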
6139 case 0xF5: { /* FPREM1 -- IEEE compliant */
6140 IRTemp a1 = newTemp(Ity_F64);
6141 IRTemp a2 = newTemp(Ity_F64);
6142 DIP("fprem1\n");
6143 /* Do FPREM1 twice, once to get the remainder, and once
6144 to get the C3210 flag values. */
6145 assign( a1, get_ST(0) );
6146 assign( a2, get_ST(1) );
6147 put_ST_UNCHECKED(0,
6148 triop(Iop_PRem1F64,
6149 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6150 mkexpr(a1),
6151 mkexpr(a2)));
6152 put_C3210(
6153 unop(Iop_32Uto64,
6154 triop(Iop_PRem1C3210F64,
6155 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6156 mkexpr(a1),
6157 mkexpr(a2)) ));
6158 break;
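                  /* FPREM1 differs from FPREM (case 0xF8 below) only in how
                     the quotient is rounded: FPREM truncates, FPREM1 rounds
                     to nearest.  E.g. for ST(0)=5.0, ST(1)=3.0:
                        FPREM : q = trunc(5/3) = 1,  rem = 5 - 1*3 =  2.0
                        FPREM1: q = rne(5/3)   = 2,  rem = 5 - 2*3 = -1.0
                     The second triop above redoes the operation purely to
                     obtain the C3210 quotient bits. */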
6161 case 0xF7: /* FINCSTP */
6162 DIP("fincstp\n");
6163 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
6164 break;
6166 case 0xF8: { /* FPREM -- not IEEE compliant */
6167 IRTemp a1 = newTemp(Ity_F64);
6168 IRTemp a2 = newTemp(Ity_F64);
6169 DIP("fprem\n");
6170 /* Do FPREM twice, once to get the remainder, and once
6171 to get the C3210 flag values. */
6172 assign( a1, get_ST(0) );
6173 assign( a2, get_ST(1) );
6174 put_ST_UNCHECKED(0,
6175 triop(Iop_PRemF64,
6176 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6177 mkexpr(a1),
6178 mkexpr(a2)));
6179 put_C3210(
6180 unop(Iop_32Uto64,
6181 triop(Iop_PRemC3210F64,
6182 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6183 mkexpr(a1),
6184 mkexpr(a2)) ));
6185 break;
6188 case 0xF9: /* FYL2XP1 */
6189 DIP("fyl2xp1\n");
6190 put_ST_UNCHECKED(1,
6191 triop(Iop_Yl2xp1F64,
6192 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6193 get_ST(1),
6194 get_ST(0)));
6195 fp_pop();
6196 break;
6198 case 0xFA: /* FSQRT */
6199 DIP("fsqrt\n");
6200 put_ST_UNCHECKED(0,
6201 binop(Iop_SqrtF64,
6202 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6203 get_ST(0)));
6204 break;
6206 case 0xFB: { /* FSINCOS */
6207 DIP("fsincos\n");
6208 IRTemp argD = newTemp(Ity_F64);
6209 assign(argD, get_ST(0));
6210 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
6211 IRTemp resD = newTemp(Ity_F64);
6212 assign(resD,
6213 IRExpr_ITE(
6214 mkexpr(argOK),
6215 binop(Iop_SinF64,
6216 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6217 mkexpr(argD)),
6218 mkexpr(argD))
6220 put_ST_UNCHECKED(0, mkexpr(resD));
6221 /* Conditionally push the cos value on the stack, if
6222 the arg is in range */
6223 maybe_fp_push(argOK);
6224 maybe_put_ST(argOK, 0,
6225 binop(Iop_CosF64,
6226 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6227 mkexpr(argD)));
6228 set_C2( binop(Iop_Xor64,
6229 unop(Iop_1Uto64, mkexpr(argOK)),
6230 mkU64(1)) );
6231 break;
6234 case 0xFC: /* FRNDINT */
6235 DIP("frndint\n");
6236 put_ST_UNCHECKED(0,
6237 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) );
6238 break;
6240 case 0xFD: /* FSCALE */
6241 DIP("fscale\n");
6242 put_ST_UNCHECKED(0,
6243 triop(Iop_ScaleF64,
6244 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6245 get_ST(0),
6246 get_ST(1)));
6247 break;
6249 case 0xFE: /* FSIN */
6250 case 0xFF: { /* FCOS */
6251 Bool isSIN = modrm == 0xFE;
6252 DIP("%s\n", isSIN ? "fsin" : "fcos");
6253 IRTemp argD = newTemp(Ity_F64);
6254 assign(argD, get_ST(0));
6255 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
6256 IRTemp resD = newTemp(Ity_F64);
6257 assign(resD,
6258 IRExpr_ITE(
6259 mkexpr(argOK),
6260 binop(isSIN ? Iop_SinF64 : Iop_CosF64,
6261 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6262 mkexpr(argD)),
6263 mkexpr(argD))
6265 put_ST_UNCHECKED(0, mkexpr(resD));
6266 set_C2( binop(Iop_Xor64,
6267 unop(Iop_1Uto64, mkexpr(argOK)),
6268 mkU64(1)) );
6269 break;
6272 default:
6273 goto decode_fail;
6278 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
6279 else
6280 if (first_opcode == 0xDA) {
6282 if (modrm < 0xC0) {
6284 /* bits 5,4,3 are an opcode extension, and the modRM also
6285 specifies an address. */
6286 IROp fop;
6287 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6288 delta += len;
6289 switch (gregLO3ofRM(modrm)) {
6291 case 0: /* FIADD m32int */ /* ST(0) += m32int */
6292 DIP("fiaddl %s\n", dis_buf);
6293 fop = Iop_AddF64;
6294 goto do_fop_m32;
6296 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
6297 DIP("fimull %s\n", dis_buf);
6298 fop = Iop_MulF64;
6299 goto do_fop_m32;
6301 case 4: /* FISUB m32int */ /* ST(0) -= m32int */
6302 DIP("fisubl %s\n", dis_buf);
6303 fop = Iop_SubF64;
6304 goto do_fop_m32;
6306 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
6307 DIP("fisubrl %s\n", dis_buf);
6308 fop = Iop_SubF64;
6309 goto do_foprev_m32;
6311 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
6312                  DIP("fidivl %s\n", dis_buf);
6313 fop = Iop_DivF64;
6314 goto do_fop_m32;
6316 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
6317 DIP("fidivrl %s\n", dis_buf);
6318 fop = Iop_DivF64;
6319 goto do_foprev_m32;
6321 do_fop_m32:
6322 put_ST_UNCHECKED(0,
6323 triop(fop,
6324 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6325 get_ST(0),
6326 unop(Iop_I32StoF64,
6327 loadLE(Ity_I32, mkexpr(addr)))));
6328 break;
6330 do_foprev_m32:
6331 put_ST_UNCHECKED(0,
6332 triop(fop,
6333 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6334 unop(Iop_I32StoF64,
6335 loadLE(Ity_I32, mkexpr(addr))),
6336 get_ST(0)));
6337 break;
6339 default:
6340 vex_printf("unhandled opc_aux = 0x%2x\n",
6341 (UInt)gregLO3ofRM(modrm));
6342 vex_printf("first_opcode == 0xDA\n");
6343 goto decode_fail;
6346 } else {
6348 delta++;
6349 switch (modrm) {
6351 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
6352 r_src = (UInt)modrm - 0xC0;
6353 DIP("fcmovb %%st(%u), %%st(0)\n", r_src);
6354 put_ST_UNCHECKED(0,
6355 IRExpr_ITE(
6356 mk_amd64g_calculate_condition(AMD64CondB),
6357 get_ST(r_src), get_ST(0)) );
6358 break;
6360 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
6361 r_src = (UInt)modrm - 0xC8;
6362 DIP("fcmovz %%st(%u), %%st(0)\n", r_src);
6363 put_ST_UNCHECKED(0,
6364 IRExpr_ITE(
6365 mk_amd64g_calculate_condition(AMD64CondZ),
6366 get_ST(r_src), get_ST(0)) );
6367 break;
6369 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
6370 r_src = (UInt)modrm - 0xD0;
6371 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src);
6372 put_ST_UNCHECKED(0,
6373 IRExpr_ITE(
6374 mk_amd64g_calculate_condition(AMD64CondBE),
6375 get_ST(r_src), get_ST(0)) );
6376 break;
6378 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
6379 r_src = (UInt)modrm - 0xD8;
6380 DIP("fcmovu %%st(%u), %%st(0)\n", r_src);
6381 put_ST_UNCHECKED(0,
6382 IRExpr_ITE(
6383 mk_amd64g_calculate_condition(AMD64CondP),
6384 get_ST(r_src), get_ST(0)) );
6385 break;
6387 case 0xE9: /* FUCOMPP %st(0),%st(1) */
6388 DIP("fucompp %%st(0),%%st(1)\n");
6389 /* This forces C1 to zero, which isn't right. */
6390 put_C3210(
6391 unop(Iop_32Uto64,
6392 binop( Iop_And32,
6393 binop(Iop_Shl32,
6394 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
6395 mkU8(8)),
6396 mkU32(0x4500)
6397 )));
6398 fp_pop();
6399 fp_pop();
6400 break;
6402 default:
6403 goto decode_fail;
6409 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
6410 else
6411 if (first_opcode == 0xDB) {
6412 if (modrm < 0xC0) {
6414 /* bits 5,4,3 are an opcode extension, and the modRM also
6415 specifies an address. */
6416 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6417 delta += len;
6419 switch (gregLO3ofRM(modrm)) {
6421 case 0: /* FILD m32int */
6422 DIP("fildl %s\n", dis_buf);
6423 fp_push();
6424 put_ST(0, unop(Iop_I32StoF64,
6425 loadLE(Ity_I32, mkexpr(addr))));
6426 break;
6428 case 1: /* FISTTPL m32 (SSE3) */
6429 DIP("fisttpl %s\n", dis_buf);
6430 storeLE( mkexpr(addr),
6431 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );
6432 fp_pop();
6433 break;
6435 case 2: /* FIST m32 */
6436 DIP("fistl %s\n", dis_buf);
6437 storeLE( mkexpr(addr),
6438 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
6439 break;
6441 case 3: /* FISTP m32 */
6442 DIP("fistpl %s\n", dis_buf);
6443 storeLE( mkexpr(addr),
6444 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
6445 fp_pop();
6446 break;
6448 case 5: { /* FLD extended-real */
6449 /* Uses dirty helper:
6450 ULong amd64g_loadF80le ( ULong )
6451 addr holds the address. First, do a dirty call to
6452 get hold of the data. */
6453 IRTemp val = newTemp(Ity_I64);
6454 IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );
6456 IRDirty* d = unsafeIRDirty_1_N (
6457 val,
6458 0/*regparms*/,
6459 "amd64g_dirtyhelper_loadF80le",
6460 &amd64g_dirtyhelper_loadF80le,
6461 args
6463 /* declare that we're reading memory */
6464 d->mFx = Ifx_Read;
6465 d->mAddr = mkexpr(addr);
6466 d->mSize = 10;
6468 /* execute the dirty call, dumping the result in val. */
6469 stmt( IRStmt_Dirty(d) );
6470 fp_push();
6471 put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));
6473 DIP("fldt %s\n", dis_buf);
6474 break;
6477 case 7: { /* FSTP extended-real */
6478 /* Uses dirty helper:
6479 void amd64g_storeF80le ( ULong addr, ULong data )
6481 IRExpr** args
6482 = mkIRExprVec_2( mkexpr(addr),
6483 unop(Iop_ReinterpF64asI64, get_ST(0)) );
6485 IRDirty* d = unsafeIRDirty_0_N (
6486 0/*regparms*/,
6487 "amd64g_dirtyhelper_storeF80le",
6488 &amd64g_dirtyhelper_storeF80le,
6489 args
6491 /* declare we're writing memory */
6492 d->mFx = Ifx_Write;
6493 d->mAddr = mkexpr(addr);
6494 d->mSize = 10;
6496 /* execute the dirty call. */
6497 stmt( IRStmt_Dirty(d) );
6498 fp_pop();
6500               DIP("fstpt %s\n", dis_buf);
6501 break;
6504 default:
6505 vex_printf("unhandled opc_aux = 0x%2x\n",
6506 (UInt)gregLO3ofRM(modrm));
6507 vex_printf("first_opcode == 0xDB\n");
6508 goto decode_fail;
6511 } else {
6513 delta++;
6514 switch (modrm) {
6516 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
6517 r_src = (UInt)modrm - 0xC0;
6518 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src);
6519 put_ST_UNCHECKED(0,
6520 IRExpr_ITE(
6521 mk_amd64g_calculate_condition(AMD64CondNB),
6522 get_ST(r_src), get_ST(0)) );
6523 break;
6525 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
6526 r_src = (UInt)modrm - 0xC8;
6527 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src);
6528 put_ST_UNCHECKED(
6530 IRExpr_ITE(
6531 mk_amd64g_calculate_condition(AMD64CondNZ),
6532 get_ST(r_src),
6533 get_ST(0)
6536 break;
6538 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
6539 r_src = (UInt)modrm - 0xD0;
6540 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src);
6541 put_ST_UNCHECKED(
6543 IRExpr_ITE(
6544 mk_amd64g_calculate_condition(AMD64CondNBE),
6545 get_ST(r_src),
6546 get_ST(0)
6549 break;
6551 case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
6552 r_src = (UInt)modrm - 0xD8;
6553 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src);
6554 put_ST_UNCHECKED(
6556 IRExpr_ITE(
6557 mk_amd64g_calculate_condition(AMD64CondNP),
6558 get_ST(r_src),
6559 get_ST(0)
6562 break;
6564 case 0xE2:
6565 DIP("fnclex\n");
6566 break;
6568 case 0xE3: {
6569 gen_FINIT_SEQUENCE(NULL/*no guarding condition*/);
6570 DIP("fninit\n");
6571 break;
6574 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
6575 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False );
6576 break;
6578 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
6579 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False );
6580 break;
6582 default:
6583 goto decode_fail;
6588 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
6589 else
6590 if (first_opcode == 0xDC) {
6591 if (modrm < 0xC0) {
6593 /* bits 5,4,3 are an opcode extension, and the modRM also
6594 specifies an address. */
6595 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6596 delta += len;
6598 switch (gregLO3ofRM(modrm)) {
6600 case 0: /* FADD double-real */
6601 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True );
6602 break;
6604 case 1: /* FMUL double-real */
6605 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True );
6606 break;
6608 case 2: /* FCOM double-real */
6609 DIP("fcoml %s\n", dis_buf);
6610 /* This forces C1 to zero, which isn't right. */
6611 put_C3210(
6612 unop(Iop_32Uto64,
6613 binop( Iop_And32,
6614 binop(Iop_Shl32,
6615 binop(Iop_CmpF64,
6616 get_ST(0),
6617 loadLE(Ity_F64,mkexpr(addr))),
6618 mkU8(8)),
6619 mkU32(0x4500)
6620 )));
6621 break;
6623 case 3: /* FCOMP double-real */
6624 DIP("fcompl %s\n", dis_buf);
6625 /* This forces C1 to zero, which isn't right. */
6626 put_C3210(
6627 unop(Iop_32Uto64,
6628 binop( Iop_And32,
6629 binop(Iop_Shl32,
6630 binop(Iop_CmpF64,
6631 get_ST(0),
6632 loadLE(Ity_F64,mkexpr(addr))),
6633 mkU8(8)),
6634 mkU32(0x4500)
6635 )));
6636 fp_pop();
6637 break;
6639 case 4: /* FSUB double-real */
6640 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True );
6641 break;
6643 case 5: /* FSUBR double-real */
6644 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True );
6645 break;
6647 case 6: /* FDIV double-real */
6648 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True );
6649 break;
6651 case 7: /* FDIVR double-real */
6652 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True );
6653 break;
6655 default:
6656 vex_printf("unhandled opc_aux = 0x%2x\n",
6657 (UInt)gregLO3ofRM(modrm));
6658 vex_printf("first_opcode == 0xDC\n");
6659 goto decode_fail;
6662 } else {
6664 delta++;
6665 switch (modrm) {
6667 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
6668 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False );
6669 break;
6671 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
6672 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False );
6673 break;
6675 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
6676 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False );
6677 break;
6679 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
6680 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False );
6681 break;
6683 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
6684 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False );
6685 break;
6687 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
6688 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False );
6689 break;
6691 default:
6692 goto decode_fail;
6698 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
6699 else
6700 if (first_opcode == 0xDD) {
6702 if (modrm < 0xC0) {
6704 /* bits 5,4,3 are an opcode extension, and the modRM also
6705 specifies an address. */
6706 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6707 delta += len;
6709 switch (gregLO3ofRM(modrm)) {
6711 case 0: /* FLD double-real */
6712 DIP("fldl %s\n", dis_buf);
6713 fp_push();
6714 put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
6715 break;
6717 case 1: /* FISTTPQ m64 (SSE3) */
6718                  DIP("fisttpll %s\n", dis_buf);
6719 storeLE( mkexpr(addr),
6720 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );
6721 fp_pop();
6722 break;
6724 case 2: /* FST double-real */
6725 DIP("fstl %s\n", dis_buf);
6726 storeLE(mkexpr(addr), get_ST(0));
6727 break;
6729 case 3: /* FSTP double-real */
6730 DIP("fstpl %s\n", dis_buf);
6731 storeLE(mkexpr(addr), get_ST(0));
6732 fp_pop();
6733 break;
6735 case 4: { /* FRSTOR m94/m108 */
6736 IRTemp ew = newTemp(Ity_I32);
6737 IRTemp w64 = newTemp(Ity_I64);
6738 IRDirty* d;
6739 if ( have66(pfx) ) {
6740 /* Uses dirty helper:
6741 VexEmNote amd64g_dirtyhelper_FRSTORS
6742 ( VexGuestAMD64State*, HWord ) */
6743 d = unsafeIRDirty_0_N (
6744 0/*regparms*/,
6745 "amd64g_dirtyhelper_FRSTORS",
6746 &amd64g_dirtyhelper_FRSTORS,
6747 mkIRExprVec_1( mkexpr(addr) )
6749 d->mSize = 94;
6750 } else {
6751 /* Uses dirty helper:
6752 VexEmNote amd64g_dirtyhelper_FRSTOR
6753 ( VexGuestAMD64State*, HWord ) */
6754 d = unsafeIRDirty_0_N (
6755 0/*regparms*/,
6756 "amd64g_dirtyhelper_FRSTOR",
6757 &amd64g_dirtyhelper_FRSTOR,
6758 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
6760 d->mSize = 108;
6763 d->tmp = w64;
6764 /* declare we're reading memory */
6765 d->mFx = Ifx_Read;
6766 d->mAddr = mkexpr(addr);
6767 /* d->mSize set above */
6769 /* declare we're writing guest state */
6770 d->nFxState = 5;
6771 vex_bzero(&d->fxState, sizeof(d->fxState));
6773 d->fxState[0].fx = Ifx_Write;
6774 d->fxState[0].offset = OFFB_FTOP;
6775 d->fxState[0].size = sizeof(UInt);
6777 d->fxState[1].fx = Ifx_Write;
6778 d->fxState[1].offset = OFFB_FPREGS;
6779 d->fxState[1].size = 8 * sizeof(ULong);
6781 d->fxState[2].fx = Ifx_Write;
6782 d->fxState[2].offset = OFFB_FPTAGS;
6783 d->fxState[2].size = 8 * sizeof(UChar);
6785 d->fxState[3].fx = Ifx_Write;
6786 d->fxState[3].offset = OFFB_FPROUND;
6787 d->fxState[3].size = sizeof(ULong);
6789 d->fxState[4].fx = Ifx_Write;
6790 d->fxState[4].offset = OFFB_FC3210;
6791 d->fxState[4].size = sizeof(ULong);
6793 stmt( IRStmt_Dirty(d) );
6795 /* ew contains any emulation warning we may need to
6796 issue. If needed, side-exit to the next insn,
6797 reporting the warning, so that Valgrind's dispatcher
6798 sees the warning. */
6799 assign(ew, unop(Iop_64to32,mkexpr(w64)) );
6800 put_emwarn( mkexpr(ew) );
6801 stmt(
6802 IRStmt_Exit(
6803 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
6804 Ijk_EmWarn,
6805 IRConst_U64( guest_RIP_bbstart+delta ),
6806 OFFB_RIP
6810 if ( have66(pfx) ) {
6811 DIP("frstors %s\n", dis_buf);
6812 } else {
6813 DIP("frstor %s\n", dis_buf);
6815 break;
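                  /* The two image sizes reflect the FSAVE memory formats:
                     with a 66 prefix the 16-bit-operand layout is used
                     (14-byte environment + 80 bytes of register data
                     = 94 bytes), otherwise the 32-bit-operand layout
                     (28 + 80 = 108 bytes).  The FNSAVE case below makes
                     the same distinction. */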
6818 case 6: { /* FNSAVE m94/m108 */
6819 IRDirty *d;
6820 if ( have66(pfx) ) {
6821 /* Uses dirty helper:
6822 void amd64g_dirtyhelper_FNSAVES ( VexGuestAMD64State*,
6823 HWord ) */
6824 d = unsafeIRDirty_0_N (
6825 0/*regparms*/,
6826 "amd64g_dirtyhelper_FNSAVES",
6827 &amd64g_dirtyhelper_FNSAVES,
6828 mkIRExprVec_1( mkexpr(addr) )
6830 d->mSize = 94;
6831 } else {
6832 /* Uses dirty helper:
6833 void amd64g_dirtyhelper_FNSAVE ( VexGuestAMD64State*,
6834 HWord ) */
6835 d = unsafeIRDirty_0_N (
6836 0/*regparms*/,
6837 "amd64g_dirtyhelper_FNSAVE",
6838 &amd64g_dirtyhelper_FNSAVE,
6839 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
6841 d->mSize = 108;
6844 /* declare we're writing memory */
6845 d->mFx = Ifx_Write;
6846 d->mAddr = mkexpr(addr);
6847 /* d->mSize set above */
6849 /* declare we're reading guest state */
6850 d->nFxState = 5;
6851 vex_bzero(&d->fxState, sizeof(d->fxState));
6853 d->fxState[0].fx = Ifx_Read;
6854 d->fxState[0].offset = OFFB_FTOP;
6855 d->fxState[0].size = sizeof(UInt);
6857 d->fxState[1].fx = Ifx_Read;
6858 d->fxState[1].offset = OFFB_FPREGS;
6859 d->fxState[1].size = 8 * sizeof(ULong);
6861 d->fxState[2].fx = Ifx_Read;
6862 d->fxState[2].offset = OFFB_FPTAGS;
6863 d->fxState[2].size = 8 * sizeof(UChar);
6865 d->fxState[3].fx = Ifx_Read;
6866 d->fxState[3].offset = OFFB_FPROUND;
6867 d->fxState[3].size = sizeof(ULong);
6869 d->fxState[4].fx = Ifx_Read;
6870 d->fxState[4].offset = OFFB_FC3210;
6871 d->fxState[4].size = sizeof(ULong);
6873 stmt( IRStmt_Dirty(d) );
6875 if ( have66(pfx) ) {
6876 DIP("fnsaves %s\n", dis_buf);
6877 } else {
6878 DIP("fnsave %s\n", dis_buf);
6880 break;
6883 case 7: { /* FNSTSW m16 */
6884 IRExpr* sw = get_FPU_sw();
6885 vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
6886 storeLE( mkexpr(addr), sw );
6887 DIP("fnstsw %s\n", dis_buf);
6888 break;
6891 default:
6892 vex_printf("unhandled opc_aux = 0x%2x\n",
6893 (UInt)gregLO3ofRM(modrm));
6894 vex_printf("first_opcode == 0xDD\n");
6895 goto decode_fail;
6897 } else {
6898 delta++;
6899 switch (modrm) {
6901 case 0xC0 ... 0xC7: /* FFREE %st(?) */
6902 r_dst = (UInt)modrm - 0xC0;
6903 DIP("ffree %%st(%u)\n", r_dst);
6904 put_ST_TAG ( r_dst, mkU8(0) );
6905 break;
6907 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
6908 r_dst = (UInt)modrm - 0xD0;
6909 DIP("fst %%st(0),%%st(%u)\n", r_dst);
6910                  /* P4 manual says: "If the destination operand is a
6911                     non-empty register, the invalid-operation exception
6912                     is not generated."  Hence put_ST_UNCHECKED. */
6913 put_ST_UNCHECKED(r_dst, get_ST(0));
6914 break;
6916 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
6917 r_dst = (UInt)modrm - 0xD8;
6918 DIP("fstp %%st(0),%%st(%u)\n", r_dst);
6919                  /* P4 manual says: "If the destination operand is a
6920                     non-empty register, the invalid-operation exception
6921                     is not generated."  Hence put_ST_UNCHECKED. */
6922 put_ST_UNCHECKED(r_dst, get_ST(0));
6923 fp_pop();
6924 break;
6926 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
6927 r_dst = (UInt)modrm - 0xE0;
6928 DIP("fucom %%st(0),%%st(%u)\n", r_dst);
6929 /* This forces C1 to zero, which isn't right. */
6930 put_C3210(
6931 unop(Iop_32Uto64,
6932 binop( Iop_And32,
6933 binop(Iop_Shl32,
6934 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
6935 mkU8(8)),
6936 mkU32(0x4500)
6937 )));
6938 break;
6940 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
6941 r_dst = (UInt)modrm - 0xE8;
6942 DIP("fucomp %%st(0),%%st(%u)\n", r_dst);
6943 /* This forces C1 to zero, which isn't right. */
6944 put_C3210(
6945 unop(Iop_32Uto64,
6946 binop( Iop_And32,
6947 binop(Iop_Shl32,
6948 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
6949 mkU8(8)),
6950 mkU32(0x4500)
6951 )));
6952 fp_pop();
6953 break;
6955 default:
6956 goto decode_fail;
6961 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
6962 else
6963 if (first_opcode == 0xDE) {
6965 if (modrm < 0xC0) {
6967 /* bits 5,4,3 are an opcode extension, and the modRM also
6968 specifies an address. */
6969 IROp fop;
6970 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6971 delta += len;
6973 switch (gregLO3ofRM(modrm)) {
6975 case 0: /* FIADD m16int */ /* ST(0) += m16int */
6976 DIP("fiaddw %s\n", dis_buf);
6977 fop = Iop_AddF64;
6978 goto do_fop_m16;
6980 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
6981 DIP("fimulw %s\n", dis_buf);
6982 fop = Iop_MulF64;
6983 goto do_fop_m16;
6985 case 4: /* FISUB m16int */ /* ST(0) -= m16int */
6986 DIP("fisubw %s\n", dis_buf);
6987 fop = Iop_SubF64;
6988 goto do_fop_m16;
6990 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
6991 DIP("fisubrw %s\n", dis_buf);
6992 fop = Iop_SubF64;
6993 goto do_foprev_m16;
6995 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
6996                  DIP("fidivw %s\n", dis_buf);
6997 fop = Iop_DivF64;
6998 goto do_fop_m16;
7000 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
7001 DIP("fidivrw %s\n", dis_buf);
7002 fop = Iop_DivF64;
7003 goto do_foprev_m16;
7005 do_fop_m16:
7006 put_ST_UNCHECKED(0,
7007 triop(fop,
7008 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
7009 get_ST(0),
7010 unop(Iop_I32StoF64,
7011 unop(Iop_16Sto32,
7012 loadLE(Ity_I16, mkexpr(addr))))));
7013 break;
7015 do_foprev_m16:
7016 put_ST_UNCHECKED(0,
7017 triop(fop,
7018 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
7019 unop(Iop_I32StoF64,
7020 unop(Iop_16Sto32,
7021 loadLE(Ity_I16, mkexpr(addr)))),
7022 get_ST(0)));
7023 break;
7025 default:
7026 vex_printf("unhandled opc_aux = 0x%2x\n",
7027 (UInt)gregLO3ofRM(modrm));
7028 vex_printf("first_opcode == 0xDE\n");
7029 goto decode_fail;
7032 } else {
7034 delta++;
7035 switch (modrm) {
7037 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
7038 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );
7039 break;
7041 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
7042 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
7043 break;
7045 case 0xD9: /* FCOMPP %st(0),%st(1) */
7046 DIP("fcompp %%st(0),%%st(1)\n");
7047 /* This forces C1 to zero, which isn't right. */
7048 put_C3210(
7049 unop(Iop_32Uto64,
7050 binop( Iop_And32,
7051 binop(Iop_Shl32,
7052 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
7053 mkU8(8)),
7054 mkU32(0x4500)
7055 )));
7056 fp_pop();
7057 fp_pop();
7058 break;
7060 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
7061 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True );
7062 break;
7064 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
7065 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True );
7066 break;
7068 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
7069 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );
7070 break;
7072 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
7073 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
7074 break;
7076 default:
7077 goto decode_fail;
7083 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
7084 else
7085 if (first_opcode == 0xDF) {
7087 if (modrm < 0xC0) {
7089 /* bits 5,4,3 are an opcode extension, and the modRM also
7090 specifies an address. */
7091 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7092 delta += len;
7094 switch (gregLO3ofRM(modrm)) {
7096 case 0: /* FILD m16int */
7097 DIP("fildw %s\n", dis_buf);
7098 fp_push();
7099 put_ST(0, unop(Iop_I32StoF64,
7100 unop(Iop_16Sto32,
7101 loadLE(Ity_I16, mkexpr(addr)))));
7102 break;
7104 case 1: /* FISTTPS m16 (SSE3) */
7105 DIP("fisttps %s\n", dis_buf);
7106 storeLE( mkexpr(addr),
7107 x87ishly_qnarrow_32_to_16(
7108 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) ));
7109 fp_pop();
7110 break;
7112 case 2: /* FIST m16 */
7113 DIP("fists %s\n", dis_buf);
7114 storeLE( mkexpr(addr),
7115 x87ishly_qnarrow_32_to_16(
7116 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
7117 break;
7119 case 3: /* FISTP m16 */
7120 DIP("fistps %s\n", dis_buf);
7121 storeLE( mkexpr(addr),
7122 x87ishly_qnarrow_32_to_16(
7123 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
7124 fp_pop();
7125 break;
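                  /* x87ishly_qnarrow_32_to_16 (defined earlier) implements
                     the x87 rule for 16-bit integer stores: results inside
                     [-32768, 32767] are narrowed normally, anything else is
                     stored as the "integer indefinite" value 0x8000 --
                     hence the range test against 65536 and the 0x8000
                     fallback in its definition. */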
7127 case 5: /* FILD m64 */
7128 DIP("fildll %s\n", dis_buf);
7129 fp_push();
7130 put_ST(0, binop(Iop_I64StoF64,
7131 get_roundingmode(),
7132 loadLE(Ity_I64, mkexpr(addr))));
7133 break;
7135 case 7: /* FISTP m64 */
7136 DIP("fistpll %s\n", dis_buf);
7137 storeLE( mkexpr(addr),
7138 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) );
7139 fp_pop();
7140 break;
7142 default:
7143 vex_printf("unhandled opc_aux = 0x%2x\n",
7144 (UInt)gregLO3ofRM(modrm));
7145 vex_printf("first_opcode == 0xDF\n");
7146 goto decode_fail;
7149 } else {
7151 delta++;
7152 switch (modrm) {
7154 case 0xC0: /* FFREEP %st(0) */
7155 DIP("ffreep %%st(%d)\n", 0);
7156 put_ST_TAG ( 0, mkU8(0) );
7157 fp_pop();
7158 break;
7160 case 0xE0: /* FNSTSW %ax */
7161 DIP("fnstsw %%ax\n");
7162 /* Invent a plausible-looking FPU status word value and
7163 dump it in %AX:
7164 ((ftop & 7) << 11) | (c3210 & 0x4700)
7166 putIRegRAX(
7168 unop(Iop_32to16,
7169 binop(Iop_Or32,
7170 binop(Iop_Shl32,
7171 binop(Iop_And32, get_ftop(), mkU32(7)),
7172 mkU8(11)),
7173 binop(Iop_And32,
7174 unop(Iop_64to32, get_C3210()),
7175 mkU32(0x4700))
7176 )));
7177 break;
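                  /* Worked example of the synthesised status word: with
                     ftop == 5 and C3210 == 0x4000 (C3 set), the expression
                     above gives
                        ((5 & 7) << 11) | (0x4000 & 0x4700)
                           = 0x2800 | 0x4000 = 0x6800
                     i.e. TOP=5 in bits 13:11 and C3 in bit 14, with the
                     exception flags all zero -- hence "plausible-looking"
                     rather than exact. */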
7179 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
7180 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
7181 break;
7183 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
7184 /* not really right since COMIP != UCOMIP */
7185 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
7186 break;
7188 default:
7189 goto decode_fail;
7195 else
7196 goto decode_fail;
7198 *decode_ok = True;
7199 return delta;
7201 decode_fail:
7202 *decode_ok = False;
7203 return delta;
7207 /*------------------------------------------------------------*/
7208 /*--- ---*/
7209 /*--- MMX INSTRUCTIONS ---*/
7210 /*--- ---*/
7211 /*------------------------------------------------------------*/
7213 /* Effect of MMX insns on x87 FPU state (table 11-2 of
7214 IA32 arch manual, volume 3):
7216 Read from, or write to MMX register (viz, any insn except EMMS):
7217 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
7218 * FP stack pointer set to zero
7220 EMMS:
7221 * All tags set to Invalid (empty) -- FPTAGS[i] := zero
7222 * FP stack pointer set to zero
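/* In guest-state terms the preambles below therefore amount to, roughly,
      ftop = 0;
      for (i = 0; i < 8; i++) fptags[i] = 1;    -- do_MMX_preamble
   and the same loop with fptags[i] = 0 for do_EMMS_preamble, expressed as
   a Put of FTOP plus eight PutI statements into the FPTAGS array. */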
7225 static void do_MMX_preamble ( void )
7227 Int i;
7228 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
7229 IRExpr* zero = mkU32(0);
7230 IRExpr* tag1 = mkU8(1);
7231 put_ftop(zero);
7232 for (i = 0; i < 8; i++)
7233 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag1) ) );
7236 static void do_EMMS_preamble ( void )
7238 Int i;
7239 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
7240 IRExpr* zero = mkU32(0);
7241 IRExpr* tag0 = mkU8(0);
7242 put_ftop(zero);
7243 for (i = 0; i < 8; i++)
7244 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag0) ) );
7248 static IRExpr* getMMXReg ( UInt archreg )
7250 vassert(archreg < 8);
7251 return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
7255 static void putMMXReg ( UInt archreg, IRExpr* e )
7257 vassert(archreg < 8);
7258 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
7259 stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
7263 /* Helper for non-shift MMX insns. Note this is incomplete in the
7264 sense that it does not first call do_MMX_preamble() -- that is the
7265 responsibility of its caller. */
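/* A note on the invG/eLeft flags used below: invG complements the G
   (destination) operand before the binop, which is how PANDN (opcode 0xDF)
   gets its  dst = ~dst & src  semantics out of a plain Iop_And64; eLeft
   swaps the operand order so that, for the pack/unpack/interleave family,
   the E operand becomes the left argument of the IROp. */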
7267 static
7268 ULong dis_MMXop_regmem_to_reg ( const VexAbiInfo* vbi,
7269 Prefix pfx,
7270 Long delta,
7271 UChar opc,
7272 const HChar* name,
7273 Bool show_granularity )
7275 HChar dis_buf[50];
7276 UChar modrm = getUChar(delta);
7277 Bool isReg = epartIsReg(modrm);
7278 IRExpr* argL = NULL;
7279 IRExpr* argR = NULL;
7280 IRExpr* argG = NULL;
7281 IRExpr* argE = NULL;
7282 IRTemp res = newTemp(Ity_I64);
7284 Bool invG = False;
7285 IROp op = Iop_INVALID;
7286 void* hAddr = NULL;
7287 const HChar* hName = NULL;
7288 Bool eLeft = False;
7290 # define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)
7292 switch (opc) {
7293 /* Original MMX ones */
7294 case 0xFC: op = Iop_Add8x8; break;
7295 case 0xFD: op = Iop_Add16x4; break;
7296 case 0xFE: op = Iop_Add32x2; break;
7298 case 0xEC: op = Iop_QAdd8Sx8; break;
7299 case 0xED: op = Iop_QAdd16Sx4; break;
7301 case 0xDC: op = Iop_QAdd8Ux8; break;
7302 case 0xDD: op = Iop_QAdd16Ux4; break;
7304 case 0xF8: op = Iop_Sub8x8; break;
7305 case 0xF9: op = Iop_Sub16x4; break;
7306 case 0xFA: op = Iop_Sub32x2; break;
7308 case 0xE8: op = Iop_QSub8Sx8; break;
7309 case 0xE9: op = Iop_QSub16Sx4; break;
7311 case 0xD8: op = Iop_QSub8Ux8; break;
7312 case 0xD9: op = Iop_QSub16Ux4; break;
7314 case 0xE5: op = Iop_MulHi16Sx4; break;
7315 case 0xD5: op = Iop_Mul16x4; break;
7316 case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break;
7318 case 0x74: op = Iop_CmpEQ8x8; break;
7319 case 0x75: op = Iop_CmpEQ16x4; break;
7320 case 0x76: op = Iop_CmpEQ32x2; break;
7322 case 0x64: op = Iop_CmpGT8Sx8; break;
7323 case 0x65: op = Iop_CmpGT16Sx4; break;
7324 case 0x66: op = Iop_CmpGT32Sx2; break;
7326 case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break;
7327 case 0x63: op = Iop_QNarrowBin16Sto8Sx8; eLeft = True; break;
7328 case 0x67: op = Iop_QNarrowBin16Sto8Ux8; eLeft = True; break;
7330 case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break;
7331 case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
7332 case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;
7334 case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break;
7335 case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
7336 case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;
7338 case 0xDB: op = Iop_And64; break;
7339 case 0xDF: op = Iop_And64; invG = True; break;
7340 case 0xEB: op = Iop_Or64; break;
7341 case 0xEF: /* Possibly do better here if argL and argR are the
7342 same reg */
7343 op = Iop_Xor64; break;
7345 /* Introduced in SSE1 */
7346 case 0xE0: op = Iop_Avg8Ux8; break;
7347 case 0xE3: op = Iop_Avg16Ux4; break;
7348 case 0xEE: op = Iop_Max16Sx4; break;
7349 case 0xDE: op = Iop_Max8Ux8; break;
7350 case 0xEA: op = Iop_Min16Sx4; break;
7351 case 0xDA: op = Iop_Min8Ux8; break;
7352 case 0xE4: op = Iop_MulHi16Ux4; break;
7353 case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break;
7355 /* Introduced in SSE2 */
7356 case 0xD4: op = Iop_Add64; break;
7357 case 0xFB: op = Iop_Sub64; break;
7359 default:
7360 vex_printf("\n0x%x\n", (UInt)opc);
7361 vpanic("dis_MMXop_regmem_to_reg");
7364 # undef XXX
7366 argG = getMMXReg(gregLO3ofRM(modrm));
7367 if (invG)
7368 argG = unop(Iop_Not64, argG);
7370 if (isReg) {
7371 delta++;
7372 argE = getMMXReg(eregLO3ofRM(modrm));
7373 } else {
7374 Int len;
7375 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7376 delta += len;
7377 argE = loadLE(Ity_I64, mkexpr(addr));
7380 if (eLeft) {
7381 argL = argE;
7382 argR = argG;
7383 } else {
7384 argL = argG;
7385 argR = argE;
7388 if (op != Iop_INVALID) {
7389 vassert(hName == NULL);
7390 vassert(hAddr == NULL);
7391 assign(res, binop(op, argL, argR));
7392 } else {
7393 vassert(hName != NULL);
7394 vassert(hAddr != NULL);
7395 assign( res,
7396 mkIRExprCCall(
7397 Ity_I64,
7398 0/*regparms*/, hName, hAddr,
7399 mkIRExprVec_2( argL, argR )
7404 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
7406 DIP("%s%s %s, %s\n",
7407 name, show_granularity ? nameMMXGran(opc & 3) : "",
7408 ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ),
7409 nameMMXReg(gregLO3ofRM(modrm)) );
7411 return delta;
7415 /* Vector by scalar shift of G by the amount specified at the bottom
7416 of E. This is a straight copy of dis_SSE_shiftG_byE. */
7418 static ULong dis_MMX_shiftG_byE ( const VexAbiInfo* vbi,
7419 Prefix pfx, Long delta,
7420 const HChar* opname, IROp op )
7422 HChar dis_buf[50];
7423 Int alen, size;
7424 IRTemp addr;
7425 Bool shl, shr, sar;
7426 UChar rm = getUChar(delta);
7427 IRTemp g0 = newTemp(Ity_I64);
7428 IRTemp g1 = newTemp(Ity_I64);
7429 IRTemp amt = newTemp(Ity_I64);
7430 IRTemp amt8 = newTemp(Ity_I8);
7432 if (epartIsReg(rm)) {
7433 assign( amt, getMMXReg(eregLO3ofRM(rm)) );
7434 DIP("%s %s,%s\n", opname,
7435 nameMMXReg(eregLO3ofRM(rm)),
7436 nameMMXReg(gregLO3ofRM(rm)) );
7437 delta++;
7438 } else {
7439 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
7440 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
7441 DIP("%s %s,%s\n", opname,
7442 dis_buf,
7443 nameMMXReg(gregLO3ofRM(rm)) );
7444 delta += alen;
7446 assign( g0, getMMXReg(gregLO3ofRM(rm)) );
7447 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
7449 shl = shr = sar = False;
7450 size = 0;
7451 switch (op) {
7452       case Iop_ShlN16x4: shl = True; size = 16; break;
7453 case Iop_ShlN32x2: shl = True; size = 32; break;
7454 case Iop_Shl64: shl = True; size = 64; break;
7455 case Iop_ShrN16x4: shr = True; size = 16; break;
7456 case Iop_ShrN32x2: shr = True; size = 32; break;
7457 case Iop_Shr64: shr = True; size = 64; break;
7458 case Iop_SarN16x4: sar = True; size = 16; break;
7459 case Iop_SarN32x2: sar = True; size = 32; break;
7460 default: vassert(0);
7463 if (shl || shr) {
7464 assign(
7466 IRExpr_ITE(
7467 binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)),
7468 binop(op, mkexpr(g0), mkexpr(amt8)),
7469 mkU64(0)
7472 } else
7473 if (sar) {
7474 assign(
7476 IRExpr_ITE(
7477 binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)),
7478 binop(op, mkexpr(g0), mkexpr(amt8)),
7479 binop(op, mkexpr(g0), mkU8(size-1))
7482 } else {
7483 vassert(0);
7486 putMMXReg( gregLO3ofRM(rm), mkexpr(g1) );
7487 return delta;
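/* Worked example of the out-of-range handling above: MMX shift-by-register
   counts are not masked, so e.g. PSRLW by 100 must produce all-zero lanes
   (the amt < size test fails and mkU64(0) is selected), while PSRAW by 100
   must behave like a shift by 15, replicating each lane's sign bit (the
   sar branch substitutes size-1). */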
7491 /* Vector by scalar shift of E by an immediate byte. This is a
7492 straight copy of dis_SSE_shiftE_imm. */
7494 static
7495 ULong dis_MMX_shiftE_imm ( Long delta, const HChar* opname, IROp op )
7497 Bool shl, shr, sar;
7498 UChar rm = getUChar(delta);
7499 IRTemp e0 = newTemp(Ity_I64);
7500 IRTemp e1 = newTemp(Ity_I64);
7501 UChar amt, size;
7502 vassert(epartIsReg(rm));
7503 vassert(gregLO3ofRM(rm) == 2
7504 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
7505 amt = getUChar(delta+1);
7506 delta += 2;
7507 DIP("%s $%d,%s\n", opname,
7508 (Int)amt,
7509 nameMMXReg(eregLO3ofRM(rm)) );
7511 assign( e0, getMMXReg(eregLO3ofRM(rm)) );
7513 shl = shr = sar = False;
7514 size = 0;
7515 switch (op) {
7516 case Iop_ShlN16x4: shl = True; size = 16; break;
7517 case Iop_ShlN32x2: shl = True; size = 32; break;
7518 case Iop_Shl64: shl = True; size = 64; break;
7519 case Iop_SarN16x4: sar = True; size = 16; break;
7520 case Iop_SarN32x2: sar = True; size = 32; break;
7521 case Iop_ShrN16x4: shr = True; size = 16; break;
7522 case Iop_ShrN32x2: shr = True; size = 32; break;
7523 case Iop_Shr64: shr = True; size = 64; break;
7524 default: vassert(0);
7527 if (shl || shr) {
7528 assign( e1, amt >= size
7529 ? mkU64(0)
7530 : binop(op, mkexpr(e0), mkU8(amt))
7532 } else
7533 if (sar) {
7534 assign( e1, amt >= size
7535 ? binop(op, mkexpr(e0), mkU8(size-1))
7536 : binop(op, mkexpr(e0), mkU8(amt))
7538 } else {
7539 vassert(0);
7542 putMMXReg( eregLO3ofRM(rm), mkexpr(e1) );
7543 return delta;
7547 /* Completely handle all MMX instructions except emms. */
7549 static
7550 ULong dis_MMX ( Bool* decode_ok,
7551 const VexAbiInfo* vbi, Prefix pfx, Int sz, Long delta )
7553 Int len;
7554 UChar modrm;
7555 HChar dis_buf[50];
7556 UChar opc = getUChar(delta);
7557 delta++;
7559 /* dis_MMX handles all insns except emms. */
7560 do_MMX_preamble();
7562 switch (opc) {
7564 case 0x6E:
7565 if (sz == 4) {
7566 /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/
7567 modrm = getUChar(delta);
7568 if (epartIsReg(modrm)) {
7569 delta++;
7570 putMMXReg(
7571 gregLO3ofRM(modrm),
7572 binop( Iop_32HLto64,
7573 mkU32(0),
7574 getIReg32(eregOfRexRM(pfx,modrm)) ) );
7575 DIP("movd %s, %s\n",
7576 nameIReg32(eregOfRexRM(pfx,modrm)),
7577 nameMMXReg(gregLO3ofRM(modrm)));
7578 } else {
7579 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7580 delta += len;
7581 putMMXReg(
7582 gregLO3ofRM(modrm),
7583 binop( Iop_32HLto64,
7584 mkU32(0),
7585 loadLE(Ity_I32, mkexpr(addr)) ) );
7586 DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
7589 else
7590 if (sz == 8) {
7591 /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/
7592 modrm = getUChar(delta);
7593 if (epartIsReg(modrm)) {
7594 delta++;
7595 putMMXReg( gregLO3ofRM(modrm),
7596 getIReg64(eregOfRexRM(pfx,modrm)) );
7597 DIP("movd %s, %s\n",
7598 nameIReg64(eregOfRexRM(pfx,modrm)),
7599 nameMMXReg(gregLO3ofRM(modrm)));
7600 } else {
7601 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7602 delta += len;
7603 putMMXReg( gregLO3ofRM(modrm),
7604 loadLE(Ity_I64, mkexpr(addr)) );
7605 DIP("movd{64} %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
7608 else {
7609 goto mmx_decode_failure;
7611 break;
7613 case 0x7E:
7614 if (sz == 4) {
7615 /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */
7616 modrm = getUChar(delta);
7617 if (epartIsReg(modrm)) {
7618 delta++;
7619 putIReg32( eregOfRexRM(pfx,modrm),
7620 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
7621 DIP("movd %s, %s\n",
7622 nameMMXReg(gregLO3ofRM(modrm)),
7623 nameIReg32(eregOfRexRM(pfx,modrm)));
7624 } else {
7625 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7626 delta += len;
7627 storeLE( mkexpr(addr),
7628 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
7629 DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
7632 else
7633 if (sz == 8) {
7634 /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */
7635 modrm = getUChar(delta);
7636 if (epartIsReg(modrm)) {
7637 delta++;
7638 putIReg64( eregOfRexRM(pfx,modrm),
7639 getMMXReg(gregLO3ofRM(modrm)) );
7640 DIP("movd %s, %s\n",
7641 nameMMXReg(gregLO3ofRM(modrm)),
7642 nameIReg64(eregOfRexRM(pfx,modrm)));
7643 } else {
7644 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7645 delta += len;
7646 storeLE( mkexpr(addr),
7647 getMMXReg(gregLO3ofRM(modrm)) );
7648 DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
7650 } else {
7651 goto mmx_decode_failure;
7653 break;
7655 case 0x6F:
7656 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
7657 if (sz != 4
7658 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7659 goto mmx_decode_failure;
7660 modrm = getUChar(delta);
7661 if (epartIsReg(modrm)) {
7662 delta++;
7663 putMMXReg( gregLO3ofRM(modrm), getMMXReg(eregLO3ofRM(modrm)) );
7664 DIP("movq %s, %s\n",
7665 nameMMXReg(eregLO3ofRM(modrm)),
7666 nameMMXReg(gregLO3ofRM(modrm)));
7667 } else {
7668 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7669 delta += len;
7670 putMMXReg( gregLO3ofRM(modrm), loadLE(Ity_I64, mkexpr(addr)) );
7671 DIP("movq %s, %s\n",
7672 dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
7674 break;
7676 case 0x7F:
7677 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
7678 if (sz != 4
7679 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7680 goto mmx_decode_failure;
7681 modrm = getUChar(delta);
7682 if (epartIsReg(modrm)) {
7683 delta++;
7684 putMMXReg( eregLO3ofRM(modrm), getMMXReg(gregLO3ofRM(modrm)) );
7685 DIP("movq %s, %s\n",
7686 nameMMXReg(gregLO3ofRM(modrm)),
7687 nameMMXReg(eregLO3ofRM(modrm)));
7688 } else {
7689 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7690 delta += len;
7691 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
7692 DIP("mov(nt)q %s, %s\n",
7693 nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
7695 break;
7697 case 0xFC:
7698 case 0xFD:
7699 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
7700 if (sz != 4)
7701 goto mmx_decode_failure;
7702 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padd", True );
7703 break;
7705 case 0xEC:
7706 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
7707 if (sz != 4
7708 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7709 goto mmx_decode_failure;
7710 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padds", True );
7711 break;
7713 case 0xDC:
7714 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
7715 if (sz != 4)
7716 goto mmx_decode_failure;
7717 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "paddus", True );
7718 break;
7720 case 0xF8:
7721 case 0xF9:
7722 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
7723 if (sz != 4)
7724 goto mmx_decode_failure;
7725 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psub", True );
7726 break;
7728 case 0xE8:
7729 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
7730 if (sz != 4)
7731 goto mmx_decode_failure;
7732 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubs", True );
7733 break;
7735 case 0xD8:
7736 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
7737 if (sz != 4)
7738 goto mmx_decode_failure;
7739 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubus", True );
7740 break;
7742 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
7743 if (sz != 4)
7744 goto mmx_decode_failure;
7745 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmulhw", False );
7746 break;
7748 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
7749 if (sz != 4)
7750 goto mmx_decode_failure;
7751 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmullw", False );
7752 break;
7754 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
7755 vassert(sz == 4);
7756 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmaddwd", False );
7757 break;
7759 case 0x74:
7760 case 0x75:
7761 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
7762 if (sz != 4)
7763 goto mmx_decode_failure;
7764 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpeq", True );
7765 break;
7767 case 0x64:
7768 case 0x65:
7769 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
7770 if (sz != 4)
7771 goto mmx_decode_failure;
7772 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpgt", True );
7773 break;
7775 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
7776 if (sz != 4)
7777 goto mmx_decode_failure;
7778 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packssdw", False );
7779 break;
7781 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
7782 if (sz != 4)
7783 goto mmx_decode_failure;
7784 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packsswb", False );
7785 break;
7787 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
7788 if (sz != 4)
7789 goto mmx_decode_failure;
7790 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packuswb", False );
7791 break;
7793 case 0x68:
7794 case 0x69:
7795 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
7796 if (sz != 4
7797 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7798 goto mmx_decode_failure;
7799 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckh", True );
7800 break;
7802 case 0x60:
7803 case 0x61:
7804 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
7805 if (sz != 4
7806 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7807 goto mmx_decode_failure;
7808 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckl", True );
7809 break;
7811 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
7812 if (sz != 4)
7813 goto mmx_decode_failure;
7814 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pand", False );
7815 break;
7817 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
7818 if (sz != 4)
7819 goto mmx_decode_failure;
7820 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pandn", False );
7821 break;
7823 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
7824 if (sz != 4)
7825 goto mmx_decode_failure;
7826 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "por", False );
7827 break;
7829 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
7830 if (sz != 4)
7831 goto mmx_decode_failure;
7832 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pxor", False );
7833 break;
7835 # define SHIFT_BY_REG(_name,_op) \
7836 delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op); \
7837 break;
7839 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
7840 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
7841 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
7842 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);
7844 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
7845 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
7846 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
7847 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64);
7849 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
7850 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4);
7851 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2);
7853 # undef SHIFT_BY_REG
7855 case 0x71:
7856 case 0x72:
7857 case 0x73: {
7858 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
7859 UChar byte2, subopc;
7860 if (sz != 4)
7861 goto mmx_decode_failure;
7862 byte2 = getUChar(delta); /* amode / sub-opcode */
7863 subopc = toUChar( (byte2 >> 3) & 7 );
7865 # define SHIFT_BY_IMM(_name,_op) \
7866 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \
7867 } while (0)
7869 if (subopc == 2 /*SRL*/ && opc == 0x71)
7870 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4);
7871 else if (subopc == 2 /*SRL*/ && opc == 0x72)
7872 SHIFT_BY_IMM("psrld", Iop_ShrN32x2);
7873 else if (subopc == 2 /*SRL*/ && opc == 0x73)
7874 SHIFT_BY_IMM("psrlq", Iop_Shr64);
7876 else if (subopc == 4 /*SAR*/ && opc == 0x71)
7877 SHIFT_BY_IMM("psraw", Iop_SarN16x4);
7878 else if (subopc == 4 /*SAR*/ && opc == 0x72)
7879 SHIFT_BY_IMM("psrad", Iop_SarN32x2);
7881 else if (subopc == 6 /*SHL*/ && opc == 0x71)
7882 SHIFT_BY_IMM("psllw", Iop_ShlN16x4);
7883 else if (subopc == 6 /*SHL*/ && opc == 0x72)
7884 SHIFT_BY_IMM("pslld", Iop_ShlN32x2);
7885 else if (subopc == 6 /*SHL*/ && opc == 0x73)
7886 SHIFT_BY_IMM("psllq", Iop_Shl64);
7888 else goto mmx_decode_failure;
7890 # undef SHIFT_BY_IMM
7891 break;
7894 case 0xF7: {
7895 IRTemp addr = newTemp(Ity_I64);
7896 IRTemp regD = newTemp(Ity_I64);
7897 IRTemp regM = newTemp(Ity_I64);
7898 IRTemp mask = newTemp(Ity_I64);
7899 IRTemp olddata = newTemp(Ity_I64);
7900 IRTemp newdata = newTemp(Ity_I64);
7902 modrm = getUChar(delta);
7903 if (sz != 4 || (!epartIsReg(modrm)))
7904 goto mmx_decode_failure;
7905 delta++;
7907 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
7908 assign( regM, getMMXReg( eregLO3ofRM(modrm) ));
7909 assign( regD, getMMXReg( gregLO3ofRM(modrm) ));
7910 assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) );
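         /* Note: Iop_SarN8x8 with shift amount 7 replicates each byte's
            top (sign) bit across the whole byte, so each lane of 'mask'
            becomes 0x00 or 0xFF according to the top bit of the
            corresponding byte of regM -- exactly the per-byte select
            that MASKMOVQ needs when merging regD into the old data. */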
7911 assign( olddata, loadLE( Ity_I64, mkexpr(addr) ));
7912 assign( newdata,
7913 binop(Iop_Or64,
7914 binop(Iop_And64,
7915 mkexpr(regD),
7916 mkexpr(mask) ),
7917 binop(Iop_And64,
7918 mkexpr(olddata),
7919 unop(Iop_Not64, mkexpr(mask)))) );
7920 storeLE( mkexpr(addr), mkexpr(newdata) );
7921 DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm) ),
7922 nameMMXReg( gregLO3ofRM(modrm) ) );
7923 break;
7926 /* --- MMX decode failure --- */
7927 default:
7928 mmx_decode_failure:
7929 *decode_ok = False;
7930 return delta; /* ignored */
7934 *decode_ok = True;
7935 return delta;
7939 /*------------------------------------------------------------*/
7940 /*--- More misc arithmetic and other obscure insns. ---*/
7941 /*------------------------------------------------------------*/
7943 /* Generate base << amt with vacated places filled with stuff
7944 from xtra. amt guaranteed in 0 .. 63. */
7945 static
7946 IRExpr* shiftL64_with_extras ( IRTemp base, IRTemp xtra, IRTemp amt )
7948 /* if amt == 0
7949 then base
7950 else (base << amt) | (xtra >>u (64-amt))
7952 return
7953 IRExpr_ITE(
7954 binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)),
7955 binop(Iop_Or64,
7956 binop(Iop_Shl64, mkexpr(base), mkexpr(amt)),
7957 binop(Iop_Shr64, mkexpr(xtra),
7958 binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
7960 mkexpr(base)
7964 /* Generate base >>u amt with vacated places filled with stuff
7965 from xtra. amt guaranteed in 0 .. 63. */
7966 static
7967 IRExpr* shiftR64_with_extras ( IRTemp xtra, IRTemp base, IRTemp amt )
7969 /* if amt == 0
7970 then base
7971 else (base >>u amt) | (xtra << (64-amt))
7973 return
7974 IRExpr_ITE(
7975 binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)),
7976 binop(Iop_Or64,
7977 binop(Iop_Shr64, mkexpr(base), mkexpr(amt)),
7978 binop(Iop_Shl64, mkexpr(xtra),
7979 binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
7981 mkexpr(base)
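/* Illustrative example (not from the original source): with
   base = 0xFFFF000000000000, xtra = 0x8000000000000001 and amt = 4,
   shiftL64_with_extras produces
      (base << 4) | (xtra >>u 60) = 0xFFF0000000000008.
   The ITE guard exists because, for amt == 0, the "xtra" term would
   need a shift by 64, which is not a defined shift amount; and in any
   case the correct result for amt == 0 is simply base, unchanged. */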
7985 /* Double length left and right shifts. Apparently only required in
7986 v-size (no b- variant). */
7987 static
7988 ULong dis_SHLRD_Gv_Ev ( const VexAbiInfo* vbi,
7989 Prefix pfx,
7990 Long delta, UChar modrm,
7991 Int sz,
7992 IRExpr* shift_amt,
7993 Bool amt_is_literal,
7994 const HChar* shift_amt_txt,
7995 Bool left_shift )
7997 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used
7998 for printing it. And delta on entry points at the modrm byte. */
7999 Int len;
8000 HChar dis_buf[50];
8002 IRType ty = szToITy(sz);
8003 IRTemp gsrc = newTemp(ty);
8004 IRTemp esrc = newTemp(ty);
8005 IRTemp addr = IRTemp_INVALID;
8006 IRTemp tmpSH = newTemp(Ity_I8);
8007 IRTemp tmpSS = newTemp(Ity_I8);
8008 IRTemp tmp64 = IRTemp_INVALID;
8009 IRTemp res64 = IRTemp_INVALID;
8010 IRTemp rss64 = IRTemp_INVALID;
8011 IRTemp resTy = IRTemp_INVALID;
8012 IRTemp rssTy = IRTemp_INVALID;
8013 Int mask = sz==8 ? 63 : 31;
8015 vassert(sz == 2 || sz == 4 || sz == 8);
8017 /* The E-part is the destination; this is shifted. The G-part
8018 supplies bits to be shifted into the E-part, but is not
8019 changed.
8021 If shifting left, form a double-length word with E at the top
8022 and G at the bottom, and shift this left. The result is then in
8023 the high part.
8025 If shifting right, form a double-length word with G at the top
8026 and E at the bottom, and shift this right. The result is then
8027 at the bottom. */
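/* Worked example (illustrative only), for the 32-bit left-shift case
   handled below: "shldl $8, %ebx, %eax" with %eax (E) = 0x11223344 and
   %ebx (G) = 0xAABBCCDD forms tmp64 = 0x11223344AABBCCDD, shifts it
   left by 8 to get 0x223344AABBCCDD00, and keeps the high 32 bits,
   giving %eax = 0x223344AA -- i.e. E shifted left, with G's top bits
   shifted in from the right. */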
8029 /* Fetch the operands. */
8031 assign( gsrc, getIRegG(sz, pfx, modrm) );
8033 if (epartIsReg(modrm)) {
8034 delta++;
8035 assign( esrc, getIRegE(sz, pfx, modrm) );
8036 DIP("sh%cd%c %s, %s, %s\n",
8037 ( left_shift ? 'l' : 'r' ), nameISize(sz),
8038 shift_amt_txt,
8039 nameIRegG(sz, pfx, modrm), nameIRegE(sz, pfx, modrm));
8040 } else {
8041 addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
8042 /* # bytes following amode */
8043 amt_is_literal ? 1 : 0 );
8044 delta += len;
8045 assign( esrc, loadLE(ty, mkexpr(addr)) );
8046 DIP("sh%cd%c %s, %s, %s\n",
8047 ( left_shift ? 'l' : 'r' ), nameISize(sz),
8048 shift_amt_txt,
8049 nameIRegG(sz, pfx, modrm), dis_buf);
8052 /* Calculate the masked shift amount (tmpSH), the masked subshift
8053 amount (tmpSS), the shifted value (res64) and the subshifted
8054 value (rss64). */
8056 assign( tmpSH, binop(Iop_And8, shift_amt, mkU8(mask)) );
8057 assign( tmpSS, binop(Iop_And8,
8058 binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),
8059 mkU8(mask)));
8061 tmp64 = newTemp(Ity_I64);
8062 res64 = newTemp(Ity_I64);
8063 rss64 = newTemp(Ity_I64);
8065 if (sz == 2 || sz == 4) {
8067 /* G is xtra; E is data */
8068 /* what a freaking nightmare: */
8069 if (sz == 4 && left_shift) {
8070 assign( tmp64, binop(Iop_32HLto64, mkexpr(esrc), mkexpr(gsrc)) );
8071 assign( res64,
8072 binop(Iop_Shr64,
8073 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
8074 mkU8(32)) );
8075 assign( rss64,
8076 binop(Iop_Shr64,
8077 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSS)),
8078 mkU8(32)) );
8080 else
8081 if (sz == 4 && !left_shift) {
8082 assign( tmp64, binop(Iop_32HLto64, mkexpr(gsrc), mkexpr(esrc)) );
8083 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
8084 assign( rss64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSS)) );
8086 else
8087 if (sz == 2 && left_shift) {
8088 assign( tmp64,
8089 binop(Iop_32HLto64,
8090 binop(Iop_16HLto32, mkexpr(esrc), mkexpr(gsrc)),
8091 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc))
8093 /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */
8094 assign( res64,
8095 binop(Iop_Shr64,
8096 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
8097 mkU8(48)) );
8098 /* subshift formed by shifting [esrc'0000'0000'0000] */
8099 assign( rss64,
8100 binop(Iop_Shr64,
8101 binop(Iop_Shl64,
8102 binop(Iop_Shl64, unop(Iop_16Uto64, mkexpr(esrc)),
8103 mkU8(48)),
8104 mkexpr(tmpSS)),
8105 mkU8(48)) );
8107 else
8108 if (sz == 2 && !left_shift) {
8109 assign( tmp64,
8110 binop(Iop_32HLto64,
8111 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)),
8112 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(esrc))
8114 /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */
8115 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
8116 /* subshift formed by shifting [0000'0000'0000'esrc] */
8117 assign( rss64, binop(Iop_Shr64,
8118 unop(Iop_16Uto64, mkexpr(esrc)),
8119 mkexpr(tmpSS)) );
8122 } else {
8124 vassert(sz == 8);
8125 if (left_shift) {
8126 assign( res64, shiftL64_with_extras( esrc, gsrc, tmpSH ));
8127 assign( rss64, shiftL64_with_extras( esrc, gsrc, tmpSS ));
8128 } else {
8129 assign( res64, shiftR64_with_extras( gsrc, esrc, tmpSH ));
8130 assign( rss64, shiftR64_with_extras( gsrc, esrc, tmpSS ));
8135 resTy = newTemp(ty);
8136 rssTy = newTemp(ty);
8137 assign( resTy, narrowTo(ty, mkexpr(res64)) );
8138 assign( rssTy, narrowTo(ty, mkexpr(rss64)) );
8140 /* Put result back and write the flags thunk. */
8141 setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl64 : Iop_Sar64,
8142 resTy, rssTy, ty, tmpSH );
8144 if (epartIsReg(modrm)) {
8145 putIRegE(sz, pfx, modrm, mkexpr(resTy));
8146 } else {
8147 storeLE( mkexpr(addr), mkexpr(resTy) );
8150 if (amt_is_literal) delta++;
8151 return delta;
8155 /* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not
8156 required. */
8158 typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;
8160 static const HChar* nameBtOp ( BtOp op )
8162 switch (op) {
8163 case BtOpNone: return "";
8164 case BtOpSet: return "s";
8165 case BtOpReset: return "r";
8166 case BtOpComp: return "c";
8167 default: vpanic("nameBtOp(amd64)");
8172 static
8173 ULong dis_bt_G_E ( const VexAbiInfo* vbi,
8174 Prefix pfx, Int sz, Long delta, BtOp op,
8175 /*OUT*/Bool* decode_OK )
8177 HChar dis_buf[50];
8178 UChar modrm;
8179 Int len;
8180 IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
8181 t_addr1, t_rsp, t_mask, t_new;
8183 vassert(sz == 2 || sz == 4 || sz == 8);
8185 t_fetched = t_bitno0 = t_bitno1 = t_bitno2
8186 = t_addr0 = t_addr1 = t_rsp
8187 = t_mask = t_new = IRTemp_INVALID;
8189 t_fetched = newTemp(Ity_I8);
8190 t_new = newTemp(Ity_I8);
8191 t_bitno0 = newTemp(Ity_I64);
8192 t_bitno1 = newTemp(Ity_I64);
8193 t_bitno2 = newTemp(Ity_I8);
8194 t_addr1 = newTemp(Ity_I64);
8195 modrm = getUChar(delta);
8197 *decode_OK = True;
8198 if (epartIsReg(modrm)) {
8199 /* F2 and F3 are never acceptable. */
8200 if (haveF2orF3(pfx)) {
8201 *decode_OK = False;
8202 return delta;
8204 } else {
8205 /* F2 or F3 (but not both) are allowed, provided LOCK is also
8206 present, and only for the BTC/BTS/BTR cases (not BT). */
8207 if (haveF2orF3(pfx)) {
8208 if (haveF2andF3(pfx) || !haveLOCK(pfx) || op == BtOpNone) {
8209 *decode_OK = False;
8210 return delta;
8215 assign( t_bitno0, widenSto64(getIRegG(sz, pfx, modrm)) );
8217 if (epartIsReg(modrm)) {
8218 delta++;
8219 /* Get it onto the client's stack. Oh, this is a horrible
8220 kludge. See https://bugs.kde.org/show_bug.cgi?id=245925.
8221 Because of the ELF ABI stack redzone, there may be live data
8222 up to 128 bytes below %RSP. So we can't just push it on the
8223 stack, else we may wind up trashing live data, and causing
8224 impossible-to-find simulation errors. (Yes, this did
8225 happen.) So we need to drop RSP by at least 128 before
8226 pushing it. That unfortunately means hitting Memcheck's
8227 fast-case painting code. Ideally we should drop more than
8228 128, to reduce the chances of breaking buggy programs that
8229 have live data below -128(%RSP). Memcheck fast-cases moves
8230 of 288 bytes due to the need to handle ppc64-linux quickly,
8231 so let's use 288. Of course the real fix is to get rid of
8232 this kludge entirely. */
8233 t_rsp = newTemp(Ity_I64);
8234 t_addr0 = newTemp(Ity_I64);
8236 vassert(vbi->guest_stack_redzone_size == 128);
8237 assign( t_rsp, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(288)) );
8238 putIReg64(R_RSP, mkexpr(t_rsp));
8240 storeLE( mkexpr(t_rsp), getIRegE(sz, pfx, modrm) );
8242 /* Make t_addr0 point at it. */
8243 assign( t_addr0, mkexpr(t_rsp) );
8245 /* Mask out upper bits of the shift amount, since we're doing a
8246 reg. */
8247 assign( t_bitno1, binop(Iop_And64,
8248 mkexpr(t_bitno0),
8249 mkU64(sz == 8 ? 63 : sz == 4 ? 31 : 15)) );
8251 } else {
8252 t_addr0 = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
8253 delta += len;
8254 assign( t_bitno1, mkexpr(t_bitno0) );
8257 /* At this point: t_addr0 is the address being operated on. If it
8258 was a reg, we will have pushed it onto the client's stack.
8259 t_bitno1 is the bit number, suitably masked in the case of a
8260 reg. */
8262 /* Now the main sequence. */
8263 assign( t_addr1,
8264 binop(Iop_Add64,
8265 mkexpr(t_addr0),
8266 binop(Iop_Sar64, mkexpr(t_bitno1), mkU8(3))) );
8268 /* t_addr1 now holds effective address */
8270 assign( t_bitno2,
8271 unop(Iop_64to8,
8272 binop(Iop_And64, mkexpr(t_bitno1), mkU64(7))) );
8274 /* t_bitno2 contains offset of bit within byte */
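   /* Example (illustrative): BT with bit offset 37 against a memory
      operand ends up accessing the byte at t_addr0 + (37 >> 3) =
      t_addr0 + 4, and tests bit 37 & 7 = 5 within that byte, i.e.
      mask 0x20.  For the register form the offset has already been
      masked to 0 .. 8*sz-1 above, so it never reaches outside the
      register image that was placed on the stack. */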
8276 if (op != BtOpNone) {
8277 t_mask = newTemp(Ity_I8);
8278 assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) );
8281 /* t_mask is now a suitable byte mask */
8283 assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) );
8285 if (op != BtOpNone) {
8286 switch (op) {
8287 case BtOpSet:
8288 assign( t_new,
8289 binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
8290 break;
8291 case BtOpComp:
8292 assign( t_new,
8293 binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
8294 break;
8295 case BtOpReset:
8296 assign( t_new,
8297 binop(Iop_And8, mkexpr(t_fetched),
8298 unop(Iop_Not8, mkexpr(t_mask))) );
8299 break;
8300 default:
8301 vpanic("dis_bt_G_E(amd64)");
8303 if ((haveLOCK(pfx)) && !epartIsReg(modrm)) {
8304 casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
8305 mkexpr(t_new)/*new*/,
8306 guest_RIP_curr_instr );
8307 } else {
8308 storeLE( mkexpr(t_addr1), mkexpr(t_new) );
8312 /* Side effect done; now get selected bit into Carry flag. The Intel docs
8313 (as of 2015, at least) say that C holds the result, Z is unchanged, and
8314 O,S,A and P are undefined. However, on Skylake it appears that O,S,A,P
8315 are also unchanged, so let's do that. */
8316 const ULong maskC = AMD64G_CC_MASK_C;
8317 const ULong maskOSZAP = AMD64G_CC_MASK_O | AMD64G_CC_MASK_S
8318 | AMD64G_CC_MASK_Z | AMD64G_CC_MASK_A
8319 | AMD64G_CC_MASK_P;
8321 IRTemp old_rflags = newTemp(Ity_I64);
8322 assign(old_rflags, mk_amd64g_calculate_rflags_all());
8324 IRTemp new_rflags = newTemp(Ity_I64);
8325 assign(new_rflags,
8326 binop(Iop_Or64,
8327 binop(Iop_And64, mkexpr(old_rflags), mkU64(maskOSZAP)),
8328 binop(Iop_And64,
8329 binop(Iop_Shr64,
8330 unop(Iop_8Uto64, mkexpr(t_fetched)),
8331 mkexpr(t_bitno2)),
8332 mkU64(maskC))));
8334 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
8335 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
8336 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
8337 /* Set NDEP even though it isn't used. This makes redundant-PUT
8338 elimination of previous stores to this field work better. */
8339 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
8341 /* Move reg operand from stack back to reg */
8342 if (epartIsReg(modrm)) {
8343 /* t_rsp still points at it. */
8344 /* only write the reg if actually modifying it; doing otherwise
8345 zeroes the top half erroneously when doing btl due to
8346 the standard zero-extend rule */
8347 if (op != BtOpNone)
8348 putIRegE(sz, pfx, modrm, loadLE(szToITy(sz), mkexpr(t_rsp)) );
8349 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t_rsp), mkU64(288)) );
8352 DIP("bt%s%c %s, %s\n",
8353 nameBtOp(op), nameISize(sz), nameIRegG(sz, pfx, modrm),
8354 ( epartIsReg(modrm) ? nameIRegE(sz, pfx, modrm) : dis_buf ) );
8356 return delta;
8361 /* Handle BSF/BSR. Only v-size seems necessary. */
8362 static
8363 ULong dis_bs_E_G ( const VexAbiInfo* vbi,
8364 Prefix pfx, Int sz, Long delta, Bool fwds )
8366 Bool isReg;
8367 UChar modrm;
8368 HChar dis_buf[50];
8370 IRType ty = szToITy(sz);
8371 IRTemp src = newTemp(ty);
8372 IRTemp dst = newTemp(ty);
8373 IRTemp src64 = newTemp(Ity_I64);
8374 IRTemp dst64 = newTemp(Ity_I64);
8375 IRTemp srcB = newTemp(Ity_I1);
8377 vassert(sz == 8 || sz == 4 || sz == 2);
8379 modrm = getUChar(delta);
8380 isReg = epartIsReg(modrm);
8381 if (isReg) {
8382 delta++;
8383 assign( src, getIRegE(sz, pfx, modrm) );
8384 } else {
8385 Int len;
8386 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
8387 delta += len;
8388 assign( src, loadLE(ty, mkexpr(addr)) );
8391 DIP("bs%c%c %s, %s\n",
8392 fwds ? 'f' : 'r', nameISize(sz),
8393 ( isReg ? nameIRegE(sz, pfx, modrm) : dis_buf ),
8394 nameIRegG(sz, pfx, modrm));
8396 /* First, widen src to 64 bits if it is not already. */
8397 assign( src64, widenUto64(mkexpr(src)) );
8399 /* Generate a bool expression which is zero iff the original is
8400 zero, and nonzero otherwise. Ask for a CmpNE version which, if
8401 instrumented by Memcheck, is instrumented expensively, since
8402 this may be used on the output of a preceding movmskb insn,
8403 which has been known to be partially defined, and in need of
8404 careful handling. */
8405 assign( srcB, binop(Iop_ExpCmpNE64, mkexpr(src64), mkU64(0)) );
8407 /* Flags: Z is 1 iff source value is zero. All others
8408 are undefined -- we force them to zero. */
8409 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
8410 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
8411 stmt( IRStmt_Put(
8412 OFFB_CC_DEP1,
8413 IRExpr_ITE( mkexpr(srcB),
8414 /* src!=0 */
8415 mkU64(0),
8416 /* src==0 */
8417 mkU64(AMD64G_CC_MASK_Z)
8420 /* Set NDEP even though it isn't used. This makes redundant-PUT
8421 elimination of previous stores to this field work better. */
8422 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
8424 /* Result: if the source value is zero, we can't use
8425 Iop_Clz64/Iop_Ctz64 as they have no defined result in that case.
8426 But anyway, amd64 semantics say the result is undefined in
8427 such situations. Hence handle the zero case specially. */
8429 /* Bleh. What we compute:
8431 bsf64: if src == 0 then {dst is unchanged}
8432 else Ctz64(src)
8434 bsr64: if src == 0 then {dst is unchanged}
8435 else 63 - Clz64(src)
8437 bsf32: if src == 0 then {dst is unchanged}
8438 else Ctz64(32Uto64(src))
8440 bsr32: if src == 0 then {dst is unchanged}
8441 else 63 - Clz64(32Uto64(src))
8443 bsf16: if src == 0 then {dst is unchanged}
8444 else Ctz64(32Uto64(16Uto32(src)))
8446 bsr16: if src == 0 then {dst is unchanged}
8447 else 63 - Clz64(32Uto64(16Uto32(src)))
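      Worked example (illustrative): for src = 0x00F0 (bits 4..7 set),
      bsf gives Ctz64(0xF0) = 4 and bsr gives 63 - Clz64(0xF0)
      = 63 - 56 = 7, i.e. the lowest and highest set bit positions
      respectively.  For src = 0 the guarded ITE below leaves the
      destination register unchanged.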
8450 /* The main computation, guarding against zero. */
8451 assign( dst64,
8452 IRExpr_ITE(
8453 mkexpr(srcB),
8454 /* src != 0 */
8455 fwds ? unop(Iop_Ctz64, mkexpr(src64))
8456 : binop(Iop_Sub64,
8457 mkU64(63),
8458 unop(Iop_Clz64, mkexpr(src64))),
8459 /* src == 0 -- leave dst unchanged */
8460 widenUto64( getIRegG( sz, pfx, modrm ) )
8464 if (sz == 2)
8465 assign( dst, unop(Iop_64to16, mkexpr(dst64)) );
8466 else
8467 if (sz == 4)
8468 assign( dst, unop(Iop_64to32, mkexpr(dst64)) );
8469 else
8470 assign( dst, mkexpr(dst64) );
8472 /* dump result back */
8473 putIRegG( sz, pfx, modrm, mkexpr(dst) );
8475 return delta;
8479 /* swap rAX with the reg specified by reg and REX.B */
8480 static
8481 void codegen_xchg_rAX_Reg ( Prefix pfx, Int sz, UInt regLo3 )
8483 IRType ty = szToITy(sz);
8484 IRTemp t1 = newTemp(ty);
8485 IRTemp t2 = newTemp(ty);
8486 vassert(sz == 2 || sz == 4 || sz == 8);
8487 vassert(regLo3 < 8);
8488 if (sz == 8) {
8489 assign( t1, getIReg64(R_RAX) );
8490 assign( t2, getIRegRexB(8, pfx, regLo3) );
8491 putIReg64( R_RAX, mkexpr(t2) );
8492 putIRegRexB(8, pfx, regLo3, mkexpr(t1) );
8493 } else if (sz == 4) {
8494 assign( t1, getIReg32(R_RAX) );
8495 assign( t2, getIRegRexB(4, pfx, regLo3) );
8496 putIReg32( R_RAX, mkexpr(t2) );
8497 putIRegRexB(4, pfx, regLo3, mkexpr(t1) );
8498 } else {
8499 assign( t1, getIReg16(R_RAX) );
8500 assign( t2, getIRegRexB(2, pfx, regLo3) );
8501 putIReg16( R_RAX, mkexpr(t2) );
8502 putIRegRexB(2, pfx, regLo3, mkexpr(t1) );
8504 DIP("xchg%c %s, %s\n",
8505 nameISize(sz), nameIRegRAX(sz),
8506 nameIRegRexB(sz,pfx, regLo3));
8510 static
8511 void codegen_SAHF ( void )
8513 /* Set the flags to:
8514 (amd64g_calculate_flags_all() & AMD64G_CC_MASK_O)
8515 -- retain the old O flag
8516 | (%AH & (AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
8517 |AMD64G_CC_MASK_P|AMD64G_CC_MASK_C)
8519 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
8520 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
8521 IRTemp oldflags = newTemp(Ity_I64);
8522 assign( oldflags, mk_amd64g_calculate_rflags_all() );
8523 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
8524 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
8525 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
8526 stmt( IRStmt_Put( OFFB_CC_DEP1,
8527 binop(Iop_Or64,
8528 binop(Iop_And64, mkexpr(oldflags), mkU64(AMD64G_CC_MASK_O)),
8529 binop(Iop_And64,
8530 binop(Iop_Shr64, getIReg64(R_RAX), mkU8(8)),
8531 mkU64(mask_SZACP))
8537 static
8538 void codegen_LAHF ( void )
8540 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
8541 IRExpr* rax_with_hole;
8542 IRExpr* new_byte;
8543 IRExpr* new_rax;
8544 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
8545 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
8547 IRTemp flags = newTemp(Ity_I64);
8548 assign( flags, mk_amd64g_calculate_rflags_all() );
8550 rax_with_hole
8551 = binop(Iop_And64, getIReg64(R_RAX), mkU64(~0xFF00ULL));
8552 new_byte
8553 = binop(Iop_Or64, binop(Iop_And64, mkexpr(flags), mkU64(mask_SZACP)),
8554 mkU64(1<<1));
8555 new_rax
8556 = binop(Iop_Or64, rax_with_hole,
8557 binop(Iop_Shl64, new_byte, mkU8(8)));
8558 putIReg64(R_RAX, new_rax);
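/* Illustrative note: the AMD64G_CC_MASK_{C,P,A,Z,S} constants are the
   rflags bit positions (bits 0, 2, 4, 6, 7 respectively), so the masked
   flags value already has the SZAPC bits in the right places for AH.
   The mkU64(1<<1) term sets bit 1, the always-one reserved rflags bit,
   giving AH = SF:ZF:0:AF:0:PF:1:CF as the comment above states.  For
   example, with only Z and C set, AH becomes 0x43. */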
8562 static
8563 ULong dis_cmpxchg_G_E ( /*OUT*/Bool* ok,
8564 const VexAbiInfo* vbi,
8565 Prefix pfx,
8566 Int size,
8567 Long delta0 )
8569 HChar dis_buf[50];
8570 Int len;
8572 IRType ty = szToITy(size);
8573 IRTemp acc = newTemp(ty);
8574 IRTemp src = newTemp(ty);
8575 IRTemp dest = newTemp(ty);
8576 IRTemp dest2 = newTemp(ty);
8577 IRTemp acc2 = newTemp(ty);
8578 IRTemp cond = newTemp(Ity_I1);
8579 IRTemp addr = IRTemp_INVALID;
8580 UChar rm = getUChar(delta0);
8582 /* There are 3 cases to consider:
8584 reg-reg: ignore any lock prefix, generate sequence based
8585 on ITE
8587 reg-mem, not locked: ignore any lock prefix, generate sequence
8588 based on ITE
8590 reg-mem, locked: use IRCAS
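      In all three cases the operation implemented is the usual CMPXCHG
      semantics (sketch in C-like pseudocode, for illustration):

         if (rAX == *dst) { ZF = 1; *dst = src; }
         else             { ZF = 0; rAX = *dst; }

      with the other arithmetic flags set as for "cmp rAX, *dst"; the
      three cases differ only in whether the destination is a register
      or memory, and whether the update must be atomic (IRCAS).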
8593 /* Decide whether F2 or F3 are acceptable. Never for register
8594 case, but for the memory case, one or the other is OK provided
8595 LOCK is also present. */
8596 if (epartIsReg(rm)) {
8597 if (haveF2orF3(pfx)) {
8598 *ok = False;
8599 return delta0;
8601 } else {
8602 if (haveF2orF3(pfx)) {
8603 if (haveF2andF3(pfx) || !haveLOCK(pfx)) {
8604 *ok = False;
8605 return delta0;
8610 if (epartIsReg(rm)) {
8611 /* case 1 */
8612 assign( dest, getIRegE(size, pfx, rm) );
8613 delta0++;
8614 assign( src, getIRegG(size, pfx, rm) );
8615 assign( acc, getIRegRAX(size) );
8616 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
8617 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
8618 assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
8619 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
8620 putIRegRAX(size, mkexpr(acc2));
8621 putIRegE(size, pfx, rm, mkexpr(dest2));
8622 DIP("cmpxchg%c %s,%s\n", nameISize(size),
8623 nameIRegG(size,pfx,rm),
8624 nameIRegE(size,pfx,rm) );
8626 else if (!epartIsReg(rm) && !haveLOCK(pfx)) {
8627 /* case 2 */
8628 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8629 assign( dest, loadLE(ty, mkexpr(addr)) );
8630 delta0 += len;
8631 assign( src, getIRegG(size, pfx, rm) );
8632 assign( acc, getIRegRAX(size) );
8633 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
8634 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
8635 assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
8636 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
8637 putIRegRAX(size, mkexpr(acc2));
8638 storeLE( mkexpr(addr), mkexpr(dest2) );
8639 DIP("cmpxchg%c %s,%s\n", nameISize(size),
8640 nameIRegG(size,pfx,rm), dis_buf);
8642 else if (!epartIsReg(rm) && haveLOCK(pfx)) {
8643 /* case 3 */
8644 /* src is new value. acc is expected value. dest is old value.
8645 Compute success from the output of the IRCAS, and steer the
8646 new value for RAX accordingly: in case of success, RAX is
8647 unchanged. */
8648 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8649 delta0 += len;
8650 assign( src, getIRegG(size, pfx, rm) );
8651 assign( acc, getIRegRAX(size) );
8652 stmt( IRStmt_CAS(
8653 mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
8654 NULL, mkexpr(acc), NULL, mkexpr(src) )
8656 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
8657 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
8658 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
8659 putIRegRAX(size, mkexpr(acc2));
8660 DIP("cmpxchg%c %s,%s\n", nameISize(size),
8661 nameIRegG(size,pfx,rm), dis_buf);
8663 else vassert(0);
8665 *ok = True;
8666 return delta0;
8670 /* Handle conditional move instructions of the form
8671 cmovcc E(reg-or-mem), G(reg)
8673 E(src) is reg-or-mem
8674 G(dst) is reg.
8676 If E is reg, --> GET %E, tmps
8677 GET %G, tmpd
8678 CMOVcc tmps, tmpd
8679 PUT tmpd, %G
8681 If E is mem --> (getAddr E) -> tmpa
8682 LD (tmpa), tmps
8683 GET %G, tmpd
8684 CMOVcc tmps, tmpd
8685 PUT tmpd, %G
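   In C-like terms (illustration only) the effect is simply
      G = cond ? E : G;
   Note that for 32-bit operand size the destination is rewritten (and
   hence zero-extended to 64 bits) even when the condition is false,
   which matches how CMOVcc behaves on real hardware.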
8687 static
8688 ULong dis_cmov_E_G ( const VexAbiInfo* vbi,
8689 Prefix pfx,
8690 Int sz,
8691 AMD64Condcode cond,
8692 Long delta0 )
8694 UChar rm = getUChar(delta0);
8695 HChar dis_buf[50];
8696 Int len;
8698 IRType ty = szToITy(sz);
8699 IRTemp tmps = newTemp(ty);
8700 IRTemp tmpd = newTemp(ty);
8702 if (epartIsReg(rm)) {
8703 assign( tmps, getIRegE(sz, pfx, rm) );
8704 assign( tmpd, getIRegG(sz, pfx, rm) );
8706 putIRegG( sz, pfx, rm,
8707 IRExpr_ITE( mk_amd64g_calculate_condition(cond),
8708 mkexpr(tmps),
8709 mkexpr(tmpd) )
8711 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
8712 nameIRegE(sz,pfx,rm),
8713 nameIRegG(sz,pfx,rm));
8714 return 1+delta0;
8717 /* E refers to memory */
8719 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8720 assign( tmps, loadLE(ty, mkexpr(addr)) );
8721 assign( tmpd, getIRegG(sz, pfx, rm) );
8723 putIRegG( sz, pfx, rm,
8724 IRExpr_ITE( mk_amd64g_calculate_condition(cond),
8725 mkexpr(tmps),
8726 mkexpr(tmpd) )
8729 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
8730 dis_buf,
8731 nameIRegG(sz,pfx,rm));
8732 return len+delta0;
8737 static
8738 ULong dis_xadd_G_E ( /*OUT*/Bool* decode_ok,
8739 const VexAbiInfo* vbi,
8740 Prefix pfx, Int sz, Long delta0 )
8742 Int len;
8743 UChar rm = getUChar(delta0);
8744 HChar dis_buf[50];
8746 IRType ty = szToITy(sz);
8747 IRTemp tmpd = newTemp(ty);
8748 IRTemp tmpt0 = newTemp(ty);
8749 IRTemp tmpt1 = newTemp(ty);
8751 /* There are 3 cases to consider:
8753 reg-reg: ignore any lock prefix,
8754 generate 'naive' (non-atomic) sequence
8756 reg-mem, not locked: ignore any lock prefix, generate 'naive'
8757 (non-atomic) sequence
8759 reg-mem, locked: use IRCAS
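      In all three cases the operation is the usual XADD semantics
      (sketch, for illustration):

         tmp = *dst + src;  src(G) = *dst;  *dst = tmp;

      with the flags set from the addition; the cases differ only in
      where the destination lives and whether the memory update is made
      atomic with casLE.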
8762 if (epartIsReg(rm)) {
8763 /* case 1 */
8764 assign( tmpd, getIRegE(sz, pfx, rm) );
8765 assign( tmpt0, getIRegG(sz, pfx, rm) );
8766 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
8767 mkexpr(tmpd), mkexpr(tmpt0)) );
8768 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
8769 putIRegG(sz, pfx, rm, mkexpr(tmpd));
8770 putIRegE(sz, pfx, rm, mkexpr(tmpt1));
8771 DIP("xadd%c %s, %s\n",
8772 nameISize(sz), nameIRegG(sz,pfx,rm), nameIRegE(sz,pfx,rm));
8773 *decode_ok = True;
8774 return 1+delta0;
8776 else if (!epartIsReg(rm) && !haveLOCK(pfx)) {
8777 /* case 2 */
8778 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8779 assign( tmpd, loadLE(ty, mkexpr(addr)) );
8780 assign( tmpt0, getIRegG(sz, pfx, rm) );
8781 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
8782 mkexpr(tmpd), mkexpr(tmpt0)) );
8783 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
8784 storeLE( mkexpr(addr), mkexpr(tmpt1) );
8785 putIRegG(sz, pfx, rm, mkexpr(tmpd));
8786 DIP("xadd%c %s, %s\n",
8787 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
8788 *decode_ok = True;
8789 return len+delta0;
8791 else if (!epartIsReg(rm) && haveLOCK(pfx)) {
8792 /* case 3 */
8793 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8794 assign( tmpd, loadLE(ty, mkexpr(addr)) );
8795 assign( tmpt0, getIRegG(sz, pfx, rm) );
8796 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
8797 mkexpr(tmpd), mkexpr(tmpt0)) );
8798 casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
8799 mkexpr(tmpt1)/*newVal*/, guest_RIP_curr_instr );
8800 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
8801 putIRegG(sz, pfx, rm, mkexpr(tmpd));
8802 DIP("xadd%c %s, %s\n",
8803 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
8804 *decode_ok = True;
8805 return len+delta0;
8807 /*UNREACHED*/
8808 vassert(0);
8811 //.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
8812 //..
8813 //.. static
8814 //.. UInt dis_mov_Ew_Sw ( UChar sorb, Long delta0 )
8815 //.. {
8816 //.. Int len;
8817 //.. IRTemp addr;
8818 //.. UChar rm = getUChar(delta0);
8819 //.. HChar dis_buf[50];
8820 //..
8821 //.. if (epartIsReg(rm)) {
8822 //.. putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
8823 //.. DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
8824 //.. return 1+delta0;
8825 //.. } else {
8826 //.. addr = disAMode ( &len, sorb, delta0, dis_buf );
8827 //.. putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
8828 //.. DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
8829 //.. return len+delta0;
8830 //.. }
8831 //.. }
8832 //..
8833 //.. /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If
8834 //.. dst is ireg and sz==4, zero out top half of it. */
8835 //..
8836 //.. static
8837 //.. UInt dis_mov_Sw_Ew ( UChar sorb,
8838 //.. Int sz,
8839 //.. UInt delta0 )
8840 //.. {
8841 //.. Int len;
8842 //.. IRTemp addr;
8843 //.. UChar rm = getUChar(delta0);
8844 //.. HChar dis_buf[50];
8845 //..
8846 //.. vassert(sz == 2 || sz == 4);
8847 //..
8848 //.. if (epartIsReg(rm)) {
8849 //.. if (sz == 4)
8850 //.. putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
8851 //.. else
8852 //.. putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));
8853 //..
8854 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
8855 //.. return 1+delta0;
8856 //.. } else {
8857 //.. addr = disAMode ( &len, sorb, delta0, dis_buf );
8858 //.. storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
8859 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
8860 //.. return len+delta0;
8861 //.. }
8862 //.. }
8864 /* Handle move instructions of the form
8865 mov S, E meaning
8866 mov sreg, reg-or-mem
8867 Is passed a ptr to the modRM byte, and the data size. Returns
8868 the address advanced completely over this instruction.
8870 VEX does not currently simulate segment registers on AMD64 which means that
8871 instead of moving the value of a segment register, zero is moved to the
8872 destination. The zero value represents a null (unused) selector. This is
8873 not correct (especially for the %cs, %fs and %gs registers) but it seems to
8874 provide a sufficient simulation for currently seen programs that use this
8875 instruction. If some program actually decides to use the obtained segment
8876 selector for something meaningful then the zero value should be a clear
8877 indicator that there is some problem.
8879 S(src) is sreg.
8880 E(dst) is reg-or-mem
8882 If E is reg, --> PUT $0, %E
8884 If E is mem, --> (getAddr E) -> tmpa
8885 ST $0, (tmpa)
8887 static
8888 ULong dis_mov_S_E ( const VexAbiInfo* vbi,
8889 Prefix pfx,
8890 Int size,
8891 Long delta0 )
8893 Int len;
8894 UChar rm = getUChar(delta0);
8895 HChar dis_buf[50];
8897 if (epartIsReg(rm)) {
8898 putIRegE(size, pfx, rm, mkU(szToITy(size), 0));
8899 DIP("mov %s,%s\n", nameSReg(gregOfRexRM(pfx, rm)),
8900 nameIRegE(size, pfx, rm));
8901 return 1+delta0;
8904 /* E refers to memory */
8906 IRTemp addr = disAMode(&len, vbi, pfx, delta0, dis_buf, 0);
8907 storeLE(mkexpr(addr), mkU16(0));
8908 DIP("mov %s,%s\n", nameSReg(gregOfRexRM(pfx, rm)),
8909 dis_buf);
8910 return len+delta0;
8914 //.. static
8915 //.. void dis_push_segreg ( UInt sreg, Int sz )
8916 //.. {
8917 //.. IRTemp t1 = newTemp(Ity_I16);
8918 //.. IRTemp ta = newTemp(Ity_I32);
8919 //.. vassert(sz == 2 || sz == 4);
8920 //..
8921 //.. assign( t1, getSReg(sreg) );
8922 //.. assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
8923 //.. putIReg(4, R_ESP, mkexpr(ta));
8924 //.. storeLE( mkexpr(ta), mkexpr(t1) );
8925 //..
8926 //.. DIP("pushw %s\n", nameSReg(sreg));
8927 //.. }
8928 //..
8929 //.. static
8930 //.. void dis_pop_segreg ( UInt sreg, Int sz )
8931 //.. {
8932 //.. IRTemp t1 = newTemp(Ity_I16);
8933 //.. IRTemp ta = newTemp(Ity_I32);
8934 //.. vassert(sz == 2 || sz == 4);
8935 //..
8936 //.. assign( ta, getIReg(4, R_ESP) );
8937 //.. assign( t1, loadLE(Ity_I16, mkexpr(ta)) );
8938 //..
8939 //.. putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
8940 //.. putSReg( sreg, mkexpr(t1) );
8941 //.. DIP("pop %s\n", nameSReg(sreg));
8942 //.. }
8944 static
8945 void dis_ret ( /*MOD*/DisResult* dres, const VexAbiInfo* vbi, ULong d64 )
8947 IRTemp t1 = newTemp(Ity_I64);
8948 IRTemp t2 = newTemp(Ity_I64);
8949 IRTemp t3 = newTemp(Ity_I64);
8950 assign(t1, getIReg64(R_RSP));
8951 assign(t2, loadLE(Ity_I64,mkexpr(t1)));
8952 assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64)));
8953 putIReg64(R_RSP, mkexpr(t3));
8954 make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret");
8955 jmp_treg(dres, Ijk_Ret, t2);
8956 vassert(dres->whatNext == Dis_StopHere);
8960 /*------------------------------------------------------------*/
8961 /*--- SSE/SSE2/SSE3 helpers ---*/
8962 /*------------------------------------------------------------*/
8964 /* Indicates whether the op requires a rounding-mode argument. Note
8965 that this covers only vector floating point arithmetic ops, and
8966 omits the scalar ones that need rounding modes. Note also that
8967 inconsistencies here will get picked up later by the IR sanity
8968 checker, so this isn't correctness-critical. */
8969 static Bool requiresRMode ( IROp op )
8971 switch (op) {
8972 /* 128 bit ops */
8973 case Iop_Add32Fx4: case Iop_Sub32Fx4:
8974 case Iop_Mul32Fx4: case Iop_Div32Fx4:
8975 case Iop_Add64Fx2: case Iop_Sub64Fx2:
8976 case Iop_Mul64Fx2: case Iop_Div64Fx2:
8977 /* 256 bit ops */
8978 case Iop_Add32Fx8: case Iop_Sub32Fx8:
8979 case Iop_Mul32Fx8: case Iop_Div32Fx8:
8980 case Iop_Add64Fx4: case Iop_Sub64Fx4:
8981 case Iop_Mul64Fx4: case Iop_Div64Fx4:
8982 return True;
8983 default:
8984 break;
8986 return False;
8990 /* Worker function; do not call directly.
8991 Handles full width G = G `op` E and G = (not G) `op` E.
8994 static ULong dis_SSE_E_to_G_all_wrk (
8995 const VexAbiInfo* vbi,
8996 Prefix pfx, Long delta,
8997 const HChar* opname, IROp op,
8998 Bool invertG
9001 HChar dis_buf[50];
9002 Int alen;
9003 IRTemp addr;
9004 UChar rm = getUChar(delta);
9005 Bool needsRMode = requiresRMode(op);
9006 IRExpr* gpart
9007 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRexRM(pfx,rm)))
9008 : getXMMReg(gregOfRexRM(pfx,rm));
9009 if (epartIsReg(rm)) {
9010 putXMMReg(
9011 gregOfRexRM(pfx,rm),
9012 needsRMode
9013 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
9014 gpart,
9015 getXMMReg(eregOfRexRM(pfx,rm)))
9016 : binop(op, gpart,
9017 getXMMReg(eregOfRexRM(pfx,rm)))
9019 DIP("%s %s,%s\n", opname,
9020 nameXMMReg(eregOfRexRM(pfx,rm)),
9021 nameXMMReg(gregOfRexRM(pfx,rm)) );
9022 return delta+1;
9023 } else {
9024 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9025 putXMMReg(
9026 gregOfRexRM(pfx,rm),
9027 needsRMode
9028 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
9029 gpart,
9030 loadLE(Ity_V128, mkexpr(addr)))
9031 : binop(op, gpart,
9032 loadLE(Ity_V128, mkexpr(addr)))
9034 DIP("%s %s,%s\n", opname,
9035 dis_buf,
9036 nameXMMReg(gregOfRexRM(pfx,rm)) );
9037 return delta+alen;
9042 /* All lanes SSE binary operation, G = G `op` E. */
9044 static
9045 ULong dis_SSE_E_to_G_all ( const VexAbiInfo* vbi,
9046 Prefix pfx, Long delta,
9047 const HChar* opname, IROp op )
9049 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, False );
9052 /* All lanes SSE binary operation, G = (not G) `op` E. */
9054 static
9055 ULong dis_SSE_E_to_G_all_invG ( const VexAbiInfo* vbi,
9056 Prefix pfx, Long delta,
9057 const HChar* opname, IROp op )
9059 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, True );
9063 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */
9065 static ULong dis_SSE_E_to_G_lo32 ( const VexAbiInfo* vbi,
9066 Prefix pfx, Long delta,
9067 const HChar* opname, IROp op )
9069 HChar dis_buf[50];
9070 Int alen;
9071 IRTemp addr;
9072 UChar rm = getUChar(delta);
9073 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
9074 if (epartIsReg(rm)) {
9075 putXMMReg( gregOfRexRM(pfx,rm),
9076 binop(op, gpart,
9077 getXMMReg(eregOfRexRM(pfx,rm))) );
9078 DIP("%s %s,%s\n", opname,
9079 nameXMMReg(eregOfRexRM(pfx,rm)),
9080 nameXMMReg(gregOfRexRM(pfx,rm)) );
9081 return delta+1;
9082 } else {
9083 /* We can only do a 32-bit memory read, so the upper 3/4 of the
9084 E operand needs to be made simply of zeroes. */
9085 IRTemp epart = newTemp(Ity_V128);
9086 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9087 assign( epart, unop( Iop_32UtoV128,
9088 loadLE(Ity_I32, mkexpr(addr))) );
9089 putXMMReg( gregOfRexRM(pfx,rm),
9090 binop(op, gpart, mkexpr(epart)) );
9091 DIP("%s %s,%s\n", opname,
9092 dis_buf,
9093 nameXMMReg(gregOfRexRM(pfx,rm)) );
9094 return delta+alen;
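/* Illustrative note: the "lo32" ops passed here (e.g. Iop_Add32F0x4,
   as used for ADDSS-style instructions) combine only lane 0 of the two
   operands and pass lanes 1..3 of the first (G) operand through
   unchanged, which is why zero-filling the upper 96 bits of a memory
   E operand above is harmless. */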
9099 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. */
9101 static ULong dis_SSE_E_to_G_lo64 ( const VexAbiInfo* vbi,
9102 Prefix pfx, Long delta,
9103 const HChar* opname, IROp op )
9105 HChar dis_buf[50];
9106 Int alen;
9107 IRTemp addr;
9108 UChar rm = getUChar(delta);
9109 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
9110 if (epartIsReg(rm)) {
9111 putXMMReg( gregOfRexRM(pfx,rm),
9112 binop(op, gpart,
9113 getXMMReg(eregOfRexRM(pfx,rm))) );
9114 DIP("%s %s,%s\n", opname,
9115 nameXMMReg(eregOfRexRM(pfx,rm)),
9116 nameXMMReg(gregOfRexRM(pfx,rm)) );
9117 return delta+1;
9118 } else {
9119 /* We can only do a 64-bit memory read, so the upper half of the
9120 E operand needs to be made simply of zeroes. */
9121 IRTemp epart = newTemp(Ity_V128);
9122 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9123 assign( epart, unop( Iop_64UtoV128,
9124 loadLE(Ity_I64, mkexpr(addr))) );
9125 putXMMReg( gregOfRexRM(pfx,rm),
9126 binop(op, gpart, mkexpr(epart)) );
9127 DIP("%s %s,%s\n", opname,
9128 dis_buf,
9129 nameXMMReg(gregOfRexRM(pfx,rm)) );
9130 return delta+alen;
9135 /* All lanes unary SSE operation, G = op(E). */
9137 static ULong dis_SSE_E_to_G_unary_all (
9138 const VexAbiInfo* vbi,
9139 Prefix pfx, Long delta,
9140 const HChar* opname, IROp op
9143 HChar dis_buf[50];
9144 Int alen;
9145 IRTemp addr;
9146 UChar rm = getUChar(delta);
9147 // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
9148 // up in the usual way.
9149 Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2;
9150 if (epartIsReg(rm)) {
9151 IRExpr* src = getXMMReg(eregOfRexRM(pfx,rm));
9152 /* XXXROUNDINGFIXME */
9153 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
9154 : unop(op, src);
9155 putXMMReg( gregOfRexRM(pfx,rm), res );
9156 DIP("%s %s,%s\n", opname,
9157 nameXMMReg(eregOfRexRM(pfx,rm)),
9158 nameXMMReg(gregOfRexRM(pfx,rm)) );
9159 return delta+1;
9160 } else {
9161 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9162 IRExpr* src = loadLE(Ity_V128, mkexpr(addr));
9163 /* XXXROUNDINGFIXME */
9164 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
9165 : unop(op, src);
9166 putXMMReg( gregOfRexRM(pfx,rm), res );
9167 DIP("%s %s,%s\n", opname,
9168 dis_buf,
9169 nameXMMReg(gregOfRexRM(pfx,rm)) );
9170 return delta+alen;
9175 /* Lowest 32-bit lane only unary SSE operation, G = op(E). */
9177 static ULong dis_SSE_E_to_G_unary_lo32 (
9178 const VexAbiInfo* vbi,
9179 Prefix pfx, Long delta,
9180 const HChar* opname, IROp op
9183 /* First we need to get the old G value and patch the low 32 bits
9184 of the E operand into it. Then apply op and write back to G. */
9185 HChar dis_buf[50];
9186 Int alen;
9187 IRTemp addr;
9188 UChar rm = getUChar(delta);
9189 IRTemp oldG0 = newTemp(Ity_V128);
9190 IRTemp oldG1 = newTemp(Ity_V128);
9192 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
9194 if (epartIsReg(rm)) {
9195 assign( oldG1,
9196 binop( Iop_SetV128lo32,
9197 mkexpr(oldG0),
9198 getXMMRegLane32(eregOfRexRM(pfx,rm), 0)) );
9199 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
9200 DIP("%s %s,%s\n", opname,
9201 nameXMMReg(eregOfRexRM(pfx,rm)),
9202 nameXMMReg(gregOfRexRM(pfx,rm)) );
9203 return delta+1;
9204 } else {
9205 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9206 assign( oldG1,
9207 binop( Iop_SetV128lo32,
9208 mkexpr(oldG0),
9209 loadLE(Ity_I32, mkexpr(addr)) ));
9210 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
9211 DIP("%s %s,%s\n", opname,
9212 dis_buf,
9213 nameXMMReg(gregOfRexRM(pfx,rm)) );
9214 return delta+alen;
9219 /* Lowest 64-bit lane only unary SSE operation, G = op(E). */
9221 static ULong dis_SSE_E_to_G_unary_lo64 (
9222 const VexAbiInfo* vbi,
9223 Prefix pfx, Long delta,
9224 const HChar* opname, IROp op
9227 /* First we need to get the old G value and patch the low 64 bits
9228 of the E operand into it. Then apply op and write back to G. */
9229 HChar dis_buf[50];
9230 Int alen;
9231 IRTemp addr;
9232 UChar rm = getUChar(delta);
9233 IRTemp oldG0 = newTemp(Ity_V128);
9234 IRTemp oldG1 = newTemp(Ity_V128);
9236 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
9238 if (epartIsReg(rm)) {
9239 assign( oldG1,
9240 binop( Iop_SetV128lo64,
9241 mkexpr(oldG0),
9242 getXMMRegLane64(eregOfRexRM(pfx,rm), 0)) );
9243 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
9244 DIP("%s %s,%s\n", opname,
9245 nameXMMReg(eregOfRexRM(pfx,rm)),
9246 nameXMMReg(gregOfRexRM(pfx,rm)) );
9247 return delta+1;
9248 } else {
9249 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9250 assign( oldG1,
9251 binop( Iop_SetV128lo64,
9252 mkexpr(oldG0),
9253 loadLE(Ity_I64, mkexpr(addr)) ));
9254 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
9255 DIP("%s %s,%s\n", opname,
9256 dis_buf,
9257 nameXMMReg(gregOfRexRM(pfx,rm)) );
9258 return delta+alen;
9263 /* SSE integer binary operation:
9264 G = G `op` E (eLeft == False)
9265 G = E `op` G (eLeft == True)
9267 static ULong dis_SSEint_E_to_G(
9268 const VexAbiInfo* vbi,
9269 Prefix pfx, Long delta,
9270 const HChar* opname, IROp op,
9271 Bool eLeft
9274 HChar dis_buf[50];
9275 Int alen;
9276 IRTemp addr;
9277 UChar rm = getUChar(delta);
9278 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
9279 IRExpr* epart = NULL;
9280 if (epartIsReg(rm)) {
9281 epart = getXMMReg(eregOfRexRM(pfx,rm));
9282 DIP("%s %s,%s\n", opname,
9283 nameXMMReg(eregOfRexRM(pfx,rm)),
9284 nameXMMReg(gregOfRexRM(pfx,rm)) );
9285 delta += 1;
9286 } else {
9287 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9288 epart = loadLE(Ity_V128, mkexpr(addr));
9289 DIP("%s %s,%s\n", opname,
9290 dis_buf,
9291 nameXMMReg(gregOfRexRM(pfx,rm)) );
9292 delta += alen;
9294 putXMMReg( gregOfRexRM(pfx,rm),
9295 eLeft ? binop(op, epart, gpart)
9296 : binop(op, gpart, epart) );
9297 return delta;
9301 /* Helper for doing SSE FP comparisons. False return ==> unhandled.
9302 This is all a bit of a kludge in that it ignores the subtleties of
9303 ordered-vs-unordered and signalling-vs-nonsignalling in the Intel
9304 spec. The meaning of the outputs is as follows:
9306 preZeroP: the active lanes of both incoming arguments should be set to zero
9307 before performing the operation. IOW the actual args are to be ignored
9308 and instead zero bits are to be used. This is a bit strange but is needed
9309 to make the constant-false/true variants (FALSE_OQ, TRUE_UQ, FALSE_OS,
9310 TRUE_US) work.
9312 preSwapP: the args should be swapped before performing the operation. Note
9313 that zeroing arg input sections (per preZeroP) and swapping them (per
9314 preSwapP) are allowed to happen in either order; the result is the same.
9316 opP: this returns the actual comparison op to perform.
9318 postNotP: if true, the result(ing vector) of the comparison operation should
9319 be bitwise-not-ed. Note that only the lanes of the output actually
9320 computed by opP should be not-ed.
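   Example (illustrative): imm8 = 0xD (GE_OS) comes out as
   preSwap = True, op = CmpLE, postNot = False, i.e. GE(a,b) is
   computed as LE(b,a); while imm8 = 0x5 (NLT_US) comes out as
   op = CmpLT, postNot = True, i.e. NOT(LT(a,b)).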
9322 static Bool findSSECmpOp ( /*OUT*/Bool* preZeroP,
9323 /*OUT*/Bool* preSwapP,
9324 /*OUT*/IROp* opP,
9325 /*OUT*/Bool* postNotP,
9326 UInt imm8, Bool all_lanes, Int sz )
9328 vassert(*preZeroP == False);
9329 vassert(*preSwapP == False);
9330 vassert(*opP == Iop_INVALID);
9331 vassert(*postNotP == False);
9333 if (imm8 >= 32) return False;
9335 /* First, compute a (preZero, preSwap, op, postNot) quad from
9336 the supplied imm8. */
9337 Bool preZero = False;
9338 Bool preSwap = False;
9339 IROp op = Iop_INVALID;
9340 Bool postNot = False;
9342 # define XXX(_preZero, _preSwap, _op, _postNot) \
9343 { preZero = _preZero; preSwap = _preSwap; op = _op; postNot = _postNot; }
9344 // If you add a case here, add a corresponding test for both VCMPSD_128
9345 // and VCMPSS_128 in avx-1.c.
9346 // Cases 0xA and above are
9347 // "Enhanced Comparison Predicate[s] for VEX-Encoded [insns]"
9348 switch (imm8) {
9349 // "O" = ordered, "U" = unordered
9350 // "Q" = non-signalling (quiet), "S" = signalling
9352 // replace active arg lanes in operands with zero
9353 // |
9354 // | swap operands before applying the cmp op?
9355 // | |
9356 // | | cmp op invert active lanes after?
9357 // | | | |
9358 // v v v v
9359 case 0x0: XXX(False, False, Iop_CmpEQ32Fx4, False); break; // EQ_OQ
9360 case 0x8: XXX(False, False, Iop_CmpEQ32Fx4, False); break; // EQ_UQ
9361 case 0x10: XXX(False, False, Iop_CmpEQ32Fx4, False); break; // EQ_OS
9362 case 0x18: XXX(False, False, Iop_CmpEQ32Fx4, False); break; // EQ_US
9364 case 0x1: XXX(False, False, Iop_CmpLT32Fx4, False); break; // LT_OS
9365 case 0x11: XXX(False, False, Iop_CmpLT32Fx4, False); break; // LT_OQ
9367 case 0x2: XXX(False, False, Iop_CmpLE32Fx4, False); break; // LE_OS
9368 case 0x12: XXX(False, False, Iop_CmpLE32Fx4, False); break; // LE_OQ
9370 case 0x3: XXX(False, False, Iop_CmpUN32Fx4, False); break; // UNORD_Q
9371 case 0x13: XXX(False, False, Iop_CmpUN32Fx4, False); break; // UNORD_S
9373 // 0xC: this isn't really right because it returns all-1s when
9374 // either operand is a NaN, and it should return all-0s.
9375 case 0x4: XXX(False, False, Iop_CmpEQ32Fx4, True); break; // NEQ_UQ
9376 case 0xC: XXX(False, False, Iop_CmpEQ32Fx4, True); break; // NEQ_OQ
9377 case 0x14: XXX(False, False, Iop_CmpEQ32Fx4, True); break; // NEQ_US
9378 case 0x1C: XXX(False, False, Iop_CmpEQ32Fx4, True); break; // NEQ_OS
9380 case 0x5: XXX(False, False, Iop_CmpLT32Fx4, True); break; // NLT_US
9381 case 0x15: XXX(False, False, Iop_CmpLT32Fx4, True); break; // NLT_UQ
9383 case 0x6: XXX(False, False, Iop_CmpLE32Fx4, True); break; // NLE_US
9384 case 0x16: XXX(False, False, Iop_CmpLE32Fx4, True); break; // NLE_UQ
9386 case 0x7: XXX(False, False, Iop_CmpUN32Fx4, True); break; // ORD_Q
9387 case 0x17: XXX(False, False, Iop_CmpUN32Fx4, True); break; // ORD_S
9389 case 0x9: XXX(False, True, Iop_CmpLE32Fx4, True); break; // NGE_US
9390 case 0x19: XXX(False, True, Iop_CmpLE32Fx4, True); break; // NGE_UQ
9392 case 0xA: XXX(False, True, Iop_CmpLT32Fx4, True); break; // NGT_US
9393 case 0x1A: XXX(False, True, Iop_CmpLT32Fx4, True); break; // NGT_UQ
9395 case 0xD: XXX(False, True, Iop_CmpLE32Fx4, False); break; // GE_OS
9396 case 0x1D: XXX(False, True, Iop_CmpLE32Fx4, False); break; // GE_OQ
9398 case 0xE: XXX(False, True, Iop_CmpLT32Fx4, False); break; // GT_OS
9399 case 0x1E: XXX(False, True, Iop_CmpLT32Fx4, False); break; // GT_OQ
9400 // Constant-value-result ops
9401 case 0xB: XXX(True, False, Iop_CmpEQ32Fx4, True); break; // FALSE_OQ
9402 case 0xF: XXX(True, False, Iop_CmpEQ32Fx4, False); break; // TRUE_UQ
9403 case 0x1B: XXX(True, False, Iop_CmpEQ32Fx4, True); break; // FALSE_OS
9404 case 0x1F: XXX(True, False, Iop_CmpEQ32Fx4, False); break; // TRUE_US
9405 /* Don't forget to add test cases to VCMPSS_128_<imm8> in
9406 avx-1.c if new cases turn up. */
9407 default: break;
9409 # undef XXX
9410 if (op == Iop_INVALID) return False;
9412 /* Now convert the op into one with the same arithmetic but that is
9413 correct for the width and laneage requirements. */
9415 /**/ if (sz == 4 && all_lanes) {
9416 switch (op) {
9417 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32Fx4; break;
9418 case Iop_CmpLT32Fx4: op = Iop_CmpLT32Fx4; break;
9419 case Iop_CmpLE32Fx4: op = Iop_CmpLE32Fx4; break;
9420 case Iop_CmpUN32Fx4: op = Iop_CmpUN32Fx4; break;
9421 default: vassert(0);
9424 else if (sz == 4 && !all_lanes) {
9425 switch (op) {
9426 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32F0x4; break;
9427 case Iop_CmpLT32Fx4: op = Iop_CmpLT32F0x4; break;
9428 case Iop_CmpLE32Fx4: op = Iop_CmpLE32F0x4; break;
9429 case Iop_CmpUN32Fx4: op = Iop_CmpUN32F0x4; break;
9430 default: vassert(0);
9433 else if (sz == 8 && all_lanes) {
9434 switch (op) {
9435 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64Fx2; break;
9436 case Iop_CmpLT32Fx4: op = Iop_CmpLT64Fx2; break;
9437 case Iop_CmpLE32Fx4: op = Iop_CmpLE64Fx2; break;
9438 case Iop_CmpUN32Fx4: op = Iop_CmpUN64Fx2; break;
9439 default: vassert(0);
9442 else if (sz == 8 && !all_lanes) {
9443 switch (op) {
9444 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64F0x2; break;
9445 case Iop_CmpLT32Fx4: op = Iop_CmpLT64F0x2; break;
9446 case Iop_CmpLE32Fx4: op = Iop_CmpLE64F0x2; break;
9447 case Iop_CmpUN32Fx4: op = Iop_CmpUN64F0x2; break;
9448 default: vassert(0);
9451 else {
9452 vpanic("findSSECmpOp(amd64,guest)");
9455 if (preZero) {
9456 // In this case, preSwap is irrelevant, but assert anyway.
9457 vassert(preSwap == False);
9459 *preZeroP = preZero; *preSwapP = preSwap; *opP = op; *postNotP = postNot;
9460 return True;
9464 /* Handles SSE 32F/64F comparisons. It can fail, in which case it
9465 returns the original delta to indicate failure. */
9467 static Long dis_SSE_cmp_E_to_G ( const VexAbiInfo* vbi,
9468 Prefix pfx, Long delta,
9469 const HChar* opname, Bool all_lanes, Int sz )
9471 Long delta0 = delta;
9472 HChar dis_buf[50];
9473 Int alen;
9474 UInt imm8;
9475 IRTemp addr;
9476 Bool preZero = False;
9477 Bool preSwap = False;
9478 IROp op = Iop_INVALID;
9479 Bool postNot = False;
9480 IRTemp plain = newTemp(Ity_V128);
9481 UChar rm = getUChar(delta);
9482 UShort mask = 0;
9483 vassert(sz == 4 || sz == 8);
9484 if (epartIsReg(rm)) {
9485 imm8 = getUChar(delta+1);
9486 if (imm8 >= 8) return delta0; /* FAIL */
9487 Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot,
9488 imm8, all_lanes, sz);
9489 if (!ok) return delta0; /* FAIL */
9490 vassert(!preZero); /* never needed for imm8 < 8 */
9491 vassert(!preSwap); /* never needed for imm8 < 8 */
9492 assign( plain, binop(op, getXMMReg(gregOfRexRM(pfx,rm)),
9493 getXMMReg(eregOfRexRM(pfx,rm))) );
9494 delta += 2;
9495 DIP("%s $%u,%s,%s\n", opname,
9496 imm8,
9497 nameXMMReg(eregOfRexRM(pfx,rm)),
9498 nameXMMReg(gregOfRexRM(pfx,rm)) );
9499 } else {
9500 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
9501 imm8 = getUChar(delta+alen);
9502 if (imm8 >= 8) return delta0; /* FAIL */
9503 Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot,
9504 imm8, all_lanes, sz);
9505 if (!ok) return delta0; /* FAIL */
9506 vassert(!preZero); /* never needed for imm8 < 8 */
9507 vassert(!preSwap); /* never needed for imm8 < 8 */
9508 assign( plain,
9509 binop(
9511 getXMMReg(gregOfRexRM(pfx,rm)),
9512 all_lanes
9513 ? loadLE(Ity_V128, mkexpr(addr))
9514 : sz == 8
9515 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
9516 : /*sz==4*/
9517 unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))
9520 delta += alen+1;
9521 DIP("%s $%u,%s,%s\n", opname,
9522 imm8,
9523 dis_buf,
9524 nameXMMReg(gregOfRexRM(pfx,rm)) );
9527 if (postNot && all_lanes) {
9528 putXMMReg( gregOfRexRM(pfx,rm),
9529 unop(Iop_NotV128, mkexpr(plain)) );
9531 else
9532 if (postNot && !all_lanes) {
9533 mask = toUShort(sz==4 ? 0x000F : 0x00FF);
9534 putXMMReg( gregOfRexRM(pfx,rm),
9535 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) );
9537 else {
9538 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(plain) );
9541 return delta;
9545 /* Vector by scalar shift of G by the amount specified at the bottom
9546 of E. */
9548 static ULong dis_SSE_shiftG_byE ( const VexAbiInfo* vbi,
9549 Prefix pfx, Long delta,
9550 const HChar* opname, IROp op )
9552 HChar dis_buf[50];
9553 Int alen, size;
9554 IRTemp addr;
9555 Bool shl, shr, sar;
9556 UChar rm = getUChar(delta);
9557 IRTemp g0 = newTemp(Ity_V128);
9558 IRTemp g1 = newTemp(Ity_V128);
9559 IRTemp amt = newTemp(Ity_I64);
9560 IRTemp amt8 = newTemp(Ity_I8);
9561 if (epartIsReg(rm)) {
9562 assign( amt, getXMMRegLane64(eregOfRexRM(pfx,rm), 0) );
9563 DIP("%s %s,%s\n", opname,
9564 nameXMMReg(eregOfRexRM(pfx,rm)),
9565 nameXMMReg(gregOfRexRM(pfx,rm)) );
9566 delta++;
9567 } else {
9568 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9569 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
9570 DIP("%s %s,%s\n", opname,
9571 dis_buf,
9572 nameXMMReg(gregOfRexRM(pfx,rm)) );
9573 delta += alen;
9575 assign( g0, getXMMReg(gregOfRexRM(pfx,rm)) );
9576 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
9578 shl = shr = sar = False;
9579 size = 0;
9580 switch (op) {
9581 case Iop_ShlN16x8: shl = True; size = 32; break;
9582 case Iop_ShlN32x4: shl = True; size = 32; break;
9583 case Iop_ShlN64x2: shl = True; size = 64; break;
9584 case Iop_SarN16x8: sar = True; size = 16; break;
9585 case Iop_SarN32x4: sar = True; size = 32; break;
9586 case Iop_ShrN16x8: shr = True; size = 16; break;
9587 case Iop_ShrN32x4: shr = True; size = 32; break;
9588 case Iop_ShrN64x2: shr = True; size = 64; break;
9589 default: vassert(0);
9592 if (shl || shr) {
9593 assign(
9595 IRExpr_ITE(
9596 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
9597 binop(op, mkexpr(g0), mkexpr(amt8)),
9598 mkV128(0x0000)
9601 } else
9602 if (sar) {
9603 assign(
9605 IRExpr_ITE(
9606 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
9607 binop(op, mkexpr(g0), mkexpr(amt8)),
9608 binop(op, mkexpr(g0), mkU8(size-1))
9611 } else {
9612 vassert(0);
9615 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(g1) );
9616 return delta;
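/* Illustrative note on the out-of-range cases above: if the 64-bit
   shift amount is >= the lane width, a logical shift (e.g. PSRLW with
   amount 20, lane width 16) yields an all-zero result, whereas an
   arithmetic shift (PSRAW) is clamped to lane_width - 1, so that each
   lane ends up filled with its sign bit. */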
9620 /* Vector by scalar shift of E by an immediate byte. */
9622 static
9623 ULong dis_SSE_shiftE_imm ( Prefix pfx,
9624 Long delta, const HChar* opname, IROp op )
9626 Bool shl, shr, sar;
9627 UChar rm = getUChar(delta);
9628 IRTemp e0 = newTemp(Ity_V128);
9629 IRTemp e1 = newTemp(Ity_V128);
9630 UChar amt, size;
9631 vassert(epartIsReg(rm));
9632 vassert(gregLO3ofRM(rm) == 2
9633 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
9634 amt = getUChar(delta+1);
9635 delta += 2;
9636 DIP("%s $%d,%s\n", opname,
9637 (Int)amt,
9638 nameXMMReg(eregOfRexRM(pfx,rm)) );
9639 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );
9641 shl = shr = sar = False;
9642 size = 0;
9643 switch (op) {
9644 case Iop_ShlN16x8: shl = True; size = 16; break;
9645 case Iop_ShlN32x4: shl = True; size = 32; break;
9646 case Iop_ShlN64x2: shl = True; size = 64; break;
9647 case Iop_SarN16x8: sar = True; size = 16; break;
9648 case Iop_SarN32x4: sar = True; size = 32; break;
9649 case Iop_ShrN16x8: shr = True; size = 16; break;
9650 case Iop_ShrN32x4: shr = True; size = 32; break;
9651 case Iop_ShrN64x2: shr = True; size = 64; break;
9652 default: vassert(0);
9655 if (shl || shr) {
9656 assign( e1, amt >= size
9657 ? mkV128(0x0000)
9658 : binop(op, mkexpr(e0), mkU8(amt))
9660 } else
9661 if (sar) {
9662 assign( e1, amt >= size
9663 ? binop(op, mkexpr(e0), mkU8(size-1))
9664 : binop(op, mkexpr(e0), mkU8(amt))
9666 } else {
9667 vassert(0);
9670 putXMMReg( eregOfRexRM(pfx,rm), mkexpr(e1) );
9671 return delta;
9675 /* Get the current SSE rounding mode. */
9677 static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void )
9679 return
9680 unop( Iop_64to32,
9681 binop( Iop_And64,
9682 IRExpr_Get( OFFB_SSEROUND, Ity_I64 ),
9683 mkU64(3) ));
9686 static void put_sse_roundingmode ( IRExpr* sseround )
9688 vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32);
9689 stmt( IRStmt_Put( OFFB_SSEROUND,
9690 unop(Iop_32Uto64,sseround) ) );
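/* Note (assumption, for orientation): the two-bit value kept in the
   SSEROUND guest state field follows IR's IRRoundingMode encoding
   (0 = nearest, 1 = towards -inf, 2 = towards +inf, 3 = towards zero);
   conveniently this coincides with the ordering of the hardware
   MXCSR.RC field, so masking with 3 above is all that is needed. */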
9693 /* Break a V128-bit value up into four 32-bit ints. */
9695 static void breakupV128to32s ( IRTemp t128,
9696 /*OUTs*/
9697 IRTemp* t3, IRTemp* t2,
9698 IRTemp* t1, IRTemp* t0 )
9700 IRTemp hi64 = newTemp(Ity_I64);
9701 IRTemp lo64 = newTemp(Ity_I64);
9702 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
9703 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) );
9705 vassert(t0 && *t0 == IRTemp_INVALID);
9706 vassert(t1 && *t1 == IRTemp_INVALID);
9707 vassert(t2 && *t2 == IRTemp_INVALID);
9708 vassert(t3 && *t3 == IRTemp_INVALID);
9710 *t0 = newTemp(Ity_I32);
9711 *t1 = newTemp(Ity_I32);
9712 *t2 = newTemp(Ity_I32);
9713 *t3 = newTemp(Ity_I32);
9714 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) );
9715 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
9716 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) );
9717 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
9720 /* Construct a V128-bit value from four 32-bit ints. */
9722 static IRExpr* mkV128from32s ( IRTemp t3, IRTemp t2,
9723 IRTemp t1, IRTemp t0 )
9725 return
9726 binop( Iop_64HLtoV128,
9727 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
9728 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
9732 /* Break a 64-bit value up into four 16-bit ints. */
9734 static void breakup64to16s ( IRTemp t64,
9735 /*OUTs*/
9736 IRTemp* t3, IRTemp* t2,
9737 IRTemp* t1, IRTemp* t0 )
9739 IRTemp hi32 = newTemp(Ity_I32);
9740 IRTemp lo32 = newTemp(Ity_I32);
9741 assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
9742 assign( lo32, unop(Iop_64to32, mkexpr(t64)) );
9744 vassert(t0 && *t0 == IRTemp_INVALID);
9745 vassert(t1 && *t1 == IRTemp_INVALID);
9746 vassert(t2 && *t2 == IRTemp_INVALID);
9747 vassert(t3 && *t3 == IRTemp_INVALID);
9749 *t0 = newTemp(Ity_I16);
9750 *t1 = newTemp(Ity_I16);
9751 *t2 = newTemp(Ity_I16);
9752 *t3 = newTemp(Ity_I16);
9753 assign( *t0, unop(Iop_32to16, mkexpr(lo32)) );
9754 assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
9755 assign( *t2, unop(Iop_32to16, mkexpr(hi32)) );
9756 assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
9759 /* Construct a 64-bit value from four 16-bit ints. */
9761 static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
9762 IRTemp t1, IRTemp t0 )
9764 return
9765 binop( Iop_32HLto64,
9766 binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
9767 binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
9771 /* Break a V256-bit value up into four 64-bit ints. */
9773 static void breakupV256to64s ( IRTemp t256,
9774 /*OUTs*/
9775 IRTemp* t3, IRTemp* t2,
9776 IRTemp* t1, IRTemp* t0 )
9778 vassert(t0 && *t0 == IRTemp_INVALID);
9779 vassert(t1 && *t1 == IRTemp_INVALID);
9780 vassert(t2 && *t2 == IRTemp_INVALID);
9781 vassert(t3 && *t3 == IRTemp_INVALID);
9782 *t0 = newTemp(Ity_I64);
9783 *t1 = newTemp(Ity_I64);
9784 *t2 = newTemp(Ity_I64);
9785 *t3 = newTemp(Ity_I64);
9786 assign( *t0, unop(Iop_V256to64_0, mkexpr(t256)) );
9787 assign( *t1, unop(Iop_V256to64_1, mkexpr(t256)) );
9788 assign( *t2, unop(Iop_V256to64_2, mkexpr(t256)) );
9789 assign( *t3, unop(Iop_V256to64_3, mkexpr(t256)) );
9792 /* Break a V256-bit value up into two V128s. */
9794 static void breakupV256toV128s ( IRTemp t256,
9795 /*OUTs*/
9796 IRTemp* t1, IRTemp* t0 )
9798 vassert(t0 && *t0 == IRTemp_INVALID);
9799 vassert(t1 && *t1 == IRTemp_INVALID);
9800 *t0 = newTemp(Ity_V128);
9801 *t1 = newTemp(Ity_V128);
9802 assign(*t1, unop(Iop_V256toV128_1, mkexpr(t256)));
9803 assign(*t0, unop(Iop_V256toV128_0, mkexpr(t256)));
9806 /* Break a V256-bit value up into eight 32-bit ints. */
9808 static void breakupV256to32s ( IRTemp t256,
9809 /*OUTs*/
9810 IRTemp* t7, IRTemp* t6,
9811 IRTemp* t5, IRTemp* t4,
9812 IRTemp* t3, IRTemp* t2,
9813 IRTemp* t1, IRTemp* t0 )
9815 IRTemp t128_1 = IRTemp_INVALID;
9816 IRTemp t128_0 = IRTemp_INVALID;
9817 breakupV256toV128s( t256, &t128_1, &t128_0 );
9818 breakupV128to32s( t128_1, t7, t6, t5, t4 );
9819 breakupV128to32s( t128_0, t3, t2, t1, t0 );
9822 /* Break a V128-bit value up into two 64-bit ints. */
9824 static void breakupV128to64s ( IRTemp t128,
9825 /*OUTs*/
9826 IRTemp* t1, IRTemp* t0 )
9828 vassert(t0 && *t0 == IRTemp_INVALID);
9829 vassert(t1 && *t1 == IRTemp_INVALID);
9830 *t0 = newTemp(Ity_I64);
9831 *t1 = newTemp(Ity_I64);
9832 assign( *t0, unop(Iop_V128to64, mkexpr(t128)) );
9833 assign( *t1, unop(Iop_V128HIto64, mkexpr(t128)) );
9836 /* Construct a V256-bit value from eight 32-bit ints. */
9838 static IRExpr* mkV256from32s ( IRTemp t7, IRTemp t6,
9839 IRTemp t5, IRTemp t4,
9840 IRTemp t3, IRTemp t2,
9841 IRTemp t1, IRTemp t0 )
9843 return
9844 binop( Iop_V128HLtoV256,
9845 binop( Iop_64HLtoV128,
9846 binop(Iop_32HLto64, mkexpr(t7), mkexpr(t6)),
9847 binop(Iop_32HLto64, mkexpr(t5), mkexpr(t4)) ),
9848 binop( Iop_64HLtoV128,
9849 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
9850 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) )
9854 /* Construct a V256-bit value from four 64-bit ints. */
9856 static IRExpr* mkV256from64s ( IRTemp t3, IRTemp t2,
9857 IRTemp t1, IRTemp t0 )
9859 return
9860 binop( Iop_V128HLtoV256,
9861 binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)),
9862 binop(Iop_64HLtoV128, mkexpr(t1), mkexpr(t0))
9866 /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit
9867 values (aa,bb), computes, for each of the 4 16-bit lanes:
9869 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
9871 static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
9873 IRTemp aa = newTemp(Ity_I64);
9874 IRTemp bb = newTemp(Ity_I64);
9875 IRTemp aahi32s = newTemp(Ity_I64);
9876 IRTemp aalo32s = newTemp(Ity_I64);
9877 IRTemp bbhi32s = newTemp(Ity_I64);
9878 IRTemp bblo32s = newTemp(Ity_I64);
9879 IRTemp rHi = newTemp(Ity_I64);
9880 IRTemp rLo = newTemp(Ity_I64);
9881 IRTemp one32x2 = newTemp(Ity_I64);
9882 assign(aa, aax);
9883 assign(bb, bbx);
9884 assign( aahi32s,
9885 binop(Iop_SarN32x2,
9886 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
9887 mkU8(16) ));
9888 assign( aalo32s,
9889 binop(Iop_SarN32x2,
9890 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
9891 mkU8(16) ));
9892 assign( bbhi32s,
9893 binop(Iop_SarN32x2,
9894 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
9895 mkU8(16) ));
9896 assign( bblo32s,
9897 binop(Iop_SarN32x2,
9898 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
9899 mkU8(16) ));
9900 assign(one32x2, mkU64( (1ULL << 32) + 1 ));
9901 assign(
9902 rHi,
9903 binop(
9904 Iop_ShrN32x2,
9905 binop(
9906 Iop_Add32x2,
9907 binop(
9908 Iop_ShrN32x2,
9909 binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)),
9910 mkU8(14)
9912 mkexpr(one32x2)
9914 mkU8(1)
9917 assign(
9918 rLo,
9919 binop(
9920 Iop_ShrN32x2,
9921 binop(
9922 Iop_Add32x2,
9923 binop(
9924 Iop_ShrN32x2,
9925 binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)),
9926 mkU8(14)
9928 mkexpr(one32x2)
9930 mkU8(1)
9933 return
9934 binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo));
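/* Worked example for one lane: aa_lane = bb_lane = 0x4000 (16384).
   The widened product is 0x10000000; >>u 14 gives 16384, +1 gives
   16385, and the final >>u 1 gives 0x2000 (8192) -- roughly
   0.5 * 0.5 in Q15 with round-to-nearest, as expected. */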
9937 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit
9938 values (aa,bb), computes, for each lane:
9940 if aa_lane < 0 then - bb_lane
9941 else if aa_lane > 0 then bb_lane
9942 else 0
9944 static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
9946 IRTemp aa = newTemp(Ity_I64);
9947 IRTemp bb = newTemp(Ity_I64);
9948 IRTemp zero = newTemp(Ity_I64);
9949 IRTemp bbNeg = newTemp(Ity_I64);
9950 IRTemp negMask = newTemp(Ity_I64);
9951 IRTemp posMask = newTemp(Ity_I64);
9952 IROp opSub = Iop_INVALID;
9953 IROp opCmpGTS = Iop_INVALID;
9955 switch (laneszB) {
9956 case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break;
9957 case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
9958 case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
9959 default: vassert(0);
9962 assign( aa, aax );
9963 assign( bb, bbx );
9964 assign( zero, mkU64(0) );
9965 assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) );
9966 assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
9967 assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) );
9969 return
9970 binop(Iop_Or64,
9971 binop(Iop_And64, mkexpr(bb), mkexpr(posMask)),
9972 binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );
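/* For example, with laneszB == 2: an aa lane of -5 and a bb lane of 7
   selects the negated bb value, giving -7; an aa lane of 0 gives 0
   (neither mask is set); and a positive aa lane passes bb through
   unchanged. */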
9977 /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit
9978 value aa, computes, for each lane
9980 if aa < 0 then -aa else aa
9982 Note that the result is interpreted as unsigned, so that the
9983 absolute value of the most negative signed input can be
9984 represented.
9986 static IRTemp math_PABS_MMX ( IRTemp aa, Int laneszB )
9988 IRTemp res = newTemp(Ity_I64);
9989 IRTemp zero = newTemp(Ity_I64);
9990 IRTemp aaNeg = newTemp(Ity_I64);
9991 IRTemp negMask = newTemp(Ity_I64);
9992 IRTemp posMask = newTemp(Ity_I64);
9993 IROp opSub = Iop_INVALID;
9994 IROp opSarN = Iop_INVALID;
9996 switch (laneszB) {
9997 case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break;
9998 case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
9999 case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
10000 default: vassert(0);
10003 assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
10004 assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
10005 assign( zero, mkU64(0) );
10006 assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) );
10007 assign( res,
10008 binop(Iop_Or64,
10009 binop(Iop_And64, mkexpr(aa), mkexpr(posMask)),
10010 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) ));
10011 return res;
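/* For example, with laneszB == 1 and a lane value of 0x80 (-128):
   negMask for that lane is all ones, aaNeg is 0 - 0x80 = 0x80, so the
   result lane is 0x80, i.e. 128 when read as unsigned -- the case the
   comment above is careful about. */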
10014 /* XMM version of math_PABS_MMX. */
10015 static IRTemp math_PABS_XMM ( IRTemp aa, Int laneszB )
10017 IRTemp res = newTemp(Ity_V128);
10018 IRTemp aaHi = newTemp(Ity_I64);
10019 IRTemp aaLo = newTemp(Ity_I64);
10020 assign(aaHi, unop(Iop_V128HIto64, mkexpr(aa)));
10021 assign(aaLo, unop(Iop_V128to64, mkexpr(aa)));
10022 assign(res, binop(Iop_64HLtoV128,
10023 mkexpr(math_PABS_MMX(aaHi, laneszB)),
10024 mkexpr(math_PABS_MMX(aaLo, laneszB))));
10025 return res;
10028 /* Specialisations of math_PABS_XMM, since there's no easy way to do
10029 partial applications in C :-( */
10030 static IRTemp math_PABS_XMM_pap4 ( IRTemp aa ) {
10031 return math_PABS_XMM(aa, 4);
10034 static IRTemp math_PABS_XMM_pap2 ( IRTemp aa ) {
10035 return math_PABS_XMM(aa, 2);
10038 static IRTemp math_PABS_XMM_pap1 ( IRTemp aa ) {
10039 return math_PABS_XMM(aa, 1);
10042 /* YMM version of math_PABS_XMM. */
10043 static IRTemp math_PABS_YMM ( IRTemp aa, Int laneszB )
10045 IRTemp res = newTemp(Ity_V256);
10046 IRTemp aaHi = IRTemp_INVALID;
10047 IRTemp aaLo = IRTemp_INVALID;
10048 breakupV256toV128s(aa, &aaHi, &aaLo);
10049 assign(res, binop(Iop_V128HLtoV256,
10050 mkexpr(math_PABS_XMM(aaHi, laneszB)),
10051 mkexpr(math_PABS_XMM(aaLo, laneszB))));
10052 return res;
10055 static IRTemp math_PABS_YMM_pap4 ( IRTemp aa ) {
10056 return math_PABS_YMM(aa, 4);
10059 static IRTemp math_PABS_YMM_pap2 ( IRTemp aa ) {
10060 return math_PABS_YMM(aa, 2);
10063 static IRTemp math_PABS_YMM_pap1 ( IRTemp aa ) {
10064 return math_PABS_YMM(aa, 1);
10067 static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
10068 IRTemp lo64, Long byteShift )
10070 vassert(byteShift >= 1 && byteShift <= 7);
10071 return
10072 binop(Iop_Or64,
10073 binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
10074 binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
10078 static IRTemp math_PALIGNR_XMM ( IRTemp sV, IRTemp dV, UInt imm8 )
10080 IRTemp res = newTemp(Ity_V128);
10081 IRTemp sHi = newTemp(Ity_I64);
10082 IRTemp sLo = newTemp(Ity_I64);
10083 IRTemp dHi = newTemp(Ity_I64);
10084 IRTemp dLo = newTemp(Ity_I64);
10085 IRTemp rHi = newTemp(Ity_I64);
10086 IRTemp rLo = newTemp(Ity_I64);
10088 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
10089 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
10090 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
10091 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
10093 if (imm8 == 0) {
10094 assign( rHi, mkexpr(sHi) );
10095 assign( rLo, mkexpr(sLo) );
10097 else if (imm8 >= 1 && imm8 <= 7) {
10098 assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, imm8) );
10099 assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, imm8) );
10101 else if (imm8 == 8) {
10102 assign( rHi, mkexpr(dLo) );
10103 assign( rLo, mkexpr(sHi) );
10105 else if (imm8 >= 9 && imm8 <= 15) {
10106 assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-8) );
10107 assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, imm8-8) );
10109 else if (imm8 == 16) {
10110 assign( rHi, mkexpr(dHi) );
10111 assign( rLo, mkexpr(dLo) );
10113 else if (imm8 >= 17 && imm8 <= 23) {
10114 assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-16))) );
10115 assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-16) );
10117 else if (imm8 == 24) {
10118 assign( rHi, mkU64(0) );
10119 assign( rLo, mkexpr(dHi) );
10121 else if (imm8 >= 25 && imm8 <= 31) {
10122 assign( rHi, mkU64(0) );
10123 assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-24))) );
10125 else if (imm8 >= 32 && imm8 <= 255) {
10126 assign( rHi, mkU64(0) );
10127 assign( rLo, mkU64(0) );
10129 else
10130 vassert(0);
10132 assign( res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)));
10133 return res;
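/* For example, imm8 == 8 yields dLo:sHi -- the concatenation d:s
   shifted right by 8 bytes and truncated to 128 bits -- which should
   be the intended PALIGNR semantics for every imm8 value handled
   above. */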
10136 /* Generate a SIGSEGV (or, on FreeBSD, a SIGBUS) followed by a restart
10137 of the current instruction if effective_addr is not aligned to the
10138 boundary implied by 'mask' (16, 32 or 64 bytes via the wrappers below).
10139 This is required behaviour for some SSE3 instructions and all 128-bit
10140 SSSE3 instructions. This assumes that guest_RIP_curr_instr is set correctly! */
10141 static
10142 void gen_SIGNAL_if_not_XX_aligned ( const VexAbiInfo* vbi,
10143 IRTemp effective_addr, ULong mask )
10145 stmt(
10146 IRStmt_Exit(
10147 binop(Iop_CmpNE64,
10148 binop(Iop_And64,mkexpr(effective_addr),mkU64(mask)),
10149 mkU64(0)),
10150 vbi->guest_amd64_sigbus_on_misalign ? Ijk_SigBUS : Ijk_SigSEGV,
10151 IRConst_U64(guest_RIP_curr_instr),
10152 OFFB_RIP
10157 static void gen_SIGNAL_if_not_16_aligned ( const VexAbiInfo* vbi,
10158 IRTemp effective_addr ) {
10159 gen_SIGNAL_if_not_XX_aligned(vbi, effective_addr, 16-1);
10162 static void gen_SIGNAL_if_not_32_aligned ( const VexAbiInfo* vbi,
10163 IRTemp effective_addr ) {
10164 gen_SIGNAL_if_not_XX_aligned(vbi, effective_addr, 32-1);
10167 static void gen_SIGNAL_if_not_64_aligned ( const VexAbiInfo* vbi,
10168 IRTemp effective_addr ) {
10169 gen_SIGNAL_if_not_XX_aligned(vbi, effective_addr, 64-1);
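/* For example, the 16-byte case passes mask = 0xF, so any effective
   address with a non-zero low nibble takes the side exit and the
   guest receives SIGSEGV (or SIGBUS where so configured), restarting
   at the current instruction. */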
10173 /* Helper for deciding whether a given insn (starting at the opcode
10174 byte) may validly be used with a LOCK prefix. The following insns
10175 may be used with LOCK when their destination operand is in memory.
10176 AFAICS this is exactly the same for both 32-bit and 64-bit mode.
10178 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
10179 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
10180 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
10181 SBB 80 /3, 81 /3, 82 /x, 83 /3, 18, 19
10182 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
10183 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
10184 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
10186 DEC FE /1, FF /1
10187 INC FE /0, FF /0
10189 NEG F6 /3, F7 /3
10190 NOT F6 /2, F7 /2
10192 XCHG 86, 87
10194 BTC 0F BB, 0F BA /7
10195 BTR 0F B3, 0F BA /6
10196 BTS 0F AB, 0F BA /5
10198 CMPXCHG 0F B0, 0F B1
10199 CMPXCHG8B 0F C7 /1
10201 XADD 0F C0, 0F C1
10203 ------------------------------
10205 80 /0 = addb $imm8, rm8
10206 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
10207 82 /0 = addb $imm8, rm8
10208 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
10210 00 = addb r8, rm8
10211 01 = addl r32, rm32 and addw r16, rm16
10213 Same for ADD OR ADC SBB AND SUB XOR
10215 FE /1 = dec rm8
10216 FF /1 = dec rm32 and dec rm16
10218 FE /0 = inc rm8
10219 FF /0 = inc rm32 and inc rm16
10221 F6 /3 = neg rm8
10222 F7 /3 = neg rm32 and neg rm16
10224 F6 /2 = not rm8
10225 F7 /2 = not rm32 and not rm16
10227 0F BB = btcw r16, rm16 and btcl r32, rm32
10228 0F BA /7 = btcw $imm8, rm16 and btcl $imm8, rm32
10230 Same for BTS, BTR
10232 static Bool can_be_used_with_LOCK_prefix ( const UChar* opc )
10234 switch (opc[0]) {
10235 case 0x00: case 0x01: case 0x08: case 0x09:
10236 case 0x10: case 0x11: case 0x18: case 0x19:
10237 case 0x20: case 0x21: case 0x28: case 0x29:
10238 case 0x30: case 0x31:
10239 if (!epartIsReg(opc[1]))
10240 return True;
10241 break;
10243 case 0x80: case 0x81: case 0x82: case 0x83:
10244 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6
10245 && !epartIsReg(opc[1]))
10246 return True;
10247 break;
10249 case 0xFE: case 0xFF:
10250 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1
10251 && !epartIsReg(opc[1]))
10252 return True;
10253 break;
10255 case 0xF6: case 0xF7:
10256 if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3
10257 && !epartIsReg(opc[1]))
10258 return True;
10259 break;
10261 case 0x86: case 0x87:
10262 if (!epartIsReg(opc[1]))
10263 return True;
10264 break;
10266 case 0x0F: {
10267 switch (opc[1]) {
10268 case 0xBB: case 0xB3: case 0xAB:
10269 if (!epartIsReg(opc[2]))
10270 return True;
10271 break;
10272 case 0xBA:
10273 if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7
10274 && !epartIsReg(opc[2]))
10275 return True;
10276 break;
10277 case 0xB0: case 0xB1:
10278 if (!epartIsReg(opc[2]))
10279 return True;
10280 break;
10281 case 0xC7:
10282 if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
10283 return True;
10284 break;
10285 case 0xC0: case 0xC1:
10286 if (!epartIsReg(opc[2]))
10287 return True;
10288 break;
10289 default:
10290 break;
10291 } /* switch (opc[1]) */
10292 break;
10295 default:
10296 break;
10297 } /* switch (opc[0]) */
10299 return False;
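/* For example, "lock addl $1, (%rax)" decodes as opcode 0x83 /0 with
   a memory E operand, so this returns True; "lock addl %ebx, %ecx"
   has a register E operand and returns False, matching the #UD the
   hardware should raise for LOCK on a register destination. */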
10303 /*------------------------------------------------------------*/
10304 /*--- ---*/
10305 /*--- Top-level SSE/SSE2: dis_ESC_0F__SSE2 ---*/
10306 /*--- ---*/
10307 /*------------------------------------------------------------*/
10309 static Long dis_COMISD ( const VexAbiInfo* vbi, Prefix pfx,
10310 Long delta, Bool isAvx, UChar opc )
10312 vassert(opc == 0x2F/*COMISD*/ || opc == 0x2E/*UCOMISD*/);
10313 Int alen = 0;
10314 HChar dis_buf[50];
10315 IRTemp argL = newTemp(Ity_F64);
10316 IRTemp argR = newTemp(Ity_F64);
10317 UChar modrm = getUChar(delta);
10318 IRTemp addr = IRTemp_INVALID;
10319 if (epartIsReg(modrm)) {
10320 assign( argR, getXMMRegLane64F( eregOfRexRM(pfx,modrm),
10321 0/*lowest lane*/ ) );
10322 delta += 1;
10323 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
10324 opc==0x2E ? "u" : "",
10325 nameXMMReg(eregOfRexRM(pfx,modrm)),
10326 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10327 } else {
10328 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10329 assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
10330 delta += alen;
10331 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
10332 opc==0x2E ? "u" : "",
10333 dis_buf,
10334 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10336 assign( argL, getXMMRegLane64F( gregOfRexRM(pfx,modrm),
10337 0/*lowest lane*/ ) );
10339 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
10340 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
10341 stmt( IRStmt_Put(
10342 OFFB_CC_DEP1,
10343 binop( Iop_And64,
10344 unop( Iop_32Uto64,
10345 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)) ),
10346 mkU64(0x45)
10347 )));
10348 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
10349 return delta;
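/* Note on the 0x45 mask: it keeps bits 0, 2 and 6, which are the CF,
   PF and ZF positions in rflags.  Iop_CmpF64's result encoding
   appears to be chosen so that this masking directly yields the
   architected COMISD/UCOMISD flag pattern; e.g. an unordered compare
   produces 0x45 and hence sets all three. */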
10353 static Long dis_COMISS ( const VexAbiInfo* vbi, Prefix pfx,
10354 Long delta, Bool isAvx, UChar opc )
10356 vassert(opc == 0x2F/*COMISS*/ || opc == 0x2E/*UCOMISS*/);
10357 Int alen = 0;
10358 HChar dis_buf[50];
10359 IRTemp argL = newTemp(Ity_F32);
10360 IRTemp argR = newTemp(Ity_F32);
10361 UChar modrm = getUChar(delta);
10362 IRTemp addr = IRTemp_INVALID;
10363 if (epartIsReg(modrm)) {
10364 assign( argR, getXMMRegLane32F( eregOfRexRM(pfx,modrm),
10365 0/*lowest lane*/ ) );
10366 delta += 1;
10367 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "",
10368 opc==0x2E ? "u" : "",
10369 nameXMMReg(eregOfRexRM(pfx,modrm)),
10370 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10371 } else {
10372 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10373 assign( argR, loadLE(Ity_F32, mkexpr(addr)) );
10374 delta += alen;
10375 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "",
10376 opc==0x2E ? "u" : "",
10377 dis_buf,
10378 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10380 assign( argL, getXMMRegLane32F( gregOfRexRM(pfx,modrm),
10381 0/*lowest lane*/ ) );
10383 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
10384 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
10385 stmt( IRStmt_Put(
10386 OFFB_CC_DEP1,
10387 binop( Iop_And64,
10388 unop( Iop_32Uto64,
10389 binop(Iop_CmpF64,
10390 unop(Iop_F32toF64,mkexpr(argL)),
10391 unop(Iop_F32toF64,mkexpr(argR)))),
10392 mkU64(0x45)
10393 )));
10394 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
10395 return delta;
10399 static Long dis_PSHUFD_32x4 ( const VexAbiInfo* vbi, Prefix pfx,
10400 Long delta, Bool writesYmm )
10402 Int order;
10403 Int alen = 0;
10404 HChar dis_buf[50];
10405 IRTemp sV = newTemp(Ity_V128);
10406 UChar modrm = getUChar(delta);
10407 const HChar* strV = writesYmm ? "v" : "";
10408 IRTemp addr = IRTemp_INVALID;
10409 if (epartIsReg(modrm)) {
10410 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
10411 order = (Int)getUChar(delta+1);
10412 delta += 1+1;
10413 DIP("%spshufd $%d,%s,%s\n", strV, order,
10414 nameXMMReg(eregOfRexRM(pfx,modrm)),
10415 nameXMMReg(gregOfRexRM(pfx,modrm)));
10416 } else {
10417 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
10418 1/*byte after the amode*/ );
10419 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10420 order = (Int)getUChar(delta+alen);
10421 delta += alen+1;
10422 DIP("%spshufd $%d,%s,%s\n", strV, order,
10423 dis_buf,
10424 nameXMMReg(gregOfRexRM(pfx,modrm)));
10427 IRTemp s3, s2, s1, s0;
10428 s3 = s2 = s1 = s0 = IRTemp_INVALID;
10429 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
10431 # define SEL(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
10432 IRTemp dV = newTemp(Ity_V128);
10433 assign(dV,
10434 mkV128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
10435 SEL((order>>2)&3), SEL((order>>0)&3) )
10437 # undef SEL
10439 (writesYmm ? putYMMRegLoAndZU : putXMMReg)
10440 (gregOfRexRM(pfx,modrm), mkexpr(dV));
10441 return delta;
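/* For example, "pshufd $0x1b, %xmm1, %xmm0": order = 0x1b = 00 01 10
   11, so dst lane 0 takes src lane 3, lane 1 takes lane 2, and so on
   -- the four 32-bit lanes end up reversed. */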
10445 static Long dis_PSHUFD_32x8 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
10447 Int order;
10448 Int alen = 0;
10449 HChar dis_buf[50];
10450 IRTemp sV = newTemp(Ity_V256);
10451 UChar modrm = getUChar(delta);
10452 IRTemp addr = IRTemp_INVALID;
10453 UInt rG = gregOfRexRM(pfx,modrm);
10454 if (epartIsReg(modrm)) {
10455 UInt rE = eregOfRexRM(pfx,modrm);
10456 assign( sV, getYMMReg(rE) );
10457 order = (Int)getUChar(delta+1);
10458 delta += 1+1;
10459 DIP("vpshufd $%d,%s,%s\n", order, nameYMMReg(rE), nameYMMReg(rG));
10460 } else {
10461 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
10462 1/*byte after the amode*/ );
10463 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
10464 order = (Int)getUChar(delta+alen);
10465 delta += alen+1;
10466 DIP("vpshufd $%d,%s,%s\n", order, dis_buf, nameYMMReg(rG));
10469 IRTemp s[8];
10470 s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
10471 breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4],
10472 &s[3], &s[2], &s[1], &s[0] );
10474 putYMMReg( rG, mkV256from32s( s[4 + ((order>>6)&3)],
10475 s[4 + ((order>>4)&3)],
10476 s[4 + ((order>>2)&3)],
10477 s[4 + ((order>>0)&3)],
10478 s[0 + ((order>>6)&3)],
10479 s[0 + ((order>>4)&3)],
10480 s[0 + ((order>>2)&3)],
10481 s[0 + ((order>>0)&3)] ) );
10482 return delta;
10486 static IRTemp math_PSRLDQ ( IRTemp sV, Int imm )
10488 IRTemp dV = newTemp(Ity_V128);
10489 IRTemp hi64 = newTemp(Ity_I64);
10490 IRTemp lo64 = newTemp(Ity_I64);
10491 IRTemp hi64r = newTemp(Ity_I64);
10492 IRTemp lo64r = newTemp(Ity_I64);
10494 vassert(imm >= 0 && imm <= 255);
10495 if (imm >= 16) {
10496 assign(dV, mkV128(0x0000));
10497 return dV;
10500 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
10501 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
10503 if (imm == 0) {
10504 assign( lo64r, mkexpr(lo64) );
10505 assign( hi64r, mkexpr(hi64) );
10507 else
10508 if (imm == 8) {
10509 assign( hi64r, mkU64(0) );
10510 assign( lo64r, mkexpr(hi64) );
10512 else
10513 if (imm > 8) {
10514 assign( hi64r, mkU64(0) );
10515 assign( lo64r, binop( Iop_Shr64, mkexpr(hi64), mkU8( 8*(imm-8) ) ));
10516 } else {
10517 assign( hi64r, binop( Iop_Shr64, mkexpr(hi64), mkU8(8 * imm) ));
10518 assign( lo64r,
10519 binop( Iop_Or64,
10520 binop(Iop_Shr64, mkexpr(lo64),
10521 mkU8(8 * imm)),
10522 binop(Iop_Shl64, mkexpr(hi64),
10523 mkU8(8 * (8 - imm)) )
10528 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
10529 return dV;
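/* For example, imm == 3 shifts the whole 128-bit value right by 24
   bits: hi64r = hi64 >> 24 and lo64r = (lo64 >> 24) | (hi64 << 40),
   so three zero bytes appear at the top, as expected for PSRLDQ. */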
10533 static IRTemp math_PSLLDQ ( IRTemp sV, Int imm )
10535 IRTemp dV = newTemp(Ity_V128);
10536 IRTemp hi64 = newTemp(Ity_I64);
10537 IRTemp lo64 = newTemp(Ity_I64);
10538 IRTemp hi64r = newTemp(Ity_I64);
10539 IRTemp lo64r = newTemp(Ity_I64);
10541 vassert(imm >= 0 && imm <= 255);
10542 if (imm >= 16) {
10543 assign(dV, mkV128(0x0000));
10544 return dV;
10547 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
10548 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
10550 if (imm == 0) {
10551 assign( lo64r, mkexpr(lo64) );
10552 assign( hi64r, mkexpr(hi64) );
10554 else
10555 if (imm == 8) {
10556 assign( lo64r, mkU64(0) );
10557 assign( hi64r, mkexpr(lo64) );
10559 else
10560 if (imm > 8) {
10561 assign( lo64r, mkU64(0) );
10562 assign( hi64r, binop( Iop_Shl64, mkexpr(lo64), mkU8( 8*(imm-8) ) ));
10563 } else {
10564 assign( lo64r, binop( Iop_Shl64, mkexpr(lo64), mkU8(8 * imm) ));
10565 assign( hi64r,
10566 binop( Iop_Or64,
10567 binop(Iop_Shl64, mkexpr(hi64),
10568 mkU8(8 * imm)),
10569 binop(Iop_Shr64, mkexpr(lo64),
10570 mkU8(8 * (8 - imm)) )
10575 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
10576 return dV;
10580 static Long dis_CVTxSD2SI ( const VexAbiInfo* vbi, Prefix pfx,
10581 Long delta, Bool isAvx, UChar opc, Int sz )
10583 vassert(opc == 0x2D/*CVTSD2SI*/ || opc == 0x2C/*CVTTSD2SI*/);
10584 HChar dis_buf[50];
10585 Int alen = 0;
10586 UChar modrm = getUChar(delta);
10587 IRTemp addr = IRTemp_INVALID;
10588 IRTemp rmode = newTemp(Ity_I32);
10589 IRTemp f64lo = newTemp(Ity_F64);
10590 Bool r2zero = toBool(opc == 0x2C);
10592 if (epartIsReg(modrm)) {
10593 delta += 1;
10594 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
10595 DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10596 nameXMMReg(eregOfRexRM(pfx,modrm)),
10597 nameIReg(sz, gregOfRexRM(pfx,modrm),
10598 False));
10599 } else {
10600 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10601 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
10602 delta += alen;
10603 DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10604 dis_buf,
10605 nameIReg(sz, gregOfRexRM(pfx,modrm),
10606 False));
10609 if (r2zero) {
10610 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10611 } else {
10612 assign( rmode, get_sse_roundingmode() );
10615 if (sz == 4) {
10616 putIReg32( gregOfRexRM(pfx,modrm),
10617 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) );
10618 } else {
10619 vassert(sz == 8);
10620 putIReg64( gregOfRexRM(pfx,modrm),
10621 binop( Iop_F64toI64S, mkexpr(rmode), mkexpr(f64lo)) );
10624 return delta;
10628 static Long dis_CVTxSS2SI ( const VexAbiInfo* vbi, Prefix pfx,
10629 Long delta, Bool isAvx, UChar opc, Int sz )
10631 vassert(opc == 0x2D/*CVTSS2SI*/ || opc == 0x2C/*CVTTSS2SI*/);
10632 HChar dis_buf[50];
10633 Int alen = 0;
10634 UChar modrm = getUChar(delta);
10635 IRTemp addr = IRTemp_INVALID;
10636 IRTemp rmode = newTemp(Ity_I32);
10637 IRTemp f32lo = newTemp(Ity_F32);
10638 Bool r2zero = toBool(opc == 0x2C);
10640 if (epartIsReg(modrm)) {
10641 delta += 1;
10642 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
10643 DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10644 nameXMMReg(eregOfRexRM(pfx,modrm)),
10645 nameIReg(sz, gregOfRexRM(pfx,modrm),
10646 False));
10647 } else {
10648 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10649 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
10650 delta += alen;
10651 DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10652 dis_buf,
10653 nameIReg(sz, gregOfRexRM(pfx,modrm),
10654 False));
10657 if (r2zero) {
10658 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10659 } else {
10660 assign( rmode, get_sse_roundingmode() );
10663 if (sz == 4) {
10664 putIReg32( gregOfRexRM(pfx,modrm),
10665 binop( Iop_F64toI32S,
10666 mkexpr(rmode),
10667 unop(Iop_F32toF64, mkexpr(f32lo))) );
10668 } else {
10669 vassert(sz == 8);
10670 putIReg64( gregOfRexRM(pfx,modrm),
10671 binop( Iop_F64toI64S,
10672 mkexpr(rmode),
10673 unop(Iop_F32toF64, mkexpr(f32lo))) );
10676 return delta;
10680 static Long dis_CVTPS2PD_128 ( const VexAbiInfo* vbi, Prefix pfx,
10681 Long delta, Bool isAvx )
10683 IRTemp addr = IRTemp_INVALID;
10684 Int alen = 0;
10685 HChar dis_buf[50];
10686 IRTemp f32lo = newTemp(Ity_F32);
10687 IRTemp f32hi = newTemp(Ity_F32);
10688 UChar modrm = getUChar(delta);
10689 UInt rG = gregOfRexRM(pfx,modrm);
10690 if (epartIsReg(modrm)) {
10691 UInt rE = eregOfRexRM(pfx,modrm);
10692 assign( f32lo, getXMMRegLane32F(rE, 0) );
10693 assign( f32hi, getXMMRegLane32F(rE, 1) );
10694 delta += 1;
10695 DIP("%scvtps2pd %s,%s\n",
10696 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
10697 } else {
10698 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10699 assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
10700 assign( f32hi, loadLE(Ity_F32,
10701 binop(Iop_Add64,mkexpr(addr),mkU64(4))) );
10702 delta += alen;
10703 DIP("%scvtps2pd %s,%s\n",
10704 isAvx ? "v" : "", dis_buf, nameXMMReg(rG));
10707 putXMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32hi)) );
10708 putXMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32lo)) );
10709 if (isAvx)
10710 putYMMRegLane128( rG, 1, mkV128(0));
10711 return delta;
10715 static Long dis_CVTPS2PD_256 ( const VexAbiInfo* vbi, Prefix pfx,
10716 Long delta )
10718 IRTemp addr = IRTemp_INVALID;
10719 Int alen = 0;
10720 HChar dis_buf[50];
10721 IRTemp f32_0 = newTemp(Ity_F32);
10722 IRTemp f32_1 = newTemp(Ity_F32);
10723 IRTemp f32_2 = newTemp(Ity_F32);
10724 IRTemp f32_3 = newTemp(Ity_F32);
10725 UChar modrm = getUChar(delta);
10726 UInt rG = gregOfRexRM(pfx,modrm);
10727 if (epartIsReg(modrm)) {
10728 UInt rE = eregOfRexRM(pfx,modrm);
10729 assign( f32_0, getXMMRegLane32F(rE, 0) );
10730 assign( f32_1, getXMMRegLane32F(rE, 1) );
10731 assign( f32_2, getXMMRegLane32F(rE, 2) );
10732 assign( f32_3, getXMMRegLane32F(rE, 3) );
10733 delta += 1;
10734 DIP("vcvtps2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
10735 } else {
10736 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10737 assign( f32_0, loadLE(Ity_F32, mkexpr(addr)) );
10738 assign( f32_1, loadLE(Ity_F32,
10739 binop(Iop_Add64,mkexpr(addr),mkU64(4))) );
10740 assign( f32_2, loadLE(Ity_F32,
10741 binop(Iop_Add64,mkexpr(addr),mkU64(8))) );
10742 assign( f32_3, loadLE(Ity_F32,
10743 binop(Iop_Add64,mkexpr(addr),mkU64(12))) );
10744 delta += alen;
10745 DIP("vcvtps2pd %s,%s\n", dis_buf, nameYMMReg(rG));
10748 putYMMRegLane64F( rG, 3, unop(Iop_F32toF64, mkexpr(f32_3)) );
10749 putYMMRegLane64F( rG, 2, unop(Iop_F32toF64, mkexpr(f32_2)) );
10750 putYMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32_1)) );
10751 putYMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32_0)) );
10752 return delta;
10756 static Long dis_CVTPD2PS_128 ( const VexAbiInfo* vbi, Prefix pfx,
10757 Long delta, Bool isAvx )
10759 IRTemp addr = IRTemp_INVALID;
10760 Int alen = 0;
10761 HChar dis_buf[50];
10762 UChar modrm = getUChar(delta);
10763 UInt rG = gregOfRexRM(pfx,modrm);
10764 IRTemp argV = newTemp(Ity_V128);
10765 IRTemp rmode = newTemp(Ity_I32);
10766 if (epartIsReg(modrm)) {
10767 UInt rE = eregOfRexRM(pfx,modrm);
10768 assign( argV, getXMMReg(rE) );
10769 delta += 1;
10770 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "",
10771 nameXMMReg(rE), nameXMMReg(rG));
10772 } else {
10773 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10774 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10775 delta += alen;
10776 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "",
10777 dis_buf, nameXMMReg(rG) );
10780 assign( rmode, get_sse_roundingmode() );
10781 IRTemp t0 = newTemp(Ity_F64);
10782 IRTemp t1 = newTemp(Ity_F64);
10783 assign( t0, unop(Iop_ReinterpI64asF64,
10784 unop(Iop_V128to64, mkexpr(argV))) );
10785 assign( t1, unop(Iop_ReinterpI64asF64,
10786 unop(Iop_V128HIto64, mkexpr(argV))) );
10788 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), mkexpr(_t) )
10789 putXMMRegLane32( rG, 3, mkU32(0) );
10790 putXMMRegLane32( rG, 2, mkU32(0) );
10791 putXMMRegLane32F( rG, 1, CVT(t1) );
10792 putXMMRegLane32F( rG, 0, CVT(t0) );
10793 # undef CVT
10794 if (isAvx)
10795 putYMMRegLane128( rG, 1, mkV128(0) );
10797 return delta;
10801 static Long dis_CVTxPS2DQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
10802 Long delta, Bool isAvx, Bool r2zero )
10804 IRTemp addr = IRTemp_INVALID;
10805 Int alen = 0;
10806 HChar dis_buf[50];
10807 UChar modrm = getUChar(delta);
10808 IRTemp argV = newTemp(Ity_V128);
10809 IRTemp rmode = newTemp(Ity_I32);
10810 UInt rG = gregOfRexRM(pfx,modrm);
10812 if (epartIsReg(modrm)) {
10813 UInt rE = eregOfRexRM(pfx,modrm);
10814 assign( argV, getXMMReg(rE) );
10815 delta += 1;
10816 DIP("%scvt%sps2dq %s,%s\n",
10817 isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG));
10818 } else {
10819 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10820 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10821 delta += alen;
10822 DIP("%scvt%sps2dq %s,%s\n",
10823 isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
10826 assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO)
10827 : get_sse_roundingmode() );
10828 putXMMReg( rG, binop(Iop_F32toI32Sx4, mkexpr(rmode), mkexpr(argV)) );
10829 if (isAvx)
10830 putYMMRegLane128( rG, 1, mkV128(0) );
10832 return delta;
10836 static Long dis_CVTxPS2DQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
10837 Long delta, Bool r2zero )
10839 IRTemp addr = IRTemp_INVALID;
10840 Int alen = 0;
10841 HChar dis_buf[50];
10842 UChar modrm = getUChar(delta);
10843 IRTemp argV = newTemp(Ity_V256);
10844 IRTemp rmode = newTemp(Ity_I32);
10845 UInt rG = gregOfRexRM(pfx,modrm);
10847 if (epartIsReg(modrm)) {
10848 UInt rE = eregOfRexRM(pfx,modrm);
10849 assign( argV, getYMMReg(rE) );
10850 delta += 1;
10851 DIP("vcvt%sps2dq %s,%s\n",
10852 r2zero ? "t" : "", nameYMMReg(rE), nameYMMReg(rG));
10853 } else {
10854 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10855 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
10856 delta += alen;
10857 DIP("vcvt%sps2dq %s,%s\n",
10858 r2zero ? "t" : "", dis_buf, nameYMMReg(rG) );
10861 assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO)
10862 : get_sse_roundingmode() );
10863 putYMMReg( rG, binop(Iop_F32toI32Sx8, mkexpr(rmode), mkexpr(argV)) );
10864 return delta;
10868 static Long dis_CVTxPD2DQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
10869 Long delta, Bool isAvx, Bool r2zero )
10871 IRTemp addr = IRTemp_INVALID;
10872 Int alen = 0;
10873 HChar dis_buf[50];
10874 UChar modrm = getUChar(delta);
10875 IRTemp argV = newTemp(Ity_V128);
10876 IRTemp rmode = newTemp(Ity_I32);
10877 UInt rG = gregOfRexRM(pfx,modrm);
10878 IRTemp t0, t1;
10880 if (epartIsReg(modrm)) {
10881 UInt rE = eregOfRexRM(pfx,modrm);
10882 assign( argV, getXMMReg(rE) );
10883 delta += 1;
10884 DIP("%scvt%spd2dq %s,%s\n",
10885 isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG));
10886 } else {
10887 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10888 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10889 delta += alen;
10890 DIP("%scvt%spd2dqx %s,%s\n",
10891 isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
10894 if (r2zero) {
10895 assign(rmode, mkU32((UInt)Irrm_ZERO) );
10896 } else {
10897 assign( rmode, get_sse_roundingmode() );
10900 t0 = newTemp(Ity_F64);
10901 t1 = newTemp(Ity_F64);
10902 assign( t0, unop(Iop_ReinterpI64asF64,
10903 unop(Iop_V128to64, mkexpr(argV))) );
10904 assign( t1, unop(Iop_ReinterpI64asF64,
10905 unop(Iop_V128HIto64, mkexpr(argV))) );
10907 # define CVT(_t) binop( Iop_F64toI32S, \
10908 mkexpr(rmode), \
10909 mkexpr(_t) )
10911 putXMMRegLane32( rG, 3, mkU32(0) );
10912 putXMMRegLane32( rG, 2, mkU32(0) );
10913 putXMMRegLane32( rG, 1, CVT(t1) );
10914 putXMMRegLane32( rG, 0, CVT(t0) );
10915 # undef CVT
10916 if (isAvx)
10917 putYMMRegLane128( rG, 1, mkV128(0) );
10919 return delta;
10923 static Long dis_CVTxPD2DQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
10924 Long delta, Bool r2zero )
10926 IRTemp addr = IRTemp_INVALID;
10927 Int alen = 0;
10928 HChar dis_buf[50];
10929 UChar modrm = getUChar(delta);
10930 IRTemp argV = newTemp(Ity_V256);
10931 IRTemp rmode = newTemp(Ity_I32);
10932 UInt rG = gregOfRexRM(pfx,modrm);
10933 IRTemp t0, t1, t2, t3;
10935 if (epartIsReg(modrm)) {
10936 UInt rE = eregOfRexRM(pfx,modrm);
10937 assign( argV, getYMMReg(rE) );
10938 delta += 1;
10939 DIP("vcvt%spd2dq %s,%s\n",
10940 r2zero ? "t" : "", nameYMMReg(rE), nameXMMReg(rG));
10941 } else {
10942 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10943 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
10944 delta += alen;
10945 DIP("vcvt%spd2dqy %s,%s\n",
10946 r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
10949 if (r2zero) {
10950 assign(rmode, mkU32((UInt)Irrm_ZERO) );
10951 } else {
10952 assign( rmode, get_sse_roundingmode() );
10955 t0 = IRTemp_INVALID;
10956 t1 = IRTemp_INVALID;
10957 t2 = IRTemp_INVALID;
10958 t3 = IRTemp_INVALID;
10959 breakupV256to64s( argV, &t3, &t2, &t1, &t0 );
10961 # define CVT(_t) binop( Iop_F64toI32S, \
10962 mkexpr(rmode), \
10963 unop( Iop_ReinterpI64asF64, \
10964 mkexpr(_t) ) )
10966 putXMMRegLane32( rG, 3, CVT(t3) );
10967 putXMMRegLane32( rG, 2, CVT(t2) );
10968 putXMMRegLane32( rG, 1, CVT(t1) );
10969 putXMMRegLane32( rG, 0, CVT(t0) );
10970 # undef CVT
10971 putYMMRegLane128( rG, 1, mkV128(0) );
10973 return delta;
10977 static Long dis_CVTDQ2PS_128 ( const VexAbiInfo* vbi, Prefix pfx,
10978 Long delta, Bool isAvx )
10980 IRTemp addr = IRTemp_INVALID;
10981 Int alen = 0;
10982 HChar dis_buf[50];
10983 UChar modrm = getUChar(delta);
10984 IRTemp argV = newTemp(Ity_V128);
10985 IRTemp rmode = newTemp(Ity_I32);
10986 UInt rG = gregOfRexRM(pfx,modrm);
10988 if (epartIsReg(modrm)) {
10989 UInt rE = eregOfRexRM(pfx,modrm);
10990 assign( argV, getXMMReg(rE) );
10991 delta += 1;
10992 DIP("%scvtdq2ps %s,%s\n",
10993 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
10994 } else {
10995 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10996 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10997 delta += alen;
10998 DIP("%scvtdq2ps %s,%s\n",
10999 isAvx ? "v" : "", dis_buf, nameXMMReg(rG) );
11002 assign( rmode, get_sse_roundingmode() );
11003 putXMMReg(rG, binop(Iop_I32StoF32x4, mkexpr(rmode), mkexpr(argV)));
11005 if (isAvx)
11006 putYMMRegLane128( rG, 1, mkV128(0) );
11008 return delta;
11011 static Long dis_CVTDQ2PS_256 ( const VexAbiInfo* vbi, Prefix pfx,
11012 Long delta )
11014 IRTemp addr = IRTemp_INVALID;
11015 Int alen = 0;
11016 HChar dis_buf[50];
11017 UChar modrm = getUChar(delta);
11018 IRTemp argV = newTemp(Ity_V256);
11019 IRTemp rmode = newTemp(Ity_I32);
11020 UInt rG = gregOfRexRM(pfx,modrm);
11022 if (epartIsReg(modrm)) {
11023 UInt rE = eregOfRexRM(pfx,modrm);
11024 assign( argV, getYMMReg(rE) );
11025 delta += 1;
11026 DIP("vcvtdq2ps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
11027 } else {
11028 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11029 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
11030 delta += alen;
11031 DIP("vcvtdq2ps %s,%s\n", dis_buf, nameYMMReg(rG) );
11034 assign( rmode, get_sse_roundingmode() );
11035 putYMMReg(rG, binop(Iop_I32StoF32x8, mkexpr(rmode), mkexpr(argV)));
11037 return delta;
11041 static Long dis_PMOVMSKB_128 ( const VexAbiInfo* vbi, Prefix pfx,
11042 Long delta, Bool isAvx )
11044 UChar modrm = getUChar(delta);
11045 vassert(epartIsReg(modrm)); /* ensured by caller */
11046 UInt rE = eregOfRexRM(pfx,modrm);
11047 UInt rG = gregOfRexRM(pfx,modrm);
11048 IRTemp t0 = newTemp(Ity_V128);
11049 IRTemp t1 = newTemp(Ity_I32);
11050 assign(t0, getXMMReg(rE));
11051 assign(t1, unop(Iop_16Uto32, unop(Iop_GetMSBs8x16, mkexpr(t0))));
11052 putIReg32(rG, mkexpr(t1));
11053 DIP("%spmovmskb %s,%s\n", isAvx ? "v" : "", nameXMMReg(rE),
11054 nameIReg32(rG));
11055 delta += 1;
11056 return delta;
11060 static Long dis_PMOVMSKB_256 ( const VexAbiInfo* vbi, Prefix pfx,
11061 Long delta )
11063 UChar modrm = getUChar(delta);
11064 vassert(epartIsReg(modrm)); /* ensured by caller */
11065 UInt rE = eregOfRexRM(pfx,modrm);
11066 UInt rG = gregOfRexRM(pfx,modrm);
11067 IRTemp t0 = newTemp(Ity_V128);
11068 IRTemp t1 = newTemp(Ity_V128);
11069 IRTemp t2 = newTemp(Ity_I16);
11070 IRTemp t3 = newTemp(Ity_I16);
11071 assign(t0, getYMMRegLane128(rE, 0));
11072 assign(t1, getYMMRegLane128(rE, 1));
11073 assign(t2, unop(Iop_GetMSBs8x16, mkexpr(t0)));
11074 assign(t3, unop(Iop_GetMSBs8x16, mkexpr(t1)));
11075 putIReg32(rG, binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)));
11076 DIP("vpmovmskb %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
11077 delta += 1;
11078 return delta;
11082 /* FIXME: why not just use InterleaveLO / InterleaveHI? I think the
11083 relevant ops are "xIsH ? InterleaveHI32x4 : InterleaveLO32x4". */
11084 /* Does the maths for 128 bit versions of UNPCKLPS and UNPCKHPS */
11085 static IRTemp math_UNPCKxPS_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
11087 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11088 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11089 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
11090 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11091 IRTemp res = newTemp(Ity_V128);
11092 assign(res, xIsH ? mkV128from32s( s3, d3, s2, d2 )
11093 : mkV128from32s( s1, d1, s0, d0 ));
11094 return res;
11098 /* FIXME: why not just use InterleaveLO / InterleaveHI ?? */
11099 /* Does the maths for 128 bit versions of UNPCKLPD and UNPCKHPD */
11100 static IRTemp math_UNPCKxPD_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
11102 IRTemp s1 = newTemp(Ity_I64);
11103 IRTemp s0 = newTemp(Ity_I64);
11104 IRTemp d1 = newTemp(Ity_I64);
11105 IRTemp d0 = newTemp(Ity_I64);
11106 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
11107 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
11108 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
11109 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
11110 IRTemp res = newTemp(Ity_V128);
11111 assign(res, xIsH ? binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1))
11112 : binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)));
11113 return res;
11117 /* Does the maths for 256 bit versions of UNPCKLPD and UNPCKHPD.
11118 Doesn't seem like this fits in either of the Iop_Interleave{LO,HI}
11119 or the Iop_Cat{Odd,Even}Lanes idioms, hence just do it the stupid
11120 way. */
11121 static IRTemp math_UNPCKxPD_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
11123 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11124 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11125 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
11126 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
11127 IRTemp res = newTemp(Ity_V256);
11128 assign(res, xIsH
11129 ? IRExpr_Qop(Iop_64x4toV256, mkexpr(s3), mkexpr(d3),
11130 mkexpr(s1), mkexpr(d1))
11131 : IRExpr_Qop(Iop_64x4toV256, mkexpr(s2), mkexpr(d2),
11132 mkexpr(s0), mkexpr(d0)));
11133 return res;
11137 /* FIXME: this is really bad. Surely can do something better here?
11138 One observation is that the steering in the upper and lower 128 bit
11139 halves is the same as with math_UNPCKxPS_128, so we simply split
11140 into two halves, and use that. Consequently any improvement in
11141 math_UNPCKxPS_128 (probably, to use interleave-style primops)
11142 benefits this too. */
11143 static IRTemp math_UNPCKxPS_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
11145 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11146 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11147 breakupV256toV128s( sV, &sVhi, &sVlo );
11148 breakupV256toV128s( dV, &dVhi, &dVlo );
11149 IRTemp rVhi = math_UNPCKxPS_128(sVhi, dVhi, xIsH);
11150 IRTemp rVlo = math_UNPCKxPS_128(sVlo, dVlo, xIsH);
11151 IRTemp rV = newTemp(Ity_V256);
11152 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11153 return rV;
11157 static IRTemp math_SHUFPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11159 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11160 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11161 vassert(imm8 < 256);
11163 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
11164 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11166 # define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
11167 # define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11168 IRTemp res = newTemp(Ity_V128);
11169 assign(res,
11170 mkV128from32s( SELS((imm8>>6)&3), SELS((imm8>>4)&3),
11171 SELD((imm8>>2)&3), SELD((imm8>>0)&3) ) );
11172 # undef SELD
11173 # undef SELS
11174 return res;
11178 /* 256-bit SHUFPS appears to steer each of the 128-bit halves
11179 identically. Hence do the clueless thing and use math_SHUFPS_128
11180 twice. */
11181 static IRTemp math_SHUFPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
11183 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11184 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11185 breakupV256toV128s( sV, &sVhi, &sVlo );
11186 breakupV256toV128s( dV, &dVhi, &dVlo );
11187 IRTemp rVhi = math_SHUFPS_128(sVhi, dVhi, imm8);
11188 IRTemp rVlo = math_SHUFPS_128(sVlo, dVlo, imm8);
11189 IRTemp rV = newTemp(Ity_V256);
11190 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11191 return rV;
11195 static IRTemp math_SHUFPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11197 IRTemp s1 = newTemp(Ity_I64);
11198 IRTemp s0 = newTemp(Ity_I64);
11199 IRTemp d1 = newTemp(Ity_I64);
11200 IRTemp d0 = newTemp(Ity_I64);
11202 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
11203 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
11204 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
11205 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
11207 # define SELD(n) mkexpr((n)==0 ? d0 : d1)
11208 # define SELS(n) mkexpr((n)==0 ? s0 : s1)
11210 IRTemp res = newTemp(Ity_V128);
11211 assign(res, binop( Iop_64HLtoV128,
11212 SELS((imm8>>1)&1), SELD((imm8>>0)&1) ) );
11214 # undef SELD
11215 # undef SELS
11216 return res;
11220 static IRTemp math_SHUFPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
11222 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11223 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11224 breakupV256toV128s( sV, &sVhi, &sVlo );
11225 breakupV256toV128s( dV, &dVhi, &dVlo );
11226 IRTemp rVhi = math_SHUFPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
11227 IRTemp rVlo = math_SHUFPD_128(sVlo, dVlo, imm8 & 3);
11228 IRTemp rV = newTemp(Ity_V256);
11229 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11230 return rV;
11234 static IRTemp math_BLENDPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11236 UShort imm8_mask_16;
11237 IRTemp imm8_mask = newTemp(Ity_V128);
11239 switch( imm8 & 3 ) {
11240 case 0: imm8_mask_16 = 0x0000; break;
11241 case 1: imm8_mask_16 = 0x00FF; break;
11242 case 2: imm8_mask_16 = 0xFF00; break;
11243 case 3: imm8_mask_16 = 0xFFFF; break;
11244 default: vassert(0); break;
11246 assign( imm8_mask, mkV128( imm8_mask_16 ) );
11248 IRTemp res = newTemp(Ity_V128);
11249 assign ( res, binop( Iop_OrV128,
11250 binop( Iop_AndV128, mkexpr(sV),
11251 mkexpr(imm8_mask) ),
11252 binop( Iop_AndV128, mkexpr(dV),
11253 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
11254 return res;
11258 static IRTemp math_BLENDPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
11260 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11261 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11262 breakupV256toV128s( sV, &sVhi, &sVlo );
11263 breakupV256toV128s( dV, &dVhi, &dVlo );
11264 IRTemp rVhi = math_BLENDPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
11265 IRTemp rVlo = math_BLENDPD_128(sVlo, dVlo, imm8 & 3);
11266 IRTemp rV = newTemp(Ity_V256);
11267 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11268 return rV;
11272 static IRTemp math_BLENDPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11274 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
11275 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
11276 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
11277 0xFFFF };
11278 IRTemp imm8_mask = newTemp(Ity_V128);
11279 assign( imm8_mask, mkV128( imm8_perms[ (imm8 & 15) ] ) );
11281 IRTemp res = newTemp(Ity_V128);
11282 assign ( res, binop( Iop_OrV128,
11283 binop( Iop_AndV128, mkexpr(sV),
11284 mkexpr(imm8_mask) ),
11285 binop( Iop_AndV128, mkexpr(dV),
11286 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
11287 return res;
11291 static IRTemp math_BLENDPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
11293 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11294 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11295 breakupV256toV128s( sV, &sVhi, &sVlo );
11296 breakupV256toV128s( dV, &dVhi, &dVlo );
11297 IRTemp rVhi = math_BLENDPS_128(sVhi, dVhi, (imm8 >> 4) & 15);
11298 IRTemp rVlo = math_BLENDPS_128(sVlo, dVlo, imm8 & 15);
11299 IRTemp rV = newTemp(Ity_V256);
11300 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11301 return rV;
11305 static IRTemp math_PBLENDW_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11307 /* Make imm16 be a 16-bit version of imm8, formed by duplicating each
11308 bit in imm8. */
11309 Int i;
11310 UShort imm16 = 0;
11311 for (i = 0; i < 8; i++) {
11312 if (imm8 & (1 << i))
11313 imm16 |= (3 << (2*i));
11315 IRTemp imm16_mask = newTemp(Ity_V128);
11316 assign( imm16_mask, mkV128( imm16 ));
11318 IRTemp res = newTemp(Ity_V128);
11319 assign ( res, binop( Iop_OrV128,
11320 binop( Iop_AndV128, mkexpr(sV),
11321 mkexpr(imm16_mask) ),
11322 binop( Iop_AndV128, mkexpr(dV),
11323 unop( Iop_NotV128, mkexpr(imm16_mask) ) ) ) );
11324 return res;
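/* For example, imm8 == 0xA5 expands to imm16 == 0xCC33; mkV128 then
   widens each of those 16 bits into a whole byte of the 128-bit mask,
   so every set bit of imm8 selects one 16-bit lane from sV and every
   clear bit keeps the corresponding lane of dV. */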
11328 static IRTemp math_PMULUDQ_128 ( IRTemp sV, IRTemp dV )
11330 /* This is a really poor translation -- could be improved if
11331 performance critical */
11332 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11333 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11334 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
11335 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11336 IRTemp res = newTemp(Ity_V128);
11337 assign(res, binop(Iop_64HLtoV128,
11338 binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)),
11339 binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) ));
11340 return res;
11344 static IRTemp math_PMULUDQ_256 ( IRTemp sV, IRTemp dV )
11346 /* This is a really poor translation -- could be improved if
11347 performance critical */
11348 IRTemp sHi, sLo, dHi, dLo;
11349 sHi = sLo = dHi = dLo = IRTemp_INVALID;
11350 breakupV256toV128s( dV, &dHi, &dLo);
11351 breakupV256toV128s( sV, &sHi, &sLo);
11352 IRTemp res = newTemp(Ity_V256);
11353 assign(res, binop(Iop_V128HLtoV256,
11354 mkexpr(math_PMULUDQ_128(sHi, dHi)),
11355 mkexpr(math_PMULUDQ_128(sLo, dLo))));
11356 return res;
11360 static IRTemp math_PMULDQ_128 ( IRTemp dV, IRTemp sV )
11362 /* This is a really poor translation -- could be improved if
11363 performance critical */
11364 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11365 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11366 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
11367 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11368 IRTemp res = newTemp(Ity_V128);
11369 assign(res, binop(Iop_64HLtoV128,
11370 binop( Iop_MullS32, mkexpr(d2), mkexpr(s2)),
11371 binop( Iop_MullS32, mkexpr(d0), mkexpr(s0)) ));
11372 return res;
11376 static IRTemp math_PMULDQ_256 ( IRTemp sV, IRTemp dV )
11378 /* This is a really poor translation -- could be improved if
11379 performance critical */
11380 IRTemp sHi, sLo, dHi, dLo;
11381 sHi = sLo = dHi = dLo = IRTemp_INVALID;
11382 breakupV256toV128s( dV, &dHi, &dLo);
11383 breakupV256toV128s( sV, &sHi, &sLo);
11384 IRTemp res = newTemp(Ity_V256);
11385 assign(res, binop(Iop_V128HLtoV256,
11386 mkexpr(math_PMULDQ_128(sHi, dHi)),
11387 mkexpr(math_PMULDQ_128(sLo, dLo))));
11388 return res;
11392 static IRTemp math_PMADDWD_128 ( IRTemp dV, IRTemp sV )
11394 IRTemp sVhi, sVlo, dVhi, dVlo;
11395 IRTemp resHi = newTemp(Ity_I64);
11396 IRTemp resLo = newTemp(Ity_I64);
11397 sVhi = sVlo = dVhi = dVlo = IRTemp_INVALID;
11398 breakupV128to64s( sV, &sVhi, &sVlo );
11399 breakupV128to64s( dV, &dVhi, &dVlo );
11400 assign( resHi, mkIRExprCCall(Ity_I64, 0/*regparms*/,
11401 "amd64g_calculate_mmx_pmaddwd",
11402 &amd64g_calculate_mmx_pmaddwd,
11403 mkIRExprVec_2( mkexpr(sVhi), mkexpr(dVhi))));
11404 assign( resLo, mkIRExprCCall(Ity_I64, 0/*regparms*/,
11405 "amd64g_calculate_mmx_pmaddwd",
11406 &amd64g_calculate_mmx_pmaddwd,
11407 mkIRExprVec_2( mkexpr(sVlo), mkexpr(dVlo))));
11408 IRTemp res = newTemp(Ity_V128);
11409 assign( res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo))) ;
11410 return res;
11414 static IRTemp math_PMADDWD_256 ( IRTemp dV, IRTemp sV )
11416 IRTemp sHi, sLo, dHi, dLo;
11417 sHi = sLo = dHi = dLo = IRTemp_INVALID;
11418 breakupV256toV128s( dV, &dHi, &dLo);
11419 breakupV256toV128s( sV, &sHi, &sLo);
11420 IRTemp res = newTemp(Ity_V256);
11421 assign(res, binop(Iop_V128HLtoV256,
11422 mkexpr(math_PMADDWD_128(dHi, sHi)),
11423 mkexpr(math_PMADDWD_128(dLo, sLo))));
11424 return res;
11428 static IRTemp math_ADDSUBPD_128 ( IRTemp dV, IRTemp sV )
11430 IRTemp addV = newTemp(Ity_V128);
11431 IRTemp subV = newTemp(Ity_V128);
11432 IRTemp a1 = newTemp(Ity_I64);
11433 IRTemp s0 = newTemp(Ity_I64);
11434 IRTemp rm = newTemp(Ity_I32);
11436 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11437 assign( addV, triop(Iop_Add64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11438 assign( subV, triop(Iop_Sub64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11440 assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
11441 assign( s0, unop(Iop_V128to64, mkexpr(subV) ));
11443 IRTemp res = newTemp(Ity_V128);
11444 assign( res, binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
11445 return res;
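/* The net effect: result = [ dHi + sHi : dLo - sLo ], i.e. subtract
   in the low lane and add in the high lane, which is the ADDSUBPD
   behaviour the name suggests. */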
11449 static IRTemp math_ADDSUBPD_256 ( IRTemp dV, IRTemp sV )
11451 IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
11452 IRTemp addV = newTemp(Ity_V256);
11453 IRTemp subV = newTemp(Ity_V256);
11454 IRTemp rm = newTemp(Ity_I32);
11455 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11457 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11458 assign( addV, triop(Iop_Add64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11459 assign( subV, triop(Iop_Sub64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11461 breakupV256to64s( addV, &a3, &a2, &a1, &a0 );
11462 breakupV256to64s( subV, &s3, &s2, &s1, &s0 );
11464 IRTemp res = newTemp(Ity_V256);
11465 assign( res, mkV256from64s( a3, s2, a1, s0 ) );
11466 return res;
11470 static IRTemp math_ADDSUBPS_128 ( IRTemp dV, IRTemp sV )
11472 IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
11473 IRTemp addV = newTemp(Ity_V128);
11474 IRTemp subV = newTemp(Ity_V128);
11475 IRTemp rm = newTemp(Ity_I32);
11476 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11478 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11479 assign( addV, triop(Iop_Add32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11480 assign( subV, triop(Iop_Sub32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11482 breakupV128to32s( addV, &a3, &a2, &a1, &a0 );
11483 breakupV128to32s( subV, &s3, &s2, &s1, &s0 );
11485 IRTemp res = newTemp(Ity_V128);
11486 assign( res, mkV128from32s( a3, s2, a1, s0 ) );
11487 return res;
11491 static IRTemp math_ADDSUBPS_256 ( IRTemp dV, IRTemp sV )
11493 IRTemp a7, a6, a5, a4, a3, a2, a1, a0;
11494 IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
11495 IRTemp addV = newTemp(Ity_V256);
11496 IRTemp subV = newTemp(Ity_V256);
11497 IRTemp rm = newTemp(Ity_I32);
11498 a7 = a6 = a5 = a4 = a3 = a2 = a1 = a0 = IRTemp_INVALID;
11499 s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11501 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11502 assign( addV, triop(Iop_Add32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11503 assign( subV, triop(Iop_Sub32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11505 breakupV256to32s( addV, &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0 );
11506 breakupV256to32s( subV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
11508 IRTemp res = newTemp(Ity_V256);
11509 assign( res, mkV256from32s( a7, s6, a5, s4, a3, s2, a1, s0 ) );
11510 return res;
11514 /* Handle 128 bit PSHUFLW and PSHUFHW. */
11515 static Long dis_PSHUFxW_128 ( const VexAbiInfo* vbi, Prefix pfx,
11516 Long delta, Bool isAvx, Bool xIsH )
11518 IRTemp addr = IRTemp_INVALID;
11519 Int alen = 0;
11520 HChar dis_buf[50];
11521 UChar modrm = getUChar(delta);
11522 UInt rG = gregOfRexRM(pfx,modrm);
11523 UInt imm8;
11524 IRTemp sVmut, dVmut, sVcon, sV, dV, s3, s2, s1, s0;
11525 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11526 sV = newTemp(Ity_V128);
11527 dV = newTemp(Ity_V128);
11528 sVmut = newTemp(Ity_I64);
11529 dVmut = newTemp(Ity_I64);
11530 sVcon = newTemp(Ity_I64);
11531 if (epartIsReg(modrm)) {
11532 UInt rE = eregOfRexRM(pfx,modrm);
11533 assign( sV, getXMMReg(rE) );
11534 imm8 = (UInt)getUChar(delta+1);
11535 delta += 1+1;
11536 DIP("%spshuf%cw $%u,%s,%s\n",
11537 isAvx ? "v" : "", xIsH ? 'h' : 'l',
11538 imm8, nameXMMReg(rE), nameXMMReg(rG));
11539 } else {
11540 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
11541 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11542 imm8 = (UInt)getUChar(delta+alen);
11543 delta += alen+1;
11544 DIP("%spshuf%cw $%u,%s,%s\n",
11545 isAvx ? "v" : "", xIsH ? 'h' : 'l',
11546 imm8, dis_buf, nameXMMReg(rG));
11549 /* Get the to-be-changed (mut) and unchanging (con) bits of the
11550 source. */
11551 assign( sVmut, unop(xIsH ? Iop_V128HIto64 : Iop_V128to64, mkexpr(sV)) );
11552 assign( sVcon, unop(xIsH ? Iop_V128to64 : Iop_V128HIto64, mkexpr(sV)) );
11554 breakup64to16s( sVmut, &s3, &s2, &s1, &s0 );
11555 # define SEL(n) \
11556 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11557 assign(dVmut, mk64from16s( SEL((imm8>>6)&3), SEL((imm8>>4)&3),
11558 SEL((imm8>>2)&3), SEL((imm8>>0)&3) ));
11559 # undef SEL
11561 assign(dV, xIsH ? binop(Iop_64HLtoV128, mkexpr(dVmut), mkexpr(sVcon))
11562 : binop(Iop_64HLtoV128, mkexpr(sVcon), mkexpr(dVmut)) );
11564 (isAvx ? putYMMRegLoAndZU : putXMMReg)(rG, mkexpr(dV));
11565 return delta;
11569 /* Handle 256 bit PSHUFLW and PSHUFHW. */
11570 static Long dis_PSHUFxW_256 ( const VexAbiInfo* vbi, Prefix pfx,
11571 Long delta, Bool xIsH )
11573 IRTemp addr = IRTemp_INVALID;
11574 Int alen = 0;
11575 HChar dis_buf[50];
11576 UChar modrm = getUChar(delta);
11577 UInt rG = gregOfRexRM(pfx,modrm);
11578 UInt imm8;
11579 IRTemp sV, s[8], sV64[4], dVhi, dVlo;
11580 sV64[3] = sV64[2] = sV64[1] = sV64[0] = IRTemp_INVALID;
11581 s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
11582 sV = newTemp(Ity_V256);
11583 dVhi = newTemp(Ity_I64);
11584 dVlo = newTemp(Ity_I64);
11585 if (epartIsReg(modrm)) {
11586 UInt rE = eregOfRexRM(pfx,modrm);
11587 assign( sV, getYMMReg(rE) );
11588 imm8 = (UInt)getUChar(delta+1);
11589 delta += 1+1;
11590 DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l',
11591 imm8, nameYMMReg(rE), nameYMMReg(rG));
11592 } else {
11593 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
11594 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
11595 imm8 = (UInt)getUChar(delta+alen);
11596 delta += alen+1;
11597 DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l',
11598 imm8, dis_buf, nameYMMReg(rG));
11601 breakupV256to64s( sV, &sV64[3], &sV64[2], &sV64[1], &sV64[0] );
11602 breakup64to16s( sV64[xIsH ? 3 : 2], &s[7], &s[6], &s[5], &s[4] );
11603 breakup64to16s( sV64[xIsH ? 1 : 0], &s[3], &s[2], &s[1], &s[0] );
11605 assign( dVhi, mk64from16s( s[4 + ((imm8>>6)&3)], s[4 + ((imm8>>4)&3)],
11606 s[4 + ((imm8>>2)&3)], s[4 + ((imm8>>0)&3)] ) );
11607 assign( dVlo, mk64from16s( s[0 + ((imm8>>6)&3)], s[0 + ((imm8>>4)&3)],
11608 s[0 + ((imm8>>2)&3)], s[0 + ((imm8>>0)&3)] ) );
11609 putYMMReg( rG, mkV256from64s( xIsH ? dVhi : sV64[3],
11610 xIsH ? sV64[2] : dVhi,
11611 xIsH ? dVlo : sV64[1],
11612 xIsH ? sV64[0] : dVlo ) );
11613 return delta;
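/* Same selection as the 128-bit case, but the single imm8 is applied
   independently to the mutable quadword of each 128-bit half; the
   other quadwords (sV64[2] and sV64[0] for the 'h' form, sV64[3] and
   sV64[1] for the 'l' form) pass through unchanged. */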
11617 static Long dis_PEXTRW_128_EregOnly_toG ( const VexAbiInfo* vbi, Prefix pfx,
11618 Long delta, Bool isAvx )
11620 Long deltaIN = delta;
11621 UChar modrm = getUChar(delta);
11622 UInt rG = gregOfRexRM(pfx,modrm);
11623 IRTemp sV = newTemp(Ity_V128);
11624 IRTemp d16 = newTemp(Ity_I16);
11625 UInt imm8;
11626 IRTemp s0, s1, s2, s3;
11627 if (epartIsReg(modrm)) {
11628 UInt rE = eregOfRexRM(pfx,modrm);
11629 assign(sV, getXMMReg(rE));
11630 imm8 = getUChar(delta+1) & 7;
11631 delta += 1+1;
11632 DIP("%spextrw $%u,%s,%s\n", isAvx ? "v" : "",
11633 imm8, nameXMMReg(rE), nameIReg32(rG));
11634 } else {
11635 /* The memory case is disallowed, apparently. */
11636 return deltaIN; /* FAIL */
11638 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11639 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11640 switch (imm8) {
11641 case 0: assign(d16, unop(Iop_32to16, mkexpr(s0))); break;
11642 case 1: assign(d16, unop(Iop_32HIto16, mkexpr(s0))); break;
11643 case 2: assign(d16, unop(Iop_32to16, mkexpr(s1))); break;
11644 case 3: assign(d16, unop(Iop_32HIto16, mkexpr(s1))); break;
11645 case 4: assign(d16, unop(Iop_32to16, mkexpr(s2))); break;
11646 case 5: assign(d16, unop(Iop_32HIto16, mkexpr(s2))); break;
11647 case 6: assign(d16, unop(Iop_32to16, mkexpr(s3))); break;
11648 case 7: assign(d16, unop(Iop_32HIto16, mkexpr(s3))); break;
11649 default: vassert(0);
11651 putIReg32(rG, unop(Iop_16Uto32, mkexpr(d16)));
11652 return delta;
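/* The switch above just extracts 16-bit lane imm8 (0 .. 7) of the
   source: the V128 is viewed as four 32-bit chunks, even imm8 values
   take the low half of chunk imm8/2 and odd values take the high
   half.  E.g. imm8 == 5 extracts bits 95:80, the upper 16 bits of s2,
   zero-extended into the 32-bit destination. */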
11656 static Long dis_CVTDQ2PD_128 ( const VexAbiInfo* vbi, Prefix pfx,
11657 Long delta, Bool isAvx )
11659 IRTemp addr = IRTemp_INVALID;
11660 Int alen = 0;
11661 HChar dis_buf[50];
11662 UChar modrm = getUChar(delta);
11663 IRTemp arg64 = newTemp(Ity_I64);
11664 UInt rG = gregOfRexRM(pfx,modrm);
11665 const HChar* mbV = isAvx ? "v" : "";
11666 if (epartIsReg(modrm)) {
11667 UInt rE = eregOfRexRM(pfx,modrm);
11668 assign( arg64, getXMMRegLane64(rE, 0) );
11669 delta += 1;
11670 DIP("%scvtdq2pd %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG));
11671 } else {
11672 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11673 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
11674 delta += alen;
11675 DIP("%scvtdq2pd %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
11677 putXMMRegLane64F(
11678 rG, 0,
11679 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)))
11681 putXMMRegLane64F(
11682 rG, 1,
11683 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)))
11685 if (isAvx)
11686 putYMMRegLane128(rG, 1, mkV128(0));
11687 return delta;
11691 static Long dis_STMXCSR ( const VexAbiInfo* vbi, Prefix pfx,
11692 Long delta, Bool isAvx )
11694 IRTemp addr = IRTemp_INVALID;
11695 Int alen = 0;
11696 HChar dis_buf[50];
11697 UChar modrm = getUChar(delta);
11698 vassert(!epartIsReg(modrm)); /* ensured by caller */
11699 vassert(gregOfRexRM(pfx,modrm) == 3); /* ditto */
11701 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11702 delta += alen;
11704 /* Fake up a native SSE mxcsr word. The only thing it depends on
11705 is SSEROUND[1:0], so call a clean helper to cook it up.
11707 /* ULong amd64g_create_mxcsr ( ULong sseround ) */
11708 DIP("%sstmxcsr %s\n", isAvx ? "v" : "", dis_buf);
11709 storeLE(
11710 mkexpr(addr),
11711 unop(Iop_64to32,
11712 mkIRExprCCall(
11713 Ity_I64, 0/*regp*/,
11714 "amd64g_create_mxcsr", &amd64g_create_mxcsr,
11715 mkIRExprVec_1( unop(Iop_32Uto64,get_sse_roundingmode()) )
11719 return delta;
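/* Rounding control lives in MXCSR bits 14:13, so the helper only has
   to merge SSEROUND[1:0] into an otherwise fixed MXCSR image; nothing
   else can vary here, because the rounding mode is the only piece of
   %mxcsr tracked in the guest state. */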
11723 static Long dis_LDMXCSR ( const VexAbiInfo* vbi, Prefix pfx,
11724 Long delta, Bool isAvx )
11726 IRTemp addr = IRTemp_INVALID;
11727 Int alen = 0;
11728 HChar dis_buf[50];
11729 UChar modrm = getUChar(delta);
11730 vassert(!epartIsReg(modrm)); /* ensured by caller */
11731 vassert(gregOfRexRM(pfx,modrm) == 2); /* ditto */
11733 IRTemp t64 = newTemp(Ity_I64);
11734 IRTemp ew = newTemp(Ity_I32);
11736 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11737 delta += alen;
11738 DIP("%sldmxcsr %s\n", isAvx ? "v" : "", dis_buf);
11740 /* The only thing we observe in %mxcsr is the rounding mode.
11741 Therefore, pass the 32-bit value (SSE native-format control
11742 word) to a clean helper, getting back a 64-bit value, the
11743 lower half of which is the SSEROUND value to store, and the
11744 upper half of which is the emulation-warning token which may
11745 be generated.
11747 /* ULong amd64g_check_ldmxcsr ( ULong ); */
11748 assign( t64, mkIRExprCCall(
11749 Ity_I64, 0/*regparms*/,
11750 "amd64g_check_ldmxcsr",
11751 &amd64g_check_ldmxcsr,
11752 mkIRExprVec_1(
11753 unop(Iop_32Uto64,
11754 loadLE(Ity_I32, mkexpr(addr))
11760 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) );
11761 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
11762 put_emwarn( mkexpr(ew) );
11763 /* Finally, if an emulation warning was reported, side-exit to
11764 the next insn, reporting the warning, so that Valgrind's
11765 dispatcher sees the warning. */
11766 stmt(
11767 IRStmt_Exit(
11768 binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)),
11769 Ijk_EmWarn,
11770 IRConst_U64(guest_RIP_bbstart+delta),
11771 OFFB_RIP
11774 return delta;
11778 static void gen_XSAVE_SEQUENCE ( IRTemp addr, IRTemp rfbm )
11780 /* ------ rfbm[0] gates the x87 state ------ */
11782 /* Uses dirty helper:
11783 void amd64g_do_XSAVE_COMPONENT_0 ( VexGuestAMD64State*, ULong )
11785 IRDirty* d0 = unsafeIRDirty_0_N (
11786 0/*regparms*/,
11787 "amd64g_dirtyhelper_XSAVE_COMPONENT_0",
11788 &amd64g_dirtyhelper_XSAVE_COMPONENT_0,
11789 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
11791 d0->guard = binop(Iop_CmpEQ64, binop(Iop_And64, mkexpr(rfbm), mkU64(1)),
11792 mkU64(1));
11794 /* Declare we're writing memory. Really, bytes 24 through 31
11795 (MXCSR and MXCSR_MASK) aren't written, but we can't express more
11796 than 1 memory area here, so just mark the whole thing as
11797 written. */
11798 d0->mFx = Ifx_Write;
11799 d0->mAddr = mkexpr(addr);
11800 d0->mSize = 160;
11802 /* declare we're reading guest state */
11803 d0->nFxState = 5;
11804 vex_bzero(&d0->fxState, sizeof(d0->fxState));
11806 d0->fxState[0].fx = Ifx_Read;
11807 d0->fxState[0].offset = OFFB_FTOP;
11808 d0->fxState[0].size = sizeof(UInt);
11810 d0->fxState[1].fx = Ifx_Read;
11811 d0->fxState[1].offset = OFFB_FPREGS;
11812 d0->fxState[1].size = 8 * sizeof(ULong);
11814 d0->fxState[2].fx = Ifx_Read;
11815 d0->fxState[2].offset = OFFB_FPTAGS;
11816 d0->fxState[2].size = 8 * sizeof(UChar);
11818 d0->fxState[3].fx = Ifx_Read;
11819 d0->fxState[3].offset = OFFB_FPROUND;
11820 d0->fxState[3].size = sizeof(ULong);
11822 d0->fxState[4].fx = Ifx_Read;
11823 d0->fxState[4].offset = OFFB_FC3210;
11824 d0->fxState[4].size = sizeof(ULong);
11826 stmt( IRStmt_Dirty(d0) );
11828 /* ------ rfbm[1] gates the SSE state ------ */
11830 IRTemp rfbm_1 = newTemp(Ity_I64);
11831 IRTemp rfbm_1or2 = newTemp(Ity_I64);
11832 assign(rfbm_1, binop(Iop_And64, mkexpr(rfbm), mkU64(2)));
11833 assign(rfbm_1or2, binop(Iop_And64, mkexpr(rfbm), mkU64(6)));
11835 IRExpr* guard_1 = binop(Iop_CmpEQ64, mkexpr(rfbm_1), mkU64(2));
11836 IRExpr* guard_1or2 = binop(Iop_CmpNE64, mkexpr(rfbm_1or2), mkU64(0));
11838 /* Uses dirty helper:
11839 void amd64g_do_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS
11840 ( VexGuestAMD64State*, ULong )
11841 This creates only MXCSR and MXCSR_MASK. We need to do this if
11842 either components 1 (SSE) or 2 (AVX) are requested. Hence the
11843 guard condition is a bit more complex.
11845 IRDirty* d1 = unsafeIRDirty_0_N (
11846 0/*regparms*/,
11847 "amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS",
11848 &amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS,
11849 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
11851 d1->guard = guard_1or2;
11853 /* Declare we're writing memory: MXCSR and MXCSR_MASK. Note that
11854 the code for rfbm[0] just above claims a write of 0 .. 159, so
11855 this duplicates it. But at least correctly connects 24 .. 31 to
11856 the MXCSR guest state representation (SSEROUND field). */
11857 d1->mFx = Ifx_Write;
11858 d1->mAddr = binop(Iop_Add64, mkexpr(addr), mkU64(24));
11859 d1->mSize = 8;
11861 /* declare we're reading guest state */
11862 d1->nFxState = 1;
11863 vex_bzero(&d1->fxState, sizeof(d1->fxState));
11865 d1->fxState[0].fx = Ifx_Read;
11866 d1->fxState[0].offset = OFFB_SSEROUND;
11867 d1->fxState[0].size = sizeof(ULong);
11869 /* Call the helper. This creates MXCSR and MXCSR_MASK but nothing
11870 else. We do the actual register array, XMM[0..15], separately,
11871 in order that any undefinedness in the XMM registers is tracked
11872 separately by Memcheck and does not "infect" the in-memory
11873 shadow for the other parts of the image. */
11874 stmt( IRStmt_Dirty(d1) );
11876 /* And now the XMMs themselves. */
11877 UInt reg;
11878 for (reg = 0; reg < 16; reg++) {
11879 stmt( IRStmt_StoreG(
11880 Iend_LE,
11881 binop(Iop_Add64, mkexpr(addr), mkU64(160 + reg * 16)),
11882 getXMMReg(reg),
11883 guard_1
11887 /* ------ rfbm[2] gates the AVX state ------ */
11888 /* Component 2 is just a bunch of register saves, so we'll do it
11889 inline, just to be simple and to be Memcheck friendly. */
11891 IRTemp rfbm_2 = newTemp(Ity_I64);
11892 assign(rfbm_2, binop(Iop_And64, mkexpr(rfbm), mkU64(4)));
11894 IRExpr* guard_2 = binop(Iop_CmpEQ64, mkexpr(rfbm_2), mkU64(4));
11896 for (reg = 0; reg < 16; reg++) {
11897 stmt( IRStmt_StoreG(
11898 Iend_LE,
11899 binop(Iop_Add64, mkexpr(addr), mkU64(576 + reg * 16)),
11900 getYMMRegLane128(reg,1),
11901 guard_2
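/* Layout of the XSAVE area as used by the sequence above (byte
   offsets from |addr|, following the standard FXSAVE/XSAVE layout):
       0 ..  23   x87 control/status/tag words etc      (component 0)
      24 ..  31   MXCSR and MXCSR_MASK                   (components 1/2)
      32 .. 159   x87 register stack ST0..ST7            (component 0)
     160 .. 415   XMM0..XMM15, 16 bytes each             (component 1)
     512 .. 575   XSAVE header (XSTATE_BV at offset 512)
     576 .. 831   upper 128 bits of YMM0..YMM15          (component 2)
   Bytes 416 .. 511 of the legacy area are left untouched by this
   sequence. */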
11907 static Long dis_XSAVE ( const VexAbiInfo* vbi,
11908 Prefix pfx, Long delta, Int sz )
11910 /* Note that the presence or absence of REX.W (indicated here by
11911 |sz|) slightly affects the written format: whether the saved FPU
11912 IP and DP pointers are 64 or 32 bits. But the helper function
11913 we call simply writes zero bits in the relevant fields, which
11914 are 64 bits regardless of what REX.W is, and so it's good enough
11915 (iow, equally broken) in both cases. */
11916 IRTemp addr = IRTemp_INVALID;
11917 Int alen = 0;
11918 HChar dis_buf[50];
11919 UChar modrm = getUChar(delta);
11920 vassert(!epartIsReg(modrm)); /* ensured by caller */
11921 vassert(sz == 4 || sz == 8); /* ditto */
11923 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11924 delta += alen;
11925 gen_SIGNAL_if_not_64_aligned(vbi, addr);
11927 DIP("%sxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);
11929 /* VEX's caller is assumed to have checked this. */
11930 const ULong aSSUMED_XCR0_VALUE = 7;
11932 IRTemp rfbm = newTemp(Ity_I64);
11933 assign(rfbm,
11934 binop(Iop_And64,
11935 binop(Iop_Or64,
11936 binop(Iop_Shl64,
11937 unop(Iop_32Uto64, getIRegRDX(4)), mkU8(32)),
11938 unop(Iop_32Uto64, getIRegRAX(4))),
11939 mkU64(aSSUMED_XCR0_VALUE)));
11941 gen_XSAVE_SEQUENCE(addr, rfbm);
11943 /* Finally, we need to update XSTATE_BV in the XSAVE header area, by
11944 OR-ing the RFBM value into it. */
11945 IRTemp addr_plus_512 = newTemp(Ity_I64);
11946 assign(addr_plus_512, binop(Iop_Add64, mkexpr(addr), mkU64(512)));
11947 storeLE( mkexpr(addr_plus_512),
11948 binop(Iop_Or8,
11949 unop(Iop_64to8, mkexpr(rfbm)),
11950 loadLE(Ity_I8, mkexpr(addr_plus_512))) );
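   /* For instance, executing XSAVE with EDX:EAX == 0:7 (against the
      assumed XCR0 of 7) gives rfbm == 7, so all three components --
      x87 (bit 0), SSE (bit 1) and AVX (bit 2) -- are written, and the
      OR above then sets the low three bits of XSTATE_BV at offset
      512. */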
11952 return delta;
11956 static Long dis_FXSAVE ( const VexAbiInfo* vbi,
11957 Prefix pfx, Long delta, Int sz )
11959 /* See comment in dis_XSAVE about the significance of REX.W. */
11960 IRTemp addr = IRTemp_INVALID;
11961 Int alen = 0;
11962 HChar dis_buf[50];
11963 UChar modrm = getUChar(delta);
11964 vassert(!epartIsReg(modrm)); /* ensured by caller */
11965 vassert(sz == 4 || sz == 8); /* ditto */
11967 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11968 delta += alen;
11969 gen_SIGNAL_if_not_16_aligned(vbi, addr);
11971 DIP("%sfxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);
11973 /* FXSAVE is just XSAVE with components 0 and 1 selected. Set rfbm
11974 to 0b011, generate the XSAVE sequence accordingly, and let iropt
11975 fold out the unused (AVX) parts. */
11976 IRTemp rfbm = newTemp(Ity_I64);
11977 assign(rfbm, mkU64(3));
11978 gen_XSAVE_SEQUENCE(addr, rfbm);
11980 return delta;
11984 static void gen_XRSTOR_SEQUENCE ( IRTemp addr, IRTemp xstate_bv, IRTemp rfbm )
11986 /* ------ rfbm[0] gates the x87 state ------ */
11988 /* If rfbm[0] == 1, we have to write the x87 state. If
11989 xstate_bv[0] == 1, we will read it from the memory image, else
11990 we'll set it to initial values. Doing this with a helper
11991 function and getting the definedness flow annotations correct is
11992 too difficult, so generate stupid but simple code: first set the
11993 registers to initial values, regardless of xstate_bv[0]. Then,
11994 conditionally restore from the memory image. */
11996 IRTemp rfbm_0 = newTemp(Ity_I64);
11997 IRTemp xstate_bv_0 = newTemp(Ity_I64);
11998 IRTemp restore_0 = newTemp(Ity_I64);
11999 assign(rfbm_0, binop(Iop_And64, mkexpr(rfbm), mkU64(1)));
12000 assign(xstate_bv_0, binop(Iop_And64, mkexpr(xstate_bv), mkU64(1)));
12001 assign(restore_0, binop(Iop_And64, mkexpr(rfbm_0), mkexpr(xstate_bv_0)));
12003 gen_FINIT_SEQUENCE( binop(Iop_CmpNE64, mkexpr(rfbm_0), mkU64(0)) );
12005 /* Uses dirty helper:
12006 void amd64g_do_XRSTOR_COMPONENT_0 ( VexGuestAMD64State*, ULong )
12008 IRDirty* d0 = unsafeIRDirty_0_N (
12009 0/*regparms*/,
12010 "amd64g_dirtyhelper_XRSTOR_COMPONENT_0",
12011 &amd64g_dirtyhelper_XRSTOR_COMPONENT_0,
12012 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
12014 d0->guard = binop(Iop_CmpNE64, mkexpr(restore_0), mkU64(0));
12016 /* Declare we're reading memory. Really, bytes 24 through 31
12017 (MXCSR and MXCSR_MASK) aren't read, but we can't express more
12018 than 1 memory area here, so just mark the whole thing as
12019 read. */
12020 d0->mFx = Ifx_Read;
12021 d0->mAddr = mkexpr(addr);
12022 d0->mSize = 160;
12024 /* declare we're writing guest state */
12025 d0->nFxState = 5;
12026 vex_bzero(&d0->fxState, sizeof(d0->fxState));
12028 d0->fxState[0].fx = Ifx_Write;
12029 d0->fxState[0].offset = OFFB_FTOP;
12030 d0->fxState[0].size = sizeof(UInt);
12032 d0->fxState[1].fx = Ifx_Write;
12033 d0->fxState[1].offset = OFFB_FPREGS;
12034 d0->fxState[1].size = 8 * sizeof(ULong);
12036 d0->fxState[2].fx = Ifx_Write;
12037 d0->fxState[2].offset = OFFB_FPTAGS;
12038 d0->fxState[2].size = 8 * sizeof(UChar);
12040 d0->fxState[3].fx = Ifx_Write;
12041 d0->fxState[3].offset = OFFB_FPROUND;
12042 d0->fxState[3].size = sizeof(ULong);
12044 d0->fxState[4].fx = Ifx_Write;
12045 d0->fxState[4].offset = OFFB_FC3210;
12046 d0->fxState[4].size = sizeof(ULong);
12048 stmt( IRStmt_Dirty(d0) );
12050 /* ------ rfbm[1] gates the SSE state ------ */
12052 /* Same scheme as component 0: first zero it out, and then possibly
12053 restore from the memory area. */
12054 IRTemp rfbm_1 = newTemp(Ity_I64);
12055 IRTemp xstate_bv_1 = newTemp(Ity_I64);
12056 IRTemp restore_1 = newTemp(Ity_I64);
12057 assign(rfbm_1, binop(Iop_And64, mkexpr(rfbm), mkU64(2)));
12058 assign(xstate_bv_1, binop(Iop_And64, mkexpr(xstate_bv), mkU64(2)));
12059 assign(restore_1, binop(Iop_And64, mkexpr(rfbm_1), mkexpr(xstate_bv_1)));
12060 IRExpr* rfbm_1e = binop(Iop_CmpNE64, mkexpr(rfbm_1), mkU64(0));
12061 IRExpr* restore_1e = binop(Iop_CmpNE64, mkexpr(restore_1), mkU64(0));
12063 IRTemp rfbm_1or2 = newTemp(Ity_I64);
12064 IRTemp xstate_bv_1or2 = newTemp(Ity_I64);
12065 IRTemp restore_1or2 = newTemp(Ity_I64);
12066 assign(rfbm_1or2, binop(Iop_And64, mkexpr(rfbm), mkU64(6)));
12067 assign(xstate_bv_1or2, binop(Iop_And64, mkexpr(xstate_bv), mkU64(6)));
12068 assign(restore_1or2, binop(Iop_And64, mkexpr(rfbm_1or2),
12069 mkexpr(xstate_bv_1or2)));
12070 IRExpr* rfbm_1or2e = binop(Iop_CmpNE64, mkexpr(rfbm_1or2), mkU64(0));
12071 IRExpr* restore_1or2e = binop(Iop_CmpNE64, mkexpr(restore_1or2), mkU64(0));
12073 /* The areas in question are: SSEROUND, and the XMM register array. */
12074 putGuarded(OFFB_SSEROUND, rfbm_1or2e, mkU64(Irrm_NEAREST));
12076 UInt reg;
12077 for (reg = 0; reg < 16; reg++) {
12078 putGuarded(xmmGuestRegOffset(reg), rfbm_1e, mkV128(0));
12081 /* And now possibly restore from MXCSR/MXCSR_MASK */
12082 /* Uses dirty helper:
12083 void amd64g_do_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS
12084 ( VexGuestAMD64State*, ULong )
12085 This restores from only MXCSR and MXCSR_MASK. We need to do
12086 this if either components 1 (SSE) or 2 (AVX) are requested.
12087 Hence the guard condition is a bit more complex.
12089 IRDirty* d1 = unsafeIRDirty_0_N (
12090 0/*regparms*/,
12091 "amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS",
12092 &amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS,
12093 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
12095 d1->guard = restore_1or2e;
12097 /* Declare we're reading memory: MXCSR and MXCSR_MASK. Note that
12098 the code for rfbm[0] just above claims a read of 0 .. 159, so
12099 this duplicates it. But at least correctly connects 24 .. 31 to
12100 the MXCSR guest state representation (SSEROUND field). */
12101 d1->mFx = Ifx_Read;
12102 d1->mAddr = binop(Iop_Add64, mkexpr(addr), mkU64(24));
12103 d1->mSize = 8;
12105 /* declare we're writing guest state */
12106 d1->nFxState = 1;
12107 vex_bzero(&d1->fxState, sizeof(d1->fxState));
12109 d1->fxState[0].fx = Ifx_Write;
12110 d1->fxState[0].offset = OFFB_SSEROUND;
12111 d1->fxState[0].size = sizeof(ULong);
12113 /* Call the helper. This creates SSEROUND but nothing
12114 else. We do the actual register array, XMM[0..15], separately,
12115 in order that any undefinedness in the XMM registers is tracked
12116 separately by Memcheck and is not "infected" by the in-memory
12117 shadow for the other parts of the image. */
12118 stmt( IRStmt_Dirty(d1) );
12120 /* And now the XMMs themselves. For each register, we PUT either
12121 its old value, or the value loaded from memory. One convenient
12122 way to do that is with a conditional load that has, as its
12123 default value, the old value of the register. */
12124 for (reg = 0; reg < 16; reg++) {
12125 IRExpr* ea = binop(Iop_Add64, mkexpr(addr), mkU64(160 + reg * 16));
12126 IRExpr* alt = getXMMReg(reg);
12127 IRTemp loadedValue = newTemp(Ity_V128);
12128 stmt( IRStmt_LoadG(Iend_LE,
12129 ILGop_IdentV128,
12130 loadedValue, ea, alt, restore_1e) );
12131 putXMMReg(reg, mkexpr(loadedValue));
12134 /* ------ rfbm[2] gates the AVX state ------ */
12135 /* Component 2 is just a bunch of register loads, so we'll do it
12136 inline, just to be simple and to be Memcheck friendly. */
12138 /* Same scheme as component 0: first zero it out, and then possibly
12139 restore from the memory area. */
12140 IRTemp rfbm_2 = newTemp(Ity_I64);
12141 IRTemp xstate_bv_2 = newTemp(Ity_I64);
12142 IRTemp restore_2 = newTemp(Ity_I64);
12143 assign(rfbm_2, binop(Iop_And64, mkexpr(rfbm), mkU64(4)));
12144 assign(xstate_bv_2, binop(Iop_And64, mkexpr(xstate_bv), mkU64(4)));
12145 assign(restore_2, binop(Iop_And64, mkexpr(rfbm_2), mkexpr(xstate_bv_2)));
12147 IRExpr* rfbm_2e = binop(Iop_CmpNE64, mkexpr(rfbm_2), mkU64(0));
12148 IRExpr* restore_2e = binop(Iop_CmpNE64, mkexpr(restore_2), mkU64(0));
12150 for (reg = 0; reg < 16; reg++) {
12151 putGuarded(ymmGuestRegLane128offset(reg, 1), rfbm_2e, mkV128(0));
12154 for (reg = 0; reg < 16; reg++) {
12155 IRExpr* ea = binop(Iop_Add64, mkexpr(addr), mkU64(576 + reg * 16));
12156 IRExpr* alt = getYMMRegLane128(reg, 1);
12157 IRTemp loadedValue = newTemp(Ity_V128);
12158 stmt( IRStmt_LoadG(Iend_LE,
12159 ILGop_IdentV128,
12160 loadedValue, ea, alt, restore_2e) );
12161 putYMMRegLane128(reg, 1, mkexpr(loadedValue));
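/* Note on the guarded loads above: IRStmt_LoadG with ILGop_IdentV128
   either loads the 128-bit value from |ea| (when the guard is true)
   or yields |alt|, the register's current value, so a component that
   is not being restored simply keeps whatever the preceding guarded
   reset left in the register. */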
12166 static Long dis_XRSTOR ( const VexAbiInfo* vbi,
12167 Prefix pfx, Long delta, Int sz )
12169 /* As with XSAVE above, we ignore the value of REX.W since we're
12170 not bothering with the FPU DP and IP fields. */
12171 IRTemp addr = IRTemp_INVALID;
12172 Int alen = 0;
12173 HChar dis_buf[50];
12174 UChar modrm = getUChar(delta);
12175 vassert(!epartIsReg(modrm)); /* ensured by caller */
12176 vassert(sz == 4 || sz == 8); /* ditto */
12178 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12179 delta += alen;
12180 gen_SIGNAL_if_not_64_aligned(vbi, addr);
12182 DIP("%sxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);
12184 /* VEX's caller is assumed to have checked this. */
12185 const ULong aSSUMED_XCR0_VALUE = 7;
12187 IRTemp rfbm = newTemp(Ity_I64);
12188 assign(rfbm,
12189 binop(Iop_And64,
12190 binop(Iop_Or64,
12191 binop(Iop_Shl64,
12192 unop(Iop_32Uto64, getIRegRDX(4)), mkU8(32)),
12193 unop(Iop_32Uto64, getIRegRAX(4))),
12194 mkU64(aSSUMED_XCR0_VALUE)));
12196 IRTemp xstate_bv = newTemp(Ity_I64);
12197 assign(xstate_bv, loadLE(Ity_I64,
12198 binop(Iop_Add64, mkexpr(addr), mkU64(512+0))));
12200 IRTemp xcomp_bv = newTemp(Ity_I64);
12201 assign(xcomp_bv, loadLE(Ity_I64,
12202 binop(Iop_Add64, mkexpr(addr), mkU64(512+8))));
12204 IRTemp xsavehdr_23_16 = newTemp(Ity_I64);
12205 assign( xsavehdr_23_16,
12206 loadLE(Ity_I64,
12207 binop(Iop_Add64, mkexpr(addr), mkU64(512+16))));
12209 /* We must fault if
12210 * xcomp_bv[63] == 1, since this simulated CPU does not support
12211 the compaction extension.
12212 * xstate_bv sets a bit outside of XCR0 (which we assume to be 7).
12213 * any of the xsave header bytes 23 .. 8 are nonzero. This seems to
12214 imply that xcomp_bv must be zero.
12215 xcomp_bv is header bytes 15 .. 8 and xstate_bv is header bytes 7 .. 0
12217 IRTemp fault_if_nonzero = newTemp(Ity_I64);
12218 assign(fault_if_nonzero,
12219 binop(Iop_Or64,
12220 binop(Iop_And64, mkexpr(xstate_bv), mkU64(~aSSUMED_XCR0_VALUE)),
12221 binop(Iop_Or64, mkexpr(xcomp_bv), mkexpr(xsavehdr_23_16))));
12222 stmt( IRStmt_Exit(binop(Iop_CmpNE64, mkexpr(fault_if_nonzero), mkU64(0)),
12223 Ijk_SigSEGV,
12224 IRConst_U64(guest_RIP_curr_instr),
12225 OFFB_RIP
12228 /* We are guaranteed now that both xstate_bv and rfbm are in the
12229 range 0 .. 7. Generate the restore sequence proper. */
12230 gen_XRSTOR_SEQUENCE(addr, xstate_bv, rfbm);
12232 return delta;
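   /* Fault-check example for the code above: an in-memory image whose
      XSTATE_BV is, say, 0x10 (claiming a component this model does
      not support), or whose XCOMP_BV is nonzero, makes
      |fault_if_nonzero| nonzero, so the Ijk_SigSEGV exit is taken at
      the faulting instruction and nothing is restored. */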
12236 static Long dis_FXRSTOR ( const VexAbiInfo* vbi,
12237 Prefix pfx, Long delta, Int sz )
12239 /* As with FXSAVE above we ignore the value of REX.W since we're
12240 not bothering with the FPU DP and IP fields. */
12241 IRTemp addr = IRTemp_INVALID;
12242 Int alen = 0;
12243 HChar dis_buf[50];
12244 UChar modrm = getUChar(delta);
12245 vassert(!epartIsReg(modrm)); /* ensured by caller */
12246 vassert(sz == 4 || sz == 8); /* ditto */
12248 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12249 delta += alen;
12250 gen_SIGNAL_if_not_16_aligned(vbi, addr);
12252 DIP("%sfxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);
12254 /* FXRSTOR is just XRSTOR with components 0 and 1 selected and also
12255 as if components 0 and 1 are set as present in XSTATE_BV in the
12256 XSAVE header. Therefore set both rfbm and xstate_bv to 0b011,
12257 generate the XRSTOR sequence accordingly, and let iropt fold out
12258 the unused (AVX) parts. */
12259 IRTemp three = newTemp(Ity_I64);
12260 assign(three, mkU64(3));
12261 gen_XRSTOR_SEQUENCE(addr, three/*xstate_bv*/, three/*rfbm*/);
12263 return delta;
12267 static IRTemp math_PINSRW_128 ( IRTemp v128, IRTemp u16, UInt imm8 )
12269 vassert(imm8 >= 0 && imm8 <= 7);
12271 // Create a V128 value which has the selected word in the
12272 // specified lane, and zeroes everywhere else.
12273 IRTemp tmp128 = newTemp(Ity_V128);
12274 IRTemp halfshift = newTemp(Ity_I64);
12275 assign(halfshift, binop(Iop_Shl64,
12276 unop(Iop_16Uto64, mkexpr(u16)),
12277 mkU8(16 * (imm8 & 3))));
12278 if (imm8 < 4) {
12279 assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
12280 } else {
12281 assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
12284 UShort mask = ~(3 << (imm8 * 2));
12285 IRTemp res = newTemp(Ity_V128);
12286 assign( res, binop(Iop_OrV128,
12287 mkexpr(tmp128),
12288 binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
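   /* Mask example: imm8 == 5 gives mask == (UShort)~(3 << 10)
      == 0xF3FF, and mkV128 expands each of those 16 bits to a whole
      byte, so exactly bytes 10 and 11 (16-bit lane 5) of v128 are
      cleared before OR-ing in halfshift, which holds u16 shifted into
      bits 95:80 of the upper half. */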
12289 return res;
12293 static IRTemp math_PSADBW_128 ( IRTemp dV, IRTemp sV )
12295 IRTemp s1, s0, d1, d0;
12296 s1 = s0 = d1 = d0 = IRTemp_INVALID;
12298 breakupV128to64s( sV, &s1, &s0 );
12299 breakupV128to64s( dV, &d1, &d0 );
12301 IRTemp res = newTemp(Ity_V128);
12302 assign( res,
12303 binop(Iop_64HLtoV128,
12304 mkIRExprCCall(Ity_I64, 0/*regparms*/,
12305 "amd64g_calculate_mmx_psadbw",
12306 &amd64g_calculate_mmx_psadbw,
12307 mkIRExprVec_2( mkexpr(s1), mkexpr(d1))),
12308 mkIRExprCCall(Ity_I64, 0/*regparms*/,
12309 "amd64g_calculate_mmx_psadbw",
12310 &amd64g_calculate_mmx_psadbw,
12311 mkIRExprVec_2( mkexpr(s0), mkexpr(d0)))) );
12312 return res;
12316 static IRTemp math_PSADBW_256 ( IRTemp dV, IRTemp sV )
12318 IRTemp sHi, sLo, dHi, dLo;
12319 sHi = sLo = dHi = dLo = IRTemp_INVALID;
12320 breakupV256toV128s( dV, &dHi, &dLo);
12321 breakupV256toV128s( sV, &sHi, &sLo);
12322 IRTemp res = newTemp(Ity_V256);
12323 assign(res, binop(Iop_V128HLtoV256,
12324 mkexpr(math_PSADBW_128(dHi, sHi)),
12325 mkexpr(math_PSADBW_128(dLo, sLo))));
12326 return res;
12330 static Long dis_MASKMOVDQU ( const VexAbiInfo* vbi, Prefix pfx,
12331 Long delta, Bool isAvx )
12333 IRTemp regD = newTemp(Ity_V128);
12334 IRTemp mask = newTemp(Ity_V128);
12335 IRTemp olddata = newTemp(Ity_V128);
12336 IRTemp newdata = newTemp(Ity_V128);
12337 IRTemp addr = newTemp(Ity_I64);
12338 UChar modrm = getUChar(delta);
12339 UInt rG = gregOfRexRM(pfx,modrm);
12340 UInt rE = eregOfRexRM(pfx,modrm);
12342 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
12343 assign( regD, getXMMReg( rG ));
12345 /* Unfortunately can't do the obvious thing with SarN8x16
12346 here since that can't be re-emitted as SSE2 code - no such
12347 insn. */
12348 assign( mask,
12349 binop(Iop_64HLtoV128,
12350 binop(Iop_SarN8x8,
12351 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ),
12352 mkU8(7) ),
12353 binop(Iop_SarN8x8,
12354 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ),
12355 mkU8(7) ) ));
12356 assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
12357 assign( newdata, binop(Iop_OrV128,
12358 binop(Iop_AndV128,
12359 mkexpr(regD),
12360 mkexpr(mask) ),
12361 binop(Iop_AndV128,
12362 mkexpr(olddata),
12363 unop(Iop_NotV128, mkexpr(mask)))) );
12364 storeLE( mkexpr(addr), mkexpr(newdata) );
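   /* So each stored byte is selected by the top bit of the
      corresponding byte of the mask register: SarN8x8 by 7 turns
      every byte into 0x00 or 0xFF, and the AND/OR network above takes
      the byte from xmm(G) where the mask byte is 0xFF and rewrites
      the original memory byte where it is 0x00 -- a read-modify-write
      rendering of the insn's byte-granular store. */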
12366 delta += 1;
12367 DIP("%smaskmovdqu %s,%s\n", isAvx ? "v" : "",
12368 nameXMMReg(rE), nameXMMReg(rG) );
12369 return delta;
12373 static Long dis_MOVMSKPS_128 ( const VexAbiInfo* vbi, Prefix pfx,
12374 Long delta, Bool isAvx )
12376 UChar modrm = getUChar(delta);
12377 UInt rG = gregOfRexRM(pfx,modrm);
12378 UInt rE = eregOfRexRM(pfx,modrm);
12379 IRTemp t0 = newTemp(Ity_I32);
12380 IRTemp t1 = newTemp(Ity_I32);
12381 IRTemp t2 = newTemp(Ity_I32);
12382 IRTemp t3 = newTemp(Ity_I32);
12383 delta += 1;
12384 assign( t0, binop( Iop_And32,
12385 binop(Iop_Shr32, getXMMRegLane32(rE,0), mkU8(31)),
12386 mkU32(1) ));
12387 assign( t1, binop( Iop_And32,
12388 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(30)),
12389 mkU32(2) ));
12390 assign( t2, binop( Iop_And32,
12391 binop(Iop_Shr32, getXMMRegLane32(rE,2), mkU8(29)),
12392 mkU32(4) ));
12393 assign( t3, binop( Iop_And32,
12394 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(28)),
12395 mkU32(8) ));
12396 putIReg32( rG, binop(Iop_Or32,
12397 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
12398 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) );
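   /* Each term tN isolates the sign bit of F32 lane N: shifting lane
      N right by (31 - N) parks its sign bit at bit position N, and
      the AND with (1 << N) discards the rest, so the OR of the four
      terms is the usual 4-bit movmskps result in bits 3:0. */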
12399 DIP("%smovmskps %s,%s\n", isAvx ? "v" : "",
12400 nameXMMReg(rE), nameIReg32(rG));
12401 return delta;
12405 static Long dis_MOVMSKPS_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
12407 UChar modrm = getUChar(delta);
12408 UInt rG = gregOfRexRM(pfx,modrm);
12409 UInt rE = eregOfRexRM(pfx,modrm);
12410 IRTemp t0 = newTemp(Ity_I32);
12411 IRTemp t1 = newTemp(Ity_I32);
12412 IRTemp t2 = newTemp(Ity_I32);
12413 IRTemp t3 = newTemp(Ity_I32);
12414 IRTemp t4 = newTemp(Ity_I32);
12415 IRTemp t5 = newTemp(Ity_I32);
12416 IRTemp t6 = newTemp(Ity_I32);
12417 IRTemp t7 = newTemp(Ity_I32);
12418 delta += 1;
12419 assign( t0, binop( Iop_And32,
12420 binop(Iop_Shr32, getYMMRegLane32(rE,0), mkU8(31)),
12421 mkU32(1) ));
12422 assign( t1, binop( Iop_And32,
12423 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(30)),
12424 mkU32(2) ));
12425 assign( t2, binop( Iop_And32,
12426 binop(Iop_Shr32, getYMMRegLane32(rE,2), mkU8(29)),
12427 mkU32(4) ));
12428 assign( t3, binop( Iop_And32,
12429 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(28)),
12430 mkU32(8) ));
12431 assign( t4, binop( Iop_And32,
12432 binop(Iop_Shr32, getYMMRegLane32(rE,4), mkU8(27)),
12433 mkU32(16) ));
12434 assign( t5, binop( Iop_And32,
12435 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(26)),
12436 mkU32(32) ));
12437 assign( t6, binop( Iop_And32,
12438 binop(Iop_Shr32, getYMMRegLane32(rE,6), mkU8(25)),
12439 mkU32(64) ));
12440 assign( t7, binop( Iop_And32,
12441 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(24)),
12442 mkU32(128) ));
12443 putIReg32( rG, binop(Iop_Or32,
12444 binop(Iop_Or32,
12445 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
12446 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ),
12447 binop(Iop_Or32,
12448 binop(Iop_Or32, mkexpr(t4), mkexpr(t5)),
12449 binop(Iop_Or32, mkexpr(t6), mkexpr(t7)) ) ) );
12450 DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
12451 return delta;
12455 static Long dis_MOVMSKPD_128 ( const VexAbiInfo* vbi, Prefix pfx,
12456 Long delta, Bool isAvx )
12458 UChar modrm = getUChar(delta);
12459 UInt rG = gregOfRexRM(pfx,modrm);
12460 UInt rE = eregOfRexRM(pfx,modrm);
12461 IRTemp t0 = newTemp(Ity_I32);
12462 IRTemp t1 = newTemp(Ity_I32);
12463 delta += 1;
12464 assign( t0, binop( Iop_And32,
12465 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(31)),
12466 mkU32(1) ));
12467 assign( t1, binop( Iop_And32,
12468 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(30)),
12469 mkU32(2) ));
12470 putIReg32( rG, binop(Iop_Or32, mkexpr(t0), mkexpr(t1) ) );
12471 DIP("%smovmskpd %s,%s\n", isAvx ? "v" : "",
12472 nameXMMReg(rE), nameIReg32(rG));
12473 return delta;
12477 static Long dis_MOVMSKPD_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
12479 UChar modrm = getUChar(delta);
12480 UInt rG = gregOfRexRM(pfx,modrm);
12481 UInt rE = eregOfRexRM(pfx,modrm);
12482 IRTemp t0 = newTemp(Ity_I32);
12483 IRTemp t1 = newTemp(Ity_I32);
12484 IRTemp t2 = newTemp(Ity_I32);
12485 IRTemp t3 = newTemp(Ity_I32);
12486 delta += 1;
12487 assign( t0, binop( Iop_And32,
12488 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(31)),
12489 mkU32(1) ));
12490 assign( t1, binop( Iop_And32,
12491 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(30)),
12492 mkU32(2) ));
12493 assign( t2, binop( Iop_And32,
12494 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(29)),
12495 mkU32(4) ));
12496 assign( t3, binop( Iop_And32,
12497 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(28)),
12498 mkU32(8) ));
12499 putIReg32( rG, binop(Iop_Or32,
12500 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
12501 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) );
12502 DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
12503 return delta;
12507 /* Note, this also handles SSE(1) insns. */
12508 __attribute__((noinline))
12509 static
12510 Long dis_ESC_0F__SSE2 ( Bool* decode_OK,
12511 const VexArchInfo* archinfo,
12512 const VexAbiInfo* vbi,
12513 Prefix pfx, Int sz, Long deltaIN,
12514 DisResult* dres )
12516 IRTemp addr = IRTemp_INVALID;
12517 IRTemp t0 = IRTemp_INVALID;
12518 IRTemp t1 = IRTemp_INVALID;
12519 IRTemp t2 = IRTemp_INVALID;
12520 IRTemp t3 = IRTemp_INVALID;
12521 IRTemp t4 = IRTemp_INVALID;
12522 IRTemp t5 = IRTemp_INVALID;
12523 IRTemp t6 = IRTemp_INVALID;
12524 UChar modrm = 0;
12525 Int alen = 0;
12526 HChar dis_buf[50];
12528 *decode_OK = False;
12530 Long delta = deltaIN;
12531 UChar opc = getUChar(delta);
12532 delta++;
12533 switch (opc) {
12535 case 0x10:
12536 if (have66noF2noF3(pfx)
12537 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12538 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
12539 modrm = getUChar(delta);
12540 if (epartIsReg(modrm)) {
12541 putXMMReg( gregOfRexRM(pfx,modrm),
12542 getXMMReg( eregOfRexRM(pfx,modrm) ));
12543 DIP("movupd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12544 nameXMMReg(gregOfRexRM(pfx,modrm)));
12545 delta += 1;
12546 } else {
12547 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12548 putXMMReg( gregOfRexRM(pfx,modrm),
12549 loadLE(Ity_V128, mkexpr(addr)) );
12550 DIP("movupd %s,%s\n", dis_buf,
12551 nameXMMReg(gregOfRexRM(pfx,modrm)));
12552 delta += alen;
12554 goto decode_success;
12556 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
12557 G (lo half xmm). If E is mem, upper half of G is zeroed out.
12558 If E is reg, upper half of G is unchanged. */
12559 if (haveF2no66noF3(pfx)
12560 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) ) {
12561 modrm = getUChar(delta);
12562 if (epartIsReg(modrm)) {
12563 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
12564 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
12565 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12566 nameXMMReg(gregOfRexRM(pfx,modrm)));
12567 delta += 1;
12568 } else {
12569 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12570 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
12571 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
12572 loadLE(Ity_I64, mkexpr(addr)) );
12573 DIP("movsd %s,%s\n", dis_buf,
12574 nameXMMReg(gregOfRexRM(pfx,modrm)));
12575 delta += alen;
12577 goto decode_success;
12579 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
12580 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
12581 if (haveF3no66noF2(pfx)
12582 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12583 modrm = getUChar(delta);
12584 if (epartIsReg(modrm)) {
12585 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
12586 getXMMRegLane32( eregOfRexRM(pfx,modrm), 0 ));
12587 DIP("movss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12588 nameXMMReg(gregOfRexRM(pfx,modrm)));
12589 delta += 1;
12590 } else {
12591 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12592 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
12593 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
12594 loadLE(Ity_I32, mkexpr(addr)) );
12595 DIP("movss %s,%s\n", dis_buf,
12596 nameXMMReg(gregOfRexRM(pfx,modrm)));
12597 delta += alen;
12599 goto decode_success;
12601 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
12602 if (haveNo66noF2noF3(pfx)
12603 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12604 modrm = getUChar(delta);
12605 if (epartIsReg(modrm)) {
12606 putXMMReg( gregOfRexRM(pfx,modrm),
12607 getXMMReg( eregOfRexRM(pfx,modrm) ));
12608 DIP("movups %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12609 nameXMMReg(gregOfRexRM(pfx,modrm)));
12610 delta += 1;
12611 } else {
12612 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12613 putXMMReg( gregOfRexRM(pfx,modrm),
12614 loadLE(Ity_V128, mkexpr(addr)) );
12615 DIP("movups %s,%s\n", dis_buf,
12616 nameXMMReg(gregOfRexRM(pfx,modrm)));
12617 delta += alen;
12619 goto decode_success;
12621 break;
12623 case 0x11:
12624 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
12625 or lo half xmm). */
12626 if (haveF2no66noF3(pfx)
12627 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12628 modrm = getUChar(delta);
12629 if (epartIsReg(modrm)) {
12630 putXMMRegLane64( eregOfRexRM(pfx,modrm), 0,
12631 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
12632 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12633 nameXMMReg(eregOfRexRM(pfx,modrm)));
12634 delta += 1;
12635 } else {
12636 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12637 storeLE( mkexpr(addr),
12638 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
12639 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12640 dis_buf);
12641 delta += alen;
12643 goto decode_success;
12645 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
12646 or lo 1/4 xmm). */
12647 if (haveF3no66noF2(pfx) && sz == 4) {
12648 modrm = getUChar(delta);
12649 if (epartIsReg(modrm)) {
12650 /* fall through, we don't yet have a test case */
12651 } else {
12652 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12653 storeLE( mkexpr(addr),
12654 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
12655 DIP("movss %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12656 dis_buf);
12657 delta += alen;
12658 goto decode_success;
12661 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
12662 if (have66noF2noF3(pfx)
12663 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12664 modrm = getUChar(delta);
12665 if (epartIsReg(modrm)) {
12666 putXMMReg( eregOfRexRM(pfx,modrm),
12667 getXMMReg( gregOfRexRM(pfx,modrm) ) );
12668 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12669 nameXMMReg(eregOfRexRM(pfx,modrm)));
12670 delta += 1;
12671 } else {
12672 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12673 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
12674 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12675 dis_buf );
12676 delta += alen;
12678 goto decode_success;
12680 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
12681 if (haveNo66noF2noF3(pfx)
12682 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12683 modrm = getUChar(delta);
12684 if (epartIsReg(modrm)) {
12685 /* fall through; awaiting test case */
12686 } else {
12687 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12688 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
12689 DIP("movups %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12690 dis_buf );
12691 delta += alen;
12692 goto decode_success;
12695 break;
12697 case 0x12:
12698 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
12699 /* Identical to MOVLPS ? */
12700 if (have66noF2noF3(pfx)
12701 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12702 modrm = getUChar(delta);
12703 if (epartIsReg(modrm)) {
12704 /* fall through; apparently reg-reg is not possible */
12705 } else {
12706 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12707 delta += alen;
12708 putXMMRegLane64( gregOfRexRM(pfx,modrm),
12709 0/*lower lane*/,
12710 loadLE(Ity_I64, mkexpr(addr)) );
12711 DIP("movlpd %s, %s\n",
12712 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
12713 goto decode_success;
12716 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
12717 /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
12718 if (haveNo66noF2noF3(pfx)
12719 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12720 modrm = getUChar(delta);
12721 if (epartIsReg(modrm)) {
12722 delta += 1;
12723 putXMMRegLane64( gregOfRexRM(pfx,modrm),
12724 0/*lower lane*/,
12725 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ));
12726 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12727 nameXMMReg(gregOfRexRM(pfx,modrm)));
12728 } else {
12729 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12730 delta += alen;
12731 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0/*lower lane*/,
12732 loadLE(Ity_I64, mkexpr(addr)) );
12733 DIP("movlps %s, %s\n",
12734 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
12736 goto decode_success;
12738 break;
12740 case 0x13:
12741 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
12742 if (haveNo66noF2noF3(pfx)
12743 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12744 modrm = getUChar(delta);
12745 if (!epartIsReg(modrm)) {
12746 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12747 delta += alen;
12748 storeLE( mkexpr(addr),
12749 getXMMRegLane64( gregOfRexRM(pfx,modrm),
12750 0/*lower lane*/ ) );
12751 DIP("movlps %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
12752 dis_buf);
12753 goto decode_success;
12755 /* else fall through */
12757 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
12758 /* Identical to MOVLPS ? */
12759 if (have66noF2noF3(pfx)
12760 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12761 modrm = getUChar(delta);
12762 if (!epartIsReg(modrm)) {
12763 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12764 delta += alen;
12765 storeLE( mkexpr(addr),
12766 getXMMRegLane64( gregOfRexRM(pfx,modrm),
12767 0/*lower lane*/ ) );
12768 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
12769 dis_buf);
12770 goto decode_success;
12772 /* else fall through */
12774 break;
12776 case 0x14:
12777 case 0x15:
12778 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
12779 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
12780 /* These just appear to be special cases of SHUFPS */
12781 if (haveNo66noF2noF3(pfx) && sz == 4) {
12782 Bool hi = toBool(opc == 0x15);
12783 IRTemp sV = newTemp(Ity_V128);
12784 IRTemp dV = newTemp(Ity_V128);
12785 modrm = getUChar(delta);
12786 UInt rG = gregOfRexRM(pfx,modrm);
12787 assign( dV, getXMMReg(rG) );
12788 if (epartIsReg(modrm)) {
12789 UInt rE = eregOfRexRM(pfx,modrm);
12790 assign( sV, getXMMReg(rE) );
12791 delta += 1;
12792 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
12793 nameXMMReg(rE), nameXMMReg(rG));
12794 } else {
12795 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12796 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12797 delta += alen;
12798 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
12799 dis_buf, nameXMMReg(rG));
12801 IRTemp res = math_UNPCKxPS_128( sV, dV, hi );
12802 putXMMReg( rG, mkexpr(res) );
12803 goto decode_success;
12805 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
12806 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
12807 /* These just appear to be special cases of SHUFPS */
12808 if (have66noF2noF3(pfx)
12809 && sz == 2 /* could be 8 if rex also present */) {
12810 Bool hi = toBool(opc == 0x15);
12811 IRTemp sV = newTemp(Ity_V128);
12812 IRTemp dV = newTemp(Ity_V128);
12813 modrm = getUChar(delta);
12814 UInt rG = gregOfRexRM(pfx,modrm);
12815 assign( dV, getXMMReg(rG) );
12816 if (epartIsReg(modrm)) {
12817 UInt rE = eregOfRexRM(pfx,modrm);
12818 assign( sV, getXMMReg(rE) );
12819 delta += 1;
12820 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
12821 nameXMMReg(rE), nameXMMReg(rG));
12822 } else {
12823 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12824 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12825 delta += alen;
12826 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
12827 dis_buf, nameXMMReg(rG));
12829 IRTemp res = math_UNPCKxPD_128( sV, dV, hi );
12830 putXMMReg( rG, mkexpr(res) );
12831 goto decode_success;
12833 break;
12835 case 0x16:
12836 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
12837 /* This seems identical to MOVHPS. This instruction encoding is
12838 completely crazy. */
12839 if (have66noF2noF3(pfx)
12840 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12841 modrm = getUChar(delta);
12842 if (epartIsReg(modrm)) {
12843 /* fall through; apparently reg-reg is not possible */
12844 } else {
12845 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12846 delta += alen;
12847 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
12848 loadLE(Ity_I64, mkexpr(addr)) );
12849 DIP("movhpd %s,%s\n", dis_buf,
12850 nameXMMReg( gregOfRexRM(pfx,modrm) ));
12851 goto decode_success;
12854 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
12855 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
12856 if (haveNo66noF2noF3(pfx)
12857 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12858 modrm = getUChar(delta);
12859 if (epartIsReg(modrm)) {
12860 delta += 1;
12861 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
12862 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ) );
12863 DIP("movhps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12864 nameXMMReg(gregOfRexRM(pfx,modrm)));
12865 } else {
12866 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12867 delta += alen;
12868 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
12869 loadLE(Ity_I64, mkexpr(addr)) );
12870 DIP("movhps %s,%s\n", dis_buf,
12871 nameXMMReg( gregOfRexRM(pfx,modrm) ));
12873 goto decode_success;
12875 break;
12877 case 0x17:
12878 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
12879 if (haveNo66noF2noF3(pfx)
12880 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12881 modrm = getUChar(delta);
12882 if (!epartIsReg(modrm)) {
12883 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12884 delta += alen;
12885 storeLE( mkexpr(addr),
12886 getXMMRegLane64( gregOfRexRM(pfx,modrm),
12887 1/*upper lane*/ ) );
12888 DIP("movhps %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
12889 dis_buf);
12890 goto decode_success;
12892 /* else fall through */
12894 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
12895 /* Again, this seems identical to MOVHPS. */
12896 if (have66noF2noF3(pfx)
12897 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12898 modrm = getUChar(delta);
12899 if (!epartIsReg(modrm)) {
12900 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12901 delta += alen;
12902 storeLE( mkexpr(addr),
12903 getXMMRegLane64( gregOfRexRM(pfx,modrm),
12904 1/*upper lane*/ ) );
12905 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
12906 dis_buf);
12907 goto decode_success;
12909 /* else fall through */
12911 break;
12913 case 0x18:
12914 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
12915 /* 0F 18 /1 = PREFETCHT0 -- with various different hints */
12916 /* 0F 18 /2 = PREFETCHT1 */
12917 /* 0F 18 /3 = PREFETCHT2 */
12918 if (haveNo66noF2noF3(pfx)
12919 && !epartIsReg(getUChar(delta))
12920 && gregLO3ofRM(getUChar(delta)) >= 0
12921 && gregLO3ofRM(getUChar(delta)) <= 3) {
12922 const HChar* hintstr = "??";
12924 modrm = getUChar(delta);
12925 vassert(!epartIsReg(modrm));
12927 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12928 delta += alen;
12930 switch (gregLO3ofRM(modrm)) {
12931 case 0: hintstr = "nta"; break;
12932 case 1: hintstr = "t0"; break;
12933 case 2: hintstr = "t1"; break;
12934 case 3: hintstr = "t2"; break;
12935 default: vassert(0);
12938 DIP("prefetch%s %s\n", hintstr, dis_buf);
12939 goto decode_success;
12941 break;
12943 case 0x28:
12944 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
12945 if (have66noF2noF3(pfx)
12946 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12947 modrm = getUChar(delta);
12948 if (epartIsReg(modrm)) {
12949 putXMMReg( gregOfRexRM(pfx,modrm),
12950 getXMMReg( eregOfRexRM(pfx,modrm) ));
12951 DIP("movapd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12952 nameXMMReg(gregOfRexRM(pfx,modrm)));
12953 delta += 1;
12954 } else {
12955 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12956 gen_SIGNAL_if_not_16_aligned( vbi, addr );
12957 putXMMReg( gregOfRexRM(pfx,modrm),
12958 loadLE(Ity_V128, mkexpr(addr)) );
12959 DIP("movapd %s,%s\n", dis_buf,
12960 nameXMMReg(gregOfRexRM(pfx,modrm)));
12961 delta += alen;
12963 goto decode_success;
12965 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
12966 if (haveNo66noF2noF3(pfx)
12967 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12968 modrm = getUChar(delta);
12969 if (epartIsReg(modrm)) {
12970 putXMMReg( gregOfRexRM(pfx,modrm),
12971 getXMMReg( eregOfRexRM(pfx,modrm) ));
12972 DIP("movaps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12973 nameXMMReg(gregOfRexRM(pfx,modrm)));
12974 delta += 1;
12975 } else {
12976 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12977 gen_SIGNAL_if_not_16_aligned( vbi, addr );
12978 putXMMReg( gregOfRexRM(pfx,modrm),
12979 loadLE(Ity_V128, mkexpr(addr)) );
12980 DIP("movaps %s,%s\n", dis_buf,
12981 nameXMMReg(gregOfRexRM(pfx,modrm)));
12982 delta += alen;
12984 goto decode_success;
12986 break;
12988 case 0x29:
12989 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
12990 if (haveNo66noF2noF3(pfx)
12991 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12992 modrm = getUChar(delta);
12993 if (epartIsReg(modrm)) {
12994 putXMMReg( eregOfRexRM(pfx,modrm),
12995 getXMMReg( gregOfRexRM(pfx,modrm) ));
12996 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12997 nameXMMReg(eregOfRexRM(pfx,modrm)));
12998 delta += 1;
12999 } else {
13000 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13001 gen_SIGNAL_if_not_16_aligned( vbi, addr );
13002 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
13003 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
13004 dis_buf );
13005 delta += alen;
13007 goto decode_success;
13009 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
13010 if (have66noF2noF3(pfx)
13011 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
13012 modrm = getUChar(delta);
13013 if (epartIsReg(modrm)) {
13014 putXMMReg( eregOfRexRM(pfx,modrm),
13015 getXMMReg( gregOfRexRM(pfx,modrm) ) );
13016 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
13017 nameXMMReg(eregOfRexRM(pfx,modrm)));
13018 delta += 1;
13019 } else {
13020 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13021 gen_SIGNAL_if_not_16_aligned( vbi, addr );
13022 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
13023 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
13024 dis_buf );
13025 delta += alen;
13027 goto decode_success;
13029 break;
13031 case 0x2A:
13032 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
13033 half xmm */
13034 if (haveNo66noF2noF3(pfx) && sz == 4) {
13035 IRTemp arg64 = newTemp(Ity_I64);
13036 IRTemp rmode = newTemp(Ity_I32);
13038 modrm = getUChar(delta);
13039 if (epartIsReg(modrm)) {
13040 /* Only switch to MMX mode if the source is an MMX register.
13041 See comments on CVTPI2PD for details. Fixes #357059. */
13042 do_MMX_preamble();
13043 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
13044 delta += 1;
13045 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
13046 nameXMMReg(gregOfRexRM(pfx,modrm)));
13047 } else {
13048 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13049 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
13050 delta += alen;
13051 DIP("cvtpi2ps %s,%s\n", dis_buf,
13052 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13055 assign( rmode, get_sse_roundingmode() );
13057 putXMMRegLane32F(
13058 gregOfRexRM(pfx,modrm), 0,
13059 binop(Iop_F64toF32,
13060 mkexpr(rmode),
13061 unop(Iop_I32StoF64,
13062 unop(Iop_64to32, mkexpr(arg64)) )) );
13064 putXMMRegLane32F(
13065 gregOfRexRM(pfx,modrm), 1,
13066 binop(Iop_F64toF32,
13067 mkexpr(rmode),
13068 unop(Iop_I32StoF64,
13069 unop(Iop_64HIto32, mkexpr(arg64)) )) );
13071 goto decode_success;
13073 /* F3 0F 2A = CVTSI2SS
13074 -- sz==4: convert I32 in mem/ireg to F32 in low quarter xmm
13075 -- sz==8: convert I64 in mem/ireg to F32 in low quarter xmm */
13076 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) {
13077 IRTemp rmode = newTemp(Ity_I32);
13078 assign( rmode, get_sse_roundingmode() );
13079 modrm = getUChar(delta);
13080 if (sz == 4) {
13081 IRTemp arg32 = newTemp(Ity_I32);
13082 if (epartIsReg(modrm)) {
13083 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
13084 delta += 1;
13085 DIP("cvtsi2ss %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
13086 nameXMMReg(gregOfRexRM(pfx,modrm)));
13087 } else {
13088 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13089 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
13090 delta += alen;
13091 DIP("cvtsi2ss %s,%s\n", dis_buf,
13092 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13094 putXMMRegLane32F(
13095 gregOfRexRM(pfx,modrm), 0,
13096 binop(Iop_F64toF32,
13097 mkexpr(rmode),
13098 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
13099 } else {
13100 /* sz == 8 */
13101 IRTemp arg64 = newTemp(Ity_I64);
13102 if (epartIsReg(modrm)) {
13103 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
13104 delta += 1;
13105 DIP("cvtsi2ssq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
13106 nameXMMReg(gregOfRexRM(pfx,modrm)));
13107 } else {
13108 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13109 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
13110 delta += alen;
13111 DIP("cvtsi2ssq %s,%s\n", dis_buf,
13112 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13114 putXMMRegLane32F(
13115 gregOfRexRM(pfx,modrm), 0,
13116 binop(Iop_F64toF32,
13117 mkexpr(rmode),
13118 binop(Iop_I64StoF64, mkexpr(rmode), mkexpr(arg64)) ) );
13120 goto decode_success;
13122 /* F2 0F 2A = CVTSI2SD
13123 when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm
13124 when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm
13126 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) {
13127 modrm = getUChar(delta);
13128 if (sz == 4) {
13129 IRTemp arg32 = newTemp(Ity_I32);
13130 if (epartIsReg(modrm)) {
13131 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
13132 delta += 1;
13133 DIP("cvtsi2sdl %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
13134 nameXMMReg(gregOfRexRM(pfx,modrm)));
13135 } else {
13136 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13137 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
13138 delta += alen;
13139 DIP("cvtsi2sdl %s,%s\n", dis_buf,
13140 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13142 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
13143 unop(Iop_I32StoF64, mkexpr(arg32))
13145 } else {
13146 /* sz == 8 */
13147 IRTemp arg64 = newTemp(Ity_I64);
13148 if (epartIsReg(modrm)) {
13149 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
13150 delta += 1;
13151 DIP("cvtsi2sdq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
13152 nameXMMReg(gregOfRexRM(pfx,modrm)));
13153 } else {
13154 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13155 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
13156 delta += alen;
13157 DIP("cvtsi2sdq %s,%s\n", dis_buf,
13158 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13160 putXMMRegLane64F(
13161 gregOfRexRM(pfx,modrm),
13163 binop( Iop_I64StoF64,
13164 get_sse_roundingmode(),
13165 mkexpr(arg64)
13169 goto decode_success;
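         /* Note the asymmetry in the two CVTSI2SD cases above: a
            32-bit integer always converts to F64 exactly, so no
            rounding mode is needed, whereas the 64-bit form can lose
            precision and therefore converts under the current SSE
            rounding mode. */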
13171 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
13172 xmm(G) */
13173 if (have66noF2noF3(pfx) && sz == 2) {
13174 IRTemp arg64 = newTemp(Ity_I64);
13176 modrm = getUChar(delta);
13177 if (epartIsReg(modrm)) {
13178 /* Only switch to MMX mode if the source is an MMX register.
13179 This is inconsistent with all other instructions which
13180 convert between XMM and (M64 or MMX), which always switch
13181 to MMX mode even if 64-bit operand is M64 and not MMX. At
13182 least, that's what the Intel docs seem to me to say.
13183 Fixes #210264. */
13184 do_MMX_preamble();
13185 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
13186 delta += 1;
13187 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
13188 nameXMMReg(gregOfRexRM(pfx,modrm)));
13189 } else {
13190 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13191 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
13192 delta += alen;
13193 DIP("cvtpi2pd %s,%s\n", dis_buf,
13194 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13197 putXMMRegLane64F(
13198 gregOfRexRM(pfx,modrm), 0,
13199 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) )
13202 putXMMRegLane64F(
13203 gregOfRexRM(pfx,modrm), 1,
13204 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) )
13207 goto decode_success;
13209 break;
13211 case 0x2B:
13212 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
13213 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
13214 if ( (haveNo66noF2noF3(pfx) && sz == 4)
13215 || (have66noF2noF3(pfx) && sz == 2) ) {
13216 modrm = getUChar(delta);
13217 if (!epartIsReg(modrm)) {
13218 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13219 gen_SIGNAL_if_not_16_aligned( vbi, addr );
13220 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
13221 DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
13222 dis_buf,
13223 nameXMMReg(gregOfRexRM(pfx,modrm)));
13224 delta += alen;
13225 goto decode_success;
13227 /* else fall through */
13229 break;
13231 case 0x2C:
13232 case 0x2D:
13233 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
13234 I32 in mmx, according to prevailing SSE rounding mode */
13235 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
13236 I32 in mmx, rounding towards zero */
13237 if (haveNo66noF2noF3(pfx) && sz == 4) {
13238 IRTemp dst64 = newTemp(Ity_I64);
13239 IRTemp rmode = newTemp(Ity_I32);
13240 IRTemp f32lo = newTemp(Ity_F32);
13241 IRTemp f32hi = newTemp(Ity_F32);
13242 Bool r2zero = toBool(opc == 0x2C);
13244 do_MMX_preamble();
13245 modrm = getUChar(delta);
13247 if (epartIsReg(modrm)) {
13248 delta += 1;
13249 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
13250 assign(f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1));
13251 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
13252 nameXMMReg(eregOfRexRM(pfx,modrm)),
13253 nameMMXReg(gregLO3ofRM(modrm)));
13254 } else {
13255 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13256 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
13257 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add64,
13258 mkexpr(addr),
13259 mkU64(4) )));
13260 delta += alen;
13261 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
13262 dis_buf,
13263 nameMMXReg(gregLO3ofRM(modrm)));
13266 if (r2zero) {
13267 assign(rmode, mkU32((UInt)Irrm_ZERO) );
13268 } else {
13269 assign( rmode, get_sse_roundingmode() );
13272 assign(
13273 dst64,
13274 binop( Iop_32HLto64,
13275 binop( Iop_F64toI32S,
13276 mkexpr(rmode),
13277 unop( Iop_F32toF64, mkexpr(f32hi) ) ),
13278 binop( Iop_F64toI32S,
13279 mkexpr(rmode),
13280 unop( Iop_F32toF64, mkexpr(f32lo) ) )
13284 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
13285 goto decode_success;
13287 /* F3 0F 2D = CVTSS2SI
13288 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
13289 according to prevailing SSE rounding mode
13290 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
13291 according to prevailing SSE rounding mode */
13293 /* F3 0F 2C = CVTTSS2SI
13294 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
13295 truncating towards zero
13296 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
13297 truncating towards zero */
13299 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) {
13300 delta = dis_CVTxSS2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz);
13301 goto decode_success;
13303 /* F2 0F 2D = CVTSD2SI
13304 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
13305 according to prevailing SSE rounding mode
13306 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
13307 according to prevailing SSE rounding mode */
13309 /* F2 0F 2C = CVTTSD2SI
13310 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
13311 truncating towards zero
13312 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
13313 truncating towards zero */
13315 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) {
13316 delta = dis_CVTxSD2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz);
13317 goto decode_success;
13319 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
13320 I32 in mmx, according to prevailing SSE rounding mode */
13321 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
13322 I32 in mmx, rounding towards zero */
13323 if (have66noF2noF3(pfx) && sz == 2) {
13324 IRTemp dst64 = newTemp(Ity_I64);
13325 IRTemp rmode = newTemp(Ity_I32);
13326 IRTemp f64lo = newTemp(Ity_F64);
13327 IRTemp f64hi = newTemp(Ity_F64);
13328 Bool r2zero = toBool(opc == 0x2C);
13330 do_MMX_preamble();
13331 modrm = getUChar(delta);
13333 if (epartIsReg(modrm)) {
13334 delta += 1;
13335 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
13336 assign(f64hi, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 1));
13337 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
13338 nameXMMReg(eregOfRexRM(pfx,modrm)),
13339 nameMMXReg(gregLO3ofRM(modrm)));
13340 } else {
13341 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13342 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
13343 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add64,
13344 mkexpr(addr),
13345 mkU64(8) )));
13346 delta += alen;
13347 DIP("cvt%spf2pi %s,%s\n", r2zero ? "t" : "",
13348 dis_buf,
13349 nameMMXReg(gregLO3ofRM(modrm)));
13352 if (r2zero) {
13353 assign(rmode, mkU32((UInt)Irrm_ZERO) );
13354 } else {
13355 assign( rmode, get_sse_roundingmode() );
13358 assign(
13359 dst64,
13360 binop( Iop_32HLto64,
13361 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
13362 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
13366 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
13367 goto decode_success;
13369 break;
13371 case 0x2E:
13372 case 0x2F:
13373 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
13374 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */
13375 if (have66noF2noF3(pfx) && sz == 2) {
13376 delta = dis_COMISD( vbi, pfx, delta, False/*!isAvx*/, opc );
13377 goto decode_success;
13379 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
13380 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */
13381 if (haveNo66noF2noF3(pfx) && sz == 4) {
13382 delta = dis_COMISS( vbi, pfx, delta, False/*!isAvx*/, opc );
13383 goto decode_success;
13385 break;
13387 case 0x50:
13388 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
13389 to 4 lowest bits of ireg(G) */
13390 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
13391 && epartIsReg(getUChar(delta))) {
13392 /* sz == 8 is a kludge to handle insns with REX.W redundantly
13393 set to 1, which has been known to happen:
13395 4c 0f 50 d9 rex64X movmskps %xmm1,%r11d
13397 20071106: Intel docs say that REX.W isn't redundant: when
13398 present, a 64-bit register is written; when not present, only
13399 the 32-bit half is written. However, testing on a Core2
13400 machine suggests the entire 64 bit register is written
13401 irrespective of the status of REX.W. That could be because
13402 of the default rule that says "if the lower half of a 32-bit
13403 register is written, the upper half is zeroed". By using
13404 putIReg32 here we inadvertently produce the same behaviour as
13405 the Core2, for the same reason -- putIReg32 implements said
13406 rule.
13408 AMD docs give no indication that REX.W is even valid for this
13409 insn. */
13410 delta = dis_MOVMSKPS_128( vbi, pfx, delta, False/*!isAvx*/ );
13411 goto decode_success;
13413 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
13414 2 lowest bits of ireg(G) */
13415 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) {
13416 /* sz == 8 is a kludge to handle insns with REX.W redundantly
13417 set to 1, which has been known to happen:
13418 66 4c 0f 50 d9 rex64X movmskpd %xmm1,%r11d
13419 20071106: see further comments on MOVMSKPS implementation above. */
13421 delta = dis_MOVMSKPD_128( vbi, pfx, delta, False/*!isAvx*/ );
13422 goto decode_success;
13424 break;
13426 case 0x51:
13427 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */
13428 if (haveF3no66noF2(pfx) && sz == 4) {
13429 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
13430 "sqrtss", Iop_Sqrt32F0x4 );
13431 goto decode_success;
13433 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */
13434 if (haveNo66noF2noF3(pfx) && sz == 4) {
13435 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
13436 "sqrtps", Iop_Sqrt32Fx4 );
13437 goto decode_success;
13439 /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
13440 if (haveF2no66noF3(pfx) && sz == 4) {
13441 delta = dis_SSE_E_to_G_unary_lo64( vbi, pfx, delta,
13442 "sqrtsd", Iop_Sqrt64F0x2 );
13443 goto decode_success;
13445 /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
13446 if (have66noF2noF3(pfx) && sz == 2) {
13447 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
13448 "sqrtpd", Iop_Sqrt64Fx2 );
13449 goto decode_success;
13451 break;
13453 case 0x52:
13454 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
13455 if (haveF3no66noF2(pfx) && sz == 4) {
13456 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
13457 "rsqrtss", Iop_RSqrtEst32F0x4 );
13458 goto decode_success;
13460 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
13461 if (haveNo66noF2noF3(pfx) && sz == 4) {
13462 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
13463 "rsqrtps", Iop_RSqrtEst32Fx4 );
13464 goto decode_success;
13466 break;
13468 case 0x53:
13469 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
13470 if (haveF3no66noF2(pfx) && sz == 4) {
13471 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
13472 "rcpss", Iop_RecipEst32F0x4 );
13473 goto decode_success;
13475 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
13476 if (haveNo66noF2noF3(pfx) && sz == 4) {
13477 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
13478 "rcpps", Iop_RecipEst32Fx4 );
13479 goto decode_success;
13481 break;
13483 case 0x54:
13484 /* 0F 54 = ANDPS -- G = G and E */
13485 if (haveNo66noF2noF3(pfx) && sz == 4) {
13486 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andps", Iop_AndV128 );
13487 goto decode_success;
13489 /* 66 0F 54 = ANDPD -- G = G and E */
13490 if (have66noF2noF3(pfx) && sz == 2) {
13491 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andpd", Iop_AndV128 );
13492 goto decode_success;
13494 break;
13496 case 0x55:
13497 /* 0F 55 = ANDNPS -- G = (not G) and E */
13498 if (haveNo66noF2noF3(pfx) && sz == 4) {
13499 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnps",
13500 Iop_AndV128 );
13501 goto decode_success;
13503 /* 66 0F 55 = ANDNPD -- G = (not G) and E */
13504 if (have66noF2noF3(pfx) && sz == 2) {
13505 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnpd",
13506 Iop_AndV128 );
13507 goto decode_success;
13509 break;
13511 case 0x56:
13512 /* 0F 56 = ORPS -- G = G or E */
13513 if (haveNo66noF2noF3(pfx) && sz == 4) {
13514 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orps", Iop_OrV128 );
13515 goto decode_success;
13517 /* 66 0F 56 = ORPD -- G = G or E */
13518 if (have66noF2noF3(pfx) && sz == 2) {
13519 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orpd", Iop_OrV128 );
13520 goto decode_success;
13522 break;
13524 case 0x57:
13525 /* 66 0F 57 = XORPD -- G = G xor E */
13526 if (have66noF2noF3(pfx) && sz == 2) {
13527 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorpd", Iop_XorV128 );
13528 goto decode_success;
13530 /* 0F 57 = XORPS -- G = G xor E */
13531 if (haveNo66noF2noF3(pfx) && sz == 4) {
13532 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorps", Iop_XorV128 );
13533 goto decode_success;
13535 break;
13537 case 0x58:
13538 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
13539 if (haveNo66noF2noF3(pfx) && sz == 4) {
13540 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addps", Iop_Add32Fx4 );
13541 goto decode_success;
13543 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
13544 if (haveF3no66noF2(pfx) && sz == 4) {
13545 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "addss", Iop_Add32F0x4 );
13546 goto decode_success;
13548 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
13549 if (haveF2no66noF3(pfx)
13550 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13551 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "addsd", Iop_Add64F0x2 );
13552 goto decode_success;
13554 /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
13555 if (have66noF2noF3(pfx)
13556 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
13557 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addpd", Iop_Add64Fx2 );
13558 goto decode_success;
13560 break;
13562 case 0x59:
13563 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
13564 if (haveF2no66noF3(pfx)
13565 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13566 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "mulsd", Iop_Mul64F0x2 );
13567 goto decode_success;
13569 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
13570 if (haveF3no66noF2(pfx) && sz == 4) {
13571 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "mulss", Iop_Mul32F0x4 );
13572 goto decode_success;
13574 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
13575 if (haveNo66noF2noF3(pfx) && sz == 4) {
13576 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulps", Iop_Mul32Fx4 );
13577 goto decode_success;
13579 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
13580 if (have66noF2noF3(pfx)
13581 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
13582 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulpd", Iop_Mul64Fx2 );
13583 goto decode_success;
13585 break;
13587 case 0x5A:
13588 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
13589 F64 in xmm(G). */
13590 if (haveNo66noF2noF3(pfx)
13591 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13592 delta = dis_CVTPS2PD_128( vbi, pfx, delta, False/*!isAvx*/ );
13593 goto decode_success;
13595 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
13596 low half xmm(G) */
13597 if (haveF3no66noF2(pfx) && sz == 4) {
13598 IRTemp f32lo = newTemp(Ity_F32);
13600 modrm = getUChar(delta);
13601 if (epartIsReg(modrm)) {
13602 delta += 1;
13603 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
13604 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13605 nameXMMReg(gregOfRexRM(pfx,modrm)));
13606 } else {
13607 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13608 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
13609 delta += alen;
13610 DIP("cvtss2sd %s,%s\n", dis_buf,
13611 nameXMMReg(gregOfRexRM(pfx,modrm)));
13614 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
13615 unop( Iop_F32toF64, mkexpr(f32lo) ) );
13617 goto decode_success;
13619 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
13620 low 1/4 xmm(G), according to prevailing SSE rounding mode */
13621 if (haveF2no66noF3(pfx) && sz == 4) {
13622 IRTemp rmode = newTemp(Ity_I32);
13623 IRTemp f64lo = newTemp(Ity_F64);
13625 modrm = getUChar(delta);
13626 if (epartIsReg(modrm)) {
13627 delta += 1;
13628 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
13629 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13630 nameXMMReg(gregOfRexRM(pfx,modrm)));
13631 } else {
13632 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13633 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
13634 delta += alen;
13635 DIP("cvtsd2ss %s,%s\n", dis_buf,
13636 nameXMMReg(gregOfRexRM(pfx,modrm)));
13639 assign( rmode, get_sse_roundingmode() );
13640 putXMMRegLane32F(
13641 gregOfRexRM(pfx,modrm), 0,
13642 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) )
13645 goto decode_success;
13647 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
13648 lo half xmm(G), rounding according to prevailing SSE rounding
13649 mode, and zero upper half */
13650 /* Note, this is practically identical to CVTPD2DQ. It would be
13651 nice to merge them together. */
13652 if (have66noF2noF3(pfx) && sz == 2) {
13653 delta = dis_CVTPD2PS_128( vbi, pfx, delta, False/*!isAvx*/ );
13654 goto decode_success;
13656 break;
13658 case 0x5B:
13659 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
13660 xmm(G), rounding towards zero */
13661 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
13662 xmm(G), as per the prevailing rounding mode */
13663 if ( (have66noF2noF3(pfx) && sz == 2)
13664 || (haveF3no66noF2(pfx) && sz == 4) ) {
13665 Bool r2zero = toBool(sz == 4); // FIXME -- unreliable (???)
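/* Per the guard above, sz == 4 means the F3 form (CVTTPS2DQ), which
   truncates, while sz == 2 means the 66 form, which uses the
   prevailing SSE rounding mode. */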
13666 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta, False/*!isAvx*/, r2zero );
13667 goto decode_success;
13669 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
13670 xmm(G) */
13671 if (haveNo66noF2noF3(pfx) && sz == 4) {
13672 delta = dis_CVTDQ2PS_128( vbi, pfx, delta, False/*!isAvx*/ );
13673 goto decode_success;
13675 break;
13677 case 0x5C:
13678 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
13679 if (haveF3no66noF2(pfx) && sz == 4) {
13680 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "subss", Iop_Sub32F0x4 );
13681 goto decode_success;
13683 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
13684 if (haveF2no66noF3(pfx)
13685 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13686 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "subsd", Iop_Sub64F0x2 );
13687 goto decode_success;
13689 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
13690 if (haveNo66noF2noF3(pfx) && sz == 4) {
13691 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subps", Iop_Sub32Fx4 );
13692 goto decode_success;
13694 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
13695 if (have66noF2noF3(pfx) && sz == 2) {
13696 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subpd", Iop_Sub64Fx2 );
13697 goto decode_success;
13699 break;
13701 case 0x5D:
13702 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
13703 if (haveNo66noF2noF3(pfx) && sz == 4) {
13704 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minps", Iop_Min32Fx4 );
13705 goto decode_success;
13707 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
13708 if (haveF3no66noF2(pfx) && sz == 4) {
13709 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "minss", Iop_Min32F0x4 );
13710 goto decode_success;
13712 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
13713 if (haveF2no66noF3(pfx)
13714 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13715 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "minsd", Iop_Min64F0x2 );
13716 goto decode_success;
13718 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
13719 if (have66noF2noF3(pfx) && sz == 2) {
13720 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minpd", Iop_Min64Fx2 );
13721 goto decode_success;
13723 break;
13725 case 0x5E:
13726 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
13727 if (haveF2no66noF3(pfx) && sz == 4) {
13728 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "divsd", Iop_Div64F0x2 );
13729 goto decode_success;
13731 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
13732 if (haveNo66noF2noF3(pfx) && sz == 4) {
13733 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divps", Iop_Div32Fx4 );
13734 goto decode_success;
13736 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
13737 if (haveF3no66noF2(pfx) && sz == 4) {
13738 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "divss", Iop_Div32F0x4 );
13739 goto decode_success;
13741 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
13742 if (have66noF2noF3(pfx) && sz == 2) {
13743 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divpd", Iop_Div64Fx2 );
13744 goto decode_success;
13746 break;
13748 case 0x5F:
13749 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
13750 if (haveNo66noF2noF3(pfx) && sz == 4) {
13751 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxps", Iop_Max32Fx4 );
13752 goto decode_success;
13754 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
13755 if (haveF3no66noF2(pfx) && sz == 4) {
13756 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "maxss", Iop_Max32F0x4 );
13757 goto decode_success;
13759 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
13760 if (haveF2no66noF3(pfx)
13761 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13762 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "maxsd", Iop_Max64F0x2 );
13763 goto decode_success;
13765 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
13766 if (have66noF2noF3(pfx) && sz == 2) {
13767 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxpd", Iop_Max64Fx2 );
13768 goto decode_success;
13770 break;
13772 case 0x60:
13773 /* 66 0F 60 = PUNPCKLBW */
13774 if (have66noF2noF3(pfx) && sz == 2) {
13775 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13776 "punpcklbw",
13777 Iop_InterleaveLO8x16, True );
13778 goto decode_success;
13780 break;
13782 case 0x61:
13783 /* 66 0F 61 = PUNPCKLWD */
13784 if (have66noF2noF3(pfx) && sz == 2) {
13785 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13786 "punpcklwd",
13787 Iop_InterleaveLO16x8, True );
13788 goto decode_success;
13790 break;
13792 case 0x62:
13793 /* 66 0F 62 = PUNPCKLDQ */
13794 if (have66noF2noF3(pfx) && sz == 2) {
13795 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13796 "punpckldq",
13797 Iop_InterleaveLO32x4, True );
13798 goto decode_success;
13800 break;
13802 case 0x63:
13803 /* 66 0F 63 = PACKSSWB */
13804 if (have66noF2noF3(pfx) && sz == 2) {
13805 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13806 "packsswb",
13807 Iop_QNarrowBin16Sto8Sx16, True );
13808 goto decode_success;
13810 break;
13812 case 0x64:
13813 /* 66 0F 64 = PCMPGTB */
13814 if (have66noF2noF3(pfx) && sz == 2) {
13815 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13816 "pcmpgtb", Iop_CmpGT8Sx16, False );
13817 goto decode_success;
13819 break;
13821 case 0x65:
13822 /* 66 0F 65 = PCMPGTW */
13823 if (have66noF2noF3(pfx) && sz == 2) {
13824 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13825 "pcmpgtw", Iop_CmpGT16Sx8, False );
13826 goto decode_success;
13828 break;
13830 case 0x66:
13831 /* 66 0F 66 = PCMPGTD */
13832 if (have66noF2noF3(pfx) && sz == 2) {
13833 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13834 "pcmpgtd", Iop_CmpGT32Sx4, False );
13835 goto decode_success;
13837 break;
13839 case 0x67:
13840 /* 66 0F 67 = PACKUSWB */
13841 if (have66noF2noF3(pfx) && sz == 2) {
13842 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13843 "packuswb",
13844 Iop_QNarrowBin16Sto8Ux16, True );
13845 goto decode_success;
13847 break;
13849 case 0x68:
13850 /* 66 0F 68 = PUNPCKHBW */
13851 if (have66noF2noF3(pfx) && sz == 2) {
13852 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13853 "punpckhbw",
13854 Iop_InterleaveHI8x16, True );
13855 goto decode_success;
13857 break;
13859 case 0x69:
13860 /* 66 0F 69 = PUNPCKHWD */
13861 if (have66noF2noF3(pfx) && sz == 2) {
13862 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13863 "punpckhwd",
13864 Iop_InterleaveHI16x8, True );
13865 goto decode_success;
13867 break;
13869 case 0x6A:
13870 /* 66 0F 6A = PUNPCKHDQ */
13871 if (have66noF2noF3(pfx) && sz == 2) {
13872 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13873 "punpckhdq",
13874 Iop_InterleaveHI32x4, True );
13875 goto decode_success;
13877 break;
13879 case 0x6B:
13880 /* 66 0F 6B = PACKSSDW */
13881 if (have66noF2noF3(pfx) && sz == 2) {
13882 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13883 "packssdw",
13884 Iop_QNarrowBin32Sto16Sx8, True );
13885 goto decode_success;
13887 break;
13889 case 0x6C:
13890 /* 66 0F 6C = PUNPCKLQDQ */
13891 if (have66noF2noF3(pfx) && sz == 2) {
13892 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13893 "punpcklqdq",
13894 Iop_InterleaveLO64x2, True );
13895 goto decode_success;
13897 break;
13899 case 0x6D:
13900 /* 66 0F 6D = PUNPCKHQDQ */
13901 if (have66noF2noF3(pfx) && sz == 2) {
13902 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13903 "punpckhqdq",
13904 Iop_InterleaveHI64x2, True );
13905 goto decode_success;
13907 break;
13909 case 0x6E:
13910 /* 66 0F 6E = MOVD from ireg32/m32 to xmm lo 1/4,
13911 zeroing high 3/4 of xmm. */
13912 /* or from ireg64/m64 to xmm lo 1/2,
13913 zeroing high 1/2 of xmm. */
13914 if (have66noF2noF3(pfx)) {
13915 vassert(sz == 2 || sz == 8);
13916 if (sz == 2) sz = 4;
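/* sz == 2 merely reflects the mandatory 0x66 prefix; the data size
   is really 32 bits unless REX.W is set (sz == 8), so normalise it
   to 4 here. */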
13917 modrm = getUChar(delta);
13918 if (epartIsReg(modrm)) {
13919 delta += 1;
13920 if (sz == 4) {
13921 putXMMReg(
13922 gregOfRexRM(pfx,modrm),
13923 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) )
13925 DIP("movd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
13926 nameXMMReg(gregOfRexRM(pfx,modrm)));
13927 } else {
13928 putXMMReg(
13929 gregOfRexRM(pfx,modrm),
13930 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) )
13932 DIP("movq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
13933 nameXMMReg(gregOfRexRM(pfx,modrm)));
13935 } else {
13936 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
13937 delta += alen;
13938 putXMMReg(
13939 gregOfRexRM(pfx,modrm),
13940 sz == 4
13941 ? unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) )
13942 : unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)) )
13944 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q', dis_buf,
13945 nameXMMReg(gregOfRexRM(pfx,modrm)));
13947 goto decode_success;
13949 break;
13951 case 0x6F:
13952 if (have66noF2noF3(pfx)
13953 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
13954 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
13955 modrm = getUChar(delta);
13956 if (epartIsReg(modrm)) {
13957 putXMMReg( gregOfRexRM(pfx,modrm),
13958 getXMMReg( eregOfRexRM(pfx,modrm) ));
13959 DIP("movdqa %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13960 nameXMMReg(gregOfRexRM(pfx,modrm)));
13961 delta += 1;
13962 } else {
13963 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13964 gen_SIGNAL_if_not_16_aligned( vbi, addr );
13965 putXMMReg( gregOfRexRM(pfx,modrm),
13966 loadLE(Ity_V128, mkexpr(addr)) );
13967 DIP("movdqa %s,%s\n", dis_buf,
13968 nameXMMReg(gregOfRexRM(pfx,modrm)));
13969 delta += alen;
13971 goto decode_success;
13973 if (haveF3no66noF2(pfx)
13974 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13975 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
13976 modrm = getUChar(delta);
13977 if (epartIsReg(modrm)) {
13978 putXMMReg( gregOfRexRM(pfx,modrm),
13979 getXMMReg( eregOfRexRM(pfx,modrm) ));
13980 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13981 nameXMMReg(gregOfRexRM(pfx,modrm)));
13982 delta += 1;
13983 } else {
13984 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13985 putXMMReg( gregOfRexRM(pfx,modrm),
13986 loadLE(Ity_V128, mkexpr(addr)) );
13987 DIP("movdqu %s,%s\n", dis_buf,
13988 nameXMMReg(gregOfRexRM(pfx,modrm)));
13989 delta += alen;
13991 goto decode_success;
13993 break;
13995 case 0x70:
13996 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
13997 if (have66noF2noF3(pfx) && sz == 2) {
13998 delta = dis_PSHUFD_32x4( vbi, pfx, delta, False/*!writesYmm*/);
13999 goto decode_success;
14001 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14002 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
14003 if (haveNo66noF2noF3(pfx) && sz == 4) {
14004 Int order;
14005 IRTemp sV, dV, s3, s2, s1, s0;
14006 s3 = s2 = s1 = s0 = IRTemp_INVALID;
14007 sV = newTemp(Ity_I64);
14008 dV = newTemp(Ity_I64);
14009 do_MMX_preamble();
14010 modrm = getUChar(delta);
14011 if (epartIsReg(modrm)) {
14012 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
14013 order = (Int)getUChar(delta+1);
14014 delta += 1+1;
14015 DIP("pshufw $%d,%s,%s\n", order,
14016 nameMMXReg(eregLO3ofRM(modrm)),
14017 nameMMXReg(gregLO3ofRM(modrm)));
14018 } else {
14019 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
14020 1/*extra byte after amode*/ );
14021 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
14022 order = (Int)getUChar(delta+alen);
14023 delta += 1+alen;
14024 DIP("pshufw $%d,%s,%s\n", order,
14025 dis_buf,
14026 nameMMXReg(gregLO3ofRM(modrm)));
14028 breakup64to16s( sV, &s3, &s2, &s1, &s0 );
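/* Each 2-bit field of 'order' selects the source lane for one
   destination lane: bits 1:0 pick dest lane 0, ..., bits 7:6 pick
   dest lane 3. Hence an immediate of 0xE4 is the identity shuffle. */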
14029 # define SEL(n) \
14030 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
14031 assign(dV,
14032 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
14033 SEL((order>>2)&3), SEL((order>>0)&3) )
14035 putMMXReg(gregLO3ofRM(modrm), mkexpr(dV));
14036 # undef SEL
14037 goto decode_success;
14039 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
14040 mem) to G(xmm), and copy upper half */
14041 if (haveF2no66noF3(pfx) && sz == 4) {
14042 delta = dis_PSHUFxW_128( vbi, pfx, delta,
14043 False/*!isAvx*/, False/*!xIsH*/ );
14044 goto decode_success;
14046 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
14047 mem) to G(xmm), and copy lower half */
14048 if (haveF3no66noF2(pfx) && sz == 4) {
14049 delta = dis_PSHUFxW_128( vbi, pfx, delta,
14050 False/*!isAvx*/, True/*xIsH*/ );
14051 goto decode_success;
14053 break;
14055 case 0x71:
14056 /* 66 0F 71 /2 ib = PSRLW by immediate */
14057 if (have66noF2noF3(pfx) && sz == 2
14058 && epartIsReg(getUChar(delta))
14059 && gregLO3ofRM(getUChar(delta)) == 2) {
14060 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlw", Iop_ShrN16x8 );
14061 goto decode_success;
14063 /* 66 0F 71 /4 ib = PSRAW by immediate */
14064 if (have66noF2noF3(pfx) && sz == 2
14065 && epartIsReg(getUChar(delta))
14066 && gregLO3ofRM(getUChar(delta)) == 4) {
14067 delta = dis_SSE_shiftE_imm( pfx, delta, "psraw", Iop_SarN16x8 );
14068 goto decode_success;
14070 /* 66 0F 71 /6 ib = PSLLW by immediate */
14071 if (have66noF2noF3(pfx) && sz == 2
14072 && epartIsReg(getUChar(delta))
14073 && gregLO3ofRM(getUChar(delta)) == 6) {
14074 delta = dis_SSE_shiftE_imm( pfx, delta, "psllw", Iop_ShlN16x8 );
14075 goto decode_success;
14077 break;
14079 case 0x72:
14080 /* 66 0F 72 /2 ib = PSRLD by immediate */
14081 if (have66noF2noF3(pfx) && sz == 2
14082 && epartIsReg(getUChar(delta))
14083 && gregLO3ofRM(getUChar(delta)) == 2) {
14084 delta = dis_SSE_shiftE_imm( pfx, delta, "psrld", Iop_ShrN32x4 );
14085 goto decode_success;
14087 /* 66 0F 72 /4 ib = PSRAD by immediate */
14088 if (have66noF2noF3(pfx) && sz == 2
14089 && epartIsReg(getUChar(delta))
14090 && gregLO3ofRM(getUChar(delta)) == 4) {
14091 delta = dis_SSE_shiftE_imm( pfx, delta, "psrad", Iop_SarN32x4 );
14092 goto decode_success;
14094 /* 66 0F 72 /6 ib = PSLLD by immediate */
14095 if (have66noF2noF3(pfx) && sz == 2
14096 && epartIsReg(getUChar(delta))
14097 && gregLO3ofRM(getUChar(delta)) == 6) {
14098 delta = dis_SSE_shiftE_imm( pfx, delta, "pslld", Iop_ShlN32x4 );
14099 goto decode_success;
14101 break;
14103 case 0x73:
14104 /* 66 0F 73 /3 ib = PSRLDQ by immediate */
14105 /* note, if mem case ever filled in, 1 byte after amode */
14106 if (have66noF2noF3(pfx) && sz == 2
14107 && epartIsReg(getUChar(delta))
14108 && gregLO3ofRM(getUChar(delta)) == 3) {
14109 Int imm = (Int)getUChar(delta+1);
14110 Int reg = eregOfRexRM(pfx,getUChar(delta));
14111 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg));
14112 delta += 2;
14113 IRTemp sV = newTemp(Ity_V128);
14114 assign( sV, getXMMReg(reg) );
14115 putXMMReg(reg, mkexpr(math_PSRLDQ( sV, imm )));
14116 goto decode_success;
14118 /* 66 0F 73 /7 ib = PSLLDQ by immediate */
14119 /* note, if mem case ever filled in, 1 byte after amode */
14120 if (have66noF2noF3(pfx) && sz == 2
14121 && epartIsReg(getUChar(delta))
14122 && gregLO3ofRM(getUChar(delta)) == 7) {
14123 Int imm = (Int)getUChar(delta+1);
14124 Int reg = eregOfRexRM(pfx,getUChar(delta));
14125 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg));
14126 vassert(imm >= 0 && imm <= 255);
14127 delta += 2;
14128 IRTemp sV = newTemp(Ity_V128);
14129 assign( sV, getXMMReg(reg) );
14130 putXMMReg(reg, mkexpr(math_PSLLDQ( sV, imm )));
14131 goto decode_success;
14133 /* 66 0F 73 /2 ib = PSRLQ by immediate */
14134 if (have66noF2noF3(pfx) && sz == 2
14135 && epartIsReg(getUChar(delta))
14136 && gregLO3ofRM(getUChar(delta)) == 2) {
14137 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlq", Iop_ShrN64x2 );
14138 goto decode_success;
14140 /* 66 0F 73 /6 ib = PSLLQ by immediate */
14141 if (have66noF2noF3(pfx) && sz == 2
14142 && epartIsReg(getUChar(delta))
14143 && gregLO3ofRM(getUChar(delta)) == 6) {
14144 delta = dis_SSE_shiftE_imm( pfx, delta, "psllq", Iop_ShlN64x2 );
14145 goto decode_success;
14147 break;
14149 case 0x74:
14150 /* 66 0F 74 = PCMPEQB */
14151 if (have66noF2noF3(pfx) && sz == 2) {
14152 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14153 "pcmpeqb", Iop_CmpEQ8x16, False );
14154 goto decode_success;
14156 break;
14158 case 0x75:
14159 /* 66 0F 75 = PCMPEQW */
14160 if (have66noF2noF3(pfx) && sz == 2) {
14161 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14162 "pcmpeqw", Iop_CmpEQ16x8, False );
14163 goto decode_success;
14165 break;
14167 case 0x76:
14168 /* 66 0F 76 = PCMPEQD */
14169 if (have66noF2noF3(pfx) && sz == 2) {
14170 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14171 "pcmpeqd", Iop_CmpEQ32x4, False );
14172 goto decode_success;
14174 break;
14176 case 0x7E:
14177 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
14178 G (lo half xmm). Upper half of G is zeroed out. */
14179 if (haveF3no66noF2(pfx)
14180 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
14181 modrm = getUChar(delta);
14182 if (epartIsReg(modrm)) {
14183 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
14184 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
14185 /* zero bits 127:64 */
14186 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkU64(0) );
14187 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
14188 nameXMMReg(gregOfRexRM(pfx,modrm)));
14189 delta += 1;
14190 } else {
14191 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14192 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
14193 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
14194 loadLE(Ity_I64, mkexpr(addr)) );
14195 DIP("movsd %s,%s\n", dis_buf,
14196 nameXMMReg(gregOfRexRM(pfx,modrm)));
14197 delta += alen;
14199 goto decode_success;
14201 /* 66 0F 7E = MOVD from xmm low 1/4 to ireg32 or m32. */
14202 /* or from xmm low 1/2 to ireg64 or m64. */
14203 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) {
14204 if (sz == 2) sz = 4;
14205 modrm = getUChar(delta);
14206 if (epartIsReg(modrm)) {
14207 delta += 1;
14208 if (sz == 4) {
14209 putIReg32( eregOfRexRM(pfx,modrm),
14210 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
14211 DIP("movd %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
14212 nameIReg32(eregOfRexRM(pfx,modrm)));
14213 } else {
14214 putIReg64( eregOfRexRM(pfx,modrm),
14215 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
14216 DIP("movq %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
14217 nameIReg64(eregOfRexRM(pfx,modrm)));
14219 } else {
14220 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
14221 delta += alen;
14222 storeLE( mkexpr(addr),
14223 sz == 4
14224 ? getXMMRegLane32(gregOfRexRM(pfx,modrm),0)
14225 : getXMMRegLane64(gregOfRexRM(pfx,modrm),0) );
14226 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q',
14227 nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
14229 goto decode_success;
14231 break;
14233 case 0x7F:
14234 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
14235 if (haveF3no66noF2(pfx)
14236 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
14237 modrm = getUChar(delta);
14238 if (epartIsReg(modrm)) {
14239 goto decode_failure; /* awaiting test case */
14240 delta += 1;
14241 putXMMReg( eregOfRexRM(pfx,modrm),
14242 getXMMReg(gregOfRexRM(pfx,modrm)) );
14243 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
14244 nameXMMReg(eregOfRexRM(pfx,modrm)));
14245 } else {
14246 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
14247 delta += alen;
14248 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
14249 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
14251 goto decode_success;
14253 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
14254 if (have66noF2noF3(pfx) && sz == 2) {
14255 modrm = getUChar(delta);
14256 if (epartIsReg(modrm)) {
14257 delta += 1;
14258 putXMMReg( eregOfRexRM(pfx,modrm),
14259 getXMMReg(gregOfRexRM(pfx,modrm)) );
14260 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
14261 nameXMMReg(eregOfRexRM(pfx,modrm)));
14262 } else {
14263 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
14264 gen_SIGNAL_if_not_16_aligned( vbi, addr );
14265 delta += alen;
14266 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
14267 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
14269 goto decode_success;
14271 break;
14273 case 0xAE:
14274 /* 0F AE /7 = SFENCE -- flush pending operations to memory */
14275 if (haveNo66noF2noF3(pfx)
14276 && epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7
14277 && sz == 4) {
14278 delta += 1;
14279 /* Insert a memory fence. It's sometimes important that these
14280 are carried through to the generated code. */
14281 stmt( IRStmt_MBE(Imbe_Fence) );
14282 DIP("sfence\n");
14283 goto decode_success;
14285 /* mindless duplication follows .. */
14286 /* 0F AE /5 = LFENCE -- flush pending operations to memory */
14287 /* 0F AE /6 = MFENCE -- flush pending operations to memory */
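/* All three fences get identical treatment: each becomes a single
   generic Imbe_Fence statement. */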
14288 if (haveNo66noF2noF3(pfx)
14289 && epartIsReg(getUChar(delta))
14290 && (gregLO3ofRM(getUChar(delta)) == 5
14291 || gregLO3ofRM(getUChar(delta)) == 6)
14292 && sz == 4) {
14293 delta += 1;
14294 /* Insert a memory fence. It's sometimes important that these
14295 are carried through to the generated code. */
14296 stmt( IRStmt_MBE(Imbe_Fence) );
14297 DIP("%sfence\n", gregLO3ofRM(getUChar(delta-1))==5 ? "l" : "m");
14298 goto decode_success;
14301 /* 0F AE /7 = CLFLUSH -- flush cache line */
14302 if (haveNo66noF2noF3(pfx)
14303 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7
14304 && sz == 4) {
14306 /* This is something of a hack. We need to know the size of
14307 the cache line containing addr. Since we don't (easily),
14308 assume 256 on the basis that no real cache would have a
14309 line that big. It's safe to invalidate more stuff than we
14310 need, just inefficient. */
14311 ULong lineszB = 256ULL;
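/* With lineszB == 256, the And64 below rounds addr down to a
   256-byte boundary; CMSTART/CMLEN then describe the (possibly
   over-sized) range whose cached translations get invalidated. */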
14313 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14314 delta += alen;
14316 /* Round addr down to the start of the containing block. */
14317 stmt( IRStmt_Put(
14318 OFFB_CMSTART,
14319 binop( Iop_And64,
14320 mkexpr(addr),
14321 mkU64( ~(lineszB-1) ))) );
14323 stmt( IRStmt_Put(OFFB_CMLEN, mkU64(lineszB) ) );
14325 jmp_lit(dres, Ijk_InvalICache, (Addr64)(guest_RIP_bbstart+delta));
14327 DIP("clflush %s\n", dis_buf);
14328 goto decode_success;
14331 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
14332 if (haveNo66noF2noF3(pfx)
14333 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3
14334 && sz == 4) {
14335 delta = dis_STMXCSR(vbi, pfx, delta, False/*!isAvx*/);
14336 goto decode_success;
14338 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
14339 if (haveNo66noF2noF3(pfx)
14340 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2
14341 && sz == 4) {
14342 delta = dis_LDMXCSR(vbi, pfx, delta, False/*!isAvx*/);
14343 goto decode_success;
14345 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */
14346 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
14347 && !epartIsReg(getUChar(delta))
14348 && gregOfRexRM(pfx,getUChar(delta)) == 0) {
14349 delta = dis_FXSAVE(vbi, pfx, delta, sz);
14350 goto decode_success;
14352 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */
14353 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
14354 && !epartIsReg(getUChar(delta))
14355 && gregOfRexRM(pfx,getUChar(delta)) == 1) {
14356 delta = dis_FXRSTOR(vbi, pfx, delta, sz);
14357 goto decode_success;
14359 /* 0F AE /4 = XSAVE mem -- write x87, SSE, AVX state to memory */
14360 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
14361 && !epartIsReg(getUChar(delta))
14362 && gregOfRexRM(pfx,getUChar(delta)) == 4
14363 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
14364 delta = dis_XSAVE(vbi, pfx, delta, sz);
14365 goto decode_success;
14367 /* 0F AE /5 = XRSTOR mem -- read x87, SSE, AVX state from memory */
14368 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
14369 && !epartIsReg(getUChar(delta))
14370 && gregOfRexRM(pfx,getUChar(delta)) == 5
14371 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
14372 delta = dis_XRSTOR(vbi, pfx, delta, sz);
14373 goto decode_success;
14375 break;
14377 case 0xC2:
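/* In all four variants below, dis_SSE_cmp_E_to_G leaves delta
   unchanged if it does not recognise the comparison immediate; the
   delta > delta0 checks therefore treat an unadvanced delta as a
   decode failure. */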
14378 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
14379 if (haveNo66noF2noF3(pfx) && sz == 4) {
14380 Long delta0 = delta;
14381 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpps", True, 4 );
14382 if (delta > delta0) goto decode_success;
14384 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
14385 if (haveF3no66noF2(pfx) && sz == 4) {
14386 Long delta0 = delta;
14387 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpss", False, 4 );
14388 if (delta > delta0) goto decode_success;
14390 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
14391 if (haveF2no66noF3(pfx) && sz == 4) {
14392 Long delta0 = delta;
14393 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpsd", False, 8 );
14394 if (delta > delta0) goto decode_success;
14396 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
14397 if (have66noF2noF3(pfx) && sz == 2) {
14398 Long delta0 = delta;
14399 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmppd", True, 8 );
14400 if (delta > delta0) goto decode_success;
14402 break;
14404 case 0xC3:
14405 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
14406 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) {
14407 modrm = getUChar(delta);
14408 if (!epartIsReg(modrm)) {
14409 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14410 storeLE( mkexpr(addr), getIRegG(sz, pfx, modrm) );
14411 DIP("movnti %s,%s\n", dis_buf,
14412 nameIRegG(sz, pfx, modrm));
14413 delta += alen;
14414 goto decode_success;
14416 /* else fall through */
14418 break;
14420 case 0xC4:
14421 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14422 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
14423 put it into the specified lane of mmx(G). */
14424 if (haveNo66noF2noF3(pfx)
14425 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
14426 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
14427 mmx reg. t4 is the new lane value. t5 is the original
14428 mmx value. t6 is the new mmx value. */
14429 Int lane;
14430 t4 = newTemp(Ity_I16);
14431 t5 = newTemp(Ity_I64);
14432 t6 = newTemp(Ity_I64);
14433 modrm = getUChar(delta);
14434 do_MMX_preamble();
14436 assign(t5, getMMXReg(gregLO3ofRM(modrm)));
14437 breakup64to16s( t5, &t3, &t2, &t1, &t0 );
14439 if (epartIsReg(modrm)) {
14440 assign(t4, getIReg16(eregOfRexRM(pfx,modrm)));
14441 delta += 1+1;
14442 lane = getUChar(delta-1);
14443 DIP("pinsrw $%d,%s,%s\n", lane,
14444 nameIReg16(eregOfRexRM(pfx,modrm)),
14445 nameMMXReg(gregLO3ofRM(modrm)));
14446 } else {
14447 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
14448 delta += 1+alen;
14449 lane = getUChar(delta-1);
14450 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
14451 DIP("pinsrw $%d,%s,%s\n", lane,
14452 dis_buf,
14453 nameMMXReg(gregLO3ofRM(modrm)));
14456 switch (lane & 3) {
14457 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break;
14458 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break;
14459 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break;
14460 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break;
14461 default: vassert(0);
14463 putMMXReg(gregLO3ofRM(modrm), mkexpr(t6));
14464 goto decode_success;
14466 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
14467 put it into the specified lane of xmm(G). */
14468 if (have66noF2noF3(pfx)
14469 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
14470 Int lane;
14471 t4 = newTemp(Ity_I16);
14472 modrm = getUChar(delta);
14473 UInt rG = gregOfRexRM(pfx,modrm);
14474 if (epartIsReg(modrm)) {
14475 UInt rE = eregOfRexRM(pfx,modrm);
14476 assign(t4, getIReg16(rE));
14477 delta += 1+1;
14478 lane = getUChar(delta-1);
14479 DIP("pinsrw $%d,%s,%s\n",
14480 lane, nameIReg16(rE), nameXMMReg(rG));
14481 } else {
14482 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
14483 1/*byte after the amode*/ );
14484 delta += 1+alen;
14485 lane = getUChar(delta-1);
14486 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
14487 DIP("pinsrw $%d,%s,%s\n",
14488 lane, dis_buf, nameXMMReg(rG));
14490 IRTemp src_vec = newTemp(Ity_V128);
14491 assign(src_vec, getXMMReg(rG));
14492 IRTemp res_vec = math_PINSRW_128( src_vec, t4, lane & 7);
14493 putXMMReg(rG, mkexpr(res_vec));
14494 goto decode_success;
14496 break;
14498 case 0xC5:
14499 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14500 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
14501 zero-extend of it in ireg(G). */
14502 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) {
14503 modrm = getUChar(delta);
14504 if (epartIsReg(modrm)) {
14505 IRTemp sV = newTemp(Ity_I64);
14506 t5 = newTemp(Ity_I16);
14507 do_MMX_preamble();
14508 assign(sV, getMMXReg(eregLO3ofRM(modrm)));
14509 breakup64to16s( sV, &t3, &t2, &t1, &t0 );
14510 switch (getUChar(delta+1) & 3) {
14511 case 0: assign(t5, mkexpr(t0)); break;
14512 case 1: assign(t5, mkexpr(t1)); break;
14513 case 2: assign(t5, mkexpr(t2)); break;
14514 case 3: assign(t5, mkexpr(t3)); break;
14515 default: vassert(0);
14517 if (sz == 8)
14518 putIReg64(gregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(t5)));
14519 else
14520 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t5)));
14521 DIP("pextrw $%d,%s,%s\n",
14522 (Int)getUChar(delta+1),
14523 nameMMXReg(eregLO3ofRM(modrm)),
14524 sz==8 ? nameIReg64(gregOfRexRM(pfx,modrm))
14525 : nameIReg32(gregOfRexRM(pfx,modrm))
14527 delta += 2;
14528 goto decode_success;
14530 /* else fall through */
14531 /* note, for anyone filling in the mem case: this insn has one
14532 byte after the amode and therefore you must pass 1 as the
14533 last arg to disAMode */
14535 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put
14536 zero-extend of it in ireg(G). */
14537 if (have66noF2noF3(pfx)
14538 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
14539 Long delta0 = delta;
14540 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta,
14541 False/*!isAvx*/ );
14542 if (delta > delta0) goto decode_success;
14543 /* else fall through -- decoding has failed */
14545 break;
14547 case 0xC6:
14548 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
14549 if (haveNo66noF2noF3(pfx) && sz == 4) {
14550 Int imm8 = 0;
14551 IRTemp sV = newTemp(Ity_V128);
14552 IRTemp dV = newTemp(Ity_V128);
14553 modrm = getUChar(delta);
14554 UInt rG = gregOfRexRM(pfx,modrm);
14555 assign( dV, getXMMReg(rG) );
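/* imm8 holds four 2-bit selectors: result lanes 0 and 1 come from
   dV (the G register), chosen by bits 1:0 and 3:2, and lanes 2 and
   3 come from sV (the E operand), chosen by bits 5:4 and 7:6.
   math_SHUFPS_128 does the actual work. */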
14556 if (epartIsReg(modrm)) {
14557 UInt rE = eregOfRexRM(pfx,modrm);
14558 assign( sV, getXMMReg(rE) );
14559 imm8 = (Int)getUChar(delta+1);
14560 delta += 1+1;
14561 DIP("shufps $%d,%s,%s\n", imm8, nameXMMReg(rE), nameXMMReg(rG));
14562 } else {
14563 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
14564 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
14565 imm8 = (Int)getUChar(delta+alen);
14566 delta += 1+alen;
14567 DIP("shufps $%d,%s,%s\n", imm8, dis_buf, nameXMMReg(rG));
14569 IRTemp res = math_SHUFPS_128( sV, dV, imm8 );
14570 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
14571 goto decode_success;
14573 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
14574 if (have66noF2noF3(pfx) && sz == 2) {
14575 Int select;
14576 IRTemp sV = newTemp(Ity_V128);
14577 IRTemp dV = newTemp(Ity_V128);
14579 modrm = getUChar(delta);
14580 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
14582 if (epartIsReg(modrm)) {
14583 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
14584 select = (Int)getUChar(delta+1);
14585 delta += 1+1;
14586 DIP("shufpd $%d,%s,%s\n", select,
14587 nameXMMReg(eregOfRexRM(pfx,modrm)),
14588 nameXMMReg(gregOfRexRM(pfx,modrm)));
14589 } else {
14590 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
14591 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
14592 select = getUChar(delta+alen);
14593 delta += 1+alen;
14594 DIP("shufpd $%d,%s,%s\n", select,
14595 dis_buf,
14596 nameXMMReg(gregOfRexRM(pfx,modrm)));
14599 IRTemp res = math_SHUFPD_128( sV, dV, select );
14600 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
14601 goto decode_success;
14603 break;
14605 case 0xD1:
14606 /* 66 0F D1 = PSRLW by E */
14607 if (have66noF2noF3(pfx) && sz == 2) {
14608 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlw", Iop_ShrN16x8 );
14609 goto decode_success;
14611 break;
14613 case 0xD2:
14614 /* 66 0F D2 = PSRLD by E */
14615 if (have66noF2noF3(pfx) && sz == 2) {
14616 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrld", Iop_ShrN32x4 );
14617 goto decode_success;
14619 break;
14621 case 0xD3:
14622 /* 66 0F D3 = PSRLQ by E */
14623 if (have66noF2noF3(pfx) && sz == 2) {
14624 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlq", Iop_ShrN64x2 );
14625 goto decode_success;
14627 break;
14629 case 0xD4:
14630 /* 66 0F D4 = PADDQ */
14631 if (have66noF2noF3(pfx) && sz == 2) {
14632 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14633 "paddq", Iop_Add64x2, False );
14634 goto decode_success;
14636 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
14637 /* 0F D4 = PADDQ -- add 64x1 */
14638 if (haveNo66noF2noF3(pfx) && sz == 4) {
14639 do_MMX_preamble();
14640 delta = dis_MMXop_regmem_to_reg (
14641 vbi, pfx, delta, opc, "paddq", False );
14642 goto decode_success;
14644 break;
14646 case 0xD5:
14647 /* 66 0F D5 = PMULLW -- 16x8 multiply */
14648 if (have66noF2noF3(pfx) && sz == 2) {
14649 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14650 "pmullw", Iop_Mul16x8, False );
14651 goto decode_success;
14653 break;
14655 case 0xD6:
14656 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
14657 hi half). */
14658 if (haveF3no66noF2(pfx) && sz == 4) {
14659 modrm = getUChar(delta);
14660 if (epartIsReg(modrm)) {
14661 do_MMX_preamble();
14662 putXMMReg( gregOfRexRM(pfx,modrm),
14663 unop(Iop_64UtoV128, getMMXReg( eregLO3ofRM(modrm) )) );
14664 DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
14665 nameXMMReg(gregOfRexRM(pfx,modrm)));
14666 delta += 1;
14667 goto decode_success;
14669 /* apparently no mem case for this insn */
14671 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
14672 or lo half xmm). */
14673 if (have66noF2noF3(pfx)
14674 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
14675 modrm = getUChar(delta);
14676 if (epartIsReg(modrm)) {
14677 /* fall through, awaiting test case */
14678 /* dst: lo half copied, hi half zeroed */
14679 } else {
14680 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14681 storeLE( mkexpr(addr),
14682 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
14683 DIP("movq %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf );
14684 delta += alen;
14685 goto decode_success;
14688 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
14689 if (haveF2no66noF3(pfx) && sz == 4) {
14690 modrm = getUChar(delta);
14691 if (epartIsReg(modrm)) {
14692 do_MMX_preamble();
14693 putMMXReg( gregLO3ofRM(modrm),
14694 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
14695 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
14696 nameMMXReg(gregLO3ofRM(modrm)));
14697 delta += 1;
14698 goto decode_success;
14700 /* apparently no mem case for this insn */
14702 break;
14704 case 0xD7:
14705 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16
14706 lanes in xmm(E), turn them into a 16-bit value, and put
14707 zero-extend of it in ireg(G). Doing this directly is just
14708 too cumbersome; give up therefore and call a helper. */
14709 if (have66noF2noF3(pfx)
14710 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
14711 && epartIsReg(getUChar(delta))) { /* no memory case, it seems */
14712 delta = dis_PMOVMSKB_128( vbi, pfx, delta, False/*!isAvx*/ );
14713 goto decode_success;
14715 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14716 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
14717 mmx(E), turn them into a byte, and put zero-extend of it in
14718 ireg(G). */
14719 if (haveNo66noF2noF3(pfx)
14720 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
14721 modrm = getUChar(delta);
14722 if (epartIsReg(modrm)) {
14723 do_MMX_preamble();
14724 t0 = newTemp(Ity_I64);
14725 t1 = newTemp(Ity_I32);
14726 assign(t0, getMMXReg(eregLO3ofRM(modrm)));
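/* Iop_GetMSBs8x8 gathers the top bit of each of the 8 bytes into an
   8-bit value, which is then zero-extended to 32 bits for the
   destination ireg. */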
14727 assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0))));
14728 putIReg32(gregOfRexRM(pfx,modrm), mkexpr(t1));
14729 DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
14730 nameIReg32(gregOfRexRM(pfx,modrm)));
14731 delta += 1;
14732 goto decode_success;
14734 /* else fall through */
14736 break;
14738 case 0xD8:
14739 /* 66 0F D8 = PSUBUSB */
14740 if (have66noF2noF3(pfx) && sz == 2) {
14741 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14742 "psubusb", Iop_QSub8Ux16, False );
14743 goto decode_success;
14745 break;
14747 case 0xD9:
14748 /* 66 0F D9 = PSUBUSW */
14749 if (have66noF2noF3(pfx) && sz == 2) {
14750 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14751 "psubusw", Iop_QSub16Ux8, False );
14752 goto decode_success;
14754 break;
14756 case 0xDA:
14757 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14758 /* 0F DA = PMINUB -- 8x8 unsigned min */
14759 if (haveNo66noF2noF3(pfx) && sz == 4) {
14760 do_MMX_preamble();
14761 delta = dis_MMXop_regmem_to_reg (
14762 vbi, pfx, delta, opc, "pminub", False );
14763 goto decode_success;
14765 /* 66 0F DA = PMINUB -- 8x16 unsigned min */
14766 if (have66noF2noF3(pfx) && sz == 2) {
14767 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14768 "pminub", Iop_Min8Ux16, False );
14769 goto decode_success;
14771 break;
14773 case 0xDB:
14774 /* 66 0F DB = PAND */
14775 if (have66noF2noF3(pfx) && sz == 2) {
14776 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pand", Iop_AndV128 );
14777 goto decode_success;
14779 break;
14781 case 0xDC:
14782 /* 66 0F DC = PADDUSB */
14783 if (have66noF2noF3(pfx) && sz == 2) {
14784 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14785 "paddusb", Iop_QAdd8Ux16, False );
14786 goto decode_success;
14788 break;
14790 case 0xDD:
14791 /* 66 0F DD = PADDUSW */
14792 if (have66noF2noF3(pfx) && sz == 2) {
14793 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14794 "paddusw", Iop_QAdd16Ux8, False );
14795 goto decode_success;
14797 break;
14799 case 0xDE:
14800 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14801 /* 0F DE = PMAXUB -- 8x8 unsigned max */
14802 if (haveNo66noF2noF3(pfx) && sz == 4) {
14803 do_MMX_preamble();
14804 delta = dis_MMXop_regmem_to_reg (
14805 vbi, pfx, delta, opc, "pmaxub", False );
14806 goto decode_success;
14808 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
14809 if (have66noF2noF3(pfx) && sz == 2) {
14810 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14811 "pmaxub", Iop_Max8Ux16, False );
14812 goto decode_success;
14814 break;
14816 case 0xDF:
14817 /* 66 0F DF = PANDN */
14818 if (have66noF2noF3(pfx) && sz == 2) {
14819 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "pandn", Iop_AndV128 );
14820 goto decode_success;
14822 break;
14824 case 0xE0:
14825 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14826 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
14827 if (haveNo66noF2noF3(pfx) && sz == 4) {
14828 do_MMX_preamble();
14829 delta = dis_MMXop_regmem_to_reg (
14830 vbi, pfx, delta, opc, "pavgb", False );
14831 goto decode_success;
14833 /* 66 0F E0 = PAVGB */
14834 if (have66noF2noF3(pfx) && sz == 2) {
14835 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14836 "pavgb", Iop_Avg8Ux16, False );
14837 goto decode_success;
14839 break;
14841 case 0xE1:
14842 /* 66 0F E1 = PSRAW by E */
14843 if (have66noF2noF3(pfx) && sz == 2) {
14844 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psraw", Iop_SarN16x8 );
14845 goto decode_success;
14847 break;
14849 case 0xE2:
14850 /* 66 0F E2 = PSRAD by E */
14851 if (have66noF2noF3(pfx) && sz == 2) {
14852 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrad", Iop_SarN32x4 );
14853 goto decode_success;
14855 break;
14857 case 0xE3:
14858 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14859 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
14860 if (haveNo66noF2noF3(pfx) && sz == 4) {
14861 do_MMX_preamble();
14862 delta = dis_MMXop_regmem_to_reg (
14863 vbi, pfx, delta, opc, "pavgw", False );
14864 goto decode_success;
14866 /* 66 0F E3 = PAVGW */
14867 if (have66noF2noF3(pfx) && sz == 2) {
14868 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14869 "pavgw", Iop_Avg16Ux8, False );
14870 goto decode_success;
14872 break;
14874 case 0xE4:
14875 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14876 /* 0F E4 = PMULHUW -- 16x4 hi-half of unsigned widening multiply */
14877 if (haveNo66noF2noF3(pfx) && sz == 4) {
14878 do_MMX_preamble();
14879 delta = dis_MMXop_regmem_to_reg (
14880 vbi, pfx, delta, opc, "pmuluh", False );
14881 goto decode_success;
14883 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
14884 if (have66noF2noF3(pfx) && sz == 2) {
14885 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14886 "pmulhuw", Iop_MulHi16Ux8, False );
14887 goto decode_success;
14889 break;
14891 case 0xE5:
14892 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
14893 if (have66noF2noF3(pfx) && sz == 2) {
14894 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14895 "pmulhw", Iop_MulHi16Sx8, False );
14896 goto decode_success;
14898 break;
14900 case 0xE6:
14901 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
14902 lo half xmm(G), and zero upper half, rounding towards zero */
14903 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
14904 lo half xmm(G), according to prevailing rounding mode, and zero
14905 upper half */
14906 if ( (haveF2no66noF3(pfx) && sz == 4)
14907 || (have66noF2noF3(pfx) && sz == 2) ) {
14908 delta = dis_CVTxPD2DQ_128( vbi, pfx, delta, False/*!isAvx*/,
14909 toBool(sz == 2)/*r2zero*/);
14910 goto decode_success;
14912 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
14913 F64 in xmm(G) */
14914 if (haveF3no66noF2(pfx) && sz == 4) {
14915 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, False/*!isAvx*/);
14916 goto decode_success;
14918 break;
14920 case 0xE7:
14921 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14922 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
14923 Intel manual does not say anything about the usual business of
14924 the FP reg tags getting trashed whenever an MMX insn happens.
14925 So we just leave them alone. */
14927 if (haveNo66noF2noF3(pfx) && sz == 4) {
14928 modrm = getUChar(delta);
14929 if (!epartIsReg(modrm)) {
14930 /* do_MMX_preamble(); Intel docs don't specify this */
14931 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14932 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
14933 DIP("movntq %s,%s\n", dis_buf,
14934 nameMMXReg(gregLO3ofRM(modrm)));
14935 delta += alen;
14936 goto decode_success;
14938 /* else fall through */
14940 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
14941 if (have66noF2noF3(pfx) && sz == 2) {
14942 modrm = getUChar(delta);
14943 if (!epartIsReg(modrm)) {
14944 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14945 gen_SIGNAL_if_not_16_aligned( vbi, addr );
14946 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
14947 DIP("movntdq %s,%s\n", dis_buf,
14948 nameXMMReg(gregOfRexRM(pfx,modrm)));
14949 delta += alen;
14950 goto decode_success;
14952 /* else fall through */
14954 break;
14956 case 0xE8:
14957 /* 66 0F E8 = PSUBSB */
14958 if (have66noF2noF3(pfx) && sz == 2) {
14959 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14960 "psubsb", Iop_QSub8Sx16, False );
14961 goto decode_success;
14963 break;
14965 case 0xE9:
14966 /* 66 0F E9 = PSUBSW */
14967 if (have66noF2noF3(pfx) && sz == 2) {
14968 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14969 "psubsw", Iop_QSub16Sx8, False );
14970 goto decode_success;
14972 break;
14974 case 0xEA:
14975 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14976 /* 0F EA = PMINSW -- 16x4 signed min */
14977 if (haveNo66noF2noF3(pfx) && sz == 4) {
14978 do_MMX_preamble();
14979 delta = dis_MMXop_regmem_to_reg (
14980 vbi, pfx, delta, opc, "pminsw", False );
14981 goto decode_success;
14983 /* 66 0F EA = PMINSW -- 16x8 signed min */
14984 if (have66noF2noF3(pfx) && sz == 2) {
14985 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14986 "pminsw", Iop_Min16Sx8, False );
14987 goto decode_success;
14989 break;
14991 case 0xEB:
14992 /* 66 0F EB = POR */
14993 if (have66noF2noF3(pfx) && sz == 2) {
14994 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "por", Iop_OrV128 );
14995 goto decode_success;
14997 break;
14999 case 0xEC:
15000 /* 66 0F EC = PADDSB */
15001 if (have66noF2noF3(pfx) && sz == 2) {
15002 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15003 "paddsb", Iop_QAdd8Sx16, False );
15004 goto decode_success;
15006 break;
15008 case 0xED:
15009 /* 66 0F ED = PADDSW */
15010 if (have66noF2noF3(pfx) && sz == 2) {
15011 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15012 "paddsw", Iop_QAdd16Sx8, False );
15013 goto decode_success;
15015 break;
15017 case 0xEE:
15018 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
15019 /* 0F EE = PMAXSW -- 16x4 signed max */
15020 if (haveNo66noF2noF3(pfx) && sz == 4) {
15021 do_MMX_preamble();
15022 delta = dis_MMXop_regmem_to_reg (
15023 vbi, pfx, delta, opc, "pmaxsw", False );
15024 goto decode_success;
15026 /* 66 0F EE = PMAXSW -- 16x8 signed max */
15027 if (have66noF2noF3(pfx) && sz == 2) {
15028 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15029 "pmaxsw", Iop_Max16Sx8, False );
15030 goto decode_success;
15032 break;
15034 case 0xEF:
15035 /* 66 0F EF = PXOR */
15036 if (have66noF2noF3(pfx) && sz == 2) {
15037 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pxor", Iop_XorV128 );
15038 goto decode_success;
15040 break;
15042 case 0xF1:
15043 /* 66 0F F1 = PSLLW by E */
15044 if (have66noF2noF3(pfx) && sz == 2) {
15045 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllw", Iop_ShlN16x8 );
15046 goto decode_success;
15048 break;
15050 case 0xF2:
15051 /* 66 0F F2 = PSLLD by E */
15052 if (have66noF2noF3(pfx) && sz == 2) {
15053 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "pslld", Iop_ShlN32x4 );
15054 goto decode_success;
15056 break;
15058 case 0xF3:
15059 /* 66 0F F3 = PSLLQ by E */
15060 if (have66noF2noF3(pfx) && sz == 2) {
15061 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllq", Iop_ShlN64x2 );
15062 goto decode_success;
15064 break;
15066 case 0xF4:
15067 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
15068 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
15069 half */
15070 if (have66noF2noF3(pfx) && sz == 2) {
15071 IRTemp sV = newTemp(Ity_V128);
15072 IRTemp dV = newTemp(Ity_V128);
15073 modrm = getUChar(delta);
15074 UInt rG = gregOfRexRM(pfx,modrm);
15075 assign( dV, getXMMReg(rG) );
15076 if (epartIsReg(modrm)) {
15077 UInt rE = eregOfRexRM(pfx,modrm);
15078 assign( sV, getXMMReg(rE) );
15079 delta += 1;
15080 DIP("pmuludq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15081 } else {
15082 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15083 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15084 delta += alen;
15085 DIP("pmuludq %s,%s\n", dis_buf, nameXMMReg(rG));
15087 putXMMReg( rG, mkexpr(math_PMULUDQ_128( sV, dV )) );
15088 goto decode_success;
15090 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
15091 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
15092 0 to form 64-bit result */
15093 if (haveNo66noF2noF3(pfx) && sz == 4) {
15094 IRTemp sV = newTemp(Ity_I64);
15095 IRTemp dV = newTemp(Ity_I64);
15096 t1 = newTemp(Ity_I32);
15097 t0 = newTemp(Ity_I32);
15098 modrm = getUChar(delta);
15100 do_MMX_preamble();
15101 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15103 if (epartIsReg(modrm)) {
15104 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15105 delta += 1;
15106 DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
15107 nameMMXReg(gregLO3ofRM(modrm)));
15108 } else {
15109 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15110 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15111 delta += alen;
15112 DIP("pmuludq %s,%s\n", dis_buf,
15113 nameMMXReg(gregLO3ofRM(modrm)));
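/* Each operand's low 32 bits are extracted with Iop_64to32; Iop_MullU32
   then forms the full 64-bit unsigned product, which fills the MMX reg. */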
15116 assign( t0, unop(Iop_64to32, mkexpr(dV)) );
15117 assign( t1, unop(Iop_64to32, mkexpr(sV)) );
15118 putMMXReg( gregLO3ofRM(modrm),
15119 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
15120 goto decode_success;
15122 break;
15124 case 0xF5:
15125 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
15126 E(xmm or mem) to G(xmm) */
15127 if (have66noF2noF3(pfx) && sz == 2) {
15128 IRTemp sV = newTemp(Ity_V128);
15129 IRTemp dV = newTemp(Ity_V128);
15130 modrm = getUChar(delta);
15131 UInt rG = gregOfRexRM(pfx,modrm);
15132 if (epartIsReg(modrm)) {
15133 UInt rE = eregOfRexRM(pfx,modrm);
15134 assign( sV, getXMMReg(rE) );
15135 delta += 1;
15136 DIP("pmaddwd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15137 } else {
15138 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15139 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15140 delta += alen;
15141 DIP("pmaddwd %s,%s\n", dis_buf, nameXMMReg(rG));
15143 assign( dV, getXMMReg(rG) );
15144 putXMMReg( rG, mkexpr(math_PMADDWD_128(dV, sV)) );
15145 goto decode_success;
15147 break;
15149 case 0xF6:
15150 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
15151 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
15152 if (haveNo66noF2noF3(pfx) && sz == 4) {
15153 do_MMX_preamble();
15154 delta = dis_MMXop_regmem_to_reg (
15155 vbi, pfx, delta, opc, "psadbw", False );
15156 goto decode_success;
15158 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
15159 from E(xmm or mem) to G(xmm) */
15160 if (have66noF2noF3(pfx) && sz == 2) {
15161 IRTemp sV = newTemp(Ity_V128);
15162 IRTemp dV = newTemp(Ity_V128);
15163 modrm = getUChar(delta);
15164 UInt rG = gregOfRexRM(pfx,modrm);
15165 if (epartIsReg(modrm)) {
15166 UInt rE = eregOfRexRM(pfx,modrm);
15167 assign( sV, getXMMReg(rE) );
15168 delta += 1;
15169 DIP("psadbw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15170 } else {
15171 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15172 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15173 delta += alen;
15174 DIP("psadbw %s,%s\n", dis_buf, nameXMMReg(rG));
15176 assign( dV, getXMMReg(rG) );
15177 putXMMReg( rG, mkexpr( math_PSADBW_128 ( dV, sV ) ) );
15179 goto decode_success;
15181 break;
15183 case 0xF7:
15184 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
15185 /* 0F F7 = MASKMOVQ -- 8x8 masked store */
15186 if (haveNo66noF2noF3(pfx) && sz == 4) {
15187 Bool ok = False;
15188 delta = dis_MMX( &ok, vbi, pfx, sz, delta-1 );
15189 if (ok) goto decode_success;
15191 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
15192 if (have66noF2noF3(pfx) && sz == 2 && epartIsReg(getUChar(delta))) {
15193 delta = dis_MASKMOVDQU( vbi, pfx, delta, False/*!isAvx*/ );
15194 goto decode_success;
15196 break;
15198 case 0xF8:
15199 /* 66 0F F8 = PSUBB */
15200 if (have66noF2noF3(pfx) && sz == 2) {
15201 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15202 "psubb", Iop_Sub8x16, False );
15203 goto decode_success;
15205 break;
15207 case 0xF9:
15208 /* 66 0F F9 = PSUBW */
15209 if (have66noF2noF3(pfx) && sz == 2) {
15210 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15211 "psubw", Iop_Sub16x8, False );
15212 goto decode_success;
15214 break;
15216 case 0xFA:
15217 /* 66 0F FA = PSUBD */
15218 if (have66noF2noF3(pfx) && sz == 2) {
15219 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15220 "psubd", Iop_Sub32x4, False );
15221 goto decode_success;
15223 break;
15225 case 0xFB:
15226 /* 66 0F FB = PSUBQ */
15227 if (have66noF2noF3(pfx) && sz == 2) {
15228 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15229 "psubq", Iop_Sub64x2, False );
15230 goto decode_success;
15232 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
15233 /* 0F FB = PSUBQ -- sub 64x1 */
15234 if (haveNo66noF2noF3(pfx) && sz == 4) {
15235 do_MMX_preamble();
15236 delta = dis_MMXop_regmem_to_reg (
15237 vbi, pfx, delta, opc, "psubq", False );
15238 goto decode_success;
15240 break;
15242 case 0xFC:
15243 /* 66 0F FC = PADDB */
15244 if (have66noF2noF3(pfx) && sz == 2) {
15245 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15246 "paddb", Iop_Add8x16, False );
15247 goto decode_success;
15249 break;
15251 case 0xFD:
15252 /* 66 0F FD = PADDW */
15253 if (have66noF2noF3(pfx) && sz == 2) {
15254 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15255 "paddw", Iop_Add16x8, False );
15256 goto decode_success;
15258 break;
15260 case 0xFE:
15261 /* 66 0F FE = PADDD */
15262 if (have66noF2noF3(pfx) && sz == 2) {
15263 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15264 "paddd", Iop_Add32x4, False );
15265 goto decode_success;
15267 break;
15269 default:
15270 goto decode_failure;
15274 decode_failure:
15275 *decode_OK = False;
15276 return deltaIN;
15278 decode_success:
15279 *decode_OK = True;
15280 return delta;
15284 /*------------------------------------------------------------*/
15285 /*--- ---*/
15286 /*--- Top-level SSE3 (not SupSSE3): dis_ESC_0F__SSE3 ---*/
15287 /*--- ---*/
15288 /*------------------------------------------------------------*/
15290 static Long dis_MOVDDUP_128 ( const VexAbiInfo* vbi, Prefix pfx,
15291 Long delta, Bool isAvx )
15293 IRTemp addr = IRTemp_INVALID;
15294 Int alen = 0;
15295 HChar dis_buf[50];
15296 IRTemp sV = newTemp(Ity_V128);
15297 IRTemp d0 = newTemp(Ity_I64);
15298 UChar modrm = getUChar(delta);
15299 UInt rG = gregOfRexRM(pfx,modrm);
15300 if (epartIsReg(modrm)) {
15301 UInt rE = eregOfRexRM(pfx,modrm);
15302 assign( sV, getXMMReg(rE) );
15303 DIP("%smovddup %s,%s\n",
15304 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
15305 delta += 1;
15306 assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
15307 } else {
15308 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15309 assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
15310 DIP("%smovddup %s,%s\n",
15311 isAvx ? "v" : "", dis_buf, nameXMMReg(rG));
15312 delta += alen;
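/* Both 64-bit halves of the destination receive a copy of d0, the
   source's low quadword. */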
15314 (isAvx ? putYMMRegLoAndZU : putXMMReg)
15315 ( rG, binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
15316 return delta;
15320 static Long dis_MOVDDUP_256 ( const VexAbiInfo* vbi, Prefix pfx,
15321 Long delta )
15323 IRTemp addr = IRTemp_INVALID;
15324 Int alen = 0;
15325 HChar dis_buf[50];
15326 IRTemp d0 = newTemp(Ity_I64);
15327 IRTemp d1 = newTemp(Ity_I64);
15328 UChar modrm = getUChar(delta);
15329 UInt rG = gregOfRexRM(pfx,modrm);
15330 if (epartIsReg(modrm)) {
15331 UInt rE = eregOfRexRM(pfx,modrm);
15332 DIP("vmovddup %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
15333 delta += 1;
15334 assign ( d0, getYMMRegLane64(rE, 0) );
15335 assign ( d1, getYMMRegLane64(rE, 2) );
15336 } else {
15337 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15338 assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
15339 assign( d1, loadLE(Ity_I64, binop(Iop_Add64,
15340 mkexpr(addr), mkU64(16))) );
15341 DIP("vmovddup %s,%s\n", dis_buf, nameYMMReg(rG));
15342 delta += alen;
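/* Lanes 0/1 get d0 (low qword of the low 128-bit half), lanes 2/3 get d1
   (low qword of the high half), i.e. MOVDDUP applied to each half. */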
15344 putYMMRegLane64( rG, 0, mkexpr(d0) );
15345 putYMMRegLane64( rG, 1, mkexpr(d0) );
15346 putYMMRegLane64( rG, 2, mkexpr(d1) );
15347 putYMMRegLane64( rG, 3, mkexpr(d1) );
15348 return delta;
15352 static Long dis_MOVSxDUP_128 ( const VexAbiInfo* vbi, Prefix pfx,
15353 Long delta, Bool isAvx, Bool isL )
15355 IRTemp addr = IRTemp_INVALID;
15356 Int alen = 0;
15357 HChar dis_buf[50];
15358 IRTemp sV = newTemp(Ity_V128);
15359 UChar modrm = getUChar(delta);
15360 UInt rG = gregOfRexRM(pfx,modrm);
15361 IRTemp s3, s2, s1, s0;
15362 s3 = s2 = s1 = s0 = IRTemp_INVALID;
15363 if (epartIsReg(modrm)) {
15364 UInt rE = eregOfRexRM(pfx,modrm);
15365 assign( sV, getXMMReg(rE) );
15366 DIP("%smovs%cdup %s,%s\n",
15367 isAvx ? "v" : "", isL ? 'l' : 'h', nameXMMReg(rE), nameXMMReg(rG));
15368 delta += 1;
15369 } else {
15370 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15371 if (!isAvx)
15372 gen_SIGNAL_if_not_16_aligned( vbi, addr );
15373 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15374 DIP("%smovs%cdup %s,%s\n",
15375 isAvx ? "v" : "", isL ? 'l' : 'h', dis_buf, nameXMMReg(rG));
15376 delta += alen;
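/* MOVSLDUP (isL) replicates source lanes 0 and 2; MOVSHDUP replicates
   lanes 1 and 3. */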
15378 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
15379 (isAvx ? putYMMRegLoAndZU : putXMMReg)
15380 ( rG, isL ? mkV128from32s( s2, s2, s0, s0 )
15381 : mkV128from32s( s3, s3, s1, s1 ) );
15382 return delta;
15386 static Long dis_MOVSxDUP_256 ( const VexAbiInfo* vbi, Prefix pfx,
15387 Long delta, Bool isL )
15389 IRTemp addr = IRTemp_INVALID;
15390 Int alen = 0;
15391 HChar dis_buf[50];
15392 IRTemp sV = newTemp(Ity_V256);
15393 UChar modrm = getUChar(delta);
15394 UInt rG = gregOfRexRM(pfx,modrm);
15395 IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
15396 s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
15397 if (epartIsReg(modrm)) {
15398 UInt rE = eregOfRexRM(pfx,modrm);
15399 assign( sV, getYMMReg(rE) );
15400 DIP("vmovs%cdup %s,%s\n",
15401 isL ? 'l' : 'h', nameYMMReg(rE), nameYMMReg(rG));
15402 delta += 1;
15403 } else {
15404 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15405 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
15406 DIP("vmovs%cdup %s,%s\n",
15407 isL ? 'l' : 'h', dis_buf, nameYMMReg(rG));
15408 delta += alen;
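/* Same lane replication as the 128-bit form, applied to each 128-bit
   half of the YMM source independently. */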
15410 breakupV256to32s( sV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
15411 putYMMRegLane128( rG, 1, isL ? mkV128from32s( s6, s6, s4, s4 )
15412 : mkV128from32s( s7, s7, s5, s5 ) );
15413 putYMMRegLane128( rG, 0, isL ? mkV128from32s( s2, s2, s0, s0 )
15414 : mkV128from32s( s3, s3, s1, s1 ) );
15415 return delta;
15419 static IRTemp math_HADDPS_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
15421 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
15422 IRTemp leftV = newTemp(Ity_V128);
15423 IRTemp rightV = newTemp(Ity_V128);
15424 IRTemp rm = newTemp(Ity_I32);
15425 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
15427 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
15428 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
15430 assign( leftV, mkV128from32s( s2, s0, d2, d0 ) );
15431 assign( rightV, mkV128from32s( s3, s1, d3, d1 ) );
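/* Result lanes, high to low: [ s2 op s3 : s0 op s1 : d2 op d3 : d0 op d1 ],
   which is the HADDPS/HSUBPS pairwise combine of G (d) and E (s). */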
15433 IRTemp res = newTemp(Ity_V128);
15434 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
15435 assign( res, triop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
15436 mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
15437 return res;
15441 static IRTemp math_HADDPD_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
15443 IRTemp s1, s0, d1, d0;
15444 IRTemp leftV = newTemp(Ity_V128);
15445 IRTemp rightV = newTemp(Ity_V128);
15446 IRTemp rm = newTemp(Ity_I32);
15447 s1 = s0 = d1 = d0 = IRTemp_INVALID;
15449 breakupV128to64s( sV, &s1, &s0 );
15450 breakupV128to64s( dV, &d1, &d0 );
15452 assign( leftV, binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
15453 assign( rightV, binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );
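/* Result, high:low = [ s0 op s1 : d0 op d1 ], the HADDPD/HSUBPD pairwise
   combine of G (d) and E (s). */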
15455 IRTemp res = newTemp(Ity_V128);
15456 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
15457 assign( res, triop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
15458 mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
15459 return res;
15463 __attribute__((noinline))
15464 static
15465 Long dis_ESC_0F__SSE3 ( Bool* decode_OK,
15466 const VexAbiInfo* vbi,
15467 Prefix pfx, Int sz, Long deltaIN )
15469 IRTemp addr = IRTemp_INVALID;
15470 UChar modrm = 0;
15471 Int alen = 0;
15472 HChar dis_buf[50];
15474 *decode_OK = False;
15476 Long delta = deltaIN;
15477 UChar opc = getUChar(delta);
15478 delta++;
15479 switch (opc) {
15481 case 0x12:
15482 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
15483 duplicating some lanes (2:2:0:0). */
15484 if (haveF3no66noF2(pfx) && sz == 4) {
15485 delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/,
15486 True/*isL*/ );
15487 goto decode_success;
15489 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
15490 duplicating some lanes (1:0:1:0). */
15491 if (haveF2no66noF3(pfx)
15492 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
15493 delta = dis_MOVDDUP_128( vbi, pfx, delta, False/*!isAvx*/ );
15494 goto decode_success;
15496 break;
15498 case 0x16:
15499 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
15500 duplicating some lanes (3:3:1:1). */
15501 if (haveF3no66noF2(pfx) && sz == 4) {
15502 delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/,
15503 False/*!isL*/ );
15504 goto decode_success;
15506 break;
15508 case 0x7C:
15509 case 0x7D:
15510 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
15511 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
15512 if (haveF2no66noF3(pfx) && sz == 4) {
15513 IRTemp eV = newTemp(Ity_V128);
15514 IRTemp gV = newTemp(Ity_V128);
15515 Bool isAdd = opc == 0x7C;
15516 const HChar* str = isAdd ? "add" : "sub";
15517 modrm = getUChar(delta);
15518 UInt rG = gregOfRexRM(pfx,modrm);
15519 if (epartIsReg(modrm)) {
15520 UInt rE = eregOfRexRM(pfx,modrm);
15521 assign( eV, getXMMReg(rE) );
15522 DIP("h%sps %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG));
15523 delta += 1;
15524 } else {
15525 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15526 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
15527 DIP("h%sps %s,%s\n", str, dis_buf, nameXMMReg(rG));
15528 delta += alen;
15531 assign( gV, getXMMReg(rG) );
15532 putXMMReg( rG, mkexpr( math_HADDPS_128 ( gV, eV, isAdd ) ) );
15533 goto decode_success;
15535 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
15536 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
15537 if (have66noF2noF3(pfx) && sz == 2) {
15538 IRTemp eV = newTemp(Ity_V128);
15539 IRTemp gV = newTemp(Ity_V128);
15540 Bool isAdd = opc == 0x7C;
15541 const HChar* str = isAdd ? "add" : "sub";
15542 modrm = getUChar(delta);
15543 UInt rG = gregOfRexRM(pfx,modrm);
15544 if (epartIsReg(modrm)) {
15545 UInt rE = eregOfRexRM(pfx,modrm);
15546 assign( eV, getXMMReg(rE) );
15547 DIP("h%spd %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG));
15548 delta += 1;
15549 } else {
15550 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15551 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
15552 DIP("h%spd %s,%s\n", str, dis_buf, nameXMMReg(rG));
15553 delta += alen;
15556 assign( gV, getXMMReg(rG) );
15557 putXMMReg( rG, mkexpr( math_HADDPD_128 ( gV, eV, isAdd ) ) );
15558 goto decode_success;
15560 break;
15562 case 0xD0:
15563 /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm). */
15564 if (have66noF2noF3(pfx) && sz == 2) {
15565 IRTemp eV = newTemp(Ity_V128);
15566 IRTemp gV = newTemp(Ity_V128);
15567 modrm = getUChar(delta);
15568 UInt rG = gregOfRexRM(pfx,modrm);
15569 if (epartIsReg(modrm)) {
15570 UInt rE = eregOfRexRM(pfx,modrm);
15571 assign( eV, getXMMReg(rE) );
15572 DIP("addsubpd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15573 delta += 1;
15574 } else {
15575 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15576 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
15577 DIP("addsubpd %s,%s\n", dis_buf, nameXMMReg(rG));
15578 delta += alen;
15581 assign( gV, getXMMReg(rG) );
15582 putXMMReg( rG, mkexpr( math_ADDSUBPD_128 ( gV, eV ) ) );
15583 goto decode_success;
15585 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
15586 if (haveF2no66noF3(pfx) && sz == 4) {
15587 IRTemp eV = newTemp(Ity_V128);
15588 IRTemp gV = newTemp(Ity_V128);
15589 modrm = getUChar(delta);
15590 UInt rG = gregOfRexRM(pfx,modrm);
15592 modrm = getUChar(delta);
15593 if (epartIsReg(modrm)) {
15594 UInt rE = eregOfRexRM(pfx,modrm);
15595 assign( eV, getXMMReg(rE) );
15596 DIP("addsubps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15597 delta += 1;
15598 } else {
15599 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15600 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
15601 DIP("addsubps %s,%s\n", dis_buf, nameXMMReg(rG));
15602 delta += alen;
15605 assign( gV, getXMMReg(rG) );
15606 putXMMReg( rG, mkexpr( math_ADDSUBPS_128 ( gV, eV ) ) );
15607 goto decode_success;
15609 break;
15611 case 0xF0:
15612 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */
15613 if (haveF2no66noF3(pfx) && sz == 4) {
15614 modrm = getUChar(delta);
15615 if (epartIsReg(modrm)) {
15616 goto decode_failure;
15617 } else {
15618 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15619 putXMMReg( gregOfRexRM(pfx,modrm),
15620 loadLE(Ity_V128, mkexpr(addr)) );
15621 DIP("lddqu %s,%s\n", dis_buf,
15622 nameXMMReg(gregOfRexRM(pfx,modrm)));
15623 delta += alen;
15625 goto decode_success;
15627 break;
15629 default:
15630 goto decode_failure;
15634 decode_failure:
15635 *decode_OK = False;
15636 return deltaIN;
15638 decode_success:
15639 *decode_OK = True;
15640 return delta;
15644 /*------------------------------------------------------------*/
15645 /*--- ---*/
15646 /*--- Top-level SSSE3: dis_ESC_0F38__SupSSE3 ---*/
15647 /*--- ---*/
15648 /*------------------------------------------------------------*/
15650 static
15651 IRTemp math_PSHUFB_XMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ )
15653 IRTemp halfMask = newTemp(Ity_I64);
15654 assign(halfMask, mkU64(0x8F8F8F8F8F8F8F8FULL));
15655 IRExpr* mask = binop(Iop_64HLtoV128, mkexpr(halfMask), mkexpr(halfMask));
15656 IRTemp res = newTemp(Ity_V128);
15657 assign(res,
15658 binop(Iop_PermOrZero8x16,
15659 mkexpr(dV),
15660 // Mask off bits [6:4] of each source operand lane
15661 binop(Iop_AndV128, mkexpr(sV), mask)
15663 return res;
15667 static
15668 IRTemp math_PSHUFB_YMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ )
15670 IRTemp sHi, sLo, dHi, dLo;
15671 sHi = sLo = dHi = dLo = IRTemp_INVALID;
15672 breakupV256toV128s( dV, &dHi, &dLo);
15673 breakupV256toV128s( sV, &sHi, &sLo);
15674 IRTemp res = newTemp(Ity_V256);
15675 assign(res, binop(Iop_V128HLtoV256,
15676 mkexpr(math_PSHUFB_XMM(dHi, sHi)),
15677 mkexpr(math_PSHUFB_XMM(dLo, sLo))));
15678 return res;
15682 static Long dis_PHADD_128 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
15683 Bool isAvx, UChar opc )
15685 IRTemp addr = IRTemp_INVALID;
15686 Int alen = 0;
15687 HChar dis_buf[50];
15688 const HChar* str = "???";
15689 IROp opV64 = Iop_INVALID;
15690 IROp opCatO = Iop_CatOddLanes16x4;
15691 IROp opCatE = Iop_CatEvenLanes16x4;
15692 IRTemp sV = newTemp(Ity_V128);
15693 IRTemp dV = newTemp(Ity_V128);
15694 IRTemp sHi = newTemp(Ity_I64);
15695 IRTemp sLo = newTemp(Ity_I64);
15696 IRTemp dHi = newTemp(Ity_I64);
15697 IRTemp dLo = newTemp(Ity_I64);
15698 UChar modrm = getUChar(delta);
15699 UInt rG = gregOfRexRM(pfx,modrm);
15700 UInt rV = isAvx ? getVexNvvvv(pfx) : rG;
15702 switch (opc) {
15703 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
15704 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
15705 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
15706 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
15707 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
15708 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
15709 default: vassert(0);
15711 if (opc == 0x02 || opc == 0x06) {
15712 opCatO = Iop_InterleaveHI32x2;
15713 opCatE = Iop_InterleaveLO32x2;
15716 assign( dV, getXMMReg(rV) );
15718 if (epartIsReg(modrm)) {
15719 UInt rE = eregOfRexRM(pfx,modrm);
15720 assign( sV, getXMMReg(rE) );
15721 DIP("%sph%s %s,%s\n", isAvx ? "v" : "", str,
15722 nameXMMReg(rE), nameXMMReg(rG));
15723 delta += 1;
15724 } else {
15725 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15726 if (!isAvx)
15727 gen_SIGNAL_if_not_16_aligned( vbi, addr );
15728 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15729 DIP("%sph%s %s,%s\n", isAvx ? "v" : "", str,
15730 dis_buf, nameXMMReg(rG));
15731 delta += alen;
15734 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
15735 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
15736 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
15737 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
15739 /* This isn't a particularly efficient way to compute the
15740 result, but at least it avoids a proliferation of IROps,
15741 hence avoids complicating all the backends. */
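/* For each operand, opCatE/opCatO gather its even/odd lanes and opV64
   combines each even lane with its odd partner; the G/V (d) result fills
   the low 64 bits and the E (s) result the high 64 bits. */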
15743 (isAvx ? putYMMRegLoAndZU : putXMMReg)
15744 ( rG,
15745 binop(Iop_64HLtoV128,
15746 binop(opV64,
15747 binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
15748 binop(opCatO,mkexpr(sHi),mkexpr(sLo)) ),
15749 binop(opV64,
15750 binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
15751 binop(opCatO,mkexpr(dHi),mkexpr(dLo)) ) ) );
15752 return delta;
15756 static Long dis_PHADD_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
15757 UChar opc )
15759 IRTemp addr = IRTemp_INVALID;
15760 Int alen = 0;
15761 HChar dis_buf[50];
15762 const HChar* str = "???";
15763 IROp opV64 = Iop_INVALID;
15764 IROp opCatO = Iop_CatOddLanes16x4;
15765 IROp opCatE = Iop_CatEvenLanes16x4;
15766 IRTemp sV = newTemp(Ity_V256);
15767 IRTemp dV = newTemp(Ity_V256);
15768 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
15769 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
15770 UChar modrm = getUChar(delta);
15771 UInt rG = gregOfRexRM(pfx,modrm);
15772 UInt rV = getVexNvvvv(pfx);
15774 switch (opc) {
15775 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
15776 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
15777 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
15778 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
15779 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
15780 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
15781 default: vassert(0);
15783 if (opc == 0x02 || opc == 0x06) {
15784 opCatO = Iop_InterleaveHI32x2;
15785 opCatE = Iop_InterleaveLO32x2;
15788 assign( dV, getYMMReg(rV) );
15790 if (epartIsReg(modrm)) {
15791 UInt rE = eregOfRexRM(pfx,modrm);
15792 assign( sV, getYMMReg(rE) );
15793 DIP("vph%s %s,%s\n", str, nameYMMReg(rE), nameYMMReg(rG));
15794 delta += 1;
15795 } else {
15796 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15797 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
15798 DIP("vph%s %s,%s\n", str, dis_buf, nameYMMReg(rG));
15799 delta += alen;
15802 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
15803 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
15805 /* This isn't a particularly efficient way to compute the
15806 result, but at least it avoids a proliferation of IROps,
15807 hence avoids complicating all the backends. */
15809 putYMMReg( rG,
15810 binop(Iop_V128HLtoV256,
15811 binop(Iop_64HLtoV128,
15812 binop(opV64,
15813 binop(opCatE,mkexpr(s3),mkexpr(s2)),
15814 binop(opCatO,mkexpr(s3),mkexpr(s2)) ),
15815 binop(opV64,
15816 binop(opCatE,mkexpr(d3),mkexpr(d2)),
15817 binop(opCatO,mkexpr(d3),mkexpr(d2)) ) ),
15818 binop(Iop_64HLtoV128,
15819 binop(opV64,
15820 binop(opCatE,mkexpr(s1),mkexpr(s0)),
15821 binop(opCatO,mkexpr(s1),mkexpr(s0)) ),
15822 binop(opV64,
15823 binop(opCatE,mkexpr(d1),mkexpr(d0)),
15824 binop(opCatO,mkexpr(d1),mkexpr(d0)) ) ) ) );
15825 return delta;
15829 static IRTemp math_PMADDUBSW_128 ( IRTemp dV, IRTemp sV )
15831 IRTemp res = newTemp(Ity_V128);
15832 assign(res, binop(Iop_PwExtUSMulQAdd8x16, mkexpr(dV), mkexpr(sV)));
15833 return res;
15837 static
15838 IRTemp math_PMADDUBSW_256 ( IRTemp dV, IRTemp sV )
15840 IRTemp sHi, sLo, dHi, dLo;
15841 sHi = sLo = dHi = dLo = IRTemp_INVALID;
15842 breakupV256toV128s( dV, &dHi, &dLo);
15843 breakupV256toV128s( sV, &sHi, &sLo);
15844 IRTemp res = newTemp(Ity_V256);
15845 assign(res, binop(Iop_V128HLtoV256,
15846 mkexpr(math_PMADDUBSW_128(dHi, sHi)),
15847 mkexpr(math_PMADDUBSW_128(dLo, sLo))));
15848 return res;
15852 __attribute__((noinline))
15853 static
15854 Long dis_ESC_0F38__SupSSE3 ( Bool* decode_OK,
15855 const VexAbiInfo* vbi,
15856 Prefix pfx, Int sz, Long deltaIN )
15858 IRTemp addr = IRTemp_INVALID;
15859 UChar modrm = 0;
15860 Int alen = 0;
15861 HChar dis_buf[50];
15863 *decode_OK = False;
15865 Long delta = deltaIN;
15866 UChar opc = getUChar(delta);
15867 delta++;
15868 switch (opc) {
15870 case 0x00:
15871 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
15872 if (have66noF2noF3(pfx)
15873 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
15874 IRTemp sV = newTemp(Ity_V128);
15875 IRTemp dV = newTemp(Ity_V128);
15877 modrm = getUChar(delta);
15878 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
15880 if (epartIsReg(modrm)) {
15881 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
15882 delta += 1;
15883 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
15884 nameXMMReg(gregOfRexRM(pfx,modrm)));
15885 } else {
15886 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15887 gen_SIGNAL_if_not_16_aligned( vbi, addr );
15888 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15889 delta += alen;
15890 DIP("pshufb %s,%s\n", dis_buf,
15891 nameXMMReg(gregOfRexRM(pfx,modrm)));
15894 IRTemp res = math_PSHUFB_XMM( dV, sV );
15895 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(res));
15896 goto decode_success;
15898 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
15899 if (haveNo66noF2noF3(pfx) && sz == 4) {
15900 IRTemp sV = newTemp(Ity_I64);
15901 IRTemp dV = newTemp(Ity_I64);
15903 modrm = getUChar(delta);
15904 do_MMX_preamble();
15905 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15907 if (epartIsReg(modrm)) {
15908 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15909 delta += 1;
15910 DIP("pshufb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
15911 nameMMXReg(gregLO3ofRM(modrm)));
15912 } else {
15913 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15914 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15915 delta += alen;
15916 DIP("pshufb %s,%s\n", dis_buf,
15917 nameMMXReg(gregLO3ofRM(modrm)));
15920 putMMXReg(
15921 gregLO3ofRM(modrm),
15922 binop(
15923 Iop_PermOrZero8x8,
15924 mkexpr(dV),
15925 // Mask off bits [6:3] of each source operand lane
15926 binop(Iop_And64, mkexpr(sV), mkU64(0x8787878787878787ULL))
15929 goto decode_success;
15931 break;
15933 case 0x01:
15934 case 0x02:
15935 case 0x03:
15936 case 0x05:
15937 case 0x06:
15938 case 0x07:
15939 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
15940 G to G (xmm). */
15941 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
15942 G to G (xmm). */
15943 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
15944 xmm) and G to G (xmm). */
15945 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
15946 G to G (xmm). */
15947 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
15948 G to G (xmm). */
15949 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
15950 xmm) and G to G (xmm). */
15951 if (have66noF2noF3(pfx)
15952 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
15953 delta = dis_PHADD_128( vbi, pfx, delta, False/*isAvx*/, opc );
15954 goto decode_success;
15956 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
15957 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
15958 to G (mmx). */
15959 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
15960 to G (mmx). */
15961 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
15962 mmx) and G to G (mmx). */
15963 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
15964 to G (mmx). */
15965 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
15966 to G (mmx). */
15967 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
15968 mmx) and G to G (mmx). */
15969 if (haveNo66noF2noF3(pfx) && sz == 4) {
15970 const HChar* str = "???";
15971 IROp opV64 = Iop_INVALID;
15972 IROp opCatO = Iop_CatOddLanes16x4;
15973 IROp opCatE = Iop_CatEvenLanes16x4;
15974 IRTemp sV = newTemp(Ity_I64);
15975 IRTemp dV = newTemp(Ity_I64);
15977 modrm = getUChar(delta);
15979 switch (opc) {
15980 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
15981 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
15982 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
15983 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
15984 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
15985 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
15986 default: vassert(0);
15988 if (opc == 0x02 || opc == 0x06) {
15989 opCatO = Iop_InterleaveHI32x2;
15990 opCatE = Iop_InterleaveLO32x2;
15993 do_MMX_preamble();
15994 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15996 if (epartIsReg(modrm)) {
15997 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15998 delta += 1;
15999 DIP("ph%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
16000 nameMMXReg(gregLO3ofRM(modrm)));
16001 } else {
16002 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16003 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16004 delta += alen;
16005 DIP("ph%s %s,%s\n", str, dis_buf,
16006 nameMMXReg(gregLO3ofRM(modrm)));
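/* opCatE/opCatO collect the even/odd lanes of sV:dV; opV64 then combines
   each even lane with its odd partner, giving the horizontal op. */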
16009 putMMXReg(
16010 gregLO3ofRM(modrm),
16011 binop(opV64,
16012 binop(opCatE,mkexpr(sV),mkexpr(dV)),
16013 binop(opCatO,mkexpr(sV),mkexpr(dV))
16016 goto decode_success;
16018 break;
16020 case 0x04:
16021 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
16022 Unsigned Bytes (XMM) */
16023 if (have66noF2noF3(pfx)
16024 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
16025 IRTemp sV = newTemp(Ity_V128);
16026 IRTemp dV = newTemp(Ity_V128);
16027 modrm = getUChar(delta);
16028 UInt rG = gregOfRexRM(pfx,modrm);
16030 assign( dV, getXMMReg(rG) );
16032 if (epartIsReg(modrm)) {
16033 UInt rE = eregOfRexRM(pfx,modrm);
16034 assign( sV, getXMMReg(rE) );
16035 delta += 1;
16036 DIP("pmaddubsw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
16037 } else {
16038 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16039 gen_SIGNAL_if_not_16_aligned( vbi, addr );
16040 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16041 delta += alen;
16042 DIP("pmaddubsw %s,%s\n", dis_buf, nameXMMReg(rG));
16045 putXMMReg( rG, mkexpr( math_PMADDUBSW_128( dV, sV ) ) );
16046 goto decode_success;
16048 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
16049 Unsigned Bytes (MMX) */
16050 if (haveNo66noF2noF3(pfx) && sz == 4) {
16051 IRTemp sV = newTemp(Ity_I64);
16052 IRTemp dV = newTemp(Ity_I64);
16053 IRTemp sVoddsSX = newTemp(Ity_I64);
16054 IRTemp sVevensSX = newTemp(Ity_I64);
16055 IRTemp dVoddsZX = newTemp(Ity_I64);
16056 IRTemp dVevensZX = newTemp(Ity_I64);
16058 modrm = getUChar(delta);
16059 do_MMX_preamble();
16060 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
16062 if (epartIsReg(modrm)) {
16063 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
16064 delta += 1;
16065 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
16066 nameMMXReg(gregLO3ofRM(modrm)));
16067 } else {
16068 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16069 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16070 delta += alen;
16071 DIP("pmaddubsw %s,%s\n", dis_buf,
16072 nameMMXReg(gregLO3ofRM(modrm)));
16075 /* compute dV unsigned x sV signed */
16076 assign( sVoddsSX,
16077 binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
16078 assign( sVevensSX,
16079 binop(Iop_SarN16x4,
16080 binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)),
16081 mkU8(8)) );
16082 assign( dVoddsZX,
16083 binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
16084 assign( dVevensZX,
16085 binop(Iop_ShrN16x4,
16086 binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
16087 mkU8(8)) );
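/* Each 16-bit lane of the result is the signed-saturating sum of two
   products: (signed sV byte) * (unsigned dV byte), for the odd and the
   even byte of that lane. */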
16089 putMMXReg(
16090 gregLO3ofRM(modrm),
16091 binop(Iop_QAdd16Sx4,
16092 binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
16093 binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
16096 goto decode_success;
16098 break;
16100 case 0x08:
16101 case 0x09:
16102 case 0x0A:
16103 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
16104 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
16105 /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
16106 if (have66noF2noF3(pfx)
16107 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
16108 IRTemp sV = newTemp(Ity_V128);
16109 IRTemp dV = newTemp(Ity_V128);
16110 IRTemp sHi = newTemp(Ity_I64);
16111 IRTemp sLo = newTemp(Ity_I64);
16112 IRTemp dHi = newTemp(Ity_I64);
16113 IRTemp dLo = newTemp(Ity_I64);
16114 const HChar* str = "???";
16115 Int laneszB = 0;
16117 switch (opc) {
16118 case 0x08: laneszB = 1; str = "b"; break;
16119 case 0x09: laneszB = 2; str = "w"; break;
16120 case 0x0A: laneszB = 4; str = "d"; break;
16121 default: vassert(0);
16124 modrm = getUChar(delta);
16125 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
16127 if (epartIsReg(modrm)) {
16128 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
16129 delta += 1;
16130 DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
16131 nameXMMReg(gregOfRexRM(pfx,modrm)));
16132 } else {
16133 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16134 gen_SIGNAL_if_not_16_aligned( vbi, addr );
16135 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16136 delta += alen;
16137 DIP("psign%s %s,%s\n", str, dis_buf,
16138 nameXMMReg(gregOfRexRM(pfx,modrm)));
16141 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
16142 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
16143 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
16144 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
16146 putXMMReg(
16147 gregOfRexRM(pfx,modrm),
16148 binop(Iop_64HLtoV128,
16149 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
16150 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
16153 goto decode_success;
16155 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */
16156 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
16157 /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
16158 if (haveNo66noF2noF3(pfx) && sz == 4) {
16159 IRTemp sV = newTemp(Ity_I64);
16160 IRTemp dV = newTemp(Ity_I64);
16161 const HChar* str = "???";
16162 Int laneszB = 0;
16164 switch (opc) {
16165 case 0x08: laneszB = 1; str = "b"; break;
16166 case 0x09: laneszB = 2; str = "w"; break;
16167 case 0x0A: laneszB = 4; str = "d"; break;
16168 default: vassert(0);
16171 modrm = getUChar(delta);
16172 do_MMX_preamble();
16173 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
16175 if (epartIsReg(modrm)) {
16176 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
16177 delta += 1;
16178 DIP("psign%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
16179 nameMMXReg(gregLO3ofRM(modrm)));
16180 } else {
16181 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16182 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16183 delta += alen;
16184 DIP("psign%s %s,%s\n", str, dis_buf,
16185 nameMMXReg(gregLO3ofRM(modrm)));
16188 putMMXReg(
16189 gregLO3ofRM(modrm),
16190 dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
16192 goto decode_success;
16194 break;
16196 case 0x0B:
16197 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
16198 Scale (XMM) */
16199 if (have66noF2noF3(pfx)
16200 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
16201 IRTemp sV = newTemp(Ity_V128);
16202 IRTemp dV = newTemp(Ity_V128);
16203 IRTemp sHi = newTemp(Ity_I64);
16204 IRTemp sLo = newTemp(Ity_I64);
16205 IRTemp dHi = newTemp(Ity_I64);
16206 IRTemp dLo = newTemp(Ity_I64);
16208 modrm = getUChar(delta);
16209 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
16211 if (epartIsReg(modrm)) {
16212 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
16213 delta += 1;
16214 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
16215 nameXMMReg(gregOfRexRM(pfx,modrm)));
16216 } else {
16217 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16218 gen_SIGNAL_if_not_16_aligned( vbi, addr );
16219 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16220 delta += alen;
16221 DIP("pmulhrsw %s,%s\n", dis_buf,
16222 nameXMMReg(gregOfRexRM(pfx,modrm)));
16225 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
16226 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
16227 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
16228 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
16230 putXMMReg(
16231 gregOfRexRM(pfx,modrm),
16232 binop(Iop_64HLtoV128,
16233 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
16234 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
16237 goto decode_success;
16239 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
16240 (MMX) */
16241 if (haveNo66noF2noF3(pfx) && sz == 4) {
16242 IRTemp sV = newTemp(Ity_I64);
16243 IRTemp dV = newTemp(Ity_I64);
16245 modrm = getUChar(delta);
16246 do_MMX_preamble();
16247 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
16249 if (epartIsReg(modrm)) {
16250 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
16251 delta += 1;
16252 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
16253 nameMMXReg(gregLO3ofRM(modrm)));
16254 } else {
16255 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16256 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16257 delta += alen;
16258 DIP("pmulhrsw %s,%s\n", dis_buf,
16259 nameMMXReg(gregLO3ofRM(modrm)));
16262 putMMXReg(
16263 gregLO3ofRM(modrm),
16264 dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
16266 goto decode_success;
16268 break;
16270 case 0x1C:
16271 case 0x1D:
16272 case 0x1E:
16273 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
16274 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
16275 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
16276 if (have66noF2noF3(pfx)
16277 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
16278 IRTemp sV = newTemp(Ity_V128);
16279 const HChar* str = "???";
16280 Int laneszB = 0;
16282 switch (opc) {
16283 case 0x1C: laneszB = 1; str = "b"; break;
16284 case 0x1D: laneszB = 2; str = "w"; break;
16285 case 0x1E: laneszB = 4; str = "d"; break;
16286 default: vassert(0);
16289 modrm = getUChar(delta);
16290 if (epartIsReg(modrm)) {
16291 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
16292 delta += 1;
16293 DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
16294 nameXMMReg(gregOfRexRM(pfx,modrm)));
16295 } else {
16296 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16297 gen_SIGNAL_if_not_16_aligned( vbi, addr );
16298 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16299 delta += alen;
16300 DIP("pabs%s %s,%s\n", str, dis_buf,
16301 nameXMMReg(gregOfRexRM(pfx,modrm)));
16304 putXMMReg( gregOfRexRM(pfx,modrm),
16305 mkexpr(math_PABS_XMM(sV, laneszB)) );
16306 goto decode_success;
16308 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */
16309 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
16310 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
16311 if (haveNo66noF2noF3(pfx) && sz == 4) {
16312 IRTemp sV = newTemp(Ity_I64);
16313 const HChar* str = "???";
16314 Int laneszB = 0;
16316 switch (opc) {
16317 case 0x1C: laneszB = 1; str = "b"; break;
16318 case 0x1D: laneszB = 2; str = "w"; break;
16319 case 0x1E: laneszB = 4; str = "d"; break;
16320 default: vassert(0);
16323 modrm = getUChar(delta);
16324 do_MMX_preamble();
16326 if (epartIsReg(modrm)) {
16327 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
16328 delta += 1;
16329 DIP("pabs%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
16330 nameMMXReg(gregLO3ofRM(modrm)));
16331 } else {
16332 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16333 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16334 delta += alen;
16335 DIP("pabs%s %s,%s\n", str, dis_buf,
16336 nameMMXReg(gregLO3ofRM(modrm)));
16339 putMMXReg( gregLO3ofRM(modrm),
16340 mkexpr(math_PABS_MMX( sV, laneszB )) );
16341 goto decode_success;
16343 break;
16345 default:
16346 break;
16350 //decode_failure:
16351 *decode_OK = False;
16352 return deltaIN;
16354 decode_success:
16355 *decode_OK = True;
16356 return delta;
16360 /*------------------------------------------------------------*/
16361 /*--- ---*/
16362 /*--- Top-level SSSE3: dis_ESC_0F3A__SupSSE3 ---*/
16363 /*--- ---*/
16364 /*------------------------------------------------------------*/
16366 __attribute__((noinline))
16367 static
16368 Long dis_ESC_0F3A__SupSSE3 ( Bool* decode_OK,
16369 const VexAbiInfo* vbi,
16370 Prefix pfx, Int sz, Long deltaIN )
16372 Long d64 = 0;
16373 IRTemp addr = IRTemp_INVALID;
16374 UChar modrm = 0;
16375 Int alen = 0;
16376 HChar dis_buf[50];
16378 *decode_OK = False;
16380 Long delta = deltaIN;
16381 UChar opc = getUChar(delta);
16382 delta++;
16383 switch (opc) {
16385 case 0x0F:
16386 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
16387 if (have66noF2noF3(pfx)
16388 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
16389 IRTemp sV = newTemp(Ity_V128);
16390 IRTemp dV = newTemp(Ity_V128);
16392 modrm = getUChar(delta);
16393 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
16395 if (epartIsReg(modrm)) {
16396 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
16397 d64 = (Long)getUChar(delta+1);
16398 delta += 1+1;
16399 DIP("palignr $%lld,%s,%s\n", d64,
16400 nameXMMReg(eregOfRexRM(pfx,modrm)),
16401 nameXMMReg(gregOfRexRM(pfx,modrm)));
16402 } else {
16403 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
16404 gen_SIGNAL_if_not_16_aligned( vbi, addr );
16405 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16406 d64 = (Long)getUChar(delta+alen);
16407 delta += alen+1;
16408 DIP("palignr $%lld,%s,%s\n", d64,
16409 dis_buf,
16410 nameXMMReg(gregOfRexRM(pfx,modrm)));
16413 IRTemp res = math_PALIGNR_XMM( sV, dV, d64 );
16414 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
16415 goto decode_success;
16417 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
16418 if (haveNo66noF2noF3(pfx) && sz == 4) {
16419 IRTemp sV = newTemp(Ity_I64);
16420 IRTemp dV = newTemp(Ity_I64);
16421 IRTemp res = newTemp(Ity_I64);
16423 modrm = getUChar(delta);
16424 do_MMX_preamble();
16425 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
16427 if (epartIsReg(modrm)) {
16428 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
16429 d64 = (Long)getUChar(delta+1);
16430 delta += 1+1;
16431 DIP("palignr $%lld,%s,%s\n", d64,
16432 nameMMXReg(eregLO3ofRM(modrm)),
16433 nameMMXReg(gregLO3ofRM(modrm)));
16434 } else {
16435 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
16436 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16437 d64 = (Long)getUChar(delta+alen);
16438 delta += alen+1;
16439 DIP("palignr $%lld%s,%s\n", d64,
16440 dis_buf,
16441 nameMMXReg(gregLO3ofRM(modrm)));
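/* Extract 64 bits starting at byte offset d64 from the 128-bit
   concatenation dV:sV (dV in the high half); offsets >= 16 give zero. */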
16444 if (d64 == 0) {
16445 assign( res, mkexpr(sV) );
16447 else if (d64 >= 1 && d64 <= 7) {
16448 assign(res,
16449 binop(Iop_Or64,
16450 binop(Iop_Shr64, mkexpr(sV), mkU8(8*d64)),
16451 binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d64))
16452 )));
16454 else if (d64 == 8) {
16455 assign( res, mkexpr(dV) );
16457 else if (d64 >= 9 && d64 <= 15) {
16458 assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d64-8))) );
16460 else if (d64 >= 16 && d64 <= 255) {
16461 assign( res, mkU64(0) );
16463 else
16464 vassert(0);
16466 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
16467 goto decode_success;
16469 break;
16471 default:
16472 break;
16476 //decode_failure:
16477 *decode_OK = False;
16478 return deltaIN;
16480 decode_success:
16481 *decode_OK = True;
16482 return delta;
16486 /*------------------------------------------------------------*/
16487 /*--- ---*/
16488 /*--- Top-level SSE4: dis_ESC_0F__SSE4 ---*/
16489 /*--- ---*/
16490 /*------------------------------------------------------------*/
16492 __attribute__((noinline))
16493 static
16494 Long dis_ESC_0F__SSE4 ( Bool* decode_OK,
16495 const VexArchInfo* archinfo,
16496 const VexAbiInfo* vbi,
16497 Prefix pfx, Int sz, Long deltaIN )
16499 IRTemp addr = IRTemp_INVALID;
16500 IRType ty = Ity_INVALID;
16501 UChar modrm = 0;
16502 Int alen = 0;
16503 HChar dis_buf[50];
16505 *decode_OK = False;
16507 Long delta = deltaIN;
16508 UChar opc = getUChar(delta);
16509 delta++;
16510 switch (opc) {
16512 case 0xB8:
16513 /* F3 0F B8 = POPCNT{W,L,Q}
16514 Count the number of 1 bits in a register. */
16516 if (haveF3noF2(pfx) /* so both 66 and REX.W are possibilities */
16517 && (sz == 2 || sz == 4 || sz == 8)) {
16518 /*IRType*/ ty = szToITy(sz);
16519 IRTemp src = newTemp(ty);
16520 modrm = getUChar(delta);
16521 if (epartIsReg(modrm)) {
16522 assign(src, getIRegE(sz, pfx, modrm));
16523 delta += 1;
16524 DIP("popcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
16525 nameIRegG(sz, pfx, modrm));
16526 } else {
16527 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
16528 assign(src, loadLE(ty, mkexpr(addr)));
16529 delta += alen;
16530 DIP("popcnt%c %s, %s\n", nameISize(sz), dis_buf,
16531 nameIRegG(sz, pfx, modrm));
16534 IRTemp result = gen_POPCOUNT(ty, src);
16535 putIRegG(sz, pfx, modrm, mkexpr(result));
16537 // Update flags. This is pretty lame .. perhaps can do better
16538 // if this turns out to be performance critical.
16539 // O S A C P are cleared. Z is set if SRC == 0.
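// With AMD64G_CC_OP_COPY the flag bits are taken directly from CC_DEP1,
// so only Z can become set here.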
16540 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16541 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16542 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16543 stmt( IRStmt_Put( OFFB_CC_DEP1,
16544 binop(Iop_Shl64,
16545 unop(Iop_1Uto64,
16546 binop(Iop_CmpEQ64,
16547 widenUto64(mkexpr(src)),
16548 mkU64(0))),
16549 mkU8(AMD64G_CC_SHIFT_Z))));
16551 goto decode_success;
16553 break;
16555 case 0xBC:
16556 /* F3 0F BC -- TZCNT (count trailing zeroes). A BMI extension,
16557 which we can only decode if we're sure this is a BMI1 capable cpu
16558 that supports TZCNT, since otherwise it's BSF, which behaves
16559 differently on zero source. */
16560 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
16561 && (sz == 2 || sz == 4 || sz == 8)
16562 && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_BMI)) {
16563 /*IRType*/ ty = szToITy(sz);
16564 IRTemp src = newTemp(ty);
16565 modrm = getUChar(delta);
16566 if (epartIsReg(modrm)) {
16567 assign(src, getIRegE(sz, pfx, modrm));
16568 delta += 1;
16569 DIP("tzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
16570 nameIRegG(sz, pfx, modrm));
16571 } else {
16572 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
16573 assign(src, loadLE(ty, mkexpr(addr)));
16574 delta += alen;
16575 DIP("tzcnt%c %s, %s\n", nameISize(sz), dis_buf,
16576 nameIRegG(sz, pfx, modrm));
16579 IRTemp res = gen_TZCNT(ty, src);
16580 putIRegG(sz, pfx, modrm, mkexpr(res));
16582 // Update flags. This is pretty lame .. perhaps can do better
16583 // if this turns out to be performance critical.
16584 // O S A P are cleared. Z is set if RESULT == 0.
16585 // C is set if SRC is zero.
16586 IRTemp src64 = newTemp(Ity_I64);
16587 IRTemp res64 = newTemp(Ity_I64);
16588 assign(src64, widenUto64(mkexpr(src)));
16589 assign(res64, widenUto64(mkexpr(res)));
16591 IRTemp oszacp = newTemp(Ity_I64);
16592 assign(
16593 oszacp,
16594 binop(Iop_Or64,
16595 binop(Iop_Shl64,
16596 unop(Iop_1Uto64,
16597 binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))),
16598 mkU8(AMD64G_CC_SHIFT_Z)),
16599 binop(Iop_Shl64,
16600 unop(Iop_1Uto64,
16601 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))),
16602 mkU8(AMD64G_CC_SHIFT_C))
16606 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16607 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16608 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16609 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));
16611 goto decode_success;
16613 break;
16615 case 0xBD:
16616 /* F3 0F BD -- LZCNT (count leading zeroes). An AMD extension,
16617 which we can only decode if we're sure this is an AMD cpu
16618 that supports LZCNT, since otherwise it's BSR, which behaves
16619 differently. Bizarrely, my Sandy Bridge also accepts these
16620 instructions but produces different results. */
16621 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
16622 && (sz == 2 || sz == 4 || sz == 8)
16623 && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT)) {
16624 /*IRType*/ ty = szToITy(sz);
16625 IRTemp src = newTemp(ty);
16626 modrm = getUChar(delta);
16627 if (epartIsReg(modrm)) {
16628 assign(src, getIRegE(sz, pfx, modrm));
16629 delta += 1;
16630 DIP("lzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
16631 nameIRegG(sz, pfx, modrm));
16632 } else {
16633 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
16634 assign(src, loadLE(ty, mkexpr(addr)));
16635 delta += alen;
16636 DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf,
16637 nameIRegG(sz, pfx, modrm));
16640 IRTemp res = gen_LZCNT(ty, src);
16641 putIRegG(sz, pfx, modrm, mkexpr(res));
16643 // Update flags. This is pretty lame .. perhaps can do better
16644 // if this turns out to be performance critical.
16645 // O S A P are cleared. Z is set if RESULT == 0.
16646 // C is set if SRC is zero.
16647 IRTemp src64 = newTemp(Ity_I64);
16648 IRTemp res64 = newTemp(Ity_I64);
16649 assign(src64, widenUto64(mkexpr(src)));
16650 assign(res64, widenUto64(mkexpr(res)));
16652 IRTemp oszacp = newTemp(Ity_I64);
16653 assign(
16654 oszacp,
16655 binop(Iop_Or64,
16656 binop(Iop_Shl64,
16657 unop(Iop_1Uto64,
16658 binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))),
16659 mkU8(AMD64G_CC_SHIFT_Z)),
16660 binop(Iop_Shl64,
16661 unop(Iop_1Uto64,
16662 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))),
16663 mkU8(AMD64G_CC_SHIFT_C))
16667 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16668 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16669 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16670 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));
16672 goto decode_success;
16674 break;
16676 default:
16677 break;
16681 //decode_failure:
16682 *decode_OK = False;
16683 return deltaIN;
16685 decode_success:
16686 *decode_OK = True;
16687 return delta;
16691 /*------------------------------------------------------------*/
16692 /*--- ---*/
16693 /*--- Top-level SSE4: dis_ESC_0F38__SSE4 ---*/
16694 /*--- ---*/
16695 /*------------------------------------------------------------*/
16697 static IRTemp math_PBLENDVB_128 ( IRTemp vecE, IRTemp vecG,
16698 IRTemp vec0/*controlling mask*/,
16699 UInt gran, IROp opSAR )
16701 /* The tricky bit is to convert vec0 into a suitable mask, by
16702 copying the most significant bit of each lane into all positions
16703 in the lane. */
16704 IRTemp sh = newTemp(Ity_I8);
16705 assign(sh, mkU8(8 * gran - 1));
16707 IRTemp mask = newTemp(Ity_V128);
16708 assign(mask, binop(opSAR, mkexpr(vec0), mkexpr(sh)));
16710 IRTemp notmask = newTemp(Ity_V128);
16711 assign(notmask, unop(Iop_NotV128, mkexpr(mask)));
16713 IRTemp res = newTemp(Ity_V128);
16714 assign(res, binop(Iop_OrV128,
16715 binop(Iop_AndV128, mkexpr(vecE), mkexpr(mask)),
16716 binop(Iop_AndV128, mkexpr(vecG), mkexpr(notmask))));
16717 return res;
16720 static IRTemp math_PBLENDVB_256 ( IRTemp vecE, IRTemp vecG,
16721 IRTemp vec0/*controlling mask*/,
16722 UInt gran, IROp opSAR128 )
16724 /* The tricky bit is to convert vec0 into a suitable mask, by
16725 copying the most significant bit of each lane into all positions
16726 in the lane. */
16727 IRTemp sh = newTemp(Ity_I8);
16728 assign(sh, mkU8(8 * gran - 1));
16730 IRTemp vec0Hi = IRTemp_INVALID;
16731 IRTemp vec0Lo = IRTemp_INVALID;
16732 breakupV256toV128s( vec0, &vec0Hi, &vec0Lo );
16734 IRTemp mask = newTemp(Ity_V256);
16735 assign(mask, binop(Iop_V128HLtoV256,
16736 binop(opSAR128, mkexpr(vec0Hi), mkexpr(sh)),
16737 binop(opSAR128, mkexpr(vec0Lo), mkexpr(sh))));
16739 IRTemp notmask = newTemp(Ity_V256);
16740 assign(notmask, unop(Iop_NotV256, mkexpr(mask)));
16742 IRTemp res = newTemp(Ity_V256);
16743 assign(res, binop(Iop_OrV256,
16744 binop(Iop_AndV256, mkexpr(vecE), mkexpr(mask)),
16745 binop(Iop_AndV256, mkexpr(vecG), mkexpr(notmask))));
16746 return res;
16749 static Long dis_VBLENDV_128 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
16750 const HChar *name, UInt gran, IROp opSAR )
16752 IRTemp addr = IRTemp_INVALID;
16753 Int alen = 0;
16754 HChar dis_buf[50];
16755 UChar modrm = getUChar(delta);
16756 UInt rG = gregOfRexRM(pfx, modrm);
16757 UInt rV = getVexNvvvv(pfx);
16758 UInt rIS4 = 0xFF; /* invalid */
16759 IRTemp vecE = newTemp(Ity_V128);
16760 IRTemp vecV = newTemp(Ity_V128);
16761 IRTemp vecIS4 = newTemp(Ity_V128);
16762 if (epartIsReg(modrm)) {
16763 delta++;
16764 UInt rE = eregOfRexRM(pfx, modrm);
16765 assign(vecE, getXMMReg(rE));
16766 UChar ib = getUChar(delta);
16767 rIS4 = (ib >> 4) & 0xF;
16768 DIP("%s %s,%s,%s,%s\n",
16769 name, nameXMMReg(rIS4), nameXMMReg(rE),
16770 nameXMMReg(rV), nameXMMReg(rG));
16771 } else {
16772 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
16773 delta += alen;
16774 assign(vecE, loadLE(Ity_V128, mkexpr(addr)));
16775 UChar ib = getUChar(delta);
16776 rIS4 = (ib >> 4) & 0xF;
16777 DIP("%s %s,%s,%s,%s\n",
16778 name, nameXMMReg(rIS4), dis_buf, nameXMMReg(rV), nameXMMReg(rG));
16780 delta++;
16781 assign(vecV, getXMMReg(rV));
16782 assign(vecIS4, getXMMReg(rIS4));
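/* Lanes whose top bit is set in the is4 register take their value from
   E; the remaining lanes come from V. */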
16783 IRTemp res = math_PBLENDVB_128( vecE, vecV, vecIS4, gran, opSAR );
16784 putYMMRegLoAndZU( rG, mkexpr(res) );
16785 return delta;
16788 static Long dis_VBLENDV_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
16789 const HChar *name, UInt gran, IROp opSAR128 )
16791 IRTemp addr = IRTemp_INVALID;
16792 Int alen = 0;
16793 HChar dis_buf[50];
16794 UChar modrm = getUChar(delta);
16795 UInt rG = gregOfRexRM(pfx, modrm);
16796 UInt rV = getVexNvvvv(pfx);
16797 UInt rIS4 = 0xFF; /* invalid */
16798 IRTemp vecE = newTemp(Ity_V256);
16799 IRTemp vecV = newTemp(Ity_V256);
16800 IRTemp vecIS4 = newTemp(Ity_V256);
16801 if (epartIsReg(modrm)) {
16802 delta++;
16803 UInt rE = eregOfRexRM(pfx, modrm);
16804 assign(vecE, getYMMReg(rE));
16805 UChar ib = getUChar(delta);
16806 rIS4 = (ib >> 4) & 0xF;
16807 DIP("%s %s,%s,%s,%s\n",
16808 name, nameYMMReg(rIS4), nameYMMReg(rE),
16809 nameYMMReg(rV), nameYMMReg(rG));
16810 } else {
16811 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
16812 delta += alen;
16813 assign(vecE, loadLE(Ity_V256, mkexpr(addr)));
16814 UChar ib = getUChar(delta);
16815 rIS4 = (ib >> 4) & 0xF;
16816 DIP("%s %s,%s,%s,%s\n",
16817 name, nameYMMReg(rIS4), dis_buf, nameYMMReg(rV), nameYMMReg(rG));
16819 delta++;
16820 assign(vecV, getYMMReg(rV));
16821 assign(vecIS4, getYMMReg(rIS4));
16822 IRTemp res = math_PBLENDVB_256( vecE, vecV, vecIS4, gran, opSAR128 );
16823 putYMMReg( rG, mkexpr(res) );
16824 return delta;
16827 static void finish_xTESTy ( IRTemp andV, IRTemp andnV, Int sign )
16829 /* Set Z=1 iff (vecE & vecG) == 0
16830 Set C=1 iff (vecE & not vecG) == 0
16833 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
16835 /* andV resp. andnV, reduced to 64-bit values, by or-ing the top
16836 and bottom 64-bits together. It relies on this trick:
16838 InterleaveLO64x2([a,b],[c,d]) == [b,d] hence
16840 InterleaveLO64x2([a,b],[a,b]) == [b,b] and similarly
16841 InterleaveHI64x2([a,b],[a,b]) == [a,a]
16843 and so the OR of the above 2 exprs produces
16844 [a OR b, a OR b], from which we simply take the lower half.
16846 IRTemp and64 = newTemp(Ity_I64);
16847 IRTemp andn64 = newTemp(Ity_I64);
16849 assign(and64,
16850 unop(Iop_V128to64,
16851 binop(Iop_OrV128,
16852 binop(Iop_InterleaveLO64x2,
16853 mkexpr(andV), mkexpr(andV)),
16854 binop(Iop_InterleaveHI64x2,
16855 mkexpr(andV), mkexpr(andV)))));
16857 assign(andn64,
16858 unop(Iop_V128to64,
16859 binop(Iop_OrV128,
16860 binop(Iop_InterleaveLO64x2,
16861 mkexpr(andnV), mkexpr(andnV)),
16862 binop(Iop_InterleaveHI64x2,
16863 mkexpr(andnV), mkexpr(andnV)))));
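   /* Scalar sketch (illustrative only) of the reduction above: viewing vecE
      and vecG as 64-bit halves (eHi,eLo) and (gHi,gLo),
         and64  == (eHi & gHi)  | (eLo & gLo)
         andn64 == (eHi & ~gHi) | (eLo & ~gLo)
      so and64 == 0 exactly when vecE & vecG == 0 (the Z condition) and
      andn64 == 0 exactly when vecE & ~vecG == 0 (the C condition). */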
16865 IRTemp z64 = newTemp(Ity_I64);
16866 IRTemp c64 = newTemp(Ity_I64);
16867 if (sign == 64) {
16868      /* When only interested in the most significant bit, just shift
16869         arithmetically right and invert (bitwise NOT). */
16870 assign(z64,
16871 unop(Iop_Not64,
16872 binop(Iop_Sar64, mkexpr(and64), mkU8(63))));
16874 assign(c64,
16875 unop(Iop_Not64,
16876 binop(Iop_Sar64, mkexpr(andn64), mkU8(63))));
16877 } else {
16878 if (sign == 32) {
16879 /* When interested in bit 31 and bit 63, mask those bits and
16880 fallthrough into the PTEST handling. */
16881 IRTemp t0 = newTemp(Ity_I64);
16882 IRTemp t1 = newTemp(Ity_I64);
16883 IRTemp t2 = newTemp(Ity_I64);
16884 assign(t0, mkU64(0x8000000080000000ULL));
16885 assign(t1, binop(Iop_And64, mkexpr(and64), mkexpr(t0)));
16886 assign(t2, binop(Iop_And64, mkexpr(andn64), mkexpr(t0)));
16887 and64 = t1;
16888 andn64 = t2;
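         /* After the OR-reduction above, bit 31 of and64 is the OR of the
            sign bits of 32-bit lanes 0 and 2 of andV, and bit 63 the OR of
            lanes 1 and 3; so the masked value is zero iff every 32-bit lane
            of vecE & vecG has a clear sign bit, which is what VTESTPS needs
            for Z (and similarly andn64 for C). */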
16890 /* Now convert and64, andn64 to all-zeroes or all-1s, so we can
16891 slice out the Z and C bits conveniently. We use the standard
16892 trick all-zeroes -> all-zeroes, anything-else -> all-ones
16893 done by "(x | -x) >>s (word-size - 1)".
16895 assign(z64,
16896 unop(Iop_Not64,
16897 binop(Iop_Sar64,
16898 binop(Iop_Or64,
16899 binop(Iop_Sub64, mkU64(0), mkexpr(and64)),
16900 mkexpr(and64)), mkU8(63))));
16902 assign(c64,
16903 unop(Iop_Not64,
16904 binop(Iop_Sar64,
16905 binop(Iop_Or64,
16906 binop(Iop_Sub64, mkU64(0), mkexpr(andn64)),
16907 mkexpr(andn64)), mkU8(63))));
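      /* For example (illustrative): if and64 == 0 then (0 | -0) >>s 63 == 0
         and the Not64 makes z64 all-ones, so Z gets set below; for any
         nonzero and64, bit 63 of (x | -x) is set, the arithmetic shift gives
         all-ones and z64 becomes zero. */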
16910 /* And finally, slice out the Z and C flags and set the flags
16911 thunk to COPY for them. OSAP are set to zero. */
16912 IRTemp newOSZACP = newTemp(Ity_I64);
16913 assign(newOSZACP,
16914 binop(Iop_Or64,
16915 binop(Iop_And64, mkexpr(z64), mkU64(AMD64G_CC_MASK_Z)),
16916 binop(Iop_And64, mkexpr(c64), mkU64(AMD64G_CC_MASK_C))));
16918 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(newOSZACP)));
16919 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16920 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16921 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16925 /* Handles 128 bit versions of PTEST, VTESTPS or VTESTPD.
16926 sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
16927 static Long dis_xTESTy_128 ( const VexAbiInfo* vbi, Prefix pfx,
16928 Long delta, Bool isAvx, Int sign )
16930 IRTemp addr = IRTemp_INVALID;
16931 Int alen = 0;
16932 HChar dis_buf[50];
16933 UChar modrm = getUChar(delta);
16934 UInt rG = gregOfRexRM(pfx, modrm);
16935 IRTemp vecE = newTemp(Ity_V128);
16936 IRTemp vecG = newTemp(Ity_V128);
16938 if ( epartIsReg(modrm) ) {
16939 UInt rE = eregOfRexRM(pfx, modrm);
16940 assign(vecE, getXMMReg(rE));
16941 delta += 1;
16942 DIP( "%s%stest%s %s,%s\n",
16943 isAvx ? "v" : "", sign == 0 ? "p" : "",
16944 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
16945 nameXMMReg(rE), nameXMMReg(rG) );
16946 } else {
16947 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16948 if (!isAvx)
16949 gen_SIGNAL_if_not_16_aligned( vbi, addr );
16950 assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
16951 delta += alen;
16952 DIP( "%s%stest%s %s,%s\n",
16953 isAvx ? "v" : "", sign == 0 ? "p" : "",
16954 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
16955 dis_buf, nameXMMReg(rG) );
16958 assign(vecG, getXMMReg(rG));
16960 /* Set Z=1 iff (vecE & vecG) == 0
16961 Set C=1 iff (vecE & not vecG) == 0
16964 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
16965 IRTemp andV = newTemp(Ity_V128);
16966 IRTemp andnV = newTemp(Ity_V128);
16967 assign(andV, binop(Iop_AndV128, mkexpr(vecE), mkexpr(vecG)));
16968 assign(andnV, binop(Iop_AndV128,
16969 mkexpr(vecE),
16970 binop(Iop_XorV128, mkexpr(vecG),
16971 mkV128(0xFFFF))));
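   /* (Note: mkV128 takes a 16-bit mask with one bit per byte, so
      mkV128(0xFFFF) is the all-ones 128-bit constant and the Xor above
      computes not(vecG).) */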
16973 finish_xTESTy ( andV, andnV, sign );
16974 return delta;
16978 /* Handles 256 bit versions of PTEST, VTESTPS or VTESTPD.
16979 sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
16980 static Long dis_xTESTy_256 ( const VexAbiInfo* vbi, Prefix pfx,
16981 Long delta, Int sign )
16983 IRTemp addr = IRTemp_INVALID;
16984 Int alen = 0;
16985 HChar dis_buf[50];
16986 UChar modrm = getUChar(delta);
16987 UInt rG = gregOfRexRM(pfx, modrm);
16988 IRTemp vecE = newTemp(Ity_V256);
16989 IRTemp vecG = newTemp(Ity_V256);
16991 if ( epartIsReg(modrm) ) {
16992 UInt rE = eregOfRexRM(pfx, modrm);
16993 assign(vecE, getYMMReg(rE));
16994 delta += 1;
16995 DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
16996 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
16997 nameYMMReg(rE), nameYMMReg(rG) );
16998 } else {
16999 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17000 assign(vecE, loadLE( Ity_V256, mkexpr(addr) ));
17001 delta += alen;
17002 DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
17003 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
17004 dis_buf, nameYMMReg(rG) );
17007 assign(vecG, getYMMReg(rG));
17009 /* Set Z=1 iff (vecE & vecG) == 0
17010 Set C=1 iff (vecE & not vecG) == 0
17013 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
17014 IRTemp andV = newTemp(Ity_V256);
17015 IRTemp andnV = newTemp(Ity_V256);
17016 assign(andV, binop(Iop_AndV256, mkexpr(vecE), mkexpr(vecG)));
17017 assign(andnV, binop(Iop_AndV256,
17018 mkexpr(vecE), unop(Iop_NotV256, mkexpr(vecG))));
17020 IRTemp andVhi = IRTemp_INVALID;
17021 IRTemp andVlo = IRTemp_INVALID;
17022 IRTemp andnVhi = IRTemp_INVALID;
17023 IRTemp andnVlo = IRTemp_INVALID;
17024 breakupV256toV128s( andV, &andVhi, &andVlo );
17025 breakupV256toV128s( andnV, &andnVhi, &andnVlo );
17027 IRTemp andV128 = newTemp(Ity_V128);
17028 IRTemp andnV128 = newTemp(Ity_V128);
17029 assign( andV128, binop( Iop_OrV128, mkexpr(andVhi), mkexpr(andVlo) ) );
17030 assign( andnV128, binop( Iop_OrV128, mkexpr(andnVhi), mkexpr(andnVlo) ) );
17032 finish_xTESTy ( andV128, andnV128, sign );
17033 return delta;
17037 /* Handles 128 and 256 bit versions of VCVTPH2PS. */
17038 static Long dis_VCVTPH2PS ( const VexAbiInfo* vbi, Prefix pfx,
17039 Long delta, Bool is256bit )
17041    /* This is a width-doubling load or reg-reg move that does conversion
17042       on the transferred data. */
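   /* Concretely: the 128-bit form reads 4 half-precision values (64 bits)
      and widens them to 4 x F32; the 256-bit form reads 8 halves (128 bits)
      and widens them to 8 x F32 -- hence Iop_F16toF32x4 / Iop_F16toF32x8
      below. */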
17043 UChar modrm = getUChar(delta);
17044 UInt rG = gregOfRexRM(pfx, modrm);
17045 IRTemp srcE = newTemp(is256bit ? Ity_V128 : Ity_I64);
17047 if (epartIsReg(modrm)) {
17048 UInt rE = eregOfRexRM(pfx, modrm);
17049 assign(srcE, is256bit ? unop(Iop_V256toV128_0, getYMMReg(rE))
17050 : unop(Iop_V128to64, getXMMReg(rE)));
17051 delta += 1;
17052 DIP("vcvtph2ps %s,%s\n", nameXMMReg(rE),
17053 (is256bit ? nameYMMReg: nameXMMReg)(rG));
17054 } else {
17055 Int alen = 0;
17056 HChar dis_buf[50];
17057 IRTemp addr = disAMode(&alen, vbi, pfx, delta, dis_buf, 0);
17058       // I don't think we need an alignment check here (not 100% sure, though).
17059 assign(srcE, loadLE(is256bit ? Ity_V128 : Ity_I64, mkexpr(addr)));
17060 delta += alen;
17061 DIP( "vcvtph2ps %s,%s\n", dis_buf,
17062 (is256bit ? nameYMMReg: nameXMMReg)(rG));
17065 IRExpr* res = unop(is256bit ? Iop_F16toF32x8 : Iop_F16toF32x4, mkexpr(srcE));
17066 (is256bit ? putYMMReg : putYMMRegLoAndZU)(rG, res);
17068 return delta;
17072 /* Handles 128 bit versions of PMOVZXBW and PMOVSXBW. */
17073 static Long dis_PMOVxXBW_128 ( const VexAbiInfo* vbi, Prefix pfx,
17074 Long delta, Bool isAvx, Bool xIsZ )
17076 IRTemp addr = IRTemp_INVALID;
17077 Int alen = 0;
17078 HChar dis_buf[50];
17079 IRTemp srcVec = newTemp(Ity_V128);
17080 UChar modrm = getUChar(delta);
17081 const HChar* mbV = isAvx ? "v" : "";
17082 const HChar how = xIsZ ? 'z' : 's';
17083 UInt rG = gregOfRexRM(pfx, modrm);
17084 if ( epartIsReg(modrm) ) {
17085 UInt rE = eregOfRexRM(pfx, modrm);
17086 assign( srcVec, getXMMReg(rE) );
17087 delta += 1;
17088 DIP( "%spmov%cxbw %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
17089 } else {
17090 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17091 assign( srcVec,
17092 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
17093 delta += alen;
17094 DIP( "%spmov%cxbw %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
17097 IRExpr* res
17098 = xIsZ /* do math for either zero or sign extend */
17099 ? binop( Iop_InterleaveLO8x16,
17100 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) )
17101 : binop( Iop_SarN16x8,
17102 binop( Iop_ShlN16x8,
17103 binop( Iop_InterleaveLO8x16,
17104 IRExpr_Const( IRConst_V128(0) ),
17105 mkexpr(srcVec) ),
17106 mkU8(8) ),
17107 mkU8(8) );
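   /* Here InterleaveLO8x16 with a zero vector places each source byte in the
      low half of a 16-bit lane, i.e. a zero extension; the Shl-by-8 /
      Sar-by-8 pair then turns that into a sign extension when needed. */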
17109 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );
17111 return delta;
17115 /* Handles 256 bit versions of PMOVZXBW and PMOVSXBW. */
17116 static Long dis_PMOVxXBW_256 ( const VexAbiInfo* vbi, Prefix pfx,
17117 Long delta, Bool xIsZ )
17119 IRTemp addr = IRTemp_INVALID;
17120 Int alen = 0;
17121 HChar dis_buf[50];
17122 IRTemp srcVec = newTemp(Ity_V128);
17123 UChar modrm = getUChar(delta);
17124 UChar how = xIsZ ? 'z' : 's';
17125 UInt rG = gregOfRexRM(pfx, modrm);
17126 if ( epartIsReg(modrm) ) {
17127 UInt rE = eregOfRexRM(pfx, modrm);
17128 assign( srcVec, getXMMReg(rE) );
17129 delta += 1;
17130 DIP( "vpmov%cxbw %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
17131 } else {
17132 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17133 assign( srcVec, loadLE( Ity_V128, mkexpr(addr) ) );
17134 delta += alen;
17135 DIP( "vpmov%cxbw %s,%s\n", how, dis_buf, nameYMMReg(rG) );
17138 /* First do zero extend. */
17139 IRExpr* res
17140 = binop( Iop_V128HLtoV256,
17141 binop( Iop_InterleaveHI8x16,
17142 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
17143 binop( Iop_InterleaveLO8x16,
17144 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
17145 /* And if needed sign extension as well. */
17146 if (!xIsZ)
17147 res = binop( Iop_SarN16x16,
17148 binop( Iop_ShlN16x16, res, mkU8(8) ), mkU8(8) );
17150 putYMMReg ( rG, res );
17152 return delta;
17156 static Long dis_PMOVxXWD_128 ( const VexAbiInfo* vbi, Prefix pfx,
17157 Long delta, Bool isAvx, Bool xIsZ )
17159 IRTemp addr = IRTemp_INVALID;
17160 Int alen = 0;
17161 HChar dis_buf[50];
17162 IRTemp srcVec = newTemp(Ity_V128);
17163 UChar modrm = getUChar(delta);
17164 const HChar* mbV = isAvx ? "v" : "";
17165 const HChar how = xIsZ ? 'z' : 's';
17166 UInt rG = gregOfRexRM(pfx, modrm);
17168 if ( epartIsReg(modrm) ) {
17169 UInt rE = eregOfRexRM(pfx, modrm);
17170 assign( srcVec, getXMMReg(rE) );
17171 delta += 1;
17172 DIP( "%spmov%cxwd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
17173 } else {
17174 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17175 assign( srcVec,
17176 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
17177 delta += alen;
17178 DIP( "%spmov%cxwd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
17181 IRExpr* res
17182 = binop( Iop_InterleaveLO16x8,
17183 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) );
17184 if (!xIsZ)
17185 res = binop(Iop_SarN32x4,
17186 binop(Iop_ShlN32x4, res, mkU8(16)), mkU8(16));
17188 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17189 ( gregOfRexRM(pfx, modrm), res );
17191 return delta;
17195 static Long dis_PMOVxXWD_256 ( const VexAbiInfo* vbi, Prefix pfx,
17196 Long delta, Bool xIsZ )
17198 IRTemp addr = IRTemp_INVALID;
17199 Int alen = 0;
17200 HChar dis_buf[50];
17201 IRTemp srcVec = newTemp(Ity_V128);
17202 UChar modrm = getUChar(delta);
17203 UChar how = xIsZ ? 'z' : 's';
17204 UInt rG = gregOfRexRM(pfx, modrm);
17206 if ( epartIsReg(modrm) ) {
17207 UInt rE = eregOfRexRM(pfx, modrm);
17208 assign( srcVec, getXMMReg(rE) );
17209 delta += 1;
17210 DIP( "vpmov%cxwd %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
17211 } else {
17212 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17213 assign( srcVec, loadLE( Ity_V128, mkexpr(addr) ) );
17214 delta += alen;
17215 DIP( "vpmov%cxwd %s,%s\n", how, dis_buf, nameYMMReg(rG) );
17218 IRExpr* res
17219 = binop( Iop_V128HLtoV256,
17220 binop( Iop_InterleaveHI16x8,
17221 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
17222 binop( Iop_InterleaveLO16x8,
17223 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
17224 if (!xIsZ)
17225 res = binop(Iop_SarN32x8,
17226 binop(Iop_ShlN32x8, res, mkU8(16)), mkU8(16));
17228 putYMMReg ( rG, res );
17230 return delta;
17234 static Long dis_PMOVSXWQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17235 Long delta, Bool isAvx )
17237 IRTemp addr = IRTemp_INVALID;
17238 Int alen = 0;
17239 HChar dis_buf[50];
17240 IRTemp srcBytes = newTemp(Ity_I32);
17241 UChar modrm = getUChar(delta);
17242 const HChar* mbV = isAvx ? "v" : "";
17243 UInt rG = gregOfRexRM(pfx, modrm);
17245 if ( epartIsReg( modrm ) ) {
17246 UInt rE = eregOfRexRM(pfx, modrm);
17247 assign( srcBytes, getXMMRegLane32( rE, 0 ) );
17248 delta += 1;
17249 DIP( "%spmovsxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
17250 } else {
17251 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17252 assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) );
17253 delta += alen;
17254 DIP( "%spmovsxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
17257 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17258 ( rG, binop( Iop_64HLtoV128,
17259 unop( Iop_16Sto64,
17260 unop( Iop_32HIto16, mkexpr(srcBytes) ) ),
17261 unop( Iop_16Sto64,
17262 unop( Iop_32to16, mkexpr(srcBytes) ) ) ) );
17263 return delta;
17267 static Long dis_PMOVSXWQ_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
17269 IRTemp addr = IRTemp_INVALID;
17270 Int alen = 0;
17271 HChar dis_buf[50];
17272 IRTemp srcBytes = newTemp(Ity_I64);
17273 UChar modrm = getUChar(delta);
17274 UInt rG = gregOfRexRM(pfx, modrm);
17275 IRTemp s3, s2, s1, s0;
17276 s3 = s2 = s1 = s0 = IRTemp_INVALID;
17278 if ( epartIsReg( modrm ) ) {
17279 UInt rE = eregOfRexRM(pfx, modrm);
17280 assign( srcBytes, getXMMRegLane64( rE, 0 ) );
17281 delta += 1;
17282 DIP( "vpmovsxwq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
17283 } else {
17284 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17285 assign( srcBytes, loadLE( Ity_I64, mkexpr(addr) ) );
17286 delta += alen;
17287 DIP( "vpmovsxwq %s,%s\n", dis_buf, nameYMMReg(rG) );
17290 breakup64to16s( srcBytes, &s3, &s2, &s1, &s0 );
17291 putYMMReg( rG, binop( Iop_V128HLtoV256,
17292 binop( Iop_64HLtoV128,
17293 unop( Iop_16Sto64, mkexpr(s3) ),
17294 unop( Iop_16Sto64, mkexpr(s2) ) ),
17295 binop( Iop_64HLtoV128,
17296 unop( Iop_16Sto64, mkexpr(s1) ),
17297 unop( Iop_16Sto64, mkexpr(s0) ) ) ) );
17298 return delta;
17302 static Long dis_PMOVZXWQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17303 Long delta, Bool isAvx )
17305 IRTemp addr = IRTemp_INVALID;
17306 Int alen = 0;
17307 HChar dis_buf[50];
17308 IRTemp srcVec = newTemp(Ity_V128);
17309 UChar modrm = getUChar(delta);
17310 const HChar* mbV = isAvx ? "v" : "";
17311 UInt rG = gregOfRexRM(pfx, modrm);
17313 if ( epartIsReg( modrm ) ) {
17314 UInt rE = eregOfRexRM(pfx, modrm);
17315 assign( srcVec, getXMMReg(rE) );
17316 delta += 1;
17317 DIP( "%spmovzxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
17318 } else {
17319 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17320 assign( srcVec,
17321 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
17322 delta += alen;
17323 DIP( "%spmovzxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
17326 IRTemp zeroVec = newTemp( Ity_V128 );
17327 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17329 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17330 ( rG, binop( Iop_InterleaveLO16x8,
17331 mkexpr(zeroVec),
17332 binop( Iop_InterleaveLO16x8,
17333 mkexpr(zeroVec), mkexpr(srcVec) ) ) );
17334 return delta;
17338 static Long dis_PMOVZXWQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
17339 Long delta )
17341 IRTemp addr = IRTemp_INVALID;
17342 Int alen = 0;
17343 HChar dis_buf[50];
17344 IRTemp srcVec = newTemp(Ity_V128);
17345 UChar modrm = getUChar(delta);
17346 UInt rG = gregOfRexRM(pfx, modrm);
17348 if ( epartIsReg( modrm ) ) {
17349 UInt rE = eregOfRexRM(pfx, modrm);
17350 assign( srcVec, getXMMReg(rE) );
17351 delta += 1;
17352 DIP( "vpmovzxwq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
17353 } else {
17354 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17355 assign( srcVec,
17356 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
17357 delta += alen;
17358 DIP( "vpmovzxwq %s,%s\n", dis_buf, nameYMMReg(rG) );
17361 IRTemp zeroVec = newTemp( Ity_V128 );
17362 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17364 putYMMReg( rG, binop( Iop_V128HLtoV256,
17365 binop( Iop_InterleaveHI16x8,
17366 mkexpr(zeroVec),
17367 binop( Iop_InterleaveLO16x8,
17368 mkexpr(zeroVec), mkexpr(srcVec) ) ),
17369 binop( Iop_InterleaveLO16x8,
17370 mkexpr(zeroVec),
17371 binop( Iop_InterleaveLO16x8,
17372 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) );
17373 return delta;
17377 /* Handles 128 bit versions of PMOVZXDQ and PMOVSXDQ. */
17378 static Long dis_PMOVxXDQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17379 Long delta, Bool isAvx, Bool xIsZ )
17381 IRTemp addr = IRTemp_INVALID;
17382 Int alen = 0;
17383 HChar dis_buf[50];
17384 IRTemp srcI64 = newTemp(Ity_I64);
17385 IRTemp srcVec = newTemp(Ity_V128);
17386 UChar modrm = getUChar(delta);
17387 const HChar* mbV = isAvx ? "v" : "";
17388 const HChar how = xIsZ ? 'z' : 's';
17389 UInt rG = gregOfRexRM(pfx, modrm);
17390 /* Compute both srcI64 -- the value to expand -- and srcVec -- same
17391 thing in a V128, with arbitrary junk in the top 64 bits. Use
17392 one or both of them and let iropt clean up afterwards (as
17393 usual). */
17394 if ( epartIsReg(modrm) ) {
17395 UInt rE = eregOfRexRM(pfx, modrm);
17396 assign( srcVec, getXMMReg(rE) );
17397 assign( srcI64, unop(Iop_V128to64, mkexpr(srcVec)) );
17398 delta += 1;
17399 DIP( "%spmov%cxdq %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
17400 } else {
17401 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17402 assign( srcI64, loadLE(Ity_I64, mkexpr(addr)) );
17403 assign( srcVec, unop( Iop_64UtoV128, mkexpr(srcI64)) );
17404 delta += alen;
17405 DIP( "%spmov%cxdq %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
17408 IRExpr* res
17409 = xIsZ /* do math for either zero or sign extend */
17410 ? binop( Iop_InterleaveLO32x4,
17411 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) )
17412 : binop( Iop_64HLtoV128,
17413 unop( Iop_32Sto64,
17414 unop( Iop_64HIto32, mkexpr(srcI64) ) ),
17415 unop( Iop_32Sto64,
17416 unop( Iop_64to32, mkexpr(srcI64) ) ) );
17418 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );
17420 return delta;
17424 /* Handles 256 bit versions of PMOVZXDQ and PMOVSXDQ. */
17425 static Long dis_PMOVxXDQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
17426 Long delta, Bool xIsZ )
17428 IRTemp addr = IRTemp_INVALID;
17429 Int alen = 0;
17430 HChar dis_buf[50];
17431 IRTemp srcVec = newTemp(Ity_V128);
17432 UChar modrm = getUChar(delta);
17433 UChar how = xIsZ ? 'z' : 's';
17434 UInt rG = gregOfRexRM(pfx, modrm);
17435    /* Compute srcVec, the V128 value to expand.  (Unlike the 128-bit
17436       case there is no separate srcI64 here.)  Use it below and let
17437       iropt clean up afterwards (as usual). */
17439 if ( epartIsReg(modrm) ) {
17440 UInt rE = eregOfRexRM(pfx, modrm);
17441 assign( srcVec, getXMMReg(rE) );
17442 delta += 1;
17443 DIP( "vpmov%cxdq %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
17444 } else {
17445 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17446 assign( srcVec, loadLE(Ity_V128, mkexpr(addr)) );
17447 delta += alen;
17448 DIP( "vpmov%cxdq %s,%s\n", how, dis_buf, nameYMMReg(rG) );
17451 IRExpr* res;
17452 if (xIsZ)
17453 res = binop( Iop_V128HLtoV256,
17454 binop( Iop_InterleaveHI32x4,
17455 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
17456 binop( Iop_InterleaveLO32x4,
17457 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
17458 else {
17459 IRTemp s3, s2, s1, s0;
17460 s3 = s2 = s1 = s0 = IRTemp_INVALID;
17461 breakupV128to32s( srcVec, &s3, &s2, &s1, &s0 );
17462 res = binop( Iop_V128HLtoV256,
17463 binop( Iop_64HLtoV128,
17464 unop( Iop_32Sto64, mkexpr(s3) ),
17465 unop( Iop_32Sto64, mkexpr(s2) ) ),
17466 binop( Iop_64HLtoV128,
17467 unop( Iop_32Sto64, mkexpr(s1) ),
17468 unop( Iop_32Sto64, mkexpr(s0) ) ) );
17471 putYMMReg ( rG, res );
17473 return delta;
17477 /* Handles 128 bit versions of PMOVZXBD and PMOVSXBD. */
17478 static Long dis_PMOVxXBD_128 ( const VexAbiInfo* vbi, Prefix pfx,
17479 Long delta, Bool isAvx, Bool xIsZ )
17481 IRTemp addr = IRTemp_INVALID;
17482 Int alen = 0;
17483 HChar dis_buf[50];
17484 IRTemp srcVec = newTemp(Ity_V128);
17485 UChar modrm = getUChar(delta);
17486 const HChar* mbV = isAvx ? "v" : "";
17487 const HChar how = xIsZ ? 'z' : 's';
17488 UInt rG = gregOfRexRM(pfx, modrm);
17489 if ( epartIsReg(modrm) ) {
17490 UInt rE = eregOfRexRM(pfx, modrm);
17491 assign( srcVec, getXMMReg(rE) );
17492 delta += 1;
17493 DIP( "%spmov%cxbd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
17494 } else {
17495 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17496 assign( srcVec,
17497 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
17498 delta += alen;
17499 DIP( "%spmov%cxbd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
17502 IRTemp zeroVec = newTemp(Ity_V128);
17503 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17505 IRExpr* res
17506 = binop(Iop_InterleaveLO8x16,
17507 mkexpr(zeroVec),
17508 binop(Iop_InterleaveLO8x16,
17509 mkexpr(zeroVec), mkexpr(srcVec)));
17510 if (!xIsZ)
17511 res = binop(Iop_SarN32x4,
17512 binop(Iop_ShlN32x4, res, mkU8(24)), mkU8(24));
17514 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );
17516 return delta;
17520 /* Handles 256 bit versions of PMOVZXBD and PMOVSXBD. */
17521 static Long dis_PMOVxXBD_256 ( const VexAbiInfo* vbi, Prefix pfx,
17522 Long delta, Bool xIsZ )
17524 IRTemp addr = IRTemp_INVALID;
17525 Int alen = 0;
17526 HChar dis_buf[50];
17527 IRTemp srcVec = newTemp(Ity_V128);
17528 UChar modrm = getUChar(delta);
17529 UChar how = xIsZ ? 'z' : 's';
17530 UInt rG = gregOfRexRM(pfx, modrm);
17531 if ( epartIsReg(modrm) ) {
17532 UInt rE = eregOfRexRM(pfx, modrm);
17533 assign( srcVec, getXMMReg(rE) );
17534 delta += 1;
17535 DIP( "vpmov%cxbd %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
17536 } else {
17537 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17538 assign( srcVec,
17539 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
17540 delta += alen;
17541 DIP( "vpmov%cxbd %s,%s\n", how, dis_buf, nameYMMReg(rG) );
17544 IRTemp zeroVec = newTemp(Ity_V128);
17545 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17547 IRExpr* res
17548 = binop( Iop_V128HLtoV256,
17549 binop(Iop_InterleaveHI8x16,
17550 mkexpr(zeroVec),
17551 binop(Iop_InterleaveLO8x16,
17552 mkexpr(zeroVec), mkexpr(srcVec)) ),
17553 binop(Iop_InterleaveLO8x16,
17554 mkexpr(zeroVec),
17555 binop(Iop_InterleaveLO8x16,
17556 mkexpr(zeroVec), mkexpr(srcVec)) ) );
17557 if (!xIsZ)
17558 res = binop(Iop_SarN32x8,
17559 binop(Iop_ShlN32x8, res, mkU8(24)), mkU8(24));
17561 putYMMReg ( rG, res );
17563 return delta;
17567 /* Handles 128 bit versions of PMOVSXBQ. */
17568 static Long dis_PMOVSXBQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17569 Long delta, Bool isAvx )
17571 IRTemp addr = IRTemp_INVALID;
17572 Int alen = 0;
17573 HChar dis_buf[50];
17574 IRTemp srcBytes = newTemp(Ity_I16);
17575 UChar modrm = getUChar(delta);
17576 const HChar* mbV = isAvx ? "v" : "";
17577 UInt rG = gregOfRexRM(pfx, modrm);
17578 if ( epartIsReg(modrm) ) {
17579 UInt rE = eregOfRexRM(pfx, modrm);
17580 assign( srcBytes, getXMMRegLane16( rE, 0 ) );
17581 delta += 1;
17582 DIP( "%spmovsxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
17583 } else {
17584 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17585 assign( srcBytes, loadLE( Ity_I16, mkexpr(addr) ) );
17586 delta += alen;
17587 DIP( "%spmovsxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
17590 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17591 ( rG, binop( Iop_64HLtoV128,
17592 unop( Iop_8Sto64,
17593 unop( Iop_16HIto8, mkexpr(srcBytes) ) ),
17594 unop( Iop_8Sto64,
17595 unop( Iop_16to8, mkexpr(srcBytes) ) ) ) );
17596 return delta;
17600 /* Handles 256 bit versions of PMOVSXBQ. */
17601 static Long dis_PMOVSXBQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
17602 Long delta )
17604 IRTemp addr = IRTemp_INVALID;
17605 Int alen = 0;
17606 HChar dis_buf[50];
17607 IRTemp srcBytes = newTemp(Ity_I32);
17608 UChar modrm = getUChar(delta);
17609 UInt rG = gregOfRexRM(pfx, modrm);
17610 if ( epartIsReg(modrm) ) {
17611 UInt rE = eregOfRexRM(pfx, modrm);
17612 assign( srcBytes, getXMMRegLane32( rE, 0 ) );
17613 delta += 1;
17614 DIP( "vpmovsxbq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
17615 } else {
17616 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17617 assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) );
17618 delta += alen;
17619 DIP( "vpmovsxbq %s,%s\n", dis_buf, nameYMMReg(rG) );
17622 putYMMReg
17623 ( rG, binop( Iop_V128HLtoV256,
17624 binop( Iop_64HLtoV128,
17625 unop( Iop_8Sto64,
17626 unop( Iop_16HIto8,
17627 unop( Iop_32HIto16,
17628 mkexpr(srcBytes) ) ) ),
17629 unop( Iop_8Sto64,
17630 unop( Iop_16to8,
17631 unop( Iop_32HIto16,
17632 mkexpr(srcBytes) ) ) ) ),
17633 binop( Iop_64HLtoV128,
17634 unop( Iop_8Sto64,
17635 unop( Iop_16HIto8,
17636 unop( Iop_32to16,
17637 mkexpr(srcBytes) ) ) ),
17638 unop( Iop_8Sto64,
17639 unop( Iop_16to8,
17640 unop( Iop_32to16,
17641 mkexpr(srcBytes) ) ) ) ) ) );
17642 return delta;
17646 /* Handles 128 bit versions of PMOVZXBQ. */
17647 static Long dis_PMOVZXBQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17648 Long delta, Bool isAvx )
17650 IRTemp addr = IRTemp_INVALID;
17651 Int alen = 0;
17652 HChar dis_buf[50];
17653 IRTemp srcVec = newTemp(Ity_V128);
17654 UChar modrm = getUChar(delta);
17655 const HChar* mbV = isAvx ? "v" : "";
17656 UInt rG = gregOfRexRM(pfx, modrm);
17657 if ( epartIsReg(modrm) ) {
17658 UInt rE = eregOfRexRM(pfx, modrm);
17659 assign( srcVec, getXMMReg(rE) );
17660 delta += 1;
17661 DIP( "%spmovzxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
17662 } else {
17663 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17664 assign( srcVec,
17665 unop( Iop_32UtoV128,
17666 unop( Iop_16Uto32, loadLE( Ity_I16, mkexpr(addr) ))));
17667 delta += alen;
17668 DIP( "%spmovzxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
17671 IRTemp zeroVec = newTemp(Ity_V128);
17672 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
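   /* Three rounds of interleaving with zero widen each of the two source
      bytes step by step, 8 -> 16 -> 32 -> 64 bits, giving the zero-extended
      quadwords. */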
17674 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17675 ( rG, binop( Iop_InterleaveLO8x16,
17676 mkexpr(zeroVec),
17677 binop( Iop_InterleaveLO8x16,
17678 mkexpr(zeroVec),
17679 binop( Iop_InterleaveLO8x16,
17680 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) );
17681 return delta;
17685 /* Handles 256 bit versions of PMOVZXBQ. */
17686 static Long dis_PMOVZXBQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
17687 Long delta )
17689 IRTemp addr = IRTemp_INVALID;
17690 Int alen = 0;
17691 HChar dis_buf[50];
17692 IRTemp srcVec = newTemp(Ity_V128);
17693 UChar modrm = getUChar(delta);
17694 UInt rG = gregOfRexRM(pfx, modrm);
17695 if ( epartIsReg(modrm) ) {
17696 UInt rE = eregOfRexRM(pfx, modrm);
17697 assign( srcVec, getXMMReg(rE) );
17698 delta += 1;
17699 DIP( "vpmovzxbq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
17700 } else {
17701 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17702 assign( srcVec,
17703 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) )));
17704 delta += alen;
17705 DIP( "vpmovzxbq %s,%s\n", dis_buf, nameYMMReg(rG) );
17708 IRTemp zeroVec = newTemp(Ity_V128);
17709 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17711 putYMMReg
17712 ( rG, binop( Iop_V128HLtoV256,
17713 binop( Iop_InterleaveHI8x16,
17714 mkexpr(zeroVec),
17715 binop( Iop_InterleaveLO8x16,
17716 mkexpr(zeroVec),
17717 binop( Iop_InterleaveLO8x16,
17718 mkexpr(zeroVec), mkexpr(srcVec) ) ) ),
17719 binop( Iop_InterleaveLO8x16,
17720 mkexpr(zeroVec),
17721 binop( Iop_InterleaveLO8x16,
17722 mkexpr(zeroVec),
17723 binop( Iop_InterleaveLO8x16,
17724 mkexpr(zeroVec), mkexpr(srcVec) ) ) )
17725 ) );
17726 return delta;
17730 static Long dis_PHMINPOSUW_128 ( const VexAbiInfo* vbi, Prefix pfx,
17731 Long delta, Bool isAvx )
17733 IRTemp addr = IRTemp_INVALID;
17734 Int alen = 0;
17735 HChar dis_buf[50];
17736 UChar modrm = getUChar(delta);
17737 const HChar* mbV = isAvx ? "v" : "";
17738 IRTemp sV = newTemp(Ity_V128);
17739 IRTemp sHi = newTemp(Ity_I64);
17740 IRTemp sLo = newTemp(Ity_I64);
17741 IRTemp dLo = newTemp(Ity_I64);
17742 UInt rG = gregOfRexRM(pfx,modrm);
17743 if (epartIsReg(modrm)) {
17744 UInt rE = eregOfRexRM(pfx,modrm);
17745 assign( sV, getXMMReg(rE) );
17746 delta += 1;
17747 DIP("%sphminposuw %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG));
17748 } else {
17749 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
17750 if (!isAvx)
17751 gen_SIGNAL_if_not_16_aligned(vbi, addr);
17752 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
17753 delta += alen;
17754 DIP("%sphminposuw %s,%s\n", mbV, dis_buf, nameXMMReg(rG));
17756 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
17757 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
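   /* The clean helper computes the architected low 64 bits of the result:
      the minimum 16-bit value in lane 0 and its index in lane 1 (an
      assumption about the helper's packing, matching what PHMINPOSUW
      requires); Iop_64UtoV128 below then zeroes the upper lanes. */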
17758 assign( dLo, mkIRExprCCall(
17759 Ity_I64, 0/*regparms*/,
17760 "amd64g_calculate_sse_phminposuw",
17761 &amd64g_calculate_sse_phminposuw,
17762 mkIRExprVec_2( mkexpr(sLo), mkexpr(sHi) )
17764 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17765 (rG, unop(Iop_64UtoV128, mkexpr(dLo)));
17766 return delta;
17770 static Long dis_AESx ( const VexAbiInfo* vbi, Prefix pfx,
17771 Long delta, Bool isAvx, UChar opc )
17773 IRTemp addr = IRTemp_INVALID;
17774 Int alen = 0;
17775 HChar dis_buf[50];
17776 UChar modrm = getUChar(delta);
17777 UInt rG = gregOfRexRM(pfx, modrm);
17778 UInt regNoL = 0;
17779 UInt regNoR = (isAvx && opc != 0xDB) ? getVexNvvvv(pfx) : rG;
17781 /* This is a nasty kludge. We need to pass 2 x V128 to the
17782 helper. Since we can't do that, use a dirty
17783 helper to compute the results directly from the XMM regs in
17784 the guest state. That means for the memory case, we need to
17785 move the left operand into a pseudo-register (XMM16, let's
17786 call it). */
17787 if (epartIsReg(modrm)) {
17788 regNoL = eregOfRexRM(pfx, modrm);
17789 delta += 1;
17790 } else {
17791 regNoL = 16; /* use XMM16 as an intermediary */
17792 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17793 /* alignment check needed ???? */
17794 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
17795 delta += alen;
17798 void* fn = &amd64g_dirtyhelper_AES;
17799 const HChar* nm = "amd64g_dirtyhelper_AES";
17801 /* Round up the arguments. Note that this is a kludge -- the
17802 use of mkU64 rather than mkIRExpr_HWord implies the
17803 assumption that the host's word size is 64-bit. */
17804 UInt gstOffD = ymmGuestRegOffset(rG);
17805 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
17806 UInt gstOffR = ymmGuestRegOffset(regNoR);
17807 IRExpr* opc4 = mkU64(opc);
17808 IRExpr* gstOffDe = mkU64(gstOffD);
17809 IRExpr* gstOffLe = mkU64(gstOffL);
17810 IRExpr* gstOffRe = mkU64(gstOffR);
17811 IRExpr** args
17812 = mkIRExprVec_5( IRExpr_GSPTR(), opc4, gstOffDe, gstOffLe, gstOffRe );
17814 IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args );
17815 /* It's not really a dirty call, but we can't use the clean helper
17816 mechanism here for the very lame reason that we can't pass 2 x
17817 V128s by value to a helper. Hence this roundabout scheme. */
17818 d->nFxState = 2;
17819 vex_bzero(&d->fxState, sizeof(d->fxState));
17820    /* AES{ENC,ENCLAST,DEC,DECLAST} read both registers, and write
17821       the second for !isAvx or the third for isAvx.
17822       AESIMC (0xDB) reads the first register, and writes the second. */
17823 d->fxState[0].fx = Ifx_Read;
17824 d->fxState[0].offset = gstOffL;
17825 d->fxState[0].size = sizeof(U128);
17826 d->fxState[1].offset = gstOffR;
17827 d->fxState[1].size = sizeof(U128);
17828 if (opc == 0xDB)
17829 d->fxState[1].fx = Ifx_Write;
17830 else if (!isAvx || rG == regNoR)
17831 d->fxState[1].fx = Ifx_Modify;
17832 else {
17833 d->fxState[1].fx = Ifx_Read;
17834 d->nFxState++;
17835 d->fxState[2].fx = Ifx_Write;
17836 d->fxState[2].offset = gstOffD;
17837 d->fxState[2].size = sizeof(U128);
17840 stmt( IRStmt_Dirty(d) );
17842 const HChar* opsuf;
17843 switch (opc) {
17844 case 0xDC: opsuf = "enc"; break;
17845      case 0xDD: opsuf = "enclast"; break;
17846 case 0xDE: opsuf = "dec"; break;
17847 case 0xDF: opsuf = "declast"; break;
17848 case 0xDB: opsuf = "imc"; break;
17849 default: vassert(0);
17851 DIP("%saes%s %s,%s%s%s\n", isAvx ? "v" : "", opsuf,
17852 (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)),
17853 nameXMMReg(regNoR),
17854 (isAvx && opc != 0xDB) ? "," : "",
17855 (isAvx && opc != 0xDB) ? nameXMMReg(rG) : "");
17857 if (isAvx)
17858 putYMMRegLane128( rG, 1, mkV128(0) );
17859 return delta;
17862 static Long dis_AESKEYGENASSIST ( const VexAbiInfo* vbi, Prefix pfx,
17863 Long delta, Bool isAvx )
17865 IRTemp addr = IRTemp_INVALID;
17866 Int alen = 0;
17867 HChar dis_buf[50];
17868 UChar modrm = getUChar(delta);
17869 UInt regNoL = 0;
17870 UInt regNoR = gregOfRexRM(pfx, modrm);
17871 UChar imm = 0;
17873 /* This is a nasty kludge. See AESENC et al. instructions. */
17874 modrm = getUChar(delta);
17875 if (epartIsReg(modrm)) {
17876 regNoL = eregOfRexRM(pfx, modrm);
17877 imm = getUChar(delta+1);
17878 delta += 1+1;
17879 } else {
17880 regNoL = 16; /* use XMM16 as an intermediary */
17881 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
17882 /* alignment check ???? . */
17883 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
17884 imm = getUChar(delta+alen);
17885 delta += alen+1;
17888 /* Who ya gonna call? Presumably not Ghostbusters. */
17889 void* fn = &amd64g_dirtyhelper_AESKEYGENASSIST;
17890 const HChar* nm = "amd64g_dirtyhelper_AESKEYGENASSIST";
17892 /* Round up the arguments. Note that this is a kludge -- the
17893 use of mkU64 rather than mkIRExpr_HWord implies the
17894 assumption that the host's word size is 64-bit. */
17895 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
17896 UInt gstOffR = ymmGuestRegOffset(regNoR);
17898 IRExpr* imme = mkU64(imm & 0xFF);
17899 IRExpr* gstOffLe = mkU64(gstOffL);
17900 IRExpr* gstOffRe = mkU64(gstOffR);
17901 IRExpr** args
17902 = mkIRExprVec_4( IRExpr_GSPTR(), imme, gstOffLe, gstOffRe );
17904 IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args );
17905 /* It's not really a dirty call, but we can't use the clean helper
17906 mechanism here for the very lame reason that we can't pass 2 x
17907 V128s by value to a helper. Hence this roundabout scheme. */
17908 d->nFxState = 2;
17909 vex_bzero(&d->fxState, sizeof(d->fxState));
17910 d->fxState[0].fx = Ifx_Read;
17911 d->fxState[0].offset = gstOffL;
17912 d->fxState[0].size = sizeof(U128);
17913 d->fxState[1].fx = Ifx_Write;
17914 d->fxState[1].offset = gstOffR;
17915 d->fxState[1].size = sizeof(U128);
17916 stmt( IRStmt_Dirty(d) );
17918 DIP("%saeskeygenassist $%x,%s,%s\n", isAvx ? "v" : "", (UInt)imm,
17919 (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)),
17920 nameXMMReg(regNoR));
17921 if (isAvx)
17922 putYMMRegLane128( regNoR, 1, mkV128(0) );
17923 return delta;
17927 __attribute__((noinline))
17928 static
17929 Long dis_ESC_0F38__SSE4 ( Bool* decode_OK,
17930 const VexAbiInfo* vbi,
17931 Prefix pfx, Int sz, Long deltaIN )
17933 IRTemp addr = IRTemp_INVALID;
17934 UChar modrm = 0;
17935 Int alen = 0;
17936 HChar dis_buf[50];
17938 *decode_OK = False;
17940 Long delta = deltaIN;
17941 UChar opc = getUChar(delta);
17942 delta++;
17943 switch (opc) {
17945 case 0x10:
17946 case 0x14:
17947 case 0x15:
17948 /* 66 0F 38 10 /r = PBLENDVB xmm1, xmm2/m128 (byte gran)
17949 66 0F 38 14 /r = BLENDVPS xmm1, xmm2/m128 (float gran)
17950 66 0F 38 15 /r = BLENDVPD xmm1, xmm2/m128 (double gran)
17951 Blend at various granularities, with XMM0 (implicit operand)
17952 providing the controlling mask.
17954 if (have66noF2noF3(pfx) && sz == 2) {
17955 modrm = getUChar(delta);
17957 const HChar* nm = NULL;
17958 UInt gran = 0;
17959 IROp opSAR = Iop_INVALID;
17960 switch (opc) {
17961 case 0x10:
17962 nm = "pblendvb"; gran = 1; opSAR = Iop_SarN8x16;
17963 break;
17964 case 0x14:
17965 nm = "blendvps"; gran = 4; opSAR = Iop_SarN32x4;
17966 break;
17967 case 0x15:
17968 nm = "blendvpd"; gran = 8; opSAR = Iop_SarN64x2;
17969 break;
17971 vassert(nm);
17973 IRTemp vecE = newTemp(Ity_V128);
17974 IRTemp vecG = newTemp(Ity_V128);
17975 IRTemp vec0 = newTemp(Ity_V128);
17977 if ( epartIsReg(modrm) ) {
17978 assign(vecE, getXMMReg(eregOfRexRM(pfx, modrm)));
17979 delta += 1;
17980 DIP( "%s %s,%s\n", nm,
17981 nameXMMReg( eregOfRexRM(pfx, modrm) ),
17982 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17983 } else {
17984 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17985 gen_SIGNAL_if_not_16_aligned( vbi, addr );
17986 assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
17987 delta += alen;
17988 DIP( "%s %s,%s\n", nm,
17989 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17992 assign(vecG, getXMMReg(gregOfRexRM(pfx, modrm)));
17993 assign(vec0, getXMMReg(0));
17995 IRTemp res = math_PBLENDVB_128( vecE, vecG, vec0, gran, opSAR );
17996 putXMMReg(gregOfRexRM(pfx, modrm), mkexpr(res));
17998 goto decode_success;
18000 break;
18002 case 0x17:
18003 /* 66 0F 38 17 /r = PTEST xmm1, xmm2/m128
18004 Logical compare (set ZF and CF from AND/ANDN of the operands) */
18005 if (have66noF2noF3(pfx)
18006 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
18007 delta = dis_xTESTy_128( vbi, pfx, delta, False/*!isAvx*/, 0 );
18008 goto decode_success;
18010 break;
18012 case 0x20:
18013 /* 66 0F 38 20 /r = PMOVSXBW xmm1, xmm2/m64
18014 Packed Move with Sign Extend from Byte to Word (XMM) */
18015 if (have66noF2noF3(pfx) && sz == 2) {
18016 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
18017 False/*!isAvx*/, False/*!xIsZ*/ );
18018 goto decode_success;
18020 break;
18022 case 0x21:
18023 /* 66 0F 38 21 /r = PMOVSXBD xmm1, xmm2/m32
18024 Packed Move with Sign Extend from Byte to DWord (XMM) */
18025 if (have66noF2noF3(pfx) && sz == 2) {
18026 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
18027 False/*!isAvx*/, False/*!xIsZ*/ );
18028 goto decode_success;
18030 break;
18032 case 0x22:
18033 /* 66 0F 38 22 /r = PMOVSXBQ xmm1, xmm2/m16
18034 Packed Move with Sign Extend from Byte to QWord (XMM) */
18035 if (have66noF2noF3(pfx) && sz == 2) {
18036 delta = dis_PMOVSXBQ_128( vbi, pfx, delta, False/*!isAvx*/ );
18037 goto decode_success;
18039 break;
18041 case 0x23:
18042 /* 66 0F 38 23 /r = PMOVSXWD xmm1, xmm2/m64
18043 Packed Move with Sign Extend from Word to DWord (XMM) */
18044 if (have66noF2noF3(pfx) && sz == 2) {
18045 delta = dis_PMOVxXWD_128(vbi, pfx, delta,
18046 False/*!isAvx*/, False/*!xIsZ*/);
18047 goto decode_success;
18049 break;
18051 case 0x24:
18052 /* 66 0F 38 24 /r = PMOVSXWQ xmm1, xmm2/m32
18053 Packed Move with Sign Extend from Word to QWord (XMM) */
18054 if (have66noF2noF3(pfx) && sz == 2) {
18055 delta = dis_PMOVSXWQ_128( vbi, pfx, delta, False/*!isAvx*/ );
18056 goto decode_success;
18058 break;
18060 case 0x25:
18061 /* 66 0F 38 25 /r = PMOVSXDQ xmm1, xmm2/m64
18062 Packed Move with Sign Extend from Double Word to Quad Word (XMM) */
18063 if (have66noF2noF3(pfx) && sz == 2) {
18064 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
18065 False/*!isAvx*/, False/*!xIsZ*/ );
18066 goto decode_success;
18068 break;
18070 case 0x28:
18071      /* 66 0F 38 28 = PMULDQ -- signed widening multiply of 32-bit
18072         lanes: lane 0 x lane 0 forms the lower 64-bit half and lane 2
18073         x lane 2 the upper 64-bit half */
18074 /* This is a really poor translation -- could be improved if
18075 performance critical. It's a copy-paste of PMULUDQ, too. */
18076 if (have66noF2noF3(pfx) && sz == 2) {
18077 IRTemp sV = newTemp(Ity_V128);
18078 IRTemp dV = newTemp(Ity_V128);
18079 modrm = getUChar(delta);
18080 UInt rG = gregOfRexRM(pfx,modrm);
18081 assign( dV, getXMMReg(rG) );
18082 if (epartIsReg(modrm)) {
18083 UInt rE = eregOfRexRM(pfx,modrm);
18084 assign( sV, getXMMReg(rE) );
18085 delta += 1;
18086 DIP("pmuldq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
18087 } else {
18088 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
18089 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
18090 delta += alen;
18091 DIP("pmuldq %s,%s\n", dis_buf, nameXMMReg(rG));
18094 putXMMReg( rG, mkexpr(math_PMULDQ_128( dV, sV )) );
18095 goto decode_success;
18097 break;
18099 case 0x29:
18100 /* 66 0F 38 29 = PCMPEQQ
18101 64x2 equality comparison */
18102 if (have66noF2noF3(pfx) && sz == 2) {
18103 /* FIXME: this needs an alignment check */
18104 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
18105 "pcmpeqq", Iop_CmpEQ64x2, False );
18106 goto decode_success;
18108 break;
18110 case 0x2A:
18111 /* 66 0F 38 2A = MOVNTDQA
18112 "non-temporal" "streaming" load
18113 Handle like MOVDQA but only memory operand is allowed */
18114 if (have66noF2noF3(pfx) && sz == 2) {
18115 modrm = getUChar(delta);
18116 if (!epartIsReg(modrm)) {
18117 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
18118 gen_SIGNAL_if_not_16_aligned( vbi, addr );
18119 putXMMReg( gregOfRexRM(pfx,modrm),
18120 loadLE(Ity_V128, mkexpr(addr)) );
18121 DIP("movntdqa %s,%s\n", dis_buf,
18122 nameXMMReg(gregOfRexRM(pfx,modrm)));
18123 delta += alen;
18124 goto decode_success;
18127 break;
18129 case 0x2B:
18130 /* 66 0f 38 2B /r = PACKUSDW xmm1, xmm2/m128
18131 2x 32x4 S->U saturating narrow from xmm2/m128 to xmm1 */
18132 if (have66noF2noF3(pfx) && sz == 2) {
18134 modrm = getUChar(delta);
18136 IRTemp argL = newTemp(Ity_V128);
18137 IRTemp argR = newTemp(Ity_V128);
18139 if ( epartIsReg(modrm) ) {
18140 assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
18141 delta += 1;
18142 DIP( "packusdw %s,%s\n",
18143 nameXMMReg( eregOfRexRM(pfx, modrm) ),
18144 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18145 } else {
18146 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
18147 gen_SIGNAL_if_not_16_aligned( vbi, addr );
18148 assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
18149 delta += alen;
18150 DIP( "packusdw %s,%s\n",
18151 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18154 assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));
18156 putXMMReg( gregOfRexRM(pfx, modrm),
18157 binop( Iop_QNarrowBin32Sto16Ux8,
18158 mkexpr(argL), mkexpr(argR)) );
18160 goto decode_success;
18162 break;
18164 case 0x30:
18165 /* 66 0F 38 30 /r = PMOVZXBW xmm1, xmm2/m64
18166 Packed Move with Zero Extend from Byte to Word (XMM) */
18167 if (have66noF2noF3(pfx) && sz == 2) {
18168 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
18169 False/*!isAvx*/, True/*xIsZ*/ );
18170 goto decode_success;
18172 break;
18174 case 0x31:
18175 /* 66 0F 38 31 /r = PMOVZXBD xmm1, xmm2/m32
18176 Packed Move with Zero Extend from Byte to DWord (XMM) */
18177 if (have66noF2noF3(pfx) && sz == 2) {
18178 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
18179 False/*!isAvx*/, True/*xIsZ*/ );
18180 goto decode_success;
18182 break;
18184 case 0x32:
18185 /* 66 0F 38 32 /r = PMOVZXBQ xmm1, xmm2/m16
18186 Packed Move with Zero Extend from Byte to QWord (XMM) */
18187 if (have66noF2noF3(pfx) && sz == 2) {
18188 delta = dis_PMOVZXBQ_128( vbi, pfx, delta, False/*!isAvx*/ );
18189 goto decode_success;
18191 break;
18193 case 0x33:
18194 /* 66 0F 38 33 /r = PMOVZXWD xmm1, xmm2/m64
18195 Packed Move with Zero Extend from Word to DWord (XMM) */
18196 if (have66noF2noF3(pfx) && sz == 2) {
18197 delta = dis_PMOVxXWD_128( vbi, pfx, delta,
18198 False/*!isAvx*/, True/*xIsZ*/ );
18199 goto decode_success;
18201 break;
18203 case 0x34:
18204 /* 66 0F 38 34 /r = PMOVZXWQ xmm1, xmm2/m32
18205 Packed Move with Zero Extend from Word to QWord (XMM) */
18206 if (have66noF2noF3(pfx) && sz == 2) {
18207 delta = dis_PMOVZXWQ_128( vbi, pfx, delta, False/*!isAvx*/ );
18208 goto decode_success;
18210 break;
18212 case 0x35:
18213 /* 66 0F 38 35 /r = PMOVZXDQ xmm1, xmm2/m64
18214 Packed Move with Zero Extend from DWord to QWord (XMM) */
18215 if (have66noF2noF3(pfx) && sz == 2) {
18216 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
18217 False/*!isAvx*/, True/*xIsZ*/ );
18218 goto decode_success;
18220 break;
18222 case 0x37:
18223 /* 66 0F 38 37 = PCMPGTQ
18224 64x2 comparison (signed, presumably; the Intel docs don't say :-)
18226 if (have66noF2noF3(pfx) && sz == 2) {
18227 /* FIXME: this needs an alignment check */
18228 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
18229 "pcmpgtq", Iop_CmpGT64Sx2, False );
18230 goto decode_success;
18232 break;
18234 case 0x38:
18235 case 0x3C:
18236 /* 66 0F 38 38 /r = PMINSB xmm1, xmm2/m128 8Sx16 (signed) min
18237 66 0F 38 3C /r = PMAXSB xmm1, xmm2/m128 8Sx16 (signed) max
18239 if (have66noF2noF3(pfx) && sz == 2) {
18240 /* FIXME: this needs an alignment check */
18241 Bool isMAX = opc == 0x3C;
18242 delta = dis_SSEint_E_to_G(
18243 vbi, pfx, delta,
18244 isMAX ? "pmaxsb" : "pminsb",
18245 isMAX ? Iop_Max8Sx16 : Iop_Min8Sx16,
18246 False
18248 goto decode_success;
18250 break;
18252 case 0x39:
18253 case 0x3D:
18254 /* 66 0F 38 39 /r = PMINSD xmm1, xmm2/m128
18255 Minimum of Packed Signed Double Word Integers (XMM)
18256 66 0F 38 3D /r = PMAXSD xmm1, xmm2/m128
18257 Maximum of Packed Signed Double Word Integers (XMM)
18259 if (have66noF2noF3(pfx) && sz == 2) {
18260 /* FIXME: this needs an alignment check */
18261 Bool isMAX = opc == 0x3D;
18262 delta = dis_SSEint_E_to_G(
18263 vbi, pfx, delta,
18264 isMAX ? "pmaxsd" : "pminsd",
18265 isMAX ? Iop_Max32Sx4 : Iop_Min32Sx4,
18266 False
18268 goto decode_success;
18270 break;
18272 case 0x3A:
18273 case 0x3E:
18274 /* 66 0F 38 3A /r = PMINUW xmm1, xmm2/m128
18275 Minimum of Packed Unsigned Word Integers (XMM)
18276 66 0F 38 3E /r = PMAXUW xmm1, xmm2/m128
18277 Maximum of Packed Unsigned Word Integers (XMM)
18279 if (have66noF2noF3(pfx) && sz == 2) {
18280 /* FIXME: this needs an alignment check */
18281 Bool isMAX = opc == 0x3E;
18282 delta = dis_SSEint_E_to_G(
18283 vbi, pfx, delta,
18284 isMAX ? "pmaxuw" : "pminuw",
18285 isMAX ? Iop_Max16Ux8 : Iop_Min16Ux8,
18286 False
18288 goto decode_success;
18290 break;
18292 case 0x3B:
18293 case 0x3F:
18294 /* 66 0F 38 3B /r = PMINUD xmm1, xmm2/m128
18295 Minimum of Packed Unsigned Doubleword Integers (XMM)
18296 66 0F 38 3F /r = PMAXUD xmm1, xmm2/m128
18297 Maximum of Packed Unsigned Doubleword Integers (XMM)
18299 if (have66noF2noF3(pfx) && sz == 2) {
18300 /* FIXME: this needs an alignment check */
18301 Bool isMAX = opc == 0x3F;
18302 delta = dis_SSEint_E_to_G(
18303 vbi, pfx, delta,
18304 isMAX ? "pmaxud" : "pminud",
18305 isMAX ? Iop_Max32Ux4 : Iop_Min32Ux4,
18306 False
18308 goto decode_success;
18310 break;
18312 case 0x40:
18313 /* 66 0F 38 40 /r = PMULLD xmm1, xmm2/m128
18314 32x4 integer multiply from xmm2/m128 to xmm1 */
18315 if (have66noF2noF3(pfx) && sz == 2) {
18317 modrm = getUChar(delta);
18319 IRTemp argL = newTemp(Ity_V128);
18320 IRTemp argR = newTemp(Ity_V128);
18322 if ( epartIsReg(modrm) ) {
18323 assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
18324 delta += 1;
18325 DIP( "pmulld %s,%s\n",
18326 nameXMMReg( eregOfRexRM(pfx, modrm) ),
18327 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18328 } else {
18329 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
18330 gen_SIGNAL_if_not_16_aligned( vbi, addr );
18331 assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
18332 delta += alen;
18333 DIP( "pmulld %s,%s\n",
18334 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18337 assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));
18339 putXMMReg( gregOfRexRM(pfx, modrm),
18340 binop( Iop_Mul32x4, mkexpr(argL), mkexpr(argR)) );
18342 goto decode_success;
18344 break;
18346 case 0x41:
18347 /* 66 0F 38 41 /r = PHMINPOSUW xmm1, xmm2/m128
18348 Packed Horizontal Word Minimum from xmm2/m128 to xmm1 */
18349 if (have66noF2noF3(pfx) && sz == 2) {
18350 delta = dis_PHMINPOSUW_128( vbi, pfx, delta, False/*!isAvx*/ );
18351 goto decode_success;
18353 break;
18355 case 0xDC:
18356 case 0xDD:
18357 case 0xDE:
18358 case 0xDF:
18359 case 0xDB:
18360 /* 66 0F 38 DC /r = AESENC xmm1, xmm2/m128
18361 DD /r = AESENCLAST xmm1, xmm2/m128
18362 DE /r = AESDEC xmm1, xmm2/m128
18363 DF /r = AESDECLAST xmm1, xmm2/m128
18365 DB /r = AESIMC xmm1, xmm2/m128 */
18366 if (have66noF2noF3(pfx) && sz == 2) {
18367 delta = dis_AESx( vbi, pfx, delta, False/*!isAvx*/, opc );
18368 goto decode_success;
18370 break;
18372 case 0xF0:
18373 case 0xF1:
18374 /* F2 0F 38 F0 /r = CRC32 r/m8, r32 (REX.W ok, 66 not ok)
18375 F2 0F 38 F1 /r = CRC32 r/m{16,32,64}, r32
18376         The decoding on this is a bit unusual: F0 always takes an 8-bit
              source regardless of prefixes, whereas F1 takes a 16-, 32- or
              64-bit source depending on the 66 prefix and REX.W.
18378 if (haveF2noF3(pfx)
18379 && (opc == 0xF1 || (opc == 0xF0 && !have66(pfx)))) {
18380 modrm = getUChar(delta);
18382 if (opc == 0xF0)
18383 sz = 1;
18384 else
18385 vassert(sz == 2 || sz == 4 || sz == 8);
18387 IRType tyE = szToITy(sz);
18388 IRTemp valE = newTemp(tyE);
18390 if (epartIsReg(modrm)) {
18391 assign(valE, getIRegE(sz, pfx, modrm));
18392 delta += 1;
18393 DIP("crc32b %s,%s\n", nameIRegE(sz, pfx, modrm),
18394 nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm));
18395 } else {
18396 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
18397 assign(valE, loadLE(tyE, mkexpr(addr)));
18398 delta += alen;
18399 DIP("crc32b %s,%s\n", dis_buf,
18400 nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm));
18403 /* Somewhat funny getting/putting of the crc32 value, in order
18404 to ensure that it turns into 64-bit gets and puts. However,
18405 mask off the upper 32 bits so as to not get memcheck false
18406 +ves around the helper call. */
18407 IRTemp valG0 = newTemp(Ity_I64);
18408 assign(valG0, binop(Iop_And64, getIRegG(8, pfx, modrm),
18409 mkU64(0xFFFFFFFF)));
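      /* Note: the SSE4.2 CRC32 instruction accumulates using the CRC-32C
         (Castagnoli) polynomial; the previous CRC value is passed to the
         helper in valG0 and the new data, widened to 64 bits, in valE. */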
18411 const HChar* nm = NULL;
18412 void* fn = NULL;
18413 switch (sz) {
18414 case 1: nm = "amd64g_calc_crc32b";
18415 fn = &amd64g_calc_crc32b; break;
18416 case 2: nm = "amd64g_calc_crc32w";
18417 fn = &amd64g_calc_crc32w; break;
18418 case 4: nm = "amd64g_calc_crc32l";
18419 fn = &amd64g_calc_crc32l; break;
18420 case 8: nm = "amd64g_calc_crc32q";
18421 fn = &amd64g_calc_crc32q; break;
18423 vassert(nm && fn);
18424 IRTemp valG1 = newTemp(Ity_I64);
18425 assign(valG1,
18426 mkIRExprCCall(Ity_I64, 0/*regparm*/, nm, fn,
18427 mkIRExprVec_2(mkexpr(valG0),
18428 widenUto64(mkexpr(valE)))));
18430 putIRegG(4, pfx, modrm, unop(Iop_64to32, mkexpr(valG1)));
18431 goto decode_success;
18433 break;
18435 default:
18436 break;
18440 //decode_failure:
18441 *decode_OK = False;
18442 return deltaIN;
18444 decode_success:
18445 *decode_OK = True;
18446 return delta;
18450 /*------------------------------------------------------------*/
18451 /*--- ---*/
18452 /*--- Top-level SSE4: dis_ESC_0F3A__SSE4 ---*/
18453 /*--- ---*/
18454 /*------------------------------------------------------------*/
18456 static Long dis_PEXTRW ( const VexAbiInfo* vbi, Prefix pfx,
18457 Long delta, Bool isAvx )
18459 IRTemp addr = IRTemp_INVALID;
18460 IRTemp t0 = IRTemp_INVALID;
18461 IRTemp t1 = IRTemp_INVALID;
18462 IRTemp t2 = IRTemp_INVALID;
18463 IRTemp t3 = IRTemp_INVALID;
18464 UChar modrm = getUChar(delta);
18465 Int alen = 0;
18466 HChar dis_buf[50];
18467 UInt rG = gregOfRexRM(pfx,modrm);
18468 Int imm8_20;
18469 IRTemp xmm_vec = newTemp(Ity_V128);
18470 IRTemp d16 = newTemp(Ity_I16);
18471 const HChar* mbV = isAvx ? "v" : "";
18473 vassert(0==getRexW(pfx)); /* ensured by caller */
18474 assign( xmm_vec, getXMMReg(rG) );
18475 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
18477 if ( epartIsReg( modrm ) ) {
18478 imm8_20 = (Int)(getUChar(delta+1) & 7);
18479 } else {
18480 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18481 imm8_20 = (Int)(getUChar(delta+alen) & 7);
18484 switch (imm8_20) {
18485 case 0: assign(d16, unop(Iop_32to16, mkexpr(t0))); break;
18486 case 1: assign(d16, unop(Iop_32HIto16, mkexpr(t0))); break;
18487 case 2: assign(d16, unop(Iop_32to16, mkexpr(t1))); break;
18488 case 3: assign(d16, unop(Iop_32HIto16, mkexpr(t1))); break;
18489 case 4: assign(d16, unop(Iop_32to16, mkexpr(t2))); break;
18490 case 5: assign(d16, unop(Iop_32HIto16, mkexpr(t2))); break;
18491 case 6: assign(d16, unop(Iop_32to16, mkexpr(t3))); break;
18492 case 7: assign(d16, unop(Iop_32HIto16, mkexpr(t3))); break;
18493 default: vassert(0);
18496 if ( epartIsReg( modrm ) ) {
18497 UInt rE = eregOfRexRM(pfx,modrm);
18498 putIReg32( rE, unop(Iop_16Uto32, mkexpr(d16)) );
18499 delta += 1+1;
18500 DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20,
18501 nameXMMReg( rG ), nameIReg32( rE ) );
18502 } else {
18503 storeLE( mkexpr(addr), mkexpr(d16) );
18504 delta += alen+1;
18505 DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20, nameXMMReg( rG ), dis_buf );
18507 return delta;
18511 static Long dis_PEXTRD ( const VexAbiInfo* vbi, Prefix pfx,
18512 Long delta, Bool isAvx )
18514 IRTemp addr = IRTemp_INVALID;
18515 IRTemp t0 = IRTemp_INVALID;
18516 IRTemp t1 = IRTemp_INVALID;
18517 IRTemp t2 = IRTemp_INVALID;
18518 IRTemp t3 = IRTemp_INVALID;
18519 UChar modrm = 0;
18520 Int alen = 0;
18521 HChar dis_buf[50];
18523 Int imm8_10;
18524 IRTemp xmm_vec = newTemp(Ity_V128);
18525 IRTemp src_dword = newTemp(Ity_I32);
18526 const HChar* mbV = isAvx ? "v" : "";
18528 vassert(0==getRexW(pfx)); /* ensured by caller */
18529 modrm = getUChar(delta);
18530 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
18531 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
18533 if ( epartIsReg( modrm ) ) {
18534 imm8_10 = (Int)(getUChar(delta+1) & 3);
18535 } else {
18536 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18537 imm8_10 = (Int)(getUChar(delta+alen) & 3);
18540 switch ( imm8_10 ) {
18541 case 0: assign( src_dword, mkexpr(t0) ); break;
18542 case 1: assign( src_dword, mkexpr(t1) ); break;
18543 case 2: assign( src_dword, mkexpr(t2) ); break;
18544 case 3: assign( src_dword, mkexpr(t3) ); break;
18545 default: vassert(0);
18548 if ( epartIsReg( modrm ) ) {
18549 putIReg32( eregOfRexRM(pfx,modrm), mkexpr(src_dword) );
18550 delta += 1+1;
18551 DIP( "%spextrd $%d, %s,%s\n", mbV, imm8_10,
18552 nameXMMReg( gregOfRexRM(pfx, modrm) ),
18553 nameIReg32( eregOfRexRM(pfx, modrm) ) );
18554 } else {
18555 storeLE( mkexpr(addr), mkexpr(src_dword) );
18556 delta += alen+1;
18557 DIP( "%spextrd $%d, %s,%s\n", mbV,
18558 imm8_10, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
18560 return delta;
18564 static Long dis_PEXTRQ ( const VexAbiInfo* vbi, Prefix pfx,
18565 Long delta, Bool isAvx )
18567 IRTemp addr = IRTemp_INVALID;
18568 UChar modrm = 0;
18569 Int alen = 0;
18570 HChar dis_buf[50];
18572 Int imm8_0;
18573 IRTemp xmm_vec = newTemp(Ity_V128);
18574 IRTemp src_qword = newTemp(Ity_I64);
18575 const HChar* mbV = isAvx ? "v" : "";
18577 vassert(1==getRexW(pfx)); /* ensured by caller */
18578 modrm = getUChar(delta);
18579 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
18581 if ( epartIsReg( modrm ) ) {
18582 imm8_0 = (Int)(getUChar(delta+1) & 1);
18583 } else {
18584 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18585 imm8_0 = (Int)(getUChar(delta+alen) & 1);
18588 switch ( imm8_0 ) {
18589 case 0: assign( src_qword, unop(Iop_V128to64, mkexpr(xmm_vec)) );
18590 break;
18591 case 1: assign( src_qword, unop(Iop_V128HIto64, mkexpr(xmm_vec)) );
18592 break;
18593 default: vassert(0);
18596 if ( epartIsReg( modrm ) ) {
18597 putIReg64( eregOfRexRM(pfx,modrm), mkexpr(src_qword) );
18598 delta += 1+1;
18599 DIP( "%spextrq $%d, %s,%s\n", mbV, imm8_0,
18600 nameXMMReg( gregOfRexRM(pfx, modrm) ),
18601 nameIReg64( eregOfRexRM(pfx, modrm) ) );
18602 } else {
18603 storeLE( mkexpr(addr), mkexpr(src_qword) );
18604 delta += alen+1;
18605 DIP( "%spextrq $%d, %s,%s\n", mbV,
18606 imm8_0, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
18608 return delta;
18611 static IRExpr* math_CTZ32(IRExpr *exp)
18613 /* Iop_Ctz32 isn't implemented by the amd64 back end, so use Iop_Ctz64. */
18614 return unop(Iop_64to32, unop(Iop_Ctz64, unop(Iop_32Uto64, exp)));
18617 static Long dis_PCMPISTRI_3A ( UChar modrm, UInt regNoL, UInt regNoR,
18618 Long delta, UChar opc, UChar imm,
18619 HChar dis_buf[])
18621 /* We only handle PCMPISTRI for now */
18622 vassert((opc & 0x03) == 0x03);
18623 /* And only an immediate byte of 0x38 or 0x3A */
18624 vassert((imm & ~0x02) == 0x38);
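/* For reference -- a sketch of the PCMPxSTRx imm8 field layout as given
   in the Intel SDM, not stated elsewhere in this file: bits 1:0 select
   the source data format (0x38/0x3A both mean byte elements), bits 3:2
   the aggregation operation (10b = "equal each"), bits 5:4 the polarity
   (11b = negate under the valid mask), and bit 6 whether the least or
   most significant index is returned (0 = least).  Hence 0x38 and 0x3A
   differ only in signed vs unsigned bytes, which is irrelevant for an
   equality comparison. */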
18626 /* FIXME: Is this correct when RegNoL == 16 ? */
18627 IRTemp argL = newTemp(Ity_V128);
18628 assign(argL, getXMMReg(regNoL));
18629 IRTemp argR = newTemp(Ity_V128);
18630 assign(argR, getXMMReg(regNoR));
18632 IRTemp zmaskL = newTemp(Ity_I32);
18633 assign(zmaskL, unop(Iop_16Uto32,
18634 unop(Iop_GetMSBs8x16,
18635 binop(Iop_CmpEQ8x16, mkexpr(argL), mkV128(0)))));
18636 IRTemp zmaskR = newTemp(Ity_I32);
18637 assign(zmaskR, unop(Iop_16Uto32,
18638 unop(Iop_GetMSBs8x16,
18639 binop(Iop_CmpEQ8x16, mkexpr(argR), mkV128(0)))));
18641 /* We want validL = ~(zmaskL | -zmaskL)
18643 But this formulation kills memcheck's validity tracking when any
18644 bits above the first "1" are invalid. So reformulate as:
18646 validL = (zmaskL ? (1 << ctz(zmaskL)) : 0) - 1  */
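/* Illustrative example (not from the original source): if argL holds
   "hi\0" followed by non-zero garbage, then zmaskL = 0b100, ctz = 2, and
   validL = (1 << 2) - 1 = 0b011, i.e. exactly the two bytes before the
   terminating zero are marked valid.  If argL contains no zero byte at
   all, zmaskL = 0 and validL = 0 - 1 = all ones. */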
18649 IRExpr *ctzL = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskL)));
18651 /* Generate a bool expression which is zero iff the original is
18652 zero. Do this carefully so memcheck can propagate validity bits
18653 correctly.
18655 IRTemp zmaskL_zero = newTemp(Ity_I1);
18656 assign(zmaskL_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskL), mkU32(0)));
18658 IRTemp validL = newTemp(Ity_I32);
18659 assign(validL, binop(Iop_Sub32,
18660 IRExpr_ITE(mkexpr(zmaskL_zero),
18661 binop(Iop_Shl32, mkU32(1), ctzL),
18662 mkU32(0)),
18663 mkU32(1)));
18665 /* And similarly for validR. */
18666 IRExpr *ctzR = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskR)));
18667 IRTemp zmaskR_zero = newTemp(Ity_I1);
18668 assign(zmaskR_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskR), mkU32(0)));
18669 IRTemp validR = newTemp(Ity_I32);
18670 assign(validR, binop(Iop_Sub32,
18671 IRExpr_ITE(mkexpr(zmaskR_zero),
18672 binop(Iop_Shl32, mkU32(1), ctzR),
18673 mkU32(0)),
18674 mkU32(1)));
18676 /* Do the actual comparison. */
18677 IRExpr *boolResII = unop(Iop_16Uto32,
18678 unop(Iop_GetMSBs8x16,
18679 binop(Iop_CmpEQ8x16, mkexpr(argL),
18680 mkexpr(argR))));
18682 /* Compute boolresII & validL & validR (i.e., if both valid, use
18683 comparison result) */
18684 IRExpr *intRes1_a = binop(Iop_And32, boolResII,
18685 binop(Iop_And32,
18686 mkexpr(validL), mkexpr(validR)));
18688 /* Compute ~(validL | validR); i.e., if both invalid, force 1. */
18689 IRExpr *intRes1_b = unop(Iop_Not32, binop(Iop_Or32,
18690 mkexpr(validL), mkexpr(validR)));
18691 /* Otherwise, zero. */
18692 IRExpr *intRes1 = binop(Iop_And32, mkU32(0xFFFF),
18693 binop(Iop_Or32, intRes1_a, intRes1_b));
18695 /* The "0x30" in imm=0x3A means "polarity=3" means XOR validL with
18696 result. */
18697 IRTemp intRes2 = newTemp(Ity_I32);
18698 assign(intRes2, binop(Iop_And32, mkU32(0xFFFF),
18699 binop(Iop_Xor32, intRes1, mkexpr(validL))));
18701 /* If the 0x40 bit were set in imm=0x3A, we would return the index
18702 of the msb. Since it is clear, we return the index of the
18703 lsb. */
18704 IRExpr *newECX = math_CTZ32(binop(Iop_Or32,
18705 mkexpr(intRes2), mkU32(0x10000)));
18707 /* And that's our RCX. */
18708 putIReg32(R_RCX, newECX);
18710 /* Now for the condition codes... */
18712 /* C == 0 iff intRes2 == 0 */
18713 IRExpr *c_bit = IRExpr_ITE( binop(Iop_ExpCmpNE32, mkexpr(intRes2),
18714 mkU32(0)),
18715 mkU32(1 << AMD64G_CC_SHIFT_C),
18716 mkU32(0));
18717 /* Z == 1 iff any in argL is 0 */
18718 IRExpr *z_bit = IRExpr_ITE( mkexpr(zmaskL_zero),
18719 mkU32(1 << AMD64G_CC_SHIFT_Z),
18720 mkU32(0));
18721 /* S == 1 iff any in argR is 0 */
18722 IRExpr *s_bit = IRExpr_ITE( mkexpr(zmaskR_zero),
18723 mkU32(1 << AMD64G_CC_SHIFT_S),
18724 mkU32(0));
18725 /* O == IntRes2[0] */
18726 IRExpr *o_bit = binop(Iop_Shl32, binop(Iop_And32, mkexpr(intRes2),
18727 mkU32(0x01)),
18728 mkU8(AMD64G_CC_SHIFT_O));
18730 /* Put them all together */
18731 IRTemp cc = newTemp(Ity_I64);
18732 assign(cc, widenUto64(binop(Iop_Or32,
18733 binop(Iop_Or32, c_bit, z_bit),
18734 binop(Iop_Or32, s_bit, o_bit))));
18735 stmt(IRStmt_Put(OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY)));
18736 stmt(IRStmt_Put(OFFB_CC_DEP1, mkexpr(cc)));
18737 stmt(IRStmt_Put(OFFB_CC_DEP2, mkU64(0)));
18738 stmt(IRStmt_Put(OFFB_CC_NDEP, mkU64(0)));
18740 return delta;
18743 /* This can fail, in which case it returns the original (unchanged)
18744 delta. */
18745 static Long dis_PCMPxSTRx ( const VexAbiInfo* vbi, Prefix pfx,
18746 Long delta, Bool isAvx, UChar opc )
18748 Long delta0 = delta;
18749 UInt isISTRx = opc & 2;
18750 UInt isxSTRM = (opc & 1) ^ 1;
18751 UInt regNoL = 0;
18752 UInt regNoR = 0;
18753 UChar imm = 0;
18754 IRTemp addr = IRTemp_INVALID;
18755 Int alen = 0;
18756 HChar dis_buf[50];
18758 /* This is a nasty kludge. Ideally we would pass the 2 x V128 operands
18759 to a clean helper. Since we can't do that, use a dirty helper to
18760 compute the results directly from the XMM regs in the guest
18761 state. That means for the memory case, we need to move the left
18762 operand into a pseudo-register (XMM16, let's call it). */
18763 UChar modrm = getUChar(delta);
18764 if (epartIsReg(modrm)) {
18765 regNoL = eregOfRexRM(pfx, modrm);
18766 regNoR = gregOfRexRM(pfx, modrm);
18767 imm = getUChar(delta+1);
18768 delta += 1+1;
18769 } else {
18770 regNoL = 16; /* use XMM16 as an intermediary */
18771 regNoR = gregOfRexRM(pfx, modrm);
18772 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18773 /* No alignment check; I guess that makes sense, given that
18774 these insns are for dealing with C-style strings. */
18775 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
18776 imm = getUChar(delta+alen);
18777 delta += alen+1;
18780 /* Print the insn here, since dis_PCMPISTRI_3A doesn't do so
18781 itself. */
18782 if (regNoL == 16) {
18783 DIP("%spcmp%cstr%c $%x,%s,%s\n",
18784 isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
18785 (UInt)imm, dis_buf, nameXMMReg(regNoR));
18786 } else {
18787 DIP("%spcmp%cstr%c $%x,%s,%s\n",
18788 isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
18789 (UInt)imm, nameXMMReg(regNoL), nameXMMReg(regNoR));
18792 /* Handle special case(s). */
18793 if (imm == 0x3A && isISTRx && !isxSTRM) {
18794 return dis_PCMPISTRI_3A ( modrm, regNoL, regNoR, delta,
18795 opc, imm, dis_buf);
18798 /* Now we know the XMM reg numbers for the operands, and the
18799 immediate byte. Is it one we can actually handle? Throw out any
18800 cases for which the helper function has not been verified. */
18801 switch (imm) {
18802 case 0x00: case 0x02:
18803 case 0x08: case 0x0A: case 0x0C: case 0x0E:
18804 case 0x10: case 0x12: case 0x14:
18805 case 0x18: case 0x1A:
18806 case 0x30: case 0x34:
18807 case 0x38: case 0x3A:
18808 case 0x40: case 0x42: case 0x44: case 0x46:
18809 case 0x4A:
18810 case 0x62:
18811 case 0x70: case 0x72:
18812 break;
18813 // the 16-bit character versions of the above
18814 case 0x01: case 0x03:
18815 case 0x09: case 0x0B: case 0x0D:
18816 case 0x13:
18817 case 0x19: case 0x1B:
18818 case 0x39: case 0x3B:
18819 case 0x41: case 0x45:
18820 case 0x4B:
18821 break;
18822 default:
18823 return delta0; /*FAIL*/
18826 /* Who ya gonna call? Presumably not Ghostbusters. */
18827 void* fn = &amd64g_dirtyhelper_PCMPxSTRx;
18828 const HChar* nm = "amd64g_dirtyhelper_PCMPxSTRx";
18830 /* Round up the arguments. Note that this is a kludge -- the use
18831 of mkU64 rather than mkIRExpr_HWord implies the assumption that
18832 the host's word size is 64-bit. */
18833 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
18834 UInt gstOffR = ymmGuestRegOffset(regNoR);
18836 IRExpr* opc4_and_imm = mkU64((opc << 8) | (imm & 0xFF));
18837 IRExpr* gstOffLe = mkU64(gstOffL);
18838 IRExpr* gstOffRe = mkU64(gstOffR);
18839 IRExpr* edxIN = isISTRx ? mkU64(0) : getIRegRDX(8);
18840 IRExpr* eaxIN = isISTRx ? mkU64(0) : getIRegRAX(8);
18841 IRExpr** args
18842 = mkIRExprVec_6( IRExpr_GSPTR(),
18843 opc4_and_imm, gstOffLe, gstOffRe, edxIN, eaxIN );
18845 IRTemp resT = newTemp(Ity_I64);
18846 IRDirty* d = unsafeIRDirty_1_N( resT, 0/*regparms*/, nm, fn, args );
18847 /* It's not really a dirty call, but we can't use the clean helper
18848 mechanism here for the very lame reason that we can't pass 2 x
18849 V128s by value to a helper. Hence this roundabout scheme. */
18850 d->nFxState = 2;
18851 vex_bzero(&d->fxState, sizeof(d->fxState));
18852 d->fxState[0].fx = Ifx_Read;
18853 d->fxState[0].offset = gstOffL;
18854 d->fxState[0].size = sizeof(U128);
18855 d->fxState[1].fx = Ifx_Read;
18856 d->fxState[1].offset = gstOffR;
18857 d->fxState[1].size = sizeof(U128);
18858 if (isxSTRM) {
18859 /* Declare that the helper writes XMM0. */
18860 d->nFxState = 3;
18861 d->fxState[2].fx = Ifx_Write;
18862 d->fxState[2].offset = ymmGuestRegOffset(0);
18863 d->fxState[2].size = sizeof(U128);
18866 stmt( IRStmt_Dirty(d) );
18868 /* Now resT[15:0] holds the new OSZACP values, so the condition
18869 codes must be updated. And for a xSTRI case, resT[31:16] holds
18870 the new ECX value, so stash that too. */
18871 if (!isxSTRM) {
18872 putIReg64(R_RCX, binop(Iop_And64,
18873 binop(Iop_Shr64, mkexpr(resT), mkU8(16)),
18874 mkU64(0xFFFF)));
18877 /* Zap the upper half of the dest reg as per AVX conventions. */
18878 if (isxSTRM && isAvx)
18879 putYMMRegLane128(/*YMM*/0, 1, mkV128(0));
18881 stmt( IRStmt_Put(
18882 OFFB_CC_DEP1,
18883 binop(Iop_And64, mkexpr(resT), mkU64(0xFFFF))
18885 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
18886 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
18887 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
18889 return delta;
18893 static IRTemp math_PINSRB_128 ( IRTemp v128, IRTemp u8, UInt imm8 )
18895 vassert(imm8 >= 0 && imm8 <= 15);
18897 // Create a V128 value which has the selected byte in the
18898 // specified lane, and zeroes everywhere else.
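// Worked example (illustrative, not from the original source): for
// imm8 == 9, halfshift is u8 << 8 and is placed in the upper 64-bit
// half, so the byte lands in lane 9 of tmp128; mask becomes
// ~(1 << 9) = 0xFDFF, a per-byte V128 mask that clears only lane 9 of
// the original vector before the OR below.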
18899 IRTemp tmp128 = newTemp(Ity_V128);
18900 IRTemp halfshift = newTemp(Ity_I64);
18901 assign(halfshift, binop(Iop_Shl64,
18902 unop(Iop_8Uto64, mkexpr(u8)),
18903 mkU8(8 * (imm8 & 7))));
18904 if (imm8 < 8) {
18905 assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
18906 } else {
18907 assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
18910 UShort mask = ~(1 << imm8);
18911 IRTemp res = newTemp(Ity_V128);
18912 assign( res, binop(Iop_OrV128,
18913 mkexpr(tmp128),
18914 binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
18915 return res;
18919 static IRTemp math_PINSRD_128 ( IRTemp v128, IRTemp u32, UInt imm8 )
18921 IRTemp z32 = newTemp(Ity_I32);
18922 assign(z32, mkU32(0));
18924 /* Surround u32 with zeroes as per imm, giving us something we can
18925 OR into a suitably masked-out v128.*/
18926 IRTemp withZs = newTemp(Ity_V128);
18927 UShort mask = 0;
18928 switch (imm8) {
18929 case 3: mask = 0x0FFF;
18930 assign(withZs, mkV128from32s(u32, z32, z32, z32));
18931 break;
18932 case 2: mask = 0xF0FF;
18933 assign(withZs, mkV128from32s(z32, u32, z32, z32));
18934 break;
18935 case 1: mask = 0xFF0F;
18936 assign(withZs, mkV128from32s(z32, z32, u32, z32));
18937 break;
18938 case 0: mask = 0xFFF0;
18939 assign(withZs, mkV128from32s(z32, z32, z32, u32));
18940 break;
18941 default: vassert(0);
18944 IRTemp res = newTemp(Ity_V128);
18945 assign(res, binop( Iop_OrV128,
18946 mkexpr(withZs),
18947 binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
18948 return res;
18952 static IRTemp math_PINSRQ_128 ( IRTemp v128, IRTemp u64, UInt imm8 )
18954 /* Surround u64 with zeroes as per imm, giving us something we can
18955 OR into a suitably masked-out v128.*/
18956 IRTemp withZs = newTemp(Ity_V128);
18957 UShort mask = 0;
18958 if (imm8 == 0) {
18959 mask = 0xFF00;
18960 assign(withZs, binop(Iop_64HLtoV128, mkU64(0), mkexpr(u64)));
18961 } else {
18962 vassert(imm8 == 1);
18963 mask = 0x00FF;
18964 assign( withZs, binop(Iop_64HLtoV128, mkexpr(u64), mkU64(0)));
18967 IRTemp res = newTemp(Ity_V128);
18968 assign( res, binop( Iop_OrV128,
18969 mkexpr(withZs),
18970 binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
18971 return res;
18975 static IRTemp math_INSERTPS ( IRTemp dstV, IRTemp toInsertD, UInt imm8 )
18977 const IRTemp inval = IRTemp_INVALID;
18978 IRTemp dstDs[4] = { inval, inval, inval, inval };
18979 breakupV128to32s( dstV, &dstDs[3], &dstDs[2], &dstDs[1], &dstDs[0] );
18981 vassert(imm8 <= 255);
18982 dstDs[(imm8 >> 4) & 3] = toInsertD; /* "imm8_count_d" */
18984 UInt imm8_zmask = (imm8 & 15);
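/* For reference -- a sketch of the INSERTPS imm8 layout per the Intel
   SDM, not stated elsewhere in this file: bits 7:6 select the source
   lane (handled by the caller when the source is a register), bits 5:4
   select the destination lane ("count_d", used just above), and bits
   3:0 form the zero mask applied below. */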
18985 IRTemp zero_32 = newTemp(Ity_I32);
18986 assign( zero_32, mkU32(0) );
18987 IRTemp resV = newTemp(Ity_V128);
18988 assign( resV, mkV128from32s(
18989 ((imm8_zmask & 8) == 8) ? zero_32 : dstDs[3],
18990 ((imm8_zmask & 4) == 4) ? zero_32 : dstDs[2],
18991 ((imm8_zmask & 2) == 2) ? zero_32 : dstDs[1],
18992 ((imm8_zmask & 1) == 1) ? zero_32 : dstDs[0]) );
18993 return resV;
18997 static Long dis_PEXTRB_128_GtoE ( const VexAbiInfo* vbi, Prefix pfx,
18998 Long delta, Bool isAvx )
19000 IRTemp addr = IRTemp_INVALID;
19001 Int alen = 0;
19002 HChar dis_buf[50];
19003 IRTemp xmm_vec = newTemp(Ity_V128);
19004 IRTemp sel_lane = newTemp(Ity_I32);
19005 IRTemp shr_lane = newTemp(Ity_I32);
19006 const HChar* mbV = isAvx ? "v" : "";
19007 UChar modrm = getUChar(delta);
19008 IRTemp t3, t2, t1, t0;
19009 Int imm8;
19010 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
19011 t3 = t2 = t1 = t0 = IRTemp_INVALID;
19012 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
19014 if ( epartIsReg( modrm ) ) {
19015 imm8 = (Int)getUChar(delta+1);
19016 } else {
19017 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19018 imm8 = (Int)getUChar(delta+alen);
19020 switch ( (imm8 >> 2) & 3 ) {
19021 case 0: assign( sel_lane, mkexpr(t0) ); break;
19022 case 1: assign( sel_lane, mkexpr(t1) ); break;
19023 case 2: assign( sel_lane, mkexpr(t2) ); break;
19024 case 3: assign( sel_lane, mkexpr(t3) ); break;
19025 default: vassert(0);
19027 assign( shr_lane,
19028 binop( Iop_Shr32, mkexpr(sel_lane), mkU8(((imm8 & 3)*8)) ) );
19030 if ( epartIsReg( modrm ) ) {
19031 putIReg64( eregOfRexRM(pfx,modrm),
19032 unop( Iop_32Uto64,
19033 binop(Iop_And32, mkexpr(shr_lane), mkU32(255)) ) );
19034 delta += 1+1;
19035 DIP( "%spextrb $%d, %s,%s\n", mbV, imm8,
19036 nameXMMReg( gregOfRexRM(pfx, modrm) ),
19037 nameIReg64( eregOfRexRM(pfx, modrm) ) );
19038 } else {
19039 storeLE( mkexpr(addr), unop(Iop_32to8, mkexpr(shr_lane) ) );
19040 delta += alen+1;
19041 DIP( "%spextrb $%d,%s,%s\n", mbV,
19042 imm8, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
19045 return delta;
19049 static IRTemp math_DPPD_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 )
19051 vassert(imm8 < 256);
19052 UShort imm8_perms[4] = { 0x0000, 0x00FF, 0xFF00, 0xFFFF };
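/* Note (illustrative, not from the original source): each imm8_perms
   entry is a per-byte V128 mask covering the two 64-bit lanes, indexed
   by a 2-bit field of imm8.  Bits 5:4 of imm8 choose which products
   take part in the sum (the AND just below); bits 1:0 choose which
   result lanes receive that sum (the final AND). */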
19053 IRTemp and_vec = newTemp(Ity_V128);
19054 IRTemp sum_vec = newTemp(Ity_V128);
19055 IRTemp rm = newTemp(Ity_I32);
19056 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
19057 assign( and_vec, binop( Iop_AndV128,
19058 triop( Iop_Mul64Fx2,
19059 mkexpr(rm),
19060 mkexpr(dst_vec), mkexpr(src_vec) ),
19061 mkV128( imm8_perms[ ((imm8 >> 4) & 3) ] ) ) );
19063 assign( sum_vec, binop( Iop_Add64F0x2,
19064 binop( Iop_InterleaveHI64x2,
19065 mkexpr(and_vec), mkexpr(and_vec) ),
19066 binop( Iop_InterleaveLO64x2,
19067 mkexpr(and_vec), mkexpr(and_vec) ) ) );
19068 IRTemp res = newTemp(Ity_V128);
19069 assign(res, binop( Iop_AndV128,
19070 binop( Iop_InterleaveLO64x2,
19071 mkexpr(sum_vec), mkexpr(sum_vec) ),
19072 mkV128( imm8_perms[ (imm8 & 3) ] ) ) );
19073 return res;
19077 static IRTemp math_DPPS_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 )
19079 vassert(imm8 < 256);
19080 IRTemp tmp_prod_vec = newTemp(Ity_V128);
19081 IRTemp prod_vec = newTemp(Ity_V128);
19082 IRTemp sum_vec = newTemp(Ity_V128);
19083 IRTemp rm = newTemp(Ity_I32);
19084 IRTemp v3, v2, v1, v0;
19085 v3 = v2 = v1 = v0 = IRTemp_INVALID;
19086 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
19087 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
19088 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
19089 0xFFFF };
19091 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
19092 assign( tmp_prod_vec,
19093 binop( Iop_AndV128,
19094 triop( Iop_Mul32Fx4,
19095 mkexpr(rm), mkexpr(dst_vec), mkexpr(src_vec) ),
19096 mkV128( imm8_perms[((imm8 >> 4)& 15)] ) ) );
19097 breakupV128to32s( tmp_prod_vec, &v3, &v2, &v1, &v0 );
19098 assign( prod_vec, mkV128from32s( v3, v1, v2, v0 ) );
19100 assign( sum_vec, triop( Iop_Add32Fx4,
19101 mkexpr(rm),
19102 binop( Iop_InterleaveHI32x4,
19103 mkexpr(prod_vec), mkexpr(prod_vec) ),
19104 binop( Iop_InterleaveLO32x4,
19105 mkexpr(prod_vec), mkexpr(prod_vec) ) ) );
19107 IRTemp res = newTemp(Ity_V128);
19108 assign( res, binop( Iop_AndV128,
19109 triop( Iop_Add32Fx4,
19110 mkexpr(rm),
19111 binop( Iop_InterleaveHI32x4,
19112 mkexpr(sum_vec), mkexpr(sum_vec) ),
19113 binop( Iop_InterleaveLO32x4,
19114 mkexpr(sum_vec), mkexpr(sum_vec) ) ),
19115 mkV128( imm8_perms[ (imm8 & 15) ] ) ) );
19116 return res;
19120 static IRTemp math_MPSADBW_128 ( IRTemp dst_vec, IRTemp src_vec, UInt imm8 )
19122 /* Mask out bits of the operands we don't need. This isn't
19123 strictly necessary, but it does ensure Memcheck doesn't
19124 give us any false uninitialised value errors as a
19125 result. */
19126 UShort src_mask[4] = { 0x000F, 0x00F0, 0x0F00, 0xF000 };
19127 UShort dst_mask[2] = { 0x07FF, 0x7FF0 };
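/* Illustrative note (not from the original source): MPSADBW only reads
   a 4-byte block of the source at byte offset 4*(imm8 & 3) and an
   11-byte block of the destination at byte offset 4*((imm8 >> 2) & 1);
   the per-byte masks above (e.g. dst_mask[0] = 0x07FF = bytes 0..10)
   keep exactly those bytes and zero everything else. */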
19129 IRTemp src_maskV = newTemp(Ity_V128);
19130 IRTemp dst_maskV = newTemp(Ity_V128);
19131 assign(src_maskV, mkV128( src_mask[ imm8 & 3 ] ));
19132 assign(dst_maskV, mkV128( dst_mask[ (imm8 >> 2) & 1 ] ));
19134 IRTemp src_masked = newTemp(Ity_V128);
19135 IRTemp dst_masked = newTemp(Ity_V128);
19136 assign(src_masked, binop(Iop_AndV128, mkexpr(src_vec), mkexpr(src_maskV)));
19137 assign(dst_masked, binop(Iop_AndV128, mkexpr(dst_vec), mkexpr(dst_maskV)));
19139 /* Generate 4 64 bit values that we can hand to a clean helper */
19140 IRTemp sHi = newTemp(Ity_I64);
19141 IRTemp sLo = newTemp(Ity_I64);
19142 assign( sHi, unop(Iop_V128HIto64, mkexpr(src_masked)) );
19143 assign( sLo, unop(Iop_V128to64, mkexpr(src_masked)) );
19145 IRTemp dHi = newTemp(Ity_I64);
19146 IRTemp dLo = newTemp(Ity_I64);
19147 assign( dHi, unop(Iop_V128HIto64, mkexpr(dst_masked)) );
19148 assign( dLo, unop(Iop_V128to64, mkexpr(dst_masked)) );
19150 /* Compute halves of the result separately */
19151 IRTemp resHi = newTemp(Ity_I64);
19152 IRTemp resLo = newTemp(Ity_I64);
19154 IRExpr** argsHi
19155 = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo),
19156 mkU64( 0x80 | (imm8 & 7) ));
19157 IRExpr** argsLo
19158 = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo),
19159 mkU64( 0x00 | (imm8 & 7) ));
19161 assign(resHi, mkIRExprCCall( Ity_I64, 0/*regparm*/,
19162 "amd64g_calc_mpsadbw",
19163 &amd64g_calc_mpsadbw, argsHi ));
19164 assign(resLo, mkIRExprCCall( Ity_I64, 0/*regparm*/,
19165 "amd64g_calc_mpsadbw",
19166 &amd64g_calc_mpsadbw, argsLo ));
19168 IRTemp res = newTemp(Ity_V128);
19169 assign(res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo)));
19170 return res;
19173 static Long dis_EXTRACTPS ( const VexAbiInfo* vbi, Prefix pfx,
19174 Long delta, Bool isAvx )
19176 IRTemp addr = IRTemp_INVALID;
19177 Int alen = 0;
19178 HChar dis_buf[50];
19179 UChar modrm = getUChar(delta);
19180 Int imm8_10;
19181 IRTemp xmm_vec = newTemp(Ity_V128);
19182 IRTemp src_dword = newTemp(Ity_I32);
19183 UInt rG = gregOfRexRM(pfx,modrm);
19184 IRTemp t3, t2, t1, t0;
19185 t3 = t2 = t1 = t0 = IRTemp_INVALID;
19187 assign( xmm_vec, getXMMReg( rG ) );
19188 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
19190 if ( epartIsReg( modrm ) ) {
19191 imm8_10 = (Int)(getUChar(delta+1) & 3);
19192 } else {
19193 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19194 imm8_10 = (Int)(getUChar(delta+alen) & 3);
19197 switch ( imm8_10 ) {
19198 case 0: assign( src_dword, mkexpr(t0) ); break;
19199 case 1: assign( src_dword, mkexpr(t1) ); break;
19200 case 2: assign( src_dword, mkexpr(t2) ); break;
19201 case 3: assign( src_dword, mkexpr(t3) ); break;
19202 default: vassert(0);
19205 if ( epartIsReg( modrm ) ) {
19206 UInt rE = eregOfRexRM(pfx,modrm);
19207 putIReg32( rE, mkexpr(src_dword) );
19208 delta += 1+1;
19209 DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10,
19210 nameXMMReg( rG ), nameIReg32( rE ) );
19211 } else {
19212 storeLE( mkexpr(addr), mkexpr(src_dword) );
19213 delta += alen+1;
19214 DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10,
19215 nameXMMReg( rG ), dis_buf );
19218 return delta;
19222 static IRTemp math_PCLMULQDQ( IRTemp dV, IRTemp sV, UInt imm8 )
19224 IRTemp t0 = newTemp(Ity_I64);
19225 IRTemp t1 = newTemp(Ity_I64);
19226 assign(t0, unop((imm8&1)? Iop_V128HIto64 : Iop_V128to64,
19227 mkexpr(dV)));
19228 assign(t1, unop((imm8&16) ? Iop_V128HIto64 : Iop_V128to64,
19229 mkexpr(sV)));
19231 IRTemp t2 = newTemp(Ity_I64);
19232 IRTemp t3 = newTemp(Ity_I64);
19234 IRExpr** args;
19236 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(0));
19237 assign(t2, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
19238 &amd64g_calculate_pclmul, args));
19239 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(1));
19240 assign(t3, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
19241 &amd64g_calculate_pclmul, args));
19243 IRTemp res = newTemp(Ity_V128);
19244 assign(res, binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)));
19245 return res;
19249 __attribute__((noinline))
19250 static
19251 Long dis_ESC_0F3A__SSE4 ( Bool* decode_OK,
19252 const VexAbiInfo* vbi,
19253 Prefix pfx, Int sz, Long deltaIN )
19255 IRTemp addr = IRTemp_INVALID;
19256 UChar modrm = 0;
19257 Int alen = 0;
19258 HChar dis_buf[50];
19260 *decode_OK = False;
19262 Long delta = deltaIN;
19263 UChar opc = getUChar(delta);
19264 delta++;
19265 switch (opc) {
19267 case 0x08:
19268 /* 66 0F 3A 08 /r ib = ROUNDPS imm8, xmm2/m128, xmm1 */
19269 if (have66noF2noF3(pfx) && sz == 2) {
19271 IRTemp src0 = newTemp(Ity_F32);
19272 IRTemp src1 = newTemp(Ity_F32);
19273 IRTemp src2 = newTemp(Ity_F32);
19274 IRTemp src3 = newTemp(Ity_F32);
19275 IRTemp res0 = newTemp(Ity_F32);
19276 IRTemp res1 = newTemp(Ity_F32);
19277 IRTemp res2 = newTemp(Ity_F32);
19278 IRTemp res3 = newTemp(Ity_F32);
19279 IRTemp rm = newTemp(Ity_I32);
19280 Int imm = 0;
19282 modrm = getUChar(delta);
19284 if (epartIsReg(modrm)) {
19285 assign( src0,
19286 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
19287 assign( src1,
19288 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 1 ) );
19289 assign( src2,
19290 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 2 ) );
19291 assign( src3,
19292 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 3 ) );
19293 imm = getUChar(delta+1);
19294 if (imm & ~15) goto decode_failure;
19295 delta += 1+1;
19296 DIP( "roundps $%d,%s,%s\n",
19297 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
19298 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19299 } else {
19300 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19301 gen_SIGNAL_if_not_16_aligned(vbi, addr);
19302 assign( src0, loadLE(Ity_F32,
19303 binop(Iop_Add64, mkexpr(addr), mkU64(0) )));
19304 assign( src1, loadLE(Ity_F32,
19305 binop(Iop_Add64, mkexpr(addr), mkU64(4) )));
19306 assign( src2, loadLE(Ity_F32,
19307 binop(Iop_Add64, mkexpr(addr), mkU64(8) )));
19308 assign( src3, loadLE(Ity_F32,
19309 binop(Iop_Add64, mkexpr(addr), mkU64(12) )));
19310 imm = getUChar(delta+alen);
19311 if (imm & ~15) goto decode_failure;
19312 delta += alen+1;
19313 DIP( "roundps $%d,%s,%s\n",
19314 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19317 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19318 that encoding is the same as the encoding for IRRoundingMode,
19319 we can use that value directly in the IR as a rounding
19320 mode. */
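/* Concretely (an illustrative note, not in the original source): the
   encoding is 0 = nearest even, 1 = toward -inf, 2 = toward +inf,
   3 = toward zero, for both the ROUNDPS immediate and IRRoundingMode,
   which is why (imm & 3) can be used directly.  If bit 2 of the
   immediate is set, the current SSE (MXCSR) rounding mode is used
   instead, as the (imm & 4) test below shows. */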
19321 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
19323 assign(res0, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src0)) );
19324 assign(res1, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src1)) );
19325 assign(res2, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src2)) );
19326 assign(res3, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src3)) );
19328 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
19329 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );
19330 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 2, mkexpr(res2) );
19331 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 3, mkexpr(res3) );
19333 goto decode_success;
19335 break;
19337 case 0x09:
19338 /* 66 0F 3A 09 /r ib = ROUNDPD imm8, xmm2/m128, xmm1 */
19339 if (have66noF2noF3(pfx) && sz == 2) {
19341 IRTemp src0 = newTemp(Ity_F64);
19342 IRTemp src1 = newTemp(Ity_F64);
19343 IRTemp res0 = newTemp(Ity_F64);
19344 IRTemp res1 = newTemp(Ity_F64);
19345 IRTemp rm = newTemp(Ity_I32);
19346 Int imm = 0;
19348 modrm = getUChar(delta);
19350 if (epartIsReg(modrm)) {
19351 assign( src0,
19352 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) );
19353 assign( src1,
19354 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 1 ) );
19355 imm = getUChar(delta+1);
19356 if (imm & ~15) goto decode_failure;
19357 delta += 1+1;
19358 DIP( "roundpd $%d,%s,%s\n",
19359 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
19360 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19361 } else {
19362 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19363 gen_SIGNAL_if_not_16_aligned(vbi, addr);
19364 assign( src0, loadLE(Ity_F64,
19365 binop(Iop_Add64, mkexpr(addr), mkU64(0) )));
19366 assign( src1, loadLE(Ity_F64,
19367 binop(Iop_Add64, mkexpr(addr), mkU64(8) )));
19368 imm = getUChar(delta+alen);
19369 if (imm & ~15) goto decode_failure;
19370 delta += alen+1;
19371 DIP( "roundpd $%d,%s,%s\n",
19372 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19375 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19376 that encoding is the same as the encoding for IRRoundingMode,
19377 we can use that value directly in the IR as a rounding
19378 mode. */
19379 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
19381 assign(res0, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src0)) );
19382 assign(res1, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src1)) );
19384 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
19385 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );
19387 goto decode_success;
19389 break;
19391 case 0x0A:
19392 case 0x0B:
19393 /* 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
19394 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1  */
19396 if (have66noF2noF3(pfx) && sz == 2) {
19398 Bool isD = opc == 0x0B;
19399 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
19400 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
19401 Int imm = 0;
19403 modrm = getUChar(delta);
19405 if (epartIsReg(modrm)) {
19406 assign( src,
19407 isD ? getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 )
19408 : getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
19409 imm = getUChar(delta+1);
19410 if (imm & ~15) goto decode_failure;
19411 delta += 1+1;
19412 DIP( "rounds%c $%d,%s,%s\n",
19413 isD ? 'd' : 's',
19414 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
19415 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19416 } else {
19417 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19418 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
19419 imm = getUChar(delta+alen);
19420 if (imm & ~15) goto decode_failure;
19421 delta += alen+1;
19422 DIP( "rounds%c $%d,%s,%s\n",
19423 isD ? 'd' : 's',
19424 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19427 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19428 that encoding is the same as the encoding for IRRoundingMode,
19429 we can use that value directly in the IR as a rounding
19430 mode. */
19431 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
19432 (imm & 4) ? get_sse_roundingmode()
19433 : mkU32(imm & 3),
19434 mkexpr(src)) );
19436 if (isD)
19437 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
19438 else
19439 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
19441 goto decode_success;
19443 break;
19445 case 0x0C:
19446 /* 66 0F 3A 0C /r ib = BLENDPS xmm1, xmm2/m128, imm8
19447 Blend Packed Single Precision Floating-Point Values (XMM) */
19448 if (have66noF2noF3(pfx) && sz == 2) {
19450 Int imm8;
19451 IRTemp dst_vec = newTemp(Ity_V128);
19452 IRTemp src_vec = newTemp(Ity_V128);
19454 modrm = getUChar(delta);
19456 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
19458 if ( epartIsReg( modrm ) ) {
19459 imm8 = (Int)getUChar(delta+1);
19460 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
19461 delta += 1+1;
19462 DIP( "blendps $%d, %s,%s\n", imm8,
19463 nameXMMReg( eregOfRexRM(pfx, modrm) ),
19464 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19465 } else {
19466 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19467 1/* imm8 is 1 byte after the amode */ );
19468 gen_SIGNAL_if_not_16_aligned( vbi, addr );
19469 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19470 imm8 = (Int)getUChar(delta+alen);
19471 delta += alen+1;
19472 DIP( "blendps $%d, %s,%s\n",
19473 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19476 putXMMReg( gregOfRexRM(pfx, modrm),
19477 mkexpr( math_BLENDPS_128( src_vec, dst_vec, imm8) ) );
19478 goto decode_success;
19480 break;
19482 case 0x0D:
19483 /* 66 0F 3A 0D /r ib = BLENDPD xmm1, xmm2/m128, imm8
19484 Blend Packed Double Precision Floating-Point Values (XMM) */
19485 if (have66noF2noF3(pfx) && sz == 2) {
19487 Int imm8;
19488 IRTemp dst_vec = newTemp(Ity_V128);
19489 IRTemp src_vec = newTemp(Ity_V128);
19491 modrm = getUChar(delta);
19492 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
19494 if ( epartIsReg( modrm ) ) {
19495 imm8 = (Int)getUChar(delta+1);
19496 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
19497 delta += 1+1;
19498 DIP( "blendpd $%d, %s,%s\n", imm8,
19499 nameXMMReg( eregOfRexRM(pfx, modrm) ),
19500 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19501 } else {
19502 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19503 1/* imm8 is 1 byte after the amode */ );
19504 gen_SIGNAL_if_not_16_aligned( vbi, addr );
19505 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19506 imm8 = (Int)getUChar(delta+alen);
19507 delta += alen+1;
19508 DIP( "blendpd $%d, %s,%s\n",
19509 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19512 putXMMReg( gregOfRexRM(pfx, modrm),
19513 mkexpr( math_BLENDPD_128( src_vec, dst_vec, imm8) ) );
19514 goto decode_success;
19516 break;
19518 case 0x0E:
19519 /* 66 0F 3A 0E /r ib = PBLENDW xmm1, xmm2/m128, imm8
19520 Blend Packed Words (XMM) */
19521 if (have66noF2noF3(pfx) && sz == 2) {
19523 Int imm8;
19524 IRTemp dst_vec = newTemp(Ity_V128);
19525 IRTemp src_vec = newTemp(Ity_V128);
19527 modrm = getUChar(delta);
19529 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
19531 if ( epartIsReg( modrm ) ) {
19532 imm8 = (Int)getUChar(delta+1);
19533 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
19534 delta += 1+1;
19535 DIP( "pblendw $%d, %s,%s\n", imm8,
19536 nameXMMReg( eregOfRexRM(pfx, modrm) ),
19537 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19538 } else {
19539 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19540 1/* imm8 is 1 byte after the amode */ );
19541 gen_SIGNAL_if_not_16_aligned( vbi, addr );
19542 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19543 imm8 = (Int)getUChar(delta+alen);
19544 delta += alen+1;
19545 DIP( "pblendw $%d, %s,%s\n",
19546 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19549 putXMMReg( gregOfRexRM(pfx, modrm),
19550 mkexpr( math_PBLENDW_128( src_vec, dst_vec, imm8) ) );
19551 goto decode_success;
19553 break;
19555 case 0x14:
19556 /* 66 0F 3A 14 /r ib = PEXTRB r/m16, xmm, imm8
19557 Extract Byte from xmm, store in mem or zero-extend + store in gen.reg.
19558 (XMM) */
19559 if (have66noF2noF3(pfx) && sz == 2) {
19560 delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ );
19561 goto decode_success;
19563 break;
19565 case 0x15:
19566 /* 66 0F 3A 15 /r ib = PEXTRW r/m16, xmm, imm8
19567 Extract Word from xmm, store in mem or zero-extend + store in gen.reg.
19568 (XMM) */
19569 if (have66noF2noF3(pfx) && sz == 2) {
19570 delta = dis_PEXTRW( vbi, pfx, delta, False/*!isAvx*/ );
19571 goto decode_success;
19573 break;
19575 case 0x16:
19576 /* 66 no-REX.W 0F 3A 16 /r ib = PEXTRD reg/mem32, xmm2, imm8
19577 Extract Doubleword int from xmm reg and store in gen.reg or mem. (XMM)
19578 Note that this insn has the same opcodes as PEXTRQ, but
19579 here the REX.W bit is _not_ present */
19580 if (have66noF2noF3(pfx)
19581 && sz == 2 /* REX.W is _not_ present */) {
19582 delta = dis_PEXTRD( vbi, pfx, delta, False/*!isAvx*/ );
19583 goto decode_success;
19585 /* 66 REX.W 0F 3A 16 /r ib = PEXTRQ reg/mem64, xmm2, imm8
19586 Extract Quadword int from xmm reg and store in gen.reg or mem. (XMM)
19587 Note that this insn has the same opcodes as PEXTRD, but
19588 here the REX.W bit is present */
19589 if (have66noF2noF3(pfx)
19590 && sz == 8 /* REX.W is present */) {
19591 delta = dis_PEXTRQ( vbi, pfx, delta, False/*!isAvx*/);
19592 goto decode_success;
19594 break;
19596 case 0x17:
19597 /* 66 0F 3A 17 /r ib = EXTRACTPS reg/mem32, xmm2, imm8 Extract
19598 float from xmm reg and store in gen.reg or mem. This is
19599 identical to PEXTRD, except that REX.W appears to be ignored. */
19601 if (have66noF2noF3(pfx)
19602 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
19603 delta = dis_EXTRACTPS( vbi, pfx, delta, False/*!isAvx*/ );
19604 goto decode_success;
19606 break;
19608 case 0x20:
19609 /* 66 0F 3A 20 /r ib = PINSRB xmm1, r32/m8, imm8
19610 Extract byte from r32/m8 and insert into xmm1 */
19611 if (have66noF2noF3(pfx) && sz == 2) {
19612 Int imm8;
19613 IRTemp new8 = newTemp(Ity_I8);
19614 modrm = getUChar(delta);
19615 UInt rG = gregOfRexRM(pfx, modrm);
19616 if ( epartIsReg( modrm ) ) {
19617 UInt rE = eregOfRexRM(pfx,modrm);
19618 imm8 = (Int)(getUChar(delta+1) & 0xF);
19619 assign( new8, unop(Iop_32to8, getIReg32(rE)) );
19620 delta += 1+1;
19621 DIP( "pinsrb $%d,%s,%s\n", imm8,
19622 nameIReg32(rE), nameXMMReg(rG) );
19623 } else {
19624 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19625 imm8 = (Int)(getUChar(delta+alen) & 0xF);
19626 assign( new8, loadLE( Ity_I8, mkexpr(addr) ) );
19627 delta += alen+1;
19628 DIP( "pinsrb $%d,%s,%s\n",
19629 imm8, dis_buf, nameXMMReg(rG) );
19631 IRTemp src_vec = newTemp(Ity_V128);
19632 assign(src_vec, getXMMReg( gregOfRexRM(pfx, modrm) ));
19633 IRTemp res = math_PINSRB_128( src_vec, new8, imm8 );
19634 putXMMReg( rG, mkexpr(res) );
19635 goto decode_success;
19637 break;
19639 case 0x21:
19640 /* 66 0F 3A 21 /r ib = INSERTPS imm8, xmm2/m32, xmm1
19641 Insert Packed Single Precision Floating-Point Value (XMM) */
19642 if (have66noF2noF3(pfx) && sz == 2) {
19643 UInt imm8;
19644 IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */
19645 const IRTemp inval = IRTemp_INVALID;
19647 modrm = getUChar(delta);
19648 UInt rG = gregOfRexRM(pfx, modrm);
19650 if ( epartIsReg( modrm ) ) {
19651 UInt rE = eregOfRexRM(pfx, modrm);
19652 IRTemp vE = newTemp(Ity_V128);
19653 assign( vE, getXMMReg(rE) );
19654 IRTemp dsE[4] = { inval, inval, inval, inval };
19655 breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] );
19656 imm8 = getUChar(delta+1);
19657 d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */
19658 delta += 1+1;
19659 DIP( "insertps $%u, %s,%s\n",
19660 imm8, nameXMMReg(rE), nameXMMReg(rG) );
19661 } else {
19662 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19663 assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) );
19664 imm8 = getUChar(delta+alen);
19665 delta += alen+1;
19666 DIP( "insertps $%u, %s,%s\n",
19667 imm8, dis_buf, nameXMMReg(rG) );
19670 IRTemp vG = newTemp(Ity_V128);
19671 assign( vG, getXMMReg(rG) );
19673 putXMMReg( rG, mkexpr(math_INSERTPS( vG, d2ins, imm8 )) );
19674 goto decode_success;
19676 break;
19678 case 0x22:
19679 /* 66 no-REX.W 0F 3A 22 /r ib = PINSRD xmm1, r/m32, imm8
19680 Extract Doubleword int from gen.reg/mem32 and insert into xmm1 */
19681 if (have66noF2noF3(pfx)
19682 && sz == 2 /* REX.W is NOT present */) {
19683 Int imm8_10;
19684 IRTemp src_u32 = newTemp(Ity_I32);
19685 modrm = getUChar(delta);
19686 UInt rG = gregOfRexRM(pfx, modrm);
19688 if ( epartIsReg( modrm ) ) {
19689 UInt rE = eregOfRexRM(pfx,modrm);
19690 imm8_10 = (Int)(getUChar(delta+1) & 3);
19691 assign( src_u32, getIReg32( rE ) );
19692 delta += 1+1;
19693 DIP( "pinsrd $%d, %s,%s\n",
19694 imm8_10, nameIReg32(rE), nameXMMReg(rG) );
19695 } else {
19696 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19697 imm8_10 = (Int)(getUChar(delta+alen) & 3);
19698 assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) );
19699 delta += alen+1;
19700 DIP( "pinsrd $%d, %s,%s\n",
19701 imm8_10, dis_buf, nameXMMReg(rG) );
19704 IRTemp src_vec = newTemp(Ity_V128);
19705 assign(src_vec, getXMMReg( rG ));
19706 IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 );
19707 putXMMReg( rG, mkexpr(res_vec) );
19708 goto decode_success;
19710 /* 66 REX.W 0F 3A 22 /r ib = PINSRQ xmm1, r/m64, imm8
19711 Extract Quadword int from gen.reg/mem64 and insert into xmm1 */
19712 if (have66noF2noF3(pfx)
19713 && sz == 8 /* REX.W is present */) {
19714 Int imm8_0;
19715 IRTemp src_u64 = newTemp(Ity_I64);
19716 modrm = getUChar(delta);
19717 UInt rG = gregOfRexRM(pfx, modrm);
19719 if ( epartIsReg( modrm ) ) {
19720 UInt rE = eregOfRexRM(pfx,modrm);
19721 imm8_0 = (Int)(getUChar(delta+1) & 1);
19722 assign( src_u64, getIReg64( rE ) );
19723 delta += 1+1;
19724 DIP( "pinsrq $%d, %s,%s\n",
19725 imm8_0, nameIReg64(rE), nameXMMReg(rG) );
19726 } else {
19727 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19728 imm8_0 = (Int)(getUChar(delta+alen) & 1);
19729 assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) );
19730 delta += alen+1;
19731 DIP( "pinsrq $%d, %s,%s\n",
19732 imm8_0, dis_buf, nameXMMReg(rG) );
19735 IRTemp src_vec = newTemp(Ity_V128);
19736 assign(src_vec, getXMMReg( rG ));
19737 IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 );
19738 putXMMReg( rG, mkexpr(res_vec) );
19739 goto decode_success;
19741 break;
19743 case 0x40:
19744 /* 66 0F 3A 40 /r ib = DPPS xmm1, xmm2/m128, imm8
19745 Dot Product of Packed Single Precision Floating-Point Values (XMM) */
19746 if (have66noF2noF3(pfx) && sz == 2) {
19747 modrm = getUChar(delta);
19748 Int imm8;
19749 IRTemp src_vec = newTemp(Ity_V128);
19750 IRTemp dst_vec = newTemp(Ity_V128);
19751 UInt rG = gregOfRexRM(pfx, modrm);
19752 assign( dst_vec, getXMMReg( rG ) );
19753 if ( epartIsReg( modrm ) ) {
19754 UInt rE = eregOfRexRM(pfx, modrm);
19755 imm8 = (Int)getUChar(delta+1);
19756 assign( src_vec, getXMMReg(rE) );
19757 delta += 1+1;
19758 DIP( "dpps $%d, %s,%s\n",
19759 imm8, nameXMMReg(rE), nameXMMReg(rG) );
19760 } else {
19761 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19762 1/* imm8 is 1 byte after the amode */ );
19763 gen_SIGNAL_if_not_16_aligned( vbi, addr );
19764 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19765 imm8 = (Int)getUChar(delta+alen);
19766 delta += alen+1;
19767 DIP( "dpps $%d, %s,%s\n",
19768 imm8, dis_buf, nameXMMReg(rG) );
19770 IRTemp res = math_DPPS_128( src_vec, dst_vec, imm8 );
19771 putXMMReg( rG, mkexpr(res) );
19772 goto decode_success;
19774 break;
19776 case 0x41:
19777 /* 66 0F 3A 41 /r ib = DPPD xmm1, xmm2/m128, imm8
19778 Dot Product of Packed Double Precision Floating-Point Values (XMM) */
19779 if (have66noF2noF3(pfx) && sz == 2) {
19780 modrm = getUChar(delta);
19781 Int imm8;
19782 IRTemp src_vec = newTemp(Ity_V128);
19783 IRTemp dst_vec = newTemp(Ity_V128);
19784 UInt rG = gregOfRexRM(pfx, modrm);
19785 assign( dst_vec, getXMMReg( rG ) );
19786 if ( epartIsReg( modrm ) ) {
19787 UInt rE = eregOfRexRM(pfx, modrm);
19788 imm8 = (Int)getUChar(delta+1);
19789 assign( src_vec, getXMMReg(rE) );
19790 delta += 1+1;
19791 DIP( "dppd $%d, %s,%s\n",
19792 imm8, nameXMMReg(rE), nameXMMReg(rG) );
19793 } else {
19794 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19795 1/* imm8 is 1 byte after the amode */ );
19796 gen_SIGNAL_if_not_16_aligned( vbi, addr );
19797 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19798 imm8 = (Int)getUChar(delta+alen);
19799 delta += alen+1;
19800 DIP( "dppd $%d, %s,%s\n",
19801 imm8, dis_buf, nameXMMReg(rG) );
19803 IRTemp res = math_DPPD_128( src_vec, dst_vec, imm8 );
19804 putXMMReg( rG, mkexpr(res) );
19805 goto decode_success;
19807 break;
19809 case 0x42:
19810 /* 66 0F 3A 42 /r ib = MPSADBW xmm1, xmm2/m128, imm8
19811 Multiple Packed Sums of Absolute Differences (XMM) */
19812 if (have66noF2noF3(pfx) && sz == 2) {
19813 Int imm8;
19814 IRTemp src_vec = newTemp(Ity_V128);
19815 IRTemp dst_vec = newTemp(Ity_V128);
19816 modrm = getUChar(delta);
19817 UInt rG = gregOfRexRM(pfx, modrm);
19819 assign( dst_vec, getXMMReg(rG) );
19821 if ( epartIsReg( modrm ) ) {
19822 UInt rE = eregOfRexRM(pfx, modrm);
19824 imm8 = (Int)getUChar(delta+1);
19825 assign( src_vec, getXMMReg(rE) );
19826 delta += 1+1;
19827 DIP( "mpsadbw $%d, %s,%s\n", imm8,
19828 nameXMMReg(rE), nameXMMReg(rG) );
19829 } else {
19830 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19831 1/* imm8 is 1 byte after the amode */ );
19832 gen_SIGNAL_if_not_16_aligned( vbi, addr );
19833 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19834 imm8 = (Int)getUChar(delta+alen);
19835 delta += alen+1;
19836 DIP( "mpsadbw $%d, %s,%s\n", imm8, dis_buf, nameXMMReg(rG) );
19839 putXMMReg( rG, mkexpr( math_MPSADBW_128(dst_vec, src_vec, imm8) ) );
19840 goto decode_success;
19842 break;
19844 case 0x44:
19845 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
19846 * Carry-less multiplication of selected XMM quadwords into XMM
19847 * registers (a.k.a. multiplication of polynomials over GF(2))
19849 if (have66noF2noF3(pfx) && sz == 2) {
19851 Int imm8;
19852 IRTemp svec = newTemp(Ity_V128);
19853 IRTemp dvec = newTemp(Ity_V128);
19854 modrm = getUChar(delta);
19855 UInt rG = gregOfRexRM(pfx, modrm);
19857 assign( dvec, getXMMReg(rG) );
19859 if ( epartIsReg( modrm ) ) {
19860 UInt rE = eregOfRexRM(pfx, modrm);
19861 imm8 = (Int)getUChar(delta+1);
19862 assign( svec, getXMMReg(rE) );
19863 delta += 1+1;
19864 DIP( "pclmulqdq $%d, %s,%s\n", imm8,
19865 nameXMMReg(rE), nameXMMReg(rG) );
19866 } else {
19867 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19868 1/* imm8 is 1 byte after the amode */ );
19869 gen_SIGNAL_if_not_16_aligned( vbi, addr );
19870 assign( svec, loadLE( Ity_V128, mkexpr(addr) ) );
19871 imm8 = (Int)getUChar(delta+alen);
19872 delta += alen+1;
19873 DIP( "pclmulqdq $%d, %s,%s\n",
19874 imm8, dis_buf, nameXMMReg(rG) );
19877 putXMMReg( rG, mkexpr( math_PCLMULQDQ(dvec, svec, imm8) ) );
19878 goto decode_success;
19880 break;
19882 case 0x60:
19883 case 0x61:
19884 case 0x62:
19885 case 0x63:
19886 /* 66 0F 3A 63 /r ib = PCMPISTRI imm8, xmm2/m128, xmm1
19887 66 0F 3A 62 /r ib = PCMPISTRM imm8, xmm2/m128, xmm1
19888 66 0F 3A 61 /r ib = PCMPESTRI imm8, xmm2/m128, xmm1
19889 66 0F 3A 60 /r ib = PCMPESTRM imm8, xmm2/m128, xmm1
19890 (selected special cases that actually occur in glibc,
19891 not by any means a complete implementation.)
19893 if (have66noF2noF3(pfx) && sz == 2) {
19894 Long delta0 = delta;
19895 delta = dis_PCMPxSTRx( vbi, pfx, delta, False/*!isAvx*/, opc );
19896 if (delta > delta0) goto decode_success;
19897 /* else fall through; dis_PCMPxSTRx failed to decode it */
19899 break;
19901 case 0xDF:
19902 /* 66 0F 3A DF /r ib = AESKEYGENASSIST imm8, xmm2/m128, xmm1 */
19903 if (have66noF2noF3(pfx) && sz == 2) {
19904 delta = dis_AESKEYGENASSIST( vbi, pfx, delta, False/*!isAvx*/ );
19905 goto decode_success;
19907 break;
19909 default:
19910 break;
19914 decode_failure:
19915 *decode_OK = False;
19916 return deltaIN;
19918 decode_success:
19919 *decode_OK = True;
19920 return delta;
19924 /*------------------------------------------------------------*/
19925 /*--- ---*/
19926 /*--- Top-level post-escape decoders: dis_ESC_NONE ---*/
19927 /*--- ---*/
19928 /*------------------------------------------------------------*/
19930 __attribute__((noinline))
19931 static
19932 Long dis_ESC_NONE (
19933 /*MB_OUT*/DisResult* dres,
19934 /*MB_OUT*/Bool* expect_CAS,
19935 const VexArchInfo* archinfo,
19936 const VexAbiInfo* vbi,
19937 Prefix pfx, Int sz, Long deltaIN
19940 Long d64 = 0;
19941 UChar abyte = 0;
19942 IRTemp addr = IRTemp_INVALID;
19943 IRTemp t1 = IRTemp_INVALID;
19944 IRTemp t2 = IRTemp_INVALID;
19945 IRTemp t3 = IRTemp_INVALID;
19946 IRTemp t4 = IRTemp_INVALID;
19947 IRTemp t5 = IRTemp_INVALID;
19948 IRType ty = Ity_INVALID;
19949 UChar modrm = 0;
19950 Int am_sz = 0;
19951 Int d_sz = 0;
19952 Int alen = 0;
19953 HChar dis_buf[50];
19955 Long delta = deltaIN;
19956 UChar opc = getUChar(delta); delta++;
19958 /* delta now points at the modrm byte. In most of the cases that
19959 follow, neither the F2 nor F3 prefixes are allowed. However,
19960 for some basic arithmetic operations we have to allow F2/XACQ or
19961 F3/XREL in the case where the destination is memory and the LOCK
19962 prefix is also present. Do this check by looking at the modrm
19963 byte but not advancing delta over it. */
19964 /* By default, F2 and F3 are not allowed, so let's start off with
19965 that setting. */
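/* Illustrative example (not from the original source): an encoding such
   as F2 F0 01 07 ("xacquire lock add %eax,(%rdi)") carries an F2 prefix
   that must be accepted here, but only because the destination is
   memory and LOCK is present; the same F2 on a register-to-register
   add would be rejected below. */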
19966 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
19967 { UChar tmp_modrm = getUChar(delta);
19968 switch (opc) {
19969 case 0x00: /* ADD Gb,Eb */ case 0x01: /* ADD Gv,Ev */
19970 case 0x08: /* OR Gb,Eb */ case 0x09: /* OR Gv,Ev */
19971 case 0x10: /* ADC Gb,Eb */ case 0x11: /* ADC Gv,Ev */
19972 case 0x18: /* SBB Gb,Eb */ case 0x19: /* SBB Gv,Ev */
19973 case 0x20: /* AND Gb,Eb */ case 0x21: /* AND Gv,Ev */
19974 case 0x28: /* SUB Gb,Eb */ case 0x29: /* SUB Gv,Ev */
19975 case 0x30: /* XOR Gb,Eb */ case 0x31: /* XOR Gv,Ev */
19976 if (!epartIsReg(tmp_modrm)
19977 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
19978 /* dst is mem, and we have F2 or F3 but not both */
19979 validF2orF3 = True;
19981 break;
19982 default:
19983 break;
19987 /* Now, in the switch below, for the opc values examined by the
19988 switch above, use validF2orF3 rather than looking at pfx
19989 directly. */
19990 switch (opc) {
19992 case 0x00: /* ADD Gb,Eb */
19993 if (!validF2orF3) goto decode_failure;
19994 delta = dis_op2_G_E ( vbi, pfx, Iop_Add8, WithFlagNone, True, 1, delta, "add" );
19995 return delta;
19996 case 0x01: /* ADD Gv,Ev */
19997 if (!validF2orF3) goto decode_failure;
19998 delta = dis_op2_G_E ( vbi, pfx, Iop_Add8, WithFlagNone, True, sz, delta, "add" );
19999 return delta;
20001 case 0x02: /* ADD Eb,Gb */
20002 if (haveF2orF3(pfx)) goto decode_failure;
20003 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagNone, True, 1, delta, "add" );
20004 return delta;
20005 case 0x03: /* ADD Ev,Gv */
20006 if (haveF2orF3(pfx)) goto decode_failure;
20007 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagNone, True, sz, delta, "add" );
20008 return delta;
20010 case 0x04: /* ADD Ib, AL */
20011 if (haveF2orF3(pfx)) goto decode_failure;
20012 delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" );
20013 return delta;
20014 case 0x05: /* ADD Iv, eAX */
20015 if (haveF2orF3(pfx)) goto decode_failure;
20016 delta = dis_op_imm_A(sz, False, Iop_Add8, True, delta, "add" );
20017 return delta;
20019 case 0x08: /* OR Gb,Eb */
20020 if (!validF2orF3) goto decode_failure;
20021 delta = dis_op2_G_E ( vbi, pfx, Iop_Or8, WithFlagNone, True, 1, delta, "or" );
20022 return delta;
20023 case 0x09: /* OR Gv,Ev */
20024 if (!validF2orF3) goto decode_failure;
20025 delta = dis_op2_G_E ( vbi, pfx, Iop_Or8, WithFlagNone, True, sz, delta, "or" );
20026 return delta;
20028 case 0x0A: /* OR Eb,Gb */
20029 if (haveF2orF3(pfx)) goto decode_failure;
20030 delta = dis_op2_E_G ( vbi, pfx, Iop_Or8, WithFlagNone, True, 1, delta, "or" );
20031 return delta;
20032 case 0x0B: /* OR Ev,Gv */
20033 if (haveF2orF3(pfx)) goto decode_failure;
20034 delta = dis_op2_E_G ( vbi, pfx, Iop_Or8, WithFlagNone, True, sz, delta, "or" );
20035 return delta;
20037 case 0x0C: /* OR Ib, AL */
20038 if (haveF2orF3(pfx)) goto decode_failure;
20039 delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" );
20040 return delta;
20041 case 0x0D: /* OR Iv, eAX */
20042 if (haveF2orF3(pfx)) goto decode_failure;
20043 delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" );
20044 return delta;
20046 case 0x10: /* ADC Gb,Eb */
20047 if (!validF2orF3) goto decode_failure;
20048 delta = dis_op2_G_E ( vbi, pfx, Iop_Add8, WithFlagCarry, True, 1, delta, "adc" );
20049 return delta;
20050 case 0x11: /* ADC Gv,Ev */
20051 if (!validF2orF3) goto decode_failure;
20052 delta = dis_op2_G_E ( vbi, pfx, Iop_Add8, WithFlagCarry, True, sz, delta, "adc" );
20053 return delta;
20055 case 0x12: /* ADC Eb,Gb */
20056 if (haveF2orF3(pfx)) goto decode_failure;
20057 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagCarry, True, 1, delta, "adc" );
20058 return delta;
20059 case 0x13: /* ADC Ev,Gv */
20060 if (haveF2orF3(pfx)) goto decode_failure;
20061 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagCarry, True, sz, delta, "adc" );
20062 return delta;
20064 case 0x14: /* ADC Ib, AL */
20065 if (haveF2orF3(pfx)) goto decode_failure;
20066 delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" );
20067 return delta;
20068 case 0x15: /* ADC Iv, eAX */
20069 if (haveF2orF3(pfx)) goto decode_failure;
20070 delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" );
20071 return delta;
20073 case 0x18: /* SBB Gb,Eb */
20074 if (!validF2orF3) goto decode_failure;
20075 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagCarry, True, 1, delta, "sbb" );
20076 return delta;
20077 case 0x19: /* SBB Gv,Ev */
20078 if (!validF2orF3) goto decode_failure;
20079 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagCarry, True, sz, delta, "sbb" );
20080 return delta;
20082 case 0x1A: /* SBB Eb,Gb */
20083 if (haveF2orF3(pfx)) goto decode_failure;
20084 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagCarry, True, 1, delta, "sbb" );
20085 return delta;
20086 case 0x1B: /* SBB Ev,Gv */
20087 if (haveF2orF3(pfx)) goto decode_failure;
20088 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagCarry, True, sz, delta, "sbb" );
20089 return delta;
20091 case 0x1C: /* SBB Ib, AL */
20092 if (haveF2orF3(pfx)) goto decode_failure;
20093 delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" );
20094 return delta;
20095 case 0x1D: /* SBB Iv, eAX */
20096 if (haveF2orF3(pfx)) goto decode_failure;
20097 delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" );
20098 return delta;
20100 case 0x20: /* AND Gb,Eb */
20101 if (!validF2orF3) goto decode_failure;
20102 delta = dis_op2_G_E ( vbi, pfx, Iop_And8, WithFlagNone, True, 1, delta, "and" );
20103 return delta;
20104 case 0x21: /* AND Gv,Ev */
20105 if (!validF2orF3) goto decode_failure;
20106 delta = dis_op2_G_E ( vbi, pfx, Iop_And8, WithFlagNone, True, sz, delta, "and" );
20107 return delta;
20109 case 0x22: /* AND Eb,Gb */
20110 if (haveF2orF3(pfx)) goto decode_failure;
20111 delta = dis_op2_E_G ( vbi, pfx, Iop_And8, WithFlagNone, True, 1, delta, "and" );
20112 return delta;
20113 case 0x23: /* AND Ev,Gv */
20114 if (haveF2orF3(pfx)) goto decode_failure;
20115 delta = dis_op2_E_G ( vbi, pfx, Iop_And8, WithFlagNone, True, sz, delta, "and" );
20116 return delta;
20118 case 0x24: /* AND Ib, AL */
20119 if (haveF2orF3(pfx)) goto decode_failure;
20120 delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" );
20121 return delta;
20122 case 0x25: /* AND Iv, eAX */
20123 if (haveF2orF3(pfx)) goto decode_failure;
20124 delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" );
20125 return delta;
20127 case 0x28: /* SUB Gb,Eb */
20128 if (!validF2orF3) goto decode_failure;
20129 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagNone, True, 1, delta, "sub" );
20130 return delta;
20131 case 0x29: /* SUB Gv,Ev */
20132 if (!validF2orF3) goto decode_failure;
20133 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagNone, True, sz, delta, "sub" );
20134 return delta;
20136 case 0x2A: /* SUB Eb,Gb */
20137 if (haveF2orF3(pfx)) goto decode_failure;
20138 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagNone, True, 1, delta, "sub" );
20139 return delta;
20140 case 0x2B: /* SUB Ev,Gv */
20141 if (haveF2orF3(pfx)) goto decode_failure;
20142 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagNone, True, sz, delta, "sub" );
20143 return delta;
20145 case 0x2C: /* SUB Ib, AL */
20146 if (haveF2orF3(pfx)) goto decode_failure;
20147 delta = dis_op_imm_A(1, False, Iop_Sub8, True, delta, "sub" );
20148 return delta;
20149 case 0x2D: /* SUB Iv, eAX */
20150 if (haveF2orF3(pfx)) goto decode_failure;
20151 delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" );
20152 return delta;
20154 case 0x30: /* XOR Gb,Eb */
20155 if (!validF2orF3) goto decode_failure;
20156 delta = dis_op2_G_E ( vbi, pfx, Iop_Xor8, WithFlagNone, True, 1, delta, "xor" );
20157 return delta;
20158 case 0x31: /* XOR Gv,Ev */
20159 if (!validF2orF3) goto decode_failure;
20160 delta = dis_op2_G_E ( vbi, pfx, Iop_Xor8, WithFlagNone, True, sz, delta, "xor" );
20161 return delta;
20163 case 0x32: /* XOR Eb,Gb */
20164 if (haveF2orF3(pfx)) goto decode_failure;
20165 delta = dis_op2_E_G ( vbi, pfx, Iop_Xor8, WithFlagNone, True, 1, delta, "xor" );
20166 return delta;
20167 case 0x33: /* XOR Ev,Gv */
20168 if (haveF2orF3(pfx)) goto decode_failure;
20169 delta = dis_op2_E_G ( vbi, pfx, Iop_Xor8, WithFlagNone, True, sz, delta, "xor" );
20170 return delta;
20172 case 0x34: /* XOR Ib, AL */
20173 if (haveF2orF3(pfx)) goto decode_failure;
20174 delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" );
20175 return delta;
20176 case 0x35: /* XOR Iv, eAX */
20177 if (haveF2orF3(pfx)) goto decode_failure;
20178 delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" );
20179 return delta;
20181 case 0x38: /* CMP Gb,Eb */
20182 if (haveF2orF3(pfx)) goto decode_failure;
20183 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagNone, False, 1, delta, "cmp" );
20184 return delta;
20185 case 0x39: /* CMP Gv,Ev */
20186 if (haveF2orF3(pfx)) goto decode_failure;
20187 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagNone, False, sz, delta, "cmp" );
20188 return delta;
20190 case 0x3A: /* CMP Eb,Gb */
20191 if (haveF2orF3(pfx)) goto decode_failure;
20192 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagNone, False, 1, delta, "cmp" );
20193 return delta;
20194 case 0x3B: /* CMP Ev,Gv */
20195 if (haveF2orF3(pfx)) goto decode_failure;
20196 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagNone, False, sz, delta, "cmp" );
20197 return delta;
20199 case 0x3C: /* CMP Ib, AL */
20200 if (haveF2orF3(pfx)) goto decode_failure;
20201 delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" );
20202 return delta;
20203 case 0x3D: /* CMP Iv, eAX */
20204 if (haveF2orF3(pfx)) goto decode_failure;
20205 delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" );
20206 return delta;
20208 case 0x50: /* PUSH eAX */
20209 case 0x51: /* PUSH eCX */
20210 case 0x52: /* PUSH eDX */
20211 case 0x53: /* PUSH eBX */
20212 case 0x55: /* PUSH eBP */
20213 case 0x56: /* PUSH eSI */
20214 case 0x57: /* PUSH eDI */
20215 case 0x54: /* PUSH eSP */
20216 /* This is the Right Way, in that the value to be pushed is
20217 established before %rsp is changed, so that pushq %rsp
20218 correctly pushes the old value. */
20219 if (haveF2orF3(pfx)) goto decode_failure;
20220 vassert(sz == 2 || sz == 4 || sz == 8);
20221 if (sz == 4)
20222 sz = 8; /* there is no encoding for 32-bit push in 64-bit mode */
20223 ty = sz==2 ? Ity_I16 : Ity_I64;
20224 t1 = newTemp(ty);
20225 t2 = newTemp(Ity_I64);
20226 assign(t1, getIRegRexB(sz, pfx, opc-0x50));
20227 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(sz)));
20228 putIReg64(R_RSP, mkexpr(t2) );
20229 storeLE(mkexpr(t2),mkexpr(t1));
20230 DIP("push%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x50));
20231 return delta;
20233 case 0x58: /* POP eAX */
20234 case 0x59: /* POP eCX */
20235 case 0x5A: /* POP eDX */
20236 case 0x5B: /* POP eBX */
20237 case 0x5D: /* POP eBP */
20238 case 0x5E: /* POP eSI */
20239 case 0x5F: /* POP eDI */
20240 case 0x5C: /* POP eSP */
20241 if (haveF2orF3(pfx)) goto decode_failure;
20242 vassert(sz == 2 || sz == 4 || sz == 8);
20243 if (sz == 4)
20244 sz = 8; /* there is no encoding for 32-bit pop in 64-bit mode */
20245 t1 = newTemp(szToITy(sz));
20246 t2 = newTemp(Ity_I64);
20247 assign(t2, getIReg64(R_RSP));
20248 assign(t1, loadLE(szToITy(sz),mkexpr(t2)));
20249 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz)));
20250 putIRegRexB(sz, pfx, opc-0x58, mkexpr(t1));
20251 DIP("pop%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x58));
20252 return delta;
20254 case 0x63: /* MOVSX */
20255 if (haveF2orF3(pfx)) goto decode_failure;
20256 if (haveREX(pfx) && 1==getRexW(pfx)) {
20257 vassert(sz == 8);
20258 /* movsx r/m32 to r64 */
20259 modrm = getUChar(delta);
20260 if (epartIsReg(modrm)) {
20261 delta++;
20262 putIRegG(8, pfx, modrm,
20263 unop(Iop_32Sto64,
20264 getIRegE(4, pfx, modrm)));
20265 DIP("movslq %s,%s\n",
20266 nameIRegE(4, pfx, modrm),
20267 nameIRegG(8, pfx, modrm));
20268 return delta;
20269 } else {
20270 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
20271 delta += alen;
20272 putIRegG(8, pfx, modrm,
20273 unop(Iop_32Sto64,
20274 loadLE(Ity_I32, mkexpr(addr))));
20275 DIP("movslq %s,%s\n", dis_buf,
20276 nameIRegG(8, pfx, modrm));
20277 return delta;
20279 } else {
20280 goto decode_failure;
20283 case 0x68: /* PUSH Iv */
20284 if (haveF2orF3(pfx)) goto decode_failure;
20285 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
20286 if (sz == 4) sz = 8;
20287 d64 = getSDisp(imin(4,sz),delta);
20288 delta += imin(4,sz);
20289 goto do_push_I;
20291 case 0x69: /* IMUL Iv, Ev, Gv */
20292 if (haveF2orF3(pfx)) goto decode_failure;
20293 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, sz );
20294 return delta;
20296 case 0x6A: /* PUSH Ib, sign-extended to sz */
20297 if (haveF2orF3(pfx)) goto decode_failure;
20298 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
20299 if (sz == 4) sz = 8;
20300 d64 = getSDisp8(delta); delta += 1;
20301 goto do_push_I;
20302 do_push_I:
20303 ty = szToITy(sz);
20304 t1 = newTemp(Ity_I64);
20305 t2 = newTemp(ty);
20306 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
20307 putIReg64(R_RSP, mkexpr(t1) );
20308 /* stop mkU16 asserting if d64 is a negative 16-bit number
20309 (bug #132813) */
20310 if (ty == Ity_I16)
20311 d64 &= 0xFFFF;
20312 storeLE( mkexpr(t1), mkU(ty,d64) );
20313 DIP("push%c $%lld\n", nameISize(sz), (Long)d64);
20314 return delta;
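/* Illustrative sketch, not part of the translator: the masking above keeps
   a sign-extended immediate representable as an unsigned 16-bit constant
   (two's-complement assumed), e.g.

      long long d64 = -1;                                      // sign-extended imm8
      unsigned short imm16 = (unsigned short)(d64 & 0xFFFF);   // 0xFFFF, no assert
*/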
20316 case 0x6B: /* IMUL Ib, Ev, Gv */
20317 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, 1 );
20318 return delta;
20320 case 0x70:
20321 case 0x71:
20322 case 0x72: /* JBb/JNAEb (jump below) */
20323 case 0x73: /* JNBb/JAEb (jump not below) */
20324 case 0x74: /* JZb/JEb (jump zero) */
20325 case 0x75: /* JNZb/JNEb (jump not zero) */
20326 case 0x76: /* JBEb/JNAb (jump below or equal) */
20327 case 0x77: /* JNBEb/JAb (jump not below or equal) */
20328 case 0x78: /* JSb (jump negative) */
20329 case 0x79: /* JNSb (jump not negative) */
20330 case 0x7A: /* JP (jump parity even) */
20331 case 0x7B: /* JNP/JPO (jump parity odd) */
20332 case 0x7C: /* JLb/JNGEb (jump less) */
20333 case 0x7D: /* JGEb/JNLb (jump greater or equal) */
20334 case 0x7E: /* JLEb/JNGb (jump less or equal) */
20335 case 0x7F: { /* JGb/JNLEb (jump greater) */
20336 Long jmpDelta;
20337 const HChar* comment = "";
20338 if (haveF3(pfx)) goto decode_failure;
20339 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
20340 jmpDelta = getSDisp8(delta);
20341 vassert(-128 <= jmpDelta && jmpDelta < 128);
20342 d64 = (guest_RIP_bbstart+delta+1) + jmpDelta;
20343 delta++;
20344 /* End the block at this point. */
20345 jcc_01( dres, (AMD64Condcode)(opc - 0x70),
20346 guest_RIP_bbstart+delta, d64 );
20347 vassert(dres->whatNext == Dis_StopHere);
20348 DIP("j%s-8 0x%llx %s\n", name_AMD64Condcode(opc - 0x70), (ULong)d64,
20349 comment);
20350 return delta;
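/* Illustrative sketch, not part of the translator: the branch target
   computed above is "address of the next instruction plus the signed 8-bit
   displacement".  In plain C (names hypothetical):

      unsigned long long jcc8_target(unsigned long long bbstart,
                                     unsigned long long delta_of_disp8,
                                     signed char disp8)
      {
         unsigned long long next_insn = bbstart + delta_of_disp8 + 1;
         return next_insn + (long long)disp8;
      }
*/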
20353 case 0x80: /* Grp1 Ib,Eb */
20354 modrm = getUChar(delta);
20355 /* Disallow F2/XACQ and F3/XREL for the non-mem case. Allow
20356 just one for the mem case and also require LOCK in this case.
20357 Note that this erroneously allows XACQ/XREL on CMP since we
20358 don't check the subopcode here. No big deal. */
20359 if (epartIsReg(modrm) && haveF2orF3(pfx))
20360 goto decode_failure;
20361 if (!epartIsReg(modrm) && haveF2andF3(pfx))
20362 goto decode_failure;
20363 if (!epartIsReg(modrm) && haveF2orF3(pfx) && !haveLOCK(pfx))
20364 goto decode_failure;
20365 am_sz = lengthAMode(pfx,delta);
20366 sz = 1;
20367 d_sz = 1;
20368 d64 = getSDisp8(delta + am_sz);
20369 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
20370 return delta;
20372 case 0x81: /* Grp1 Iv,Ev */
20373 modrm = getUChar(delta);
20374 /* Same comment as for case 0x80 just above. */
20375 if (epartIsReg(modrm) && haveF2orF3(pfx))
20376 goto decode_failure;
20377 if (!epartIsReg(modrm) && haveF2andF3(pfx))
20378 goto decode_failure;
20379 if (!epartIsReg(modrm) && haveF2orF3(pfx) && !haveLOCK(pfx))
20380 goto decode_failure;
20381 am_sz = lengthAMode(pfx,delta);
20382 d_sz = imin(sz,4);
20383 d64 = getSDisp(d_sz, delta + am_sz);
20384 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
20385 return delta;
20387 case 0x83: /* Grp1 Ib,Ev */
20388 if (haveF2orF3(pfx)) goto decode_failure;
20389 modrm = getUChar(delta);
20390 am_sz = lengthAMode(pfx,delta);
20391 d_sz = 1;
20392 d64 = getSDisp8(delta + am_sz);
20393 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
20394 return delta;
20396 case 0x84: /* TEST Eb,Gb */
20397 if (haveF2orF3(pfx)) goto decode_failure;
20398 delta = dis_op2_E_G ( vbi, pfx, Iop_And8, WithFlagNone, False,
20399 1, delta, "test" );
20400 return delta;
20402 case 0x85: /* TEST Ev,Gv */
20403 if (haveF2orF3(pfx)) goto decode_failure;
20404 delta = dis_op2_E_G ( vbi, pfx, Iop_And8, WithFlagNone, False,
20405 sz, delta, "test" );
20406 return delta;
20408 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
20409 prefix. Therefore, generate CAS regardless of the presence or
20410 otherwise of a LOCK prefix. */
20411 case 0x86: /* XCHG Gb,Eb */
20412 sz = 1;
20413 /* Fall through ... */
20414 case 0x87: /* XCHG Gv,Ev */
20415 modrm = getUChar(delta);
20416 /* Check whether F2 or F3 are allowable. For the mem case, one
20417 or the other but not both are. We don't care about the
20418 presence of LOCK in this case -- XCHG is unusual in this
20419 respect. */
20420 if (haveF2orF3(pfx)) {
20421 if (epartIsReg(modrm)) {
20422 goto decode_failure;
20423 } else {
20424 if (haveF2andF3(pfx))
20425 goto decode_failure;
20428 ty = szToITy(sz);
20429 t1 = newTemp(ty); t2 = newTemp(ty);
20430 if (epartIsReg(modrm)) {
20431 assign(t1, getIRegE(sz, pfx, modrm));
20432 assign(t2, getIRegG(sz, pfx, modrm));
20433 putIRegG(sz, pfx, modrm, mkexpr(t1));
20434 putIRegE(sz, pfx, modrm, mkexpr(t2));
20435 delta++;
20436 DIP("xchg%c %s, %s\n",
20437 nameISize(sz), nameIRegG(sz, pfx, modrm),
20438 nameIRegE(sz, pfx, modrm));
20439 } else {
20440 *expect_CAS = True;
20441 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
20442 assign( t1, loadLE(ty, mkexpr(addr)) );
20443 assign( t2, getIRegG(sz, pfx, modrm) );
20444 casLE( mkexpr(addr),
20445 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
20446 putIRegG( sz, pfx, modrm, mkexpr(t1) );
20447 delta += alen;
20448 DIP("xchg%c %s, %s\n", nameISize(sz),
20449 nameIRegG(sz, pfx, modrm), dis_buf);
20451 return delta;
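/* Illustrative sketch, not part of the translator: for the memory form, the
   load + casLE pair above models the implicit LOCK# of XCHG; the
   guest-visible effect is an atomic exchange, roughly (C11 atomics,
   illustration only, 64-bit case):

      #include <stdatomic.h>
      unsigned long long xchg64(_Atomic unsigned long long *p,
                                unsigned long long newval)
      {
         return atomic_exchange(p, newval);   // old value lands in the G reg
      }
*/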
20453 case 0x88: { /* MOV Gb,Eb */
20454 /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */
20455 Bool ok = True;
20456 delta = dis_mov_G_E(vbi, pfx, 1, delta, &ok);
20457 if (!ok) goto decode_failure;
20458 return delta;
20461 case 0x89: { /* MOV Gv,Ev */
20462 /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */
20463 Bool ok = True;
20464 delta = dis_mov_G_E(vbi, pfx, sz, delta, &ok);
20465 if (!ok) goto decode_failure;
20466 return delta;
20469 case 0x8A: /* MOV Eb,Gb */
20470 if (haveF2orF3(pfx)) goto decode_failure;
20471 delta = dis_mov_E_G(vbi, pfx, 1, delta);
20472 return delta;
20474 case 0x8B: /* MOV Ev,Gv */
20475 if (haveF2orF3(pfx)) goto decode_failure;
20476 delta = dis_mov_E_G(vbi, pfx, sz, delta);
20477 return delta;
20479 case 0x8C: /* MOV S,E -- MOV from a SEGMENT REGISTER */
20480 if (haveF2orF3(pfx)) goto decode_failure;
20481 delta = dis_mov_S_E(vbi, pfx, sz, delta);
20482 return delta;
20484 case 0x8D: /* LEA M,Gv */
20485 if (haveF2orF3(pfx)) goto decode_failure;
20486 if (sz != 4 && sz != 8)
20487 goto decode_failure;
20488 modrm = getUChar(delta);
20489 if (epartIsReg(modrm))
20490 goto decode_failure;
20491 /* NOTE! this is the one place where a segment override prefix
20492 has no effect on the address calculation. Therefore we clear
20493 any segment override bits in pfx. */
20494 addr = disAMode ( &alen, vbi, clearSegBits(pfx), delta, dis_buf, 0 );
20495 delta += alen;
20496 /* This is a hack. But it isn't clear that doing the
20497 calculation at 32 bits is really worth it. Hence for leal,
20498 do the full 64-bit calculation and then truncate it. */
20499 putIRegG( sz, pfx, modrm,
20500 sz == 4
20501 ? unop(Iop_64to32, mkexpr(addr))
20502 : mkexpr(addr)
20504 DIP("lea%c %s, %s\n", nameISize(sz), dis_buf,
20505 nameIRegG(sz,pfx,modrm));
20506 return delta;
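/* Illustrative sketch, not part of the translator: for "leal" the address
   is formed at 64 bits and then truncated, e.g. lea 4(%rdi,%rsi,2),%eax
   behaves like

      unsigned int lea32_example(unsigned long long rdi, unsigned long long rsi)
      {
         return (unsigned int)(rdi + 2*rsi + 4);   // 64-bit calc, then Iop_64to32
      }
*/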
20508 case 0x8F: { /* POPQ m64 / POPW m16 */
20509 Int len;
20510 UChar rm;
20511 /* There is no encoding for 32-bit pop in 64-bit mode.
20512 So sz==4 actually means sz==8. */
20513 if (haveF2orF3(pfx)) goto decode_failure;
20514 vassert(sz == 2 || sz == 4
20515 || /* tolerate redundant REX.W, see #210481 */ sz == 8);
20516 if (sz == 4) sz = 8;
20517 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
20519 rm = getUChar(delta);
20521 /* make sure this instruction is correct POP */
20522 if (epartIsReg(rm) || gregLO3ofRM(rm) != 0)
20523 goto decode_failure;
20524 /* and has correct size */
20525 vassert(sz == 8);
20527 t1 = newTemp(Ity_I64);
20528 t3 = newTemp(Ity_I64);
20529 assign( t1, getIReg64(R_RSP) );
20530 assign( t3, loadLE(Ity_I64, mkexpr(t1)) );
20532 /* Increase RSP; must be done before the STORE. Intel manual
20533 says: If the RSP register is used as a base register for
20534 addressing a destination operand in memory, the POP
20535 instruction computes the effective address of the operand
20536 after it increments the RSP register. */
20537 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(sz)) );
20539 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
20540 storeLE( mkexpr(addr), mkexpr(t3) );
20542 DIP("popq %s\n", dis_buf);
20544 delta += len;
20545 return delta;
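/* Illustrative sketch, not part of the translator (load64/store64 are
   hypothetical stand-ins for loadLE/storeLE): the ordering implemented
   above, shown for the corner case "popq 8(%rsp)":

      unsigned long long sp = RSP;
      unsigned long long v  = load64(sp);   // t3
      RSP = sp + 8;                         // RSP bumped first ...
      store64(RSP + 8, v);                  // ... destination EA uses the new RSP
*/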
20548 case 0x90: /* XCHG eAX,eAX */
20549 /* detect and handle F3 90 (rep nop) specially */
20550 if (!have66(pfx) && !haveF2(pfx) && haveF3(pfx)) {
20551 DIP("rep nop (P4 pause)\n");
20552 /* "observe" the hint. The Vex client needs to be careful not
20553 to cause very long delays as a result, though. */
20554 jmp_lit(dres, Ijk_Yield, guest_RIP_bbstart+delta);
20555 vassert(dres->whatNext == Dis_StopHere);
20556 return delta;
20558 /* detect and handle NOPs specially */
20559 if (/* F2/F3 probably change meaning completely */
20560 !haveF2orF3(pfx)
20561 /* If REX.B is 1, we're not exchanging rAX with itself */
20562 && getRexB(pfx)==0 ) {
20563 DIP("nop\n");
20564 return delta;
20566 /* else fall through to normal case. */
20567 case 0x91: /* XCHG rAX,rCX */
20568 case 0x92: /* XCHG rAX,rDX */
20569 case 0x93: /* XCHG rAX,rBX */
20570 case 0x94: /* XCHG rAX,rSP */
20571 case 0x95: /* XCHG rAX,rBP */
20572 case 0x96: /* XCHG rAX,rSI */
20573 case 0x97: /* XCHG rAX,rDI */
20574 /* guard against mutancy */
20575 if (haveF2orF3(pfx)) goto decode_failure;
20576 codegen_xchg_rAX_Reg ( pfx, sz, opc - 0x90 );
20577 return delta;
20579 case 0x98: /* CBW */
20580 if (haveF2orF3(pfx)) goto decode_failure;
20581 if (sz == 8) {
20582 putIRegRAX( 8, unop(Iop_32Sto64, getIRegRAX(4)) );
20583 DIP(/*"cdqe\n"*/"cltq\n");
20584 return delta;
20586 if (sz == 4) {
20587 putIRegRAX( 4, unop(Iop_16Sto32, getIRegRAX(2)) );
20588 DIP("cwtl\n");
20589 return delta;
20591 if (sz == 2) {
20592 putIRegRAX( 2, unop(Iop_8Sto16, getIRegRAX(1)) );
20593 DIP("cbw\n");
20594 return delta;
20596 goto decode_failure;
20598 case 0x99: /* CWD/CDQ/CQO */
20599 if (haveF2orF3(pfx)) goto decode_failure;
20600 vassert(sz == 2 || sz == 4 || sz == 8);
20601 ty = szToITy(sz);
20602 putIRegRDX( sz,
20603 binop(mkSizedOp(ty,Iop_Sar8),
20604 getIRegRAX(sz),
20605 mkU8(sz == 2 ? 15 : (sz == 4 ? 31 : 63))) );
20606 DIP(sz == 2 ? "cwd\n"
20607 : (sz == 4 ? /*"cdq\n"*/ "cltd\n"
20608 : "cqo\n"));
20609 return delta;
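/* Illustrative sketch, not part of the translator: the sign-fill above is
   an arithmetic right shift of rAX by (width-1) bits, e.g. for CQO
   (assuming the compiler's >> on a signed value is arithmetic):

      long long cqo_rdx(long long rax) { return rax >> 63; }   // 0 or -1

   and likewise >>31 for CDQ and >>15 for CWD. */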
20611 case 0x9B: /* FWAIT (X87 insn) */
20612 /* ignore? */
20613 DIP("fwait\n");
20614 return delta;
20616 case 0x9C: /* PUSHF */ {
20617 /* Note. There is no encoding for a 32-bit pushf in 64-bit
20618 mode. So sz==4 actually means sz==8. */
20619 /* 24 July 06: has also been seen with a redundant REX prefix,
20620 so must also allow sz==8. */
20621 if (haveF2orF3(pfx)) goto decode_failure;
20622 vassert(sz == 2 || sz == 4 || sz == 8);
20623 if (sz == 4) sz = 8;
20624 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
20626 t1 = newTemp(Ity_I64);
20627 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
20628 putIReg64(R_RSP, mkexpr(t1) );
20630 t2 = newTemp(Ity_I64);
20631 assign( t2, mk_amd64g_calculate_rflags_all() );
20633 /* Patch in the D flag. This can simply be a copy of bit 10 of
20634 baseBlock[OFFB_DFLAG]. */
20635 t3 = newTemp(Ity_I64);
20636 assign( t3, binop(Iop_Or64,
20637 mkexpr(t2),
20638 binop(Iop_And64,
20639 IRExpr_Get(OFFB_DFLAG,Ity_I64),
20640 mkU64(1<<10)))
20643 /* And patch in the ID flag. */
20644 t4 = newTemp(Ity_I64);
20645 assign( t4, binop(Iop_Or64,
20646 mkexpr(t3),
20647 binop(Iop_And64,
20648 binop(Iop_Shl64, IRExpr_Get(OFFB_IDFLAG,Ity_I64),
20649 mkU8(21)),
20650 mkU64(1<<21)))
20653 /* And patch in the AC flag too. */
20654 t5 = newTemp(Ity_I64);
20655 assign( t5, binop(Iop_Or64,
20656 mkexpr(t4),
20657 binop(Iop_And64,
20658 binop(Iop_Shl64, IRExpr_Get(OFFB_ACFLAG,Ity_I64),
20659 mkU8(18)),
20660 mkU64(1<<18)))
20663 /* if sz==2, the stored value needs to be narrowed. */
20664 if (sz == 2)
20665 storeLE( mkexpr(t1), unop(Iop_32to16,
20666 unop(Iop_64to32,mkexpr(t5))) );
20667 else
20668 storeLE( mkexpr(t1), mkexpr(t5) );
20670 DIP("pushf%c\n", nameISize(sz));
20671 return delta;
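/* Illustrative sketch, not part of the translator: the pushed value is the
   lazily-computed OSZACP bits with D, ID and AC patched in from their
   separate guest-state fields, roughly (function name hypothetical):

      unsigned long long rflags_image(unsigned long long oszacp,
                                      unsigned long long dflag,   // 1 or -1
                                      unsigned long long idflag,  // 0 or 1
                                      unsigned long long acflag)  // 0 or 1
      {
         unsigned long long r = oszacp;
         r |= dflag & (1ULL << 10);            // DF: bit 10 set iff dflag == -1
         r |= (idflag << 21) & (1ULL << 21);   // ID: bit 21
         r |= (acflag << 18) & (1ULL << 18);   // AC: bit 18
         return r;
      }
*/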
20674 case 0x9D: /* POPF */
20675 /* Note. There is no encoding for a 32-bit popf in 64-bit mode.
20676 So sz==4 actually means sz==8. */
20677 if (haveF2orF3(pfx)) goto decode_failure;
20678 vassert(sz == 2 || sz == 4 || sz == 8);
20679 if (sz == 4) sz = 8;
20680 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
20681 t1 = newTemp(Ity_I64); t2 = newTemp(Ity_I64);
20682 assign(t2, getIReg64(R_RSP));
20683 assign(t1, widenUto64(loadLE(szToITy(sz),mkexpr(t2))));
20684 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz)));
20685 /* t1 is the flag word. Mask out everything except OSZACP and
20686 set the flags thunk to AMD64G_CC_OP_COPY. */
20687 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
20688 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
20689 stmt( IRStmt_Put( OFFB_CC_DEP1,
20690 binop(Iop_And64,
20691 mkexpr(t1),
20692 mkU64( AMD64G_CC_MASK_C | AMD64G_CC_MASK_P
20693 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_Z
20694 | AMD64G_CC_MASK_S| AMD64G_CC_MASK_O )
20698 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
20700 /* Also need to set the D flag, which is held in bit 10 of t1.
20701 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
20702 stmt( IRStmt_Put(
20703 OFFB_DFLAG,
20704 IRExpr_ITE(
20705 unop(Iop_64to1,
20706 binop(Iop_And64,
20707 binop(Iop_Shr64, mkexpr(t1), mkU8(10)),
20708 mkU64(1))),
20709 mkU64(0xFFFFFFFFFFFFFFFFULL),
20710 mkU64(1)))
20713 /* And set the ID flag */
20714 stmt( IRStmt_Put(
20715 OFFB_IDFLAG,
20716 IRExpr_ITE(
20717 unop(Iop_64to1,
20718 binop(Iop_And64,
20719 binop(Iop_Shr64, mkexpr(t1), mkU8(21)),
20720 mkU64(1))),
20721 mkU64(1),
20722 mkU64(0)))
20725 /* And set the AC flag too */
20726 stmt( IRStmt_Put(
20727 OFFB_ACFLAG,
20728 IRExpr_ITE(
20729 unop(Iop_64to1,
20730 binop(Iop_And64,
20731 binop(Iop_Shr64, mkexpr(t1), mkU8(18)),
20732 mkU64(1))),
20733 mkU64(1),
20734 mkU64(0)))
20737 DIP("popf%c\n", nameISize(sz));
20738 return delta;
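/* Illustrative sketch, not part of the translator (OSZACP_MASK is a
   hypothetical stand-in for the OR of the six AMD64G_CC_MASK_* constants
   used above): the guest-state fields recovered from the popped word t1 are

      unsigned long long cc_dep1 = t1 & OSZACP_MASK;              // COPY thunk
      long long          dflag   = ((t1 >> 10) & 1) ? -1LL : 1LL;
      unsigned long long idflag  =  (t1 >> 21) & 1;
      unsigned long long acflag  =  (t1 >> 18) & 1;
*/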
20740 case 0x9E: /* SAHF */
20741 codegen_SAHF();
20742 DIP("sahf\n");
20743 return delta;
20745 case 0x9F: /* LAHF */
20746 codegen_LAHF();
20747 DIP("lahf\n");
20748 return delta;
20750 case 0xA0: /* MOV Ob,AL */
20751 if (have66orF2orF3(pfx)) goto decode_failure;
20752 sz = 1;
20753 /* Fall through ... */
20754 case 0xA1: /* MOV Ov,eAX */
20755 if (sz != 8 && sz != 4 && sz != 2 && sz != 1)
20756 goto decode_failure;
20757 d64 = getDisp64(delta);
20758 delta += 8;
20759 ty = szToITy(sz);
20760 addr = newTemp(Ity_I64);
20761 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) );
20762 putIRegRAX(sz, loadLE( ty, mkexpr(addr) ));
20763 DIP("mov%c %s0x%llx, %s\n", nameISize(sz),
20764 segRegTxt(pfx), (ULong)d64,
20765 nameIRegRAX(sz));
20766 return delta;
20768 case 0xA2: /* MOV AL,Ob */
20769 if (have66orF2orF3(pfx)) goto decode_failure;
20770 sz = 1;
20771 /* Fall through ... */
20772 case 0xA3: /* MOV eAX,Ov */
20773 if (sz != 8 && sz != 4 && sz != 2 && sz != 1)
20774 goto decode_failure;
20775 d64 = getDisp64(delta);
20776 delta += 8;
20777 ty = szToITy(sz);
20778 addr = newTemp(Ity_I64);
20779 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) );
20780 storeLE( mkexpr(addr), getIRegRAX(sz) );
20781 DIP("mov%c %s, %s0x%llx\n", nameISize(sz), nameIRegRAX(sz),
20782 segRegTxt(pfx), (ULong)d64);
20783 return delta;
20785 case 0xA4:
20786 case 0xA5:
20787 /* F3 A4/A5: rep movsb / rep movs{w,l,q} */
20788 if (haveF3(pfx) && !haveF2(pfx)) {
20789 if (opc == 0xA4)
20790 sz = 1;
20791 dis_REP_op ( dres, AMD64CondAlways, dis_MOVS, sz,
20792 guest_RIP_curr_instr,
20793 guest_RIP_bbstart+delta, "rep movs", pfx );
20794 dres->whatNext = Dis_StopHere;
20795 return delta;
20797 /* A4/A5: movsb / movs{w,l,q} */
20798 if (!haveF3(pfx) && !haveF2(pfx)) {
20799 if (opc == 0xA4)
20800 sz = 1;
20801 dis_string_op( dis_MOVS, sz, "movs", pfx );
20802 return delta;
20804 goto decode_failure;
20806 case 0xA6:
20807 case 0xA7:
20808 /* F3 A6/A7: repe cmpsb / repe cmps{w,l,q} */
20809 if (haveF3(pfx) && !haveF2(pfx)) {
20810 if (opc == 0xA6)
20811 sz = 1;
20812 dis_REP_op ( dres, AMD64CondZ, dis_CMPS, sz,
20813 guest_RIP_curr_instr,
20814 guest_RIP_bbstart+delta, "repe cmps", pfx );
20815 dres->whatNext = Dis_StopHere;
20816 return delta;
20818 goto decode_failure;
20820 case 0xAA:
20821 case 0xAB:
20822 /* F3 AA/AB: rep stosb/rep stos{w,l,q} */
20823 if (haveF3(pfx) && !haveF2(pfx)) {
20824 if (opc == 0xAA)
20825 sz = 1;
20826 dis_REP_op ( dres, AMD64CondAlways, dis_STOS, sz,
20827 guest_RIP_curr_instr,
20828 guest_RIP_bbstart+delta, "rep stos", pfx );
20829 vassert(dres->whatNext == Dis_StopHere);
20830 return delta;
20832 /* AA/AB: stosb/stos{w,l,q} */
20833 if (!haveF3(pfx) && !haveF2(pfx)) {
20834 if (opc == 0xAA)
20835 sz = 1;
20836 dis_string_op( dis_STOS, sz, "stos", pfx );
20837 return delta;
20839 goto decode_failure;
20841 case 0xA8: /* TEST Ib, AL */
20842 if (haveF2orF3(pfx)) goto decode_failure;
20843 delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" );
20844 return delta;
20845 case 0xA9: /* TEST Iv, eAX */
20846 if (haveF2orF3(pfx)) goto decode_failure;
20847 delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" );
20848 return delta;
20850 case 0xAC: /* LODS, no REP prefix */
20851 case 0xAD:
20852 dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", pfx );
20853 return delta;
20855 case 0xAE:
20856 case 0xAF:
20857 /* F2 AE/AF: repne scasb/repne scas{w,l,q} */
20858 if (haveF2(pfx) && !haveF3(pfx)) {
20859 if (opc == 0xAE)
20860 sz = 1;
20861 dis_REP_op ( dres, AMD64CondNZ, dis_SCAS, sz,
20862 guest_RIP_curr_instr,
20863 guest_RIP_bbstart+delta, "repne scas", pfx );
20864 vassert(dres->whatNext == Dis_StopHere);
20865 return delta;
20867 /* F3 AE/AF: repe scasb/repe scas{w,l,q} */
20868 if (!haveF2(pfx) && haveF3(pfx)) {
20869 if (opc == 0xAE)
20870 sz = 1;
20871 dis_REP_op ( dres, AMD64CondZ, dis_SCAS, sz,
20872 guest_RIP_curr_instr,
20873 guest_RIP_bbstart+delta, "repe scas", pfx );
20874 vassert(dres->whatNext == Dis_StopHere);
20875 return delta;
20877 /* AE/AF: scasb/scas{w,l,q} */
20878 if (!haveF2(pfx) && !haveF3(pfx)) {
20879 if (opc == 0xAE)
20880 sz = 1;
20881 dis_string_op( dis_SCAS, sz, "scas", pfx );
20882 return delta;
20884 goto decode_failure;
20886 /* XXXX be careful here with moves to AH/BH/CH/DH */
20887 case 0xB0: /* MOV imm,AL */
20888 case 0xB1: /* MOV imm,CL */
20889 case 0xB2: /* MOV imm,DL */
20890 case 0xB3: /* MOV imm,BL */
20891 case 0xB4: /* MOV imm,AH */
20892 case 0xB5: /* MOV imm,CH */
20893 case 0xB6: /* MOV imm,DH */
20894 case 0xB7: /* MOV imm,BH */
20895 if (haveF2orF3(pfx)) goto decode_failure;
20896 d64 = getUChar(delta);
20897 delta += 1;
20898 putIRegRexB(1, pfx, opc-0xB0, mkU8(d64));
20899 DIP("movb $%lld,%s\n", d64, nameIRegRexB(1,pfx,opc-0xB0));
20900 return delta;
20902 case 0xB8: /* MOV imm,eAX */
20903 case 0xB9: /* MOV imm,eCX */
20904 case 0xBA: /* MOV imm,eDX */
20905 case 0xBB: /* MOV imm,eBX */
20906 case 0xBC: /* MOV imm,eSP */
20907 case 0xBD: /* MOV imm,eBP */
20908 case 0xBE: /* MOV imm,eSI */
20909 case 0xBF: /* MOV imm,eDI */
20910 /* This is the one-and-only place where 64-bit literals are
20911 allowed in the instruction stream. */
20912 if (haveF2orF3(pfx)) goto decode_failure;
20913 if (sz == 8) {
20914 d64 = getDisp64(delta);
20915 delta += 8;
20916 putIRegRexB(8, pfx, opc-0xB8, mkU64(d64));
20917 DIP("movabsq $%lld,%s\n", (Long)d64,
20918 nameIRegRexB(8,pfx,opc-0xB8));
20919 } else {
20920 d64 = getSDisp(imin(4,sz),delta);
20921 delta += imin(4,sz);
20922 putIRegRexB(sz, pfx, opc-0xB8,
20923 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
20924 DIP("mov%c $%lld,%s\n", nameISize(sz),
20925 (Long)d64,
20926 nameIRegRexB(sz,pfx,opc-0xB8));
20928 return delta;
20930 case 0xC0: { /* Grp2 Ib,Eb */
20931 Bool decode_OK = True;
20932 if (haveF2orF3(pfx)) goto decode_failure;
20933 modrm = getUChar(delta);
20934 am_sz = lengthAMode(pfx,delta);
20935 d_sz = 1;
20936 d64 = getUChar(delta + am_sz);
20937 sz = 1;
20938 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
20939 mkU8(d64 & 0xFF), NULL, &decode_OK );
20940 if (!decode_OK) goto decode_failure;
20941 return delta;
20944 case 0xC1: { /* Grp2 Ib,Ev */
20945 Bool decode_OK = True;
20946 if (haveF2orF3(pfx)) goto decode_failure;
20947 modrm = getUChar(delta);
20948 am_sz = lengthAMode(pfx,delta);
20949 d_sz = 1;
20950 d64 = getUChar(delta + am_sz);
20951 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
20952 mkU8(d64 & 0xFF), NULL, &decode_OK );
20953 if (!decode_OK) goto decode_failure;
20954 return delta;
20957 case 0xC2: /* RET imm16 */
20958 if (have66orF3(pfx)) goto decode_failure;
20959 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
20960 d64 = getUDisp16(delta);
20961 delta += 2;
20962 dis_ret(dres, vbi, d64);
20963 DIP("ret $%lld\n", d64);
20964 return delta;
20966 case 0xC3: /* RET */
20967 if (have66(pfx)) goto decode_failure;
20968 /* F3 is acceptable on AMD. */
20969 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
20970 dis_ret(dres, vbi, 0);
20971 DIP(haveF3(pfx) ? "rep ; ret\n" : "ret\n");
20972 return delta;
20974 case 0xC6: /* C6 /0 = MOV Ib,Eb */
20975 sz = 1;
20976 goto maybe_do_Mov_I_E;
20977 case 0xC7: /* C7 /0 = MOV Iv,Ev */
20978 goto maybe_do_Mov_I_E;
20979 maybe_do_Mov_I_E:
20980 modrm = getUChar(delta);
20981 if (gregLO3ofRM(modrm) == 0) {
20982 if (epartIsReg(modrm)) {
20983 /* Neither F2 nor F3 are allowable. */
20984 if (haveF2orF3(pfx)) goto decode_failure;
20985 delta++; /* mod/rm byte */
20986 d64 = getSDisp(imin(4,sz),delta);
20987 delta += imin(4,sz);
20988 putIRegE(sz, pfx, modrm,
20989 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
20990 DIP("mov%c $%lld, %s\n", nameISize(sz),
20991 (Long)d64,
20992 nameIRegE(sz,pfx,modrm));
20993 } else {
20994 if (haveF2(pfx)) goto decode_failure;
20995 /* F3(XRELEASE) is allowable here */
20996 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
20997 /*xtra*/imin(4,sz) );
20998 delta += alen;
20999 d64 = getSDisp(imin(4,sz),delta);
21000 delta += imin(4,sz);
21001 storeLE(mkexpr(addr),
21002 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
21003 DIP("mov%c $%lld, %s\n", nameISize(sz), (Long)d64, dis_buf);
21005 return delta;
21007 /* BEGIN HACKY SUPPORT FOR xbegin */
21008 if (opc == 0xC7 && modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 4
21009 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21010 delta++; /* mod/rm byte */
21011 d64 = getSDisp(4,delta);
21012 delta += 4;
21013 guest_RIP_next_mustcheck = True;
21014 guest_RIP_next_assumed = guest_RIP_bbstart + delta;
21015 Addr64 failAddr = guest_RIP_bbstart + delta + d64;
21016 /* EAX contains the failure status code. Bit 3 is "Set if an
21017 internal buffer overflowed", which seems like the
21018 least-bogus choice we can make here. */
21019 putIRegRAX(4, mkU32(1<<3));
21020 /* And jump to the fail address. */
21021 jmp_lit(dres, Ijk_Boring, failAddr);
21022 vassert(dres->whatNext == Dis_StopHere);
21023 DIP("xbeginq 0x%llx\n", failAddr);
21024 return delta;
21026 /* END HACKY SUPPORT FOR xbegin */
21027 /* BEGIN HACKY SUPPORT FOR xabort */
21028 if (opc == 0xC6 && modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 1
21029 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21030 delta++; /* mod/rm byte */
21031 abyte = getUChar(delta); delta++;
21032 /* There is never a real transaction in progress, so do nothing. */
21033 DIP("xabort $%d\n", (Int)abyte);
21034 return delta;
21036 /* END HACKY SUPPORT FOR xabort */
21037 goto decode_failure;
21039 case 0xC8: /* ENTER */
21040 /* Same comments re operand size as for LEAVE below apply.
21041 Also, only handles the case "enter $imm16, $0"; other cases
21042 for the second operand (nesting depth) are not handled. */
21043 if (sz != 4)
21044 goto decode_failure;
21045 d64 = getUDisp16(delta);
21046 delta += 2;
21047 vassert(d64 >= 0 && d64 <= 0xFFFF);
21048 if (getUChar(delta) != 0)
21049 goto decode_failure;
21050 delta++;
21051 /* Intel docs seem to suggest:
21052 push rbp
21053 temp = rsp
21054 rbp = temp
21055 rsp = rsp - imm16
21057 t1 = newTemp(Ity_I64);
21058 assign(t1, getIReg64(R_RBP));
21059 t2 = newTemp(Ity_I64);
21060 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
21061 putIReg64(R_RSP, mkexpr(t2));
21062 storeLE(mkexpr(t2), mkexpr(t1));
21063 putIReg64(R_RBP, mkexpr(t2));
21064 if (d64 > 0) {
21065 putIReg64(R_RSP, binop(Iop_Sub64, mkexpr(t2), mkU64(d64)));
21067 DIP("enter $%u, $0\n", (UInt)d64);
21068 return delta;
21070 case 0xC9: /* LEAVE */
21071 /* In 64-bit mode this defaults to a 64-bit operand size. There
21072 is no way to encode a 32-bit variant. Hence sz==4 but we do
21073 it as if sz=8. */
21074 if (sz != 4)
21075 goto decode_failure;
21076 t1 = newTemp(Ity_I64);
21077 t2 = newTemp(Ity_I64);
21078 assign(t1, getIReg64(R_RBP));
21079 /* First PUT RSP looks redundant, but need it because RSP must
21080 always be up-to-date for Memcheck to work... */
21081 putIReg64(R_RSP, mkexpr(t1));
21082 assign(t2, loadLE(Ity_I64,mkexpr(t1)));
21083 putIReg64(R_RBP, mkexpr(t2));
21084 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(8)) );
21085 DIP("leave\n");
21086 return delta;
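/* Illustrative sketch, not part of the translator: LEAVE above is simply

      movq %rbp, %rsp
      popq %rbp

   i.e. RSP = RBP; RBP = load64(RSP); RSP += 8; with the intermediate PUT
   of RSP kept so Memcheck always sees an up-to-date stack pointer. */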
21088 case 0xCC: /* INT 3 */
21089 jmp_lit(dres, Ijk_SigTRAP, guest_RIP_bbstart + delta);
21090 vassert(dres->whatNext == Dis_StopHere);
21091 DIP("int $0x3\n");
21092 return delta;
21094 case 0xCD: /* INT imm8 */
21095 d64 = getUChar(delta); delta++;
21097 /* Handle int $0xD2 (Solaris fasttrap syscalls). */
21098 if (d64 == 0xD2) {
21099 jmp_lit(dres, Ijk_Sys_int210, guest_RIP_bbstart + delta);
21100 vassert(dres->whatNext == Dis_StopHere);
21101 DIP("int $0xD2\n");
21102 return delta;
21104 goto decode_failure;
21106 case 0xCF: /* IRET */
21107 /* Note, this is an extremely kludgey and limited implementation of iret
21108 based on the extremely kludgey and limited implementation of iret for x86
21109 popq %RIP; popl %CS; popq %RFLAGS; popq %RSP; popl %SS
21110 %CS and %SS are ignored */
21111 if (sz != 8 || have66orF2orF3(pfx)) goto decode_failure;
21113 t1 = newTemp(Ity_I64); /* RSP */
21114 t2 = newTemp(Ity_I64); /* new RIP */
21115 /* t3 = newTemp(Ity_I32); new CS */
21116 t4 = newTemp(Ity_I64); /* new RFLAGS */
21117 t5 = newTemp(Ity_I64); /* new RSP */
21118 /* t6 = newTemp(Ity_I32); new SS */
21120 assign(t1, getIReg64(R_RSP));
21121 assign(t2, loadLE(Ity_I64, binop(Iop_Add64,mkexpr(t1),mkU64(0))));
21122 /* assign(t3, loadLE(Ity_I32, binop(Iop_Add64,mkexpr(t1),mkU64(8)))); */
21123 assign(t4, loadLE(Ity_I64, binop(Iop_Add64,mkexpr(t1),mkU64(16))));
21124 assign(t5, loadLE(Ity_I64, binop(Iop_Add64,mkexpr(t1),mkU64(24))));
21125 /* assign(t6, loadLE(Ity_I32, binop(Iop_Add64,mkexpr(t1),mkU64(32)))); */
21127 /* set %RFLAGS */
21128 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
21129 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
21130 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
21131 stmt( IRStmt_Put( OFFB_CC_DEP1,
21132 binop(Iop_And64,
21133 mkexpr(t4),
21134 mkU64( AMD64G_CC_MASK_C | AMD64G_CC_MASK_P
21135 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_Z
21136 | AMD64G_CC_MASK_S| AMD64G_CC_MASK_O )
21141 /* Also need to set the D flag, which is held in bit 10 of t4.
21142 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
21143 stmt( IRStmt_Put(
21144 OFFB_DFLAG,
21145 IRExpr_ITE(
21146 unop(Iop_64to1,
21147 binop(Iop_And64,
21148 binop(Iop_Shr64, mkexpr(t4), mkU8(10)),
21149 mkU64(1))),
21150 mkU64(0xFFFFFFFFFFFFFFFFULL),
21151 mkU64(1)))
21154 /* And set the ID flag */
21155 stmt( IRStmt_Put(
21156 OFFB_IDFLAG,
21157 IRExpr_ITE(
21158 unop(Iop_64to1,
21159 binop(Iop_And64,
21160 binop(Iop_Shr64, mkexpr(t4), mkU8(21)),
21161 mkU64(1))),
21162 mkU64(1),
21163 mkU64(0)))
21166 /* And set the AC flag too */
21167 stmt( IRStmt_Put(
21168 OFFB_ACFLAG,
21169 IRExpr_ITE(
21170 unop(Iop_64to1,
21171 binop(Iop_And64,
21172 binop(Iop_Shr64, mkexpr(t4), mkU8(18)),
21173 mkU64(1))),
21174 mkU64(1),
21175 mkU64(0)))
21179 /* set new stack */
21180 putIReg64(R_RSP, mkexpr(t5));
21182 /* goto new RIP value */
21183 jmp_treg(dres, Ijk_Ret, t2);
21184 DIP("iret (very kludgey)\n");
21185 return delta;
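/* Illustrative sketch, not part of the translator: the stack image consumed
   by the kludgey iret above, as a hypothetical little-endian C struct laid
   out upwards from the incoming RSP:

      struct iret_frame {
         unsigned long long rip;      // +0,  t2: jump target
         unsigned long long cs;       // +8,  ignored
         unsigned long long rflags;   // +16, t4: copied into the flags thunk
         unsigned long long rsp;      // +24, t5: becomes the new RSP
         unsigned long long ss;       // +32, ignored
      };
*/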
21187 case 0xD0: { /* Grp2 1,Eb */
21188 Bool decode_OK = True;
21189 if (haveF2orF3(pfx)) goto decode_failure;
21190 modrm = getUChar(delta);
21191 am_sz = lengthAMode(pfx,delta);
21192 d_sz = 0;
21193 d64 = 1;
21194 sz = 1;
21195 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
21196 mkU8(d64), NULL, &decode_OK );
21197 if (!decode_OK) goto decode_failure;
21198 return delta;
21201 case 0xD1: { /* Grp2 1,Ev */
21202 Bool decode_OK = True;
21203 if (haveF2orF3(pfx)) goto decode_failure;
21204 modrm = getUChar(delta);
21205 am_sz = lengthAMode(pfx,delta);
21206 d_sz = 0;
21207 d64 = 1;
21208 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
21209 mkU8(d64), NULL, &decode_OK );
21210 if (!decode_OK) goto decode_failure;
21211 return delta;
21214 case 0xD2: { /* Grp2 CL,Eb */
21215 Bool decode_OK = True;
21216 if (haveF2orF3(pfx)) goto decode_failure;
21217 modrm = getUChar(delta);
21218 am_sz = lengthAMode(pfx,delta);
21219 d_sz = 0;
21220 sz = 1;
21221 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
21222 getIRegCL(), "%cl", &decode_OK );
21223 if (!decode_OK) goto decode_failure;
21224 return delta;
21227 case 0xD3: { /* Grp2 CL,Ev */
21228 Bool decode_OK = True;
21229 if (haveF2orF3(pfx)) goto decode_failure;
21230 modrm = getUChar(delta);
21231 am_sz = lengthAMode(pfx,delta);
21232 d_sz = 0;
21233 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
21234 getIRegCL(), "%cl", &decode_OK );
21235 if (!decode_OK) goto decode_failure;
21236 return delta;
21239 case 0xD8: /* X87 instructions */
21240 case 0xD9:
21241 case 0xDA:
21242 case 0xDB:
21243 case 0xDC:
21244 case 0xDD:
21245 case 0xDE:
21246 case 0xDF: {
21247 Bool redundantREXWok = False;
21249 if (haveF2orF3(pfx))
21250 goto decode_failure;
21252 /* kludge to tolerate redundant rex.w prefixes (should do this
21253 properly one day) */
21254 /* mono 1.1.18.1 produces 48 D9 FA, which is rex.w fsqrt */
21255 if ( (opc == 0xD9 && getUChar(delta+0) == 0xFA)/*fsqrt*/ )
21256 redundantREXWok = True;
21258 Bool size_OK = False;
21259 if ( sz == 4 )
21260 size_OK = True;
21261 else if ( sz == 8 )
21262 size_OK = redundantREXWok;
21263 else if ( sz == 2 ) {
21264 int mod_rm = getUChar(delta+0);
21265 int reg = gregLO3ofRM(mod_rm);
21266 /* The HotSpot JVM uses these */
21267 if ( (opc == 0xDD) && (reg == 0 /* FLDL */ ||
21268 reg == 4 /* FNSAVE */ ||
21269 reg == 6 /* FRSTOR */ ) )
21270 size_OK = True;
21272 /* AMD manual says 0x66 size override is ignored, except where
21273 it is meaningful */
21274 if (!size_OK)
21275 goto decode_failure;
21277 Bool decode_OK = False;
21278 delta = dis_FPU ( &decode_OK, vbi, pfx, delta );
21279 if (!decode_OK)
21280 goto decode_failure;
21282 return delta;
21285 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
21286 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */
21287 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */
21288 { /* The docs say this uses rCX as a count depending on the
21289 address size override, not the operand one. */
21290 IRExpr* zbit = NULL;
21291 IRExpr* count = NULL;
21292 IRExpr* cond = NULL;
21293 const HChar* xtra = NULL;
21295 if (have66orF2orF3(pfx) || 1==getRexW(pfx)) goto decode_failure;
21296 /* So at this point we've rejected any variants which appear to
21297 be governed by the usual operand-size modifiers. Hence only
21298 the address size prefix can have an effect. It changes the
21299 size from 64 (default) to 32. */
21300 d64 = guest_RIP_bbstart+delta+1 + getSDisp8(delta);
21301 delta++;
21302 if (haveASO(pfx)) {
21303 /* 64to32 of 64-bit get is merely a get-put improvement
21304 trick. */
21305 putIReg32(R_RCX, binop(Iop_Sub32,
21306 unop(Iop_64to32, getIReg64(R_RCX)),
21307 mkU32(1)));
21308 } else {
21309 putIReg64(R_RCX, binop(Iop_Sub64, getIReg64(R_RCX), mkU64(1)));
21312 /* This is correct, both for 32- and 64-bit versions. If we're
21313 doing a 32-bit dec and the result is zero then the default
21314 zero extension rule will cause the upper 32 bits to be zero
21315 too. Hence a 64-bit check against zero is OK. */
21316 count = getIReg64(R_RCX);
21317 cond = binop(Iop_CmpNE64, count, mkU64(0));
21318 switch (opc) {
21319 case 0xE2:
21320 xtra = "";
21321 break;
21322 case 0xE1:
21323 xtra = "e";
21324 zbit = mk_amd64g_calculate_condition( AMD64CondZ );
21325 cond = mkAnd1(cond, zbit);
21326 break;
21327 case 0xE0:
21328 xtra = "ne";
21329 zbit = mk_amd64g_calculate_condition( AMD64CondNZ );
21330 cond = mkAnd1(cond, zbit);
21331 break;
21332 default:
21333 vassert(0);
21335 stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64), OFFB_RIP) );
21337 DIP("loop%s%s 0x%llx\n", xtra, haveASO(pfx) ? "l" : "", (ULong)d64);
21338 return delta;
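/* Illustrative sketch, not part of the translator: the taken-branch
   condition built above, in plain C (count is the post-decrement rCX,
   zf the guest Z flag; names hypothetical):

      int loop_taken  (unsigned long long count)         { return count != 0; }
      int loope_taken (unsigned long long count, int zf) { return count != 0 &&  zf; }
      int loopne_taken(unsigned long long count, int zf) { return count != 0 && !zf; }
*/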
21341 case 0xE3:
21342 /* JRCXZ or JECXZ, depending on the address size override. */
21343 if (have66orF2orF3(pfx)) goto decode_failure;
21344 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
21345 delta++;
21346 if (haveASO(pfx)) {
21347 /* 32-bit */
21348 stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
21349 unop(Iop_32Uto64, getIReg32(R_RCX)),
21350 mkU64(0)),
21351 Ijk_Boring,
21352 IRConst_U64(d64),
21353 OFFB_RIP
21355 DIP("jecxz 0x%llx\n", (ULong)d64);
21356 } else {
21357 /* 64-bit */
21358 stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
21359 getIReg64(R_RCX),
21360 mkU64(0)),
21361 Ijk_Boring,
21362 IRConst_U64(d64),
21363 OFFB_RIP
21365 DIP("jrcxz 0x%llx\n", (ULong)d64);
21367 return delta;
21369 case 0xE4: /* IN imm8, AL */
21370 sz = 1;
21371 t1 = newTemp(Ity_I64);
21372 abyte = getUChar(delta); delta++;
21373 assign(t1, mkU64( abyte & 0xFF ));
21374 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz));
21375 goto do_IN;
21376 case 0xE5: /* IN imm8, eAX */
21377 if (!(sz == 2 || sz == 4)) goto decode_failure;
21378 t1 = newTemp(Ity_I64);
21379 abyte = getUChar(delta); delta++;
21380 assign(t1, mkU64( abyte & 0xFF ));
21381 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz));
21382 goto do_IN;
21383 case 0xEC: /* IN %DX, AL */
21384 sz = 1;
21385 t1 = newTemp(Ity_I64);
21386 assign(t1, unop(Iop_16Uto64, getIRegRDX(2)));
21387 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2),
21388 nameIRegRAX(sz));
21389 goto do_IN;
21390 case 0xED: /* IN %DX, eAX */
21391 if (!(sz == 2 || sz == 4)) goto decode_failure;
21392 t1 = newTemp(Ity_I64);
21393 assign(t1, unop(Iop_16Uto64, getIRegRDX(2)));
21394 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2),
21395 nameIRegRAX(sz));
21396 goto do_IN;
21397 do_IN: {
21398 /* At this point, sz indicates the width, and t1 is a 64-bit
21399 value giving port number. */
21400 IRDirty* d;
21401 if (haveF2orF3(pfx)) goto decode_failure;
21402 vassert(sz == 1 || sz == 2 || sz == 4);
21403 ty = szToITy(sz);
21404 t2 = newTemp(Ity_I64);
21405 d = unsafeIRDirty_1_N(
21407 0/*regparms*/,
21408 "amd64g_dirtyhelper_IN",
21409 &amd64g_dirtyhelper_IN,
21410 mkIRExprVec_2( mkexpr(t1), mkU64(sz) )
21412 /* do the call, dumping the result in t2. */
21413 stmt( IRStmt_Dirty(d) );
21414 putIRegRAX(sz, narrowTo( ty, mkexpr(t2) ) );
21415 return delta;
21418 case 0xE6: /* OUT AL, imm8 */
21419 sz = 1;
21420 t1 = newTemp(Ity_I64);
21421 abyte = getUChar(delta); delta++;
21422 assign( t1, mkU64( abyte & 0xFF ) );
21423 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte);
21424 goto do_OUT;
21425 case 0xE7: /* OUT eAX, imm8 */
21426 if (!(sz == 2 || sz == 4)) goto decode_failure;
21427 t1 = newTemp(Ity_I64);
21428 abyte = getUChar(delta); delta++;
21429 assign( t1, mkU64( abyte & 0xFF ) );
21430 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte);
21431 goto do_OUT;
21432 case 0xEE: /* OUT AL, %DX */
21433 sz = 1;
21434 t1 = newTemp(Ity_I64);
21435 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) );
21436 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz),
21437 nameIRegRDX(2));
21438 goto do_OUT;
21439 case 0xEF: /* OUT eAX, %DX */
21440 if (!(sz == 2 || sz == 4)) goto decode_failure;
21441 t1 = newTemp(Ity_I64);
21442 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) );
21443 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz),
21444 nameIRegRDX(2));
21445 goto do_OUT;
21446 do_OUT: {
21447 /* At this point, sz indicates the width, and t1 is a 64-bit
21448 value giving port number. */
21449 IRDirty* d;
21450 if (haveF2orF3(pfx)) goto decode_failure;
21451 vassert(sz == 1 || sz == 2 || sz == 4);
21452 ty = szToITy(sz);
21453 d = unsafeIRDirty_0_N(
21454 0/*regparms*/,
21455 "amd64g_dirtyhelper_OUT",
21456 &amd64g_dirtyhelper_OUT,
21457 mkIRExprVec_3( mkexpr(t1),
21458 widenUto64( getIRegRAX(sz) ),
21459 mkU64(sz) )
21461 stmt( IRStmt_Dirty(d) );
21462 return delta;
21465 case 0xE8: /* CALL J4 */
21466 if (haveF3(pfx)) goto decode_failure;
21467 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
21468 d64 = getSDisp32(delta); delta += 4;
21469 d64 += (guest_RIP_bbstart+delta);
21470 /* (guest_RIP_bbstart+delta) == return-to addr, d64 == call-to addr */
21471 t1 = newTemp(Ity_I64);
21472 assign(t1, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
21473 putIReg64(R_RSP, mkexpr(t1));
21474 storeLE( mkexpr(t1), mkU64(guest_RIP_bbstart+delta));
21475 t2 = newTemp(Ity_I64);
21476 assign(t2, mkU64((Addr64)d64));
21477 make_redzone_AbiHint(vbi, t1, t2/*nia*/, "call-d32");
21478 jmp_lit(dres, Ijk_Call, d64);
21479 vassert(dres->whatNext == Dis_StopHere);
21480 DIP("call 0x%llx\n", (ULong)d64);
21481 return delta;
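/* Illustrative sketch, not part of the translator (store64 is a hypothetical
   stand-in for storeLE): the call above decomposes into

      unsigned long long ret_addr = guest_RIP_bbstart + delta;   // addr after the call
      RSP -= 8;
      store64(RSP, ret_addr);
      goto_target(ret_addr + disp32);                            // d64

   plus an AbiHint (make_redzone_AbiHint) describing the stack red zone
   below the new RSP to the core. */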
21483 case 0xE9: /* Jv (jump, 16/32 offset) */
21484 if (haveF3(pfx)) goto decode_failure;
21485 sz = 4; /* Prefixes that change operand size are ignored for this
21486 instruction. Operand size is forced to 32 bits. */
21487 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
21488 d64 = (guest_RIP_bbstart+delta+sz) + getSDisp(sz,delta);
21489 delta += sz;
21490 jmp_lit(dres, Ijk_Boring, d64);
21491 vassert(dres->whatNext == Dis_StopHere);
21492 DIP("jmp 0x%llx\n", (ULong)d64);
21493 return delta;
21495 case 0xEB: /* Jb (jump, byte offset) */
21496 if (haveF3(pfx)) goto decode_failure;
21497 /* Prefixes that change operand size are ignored for this instruction. */
21498 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
21499 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
21500 delta++;
21501 jmp_lit(dres, Ijk_Boring, d64);
21502 vassert(dres->whatNext == Dis_StopHere);
21503 DIP("jmp-8 0x%llx\n", (ULong)d64);
21504 return delta;
21506 case 0xF5: /* CMC */
21507 case 0xF8: /* CLC */
21508 case 0xF9: /* STC */
21509 t1 = newTemp(Ity_I64);
21510 t2 = newTemp(Ity_I64);
21511 assign( t1, mk_amd64g_calculate_rflags_all() );
21512 switch (opc) {
21513 case 0xF5:
21514 assign( t2, binop(Iop_Xor64, mkexpr(t1),
21515 mkU64(AMD64G_CC_MASK_C)));
21516 DIP("cmc\n");
21517 break;
21518 case 0xF8:
21519 assign( t2, binop(Iop_And64, mkexpr(t1),
21520 mkU64(~AMD64G_CC_MASK_C)));
21521 DIP("clc\n");
21522 break;
21523 case 0xF9:
21524 assign( t2, binop(Iop_Or64, mkexpr(t1),
21525 mkU64(AMD64G_CC_MASK_C)));
21526 DIP("stc\n");
21527 break;
21528 default:
21529 vpanic("disInstr(x64)(cmc/clc/stc)");
21531 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
21532 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
21533 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t2) ));
21534 /* Set NDEP even though it isn't used. This makes redundant-PUT
21535 elimination of previous stores to this field work better. */
21536 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
21537 return delta;
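/* Illustrative sketch, not part of the translator: with the flags forced
   into the COPY thunk, the three cases above are plain bit operations on
   the materialised rflags value (CARRY stands for AMD64G_CC_MASK_C):

      flags ^=  CARRY;   // cmc
      flags &= ~CARRY;   // clc
      flags |=  CARRY;   // stc
*/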
21539 case 0xF6: { /* Grp3 Eb */
21540 Bool decode_OK = True;
21541 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21542 /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */
21543 delta = dis_Grp3 ( vbi, pfx, 1, delta, &decode_OK );
21544 if (!decode_OK) goto decode_failure;
21545 return delta;
21548 case 0xF7: { /* Grp3 Ev */
21549 Bool decode_OK = True;
21550 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21551 /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */
21552 delta = dis_Grp3 ( vbi, pfx, sz, delta, &decode_OK );
21553 if (!decode_OK) goto decode_failure;
21554 return delta;
21557 case 0xFC: /* CLD */
21558 if (haveF2orF3(pfx)) goto decode_failure;
21559 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(1)) );
21560 DIP("cld\n");
21561 return delta;
21563 case 0xFD: /* STD */
21564 if (haveF2orF3(pfx)) goto decode_failure;
21565 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(-1ULL)) );
21566 DIP("std\n");
21567 return delta;
21569 case 0xFE: { /* Grp4 Eb */
21570 Bool decode_OK = True;
21571 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21572 /* We now let dis_Grp4 itself decide if F2 and/or F3 are valid */
21573 delta = dis_Grp4 ( vbi, pfx, delta, &decode_OK );
21574 if (!decode_OK) goto decode_failure;
21575 return delta;
21578 case 0xFF: { /* Grp5 Ev */
21579 Bool decode_OK = True;
21580 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21581 /* We now let dis_Grp5 itself decide if F2 and/or F3 are valid */
21582 delta = dis_Grp5 ( vbi, pfx, sz, delta, dres, &decode_OK );
21583 if (!decode_OK) goto decode_failure;
21584 return delta;
21587 default:
21588 break;
21592 decode_failure:
21593 return deltaIN; /* fail */
21597 /*------------------------------------------------------------*/
21598 /*--- ---*/
21599 /*--- Top-level post-escape decoders: dis_ESC_0F ---*/
21600 /*--- ---*/
21601 /*------------------------------------------------------------*/
21603 static IRTemp math_BSWAP ( IRTemp t1, IRType ty )
21605 IRTemp t2 = newTemp(ty);
21606 if (ty == Ity_I64) {
21607 IRTemp m8 = newTemp(Ity_I64);
21608 IRTemp s8 = newTemp(Ity_I64);
21609 IRTemp m16 = newTemp(Ity_I64);
21610 IRTemp s16 = newTemp(Ity_I64);
21611 IRTemp m32 = newTemp(Ity_I64);
21612 assign( m8, mkU64(0xFF00FF00FF00FF00ULL) );
21613 assign( s8,
21614 binop(Iop_Or64,
21615 binop(Iop_Shr64,
21616 binop(Iop_And64,mkexpr(t1),mkexpr(m8)),
21617 mkU8(8)),
21618 binop(Iop_And64,
21619 binop(Iop_Shl64,mkexpr(t1),mkU8(8)),
21620 mkexpr(m8))
21624 assign( m16, mkU64(0xFFFF0000FFFF0000ULL) );
21625 assign( s16,
21626 binop(Iop_Or64,
21627 binop(Iop_Shr64,
21628 binop(Iop_And64,mkexpr(s8),mkexpr(m16)),
21629 mkU8(16)),
21630 binop(Iop_And64,
21631 binop(Iop_Shl64,mkexpr(s8),mkU8(16)),
21632 mkexpr(m16))
21636 assign( m32, mkU64(0xFFFFFFFF00000000ULL) );
21637 assign( t2,
21638 binop(Iop_Or64,
21639 binop(Iop_Shr64,
21640 binop(Iop_And64,mkexpr(s16),mkexpr(m32)),
21641 mkU8(32)),
21642 binop(Iop_And64,
21643 binop(Iop_Shl64,mkexpr(s16),mkU8(32)),
21644 mkexpr(m32))
21647 return t2;
21649 if (ty == Ity_I32) {
21650 assign( t2,
21651 binop(
21652 Iop_Or32,
21653 binop(Iop_Shl32, mkexpr(t1), mkU8(24)),
21654 binop(
21655 Iop_Or32,
21656 binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
21657 mkU32(0x00FF0000)),
21658 binop(Iop_Or32,
21659 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
21660 mkU32(0x0000FF00)),
21661 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)),
21662 mkU32(0x000000FF) )
21665 return t2;
21667 if (ty == Ity_I16) {
21668 assign(t2,
21669 binop(Iop_Or16,
21670 binop(Iop_Shl16, mkexpr(t1), mkU8(8)),
21671 binop(Iop_Shr16, mkexpr(t1), mkU8(8)) ));
21672 return t2;
21674 vassert(0);
21675 /*NOTREACHED*/
21676 return IRTemp_INVALID;
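/* Illustrative sketch, not part of the translator: the Ity_I64 case above
   is the usual mask-and-shift byte swap; an equivalent plain-C version is

      unsigned long long bswap64(unsigned long long x)
      {
         x = ((x & 0xFF00FF00FF00FF00ULL) >> 8)  | ((x << 8)  & 0xFF00FF00FF00FF00ULL);
         x = ((x & 0xFFFF0000FFFF0000ULL) >> 16) | ((x << 16) & 0xFFFF0000FFFF0000ULL);
         x = ((x & 0xFFFFFFFF00000000ULL) >> 32) | ((x << 32) & 0xFFFFFFFF00000000ULL);
         return x;
      }
*/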
21680 __attribute__((noinline))
21681 static
21682 Long dis_ESC_0F (
21683 /*MB_OUT*/DisResult* dres,
21684 /*MB_OUT*/Bool* expect_CAS,
21685 const VexArchInfo* archinfo,
21686 const VexAbiInfo* vbi,
21687 Prefix pfx, Int sz, Long deltaIN
21690 Long d64 = 0;
21691 IRTemp addr = IRTemp_INVALID;
21692 IRTemp t1 = IRTemp_INVALID;
21693 IRTemp t2 = IRTemp_INVALID;
21694 UChar modrm = 0;
21695 Int am_sz = 0;
21696 Int alen = 0;
21697 HChar dis_buf[50];
21699 /* In the first switch, look for ordinary integer insns. */
21700 Long delta = deltaIN;
21701 UChar opc = getUChar(delta);
21702 delta++;
21703 switch (opc) { /* first switch */
21705 case 0x01:
21707 modrm = getUChar(delta);
21708 /* 0F 01 /0 -- SGDT */
21709 /* 0F 01 /1 -- SIDT */
21710 if (!epartIsReg(modrm)
21711 && (gregLO3ofRM(modrm) == 0 || gregLO3ofRM(modrm) == 1)) {
21712 /* This is really revolting, but ... since each processor
21713 (core) only has one IDT and one GDT, just let the guest
21714 see it (pass-through semantics). I can't see any way to
21715 construct a faked-up value, so don't bother to try. */
21716 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21717 delta += alen;
21718 switch (gregLO3ofRM(modrm)) {
21719 case 0: DIP("sgdt %s\n", dis_buf); break;
21720 case 1: DIP("sidt %s\n", dis_buf); break;
21721 default: vassert(0); /*NOTREACHED*/
21723 IRDirty* d = unsafeIRDirty_0_N (
21724 0/*regparms*/,
21725 "amd64g_dirtyhelper_SxDT",
21726 &amd64g_dirtyhelper_SxDT,
21727 mkIRExprVec_2( mkexpr(addr),
21728 mkU64(gregLO3ofRM(modrm)) )
21730 /* declare we're writing memory */
21731 d->mFx = Ifx_Write;
21732 d->mAddr = mkexpr(addr);
21733 d->mSize = 6;
21734 stmt( IRStmt_Dirty(d) );
21735 return delta;
21737 /* 0F 01 D0 = XGETBV */
21738 if (modrm == 0xD0 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21739 delta += 1;
21740 DIP("xgetbv\n");
21741 /* Fault (SEGV) if ECX isn't zero. Intel docs say #GP and I
21742 am not sure if that translates into SEGV or to something
21743 else, in user space. */
21744 t1 = newTemp(Ity_I32);
21745 assign( t1, getIReg32(R_RCX) );
21746 stmt( IRStmt_Exit(binop(Iop_CmpNE32, mkexpr(t1), mkU32(0)),
21747 Ijk_SigSEGV,
21748 IRConst_U64(guest_RIP_curr_instr),
21749 OFFB_RIP
21751 putIRegRAX(4, mkU32(7));
21752 putIRegRDX(4, mkU32(0));
21753 return delta;
21755 /* BEGIN HACKY SUPPORT FOR xend */
21756 /* 0F 01 D5 = XEND */
21757 if (modrm == 0xD5 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21758 /* We are never in a transaction (xbegin immediately aborts).
21759 So this just always generates a General Protection Fault. */
21760 delta += 1;
21761 jmp_lit(dres, Ijk_SigSEGV, guest_RIP_bbstart + delta);
21762 vassert(dres->whatNext == Dis_StopHere);
21763 DIP("xend\n");
21764 return delta;
21766 /* END HACKY SUPPORT FOR xend */
21767 /* BEGIN HACKY SUPPORT FOR xtest */
21768 /* 0F 01 D6 = XTEST */
21769 if (modrm == 0xD6 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21770 /* Sets ZF because there never is a transaction, and all
21771 CF, OF, SF, PF and AF are always cleared by xtest. */
21772 delta += 1;
21773 DIP("xtest\n");
21774 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
21775 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
21776 stmt( IRStmt_Put( OFFB_CC_DEP1, mkU64(AMD64G_CC_MASK_Z) ));
21777 /* Set NDEP even though it isn't used. This makes redundant-PUT
21778 elimination of previous stores to this field work better. */
21779 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
21780 return delta;
21782 /* END HACKY SUPPORT FOR xtest */
21783 /* 0F 01 F9 = RDTSCP */
21784 if (modrm == 0xF9 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDTSCP)) {
21785 delta += 1;
21786 /* Uses dirty helper:
21787 void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* )
21788 declared to wr rax, rcx, rdx
21790 const HChar* fName = "amd64g_dirtyhelper_RDTSCP";
21791 void* fAddr = &amd64g_dirtyhelper_RDTSCP;
21792 IRDirty* d
21793 = unsafeIRDirty_0_N ( 0/*regparms*/,
21794 fName, fAddr, mkIRExprVec_1(IRExpr_GSPTR()) );
21795 /* declare guest state effects */
21796 d->nFxState = 3;
21797 vex_bzero(&d->fxState, sizeof(d->fxState));
21798 d->fxState[0].fx = Ifx_Write;
21799 d->fxState[0].offset = OFFB_RAX;
21800 d->fxState[0].size = 8;
21801 d->fxState[1].fx = Ifx_Write;
21802 d->fxState[1].offset = OFFB_RCX;
21803 d->fxState[1].size = 8;
21804 d->fxState[2].fx = Ifx_Write;
21805 d->fxState[2].offset = OFFB_RDX;
21806 d->fxState[2].size = 8;
21807 /* execute the dirty call, side-effecting guest state */
21808 stmt( IRStmt_Dirty(d) );
21809 /* RDTSCP is a serialising insn. So, just in case someone is
21810 using it as a memory fence ... */
21811 stmt( IRStmt_MBE(Imbe_Fence) );
21812 DIP("rdtscp\n");
21813 return delta;
21815 /* else decode failed */
21816 break;
21819 case 0x05: /* SYSCALL */
21820 guest_RIP_next_mustcheck = True;
21821 guest_RIP_next_assumed = guest_RIP_bbstart + delta;
21822 putIReg64( R_RCX, mkU64(guest_RIP_next_assumed) );
21823 /* It's important that all guest state is up-to-date
21824 at this point. So we declare an end-of-block here, which
21825 forces any cached guest state to be flushed. */
21826 jmp_lit(dres, Ijk_Sys_syscall, guest_RIP_next_assumed);
21827 vassert(dres->whatNext == Dis_StopHere);
21828 DIP("syscall\n");
21829 return delta;
21831 case 0x0B: /* UD2 */
21832 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
21833 jmp_lit(dres, Ijk_NoDecode, guest_RIP_curr_instr);
21834 vassert(dres->whatNext == Dis_StopHere);
21835 DIP("ud2\n");
21836 return delta;
21838 case 0x0D: /* 0F 0D /0 -- prefetch mem8 */
21839 /* 0F 0D /1 -- prefetchw mem8 */
21840 if (have66orF2orF3(pfx)) goto decode_failure;
21841 modrm = getUChar(delta);
21842 if (epartIsReg(modrm)) goto decode_failure;
21843 if (gregLO3ofRM(modrm) != 0 && gregLO3ofRM(modrm) != 1)
21844 goto decode_failure;
21845 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21846 delta += alen;
21847 switch (gregLO3ofRM(modrm)) {
21848 case 0: DIP("prefetch %s\n", dis_buf); break;
21849 case 1: DIP("prefetchw %s\n", dis_buf); break;
21850 default: vassert(0); /*NOTREACHED*/
21852 return delta;
21854 case 0x19:
21855 case 0x1C:
21856 case 0x1D:
21857 case 0x1E:
21858 case 0x1F:
21859 // Intel CET instructions can have any prefixes before NOPs
21860 // and can use any ModRM, SIB and disp
21861 modrm = getUChar(delta);
21862 if (epartIsReg(modrm)) {
21863 delta += 1;
21864 DIP("nop%c\n", nameISize(sz));
21865 } else {
21866 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21867 delta += alen;
21868 DIP("nop%c %s\n", nameISize(sz), dis_buf);
21870 return delta;
21872 case 0x31: { /* RDTSC */
21873 IRTemp val = newTemp(Ity_I64);
21874 IRExpr** args = mkIRExprVec_0();
21875 IRDirty* d = unsafeIRDirty_1_N (
21876 val,
21877 0/*regparms*/,
21878 "amd64g_dirtyhelper_RDTSC",
21879 &amd64g_dirtyhelper_RDTSC,
21880 args
21882 if (have66orF2orF3(pfx)) goto decode_failure;
21883 /* execute the dirty call, dumping the result in val. */
21884 stmt( IRStmt_Dirty(d) );
21885 putIRegRDX(4, unop(Iop_64HIto32, mkexpr(val)));
21886 putIRegRAX(4, unop(Iop_64to32, mkexpr(val)));
21887 DIP("rdtsc\n");
21888 return delta;
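/* Illustrative sketch, not part of the translator: the dirty helper returns
   the 64-bit time-stamp counter and the split above is simply

      unsigned int edx = (unsigned int)(val >> 32);   // Iop_64HIto32
      unsigned int eax = (unsigned int)(val);         // Iop_64to32

   with the usual amd64 rule that a 32-bit register write zeroes the
   upper half. */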
21891 case 0x40:
21892 case 0x41:
21893 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
21894 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
21895 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
21896 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
21897 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
21898 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
21899 case 0x48: /* CMOVSb (cmov negative) */
21900 case 0x49: /* CMOVNSb (cmov not negative) */
21901 case 0x4A: /* CMOVP (cmov parity even) */
21902 case 0x4B: /* CMOVNP (cmov parity odd) */
21903 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
21904 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
21905 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
21906 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
21907 if (haveF2orF3(pfx)) goto decode_failure;
21908 delta = dis_cmov_E_G(vbi, pfx, sz, (AMD64Condcode)(opc - 0x40), delta);
21909 return delta;
21911 case 0x80:
21912 case 0x81:
21913 case 0x82: /* JBb/JNAEb (jump below) */
21914 case 0x83: /* JNBb/JAEb (jump not below) */
21915 case 0x84: /* JZb/JEb (jump zero) */
21916 case 0x85: /* JNZb/JNEb (jump not zero) */
21917 case 0x86: /* JBEb/JNAb (jump below or equal) */
21918 case 0x87: /* JNBEb/JAb (jump not below or equal) */
21919 case 0x88: /* JSb (jump negative) */
21920 case 0x89: /* JNSb (jump not negative) */
21921 case 0x8A: /* JP (jump parity even) */
21922 case 0x8B: /* JNP/JPO (jump parity odd) */
21923 case 0x8C: /* JLb/JNGEb (jump less) */
21924 case 0x8D: /* JGEb/JNLb (jump greater or equal) */
21925 case 0x8E: /* JLEb/JNGb (jump less or equal) */
21926 case 0x8F: { /* JGb/JNLEb (jump greater) */
21927 Long jmpDelta;
21928 const HChar* comment = "";
21929 if (haveF3(pfx)) goto decode_failure;
21930 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
21931 jmpDelta = getSDisp32(delta);
21932 d64 = (guest_RIP_bbstart+delta+4) + jmpDelta;
21933 delta += 4;
21934 /* End the block at this point. */
21935 jcc_01( dres, (AMD64Condcode)(opc - 0x80),
21936 guest_RIP_bbstart+delta, d64 );
21937 vassert(dres->whatNext == Dis_StopHere);
21938 DIP("j%s-32 0x%llx %s\n", name_AMD64Condcode(opc - 0x80), (ULong)d64,
21939 comment);
21940 return delta;
21943 case 0x90:
21944 case 0x91:
21945 case 0x92: /* set-Bb/set-NAEb (set if below) */
21946 case 0x93: /* set-NBb/set-AEb (set if not below) */
21947 case 0x94: /* set-Zb/set-Eb (set if zero) */
21948 case 0x95: /* set-NZb/set-NEb (set if not zero) */
21949 case 0x96: /* set-BEb/set-NAb (set if below or equal) */
21950 case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */
21951 case 0x98: /* set-Sb (set if negative) */
21952 case 0x99: /* set-NSb (set if not negative) */
21953 case 0x9A: /* set-P (set if parity even) */
21954 case 0x9B: /* set-NP (set if parity odd) */
21955 case 0x9C: /* set-Lb/set-NGEb (set if less) */
21956 case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */
21957 case 0x9E: /* set-LEb/set-NGb (set if less or equal) */
21958 case 0x9F: /* set-Gb/set-NLEb (set if greater) */
21959 if (haveF2orF3(pfx)) goto decode_failure;
21960 t1 = newTemp(Ity_I8);
21961 assign( t1, unop(Iop_1Uto8,mk_amd64g_calculate_condition(opc-0x90)) );
21962 modrm = getUChar(delta);
21963 if (epartIsReg(modrm)) {
21964 delta++;
21965 putIRegE(1, pfx, modrm, mkexpr(t1));
21966 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90),
21967 nameIRegE(1,pfx,modrm));
21968 } else {
21969 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21970 delta += alen;
21971 storeLE( mkexpr(addr), mkexpr(t1) );
21972 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90), dis_buf);
21974 return delta;
21976 case 0x1A:
21977 case 0x1B: { /* Future MPX instructions, currently NOPs.
21978 BNDMK b, m F3 0F 1B
21979 BNDCL b, r/m F3 0F 1A
21980 BNDCU b, r/m F2 0F 1A
21981 BNDCN b, r/m F2 0F 1B
21982 BNDMOV b, b/m 66 0F 1A
21983 BNDMOV b/m, b 66 0F 1B
21984 BNDLDX b, mib 0F 1A
21985 BNDSTX mib, b 0F 1B */
21987 /* All instructions have two operands. One operand is always the
21988 bnd register number (bnd0-bnd3, other register numbers are
21989 ignored when MPX isn't enabled, but should generate an
21990 exception if MPX is enabled) given by gregOfRexRM. The other
21991 operand is either a ModRM:reg, ModRM:r/m or a SIB encoded
21992 address, all of which can be decoded by using either
21993 eregOfRexRM or disAMode. */
21995 modrm = getUChar(delta);
21996 int bnd = gregOfRexRM(pfx,modrm);
21997 const HChar *oper;
21998 if (epartIsReg(modrm)) {
21999 oper = nameIReg64 (eregOfRexRM(pfx,modrm));
22000 delta += 1;
22001 } else {
22002 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22003 delta += alen;
22004 oper = dis_buf;
22007 if (haveF3no66noF2 (pfx)) {
22008 if (opc == 0x1B) {
22009 DIP ("bndmk %s, %%bnd%d\n", oper, bnd);
22010 } else /* opc == 0x1A */ {
22011 DIP ("bndcl %s, %%bnd%d\n", oper, bnd);
22013 } else if (haveF2no66noF3 (pfx)) {
22014 if (opc == 0x1A) {
22015 DIP ("bndcu %s, %%bnd%d\n", oper, bnd);
22016 } else /* opc == 0x1B */ {
22017 DIP ("bndcn %s, %%bnd%d\n", oper, bnd);
22019 } else if (have66noF2noF3 (pfx)) {
22020 if (opc == 0x1A) {
22021 DIP ("bndmov %s, %%bnd%d\n", oper, bnd);
22022 } else /* opc == 0x1B */ {
22023 DIP ("bndmov %%bnd%d, %s\n", bnd, oper);
22025 } else if (haveNo66noF2noF3 (pfx)) {
22026 if (opc == 0x1A) {
22027 DIP ("bndldx %s, %%bnd%d\n", oper, bnd);
22028 } else /* opc == 0x1B */ {
22029 DIP ("bndstx %%bnd%d, %s\n", bnd, oper);
22031 } else goto decode_failure;
22033 return delta;
22036 case 0xA2: { /* CPUID */
22037 /* Uses dirty helper:
22038 void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* )
22039 declared to mod rax, wr rbx, rcx, rdx */
22041 IRDirty* d = NULL;
22042 const HChar* fName = NULL;
22043 void* fAddr = NULL;
22045 if (haveF2orF3(pfx)) goto decode_failure;
22047 /* This isn't entirely correct, CPUID should depend on the VEX
22048 capabilities, not on the underlying CPU. See bug #324882. */
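/* Selection strategy: choose the most capable CPUID personality that
the guest hwcaps permit. Each helper below reports a fixed,
real-CPU-like feature set; the per-case comments note which machine
each one mimics. */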
22049 if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSSE3) &&
22050 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16) &&
22051 (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX2)) {
22052 fName = "amd64g_dirtyhelper_CPUID_avx2";
22053 fAddr = &amd64g_dirtyhelper_CPUID_avx2;
22054 /* This is a Core-i7-4910-like machine */
22056 else if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSSE3) &&
22057 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16) &&
22058 (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
22059 fName = "amd64g_dirtyhelper_CPUID_avx_and_cx16";
22060 fAddr = &amd64g_dirtyhelper_CPUID_avx_and_cx16;
22061 /* This is a Core-i5-2300-like machine */
22063 else if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSSE3) &&
22064 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16)) {
22065 fName = "amd64g_dirtyhelper_CPUID_sse42_and_cx16";
22066 fAddr = &amd64g_dirtyhelper_CPUID_sse42_and_cx16;
22067 /* This is a Core-i5-670-like machine */
22069 else {
22070 /* Give a CPUID for at least a baseline machine, SSE2
22071 only, and no CX16 */
22072 fName = "amd64g_dirtyhelper_CPUID_baseline";
22073 fAddr = &amd64g_dirtyhelper_CPUID_baseline;
22076 vassert(fName); vassert(fAddr);
22077 IRExpr** args = NULL;
22078 if (fAddr == &amd64g_dirtyhelper_CPUID_avx2
22079 || fAddr == &amd64g_dirtyhelper_CPUID_avx_and_cx16) {
22080 Bool hasF16C = (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C) != 0;
22081 Bool hasRDRAND = (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDRAND) != 0;
22082 Bool hasRDSEED = (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDSEED) != 0;
22083 args = mkIRExprVec_4(IRExpr_GSPTR(),
22084 mkIRExpr_HWord(hasF16C ? 1 : 0),
22085 mkIRExpr_HWord(hasRDRAND ? 1 : 0),
22086 mkIRExpr_HWord(hasRDSEED ? 1 : 0));
22087 } else {
22088 args = mkIRExprVec_1(IRExpr_GSPTR());
22090 d = unsafeIRDirty_0_N ( 0/*regparms*/, fName, fAddr, args );
22092 /* Declare guest state effects. EAX, EBX, ECX and EDX are written. EAX
22093 is also read, hence is marked as Modified. ECX is sometimes also
22094 read, depending on the value in EAX; that much is obvious from
22095 inspection of the helper function.
22097 This is a bit of a problem: if we mark ECX as Modified -- hence, by
22098 implication, Read -- then we may get false positives from Memcheck in
22099 the case where ECX contains undefined bits, but the EAX value is such
22100 that the instruction wouldn't read ECX anyway. The obvious way out
22101 of this is to mark it as written only, but that means Memcheck will
22102 effectively ignore undefinedness in the incoming ECX value. That
22103 seems like a small loss to take to avoid false positives here,
22104 though. Fundamentally the problem exists because CPUID itself has
22105 conditional dataflow -- whether ECX is read depends on the value in
22106 EAX -- but the annotation mechanism for dirty helpers can't represent
22107 that conditionality.
22109 A fully-accurate solution might be to change the helpers so that the
22110 EAX and ECX values are passed as parameters. Then, for the ECX
22111 value, we can pass, effectively "if EAX is some value for which ECX
22112 is ignored { 0 } else { ECX }", and Memcheck will see and understand
22113 this conditionality. */
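/* Purely illustrative sketch, not what the code below does; the name
and signature are hypothetical. Such a helper might be declared as
void dirtyhelper_CPUID ( VexGuestAMD64State* st, ULong eax, ULong ecx )
and called with an ECX argument of the form
(EAX selects an ECX-dependent leaf) ? RCX : 0
so that Memcheck would track the definedness of ECX only when CPUID
actually consumes it. */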
22114 d->nFxState = 4;
22115 vex_bzero(&d->fxState, sizeof(d->fxState));
22116 d->fxState[0].fx = Ifx_Modify;
22117 d->fxState[0].offset = OFFB_RAX;
22118 d->fxState[0].size = 8;
22119 d->fxState[1].fx = Ifx_Write;
22120 d->fxState[1].offset = OFFB_RBX;
22121 d->fxState[1].size = 8;
22122 d->fxState[2].fx = Ifx_Write; /* was: Ifx_Modify; */
22123 d->fxState[2].offset = OFFB_RCX;
22124 d->fxState[2].size = 8;
22125 d->fxState[3].fx = Ifx_Write;
22126 d->fxState[3].offset = OFFB_RDX;
22127 d->fxState[3].size = 8;
22128 /* Execute the dirty call, side-effecting guest state. */
22129 stmt( IRStmt_Dirty(d) );
22130 /* CPUID is a serialising insn. So, just in case someone is
22131 using it as a memory fence ... */
22132 stmt( IRStmt_MBE(Imbe_Fence) );
22133 DIP("cpuid\n");
22134 return delta;
22137 case 0xA3: { /* BT Gv,Ev */
22138 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22139 Bool ok = True;
22140 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
22141 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpNone, &ok );
22142 if (!ok) goto decode_failure;
22143 return delta;
22146 case 0xA4: /* SHLDv imm8,Gv,Ev */
22147 modrm = getUChar(delta);
22148 d64 = delta + lengthAMode(pfx, delta);
22149 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
22150 delta = dis_SHLRD_Gv_Ev (
22151 vbi, pfx, delta, modrm, sz,
22152 mkU8(getUChar(d64)), True, /* literal */
22153 dis_buf, True /* left */ );
22154 return delta;
22156 case 0xA5: /* SHLDv %cl,Gv,Ev */
22157 modrm = getUChar(delta);
22158 delta = dis_SHLRD_Gv_Ev (
22159 vbi, pfx, delta, modrm, sz,
22160 getIRegCL(), False, /* not literal */
22161 "%cl", True /* left */ );
22162 return delta;
22164 case 0xAB: { /* BTS Gv,Ev */
22165 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22166 Bool ok = True;
22167 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
22168 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpSet, &ok );
22169 if (!ok) goto decode_failure;
22170 return delta;
22173 case 0xAC: /* SHRDv imm8,Gv,Ev */
22174 modrm = getUChar(delta);
22175 d64 = delta + lengthAMode(pfx, delta);
22176 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
22177 delta = dis_SHLRD_Gv_Ev (
22178 vbi, pfx, delta, modrm, sz,
22179 mkU8(getUChar(d64)), True, /* literal */
22180 dis_buf, False /* right */ );
22181 return delta;
22183 case 0xAD: /* SHRDv %cl,Gv,Ev */
22184 modrm = getUChar(delta);
22185 delta = dis_SHLRD_Gv_Ev (
22186 vbi, pfx, delta, modrm, sz,
22187 getIRegCL(), False, /* not literal */
22188 "%cl", False /* right */);
22189 return delta;
22191 case 0xAF: /* IMUL Ev, Gv */
22192 if (haveF2orF3(pfx)) goto decode_failure;
22193 delta = dis_mul_E_G ( vbi, pfx, sz, delta );
22194 return delta;
22196 case 0xB0: { /* CMPXCHG Gb,Eb */
22197 Bool ok = True;
22198 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */
22199 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, 1, delta );
22200 if (!ok) goto decode_failure;
22201 return delta;
22204 case 0xB1: { /* CMPXCHG Gv,Ev (allowed in 16,32,64 bit) */
22205 Bool ok = True;
22206 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */
22207 if (sz != 2 && sz != 4 && sz != 8) goto decode_failure;
22208 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, sz, delta );
22209 if (!ok) goto decode_failure;
22210 return delta;
22213 case 0xB3: { /* BTR Gv,Ev */
22214 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22215 Bool ok = True;
22216 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
22217 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpReset, &ok );
22218 if (!ok) goto decode_failure;
22219 return delta;
22222 case 0xB6: /* MOVZXb Eb,Gv */
22223 if (haveF2orF3(pfx)) goto decode_failure;
22224 if (sz != 2 && sz != 4 && sz != 8)
22225 goto decode_failure;
22226 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, False );
22227 return delta;
22229 case 0xB7: /* MOVZXw Ew,Gv */
22230 if (haveF2orF3(pfx)) goto decode_failure;
22231 if (sz != 4 && sz != 8)
22232 goto decode_failure;
22233 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, False );
22234 return delta;
22236 case 0xBA: { /* Grp8 Ib,Ev */
22237 /* We let dis_Grp8_Imm decide whether F2 or F3 are allowable. */
22238 Bool decode_OK = False;
22239 modrm = getUChar(delta);
22240 am_sz = lengthAMode(pfx,delta);
22241 d64 = getSDisp8(delta + am_sz);
22242 delta = dis_Grp8_Imm ( vbi, pfx, delta, modrm, am_sz, sz, d64,
22243 &decode_OK );
22244 if (!decode_OK)
22245 goto decode_failure;
22246 return delta;
22249 case 0xBB: { /* BTC Gv,Ev */
22250 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22251 Bool ok = False;
22252 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
22253 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpComp, &ok );
22254 if (!ok) goto decode_failure;
22255 return delta;
22258 case 0xBC: /* BSF Gv,Ev */
22259 if (!haveF2orF3(pfx)
22260 || (haveF3noF2(pfx)
22261 && 0 == (archinfo->hwcaps & VEX_HWCAPS_AMD64_BMI))) {
22262 /* no-F2 no-F3 0F BC = BSF
22263 or F3 0F BC = REP; BSF on older CPUs. */
22264 delta = dis_bs_E_G ( vbi, pfx, sz, delta, True );
22265 return delta;
22267 /* Fall through, since F3 0F BC is TZCNT, and needs to
22268 be handled by dis_ESC_0F__SSE4. */
22269 break;
22271 case 0xBD: /* BSR Gv,Ev */
22272 if (!haveF2orF3(pfx)
22273 || (haveF3noF2(pfx)
22274 && 0 == (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT))) {
22275 /* no-F2 no-F3 0F BD = BSR
22276 or F3 0F BD = REP; BSR on older CPUs. */
22277 delta = dis_bs_E_G ( vbi, pfx, sz, delta, False );
22278 return delta;
22280 /* Fall through, since F3 0F BD is LZCNT, and needs to
22281 be handled by dis_ESC_0F__SSE4. */
22282 break;
22284 case 0xBE: /* MOVSXb Eb,Gv */
22285 if (haveF2orF3(pfx)) goto decode_failure;
22286 if (sz != 2 && sz != 4 && sz != 8)
22287 goto decode_failure;
22288 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, True );
22289 return delta;
22291 case 0xBF: /* MOVSXw Ew,Gv */
22292 if (haveF2orF3(pfx)) goto decode_failure;
22293 if (sz != 4 && sz != 8)
22294 goto decode_failure;
22295 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, True );
22296 return delta;
22298 case 0xC0: { /* XADD Gb,Eb */
22299 Bool decode_OK = False;
22300 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, 1, delta );
22301 if (!decode_OK)
22302 goto decode_failure;
22303 return delta;
22306 case 0xC1: { /* XADD Gv,Ev */
22307 Bool decode_OK = False;
22308 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, sz, delta );
22309 if (!decode_OK)
22310 goto decode_failure;
22311 return delta;
22314 case 0xC7: {
22315 modrm = getUChar(delta);
22317 // Detecting valid CMPXCHG combinations is pretty complex.
22318 Bool isValidCMPXCHG = gregLO3ofRM(modrm) == 1;
22319 if (isValidCMPXCHG) {
22320 if (have66(pfx)) isValidCMPXCHG = False;
22321 if (sz != 4 && sz != 8) isValidCMPXCHG = False;
22322 if (sz == 8 && !(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16))
22323 isValidCMPXCHG = False;
22324 if (epartIsReg(modrm)) isValidCMPXCHG = False;
22325 if (haveF2orF3(pfx)) {
22326 /* Since the e-part is memory only, F2 or F3 (one or the
22327 other) is acceptable if LOCK is also present. But only
22328 for cmpxchg8b. */
22329 if (sz == 8) isValidCMPXCHG = False;
22330 if (haveF2andF3(pfx) || !haveLOCK(pfx)) isValidCMPXCHG = False;
22334 /* 0F C7 /1 (with qualifications) = CMPXCHG */
22335 if (isValidCMPXCHG) {
22336 // Note that we've already read the modrm byte by this point, but we
22337 // haven't moved delta past it.
22338 IRType elemTy = sz==4 ? Ity_I32 : Ity_I64;
22339 IRTemp expdHi = newTemp(elemTy);
22340 IRTemp expdLo = newTemp(elemTy);
22341 IRTemp dataHi = newTemp(elemTy);
22342 IRTemp dataLo = newTemp(elemTy);
22343 IRTemp oldHi = newTemp(elemTy);
22344 IRTemp oldLo = newTemp(elemTy);
22345 IRTemp flags_old = newTemp(Ity_I64);
22346 IRTemp flags_new = newTemp(Ity_I64);
22347 IRTemp success = newTemp(Ity_I1);
22348 IROp opOR = sz==4 ? Iop_Or32 : Iop_Or64;
22349 IROp opXOR = sz==4 ? Iop_Xor32 : Iop_Xor64;
22350 IROp opCasCmpEQ = sz==4 ? Iop_CasCmpEQ32 : Iop_CasCmpEQ64;
22351 IRExpr* zero = sz==4 ? mkU32(0) : mkU64(0);
22352 IRTemp expdHi64 = newTemp(Ity_I64);
22353 IRTemp expdLo64 = newTemp(Ity_I64);
22355 /* Translate this using a DCAS, even if there is no LOCK
22356 prefix. Life is too short to bother with generating two
22357 different translations for the with/without-LOCK-prefix
22358 cases. */
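/* Note that translating the unLOCKed form as an atomic CAS only
strengthens the guarantee the guest observes, so correct guest code
still behaves correctly. */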
22359 *expect_CAS = True;
22361 /* Generate address */
22362 vassert(!epartIsReg(modrm));
22363 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22364 delta += alen;
22366 /* cmpxchg16b requires an alignment check. */
22367 if (sz == 8)
22368 gen_SIGNAL_if_not_16_aligned( vbi, addr );
22370 /* Get the expected and new values. */
22371 assign( expdHi64, getIReg64(R_RDX) );
22372 assign( expdLo64, getIReg64(R_RAX) );
22374 /* These are the correctly-sized expected and new values.
22375 However, we also get expdHi64/expdLo64 above as 64-bits
22376 regardless, because we will need them later in the 32-bit
22377 case (paradoxically). */
22378 assign( expdHi, sz==4 ? unop(Iop_64to32, mkexpr(expdHi64))
22379 : mkexpr(expdHi64) );
22380 assign( expdLo, sz==4 ? unop(Iop_64to32, mkexpr(expdLo64))
22381 : mkexpr(expdLo64) );
22382 assign( dataHi, sz==4 ? getIReg32(R_RCX) : getIReg64(R_RCX) );
22383 assign( dataLo, sz==4 ? getIReg32(R_RBX) : getIReg64(R_RBX) );
22385 /* Do the DCAS */
22386 stmt( IRStmt_CAS(
22387 mkIRCAS( oldHi, oldLo,
22388 Iend_LE, mkexpr(addr),
22389 mkexpr(expdHi), mkexpr(expdLo),
22390 mkexpr(dataHi), mkexpr(dataLo)
22391 )));
22393 /* success when oldHi:oldLo == expdHi:expdLo */
22394 assign( success,
22395 binop(opCasCmpEQ,
22396 binop(opOR,
22397 binop(opXOR, mkexpr(oldHi), mkexpr(expdHi)),
22398 binop(opXOR, mkexpr(oldLo), mkexpr(expdLo))),
22400 zero ));
22403 /* If the DCAS is successful, that is to say oldHi:oldLo ==
22404 expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX,
22405 which is where they came from originally. Both the actual
22406 contents of these two regs, and any shadow values, are
22407 unchanged. If the DCAS fails then we're putting into
22408 RDX:RAX the value seen in memory. */
22409 /* Now of course there's a complication in the 32-bit case
22410 (bah!): if the DCAS succeeds, we need to leave RDX:RAX
22411 unchanged; but if we use the same scheme as in the 64-bit
22412 case, we get hit by the standard rule that a write to the
22413 bottom 32 bits of an integer register zeros the upper 32
22414 bits. And so the upper halves of RDX and RAX mysteriously
22415 become zero. So we have to stuff back in the original
22416 64-bit values which we previously stashed in
22417 expdHi64:expdLo64, even if we're doing a cmpxchg8b. */
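/* In IR terms, the two writebacks below are
RDX = success ? expdHi64 : widen(oldHi)
RAX = success ? expdLo64 : widen(oldLo)
where 'widen' is 32-to-64-bit zero extension in the sz==4 case and
the identity in the sz==8 case. */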
22418 /* It's just _so_ much fun ... */
22419 putIRegRDX( 8,
22420 IRExpr_ITE( mkexpr(success),
22421 mkexpr(expdHi64),
22422 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldHi))
22423 : mkexpr(oldHi) ));
22425 putIRegRAX( 8,
22426 IRExpr_ITE( mkexpr(success),
22427 mkexpr(expdLo64),
22428 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldLo))
22429 : mkexpr(oldLo) ));
22432 /* Copy the success bit into the Z flag and leave the others
22433 unchanged */
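/* That is: flags_new = (flags_old & ~AMD64G_CC_MASK_Z)
| ((success ? 1 : 0) << AMD64G_CC_SHIFT_Z). */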
22434 assign( flags_old, widenUto64(mk_amd64g_calculate_rflags_all()));
22435 assign(
22436 flags_new,
22437 binop(Iop_Or64,
22438 binop(Iop_And64, mkexpr(flags_old),
22439 mkU64(~AMD64G_CC_MASK_Z)),
22440 binop(Iop_Shl64,
22441 binop(Iop_And64,
22442 unop(Iop_1Uto64, mkexpr(success)), mkU64(1)),
22443 mkU8(AMD64G_CC_SHIFT_Z)) ));
22445 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
22446 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
22447 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
22448 /* Set NDEP even though it isn't used. This makes
22449 redundant-PUT elimination of previous stores to this field
22450 work better. */
22451 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
22453 /* Sheesh. Aren't you glad it was me and not you that had to
22454 write and validate all this grunge? */
22456 DIP("cmpxchg8b %s\n", dis_buf);
22457 return delta;
22458 } // if (isValidCMPXCHG)
22460 /* 0F C7 /6 no-F2-or-F3 = RDRAND, 0F C7 /7 = RDSEED */
22461 int insn = gregLO3ofRM(modrm);
22462 if (((insn == 6 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDRAND))
22463 || (insn == 7 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDSEED)))
22464 && epartIsReg(modrm) && haveNoF2noF3(pfx)
22465 && (sz == 8 || sz == 4 || sz == 2)) {
22467 delta++; // move past modrm
22468 IRType ty = szToITy(sz);
22470 // Pull a first 32 bits of randomness, plus C flag, out of the host.
22471 IRTemp pairLO = newTemp(Ity_I64);
22472 IRDirty* dLO;
22473 if (insn == 6) /* RDRAND */
22474 dLO = unsafeIRDirty_1_N(pairLO, 0/*regparms*/,
22475 "amd64g_dirtyhelper_RDRAND",
22476 &amd64g_dirtyhelper_RDRAND, mkIRExprVec_0());
22477 else /* RDSEED */
22478 dLO = unsafeIRDirty_1_N(pairLO, 0/*regparms*/,
22479 "amd64g_dirtyhelper_RDSEED",
22480 &amd64g_dirtyhelper_RDSEED, mkIRExprVec_0());
22482 // There are no guest state or memory effects to declare for |dLO|.
22483 stmt( IRStmt_Dirty(dLO) );
22485 IRTemp randsLO = newTemp(Ity_I32);
22486 assign(randsLO, unop(Iop_64to32, mkexpr(pairLO)));
22487 IRTemp cLO = newTemp(Ity_I64);
22488 assign(cLO, binop(Iop_Shr64, mkexpr(pairLO), mkU8(32)));
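// The helper packs its result as (Cflag << 32) | rand32; randsLO and
// cLO above simply unpack those two halves.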
22490 // We'll assemble the final pairing in (cFinal, randsNearlyFinal).
22491 IRTemp randsNearlyFinal = newTemp(Ity_I64);
22492 IRTemp cFinal = newTemp(Ity_I64);
22494 if (ty == Ity_I64) {
22495 // Pull another 32 bits of randomness out of the host.
22496 IRTemp pairHI = newTemp(Ity_I64);
22497 IRDirty* dHI;
22498 if (insn == 6) /* RDRAND */
22499 dHI = unsafeIRDirty_1_N(pairHI, 0/*regparms*/,
22500 "amd64g_dirtyhelper_RDRAND",
22501 &amd64g_dirtyhelper_RDRAND, mkIRExprVec_0());
22502 else /* RDSEED */
22503 dHI = unsafeIRDirty_1_N(pairHI, 0/*regparms*/,
22504 "amd64g_dirtyhelper_RDSEED",
22505 &amd64g_dirtyhelper_RDSEED, mkIRExprVec_0());
22507 // There are no guest state or memory effects to declare for |dHI|.
22508 stmt( IRStmt_Dirty(dHI) );
22510 IRTemp randsHI = newTemp(Ity_I32);
22511 assign(randsHI, unop(Iop_64to32, mkexpr(pairHI)));
22512 IRTemp cHI = newTemp(Ity_I64);
22513 assign(cHI, binop(Iop_Shr64, mkexpr(pairHI), mkU8(32)));
22514 assign(randsNearlyFinal, binop(Iop_32HLto64,
22515 mkexpr(randsHI), mkexpr(randsLO)));
22516 assign(cFinal, binop(Iop_And64,
22517 binop(Iop_And64, mkexpr(cHI), mkexpr(cLO)),
22518 mkU64(1)));
22519 } else {
22520 assign(randsNearlyFinal, unop(Iop_32Uto64, mkexpr(randsLO)));
22521 assign(cFinal, binop(Iop_And64, mkexpr(cLO), mkU64(1)));
22524 /* Now cFinal[0] is the final success/failure flag (cFinal[0] == 1
22525 means success). But there's another twist. If we failed then the
22526 returned value must be forced to zero. Otherwise we could have the
22527 situation, when sz==8, where one of the host calls failed but the
22528 other didn't. This would give cFinal[0] == 0 (correctly) but
22529 randsNearlyFinal not being zero, because it contains the 32 bit
22530 result of the non-failing call. */
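/* The mask below replicates cFinal[0] across all 64 bits:
(cFinal << 63) >>signed 63 is all-ones on success and all-zeroes on
failure, so ANDing it in forces the returned value to zero whenever
the hardware generator reported failure. */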
22531 IRTemp randsFinal = newTemp(Ity_I64);
22532 assign(randsFinal,
22533 binop(Iop_And64,
22534 mkexpr(randsNearlyFinal),
22535 binop(Iop_Sar64,
22536 binop(Iop_Shl64, mkexpr(cFinal), mkU8(63)),
22537 mkU8(63)) ));
22540 // So, finally, update the guest state.
22541 putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(randsFinal)));
22543 // Set C=<success indication>, O,S,Z,A,P = 0. cFinal has already been
22544 // masked so only the lowest bit remains.
22545 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
22546 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(cFinal) ));
22547 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
22548 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
22550 if (insn == 6) {
22551 DIP("rdrand %s", nameIRegE(sz, pfx, modrm));
22552 } else {
22553 DIP("rdseed %s", nameIRegE(sz, pfx, modrm));
22556 return delta;
22559 goto decode_failure;
22562 case 0xC8: /* BSWAP %eax */
22563 case 0xC9:
22564 case 0xCA:
22565 case 0xCB:
22566 case 0xCC:
22567 case 0xCD:
22568 case 0xCE:
22569 case 0xCF: /* BSWAP %edi */
22570 if (haveF2orF3(pfx)) goto decode_failure;
22571 /* According to the AMD64 docs, this insn can have size 4 or
22572 8. */
22573 if (sz == 4) {
22574 t1 = newTemp(Ity_I32);
22575 assign( t1, getIRegRexB(4, pfx, opc-0xC8) );
22576 t2 = math_BSWAP( t1, Ity_I32 );
22577 putIRegRexB(4, pfx, opc-0xC8, mkexpr(t2));
22578 DIP("bswapl %s\n", nameIRegRexB(4, pfx, opc-0xC8));
22579 return delta;
22581 if (sz == 8) {
22582 t1 = newTemp(Ity_I64);
22583 t2 = newTemp(Ity_I64);
22584 assign( t1, getIRegRexB(8, pfx, opc-0xC8) );
22585 t2 = math_BSWAP( t1, Ity_I64 );
22586 putIRegRexB(8, pfx, opc-0xC8, mkexpr(t2));
22587 DIP("bswapq %s\n", nameIRegRexB(8, pfx, opc-0xC8));
22588 return delta;
22590 goto decode_failure;
22592 default:
22593 break;
22595 } /* first switch */
22598 /* =-=-=-=-=-=-=-=-= MMXery =-=-=-=-=-=-=-=-= */
22599 /* In the second switch, pick off MMX insns. */
22601 if (!have66orF2orF3(pfx)) {
22602 /* So there's no SIMD prefix. */
22604 vassert(sz == 4 || sz == 8);
22606 switch (opc) { /* second switch */
22608 case 0x71:
22609 case 0x72:
22610 case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
22612 case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
22613 case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
22614 case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
22615 case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
22617 case 0xFC:
22618 case 0xFD:
22619 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
22621 case 0xEC:
22622 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
22624 case 0xDC:
22625 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
22627 case 0xF8:
22628 case 0xF9:
22629 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
22631 case 0xE8:
22632 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
22634 case 0xD8:
22635 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
22637 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
22638 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
22640 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
22642 case 0x74:
22643 case 0x75:
22644 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
22646 case 0x64:
22647 case 0x65:
22648 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
22650 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
22651 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
22652 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
22654 case 0x68:
22655 case 0x69:
22656 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
22658 case 0x60:
22659 case 0x61:
22660 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
22662 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
22663 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
22664 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
22665 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
22667 case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
22668 case 0xF2:
22669 case 0xF3:
22671 case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
22672 case 0xD2:
22673 case 0xD3:
22675 case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
22676 case 0xE2: {
22677 Bool decode_OK = False;
22678 delta = dis_MMX ( &decode_OK, vbi, pfx, sz, deltaIN );
22679 if (decode_OK)
22680 return delta;
22681 goto decode_failure;
22684 default:
22685 break;
22686 } /* second switch */
22690 /* A couple of MMX corner cases */
22691 if (opc == 0x0E/* FEMMS */ || opc == 0x77/* EMMS */) {
22692 if (sz != 4)
22693 goto decode_failure;
22694 do_EMMS_preamble();
22695 DIP("{f}emms\n");
22696 return delta;
22699 /* =-=-=-=-=-=-=-=-= SSE2ery =-=-=-=-=-=-=-=-= */
22700 /* Perhaps it's an SSE or SSE2 instruction. We can try this
22701 without checking the guest hwcaps because SSE2 is a baseline
22702 facility in 64 bit mode. */
22704 Bool decode_OK = False;
22705 delta = dis_ESC_0F__SSE2 ( &decode_OK,
22706 archinfo, vbi, pfx, sz, deltaIN, dres );
22707 if (decode_OK)
22708 return delta;
22711 /* =-=-=-=-=-=-=-=-= SSE3ery =-=-=-=-=-=-=-=-= */
22712 /* Perhaps it's an SSE3 instruction. FIXME: check guest hwcaps
22713 first. */
22715 Bool decode_OK = False;
22716 delta = dis_ESC_0F__SSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
22717 if (decode_OK)
22718 return delta;
22721 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22722 /* Perhaps it's an SSE4 instruction. FIXME: check guest hwcaps
22723 first. */
22725 Bool decode_OK = False;
22726 delta = dis_ESC_0F__SSE4 ( &decode_OK,
22727 archinfo, vbi, pfx, sz, deltaIN );
22728 if (decode_OK)
22729 return delta;
22732 decode_failure:
22733 return deltaIN; /* fail */
22737 /*------------------------------------------------------------*/
22738 /*--- ---*/
22739 /*--- Top-level post-escape decoders: dis_ESC_0F38 ---*/
22740 /*--- ---*/
22741 /*------------------------------------------------------------*/
22743 __attribute__((noinline))
22744 static
22745 Long dis_ESC_0F38 (
22746 /*MB_OUT*/DisResult* dres,
22747 const VexArchInfo* archinfo,
22748 const VexAbiInfo* vbi,
22749 Prefix pfx, Int sz, Long deltaIN
22752 Long delta = deltaIN;
22753 UChar opc = getUChar(delta);
22754 delta++;
22755 switch (opc) {
22757 case 0xF0: /* 0F 38 F0 = MOVBE m16/32/64(E), r16/32/64(G) */
22758 case 0xF1: { /* 0F 38 F1 = MOVBE r16/32/64(G), m16/32/64(E) */
22759 if (!haveF2orF3(pfx) && !haveVEX(pfx)
22760 && (sz == 2 || sz == 4 || sz == 8)) {
22761 IRTemp addr = IRTemp_INVALID;
22762 UChar modrm = 0;
22763 Int alen = 0;
22764 HChar dis_buf[50];
22765 modrm = getUChar(delta);
22766 if (epartIsReg(modrm)) break;
22767 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22768 delta += alen;
22769 IRType ty = szToITy(sz);
22770 IRTemp src = newTemp(ty);
22771 if (opc == 0xF0) { /* LOAD */
22772 assign(src, loadLE(ty, mkexpr(addr)));
22773 IRTemp dst = math_BSWAP(src, ty);
22774 putIRegG(sz, pfx, modrm, mkexpr(dst));
22775 DIP("movbe %s,%s\n", dis_buf, nameIRegG(sz, pfx, modrm));
22776 } else { /* STORE */
22777 assign(src, getIRegG(sz, pfx, modrm));
22778 IRTemp dst = math_BSWAP(src, ty);
22779 storeLE(mkexpr(addr), mkexpr(dst));
22780 DIP("movbe %s,%s\n", nameIRegG(sz, pfx, modrm), dis_buf);
22782 return delta;
22784 /* else fall through; maybe one of the decoders below knows what
22785 it is. */
22786 break;
22789 default:
22790 break;
22793 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
22794 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps
22795 rather than proceeding indiscriminately. */
22797 Bool decode_OK = False;
22798 delta = dis_ESC_0F38__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
22799 if (decode_OK)
22800 return delta;
22803 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22804 /* Perhaps it's an SSE4 instruction. FIXME: consult guest hwcaps
22805 rather than proceeding indiscriminately. */
22807 Bool decode_OK = False;
22808 delta = dis_ESC_0F38__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN );
22809 if (decode_OK)
22810 return delta;
22813 /* Ignore previous decode attempts and restart from the beginning of
22814 the instruction. */
22815 delta = deltaIN;
22816 opc = getUChar(delta);
22817 delta++;
22819 switch (opc) {
22821 case 0xF6: {
22822 /* 66 0F 38 F6 = ADCX r32/64(G), m32/64(E) */
22823 /* F3 0F 38 F6 = ADOX r32/64(G), m32/64(E) */
22824 /* These were introduced in Broadwell. Gate them on AVX so as to at
22825 least reject them on earlier guests. Has no host requirements. */
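/* ADCX is an unsigned add-with-carry that reads and writes only CF;
ADOX is the same operation using OF as the carry bit. Neither touches
the other arithmetic flags, which is what the WithFlagCarryX and
WithFlagOverX variants below express. */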
22826 if (have66noF2noF3(pfx) && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
22827 if (sz == 2) {
22828 sz = 4; /* 66 prefix but operand size is 4/8 */
22830 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagCarryX, True,
22831 sz, delta, "adcx" );
22832 return delta;
22834 if (haveF3no66noF2(pfx) && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
22835 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagOverX, True,
22836 sz, delta, "adox" );
22837 return delta;
22839 /* else fall through */
22840 break;
22843 default:
22844 break;
22847 /*decode_failure:*/
22848 return deltaIN; /* fail */
22852 /*------------------------------------------------------------*/
22853 /*--- ---*/
22854 /*--- Top-level post-escape decoders: dis_ESC_0F3A ---*/
22855 /*--- ---*/
22856 /*------------------------------------------------------------*/
22858 __attribute__((noinline))
22859 static
22860 Long dis_ESC_0F3A (
22861 /*MB_OUT*/DisResult* dres,
22862 const VexArchInfo* archinfo,
22863 const VexAbiInfo* vbi,
22864 Prefix pfx, Int sz, Long deltaIN
22867 Long delta = deltaIN;
22868 UChar opc = getUChar(delta);
22869 delta++;
22870 switch (opc) {
22872 default:
22873 break;
22877 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
22878 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps
22879 rather than proceeding indiscriminately. */
22881 Bool decode_OK = False;
22882 delta = dis_ESC_0F3A__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
22883 if (decode_OK)
22884 return delta;
22887 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22888 /* Perhaps it's an SSE4 instruction. FIXME: consult guest hwcaps
22889 rather than proceeding indiscriminately. */
22891 Bool decode_OK = False;
22892 delta = dis_ESC_0F3A__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN );
22893 if (decode_OK)
22894 return delta;
22897 return deltaIN; /* fail */
22901 /*------------------------------------------------------------*/
22902 /*--- ---*/
22903 /*--- Top-level post-escape decoders: dis_ESC_0F__VEX ---*/
22904 /*--- ---*/
22905 /*------------------------------------------------------------*/
22907 /* FIXME: common up with the _256_ version below? */
22908 static
22909 Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG (
22910 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
22911 Prefix pfx, Long delta, const HChar* name,
22912 /* The actual operation. Use either 'op' or 'opFn',
22913 but not both. */
22914 IROp op, IRTemp(*opFn)(IRTemp,IRTemp),
22915 Bool invertLeftArg,
22916 Bool swapArgs
22919 UChar modrm = getUChar(delta);
22920 UInt rD = gregOfRexRM(pfx, modrm);
22921 UInt rSL = getVexNvvvv(pfx);
22922 IRTemp tSL = newTemp(Ity_V128);
22923 IRTemp tSR = newTemp(Ity_V128);
22924 IRTemp addr = IRTemp_INVALID;
22925 HChar dis_buf[50];
22926 Int alen = 0;
22927 vassert(0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*WIG?*/);
22929 assign(tSL, invertLeftArg ? unop(Iop_NotV128, getXMMReg(rSL))
22930 : getXMMReg(rSL));
22932 if (epartIsReg(modrm)) {
22933 UInt rSR = eregOfRexRM(pfx, modrm);
22934 delta += 1;
22935 assign(tSR, getXMMReg(rSR));
22936 DIP("%s %s,%s,%s\n",
22937 name, nameXMMReg(rSR), nameXMMReg(rSL), nameXMMReg(rD));
22938 } else {
22939 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
22940 delta += alen;
22941 assign(tSR, loadLE(Ity_V128, mkexpr(addr)));
22942 DIP("%s %s,%s,%s\n",
22943 name, dis_buf, nameXMMReg(rSL), nameXMMReg(rD));
22946 IRTemp res = IRTemp_INVALID;
22947 if (op != Iop_INVALID) {
22948 vassert(opFn == NULL);
22949 res = newTemp(Ity_V128);
22950 if (requiresRMode(op)) {
22951 IRTemp rm = newTemp(Ity_I32);
22952 assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
22953 assign(res, swapArgs
22954 ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL))
22955 : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR)));
22956 } else {
22957 assign(res, swapArgs
22958 ? binop(op, mkexpr(tSR), mkexpr(tSL))
22959 : binop(op, mkexpr(tSL), mkexpr(tSR)));
22961 } else {
22962 vassert(opFn != NULL);
22963 res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR);
22966 putYMMRegLoAndZU(rD, mkexpr(res));
22968 *uses_vvvv = True;
22969 return delta;
22973 /* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, with a simple IROp
22974 for the operation, no inversion of the left arg, and no swapping of
22975 args. */
22976 static
22977 Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple (
22978 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
22979 Prefix pfx, Long delta, const HChar* name,
22980 IROp op
22983 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
22984 uses_vvvv, vbi, pfx, delta, name, op, NULL, False, False);
22988 /* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, using the given IR
22989 generator to compute the result, no inversion of the left
22990 arg, and no swapping of args. */
22991 static
22992 Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex (
22993 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
22994 Prefix pfx, Long delta, const HChar* name,
22995 IRTemp(*opFn)(IRTemp,IRTemp)
22998 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
22999 uses_vvvv, vbi, pfx, delta, name,
23000 Iop_INVALID, opFn, False, False );
23004 /* Vector by scalar shift of V by the amount specified at the bottom
23005 of E. */
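/* Out-of-range amounts: a shift count >= the lane width produces all
zeroes for the logical shifts, and replicated sign bits (equivalent to
shifting by lanewidth-1) for the arithmetic shifts. The IRExpr_ITEs
below implement exactly that. */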
23006 static ULong dis_AVX128_shiftV_byE ( const VexAbiInfo* vbi,
23007 Prefix pfx, Long delta,
23008 const HChar* opname, IROp op )
23010 HChar dis_buf[50];
23011 Int alen, size;
23012 IRTemp addr;
23013 Bool shl, shr, sar;
23014 UChar modrm = getUChar(delta);
23015 UInt rG = gregOfRexRM(pfx,modrm);
23016 UInt rV = getVexNvvvv(pfx);
23017 IRTemp g0 = newTemp(Ity_V128);
23018 IRTemp g1 = newTemp(Ity_V128);
23019 IRTemp amt = newTemp(Ity_I64);
23020 IRTemp amt8 = newTemp(Ity_I8);
23021 if (epartIsReg(modrm)) {
23022 UInt rE = eregOfRexRM(pfx,modrm);
23023 assign( amt, getXMMRegLane64(rE, 0) );
23024 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
23025 nameXMMReg(rV), nameXMMReg(rG) );
23026 delta++;
23027 } else {
23028 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23029 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
23030 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
23031 delta += alen;
23033 assign( g0, getXMMReg(rV) );
23034 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
23036 shl = shr = sar = False;
23037 size = 0;
23038 switch (op) {
23039 case Iop_ShlN16x8: shl = True; size = 32; break;
23040 case Iop_ShlN32x4: shl = True; size = 32; break;
23041 case Iop_ShlN64x2: shl = True; size = 64; break;
23042 case Iop_SarN16x8: sar = True; size = 16; break;
23043 case Iop_SarN32x4: sar = True; size = 32; break;
23044 case Iop_ShrN16x8: shr = True; size = 16; break;
23045 case Iop_ShrN32x4: shr = True; size = 32; break;
23046 case Iop_ShrN64x2: shr = True; size = 64; break;
23047 default: vassert(0);
23050 if (shl || shr) {
23051 assign( g1,
23053 IRExpr_ITE(
23054 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
23055 binop(op, mkexpr(g0), mkexpr(amt8)),
23056 mkV128(0x0000) ));
23059 } else
23060 if (sar) {
23061 assign( g1,
23063 IRExpr_ITE(
23064 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
23065 binop(op, mkexpr(g0), mkexpr(amt8)),
23066 binop(op, mkexpr(g0), mkU8(size-1)) ));
23069 } else {
23070 vassert(0);
23073 putYMMRegLoAndZU( rG, mkexpr(g1) );
23074 return delta;
23078 /* Vector by scalar shift of V by the amount specified at the bottom
23079 of E. */
23080 static ULong dis_AVX256_shiftV_byE ( const VexAbiInfo* vbi,
23081 Prefix pfx, Long delta,
23082 const HChar* opname, IROp op )
23084 HChar dis_buf[50];
23085 Int alen, size;
23086 IRTemp addr;
23087 Bool shl, shr, sar;
23088 UChar modrm = getUChar(delta);
23089 UInt rG = gregOfRexRM(pfx,modrm);
23090 UInt rV = getVexNvvvv(pfx);
23091 IRTemp g0 = newTemp(Ity_V256);
23092 IRTemp g1 = newTemp(Ity_V256);
23093 IRTemp amt = newTemp(Ity_I64);
23094 IRTemp amt8 = newTemp(Ity_I8);
23095 if (epartIsReg(modrm)) {
23096 UInt rE = eregOfRexRM(pfx,modrm);
23097 assign( amt, getXMMRegLane64(rE, 0) );
23098 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
23099 nameYMMReg(rV), nameYMMReg(rG) );
23100 delta++;
23101 } else {
23102 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23103 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
23104 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
23105 delta += alen;
23107 assign( g0, getYMMReg(rV) );
23108 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
23110 shl = shr = sar = False;
23111 size = 0;
23112 switch (op) {
23113 case Iop_ShlN16x16: shl = True; size = 32; break;
23114 case Iop_ShlN32x8: shl = True; size = 32; break;
23115 case Iop_ShlN64x4: shl = True; size = 64; break;
23116 case Iop_SarN16x16: sar = True; size = 16; break;
23117 case Iop_SarN32x8: sar = True; size = 32; break;
23118 case Iop_ShrN16x16: shr = True; size = 16; break;
23119 case Iop_ShrN32x8: shr = True; size = 32; break;
23120 case Iop_ShrN64x4: shr = True; size = 64; break;
23121 default: vassert(0);
23124 if (shl || shr) {
23125 assign( g1,
23127 IRExpr_ITE(
23128 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
23129 binop(op, mkexpr(g0), mkexpr(amt8)),
23130 binop(Iop_V128HLtoV256, mkV128(0), mkV128(0)) ));
23133 } else
23134 if (sar) {
23135 assign( g1,
23137 IRExpr_ITE(
23138 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
23139 binop(op, mkexpr(g0), mkexpr(amt8)),
23140 binop(op, mkexpr(g0), mkU8(size-1)) ));
23143 } else {
23144 vassert(0);
23147 putYMMReg( rG, mkexpr(g1) );
23148 return delta;
23152 /* Vector by vector shift of V by the amount specified at the bottom
23153 of E. Vector by vector shifts are defined for all shift amounts,
23154 so not using Iop_S*x* here (and SSE2 doesn't support variable shifts
23155 anyway). */
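/* Strategy: split both the data vector and the per-lane shift-count
vector into individual 32- or 64-bit lanes, shift each lane with a
scalar IROp, and substitute 0 (or, for Iop_Sar32, the lane shifted by
lanewidth-1) whenever that lane's count is >= the lane width. */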
23156 static ULong dis_AVX_var_shiftV_byE ( const VexAbiInfo* vbi,
23157 Prefix pfx, Long delta,
23158 const HChar* opname, IROp op, Bool isYMM )
23160 HChar dis_buf[50];
23161 Int alen, size, i;
23162 IRTemp addr;
23163 UChar modrm = getUChar(delta);
23164 UInt rG = gregOfRexRM(pfx,modrm);
23165 UInt rV = getVexNvvvv(pfx);
23166 IRTemp sV = isYMM ? newTemp(Ity_V256) : newTemp(Ity_V128);
23167 IRTemp amt = isYMM ? newTemp(Ity_V256) : newTemp(Ity_V128);
23168 IRTemp amts[8], sVs[8], res[8];
23169 if (epartIsReg(modrm)) {
23170 UInt rE = eregOfRexRM(pfx,modrm);
23171 assign( amt, isYMM ? getYMMReg(rE) : getXMMReg(rE) );
23172 if (isYMM) {
23173 DIP("%s %s,%s,%s\n", opname, nameYMMReg(rE),
23174 nameYMMReg(rV), nameYMMReg(rG) );
23175 } else {
23176 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
23177 nameXMMReg(rV), nameXMMReg(rG) );
23179 delta++;
23180 } else {
23181 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23182 assign( amt, loadLE(isYMM ? Ity_V256 : Ity_V128, mkexpr(addr)) );
23183 if (isYMM) {
23184 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV),
23185 nameYMMReg(rG) );
23186 } else {
23187 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV),
23188 nameXMMReg(rG) );
23190 delta += alen;
23192 assign( sV, isYMM ? getYMMReg(rV) : getXMMReg(rV) );
23194 size = 0;
23195 switch (op) {
23196 case Iop_Shl32: size = 32; break;
23197 case Iop_Shl64: size = 64; break;
23198 case Iop_Sar32: size = 32; break;
23199 case Iop_Shr32: size = 32; break;
23200 case Iop_Shr64: size = 64; break;
23201 default: vassert(0);
23204 for (i = 0; i < 8; i++) {
23205 sVs[i] = IRTemp_INVALID;
23206 amts[i] = IRTemp_INVALID;
23208 switch (size) {
23209 case 32:
23210 if (isYMM) {
23211 breakupV256to32s( sV, &sVs[7], &sVs[6], &sVs[5], &sVs[4],
23212 &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
23213 breakupV256to32s( amt, &amts[7], &amts[6], &amts[5], &amts[4],
23214 &amts[3], &amts[2], &amts[1], &amts[0] );
23215 } else {
23216 breakupV128to32s( sV, &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
23217 breakupV128to32s( amt, &amts[3], &amts[2], &amts[1], &amts[0] );
23219 break;
23220 case 64:
23221 if (isYMM) {
23222 breakupV256to64s( sV, &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
23223 breakupV256to64s( amt, &amts[3], &amts[2], &amts[1], &amts[0] );
23224 } else {
23225 breakupV128to64s( sV, &sVs[1], &sVs[0] );
23226 breakupV128to64s( amt, &amts[1], &amts[0] );
23228 break;
23229 default: vassert(0);
23231 for (i = 0; i < 8; i++)
23232 if (sVs[i] != IRTemp_INVALID) {
23233 res[i] = size == 32 ? newTemp(Ity_I32) : newTemp(Ity_I64);
23234 assign( res[i],
23235 IRExpr_ITE(
23236 binop(size == 32 ? Iop_CmpLT32U : Iop_CmpLT64U,
23237 mkexpr(amts[i]),
23238 size == 32 ? mkU32(size) : mkU64(size)),
23239 binop(op, mkexpr(sVs[i]),
23240 unop(size == 32 ? Iop_32to8 : Iop_64to8,
23241 mkexpr(amts[i]))),
23242 op == Iop_Sar32 ? binop(op, mkexpr(sVs[i]), mkU8(size-1))
23243 : size == 32 ? mkU32(0) : mkU64(0) ));
23246 switch (size) {
23247 case 32:
23248 for (i = 0; i < 8; i++)
23249 putYMMRegLane32( rG, i, (i < 4 || isYMM)
23250 ? mkexpr(res[i]) : mkU32(0) );
23251 break;
23252 case 64:
23253 for (i = 0; i < 4; i++)
23254 putYMMRegLane64( rG, i, (i < 2 || isYMM)
23255 ? mkexpr(res[i]) : mkU64(0) );
23256 break;
23257 default: vassert(0);
23260 return delta;
23264 /* Vector by scalar shift of E into V, by an immediate byte. Modified
23265 version of dis_SSE_shiftE_imm. */
23266 static
23267 Long dis_AVX128_shiftE_to_V_imm( Prefix pfx,
23268 Long delta, const HChar* opname, IROp op )
23270 Bool shl, shr, sar;
23271 UChar rm = getUChar(delta);
23272 IRTemp e0 = newTemp(Ity_V128);
23273 IRTemp e1 = newTemp(Ity_V128);
23274 UInt rD = getVexNvvvv(pfx);
23275 UChar amt, size;
23276 vassert(epartIsReg(rm));
23277 vassert(gregLO3ofRM(rm) == 2
23278 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
23279 amt = getUChar(delta+1);
23280 delta += 2;
23281 DIP("%s $%d,%s,%s\n", opname,
23282 (Int)amt,
23283 nameXMMReg(eregOfRexRM(pfx,rm)),
23284 nameXMMReg(rD));
23285 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );
23287 shl = shr = sar = False;
23288 size = 0;
23289 switch (op) {
23290 case Iop_ShlN16x8: shl = True; size = 16; break;
23291 case Iop_ShlN32x4: shl = True; size = 32; break;
23292 case Iop_ShlN64x2: shl = True; size = 64; break;
23293 case Iop_SarN16x8: sar = True; size = 16; break;
23294 case Iop_SarN32x4: sar = True; size = 32; break;
23295 case Iop_ShrN16x8: shr = True; size = 16; break;
23296 case Iop_ShrN32x4: shr = True; size = 32; break;
23297 case Iop_ShrN64x2: shr = True; size = 64; break;
23298 default: vassert(0);
23301 if (shl || shr) {
23302 assign( e1, amt >= size
23303 ? mkV128(0x0000)
23304 : binop(op, mkexpr(e0), mkU8(amt)) );
23306 } else
23307 if (sar) {
23308 assign( e1, amt >= size
23309 ? binop(op, mkexpr(e0), mkU8(size-1))
23310 : binop(op, mkexpr(e0), mkU8(amt)) );
23312 } else {
23313 vassert(0);
23316 putYMMRegLoAndZU( rD, mkexpr(e1) );
23317 return delta;
23321 /* Vector by scalar shift of E into V, by an immediate byte. Modified
23322 version of dis_AVX128_shiftE_to_V_imm. */
23323 static
23324 Long dis_AVX256_shiftE_to_V_imm( Prefix pfx,
23325 Long delta, const HChar* opname, IROp op )
23327 Bool shl, shr, sar;
23328 UChar rm = getUChar(delta);
23329 IRTemp e0 = newTemp(Ity_V256);
23330 IRTemp e1 = newTemp(Ity_V256);
23331 UInt rD = getVexNvvvv(pfx);
23332 UChar amt, size;
23333 vassert(epartIsReg(rm));
23334 vassert(gregLO3ofRM(rm) == 2
23335 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
23336 amt = getUChar(delta+1);
23337 delta += 2;
23338 DIP("%s $%d,%s,%s\n", opname,
23339 (Int)amt,
23340 nameYMMReg(eregOfRexRM(pfx,rm)),
23341 nameYMMReg(rD));
23342 assign( e0, getYMMReg(eregOfRexRM(pfx,rm)) );
23344 shl = shr = sar = False;
23345 size = 0;
23346 switch (op) {
23347 case Iop_ShlN16x16: shl = True; size = 16; break;
23348 case Iop_ShlN32x8: shl = True; size = 32; break;
23349 case Iop_ShlN64x4: shl = True; size = 64; break;
23350 case Iop_SarN16x16: sar = True; size = 16; break;
23351 case Iop_SarN32x8: sar = True; size = 32; break;
23352 case Iop_ShrN16x16: shr = True; size = 16; break;
23353 case Iop_ShrN32x8: shr = True; size = 32; break;
23354 case Iop_ShrN64x4: shr = True; size = 64; break;
23355 default: vassert(0);
23359 if (shl || shr) {
23360 assign( e1, amt >= size
23361 ? binop(Iop_V128HLtoV256, mkV128(0), mkV128(0))
23362 : binop(op, mkexpr(e0), mkU8(amt)) );
23364 } else
23365 if (sar) {
23366 assign( e1, amt >= size
23367 ? binop(op, mkexpr(e0), mkU8(size-1))
23368 : binop(op, mkexpr(e0), mkU8(amt)) );
23370 } else {
23371 vassert(0);
23374 putYMMReg( rD, mkexpr(e1) );
23375 return delta;
23379 /* Lower 64-bit lane only AVX128 binary operation:
23380 G[63:0] = V[63:0] `op` E[63:0]
23381 G[127:64] = V[127:64]
23382 G[255:128] = 0.
23383 The specified op must be of the 64F0x2 kind, so that it
23384 copies the upper half of the left operand to the result. */
23386 static Long dis_AVX128_E_V_to_G_lo64 ( /*OUT*/Bool* uses_vvvv,
23387 const VexAbiInfo* vbi,
23388 Prefix pfx, Long delta,
23389 const HChar* opname, IROp op )
23391 HChar dis_buf[50];
23392 Int alen;
23393 IRTemp addr;
23394 UChar rm = getUChar(delta);
23395 UInt rG = gregOfRexRM(pfx,rm);
23396 UInt rV = getVexNvvvv(pfx);
23397 IRExpr* vpart = getXMMReg(rV);
23398 if (epartIsReg(rm)) {
23399 UInt rE = eregOfRexRM(pfx,rm);
23400 putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) );
23401 DIP("%s %s,%s,%s\n", opname,
23402 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23403 delta = delta+1;
23404 } else {
23405 /* We can only do a 64-bit memory read, so the upper half of the
23406 E operand needs to be made simply of zeroes. */
23407 IRTemp epart = newTemp(Ity_V128);
23408 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23409 assign( epart, unop( Iop_64UtoV128,
23410 loadLE(Ity_I64, mkexpr(addr))) );
23411 putXMMReg( rG, binop(op, vpart, mkexpr(epart)) );
23412 DIP("%s %s,%s,%s\n", opname,
23413 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23414 delta = delta+alen;
23416 putYMMRegLane128( rG, 1, mkV128(0) );
23417 *uses_vvvv = True;
23418 return delta;
23422 /* Lower 64-bit lane only AVX128 unary operation:
23423 G[63:0] = op(E[63:0])
23424 G[127:64] = V[127:64]
23425 G[255:128] = 0
23426 The specified op must be of the 64F0x2 kind, so that it
23427 copies the upper half of the operand to the result. */
23429 static Long dis_AVX128_E_V_to_G_lo64_unary ( /*OUT*/Bool* uses_vvvv,
23430 const VexAbiInfo* vbi,
23431 Prefix pfx, Long delta,
23432 const HChar* opname, IROp op )
23434 HChar dis_buf[50];
23435 Int alen;
23436 IRTemp addr;
23437 UChar rm = getUChar(delta);
23438 UInt rG = gregOfRexRM(pfx,rm);
23439 UInt rV = getVexNvvvv(pfx);
23440 IRTemp e64 = newTemp(Ity_I64);
23442 /* Fetch E[63:0] */
23443 if (epartIsReg(rm)) {
23444 UInt rE = eregOfRexRM(pfx,rm);
23445 assign(e64, getXMMRegLane64(rE, 0));
23446 DIP("%s %s,%s,%s\n", opname,
23447 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23448 delta += 1;
23449 } else {
23450 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23451 assign(e64, loadLE(Ity_I64, mkexpr(addr)));
23452 DIP("%s %s,%s,%s\n", opname,
23453 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23454 delta += alen;
23457 /* Create a value 'arg' as V[127:64]++E[63:0] */
23458 IRTemp arg = newTemp(Ity_V128);
23459 assign(arg,
23460 binop(Iop_SetV128lo64,
23461 getXMMReg(rV), mkexpr(e64)));
23462 /* and apply op to it */
23463 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
23464 *uses_vvvv = True;
23465 return delta;
23469 /* Lower 32-bit lane only AVX128 unary operation:
23470 G[31:0] = op(E[31:0])
23471 G[127:32] = V[127:32]
23472 G[255:128] = 0
23473 The specified op must be of the 32F0x4 kind, so that it
23474 copies the upper 3/4 of the operand to the result. */
23476 static Long dis_AVX128_E_V_to_G_lo32_unary ( /*OUT*/Bool* uses_vvvv,
23477 const VexAbiInfo* vbi,
23478 Prefix pfx, Long delta,
23479 const HChar* opname, IROp op )
23481 HChar dis_buf[50];
23482 Int alen;
23483 IRTemp addr;
23484 UChar rm = getUChar(delta);
23485 UInt rG = gregOfRexRM(pfx,rm);
23486 UInt rV = getVexNvvvv(pfx);
23487 IRTemp e32 = newTemp(Ity_I32);
23489 /* Fetch E[31:0] */
23490 if (epartIsReg(rm)) {
23491 UInt rE = eregOfRexRM(pfx,rm);
23492 assign(e32, getXMMRegLane32(rE, 0));
23493 DIP("%s %s,%s,%s\n", opname,
23494 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23495 delta += 1;
23496 } else {
23497 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23498 assign(e32, loadLE(Ity_I32, mkexpr(addr)));
23499 DIP("%s %s,%s,%s\n", opname,
23500 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23501 delta += alen;
23504 /* Create a value 'arg' as V[127:32]++E[31:0] */
23505 IRTemp arg = newTemp(Ity_V128);
23506 assign(arg,
23507 binop(Iop_SetV128lo32,
23508 getXMMReg(rV), mkexpr(e32)));
23509 /* and apply op to it */
23510 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
23511 *uses_vvvv = True;
23512 return delta;
23516 /* Lower 32-bit lane only AVX128 binary operation:
23517 G[31:0] = V[31:0] `op` E[31:0]
23518 G[127:32] = V[127:32]
23519 G[255:128] = 0.
23520 The specified op must be of the 32F0x4 kind, so that it
23521 copies the upper 3/4 of the left operand to the result. */
23523 static Long dis_AVX128_E_V_to_G_lo32 ( /*OUT*/Bool* uses_vvvv,
23524 const VexAbiInfo* vbi,
23525 Prefix pfx, Long delta,
23526 const HChar* opname, IROp op )
23528 HChar dis_buf[50];
23529 Int alen;
23530 IRTemp addr;
23531 UChar rm = getUChar(delta);
23532 UInt rG = gregOfRexRM(pfx,rm);
23533 UInt rV = getVexNvvvv(pfx);
23534 IRExpr* vpart = getXMMReg(rV);
23535 if (epartIsReg(rm)) {
23536 UInt rE = eregOfRexRM(pfx,rm);
23537 putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) );
23538 DIP("%s %s,%s,%s\n", opname,
23539 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23540 delta = delta+1;
23541 } else {
23542 /* We can only do a 32-bit memory read, so the upper 3/4 of the
23543 E operand needs to be made simply of zeroes. */
23544 IRTemp epart = newTemp(Ity_V128);
23545 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23546 assign( epart, unop( Iop_32UtoV128,
23547 loadLE(Ity_I32, mkexpr(addr))) );
23548 putXMMReg( rG, binop(op, vpart, mkexpr(epart)) );
23549 DIP("%s %s,%s,%s\n", opname,
23550 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23551 delta = delta+alen;
23553 putYMMRegLane128( rG, 1, mkV128(0) );
23554 *uses_vvvv = True;
23555 return delta;
23559 /* All-lanes AVX128 binary operation:
23560 G[127:0] = V[127:0] `op` E[127:0]
23561 G[255:128] = 0. */
23563 static Long dis_AVX128_E_V_to_G ( /*OUT*/Bool* uses_vvvv,
23564 const VexAbiInfo* vbi,
23565 Prefix pfx, Long delta,
23566 const HChar* opname, IROp op )
23568 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
23569 uses_vvvv, vbi, pfx, delta, opname, op,
23570 NULL, False/*!invertLeftArg*/, False/*!swapArgs*/ );
23575 /* Handles AVX128 32F/64F comparisons. A derivative of
23576 dis_SSEcmp_E_to_G. It can fail, in which case it returns the
23577 original delta to indicate failure. */
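/* The imm8 byte selects the comparison predicate. findSSECmpOp maps
it onto a single IROp plus three fixups which are applied below:
preSwap (swap the arguments first), postNot (invert the result
afterwards) and preZero (compare zeroed arguments, used for the
degenerate always-true/always-false predicates). */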
23578 static
23579 Long dis_AVX128_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv,
23580 const VexAbiInfo* vbi,
23581 Prefix pfx, Long delta,
23582 const HChar* opname, Bool all_lanes, Int sz )
23584 vassert(sz == 4 || sz == 8);
23585 Long deltaIN = delta;
23586 HChar dis_buf[50];
23587 Int alen;
23588 UInt imm8;
23589 IRTemp addr;
23590 Bool preZero = False;
23591 Bool preSwap = False;
23592 IROp op = Iop_INVALID;
23593 Bool postNot = False;
23594 IRTemp plain = newTemp(Ity_V128);
23595 UChar rm = getUChar(delta);
23596 UInt rG = gregOfRexRM(pfx, rm);
23597 UInt rV = getVexNvvvv(pfx);
23598 IRTemp argL = newTemp(Ity_V128);
23599 IRTemp argR = newTemp(Ity_V128);
23601 assign(argL, getXMMReg(rV));
23602 if (epartIsReg(rm)) {
23603 imm8 = getUChar(delta+1);
23604 Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot,
23605 imm8, all_lanes, sz);
23606 if (!ok) return deltaIN; /* FAIL */
23607 UInt rE = eregOfRexRM(pfx,rm);
23608 assign(argR, getXMMReg(rE));
23609 delta += 1+1;
23610 DIP("%s $%u,%s,%s,%s\n",
23611 opname, imm8,
23612 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23613 } else {
23614 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
23615 imm8 = getUChar(delta+alen);
23616 Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot,
23617 imm8, all_lanes, sz);
23618 if (!ok) return deltaIN; /* FAIL */
23619 assign(argR,
23620 all_lanes ? loadLE(Ity_V128, mkexpr(addr))
23621 : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
23622 : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr))));
23623 delta += alen+1;
23624 DIP("%s $%u,%s,%s,%s\n",
23625 opname, imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23628 IRTemp argMask = newTemp(Ity_V128);
23629 if (preZero) {
23630 // In this case, preSwap is irrelevant, but it's harmless to honour it
23631 // anyway.
23632 assign(argMask, mkV128(all_lanes ? 0x0000 : (sz==4 ? 0xFFF0 : 0xFF00)));
23633 } else {
23634 assign(argMask, mkV128(0xFFFF));
23637 assign(
23638 plain,
23639 preSwap ? binop(op, binop(Iop_AndV128, mkexpr(argR), mkexpr(argMask)),
23640 binop(Iop_AndV128, mkexpr(argL), mkexpr(argMask)))
23641 : binop(op, binop(Iop_AndV128, mkexpr(argL), mkexpr(argMask)),
23642 binop(Iop_AndV128, mkexpr(argR), mkexpr(argMask))) );
23645 if (all_lanes) {
23646 /* This is simple: just invert the result, if necessary, and
23647 have done. */
23648 if (postNot) {
23649 putYMMRegLoAndZU( rG, unop(Iop_NotV128, mkexpr(plain)) );
23650 } else {
23651 putYMMRegLoAndZU( rG, mkexpr(plain) );
23654 else
23655 if (!preSwap) {
23656 /* More complex. It's one-lane-only, so we may need to
23657 invert just that one lane. But at least the other lanes are
23658 correctly "in" the result, having been copied from the left
23659 operand (argL). */
23660 if (postNot) {
23661 IRExpr* mask = mkV128(sz==4 ? 0x000F : 0x00FF);
23662 putYMMRegLoAndZU( rG, binop(Iop_XorV128, mkexpr(plain),
23663 mask) );
23664 } else {
23665 putYMMRegLoAndZU( rG, mkexpr(plain) );
23668 else {
23669 /* This is the most complex case. One-lane-only, but the args
23670 were swapped. So we have to possibly invert the bottom lane,
23671 and (definitely) we have to copy the upper lane(s) from argL
23672 since, due to the swapping, what's currently there is from
23673 argR, which is not correct. */
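/* In effect: res = ((postNot ? ~plain : plain) & mask)
| (argL & notMask). */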
23674 IRTemp res = newTemp(Ity_V128);
23675 IRTemp mask = newTemp(Ity_V128);
23676 IRTemp notMask = newTemp(Ity_V128);
23677 assign(mask, mkV128(sz==4 ? 0x000F : 0x00FF));
23678 assign(notMask, mkV128(sz==4 ? 0xFFF0 : 0xFF00));
23679 if (postNot) {
23680 assign(res,
23681 binop(Iop_OrV128,
23682 binop(Iop_AndV128,
23683 unop(Iop_NotV128, mkexpr(plain)),
23684 mkexpr(mask)),
23685 binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask))));
23686 } else {
23687 assign(res,
23688 binop(Iop_OrV128,
23689 binop(Iop_AndV128,
23690 mkexpr(plain),
23691 mkexpr(mask)),
23692 binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask))));
23694 putYMMRegLoAndZU( rG, mkexpr(res) );
23697 *uses_vvvv = True;
23698 return delta;
23702 /* Handles AVX256 32F/64F comparisons. A derivative of
23703 dis_SSEcmp_E_to_G. It can fail, in which case it returns the
23704 original delta to indicate failure. */
23705 static
23706 Long dis_AVX256_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv,
23707 const VexAbiInfo* vbi,
23708 Prefix pfx, Long delta,
23709 const HChar* opname, Int sz )
23711 vassert(sz == 4 || sz == 8);
23712 Long deltaIN = delta;
23713 HChar dis_buf[50];
23714 Int alen;
23715 UInt imm8;
23716 IRTemp addr;
23717 Bool preZero = False;
23718 Bool preSwap = False;
23719 IROp op = Iop_INVALID;
23720 Bool postNot = False;
23721 IRTemp plain = newTemp(Ity_V256);
23722 UChar rm = getUChar(delta);
23723 UInt rG = gregOfRexRM(pfx, rm);
23724 UInt rV = getVexNvvvv(pfx);
23725 IRTemp argL = newTemp(Ity_V256);
23726 IRTemp argR = newTemp(Ity_V256);
23727 IRTemp argLhi = IRTemp_INVALID;
23728 IRTemp argLlo = IRTemp_INVALID;
23729 IRTemp argRhi = IRTemp_INVALID;
23730 IRTemp argRlo = IRTemp_INVALID;
23732 assign(argL, getYMMReg(rV));
23733 if (epartIsReg(rm)) {
23734 imm8 = getUChar(delta+1);
23735 Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot, imm8,
23736 True/*all_lanes*/, sz);
23737 if (!ok) return deltaIN; /* FAIL */
23738 UInt rE = eregOfRexRM(pfx,rm);
23739 assign(argR, getYMMReg(rE));
23740 delta += 1+1;
23741 DIP("%s $%u,%s,%s,%s\n",
23742 opname, imm8,
23743 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
23744 } else {
23745 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
23746 imm8 = getUChar(delta+alen);
23747 Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot, imm8,
23748 True/*all_lanes*/, sz);
23749 if (!ok) return deltaIN; /* FAIL */
23750 assign(argR, loadLE(Ity_V256, mkexpr(addr)) );
23751 delta += alen+1;
23752 DIP("%s $%u,%s,%s,%s\n",
23753 opname, imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
23756 breakupV256toV128s( preSwap ? argR : argL, &argLhi, &argLlo );
23757 breakupV256toV128s( preSwap ? argL : argR, &argRhi, &argRlo );
23759 IRTemp argMask = newTemp(Ity_V128);
23760 if (preZero) {
23761 // In this case, preSwap is irrelevant, but it's harmless to honour it
23762 // anyway.
23763 assign(argMask, mkV128(0x0000));
23764 } else {
23765 assign(argMask, mkV128(0xFFFF));
23768 assign(
23769 plain,
23770 binop( Iop_V128HLtoV256,
23771 binop(op, binop(Iop_AndV128, mkexpr(argLhi), mkexpr(argMask)),
23772 binop(Iop_AndV128, mkexpr(argRhi), mkexpr(argMask))),
23773 binop(op, binop(Iop_AndV128, mkexpr(argLlo), mkexpr(argMask)),
23774 binop(Iop_AndV128, mkexpr(argRlo), mkexpr(argMask))))
23777 /* This is simple: just invert the result, if necessary, and
23778 have done. */
23779 if (postNot) {
23780 putYMMReg( rG, unop(Iop_NotV256, mkexpr(plain)) );
23781 } else {
23782 putYMMReg( rG, mkexpr(plain) );
23785 *uses_vvvv = True;
23786 return delta;
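/* Illustrative sketch only (not an actual call site): because the helper
   above signals failure by returning the original delta, a hypothetical
   caller would look like

      Long delta0 = delta;
      delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
                                       "vcmppd", 8 );
      if (delta > delta0) goto decode_success;
      ... otherwise imm8 was not a recognised comparison; fall through ...
*/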
23790 /* Handles AVX128 unary E-to-G all-lanes operations, where the result is computed by the supplied IR-generating function. */
23791 static
23792 Long dis_AVX128_E_to_G_unary ( /*OUT*/Bool* uses_vvvv,
23793 const VexAbiInfo* vbi,
23794 Prefix pfx, Long delta,
23795 const HChar* opname,
23796 IRTemp (*opFn)(IRTemp) )
23798 HChar dis_buf[50];
23799 Int alen;
23800 IRTemp addr;
23801 IRTemp res = newTemp(Ity_V128);
23802 IRTemp arg = newTemp(Ity_V128);
23803 UChar rm = getUChar(delta);
23804 UInt rG = gregOfRexRM(pfx, rm);
23805 if (epartIsReg(rm)) {
23806 UInt rE = eregOfRexRM(pfx,rm);
23807 assign(arg, getXMMReg(rE));
23808 delta += 1;
23809 DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG));
23810 } else {
23811 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23812 assign(arg, loadLE(Ity_V128, mkexpr(addr)));
23813 delta += alen;
23814 DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG));
23816 res = opFn(arg);
23817 putYMMRegLoAndZU( rG, mkexpr(res) );
23818 *uses_vvvv = False;
23819 return delta;
23823 /* Handles AVX128 unary E-to-G all-lanes operations expressed as a single IROp. */
23824 static
23825 Long dis_AVX128_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv,
23826 const VexAbiInfo* vbi,
23827 Prefix pfx, Long delta,
23828 const HChar* opname, IROp op )
23830 HChar dis_buf[50];
23831 Int alen;
23832 IRTemp addr;
23833 IRTemp arg = newTemp(Ity_V128);
23834 UChar rm = getUChar(delta);
23835 UInt rG = gregOfRexRM(pfx, rm);
23836 if (epartIsReg(rm)) {
23837 UInt rE = eregOfRexRM(pfx,rm);
23838 assign(arg, getXMMReg(rE));
23839 delta += 1;
23840 DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG));
23841 } else {
23842 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23843 assign(arg, loadLE(Ity_V128, mkexpr(addr)));
23844 delta += alen;
23845 DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG));
23847 // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
23848 // up in the usual way.
23849 Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2;
23850 /* XXXROUNDINGFIXME */
23851 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), mkexpr(arg))
23852 : unop(op, mkexpr(arg));
23853 putYMMRegLoAndZU( rG, res );
23854 *uses_vvvv = False;
23855 return delta;
23859 /* FIXME: common up with the _128_ version above? */
23860 static
23861 Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG (
23862 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
23863 Prefix pfx, Long delta, const HChar* name,
23864 /* The actual operation. Use either 'op' or 'opFn',
23865 but not both. */
23866 IROp op, IRTemp(*opFn)(IRTemp,IRTemp),
23867 Bool invertLeftArg,
23868 Bool swapArgs
23871 UChar modrm = getUChar(delta);
23872 UInt rD = gregOfRexRM(pfx, modrm);
23873 UInt rSL = getVexNvvvv(pfx);
23874 IRTemp tSL = newTemp(Ity_V256);
23875 IRTemp tSR = newTemp(Ity_V256);
23876 IRTemp addr = IRTemp_INVALID;
23877 HChar dis_buf[50];
23878 Int alen = 0;
23879 vassert(1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*WIG?*/);
23881 assign(tSL, invertLeftArg ? unop(Iop_NotV256, getYMMReg(rSL))
23882 : getYMMReg(rSL));
23884 if (epartIsReg(modrm)) {
23885 UInt rSR = eregOfRexRM(pfx, modrm);
23886 delta += 1;
23887 assign(tSR, getYMMReg(rSR));
23888 DIP("%s %s,%s,%s\n",
23889 name, nameYMMReg(rSR), nameYMMReg(rSL), nameYMMReg(rD));
23890 } else {
23891 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
23892 delta += alen;
23893 assign(tSR, loadLE(Ity_V256, mkexpr(addr)));
23894 DIP("%s %s,%s,%s\n",
23895 name, dis_buf, nameYMMReg(rSL), nameYMMReg(rD));
23898 IRTemp res = IRTemp_INVALID;
23899 if (op != Iop_INVALID) {
23900 vassert(opFn == NULL);
23901 res = newTemp(Ity_V256);
23902 if (requiresRMode(op)) {
23903 IRTemp rm = newTemp(Ity_I32);
23904 assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
23905 assign(res, swapArgs
23906 ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL))
23907 : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR)));
23908 } else {
23909 assign(res, swapArgs
23910 ? binop(op, mkexpr(tSR), mkexpr(tSL))
23911 : binop(op, mkexpr(tSL), mkexpr(tSR)));
23913 } else {
23914 vassert(opFn != NULL);
23915 res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR);
23918 putYMMReg(rD, mkexpr(res));
23920 *uses_vvvv = True;
23921 return delta;
23925 /* All-lanes AVX256 binary operation:
23926 G[255:0] = V[255:0] `op` E[255:0]
23928 static Long dis_AVX256_E_V_to_G ( /*OUT*/Bool* uses_vvvv,
23929 const VexAbiInfo* vbi,
23930 Prefix pfx, Long delta,
23931 const HChar* opname, IROp op )
23933 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23934 uses_vvvv, vbi, pfx, delta, opname, op,
23935 NULL, False/*!invertLeftArg*/, False/*!swapArgs*/
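/* Sketch of how the wrapper above is used by the opcode cases later in
   this file, for example for 256-bit VADDPD:

      delta = dis_AVX256_E_V_to_G( uses_vvvv, vbi, pfx, delta,
                                   "vaddpd", Iop_Add64Fx4 );

   that is, a single IROp applied to the whole 256-bit vectors, with the
   left argument taken from vvvv, the right from E, and the result
   written to G. */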
23940 /* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, with a simple IROp
23941 for the operation, no inversion of the left arg, and no swapping of
23942 args. */
23943 static
23944 Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple (
23945 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
23946 Prefix pfx, Long delta, const HChar* name,
23947 IROp op
23950 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23951 uses_vvvv, vbi, pfx, delta, name, op, NULL, False, False);
23955 /* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, using the given IR
23956 generator to compute the result, no inversion of the left
23957 arg, and no swapping of args. */
23958 static
23959 Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex (
23960 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
23961 Prefix pfx, Long delta, const HChar* name,
23962 IRTemp(*opFn)(IRTemp,IRTemp)
23965 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23966 uses_vvvv, vbi, pfx, delta, name,
23967 Iop_INVALID, opFn, False, False );
23971 /* Handles AVX256 unary E-to-G all-lanes operations, where the result is computed by the supplied IR-generating function. */
23972 static
23973 Long dis_AVX256_E_to_G_unary ( /*OUT*/Bool* uses_vvvv,
23974 const VexAbiInfo* vbi,
23975 Prefix pfx, Long delta,
23976 const HChar* opname,
23977 IRTemp (*opFn)(IRTemp) )
23979 HChar dis_buf[50];
23980 Int alen;
23981 IRTemp addr;
23982 IRTemp res = newTemp(Ity_V256);
23983 IRTemp arg = newTemp(Ity_V256);
23984 UChar rm = getUChar(delta);
23985 UInt rG = gregOfRexRM(pfx, rm);
23986 if (epartIsReg(rm)) {
23987 UInt rE = eregOfRexRM(pfx,rm);
23988 assign(arg, getYMMReg(rE));
23989 delta += 1;
23990 DIP("%s %s,%s\n", opname, nameYMMReg(rE), nameYMMReg(rG));
23991 } else {
23992 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23993 assign(arg, loadLE(Ity_V256, mkexpr(addr)));
23994 delta += alen;
23995 DIP("%s %s,%s\n", opname, dis_buf, nameYMMReg(rG));
23997 res = opFn(arg);
23998 putYMMReg( rG, mkexpr(res) );
23999 *uses_vvvv = False;
24000 return delta;
24004 /* Handles AVX256 unary E-to-G all-lanes operations expressed as a single IROp. */
24005 static
24006 Long dis_AVX256_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv,
24007 const VexAbiInfo* vbi,
24008 Prefix pfx, Long delta,
24009 const HChar* opname, IROp op )
24011 HChar dis_buf[50];
24012 Int alen;
24013 IRTemp addr;
24014 IRTemp arg = newTemp(Ity_V256);
24015 UChar rm = getUChar(delta);
24016 UInt rG = gregOfRexRM(pfx, rm);
24017 if (epartIsReg(rm)) {
24018 UInt rE = eregOfRexRM(pfx,rm);
24019 assign(arg, getYMMReg(rE));
24020 delta += 1;
24021 DIP("%s %s,%s\n", opname, nameYMMReg(rE), nameYMMReg(rG));
24022 } else {
24023 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24024 assign(arg, loadLE(Ity_V256, mkexpr(addr)));
24025 delta += alen;
24026 DIP("%s %s,%s\n", opname, dis_buf, nameYMMReg(rG));
24028 putYMMReg( rG, unop(op, mkexpr(arg)) );
24029 *uses_vvvv = False;
24030 return delta;
24034 /* The use of ReinterpF64asI64 is ugly. Surely could do better if we
24035 had a variant of Iop_64x4toV256 that took F64s as args instead. */
24036 static Long dis_CVTDQ2PD_256 ( const VexAbiInfo* vbi, Prefix pfx,
24037 Long delta )
24039 IRTemp addr = IRTemp_INVALID;
24040 Int alen = 0;
24041 HChar dis_buf[50];
24042 UChar modrm = getUChar(delta);
24043 IRTemp sV = newTemp(Ity_V128);
24044 UInt rG = gregOfRexRM(pfx,modrm);
24045 if (epartIsReg(modrm)) {
24046 UInt rE = eregOfRexRM(pfx,modrm);
24047 assign( sV, getXMMReg(rE) );
24048 delta += 1;
24049 DIP("vcvtdq2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
24050 } else {
24051 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24052 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
24053 delta += alen;
24054 DIP("vcvtdq2pd %s,%s\n", dis_buf, nameYMMReg(rG) );
24056 IRTemp s3, s2, s1, s0;
24057 s3 = s2 = s1 = s0 = IRTemp_INVALID;
24058 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
24059 IRExpr* res
24060 = IRExpr_Qop(
24061 Iop_64x4toV256,
24062 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s3))),
24063 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s2))),
24064 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s1))),
24065 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s0)))
24067 putYMMReg(rG, res);
24068 return delta;
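/* A sketch of what the comment before this function wishes for: a
   hypothetical (non-existent) Iop_F64x4toV256 taking F64 arguments
   directly would avoid the reinterpret round-trip, roughly

      IRExpr_Qop( Iop_F64x4toV256,
                  unop(Iop_I32StoF64, mkexpr(s3)),
                  unop(Iop_I32StoF64, mkexpr(s2)),
                  unop(Iop_I32StoF64, mkexpr(s1)),
                  unop(Iop_I32StoF64, mkexpr(s0)) )

   Note that Iop_I32StoF64 needs no rounding mode since every 32-bit int
   is exactly representable as a double. */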
24072 static Long dis_CVTPD2PS_256 ( const VexAbiInfo* vbi, Prefix pfx,
24073 Long delta )
24075 IRTemp addr = IRTemp_INVALID;
24076 Int alen = 0;
24077 HChar dis_buf[50];
24078 UChar modrm = getUChar(delta);
24079 UInt rG = gregOfRexRM(pfx,modrm);
24080 IRTemp argV = newTemp(Ity_V256);
24081 IRTemp rmode = newTemp(Ity_I32);
24082 if (epartIsReg(modrm)) {
24083 UInt rE = eregOfRexRM(pfx,modrm);
24084 assign( argV, getYMMReg(rE) );
24085 delta += 1;
24086 DIP("vcvtpd2psy %s,%s\n", nameYMMReg(rE), nameXMMReg(rG));
24087 } else {
24088 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24089 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
24090 delta += alen;
24091 DIP("vcvtpd2psy %s,%s\n", dis_buf, nameXMMReg(rG) );
24094 assign( rmode, get_sse_roundingmode() );
24095 IRTemp t3, t2, t1, t0;
24096 t3 = t2 = t1 = t0 = IRTemp_INVALID;
24097 breakupV256to64s( argV, &t3, &t2, &t1, &t0 );
24098 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), \
24099 unop(Iop_ReinterpI64asF64, mkexpr(_t)) )
24100 putXMMRegLane32F( rG, 3, CVT(t3) );
24101 putXMMRegLane32F( rG, 2, CVT(t2) );
24102 putXMMRegLane32F( rG, 1, CVT(t1) );
24103 putXMMRegLane32F( rG, 0, CVT(t0) );
24104 # undef CVT
24105 putYMMRegLane128( rG, 1, mkV128(0) );
24106 return delta;
24110 static IRTemp math_VPUNPCK_YMM ( IRTemp tL, IRTemp tR, IROp op )
24112 IRTemp tLhi, tLlo, tRhi, tRlo;
24113 tLhi = tLlo = tRhi = tRlo = IRTemp_INVALID;
24114 IRTemp res = newTemp(Ity_V256);
24115 breakupV256toV128s( tL, &tLhi, &tLlo );
24116 breakupV256toV128s( tR, &tRhi, &tRlo );
24117 assign( res, binop( Iop_V128HLtoV256,
24118 binop( op, mkexpr(tRhi), mkexpr(tLhi) ),
24119 binop( op, mkexpr(tRlo), mkexpr(tLlo) ) ) );
24120 return res;
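/* Sketch of the lane-wise behaviour implemented above: each 128-bit lane
   of the 256-bit sources is processed independently, so for example

      math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO8x16 )

   computes, lane by lane,
      res.lane128[1] = InterleaveLO8x16( tR.lane128[1], tL.lane128[1] )
      res.lane128[0] = InterleaveLO8x16( tR.lane128[0], tL.lane128[0] )
   which is how VPUNPCKLBW is defined on ymm registers. */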
24124 static IRTemp math_VPUNPCKLBW_YMM ( IRTemp tL, IRTemp tR )
24126 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO8x16 );
24130 static IRTemp math_VPUNPCKLWD_YMM ( IRTemp tL, IRTemp tR )
24132 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO16x8 );
24136 static IRTemp math_VPUNPCKLDQ_YMM ( IRTemp tL, IRTemp tR )
24138 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO32x4 );
24142 static IRTemp math_VPUNPCKLQDQ_YMM ( IRTemp tL, IRTemp tR )
24144 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO64x2 );
24148 static IRTemp math_VPUNPCKHBW_YMM ( IRTemp tL, IRTemp tR )
24150 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI8x16 );
24154 static IRTemp math_VPUNPCKHWD_YMM ( IRTemp tL, IRTemp tR )
24156 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI16x8 );
24160 static IRTemp math_VPUNPCKHDQ_YMM ( IRTemp tL, IRTemp tR )
24162 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI32x4 );
24166 static IRTemp math_VPUNPCKHQDQ_YMM ( IRTemp tL, IRTemp tR )
24168 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI64x2 );
24172 static IRTemp math_VPACKSSWB_YMM ( IRTemp tL, IRTemp tR )
24174 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin16Sto8Sx16 );
24178 static IRTemp math_VPACKUSWB_YMM ( IRTemp tL, IRTemp tR )
24180 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin16Sto8Ux16 );
24184 static IRTemp math_VPACKSSDW_YMM ( IRTemp tL, IRTemp tR )
24186 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin32Sto16Sx8 );
24190 static IRTemp math_VPACKUSDW_YMM ( IRTemp tL, IRTemp tR )
24192 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin32Sto16Ux8 );
24196 __attribute__((noinline))
24197 static
24198 Long dis_ESC_0F__VEX (
24199 /*MB_OUT*/DisResult* dres,
24200 /*OUT*/ Bool* uses_vvvv,
24201 const VexArchInfo* archinfo,
24202 const VexAbiInfo* vbi,
24203 Prefix pfx, Int sz, Long deltaIN
24206 IRTemp addr = IRTemp_INVALID;
24207 Int alen = 0;
24208 HChar dis_buf[50];
24209 Long delta = deltaIN;
24210 UChar opc = getUChar(delta);
24211 delta++;
24212 *uses_vvvv = False;
24214 switch (opc) {
24216 case 0x10:
24217 /* VMOVSD m64, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
24218 /* Move 64 bits from E (mem only) to G (lo half xmm).
24219 Bits 255-64 of the dest are zeroed out. */
24220 if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) {
24221 UChar modrm = getUChar(delta);
24222 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24223 UInt rG = gregOfRexRM(pfx,modrm);
24224 IRTemp z128 = newTemp(Ity_V128);
24225 assign(z128, mkV128(0));
24226 putXMMReg( rG, mkexpr(z128) );
24227 /* FIXME: ALIGNMENT CHECK? */
24228 putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) );
24229 putYMMRegLane128( rG, 1, mkexpr(z128) );
24230 DIP("vmovsd %s,%s\n", dis_buf, nameXMMReg(rG));
24231 delta += alen;
24232 goto decode_success;
24234 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
24235 /* Reg form. */
24236 if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) {
24237 UChar modrm = getUChar(delta);
24238 UInt rG = gregOfRexRM(pfx, modrm);
24239 UInt rE = eregOfRexRM(pfx, modrm);
24240 UInt rV = getVexNvvvv(pfx);
24241 delta++;
24242 DIP("vmovsd %s,%s,%s\n",
24243 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
24244 IRTemp res = newTemp(Ity_V128);
24245 assign(res, binop(Iop_64HLtoV128,
24246 getXMMRegLane64(rV, 1),
24247 getXMMRegLane64(rE, 0)));
24248 putYMMRegLoAndZU(rG, mkexpr(res));
24249 *uses_vvvv = True;
24250 goto decode_success;
24252 /* VMOVSS m32, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */
24253 /* Move 32 bits from E (mem only) to G (lo half xmm).
24254 Bits 255-32 of the dest are zeroed out. */
24255 if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) {
24256 UChar modrm = getUChar(delta);
24257 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24258 UInt rG = gregOfRexRM(pfx,modrm);
24259 IRTemp z128 = newTemp(Ity_V128);
24260 assign(z128, mkV128(0));
24261 putXMMReg( rG, mkexpr(z128) );
24262 /* FIXME: ALIGNMENT CHECK? */
24263 putXMMRegLane32( rG, 0, loadLE(Ity_I32, mkexpr(addr)) );
24264 putYMMRegLane128( rG, 1, mkexpr(z128) );
24265 DIP("vmovss %s,%s\n", dis_buf, nameXMMReg(rG));
24266 delta += alen;
24267 goto decode_success;
24269 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */
24270 /* Reg form. */
24271 if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) {
24272 UChar modrm = getUChar(delta);
24273 UInt rG = gregOfRexRM(pfx, modrm);
24274 UInt rE = eregOfRexRM(pfx, modrm);
24275 UInt rV = getVexNvvvv(pfx);
24276 delta++;
24277 DIP("vmovss %s,%s,%s\n",
24278 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
24279 IRTemp res = newTemp(Ity_V128);
24280 assign( res, binop( Iop_64HLtoV128,
24281 getXMMRegLane64(rV, 1),
24282 binop(Iop_32HLto64,
24283 getXMMRegLane32(rV, 1),
24284 getXMMRegLane32(rE, 0)) ) );
24285 putYMMRegLoAndZU(rG, mkexpr(res));
24286 *uses_vvvv = True;
24287 goto decode_success;
24289 /* VMOVUPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 10 /r */
24290 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24291 UChar modrm = getUChar(delta);
24292 UInt rG = gregOfRexRM(pfx, modrm);
24293 if (epartIsReg(modrm)) {
24294 UInt rE = eregOfRexRM(pfx,modrm);
24295 putYMMRegLoAndZU( rG, getXMMReg( rE ));
24296 DIP("vmovupd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
24297 delta += 1;
24298 } else {
24299 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24300 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
24301 DIP("vmovupd %s,%s\n", dis_buf, nameXMMReg(rG));
24302 delta += alen;
24304 goto decode_success;
24306 /* VMOVUPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 10 /r */
24307 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24308 UChar modrm = getUChar(delta);
24309 UInt rG = gregOfRexRM(pfx, modrm);
24310 if (epartIsReg(modrm)) {
24311 UInt rE = eregOfRexRM(pfx,modrm);
24312 putYMMReg( rG, getYMMReg( rE ));
24313 DIP("vmovupd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
24314 delta += 1;
24315 } else {
24316 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24317 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
24318 DIP("vmovupd %s,%s\n", dis_buf, nameYMMReg(rG));
24319 delta += alen;
24321 goto decode_success;
24323 /* VMOVUPS xmm2/m128, xmm1 = VEX.128.0F.WIG 10 /r */
24324 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24325 UChar modrm = getUChar(delta);
24326 UInt rG = gregOfRexRM(pfx, modrm);
24327 if (epartIsReg(modrm)) {
24328 UInt rE = eregOfRexRM(pfx,modrm);
24329 putYMMRegLoAndZU( rG, getXMMReg( rE ));
24330 DIP("vmovups %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
24331 delta += 1;
24332 } else {
24333 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24334 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
24335 DIP("vmovups %s,%s\n", dis_buf, nameXMMReg(rG));
24336 delta += alen;
24338 goto decode_success;
24340 /* VMOVUPS ymm2/m256, ymm1 = VEX.256.0F.WIG 10 /r */
24341 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24342 UChar modrm = getUChar(delta);
24343 UInt rG = gregOfRexRM(pfx, modrm);
24344 if (epartIsReg(modrm)) {
24345 UInt rE = eregOfRexRM(pfx,modrm);
24346 putYMMReg( rG, getYMMReg( rE ));
24347 DIP("vmovups %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
24348 delta += 1;
24349 } else {
24350 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24351 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
24352 DIP("vmovups %s,%s\n", dis_buf, nameYMMReg(rG));
24353 delta += alen;
24355 goto decode_success;
24357 break;
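      /* In the notation used elsewhere in this file, the reg-form VMOVSD
         above computes
            G[63:0]    = E[63:0]
            G[127:64]  = V[127:64]
            G[255:128] = 0
         i.e. the low half comes from the E register, the rest of the xmm
         from V, and the upper YMM lane is zeroed as VEX encodings
         require; the VMOVSS reg form is analogous at 32-bit granularity. */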
24359 case 0x11:
24360 /* VMOVSD xmm1, m64 = VEX.LIG.F2.0F.WIG 11 /r */
24361 /* Move 64 bits from G (low half xmm) to mem only. */
24362 if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) {
24363 UChar modrm = getUChar(delta);
24364 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24365 UInt rG = gregOfRexRM(pfx,modrm);
24366 /* FIXME: ALIGNMENT CHECK? */
24367 storeLE( mkexpr(addr), getXMMRegLane64(rG, 0));
24368 DIP("vmovsd %s,%s\n", nameXMMReg(rG), dis_buf);
24369 delta += alen;
24370 goto decode_success;
24372 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 11 /r */
24373 /* Reg form. */
24374 if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) {
24375 UChar modrm = getUChar(delta);
24376 UInt rG = gregOfRexRM(pfx, modrm);
24377 UInt rE = eregOfRexRM(pfx, modrm);
24378 UInt rV = getVexNvvvv(pfx);
24379 delta++;
24380 DIP("vmovsd %s,%s,%s\n",
24381 nameXMMReg(rG), nameXMMReg(rV), nameXMMReg(rE));
24382 IRTemp res = newTemp(Ity_V128);
24383 assign(res, binop(Iop_64HLtoV128,
24384 getXMMRegLane64(rV, 1),
24385 getXMMRegLane64(rG, 0)));
24386 putYMMRegLoAndZU(rE, mkexpr(res));
24387 *uses_vvvv = True;
24388 goto decode_success;
24390       /* VMOVSS xmm1, m32 = VEX.LIG.F3.0F.WIG 11 /r */
24391 /* Move 32 bits from G (low 1/4 xmm) to mem only. */
24392 if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) {
24393 UChar modrm = getUChar(delta);
24394 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24395 UInt rG = gregOfRexRM(pfx,modrm);
24396 /* FIXME: ALIGNMENT CHECK? */
24397 storeLE( mkexpr(addr), getXMMRegLane32(rG, 0));
24398 DIP("vmovss %s,%s\n", nameXMMReg(rG), dis_buf);
24399 delta += alen;
24400 goto decode_success;
24402 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 11 /r */
24403 /* Reg form. */
24404 if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) {
24405 UChar modrm = getUChar(delta);
24406 UInt rG = gregOfRexRM(pfx, modrm);
24407 UInt rE = eregOfRexRM(pfx, modrm);
24408 UInt rV = getVexNvvvv(pfx);
24409 delta++;
24410 DIP("vmovss %s,%s,%s\n",
24411 nameXMMReg(rG), nameXMMReg(rV), nameXMMReg(rE));
24412 IRTemp res = newTemp(Ity_V128);
24413 assign( res, binop( Iop_64HLtoV128,
24414 getXMMRegLane64(rV, 1),
24415 binop(Iop_32HLto64,
24416 getXMMRegLane32(rV, 1),
24417 getXMMRegLane32(rG, 0)) ) );
24418 putYMMRegLoAndZU(rE, mkexpr(res));
24419 *uses_vvvv = True;
24420 goto decode_success;
24422 /* VMOVUPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 11 /r */
24423 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24424 UChar modrm = getUChar(delta);
24425 UInt rG = gregOfRexRM(pfx,modrm);
24426 if (epartIsReg(modrm)) {
24427 UInt rE = eregOfRexRM(pfx,modrm);
24428 putYMMRegLoAndZU( rE, getXMMReg(rG) );
24429 DIP("vmovupd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
24430 delta += 1;
24431 } else {
24432 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24433 storeLE( mkexpr(addr), getXMMReg(rG) );
24434 DIP("vmovupd %s,%s\n", nameXMMReg(rG), dis_buf);
24435 delta += alen;
24437 goto decode_success;
24439 /* VMOVUPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 11 /r */
24440 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24441 UChar modrm = getUChar(delta);
24442 UInt rG = gregOfRexRM(pfx,modrm);
24443 if (epartIsReg(modrm)) {
24444 UInt rE = eregOfRexRM(pfx,modrm);
24445 putYMMReg( rE, getYMMReg(rG) );
24446 DIP("vmovupd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
24447 delta += 1;
24448 } else {
24449 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24450 storeLE( mkexpr(addr), getYMMReg(rG) );
24451 DIP("vmovupd %s,%s\n", nameYMMReg(rG), dis_buf);
24452 delta += alen;
24454 goto decode_success;
24456 /* VMOVUPS xmm1, xmm2/m128 = VEX.128.0F.WIG 11 /r */
24457 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24458 UChar modrm = getUChar(delta);
24459 UInt rG = gregOfRexRM(pfx,modrm);
24460 if (epartIsReg(modrm)) {
24461 UInt rE = eregOfRexRM(pfx,modrm);
24462 putYMMRegLoAndZU( rE, getXMMReg(rG) );
24463 DIP("vmovups %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
24464 delta += 1;
24465 } else {
24466 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24467 storeLE( mkexpr(addr), getXMMReg(rG) );
24468 DIP("vmovups %s,%s\n", nameXMMReg(rG), dis_buf);
24469 delta += alen;
24471 goto decode_success;
24473 /* VMOVUPS ymm1, ymm2/m256 = VEX.256.0F.WIG 11 /r */
24474 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24475 UChar modrm = getUChar(delta);
24476 UInt rG = gregOfRexRM(pfx,modrm);
24477 if (epartIsReg(modrm)) {
24478 UInt rE = eregOfRexRM(pfx,modrm);
24479 putYMMReg( rE, getYMMReg(rG) );
24480 DIP("vmovups %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
24481 delta += 1;
24482 } else {
24483 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24484 storeLE( mkexpr(addr), getYMMReg(rG) );
24485 DIP("vmovups %s,%s\n", nameYMMReg(rG), dis_buf);
24486 delta += alen;
24488 goto decode_success;
24490 break;
24492 case 0x12:
24493       /* VMOVDDUP xmm2/m64, xmm1 = VEX.128.F2.0F.WIG 12 /r */
24494 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24495 delta = dis_MOVDDUP_128( vbi, pfx, delta, True/*isAvx*/ );
24496 goto decode_success;
24498       /* VMOVDDUP ymm2/m256, ymm1 = VEX.256.F2.0F.WIG 12 /r */
24499 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24500 delta = dis_MOVDDUP_256( vbi, pfx, delta );
24501 goto decode_success;
24503 /* VMOVHLPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 12 /r */
24504 /* Insn only exists in reg form */
24505 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
24506 && epartIsReg(getUChar(delta))) {
24507 UChar modrm = getUChar(delta);
24508 UInt rG = gregOfRexRM(pfx, modrm);
24509 UInt rE = eregOfRexRM(pfx, modrm);
24510 UInt rV = getVexNvvvv(pfx);
24511 delta++;
24512 DIP("vmovhlps %s,%s,%s\n",
24513 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
24514 IRTemp res = newTemp(Ity_V128);
24515 assign(res, binop(Iop_64HLtoV128,
24516 getXMMRegLane64(rV, 1),
24517 getXMMRegLane64(rE, 1)));
24518 putYMMRegLoAndZU(rG, mkexpr(res));
24519 *uses_vvvv = True;
24520 goto decode_success;
24522 /* VMOVLPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 12 /r */
24523 /* Insn exists only in mem form, it appears. */
24524 /* VMOVLPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 12 /r */
24525 /* Insn exists only in mem form, it appears. */
24526 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24527 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
24528 UChar modrm = getUChar(delta);
24529 UInt rG = gregOfRexRM(pfx, modrm);
24530 UInt rV = getVexNvvvv(pfx);
24531 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24532 delta += alen;
24533 DIP("vmovlpd %s,%s,%s\n",
24534 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
24535 IRTemp res = newTemp(Ity_V128);
24536 assign(res, binop(Iop_64HLtoV128,
24537 getXMMRegLane64(rV, 1),
24538 loadLE(Ity_I64, mkexpr(addr))));
24539 putYMMRegLoAndZU(rG, mkexpr(res));
24540 *uses_vvvv = True;
24541 goto decode_success;
24543       /* VMOVSLDUP xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 12 /r */
24544 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
24545 delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/,
24546 True/*isL*/ );
24547 goto decode_success;
24549       /* VMOVSLDUP ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 12 /r */
24550 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
24551 delta = dis_MOVSxDUP_256( vbi, pfx, delta, True/*isL*/ );
24552 goto decode_success;
24554 break;
24556 case 0x13:
24557 /* VMOVLPS xmm1, m64 = VEX.128.0F.WIG 13 /r */
24558 /* Insn exists only in mem form, it appears. */
24559 /* VMOVLPD xmm1, m64 = VEX.128.66.0F.WIG 13 /r */
24560 /* Insn exists only in mem form, it appears. */
24561 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24562 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
24563 UChar modrm = getUChar(delta);
24564 UInt rG = gregOfRexRM(pfx, modrm);
24565 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24566 delta += alen;
24567 storeLE( mkexpr(addr), getXMMRegLane64( rG, 0));
24568 DIP("vmovlpd %s,%s\n", nameXMMReg(rG), dis_buf);
24569 goto decode_success;
24571 break;
24573 case 0x14:
24574 case 0x15:
24575 /* VUNPCKLPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 14 /r */
24576 /* VUNPCKHPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 15 /r */
24577 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24578 Bool hi = opc == 0x15;
24579 UChar modrm = getUChar(delta);
24580 UInt rG = gregOfRexRM(pfx,modrm);
24581 UInt rV = getVexNvvvv(pfx);
24582 IRTemp eV = newTemp(Ity_V128);
24583 IRTemp vV = newTemp(Ity_V128);
24584 assign( vV, getXMMReg(rV) );
24585 if (epartIsReg(modrm)) {
24586 UInt rE = eregOfRexRM(pfx,modrm);
24587 assign( eV, getXMMReg(rE) );
24588 delta += 1;
24589 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
24590 nameXMMReg(rE), nameXMMReg(rG));
24591 } else {
24592 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24593 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
24594 delta += alen;
24595 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
24596 dis_buf, nameXMMReg(rG));
24598 IRTemp res = math_UNPCKxPS_128( eV, vV, hi );
24599 putYMMRegLoAndZU( rG, mkexpr(res) );
24600 *uses_vvvv = True;
24601 goto decode_success;
24603 /* VUNPCKLPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 14 /r */
24604 /* VUNPCKHPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 15 /r */
24605 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24606 Bool hi = opc == 0x15;
24607 UChar modrm = getUChar(delta);
24608 UInt rG = gregOfRexRM(pfx,modrm);
24609 UInt rV = getVexNvvvv(pfx);
24610 IRTemp eV = newTemp(Ity_V256);
24611 IRTemp vV = newTemp(Ity_V256);
24612 assign( vV, getYMMReg(rV) );
24613 if (epartIsReg(modrm)) {
24614 UInt rE = eregOfRexRM(pfx,modrm);
24615 assign( eV, getYMMReg(rE) );
24616 delta += 1;
24617 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
24618 nameYMMReg(rE), nameYMMReg(rG));
24619 } else {
24620 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24621 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
24622 delta += alen;
24623 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
24624 dis_buf, nameYMMReg(rG));
24626 IRTemp res = math_UNPCKxPS_256( eV, vV, hi );
24627 putYMMReg( rG, mkexpr(res) );
24628 *uses_vvvv = True;
24629 goto decode_success;
24631 /* VUNPCKLPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 14 /r */
24632 /* VUNPCKHPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 15 /r */
24633 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24634 Bool hi = opc == 0x15;
24635 UChar modrm = getUChar(delta);
24636 UInt rG = gregOfRexRM(pfx,modrm);
24637 UInt rV = getVexNvvvv(pfx);
24638 IRTemp eV = newTemp(Ity_V128);
24639 IRTemp vV = newTemp(Ity_V128);
24640 assign( vV, getXMMReg(rV) );
24641 if (epartIsReg(modrm)) {
24642 UInt rE = eregOfRexRM(pfx,modrm);
24643 assign( eV, getXMMReg(rE) );
24644 delta += 1;
24645 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
24646 nameXMMReg(rE), nameXMMReg(rG));
24647 } else {
24648 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24649 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
24650 delta += alen;
24651 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
24652 dis_buf, nameXMMReg(rG));
24654 IRTemp res = math_UNPCKxPD_128( eV, vV, hi );
24655 putYMMRegLoAndZU( rG, mkexpr(res) );
24656 *uses_vvvv = True;
24657 goto decode_success;
24659 /* VUNPCKLPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 14 /r */
24660 /* VUNPCKHPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 15 /r */
24661 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24662 Bool hi = opc == 0x15;
24663 UChar modrm = getUChar(delta);
24664 UInt rG = gregOfRexRM(pfx,modrm);
24665 UInt rV = getVexNvvvv(pfx);
24666 IRTemp eV = newTemp(Ity_V256);
24667 IRTemp vV = newTemp(Ity_V256);
24668 assign( vV, getYMMReg(rV) );
24669 if (epartIsReg(modrm)) {
24670 UInt rE = eregOfRexRM(pfx,modrm);
24671 assign( eV, getYMMReg(rE) );
24672 delta += 1;
24673 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
24674 nameYMMReg(rE), nameYMMReg(rG));
24675 } else {
24676 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24677 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
24678 delta += alen;
24679 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
24680 dis_buf, nameYMMReg(rG));
24682 IRTemp res = math_UNPCKxPD_256( eV, vV, hi );
24683 putYMMReg( rG, mkexpr(res) );
24684 *uses_vvvv = True;
24685 goto decode_success;
24687 break;
24689 case 0x16:
24690 /* VMOVLHPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 16 /r */
24691 /* Insn only exists in reg form */
24692 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
24693 && epartIsReg(getUChar(delta))) {
24694 UChar modrm = getUChar(delta);
24695 UInt rG = gregOfRexRM(pfx, modrm);
24696 UInt rE = eregOfRexRM(pfx, modrm);
24697 UInt rV = getVexNvvvv(pfx);
24698 delta++;
24699 DIP("vmovlhps %s,%s,%s\n",
24700 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
24701 IRTemp res = newTemp(Ity_V128);
24702 assign(res, binop(Iop_64HLtoV128,
24703 getXMMRegLane64(rE, 0),
24704 getXMMRegLane64(rV, 0)));
24705 putYMMRegLoAndZU(rG, mkexpr(res));
24706 *uses_vvvv = True;
24707 goto decode_success;
24709 /* VMOVHPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 16 /r */
24710 /* Insn exists only in mem form, it appears. */
24711 /* VMOVHPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 16 /r */
24712 /* Insn exists only in mem form, it appears. */
24713 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24714 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
24715 UChar modrm = getUChar(delta);
24716 UInt rG = gregOfRexRM(pfx, modrm);
24717 UInt rV = getVexNvvvv(pfx);
24718 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24719 delta += alen;
24720 DIP("vmovhp%c %s,%s,%s\n", have66(pfx) ? 'd' : 's',
24721 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
24722 IRTemp res = newTemp(Ity_V128);
24723 assign(res, binop(Iop_64HLtoV128,
24724 loadLE(Ity_I64, mkexpr(addr)),
24725 getXMMRegLane64(rV, 0)));
24726 putYMMRegLoAndZU(rG, mkexpr(res));
24727 *uses_vvvv = True;
24728 goto decode_success;
24730       /* VMOVSHDUP xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 16 /r */
24731 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
24732 delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/,
24733 False/*!isL*/ );
24734 goto decode_success;
24736       /* VMOVSHDUP ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 16 /r */
24737 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
24738 delta = dis_MOVSxDUP_256( vbi, pfx, delta, False/*!isL*/ );
24739 goto decode_success;
24741 break;
24743 case 0x17:
24744 /* VMOVHPS xmm1, m64 = VEX.128.0F.WIG 17 /r */
24745 /* Insn exists only in mem form, it appears. */
24746 /* VMOVHPD xmm1, m64 = VEX.128.66.0F.WIG 17 /r */
24747 /* Insn exists only in mem form, it appears. */
24748 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24749 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
24750 UChar modrm = getUChar(delta);
24751 UInt rG = gregOfRexRM(pfx, modrm);
24752 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24753 delta += alen;
24754 storeLE( mkexpr(addr), getXMMRegLane64( rG, 1));
24755 DIP("vmovhp%c %s,%s\n", have66(pfx) ? 'd' : 's',
24756 nameXMMReg(rG), dis_buf);
24757 goto decode_success;
24759 break;
24761 case 0x28:
24762 /* VMOVAPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 28 /r */
24763 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24764 UChar modrm = getUChar(delta);
24765 UInt rG = gregOfRexRM(pfx, modrm);
24766 if (epartIsReg(modrm)) {
24767 UInt rE = eregOfRexRM(pfx,modrm);
24768 putYMMRegLoAndZU( rG, getXMMReg( rE ));
24769 DIP("vmovapd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
24770 delta += 1;
24771 } else {
24772 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24773 gen_SIGNAL_if_not_16_aligned( vbi, addr );
24774 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
24775 DIP("vmovapd %s,%s\n", dis_buf, nameXMMReg(rG));
24776 delta += alen;
24778 goto decode_success;
24780 /* VMOVAPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 28 /r */
24781 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24782 UChar modrm = getUChar(delta);
24783 UInt rG = gregOfRexRM(pfx, modrm);
24784 if (epartIsReg(modrm)) {
24785 UInt rE = eregOfRexRM(pfx,modrm);
24786 putYMMReg( rG, getYMMReg( rE ));
24787 DIP("vmovapd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
24788 delta += 1;
24789 } else {
24790 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24791 gen_SIGNAL_if_not_32_aligned( vbi, addr );
24792 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
24793 DIP("vmovapd %s,%s\n", dis_buf, nameYMMReg(rG));
24794 delta += alen;
24796 goto decode_success;
24798 /* VMOVAPS xmm2/m128, xmm1 = VEX.128.0F.WIG 28 /r */
24799 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24800 UChar modrm = getUChar(delta);
24801 UInt rG = gregOfRexRM(pfx, modrm);
24802 if (epartIsReg(modrm)) {
24803 UInt rE = eregOfRexRM(pfx,modrm);
24804 putYMMRegLoAndZU( rG, getXMMReg( rE ));
24805 DIP("vmovaps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
24806 delta += 1;
24807 } else {
24808 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24809 gen_SIGNAL_if_not_16_aligned( vbi, addr );
24810 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
24811 DIP("vmovaps %s,%s\n", dis_buf, nameXMMReg(rG));
24812 delta += alen;
24814 goto decode_success;
24816 /* VMOVAPS ymm2/m256, ymm1 = VEX.256.0F.WIG 28 /r */
24817 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24818 UChar modrm = getUChar(delta);
24819 UInt rG = gregOfRexRM(pfx, modrm);
24820 if (epartIsReg(modrm)) {
24821 UInt rE = eregOfRexRM(pfx,modrm);
24822 putYMMReg( rG, getYMMReg( rE ));
24823 DIP("vmovaps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
24824 delta += 1;
24825 } else {
24826 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24827 gen_SIGNAL_if_not_32_aligned( vbi, addr );
24828 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
24829 DIP("vmovaps %s,%s\n", dis_buf, nameYMMReg(rG));
24830 delta += alen;
24832 goto decode_success;
24834 break;
24836 case 0x29:
24837 /* VMOVAPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 29 /r */
24838 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24839 UChar modrm = getUChar(delta);
24840 UInt rG = gregOfRexRM(pfx,modrm);
24841 if (epartIsReg(modrm)) {
24842 UInt rE = eregOfRexRM(pfx,modrm);
24843 putYMMRegLoAndZU( rE, getXMMReg(rG) );
24844 DIP("vmovapd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
24845 delta += 1;
24846 } else {
24847 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24848 gen_SIGNAL_if_not_16_aligned( vbi, addr );
24849 storeLE( mkexpr(addr), getXMMReg(rG) );
24850 DIP("vmovapd %s,%s\n", nameXMMReg(rG), dis_buf );
24851 delta += alen;
24853 goto decode_success;
24855 /* VMOVAPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 29 /r */
24856 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24857 UChar modrm = getUChar(delta);
24858 UInt rG = gregOfRexRM(pfx,modrm);
24859 if (epartIsReg(modrm)) {
24860 UInt rE = eregOfRexRM(pfx,modrm);
24861 putYMMReg( rE, getYMMReg(rG) );
24862 DIP("vmovapd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
24863 delta += 1;
24864 } else {
24865 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24866 gen_SIGNAL_if_not_32_aligned( vbi, addr );
24867 storeLE( mkexpr(addr), getYMMReg(rG) );
24868 DIP("vmovapd %s,%s\n", nameYMMReg(rG), dis_buf );
24869 delta += alen;
24871 goto decode_success;
24873 /* VMOVAPS xmm1, xmm2/m128 = VEX.128.0F.WIG 29 /r */
24874 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24875 UChar modrm = getUChar(delta);
24876 UInt rG = gregOfRexRM(pfx,modrm);
24877 if (epartIsReg(modrm)) {
24878 UInt rE = eregOfRexRM(pfx,modrm);
24879 putYMMRegLoAndZU( rE, getXMMReg(rG) );
24880 DIP("vmovaps %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
24881 delta += 1;
24882 goto decode_success;
24883 } else {
24884 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24885 gen_SIGNAL_if_not_16_aligned( vbi, addr );
24886 storeLE( mkexpr(addr), getXMMReg(rG) );
24887 DIP("vmovaps %s,%s\n", nameXMMReg(rG), dis_buf );
24888 delta += alen;
24889 goto decode_success;
24892 /* VMOVAPS ymm1, ymm2/m256 = VEX.256.0F.WIG 29 /r */
24893 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24894 UChar modrm = getUChar(delta);
24895 UInt rG = gregOfRexRM(pfx,modrm);
24896 if (epartIsReg(modrm)) {
24897 UInt rE = eregOfRexRM(pfx,modrm);
24898 putYMMReg( rE, getYMMReg(rG) );
24899 DIP("vmovaps %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
24900 delta += 1;
24901 goto decode_success;
24902 } else {
24903 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24904 gen_SIGNAL_if_not_32_aligned( vbi, addr );
24905 storeLE( mkexpr(addr), getYMMReg(rG) );
24906 DIP("vmovaps %s,%s\n", nameYMMReg(rG), dis_buf );
24907 delta += alen;
24908 goto decode_success;
24911 break;
24913 case 0x2A: {
24914 IRTemp rmode = newTemp(Ity_I32);
24915 assign( rmode, get_sse_roundingmode() );
24916 /* VCVTSI2SD r/m32, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W0 2A /r */
24917 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
24918 UChar modrm = getUChar(delta);
24919 UInt rV = getVexNvvvv(pfx);
24920 UInt rD = gregOfRexRM(pfx, modrm);
24921 IRTemp arg32 = newTemp(Ity_I32);
24922 if (epartIsReg(modrm)) {
24923 UInt rS = eregOfRexRM(pfx,modrm);
24924 assign( arg32, getIReg32(rS) );
24925 delta += 1;
24926 DIP("vcvtsi2sdl %s,%s,%s\n",
24927 nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD));
24928 } else {
24929 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24930 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
24931 delta += alen;
24932 DIP("vcvtsi2sdl %s,%s,%s\n",
24933 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24935 putXMMRegLane64F( rD, 0,
24936 unop(Iop_I32StoF64, mkexpr(arg32)));
24937 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24938 putYMMRegLane128( rD, 1, mkV128(0) );
24939 *uses_vvvv = True;
24940 goto decode_success;
24942 /* VCVTSI2SD r/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W1 2A /r */
24943 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
24944 UChar modrm = getUChar(delta);
24945 UInt rV = getVexNvvvv(pfx);
24946 UInt rD = gregOfRexRM(pfx, modrm);
24947 IRTemp arg64 = newTemp(Ity_I64);
24948 if (epartIsReg(modrm)) {
24949 UInt rS = eregOfRexRM(pfx,modrm);
24950 assign( arg64, getIReg64(rS) );
24951 delta += 1;
24952 DIP("vcvtsi2sdq %s,%s,%s\n",
24953 nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD));
24954 } else {
24955 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24956 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
24957 delta += alen;
24958 DIP("vcvtsi2sdq %s,%s,%s\n",
24959 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24961 putXMMRegLane64F( rD, 0,
24962 binop( Iop_I64StoF64,
24963 get_sse_roundingmode(),
24964 mkexpr(arg64)) );
24965 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24966 putYMMRegLane128( rD, 1, mkV128(0) );
24967 *uses_vvvv = True;
24968 goto decode_success;
24970 /* VCVTSI2SS r/m64, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W1 2A /r */
24971 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
24972 UChar modrm = getUChar(delta);
24973 UInt rV = getVexNvvvv(pfx);
24974 UInt rD = gregOfRexRM(pfx, modrm);
24975 IRTemp arg64 = newTemp(Ity_I64);
24976 if (epartIsReg(modrm)) {
24977 UInt rS = eregOfRexRM(pfx,modrm);
24978 assign( arg64, getIReg64(rS) );
24979 delta += 1;
24980 DIP("vcvtsi2ssq %s,%s,%s\n",
24981 nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD));
24982 } else {
24983 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24984 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
24985 delta += alen;
24986 DIP("vcvtsi2ssq %s,%s,%s\n",
24987 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24989 putXMMRegLane32F( rD, 0,
24990 binop(Iop_F64toF32,
24991 mkexpr(rmode),
24992 binop(Iop_I64StoF64, mkexpr(rmode),
24993 mkexpr(arg64)) ) );
24994 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
24995 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24996 putYMMRegLane128( rD, 1, mkV128(0) );
24997 *uses_vvvv = True;
24998 goto decode_success;
25000 /* VCVTSI2SS r/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W0 2A /r */
25001 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
25002 UChar modrm = getUChar(delta);
25003 UInt rV = getVexNvvvv(pfx);
25004 UInt rD = gregOfRexRM(pfx, modrm);
25005 IRTemp arg32 = newTemp(Ity_I32);
25006 if (epartIsReg(modrm)) {
25007 UInt rS = eregOfRexRM(pfx,modrm);
25008 assign( arg32, getIReg32(rS) );
25009 delta += 1;
25010 DIP("vcvtsi2ssl %s,%s,%s\n",
25011 nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD));
25012 } else {
25013 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25014 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
25015 delta += alen;
25016 DIP("vcvtsi2ssl %s,%s,%s\n",
25017 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
25019 putXMMRegLane32F( rD, 0,
25020 binop(Iop_F64toF32,
25021 mkexpr(rmode),
25022 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
25023 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
25024 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
25025 putYMMRegLane128( rD, 1, mkV128(0) );
25026 *uses_vvvv = True;
25027 goto decode_success;
25029 break;
25032 case 0x2B:
25033 /* VMOVNTPD xmm1, m128 = VEX.128.66.0F.WIG 2B /r */
25034 /* VMOVNTPS xmm1, m128 = VEX.128.0F.WIG 2B /r */
25035 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
25036 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
25037 UChar modrm = getUChar(delta);
25038 UInt rS = gregOfRexRM(pfx, modrm);
25039 IRTemp tS = newTemp(Ity_V128);
25040 assign(tS, getXMMReg(rS));
25041 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25042 delta += alen;
25043 gen_SIGNAL_if_not_16_aligned(vbi, addr);
25044 storeLE(mkexpr(addr), mkexpr(tS));
25045 DIP("vmovntp%c %s,%s\n", have66(pfx) ? 'd' : 's',
25046 nameXMMReg(rS), dis_buf);
25047 goto decode_success;
25049 /* VMOVNTPD ymm1, m256 = VEX.256.66.0F.WIG 2B /r */
25050 /* VMOVNTPS ymm1, m256 = VEX.256.0F.WIG 2B /r */
25051 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
25052 && 1==getVexL(pfx)/*256*/ && !epartIsReg(getUChar(delta))) {
25053 UChar modrm = getUChar(delta);
25054 UInt rS = gregOfRexRM(pfx, modrm);
25055 IRTemp tS = newTemp(Ity_V256);
25056 assign(tS, getYMMReg(rS));
25057 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25058 delta += alen;
25059 gen_SIGNAL_if_not_32_aligned(vbi, addr);
25060 storeLE(mkexpr(addr), mkexpr(tS));
25061 DIP("vmovntp%c %s,%s\n", have66(pfx) ? 'd' : 's',
25062 nameYMMReg(rS), dis_buf);
25063 goto decode_success;
25065 break;
25067 case 0x2C:
25068       /* VCVTTSD2SI xmm1/m64, r32 = VEX.LIG.F2.0F.W0 2C /r */
25069 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
25070 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
25071 goto decode_success;
25073 /* VCVTTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2C /r */
25074 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
25075 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
25076 goto decode_success;
25078 /* VCVTTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2C /r */
25079 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
25080 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
25081 goto decode_success;
25083       /* VCVTTSS2SI xmm1/m32, r64 = VEX.LIG.F3.0F.W1 2C /r */
25084 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
25085 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
25086 goto decode_success;
25088 break;
25090 case 0x2D:
25091       /* VCVTSD2SI xmm1/m64, r32 = VEX.LIG.F2.0F.W0 2D /r */
25092 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
25093 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
25094 goto decode_success;
25096 /* VCVTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2D /r */
25097 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
25098 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
25099 goto decode_success;
25101 /* VCVTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2D /r */
25102 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
25103 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
25104 goto decode_success;
25106       /* VCVTSS2SI xmm1/m32, r64 = VEX.LIG.F3.0F.W1 2D /r */
25107 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
25108 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
25109 goto decode_success;
25111 break;
25113 case 0x2E:
25114 case 0x2F:
25115 /* VUCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2E /r */
25116 /* VCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2F /r */
25117 if (have66noF2noF3(pfx)) {
25118 delta = dis_COMISD( vbi, pfx, delta, True/*isAvx*/, opc );
25119 goto decode_success;
25121 /* VUCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2E /r */
25122 /* VCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2F /r */
25123 if (haveNo66noF2noF3(pfx)) {
25124 delta = dis_COMISS( vbi, pfx, delta, True/*isAvx*/, opc );
25125 goto decode_success;
25127 break;
25129 case 0x50:
25130 /* VMOVMSKPD xmm2, r32 = VEX.128.66.0F.WIG 50 /r */
25131 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25132 delta = dis_MOVMSKPD_128( vbi, pfx, delta, True/*isAvx*/ );
25133 goto decode_success;
25135 /* VMOVMSKPD ymm2, r32 = VEX.256.66.0F.WIG 50 /r */
25136 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25137 delta = dis_MOVMSKPD_256( vbi, pfx, delta );
25138 goto decode_success;
25140 /* VMOVMSKPS xmm2, r32 = VEX.128.0F.WIG 50 /r */
25141 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25142 delta = dis_MOVMSKPS_128( vbi, pfx, delta, True/*isAvx*/ );
25143 goto decode_success;
25145 /* VMOVMSKPS ymm2, r32 = VEX.256.0F.WIG 50 /r */
25146 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25147 delta = dis_MOVMSKPS_256( vbi, pfx, delta );
25148 goto decode_success;
25150 break;
25152 case 0x51:
25153       /* VSQRTSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 51 /r */
25154 if (haveF3no66noF2(pfx)) {
25155 delta = dis_AVX128_E_V_to_G_lo32_unary(
25156 uses_vvvv, vbi, pfx, delta, "vsqrtss", Iop_Sqrt32F0x4 );
25157 goto decode_success;
25159 /* VSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 51 /r */
25160 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25161 delta = dis_AVX128_E_to_G_unary_all(
25162 uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx4 );
25163 goto decode_success;
25165 /* VSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 51 /r */
25166 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25167 delta = dis_AVX256_E_to_G_unary_all(
25168 uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx8 );
25169 goto decode_success;
25171 /* VSQRTSD xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F2.0F.WIG 51 /r */
25172 if (haveF2no66noF3(pfx)) {
25173 delta = dis_AVX128_E_V_to_G_lo64_unary(
25174 uses_vvvv, vbi, pfx, delta, "vsqrtsd", Iop_Sqrt64F0x2 );
25175 goto decode_success;
25177 /* VSQRTPD xmm2/m128(E), xmm1(G) = VEX.NDS.128.66.0F.WIG 51 /r */
25178 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25179 delta = dis_AVX128_E_to_G_unary_all(
25180 uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx2 );
25181 goto decode_success;
25183 /* VSQRTPD ymm2/m256(E), ymm1(G) = VEX.NDS.256.66.0F.WIG 51 /r */
25184 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25185 delta = dis_AVX256_E_to_G_unary_all(
25186 uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx4 );
25187 goto decode_success;
25189 break;
25191 case 0x52:
25192       /* VRSQRTSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 52 /r */
25193 if (haveF3no66noF2(pfx)) {
25194 delta = dis_AVX128_E_V_to_G_lo32_unary(
25195 uses_vvvv, vbi, pfx, delta, "vrsqrtss",
25196 Iop_RSqrtEst32F0x4 );
25197 goto decode_success;
25199 /* VRSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 52 /r */
25200 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25201 delta = dis_AVX128_E_to_G_unary_all(
25202 uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrtEst32Fx4 );
25203 goto decode_success;
25205 /* VRSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 52 /r */
25206 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25207 delta = dis_AVX256_E_to_G_unary_all(
25208 uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrtEst32Fx8 );
25209 goto decode_success;
25211 break;
25213 case 0x53:
25214       /* VRCPSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 53 /r */
25215 if (haveF3no66noF2(pfx)) {
25216 delta = dis_AVX128_E_V_to_G_lo32_unary(
25217 uses_vvvv, vbi, pfx, delta, "vrcpss", Iop_RecipEst32F0x4 );
25218 goto decode_success;
25220 /* VRCPPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 53 /r */
25221 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25222 delta = dis_AVX128_E_to_G_unary_all(
25223 uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_RecipEst32Fx4 );
25224 goto decode_success;
25226 /* VRCPPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 53 /r */
25227 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25228 delta = dis_AVX256_E_to_G_unary_all(
25229 uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_RecipEst32Fx8 );
25230 goto decode_success;
25232 break;
25234 case 0x54:
25235 /* VANDPD r/m, rV, r ::: r = rV & r/m */
25236 /* VANDPD = VEX.NDS.128.66.0F.WIG 54 /r */
25237 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25238 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25239 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV128 );
25240 goto decode_success;
25242 /* VANDPD r/m, rV, r ::: r = rV & r/m */
25243 /* VANDPD = VEX.NDS.256.66.0F.WIG 54 /r */
25244 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25245 delta = dis_AVX256_E_V_to_G(
25246 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV256 );
25247 goto decode_success;
25249 /* VANDPS = VEX.NDS.128.0F.WIG 54 /r */
25250 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25251 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25252 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV128 );
25253 goto decode_success;
25255 /* VANDPS = VEX.NDS.256.0F.WIG 54 /r */
25256 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25257 delta = dis_AVX256_E_V_to_G(
25258 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV256 );
25259 goto decode_success;
25261 break;
25263 case 0x55:
25264 /* VANDNPD r/m, rV, r ::: r = (not rV) & r/m */
25265 /* VANDNPD = VEX.NDS.128.66.0F.WIG 55 /r */
25266 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25267 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25268                    uses_vvvv, vbi, pfx, delta, "vandnpd", Iop_AndV128,
25269 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
25270 goto decode_success;
25272 /* VANDNPD = VEX.NDS.256.66.0F.WIG 55 /r */
25273 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25274 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
25275                    uses_vvvv, vbi, pfx, delta, "vandnpd", Iop_AndV256,
25276 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
25277 goto decode_success;
25279 /* VANDNPS = VEX.NDS.128.0F.WIG 55 /r */
25280 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25281 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25282                    uses_vvvv, vbi, pfx, delta, "vandnps", Iop_AndV128,
25283 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
25284 goto decode_success;
25286 /* VANDNPS = VEX.NDS.256.0F.WIG 55 /r */
25287 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25288 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
25289                    uses_vvvv, vbi, pfx, delta, "vandnps", Iop_AndV256,
25290 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
25291 goto decode_success;
25293 break;
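      /* The VANDN cases above reuse the generic 3-operand helper with
         invertLeftArg=True, so the IR produced is in effect (256-bit
         case shown as a sketch)

            binop(Iop_AndV256, unop(Iop_NotV256, getYMMReg(rV)), E)

         which matches the architectural definition dst = ~src1 & src2. */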
25295 case 0x56:
25296 /* VORPD r/m, rV, r ::: r = rV | r/m */
25297 /* VORPD = VEX.NDS.128.66.0F.WIG 56 /r */
25298 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25299 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25300 uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV128 );
25301 goto decode_success;
25303 /* VORPD r/m, rV, r ::: r = rV | r/m */
25304 /* VORPD = VEX.NDS.256.66.0F.WIG 56 /r */
25305 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25306 delta = dis_AVX256_E_V_to_G(
25307 uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV256 );
25308 goto decode_success;
25310 /* VORPS r/m, rV, r ::: r = rV | r/m */
25311 /* VORPS = VEX.NDS.128.0F.WIG 56 /r */
25312 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25313 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25314 uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV128 );
25315 goto decode_success;
25317 /* VORPS r/m, rV, r ::: r = rV | r/m */
25318 /* VORPS = VEX.NDS.256.0F.WIG 56 /r */
25319 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25320 delta = dis_AVX256_E_V_to_G(
25321 uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV256 );
25322 goto decode_success;
25324 break;
25326 case 0x57:
25327 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */
25328 /* VXORPD = VEX.NDS.128.66.0F.WIG 57 /r */
25329 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25330 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25331 uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV128 );
25332 goto decode_success;
25334 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */
25335 /* VXORPD = VEX.NDS.256.66.0F.WIG 57 /r */
25336 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25337 delta = dis_AVX256_E_V_to_G(
25338 uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV256 );
25339 goto decode_success;
25341 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */
25342 /* VXORPS = VEX.NDS.128.0F.WIG 57 /r */
25343 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25344 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25345 uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV128 );
25346 goto decode_success;
25348 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */
25349 /* VXORPS = VEX.NDS.256.0F.WIG 57 /r */
25350 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25351 delta = dis_AVX256_E_V_to_G(
25352 uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV256 );
25353 goto decode_success;
25355 break;
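      /* Usage note: compilers commonly zero a vector register with
         "vxorps %xmm0,%xmm0,%xmm0" (all three operands the same
         register); that form is handled by the 128-bit path above and,
         per the VEX rules, also clears bits 255:128 of the destination. */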
25357 case 0x58:
25358 /* VADDSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 58 /r */
25359 if (haveF2no66noF3(pfx)) {
25360 delta = dis_AVX128_E_V_to_G_lo64(
25361 uses_vvvv, vbi, pfx, delta, "vaddsd", Iop_Add64F0x2 );
25362 goto decode_success;
25364 /* VADDSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 58 /r */
25365 if (haveF3no66noF2(pfx)) {
25366 delta = dis_AVX128_E_V_to_G_lo32(
25367 uses_vvvv, vbi, pfx, delta, "vaddss", Iop_Add32F0x4 );
25368 goto decode_success;
25370 /* VADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 58 /r */
25371 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25372 delta = dis_AVX128_E_V_to_G(
25373 uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx4 );
25374 goto decode_success;
25376 /* VADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 58 /r */
25377 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25378 delta = dis_AVX256_E_V_to_G(
25379 uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx8 );
25380 goto decode_success;
25382 /* VADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 58 /r */
25383 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25384 delta = dis_AVX128_E_V_to_G(
25385 uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx2 );
25386 goto decode_success;
25388 /* VADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 58 /r */
25389 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25390 delta = dis_AVX256_E_V_to_G(
25391 uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx4 );
25392 goto decode_success;
25394 break;
25396 case 0x59:
25397 /* VMULSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 59 /r */
25398 if (haveF2no66noF3(pfx)) {
25399 delta = dis_AVX128_E_V_to_G_lo64(
25400 uses_vvvv, vbi, pfx, delta, "vmulsd", Iop_Mul64F0x2 );
25401 goto decode_success;
25403 /* VMULSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 59 /r */
25404 if (haveF3no66noF2(pfx)) {
25405 delta = dis_AVX128_E_V_to_G_lo32(
25406 uses_vvvv, vbi, pfx, delta, "vmulss", Iop_Mul32F0x4 );
25407 goto decode_success;
25409 /* VMULPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 59 /r */
25410 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25411 delta = dis_AVX128_E_V_to_G(
25412 uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx4 );
25413 goto decode_success;
25415 /* VMULPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 59 /r */
25416 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25417 delta = dis_AVX256_E_V_to_G(
25418 uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx8 );
25419 goto decode_success;
25421 /* VMULPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 59 /r */
25422 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25423 delta = dis_AVX128_E_V_to_G(
25424 uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx2 );
25425 goto decode_success;
25427 /* VMULPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 59 /r */
25428 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25429 delta = dis_AVX256_E_V_to_G(
25430 uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx4 );
25431 goto decode_success;
25433 break;
25435 case 0x5A:
25436 /* VCVTPS2PD xmm2/m64, xmm1 = VEX.128.0F.WIG 5A /r */
25437 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25438 delta = dis_CVTPS2PD_128( vbi, pfx, delta, True/*isAvx*/ );
25439 goto decode_success;
25441 /* VCVTPS2PD xmm2/m128, ymm1 = VEX.256.0F.WIG 5A /r */
25442 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25443 delta = dis_CVTPS2PD_256( vbi, pfx, delta );
25444 goto decode_success;
25446 /* VCVTPD2PS xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5A /r */
25447 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25448 delta = dis_CVTPD2PS_128( vbi, pfx, delta, True/*isAvx*/ );
25449 goto decode_success;
25451 /* VCVTPD2PS ymm2/m256, xmm1 = VEX.256.66.0F.WIG 5A /r */
25452 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25453 delta = dis_CVTPD2PS_256( vbi, pfx, delta );
25454 goto decode_success;
25456 /* VCVTSD2SS xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5A /r */
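/* The conversion below is a merge, not a full-width op: lane 32[0]
   of the dest gets F64->F32 of the low 64 bits of the E operand,
   rounded per the current MXCSR rounding mode; lanes 32[1..3] are
   copied from the vvvv register, and bits 255:128 are zeroed. */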
25457 if (haveF2no66noF3(pfx)) {
25458 UChar modrm = getUChar(delta);
25459 UInt rV = getVexNvvvv(pfx);
25460 UInt rD = gregOfRexRM(pfx, modrm);
25461 IRTemp f64lo = newTemp(Ity_F64);
25462 IRTemp rmode = newTemp(Ity_I32);
25463 assign( rmode, get_sse_roundingmode() );
25464 if (epartIsReg(modrm)) {
25465 UInt rS = eregOfRexRM(pfx,modrm);
25466 assign(f64lo, getXMMRegLane64F(rS, 0));
25467 delta += 1;
25468 DIP("vcvtsd2ss %s,%s,%s\n",
25469 nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD));
25470 } else {
25471 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25472 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)) );
25473 delta += alen;
25474 DIP("vcvtsd2ss %s,%s,%s\n",
25475 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
25477 putXMMRegLane32F( rD, 0,
25478 binop( Iop_F64toF32, mkexpr(rmode),
25479 mkexpr(f64lo)) );
25480 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
25481 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
25482 putYMMRegLane128( rD, 1, mkV128(0) );
25483 *uses_vvvv = True;
25484 goto decode_success;
25486 /* VCVTSS2SD xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5A /r */
25487 if (haveF3no66noF2(pfx)) {
25488 UChar modrm = getUChar(delta);
25489 UInt rV = getVexNvvvv(pfx);
25490 UInt rD = gregOfRexRM(pfx, modrm);
25491 IRTemp f32lo = newTemp(Ity_F32);
25492 if (epartIsReg(modrm)) {
25493 UInt rS = eregOfRexRM(pfx,modrm);
25494 assign(f32lo, getXMMRegLane32F(rS, 0));
25495 delta += 1;
25496 DIP("vcvtss2sd %s,%s,%s\n",
25497 nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD));
25498 } else {
25499 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25500 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)) );
25501 delta += alen;
25502 DIP("vcvtss2sd %s,%s,%s\n",
25503 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
25505 putXMMRegLane64F( rD, 0,
25506 unop( Iop_F32toF64, mkexpr(f32lo)) );
25507 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
25508 putYMMRegLane128( rD, 1, mkV128(0) );
25509 *uses_vvvv = True;
25510 goto decode_success;
25512 break;
25514 case 0x5B:
25515 /* VCVTPS2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5B /r */
25516 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25517 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta,
25518 True/*isAvx*/, False/*!r2zero*/ );
25519 goto decode_success;
25521 /* VCVTPS2DQ ymm2/m256, ymm1 = VEX.256.66.0F.WIG 5B /r */
25522 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25523 delta = dis_CVTxPS2DQ_256( vbi, pfx, delta,
25524 False/*!r2zero*/ );
25525 goto decode_success;
25527 /* VCVTTPS2DQ xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 5B /r */
25528 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
25529 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta,
25530 True/*isAvx*/, True/*r2zero*/ );
25531 goto decode_success;
25533 /* VCVTTPS2DQ ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 5B /r */
25534 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
25535 delta = dis_CVTxPS2DQ_256( vbi, pfx, delta,
25536 True/*r2zero*/ );
25537 goto decode_success;
25539 /* VCVTDQ2PS xmm2/m128, xmm1 = VEX.128.0F.WIG 5B /r */
25540 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25541 delta = dis_CVTDQ2PS_128 ( vbi, pfx, delta, True/*isAvx*/ );
25542 goto decode_success;
25544 /* VCVTDQ2PS ymm2/m256, ymm1 = VEX.256.0F.WIG 5B /r */
25545 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25546 delta = dis_CVTDQ2PS_256 ( vbi, pfx, delta );
25547 goto decode_success;
25549 break;
25551 case 0x5C:
25552 /* VSUBSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5C /r */
25553 if (haveF2no66noF3(pfx)) {
25554 delta = dis_AVX128_E_V_to_G_lo64(
25555 uses_vvvv, vbi, pfx, delta, "vsubsd", Iop_Sub64F0x2 );
25556 goto decode_success;
25558 /* VSUBSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5C /r */
25559 if (haveF3no66noF2(pfx)) {
25560 delta = dis_AVX128_E_V_to_G_lo32(
25561 uses_vvvv, vbi, pfx, delta, "vsubss", Iop_Sub32F0x4 );
25562 goto decode_success;
25564 /* VSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5C /r */
25565 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25566 delta = dis_AVX128_E_V_to_G(
25567 uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx4 );
25568 goto decode_success;
25570 /* VSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5C /r */
25571 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25572 delta = dis_AVX256_E_V_to_G(
25573 uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx8 );
25574 goto decode_success;
25576 /* VSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5C /r */
25577 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25578 delta = dis_AVX128_E_V_to_G(
25579 uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx2 );
25580 goto decode_success;
25582 /* VSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5C /r */
25583 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25584 delta = dis_AVX256_E_V_to_G(
25585 uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx4 );
25586 goto decode_success;
25588 break;
25590 case 0x5D:
25591 /* VMINSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5D /r */
25592 if (haveF2no66noF3(pfx)) {
25593 delta = dis_AVX128_E_V_to_G_lo64(
25594 uses_vvvv, vbi, pfx, delta, "vminsd", Iop_Min64F0x2 );
25595 goto decode_success;
25597 /* VMINSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5D /r */
25598 if (haveF3no66noF2(pfx)) {
25599 delta = dis_AVX128_E_V_to_G_lo32(
25600 uses_vvvv, vbi, pfx, delta, "vminss", Iop_Min32F0x4 );
25601 goto decode_success;
25603 /* VMINPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5D /r */
25604 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25605 delta = dis_AVX128_E_V_to_G(
25606 uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx4 );
25607 goto decode_success;
25609 /* VMINPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5D /r */
25610 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25611 delta = dis_AVX256_E_V_to_G(
25612 uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx8 );
25613 goto decode_success;
25615 /* VMINPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5D /r */
25616 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25617 delta = dis_AVX128_E_V_to_G(
25618 uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx2 );
25619 goto decode_success;
25621 /* VMINPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5D /r */
25622 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25623 delta = dis_AVX256_E_V_to_G(
25624 uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx4 );
25625 goto decode_success;
25627 break;
25629 case 0x5E:
25630 /* VDIVSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5E /r */
25631 if (haveF2no66noF3(pfx)) {
25632 delta = dis_AVX128_E_V_to_G_lo64(
25633 uses_vvvv, vbi, pfx, delta, "vdivsd", Iop_Div64F0x2 );
25634 goto decode_success;
25636 /* VDIVSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5E /r */
25637 if (haveF3no66noF2(pfx)) {
25638 delta = dis_AVX128_E_V_to_G_lo32(
25639 uses_vvvv, vbi, pfx, delta, "vdivss", Iop_Div32F0x4 );
25640 goto decode_success;
25642 /* VDIVPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5E /r */
25643 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25644 delta = dis_AVX128_E_V_to_G(
25645 uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx4 );
25646 goto decode_success;
25648 /* VDIVPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5E /r */
25649 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25650 delta = dis_AVX256_E_V_to_G(
25651 uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx8 );
25652 goto decode_success;
25654 /* VDIVPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5E /r */
25655 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25656 delta = dis_AVX128_E_V_to_G(
25657 uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx2 );
25658 goto decode_success;
25660 /* VDIVPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5E /r */
25661 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25662 delta = dis_AVX256_E_V_to_G(
25663 uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx4 );
25664 goto decode_success;
25666 break;
25668 case 0x5F:
25669 /* VMAXSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5F /r */
25670 if (haveF2no66noF3(pfx)) {
25671 delta = dis_AVX128_E_V_to_G_lo64(
25672 uses_vvvv, vbi, pfx, delta, "vmaxsd", Iop_Max64F0x2 );
25673 goto decode_success;
25675 /* VMAXSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5F /r */
25676 if (haveF3no66noF2(pfx)) {
25677 delta = dis_AVX128_E_V_to_G_lo32(
25678 uses_vvvv, vbi, pfx, delta, "vmaxss", Iop_Max32F0x4 );
25679 goto decode_success;
25681 /* VMAXPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5F /r */
25682 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25683 delta = dis_AVX128_E_V_to_G(
25684 uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx4 );
25685 goto decode_success;
25687 /* VMAXPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5F /r */
25688 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25689 delta = dis_AVX256_E_V_to_G(
25690 uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx8 );
25691 goto decode_success;
25693 /* VMAXPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5F /r */
25694 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25695 delta = dis_AVX128_E_V_to_G(
25696 uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx2 );
25697 goto decode_success;
25699 /* VMAXPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5F /r */
25700 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25701 delta = dis_AVX256_E_V_to_G(
25702 uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx4 );
25703 goto decode_success;
25705 break;
25707 case 0x60:
25708 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */
25709 /* VPUNPCKLBW = VEX.NDS.128.66.0F.WIG 60 /r */
25710 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25711 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25712 uses_vvvv, vbi, pfx, delta, "vpunpcklbw",
25713 Iop_InterleaveLO8x16, NULL,
25714 False/*!invertLeftArg*/, True/*swapArgs*/ );
25715 goto decode_success;
25717 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */
25718 /* VPUNPCKLBW = VEX.NDS.256.66.0F.WIG 60 /r */
25719 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25720 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25721 uses_vvvv, vbi, pfx, delta, "vpunpcklbw",
25722 math_VPUNPCKLBW_YMM );
25723 goto decode_success;
25725 break;
25727 case 0x61:
25728 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */
25729 /* VPUNPCKLWD = VEX.NDS.128.66.0F.WIG 61 /r */
25730 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25731 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25732 uses_vvvv, vbi, pfx, delta, "vpunpcklwd",
25733 Iop_InterleaveLO16x8, NULL,
25734 False/*!invertLeftArg*/, True/*swapArgs*/ );
25735 goto decode_success;
25737 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */
25738 /* VPUNPCKLWD = VEX.NDS.256.66.0F.WIG 61 /r */
25739 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25740 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25741 uses_vvvv, vbi, pfx, delta, "vpunpcklwd",
25742 math_VPUNPCKLWD_YMM );
25743 goto decode_success;
25745 break;
25747 case 0x62:
25748 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */
25749 /* VPUNPCKLDQ = VEX.NDS.128.66.0F.WIG 62 /r */
25750 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25751 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25752 uses_vvvv, vbi, pfx, delta, "vpunpckldq",
25753 Iop_InterleaveLO32x4, NULL,
25754 False/*!invertLeftArg*/, True/*swapArgs*/ );
25755 goto decode_success;
25757 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */
25758 /* VPUNPCKLDQ = VEX.NDS.256.66.0F.WIG 62 /r */
25759 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25760 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25761 uses_vvvv, vbi, pfx, delta, "vpunpckldq",
25762 math_VPUNPCKLDQ_YMM );
25763 goto decode_success;
25765 break;
25767 case 0x63:
25768 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */
25769 /* VPACKSSWB = VEX.NDS.128.66.0F.WIG 63 /r */
25770 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25771 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25772 uses_vvvv, vbi, pfx, delta, "vpacksswb",
25773 Iop_QNarrowBin16Sto8Sx16, NULL,
25774 False/*!invertLeftArg*/, True/*swapArgs*/ );
25775 goto decode_success;
25777 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */
25778 /* VPACKSSWB = VEX.NDS.256.66.0F.WIG 63 /r */
25779 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25780 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25781 uses_vvvv, vbi, pfx, delta, "vpacksswb",
25782 math_VPACKSSWB_YMM );
25783 goto decode_success;
25785 break;
25787 case 0x64:
25788 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */
25789 /* VPCMPGTB = VEX.NDS.128.66.0F.WIG 64 /r */
25790 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25791 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25792 uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx16 );
25793 goto decode_success;
25795 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */
25796 /* VPCMPGTB = VEX.NDS.256.66.0F.WIG 64 /r */
25797 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25798 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25799 uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx32 );
25800 goto decode_success;
25802 break;
25804 case 0x65:
25805 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */
25806 /* VPCMPGTW = VEX.NDS.128.66.0F.WIG 65 /r */
25807 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25808 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25809 uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx8 );
25810 goto decode_success;
25812 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */
25813 /* VPCMPGTW = VEX.NDS.256.66.0F.WIG 65 /r */
25814 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25815 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25816 uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx16 );
25817 goto decode_success;
25819 break;
25821 case 0x66:
25822 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */
25823 /* VPCMPGTD = VEX.NDS.128.66.0F.WIG 66 /r */
25824 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25825 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25826 uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx4 );
25827 goto decode_success;
25829 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */
25830 /* VPCMPGTD = VEX.NDS.256.66.0F.WIG 66 /r */
25831 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25832 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25833 uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx8 );
25834 goto decode_success;
25836 break;
25838 case 0x67:
25839 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */
25840 /* VPACKUSWB = VEX.NDS.128.66.0F.WIG 67 /r */
25841 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25842 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25843 uses_vvvv, vbi, pfx, delta, "vpackuswb",
25844 Iop_QNarrowBin16Sto8Ux16, NULL,
25845 False/*!invertLeftArg*/, True/*swapArgs*/ );
25846 goto decode_success;
25848 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */
25849 /* VPACKUSWB = VEX.NDS.256.66.0F.WIG 67 /r */
25850 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25851 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25852 uses_vvvv, vbi, pfx, delta, "vpackuswb",
25853 math_VPACKUSWB_YMM );
25854 goto decode_success;
25856 break;
25858 case 0x68:
25859 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */
25860 /* VPUNPCKHBW = VEX.NDS.128.66.0F.WIG 68 /r */
25861 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25862 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25863 uses_vvvv, vbi, pfx, delta, "vpunpckhbw",
25864 Iop_InterleaveHI8x16, NULL,
25865 False/*!invertLeftArg*/, True/*swapArgs*/ );
25866 goto decode_success;
25868 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */
25869 /* VPUNPCKHBW = VEX.NDS.256.66.0F.WIG 68 /r */
25870 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25871 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25872 uses_vvvv, vbi, pfx, delta, "vpunpckhbw",
25873 math_VPUNPCKHBW_YMM );
25874 goto decode_success;
25876 break;
25878 case 0x69:
25879 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */
25880 /* VPUNPCKHWD = VEX.NDS.128.66.0F.WIG 69 /r */
25881 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25882 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25883 uses_vvvv, vbi, pfx, delta, "vpunpckhwd",
25884 Iop_InterleaveHI16x8, NULL,
25885 False/*!invertLeftArg*/, True/*swapArgs*/ );
25886 goto decode_success;
25888 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */
25889 /* VPUNPCKHWD = VEX.NDS.256.66.0F.WIG 69 /r */
25890 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25891 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25892 uses_vvvv, vbi, pfx, delta, "vpunpckhwd",
25893 math_VPUNPCKHWD_YMM );
25894 goto decode_success;
25896 break;
25898 case 0x6A:
25899 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */
25900 /* VPUNPCKHDQ = VEX.NDS.128.66.0F.WIG 6A /r */
25901 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25902 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25903 uses_vvvv, vbi, pfx, delta, "vpunpckhdq",
25904 Iop_InterleaveHI32x4, NULL,
25905 False/*!invertLeftArg*/, True/*swapArgs*/ );
25906 goto decode_success;
25908 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */
25909 /* VPUNPCKHDQ = VEX.NDS.256.66.0F.WIG 6A /r */
25910 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25911 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25912 uses_vvvv, vbi, pfx, delta, "vpunpckhdq",
25913 math_VPUNPCKHDQ_YMM );
25914 goto decode_success;
25916 break;
25918 case 0x6B:
25919 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */
25920 /* VPACKSSDW = VEX.NDS.128.66.0F.WIG 6B /r */
25921 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25922 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25923 uses_vvvv, vbi, pfx, delta, "vpackssdw",
25924 Iop_QNarrowBin32Sto16Sx8, NULL,
25925 False/*!invertLeftArg*/, True/*swapArgs*/ );
25926 goto decode_success;
25928 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */
25929 /* VPACKSSDW = VEX.NDS.256.66.0F.WIG 6B /r */
25930 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25931 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25932 uses_vvvv, vbi, pfx, delta, "vpackssdw",
25933 math_VPACKSSDW_YMM );
25934 goto decode_success;
25936 break;
25938 case 0x6C:
25939 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */
25940 /* VPUNPCKLQDQ = VEX.NDS.128.66.0F.WIG 6C /r */
25941 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25942 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25943 uses_vvvv, vbi, pfx, delta, "vpunpcklqdq",
25944 Iop_InterleaveLO64x2, NULL,
25945 False/*!invertLeftArg*/, True/*swapArgs*/ );
25946 goto decode_success;
25948 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */
25949 /* VPUNPCKLQDQ = VEX.NDS.256.66.0F.WIG 6C /r */
25950 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25951 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25952 uses_vvvv, vbi, pfx, delta, "vpunpcklqdq",
25953 math_VPUNPCKLQDQ_YMM );
25954 goto decode_success;
25956 break;
25958 case 0x6D:
25959 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */
25960 /* VPUNPCKHQDQ = VEX.NDS.128.66.0F.WIG 6D /r */
25961 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25962 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25963 uses_vvvv, vbi, pfx, delta, "vpunpckhqdq",
25964 Iop_InterleaveHI64x2, NULL,
25965 False/*!invertLeftArg*/, True/*swapArgs*/ );
25966 goto decode_success;
25968 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */
25969 /* VPUNPCKHQDQ = VEX.NDS.256.66.0F.WIG 6D /r */
25970 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25971 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25972 uses_vvvv, vbi, pfx, delta, "vpunpckhqdq",
25973 math_VPUNPCKHQDQ_YMM );
25974 goto decode_success;
25976 break;
25978 case 0x6E:
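/* For both 6E forms below, the mandatory 66 prefix makes the
   effective operand size (sz) come out as 2, even though the actual
   transfer is 4 bytes (W0) or 8 bytes (W1) -- hence the odd-looking
   vasserts. */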
25979 /* VMOVD r32/m32, xmm1 = VEX.128.66.0F.W0 6E */
25980 if (have66noF2noF3(pfx)
25981 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
25982 vassert(sz == 2); /* even tho we are transferring 4, not 2. */
25983 UChar modrm = getUChar(delta);
25984 if (epartIsReg(modrm)) {
25985 delta += 1;
25986 putYMMRegLoAndZU(
25987 gregOfRexRM(pfx,modrm),
25988 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) )
25990 DIP("vmovd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
25991 nameXMMReg(gregOfRexRM(pfx,modrm)));
25992 } else {
25993 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25994 delta += alen;
25995 putYMMRegLoAndZU(
25996 gregOfRexRM(pfx,modrm),
25997 unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)))
25999 DIP("vmovd %s, %s\n", dis_buf,
26000 nameXMMReg(gregOfRexRM(pfx,modrm)));
26002 goto decode_success;
26004 /* VMOVQ r64/m64, xmm1 = VEX.128.66.0F.W1 6E */
26005 if (have66noF2noF3(pfx)
26006 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
26007 vassert(sz == 2); /* even tho we are transferring 8, not 2. */
26008 UChar modrm = getUChar(delta);
26009 if (epartIsReg(modrm)) {
26010 delta += 1;
26011 putYMMRegLoAndZU(
26012 gregOfRexRM(pfx,modrm),
26013 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) )
26015 DIP("vmovq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
26016 nameXMMReg(gregOfRexRM(pfx,modrm)));
26017 } else {
26018 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
26019 delta += alen;
26020 putYMMRegLoAndZU(
26021 gregOfRexRM(pfx,modrm),
26022 unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)))
26024 DIP("vmovq %s, %s\n", dis_buf,
26025 nameXMMReg(gregOfRexRM(pfx,modrm)));
26027 goto decode_success;
26029 break;
26031 case 0x6F:
26032 /* VMOVDQA ymm2/m256, ymm1 = VEX.256.66.0F.WIG 6F */
26033 /* VMOVDQU ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 6F */
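/* Both 6F forms are moves into the destination register; the only
   difference is that the 'a' (aligned) variant plants an alignment
   check on the memory form -- gen_SIGNAL_if_not_32_aligned here,
   gen_SIGNAL_if_not_16_aligned for the 128-bit case below --
   whereas the 'u' variant accepts any address. */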
26034 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
26035 && 1==getVexL(pfx)/*256*/) {
26036 UChar modrm = getUChar(delta);
26037 UInt rD = gregOfRexRM(pfx, modrm);
26038 IRTemp tD = newTemp(Ity_V256);
26039 Bool isA = have66noF2noF3(pfx);
26040 HChar ch = isA ? 'a' : 'u';
26041 if (epartIsReg(modrm)) {
26042 UInt rS = eregOfRexRM(pfx, modrm);
26043 delta += 1;
26044 assign(tD, getYMMReg(rS));
26045 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD));
26046 } else {
26047 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
26048 delta += alen;
26049 if (isA)
26050 gen_SIGNAL_if_not_32_aligned(vbi, addr);
26051 assign(tD, loadLE(Ity_V256, mkexpr(addr)));
26052 DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameYMMReg(rD));
26054 putYMMReg(rD, mkexpr(tD));
26055 goto decode_success;
26057 /* VMOVDQA xmm2/m128, xmm1 = VEX.128.66.0F.WIG 6F */
26058 /* VMOVDQU xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 6F */
26059 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
26060 && 0==getVexL(pfx)/*128*/) {
26061 UChar modrm = getUChar(delta);
26062 UInt rD = gregOfRexRM(pfx, modrm);
26063 IRTemp tD = newTemp(Ity_V128);
26064 Bool isA = have66noF2noF3(pfx);
26065 HChar ch = isA ? 'a' : 'u';
26066 if (epartIsReg(modrm)) {
26067 UInt rS = eregOfRexRM(pfx, modrm);
26068 delta += 1;
26069 assign(tD, getXMMReg(rS));
26070 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD));
26071 } else {
26072 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
26073 delta += alen;
26074 if (isA)
26075 gen_SIGNAL_if_not_16_aligned(vbi, addr);
26076 assign(tD, loadLE(Ity_V128, mkexpr(addr)));
26077 DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameXMMReg(rD));
26079 putYMMRegLoAndZU(rD, mkexpr(tD));
26080 goto decode_success;
26082 break;
26084 case 0x70:
26085 /* VPSHUFD imm8, xmm2/m128, xmm1 = VEX.128.66.0F.WIG 70 /r ib */
26086 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26087 delta = dis_PSHUFD_32x4( vbi, pfx, delta, True/*writesYmm*/);
26088 goto decode_success;
26090 /* VPSHUFD imm8, ymm2/m256, ymm1 = VEX.256.66.0F.WIG 70 /r ib */
26091 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26092 delta = dis_PSHUFD_32x8( vbi, pfx, delta);
26093 goto decode_success;
26095 /* VPSHUFLW imm8, xmm2/m128, xmm1 = VEX.128.F2.0F.WIG 70 /r ib */
26096 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26097 delta = dis_PSHUFxW_128( vbi, pfx, delta,
26098 True/*isAvx*/, False/*!xIsH*/ );
26099 goto decode_success;
26101 /* VPSHUFLW imm8, ymm2/m256, ymm1 = VEX.256.F2.0F.WIG 70 /r ib */
26102 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26103 delta = dis_PSHUFxW_256( vbi, pfx, delta, False/*!xIsH*/ );
26104 goto decode_success;
26106 /* VPSHUFHW imm8, xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 70 /r ib */
26107 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
26108 delta = dis_PSHUFxW_128( vbi, pfx, delta,
26109 True/*isAvx*/, True/*xIsH*/ );
26110 goto decode_success;
26112 /* VPSHUFHW imm8, ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 70 /r ib */
26113 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
26114 delta = dis_PSHUFxW_256( vbi, pfx, delta, True/*xIsH*/ );
26115 goto decode_success;
26117 break;
26119 case 0x71:
26120 /* VPSRLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /2 ib */
26121 /* VPSRAW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /4 ib */
26122 /* VPSLLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /6 ib */
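/* For these immediate-form shifts the reg field of the modrm byte
   acts as an opcode extension: /2 selects shift-right-logical, /4
   shift-right-arithmetic and /6 shift-left.  The E operand must be
   a register; anything else falls through as undecoded. */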
26123 if (have66noF2noF3(pfx)
26124 && 0==getVexL(pfx)/*128*/
26125 && epartIsReg(getUChar(delta))) {
26126 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
26127 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26128 "vpsrlw", Iop_ShrN16x8 );
26129 *uses_vvvv = True;
26130 goto decode_success;
26132 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
26133 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26134 "vpsraw", Iop_SarN16x8 );
26135 *uses_vvvv = True;
26136 goto decode_success;
26138 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
26139 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26140 "vpsllw", Iop_ShlN16x8 );
26141 *uses_vvvv = True;
26142 goto decode_success;
26144 /* else fall through */
26146 /* VPSRLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /2 ib */
26147 /* VPSRAW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /4 ib */
26148 /* VPSLLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /6 ib */
26149 if (have66noF2noF3(pfx)
26150 && 1==getVexL(pfx)/*256*/
26151 && epartIsReg(getUChar(delta))) {
26152 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
26153 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26154 "vpsrlw", Iop_ShrN16x16 );
26155 *uses_vvvv = True;
26156 goto decode_success;
26158 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
26159 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26160 "vpsraw", Iop_SarN16x16 );
26161 *uses_vvvv = True;
26162 goto decode_success;
26164 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
26165 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26166 "vpsllw", Iop_ShlN16x16 );
26167 *uses_vvvv = True;
26168 goto decode_success;
26170 /* else fall through */
26172 break;
26174 case 0x72:
26175 /* VPSRLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /2 ib */
26176 /* VPSRAD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /4 ib */
26177 /* VPSLLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /6 ib */
26178 if (have66noF2noF3(pfx)
26179 && 0==getVexL(pfx)/*128*/
26180 && epartIsReg(getUChar(delta))) {
26181 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
26182 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26183 "vpsrld", Iop_ShrN32x4 );
26184 *uses_vvvv = True;
26185 goto decode_success;
26187 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
26188 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26189 "vpsrad", Iop_SarN32x4 );
26190 *uses_vvvv = True;
26191 goto decode_success;
26193 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
26194 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26195 "vpslld", Iop_ShlN32x4 );
26196 *uses_vvvv = True;
26197 goto decode_success;
26199 /* else fall through */
26201 /* VPSRLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /2 ib */
26202 /* VPSRAD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /4 ib */
26203 /* VPSLLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /6 ib */
26204 if (have66noF2noF3(pfx)
26205 && 1==getVexL(pfx)/*256*/
26206 && epartIsReg(getUChar(delta))) {
26207 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
26208 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26209 "vpsrld", Iop_ShrN32x8 );
26210 *uses_vvvv = True;
26211 goto decode_success;
26213 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
26214 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26215 "vpsrad", Iop_SarN32x8 );
26216 *uses_vvvv = True;
26217 goto decode_success;
26219 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
26220 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26221 "vpslld", Iop_ShlN32x8 );
26222 *uses_vvvv = True;
26223 goto decode_success;
26225 /* else fall through */
26227 break;
26229 case 0x73:
26230 /* VPSRLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /3 ib */
26231 /* VPSLLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /7 ib */
26232 /* VPSRLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /2 ib */
26233 /* VPSLLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /6 ib */
26234 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
26235 && epartIsReg(getUChar(delta))) {
26236 Int rS = eregOfRexRM(pfx,getUChar(delta));
26237 Int rD = getVexNvvvv(pfx);
26238 IRTemp vecS = newTemp(Ity_V128);
26239 if (gregLO3ofRM(getUChar(delta)) == 3) {
26240 Int imm = (Int)getUChar(delta+1);
26241 DIP("vpsrldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD));
26242 delta += 2;
26243 assign( vecS, getXMMReg(rS) );
26244 putYMMRegLoAndZU(rD, mkexpr(math_PSRLDQ( vecS, imm )));
26245 *uses_vvvv = True;
26246 goto decode_success;
26248 if (gregLO3ofRM(getUChar(delta)) == 7) {
26249 Int imm = (Int)getUChar(delta+1);
26250 DIP("vpslldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD));
26251 delta += 2;
26252 assign( vecS, getXMMReg(rS) );
26253 putYMMRegLoAndZU(rD, mkexpr(math_PSLLDQ( vecS, imm )));
26254 *uses_vvvv = True;
26255 goto decode_success;
26257 if (gregLO3ofRM(getUChar(delta)) == 2) {
26258 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26259 "vpsrlq", Iop_ShrN64x2 );
26260 *uses_vvvv = True;
26261 goto decode_success;
26263 if (gregLO3ofRM(getUChar(delta)) == 6) {
26264 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26265 "vpsllq", Iop_ShlN64x2 );
26266 *uses_vvvv = True;
26267 goto decode_success;
26269 /* else fall through */
26271 /* VPSRLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /3 ib */
26272 /* VPSLLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /7 ib */
26273 /* VPSRLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /2 ib */
26274 /* VPSLLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /6 ib */
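/* The 256-bit byte shifts (VPSRLDQ/VPSLLDQ) operate on each 128-bit
   lane independently, so the code below splits the source into two
   V128 halves and applies math_PSRLDQ / math_PSLLDQ to each half
   separately. */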
26275 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
26276 && epartIsReg(getUChar(delta))) {
26277 Int rS = eregOfRexRM(pfx,getUChar(delta));
26278 Int rD = getVexNvvvv(pfx);
26279 if (gregLO3ofRM(getUChar(delta)) == 3) {
26280 IRTemp vecS0 = newTemp(Ity_V128);
26281 IRTemp vecS1 = newTemp(Ity_V128);
26282 Int imm = (Int)getUChar(delta+1);
26283 DIP("vpsrldq $%d,%s,%s\n", imm, nameYMMReg(rS), nameYMMReg(rD));
26284 delta += 2;
26285 assign( vecS0, getYMMRegLane128(rS, 0));
26286 assign( vecS1, getYMMRegLane128(rS, 1));
26287 putYMMRegLane128(rD, 0, mkexpr(math_PSRLDQ( vecS0, imm )));
26288 putYMMRegLane128(rD, 1, mkexpr(math_PSRLDQ( vecS1, imm )));
26289 *uses_vvvv = True;
26290 goto decode_success;
26292 if (gregLO3ofRM(getUChar(delta)) == 7) {
26293 IRTemp vecS0 = newTemp(Ity_V128);
26294 IRTemp vecS1 = newTemp(Ity_V128);
26295 Int imm = (Int)getUChar(delta+1);
26296 DIP("vpslldq $%d,%s,%s\n", imm, nameYMMReg(rS), nameYMMReg(rD));
26297 delta += 2;
26298 assign( vecS0, getYMMRegLane128(rS, 0));
26299 assign( vecS1, getYMMRegLane128(rS, 1));
26300 putYMMRegLane128(rD, 0, mkexpr(math_PSLLDQ( vecS0, imm )));
26301 putYMMRegLane128(rD, 1, mkexpr(math_PSLLDQ( vecS1, imm )));
26302 *uses_vvvv = True;
26303 goto decode_success;
26305 if (gregLO3ofRM(getUChar(delta)) == 2) {
26306 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26307 "vpsrlq", Iop_ShrN64x4 );
26308 *uses_vvvv = True;
26309 goto decode_success;
26311 if (gregLO3ofRM(getUChar(delta)) == 6) {
26312 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26313 "vpsllq", Iop_ShlN64x4 );
26314 *uses_vvvv = True;
26315 goto decode_success;
26317 /* else fall through */
26319 break;
26321 case 0x74:
26322 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */
26323 /* VPCMPEQB = VEX.NDS.128.66.0F.WIG 74 /r */
26324 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26325 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26326 uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x16 );
26327 goto decode_success;
26329 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */
26330 /* VPCMPEQB = VEX.NDS.256.66.0F.WIG 74 /r */
26331 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26332 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26333 uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x32 );
26334 goto decode_success;
26336 break;
26338 case 0x75:
26339 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */
26340 /* VPCMPEQW = VEX.NDS.128.66.0F.WIG 75 /r */
26341 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26342 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26343 uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x8 );
26344 goto decode_success;
26346 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */
26347 /* VPCMPEQW = VEX.NDS.256.66.0F.WIG 75 /r */
26348 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26349 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26350 uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x16 );
26351 goto decode_success;
26353 break;
26355 case 0x76:
26356 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */
26357 /* VPCMPEQD = VEX.NDS.128.66.0F.WIG 76 /r */
26358 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26359 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26360 uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x4 );
26361 goto decode_success;
26363 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */
26364 /* VPCMPEQD = VEX.NDS.256.66.0F.WIG 76 /r */
26365 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26366 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26367 uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x8 );
26368 goto decode_success;
26370 break;
26372 case 0x77:
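/* Both encodings below loop over all 16 YMM registers: VZEROUPPER
   (the 128-bit form) clears just bits 255:128 of each register,
   while VZEROALL (the 256-bit form) clears the registers
   entirely. */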
26373 /* VZEROUPPER = VEX.128.0F.WIG 77 */
26374 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26375 Int i;
26376 IRTemp zero128 = newTemp(Ity_V128);
26377 assign(zero128, mkV128(0));
26378 for (i = 0; i < 16; i++) {
26379 putYMMRegLane128(i, 1, mkexpr(zero128));
26381 DIP("vzeroupper\n");
26382 goto decode_success;
26384 /* VZEROALL = VEX.256.0F.WIG 77 */
26385 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26386 Int i;
26387 IRTemp zero128 = newTemp(Ity_V128);
26388 assign(zero128, mkV128(0));
26389 for (i = 0; i < 16; i++) {
26390 putYMMRegLoAndZU(i, mkexpr(zero128));
26392 DIP("vzeroall\n");
26393 goto decode_success;
26395 break;
26397 case 0x7C:
26398 case 0x7D:
26399 /* VHADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7C /r */
26400 /* VHSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7D /r */
26401 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26402 IRTemp sV = newTemp(Ity_V128);
26403 IRTemp dV = newTemp(Ity_V128);
26404 Bool isAdd = opc == 0x7C;
26405 const HChar* str = isAdd ? "add" : "sub";
26406 UChar modrm = getUChar(delta);
26407 UInt rG = gregOfRexRM(pfx,modrm);
26408 UInt rV = getVexNvvvv(pfx);
26409 if (epartIsReg(modrm)) {
26410 UInt rE = eregOfRexRM(pfx,modrm);
26411 assign( sV, getXMMReg(rE) );
26412 DIP("vh%spd %s,%s,%s\n", str, nameXMMReg(rE),
26413 nameXMMReg(rV), nameXMMReg(rG));
26414 delta += 1;
26415 } else {
26416 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26417 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
26418 DIP("vh%spd %s,%s,%s\n", str, dis_buf,
26419 nameXMMReg(rV), nameXMMReg(rG));
26420 delta += alen;
26422 assign( dV, getXMMReg(rV) );
26423 putYMMRegLoAndZU( rG, mkexpr( math_HADDPS_128 ( dV, sV, isAdd ) ) );
26424 *uses_vvvv = True;
26425 goto decode_success;
26427 /* VHADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7C /r */
26428 /* VHSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7D /r */
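/* The 256-bit horizontal ops don't cross the 128-bit lane boundary,
   so they are handled by splitting both operands into V128 halves
   and reusing the 128-bit helper on each half. */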
26429 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26430 IRTemp sV = newTemp(Ity_V256);
26431 IRTemp dV = newTemp(Ity_V256);
26432 IRTemp s1, s0, d1, d0;
26433 Bool isAdd = opc == 0x7C;
26434 const HChar* str = isAdd ? "add" : "sub";
26435 UChar modrm = getUChar(delta);
26436 UInt rG = gregOfRexRM(pfx,modrm);
26437 UInt rV = getVexNvvvv(pfx);
26438 s1 = s0 = d1 = d0 = IRTemp_INVALID;
26439 if (epartIsReg(modrm)) {
26440 UInt rE = eregOfRexRM(pfx,modrm);
26441 assign( sV, getYMMReg(rE) );
26442 DIP("vh%spd %s,%s,%s\n", str, nameYMMReg(rE),
26443 nameYMMReg(rV), nameYMMReg(rG));
26444 delta += 1;
26445 } else {
26446 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26447 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
26448 DIP("vh%spd %s,%s,%s\n", str, dis_buf,
26449 nameYMMReg(rV), nameYMMReg(rG));
26450 delta += alen;
26452 assign( dV, getYMMReg(rV) );
26453 breakupV256toV128s( dV, &d1, &d0 );
26454 breakupV256toV128s( sV, &s1, &s0 );
26455 putYMMReg( rG, binop(Iop_V128HLtoV256,
26456 mkexpr( math_HADDPS_128 ( d1, s1, isAdd ) ),
26457 mkexpr( math_HADDPS_128 ( d0, s0, isAdd ) ) ) );
26458 *uses_vvvv = True;
26459 goto decode_success;
26461 /* VHADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7C /r */
26462 /* VHSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7D /r */
26463 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26464 IRTemp sV = newTemp(Ity_V128);
26465 IRTemp dV = newTemp(Ity_V128);
26466 Bool isAdd = opc == 0x7C;
26467 const HChar* str = isAdd ? "add" : "sub";
26468 UChar modrm = getUChar(delta);
26469 UInt rG = gregOfRexRM(pfx,modrm);
26470 UInt rV = getVexNvvvv(pfx);
26471 if (epartIsReg(modrm)) {
26472 UInt rE = eregOfRexRM(pfx,modrm);
26473 assign( sV, getXMMReg(rE) );
26474 DIP("vh%spd %s,%s,%s\n", str, nameXMMReg(rE),
26475 nameXMMReg(rV), nameXMMReg(rG));
26476 delta += 1;
26477 } else {
26478 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26479 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
26480 DIP("vh%spd %s,%s,%s\n", str, dis_buf,
26481 nameXMMReg(rV), nameXMMReg(rG));
26482 delta += alen;
26484 assign( dV, getXMMReg(rV) );
26485 putYMMRegLoAndZU( rG, mkexpr( math_HADDPD_128 ( dV, sV, isAdd ) ) );
26486 *uses_vvvv = True;
26487 goto decode_success;
26489 /* VHADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7C /r */
26490 /* VHSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7D /r */
26491 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26492 IRTemp sV = newTemp(Ity_V256);
26493 IRTemp dV = newTemp(Ity_V256);
26494 IRTemp s1, s0, d1, d0;
26495 Bool isAdd = opc == 0x7C;
26496 const HChar* str = isAdd ? "add" : "sub";
26497 UChar modrm = getUChar(delta);
26498 UInt rG = gregOfRexRM(pfx,modrm);
26499 UInt rV = getVexNvvvv(pfx);
26500 s1 = s0 = d1 = d0 = IRTemp_INVALID;
26501 if (epartIsReg(modrm)) {
26502 UInt rE = eregOfRexRM(pfx,modrm);
26503 assign( sV, getYMMReg(rE) );
26504 DIP("vh%spd %s,%s,%s\n", str, nameYMMReg(rE),
26505 nameYMMReg(rV), nameYMMReg(rG));
26506 delta += 1;
26507 } else {
26508 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26509 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
26510 DIP("vh%spd %s,%s,%s\n", str, dis_buf,
26511 nameYMMReg(rV), nameYMMReg(rG));
26512 delta += alen;
26514 assign( dV, getYMMReg(rV) );
26515 breakupV256toV128s( dV, &d1, &d0 );
26516 breakupV256toV128s( sV, &s1, &s0 );
26517 putYMMReg( rG, binop(Iop_V128HLtoV256,
26518 mkexpr( math_HADDPD_128 ( d1, s1, isAdd ) ),
26519 mkexpr( math_HADDPD_128 ( d0, s0, isAdd ) ) ) );
26520 *uses_vvvv = True;
26521 goto decode_success;
26523 break;
26525 case 0x7E:
26526 /* Note the Intel docs don't make sense for this. I think they
26527 are wrong. They seem to imply it is a store when in fact I
26528 think it is a load. Also it's unclear whether this is W0, W1
26529 or WIG. */
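/* Treated as a load, then: vmovq xmm1, xmm2/m64 copies 64 bits into
   the low lane of xmm1 and zeroes bits 255:64, which is what the
   code below implements. */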
26530 /* VMOVQ xmm2/m64, xmm1 = VEX.128.F3.0F.W0 7E /r */
26531 if (haveF3no66noF2(pfx)
26532 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
26533 vassert(sz == 4); /* even tho we are transferring 8, not 4. */
26534 UChar modrm = getUChar(delta);
26535 UInt rG = gregOfRexRM(pfx,modrm);
26536 if (epartIsReg(modrm)) {
26537 UInt rE = eregOfRexRM(pfx,modrm);
26538 putXMMRegLane64( rG, 0, getXMMRegLane64( rE, 0 ));
26539 DIP("vmovq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
26540 delta += 1;
26541 } else {
26542 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26543 putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) );
26544 DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG));
26545 delta += alen;
26547 /* zero bits 255:64 */
26548 putXMMRegLane64( rG, 1, mkU64(0) );
26549 putYMMRegLane128( rG, 1, mkV128(0) );
26550 goto decode_success;
26552 /* VMOVQ xmm1, r64 = VEX.128.66.0F.W1 7E /r (reg case only) */
26553 /* Moves from G to E, so is a store-form insn */
26554 /* Intel docs list this in the VMOVD entry for some reason. */
26555 if (have66noF2noF3(pfx)
26556 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
26557 UChar modrm = getUChar(delta);
26558 UInt rG = gregOfRexRM(pfx,modrm);
26559 if (epartIsReg(modrm)) {
26560 UInt rE = eregOfRexRM(pfx,modrm);
26561 DIP("vmovq %s,%s\n", nameXMMReg(rG), nameIReg64(rE));
26562 putIReg64(rE, getXMMRegLane64(rG, 0));
26563 delta += 1;
26564 } else {
26565 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26566 storeLE( mkexpr(addr), getXMMRegLane64(rG, 0) );
26567 DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG));
26568 delta += alen;
26570 goto decode_success;
26572 /* VMOVD xmm1, m32/r32 = VEX.128.66.0F.W0 7E /r (reg case only) */
26573 /* Moves from G to E, so is a store-form insn */
26574 if (have66noF2noF3(pfx)
26575 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
26576 UChar modrm = getUChar(delta);
26577 UInt rG = gregOfRexRM(pfx,modrm);
26578 if (epartIsReg(modrm)) {
26579 UInt rE = eregOfRexRM(pfx,modrm);
26580 DIP("vmovd %s,%s\n", nameXMMReg(rG), nameIReg32(rE));
26581 putIReg32(rE, getXMMRegLane32(rG, 0));
26582 delta += 1;
26583 } else {
26584 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26585 storeLE( mkexpr(addr), getXMMRegLane32(rG, 0) );
26586 DIP("vmovd %s,%s\n", dis_buf, nameXMMReg(rG));
26587 delta += alen;
26589 goto decode_success;
26591 break;
26593 case 0x7F:
26594 /* VMOVDQA ymm1, ymm2/m256 = VEX.256.66.0F.WIG 7F */
26595 /* VMOVDQU ymm1, ymm2/m256 = VEX.256.F3.0F.WIG 7F */
26596 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
26597 && 1==getVexL(pfx)/*256*/) {
26598 UChar modrm = getUChar(delta);
26599 UInt rS = gregOfRexRM(pfx, modrm);
26600 IRTemp tS = newTemp(Ity_V256);
26601 Bool isA = have66noF2noF3(pfx);
26602 HChar ch = isA ? 'a' : 'u';
26603 assign(tS, getYMMReg(rS));
26604 if (epartIsReg(modrm)) {
26605 UInt rD = eregOfRexRM(pfx, modrm);
26606 delta += 1;
26607 putYMMReg(rD, mkexpr(tS));
26608 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD));
26609 } else {
26610 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
26611 delta += alen;
26612 if (isA)
26613 gen_SIGNAL_if_not_32_aligned(vbi, addr);
26614 storeLE(mkexpr(addr), mkexpr(tS));
26615 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), dis_buf);
26617 goto decode_success;
26619 /* VMOVDQA xmm1, xmm2/m128 = VEX.128.66.0F.WIG 7F */
26620 /* VMOVDQU xmm1, xmm2/m128 = VEX.128.F3.0F.WIG 7F */
26621 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
26622 && 0==getVexL(pfx)/*128*/) {
26623 UChar modrm = getUChar(delta);
26624 UInt rS = gregOfRexRM(pfx, modrm);
26625 IRTemp tS = newTemp(Ity_V128);
26626 Bool isA = have66noF2noF3(pfx);
26627 HChar ch = isA ? 'a' : 'u';
26628 assign(tS, getXMMReg(rS));
26629 if (epartIsReg(modrm)) {
26630 UInt rD = eregOfRexRM(pfx, modrm);
26631 delta += 1;
26632 putYMMRegLoAndZU(rD, mkexpr(tS));
26633 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD));
26634 } else {
26635 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
26636 delta += alen;
26637 if (isA)
26638 gen_SIGNAL_if_not_16_aligned(vbi, addr);
26639 storeLE(mkexpr(addr), mkexpr(tS));
26640 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), dis_buf);
26642 goto decode_success;
26644 break;
26646 case 0xAE:
26647 /* VSTMXCSR m32 = VEX.LZ.0F.WIG AE /3 */
26648 if (haveNo66noF2noF3(pfx)
26649 && 0==getVexL(pfx)/*LZ*/
26650 && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */
26651 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3
26652 && sz == 4) {
26653 delta = dis_STMXCSR(vbi, pfx, delta, True/*isAvx*/);
26654 goto decode_success;
26656 /* VLDMXCSR m32 = VEX.LZ.0F.WIG AE /2 */
26657 if (haveNo66noF2noF3(pfx)
26658 && 0==getVexL(pfx)/*LZ*/
26659 && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */
26660 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2
26661 && sz == 4) {
26662 delta = dis_LDMXCSR(vbi, pfx, delta, True/*isAvx*/);
26663 goto decode_success;
26665 break;
26667 case 0xC2:
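/* All the VCMP arms below share the same pattern: the cmp helper
   returns the original delta when it can't decode the insn (e.g. an
   unhandled immediate predicate), so success is detected by checking
   that delta actually advanced. */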
26668 /* VCMPSD xmm3/m64(E=argL), xmm2(V=argR), xmm1(G) */
26669 /* = VEX.NDS.LIG.F2.0F.WIG C2 /r ib */
26670 if (haveF2no66noF3(pfx)) {
26671 Long delta0 = delta;
26672 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26673 "vcmpsd", False/*!all_lanes*/,
26674 8/*sz*/);
26675 if (delta > delta0) goto decode_success;
26676 /* else fall through -- decoding has failed */
26678 /* VCMPSS xmm3/m32(E=argL), xmm2(V=argR), xmm1(G) */
26679 /* = VEX.NDS.LIG.F3.0F.WIG C2 /r ib */
26680 if (haveF3no66noF2(pfx)) {
26681 Long delta0 = delta;
26682 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26683 "vcmpss", False/*!all_lanes*/,
26684 4/*sz*/);
26685 if (delta > delta0) goto decode_success;
26686 /* else fall through -- decoding has failed */
26688 /* VCMPPD xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
26689 /* = VEX.NDS.128.66.0F.WIG C2 /r ib */
26690 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26691 Long delta0 = delta;
26692 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26693 "vcmppd", True/*all_lanes*/,
26694 8/*sz*/);
26695 if (delta > delta0) goto decode_success;
26696 /* else fall through -- decoding has failed */
26698 /* VCMPPD ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
26699 /* = VEX.NDS.256.66.0F.WIG C2 /r ib */
26700 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26701 Long delta0 = delta;
26702 delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26703 "vcmppd", 8/*sz*/);
26704 if (delta > delta0) goto decode_success;
26705 /* else fall through -- decoding has failed */
26707 /* VCMPPS xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
26708 /* = VEX.NDS.128.0F.WIG C2 /r ib */
26709 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26710 Long delta0 = delta;
26711 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26712 "vcmpps", True/*all_lanes*/,
26713 4/*sz*/);
26714 if (delta > delta0) goto decode_success;
26715 /* else fall through -- decoding has failed */
26717 /* VCMPPS ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
26718 /* = VEX.NDS.256.0F.WIG C2 /r ib */
26719 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26720 Long delta0 = delta;
26721 delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26722 "vcmpps", 4/*sz*/);
26723 if (delta > delta0) goto decode_success;
26724 /* else fall through -- decoding has failed */
26726 break;
26728 case 0xC4:
26729 /* VPINSRW r32/m16, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG C4 /r ib */
26730 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26731 UChar modrm = getUChar(delta);
26732 UInt rG = gregOfRexRM(pfx, modrm);
26733 UInt rV = getVexNvvvv(pfx);
26734 Int imm8;
26735 IRTemp new16 = newTemp(Ity_I16);
26737 if ( epartIsReg( modrm ) ) {
26738 imm8 = (Int)(getUChar(delta+1) & 7);
26739 assign( new16, unop(Iop_32to16,
26740 getIReg32(eregOfRexRM(pfx,modrm))) );
26741 delta += 1+1;
26742 DIP( "vpinsrw $%d,%s,%s\n", imm8,
26743 nameIReg32( eregOfRexRM(pfx, modrm) ), nameXMMReg(rG) );
26744 } else {
26745 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
26746 imm8 = (Int)(getUChar(delta+alen) & 7);
26747 assign( new16, loadLE( Ity_I16, mkexpr(addr) ));
26748 delta += alen+1;
26749 DIP( "vpinsrw $%d,%s,%s\n",
26750 imm8, dis_buf, nameXMMReg(rG) );
26753 IRTemp src_vec = newTemp(Ity_V128);
26754 assign(src_vec, getXMMReg( rV ));
26755 IRTemp res_vec = math_PINSRW_128( src_vec, new16, imm8 );
26756 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
26757 *uses_vvvv = True;
26758 goto decode_success;
26760 break;
26762 case 0xC5:
26763 /* VPEXTRW imm8, xmm1, reg32 = VEX.128.66.0F.W0 C5 /r ib */
26764 if (have66noF2noF3(pfx)
26765 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
26766 Long delta0 = delta;
26767 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta,
26768 True/*isAvx*/ );
26769 if (delta > delta0) goto decode_success;
26770 /* else fall through -- decoding has failed */
26772 break;
26774 case 0xC6:
26775 /* VSHUFPS imm8, xmm3/m128, xmm2, xmm1 */
26776 /* = VEX.NDS.128.0F.WIG C6 /r ib */
26777 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26778 Int imm8 = 0;
26779 IRTemp eV = newTemp(Ity_V128);
26780 IRTemp vV = newTemp(Ity_V128);
26781 UInt modrm = getUChar(delta);
26782 UInt rG = gregOfRexRM(pfx,modrm);
26783 UInt rV = getVexNvvvv(pfx);
26784 assign( vV, getXMMReg(rV) );
26785 if (epartIsReg(modrm)) {
26786 UInt rE = eregOfRexRM(pfx,modrm);
26787 assign( eV, getXMMReg(rE) );
26788 imm8 = (Int)getUChar(delta+1);
26789 delta += 1+1;
26790 DIP("vshufps $%d,%s,%s,%s\n",
26791 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
26792 } else {
26793 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26794 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
26795 imm8 = (Int)getUChar(delta+alen);
26796 delta += 1+alen;
26797 DIP("vshufps $%d,%s,%s,%s\n",
26798 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
26800 IRTemp res = math_SHUFPS_128( eV, vV, imm8 );
26801 putYMMRegLoAndZU( rG, mkexpr(res) );
26802 *uses_vvvv = True;
26803 goto decode_success;
26805 /* VSHUFPS imm8, ymm3/m256, ymm2, ymm1 */
26806 /* = VEX.NDS.256.0F.WIG C6 /r ib */
26807 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26808 Int imm8 = 0;
26809 IRTemp eV = newTemp(Ity_V256);
26810 IRTemp vV = newTemp(Ity_V256);
26811 UInt modrm = getUChar(delta);
26812 UInt rG = gregOfRexRM(pfx,modrm);
26813 UInt rV = getVexNvvvv(pfx);
26814 assign( vV, getYMMReg(rV) );
26815 if (epartIsReg(modrm)) {
26816 UInt rE = eregOfRexRM(pfx,modrm);
26817 assign( eV, getYMMReg(rE) );
26818 imm8 = (Int)getUChar(delta+1);
26819 delta += 1+1;
26820 DIP("vshufps $%d,%s,%s,%s\n",
26821 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
26822 } else {
26823 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26824 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
26825 imm8 = (Int)getUChar(delta+alen);
26826 delta += 1+alen;
26827 DIP("vshufps $%d,%s,%s,%s\n",
26828 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
26830 IRTemp res = math_SHUFPS_256( eV, vV, imm8 );
26831 putYMMReg( rG, mkexpr(res) );
26832 *uses_vvvv = True;
26833 goto decode_success;
26835 /* VSHUFPD imm8, xmm3/m128, xmm2, xmm1 */
26836 /* = VEX.NDS.128.66.0F.WIG C6 /r ib */
26837 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26838 Int imm8 = 0;
26839 IRTemp eV = newTemp(Ity_V128);
26840 IRTemp vV = newTemp(Ity_V128);
26841 UInt modrm = getUChar(delta);
26842 UInt rG = gregOfRexRM(pfx,modrm);
26843 UInt rV = getVexNvvvv(pfx);
26844 assign( vV, getXMMReg(rV) );
26845 if (epartIsReg(modrm)) {
26846 UInt rE = eregOfRexRM(pfx,modrm);
26847 assign( eV, getXMMReg(rE) );
26848 imm8 = (Int)getUChar(delta+1);
26849 delta += 1+1;
26850 DIP("vshufpd $%d,%s,%s,%s\n",
26851 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
26852 } else {
26853 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26854 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
26855 imm8 = (Int)getUChar(delta+alen);
26856 delta += 1+alen;
26857 DIP("vshufpd $%d,%s,%s,%s\n",
26858 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
26860 IRTemp res = math_SHUFPD_128( eV, vV, imm8 );
26861 putYMMRegLoAndZU( rG, mkexpr(res) );
26862 *uses_vvvv = True;
26863 goto decode_success;
26865 /* VSHUFPD imm8, ymm3/m256, ymm2, ymm1 */
26866 /* = VEX.NDS.256.66.0F.WIG C6 /r ib */
26867 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26868 Int imm8 = 0;
26869 IRTemp eV = newTemp(Ity_V256);
26870 IRTemp vV = newTemp(Ity_V256);
26871 UInt modrm = getUChar(delta);
26872 UInt rG = gregOfRexRM(pfx,modrm);
26873 UInt rV = getVexNvvvv(pfx);
26874 assign( vV, getYMMReg(rV) );
26875 if (epartIsReg(modrm)) {
26876 UInt rE = eregOfRexRM(pfx,modrm);
26877 assign( eV, getYMMReg(rE) );
26878 imm8 = (Int)getUChar(delta+1);
26879 delta += 1+1;
26880 DIP("vshufpd $%d,%s,%s,%s\n",
26881 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
26882 } else {
26883 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26884 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
26885 imm8 = (Int)getUChar(delta+alen);
26886 delta += 1+alen;
26887 DIP("vshufpd $%d,%s,%s,%s\n",
26888 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
26890 IRTemp res = math_SHUFPD_256( eV, vV, imm8 );
26891 putYMMReg( rG, mkexpr(res) );
26892 *uses_vvvv = True;
26893 goto decode_success;
26895 break;
26897 case 0xD0:
26898 /* VADDSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D0 /r */
26899 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26900 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
26901 uses_vvvv, vbi, pfx, delta,
26902 "vaddsubpd", math_ADDSUBPD_128 );
26903 goto decode_success;
26905 /* VADDSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D0 /r */
26906 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26907 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
26908 uses_vvvv, vbi, pfx, delta,
26909 "vaddsubpd", math_ADDSUBPD_256 );
26910 goto decode_success;
26912 /* VADDSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG D0 /r */
26913 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26914 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
26915 uses_vvvv, vbi, pfx, delta,
26916 "vaddsubps", math_ADDSUBPS_128 );
26917 goto decode_success;
26919 /* VADDSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG D0 /r */
26920 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26921 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
26922 uses_vvvv, vbi, pfx, delta,
26923 "vaddsubps", math_ADDSUBPS_256 );
26924 goto decode_success;
26926 break;
26928 case 0xD1:
26929 /* VPSRLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D1 /r */
26930 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26931 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26932 "vpsrlw", Iop_ShrN16x8 );
26933 *uses_vvvv = True;
26934 goto decode_success;
26937 /* VPSRLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D1 /r */
26938 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26939 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26940 "vpsrlw", Iop_ShrN16x16 );
26941 *uses_vvvv = True;
26942 goto decode_success;
26945 break;
26947 case 0xD2:
26948 /* VPSRLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D2 /r */
26949 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26950 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26951 "vpsrld", Iop_ShrN32x4 );
26952 *uses_vvvv = True;
26953 goto decode_success;
26955 /* VPSRLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D2 /r */
26956 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26957 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26958 "vpsrld", Iop_ShrN32x8 );
26959 *uses_vvvv = True;
26960 goto decode_success;
26962 break;
26964 case 0xD3:
26965 /* VPSRLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D3 /r */
26966 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26967 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26968 "vpsrlq", Iop_ShrN64x2 );
26969 *uses_vvvv = True;
26970 goto decode_success;
26972 /* VPSRLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D3 /r */
26973 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26974 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26975 "vpsrlq", Iop_ShrN64x4 );
26976 *uses_vvvv = True;
26977 goto decode_success;
26979 break;
26981 case 0xD4:
26982 /* VPADDQ r/m, rV, r ::: r = rV + r/m */
26983 /* VPADDQ = VEX.NDS.128.66.0F.WIG D4 /r */
26984 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26985 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26986 uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x2 );
26987 goto decode_success;
26989 /* VPADDQ r/m, rV, r ::: r = rV + r/m */
26990 /* VPADDQ = VEX.NDS.256.66.0F.WIG D4 /r */
26991 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26992 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26993 uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x4 );
26994 goto decode_success;
26996 break;
26998 case 0xD5:
26999 /* VPMULLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D5 /r */
27000 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27001 delta = dis_AVX128_E_V_to_G(
27002 uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x8 );
27003 goto decode_success;
27005 /* VPMULLW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D5 /r */
27006 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27007 delta = dis_AVX256_E_V_to_G(
27008 uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x16 );
27009 goto decode_success;
27011 break;
27013 case 0xD6:
27014 /* I can't even find any Intel docs for this one. */
27015 /* Basically: 66 0F D6 = MOVQ -- move 64 bits from G (lo half
27016 xmm) to E (mem or lo half xmm). Looks like L==0(128), W==0
27017 (WIG, maybe?) */
27018 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
27019 && 0==getRexW(pfx)/*this might be redundant, dunno*/) {
27020 UChar modrm = getUChar(delta);
27021 UInt rG = gregOfRexRM(pfx,modrm);
27022 if (epartIsReg(modrm)) {
27023 /* fall through, awaiting test case */
27024 /* dst: lo half copied, hi half zeroed */
27025 } else {
27026 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27027 storeLE( mkexpr(addr), getXMMRegLane64( rG, 0 ));
27028 DIP("vmovq %s,%s\n", nameXMMReg(rG), dis_buf );
27029 delta += alen;
27030 goto decode_success;
27033 break;
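         /* For reference: the Intel SDM does document this encoding, as
            MOVQ xmm2/m64, xmm1 (66 0F D6 /r) and VMOVQ xmm1/m64, xmm2
            (VEX.128.66.0F.WIG D6 /r) -- a store of the low 64 bits of the
            G register.  The register-to-register form (left unhandled
            above, awaiting a test case) copies the low qword and zeroes
            the destination's upper bits. */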
27035 case 0xD7:
27036 /* VEX.128.66.0F.WIG D7 /r = VPMOVMSKB xmm1, r32 */
27037 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27038 delta = dis_PMOVMSKB_128( vbi, pfx, delta, True/*isAvx*/ );
27039 goto decode_success;
27041    /* VEX.256.66.0F.WIG D7 /r = VPMOVMSKB ymm1, r32 */
27042 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27043 delta = dis_PMOVMSKB_256( vbi, pfx, delta );
27044 goto decode_success;
27046 break;
27048 case 0xD8:
27049 /* VPSUBUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D8 /r */
27050 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27051 delta = dis_AVX128_E_V_to_G(
27052 uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux16 );
27053 goto decode_success;
27055 /* VPSUBUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D8 /r */
27056 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27057 delta = dis_AVX256_E_V_to_G(
27058 uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux32 );
27059 goto decode_success;
27061 break;
27063 case 0xD9:
27064 /* VPSUBUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D9 /r */
27065 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27066 delta = dis_AVX128_E_V_to_G(
27067 uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux8 );
27068 goto decode_success;
27070 /* VPSUBUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D9 /r */
27071 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27072 delta = dis_AVX256_E_V_to_G(
27073 uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux16 );
27074 goto decode_success;
27076 break;
27078 case 0xDA:
27079 /* VPMINUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DA /r */
27080 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27081 delta = dis_AVX128_E_V_to_G(
27082 uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux16 );
27083 goto decode_success;
27085 /* VPMINUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DA /r */
27086 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27087 delta = dis_AVX256_E_V_to_G(
27088 uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux32 );
27089 goto decode_success;
27091 break;
27093 case 0xDB:
27094 /* VPAND r/m, rV, r ::: r = rV & r/m */
27095 /* VEX.NDS.128.66.0F.WIG DB /r = VPAND xmm3/m128, xmm2, xmm1 */
27096 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27097 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27098 uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV128 );
27099 goto decode_success;
27101 /* VPAND r/m, rV, r ::: r = rV & r/m */
27102 /* VEX.NDS.256.66.0F.WIG DB /r = VPAND ymm3/m256, ymm2, ymm1 */
27103 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27104 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27105 uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV256 );
27106 goto decode_success;
27108 break;
27110 case 0xDC:
27111 /* VPADDUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DC /r */
27112 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27113 delta = dis_AVX128_E_V_to_G(
27114 uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux16 );
27115 goto decode_success;
27117 /* VPADDUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DC /r */
27118 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27119 delta = dis_AVX256_E_V_to_G(
27120 uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux32 );
27121 goto decode_success;
27123 break;
27125 case 0xDD:
27126 /* VPADDUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DD /r */
27127 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27128 delta = dis_AVX128_E_V_to_G(
27129 uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux8 );
27130 goto decode_success;
27132 /* VPADDUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DD /r */
27133 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27134 delta = dis_AVX256_E_V_to_G(
27135 uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux16 );
27136 goto decode_success;
27138 break;
27140 case 0xDE:
27141 /* VPMAXUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DE /r */
27142 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27143 delta = dis_AVX128_E_V_to_G(
27144 uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux16 );
27145 goto decode_success;
27147 /* VPMAXUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DE /r */
27148 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27149 delta = dis_AVX256_E_V_to_G(
27150 uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux32 );
27151 goto decode_success;
27153 break;
27155 case 0xDF:
27156    /* VPANDN r/m, rV, r ::: r = ~rV & r/m (the vvvv operand is the one inverted) */
27157 /* VEX.NDS.128.66.0F.WIG DF /r = VPANDN xmm3/m128, xmm2, xmm1 */
27158 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27159 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
27160 uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV128,
27161 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
27162 goto decode_success;
27164    /* VPANDN r/m, rV, r ::: r = ~rV & r/m (the vvvv operand is the one inverted) */
27165 /* VEX.NDS.256.66.0F.WIG DF /r = VPANDN ymm3/m256, ymm2, ymm1 */
27166 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27167 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
27168 uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV256,
27169 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
27170 goto decode_success;
27172 break;
27174 case 0xE0:
27175 /* VPAVGB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E0 /r */
27176 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27177 delta = dis_AVX128_E_V_to_G(
27178 uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux16 );
27179 goto decode_success;
27181 /* VPAVGB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E0 /r */
27182 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27183 delta = dis_AVX256_E_V_to_G(
27184 uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux32 );
27185 goto decode_success;
27187 break;
27189 case 0xE1:
27190 /* VPSRAW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E1 /r */
27191 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27192 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27193 "vpsraw", Iop_SarN16x8 );
27194 *uses_vvvv = True;
27195 goto decode_success;
27197 /* VPSRAW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E1 /r */
27198 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27199 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27200 "vpsraw", Iop_SarN16x16 );
27201 *uses_vvvv = True;
27202 goto decode_success;
27204 break;
27206 case 0xE2:
27207 /* VPSRAD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E2 /r */
27208 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27209 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27210 "vpsrad", Iop_SarN32x4 );
27211 *uses_vvvv = True;
27212 goto decode_success;
27214 /* VPSRAD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E2 /r */
27215 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27216 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27217 "vpsrad", Iop_SarN32x8 );
27218 *uses_vvvv = True;
27219 goto decode_success;
27221 break;
27223 case 0xE3:
27224 /* VPAVGW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E3 /r */
27225 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27226 delta = dis_AVX128_E_V_to_G(
27227 uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux8 );
27228 goto decode_success;
27230 /* VPAVGW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E3 /r */
27231 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27232 delta = dis_AVX256_E_V_to_G(
27233 uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux16 );
27234 goto decode_success;
27236 break;
27238 case 0xE4:
27239 /* VPMULHUW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E4 /r */
27240 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27241 delta = dis_AVX128_E_V_to_G(
27242 uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux8 );
27243 goto decode_success;
27245 /* VPMULHUW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E4 /r */
27246 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27247 delta = dis_AVX256_E_V_to_G(
27248 uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux16 );
27249 goto decode_success;
27251 break;
27253 case 0xE5:
27254 /* VPMULHW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E5 /r */
27255 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27256 delta = dis_AVX128_E_V_to_G(
27257 uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx8 );
27258 goto decode_success;
27260 /* VPMULHW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E5 /r */
27261 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27262 delta = dis_AVX256_E_V_to_G(
27263 uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx16 );
27264 goto decode_success;
27266 break;
27268 case 0xE6:
27269 /* VCVTDQ2PD xmm2/m64, xmm1 = VEX.128.F3.0F.WIG E6 /r */
27270 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
27271 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, True/*isAvx*/);
27272 goto decode_success;
27274 /* VCVTDQ2PD xmm2/m128, ymm1 = VEX.256.F3.0F.WIG E6 /r */
27275 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
27276 delta = dis_CVTDQ2PD_256(vbi, pfx, delta);
27277 goto decode_success;
27279 /* VCVTTPD2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG E6 /r */
27280 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27281 delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/,
27282 True/*r2zero*/);
27283 goto decode_success;
27285 /* VCVTTPD2DQ ymm2/m256, xmm1 = VEX.256.66.0F.WIG E6 /r */
27286 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27287 delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, True/*r2zero*/);
27288 goto decode_success;
27290 /* VCVTPD2DQ xmm2/m128, xmm1 = VEX.128.F2.0F.WIG E6 /r */
27291 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27292 delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/,
27293 False/*!r2zero*/);
27294 goto decode_success;
27296 /* VCVTPD2DQ ymm2/m256, xmm1 = VEX.256.F2.0F.WIG E6 /r */
27297 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27298 delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, False/*!r2zero*/);
27299 goto decode_success;
27301 break;
27303 case 0xE7:
27304 /* VMOVNTDQ xmm1, m128 = VEX.128.66.0F.WIG E7 /r */
27305 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27306 UChar modrm = getUChar(delta);
27307 UInt rG = gregOfRexRM(pfx,modrm);
27308 if (!epartIsReg(modrm)) {
27309 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27310 gen_SIGNAL_if_not_16_aligned( vbi, addr );
27311 storeLE( mkexpr(addr), getXMMReg(rG) );
27312 DIP("vmovntdq %s,%s\n", dis_buf, nameXMMReg(rG));
27313 delta += alen;
27314 goto decode_success;
27316 /* else fall through */
27318 /* VMOVNTDQ ymm1, m256 = VEX.256.66.0F.WIG E7 /r */
27319 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27320 UChar modrm = getUChar(delta);
27321 UInt rG = gregOfRexRM(pfx,modrm);
27322 if (!epartIsReg(modrm)) {
27323 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27324 gen_SIGNAL_if_not_32_aligned( vbi, addr );
27325 storeLE( mkexpr(addr), getYMMReg(rG) );
27326 DIP("vmovntdq %s,%s\n", dis_buf, nameYMMReg(rG));
27327 delta += alen;
27328 goto decode_success;
27330 /* else fall through */
27332 break;
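         /* MOVNTDQ/VMOVNTDQ are non-temporal stores and architecturally
            require a 16- (resp. 32-) byte aligned memory operand, hence
            the gen_SIGNAL_if_not_{16,32}_aligned calls above; the
            non-temporal (cache-bypassing) hint itself has no IR-level
            counterpart and is simply dropped. */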
27334 case 0xE8:
27335 /* VPSUBSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E8 /r */
27336 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27337 delta = dis_AVX128_E_V_to_G(
27338 uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx16 );
27339 goto decode_success;
27341 /* VPSUBSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E8 /r */
27342 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27343 delta = dis_AVX256_E_V_to_G(
27344 uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx32 );
27345 goto decode_success;
27347 break;
27349 case 0xE9:
27350 /* VPSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E9 /r */
27351 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27352 delta = dis_AVX128_E_V_to_G(
27353 uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx8 );
27354 goto decode_success;
27356 /* VPSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E9 /r */
27357 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27358 delta = dis_AVX256_E_V_to_G(
27359 uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx16 );
27360 goto decode_success;
27362 break;
27364 case 0xEA:
27365 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */
27366 /* VPMINSW = VEX.NDS.128.66.0F.WIG EA /r */
27367 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27368 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27369 uses_vvvv, vbi, pfx, delta, "vpminsw", Iop_Min16Sx8 );
27370 goto decode_success;
27372 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */
27373 /* VPMINSW = VEX.NDS.256.66.0F.WIG EA /r */
27374 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27375 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27376 uses_vvvv, vbi, pfx, delta, "vpminsw", Iop_Min16Sx16 );
27377 goto decode_success;
27379 break;
27381 case 0xEB:
27382 /* VPOR r/m, rV, r ::: r = rV | r/m */
27383 /* VPOR = VEX.NDS.128.66.0F.WIG EB /r */
27384 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27385 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27386 uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV128 );
27387 goto decode_success;
27389 /* VPOR r/m, rV, r ::: r = rV | r/m */
27390 /* VPOR = VEX.NDS.256.66.0F.WIG EB /r */
27391 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27392 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27393 uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV256 );
27394 goto decode_success;
27396 break;
27398 case 0xEC:
27399 /* VPADDSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG EC /r */
27400 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27401 delta = dis_AVX128_E_V_to_G(
27402 uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx16 );
27403 goto decode_success;
27405 /* VPADDSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG EC /r */
27406 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27407 delta = dis_AVX256_E_V_to_G(
27408 uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx32 );
27409 goto decode_success;
27411 break;
27413 case 0xED:
27414 /* VPADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG ED /r */
27415 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27416 delta = dis_AVX128_E_V_to_G(
27417 uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx8 );
27418 goto decode_success;
27420 /* VPADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG ED /r */
27421 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27422 delta = dis_AVX256_E_V_to_G(
27423 uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx16 );
27424 goto decode_success;
27426 break;
27428 case 0xEE:
27429 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */
27430 /* VPMAXSW = VEX.NDS.128.66.0F.WIG EE /r */
27431 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27432 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27433 uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx8 );
27434 goto decode_success;
27436 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */
27437 /* VPMAXSW = VEX.NDS.256.66.0F.WIG EE /r */
27438 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27439 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27440 uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx16 );
27441 goto decode_success;
27443 break;
27445 case 0xEF:
27446 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */
27447 /* VPXOR = VEX.NDS.128.66.0F.WIG EF /r */
27448 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27449 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27450 uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV128 );
27451 goto decode_success;
27453 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */
27454 /* VPXOR = VEX.NDS.256.66.0F.WIG EF /r */
27455 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27456 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27457 uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV256 );
27458 goto decode_success;
27460 break;
27462 case 0xF0:
27463 /* VLDDQU m256, ymm1 = VEX.256.F2.0F.WIG F0 /r */
27464 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27465 UChar modrm = getUChar(delta);
27466 UInt rD = gregOfRexRM(pfx, modrm);
27467 IRTemp tD = newTemp(Ity_V256);
27468 if (epartIsReg(modrm)) break;
27469 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27470 delta += alen;
27471 assign(tD, loadLE(Ity_V256, mkexpr(addr)));
27472 DIP("vlddqu %s,%s\n", dis_buf, nameYMMReg(rD));
27473 putYMMReg(rD, mkexpr(tD));
27474 goto decode_success;
27476 /* VLDDQU m128, xmm1 = VEX.128.F2.0F.WIG F0 /r */
27477 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27478 UChar modrm = getUChar(delta);
27479 UInt rD = gregOfRexRM(pfx, modrm);
27480 IRTemp tD = newTemp(Ity_V128);
27481 if (epartIsReg(modrm)) break;
27482 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27483 delta += alen;
27484 assign(tD, loadLE(Ity_V128, mkexpr(addr)));
27485 DIP("vlddqu %s,%s\n", dis_buf, nameXMMReg(rD));
27486 putYMMRegLoAndZU(rD, mkexpr(tD));
27487 goto decode_success;
27489 break;
27491 case 0xF1:
27492 /* VPSLLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F1 /r */
27493 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27494 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27495 "vpsllw", Iop_ShlN16x8 );
27496 *uses_vvvv = True;
27497 goto decode_success;
27500 /* VPSLLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F1 /r */
27501 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27502 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27503 "vpsllw", Iop_ShlN16x16 );
27504 *uses_vvvv = True;
27505 goto decode_success;
27508 break;
27510 case 0xF2:
27511 /* VPSLLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F2 /r */
27512 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27513 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27514 "vpslld", Iop_ShlN32x4 );
27515 *uses_vvvv = True;
27516 goto decode_success;
27518 /* VPSLLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F2 /r */
27519 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27520 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27521 "vpslld", Iop_ShlN32x8 );
27522 *uses_vvvv = True;
27523 goto decode_success;
27525 break;
27527 case 0xF3:
27528 /* VPSLLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F3 /r */
27529 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27530 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27531 "vpsllq", Iop_ShlN64x2 );
27532 *uses_vvvv = True;
27533 goto decode_success;
27535 /* VPSLLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F3 /r */
27536 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27537 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27538 "vpsllq", Iop_ShlN64x4 );
27539 *uses_vvvv = True;
27540 goto decode_success;
27542 break;
27544 case 0xF4:
27545 /* VPMULUDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F4 /r */
27546 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27547 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27548 uses_vvvv, vbi, pfx, delta,
27549 "vpmuludq", math_PMULUDQ_128 );
27550 goto decode_success;
27552 /* VPMULUDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F4 /r */
27553 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27554 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27555 uses_vvvv, vbi, pfx, delta,
27556 "vpmuludq", math_PMULUDQ_256 );
27557 goto decode_success;
27559 break;
27561 case 0xF5:
27562 /* VPMADDWD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F5 /r */
27563 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27564 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27565 uses_vvvv, vbi, pfx, delta,
27566 "vpmaddwd", math_PMADDWD_128 );
27567 goto decode_success;
27569 /* VPMADDWD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F5 /r */
27570 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27571 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27572 uses_vvvv, vbi, pfx, delta,
27573 "vpmaddwd", math_PMADDWD_256 );
27574 goto decode_success;
27576 break;
27578 case 0xF6:
27579 /* VPSADBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F6 /r */
27580 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27581 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27582 uses_vvvv, vbi, pfx, delta,
27583 "vpsadbw", math_PSADBW_128 );
27584 goto decode_success;
27586 /* VPSADBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F6 /r */
27587 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27588 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27589 uses_vvvv, vbi, pfx, delta,
27590 "vpsadbw", math_PSADBW_256 );
27591 goto decode_success;
27593 break;
27595 case 0xF7:
27596 /* VMASKMOVDQU xmm2, xmm1 = VEX.128.66.0F.WIG F7 /r */
27597 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
27598 && epartIsReg(getUChar(delta))) {
27599 delta = dis_MASKMOVDQU( vbi, pfx, delta, True/*isAvx*/ );
27600 goto decode_success;
27602 break;
27604 case 0xF8:
27605 /* VPSUBB r/m, rV, r ::: r = rV - r/m */
27606 /* VPSUBB = VEX.NDS.128.66.0F.WIG F8 /r */
27607 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27608 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27609 uses_vvvv, vbi, pfx, delta, "vpsubb", Iop_Sub8x16 );
27610 goto decode_success;
27612 /* VPSUBB r/m, rV, r ::: r = rV - r/m */
27613 /* VPSUBB = VEX.NDS.256.66.0F.WIG F8 /r */
27614 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27615 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27616 uses_vvvv, vbi, pfx, delta, "vpsubb", Iop_Sub8x32 );
27617 goto decode_success;
27619 break;
27621 case 0xF9:
27622 /* VPSUBW r/m, rV, r ::: r = rV - r/m */
27623 /* VPSUBW = VEX.NDS.128.66.0F.WIG F9 /r */
27624 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27625 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27626 uses_vvvv, vbi, pfx, delta, "vpsubw", Iop_Sub16x8 );
27627 goto decode_success;
27629 /* VPSUBW r/m, rV, r ::: r = rV - r/m */
27630 /* VPSUBW = VEX.NDS.256.66.0F.WIG F9 /r */
27631 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27632 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27633 uses_vvvv, vbi, pfx, delta, "vpsubw", Iop_Sub16x16 );
27634 goto decode_success;
27636 break;
27638 case 0xFA:
27639 /* VPSUBD r/m, rV, r ::: r = rV - r/m */
27640 /* VPSUBD = VEX.NDS.128.66.0F.WIG FA /r */
27641 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27642 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27643 uses_vvvv, vbi, pfx, delta, "vpsubd", Iop_Sub32x4 );
27644 goto decode_success;
27646 /* VPSUBD r/m, rV, r ::: r = rV - r/m */
27647 /* VPSUBD = VEX.NDS.256.66.0F.WIG FA /r */
27648 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27649 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27650 uses_vvvv, vbi, pfx, delta, "vpsubd", Iop_Sub32x8 );
27651 goto decode_success;
27653 break;
27655 case 0xFB:
27656 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */
27657 /* VPSUBQ = VEX.NDS.128.66.0F.WIG FB /r */
27658 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27659 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27660 uses_vvvv, vbi, pfx, delta, "vpsubq", Iop_Sub64x2 );
27661 goto decode_success;
27663 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */
27664 /* VPSUBQ = VEX.NDS.256.66.0F.WIG FB /r */
27665 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27666 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27667 uses_vvvv, vbi, pfx, delta, "vpsubq", Iop_Sub64x4 );
27668 goto decode_success;
27670 break;
27672 case 0xFC:
27673 /* VPADDB r/m, rV, r ::: r = rV + r/m */
27674 /* VPADDB = VEX.NDS.128.66.0F.WIG FC /r */
27675 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27676 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27677 uses_vvvv, vbi, pfx, delta, "vpaddb", Iop_Add8x16 );
27678 goto decode_success;
27680 /* VPADDB r/m, rV, r ::: r = rV + r/m */
27681 /* VPADDB = VEX.NDS.256.66.0F.WIG FC /r */
27682 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27683 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27684 uses_vvvv, vbi, pfx, delta, "vpaddb", Iop_Add8x32 );
27685 goto decode_success;
27687 break;
27689 case 0xFD:
27690 /* VPADDW r/m, rV, r ::: r = rV + r/m */
27691 /* VPADDW = VEX.NDS.128.66.0F.WIG FD /r */
27692 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27693 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27694 uses_vvvv, vbi, pfx, delta, "vpaddw", Iop_Add16x8 );
27695 goto decode_success;
27697 /* VPADDW r/m, rV, r ::: r = rV + r/m */
27698 /* VPADDW = VEX.NDS.256.66.0F.WIG FD /r */
27699 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27700 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27701 uses_vvvv, vbi, pfx, delta, "vpaddw", Iop_Add16x16 );
27702 goto decode_success;
27704 break;
27706 case 0xFE:
27707 /* VPADDD r/m, rV, r ::: r = rV + r/m */
27708 /* VPADDD = VEX.NDS.128.66.0F.WIG FE /r */
27709 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27710 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27711 uses_vvvv, vbi, pfx, delta, "vpaddd", Iop_Add32x4 );
27712 goto decode_success;
27714 /* VPADDD r/m, rV, r ::: r = rV + r/m */
27715 /* VPADDD = VEX.NDS.256.66.0F.WIG FE /r */
27716 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27717 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27718 uses_vvvv, vbi, pfx, delta, "vpaddd", Iop_Add32x8 );
27719 goto decode_success;
27721 break;
27723 default:
27724 break;
27728 //decode_failure:
27729 return deltaIN;
27731 decode_success:
27732 return delta;
27736 /*------------------------------------------------------------*/
27737 /*--- ---*/
27738 /*--- Top-level post-escape decoders: dis_ESC_0F38__VEX ---*/
27739 /*--- ---*/
27740 /*------------------------------------------------------------*/
27742 static IRTemp math_PERMILPS_VAR_128 ( IRTemp dataV, IRTemp ctrlV )
27744 /* In the control vector, zero out all but the bottom two bits of
27745 each 32-bit lane. */
27746 IRExpr* cv1 = binop(Iop_ShrN32x4,
27747 binop(Iop_ShlN32x4, mkexpr(ctrlV), mkU8(30)),
27748 mkU8(30));
27749 /* And use the resulting cleaned-up control vector as steering
27750 in a Perm operation. */
27751 IRTemp res = newTemp(Ity_V128);
27752 assign(res, binop(Iop_Perm32x4, mkexpr(dataV), cv1));
27753 return res;
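/* In math_PERMILPS_VAR_128 above, each 32-bit result lane i is
   dataV[ ctrlV.lane[i] & 3 ]: the Shl/Shr-by-30 pair implements the
   "& 3" masking, and Iop_Perm32x4 is assumed to perform the per-lane
   selection.  This is the variable-control form of VPERMILPS; for
   example, a control vector of {1,0,3,2} swaps the lanes pairwise. */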
27756 static IRTemp math_PERMILPS_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
27758 IRTemp dHi, dLo, cHi, cLo;
27759 dHi = dLo = cHi = cLo = IRTemp_INVALID;
27760 breakupV256toV128s( dataV, &dHi, &dLo );
27761 breakupV256toV128s( ctrlV, &cHi, &cLo );
27762 IRTemp rHi = math_PERMILPS_VAR_128( dHi, cHi );
27763 IRTemp rLo = math_PERMILPS_VAR_128( dLo, cLo );
27764 IRTemp res = newTemp(Ity_V256);
27765 assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo)));
27766 return res;
27769 static IRTemp math_PERMILPD_VAR_128 ( IRTemp dataV, IRTemp ctrlV )
27771 /* No cleverness here .. */
27772 IRTemp dHi, dLo, cHi, cLo;
27773 dHi = dLo = cHi = cLo = IRTemp_INVALID;
27774 breakupV128to64s( dataV, &dHi, &dLo );
27775 breakupV128to64s( ctrlV, &cHi, &cLo );
27776 IRExpr* rHi
27777 = IRExpr_ITE( unop(Iop_64to1,
27778 binop(Iop_Shr64, mkexpr(cHi), mkU8(1))),
27779 mkexpr(dHi), mkexpr(dLo) );
27780 IRExpr* rLo
27781 = IRExpr_ITE( unop(Iop_64to1,
27782 binop(Iop_Shr64, mkexpr(cLo), mkU8(1))),
27783 mkexpr(dHi), mkexpr(dLo) );
27784 IRTemp res = newTemp(Ity_V128);
27785 assign(res, binop(Iop_64HLtoV128, rHi, rLo));
27786 return res;
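/* In math_PERMILPD_VAR_128 above, bit 1 of each 64-bit control lane
   selects which qword of the data vector appears in that result lane
   (set -> high qword, clear -> low qword), which is the variable-control
   VPERMILPD selection rule from the SDM; the Shr64-by-1 / 64to1 sequence
   extracts exactly that bit. */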
27789 static IRTemp math_PERMILPD_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
27791 IRTemp dHi, dLo, cHi, cLo;
27792 dHi = dLo = cHi = cLo = IRTemp_INVALID;
27793 breakupV256toV128s( dataV, &dHi, &dLo );
27794 breakupV256toV128s( ctrlV, &cHi, &cLo );
27795 IRTemp rHi = math_PERMILPD_VAR_128( dHi, cHi );
27796 IRTemp rLo = math_PERMILPD_VAR_128( dLo, cLo );
27797 IRTemp res = newTemp(Ity_V256);
27798 assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo)));
27799 return res;
27802 static IRTemp math_VPERMD ( IRTemp ctrlV, IRTemp dataV )
27804 /* In the control vector, zero out all but the bottom three bits of
27805 each 32-bit lane. */
27806 IRExpr* cv1 = binop(Iop_ShrN32x8,
27807 binop(Iop_ShlN32x8, mkexpr(ctrlV), mkU8(29)),
27808 mkU8(29));
27809 /* And use the resulting cleaned-up control vector as steering
27810 in a Perm operation. */
27811 IRTemp res = newTemp(Ity_V256);
27812 assign(res, binop(Iop_Perm32x8, mkexpr(dataV), cv1));
27813 return res;
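/* math_VPERMD: each 32-bit result lane i becomes
   dataV[ ctrlV.lane[i] & 7 ]; the Shl/Shr-by-29 pair performs the "& 7"
   masking and Iop_Perm32x8 is assumed to do the cross-lane selection.
   The same helper also serves VPERMPS (see the 0F38 16 decoder below),
   since the bit movement is identical for float data. */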
27816 static Long dis_SHIFTX ( /*OUT*/Bool* uses_vvvv,
27817 const VexAbiInfo* vbi, Prefix pfx, Long delta,
27818 const HChar* opname, IROp op8 )
27820 HChar dis_buf[50];
27821 Int alen;
27822 Int size = getRexW(pfx) ? 8 : 4;
27823 IRType ty = szToITy(size);
27824 IRTemp src = newTemp(ty);
27825 IRTemp amt = newTemp(ty);
27826 UChar rm = getUChar(delta);
27828 assign( amt, getIRegV(size,pfx) );
27829 if (epartIsReg(rm)) {
27830 assign( src, getIRegE(size,pfx,rm) );
27831 DIP("%s %s,%s,%s\n", opname, nameIRegV(size,pfx),
27832 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
27833 delta++;
27834 } else {
27835 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27836 assign( src, loadLE(ty, mkexpr(addr)) );
27837 DIP("%s %s,%s,%s\n", opname, nameIRegV(size,pfx), dis_buf,
27838 nameIRegG(size,pfx,rm));
27839 delta += alen;
27842 putIRegG( size, pfx, rm,
27843 binop(mkSizedOp(ty,op8), mkexpr(src),
27844 narrowTo(Ity_I8, binop(mkSizedOp(ty,Iop_And8), mkexpr(amt),
27845 mkU(ty,8*size-1)))) );
27846 /* Flags aren't modified. */
27847 *uses_vvvv = True;
27848 return delta;
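/* dis_SHIFTX handles SARX/SHLX/SHRX: the vvvv register supplies the
   shift amount, which is masked to the operand width (amt & 31, or
   amt & 63 for 64-bit operands) before shifting, and -- unlike the
   classic shifts -- rflags are left untouched.  For example (AT&T
   syntax, as printed by the DIP above):
      shlx %rcx,%rbx,%rax   =>   rax = rbx << (rcx & 63)
*/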
27852 static Long dis_FMA ( const VexAbiInfo* vbi, Prefix pfx, Long delta, UChar opc )
27854 UChar modrm = getUChar(delta);
27855 UInt rG = gregOfRexRM(pfx, modrm);
27856 UInt rV = getVexNvvvv(pfx);
27857 Bool scalar = (opc & 0xF) > 7 && (opc & 1);
27858 IRType ty = getRexW(pfx) ? Ity_F64 : Ity_F32;
27859 IRType vty = scalar ? ty : (getVexL(pfx) ? Ity_V256 : Ity_V128);
27860 IRTemp addr = IRTemp_INVALID;
27861 HChar dis_buf[50];
27862 Int alen = 0;
27863 const HChar *name;
27864 const HChar *suffix;
27865 const HChar *order;
27866 Bool negateRes = False;
27867 Bool negateZeven = False;
27868 Bool negateZodd = False;
27869 UInt count = 0;
27871 switch (opc & 0xF) {
27872 case 0x6: name = "addsub"; negateZeven = True; break;
27873 case 0x7: name = "subadd"; negateZodd = True; break;
27874 case 0x8:
27875 case 0x9: name = "add"; break;
27876 case 0xA:
27877 case 0xB: name = "sub"; negateZeven = True; negateZodd = True;
27878 break;
27879 case 0xC:
27880 case 0xD: name = "add"; negateRes = True; negateZeven = True;
27881 negateZodd = True; break;
27882 case 0xE:
27883 case 0xF: name = "sub"; negateRes = True; break;
27884 default: vpanic("dis_FMA(amd64)"); break;
27886 switch (opc & 0xF0) {
27887 case 0x90: order = "132"; break;
27888 case 0xA0: order = "213"; break;
27889 case 0xB0: order = "231"; break;
27890 default: vpanic("dis_FMA(amd64)"); break;
27892 if (scalar) {
27893 suffix = ty == Ity_F64 ? "sd" : "ss";
27894 } else {
27895 suffix = ty == Ity_F64 ? "pd" : "ps";
27898 // Figure out |count| (the number of elements) by considering |vty| and |ty|.
27899 count = sizeofIRType(vty) / sizeofIRType(ty);
27900 vassert(count == 1 || count == 2 || count == 4 || count == 8);
27902 // Fetch operands into the first |count| elements of |sX|, |sY| and |sZ|.
27903 UInt i;
27904 IRExpr *sX[8], *sY[8], *sZ[8], *res[8];
27905 for (i = 0; i < 8; i++) sX[i] = sY[i] = sZ[i] = res[i] = NULL;
27907 IRExpr* (*getYMMRegLane)(UInt,Int)
27908 = ty == Ity_F32 ? getYMMRegLane32F : getYMMRegLane64F;
27909 void (*putYMMRegLane)(UInt,Int,IRExpr*)
27910 = ty == Ity_F32 ? putYMMRegLane32F : putYMMRegLane64F;
27912 for (i = 0; i < count; i++) {
27913 sX[i] = getYMMRegLane(rG, i);
27914 sZ[i] = getYMMRegLane(rV, i);
27917 if (epartIsReg(modrm)) {
27918 UInt rE = eregOfRexRM(pfx, modrm);
27919 delta += 1;
27920 for (i = 0; i < count; i++) {
27921 sY[i] = getYMMRegLane(rE, i);
27923 if (vty == Ity_V256) {
27924 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27925 name, order, suffix, nameYMMReg(rE), nameYMMReg(rV),
27926 nameYMMReg(rG));
27927 } else {
27928 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27929 name, order, suffix, nameXMMReg(rE), nameXMMReg(rV),
27930 nameXMMReg(rG));
27932 } else {
27933 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27934 delta += alen;
27935 for (i = 0; i < count; i++) {
27936 sY[i] = loadLE(ty, binop(Iop_Add64, mkexpr(addr),
27937 mkU64(i * sizeofIRType(ty))));
27939 if (vty == Ity_V256) {
27940 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27941 name, order, suffix, dis_buf, nameYMMReg(rV),
27942 nameYMMReg(rG));
27943 } else {
27944 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27945 name, order, suffix, dis_buf, nameXMMReg(rV),
27946 nameXMMReg(rG));
27950    /* sX/sY/sZ are now in 132 order.  If the instruction requires a different
27951 order, swap them around. */
27953 # define COPY_ARR(_dst, _src) \
27954 do { for (int j = 0; j < 8; j++) { _dst[j] = _src[j]; } } while (0)
27956 if ((opc & 0xF0) != 0x90) {
27957 IRExpr* temp[8];
27958 COPY_ARR(temp, sX);
27959 if ((opc & 0xF0) == 0xA0) {
27960 COPY_ARR(sX, sZ);
27961 COPY_ARR(sZ, sY);
27962 COPY_ARR(sY, temp);
27963 } else {
27964 COPY_ARR(sX, sZ);
27965 COPY_ARR(sZ, temp);
27969 # undef COPY_ARR
27971 for (i = 0; i < count; i++) {
27972 IROp opNEG = ty == Ity_F64 ? Iop_NegF64 : Iop_NegF32;
27973 if ((i & 1) ? negateZodd : negateZeven) {
27974 sZ[i] = unop(opNEG, sZ[i]);
27976 res[i] = IRExpr_Qop(ty == Ity_F64 ? Iop_MAddF64 : Iop_MAddF32,
27977 get_FAKE_roundingmode(), sX[i], sY[i], sZ[i]);
27978 if (negateRes) {
27979 res[i] = unop(opNEG, res[i]);
27983 for (i = 0; i < count; i++) {
27984 putYMMRegLane(rG, i, res[i]);
27987 switch (vty) {
27988 case Ity_F32: putYMMRegLane32(rG, 1, mkU32(0)); /*fallthru*/
27989 case Ity_F64: putYMMRegLane64(rG, 1, mkU64(0)); /*fallthru*/
27990 case Ity_V128: putYMMRegLane128(rG, 1, mkV128(0)); /*fallthru*/
27991 case Ity_V256: break;
27992 default: vassert(0);
27995 return delta;
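/* A note on dis_FMA's naming scheme: the "132"/"213"/"231" suffix gives
   the operand order fed to the multiply-add, in Intel operand numbering
   (1 = destination/G, 2 = vvvv/V, 3 = r/m/E).  The code fetches the
   operands in 132 order (sX = op1, sY = op3, sZ = op2, computing
   sX*sY + sZ) and rotates them for the 213 and 231 forms.  For example,
   vfmadd231pd %xmm3,%xmm2,%xmm1 computes, per lane,
   xmm1 = xmm2*xmm3 + xmm1.  The negateRes/negateZeven/negateZodd flags
   cover the fnm*, fmsub and fmaddsub/fmsubadd variants by negating the
   addend and/or the final result. */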
27999 /* Masked load or masked store. */
28000 static ULong dis_VMASKMOV ( Bool *uses_vvvv, const VexAbiInfo* vbi,
28001 Prefix pfx, Long delta,
28002 const HChar* opname, Bool isYMM, IRType ty,
28003 Bool isLoad )
28005 HChar dis_buf[50];
28006 Int alen, i;
28007 IRTemp addr;
28008 UChar modrm = getUChar(delta);
28009 UInt rG = gregOfRexRM(pfx,modrm);
28010 UInt rV = getVexNvvvv(pfx);
28012 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
28013 delta += alen;
28015 /**/ if (isLoad && isYMM) {
28016 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
28018 else if (isLoad && !isYMM) {
28019 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
28022 else if (!isLoad && isYMM) {
28023 DIP("%s %s,%s,%s\n", opname, nameYMMReg(rG), nameYMMReg(rV), dis_buf );
28025 else {
28026 vassert(!isLoad && !isYMM);
28027 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rG), nameXMMReg(rV), dis_buf );
28030 vassert(ty == Ity_I32 || ty == Ity_I64);
28031 Bool laneIs32 = ty == Ity_I32;
28033 Int nLanes = (isYMM ? 2 : 1) * (laneIs32 ? 4 : 2);
28035 for (i = 0; i < nLanes; i++) {
28036 IRExpr* shAmt = laneIs32 ? mkU8(31) : mkU8(63);
28037 IRExpr* one = laneIs32 ? mkU32(1) : mkU64(1);
28038 IROp opSHR = laneIs32 ? Iop_Shr32 : Iop_Shr64;
28039 IROp opEQ = laneIs32 ? Iop_CmpEQ32 : Iop_CmpEQ64;
28040 IRExpr* lane = (laneIs32 ? getYMMRegLane32 : getYMMRegLane64)( rV, i );
28042 IRTemp cond = newTemp(Ity_I1);
28043 assign(cond, binop(opEQ, binop(opSHR, lane, shAmt), one));
28045 IRTemp data = newTemp(ty);
28046 IRExpr* ea = binop(Iop_Add64, mkexpr(addr),
28047 mkU64(i * (laneIs32 ? 4 : 8)));
28048 if (isLoad) {
28049 stmt(
28050 IRStmt_LoadG(
28051 Iend_LE, laneIs32 ? ILGop_Ident32 : ILGop_Ident64,
28052 data, ea, laneIs32 ? mkU32(0) : mkU64(0), mkexpr(cond)
28054 (laneIs32 ? putYMMRegLane32 : putYMMRegLane64)( rG, i, mkexpr(data) );
28055 } else {
28056 assign(data, (laneIs32 ? getYMMRegLane32 : getYMMRegLane64)( rG, i ));
28057 stmt( IRStmt_StoreG(Iend_LE, ea, mkexpr(data), mkexpr(cond)) );
28061 if (isLoad && !isYMM)
28062 putYMMRegLane128( rG, 1, mkV128(0) );
28064 *uses_vvvv = True;
28065 return delta;
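/* In dis_VMASKMOV, the most significant bit of each mask lane (held in
   the vvvv register) decides whether that lane participates: guarded
   loads (IRStmt_LoadG) write the memory value into selected lanes and 0
   into the rest, and guarded stores (IRStmt_StoreG) simply skip
   unselected lanes, so masked-off elements never touch memory -- the
   architectural behaviour of the VMASKMOV family. */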
28069 /* Gather. */
28070 static ULong dis_VGATHER ( Bool *uses_vvvv, const VexAbiInfo* vbi,
28071 Prefix pfx, Long delta,
28072 const HChar* opname, Bool isYMM,
28073 Bool isVM64x, IRType ty )
28075 HChar dis_buf[50];
28076 Int alen, i, vscale, count1, count2;
28077 IRTemp addr;
28078 UChar modrm = getUChar(delta);
28079 UInt rG = gregOfRexRM(pfx,modrm);
28080 UInt rV = getVexNvvvv(pfx);
28081 UInt rI;
28082 IRType dstTy = (isYMM && (ty == Ity_I64 || !isVM64x)) ? Ity_V256 : Ity_V128;
28083 IRType idxTy = (isYMM && (ty == Ity_I32 || isVM64x)) ? Ity_V256 : Ity_V128;
28084 IRTemp cond;
28085 addr = disAVSIBMode ( &alen, vbi, pfx, delta, dis_buf, &rI,
28086 idxTy, &vscale );
28087 if (addr == IRTemp_INVALID || rI == rG || rI == rV || rG == rV)
28088 return delta;
28089 if (dstTy == Ity_V256) {
28090 DIP("%s %s,%s,%s\n", opname, nameYMMReg(rV), dis_buf, nameYMMReg(rG) );
28091 } else {
28092 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rV), dis_buf, nameXMMReg(rG) );
28094 delta += alen;
28096 if (ty == Ity_I32) {
28097 count1 = isYMM ? 8 : 4;
28098 count2 = isVM64x ? count1 / 2 : count1;
28099 } else {
28100 count1 = count2 = isYMM ? 4 : 2;
28103 /* First update the mask register to copies of the sign bit. */
28104 if (ty == Ity_I32) {
28105 if (isYMM)
28106 putYMMReg( rV, binop(Iop_SarN32x8, getYMMReg( rV ), mkU8(31)) );
28107 else
28108 putYMMRegLoAndZU( rV, binop(Iop_SarN32x4, getXMMReg( rV ), mkU8(31)) );
28109 } else {
28110 for (i = 0; i < count1; i++) {
28111 putYMMRegLane64( rV, i, binop(Iop_Sar64, getYMMRegLane64( rV, i ),
28112 mkU8(63)) );
28116 /* Next gather the individual elements. If any fault occurs, the
28117 corresponding mask element will be set and the loop stops. */
28118 for (i = 0; i < count2; i++) {
28119 IRExpr *expr, *addr_expr;
28120 cond = newTemp(Ity_I1);
28121 assign( cond,
28122 binop(ty == Ity_I32 ? Iop_CmpLT32S : Iop_CmpLT64S,
28123 ty == Ity_I32 ? getYMMRegLane32( rV, i )
28124 : getYMMRegLane64( rV, i ),
28125 mkU(ty, 0)) );
28126 expr = ty == Ity_I32 ? getYMMRegLane32( rG, i )
28127 : getYMMRegLane64( rG, i );
28128 addr_expr = isVM64x ? getYMMRegLane64( rI, i )
28129 : unop(Iop_32Sto64, getYMMRegLane32( rI, i ));
28130 switch (vscale) {
28131 case 2: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(1)); break;
28132 case 4: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(2)); break;
28133 case 8: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(3)); break;
28134 default: break;
28136 addr_expr = binop(Iop_Add64, mkexpr(addr), addr_expr);
28137 addr_expr = handleAddrOverrides(vbi, pfx, addr_expr);
28138 addr_expr = IRExpr_ITE(mkexpr(cond), addr_expr, getIReg64(R_RSP));
28139 expr = IRExpr_ITE(mkexpr(cond), loadLE(ty, addr_expr), expr);
28140 if (ty == Ity_I32) {
28141 putYMMRegLane32( rG, i, expr );
28142 putYMMRegLane32( rV, i, mkU32(0) );
28143 } else {
28144 putYMMRegLane64( rG, i, expr);
28145 putYMMRegLane64( rV, i, mkU64(0) );
28149 if (!isYMM || (ty == Ity_I32 && isVM64x)) {
28150 if (ty == Ity_I64 || isYMM)
28151 putYMMRegLane128( rV, 1, mkV128(0) );
28152 else if (ty == Ity_I32 && count2 == 2) {
28153 putYMMRegLane64( rV, 1, mkU64(0) );
28154 putYMMRegLane64( rG, 1, mkU64(0) );
28156 putYMMRegLane128( rG, 1, mkV128(0) );
28159 *uses_vvvv = True;
28160 return delta;
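/* dis_VGATHER models the architectural gather protocol: the mask
   register is first widened to copies of each lane's sign bit, then each
   selected element is loaded and its mask lane cleared, so a fault
   part-way through leaves the mask describing exactly the elements still
   to be gathered.  For lanes that are not selected, the address
   expression is redirected to RSP -- assumed here to be a mapped
   address -- so the IR load inside the ITE cannot fault; its result is
   then discarded. */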
28164 __attribute__((noinline))
28165 static
28166 Long dis_ESC_0F38__VEX (
28167 /*MB_OUT*/DisResult* dres,
28168 /*OUT*/ Bool* uses_vvvv,
28169 const VexArchInfo* archinfo,
28170 const VexAbiInfo* vbi,
28171 Prefix pfx, Int sz, Long deltaIN
28174 IRTemp addr = IRTemp_INVALID;
28175 Int alen = 0;
28176 HChar dis_buf[50];
28177 Long delta = deltaIN;
28178 UChar opc = getUChar(delta);
28179 delta++;
28180 *uses_vvvv = False;
28182 switch (opc) {
28184 case 0x00:
28185 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */
28186 /* VPSHUFB = VEX.NDS.128.66.0F38.WIG 00 /r */
28187 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28188 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
28189 uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_XMM );
28190 goto decode_success;
28192 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */
28193 /* VPSHUFB = VEX.NDS.256.66.0F38.WIG 00 /r */
28194 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28195 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28196 uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_YMM );
28197 goto decode_success;
28199 break;
28201 case 0x01:
28202 case 0x02:
28203 case 0x03:
28204 /* VPHADDW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 01 /r */
28205 /* VPHADDD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 02 /r */
28206 /* VPHADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 03 /r */
28207 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28208 delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc );
28209 *uses_vvvv = True;
28210 goto decode_success;
28212 /* VPHADDW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 01 /r */
28213 /* VPHADDD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 02 /r */
28214 /* VPHADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 03 /r */
28215 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28216 delta = dis_PHADD_256( vbi, pfx, delta, opc );
28217 *uses_vvvv = True;
28218 goto decode_success;
28220 break;
28222 case 0x04:
28223 /* VPMADDUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 04 /r */
28224 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28225 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
28226 uses_vvvv, vbi, pfx, delta, "vpmaddubsw",
28227 math_PMADDUBSW_128 );
28228 goto decode_success;
28230 /* VPMADDUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 04 /r */
28231 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28232 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28233 uses_vvvv, vbi, pfx, delta, "vpmaddubsw",
28234 math_PMADDUBSW_256 );
28235 goto decode_success;
28237 break;
28239 case 0x05:
28240 case 0x06:
28241 case 0x07:
28242 /* VPHSUBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 05 /r */
28243 /* VPHSUBD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 06 /r */
28244 /* VPHSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 07 /r */
28245 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28246 delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc );
28247 *uses_vvvv = True;
28248 goto decode_success;
28250 /* VPHSUBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 05 /r */
28251 /* VPHSUBD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 06 /r */
28252 /* VPHSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 07 /r */
28253 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28254 delta = dis_PHADD_256( vbi, pfx, delta, opc );
28255 *uses_vvvv = True;
28256 goto decode_success;
28258 break;
28260 case 0x08:
28261 case 0x09:
28262 case 0x0A:
28263 /* VPSIGNB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 08 /r */
28264 /* VPSIGNW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 09 /r */
28265 /* VPSIGND xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0A /r */
28266 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28267 IRTemp sV = newTemp(Ity_V128);
28268 IRTemp dV = newTemp(Ity_V128);
28269 IRTemp sHi, sLo, dHi, dLo;
28270 sHi = sLo = dHi = dLo = IRTemp_INVALID;
28271 HChar ch = '?';
28272 Int laneszB = 0;
28273 UChar modrm = getUChar(delta);
28274 UInt rG = gregOfRexRM(pfx,modrm);
28275 UInt rV = getVexNvvvv(pfx);
28277 switch (opc) {
28278 case 0x08: laneszB = 1; ch = 'b'; break;
28279 case 0x09: laneszB = 2; ch = 'w'; break;
28280 case 0x0A: laneszB = 4; ch = 'd'; break;
28281 default: vassert(0);
28284 assign( dV, getXMMReg(rV) );
28286 if (epartIsReg(modrm)) {
28287 UInt rE = eregOfRexRM(pfx,modrm);
28288 assign( sV, getXMMReg(rE) );
28289 delta += 1;
28290 DIP("vpsign%c %s,%s,%s\n", ch, nameXMMReg(rE),
28291 nameXMMReg(rV), nameXMMReg(rG));
28292 } else {
28293 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
28294 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
28295 delta += alen;
28296 DIP("vpsign%c %s,%s,%s\n", ch, dis_buf,
28297 nameXMMReg(rV), nameXMMReg(rG));
28300 breakupV128to64s( dV, &dHi, &dLo );
28301 breakupV128to64s( sV, &sHi, &sLo );
28303 putYMMRegLoAndZU(
28305 binop(Iop_64HLtoV128,
28306 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
28307 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
28310 *uses_vvvv = True;
28311 goto decode_success;
28313 /* VPSIGNB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 08 /r */
28314 /* VPSIGNW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 09 /r */
28315 /* VPSIGND ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0A /r */
28316 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28317 IRTemp sV = newTemp(Ity_V256);
28318 IRTemp dV = newTemp(Ity_V256);
28319 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
28320 s3 = s2 = s1 = s0 = IRTemp_INVALID;
28321 d3 = d2 = d1 = d0 = IRTemp_INVALID;
28322          HChar  ch            = '?';
28323 Int laneszB = 0;
28324 UChar modrm = getUChar(delta);
28325 UInt rG = gregOfRexRM(pfx,modrm);
28326 UInt rV = getVexNvvvv(pfx);
28328 switch (opc) {
28329 case 0x08: laneszB = 1; ch = 'b'; break;
28330 case 0x09: laneszB = 2; ch = 'w'; break;
28331 case 0x0A: laneszB = 4; ch = 'd'; break;
28332 default: vassert(0);
28335 assign( dV, getYMMReg(rV) );
28337 if (epartIsReg(modrm)) {
28338 UInt rE = eregOfRexRM(pfx,modrm);
28339 assign( sV, getYMMReg(rE) );
28340 delta += 1;
28341 DIP("vpsign%c %s,%s,%s\n", ch, nameYMMReg(rE),
28342 nameYMMReg(rV), nameYMMReg(rG));
28343 } else {
28344 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
28345 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
28346 delta += alen;
28347 DIP("vpsign%c %s,%s,%s\n", ch, dis_buf,
28348 nameYMMReg(rV), nameYMMReg(rG));
28351 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
28352 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
28354 putYMMReg(
28356 binop( Iop_V128HLtoV256,
28357 binop(Iop_64HLtoV128,
28358 dis_PSIGN_helper( mkexpr(s3), mkexpr(d3), laneszB ),
28359 dis_PSIGN_helper( mkexpr(s2), mkexpr(d2), laneszB )
28361 binop(Iop_64HLtoV128,
28362 dis_PSIGN_helper( mkexpr(s1), mkexpr(d1), laneszB ),
28363 dis_PSIGN_helper( mkexpr(s0), mkexpr(d0), laneszB )
28367 *uses_vvvv = True;
28368 goto decode_success;
28370 break;
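         /* dis_PSIGN_helper (defined elsewhere in this file) is assumed
            to implement the PSIGNB/W/D rule per lane: if the source
            element is negative the destination element is negated, if it
            is zero the result is zero, and otherwise the destination
            element passes through unchanged. */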
28372 case 0x0B:
28373 /* VPMULHRSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0B /r */
28374 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28375 IRTemp sV = newTemp(Ity_V128);
28376 IRTemp dV = newTemp(Ity_V128);
28377 IRTemp sHi, sLo, dHi, dLo;
28378 sHi = sLo = dHi = dLo = IRTemp_INVALID;
28379 UChar modrm = getUChar(delta);
28380 UInt rG = gregOfRexRM(pfx,modrm);
28381 UInt rV = getVexNvvvv(pfx);
28383 assign( dV, getXMMReg(rV) );
28385 if (epartIsReg(modrm)) {
28386 UInt rE = eregOfRexRM(pfx,modrm);
28387 assign( sV, getXMMReg(rE) );
28388 delta += 1;
28389 DIP("vpmulhrsw %s,%s,%s\n", nameXMMReg(rE),
28390 nameXMMReg(rV), nameXMMReg(rG));
28391 } else {
28392 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
28393 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
28394 delta += alen;
28395 DIP("vpmulhrsw %s,%s,%s\n", dis_buf,
28396 nameXMMReg(rV), nameXMMReg(rG));
28399 breakupV128to64s( dV, &dHi, &dLo );
28400 breakupV128to64s( sV, &sHi, &sLo );
28402 putYMMRegLoAndZU(
28404 binop(Iop_64HLtoV128,
28405 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
28406 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
28409 *uses_vvvv = True;
28410 goto decode_success;
28412 /* VPMULHRSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0B /r */
28413 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28414 IRTemp sV = newTemp(Ity_V256);
28415 IRTemp dV = newTemp(Ity_V256);
28416 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
28417 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
28418 UChar modrm = getUChar(delta);
28419 UInt rG = gregOfRexRM(pfx,modrm);
28420 UInt rV = getVexNvvvv(pfx);
28422 assign( dV, getYMMReg(rV) );
28424 if (epartIsReg(modrm)) {
28425 UInt rE = eregOfRexRM(pfx,modrm);
28426 assign( sV, getYMMReg(rE) );
28427 delta += 1;
28428 DIP("vpmulhrsw %s,%s,%s\n", nameYMMReg(rE),
28429 nameYMMReg(rV), nameYMMReg(rG));
28430 } else {
28431 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
28432 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
28433 delta += alen;
28434 DIP("vpmulhrsw %s,%s,%s\n", dis_buf,
28435 nameYMMReg(rV), nameYMMReg(rG));
28438 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
28439 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
28441 putYMMReg(
28443 binop(Iop_V128HLtoV256,
28444 binop(Iop_64HLtoV128,
28445 dis_PMULHRSW_helper( mkexpr(s3), mkexpr(d3) ),
28446 dis_PMULHRSW_helper( mkexpr(s2), mkexpr(d2) ) ),
28447 binop(Iop_64HLtoV128,
28448 dis_PMULHRSW_helper( mkexpr(s1), mkexpr(d1) ),
28449 dis_PMULHRSW_helper( mkexpr(s0), mkexpr(d0) ) )
28452 *uses_vvvv = True;
28454 goto decode_success;
28456 break;
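         /* dis_PMULHRSW_helper (defined elsewhere in this file) is
            assumed to compute, per 16-bit lane, the PMULHRSW rounding
            rule: roughly (a*b + 0x4000) >> 15 on the full 32-bit
            product, i.e. the high half of the product with
            round-to-nearest scaling. */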
28458 case 0x0C:
28459 /* VPERMILPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0C /r */
28460 if (have66noF2noF3(pfx)
28461 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
28462 UChar modrm = getUChar(delta);
28463 UInt rG = gregOfRexRM(pfx, modrm);
28464 UInt rV = getVexNvvvv(pfx);
28465 IRTemp ctrlV = newTemp(Ity_V128);
28466 if (epartIsReg(modrm)) {
28467 UInt rE = eregOfRexRM(pfx, modrm);
28468 delta += 1;
28469 DIP("vpermilps %s,%s,%s\n",
28470 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
28471 assign(ctrlV, getXMMReg(rE));
28472 } else {
28473 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28474 delta += alen;
28475 DIP("vpermilps %s,%s,%s\n",
28476 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
28477 assign(ctrlV, loadLE(Ity_V128, mkexpr(addr)));
28479 IRTemp dataV = newTemp(Ity_V128);
28480 assign(dataV, getXMMReg(rV));
28481 IRTemp resV = math_PERMILPS_VAR_128(dataV, ctrlV);
28482 putYMMRegLoAndZU(rG, mkexpr(resV));
28483 *uses_vvvv = True;
28484 goto decode_success;
28486 /* VPERMILPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0C /r */
28487 if (have66noF2noF3(pfx)
28488 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
28489 UChar modrm = getUChar(delta);
28490 UInt rG = gregOfRexRM(pfx, modrm);
28491 UInt rV = getVexNvvvv(pfx);
28492 IRTemp ctrlV = newTemp(Ity_V256);
28493 if (epartIsReg(modrm)) {
28494 UInt rE = eregOfRexRM(pfx, modrm);
28495 delta += 1;
28496 DIP("vpermilps %s,%s,%s\n",
28497 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
28498 assign(ctrlV, getYMMReg(rE));
28499 } else {
28500 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28501 delta += alen;
28502 DIP("vpermilps %s,%s,%s\n",
28503 dis_buf, nameYMMReg(rV), nameYMMReg(rG));
28504 assign(ctrlV, loadLE(Ity_V256, mkexpr(addr)));
28506 IRTemp dataV = newTemp(Ity_V256);
28507 assign(dataV, getYMMReg(rV));
28508 IRTemp resV = math_PERMILPS_VAR_256(dataV, ctrlV);
28509 putYMMReg(rG, mkexpr(resV));
28510 *uses_vvvv = True;
28511 goto decode_success;
28513 break;
28515 case 0x0D:
28516 /* VPERMILPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0D /r */
28517 if (have66noF2noF3(pfx)
28518 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
28519 UChar modrm = getUChar(delta);
28520 UInt rG = gregOfRexRM(pfx, modrm);
28521 UInt rV = getVexNvvvv(pfx);
28522 IRTemp ctrlV = newTemp(Ity_V128);
28523 if (epartIsReg(modrm)) {
28524 UInt rE = eregOfRexRM(pfx, modrm);
28525 delta += 1;
28526 DIP("vpermilpd %s,%s,%s\n",
28527 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
28528 assign(ctrlV, getXMMReg(rE));
28529 } else {
28530 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28531 delta += alen;
28532 DIP("vpermilpd %s,%s,%s\n",
28533 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
28534 assign(ctrlV, loadLE(Ity_V128, mkexpr(addr)));
28536 IRTemp dataV = newTemp(Ity_V128);
28537 assign(dataV, getXMMReg(rV));
28538 IRTemp resV = math_PERMILPD_VAR_128(dataV, ctrlV);
28539 putYMMRegLoAndZU(rG, mkexpr(resV));
28540 *uses_vvvv = True;
28541 goto decode_success;
28543 /* VPERMILPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0D /r */
28544 if (have66noF2noF3(pfx)
28545 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
28546 UChar modrm = getUChar(delta);
28547 UInt rG = gregOfRexRM(pfx, modrm);
28548 UInt rV = getVexNvvvv(pfx);
28549 IRTemp ctrlV = newTemp(Ity_V256);
28550 if (epartIsReg(modrm)) {
28551 UInt rE = eregOfRexRM(pfx, modrm);
28552 delta += 1;
28553 DIP("vpermilpd %s,%s,%s\n",
28554 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
28555 assign(ctrlV, getYMMReg(rE));
28556 } else {
28557 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28558 delta += alen;
28559 DIP("vpermilpd %s,%s,%s\n",
28560 dis_buf, nameYMMReg(rV), nameYMMReg(rG));
28561 assign(ctrlV, loadLE(Ity_V256, mkexpr(addr)));
28563 IRTemp dataV = newTemp(Ity_V256);
28564 assign(dataV, getYMMReg(rV));
28565 IRTemp resV = math_PERMILPD_VAR_256(dataV, ctrlV);
28566 putYMMReg(rG, mkexpr(resV));
28567 *uses_vvvv = True;
28568 goto decode_success;
28570 break;
28572 case 0x0E:
28573 /* VTESTPS xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0E /r */
28574 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28575 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 32 );
28576 goto decode_success;
28578 /* VTESTPS ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0E /r */
28579 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28580 delta = dis_xTESTy_256( vbi, pfx, delta, 32 );
28581 goto decode_success;
28583 break;
28585 case 0x0F:
28586 /* VTESTPD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0F /r */
28587 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28588 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 64 );
28589 goto decode_success;
28591 /* VTESTPD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0F /r */
28592 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28593 delta = dis_xTESTy_256( vbi, pfx, delta, 64 );
28594 goto decode_success;
28596 break;
28598 case 0x13:
28599 /* VCVTPH2PS xmm2/m64, xmm1 = VEX.128.66.0F38.W0 13 /r */
28600 if (have66noF2noF3(pfx)
28601 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/
28602 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
28603 delta = dis_VCVTPH2PS( vbi, pfx, delta, /*is256bit=*/False );
28604 goto decode_success;
28606    /* VCVTPH2PS xmm2/m128, ymm1 = VEX.256.66.0F38.W0 13 /r */
28607 if (have66noF2noF3(pfx)
28608 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/
28609 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
28610 delta = dis_VCVTPH2PS( vbi, pfx, delta, /*is256bit=*/True );
28611 goto decode_success;
28613 break;
28615 case 0x16:
28616 /* VPERMPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 16 /r */
28617 if (have66noF2noF3(pfx)
28618 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
28619 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28620 uses_vvvv, vbi, pfx, delta, "vpermps", math_VPERMD );
28621 goto decode_success;
28623 break;
28625 case 0x17:
28626 /* VPTEST xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 17 /r */
28627 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28628 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 0 );
28629 goto decode_success;
28631 /* VPTEST ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 17 /r */
28632 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28633 delta = dis_xTESTy_256( vbi, pfx, delta, 0 );
28634 goto decode_success;
28636 break;
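      /* Reference sketch (illustration only) of the flag behaviour shared by
         VPTEST/VTESTPS/VTESTPD, which dis_xTESTy_{128,256} implement.  The
         final argument selects which bits take part: 0 = all bits (VPTEST),
         32/64 = only the sign bits of each 32/64-bit lane (VTESTPS/VTESTPD).
         The helper name is hypothetical.

            static void ref_ptest ( unsigned long long dstHi, unsigned long long dstLo,
                                    unsigned long long srcHi, unsigned long long srcLo,
                                    int* zf, int* cf )
            {
               /* ZF <- (src AND dst) == 0 ;  CF <- (src AND NOT dst) == 0 */
               *zf = ((srcHi &  dstHi) | (srcLo &  dstLo)) == 0;
               *cf = ((srcHi & ~dstHi) | (srcLo & ~dstLo)) == 0;
            }

         OF/AF/PF/SF are cleared. */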
28638 case 0x18:
28639 /* VBROADCASTSS m32, xmm1 = VEX.128.66.0F38.WIG 18 /r */
28640 if (have66noF2noF3(pfx)
28641 && 0==getVexL(pfx)/*128*/
28642 && !epartIsReg(getUChar(delta))) {
28643 UChar modrm = getUChar(delta);
28644 UInt rG = gregOfRexRM(pfx, modrm);
28645 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28646 delta += alen;
28647 DIP("vbroadcastss %s,%s\n", dis_buf, nameXMMReg(rG));
28648 IRTemp t32 = newTemp(Ity_I32);
28649 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
28650 IRTemp t64 = newTemp(Ity_I64);
28651 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28652 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
28653 putYMMRegLoAndZU(rG, res);
28654 goto decode_success;
28656 /* VBROADCASTSS m32, ymm1 = VEX.256.66.0F38.WIG 18 /r */
28657 if (have66noF2noF3(pfx)
28658 && 1==getVexL(pfx)/*256*/
28659 && !epartIsReg(getUChar(delta))) {
28660 UChar modrm = getUChar(delta);
28661 UInt rG = gregOfRexRM(pfx, modrm);
28662 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28663 delta += alen;
28664 DIP("vbroadcastss %s,%s\n", dis_buf, nameYMMReg(rG));
28665 IRTemp t32 = newTemp(Ity_I32);
28666 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
28667 IRTemp t64 = newTemp(Ity_I64);
28668 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28669 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28670 mkexpr(t64), mkexpr(t64));
28671 putYMMReg(rG, res);
28672 goto decode_success;
28674 /* VBROADCASTSS xmm2, xmm1 = VEX.128.66.0F38.WIG 18 /r */
28675 if (have66noF2noF3(pfx)
28676 && 0==getVexL(pfx)/*128*/
28677 && epartIsReg(getUChar(delta))) {
28678 UChar modrm = getUChar(delta);
28679 UInt rG = gregOfRexRM(pfx, modrm);
28680 UInt rE = eregOfRexRM(pfx, modrm);
28681 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
28682 IRTemp t32 = newTemp(Ity_I32);
28683 assign(t32, getXMMRegLane32(rE, 0));
28684 IRTemp t64 = newTemp(Ity_I64);
28685 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28686 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
28687 putYMMRegLoAndZU(rG, res);
28688 delta++;
28689 goto decode_success;
28691 /* VBROADCASTSS xmm2, ymm1 = VEX.256.66.0F38.WIG 18 /r */
28692 if (have66noF2noF3(pfx)
28693 && 1==getVexL(pfx)/*256*/
28694 && epartIsReg(getUChar(delta))) {
28695 UChar modrm = getUChar(delta);
28696 UInt rG = gregOfRexRM(pfx, modrm);
28697 UInt rE = eregOfRexRM(pfx, modrm);
28698 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
28699 IRTemp t32 = newTemp(Ity_I32);
28700 assign(t32, getXMMRegLane32(rE, 0));
28701 IRTemp t64 = newTemp(Ity_I64);
28702 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28703 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28704 mkexpr(t64), mkexpr(t64));
28705 putYMMReg(rG, res);
28706 delta++;
28707 goto decode_success;
28709 break;
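      /* Illustration only: VBROADCASTSS replicates one 32-bit element into
         every lane of the destination.  A hypothetical scalar model:

            static void ref_broadcast32 ( unsigned dst[], int nLanes, unsigned src )
            {
               for (int i = 0; i < nLanes; i++)
                  dst[i] = src;           /* nLanes = 4 for xmm, 8 for ymm */
            }

         The IR above builds the replicated value with Iop_32HLto64 followed
         by Iop_64HLtoV128 (or Iop_64x4toV256 for the ymm form). */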
28711 case 0x19:
28712 /* VBROADCASTSD m64, ymm1 = VEX.256.66.0F38.WIG 19 /r */
28713 if (have66noF2noF3(pfx)
28714 && 1==getVexL(pfx)/*256*/
28715 && !epartIsReg(getUChar(delta))) {
28716 UChar modrm = getUChar(delta);
28717 UInt rG = gregOfRexRM(pfx, modrm);
28718 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28719 delta += alen;
28720 DIP("vbroadcastsd %s,%s\n", dis_buf, nameYMMReg(rG));
28721 IRTemp t64 = newTemp(Ity_I64);
28722 assign(t64, loadLE(Ity_I64, mkexpr(addr)));
28723 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28724 mkexpr(t64), mkexpr(t64));
28725 putYMMReg(rG, res);
28726 goto decode_success;
28728 /* VBROADCASTSD xmm2, ymm1 = VEX.256.66.0F38.WIG 19 /r */
28729 if (have66noF2noF3(pfx)
28730 && 1==getVexL(pfx)/*256*/
28731 && epartIsReg(getUChar(delta))) {
28732 UChar modrm = getUChar(delta);
28733 UInt rG = gregOfRexRM(pfx, modrm);
28734 UInt rE = eregOfRexRM(pfx, modrm);
28735 DIP("vbroadcastsd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
28736 IRTemp t64 = newTemp(Ity_I64);
28737 assign(t64, getXMMRegLane64(rE, 0));
28738 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28739 mkexpr(t64), mkexpr(t64));
28740 putYMMReg(rG, res);
28741 delta++;
28742 goto decode_success;
28744 break;
28746 case 0x1A:
28747 /* VBROADCASTF128 m128, ymm1 = VEX.256.66.0F38.WIG 1A /r */
28748 if (have66noF2noF3(pfx)
28749 && 1==getVexL(pfx)/*256*/
28750 && !epartIsReg(getUChar(delta))) {
28751 UChar modrm = getUChar(delta);
28752 UInt rG = gregOfRexRM(pfx, modrm);
28753 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28754 delta += alen;
28755 DIP("vbroadcastf128 %s,%s\n", dis_buf, nameYMMReg(rG));
28756 IRTemp t128 = newTemp(Ity_V128);
28757 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
28758 putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) );
28759 goto decode_success;
28761 break;
28763 case 0x1C:
28764 /* VPABSB xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1C /r */
28765 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28766 delta = dis_AVX128_E_to_G_unary(
28767 uses_vvvv, vbi, pfx, delta,
28768 "vpabsb", math_PABS_XMM_pap1 );
28769 goto decode_success;
28771 /* VPABSB ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1C /r */
28772 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28773 delta = dis_AVX256_E_to_G_unary(
28774 uses_vvvv, vbi, pfx, delta,
28775 "vpabsb", math_PABS_YMM_pap1 );
28776 goto decode_success;
28778 break;
28780 case 0x1D:
28781 /* VPABSW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1D /r */
28782 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28783 delta = dis_AVX128_E_to_G_unary(
28784 uses_vvvv, vbi, pfx, delta,
28785 "vpabsw", math_PABS_XMM_pap2 );
28786 goto decode_success;
28788 /* VPABSW ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1D /r */
28789 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28790 delta = dis_AVX256_E_to_G_unary(
28791 uses_vvvv, vbi, pfx, delta,
28792 "vpabsw", math_PABS_YMM_pap2 );
28793 goto decode_success;
28795 break;
28797 case 0x1E:
28798 /* VPABSD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1E /r */
28799 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28800 delta = dis_AVX128_E_to_G_unary(
28801 uses_vvvv, vbi, pfx, delta,
28802 "vpabsd", math_PABS_XMM_pap4 );
28803 goto decode_success;
28805 /* VPABSD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1E /r */
28806 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28807 delta = dis_AVX256_E_to_G_unary(
28808 uses_vvvv, vbi, pfx, delta,
28809 "vpabsd", math_PABS_YMM_pap4 );
28810 goto decode_success;
28812 break;
28814 case 0x20:
28815 /* VPMOVSXBW xmm2/m64, xmm1 */
28816 /* VPMOVSXBW = VEX.128.66.0F38.WIG 20 /r */
28817 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28818 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
28819 True/*isAvx*/, False/*!xIsZ*/ );
28820 goto decode_success;
28822 /* VPMOVSXBW xmm2/m128, ymm1 */
28823 /* VPMOVSXBW = VEX.256.66.0F38.WIG 20 /r */
28824 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28825 delta = dis_PMOVxXBW_256( vbi, pfx, delta, False/*!xIsZ*/ );
28826 goto decode_success;
28828 break;
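      /* Illustration only: the PMOVxX family widens the lowest source
         elements, either sign-extending (SX) or zero-extending (ZX, the
         xIsZ flag above).  For example VPMOVSXBW takes the low 8 bytes and
         produces 8 signed 16-bit words; a hypothetical scalar model:

            static void ref_pmovsxbw ( short dst[8], const signed char src[8] )
            {
               for (int i = 0; i < 8; i++)
                  dst[i] = (short)src[i];   /* sign extension, 8 -> 16 bits */
            }
      */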
28830 case 0x21:
28831 /* VPMOVSXBD xmm2/m32, xmm1 */
28832 /* VPMOVSXBD = VEX.128.66.0F38.WIG 21 /r */
28833 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28834 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
28835 True/*isAvx*/, False/*!xIsZ*/ );
28836 goto decode_success;
28838 /* VPMOVSXBD xmm2/m64, ymm1 */
28839 /* VPMOVSXBD = VEX.256.66.0F38.WIG 21 /r */
28840 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28841 delta = dis_PMOVxXBD_256( vbi, pfx, delta, False/*!xIsZ*/ );
28842 goto decode_success;
28844 break;
28846 case 0x22:
28847 /* VPMOVSXBQ xmm2/m16, xmm1 */
28848 /* VPMOVSXBQ = VEX.128.66.0F38.WIG 22 /r */
28849 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28850 delta = dis_PMOVSXBQ_128( vbi, pfx, delta, True/*isAvx*/ );
28851 goto decode_success;
28853 /* VPMOVSXBQ xmm2/m32, ymm1 */
28854 /* VPMOVSXBQ = VEX.256.66.0F38.WIG 22 /r */
28855 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28856 delta = dis_PMOVSXBQ_256( vbi, pfx, delta );
28857 goto decode_success;
28859 break;
28861 case 0x23:
28862 /* VPMOVSXWD xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 23 /r */
28863 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28864 delta = dis_PMOVxXWD_128( vbi, pfx, delta,
28865 True/*isAvx*/, False/*!xIsZ*/ );
28866 goto decode_success;
28868 /* VPMOVSXWD xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 23 /r */
28869 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28870 delta = dis_PMOVxXWD_256( vbi, pfx, delta, False/*!xIsZ*/ );
28871 goto decode_success;
28873 break;
28875 case 0x24:
28876 /* VPMOVSXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 24 /r */
28877 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28878 delta = dis_PMOVSXWQ_128( vbi, pfx, delta, True/*isAvx*/ );
28879 goto decode_success;
28881 /* VPMOVSXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 24 /r */
28882 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28883 delta = dis_PMOVSXWQ_256( vbi, pfx, delta );
28884 goto decode_success;
28886 break;
28888 case 0x25:
28889 /* VPMOVSXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 25 /r */
28890 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28891 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
28892 True/*isAvx*/, False/*!xIsZ*/ );
28893 goto decode_success;
28895 /* VPMOVSXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 25 /r */
28896 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28897 delta = dis_PMOVxXDQ_256( vbi, pfx, delta, False/*!xIsZ*/ );
28898 goto decode_success;
28900 break;
28902 case 0x28:
28903 /* VPMULDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 28 /r */
28904 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28905 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
28906 uses_vvvv, vbi, pfx, delta,
28907 "vpmuldq", math_PMULDQ_128 );
28908 goto decode_success;
28910 /* VPMULDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 28 /r */
28911 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28912 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28913 uses_vvvv, vbi, pfx, delta,
28914 "vpmuldq", math_PMULDQ_256 );
28915 goto decode_success;
28917 break;
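      /* Illustration only: PMULDQ multiplies the even-numbered signed 32-bit
         lanes (lanes 0 and 2 of each 128-bit half) and produces full 64-bit
         products.  A hypothetical scalar model for one 128-bit half:

            static void ref_pmuldq ( long long dst[2], const int a[4], const int b[4] )
            {
               dst[0] = (long long)a[0] * b[0];   /* low  qword <- lane 0 * lane 0 */
               dst[1] = (long long)a[2] * b[2];   /* high qword <- lane 2 * lane 2 */
            }
      */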
28919 case 0x29:
28920 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */
28921 /* VPCMPEQQ = VEX.NDS.128.66.0F38.WIG 29 /r */
28922 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28923 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28924 uses_vvvv, vbi, pfx, delta, "vpcmpeqq", Iop_CmpEQ64x2 );
28925 goto decode_success;
28927 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */
28928 /* VPCMPEQQ = VEX.NDS.256.66.0F38.WIG 29 /r */
28929 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28930 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28931 uses_vvvv, vbi, pfx, delta, "vpcmpeqq", Iop_CmpEQ64x4 );
28932 goto decode_success;
28934 break;
28936 case 0x2A:
28937 /* VMOVNTDQA m128, xmm1 = VEX.128.66.0F38.WIG 2A /r */
28938 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28939 && !epartIsReg(getUChar(delta))) {
28940 UChar modrm = getUChar(delta);
28941 UInt rD = gregOfRexRM(pfx, modrm);
28942 IRTemp tD = newTemp(Ity_V128);
28943 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28944 delta += alen;
28945 gen_SIGNAL_if_not_16_aligned(vbi, addr);
28946 assign(tD, loadLE(Ity_V128, mkexpr(addr)));
28947 DIP("vmovntdqa %s,%s\n", dis_buf, nameXMMReg(rD));
28948 putYMMRegLoAndZU(rD, mkexpr(tD));
28949 goto decode_success;
28951 /* VMOVNTDQA m256, ymm1 = VEX.256.66.0F38.WIG 2A /r */
28952 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28953 && !epartIsReg(getUChar(delta))) {
28954 UChar modrm = getUChar(delta);
28955 UInt rD = gregOfRexRM(pfx, modrm);
28956 IRTemp tD = newTemp(Ity_V256);
28957 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28958 delta += alen;
28959 gen_SIGNAL_if_not_32_aligned(vbi, addr);
28960 assign(tD, loadLE(Ity_V256, mkexpr(addr)));
28961 DIP("vmovntdqa %s,%s\n", dis_buf, nameYMMReg(rD));
28962 putYMMReg(rD, mkexpr(tD));
28963 goto decode_success;
28965 break;
28967 case 0x2B:
28968 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */
28969 /* VPACKUSDW = VEX.NDS.128.66.0F38.WIG 2B /r */
28970 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28971 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
28972 uses_vvvv, vbi, pfx, delta, "vpackusdw",
28973 Iop_QNarrowBin32Sto16Ux8, NULL,
28974 False/*!invertLeftArg*/, True/*swapArgs*/ );
28975 goto decode_success;
28977 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */
28978 /* VPACKUSDW = VEX.NDS.256.66.0F38.WIG 2B /r */
28979 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28980 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28981 uses_vvvv, vbi, pfx, delta, "vpackusdw",
28982 math_VPACKUSDW_YMM );
28983 goto decode_success;
28985 break;
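      /* Illustration only: PACKUSDW narrows signed 32-bit lanes to unsigned
         16-bit lanes with saturation, i.e. each value is clamped to
         [0, 65535] before truncation -- which is what
         Iop_QNarrowBin32Sto16Ux8 expresses.  A hypothetical model of the
         per-lane clamp:

            static unsigned short sat_s32_to_u16 ( int x )
            {
               if (x < 0)       return 0;
               if (x > 0xFFFF)  return 0xFFFF;
               return (unsigned short)x;
            }
      */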
28987 case 0x2C:
28988 /* VMASKMOVPS m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 2C /r */
28989 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28990 && 0==getRexW(pfx)/*W0*/
28991 && !epartIsReg(getUChar(delta))) {
28992 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
28993 /*!isYMM*/False, Ity_I32, /*isLoad*/True );
28994 goto decode_success;
28996 /* VMASKMOVPS m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 2C /r */
28997 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28998 && 0==getRexW(pfx)/*W0*/
28999 && !epartIsReg(getUChar(delta))) {
29000 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
29001 /*isYMM*/True, Ity_I32, /*isLoad*/True );
29002 goto decode_success;
29004 break;
29006 case 0x2D:
29007 /* VMASKMOVPD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 2D /r */
29008 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29009 && 0==getRexW(pfx)/*W0*/
29010 && !epartIsReg(getUChar(delta))) {
29011 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
29012 /*!isYMM*/False, Ity_I64, /*isLoad*/True );
29013 goto decode_success;
29015 /* VMASKMOVPD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 2D /r */
29016 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29017 && 0==getRexW(pfx)/*W0*/
29018 && !epartIsReg(getUChar(delta))) {
29019 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
29020 /*isYMM*/True, Ity_I64, /*isLoad*/True );
29021 goto decode_success;
29023 break;
29025 case 0x2E:
29026 /* VMASKMOVPS xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 2E /r */
29027 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29028 && 0==getRexW(pfx)/*W0*/
29029 && !epartIsReg(getUChar(delta))) {
29030 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
29031 /*!isYMM*/False, Ity_I32, /*!isLoad*/False );
29032 goto decode_success;
29034 /* VMASKMOVPS ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 2E /r */
29035 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29036 && 0==getRexW(pfx)/*W0*/
29037 && !epartIsReg(getUChar(delta))) {
29038 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
29039 /*isYMM*/True, Ity_I32, /*!isLoad*/False );
29040 goto decode_success;
29042 break;
29044 case 0x2F:
29045 /* VMASKMOVPD xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 2F /r */
29046 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29047 && 0==getRexW(pfx)/*W0*/
29048 && !epartIsReg(getUChar(delta))) {
29049 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
29050 /*!isYMM*/False, Ity_I64, /*!isLoad*/False );
29051 goto decode_success;
29053 /* VMASKMOVPD ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 2F /r */
29054 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29055 && 0==getRexW(pfx)/*W0*/
29056 && !epartIsReg(getUChar(delta))) {
29057 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
29058 /*isYMM*/True, Ity_I64, /*!isLoad*/False );
29059 goto decode_success;
29061 break;
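      /* Illustration only: the VMASKMOV forms (2C-2F) are conditional
         per-lane loads and stores.  A lane participates iff the sign bit
         (MSB) of the corresponding mask element is set; masked-out load
         lanes are zeroed in the destination and masked-out store lanes
         leave memory untouched.  Hypothetical scalar model of a
         32-bit-element masked load:

            static void ref_maskmov_load32 ( unsigned dst[], const unsigned mem[],
                                             const unsigned mask[], int nLanes )
            {
               for (int i = 0; i < nLanes; i++)
                  dst[i] = (mask[i] & 0x80000000u) ? mem[i] : 0;
            }
      */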
29063 case 0x30:
29064 /* VPMOVZXBW xmm2/m64, xmm1 */
29065 /* VPMOVZXBW = VEX.128.66.0F38.WIG 30 /r */
29066 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29067 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
29068 True/*isAvx*/, True/*xIsZ*/ );
29069 goto decode_success;
29071 /* VPMOVZXBW xmm2/m128, ymm1 */
29072 /* VPMOVZXBW = VEX.256.66.0F38.WIG 30 /r */
29073 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29074 delta = dis_PMOVxXBW_256( vbi, pfx, delta, True/*xIsZ*/ );
29075 goto decode_success;
29077 break;
29079 case 0x31:
29080 /* VPMOVZXBD xmm2/m32, xmm1 */
29081 /* VPMOVZXBD = VEX.128.66.0F38.WIG 31 /r */
29082 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29083 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
29084 True/*isAvx*/, True/*xIsZ*/ );
29085 goto decode_success;
29087 /* VPMOVZXBD xmm2/m64, ymm1 */
29088 /* VPMOVZXBD = VEX.256.66.0F38.WIG 31 /r */
29089 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29090 delta = dis_PMOVxXBD_256( vbi, pfx, delta, True/*xIsZ*/ );
29091 goto decode_success;
29093 break;
29095 case 0x32:
29096 /* VPMOVZXBQ xmm2/m16, xmm1 */
29097 /* VPMOVZXBQ = VEX.128.66.0F38.WIG 32 /r */
29098 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29099 delta = dis_PMOVZXBQ_128( vbi, pfx, delta, True/*isAvx*/ );
29100 goto decode_success;
29102 /* VPMOVZXBQ xmm2/m32, ymm1 */
29103 /* VPMOVZXBQ = VEX.256.66.0F38.WIG 32 /r */
29104 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29105 delta = dis_PMOVZXBQ_256( vbi, pfx, delta );
29106 goto decode_success;
29108 break;
29110 case 0x33:
29111 /* VPMOVZXWD xmm2/m64, xmm1 */
29112 /* VPMOVZXWD = VEX.128.66.0F38.WIG 33 /r */
29113 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29114 delta = dis_PMOVxXWD_128( vbi, pfx, delta,
29115 True/*isAvx*/, True/*xIsZ*/ );
29116 goto decode_success;
29118 /* VPMOVZXWD xmm2/m128, ymm1 */
29119 /* VPMOVZXWD = VEX.256.66.0F38.WIG 33 /r */
29120 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29121 delta = dis_PMOVxXWD_256( vbi, pfx, delta, True/*xIsZ*/ );
29122 goto decode_success;
29124 break;
29126 case 0x34:
29127 /* VPMOVZXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 34 /r */
29128 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29129 delta = dis_PMOVZXWQ_128( vbi, pfx, delta, True/*isAvx*/ );
29130 goto decode_success;
29132 /* VPMOVZXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 34 /r */
29133 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29134 delta = dis_PMOVZXWQ_256( vbi, pfx, delta );
29135 goto decode_success;
29137 break;
29139 case 0x35:
29140 /* VPMOVZXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 35 /r */
29141 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29142 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
29143 True/*isAvx*/, True/*xIsZ*/ );
29144 goto decode_success;
29146 /* VPMOVZXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 35 /r */
29147 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29148 delta = dis_PMOVxXDQ_256( vbi, pfx, delta, True/*xIsZ*/ );
29149 goto decode_success;
29151 break;
29153 case 0x36:
29154 /* VPERMD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 36 /r */
29155 if (have66noF2noF3(pfx)
29156 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
29157 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
29158 uses_vvvv, vbi, pfx, delta, "vpermd", math_VPERMD );
29159 goto decode_success;
29161 break;
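      /* Illustration only: VPERMD is a full 8-lane permute across the whole
         256-bit register; only the low 3 bits of each control element are
         used.  Hypothetical scalar model:

            static void ref_vpermd ( unsigned dst[8], const unsigned src[8],
                                     const unsigned ctrl[8] )
            {
               for (int i = 0; i < 8; i++)
                  dst[i] = src[ ctrl[i] & 7 ];
            }

         The same math_VPERMD helper also serves VPERMPS (opcode 16 above),
         since the bit-level behaviour is identical. */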
29163 case 0x37:
29164 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */
29165 /* VPCMPGTQ = VEX.NDS.128.66.0F38.WIG 37 /r */
29166 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29167 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29168 uses_vvvv, vbi, pfx, delta, "vpcmpgtq", Iop_CmpGT64Sx2 );
29169 goto decode_success;
29171 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */
29172 /* VPCMPGTQ = VEX.NDS.256.66.0F38.WIG 37 /r */
29173 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29174 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29175 uses_vvvv, vbi, pfx, delta, "vpcmpgtq", Iop_CmpGT64Sx4 );
29176 goto decode_success;
29178 break;
29180 case 0x38:
29181 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */
29182 /* VPMINSB = VEX.NDS.128.66.0F38.WIG 38 /r */
29183 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29184 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29185 uses_vvvv, vbi, pfx, delta, "vpminsb", Iop_Min8Sx16 );
29186 goto decode_success;
29188 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */
29189 /* VPMINSB = VEX.NDS.256.66.0F38.WIG 38 /r */
29190 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29191 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29192 uses_vvvv, vbi, pfx, delta, "vpminsb", Iop_Min8Sx32 );
29193 goto decode_success;
29195 break;
29197 case 0x39:
29198 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */
29199 /* VPMINSD = VEX.NDS.128.66.0F38.WIG 39 /r */
29200 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29201 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29202 uses_vvvv, vbi, pfx, delta, "vpminsd", Iop_Min32Sx4 );
29203 goto decode_success;
29205 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */
29206 /* VPMINSD = VEX.NDS.256.66.0F38.WIG 39 /r */
29207 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29208 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29209 uses_vvvv, vbi, pfx, delta, "vpminsd", Iop_Min32Sx8 );
29210 goto decode_success;
29212 break;
29214 case 0x3A:
29215 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */
29216 /* VPMINUW = VEX.NDS.128.66.0F38.WIG 3A /r */
29217 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29218 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29219 uses_vvvv, vbi, pfx, delta, "vpminuw", Iop_Min16Ux8 );
29220 goto decode_success;
29222 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */
29223 /* VPMINUW = VEX.NDS.256.66.0F38.WIG 3A /r */
29224 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29225 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29226 uses_vvvv, vbi, pfx, delta, "vpminuw", Iop_Min16Ux16 );
29227 goto decode_success;
29229 break;
29231 case 0x3B:
29232 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */
29233 /* VPMINUD = VEX.NDS.128.66.0F38.WIG 3B /r */
29234 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29235 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29236 uses_vvvv, vbi, pfx, delta, "vpminud", Iop_Min32Ux4 );
29237 goto decode_success;
29239 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */
29240 /* VPMINUD = VEX.NDS.256.66.0F38.WIG 3B /r */
29241 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29242 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29243 uses_vvvv, vbi, pfx, delta, "vpminud", Iop_Min32Ux8 );
29244 goto decode_success;
29246 break;
29248 case 0x3C:
29249 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */
29250 /* VPMAXSB = VEX.NDS.128.66.0F38.WIG 3C /r */
29251 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29252 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29253 uses_vvvv, vbi, pfx, delta, "vpmaxsb", Iop_Max8Sx16 );
29254 goto decode_success;
29256 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */
29257 /* VPMAXSB = VEX.NDS.256.66.0F38.WIG 3C /r */
29258 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29259 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29260 uses_vvvv, vbi, pfx, delta, "vpmaxsb", Iop_Max8Sx32 );
29261 goto decode_success;
29263 break;
29265 case 0x3D:
29266 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */
29267 /* VPMAXSD = VEX.NDS.128.66.0F38.WIG 3D /r */
29268 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29269 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29270 uses_vvvv, vbi, pfx, delta, "vpmaxsd", Iop_Max32Sx4 );
29271 goto decode_success;
29273 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */
29274 /* VPMAXSD = VEX.NDS.256.66.0F38.WIG 3D /r */
29275 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29276 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29277 uses_vvvv, vbi, pfx, delta, "vpmaxsd", Iop_Max32Sx8 );
29278 goto decode_success;
29280 break;
29282 case 0x3E:
29283 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */
29284 /* VPMAXUW = VEX.NDS.128.66.0F38.WIG 3E /r */
29285 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29286 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29287 uses_vvvv, vbi, pfx, delta, "vpmaxuw", Iop_Max16Ux8 );
29288 goto decode_success;
29290 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */
29291 /* VPMAXUW = VEX.NDS.256.66.0F38.WIG 3E /r */
29292 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29293 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29294 uses_vvvv, vbi, pfx, delta, "vpmaxuw", Iop_Max16Ux16 );
29295 goto decode_success;
29297 break;
29299 case 0x3F:
29300 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */
29301 /* VPMAXUD = VEX.NDS.128.66.0F38.WIG 3F /r */
29302 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29303 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29304 uses_vvvv, vbi, pfx, delta, "vpmaxud", Iop_Max32Ux4 );
29305 goto decode_success;
29307 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */
29308 /* VPMAXUD = VEX.NDS.256.66.0F38.WIG 3F /r */
29309 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29310 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29311 uses_vvvv, vbi, pfx, delta, "vpmaxud", Iop_Max32Ux8 );
29312 goto decode_success;
29314 break;
29316 case 0x40:
29317 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */
29318 /* VPMULLD = VEX.NDS.128.66.0F38.WIG 40 /r */
29319 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29320 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29321 uses_vvvv, vbi, pfx, delta, "vpmulld", Iop_Mul32x4 );
29322 goto decode_success;
29324 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */
29325 /* VPMULLD = VEX.NDS.256.66.0F38.WIG 40 /r */
29326 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29327 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29328 uses_vvvv, vbi, pfx, delta, "vpmulld", Iop_Mul32x8 );
29329 goto decode_success;
29331 break;
29333 case 0x41:
29334 /* VPHMINPOSUW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 41 /r */
29335 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29336 delta = dis_PHMINPOSUW_128( vbi, pfx, delta, True/*isAvx*/ );
29337 goto decode_success;
29339 break;
29341 case 0x45:
29342 /* VPSRLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 45 /r */
29343 /* VPSRLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 45 /r */
29344 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
29345 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsrlvd",
29346 Iop_Shr32, 1==getVexL(pfx) );
29347 *uses_vvvv = True;
29348 goto decode_success;
29350 /* VPSRLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 45 /r */
29351 /* VPSRLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 45 /r */
29352 if (have66noF2noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
29353 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsrlvq",
29354 Iop_Shr64, 1==getVexL(pfx) );
29355 *uses_vvvv = True;
29356 goto decode_success;
29358 break;
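      /* Illustration only: the variable-shift group (45/46/47) shifts each
         lane by the count held in the corresponding lane of the second
         source.  For the logical shifts (VPSRLV*, VPSLLV*) a count greater
         than or equal to the lane width yields 0; the arithmetic VPSRAVD
         instead behaves as a shift by 31.  Hypothetical model of one
         VPSRLVD lane:

            static unsigned ref_psrlvd_lane ( unsigned x, unsigned count )
            {
               return (count > 31) ? 0u : (x >> count);
            }
      */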
29360 case 0x46:
29361 /* VPSRAVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 46 /r */
29362 /* VPSRAVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 46 /r */
29363 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
29364 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsravd",
29365 Iop_Sar32, 1==getVexL(pfx) );
29366 *uses_vvvv = True;
29367 goto decode_success;
29369 break;
29371 case 0x47:
29372 /* VPSLLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 47 /r */
29373 /* VPSLLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 47 /r */
29374 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
29375 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsllvd",
29376 Iop_Shl32, 1==getVexL(pfx) );
29377 *uses_vvvv = True;
29378 goto decode_success;
29380 /* VPSLLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 47 /r */
29381 /* VPSLLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 47 /r */
29382 if (have66noF2noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
29383 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsllvq",
29384 Iop_Shl64, 1==getVexL(pfx) );
29385 *uses_vvvv = True;
29386 goto decode_success;
29388 break;
29390 case 0x58:
29391 /* VPBROADCASTD xmm2/m32, xmm1 = VEX.128.66.0F38.W0 58 /r */
29392 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29393 && 0==getRexW(pfx)/*W0*/) {
29394 UChar modrm = getUChar(delta);
29395 UInt rG = gregOfRexRM(pfx, modrm);
29396 IRTemp t32 = newTemp(Ity_I32);
29397 if (epartIsReg(modrm)) {
29398 UInt rE = eregOfRexRM(pfx, modrm);
29399 delta++;
29400 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
29401 assign(t32, getXMMRegLane32(rE, 0));
29402 } else {
29403 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29404 delta += alen;
29405 DIP("vpbroadcastd %s,%s\n", dis_buf, nameXMMReg(rG));
29406 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
29408 IRTemp t64 = newTemp(Ity_I64);
29409 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29410 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
29411 putYMMRegLoAndZU(rG, res);
29412 goto decode_success;
29414 /* VPBROADCASTD xmm2/m32, ymm1 = VEX.256.66.0F38.W0 58 /r */
29415 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29416 && 0==getRexW(pfx)/*W0*/) {
29417 UChar modrm = getUChar(delta);
29418 UInt rG = gregOfRexRM(pfx, modrm);
29419 IRTemp t32 = newTemp(Ity_I32);
29420 if (epartIsReg(modrm)) {
29421 UInt rE = eregOfRexRM(pfx, modrm);
29422 delta++;
29423 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
29424 assign(t32, getXMMRegLane32(rE, 0));
29425 } else {
29426 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29427 delta += alen;
29428 DIP("vpbroadcastd %s,%s\n", dis_buf, nameYMMReg(rG));
29429 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
29431 IRTemp t64 = newTemp(Ity_I64);
29432 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29433 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
29434 mkexpr(t64), mkexpr(t64));
29435 putYMMReg(rG, res);
29436 goto decode_success;
29438 break;
29440 case 0x59:
29441 /* VPBROADCASTQ xmm2/m64, xmm1 = VEX.128.66.0F38.W0 59 /r */
29442 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29443 && 0==getRexW(pfx)/*W0*/) {
29444 UChar modrm = getUChar(delta);
29445 UInt rG = gregOfRexRM(pfx, modrm);
29446 IRTemp t64 = newTemp(Ity_I64);
29447 if (epartIsReg(modrm)) {
29448 UInt rE = eregOfRexRM(pfx, modrm);
29449 delta++;
29450 DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
29451 assign(t64, getXMMRegLane64(rE, 0));
29452 } else {
29453 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29454 delta += alen;
29455 DIP("vpbroadcastq %s,%s\n", dis_buf, nameXMMReg(rG));
29456 assign(t64, loadLE(Ity_I64, mkexpr(addr)));
29458 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
29459 putYMMRegLoAndZU(rG, res);
29460 goto decode_success;
29462 /* VPBROADCASTQ xmm2/m64, ymm1 = VEX.256.66.0F38.W0 59 /r */
29463 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29464 && 0==getRexW(pfx)/*W0*/) {
29465 UChar modrm = getUChar(delta);
29466 UInt rG = gregOfRexRM(pfx, modrm);
29467 IRTemp t64 = newTemp(Ity_I64);
29468 if (epartIsReg(modrm)) {
29469 UInt rE = eregOfRexRM(pfx, modrm);
29470 delta++;
29471 DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
29472 assign(t64, getXMMRegLane64(rE, 0));
29473 } else {
29474 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29475 delta += alen;
29476 DIP("vpbroadcastq %s,%s\n", dis_buf, nameYMMReg(rG));
29477 assign(t64, loadLE(Ity_I64, mkexpr(addr)));
29479 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
29480 mkexpr(t64), mkexpr(t64));
29481 putYMMReg(rG, res);
29482 goto decode_success;
29484 break;
29486 case 0x5A:
29487 /* VBROADCASTI128 m128, ymm1 = VEX.256.66.0F38.WIG 5A /r */
29488 if (have66noF2noF3(pfx)
29489 && 1==getVexL(pfx)/*256*/
29490 && !epartIsReg(getUChar(delta))) {
29491 UChar modrm = getUChar(delta);
29492 UInt rG = gregOfRexRM(pfx, modrm);
29493 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29494 delta += alen;
29495 DIP("vbroadcasti128 %s,%s\n", dis_buf, nameYMMReg(rG));
29496 IRTemp t128 = newTemp(Ity_V128);
29497 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
29498 putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) );
29499 goto decode_success;
29501 break;
29503 case 0x78:
29504 /* VPBROADCASTB xmm2/m8, xmm1 = VEX.128.66.0F38.W0 78 /r */
29505 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29506 && 0==getRexW(pfx)/*W0*/) {
29507 UChar modrm = getUChar(delta);
29508 UInt rG = gregOfRexRM(pfx, modrm);
29509 IRTemp t8 = newTemp(Ity_I8);
29510 if (epartIsReg(modrm)) {
29511 UInt rE = eregOfRexRM(pfx, modrm);
29512 delta++;
29513 DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
29514 assign(t8, unop(Iop_32to8, getXMMRegLane32(rE, 0)));
29515 } else {
29516 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29517 delta += alen;
29518 DIP("vpbroadcastb %s,%s\n", dis_buf, nameXMMReg(rG));
29519 assign(t8, loadLE(Ity_I8, mkexpr(addr)));
29521 IRTemp t16 = newTemp(Ity_I16);
29522 assign(t16, binop(Iop_8HLto16, mkexpr(t8), mkexpr(t8)));
29523 IRTemp t32 = newTemp(Ity_I32);
29524 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
29525 IRTemp t64 = newTemp(Ity_I64);
29526 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29527 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
29528 putYMMRegLoAndZU(rG, res);
29529 goto decode_success;
29531 /* VPBROADCASTB xmm2/m8, ymm1 = VEX.256.66.0F38.W0 78 /r */
29532 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29533 && 0==getRexW(pfx)/*W0*/) {
29534 UChar modrm = getUChar(delta);
29535 UInt rG = gregOfRexRM(pfx, modrm);
29536 IRTemp t8 = newTemp(Ity_I8);
29537 if (epartIsReg(modrm)) {
29538 UInt rE = eregOfRexRM(pfx, modrm);
29539 delta++;
29540 DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
29541 assign(t8, unop(Iop_32to8, getXMMRegLane32(rE, 0)));
29542 } else {
29543 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29544 delta += alen;
29545 DIP("vpbroadcastb %s,%s\n", dis_buf, nameYMMReg(rG));
29546 assign(t8, loadLE(Ity_I8, mkexpr(addr)));
29548 IRTemp t16 = newTemp(Ity_I16);
29549 assign(t16, binop(Iop_8HLto16, mkexpr(t8), mkexpr(t8)));
29550 IRTemp t32 = newTemp(Ity_I32);
29551 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
29552 IRTemp t64 = newTemp(Ity_I64);
29553 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29554 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
29555 mkexpr(t64), mkexpr(t64));
29556 putYMMReg(rG, res);
29557 goto decode_success;
29559 break;
29561 case 0x79:
29562 /* VPBROADCASTW xmm2/m16, xmm1 = VEX.128.66.0F38.W0 79 /r */
29563 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29564 && 0==getRexW(pfx)/*W0*/) {
29565 UChar modrm = getUChar(delta);
29566 UInt rG = gregOfRexRM(pfx, modrm);
29567 IRTemp t16 = newTemp(Ity_I16);
29568 if (epartIsReg(modrm)) {
29569 UInt rE = eregOfRexRM(pfx, modrm);
29570 delta++;
29571 DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
29572 assign(t16, unop(Iop_32to16, getXMMRegLane32(rE, 0)));
29573 } else {
29574 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29575 delta += alen;
29576 DIP("vpbroadcastw %s,%s\n", dis_buf, nameXMMReg(rG));
29577 assign(t16, loadLE(Ity_I16, mkexpr(addr)));
29579 IRTemp t32 = newTemp(Ity_I32);
29580 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
29581 IRTemp t64 = newTemp(Ity_I64);
29582 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29583 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
29584 putYMMRegLoAndZU(rG, res);
29585 goto decode_success;
29587 /* VPBROADCASTW xmm2/m16, ymm1 = VEX.256.66.0F38.W0 79 /r */
29588 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29589 && 0==getRexW(pfx)/*W0*/) {
29590 UChar modrm = getUChar(delta);
29591 UInt rG = gregOfRexRM(pfx, modrm);
29592 IRTemp t16 = newTemp(Ity_I16);
29593 if (epartIsReg(modrm)) {
29594 UInt rE = eregOfRexRM(pfx, modrm);
29595 delta++;
29596 DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
29597 assign(t16, unop(Iop_32to16, getXMMRegLane32(rE, 0)));
29598 } else {
29599 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29600 delta += alen;
29601 DIP("vpbroadcastw %s,%s\n", dis_buf, nameYMMReg(rG));
29602 assign(t16, loadLE(Ity_I16, mkexpr(addr)));
29604 IRTemp t32 = newTemp(Ity_I32);
29605 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
29606 IRTemp t64 = newTemp(Ity_I64);
29607 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29608 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
29609 mkexpr(t64), mkexpr(t64));
29610 putYMMReg(rG, res);
29611 goto decode_success;
29613 break;
29615 case 0x8C:
29616 /* VPMASKMOVD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 8C /r */
29617 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29618 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29619 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
29620 /*!isYMM*/False, Ity_I32, /*isLoad*/True );
29621 goto decode_success;
29623 /* VPMASKMOVD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 8C /r */
29624 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29625 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29626 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
29627 /*isYMM*/True, Ity_I32, /*isLoad*/True );
29628 goto decode_success;
29630 /* VPMASKMOVQ m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 8C /r */
29631 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29632 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29633 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
29634 /*!isYMM*/False, Ity_I64, /*isLoad*/True );
29635 goto decode_success;
29637 /* VPMASKMOVQ m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 8C /r */
29638 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29639 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29640 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
29641 /*isYMM*/True, Ity_I64, /*isLoad*/True );
29642 goto decode_success;
29644 break;
29646 case 0x8E:
29647 /* VPMASKMOVD xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 8E /r */
29648 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29649 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29650 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
29651 /*!isYMM*/False, Ity_I32, /*!isLoad*/False );
29652 goto decode_success;
29654 /* VPMASKMOVD ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 8E /r */
29655 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29656 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29657 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
29658 /*isYMM*/True, Ity_I32, /*!isLoad*/False );
29659 goto decode_success;
29661 /* VPMASKMOVQ xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W1 8E /r */
29662 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29663 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29664 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
29665 /*!isYMM*/False, Ity_I64, /*!isLoad*/False );
29666 goto decode_success;
29668 /* VPMASKMOVQ ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W1 8E /r */
29669 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29670 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29671 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
29672 /*isYMM*/True, Ity_I64, /*!isLoad*/False );
29673 goto decode_success;
29675 break;
29677 case 0x90:
29678 /* VPGATHERDD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 90 /r */
29679 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29680 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29681 Long delta0 = delta;
29682 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdd",
29683 /*!isYMM*/False, /*!isVM64x*/False, Ity_I32 );
29684 if (delta != delta0)
29685 goto decode_success;
29687 /* VPGATHERDD ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 90 /r */
29688 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29689 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29690 Long delta0 = delta;
29691 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdd",
29692 /*isYMM*/True, /*!isVM64x*/False, Ity_I32 );
29693 if (delta != delta0)
29694 goto decode_success;
29696 /* VPGATHERDQ xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 90 /r */
29697 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29698 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29699 Long delta0 = delta;
29700 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdq",
29701 /*!isYMM*/False, /*!isVM64x*/False, Ity_I64 );
29702 if (delta != delta0)
29703 goto decode_success;
29705 /* VPGATHERDQ ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 90 /r */
29706 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29707 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29708 Long delta0 = delta;
29709 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdq",
29710 /*isYMM*/True, /*!isVM64x*/False, Ity_I64 );
29711 if (delta != delta0)
29712 goto decode_success;
29714 break;
29716 case 0x91:
29717 /* VPGATHERQD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 91 /r */
29718 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29719 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29720 Long delta0 = delta;
29721 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqd",
29722 /*!isYMM*/False, /*isVM64x*/True, Ity_I32 );
29723 if (delta != delta0)
29724 goto decode_success;
29726 /* VPGATHERQD xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 91 /r */
29727 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29728 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29729 Long delta0 = delta;
29730 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqd",
29731 /*isYMM*/True, /*isVM64x*/True, Ity_I32 );
29732 if (delta != delta0)
29733 goto decode_success;
29735 /* VPGATHERQQ xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 91 /r */
29736 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29737 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29738 Long delta0 = delta;
29739 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqq",
29740 /*!isYMM*/False, /*isVM64x*/True, Ity_I64 );
29741 if (delta != delta0)
29742 goto decode_success;
29744 /* VPGATHERQQ ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 91 /r */
29745 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29746 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29747 Long delta0 = delta;
29748 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqq",
29749 /*isYMM*/True, /*isVM64x*/True, Ity_I64 );
29750 if (delta != delta0)
29751 goto decode_success;
29753 break;
29755 case 0x92:
29756 /* VGATHERDPS xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 92 /r */
29757 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29758 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29759 Long delta0 = delta;
29760 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdps",
29761 /*!isYMM*/False, /*!isVM64x*/False, Ity_I32 );
29762 if (delta != delta0)
29763 goto decode_success;
29765 /* VGATHERDPS ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 92 /r */
29766 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29767 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29768 Long delta0 = delta;
29769 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdps",
29770 /*isYMM*/True, /*!isVM64x*/False, Ity_I32 );
29771 if (delta != delta0)
29772 goto decode_success;
29774 /* VGATHERDPD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 92 /r */
29775 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29776 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29777 Long delta0 = delta;
29778 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdpd",
29779 /*!isYMM*/False, /*!isVM64x*/False, Ity_I64 );
29780 if (delta != delta0)
29781 goto decode_success;
29783 /* VGATHERDPD ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 92 /r */
29784 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29785 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29786 Long delta0 = delta;
29787 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdpd",
29788 /*isYMM*/True, /*!isVM64x*/False, Ity_I64 );
29789 if (delta != delta0)
29790 goto decode_success;
29792 break;
29794 case 0x93:
29795 /* VGATHERQPS xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 93 /r */
29796 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29797 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29798 Long delta0 = delta;
29799 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqps",
29800 /*!isYMM*/False, /*isVM64x*/True, Ity_I32 );
29801 if (delta != delta0)
29802 goto decode_success;
29804 /* VGATHERQPS xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 93 /r */
29805 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29806 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29807 Long delta0 = delta;
29808 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqps",
29809 /*isYMM*/True, /*isVM64x*/True, Ity_I32 );
29810 if (delta != delta0)
29811 goto decode_success;
29813 /* VGATHERQPD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 93 /r */
29814 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29815 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29816 Long delta0 = delta;
29817 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqpd",
29818 /*!isYMM*/False, /*isVM64x*/True, Ity_I64 );
29819 if (delta != delta0)
29820 goto decode_success;
29822 /* VGATHERQPD ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 93 /r */
29823 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29824 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29825 Long delta0 = delta;
29826 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqpd",
29827 /*isYMM*/True, /*isVM64x*/True, Ity_I64 );
29828 if (delta != delta0)
29829 goto decode_success;
29831 break;
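      /* Note on the gather cases (90-93): dis_VGATHER refuses encodings it
         considers invalid (for example, AVX2 requires the index, mask and
         destination registers to be pairwise distinct) and in that case
         returns with delta unchanged; that is why each arm compares delta
         against delta0 before declaring success.  Semantically, an element
         is loaded only if the sign bit of its mask element is set, and the
         mask element is cleared once that element has been fetched.
         Hypothetical scalar model of one 32-bit gather element:

            static void ref_gather32_elem ( unsigned* dst, unsigned* mask,
                                            const unsigned* elemAddr )
            {
               if (*mask & 0x80000000u) {
                  *dst  = *elemAddr;   /* element was gathered */
                  *mask = 0;           /* architecturally, the mask bit is cleared */
               }
            }
      */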
29833 case 0x96 ... 0x9F:
29834 case 0xA6 ... 0xAF:
29835 case 0xB6 ... 0xBF:
29836 /* VFMADDSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 96 /r */
29837 /* VFMADDSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 96 /r */
29838 /* VFMADDSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 96 /r */
29839 /* VFMADDSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 96 /r */
29840 /* VFMSUBADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 97 /r */
29841 /* VFMSUBADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 97 /r */
29842 /* VFMSUBADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 97 /r */
29843 /* VFMSUBADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 97 /r */
29844 /* VFMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 98 /r */
29845 /* VFMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 98 /r */
29846 /* VFMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 98 /r */
29847 /* VFMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 98 /r */
29848 /* VFMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 99 /r */
29849 /* VFMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 99 /r */
29850 /* VFMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9A /r */
29851 /* VFMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9A /r */
29852 /* VFMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9A /r */
29853 /* VFMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9A /r */
29854 /* VFMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9B /r */
29855 /* VFMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9B /r */
29856 /* VFNMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9C /r */
29857 /* VFNMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9C /r */
29858 /* VFNMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9C /r */
29859 /* VFNMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9C /r */
29860 /* VFNMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9D /r */
29861 /* VFNMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9D /r */
29862 /* VFNMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9E /r */
29863 /* VFNMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9E /r */
29864 /* VFNMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9E /r */
29865 /* VFNMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9E /r */
29866 /* VFNMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9F /r */
29867 /* VFNMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9F /r */
29868 /* VFMADDSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A6 /r */
29869 /* VFMADDSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A6 /r */
29870 /* VFMADDSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A6 /r */
29871 /* VFMADDSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A6 /r */
29872 /* VFMSUBADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A7 /r */
29873 /* VFMSUBADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A7 /r */
29874 /* VFMSUBADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A7 /r */
29875 /* VFMSUBADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A7 /r */
29876 /* VFMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A8 /r */
29877 /* VFMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A8 /r */
29878 /* VFMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A8 /r */
29879 /* VFMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A8 /r */
29880 /* VFMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 A9 /r */
29881 /* VFMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 A9 /r */
29882 /* VFMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AA /r */
29883 /* VFMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AA /r */
29884 /* VFMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AA /r */
29885 /* VFMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AA /r */
29886 /* VFMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AB /r */
29887 /* VFMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AB /r */
29888 /* VFNMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AC /r */
29889 /* VFNMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AC /r */
29890 /* VFNMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AC /r */
29891 /* VFNMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AC /r */
29892 /* VFNMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AD /r */
29893 /* VFNMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AD /r */
29894 /* VFNMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AE /r */
29895 /* VFNMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AE /r */
29896 /* VFNMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AE /r */
29897 /* VFNMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AE /r */
29898 /* VFNMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AF /r */
29899 /* VFNMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AF /r */
29900 /* VFMADDSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B6 /r */
29901 /* VFMADDSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B6 /r */
29902 /* VFMADDSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B6 /r */
29903 /* VFMADDSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B6 /r */
29904 /* VFMSUBADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B7 /r */
29905 /* VFMSUBADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B7 /r */
29906 /* VFMSUBADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B7 /r */
29907 /* VFMSUBADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B7 /r */
29908 /* VFMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B8 /r */
29909 /* VFMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B8 /r */
29910 /* VFMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B8 /r */
29911 /* VFMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B8 /r */
29912 /* VFMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 B9 /r */
29913 /* VFMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 B9 /r */
29914 /* VFMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BA /r */
29915 /* VFMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BA /r */
29916 /* VFMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BA /r */
29917 /* VFMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BA /r */
29918 /* VFMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BB /r */
29919 /* VFMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BB /r */
29920 /* VFNMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BC /r */
29921 /* VFNMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BC /r */
29922 /* VFNMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BC /r */
29923 /* VFNMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BC /r */
29924 /* VFNMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BD /r */
29925 /* VFNMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BD /r */
29926 /* VFNMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BE /r */
29927 /* VFNMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BE /r */
29928 /* VFNMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BE /r */
29929 /* VFNMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BE /r */
29930 /* VFNMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BF /r */
29931 /* VFNMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BF /r */
29932 if (have66noF2noF3(pfx)) {
29933 delta = dis_FMA( vbi, pfx, delta, opc );
29934 *uses_vvvv = True;
29935 dres->hint = Dis_HintVerbose;
29936 goto decode_success;
29938 break;
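      /* Note on the FMA family (96-9F, A6-AF, B6-BF) handled by dis_FMA:
         the digits encode the operand ordering.  Writing the three operands
         as dst (xmm1), src2 (vvvv) and src3 (r/m), the variants compute

            132:  dst = dst  * src3 + src2
            213:  dst = src2 * dst  + src3
            231:  dst = src2 * src3 + dst

         with the obvious sign changes for FMSUB/FNMADD/FNMSUB and the
         alternating ADDSUB/SUBADD forms.  Architecturally each product is
         fused, i.e. not rounded before the addition. */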
29940 case 0xDB:
29941 case 0xDC:
29942 case 0xDD:
29943 case 0xDE:
29944 case 0xDF:
29945 /* VAESIMC xmm2/m128, xmm1 = VEX.128.66.0F38.WIG DB /r */
29946 /* VAESENC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DC /r */
29947 /* VAESENCLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DD /r */
29948 /* VAESDEC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DE /r */
29949 /* VAESDECLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DF /r */
29950 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29951 delta = dis_AESx( vbi, pfx, delta, True/*isAvx*/, opc );
29952 if (opc != 0xDB) *uses_vvvv = True;
29953 goto decode_success;
29955 break;
29957 case 0xF2:
29958 /* ANDN r/m32, r32b, r32a = VEX.NDS.LZ.0F38.W0 F2 /r */
29959 /* ANDN r/m64, r64b, r64a = VEX.NDS.LZ.0F38.W1 F2 /r */
29960 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29961 Int size = getRexW(pfx) ? 8 : 4;
29962 IRType ty = szToITy(size);
29963 IRTemp dst = newTemp(ty);
29964 IRTemp src1 = newTemp(ty);
29965 IRTemp src2 = newTemp(ty);
29966 UChar rm = getUChar(delta);
29968 assign( src1, getIRegV(size,pfx) );
29969 if (epartIsReg(rm)) {
29970 assign( src2, getIRegE(size,pfx,rm) );
29971 DIP("andn %s,%s,%s\n", nameIRegE(size,pfx,rm),
29972 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
29973 delta++;
29974 } else {
29975 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29976 assign( src2, loadLE(ty, mkexpr(addr)) );
29977 DIP("andn %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
29978 nameIRegG(size,pfx,rm));
29979 delta += alen;
29982 assign( dst, binop( mkSizedOp(ty,Iop_And8),
29983 unop( mkSizedOp(ty,Iop_Not8), mkexpr(src1) ),
29984 mkexpr(src2) ) );
29985 putIRegG( size, pfx, rm, mkexpr(dst) );
29986 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
29987 ? AMD64G_CC_OP_ANDN64
29988 : AMD64G_CC_OP_ANDN32)) );
29989 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
29990 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
29991 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
29992 *uses_vvvv = True;
29993 goto decode_success;
29995 break;
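      /* Illustration only: ANDN computes dst = ~src1 & src2, where src1 is
         the vvvv register and src2 is r/m, exactly as the IR above builds
         it.  The flag thunk stores the result in CC_DEP1 so that SF and ZF
         can be recomputed from it later; the ISA defines OF and CF to be 0.
         Hypothetical scalar model:

            static unsigned long long ref_andn64 ( unsigned long long a,
                                                   unsigned long long b )
            {
               return ~a & b;   /* a = vvvv operand, b = r/m operand */
            }
      */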
29997 case 0xF3:
29998 /* BLSI r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /3 */
29999 /* BLSI r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /3 */
30000 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/
30001 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 3) {
30002 Int size = getRexW(pfx) ? 8 : 4;
30003 IRType ty = szToITy(size);
30004 IRTemp src = newTemp(ty);
30005 IRTemp dst = newTemp(ty);
30006 UChar rm = getUChar(delta);
30008 if (epartIsReg(rm)) {
30009 assign( src, getIRegE(size,pfx,rm) );
30010 DIP("blsi %s,%s\n", nameIRegE(size,pfx,rm),
30011 nameIRegV(size,pfx));
30012 delta++;
30013 } else {
30014 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30015 assign( src, loadLE(ty, mkexpr(addr)) );
30016 DIP("blsi %s,%s\n", dis_buf, nameIRegV(size,pfx));
30017 delta += alen;
30020 assign( dst, binop(mkSizedOp(ty,Iop_And8),
30021 binop(mkSizedOp(ty,Iop_Sub8), mkU(ty, 0),
30022 mkexpr(src)), mkexpr(src)) );
30023 putIRegV( size, pfx, mkexpr(dst) );
30024 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
30025 ? AMD64G_CC_OP_BLSI64
30026 : AMD64G_CC_OP_BLSI32)) );
30027 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
30028 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) );
30029 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
30030 *uses_vvvv = True;
30031 goto decode_success;
30033 /* BLSMSK r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /2 */
30034 /* BLSMSK r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /2 */
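/* Reader's note (illustrative): BLSMSK produces a mask up to and including
   the lowest set bit, dst = src ^ (src - 1); e.g. 0b1011000 -> 0b0001111. */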
30035 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/
30036 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 2) {
30037 Int size = getRexW(pfx) ? 8 : 4;
30038 IRType ty = szToITy(size);
30039 IRTemp src = newTemp(ty);
30040 IRTemp dst = newTemp(ty);
30041 UChar rm = getUChar(delta);
30043 if (epartIsReg(rm)) {
30044 assign( src, getIRegE(size,pfx,rm) );
30045 DIP("blsmsk %s,%s\n", nameIRegE(size,pfx,rm),
30046 nameIRegV(size,pfx));
30047 delta++;
30048 } else {
30049 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30050 assign( src, loadLE(ty, mkexpr(addr)) );
30051 DIP("blsmsk %s,%s\n", dis_buf, nameIRegV(size,pfx));
30052 delta += alen;
30055 assign( dst, binop(mkSizedOp(ty,Iop_Xor8),
30056 binop(mkSizedOp(ty,Iop_Sub8), mkexpr(src),
30057 mkU(ty, 1)), mkexpr(src)) );
30058 putIRegV( size, pfx, mkexpr(dst) );
30059 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
30060 ? AMD64G_CC_OP_BLSMSK64
30061 : AMD64G_CC_OP_BLSMSK32)) );
30062 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
30063 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) );
30064 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
30065 *uses_vvvv = True;
30066 goto decode_success;
30068 /* BLSR r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /1 */
30069 /* BLSR r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /1 */
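/* Reader's note (illustrative): BLSR clears the lowest set bit,
   dst = src & (src - 1); e.g. 0b1011000 -> 0b1010000. */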
30070 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/
30071 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 1) {
30072 Int size = getRexW(pfx) ? 8 : 4;
30073 IRType ty = szToITy(size);
30074 IRTemp src = newTemp(ty);
30075 IRTemp dst = newTemp(ty);
30076 UChar rm = getUChar(delta);
30078 if (epartIsReg(rm)) {
30079 assign( src, getIRegE(size,pfx,rm) );
30080 DIP("blsr %s,%s\n", nameIRegE(size,pfx,rm),
30081 nameIRegV(size,pfx));
30082 delta++;
30083 } else {
30084 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30085 assign( src, loadLE(ty, mkexpr(addr)) );
30086 DIP("blsr %s,%s\n", dis_buf, nameIRegV(size,pfx));
30087 delta += alen;
30090 assign( dst, binop(mkSizedOp(ty,Iop_And8),
30091 binop(mkSizedOp(ty,Iop_Sub8), mkexpr(src),
30092 mkU(ty, 1)), mkexpr(src)) );
30093 putIRegV( size, pfx, mkexpr(dst) );
30094 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
30095 ? AMD64G_CC_OP_BLSR64
30096 : AMD64G_CC_OP_BLSR32)) );
30097 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
30098 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) );
30099 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
30100 *uses_vvvv = True;
30101 goto decode_success;
30103 break;
30105 case 0xF5:
30106 /* BZHI r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F5 /r */
30107 /* BZHI r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F5 /r */
30108 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30109 Int size = getRexW(pfx) ? 8 : 4;
30110 IRType ty = szToITy(size);
30111 IRTemp dst = newTemp(ty);
30112 IRTemp src1 = newTemp(ty);
30113 IRTemp src2 = newTemp(ty);
30114 IRTemp start = newTemp(Ity_I8);
30115 IRTemp cond = newTemp(Ity_I1);
30116 UChar rm = getUChar(delta);
30118 assign( src2, getIRegV(size,pfx) );
30119 if (epartIsReg(rm)) {
30120 assign( src1, getIRegE(size,pfx,rm) );
30121 DIP("bzhi %s,%s,%s\n", nameIRegV(size,pfx),
30122 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
30123 delta++;
30124 } else {
30125 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30126 assign( src1, loadLE(ty, mkexpr(addr)) );
30127 DIP("bzhi %s,%s,%s\n", nameIRegV(size,pfx), dis_buf,
30128 nameIRegG(size,pfx,rm));
30129 delta += alen;
30132 assign( start, narrowTo( Ity_I8, mkexpr(src2) ) );
30133 assign( cond, binop(Iop_CmpLT32U,
30134 unop(Iop_8Uto32, mkexpr(start)),
30135 mkU32(8*size)) );
30136 /* if (start < opsize) {
30137 if (start == 0)
30138 dst = 0;
30139 else
30140 dst = (src1 << (opsize-start)) u>> (opsize-start);
30141 } else {
30142 dst = src1;
30143 } */
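/* Worked example (illustrative, not from the source): with size == 4 and
   start == 8, src1 = 0xDEADBEEF gives dst = 0x000000EF -- bits 8 and above
   are zeroed, matching the shift-up/shift-down sequence built below. */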
30144 assign( dst,
30145 IRExpr_ITE(
30146 mkexpr(cond),
30147 IRExpr_ITE(
30148 binop(Iop_CmpEQ8, mkexpr(start), mkU8(0)),
30149 mkU(ty, 0),
30150 binop(
30151 mkSizedOp(ty,Iop_Shr8),
30152 binop(
30153 mkSizedOp(ty,Iop_Shl8),
30154 mkexpr(src1),
30155 binop(Iop_Sub8, mkU8(8*size), mkexpr(start))
30156 ),
30157 binop(Iop_Sub8, mkU8(8*size), mkexpr(start))
30158 )
30159 ),
30160 mkexpr(src1)
30161 )
30162 );
30163 putIRegG( size, pfx, rm, mkexpr(dst) );
30164 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
30165 ? AMD64G_CC_OP_BLSR64
30166 : AMD64G_CC_OP_BLSR32)) );
30167 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
30168 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(cond))) );
30169 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
30170 *uses_vvvv = True;
30171 goto decode_success;
30173 /* PDEP r/m32, r32b, r32a = VEX.NDS.LZ.F2.0F38.W0 F5 /r */
30174 /* PDEP r/m64, r64b, r64a = VEX.NDS.LZ.F2.0F38.W1 F5 /r */
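/* Reader's note (illustrative): PDEP scatters the low-order bits of src
   into the positions of the set bits of mask, e.g.
   pdep(src=0b101, mask=0b11010) == 0b10010.  The heavy lifting is done by
   the amd64g_calculate_pdep helper called below. */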
30175 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30176 Int size = getRexW(pfx) ? 8 : 4;
30177 IRType ty = szToITy(size);
30178 IRTemp src = newTemp(ty);
30179 IRTemp mask = newTemp(ty);
30180 UChar rm = getUChar(delta);
30182 assign( src, getIRegV(size,pfx) );
30183 if (epartIsReg(rm)) {
30184 assign( mask, getIRegE(size,pfx,rm) );
30185 DIP("pdep %s,%s,%s\n", nameIRegE(size,pfx,rm),
30186 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
30187 delta++;
30188 } else {
30189 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30190 assign( mask, loadLE(ty, mkexpr(addr)) );
30191 DIP("pdep %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
30192 nameIRegG(size,pfx,rm));
30193 delta += alen;
30196 IRExpr** args = mkIRExprVec_2( widenUto64(mkexpr(src)),
30197 widenUto64(mkexpr(mask)) );
30198 putIRegG( size, pfx, rm,
30199 narrowTo(ty, mkIRExprCCall(Ity_I64, 0/*regparms*/,
30200 "amd64g_calculate_pdep",
30201 &amd64g_calculate_pdep, args)) );
30202 *uses_vvvv = True;
30203 /* Flags aren't modified. */
30204 goto decode_success;
30206 /* PEXT r/m32, r32b, r32a = VEX.NDS.LZ.F3.0F38.W0 F5 /r */
30207 /* PEXT r/m64, r64b, r64a = VEX.NDS.LZ.F3.0F38.W1 F5 /r */
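/* Reader's note (illustrative): PEXT is the inverse gather -- it packs the
   bits of src selected by mask into the low-order bits of the result, e.g.
   pext(src=0b10010, mask=0b11010) == 0b101.  It is implemented via the
   amd64g_calculate_pext helper below. */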
30208 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30209 Int size = getRexW(pfx) ? 8 : 4;
30210 IRType ty = szToITy(size);
30211 IRTemp src = newTemp(ty);
30212 IRTemp mask = newTemp(ty);
30213 UChar rm = getUChar(delta);
30215 assign( src, getIRegV(size,pfx) );
30216 if (epartIsReg(rm)) {
30217 assign( mask, getIRegE(size,pfx,rm) );
30218 DIP("pext %s,%s,%s\n", nameIRegE(size,pfx,rm),
30219 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
30220 delta++;
30221 } else {
30222 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30223 assign( mask, loadLE(ty, mkexpr(addr)) );
30224 DIP("pext %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
30225 nameIRegG(size,pfx,rm));
30226 delta += alen;
30229 /* First mask off the bits not set in mask; they are ignored,
30230 so it is fine if they contain undefined values. */
30231 IRExpr* masked = binop(mkSizedOp(ty,Iop_And8),
30232 mkexpr(src), mkexpr(mask));
30233 IRExpr** args = mkIRExprVec_2( widenUto64(masked),
30234 widenUto64(mkexpr(mask)) );
30235 putIRegG( size, pfx, rm,
30236 narrowTo(ty, mkIRExprCCall(Ity_I64, 0/*regparms*/,
30237 "amd64g_calculate_pext",
30238 &amd64g_calculate_pext, args)) );
30239 *uses_vvvv = True;
30240 /* Flags aren't modified. */
30241 goto decode_success;
30243 break;
30245 case 0xF6:
30246 /* MULX r/m32, r32b, r32a = VEX.NDD.LZ.F2.0F38.W0 F6 /r */
30247 /* MULX r/m64, r64b, r64a = VEX.NDD.LZ.F2.0F38.W1 F6 /r */
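/* Reader's note (illustrative): MULX forms the full unsigned product of
   RDX (or EDX) and the r/m operand; the low half goes to the vvvv register
   and the high half to the ModRM reg field, and no flags are touched.
   That is what the MullU64/MullU32 and the two register writes below do. */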
30248 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30249 Int size = getRexW(pfx) ? 8 : 4;
30250 IRType ty = szToITy(size);
30251 IRTemp src1 = newTemp(ty);
30252 IRTemp src2 = newTemp(ty);
30253 IRTemp res = newTemp(size == 8 ? Ity_I128 : Ity_I64);
30254 UChar rm = getUChar(delta);
30256 assign( src1, getIRegRDX(size) );
30257 if (epartIsReg(rm)) {
30258 assign( src2, getIRegE(size,pfx,rm) );
30259 DIP("mulx %s,%s,%s\n", nameIRegE(size,pfx,rm),
30260 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
30261 delta++;
30262 } else {
30263 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30264 assign( src2, loadLE(ty, mkexpr(addr)) );
30265 DIP("mulx %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
30266 nameIRegG(size,pfx,rm));
30267 delta += alen;
30270 assign( res, binop(size == 8 ? Iop_MullU64 : Iop_MullU32,
30271 mkexpr(src1), mkexpr(src2)) );
30272 putIRegV( size, pfx,
30273 unop(size == 8 ? Iop_128to64 : Iop_64to32, mkexpr(res)) );
30274 putIRegG( size, pfx, rm,
30275 unop(size == 8 ? Iop_128HIto64 : Iop_64HIto32,
30276 mkexpr(res)) );
30277 *uses_vvvv = True;
30278 /* Flags aren't modified. */
30279 goto decode_success;
30281 break;
30283 case 0xF7:
30284 /* SARX r32b, r/m32, r32a = VEX.NDS.LZ.F3.0F38.W0 F7 /r */
30285 /* SARX r64b, r/m64, r64a = VEX.NDS.LZ.F3.0F38.W1 F7 /r */
30286 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30287 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "sarx", Iop_Sar8 );
30288 goto decode_success;
30290 /* SHLX r32b, r/m32, r32a = VEX.NDS.LZ.66.0F38.W0 F7 /r */
30291 /* SHLX r64b, r/m64, r64a = VEX.NDS.LZ.66.0F38.W1 F7 /r */
30292 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30293 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "shlx", Iop_Shl8 );
30294 goto decode_success;
30296 /* SHRX r32b, r/m32, r32a = VEX.NDS.LZ.F2.0F38.W0 F7 /r */
30297 /* SHRX r64b, r/m64, r64a = VEX.NDS.LZ.F2.0F38.W1 F7 /r */
30298 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30299 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "shrx", Iop_Shr8 );
30300 goto decode_success;
30302 /* BEXTR r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F7 /r */
30303 /* BEXTR r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F7 /r */
30304 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30305 Int size = getRexW(pfx) ? 8 : 4;
30306 IRType ty = szToITy(size);
30307 IRTemp dst = newTemp(ty);
30308 IRTemp src1 = newTemp(ty);
30309 IRTemp src2 = newTemp(ty);
30310 IRTemp stle = newTemp(Ity_I16);
30311 IRTemp start = newTemp(Ity_I8);
30312 IRTemp len = newTemp(Ity_I8);
30313 UChar rm = getUChar(delta);
30315 assign( src2, getIRegV(size,pfx) );
30316 if (epartIsReg(rm)) {
30317 assign( src1, getIRegE(size,pfx,rm) );
30318 DIP("bextr %s,%s,%s\n", nameIRegV(size,pfx),
30319 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
30320 delta++;
30321 } else {
30322 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30323 assign( src1, loadLE(ty, mkexpr(addr)) );
30324 DIP("bextr %s,%s,%s\n", nameIRegV(size,pfx), dis_buf,
30325 nameIRegG(size,pfx,rm));
30326 delta += alen;
30329 assign( stle, narrowTo( Ity_I16, mkexpr(src2) ) );
30330 assign( start, unop( Iop_16to8, mkexpr(stle) ) );
30331 assign( len, unop( Iop_16HIto8, mkexpr(stle) ) );
30332 /* if (start+len < opsize) {
30333 if (len != 0)
30334 dst = (src1 << (opsize-start-len)) u>> (opsize-len);
30335 else
30336 dst = 0;
30337 } else {
30338 if (start < opsize)
30339 dst = src1 u>> start;
30340 else
30341 dst = 0;
30342 } */
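/* Worked example (illustrative): start = 4, len = 8 on src1 = 0x12345678
   extracts bits [11:4], giving 0x67 -- i.e. (src1 >> start) masked to len
   bits, which is what the shift pair below computes. */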
30343 assign( dst,
30344 IRExpr_ITE(
30345 binop(Iop_CmpLT32U,
30346 binop(Iop_Add32,
30347 unop(Iop_8Uto32, mkexpr(start)),
30348 unop(Iop_8Uto32, mkexpr(len))),
30349 mkU32(8*size)),
30350 IRExpr_ITE(
30351 binop(Iop_CmpEQ8, mkexpr(len), mkU8(0)),
30352 mkU(ty, 0),
30353 binop(mkSizedOp(ty,Iop_Shr8),
30354 binop(mkSizedOp(ty,Iop_Shl8), mkexpr(src1),
30355 binop(Iop_Sub8,
30356 binop(Iop_Sub8, mkU8(8*size),
30357 mkexpr(start)),
30358 mkexpr(len))),
30359 binop(Iop_Sub8, mkU8(8*size),
30360 mkexpr(len)))
30361 ),
30362 IRExpr_ITE(
30363 binop(Iop_CmpLT32U,
30364 unop(Iop_8Uto32, mkexpr(start)),
30365 mkU32(8*size)),
30366 binop(mkSizedOp(ty,Iop_Shr8), mkexpr(src1),
30367 mkexpr(start)),
30368 mkU(ty, 0)
30369 )
30370 )
30371 );
30372 putIRegG( size, pfx, rm, mkexpr(dst) );
30373 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
30374 ? AMD64G_CC_OP_ANDN64
30375 : AMD64G_CC_OP_ANDN32)) );
30376 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
30377 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
30378 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
30379 *uses_vvvv = True;
30380 goto decode_success;
30382 break;
30384 default:
30385 break;
30386 }
30389 //decode_failure:
30390 return deltaIN;
30392 decode_success:
30393 return delta;
30394 }
30396 /* operand format:
30397 * [0] = dst
30398 * [n] = srcn
30399 */
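/* Reader's note (illustrative): this helper parses the AMD FMA4-style
   operand set, in which a fourth XMM register is encoded in the top four
   bits of the trailing immediate byte -- hence the getUChar(delta + 1) >> 4
   below for the count == 4 case. */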
30400 static Long decode_vregW(Int count, Long delta, UChar modrm, Prefix pfx,
30401 const VexAbiInfo* vbi, IRTemp *v, UInt *dst, Int swap)
30402 {
30403 v[0] = newTemp(Ity_V128);
30404 v[1] = newTemp(Ity_V128);
30405 v[2] = newTemp(Ity_V128);
30406 v[3] = newTemp(Ity_V128);
30407 IRTemp addr = IRTemp_INVALID;
30408 Int alen = 0;
30409 HChar dis_buf[50];
30411 *dst = gregOfRexRM(pfx, modrm);
30412 assign( v[0], getXMMReg(*dst) );
30414 if ( epartIsReg( modrm ) ) {
30415 UInt ereg = eregOfRexRM(pfx, modrm);
30416 assign(swap ? v[count-1] : v[count-2], getXMMReg(ereg) );
30417 DIS(dis_buf, "%s", nameXMMReg(ereg));
30418 } else {
30419 Bool extra_byte = (getUChar(delta - 3) & 0xF) != 9;
30420 addr = disAMode(&alen, vbi, pfx, delta, dis_buf, extra_byte);
30421 assign(swap ? v[count-1] : v[count-2], loadLE(Ity_V128, mkexpr(addr)));
30422 delta += alen - 1;
30423 }
30425 UInt vvvv = getVexNvvvv(pfx);
30426 switch(count) {
30427 case 2:
30428 DIP( "%s,%s", nameXMMReg(*dst), dis_buf );
30429 break;
30430 case 3:
30431 assign( swap ? v[1] : v[2], getXMMReg(vvvv) );
30432 DIP( "%s,%s,%s", nameXMMReg(*dst), nameXMMReg(vvvv), dis_buf );
30433 break;
30434 case 4:
30436 assign( v[1], getXMMReg(vvvv) );
30437 UInt src2 = getUChar(delta + 1) >> 4;
30438 assign( swap ? v[2] : v[3], getXMMReg(src2) );
30439 DIP( "%s,%s,%s,%s", nameXMMReg(*dst), nameXMMReg(vvvv),
30440 nameXMMReg(src2), dis_buf );
30442 break;
30443 }
30444 return delta + 1;
30445 }
30447 static Long dis_FMA4 (Prefix pfx, Long delta, UChar opc,
30448 Bool* uses_vvvv, const VexAbiInfo* vbi )
30449 {
30450 UInt dst;
30451 *uses_vvvv = True;
30453 UChar modrm = getUChar(delta);
30455 Bool zero_64F = False;
30456 Bool zero_96F = False;
30457 UInt is_F32 = ((opc & 0x01) == 0x00) ? 1 : 0;
30458 Bool neg = (opc & 0xF0) == 0x70;
30459 Bool alt = (opc & 0xF0) == 0x50;
30460 Bool sub = alt ? (opc & 0x0E) != 0x0E : (opc & 0x0C) == 0x0C;
30462 IRTemp operand[4];
30463 switch(opc & 0xF) {
30464 case 0x0A: zero_96F = (opc >> 4) != 0x05; break;
30465 case 0x0B: zero_64F = (opc >> 4) != 0x05; break;
30466 case 0x0E: zero_96F = (opc >> 4) != 0x05; break;
30467 case 0x0F: zero_64F = (opc >> 4) != 0x05; break;
30468 default: break;
30469 }
30470 DIP("vfm%s", neg ? "n" : "");
30471 if(alt) DIP("%s", sub ? "add" : "sub");
30472 DIP("%s", sub ? "sub" : "add");
30473 DIP("%c ", (zero_64F || zero_96F) ? 's' : 'p');
30474 DIP("%c ", is_F32 ? 's' : 'd');
30475 delta = decode_vregW(4, delta, modrm, pfx, vbi, operand, &dst, getRexW(pfx));
30476 DIP("\n");
30477 IRExpr *src[3];
30479 void (*putXMM[2])(UInt,Int,IRExpr*) = {&putXMMRegLane64F, &putXMMRegLane32F};
30481 IROp size_op[] = {Iop_V128to64, Iop_V128HIto64, Iop_64to32, Iop_64HIto32};
30482 IROp neg_op[] = {Iop_NegF64, Iop_NegF32};
30483 int i, j;
30484 for(i = 0; i < is_F32 * 2 + 2; i++) {
30485 for(j = 0; j < 3; j++) {
30486 if(is_F32) {
30487 src[j] = unop(Iop_ReinterpI32asF32,
30488 unop(size_op[i%2+2],
30489 unop(size_op[i/2],
30490 mkexpr(operand[j + 1])
30491 )));
30493 } else {
30494 src[j] = unop(Iop_ReinterpI64asF64,
30495 unop(size_op[i%2],
30496 mkexpr(operand[j + 1])
30497 ));
30498 }
30499 }
30500 putXMM[is_F32](dst, i, IRExpr_Qop(is_F32 ? Iop_MAddF32 : Iop_MAddF64,
30501 get_FAKE_roundingmode(),
30502 neg ? unop(neg_op[is_F32], src[0])
30503 : src[0],
30504 src[1],
30505 sub ? unop(neg_op[is_F32], src[2])
30506 : src[2]
30507 ));
30508 if(alt) {
30509 sub = !sub;
30510 }
30511 }
30513 /* Zero out top bits of ymm/xmm register. */
30514 putYMMRegLane128( dst, 1, mkV128(0) );
30516 if(zero_64F || zero_96F) {
30517 putXMMRegLane64( dst, 1, IRExpr_Const(IRConst_U64(0)));
30518 }
30520 if(zero_96F) {
30521 putXMMRegLane32( dst, 1, IRExpr_Const(IRConst_U32(0)));
30522 }
30524 return delta+1;
30525 }
30527 /*------------------------------------------------------------*/
30528 /*--- ---*/
30529 /*--- Top-level post-escape decoders: dis_ESC_0F3A__VEX ---*/
30530 /*--- ---*/
30531 /*------------------------------------------------------------*/
30533 static IRTemp math_VPERMILPS_128 ( IRTemp sV, UInt imm8 )
30535 vassert(imm8 < 256);
30536 IRTemp s3, s2, s1, s0;
30537 s3 = s2 = s1 = s0 = IRTemp_INVALID;
30538 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
30539 # define SEL(_nn) (((_nn)==0) ? s0 : ((_nn)==1) ? s1 \
30540 : ((_nn)==2) ? s2 : s3)
30541 IRTemp res = newTemp(Ity_V128);
30542 assign(res, mkV128from32s( SEL((imm8 >> 6) & 3),
30543 SEL((imm8 >> 4) & 3),
30544 SEL((imm8 >> 2) & 3),
30545 SEL((imm8 >> 0) & 3) ));
30546 # undef SEL
30547 return res;
30550 /* Handles 128 and 256 bit versions of VCVTPS2PH. */
30551 static Long dis_VCVTPS2PH ( const VexAbiInfo* vbi, Prefix pfx,
30552 Long delta, Bool is256bit )
30554 /* This is a width-halving store or reg-reg move that converts the data
30555 as it is transferred. */
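/* Reader's note (illustrative): the F16C conversion narrows 4 (128-bit) or
   8 (256-bit) single-precision lanes to half precision.  imm8 bit 2 selects
   MXCSR rounding; otherwise imm8[1:0] supplies the rounding mode directly,
   as set up for 'rm' below. */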
30556 UChar modrm = getUChar(delta);
30557 UInt rG = gregOfRexRM(pfx, modrm);
30558 IRTemp rm = newTemp(Ity_I32);
30559 IROp op = is256bit ? Iop_F32toF16x8 : Iop_F32toF16x4;
30560 IRExpr* srcG = (is256bit ? getYMMReg : getXMMReg)(rG);
30562 /* (imm & 3) contains an Intel-encoded rounding mode. Because that encoding
30563 is the same as the encoding for IRRoundingMode, we can use that value
30564 directly in the IR as a rounding mode. */
30566 if (epartIsReg(modrm)) {
30567 UInt rE = eregOfRexRM(pfx, modrm);
30568 delta += 1;
30569 UInt imm = getUChar(delta);
30570 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
30571 IRExpr* res = binop(op, mkexpr(rm), srcG);
30572 if (!is256bit)
30573 res = unop(Iop_64UtoV128, res);
30574 putYMMRegLoAndZU(rE, res);
30575 DIP("vcvtps2ph $%u,%s,%s\n",
30576 imm, (is256bit ? nameYMMReg : nameXMMReg)(rG), nameXMMReg(rE));
30577 } else {
30578 Int alen = 0;
30579 HChar dis_buf[50];
30580 IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30581 delta += alen;
30582 UInt imm = getUChar(delta);
30583 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
30584 IRExpr* res = binop(op, mkexpr(rm), srcG);
30585 storeLE(mkexpr(addr), res);
30586 DIP("vcvtps2ph $%u,%s,%s\n",
30587 imm, (is256bit ? nameYMMReg : nameXMMReg)(rG), dis_buf);
30589 delta++;
30590 /* doesn't use vvvv */
30591 return delta;
30594 __attribute__((noinline))
30595 static
30596 Long dis_ESC_0F3A__VEX (
30597 /*MB_OUT*/DisResult* dres,
30598 /*OUT*/ Bool* uses_vvvv,
30599 const VexArchInfo* archinfo,
30600 const VexAbiInfo* vbi,
30601 Prefix pfx, Int sz, Long deltaIN
30604 IRTemp addr = IRTemp_INVALID;
30605 Int alen = 0;
30606 HChar dis_buf[50];
30607 Long delta = deltaIN;
30608 UChar opc = getUChar(delta);
30609 delta++;
30610 *uses_vvvv = False;
30612 switch (opc) {
30614 case 0x00:
30615 case 0x01:
30616 /* VPERMQ imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 00 /r ib */
30617 /* VPERMPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 01 /r ib */
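/* Reader's note (illustrative): each 2-bit field of imm8 selects which
   source qword lands in the corresponding destination lane, so imm8 == 0x1B
   reverses the four 64-bit lanes.  The Qop over the broken-up lanes below
   implements exactly this selection. */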
30618 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
30619 && 1==getRexW(pfx)/*W1*/) {
30620 UChar modrm = getUChar(delta);
30621 UInt imm8 = 0;
30622 UInt rG = gregOfRexRM(pfx, modrm);
30623 IRTemp sV = newTemp(Ity_V256);
30624 const HChar *name = opc == 0 ? "vpermq" : "vpermpd";
30625 if (epartIsReg(modrm)) {
30626 UInt rE = eregOfRexRM(pfx, modrm);
30627 delta += 1;
30628 imm8 = getUChar(delta);
30629 DIP("%s $%u,%s,%s\n",
30630 name, imm8, nameYMMReg(rE), nameYMMReg(rG));
30631 assign(sV, getYMMReg(rE));
30632 } else {
30633 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30634 delta += alen;
30635 imm8 = getUChar(delta);
30636 DIP("%s $%u,%s,%s\n",
30637 name, imm8, dis_buf, nameYMMReg(rG));
30638 assign(sV, loadLE(Ity_V256, mkexpr(addr)));
30640 delta++;
30641 IRTemp s[4];
30642 s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
30643 breakupV256to64s(sV, &s[3], &s[2], &s[1], &s[0]);
30644 IRTemp dV = newTemp(Ity_V256);
30645 assign(dV, IRExpr_Qop(Iop_64x4toV256,
30646 mkexpr(s[(imm8 >> 6) & 3]),
30647 mkexpr(s[(imm8 >> 4) & 3]),
30648 mkexpr(s[(imm8 >> 2) & 3]),
30649 mkexpr(s[(imm8 >> 0) & 3])));
30650 putYMMReg(rG, mkexpr(dV));
30651 goto decode_success;
30653 break;
30655 case 0x02:
30656 /* VPBLENDD imm8, xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 02 /r ib */
30657 if (have66noF2noF3(pfx)
30658 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
30659 UChar modrm = getUChar(delta);
30660 UInt imm8 = 0;
30661 UInt rG = gregOfRexRM(pfx, modrm);
30662 UInt rV = getVexNvvvv(pfx);
30663 IRTemp sV = newTemp(Ity_V128);
30664 IRTemp dV = newTemp(Ity_V128);
30665 UInt i;
30666 IRTemp s[4], d[4];
30667 assign(sV, getXMMReg(rV));
30668 if (epartIsReg(modrm)) {
30669 UInt rE = eregOfRexRM(pfx, modrm);
30670 delta += 1;
30671 imm8 = getUChar(delta);
30672 DIP("vpblendd $%u,%s,%s,%s\n",
30673 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
30674 assign(dV, getXMMReg(rE));
30675 } else {
30676 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30677 delta += alen;
30678 imm8 = getUChar(delta);
30679 DIP("vpblendd $%u,%s,%s,%s\n",
30680 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
30681 assign(dV, loadLE(Ity_V128, mkexpr(addr)));
30683 delta++;
30684 for (i = 0; i < 4; i++) {
30685 s[i] = IRTemp_INVALID;
30686 d[i] = IRTemp_INVALID;
30688 breakupV128to32s( sV, &s[3], &s[2], &s[1], &s[0] );
30689 breakupV128to32s( dV, &d[3], &d[2], &d[1], &d[0] );
30690 for (i = 0; i < 4; i++)
30691 putYMMRegLane32(rG, i, mkexpr((imm8 & (1<<i)) ? d[i] : s[i]));
30692 putYMMRegLane128(rG, 1, mkV128(0));
30693 *uses_vvvv = True;
30694 goto decode_success;
30696 /* VPBLENDD imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F3A.W0 02 /r ib */
30697 if (have66noF2noF3(pfx)
30698 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
30699 UChar modrm = getUChar(delta);
30700 UInt imm8 = 0;
30701 UInt rG = gregOfRexRM(pfx, modrm);
30702 UInt rV = getVexNvvvv(pfx);
30703 IRTemp sV = newTemp(Ity_V256);
30704 IRTemp dV = newTemp(Ity_V256);
30705 UInt i;
30706 IRTemp s[8], d[8];
30707 assign(sV, getYMMReg(rV));
30708 if (epartIsReg(modrm)) {
30709 UInt rE = eregOfRexRM(pfx, modrm);
30710 delta += 1;
30711 imm8 = getUChar(delta);
30712 DIP("vpblendd $%u,%s,%s,%s\n",
30713 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
30714 assign(dV, getYMMReg(rE));
30715 } else {
30716 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30717 delta += alen;
30718 imm8 = getUChar(delta);
30719 DIP("vpblendd $%u,%s,%s,%s\n",
30720 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
30721 assign(dV, loadLE(Ity_V256, mkexpr(addr)));
30723 delta++;
30724 for (i = 0; i < 8; i++) {
30725 s[i] = IRTemp_INVALID;
30726 d[i] = IRTemp_INVALID;
30728 breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4],
30729 &s[3], &s[2], &s[1], &s[0] );
30730 breakupV256to32s( dV, &d[7], &d[6], &d[5], &d[4],
30731 &d[3], &d[2], &d[1], &d[0] );
30732 for (i = 0; i < 8; i++)
30733 putYMMRegLane32(rG, i, mkexpr((imm8 & (1<<i)) ? d[i] : s[i]));
30734 *uses_vvvv = True;
30735 goto decode_success;
30737 break;
30739 case 0x04:
30740 /* VPERMILPS imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 04 /r ib */
30741 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30742 UChar modrm = getUChar(delta);
30743 UInt imm8 = 0;
30744 UInt rG = gregOfRexRM(pfx, modrm);
30745 IRTemp sV = newTemp(Ity_V256);
30746 if (epartIsReg(modrm)) {
30747 UInt rE = eregOfRexRM(pfx, modrm);
30748 delta += 1;
30749 imm8 = getUChar(delta);
30750 DIP("vpermilps $%u,%s,%s\n",
30751 imm8, nameYMMReg(rE), nameYMMReg(rG));
30752 assign(sV, getYMMReg(rE));
30753 } else {
30754 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30755 delta += alen;
30756 imm8 = getUChar(delta);
30757 DIP("vpermilps $%u,%s,%s\n",
30758 imm8, dis_buf, nameYMMReg(rG));
30759 assign(sV, loadLE(Ity_V256, mkexpr(addr)));
30761 delta++;
30762 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
30763 breakupV256toV128s( sV, &sVhi, &sVlo );
30764 IRTemp dVhi = math_VPERMILPS_128( sVhi, imm8 );
30765 IRTemp dVlo = math_VPERMILPS_128( sVlo, imm8 );
30766 IRExpr* res = binop(Iop_V128HLtoV256, mkexpr(dVhi), mkexpr(dVlo));
30767 putYMMReg(rG, res);
30768 goto decode_success;
30770 /* VPERMILPS imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 04 /r ib */
30771 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30772 UChar modrm = getUChar(delta);
30773 UInt imm8 = 0;
30774 UInt rG = gregOfRexRM(pfx, modrm);
30775 IRTemp sV = newTemp(Ity_V128);
30776 if (epartIsReg(modrm)) {
30777 UInt rE = eregOfRexRM(pfx, modrm);
30778 delta += 1;
30779 imm8 = getUChar(delta);
30780 DIP("vpermilps $%u,%s,%s\n",
30781 imm8, nameXMMReg(rE), nameXMMReg(rG));
30782 assign(sV, getXMMReg(rE));
30783 } else {
30784 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30785 delta += alen;
30786 imm8 = getUChar(delta);
30787 DIP("vpermilps $%u,%s,%s\n",
30788 imm8, dis_buf, nameXMMReg(rG));
30789 assign(sV, loadLE(Ity_V128, mkexpr(addr)));
30791 delta++;
30792 putYMMRegLoAndZU(rG, mkexpr ( math_VPERMILPS_128 ( sV, imm8 ) ) );
30793 goto decode_success;
30795 break;
30797 case 0x05:
30798 /* VPERMILPD imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 05 /r ib */
30799 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30800 UChar modrm = getUChar(delta);
30801 UInt imm8 = 0;
30802 UInt rG = gregOfRexRM(pfx, modrm);
30803 IRTemp sV = newTemp(Ity_V128);
30804 if (epartIsReg(modrm)) {
30805 UInt rE = eregOfRexRM(pfx, modrm);
30806 delta += 1;
30807 imm8 = getUChar(delta);
30808 DIP("vpermilpd $%u,%s,%s\n",
30809 imm8, nameXMMReg(rE), nameXMMReg(rG));
30810 assign(sV, getXMMReg(rE));
30811 } else {
30812 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30813 delta += alen;
30814 imm8 = getUChar(delta);
30815 DIP("vpermilpd $%u,%s,%s\n",
30816 imm8, dis_buf, nameXMMReg(rG));
30817 assign(sV, loadLE(Ity_V128, mkexpr(addr)));
30819 delta++;
30820 IRTemp s1 = newTemp(Ity_I64);
30821 IRTemp s0 = newTemp(Ity_I64);
30822 assign(s1, unop(Iop_V128HIto64, mkexpr(sV)));
30823 assign(s0, unop(Iop_V128to64, mkexpr(sV)));
30824 IRTemp dV = newTemp(Ity_V128);
30825 assign(dV, binop(Iop_64HLtoV128,
30826 mkexpr((imm8 & (1<<1)) ? s1 : s0),
30827 mkexpr((imm8 & (1<<0)) ? s1 : s0)));
30828 putYMMRegLoAndZU(rG, mkexpr(dV));
30829 goto decode_success;
30831 /* VPERMILPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 05 /r ib */
30832 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30833 UChar modrm = getUChar(delta);
30834 UInt imm8 = 0;
30835 UInt rG = gregOfRexRM(pfx, modrm);
30836 IRTemp sV = newTemp(Ity_V256);
30837 if (epartIsReg(modrm)) {
30838 UInt rE = eregOfRexRM(pfx, modrm);
30839 delta += 1;
30840 imm8 = getUChar(delta);
30841 DIP("vpermilpd $%u,%s,%s\n",
30842 imm8, nameYMMReg(rE), nameYMMReg(rG));
30843 assign(sV, getYMMReg(rE));
30844 } else {
30845 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30846 delta += alen;
30847 imm8 = getUChar(delta);
30848 DIP("vpermilpd $%u,%s,%s\n",
30849 imm8, dis_buf, nameYMMReg(rG));
30850 assign(sV, loadLE(Ity_V256, mkexpr(addr)));
30852 delta++;
30853 IRTemp s3, s2, s1, s0;
30854 s3 = s2 = s1 = s0 = IRTemp_INVALID;
30855 breakupV256to64s(sV, &s3, &s2, &s1, &s0);
30856 IRTemp dV = newTemp(Ity_V256);
30857 assign(dV, IRExpr_Qop(Iop_64x4toV256,
30858 mkexpr((imm8 & (1<<3)) ? s3 : s2),
30859 mkexpr((imm8 & (1<<2)) ? s3 : s2),
30860 mkexpr((imm8 & (1<<1)) ? s1 : s0),
30861 mkexpr((imm8 & (1<<0)) ? s1 : s0)));
30862 putYMMReg(rG, mkexpr(dV));
30863 goto decode_success;
30865 break;
30867 case 0x06:
30868 /* VPERM2F128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.66.0F3A.W0 06 /r ib */
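/* Reader's note (illustrative): imm8[1:0] picks the 128-bit lane written to
   the low half of the destination and imm8[5:4] the lane for the high half;
   bits 3 and 7 force the respective half to zero, as the two conditional
   stores at the end of this case show. */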
30869 if (have66noF2noF3(pfx)
30870 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
30871 UChar modrm = getUChar(delta);
30872 UInt imm8 = 0;
30873 UInt rG = gregOfRexRM(pfx, modrm);
30874 UInt rV = getVexNvvvv(pfx);
30875 IRTemp s00 = newTemp(Ity_V128);
30876 IRTemp s01 = newTemp(Ity_V128);
30877 IRTemp s10 = newTemp(Ity_V128);
30878 IRTemp s11 = newTemp(Ity_V128);
30879 assign(s00, getYMMRegLane128(rV, 0));
30880 assign(s01, getYMMRegLane128(rV, 1));
30881 if (epartIsReg(modrm)) {
30882 UInt rE = eregOfRexRM(pfx, modrm);
30883 delta += 1;
30884 imm8 = getUChar(delta);
30885 DIP("vperm2f128 $%u,%s,%s,%s\n",
30886 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
30887 assign(s10, getYMMRegLane128(rE, 0));
30888 assign(s11, getYMMRegLane128(rE, 1));
30889 } else {
30890 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30891 delta += alen;
30892 imm8 = getUChar(delta);
30893 DIP("vperm2f128 $%u,%s,%s,%s\n",
30894 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
30895 assign(s10, loadLE(Ity_V128, binop(Iop_Add64,
30896 mkexpr(addr), mkU64(0))));
30897 assign(s11, loadLE(Ity_V128, binop(Iop_Add64,
30898 mkexpr(addr), mkU64(16))));
30900 delta++;
30901 # define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \
30902 : ((_nn)==2) ? s10 : s11)
30903 putYMMRegLane128(rG, 0, mkexpr(SEL((imm8 >> 0) & 3)));
30904 putYMMRegLane128(rG, 1, mkexpr(SEL((imm8 >> 4) & 3)));
30905 # undef SEL
30906 if (imm8 & (1<<3)) putYMMRegLane128(rG, 0, mkV128(0));
30907 if (imm8 & (1<<7)) putYMMRegLane128(rG, 1, mkV128(0));
30908 *uses_vvvv = True;
30909 goto decode_success;
30911 break;
30913 case 0x08:
30914 /* VROUNDPS imm8, xmm2/m128, xmm1 */
30915 /* VROUNDPS = VEX.NDS.128.66.0F3A.WIG 08 ib */
30916 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30917 UChar modrm = getUChar(delta);
30918 UInt rG = gregOfRexRM(pfx, modrm);
30919 IRTemp src = newTemp(Ity_V128);
30920 IRTemp s0 = IRTemp_INVALID;
30921 IRTemp s1 = IRTemp_INVALID;
30922 IRTemp s2 = IRTemp_INVALID;
30923 IRTemp s3 = IRTemp_INVALID;
30924 IRTemp rm = newTemp(Ity_I32);
30925 Int imm = 0;
30927 modrm = getUChar(delta);
30929 if (epartIsReg(modrm)) {
30930 UInt rE = eregOfRexRM(pfx, modrm);
30931 assign( src, getXMMReg( rE ) );
30932 imm = getUChar(delta+1);
30933 if (imm & ~15) break;
30934 delta += 1+1;
30935 DIP( "vroundps $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) );
30936 } else {
30937 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30938 assign( src, loadLE(Ity_V128, mkexpr(addr) ) );
30939 imm = getUChar(delta+alen);
30940 if (imm & ~15) break;
30941 delta += alen+1;
30942 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) );
30945 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30946 that encoding is the same as the encoding for IRRoundingMode,
30947 we can use that value directly in the IR as a rounding
30948 mode. */
30949 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
30951 breakupV128to32s( src, &s3, &s2, &s1, &s0 );
30952 putYMMRegLane128( rG, 1, mkV128(0) );
30953 # define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
30954 unop(Iop_ReinterpI32asF32, mkexpr(s)))
30955 putYMMRegLane32F( rG, 3, CVT(s3) );
30956 putYMMRegLane32F( rG, 2, CVT(s2) );
30957 putYMMRegLane32F( rG, 1, CVT(s1) );
30958 putYMMRegLane32F( rG, 0, CVT(s0) );
30959 # undef CVT
30960 goto decode_success;
30962 /* VROUNDPS imm8, ymm2/m256, ymm1 */
30963 /* VROUNDPS = VEX.NDS.256.66.0F3A.WIG 08 ib */
30964 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30965 UChar modrm = getUChar(delta);
30966 UInt rG = gregOfRexRM(pfx, modrm);
30967 IRTemp src = newTemp(Ity_V256);
30968 IRTemp s0 = IRTemp_INVALID;
30969 IRTemp s1 = IRTemp_INVALID;
30970 IRTemp s2 = IRTemp_INVALID;
30971 IRTemp s3 = IRTemp_INVALID;
30972 IRTemp s4 = IRTemp_INVALID;
30973 IRTemp s5 = IRTemp_INVALID;
30974 IRTemp s6 = IRTemp_INVALID;
30975 IRTemp s7 = IRTemp_INVALID;
30976 IRTemp rm = newTemp(Ity_I32);
30977 Int imm = 0;
30979 modrm = getUChar(delta);
30981 if (epartIsReg(modrm)) {
30982 UInt rE = eregOfRexRM(pfx, modrm);
30983 assign( src, getYMMReg( rE ) );
30984 imm = getUChar(delta+1);
30985 if (imm & ~15) break;
30986 delta += 1+1;
30987 DIP( "vroundps $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) );
30988 } else {
30989 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30990 assign( src, loadLE(Ity_V256, mkexpr(addr) ) );
30991 imm = getUChar(delta+alen);
30992 if (imm & ~15) break;
30993 delta += alen+1;
30994 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) );
30997 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30998 that encoding is the same as the encoding for IRRoundingMode,
30999 we can use that value directly in the IR as a rounding
31000 mode. */
31001 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
31003 breakupV256to32s( src, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
31004 # define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
31005 unop(Iop_ReinterpI32asF32, mkexpr(s)))
31006 putYMMRegLane32F( rG, 7, CVT(s7) );
31007 putYMMRegLane32F( rG, 6, CVT(s6) );
31008 putYMMRegLane32F( rG, 5, CVT(s5) );
31009 putYMMRegLane32F( rG, 4, CVT(s4) );
31010 putYMMRegLane32F( rG, 3, CVT(s3) );
31011 putYMMRegLane32F( rG, 2, CVT(s2) );
31012 putYMMRegLane32F( rG, 1, CVT(s1) );
31013 putYMMRegLane32F( rG, 0, CVT(s0) );
31014 # undef CVT
31015 goto decode_success;
31017 break;
31019 case 0x09:
31020 /* VROUNDPD imm8, xmm2/m128, xmm1 */
31021 /* VROUNDPD = VEX.NDS.128.66.0F3A.WIG 09 ib */
31022 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31023 UChar modrm = getUChar(delta);
31024 UInt rG = gregOfRexRM(pfx, modrm);
31025 IRTemp src = newTemp(Ity_V128);
31026 IRTemp s0 = IRTemp_INVALID;
31027 IRTemp s1 = IRTemp_INVALID;
31028 IRTemp rm = newTemp(Ity_I32);
31029 Int imm = 0;
31031 modrm = getUChar(delta);
31033 if (epartIsReg(modrm)) {
31034 UInt rE = eregOfRexRM(pfx, modrm);
31035 assign( src, getXMMReg( rE ) );
31036 imm = getUChar(delta+1);
31037 if (imm & ~15) break;
31038 delta += 1+1;
31039 DIP( "vroundpd $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) );
31040 } else {
31041 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31042 assign( src, loadLE(Ity_V128, mkexpr(addr) ) );
31043 imm = getUChar(delta+alen);
31044 if (imm & ~15) break;
31045 delta += alen+1;
31046 DIP( "vroundpd $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) );
31049 /* (imm & 3) contains an Intel-encoded rounding mode. Because
31050 that encoding is the same as the encoding for IRRoundingMode,
31051 we can use that value directly in the IR as a rounding
31052 mode. */
31053 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
31055 breakupV128to64s( src, &s1, &s0 );
31056 putYMMRegLane128( rG, 1, mkV128(0) );
31057 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
31058 unop(Iop_ReinterpI64asF64, mkexpr(s)))
31059 putYMMRegLane64F( rG, 1, CVT(s1) );
31060 putYMMRegLane64F( rG, 0, CVT(s0) );
31061 # undef CVT
31062 goto decode_success;
31064 /* VROUNDPD imm8, ymm2/m256, ymm1 */
31065 /* VROUNDPD = VEX.NDS.256.66.0F3A.WIG 09 ib */
31066 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31067 UChar modrm = getUChar(delta);
31068 UInt rG = gregOfRexRM(pfx, modrm);
31069 IRTemp src = newTemp(Ity_V256);
31070 IRTemp s0 = IRTemp_INVALID;
31071 IRTemp s1 = IRTemp_INVALID;
31072 IRTemp s2 = IRTemp_INVALID;
31073 IRTemp s3 = IRTemp_INVALID;
31074 IRTemp rm = newTemp(Ity_I32);
31075 Int imm = 0;
31077 modrm = getUChar(delta);
31079 if (epartIsReg(modrm)) {
31080 UInt rE = eregOfRexRM(pfx, modrm);
31081 assign( src, getYMMReg( rE ) );
31082 imm = getUChar(delta+1);
31083 if (imm & ~15) break;
31084 delta += 1+1;
31085 DIP( "vroundpd $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) );
31086 } else {
31087 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31088 assign( src, loadLE(Ity_V256, mkexpr(addr) ) );
31089 imm = getUChar(delta+alen);
31090 if (imm & ~15) break;
31091 delta += alen+1;
31092 DIP( "vroundpd $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) );
31095 /* (imm & 3) contains an Intel-encoded rounding mode. Because
31096 that encoding is the same as the encoding for IRRoundingMode,
31097 we can use that value directly in the IR as a rounding
31098 mode. */
31099 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
31101 breakupV256to64s( src, &s3, &s2, &s1, &s0 );
31102 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
31103 unop(Iop_ReinterpI64asF64, mkexpr(s)))
31104 putYMMRegLane64F( rG, 3, CVT(s3) );
31105 putYMMRegLane64F( rG, 2, CVT(s2) );
31106 putYMMRegLane64F( rG, 1, CVT(s1) );
31107 putYMMRegLane64F( rG, 0, CVT(s0) );
31108 # undef CVT
31109 goto decode_success;
31111 break;
31113 case 0x0A:
31114 case 0x0B:
31115 /* VROUNDSS imm8, xmm3/m32, xmm2, xmm1 */
31116 /* VROUNDSS = VEX.NDS.128.66.0F3A.WIG 0A ib */
31117 /* VROUNDSD imm8, xmm3/m64, xmm2, xmm1 */
31118 /* VROUNDSD = VEX.NDS.128.66.0F3A.WIG 0B ib */
31119 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31120 UChar modrm = getUChar(delta);
31121 UInt rG = gregOfRexRM(pfx, modrm);
31122 UInt rV = getVexNvvvv(pfx);
31123 Bool isD = opc == 0x0B;
31124 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
31125 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
31126 Int imm = 0;
31128 if (epartIsReg(modrm)) {
31129 UInt rE = eregOfRexRM(pfx, modrm);
31130 assign( src,
31131 isD ? getXMMRegLane64F(rE, 0) : getXMMRegLane32F(rE, 0) );
31132 imm = getUChar(delta+1);
31133 if (imm & ~15) break;
31134 delta += 1+1;
31135 DIP( "vrounds%c $%d,%s,%s,%s\n",
31136 isD ? 'd' : 's',
31137 imm, nameXMMReg( rE ), nameXMMReg( rV ), nameXMMReg( rG ) );
31138 } else {
31139 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31140 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
31141 imm = getUChar(delta+alen);
31142 if (imm & ~15) break;
31143 delta += alen+1;
31144 DIP( "vrounds%c $%d,%s,%s,%s\n",
31145 isD ? 'd' : 's',
31146 imm, dis_buf, nameXMMReg( rV ), nameXMMReg( rG ) );
31149 /* (imm & 3) contains an Intel-encoded rounding mode. Because
31150 that encoding is the same as the encoding for IRRoundingMode,
31151 we can use that value directly in the IR as a rounding
31152 mode. */
31153 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
31154 (imm & 4) ? get_sse_roundingmode()
31155 : mkU32(imm & 3),
31156 mkexpr(src)) );
31158 if (isD)
31159 putXMMRegLane64F( rG, 0, mkexpr(res) );
31160 else {
31161 putXMMRegLane32F( rG, 0, mkexpr(res) );
31162 putXMMRegLane32F( rG, 1, getXMMRegLane32F( rV, 1 ) );
31163 }
31164 putXMMRegLane64F( rG, 1, getXMMRegLane64F( rV, 1 ) );
31165 putYMMRegLane128( rG, 1, mkV128(0) );
31166 *uses_vvvv = True;
31167 goto decode_success;
31169 break;
31171 case 0x0C:
31172 /* VBLENDPS imm8, ymm3/m256, ymm2, ymm1 */
31173 /* VBLENDPS = VEX.NDS.256.66.0F3A.WIG 0C /r ib */
31174 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31175 UChar modrm = getUChar(delta);
31176 UInt imm8;
31177 UInt rG = gregOfRexRM(pfx, modrm);
31178 UInt rV = getVexNvvvv(pfx);
31179 IRTemp sV = newTemp(Ity_V256);
31180 IRTemp sE = newTemp(Ity_V256);
31181 assign ( sV, getYMMReg(rV) );
31182 if (epartIsReg(modrm)) {
31183 UInt rE = eregOfRexRM(pfx, modrm);
31184 delta += 1;
31185 imm8 = getUChar(delta);
31186 DIP("vblendps $%u,%s,%s,%s\n",
31187 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31188 assign(sE, getYMMReg(rE));
31189 } else {
31190 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31191 delta += alen;
31192 imm8 = getUChar(delta);
31193 DIP("vblendps $%u,%s,%s,%s\n",
31194 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31195 assign(sE, loadLE(Ity_V256, mkexpr(addr)));
31197 delta++;
31198 putYMMReg( rG,
31199 mkexpr( math_BLENDPS_256( sE, sV, imm8) ) );
31200 *uses_vvvv = True;
31201 goto decode_success;
31203 /* VBLENDPS imm8, xmm3/m128, xmm2, xmm1 */
31204 /* VBLENDPS = VEX.NDS.128.66.0F3A.WIG 0C /r ib */
31205 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31206 UChar modrm = getUChar(delta);
31207 UInt imm8;
31208 UInt rG = gregOfRexRM(pfx, modrm);
31209 UInt rV = getVexNvvvv(pfx);
31210 IRTemp sV = newTemp(Ity_V128);
31211 IRTemp sE = newTemp(Ity_V128);
31212 assign ( sV, getXMMReg(rV) );
31213 if (epartIsReg(modrm)) {
31214 UInt rE = eregOfRexRM(pfx, modrm);
31215 delta += 1;
31216 imm8 = getUChar(delta);
31217 DIP("vblendps $%u,%s,%s,%s\n",
31218 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
31219 assign(sE, getXMMReg(rE));
31220 } else {
31221 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31222 delta += alen;
31223 imm8 = getUChar(delta);
31224 DIP("vblendps $%u,%s,%s,%s\n",
31225 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
31226 assign(sE, loadLE(Ity_V128, mkexpr(addr)));
31228 delta++;
31229 putYMMRegLoAndZU( rG,
31230 mkexpr( math_BLENDPS_128( sE, sV, imm8) ) );
31231 *uses_vvvv = True;
31232 goto decode_success;
31234 break;
31236 case 0x0D:
31237 /* VBLENDPD imm8, ymm3/m256, ymm2, ymm1 */
31238 /* VBLENDPD = VEX.NDS.256.66.0F3A.WIG 0D /r ib */
31239 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31240 UChar modrm = getUChar(delta);
31241 UInt imm8;
31242 UInt rG = gregOfRexRM(pfx, modrm);
31243 UInt rV = getVexNvvvv(pfx);
31244 IRTemp sV = newTemp(Ity_V256);
31245 IRTemp sE = newTemp(Ity_V256);
31246 assign ( sV, getYMMReg(rV) );
31247 if (epartIsReg(modrm)) {
31248 UInt rE = eregOfRexRM(pfx, modrm);
31249 delta += 1;
31250 imm8 = getUChar(delta);
31251 DIP("vblendpd $%u,%s,%s,%s\n",
31252 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31253 assign(sE, getYMMReg(rE));
31254 } else {
31255 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31256 delta += alen;
31257 imm8 = getUChar(delta);
31258 DIP("vblendpd $%u,%s,%s,%s\n",
31259 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31260 assign(sE, loadLE(Ity_V256, mkexpr(addr)));
31262 delta++;
31263 putYMMReg( rG,
31264 mkexpr( math_BLENDPD_256( sE, sV, imm8) ) );
31265 *uses_vvvv = True;
31266 goto decode_success;
31268 /* VBLENDPD imm8, xmm3/m128, xmm2, xmm1 */
31269 /* VBLENDPD = VEX.NDS.128.66.0F3A.WIG 0D /r ib */
31270 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31271 UChar modrm = getUChar(delta);
31272 UInt imm8;
31273 UInt rG = gregOfRexRM(pfx, modrm);
31274 UInt rV = getVexNvvvv(pfx);
31275 IRTemp sV = newTemp(Ity_V128);
31276 IRTemp sE = newTemp(Ity_V128);
31277 assign ( sV, getXMMReg(rV) );
31278 if (epartIsReg(modrm)) {
31279 UInt rE = eregOfRexRM(pfx, modrm);
31280 delta += 1;
31281 imm8 = getUChar(delta);
31282 DIP("vblendpd $%u,%s,%s,%s\n",
31283 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
31284 assign(sE, getXMMReg(rE));
31285 } else {
31286 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31287 delta += alen;
31288 imm8 = getUChar(delta);
31289 DIP("vblendpd $%u,%s,%s,%s\n",
31290 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
31291 assign(sE, loadLE(Ity_V128, mkexpr(addr)));
31293 delta++;
31294 putYMMRegLoAndZU( rG,
31295 mkexpr( math_BLENDPD_128( sE, sV, imm8) ) );
31296 *uses_vvvv = True;
31297 goto decode_success;
31299 break;
31301 case 0x0E:
31302 /* VPBLENDW imm8, xmm3/m128, xmm2, xmm1 */
31303 /* VPBLENDW = VEX.NDS.128.66.0F3A.WIG 0E /r ib */
31304 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31305 UChar modrm = getUChar(delta);
31306 UInt imm8;
31307 UInt rG = gregOfRexRM(pfx, modrm);
31308 UInt rV = getVexNvvvv(pfx);
31309 IRTemp sV = newTemp(Ity_V128);
31310 IRTemp sE = newTemp(Ity_V128);
31311 assign ( sV, getXMMReg(rV) );
31312 if (epartIsReg(modrm)) {
31313 UInt rE = eregOfRexRM(pfx, modrm);
31314 delta += 1;
31315 imm8 = getUChar(delta);
31316 DIP("vpblendw $%u,%s,%s,%s\n",
31317 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
31318 assign(sE, getXMMReg(rE));
31319 } else {
31320 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31321 delta += alen;
31322 imm8 = getUChar(delta);
31323 DIP("vpblendw $%u,%s,%s,%s\n",
31324 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
31325 assign(sE, loadLE(Ity_V128, mkexpr(addr)));
31327 delta++;
31328 putYMMRegLoAndZU( rG,
31329 mkexpr( math_PBLENDW_128( sE, sV, imm8) ) );
31330 *uses_vvvv = True;
31331 goto decode_success;
31333 /* VPBLENDW imm8, ymm3/m256, ymm2, ymm1 */
31334 /* VPBLENDW = VEX.NDS.256.66.0F3A.WIG 0E /r ib */
31335 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31336 UChar modrm = getUChar(delta);
31337 UInt imm8;
31338 UInt rG = gregOfRexRM(pfx, modrm);
31339 UInt rV = getVexNvvvv(pfx);
31340 IRTemp sV = newTemp(Ity_V256);
31341 IRTemp sE = newTemp(Ity_V256);
31342 IRTemp sVhi, sVlo, sEhi, sElo;
31343 sVhi = sVlo = sEhi = sElo = IRTemp_INVALID;
31344 assign ( sV, getYMMReg(rV) );
31345 if (epartIsReg(modrm)) {
31346 UInt rE = eregOfRexRM(pfx, modrm);
31347 delta += 1;
31348 imm8 = getUChar(delta);
31349 DIP("vpblendw $%u,%s,%s,%s\n",
31350 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31351 assign(sE, getYMMReg(rE));
31352 } else {
31353 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31354 delta += alen;
31355 imm8 = getUChar(delta);
31356 DIP("vpblendw $%u,%s,%s,%s\n",
31357 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31358 assign(sE, loadLE(Ity_V256, mkexpr(addr)));
31360 delta++;
31361 breakupV256toV128s( sV, &sVhi, &sVlo );
31362 breakupV256toV128s( sE, &sEhi, &sElo );
31363 putYMMReg( rG, binop( Iop_V128HLtoV256,
31364 mkexpr( math_PBLENDW_128( sEhi, sVhi, imm8) ),
31365 mkexpr( math_PBLENDW_128( sElo, sVlo, imm8) ) ) );
31366 *uses_vvvv = True;
31367 goto decode_success;
31369 break;
31371 case 0x0F:
31372 /* VPALIGNR imm8, xmm3/m128, xmm2, xmm1 */
31373 /* VPALIGNR = VEX.NDS.128.66.0F3A.WIG 0F /r ib */
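/* Reader's note (illustrative): VPALIGNR concatenates the two 128-bit
   sources and extracts a byte-shifted 128-bit window at offset imm8; the
   256-bit form further below applies the same operation independently to
   each 128-bit lane. */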
31374 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31375 UChar modrm = getUChar(delta);
31376 UInt rG = gregOfRexRM(pfx, modrm);
31377 UInt rV = getVexNvvvv(pfx);
31378 IRTemp sV = newTemp(Ity_V128);
31379 IRTemp dV = newTemp(Ity_V128);
31380 UInt imm8;
31382 assign( dV, getXMMReg(rV) );
31384 if ( epartIsReg( modrm ) ) {
31385 UInt rE = eregOfRexRM(pfx, modrm);
31386 assign( sV, getXMMReg(rE) );
31387 imm8 = getUChar(delta+1);
31388 delta += 1+1;
31389 DIP("vpalignr $%u,%s,%s,%s\n", imm8, nameXMMReg(rE),
31390 nameXMMReg(rV), nameXMMReg(rG));
31391 } else {
31392 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31393 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
31394 imm8 = getUChar(delta+alen);
31395 delta += alen+1;
31396 DIP("vpalignr $%u,%s,%s,%s\n", imm8, dis_buf,
31397 nameXMMReg(rV), nameXMMReg(rG));
31400 IRTemp res = math_PALIGNR_XMM( sV, dV, imm8 );
31401 putYMMRegLoAndZU( rG, mkexpr(res) );
31402 *uses_vvvv = True;
31403 goto decode_success;
31405 /* VPALIGNR imm8, ymm3/m256, ymm2, ymm1 */
31406 /* VPALIGNR = VEX.NDS.256.66.0F3A.WIG 0F /r ib */
31407 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31408 UChar modrm = getUChar(delta);
31409 UInt rG = gregOfRexRM(pfx, modrm);
31410 UInt rV = getVexNvvvv(pfx);
31411 IRTemp sV = newTemp(Ity_V256);
31412 IRTemp dV = newTemp(Ity_V256);
31413 IRTemp sHi, sLo, dHi, dLo;
31414 sHi = sLo = dHi = dLo = IRTemp_INVALID;
31415 UInt imm8;
31417 assign( dV, getYMMReg(rV) );
31419 if ( epartIsReg( modrm ) ) {
31420 UInt rE = eregOfRexRM(pfx, modrm);
31421 assign( sV, getYMMReg(rE) );
31422 imm8 = getUChar(delta+1);
31423 delta += 1+1;
31424 DIP("vpalignr $%u,%s,%s,%s\n", imm8, nameYMMReg(rE),
31425 nameYMMReg(rV), nameYMMReg(rG));
31426 } else {
31427 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31428 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
31429 imm8 = getUChar(delta+alen);
31430 delta += alen+1;
31431 DIP("vpalignr $%u,%s,%s,%s\n", imm8, dis_buf,
31432 nameYMMReg(rV), nameYMMReg(rG));
31435 breakupV256toV128s( dV, &dHi, &dLo );
31436 breakupV256toV128s( sV, &sHi, &sLo );
31437 putYMMReg( rG, binop( Iop_V128HLtoV256,
31438 mkexpr( math_PALIGNR_XMM( sHi, dHi, imm8 ) ),
31439 mkexpr( math_PALIGNR_XMM( sLo, dLo, imm8 ) ) )
31440 );
31441 *uses_vvvv = True;
31442 goto decode_success;
31444 break;
31446 case 0x14:
31447 /* VPEXTRB imm8, xmm2, reg/m8 = VEX.128.66.0F3A.W0 14 /r ib */
31448 if (have66noF2noF3(pfx)
31449 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31450 delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ );
31451 goto decode_success;
31453 break;
31455 case 0x15:
31456 /* VPEXTRW imm8, reg/m16, xmm2 */
31457 /* VPEXTRW = VEX.128.66.0F3A.W0 15 /r ib */
31458 if (have66noF2noF3(pfx)
31459 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31460 delta = dis_PEXTRW( vbi, pfx, delta, True/*isAvx*/ );
31461 goto decode_success;
31463 break;
31465 case 0x16:
31466 /* VPEXTRD imm8, r32/m32, xmm2 */
31467 /* VPEXTRD = VEX.128.66.0F3A.W0 16 /r ib */
31468 if (have66noF2noF3(pfx)
31469 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31470 delta = dis_PEXTRD( vbi, pfx, delta, True/*isAvx*/ );
31471 goto decode_success;
31473 /* VPEXTRQ = VEX.128.66.0F3A.W1 16 /r ib */
31474 if (have66noF2noF3(pfx)
31475 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
31476 delta = dis_PEXTRQ( vbi, pfx, delta, True/*isAvx*/ );
31477 goto decode_success;
31479 break;
31481 case 0x17:
31482 /* VEXTRACTPS imm8, xmm1, r32/m32 = VEX.128.66.0F3A.WIG 17 /r ib */
31483 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31484 delta = dis_EXTRACTPS( vbi, pfx, delta, True/*isAvx*/ );
31485 goto decode_success;
31487 break;
31489 case 0x18:
31490 /* VINSERTF128 r/m, rV, rD
31491 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */
31492 /* VINSERTF128 = VEX.NDS.256.66.0F3A.W0 18 /r ib */
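/* Reader's note (illustrative): the destination is a copy of rV with one
   128-bit lane, chosen by imm8 bit 0, replaced by the 128-bit source --
   hence the two lane copies followed by a single lane overwrite below. */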
31493 if (have66noF2noF3(pfx)
31494 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
31495 UChar modrm = getUChar(delta);
31496 UInt ib = 0;
31497 UInt rG = gregOfRexRM(pfx, modrm);
31498 UInt rV = getVexNvvvv(pfx);
31499 IRTemp t128 = newTemp(Ity_V128);
31500 if (epartIsReg(modrm)) {
31501 UInt rE = eregOfRexRM(pfx, modrm);
31502 delta += 1;
31503 assign(t128, getXMMReg(rE));
31504 ib = getUChar(delta);
31505 DIP("vinsertf128 $%u,%s,%s,%s\n",
31506 ib, nameXMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31507 } else {
31508 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31509 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
31510 delta += alen;
31511 ib = getUChar(delta);
31512 DIP("vinsertf128 $%u,%s,%s,%s\n",
31513 ib, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31515 delta++;
31516 putYMMRegLane128(rG, 0, getYMMRegLane128(rV, 0));
31517 putYMMRegLane128(rG, 1, getYMMRegLane128(rV, 1));
31518 putYMMRegLane128(rG, ib & 1, mkexpr(t128));
31519 *uses_vvvv = True;
31520 goto decode_success;
31522 break;
31524 case 0x19:
31525 /* VEXTRACTF128 $lane_no, rS, r/m
31526 ::: r/m:V128 = a lane of rS:V256 (RM format) */
31527 /* VEXTRACTF128 = VEX.256.66.0F3A.W0 19 /r ib */
31528 if (have66noF2noF3(pfx)
31529 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
31530 UChar modrm = getUChar(delta);
31531 UInt ib = 0;
31532 UInt rS = gregOfRexRM(pfx, modrm);
31533 IRTemp t128 = newTemp(Ity_V128);
31534 if (epartIsReg(modrm)) {
31535 UInt rD = eregOfRexRM(pfx, modrm);
31536 delta += 1;
31537 ib = getUChar(delta);
31538 assign(t128, getYMMRegLane128(rS, ib & 1));
31539 putYMMRegLoAndZU(rD, mkexpr(t128));
31540 DIP("vextractf128 $%u,%s,%s\n",
31541 ib, nameXMMReg(rS), nameYMMReg(rD));
31542 } else {
31543 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31544 delta += alen;
31545 ib = getUChar(delta);
31546 assign(t128, getYMMRegLane128(rS, ib & 1));
31547 storeLE(mkexpr(addr), mkexpr(t128));
31548 DIP("vextractf128 $%u,%s,%s\n",
31549 ib, nameYMMReg(rS), dis_buf);
31551 delta++;
31552 /* doesn't use vvvv */
31553 goto decode_success;
31555 break;
31557 case 0x1D:
31558 /* VCVTPS2PH imm8, xmm2, xmm1/m64 = VEX.128.66.0F3A.W0 1D /r ib */
31559 if (have66noF2noF3(pfx)
31560 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/
31561 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
31562 delta = dis_VCVTPS2PH( vbi, pfx, delta, /*is256bit=*/False );
31563 goto decode_success;
31565 /* VCVTPS2PH imm8, ymm2, ymm1/m128 = VEX.256.66.0F3A.W0 1D /r ib */
31566 if (have66noF2noF3(pfx)
31567 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/
31568 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
31569 delta = dis_VCVTPS2PH( vbi, pfx, delta, /*is256bit=*/True );
31570 goto decode_success;
31572 break;
31574 case 0x20:
31575 /* VPINSRB r32/m8, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 20 /r ib */
31576 if (have66noF2noF3(pfx)
31577 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31578 UChar modrm = getUChar(delta);
31579 UInt rG = gregOfRexRM(pfx, modrm);
31580 UInt rV = getVexNvvvv(pfx);
31581 Int imm8;
31582 IRTemp src_u8 = newTemp(Ity_I8);
31584 if ( epartIsReg( modrm ) ) {
31585 UInt rE = eregOfRexRM(pfx,modrm);
31586 imm8 = (Int)(getUChar(delta+1) & 15);
31587 assign( src_u8, unop(Iop_32to8, getIReg32( rE )) );
31588 delta += 1+1;
31589 DIP( "vpinsrb $%d,%s,%s,%s\n",
31590 imm8, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) );
31591 } else {
31592 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31593 imm8 = (Int)(getUChar(delta+alen) & 15);
31594 assign( src_u8, loadLE( Ity_I8, mkexpr(addr) ) );
31595 delta += alen+1;
31596 DIP( "vpinsrb $%d,%s,%s,%s\n",
31597 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31600 IRTemp src_vec = newTemp(Ity_V128);
31601 assign(src_vec, getXMMReg( rV ));
31602 IRTemp res_vec = math_PINSRB_128( src_vec, src_u8, imm8 );
31603 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31604 *uses_vvvv = True;
31605 goto decode_success;
31607 break;
31609 case 0x21:
31610 /* VINSERTPS imm8, xmm3/m32, xmm2, xmm1
31611 = VEX.NDS.128.66.0F3A.WIG 21 /r ib */
31612 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31613 UChar modrm = getUChar(delta);
31614 UInt rG = gregOfRexRM(pfx, modrm);
31615 UInt rV = getVexNvvvv(pfx);
31616 UInt imm8;
31617 IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */
31618 const IRTemp inval = IRTemp_INVALID;
31620 if ( epartIsReg( modrm ) ) {
31621 UInt rE = eregOfRexRM(pfx, modrm);
31622 IRTemp vE = newTemp(Ity_V128);
31623 assign( vE, getXMMReg(rE) );
31624 IRTemp dsE[4] = { inval, inval, inval, inval };
31625 breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] );
31626 imm8 = getUChar(delta+1);
31627 d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */
31628 delta += 1+1;
31629 DIP( "insertps $%u, %s,%s\n",
31630 imm8, nameXMMReg(rE), nameXMMReg(rG) );
31631 } else {
31632 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31633 assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) );
31634 imm8 = getUChar(delta+alen);
31635 delta += alen+1;
31636 DIP( "insertps $%u, %s,%s\n",
31637 imm8, dis_buf, nameXMMReg(rG) );
31640 IRTemp vV = newTemp(Ity_V128);
31641 assign( vV, getXMMReg(rV) );
31643 putYMMRegLoAndZU( rG, mkexpr(math_INSERTPS( vV, d2ins, imm8 )) );
31644 *uses_vvvv = True;
31645 goto decode_success;
31647 break;
31649 case 0x22:
31650 /* VPINSRD r32/m32, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 22 /r ib */
31651 if (have66noF2noF3(pfx)
31652 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31653 UChar modrm = getUChar(delta);
31654 UInt rG = gregOfRexRM(pfx, modrm);
31655 UInt rV = getVexNvvvv(pfx);
31656 Int imm8_10;
31657 IRTemp src_u32 = newTemp(Ity_I32);
31659 if ( epartIsReg( modrm ) ) {
31660 UInt rE = eregOfRexRM(pfx,modrm);
31661 imm8_10 = (Int)(getUChar(delta+1) & 3);
31662 assign( src_u32, getIReg32( rE ) );
31663 delta += 1+1;
31664 DIP( "vpinsrd $%d,%s,%s,%s\n",
31665 imm8_10, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) );
31666 } else {
31667 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31668 imm8_10 = (Int)(getUChar(delta+alen) & 3);
31669 assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) );
31670 delta += alen+1;
31671 DIP( "vpinsrd $%d,%s,%s,%s\n",
31672 imm8_10, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31675 IRTemp src_vec = newTemp(Ity_V128);
31676 assign(src_vec, getXMMReg( rV ));
31677 IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 );
31678 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31679 *uses_vvvv = True;
31680 goto decode_success;
31682 /* VPINSRQ r64/m64, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W1 22 /r ib */
31683 if (have66noF2noF3(pfx)
31684 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
31685 UChar modrm = getUChar(delta);
31686 UInt rG = gregOfRexRM(pfx, modrm);
31687 UInt rV = getVexNvvvv(pfx);
31688 Int imm8_0;
31689 IRTemp src_u64 = newTemp(Ity_I64);
31691 if ( epartIsReg( modrm ) ) {
31692 UInt rE = eregOfRexRM(pfx,modrm);
31693 imm8_0 = (Int)(getUChar(delta+1) & 1);
31694 assign( src_u64, getIReg64( rE ) );
31695 delta += 1+1;
31696 DIP( "vpinsrq $%d,%s,%s,%s\n",
31697 imm8_0, nameIReg64(rE), nameXMMReg(rV), nameXMMReg(rG) );
31698 } else {
31699 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31700 imm8_0 = (Int)(getUChar(delta+alen) & 1);
31701 assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) );
31702 delta += alen+1;
31703 DIP( "vpinsrq $%d,%s,%s,%s\n",
31704 imm8_0, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31707 IRTemp src_vec = newTemp(Ity_V128);
31708 assign(src_vec, getXMMReg( rV ));
31709 IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 );
31710 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31711 *uses_vvvv = True;
31712 goto decode_success;
31714 break;
31716 case 0x38:
31717 /* VINSERTI128 r/m, rV, rD
31718 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */
31719 /* VINSERTI128 = VEX.NDS.256.66.0F3A.W0 38 /r ib */
31720 if (have66noF2noF3(pfx)
31721 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
31722 UChar modrm = getUChar(delta);
31723 UInt ib = 0;
31724 UInt rG = gregOfRexRM(pfx, modrm);
31725 UInt rV = getVexNvvvv(pfx);
31726 IRTemp t128 = newTemp(Ity_V128);
31727 if (epartIsReg(modrm)) {
31728 UInt rE = eregOfRexRM(pfx, modrm);
31729 delta += 1;
31730 assign(t128, getXMMReg(rE));
31731 ib = getUChar(delta);
31732 DIP("vinserti128 $%u,%s,%s,%s\n",
31733 ib, nameXMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31734 } else {
31735 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31736 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
31737 delta += alen;
31738 ib = getUChar(delta);
31739 DIP("vinserti128 $%u,%s,%s,%s\n",
31740 ib, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31742 delta++;
31743 putYMMRegLane128(rG, 0, getYMMRegLane128(rV, 0));
31744 putYMMRegLane128(rG, 1, getYMMRegLane128(rV, 1));
31745 putYMMRegLane128(rG, ib & 1, mkexpr(t128));
31746 *uses_vvvv = True;
31747 goto decode_success;
31749 break;
31751 case 0x39:
31752 /* VEXTRACTI128 $lane_no, rS, r/m
31753 ::: r/m:V128 = a lane of rS:V256 (RM format) */
31754 /* VEXTRACTI128 = VEX.256.66.0F3A.W0 39 /r ib */
31755 if (have66noF2noF3(pfx)
31756 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
31757 UChar modrm = getUChar(delta);
31758 UInt ib = 0;
31759 UInt rS = gregOfRexRM(pfx, modrm);
31760 IRTemp t128 = newTemp(Ity_V128);
31761 if (epartIsReg(modrm)) {
31762 UInt rD = eregOfRexRM(pfx, modrm);
31763 delta += 1;
31764 ib = getUChar(delta);
31765 assign(t128, getYMMRegLane128(rS, ib & 1));
31766 putYMMRegLoAndZU(rD, mkexpr(t128));
31767 DIP("vextracti128 $%u,%s,%s\n",
31768 ib, nameYMMReg(rS), nameXMMReg(rD));
31769 } else {
31770 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31771 delta += alen;
31772 ib = getUChar(delta);
31773 assign(t128, getYMMRegLane128(rS, ib & 1));
31774 storeLE(mkexpr(addr), mkexpr(t128));
31775 DIP("vextracti128 $%u,%s,%s\n",
31776 ib, nameYMMReg(rS), dis_buf);
31778 delta++;
31779 /* doesn't use vvvv */
31780 goto decode_success;
31782 break;
31784 case 0x40:
31785 /* VDPPS imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 40 /r ib */
31786 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31787 UChar modrm = getUChar(delta);
31788 UInt rG = gregOfRexRM(pfx, modrm);
31789 UInt rV = getVexNvvvv(pfx);
31790 IRTemp dst_vec = newTemp(Ity_V128);
31791 Int imm8;
31792 if (epartIsReg( modrm )) {
31793 UInt rE = eregOfRexRM(pfx,modrm);
31794 imm8 = (Int)getUChar(delta+1);
31795 assign( dst_vec, getXMMReg( rE ) );
31796 delta += 1+1;
31797 DIP( "vdpps $%d,%s,%s,%s\n",
31798 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
31799 } else {
31800 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31801 imm8 = (Int)getUChar(delta+alen);
31802 assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) );
31803 delta += alen+1;
31804 DIP( "vdpps $%d,%s,%s,%s\n",
31805 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31808 IRTemp src_vec = newTemp(Ity_V128);
31809 assign(src_vec, getXMMReg( rV ));
31810 IRTemp res_vec = math_DPPS_128( src_vec, dst_vec, imm8 );
31811 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31812 *uses_vvvv = True;
31813 goto decode_success;
31815 /* VDPPS imm8, ymm3/m256,ymm2,ymm1 = VEX.NDS.256.66.0F3A.WIG 40 /r ib */
31816 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31817 UChar modrm = getUChar(delta);
31818 UInt rG = gregOfRexRM(pfx, modrm);
31819 UInt rV = getVexNvvvv(pfx);
31820 IRTemp dst_vec = newTemp(Ity_V256);
31821 Int imm8;
31822 if (epartIsReg( modrm )) {
31823 UInt rE = eregOfRexRM(pfx,modrm);
31824 imm8 = (Int)getUChar(delta+1);
31825 assign( dst_vec, getYMMReg( rE ) );
31826 delta += 1+1;
31827 DIP( "vdpps $%d,%s,%s,%s\n",
31828 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG) );
31829 } else {
31830 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31831 imm8 = (Int)getUChar(delta+alen);
31832 assign( dst_vec, loadLE( Ity_V256, mkexpr(addr) ) );
31833 delta += alen+1;
31834 DIP( "vdpps $%d,%s,%s,%s\n",
31835 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
31838 IRTemp src_vec = newTemp(Ity_V256);
31839 assign(src_vec, getYMMReg( rV ));
31840 IRTemp s0, s1, d0, d1;
31841 s0 = s1 = d0 = d1 = IRTemp_INVALID;
31842 breakupV256toV128s( dst_vec, &d1, &d0 );
31843 breakupV256toV128s( src_vec, &s1, &s0 );
31844 putYMMReg( rG, binop( Iop_V128HLtoV256,
31845 mkexpr( math_DPPS_128(s1, d1, imm8) ),
31846 mkexpr( math_DPPS_128(s0, d0, imm8) ) ) );
31847 *uses_vvvv = True;
31848 goto decode_success;
31850 break;
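      /* For reference: a sketch of the per-lane step math_DPPS_128 is
         assumed to perform, per the SSE4.1 DPPS definition, for 32-bit
         float lanes s[] and d[]:
            prod[i] = (imm8 & (0x10 << i)) ? s[i] * d[i] : 0.0f;  // imm8[7:4]
            sum     = prod[0] + prod[1] + prod[2] + prod[3];
            res[i]  = (imm8 & (1 << i)) ? sum : 0.0f;             // imm8[3:0]
         The 256-bit form above just applies this independently to the low
         and high 128-bit halves with the same imm8. */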
31852 case 0x41:
31853 /* VDPPD imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 41 /r ib */
31854 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31855 UChar modrm = getUChar(delta);
31856 UInt rG = gregOfRexRM(pfx, modrm);
31857 UInt rV = getVexNvvvv(pfx);
31858 IRTemp dst_vec = newTemp(Ity_V128);
31859 Int imm8;
31860 if (epartIsReg( modrm )) {
31861 UInt rE = eregOfRexRM(pfx,modrm);
31862 imm8 = (Int)getUChar(delta+1);
31863 assign( dst_vec, getXMMReg( rE ) );
31864 delta += 1+1;
31865 DIP( "vdppd $%d,%s,%s,%s\n",
31866 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
31867 } else {
31868 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31869 imm8 = (Int)getUChar(delta+alen);
31870 assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) );
31871 delta += alen+1;
31872 DIP( "vdppd $%d,%s,%s,%s\n",
31873 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31876 IRTemp src_vec = newTemp(Ity_V128);
31877 assign(src_vec, getXMMReg( rV ));
31878 IRTemp res_vec = math_DPPD_128( src_vec, dst_vec, imm8 );
31879 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31880 *uses_vvvv = True;
31881 goto decode_success;
31883 break;
31885 case 0x42:
31886 /* VMPSADBW imm8, xmm3/m128,xmm2,xmm1 */
31887 /* VMPSADBW = VEX.NDS.128.66.0F3A.WIG 42 /r ib */
31888 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31889 UChar modrm = getUChar(delta);
31890 Int imm8;
31891 IRTemp src_vec = newTemp(Ity_V128);
31892 IRTemp dst_vec = newTemp(Ity_V128);
31893 UInt rG = gregOfRexRM(pfx, modrm);
31894 UInt rV = getVexNvvvv(pfx);
31896 assign( dst_vec, getXMMReg(rV) );
31898 if ( epartIsReg( modrm ) ) {
31899 UInt rE = eregOfRexRM(pfx, modrm);
31901 imm8 = (Int)getUChar(delta+1);
31902 assign( src_vec, getXMMReg(rE) );
31903 delta += 1+1;
31904 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
31905 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
31906 } else {
31907 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
31908 1/* imm8 is 1 byte after the amode */ );
31909 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
31910 imm8 = (Int)getUChar(delta+alen);
31911 delta += alen+1;
31912 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
31913 dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31916 putYMMRegLoAndZU( rG, mkexpr( math_MPSADBW_128(dst_vec,
31917 src_vec, imm8) ) );
31918 *uses_vvvv = True;
31919 goto decode_success;
31921 /* VMPSADBW imm8, ymm3/m256,ymm2,ymm1 */
31922 /* VMPSADBW = VEX.NDS.256.66.0F3A.WIG 42 /r ib */
31923 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31924 UChar modrm = getUChar(delta);
31925 Int imm8;
31926 IRTemp src_vec = newTemp(Ity_V256);
31927 IRTemp dst_vec = newTemp(Ity_V256);
31928 UInt rG = gregOfRexRM(pfx, modrm);
31929 UInt rV = getVexNvvvv(pfx);
31930 IRTemp sHi, sLo, dHi, dLo;
31931 sHi = sLo = dHi = dLo = IRTemp_INVALID;
31933 assign( dst_vec, getYMMReg(rV) );
31935 if ( epartIsReg( modrm ) ) {
31936 UInt rE = eregOfRexRM(pfx, modrm);
31938 imm8 = (Int)getUChar(delta+1);
31939 assign( src_vec, getYMMReg(rE) );
31940 delta += 1+1;
31941 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
31942 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG) );
31943 } else {
31944 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
31945 1/* imm8 is 1 byte after the amode */ );
31946 assign( src_vec, loadLE( Ity_V256, mkexpr(addr) ) );
31947 imm8 = (Int)getUChar(delta+alen);
31948 delta += alen+1;
31949 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
31950 dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
31953 breakupV256toV128s( dst_vec, &dHi, &dLo );
31954 breakupV256toV128s( src_vec, &sHi, &sLo );
31955 putYMMReg( rG, binop( Iop_V128HLtoV256,
31956 mkexpr( math_MPSADBW_128(dHi, sHi, imm8 >> 3) ),
31957 mkexpr( math_MPSADBW_128(dLo, sLo, imm8) ) ) );
31958 *uses_vvvv = True;
31959 goto decode_success;
31961 break;
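      /* For reference: why the high half is handed imm8 >> 3.  The 256-bit
         MPSADBW uses imm8[2:0] for the low 128-bit lane and imm8[5:3] for
         the high lane.  Within one lane, math_MPSADBW_128(d, s, imm8) is
         assumed to implement the SSE4.1 definition:
            blk = s.bytes[(imm8 & 3)*4 .. (imm8 & 3)*4 + 3];  // fixed block of s
            off = (imm8 & 4) ? 4 : 0;                         // window start in d
            for (j = 0; j < 8; j++)
               res.word[j] =   |d.byte[off+j+0] - blk[0]|
                             + |d.byte[off+j+1] - blk[1]|
                             + |d.byte[off+j+2] - blk[2]|
                             + |d.byte[off+j+3] - blk[3]|;
      */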
31963 case 0x44:
31964 /* VPCLMULQDQ imm8, xmm3/m128,xmm2,xmm1 */
31965 /* VPCLMULQDQ = VEX.NDS.128.66.0F3A.WIG 44 /r ib */
31966 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
31967 * Carry-less multiplication of selected XMM quadwords into XMM
31968 * registers (a.k.a. multiplication of polynomials over GF(2))
31969 */
31970 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31971 UChar modrm = getUChar(delta);
31972 Int imm8;
31973 IRTemp sV = newTemp(Ity_V128);
31974 IRTemp dV = newTemp(Ity_V128);
31975 UInt rG = gregOfRexRM(pfx, modrm);
31976 UInt rV = getVexNvvvv(pfx);
31978 assign( dV, getXMMReg(rV) );
31980 if ( epartIsReg( modrm ) ) {
31981 UInt rE = eregOfRexRM(pfx, modrm);
31982 imm8 = (Int)getUChar(delta+1);
31983 assign( sV, getXMMReg(rE) );
31984 delta += 1+1;
31985 DIP( "vpclmulqdq $%d, %s,%s,%s\n", imm8,
31986 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
31987 } else {
31988 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
31989 1/* imm8 is 1 byte after the amode */ );
31990 assign( sV, loadLE( Ity_V128, mkexpr(addr) ) );
31991 imm8 = (Int)getUChar(delta+alen);
31992 delta += alen+1;
31993 DIP( "vpclmulqdq $%d, %s,%s,%s\n",
31994 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31997 putYMMRegLoAndZU( rG, mkexpr( math_PCLMULQDQ(dV, sV, imm8) ) );
31998 *uses_vvvv = True;
31999 goto decode_success;
32001 break;
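      /* For reference: a sketch of the carry-less multiply math_PCLMULQDQ
         is assumed to perform.  imm8 bit 0 picks the low/high quadword of
         the V operand, imm8 bit 4 picks the quadword of the E operand, and
         the two 64-bit values a, b are multiplied over GF(2):
            r = 0;                                      // 128-bit accumulator
            for (i = 0; i < 64; i++)
               if ((a >> i) & 1) r ^= (uint128)b << i;  // XOR instead of add
      */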
32003 case 0x46:
32004 /* VPERM2I128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F3A.W0 46 /r ib */
32005 if (have66noF2noF3(pfx)
32006 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
32007 UChar modrm = getUChar(delta);
32008 UInt imm8 = 0;
32009 UInt rG = gregOfRexRM(pfx, modrm);
32010 UInt rV = getVexNvvvv(pfx);
32011 IRTemp s00 = newTemp(Ity_V128);
32012 IRTemp s01 = newTemp(Ity_V128);
32013 IRTemp s10 = newTemp(Ity_V128);
32014 IRTemp s11 = newTemp(Ity_V128);
32015 assign(s00, getYMMRegLane128(rV, 0));
32016 assign(s01, getYMMRegLane128(rV, 1));
32017 if (epartIsReg(modrm)) {
32018 UInt rE = eregOfRexRM(pfx, modrm);
32019 delta += 1;
32020 imm8 = getUChar(delta);
32021 DIP("vperm2i128 $%u,%s,%s,%s\n",
32022 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
32023 assign(s10, getYMMRegLane128(rE, 0));
32024 assign(s11, getYMMRegLane128(rE, 1));
32025 } else {
32026 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
32027 delta += alen;
32028 imm8 = getUChar(delta);
32029 DIP("vperm2i128 $%u,%s,%s,%s\n",
32030 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
32031 assign(s10, loadLE(Ity_V128, binop(Iop_Add64,
32032 mkexpr(addr), mkU64(0))));
32033 assign(s11, loadLE(Ity_V128, binop(Iop_Add64,
32034 mkexpr(addr), mkU64(16))));
32036 delta++;
32037 # define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \
32038 : ((_nn)==2) ? s10 : s11)
32039 putYMMRegLane128(rG, 0, mkexpr(SEL((imm8 >> 0) & 3)));
32040 putYMMRegLane128(rG, 1, mkexpr(SEL((imm8 >> 4) & 3)));
32041 # undef SEL
32042 if (imm8 & (1<<3)) putYMMRegLane128(rG, 0, mkV128(0));
32043 if (imm8 & (1<<7)) putYMMRegLane128(rG, 1, mkV128(0));
32044 *uses_vvvv = True;
32045 goto decode_success;
32047 break;
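      /* For reference, the selection implemented above, written out: with
         S = { V.lane0, V.lane1, E.lane0, E.lane1 },
            dst.lane0 = S[imm8[1:0]], forced to zero if imm8 bit 3 is set;
            dst.lane1 = S[imm8[5:4]], forced to zero if imm8 bit 7 is set;
         which is why the zeroing writes come after the SEL() stores. */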
32049 case 0x4A:
32050 /* VBLENDVPS xmmG, xmmE/memE, xmmV, xmmIS4
32051 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
32052 /* VBLENDVPS = VEX.NDS.128.66.0F3A.WIG 4A /r /is4 */
32053 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
32054 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
32055 "vblendvps", 4, Iop_SarN32x4 );
32056 *uses_vvvv = True;
32057 goto decode_success;
32059 /* VBLENDVPS ymmG, ymmE/memE, ymmV, ymmIS4
32060 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
32061 /* VBLENDVPS = VEX.NDS.256.66.0F3A.WIG 4A /r /is4 */
32062 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
32063 delta = dis_VBLENDV_256 ( vbi, pfx, delta,
32064 "vblendvps", 4, Iop_SarN32x4 );
32065 *uses_vvvv = True;
32066 goto decode_success;
32068 break;
32070 case 0x4B:
32071 /* VBLENDVPD xmmG, xmmE/memE, xmmV, xmmIS4
32072 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
32073 /* VBLENDVPD = VEX.NDS.128.66.0F3A.WIG 4B /r /is4 */
32074 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
32075 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
32076 "vblendvpd", 8, Iop_SarN64x2 );
32077 *uses_vvvv = True;
32078 goto decode_success;
32080 /* VBLENDVPD ymmG, ymmE/memE, ymmV, ymmIS4
32081 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
32082 /* VBLENDVPD = VEX.NDS.256.66.0F3A.WIG 4B /r /is4 */
32083 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
32084 delta = dis_VBLENDV_256 ( vbi, pfx, delta,
32085 "vblendvpd", 8, Iop_SarN64x2 );
32086 *uses_vvvv = True;
32087 goto decode_success;
32089 break;
32091 case 0x4C:
32092 /* VPBLENDVB xmmG, xmmE/memE, xmmV, xmmIS4
32093 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
32094 /* VPBLENDVB = VEX.NDS.128.66.0F3A.WIG 4C /r /is4 */
32095 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
32096 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
32097 "vpblendvb", 1, Iop_SarN8x16 );
32098 *uses_vvvv = True;
32099 goto decode_success;
32101 /* VPBLENDVB ymmG, ymmE/memE, ymmV, ymmIS4
32102 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
32103 /* VPBLENDVB = VEX.NDS.256.66.0F3A.WIG 4C /r /is4 */
32104 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
32105 delta = dis_VBLENDV_256 ( vbi, pfx, delta,
32106 "vpblendvb", 1, Iop_SarN8x16 );
32107 *uses_vvvv = True;
32108 goto decode_success;
32110 break;
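      /* For reference: a sketch of the blend dis_VBLENDV_{128,256} are
         assumed to perform, per the AVX definition.  For each element of
         the given width, the top bit of the corresponding element of the
         is4-selected register chooses E (bit set) or V (bit clear); the
         SarNxx op named above smears that top bit across the element to
         build the mask:
            mask = is4_reg >>s (element_bits - 1);   // arithmetic shift
            res  = (E & mask) | (V & ~mask);
      */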
32112 case 0x60:
32113 case 0x61:
32114 case 0x62:
32115 case 0x63:
32116 /* VEX.128.66.0F3A.WIG 63 /r ib = VPCMPISTRI imm8, xmm2/m128, xmm1
32117 VEX.128.66.0F3A.WIG 62 /r ib = VPCMPISTRM imm8, xmm2/m128, xmm1
32118 VEX.128.66.0F3A.WIG 61 /r ib = VPCMPESTRI imm8, xmm2/m128, xmm1
32119 VEX.128.66.0F3A.WIG 60 /r ib = VPCMPESTRM imm8, xmm2/m128, xmm1
32120 (selected special cases that actually occur in glibc,
32121 not by any means a complete implementation.)
32122 */
32123 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
32124 Long delta0 = delta;
32125 delta = dis_PCMPxSTRx( vbi, pfx, delta, True/*isAvx*/, opc );
32126 if (delta > delta0) goto decode_success;
32127 /* else fall through; dis_PCMPxSTRx failed to decode it */
32129 break;
32131 case 0x5C ... 0x5F:
32132 case 0x68 ... 0x6F:
32133 case 0x78 ... 0x7F:
32134 /* AMD FMA4: VFMADDSUB/VFMSUBADD (5C..5F), VFMADD/VFMSUB (68..6F), VFNMADD/VFNMSUB (78..7F), in PS/PD/SS/SD variants; all handed to dis_FMA4 */
32135 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
32136 Long delta0 = delta;
32137 delta = dis_FMA4( pfx, delta, opc, uses_vvvv, vbi );
32138 if (delta > delta0) {
32139 dres->hint = Dis_HintVerbose;
32140 goto decode_success;
32142 /* else fall through; dis_FMA4 failed to decode it */
32144 break;
32146 case 0xDF:
32147 /* VAESKEYGENASSIST imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG DF /r */
32148 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
32149 delta = dis_AESKEYGENASSIST( vbi, pfx, delta, True/*isAvx*/ );
32150 goto decode_success;
32152 break;
32154 case 0xF0:
32155 /* RORX imm8, r/m32, r32a = VEX.LZ.F2.0F3A.W0 F0 /r /i */
32156 /* RORX imm8, r/m64, r64a = VEX.LZ.F2.0F3A.W1 F0 /r /i */
32157 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
32158 Int size = getRexW(pfx) ? 8 : 4;
32159 IRType ty = szToITy(size);
32160 IRTemp src = newTemp(ty);
32161 UChar rm = getUChar(delta);
32162 UChar imm8;
32164 if (epartIsReg(rm)) {
32165 imm8 = getUChar(delta+1);
32166 assign( src, getIRegE(size,pfx,rm) );
32167 DIP("rorx %d,%s,%s\n", imm8, nameIRegE(size,pfx,rm),
32168 nameIRegG(size,pfx,rm));
32169 delta += 2;
32170 } else {
32171 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
32172 imm8 = getUChar(delta+alen);
32173 assign( src, loadLE(ty, mkexpr(addr)) );
32174 DIP("rorx %d,%s,%s\n", imm8, dis_buf, nameIRegG(size,pfx,rm));
32175 delta += alen + 1;
32177 imm8 &= 8*size-1;
32179 /* dst = (src >>u imm8) | (src << (size-imm8)) */
32180 putIRegG( size, pfx, rm,
32181 imm8 == 0 ? mkexpr(src)
32182 : binop( mkSizedOp(ty,Iop_Or8),
32183 binop( mkSizedOp(ty,Iop_Shr8), mkexpr(src),
32184 mkU8(imm8) ),
32185 binop( mkSizedOp(ty,Iop_Shl8), mkexpr(src),
32186 mkU8(8*size-imm8) ) ) );
32187 /* Flags aren't modified. */
32188 goto decode_success;
32190 break;
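      /* For reference: the rotate computed above, as plain C (a sketch;
         rorx64 is an illustrative name, not a decoder function):
            uint64_t rorx64 ( uint64_t src, unsigned imm8 ) {
               imm8 &= 63;
               return imm8 == 0 ? src
                                : (src >> imm8) | (src << (64 - imm8));
            }
         RORX takes its count from the immediate only and, unlike ROR,
         neither reads nor writes any flags. */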
32192 default:
32193 break;
32197 //decode_failure:
32198 return deltaIN;
32200 decode_success:
32201 return delta;
32205 /*------------------------------------------------------------*/
32206 /*--- ---*/
32207 /*--- Disassemble a single instruction ---*/
32208 /*--- ---*/
32209 /*------------------------------------------------------------*/
32211 /* Disassemble a single instruction into IR. The instruction is
32212 located in host memory at &guest_code[delta]. */
32214 static
32215 DisResult disInstr_AMD64_WRK (
32216 /*OUT*/Bool* expect_CAS,
32217 Long delta64,
32218 const VexArchInfo* archinfo,
32219 const VexAbiInfo* vbi,
32220 Bool sigill_diag
32221 )
32222 {
32223 IRTemp t1, t2;
32224 UChar pre;
32225 Int n, n_prefixes;
32226 DisResult dres;
32228 /* The running delta */
32229 Long delta = delta64;
32231 /* Holds eip at the start of the insn, so that we can print
32232 consistent error messages for unimplemented insns. */
32233 Long delta_start = delta;
32235 /* sz denotes the nominal data-op size of the insn; we change it to
32236 2 if an 0x66 prefix is seen and 8 if REX.W is 1. In case of
32237 conflict REX.W takes precedence. */
32238 Int sz = 4;
32240 /* pfx holds the summary of prefixes. */
32241 Prefix pfx = PFX_EMPTY;
32243 /* Holds the computed opcode-escape indication. */
32244 Escape esc = ESC_NONE;
32246 /* Set result defaults. */
32247 dres.whatNext = Dis_Continue;
32248 dres.len = 0;
32249 dres.jk_StopHere = Ijk_INVALID;
32250 dres.hint = Dis_HintNone;
32251 *expect_CAS = False;
32253 vassert(guest_RIP_next_assumed == 0);
32254 vassert(guest_RIP_next_mustcheck == False);
32256 t1 = t2 = IRTemp_INVALID;
32258 DIP("\t0x%llx: ", guest_RIP_bbstart+delta);
32260 /* Spot "Special" instructions (see comment at top of file). */
32262 const UChar* code = guest_code + delta;
32263 /* Spot the 16-byte preamble:
32264 48C1C703 rolq $3, %rdi
32265 48C1C70D rolq $13, %rdi
32266 48C1C73D rolq $61, %rdi
32267 48C1C733 rolq $51, %rdi
32268 */
32269 if (code[ 0] == 0x48 && code[ 1] == 0xC1 && code[ 2] == 0xC7
32270 && code[ 3] == 0x03 &&
32271 code[ 4] == 0x48 && code[ 5] == 0xC1 && code[ 6] == 0xC7
32272 && code[ 7] == 0x0D &&
32273 code[ 8] == 0x48 && code[ 9] == 0xC1 && code[10] == 0xC7
32274 && code[11] == 0x3D &&
32275 code[12] == 0x48 && code[13] == 0xC1 && code[14] == 0xC7
32276 && code[15] == 0x33) {
32277 /* Got a "Special" instruction preamble. Which one is it? */
32278 if (code[16] == 0x48 && code[17] == 0x87
32279 && code[18] == 0xDB /* xchgq %rbx,%rbx */) {
32280 /* %RDX = client_request ( %RAX ) */
32281 DIP("%%rdx = client_request ( %%rax )\n");
32282 delta += 19;
32283 jmp_lit(&dres, Ijk_ClientReq, guest_RIP_bbstart+delta);
32284 vassert(dres.whatNext == Dis_StopHere);
32285 goto decode_success;
32287 else
32288 if (code[16] == 0x48 && code[17] == 0x87
32289 && code[18] == 0xC9 /* xchgq %rcx,%rcx */) {
32290 /* %RAX = guest_NRADDR */
32291 DIP("%%rax = guest_NRADDR\n");
32292 delta += 19;
32293 putIRegRAX(8, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
32294 goto decode_success;
32296 else
32297 if (code[16] == 0x48 && code[17] == 0x87
32298 && code[18] == 0xD2 /* xchgq %rdx,%rdx */) {
32299 /* call-noredir *%RAX */
32300 DIP("call-noredir *%%rax\n");
32301 delta += 19;
32302 t1 = newTemp(Ity_I64);
32303 assign(t1, getIRegRAX(8));
32304 t2 = newTemp(Ity_I64);
32305 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
32306 putIReg64(R_RSP, mkexpr(t2));
32307 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta));
32308 jmp_treg(&dres, Ijk_NoRedir, t1);
32309 vassert(dres.whatNext == Dis_StopHere);
32310 goto decode_success;
32312 else
32313 if (code[16] == 0x48 && code[17] == 0x87
32314 && code[18] == 0xff /* xchgq %rdi,%rdi */) {
32315 /* IR injection */
32316 DIP("IR injection\n");
32317 vex_inject_ir(irsb, Iend_LE);
32319 // Invalidate the current insn. The reason is that the IRop we're
32320 // injecting here can change. In which case the translation has to
32321 // be redone. For ease of handling, we simply invalidate all the
32322 // time.
32323 stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_RIP_curr_instr)));
32324 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(19)));
32326 delta += 19;
32328 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) );
32329 dres.whatNext = Dis_StopHere;
32330 dres.jk_StopHere = Ijk_InvalICache;
32331 goto decode_success;
32333 /* We don't know what it is. */
32334 goto decode_failure;
32335 /*NOTREACHED*/
32339 /* Eat prefixes, summarising the result in pfx and sz, and rejecting
32340 as many invalid combinations as possible. */
32341 n_prefixes = 0;
32342 while (True) {
32343 if (n_prefixes > 7) goto decode_failure;
32344 pre = getUChar(delta);
32345 switch (pre) {
32346 case 0x66: pfx |= PFX_66; break;
32347 case 0x67: pfx |= PFX_ASO; break;
32348 case 0xF2: pfx |= PFX_F2; break;
32349 case 0xF3: pfx |= PFX_F3; break;
32350 case 0xF0: pfx |= PFX_LOCK; *expect_CAS = True; break;
32351 case 0x2E: pfx |= PFX_CS; break;
32352 case 0x3E: pfx |= PFX_DS; break;
32353 case 0x26: pfx |= PFX_ES; break;
32354 case 0x64: pfx |= PFX_FS; break;
32355 case 0x65: pfx |= PFX_GS; break;
32356 case 0x36: pfx |= PFX_SS; break;
32357 case 0x40 ... 0x4F:
32358 pfx |= PFX_REX;
32359 if (pre & (1<<3)) pfx |= PFX_REXW;
32360 if (pre & (1<<2)) pfx |= PFX_REXR;
32361 if (pre & (1<<1)) pfx |= PFX_REXX;
32362 if (pre & (1<<0)) pfx |= PFX_REXB;
32363 break;
32364 default:
32365 goto not_a_legacy_prefix;
32367 n_prefixes++;
32368 delta++;
32371 not_a_legacy_prefix:
32372 /* We've used up all the non-VEX prefixes. Parse and validate a
32373 VEX prefix if that's appropriate. */
32374 if (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX) {
32375 /* Used temporarily for holding VEX prefixes. */
32376 UChar vex0 = getUChar(delta);
32377 if (vex0 == 0xC4) {
32378 /* 3-byte VEX */
32379 UChar vex1 = getUChar(delta+1);
32380 UChar vex2 = getUChar(delta+2);
32381 delta += 3;
32382 pfx |= PFX_VEX;
32383 /* Snarf contents of byte 1 */
32384 /* R */ pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR;
32385 /* X */ pfx |= (vex1 & (1<<6)) ? 0 : PFX_REXX;
32386 /* B */ pfx |= (vex1 & (1<<5)) ? 0 : PFX_REXB;
32387 /* m-mmmm */
32388 switch (vex1 & 0x1F) {
32389 case 1: esc = ESC_0F; break;
32390 case 2: esc = ESC_0F38; break;
32391 case 3: esc = ESC_0F3A; break;
32392 /* Any other m-mmmm field will #UD */
32393 default: goto decode_failure;
32395 /* Snarf contents of byte 2 */
32396 /* W */ pfx |= (vex2 & (1<<7)) ? PFX_REXW : 0;
32397 /* ~v3 */ pfx |= (vex2 & (1<<6)) ? 0 : PFX_VEXnV3;
32398 /* ~v2 */ pfx |= (vex2 & (1<<5)) ? 0 : PFX_VEXnV2;
32399 /* ~v1 */ pfx |= (vex2 & (1<<4)) ? 0 : PFX_VEXnV1;
32400 /* ~v0 */ pfx |= (vex2 & (1<<3)) ? 0 : PFX_VEXnV0;
32401 /* L */ pfx |= (vex2 & (1<<2)) ? PFX_VEXL : 0;
32402 /* pp */
32403 switch (vex2 & 3) {
32404 case 0: break;
32405 case 1: pfx |= PFX_66; break;
32406 case 2: pfx |= PFX_F3; break;
32407 case 3: pfx |= PFX_F2; break;
32408 default: vassert(0);
32411 else if (vex0 == 0xC5) {
32412 /* 2-byte VEX */
32413 UChar vex1 = getUChar(delta+1);
32414 delta += 2;
32415 pfx |= PFX_VEX;
32416 /* Snarf contents of byte 1 */
32417 /* R */ pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR;
32418 /* ~v3 */ pfx |= (vex1 & (1<<6)) ? 0 : PFX_VEXnV3;
32419 /* ~v2 */ pfx |= (vex1 & (1<<5)) ? 0 : PFX_VEXnV2;
32420 /* ~v1 */ pfx |= (vex1 & (1<<4)) ? 0 : PFX_VEXnV1;
32421 /* ~v0 */ pfx |= (vex1 & (1<<3)) ? 0 : PFX_VEXnV0;
32422 /* L */ pfx |= (vex1 & (1<<2)) ? PFX_VEXL : 0;
32423 /* pp */
32424 switch (vex1 & 3) {
32425 case 0: break;
32426 case 1: pfx |= PFX_66; break;
32427 case 2: pfx |= PFX_F3; break;
32428 case 3: pfx |= PFX_F2; break;
32429 default: vassert(0);
32431 /* implied: */
32432 esc = ESC_0F;
32434 /* Can't have both VEX and REX */
32435 if ((pfx & PFX_VEX) && (pfx & PFX_REX))
32436 goto decode_failure; /* can't have both */
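   /* For reference, the VEX byte layouts unpacked above:
         3-byte VEX:  C4  [R' X' B' m-mmmm]  [W vvvv' L pp]
         2-byte VEX:  C5  [R' vvvv' L pp]            (0F escape implied)
      R/X/B and vvvv are stored inverted, hence the "? 0 : PFX_..." tests;
      m-mmmm selects the 0F / 0F38 / 0F3A escape; pp encodes an implied
      none / 66 / F3 / F2 prefix. */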
32439 /* Dump invalid combinations */
32440 n = 0;
32441 if (pfx & PFX_F2) n++;
32442 if (pfx & PFX_F3) n++;
32443 if (n > 1)
32444 goto decode_failure; /* can't have both */
32446 n = 0;
32447 if (pfx & PFX_CS) n++;
32448 if (pfx & PFX_DS) n++;
32449 if (pfx & PFX_ES) n++;
32450 if (pfx & PFX_FS) n++;
32451 if (pfx & PFX_GS) n++;
32452 if (pfx & PFX_SS) n++;
32453 if (n > 1)
32454 goto decode_failure; /* multiple seg overrides == illegal */
32456 /* We have a %fs prefix. Reject it if there's no evidence in 'vbi'
32457 that we should accept it. */
32458 if ((pfx & PFX_FS) && !vbi->guest_amd64_assume_fs_is_const)
32459 goto decode_failure;
32461 /* Ditto for %gs prefixes. */
32462 if ((pfx & PFX_GS) && !vbi->guest_amd64_assume_gs_is_const)
32463 goto decode_failure;
32465 /* Set up sz. */
32466 sz = 4;
32467 if (pfx & PFX_66) sz = 2;
32468 if ((pfx & PFX_REX) && (pfx & PFX_REXW)) sz = 8;
32470 /* Now we should be looking at the primary opcode byte or the
32471 leading escapes. Check that any LOCK prefix is actually
32472 allowed. */
32473 if (haveLOCK(pfx)) {
32474 if (can_be_used_with_LOCK_prefix( &guest_code[delta] )) {
32475 DIP("lock ");
32476 } else {
32477 *expect_CAS = False;
32478 goto decode_failure;
32482 /* Eat up opcode escape bytes, until we're really looking at the
32483 primary opcode byte. But only if there's no VEX present. */
32484 if (!(pfx & PFX_VEX)) {
32485 vassert(esc == ESC_NONE);
32486 pre = getUChar(delta);
32487 if (pre == 0x0F) {
32488 delta++;
32489 pre = getUChar(delta);
32490 switch (pre) {
32491 case 0x38: esc = ESC_0F38; delta++; break;
32492 case 0x3A: esc = ESC_0F3A; delta++; break;
32493 default: esc = ESC_0F; break;
32498 /* So now we're really really looking at the primary opcode
32499 byte. */
32500 Long delta_at_primary_opcode = delta;
32502 if (!(pfx & PFX_VEX)) {
32503 /* Handle non-VEX prefixed instructions. "Legacy" (non-VEX) SSE
32504 instructions preserve the upper 128 bits of YMM registers;
32505 iow we can simply ignore the presence of the upper halves of
32506 these registers. */
32507 switch (esc) {
32508 case ESC_NONE:
32509 delta = dis_ESC_NONE( &dres, expect_CAS,
32510 archinfo, vbi, pfx, sz, delta );
32511 break;
32512 case ESC_0F:
32513 delta = dis_ESC_0F ( &dres, expect_CAS,
32514 archinfo, vbi, pfx, sz, delta );
32515 break;
32516 case ESC_0F38:
32517 delta = dis_ESC_0F38( &dres,
32518 archinfo, vbi, pfx, sz, delta );
32519 break;
32520 case ESC_0F3A:
32521 delta = dis_ESC_0F3A( &dres,
32522 archinfo, vbi, pfx, sz, delta );
32523 break;
32524 default:
32525 vassert(0);
32527 } else {
32528 /* VEX prefixed instruction */
32529 /* Sloppy Intel wording: "An instruction encoded with a VEX.128
32530 prefix that loads a YMM register operand ..." zeroes out bits
32531 128 and above of the register. */
32532 Bool uses_vvvv = False;
32533 switch (esc) {
32534 case ESC_0F:
32535 delta = dis_ESC_0F__VEX ( &dres, &uses_vvvv,
32536 archinfo, vbi, pfx, sz, delta );
32537 break;
32538 case ESC_0F38:
32539 delta = dis_ESC_0F38__VEX ( &dres, &uses_vvvv,
32540 archinfo, vbi, pfx, sz, delta );
32541 break;
32542 case ESC_0F3A:
32543 delta = dis_ESC_0F3A__VEX ( &dres, &uses_vvvv,
32544 archinfo, vbi, pfx, sz, delta );
32545 break;
32546 case ESC_NONE:
32547 /* The presence of a VEX prefix, by Intel definition,
32548 always implies at least an 0F escape. */
32549 goto decode_failure;
32550 default:
32551 vassert(0);
32553 /* If the insn doesn't use VEX.vvvv then it must be all ones.
32554 Check this. */
32555 if (!uses_vvvv) {
32556 if (getVexNvvvv(pfx) != 0)
32557 goto decode_failure;
32561 vassert(delta - delta_at_primary_opcode >= 0);
32562 vassert(delta - delta_at_primary_opcode < 16/*let's say*/);
32564 /* Use delta == delta_at_primary_opcode to denote decode failure.
32565 This implies that any successful decode must use at least one
32566 byte up. */
32567 if (delta == delta_at_primary_opcode)
32568 goto decode_failure;
32569 else
32570 goto decode_success; /* \o/ */
32573 decode_failure:
32574 /* All decode failures end up here. */
32575 if (sigill_diag) {
32576 vex_printf("vex amd64->IR: unhandled instruction bytes: "
32577 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
32578 getUChar(delta_start+0),
32579 getUChar(delta_start+1),
32580 getUChar(delta_start+2),
32581 getUChar(delta_start+3),
32582 getUChar(delta_start+4),
32583 getUChar(delta_start+5),
32584 getUChar(delta_start+6),
32585 getUChar(delta_start+7),
32586 getUChar(delta_start+8),
32587 getUChar(delta_start+9) );
32588 vex_printf("vex amd64->IR: REX=%d REX.W=%d REX.R=%d REX.X=%d REX.B=%d\n",
32589 haveREX(pfx) ? 1 : 0, getRexW(pfx), getRexR(pfx),
32590 getRexX(pfx), getRexB(pfx));
32591 vex_printf("vex amd64->IR: VEX=%d VEX.L=%d VEX.nVVVV=0x%x ESC=%s\n",
32592 haveVEX(pfx) ? 1 : 0, getVexL(pfx),
32593 getVexNvvvv(pfx),
32594 esc==ESC_NONE ? "NONE" :
32595 esc==ESC_0F ? "0F" :
32596 esc==ESC_0F38 ? "0F38" :
32597 esc==ESC_0F3A ? "0F3A" : "???");
32598 vex_printf("vex amd64->IR: PFX.66=%d PFX.F2=%d PFX.F3=%d\n",
32599 have66(pfx) ? 1 : 0, haveF2(pfx) ? 1 : 0,
32600 haveF3(pfx) ? 1 : 0);
32603 /* Tell the dispatcher that this insn cannot be decoded, and so has
32604 not been executed, and (is currently) the next to be executed.
32605 RIP should be up-to-date since it was made so at the start of each
32606 insn, but nevertheless be paranoid and update it again right
32607 now. */
32608 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
32609 jmp_lit(&dres, Ijk_NoDecode, guest_RIP_curr_instr);
32610 vassert(dres.whatNext == Dis_StopHere);
32611 dres.len = 0;
32612 /* We also need to say that a CAS is not expected now, regardless
32613 of what it might have been set to at the start of the function,
32614 since the IR that we've emitted just above (to synthesise a
32615 SIGILL) does not involve any CAS, and presumably no other IR has
32616 been emitted for this (non-decoded) insn. */
32617 *expect_CAS = False;
32618 return dres;
32621 decode_success:
32622 /* All decode successes end up here. */
32623 switch (dres.whatNext) {
32624 case Dis_Continue:
32625 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) );
32626 break;
32627 case Dis_StopHere:
32628 break;
32629 default:
32630 vassert(0);
32633 DIP("\n");
32634 dres.len = toUInt(delta - delta_start);
32635 return dres;
32638 #undef DIP
32639 #undef DIS
32642 /*------------------------------------------------------------*/
32643 /*--- Top-level fn ---*/
32644 /*------------------------------------------------------------*/
32646 /* Disassemble a single instruction into IR. The instruction
32647 is located in host memory at &guest_code[delta]. */
32649 DisResult disInstr_AMD64 ( IRSB* irsb_IN,
32650 const UChar* guest_code_IN,
32651 Long delta,
32652 Addr guest_IP,
32653 VexArch guest_arch,
32654 const VexArchInfo* archinfo,
32655 const VexAbiInfo* abiinfo,
32656 VexEndness host_endness_IN,
32657 Bool sigill_diag_IN )
32658 {
32659 Int i, x1, x2;
32660 Bool expect_CAS, has_CAS;
32661 DisResult dres;
32663 /* Set globals (see top of this file) */
32664 vassert(guest_arch == VexArchAMD64);
32665 guest_code = guest_code_IN;
32666 irsb = irsb_IN;
32667 host_endness = host_endness_IN;
32668 guest_RIP_curr_instr = guest_IP;
32669 guest_RIP_bbstart = guest_IP - delta;
32671 /* We'll consult these after doing disInstr_AMD64_WRK. */
32672 guest_RIP_next_assumed = 0;
32673 guest_RIP_next_mustcheck = False;
32675 x1 = irsb_IN->stmts_used;
32676 expect_CAS = False;
32677 dres = disInstr_AMD64_WRK ( &expect_CAS,
32678 delta, archinfo, abiinfo, sigill_diag_IN );
32679 x2 = irsb_IN->stmts_used;
32680 vassert(x2 >= x1);
32682 /* If disInstr_AMD64_WRK tried to figure out the next rip, check it
32683 got it right. Failure of this assertion is serious and denotes
32684 a bug in disInstr. */
32685 if (guest_RIP_next_mustcheck
32686 && guest_RIP_next_assumed != guest_RIP_curr_instr + dres.len) {
32687 vex_printf("\n");
32688 vex_printf("assumed next %%rip = 0x%llx\n",
32689 guest_RIP_next_assumed );
32690 vex_printf(" actual next %%rip = 0x%llx\n",
32691 guest_RIP_curr_instr + dres.len );
32692 vpanic("disInstr_AMD64: disInstr miscalculated next %rip");
32695 /* See comment at the top of disInstr_AMD64_WRK for meaning of
32696 expect_CAS. Here, we (sanity-)check for the presence/absence of
32697 IRCAS as directed by the returned expect_CAS value. */
32698 has_CAS = False;
32699 for (i = x1; i < x2; i++) {
32700 if (irsb_IN->stmts[i]->tag == Ist_CAS)
32701 has_CAS = True;
32704 if (expect_CAS != has_CAS) {
32705 /* inconsistency detected. re-disassemble the instruction so as
32706 to generate a useful error message; then assert. */
32707 vex_traceflags |= VEX_TRACE_FE;
32708 dres = disInstr_AMD64_WRK ( &expect_CAS,
32709 delta, archinfo, abiinfo, sigill_diag_IN );
32710 for (i = x1; i < x2; i++) {
32711 vex_printf("\t\t");
32712 ppIRStmt(irsb_IN->stmts[i]);
32713 vex_printf("\n");
32715 /* Failure of this assertion is serious and denotes a bug in
32716 disInstr. */
32717 vpanic("disInstr_AMD64: inconsistency in LOCK prefix handling");
32720 return dres;
32724 /*------------------------------------------------------------*/
32725 /*--- Unused stuff ---*/
32726 /*------------------------------------------------------------*/
32728 // A potentially more Memcheck-friendly version of gen_LZCNT, if
32729 // this should ever be needed.
32731 //static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
32733 // /* Scheme is simple: propagate the most significant 1-bit into all
32734 // lower positions in the word. This gives a word of the form
32735 // 0---01---1. Now invert it, giving a word of the form
32736 // 1---10---0, then do a population-count idiom (to count the 1s,
32737 // which is the number of leading zeroes, or the word size if the
32738 // original word was 0).
32739 // */
32740 // Int i;
32741 // IRTemp t[7];
32742 // for (i = 0; i < 7; i++) {
32743 // t[i] = newTemp(ty);
32744 // }
32745 // if (ty == Ity_I64) {
32746 // assign(t[0], binop(Iop_Or64, mkexpr(src),
32747 // binop(Iop_Shr64, mkexpr(src), mkU8(1))));
32748 // assign(t[1], binop(Iop_Or64, mkexpr(t[0]),
32749 // binop(Iop_Shr64, mkexpr(t[0]), mkU8(2))));
32750 // assign(t[2], binop(Iop_Or64, mkexpr(t[1]),
32751 // binop(Iop_Shr64, mkexpr(t[1]), mkU8(4))));
32752 // assign(t[3], binop(Iop_Or64, mkexpr(t[2]),
32753 // binop(Iop_Shr64, mkexpr(t[2]), mkU8(8))));
32754 // assign(t[4], binop(Iop_Or64, mkexpr(t[3]),
32755 // binop(Iop_Shr64, mkexpr(t[3]), mkU8(16))));
32756 // assign(t[5], binop(Iop_Or64, mkexpr(t[4]),
32757 // binop(Iop_Shr64, mkexpr(t[4]), mkU8(32))));
32758 // assign(t[6], unop(Iop_Not64, mkexpr(t[5])));
32759 // return gen_POPCOUNT(ty, t[6]);
32760 // }
32761 // if (ty == Ity_I32) {
32762 // assign(t[0], binop(Iop_Or32, mkexpr(src),
32763 // binop(Iop_Shr32, mkexpr(src), mkU8(1))));
32764 // assign(t[1], binop(Iop_Or32, mkexpr(t[0]),
32765 // binop(Iop_Shr32, mkexpr(t[0]), mkU8(2))));
32766 // assign(t[2], binop(Iop_Or32, mkexpr(t[1]),
32767 // binop(Iop_Shr32, mkexpr(t[1]), mkU8(4))));
32768 // assign(t[3], binop(Iop_Or32, mkexpr(t[2]),
32769 // binop(Iop_Shr32, mkexpr(t[2]), mkU8(8))));
32770 // assign(t[4], binop(Iop_Or32, mkexpr(t[3]),
32771 // binop(Iop_Shr32, mkexpr(t[3]), mkU8(16))));
32772 // assign(t[5], unop(Iop_Not32, mkexpr(t[4])));
32773 // return gen_POPCOUNT(ty, t[5]);
32774 // }
32775 // if (ty == Ity_I16) {
32776 // assign(t[0], binop(Iop_Or16, mkexpr(src),
32777 // binop(Iop_Shr16, mkexpr(src), mkU8(1))));
32778 // assign(t[1], binop(Iop_Or16, mkexpr(t[0]),
32779 // binop(Iop_Shr16, mkexpr(t[0]), mkU8(2))));
32780 // assign(t[2], binop(Iop_Or16, mkexpr(t[1]),
32781 // binop(Iop_Shr16, mkexpr(t[1]), mkU8(4))));
32782 // assign(t[3], binop(Iop_Or16, mkexpr(t[2]),
32783 // binop(Iop_Shr16, mkexpr(t[2]), mkU8(8))));
32784 // assign(t[4], unop(Iop_Not16, mkexpr(t[3])));
32785 // return gen_POPCOUNT(ty, t[4]);
32786 // }
32787 // vassert(0);
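// For reference, the same propagate-then-popcount idiom in plain C (a
// sketch; clz32 and popcount32 are illustrative names only):
//
//   static uint32_t clz32 ( uint32_t x )
//   {
//      x |= x >> 1;  x |= x >> 2;  x |= x >> 4;
//      x |= x >> 8;  x |= x >> 16;       /* smear the top 1 downwards */
//      return popcount32(~x);            /* 32 if x was 0 */
//   }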
32791 /*--------------------------------------------------------------------*/
32792 /*--- end guest_amd64_toIR.c ---*/
32793 /*--------------------------------------------------------------------*/