/*--------------------------------------------------------------------*/
/*--- begin                                       guest_x86_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

/* Translates x86 code to IR. */
/* TODO:

   All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
   to ensure a 32-bit value is being written.

   FUCOMI(P): what happens to A and S flags?  Currently they are
   forced to zero.

   x87 FP Limitations:

   * all arithmetic done at 64 bits

   * no FP exceptions, except for handling stack over/underflow

   * FP rounding mode observed only for float->int conversions
     and int->float conversions which could lose accuracy, and
     for float-to-float rounding.  For all other operations,
     round-to-nearest is used, regardless.

   * some of the FCOM cases could do with testing -- not convinced
     that the args are the right way round.

   * FSAVE does not re-initialise the FPU; it should do so.

   * FINIT not only initialises the FPU environment, it also
     zeroes all the FP registers.  It should leave the registers
     unchanged.

   SAHF should cause eflags[1] == 1, and in fact it produces 0.  As
   per Intel docs this bit has no meaning anyway.  Since PUSHF is the
   only way to observe eflags[1], a proper fix would be to make that
   bit be set by PUSHF.

   The state of %eflags.AC (alignment check, bit 18) is recorded by
   the simulation (viz, if you set it with popf then a pushf produces
   the value you set it to), but it is otherwise ignored.  In
   particular, setting it to 1 does NOT cause alignment checking to
   happen.  Programs that set it to 1 and then rely on the resulting
   SIGBUSs to inform them of misaligned accesses will not work.

   Implementation of sysenter is necessarily partial.  sysenter is a
   kind of system call entry.  When doing a sysenter, the return
   address is not known -- that is something that is beyond Vex's
   knowledge.  So the generated IR forces a return to the scheduler,
   which can do what it likes to simulate the sysenter, but it MUST
   set this thread's guest_EIP field with the continuation address
   before resuming execution.  If that doesn't happen, the thread will
   jump to address zero, which is probably fatal.

   This module uses global variables and so is not MT-safe (if that
   should ever become relevant).

   The delta values are 32-bit ints, not 64-bit ints.  That means
   this module may not work right if run on a 64-bit host.  That should
   be fixed properly, really -- if anyone ever wants to use Vex to
   translate x86 code for execution on a 64-bit host.

   casLE (implementation of lock-prefixed insns) and rep-prefixed
   insns: the side-exit back to the start of the insn is done with
   Ijk_Boring.  This is quite wrong, it should be done with
   Ijk_NoRedir, since otherwise the side exit, which is intended to
   restart the instruction for whatever reason, could go somewhere
   entirely else.  Doing it right (with Ijk_NoRedir jumps) would make
   no-redir jumps performance critical, at least for rep-prefixed
   instructions, since all iterations thereof would involve such a
   jump.  It's not such a big deal with casLE since the side exit is
   only taken if the CAS fails, that is, the location is contended,
   which is relatively unlikely.

   XXXX: Nov 2009: handling of SWP on ARM suffers from the same
   problem.

   Note also, the test for CAS success vs failure is done using
   Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
   Iop_Cmp{EQ,NE} equivalents.  This is so as to tell Memcheck that it
   shouldn't definedness-check these comparisons.  See
   COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
   background/rationale.
*/
/* Performance holes:

   - fcom ; fstsw %ax ; sahf
     sahf does not update the O flag (sigh) and so O needs to
     be computed.  This is done expensively; it would be better
     to have a calculate_eflags_o helper.

   - emwarns; some FP codes can generate huge numbers of these
     if the fpucw is changed in an inner loop.  It would be
     better for the guest state to have an emwarn-enable reg
     which can be set zero or nonzero.  If it is zero, emwarns
     are not flagged, and instead control just flows all the
     way through bbs as usual.
*/
133 /* "Special" instructions.
135 This instruction decoder can decode three special instructions
136 which mean nothing natively (are no-ops as far as regs/mem are
137 concerned) but have meaning for supporting Valgrind. A special
138 instruction is flagged by the 12-byte preamble C1C703 C1C70D C1C71D
139 C1C713 (in the standard interpretation, that means: roll $3, %edi;
140 roll $13, %edi; roll $29, %edi; roll $19, %edi). Following that,
141 one of the following 3 are allowed (standard interpretation in
142 parentheses):
144 87DB (xchgl %ebx,%ebx) %EDX = client_request ( %EAX )
145 87C9 (xchgl %ecx,%ecx) %EAX = guest_NRADDR
146 87D2 (xchgl %edx,%edx) call-noredir *%EAX
147 87FF (xchgl %edi,%edi) IR injection
149 Any other bytes following the 12-byte preamble are illegal and
150 constitute a failure in instruction decoding. This all assumes
151 that the preamble will never occur except in specific code
152 fragments designed for Valgrind to catch.
154 No prefixes may precede a "Special" instruction.
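
/* Illustrative note (not in the original source): the preamble above
   is the byte sequence
      C1 C7 03  C1 C7 0D  C1 C7 1D  C1 C7 13
   i.e. four "roll $imm8, %edi" insns whose rotate amounts sum to
   3+13+29+19 == 64, a multiple of 32, so %edi ends up unchanged and
   the sequence is indeed a no-op as far as regs/mem are concerned. */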
/* LOCK prefixed instructions.  These are translated using IR-level
   CAS statements (IRCAS) and are believed to preserve atomicity, even
   from the point of view of some other process racing against a
   simulated one (presumably they communicate via a shared memory
   segment).

   Handlers which are aware of LOCK prefixes are:
      dis_op2_G_E      (add, or, adc, sbb, and, sub, xor)
      dis_cmpxchg_G_E  (cmpxchg)
      dis_Grp1         (add, or, adc, sbb, and, sub, xor)
      dis_Grp3         (not, neg)
      dis_Grp4         (inc, dec)
      dis_Grp5         (inc, dec)
      dis_Grp8_Imm     (bts, btc, btr)
      dis_bt_G_E       (bts, btc, btr)
      dis_xadd_G_E     (xadd)
*/
#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_x86.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_generic_x87.h"
#include "guest_x86_defs.h"

/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an insn, right
   down in disInstr_X86, so that we don't have to pass them around
   endlessly.  They are all constant during the translation of any
   given insn. */

/* We need to know this to do sub-register accesses correctly. */
static VexEndness host_endness;

/* Pointer to the guest code area (points to start of BB, not to the
   insn being processed). */
static const UChar* guest_code;

/* The guest address corresponding to guest_code[0]. */
static Addr32 guest_EIP_bbstart;

/* The guest address for the instruction currently being
   translated. */
static Addr32 guest_EIP_curr_instr;

/* The IRSB* into which we're generating code. */
static IRSB* irsb;

/*------------------------------------------------------------*/
/*--- Debugging output                                     ---*/
/*------------------------------------------------------------*/

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)

/*------------------------------------------------------------*/
/*--- Offsets of various parts of the x86 guest state.     ---*/
/*------------------------------------------------------------*/

#define OFFB_EAX       offsetof(VexGuestX86State,guest_EAX)
#define OFFB_EBX       offsetof(VexGuestX86State,guest_EBX)
#define OFFB_ECX       offsetof(VexGuestX86State,guest_ECX)
#define OFFB_EDX       offsetof(VexGuestX86State,guest_EDX)
#define OFFB_ESP       offsetof(VexGuestX86State,guest_ESP)
#define OFFB_EBP       offsetof(VexGuestX86State,guest_EBP)
#define OFFB_ESI       offsetof(VexGuestX86State,guest_ESI)
#define OFFB_EDI       offsetof(VexGuestX86State,guest_EDI)

#define OFFB_EIP       offsetof(VexGuestX86State,guest_EIP)

#define OFFB_CC_OP     offsetof(VexGuestX86State,guest_CC_OP)
#define OFFB_CC_DEP1   offsetof(VexGuestX86State,guest_CC_DEP1)
#define OFFB_CC_DEP2   offsetof(VexGuestX86State,guest_CC_DEP2)
#define OFFB_CC_NDEP   offsetof(VexGuestX86State,guest_CC_NDEP)

#define OFFB_FPREGS    offsetof(VexGuestX86State,guest_FPREG[0])
#define OFFB_FPTAGS    offsetof(VexGuestX86State,guest_FPTAG[0])
#define OFFB_DFLAG     offsetof(VexGuestX86State,guest_DFLAG)
#define OFFB_IDFLAG    offsetof(VexGuestX86State,guest_IDFLAG)
#define OFFB_ACFLAG    offsetof(VexGuestX86State,guest_ACFLAG)
#define OFFB_FTOP      offsetof(VexGuestX86State,guest_FTOP)
#define OFFB_FC3210    offsetof(VexGuestX86State,guest_FC3210)
#define OFFB_FPROUND   offsetof(VexGuestX86State,guest_FPROUND)

#define OFFB_CS        offsetof(VexGuestX86State,guest_CS)
#define OFFB_DS        offsetof(VexGuestX86State,guest_DS)
#define OFFB_ES        offsetof(VexGuestX86State,guest_ES)
#define OFFB_FS        offsetof(VexGuestX86State,guest_FS)
#define OFFB_GS        offsetof(VexGuestX86State,guest_GS)
#define OFFB_SS        offsetof(VexGuestX86State,guest_SS)
#define OFFB_LDT       offsetof(VexGuestX86State,guest_LDT)
#define OFFB_GDT       offsetof(VexGuestX86State,guest_GDT)

#define OFFB_SSEROUND  offsetof(VexGuestX86State,guest_SSEROUND)
#define OFFB_XMM0      offsetof(VexGuestX86State,guest_XMM0)
#define OFFB_XMM1      offsetof(VexGuestX86State,guest_XMM1)
#define OFFB_XMM2      offsetof(VexGuestX86State,guest_XMM2)
#define OFFB_XMM3      offsetof(VexGuestX86State,guest_XMM3)
#define OFFB_XMM4      offsetof(VexGuestX86State,guest_XMM4)
#define OFFB_XMM5      offsetof(VexGuestX86State,guest_XMM5)
#define OFFB_XMM6      offsetof(VexGuestX86State,guest_XMM6)
#define OFFB_XMM7      offsetof(VexGuestX86State,guest_XMM7)

#define OFFB_EMNOTE    offsetof(VexGuestX86State,guest_EMNOTE)

#define OFFB_CMSTART   offsetof(VexGuestX86State,guest_CMSTART)
#define OFFB_CMLEN     offsetof(VexGuestX86State,guest_CMLEN)
#define OFFB_NRADDR    offsetof(VexGuestX86State,guest_NRADDR)

#define OFFB_IP_AT_SYSCALL offsetof(VexGuestX86State,guest_IP_AT_SYSCALL)

/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the        ---*/
/*--- x86 insn stream.                                     ---*/
/*------------------------------------------------------------*/

/* This is the Intel register encoding -- integer regs. */
#define R_EAX 0
#define R_ECX 1
#define R_EDX 2
#define R_EBX 3
#define R_ESP 4
#define R_EBP 5
#define R_ESI 6
#define R_EDI 7

#define R_AL (0+R_EAX)
#define R_AH (4+R_EAX)

/* This is the Intel register encoding -- segment regs. */
#define R_ES 0
#define R_CS 1
#define R_SS 2
#define R_DS 3
#define R_FS 4
#define R_GS 5

/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* Various simple conversions */

static UInt extend_s_8to32( UInt x )
{
   return (UInt)((Int)(x << 24) >> 24);
}

static UInt extend_s_16to32 ( UInt x )
{
   return (UInt)((Int)(x << 16) >> 16);
}
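
/* Worked examples: extend_s_8to32(0x80) == 0xFFFFFF80 and
   extend_s_16to32(0x8000) == 0xFFFF8000; values with the sign bit
   clear pass through unchanged. */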

/* Fetch a byte from the guest insn stream. */
static UChar getIByte ( Int delta )
{
   return guest_code[delta];
}

/* Extract the reg field from a modRM byte. */
static Int gregOfRM ( UChar mod_reg_rm )
{
   return (Int)( (mod_reg_rm >> 3) & 7 );
}

/* Figure out whether the mod and rm parts of a modRM byte refer to a
   register or memory.  If so, the byte will have the form 11XXXYYY,
   where YYY is the register number. */
static Bool epartIsReg ( UChar mod_reg_rm )
{
   return toBool(0xC0 == (mod_reg_rm & 0xC0));
}

/* ... and extract the register number ... */
static Int eregOfRM ( UChar mod_reg_rm )
{
   return (Int)(mod_reg_rm & 0x7);
}
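
/* Illustrative example (not in the original source): the modRM byte
   0xD9 is 11 011 001 in binary, so epartIsReg(0xD9) is True,
   gregOfRM(0xD9) == 3 (%ebx) and eregOfRM(0xD9) == 1 (%ecx) -- a
   register-to-register form with %ecx as the E operand and %ebx as
   the G operand. */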

/* Get an 8/16/32-bit unsigned value out of the insn stream. */

static UChar getUChar ( Int delta )
{
   UChar v = guest_code[delta+0];
   return toUChar(v);
}

static UInt getUDisp16 ( Int delta )
{
   UInt v = guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return v & 0xFFFF;
}

static UInt getUDisp32 ( Int delta )
{
   UInt v = guest_code[delta+3]; v <<= 8;
   v |= guest_code[delta+2]; v <<= 8;
   v |= guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return v;
}

static UInt getUDisp ( Int size, Int delta )
{
   switch (size) {
      case 4: return getUDisp32(delta);
      case 2: return getUDisp16(delta);
      case 1: return (UInt)getUChar(delta);
      default: vpanic("getUDisp(x86)");
   }
   return 0; /*notreached*/
}

/* Get a byte value out of the insn stream and sign-extend to 32
   bits. */
static UInt getSDisp8 ( Int delta )
{
   return extend_s_8to32( (UInt) (guest_code[delta]) );
}

static UInt getSDisp16 ( Int delta0 )
{
   const UChar* eip = &guest_code[delta0];
   UInt d = *eip++;
   d |= ((*eip++) << 8);
   return extend_s_16to32(d);
}

static UInt getSDisp ( Int size, Int delta )
{
   switch (size) {
      case 4: return getUDisp32(delta);
      case 2: return getSDisp16(delta);
      case 1: return getSDisp8(delta);
      default: vpanic("getSDisp(x86)");
   }
   return 0; /*notreached*/
}

/*------------------------------------------------------------*/
/*--- Helpers for constructing IR.                         ---*/
/*------------------------------------------------------------*/

/* Create a 1/2/4 byte read of an x86 integer register.  For 16/8 bit
   register references, we need to take the host endianness into
   account.  Supplied value is 0 .. 7 and in the Intel instruction
   encoding. */

static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      default: vpanic("szToITy(x86)");
   }
}

/* On a little-endian host, less significant bits of the guest
   registers are at lower addresses.  Therefore, a reference to the
   low half of a register has the same guest state offset as a
   reference to the full register.
*/
static Int integerGuestRegOffset ( Int sz, UInt archreg )
{
   vassert(archreg < 8);

   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);

   if (sz == 4 || sz == 2 || (sz == 1 && archreg < 4)) {
      switch (archreg) {
         case R_EAX: return OFFB_EAX;
         case R_EBX: return OFFB_EBX;
         case R_ECX: return OFFB_ECX;
         case R_EDX: return OFFB_EDX;
         case R_ESI: return OFFB_ESI;
         case R_EDI: return OFFB_EDI;
         case R_ESP: return OFFB_ESP;
         case R_EBP: return OFFB_EBP;
         default: vpanic("integerGuestRegOffset(x86,le)(4,2)");
      }
   }

   vassert(archreg >= 4 && archreg < 8 && sz == 1);
   switch (archreg-4) {
      case R_EAX: return 1+ OFFB_EAX;
      case R_EBX: return 1+ OFFB_EBX;
      case R_ECX: return 1+ OFFB_ECX;
      case R_EDX: return 1+ OFFB_EDX;
      default: vpanic("integerGuestRegOffset(x86,le)(1h)");
   }

   /* NOTREACHED */
   vpanic("integerGuestRegOffset(x86,le)");
}

static Int segmentGuestRegOffset ( UInt sreg )
{
   switch (sreg) {
      case R_ES: return OFFB_ES;
      case R_CS: return OFFB_CS;
      case R_SS: return OFFB_SS;
      case R_DS: return OFFB_DS;
      case R_FS: return OFFB_FS;
      case R_GS: return OFFB_GS;
      default: vpanic("segmentGuestRegOffset(x86)");
   }
}

static Int xmmGuestRegOffset ( UInt xmmreg )
{
   switch (xmmreg) {
      case 0: return OFFB_XMM0;
      case 1: return OFFB_XMM1;
      case 2: return OFFB_XMM2;
      case 3: return OFFB_XMM3;
      case 4: return OFFB_XMM4;
      case 5: return OFFB_XMM5;
      case 6: return OFFB_XMM6;
      case 7: return OFFB_XMM7;
      default: vpanic("xmmGuestRegOffset");
   }
}

/* Lanes of vector registers are always numbered from zero being the
   least significant lane (rightmost in the register). */

static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 8);
   return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
}

static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 4);
   return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
}

static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 2);
   return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
}

static IRExpr* getIReg ( Int sz, UInt archreg )
{
   vassert(sz == 1 || sz == 2 || sz == 4);
   vassert(archreg < 8);
   return IRExpr_Get( integerGuestRegOffset(sz,archreg),
                      szToITy(sz) );
}

/* Ditto, but write to a reg instead. */
static void putIReg ( Int sz, UInt archreg, IRExpr* e )
{
   IRType ty = typeOfIRExpr(irsb->tyenv, e);
   switch (sz) {
      case 1: vassert(ty == Ity_I8); break;
      case 2: vassert(ty == Ity_I16); break;
      case 4: vassert(ty == Ity_I32); break;
      default: vpanic("putIReg(x86)");
   }
   vassert(archreg < 8);
   stmt( IRStmt_Put(integerGuestRegOffset(sz,archreg), e) );
}

static IRExpr* getSReg ( UInt sreg )
{
   return IRExpr_Get( segmentGuestRegOffset(sreg), Ity_I16 );
}

static void putSReg ( UInt sreg, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
   stmt( IRStmt_Put( segmentGuestRegOffset(sreg), e ) );
}

static IRExpr* getXMMReg ( UInt xmmreg )
{
   return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
}

static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
}

static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
}

static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
}

static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
}

static void putXMMReg ( UInt xmmreg, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
   stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
}

static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane16 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
   stmt( IRStmt_Put( xmmGuestRegLane16offset(xmmreg,laneno), e ) );
}

static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* mkU8 ( UInt i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkU16 ( UInt i )
{
   vassert(i < 65536);
   return IRExpr_Const(IRConst_U16( (UShort)i ));
}

static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU ( IRType ty, UInt i )
{
   if (ty == Ity_I8)  return mkU8(i);
   if (ty == Ity_I16) return mkU16(i);
   if (ty == Ity_I32) return mkU32(i);
   /* If this panics, it usually means you passed a size (1,2,4)
      value as the IRType, rather than a real IRType. */
   vpanic("mkU(x86)");
}

static IRExpr* mkV128 ( UShort mask )
{
   return IRExpr_Const(IRConst_V128(mask));
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

static IROp mkSizedOp ( IRType ty, IROp op8 )
{
   Int adj;
   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
   vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
           || op8 == Iop_Mul8
           || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
           || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
           || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
           || op8 == Iop_CasCmpNE8
           || op8 == Iop_ExpCmpNE8
           || op8 == Iop_Not8);
   adj = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
   return adj + op8;
}
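
/* This relies on the 8/16/32-bit variants of each operation being
   declared consecutively in the IROp enumeration, so that e.g.
   mkSizedOp(Ity_I32, Iop_Add8) == Iop_Add8 + 2 == Iop_Add32. */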

static IROp mkWidenOp ( Int szSmall, Int szBig, Bool signd )
{
   if (szSmall == 1 && szBig == 4) {
      return signd ? Iop_8Sto32 : Iop_8Uto32;
   }
   if (szSmall == 1 && szBig == 2) {
      return signd ? Iop_8Sto16 : Iop_8Uto16;
   }
   if (szSmall == 2 && szBig == 4) {
      return signd ? Iop_16Sto32 : Iop_16Uto32;
   }
   vpanic("mkWidenOp(x86,guest)");
}

static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
{
   vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
   vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
   return unop(Iop_32to1,
               binop(Iop_And32,
                     unop(Iop_1Uto32,x),
                     unop(Iop_1Uto32,y)));
}

/* Generate a compare-and-swap operation, operating on memory at
   'addr'.  The expected value is 'expVal' and the new value is
   'newVal'.  If the operation fails, then transfer control (with a
   no-redir jump (XXX no -- see comment at top of this file)) to
   'restart_point', which is presumably the address of the guest
   instruction again -- retrying, essentially. */
static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
                    Addr32 restart_point )
{
   IRCAS* cas;
   IRType tyE    = typeOfIRExpr(irsb->tyenv, expVal);
   IRType tyN    = typeOfIRExpr(irsb->tyenv, newVal);
   IRTemp oldTmp = newTemp(tyE);
   IRTemp expTmp = newTemp(tyE);
   vassert(tyE == tyN);
   vassert(tyE == Ity_I32 || tyE == Ity_I16 || tyE == Ity_I8);
   assign(expTmp, expVal);
   cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
                  NULL, mkexpr(expTmp), NULL, newVal );
   stmt( IRStmt_CAS(cas) );
   stmt( IRStmt_Exit(
            binop( mkSizedOp(tyE,Iop_CasCmpNE8),
                   mkexpr(oldTmp), mkexpr(expTmp) ),
            Ijk_Boring, /*Ijk_NoRedir*/
            IRConst_U32( restart_point ),
            OFFB_EIP
         ));
}
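
/* Example of intended use (illustrative): the LOCK-aware handlers
   further down implement e.g. "lock add" by loading the old memory
   value into an 'expected' temp, computing the new value, and then
   calling
      casLE( addr, expdValue, newValue, guest_EIP_curr_instr );
   so that the whole insn is restarted if some other thread updated
   the location in the meantime. */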

/*------------------------------------------------------------*/
/*--- Helpers for %eflags.                                 ---*/
/*------------------------------------------------------------*/

/* -------------- Evaluating the flags-thunk. -------------- */

/* Build IR to calculate all the eflags from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I32. */
static IRExpr* mk_x86g_calculate_eflags_all ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "x86g_calculate_eflags_all", &x86g_calculate_eflags_all,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}

/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_Bit. */
static IRExpr* mk_x86g_calculate_condition ( X86Condcode cond )
{
   IRExpr** args
      = mkIRExprVec_5( mkU32(cond),
                       IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "x86g_calculate_condition", &x86g_calculate_condition,
           args
        );
   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
   return unop(Iop_32to1, call);
}

/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression :: Ity_I32. */
static IRExpr* mk_x86g_calculate_eflags_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           3/*regparm*/,
           "x86g_calculate_eflags_c", &x86g_calculate_eflags_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}

/* -------------- Building the flags-thunk. -------------- */

/* The machinery in this section builds the flag-thunk following a
   flag-setting operation.  Hence the various setFlags_* functions.
*/

static Bool isAddSub ( IROp op8 )
{
   return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
}

static Bool isLogic ( IROp op8 )
{
   return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
}

/* U-widen 8/16/32 bit int expr to 32. */
static IRExpr* widenUto32 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I32: return e;
      case Ity_I16: return unop(Iop_16Uto32,e);
      case Ity_I8:  return unop(Iop_8Uto32,e);
      default: vpanic("widenUto32");
   }
}

/* S-widen 8/16/32 bit int expr to 32. */
static IRExpr* widenSto32 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I32: return e;
      case Ity_I16: return unop(Iop_16Sto32,e);
      case Ity_I8:  return unop(Iop_8Sto32,e);
      default: vpanic("widenSto32");
   }
}

/* Narrow 8/16/32 bit int expr to 8/16/32.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
{
   IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
   if (src_ty == dst_ty)
      return e;
   if (src_ty == Ity_I32 && dst_ty == Ity_I16)
      return unop(Iop_32to16, e);
   if (src_ty == Ity_I32 && dst_ty == Ity_I8)
      return unop(Iop_32to8, e);

   vex_printf("\nsrc, dst tys are: ");
   ppIRType(src_ty);
   vex_printf(", ");
   ppIRType(dst_ty);
   vex_printf("\n");
   vpanic("narrowTo(x86)");
}

/* Set the flags thunk OP, DEP1 and DEP2 fields.  The supplied op is
   auto-sized up to the real op. */

static
void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
{
   Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);

   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);

   switch (op8) {
      case Iop_Add8: ccOp += X86G_CC_OP_ADDB; break;
      case Iop_Sub8: ccOp += X86G_CC_OP_SUBB; break;
      default:       ppIROp(op8);
                     vpanic("setFlags_DEP1_DEP2(x86)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(dep2))) );
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
}
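
/* For example (illustrative, with hypothetical temp names): for a
   32-bit "addl %ebx,%eax" the decoder would call
   setFlags_DEP1_DEP2(Iop_Add8, tArg1, tArg2, Ity_I32), storing
   X86G_CC_OP_ADDL in CC_OP and the two operands in CC_DEP1/CC_DEP2;
   the flags themselves are only materialised later, on demand, by
   the mk_x86g_calculate_* helpers above. */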

/* Set the OP and DEP1 fields only, and write zero to DEP2. */

static
void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
{
   Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);

   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);

   switch (op8) {
      case Iop_Or8:
      case Iop_And8:
      case Iop_Xor8: ccOp += X86G_CC_OP_LOGICB; break;
      default:       ppIROp(op8);
                     vpanic("setFlags_DEP1(x86)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) );
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
}

/* For shift operations, we put in the result and the undershifted
   result.  Except if the shift amount is zero, the thunk is left
   unchanged. */

static void setFlags_DEP1_DEP2_shift ( IROp    op32,
                                       IRTemp  res,
                                       IRTemp  resUS,
                                       IRType  ty,
                                       IRTemp  guard )
{
   Int ccOp = ty==Ity_I8 ? 2 : (ty==Ity_I16 ? 1 : 0);

   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
   vassert(guard);

   /* Both kinds of right shifts are handled by the same thunk
      operation. */
   switch (op32) {
      case Iop_Shr32:
      case Iop_Sar32: ccOp = X86G_CC_OP_SHRL - ccOp; break;
      case Iop_Shl32: ccOp = X86G_CC_OP_SHLL - ccOp; break;
      default:        ppIROp(op32);
                      vpanic("setFlags_DEP1_DEP2_shift(x86)");
   }

   /* guard :: Ity_I8.  We need to convert it to I1. */
   IRTemp guardB = newTemp(Ity_I1);
   assign( guardB, binop(Iop_CmpNE8, mkexpr(guard), mkU8(0)) );

   /* DEP1 contains the result, DEP2 contains the undershifted value. */
   stmt( IRStmt_Put( OFFB_CC_OP,
                     IRExpr_ITE( mkexpr(guardB),
                                 mkU32(ccOp),
                                 IRExpr_Get(OFFB_CC_OP,Ity_I32) ) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1,
                     IRExpr_ITE( mkexpr(guardB),
                                 widenUto32(mkexpr(res)),
                                 IRExpr_Get(OFFB_CC_DEP1,Ity_I32) ) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2,
                     IRExpr_ITE( mkexpr(guardB),
                                 widenUto32(mkexpr(resUS)),
                                 IRExpr_Get(OFFB_CC_DEP2,Ity_I32) ) ));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP,
                     IRExpr_ITE( mkexpr(guardB),
                                 mkU32(0),
                                 IRExpr_Get(OFFB_CC_NDEP,Ity_I32) ) ));
}

/* For the inc/dec case, we store in DEP1 the result value and in NDEP
   the former value of the carry flag, which unfortunately we have to
   compute. */

static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
{
   Int ccOp = inc ? X86G_CC_OP_INCB : X86G_CC_OP_DECB;

   ccOp += ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);

   /* This has to come first, because calculating the C flag
      may require reading all four thunk fields. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mk_x86g_calculate_eflags_c()) );
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(res))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) );
}

/* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
   two arguments. */

static
void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, UInt base_op )
{
   switch (ty) {
      case Ity_I8:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+0) ) );
         break;
      case Ity_I16:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+1) ) );
         break;
      case Ity_I32:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+2) ) );
         break;
      default:
         vpanic("setFlags_MUL(x86)");
   }
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(arg1)) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(arg2)) ));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
}

/* -------------- Condition codes. -------------- */

/* Condition codes, using the Intel encoding. */

static const HChar* name_X86Condcode ( X86Condcode cond )
{
   switch (cond) {
      case X86CondO:      return "o";
      case X86CondNO:     return "no";
      case X86CondB:      return "b";
      case X86CondNB:     return "nb";
      case X86CondZ:      return "z";
      case X86CondNZ:     return "nz";
      case X86CondBE:     return "be";
      case X86CondNBE:    return "nbe";
      case X86CondS:      return "s";
      case X86CondNS:     return "ns";
      case X86CondP:      return "p";
      case X86CondNP:     return "np";
      case X86CondL:      return "l";
      case X86CondNL:     return "nl";
      case X86CondLE:     return "le";
      case X86CondNLE:    return "nle";
      case X86CondAlways: return "ALWAYS";
      default: vpanic("name_X86Condcode");
   }
}

static
X86Condcode positiveIse_X86Condcode ( X86Condcode  cond,
                                      Bool*        needInvert )
{
   vassert(cond >= X86CondO && cond <= X86CondNLE);
   if (cond & 1) {
      *needInvert = True;
      return cond-1;
   } else {
      *needInvert = False;
      return cond;
   }
}
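
/* This works because the Intel encoding pairs each condition with
   its negation in adjacent even/odd codes: e.g. X86CondZ == 4 and
   X86CondNZ == 5, so positiveIse_X86Condcode(X86CondNZ, &inv)
   returns X86CondZ with inv set to True. */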

/* -------------- Helpers for ADD/SUB with carry. -------------- */

/* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
   appropriately.

   Optionally, generate a store for the 'tres' value.  This can either
   be a normal store, or it can be a cas-with-possible-failure style
   store:

   if taddr is IRTemp_INVALID, then no store is generated.

   if taddr is not IRTemp_INVALID, then a store (using taddr as
   the address) is generated:

     if texpVal is IRTemp_INVALID then a normal store is
     generated, and restart_point must be zero (it is irrelevant).

     if texpVal is not IRTemp_INVALID then a cas-style store is
     generated.  texpVal is the expected value, restart_point
     is the restart point if the store fails, and texpVal must
     have the same type as tres.
*/
static void helper_ADC ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
   UInt    thunkOp;
   IRType  ty    = szToITy(sz);
   IRTemp  oldc  = newTemp(Ity_I32);
   IRTemp  oldcn = newTemp(ty);
   IROp    plus  = mkSizedOp(ty, Iop_Add8);
   IROp    xor   = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
   vassert(sz == 1 || sz == 2 || sz == 4);
   thunkOp = sz==4 ? X86G_CC_OP_ADCL
                   : (sz==2 ? X86G_CC_OP_ADCW : X86G_CC_OP_ADCB);

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc,  binop(Iop_And32,
                        mk_x86g_calculate_eflags_c(),
                        mkU32(1)) );

   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   assign( tres, binop(plus,
                       binop(plus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1)) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}
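
/* Note on the DEP2 value stored above: it is ta2 ^ oldcn, not ta2
   itself.  The eflags helpers (in guest_x86_defs.c) undo this by
   xor'ing CC_DEP2 with the old carry saved in CC_NDEP, recovering
   the original ta2. */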

/* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
   appropriately.  As with helper_ADC, possibly generate a store of
   the result -- see comments on helper_ADC for details.
*/
static void helper_SBB ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
   UInt    thunkOp;
   IRType  ty    = szToITy(sz);
   IRTemp  oldc  = newTemp(Ity_I32);
   IRTemp  oldcn = newTemp(ty);
   IROp    minus = mkSizedOp(ty, Iop_Sub8);
   IROp    xor   = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
   vassert(sz == 1 || sz == 2 || sz == 4);
   thunkOp = sz==4 ? X86G_CC_OP_SBBL
                   : (sz==2 ? X86G_CC_OP_SBBW : X86G_CC_OP_SBBB);

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc, binop(Iop_And32,
                       mk_x86g_calculate_eflags_c(),
                       mkU32(1)) );

   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   assign( tres, binop(minus,
                       binop(minus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1) )) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}

/* -------------- Helpers for disassembly printing. -------------- */

static const HChar* nameGrp1 ( Int opc_aux )
{
   static const HChar* grp1_names[8]
     = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
   if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(x86)");
   return grp1_names[opc_aux];
}

static const HChar* nameGrp2 ( Int opc_aux )
{
   static const HChar* grp2_names[8]
     = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
   if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(x86)");
   return grp2_names[opc_aux];
}

static const HChar* nameGrp4 ( Int opc_aux )
{
   static const HChar* grp4_names[8]
     = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
   if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(x86)");
   return grp4_names[opc_aux];
}

static const HChar* nameGrp5 ( Int opc_aux )
{
   static const HChar* grp5_names[8]
     = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
   if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(x86)");
   return grp5_names[opc_aux];
}

static const HChar* nameGrp8 ( Int opc_aux )
{
   static const HChar* grp8_names[8]
     = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
   if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(x86)");
   return grp8_names[opc_aux];
}

static const HChar* nameIReg ( Int size, Int reg )
{
   static const HChar* ireg32_names[8]
     = { "%eax", "%ecx", "%edx", "%ebx",
         "%esp", "%ebp", "%esi", "%edi" };
   static const HChar* ireg16_names[8]
     = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di" };
   static const HChar* ireg8_names[8]
     = { "%al", "%cl", "%dl", "%bl",
         "%ah{sp}", "%ch{bp}", "%dh{si}", "%bh{di}" };
   if (reg < 0 || reg > 7) goto bad;
   switch (size) {
      case 4: return ireg32_names[reg];
      case 2: return ireg16_names[reg];
      case 1: return ireg8_names[reg];
   }
  bad:
   vpanic("nameIReg(X86)");
   return NULL; /*notreached*/
}

static const HChar* nameSReg ( UInt sreg )
{
   switch (sreg) {
      case R_ES: return "%es";
      case R_CS: return "%cs";
      case R_SS: return "%ss";
      case R_DS: return "%ds";
      case R_FS: return "%fs";
      case R_GS: return "%gs";
      default: vpanic("nameSReg(x86)");
   }
}

static const HChar* nameMMXReg ( Int mmxreg )
{
   static const HChar* mmx_names[8]
     = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
   if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(x86,guest)");
   return mmx_names[mmxreg];
}

static const HChar* nameXMMReg ( Int xmmreg )
{
   static const HChar* xmm_names[8]
     = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
         "%xmm4", "%xmm5", "%xmm6", "%xmm7" };
   if (xmmreg < 0 || xmmreg > 7) vpanic("name_of_xmm_reg");
   return xmm_names[xmmreg];
}

static const HChar* nameMMXGran ( Int gran )
{
   switch (gran) {
      case 0: return "b";
      case 1: return "w";
      case 2: return "d";
      case 3: return "q";
      default: vpanic("nameMMXGran(x86,guest)");
   }
}

static HChar nameISize ( Int size )
{
   switch (size) {
      case 4: return 'l';
      case 2: return 'w';
      case 1: return 'b';
      default: vpanic("nameISize(x86)");
   }
}

/*------------------------------------------------------------*/
/*--- JMP helpers                                          ---*/
/*------------------------------------------------------------*/

static void jmp_lit( /*MOD*/DisResult* dres,
                     IRJumpKind kind, Addr32 d32 )
{
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = kind;
   stmt( IRStmt_Put( OFFB_EIP, mkU32(d32) ) );
}

static void jmp_treg( /*MOD*/DisResult* dres,
                      IRJumpKind kind, IRTemp t )
{
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = kind;
   stmt( IRStmt_Put( OFFB_EIP, mkexpr(t) ) );
}

static
void jcc_01( /*MOD*/DisResult* dres,
             X86Condcode cond, Addr32 d32_false, Addr32 d32_true )
{
   Bool        invert;
   X86Condcode condPos;
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = Ijk_Boring;
   condPos = positiveIse_X86Condcode ( cond, &invert );
   if (invert) {
      stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos),
                         Ijk_Boring,
                         IRConst_U32(d32_false),
                         OFFB_EIP ) );
      stmt( IRStmt_Put( OFFB_EIP, mkU32(d32_true) ) );
   } else {
      stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos),
                         Ijk_Boring,
                         IRConst_U32(d32_true),
                         OFFB_EIP ) );
      stmt( IRStmt_Put( OFFB_EIP, mkU32(d32_false) ) );
   }
}
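
/* For example (illustrative): for "jz" (cond == X86CondZ, an even
   code) invert is False, so the generated IR side-exits to d32_true
   when the Z condition holds and otherwise falls through to the Put
   of d32_false.  For "jnz" (odd code) the condition is positivised
   to Z and the two targets are used the other way round. */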

/*------------------------------------------------------------*/
/*--- Disassembling addressing modes                       ---*/
/*------------------------------------------------------------*/

static
const HChar* sorbTxt ( UChar sorb )
{
   switch (sorb) {
      case 0:    return ""; /* no override */
      case 0x3E: return "%ds:";
      case 0x26: return "%es:";
      case 0x64: return "%fs:";
      case 0x65: return "%gs:";
      case 0x36: return "%ss:";
      default: vpanic("sorbTxt(x86,guest)");
   }
}

/* 'virtual' is an IRExpr* holding a virtual address.  Convert it to a
   linear address by adding any required segment override as indicated
   by sorb. */
static
IRExpr* handleSegOverride ( UChar sorb, IRExpr* virtual )
{
   Int    sreg;
   IRType hWordTy;
   IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;

   if (sorb == 0)
      /* the common case - no override */
      return virtual;

   switch (sorb) {
      case 0x3E: sreg = R_DS; break;
      case 0x26: sreg = R_ES; break;
      case 0x64: sreg = R_FS; break;
      case 0x65: sreg = R_GS; break;
      case 0x36: sreg = R_SS; break;
      default: vpanic("handleSegOverride(x86,guest)");
   }

   hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;

   seg_selector = newTemp(Ity_I32);
   ldt_ptr      = newTemp(hWordTy);
   gdt_ptr      = newTemp(hWordTy);
   r64          = newTemp(Ity_I64);

   assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
   assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
   assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));

   /*
   Call this to do the translation and limit checks:
   ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
                                 UInt seg_selector, UInt virtual_addr )
   */
   assign(
      r64,
      mkIRExprCCall(
         Ity_I64,
         0/*regparms*/,
         "x86g_use_seg_selector",
         &x86g_use_seg_selector,
         mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
                        mkexpr(seg_selector), virtual)
      )
   );

   /* If the high 32 of the result are non-zero, there was a
      failure in address translation.  In which case, make a
      quick exit.
   */
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
         Ijk_MapFail,
         IRConst_U32( guest_EIP_curr_instr ),
         OFFB_EIP
      )
   );

   /* otherwise, here's the translated result. */
   return unop(Iop_64to32, mkexpr(r64));
}
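
/* For example (illustrative): for an access like "%gs:0x10" the
   decoder passes sorb == 0x65, so the linear address is computed at
   run time by x86g_use_seg_selector from the guest's %gs selector
   and its LDT/GDT, with a side-exit to Ijk_MapFail if the
   translation fails. */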

/* Generate IR to calculate an address indicated by a ModRM and
   following SIB bytes.  The expression, and the number of bytes in
   the address mode, are returned.  Note that this fn should not be
   called if the R/M part of the address denotes a register instead of
   memory.  If print_codegen is true, text of the addressing mode is
   placed in buf.

   The computed address is stored in a new tempreg, and the
   identity of the tempreg is returned. */

static IRTemp disAMode_copy2tmp ( IRExpr* addr32 )
{
   IRTemp tmp = newTemp(Ity_I32);
   assign( tmp, addr32 );
   return tmp;
}

static
IRTemp disAMode ( Int* len, UChar sorb, Int delta, HChar* buf )
{
   UChar mod_reg_rm = getIByte(delta);
   delta++;

   buf[0] = (UChar)0;

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7;                      /* is now XX000YYY */
   mod_reg_rm  = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                            /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;                      /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* (%eax) .. (%edi), not including (%esp) or (%ebp).
         --> GET %reg, t
      */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         { UChar rm = mod_reg_rm;
           DIS(buf, "%s(%s)", sorbTxt(sorb), nameIReg(4,rm));
           *len = 1;
           return disAMode_copy2tmp(
                  handleSegOverride(sorb, getIReg(4,rm)));
         }

      /* d8(%eax) ... d8(%edi), not including d8(%esp)
         --> GET %reg, t ; ADDL d8, t
      */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         { UChar rm = toUChar(mod_reg_rm & 7);
           UInt  d  = getSDisp8(delta);
           DIS(buf, "%s%d(%s)", sorbTxt(sorb), (Int)d, nameIReg(4,rm));
           *len = 2;
           return disAMode_copy2tmp(
                  handleSegOverride(sorb,
                     binop(Iop_Add32,getIReg(4,rm),mkU32(d))));
         }

      /* d32(%eax) ... d32(%edi), not including d32(%esp)
         --> GET %reg, t ; ADDL d32, t
      */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         { UChar rm = toUChar(mod_reg_rm & 7);
           UInt  d  = getUDisp32(delta);
           DIS(buf, "%s0x%x(%s)", sorbTxt(sorb), d, nameIReg(4,rm));
           *len = 5;
           return disAMode_copy2tmp(
                  handleSegOverride(sorb,
                     binop(Iop_Add32,getIReg(4,rm),mkU32(d))));
         }

      /* a register, %eax .. %edi.  This shouldn't happen. */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         vpanic("disAMode(x86): not an addr!");

      /* a 32-bit literal address
         --> MOV d32, tmp
      */
      case 0x05:
         { UInt d = getUDisp32(delta);
           *len = 5;
           DIS(buf, "%s(0x%x)", sorbTxt(sorb), d);
           return disAMode_copy2tmp(
                     handleSegOverride(sorb, mkU32(d)));
         }

      case 0x04: {
         /* SIB, with no displacement.  Special cases:
            -- %esp cannot act as an index value.
               If index_r indicates %esp, zero is used for the index.
            -- when mod is zero and base indicates EBP, base is instead
               a 32-bit literal.
            It's all madness, I tell you.  Extract %index, %base and
            scale from the SIB byte.  The value denoted is then:
               | %index == %ESP && %base == %EBP
               = d32 following SIB byte
               | %index == %ESP && %base != %EBP
               = %base
               | %index != %ESP && %base == %EBP
               = d32 following SIB byte + (%index << scale)
               | %index != %ESP && %base != %EBP
               = %base + (%index << scale)

            What happens to the souls of CPU architects who dream up such
            horrendous schemes, do you suppose?
         */
         UChar sib     = getIByte(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         delta++;

         if (index_r != R_ESP && base_r != R_EBP) {
            DIS(buf, "%s(%s,%s,%d)", sorbTxt(sorb),
                      nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
            *len = 2;
            return
               disAMode_copy2tmp(
               handleSegOverride(sorb,
                  binop(Iop_Add32,
                        getIReg(4,base_r),
                        binop(Iop_Shl32, getIReg(4,index_r),
                              mkU8(scale)))));
         }

         if (index_r != R_ESP && base_r == R_EBP) {
            UInt d = getUDisp32(delta);
            DIS(buf, "%s0x%x(,%s,%d)", sorbTxt(sorb), d,
                      nameIReg(4,index_r), 1<<scale);
            *len = 6;
            return
               disAMode_copy2tmp(
               handleSegOverride(sorb,
                  binop(Iop_Add32,
                        binop(Iop_Shl32, getIReg(4,index_r), mkU8(scale)),
                        mkU32(d))));
         }

         if (index_r == R_ESP && base_r != R_EBP) {
            DIS(buf, "%s(%s,,)", sorbTxt(sorb), nameIReg(4,base_r));
            *len = 2;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb, getIReg(4,base_r)));
         }

         if (index_r == R_ESP && base_r == R_EBP) {
            UInt d = getUDisp32(delta);
            DIS(buf, "%s0x%x(,,)", sorbTxt(sorb), d);
            *len = 6;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb, mkU32(d)));
         }
         /*NOTREACHED*/
         vassert(0);
      }

      /* SIB, with 8-bit displacement.  Special cases:
         -- %esp cannot act as an index value.
            If index_r indicates %esp, zero is used for the index.
         Denoted value is:
            | %index == %ESP
            = d8 + %base
            | %index != %ESP
            = d8 + %base + (%index << scale)
      */
      case 0x0C: {
         UChar sib     = getIByte(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         UInt  d       = getSDisp8(delta+1);

         if (index_r == R_ESP) {
            DIS(buf, "%s%d(%s,,)", sorbTxt(sorb),
                      (Int)d, nameIReg(4,base_r));
            *len = 3;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb,
                      binop(Iop_Add32, getIReg(4,base_r), mkU32(d)) ));
         } else {
            DIS(buf, "%s%d(%s,%s,%d)", sorbTxt(sorb), (Int)d,
                      nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
            *len = 3;
            return
                disAMode_copy2tmp(
                handleSegOverride(sorb,
                  binop(Iop_Add32,
                        binop(Iop_Add32,
                              getIReg(4,base_r),
                              binop(Iop_Shl32,
                                    getIReg(4,index_r), mkU8(scale))),
                        mkU32(d))));
         }
         /*NOTREACHED*/
         vassert(0);
      }

      /* SIB, with 32-bit displacement.  Special cases:
         -- %esp cannot act as an index value.
            If index_r indicates %esp, zero is used for the index.
         Denoted value is:
            | %index == %ESP
            = d32 + %base
            | %index != %ESP
            = d32 + %base + (%index << scale)
      */
      case 0x14: {
         UChar sib     = getIByte(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         UInt  d       = getUDisp32(delta+1);

         if (index_r == R_ESP) {
            DIS(buf, "%s%d(%s,,)", sorbTxt(sorb),
                      (Int)d, nameIReg(4,base_r));
            *len = 6;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb,
                      binop(Iop_Add32, getIReg(4,base_r), mkU32(d)) ));
         } else {
            DIS(buf, "%s%d(%s,%s,%d)", sorbTxt(sorb), (Int)d,
                      nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
            *len = 6;
            return
                disAMode_copy2tmp(
                handleSegOverride(sorb,
                  binop(Iop_Add32,
                        binop(Iop_Add32,
                              getIReg(4,base_r),
                              binop(Iop_Shl32,
                                    getIReg(4,index_r), mkU8(scale))),
                        mkU32(d))));
         }
         /*NOTREACHED*/
         vassert(0);
      }

      default:
         vpanic("disAMode(x86)");
         return 0; /*notreached*/
   }
}

/* Figure out the number of (insn-stream) bytes constituting the amode
   beginning at delta.  Is useful for getting hold of literals beyond
   the end of the amode before it has been disassembled.  */

static UInt lengthAMode ( Int delta )
{
   UChar mod_reg_rm = getIByte(delta); delta++;

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7;               /* is now XX000YYY */
   mod_reg_rm  = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                     /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;               /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* (%eax) .. (%edi), not including (%esp) or (%ebp). */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         return 1;

      /* d8(%eax) ... d8(%edi), not including d8(%esp). */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         return 2;

      /* d32(%eax) ... d32(%edi), not including d32(%esp). */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         return 5;

      /* a register, %eax .. %edi.  (Not an addr, but still handled.) */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         return 1;

      /* a 32-bit literal address. */
      case 0x05: return 5;

      /* SIB, no displacement. */
      case 0x04: {
         UChar sib    = getIByte(delta);
         UChar base_r = toUChar(sib & 7);
         if (base_r == R_EBP) return 6; else return 2;
      }
      /* SIB, with 8-bit displacement. */
      case 0x0C: return 3;

      /* SIB, with 32-bit displacement. */
      case 0x14: return 6;

      default:
         vpanic("lengthAMode");
         return 0; /*notreached*/
   }
}
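
/* For example (illustrative): for "movl 0x8(%ecx,%edx,4), %eax"
   (bytes 8B 44 91 08) the amode starts at the modRM byte 0x44;
   squeezing gives 0x0C (SIB with 8-bit displacement), so
   lengthAMode returns 3 (modRM + SIB + d8). */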

/*------------------------------------------------------------*/
/*--- Disassembling common idioms                          ---*/
/*------------------------------------------------------------*/

/* Handle binary integer instructions of the form
      op E, G  meaning
      op reg-or-mem, reg
   Is passed a ptr to the modRM byte, the actual operation, and the
   data size.  Returns the address advanced completely over this
   instruction.

   E(src) is reg-or-mem
   G(dst) is reg.

   If E is reg, -->    GET %G,  tmp
                       OP %E,   tmp
                       PUT tmp, %G

   If E is mem and OP is not reversible,
                -->    (getAddr E) -> tmpa
                       LD (tmpa), tmpa
                       GET %G, tmp2
                       OP tmpa, tmp2
                       PUT tmp2, %G

   If E is mem and OP is reversible
                -->    (getAddr E) -> tmpa
                       LD (tmpa), tmpa
                       OP %G, tmpa
                       PUT tmpa, %G
*/
static
UInt dis_op2_E_G ( UChar        sorb,
                   Bool         addSubCarry,
                   IROp         op8,
                   Bool         keep,
                   Int          size,
                   Int          delta0,
                   const HChar* t_x86opc )
{
   HChar   dis_buf[50];
   Int     len;
   IRType  ty   = szToITy(size);
   IRTemp  dst1 = newTemp(ty);
   IRTemp  src  = newTemp(ty);
   IRTemp  dst0 = newTemp(ty);
   UChar   rm   = getUChar(delta0);
   IRTemp  addr = IRTemp_INVALID;

   /* addSubCarry == True indicates the intended operation is
      add-with-carry or subtract-with-borrow. */
   if (addSubCarry) {
      vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
      vassert(keep);
   }

   if (epartIsReg(rm)) {
      /* Specially handle XOR reg,reg, because that doesn't really
         depend on reg, and doing the obvious thing potentially
         generates a spurious value check failure due to the bogus
         dependency.  Ditto SBB reg,reg. */
      if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
          && gregOfRM(rm) == eregOfRM(rm)) {
         putIReg(size, gregOfRM(rm), mkU(ty,0));
      }
      assign( dst0, getIReg(size,gregOfRM(rm)) );
      assign( src,  getIReg(size,eregOfRM(rm)) );

      if (addSubCarry && op8 == Iop_Add8) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, gregOfRM(rm), mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, gregOfRM(rm), mkexpr(dst1));
      } else {
         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIReg(size, gregOfRM(rm), mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
                          nameIReg(size,eregOfRM(rm)),
                          nameIReg(size,gregOfRM(rm)));
      return 1+delta0;
   } else {
      /* E refers to memory */
      addr = disAMode ( &len, sorb, delta0, dis_buf);
      assign( dst0, getIReg(size,gregOfRM(rm)) );
      assign( src,  loadLE(szToITy(size), mkexpr(addr)) );

      if (addSubCarry && op8 == Iop_Add8) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, gregOfRM(rm), mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, gregOfRM(rm), mkexpr(dst1));
      } else {
         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIReg(size, gregOfRM(rm), mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
                          dis_buf,nameIReg(size,gregOfRM(rm)));
      return len+delta0;
   }
}
1918 /* Handle binary integer instructions of the form
1919 op G, E meaning
1920 op reg, reg-or-mem
1921 Is passed a ptr to the modRM byte, the actual operation, and the
1922 data size. Returns the address advanced completely over this
1923 instruction.
1925 G(src) is reg.
1926 E(dst) is reg-or-mem
1928 If E is reg, --> GET %E, tmp
1929 OP %G, tmp
1930 PUT tmp, %E
1932 If E is mem, --> (getAddr E) -> tmpa
1933 LD (tmpa), tmpv
1934 OP %G, tmpv
1935 ST tmpv, (tmpa) */
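/* For example, "addl %ecx,(%eax)" assembles as opcode 0x01 with modRM
   byte 0x08: G is %ecx (the source) and E is the memory operand
   (%eax), so the load/op/store scheme above applies. */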
1937 static
1938 UInt dis_op2_G_E ( UChar sorb,
1939 Bool locked,
1940 Bool addSubCarry,
1941 IROp op8,
1942 Bool keep,
1943 Int size,
1944 Int delta0,
1945 const HChar* t_x86opc )
1947 HChar dis_buf[50];
1948 Int len;
1949 IRType ty = szToITy(size);
1950 IRTemp dst1 = newTemp(ty);
1951 IRTemp src = newTemp(ty);
1952 IRTemp dst0 = newTemp(ty);
1953 UChar rm = getIByte(delta0);
1954 IRTemp addr = IRTemp_INVALID;
1956 /* addSubCarry == True indicates the intended operation is
1957 add-with-carry or subtract-with-borrow. */
1958 if (addSubCarry) {
1959 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
1960 vassert(keep);
1963 if (epartIsReg(rm)) {
1964 /* Specially handle XOR reg,reg, because that doesn't really
1965 depend on reg, and doing the obvious thing potentially
1966 generates a spurious value check failure due to the bogus
1967 dependency. Ditto SBB reg,reg. */
1968 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
1969 && gregOfRM(rm) == eregOfRM(rm)) {
1970 putIReg(size, eregOfRM(rm), mkU(ty,0));
1972 assign(dst0, getIReg(size,eregOfRM(rm)));
1973 assign(src, getIReg(size,gregOfRM(rm)));
1975 if (addSubCarry && op8 == Iop_Add8) {
1976 helper_ADC( size, dst1, dst0, src,
1977 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
1978 putIReg(size, eregOfRM(rm), mkexpr(dst1));
1979 } else
1980 if (addSubCarry && op8 == Iop_Sub8) {
1981 helper_SBB( size, dst1, dst0, src,
1982 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
1983 putIReg(size, eregOfRM(rm), mkexpr(dst1));
1984 } else {
1985 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
1986 if (isAddSub(op8))
1987 setFlags_DEP1_DEP2(op8, dst0, src, ty);
1988 else
1989 setFlags_DEP1(op8, dst1, ty);
1990 if (keep)
1991 putIReg(size, eregOfRM(rm), mkexpr(dst1));
1994 DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
1995 nameIReg(size,gregOfRM(rm)),
1996 nameIReg(size,eregOfRM(rm)));
1997 return 1+delta0;
2000 /* E refers to memory */
2002 addr = disAMode ( &len, sorb, delta0, dis_buf);
2003 assign(dst0, loadLE(ty,mkexpr(addr)));
2004 assign(src, getIReg(size,gregOfRM(rm)));
2006 if (addSubCarry && op8 == Iop_Add8) {
2007 if (locked) {
2008 /* cas-style store */
2009 helper_ADC( size, dst1, dst0, src,
2010 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
2011 } else {
2012 /* normal store */
2013 helper_ADC( size, dst1, dst0, src,
2014 /*store*/addr, IRTemp_INVALID, 0 );
2016 } else
2017 if (addSubCarry && op8 == Iop_Sub8) {
2018 if (locked) {
2019 /* cas-style store */
2020 helper_SBB( size, dst1, dst0, src,
2021 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
2022 } else {
2023 /* normal store */
2024 helper_SBB( size, dst1, dst0, src,
2025 /*store*/addr, IRTemp_INVALID, 0 );
2027 } else {
2028 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
2029 if (keep) {
2030 if (locked) {
2031 if (0) vex_printf("locked case\n");
2032 casLE( mkexpr(addr),
2033 mkexpr(dst0)/*expval*/,
2034 mkexpr(dst1)/*newval*/, guest_EIP_curr_instr );
2035 } else {
2036 if (0) vex_printf("nonlocked case\n");
2037 storeLE(mkexpr(addr), mkexpr(dst1));
2040 if (isAddSub(op8))
2041 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2042 else
2043 setFlags_DEP1(op8, dst1, ty);
2046 DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
2047 nameIReg(size,gregOfRM(rm)), dis_buf);
2048 return len+delta0;
2053 /* Handle move instructions of the form
2054 mov E, G meaning
2055 mov reg-or-mem, reg
2056 Is passed a ptr to the modRM byte, and the data size. Returns
2057 the address advanced completely over this instruction.
2059 E(src) is reg-or-mem
2060 G(dst) is reg.
2062 If E is reg, --> GET %E, tmpv
2063 PUT tmpv, %G
2065 If E is mem --> (getAddr E) -> tmpa
2066 LD (tmpa), tmpb
2067 PUT tmpb, %G */
2069 static
2070 UInt dis_mov_E_G ( UChar sorb,
2071 Int size,
2072 Int delta0 )
2074 Int len;
2075 UChar rm = getIByte(delta0);
2076 HChar dis_buf[50];
2078 if (epartIsReg(rm)) {
2079 putIReg(size, gregOfRM(rm), getIReg(size, eregOfRM(rm)));
2080 DIP("mov%c %s,%s\n", nameISize(size),
2081 nameIReg(size,eregOfRM(rm)),
2082 nameIReg(size,gregOfRM(rm)));
2083 return 1+delta0;
2086 /* E refers to memory */
2088 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
2089 putIReg(size, gregOfRM(rm), loadLE(szToITy(size), mkexpr(addr)));
2090 DIP("mov%c %s,%s\n", nameISize(size),
2091 dis_buf,nameIReg(size,gregOfRM(rm)));
2092 return delta0+len;
2097 /* Handle move instructions of the form
2098 mov G, E meaning
2099 mov reg, reg-or-mem
2100 Is passed a ptr to the modRM byte, and the data size. Returns
2101 the address advanced completely over this instruction.
2103 G(src) is reg.
2104 E(dst) is reg-or-mem
2106 If E is reg, --> GET %G, tmp
2107 PUT tmp, %E
2109 If E is mem, --> (getAddr E) -> tmpa
2110 GET %G, tmpv
2111 ST tmpv, (tmpa) */
2113 static
2114 UInt dis_mov_G_E ( UChar sorb,
2115 Int size,
2116 Int delta0 )
2118 Int len;
2119 UChar rm = getIByte(delta0);
2120 HChar dis_buf[50];
2122 if (epartIsReg(rm)) {
2123 putIReg(size, eregOfRM(rm), getIReg(size, gregOfRM(rm)));
2124 DIP("mov%c %s,%s\n", nameISize(size),
2125 nameIReg(size,gregOfRM(rm)),
2126 nameIReg(size,eregOfRM(rm)));
2127 return 1+delta0;
2130 /* E refers to memory */
2132 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf);
2133 storeLE( mkexpr(addr), getIReg(size, gregOfRM(rm)) );
2134 DIP("mov%c %s,%s\n", nameISize(size),
2135 nameIReg(size,gregOfRM(rm)), dis_buf);
2136 return len+delta0;
2141 /* op $immediate, AL/AX/EAX. */
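/* For instance, "add $0x7,%al" (opcode 0x04) and "add $0x7,%eax"
   (opcode 0x05) are both handled here, with op8 == Iop_Add8 and size
   1 or 4 respectively. */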
2142 static
2143 UInt dis_op_imm_A ( Int size,
2144 Bool carrying,
2145 IROp op8,
2146 Bool keep,
2147 Int delta,
2148 const HChar* t_x86opc )
2150 IRType ty = szToITy(size);
2151 IRTemp dst0 = newTemp(ty);
2152 IRTemp src = newTemp(ty);
2153 IRTemp dst1 = newTemp(ty);
2154 UInt lit = getUDisp(size,delta);
2155 assign(dst0, getIReg(size,R_EAX));
2156 assign(src, mkU(ty,lit));
2158 if (isAddSub(op8) && !carrying) {
2159 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
2160 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2162 else
2163 if (isLogic(op8)) {
2164 vassert(!carrying);
2165 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
2166 setFlags_DEP1(op8, dst1, ty);
2168 else
2169 if (op8 == Iop_Add8 && carrying) {
2170 helper_ADC( size, dst1, dst0, src,
2171 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2173 else
2174 if (op8 == Iop_Sub8 && carrying) {
2175 helper_SBB( size, dst1, dst0, src,
2176 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2178 else
2179 vpanic("dis_op_imm_A(x86,guest)");
2181 if (keep)
2182 putIReg(size, R_EAX, mkexpr(dst1));
2184 DIP("%s%c $0x%x, %s\n", t_x86opc, nameISize(size),
2185 lit, nameIReg(size,R_EAX));
2186 return delta+size;
2190 /* Sign- and Zero-extending moves. */
2191 static
2192 UInt dis_movx_E_G ( UChar sorb,
2193 Int delta, Int szs, Int szd, Bool sign_extend )
2195 UChar rm = getIByte(delta);
2196 if (epartIsReg(rm)) {
2197 if (szd == szs) {
2198 // mutant case. See #250799
2199 putIReg(szd, gregOfRM(rm),
2200 getIReg(szs,eregOfRM(rm)));
2201 } else {
2202 // normal case
2203 putIReg(szd, gregOfRM(rm),
2204 unop(mkWidenOp(szs,szd,sign_extend),
2205 getIReg(szs,eregOfRM(rm))));
2207 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
2208 nameISize(szs), nameISize(szd),
2209 nameIReg(szs,eregOfRM(rm)),
2210 nameIReg(szd,gregOfRM(rm)));
2211 return 1+delta;
2214 /* E refers to memory */
2216 Int len;
2217 HChar dis_buf[50];
2218 IRTemp addr = disAMode ( &len, sorb, delta, dis_buf );
2219 if (szd == szs) {
2220 // mutant case. See #250799
2221 putIReg(szd, gregOfRM(rm),
2222 loadLE(szToITy(szs),mkexpr(addr)));
2223 } else {
2224 // normal case
2225 putIReg(szd, gregOfRM(rm),
2226 unop(mkWidenOp(szs,szd,sign_extend),
2227 loadLE(szToITy(szs),mkexpr(addr))));
2229 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
2230 nameISize(szs), nameISize(szd),
2231 dis_buf, nameIReg(szd,gregOfRM(rm)));
2232 return len+delta;
2237 /* Generate code to divide ArchRegs EDX:EAX / DX:AX / AX by the 32 /
2238 16 / 8 bit quantity in the given IRTemp. */
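/* E.g. for sz==4 this implements "divl %ebx": Iop_DivModU64to32 takes
   the 64-bit dividend EDX:EAX and a 32-bit divisor, and produces the
   quotient in the low half and the remainder in the high half of its
   64-bit result, which are then written back to EAX and EDX. */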
2239 static
2240 void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
2242 IROp op = signed_divide ? Iop_DivModS64to32 : Iop_DivModU64to32;
2243 IRTemp src64 = newTemp(Ity_I64);
2244 IRTemp dst64 = newTemp(Ity_I64);
2245 switch (sz) {
2246 case 4:
2247 assign( src64, binop(Iop_32HLto64,
2248 getIReg(4,R_EDX), getIReg(4,R_EAX)) );
2249 assign( dst64, binop(op, mkexpr(src64), mkexpr(t)) );
2250 putIReg( 4, R_EAX, unop(Iop_64to32,mkexpr(dst64)) );
2251 putIReg( 4, R_EDX, unop(Iop_64HIto32,mkexpr(dst64)) );
2252 break;
2253 case 2: {
2254 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
2255 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
2256 assign( src64, unop(widen3264,
2257 binop(Iop_16HLto32,
2258 getIReg(2,R_EDX), getIReg(2,R_EAX))) );
2259 assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) );
2260 putIReg( 2, R_EAX, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) );
2261 putIReg( 2, R_EDX, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) );
2262 break;
2264 case 1: {
2265 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
2266 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
2267 IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16;
2268 assign( src64, unop(widen3264, unop(widen1632, getIReg(2,R_EAX))) );
2269 assign( dst64,
2270 binop(op, mkexpr(src64),
2271 unop(widen1632, unop(widen816, mkexpr(t)))) );
2272 putIReg( 1, R_AL, unop(Iop_16to8, unop(Iop_32to16,
2273 unop(Iop_64to32,mkexpr(dst64)))) );
2274 putIReg( 1, R_AH, unop(Iop_16to8, unop(Iop_32to16,
2275 unop(Iop_64HIto32,mkexpr(dst64)))) );
2276 break;
2278 default: vpanic("codegen_div(x86)");
2283 static
2284 UInt dis_Grp1 ( UChar sorb, Bool locked,
2285 Int delta, UChar modrm,
2286 Int am_sz, Int d_sz, Int sz, UInt d32 )
2288 Int len;
2289 HChar dis_buf[50];
2290 IRType ty = szToITy(sz);
2291 IRTemp dst1 = newTemp(ty);
2292 IRTemp src = newTemp(ty);
2293 IRTemp dst0 = newTemp(ty);
2294 IRTemp addr = IRTemp_INVALID;
2295 IROp op8 = Iop_INVALID;
2296 UInt mask = sz==1 ? 0xFF : (sz==2 ? 0xFFFF : 0xFFFFFFFF);
2298 switch (gregOfRM(modrm)) {
2299 case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break;
2300 case 2: break; // ADC
2301 case 3: break; // SBB
2302 case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break;
2303 case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break;
2304 /*NOTREACHED*/
2305 default: vpanic("dis_Grp1: unhandled case");
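/* Note that case 7 (CMP) maps to Iop_Sub8 just like SUB, but the
   result is discarded: the "gregOfRM(modrm) < 7" guards below skip
   the write-back, so CMP only updates the flags. */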
2308 if (epartIsReg(modrm)) {
2309 vassert(am_sz == 1);
2311 assign(dst0, getIReg(sz,eregOfRM(modrm)));
2312 assign(src, mkU(ty,d32 & mask));
2314 if (gregOfRM(modrm) == 2 /* ADC */) {
2315 helper_ADC( sz, dst1, dst0, src,
2316 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2317 } else
2318 if (gregOfRM(modrm) == 3 /* SBB */) {
2319 helper_SBB( sz, dst1, dst0, src,
2320 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2321 } else {
2322 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
2323 if (isAddSub(op8))
2324 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2325 else
2326 setFlags_DEP1(op8, dst1, ty);
2329 if (gregOfRM(modrm) < 7)
2330 putIReg(sz, eregOfRM(modrm), mkexpr(dst1));
2332 delta += (am_sz + d_sz);
2333 DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz), d32,
2334 nameIReg(sz,eregOfRM(modrm)));
2335 } else {
2336 addr = disAMode ( &len, sorb, delta, dis_buf);
2338 assign(dst0, loadLE(ty,mkexpr(addr)));
2339 assign(src, mkU(ty,d32 & mask));
2341 if (gregOfRM(modrm) == 2 /* ADC */) {
2342 if (locked) {
2343 /* cas-style store */
2344 helper_ADC( sz, dst1, dst0, src,
2345 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
2346 } else {
2347 /* normal store */
2348 helper_ADC( sz, dst1, dst0, src,
2349 /*store*/addr, IRTemp_INVALID, 0 );
2351 } else
2352 if (gregOfRM(modrm) == 3 /* SBB */) {
2353 if (locked) {
2354 /* cas-style store */
2355 helper_SBB( sz, dst1, dst0, src,
2356 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
2357 } else {
2358 /* normal store */
2359 helper_SBB( sz, dst1, dst0, src,
2360 /*store*/addr, IRTemp_INVALID, 0 );
2362 } else {
2363 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
2364 if (gregOfRM(modrm) < 7) {
2365 if (locked) {
2366 casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
2367 mkexpr(dst1)/*newVal*/,
2368 guest_EIP_curr_instr );
2369 } else {
2370 storeLE(mkexpr(addr), mkexpr(dst1));
2373 if (isAddSub(op8))
2374 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2375 else
2376 setFlags_DEP1(op8, dst1, ty);
2379 delta += (len+d_sz);
2380 DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz),
2381 d32, dis_buf);
2383 return delta;
2387 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed
2388 expression. */
2390 static
2391 UInt dis_Grp2 ( UChar sorb,
2392 Int delta, UChar modrm,
2393 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr,
2394 const HChar* shift_expr_txt, Bool* decode_OK )
2396 /* delta on entry points at the modrm byte. */
2397 HChar dis_buf[50];
2398 Int len;
2399 Bool isShift, isRotate, isRotateC;
2400 IRType ty = szToITy(sz);
2401 IRTemp dst0 = newTemp(ty);
2402 IRTemp dst1 = newTemp(ty);
2403 IRTemp addr = IRTemp_INVALID;
2405 *decode_OK = True;
2407 vassert(sz == 1 || sz == 2 || sz == 4);
2409 /* Put value to shift/rotate in dst0. */
2410 if (epartIsReg(modrm)) {
2411 assign(dst0, getIReg(sz, eregOfRM(modrm)));
2412 delta += (am_sz + d_sz);
2413 } else {
2414 addr = disAMode ( &len, sorb, delta, dis_buf);
2415 assign(dst0, loadLE(ty,mkexpr(addr)));
2416 delta += len + d_sz;
2419 isShift = False;
2420 switch (gregOfRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; }
2422 isRotate = False;
2423 switch (gregOfRM(modrm)) { case 0: case 1: isRotate = True; }
2425 isRotateC = False;
2426 switch (gregOfRM(modrm)) { case 2: case 3: isRotateC = True; }
2428 if (!isShift && !isRotate && !isRotateC) {
2429 /*NOTREACHED*/
2430 vpanic("dis_Grp2(Reg): unhandled case(x86)");
2433 if (isRotateC) {
2434 /* call a helper; these insns are so ridiculous they do not
2435 deserve better */
2436 Bool left = toBool(gregOfRM(modrm) == 2);
2437 IRTemp r64 = newTemp(Ity_I64);
2438 IRExpr** args
2439 = mkIRExprVec_4( widenUto32(mkexpr(dst0)), /* thing to rotate */
2440 widenUto32(shift_expr), /* rotate amount */
2441 widenUto32(mk_x86g_calculate_eflags_all()),
2442 mkU32(sz) );
2443 assign( r64, mkIRExprCCall(
2444 Ity_I64,
2445 0/*regparm*/,
2446 left ? "x86g_calculate_RCL" : "x86g_calculate_RCR",
2447 left ? &x86g_calculate_RCL : &x86g_calculate_RCR,
2448 args
2451 /* new eflags in hi half r64; new value in lo half r64 */
2452 assign( dst1, narrowTo(ty, unop(Iop_64to32, mkexpr(r64))) );
2453 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
2454 stmt( IRStmt_Put( OFFB_CC_DEP1, unop(Iop_64HIto32, mkexpr(r64)) ));
2455 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
2456 /* Set NDEP even though it isn't used. This makes redundant-PUT
2457 elimination of previous stores to this field work better. */
2458 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
2461 if (isShift) {
2463 IRTemp pre32 = newTemp(Ity_I32);
2464 IRTemp res32 = newTemp(Ity_I32);
2465 IRTemp res32ss = newTemp(Ity_I32);
2466 IRTemp shift_amt = newTemp(Ity_I8);
2467 IROp op32;
2469 switch (gregOfRM(modrm)) {
2470 case 4: op32 = Iop_Shl32; break;
2471 case 5: op32 = Iop_Shr32; break;
2472 case 6: op32 = Iop_Shl32; break;
2473 case 7: op32 = Iop_Sar32; break;
2474 /*NOTREACHED*/
2475 default: vpanic("dis_Grp2:shift"); break;
2478 /* Widen the value to be shifted to 32 bits, do the shift, and
2479 narrow back down. This seems surprisingly long-winded, but
2480 unfortunately the Intel semantics requires that 8/16-bit
2481 shifts give defined results for shift values all the way up
2482 to 31, and this seems the simplest way to do it. It has the
2483 advantage that the only IR level shifts generated are of 32
2484 bit values, and the shift amount is guaranteed to be in the
2485 range 0 .. 31, thereby observing the IR semantics requiring
2486 all shift values to be in the range 0 .. 2^word_size-1. */
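/* E.g. for sz==1, "shlb $12,%al" is architecturally legal: AL is
   widened to 32 bits, shifted left by 12 (well within the IR's 0..31
   range), and narrowed back, giving 0x00 -- whereas a direct 8-bit IR
   shift by 12 would be undefined. */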
2488 /* shift_amt = shift_expr & 31, regardless of operation size */
2489 assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(31)) );
2491 /* suitably widen the value to be shifted to 32 bits. */
2492 assign( pre32, op32==Iop_Sar32 ? widenSto32(mkexpr(dst0))
2493 : widenUto32(mkexpr(dst0)) );
2495 /* res32 = pre32 `shift` shift_amt */
2496 assign( res32, binop(op32, mkexpr(pre32), mkexpr(shift_amt)) );
2498 /* res32ss = pre32 `shift` ((shift_amt - 1) & 31) */
2499 assign( res32ss,
2500 binop(op32,
2501 mkexpr(pre32),
2502 binop(Iop_And8,
2503 binop(Iop_Sub8,
2504 mkexpr(shift_amt), mkU8(1)),
2505 mkU8(31))) );
2507 /* Build the flags thunk. */
2508 setFlags_DEP1_DEP2_shift(op32, res32, res32ss, ty, shift_amt);
2510 /* Narrow the result back down. */
2511 assign( dst1, narrowTo(ty, mkexpr(res32)) );
2513 } /* if (isShift) */
2515 else
2516 if (isRotate) {
2517 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
2518 Bool left = toBool(gregOfRM(modrm) == 0);
2519 IRTemp rot_amt = newTemp(Ity_I8);
2520 IRTemp rot_amt32 = newTemp(Ity_I8);
2521 IRTemp oldFlags = newTemp(Ity_I32);
2523 /* rot_amt = shift_expr & mask */
2524 /* By masking the rotate amount in this way, the IR-level Shl/Shr
2525 expressions never shift beyond the word size and thus remain
2526 well defined. */
2527 assign(rot_amt32, binop(Iop_And8, shift_expr, mkU8(31)));
2529 if (ty == Ity_I32)
2530 assign(rot_amt, mkexpr(rot_amt32));
2531 else
2532 assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt32), mkU8(8*sz-1)));
2534 if (left) {
2536 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
2537 assign(dst1,
2538 binop( mkSizedOp(ty,Iop_Or8),
2539 binop( mkSizedOp(ty,Iop_Shl8),
2540 mkexpr(dst0),
2541 mkexpr(rot_amt)
2543 binop( mkSizedOp(ty,Iop_Shr8),
2544 mkexpr(dst0),
2545 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
2549 ccOp += X86G_CC_OP_ROLB;
2551 } else { /* right */
2553 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
2554 assign(dst1,
2555 binop( mkSizedOp(ty,Iop_Or8),
2556 binop( mkSizedOp(ty,Iop_Shr8),
2557 mkexpr(dst0),
2558 mkexpr(rot_amt)
2560 binop( mkSizedOp(ty,Iop_Shl8),
2561 mkexpr(dst0),
2562 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
2566 ccOp += X86G_CC_OP_RORB;
2570 /* dst1 now holds the rotated value. Build flag thunk. We
2571 need the resulting value for this, and the previous flags.
2572 Except don't set it if the rotate count is zero. */
2574 assign(oldFlags, mk_x86g_calculate_eflags_all());
2576 /* rot_amt32 :: Ity_I8. We need to convert it to I1. */
2577 IRTemp rot_amt32b = newTemp(Ity_I1);
2578 assign(rot_amt32b, binop(Iop_CmpNE8, mkexpr(rot_amt32), mkU8(0)) );
2580 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */
2581 stmt( IRStmt_Put( OFFB_CC_OP,
2582 IRExpr_ITE( mkexpr(rot_amt32b),
2583 mkU32(ccOp),
2584 IRExpr_Get(OFFB_CC_OP,Ity_I32) ) ));
2585 stmt( IRStmt_Put( OFFB_CC_DEP1,
2586 IRExpr_ITE( mkexpr(rot_amt32b),
2587 widenUto32(mkexpr(dst1)),
2588 IRExpr_Get(OFFB_CC_DEP1,Ity_I32) ) ));
2589 stmt( IRStmt_Put( OFFB_CC_DEP2,
2590 IRExpr_ITE( mkexpr(rot_amt32b),
2591 mkU32(0),
2592 IRExpr_Get(OFFB_CC_DEP2,Ity_I32) ) ));
2593 stmt( IRStmt_Put( OFFB_CC_NDEP,
2594 IRExpr_ITE( mkexpr(rot_amt32b),
2595 mkexpr(oldFlags),
2596 IRExpr_Get(OFFB_CC_NDEP,Ity_I32) ) ));
2597 } /* if (isRotate) */
2599 /* Save result, and finish up. */
2600 if (epartIsReg(modrm)) {
2601 putIReg(sz, eregOfRM(modrm), mkexpr(dst1));
2602 if (vex_traceflags & VEX_TRACE_FE) {
2603 vex_printf("%s%c ",
2604 nameGrp2(gregOfRM(modrm)), nameISize(sz) );
2605 if (shift_expr_txt)
2606 vex_printf("%s", shift_expr_txt);
2607 else
2608 ppIRExpr(shift_expr);
2609 vex_printf(", %s\n", nameIReg(sz,eregOfRM(modrm)));
2611 } else {
2612 storeLE(mkexpr(addr), mkexpr(dst1));
2613 if (vex_traceflags & VEX_TRACE_FE) {
2614 vex_printf("%s%c ",
2615 nameGrp2(gregOfRM(modrm)), nameISize(sz) );
2616 if (shift_expr_txt)
2617 vex_printf("%s", shift_expr_txt);
2618 else
2619 ppIRExpr(shift_expr);
2620 vex_printf(", %s\n", dis_buf);
2623 return delta;
2627 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
2628 static
2629 UInt dis_Grp8_Imm ( UChar sorb,
2630 Bool locked,
2631 Int delta, UChar modrm,
2632 Int am_sz, Int sz, UInt src_val,
2633 Bool* decode_OK )
2635 /* src_val denotes a d8.
2636 And delta on entry points at the modrm byte. */
2638 IRType ty = szToITy(sz);
2639 IRTemp t2 = newTemp(Ity_I32);
2640 IRTemp t2m = newTemp(Ity_I32);
2641 IRTemp t_addr = IRTemp_INVALID;
2642 HChar dis_buf[50];
2643 UInt mask;
2645 /* we're optimists :-) */
2646 *decode_OK = True;
2648 /* Limit src_val -- the bit offset -- to something within a word.
2649 The Intel docs say that literal offsets larger than a word are
2650 masked in this way. */
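/* So for sz==4 an immediate bit offset of 35 behaves like 3, and for
   sz==2 an offset of 17 behaves like 1. */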
2651 switch (sz) {
2652 case 2: src_val &= 15; break;
2653 case 4: src_val &= 31; break;
2654 default: *decode_OK = False; return delta;
2657 /* Invent a mask suitable for the operation. */
2658 switch (gregOfRM(modrm)) {
2659 case 4: /* BT */ mask = 0; break;
2660 case 5: /* BTS */ mask = 1 << src_val; break;
2661 case 6: /* BTR */ mask = ~(1 << src_val); break;
2662 case 7: /* BTC */ mask = 1 << src_val; break;
2663 /* If this needs to be extended, probably simplest to make a
2664 new function to handle the other cases (0 .. 3). The
2665 Intel docs do not, however, indicate any use for 0 .. 3, so
2666 we don't expect this to happen. */
2667 default: *decode_OK = False; return delta;
2670 /* Fetch the value to be tested and modified into t2, which is
2671 32-bits wide regardless of sz. */
2672 if (epartIsReg(modrm)) {
2673 vassert(am_sz == 1);
2674 assign( t2, widenUto32(getIReg(sz, eregOfRM(modrm))) );
2675 delta += (am_sz + 1);
2676 DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz),
2677 src_val, nameIReg(sz,eregOfRM(modrm)));
2678 } else {
2679 Int len;
2680 t_addr = disAMode ( &len, sorb, delta, dis_buf);
2681 delta += (len+1);
2682 assign( t2, widenUto32(loadLE(ty, mkexpr(t_addr))) );
2683 DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz),
2684 src_val, dis_buf);
2687 /* Compute the new value into t2m, if non-BT. */
2688 switch (gregOfRM(modrm)) {
2689 case 4: /* BT */
2690 break;
2691 case 5: /* BTS */
2692 assign( t2m, binop(Iop_Or32, mkU32(mask), mkexpr(t2)) );
2693 break;
2694 case 6: /* BTR */
2695 assign( t2m, binop(Iop_And32, mkU32(mask), mkexpr(t2)) );
2696 break;
2697 case 7: /* BTC */
2698 assign( t2m, binop(Iop_Xor32, mkU32(mask), mkexpr(t2)) );
2699 break;
2700 default:
2701 /*NOTREACHED*/ /*the previous switch guards this*/
2702 vassert(0);
2705 /* Write the result back, if non-BT. If the CAS fails then we
2706 side-exit from the trace at this point, and so the flag state is
2707 not affected. This is of course as required. */
2708 if (gregOfRM(modrm) != 4 /* BT */) {
2709 if (epartIsReg(modrm)) {
2710 putIReg(sz, eregOfRM(modrm), narrowTo(ty, mkexpr(t2m)));
2711 } else {
2712 if (locked) {
2713 casLE( mkexpr(t_addr),
2714 narrowTo(ty, mkexpr(t2))/*expd*/,
2715 narrowTo(ty, mkexpr(t2m))/*new*/,
2716 guest_EIP_curr_instr );
2717 } else {
2718 storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
2723 /* Copy relevant bit from t2 into the carry flag. */
2724 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
2725 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
2726 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
2727 stmt( IRStmt_Put(
2728 OFFB_CC_DEP1,
2729 binop(Iop_And32,
2730 binop(Iop_Shr32, mkexpr(t2), mkU8(src_val)),
2731 mkU32(1))
2733 /* Set NDEP even though it isn't used. This makes redundant-PUT
2734 elimination of previous stores to this field work better. */
2735 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
2737 return delta;
2741 /* Signed/unsigned widening multiply. Generate IR to multiply the
2742 value in EAX/AX/AL by the given IRTemp, and park the result in
2743 EDX:EAX/DX:AX/AX. */
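/* E.g. for sz==4, "mull %ebx" computes EDX:EAX = EAX * EBX unsigned,
   and "imull %ebx" is the signed analogue; the Ity_I32 case below
   splits the 64-bit product into the two destination halves. */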
2745 static void codegen_mulL_A_D ( Int sz, Bool syned,
2746 IRTemp tmp, const HChar* tmp_txt )
2748 IRType ty = szToITy(sz);
2749 IRTemp t1 = newTemp(ty);
2751 assign( t1, getIReg(sz, R_EAX) );
2753 switch (ty) {
2754 case Ity_I32: {
2755 IRTemp res64 = newTemp(Ity_I64);
2756 IRTemp resHi = newTemp(Ity_I32);
2757 IRTemp resLo = newTemp(Ity_I32);
2758 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32;
2759 UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB;
2760 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp );
2761 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
2762 assign( resHi, unop(Iop_64HIto32,mkexpr(res64)));
2763 assign( resLo, unop(Iop_64to32,mkexpr(res64)));
2764 putIReg(4, R_EDX, mkexpr(resHi));
2765 putIReg(4, R_EAX, mkexpr(resLo));
2766 break;
2768 case Ity_I16: {
2769 IRTemp res32 = newTemp(Ity_I32);
2770 IRTemp resHi = newTemp(Ity_I16);
2771 IRTemp resLo = newTemp(Ity_I16);
2772 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16;
2773 UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB;
2774 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp );
2775 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
2776 assign( resHi, unop(Iop_32HIto16,mkexpr(res32)));
2777 assign( resLo, unop(Iop_32to16,mkexpr(res32)));
2778 putIReg(2, R_EDX, mkexpr(resHi));
2779 putIReg(2, R_EAX, mkexpr(resLo));
2780 break;
2782 case Ity_I8: {
2783 IRTemp res16 = newTemp(Ity_I16);
2784 IRTemp resHi = newTemp(Ity_I8);
2785 IRTemp resLo = newTemp(Ity_I8);
2786 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8;
2787 UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB;
2788 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp );
2789 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
2790 assign( resHi, unop(Iop_16HIto8,mkexpr(res16)));
2791 assign( resLo, unop(Iop_16to8,mkexpr(res16)));
2792 putIReg(2, R_EAX, mkexpr(res16));
2793 break;
2795 default:
2796 vpanic("codegen_mulL_A_D(x86)");
2798 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt);
2802 /* Group 3 extended opcodes. */
2803 static
2804 UInt dis_Grp3 ( UChar sorb, Bool locked, Int sz, Int delta, Bool* decode_OK )
2806 UInt d32;
2807 UChar modrm;
2808 HChar dis_buf[50];
2809 Int len;
2810 IRTemp addr;
2811 IRType ty = szToITy(sz);
2812 IRTemp t1 = newTemp(ty);
2813 IRTemp dst1, src, dst0;
2815 *decode_OK = True; /* may change this later */
2817 modrm = getIByte(delta);
2819 if (locked && (gregOfRM(modrm) != 2 && gregOfRM(modrm) != 3)) {
2820 /* LOCK prefix only allowed with not and neg subopcodes */
2821 *decode_OK = False;
2822 return delta;
2825 if (epartIsReg(modrm)) {
2826 switch (gregOfRM(modrm)) {
2827 case 0: { /* TEST */
2828 delta++; d32 = getUDisp(sz, delta); delta += sz;
2829 dst1 = newTemp(ty);
2830 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
2831 getIReg(sz,eregOfRM(modrm)),
2832 mkU(ty,d32)));
2833 setFlags_DEP1( Iop_And8, dst1, ty );
2834 DIP("test%c $0x%x, %s\n", nameISize(sz), d32,
2835 nameIReg(sz, eregOfRM(modrm)));
2836 break;
2838 case 1: /* UNDEFINED */
2839 /* The Intel docs imply this insn is undefined and binutils
2840 agrees. Unfortunately Core 2 will run it (with who
2841 knows what result?). sandpile.org reckons it's an alias
2842 for case 0. We play it safe. */
2843 *decode_OK = False;
2844 break;
2845 case 2: /* NOT */
2846 delta++;
2847 putIReg(sz, eregOfRM(modrm),
2848 unop(mkSizedOp(ty,Iop_Not8),
2849 getIReg(sz, eregOfRM(modrm))));
2850 DIP("not%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
2851 break;
2852 case 3: /* NEG */
2853 delta++;
2854 dst0 = newTemp(ty);
2855 src = newTemp(ty);
2856 dst1 = newTemp(ty);
2857 assign(dst0, mkU(ty,0));
2858 assign(src, getIReg(sz,eregOfRM(modrm)));
2859 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), mkexpr(src)));
2860 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
2861 putIReg(sz, eregOfRM(modrm), mkexpr(dst1));
2862 DIP("neg%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
2863 break;
2864 case 4: /* MUL (unsigned widening) */
2865 delta++;
2866 src = newTemp(ty);
2867 assign(src, getIReg(sz,eregOfRM(modrm)));
2868 codegen_mulL_A_D ( sz, False, src, nameIReg(sz,eregOfRM(modrm)) );
2869 break;
2870 case 5: /* IMUL (signed widening) */
2871 delta++;
2872 src = newTemp(ty);
2873 assign(src, getIReg(sz,eregOfRM(modrm)));
2874 codegen_mulL_A_D ( sz, True, src, nameIReg(sz,eregOfRM(modrm)) );
2875 break;
2876 case 6: /* DIV */
2877 delta++;
2878 assign( t1, getIReg(sz, eregOfRM(modrm)) );
2879 codegen_div ( sz, t1, False );
2880 DIP("div%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
2881 break;
2882 case 7: /* IDIV */
2883 delta++;
2884 assign( t1, getIReg(sz, eregOfRM(modrm)) );
2885 codegen_div ( sz, t1, True );
2886 DIP("idiv%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
2887 break;
2888 default:
2889 /* This can't happen - gregOfRM should return 0 .. 7 only */
2890 vpanic("Grp3(x86)");
2892 } else {
2893 addr = disAMode ( &len, sorb, delta, dis_buf );
2894 t1 = newTemp(ty);
2895 delta += len;
2896 assign(t1, loadLE(ty,mkexpr(addr)));
2897 switch (gregOfRM(modrm)) {
2898 case 0: { /* TEST */
2899 d32 = getUDisp(sz, delta); delta += sz;
2900 dst1 = newTemp(ty);
2901 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
2902 mkexpr(t1), mkU(ty,d32)));
2903 setFlags_DEP1( Iop_And8, dst1, ty );
2904 DIP("test%c $0x%x, %s\n", nameISize(sz), d32, dis_buf);
2905 break;
2907 case 1: /* UNDEFINED */
2908 /* See comment above on R case */
2909 *decode_OK = False;
2910 break;
2911 case 2: /* NOT */
2912 dst1 = newTemp(ty);
2913 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
2914 if (locked) {
2915 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
2916 guest_EIP_curr_instr );
2917 } else {
2918 storeLE( mkexpr(addr), mkexpr(dst1) );
2920 DIP("not%c %s\n", nameISize(sz), dis_buf);
2921 break;
2922 case 3: /* NEG */
2923 dst0 = newTemp(ty);
2924 src = newTemp(ty);
2925 dst1 = newTemp(ty);
2926 assign(dst0, mkU(ty,0));
2927 assign(src, mkexpr(t1));
2928 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8),
2929 mkexpr(dst0), mkexpr(src)));
2930 if (locked) {
2931 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
2932 guest_EIP_curr_instr );
2933 } else {
2934 storeLE( mkexpr(addr), mkexpr(dst1) );
2936 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
2937 DIP("neg%c %s\n", nameISize(sz), dis_buf);
2938 break;
2939 case 4: /* MUL */
2940 codegen_mulL_A_D ( sz, False, t1, dis_buf );
2941 break;
2942 case 5: /* IMUL */
2943 codegen_mulL_A_D ( sz, True, t1, dis_buf );
2944 break;
2945 case 6: /* DIV */
2946 codegen_div ( sz, t1, False );
2947 DIP("div%c %s\n", nameISize(sz), dis_buf);
2948 break;
2949 case 7: /* IDIV */
2950 codegen_div ( sz, t1, True );
2951 DIP("idiv%c %s\n", nameISize(sz), dis_buf);
2952 break;
2953 default:
2954 /* This can't happen - gregOfRM should return 0 .. 7 only */
2955 vpanic("Grp3(x86)");
2958 return delta;
2962 /* Group 4 extended opcodes. */
2963 static
2964 UInt dis_Grp4 ( UChar sorb, Bool locked, Int delta, Bool* decode_OK )
2966 Int alen;
2967 UChar modrm;
2968 HChar dis_buf[50];
2969 IRType ty = Ity_I8;
2970 IRTemp t1 = newTemp(ty);
2971 IRTemp t2 = newTemp(ty);
2973 *decode_OK = True;
2975 modrm = getIByte(delta);
2977 if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) {
2978 /* LOCK prefix only allowed with inc and dec subopcodes */
2979 *decode_OK = False;
2980 return delta;
2983 if (epartIsReg(modrm)) {
2984 assign(t1, getIReg(1, eregOfRM(modrm)));
2985 switch (gregOfRM(modrm)) {
2986 case 0: /* INC */
2987 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
2988 putIReg(1, eregOfRM(modrm), mkexpr(t2));
2989 setFlags_INC_DEC( True, t2, ty );
2990 break;
2991 case 1: /* DEC */
2992 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
2993 putIReg(1, eregOfRM(modrm), mkexpr(t2));
2994 setFlags_INC_DEC( False, t2, ty );
2995 break;
2996 default:
2997 *decode_OK = False;
2998 return delta;
3000 delta++;
3001 DIP("%sb %s\n", nameGrp4(gregOfRM(modrm)),
3002 nameIReg(1, eregOfRM(modrm)));
3003 } else {
3004 IRTemp addr = disAMode ( &alen, sorb, delta, dis_buf );
3005 assign( t1, loadLE(ty, mkexpr(addr)) );
3006 switch (gregOfRM(modrm)) {
3007 case 0: /* INC */
3008 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
3009 if (locked) {
3010 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
3011 guest_EIP_curr_instr );
3012 } else {
3013 storeLE( mkexpr(addr), mkexpr(t2) );
3015 setFlags_INC_DEC( True, t2, ty );
3016 break;
3017 case 1: /* DEC */
3018 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
3019 if (locked) {
3020 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
3021 guest_EIP_curr_instr );
3022 } else {
3023 storeLE( mkexpr(addr), mkexpr(t2) );
3025 setFlags_INC_DEC( False, t2, ty );
3026 break;
3027 default:
3028 *decode_OK = False;
3029 return delta;
3031 delta += alen;
3032 DIP("%sb %s\n", nameGrp4(gregOfRM(modrm)), dis_buf);
3034 return delta;
3038 /* Group 5 extended opcodes. */
3039 static
3040 UInt dis_Grp5 ( UChar sorb, Bool locked, Int sz, Int delta,
3041 /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK )
3043 Int len;
3044 UChar modrm;
3045 HChar dis_buf[50];
3046 IRTemp addr = IRTemp_INVALID;
3047 IRType ty = szToITy(sz);
3048 IRTemp t1 = newTemp(ty);
3049 IRTemp t2 = IRTemp_INVALID;
3051 *decode_OK = True;
3053 modrm = getIByte(delta);
3055 if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) {
3056 /* LOCK prefix only allowed with inc and dec subopcodes */
3057 *decode_OK = False;
3058 return delta;
3061 if (epartIsReg(modrm)) {
3062 assign(t1, getIReg(sz,eregOfRM(modrm)));
3063 switch (gregOfRM(modrm)) {
3064 case 0: /* INC */
3065 vassert(sz == 2 || sz == 4);
3066 t2 = newTemp(ty);
3067 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
3068 mkexpr(t1), mkU(ty,1)));
3069 setFlags_INC_DEC( True, t2, ty );
3070 putIReg(sz,eregOfRM(modrm),mkexpr(t2));
3071 break;
3072 case 1: /* DEC */
3073 vassert(sz == 2 || sz == 4);
3074 t2 = newTemp(ty);
3075 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
3076 mkexpr(t1), mkU(ty,1)));
3077 setFlags_INC_DEC( False, t2, ty );
3078 putIReg(sz,eregOfRM(modrm),mkexpr(t2));
3079 break;
3080 case 2: /* call Ev */
3081 vassert(sz == 4);
3082 t2 = newTemp(Ity_I32);
3083 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
3084 putIReg(4, R_ESP, mkexpr(t2));
3085 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+1));
3086 jmp_treg(dres, Ijk_Call, t1);
3087 vassert(dres->whatNext == Dis_StopHere);
3088 break;
3089 case 4: /* jmp Ev */
3090 vassert(sz == 4);
3091 jmp_treg(dres, Ijk_Boring, t1);
3092 vassert(dres->whatNext == Dis_StopHere);
3093 break;
3094 case 6: /* PUSH Ev */
3095 vassert(sz == 4 || sz == 2);
3096 t2 = newTemp(Ity_I32);
3097 assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
3098 putIReg(4, R_ESP, mkexpr(t2) );
3099 storeLE( mkexpr(t2), mkexpr(t1) );
3100 break;
3101 default:
3102 *decode_OK = False;
3103 return delta;
3105 delta++;
3106 DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)),
3107 nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
3108 } else {
3109 addr = disAMode ( &len, sorb, delta, dis_buf );
3110 assign(t1, loadLE(ty,mkexpr(addr)));
3111 switch (gregOfRM(modrm)) {
3112 case 0: /* INC */
3113 t2 = newTemp(ty);
3114 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
3115 mkexpr(t1), mkU(ty,1)));
3116 if (locked) {
3117 casLE( mkexpr(addr),
3118 mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
3119 } else {
3120 storeLE(mkexpr(addr),mkexpr(t2));
3122 setFlags_INC_DEC( True, t2, ty );
3123 break;
3124 case 1: /* DEC */
3125 t2 = newTemp(ty);
3126 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
3127 mkexpr(t1), mkU(ty,1)));
3128 if (locked) {
3129 casLE( mkexpr(addr),
3130 mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
3131 } else {
3132 storeLE(mkexpr(addr),mkexpr(t2));
3134 setFlags_INC_DEC( False, t2, ty );
3135 break;
3136 case 2: /* call Ev */
3137 vassert(sz == 4);
3138 t2 = newTemp(Ity_I32);
3139 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
3140 putIReg(4, R_ESP, mkexpr(t2));
3141 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+len));
3142 jmp_treg(dres, Ijk_Call, t1);
3143 vassert(dres->whatNext == Dis_StopHere);
3144 break;
3145 case 4: /* JMP Ev */
3146 vassert(sz == 4);
3147 jmp_treg(dres, Ijk_Boring, t1);
3148 vassert(dres->whatNext == Dis_StopHere);
3149 break;
3150 case 6: /* PUSH Ev */
3151 vassert(sz == 4 || sz == 2);
3152 t2 = newTemp(Ity_I32);
3153 assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
3154 putIReg(4, R_ESP, mkexpr(t2) );
3155 storeLE( mkexpr(t2), mkexpr(t1) );
3156 break;
3157 default:
3158 *decode_OK = False;
3159 return delta;
3161 delta += len;
3162 DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)),
3163 nameISize(sz), dis_buf);
3165 return delta;
3169 /*------------------------------------------------------------*/
3170 /*--- Disassembling string ops (including REP prefixes) ---*/
3171 /*------------------------------------------------------------*/
3173 /* Code shared by all the string ops */
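/* Note the guest DFLAG field holds +1 or -1 rather than the raw
   EFLAGS.DF bit, so in dis_string_op_increment shifting it left by 1
   (sz==2) or 2 (sz==4) yields the per-iteration increment of +/-2 or
   +/-4 directly. */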
3174 static
3175 void dis_string_op_increment(Int sz, IRTemp t_inc)
3177 if (sz == 4 || sz == 2) {
3178 assign( t_inc,
3179 binop(Iop_Shl32, IRExpr_Get( OFFB_DFLAG, Ity_I32 ),
3180 mkU8(sz/2) ) );
3181 } else {
3182 assign( t_inc,
3183 IRExpr_Get( OFFB_DFLAG, Ity_I32 ) );
3187 static
3188 void dis_string_op( void (*dis_OP)( Int, IRTemp ),
3189 Int sz, const HChar* name, UChar sorb )
3191 IRTemp t_inc = newTemp(Ity_I32);
3192 vassert(sorb == 0); /* hmm. so what was the point of passing it in? */
3193 dis_string_op_increment(sz, t_inc);
3194 dis_OP( sz, t_inc );
3195 DIP("%s%c\n", name, nameISize(sz));
3198 static
3199 void dis_MOVS ( Int sz, IRTemp t_inc )
3201 IRType ty = szToITy(sz);
3202 IRTemp td = newTemp(Ity_I32); /* EDI */
3203 IRTemp ts = newTemp(Ity_I32); /* ESI */
3205 assign( td, getIReg(4, R_EDI) );
3206 assign( ts, getIReg(4, R_ESI) );
3208 storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );
3210 putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
3211 putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
3214 static
3215 void dis_LODS ( Int sz, IRTemp t_inc )
3217 IRType ty = szToITy(sz);
3218 IRTemp ts = newTemp(Ity_I32); /* ESI */
3220 assign( ts, getIReg(4, R_ESI) );
3222 putIReg( sz, R_EAX, loadLE(ty, mkexpr(ts)) );
3224 putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
3227 static
3228 void dis_STOS ( Int sz, IRTemp t_inc )
3230 IRType ty = szToITy(sz);
3231 IRTemp ta = newTemp(ty); /* EAX */
3232 IRTemp td = newTemp(Ity_I32); /* EDI */
3234 assign( ta, getIReg(sz, R_EAX) );
3235 assign( td, getIReg(4, R_EDI) );
3237 storeLE( mkexpr(td), mkexpr(ta) );
3239 putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
3242 static
3243 void dis_CMPS ( Int sz, IRTemp t_inc )
3245 IRType ty = szToITy(sz);
3246 IRTemp tdv = newTemp(ty); /* (EDI) */
3247 IRTemp tsv = newTemp(ty); /* (ESI) */
3248 IRTemp td = newTemp(Ity_I32); /* EDI */
3249 IRTemp ts = newTemp(Ity_I32); /* ESI */
3251 assign( td, getIReg(4, R_EDI) );
3252 assign( ts, getIReg(4, R_ESI) );
3254 assign( tdv, loadLE(ty,mkexpr(td)) );
3255 assign( tsv, loadLE(ty,mkexpr(ts)) );
3257 setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );
3259 putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
3260 putIReg(4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
3263 static
3264 void dis_SCAS ( Int sz, IRTemp t_inc )
3266 IRType ty = szToITy(sz);
3267 IRTemp ta = newTemp(ty); /* EAX */
3268 IRTemp td = newTemp(Ity_I32); /* EDI */
3269 IRTemp tdv = newTemp(ty); /* (EDI) */
3271 assign( ta, getIReg(sz, R_EAX) );
3272 assign( td, getIReg(4, R_EDI) );
3274 assign( tdv, loadLE(ty,mkexpr(td)) );
3275 setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );
3277 putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
3281 /* Wrap the appropriate string op inside a REP/REPE/REPNE.
3282 We assume the insn is the last one in the basic block, and so emit a jump
3283 to the next insn, rather than just falling through. */
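/* The scheme: exit to eip_next up front if ECX is zero; otherwise
   decrement ECX and perform one iteration of the op.  Plain REP
   (X86CondAlways) then jumps back to eip unconditionally, while
   REPE/REPNE re-enter the insn at eip only while the condition holds,
   falling through to eip_next otherwise. */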
3284 static
3285 void dis_REP_op ( /*MOD*/DisResult* dres,
3286 X86Condcode cond,
3287 void (*dis_OP)(Int, IRTemp),
3288 Int sz, Addr32 eip, Addr32 eip_next, const HChar* name )
3290 IRTemp t_inc = newTemp(Ity_I32);
3291 IRTemp tc = newTemp(Ity_I32); /* ECX */
3293 assign( tc, getIReg(4,R_ECX) );
3295 stmt( IRStmt_Exit( binop(Iop_CmpEQ32,mkexpr(tc),mkU32(0)),
3296 Ijk_Boring,
3297 IRConst_U32(eip_next), OFFB_EIP ) );
3299 putIReg(4, R_ECX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );
3301 dis_string_op_increment(sz, t_inc);
3302 dis_OP (sz, t_inc);
3304 if (cond == X86CondAlways) {
3305 jmp_lit(dres, Ijk_Boring, eip);
3306 vassert(dres->whatNext == Dis_StopHere);
3307 } else {
3308 stmt( IRStmt_Exit( mk_x86g_calculate_condition(cond),
3309 Ijk_Boring,
3310 IRConst_U32(eip), OFFB_EIP ) );
3311 jmp_lit(dres, Ijk_Boring, eip_next);
3312 vassert(dres->whatNext == Dis_StopHere);
3314 DIP("%s%c\n", name, nameISize(sz));
3318 /*------------------------------------------------------------*/
3319 /*--- Arithmetic, etc. ---*/
3320 /*------------------------------------------------------------*/
3322 /* IMUL E, G. Supplied eip points to the modR/M byte. */
3323 static
3324 UInt dis_mul_E_G ( UChar sorb,
3325 Int size,
3326 Int delta0 )
3328 Int alen;
3329 HChar dis_buf[50];
3330 UChar rm = getIByte(delta0);
3331 IRType ty = szToITy(size);
3332 IRTemp te = newTemp(ty);
3333 IRTemp tg = newTemp(ty);
3334 IRTemp resLo = newTemp(ty);
3336 assign( tg, getIReg(size, gregOfRM(rm)) );
3337 if (epartIsReg(rm)) {
3338 assign( te, getIReg(size, eregOfRM(rm)) );
3339 } else {
3340 IRTemp addr = disAMode( &alen, sorb, delta0, dis_buf );
3341 assign( te, loadLE(ty,mkexpr(addr)) );
3344 setFlags_MUL ( ty, te, tg, X86G_CC_OP_SMULB );
3346 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );
3348 putIReg(size, gregOfRM(rm), mkexpr(resLo) );
3350 if (epartIsReg(rm)) {
3351 DIP("imul%c %s, %s\n", nameISize(size),
3352 nameIReg(size,eregOfRM(rm)),
3353 nameIReg(size,gregOfRM(rm)));
3354 return 1+delta0;
3355 } else {
3356 DIP("imul%c %s, %s\n", nameISize(size),
3357 dis_buf, nameIReg(size,gregOfRM(rm)));
3358 return alen+delta0;
3363 /* IMUL I * E -> G. Supplied eip points to the modR/M byte. */
3364 static
3365 UInt dis_imul_I_E_G ( UChar sorb,
3366 Int size,
3367 Int delta,
3368 Int litsize )
3370 Int d32, alen;
3371 HChar dis_buf[50];
3372 UChar rm = getIByte(delta);
3373 IRType ty = szToITy(size);
3374 IRTemp te = newTemp(ty);
3375 IRTemp tl = newTemp(ty);
3376 IRTemp resLo = newTemp(ty);
3378 vassert(size == 1 || size == 2 || size == 4);
3380 if (epartIsReg(rm)) {
3381 assign(te, getIReg(size, eregOfRM(rm)));
3382 delta++;
3383 } else {
3384 IRTemp addr = disAMode( &alen, sorb, delta, dis_buf );
3385 assign(te, loadLE(ty, mkexpr(addr)));
3386 delta += alen;
3388 d32 = getSDisp(litsize,delta);
3389 delta += litsize;
3391 if (size == 1) d32 &= 0xFF;
3392 if (size == 2) d32 &= 0xFFFF;
3394 assign(tl, mkU(ty,d32));
3396 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));
3398 setFlags_MUL ( ty, te, tl, X86G_CC_OP_SMULB );
3400 putIReg(size, gregOfRM(rm), mkexpr(resLo));
3402 DIP("imul %d, %s, %s\n", d32,
3403 ( epartIsReg(rm) ? nameIReg(size,eregOfRM(rm)) : dis_buf ),
3404 nameIReg(size,gregOfRM(rm)) );
3405 return delta;
3409 /* Generate an IR sequence to do a count-leading-zeroes operation on
3410 the supplied IRTemp, and return a new IRTemp holding the result.
3411 'ty' may be Ity_I16 or Ity_I32 only. In the case where the
3412 argument is zero, return the number of bits in the word (the
3413 natural semantics). */
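/* Worked example, for Ity_I16: an input of 0x0001 is shifted up to
   0x00010000, whose Clz32 is 15; an input of zero takes the ITE's
   special case and yields 16. */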
3414 static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
3416 vassert(ty == Ity_I32 || ty == Ity_I16);
3418 IRTemp src32 = newTemp(Ity_I32);
3419 assign(src32, widenUto32( mkexpr(src) ));
3421 IRTemp src32x = newTemp(Ity_I32);
3422 assign(src32x,
3423 binop(Iop_Shl32, mkexpr(src32),
3424 mkU8(32 - 8 * sizeofIRType(ty))));
3426 // Clz32 has undefined semantics when its input is zero, so
3427 // special-case around that.
3428 IRTemp res32 = newTemp(Ity_I32);
3429 assign(res32,
3430 IRExpr_ITE(
3431 binop(Iop_CmpEQ32, mkexpr(src32x), mkU32(0)),
3432 mkU32(8 * sizeofIRType(ty)),
3433 unop(Iop_Clz32, mkexpr(src32x))
3436 IRTemp res = newTemp(ty);
3437 assign(res, narrowTo(ty, mkexpr(res32)));
3438 return res;
3442 /*------------------------------------------------------------*/
3443 /*--- ---*/
3444 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/
3445 /*--- ---*/
3446 /*------------------------------------------------------------*/
3448 /* --- Helper functions for dealing with the register stack. --- */
3450 /* --- Set the emulation-warning pseudo-register. --- */
3452 static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
3454 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
3455 stmt( IRStmt_Put( OFFB_EMNOTE, e ) );
3458 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
3460 static IRExpr* mkQNaN64 ( void )
3462 /* QNaN is 0 2047 1 0(51 times)
3463 == 0b 0 11111111111 1 0(51 times)
3464 == 0x7FF8 0000 0000 0000 */
3466 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
3469 /* --------- Get/put the top-of-stack pointer. --------- */
3471 static IRExpr* get_ftop ( void )
3473 return IRExpr_Get( OFFB_FTOP, Ity_I32 );
3476 static void put_ftop ( IRExpr* e )
3478 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
3479 stmt( IRStmt_Put( OFFB_FTOP, e ) );
3482 /* --------- Get/put the C3210 bits. --------- */
3484 static IRExpr* get_C3210 ( void )
3486 return IRExpr_Get( OFFB_FC3210, Ity_I32 );
3489 static void put_C3210 ( IRExpr* e )
3491 stmt( IRStmt_Put( OFFB_FC3210, e ) );
3494 /* --------- Get/put the FPU rounding mode. --------- */
3495 static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
3497 return IRExpr_Get( OFFB_FPROUND, Ity_I32 );
3500 static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
3502 stmt( IRStmt_Put( OFFB_FPROUND, e ) );
3506 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */
3507 /* Produces a value in 0 .. 3, which is encoded as per the type
3508 IRRoundingMode. Since the guest_FPROUND value is also encoded as
3509 per IRRoundingMode, we merely need to get it and mask it for
3510 safety. */
3512 static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
3514 return binop( Iop_And32, get_fpround(), mkU32(3) );
3517 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
3519 return mkU32(Irrm_NEAREST);
3523 /* --------- Get/set FP register tag bytes. --------- */
3525 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
3527 static void put_ST_TAG ( Int i, IRExpr* value )
3529 IRRegArray* descr;
3530 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
3531 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
3532 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
3535 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be
3536 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */
3538 static IRExpr* get_ST_TAG ( Int i )
3540 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
3541 return IRExpr_GetI( descr, get_ftop(), i );
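/* GetI/PutI accesses wrap around the 8-entry register array, i.e. the
   element touched is (FTOP + i) % 8, so ST(i) names are resolved
   relative to the current top of stack. */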
3545 /* --------- Get/set FP registers. --------- */
3547 /* Given i, and some expression e, emit 'ST(i) = e' and set the
3548 register's tag to indicate the register is full. The previous
3549 state of the register is not checked. */
3551 static void put_ST_UNCHECKED ( Int i, IRExpr* value )
3553 IRRegArray* descr;
3554 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64);
3555 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
3556 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
3557 /* Mark the register as in-use. */
3558 put_ST_TAG(i, mkU8(1));
3561 /* Given i, and some expression e, emit
3562 ST(i) = is_full(i) ? NaN : e
3563 and set the tag accordingly. */
3566 static void put_ST ( Int i, IRExpr* value )
3568 put_ST_UNCHECKED(
3570 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
3571 /* non-0 means full */
3572 mkQNaN64(),
3573 /* 0 means empty */
3574 value
3580 /* Given i, generate an expression yielding 'ST(i)'. */
3582 static IRExpr* get_ST_UNCHECKED ( Int i )
3584 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
3585 return IRExpr_GetI( descr, get_ftop(), i );
3589 /* Given i, generate an expression yielding
3590 is_full(i) ? ST(i) : NaN */
3593 static IRExpr* get_ST ( Int i )
3595 return
3596 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
3597 /* non-0 means full */
3598 get_ST_UNCHECKED(i),
3599 /* 0 means empty */
3600 mkQNaN64());
3604 /* Given i, and some expression e, and a condition cond, generate IR
3605 which has the same effect as put_ST(i,e) when cond is true and has
3606 no effect when cond is false. Given the lack of proper
3607 if-then-else in the IR, this is pretty tricky. */
3610 static void maybe_put_ST ( IRTemp cond, Int i, IRExpr* value )
3612 // new_tag = if cond then FULL else old_tag
3613 // new_val = if cond then (if old_tag==FULL then NaN else val)
3614 // else old_val
3616 IRTemp old_tag = newTemp(Ity_I8);
3617 assign(old_tag, get_ST_TAG(i));
3618 IRTemp new_tag = newTemp(Ity_I8);
3619 assign(new_tag,
3620 IRExpr_ITE(mkexpr(cond), mkU8(1)/*FULL*/, mkexpr(old_tag)));
3622 IRTemp old_val = newTemp(Ity_F64);
3623 assign(old_val, get_ST_UNCHECKED(i));
3624 IRTemp new_val = newTemp(Ity_F64);
3625 assign(new_val,
3626 IRExpr_ITE(mkexpr(cond),
3627 IRExpr_ITE(binop(Iop_CmpNE8, mkexpr(old_tag), mkU8(0)),
3628 /* non-0 means full */
3629 mkQNaN64(),
3630 /* 0 means empty */
3631 value),
3632 mkexpr(old_val)));
3634 put_ST_UNCHECKED(i, mkexpr(new_val));
3635 // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So
3636 // now set it to new_tag instead.
3637 put_ST_TAG(i, mkexpr(new_tag));
3640 /* Adjust FTOP downwards by one register. */
3642 static void fp_push ( void )
3644 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) );
3647 /* Adjust FTOP downwards by one register when COND is 1:I1. Else
3648 don't change it. */
3650 static void maybe_fp_push ( IRTemp cond )
3652 put_ftop( binop(Iop_Sub32, get_ftop(), unop(Iop_1Uto32,mkexpr(cond))) );
3655 /* Adjust FTOP upwards by one register, and mark the vacated register
3656 as empty. */
3658 static void fp_pop ( void )
3660 put_ST_TAG(0, mkU8(0));
3661 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
3664 /* Set the C2 bit of the FPU status register to e[0]. Assumes that
3665 e[31:1] == 0. */
3667 static void set_C2 ( IRExpr* e )
3669 IRExpr* cleared = binop(Iop_And32, get_C3210(), mkU32(~X86G_FC_MASK_C2));
3670 put_C3210( binop(Iop_Or32,
3671 cleared,
3672 binop(Iop_Shl32, e, mkU8(X86G_FC_SHIFT_C2))) );
3675 /* Generate code to check that abs(d64) < 2^63 and is finite. This is
3676 used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The
3677 test is simple, but the derivation of it is not so simple.
3679 The exponent field for an IEEE754 double is 11 bits. That means it
3680 can take values 0 through 0x7FF. If the exponent has value 0x7FF,
3681 the number is either a NaN or an Infinity and so is not finite.
3682 Furthermore, a finite value of exactly 2^63 is the smallest value
3683 that has exponent value 0x43E. Hence, what we need to do is
3684 extract the exponent, ignoring the sign bit and mantissa, and check
3685 it is < 0x43E, or <= 0x43D.
3687 To make this easily applicable to 32- and 64-bit targets, a
3688 roundabout approach is used. First the number is converted to I64,
3689 then the top 32 bits are taken. Shifting them right by 20 bits
3690 places the sign bit and exponent in the bottom 12 bits. Anding
3691 with 0x7FF gets rid of the sign bit, leaving just the exponent
3692 available for comparison. */
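/* E.g. 1.0 has biased exponent 0x3FF and passes the test, while 2^63
   (exponent 0x43E) and any NaN or infinity (exponent 0x7FF) fail it. */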
3694 static IRTemp math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64 )
3696 IRTemp i64 = newTemp(Ity_I64);
3697 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(d64)) );
3698 IRTemp exponent = newTemp(Ity_I32);
3699 assign(exponent,
3700 binop(Iop_And32,
3701 binop(Iop_Shr32, unop(Iop_64HIto32, mkexpr(i64)), mkU8(20)),
3702 mkU32(0x7FF)));
3703 IRTemp in_range_and_finite = newTemp(Ity_I1);
3704 assign(in_range_and_finite,
3705 binop(Iop_CmpLE32U, mkexpr(exponent), mkU32(0x43D)));
3706 return in_range_and_finite;
3709 /* Invent a plausible-looking FPU status word value:
3710 ((ftop & 7) << 11) | (c3210 & 0x4700) */
3712 static IRExpr* get_FPU_sw ( void )
3714 return
3715 unop(Iop_32to16,
3716 binop(Iop_Or32,
3717 binop(Iop_Shl32,
3718 binop(Iop_And32, get_ftop(), mkU32(7)),
3719 mkU8(11)),
3720 binop(Iop_And32, get_C3210(), mkU32(0x4700))
3725 /* ------------------------------------------------------- */
3726 /* Given all that stack-mangling junk, we can now go ahead
3727 and describe FP instructions. */
3730 /* ST(0) = ST(0) `op` mem64/32(addr)
3731 Need to check ST(0)'s tag on read, but not on write. */
3733 static
3734 void fp_do_op_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf,
3735 IROp op, Bool dbl )
3737 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
3738 if (dbl) {
3739 put_ST_UNCHECKED(0,
3740 triop( op,
3741 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3742 get_ST(0),
3743 loadLE(Ity_F64,mkexpr(addr))
3745 } else {
3746 put_ST_UNCHECKED(0,
3747 triop( op,
3748 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3749 get_ST(0),
3750 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr)))
3756 /* ST(0) = mem64/32(addr) `op` ST(0)
3757 Need to check ST(0)'s tag on read, but not on write. */
3759 static
3760 void fp_do_oprev_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf,
3761 IROp op, Bool dbl )
3763 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
3764 if (dbl) {
3765 put_ST_UNCHECKED(0,
3766 triop( op,
3767 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3768 loadLE(Ity_F64,mkexpr(addr)),
3769 get_ST(0)
3771 } else {
3772 put_ST_UNCHECKED(0,
3773 triop( op,
3774 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3775 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))),
3776 get_ST(0)
3782 /* ST(dst) = ST(dst) `op` ST(src).
3783 Check dst and src tags when reading but not on write. */
3785 static
3786 void fp_do_op_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
3787 Bool pop_after )
3789 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"",
3790 st_src, st_dst);
3791 put_ST_UNCHECKED(
3792 st_dst,
3793 triop( op,
3794 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3795 get_ST(st_dst),
3796 get_ST(st_src) )
3798 if (pop_after)
3799 fp_pop();
3802 /* ST(dst) = ST(src) `op` ST(dst).
3803 Check dst and src tags when reading but not on write. */
3805 static
3806 void fp_do_oprev_ST_ST ( const HChar* op_txt, IROp op, UInt st_src,
3807 UInt st_dst, Bool pop_after )
3809 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"",
3810 st_src, st_dst);
3811 put_ST_UNCHECKED(
3812 st_dst,
3813 triop( op,
3814 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3815 get_ST(st_src),
3816 get_ST(st_dst) )
3818 if (pop_after)
3819 fp_pop();
3822 /* %eflags(Z,P,C) = UCOMI( st(0), st(i) ) */
3823 static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after )
3825 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i);
3826 /* This is a bit of a hack (and isn't really right). It sets
3827 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
3828 documentation implies A and S are unchanged. */
3830 /* It's also fishy in that it is used both for COMIP and
3831 UCOMIP, and they aren't the same (although similar). */
3832 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
3833 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
3834 stmt( IRStmt_Put( OFFB_CC_DEP1,
3835 binop( Iop_And32,
3836 binop(Iop_CmpF64, get_ST(0), get_ST(i)),
3837 mkU32(0x45)
3838 )));
3839 /* Set NDEP even though it isn't used. This makes redundant-PUT
3840 elimination of previous stores to this field work better. */
3841 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
3842 if (pop_after)
3843 fp_pop();
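/* A sketch of why the 0x45 mask works, assuming VEX's usual encoding
   of Iop_CmpF64 results (Ircr_UN = 0x45, Ircr_LT = 0x01,
   Ircr_EQ = 0x40, Ircr_GT = 0x00), which mirrors the x87 flag
   layout: %eflags has C at bit 0, P at bit 2 and Z at bit 6, so
   ANDing with 0x45 keeps exactly those three bits.  E.g. an
   unordered compare yields 0x45, i.e. Z = P = C = 1. */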
3847 static
3848 UInt dis_FPU ( Bool* decode_ok, UChar sorb, Int delta )
3850 Int len;
3851 UInt r_src, r_dst;
3852 HChar dis_buf[50];
3853 IRTemp t1, t2;
3855 /* On entry, delta points at the second byte of the insn (the modrm
3856 byte).*/
3857 UChar first_opcode = getIByte(delta-1);
3858 UChar modrm = getIByte(delta+0);
3860 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
3862 if (first_opcode == 0xD8) {
3863 if (modrm < 0xC0) {
3865 /* bits 5,4,3 are an opcode extension, and the modRM also
3866 specifies an address. */
3867 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
3868 delta += len;
3870 switch (gregOfRM(modrm)) {
3872 case 0: /* FADD single-real */
3873 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False );
3874 break;
3876 case 1: /* FMUL single-real */
3877 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False );
3878 break;
3880 case 2: /* FCOM single-real */
3881 DIP("fcoms %s\n", dis_buf);
3882 /* This forces C1 to zero, which isn't right. */
3883 put_C3210(
3884 binop( Iop_And32,
3885 binop(Iop_Shl32,
3886 binop(Iop_CmpF64,
3887 get_ST(0),
3888 unop(Iop_F32toF64,
3889 loadLE(Ity_F32,mkexpr(addr)))),
3890 mkU8(8)),
3891 mkU32(0x4500)
3893 break;
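/* Why the shift-and-mask above works (same Iop_CmpF64 encoding
   assumption as noted earlier): shifting the 0x45-style result
   left by 8 lines its bits up with the x87 condition bits C0
   (bit 8), C2 (bit 10) and C3 (bit 14), and masking with 0x4500
   keeps exactly those.  E.g. an unordered compare gives
   0x45 << 8 = 0x4500, i.e. C3 = C2 = C0 = 1, as real hardware
   reports. */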
3895 case 3: /* FCOMP single-real */
3896 DIP("fcomps %s\n", dis_buf);
3897 /* This forces C1 to zero, which isn't right. */
3898 put_C3210(
3899 binop( Iop_And32,
3900 binop(Iop_Shl32,
3901 binop(Iop_CmpF64,
3902 get_ST(0),
3903 unop(Iop_F32toF64,
3904 loadLE(Ity_F32,mkexpr(addr)))),
3905 mkU8(8)),
3906 mkU32(0x4500)
3908 fp_pop();
3909 break;
3911 case 4: /* FSUB single-real */
3912 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False );
3913 break;
3915 case 5: /* FSUBR single-real */
3916 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False );
3917 break;
3919 case 6: /* FDIV single-real */
3920 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False );
3921 break;
3923 case 7: /* FDIVR single-real */
3924 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False );
3925 break;
3927 default:
3928 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
3929 vex_printf("first_opcode == 0xD8\n");
3930 goto decode_fail;
3932 } else {
3933 delta++;
3934 switch (modrm) {
3936 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
3937 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False );
3938 break;
3940 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
3941 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False );
3942 break;
3944 /* Dunno if this is right */
3945 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
3946 r_dst = (UInt)modrm - 0xD0;
3947 DIP("fcom %%st(0),%%st(%u)\n", r_dst);
3948 /* This forces C1 to zero, which isn't right. */
3949 put_C3210(
3950 binop( Iop_And32,
3951 binop(Iop_Shl32,
3952 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
3953 mkU8(8)),
3954 mkU32(0x4500)
3956 break;
3958 /* Dunno if this is right */
3959 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
3960 r_dst = (UInt)modrm - 0xD8;
3961 DIP("fcomp %%st(0),%%st(%u)\n", r_dst);
3962 /* This forces C1 to zero, which isn't right. */
3963 put_C3210(
3964 binop( Iop_And32,
3965 binop(Iop_Shl32,
3966 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
3967 mkU8(8)),
3968 mkU32(0x4500)
3970 fp_pop();
3971 break;
3973 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
3974 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False );
3975 break;
3977 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
3978 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False );
3979 break;
3981 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
3982 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False );
3983 break;
3985 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
3986 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False );
3987 break;
3989 default:
3990 goto decode_fail;
3995 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
3996 else
3997 if (first_opcode == 0xD9) {
3998 if (modrm < 0xC0) {
4000 /* bits 5,4,3 are an opcode extension, and the modRM also
4001 specifies an address. */
4002 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
4003 delta += len;
4005 switch (gregOfRM(modrm)) {
4007 case 0: /* FLD single-real */
4008 DIP("flds %s\n", dis_buf);
4009 fp_push();
4010 put_ST(0, unop(Iop_F32toF64,
4011 loadLE(Ity_F32, mkexpr(addr))));
4012 break;
4014 case 2: /* FST single-real */
4015 DIP("fsts %s\n", dis_buf);
4016 storeLE(mkexpr(addr),
4017 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
4018 break;
4020 case 3: /* FSTP single-real */
4021 DIP("fstps %s\n", dis_buf);
4022 storeLE(mkexpr(addr),
4023 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
4024 fp_pop();
4025 break;
4027 case 4: { /* FLDENV m28 */
4028 /* Uses dirty helper:
4029 VexEmNote x86g_dirtyhelper_FLDENV ( VexGuestX86State*, HWord ) */
4030 IRTemp ew = newTemp(Ity_I32);
4031 IRDirty* d = unsafeIRDirty_0_N (
4032 0/*regparms*/,
4033 "x86g_dirtyhelper_FLDENV",
4034 &x86g_dirtyhelper_FLDENV,
4035 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
4037 d->tmp = ew;
4038 /* declare we're reading memory */
4039 d->mFx = Ifx_Read;
4040 d->mAddr = mkexpr(addr);
4041 d->mSize = 28;
4043 /* declare we're writing guest state */
4044 d->nFxState = 4;
4045 vex_bzero(&d->fxState, sizeof(d->fxState));
4047 d->fxState[0].fx = Ifx_Write;
4048 d->fxState[0].offset = OFFB_FTOP;
4049 d->fxState[0].size = sizeof(UInt);
4051 d->fxState[1].fx = Ifx_Write;
4052 d->fxState[1].offset = OFFB_FPTAGS;
4053 d->fxState[1].size = 8 * sizeof(UChar);
4055 d->fxState[2].fx = Ifx_Write;
4056 d->fxState[2].offset = OFFB_FPROUND;
4057 d->fxState[2].size = sizeof(UInt);
4059 d->fxState[3].fx = Ifx_Write;
4060 d->fxState[3].offset = OFFB_FC3210;
4061 d->fxState[3].size = sizeof(UInt);
4063 stmt( IRStmt_Dirty(d) );
4065 /* ew contains any emulation warning we may need to
4066 issue. If needed, side-exit to the next insn,
4067 reporting the warning, so that Valgrind's dispatcher
4068 sees the warning. */
4069 put_emwarn( mkexpr(ew) );
4070 stmt(
4071 IRStmt_Exit(
4072 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
4073 Ijk_EmWarn,
4074 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
4075 OFFB_EIP
4079 DIP("fldenv %s\n", dis_buf);
4080 break;
4083 case 5: {/* FLDCW */
4084 /* The only thing we observe in the control word is the
4085 rounding mode. Therefore, pass the 16-bit value
4086 (x87 native-format control word) to a clean helper,
4087 getting back a 64-bit value, the lower half of which
4088 is the FPROUND value to store, and the upper half of
4089 which is the emulation-warning token which may be
4090 generated.
4092 /* ULong x86g_check_fldcw ( UInt ); */
4093 IRTemp t64 = newTemp(Ity_I64);
4094 IRTemp ew = newTemp(Ity_I32);
4095 DIP("fldcw %s\n", dis_buf);
4096 assign( t64, mkIRExprCCall(
4097 Ity_I64, 0/*regparms*/,
4098 "x86g_check_fldcw",
4099 &x86g_check_fldcw,
4100 mkIRExprVec_1(
4101 unop( Iop_16Uto32,
4102 loadLE(Ity_I16, mkexpr(addr)))
4107 put_fpround( unop(Iop_64to32, mkexpr(t64)) );
4108 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
4109 put_emwarn( mkexpr(ew) );
4110 /* Finally, if an emulation warning was reported,
4111 side-exit to the next insn, reporting the warning,
4112 so that Valgrind's dispatcher sees the warning. */
4113 stmt(
4114 IRStmt_Exit(
4115 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
4116 Ijk_EmWarn,
4117 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
4118 OFFB_EIP
4121 break;
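/* Worked example (illustrative, assuming the CW layout with RC in
   bits 11:10 and VEX's Irrm_NEAREST encoding of 0): loading the
   power-up control word 0x037F gives RC = 00, so the helper returns
   FPROUND = Irrm_NEAREST in the low half and a zero emulation
   warning in the high half, and the side-exit above is not taken. */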
4124 case 6: { /* FNSTENV m28 */
4125 /* Uses dirty helper:
4126 void x86g_dirtyhelper_FSTENV ( VexGuestX86State*, HWord ) */
4127 IRDirty* d = unsafeIRDirty_0_N (
4128 0/*regparms*/,
4129 "x86g_dirtyhelper_FSTENV",
4130 &x86g_dirtyhelper_FSTENV,
4131 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
4133 /* declare we're writing memory */
4134 d->mFx = Ifx_Write;
4135 d->mAddr = mkexpr(addr);
4136 d->mSize = 28;
4138 /* declare we're reading guest state */
4139 d->nFxState = 4;
4140 vex_bzero(&d->fxState, sizeof(d->fxState));
4142 d->fxState[0].fx = Ifx_Read;
4143 d->fxState[0].offset = OFFB_FTOP;
4144 d->fxState[0].size = sizeof(UInt);
4146 d->fxState[1].fx = Ifx_Read;
4147 d->fxState[1].offset = OFFB_FPTAGS;
4148 d->fxState[1].size = 8 * sizeof(UChar);
4150 d->fxState[2].fx = Ifx_Read;
4151 d->fxState[2].offset = OFFB_FPROUND;
4152 d->fxState[2].size = sizeof(UInt);
4154 d->fxState[3].fx = Ifx_Read;
4155 d->fxState[3].offset = OFFB_FC3210;
4156 d->fxState[3].size = sizeof(UInt);
4158 stmt( IRStmt_Dirty(d) );
4160 DIP("fnstenv %s\n", dis_buf);
4161 break;
4164 case 7: /* FNSTCW */
4165 /* Fake up a native x87 FPU control word. The only
4166 thing it depends on is FPROUND[1:0], so call a clean
4167 helper to cook it up. */
4168 /* UInt x86g_create_fpucw ( UInt fpround ) */
4169 DIP("fnstcw %s\n", dis_buf);
4170 storeLE(
4171 mkexpr(addr),
4172 unop( Iop_32to16,
4173 mkIRExprCCall(
4174 Ity_I32, 0/*regp*/,
4175 "x86g_create_fpucw", &x86g_create_fpucw,
4176 mkIRExprVec_1( get_fpround() )
4180 break;
4182 default:
4183 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
4184 vex_printf("first_opcode == 0xD9\n");
4185 goto decode_fail;
4188 } else {
4189 delta++;
4190 switch (modrm) {
4192 case 0xC0 ... 0xC7: /* FLD %st(?) */
4193 r_src = (UInt)modrm - 0xC0;
4194 DIP("fld %%st(%u)\n", r_src);
4195 t1 = newTemp(Ity_F64);
4196 assign(t1, get_ST(r_src));
4197 fp_push();
4198 put_ST(0, mkexpr(t1));
4199 break;
4201 case 0xC8 ... 0xCF: /* FXCH %st(?) */
4202 r_src = (UInt)modrm - 0xC8;
4203 DIP("fxch %%st(%u)\n", r_src);
4204 t1 = newTemp(Ity_F64);
4205 t2 = newTemp(Ity_F64);
4206 assign(t1, get_ST(0));
4207 assign(t2, get_ST(r_src));
4208 put_ST_UNCHECKED(0, mkexpr(t2));
4209 put_ST_UNCHECKED(r_src, mkexpr(t1));
4210 break;
4212 case 0xE0: /* FCHS */
4213 DIP("fchs\n");
4214 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0)));
4215 break;
4217 case 0xE1: /* FABS */
4218 DIP("fabs\n");
4219 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0)));
4220 break;
4222 case 0xE4: /* FTST */
4223 DIP("ftst\n");
4224 /* This forces C1 to zero, which isn't right. */
4225 /* Well, in fact the Intel docs say (bizarrely): "C1 is
4226 set to 0 if stack underflow occurred; otherwise, set
4227 to 0" which is pretty nonsensical. I guess it's a
4228 typo. */
4229 put_C3210(
4230 binop( Iop_And32,
4231 binop(Iop_Shl32,
4232 binop(Iop_CmpF64,
4233 get_ST(0),
4234 IRExpr_Const(IRConst_F64i(0x0ULL))),
4235 mkU8(8)),
4236 mkU32(0x4500)
4238 break;
4240 case 0xE5: { /* FXAM */
4241 /* This is an interesting one. It examines %st(0),
4242 regardless of whether the tag says it's empty or not.
4243 Here, just pass both the tag (in our format) and the
4244 value (as a double, actually a ULong) to a helper
4245 function. */
4246 IRExpr** args
4247 = mkIRExprVec_2( unop(Iop_8Uto32, get_ST_TAG(0)),
4248 unop(Iop_ReinterpF64asI64,
4249 get_ST_UNCHECKED(0)) );
4250 put_C3210(mkIRExprCCall(
4251 Ity_I32,
4252 0/*regparm*/,
4253 "x86g_calculate_FXAM", &x86g_calculate_FXAM,
4254 args
4256 DIP("fxam\n");
4257 break;
4260 case 0xE8: /* FLD1 */
4261 DIP("fld1\n");
4262 fp_push();
4263 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
4264 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL)));
4265 break;
4267 case 0xE9: /* FLDL2T */
4268 DIP("fldl2t\n");
4269 fp_push();
4270 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
4271 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL)));
4272 break;
4274 case 0xEA: /* FLDL2E */
4275 DIP("fldl2e\n");
4276 fp_push();
4277 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
4278 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL)));
4279 break;
4281 case 0xEB: /* FLDPI */
4282 DIP("fldpi\n");
4283 fp_push();
4284 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
4285 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL)));
4286 break;
4288 case 0xEC: /* FLDLG2 */
4289 DIP("fldlg2\n");
4290 fp_push();
4291 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
4292 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL)));
4293 break;
4295 case 0xED: /* FLDLN2 */
4296 DIP("fldln2\n");
4297 fp_push();
4298 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
4299 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL)));
4300 break;
4302 case 0xEE: /* FLDZ */
4303 DIP("fldz\n");
4304 fp_push();
4305 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
4306 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL)));
4307 break;
4309 case 0xF0: /* F2XM1 */
4310 DIP("f2xm1\n");
4311 put_ST_UNCHECKED(0,
4312 binop(Iop_2xm1F64,
4313 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4314 get_ST(0)));
4315 break;
4317 case 0xF1: /* FYL2X */
4318 DIP("fyl2x\n");
4319 put_ST_UNCHECKED(1,
4320 triop(Iop_Yl2xF64,
4321 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4322 get_ST(1),
4323 get_ST(0)));
4324 fp_pop();
4325 break;
4327 case 0xF2: { /* FPTAN */
4328 DIP("fptan\n");
4329 IRTemp argD = newTemp(Ity_F64);
4330 assign(argD, get_ST(0));
4331 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
4332 IRTemp resD = newTemp(Ity_F64);
4333 assign(resD,
4334 IRExpr_ITE(
4335 mkexpr(argOK),
4336 binop(Iop_TanF64,
4337 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4338 mkexpr(argD)),
4339 mkexpr(argD))
4341 put_ST_UNCHECKED(0, mkexpr(resD));
4342 /* Conditionally push 1.0 on the stack, if the arg is
4343 in range */
4344 maybe_fp_push(argOK);
4345 maybe_put_ST(argOK, 0,
4346 IRExpr_Const(IRConst_F64(1.0)));
4347 set_C2( binop(Iop_Xor32,
4348 unop(Iop_1Uto32, mkexpr(argOK)),
4349 mkU32(1)) );
4350 break;
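/* Note on the C2 computation just above: it is simply
   C2 = argOK ^ 1, i.e. C2 is set exactly when the operand was out
   of range and left unreduced, which is how hardware signals
   "argument reduction incomplete". */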
4353 case 0xF3: /* FPATAN */
4354 DIP("fpatan\n");
4355 put_ST_UNCHECKED(1,
4356 triop(Iop_AtanF64,
4357 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4358 get_ST(1),
4359 get_ST(0)));
4360 fp_pop();
4361 break;
4363 case 0xF4: { /* FXTRACT */
4364 IRTemp argF = newTemp(Ity_F64);
4365 IRTemp sigF = newTemp(Ity_F64);
4366 IRTemp expF = newTemp(Ity_F64);
4367 IRTemp argI = newTemp(Ity_I64);
4368 IRTemp sigI = newTemp(Ity_I64);
4369 IRTemp expI = newTemp(Ity_I64);
4370 DIP("fxtract\n");
4371 assign( argF, get_ST(0) );
4372 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF)));
4373 assign( sigI,
4374 mkIRExprCCall(
4375 Ity_I64, 0/*regparms*/,
4376 "x86amd64g_calculate_FXTRACT",
4377 &x86amd64g_calculate_FXTRACT,
4378 mkIRExprVec_2( mkexpr(argI),
4379 mkIRExpr_HWord(0)/*sig*/ ))
4381 assign( expI,
4382 mkIRExprCCall(
4383 Ity_I64, 0/*regparms*/,
4384 "x86amd64g_calculate_FXTRACT",
4385 &x86amd64g_calculate_FXTRACT,
4386 mkIRExprVec_2( mkexpr(argI),
4387 mkIRExpr_HWord(1)/*exp*/ ))
4389 assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) );
4390 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) );
4391 /* exponent */
4392 put_ST_UNCHECKED(0, mkexpr(expF) );
4393 fp_push();
4394 /* significand */
4395 put_ST(0, mkexpr(sigF) );
4396 break;
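/* Worked example (plain arithmetic): for ST(0) = 6.0 = 1.5 * 2^2,
   the two helper calls above produce significand 1.5 and exponent
   2.0, so afterwards ST(1) = 2.0 and ST(0) = 1.5. */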
4399 case 0xF5: { /* FPREM1 -- IEEE compliant */
4400 IRTemp a1 = newTemp(Ity_F64);
4401 IRTemp a2 = newTemp(Ity_F64);
4402 DIP("fprem1\n");
4403 /* Do FPREM1 twice, once to get the remainder, and once
4404 to get the C3210 flag values. */
4405 assign( a1, get_ST(0) );
4406 assign( a2, get_ST(1) );
4407 put_ST_UNCHECKED(0,
4408 triop(Iop_PRem1F64,
4409 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4410 mkexpr(a1),
4411 mkexpr(a2)));
4412 put_C3210(
4413 triop(Iop_PRem1C3210F64,
4414 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4415 mkexpr(a1),
4416 mkexpr(a2)) );
4417 break;
4420 case 0xF7: /* FINCSTP */
4421 DIP("fprem\n");
4422 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
4423 break;
4425 case 0xF8: { /* FPREM -- not IEEE compliant */
4426 IRTemp a1 = newTemp(Ity_F64);
4427 IRTemp a2 = newTemp(Ity_F64);
4428 DIP("fprem\n");
4429 /* Do FPREM twice, once to get the remainder, and once
4430 to get the C3210 flag values. */
4431 assign( a1, get_ST(0) );
4432 assign( a2, get_ST(1) );
4433 put_ST_UNCHECKED(0,
4434 triop(Iop_PRemF64,
4435 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4436 mkexpr(a1),
4437 mkexpr(a2)));
4438 put_C3210(
4439 triop(Iop_PRemC3210F64,
4440 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4441 mkexpr(a1),
4442 mkexpr(a2)) );
4443 break;
4446 case 0xF9: /* FYL2XP1 */
4447 DIP("fyl2xp1\n");
4448 put_ST_UNCHECKED(1,
4449 triop(Iop_Yl2xp1F64,
4450 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4451 get_ST(1),
4452 get_ST(0)));
4453 fp_pop();
4454 break;
4456 case 0xFA: /* FSQRT */
4457 DIP("fsqrt\n");
4458 put_ST_UNCHECKED(0,
4459 binop(Iop_SqrtF64,
4460 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4461 get_ST(0)));
4462 break;
4464 case 0xFB: { /* FSINCOS */
4465 DIP("fsincos\n");
4466 IRTemp argD = newTemp(Ity_F64);
4467 assign(argD, get_ST(0));
4468 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
4469 IRTemp resD = newTemp(Ity_F64);
4470 assign(resD,
4471 IRExpr_ITE(
4472 mkexpr(argOK),
4473 binop(Iop_SinF64,
4474 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4475 mkexpr(argD)),
4476 mkexpr(argD))
4478 put_ST_UNCHECKED(0, mkexpr(resD));
4479 /* Conditionally push the cos value on the stack, if
4480 the arg is in range */
4481 maybe_fp_push(argOK);
4482 maybe_put_ST(argOK, 0,
4483 binop(Iop_CosF64,
4484 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4485 mkexpr(argD)));
4486 set_C2( binop(Iop_Xor32,
4487 unop(Iop_1Uto32, mkexpr(argOK)),
4488 mkU32(1)) );
4489 break;
4492 case 0xFC: /* FRNDINT */
4493 DIP("frndint\n");
4494 put_ST_UNCHECKED(0,
4495 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) );
4496 break;
4498 case 0xFD: /* FSCALE */
4499 DIP("fscale\n");
4500 put_ST_UNCHECKED(0,
4501 triop(Iop_ScaleF64,
4502 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4503 get_ST(0),
4504 get_ST(1)));
4505 break;
4507 case 0xFE: /* FSIN */
4508 case 0xFF: { /* FCOS */
4509 Bool isSIN = modrm == 0xFE;
4510 DIP("%s\n", isSIN ? "fsin" : "fcos");
4511 IRTemp argD = newTemp(Ity_F64);
4512 assign(argD, get_ST(0));
4513 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
4514 IRTemp resD = newTemp(Ity_F64);
4515 assign(resD,
4516 IRExpr_ITE(
4517 mkexpr(argOK),
4518 binop(isSIN ? Iop_SinF64 : Iop_CosF64,
4519 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4520 mkexpr(argD)),
4521 mkexpr(argD))
4523 put_ST_UNCHECKED(0, mkexpr(resD));
4524 set_C2( binop(Iop_Xor32,
4525 unop(Iop_1Uto32, mkexpr(argOK)),
4526 mkU32(1)) );
4527 break;
4530 default:
4531 goto decode_fail;
4536 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
4537 else
4538 if (first_opcode == 0xDA) {
4540 if (modrm < 0xC0) {
4542 /* bits 5,4,3 are an opcode extension, and the modRM also
4543 specifies an address. */
4544 IROp fop;
4545 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
4546 delta += len;
4547 switch (gregOfRM(modrm)) {
4549 case 0: /* FIADD m32int */ /* ST(0) += m32int */
4550 DIP("fiaddl %s\n", dis_buf);
4551 fop = Iop_AddF64;
4552 goto do_fop_m32;
4554 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
4555 DIP("fimull %s\n", dis_buf);
4556 fop = Iop_MulF64;
4557 goto do_fop_m32;
4559 case 2: /* FICOM m32int */
4560 DIP("ficoml %s\n", dis_buf);
4561 /* This forces C1 to zero, which isn't right. */
4562 put_C3210(
4563 binop( Iop_And32,
4564 binop(Iop_Shl32,
4565 binop(Iop_CmpF64,
4566 get_ST(0),
4567 unop(Iop_I32StoF64,
4568 loadLE(Ity_I32,mkexpr(addr)))),
4569 mkU8(8)),
4570 mkU32(0x4500)
4572 break;
4574 case 3: /* FICOMP m32int */
4575 DIP("ficompl %s\n", dis_buf);
4576 /* This forces C1 to zero, which isn't right. */
4577 put_C3210(
4578 binop( Iop_And32,
4579 binop(Iop_Shl32,
4580 binop(Iop_CmpF64,
4581 get_ST(0),
4582 unop(Iop_I32StoF64,
4583 loadLE(Ity_I32,mkexpr(addr)))),
4584 mkU8(8)),
4585 mkU32(0x4500)
4587 fp_pop();
4588 break;
4590 case 4: /* FISUB m32int */ /* ST(0) -= m32int */
4591 DIP("fisubl %s\n", dis_buf);
4592 fop = Iop_SubF64;
4593 goto do_fop_m32;
4595 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
4596 DIP("fisubrl %s\n", dis_buf);
4597 fop = Iop_SubF64;
4598 goto do_foprev_m32;
4600 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
4601 DIP("fidivl %s\n", dis_buf);
4602 fop = Iop_DivF64;
4603 goto do_fop_m32;
4605 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
4606 DIP("fidivrl %s\n", dis_buf);
4607 fop = Iop_DivF64;
4608 goto do_foprev_m32;
4610 do_fop_m32:
4611 put_ST_UNCHECKED(0,
4612 triop(fop,
4613 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4614 get_ST(0),
4615 unop(Iop_I32StoF64,
4616 loadLE(Ity_I32, mkexpr(addr)))));
4617 break;
4619 do_foprev_m32:
4620 put_ST_UNCHECKED(0,
4621 triop(fop,
4622 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4623 unop(Iop_I32StoF64,
4624 loadLE(Ity_I32, mkexpr(addr))),
4625 get_ST(0)));
4626 break;
4628 default:
4629 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
4630 vex_printf("first_opcode == 0xDA\n");
4631 goto decode_fail;
4634 } else {
4636 delta++;
4637 switch (modrm) {
4639 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
4640 r_src = (UInt)modrm - 0xC0;
4641 DIP("fcmovb %%st(%u), %%st(0)\n", r_src);
4642 put_ST_UNCHECKED(0,
4643 IRExpr_ITE(
4644 mk_x86g_calculate_condition(X86CondB),
4645 get_ST(r_src), get_ST(0)) );
4646 break;
4648 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
4649 r_src = (UInt)modrm - 0xC8;
4650 DIP("fcmovz %%st(%u), %%st(0)\n", r_src);
4651 put_ST_UNCHECKED(0,
4652 IRExpr_ITE(
4653 mk_x86g_calculate_condition(X86CondZ),
4654 get_ST(r_src), get_ST(0)) );
4655 break;
4657 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
4658 r_src = (UInt)modrm - 0xD0;
4659 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src);
4660 put_ST_UNCHECKED(0,
4661 IRExpr_ITE(
4662 mk_x86g_calculate_condition(X86CondBE),
4663 get_ST(r_src), get_ST(0)) );
4664 break;
4666 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
4667 r_src = (UInt)modrm - 0xD8;
4668 DIP("fcmovu %%st(%u), %%st(0)\n", r_src);
4669 put_ST_UNCHECKED(0,
4670 IRExpr_ITE(
4671 mk_x86g_calculate_condition(X86CondP),
4672 get_ST(r_src), get_ST(0)) );
4673 break;
4675 case 0xE9: /* FUCOMPP %st(0),%st(1) */
4676 DIP("fucompp %%st(0),%%st(1)\n");
4677 /* This forces C1 to zero, which isn't right. */
4678 put_C3210(
4679 binop( Iop_And32,
4680 binop(Iop_Shl32,
4681 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
4682 mkU8(8)),
4683 mkU32(0x4500)
4685 fp_pop();
4686 fp_pop();
4687 break;
4689 default:
4690 goto decode_fail;
4696 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
4697 else
4698 if (first_opcode == 0xDB) {
4699 if (modrm < 0xC0) {
4701 /* bits 5,4,3 are an opcode extension, and the modRM also
4702 specifies an address. */
4703 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
4704 delta += len;
4706 switch (gregOfRM(modrm)) {
4708 case 0: /* FILD m32int */
4709 DIP("fildl %s\n", dis_buf);
4710 fp_push();
4711 put_ST(0, unop(Iop_I32StoF64,
4712 loadLE(Ity_I32, mkexpr(addr))));
4713 break;
4715 case 1: /* FISTTPL m32 (SSE3) */
4716 DIP("fisttpl %s\n", dis_buf);
4717 storeLE( mkexpr(addr),
4718 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );
4719 fp_pop();
4720 break;
4722 case 2: /* FIST m32 */
4723 DIP("fistl %s\n", dis_buf);
4724 storeLE( mkexpr(addr),
4725 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
4726 break;
4728 case 3: /* FISTP m32 */
4729 DIP("fistpl %s\n", dis_buf);
4730 storeLE( mkexpr(addr),
4731 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
4732 fp_pop();
4733 break;
4735 case 5: { /* FLD extended-real */
4736 /* Uses dirty helper:
4737 ULong x86g_dirtyhelper_loadF80le ( UInt )
4738 addr holds the address. First, do a dirty call to
4739 get hold of the data. */
4740 IRTemp val = newTemp(Ity_I64);
4741 IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );
4743 IRDirty* d = unsafeIRDirty_1_N (
4744 val,
4745 0/*regparms*/,
4746 "x86g_dirtyhelper_loadF80le",
4747 &x86g_dirtyhelper_loadF80le,
4748 args
4750 /* declare that we're reading memory */
4751 d->mFx = Ifx_Read;
4752 d->mAddr = mkexpr(addr);
4753 d->mSize = 10;
4755 /* execute the dirty call, dumping the result in val. */
4756 stmt( IRStmt_Dirty(d) );
4757 fp_push();
4758 put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));
4760 DIP("fldt %s\n", dis_buf);
4761 break;
4764 case 7: { /* FSTP extended-real */
4765 /* Uses dirty helper: void x86g_dirtyhelper_storeF80le ( UInt, ULong ) */
4766 IRExpr** args
4767 = mkIRExprVec_2( mkexpr(addr),
4768 unop(Iop_ReinterpF64asI64, get_ST(0)) );
4770 IRDirty* d = unsafeIRDirty_0_N (
4771 0/*regparms*/,
4772 "x86g_dirtyhelper_storeF80le",
4773 &x86g_dirtyhelper_storeF80le,
4774 args
4776 /* declare we're writing memory */
4777 d->mFx = Ifx_Write;
4778 d->mAddr = mkexpr(addr);
4779 d->mSize = 10;
4781 /* execute the dirty call. */
4782 stmt( IRStmt_Dirty(d) );
4783 fp_pop();
4785 DIP("fstpt\n %s", dis_buf);
4786 break;
4789 default:
4790 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
4791 vex_printf("first_opcode == 0xDB\n");
4792 goto decode_fail;
4795 } else {
4797 delta++;
4798 switch (modrm) {
4800 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
4801 r_src = (UInt)modrm - 0xC0;
4802 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src);
4803 put_ST_UNCHECKED(0,
4804 IRExpr_ITE(
4805 mk_x86g_calculate_condition(X86CondNB),
4806 get_ST(r_src), get_ST(0)) );
4807 break;
4809 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
4810 r_src = (UInt)modrm - 0xC8;
4811 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src);
4812 put_ST_UNCHECKED(0,
4813 IRExpr_ITE(
4814 mk_x86g_calculate_condition(X86CondNZ),
4815 get_ST(r_src), get_ST(0)) );
4816 break;
4818 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
4819 r_src = (UInt)modrm - 0xD0;
4820 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src);
4821 put_ST_UNCHECKED(0,
4822 IRExpr_ITE(
4823 mk_x86g_calculate_condition(X86CondNBE),
4824 get_ST(r_src), get_ST(0)) );
4825 break;
4827 case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
4828 r_src = (UInt)modrm - 0xD8;
4829 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src);
4830 put_ST_UNCHECKED(0,
4831 IRExpr_ITE(
4832 mk_x86g_calculate_condition(X86CondNP),
4833 get_ST(r_src), get_ST(0)) );
4834 break;
4836 case 0xE2: /* FNCLEX */
4837 DIP("fnclex\n");
4838 break;
4840 case 0xE3: { /* FNINIT */
4841 /* Uses dirty helper:
4842 void x86g_dirtyhelper_FINIT ( VexGuestX86State* ) */
4843 IRDirty* d = unsafeIRDirty_0_N (
4844 0/*regparms*/,
4845 "x86g_dirtyhelper_FINIT",
4846 &x86g_dirtyhelper_FINIT,
4847 mkIRExprVec_1(IRExpr_GSPTR())
4850 /* declare we're writing guest state */
4851 d->nFxState = 5;
4852 vex_bzero(&d->fxState, sizeof(d->fxState));
4854 d->fxState[0].fx = Ifx_Write;
4855 d->fxState[0].offset = OFFB_FTOP;
4856 d->fxState[0].size = sizeof(UInt);
4858 d->fxState[1].fx = Ifx_Write;
4859 d->fxState[1].offset = OFFB_FPREGS;
4860 d->fxState[1].size = 8 * sizeof(ULong);
4862 d->fxState[2].fx = Ifx_Write;
4863 d->fxState[2].offset = OFFB_FPTAGS;
4864 d->fxState[2].size = 8 * sizeof(UChar);
4866 d->fxState[3].fx = Ifx_Write;
4867 d->fxState[3].offset = OFFB_FPROUND;
4868 d->fxState[3].size = sizeof(UInt);
4870 d->fxState[4].fx = Ifx_Write;
4871 d->fxState[4].offset = OFFB_FC3210;
4872 d->fxState[4].size = sizeof(UInt);
4874 stmt( IRStmt_Dirty(d) );
4876 DIP("fninit\n");
4877 break;
4880 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
4881 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False );
4882 break;
4884 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
4885 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False );
4886 break;
4888 default:
4889 goto decode_fail;
4894 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
4895 else
4896 if (first_opcode == 0xDC) {
4897 if (modrm < 0xC0) {
4899 /* bits 5,4,3 are an opcode extension, and the modRM also
4900 specifies an address. */
4901 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
4902 delta += len;
4904 switch (gregOfRM(modrm)) {
4906 case 0: /* FADD double-real */
4907 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True );
4908 break;
4910 case 1: /* FMUL double-real */
4911 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True );
4912 break;
4914 case 2: /* FCOM double-real */
4915 DIP("fcoml %s\n", dis_buf);
4916 /* This forces C1 to zero, which isn't right. */
4917 put_C3210(
4918 binop( Iop_And32,
4919 binop(Iop_Shl32,
4920 binop(Iop_CmpF64,
4921 get_ST(0),
4922 loadLE(Ity_F64,mkexpr(addr))),
4923 mkU8(8)),
4924 mkU32(0x4500)
4926 break;
4928 case 3: /* FCOMP double-real */
4929 DIP("fcompl %s\n", dis_buf);
4930 /* This forces C1 to zero, which isn't right. */
4931 put_C3210(
4932 binop( Iop_And32,
4933 binop(Iop_Shl32,
4934 binop(Iop_CmpF64,
4935 get_ST(0),
4936 loadLE(Ity_F64,mkexpr(addr))),
4937 mkU8(8)),
4938 mkU32(0x4500)
4940 fp_pop();
4941 break;
4943 case 4: /* FSUB double-real */
4944 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True );
4945 break;
4947 case 5: /* FSUBR double-real */
4948 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True );
4949 break;
4951 case 6: /* FDIV double-real */
4952 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True );
4953 break;
4955 case 7: /* FDIVR double-real */
4956 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True );
4957 break;
4959 default:
4960 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
4961 vex_printf("first_opcode == 0xDC\n");
4962 goto decode_fail;
4965 } else {
4967 delta++;
4968 switch (modrm) {
4970 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
4971 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False );
4972 break;
4974 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
4975 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False );
4976 break;
4978 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
4979 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False );
4980 break;
4982 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
4983 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False );
4984 break;
4986 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
4987 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False );
4988 break;
4990 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
4991 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False );
4992 break;
4994 default:
4995 goto decode_fail;
5001 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
5002 else
5003 if (first_opcode == 0xDD) {
5005 if (modrm < 0xC0) {
5007 /* bits 5,4,3 are an opcode extension, and the modRM also
5008 specifies an address. */
5009 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5010 delta += len;
5012 switch (gregOfRM(modrm)) {
5014 case 0: /* FLD double-real */
5015 DIP("fldl %s\n", dis_buf);
5016 fp_push();
5017 put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
5018 break;
5020 case 1: /* FISTTPQ m64 (SSE3) */
5021 DIP("fistppll %s\n", dis_buf);
5022 storeLE( mkexpr(addr),
5023 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );
5024 fp_pop();
5025 break;
5027 case 2: /* FST double-real */
5028 DIP("fstl %s\n", dis_buf);
5029 storeLE(mkexpr(addr), get_ST(0));
5030 break;
5032 case 3: /* FSTP double-real */
5033 DIP("fstpl %s\n", dis_buf);
5034 storeLE(mkexpr(addr), get_ST(0));
5035 fp_pop();
5036 break;
5038 case 4: { /* FRSTOR m108 */
5039 /* Uses dirty helper:
5040 VexEmNote x86g_dirtyhelper_FRSTOR ( VexGuestX86State*, Addr32 ) */
5041 IRTemp ew = newTemp(Ity_I32);
5042 IRDirty* d = unsafeIRDirty_0_N (
5043 0/*regparms*/,
5044 "x86g_dirtyhelper_FRSTOR",
5045 &x86g_dirtyhelper_FRSTOR,
5046 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
5048 d->tmp = ew;
5049 /* declare we're reading memory */
5050 d->mFx = Ifx_Read;
5051 d->mAddr = mkexpr(addr);
5052 d->mSize = 108;
5054 /* declare we're writing guest state */
5055 d->nFxState = 5;
5056 vex_bzero(&d->fxState, sizeof(d->fxState));
5058 d->fxState[0].fx = Ifx_Write;
5059 d->fxState[0].offset = OFFB_FTOP;
5060 d->fxState[0].size = sizeof(UInt);
5062 d->fxState[1].fx = Ifx_Write;
5063 d->fxState[1].offset = OFFB_FPREGS;
5064 d->fxState[1].size = 8 * sizeof(ULong);
5066 d->fxState[2].fx = Ifx_Write;
5067 d->fxState[2].offset = OFFB_FPTAGS;
5068 d->fxState[2].size = 8 * sizeof(UChar);
5070 d->fxState[3].fx = Ifx_Write;
5071 d->fxState[3].offset = OFFB_FPROUND;
5072 d->fxState[3].size = sizeof(UInt);
5074 d->fxState[4].fx = Ifx_Write;
5075 d->fxState[4].offset = OFFB_FC3210;
5076 d->fxState[4].size = sizeof(UInt);
5078 stmt( IRStmt_Dirty(d) );
5080 /* ew contains any emulation warning we may need to
5081 issue. If needed, side-exit to the next insn,
5082 reporting the warning, so that Valgrind's dispatcher
5083 sees the warning. */
5084 put_emwarn( mkexpr(ew) );
5085 stmt(
5086 IRStmt_Exit(
5087 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
5088 Ijk_EmWarn,
5089 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
5090 OFFB_EIP
5094 DIP("frstor %s\n", dis_buf);
5095 break;
5098 case 6: { /* FNSAVE m108 */
5099 /* Uses dirty helper:
5100 void x86g_dirtyhelper_FSAVE ( VexGuestX86State*, UInt ) */
5101 IRDirty* d = unsafeIRDirty_0_N (
5102 0/*regparms*/,
5103 "x86g_dirtyhelper_FSAVE",
5104 &x86g_dirtyhelper_FSAVE,
5105 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
5107 /* declare we're writing memory */
5108 d->mFx = Ifx_Write;
5109 d->mAddr = mkexpr(addr);
5110 d->mSize = 108;
5112 /* declare we're reading guest state */
5113 d->nFxState = 5;
5114 vex_bzero(&d->fxState, sizeof(d->fxState));
5116 d->fxState[0].fx = Ifx_Read;
5117 d->fxState[0].offset = OFFB_FTOP;
5118 d->fxState[0].size = sizeof(UInt);
5120 d->fxState[1].fx = Ifx_Read;
5121 d->fxState[1].offset = OFFB_FPREGS;
5122 d->fxState[1].size = 8 * sizeof(ULong);
5124 d->fxState[2].fx = Ifx_Read;
5125 d->fxState[2].offset = OFFB_FPTAGS;
5126 d->fxState[2].size = 8 * sizeof(UChar);
5128 d->fxState[3].fx = Ifx_Read;
5129 d->fxState[3].offset = OFFB_FPROUND;
5130 d->fxState[3].size = sizeof(UInt);
5132 d->fxState[4].fx = Ifx_Read;
5133 d->fxState[4].offset = OFFB_FC3210;
5134 d->fxState[4].size = sizeof(UInt);
5136 stmt( IRStmt_Dirty(d) );
5138 DIP("fnsave %s\n", dis_buf);
5139 break;
5142 case 7: { /* FNSTSW m16 */
5143 IRExpr* sw = get_FPU_sw();
5144 vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
5145 storeLE( mkexpr(addr), sw );
5146 DIP("fnstsw %s\n", dis_buf);
5147 break;
5150 default:
5151 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
5152 vex_printf("first_opcode == 0xDD\n");
5153 goto decode_fail;
5155 } else {
5156 delta++;
5157 switch (modrm) {
5159 case 0xC0 ... 0xC7: /* FFREE %st(?) */
5160 r_dst = (UInt)modrm - 0xC0;
5161 DIP("ffree %%st(%u)\n", r_dst);
5162 put_ST_TAG ( r_dst, mkU8(0) );
5163 break;
5165 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
5166 r_dst = (UInt)modrm - 0xD0;
5167 DIP("fst %%st(0),%%st(%u)\n", r_dst);
5168 /* P4 manual says: "If the destination operand is a
5169 non-empty register, the invalid-operation exception
5170 is not generated." Hence put_ST_UNCHECKED. */
5171 put_ST_UNCHECKED(r_dst, get_ST(0));
5172 break;
5174 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
5175 r_dst = (UInt)modrm - 0xD8;
5176 DIP("fstp %%st(0),%%st(%u)\n", r_dst);
5177 /* P4 manual says: "If the destination operand is a
5178 non-empty register, the invalid-operation exception
5179 is not generated." Hence put_ST_UNCHECKED. */
5180 put_ST_UNCHECKED(r_dst, get_ST(0));
5181 fp_pop();
5182 break;
5184 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
5185 r_dst = (UInt)modrm - 0xE0;
5186 DIP("fucom %%st(0),%%st(%u)\n", r_dst);
5187 /* This forces C1 to zero, which isn't right. */
5188 put_C3210(
5189 binop( Iop_And32,
5190 binop(Iop_Shl32,
5191 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5192 mkU8(8)),
5193 mkU32(0x4500)
5195 break;
5197 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
5198 r_dst = (UInt)modrm - 0xE8;
5199 DIP("fucomp %%st(0),%%st(%u)\n", r_dst);
5200 /* This forces C1 to zero, which isn't right. */
5201 put_C3210(
5202 binop( Iop_And32,
5203 binop(Iop_Shl32,
5204 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5205 mkU8(8)),
5206 mkU32(0x4500)
5208 fp_pop();
5209 break;
5211 default:
5212 goto decode_fail;
5217 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
5218 else
5219 if (first_opcode == 0xDE) {
5221 if (modrm < 0xC0) {
5223 /* bits 5,4,3 are an opcode extension, and the modRM also
5224 specifies an address. */
5225 IROp fop;
5226 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5227 delta += len;
5229 switch (gregOfRM(modrm)) {
5231 case 0: /* FIADD m16int */ /* ST(0) += m16int */
5232 DIP("fiaddw %s\n", dis_buf);
5233 fop = Iop_AddF64;
5234 goto do_fop_m16;
5236 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
5237 DIP("fimulw %s\n", dis_buf);
5238 fop = Iop_MulF64;
5239 goto do_fop_m16;
5241 case 2: /* FICOM m16int */
5242 DIP("ficomw %s\n", dis_buf);
5243 /* This forces C1 to zero, which isn't right. */
5244 put_C3210(
5245 binop( Iop_And32,
5246 binop(Iop_Shl32,
5247 binop(Iop_CmpF64,
5248 get_ST(0),
5249 unop(Iop_I32StoF64,
5250 unop(Iop_16Sto32,
5251 loadLE(Ity_I16,mkexpr(addr))))),
5252 mkU8(8)),
5253 mkU32(0x4500)
5255 break;
5257 case 3: /* FICOMP m16int */
5258 DIP("ficompw %s\n", dis_buf);
5259 /* This forces C1 to zero, which isn't right. */
5260 put_C3210(
5261 binop( Iop_And32,
5262 binop(Iop_Shl32,
5263 binop(Iop_CmpF64,
5264 get_ST(0),
5265 unop(Iop_I32StoF64,
5266 unop(Iop_16Sto32,
5267 loadLE(Ity_I16,mkexpr(addr))))),
5268 mkU8(8)),
5269 mkU32(0x4500)
5271 fp_pop();
5272 break;
5274 case 4: /* FISUB m16int */ /* ST(0) -= m16int */
5275 DIP("fisubw %s\n", dis_buf);
5276 fop = Iop_SubF64;
5277 goto do_fop_m16;
5279 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
5280 DIP("fisubrw %s\n", dis_buf);
5281 fop = Iop_SubF64;
5282 goto do_foprev_m16;
5284 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
5285 DIP("fisubw %s\n", dis_buf);
5286 fop = Iop_DivF64;
5287 goto do_fop_m16;
5289 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
5290 DIP("fidivrw %s\n", dis_buf);
5291 fop = Iop_DivF64;
5292 goto do_foprev_m16;
5294 do_fop_m16:
5295 put_ST_UNCHECKED(0,
5296 triop(fop,
5297 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5298 get_ST(0),
5299 unop(Iop_I32StoF64,
5300 unop(Iop_16Sto32,
5301 loadLE(Ity_I16, mkexpr(addr))))));
5302 break;
5304 do_foprev_m16:
5305 put_ST_UNCHECKED(0,
5306 triop(fop,
5307 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5308 unop(Iop_I32StoF64,
5309 unop(Iop_16Sto32,
5310 loadLE(Ity_I16, mkexpr(addr)))),
5311 get_ST(0)));
5312 break;
5314 default:
5315 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
5316 vex_printf("first_opcode == 0xDE\n");
5317 goto decode_fail;
5320 } else {
5322 delta++;
5323 switch (modrm) {
5325 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
5326 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );
5327 break;
5329 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
5330 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
5331 break;
5333 case 0xD9: /* FCOMPP %st(0),%st(1) */
5334 DIP("fuompp %%st(0),%%st(1)\n");
5335 /* This forces C1 to zero, which isn't right. */
5336 put_C3210(
5337 binop( Iop_And32,
5338 binop(Iop_Shl32,
5339 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
5340 mkU8(8)),
5341 mkU32(0x4500)
5343 fp_pop();
5344 fp_pop();
5345 break;
5347 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
5348 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True );
5349 break;
5351 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
5352 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True );
5353 break;
5355 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
5356 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );
5357 break;
5359 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
5360 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
5361 break;
5363 default:
5364 goto decode_fail;
5370 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
5371 else
5372 if (first_opcode == 0xDF) {
5374 if (modrm < 0xC0) {
5376 /* bits 5,4,3 are an opcode extension, and the modRM also
5377 specifies an address. */
5378 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5379 delta += len;
5381 switch (gregOfRM(modrm)) {
5383 case 0: /* FILD m16int */
5384 DIP("fildw %s\n", dis_buf);
5385 fp_push();
5386 put_ST(0, unop(Iop_I32StoF64,
5387 unop(Iop_16Sto32,
5388 loadLE(Ity_I16, mkexpr(addr)))));
5389 break;
5391 case 1: /* FISTTPS m16 (SSE3) */
5392 DIP("fisttps %s\n", dis_buf);
5393 storeLE( mkexpr(addr),
5394 binop(Iop_F64toI16S, mkU32(Irrm_ZERO), get_ST(0)) );
5395 fp_pop();
5396 break;
5398 case 2: /* FIST m16 */
5399 DIP("fistp %s\n", dis_buf);
5400 storeLE( mkexpr(addr),
5401 binop(Iop_F64toI16S, get_roundingmode(), get_ST(0)) );
5402 break;
5404 case 3: /* FISTP m16 */
5405 DIP("fistps %s\n", dis_buf);
5406 storeLE( mkexpr(addr),
5407 binop(Iop_F64toI16S, get_roundingmode(), get_ST(0)) );
5408 fp_pop();
5409 break;
5411 case 5: /* FILD m64 */
5412 DIP("fildll %s\n", dis_buf);
5413 fp_push();
5414 put_ST(0, binop(Iop_I64StoF64,
5415 get_roundingmode(),
5416 loadLE(Ity_I64, mkexpr(addr))));
5417 break;
5419 case 7: /* FISTP m64 */
5420 DIP("fistpll %s\n", dis_buf);
5421 storeLE( mkexpr(addr),
5422 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) );
5423 fp_pop();
5424 break;
5426 default:
5427 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
5428 vex_printf("first_opcode == 0xDF\n");
5429 goto decode_fail;
5432 } else {
5434 delta++;
5435 switch (modrm) {
5437 case 0xC0: /* FFREEP %st(0) */
5438 DIP("ffreep %%st(%d)\n", 0);
5439 put_ST_TAG ( 0, mkU8(0) );
5440 fp_pop();
5441 break;
5443 case 0xE0: /* FNSTSW %ax */
5444 DIP("fnstsw %%ax\n");
5445 /* Get the FPU status word value and dump it in %AX. */
5446 if (0) {
5447 /* The obvious thing to do is simply dump the 16-bit
5448 status word value in %AX. However, due to a
5449 limitation in Memcheck's origin tracking
5450 machinery, this causes Memcheck not to track the
5451 origin of any undefinedness into %AH (only into
5452 %AL/%AX/%EAX), which means origins are lost in
5453 the sequence "fnstsw %ax; test $M,%ah; jcond .." */
5454 putIReg(2, R_EAX, get_FPU_sw());
5455 } else {
5456 /* So a somewhat lame kludge is to make it very
5457 clear to Memcheck that the value is written to
5458 both %AH and %AL. This generates marginally
5459 worse code, but I don't think it matters much. */
5460 IRTemp t16 = newTemp(Ity_I16);
5461 assign(t16, get_FPU_sw());
5462 putIReg( 1, R_AL, unop(Iop_16to8, mkexpr(t16)) );
5463 putIReg( 1, R_AH, unop(Iop_16HIto8, mkexpr(t16)) );
5465 break;
5467 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
5468 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
5469 break;
5471 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
5472 /* not really right since COMIP != UCOMIP */
5473 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
5474 break;
5476 default:
5477 goto decode_fail;
5483 else
5484 vpanic("dis_FPU(x86): invalid primary opcode");
5486 *decode_ok = True;
5487 return delta;
5489 decode_fail:
5490 *decode_ok = False;
5491 return delta;
5495 /*------------------------------------------------------------*/
5496 /*--- ---*/
5497 /*--- MMX INSTRUCTIONS ---*/
5498 /*--- ---*/
5499 /*------------------------------------------------------------*/
5501 /* Effect of MMX insns on x87 FPU state (table 11-2 of
5502 IA32 arch manual, volume 3):
5504 Read from, or write to MMX register (viz, any insn except EMMS):
5505 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
5506 * FP stack pointer set to zero
5508 EMMS:
5509 * All tags set to Invalid (empty) -- FPTAGS[i] := zero
5510 * FP stack pointer set to zero
5513 static void do_MMX_preamble ( void )
5515 Int i;
5516 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
5517 IRExpr* zero = mkU32(0);
5518 IRExpr* tag1 = mkU8(1);
5519 put_ftop(zero);
5520 for (i = 0; i < 8; i++)
5521 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag1) ) );
5524 static void do_EMMS_preamble ( void )
5526 Int i;
5527 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
5528 IRExpr* zero = mkU32(0);
5529 IRExpr* tag0 = mkU8(0);
5530 put_ftop(zero);
5531 for (i = 0; i < 8; i++)
5532 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag0) ) );
5536 static IRExpr* getMMXReg ( UInt archreg )
5538 vassert(archreg < 8);
5539 return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
5543 static void putMMXReg ( UInt archreg, IRExpr* e )
5545 vassert(archreg < 8);
5546 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
5547 stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
5551 /* Helper for non-shift MMX insns. Note this is incomplete in the
5552 sense that it does not first call do_MMX_preamble() -- that is the
5553 responsibility of its caller. */
5555 static
5556 UInt dis_MMXop_regmem_to_reg ( UChar sorb,
5557 Int delta,
5558 UChar opc,
5559 const HChar* name,
5560 Bool show_granularity )
5562 HChar dis_buf[50];
5563 UChar modrm = getIByte(delta);
5564 Bool isReg = epartIsReg(modrm);
5565 IRExpr* argL = NULL;
5566 IRExpr* argR = NULL;
5567 IRExpr* argG = NULL;
5568 IRExpr* argE = NULL;
5569 IRTemp res = newTemp(Ity_I64);
5571 Bool invG = False;
5572 IROp op = Iop_INVALID;
5573 void* hAddr = NULL;
5574 Bool eLeft = False;
5575 const HChar* hName = NULL;
5577 # define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)
5579 switch (opc) {
5580 /* Original MMX ones */
5581 case 0xFC: op = Iop_Add8x8; break;
5582 case 0xFD: op = Iop_Add16x4; break;
5583 case 0xFE: op = Iop_Add32x2; break;
5585 case 0xEC: op = Iop_QAdd8Sx8; break;
5586 case 0xED: op = Iop_QAdd16Sx4; break;
5588 case 0xDC: op = Iop_QAdd8Ux8; break;
5589 case 0xDD: op = Iop_QAdd16Ux4; break;
5591 case 0xF8: op = Iop_Sub8x8; break;
5592 case 0xF9: op = Iop_Sub16x4; break;
5593 case 0xFA: op = Iop_Sub32x2; break;
5595 case 0xE8: op = Iop_QSub8Sx8; break;
5596 case 0xE9: op = Iop_QSub16Sx4; break;
5598 case 0xD8: op = Iop_QSub8Ux8; break;
5599 case 0xD9: op = Iop_QSub16Ux4; break;
5601 case 0xE5: op = Iop_MulHi16Sx4; break;
5602 case 0xD5: op = Iop_Mul16x4; break;
5603 case 0xF5: XXX(x86g_calculate_mmx_pmaddwd); break;
5605 case 0x74: op = Iop_CmpEQ8x8; break;
5606 case 0x75: op = Iop_CmpEQ16x4; break;
5607 case 0x76: op = Iop_CmpEQ32x2; break;
5609 case 0x64: op = Iop_CmpGT8Sx8; break;
5610 case 0x65: op = Iop_CmpGT16Sx4; break;
5611 case 0x66: op = Iop_CmpGT32Sx2; break;
5613 case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break;
5614 case 0x63: op = Iop_QNarrowBin16Sto8Sx8; eLeft = True; break;
5615 case 0x67: op = Iop_QNarrowBin16Sto8Ux8; eLeft = True; break;
5617 case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break;
5618 case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
5619 case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;
5621 case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break;
5622 case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
5623 case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;
5625 case 0xDB: op = Iop_And64; break;
5626 case 0xDF: op = Iop_And64; invG = True; break;
5627 case 0xEB: op = Iop_Or64; break;
5628 case 0xEF: /* Possibly do better here if argL and argR are the
5629 same reg */
5630 op = Iop_Xor64; break;
5632 /* Introduced in SSE1 */
5633 case 0xE0: op = Iop_Avg8Ux8; break;
5634 case 0xE3: op = Iop_Avg16Ux4; break;
5635 case 0xEE: op = Iop_Max16Sx4; break;
5636 case 0xDE: op = Iop_Max8Ux8; break;
5637 case 0xEA: op = Iop_Min16Sx4; break;
5638 case 0xDA: op = Iop_Min8Ux8; break;
5639 case 0xE4: op = Iop_MulHi16Ux4; break;
5640 case 0xF6: XXX(x86g_calculate_mmx_psadbw); break;
5642 /* Introduced in SSE2 */
5643 case 0xD4: op = Iop_Add64; break;
5644 case 0xFB: op = Iop_Sub64; break;
5646 default:
5647 vex_printf("\n0x%x\n", opc);
5648 vpanic("dis_MMXop_regmem_to_reg");
5651 # undef XXX
5653 argG = getMMXReg(gregOfRM(modrm));
5654 if (invG)
5655 argG = unop(Iop_Not64, argG);
5657 if (isReg) {
5658 delta++;
5659 argE = getMMXReg(eregOfRM(modrm));
5660 } else {
5661 Int len;
5662 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5663 delta += len;
5664 argE = loadLE(Ity_I64, mkexpr(addr));
5667 if (eLeft) {
5668 argL = argE;
5669 argR = argG;
5670 } else {
5671 argL = argG;
5672 argR = argE;
5675 if (op != Iop_INVALID) {
5676 vassert(hName == NULL);
5677 vassert(hAddr == NULL);
5678 assign(res, binop(op, argL, argR));
5679 } else {
5680 vassert(hName != NULL);
5681 vassert(hAddr != NULL);
5682 assign( res,
5683 mkIRExprCCall(
5684 Ity_I64,
5685 0/*regparms*/, hName, hAddr,
5686 mkIRExprVec_2( argL, argR )
5691 putMMXReg( gregOfRM(modrm), mkexpr(res) );
5693 DIP("%s%s %s, %s\n",
5694 name, show_granularity ? nameMMXGran(opc & 3) : "",
5695 ( isReg ? nameMMXReg(eregOfRM(modrm)) : dis_buf ),
5696 nameMMXReg(gregOfRM(modrm)) );
5698 return delta;
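/* Decode walk-through, using a hypothetical byte pair for
   illustration: opc = 0xFD (paddw) with modrm = 0xD1 selects the
   register form, with greg = 2 and ereg = 1.  Hence eLeft is False,
   the result is mm2 = Iop_Add16x4(mm2, mm1), and the insn
   disassembles as "paddw %mm1,%mm2". */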
5702 /* Vector by scalar shift of G by the amount specified at the bottom
5703 of E. This is a straight copy of dis_SSE_shiftG_byE. */
5705 static UInt dis_MMX_shiftG_byE ( UChar sorb, Int delta,
5706 const HChar* opname, IROp op )
5708 HChar dis_buf[50];
5709 Int alen, size;
5710 IRTemp addr;
5711 Bool shl, shr, sar;
5712 UChar rm = getIByte(delta);
5713 IRTemp g0 = newTemp(Ity_I64);
5714 IRTemp g1 = newTemp(Ity_I64);
5715 IRTemp amt = newTemp(Ity_I32);
5716 IRTemp amt8 = newTemp(Ity_I8);
5718 if (epartIsReg(rm)) {
5719 assign( amt, unop(Iop_64to32, getMMXReg(eregOfRM(rm))) );
5720 DIP("%s %s,%s\n", opname,
5721 nameMMXReg(eregOfRM(rm)),
5722 nameMMXReg(gregOfRM(rm)) );
5723 delta++;
5724 } else {
5725 addr = disAMode ( &alen, sorb, delta, dis_buf );
5726 assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
5727 DIP("%s %s,%s\n", opname,
5728 dis_buf,
5729 nameMMXReg(gregOfRM(rm)) );
5730 delta += alen;
5732 assign( g0, getMMXReg(gregOfRM(rm)) );
5733 assign( amt8, unop(Iop_32to8, mkexpr(amt)) );
5735 shl = shr = sar = False;
5736 size = 0;
5737 switch (op) {
5738 case Iop_ShlN16x4: shl = True; size = 16; break;
5739 case Iop_ShlN32x2: shl = True; size = 32; break;
5740 case Iop_Shl64: shl = True; size = 64; break;
5741 case Iop_ShrN16x4: shr = True; size = 16; break;
5742 case Iop_ShrN32x2: shr = True; size = 32; break;
5743 case Iop_Shr64: shr = True; size = 64; break;
5744 case Iop_SarN16x4: sar = True; size = 16; break;
5745 case Iop_SarN32x2: sar = True; size = 32; break;
5746 default: vassert(0);
5749 if (shl || shr) {
5750 assign(
5752 IRExpr_ITE(
5753 binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size)),
5754 binop(op, mkexpr(g0), mkexpr(amt8)),
5755 mkU64(0)
5758 } else
5759 if (sar) {
5760 assign(
5762 IRExpr_ITE(
5763 binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size)),
5764 binop(op, mkexpr(g0), mkexpr(amt8)),
5765 binop(op, mkexpr(g0), mkU8(size-1))
5768 } else {
5769 /*NOTREACHED*/
5770 vassert(0);
5773 putMMXReg( gregOfRM(rm), mkexpr(g1) );
5774 return delta;
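/* Example of the out-of-range handling above (illustrative only):
   for "psllw %mm3,%mm0" with the low 32 bits of %mm3 holding 20,
   the amt < size test fails, so every 16-bit lane of %mm0 is forced
   to zero instead of being shifted by an out-of-range amount. */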
5778 /* Vector by scalar shift of E by an immediate byte. This is a
5779 straight copy of dis_SSE_shiftE_imm. */
5781 static
5782 UInt dis_MMX_shiftE_imm ( Int delta, const HChar* opname, IROp op )
5784 Bool shl, shr, sar;
5785 UChar rm = getIByte(delta);
5786 IRTemp e0 = newTemp(Ity_I64);
5787 IRTemp e1 = newTemp(Ity_I64);
5788 UChar amt, size;
5789 vassert(epartIsReg(rm));
5790 vassert(gregOfRM(rm) == 2
5791 || gregOfRM(rm) == 4 || gregOfRM(rm) == 6);
5792 amt = getIByte(delta+1);
5793 delta += 2;
5794 DIP("%s $%d,%s\n", opname,
5795 (Int)amt,
5796 nameMMXReg(eregOfRM(rm)) );
5798 assign( e0, getMMXReg(eregOfRM(rm)) );
5800 shl = shr = sar = False;
5801 size = 0;
5802 switch (op) {
5803 case Iop_ShlN16x4: shl = True; size = 16; break;
5804 case Iop_ShlN32x2: shl = True; size = 32; break;
5805 case Iop_Shl64: shl = True; size = 64; break;
5806 case Iop_SarN16x4: sar = True; size = 16; break;
5807 case Iop_SarN32x2: sar = True; size = 32; break;
5808 case Iop_ShrN16x4: shr = True; size = 16; break;
5809 case Iop_ShrN32x2: shr = True; size = 32; break;
5810 case Iop_Shr64: shr = True; size = 64; break;
5811 default: vassert(0);
5814 if (shl || shr) {
5815 assign( e1, amt >= size
5816 ? mkU64(0)
5817 : binop(op, mkexpr(e0), mkU8(amt))
5819 } else
5820 if (sar) {
5821 assign( e1, amt >= size
5822 ? binop(op, mkexpr(e0), mkU8(size-1))
5823 : binop(op, mkexpr(e0), mkU8(amt))
5825 } else {
5826 /*NOTREACHED*/
5827 vassert(0);
5830 putMMXReg( eregOfRM(rm), mkexpr(e1) );
5831 return delta;
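/* Example (illustrative): "psraw $20,%mm2" has amt >= size, and
   arithmetic shifts clamp rather than zero, so each 16-bit lane is
   shifted by size-1 = 15 and becomes 0x0000 or 0xFFFF according to
   its sign bit. */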
5835 /* Completely handle all MMX instructions except emms. */
5837 static
5838 UInt dis_MMX ( Bool* decode_ok, UChar sorb, Int sz, Int delta )
5840 Int len;
5841 UChar modrm;
5842 HChar dis_buf[50];
5843 UChar opc = getIByte(delta);
5844 delta++;
5846 /* dis_MMX handles all insns except emms. */
5847 do_MMX_preamble();
5849 switch (opc) {
5851 case 0x6E:
5852 /* MOVD (src)ireg-or-mem (E), (dst)mmxreg (G)*/
5853 if (sz != 4)
5854 goto mmx_decode_failure;
5855 modrm = getIByte(delta);
5856 if (epartIsReg(modrm)) {
5857 delta++;
5858 putMMXReg(
5859 gregOfRM(modrm),
5860 binop( Iop_32HLto64,
5861 mkU32(0),
5862 getIReg(4, eregOfRM(modrm)) ) );
5863 DIP("movd %s, %s\n",
5864 nameIReg(4,eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm)));
5865 } else {
5866 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5867 delta += len;
5868 putMMXReg(
5869 gregOfRM(modrm),
5870 binop( Iop_32HLto64,
5871 mkU32(0),
5872 loadLE(Ity_I32, mkexpr(addr)) ) );
5873 DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregOfRM(modrm)));
5875 break;
5877 case 0x7E: /* MOVD (src)mmxreg (G), (dst)ireg-or-mem (E) */
5878 if (sz != 4)
5879 goto mmx_decode_failure;
5880 modrm = getIByte(delta);
5881 if (epartIsReg(modrm)) {
5882 delta++;
5883 putIReg( 4, eregOfRM(modrm),
5884 unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) );
5885 DIP("movd %s, %s\n",
5886 nameMMXReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm)));
5887 } else {
            IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
            delta += len;
            storeLE( mkexpr(addr),
                     unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) );
            DIP("movd %s, %s\n", nameMMXReg(gregOfRM(modrm)), dis_buf);
         }
         break;

      case 0x6F:
         /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         modrm = getIByte(delta);
         if (epartIsReg(modrm)) {
            delta++;
            putMMXReg( gregOfRM(modrm), getMMXReg(eregOfRM(modrm)) );
            DIP("movq %s, %s\n",
                nameMMXReg(eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm)));
         } else {
            IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
            delta += len;
            putMMXReg( gregOfRM(modrm), loadLE(Ity_I64, mkexpr(addr)) );
            DIP("movq %s, %s\n",
                dis_buf, nameMMXReg(gregOfRM(modrm)));
         }
         break;

      case 0x7F:
         /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
         if (sz != 4)
            goto mmx_decode_failure;
         modrm = getIByte(delta);
         if (epartIsReg(modrm)) {
            delta++;
            putMMXReg( eregOfRM(modrm), getMMXReg(gregOfRM(modrm)) );
            DIP("movq %s, %s\n",
                nameMMXReg(gregOfRM(modrm)), nameMMXReg(eregOfRM(modrm)));
         } else {
            IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
            delta += len;
            storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) );
            DIP("mov(nt)q %s, %s\n",
                nameMMXReg(gregOfRM(modrm)), dis_buf);
         }
         break;

      case 0xFC:
      case 0xFD:
      case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padd", True );
         break;

      case 0xEC:
      case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padds", True );
         break;

      case 0xDC:
      case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "paddus", True );
         break;

      case 0xF8:
      case 0xF9:
      case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psub", True );
         break;

      case 0xE8:
      case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubs", True );
         break;

      case 0xD8:
      case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubus", True );
         break;

      case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmulhw", False );
         break;

      case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmullw", False );
         break;

      case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
         vassert(sz == 4);
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmaddwd", False );
         break;

      case 0x74:
      case 0x75:
      case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpeq", True );
         break;

      case 0x64:
      case 0x65:
      case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpgt", True );
         break;

      case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packssdw", False );
         break;

      case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packsswb", False );
         break;

      case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packuswb", False );
         break;

      case 0x68:
      case 0x69:
      case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckh", True );
         break;

      case 0x60:
      case 0x61:
      case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckl", True );
         break;

      case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pand", False );
         break;

      case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pandn", False );
         break;

      case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "por", False );
         break;

      case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pxor", False );
         break;

#     define SHIFT_BY_REG(_name,_op)                          \
         delta = dis_MMX_shiftG_byE(sorb, delta, _name, _op); \
         break;
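
      /* Illustrative expansion (not in the original source): opcode
         0xF1 below therefore becomes
            delta = dis_MMX_shiftG_byE(sorb, delta, "psllw", Iop_ShlN16x4);
            break;
         inside the enclosing switch. */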
      /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
      case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
      case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);

      /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
      case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
      case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64);

      /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4);
      case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2);

#     undef SHIFT_BY_REG

      case 0x71:
      case 0x72:
      case 0x73: {
         /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
         UChar byte2, subopc;
         if (sz != 4)
            goto mmx_decode_failure;
         byte2  = getIByte(delta);      /* amode / sub-opcode */
         subopc = toUChar( (byte2 >> 3) & 7 );
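
         /* Worked example (illustrative, not from the original
            source): "psrlw $2, %mm1" encodes as 0F 71 D1 02, so
            byte2 = 0xD1 and subopc = (0xD1 >> 3) & 7 = 2 (SRL),
            with %mm1 as the E operand. */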

#        define SHIFT_BY_IMM(_name,_op)                        \
            do { delta = dis_MMX_shiftE_imm(delta,_name,_op);  \
            } while (0)

         if (subopc == 2 /*SRL*/ && opc == 0x71)
            SHIFT_BY_IMM("psrlw", Iop_ShrN16x4);
         else if (subopc == 2 /*SRL*/ && opc == 0x72)
            SHIFT_BY_IMM("psrld", Iop_ShrN32x2);
         else if (subopc == 2 /*SRL*/ && opc == 0x73)
            SHIFT_BY_IMM("psrlq", Iop_Shr64);

         else if (subopc == 4 /*SAR*/ && opc == 0x71)
            SHIFT_BY_IMM("psraw", Iop_SarN16x4);
         else if (subopc == 4 /*SAR*/ && opc == 0x72)
            SHIFT_BY_IMM("psrad", Iop_SarN32x2);

         else if (subopc == 6 /*SHL*/ && opc == 0x71)
            SHIFT_BY_IMM("psllw", Iop_ShlN16x4);
         else if (subopc == 6 /*SHL*/ && opc == 0x72)
            SHIFT_BY_IMM("pslld", Iop_ShlN32x2);
         else if (subopc == 6 /*SHL*/ && opc == 0x73)
            SHIFT_BY_IMM("psllq", Iop_Shl64);

         else goto mmx_decode_failure;

#        undef SHIFT_BY_IMM
         break;
      }

      case 0xF7: {
         IRTemp addr    = newTemp(Ity_I32);
         IRTemp regD    = newTemp(Ity_I64);
         IRTemp regM    = newTemp(Ity_I64);
         IRTemp mask    = newTemp(Ity_I64);
         IRTemp olddata = newTemp(Ity_I64);
         IRTemp newdata = newTemp(Ity_I64);

         modrm = getIByte(delta);
         if (sz != 4 || (!epartIsReg(modrm)))
            goto mmx_decode_failure;
         delta++;

         assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) ));
         assign( regM, getMMXReg( eregOfRM(modrm) ));
         assign( regD, getMMXReg( gregOfRM(modrm) ));
         assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) );
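         /* Illustrative note (not in the original source): SarN8x8 by
            7 replicates each byte's sign bit across the whole byte, so
            a regM byte of 0x80 gives a mask byte of 0xFF and a regM
            byte of 0x7F gives 0x00 -- only bytes whose modifier MSB is
            set get written below. */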
         assign( olddata, loadLE( Ity_I64, mkexpr(addr) ));
         assign( newdata,
                 binop(Iop_Or64,
                       binop(Iop_And64,
                             mkexpr(regD),
                             mkexpr(mask) ),
                       binop(Iop_And64,
                             mkexpr(olddata),
                             unop(Iop_Not64, mkexpr(mask)))) );
         storeLE( mkexpr(addr), mkexpr(newdata) );
         DIP("maskmovq %s,%s\n", nameMMXReg( eregOfRM(modrm) ),
                                 nameMMXReg( gregOfRM(modrm) ) );
         break;
      }

      /* --- MMX decode failure --- */
      default:
      mmx_decode_failure:
         *decode_ok = False;
         return delta; /* ignored */

   }

   *decode_ok = True;
   return delta;
}


/*------------------------------------------------------------*/
/*--- More misc arithmetic and other obscure insns.        ---*/
/*------------------------------------------------------------*/

/* Double length left and right shifts.  Apparently only required in
   v-size (no b- variant). */
static
UInt dis_SHLRD_Gv_Ev ( UChar sorb,
                       Int delta, UChar modrm,
                       Int sz,
                       IRExpr* shift_amt,
                       Bool amt_is_literal,
                       const HChar* shift_amt_txt,
                       Bool left_shift )
{
   /* shift_amt :: Ity_I8 is the amount to shift.  shift_amt_txt is used
      for printing it.  And eip on entry points at the modrm byte. */
   Int len;
   HChar dis_buf[50];

   IRType ty       = szToITy(sz);
   IRTemp gsrc     = newTemp(ty);
   IRTemp esrc     = newTemp(ty);
   IRTemp addr     = IRTemp_INVALID;
   IRTemp tmpSH    = newTemp(Ity_I8);
   IRTemp tmpL     = IRTemp_INVALID;
   IRTemp tmpRes   = IRTemp_INVALID;
   IRTemp tmpSubSh = IRTemp_INVALID;
   IROp   mkpair;
   IROp   getres;
   IROp   shift;
   IRExpr* mask = NULL;

   vassert(sz == 2 || sz == 4);

   /* The E-part is the destination; this is shifted.  The G-part
      supplies bits to be shifted into the E-part, but is not
      changed.

      If shifting left, form a double-length word with E at the top
      and G at the bottom, and shift this left.  The result is then in
      the high part.

      If shifting right, form a double-length word with G at the top
      and E at the bottom, and shift this right.  The result is then
      at the bottom. */
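
   /* Worked example (illustrative, not from the original source):
      "shld $4, %bx, %ax" with %ax = 0x1234 and %bx = 0xABCD forms
      the pair 0x1234ABCD; shifted left by 4 this is 0x234ABCD0, and
      the high half 0x234A is written back to %ax. */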

   /* Fetch the operands. */

   assign( gsrc, getIReg(sz, gregOfRM(modrm)) );

   if (epartIsReg(modrm)) {
      delta++;
      assign( esrc, getIReg(sz, eregOfRM(modrm)) );
      DIP("sh%cd%c %s, %s, %s\n",
          ( left_shift ? 'l' : 'r' ), nameISize(sz),
          shift_amt_txt,
          nameIReg(sz, gregOfRM(modrm)), nameIReg(sz, eregOfRM(modrm)));
   } else {
      addr = disAMode ( &len, sorb, delta, dis_buf );
      delta += len;
      assign( esrc, loadLE(ty, mkexpr(addr)) );
      DIP("sh%cd%c %s, %s, %s\n",
          ( left_shift ? 'l' : 'r' ), nameISize(sz),
          shift_amt_txt,
          nameIReg(sz, gregOfRM(modrm)), dis_buf);
   }

   /* Round up the relevant primops. */

   if (sz == 4) {
      tmpL     = newTemp(Ity_I64);
      tmpRes   = newTemp(Ity_I32);
      tmpSubSh = newTemp(Ity_I32);
      mkpair   = Iop_32HLto64;
      getres   = left_shift ? Iop_64HIto32 : Iop_64to32;
      shift    = left_shift ? Iop_Shl64 : Iop_Shr64;
      mask     = mkU8(31);
   } else {
      /* sz == 2 */
      tmpL     = newTemp(Ity_I32);
      tmpRes   = newTemp(Ity_I16);
      tmpSubSh = newTemp(Ity_I16);
      mkpair   = Iop_16HLto32;
      getres   = left_shift ? Iop_32HIto16 : Iop_32to16;
      shift    = left_shift ? Iop_Shl32 : Iop_Shr32;
      mask     = mkU8(15);
   }

   /* Do the shift, calculate the subshift value, and set
      the flag thunk. */

   assign( tmpSH, binop(Iop_And8, shift_amt, mask) );

   if (left_shift)
      assign( tmpL, binop(mkpair, mkexpr(esrc), mkexpr(gsrc)) );
   else
      assign( tmpL, binop(mkpair, mkexpr(gsrc), mkexpr(esrc)) );

   assign( tmpRes, unop(getres, binop(shift, mkexpr(tmpL), mkexpr(tmpSH)) ) );
   assign( tmpSubSh,
           unop(getres,
                binop(shift,
                      mkexpr(tmpL),
                      binop(Iop_And8,
                            binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),
                            mask))) );

   setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl32 : Iop_Sar32,
                              tmpRes, tmpSubSh, ty, tmpSH );

   /* Put result back. */

   if (epartIsReg(modrm)) {
      putIReg(sz, eregOfRM(modrm), mkexpr(tmpRes));
   } else {
      storeLE( mkexpr(addr), mkexpr(tmpRes) );
   }

   if (amt_is_literal) delta++;
   return delta;
}


/* Handle BT/BTS/BTR/BTC Gv, Ev.  Apparently b-size is not
   required. */

typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;

static const HChar* nameBtOp ( BtOp op )
{
   switch (op) {
      case BtOpNone:  return "";
      case BtOpSet:   return "s";
      case BtOpReset: return "r";
      case BtOpComp:  return "c";
      default: vpanic("nameBtOp(x86)");
   }
}

static
UInt dis_bt_G_E ( const VexAbiInfo* vbi,
                  UChar sorb, Bool locked, Int sz, Int delta, BtOp op )
{
   HChar  dis_buf[50];
   UChar  modrm;
   Int    len;
   IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
          t_addr1, t_esp, t_mask, t_new;

   vassert(sz == 2 || sz == 4);

   t_fetched = t_bitno0 = t_bitno1 = t_bitno2
             = t_addr0 = t_addr1 = t_esp
             = t_mask = t_new = IRTemp_INVALID;

   t_fetched = newTemp(Ity_I8);
   t_new     = newTemp(Ity_I8);
   t_bitno0  = newTemp(Ity_I32);
   t_bitno1  = newTemp(Ity_I32);
   t_bitno2  = newTemp(Ity_I8);
   t_addr1   = newTemp(Ity_I32);
   modrm     = getIByte(delta);

   assign( t_bitno0, widenSto32(getIReg(sz, gregOfRM(modrm))) );

   if (epartIsReg(modrm)) {
      delta++;
      /* Get it onto the client's stack. */
      t_esp = newTemp(Ity_I32);
      t_addr0 = newTemp(Ity_I32);

      /* For the choice of the value 128, see comment in dis_bt_G_E in
         guest_amd64_toIR.c.  We point out here only that 128 is
         fast-cased in Memcheck and is > 0, so seems like a good
         choice. */
      vassert(vbi->guest_stack_redzone_size == 0);
      assign( t_esp, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(128)) );
      putIReg(4, R_ESP, mkexpr(t_esp));

      storeLE( mkexpr(t_esp), getIReg(sz, eregOfRM(modrm)) );

      /* Make t_addr0 point at it. */
      assign( t_addr0, mkexpr(t_esp) );

      /* Mask out upper bits of the shift amount, since we're doing a
         reg. */
      assign( t_bitno1, binop(Iop_And32,
                              mkexpr(t_bitno0),
                              mkU32(sz == 4 ? 31 : 15)) );

   } else {
      t_addr0 = disAMode ( &len, sorb, delta, dis_buf );
      delta += len;
      assign( t_bitno1, mkexpr(t_bitno0) );
   }

   /* At this point: t_addr0 is the address being operated on.  If it
      was a reg, we will have pushed it onto the client's stack.
      t_bitno1 is the bit number, suitably masked in the case of a
      reg. */

   /* Now the main sequence. */
   assign( t_addr1,
           binop(Iop_Add32,
                 mkexpr(t_addr0),
                 binop(Iop_Sar32, mkexpr(t_bitno1), mkU8(3))) );

   /* t_addr1 now holds effective address */

   assign( t_bitno2,
           unop(Iop_32to8,
                binop(Iop_And32, mkexpr(t_bitno1), mkU32(7))) );

   /* t_bitno2 contains offset of bit within byte */
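
   /* E.g. (illustrative, not from the original source): "btsl %eax,
      (%ebx)" with %eax = 35 gives t_addr1 = %ebx + (35 >>s 3) =
      %ebx+4 and t_bitno2 = 35 & 7 = 3, so bit 3 of the byte at
      %ebx+4 is tested and set. */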

   if (op != BtOpNone) {
      t_mask = newTemp(Ity_I8);
      assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) );
   }

   /* t_mask is now a suitable byte mask */

   assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) );

   if (op != BtOpNone) {
      switch (op) {
         case BtOpSet:
            assign( t_new,
                    binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
            break;
         case BtOpComp:
            assign( t_new,
                    binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
            break;
         case BtOpReset:
            assign( t_new,
                    binop(Iop_And8, mkexpr(t_fetched),
                                    unop(Iop_Not8, mkexpr(t_mask))) );
            break;
         default:
            vpanic("dis_bt_G_E(x86)");
      }
      if (locked && !epartIsReg(modrm)) {
         casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
                                 mkexpr(t_new)/*new*/,
                                 guest_EIP_curr_instr );
      } else {
         storeLE( mkexpr(t_addr1), mkexpr(t_new) );
      }
   }

   /* Side effect done; now get selected bit into Carry flag */
   /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            binop(Iop_And32,
                  binop(Iop_Shr32,
                        unop(Iop_8Uto32, mkexpr(t_fetched)),
                        mkexpr(t_bitno2)),
                  mkU32(1)))
       );
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

   /* Move reg operand from stack back to reg */
   if (epartIsReg(modrm)) {
      /* t_esp still points at it. */
      putIReg(sz, eregOfRM(modrm), loadLE(szToITy(sz), mkexpr(t_esp)) );
      putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t_esp), mkU32(128)) );
   }

   DIP("bt%s%c %s, %s\n",
       nameBtOp(op), nameISize(sz), nameIReg(sz, gregOfRM(modrm)),
       ( epartIsReg(modrm) ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ) );

   return delta;
}


/* Handle BSF/BSR.  Only v-size seems necessary. */
static
UInt dis_bs_E_G ( UChar sorb, Int sz, Int delta, Bool fwds )
{
   Bool  isReg;
   UChar modrm;
   HChar dis_buf[50];

   IRType ty  = szToITy(sz);
   IRTemp src = newTemp(ty);
   IRTemp dst = newTemp(ty);

   IRTemp src32 = newTemp(Ity_I32);
   IRTemp dst32 = newTemp(Ity_I32);
   IRTemp srcB  = newTemp(Ity_I1);

   vassert(sz == 4 || sz == 2);

   modrm = getIByte(delta);

   isReg = epartIsReg(modrm);
   if (isReg) {
      delta++;
      assign( src, getIReg(sz, eregOfRM(modrm)) );
   } else {
      Int    len;
      IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
      delta += len;
      assign( src, loadLE(ty, mkexpr(addr)) );
   }

   DIP("bs%c%c %s, %s\n",
       fwds ? 'f' : 'r', nameISize(sz),
       ( isReg ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ),
       nameIReg(sz, gregOfRM(modrm)));

   /* Generate a bool expression which is zero iff the original is
      zero, and nonzero otherwise.  Ask for a CmpNE version which, if
      instrumented by Memcheck, is instrumented expensively, since
      this may be used on the output of a preceding movmskb insn,
      which has been known to be partially defined, and in need of
      careful handling. */
   assign( srcB, binop(mkSizedOp(ty,Iop_ExpCmpNE8),
                       mkexpr(src), mkU(ty,0)) );

   /* Flags: Z is 1 iff source value is zero.  All others
      are undefined -- we force them to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            IRExpr_ITE( mkexpr(srcB),
                        /* src!=0 */
                        mkU32(0),
                        /* src==0 */
                        mkU32(X86G_CC_MASK_Z)
                      )
       ));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

   /* Result: iff source value is zero, we can't use
      Iop_Clz32/Iop_Ctz32 as they have no defined result in that case.
      But anyway, Intel x86 semantics say the result is undefined in
      such situations.  Hence handle the zero case specially. */

   /* Bleh.  What we compute:

         bsf32:  if src == 0 then 0 else  Ctz32(src)
         bsr32:  if src == 0 then 0 else 31 - Clz32(src)

         bsf16:  if src == 0 then 0 else  Ctz32(16Uto32(src))
         bsr16:  if src == 0 then 0 else 31 - Clz32(16Uto32(src))

      First, widen src to 32 bits if it is not already.

      Postscript 15 Oct 04: it seems that at least VIA Nehemiah leaves the
      dst register unchanged when src == 0.  Hence change accordingly.
   */
   if (sz == 2)
      assign( src32, unop(Iop_16Uto32, mkexpr(src)) );
   else
      assign( src32, mkexpr(src) );

   /* The main computation, guarding against zero. */
   assign( dst32,
           IRExpr_ITE(
              mkexpr(srcB),
              /* src != 0 */
              fwds ? unop(Iop_Ctz32, mkexpr(src32))
                   : binop(Iop_Sub32,
                           mkU32(31),
                           unop(Iop_Clz32, mkexpr(src32))),
              /* src == 0 -- leave dst unchanged */
              widenUto32( getIReg( sz, gregOfRM(modrm) ) )
           )
         );

   if (sz == 2)
      assign( dst, unop(Iop_32to16, mkexpr(dst32)) );
   else
      assign( dst, mkexpr(dst32) );

   /* dump result back */
   putIReg( sz, gregOfRM(modrm), mkexpr(dst) );

   return delta;
}


static
void codegen_xchg_eAX_Reg ( Int sz, Int reg )
{
   IRType ty = szToITy(sz);
   IRTemp t1 = newTemp(ty);
   IRTemp t2 = newTemp(ty);
   vassert(sz == 2 || sz == 4);
   assign( t1, getIReg(sz, R_EAX) );
   assign( t2, getIReg(sz, reg) );
   putIReg( sz, R_EAX, mkexpr(t2) );
   putIReg( sz, reg, mkexpr(t1) );
   DIP("xchg%c %s, %s\n",
       nameISize(sz), nameIReg(sz, R_EAX), nameIReg(sz, reg));
}


static
void codegen_SAHF ( void )
{
   /* Set the flags to:
      (x86g_calculate_flags_all() & X86G_CC_MASK_O)  -- retain the old O flag
      | (%AH & (X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
                |X86G_CC_MASK_P|X86G_CC_MASK_C)
   */
   UInt   mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
                       |X86G_CC_MASK_C|X86G_CC_MASK_P;
   IRTemp oldflags   = newTemp(Ity_I32);
   assign( oldflags, mk_x86g_calculate_eflags_all() );
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1,
         binop(Iop_Or32,
               binop(Iop_And32, mkexpr(oldflags), mkU32(X86G_CC_MASK_O)),
               binop(Iop_And32,
                     binop(Iop_Shr32, getIReg(4, R_EAX), mkU8(8)),
                     mkU32(mask_SZACP))
              )
   ));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
}


static
void codegen_LAHF ( void )
{
   /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
   IRExpr* eax_with_hole;
   IRExpr* new_byte;
   IRExpr* new_eax;
   UInt    mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
                        |X86G_CC_MASK_C|X86G_CC_MASK_P;

   IRTemp  flags = newTemp(Ity_I32);
   assign( flags, mk_x86g_calculate_eflags_all() );

   eax_with_hole
      = binop(Iop_And32, getIReg(4, R_EAX), mkU32(0xFFFF00FF));
   new_byte
      = binop(Iop_Or32, binop(Iop_And32, mkexpr(flags), mkU32(mask_SZACP)),
                        mkU32(1<<1));
   new_eax
      = binop(Iop_Or32, eax_with_hole,
                        binop(Iop_Shl32, new_byte, mkU8(8)));
   putIReg(4, R_EAX, new_eax);
}
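
/* E.g. (illustrative, not from the original source): with CF=1,
   ZF=1 and SF=AF=PF=0, the byte deposited in %ah is
   0b01000011 = 0x43, since bit 1 is always set. */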

static
UInt dis_cmpxchg_G_E ( UChar sorb,
                       Bool  locked,
                       Int   size,
                       Int   delta0 )
{
   HChar dis_buf[50];
   Int   len;

   IRType ty    = szToITy(size);
   IRTemp acc   = newTemp(ty);
   IRTemp src   = newTemp(ty);
   IRTemp dest  = newTemp(ty);
   IRTemp dest2 = newTemp(ty);
   IRTemp acc2  = newTemp(ty);
   IRTemp cond  = newTemp(Ity_I1);
   IRTemp addr  = IRTemp_INVALID;
   UChar  rm    = getUChar(delta0);

   /* There are 3 cases to consider:

      reg-reg: ignore any lock prefix, generate sequence based
               on ITE

      reg-mem, not locked: ignore any lock prefix, generate sequence
                           based on ITE

      reg-mem, locked: use IRCAS
   */
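
   /* Rough sketch of the common semantics (illustrative, not in the
      original source):
         if (%eax == dest) { ZF = 1; dest = src;  }
         else              { ZF = 0; %eax = dest; }
      with the whole flag set computed as for "cmp %eax, dest". */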
   if (epartIsReg(rm)) {
      /* case 1 */
      assign( dest, getIReg(size, eregOfRM(rm)) );
      delta0++;
      assign( src, getIReg(size, gregOfRM(rm)) );
      assign( acc, getIReg(size, R_EAX) );
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond, mk_x86g_calculate_condition(X86CondZ) );
      assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
      assign( acc2,  IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
      putIReg(size, R_EAX, mkexpr(acc2));
      putIReg(size, eregOfRM(rm), mkexpr(dest2));
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIReg(size,gregOfRM(rm)),
                               nameIReg(size,eregOfRM(rm)) );
   }
   else if (!epartIsReg(rm) && !locked) {
      /* case 2 */
      addr = disAMode ( &len, sorb, delta0, dis_buf );
      assign( dest, loadLE(ty, mkexpr(addr)) );
      delta0 += len;
      assign( src, getIReg(size, gregOfRM(rm)) );
      assign( acc, getIReg(size, R_EAX) );
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond, mk_x86g_calculate_condition(X86CondZ) );
      assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
      assign( acc2,  IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
      putIReg(size, R_EAX, mkexpr(acc2));
      storeLE( mkexpr(addr), mkexpr(dest2) );
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIReg(size,gregOfRM(rm)), dis_buf);
   }
   else if (!epartIsReg(rm) && locked) {
      /* case 3 */
      /* src is new value.  acc is expected value.  dest is old value.
         Compute success from the output of the IRCAS, and steer the
         new value for EAX accordingly: in case of success, EAX is
         unchanged. */
      addr = disAMode ( &len, sorb, delta0, dis_buf );
      delta0 += len;
      assign( src, getIReg(size, gregOfRM(rm)) );
      assign( acc, getIReg(size, R_EAX) );
      stmt( IRStmt_CAS(
         mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
                  NULL, mkexpr(acc), NULL, mkexpr(src) )
      ));
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond, mk_x86g_calculate_condition(X86CondZ) );
      assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
      putIReg(size, R_EAX, mkexpr(acc2));
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIReg(size,gregOfRM(rm)), dis_buf);
   }
   else vassert(0);

   return delta0;
}


/* Handle conditional move instructions of the form
      cmovcc E(reg-or-mem), G(reg)

   E(src) is reg-or-mem
   G(dst) is reg.

   If E is reg, -->    GET %E, tmps
                       GET %G, tmpd
                       CMOVcc tmps, tmpd
                       PUT tmpd, %G

   If E is mem  -->    (getAddr E) -> tmpa
                       LD (tmpa), tmps
                       GET %G, tmpd
                       CMOVcc tmps, tmpd
                       PUT tmpd, %G
*/
static
UInt dis_cmov_E_G ( UChar       sorb,
                    Int         sz,
                    X86Condcode cond,
                    Int         delta0 )
{
   UChar rm = getIByte(delta0);
   HChar dis_buf[50];
   Int   len;

   IRType ty   = szToITy(sz);
   IRTemp tmps = newTemp(ty);
   IRTemp tmpd = newTemp(ty);

   if (epartIsReg(rm)) {
      assign( tmps, getIReg(sz, eregOfRM(rm)) );
      assign( tmpd, getIReg(sz, gregOfRM(rm)) );

      putIReg(sz, gregOfRM(rm),
                  IRExpr_ITE( mk_x86g_calculate_condition(cond),
                              mkexpr(tmps),
                              mkexpr(tmpd) )
             );

      DIP("cmov%c%s %s,%s\n", nameISize(sz),
                              name_X86Condcode(cond),
                              nameIReg(sz,eregOfRM(rm)),
                              nameIReg(sz,gregOfRM(rm)));
      return 1+delta0;
   }

   /* E refers to memory */
   {
      IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
      assign( tmps, loadLE(ty, mkexpr(addr)) );
      assign( tmpd, getIReg(sz, gregOfRM(rm)) );

      putIReg(sz, gregOfRM(rm),
                  IRExpr_ITE( mk_x86g_calculate_condition(cond),
                              mkexpr(tmps),
                              mkexpr(tmpd) )
             );

      DIP("cmov%c%s %s,%s\n", nameISize(sz),
                              name_X86Condcode(cond),
                              dis_buf,
                              nameIReg(sz,gregOfRM(rm)));
      return len+delta0;
   }
}


static
UInt dis_xadd_G_E ( UChar sorb, Bool locked, Int sz, Int delta0,
                    Bool* decodeOK )
{
   Int   len;
   UChar rm = getIByte(delta0);
   HChar dis_buf[50];

   IRType ty    = szToITy(sz);
   IRTemp tmpd  = newTemp(ty);
   IRTemp tmpt0 = newTemp(ty);
   IRTemp tmpt1 = newTemp(ty);

   /* There are 3 cases to consider:

      reg-reg: ignore any lock prefix,
               generate 'naive' (non-atomic) sequence

      reg-mem, not locked: ignore any lock prefix, generate 'naive'
                           (non-atomic) sequence

      reg-mem, locked: use IRCAS
   */

   if (epartIsReg(rm)) {
      /* case 1 */
      assign( tmpd,  getIReg(sz, eregOfRM(rm)));
      assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      putIReg(sz, eregOfRM(rm), mkexpr(tmpt1));
      putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIReg(sz,gregOfRM(rm)),
                         nameIReg(sz,eregOfRM(rm)));
      *decodeOK = True;
      return 1+delta0;
   }
   else if (!epartIsReg(rm) && !locked) {
      /* case 2 */
      IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
      assign( tmpd,  loadLE(ty, mkexpr(addr)) );
      assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      storeLE( mkexpr(addr), mkexpr(tmpt1) );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf);
      *decodeOK = True;
      return len+delta0;
   }
   else if (!epartIsReg(rm) && locked) {
      /* case 3 */
      IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
      assign( tmpd,  loadLE(ty, mkexpr(addr)) );
      assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
             mkexpr(tmpt1)/*newVal*/, guest_EIP_curr_instr );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf);
      *decodeOK = True;
      return len+delta0;
   }
   /*UNREACHED*/
   vassert(0);
}

/* Move 16 bits from Ew (ireg or mem) to G (a segment register). */

static
UInt dis_mov_Ew_Sw ( UChar sorb, Int delta0 )
{
   Int    len;
   IRTemp addr;
   UChar  rm = getIByte(delta0);
   HChar  dis_buf[50];

   if (epartIsReg(rm)) {
      putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
      DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
      return 1+delta0;
   } else {
      addr = disAMode ( &len, sorb, delta0, dis_buf );
      putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
      DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
      return len+delta0;
   }
}

/* Move 16 bits from G (a segment register) to Ew (ireg or mem).  If
   dst is ireg and sz==4, zero out top half of it. */

static
UInt dis_mov_Sw_Ew ( UChar sorb,
                     Int   sz,
                     Int   delta0 )
{
   Int    len;
   IRTemp addr;
   UChar  rm = getIByte(delta0);
   HChar  dis_buf[50];

   vassert(sz == 2 || sz == 4);

   if (epartIsReg(rm)) {
      if (sz == 4)
         putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
      else
         putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));

      DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
      return 1+delta0;
   } else {
      addr = disAMode ( &len, sorb, delta0, dis_buf );
      storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
      DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
      return len+delta0;
   }
}


static
void dis_push_segreg ( UInt sreg, Int sz )
{
   IRTemp t1 = newTemp(Ity_I16);
   IRTemp ta = newTemp(Ity_I32);
   vassert(sz == 2 || sz == 4);

   assign( t1, getSReg(sreg) );
   assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
   putIReg(4, R_ESP, mkexpr(ta));
   storeLE( mkexpr(ta), mkexpr(t1) );

   DIP("push%c %s\n", sz==2 ? 'w' : 'l', nameSReg(sreg));
}

static
void dis_pop_segreg ( UInt sreg, Int sz )
{
   IRTemp t1 = newTemp(Ity_I16);
   IRTemp ta = newTemp(Ity_I32);
   vassert(sz == 2 || sz == 4);

   assign( ta, getIReg(4, R_ESP) );
   assign( t1, loadLE(Ity_I16, mkexpr(ta)) );

   putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
   putSReg( sreg, mkexpr(t1) );
   DIP("pop%c %s\n", sz==2 ? 'w' : 'l', nameSReg(sreg));
}

static
void dis_ret ( /*MOD*/DisResult* dres, UInt d32 )
{
   IRTemp t1 = newTemp(Ity_I32);
   IRTemp t2 = newTemp(Ity_I32);
   assign(t1, getIReg(4,R_ESP));
   assign(t2, loadLE(Ity_I32,mkexpr(t1)));
   putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(4+d32)));
   jmp_treg(dres, Ijk_Ret, t2);
   vassert(dres->whatNext == Dis_StopHere);
}

/*------------------------------------------------------------*/
/*--- SSE/SSE2/SSE3 helpers                                ---*/
/*------------------------------------------------------------*/

/* Indicates whether the op requires a rounding-mode argument.  Note
   that this covers only vector floating point arithmetic ops, and
   omits the scalar ones that need rounding modes.  Note also that
   inconsistencies here will get picked up later by the IR sanity
   checker, so this isn't correctness-critical. */
static Bool requiresRMode ( IROp op )
{
   switch (op) {
      /* 128 bit ops */
      case Iop_Add32Fx4: case Iop_Sub32Fx4:
      case Iop_Mul32Fx4: case Iop_Div32Fx4:
      case Iop_Add64Fx2: case Iop_Sub64Fx2:
      case Iop_Mul64Fx2: case Iop_Div64Fx2:
         return True;
      default:
         break;
   }
   return False;
}


/* Worker function; do not call directly.
   Handles full width G = G `op` E   and   G = (not G) `op` E.
*/
static UInt dis_SSE_E_to_G_all_wrk (
               UChar sorb, Int delta,
               const HChar* opname, IROp op,
               Bool invertG
            )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getIByte(delta);
   IRExpr* gpart
      = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRM(rm)))
                : getXMMReg(gregOfRM(rm));
   if (epartIsReg(rm)) {
      putXMMReg(
         gregOfRM(rm),
         requiresRMode(op)
            ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        gpart,
                        getXMMReg(eregOfRM(rm)))
            : binop(op, gpart,
                        getXMMReg(eregOfRM(rm)))
      );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      return delta+1;
   } else {
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      putXMMReg(
         gregOfRM(rm),
         requiresRMode(op)
            ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        gpart,
                        loadLE(Ity_V128, mkexpr(addr)))
            : binop(op, gpart,
                        loadLE(Ity_V128, mkexpr(addr)))
      );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      return delta+alen;
   }
}


/* All lanes SSE binary operation, G = G `op` E. */

static
UInt dis_SSE_E_to_G_all ( UChar sorb, Int delta, const HChar* opname, IROp op )
{
   return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, False );
}

/* All lanes SSE binary operation, G = (not G) `op` E. */

static
UInt dis_SSE_E_to_G_all_invG ( UChar sorb, Int delta,
                               const HChar* opname, IROp op )
{
   return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, True );
}


/* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */

static UInt dis_SSE_E_to_G_lo32 ( UChar sorb, Int delta,
                                  const HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getIByte(delta);
   IRExpr* gpart = getXMMReg(gregOfRM(rm));
   if (epartIsReg(rm)) {
      putXMMReg( gregOfRM(rm),
                 binop(op, gpart,
                           getXMMReg(eregOfRM(rm))) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      return delta+1;
   } else {
      /* We can only do a 32-bit memory read, so the upper 3/4 of the
         E operand needs to be made simply of zeroes. */
      IRTemp epart = newTemp(Ity_V128);
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      assign( epart, unop( Iop_32UtoV128,
                           loadLE(Ity_I32, mkexpr(addr))) );
      putXMMReg( gregOfRM(rm),
                 binop(op, gpart, mkexpr(epart)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      return delta+alen;
   }
}


/* Lower 64-bit lane only SSE binary operation, G = G `op` E. */

static UInt dis_SSE_E_to_G_lo64 ( UChar sorb, Int delta,
                                  const HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getIByte(delta);
   IRExpr* gpart = getXMMReg(gregOfRM(rm));
   if (epartIsReg(rm)) {
      putXMMReg( gregOfRM(rm),
                 binop(op, gpart,
                           getXMMReg(eregOfRM(rm))) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      return delta+1;
   } else {
      /* We can only do a 64-bit memory read, so the upper half of the
         E operand needs to be made simply of zeroes. */
      IRTemp epart = newTemp(Ity_V128);
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      assign( epart, unop( Iop_64UtoV128,
                           loadLE(Ity_I64, mkexpr(addr))) );
      putXMMReg( gregOfRM(rm),
                 binop(op, gpart, mkexpr(epart)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      return delta+alen;
   }
}


/* All lanes unary SSE operation, G = op(E). */

static UInt dis_SSE_E_to_G_unary_all (
               UChar sorb, Int delta,
               const HChar* opname, IROp op
            )
{
   HChar  dis_buf[50];
   Int    alen;
   IRTemp addr;
   UChar  rm = getIByte(delta);
   // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
   // up in the usual way.
   Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2;
   if (epartIsReg(rm)) {
      IRExpr* src = getXMMReg(eregOfRM(rm));
      /* XXXROUNDINGFIXME */
      IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
                              : unop(op, src);
      putXMMReg( gregOfRM(rm), res );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      return delta+1;
   } else {
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      IRExpr* src = loadLE(Ity_V128, mkexpr(addr));
      /* XXXROUNDINGFIXME */
      IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
                              : unop(op, src);
      putXMMReg( gregOfRM(rm), res );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      return delta+alen;
   }
}


/* Lowest 32-bit lane only unary SSE operation, G = op(E). */

static UInt dis_SSE_E_to_G_unary_lo32 (
               UChar sorb, Int delta,
               const HChar* opname, IROp op
            )
{
   /* First we need to get the old G value and patch the low 32 bits
      of the E operand into it.  Then apply op and write back to G. */
   HChar  dis_buf[50];
   Int    alen;
   IRTemp addr;
   UChar  rm = getIByte(delta);
   IRTemp oldG0 = newTemp(Ity_V128);
   IRTemp oldG1 = newTemp(Ity_V128);

   assign( oldG0, getXMMReg(gregOfRM(rm)) );

   if (epartIsReg(rm)) {
      assign( oldG1,
              binop( Iop_SetV128lo32,
                     mkexpr(oldG0),
                     getXMMRegLane32(eregOfRM(rm), 0)) );
      putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      return delta+1;
   } else {
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      assign( oldG1,
              binop( Iop_SetV128lo32,
                     mkexpr(oldG0),
                     loadLE(Ity_I32, mkexpr(addr)) ));
      putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      return delta+alen;
   }
}


/* Lowest 64-bit lane only unary SSE operation, G = op(E). */

static UInt dis_SSE_E_to_G_unary_lo64 (
               UChar sorb, Int delta,
               const HChar* opname, IROp op
            )
{
   /* First we need to get the old G value and patch the low 64 bits
      of the E operand into it.  Then apply op and write back to G. */
   HChar  dis_buf[50];
   Int    alen;
   IRTemp addr;
   UChar  rm = getIByte(delta);
   IRTemp oldG0 = newTemp(Ity_V128);
   IRTemp oldG1 = newTemp(Ity_V128);

   assign( oldG0, getXMMReg(gregOfRM(rm)) );

   if (epartIsReg(rm)) {
      assign( oldG1,
              binop( Iop_SetV128lo64,
                     mkexpr(oldG0),
                     getXMMRegLane64(eregOfRM(rm), 0)) );
      putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      return delta+1;
   } else {
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      assign( oldG1,
              binop( Iop_SetV128lo64,
                     mkexpr(oldG0),
                     loadLE(Ity_I64, mkexpr(addr)) ));
      putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      return delta+alen;
   }
}


/* SSE integer binary operation:
      G = G `op` E   (eLeft == False)
      G = E `op` G   (eLeft == True)
*/
static UInt dis_SSEint_E_to_G(
               UChar sorb, Int delta,
               const HChar* opname, IROp op,
               Bool eLeft
            )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getIByte(delta);
   IRExpr* gpart = getXMMReg(gregOfRM(rm));
   IRExpr* epart = NULL;
   if (epartIsReg(rm)) {
      epart = getXMMReg(eregOfRM(rm));
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      delta += 1;
   } else {
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      epart = loadLE(Ity_V128, mkexpr(addr));
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      delta += alen;
   }
   putXMMReg( gregOfRM(rm),
              eLeft ? binop(op, epart, gpart)
                    : binop(op, gpart, epart) );
   return delta;
}


/* Helper for doing SSE FP comparisons. */

static void findSSECmpOp ( Bool* needNot, IROp* op,
                           Int imm8, Bool all_lanes, Int sz )
{
   imm8 &= 7;
   *needNot = False;
   *op      = Iop_INVALID;
   if (imm8 >= 4) {
      *needNot = True;
      imm8 -= 4;
   }

   if (sz == 4 && all_lanes) {
      switch (imm8) {
         case 0: *op = Iop_CmpEQ32Fx4; return;
         case 1: *op = Iop_CmpLT32Fx4; return;
         case 2: *op = Iop_CmpLE32Fx4; return;
         case 3: *op = Iop_CmpUN32Fx4; return;
         default: break;
      }
   }
   if (sz == 4 && !all_lanes) {
      switch (imm8) {
         case 0: *op = Iop_CmpEQ32F0x4; return;
         case 1: *op = Iop_CmpLT32F0x4; return;
         case 2: *op = Iop_CmpLE32F0x4; return;
         case 3: *op = Iop_CmpUN32F0x4; return;
         default: break;
      }
   }
   if (sz == 8 && all_lanes) {
      switch (imm8) {
         case 0: *op = Iop_CmpEQ64Fx2; return;
         case 1: *op = Iop_CmpLT64Fx2; return;
         case 2: *op = Iop_CmpLE64Fx2; return;
         case 3: *op = Iop_CmpUN64Fx2; return;
         default: break;
      }
   }
   if (sz == 8 && !all_lanes) {
      switch (imm8) {
         case 0: *op = Iop_CmpEQ64F0x2; return;
         case 1: *op = Iop_CmpLT64F0x2; return;
         case 2: *op = Iop_CmpLE64F0x2; return;
         case 3: *op = Iop_CmpUN64F0x2; return;
         default: break;
      }
   }
   vpanic("findSSECmpOp(x86,guest)");
}
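
/* E.g. (illustrative, not from the original source): cmpnltps
   carries imm8 = 5; since 5 >= 4, needNot is set and imm8 drops to 1,
   selecting Iop_CmpLT32Fx4, whose result the caller then inverts to
   obtain not-less-than. */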

/* Handles SSE 32F/64F comparisons. */

static UInt dis_SSEcmp_E_to_G ( UChar sorb, Int delta,
                                const HChar* opname, Bool all_lanes, Int sz )
{
   HChar   dis_buf[50];
   Int     alen, imm8;
   IRTemp  addr;
   Bool    needNot = False;
   IROp    op      = Iop_INVALID;
   IRTemp  plain   = newTemp(Ity_V128);
   UChar   rm      = getIByte(delta);
   UShort  mask    = 0;
   vassert(sz == 4 || sz == 8);
   if (epartIsReg(rm)) {
      imm8 = getIByte(delta+1);
      findSSECmpOp(&needNot, &op, imm8, all_lanes, sz);
      assign( plain, binop(op, getXMMReg(gregOfRM(rm)),
                               getXMMReg(eregOfRM(rm))) );
      delta += 2;
      DIP("%s $%d,%s,%s\n", opname,
                            imm8,
                            nameXMMReg(eregOfRM(rm)),
                            nameXMMReg(gregOfRM(rm)) );
   } else {
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      imm8 = getIByte(delta+alen);
      findSSECmpOp(&needNot, &op, imm8, all_lanes, sz);
      assign( plain,
              binop(
                 op,
                 getXMMReg(gregOfRM(rm)),
                   all_lanes ? loadLE(Ity_V128, mkexpr(addr))
                 : sz == 8   ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
                 : /*sz==4*/   unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))
              )
      );
      delta += alen+1;
      DIP("%s $%d,%s,%s\n", opname,
                            imm8,
                            dis_buf,
                            nameXMMReg(gregOfRM(rm)) );
   }

   if (needNot && all_lanes) {
      putXMMReg( gregOfRM(rm),
                 unop(Iop_NotV128, mkexpr(plain)) );
   }
   else
   if (needNot && !all_lanes) {
      mask = toUShort( sz==4 ? 0x000F : 0x00FF );
      putXMMReg( gregOfRM(rm),
                 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) );
   }
   else {
      putXMMReg( gregOfRM(rm), mkexpr(plain) );
   }

   return delta;
}


/* Vector by scalar shift of G by the amount specified at the bottom
   of E. */

static UInt dis_SSE_shiftG_byE ( UChar sorb, Int delta,
                                 const HChar* opname, IROp op )
{
   HChar  dis_buf[50];
   Int    alen, size;
   IRTemp addr;
   Bool   shl, shr, sar;
   UChar  rm   = getIByte(delta);
   IRTemp g0   = newTemp(Ity_V128);
   IRTemp g1   = newTemp(Ity_V128);
   IRTemp amt  = newTemp(Ity_I32);
   IRTemp amt8 = newTemp(Ity_I8);
   if (epartIsReg(rm)) {
      assign( amt, getXMMRegLane32(eregOfRM(rm), 0) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      delta++;
   } else {
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      delta += alen;
   }
   assign( g0,   getXMMReg(gregOfRM(rm)) );
   assign( amt8, unop(Iop_32to8, mkexpr(amt)) );

   shl = shr = sar = False;
   size = 0;
   switch (op) {
      case Iop_ShlN16x8: shl = True; size = 16; break;
      case Iop_ShlN32x4: shl = True; size = 32; break;
      case Iop_ShlN64x2: shl = True; size = 64; break;
      case Iop_SarN16x8: sar = True; size = 16; break;
      case Iop_SarN32x4: sar = True; size = 32; break;
      case Iop_ShrN16x8: shr = True; size = 16; break;
      case Iop_ShrN32x4: shr = True; size = 32; break;
      case Iop_ShrN64x2: shr = True; size = 64; break;
      default: vassert(0);
   }

   if (shl || shr) {
      assign(
         g1,
         IRExpr_ITE(
            binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size)),
            binop(op, mkexpr(g0), mkexpr(amt8)),
            mkV128(0x0000)
         )
      );
   } else
   if (sar) {
      assign(
         g1,
         IRExpr_ITE(
            binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size)),
            binop(op, mkexpr(g0), mkexpr(amt8)),
            binop(op, mkexpr(g0), mkU8(size-1))
         )
      );
   } else {
      /*NOTREACHED*/
      vassert(0);
   }

   putXMMReg( gregOfRM(rm), mkexpr(g1) );
   return delta;
}


/* Vector by scalar shift of E by an immediate byte. */

static
UInt dis_SSE_shiftE_imm ( Int delta, const HChar* opname, IROp op )
{
   Bool   shl, shr, sar;
   UChar  rm = getIByte(delta);
   IRTemp e0 = newTemp(Ity_V128);
   IRTemp e1 = newTemp(Ity_V128);
   UChar  amt, size;
   vassert(epartIsReg(rm));
   vassert(gregOfRM(rm) == 2
           || gregOfRM(rm) == 4 || gregOfRM(rm) == 6);
   amt = getIByte(delta+1);
   delta += 2;
   DIP("%s $%d,%s\n", opname,
                      (Int)amt,
                      nameXMMReg(eregOfRM(rm)) );
   assign( e0, getXMMReg(eregOfRM(rm)) );

   shl = shr = sar = False;
   size = 0;
   switch (op) {
      case Iop_ShlN16x8: shl = True; size = 16; break;
      case Iop_ShlN32x4: shl = True; size = 32; break;
      case Iop_ShlN64x2: shl = True; size = 64; break;
      case Iop_SarN16x8: sar = True; size = 16; break;
      case Iop_SarN32x4: sar = True; size = 32; break;
      case Iop_ShrN16x8: shr = True; size = 16; break;
      case Iop_ShrN32x4: shr = True; size = 32; break;
      case Iop_ShrN64x2: shr = True; size = 64; break;
      default: vassert(0);
   }

   if (shl || shr) {
      assign( e1, amt >= size
                     ? mkV128(0x0000)
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else
   if (sar) {
      assign( e1, amt >= size
                     ? binop(op, mkexpr(e0), mkU8(size-1))
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else {
      /*NOTREACHED*/
      vassert(0);
   }

   putXMMReg( eregOfRM(rm), mkexpr(e1) );
   return delta;
}


/* Get the current SSE rounding mode. */

static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void )
{
   return binop( Iop_And32,
                 IRExpr_Get( OFFB_SSEROUND, Ity_I32 ),
                 mkU32(3) );
}

static void put_sse_roundingmode ( IRExpr* sseround )
{
   vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32);
   stmt( IRStmt_Put( OFFB_SSEROUND, sseround ) );
}

/* Break a 128-bit value up into four 32-bit ints. */

static void breakup128to32s ( IRTemp t128,
                              /*OUTs*/
                              IRTemp* t3, IRTemp* t2,
                              IRTemp* t1, IRTemp* t0 )
{
   IRTemp hi64 = newTemp(Ity_I64);
   IRTemp lo64 = newTemp(Ity_I64);
   assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
   assign( lo64, unop(Iop_V128to64,   mkexpr(t128)) );

   vassert(t0 && *t0 == IRTemp_INVALID);
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);

   *t0 = newTemp(Ity_I32);
   *t1 = newTemp(Ity_I32);
   *t2 = newTemp(Ity_I32);
   *t3 = newTemp(Ity_I32);
   assign( *t0, unop(Iop_64to32,   mkexpr(lo64)) );
   assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
   assign( *t2, unop(Iop_64to32,   mkexpr(hi64)) );
   assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
}
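
/* Typical usage (illustrative, not from the original source): with
   t3..t0 all initialised to IRTemp_INVALID,
      breakup128to32s(t128, &t3, &t2, &t1, &t0);
   yields the four 32-bit lanes, and mk128from32s(t3, t2, t1, t0)
   (below) reassembles them into the original 128-bit value. */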

/* Construct a 128-bit value from four 32-bit ints. */

static IRExpr* mk128from32s ( IRTemp t3, IRTemp t2,
                              IRTemp t1, IRTemp t0 )
{
   return
      binop( Iop_64HLtoV128,
             binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
             binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
      );
}

/* Break a 64-bit value up into four 16-bit ints. */

static void breakup64to16s ( IRTemp t64,
                             /*OUTs*/
                             IRTemp* t3, IRTemp* t2,
                             IRTemp* t1, IRTemp* t0 )
{
   IRTemp hi32 = newTemp(Ity_I32);
   IRTemp lo32 = newTemp(Ity_I32);
   assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
   assign( lo32, unop(Iop_64to32,   mkexpr(t64)) );

   vassert(t0 && *t0 == IRTemp_INVALID);
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);

   *t0 = newTemp(Ity_I16);
   *t1 = newTemp(Ity_I16);
   *t2 = newTemp(Ity_I16);
   *t3 = newTemp(Ity_I16);
   assign( *t0, unop(Iop_32to16,   mkexpr(lo32)) );
   assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
   assign( *t2, unop(Iop_32to16,   mkexpr(hi32)) );
   assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
}

/* Construct a 64-bit value from four 16-bit ints. */

static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
                             IRTemp t1, IRTemp t0 )
{
   return
      binop( Iop_32HLto64,
             binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
             binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
      );
}

/* Generate IR to set the guest %EFLAGS from the pushfl-format image
   in the given 32-bit temporary.  The flags that are set are: O S Z A
   C P D ID AC.

   In all cases, code to set AC is generated.  However, VEX actually
   ignores the AC value and so can optionally emit an emulation
   warning when it is enabled.  In this routine, an emulation warning
   is only emitted if emit_AC_emwarn is True, in which case
   next_insn_EIP must be correct (this allows for correct code
   generation for popfl/popfw).  If emit_AC_emwarn is False,
   next_insn_EIP is unimportant (this allows for easy if kludgey code
   generation for IRET.) */

static
void set_EFLAGS_from_value ( IRTemp t1,
                             Bool   emit_AC_emwarn,
                             Addr32 next_insn_EIP )
{
   vassert(typeOfIRTemp(irsb->tyenv,t1) == Ity_I32);

   /* t1 is the flag word.  Mask out everything except OSZACP and set
      the flags thunk to X86G_CC_OP_COPY. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1,
                     binop(Iop_And32,
                           mkexpr(t1),
                           mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
                                  | X86G_CC_MASK_A | X86G_CC_MASK_Z
                                  | X86G_CC_MASK_S| X86G_CC_MASK_O )
                          )
                    )
       );
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

   /* Also need to set the D flag, which is held in bit 10 of t1.
      If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
   stmt( IRStmt_Put(
            OFFB_DFLAG,
            IRExpr_ITE(
               unop(Iop_32to1,
                    binop(Iop_And32,
                          binop(Iop_Shr32, mkexpr(t1), mkU8(10)),
                          mkU32(1))),
               mkU32(0xFFFFFFFF),
               mkU32(1)))
       );

   /* Set the ID flag */
   stmt( IRStmt_Put(
            OFFB_IDFLAG,
            IRExpr_ITE(
               unop(Iop_32to1,
                    binop(Iop_And32,
                          binop(Iop_Shr32, mkexpr(t1), mkU8(21)),
                          mkU32(1))),
               mkU32(1),
               mkU32(0)))
       );
   /* And set the AC flag.  If setting it to 1, possibly emit an
      emulation warning. */
   stmt( IRStmt_Put(
            OFFB_ACFLAG,
            IRExpr_ITE(
               unop(Iop_32to1,
                    binop(Iop_And32,
                          binop(Iop_Shr32, mkexpr(t1), mkU8(18)),
                          mkU32(1))),
               mkU32(1),
               mkU32(0)))
       );

   if (emit_AC_emwarn) {
      put_emwarn( mkU32(EmWarn_X86_acFlag) );
      stmt(
         IRStmt_Exit(
            binop( Iop_CmpNE32,
                   binop(Iop_And32, mkexpr(t1), mkU32(1<<18)),
                   mkU32(0) ),
            Ijk_EmWarn,
            IRConst_U32( next_insn_EIP ),
            OFFB_EIP
         )
      );
   }
}


/* Helper for the SSSE3 (not SSE3) PMULHRSW insns.  Given two 64-bit
   values (aa,bb), computes, for each of the 4 16-bit lanes:

      (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
*/
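/* A worked lane (illustrative, not from the original source):
   aa_lane = 0x4000 (0.5 in Q15) and bb_lane = 0x2000 (0.25) give a
   32-bit product of 0x08000000; >>u 14 gives 0x2000, and (+1) >>u 1
   rounds to 0x1000, i.e. 0.125. */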
static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
{
   IRTemp aa      = newTemp(Ity_I64);
   IRTemp bb      = newTemp(Ity_I64);
   IRTemp aahi32s = newTemp(Ity_I64);
   IRTemp aalo32s = newTemp(Ity_I64);
   IRTemp bbhi32s = newTemp(Ity_I64);
   IRTemp bblo32s = newTemp(Ity_I64);
   IRTemp rHi     = newTemp(Ity_I64);
   IRTemp rLo     = newTemp(Ity_I64);
   IRTemp one32x2 = newTemp(Ity_I64);
   assign(aa, aax);
   assign(bb, bbx);
   assign( aahi32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
                 mkU8(16) ));
   assign( aalo32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
                 mkU8(16) ));
   assign( bbhi32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
                 mkU8(16) ));
   assign( bblo32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
                 mkU8(16) ));
   assign(one32x2, mkU64( (1ULL << 32) + 1 ));
   assign(
      rHi,
      binop(
         Iop_ShrN32x2,
         binop(
            Iop_Add32x2,
            binop(
               Iop_ShrN32x2,
               binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)),
               mkU8(14)
            ),
            mkexpr(one32x2)
         ),
         mkU8(1)
      )
   );
   assign(
      rLo,
      binop(
         Iop_ShrN32x2,
         binop(
            Iop_Add32x2,
            binop(
               Iop_ShrN32x2,
               binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)),
               mkU8(14)
            ),
            mkexpr(one32x2)
         ),
         mkU8(1)
      )
   );
   return
      binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo));
}


/* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns.  Given two 64-bit
   values (aa,bb), computes, for each lane:

      if aa_lane < 0 then - bb_lane
      else if aa_lane > 0 then bb_lane
      else 0
*/
static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
{
   IRTemp aa       = newTemp(Ity_I64);
   IRTemp bb       = newTemp(Ity_I64);
   IRTemp zero     = newTemp(Ity_I64);
   IRTemp bbNeg    = newTemp(Ity_I64);
   IRTemp negMask  = newTemp(Ity_I64);
   IRTemp posMask  = newTemp(Ity_I64);
   IROp   opSub    = Iop_INVALID;
   IROp   opCmpGTS = Iop_INVALID;

   switch (laneszB) {
      case 1: opSub = Iop_Sub8x8;  opCmpGTS = Iop_CmpGT8Sx8;  break;
      case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
      case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
      default: vassert(0);
   }

   assign( aa,      aax );
   assign( bb,      bbx );
   assign( zero,    mkU64(0) );
   assign( bbNeg,   binop(opSub,    mkexpr(zero), mkexpr(bb)) );
   assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
   assign( posMask, binop(opCmpGTS, mkexpr(aa),   mkexpr(zero)) );

   return
      binop(Iop_Or64,
            binop(Iop_And64, mkexpr(bb),    mkexpr(posMask)),
            binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );
}
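
/* E.g. (illustrative, not from the original source), with 16-bit
   lanes: aa_lane = -3, bb_lane = 7 gives -7; aa_lane = 0 gives 0;
   aa_lane = 5 gives 7. */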

/* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns.  Given a 64-bit
   value aa, computes, for each lane

      if aa < 0 then -aa else aa

   Note that the result is interpreted as unsigned, so that the
   absolute value of the most negative signed input can be
   represented.
*/
static IRExpr* dis_PABS_helper ( IRExpr* aax, Int laneszB )
{
   IRTemp aa      = newTemp(Ity_I64);
   IRTemp zero    = newTemp(Ity_I64);
   IRTemp aaNeg   = newTemp(Ity_I64);
   IRTemp negMask = newTemp(Ity_I64);
   IRTemp posMask = newTemp(Ity_I64);
   IROp   opSub   = Iop_INVALID;
   IROp   opSarN  = Iop_INVALID;

   switch (laneszB) {
      case 1: opSub = Iop_Sub8x8;  opSarN = Iop_SarN8x8;  break;
      case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
      case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
      default: vassert(0);
   }

   assign( aa,      aax );
   assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
   assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
   assign( zero,    mkU64(0) );
   assign( aaNeg,   binop(opSub, mkexpr(zero), mkexpr(aa)) );
   return
      binop(Iop_Or64,
            binop(Iop_And64, mkexpr(aa),    mkexpr(posMask)),
            binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) );
}
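
/* E.g. (illustrative, not from the original source), with 8-bit
   lanes: aa_lane = 0x80 (-128) yields 0x80, which read as unsigned
   is 128, the correct absolute value. */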

static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
                                        IRTemp lo64, Int byteShift )
{
   vassert(byteShift >= 1 && byteShift <= 7);
   return
      binop(Iop_Or64,
            binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
            binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
      );
}

/* Generate a SIGSEGV followed by a restart of the current instruction
   if effective_addr is not 16-aligned.  This is required behaviour
   for some SSE3 instructions and all 128-bit SSSE3 instructions.
   This assumes that guest_RIP_curr_instr is set correctly! */
static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr )
{
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE32,
               binop(Iop_And32,mkexpr(effective_addr),mkU32(0xF)),
               mkU32(0)),
         Ijk_SigSEGV,
         IRConst_U32(guest_EIP_curr_instr),
         OFFB_EIP
      )
   );
}


/* Helper for deciding whether a given insn (starting at the opcode
   byte) may validly be used with a LOCK prefix.  The following insns
   may be used with LOCK when their destination operand is in memory.
   AFAICS this is exactly the same for both 32-bit and 64-bit mode.

   ADD        80 /0,  81 /0,  82 /0,  83 /0,  00,  01
   OR         80 /1,  81 /1,  82 /x,  83 /1,  08,  09
   ADC        80 /2,  81 /2,  82 /2,  83 /2,  10,  11
   SBB        80 /3,  81 /3,  82 /x,  83 /3,  18,  19
   AND        80 /4,  81 /4,  82 /x,  83 /4,  20,  21
   SUB        80 /5,  81 /5,  82 /x,  83 /5,  28,  29
   XOR        80 /6,  81 /6,  82 /x,  83 /6,  30,  31

   DEC        FE /1,  FF /1
   INC        FE /0,  FF /0

   NEG        F6 /3,  F7 /3
   NOT        F6 /2,  F7 /2

   XCHG       86, 87

   BTC        0F BB,  0F BA /7
   BTR        0F B3,  0F BA /6
   BTS        0F AB,  0F BA /5

   CMPXCHG    0F B0,  0F B1
   CMPXCHG8B  0F C7 /1

   XADD       0F C0,  0F C1

   ------------------------------

   80 /0 = addb $imm8,  rm8
   81 /0 = addl $imm32, rm32  and  addw $imm16, rm16
   82 /0 = addb $imm8,  rm8
   83 /0 = addl $simm8, rm32  and  addw $simm8, rm16

   00    = addb r8,  rm8
   01    = addl r32, rm32  and  addw r16, rm16

   Same for ADD OR ADC SBB AND SUB XOR

   FE /1 = dec rm8
   FF /1 = dec rm32  and  dec rm16

   FE /0 = inc rm8
   FF /0 = inc rm32  and  inc rm16

   F6 /3 = neg rm8
   F7 /3 = neg rm32  and  neg rm16

   F6 /2 = not rm8
   F7 /2 = not rm32  and  not rm16

   0F BB    = btcw r16, rm16    and  btcl r32, rm32
   0F BA /7 = btcw $imm8, rm16  and  btcl $imm8, rm32
   Same for BTS, BTR
*/
static Bool can_be_used_with_LOCK_prefix ( const UChar* opc )
{
   switch (opc[0]) {
      case 0x00: case 0x01: case 0x08: case 0x09:
      case 0x10: case 0x11: case 0x18: case 0x19:
      case 0x20: case 0x21: case 0x28: case 0x29:
      case 0x30: case 0x31:
         if (!epartIsReg(opc[1]))
            return True;
         break;

      case 0x80: case 0x81: case 0x82: case 0x83:
         if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 6
             && !epartIsReg(opc[1]))
            return True;
         break;

      case 0xFE: case 0xFF:
         if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 1
             && !epartIsReg(opc[1]))
            return True;
         break;

      case 0xF6: case 0xF7:
         if (gregOfRM(opc[1]) >= 2 && gregOfRM(opc[1]) <= 3
             && !epartIsReg(opc[1]))
            return True;
         break;

      case 0x86: case 0x87:
         if (!epartIsReg(opc[1]))
            return True;
         break;

      case 0x0F: {
         switch (opc[1]) {
            case 0xBB: case 0xB3: case 0xAB:
               if (!epartIsReg(opc[2]))
                  return True;
               break;
            case 0xBA:
               if (gregOfRM(opc[2]) >= 5 && gregOfRM(opc[2]) <= 7
                   && !epartIsReg(opc[2]))
                  return True;
               break;
            case 0xB0: case 0xB1:
               if (!epartIsReg(opc[2]))
                  return True;
               break;
            case 0xC7:
               if (gregOfRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
                  return True;
               break;
            case 0xC0: case 0xC1:
               if (!epartIsReg(opc[2]))
                  return True;
               break;
            default:
               break;
         } /* switch (opc[1]) */
         break;
      }

      default:
         break;
   } /* switch (opc[0]) */

   return False;
}
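
/* E.g. (illustrative, not from the original source): "lock incl
   (%eax)" encodes as F0 FF 00.  Here opc[0] = 0xFF with
   gregOfRM(0x00) = 0 (INC) and a memory E-part, so the LOCK prefix
   is accepted. */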
8024 static IRTemp math_BSWAP ( IRTemp t1, IRType ty )
8026 IRTemp t2 = newTemp(ty);
8027 if (ty == Ity_I32) {
8028 assign( t2,
8029 binop(
8030 Iop_Or32,
8031 binop(Iop_Shl32, mkexpr(t1), mkU8(24)),
8032 binop(
8033 Iop_Or32,
8034 binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
8035 mkU32(0x00FF0000)),
8036 binop(Iop_Or32,
8037 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
8038 mkU32(0x0000FF00)),
8039 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)),
8040 mkU32(0x000000FF) )
8043 return t2;
8045 if (ty == Ity_I16) {
8046 assign(t2,
8047 binop(Iop_Or16,
8048 binop(Iop_Shl16, mkexpr(t1), mkU8(8)),
8049 binop(Iop_Shr16, mkexpr(t1), mkU8(8)) ));
8050 return t2;
8052 vassert(0);
8053 /*NOTREACHED*/
8054 return IRTemp_INVALID;
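/* Sanity check of the Ity_I32 case above (illustrative): for t1 ==
   0x11223344 the four OR terms evaluate to 0x44000000, 0x00330000,
   0x00002200 and 0x00000011, giving 0x44332211 -- the byte-reversed
   value that BSWAP requires. */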
8057 /*------------------------------------------------------------*/
8058 /*--- Disassemble a single instruction ---*/
8059 /*------------------------------------------------------------*/
8061 /* Disassemble a single instruction into IR. The instruction is
8062 located in host memory at &guest_code[delta]. *expect_CAS is set
8063 to True if the resulting IR is expected to contain an IRCAS
8064 statement, and False if it's not expected to. This makes it
8065 possible for the caller of disInstr_X86_WRK to check that
8066 LOCK-prefixed instructions are at least plausibly translated, in
8067 that it becomes possible to check that a (validly) LOCK-prefixed
8068 instruction generates a translation containing an IRCAS, and
8069 instructions without LOCK prefixes don't generate translations
8070 containing an IRCAS.
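For example (illustrative): "lock incl (%eax)" must turn into IR
containing an IRCAS, so *expect_CAS is set True when the F0 prefix
is seen; a plain "incl (%eax)" must not, and the flag stays False.
A mismatch between the flag and the presence of an IRCAS in the
result indicates a decoder bug.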
8072 static
8073 DisResult disInstr_X86_WRK (
8074 /*OUT*/Bool* expect_CAS,
8075 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
8076 Bool resteerCisOk,
8077 void* callback_opaque,
8078 Long delta64,
8079 const VexArchInfo* archinfo,
8080 const VexAbiInfo* vbi,
8081 Bool sigill_diag
8084 IRType ty;
8085 IRTemp addr, t0, t1, t2, t3, t4, t5, t6;
8086 Int alen;
8087 UChar opc, modrm, abyte, pre;
8088 UInt d32;
8089 HChar dis_buf[50];
8090 Int am_sz, d_sz, n_prefixes;
8091 DisResult dres;
8092 const UChar* insn; /* used in SSE decoders */
8094 /* The running delta */
8095 Int delta = (Int)delta64;
8097 /* Holds eip at the start of the insn, so that we can print
8098 consistent error messages for unimplemented insns. */
8099 Int delta_start = delta;
8101 /* sz denotes the nominal data-op size of the insn; we change it to
8102 2 if an 0x66 prefix is seen */
8103 Int sz = 4;
8105 /* sorb holds the segment-override-prefix byte, if any. Zero if no
8106 prefix has been seen, else one of {0x26, 0x36, 0x3E, 0x64, 0x65}
8107 indicating the prefix. */
8108 UChar sorb = 0;
8110 /* Gets set to True if a LOCK prefix is seen. */
8111 Bool pfx_lock = False;
8113 /* Set result defaults. */
8114 dres.whatNext = Dis_Continue;
8115 dres.len = 0;
8116 dres.continueAt = 0;
8117 dres.hint = Dis_HintNone;
8118 dres.jk_StopHere = Ijk_INVALID;
8120 *expect_CAS = False;
8122 addr = t0 = t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID;
8124 vassert(guest_EIP_bbstart + delta == guest_EIP_curr_instr);
8125 DIP("\t0x%x: ", guest_EIP_bbstart+delta);
8127 /* Spot "Special" instructions (see comment at top of file). */
8129 const UChar* code = guest_code + delta;
8130 /* Spot the 12-byte preamble:
8131 C1C703 roll $3, %edi
8132 C1C70D roll $13, %edi
8133 C1C71D roll $29, %edi
8134 C1C713 roll $19, %edi
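The rotate amounts sum to 3 + 13 + 29 + 19 == 64, which is 0 mod
32, so the four rolls leave %edi unchanged; the preamble is a
no-op on real hardware and is therefore safe to plant in client
code.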
8136 if (code[ 0] == 0xC1 && code[ 1] == 0xC7 && code[ 2] == 0x03 &&
8137 code[ 3] == 0xC1 && code[ 4] == 0xC7 && code[ 5] == 0x0D &&
8138 code[ 6] == 0xC1 && code[ 7] == 0xC7 && code[ 8] == 0x1D &&
8139 code[ 9] == 0xC1 && code[10] == 0xC7 && code[11] == 0x13) {
8140 /* Got a "Special" instruction preamble. Which one is it? */
8141 if (code[12] == 0x87 && code[13] == 0xDB /* xchgl %ebx,%ebx */) {
8142 /* %EDX = client_request ( %EAX ) */
8143 DIP("%%edx = client_request ( %%eax )\n");
8144 delta += 14;
8145 jmp_lit(&dres, Ijk_ClientReq, guest_EIP_bbstart+delta);
8146 vassert(dres.whatNext == Dis_StopHere);
8147 goto decode_success;
8149 else
8150 if (code[12] == 0x87 && code[13] == 0xC9 /* xchgl %ecx,%ecx */) {
8151 /* %EAX = guest_NRADDR */
8152 DIP("%%eax = guest_NRADDR\n");
8153 delta += 14;
8154 putIReg(4, R_EAX, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
8155 goto decode_success;
8157 else
8158 if (code[12] == 0x87 && code[13] == 0xD2 /* xchgl %edx,%edx */) {
8159 /* call-noredir *%EAX */
8160 DIP("call-noredir *%%eax\n");
8161 delta += 14;
8162 t1 = newTemp(Ity_I32);
8163 assign(t1, getIReg(4,R_EAX));
8164 t2 = newTemp(Ity_I32);
8165 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
8166 putIReg(4, R_ESP, mkexpr(t2));
8167 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta));
8168 jmp_treg(&dres, Ijk_NoRedir, t1);
8169 vassert(dres.whatNext == Dis_StopHere);
8170 goto decode_success;
8172 else
8173 if (code[12] == 0x87 && code[13] == 0xFF /* xchgl %edi,%edi */) {
8174 /* IR injection */
8175 DIP("IR injection\n");
8176 vex_inject_ir(irsb, Iend_LE);
8178 // Invalidate the current insn. The reason is that the IRop we're
8179 // injecting here can change; in that case the translation has to
8180 // be redone. For ease of handling, we simply invalidate all the
8181 // time.
8182 stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_EIP_curr_instr)));
8183 stmt(IRStmt_Put(OFFB_CMLEN, mkU32(14)));
8185 delta += 14;
8187 stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_bbstart + delta) ) );
8188 dres.whatNext = Dis_StopHere;
8189 dres.jk_StopHere = Ijk_InvalICache;
8190 goto decode_success;
8192 /* We don't know what it is. */
8193 goto decode_failure;
8194 /*NOTREACHED*/
8198 /* Handle a couple of weird-ass NOPs that have been observed in the
8199 wild. */
8201 const UChar* code = guest_code + delta;
8202 /* Sun's JVM 1.5.0 uses the following as a NOP:
8203 26 2E 64 65 90 %es:%cs:%fs:%gs:nop */
8204 if (code[0] == 0x26 && code[1] == 0x2E && code[2] == 0x64
8205 && code[3] == 0x65 && code[4] == 0x90) {
8206 DIP("%%es:%%cs:%%fs:%%gs:nop\n");
8207 delta += 5;
8208 goto decode_success;
8210 /* Don't barf on recent binutils padding,
8211 all variants of which are: nopw %cs:0x0(%eax,%eax,1)
8212 66 2e 0f 1f 84 00 00 00 00 00
8213 66 66 2e 0f 1f 84 00 00 00 00 00
8214 66 66 66 2e 0f 1f 84 00 00 00 00 00
8215 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
8216 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
8217 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
8219 if (code[0] == 0x66) {
8220 Int data16_cnt;
8221 for (data16_cnt = 1; data16_cnt < 6; data16_cnt++)
8222 if (code[data16_cnt] != 0x66)
8223 break;
8224 if (code[data16_cnt] == 0x2E && code[data16_cnt + 1] == 0x0F
8225 && code[data16_cnt + 2] == 0x1F && code[data16_cnt + 3] == 0x84
8226 && code[data16_cnt + 4] == 0x00 && code[data16_cnt + 5] == 0x00
8227 && code[data16_cnt + 6] == 0x00 && code[data16_cnt + 7] == 0x00
8228 && code[data16_cnt + 8] == 0x00 ) {
8229 DIP("nopw %%cs:0x0(%%eax,%%eax,1)\n");
8230 delta += 9 + data16_cnt;
8231 goto decode_success;
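/* e.g. the shortest variant, 66 2E 0F 1F 84 00 00 00 00 00, has
   data16_cnt == 1, so delta advances by 9 + 1 == 10 bytes --
   exactly the encoded length. */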
8235 // Intel CET requires the following opcodes to be treated as NOPs
8236 // with any prefix and ModRM, SIB and disp combination:
8237 // "0F 19", "0F 1C", "0F 1D", "0F 1E", "0F 1F"
8238 UInt opcode_index = 0;
8239 // Skip any prefix combination
8240 UInt addr_override = 0;
8241 UInt temp_sz = 4;
8242 Bool is_prefix = True;
8243 while (is_prefix) {
8244 switch (code[opcode_index]) {
8245 case 0x66:
8246 temp_sz = 2;
8247 opcode_index++;
8248 break;
8249 case 0x67:
8250 addr_override = 1;
8251 opcode_index++;
8252 break;
8253 case 0x26: case 0x3E: // if we set segment override here,
8254 case 0x64: case 0x65: // disAMode segfaults
8255 case 0x2E: case 0x36:
8256 case 0xF0: case 0xF2: case 0xF3:
8257 opcode_index++;
8258 break;
8259 default:
8260 is_prefix = False;
8263 // Check the opcode
8264 if (code[opcode_index] == 0x0F) {
8265 switch (code[opcode_index+1]) {
8266 case 0x19:
8267 case 0x1C: case 0x1D:
8268 case 0x1E: case 0x1F:
8269 delta += opcode_index+2;
8270 modrm = getUChar(delta);
8271 if (epartIsReg(modrm)) {
8272 delta += 1;
8273 DIP("nop%c\n", nameISize(temp_sz));
8275 else {
8276 addr = disAMode(&alen, 0/*"no sorb"*/, delta, dis_buf);
8277 delta += alen - addr_override;
8278 DIP("nop%c %s\n", nameISize(temp_sz), dis_buf);
8280 goto decode_success;
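/* Illustrative case: F3 0F 1E FB ("endbr32") lands here. The F3
   prefix is skipped by the loop above, 0F 1E matches, and the
   ModRM byte FB is the register form, so all 4 bytes are
   consumed as a nop. */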
8281 default:
8282 break;
8286 /* Normal instruction handling starts here. */
8288 /* Deal with some but not all prefixes:
8289 66(oso)
8290 F0(lock)
8291 2E(cs:) 3E(ds:) 26(es:) 64(fs:) 65(gs:) 36(ss:)
8292 Not dealt with (left in place):
8293 F2 F3
8295 n_prefixes = 0;
8296 while (True) {
8297 if (n_prefixes > 7) goto decode_failure;
8298 pre = getUChar(delta);
8299 switch (pre) {
8300 case 0x66:
8301 sz = 2;
8302 break;
8303 case 0xF0:
8304 pfx_lock = True;
8305 *expect_CAS = True;
8306 break;
8307 case 0x3E: /* %DS: */
8308 case 0x26: /* %ES: */
8309 case 0x64: /* %FS: */
8310 case 0x65: /* %GS: */
8311 case 0x36: /* %SS: */
8312 if (sorb != 0)
8313 goto decode_failure; /* only one seg override allowed */
8314 sorb = pre;
8315 break;
8316 case 0x2E: { /* %CS: */
8317 /* 2E prefix on a conditional branch instruction is a
8318 branch-prediction hint, which can safely be ignored. */
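/* For instance, 2E 0F 84 <rel32> is "je" with a branch-not-taken
   hint byte prepended; the 2E is dropped here and the conditional
   jump is decoded as normal. */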
8319 UChar op1 = getIByte(delta+1);
8320 UChar op2 = getIByte(delta+2);
8321 if ((op1 >= 0x70 && op1 <= 0x7F)
8322 || (op1 == 0xE3)
8323 || (op1 == 0x0F && op2 >= 0x80 && op2 <= 0x8F)) {
8324 if (0) vex_printf("vex x86->IR: ignoring branch hint\n");
8325 } else {
8326 /* All other CS override cases are not handled */
8327 goto decode_failure;
8329 break;
8331 default:
8332 goto not_a_prefix;
8334 n_prefixes++;
8335 delta++;
8338 not_a_prefix:
8340 /* Now we should be looking at the primary opcode byte or the
8341 leading F2 or F3. Check that any LOCK prefix is actually
8342 allowed. */
8344 if (pfx_lock) {
8345 if (can_be_used_with_LOCK_prefix( &guest_code[delta] )) {
8346 DIP("lock ");
8347 } else {
8348 *expect_CAS = False;
8349 goto decode_failure;
8354 /* ---------------------------------------------------- */
8355 /* --- The SSE decoder. --- */
8356 /* ---------------------------------------------------- */
8358 /* What did I do to deserve SSE? Perhaps I was really bad in a
8359 previous life? */
8361 /* Note, this doesn't handle SSE2 or SSE3. That is handled in a
8362 later section, further on. */
8364 insn = &guest_code[delta];
8366 /* Treat fxsave specially. It should be doable even on an SSE0
8367 (Pentium-II class) CPU. Hence be prepared to handle it on
8368 any subarchitecture variant.
8371 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */
8372 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE
8373 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 0) {
8374 IRDirty* d;
8375 modrm = getIByte(delta+2);
8376 vassert(sz == 4);
8377 vassert(!epartIsReg(modrm));
8379 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8380 delta += 2+alen;
8381 gen_SEGV_if_not_16_aligned(addr);
8383 DIP("fxsave %s\n", dis_buf);
8385 /* Uses dirty helper:
8386 void x86g_dirtyhelper_FXSAVE ( VexGuestX86State*, UInt ) */
8387 d = unsafeIRDirty_0_N (
8388 0/*regparms*/,
8389 "x86g_dirtyhelper_FXSAVE",
8390 &x86g_dirtyhelper_FXSAVE,
8391 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
8394 /* declare we're writing memory */
8395 d->mFx = Ifx_Write;
8396 d->mAddr = mkexpr(addr);
8397 d->mSize = 464; /* according to recent Intel docs */
8399 /* declare we're reading guest state */
8400 d->nFxState = 7;
8401 vex_bzero(&d->fxState, sizeof(d->fxState));
8403 d->fxState[0].fx = Ifx_Read;
8404 d->fxState[0].offset = OFFB_FTOP;
8405 d->fxState[0].size = sizeof(UInt);
8407 d->fxState[1].fx = Ifx_Read;
8408 d->fxState[1].offset = OFFB_FPREGS;
8409 d->fxState[1].size = 8 * sizeof(ULong);
8411 d->fxState[2].fx = Ifx_Read;
8412 d->fxState[2].offset = OFFB_FPTAGS;
8413 d->fxState[2].size = 8 * sizeof(UChar);
8415 d->fxState[3].fx = Ifx_Read;
8416 d->fxState[3].offset = OFFB_FPROUND;
8417 d->fxState[3].size = sizeof(UInt);
8419 d->fxState[4].fx = Ifx_Read;
8420 d->fxState[4].offset = OFFB_FC3210;
8421 d->fxState[4].size = sizeof(UInt);
8423 d->fxState[5].fx = Ifx_Read;
8424 d->fxState[5].offset = OFFB_XMM0;
8425 d->fxState[5].size = 8 * sizeof(U128);
8427 d->fxState[6].fx = Ifx_Read;
8428 d->fxState[6].offset = OFFB_SSEROUND;
8429 d->fxState[6].size = sizeof(UInt);
8431 /* Be paranoid ... this assertion tries to ensure the 8 %xmm
8432 images are packed back-to-back. If not, the value of
8433 d->fxState[5].size is wrong. */
8434 vassert(16 == sizeof(U128));
8435 vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16));
8437 stmt( IRStmt_Dirty(d) );
8439 goto decode_success;
8442 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */
8443 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE
8444 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 1) {
8445 IRDirty* d;
8446 modrm = getIByte(delta+2);
8447 vassert(sz == 4);
8448 vassert(!epartIsReg(modrm));
8450 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8451 delta += 2+alen;
8452 gen_SEGV_if_not_16_aligned(addr);
8454 DIP("fxrstor %s\n", dis_buf);
8456 /* Uses dirty helper:
8457 VexEmNote x86g_dirtyhelper_FXRSTOR ( VexGuestX86State*, UInt )
8458 NOTE:
8459 the VexEmNote value is simply ignored (unlike for FRSTOR)
8461 d = unsafeIRDirty_0_N (
8462 0/*regparms*/,
8463 "x86g_dirtyhelper_FXRSTOR",
8464 &x86g_dirtyhelper_FXRSTOR,
8465 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
8468 /* declare we're reading memory */
8469 d->mFx = Ifx_Read;
8470 d->mAddr = mkexpr(addr);
8471 d->mSize = 464; /* according to recent Intel docs */
8473 /* declare we're writing guest state */
8474 d->nFxState = 7;
8475 vex_bzero(&d->fxState, sizeof(d->fxState));
8477 d->fxState[0].fx = Ifx_Write;
8478 d->fxState[0].offset = OFFB_FTOP;
8479 d->fxState[0].size = sizeof(UInt);
8481 d->fxState[1].fx = Ifx_Write;
8482 d->fxState[1].offset = OFFB_FPREGS;
8483 d->fxState[1].size = 8 * sizeof(ULong);
8485 d->fxState[2].fx = Ifx_Write;
8486 d->fxState[2].offset = OFFB_FPTAGS;
8487 d->fxState[2].size = 8 * sizeof(UChar);
8489 d->fxState[3].fx = Ifx_Write;
8490 d->fxState[3].offset = OFFB_FPROUND;
8491 d->fxState[3].size = sizeof(UInt);
8493 d->fxState[4].fx = Ifx_Write;
8494 d->fxState[4].offset = OFFB_FC3210;
8495 d->fxState[4].size = sizeof(UInt);
8497 d->fxState[5].fx = Ifx_Write;
8498 d->fxState[5].offset = OFFB_XMM0;
8499 d->fxState[5].size = 8 * sizeof(U128);
8501 d->fxState[6].fx = Ifx_Write;
8502 d->fxState[6].offset = OFFB_SSEROUND;
8503 d->fxState[6].size = sizeof(UInt);
8505 /* Be paranoid ... this assertion tries to ensure the 8 %xmm
8506 images are packed back-to-back. If not, the value of
8507 d->fxState[5].size is wrong. */
8508 vassert(16 == sizeof(U128));
8509 vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16));
8511 stmt( IRStmt_Dirty(d) );
8513 goto decode_success;
8516 /* ------ SSE decoder main ------ */
8518 /* Skip parts of the decoder which don't apply given the stated
8519 guest subarchitecture. */
8520 if (archinfo->hwcaps == 0/*baseline, no sse at all*/)
8521 goto after_sse_decoders;
8523 /* With mmxext only some extended MMX instructions are recognized.
8524 The mmxext instructions are MASKMOVQ MOVNTQ PAVGB PAVGW PMAXSW
8525 PMAXUB PMINSW PMINUB PMULHUW PSADBW PSHUFW PEXTRW PINSRW PMOVMSKB
8526 PREFETCHNTA PREFETCHT0 PREFETCHT1 PREFETCHT2 SFENCE
8528 http://support.amd.com/us/Embedded_TechDocs/22466.pdf
8529 https://en.wikipedia.org/wiki/3DNow!#3DNow.21_extensions */
8531 if (archinfo->hwcaps == VEX_HWCAPS_X86_MMXEXT/*integer only sse1 subset*/)
8532 goto mmxext;
8534 /* Otherwise we must be doing sse1 or sse2, so we can at least try
8535 for SSE1 here. */
8537 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
8538 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x58) {
8539 delta = dis_SSE_E_to_G_all( sorb, delta+2, "addps", Iop_Add32Fx4 );
8540 goto decode_success;
8543 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
8544 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x58) {
8545 vassert(sz == 4);
8546 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "addss", Iop_Add32F0x4 );
8547 goto decode_success;
8550 /* 0F 55 = ANDNPS -- G = (not G) and E */
8551 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x55) {
8552 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnps", Iop_AndV128 );
8553 goto decode_success;
8556 /* 0F 54 = ANDPS -- G = G and E */
8557 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x54) {
8558 delta = dis_SSE_E_to_G_all( sorb, delta+2, "andps", Iop_AndV128 );
8559 goto decode_success;
8562 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
8563 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC2) {
8564 delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmpps", True, 4 );
8565 goto decode_success;
8568 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
8569 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xC2) {
8570 vassert(sz == 4);
8571 delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpss", False, 4 );
8572 goto decode_success;
8575 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */
8576 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
8577 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) {
8578 IRTemp argL = newTemp(Ity_F32);
8579 IRTemp argR = newTemp(Ity_F32);
8580 modrm = getIByte(delta+2);
8581 if (epartIsReg(modrm)) {
8582 assign( argR, getXMMRegLane32F( eregOfRM(modrm), 0/*lowest lane*/ ) );
8583 delta += 2+1;
8584 DIP("[u]comiss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
8585 nameXMMReg(gregOfRM(modrm)) );
8586 } else {
8587 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8588 assign( argR, loadLE(Ity_F32, mkexpr(addr)) );
8589 delta += 2+alen;
8590 DIP("[u]comiss %s,%s\n", dis_buf,
8591 nameXMMReg(gregOfRM(modrm)) );
8593 assign( argL, getXMMRegLane32F( gregOfRM(modrm), 0/*lowest lane*/ ) );
8595 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
8596 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
8597 stmt( IRStmt_Put(
8598 OFFB_CC_DEP1,
8599 binop( Iop_And32,
8600 binop(Iop_CmpF64,
8601 unop(Iop_F32toF64,mkexpr(argL)),
8602 unop(Iop_F32toF64,mkexpr(argR))),
8603 mkU32(0x45)
8604 )));
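/* Illustrative note (not in the original): the 0x45 mask keeps
   only bits 6, 2 and 0 -- ZF (0x40), PF (0x04) and CF (0x01) --
   of the Iop_CmpF64 result, which are exactly the flags
   COMISS/UCOMISS are defined to write; the AND forces every other
   flag bit to zero. */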
8605 /* Set NDEP even though it isn't used. This makes redundant-PUT
8606 elimination of previous stores to this field work better. */
8607 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
8608 goto decode_success;
8611 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
8612 half xmm */
8613 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x2A) {
8614 IRTemp arg64 = newTemp(Ity_I64);
8615 IRTemp rmode = newTemp(Ity_I32);
8616 vassert(sz == 4);
8618 modrm = getIByte(delta+2);
8619 if (epartIsReg(modrm)) {
8620 /* Only switch to MMX mode if the source is an MMX register.
8621 See comments on CVTPI2PD for details. Fixes #357059. */
8622 do_MMX_preamble();
8623 assign( arg64, getMMXReg(eregOfRM(modrm)) );
8624 delta += 2+1;
8625 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregOfRM(modrm)),
8626 nameXMMReg(gregOfRM(modrm)));
8627 } else {
8628 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8629 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
8630 delta += 2+alen;
8631 DIP("cvtpi2ps %s,%s\n", dis_buf,
8632 nameXMMReg(gregOfRM(modrm)) );
8635 assign( rmode, get_sse_roundingmode() );
8637 putXMMRegLane32F(
8638 gregOfRM(modrm), 0,
8639 binop(Iop_F64toF32,
8640 mkexpr(rmode),
8641 unop(Iop_I32StoF64,
8642 unop(Iop_64to32, mkexpr(arg64)) )) );
8644 putXMMRegLane32F(
8645 gregOfRM(modrm), 1,
8646 binop(Iop_F64toF32,
8647 mkexpr(rmode),
8648 unop(Iop_I32StoF64,
8649 unop(Iop_64HIto32, mkexpr(arg64)) )) );
8651 goto decode_success;
8654 /* F3 0F 2A = CVTSI2SS -- convert I32 in mem/ireg to F32 in low
8655 quarter xmm */
8656 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x2A) {
8657 IRTemp arg32 = newTemp(Ity_I32);
8658 IRTemp rmode = newTemp(Ity_I32);
8659 vassert(sz == 4);
8661 modrm = getIByte(delta+3);
8662 if (epartIsReg(modrm)) {
8663 assign( arg32, getIReg(4, eregOfRM(modrm)) );
8664 delta += 3+1;
8665 DIP("cvtsi2ss %s,%s\n", nameIReg(4, eregOfRM(modrm)),
8666 nameXMMReg(gregOfRM(modrm)));
8667 } else {
8668 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
8669 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
8670 delta += 3+alen;
8671 DIP("cvtsi2ss %s,%s\n", dis_buf,
8672 nameXMMReg(gregOfRM(modrm)) );
8675 assign( rmode, get_sse_roundingmode() );
8677 putXMMRegLane32F(
8678 gregOfRM(modrm), 0,
8679 binop(Iop_F64toF32,
8680 mkexpr(rmode),
8681 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
8683 goto decode_success;
8686 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
8687 I32 in mmx, according to prevailing SSE rounding mode */
8688 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
8689 I32 in mmx, rounding towards zero */
8690 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
8691 IRTemp dst64 = newTemp(Ity_I64);
8692 IRTemp rmode = newTemp(Ity_I32);
8693 IRTemp f32lo = newTemp(Ity_F32);
8694 IRTemp f32hi = newTemp(Ity_F32);
8695 Bool r2zero = toBool(insn[1] == 0x2C);
8697 do_MMX_preamble();
8698 modrm = getIByte(delta+2);
8700 if (epartIsReg(modrm)) {
8701 delta += 2+1;
8702 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
8703 assign(f32hi, getXMMRegLane32F(eregOfRM(modrm), 1));
8704 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
8705 nameXMMReg(eregOfRM(modrm)),
8706 nameMMXReg(gregOfRM(modrm)));
8707 } else {
8708 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8709 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
8710 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add32,
8711 mkexpr(addr),
8712 mkU32(4) )));
8713 delta += 2+alen;
8714 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
8715 dis_buf,
8716 nameMMXReg(gregOfRM(modrm)));
8719 if (r2zero) {
8720 assign(rmode, mkU32((UInt)Irrm_ZERO) );
8721 } else {
8722 assign( rmode, get_sse_roundingmode() );
8725 assign(
8726 dst64,
8727 binop( Iop_32HLto64,
8728 binop( Iop_F64toI32S,
8729 mkexpr(rmode),
8730 unop( Iop_F32toF64, mkexpr(f32hi) ) ),
8731 binop( Iop_F64toI32S,
8732 mkexpr(rmode),
8733 unop( Iop_F32toF64, mkexpr(f32lo) ) )
8737 putMMXReg(gregOfRM(modrm), mkexpr(dst64));
8738 goto decode_success;
8741 /* F3 0F 2D = CVTSS2SI -- convert F32 in mem/low quarter xmm to
8742 I32 in ireg, according to prevailing SSE rounding mode */
8743 /* F3 0F 2C = CVTTSS2SI -- convert F32 in mem/low quarter xmm to
8744 I32 in ireg, rounding towards zero */
8745 if (insn[0] == 0xF3 && insn[1] == 0x0F
8746 && (insn[2] == 0x2D || insn[2] == 0x2C)) {
8747 IRTemp rmode = newTemp(Ity_I32);
8748 IRTemp f32lo = newTemp(Ity_F32);
8749 Bool r2zero = toBool(insn[2] == 0x2C);
8750 vassert(sz == 4);
8752 modrm = getIByte(delta+3);
8753 if (epartIsReg(modrm)) {
8754 delta += 3+1;
8755 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
8756 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "",
8757 nameXMMReg(eregOfRM(modrm)),
8758 nameIReg(4, gregOfRM(modrm)));
8759 } else {
8760 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
8761 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
8762 delta += 3+alen;
8763 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "",
8764 dis_buf,
8765 nameIReg(4, gregOfRM(modrm)));
8768 if (r2zero) {
8769 assign( rmode, mkU32((UInt)Irrm_ZERO) );
8770 } else {
8771 assign( rmode, get_sse_roundingmode() );
8774 putIReg(4, gregOfRM(modrm),
8775 binop( Iop_F64toI32S,
8776 mkexpr(rmode),
8777 unop( Iop_F32toF64, mkexpr(f32lo) ) )
8780 goto decode_success;
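/* Illustratively: with the source holding 2.7, cvttss2si yields
   2 (Irrm_ZERO truncates, ignoring MXCSR), while cvtss2si under
   the default round-to-nearest SSEROUND yields 3. */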
8783 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
8784 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5E) {
8785 delta = dis_SSE_E_to_G_all( sorb, delta+2, "divps", Iop_Div32Fx4 );
8786 goto decode_success;
8789 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
8790 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5E) {
8791 vassert(sz == 4);
8792 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "divss", Iop_Div32F0x4 );
8793 goto decode_success;
8796 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
8797 if (insn[0] == 0x0F && insn[1] == 0xAE
8798 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 2) {
8800 IRTemp t64 = newTemp(Ity_I64);
8801 IRTemp ew = newTemp(Ity_I32);
8803 modrm = getIByte(delta+2);
8804 vassert(!epartIsReg(modrm));
8805 vassert(sz == 4);
8807 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8808 delta += 2+alen;
8809 DIP("ldmxcsr %s\n", dis_buf);
8811 /* The only thing we observe in %mxcsr is the rounding mode.
8812 Therefore, pass the 32-bit value (SSE native-format control
8813 word) to a clean helper, getting back a 64-bit value, the
8814 lower half of which is the SSEROUND value to store, and the
8815 upper half of which is the emulation-warning token which may
8816 be generated.
8818 /* ULong x86g_check_ldmxcsr ( UInt ); */
8819 assign( t64, mkIRExprCCall(
8820 Ity_I64, 0/*regparms*/,
8821 "x86g_check_ldmxcsr",
8822 &x86g_check_ldmxcsr,
8823 mkIRExprVec_1( loadLE(Ity_I32, mkexpr(addr)) )
8827 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) );
8828 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
8829 put_emwarn( mkexpr(ew) );
8830 /* Finally, if an emulation warning was reported, side-exit to
8831 the next insn, reporting the warning, so that Valgrind's
8832 dispatcher sees the warning. */
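/* Illustratively: if the helper's upper 32 bits come back as a
   nonzero VexEmNote, the CmpNE32 guard below fires and the block
   side-exits with Ijk_EmWarn at the next insn; a zero token falls
   through and execution continues normally. */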
8833 stmt(
8834 IRStmt_Exit(
8835 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
8836 Ijk_EmWarn,
8837 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
8838 OFFB_EIP
8841 goto decode_success;
8845 /* mmxext sse1 subset starts here. mmxext only arches will parse
8846 only this subset of the sse1 instructions. */
8847 mmxext:
8849 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8850 /* 0F F7 = MASKMOVQ -- 8x8 masked store */
8851 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF7) {
8852 Bool ok = False;
8853 delta = dis_MMX( &ok, sorb, sz, delta+1 );
8854 if (!ok)
8855 goto decode_failure;
8856 goto decode_success;
8859 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8860 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
8861 Intel manual does not say anything about the usual business of
8862 the FP reg tags getting trashed whenever an MMX insn happens.
8863 So we just leave them alone.
8865 if (insn[0] == 0x0F && insn[1] == 0xE7) {
8866 modrm = getIByte(delta+2);
8867 if (sz == 4 && !epartIsReg(modrm)) {
8868 /* do_MMX_preamble(); Intel docs don't specify this */
8869 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8870 storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) );
8871 DIP("movntq %s,%s\n", nameMMXReg(gregOfRM(modrm)),
8872 dis_buf);
8873 delta += 2+alen;
8874 goto decode_success;
8876 /* else fall through */
8879 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8880 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
8881 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE0) {
8882 do_MMX_preamble();
8883 delta = dis_MMXop_regmem_to_reg (
8884 sorb, delta+2, insn[1], "pavgb", False );
8885 goto decode_success;
8888 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8889 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
8890 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE3) {
8891 do_MMX_preamble();
8892 delta = dis_MMXop_regmem_to_reg (
8893 sorb, delta+2, insn[1], "pavgw", False );
8894 goto decode_success;
8897 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8898 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
8899 zero-extend of it in ireg(G). */
8900 if (insn[0] == 0x0F && insn[1] == 0xC5) {
8901 modrm = insn[2];
8902 if (sz == 4 && epartIsReg(modrm)) {
8903 IRTemp sV = newTemp(Ity_I64);
8904 t5 = newTemp(Ity_I16);
8905 do_MMX_preamble();
8906 assign(sV, getMMXReg(eregOfRM(modrm)));
8907 breakup64to16s( sV, &t3, &t2, &t1, &t0 );
8908 switch (insn[3] & 3) {
8909 case 0: assign(t5, mkexpr(t0)); break;
8910 case 1: assign(t5, mkexpr(t1)); break;
8911 case 2: assign(t5, mkexpr(t2)); break;
8912 case 3: assign(t5, mkexpr(t3)); break;
8913 default: vassert(0); /*NOTREACHED*/
8915 putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t5)));
8916 DIP("pextrw $%d,%s,%s\n",
8917 (Int)insn[3], nameMMXReg(eregOfRM(modrm)),
8918 nameIReg(4,gregOfRM(modrm)));
8919 delta += 4;
8920 goto decode_success;
8922 /* else fall through */
8925 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8926 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
8927 put it into the specified lane of mmx(G). */
8928 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC4) {
8929 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
8930 mmx reg. t4 is the new lane value. t5 is the original
8931 mmx value. t6 is the new mmx value. */
8932 Int lane;
8933 t4 = newTemp(Ity_I16);
8934 t5 = newTemp(Ity_I64);
8935 t6 = newTemp(Ity_I64);
8936 modrm = insn[2];
8937 do_MMX_preamble();
8939 assign(t5, getMMXReg(gregOfRM(modrm)));
8940 breakup64to16s( t5, &t3, &t2, &t1, &t0 );
8942 if (epartIsReg(modrm)) {
8943 assign(t4, getIReg(2, eregOfRM(modrm)));
8944 delta += 3+1;
8945 lane = insn[3+1-1];
8946 DIP("pinsrw $%d,%s,%s\n", lane,
8947 nameIReg(2,eregOfRM(modrm)),
8948 nameMMXReg(gregOfRM(modrm)));
8949 } else {
8950 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8951 delta += 3+alen;
8952 lane = insn[3+alen-1];
8953 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
8954 DIP("pinsrw $%d,%s,%s\n", lane,
8955 dis_buf,
8956 nameMMXReg(gregOfRM(modrm)));
8959 switch (lane & 3) {
8960 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break;
8961 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break;
8962 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break;
8963 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break;
8964 default: vassert(0); /*NOTREACHED*/
8966 putMMXReg(gregOfRM(modrm), mkexpr(t6));
8967 goto decode_success;
8970 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8971 /* 0F EE = PMAXSW -- 16x4 signed max */
8972 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEE) {
8973 do_MMX_preamble();
8974 delta = dis_MMXop_regmem_to_reg (
8975 sorb, delta+2, insn[1], "pmaxsw", False );
8976 goto decode_success;
8979 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8980 /* 0F DE = PMAXUB -- 8x8 unsigned max */
8981 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDE) {
8982 do_MMX_preamble();
8983 delta = dis_MMXop_regmem_to_reg (
8984 sorb, delta+2, insn[1], "pmaxub", False );
8985 goto decode_success;
8988 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8989 /* 0F EA = PMINSW -- 16x4 signed min */
8990 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEA) {
8991 do_MMX_preamble();
8992 delta = dis_MMXop_regmem_to_reg (
8993 sorb, delta+2, insn[1], "pminsw", False );
8994 goto decode_success;
8997 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8998 /* 0F DA = PMINUB -- 8x8 unsigned min */
8999 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDA) {
9000 do_MMX_preamble();
9001 delta = dis_MMXop_regmem_to_reg (
9002 sorb, delta+2, insn[1], "pminub", False );
9003 goto decode_success;
9006 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9007 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
9008 mmx(E), turn them into a byte, and put zero-extend of it in
9009 ireg(G). */
9010 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD7) {
9011 modrm = insn[2];
9012 if (epartIsReg(modrm)) {
9013 do_MMX_preamble();
9014 t0 = newTemp(Ity_I64);
9015 t1 = newTemp(Ity_I32);
9016 assign(t0, getMMXReg(eregOfRM(modrm)));
9017 assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0))));
9018 putIReg(4, gregOfRM(modrm), mkexpr(t1));
9019 DIP("pmovmskb %s,%s\n", nameMMXReg(eregOfRM(modrm)),
9020 nameIReg(4,gregOfRM(modrm)));
9021 delta += 3;
9022 goto decode_success;
9024 /* else fall through */
9027 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9028 /* 0F E4 = PMULHUW -- 16x4 hi-half of unsigned widening multiply */
9029 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE4) {
9030 do_MMX_preamble();
9031 delta = dis_MMXop_regmem_to_reg (
9032 sorb, delta+2, insn[1], "pmulhuw", False );
9033 goto decode_success;
9036 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
9037 /* 0F 18 /1 = PREFETCHT0 -- with various different hints */
9038 /* 0F 18 /2 = PREFETCHT1 */
9039 /* 0F 18 /3 = PREFETCHT2 */
9040 if (insn[0] == 0x0F && insn[1] == 0x18
9041 && !epartIsReg(insn[2])
9042 && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 3) {
9043 const HChar* hintstr = "??";
9045 modrm = getIByte(delta+2);
9046 vassert(!epartIsReg(modrm));
9048 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9049 delta += 2+alen;
9051 switch (gregOfRM(modrm)) {
9052 case 0: hintstr = "nta"; break;
9053 case 1: hintstr = "t0"; break;
9054 case 2: hintstr = "t1"; break;
9055 case 3: hintstr = "t2"; break;
9056 default: vassert(0); /*NOTREACHED*/
9059 DIP("prefetch%s %s\n", hintstr, dis_buf);
9060 goto decode_success;
9063 /* 0F 0D /0 = PREFETCH m8 -- 3DNow! prefetch */
9064 /* 0F 0D /1 = PREFETCHW m8 -- ditto, with some other hint */
9065 if (insn[0] == 0x0F && insn[1] == 0x0D
9066 && !epartIsReg(insn[2])
9067 && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 1) {
9068 const HChar* hintstr = "??";
9070 modrm = getIByte(delta+2);
9071 vassert(!epartIsReg(modrm));
9073 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9074 delta += 2+alen;
9076 switch (gregOfRM(modrm)) {
9077 case 0: hintstr = ""; break;
9078 case 1: hintstr = "w"; break;
9079 default: vassert(0); /*NOTREACHED*/
9082 DIP("prefetch%s %s\n", hintstr, dis_buf);
9083 goto decode_success;
9086 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9087 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
9088 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF6) {
9089 do_MMX_preamble();
9090 delta = dis_MMXop_regmem_to_reg (
9091 sorb, delta+2, insn[1], "psadbw", False );
9092 goto decode_success;
9095 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9096 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
9097 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x70) {
9098 Int order;
9099 IRTemp sV, dV, s3, s2, s1, s0;
9100 s3 = s2 = s1 = s0 = IRTemp_INVALID;
9101 sV = newTemp(Ity_I64);
9102 dV = newTemp(Ity_I64);
9103 do_MMX_preamble();
9104 modrm = insn[2];
9105 if (epartIsReg(modrm)) {
9106 assign( sV, getMMXReg(eregOfRM(modrm)) );
9107 order = (Int)insn[3];
9108 delta += 2+2;
9109 DIP("pshufw $%d,%s,%s\n", order,
9110 nameMMXReg(eregOfRM(modrm)),
9111 nameMMXReg(gregOfRM(modrm)));
9112 } else {
9113 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9114 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
9115 order = (Int)insn[2+alen];
9116 delta += 3+alen;
9117 DIP("pshufw $%d,%s,%s\n", order,
9118 dis_buf,
9119 nameMMXReg(gregOfRM(modrm)));
9121 breakup64to16s( sV, &s3, &s2, &s1, &s0 );
9123 # define SEL(n) \
9124 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
9125 assign(dV,
9126 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
9127 SEL((order>>2)&3), SEL((order>>0)&3) )
9129 putMMXReg(gregOfRM(modrm), mkexpr(dV));
9130 # undef SEL
9131 goto decode_success;
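/* Worked example (illustrative): pshufw $0x1B,%mm1,%mm0 has
   order == 0x1B == 00:01:10:11b, so the lanes selected (high to
   low) are s0,s1,s2,s3 -- i.e. the four 16-bit lanes of the
   source are reversed. */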
9134 /* 0F AE /7 = SFENCE -- flush pending operations to memory */
9135 if (insn[0] == 0x0F && insn[1] == 0xAE
9136 && epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) {
9137 vassert(sz == 4);
9138 delta += 3;
9139 /* Insert a memory fence. It's sometimes important that these
9140 are carried through to the generated code. */
9141 stmt( IRStmt_MBE(Imbe_Fence) );
9142 DIP("sfence\n");
9143 goto decode_success;
9146 /* End of mmxext sse1 subset. No more sse parsing for mmxext only arches. */
9147 if (archinfo->hwcaps == VEX_HWCAPS_X86_MMXEXT/*integer only sse1 subset*/)
9148 goto after_sse_decoders;
9151 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
9152 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5F) {
9153 delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxps", Iop_Max32Fx4 );
9154 goto decode_success;
9157 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
9158 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5F) {
9159 vassert(sz == 4);
9160 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "maxss", Iop_Max32F0x4 );
9161 goto decode_success;
9164 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
9165 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5D) {
9166 delta = dis_SSE_E_to_G_all( sorb, delta+2, "minps", Iop_Min32Fx4 );
9167 goto decode_success;
9170 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
9171 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5D) {
9172 vassert(sz == 4);
9173 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "minss", Iop_Min32F0x4 );
9174 goto decode_success;
9177 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
9178 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
9179 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) {
9180 modrm = getIByte(delta+2);
9181 if (epartIsReg(modrm)) {
9182 putXMMReg( gregOfRM(modrm),
9183 getXMMReg( eregOfRM(modrm) ));
9184 DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9185 nameXMMReg(gregOfRM(modrm)));
9186 delta += 2+1;
9187 } else {
9188 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9189 if (insn[1] == 0x28/*movaps*/)
9190 gen_SEGV_if_not_16_aligned( addr );
9191 putXMMReg( gregOfRM(modrm),
9192 loadLE(Ity_V128, mkexpr(addr)) );
9193 DIP("mov[ua]ps %s,%s\n", dis_buf,
9194 nameXMMReg(gregOfRM(modrm)));
9195 delta += 2+alen;
9197 goto decode_success;
9200 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
9201 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
9202 if (sz == 4 && insn[0] == 0x0F
9203 && (insn[1] == 0x29 || insn[1] == 0x11)) {
9204 modrm = getIByte(delta+2);
9205 if (epartIsReg(modrm)) {
9206 /* fall through; awaiting test case */
9207 } else {
9208 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9209 if (insn[1] == 0x29/*movaps*/)
9210 gen_SEGV_if_not_16_aligned( addr );
9211 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
9212 DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRM(modrm)),
9213 dis_buf );
9214 delta += 2+alen;
9215 goto decode_success;
9219 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
9220 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
9221 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x16) {
9222 modrm = getIByte(delta+2);
9223 if (epartIsReg(modrm)) {
9224 delta += 2+1;
9225 putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
9226 getXMMRegLane64( eregOfRM(modrm), 0 ) );
9227 DIP("movhps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9228 nameXMMReg(gregOfRM(modrm)));
9229 } else {
9230 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9231 delta += 2+alen;
9232 putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
9233 loadLE(Ity_I64, mkexpr(addr)) );
9234 DIP("movhps %s,%s\n", dis_buf,
9235 nameXMMReg( gregOfRM(modrm) ));
9237 goto decode_success;
9240 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
9241 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x17) {
9242 if (!epartIsReg(insn[2])) {
9243 delta += 2;
9244 addr = disAMode ( &alen, sorb, delta, dis_buf );
9245 delta += alen;
9246 storeLE( mkexpr(addr),
9247 getXMMRegLane64( gregOfRM(insn[2]),
9248 1/*upper lane*/ ) );
9249 DIP("movhps %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
9250 dis_buf);
9251 goto decode_success;
9253 /* else fall through */
9256 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
9257 /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
9258 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x12) {
9259 modrm = getIByte(delta+2);
9260 if (epartIsReg(modrm)) {
9261 delta += 2+1;
9262 putXMMRegLane64( gregOfRM(modrm),
9263 0/*lower lane*/,
9264 getXMMRegLane64( eregOfRM(modrm), 1 ));
9265 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRM(modrm)),
9266 nameXMMReg(gregOfRM(modrm)));
9267 } else {
9268 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9269 delta += 2+alen;
9270 putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/,
9271 loadLE(Ity_I64, mkexpr(addr)) );
9272 DIP("movlps %s, %s\n",
9273 dis_buf, nameXMMReg( gregOfRM(modrm) ));
9275 goto decode_success;
9278 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
9279 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x13) {
9280 if (!epartIsReg(insn[2])) {
9281 delta += 2;
9282 addr = disAMode ( &alen, sorb, delta, dis_buf );
9283 delta += alen;
9284 storeLE( mkexpr(addr),
9285 getXMMRegLane64( gregOfRM(insn[2]),
9286 0/*lower lane*/ ) );
9287 DIP("movlps %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
9288 dis_buf);
9289 goto decode_success;
9291 /* else fall through */
9294 /* 0F 50 = MOVMSKPS -- move 4 sign bits from 4 x F32 in xmm(E)
9295 to 4 lowest bits of ireg(G) */
9296 if (insn[0] == 0x0F && insn[1] == 0x50) {
9297 modrm = getIByte(delta+2);
9298 if (sz == 4 && epartIsReg(modrm)) {
9299 Int src;
9300 t0 = newTemp(Ity_I32);
9301 t1 = newTemp(Ity_I32);
9302 t2 = newTemp(Ity_I32);
9303 t3 = newTemp(Ity_I32);
9304 delta += 2+1;
9305 src = eregOfRM(modrm);
9306 assign( t0, binop( Iop_And32,
9307 binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)),
9308 mkU32(1) ));
9309 assign( t1, binop( Iop_And32,
9310 binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)),
9311 mkU32(2) ));
9312 assign( t2, binop( Iop_And32,
9313 binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)),
9314 mkU32(4) ));
9315 assign( t3, binop( Iop_And32,
9316 binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)),
9317 mkU32(8) ));
9318 putIReg(4, gregOfRM(modrm),
9319 binop(Iop_Or32,
9320 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
9321 binop(Iop_Or32, mkexpr(t2), mkexpr(t3))
9324 DIP("movmskps %s,%s\n", nameXMMReg(src),
9325 nameIReg(4, gregOfRM(modrm)));
9326 goto decode_success;
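/* Illustrative trace of the computation above: lane k's sign bit
   (bit 31) is shifted down to bit k and masked with (1 << k).
   Lanes 0..3 holding 0x80000000, 0, 0x80000000, 0 give
   t0..t3 = 1,0,4,0, so ireg(G) receives 0x5. */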
9328 /* else fall through */
9331 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
9332 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
9333 if (insn[0] == 0x0F && insn[1] == 0x2B) {
9334 modrm = getIByte(delta+2);
9335 if (!epartIsReg(modrm)) {
9336 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9337 gen_SEGV_if_not_16_aligned( addr );
9338 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
9339 DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
9340 nameXMMReg(gregOfRM(modrm)),
9341 dis_buf);
9342 delta += 2+alen;
9343 goto decode_success;
9345 /* else fall through */
9348 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
9349 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
9350 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x10) {
9351 vassert(sz == 4);
9352 modrm = getIByte(delta+3);
9353 if (epartIsReg(modrm)) {
9354 putXMMRegLane32( gregOfRM(modrm), 0,
9355 getXMMRegLane32( eregOfRM(modrm), 0 ));
9356 DIP("movss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9357 nameXMMReg(gregOfRM(modrm)));
9358 delta += 3+1;
9359 } else {
9360 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9361 /* zero bits 127:64 */
9362 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
9363 /* zero bits 63:32 */
9364 putXMMRegLane32( gregOfRM(modrm), 1, mkU32(0) );
9365 /* write bits 31:0 */
9366 putXMMRegLane32( gregOfRM(modrm), 0,
9367 loadLE(Ity_I32, mkexpr(addr)) );
9368 DIP("movss %s,%s\n", dis_buf,
9369 nameXMMReg(gregOfRM(modrm)));
9370 delta += 3+alen;
9372 goto decode_success;
9375 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
9376 or lo 1/4 xmm). */
9377 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x11) {
9378 vassert(sz == 4);
9379 modrm = getIByte(delta+3);
9380 if (epartIsReg(modrm)) {
9381 /* fall through, we don't yet have a test case */
9382 } else {
9383 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9384 storeLE( mkexpr(addr),
9385 getXMMRegLane32(gregOfRM(modrm), 0) );
9386 DIP("movss %s,%s\n", nameXMMReg(gregOfRM(modrm)),
9387 dis_buf);
9388 delta += 3+alen;
9389 goto decode_success;
9393 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
9394 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x59) {
9395 delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulps", Iop_Mul32Fx4 );
9396 goto decode_success;
9399 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
9400 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x59) {
9401 vassert(sz == 4);
9402 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "mulss", Iop_Mul32F0x4 );
9403 goto decode_success;
9406 /* 0F 56 = ORPS -- G = G or E */
9407 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x56) {
9408 delta = dis_SSE_E_to_G_all( sorb, delta+2, "orps", Iop_OrV128 );
9409 goto decode_success;
9412 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
9413 if (insn[0] == 0x0F && insn[1] == 0x53) {
9414 vassert(sz == 4);
9415 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
9416 "rcpps", Iop_RecipEst32Fx4 );
9417 goto decode_success;
9420 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
9421 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x53) {
9422 vassert(sz == 4);
9423 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3,
9424 "rcpss", Iop_RecipEst32F0x4 );
9425 goto decode_success;
9428 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
9429 if (insn[0] == 0x0F && insn[1] == 0x52) {
9430 vassert(sz == 4);
9431 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
9432 "rsqrtps", Iop_RSqrtEst32Fx4 );
9433 goto decode_success;
9436 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
9437 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x52) {
9438 vassert(sz == 4);
9439 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3,
9440 "rsqrtss", Iop_RSqrtEst32F0x4 );
9441 goto decode_success;
9444 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
9445 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC6) {
9446 Int select;
9447 IRTemp sV, dV;
9448 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
9449 sV = newTemp(Ity_V128);
9450 dV = newTemp(Ity_V128);
9451 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
9452 modrm = insn[2];
9453 assign( dV, getXMMReg(gregOfRM(modrm)) );
9455 if (epartIsReg(modrm)) {
9456 assign( sV, getXMMReg(eregOfRM(modrm)) );
9457 select = (Int)insn[3];
9458 delta += 2+2;
9459 DIP("shufps $%d,%s,%s\n", select,
9460 nameXMMReg(eregOfRM(modrm)),
9461 nameXMMReg(gregOfRM(modrm)));
9462 } else {
9463 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9464 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
9465 select = (Int)insn[2+alen];
9466 delta += 3+alen;
9467 DIP("shufps $%d,%s,%s\n", select,
9468 dis_buf,
9469 nameXMMReg(gregOfRM(modrm)));
9472 breakup128to32s( dV, &d3, &d2, &d1, &d0 );
9473 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
9475 # define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
9476 # define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
9478 putXMMReg(
9479 gregOfRM(modrm),
9480 mk128from32s( SELS((select>>6)&3), SELS((select>>4)&3),
9481 SELD((select>>2)&3), SELD((select>>0)&3) )
9484 # undef SELD
9485 # undef SELS
9487 goto decode_success;
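/* Worked example (illustrative): shufps $0x4E,%xmm1,%xmm0 has
   select == 0x4E == 01:00:11:10b, so the result (high to low) is
   s1:s0:d3:d2 -- the top two fields come from E via SELS, the
   bottom two from G via SELD, as the macros above encode. */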
9490 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */
9491 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x51) {
9492 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
9493 "sqrtps", Iop_Sqrt32Fx4 );
9494 goto decode_success;
9497 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */
9498 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x51) {
9499 vassert(sz == 4);
9500 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3,
9501 "sqrtss", Iop_Sqrt32F0x4 );
9502 goto decode_success;
9505 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
9506 if (insn[0] == 0x0F && insn[1] == 0xAE
9507 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 3) {
9508 modrm = getIByte(delta+2);
9509 vassert(sz == 4);
9510 vassert(!epartIsReg(modrm));
9512 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9513 delta += 2+alen;
9515 /* Fake up a native SSE mxcsr word. The only thing it depends
9516 on is SSEROUND[1:0], so call a clean helper to cook it up.
9518 /* UInt x86g_create_mxcsr ( UInt sseround ) */
9519 DIP("stmxcsr %s\n", dis_buf);
9520 storeLE( mkexpr(addr),
9521 mkIRExprCCall(
9522 Ity_I32, 0/*regp*/,
9523 "x86g_create_mxcsr", &x86g_create_mxcsr,
9524 mkIRExprVec_1( get_sse_roundingmode() )
9527 goto decode_success;
9530 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
9531 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5C) {
9532 delta = dis_SSE_E_to_G_all( sorb, delta+2, "subps", Iop_Sub32Fx4 );
9533 goto decode_success;
9536 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
9537 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5C) {
9538 vassert(sz == 4);
9539 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "subss", Iop_Sub32F0x4 );
9540 goto decode_success;
9543 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
9544 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
9545 /* These just appear to be special cases of SHUFPS */
9546 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
9547 IRTemp sV, dV;
9548 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
9549 Bool hi = toBool(insn[1] == 0x15);
9550 sV = newTemp(Ity_V128);
9551 dV = newTemp(Ity_V128);
9552 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
9553 modrm = insn[2];
9554 assign( dV, getXMMReg(gregOfRM(modrm)) );
9556 if (epartIsReg(modrm)) {
9557 assign( sV, getXMMReg(eregOfRM(modrm)) );
9558 delta += 2+1;
9559 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
9560 nameXMMReg(eregOfRM(modrm)),
9561 nameXMMReg(gregOfRM(modrm)));
9562 } else {
9563 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9564 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
9565 delta += 2+alen;
9566 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
9567 dis_buf,
9568 nameXMMReg(gregOfRM(modrm)));
9571 breakup128to32s( dV, &d3, &d2, &d1, &d0 );
9572 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
9574 if (hi) {
9575 putXMMReg( gregOfRM(modrm), mk128from32s( s3, d3, s2, d2 ) );
9576 } else {
9577 putXMMReg( gregOfRM(modrm), mk128from32s( s1, d1, s0, d0 ) );
9580 goto decode_success;
9583 /* 0F 57 = XORPS -- G = G xor E */
9584 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x57) {
9585 delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorps", Iop_XorV128 );
9586 goto decode_success;
9589 /* ---------------------------------------------------- */
9590 /* --- end of the SSE decoder. --- */
9591 /* ---------------------------------------------------- */
9593 /* ---------------------------------------------------- */
9594 /* --- start of the SSE2 decoder. --- */
9595 /* ---------------------------------------------------- */
9597 /* Skip parts of the decoder which don't apply given the stated
9598 guest subarchitecture. */
9599 if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2))
9600 goto after_sse_decoders; /* no SSE2 capabilities */
9602 insn = &guest_code[delta];
9604 /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
9605 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x58) {
9606 delta = dis_SSE_E_to_G_all( sorb, delta+2, "addpd", Iop_Add64Fx2 );
9607 goto decode_success;
9610 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
9611 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x58) {
9612 vassert(sz == 4);
9613 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "addsd", Iop_Add64F0x2 );
9614 goto decode_success;
9617 /* 66 0F 55 = ANDNPD -- G = (not G) and E */
9618 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x55) {
9619 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnpd", Iop_AndV128 );
9620 goto decode_success;
9623 /* 66 0F 54 = ANDPD -- G = G and E */
9624 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x54) {
9625 delta = dis_SSE_E_to_G_all( sorb, delta+2, "andpd", Iop_AndV128 );
9626 goto decode_success;
9629 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
9630 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC2) {
9631 delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmppd", True, 8 );
9632 goto decode_success;
9635 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
9636 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xC2) {
9637 vassert(sz == 4);
9638 delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpsd", False, 8 );
9639 goto decode_success;
9642 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */
9643 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
9644 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) {
9645 IRTemp argL = newTemp(Ity_F64);
9646 IRTemp argR = newTemp(Ity_F64);
9647 modrm = getIByte(delta+2);
9648 if (epartIsReg(modrm)) {
9649 assign( argR, getXMMRegLane64F( eregOfRM(modrm), 0/*lowest lane*/ ) );
9650 delta += 2+1;
9651 DIP("[u]comisd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9652 nameXMMReg(gregOfRM(modrm)) );
9653 } else {
9654 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9655 assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
9656 delta += 2+alen;
9657 DIP("[u]comisd %s,%s\n", dis_buf,
9658 nameXMMReg(gregOfRM(modrm)) );
9660 assign( argL, getXMMRegLane64F( gregOfRM(modrm), 0/*lowest lane*/ ) );
9662 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
9663 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
9664 stmt( IRStmt_Put(
9665 OFFB_CC_DEP1,
9666 binop( Iop_And32,
9667 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)),
9668 mkU32(0x45)
9669 )));
9670 /* Set NDEP even though it isn't used. This makes redundant-PUT
9671 elimination of previous stores to this field work better. */
9672 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
9673 goto decode_success;
9676 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
9677 F64 in xmm(G) */
9678 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xE6) {
9679 IRTemp arg64 = newTemp(Ity_I64);
9680 vassert(sz == 4);
9682 modrm = getIByte(delta+3);
9683 if (epartIsReg(modrm)) {
9684 assign( arg64, getXMMRegLane64(eregOfRM(modrm), 0) );
9685 delta += 3+1;
9686 DIP("cvtdq2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9687 nameXMMReg(gregOfRM(modrm)));
9688 } else {
9689 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9690 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
9691 delta += 3+alen;
9692 DIP("cvtdq2pd %s,%s\n", dis_buf,
9693 nameXMMReg(gregOfRM(modrm)) );
9696 putXMMRegLane64F(
9697 gregOfRM(modrm), 0,
9698 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)))
9701 putXMMRegLane64F(
9702 gregOfRM(modrm), 1,
9703 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)))
9706 goto decode_success;
9709 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
9710 xmm(G) */
9711 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5B) {
9712 IRTemp argV = newTemp(Ity_V128);
9713 IRTemp rmode = newTemp(Ity_I32);
9715 modrm = getIByte(delta+2);
9716 if (epartIsReg(modrm)) {
9717 assign( argV, getXMMReg(eregOfRM(modrm)) );
9718 delta += 2+1;
9719 DIP("cvtdq2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9720 nameXMMReg(gregOfRM(modrm)));
9721 } else {
9722 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9723 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9724 delta += 2+alen;
9725 DIP("cvtdq2ps %s,%s\n", dis_buf,
9726 nameXMMReg(gregOfRM(modrm)) );
9729 assign( rmode, get_sse_roundingmode() );
9730 breakup128to32s( argV, &t3, &t2, &t1, &t0 );
9732 # define CVT(_t) binop( Iop_F64toF32, \
9733 mkexpr(rmode), \
9734 unop(Iop_I32StoF64,mkexpr(_t)))
9736 putXMMRegLane32F( gregOfRM(modrm), 3, CVT(t3) );
9737 putXMMRegLane32F( gregOfRM(modrm), 2, CVT(t2) );
9738 putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) );
9739 putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) );
9741 # undef CVT
9743 goto decode_success;
9746 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
9747 lo half xmm(G), and zero upper half */
9748 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xE6) {
9749 IRTemp argV = newTemp(Ity_V128);
9750 IRTemp rmode = newTemp(Ity_I32);
9751 vassert(sz == 4);
9753 modrm = getIByte(delta+3);
9754 if (epartIsReg(modrm)) {
9755 assign( argV, getXMMReg(eregOfRM(modrm)) );
9756 delta += 3+1;
9757 DIP("cvtpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9758 nameXMMReg(gregOfRM(modrm)));
9759 } else {
9760 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9761 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9762 delta += 3+alen;
9763 DIP("cvtpd2dq %s,%s\n", dis_buf,
9764 nameXMMReg(gregOfRM(modrm)) );
9767 assign( rmode, get_sse_roundingmode() );
9768 t0 = newTemp(Ity_F64);
9769 t1 = newTemp(Ity_F64);
9770 assign( t0, unop(Iop_ReinterpI64asF64,
9771 unop(Iop_V128to64, mkexpr(argV))) );
9772 assign( t1, unop(Iop_ReinterpI64asF64,
9773 unop(Iop_V128HIto64, mkexpr(argV))) );
9775 # define CVT(_t) binop( Iop_F64toI32S, \
9776 mkexpr(rmode), \
9777 mkexpr(_t) )
9779 putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
9780 putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
9781 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
9782 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
9784 # undef CVT
9786 goto decode_success;
9789 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
9790 I32 in mmx, according to prevailing SSE rounding mode */
9791 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
9792 I32 in mmx, rounding towards zero */
9793 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
9794 IRTemp dst64 = newTemp(Ity_I64);
9795 IRTemp rmode = newTemp(Ity_I32);
9796 IRTemp f64lo = newTemp(Ity_F64);
9797 IRTemp f64hi = newTemp(Ity_F64);
9798 Bool r2zero = toBool(insn[1] == 0x2C);
9800 do_MMX_preamble();
9801 modrm = getIByte(delta+2);
9803 if (epartIsReg(modrm)) {
9804 delta += 2+1;
9805 assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
9806 assign(f64hi, getXMMRegLane64F(eregOfRM(modrm), 1));
9807 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
9808 nameXMMReg(eregOfRM(modrm)),
9809 nameMMXReg(gregOfRM(modrm)));
9810 } else {
9811 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9812 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
9813 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add32,
9814 mkexpr(addr),
9815 mkU32(8) )));
9816 delta += 2+alen;
9817 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
9818 dis_buf,
9819 nameMMXReg(gregOfRM(modrm)));
9822 if (r2zero) {
9823 assign(rmode, mkU32((UInt)Irrm_ZERO) );
9824 } else {
9825 assign( rmode, get_sse_roundingmode() );
9826 }
9828 assign(
9829 dst64,
9830 binop( Iop_32HLto64,
9831 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
9832 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
9833 )
9834 );
9836 putMMXReg(gregOfRM(modrm), mkexpr(dst64));
9837 goto decode_success;
9838 }
9840 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
9841 lo half xmm(G), and zero upper half */
9842 /* Note, this is practically identical to CVTPD2DQ. It would have
9843 been nicer to merge them together, but the insn[] offsets differ
9844 by one. */
9845 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5A) {
9846 IRTemp argV = newTemp(Ity_V128);
9847 IRTemp rmode = newTemp(Ity_I32);
9849 modrm = getIByte(delta+2);
9850 if (epartIsReg(modrm)) {
9851 assign( argV, getXMMReg(eregOfRM(modrm)) );
9852 delta += 2+1;
9853 DIP("cvtpd2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9854 nameXMMReg(gregOfRM(modrm)));
9855 } else {
9856 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9857 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9858 delta += 2+alen;
9859 DIP("cvtpd2ps %s,%s\n", dis_buf,
9860 nameXMMReg(gregOfRM(modrm)) );
9861 }
9863 assign( rmode, get_sse_roundingmode() );
9864 t0 = newTemp(Ity_F64);
9865 t1 = newTemp(Ity_F64);
9866 assign( t0, unop(Iop_ReinterpI64asF64,
9867 unop(Iop_V128to64, mkexpr(argV))) );
9868 assign( t1, unop(Iop_ReinterpI64asF64,
9869 unop(Iop_V128HIto64, mkexpr(argV))) );
9871 # define CVT(_t) binop( Iop_F64toF32, \
9872 mkexpr(rmode), \
9873 mkexpr(_t) )
9875 putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
9876 putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
9877 putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) );
9878 putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) );
9880 # undef CVT
9882 goto decode_success;
9883 }
9885 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
9886 xmm(G) */
9887 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x2A) {
9888 IRTemp arg64 = newTemp(Ity_I64);
9890 modrm = getIByte(delta+2);
9891 if (epartIsReg(modrm)) {
9892 /* Only switch to MMX mode if the source is a MMX register.
9893 This is inconsistent with all other instructions which
9894 convert between XMM and (M64 or MMX), which always switch
9895 to MMX mode even if 64-bit operand is M64 and not MMX. At
9896 least, that's what the Intel docs seem to me to say.
9897 Fixes #210264. */
9898 do_MMX_preamble();
9899 assign( arg64, getMMXReg(eregOfRM(modrm)) );
9900 delta += 2+1;
9901 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregOfRM(modrm)),
9902 nameXMMReg(gregOfRM(modrm)));
9903 } else {
9904 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9905 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
9906 delta += 2+alen;
9907 DIP("cvtpi2pd %s,%s\n", dis_buf,
9908 nameXMMReg(gregOfRM(modrm)) );
9909 }
9911 putXMMRegLane64F(
9912 gregOfRM(modrm), 0,
9913 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) )
9914 );
9916 putXMMRegLane64F(
9917 gregOfRM(modrm), 1,
9918 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) )
9919 );
9921 goto decode_success;
9922 }
9924 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
9925 xmm(G) */
9926 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5B) {
9927 IRTemp argV = newTemp(Ity_V128);
9928 IRTemp rmode = newTemp(Ity_I32);
9930 modrm = getIByte(delta+2);
9931 if (epartIsReg(modrm)) {
9932 assign( argV, getXMMReg(eregOfRM(modrm)) );
9933 delta += 2+1;
9934 DIP("cvtps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9935 nameXMMReg(gregOfRM(modrm)));
9936 } else {
9937 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9938 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9939 delta += 2+alen;
9940 DIP("cvtps2dq %s,%s\n", dis_buf,
9941 nameXMMReg(gregOfRM(modrm)) );
9942 }
9944 assign( rmode, get_sse_roundingmode() );
9945 breakup128to32s( argV, &t3, &t2, &t1, &t0 );
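/* There is no single-step F32->I32 here; each lane goes
   F32 -> F64 -> I32.  The widening is exact, so the value is
   still rounded exactly once, per rmode. */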
9947 /* This is less than ideal. If it turns out to be a performance
9948 bottleneck it can be improved. */
9949 # define CVT(_t) \
9950 binop( Iop_F64toI32S, \
9951 mkexpr(rmode), \
9952 unop( Iop_F32toF64, \
9953 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
9955 putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) );
9956 putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) );
9957 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
9958 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
9960 # undef CVT
9962 goto decode_success;
9963 }
9965 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
9966 F64 in xmm(G). */
9967 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5A) {
9968 IRTemp f32lo = newTemp(Ity_F32);
9969 IRTemp f32hi = newTemp(Ity_F32);
9971 modrm = getIByte(delta+2);
9972 if (epartIsReg(modrm)) {
9973 assign( f32lo, getXMMRegLane32F(eregOfRM(modrm), 0) );
9974 assign( f32hi, getXMMRegLane32F(eregOfRM(modrm), 1) );
9975 delta += 2+1;
9976 DIP("cvtps2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9977 nameXMMReg(gregOfRM(modrm)));
9978 } else {
9979 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9980 assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
9981 assign( f32hi, loadLE(Ity_F32,
9982 binop(Iop_Add32,mkexpr(addr),mkU32(4))) );
9983 delta += 2+alen;
9984 DIP("cvtps2pd %s,%s\n", dis_buf,
9985 nameXMMReg(gregOfRM(modrm)) );
9986 }
9988 putXMMRegLane64F( gregOfRM(modrm), 1,
9989 unop(Iop_F32toF64, mkexpr(f32hi)) );
9990 putXMMRegLane64F( gregOfRM(modrm), 0,
9991 unop(Iop_F32toF64, mkexpr(f32lo)) );
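/* F32->F64 widening is always exact, hence no rounding mode is
   needed for this conversion. */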
9993 goto decode_success;
9994 }
9996 /* F2 0F 2D = CVTSD2SI -- convert F64 in mem/low half xmm to
9997 I32 in ireg, according to prevailing SSE rounding mode */
9998 /* F2 0F 2C = CVTTSD2SI -- convert F64 in mem/low half xmm to
9999 I32 in ireg, rounding towards zero */
10000 if (insn[0] == 0xF2 && insn[1] == 0x0F
10001 && (insn[2] == 0x2D || insn[2] == 0x2C)) {
10002 IRTemp rmode = newTemp(Ity_I32);
10003 IRTemp f64lo = newTemp(Ity_F64);
10004 Bool r2zero = toBool(insn[2] == 0x2C);
10005 vassert(sz == 4);
10007 modrm = getIByte(delta+3);
10008 if (epartIsReg(modrm)) {
10009 delta += 3+1;
10010 assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
10011 DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
10012 nameXMMReg(eregOfRM(modrm)),
10013 nameIReg(4, gregOfRM(modrm)));
10014 } else {
10015 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10016 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
10017 delta += 3+alen;
10018 DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
10019 dis_buf,
10020 nameIReg(4, gregOfRM(modrm)));
10021 }
10023 if (r2zero) {
10024 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10025 } else {
10026 assign( rmode, get_sse_roundingmode() );
10027 }
10029 putIReg(4, gregOfRM(modrm),
10030 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) );
10032 goto decode_success;
10033 }
10035 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
10036 low 1/4 xmm(G), according to prevailing SSE rounding mode */
10037 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5A) {
10038 IRTemp rmode = newTemp(Ity_I32);
10039 IRTemp f64lo = newTemp(Ity_F64);
10040 vassert(sz == 4);
10042 modrm = getIByte(delta+3);
10043 if (epartIsReg(modrm)) {
10044 delta += 3+1;
10045 assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
10046 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10047 nameXMMReg(gregOfRM(modrm)));
10048 } else {
10049 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10050 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
10051 delta += 3+alen;
10052 DIP("cvtsd2ss %s,%s\n", dis_buf,
10053 nameXMMReg(gregOfRM(modrm)));
10054 }
10056 assign( rmode, get_sse_roundingmode() );
10057 putXMMRegLane32F(
10058 gregOfRM(modrm), 0,
10059 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) )
10060 );
10062 goto decode_success;
10063 }
10065 /* F2 0F 2A = CVTSI2SD -- convert I32 in mem/ireg to F64 in low
10066 half xmm */
10067 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x2A) {
10068 IRTemp arg32 = newTemp(Ity_I32);
10069 vassert(sz == 4);
10071 modrm = getIByte(delta+3);
10072 if (epartIsReg(modrm)) {
10073 assign( arg32, getIReg(4, eregOfRM(modrm)) );
10074 delta += 3+1;
10075 DIP("cvtsi2sd %s,%s\n", nameIReg(4, eregOfRM(modrm)),
10076 nameXMMReg(gregOfRM(modrm)));
10077 } else {
10078 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10079 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
10080 delta += 3+alen;
10081 DIP("cvtsi2sd %s,%s\n", dis_buf,
10082 nameXMMReg(gregOfRM(modrm)) );
10083 }
10085 putXMMRegLane64F(
10086 gregOfRM(modrm), 0,
10087 unop(Iop_I32StoF64, mkexpr(arg32)) );
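/* Every I32 is exactly representable as an F64, which is why
   Iop_I32StoF64 takes no rounding-mode argument. */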
10089 goto decode_success;
10090 }
10092 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
10093 low half xmm(G) */
10094 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5A) {
10095 IRTemp f32lo = newTemp(Ity_F32);
10096 vassert(sz == 4);
10098 modrm = getIByte(delta+3);
10099 if (epartIsReg(modrm)) {
10100 delta += 3+1;
10101 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
10102 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10103 nameXMMReg(gregOfRM(modrm)));
10104 } else {
10105 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10106 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
10107 delta += 3+alen;
10108 DIP("cvtss2sd %s,%s\n", dis_buf,
10109 nameXMMReg(gregOfRM(modrm)));
10110 }
10112 putXMMRegLane64F( gregOfRM(modrm), 0,
10113 unop( Iop_F32toF64, mkexpr(f32lo) ) );
10115 goto decode_success;
10116 }
10118 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
10119 lo half xmm(G), and zero upper half, rounding towards zero */
10120 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE6) {
10121 IRTemp argV = newTemp(Ity_V128);
10122 IRTemp rmode = newTemp(Ity_I32);
10124 modrm = getIByte(delta+2);
10125 if (epartIsReg(modrm)) {
10126 assign( argV, getXMMReg(eregOfRM(modrm)) );
10127 delta += 2+1;
10128 DIP("cvttpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10129 nameXMMReg(gregOfRM(modrm)));
10130 } else {
10131 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10132 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10133 delta += 2+alen;
10134 DIP("cvttpd2dq %s,%s\n", dis_buf,
10135 nameXMMReg(gregOfRM(modrm)) );
10136 }
10138 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10140 t0 = newTemp(Ity_F64);
10141 t1 = newTemp(Ity_F64);
10142 assign( t0, unop(Iop_ReinterpI64asF64,
10143 unop(Iop_V128to64, mkexpr(argV))) );
10144 assign( t1, unop(Iop_ReinterpI64asF64,
10145 unop(Iop_V128HIto64, mkexpr(argV))) );
10147 # define CVT(_t) binop( Iop_F64toI32S, \
10148 mkexpr(rmode), \
10149 mkexpr(_t) )
10151 putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
10152 putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
10153 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
10154 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
10156 # undef CVT
10158 goto decode_success;
10159 }
10161 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
10162 xmm(G), rounding towards zero */
10163 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5B) {
10164 IRTemp argV = newTemp(Ity_V128);
10165 IRTemp rmode = newTemp(Ity_I32);
10166 vassert(sz == 4);
10168 modrm = getIByte(delta+3);
10169 if (epartIsReg(modrm)) {
10170 assign( argV, getXMMReg(eregOfRM(modrm)) );
10171 delta += 3+1;
10172 DIP("cvttps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10173 nameXMMReg(gregOfRM(modrm)));
10174 } else {
10175 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10176 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10177 delta += 3+alen;
10178 DIP("cvttps2dq %s,%s\n", dis_buf,
10179 nameXMMReg(gregOfRM(modrm)) );
10180 }
10182 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10183 breakup128to32s( argV, &t3, &t2, &t1, &t0 );
10185 /* This is less than ideal. If it turns out to be a performance
10186 bottleneck it can be improved. */
10187 # define CVT(_t) \
10188 binop( Iop_F64toI32S, \
10189 mkexpr(rmode), \
10190 unop( Iop_F32toF64, \
10191 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
10193 putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) );
10194 putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) );
10195 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
10196 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
10198 # undef CVT
10200 goto decode_success;
10201 }
10203 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
10204 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5E) {
10205 delta = dis_SSE_E_to_G_all( sorb, delta+2, "divpd", Iop_Div64Fx2 );
10206 goto decode_success;
10207 }
10209 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
10210 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5E) {
10211 vassert(sz == 4);
10212 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "divsd", Iop_Div64F0x2 );
10213 goto decode_success;
10214 }
10216 /* 0F AE /5 = LFENCE -- flush pending operations to memory */
10217 /* 0F AE /6 = MFENCE -- flush pending operations to memory */
10218 if (insn[0] == 0x0F && insn[1] == 0xAE
10219 && epartIsReg(insn[2])
10220 && (gregOfRM(insn[2]) == 5 || gregOfRM(insn[2]) == 6)) {
10221 vassert(sz == 4);
10222 delta += 3;
10223 /* Insert a memory fence. It's sometimes important that these
10224 are carried through to the generated code. */
10225 stmt( IRStmt_MBE(Imbe_Fence) );
10226 DIP("%sfence\n", gregOfRM(insn[2])==5 ? "l" : "m");
10227 goto decode_success;
10228 }
10230 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
10231 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5F) {
10232 delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxpd", Iop_Max64Fx2 );
10233 goto decode_success;
10234 }
10236 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
10237 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5F) {
10238 vassert(sz == 4);
10239 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "maxsd", Iop_Max64F0x2 );
10240 goto decode_success;
10241 }
10243 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
10244 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5D) {
10245 delta = dis_SSE_E_to_G_all( sorb, delta+2, "minpd", Iop_Min64Fx2 );
10246 goto decode_success;
10247 }
10249 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
10250 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5D) {
10251 vassert(sz == 4);
10252 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "minsd", Iop_Min64F0x2 );
10253 goto decode_success;
10254 }
10256 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
10257 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
10258 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
10259 if (sz == 2 && insn[0] == 0x0F
10260 && (insn[1] == 0x28 || insn[1] == 0x10 || insn[1] == 0x6F)) {
10261 const HChar* wot = insn[1]==0x28 ? "apd" :
10262 insn[1]==0x10 ? "upd" : "dqa";
10263 modrm = getIByte(delta+2);
10264 if (epartIsReg(modrm)) {
10265 putXMMReg( gregOfRM(modrm),
10266 getXMMReg( eregOfRM(modrm) ));
10267 DIP("mov%s %s,%s\n", wot, nameXMMReg(eregOfRM(modrm)),
10268 nameXMMReg(gregOfRM(modrm)));
10269 delta += 2+1;
10270 } else {
10271 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10272 if (insn[1] == 0x28/*movapd*/ || insn[1] == 0x6F/*movdqa*/)
10273 gen_SEGV_if_not_16_aligned( addr );
10274 putXMMReg( gregOfRM(modrm),
10275 loadLE(Ity_V128, mkexpr(addr)) );
10276 DIP("mov%s %s,%s\n", wot, dis_buf,
10277 nameXMMReg(gregOfRM(modrm)));
10278 delta += 2+alen;
10279 }
10280 goto decode_success;
10281 }
10283 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
10284 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
10285 if (sz == 2 && insn[0] == 0x0F
10286 && (insn[1] == 0x29 || insn[1] == 0x11)) {
10287 const HChar* wot = insn[1]==0x29 ? "apd" : "upd";
10288 modrm = getIByte(delta+2);
10289 if (epartIsReg(modrm)) {
10290 /* fall through; awaiting test case */
10291 } else {
10292 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10293 if (insn[1] == 0x29/*movapd*/)
10294 gen_SEGV_if_not_16_aligned( addr );
10295 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
10296 DIP("mov%s %s,%s\n", wot, nameXMMReg(gregOfRM(modrm)),
10297 dis_buf );
10298 delta += 2+alen;
10299 goto decode_success;
10300 }
10301 }
10303 /* 66 0F 6E = MOVD from r/m32 to xmm, zeroing high 3/4 of xmm. */
10304 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6E) {
10305 modrm = getIByte(delta+2);
10306 if (epartIsReg(modrm)) {
10307 delta += 2+1;
10308 putXMMReg(
10309 gregOfRM(modrm),
10310 unop( Iop_32UtoV128, getIReg(4, eregOfRM(modrm)) )
10311 );
10312 DIP("movd %s, %s\n",
10313 nameIReg(4,eregOfRM(modrm)), nameXMMReg(gregOfRM(modrm)));
10314 } else {
10315 addr = disAMode( &alen, sorb, delta+2, dis_buf );
10316 delta += 2+alen;
10317 putXMMReg(
10318 gregOfRM(modrm),
10319 unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) )
10320 );
10321 DIP("movd %s, %s\n", dis_buf, nameXMMReg(gregOfRM(modrm)));
10322 }
10323 goto decode_success;
10324 }
10326 /* 66 0F 7E = MOVD from xmm low 1/4 to r/m32. */
10327 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x7E) {
10328 modrm = getIByte(delta+2);
10329 if (epartIsReg(modrm)) {
10330 delta += 2+1;
10331 putIReg( 4, eregOfRM(modrm),
10332 getXMMRegLane32(gregOfRM(modrm), 0) );
10333 DIP("movd %s, %s\n",
10334 nameXMMReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm)));
10335 } else {
10336 addr = disAMode( &alen, sorb, delta+2, dis_buf );
10337 delta += 2+alen;
10338 storeLE( mkexpr(addr),
10339 getXMMRegLane32(gregOfRM(modrm), 0) );
10340 DIP("movd %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
10341 }
10342 goto decode_success;
10343 }
10345 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
10346 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x7F) {
10347 modrm = getIByte(delta+2);
10348 if (epartIsReg(modrm)) {
10349 delta += 2+1;
10350 putXMMReg( eregOfRM(modrm),
10351 getXMMReg(gregOfRM(modrm)) );
10352 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)),
10353 nameXMMReg(eregOfRM(modrm)));
10354 } else {
10355 addr = disAMode( &alen, sorb, delta+2, dis_buf );
10356 delta += 2+alen;
10357 gen_SEGV_if_not_16_aligned( addr );
10358 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
10359 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
10360 }
10361 goto decode_success;
10362 }
10364 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
10365 /* Unfortunately can't simply use the MOVDQA case since the
10366 prefix lengths are different (66 vs F3) */
10367 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x6F) {
10368 vassert(sz == 4);
10369 modrm = getIByte(delta+3);
10370 if (epartIsReg(modrm)) {
10371 putXMMReg( gregOfRM(modrm),
10372 getXMMReg( eregOfRM(modrm) ));
10373 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10374 nameXMMReg(gregOfRM(modrm)));
10375 delta += 3+1;
10376 } else {
10377 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10378 putXMMReg( gregOfRM(modrm),
10379 loadLE(Ity_V128, mkexpr(addr)) );
10380 DIP("movdqu %s,%s\n", dis_buf,
10381 nameXMMReg(gregOfRM(modrm)));
10382 delta += 3+alen;
10383 }
10384 goto decode_success;
10385 }
10387 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
10388 /* Unfortunately can't simply use the MOVDQA case since the
10389 prefix lengths are different (66 vs F3) */
10390 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7F) {
10391 vassert(sz == 4);
10392 modrm = getIByte(delta+3);
10393 if (epartIsReg(modrm)) {
10394 delta += 3+1;
10395 putXMMReg( eregOfRM(modrm),
10396 getXMMReg(gregOfRM(modrm)) );
10397 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)),
10398 nameXMMReg(eregOfRM(modrm)));
10399 } else {
10400 addr = disAMode( &alen, sorb, delta+3, dis_buf );
10401 delta += 3+alen;
10402 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
10403 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
10404 }
10405 goto decode_success;
10406 }
10408 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
10409 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD6) {
10410 vassert(sz == 4);
10411 modrm = getIByte(delta+3);
10412 if (epartIsReg(modrm)) {
10413 do_MMX_preamble();
10414 putMMXReg( gregOfRM(modrm),
10415 getXMMRegLane64( eregOfRM(modrm), 0 ));
10416 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10417 nameMMXReg(gregOfRM(modrm)));
10418 delta += 3+1;
10419 goto decode_success;
10420 } else {
10421 /* fall through, apparently no mem case for this insn */
10422 }
10423 }
10425 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
10426 /* This seems identical to MOVHPS. This instruction encoding is
10427 completely crazy. */
10428 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x16) {
10429 modrm = getIByte(delta+2);
10430 if (epartIsReg(modrm)) {
10431 /* fall through; apparently reg-reg is not possible */
10432 } else {
10433 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10434 delta += 2+alen;
10435 putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
10436 loadLE(Ity_I64, mkexpr(addr)) );
10437 DIP("movhpd %s,%s\n", dis_buf,
10438 nameXMMReg( gregOfRM(modrm) ));
10439 goto decode_success;
10440 }
10441 }
10443 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
10444 /* Again, this seems identical to MOVHPS. */
10445 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x17) {
10446 if (!epartIsReg(insn[2])) {
10447 delta += 2;
10448 addr = disAMode ( &alen, sorb, delta, dis_buf );
10449 delta += alen;
10450 storeLE( mkexpr(addr),
10451 getXMMRegLane64( gregOfRM(insn[2]),
10452 1/*upper lane*/ ) );
10453 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
10454 dis_buf);
10455 goto decode_success;
10456 }
10457 /* else fall through */
10458 }
10460 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
10461 /* Identical to MOVLPS ? */
10462 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x12) {
10463 modrm = getIByte(delta+2);
10464 if (epartIsReg(modrm)) {
10465 /* fall through; apparently reg-reg is not possible */
10466 } else {
10467 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10468 delta += 2+alen;
10469 putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/,
10470 loadLE(Ity_I64, mkexpr(addr)) );
10471 DIP("movlpd %s, %s\n",
10472 dis_buf, nameXMMReg( gregOfRM(modrm) ));
10473 goto decode_success;
10474 }
10475 }
10477 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
10478 /* Identical to MOVLPS ? */
10479 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x13) {
10480 if (!epartIsReg(insn[2])) {
10481 delta += 2;
10482 addr = disAMode ( &alen, sorb, delta, dis_buf );
10483 delta += alen;
10484 storeLE( mkexpr(addr),
10485 getXMMRegLane64( gregOfRM(insn[2]),
10486 0/*lower lane*/ ) );
10487 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
10488 dis_buf);
10489 goto decode_success;
10490 }
10491 /* else fall through */
10492 }
10494 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
10495 2 lowest bits of ireg(G) */
10496 if (insn[0] == 0x0F && insn[1] == 0x50) {
10497 modrm = getIByte(delta+2);
10498 if (sz == 2 && epartIsReg(modrm)) {
10499 Int src;
10500 t0 = newTemp(Ity_I32);
10501 t1 = newTemp(Ity_I32);
10502 delta += 2+1;
10503 src = eregOfRM(modrm);
10504 assign( t0, binop( Iop_And32,
10505 binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(31)),
10506 mkU32(1) ));
10507 assign( t1, binop( Iop_And32,
10508 binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(30)),
10509 mkU32(2) ));
10510 putIReg(4, gregOfRM(modrm),
10511 binop(Iop_Or32, mkexpr(t0), mkexpr(t1))
10512 );
10513 DIP("movmskpd %s,%s\n", nameXMMReg(src),
10514 nameIReg(4, gregOfRM(modrm)));
10515 goto decode_success;
10516 }
10517 /* else fall through */
10518 }
10520 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
10521 if (insn[0] == 0x0F && insn[1] == 0xF7) {
10522 modrm = getIByte(delta+2);
10523 if (sz == 2 && epartIsReg(modrm)) {
10524 IRTemp regD = newTemp(Ity_V128);
10525 IRTemp mask = newTemp(Ity_V128);
10526 IRTemp olddata = newTemp(Ity_V128);
10527 IRTemp newdata = newTemp(Ity_V128);
10528 addr = newTemp(Ity_I32);
10530 assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) ));
10531 assign( regD, getXMMReg( gregOfRM(modrm) ));
10533 /* Unfortunately can't do the obvious thing with SarN8x16
10534 here since that can't be re-emitted as SSE2 code - no such
10535 insn. */
10536 assign(
10537 mask,
10538 binop(Iop_64HLtoV128,
10539 binop(Iop_SarN8x8,
10540 getXMMRegLane64( eregOfRM(modrm), 1 ),
10541 mkU8(7) ),
10542 binop(Iop_SarN8x8,
10543 getXMMRegLane64( eregOfRM(modrm), 0 ),
10544 mkU8(7) ) ));
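/* Arithmetically shifting each byte right by 7 replicates its top
   bit, yielding 0x00 or 0xFF per lane.  Bytes whose mask lane is
   0xFF take the new data; the others keep the old memory contents.
   The overall effect is a non-atomic read-modify-write of the 16
   bytes at (seg:)EDI. */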
10545 assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
10546 assign( newdata,
10547 binop(Iop_OrV128,
10548 binop(Iop_AndV128,
10549 mkexpr(regD),
10550 mkexpr(mask) ),
10551 binop(Iop_AndV128,
10552 mkexpr(olddata),
10553 unop(Iop_NotV128, mkexpr(mask)))) );
10554 storeLE( mkexpr(addr), mkexpr(newdata) );
10556 delta += 2+1;
10557 DIP("maskmovdqu %s,%s\n", nameXMMReg( eregOfRM(modrm) ),
10558 nameXMMReg( gregOfRM(modrm) ) );
10559 goto decode_success;
10560 }
10561 /* else fall through */
10562 }
10564 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
10565 if (insn[0] == 0x0F && insn[1] == 0xE7) {
10566 modrm = getIByte(delta+2);
10567 if (sz == 2 && !epartIsReg(modrm)) {
10568 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10569 gen_SEGV_if_not_16_aligned( addr );
10570 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
10571 DIP("movntdq %s,%s\n", dis_buf,
10572 nameXMMReg(gregOfRM(modrm)));
10573 delta += 2+alen;
10574 goto decode_success;
10575 }
10576 /* else fall through */
10577 }
10579 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
10580 if (insn[0] == 0x0F && insn[1] == 0xC3) {
10581 vassert(sz == 4);
10582 modrm = getIByte(delta+2);
10583 if (!epartIsReg(modrm)) {
10584 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10585 storeLE( mkexpr(addr), getIReg(4, gregOfRM(modrm)) );
10586 DIP("movnti %s,%s\n", dis_buf,
10587 nameIReg(4, gregOfRM(modrm)));
10588 delta += 2+alen;
10589 goto decode_success;
10590 }
10591 /* else fall through */
10592 }
10594 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
10595 or lo half xmm). */
10596 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD6) {
10597 modrm = getIByte(delta+2);
10598 if (epartIsReg(modrm)) {
10599 /* fall through, awaiting test case */
10600 /* dst: lo half copied, hi half zeroed */
10601 } else {
10602 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10603 storeLE( mkexpr(addr),
10604 getXMMRegLane64( gregOfRM(modrm), 0 ));
10605 DIP("movq %s,%s\n", nameXMMReg(gregOfRM(modrm)), dis_buf );
10606 delta += 2+alen;
10607 goto decode_success;
10608 }
10609 }
10611 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
10612 hi half). */
10613 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xD6) {
10614 vassert(sz == 4);
10615 modrm = getIByte(delta+3);
10616 if (epartIsReg(modrm)) {
10617 do_MMX_preamble();
10618 putXMMReg( gregOfRM(modrm),
10619 unop(Iop_64UtoV128, getMMXReg( eregOfRM(modrm) )) );
10620 DIP("movq2dq %s,%s\n", nameMMXReg(eregOfRM(modrm)),
10621 nameXMMReg(gregOfRM(modrm)));
10622 delta += 3+1;
10623 goto decode_success;
10624 } else {
10625 /* fall through, apparently no mem case for this insn */
10626 }
10627 }
10629 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
10630 G (lo half xmm). Upper half of G is zeroed out. */
10631 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
10632 G (lo half xmm). If E is mem, upper half of G is zeroed out.
10633 If E is reg, upper half of G is unchanged. */
10634 if ((insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x10)
10635 || (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7E)) {
10636 vassert(sz == 4);
10637 modrm = getIByte(delta+3);
10638 if (epartIsReg(modrm)) {
10639 putXMMRegLane64( gregOfRM(modrm), 0,
10640 getXMMRegLane64( eregOfRM(modrm), 0 ));
10641 if (insn[0] == 0xF3/*MOVQ*/) {
10642 /* zero bits 127:64 */
10643 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
10644 }
10645 DIP("movsd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10646 nameXMMReg(gregOfRM(modrm)));
10647 delta += 3+1;
10648 } else {
10649 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10650 /* zero bits 127:64 */
10651 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
10652 /* write bits 63:0 */
10653 putXMMRegLane64( gregOfRM(modrm), 0,
10654 loadLE(Ity_I64, mkexpr(addr)) );
10655 DIP("movsd %s,%s\n", dis_buf,
10656 nameXMMReg(gregOfRM(modrm)));
10657 delta += 3+alen;
10658 }
10659 goto decode_success;
10660 }
10662 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
10663 or lo half xmm). */
10664 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x11) {
10665 vassert(sz == 4);
10666 modrm = getIByte(delta+3);
10667 if (epartIsReg(modrm)) {
10668 putXMMRegLane64( eregOfRM(modrm), 0,
10669 getXMMRegLane64( gregOfRM(modrm), 0 ));
10670 DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)),
10671 nameXMMReg(eregOfRM(modrm)));
10672 delta += 3+1;
10673 } else {
10674 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10675 storeLE( mkexpr(addr),
10676 getXMMRegLane64(gregOfRM(modrm), 0) );
10677 DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)),
10678 dis_buf);
10679 delta += 3+alen;
10680 }
10681 goto decode_success;
10682 }
10684 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
10685 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x59) {
10686 delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulpd", Iop_Mul64Fx2 );
10687 goto decode_success;
10688 }
10690 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
10691 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x59) {
10692 vassert(sz == 4);
10693 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "mulsd", Iop_Mul64F0x2 );
10694 goto decode_success;
10695 }
10697 /* 66 0F 56 = ORPD -- G = G or E */
10698 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x56) {
10699 delta = dis_SSE_E_to_G_all( sorb, delta+2, "orpd", Iop_OrV128 );
10700 goto decode_success;
10701 }
10703 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
10704 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC6) {
10705 Int select;
10706 IRTemp sV = newTemp(Ity_V128);
10707 IRTemp dV = newTemp(Ity_V128);
10708 IRTemp s1 = newTemp(Ity_I64);
10709 IRTemp s0 = newTemp(Ity_I64);
10710 IRTemp d1 = newTemp(Ity_I64);
10711 IRTemp d0 = newTemp(Ity_I64);
10713 modrm = insn[2];
10714 assign( dV, getXMMReg(gregOfRM(modrm)) );
10716 if (epartIsReg(modrm)) {
10717 assign( sV, getXMMReg(eregOfRM(modrm)) );
10718 select = (Int)insn[3];
10719 delta += 2+2;
10720 DIP("shufpd $%d,%s,%s\n", select,
10721 nameXMMReg(eregOfRM(modrm)),
10722 nameXMMReg(gregOfRM(modrm)));
10723 } else {
10724 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10725 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10726 select = (Int)insn[2+alen];
10727 delta += 3+alen;
10728 DIP("shufpd $%d,%s,%s\n", select,
10729 dis_buf,
10730 nameXMMReg(gregOfRM(modrm)));
10731 }
10733 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
10734 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
10735 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
10736 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
10738 # define SELD(n) mkexpr((n)==0 ? d0 : d1)
10739 # define SELS(n) mkexpr((n)==0 ? s0 : s1)
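/* Per the SHUFPD definition: imm8 bit 0 selects which half of the
   destination provides result bits 63:0, and bit 1 selects which
   half of the source provides result bits 127:64. */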
10741 putXMMReg(
10742 gregOfRM(modrm),
10743 binop(Iop_64HLtoV128, SELS((select>>1)&1), SELD((select>>0)&1) )
10744 );
10746 # undef SELD
10747 # undef SELS
10749 goto decode_success;
10750 }
10752 /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
10753 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x51) {
10754 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
10755 "sqrtpd", Iop_Sqrt64Fx2 );
10756 goto decode_success;
10757 }
10759 /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
10760 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x51) {
10761 vassert(sz == 4);
10762 delta = dis_SSE_E_to_G_unary_lo64( sorb, delta+3,
10763 "sqrtsd", Iop_Sqrt64F0x2 );
10764 goto decode_success;
10765 }
10767 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
10768 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5C) {
10769 delta = dis_SSE_E_to_G_all( sorb, delta+2, "subpd", Iop_Sub64Fx2 );
10770 goto decode_success;
10771 }
10773 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
10774 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5C) {
10775 vassert(sz == 4);
10776 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "subsd", Iop_Sub64F0x2 );
10777 goto decode_success;
10778 }
10780 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
10781 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
10782 /* These just appear to be special cases of SHUFPD */
10783 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
10784 IRTemp s1 = newTemp(Ity_I64);
10785 IRTemp s0 = newTemp(Ity_I64);
10786 IRTemp d1 = newTemp(Ity_I64);
10787 IRTemp d0 = newTemp(Ity_I64);
10788 IRTemp sV = newTemp(Ity_V128);
10789 IRTemp dV = newTemp(Ity_V128);
10790 Bool hi = toBool(insn[1] == 0x15);
10792 modrm = insn[2];
10793 assign( dV, getXMMReg(gregOfRM(modrm)) );
10795 if (epartIsReg(modrm)) {
10796 assign( sV, getXMMReg(eregOfRM(modrm)) );
10797 delta += 2+1;
10798 DIP("unpck%spd %s,%s\n", hi ? "h" : "l",
10799 nameXMMReg(eregOfRM(modrm)),
10800 nameXMMReg(gregOfRM(modrm)));
10801 } else {
10802 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10803 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10804 delta += 2+alen;
10805 DIP("unpck%spd %s,%s\n", hi ? "h" : "l",
10806 dis_buf,
10807 nameXMMReg(gregOfRM(modrm)));
10808 }
10810 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
10811 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
10812 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
10813 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
10815 if (hi) {
10816 putXMMReg( gregOfRM(modrm),
10817 binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );
10818 } else {
10819 putXMMReg( gregOfRM(modrm),
10820 binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
10821 }
10823 goto decode_success;
10824 }
10826 /* 66 0F 57 = XORPD -- G = G xor E */
10827 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x57) {
10828 delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorpd", Iop_XorV128 );
10829 goto decode_success;
10830 }
10832 /* 66 0F 6B = PACKSSDW */
10833 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6B) {
10834 delta = dis_SSEint_E_to_G( sorb, delta+2,
10835 "packssdw",
10836 Iop_QNarrowBin32Sto16Sx8, True );
10837 goto decode_success;
10838 }
10840 /* 66 0F 63 = PACKSSWB */
10841 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x63) {
10842 delta = dis_SSEint_E_to_G( sorb, delta+2,
10843 "packsswb",
10844 Iop_QNarrowBin16Sto8Sx16, True );
10845 goto decode_success;
10846 }
10848 /* 66 0F 67 = PACKUSWB */
10849 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x67) {
10850 delta = dis_SSEint_E_to_G( sorb, delta+2,
10851 "packuswb",
10852 Iop_QNarrowBin16Sto8Ux16, True );
10853 goto decode_success;
10854 }
10856 /* 66 0F FC = PADDB */
10857 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFC) {
10858 delta = dis_SSEint_E_to_G( sorb, delta+2,
10859 "paddb", Iop_Add8x16, False );
10860 goto decode_success;
10861 }
10863 /* 66 0F FE = PADDD */
10864 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFE) {
10865 delta = dis_SSEint_E_to_G( sorb, delta+2,
10866 "paddd", Iop_Add32x4, False );
10867 goto decode_success;
10868 }
10870 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
10871 /* 0F D4 = PADDQ -- add 64x1 */
10872 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD4) {
10873 do_MMX_preamble();
10874 delta = dis_MMXop_regmem_to_reg (
10875 sorb, delta+2, insn[1], "paddq", False );
10876 goto decode_success;
10877 }
10879 /* 66 0F D4 = PADDQ */
10880 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD4) {
10881 delta = dis_SSEint_E_to_G( sorb, delta+2,
10882 "paddq", Iop_Add64x2, False );
10883 goto decode_success;
10884 }
10886 /* 66 0F FD = PADDW */
10887 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFD) {
10888 delta = dis_SSEint_E_to_G( sorb, delta+2,
10889 "paddw", Iop_Add16x8, False );
10890 goto decode_success;
10891 }
10893 /* 66 0F EC = PADDSB */
10894 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEC) {
10895 delta = dis_SSEint_E_to_G( sorb, delta+2,
10896 "paddsb", Iop_QAdd8Sx16, False );
10897 goto decode_success;
10898 }
10900 /* 66 0F ED = PADDSW */
10901 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xED) {
10902 delta = dis_SSEint_E_to_G( sorb, delta+2,
10903 "paddsw", Iop_QAdd16Sx8, False );
10904 goto decode_success;
10905 }
10907 /* 66 0F DC = PADDUSB */
10908 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDC) {
10909 delta = dis_SSEint_E_to_G( sorb, delta+2,
10910 "paddusb", Iop_QAdd8Ux16, False );
10911 goto decode_success;
10912 }
10914 /* 66 0F DD = PADDUSW */
10915 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDD) {
10916 delta = dis_SSEint_E_to_G( sorb, delta+2,
10917 "paddusw", Iop_QAdd16Ux8, False );
10918 goto decode_success;
10919 }
10921 /* 66 0F DB = PAND */
10922 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDB) {
10923 delta = dis_SSE_E_to_G_all( sorb, delta+2, "pand", Iop_AndV128 );
10924 goto decode_success;
10925 }
10927 /* 66 0F DF = PANDN */
10928 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDF) {
10929 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "pandn", Iop_AndV128 );
10930 goto decode_success;
10931 }
10933 /* 66 0F E0 = PAVGB */
10934 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE0) {
10935 delta = dis_SSEint_E_to_G( sorb, delta+2,
10936 "pavgb", Iop_Avg8Ux16, False );
10937 goto decode_success;
10938 }
10940 /* 66 0F E3 = PAVGW */
10941 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE3) {
10942 delta = dis_SSEint_E_to_G( sorb, delta+2,
10943 "pavgw", Iop_Avg16Ux8, False );
10944 goto decode_success;
10945 }
10947 /* 66 0F 74 = PCMPEQB */
10948 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x74) {
10949 delta = dis_SSEint_E_to_G( sorb, delta+2,
10950 "pcmpeqb", Iop_CmpEQ8x16, False );
10951 goto decode_success;
10952 }
10954 /* 66 0F 76 = PCMPEQD */
10955 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x76) {
10956 delta = dis_SSEint_E_to_G( sorb, delta+2,
10957 "pcmpeqd", Iop_CmpEQ32x4, False );
10958 goto decode_success;
10959 }
10961 /* 66 0F 75 = PCMPEQW */
10962 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x75) {
10963 delta = dis_SSEint_E_to_G( sorb, delta+2,
10964 "pcmpeqw", Iop_CmpEQ16x8, False );
10965 goto decode_success;
10966 }
10968 /* 66 0F 64 = PCMPGTB */
10969 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x64) {
10970 delta = dis_SSEint_E_to_G( sorb, delta+2,
10971 "pcmpgtb", Iop_CmpGT8Sx16, False );
10972 goto decode_success;
10973 }
10975 /* 66 0F 66 = PCMPGTD */
10976 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x66) {
10977 delta = dis_SSEint_E_to_G( sorb, delta+2,
10978 "pcmpgtd", Iop_CmpGT32Sx4, False );
10979 goto decode_success;
10980 }
10982 /* 66 0F 65 = PCMPGTW */
10983 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x65) {
10984 delta = dis_SSEint_E_to_G( sorb, delta+2,
10985 "pcmpgtw", Iop_CmpGT16Sx8, False );
10986 goto decode_success;
10987 }
10989 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put
10990 zero-extend of it in ireg(G). */
10991 if (insn[0] == 0x0F && insn[1] == 0xC5) {
10992 modrm = insn[2];
10993 if (sz == 2 && epartIsReg(modrm)) {
10994 t5 = newTemp(Ity_V128);
10995 t4 = newTemp(Ity_I16);
10996 assign(t5, getXMMReg(eregOfRM(modrm)));
10997 breakup128to32s( t5, &t3, &t2, &t1, &t0 );
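/* insn[3] & 7 picks one of the eight 16-bit lanes.  Each 32-bit
   piece t0..t3 holds two adjacent lanes, the even-numbered lane
   in its low half. */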
10998 switch (insn[3] & 7) {
10999 case 0: assign(t4, unop(Iop_32to16, mkexpr(t0))); break;
11000 case 1: assign(t4, unop(Iop_32HIto16, mkexpr(t0))); break;
11001 case 2: assign(t4, unop(Iop_32to16, mkexpr(t1))); break;
11002 case 3: assign(t4, unop(Iop_32HIto16, mkexpr(t1))); break;
11003 case 4: assign(t4, unop(Iop_32to16, mkexpr(t2))); break;
11004 case 5: assign(t4, unop(Iop_32HIto16, mkexpr(t2))); break;
11005 case 6: assign(t4, unop(Iop_32to16, mkexpr(t3))); break;
11006 case 7: assign(t4, unop(Iop_32HIto16, mkexpr(t3))); break;
11007 default: vassert(0); /*NOTREACHED*/
11008 }
11009 putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t4)));
11010 DIP("pextrw $%d,%s,%s\n",
11011 (Int)insn[3], nameXMMReg(eregOfRM(modrm)),
11012 nameIReg(4,gregOfRM(modrm)));
11013 delta += 4;
11014 goto decode_success;
11015 }
11016 /* else fall through */
11017 }
11019 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
11020 put it into the specified lane of xmm(G). */
11021 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC4) {
11022 Int lane;
11023 t4 = newTemp(Ity_I16);
11024 modrm = insn[2];
11026 if (epartIsReg(modrm)) {
11027 assign(t4, getIReg(2, eregOfRM(modrm)));
11028 delta += 3+1;
11029 lane = insn[3+1-1];
11030 DIP("pinsrw $%d,%s,%s\n", lane,
11031 nameIReg(2,eregOfRM(modrm)),
11032 nameXMMReg(gregOfRM(modrm)));
11033 } else {
11034 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11035 delta += 3+alen;
11036 lane = insn[3+alen-1];
11037 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
11038 DIP("pinsrw $%d,%s,%s\n", lane,
11039 dis_buf,
11040 nameXMMReg(gregOfRM(modrm)));
11041 }
11043 putXMMRegLane16( gregOfRM(modrm), lane & 7, mkexpr(t4) );
11044 goto decode_success;
11045 }
11047 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
11048 E(xmm or mem) to G(xmm) */
11049 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF5) {
11050 IRTemp s1V = newTemp(Ity_V128);
11051 IRTemp s2V = newTemp(Ity_V128);
11052 IRTemp dV = newTemp(Ity_V128);
11053 IRTemp s1Hi = newTemp(Ity_I64);
11054 IRTemp s1Lo = newTemp(Ity_I64);
11055 IRTemp s2Hi = newTemp(Ity_I64);
11056 IRTemp s2Lo = newTemp(Ity_I64);
11057 IRTemp dHi = newTemp(Ity_I64);
11058 IRTemp dLo = newTemp(Ity_I64);
11059 modrm = insn[2];
11060 if (epartIsReg(modrm)) {
11061 assign( s1V, getXMMReg(eregOfRM(modrm)) );
11062 delta += 2+1;
11063 DIP("pmaddwd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11064 nameXMMReg(gregOfRM(modrm)));
11065 } else {
11066 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11067 assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
11068 delta += 2+alen;
11069 DIP("pmaddwd %s,%s\n", dis_buf,
11070 nameXMMReg(gregOfRM(modrm)));
11071 }
11072 assign( s2V, getXMMReg(gregOfRM(modrm)) );
11073 assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
11074 assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) );
11075 assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
11076 assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) );
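/* The 128-bit pmaddwd is computed as two independent 64-bit
   halves, reusing the MMX helper: each call multiplies four
   16-bit pairs and sums adjacent products into two I32s. */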
11077 assign( dHi, mkIRExprCCall(
11078 Ity_I64, 0/*regparms*/,
11079 "x86g_calculate_mmx_pmaddwd",
11080 &x86g_calculate_mmx_pmaddwd,
11081 mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
11082 ));
11083 assign( dLo, mkIRExprCCall(
11084 Ity_I64, 0/*regparms*/,
11085 "x86g_calculate_mmx_pmaddwd",
11086 &x86g_calculate_mmx_pmaddwd,
11087 mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
11088 ));
11089 assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ;
11090 putXMMReg(gregOfRM(modrm), mkexpr(dV));
11091 goto decode_success;
11092 }
11094 /* 66 0F EE = PMAXSW -- 16x8 signed max */
11095 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEE) {
11096 delta = dis_SSEint_E_to_G( sorb, delta+2,
11097 "pmaxsw", Iop_Max16Sx8, False );
11098 goto decode_success;
11099 }
11101 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
11102 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDE) {
11103 delta = dis_SSEint_E_to_G( sorb, delta+2,
11104 "pmaxub", Iop_Max8Ux16, False );
11105 goto decode_success;
11106 }
11108 /* 66 0F EA = PMINSW -- 16x8 signed min */
11109 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEA) {
11110 delta = dis_SSEint_E_to_G( sorb, delta+2,
11111 "pminsw", Iop_Min16Sx8, False );
11112 goto decode_success;
11113 }
11115 /* 66 0F DA = PMINUB -- 8x16 unsigned min */
11116 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDA) {
11117 delta = dis_SSEint_E_to_G( sorb, delta+2,
11118 "pminub", Iop_Min8Ux16, False );
11119 goto decode_success;
11120 }
11122 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes
11123 in xmm(E), turn them into a byte, and put zero-extend of it in
11124 ireg(G). */
11125 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD7) {
11126 modrm = insn[2];
11127 if (epartIsReg(modrm)) {
11128 t0 = newTemp(Ity_I64);
11129 t1 = newTemp(Ity_I64);
11130 assign(t0, getXMMRegLane64(eregOfRM(modrm), 0));
11131 assign(t1, getXMMRegLane64(eregOfRM(modrm), 1));
11132 t5 = newTemp(Ity_I32);
11133 assign(t5,
11134 unop(Iop_16Uto32,
11135 binop(Iop_8HLto16,
11136 unop(Iop_GetMSBs8x8, mkexpr(t1)),
11137 unop(Iop_GetMSBs8x8, mkexpr(t0)))));
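/* Iop_GetMSBs8x8 collects the top bit of each of 8 bytes into an
   8-bit value; the two halves are then glued together and
   zero-extended, leaving the 16-bit byte mask in bits 15:0. */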
11138 putIReg(4, gregOfRM(modrm), mkexpr(t5));
11139 DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11140 nameIReg(4,gregOfRM(modrm)));
11141 delta += 3;
11142 goto decode_success;
11143 }
11144 /* else fall through */
11145 }
11147 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
11148 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE4) {
11149 delta = dis_SSEint_E_to_G( sorb, delta+2,
11150 "pmulhuw", Iop_MulHi16Ux8, False );
11151 goto decode_success;
11152 }
11154 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
11155 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE5) {
11156 delta = dis_SSEint_E_to_G( sorb, delta+2,
11157 "pmulhw", Iop_MulHi16Sx8, False );
11158 goto decode_success;
11159 }
11161 /* 66 0F D5 = PMULLW -- 16x8 multiply */
11162 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD5) {
11163 delta = dis_SSEint_E_to_G( sorb, delta+2,
11164 "pmullw", Iop_Mul16x8, False );
11165 goto decode_success;
11166 }
11168 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
11169 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
11170 0 to form 64-bit result */
11171 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF4) {
11172 IRTemp sV = newTemp(Ity_I64);
11173 IRTemp dV = newTemp(Ity_I64);
11174 t1 = newTemp(Ity_I32);
11175 t0 = newTemp(Ity_I32);
11176 modrm = insn[2];
11178 do_MMX_preamble();
11179 assign( dV, getMMXReg(gregOfRM(modrm)) );
11181 if (epartIsReg(modrm)) {
11182 assign( sV, getMMXReg(eregOfRM(modrm)) );
11183 delta += 2+1;
11184 DIP("pmuludq %s,%s\n", nameMMXReg(eregOfRM(modrm)),
11185 nameMMXReg(gregOfRM(modrm)));
11186 } else {
11187 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11188 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
11189 delta += 2+alen;
11190 DIP("pmuludq %s,%s\n", dis_buf,
11191 nameMMXReg(gregOfRM(modrm)));
11192 }
11194 assign( t0, unop(Iop_64to32, mkexpr(dV)) );
11195 assign( t1, unop(Iop_64to32, mkexpr(sV)) );
11196 putMMXReg( gregOfRM(modrm),
11197 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
11198 goto decode_success;
11199 }
11201 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
11202 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
11203 half */
11204 /* This is a really poor translation -- could be improved if
11205 performance critical */
11206 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF4) {
11207 IRTemp sV, dV;
11208 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11209 sV = newTemp(Ity_V128);
11210 dV = newTemp(Ity_V128);
11211 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11212 t1 = newTemp(Ity_I64);
11213 t0 = newTemp(Ity_I64);
11214 modrm = insn[2];
11215 assign( dV, getXMMReg(gregOfRM(modrm)) );
11217 if (epartIsReg(modrm)) {
11218 assign( sV, getXMMReg(eregOfRM(modrm)) );
11219 delta += 2+1;
11220 DIP("pmuludq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11221 nameXMMReg(gregOfRM(modrm)));
11222 } else {
11223 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11224 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11225 delta += 2+alen;
11226 DIP("pmuludq %s,%s\n", dis_buf,
11227 nameXMMReg(gregOfRM(modrm)));
11228 }
11230 breakup128to32s( dV, &d3, &d2, &d1, &d0 );
11231 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
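/* Only lanes 0 and 2 participate: d0*s0 gives result bits 63:0
   and d2*s2 gives bits 127:64, each a full 32x32->64 unsigned
   product. */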
11233 assign( t0, binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) );
11234 putXMMRegLane64( gregOfRM(modrm), 0, mkexpr(t0) );
11235 assign( t1, binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)) );
11236 putXMMRegLane64( gregOfRM(modrm), 1, mkexpr(t1) );
11237 goto decode_success;
11238 }
11240 /* 66 0F EB = POR */
11241 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEB) {
11242 delta = dis_SSE_E_to_G_all( sorb, delta+2, "por", Iop_OrV128 );
11243 goto decode_success;
11244 }
11246 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
11247 from E(xmm or mem) to G(xmm) */
11248 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF6) {
11249 IRTemp s1V = newTemp(Ity_V128);
11250 IRTemp s2V = newTemp(Ity_V128);
11251 IRTemp dV = newTemp(Ity_V128);
11252 IRTemp s1Hi = newTemp(Ity_I64);
11253 IRTemp s1Lo = newTemp(Ity_I64);
11254 IRTemp s2Hi = newTemp(Ity_I64);
11255 IRTemp s2Lo = newTemp(Ity_I64);
11256 IRTemp dHi = newTemp(Ity_I64);
11257 IRTemp dLo = newTemp(Ity_I64);
11258 modrm = insn[2];
11259 if (epartIsReg(modrm)) {
11260 assign( s1V, getXMMReg(eregOfRM(modrm)) );
11261 delta += 2+1;
11262 DIP("psadbw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11263 nameXMMReg(gregOfRM(modrm)));
11264 } else {
11265 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11266 assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
11267 delta += 2+alen;
11268 DIP("psadbw %s,%s\n", dis_buf,
11269 nameXMMReg(gregOfRM(modrm)));
11270 }
11271 assign( s2V, getXMMReg(gregOfRM(modrm)) );
11272 assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
11273 assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) );
11274 assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
11275 assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) );
11276 assign( dHi, mkIRExprCCall(
11277 Ity_I64, 0/*regparms*/,
11278 "x86g_calculate_mmx_psadbw",
11279 &x86g_calculate_mmx_psadbw,
11280 mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
11281 ));
11282 assign( dLo, mkIRExprCCall(
11283 Ity_I64, 0/*regparms*/,
11284 "x86g_calculate_mmx_psadbw",
11285 &x86g_calculate_mmx_psadbw,
11286 mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
11287 ));
11288 assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ;
11289 putXMMReg(gregOfRM(modrm), mkexpr(dV));
11290 goto decode_success;
11291 }
11293 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
11294 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x70) {
11295 Int order;
11296 IRTemp sV, dV, s3, s2, s1, s0;
11297 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11298 sV = newTemp(Ity_V128);
11299 dV = newTemp(Ity_V128);
11300 modrm = insn[2];
11301 if (epartIsReg(modrm)) {
11302 assign( sV, getXMMReg(eregOfRM(modrm)) );
11303 order = (Int)insn[3];
11304 delta += 2+2;
11305 DIP("pshufd $%d,%s,%s\n", order,
11306 nameXMMReg(eregOfRM(modrm)),
11307 nameXMMReg(gregOfRM(modrm)));
11308 } else {
11309 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11310 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11311 order = (Int)insn[2+alen];
11312 delta += 3+alen;
11313 DIP("pshufd $%d,%s,%s\n", order,
11314 dis_buf,
11315 nameXMMReg(gregOfRM(modrm)));
11316 }
11317 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
11319 # define SEL(n) \
11320 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11321 assign(dV,
11322 mk128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
11323 SEL((order>>2)&3), SEL((order>>0)&3) )
11324 );
11325 putXMMReg(gregOfRM(modrm), mkexpr(dV));
11326 # undef SEL
11327 goto decode_success;
11328 }
11330 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
11331 mem) to G(xmm), and copy lower half */
11332 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x70) {
11333 Int order;
11334 IRTemp sVhi, dVhi, sV, dV, s3, s2, s1, s0;
11335 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11336 sV = newTemp(Ity_V128);
11337 dV = newTemp(Ity_V128);
11338 sVhi = newTemp(Ity_I64);
11339 dVhi = newTemp(Ity_I64);
11340 modrm = insn[3];
11341 if (epartIsReg(modrm)) {
11342 assign( sV, getXMMReg(eregOfRM(modrm)) );
11343 order = (Int)insn[4];
11344 delta += 4+1;
11345 DIP("pshufhw $%d,%s,%s\n", order,
11346 nameXMMReg(eregOfRM(modrm)),
11347 nameXMMReg(gregOfRM(modrm)));
11348 } else {
11349 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11350 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11351 order = (Int)insn[3+alen];
11352 delta += 4+alen;
11353 DIP("pshufhw $%d,%s,%s\n", order,
11354 dis_buf,
11355 nameXMMReg(gregOfRM(modrm)));
11356 }
11357 assign( sVhi, unop(Iop_V128HIto64, mkexpr(sV)) );
11358 breakup64to16s( sVhi, &s3, &s2, &s1, &s0 );
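/* Each 2-bit field of the immediate selects one of the four high
   16-bit lanes; the low 64 bits of the source pass through
   unchanged. */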
11360 # define SEL(n) \
11361 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11362 assign(dVhi,
11363 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
11364 SEL((order>>2)&3), SEL((order>>0)&3) )
11365 );
11366 assign(dV, binop( Iop_64HLtoV128,
11367 mkexpr(dVhi),
11368 unop(Iop_V128to64, mkexpr(sV))) );
11369 putXMMReg(gregOfRM(modrm), mkexpr(dV));
11370 # undef SEL
11371 goto decode_success;
11372 }
11374 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
11375 mem) to G(xmm), and copy upper half */
11376 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x70) {
11377 Int order;
11378 IRTemp sVlo, dVlo, sV, dV, s3, s2, s1, s0;
11379 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11380 sV = newTemp(Ity_V128);
11381 dV = newTemp(Ity_V128);
11382 sVlo = newTemp(Ity_I64);
11383 dVlo = newTemp(Ity_I64);
11384 modrm = insn[3];
11385 if (epartIsReg(modrm)) {
11386 assign( sV, getXMMReg(eregOfRM(modrm)) );
11387 order = (Int)insn[4];
11388 delta += 4+1;
11389 DIP("pshuflw $%d,%s,%s\n", order,
11390 nameXMMReg(eregOfRM(modrm)),
11391 nameXMMReg(gregOfRM(modrm)));
11392 } else {
11393 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11394 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11395 order = (Int)insn[3+alen];
11396 delta += 4+alen;
11397 DIP("pshuflw $%d,%s,%s\n", order,
11398 dis_buf,
11399 nameXMMReg(gregOfRM(modrm)));
11400 }
11401 assign( sVlo, unop(Iop_V128to64, mkexpr(sV)) );
11402 breakup64to16s( sVlo, &s3, &s2, &s1, &s0 );
11404 # define SEL(n) \
11405 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11406 assign(dVlo,
11407 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
11408 SEL((order>>2)&3), SEL((order>>0)&3) )
11409 );
11410 assign(dV, binop( Iop_64HLtoV128,
11411 unop(Iop_V128HIto64, mkexpr(sV)),
11412 mkexpr(dVlo) ) );
11413 putXMMReg(gregOfRM(modrm), mkexpr(dV));
11414 # undef SEL
11415 goto decode_success;
11416 }
11418 /* 66 0F 72 /6 ib = PSLLD by immediate */
11419 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72
11420 && epartIsReg(insn[2])
11421 && gregOfRM(insn[2]) == 6) {
11422 delta = dis_SSE_shiftE_imm( delta+2, "pslld", Iop_ShlN32x4 );
11423 goto decode_success;
11424 }
11426 /* 66 0F F2 = PSLLD by E */
11427 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF2) {
11428 delta = dis_SSE_shiftG_byE( sorb, delta+2, "pslld", Iop_ShlN32x4 );
11429 goto decode_success;
11430 }
11432 /* 66 0F 73 /7 ib = PSLLDQ by immediate */
11433 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
11434 && epartIsReg(insn[2])
11435 && gregOfRM(insn[2]) == 7) {
11436 IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
11437 Int imm = (Int)insn[3];
11438 Int reg = eregOfRM(insn[2]);
11439 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg));
11440 vassert(imm >= 0 && imm <= 255);
11441 delta += 4;
11443 sV = newTemp(Ity_V128);
11444 dV = newTemp(Ity_V128);
11445 hi64 = newTemp(Ity_I64);
11446 lo64 = newTemp(Ity_I64);
11447 hi64r = newTemp(Ity_I64);
11448 lo64r = newTemp(Ity_I64);
11450 if (imm >= 16) {
11451 putXMMReg(reg, mkV128(0x0000));
11452 goto decode_success;
11453 }
11455 assign( sV, getXMMReg(reg) );
11456 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
11457 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
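/* The 128-bit byte shift is decomposed into 64-bit halves:
   imm == 0 copies both, imm == 8 moves lo into hi, imm > 8 needs
   just one shift, and imm < 8 must also carry bits from lo up
   into hi. */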
11459 if (imm == 0) {
11460 assign( lo64r, mkexpr(lo64) );
11461 assign( hi64r, mkexpr(hi64) );
11462 }
11463 else
11464 if (imm == 8) {
11465 assign( lo64r, mkU64(0) );
11466 assign( hi64r, mkexpr(lo64) );
11467 }
11468 else
11469 if (imm > 8) {
11470 assign( lo64r, mkU64(0) );
11471 assign( hi64r, binop( Iop_Shl64,
11472 mkexpr(lo64),
11473 mkU8( 8*(imm-8) ) ));
11474 } else {
11475 assign( lo64r, binop( Iop_Shl64,
11476 mkexpr(lo64),
11477 mkU8(8 * imm) ));
11478 assign( hi64r,
11479 binop( Iop_Or64,
11480 binop(Iop_Shl64, mkexpr(hi64),
11481 mkU8(8 * imm)),
11482 binop(Iop_Shr64, mkexpr(lo64),
11483 mkU8(8 * (8 - imm)) )
11484 )
11485 );
11486 }
11487 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
11488 putXMMReg(reg, mkexpr(dV));
11489 goto decode_success;
11490 }
11492 /* 66 0F 73 /6 ib = PSLLQ by immediate */
11493 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
11494 && epartIsReg(insn[2])
11495 && gregOfRM(insn[2]) == 6) {
11496 delta = dis_SSE_shiftE_imm( delta+2, "psllq", Iop_ShlN64x2 );
11497 goto decode_success;
11498 }
11500 /* 66 0F F3 = PSLLQ by E */
11501 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF3) {
11502 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllq", Iop_ShlN64x2 );
11503 goto decode_success;
11504 }
11506 /* 66 0F 71 /6 ib = PSLLW by immediate */
11507 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71
11508 && epartIsReg(insn[2])
11509 && gregOfRM(insn[2]) == 6) {
11510 delta = dis_SSE_shiftE_imm( delta+2, "psllw", Iop_ShlN16x8 );
11511 goto decode_success;
11512 }
11514 /* 66 0F F1 = PSLLW by E */
11515 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF1) {
11516 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllw", Iop_ShlN16x8 );
11517 goto decode_success;
11518 }
11520 /* 66 0F 72 /4 ib = PSRAD by immediate */
11521 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72
11522 && epartIsReg(insn[2])
11523 && gregOfRM(insn[2]) == 4) {
11524 delta = dis_SSE_shiftE_imm( delta+2, "psrad", Iop_SarN32x4 );
11525 goto decode_success;
11526 }
11528 /* 66 0F E2 = PSRAD by E */
11529 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE2) {
11530 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrad", Iop_SarN32x4 );
11531 goto decode_success;
11532 }
11534 /* 66 0F 71 /4 ib = PSRAW by immediate */
11535 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71
11536 && epartIsReg(insn[2])
11537 && gregOfRM(insn[2]) == 4) {
11538 delta = dis_SSE_shiftE_imm( delta+2, "psraw", Iop_SarN16x8 );
11539 goto decode_success;
11540 }
11542 /* 66 0F E1 = PSRAW by E */
11543 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE1) {
11544 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psraw", Iop_SarN16x8 );
11545 goto decode_success;
11546 }
11548 /* 66 0F 72 /2 ib = PSRLD by immediate */
11549 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72
11550 && epartIsReg(insn[2])
11551 && gregOfRM(insn[2]) == 2) {
11552 delta = dis_SSE_shiftE_imm( delta+2, "psrld", Iop_ShrN32x4 );
11553 goto decode_success;
11554 }
11556 /* 66 0F D2 = PSRLD by E */
11557 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD2) {
11558 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrld", Iop_ShrN32x4 );
11559 goto decode_success;
11560 }
11562 /* 66 0F 73 /3 ib = PSRLDQ by immediate */
11563 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
11564 && epartIsReg(insn[2])
11565 && gregOfRM(insn[2]) == 3) {
11566 IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
11567 Int imm = (Int)insn[3];
11568 Int reg = eregOfRM(insn[2]);
11569 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg));
11570 vassert(imm >= 0 && imm <= 255);
11571 delta += 4;
11573 sV = newTemp(Ity_V128);
11574 dV = newTemp(Ity_V128);
11575 hi64 = newTemp(Ity_I64);
11576 lo64 = newTemp(Ity_I64);
11577 hi64r = newTemp(Ity_I64);
11578 lo64r = newTemp(Ity_I64);
      if (imm >= 16) {
         putXMMReg(reg, mkV128(0x0000));
         goto decode_success;
      }

      assign( sV, getXMMReg(reg) );
      assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );

      if (imm == 0) {
         assign( lo64r, mkexpr(lo64) );
         assign( hi64r, mkexpr(hi64) );
      }
      else
      if (imm == 8) {
         assign( hi64r, mkU64(0) );
         assign( lo64r, mkexpr(hi64) );
      }
      else
      if (imm > 8) {
         assign( hi64r, mkU64(0) );
         assign( lo64r, binop( Iop_Shr64,
                               mkexpr(hi64),
                               mkU8( 8*(imm-8) ) ));
      } else {
         assign( hi64r, binop( Iop_Shr64,
                               mkexpr(hi64),
                               mkU8(8 * imm) ));
         assign( lo64r,
                 binop( Iop_Or64,
                        binop(Iop_Shr64, mkexpr(lo64),
                                         mkU8(8 * imm)),
                        binop(Iop_Shl64, mkexpr(hi64),
                                         mkU8(8 * (8 - imm)) )
                      )
               );
      }

      assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
      putXMMReg(reg, mkexpr(dV));
      goto decode_success;
   }
11623 /* 66 0F 73 /2 ib = PSRLQ by immediate */
11624 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
11625 && epartIsReg(insn[2])
11626 && gregOfRM(insn[2]) == 2) {
11627 delta = dis_SSE_shiftE_imm( delta+2, "psrlq", Iop_ShrN64x2 );
11628 goto decode_success;
11631 /* 66 0F D3 = PSRLQ by E */
11632 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD3) {
11633 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlq", Iop_ShrN64x2 );
11634 goto decode_success;
11637 /* 66 0F 71 /2 ib = PSRLW by immediate */
11638 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71
11639 && epartIsReg(insn[2])
11640 && gregOfRM(insn[2]) == 2) {
11641 delta = dis_SSE_shiftE_imm( delta+2, "psrlw", Iop_ShrN16x8 );
11642 goto decode_success;
11645 /* 66 0F D1 = PSRLW by E */
11646 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD1) {
11647 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlw", Iop_ShrN16x8 );
11648 goto decode_success;
11651 /* 66 0F F8 = PSUBB */
11652 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF8) {
11653 delta = dis_SSEint_E_to_G( sorb, delta+2,
11654 "psubb", Iop_Sub8x16, False );
11655 goto decode_success;
11658 /* 66 0F FA = PSUBD */
11659 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFA) {
11660 delta = dis_SSEint_E_to_G( sorb, delta+2,
11661 "psubd", Iop_Sub32x4, False );
11662 goto decode_success;
11665 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
11666 /* 0F FB = PSUBQ -- sub 64x1 */
11667 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xFB) {
11668 do_MMX_preamble();
11669 delta = dis_MMXop_regmem_to_reg (
11670 sorb, delta+2, insn[1], "psubq", False );
11671 goto decode_success;
11674 /* 66 0F FB = PSUBQ */
11675 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFB) {
11676 delta = dis_SSEint_E_to_G( sorb, delta+2,
11677 "psubq", Iop_Sub64x2, False );
11678 goto decode_success;
11681 /* 66 0F F9 = PSUBW */
11682 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF9) {
11683 delta = dis_SSEint_E_to_G( sorb, delta+2,
11684 "psubw", Iop_Sub16x8, False );
11685 goto decode_success;
11688 /* 66 0F E8 = PSUBSB */
11689 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE8) {
11690 delta = dis_SSEint_E_to_G( sorb, delta+2,
11691 "psubsb", Iop_QSub8Sx16, False );
11692 goto decode_success;
11695 /* 66 0F E9 = PSUBSW */
11696 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE9) {
11697 delta = dis_SSEint_E_to_G( sorb, delta+2,
11698 "psubsw", Iop_QSub16Sx8, False );
11699 goto decode_success;
   /* 66 0F D8 = PSUBUSB */
11703 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD8) {
11704 delta = dis_SSEint_E_to_G( sorb, delta+2,
11705 "psubusb", Iop_QSub8Ux16, False );
11706 goto decode_success;
   /* 66 0F D9 = PSUBUSW */
11710 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD9) {
11711 delta = dis_SSEint_E_to_G( sorb, delta+2,
11712 "psubusw", Iop_QSub16Ux8, False );
11713 goto decode_success;
11716 /* 66 0F 68 = PUNPCKHBW */
11717 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x68) {
11718 delta = dis_SSEint_E_to_G( sorb, delta+2,
11719 "punpckhbw",
11720 Iop_InterleaveHI8x16, True );
11721 goto decode_success;
11724 /* 66 0F 6A = PUNPCKHDQ */
11725 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6A) {
11726 delta = dis_SSEint_E_to_G( sorb, delta+2,
11727 "punpckhdq",
11728 Iop_InterleaveHI32x4, True );
11729 goto decode_success;
11732 /* 66 0F 6D = PUNPCKHQDQ */
11733 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6D) {
11734 delta = dis_SSEint_E_to_G( sorb, delta+2,
11735 "punpckhqdq",
11736 Iop_InterleaveHI64x2, True );
11737 goto decode_success;
11740 /* 66 0F 69 = PUNPCKHWD */
11741 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x69) {
11742 delta = dis_SSEint_E_to_G( sorb, delta+2,
11743 "punpckhwd",
11744 Iop_InterleaveHI16x8, True );
11745 goto decode_success;
11748 /* 66 0F 60 = PUNPCKLBW */
11749 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x60) {
11750 delta = dis_SSEint_E_to_G( sorb, delta+2,
11751 "punpcklbw",
11752 Iop_InterleaveLO8x16, True );
11753 goto decode_success;
11756 /* 66 0F 62 = PUNPCKLDQ */
11757 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x62) {
11758 delta = dis_SSEint_E_to_G( sorb, delta+2,
11759 "punpckldq",
11760 Iop_InterleaveLO32x4, True );
11761 goto decode_success;
11764 /* 66 0F 6C = PUNPCKLQDQ */
11765 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6C) {
11766 delta = dis_SSEint_E_to_G( sorb, delta+2,
11767 "punpcklqdq",
11768 Iop_InterleaveLO64x2, True );
11769 goto decode_success;
11772 /* 66 0F 61 = PUNPCKLWD */
11773 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x61) {
11774 delta = dis_SSEint_E_to_G( sorb, delta+2,
11775 "punpcklwd",
11776 Iop_InterleaveLO16x8, True );
11777 goto decode_success;
11780 /* 66 0F EF = PXOR */
11781 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEF) {
11782 delta = dis_SSE_E_to_G_all( sorb, delta+2, "pxor", Iop_XorV128 );
11783 goto decode_success;
11786 //-- /* FXSAVE/FXRSTOR m32 -- load/store the FPU/MMX/SSE state. */
11787 //-- if (insn[0] == 0x0F && insn[1] == 0xAE
11788 //-- && (!epartIsReg(insn[2]))
11789 //-- && (gregOfRM(insn[2]) == 1 || gregOfRM(insn[2]) == 0) ) {
11790 //-- Bool store = gregOfRM(insn[2]) == 0;
11791 //-- vg_assert(sz == 4);
11792 //-- pair = disAMode ( cb, sorb, eip+2, dis_buf );
11793 //-- t1 = LOW24(pair);
11794 //-- eip += 2+HI8(pair);
11795 //-- uInstr3(cb, store ? SSE2a_MemWr : SSE2a_MemRd, 512,
11796 //-- Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1],
11797 //-- Lit16, (UShort)insn[2],
11798 //-- TempReg, t1 );
11799 //-- DIP("fx%s %s\n", store ? "save" : "rstor", dis_buf );
11800 //-- goto decode_success;
11801 //-- }
11803 /* 0F AE /7 = CLFLUSH -- flush cache line */
11804 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE
11805 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) {
11807 /* This is something of a hack. We need to know the size of the
11808 cache line containing addr. Since we don't (easily), assume
11809 256 on the basis that no real cache would have a line that
11810 big. It's safe to invalidate more stuff than we need, just
11811 inefficient. */
11812 UInt lineszB = 256;
11814 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11815 delta += 2+alen;
11817 /* Round addr down to the start of the containing block. */
11818 stmt( IRStmt_Put(
11819 OFFB_CMSTART,
11820 binop( Iop_And32,
11821 mkexpr(addr),
11822 mkU32( ~(lineszB-1) ))) );
11824 stmt( IRStmt_Put(OFFB_CMLEN, mkU32(lineszB) ) );
11826 jmp_lit(&dres, Ijk_InvalICache, (Addr32)(guest_EIP_bbstart+delta));
11828 DIP("clflush %s\n", dis_buf);
      goto decode_success;
   }
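   /* For CLFLUSH above: the CMSTART/CMLEN guest-state fields describe
      the address range whose translations should be discarded, and
      the Ijk_InvalICache exit hands control back to the run-time so
      it can do the discarding before resuming at the next insn. */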
11832 /* ---------------------------------------------------- */
11833 /* --- end of the SSE2 decoder. --- */
11834 /* ---------------------------------------------------- */
11836 /* ---------------------------------------------------- */
11837 /* --- start of the SSE3 decoder. --- */
11838 /* ---------------------------------------------------- */
11840 /* Skip parts of the decoder which don't apply given the stated
11841 guest subarchitecture. */
11842 if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE3))
11843 goto after_sse_decoders; /* no SSE3 capabilities */
11845 insn = &guest_code[delta];
11847 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
11848 duplicating some lanes (2:2:0:0). */
11849 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
11850 duplicating some lanes (3:3:1:1). */
11851 if (sz == 4 && insn[0] == 0xF3 && insn[1] == 0x0F
11852 && (insn[2] == 0x12 || insn[2] == 0x16)) {
11853 IRTemp s3, s2, s1, s0;
11854 IRTemp sV = newTemp(Ity_V128);
11855 Bool isH = insn[2] == 0x16;
11856 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11858 modrm = insn[3];
11859 if (epartIsReg(modrm)) {
11860 assign( sV, getXMMReg( eregOfRM(modrm)) );
11861 DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
11862 nameXMMReg(eregOfRM(modrm)),
11863 nameXMMReg(gregOfRM(modrm)));
11864 delta += 3+1;
11865 } else {
11866 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11867 gen_SEGV_if_not_16_aligned( addr );
11868 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11869 DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
11870 dis_buf,
11871 nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      breakup128to32s( sV, &s3, &s2, &s1, &s0 );
      putXMMReg( gregOfRM(modrm),
                 isH ? mk128from32s( s3, s3, s1, s1 )
                     : mk128from32s( s2, s2, s0, s0 ) );
      goto decode_success;
   }
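   /* So for source lanes (s3:s2:s1:s0), MOVSHDUP produces
      (s3:s3:s1:s1) and MOVSLDUP produces (s2:s2:s0:s0): each odd
      (resp. even) numbered lane is duplicated into its neighbour. */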
11882 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
11883 duplicating some lanes (0:1:0:1). */
11884 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x12) {
11885 IRTemp sV = newTemp(Ity_V128);
11886 IRTemp d0 = newTemp(Ity_I64);
11888 modrm = insn[3];
11889 if (epartIsReg(modrm)) {
11890 assign( sV, getXMMReg( eregOfRM(modrm)) );
11891 DIP("movddup %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11892 nameXMMReg(gregOfRM(modrm)));
11893 delta += 3+1;
11894 assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
11895 } else {
11896 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11897 assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
11898 DIP("movddup %s,%s\n", dis_buf,
11899 nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      putXMMReg( gregOfRM(modrm), binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
      goto decode_success;
   }
11907 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
11908 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD0) {
11909 IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
11910 IRTemp eV = newTemp(Ity_V128);
11911 IRTemp gV = newTemp(Ity_V128);
11912 IRTemp addV = newTemp(Ity_V128);
11913 IRTemp subV = newTemp(Ity_V128);
11914 IRTemp rm = newTemp(Ity_I32);
11915 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11917 modrm = insn[3];
11918 if (epartIsReg(modrm)) {
11919 assign( eV, getXMMReg( eregOfRM(modrm)) );
11920 DIP("addsubps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11921 nameXMMReg(gregOfRM(modrm)));
11922 delta += 3+1;
11923 } else {
11924 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11925 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
11926 DIP("addsubps %s,%s\n", dis_buf,
11927 nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      assign( gV, getXMMReg(gregOfRM(modrm)) );

      assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
      assign( addV, triop(Iop_Add32Fx4, mkexpr(rm), mkexpr(gV), mkexpr(eV)) );
      assign( subV, triop(Iop_Sub32Fx4, mkexpr(rm), mkexpr(gV), mkexpr(eV)) );

      breakup128to32s( addV, &a3, &a2, &a1, &a0 );
      breakup128to32s( subV, &s3, &s2, &s1, &s0 );

      putXMMReg( gregOfRM(modrm), mk128from32s( a3, s2, a1, s0 ));
      goto decode_success;
   }
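   /* That is, ADDSUBPS computes (g3+e3 : g2-e2 : g1+e1 : g0-e0).
      Both the full add and the full sub are computed and the
      required lanes are then picked out of each. */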
   /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm). */
11945 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD0) {
11946 IRTemp eV = newTemp(Ity_V128);
11947 IRTemp gV = newTemp(Ity_V128);
11948 IRTemp addV = newTemp(Ity_V128);
11949 IRTemp subV = newTemp(Ity_V128);
11950 IRTemp a1 = newTemp(Ity_I64);
11951 IRTemp s0 = newTemp(Ity_I64);
11952 IRTemp rm = newTemp(Ity_I32);
11954 modrm = insn[2];
11955 if (epartIsReg(modrm)) {
11956 assign( eV, getXMMReg( eregOfRM(modrm)) );
11957 DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11958 nameXMMReg(gregOfRM(modrm)));
11959 delta += 2+1;
11960 } else {
11961 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11962 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
11963 DIP("addsubpd %s,%s\n", dis_buf,
11964 nameXMMReg(gregOfRM(modrm)));
         delta += 2+alen;
      }

      assign( gV, getXMMReg(gregOfRM(modrm)) );

      assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
      assign( addV, triop(Iop_Add64Fx2, mkexpr(rm), mkexpr(gV), mkexpr(eV)) );
      assign( subV, triop(Iop_Sub64Fx2, mkexpr(rm), mkexpr(gV), mkexpr(eV)) );

      assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
      assign( s0, unop(Iop_V128to64, mkexpr(subV) ));

      putXMMReg( gregOfRM(modrm),
                 binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
      goto decode_success;
   }
11982 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
11983 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
11984 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F
11985 && (insn[2] == 0x7C || insn[2] == 0x7D)) {
11986 IRTemp e3, e2, e1, e0, g3, g2, g1, g0;
11987 IRTemp eV = newTemp(Ity_V128);
11988 IRTemp gV = newTemp(Ity_V128);
11989 IRTemp leftV = newTemp(Ity_V128);
11990 IRTemp rightV = newTemp(Ity_V128);
11991 IRTemp rm = newTemp(Ity_I32);
11992 Bool isAdd = insn[2] == 0x7C;
11993 const HChar* str = isAdd ? "add" : "sub";
11994 e3 = e2 = e1 = e0 = g3 = g2 = g1 = g0 = IRTemp_INVALID;
11996 modrm = insn[3];
11997 if (epartIsReg(modrm)) {
11998 assign( eV, getXMMReg( eregOfRM(modrm)) );
11999 DIP("h%sps %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
12000 nameXMMReg(gregOfRM(modrm)));
12001 delta += 3+1;
12002 } else {
12003 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12004 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
12005 DIP("h%sps %s,%s\n", str, dis_buf,
12006 nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      assign( gV, getXMMReg(gregOfRM(modrm)) );

      breakup128to32s( eV, &e3, &e2, &e1, &e0 );
      breakup128to32s( gV, &g3, &g2, &g1, &g0 );

      assign( leftV,  mk128from32s( e2, e0, g2, g0 ) );
      assign( rightV, mk128from32s( e3, e1, g3, g1 ) );

      assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
      putXMMReg( gregOfRM(modrm),
                 triop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
                       mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
      goto decode_success;
   }
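   /* The horizontal op works by regrouping: leftV holds the
      even-numbered lanes (e2:e0:g2:g0) and rightV the odd-numbered
      ones (e3:e1:g3:g1), so a single vertical add/sub then yields all
      four adjacent-pair results, with E's pairs in the high half of
      the destination and G's pairs in the low half. */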
12025 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
12026 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
12027 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) {
12028 IRTemp e1 = newTemp(Ity_I64);
12029 IRTemp e0 = newTemp(Ity_I64);
12030 IRTemp g1 = newTemp(Ity_I64);
12031 IRTemp g0 = newTemp(Ity_I64);
12032 IRTemp eV = newTemp(Ity_V128);
12033 IRTemp gV = newTemp(Ity_V128);
12034 IRTemp leftV = newTemp(Ity_V128);
12035 IRTemp rightV = newTemp(Ity_V128);
12036 IRTemp rm = newTemp(Ity_I32);
12037 Bool isAdd = insn[1] == 0x7C;
12038 const HChar* str = isAdd ? "add" : "sub";
12040 modrm = insn[2];
12041 if (epartIsReg(modrm)) {
12042 assign( eV, getXMMReg( eregOfRM(modrm)) );
12043 DIP("h%spd %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
12044 nameXMMReg(gregOfRM(modrm)));
12045 delta += 2+1;
12046 } else {
12047 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
12048 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
12049 DIP("h%spd %s,%s\n", str, dis_buf,
12050 nameXMMReg(gregOfRM(modrm)));
         delta += 2+alen;
      }

      assign( gV, getXMMReg(gregOfRM(modrm)) );

      assign( e1, unop(Iop_V128HIto64, mkexpr(eV) ));
      assign( e0, unop(Iop_V128to64, mkexpr(eV) ));
      assign( g1, unop(Iop_V128HIto64, mkexpr(gV) ));
      assign( g0, unop(Iop_V128to64, mkexpr(gV) ));

      assign( leftV,  binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) );
      assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) );

      assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
      putXMMReg( gregOfRM(modrm),
                 triop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
                       mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
      goto decode_success;
   }
12071 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */
12072 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xF0) {
12073 modrm = getIByte(delta+3);
12074 if (epartIsReg(modrm)) {
12075 goto decode_failure;
12076 } else {
12077 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12078 putXMMReg( gregOfRM(modrm),
12079 loadLE(Ity_V128, mkexpr(addr)) );
12080 DIP("lddqu %s,%s\n", dis_buf,
12081 nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }
      goto decode_success;
   }
12087 /* ---------------------------------------------------- */
12088 /* --- end of the SSE3 decoder. --- */
12089 /* ---------------------------------------------------- */
12091 /* ---------------------------------------------------- */
12092 /* --- start of the SSSE3 decoder. --- */
12093 /* ---------------------------------------------------- */
12095 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
12096 Unsigned Bytes (MMX) */
12097 if (sz == 4
12098 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
12099 IRTemp sV = newTemp(Ity_I64);
12100 IRTemp dV = newTemp(Ity_I64);
12101 IRTemp sVoddsSX = newTemp(Ity_I64);
12102 IRTemp sVevensSX = newTemp(Ity_I64);
12103 IRTemp dVoddsZX = newTemp(Ity_I64);
12104 IRTemp dVevensZX = newTemp(Ity_I64);
12106 modrm = insn[3];
12107 do_MMX_preamble();
12108 assign( dV, getMMXReg(gregOfRM(modrm)) );
12110 if (epartIsReg(modrm)) {
12111 assign( sV, getMMXReg(eregOfRM(modrm)) );
12112 delta += 3+1;
12113 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregOfRM(modrm)),
12114 nameMMXReg(gregOfRM(modrm)));
12115 } else {
12116 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12117 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12118 delta += 3+alen;
12119 DIP("pmaddubsw %s,%s\n", dis_buf,
                                 nameMMXReg(gregOfRM(modrm)));
      }

      /* compute dV unsigned x sV signed */
      assign( sVoddsSX,
              binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
      assign( sVevensSX,
              binop(Iop_SarN16x4,
                    binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)),
                    mkU8(8)) );
      assign( dVoddsZX,
              binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
      assign( dVevensZX,
              binop(Iop_ShrN16x4,
                    binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
                    mkU8(8)) );

      putMMXReg(
         gregOfRM(modrm),
         binop(Iop_QAdd16Sx4,
               binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
               binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
         )
      );
      goto decode_success;
   }
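   /* The odd/even extraction above works as follows: SarN16x4 by 8
      leaves each 16-bit lane holding its odd-numbered byte,
      sign-extended, and Shl-by-8-then-Sar-by-8 does the same for the
      even-numbered bytes; the Shr variants give the zero-extended
      bytes of the unsigned operand.  Mul16x4 then forms the byte
      products and QAdd16Sx4 adds adjacent products with signed
      saturation, which is exactly PMADDUBSW's semantics. */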
12147 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
12148 Unsigned Bytes (XMM) */
12149 if (sz == 2
12150 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
12151 IRTemp sV = newTemp(Ity_V128);
12152 IRTemp dV = newTemp(Ity_V128);
12153 IRTemp sVoddsSX = newTemp(Ity_V128);
12154 IRTemp sVevensSX = newTemp(Ity_V128);
12155 IRTemp dVoddsZX = newTemp(Ity_V128);
12156 IRTemp dVevensZX = newTemp(Ity_V128);
12158 modrm = insn[3];
12159 assign( dV, getXMMReg(gregOfRM(modrm)) );
12161 if (epartIsReg(modrm)) {
12162 assign( sV, getXMMReg(eregOfRM(modrm)) );
12163 delta += 3+1;
12164 DIP("pmaddubsw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
12165 nameXMMReg(gregOfRM(modrm)));
12166 } else {
12167 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12168 gen_SEGV_if_not_16_aligned( addr );
12169 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12170 delta += 3+alen;
12171 DIP("pmaddubsw %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)));
      }

      /* compute dV unsigned x sV signed */
      assign( sVoddsSX,
              binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) );
      assign( sVevensSX,
              binop(Iop_SarN16x8,
                    binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)),
                    mkU8(8)) );
      assign( dVoddsZX,
              binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) );
      assign( dVevensZX,
              binop(Iop_ShrN16x8,
                    binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)),
                    mkU8(8)) );

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_QAdd16Sx8,
               binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
               binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX))
         )
      );
      goto decode_success;
   }
12199 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
12200 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
12201 mmx) and G to G (mmx). */
12202 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
12203 mmx) and G to G (mmx). */
12204 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
12205 to G (mmx). */
12206 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
12207 to G (mmx). */
12208 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
12209 to G (mmx). */
12210 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
12211 to G (mmx). */
12213 if (sz == 4
12214 && insn[0] == 0x0F && insn[1] == 0x38
12215 && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
12216 || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
12217 const HChar* str = "???";
12218 IROp opV64 = Iop_INVALID;
12219 IROp opCatO = Iop_CatOddLanes16x4;
12220 IROp opCatE = Iop_CatEvenLanes16x4;
12221 IRTemp sV = newTemp(Ity_I64);
12222 IRTemp dV = newTemp(Ity_I64);
12224 modrm = insn[3];
12226 switch (insn[2]) {
12227 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
12228 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
12229 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
12230 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
12231 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
12232 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
12233 default: vassert(0);
12235 if (insn[2] == 0x02 || insn[2] == 0x06) {
12236 opCatO = Iop_InterleaveHI32x2;
12237 opCatE = Iop_InterleaveLO32x2;
12240 do_MMX_preamble();
12241 assign( dV, getMMXReg(gregOfRM(modrm)) );
12243 if (epartIsReg(modrm)) {
12244 assign( sV, getMMXReg(eregOfRM(modrm)) );
12245 delta += 3+1;
12246 DIP("ph%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
12247 nameMMXReg(gregOfRM(modrm)));
12248 } else {
12249 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12250 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12251 delta += 3+alen;
12252 DIP("ph%s %s,%s\n", str, dis_buf,
                                nameMMXReg(gregOfRM(modrm)));
      }

      putMMXReg(
         gregOfRM(modrm),
         binop(opV64,
               binop(opCatE,mkexpr(sV),mkexpr(dV)),
               binop(opCatO,mkexpr(sV),mkexpr(dV))
         )
      );
      goto decode_success;
   }
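   /* The CatEvenLanes/CatOddLanes (or, in the 32-bit cases, the
      Interleave) ops gather the even- and odd-numbered lanes of the
      E:G pair, so a single vertical op between the two gathers
      produces every adjacent-pair result at once -- the E-derived
      results land in the high lanes and the G-derived ones in the
      low lanes, as PHADD/PHSUB require. */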
12266 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
12267 xmm) and G to G (xmm). */
12268 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
12269 xmm) and G to G (xmm). */
12270 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
12271 G to G (xmm). */
12272 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
12273 G to G (xmm). */
12274 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
12275 G to G (xmm). */
12276 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
12277 G to G (xmm). */
12279 if (sz == 2
12280 && insn[0] == 0x0F && insn[1] == 0x38
12281 && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
12282 || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
12283 const HChar* str = "???";
12284 IROp opV64 = Iop_INVALID;
12285 IROp opCatO = Iop_CatOddLanes16x4;
12286 IROp opCatE = Iop_CatEvenLanes16x4;
12287 IRTemp sV = newTemp(Ity_V128);
12288 IRTemp dV = newTemp(Ity_V128);
12289 IRTemp sHi = newTemp(Ity_I64);
12290 IRTemp sLo = newTemp(Ity_I64);
12291 IRTemp dHi = newTemp(Ity_I64);
12292 IRTemp dLo = newTemp(Ity_I64);
12294 modrm = insn[3];
12296 switch (insn[2]) {
12297 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
12298 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
12299 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
12300 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
12301 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
12302 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
12303 default: vassert(0);
12305 if (insn[2] == 0x02 || insn[2] == 0x06) {
12306 opCatO = Iop_InterleaveHI32x2;
12307 opCatE = Iop_InterleaveLO32x2;
12310 assign( dV, getXMMReg(gregOfRM(modrm)) );
12312 if (epartIsReg(modrm)) {
12313 assign( sV, getXMMReg( eregOfRM(modrm)) );
12314 DIP("ph%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
12315 nameXMMReg(gregOfRM(modrm)));
12316 delta += 3+1;
12317 } else {
12318 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12319 gen_SEGV_if_not_16_aligned( addr );
12320 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12321 DIP("ph%s %s,%s\n", str, dis_buf,
12322 nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );

      /* This isn't a particularly efficient way to compute the
         result, but at least it avoids a proliferation of IROps,
         hence avoids complicating all the backends. */
      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128,
               binop(opV64,
                     binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
                     binop(opCatO,mkexpr(sHi),mkexpr(sLo))
               ),
               binop(opV64,
                     binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
                     binop(opCatO,mkexpr(dHi),mkexpr(dLo))
               )
         )
      );
      goto decode_success;
   }
12350 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
12351 (MMX) */
12352 if (sz == 4
12353 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
12354 IRTemp sV = newTemp(Ity_I64);
12355 IRTemp dV = newTemp(Ity_I64);
12357 modrm = insn[3];
12358 do_MMX_preamble();
12359 assign( dV, getMMXReg(gregOfRM(modrm)) );
12361 if (epartIsReg(modrm)) {
12362 assign( sV, getMMXReg(eregOfRM(modrm)) );
12363 delta += 3+1;
12364 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregOfRM(modrm)),
12365 nameMMXReg(gregOfRM(modrm)));
12366 } else {
12367 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12368 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12369 delta += 3+alen;
         DIP("pmulhrsw %s,%s\n", dis_buf,
                                 nameMMXReg(gregOfRM(modrm)));
      }

      putMMXReg(
         gregOfRM(modrm),
         dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
      );
      goto decode_success;
   }
12381 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
12382 Scale (XMM) */
12383 if (sz == 2
12384 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
12385 IRTemp sV = newTemp(Ity_V128);
12386 IRTemp dV = newTemp(Ity_V128);
12387 IRTemp sHi = newTemp(Ity_I64);
12388 IRTemp sLo = newTemp(Ity_I64);
12389 IRTemp dHi = newTemp(Ity_I64);
12390 IRTemp dLo = newTemp(Ity_I64);
12392 modrm = insn[3];
12393 assign( dV, getXMMReg(gregOfRM(modrm)) );
12395 if (epartIsReg(modrm)) {
12396 assign( sV, getXMMReg(eregOfRM(modrm)) );
12397 delta += 3+1;
12398 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
12399 nameXMMReg(gregOfRM(modrm)));
12400 } else {
12401 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12402 gen_SEGV_if_not_16_aligned( addr );
12403 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12404 delta += 3+alen;
         DIP("pmulhrsw %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)));
      }

      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128,
               dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
               dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
         )
      );
      goto decode_success;
   }
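   /* Each 16-bit lane of the result is the PMULHRSW value, in effect
      (a * b + 0x4000) >> 15, i.e. the rounded high half of the signed
      32-bit product; dis_PMULHRSW_helper, defined earlier in this
      file, builds that expression for a 64-bit group of lanes. */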
12424 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */
12425 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
   /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
12427 if (sz == 4
12428 && insn[0] == 0x0F && insn[1] == 0x38
12429 && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
12430 IRTemp sV = newTemp(Ity_I64);
12431 IRTemp dV = newTemp(Ity_I64);
12432 const HChar* str = "???";
12433 Int laneszB = 0;
12435 switch (insn[2]) {
12436 case 0x08: laneszB = 1; str = "b"; break;
12437 case 0x09: laneszB = 2; str = "w"; break;
12438 case 0x0A: laneszB = 4; str = "d"; break;
12439 default: vassert(0);
12442 modrm = insn[3];
12443 do_MMX_preamble();
12444 assign( dV, getMMXReg(gregOfRM(modrm)) );
12446 if (epartIsReg(modrm)) {
12447 assign( sV, getMMXReg(eregOfRM(modrm)) );
12448 delta += 3+1;
12449 DIP("psign%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
12450 nameMMXReg(gregOfRM(modrm)));
12451 } else {
12452 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12453 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12454 delta += 3+alen;
         DIP("psign%s %s,%s\n", str, dis_buf,
                                nameMMXReg(gregOfRM(modrm)));
      }

      putMMXReg(
         gregOfRM(modrm),
         dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
      );
      goto decode_success;
   }
12466 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
12467 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
   /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
12469 if (sz == 2
12470 && insn[0] == 0x0F && insn[1] == 0x38
12471 && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
12472 IRTemp sV = newTemp(Ity_V128);
12473 IRTemp dV = newTemp(Ity_V128);
12474 IRTemp sHi = newTemp(Ity_I64);
12475 IRTemp sLo = newTemp(Ity_I64);
12476 IRTemp dHi = newTemp(Ity_I64);
12477 IRTemp dLo = newTemp(Ity_I64);
12478 const HChar* str = "???";
12479 Int laneszB = 0;
12481 switch (insn[2]) {
12482 case 0x08: laneszB = 1; str = "b"; break;
12483 case 0x09: laneszB = 2; str = "w"; break;
12484 case 0x0A: laneszB = 4; str = "d"; break;
12485 default: vassert(0);
12488 modrm = insn[3];
12489 assign( dV, getXMMReg(gregOfRM(modrm)) );
12491 if (epartIsReg(modrm)) {
12492 assign( sV, getXMMReg(eregOfRM(modrm)) );
12493 delta += 3+1;
12494 DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
12495 nameXMMReg(gregOfRM(modrm)));
12496 } else {
12497 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12498 gen_SEGV_if_not_16_aligned( addr );
12499 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12500 delta += 3+alen;
         DIP("psign%s %s,%s\n", str, dis_buf,
                                nameXMMReg(gregOfRM(modrm)));
      }

      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128,
               dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
               dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
         )
      );
      goto decode_success;
   }
12520 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */
12521 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
12522 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
12523 if (sz == 4
12524 && insn[0] == 0x0F && insn[1] == 0x38
12525 && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
12526 IRTemp sV = newTemp(Ity_I64);
12527 const HChar* str = "???";
12528 Int laneszB = 0;
12530 switch (insn[2]) {
12531 case 0x1C: laneszB = 1; str = "b"; break;
12532 case 0x1D: laneszB = 2; str = "w"; break;
12533 case 0x1E: laneszB = 4; str = "d"; break;
12534 default: vassert(0);
12537 modrm = insn[3];
12538 do_MMX_preamble();
12540 if (epartIsReg(modrm)) {
12541 assign( sV, getMMXReg(eregOfRM(modrm)) );
12542 delta += 3+1;
12543 DIP("pabs%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
12544 nameMMXReg(gregOfRM(modrm)));
12545 } else {
12546 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12547 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12548 delta += 3+alen;
         DIP("pabs%s %s,%s\n", str, dis_buf,
                               nameMMXReg(gregOfRM(modrm)));
      }

      putMMXReg(
         gregOfRM(modrm),
         dis_PABS_helper( mkexpr(sV), laneszB )
      );
      goto decode_success;
   }
12560 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
12561 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
12562 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
12563 if (sz == 2
12564 && insn[0] == 0x0F && insn[1] == 0x38
12565 && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
12566 IRTemp sV = newTemp(Ity_V128);
12567 IRTemp sHi = newTemp(Ity_I64);
12568 IRTemp sLo = newTemp(Ity_I64);
12569 const HChar* str = "???";
12570 Int laneszB = 0;
12572 switch (insn[2]) {
12573 case 0x1C: laneszB = 1; str = "b"; break;
12574 case 0x1D: laneszB = 2; str = "w"; break;
12575 case 0x1E: laneszB = 4; str = "d"; break;
12576 default: vassert(0);
12579 modrm = insn[3];
12581 if (epartIsReg(modrm)) {
12582 assign( sV, getXMMReg(eregOfRM(modrm)) );
12583 delta += 3+1;
12584 DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
12585 nameXMMReg(gregOfRM(modrm)));
12586 } else {
12587 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12588 gen_SEGV_if_not_16_aligned( addr );
12589 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12590 delta += 3+alen;
         DIP("pabs%s %s,%s\n", str, dis_buf,
                               nameXMMReg(gregOfRM(modrm)));
      }

      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128,
               dis_PABS_helper( mkexpr(sHi), laneszB ),
               dis_PABS_helper( mkexpr(sLo), laneszB )
         )
      );
      goto decode_success;
   }
12608 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
12609 if (sz == 4
12610 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
12611 IRTemp sV = newTemp(Ity_I64);
12612 IRTemp dV = newTemp(Ity_I64);
12613 IRTemp res = newTemp(Ity_I64);
12615 modrm = insn[3];
12616 do_MMX_preamble();
12617 assign( dV, getMMXReg(gregOfRM(modrm)) );
12619 if (epartIsReg(modrm)) {
12620 assign( sV, getMMXReg(eregOfRM(modrm)) );
12621 d32 = (UInt)insn[3+1];
12622 delta += 3+1+1;
12623 DIP("palignr $%u,%s,%s\n", d32,
12624 nameMMXReg(eregOfRM(modrm)),
12625 nameMMXReg(gregOfRM(modrm)));
12626 } else {
12627 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12628 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12629 d32 = (UInt)insn[3+alen];
12630 delta += 3+alen+1;
         DIP("palignr $%u,%s,%s\n", d32,
                                    dis_buf,
                                    nameMMXReg(gregOfRM(modrm)));
      }

      if (d32 == 0) {
         assign( res, mkexpr(sV) );
      }
      else if (d32 >= 1 && d32 <= 7) {
         assign(res,
                binop(Iop_Or64,
                      binop(Iop_Shr64, mkexpr(sV), mkU8(8*d32)),
                      binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d32))
                )));
      }
      else if (d32 == 8) {
         assign( res, mkexpr(dV) );
      }
      else if (d32 >= 9 && d32 <= 15) {
         assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d32-8))) );
      }
      else if (d32 >= 16 && d32 <= 255) {
         assign( res, mkU64(0) );
      }
      else
         vassert(0);

      putMMXReg( gregOfRM(modrm), mkexpr(res) );
      goto decode_success;
   }
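   /* Taken together, the cases above implement PALIGNR's definition:
      view dV:sV as a single 16-byte value (with dV the more
      significant half) and extract the 8 bytes starting at byte
      offset d32; offsets of 16 or more therefore yield zero. */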
12662 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
12663 if (sz == 2
12664 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
12665 IRTemp sV = newTemp(Ity_V128);
12666 IRTemp dV = newTemp(Ity_V128);
12667 IRTemp sHi = newTemp(Ity_I64);
12668 IRTemp sLo = newTemp(Ity_I64);
12669 IRTemp dHi = newTemp(Ity_I64);
12670 IRTemp dLo = newTemp(Ity_I64);
12671 IRTemp rHi = newTemp(Ity_I64);
12672 IRTemp rLo = newTemp(Ity_I64);
12674 modrm = insn[3];
12675 assign( dV, getXMMReg(gregOfRM(modrm)) );
12677 if (epartIsReg(modrm)) {
12678 assign( sV, getXMMReg(eregOfRM(modrm)) );
12679 d32 = (UInt)insn[3+1];
12680 delta += 3+1+1;
12681 DIP("palignr $%u,%s,%s\n", d32,
12682 nameXMMReg(eregOfRM(modrm)),
12683 nameXMMReg(gregOfRM(modrm)));
12684 } else {
12685 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12686 gen_SEGV_if_not_16_aligned( addr );
12687 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12688 d32 = (UInt)insn[3+alen];
12689 delta += 3+alen+1;
12690 DIP("palignr $%u,%s,%s\n", d32,
12691 dis_buf,
                                    nameXMMReg(gregOfRM(modrm)));
      }

      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );

      if (d32 == 0) {
         assign( rHi, mkexpr(sHi) );
         assign( rLo, mkexpr(sLo) );
      }
      else if (d32 >= 1 && d32 <= 7) {
         assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, d32) );
         assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, d32) );
      }
      else if (d32 == 8) {
         assign( rHi, mkexpr(dLo) );
         assign( rLo, mkexpr(sHi) );
      }
      else if (d32 >= 9 && d32 <= 15) {
         assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, d32-8) );
         assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, d32-8) );
      }
      else if (d32 == 16) {
         assign( rHi, mkexpr(dHi) );
         assign( rLo, mkexpr(dLo) );
      }
      else if (d32 >= 17 && d32 <= 23) {
         assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-16))) );
         assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, d32-16) );
      }
      else if (d32 == 24) {
         assign( rHi, mkU64(0) );
         assign( rLo, mkexpr(dHi) );
      }
      else if (d32 >= 25 && d32 <= 31) {
         assign( rHi, mkU64(0) );
         assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-24))) );
      }
      else if (d32 >= 32 && d32 <= 255) {
         assign( rHi, mkU64(0) );
         assign( rLo, mkU64(0) );
      }
      else
         vassert(0);

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
      );
      goto decode_success;
   }
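   /* The XMM case is the same idea applied to the 32-byte value
      dV:sV.  dis_PALIGNR_XMM_helper(hi, lo, n), defined earlier in
      this file, computes (lo >> 8*n) | (hi << 8*(8-n)), so each pair
      of adjacent 64-bit limbs of the concatenation supplies one limb
      of the result. */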
12746 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
12747 if (sz == 4
12748 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
12749 IRTemp sV = newTemp(Ity_I64);
12750 IRTemp dV = newTemp(Ity_I64);
12752 modrm = insn[3];
12753 do_MMX_preamble();
12754 assign( dV, getMMXReg(gregOfRM(modrm)) );
12756 if (epartIsReg(modrm)) {
12757 assign( sV, getMMXReg(eregOfRM(modrm)) );
12758 delta += 3+1;
12759 DIP("pshufb %s,%s\n", nameMMXReg(eregOfRM(modrm)),
12760 nameMMXReg(gregOfRM(modrm)));
12761 } else {
12762 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12763 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12764 delta += 3+alen;
         DIP("pshufb %s,%s\n", dis_buf,
                               nameMMXReg(gregOfRM(modrm)));
      }

      putMMXReg(
         gregOfRM(modrm),
         binop(
            Iop_And64,
            /* permute the lanes */
            binop(
               Iop_Perm8x8,
               mkexpr(dV),
               binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL))
            ),
            /* mask off lanes which have (index & 0x80) == 0x80 */
            unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7)))
         )
      );
      goto decode_success;
   }
12786 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
12787 if (sz == 2
12788 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
12789 IRTemp sV = newTemp(Ity_V128);
12790 IRTemp dV = newTemp(Ity_V128);
12791 IRTemp sHi = newTemp(Ity_I64);
12792 IRTemp sLo = newTemp(Ity_I64);
12793 IRTemp dHi = newTemp(Ity_I64);
12794 IRTemp dLo = newTemp(Ity_I64);
12795 IRTemp rHi = newTemp(Ity_I64);
12796 IRTemp rLo = newTemp(Ity_I64);
12797 IRTemp sevens = newTemp(Ity_I64);
12798 IRTemp mask0x80hi = newTemp(Ity_I64);
12799 IRTemp mask0x80lo = newTemp(Ity_I64);
12800 IRTemp maskBit3hi = newTemp(Ity_I64);
12801 IRTemp maskBit3lo = newTemp(Ity_I64);
12802 IRTemp sAnd7hi = newTemp(Ity_I64);
12803 IRTemp sAnd7lo = newTemp(Ity_I64);
12804 IRTemp permdHi = newTemp(Ity_I64);
12805 IRTemp permdLo = newTemp(Ity_I64);
12807 modrm = insn[3];
12808 assign( dV, getXMMReg(gregOfRM(modrm)) );
12810 if (epartIsReg(modrm)) {
12811 assign( sV, getXMMReg(eregOfRM(modrm)) );
12812 delta += 3+1;
12813 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRM(modrm)),
12814 nameXMMReg(gregOfRM(modrm)));
12815 } else {
12816 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12817 gen_SEGV_if_not_16_aligned( addr );
12818 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12819 delta += 3+alen;
         DIP("pshufb %s,%s\n", dis_buf,
                               nameXMMReg(gregOfRM(modrm)));
      }
12824 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
12825 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
12826 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12827 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
12829 assign( sevens, mkU64(0x0707070707070707ULL) );
      /*
         mask0x80hi = Not(SarN8x8(sHi,7))
         maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7)
         sAnd7hi    = And(sHi,sevens)
         permdHi    = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi),
                          And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) )
         rHi        = And(permdHi,mask0x80hi)
      */
12839 assign(
12840 mask0x80hi,
12841 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7))));
12843 assign(
12844 maskBit3hi,
12845 binop(Iop_SarN8x8,
12846 binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)),
12847 mkU8(7)));
12849 assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens)));
12851 assign(
12852 permdHi,
12853 binop(
12854 Iop_Or64,
12855 binop(Iop_And64,
12856 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)),
12857 mkexpr(maskBit3hi)),
12858 binop(Iop_And64,
12859 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)),
12860 unop(Iop_Not64,mkexpr(maskBit3hi))) ));
12862 assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) );
12864 /* And the same for the lower half of the result. What fun. */
12866 assign(
12867 mask0x80lo,
12868 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7))));
12870 assign(
12871 maskBit3lo,
12872 binop(Iop_SarN8x8,
12873 binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)),
12874 mkU8(7)));
12876 assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens)));
12878 assign(
12879 permdLo,
12880 binop(
12881 Iop_Or64,
12882 binop(Iop_And64,
12883 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)),
12884 mkexpr(maskBit3lo)),
12885 binop(Iop_And64,
12886 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)),
12887 unop(Iop_Not64,mkexpr(maskBit3lo))) ));
12889 assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) );
      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
      );
      goto decode_success;
   }
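   /* Why the extra maskBit3 step relative to the MMX version: XMM
      shuffle indices have four significant bits, so bit 3 of each
      index byte (replicated across the whole byte by the Shl-4 /
      Sar-7 pair) selects whether a lane is fetched from the high or
      the low 64-bit half of dV; bits 2..0 then index within that
      half via Perm8x8, and indices with bit 7 set are zeroed out as
      in the MMX case. */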
12898 /* 0F 38 F0 = MOVBE m16/32(E), r16/32(G) */
12899 /* 0F 38 F1 = MOVBE r16/32(G), m16/32(E) */
12900 if ((sz == 2 || sz == 4)
12901 && insn[0] == 0x0F && insn[1] == 0x38
12902 && (insn[2] == 0xF0 || insn[2] == 0xF1)
12903 && !epartIsReg(insn[3])) {
12905 modrm = insn[3];
12906 addr = disAMode(&alen, sorb, delta + 3, dis_buf);
12907 delta += 3 + alen;
12908 ty = szToITy(sz);
12909 IRTemp src = newTemp(ty);
12911 if (insn[2] == 0xF0) { /* LOAD */
12912 assign(src, loadLE(ty, mkexpr(addr)));
12913 IRTemp dst = math_BSWAP(src, ty);
12914 putIReg(sz, gregOfRM(modrm), mkexpr(dst));
12915 DIP("movbe %s,%s\n", dis_buf, nameIReg(sz, gregOfRM(modrm)));
12916 } else { /* STORE */
12917 assign(src, getIReg(sz, gregOfRM(modrm)));
12918 IRTemp dst = math_BSWAP(src, ty);
12919 storeLE(mkexpr(addr), mkexpr(dst));
         DIP("movbe %s,%s\n", nameIReg(sz, gregOfRM(modrm)), dis_buf);
      }
      goto decode_success;
   }
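   /* MOVBE is just a load or store with a byte-order reversal folded
      in: math_BSWAP reverses the bytes of the 16- or 32-bit value, so
      big-endian data in memory appears in the register with native
      little-endian significance, and conversely for the store form. */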
12925 /* ---------------------------------------------------- */
12926 /* --- end of the SSSE3 decoder. --- */
12927 /* ---------------------------------------------------- */
12929 /* ---------------------------------------------------- */
12930 /* --- start of the SSE4 decoder --- */
12931 /* ---------------------------------------------------- */
12933 /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
12934 (Partial implementation only -- only deal with cases where
12935 the rounding mode is specified directly by the immediate byte.)
12936 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
      (Limitations ditto)
   */
12939 if (sz == 2
12940 && insn[0] == 0x0F && insn[1] == 0x3A
12941 && (insn[2] == 0x0B || insn[2] == 0x0A)) {
12943 Bool isD = insn[2] == 0x0B;
12944 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
12945 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
12946 Int imm = 0;
12948 modrm = insn[3];
12950 if (epartIsReg(modrm)) {
12951 assign( src,
12952 isD ? getXMMRegLane64F( eregOfRM(modrm), 0 )
12953 : getXMMRegLane32F( eregOfRM(modrm), 0 ) );
12954 imm = insn[3+1];
12955 if (imm & ~3) goto decode_failure;
12956 delta += 3+1+1;
12957 DIP( "rounds%c $%d,%s,%s\n",
12958 isD ? 'd' : 's',
12959 imm, nameXMMReg( eregOfRM(modrm) ),
12960 nameXMMReg( gregOfRM(modrm) ) );
12961 } else {
12962 addr = disAMode( &alen, sorb, delta+3, dis_buf );
12963 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
12964 imm = insn[3+alen];
12965 if (imm & ~3) goto decode_failure;
12966 delta += 3+alen+1;
         DIP( "rounds%c $%d,%s,%s\n",
              isD ? 'd' : 's',
              imm, dis_buf, nameXMMReg( gregOfRM(modrm) ) );
      }
12971 /* (imm & 3) contains an Intel-encoded rounding mode. Because
12972 that encoding is the same as the encoding for IRRoundingMode,
12973 we can use that value directly in the IR as a rounding
12974 mode. */
12975 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
12976 mkU32(imm & 3), mkexpr(src)) );
12978 if (isD)
12979 putXMMRegLane64F( gregOfRM(modrm), 0, mkexpr(res) );
12980 else
12981 putXMMRegLane32F( gregOfRM(modrm), 0, mkexpr(res) );
      goto decode_success;
   }
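   /* The Intel immediate encoding here is 0 = round to nearest even,
      1 = round down, 2 = round up, 3 = round towards zero, which (as
      noted above) coincides with the IRRoundingMode encoding, so
      imm & 3 can be passed straight to Iop_RoundF64toInt /
      Iop_RoundF32toInt. */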
   /* F3 0F BD -- LZCNT (count leading zeroes).  An AMD extension,
      which we can only decode if we're sure this is an AMD cpu that
      supports LZCNT, since otherwise it's BSR, which behaves
      differently. */
12990 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xBD
12991 && 0 != (archinfo->hwcaps & VEX_HWCAPS_X86_LZCNT)) {
12992 vassert(sz == 2 || sz == 4);
12993 /*IRType*/ ty = szToITy(sz);
12994 IRTemp src = newTemp(ty);
12995 modrm = insn[3];
12996 if (epartIsReg(modrm)) {
12997 assign(src, getIReg(sz, eregOfRM(modrm)));
12998 delta += 3+1;
12999 DIP("lzcnt%c %s, %s\n", nameISize(sz),
13000 nameIReg(sz, eregOfRM(modrm)),
13001 nameIReg(sz, gregOfRM(modrm)));
13002 } else {
13003 addr = disAMode( &alen, sorb, delta+3, dis_buf );
13004 assign(src, loadLE(ty, mkexpr(addr)));
13005 delta += 3+alen;
13006 DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf,
                nameIReg(sz, gregOfRM(modrm)));
      }
13010 IRTemp res = gen_LZCNT(ty, src);
13011 putIReg(sz, gregOfRM(modrm), mkexpr(res));
13013 // Update flags. This is pretty lame .. perhaps can do better
13014 // if this turns out to be performance critical.
13015 // O S A P are cleared. Z is set if RESULT == 0.
13016 // C is set if SRC is zero.
13017 IRTemp src32 = newTemp(Ity_I32);
13018 IRTemp res32 = newTemp(Ity_I32);
13019 assign(src32, widenUto32(mkexpr(src)));
13020 assign(res32, widenUto32(mkexpr(res)));
      IRTemp oszacp = newTemp(Ity_I32);
      assign(
         oszacp,
         binop(Iop_Or32,
               binop(Iop_Shl32,
                     unop(Iop_1Uto32,
                          binop(Iop_CmpEQ32, mkexpr(res32), mkU32(0))),
                     mkU8(X86G_CC_SHIFT_Z)),
               binop(Iop_Shl32,
                     unop(Iop_1Uto32,
                          binop(Iop_CmpEQ32, mkexpr(src32), mkU32(0))),
                     mkU8(X86G_CC_SHIFT_C))
         )
      );
13037 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
13038 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
13039 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
13040 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));
      goto decode_success;
   }
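   /* X86G_CC_OP_COPY means DEP1 is taken to hold the literal eflags
      bits, so the oszacp word assembled above -- Z set iff the result
      is zero, C set iff the source is zero, O/S/A/P cleared --
      becomes the new flags state directly. */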
13045 /* ---------------------------------------------------- */
13046 /* --- end of the SSE4 decoder --- */
13047 /* ---------------------------------------------------- */
13049 after_sse_decoders:
13051 /* ---------------------------------------------------- */
13052 /* --- deal with misc 0x67 pfxs (addr size override) -- */
13053 /* ---------------------------------------------------- */
13055 /* 67 E3 = JCXZ (for JECXZ see below) */
13056 if (insn[0] == 0x67 && insn[1] == 0xE3 && sz == 4) {
13057 delta += 2;
13058 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
13059 delta ++;
      stmt( IRStmt_Exit(
               binop(Iop_CmpEQ16, getIReg(2,R_ECX), mkU16(0)),
               Ijk_Boring,
               IRConst_U32(d32),
               OFFB_EIP
            ));
      DIP("jcxz 0x%x\n", d32);
      goto decode_success;
   }
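   /* Because of the 0x67 prefix only the low 16 bits of ECX are
      tested (hence the 2-byte read of R_ECX and the CmpEQ16); the
      branch target d32 is the EIP of the next insn plus the
      sign-extended 8-bit displacement. */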
13070 /* 67 E8 = CALL with redundant addr16 prefix */
13071 if (insn[0] == 0x67 && insn[1] == 0xE8) {
      delta++;
   }
13075 /* ---------------------------------------------------- */
13076 /* --- start of the baseline insn decoder -- */
13077 /* ---------------------------------------------------- */
13079 /* Get the primary opcode. */
13080 opc = getIByte(delta); delta++;
13082 /* We get here if the current insn isn't SSE, or this CPU doesn't
13083 support SSE. */
13085 switch (opc) {
13087 /* ------------------------ Control flow --------------- */
13089 case 0xC2: /* RET imm16 */
13090 d32 = getUDisp16(delta);
13091 delta += 2;
13092 dis_ret(&dres, d32);
13093 DIP("ret %u\n", d32);
13094 break;
13095 case 0xC3: /* RET */
13096 dis_ret(&dres, 0);
13097 DIP("ret\n");
13098 break;
13100 case 0xCF: /* IRET */
13101 /* Note, this is an extremely kludgey and limited implementation
13102 of iret. All it really does is:
13103 popl %EIP; popl %CS; popl %EFLAGS.
         %CS is set but ignored (as it is in (eg) popw %cs). */
13105 t1 = newTemp(Ity_I32); /* ESP */
13106 t2 = newTemp(Ity_I32); /* new EIP */
13107 t3 = newTemp(Ity_I32); /* new CS */
13108 t4 = newTemp(Ity_I32); /* new EFLAGS */
13109 assign(t1, getIReg(4,R_ESP));
13110 assign(t2, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(0) )));
13111 assign(t3, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(4) )));
13112 assign(t4, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(8) )));
13113 /* Get stuff off stack */
13114 putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(12)));
13115 /* set %CS (which is ignored anyway) */
13116 putSReg( R_CS, unop(Iop_32to16, mkexpr(t3)) );
13117 /* set %EFLAGS */
13118 set_EFLAGS_from_value( t4, False/*!emit_AC_emwarn*/, 0/*unused*/ );
13119 /* goto new EIP value */
13120 jmp_treg(&dres, Ijk_Ret, t2);
13121 vassert(dres.whatNext == Dis_StopHere);
13122 DIP("iret (very kludgey)\n");
13123 break;
13125 case 0xE8: /* CALL J4 */
13126 d32 = getUDisp32(delta); delta += 4;
13127 d32 += (guest_EIP_bbstart+delta);
13128 /* (guest_eip_bbstart+delta) == return-to addr, d32 == call-to addr */
13129 if (d32 == guest_EIP_bbstart+delta && getIByte(delta) >= 0x58
13130 && getIByte(delta) <= 0x5F) {
            /* Specially treat the position-independent-code idiom
                    call X
                 X: popl %reg
               as
                    movl %eip, %reg.
               since this generates better code, but for no other reason. */
13137 Int archReg = getIByte(delta) - 0x58;
13138 /* vex_printf("-- fPIC thingy\n"); */
13139 putIReg(4, archReg, mkU32(guest_EIP_bbstart+delta));
13140 delta++; /* Step over the POP */
13141 DIP("call 0x%x ; popl %s\n",d32,nameIReg(4,archReg));
13142 } else {
13143 /* The normal sequence for a call. */
13144 t1 = newTemp(Ity_I32);
13145 assign(t1, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
13146 putIReg(4, R_ESP, mkexpr(t1));
13147 storeLE( mkexpr(t1), mkU32(guest_EIP_bbstart+delta));
13148 if (resteerOkFn( callback_opaque, (Addr32)d32 )) {
13149 /* follow into the call target. */
13150 dres.whatNext = Dis_ResteerU;
13151 dres.continueAt = (Addr32)d32;
13152 } else {
13153 jmp_lit(&dres, Ijk_Call, d32);
            vassert(dres.whatNext == Dis_StopHere);
         }
         DIP("call 0x%x\n",d32);
      }
      break;
13160 //-- case 0xC8: /* ENTER */
13161 //-- d32 = getUDisp16(eip); eip += 2;
13162 //-- abyte = getIByte(delta); delta++;
13163 //--
13164 //-- vg_assert(sz == 4);
13165 //-- vg_assert(abyte == 0);
13166 //--
13167 //-- t1 = newTemp(cb); t2 = newTemp(cb);
13168 //-- uInstr2(cb, GET, sz, ArchReg, R_EBP, TempReg, t1);
13169 //-- uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2);
13170 //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2);
13171 //-- uLiteral(cb, sz);
13172 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP);
13173 //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
13174 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBP);
13175 //-- if (d32) {
13176 //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2);
13177 //-- uLiteral(cb, d32);
13178 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP);
13179 //-- }
13180 //-- DIP("enter 0x%x, 0x%x", d32, abyte);
13181 //-- break;
13183 case 0xC9: /* LEAVE */
13184 vassert(sz == 4);
13185 t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32);
13186 assign(t1, getIReg(4,R_EBP));
13187 /* First PUT ESP looks redundant, but need it because ESP must
13188 always be up-to-date for Memcheck to work... */
13189 putIReg(4, R_ESP, mkexpr(t1));
13190 assign(t2, loadLE(Ity_I32,mkexpr(t1)));
13191 putIReg(4, R_EBP, mkexpr(t2));
13192 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(4)) );
13193 DIP("leave\n");
13194 break;
13196 /* ---------------- Misc weird-ass insns --------------- */
13198 case 0x27: /* DAA */
13199 case 0x2F: /* DAS */
13200 case 0x37: /* AAA */
13201 case 0x3F: /* AAS */
13202 /* An ugly implementation for some ugly instructions. Oh
13203 well. */
13204 if (sz != 4) goto decode_failure;
13205 t1 = newTemp(Ity_I32);
13206 t2 = newTemp(Ity_I32);
13207 /* Make up a 32-bit value (t1), with the old value of AX in the
13208 bottom 16 bits, and the old OSZACP bitmask in the upper 16
13209 bits. */
      assign(t1,
             binop(Iop_16HLto32,
                   unop(Iop_32to16,
                        mk_x86g_calculate_eflags_all()),
                   getIReg(2, R_EAX)
            ));
      /* Call the helper fn, to get a new AX and OSZACP value, and
         poke both back into the guest state.  Also pass the helper
         the actual opcode so it knows which of the 4 instructions it
         is doing the computation for. */
      vassert(opc == 0x27 || opc == 0x2F || opc == 0x37 || opc == 0x3F);
      assign(t2,
             mkIRExprCCall(
                Ity_I32, 0/*regparm*/, "x86g_calculate_daa_das_aaa_aas",
                &x86g_calculate_daa_das_aaa_aas,
                mkIRExprVec_2( mkexpr(t1), mkU32( opc & 0xFF) )
             ));
      putIReg(2, R_EAX, unop(Iop_32to16, mkexpr(t2) ));

      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1,
                        binop(Iop_And32,
                              binop(Iop_Shr32, mkexpr(t2), mkU8(16)),
                              mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
                                     | X86G_CC_MASK_A | X86G_CC_MASK_Z
                                     | X86G_CC_MASK_S | X86G_CC_MASK_O )
                        )
                  ));
13240 /* Set NDEP even though it isn't used. This makes redundant-PUT
13241 elimination of previous stores to this field work better. */
13242 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
13243 switch (opc) {
13244 case 0x27: DIP("daa\n"); break;
13245 case 0x2F: DIP("das\n"); break;
13246 case 0x37: DIP("aaa\n"); break;
13247 case 0x3F: DIP("aas\n"); break;
         default: vassert(0);
      }
      break;
13252 case 0xD4: /* AAM */
13253 case 0xD5: /* AAD */
13254 d32 = getIByte(delta); delta++;
13255 if (sz != 4 || d32 != 10) goto decode_failure;
13256 t1 = newTemp(Ity_I32);
13257 t2 = newTemp(Ity_I32);
13258 /* Make up a 32-bit value (t1), with the old value of AX in the
13259 bottom 16 bits, and the old OSZACP bitmask in the upper 16
13260 bits. */
      assign(t1,
             binop(Iop_16HLto32,
                   unop(Iop_32to16,
                        mk_x86g_calculate_eflags_all()),
                   getIReg(2, R_EAX)
            ));
      /* Call the helper fn, to get a new AX and OSZACP value, and
         poke both back into the guest state.  Also pass the helper
         the actual opcode so it knows which of the 2 instructions it
         is doing the computation for. */
      assign(t2,
             mkIRExprCCall(
                Ity_I32, 0/*regparm*/, "x86g_calculate_aad_aam",
                &x86g_calculate_aad_aam,
                mkIRExprVec_2( mkexpr(t1), mkU32( opc & 0xFF) )
             ));
      putIReg(2, R_EAX, unop(Iop_32to16, mkexpr(t2) ));

      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1,
                        binop(Iop_And32,
                              binop(Iop_Shr32, mkexpr(t2), mkU8(16)),
                              mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
                                     | X86G_CC_MASK_A | X86G_CC_MASK_Z
                                     | X86G_CC_MASK_S | X86G_CC_MASK_O )
                        )
                  ));
13290 /* Set NDEP even though it isn't used. This makes
13291 redundant-PUT elimination of previous stores to this field
13292 work better. */
13293 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
13295 DIP(opc == 0xD4 ? "aam\n" : "aad\n");
13296 break;
13298 /* ------------------------ CWD/CDQ -------------------- */
13300 case 0x98: /* CBW */
13301 if (sz == 4) {
13302 putIReg(4, R_EAX, unop(Iop_16Sto32, getIReg(2, R_EAX)));
13303 DIP("cwde\n");
13304 } else {
13305 vassert(sz == 2);
13306 putIReg(2, R_EAX, unop(Iop_8Sto16, getIReg(1, R_EAX)));
         DIP("cbw\n");
      }
      break;
13311 case 0x99: /* CWD/CDQ */
13312 ty = szToITy(sz);
13313 putIReg(sz, R_EDX,
13314 binop(mkSizedOp(ty,Iop_Sar8),
13315 getIReg(sz, R_EAX),
13316 mkU8(sz == 2 ? 15 : 31)) );
      DIP(sz == 2 ? "cwd\n" : "cdq\n");
13318 break;
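      /* CWD/CDQ: DX (resp. EDX) receives the sign-extension of AX
         (resp. EAX).  Arithmetically shifting the accumulator right
         by 15 (resp. 31) bits replicates its sign bit into every bit
         of the value written to the high half of the pair. */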
13320 /* ------------------------ FPU ops -------------------- */
13322 case 0x9E: /* SAHF */
13323 codegen_SAHF();
13324 DIP("sahf\n");
13325 break;
13327 case 0x9F: /* LAHF */
13328 codegen_LAHF();
13329 DIP("lahf\n");
13330 break;
13332 case 0x9B: /* FWAIT */
13333 /* ignore? */
13334 DIP("fwait\n");
13335 break;

   case 0xD8:
   case 0xD9:
   case 0xDA:
   case 0xDB:
   case 0xDC:
   case 0xDD:
   case 0xDE:
   case 0xDF: {
      Int  delta0    = delta;
      Bool decode_OK = False;
      delta = dis_FPU ( &decode_OK, sorb, delta );
      if (!decode_OK) {
         delta = delta0;
         goto decode_failure;
      }
      break;
   }

   /* ------------------------ INC & DEC ------------------ */

   case 0x40: /* INC eAX */
   case 0x41: /* INC eCX */
   case 0x42: /* INC eDX */
   case 0x43: /* INC eBX */
   case 0x44: /* INC eSP */
   case 0x45: /* INC eBP */
   case 0x46: /* INC eSI */
   case 0x47: /* INC eDI */
      vassert(sz == 2 || sz == 4);
      ty = szToITy(sz);
      t1 = newTemp(ty);
      assign( t1, binop(mkSizedOp(ty,Iop_Add8),
                        getIReg(sz, (UInt)(opc - 0x40)),
                        mkU(ty,1)) );
      setFlags_INC_DEC( True, t1, ty );
      putIReg(sz, (UInt)(opc - 0x40), mkexpr(t1));
      DIP("inc%c %s\n", nameISize(sz), nameIReg(sz,opc-0x40));
      break;

   case 0x48: /* DEC eAX */
   case 0x49: /* DEC eCX */
   case 0x4A: /* DEC eDX */
   case 0x4B: /* DEC eBX */
   case 0x4C: /* DEC eSP */
   case 0x4D: /* DEC eBP */
   case 0x4E: /* DEC eSI */
   case 0x4F: /* DEC eDI */
      vassert(sz == 2 || sz == 4);
      ty = szToITy(sz);
      t1 = newTemp(ty);
      assign( t1, binop(mkSizedOp(ty,Iop_Sub8),
                        getIReg(sz, (UInt)(opc - 0x48)),
                        mkU(ty,1)) );
      setFlags_INC_DEC( False, t1, ty );
      putIReg(sz, (UInt)(opc - 0x48), mkexpr(t1));
      DIP("dec%c %s\n", nameISize(sz), nameIReg(sz,opc-0x48));
      break;
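
      /* INC and DEC use their own thunk setter (setFlags_INC_DEC)
         rather than the generic add/sub one because, architecturally,
         they update O/S/Z/A/P but leave CF unchanged, so the existing
         carry has to be carried over into the new thunk. */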

   /* ------------------------ INT ------------------------ */

   case 0xCC: /* INT 3 */
      jmp_lit(&dres, Ijk_SigTRAP, ((Addr32)guest_EIP_bbstart)+delta);
      vassert(dres.whatNext == Dis_StopHere);
      DIP("int $0x3\n");
      break;

   case 0xCD: /* INT imm8 */
      d32 = getIByte(delta); delta++;

      /* For any of the cases where we emit a jump (that is, for all
         currently handled cases), it's important that all ArchRegs
         carry their up-to-date value at this point.  So we declare an
         end-of-block here, which forces any TempRegs caching ArchRegs
         to be flushed. */

      /* Handle int $0x3F .. $0x4F by synthesising a segfault and a
         restart of this instruction (hence the "-2" two lines below,
         to get the restart EIP to be this instruction).  This is
         probably Linux-specific and it would be more correct to only
         do this if the VexAbiInfo says that is what we should do.
         This used to handle just 0x40-0x43; Jikes RVM uses a larger
         range (0x3F-0x49), and this allows some slack as well. */
      if (d32 >= 0x3F && d32 <= 0x4F) {
         jmp_lit(&dres, Ijk_SigSEGV, ((Addr32)guest_EIP_bbstart)+delta-2);
         vassert(dres.whatNext == Dis_StopHere);
         DIP("int $0x%x\n", d32);
         break;
      }

      /* Handle int $0x80 (linux syscalls), int $0x81 and $0x82
         (darwin syscalls), int $0x91 (Solaris syscalls) and int $0xD2
         (Solaris fasttrap syscalls).  As part of this, note where we
         are, so we can back up the guest to this point if the syscall
         needs to be restarted. */
      IRJumpKind jump_kind;
      switch (d32) {
         case 0x80:
            jump_kind = Ijk_Sys_int128;
            break;
         case 0x81:
            jump_kind = Ijk_Sys_int129;
            break;
         case 0x82:
            jump_kind = Ijk_Sys_int130;
            break;
         case 0x91:
            jump_kind = Ijk_Sys_int145;
            break;
         case 0xD2:
            jump_kind = Ijk_Sys_int210;
            break;
         default:
            /* none of the above */
            goto decode_failure;
      }
      stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
                        mkU32(guest_EIP_curr_instr) ) );
      jmp_lit(&dres, jump_kind, ((Addr32)guest_EIP_bbstart)+delta);
      vassert(dres.whatNext == Dis_StopHere);
      DIP("int $0x%x\n", d32);
      break;
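
      /* The restart address is bbstart+delta-2 because "int imm8"
         encodes in exactly two bytes (CD ib); delta has already moved
         past the immediate, so backing up two bytes points EIP at the
         INT itself when the synthesised SIGSEGV is delivered. */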

   /* ------------------------ Jcond, byte offset --------- */

   case 0xEB: /* Jb (jump, byte offset) */
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
      delta++;
      if (resteerOkFn( callback_opaque, (Addr32)d32) ) {
         dres.whatNext   = Dis_ResteerU;
         dres.continueAt = (Addr32)d32;
      } else {
         jmp_lit(&dres, Ijk_Boring, d32);
         vassert(dres.whatNext == Dis_StopHere);
      }
      DIP("jmp-8 0x%x\n", d32);
      break;

   case 0xE9: /* Jv (jump, 16/32 offset) */
      vassert(sz == 4); /* JRS added 2004 July 11 */
      d32 = (((Addr32)guest_EIP_bbstart)+delta+sz) + getSDisp(sz,delta);
      delta += sz;
      if (resteerOkFn( callback_opaque, (Addr32)d32) ) {
         dres.whatNext   = Dis_ResteerU;
         dres.continueAt = (Addr32)d32;
      } else {
         jmp_lit(&dres, Ijk_Boring, d32);
         vassert(dres.whatNext == Dis_StopHere);
      }
      DIP("jmp 0x%x\n", d32);
      break;

   case 0x70:
   case 0x71:
   case 0x72: /* JBb/JNAEb (jump below) */
   case 0x73: /* JNBb/JAEb (jump not below) */
   case 0x74: /* JZb/JEb (jump zero) */
   case 0x75: /* JNZb/JNEb (jump not zero) */
   case 0x76: /* JBEb/JNAb (jump below or equal) */
   case 0x77: /* JNBEb/JAb (jump not below or equal) */
   case 0x78: /* JSb (jump negative) */
   case 0x79: /* JNSb (jump not negative) */
   case 0x7A: /* JP (jump parity even) */
   case 0x7B: /* JNP/JPO (jump parity odd) */
   case 0x7C: /* JLb/JNGEb (jump less) */
   case 0x7D: /* JGEb/JNLb (jump greater or equal) */
   case 0x7E: /* JLEb/JNGb (jump less or equal) */
   case 0x7F: /* JGb/JNLEb (jump greater) */
    { Int jmpDelta;
      const HChar* comment = "";
      jmpDelta = (Int)getSDisp8(delta);
      vassert(-128 <= jmpDelta && jmpDelta < 128);
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + jmpDelta;
      delta++;
      if (resteerCisOk
          && vex_control.guest_chase_cond
          && (Addr32)d32 != (Addr32)guest_EIP_bbstart
          && jmpDelta < 0
          && resteerOkFn( callback_opaque, (Addr32)d32) ) {
         /* Speculation: assume this backward branch is taken.  So we
            need to emit a side-exit to the insn following this one,
            on the negation of the condition, and continue at the
            branch target address (d32).  If we wind up back at the
            first instruction of the trace, just stop; it's better to
            let the IR loop unroller handle that case. */
         stmt( IRStmt_Exit(
                  mk_x86g_calculate_condition((X86Condcode)(1 ^ (opc - 0x70))),
                  Ijk_Boring,
                  IRConst_U32(guest_EIP_bbstart+delta),
                  OFFB_EIP ) );
         dres.whatNext   = Dis_ResteerC;
         dres.continueAt = (Addr32)d32;
         comment = "(assumed taken)";
      }
      else
      if (resteerCisOk
          && vex_control.guest_chase_cond
          && (Addr32)d32 != (Addr32)guest_EIP_bbstart
          && jmpDelta >= 0
          && resteerOkFn( callback_opaque,
                          (Addr32)(guest_EIP_bbstart+delta)) ) {
         /* Speculation: assume this forward branch is not taken.  So
            we need to emit a side-exit to d32 (the dest) and continue
            disassembling at the insn immediately following this
            one. */
         stmt( IRStmt_Exit(
                  mk_x86g_calculate_condition((X86Condcode)(opc - 0x70)),
                  Ijk_Boring,
                  IRConst_U32(d32),
                  OFFB_EIP ) );
         dres.whatNext   = Dis_ResteerC;
         dres.continueAt = guest_EIP_bbstart + delta;
         comment = "(assumed not taken)";
      }
      else {
         /* Conservative default translation - end the block at this
            point. */
         jcc_01( &dres, (X86Condcode)(opc - 0x70),
                 (Addr32)(guest_EIP_bbstart+delta), d32);
         vassert(dres.whatNext == Dis_StopHere);
      }
      DIP("j%s-8 0x%x %s\n", name_X86Condcode(opc - 0x70), d32, comment);
      break;
    }
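
      /* The negation trick above relies on X86Condcode values coming
         in complementary even/odd pairs (O/NO, B/NB, Z/NZ, ...) that
         mirror the opcode encoding, so (opc - 0x70) is the condition
         itself and XORing with 1 gives its negation for the
         "assumed taken" side-exit. */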

   case 0xE3: /* JECXZ (for JCXZ see above) */
      if (sz != 4) goto decode_failure;
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
      delta ++;
      stmt( IRStmt_Exit(
               binop(Iop_CmpEQ32, getIReg(4,R_ECX), mkU32(0)),
               Ijk_Boring,
               IRConst_U32(d32),
               OFFB_EIP
            ));
      DIP("jecxz 0x%x\n", d32);
      break;

   case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
   case 0xE1: /* LOOPE  disp8: decrement count, jump if count != 0 && ZF==1 */
   case 0xE2: /* LOOP   disp8: decrement count, jump if count != 0 */
    { /* Again, the docs say this uses ECX/CX as a count depending on
         the address size override, not the operand one.  Since we
         don't handle address size overrides, I guess that means
         ECX. */
      IRExpr* zbit  = NULL;
      IRExpr* count = NULL;
      IRExpr* cond  = NULL;
      const HChar* xtra = NULL;

      if (sz != 4) goto decode_failure;
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
      delta++;
      putIReg(4, R_ECX, binop(Iop_Sub32, getIReg(4,R_ECX), mkU32(1)));

      count = getIReg(4,R_ECX);
      cond = binop(Iop_CmpNE32, count, mkU32(0));
      switch (opc) {
         case 0xE2:
            xtra = "";
            break;
         case 0xE1:
            xtra = "e";
            zbit = mk_x86g_calculate_condition( X86CondZ );
            cond = mkAnd1(cond, zbit);
            break;
         case 0xE0:
            xtra = "ne";
            zbit = mk_x86g_calculate_condition( X86CondNZ );
            cond = mkAnd1(cond, zbit);
            break;
         default:
            vassert(0);
      }
      stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U32(d32), OFFB_EIP) );

      DIP("loop%s 0x%x\n", xtra, d32);
      break;
    }
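
      /* The IRStmt_Exit encodes the taken case: ECX is decremented
         first, then the side-exit to d32 fires while the continue
         condition holds (count != 0, ANDed with the Z test for
         LOOPE/LOOPNE); falling past the exit ends the loop.  The Z
         test sees the flags from before this insn, since LOOPcc does
         not itself modify EFLAGS. */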

   /* ------------------------ IMUL ----------------------- */

   case 0x69: /* IMUL Iv, Ev, Gv */
      delta = dis_imul_I_E_G ( sorb, sz, delta, sz );
      break;
   case 0x6B: /* IMUL Ib, Ev, Gv */
      delta = dis_imul_I_E_G ( sorb, sz, delta, 1 );
      break;

   /* ------------------------ MOV ------------------------ */

   case 0x88: /* MOV Gb,Eb */
      delta = dis_mov_G_E(sorb, 1, delta);
      break;

   case 0x89: /* MOV Gv,Ev */
      delta = dis_mov_G_E(sorb, sz, delta);
      break;

   case 0x8A: /* MOV Eb,Gb */
      delta = dis_mov_E_G(sorb, 1, delta);
      break;

   case 0x8B: /* MOV Ev,Gv */
      delta = dis_mov_E_G(sorb, sz, delta);
      break;

   case 0x8D: /* LEA M,Gv */
      if (sz != 4)
         goto decode_failure;
      modrm = getIByte(delta);
      if (epartIsReg(modrm))
         goto decode_failure;
      /* NOTE!  this is the one place where a segment override prefix
         has no effect on the address calculation.  Therefore we pass
         zero instead of sorb here. */
      addr = disAMode ( &alen, /*sorb*/ 0, delta, dis_buf );
      delta += alen;
      putIReg(sz, gregOfRM(modrm), mkexpr(addr));
      DIP("lea%c %s, %s\n", nameISize(sz), dis_buf,
                            nameIReg(sz,gregOfRM(modrm)));
      break;

   case 0x8C: /* MOV Sw,Ew -- MOV from a SEGMENT REGISTER */
      delta = dis_mov_Sw_Ew(sorb, sz, delta);
      break;

   case 0x8E: /* MOV Ew,Sw -- MOV to a SEGMENT REGISTER */
      delta = dis_mov_Ew_Sw(sorb, delta);
      break;

   case 0xA0: /* MOV Ob,AL */
      sz = 1;
      /* Fall through ... */
   case 0xA1: /* MOV Ov,eAX */
      d32 = getUDisp32(delta); delta += 4;
      ty = szToITy(sz);
      addr = newTemp(Ity_I32);
      assign( addr, handleSegOverride(sorb, mkU32(d32)) );
      putIReg(sz, R_EAX, loadLE(ty, mkexpr(addr)));
      DIP("mov%c %s0x%x, %s\n", nameISize(sz), sorbTxt(sorb),
                                d32, nameIReg(sz,R_EAX));
      break;

   case 0xA2: /* MOV AL,Ob */
      sz = 1;
      /* Fall through ... */
   case 0xA3: /* MOV eAX,Ov */
      d32 = getUDisp32(delta); delta += 4;
      ty = szToITy(sz);
      addr = newTemp(Ity_I32);
      assign( addr, handleSegOverride(sorb, mkU32(d32)) );
      storeLE( mkexpr(addr), getIReg(sz,R_EAX) );
      DIP("mov%c %s, %s0x%x\n", nameISize(sz), nameIReg(sz,R_EAX),
                                sorbTxt(sorb), d32);
      break;

   case 0xB0: /* MOV imm,AL */
   case 0xB1: /* MOV imm,CL */
   case 0xB2: /* MOV imm,DL */
   case 0xB3: /* MOV imm,BL */
   case 0xB4: /* MOV imm,AH */
   case 0xB5: /* MOV imm,CH */
   case 0xB6: /* MOV imm,DH */
   case 0xB7: /* MOV imm,BH */
      d32 = getIByte(delta); delta += 1;
      putIReg(1, opc-0xB0, mkU8(d32));
      DIP("movb $0x%x,%s\n", d32, nameIReg(1,opc-0xB0));
      break;

   case 0xB8: /* MOV imm,eAX */
   case 0xB9: /* MOV imm,eCX */
   case 0xBA: /* MOV imm,eDX */
   case 0xBB: /* MOV imm,eBX */
   case 0xBC: /* MOV imm,eSP */
   case 0xBD: /* MOV imm,eBP */
   case 0xBE: /* MOV imm,eSI */
   case 0xBF: /* MOV imm,eDI */
      d32 = getUDisp(sz,delta); delta += sz;
      putIReg(sz, opc-0xB8, mkU(szToITy(sz), d32));
      DIP("mov%c $0x%x,%s\n", nameISize(sz), d32, nameIReg(sz,opc-0xB8));
      break;

   case 0xC6: /* C6 /0 = MOV Ib,Eb */
      sz = 1;
      goto maybe_do_Mov_I_E;
   case 0xC7: /* C7 /0 = MOV Iv,Ev */
      goto maybe_do_Mov_I_E;

   maybe_do_Mov_I_E:
      modrm = getIByte(delta);
      if (gregOfRM(modrm) == 0) {
         if (epartIsReg(modrm)) {
            delta++; /* mod/rm byte */
            d32 = getUDisp(sz,delta); delta += sz;
            putIReg(sz, eregOfRM(modrm), mkU(szToITy(sz), d32));
            DIP("mov%c $0x%x, %s\n", nameISize(sz), d32,
                                     nameIReg(sz,eregOfRM(modrm)));
         } else {
            addr = disAMode ( &alen, sorb, delta, dis_buf );
            delta += alen;
            d32 = getUDisp(sz,delta); delta += sz;
            storeLE(mkexpr(addr), mkU(szToITy(sz), d32));
            DIP("mov%c $0x%x, %s\n", nameISize(sz), d32, dis_buf);
         }
         break;
      }
      goto decode_failure;
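
      /* C6/C7 are MOV only when the reg field of the mod/rm byte is
         /0; the remaining reg-field encodings are not valid here,
         hence the fall-through to decode_failure when
         gregOfRM(modrm) != 0. */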

   /* ------------------------ opl imm, A ----------------- */

   case 0x04: /* ADD Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" );
      break;
   case 0x05: /* ADD Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_Add8, True, delta, "add" );
      break;

   case 0x0C: /* OR Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" );
      break;
   case 0x0D: /* OR Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" );
      break;

   case 0x14: /* ADC Ib, AL */
      delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" );
      break;
   case 0x15: /* ADC Iv, eAX */
      delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" );
      break;

   case 0x1C: /* SBB Ib, AL */
      delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" );
      break;
   case 0x1D: /* SBB Iv, eAX */
      delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" );
      break;

   case 0x24: /* AND Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" );
      break;
   case 0x25: /* AND Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" );
      break;

   case 0x2C: /* SUB Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_Sub8, True, delta, "sub" );
      break;
   case 0x2D: /* SUB Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" );
      break;

   case 0x34: /* XOR Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" );
      break;
   case 0x35: /* XOR Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" );
      break;

   case 0x3C: /* CMP Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" );
      break;
   case 0x3D: /* CMP Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" );
      break;

   case 0xA8: /* TEST Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" );
      break;
   case 0xA9: /* TEST Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" );
      break;
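
      /* Judging from these call sites, dis_op_imm_A's second Bool
         selects the carry-in variant (True only for adc/sbb) and the
         fourth says whether the result is written back -- False for
         cmp and test, which perform the subtract/and purely to set
         flags.  The Iop_*8 ops name the 8-bit form; the helper scales
         them up to the real operand size. */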

   /* ------------------------ opl Ev, Gv ----------------- */

   case 0x02: /* ADD Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, 1, delta, "add" );
      break;
   case 0x03: /* ADD Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, sz, delta, "add" );
      break;

   case 0x0A: /* OR Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, 1, delta, "or" );
      break;
   case 0x0B: /* OR Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, sz, delta, "or" );
      break;

   case 0x12: /* ADC Eb,Gb */
      delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, 1, delta, "adc" );
      break;
   case 0x13: /* ADC Ev,Gv */
      delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, sz, delta, "adc" );
      break;

   case 0x1A: /* SBB Eb,Gb */
      delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, 1, delta, "sbb" );
      break;
   case 0x1B: /* SBB Ev,Gv */
      delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, sz, delta, "sbb" );
      break;

   case 0x22: /* AND Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_And8, True, 1, delta, "and" );
      break;
   case 0x23: /* AND Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_And8, True, sz, delta, "and" );
      break;

   case 0x2A: /* SUB Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, 1, delta, "sub" );
      break;
   case 0x2B: /* SUB Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, sz, delta, "sub" );
      break;

   case 0x32: /* XOR Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, 1, delta, "xor" );
      break;
   case 0x33: /* XOR Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, sz, delta, "xor" );
      break;

   case 0x3A: /* CMP Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, 1, delta, "cmp" );
      break;
   case 0x3B: /* CMP Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, sz, delta, "cmp" );
      break;

   case 0x84: /* TEST Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_And8, False, 1, delta, "test" );
      break;
   case 0x85: /* TEST Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_And8, False, sz, delta, "test" );
      break;

   /* ------------------------ opl Gv, Ev ----------------- */

   case 0x00: /* ADD Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Add8, True, 1, delta, "add" );
      break;
   case 0x01: /* ADD Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Add8, True, sz, delta, "add" );
      break;

   case 0x08: /* OR Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Or8, True, 1, delta, "or" );
      break;
   case 0x09: /* OR Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Or8, True, sz, delta, "or" );
      break;

   case 0x10: /* ADC Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, True,
                            Iop_Add8, True, 1, delta, "adc" );
      break;
   case 0x11: /* ADC Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, True,
                            Iop_Add8, True, sz, delta, "adc" );
      break;

   case 0x18: /* SBB Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, True,
                            Iop_Sub8, True, 1, delta, "sbb" );
      break;
   case 0x19: /* SBB Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, True,
                            Iop_Sub8, True, sz, delta, "sbb" );
      break;

   case 0x20: /* AND Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_And8, True, 1, delta, "and" );
      break;
   case 0x21: /* AND Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_And8, True, sz, delta, "and" );
      break;

   case 0x28: /* SUB Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Sub8, True, 1, delta, "sub" );
      break;
   case 0x29: /* SUB Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Sub8, True, sz, delta, "sub" );
      break;

   case 0x30: /* XOR Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Xor8, True, 1, delta, "xor" );
      break;
   case 0x31: /* XOR Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Xor8, True, sz, delta, "xor" );
      break;

   case 0x38: /* CMP Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Sub8, False, 1, delta, "cmp" );
      break;
   case 0x39: /* CMP Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Sub8, False, sz, delta, "cmp" );
      break;
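
      /* The G,E forms take pfx_lock because their destination (the E
         operand) may be memory and is therefore lockable; in the E,G
         forms above the destination is always a register, so a LOCK
         prefix is meaningless for them. */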

   /* ------------------------ POP ------------------------ */

   case 0x58: /* POP eAX */
   case 0x59: /* POP eCX */
   case 0x5A: /* POP eDX */
   case 0x5B: /* POP eBX */
   case 0x5D: /* POP eBP */
   case 0x5E: /* POP eSI */
   case 0x5F: /* POP eDI */
   case 0x5C: /* POP eSP */
      vassert(sz == 2 || sz == 4);
      t1 = newTemp(szToITy(sz)); t2 = newTemp(Ity_I32);
      assign(t2, getIReg(4, R_ESP));
      assign(t1, loadLE(szToITy(sz),mkexpr(t2)));
      putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz)));
      putIReg(sz, opc-0x58, mkexpr(t1));
      DIP("pop%c %s\n", nameISize(sz), nameIReg(sz,opc-0x58));
      break;

   case 0x9D: /* POPF */
      vassert(sz == 2 || sz == 4);
      t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32);
      assign(t2, getIReg(4, R_ESP));
      assign(t1, widenUto32(loadLE(szToITy(sz),mkexpr(t2))));
      putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz)));

      /* Generate IR to set %EFLAGS{O,S,Z,A,C,P,D,ID,AC} from the
         value in t1. */
      set_EFLAGS_from_value( t1, True/*emit_AC_emwarn*/,
                             ((Addr32)guest_EIP_bbstart)+delta );

      DIP("popf%c\n", nameISize(sz));
      break;

   case 0x61: /* POPA */
      /* This is almost certainly wrong for sz==2.  So ... */
      if (sz != 4) goto decode_failure;

      /* t5 is the old %ESP value. */
      t5 = newTemp(Ity_I32);
      assign( t5, getIReg(4, R_ESP) );

      /* Reload all the registers, except %esp. */
      putIReg(4,R_EAX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(28)) ));
      putIReg(4,R_ECX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(24)) ));
      putIReg(4,R_EDX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(20)) ));
      putIReg(4,R_EBX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(16)) ));
      /* ignore saved %ESP */
      putIReg(4,R_EBP, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 8)) ));
      putIReg(4,R_ESI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 4)) ));
      putIReg(4,R_EDI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 0)) ));

      /* and move %ESP back up */
      putIReg( 4, R_ESP, binop(Iop_Add32, mkexpr(t5), mkU32(8*4)) );

      DIP("popa%c\n", nameISize(sz));
      break;
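
      /* These offsets mirror the PUSHA frame built below: EDI at +0
         up through EAX at +28, with the stacked ESP copy at +12
         skipped, matching the architectural rule that POPA discards
         the saved ESP. */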

   case 0x8F: /* POPL/POPW m32 */
    { Int len;
      UChar rm = getIByte(delta);

      /* make sure this instruction is correct POP */
      if (epartIsReg(rm) || gregOfRM(rm) != 0)
         goto decode_failure;
      /* and has correct size */
      if (sz != 4 && sz != 2)
         goto decode_failure;
      ty = szToITy(sz);

      t1 = newTemp(Ity_I32); /* stack address */
      t3 = newTemp(ty); /* data */
      /* set t1 to ESP: t1 = ESP */
      assign( t1, getIReg(4, R_ESP) );
      /* load M[ESP] to virtual register t3: t3 = M[t1] */
      assign( t3, loadLE(ty, mkexpr(t1)) );

      /* increase ESP; must be done before the STORE.  Intel manual
         says: If the ESP register is used as a base register for
         addressing a destination operand in memory, the POP
         instruction computes the effective address of the operand
         after it increments the ESP register. */
      putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(sz)) );

      /* resolve MODR/M */
      addr = disAMode ( &len, sorb, delta, dis_buf);
      storeLE( mkexpr(addr), mkexpr(t3) );

      DIP("pop%c %s\n", sz==2 ? 'w' : 'l', dis_buf);

      delta += len;
      break;
    }

   case 0x1F: /* POP %DS */
      dis_pop_segreg( R_DS, sz ); break;
   case 0x07: /* POP %ES */
      dis_pop_segreg( R_ES, sz ); break;
   case 0x17: /* POP %SS */
      dis_pop_segreg( R_SS, sz ); break;

   /* ------------------------ PUSH ----------------------- */

   case 0x50: /* PUSH eAX */
   case 0x51: /* PUSH eCX */
   case 0x52: /* PUSH eDX */
   case 0x53: /* PUSH eBX */
   case 0x55: /* PUSH eBP */
   case 0x56: /* PUSH eSI */
   case 0x57: /* PUSH eDI */
   case 0x54: /* PUSH eSP */
      /* This is the Right Way, in that the value to be pushed is
         established before %esp is changed, so that pushl %esp
         correctly pushes the old value. */
      vassert(sz == 2 || sz == 4);
      ty = sz==2 ? Ity_I16 : Ity_I32;
      t1 = newTemp(ty); t2 = newTemp(Ity_I32);
      assign(t1, getIReg(sz, opc-0x50));
      assign(t2, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)));
      putIReg(4, R_ESP, mkexpr(t2) );
      storeLE(mkexpr(t2),mkexpr(t1));
      DIP("push%c %s\n", nameISize(sz), nameIReg(sz,opc-0x50));
      break;

   case 0x68: /* PUSH Iv */
      d32 = getUDisp(sz,delta); delta += sz;
      goto do_push_I;
   case 0x6A: /* PUSH Ib, sign-extended to sz */
      d32 = getSDisp8(delta); delta += 1;
      goto do_push_I;
   do_push_I:
      ty = szToITy(sz);
      t1 = newTemp(Ity_I32); t2 = newTemp(ty);
      assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
      putIReg(4, R_ESP, mkexpr(t1) );
      /* stop mkU16 asserting if d32 is a negative 16-bit number
         (bug #132813) */
      if (ty == Ity_I16)
         d32 &= 0xFFFF;
      storeLE( mkexpr(t1), mkU(ty,d32) );
      DIP("push%c $0x%x\n", nameISize(sz), d32);
      break;

   case 0x9C: /* PUSHF */ {
      vassert(sz == 2 || sz == 4);

      t1 = newTemp(Ity_I32);
      assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
      putIReg(4, R_ESP, mkexpr(t1) );

      /* Calculate OSZACP, and patch in fixed fields as per
         Intel docs.
         - bit 1 is always 1
         - bit 9 is Interrupt Enable (should always be 1 in user mode?)
      */
      t2 = newTemp(Ity_I32);
      assign( t2, binop(Iop_Or32,
                        mk_x86g_calculate_eflags_all(),
                        mkU32( (1<<1)|(1<<9) ) ));

      /* Patch in the D flag.  This can simply be a copy of bit 10 of
         baseBlock[OFFB_DFLAG]. */
      t3 = newTemp(Ity_I32);
      assign( t3, binop(Iop_Or32,
                        mkexpr(t2),
                        binop(Iop_And32,
                              IRExpr_Get(OFFB_DFLAG,Ity_I32),
                              mkU32(1<<10)))
            );

      /* And patch in the ID flag. */
      t4 = newTemp(Ity_I32);
      assign( t4, binop(Iop_Or32,
                        mkexpr(t3),
                        binop(Iop_And32,
                              binop(Iop_Shl32, IRExpr_Get(OFFB_IDFLAG,Ity_I32),
                                               mkU8(21)),
                              mkU32(1<<21)))
            );

      /* And patch in the AC flag. */
      t5 = newTemp(Ity_I32);
      assign( t5, binop(Iop_Or32,
                        mkexpr(t4),
                        binop(Iop_And32,
                              binop(Iop_Shl32, IRExpr_Get(OFFB_ACFLAG,Ity_I32),
                                               mkU8(18)),
                              mkU32(1<<18)))
            );

      /* if sz==2, the stored value needs to be narrowed. */
      if (sz == 2)
         storeLE( mkexpr(t1), unop(Iop_32to16,mkexpr(t5)) );
      else
         storeLE( mkexpr(t1), mkexpr(t5) );

      DIP("pushf%c\n", nameISize(sz));
      break;
   }
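
      /* D, ID and AC live outside the OSZACP thunk in their own
         guest-state slots, so PUSHF reassembles a full EFLAGS image
         by ORing each into its architectural position (D at bit 10,
         AC at bit 18, ID at bit 21).  DFLAG is kept as 1/0xFFFFFFFF,
         so masking with 1<<10 extracts the right bit directly;
         IDFLAG and ACFLAG are kept as 0/1 and need the shift first. */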

   case 0x60: /* PUSHA */
      /* This is almost certainly wrong for sz==2.  So ... */
      if (sz != 4) goto decode_failure;

      /* This is the Right Way, in that the value to be pushed is
         established before %esp is changed, so that pusha
         correctly pushes the old %esp value.  New value of %esp is
         pushed at start. */
      /* t0 is the %ESP value we're going to push. */
      t0 = newTemp(Ity_I32);
      assign( t0, getIReg(4, R_ESP) );

      /* t5 will be the new %ESP value. */
      t5 = newTemp(Ity_I32);
      assign( t5, binop(Iop_Sub32, mkexpr(t0), mkU32(8*4)) );

      /* Update guest state before prodding memory. */
      putIReg(4, R_ESP, mkexpr(t5));

      /* Dump all the registers. */
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(28)), getIReg(4,R_EAX) );
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(24)), getIReg(4,R_ECX) );
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(20)), getIReg(4,R_EDX) );
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(16)), getIReg(4,R_EBX) );
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(12)), mkexpr(t0) /*esp*/);
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 8)), getIReg(4,R_EBP) );
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 4)), getIReg(4,R_ESI) );
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 0)), getIReg(4,R_EDI) );

      DIP("pusha%c\n", nameISize(sz));
      break;

   case 0x0E: /* PUSH %CS */
      dis_push_segreg( R_CS, sz ); break;
   case 0x1E: /* PUSH %DS */
      dis_push_segreg( R_DS, sz ); break;
   case 0x06: /* PUSH %ES */
      dis_push_segreg( R_ES, sz ); break;
   case 0x16: /* PUSH %SS */
      dis_push_segreg( R_SS, sz ); break;

   /* ------------------------ SCAS et al ----------------- */

   case 0xA4: /* MOVS, no REP prefix */
   case 0xA5:
      if (sorb != 0)
         goto decode_failure; /* else dis_string_op asserts */
      dis_string_op( dis_MOVS, ( opc == 0xA4 ? 1 : sz ), "movs", sorb );
      break;

   case 0xA6: /* CMPSb, no REP prefix */
   case 0xA7:
      if (sorb != 0)
         goto decode_failure; /* else dis_string_op asserts */
      dis_string_op( dis_CMPS, ( opc == 0xA6 ? 1 : sz ), "cmps", sorb );
      break;

   case 0xAA: /* STOS, no REP prefix */
   case 0xAB:
      if (sorb != 0)
         goto decode_failure; /* else dis_string_op asserts */
      dis_string_op( dis_STOS, ( opc == 0xAA ? 1 : sz ), "stos", sorb );
      break;

   case 0xAC: /* LODS, no REP prefix */
   case 0xAD:
      if (sorb != 0)
         goto decode_failure; /* else dis_string_op asserts */
      dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", sorb );
      break;

   case 0xAE: /* SCAS, no REP prefix */
   case 0xAF:
      if (sorb != 0)
         goto decode_failure; /* else dis_string_op asserts */
      dis_string_op( dis_SCAS, ( opc == 0xAE ? 1 : sz ), "scas", sorb );
      break;

   case 0xFC: /* CLD */
      stmt( IRStmt_Put( OFFB_DFLAG, mkU32(1)) );
      DIP("cld\n");
      break;

   case 0xFD: /* STD */
      stmt( IRStmt_Put( OFFB_DFLAG, mkU32(0xFFFFFFFF)) );
      DIP("std\n");
      break;

   case 0xF8: /* CLC */
   case 0xF9: /* STC */
   case 0xF5: /* CMC */
      t0 = newTemp(Ity_I32);
      t1 = newTemp(Ity_I32);
      assign( t0, mk_x86g_calculate_eflags_all() );
      switch (opc) {
         case 0xF8:
            assign( t1, binop(Iop_And32, mkexpr(t0),
                              mkU32(~X86G_CC_MASK_C)));
            DIP("clc\n");
            break;
         case 0xF9:
            assign( t1, binop(Iop_Or32, mkexpr(t0),
                              mkU32(X86G_CC_MASK_C)));
            DIP("stc\n");
            break;
         case 0xF5:
            assign( t1, binop(Iop_Xor32, mkexpr(t0),
                              mkU32(X86G_CC_MASK_C)));
            DIP("cmc\n");
            break;
         default:
            vpanic("disInstr(x86)(clc/stc/cmc)");
      }
      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t1) ));
      /* Set NDEP even though it isn't used.  This makes redundant-PUT
         elimination of previous stores to this field work better. */
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
      break;
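
      /* Since only CF changes, the full current flag set is
         materialised with calculate_eflags_all, the C bit is
         cleared/set/flipped via AND/OR/XOR with X86G_CC_MASK_C, and
         the result is written back through the OP_COPY thunk
         encoding, in which DEP1 holds the literal flag vector. */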

   case 0xD6: /* SALC */
      t0 = newTemp(Ity_I32);
      t1 = newTemp(Ity_I32);
      assign( t0, binop(Iop_And32,
                        mk_x86g_calculate_eflags_c(),
                        mkU32(1)) );
      assign( t1, binop(Iop_Sar32,
                        binop(Iop_Shl32, mkexpr(t0), mkU8(31)),
                        mkU8(31)) );
      putIReg(1, R_EAX, unop(Iop_32to8, mkexpr(t1)) );
      DIP("salc\n");
      break;
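
      /* SALC is the undocumented set-AL-from-carry opcode: AL becomes
         0xFF if CF is set, else 0x00.  The shift-left-31 followed by
         arithmetic-shift-right-31 smears the single carry bit across
         all 32 bits before the low byte is taken. */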

   /* REPNE prefix insn */
   case 0xF2: {
      Addr32 eip_orig = guest_EIP_bbstart + delta_start;
      if (sorb != 0) goto decode_failure;
      abyte = getIByte(delta); delta++;

      if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; }

      switch (abyte) {
      /* According to the Intel manual, "repne movs" should never occur, but
       * in practice it has happened, so allow for it here... */
      case 0xA4: sz = 1;   /* REPNE MOVS<sz> fallthrough */
      case 0xA5:
         dis_REP_op ( &dres, X86CondNZ, dis_MOVS, sz, eip_orig,
                      guest_EIP_bbstart+delta, "repne movs" );
         break;

      case 0xA6: sz = 1;   /* REPNE CMP<sz> fallthrough */
      case 0xA7:
         dis_REP_op ( &dres, X86CondNZ, dis_CMPS, sz, eip_orig,
                      guest_EIP_bbstart+delta, "repne cmps" );
         break;

      case 0xAA: sz = 1;   /* REPNE STOS<sz> fallthrough */
      case 0xAB:
         dis_REP_op ( &dres, X86CondNZ, dis_STOS, sz, eip_orig,
                      guest_EIP_bbstart+delta, "repne stos" );
         break;

      case 0xAE: sz = 1;   /* REPNE SCAS<sz> fallthrough */
      case 0xAF:
         dis_REP_op ( &dres, X86CondNZ, dis_SCAS, sz, eip_orig,
                      guest_EIP_bbstart+delta, "repne scas" );
         break;

      default:
         goto decode_failure;
      }
      break;
   }

   /* REP/REPE prefix insn (for SCAS and CMPS, 0xF3 means REPE,
      for the rest, it means REP) */
   case 0xF3: {
      Addr32 eip_orig = guest_EIP_bbstart + delta_start;
      abyte = getIByte(delta); delta++;

      if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; }

      if (sorb != 0 && abyte != 0x0F) goto decode_failure;

      switch (abyte) {
      case 0x0F:
         switch (getIByte(delta)) {
         /* On older CPUs, TZCNT behaves the same as BSF. */
         case 0xBC: /* REP BSF Gv,Ev */
            delta = dis_bs_E_G ( sorb, sz, delta + 1, True );
            break;
         /* On older CPUs, LZCNT behaves the same as BSR. */
         case 0xBD: /* REP BSR Gv,Ev */
            delta = dis_bs_E_G ( sorb, sz, delta + 1, False );
            break;
         default:
            goto decode_failure;
         }
         break;

      case 0xA4: sz = 1;   /* REP MOVS<sz> fallthrough */
      case 0xA5:
         dis_REP_op ( &dres, X86CondAlways, dis_MOVS, sz, eip_orig,
                      guest_EIP_bbstart+delta, "rep movs" );
         break;

      case 0xA6: sz = 1;   /* REPE CMP<sz> fallthrough */
      case 0xA7:
         dis_REP_op ( &dres, X86CondZ, dis_CMPS, sz, eip_orig,
                      guest_EIP_bbstart+delta, "repe cmps" );
         break;

      case 0xAA: sz = 1;   /* REP STOS<sz> fallthrough */
      case 0xAB:
         dis_REP_op ( &dres, X86CondAlways, dis_STOS, sz, eip_orig,
                      guest_EIP_bbstart+delta, "rep stos" );
         break;

      case 0xAC: sz = 1;   /* REP LODS<sz> fallthrough */
      case 0xAD:
         dis_REP_op ( &dres, X86CondAlways, dis_LODS, sz, eip_orig,
                      guest_EIP_bbstart+delta, "rep lods" );
         break;

      case 0xAE: sz = 1;   /* REPE SCAS<sz> fallthrough */
      case 0xAF:
         dis_REP_op ( &dres, X86CondZ, dis_SCAS, sz, eip_orig,
                      guest_EIP_bbstart+delta, "repe scas" );
         break;

      case 0x90: /* REP NOP (PAUSE) */
         /* a hint to the P4 re spin-wait loop */
         DIP("rep nop (P4 pause)\n");
         /* "observe" the hint.  The Vex client needs to be careful not
            to cause very long delays as a result, though. */
         jmp_lit(&dres, Ijk_Yield, ((Addr32)guest_EIP_bbstart)+delta);
         vassert(dres.whatNext == Dis_StopHere);
         break;

      case 0xC3: /* REP RET -- same as normal ret? */
         dis_ret(&dres, 0);
         DIP("rep ret\n");
         break;

      default:
         goto decode_failure;
      }
      break;
   }
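
      /* As the eip_orig/eip_next pairing suggests, dis_REP_op
         translates a REP insn one iteration at a time, looping back
         to eip_orig while ECX and the given X86Condcode permit
         (X86CondAlways meaning plain REP, X86CondZ/NZ the REPE/REPNE
         variants), and falling through to the next insn otherwise. */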

   /* ------------------------ XCHG ----------------------- */

   /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
      prefix; hence it must be translated with an IRCAS (at least, the
      memory variant). */
   case 0x86: /* XCHG Gb,Eb */
      sz = 1;
      /* Fall through ... */
   case 0x87: /* XCHG Gv,Ev */
      modrm = getIByte(delta);
      ty = szToITy(sz);
      t1 = newTemp(ty); t2 = newTemp(ty);
      if (epartIsReg(modrm)) {
         assign(t1, getIReg(sz, eregOfRM(modrm)));
         assign(t2, getIReg(sz, gregOfRM(modrm)));
         putIReg(sz, gregOfRM(modrm), mkexpr(t1));
         putIReg(sz, eregOfRM(modrm), mkexpr(t2));
         delta++;
         DIP("xchg%c %s, %s\n",
             nameISize(sz), nameIReg(sz,gregOfRM(modrm)),
                            nameIReg(sz,eregOfRM(modrm)));
      } else {
         *expect_CAS = True;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         assign( t1, loadLE(ty,mkexpr(addr)) );
         assign( t2, getIReg(sz,gregOfRM(modrm)) );
         casLE( mkexpr(addr),
                mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
         putIReg( sz, gregOfRM(modrm), mkexpr(t1) );
         delta += alen;
         DIP("xchg%c %s, %s\n", nameISize(sz),
                                nameIReg(sz,gregOfRM(modrm)), dis_buf);
      }
      break;

   case 0x90: /* XCHG eAX,eAX */
      DIP("nop\n");
      break;
   case 0x91: /* XCHG eAX,eCX */
   case 0x92: /* XCHG eAX,eDX */
   case 0x93: /* XCHG eAX,eBX */
   case 0x94: /* XCHG eAX,eSP */
   case 0x95: /* XCHG eAX,eBP */
   case 0x96: /* XCHG eAX,eSI */
   case 0x97: /* XCHG eAX,eDI */
      codegen_xchg_eAX_Reg ( sz, opc - 0x90 );
      break;

   /* ------------------------ XLAT ----------------------- */

   case 0xD7: /* XLAT */
      if (sz != 4) goto decode_failure; /* sz == 2 is also allowed (0x66) */
      putIReg(
         1,
         R_EAX/*AL*/,
         loadLE(Ity_I8,
                handleSegOverride(
                   sorb,
                   binop(Iop_Add32,
                         getIReg(4, R_EBX),
                         unop(Iop_8Uto32, getIReg(1, R_EAX/*AL*/))))));

      DIP("xlat%c [ebx]\n", nameISize(sz));
      break;
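
      /* XLAT is a byte table lookup: AL = M[seg:EBX + zero-extended
         AL], which is exactly the add/8Uto32/load chain built
         above. */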

   /* ------------------------ IN / OUT ----------------------- */

   case 0xE4: /* IN imm8, AL */
      sz = 1;
      t1 = newTemp(Ity_I32);
      abyte = getIByte(delta); delta++;
      assign(t1, mkU32( abyte & 0xFF ));
      DIP("in%c $%d,%s\n", nameISize(sz), abyte, nameIReg(sz,R_EAX));
      goto do_IN;
   case 0xE5: /* IN imm8, eAX */
      vassert(sz == 2 || sz == 4);
      t1 = newTemp(Ity_I32);
      abyte = getIByte(delta); delta++;
      assign(t1, mkU32( abyte & 0xFF ));
      DIP("in%c $%d,%s\n", nameISize(sz), abyte, nameIReg(sz,R_EAX));
      goto do_IN;
   case 0xEC: /* IN %DX, AL */
      sz = 1;
      t1 = newTemp(Ity_I32);
      assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX)));
      DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX),
                          nameIReg(sz,R_EAX));
      goto do_IN;
   case 0xED: /* IN %DX, eAX */
      vassert(sz == 2 || sz == 4);
      t1 = newTemp(Ity_I32);
      assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX)));
      DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX),
                          nameIReg(sz,R_EAX));
      goto do_IN;
   do_IN: {
      /* At this point, sz indicates the width, and t1 is a 32-bit
         value giving port number. */
      IRDirty* d;
      vassert(sz == 1 || sz == 2 || sz == 4);
      ty = szToITy(sz);
      t2 = newTemp(Ity_I32);
      d = unsafeIRDirty_1_N(
             t2,
             0/*regparms*/,
             "x86g_dirtyhelper_IN",
             &x86g_dirtyhelper_IN,
             mkIRExprVec_2( mkexpr(t1), mkU32(sz) )
          );
      /* do the call, dumping the result in t2. */
      stmt( IRStmt_Dirty(d) );
      putIReg(sz, R_EAX, narrowTo( ty, mkexpr(t2) ) );
      break;
   }
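
      /* A port read has effects outside the IR's reach, so it is
         routed through a dirty helper executed at run time; t2
         receives the helper's 32-bit result, which narrowTo trims to
         the access width before it lands in %EAX/%AX/%AL. */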

   case 0xE6: /* OUT AL, imm8 */
      sz = 1;
      t1 = newTemp(Ity_I32);
      abyte = getIByte(delta); delta++;
      assign( t1, mkU32( abyte & 0xFF ) );
      DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), abyte);
      goto do_OUT;
   case 0xE7: /* OUT eAX, imm8 */
      vassert(sz == 2 || sz == 4);
      t1 = newTemp(Ity_I32);
      abyte = getIByte(delta); delta++;
      assign( t1, mkU32( abyte & 0xFF ) );
      DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), abyte);
      goto do_OUT;
   case 0xEE: /* OUT AL, %DX */
      sz = 1;
      t1 = newTemp(Ity_I32);
      assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) );
      DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX),
                           nameIReg(2,R_EDX));
      goto do_OUT;
   case 0xEF: /* OUT eAX, %DX */
      vassert(sz == 2 || sz == 4);
      t1 = newTemp(Ity_I32);
      assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) );
      DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX),
                           nameIReg(2,R_EDX));
      goto do_OUT;
   do_OUT: {
      /* At this point, sz indicates the width, and t1 is a 32-bit
         value giving port number. */
      IRDirty* d;
      vassert(sz == 1 || sz == 2 || sz == 4);
      ty = szToITy(sz);
      d = unsafeIRDirty_0_N(
             0/*regparms*/,
             "x86g_dirtyhelper_OUT",
             &x86g_dirtyhelper_OUT,
             mkIRExprVec_3( mkexpr(t1),
                            widenUto32( getIReg(sz, R_EAX) ),
                            mkU32(sz) )
          );
      stmt( IRStmt_Dirty(d) );
      break;
   }

   /* ------------------------ (Grp1 extensions) ---------- */

   case 0x82: /* Grp1 Ib,Eb too.  Apparently this is the same as
                 case 0x80, but only in 32-bit mode. */
      /* fallthru */
   case 0x80: /* Grp1 Ib,Eb */
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      sz    = 1;
      d_sz  = 1;
      d32   = getUChar(delta + am_sz);
      delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
      break;

   case 0x81: /* Grp1 Iv,Ev */
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      d_sz  = sz;
      d32   = getUDisp(d_sz, delta + am_sz);
      delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
      break;

   case 0x83: /* Grp1 Ib,Ev */
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 1;
      d32   = getSDisp8(delta + am_sz);
      delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
      break;
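
      /* In the Grp1 forms the reg field of the mod/rm byte selects
         the operation (0=ADD, 1=OR, 2=ADC, 3=SBB, 4=AND, 5=SUB,
         6=XOR, 7=CMP); am_sz and d_sz tell dis_Grp1 where the
         immediate sits past the address mode, and 0x83 sign-extends
         its byte immediate to the full operand size. */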

   /* ------------------------ (Grp2 extensions) ---------- */

   case 0xC0: { /* Grp2 Ib,Eb */
      Bool decode_OK = True;
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 1;
      d32   = getUChar(delta + am_sz);
      sz    = 1;
      delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
                         mkU8(d32 & 0xFF), NULL, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xC1: { /* Grp2 Ib,Ev */
      Bool decode_OK = True;
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 1;
      d32   = getUChar(delta + am_sz);
      delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
                         mkU8(d32 & 0xFF), NULL, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xD0: { /* Grp2 1,Eb */
      Bool decode_OK = True;
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 0;
      d32   = 1;
      sz    = 1;
      delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
                         mkU8(d32), NULL, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xD1: { /* Grp2 1,Ev */
      Bool decode_OK = True;
      modrm = getUChar(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 0;
      d32   = 1;
      delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
                         mkU8(d32), NULL, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xD2: { /* Grp2 CL,Eb */
      Bool decode_OK = True;
      modrm = getUChar(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 0;
      sz    = 1;
      delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
                         getIReg(1,R_ECX), "%cl", &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xD3: { /* Grp2 CL,Ev */
      Bool decode_OK = True;
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 0;
      delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
                         getIReg(1,R_ECX), "%cl", &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
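
      /* Grp2 covers the shifts and rotates (reg field: 0=ROL, 1=ROR,
         2=RCL, 3=RCR, 4=SHL, 5=SHR, 7=SAR); the shift amount arrives
         here as an immediate byte, the constant 1, or %cl, and the
         "%cl" string is only used for the disassembly printout. */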

   /* ------------------------ (Grp3 extensions) ---------- */

   case 0xF6: { /* Grp3 Eb */
      Bool decode_OK = True;
      delta = dis_Grp3 ( sorb, pfx_lock, 1, delta, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xF7: { /* Grp3 Ev */
      Bool decode_OK = True;
      delta = dis_Grp3 ( sorb, pfx_lock, sz, delta, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }

   /* ------------------------ (Grp4 extensions) ---------- */

   case 0xFE: { /* Grp4 Eb */
      Bool decode_OK = True;
      delta = dis_Grp4 ( sorb, pfx_lock, delta, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }

   /* ------------------------ (Grp5 extensions) ---------- */

   case 0xFF: { /* Grp5 Ev */
      Bool decode_OK = True;
      delta = dis_Grp5 ( sorb, pfx_lock, sz, delta, &dres, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }

   /* ------------------------ Escapes to 2-byte opcodes -- */

   case 0x0F: {
      opc = getIByte(delta); delta++;
      switch (opc) {

      /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */

      case 0xBA: { /* Grp8 Ib,Ev */
         Bool decode_OK = False;
         modrm = getUChar(delta);
         am_sz = lengthAMode(delta);
         d32   = getSDisp8(delta + am_sz);
         delta = dis_Grp8_Imm ( sorb, pfx_lock, delta, modrm,
                                am_sz, sz, d32, &decode_OK );
         if (!decode_OK)
            goto decode_failure;
         break;
      }

      /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */

      case 0xBC: /* BSF Gv,Ev */
         delta = dis_bs_E_G ( sorb, sz, delta, True );
         break;
      case 0xBD: /* BSR Gv,Ev */
         delta = dis_bs_E_G ( sorb, sz, delta, False );
         break;

      /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */

      case 0xC8: /* BSWAP %eax */
      case 0xC9:
      case 0xCA:
      case 0xCB:
      case 0xCC:
      case 0xCD:
      case 0xCE:
      case 0xCF: /* BSWAP %edi */
         /* AFAICS from the Intel docs, this only exists at size 4. */
         if (sz != 4) goto decode_failure;

         t1 = newTemp(Ity_I32);
         assign( t1, getIReg(4, opc-0xC8) );
         t2 = math_BSWAP(t1, Ity_I32);

         putIReg(4, opc-0xC8, mkexpr(t2));
         DIP("bswapl %s\n", nameIReg(4, opc-0xC8));
         break;
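
         /* math_BSWAP reverses the byte order of the 32-bit value,
            e.g. 0x12345678 becomes 0x78563412; the register index is
            simply opc-0xC8, the eight opcodes mapping onto %eax..%edi
            in encoding order. */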

      /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */

      case 0xA3: /* BT Gv,Ev */
         delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpNone );
         break;
      case 0xB3: /* BTR Gv,Ev */
         delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpReset );
         break;
      case 0xAB: /* BTS Gv,Ev */
         delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpSet );
         break;
      case 0xBB: /* BTC Gv,Ev */
         delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpComp );
         break;

      /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */

      case 0x40:
      case 0x41:
      case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
      case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
      case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
      case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
      case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
      case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
      case 0x48: /* CMOVSb (cmov negative) */
      case 0x49: /* CMOVNSb (cmov not negative) */
      case 0x4A: /* CMOVP (cmov parity even) */
      case 0x4B: /* CMOVNP (cmov parity odd) */
      case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
      case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
      case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
      case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
         delta = dis_cmov_E_G(sorb, sz, (X86Condcode)(opc - 0x40), delta);
         break;

      /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */

      case 0xB0: /* CMPXCHG Gb,Eb */
         delta = dis_cmpxchg_G_E ( sorb, pfx_lock, 1, delta );
         break;
      case 0xB1: /* CMPXCHG Gv,Ev */
         delta = dis_cmpxchg_G_E ( sorb, pfx_lock, sz, delta );
         break;

      case 0xC7: { /* CMPXCHG8B Gv (0F C7 /1) */
         IRTemp expdHi    = newTemp(Ity_I32);
         IRTemp expdLo    = newTemp(Ity_I32);
         IRTemp dataHi    = newTemp(Ity_I32);
         IRTemp dataLo    = newTemp(Ity_I32);
         IRTemp oldHi     = newTemp(Ity_I32);
         IRTemp oldLo     = newTemp(Ity_I32);
         IRTemp flags_old = newTemp(Ity_I32);
         IRTemp flags_new = newTemp(Ity_I32);
         IRTemp success   = newTemp(Ity_I1);

         /* Translate this using a DCAS, even if there is no LOCK
            prefix.  Life is too short to bother with generating two
            different translations for the with/without-LOCK-prefix
            cases. */
         *expect_CAS = True;

         /* Decode, and generate address. */
         if (sz != 4) goto decode_failure;
         modrm = getIByte(delta);
         if (epartIsReg(modrm)) goto decode_failure;
         if (gregOfRM(modrm) != 1) goto decode_failure;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;

         /* Get the expected and new values. */
         assign( expdHi, getIReg(4,R_EDX) );
         assign( expdLo, getIReg(4,R_EAX) );
         assign( dataHi, getIReg(4,R_ECX) );
         assign( dataLo, getIReg(4,R_EBX) );

         /* Do the DCAS */
         stmt( IRStmt_CAS(
                  mkIRCAS( oldHi, oldLo,
                           Iend_LE, mkexpr(addr),
                           mkexpr(expdHi), mkexpr(expdLo),
                           mkexpr(dataHi), mkexpr(dataLo)
               )));

         /* success when oldHi:oldLo == expdHi:expdLo */
         assign( success,
                 binop(Iop_CasCmpEQ32,
                       binop(Iop_Or32,
                             binop(Iop_Xor32, mkexpr(oldHi), mkexpr(expdHi)),
                             binop(Iop_Xor32, mkexpr(oldLo), mkexpr(expdLo))
                       ),
                       mkU32(0)
                 ));

         /* If the DCAS is successful, that is to say oldHi:oldLo ==
            expdHi:expdLo, then put expdHi:expdLo back in EDX:EAX,
            which is where they came from originally.  Both the actual
            contents of these two regs, and any shadow values, are
            unchanged.  If the DCAS fails then we're putting into
            EDX:EAX the value seen in memory. */
         putIReg(4, R_EDX,
                 IRExpr_ITE( mkexpr(success),
                             mkexpr(expdHi), mkexpr(oldHi)
                ));
         putIReg(4, R_EAX,
                 IRExpr_ITE( mkexpr(success),
                             mkexpr(expdLo), mkexpr(oldLo)
                ));

         /* Copy the success bit into the Z flag and leave the others
            unchanged */
         assign( flags_old, widenUto32(mk_x86g_calculate_eflags_all()));
         assign(
            flags_new,
            binop(Iop_Or32,
                  binop(Iop_And32, mkexpr(flags_old),
                                   mkU32(~X86G_CC_MASK_Z)),
                  binop(Iop_Shl32,
                        binop(Iop_And32,
                              unop(Iop_1Uto32, mkexpr(success)), mkU32(1)),
                        mkU8(X86G_CC_SHIFT_Z)) ));

         stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
         stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
         stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
         /* Set NDEP even though it isn't used.  This makes
            redundant-PUT elimination of previous stores to this field
            work better. */
         stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

         /* Sheesh.  Aren't you glad it was me and not you that had to
            write and validate all this grunge? */

         DIP("cmpxchg8b %s\n", dis_buf);
         break;
      }
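
         /* The success test computes (oldHi^expdHi)|(oldLo^expdLo)
            == 0, a branch-free 64-bit equality built from 32-bit
            pieces.  Architecturally CMPXCHG8B sets ZF on success and
            clears it on failure, which is what the flags_new
            construction encodes. */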

      /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */

      case 0xA2: { /* CPUID */
         /* Uses dirty helper:
               void dirtyhelper_CPUID_sse[012] ( VexGuestX86State* )
            declared to mod eax, wr ebx, ecx, edx
         */
         IRDirty* d     = NULL;
         void*    fAddr = NULL;
         const HChar* fName = NULL;
         if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE3) {
            fName = "x86g_dirtyhelper_CPUID_sse3";
            fAddr = &x86g_dirtyhelper_CPUID_sse3;
         }
         else
         if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2) {
            fName = "x86g_dirtyhelper_CPUID_sse2";
            fAddr = &x86g_dirtyhelper_CPUID_sse2;
         }
         else
         if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE1) {
            fName = "x86g_dirtyhelper_CPUID_sse1";
            fAddr = &x86g_dirtyhelper_CPUID_sse1;
         }
         else
         if (archinfo->hwcaps & VEX_HWCAPS_X86_MMXEXT) {
            fName = "x86g_dirtyhelper_CPUID_mmxext";
            fAddr = &x86g_dirtyhelper_CPUID_mmxext;
         }
         else
         if (archinfo->hwcaps == 0/*no SSE*/) {
            fName = "x86g_dirtyhelper_CPUID_sse0";
            fAddr = &x86g_dirtyhelper_CPUID_sse0;
         } else
            vpanic("disInstr(x86)(cpuid)");

         vassert(fName); vassert(fAddr);
         d = unsafeIRDirty_0_N ( 0/*regparms*/,
                                 fName, fAddr, mkIRExprVec_1(IRExpr_GSPTR()) );
         /* declare guest state effects */
         d->nFxState = 4;
         vex_bzero(&d->fxState, sizeof(d->fxState));
         d->fxState[0].fx     = Ifx_Modify;
         d->fxState[0].offset = OFFB_EAX;
         d->fxState[0].size   = 4;
         d->fxState[1].fx     = Ifx_Write;
         d->fxState[1].offset = OFFB_EBX;
         d->fxState[1].size   = 4;
         d->fxState[2].fx     = Ifx_Modify;
         d->fxState[2].offset = OFFB_ECX;
         d->fxState[2].size   = 4;
         d->fxState[3].fx     = Ifx_Write;
         d->fxState[3].offset = OFFB_EDX;
         d->fxState[3].size   = 4;
         /* execute the dirty call, side-effecting guest state */
         stmt( IRStmt_Dirty(d) );
         /* CPUID is a serialising insn.  So, just in case someone is
            using it as a memory fence ... */
         stmt( IRStmt_MBE(Imbe_Fence) );
         DIP("cpuid\n");
         break;
      }
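
         /* The fxState declarations matter to tools tracking guest
            state: EAX and ECX are Ifx_Modify because CPUID reads them
            as leaf/subleaf selectors before overwriting them, while
            EBX and EDX are pure Ifx_Write.  The helper variant is
            picked so the emulated CPUID reports a feature set
            consistent with the hwcaps VEX was configured with. */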

//--         if (!VG_(cpu_has_feature)(VG_X86_FEAT_CPUID))
//--            goto decode_failure;
//--
//--         t1 = newTemp(cb);
//--         t2 = newTemp(cb);
//--         t3 = newTemp(cb);
//--         t4 = newTemp(cb);
//--         uInstr0(cb, CALLM_S, 0);
//--
//--         uInstr2(cb, GET,   4, ArchReg, R_EAX, TempReg, t1);
//--         uInstr1(cb, PUSH,  4, TempReg, t1);
//--
//--         uInstr2(cb, MOV,   4, Literal, 0, TempReg, t2);
//--         uLiteral(cb, 0);
//--         uInstr1(cb, PUSH,  4, TempReg, t2);
//--
//--         uInstr2(cb, MOV,   4, Literal, 0, TempReg, t3);
//--         uLiteral(cb, 0);
//--         uInstr1(cb, PUSH,  4, TempReg, t3);
//--
//--         uInstr2(cb, MOV,   4, Literal, 0, TempReg, t4);
//--         uLiteral(cb, 0);
//--         uInstr1(cb, PUSH,  4, TempReg, t4);
//--
//--         uInstr1(cb, CALLM, 0, Lit16,   VGOFF_(helper_CPUID));
//--         uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty);
//--
//--         uInstr1(cb, POP,   4, TempReg, t4);
//--         uInstr2(cb, PUT,   4, TempReg, t4, ArchReg, R_EDX);
//--
//--         uInstr1(cb, POP,   4, TempReg, t3);
//--         uInstr2(cb, PUT,   4, TempReg, t3, ArchReg, R_ECX);
//--
//--         uInstr1(cb, POP,   4, TempReg, t2);
//--         uInstr2(cb, PUT,   4, TempReg, t2, ArchReg, R_EBX);
//--
//--         uInstr1(cb, POP,   4, TempReg, t1);
//--         uInstr2(cb, PUT,   4, TempReg, t1, ArchReg, R_EAX);
//--
//--         uInstr0(cb, CALLM_E, 0);
//--         DIP("cpuid\n");
//--         break;
//--
      /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */

      case 0xB6: /* MOVZXb Eb,Gv */
         if (sz != 2 && sz != 4)
            goto decode_failure;
         delta = dis_movx_E_G ( sorb, delta, 1, sz, False );
         break;

      case 0xB7: /* MOVZXw Ew,Gv */
         if (sz != 4)
            goto decode_failure;
         delta = dis_movx_E_G ( sorb, delta, 2, 4, False );
         break;

      case 0xBE: /* MOVSXb Eb,Gv */
         if (sz != 2 && sz != 4)
            goto decode_failure;
         delta = dis_movx_E_G ( sorb, delta, 1, sz, True );
         break;

      case 0xBF: /* MOVSXw Ew,Gv */
         if (sz != 4 && /* accept movsww, sigh, see #250799 */sz != 2)
            goto decode_failure;
         delta = dis_movx_E_G ( sorb, delta, 2, sz, True );
         break;

//--         /* =-=-=-=-=-=-=-=-=-=-= MOVNTI -=-=-=-=-=-=-=-=-= */
//--
//--         case 0xC3: /* MOVNTI Gv,Ev */
//--            vg_assert(sz == 4);
//--            modrm = getUChar(eip);
//--            vg_assert(!epartIsReg(modrm));
//--            t1 = newTemp(cb);
//--            uInstr2(cb, GET, 4, ArchReg, gregOfRM(modrm), TempReg, t1);
//--            pair = disAMode ( cb, sorb, eip, dis_buf );
//--            t2 = LOW24(pair);
//--            eip += HI8(pair);
//--            uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
//--            DIP("movnti %s,%s\n", nameIReg(4,gregOfRM(modrm)), dis_buf);
//--            break;

      /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */

      case 0xAF: /* IMUL Ev, Gv */
         delta = dis_mul_E_G ( sorb, sz, delta );
         break;

      /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */

      case 0x1F:
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) goto decode_failure;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;
         DIP("nop%c %s\n", nameISize(sz), dis_buf);
         break;
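
         /* 0F 1F is the long multi-byte NOP; the address mode is
            decoded purely to consume the right number of bytes --
            nothing is loaded or stored. */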
      /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */
      case 0x80:
      case 0x81:
      case 0x82: /* JBb/JNAEb (jump below) */
      case 0x83: /* JNBb/JAEb (jump not below) */
      case 0x84: /* JZb/JEb (jump zero) */
      case 0x85: /* JNZb/JNEb (jump not zero) */
      case 0x86: /* JBEb/JNAb (jump below or equal) */
      case 0x87: /* JNBEb/JAb (jump not below or equal) */
      case 0x88: /* JSb (jump negative) */
      case 0x89: /* JNSb (jump not negative) */
      case 0x8A: /* JPb (jump parity even) */
      case 0x8B: /* JNPb/JPOb (jump parity odd) */
      case 0x8C: /* JLb/JNGEb (jump less) */
      case 0x8D: /* JGEb/JNLb (jump greater or equal) */
      case 0x8E: /* JLEb/JNGb (jump less or equal) */
      case 0x8F: /* JGb/JNLEb (jump greater) */
       { Int jmpDelta;
         const HChar* comment = "";
         jmpDelta = (Int)getUDisp32(delta);
         d32 = (((Addr32)guest_EIP_bbstart)+delta+4) + jmpDelta;
         delta += 4;
         if (resteerCisOk
             && vex_control.guest_chase_cond
             && (Addr32)d32 != (Addr32)guest_EIP_bbstart
             && jmpDelta < 0
             && resteerOkFn( callback_opaque, (Addr32)d32) ) {
            /* Speculation: assume this backward branch is taken.  So
               we need to emit a side-exit to the insn following this
               one, on the negation of the condition, and continue at
               the branch target address (d32).  If we wind up back at
               the first instruction of the trace, just stop; it's
               better to let the IR loop unroller handle that case. */
            stmt( IRStmt_Exit(
                     mk_x86g_calculate_condition((X86Condcode)
                                                 (1 ^ (opc - 0x80))),
                     Ijk_Boring,
                     IRConst_U32(guest_EIP_bbstart+delta),
                     OFFB_EIP ) );
            dres.whatNext   = Dis_ResteerC;
            dres.continueAt = (Addr32)d32;
            comment = "(assumed taken)";
         }
         else
         if (resteerCisOk
             && vex_control.guest_chase_cond
             && (Addr32)d32 != (Addr32)guest_EIP_bbstart
             && jmpDelta >= 0
             && resteerOkFn( callback_opaque,
                             (Addr32)(guest_EIP_bbstart+delta)) ) {
            /* Speculation: assume this forward branch is not taken.
               So we need to emit a side-exit to d32 (the dest) and
               continue disassembling at the insn immediately
               following this one. */
            stmt( IRStmt_Exit(
                     mk_x86g_calculate_condition((X86Condcode)(opc - 0x80)),
                     Ijk_Boring,
                     IRConst_U32(d32),
                     OFFB_EIP ) );
            dres.whatNext   = Dis_ResteerC;
            dres.continueAt = guest_EIP_bbstart + delta;
            comment = "(assumed not taken)";
         }
         else {
            /* Conservative default translation - end the block at
               this point. */
            jcc_01( &dres, (X86Condcode)(opc - 0x80),
                    (Addr32)(guest_EIP_bbstart+delta), d32);
            vassert(dres.whatNext == Dis_StopHere);
         }
         DIP("j%s-32 0x%x %s\n", name_X86Condcode(opc - 0x80), d32, comment);
         break;
       }
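      /* Note on the "1 ^ (opc - 0x80)" trick above: X86Condcode
         enumerates conditions in complementary pairs (O/NO, B/NB,
         Z/NZ, ...), so XORing a condition number with 1 yields its
         negation.  Illustrative example: a backward "jb" (0F 82) that
         is speculated taken gives condcode 0x82 - 0x80 == 2 (B), so
         the side-exit tests 2 ^ 1 == 3 (NB) and disassembly
         continues at the branch target. */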
      /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */
      case 0x31: { /* RDTSC */
         IRTemp   val  = newTemp(Ity_I64);
         IRExpr** args = mkIRExprVec_0();
         IRDirty* d    = unsafeIRDirty_1_N (
                            val,
                            0/*regparms*/,
                            "x86g_dirtyhelper_RDTSC",
                            &x86g_dirtyhelper_RDTSC,
                            args
                         );
         /* execute the dirty call, dumping the result in val. */
         stmt( IRStmt_Dirty(d) );
         putIReg(4, R_EDX, unop(Iop_64HIto32, mkexpr(val)));
         putIReg(4, R_EAX, unop(Iop_64to32, mkexpr(val)));
         DIP("rdtsc\n");
         break;
      }
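      /* For reference, the host-side helper called by the dirty call
         is roughly of this shape (a sketch only; the real
         x86g_dirtyhelper_RDTSC lives in guest_x86_helpers.c):

            ULong x86g_dirtyhelper_RDTSC ( void ) {
               UInt lo, hi;
               __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
               return (((ULong)hi) << 32) | (ULong)lo;
            }

         The 64-bit result is then split back into EDX:EAX above. */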
      /* =-=-=-=-=-=-=-=-=- PUSH/POP Sreg =-=-=-=-=-=-=-=-=-= */

      case 0xA1: /* POP %FS */
         dis_pop_segreg( R_FS, sz ); break;
      case 0xA9: /* POP %GS */
         dis_pop_segreg( R_GS, sz ); break;

      case 0xA0: /* PUSH %FS */
         dis_push_segreg( R_FS, sz ); break;
      case 0xA8: /* PUSH %GS */
         dis_push_segreg( R_GS, sz ); break;
      /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */
      case 0x90:
      case 0x91:
      case 0x92: /* set-Bb/set-NAEb (set if below) */
      case 0x93: /* set-NBb/set-AEb (set if not below) */
      case 0x94: /* set-Zb/set-Eb (set if zero) */
      case 0x95: /* set-NZb/set-NEb (set if not zero) */
      case 0x96: /* set-BEb/set-NAb (set if below or equal) */
      case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */
      case 0x98: /* set-Sb (set if negative) */
      case 0x99: /* set-NSb (set if not negative) */
      case 0x9A: /* set-Pb (set if parity even) */
      case 0x9B: /* set-NPb (set if parity odd) */
      case 0x9C: /* set-Lb/set-NGEb (set if less) */
      case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */
      case 0x9E: /* set-LEb/set-NGb (set if less or equal) */
      case 0x9F: /* set-Gb/set-NLEb (set if greater) */
         t1 = newTemp(Ity_I8);
         assign( t1, unop(Iop_1Uto8,mk_x86g_calculate_condition(opc-0x90)) );
         modrm = getIByte(delta);
         if (epartIsReg(modrm)) {
            delta++;
            putIReg(1, eregOfRM(modrm), mkexpr(t1));
            DIP("set%s %s\n", name_X86Condcode(opc-0x90),
                              nameIReg(1,eregOfRM(modrm)));
         } else {
            addr = disAMode ( &alen, sorb, delta, dis_buf );
            delta += alen;
            storeLE( mkexpr(addr), mkexpr(t1) );
            DIP("set%s %s\n", name_X86Condcode(opc-0x90), dis_buf);
         }
         break;
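      /* Illustrative example: "sete %al" is 0F 94 C0; opc - 0x90 == 4
         selects the Z condition, which is computed as an Ity_I1,
         widened to a byte with Iop_1Uto8, and written to %al. */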
      /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */

      case 0xA4: /* SHLDv imm8,Gv,Ev */
         modrm = getIByte(delta);
         d32   = delta + lengthAMode(delta);
         vex_sprintf(dis_buf, "$%d", getIByte(d32));
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    mkU8(getIByte(d32)), True, /* literal */
                    dis_buf, True );
         break;
      case 0xA5: /* SHLDv %cl,Gv,Ev */
         modrm = getIByte(delta);
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    getIReg(1,R_ECX), False, /* not literal */
                    "%cl", True );
         break;

      case 0xAC: /* SHRDv imm8,Gv,Ev */
         modrm = getIByte(delta);
         d32   = delta + lengthAMode(delta);
         vex_sprintf(dis_buf, "$%d", getIByte(d32));
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    mkU8(getIByte(d32)), True, /* literal */
                    dis_buf, False );
         break;
      case 0xAD: /* SHRDv %cl,Gv,Ev */
         modrm = getIByte(delta);
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    getIReg(1,R_ECX), False, /* not literal */
                    "%cl", False );
         break;
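      /* Semantics reminder (architectural, for 0 < count < N, where N
         is the operand width in bits):

            SHLD: dst = (dst << count) | (src >> (N - count));
            SHRD: dst = (dst >> count) | (src << (N - count));

         that is, the vacated bit positions of Ev (dst) are filled
         from Gv (src).  dis_SHLRD_Gv_Ev builds this double-length
         shift, with the final Bool selecting left (True) vs right
         (False). */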
      /* =-=-=-=-=-=-=-=-=- SYSENTER -=-=-=-=-=-=-=-=-=-= */

      case 0x34:
         /* Simple implementation needing a long explanation.

            sysenter is a kind of syscall entry.  The key thing here
            is that the return address is not known -- that is
            something that is beyond Vex's knowledge.  So this IR
            forces a return to the scheduler, which can do what it
            likes to simulate the sysenter, but it MUST set this
            thread's guest_EIP field with the continuation address
            before resuming execution.  If that doesn't happen, the
            thread will jump to address zero, which is probably
            fatal.
         */

         /* Note where we are, so we can back up the guest to this
            point if the syscall needs to be restarted. */
         stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
                           mkU32(guest_EIP_curr_instr) ) );
         jmp_lit(&dres, Ijk_Sys_sysenter, 0/*bogus next EIP value*/);
         vassert(dres.whatNext == Dis_StopHere);
         DIP("sysenter\n");
         break;
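      /* A sketch of the scheduler-side obligation described above
         (hypothetical pseudocode; the real handling lives in the
         Valgrind core, not in VEX):

            if (jumpkind == Ijk_Sys_sysenter) {
               // simulate the kernel's sysenter entry, then:
               thread_state->guest_EIP = continuation_address;
            }

         Without that assignment the guest resumes at the bogus next
         EIP value of zero supplied above. */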
      /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */

      case 0xC0: { /* XADD Gb,Eb */
         Bool decodeOK;
         delta = dis_xadd_G_E ( sorb, pfx_lock, 1, delta, &decodeOK );
         if (!decodeOK) goto decode_failure;
         break;
      }
      case 0xC1: { /* XADD Gv,Ev */
         Bool decodeOK;
         delta = dis_xadd_G_E ( sorb, pfx_lock, sz, delta, &decodeOK );
         if (!decodeOK) goto decode_failure;
         break;
      }
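      /* Architectural semantics, for reference:

            tmp := dst;  dst := dst + src;  src := tmp

         For the memory-destination case with a LOCK prefix
         (pfx_lock), dis_xadd_G_E emits the update as an IRCAS so the
         read-modify-write is atomic -- see the expect_CAS checking
         in disInstr_X86 below. */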
      /* =-=-=-=-=-=-=-=-=- MMXery =-=-=-=-=-=-=-=-=-=-= */

      case 0x71:
      case 0x72:
      case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */

      case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
      case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
      case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
      case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xFC:
      case 0xFD:
      case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xEC:
      case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xDC:
      case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xF8:
      case 0xF9:
      case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xE8:
      case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xD8:
      case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x74:
      case 0x75:
      case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x64:
      case 0x65:
      case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
      case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
      case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x68:
      case 0x69:
      case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x60:
      case 0x61:
      case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xF2:
      case 0xF3:

      case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xD2:
      case 0xD3:

      case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xE2:
      {
         Int  delta0    = delta-1;
         Bool decode_OK = False;

         /* If sz==2 this is SSE, and we assume sse idec has
            already spotted those cases by now. */
         if (sz != 4)
            goto decode_failure;

         delta = dis_MMX ( &decode_OK, sorb, sz, delta-1 );
         if (!decode_OK) {
            delta = delta0;
            goto decode_failure;
         }
         break;
      }
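      /* Note on the dispatch above: dis_MMX is entered at delta-1 so
         that it re-reads the opcode byte itself and does its own
         decoding; if it cannot decode, delta is restored to delta0
         (the opcode byte) before bailing out to decode_failure. */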
      case 0x0E: /* FEMMS */
      case 0x77: /* EMMS */
         if (sz != 4)
            goto decode_failure;
         do_EMMS_preamble();
         DIP("{f}emms\n");
         break;
      /* =-=-=-=-=-=-=-=-=- SGDT and SIDT =-=-=-=-=-=-=-=-=-=-= */
      case 0x01: /* 0F 01 /0 -- SGDT */
                 /* 0F 01 /1 -- SIDT */
      {
         /* This is really revolting, but ... since each processor
            (core) only has one IDT and one GDT, just let the guest
            see it (pass-through semantics).  I can't see any way to
            construct a faked-up value, so don't bother to try. */
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) goto decode_failure;
         if (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)
            goto decode_failure;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;
         switch (gregOfRM(modrm)) {
            case 0: DIP("sgdt %s\n", dis_buf); break;
            case 1: DIP("sidt %s\n", dis_buf); break;
            default: vassert(0); /*NOTREACHED*/
         }

         IRDirty* d = unsafeIRDirty_0_N (
                          0/*regparms*/,
                          "x86g_dirtyhelper_SxDT",
                          &x86g_dirtyhelper_SxDT,
                          mkIRExprVec_2( mkexpr(addr),
                                         mkU32(gregOfRM(modrm)) )
                      );
         /* declare we're writing memory */
         d->mFx   = Ifx_Write;
         d->mAddr = mkexpr(addr);
         d->mSize = 6;
         stmt( IRStmt_Dirty(d) );
         break;
      }
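      /* Layout note: the 6 bytes written (hence d->mSize = 6) are the
         x86 pseudo-descriptor that SGDT/SIDT store: a 16-bit table
         limit followed by a 32-bit linear base address. */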
      case 0x05: /* AMD's syscall */
         stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
                           mkU32(guest_EIP_curr_instr) ) );
         jmp_lit(&dres, Ijk_Sys_syscall, ((Addr32)guest_EIP_bbstart)+delta);
         vassert(dres.whatNext == Dis_StopHere);
         DIP("syscall\n");
         break;
      /* =-=-=-=-=-=-=-=-=-=- UD2 =-=-=-=-=-=-=-=-=-=-=-= */

      case 0x0B: /* UD2 */
         stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr) ) );
         jmp_lit(&dres, Ijk_NoDecode, guest_EIP_curr_instr);
         vassert(dres.whatNext == Dis_StopHere);
         DIP("ud2\n");
         break;

      /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */

      default:
         goto decode_failure;
   } /* switch (opc) for the 2-byte opcodes */
   goto decode_success;
   } /* case 0x0F: of primary opcode */
   /* ------------------------ ??? ------------------------ */

   default:
   decode_failure:
   /* All decode failures end up here. */
   if (sigill_diag) {
      vex_printf("vex x86->IR: unhandled instruction bytes: "
                 "0x%x 0x%x 0x%x 0x%x\n",
                 getIByte(delta_start+0),
                 getIByte(delta_start+1),
                 getIByte(delta_start+2),
                 getIByte(delta_start+3));
   }

   /* Tell the dispatcher that this insn cannot be decoded, and so
      has not been executed, and (is currently) the next to be
      executed.  EIP should be up-to-date since it was made so at the
      start of each insn, but nevertheless be paranoid and update it
      again right now. */
   stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr) ) );
   jmp_lit(&dres, Ijk_NoDecode, guest_EIP_curr_instr);
   vassert(dres.whatNext == Dis_StopHere);
   dres.len = 0;
   /* We also need to say that a CAS is not expected now, regardless
      of what it might have been set to at the start of the function,
      since the IR that we've emitted just above (to synthesise a
      SIGILL) does not involve any CAS, and presumably no other IR has
      been emitted for this (non-decoded) insn. */
   *expect_CAS = False;
   return dres;

   } /* switch (opc) for the main (primary) opcode switch. */
  decode_success:
   /* All decode successes end up here. */
   switch (dres.whatNext) {
      case Dis_Continue:
         stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_bbstart + delta) ) );
         break;
      case Dis_ResteerU:
      case Dis_ResteerC:
         stmt( IRStmt_Put( OFFB_EIP, mkU32(dres.continueAt) ) );
         break;
      case Dis_StopHere:
         break;
      default:
         vassert(0);
   }

   DIP("\n");
   dres.len = delta - delta_start;
   return dres;
}

#undef DIP
#undef DIS
/*------------------------------------------------------------*/
/*--- Top-level fn                                         ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction
   is located in host memory at &guest_code[delta]. */

DisResult disInstr_X86 ( IRSB*        irsb_IN,
                         Bool         (*resteerOkFn) ( void*, Addr ),
                         Bool         resteerCisOk,
                         void*        callback_opaque,
                         const UChar* guest_code_IN,
                         Long         delta,
                         Addr         guest_IP,
                         VexArch      guest_arch,
                         const VexArchInfo* archinfo,
                         const VexAbiInfo*  abiinfo,
                         VexEndness   host_endness_IN,
                         Bool         sigill_diag_IN )
{
   Int i, x1, x2;
   Bool expect_CAS, has_CAS;
   DisResult dres;

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchX86);
   guest_code           = guest_code_IN;
   irsb                 = irsb_IN;
   host_endness         = host_endness_IN;
   guest_EIP_curr_instr = (Addr32)guest_IP;
   guest_EIP_bbstart    = (Addr32)toUInt(guest_IP - delta);

   x1 = irsb_IN->stmts_used;
   expect_CAS = False;
   dres = disInstr_X86_WRK ( &expect_CAS, resteerOkFn,
                             resteerCisOk,
                             callback_opaque,
                             delta, archinfo, abiinfo, sigill_diag_IN );
   x2 = irsb_IN->stmts_used;
   vassert(x2 >= x1);

   /* See comment at the top of disInstr_X86_WRK for meaning of
      expect_CAS.  Here, we (sanity-)check for the presence/absence of
      IRCAS as directed by the returned expect_CAS value. */
   has_CAS = False;
   for (i = x1; i < x2; i++) {
      if (irsb_IN->stmts[i]->tag == Ist_CAS)
         has_CAS = True;
   }
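   /* Illustrative example of the invariant being checked: a
      LOCK-prefixed read-modify-write such as "lock xaddl %eax,(%edx)"
      must be translated using an IRCAS (so expect_CAS and has_CAS are
      both True), whereas an un-prefixed "xaddl %eax,%ebx" must emit
      none.  A mismatch in either direction indicates broken LOCK
      handling in the front end. */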
   if (expect_CAS != has_CAS) {
      /* Inconsistency detected.  Re-disassemble the instruction so as
         to generate a useful error message; then assert. */
      vex_traceflags |= VEX_TRACE_FE;
      dres = disInstr_X86_WRK ( &expect_CAS, resteerOkFn,
                                resteerCisOk,
                                callback_opaque,
                                delta, archinfo, abiinfo, sigill_diag_IN );
      for (i = x1; i < x2; i++) {
         vex_printf("\t\t");
         ppIRStmt(irsb_IN->stmts[i]);
         vex_printf("\n");
      }
      /* Failure of this assertion is serious and denotes a bug in
         disInstr. */
      vpanic("disInstr_X86: inconsistency in LOCK prefix handling");
   }
   return dres;
}
/*--------------------------------------------------------------------*/
/*--- end                                         guest_x86_toIR.c ---*/
/*--------------------------------------------------------------------*/