2 /*---------------------------------------------------------------*/
3 /*--- begin host_amd64_isel.c ---*/
4 /*---------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2004-2017 OpenWorks LLP
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
28 Neither the names of the U.S. Department of Energy nor the
29 University of California nor the names of its contributors may be
30 used to endorse or promote products derived from this software
31 without prior written permission.
34 #include "libvex_basictypes.h"
35 #include "libvex_ir.h"
39 #include "main_util.h"
40 #include "main_globals.h"
41 #include "host_generic_regs.h"
42 #include "host_generic_simd64.h"
43 #include "host_generic_simd128.h"
44 #include "host_generic_simd256.h"
45 #include "host_generic_maddf.h"
46 #include "host_amd64_defs.h"
49 /*---------------------------------------------------------*/
50 /*--- x87/SSE control word stuff ---*/
51 /*---------------------------------------------------------*/
53 /* Vex-generated code expects to run with the FPU set as follows: all
54 exceptions masked, round-to-nearest, precision = 53 bits. This
55 corresponds to a FPU control word value of 0x027F.
57 Similarly the SSE control word (%mxcsr) should be 0x1F80.
59 %fpucw and %mxcsr should have these values on entry to
   Vex-generated code, and should be restored to those values if
   they are changed.
64 #define DEFAULT_FPUCW 0x027F
66 #define DEFAULT_MXCSR 0x1F80
68 /* debugging only, do not use */
69 /* define DEFAULT_FPUCW 0x037F */
72 /*---------------------------------------------------------*/
73 /*--- misc helpers ---*/
74 /*---------------------------------------------------------*/
76 /* These are duplicated in guest-amd64/toIR.c */
77 static IRExpr
* unop ( IROp op
, IRExpr
* a
)
79 return IRExpr_Unop(op
, a
);
82 static IRExpr
* binop ( IROp op
, IRExpr
* a1
, IRExpr
* a2
)
84 return IRExpr_Binop(op
, a1
, a2
);
87 static IRExpr
* bind ( Int binder
)
89 return IRExpr_Binder(binder
);
92 static Bool
isZeroU8 ( const IRExpr
* e
)
94 return e
->tag
== Iex_Const
95 && e
->Iex
.Const
.con
->tag
== Ico_U8
96 && e
->Iex
.Const
.con
->Ico
.U8
== 0;
100 /*---------------------------------------------------------*/
102 /*---------------------------------------------------------*/
104 /* This carries around:
106 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
107 might encounter. This is computed before insn selection starts,
110 - A mapping from IRTemp to HReg. This tells the insn selector
111 which virtual register is associated with each IRTemp
112 temporary. This is computed before insn selection starts, and
113 does not change. We expect this mapping to map precisely the
114 same set of IRTemps as the type mapping does.
116 - vregmap holds the primary register for the IRTemp.
117 - vregmapHI is only used for 128-bit integer-typed
118 IRTemps. It holds the identity of a second
119 64-bit virtual HReg, which holds the high half
122 - The host subarchitecture we are selecting insns for.
123 This is set at the start and does not change.
125 - The code array, that is, the insns selected so far.
127 - A counter, for generating new virtual registers.
129 - A Bool for indicating whether we may generate chain-me
130 instructions for control flow transfers, or whether we must use
133 - The maximum guest address of any guest insn in this block.
134 Actually, the address of the highest-addressed byte from any insn
135 in this block. Is set at the start and does not change. This is
136 used for detecting jumps which are definitely forward-edges from
137 this block, and therefore can be made (chained) to the fast entry
138 point of the destination, thereby avoiding the destination's
141 Note, this is all host-independent. (JRS 20050201: well, kinda
142 ... not completely. Compare with ISelEnv for X86.)
147 /* Constant -- are set at the start and do not change. */
156 Bool chainingAllowed
;
159 /* These are modified as we go along. */
166 static HReg
lookupIRTemp ( ISelEnv
* env
, IRTemp tmp
)
169 vassert(tmp
< env
->n_vregmap
);
170 return env
->vregmap
[tmp
];
173 static void lookupIRTempPair ( HReg
* vrHI
, HReg
* vrLO
,
174 ISelEnv
* env
, IRTemp tmp
)
177 vassert(tmp
< env
->n_vregmap
);
178 vassert(! hregIsInvalid(env
->vregmapHI
[tmp
]));
179 *vrLO
= env
->vregmap
[tmp
];
180 *vrHI
= env
->vregmapHI
[tmp
];
183 static void addInstr ( ISelEnv
* env
, AMD64Instr
* instr
)
185 addHInstr(env
->code
, instr
);
186 if (vex_traceflags
& VEX_TRACE_VCODE
) {
187 ppAMD64Instr(instr
, True
);
192 static HReg
newVRegI ( ISelEnv
* env
)
194 HReg reg
= mkHReg(True
/*virtual reg*/, HRcInt64
, 0/*enc*/, env
->vreg_ctr
);
199 static HReg
newVRegV ( ISelEnv
* env
)
201 HReg reg
= mkHReg(True
/*virtual reg*/, HRcVec128
, 0/*enc*/, env
->vreg_ctr
);
207 /*---------------------------------------------------------*/
208 /*--- ISEL: Forward declarations ---*/
209 /*---------------------------------------------------------*/
211 /* These are organised as iselXXX and iselXXX_wrk pairs. The
212 iselXXX_wrk do the real work, but are not to be called directly.
213 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
214 checks that all returned registers are virtual. You should not
215 call the _wrk version directly.
217 static AMD64RMI
* iselIntExpr_RMI_wrk ( ISelEnv
* env
, const IRExpr
* e
);
218 static AMD64RMI
* iselIntExpr_RMI ( ISelEnv
* env
, const IRExpr
* e
);
220 static AMD64RI
* iselIntExpr_RI_wrk ( ISelEnv
* env
, const IRExpr
* e
);
221 static AMD64RI
* iselIntExpr_RI ( ISelEnv
* env
, const IRExpr
* e
);
223 static AMD64RM
* iselIntExpr_RM_wrk ( ISelEnv
* env
, const IRExpr
* e
);
224 static AMD64RM
* iselIntExpr_RM ( ISelEnv
* env
, const IRExpr
* e
);
226 static HReg
iselIntExpr_R_wrk ( ISelEnv
* env
, const IRExpr
* e
);
227 static HReg
iselIntExpr_R ( ISelEnv
* env
, const IRExpr
* e
);
229 static AMD64AMode
* iselIntExpr_AMode_wrk ( ISelEnv
* env
, const IRExpr
* e
);
230 static AMD64AMode
* iselIntExpr_AMode ( ISelEnv
* env
, const IRExpr
* e
);
232 static void iselInt128Expr_wrk ( /*OUT*/HReg
* rHi
, HReg
* rLo
,
233 ISelEnv
* env
, const IRExpr
* e
);
234 static void iselInt128Expr ( /*OUT*/HReg
* rHi
, HReg
* rLo
,
235 ISelEnv
* env
, const IRExpr
* e
);
237 static AMD64CondCode
iselCondCode_wrk ( ISelEnv
* env
, const IRExpr
* e
);
238 static AMD64CondCode
iselCondCode ( ISelEnv
* env
, const IRExpr
* e
);
240 static HReg
iselDblExpr_wrk ( ISelEnv
* env
, const IRExpr
* e
);
241 static HReg
iselDblExpr ( ISelEnv
* env
, const IRExpr
* e
);
243 static HReg
iselFltExpr_wrk ( ISelEnv
* env
, const IRExpr
* e
);
244 static HReg
iselFltExpr ( ISelEnv
* env
, const IRExpr
* e
);
246 static HReg
iselVecExpr_wrk ( ISelEnv
* env
, const IRExpr
* e
);
247 static HReg
iselVecExpr ( ISelEnv
* env
, const IRExpr
* e
);
249 static void iselDVecExpr_wrk ( /*OUT*/HReg
* rHi
, HReg
* rLo
,
250 ISelEnv
* env
, const IRExpr
* e
);
251 static void iselDVecExpr ( /*OUT*/HReg
* rHi
, HReg
* rLo
,
252 ISelEnv
* env
, const IRExpr
* e
);
255 /*---------------------------------------------------------*/
256 /*--- ISEL: Misc helpers ---*/
257 /*---------------------------------------------------------*/
259 static Bool
sane_AMode ( AMD64AMode
* am
)
264 toBool( hregClass(am
->Aam
.IR
.reg
) == HRcInt64
265 && (hregIsVirtual(am
->Aam
.IR
.reg
)
266 || sameHReg(am
->Aam
.IR
.reg
, hregAMD64_RBP())) );
269 toBool( hregClass(am
->Aam
.IRRS
.base
) == HRcInt64
270 && hregIsVirtual(am
->Aam
.IRRS
.base
)
271 && hregClass(am
->Aam
.IRRS
.index
) == HRcInt64
272 && hregIsVirtual(am
->Aam
.IRRS
.index
) );
274 vpanic("sane_AMode: unknown amd64 amode tag");
279 /* Can the lower 32 bits be signedly widened to produce the whole
280 64-bit value? In other words, are the top 33 bits either all 0 or
282 static Bool
fitsIn32Bits ( ULong x
)
287 return toBool(x
== y1
);
290 /* Is this a 64-bit zero expression? */
292 static Bool
isZeroU64 ( const IRExpr
* e
)
294 return e
->tag
== Iex_Const
295 && e
->Iex
.Const
.con
->tag
== Ico_U64
296 && e
->Iex
.Const
.con
->Ico
.U64
== 0ULL;
299 static Bool
isZeroU32 ( const IRExpr
* e
)
301 return e
->tag
== Iex_Const
302 && e
->Iex
.Const
.con
->tag
== Ico_U32
303 && e
->Iex
.Const
.con
->Ico
.U32
== 0;
306 /* Are both args atoms and the same? This is copy of eqIRAtom
307 that omits the assertions that the args are indeed atoms. */
309 static Bool
areAtomsAndEqual ( const IRExpr
* a1
, const IRExpr
* a2
)
311 if (a1
->tag
== Iex_RdTmp
&& a2
->tag
== Iex_RdTmp
)
312 return toBool(a1
->Iex
.RdTmp
.tmp
== a2
->Iex
.RdTmp
.tmp
);
313 if (a1
->tag
== Iex_Const
&& a2
->tag
== Iex_Const
)
314 return eqIRConst(a1
->Iex
.Const
.con
, a2
->Iex
.Const
.con
);
318 /* Make a int reg-reg move. */
320 static AMD64Instr
* mk_iMOVsd_RR ( HReg src
, HReg dst
)
322 vassert(hregClass(src
) == HRcInt64
);
323 vassert(hregClass(dst
) == HRcInt64
);
324 return AMD64Instr_Alu64R(Aalu_MOV
, AMD64RMI_Reg(src
), dst
);
327 /* Make a vector (128 bit) reg-reg move. */
329 static AMD64Instr
* mk_vMOVsd_RR ( HReg src
, HReg dst
)
331 vassert(hregClass(src
) == HRcVec128
);
332 vassert(hregClass(dst
) == HRcVec128
);
333 return AMD64Instr_SseReRg(Asse_MOV
, src
, dst
);
336 /* Advance/retreat %rsp by n. */
338 static void add_to_rsp ( ISelEnv
* env
, Int n
)
340 vassert(n
> 0 && n
< 256 && (n
%8) == 0);
342 AMD64Instr_Alu64R(Aalu_ADD
, AMD64RMI_Imm(n
),
346 static void sub_from_rsp ( ISelEnv
* env
, Int n
)
348 vassert(n
> 0 && n
< 256 && (n
%8) == 0);
350 AMD64Instr_Alu64R(Aalu_SUB
, AMD64RMI_Imm(n
),
354 /* Push 64-bit constants on the stack. */
355 static void push_uimm64( ISelEnv
* env
, ULong uimm64
)
357 /* If uimm64 can be expressed as the sign extension of its
358 lower 32 bits, we can do it the easy way. */
359 Long simm64
= (Long
)uimm64
;
360 if ( simm64
== ((Long
)(uimm64
<< 32) >> 32) ) {
361 addInstr( env
, AMD64Instr_Push(AMD64RMI_Imm( (UInt
)uimm64
)) );
363 HReg tmp
= newVRegI(env
);
364 addInstr( env
, AMD64Instr_Imm64(uimm64
, tmp
) );
365 addInstr( env
, AMD64Instr_Push(AMD64RMI_Reg(tmp
)) );
370 /* Used only in doHelperCall. If possible, produce a single
371 instruction which computes 'e' into 'dst'. If not possible, return
374 static AMD64Instr
* iselIntExpr_single_instruction ( ISelEnv
* env
,
378 /* Per comments in doHelperCall below, appearance of
379 Iex_VECRET implies ill-formed IR. */
380 vassert(e
->tag
!= Iex_VECRET
);
382 /* In this case we give out a copy of the BaseBlock pointer. */
383 if (UNLIKELY(e
->tag
== Iex_GSPTR
)) {
384 return mk_iMOVsd_RR( hregAMD64_RBP(), dst
);
387 vassert(typeOfIRExpr(env
->type_env
, e
) == Ity_I64
);
389 if (e
->tag
== Iex_Const
) {
390 vassert(e
->Iex
.Const
.con
->tag
== Ico_U64
);
391 if (fitsIn32Bits(e
->Iex
.Const
.con
->Ico
.U64
)) {
392 return AMD64Instr_Alu64R(
394 AMD64RMI_Imm(toUInt(e
->Iex
.Const
.con
->Ico
.U64
)),
398 return AMD64Instr_Imm64(e
->Iex
.Const
.con
->Ico
.U64
, dst
);
402 if (e
->tag
== Iex_RdTmp
) {
403 HReg src
= lookupIRTemp(env
, e
->Iex
.RdTmp
.tmp
);
404 return mk_iMOVsd_RR(src
, dst
);
407 if (e
->tag
== Iex_Get
) {
408 vassert(e
->Iex
.Get
.ty
== Ity_I64
);
409 return AMD64Instr_Alu64R(
412 AMD64AMode_IR(e
->Iex
.Get
.offset
,
417 if (e
->tag
== Iex_Unop
418 && e
->Iex
.Unop
.op
== Iop_32Uto64
419 && e
->Iex
.Unop
.arg
->tag
== Iex_RdTmp
) {
420 HReg src
= lookupIRTemp(env
, e
->Iex
.Unop
.arg
->Iex
.RdTmp
.tmp
);
421 return AMD64Instr_MovxLQ(False
, src
, dst
);
424 if (0) { ppIRExpr(e
); vex_printf("\n"); }
430 /* Do a complete function call. |guard| is a Ity_Bit expression
431 indicating whether or not the call happens. If guard==NULL, the
432 call is unconditional. |retloc| is set to indicate where the
433 return value is after the call. The caller (of this fn) must
434 generate code to add |stackAdjustAfterCall| to the stack pointer
435 after the call is done. */
438 void doHelperCall ( /*OUT*/UInt
* stackAdjustAfterCall
,
439 /*OUT*/RetLoc
* retloc
,
442 IRCallee
* cee
, IRType retTy
, IRExpr
** args
)
447 AMD64Instr
* fastinstrs
[6];
450 /* Set default returns. We'll update them later if needed. */
451 *stackAdjustAfterCall
= 0;
452 *retloc
= mk_RetLoc_INVALID();
454 /* These are used for cross-checking that IR-level constraints on
455 the use of IRExpr_VECRET() and IRExpr_GSPTR() are observed. */
459 /* Marshal args for a call and do the call.
461 This function only deals with a tiny set of possibilities, which
462 cover all helpers in practice. The restrictions are that only
463 arguments in registers are supported, hence only 6x64 integer
464 bits in total can be passed. In fact the only supported arg
467 The return type can be I{64,32,16,8} or V{128,256}. In the
468 latter two cases, it is expected that |args| will contain the
469 special node IRExpr_VECRET(), in which case this routine
470 generates code to allocate space on the stack for the vector
471 return value. Since we are not passing any scalars on the
472 stack, it is enough to preallocate the return space before
473 marshalling any arguments, in this case.
475 |args| may also contain IRExpr_GSPTR(), in which case the
476 value in %rbp is passed as the corresponding argument.
478 Generating code which is both efficient and correct when
479 parameters are to be passed in registers is difficult, for the
480 reasons elaborated in detail in comments attached to
481 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
482 of the method described in those comments.
484 The problem is split into two cases: the fast scheme and the
485 slow scheme. In the fast scheme, arguments are computed
486 directly into the target (real) registers. This is only safe
487 when we can be sure that computation of each argument will not
488 trash any real registers set by computation of any other
491 In the slow scheme, all args are first computed into vregs, and
492 once they are all done, they are moved to the relevant real
493 regs. This always gives correct code, but it also gives a bunch
494 of vreg-to-rreg moves which are usually redundant but are hard
495 for the register allocator to get rid of.
497 To decide which scheme to use, all argument expressions are
498 first examined. If they are all so simple that it is clear they
499 will be evaluated without use of any fixed registers, use the
500 fast scheme, else use the slow scheme. Note also that only
501 unconditional calls may use the fast scheme, since having to
502 compute a condition expression could itself trash real
503 registers. Note that for simplicity, in the case where
504 IRExpr_VECRET() is present, we use the slow scheme. This is
505 motivated by the desire to avoid any possible complexity
508 Note this requires being able to examine an expression and
509 determine whether or not evaluation of it might use a fixed
510 register. That requires knowledge of how the rest of this insn
511 selector works. Currently just the following 3 are regarded as
512 safe -- hopefully they cover the majority of arguments in
513 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
516 /* Note that the cee->regparms field is meaningless on AMD64 host
517 (since there is only one calling convention) and so we always
520 for (i
= 0; args
[i
]; i
++)
524 vpanic("doHelperCall(AMD64): cannot currently handle > 6 args");
526 argregs
[0] = hregAMD64_RDI();
527 argregs
[1] = hregAMD64_RSI();
528 argregs
[2] = hregAMD64_RDX();
529 argregs
[3] = hregAMD64_RCX();
530 argregs
[4] = hregAMD64_R8();
531 argregs
[5] = hregAMD64_R9();
533 tmpregs
[0] = tmpregs
[1] = tmpregs
[2] =
534 tmpregs
[3] = tmpregs
[4] = tmpregs
[5] = INVALID_HREG
;
536 fastinstrs
[0] = fastinstrs
[1] = fastinstrs
[2] =
537 fastinstrs
[3] = fastinstrs
[4] = fastinstrs
[5] = NULL
;
539 /* First decide which scheme (slow or fast) is to be used. First
540 assume the fast scheme, and select slow if any contraindications
543 /* We'll need space on the stack for the return value. Avoid
544 possible complications with nested calls by using the slow
546 if (retTy
== Ity_V128
|| retTy
== Ity_V256
)
550 if (guard
->tag
== Iex_Const
551 && guard
->Iex
.Const
.con
->tag
== Ico_U1
552 && guard
->Iex
.Const
.con
->Ico
.U1
== True
) {
555 /* Not manifestly unconditional -- be conservative. */
560 /* Ok, let's try for the fast scheme. If it doesn't pan out, we'll
561 use the slow scheme. Because this is tentative, we can't call
562 addInstr (that is, commit to) any instructions until we're
563 handled all the arguments. So park the resulting instructions
564 in a buffer and emit that if we're successful. */
567 /* In this loop, we process args that can be computed into the
568 destination (real) register with a single instruction, without
569 using any fixed regs. That also includes IRExpr_GSPTR(), but
570 not IRExpr_VECRET(). Indeed, if the IR is well-formed, we can
571 never see IRExpr_VECRET() at this point, since the return-type
572 check above should ensure all those cases use the slow scheme
574 vassert(n_args
>= 0 && n_args
<= 6);
575 for (i
= 0; i
< n_args
; i
++) {
576 IRExpr
* arg
= args
[i
];
577 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg
))) {
578 vassert(typeOfIRExpr(env
->type_env
, args
[i
]) == Ity_I64
);
581 = iselIntExpr_single_instruction( env
, argregs
[i
], args
[i
] );
582 if (fastinstrs
[i
] == NULL
)
586 /* Looks like we're in luck. Emit the accumulated instructions and
587 move on to doing the call itself. */
588 for (i
= 0; i
< n_args
; i
++)
589 addInstr(env
, fastinstrs
[i
]);
591 /* Fast scheme only applies for unconditional calls. Hence: */
597 /* SLOW SCHEME; move via temporaries */
600 # if 0 /* debug only */
601 if (n_args
> 0) {for (i
= 0; args
[i
]; i
++) {
602 ppIRExpr(args
[i
]); vex_printf(" "); }
606 /* If we have a vector return type, allocate a place for it on the
607 stack and record its address. */
608 HReg r_vecRetAddr
= INVALID_HREG
;
609 if (retTy
== Ity_V128
) {
610 r_vecRetAddr
= newVRegI(env
);
611 sub_from_rsp(env
, 16);
612 addInstr(env
, mk_iMOVsd_RR( hregAMD64_RSP(), r_vecRetAddr
));
614 else if (retTy
== Ity_V256
) {
615 r_vecRetAddr
= newVRegI(env
);
616 sub_from_rsp(env
, 32);
617 addInstr(env
, mk_iMOVsd_RR( hregAMD64_RSP(), r_vecRetAddr
));
620 vassert(n_args
>= 0 && n_args
<= 6);
621 for (i
= 0; i
< n_args
; i
++) {
622 IRExpr
* arg
= args
[i
];
623 if (UNLIKELY(arg
->tag
== Iex_GSPTR
)) {
624 tmpregs
[i
] = newVRegI(env
);
625 addInstr(env
, mk_iMOVsd_RR( hregAMD64_RBP(), tmpregs
[i
]));
628 else if (UNLIKELY(arg
->tag
== Iex_VECRET
)) {
629 /* We stashed the address of the return slot earlier, so just
631 vassert(!hregIsInvalid(r_vecRetAddr
));
632 tmpregs
[i
] = r_vecRetAddr
;
636 vassert(typeOfIRExpr(env
->type_env
, args
[i
]) == Ity_I64
);
637 tmpregs
[i
] = iselIntExpr_R(env
, args
[i
]);
641 /* Now we can compute the condition. We can't do it earlier
642 because the argument computations could trash the condition
643 codes. Be a bit clever to handle the common case where the
647 if (guard
->tag
== Iex_Const
648 && guard
->Iex
.Const
.con
->tag
== Ico_U1
649 && guard
->Iex
.Const
.con
->Ico
.U1
== True
) {
650 /* unconditional -- do nothing */
652 cc
= iselCondCode( env
, guard
);
656 /* Move the args to their final destinations. */
657 for (i
= 0; i
< n_args
; i
++) {
658 /* None of these insns, including any spill code that might
659 be generated, may alter the condition codes. */
660 addInstr( env
, mk_iMOVsd_RR( tmpregs
[i
], argregs
[i
] ) );
664 /* Do final checks, set the return values, and generate the call
665 instruction proper. */
668 if (retTy
== Ity_V128
|| retTy
== Ity_V256
) {
669 vassert(nVECRETs
== 1);
671 vassert(nVECRETs
== 0);
674 vassert(nGSPTRs
== 0 || nGSPTRs
== 1);
676 vassert(*stackAdjustAfterCall
== 0);
677 vassert(is_RetLoc_INVALID(*retloc
));
680 /* Function doesn't return a value. */
681 *retloc
= mk_RetLoc_simple(RLPri_None
);
683 case Ity_I64
: case Ity_I32
: case Ity_I16
: case Ity_I8
:
684 *retloc
= mk_RetLoc_simple(RLPri_Int
);
687 *retloc
= mk_RetLoc_spRel(RLPri_V128SpRel
, 0);
688 *stackAdjustAfterCall
= 16;
691 *retloc
= mk_RetLoc_spRel(RLPri_V256SpRel
, 0);
692 *stackAdjustAfterCall
= 32;
695 /* IR can denote other possible return types, but we don't
696 handle those here. */
700 /* Finally, generate the call itself. This needs the *retloc value
701 set in the switch above, which is why it's at the end. */
703 AMD64Instr_Call(cc
, (Addr
)cee
->addr
, n_args
, *retloc
));
707 /* Given a guest-state array descriptor, an index expression and a
708 bias, generate an AMD64AMode holding the relevant guest state
712 AMD64AMode
* genGuestArrayOffset ( ISelEnv
* env
, IRRegArray
* descr
,
713 IRExpr
* off
, Int bias
)
716 Int elemSz
= sizeofIRType(descr
->elemTy
);
717 Int nElems
= descr
->nElems
;
719 /* Throw out any cases not generated by an amd64 front end. In
720 theory there might be a day where we need to handle them -- if
721 we ever run non-amd64-guest on amd64 host. */
723 if (nElems
!= 8 || (elemSz
!= 1 && elemSz
!= 8))
724 vpanic("genGuestArrayOffset(amd64 host)");
726 /* Compute off into a reg, %off. Then return:
729 addq $bias, %tmp (if bias != 0)
731 ... base(%rbp, %tmp, shift) ...
734 roff
= iselIntExpr_R(env
, off
);
735 addInstr(env
, mk_iMOVsd_RR(roff
, tmp
));
737 /* Make sure the bias is sane, in the sense that there are
738 no significant bits above bit 30 in it. */
739 vassert(-10000 < bias
&& bias
< 10000);
741 AMD64Instr_Alu64R(Aalu_ADD
, AMD64RMI_Imm(bias
), tmp
));
744 AMD64Instr_Alu64R(Aalu_AND
, AMD64RMI_Imm(7), tmp
));
745 vassert(elemSz
== 1 || elemSz
== 8);
747 AMD64AMode_IRRS( descr
->base
, hregAMD64_RBP(), tmp
,
752 /* Set the SSE unit's rounding mode to default (%mxcsr = 0x1F80) */
754 void set_SSE_rounding_default ( ISelEnv
* env
)
756 /* pushq $DEFAULT_MXCSR
760 AMD64AMode
* zero_rsp
= AMD64AMode_IR(0, hregAMD64_RSP());
761 addInstr(env
, AMD64Instr_Push(AMD64RMI_Imm(DEFAULT_MXCSR
)));
762 addInstr(env
, AMD64Instr_LdMXCSR(zero_rsp
));
766 /* Mess with the FPU's rounding mode: set to the default rounding mode
769 void set_FPU_rounding_default ( ISelEnv
* env
)
771 /* movq $DEFAULT_FPUCW, -8(%rsp)
774 AMD64AMode
* m8_rsp
= AMD64AMode_IR(-8, hregAMD64_RSP());
775 addInstr(env
, AMD64Instr_Alu64M(
776 Aalu_MOV
, AMD64RI_Imm(DEFAULT_FPUCW
), m8_rsp
));
777 addInstr(env
, AMD64Instr_A87LdCW(m8_rsp
));
781 /* Mess with the SSE unit's rounding mode: 'mode' is an I32-typed
782 expression denoting a value in the range 0 .. 3, indicating a round
783 mode encoded as per type IRRoundingMode. Set the SSE machinery to
784 have the same rounding.
787 void set_SSE_rounding_mode ( ISelEnv
* env
, IRExpr
* mode
)
789 /* Note: this sequence only makes sense because DEFAULT_MXCSR has
790 both rounding bits == 0. If that wasn't the case, we couldn't
791 create a new rounding field simply by ORing the new value into
795 andq [[mode]], %reg -- shouldn't be needed; paranoia
797 orq $DEFAULT_MXCSR, %reg
802 HReg reg
= newVRegI(env
);
803 AMD64AMode
* zero_rsp
= AMD64AMode_IR(0, hregAMD64_RSP());
804 addInstr(env
, AMD64Instr_Alu64R(Aalu_MOV
, AMD64RMI_Imm(3), reg
));
805 addInstr(env
, AMD64Instr_Alu64R(Aalu_AND
,
806 iselIntExpr_RMI(env
, mode
), reg
));
807 addInstr(env
, AMD64Instr_Sh64(Ash_SHL
, 13, reg
));
808 addInstr(env
, AMD64Instr_Alu64R(
809 Aalu_OR
, AMD64RMI_Imm(DEFAULT_MXCSR
), reg
));
810 addInstr(env
, AMD64Instr_Push(AMD64RMI_Reg(reg
)));
811 addInstr(env
, AMD64Instr_LdMXCSR(zero_rsp
));
816 /* Mess with the FPU's rounding mode: 'mode' is an I32-typed
817 expression denoting a value in the range 0 .. 3, indicating a round
818 mode encoded as per type IRRoundingMode. Set the x87 FPU to have
822 void set_FPU_rounding_mode ( ISelEnv
* env
, IRExpr
* mode
)
824 HReg rrm
= iselIntExpr_R(env
, mode
);
825 HReg rrm2
= newVRegI(env
);
826 AMD64AMode
* m8_rsp
= AMD64AMode_IR(-8, hregAMD64_RSP());
829 andq $3, %rrm2 -- shouldn't be needed; paranoia
831 orq $DEFAULT_FPUCW, %rrm2
835 addInstr(env
, mk_iMOVsd_RR(rrm
, rrm2
));
836 addInstr(env
, AMD64Instr_Alu64R(Aalu_AND
, AMD64RMI_Imm(3), rrm2
));
837 addInstr(env
, AMD64Instr_Sh64(Ash_SHL
, 10, rrm2
));
838 addInstr(env
, AMD64Instr_Alu64R(Aalu_OR
,
839 AMD64RMI_Imm(DEFAULT_FPUCW
), rrm2
));
840 addInstr(env
, AMD64Instr_Alu64M(Aalu_MOV
,
841 AMD64RI_Reg(rrm2
), m8_rsp
));
842 addInstr(env
, AMD64Instr_A87LdCW(m8_rsp
));
846 /* Generate all-zeroes into a new vector register.
848 static HReg
generate_zeroes_V128 ( ISelEnv
* env
)
850 HReg dst
= newVRegV(env
);
851 addInstr(env
, AMD64Instr_SseReRg(Asse_XOR
, dst
, dst
));
855 /* Generate all-ones into a new vector register.
857 static HReg
generate_ones_V128 ( ISelEnv
* env
)
859 HReg dst
= newVRegV(env
);
860 addInstr(env
, AMD64Instr_SseReRg(Asse_CMPEQ32
, dst
, dst
));
865 /* Generate !src into a new vector register. Amazing that there isn't
866 a less crappy way to do this.
868 static HReg
do_sse_NotV128 ( ISelEnv
* env
, HReg src
)
870 HReg dst
= generate_ones_V128(env
);
871 addInstr(env
, AMD64Instr_SseReRg(Asse_XOR
, src
, dst
));
876 /* Expand the given byte into a 64-bit word, by cloning each bit
878 static ULong
bitmask8_to_bytemask64 ( UShort w8
)
880 vassert(w8
== (w8
& 0xFF));
883 for (i
= 0; i
< 8; i
++) {
885 w64
|= (0xFFULL
<< (8 * i
));
891 /*---------------------------------------------------------*/
892 /*--- ISEL: Integer expressions (64/32/16/8 bit) ---*/
893 /*---------------------------------------------------------*/
895 /* Select insns for an integer-typed expression, and add them to the
896 code list. Return a reg holding the result. This reg will be a
897 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
898 want to modify it, ask for a new vreg, copy it in there, and modify
899 the copy. The register allocator will do its best to map both
900 vregs to the same real register, so the copies will often disappear
903 This should handle expressions of 64, 32, 16 and 8-bit type. All
904 results are returned in a 64-bit register. For 32-, 16- and 8-bit
905 expressions, the upper 32/48/56 bits are arbitrary, so you should
906 mask or sign extend partial values if necessary.
909 static HReg
iselIntExpr_R ( ISelEnv
* env
, const IRExpr
* e
)
911 HReg r
= iselIntExpr_R_wrk(env
, e
);
912 /* sanity checks ... */
914 vex_printf("\niselIntExpr_R: "); ppIRExpr(e
); vex_printf("\n");
916 vassert(hregClass(r
) == HRcInt64
);
917 vassert(hregIsVirtual(r
));
921 /* DO NOT CALL THIS DIRECTLY ! */
922 static HReg
iselIntExpr_R_wrk ( ISelEnv
* env
, const IRExpr
* e
)
925 DECLARE_PATTERN(p_1Uto8_64to1
);
926 DECLARE_PATTERN(p_LDle8_then_8Uto64
);
927 DECLARE_PATTERN(p_LDle16_then_16Uto64
);
929 IRType ty
= typeOfIRExpr(env
->type_env
,e
);
931 case Ity_I64
: case Ity_I32
: case Ity_I16
: case Ity_I8
: break;
937 /* --------- TEMP --------- */
939 return lookupIRTemp(env
, e
->Iex
.RdTmp
.tmp
);
942 /* --------- LOAD --------- */
944 HReg dst
= newVRegI(env
);
945 AMD64AMode
* amode
= iselIntExpr_AMode ( env
, e
->Iex
.Load
.addr
);
947 /* We can't handle big-endian loads, nor load-linked. */
948 if (e
->Iex
.Load
.end
!= Iend_LE
)
952 addInstr(env
, AMD64Instr_Alu64R(Aalu_MOV
,
953 AMD64RMI_Mem(amode
), dst
) );
957 addInstr(env
, AMD64Instr_LoadEX(4,False
,amode
,dst
));
961 addInstr(env
, AMD64Instr_LoadEX(2,False
,amode
,dst
));
965 addInstr(env
, AMD64Instr_LoadEX(1,False
,amode
,dst
));
971 /* --------- BINARY OP --------- */
976 /* Pattern: Sub64(0,x) */
977 /* and: Sub32(0,x) */
978 if ((e
->Iex
.Binop
.op
== Iop_Sub64
&& isZeroU64(e
->Iex
.Binop
.arg1
))
979 || (e
->Iex
.Binop
.op
== Iop_Sub32
&& isZeroU32(e
->Iex
.Binop
.arg1
))) {
980 HReg dst
= newVRegI(env
);
981 HReg reg
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
982 addInstr(env
, mk_iMOVsd_RR(reg
,dst
));
983 addInstr(env
, AMD64Instr_Unary64(Aun_NEG
,dst
));
987 /* Is it an addition or logical style op? */
988 switch (e
->Iex
.Binop
.op
) {
989 case Iop_Add8
: case Iop_Add16
: case Iop_Add32
: case Iop_Add64
:
990 aluOp
= Aalu_ADD
; break;
991 case Iop_Sub8
: case Iop_Sub16
: case Iop_Sub32
: case Iop_Sub64
:
992 aluOp
= Aalu_SUB
; break;
993 case Iop_And8
: case Iop_And16
: case Iop_And32
: case Iop_And64
:
994 aluOp
= Aalu_AND
; break;
995 case Iop_Or8
: case Iop_Or16
: case Iop_Or32
: case Iop_Or64
:
996 aluOp
= Aalu_OR
; break;
997 case Iop_Xor8
: case Iop_Xor16
: case Iop_Xor32
: case Iop_Xor64
:
998 aluOp
= Aalu_XOR
; break;
999 case Iop_Mul16
: case Iop_Mul32
: case Iop_Mul64
:
1000 aluOp
= Aalu_MUL
; break;
1002 aluOp
= Aalu_INVALID
; break;
1004 /* For commutative ops we assume any literal
1005 values are on the second operand. */
1006 if (aluOp
!= Aalu_INVALID
) {
1007 HReg dst
= newVRegI(env
);
1008 HReg reg
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
1009 AMD64RMI
* rmi
= iselIntExpr_RMI(env
, e
->Iex
.Binop
.arg2
);
1010 addInstr(env
, mk_iMOVsd_RR(reg
,dst
));
1011 addInstr(env
, AMD64Instr_Alu64R(aluOp
, rmi
, dst
));
1015 /* Perhaps a shift op? */
1016 switch (e
->Iex
.Binop
.op
) {
1017 case Iop_Shl64
: case Iop_Shl32
: case Iop_Shl16
: case Iop_Shl8
:
1018 shOp
= Ash_SHL
; break;
1019 case Iop_Shr64
: case Iop_Shr32
: case Iop_Shr16
: case Iop_Shr8
:
1020 shOp
= Ash_SHR
; break;
1021 case Iop_Sar64
: case Iop_Sar32
: case Iop_Sar16
: case Iop_Sar8
:
1022 shOp
= Ash_SAR
; break;
1024 shOp
= Ash_INVALID
; break;
1026 if (shOp
!= Ash_INVALID
) {
1027 HReg dst
= newVRegI(env
);
1029 /* regL = the value to be shifted */
1030 HReg regL
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
1031 addInstr(env
, mk_iMOVsd_RR(regL
,dst
));
1033 /* Do any necessary widening for 32/16/8 bit operands */
1034 switch (e
->Iex
.Binop
.op
) {
1035 case Iop_Shr64
: case Iop_Shl64
: case Iop_Sar64
:
1037 case Iop_Shl32
: case Iop_Shl16
: case Iop_Shl8
:
1040 addInstr(env
, AMD64Instr_Alu64R(
1041 Aalu_AND
, AMD64RMI_Imm(0xFF), dst
));
1044 addInstr(env
, AMD64Instr_Alu64R(
1045 Aalu_AND
, AMD64RMI_Imm(0xFFFF), dst
));
1048 addInstr(env
, AMD64Instr_MovxLQ(False
, dst
, dst
));
1051 addInstr(env
, AMD64Instr_Sh64(Ash_SHL
, 56, dst
));
1052 addInstr(env
, AMD64Instr_Sh64(Ash_SAR
, 56, dst
));
1055 addInstr(env
, AMD64Instr_Sh64(Ash_SHL
, 48, dst
));
1056 addInstr(env
, AMD64Instr_Sh64(Ash_SAR
, 48, dst
));
1059 addInstr(env
, AMD64Instr_MovxLQ(True
, dst
, dst
));
1062 ppIROp(e
->Iex
.Binop
.op
);
1066 /* Now consider the shift amount. If it's a literal, we
1067 can do a much better job than the general case. */
1068 if (e
->Iex
.Binop
.arg2
->tag
== Iex_Const
) {
1069 /* assert that the IR is well-typed */
1071 vassert(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->tag
== Ico_U8
);
1072 nshift
= e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
;
1073 vassert(nshift
>= 0);
1075 /* Can't allow nshift==0 since that means %cl */
1076 addInstr(env
, AMD64Instr_Sh64(shOp
, nshift
, dst
));
1078 /* General case; we have to force the amount into %cl. */
1079 HReg regR
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
1080 addInstr(env
, mk_iMOVsd_RR(regR
,hregAMD64_RCX()));
1081 addInstr(env
, AMD64Instr_Sh64(shOp
, 0/* %cl */, dst
));
1086 /* Handle misc other scalar ops. */
1087 if (e
->Iex
.Binop
.op
== Iop_Max32U
) {
1088 HReg src1
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
1089 HReg dst
= newVRegI(env
);
1090 HReg src2
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
1091 addInstr(env
, mk_iMOVsd_RR(src1
, dst
));
1092 addInstr(env
, AMD64Instr_Alu32R(Aalu_CMP
, AMD64RMI_Reg(src2
), dst
));
1093 addInstr(env
, AMD64Instr_CMov64(Acc_B
, src2
, dst
));
1097 if (e
->Iex
.Binop
.op
== Iop_DivModS64to32
1098 || e
->Iex
.Binop
.op
== Iop_DivModU64to32
) {
1099 /* 64 x 32 -> (32(rem),32(div)) division */
1100 /* Get the 64-bit operand into edx:eax, and the other into
1102 HReg rax
= hregAMD64_RAX();
1103 HReg rdx
= hregAMD64_RDX();
1104 HReg dst
= newVRegI(env
);
1105 Bool syned
= toBool(e
->Iex
.Binop
.op
== Iop_DivModS64to32
);
1106 AMD64RM
* rmRight
= iselIntExpr_RM(env
, e
->Iex
.Binop
.arg2
);
1107 /* Compute the left operand into a reg, and then
1108 put the top half in edx and the bottom in eax. */
1109 HReg left64
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
1110 addInstr(env
, mk_iMOVsd_RR(left64
, rdx
));
1111 addInstr(env
, mk_iMOVsd_RR(left64
, rax
));
1112 addInstr(env
, AMD64Instr_Sh64(Ash_SHR
, 32, rdx
));
1113 addInstr(env
, AMD64Instr_Div(syned
, 4, rmRight
));
1114 addInstr(env
, AMD64Instr_MovxLQ(False
, rdx
, rdx
));
1115 addInstr(env
, AMD64Instr_MovxLQ(False
, rax
, rax
));
1116 addInstr(env
, AMD64Instr_Sh64(Ash_SHL
, 32, rdx
));
1117 addInstr(env
, mk_iMOVsd_RR(rax
, dst
));
1118 addInstr(env
, AMD64Instr_Alu64R(Aalu_OR
, AMD64RMI_Reg(rdx
), dst
));
1122 if (e
->Iex
.Binop
.op
== Iop_32HLto64
) {
1123 HReg hi32
= newVRegI(env
);
1124 HReg lo32
= newVRegI(env
);
1125 HReg hi32s
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
1126 HReg lo32s
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
1127 addInstr(env
, mk_iMOVsd_RR(hi32s
, hi32
));
1128 addInstr(env
, mk_iMOVsd_RR(lo32s
, lo32
));
1129 addInstr(env
, AMD64Instr_Sh64(Ash_SHL
, 32, hi32
));
1130 addInstr(env
, AMD64Instr_MovxLQ(False
, lo32
, lo32
));
1131 addInstr(env
, AMD64Instr_Alu64R(
1132 Aalu_OR
, AMD64RMI_Reg(lo32
), hi32
));
1136 if (e
->Iex
.Binop
.op
== Iop_16HLto32
) {
1137 HReg hi16
= newVRegI(env
);
1138 HReg lo16
= newVRegI(env
);
1139 HReg hi16s
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
1140 HReg lo16s
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
1141 addInstr(env
, mk_iMOVsd_RR(hi16s
, hi16
));
1142 addInstr(env
, mk_iMOVsd_RR(lo16s
, lo16
));
1143 addInstr(env
, AMD64Instr_Sh64(Ash_SHL
, 16, hi16
));
1144 addInstr(env
, AMD64Instr_Alu64R(
1145 Aalu_AND
, AMD64RMI_Imm(0xFFFF), lo16
));
1146 addInstr(env
, AMD64Instr_Alu64R(
1147 Aalu_OR
, AMD64RMI_Reg(lo16
), hi16
));
1151 if (e
->Iex
.Binop
.op
== Iop_8HLto16
) {
1152 HReg hi8
= newVRegI(env
);
1153 HReg lo8
= newVRegI(env
);
1154 HReg hi8s
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
1155 HReg lo8s
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
1156 addInstr(env
, mk_iMOVsd_RR(hi8s
, hi8
));
1157 addInstr(env
, mk_iMOVsd_RR(lo8s
, lo8
));
1158 addInstr(env
, AMD64Instr_Sh64(Ash_SHL
, 8, hi8
));
1159 addInstr(env
, AMD64Instr_Alu64R(
1160 Aalu_AND
, AMD64RMI_Imm(0xFF), lo8
));
1161 addInstr(env
, AMD64Instr_Alu64R(
1162 Aalu_OR
, AMD64RMI_Reg(lo8
), hi8
));
1166 if (e
->Iex
.Binop
.op
== Iop_MullS32
1167 || e
->Iex
.Binop
.op
== Iop_MullS16
1168 || e
->Iex
.Binop
.op
== Iop_MullS8
1169 || e
->Iex
.Binop
.op
== Iop_MullU32
1170 || e
->Iex
.Binop
.op
== Iop_MullU16
1171 || e
->Iex
.Binop
.op
== Iop_MullU8
) {
1172 HReg a32
= newVRegI(env
);
1173 HReg b32
= newVRegI(env
);
1174 HReg a32s
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
1175 HReg b32s
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
1177 AMD64ShiftOp shr_op
= Ash_SHR
;
1178 switch (e
->Iex
.Binop
.op
) {
1179 case Iop_MullS32
: shr_op
= Ash_SAR
; shift
= 32; break;
1180 case Iop_MullS16
: shr_op
= Ash_SAR
; shift
= 48; break;
1181 case Iop_MullS8
: shr_op
= Ash_SAR
; shift
= 56; break;
1182 case Iop_MullU32
: shr_op
= Ash_SHR
; shift
= 32; break;
1183 case Iop_MullU16
: shr_op
= Ash_SHR
; shift
= 48; break;
1184 case Iop_MullU8
: shr_op
= Ash_SHR
; shift
= 56; break;
1185 default: vassert(0);
1188 addInstr(env
, mk_iMOVsd_RR(a32s
, a32
));
1189 addInstr(env
, mk_iMOVsd_RR(b32s
, b32
));
1190 addInstr(env
, AMD64Instr_Sh64(Ash_SHL
, shift
, a32
));
1191 addInstr(env
, AMD64Instr_Sh64(Ash_SHL
, shift
, b32
));
1192 addInstr(env
, AMD64Instr_Sh64(shr_op
, shift
, a32
));
1193 addInstr(env
, AMD64Instr_Sh64(shr_op
, shift
, b32
));
1194 addInstr(env
, AMD64Instr_Alu64R(Aalu_MUL
, AMD64RMI_Reg(a32
), b32
));
1198 if (e
->Iex
.Binop
.op
== Iop_CmpF64
) {
1199 HReg fL
= iselDblExpr(env
, e
->Iex
.Binop
.arg1
);
1200 HReg fR
= iselDblExpr(env
, e
->Iex
.Binop
.arg2
);
1201 HReg dst
= newVRegI(env
);
1202 addInstr(env
, AMD64Instr_SseUComIS(8,fL
,fR
,dst
));
1203 /* Mask out irrelevant parts of the result so as to conform
1204 to the CmpF64 definition. */
1205 addInstr(env
, AMD64Instr_Alu64R(Aalu_AND
, AMD64RMI_Imm(0x45), dst
));
1209 if (e
->Iex
.Binop
.op
== Iop_F64toI32S
1210 || e
->Iex
.Binop
.op
== Iop_F64toI64S
) {
1211 Int szD
= e
->Iex
.Binop
.op
==Iop_F64toI32S
? 4 : 8;
1212 HReg rf
= iselDblExpr(env
, e
->Iex
.Binop
.arg2
);
1213 HReg dst
= newVRegI(env
);
1214 set_SSE_rounding_mode( env
, e
->Iex
.Binop
.arg1
);
1215 addInstr(env
, AMD64Instr_SseSF2SI( 8, szD
, rf
, dst
));
1216 set_SSE_rounding_default(env
);
1220 /* Deal with 64-bit SIMD binary ops. For the most part these are doable
1221 by using the equivalent 128-bit operation and ignoring the upper half
1223 AMD64SseOp op
= Asse_INVALID
;
1224 Bool arg1isEReg
= False
;
1225 Bool preShift32R
= False
;
1226 switch (e
->Iex
.Binop
.op
) {
1227 // The following 3 could be done with 128 bit insns too, but
1228 // first require the inputs to be reformatted.
1229 //case Iop_QNarrowBin32Sto16Sx4:
1230 //op = Asse_PACKSSD; arg1isEReg = True; break;
1231 //case Iop_QNarrowBin16Sto8Sx8:
1232 //op = Asse_PACKSSW; arg1isEReg = True; break;
1233 //case Iop_QNarrowBin16Sto8Ux8:
1234 //op = Asse_PACKUSW; arg1isEReg = True; break;
1236 case Iop_InterleaveHI8x8
:
1237 op
= Asse_UNPCKLB
; arg1isEReg
= True
; preShift32R
= True
;
1239 case Iop_InterleaveHI16x4
:
1240 op
= Asse_UNPCKLW
; arg1isEReg
= True
; preShift32R
= True
;
1242 case Iop_InterleaveHI32x2
:
1243 op
= Asse_UNPCKLD
; arg1isEReg
= True
; preShift32R
= True
;
1245 case Iop_InterleaveLO8x8
:
1246 op
= Asse_UNPCKLB
; arg1isEReg
= True
;
1248 case Iop_InterleaveLO16x4
:
1249 op
= Asse_UNPCKLW
; arg1isEReg
= True
;
1251 case Iop_InterleaveLO32x2
:
1252 op
= Asse_UNPCKLD
; arg1isEReg
= True
;
1255 case Iop_Add8x8
: op
= Asse_ADD8
; break;
1256 case Iop_Add16x4
: op
= Asse_ADD16
; break;
1257 case Iop_Add32x2
: op
= Asse_ADD32
; break;
1258 case Iop_QAdd8Sx8
: op
= Asse_QADD8S
; break;
1259 case Iop_QAdd16Sx4
: op
= Asse_QADD16S
; break;
1260 case Iop_QAdd8Ux8
: op
= Asse_QADD8U
; break;
1261 case Iop_QAdd16Ux4
: op
= Asse_QADD16U
; break;
1262 case Iop_Avg8Ux8
: op
= Asse_AVG8U
; break;
1263 case Iop_Avg16Ux4
: op
= Asse_AVG16U
; break;
1264 case Iop_CmpEQ8x8
: op
= Asse_CMPEQ8
; break;
1265 case Iop_CmpEQ16x4
: op
= Asse_CMPEQ16
; break;
1266 case Iop_CmpEQ32x2
: op
= Asse_CMPEQ32
; break;
1267 case Iop_CmpGT8Sx8
: op
= Asse_CMPGT8S
; break;
1268 case Iop_CmpGT16Sx4
: op
= Asse_CMPGT16S
; break;
1269 case Iop_CmpGT32Sx2
: op
= Asse_CMPGT32S
; break;
1270 case Iop_Max16Sx4
: op
= Asse_MAX16S
; break;
1271 case Iop_Max8Ux8
: op
= Asse_MAX8U
; break;
1272 case Iop_Min16Sx4
: op
= Asse_MIN16S
; break;
1273 case Iop_Min8Ux8
: op
= Asse_MIN8U
; break;
1274 case Iop_MulHi16Ux4
: op
= Asse_MULHI16U
; break;
1275 case Iop_MulHi16Sx4
: op
= Asse_MULHI16S
; break;
1276 case Iop_Mul16x4
: op
= Asse_MUL16
; break;
1277 case Iop_Sub8x8
: op
= Asse_SUB8
; break;
1278 case Iop_Sub16x4
: op
= Asse_SUB16
; break;
1279 case Iop_Sub32x2
: op
= Asse_SUB32
; break;
1280 case Iop_QSub8Sx8
: op
= Asse_QSUB8S
; break;
1281 case Iop_QSub16Sx4
: op
= Asse_QSUB16S
; break;
1282 case Iop_QSub8Ux8
: op
= Asse_QSUB8U
; break;
1283 case Iop_QSub16Ux4
: op
= Asse_QSUB16U
; break;
1286 if (op
!= Asse_INVALID
) {
1287 /* This isn't pretty, but .. move each arg to the low half of an XMM
1288 register, do the operation on the whole register, and move the
1289 result back to an integer register. */
1290 const IRExpr
* arg1
= e
->Iex
.Binop
.arg1
;
1291 const IRExpr
* arg2
= e
->Iex
.Binop
.arg2
;
1292 vassert(typeOfIRExpr(env
->type_env
, arg1
) == Ity_I64
);
1293 vassert(typeOfIRExpr(env
->type_env
, arg2
) == Ity_I64
);
1294 HReg iarg1
= iselIntExpr_R(env
, arg1
);
1295 HReg iarg2
= iselIntExpr_R(env
, arg2
);
1296 HReg varg1
= newVRegV(env
);
1297 HReg varg2
= newVRegV(env
);
1298 HReg idst
= newVRegI(env
);
1299 addInstr(env
, AMD64Instr_SseMOVQ(iarg1
, varg1
, True
/*toXMM*/));
1300 addInstr(env
, AMD64Instr_SseMOVQ(iarg2
, varg2
, True
/*toXMM*/));
1303 addInstr(env
, AMD64Instr_SseShiftN(Asse_SHR128
, 32, varg1
));
1304 addInstr(env
, AMD64Instr_SseShiftN(Asse_SHR128
, 32, varg2
));
1306 addInstr(env
, AMD64Instr_SseReRg(op
, varg1
, varg2
));
1307 addInstr(env
, AMD64Instr_SseMOVQ(idst
, varg2
, False
/*!toXMM*/));
1309 vassert(!preShift32R
);
1310 addInstr(env
, AMD64Instr_SseReRg(op
, varg2
, varg1
));
1311 addInstr(env
, AMD64Instr_SseMOVQ(idst
, varg1
, False
/*!toXMM*/));
1318 switch (e
->Iex
.Binop
.op
) {
1319 case Iop_ShlN16x4
: laneBits
= 16; op
= Asse_SHL16
; break;
1320 case Iop_ShlN32x2
: laneBits
= 32; op
= Asse_SHL32
; break;
1321 case Iop_SarN16x4
: laneBits
= 16; op
= Asse_SAR16
; break;
1322 case Iop_SarN32x2
: laneBits
= 32; op
= Asse_SAR32
; break;
1323 case Iop_ShrN16x4
: laneBits
= 16; op
= Asse_SHR16
; break;
1324 case Iop_ShrN32x2
: laneBits
= 32; op
= Asse_SHR32
; break;
1327 if (op
!= Asse_INVALID
) {
1328 const IRExpr
* arg1
= e
->Iex
.Binop
.arg1
;
1329 const IRExpr
* arg2
= e
->Iex
.Binop
.arg2
;
1330 vassert(typeOfIRExpr(env
->type_env
, arg1
) == Ity_I64
);
1331 vassert(typeOfIRExpr(env
->type_env
, arg2
) == Ity_I8
);
1332 HReg igreg
= iselIntExpr_R(env
, arg1
);
1333 HReg vgreg
= newVRegV(env
);
1334 HReg idst
= newVRegI(env
);
1335 addInstr(env
, AMD64Instr_SseMOVQ(igreg
, vgreg
, True
/*toXMM*/));
1336 /* If it's a shift by an in-range immediate, generate a single
1338 if (arg2
->tag
== Iex_Const
) {
1339 IRConst
* c
= arg2
->Iex
.Const
.con
;
1340 vassert(c
->tag
== Ico_U8
);
1341 UInt shift
= c
->Ico
.U8
;
1342 if (shift
< laneBits
) {
1343 addInstr(env
, AMD64Instr_SseShiftN(op
, shift
, vgreg
));
1344 addInstr(env
, AMD64Instr_SseMOVQ(idst
, vgreg
, False
/*!toXMM*/));
1348 /* Otherwise we have to do it the longwinded way. */
1349 HReg ishift
= iselIntExpr_R(env
, arg2
);
1350 HReg vshift
= newVRegV(env
);
1351 addInstr(env
, AMD64Instr_SseMOVQ(ishift
, vshift
, True
/*toXMM*/));
1352 addInstr(env
, AMD64Instr_SseReRg(op
, vshift
, vgreg
));
1353 addInstr(env
, AMD64Instr_SseMOVQ(idst
, vgreg
, False
/*!toXMM*/));
1357 if (e
->Iex
.Binop
.op
== Iop_Mul32x2
) {
1358 const IRExpr
* arg1
= e
->Iex
.Binop
.arg1
;
1359 const IRExpr
* arg2
= e
->Iex
.Binop
.arg2
;
1360 vassert(typeOfIRExpr(env
->type_env
, arg1
) == Ity_I64
);
1361 vassert(typeOfIRExpr(env
->type_env
, arg2
) == Ity_I64
);
1362 HReg s1
= iselIntExpr_R(env
, arg1
);
1363 HReg s2
= iselIntExpr_R(env
, arg2
);
1364 HReg resLo
= newVRegI(env
);
1365 // resLo = (s1 *64 s2) & 0xFFFF'FFFF
1366 addInstr(env
, mk_iMOVsd_RR(s1
, resLo
));
1367 addInstr(env
, AMD64Instr_Alu64R(Aalu_MUL
, AMD64RMI_Reg(s2
), resLo
));
1368 addInstr(env
, AMD64Instr_MovxLQ(False
, resLo
, resLo
));
1370 // resHi = ((s1 >>u 32) *64 (s2 >>u 32)) << 32;
1371 HReg resHi
= newVRegI(env
);
1372 addInstr(env
, mk_iMOVsd_RR(s1
, resHi
));
1373 addInstr(env
, AMD64Instr_Sh64(Ash_SHR
, 32, resHi
));
1374 HReg tmp
= newVRegI(env
);
1375 addInstr(env
, mk_iMOVsd_RR(s2
, tmp
));
1376 addInstr(env
, AMD64Instr_Sh64(Ash_SHR
, 32, tmp
));
1377 addInstr(env
, AMD64Instr_Alu64R(Aalu_MUL
, AMD64RMI_Reg(tmp
), resHi
));
1378 addInstr(env
, AMD64Instr_Sh64(Ash_SHL
, 32, resHi
));
1380 // final result = resHi | resLo
1381 addInstr(env
, AMD64Instr_Alu64R(Aalu_OR
, AMD64RMI_Reg(resHi
), resLo
));
1385 // A few remaining SIMD64 ops require helper functions, at least for
1387 Bool second_is_UInt
= False
;
1389 switch (e
->Iex
.Binop
.op
) {
1390 case Iop_CatOddLanes16x4
:
1391 fn
= (HWord
)h_generic_calc_CatOddLanes16x4
; break;
1392 case Iop_CatEvenLanes16x4
:
1393 fn
= (HWord
)h_generic_calc_CatEvenLanes16x4
; break;
1394 case Iop_PermOrZero8x8
:
1395 fn
= (HWord
)h_generic_calc_PermOrZero8x8
; break;
1397 case Iop_QNarrowBin32Sto16Sx4
:
1398 fn
= (HWord
)h_generic_calc_QNarrowBin32Sto16Sx4
; break;
1399 case Iop_QNarrowBin16Sto8Sx8
:
1400 fn
= (HWord
)h_generic_calc_QNarrowBin16Sto8Sx8
; break;
1401 case Iop_QNarrowBin16Sto8Ux8
:
1402 fn
= (HWord
)h_generic_calc_QNarrowBin16Sto8Ux8
; break;
1404 case Iop_NarrowBin16to8x8
:
1405 fn
= (HWord
)h_generic_calc_NarrowBin16to8x8
; break;
1406 case Iop_NarrowBin32to16x4
:
1407 fn
= (HWord
)h_generic_calc_NarrowBin32to16x4
; break;
1410 fn
= (HWord
)h_generic_calc_SarN8x8
;
1411 second_is_UInt
= True
;
1415 fn
= (HWord
)0; break;
1417 if (fn
!= (HWord
)0) {
1418 /* Note: the following assumes all helpers are of signature
1419 ULong fn ( ULong, ULong ), and they are
1420 not marked as regparm functions.
1422 HReg dst
= newVRegI(env
);
1423 HReg argL
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
1424 HReg argR
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
1426 addInstr(env
, AMD64Instr_MovxLQ(False
, argR
, argR
));
1427 addInstr(env
, mk_iMOVsd_RR(argL
, hregAMD64_RDI()) );
1428 addInstr(env
, mk_iMOVsd_RR(argR
, hregAMD64_RSI()) );
1429 addInstr(env
, AMD64Instr_Call( Acc_ALWAYS
, (ULong
)fn
, 2,
1430 mk_RetLoc_simple(RLPri_Int
) ));
1431 addInstr(env
, mk_iMOVsd_RR(hregAMD64_RAX(), dst
));
1435 // Half-float vector conversion
1436 if (e
->Iex
.Binop
.op
== Iop_F32toF16x4
1437 && (env
->hwcaps
& VEX_HWCAPS_AMD64_F16C
)) {
1438 HReg srcV
= iselVecExpr(env
, e
->Iex
.Binop
.arg2
);
1439 HReg dstV
= newVRegV(env
);
1440 HReg dstI
= newVRegI(env
);
1441 set_SSE_rounding_mode( env
, e
->Iex
.Binop
.arg1
);
1442 addInstr(env
, AMD64Instr_Sse32Fx4(Asse_F32toF16
, srcV
, dstV
));
1443 set_SSE_rounding_default(env
);
1444 addInstr(env
, AMD64Instr_SseMOVQ(dstI
, dstV
, /*toXMM=*/False
));
1451 /* --------- UNARY OP --------- */
1454 /* 1Uto8(64to1(expr64)) */
1456 DEFINE_PATTERN( p_1Uto8_64to1
,
1457 unop(Iop_1Uto8
, unop(Iop_64to1
, bind(0))) );
1458 if (matchIRExpr(&mi
,p_1Uto8_64to1
,e
)) {
1459 const IRExpr
* expr64
= mi
.bindee
[0];
1460 HReg dst
= newVRegI(env
);
1461 HReg src
= iselIntExpr_R(env
, expr64
);
1462 addInstr(env
, mk_iMOVsd_RR(src
,dst
) );
1463 addInstr(env
, AMD64Instr_Alu64R(Aalu_AND
,
1464 AMD64RMI_Imm(1), dst
));
1469 /* 8Uto64(LDle(expr64)) */
1471 DEFINE_PATTERN(p_LDle8_then_8Uto64
,
1473 IRExpr_Load(Iend_LE
,Ity_I8
,bind(0))) );
1474 if (matchIRExpr(&mi
,p_LDle8_then_8Uto64
,e
)) {
1475 HReg dst
= newVRegI(env
);
1476 AMD64AMode
* amode
= iselIntExpr_AMode ( env
, mi
.bindee
[0] );
1477 addInstr(env
, AMD64Instr_LoadEX(1,False
,amode
,dst
));
1482 /* 16Uto64(LDle(expr64)) */
1484 DEFINE_PATTERN(p_LDle16_then_16Uto64
,
1486 IRExpr_Load(Iend_LE
,Ity_I16
,bind(0))) );
1487 if (matchIRExpr(&mi
,p_LDle16_then_16Uto64
,e
)) {
1488 HReg dst
= newVRegI(env
);
1489 AMD64AMode
* amode
= iselIntExpr_AMode ( env
, mi
.bindee
[0] );
1490 addInstr(env
, AMD64Instr_LoadEX(2,False
,amode
,dst
));
1495 /* 32Uto64( Add32/Sub32/And32/Or32/Xor32(expr32, expr32) )
1496 Use 32 bit arithmetic and let the default zero-extend rule
1497 do the 32Uto64 for free. */
1498 if (e
->Iex
.Unop
.op
== Iop_32Uto64
&& e
->Iex
.Unop
.arg
->tag
== Iex_Binop
) {
1499 IROp opi
= e
->Iex
.Unop
.arg
->Iex
.Binop
.op
; /* inner op */
1500 IRExpr
* argL
= e
->Iex
.Unop
.arg
->Iex
.Binop
.arg1
;
1501 IRExpr
* argR
= e
->Iex
.Unop
.arg
->Iex
.Binop
.arg2
;
1502 AMD64AluOp aluOp
= Aalu_INVALID
;
1504 case Iop_Add32
: aluOp
= Aalu_ADD
; break;
1505 case Iop_Sub32
: aluOp
= Aalu_SUB
; break;
1506 case Iop_And32
: aluOp
= Aalu_AND
; break;
1507 case Iop_Or32
: aluOp
= Aalu_OR
; break;
1508 case Iop_Xor32
: aluOp
= Aalu_XOR
; break;
1511 if (aluOp
!= Aalu_INVALID
) {
1512 /* For commutative ops we assume any literal values are on
1513 the second operand. */
1514 HReg dst
= newVRegI(env
);
1515 HReg reg
= iselIntExpr_R(env
, argL
);
1516 AMD64RMI
* rmi
= iselIntExpr_RMI(env
, argR
);
1517 addInstr(env
, mk_iMOVsd_RR(reg
,dst
));
1518 addInstr(env
, AMD64Instr_Alu32R(aluOp
, rmi
, dst
));
1521 /* just fall through to normal handling for Iop_32Uto64 */
1524 /* Fallback cases */
1525 switch (e
->Iex
.Unop
.op
) {
1528 HReg dst
= newVRegI(env
);
1529 HReg src
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1530 addInstr(env
, AMD64Instr_MovxLQ(e
->Iex
.Unop
.op
== Iop_32Sto64
,
1534 case Iop_128HIto64
: {
1536 iselInt128Expr(&rHi
,&rLo
, env
, e
->Iex
.Unop
.arg
);
1537 return rHi
; /* and abandon rLo */
1541 iselInt128Expr(&rHi
,&rLo
, env
, e
->Iex
.Unop
.arg
);
1542 return rLo
; /* and abandon rHi */
1549 HReg dst
= newVRegI(env
);
1550 HReg src
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1551 Bool srcIs16
= toBool( e
->Iex
.Unop
.op
==Iop_16Uto32
1552 || e
->Iex
.Unop
.op
==Iop_16Uto64
);
1553 UInt mask
= srcIs16
? 0xFFFF : 0xFF;
1554 addInstr(env
, mk_iMOVsd_RR(src
,dst
) );
1555 addInstr(env
, AMD64Instr_Alu64R(Aalu_AND
,
1556 AMD64RMI_Imm(mask
), dst
));
1564 HReg dst
= newVRegI(env
);
1565 HReg src
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1566 Bool srcIs16
= toBool( e
->Iex
.Unop
.op
==Iop_16Sto32
1567 || e
->Iex
.Unop
.op
==Iop_16Sto64
);
1568 UInt amt
= srcIs16
? 48 : 56;
1569 addInstr(env
, mk_iMOVsd_RR(src
,dst
) );
1570 addInstr(env
, AMD64Instr_Sh64(Ash_SHL
, amt
, dst
));
1571 addInstr(env
, AMD64Instr_Sh64(Ash_SAR
, amt
, dst
));
1578 HReg dst
= newVRegI(env
);
1579 HReg src
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1580 addInstr(env
, mk_iMOVsd_RR(src
,dst
) );
1581 addInstr(env
, AMD64Instr_Unary64(Aun_NOT
,dst
));
1586 case Iop_64HIto32
: {
1587 HReg dst
= newVRegI(env
);
1588 HReg src
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1590 switch (e
->Iex
.Unop
.op
) {
1591 case Iop_16HIto8
: shift
= 8; break;
1592 case Iop_32HIto16
: shift
= 16; break;
1593 case Iop_64HIto32
: shift
= 32; break;
1594 default: vassert(0);
1596 addInstr(env
, mk_iMOVsd_RR(src
,dst
) );
1597 addInstr(env
, AMD64Instr_Sh64(Ash_SHR
, shift
, dst
));
1603 HReg dst
= newVRegI(env
);
1604 AMD64CondCode cond
= iselCondCode(env
, e
->Iex
.Unop
.arg
);
1605 addInstr(env
, AMD64Instr_Set64(cond
,dst
));
1612 /* could do better than this, but for now ... */
1613 HReg dst
= newVRegI(env
);
1614 AMD64CondCode cond
= iselCondCode(env
, e
->Iex
.Unop
.arg
);
1615 addInstr(env
, AMD64Instr_Set64(cond
,dst
));
1616 addInstr(env
, AMD64Instr_Sh64(Ash_SHL
, 63, dst
));
1617 addInstr(env
, AMD64Instr_Sh64(Ash_SAR
, 63, dst
));
1621 /* Count trailing zeroes, implemented by amd64 'bsfq' */
1622 HReg dst
= newVRegI(env
);
1623 HReg src
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1624 addInstr(env
, AMD64Instr_Bsfr64(True
,src
,dst
));
1628 /* Count leading zeroes. Do 'bsrq' to establish the index
1629 of the highest set bit, and subtract that value from
1631 HReg tmp
= newVRegI(env
);
1632 HReg dst
= newVRegI(env
);
1633 HReg src
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1634 addInstr(env
, AMD64Instr_Bsfr64(False
,src
,tmp
));
1635 addInstr(env
, AMD64Instr_Alu64R(Aalu_MOV
,
1636 AMD64RMI_Imm(63), dst
));
1637 addInstr(env
, AMD64Instr_Alu64R(Aalu_SUB
,
1638 AMD64RMI_Reg(tmp
), dst
));
1642 case Iop_CmpwNEZ64
: {
1643 HReg dst
= newVRegI(env
);
1644 HReg src
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1645 addInstr(env
, mk_iMOVsd_RR(src
,dst
));
1646 addInstr(env
, AMD64Instr_Unary64(Aun_NEG
,dst
));
1647 addInstr(env
, AMD64Instr_Alu64R(Aalu_OR
,
1648 AMD64RMI_Reg(src
), dst
));
1649 addInstr(env
, AMD64Instr_Sh64(Ash_SAR
, 63, dst
));
1653 case Iop_CmpwNEZ32
: {
1654 HReg src
= newVRegI(env
);
1655 HReg dst
= newVRegI(env
);
1656 HReg pre
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1657 addInstr(env
, mk_iMOVsd_RR(pre
,src
));
1658 addInstr(env
, AMD64Instr_MovxLQ(False
, src
, src
));
1659 addInstr(env
, mk_iMOVsd_RR(src
,dst
));
1660 addInstr(env
, AMD64Instr_Unary64(Aun_NEG
,dst
));
1661 addInstr(env
, AMD64Instr_Alu64R(Aalu_OR
,
1662 AMD64RMI_Reg(src
), dst
));
1663 addInstr(env
, AMD64Instr_Sh64(Ash_SAR
, 63, dst
));
1671 HReg dst
= newVRegI(env
);
1672 HReg src
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1673 addInstr(env
, mk_iMOVsd_RR(src
, dst
));
1674 addInstr(env
, AMD64Instr_Unary64(Aun_NEG
, dst
));
1675 addInstr(env
, AMD64Instr_Alu64R(Aalu_OR
, AMD64RMI_Reg(src
), dst
));
1679 case Iop_V128to32
: {
1680 HReg dst
= newVRegI(env
);
1681 HReg vec
= iselVecExpr(env
, e
->Iex
.Unop
.arg
);
1682 AMD64AMode
* rsp_m16
= AMD64AMode_IR(-16, hregAMD64_RSP());
1683 addInstr(env
, AMD64Instr_SseLdSt(False
/*store*/, 16, vec
, rsp_m16
));
1684 addInstr(env
, AMD64Instr_LoadEX(4, False
/*z-widen*/, rsp_m16
, dst
));
1689 case Iop_V128to64
: {
1690 HReg dst
= newVRegI(env
);
1691 HReg vec
= iselVecExpr(env
, e
->Iex
.Unop
.arg
);
1692 addInstr(env
, AMD64Instr_SseMOVQ(dst
, vec
, False
/*!toXMM*/));
1695 case Iop_V128HIto64
: {
1696 HReg dst
= newVRegI(env
);
1697 HReg vec
= iselVecExpr(env
, e
->Iex
.Unop
.arg
);
1698 HReg vec2
= newVRegV(env
);
1699 addInstr(env
, mk_vMOVsd_RR(vec
, vec2
));
1700 addInstr(env
, AMD64Instr_SseShiftN(Asse_SHR128
, 64, vec2
));
1701 addInstr(env
, AMD64Instr_SseMOVQ(dst
, vec2
, False
/*!toXMM*/));
1705 /* V256to64_{3,2,1,0} */
1706 case Iop_V256to64_0
: case Iop_V256to64_1
:
1707 case Iop_V256to64_2
: case Iop_V256to64_3
: {
1709 iselDVecExpr(&vHi
, &vLo
, env
, e
->Iex
.Unop
.arg
);
1710 /* Do the first part of the selection by deciding which of
1711 the 128 bit registers to look at, and second part using
1712 the same scheme as for V128{HI}to64 above. */
1713 Bool low64of128
= True
;
1714 switch (e
->Iex
.Unop
.op
) {
1715 case Iop_V256to64_0
: vec
= vLo
; low64of128
= True
; break;
1716 case Iop_V256to64_1
: vec
= vLo
; low64of128
= False
; break;
1717 case Iop_V256to64_2
: vec
= vHi
; low64of128
= True
; break;
1718 case Iop_V256to64_3
: vec
= vHi
; low64of128
= False
; break;
1719 default: vassert(0);
1721 HReg dst
= newVRegI(env
);
1723 addInstr(env
, AMD64Instr_SseMOVQ(dst
, vec
, False
/*!toXMM*/));
1725 HReg vec2
= newVRegV(env
);
1726 addInstr(env
, mk_vMOVsd_RR(vec
, vec2
));
1727 addInstr(env
, AMD64Instr_SseShiftN(Asse_SHR128
, 64, vec2
));
1728 addInstr(env
, AMD64Instr_SseMOVQ(dst
, vec2
, False
/*!toXMM*/));
1733 /* ReinterpF64asI64(e) */
1734 /* Given an IEEE754 double, produce an I64 with the same bit
1736 case Iop_ReinterpF64asI64
: {
1737 AMD64AMode
* m8_rsp
= AMD64AMode_IR(-8, hregAMD64_RSP());
1738 HReg dst
= newVRegI(env
);
1739 HReg src
= iselDblExpr(env
, e
->Iex
.Unop
.arg
);
1741 set_SSE_rounding_default(env
);
1742 addInstr(env
, AMD64Instr_SseLdSt(False
/*store*/, 8, src
, m8_rsp
));
1743 addInstr(env
, AMD64Instr_Alu64R(
1744 Aalu_MOV
, AMD64RMI_Mem(m8_rsp
), dst
));
1748 /* ReinterpF32asI32(e) */
1749 /* Given an IEEE754 single, produce an I64 with the same bit
1750 pattern in the lower half. */
1751 case Iop_ReinterpF32asI32
: {
1752 AMD64AMode
* m8_rsp
= AMD64AMode_IR(-8, hregAMD64_RSP());
1753 HReg dst
= newVRegI(env
);
1754 HReg src
= iselFltExpr(env
, e
->Iex
.Unop
.arg
);
1756 set_SSE_rounding_default(env
);
1757 addInstr(env
, AMD64Instr_SseLdSt(False
/*store*/, 4, src
, m8_rsp
));
1758 addInstr(env
, AMD64Instr_LoadEX(4, False
/*unsigned*/, m8_rsp
, dst
));
1768 /* These are no-ops. */
1769 return iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1771 case Iop_GetMSBs8x8
: {
1772 /* Note: the following assumes the helper is of
1774 UInt fn ( ULong ), and is not a regparm fn.
1776 HReg dst
= newVRegI(env
);
1777 HReg arg
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1778 HWord fn
= (HWord
)h_generic_calc_GetMSBs8x8
;
1779 addInstr(env
, mk_iMOVsd_RR(arg
, hregAMD64_RDI()) );
1780 addInstr(env
, AMD64Instr_Call( Acc_ALWAYS
, (ULong
)fn
,
1781 1, mk_RetLoc_simple(RLPri_Int
) ));
1782 /* MovxLQ is not exactly the right thing here. We just
1783 need to get the bottom 8 bits of RAX into dst, and zero
1784 out everything else. Assuming that the helper returns
1785 a UInt with the top 24 bits zeroed out, it'll do,
1787 addInstr(env
, AMD64Instr_MovxLQ(False
, hregAMD64_RAX(), dst
));
1791 case Iop_GetMSBs8x16
: {
1792 /* Note: the following assumes the helper is of signature
1793 UInt fn ( ULong w64hi, ULong w64Lo ),
1794 and is not a regparm fn. */
1795 HReg dst
= newVRegI(env
);
1796 HReg vec
= iselVecExpr(env
, e
->Iex
.Unop
.arg
);
1797 HReg rsp
= hregAMD64_RSP();
1798 HWord fn
= (HWord
)h_generic_calc_GetMSBs8x16
;
1799 AMD64AMode
* m8_rsp
= AMD64AMode_IR( -8, rsp
);
1800 AMD64AMode
* m16_rsp
= AMD64AMode_IR(-16, rsp
);
1801 addInstr(env
, AMD64Instr_SseLdSt(False
/*store*/,
1803 /* hi 64 bits into RDI -- the first arg */
1804 addInstr(env
, AMD64Instr_Alu64R( Aalu_MOV
,
1805 AMD64RMI_Mem(m8_rsp
),
1806 hregAMD64_RDI() )); /* 1st arg */
1807 /* lo 64 bits into RSI -- the 2nd arg */
1808 addInstr(env
, AMD64Instr_Alu64R( Aalu_MOV
,
1809 AMD64RMI_Mem(m16_rsp
),
1810 hregAMD64_RSI() )); /* 2nd arg */
1811 addInstr(env
, AMD64Instr_Call( Acc_ALWAYS
, (ULong
)fn
,
1812 2, mk_RetLoc_simple(RLPri_Int
) ));
1813 /* MovxLQ is not exactly the right thing here. We just
1814 need to get the bottom 16 bits of RAX into dst, and zero
1815 out everything else. Assuming that the helper returns
1816 a UInt with the top 16 bits zeroed out, it'll do,
1818 addInstr(env
, AMD64Instr_MovxLQ(False
, hregAMD64_RAX(), dst
));
1826 /* Deal with unary 64-bit SIMD ops. */
1828 switch (e
->Iex
.Unop
.op
) {
1829 case Iop_CmpNEZ32x2
:
1830 fn
= (HWord
)h_generic_calc_CmpNEZ32x2
; break;
1831 case Iop_CmpNEZ16x4
:
1832 fn
= (HWord
)h_generic_calc_CmpNEZ16x4
; break;
1834 fn
= (HWord
)h_generic_calc_CmpNEZ8x8
; break;
1836 fn
= (HWord
)0; break;
1838 if (fn
!= (HWord
)0) {
1839 /* Note: the following assumes all helpers are of
1841 ULong fn ( ULong ), and they are
1842 not marked as regparm functions.
1844 HReg dst
= newVRegI(env
);
1845 HReg arg
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
1846 addInstr(env
, mk_iMOVsd_RR(arg
, hregAMD64_RDI()) );
1847 addInstr(env
, AMD64Instr_Call( Acc_ALWAYS
, (ULong
)fn
, 1,
1848 mk_RetLoc_simple(RLPri_Int
) ));
1849 addInstr(env
, mk_iMOVsd_RR(hregAMD64_RAX(), dst
));
1856 /* --------- GET --------- */
1858 if (ty
== Ity_I64
) {
1859 HReg dst
= newVRegI(env
);
1860 addInstr(env
, AMD64Instr_Alu64R(
1863 AMD64AMode_IR(e
->Iex
.Get
.offset
,
1868 if (ty
== Ity_I8
|| ty
== Ity_I16
|| ty
== Ity_I32
) {
1869 HReg dst
= newVRegI(env
);
1870 addInstr(env
, AMD64Instr_LoadEX(
1871 toUChar(ty
==Ity_I8
? 1 : (ty
==Ity_I16
? 2 : 4)),
1873 AMD64AMode_IR(e
->Iex
.Get
.offset
,hregAMD64_RBP()),
1882 = genGuestArrayOffset(
1883 env
, e
->Iex
.GetI
.descr
,
1884 e
->Iex
.GetI
.ix
, e
->Iex
.GetI
.bias
);
1885 HReg dst
= newVRegI(env
);
1887 addInstr(env
, AMD64Instr_LoadEX( 1, False
, am
, dst
));
1890 if (ty
== Ity_I64
) {
1891 addInstr(env
, AMD64Instr_Alu64R( Aalu_MOV
, AMD64RMI_Mem(am
), dst
));
1897 /* --------- CCALL --------- */
1899 HReg dst
= newVRegI(env
);
1900 vassert(ty
== e
->Iex
.CCall
.retty
);
1902 /* be very restrictive for now. Only 64-bit ints allowed for
1903 args, and 64 or 32 bits for return type. */
1904 if (e
->Iex
.CCall
.retty
!= Ity_I64
&& e
->Iex
.CCall
.retty
!= Ity_I32
)
1907 /* Marshal args, do the call. */
1909 RetLoc rloc
= mk_RetLoc_INVALID();
1910 doHelperCall( &addToSp
, &rloc
, env
, NULL
/*guard*/,
1911 e
->Iex
.CCall
.cee
, e
->Iex
.CCall
.retty
, e
->Iex
.CCall
.args
);
1912 vassert(is_sane_RetLoc(rloc
));
1913 vassert(rloc
.pri
== RLPri_Int
);
1914 vassert(addToSp
== 0);
1916 /* Move to dst, and zero out the top 32 bits if the result type is
1917 Ity_I32. Probably overkill, but still .. */
1918 if (e
->Iex
.CCall
.retty
== Ity_I64
)
1919 addInstr(env
, mk_iMOVsd_RR(hregAMD64_RAX(), dst
));
1921 addInstr(env
, AMD64Instr_MovxLQ(False
, hregAMD64_RAX(), dst
));
1926 /* --------- LITERAL --------- */
1927 /* 64/32/16/8-bit literals */
1929 if (ty
== Ity_I64
) {
1930 HReg r
= newVRegI(env
);
1931 addInstr(env
, AMD64Instr_Imm64(e
->Iex
.Const
.con
->Ico
.U64
, r
));
1934 AMD64RMI
* rmi
= iselIntExpr_RMI ( env
, e
);
1935 HReg r
= newVRegI(env
);
1936 addInstr(env
, AMD64Instr_Alu64R(Aalu_MOV
, rmi
, r
));
1940 /* --------- MULTIPLEX --------- */
1941 case Iex_ITE
: { // VFD
1942 if ((ty
== Ity_I64
|| ty
== Ity_I32
|| ty
== Ity_I16
|| ty
== Ity_I8
)
1943 && typeOfIRExpr(env
->type_env
,e
->Iex
.ITE
.cond
) == Ity_I1
) {
1944 HReg r1
= iselIntExpr_R(env
, e
->Iex
.ITE
.iftrue
);
1945 HReg r0
= iselIntExpr_R(env
, e
->Iex
.ITE
.iffalse
);
1946 HReg dst
= newVRegI(env
);
1947 addInstr(env
, mk_iMOVsd_RR(r1
,dst
));
1948 AMD64CondCode cc
= iselCondCode(env
, e
->Iex
.ITE
.cond
);
1949 addInstr(env
, AMD64Instr_CMov64(cc
^ 1, r0
, dst
));
1955 /* --------- TERNARY OP --------- */
1957 IRTriop
*triop
= e
->Iex
.Triop
.details
;
1958 /* C3210 flags following FPU partial remainder (fprem), both
1959 IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
1960 if (triop
->op
== Iop_PRemC3210F64
1961 || triop
->op
== Iop_PRem1C3210F64
) {
1962 AMD64AMode
* m8_rsp
= AMD64AMode_IR(-8, hregAMD64_RSP());
1963 HReg arg1
= iselDblExpr(env
, triop
->arg2
);
1964 HReg arg2
= iselDblExpr(env
, triop
->arg3
);
1965 HReg dst
= newVRegI(env
);
1966 addInstr(env
, AMD64Instr_A87Free(2));
1968 /* one arg -> top of x87 stack */
1969 addInstr(env
, AMD64Instr_SseLdSt(False
/*store*/, 8, arg2
, m8_rsp
));
1970 addInstr(env
, AMD64Instr_A87PushPop(m8_rsp
, True
/*push*/, 8));
1972 /* other arg -> top of x87 stack */
1973 addInstr(env
, AMD64Instr_SseLdSt(False
/*store*/, 8, arg1
, m8_rsp
));
1974 addInstr(env
, AMD64Instr_A87PushPop(m8_rsp
, True
/*push*/, 8));
1976 switch (triop
->op
) {
1977 case Iop_PRemC3210F64
:
1978 addInstr(env
, AMD64Instr_A87FpOp(Afp_PREM
));
1980 case Iop_PRem1C3210F64
:
1981 addInstr(env
, AMD64Instr_A87FpOp(Afp_PREM1
));
1986 /* Ignore the result, and instead make off with the FPU's
1987 C3210 flags (in the status word). */
1988 addInstr(env
, AMD64Instr_A87StSW(m8_rsp
));
1989 addInstr(env
, AMD64Instr_Alu64R(Aalu_MOV
,AMD64RMI_Mem(m8_rsp
),dst
));
1990 addInstr(env
, AMD64Instr_Alu64R(Aalu_AND
,AMD64RMI_Imm(0x4700),dst
));
1998 } /* switch (e->tag) */
2000 /* We get here if no pattern matched. */
2003 vpanic("iselIntExpr_R(amd64): cannot reduce tree");
2007 /*---------------------------------------------------------*/
2008 /*--- ISEL: Integer expression auxiliaries ---*/
2009 /*---------------------------------------------------------*/
2011 /* --------------------- AMODEs --------------------- */
2013 /* Return an AMode which computes the value of the specified
2014 expression, possibly also adding insns to the code list as a
2015 result. The expression may only be a 32-bit one.
2018 static AMD64AMode
* iselIntExpr_AMode ( ISelEnv
* env
, const IRExpr
* e
)
2020 AMD64AMode
* am
= iselIntExpr_AMode_wrk(env
, e
);
2021 vassert(sane_AMode(am
));
2025 /* DO NOT CALL THIS DIRECTLY ! */
2026 static AMD64AMode
* iselIntExpr_AMode_wrk ( ISelEnv
* env
, const IRExpr
* e
)
2029 DECLARE_PATTERN(p_complex
);
2030 IRType ty
= typeOfIRExpr(env
->type_env
,e
);
2031 vassert(ty
== Ity_I64
);
2033 /* Add64( Add64(expr1, Shl64(expr2, imm8)), simm32 ) */
2034 /* bind0 bind1 bind2 bind3 */
2035 DEFINE_PATTERN(p_complex
,
2039 binop(Iop_Shl64
, bind(1), bind(2))
2044 if (matchIRExpr(&mi
, p_complex
, e
)) {
2045 const IRExpr
* expr1
= mi
.bindee
[0];
2046 const IRExpr
* expr2
= mi
.bindee
[1];
2047 const IRExpr
* imm8
= mi
.bindee
[2];
2048 const IRExpr
* simm32
= mi
.bindee
[3];
2049 if (imm8
->tag
== Iex_Const
2050 && imm8
->Iex
.Const
.con
->tag
== Ico_U8
2051 && imm8
->Iex
.Const
.con
->Ico
.U8
< 4
2052 /* imm8 is OK, now check simm32 */
2053 && simm32
->tag
== Iex_Const
2054 && simm32
->Iex
.Const
.con
->tag
== Ico_U64
2055 && fitsIn32Bits(simm32
->Iex
.Const
.con
->Ico
.U64
)) {
2056 UInt shift
= imm8
->Iex
.Const
.con
->Ico
.U8
;
2057 UInt offset
= toUInt(simm32
->Iex
.Const
.con
->Ico
.U64
);
2058 HReg r1
= iselIntExpr_R(env
, expr1
);
2059 HReg r2
= iselIntExpr_R(env
, expr2
);
2060 vassert(shift
== 0 || shift
== 1 || shift
== 2 || shift
== 3);
2061 return AMD64AMode_IRRS(offset
, r1
, r2
, shift
);
2065 /* Add64(expr1, Shl64(expr2, imm)) */
2066 if (e
->tag
== Iex_Binop
2067 && e
->Iex
.Binop
.op
== Iop_Add64
2068 && e
->Iex
.Binop
.arg2
->tag
== Iex_Binop
2069 && e
->Iex
.Binop
.arg2
->Iex
.Binop
.op
== Iop_Shl64
2070 && e
->Iex
.Binop
.arg2
->Iex
.Binop
.arg2
->tag
== Iex_Const
2071 && e
->Iex
.Binop
.arg2
->Iex
.Binop
.arg2
->Iex
.Const
.con
->tag
== Ico_U8
) {
2072 UInt shift
= e
->Iex
.Binop
.arg2
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
;
2073 if (shift
== 1 || shift
== 2 || shift
== 3) {
2074 HReg r1
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
2075 HReg r2
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
->Iex
.Binop
.arg1
);
2076 return AMD64AMode_IRRS(0, r1
, r2
, shift
);
2081 if (e
->tag
== Iex_Binop
2082 && e
->Iex
.Binop
.op
== Iop_Add64
2083 && e
->Iex
.Binop
.arg2
->tag
== Iex_Const
2084 && e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->tag
== Ico_U64
2085 && fitsIn32Bits(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U64
)) {
2086 HReg r1
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
2087 return AMD64AMode_IR(
2088 toUInt(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U64
),
2093 /* Doesn't match anything in particular. Generate it into
2094 a register and use that. */
2096 HReg r1
= iselIntExpr_R(env
, e
);
2097 return AMD64AMode_IR(0, r1
);
2102 /* --------------------- RMIs --------------------- */
2104 /* Similarly, calculate an expression into an AMD64RMI operand. As with
2105 iselIntExpr_R, the expression can have type 64, 32, 16 or 8 bits. */
2107 static AMD64RMI
* iselIntExpr_RMI ( ISelEnv
* env
, const IRExpr
* e
)
2109 AMD64RMI
* rmi
= iselIntExpr_RMI_wrk(env
, e
);
2110 /* sanity checks ... */
2115 vassert(hregClass(rmi
->Armi
.Reg
.reg
) == HRcInt64
);
2116 vassert(hregIsVirtual(rmi
->Armi
.Reg
.reg
));
2119 vassert(sane_AMode(rmi
->Armi
.Mem
.am
));
2122 vpanic("iselIntExpr_RMI: unknown amd64 RMI tag");
2126 /* DO NOT CALL THIS DIRECTLY ! */
2127 static AMD64RMI
* iselIntExpr_RMI_wrk ( ISelEnv
* env
, const IRExpr
* e
)
2129 IRType ty
= typeOfIRExpr(env
->type_env
,e
);
2130 vassert(ty
== Ity_I64
|| ty
== Ity_I32
2131 || ty
== Ity_I16
|| ty
== Ity_I8
);
2133 /* special case: immediate 64/32/16/8 */
2134 if (e
->tag
== Iex_Const
) {
2135 switch (e
->Iex
.Const
.con
->tag
) {
2137 if (fitsIn32Bits(e
->Iex
.Const
.con
->Ico
.U64
)) {
2138 return AMD64RMI_Imm(toUInt(e
->Iex
.Const
.con
->Ico
.U64
));
2142 return AMD64RMI_Imm(e
->Iex
.Const
.con
->Ico
.U32
); break;
2144 return AMD64RMI_Imm(0xFFFF & e
->Iex
.Const
.con
->Ico
.U16
); break;
2146 return AMD64RMI_Imm(0xFF & e
->Iex
.Const
.con
->Ico
.U8
); break;
2148 vpanic("iselIntExpr_RMI.Iex_Const(amd64)");
2152 /* special case: 64-bit GET */
2153 if (e
->tag
== Iex_Get
&& ty
== Ity_I64
) {
2154 return AMD64RMI_Mem(AMD64AMode_IR(e
->Iex
.Get
.offset
,
2158 /* special case: 64-bit load from memory */
2159 if (e
->tag
== Iex_Load
&& ty
== Ity_I64
2160 && e
->Iex
.Load
.end
== Iend_LE
) {
2161 AMD64AMode
* am
= iselIntExpr_AMode(env
, e
->Iex
.Load
.addr
);
2162 return AMD64RMI_Mem(am
);
2165 /* default case: calculate into a register and return that */
2167 HReg r
= iselIntExpr_R ( env
, e
);
2168 return AMD64RMI_Reg(r
);
2173 /* --------------------- RIs --------------------- */
2175 /* Calculate an expression into an AMD64RI operand. As with
2176 iselIntExpr_R, the expression can have type 64, 32, 16 or 8
2179 static AMD64RI
* iselIntExpr_RI ( ISelEnv
* env
, const IRExpr
* e
)
2181 AMD64RI
* ri
= iselIntExpr_RI_wrk(env
, e
);
2182 /* sanity checks ... */
2187 vassert(hregClass(ri
->Ari
.Reg
.reg
) == HRcInt64
);
2188 vassert(hregIsVirtual(ri
->Ari
.Reg
.reg
));
2191 vpanic("iselIntExpr_RI: unknown amd64 RI tag");
2195 /* DO NOT CALL THIS DIRECTLY ! */
2196 static AMD64RI
* iselIntExpr_RI_wrk ( ISelEnv
* env
, const IRExpr
* e
)
2198 IRType ty
= typeOfIRExpr(env
->type_env
,e
);
2199 vassert(ty
== Ity_I64
|| ty
== Ity_I32
2200 || ty
== Ity_I16
|| ty
== Ity_I8
);
2202 /* special case: immediate */
2203 if (e
->tag
== Iex_Const
) {
2204 switch (e
->Iex
.Const
.con
->tag
) {
2206 if (fitsIn32Bits(e
->Iex
.Const
.con
->Ico
.U64
)) {
2207 return AMD64RI_Imm(toUInt(e
->Iex
.Const
.con
->Ico
.U64
));
2211 return AMD64RI_Imm(e
->Iex
.Const
.con
->Ico
.U32
);
2213 return AMD64RI_Imm(0xFFFF & e
->Iex
.Const
.con
->Ico
.U16
);
2215 return AMD64RI_Imm(0xFF & e
->Iex
.Const
.con
->Ico
.U8
);
2217 vpanic("iselIntExpr_RMI.Iex_Const(amd64)");
2221 /* default case: calculate into a register and return that */
2223 HReg r
= iselIntExpr_R ( env
, e
);
2224 return AMD64RI_Reg(r
);
2229 /* --------------------- RMs --------------------- */
2231 /* Similarly, calculate an expression into an AMD64RM operand. As
2232 with iselIntExpr_R, the expression can have type 64, 32, 16 or 8
2235 static AMD64RM
* iselIntExpr_RM ( ISelEnv
* env
, const IRExpr
* e
)
2237 AMD64RM
* rm
= iselIntExpr_RM_wrk(env
, e
);
2238 /* sanity checks ... */
2241 vassert(hregClass(rm
->Arm
.Reg
.reg
) == HRcInt64
);
2242 vassert(hregIsVirtual(rm
->Arm
.Reg
.reg
));
2245 vassert(sane_AMode(rm
->Arm
.Mem
.am
));
2248 vpanic("iselIntExpr_RM: unknown amd64 RM tag");
2252 /* DO NOT CALL THIS DIRECTLY ! */
2253 static AMD64RM
* iselIntExpr_RM_wrk ( ISelEnv
* env
, const IRExpr
* e
)
2255 IRType ty
= typeOfIRExpr(env
->type_env
,e
);
2256 vassert(ty
== Ity_I64
|| ty
== Ity_I32
|| ty
== Ity_I16
|| ty
== Ity_I8
);
2258 /* special case: 64-bit GET */
2259 if (e
->tag
== Iex_Get
&& ty
== Ity_I64
) {
2260 return AMD64RM_Mem(AMD64AMode_IR(e
->Iex
.Get
.offset
,
2264 /* special case: load from memory */
2266 /* default case: calculate into a register and return that */
2268 HReg r
= iselIntExpr_R ( env
, e
);
2269 return AMD64RM_Reg(r
);
2274 /* --------------------- CONDCODE --------------------- */
2276 /* Generate code to evaluated a bit-typed expression, returning the
2277 condition code which would correspond when the expression would
2278 notionally have returned 1. */
2280 static AMD64CondCode
iselCondCode ( ISelEnv
* env
, const IRExpr
* e
)
2282 /* Uh, there's nothing we can sanity check here, unfortunately. */
2283 return iselCondCode_wrk(env
,e
);
2286 /* DO NOT CALL THIS DIRECTLY ! */
2287 static AMD64CondCode
iselCondCode_wrk ( ISelEnv
* env
, const IRExpr
* e
)
2290 vassert(typeOfIRExpr(env
->type_env
,e
) == Ity_I1
);
2293 if (e
->tag
== Iex_RdTmp
) {
2294 HReg r64
= lookupIRTemp(env
, e
->Iex
.RdTmp
.tmp
);
2295 addInstr(env
, AMD64Instr_Test64(1,r64
));
2299 /* Constant 1:Bit */
2300 if (e
->tag
== Iex_Const
) {
2302 vassert(e
->Iex
.Const
.con
->tag
== Ico_U1
);
2303 vassert(e
->Iex
.Const
.con
->Ico
.U1
== True
2304 || e
->Iex
.Const
.con
->Ico
.U1
== False
);
2306 addInstr(env
, AMD64Instr_Alu64R(Aalu_MOV
,AMD64RMI_Imm(0),r
));
2307 addInstr(env
, AMD64Instr_Alu64R(Aalu_XOR
,AMD64RMI_Reg(r
),r
));
2308 return e
->Iex
.Const
.con
->Ico
.U1
? Acc_Z
: Acc_NZ
;
2312 if (e
->tag
== Iex_Unop
&& e
->Iex
.Unop
.op
== Iop_Not1
) {
2313 /* Generate code for the arg, and negate the test condition */
2314 return 1 ^ iselCondCode(env
, e
->Iex
.Unop
.arg
);
2317 /* --- patterns rooted at: 64to1 --- */
2320 if (e
->tag
== Iex_Unop
&& e
->Iex
.Unop
.op
== Iop_64to1
) {
2321 HReg reg
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
2322 addInstr(env
, AMD64Instr_Test64(1,reg
));
2326 /* --- patterns rooted at: 32to1 --- */
2329 if (e
->tag
== Iex_Unop
&& e
->Iex
.Unop
.op
== Iop_32to1
) {
2330 HReg reg
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
2331 addInstr(env
, AMD64Instr_Test64(1,reg
));
2335 /* --- patterns rooted at: CmpNEZ8 --- */
2338 if (e
->tag
== Iex_Unop
2339 && e
->Iex
.Unop
.op
== Iop_CmpNEZ8
) {
2340 HReg r
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
2341 addInstr(env
, AMD64Instr_Test64(0xFF,r
));
2345 /* --- patterns rooted at: CmpNEZ16 --- */
2348 if (e
->tag
== Iex_Unop
2349 && e
->Iex
.Unop
.op
== Iop_CmpNEZ16
) {
2350 HReg r
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
2351 addInstr(env
, AMD64Instr_Test64(0xFFFF,r
));
2355 /* --- patterns rooted at: CmpNEZ32 --- */
2357 if (e
->tag
== Iex_Unop
2358 && e
->Iex
.Unop
.op
== Iop_CmpNEZ32
) {
2359 IRExpr
* arg
= e
->Iex
.Unop
.arg
;
2360 if (arg
->tag
== Iex_Binop
2361 && (arg
->Iex
.Binop
.op
== Iop_Or32
2362 || arg
->Iex
.Binop
.op
== Iop_And32
)) {
2363 /* CmpNEZ32(Or32(x,y)) */
2364 /* CmpNEZ32(And32(x,y)) */
2365 HReg r0
= iselIntExpr_R(env
, arg
->Iex
.Binop
.arg1
);
2366 AMD64RMI
* rmi1
= iselIntExpr_RMI(env
, arg
->Iex
.Binop
.arg2
);
2367 HReg tmp
= newVRegI(env
);
2368 addInstr(env
, mk_iMOVsd_RR(r0
, tmp
));
2369 addInstr(env
, AMD64Instr_Alu32R(
2370 arg
->Iex
.Binop
.op
== Iop_Or32
? Aalu_OR
: Aalu_AND
,
2375 HReg r1
= iselIntExpr_R(env
, arg
);
2376 AMD64RMI
* rmi2
= AMD64RMI_Imm(0);
2377 addInstr(env
, AMD64Instr_Alu32R(Aalu_CMP
,rmi2
,r1
));
2381 /* --- patterns rooted at: CmpNEZ64 --- */
2383 if (e
->tag
== Iex_Unop
2384 && e
->Iex
.Unop
.op
== Iop_CmpNEZ64
) {
2385 IRExpr
* arg
= e
->Iex
.Unop
.arg
;
2386 if (arg
->tag
== Iex_Binop
2387 && (arg
->Iex
.Binop
.op
== Iop_Or64
2388 || arg
->Iex
.Binop
.op
== Iop_And64
)) {
2389 /* CmpNEZ64(Or64(x,y)) */
2390 /* CmpNEZ64(And64(x,y)) */
2391 HReg r0
= iselIntExpr_R(env
, arg
->Iex
.Binop
.arg1
);
2392 AMD64RMI
* rmi1
= iselIntExpr_RMI(env
, arg
->Iex
.Binop
.arg2
);
2393 HReg tmp
= newVRegI(env
);
2394 addInstr(env
, mk_iMOVsd_RR(r0
, tmp
));
2395 addInstr(env
, AMD64Instr_Alu64R(
2396 arg
->Iex
.Binop
.op
== Iop_Or64
? Aalu_OR
: Aalu_AND
,
2401 HReg r1
= iselIntExpr_R(env
, arg
);
2402 AMD64RMI
* rmi2
= AMD64RMI_Imm(0);
2403 addInstr(env
, AMD64Instr_Alu64R(Aalu_CMP
,rmi2
,r1
));
2407 /* --- patterns rooted at: Cmp{EQ,NE}{8,16,32} --- */
2409 /* CmpEQ8 / CmpNE8 */
2410 if (e
->tag
== Iex_Binop
2411 && (e
->Iex
.Binop
.op
== Iop_CmpEQ8
2412 || e
->Iex
.Binop
.op
== Iop_CmpNE8
2413 || e
->Iex
.Binop
.op
== Iop_CasCmpEQ8
2414 || e
->Iex
.Binop
.op
== Iop_CasCmpNE8
)) {
2415 if (isZeroU8(e
->Iex
.Binop
.arg2
)) {
2416 HReg r1
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
2417 addInstr(env
, AMD64Instr_Test64(0xFF,r1
));
2418 switch (e
->Iex
.Binop
.op
) {
2419 case Iop_CmpEQ8
: case Iop_CasCmpEQ8
: return Acc_Z
;
2420 case Iop_CmpNE8
: case Iop_CasCmpNE8
: return Acc_NZ
;
2421 default: vpanic("iselCondCode(amd64): CmpXX8(expr,0:I8)");
2424 HReg r1
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
2425 AMD64RMI
* rmi2
= iselIntExpr_RMI(env
, e
->Iex
.Binop
.arg2
);
2426 HReg r
= newVRegI(env
);
2427 addInstr(env
, mk_iMOVsd_RR(r1
,r
));
2428 addInstr(env
, AMD64Instr_Alu64R(Aalu_XOR
,rmi2
,r
));
2429 addInstr(env
, AMD64Instr_Alu64R(Aalu_AND
,AMD64RMI_Imm(0xFF),r
));
2430 switch (e
->Iex
.Binop
.op
) {
2431 case Iop_CmpEQ8
: case Iop_CasCmpEQ8
: return Acc_Z
;
2432 case Iop_CmpNE8
: case Iop_CasCmpNE8
: return Acc_NZ
;
2433 default: vpanic("iselCondCode(amd64): CmpXX8(expr,expr)");
2438 /* CmpEQ16 / CmpNE16 */
2439 if (e
->tag
== Iex_Binop
2440 && (e
->Iex
.Binop
.op
== Iop_CmpEQ16
2441 || e
->Iex
.Binop
.op
== Iop_CmpNE16
2442 || e
->Iex
.Binop
.op
== Iop_CasCmpEQ16
2443 || e
->Iex
.Binop
.op
== Iop_CasCmpNE16
)) {
2444 HReg r1
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
2445 AMD64RMI
* rmi2
= iselIntExpr_RMI(env
, e
->Iex
.Binop
.arg2
);
2446 HReg r
= newVRegI(env
);
2447 addInstr(env
, mk_iMOVsd_RR(r1
,r
));
2448 addInstr(env
, AMD64Instr_Alu64R(Aalu_XOR
,rmi2
,r
));
2449 addInstr(env
, AMD64Instr_Alu64R(Aalu_AND
,AMD64RMI_Imm(0xFFFF),r
));
2450 switch (e
->Iex
.Binop
.op
) {
2451 case Iop_CmpEQ16
: case Iop_CasCmpEQ16
: return Acc_Z
;
2452 case Iop_CmpNE16
: case Iop_CasCmpNE16
: return Acc_NZ
;
2453 default: vpanic("iselCondCode(amd64): CmpXX16");
2457 /* CmpNE64(ccall, 64-bit constant) (--smc-check=all optimisation).
2458 Saves a "movq %rax, %tmp" compared to the default route. */
2459 if (e
->tag
== Iex_Binop
2460 && e
->Iex
.Binop
.op
== Iop_CmpNE64
2461 && e
->Iex
.Binop
.arg1
->tag
== Iex_CCall
2462 && e
->Iex
.Binop
.arg2
->tag
== Iex_Const
) {
2463 IRExpr
* cal
= e
->Iex
.Binop
.arg1
;
2464 IRExpr
* con
= e
->Iex
.Binop
.arg2
;
2465 HReg tmp
= newVRegI(env
);
2466 /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
2467 vassert(cal
->Iex
.CCall
.retty
== Ity_I64
); /* else ill-typed IR */
2468 vassert(con
->Iex
.Const
.con
->tag
== Ico_U64
);
2469 /* Marshal args, do the call. */
2471 RetLoc rloc
= mk_RetLoc_INVALID();
2472 doHelperCall( &addToSp
, &rloc
, env
, NULL
/*guard*/,
2474 cal
->Iex
.CCall
.retty
, cal
->Iex
.CCall
.args
);
2475 vassert(is_sane_RetLoc(rloc
));
2476 vassert(rloc
.pri
== RLPri_Int
);
2477 vassert(addToSp
== 0);
2479 addInstr(env
, AMD64Instr_Imm64(con
->Iex
.Const
.con
->Ico
.U64
, tmp
));
2480 addInstr(env
, AMD64Instr_Alu64R(Aalu_CMP
,
2481 AMD64RMI_Reg(hregAMD64_RAX()), tmp
));
2486 if (e
->tag
== Iex_Binop
2487 && (e
->Iex
.Binop
.op
== Iop_CmpEQ64
2488 || e
->Iex
.Binop
.op
== Iop_CmpNE64
2489 || e
->Iex
.Binop
.op
== Iop_CmpLT64S
2490 || e
->Iex
.Binop
.op
== Iop_CmpLT64U
2491 || e
->Iex
.Binop
.op
== Iop_CmpLE64S
2492 || e
->Iex
.Binop
.op
== Iop_CmpLE64U
2493 || e
->Iex
.Binop
.op
== Iop_CasCmpEQ64
2494 || e
->Iex
.Binop
.op
== Iop_CasCmpNE64
2495 || e
->Iex
.Binop
.op
== Iop_ExpCmpNE64
)) {
2496 HReg r1
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
2497 AMD64RMI
* rmi2
= iselIntExpr_RMI(env
, e
->Iex
.Binop
.arg2
);
2498 addInstr(env
, AMD64Instr_Alu64R(Aalu_CMP
,rmi2
,r1
));
2499 switch (e
->Iex
.Binop
.op
) {
2500 case Iop_CmpEQ64
: case Iop_CasCmpEQ64
: return Acc_Z
;
2502 case Iop_CasCmpNE64
: case Iop_ExpCmpNE64
: return Acc_NZ
;
2503 case Iop_CmpLT64S
: return Acc_L
;
2504 case Iop_CmpLT64U
: return Acc_B
;
2505 case Iop_CmpLE64S
: return Acc_LE
;
2506 case Iop_CmpLE64U
: return Acc_BE
;
2507 default: vpanic("iselCondCode(amd64): CmpXX64");
2512 if (e
->tag
== Iex_Binop
2513 && (e
->Iex
.Binop
.op
== Iop_CmpEQ32
2514 || e
->Iex
.Binop
.op
== Iop_CmpNE32
2515 || e
->Iex
.Binop
.op
== Iop_CmpLT32S
2516 || e
->Iex
.Binop
.op
== Iop_CmpLT32U
2517 || e
->Iex
.Binop
.op
== Iop_CmpLE32S
2518 || e
->Iex
.Binop
.op
== Iop_CmpLE32U
2519 || e
->Iex
.Binop
.op
== Iop_CasCmpEQ32
2520 || e
->Iex
.Binop
.op
== Iop_CasCmpNE32
2521 || e
->Iex
.Binop
.op
== Iop_ExpCmpNE32
)) {
2522 HReg r1
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
2523 AMD64RMI
* rmi2
= iselIntExpr_RMI(env
, e
->Iex
.Binop
.arg2
);
2524 addInstr(env
, AMD64Instr_Alu32R(Aalu_CMP
,rmi2
,r1
));
2525 switch (e
->Iex
.Binop
.op
) {
2526 case Iop_CmpEQ32
: case Iop_CasCmpEQ32
: return Acc_Z
;
2528 case Iop_CasCmpNE32
: case Iop_ExpCmpNE32
: return Acc_NZ
;
2529 case Iop_CmpLT32S
: return Acc_L
;
2530 case Iop_CmpLT32U
: return Acc_B
;
2531 case Iop_CmpLE32S
: return Acc_LE
;
2532 case Iop_CmpLE32U
: return Acc_BE
;
2533 default: vpanic("iselCondCode(amd64): CmpXX32");
2537 /* And1(x,y), Or1(x,y) */
2538 /* FIXME: We could (and probably should) do a lot better here. If both args
2539 are in temps already then we can just emit a reg-reg And/Or directly,
2540 followed by the final Test. */
2541 if (e
->tag
== Iex_Binop
2542 && (e
->Iex
.Binop
.op
== Iop_And1
|| e
->Iex
.Binop
.op
== Iop_Or1
)) {
2543 // We could probably be cleverer about this. In the meantime ..
2544 HReg x_as_64
= newVRegI(env
);
2545 AMD64CondCode cc_x
= iselCondCode(env
, e
->Iex
.Binop
.arg1
);
2546 addInstr(env
, AMD64Instr_Set64(cc_x
, x_as_64
));
2547 HReg y_as_64
= newVRegI(env
);
2548 AMD64CondCode cc_y
= iselCondCode(env
, e
->Iex
.Binop
.arg2
);
2549 addInstr(env
, AMD64Instr_Set64(cc_y
, y_as_64
));
2550 AMD64AluOp aop
= e
->Iex
.Binop
.op
== Iop_And1
? Aalu_AND
: Aalu_OR
;
2551 addInstr(env
, AMD64Instr_Alu64R(aop
, AMD64RMI_Reg(x_as_64
), y_as_64
));
2552 addInstr(env
, AMD64Instr_Test64(1, y_as_64
));
2557 vpanic("iselCondCode(amd64)");
2561 /*---------------------------------------------------------*/
2562 /*--- ISEL: Integer expressions (128 bit) ---*/
2563 /*---------------------------------------------------------*/
2565 /* Compute a 128-bit value into a register pair, which is returned as
2566 the first two parameters. As with iselIntExpr_R, these may be
2567 either real or virtual regs; in any case they must not be changed
2568 by subsequent code emitted by the caller. */
2570 static void iselInt128Expr ( HReg
* rHi
, HReg
* rLo
,
2571 ISelEnv
* env
, const IRExpr
* e
)
2573 iselInt128Expr_wrk(rHi
, rLo
, env
, e
);
2575 vex_printf("\n"); ppIRExpr(e
); vex_printf("\n");
2577 vassert(hregClass(*rHi
) == HRcInt64
);
2578 vassert(hregIsVirtual(*rHi
));
2579 vassert(hregClass(*rLo
) == HRcInt64
);
2580 vassert(hregIsVirtual(*rLo
));
2583 /* DO NOT CALL THIS DIRECTLY ! */
2584 static void iselInt128Expr_wrk ( HReg
* rHi
, HReg
* rLo
,
2585 ISelEnv
* env
, const IRExpr
* e
)
2588 vassert(typeOfIRExpr(env
->type_env
,e
) == Ity_I128
);
2590 /* read 128-bit IRTemp */
2591 if (e
->tag
== Iex_RdTmp
) {
2592 lookupIRTempPair( rHi
, rLo
, env
, e
->Iex
.RdTmp
.tmp
);
2596 /* --------- BINARY ops --------- */
2597 if (e
->tag
== Iex_Binop
) {
2598 switch (e
->Iex
.Binop
.op
) {
2599 /* 64 x 64 -> 128 multiply */
2602 /* get one operand into %rax, and the other into a R/M.
2603 Need to make an educated guess about which is better in
2605 HReg tLo
= newVRegI(env
);
2606 HReg tHi
= newVRegI(env
);
2607 Bool syned
= toBool(e
->Iex
.Binop
.op
== Iop_MullS64
);
2608 AMD64RM
* rmLeft
= iselIntExpr_RM(env
, e
->Iex
.Binop
.arg1
);
2609 HReg rRight
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
2610 addInstr(env
, mk_iMOVsd_RR(rRight
, hregAMD64_RAX()));
2611 addInstr(env
, AMD64Instr_MulL(syned
, rmLeft
));
2612 /* Result is now in RDX:RAX. Tell the caller. */
2613 addInstr(env
, mk_iMOVsd_RR(hregAMD64_RDX(), tHi
));
2614 addInstr(env
, mk_iMOVsd_RR(hregAMD64_RAX(), tLo
));
2620 /* 128 x 64 -> (64(rem),64(div)) division */
2621 case Iop_DivModU128to64
:
2622 case Iop_DivModS128to64
: {
2623 /* Get the 128-bit operand into rdx:rax, and the other into
2626 HReg tLo
= newVRegI(env
);
2627 HReg tHi
= newVRegI(env
);
2628 Bool syned
= toBool(e
->Iex
.Binop
.op
== Iop_DivModS128to64
);
2629 AMD64RM
* rmRight
= iselIntExpr_RM(env
, e
->Iex
.Binop
.arg2
);
2630 iselInt128Expr(&sHi
,&sLo
, env
, e
->Iex
.Binop
.arg1
);
2631 addInstr(env
, mk_iMOVsd_RR(sHi
, hregAMD64_RDX()));
2632 addInstr(env
, mk_iMOVsd_RR(sLo
, hregAMD64_RAX()));
2633 addInstr(env
, AMD64Instr_Div(syned
, 8, rmRight
));
2634 addInstr(env
, mk_iMOVsd_RR(hregAMD64_RDX(), tHi
));
2635 addInstr(env
, mk_iMOVsd_RR(hregAMD64_RAX(), tLo
));
2641 /* 64HLto128(e1,e2) */
2643 *rHi
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg1
);
2644 *rLo
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
2650 } /* if (e->tag == Iex_Binop) */
2653 vpanic("iselInt128Expr");
2657 /*---------------------------------------------------------*/
2658 /*--- ISEL: Floating point expressions (32 bit) ---*/
2659 /*---------------------------------------------------------*/
2661 /* Nothing interesting here; really just wrappers for
2664 static HReg
iselFltExpr ( ISelEnv
* env
, const IRExpr
* e
)
2666 HReg r
= iselFltExpr_wrk( env
, e
);
2668 vex_printf("\n"); ppIRExpr(e
); vex_printf("\n");
2670 vassert(hregClass(r
) == HRcVec128
);
2671 vassert(hregIsVirtual(r
));
2675 /* DO NOT CALL THIS DIRECTLY */
2676 static HReg
iselFltExpr_wrk ( ISelEnv
* env
, const IRExpr
* e
)
2678 IRType ty
= typeOfIRExpr(env
->type_env
,e
);
2679 vassert(ty
== Ity_F32
);
2681 if (e
->tag
== Iex_RdTmp
) {
2682 return lookupIRTemp(env
, e
->Iex
.RdTmp
.tmp
);
2685 if (e
->tag
== Iex_Load
&& e
->Iex
.Load
.end
== Iend_LE
) {
2687 HReg res
= newVRegV(env
);
2688 vassert(e
->Iex
.Load
.ty
== Ity_F32
);
2689 am
= iselIntExpr_AMode(env
, e
->Iex
.Load
.addr
);
2690 addInstr(env
, AMD64Instr_SseLdSt(True
/*load*/, 4, res
, am
));
2694 if (e
->tag
== Iex_Binop
2695 && e
->Iex
.Binop
.op
== Iop_F64toF32
) {
2696 /* Although the result is still held in a standard SSE register,
2697 we need to round it to reflect the loss of accuracy/range
2698 entailed in casting it to a 32-bit float. */
2699 HReg dst
= newVRegV(env
);
2700 HReg src
= iselDblExpr(env
, e
->Iex
.Binop
.arg2
);
2701 set_SSE_rounding_mode( env
, e
->Iex
.Binop
.arg1
);
2702 addInstr(env
, AMD64Instr_SseSDSS(True
/*D->S*/,src
,dst
));
2703 set_SSE_rounding_default( env
);
2707 if (e
->tag
== Iex_Get
) {
2708 AMD64AMode
* am
= AMD64AMode_IR( e
->Iex
.Get
.offset
,
2710 HReg res
= newVRegV(env
);
2711 addInstr(env
, AMD64Instr_SseLdSt( True
/*load*/, 4, res
, am
));
2715 if (e
->tag
== Iex_Unop
2716 && e
->Iex
.Unop
.op
== Iop_ReinterpI32asF32
) {
2717 /* Given an I32, produce an IEEE754 float with the same bit
2719 HReg dst
= newVRegV(env
);
2720 HReg src
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
2721 AMD64AMode
* m4_rsp
= AMD64AMode_IR(-4, hregAMD64_RSP());
2722 addInstr(env
, AMD64Instr_Store(4, src
, m4_rsp
));
2723 addInstr(env
, AMD64Instr_SseLdSt( True
/*load*/, 4, dst
, m4_rsp
));
2727 if (e
->tag
== Iex_Binop
&& e
->Iex
.Binop
.op
== Iop_RoundF32toInt
) {
2728 AMD64AMode
* m8_rsp
= AMD64AMode_IR(-8, hregAMD64_RSP());
2729 HReg arg
= iselFltExpr(env
, e
->Iex
.Binop
.arg2
);
2730 HReg dst
= newVRegV(env
);
2732 /* rf now holds the value to be rounded. The first thing to do
2733 is set the FPU's rounding mode accordingly. */
2735 /* Set host x87 rounding mode */
2736 set_FPU_rounding_mode( env
, e
->Iex
.Binop
.arg1
);
2738 addInstr(env
, AMD64Instr_SseLdSt(False
/*store*/, 4, arg
, m8_rsp
));
2739 addInstr(env
, AMD64Instr_A87Free(1));
2740 addInstr(env
, AMD64Instr_A87PushPop(m8_rsp
, True
/*push*/, 4));
2741 addInstr(env
, AMD64Instr_A87FpOp(Afp_ROUND
));
2742 addInstr(env
, AMD64Instr_A87PushPop(m8_rsp
, False
/*pop*/, 4));
2743 addInstr(env
, AMD64Instr_SseLdSt(True
/*load*/, 4, dst
, m8_rsp
));
2745 /* Restore default x87 rounding. */
2746 set_FPU_rounding_default( env
);
2751 if (e
->tag
== Iex_Unop
&& e
->Iex
.Unop
.op
== Iop_NegF32
) {
2752 /* Sigh ... very rough code. Could do much better. */
2753 /* Get the 128-bit literal 00---0 10---0 into a register
2754 and xor it with the value to be negated. */
2755 HReg r1
= newVRegI(env
);
2756 HReg dst
= newVRegV(env
);
2757 HReg tmp
= newVRegV(env
);
2758 HReg src
= iselFltExpr(env
, e
->Iex
.Unop
.arg
);
2759 AMD64AMode
* rsp0
= AMD64AMode_IR(0, hregAMD64_RSP());
2760 addInstr(env
, mk_vMOVsd_RR(src
,tmp
));
2761 addInstr(env
, AMD64Instr_Push(AMD64RMI_Imm(0)));
2762 addInstr(env
, AMD64Instr_Imm64( 1ULL<<31, r1
));
2763 addInstr(env
, AMD64Instr_Push(AMD64RMI_Reg(r1
)));
2764 addInstr(env
, AMD64Instr_SseLdSt(True
, 16, dst
, rsp0
));
2765 addInstr(env
, AMD64Instr_SseReRg(Asse_XOR
, tmp
, dst
));
2766 add_to_rsp(env
, 16);
2770 if (e
->tag
== Iex_Qop
&& e
->Iex
.Qop
.details
->op
== Iop_MAddF32
) {
2771 IRQop
*qop
= e
->Iex
.Qop
.details
;
2772 HReg dst
= newVRegV(env
);
2773 HReg argX
= iselFltExpr(env
, qop
->arg2
);
2774 HReg argY
= iselFltExpr(env
, qop
->arg3
);
2775 HReg argZ
= iselFltExpr(env
, qop
->arg4
);
2776 /* XXXROUNDINGFIXME */
2777 /* set roundingmode here */
2778 /* subq $16, %rsp -- make a space*/
2779 sub_from_rsp(env
, 16);
2780 /* Prepare 4 arg regs:
2786 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(0, hregAMD64_RSP()),
2788 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(4, hregAMD64_RSP()),
2790 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(8, hregAMD64_RSP()),
2792 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(12, hregAMD64_RSP()),
2794 /* Store the three args, at (%rsi), (%rdx) and (%rcx):
2795 movss %argX, 0(%rsi)
2796 movss %argY, 0(%rdx)
2797 movss %argZ, 0(%rcx)
2799 addInstr(env
, AMD64Instr_SseLdSt(False
/*!isLoad*/, 4, argX
,
2800 AMD64AMode_IR(0, hregAMD64_RSI())));
2801 addInstr(env
, AMD64Instr_SseLdSt(False
/*!isLoad*/, 4, argY
,
2802 AMD64AMode_IR(0, hregAMD64_RDX())));
2803 addInstr(env
, AMD64Instr_SseLdSt(False
/*!isLoad*/, 4, argZ
,
2804 AMD64AMode_IR(0, hregAMD64_RCX())));
2805 /* call the helper */
2806 addInstr(env
, AMD64Instr_Call( Acc_ALWAYS
,
2807 (ULong
)(HWord
)h_generic_calc_MAddF32
,
2808 4, mk_RetLoc_simple(RLPri_None
) ));
2809 /* fetch the result from memory, using %r_argp, which the
2810 register allocator will keep alive across the call. */
2811 addInstr(env
, AMD64Instr_SseLdSt(True
/*isLoad*/, 4, dst
,
2812 AMD64AMode_IR(0, hregAMD64_RSP())));
2813 /* and finally, clear the space */
2814 add_to_rsp(env
, 16);
2819 vpanic("iselFltExpr_wrk");
2823 /*---------------------------------------------------------*/
2824 /*--- ISEL: Floating point expressions (64 bit) ---*/
2825 /*---------------------------------------------------------*/
2827 /* Compute a 64-bit floating point value into the lower half of an xmm
2828 register, the identity of which is returned. As with
2829 iselIntExpr_R, the returned reg will be virtual, and it must not be
2830 changed by subsequent code emitted by the caller.
2833 /* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm:
2835 Type S (1 bit) E (11 bits) F (52 bits)
2836 ---- --------- ----------- -----------
2837 signalling NaN u 2047 (max) .0uuuuu---u
2840 quiet NaN u 2047 (max) .1uuuuu---u
2842 negative infinity 1 2047 (max) .000000---0
2844 positive infinity 0 2047 (max) .000000---0
2846 negative zero 1 0 .000000---0
2848 positive zero 0 0 .000000---0
2851 static HReg
iselDblExpr ( ISelEnv
* env
, const IRExpr
* e
)
2853 HReg r
= iselDblExpr_wrk( env
, e
);
2855 vex_printf("\n"); ppIRExpr(e
); vex_printf("\n");
2857 vassert(hregClass(r
) == HRcVec128
);
2858 vassert(hregIsVirtual(r
));
2862 /* DO NOT CALL THIS DIRECTLY */
2863 static HReg
iselDblExpr_wrk ( ISelEnv
* env
, const IRExpr
* e
)
2865 IRType ty
= typeOfIRExpr(env
->type_env
,e
);
2867 vassert(ty
== Ity_F64
);
2869 if (e
->tag
== Iex_RdTmp
) {
2870 return lookupIRTemp(env
, e
->Iex
.RdTmp
.tmp
);
2873 if (e
->tag
== Iex_Const
) {
2874 union { ULong u64
; Double f64
; } u
;
2875 HReg res
= newVRegV(env
);
2876 HReg tmp
= newVRegI(env
);
2877 vassert(sizeof(u
) == 8);
2878 vassert(sizeof(u
.u64
) == 8);
2879 vassert(sizeof(u
.f64
) == 8);
2881 if (e
->Iex
.Const
.con
->tag
== Ico_F64
) {
2882 u
.f64
= e
->Iex
.Const
.con
->Ico
.F64
;
2884 else if (e
->Iex
.Const
.con
->tag
== Ico_F64i
) {
2885 u
.u64
= e
->Iex
.Const
.con
->Ico
.F64i
;
2888 vpanic("iselDblExpr(amd64): const");
2890 addInstr(env
, AMD64Instr_Imm64(u
.u64
, tmp
));
2891 addInstr(env
, AMD64Instr_Push(AMD64RMI_Reg(tmp
)));
2892 addInstr(env
, AMD64Instr_SseLdSt(
2893 True
/*load*/, 8, res
,
2894 AMD64AMode_IR(0, hregAMD64_RSP())
2900 if (e
->tag
== Iex_Load
&& e
->Iex
.Load
.end
== Iend_LE
) {
2902 HReg res
= newVRegV(env
);
2903 vassert(e
->Iex
.Load
.ty
== Ity_F64
);
2904 am
= iselIntExpr_AMode(env
, e
->Iex
.Load
.addr
);
2905 addInstr(env
, AMD64Instr_SseLdSt( True
/*load*/, 8, res
, am
));
2909 if (e
->tag
== Iex_Get
) {
2910 AMD64AMode
* am
= AMD64AMode_IR( e
->Iex
.Get
.offset
,
2912 HReg res
= newVRegV(env
);
2913 addInstr(env
, AMD64Instr_SseLdSt( True
/*load*/, 8, res
, am
));
2917 if (e
->tag
== Iex_GetI
) {
2919 = genGuestArrayOffset(
2920 env
, e
->Iex
.GetI
.descr
,
2921 e
->Iex
.GetI
.ix
, e
->Iex
.GetI
.bias
);
2922 HReg res
= newVRegV(env
);
2923 addInstr(env
, AMD64Instr_SseLdSt( True
/*load*/, 8, res
, am
));
2927 if (e
->tag
== Iex_Triop
) {
2928 IRTriop
*triop
= e
->Iex
.Triop
.details
;
2929 AMD64SseOp op
= Asse_INVALID
;
2930 switch (triop
->op
) {
2931 case Iop_AddF64
: op
= Asse_ADDF
; break;
2932 case Iop_SubF64
: op
= Asse_SUBF
; break;
2933 case Iop_MulF64
: op
= Asse_MULF
; break;
2934 case Iop_DivF64
: op
= Asse_DIVF
; break;
2937 if (op
!= Asse_INVALID
) {
2938 HReg dst
= newVRegV(env
);
2939 HReg argL
= iselDblExpr(env
, triop
->arg2
);
2940 HReg argR
= iselDblExpr(env
, triop
->arg3
);
2941 addInstr(env
, mk_vMOVsd_RR(argL
, dst
));
2942 /* XXXROUNDINGFIXME */
2943 /* set roundingmode here */
2944 addInstr(env
, AMD64Instr_Sse64FLo(op
, argR
, dst
));
2949 if (e
->tag
== Iex_Qop
&& e
->Iex
.Qop
.details
->op
== Iop_MAddF64
) {
2950 IRQop
*qop
= e
->Iex
.Qop
.details
;
2951 HReg dst
= newVRegV(env
);
2952 HReg argX
= iselDblExpr(env
, qop
->arg2
);
2953 HReg argY
= iselDblExpr(env
, qop
->arg3
);
2954 HReg argZ
= iselDblExpr(env
, qop
->arg4
);
2955 /* XXXROUNDINGFIXME */
2956 /* set roundingmode here */
2957 /* subq $32, %rsp -- make a space*/
2958 sub_from_rsp(env
, 32);
2959 /* Prepare 4 arg regs:
2965 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(0, hregAMD64_RSP()),
2967 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(8, hregAMD64_RSP()),
2969 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(16, hregAMD64_RSP()),
2971 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(24, hregAMD64_RSP()),
2973 /* Store the three args, at (%rsi), (%rdx) and (%rcx):
2974 movsd %argX, 0(%rsi)
2975 movsd %argY, 0(%rdx)
2976 movsd %argZ, 0(%rcx)
2978 addInstr(env
, AMD64Instr_SseLdSt(False
/*!isLoad*/, 8, argX
,
2979 AMD64AMode_IR(0, hregAMD64_RSI())));
2980 addInstr(env
, AMD64Instr_SseLdSt(False
/*!isLoad*/, 8, argY
,
2981 AMD64AMode_IR(0, hregAMD64_RDX())));
2982 addInstr(env
, AMD64Instr_SseLdSt(False
/*!isLoad*/, 8, argZ
,
2983 AMD64AMode_IR(0, hregAMD64_RCX())));
2984 /* call the helper */
2985 addInstr(env
, AMD64Instr_Call( Acc_ALWAYS
,
2986 (ULong
)(HWord
)h_generic_calc_MAddF64
,
2987 4, mk_RetLoc_simple(RLPri_None
) ));
2988 /* fetch the result from memory, using %r_argp, which the
2989 register allocator will keep alive across the call. */
2990 addInstr(env
, AMD64Instr_SseLdSt(True
/*isLoad*/, 8, dst
,
2991 AMD64AMode_IR(0, hregAMD64_RSP())));
2992 /* and finally, clear the space */
2993 add_to_rsp(env
, 32);
2997 if (e
->tag
== Iex_Binop
&& e
->Iex
.Binop
.op
== Iop_RoundF64toInt
) {
2998 AMD64AMode
* m8_rsp
= AMD64AMode_IR(-8, hregAMD64_RSP());
2999 HReg arg
= iselDblExpr(env
, e
->Iex
.Binop
.arg2
);
3000 HReg dst
= newVRegV(env
);
3002 /* rf now holds the value to be rounded. The first thing to do
3003 is set the FPU's rounding mode accordingly. */
3005 /* Set host x87 rounding mode */
3006 set_FPU_rounding_mode( env
, e
->Iex
.Binop
.arg1
);
3008 addInstr(env
, AMD64Instr_SseLdSt(False
/*store*/, 8, arg
, m8_rsp
));
3009 addInstr(env
, AMD64Instr_A87Free(1));
3010 addInstr(env
, AMD64Instr_A87PushPop(m8_rsp
, True
/*push*/, 8));
3011 addInstr(env
, AMD64Instr_A87FpOp(Afp_ROUND
));
3012 addInstr(env
, AMD64Instr_A87PushPop(m8_rsp
, False
/*pop*/, 8));
3013 addInstr(env
, AMD64Instr_SseLdSt(True
/*load*/, 8, dst
, m8_rsp
));
3015 /* Restore default x87 rounding. */
3016 set_FPU_rounding_default( env
);
3021 IRTriop
*triop
= e
->Iex
.Triop
.details
;
3022 if (e
->tag
== Iex_Triop
3023 && (triop
->op
== Iop_ScaleF64
3024 || triop
->op
== Iop_AtanF64
3025 || triop
->op
== Iop_Yl2xF64
3026 || triop
->op
== Iop_Yl2xp1F64
3027 || triop
->op
== Iop_PRemF64
3028 || triop
->op
== Iop_PRem1F64
)
3030 AMD64AMode
* m8_rsp
= AMD64AMode_IR(-8, hregAMD64_RSP());
3031 HReg arg1
= iselDblExpr(env
, triop
->arg2
);
3032 HReg arg2
= iselDblExpr(env
, triop
->arg3
);
3033 HReg dst
= newVRegV(env
);
3034 Bool arg2first
= toBool(triop
->op
== Iop_ScaleF64
3035 || triop
->op
== Iop_PRemF64
3036 || triop
->op
== Iop_PRem1F64
);
3037 addInstr(env
, AMD64Instr_A87Free(2));
3039 /* one arg -> top of x87 stack */
3040 addInstr(env
, AMD64Instr_SseLdSt(
3041 False
/*store*/, 8, arg2first
? arg2
: arg1
, m8_rsp
));
3042 addInstr(env
, AMD64Instr_A87PushPop(m8_rsp
, True
/*push*/, 8));
3044 /* other arg -> top of x87 stack */
3045 addInstr(env
, AMD64Instr_SseLdSt(
3046 False
/*store*/, 8, arg2first
? arg1
: arg2
, m8_rsp
));
3047 addInstr(env
, AMD64Instr_A87PushPop(m8_rsp
, True
/*push*/, 8));
3050 /* XXXROUNDINGFIXME */
3051 /* set roundingmode here */
3052 switch (triop
->op
) {
3054 addInstr(env
, AMD64Instr_A87FpOp(Afp_SCALE
));
3057 addInstr(env
, AMD64Instr_A87FpOp(Afp_ATAN
));
3060 addInstr(env
, AMD64Instr_A87FpOp(Afp_YL2X
));
3063 addInstr(env
, AMD64Instr_A87FpOp(Afp_YL2XP1
));
3066 addInstr(env
, AMD64Instr_A87FpOp(Afp_PREM
));
3069 addInstr(env
, AMD64Instr_A87FpOp(Afp_PREM1
));
3076 addInstr(env
, AMD64Instr_A87PushPop(m8_rsp
, False
/*pop*/, 8));
3077 addInstr(env
, AMD64Instr_SseLdSt(True
/*load*/, 8, dst
, m8_rsp
));
3081 if (e
->tag
== Iex_Binop
&& e
->Iex
.Binop
.op
== Iop_I64StoF64
) {
3082 HReg dst
= newVRegV(env
);
3083 HReg src
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
3084 set_SSE_rounding_mode( env
, e
->Iex
.Binop
.arg1
);
3085 addInstr(env
, AMD64Instr_SseSI2SF( 8, 8, src
, dst
));
3086 set_SSE_rounding_default( env
);
3090 if (e
->tag
== Iex_Unop
&& e
->Iex
.Unop
.op
== Iop_I32StoF64
) {
3091 HReg dst
= newVRegV(env
);
3092 HReg src
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
3093 set_SSE_rounding_default( env
);
3094 addInstr(env
, AMD64Instr_SseSI2SF( 4, 8, src
, dst
));
3098 if (e
->tag
== Iex_Unop
3099 && (e
->Iex
.Unop
.op
== Iop_NegF64
3100 || e
->Iex
.Unop
.op
== Iop_AbsF64
)) {
3101 /* Sigh ... very rough code. Could do much better. */
3102 /* Get the 128-bit literal 00---0 10---0 into a register
3103 and xor/nand it with the value to be negated. */
3104 HReg r1
= newVRegI(env
);
3105 HReg dst
= newVRegV(env
);
3106 HReg tmp
= newVRegV(env
);
3107 HReg src
= iselDblExpr(env
, e
->Iex
.Unop
.arg
);
3108 AMD64AMode
* rsp0
= AMD64AMode_IR(0, hregAMD64_RSP());
3109 addInstr(env
, mk_vMOVsd_RR(src
,tmp
));
3110 addInstr(env
, AMD64Instr_Push(AMD64RMI_Imm(0)));
3111 addInstr(env
, AMD64Instr_Imm64( 1ULL<<63, r1
));
3112 addInstr(env
, AMD64Instr_Push(AMD64RMI_Reg(r1
)));
3113 addInstr(env
, AMD64Instr_SseLdSt(True
, 16, dst
, rsp0
));
3115 if (e
->Iex
.Unop
.op
== Iop_NegF64
)
3116 addInstr(env
, AMD64Instr_SseReRg(Asse_XOR
, tmp
, dst
));
3118 addInstr(env
, AMD64Instr_SseReRg(Asse_ANDN
, tmp
, dst
));
3120 add_to_rsp(env
, 16);
3124 if (e
->tag
== Iex_Binop
) {
3125 A87FpOp fpop
= Afp_INVALID
;
3126 switch (e
->Iex
.Binop
.op
) {
3127 case Iop_SqrtF64
: fpop
= Afp_SQRT
; break;
3128 case Iop_SinF64
: fpop
= Afp_SIN
; break;
3129 case Iop_CosF64
: fpop
= Afp_COS
; break;
3130 case Iop_TanF64
: fpop
= Afp_TAN
; break;
3131 case Iop_2xm1F64
: fpop
= Afp_2XM1
; break;
3134 if (fpop
!= Afp_INVALID
) {
3135 AMD64AMode
* m8_rsp
= AMD64AMode_IR(-8, hregAMD64_RSP());
3136 HReg arg
= iselDblExpr(env
, e
->Iex
.Binop
.arg2
);
3137 HReg dst
= newVRegV(env
);
3138 Int nNeeded
= e
->Iex
.Binop
.op
==Iop_TanF64
? 2 : 1;
3139 addInstr(env
, AMD64Instr_SseLdSt(False
/*store*/, 8, arg
, m8_rsp
));
3140 addInstr(env
, AMD64Instr_A87Free(nNeeded
));
3141 addInstr(env
, AMD64Instr_A87PushPop(m8_rsp
, True
/*push*/, 8));
3142 /* XXXROUNDINGFIXME */
3143 /* set roundingmode here */
3144 /* Note that AMD64Instr_A87FpOp(Afp_TAN) sets the condition
3145 codes. I don't think that matters, since this insn
3146 selector never generates such an instruction intervening
3147 between an flag-setting instruction and a flag-using
3149 addInstr(env
, AMD64Instr_A87FpOp(fpop
));
3150 addInstr(env
, AMD64Instr_A87PushPop(m8_rsp
, False
/*pop*/, 8));
3151 addInstr(env
, AMD64Instr_SseLdSt(True
/*load*/, 8, dst
, m8_rsp
));
3156 if (e
->tag
== Iex_Unop
) {
3157 switch (e
->Iex
.Unop
.op
) {
3158 //.. case Iop_I32toF64: {
3159 //.. HReg dst = newVRegF(env);
3160 //.. HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg);
3161 //.. addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
3162 //.. set_FPU_rounding_default(env);
3163 //.. addInstr(env, X86Instr_FpLdStI(
3164 //.. True/*load*/, 4, dst,
3165 //.. X86AMode_IR(0, hregX86_ESP())));
3166 //.. add_to_esp(env, 4);
3169 case Iop_ReinterpI64asF64
: {
3170 /* Given an I64, produce an IEEE754 double with the same
3172 AMD64AMode
* m8_rsp
= AMD64AMode_IR(-8, hregAMD64_RSP());
3173 HReg dst
= newVRegV(env
);
3174 AMD64RI
* src
= iselIntExpr_RI(env
, e
->Iex
.Unop
.arg
);
3176 set_SSE_rounding_default(env
);
3177 addInstr(env
, AMD64Instr_Alu64M(Aalu_MOV
, src
, m8_rsp
));
3178 addInstr(env
, AMD64Instr_SseLdSt(True
/*load*/, 8, dst
, m8_rsp
));
3181 case Iop_F32toF64
: {
3183 HReg f64
= newVRegV(env
);
3184 /* this shouldn't be necessary, but be paranoid ... */
3185 set_SSE_rounding_default(env
);
3186 f32
= iselFltExpr(env
, e
->Iex
.Unop
.arg
);
3187 addInstr(env
, AMD64Instr_SseSDSS(False
/*S->D*/, f32
, f64
));
3195 /* --------- MULTIPLEX --------- */
3196 if (e
->tag
== Iex_ITE
) { // VFD
3198 vassert(ty
== Ity_F64
);
3199 vassert(typeOfIRExpr(env
->type_env
,e
->Iex
.ITE
.cond
) == Ity_I1
);
3200 r1
= iselDblExpr(env
, e
->Iex
.ITE
.iftrue
);
3201 r0
= iselDblExpr(env
, e
->Iex
.ITE
.iffalse
);
3202 dst
= newVRegV(env
);
3203 addInstr(env
, mk_vMOVsd_RR(r1
,dst
));
3204 AMD64CondCode cc
= iselCondCode(env
, e
->Iex
.ITE
.cond
);
3205 addInstr(env
, AMD64Instr_SseCMov(cc
^ 1, r0
, dst
));
3210 vpanic("iselDblExpr_wrk");
/*---------------------------------------------------------*/
/*--- ISEL: SIMD (Vector) expressions, 128 bit.         ---*/
/*---------------------------------------------------------*/
3218 static HReg
iselVecExpr ( ISelEnv
* env
, const IRExpr
* e
)
3220 HReg r
= iselVecExpr_wrk( env
, e
);
3222 vex_printf("\n"); ppIRExpr(e
); vex_printf("\n");
3224 vassert(hregClass(r
) == HRcVec128
);
3225 vassert(hregIsVirtual(r
));
3230 /* DO NOT CALL THIS DIRECTLY */
3231 static HReg
iselVecExpr_wrk ( ISelEnv
* env
, const IRExpr
* e
)
3233 HWord fn
= 0; /* address of helper fn, if required */
3234 Bool arg1isEReg
= False
;
3235 AMD64SseOp op
= Asse_INVALID
;
3237 IRType ty
= typeOfIRExpr(env
->type_env
, e
);
3238 vassert(ty
== Ity_V128
);
3241 if (e
->tag
== Iex_RdTmp
) {
3242 return lookupIRTemp(env
, e
->Iex
.RdTmp
.tmp
);
3245 if (e
->tag
== Iex_Get
) {
3246 HReg dst
= newVRegV(env
);
3247 addInstr(env
, AMD64Instr_SseLdSt(
3251 AMD64AMode_IR(e
->Iex
.Get
.offset
, hregAMD64_RBP())
3257 if (e
->tag
== Iex_Load
&& e
->Iex
.Load
.end
== Iend_LE
) {
3258 HReg dst
= newVRegV(env
);
3259 AMD64AMode
* am
= iselIntExpr_AMode(env
, e
->Iex
.Load
.addr
);
3260 addInstr(env
, AMD64Instr_SseLdSt( True
/*load*/, 16, dst
, am
));
3264 if (e
->tag
== Iex_Const
) {
3265 HReg dst
= newVRegV(env
);
3266 vassert(e
->Iex
.Const
.con
->tag
== Ico_V128
);
3267 switch (e
->Iex
.Const
.con
->Ico
.V128
) {
3269 dst
= generate_zeroes_V128(env
);
3272 dst
= generate_ones_V128(env
);
3275 AMD64AMode
* rsp0
= AMD64AMode_IR(0, hregAMD64_RSP());
3276 /* do push_uimm64 twice, first time for the high-order half. */
3277 push_uimm64(env
, bitmask8_to_bytemask64(
3278 (e
->Iex
.Const
.con
->Ico
.V128
>> 8) & 0xFF
3280 push_uimm64(env
, bitmask8_to_bytemask64(
3281 (e
->Iex
.Const
.con
->Ico
.V128
>> 0) & 0xFF
3283 addInstr(env
, AMD64Instr_SseLdSt( True
/*load*/, 16, dst
, rsp0
));
3284 add_to_rsp(env
, 16);
3291 if (e
->tag
== Iex_Unop
) {
3292 switch (e
->Iex
.Unop
.op
) {
3295 HReg arg
= iselVecExpr(env
, e
->Iex
.Unop
.arg
);
3296 return do_sse_NotV128(env
, arg
);
3299 case Iop_CmpNEZ64x2
: {
3300 /* We can use SSE2 instructions for this. */
3301 /* Ideally, we want to do a 64Ix2 comparison against zero of
3302 the operand. Problem is no such insn exists. Solution
3303 therefore is to do a 32Ix4 comparison instead, and bitwise-
3304 negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and
3305 let the not'd result of this initial comparison be a:b:c:d.
3306 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use
3307 pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
3308 giving the required result.
3310 The required selection sequence is 2,3,0,1, which
3311 according to Intel's documentation means the pshufd
3312 literal value is 0xB1, that is,
3313 (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
3315 HReg arg
= iselVecExpr(env
, e
->Iex
.Unop
.arg
);
3316 HReg tmp
= generate_zeroes_V128(env
);
3317 HReg dst
= newVRegV(env
);
3318 addInstr(env
, AMD64Instr_SseReRg(Asse_CMPEQ32
, arg
, tmp
));
3319 tmp
= do_sse_NotV128(env
, tmp
);
3320 addInstr(env
, AMD64Instr_SseShuf(0xB1, tmp
, dst
));
3321 addInstr(env
, AMD64Instr_SseReRg(Asse_OR
, tmp
, dst
));
3325 case Iop_CmpNEZ32x4
: op
= Asse_CMPEQ32
; goto do_CmpNEZ_vector
;
3326 case Iop_CmpNEZ16x8
: op
= Asse_CMPEQ16
; goto do_CmpNEZ_vector
;
3327 case Iop_CmpNEZ8x16
: op
= Asse_CMPEQ8
; goto do_CmpNEZ_vector
;
3330 HReg arg
= iselVecExpr(env
, e
->Iex
.Unop
.arg
);
3331 HReg tmp
= newVRegV(env
);
3332 HReg zero
= generate_zeroes_V128(env
);
3334 addInstr(env
, mk_vMOVsd_RR(arg
, tmp
));
3335 addInstr(env
, AMD64Instr_SseReRg(op
, zero
, tmp
));
3336 dst
= do_sse_NotV128(env
, tmp
);
3340 case Iop_RecipEst32Fx4
: op
= Asse_RCPF
; goto do_32Fx4_unary
;
3341 case Iop_RSqrtEst32Fx4
: op
= Asse_RSQRTF
; goto do_32Fx4_unary
;
3344 HReg arg
= iselVecExpr(env
, e
->Iex
.Unop
.arg
);
3345 HReg dst
= newVRegV(env
);
3346 addInstr(env
, AMD64Instr_Sse32Fx4(op
, arg
, dst
));
3350 case Iop_RecipEst32F0x4
: op
= Asse_RCPF
; goto do_32F0x4_unary
;
3351 case Iop_RSqrtEst32F0x4
: op
= Asse_RSQRTF
; goto do_32F0x4_unary
;
3352 case Iop_Sqrt32F0x4
: op
= Asse_SQRTF
; goto do_32F0x4_unary
;
3355 /* A bit subtle. We have to copy the arg to the result
3356 register first, because actually doing the SSE scalar insn
3357 leaves the upper 3/4 of the destination register
3358 unchanged. Whereas the required semantics of these
3359 primops is that the upper 3/4 is simply copied in from the
3361 HReg arg
= iselVecExpr(env
, e
->Iex
.Unop
.arg
);
3362 HReg dst
= newVRegV(env
);
3363 addInstr(env
, mk_vMOVsd_RR(arg
, dst
));
3364 addInstr(env
, AMD64Instr_Sse32FLo(op
, arg
, dst
));
3368 case Iop_Sqrt64F0x2
: op
= Asse_SQRTF
; goto do_64F0x2_unary
;
3371 /* A bit subtle. We have to copy the arg to the result
3372 register first, because actually doing the SSE scalar insn
3373 leaves the upper half of the destination register
3374 unchanged. Whereas the required semantics of these
3375 primops is that the upper half is simply copied in from the
3377 HReg arg
= iselVecExpr(env
, e
->Iex
.Unop
.arg
);
3378 HReg dst
= newVRegV(env
);
3379 addInstr(env
, mk_vMOVsd_RR(arg
, dst
));
3380 addInstr(env
, AMD64Instr_Sse64FLo(op
, arg
, dst
));
3384 case Iop_32UtoV128
: {
3385 // FIXME maybe just use MOVQ here?
3386 HReg dst
= newVRegV(env
);
3387 AMD64AMode
* rsp_m32
= AMD64AMode_IR(-32, hregAMD64_RSP());
3388 AMD64RI
* ri
= iselIntExpr_RI(env
, e
->Iex
.Unop
.arg
);
3389 addInstr(env
, AMD64Instr_Alu64M(Aalu_MOV
, ri
, rsp_m32
));
3390 addInstr(env
, AMD64Instr_SseLdzLO(4, dst
, rsp_m32
));
3394 case Iop_64UtoV128
: {
3395 // FIXME maybe just use MOVQ here?
3396 HReg dst
= newVRegV(env
);
3397 AMD64AMode
* rsp0
= AMD64AMode_IR(0, hregAMD64_RSP());
3398 AMD64RMI
* rmi
= iselIntExpr_RMI(env
, e
->Iex
.Unop
.arg
);
3399 addInstr(env
, AMD64Instr_Push(rmi
));
3400 addInstr(env
, AMD64Instr_SseLdzLO(8, dst
, rsp0
));
3405 case Iop_V256toV128_0
:
3406 case Iop_V256toV128_1
: {
3408 iselDVecExpr(&vHi
, &vLo
, env
, e
->Iex
.Unop
.arg
);
3409 return (e
->Iex
.Unop
.op
== Iop_V256toV128_1
) ? vHi
: vLo
;
3412 case Iop_F16toF32x4
: {
3413 if (env
->hwcaps
& VEX_HWCAPS_AMD64_F16C
) {
3414 HReg src
= iselIntExpr_R(env
, e
->Iex
.Unop
.arg
);
3415 HReg dst
= newVRegV(env
);
3416 addInstr(env
, AMD64Instr_SseMOVQ(src
, dst
, /*toXMM=*/True
));
3417 addInstr(env
, AMD64Instr_Sse32Fx4(Asse_F16toF32
, dst
, dst
));
3425 } /* switch (e->Iex.Unop.op) */
3426 } /* if (e->tag == Iex_Unop) */
3428 if (e
->tag
== Iex_Binop
) {
3429 switch (e
->Iex
.Binop
.op
) {
3432 case Iop_Sqrt32Fx4
: {
3433 /* :: (rmode, vec) -> vec */
3434 HReg arg
= iselVecExpr(env
, e
->Iex
.Binop
.arg2
);
3435 HReg dst
= newVRegV(env
);
3436 /* XXXROUNDINGFIXME */
3437 /* set roundingmode here */
3438 addInstr(env
, (e
->Iex
.Binop
.op
== Iop_Sqrt64Fx2
3439 ? AMD64Instr_Sse64Fx2
: AMD64Instr_Sse32Fx4
)
3440 (Asse_SQRTF
, arg
, dst
));
3444 /* FIXME: could we generate MOVQ here? */
3445 case Iop_SetV128lo64
: {
3446 HReg dst
= newVRegV(env
);
3447 HReg srcV
= iselVecExpr(env
, e
->Iex
.Binop
.arg1
);
3448 HReg srcI
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
3449 AMD64AMode
* rsp_m16
= AMD64AMode_IR(-16, hregAMD64_RSP());
3450 addInstr(env
, AMD64Instr_SseLdSt(False
/*store*/, 16, srcV
, rsp_m16
));
3451 addInstr(env
, AMD64Instr_Alu64M(Aalu_MOV
, AMD64RI_Reg(srcI
), rsp_m16
));
3452 addInstr(env
, AMD64Instr_SseLdSt(True
/*load*/, 16, dst
, rsp_m16
));
3456 /* FIXME: could we generate MOVD here? */
3457 case Iop_SetV128lo32
: {
3458 HReg dst
= newVRegV(env
);
3459 HReg srcV
= iselVecExpr(env
, e
->Iex
.Binop
.arg1
);
3460 HReg srcI
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
3461 AMD64AMode
* rsp_m16
= AMD64AMode_IR(-16, hregAMD64_RSP());
3462 addInstr(env
, AMD64Instr_SseLdSt(False
/*store*/, 16, srcV
, rsp_m16
));
3463 addInstr(env
, AMD64Instr_Store(4, srcI
, rsp_m16
));
3464 addInstr(env
, AMD64Instr_SseLdSt(True
/*load*/, 16, dst
, rsp_m16
));
3468 case Iop_64HLtoV128
: {
3469 const IRExpr
* arg1
= e
->Iex
.Binop
.arg1
;
3470 const IRExpr
* arg2
= e
->Iex
.Binop
.arg2
;
3471 HReg dst
= newVRegV(env
);
3472 HReg tmp
= newVRegV(env
);
3473 HReg qHi
= iselIntExpr_R(env
, arg1
);
3474 // If the args are trivially the same (tmp or const), use the same
3475 // source register for both, and only one movq since those are
3476 // (relatively) expensive.
3477 if (areAtomsAndEqual(arg1
, arg2
)) {
3478 addInstr(env
, AMD64Instr_SseMOVQ(qHi
, dst
, True
/*toXMM*/));
3479 addInstr(env
, mk_vMOVsd_RR(dst
, tmp
));
3480 addInstr(env
, AMD64Instr_SseShiftN(Asse_SHL128
, 64, dst
));
3481 addInstr(env
, AMD64Instr_SseReRg(Asse_OR
, tmp
, dst
));
3483 HReg qLo
= iselIntExpr_R(env
, arg2
);
3484 addInstr(env
, AMD64Instr_SseMOVQ(qHi
, dst
, True
/*toXMM*/));
3485 addInstr(env
, AMD64Instr_SseShiftN(Asse_SHL128
, 64, dst
));
3486 addInstr(env
, AMD64Instr_SseMOVQ(qLo
, tmp
, True
/*toXMM*/));
3487 addInstr(env
, AMD64Instr_SseReRg(Asse_OR
, tmp
, dst
));
3492 case Iop_CmpEQ32Fx4
: op
= Asse_CMPEQF
; goto do_32Fx4
;
3493 case Iop_CmpLT32Fx4
: op
= Asse_CMPLTF
; goto do_32Fx4
;
3494 case Iop_CmpLE32Fx4
: op
= Asse_CMPLEF
; goto do_32Fx4
;
3495 case Iop_CmpUN32Fx4
: op
= Asse_CMPUNF
; goto do_32Fx4
;
3496 case Iop_Max32Fx4
: op
= Asse_MAXF
; goto do_32Fx4
;
3497 case Iop_Min32Fx4
: op
= Asse_MINF
; goto do_32Fx4
;
3500 HReg argL
= iselVecExpr(env
, e
->Iex
.Binop
.arg1
);
3501 HReg argR
= iselVecExpr(env
, e
->Iex
.Binop
.arg2
);
3502 HReg dst
= newVRegV(env
);
3503 addInstr(env
, mk_vMOVsd_RR(argL
, dst
));
3504 addInstr(env
, AMD64Instr_Sse32Fx4(op
, argR
, dst
));
3508 case Iop_CmpEQ64Fx2
: op
= Asse_CMPEQF
; goto do_64Fx2
;
3509 case Iop_CmpLT64Fx2
: op
= Asse_CMPLTF
; goto do_64Fx2
;
3510 case Iop_CmpLE64Fx2
: op
= Asse_CMPLEF
; goto do_64Fx2
;
3511 case Iop_CmpUN64Fx2
: op
= Asse_CMPUNF
; goto do_64Fx2
;
3512 case Iop_Max64Fx2
: op
= Asse_MAXF
; goto do_64Fx2
;
3513 case Iop_Min64Fx2
: op
= Asse_MINF
; goto do_64Fx2
;
3516 HReg argL
= iselVecExpr(env
, e
->Iex
.Binop
.arg1
);
3517 HReg argR
= iselVecExpr(env
, e
->Iex
.Binop
.arg2
);
3518 HReg dst
= newVRegV(env
);
3519 addInstr(env
, mk_vMOVsd_RR(argL
, dst
));
3520 addInstr(env
, AMD64Instr_Sse64Fx2(op
, argR
, dst
));
3524 case Iop_CmpEQ32F0x4
: op
= Asse_CMPEQF
; goto do_32F0x4
;
3525 case Iop_CmpLT32F0x4
: op
= Asse_CMPLTF
; goto do_32F0x4
;
3526 case Iop_CmpLE32F0x4
: op
= Asse_CMPLEF
; goto do_32F0x4
;
3527 case Iop_CmpUN32F0x4
: op
= Asse_CMPUNF
; goto do_32F0x4
;
3528 case Iop_Add32F0x4
: op
= Asse_ADDF
; goto do_32F0x4
;
3529 case Iop_Div32F0x4
: op
= Asse_DIVF
; goto do_32F0x4
;
3530 case Iop_Max32F0x4
: op
= Asse_MAXF
; goto do_32F0x4
;
3531 case Iop_Min32F0x4
: op
= Asse_MINF
; goto do_32F0x4
;
3532 case Iop_Mul32F0x4
: op
= Asse_MULF
; goto do_32F0x4
;
3533 case Iop_Sub32F0x4
: op
= Asse_SUBF
; goto do_32F0x4
;
3535 HReg argL
= iselVecExpr(env
, e
->Iex
.Binop
.arg1
);
3536 HReg argR
= iselVecExpr(env
, e
->Iex
.Binop
.arg2
);
3537 HReg dst
= newVRegV(env
);
3538 addInstr(env
, mk_vMOVsd_RR(argL
, dst
));
3539 addInstr(env
, AMD64Instr_Sse32FLo(op
, argR
, dst
));
3543 case Iop_CmpEQ64F0x2
: op
= Asse_CMPEQF
; goto do_64F0x2
;
3544 case Iop_CmpLT64F0x2
: op
= Asse_CMPLTF
; goto do_64F0x2
;
3545 case Iop_CmpLE64F0x2
: op
= Asse_CMPLEF
; goto do_64F0x2
;
3546 case Iop_CmpUN64F0x2
: op
= Asse_CMPUNF
; goto do_64F0x2
;
3547 case Iop_Add64F0x2
: op
= Asse_ADDF
; goto do_64F0x2
;
3548 case Iop_Div64F0x2
: op
= Asse_DIVF
; goto do_64F0x2
;
3549 case Iop_Max64F0x2
: op
= Asse_MAXF
; goto do_64F0x2
;
3550 case Iop_Min64F0x2
: op
= Asse_MINF
; goto do_64F0x2
;
3551 case Iop_Mul64F0x2
: op
= Asse_MULF
; goto do_64F0x2
;
3552 case Iop_Sub64F0x2
: op
= Asse_SUBF
; goto do_64F0x2
;
3554 HReg argL
= iselVecExpr(env
, e
->Iex
.Binop
.arg1
);
3555 HReg argR
= iselVecExpr(env
, e
->Iex
.Binop
.arg2
);
3556 HReg dst
= newVRegV(env
);
3557 addInstr(env
, mk_vMOVsd_RR(argL
, dst
));
3558 addInstr(env
, AMD64Instr_Sse64FLo(op
, argR
, dst
));
3562 case Iop_PermOrZero8x16
:
3563 if (env
->hwcaps
& VEX_HWCAPS_AMD64_SSSE3
) {
3567 // Otherwise we'll have to generate a call to
3568 // h_generic_calc_PermOrZero8x16 (ATK). But that would only be for a
3569 // host which doesn't have SSSE3, in which case we don't expect this
3570 // IROp to enter the compilation pipeline in the first place.
3573 case Iop_PwExtUSMulQAdd8x16
:
3574 if (env
->hwcaps
& VEX_HWCAPS_AMD64_SSSE3
) {
3575 op
= Asse_PMADDUBSW
;
3580 case Iop_QNarrowBin32Sto16Sx8
:
3581 op
= Asse_PACKSSD
; arg1isEReg
= True
; goto do_SseReRg
;
3582 case Iop_QNarrowBin16Sto8Sx16
:
3583 op
= Asse_PACKSSW
; arg1isEReg
= True
; goto do_SseReRg
;
3584 case Iop_QNarrowBin16Sto8Ux16
:
3585 op
= Asse_PACKUSW
; arg1isEReg
= True
; goto do_SseReRg
;
3587 case Iop_InterleaveHI8x16
:
3588 op
= Asse_UNPCKHB
; arg1isEReg
= True
; goto do_SseReRg
;
3589 case Iop_InterleaveHI16x8
:
3590 op
= Asse_UNPCKHW
; arg1isEReg
= True
; goto do_SseReRg
;
3591 case Iop_InterleaveHI32x4
:
3592 op
= Asse_UNPCKHD
; arg1isEReg
= True
; goto do_SseReRg
;
3593 case Iop_InterleaveHI64x2
:
3594 op
= Asse_UNPCKHQ
; arg1isEReg
= True
; goto do_SseReRg
;
3596 case Iop_InterleaveLO8x16
:
3597 op
= Asse_UNPCKLB
; arg1isEReg
= True
; goto do_SseReRg
;
3598 case Iop_InterleaveLO16x8
:
3599 op
= Asse_UNPCKLW
; arg1isEReg
= True
; goto do_SseReRg
;
3600 case Iop_InterleaveLO32x4
:
3601 op
= Asse_UNPCKLD
; arg1isEReg
= True
; goto do_SseReRg
;
3602 case Iop_InterleaveLO64x2
:
3603 op
= Asse_UNPCKLQ
; arg1isEReg
= True
; goto do_SseReRg
;
3605 case Iop_AndV128
: op
= Asse_AND
; goto do_SseReRg
;
3606 case Iop_OrV128
: op
= Asse_OR
; goto do_SseReRg
;
3607 case Iop_XorV128
: op
= Asse_XOR
; goto do_SseReRg
;
3608 case Iop_Add8x16
: op
= Asse_ADD8
; goto do_SseReRg
;
3609 case Iop_Add16x8
: op
= Asse_ADD16
; goto do_SseReRg
;
3610 case Iop_Add32x4
: op
= Asse_ADD32
; goto do_SseReRg
;
3611 case Iop_Add64x2
: op
= Asse_ADD64
; goto do_SseReRg
;
3612 case Iop_QAdd8Sx16
: op
= Asse_QADD8S
; goto do_SseReRg
;
3613 case Iop_QAdd16Sx8
: op
= Asse_QADD16S
; goto do_SseReRg
;
3614 case Iop_QAdd8Ux16
: op
= Asse_QADD8U
; goto do_SseReRg
;
3615 case Iop_QAdd16Ux8
: op
= Asse_QADD16U
; goto do_SseReRg
;
3616 case Iop_Avg8Ux16
: op
= Asse_AVG8U
; goto do_SseReRg
;
3617 case Iop_Avg16Ux8
: op
= Asse_AVG16U
; goto do_SseReRg
;
3618 case Iop_CmpEQ8x16
: op
= Asse_CMPEQ8
; goto do_SseReRg
;
3619 case Iop_CmpEQ16x8
: op
= Asse_CMPEQ16
; goto do_SseReRg
;
3620 case Iop_CmpEQ32x4
: op
= Asse_CMPEQ32
; goto do_SseReRg
;
3621 case Iop_CmpGT8Sx16
: op
= Asse_CMPGT8S
; goto do_SseReRg
;
3622 case Iop_CmpGT16Sx8
: op
= Asse_CMPGT16S
; goto do_SseReRg
;
3623 case Iop_CmpGT32Sx4
: op
= Asse_CMPGT32S
; goto do_SseReRg
;
3624 case Iop_Max16Sx8
: op
= Asse_MAX16S
; goto do_SseReRg
;
3625 case Iop_Max8Ux16
: op
= Asse_MAX8U
; goto do_SseReRg
;
3626 case Iop_Min16Sx8
: op
= Asse_MIN16S
; goto do_SseReRg
;
3627 case Iop_Min8Ux16
: op
= Asse_MIN8U
; goto do_SseReRg
;
3628 case Iop_MulHi16Ux8
: op
= Asse_MULHI16U
; goto do_SseReRg
;
3629 case Iop_MulHi16Sx8
: op
= Asse_MULHI16S
; goto do_SseReRg
;
3630 case Iop_Mul16x8
: op
= Asse_MUL16
; goto do_SseReRg
;
3631 case Iop_Sub8x16
: op
= Asse_SUB8
; goto do_SseReRg
;
3632 case Iop_Sub16x8
: op
= Asse_SUB16
; goto do_SseReRg
;
3633 case Iop_Sub32x4
: op
= Asse_SUB32
; goto do_SseReRg
;
3634 case Iop_Sub64x2
: op
= Asse_SUB64
; goto do_SseReRg
;
3635 case Iop_QSub8Sx16
: op
= Asse_QSUB8S
; goto do_SseReRg
;
3636 case Iop_QSub16Sx8
: op
= Asse_QSUB16S
; goto do_SseReRg
;
3637 case Iop_QSub8Ux16
: op
= Asse_QSUB8U
; goto do_SseReRg
;
3638 case Iop_QSub16Ux8
: op
= Asse_QSUB16U
; goto do_SseReRg
;
3640 HReg arg1
= iselVecExpr(env
, e
->Iex
.Binop
.arg1
);
3641 HReg arg2
= iselVecExpr(env
, e
->Iex
.Binop
.arg2
);
3642 HReg dst
= newVRegV(env
);
3644 addInstr(env
, mk_vMOVsd_RR(arg2
, dst
));
3645 addInstr(env
, AMD64Instr_SseReRg(op
, arg1
, dst
));
3647 addInstr(env
, mk_vMOVsd_RR(arg1
, dst
));
3648 addInstr(env
, AMD64Instr_SseReRg(op
, arg2
, dst
));
3653 case Iop_ShlN16x8
: laneBits
= 16; op
= Asse_SHL16
; goto do_SseShift
;
3654 case Iop_ShlN32x4
: laneBits
= 32; op
= Asse_SHL32
; goto do_SseShift
;
3655 case Iop_ShlN64x2
: laneBits
= 64; op
= Asse_SHL64
; goto do_SseShift
;
3656 case Iop_SarN16x8
: laneBits
= 16; op
= Asse_SAR16
; goto do_SseShift
;
3657 case Iop_SarN32x4
: laneBits
= 32; op
= Asse_SAR32
; goto do_SseShift
;
3658 case Iop_ShrN16x8
: laneBits
= 16; op
= Asse_SHR16
; goto do_SseShift
;
3659 case Iop_ShrN32x4
: laneBits
= 32; op
= Asse_SHR32
; goto do_SseShift
;
3660 case Iop_ShrN64x2
: laneBits
= 64; op
= Asse_SHR64
; goto do_SseShift
;
3662 HReg dst
= newVRegV(env
);
3663 HReg greg
= iselVecExpr(env
, e
->Iex
.Binop
.arg1
);
3664 /* If it's a shift by an in-range immediate, generate a single
3666 if (e
->Iex
.Binop
.arg2
->tag
== Iex_Const
) {
3667 IRConst
* c
= e
->Iex
.Binop
.arg2
->Iex
.Const
.con
;
3668 vassert(c
->tag
== Ico_U8
);
3669 UInt shift
= c
->Ico
.U8
;
3670 if (shift
< laneBits
) {
3671 addInstr(env
, mk_vMOVsd_RR(greg
, dst
));
3672 addInstr(env
, AMD64Instr_SseShiftN(op
, shift
, dst
));
3676 /* Otherwise we have to do it the longwinded way. */
3677 AMD64RMI
* rmi
= iselIntExpr_RMI(env
, e
->Iex
.Binop
.arg2
);
3678 AMD64AMode
* rsp0
= AMD64AMode_IR(0, hregAMD64_RSP());
3679 HReg ereg
= newVRegV(env
);
3680 addInstr(env
, AMD64Instr_Push(AMD64RMI_Imm(0)));
3681 addInstr(env
, AMD64Instr_Push(rmi
));
3682 addInstr(env
, AMD64Instr_SseLdSt(True
/*load*/, 16, ereg
, rsp0
));
3683 addInstr(env
, mk_vMOVsd_RR(greg
, dst
));
3684 addInstr(env
, AMD64Instr_SseReRg(op
, ereg
, dst
));
3685 add_to_rsp(env
, 16);
3689 case Iop_Mul32x4
: fn
= (HWord
)h_generic_calc_Mul32x4
;
3690 goto do_SseAssistedBinary
;
3691 case Iop_Max32Sx4
: fn
= (HWord
)h_generic_calc_Max32Sx4
;
3692 goto do_SseAssistedBinary
;
3693 case Iop_Min32Sx4
: fn
= (HWord
)h_generic_calc_Min32Sx4
;
3694 goto do_SseAssistedBinary
;
3695 case Iop_Max32Ux4
: fn
= (HWord
)h_generic_calc_Max32Ux4
;
3696 goto do_SseAssistedBinary
;
3697 case Iop_Min32Ux4
: fn
= (HWord
)h_generic_calc_Min32Ux4
;
3698 goto do_SseAssistedBinary
;
3699 case Iop_Max16Ux8
: fn
= (HWord
)h_generic_calc_Max16Ux8
;
3700 goto do_SseAssistedBinary
;
3701 case Iop_Min16Ux8
: fn
= (HWord
)h_generic_calc_Min16Ux8
;
3702 goto do_SseAssistedBinary
;
3703 case Iop_Max8Sx16
: fn
= (HWord
)h_generic_calc_Max8Sx16
;
3704 goto do_SseAssistedBinary
;
3705 case Iop_Min8Sx16
: fn
= (HWord
)h_generic_calc_Min8Sx16
;
3706 goto do_SseAssistedBinary
;
3707 case Iop_CmpEQ64x2
: fn
= (HWord
)h_generic_calc_CmpEQ64x2
;
3708 goto do_SseAssistedBinary
;
3709 case Iop_CmpGT64Sx2
: fn
= (HWord
)h_generic_calc_CmpGT64Sx2
;
3710 goto do_SseAssistedBinary
;
3711 case Iop_Perm32x4
: fn
= (HWord
)h_generic_calc_Perm32x4
;
3712 goto do_SseAssistedBinary
;
3713 case Iop_QNarrowBin32Sto16Ux8
:
3714 fn
= (HWord
)h_generic_calc_QNarrowBin32Sto16Ux8
;
3715 goto do_SseAssistedBinary
;
3716 case Iop_NarrowBin16to8x16
:
3717 fn
= (HWord
)h_generic_calc_NarrowBin16to8x16
;
3718 goto do_SseAssistedBinary
;
3719 case Iop_NarrowBin32to16x8
:
3720 fn
= (HWord
)h_generic_calc_NarrowBin32to16x8
;
3721 goto do_SseAssistedBinary
;
3722 do_SseAssistedBinary
: {
3723 /* RRRufff! RRRufff code is what we're generating here. Oh
3726 HReg dst
= newVRegV(env
);
3727 HReg argL
= iselVecExpr(env
, e
->Iex
.Binop
.arg1
);
3728 HReg argR
= iselVecExpr(env
, e
->Iex
.Binop
.arg2
);
3729 HReg argp
= newVRegI(env
);
3730 /* subq $112, %rsp -- make a space*/
3731 sub_from_rsp(env
, 112);
3732 /* leaq 48(%rsp), %r_argp -- point into it */
3733 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
3735 /* andq $-16, %r_argp -- 16-align the pointer */
3736 addInstr(env
, AMD64Instr_Alu64R(Aalu_AND
,
3737 AMD64RMI_Imm( ~(UInt
)15 ),
3739 /* Prepare 3 arg regs:
3740 leaq 0(%r_argp), %rdi
3741 leaq 16(%r_argp), %rsi
3742 leaq 32(%r_argp), %rdx
3744 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(0, argp
),
3746 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(16, argp
),
3748 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(32, argp
),
3750 /* Store the two args, at (%rsi) and (%rdx):
3751 movupd %argL, 0(%rsi)
3752 movupd %argR, 0(%rdx)
3754 addInstr(env
, AMD64Instr_SseLdSt(False
/*!isLoad*/, 16, argL
,
3755 AMD64AMode_IR(0, hregAMD64_RSI())));
3756 addInstr(env
, AMD64Instr_SseLdSt(False
/*!isLoad*/, 16, argR
,
3757 AMD64AMode_IR(0, hregAMD64_RDX())));
3758 /* call the helper */
3759 addInstr(env
, AMD64Instr_Call( Acc_ALWAYS
, (ULong
)fn
,
3760 3, mk_RetLoc_simple(RLPri_None
) ));
3761 /* fetch the result from memory, using %r_argp, which the
3762 register allocator will keep alive across the call. */
3763 addInstr(env
, AMD64Instr_SseLdSt(True
/*isLoad*/, 16, dst
,
3764 AMD64AMode_IR(0, argp
)));
3765 /* and finally, clear the space */
3766 add_to_rsp(env
, 112);
3770 case Iop_SarN64x2
: fn
= (HWord
)h_generic_calc_SarN64x2
;
3771 goto do_SseAssistedVectorAndScalar
;
3772 case Iop_SarN8x16
: fn
= (HWord
)h_generic_calc_SarN8x16
;
3773 goto do_SseAssistedVectorAndScalar
;
3774 do_SseAssistedVectorAndScalar
: {
3775 /* RRRufff! RRRufff code is what we're generating here. Oh
3778 HReg dst
= newVRegV(env
);
3779 HReg argL
= iselVecExpr(env
, e
->Iex
.Binop
.arg1
);
3780 HReg argR
= iselIntExpr_R(env
, e
->Iex
.Binop
.arg2
);
3781 HReg argp
= newVRegI(env
);
3782 /* subq $112, %rsp -- make a space*/
3783 sub_from_rsp(env
, 112);
3784 /* leaq 48(%rsp), %r_argp -- point into it */
3785 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
3787 /* andq $-16, %r_argp -- 16-align the pointer */
3788 addInstr(env
, AMD64Instr_Alu64R(Aalu_AND
,
3789 AMD64RMI_Imm( ~(UInt
)15 ),
3791 /* Prepare 2 vector arg regs:
3792 leaq 0(%r_argp), %rdi
3793 leaq 16(%r_argp), %rsi
3795 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(0, argp
),
3797 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(16, argp
),
3799 /* Store the vector arg, at (%rsi):
3800 movupd %argL, 0(%rsi)
3802 addInstr(env
, AMD64Instr_SseLdSt(False
/*!isLoad*/, 16, argL
,
3803 AMD64AMode_IR(0, hregAMD64_RSI())));
3804 /* And get the scalar value into rdx */
3805 addInstr(env
, mk_iMOVsd_RR(argR
, hregAMD64_RDX()));
3807 /* call the helper */
3808 addInstr(env
, AMD64Instr_Call( Acc_ALWAYS
, (ULong
)fn
,
3809 3, mk_RetLoc_simple(RLPri_None
) ));
3810 /* fetch the result from memory, using %r_argp, which the
3811 register allocator will keep alive across the call. */
3812 addInstr(env
, AMD64Instr_SseLdSt(True
/*isLoad*/, 16, dst
,
3813 AMD64AMode_IR(0, argp
)));
3814 /* and finally, clear the space */
3815 add_to_rsp(env
, 112);
3819 case Iop_I32StoF32x4
:
3820 case Iop_F32toI32Sx4
: {
3821 HReg arg
= iselVecExpr(env
, e
->Iex
.Binop
.arg2
);
3822 HReg dst
= newVRegV(env
);
3824 = e
->Iex
.Binop
.op
== Iop_I32StoF32x4
? Asse_I2F
: Asse_F2I
;
3825 set_SSE_rounding_mode(env
, e
->Iex
.Binop
.arg1
);
3826 addInstr(env
, AMD64Instr_Sse32Fx4(mop
, arg
, dst
));
3827 set_SSE_rounding_default(env
);
3831 // Half-float vector conversion
3832 case Iop_F32toF16x8
: {
3833 if (env
->hwcaps
& VEX_HWCAPS_AMD64_F16C
) {
3835 iselDVecExpr(&srcHi
, &srcLo
, env
, e
->Iex
.Binop
.arg2
);
3836 HReg dstHi
= newVRegV(env
);
3837 HReg dstLo
= newVRegV(env
);
3838 set_SSE_rounding_mode( env
, e
->Iex
.Binop
.arg1
);
3839 addInstr(env
, AMD64Instr_Sse32Fx4(Asse_F32toF16
, srcHi
, dstHi
));
3840 addInstr(env
, AMD64Instr_Sse32Fx4(Asse_F32toF16
, srcLo
, dstLo
));
3841 set_SSE_rounding_default(env
);
3842 // Now we have the result in dstHi[63:0] and dstLo[63:0], but we
3843 // need to compact all that into one register. There's probably a
3844 // more elegant way to do this, but ..
3845 addInstr(env
, AMD64Instr_SseShiftN(Asse_SHL128
, 64, dstHi
));
3846 // dstHi is now 127:64 = useful data, 63:0 = zero
3847 addInstr(env
, AMD64Instr_SseShiftN(Asse_SHL128
, 64, dstLo
));
3848 addInstr(env
, AMD64Instr_SseShiftN(Asse_SHR128
, 64, dstLo
));
3849 // dstLo is now 127:64 = zero, 63:0 = useful data
3850 addInstr(env
, AMD64Instr_SseReRg(Asse_OR
, dstHi
, dstLo
));
3858 } /* switch (e->Iex.Binop.op) */
3859 } /* if (e->tag == Iex_Binop) */
3861 if (e
->tag
== Iex_Triop
) {
3862 IRTriop
*triop
= e
->Iex
.Triop
.details
;
3863 switch (triop
->op
) {
3865 case Iop_Add64Fx2
: op
= Asse_ADDF
; goto do_64Fx2_w_rm
;
3866 case Iop_Sub64Fx2
: op
= Asse_SUBF
; goto do_64Fx2_w_rm
;
3867 case Iop_Mul64Fx2
: op
= Asse_MULF
; goto do_64Fx2_w_rm
;
3868 case Iop_Div64Fx2
: op
= Asse_DIVF
; goto do_64Fx2_w_rm
;
3871 HReg argL
= iselVecExpr(env
, triop
->arg2
);
3872 HReg argR
= iselVecExpr(env
, triop
->arg3
);
3873 HReg dst
= newVRegV(env
);
3874 addInstr(env
, mk_vMOVsd_RR(argL
, dst
));
3875 /* XXXROUNDINGFIXME */
3876 /* set roundingmode here */
3877 addInstr(env
, AMD64Instr_Sse64Fx2(op
, argR
, dst
));
3881 case Iop_Add32Fx4
: op
= Asse_ADDF
; goto do_32Fx4_w_rm
;
3882 case Iop_Sub32Fx4
: op
= Asse_SUBF
; goto do_32Fx4_w_rm
;
3883 case Iop_Mul32Fx4
: op
= Asse_MULF
; goto do_32Fx4_w_rm
;
3884 case Iop_Div32Fx4
: op
= Asse_DIVF
; goto do_32Fx4_w_rm
;
3887 HReg argL
= iselVecExpr(env
, triop
->arg2
);
3888 HReg argR
= iselVecExpr(env
, triop
->arg3
);
3889 HReg dst
= newVRegV(env
);
3890 addInstr(env
, mk_vMOVsd_RR(argL
, dst
));
3891 /* XXXROUNDINGFIXME */
3892 /* set roundingmode here */
3893 addInstr(env
, AMD64Instr_Sse32Fx4(op
, argR
, dst
));
3899 } /* switch (triop->op) */
3900 } /* if (e->tag == Iex_Triop) */
3902 if (e
->tag
== Iex_ITE
) { // VFD
3903 HReg r1
= iselVecExpr(env
, e
->Iex
.ITE
.iftrue
);
3904 HReg r0
= iselVecExpr(env
, e
->Iex
.ITE
.iffalse
);
3905 HReg dst
= newVRegV(env
);
3906 addInstr(env
, mk_vMOVsd_RR(r1
,dst
));
3907 AMD64CondCode cc
= iselCondCode(env
, e
->Iex
.ITE
.cond
);
3908 addInstr(env
, AMD64Instr_SseCMov(cc
^ 1, r0
, dst
));
3913 vex_printf("iselVecExpr (amd64, subarch = %s): can't reduce\n",
3914 LibVEX_ppVexHwCaps(VexArchAMD64
, env
->hwcaps
));
3916 vpanic("iselVecExpr_wrk");
/*---------------------------------------------------------*/
/*--- ISEL: SIMD (V256) expressions, into 2 XMM regs.    --*/
/*---------------------------------------------------------*/
3924 static void iselDVecExpr ( /*OUT*/HReg
* rHi
, /*OUT*/HReg
* rLo
,
3925 ISelEnv
* env
, const IRExpr
* e
)
3927 iselDVecExpr_wrk( rHi
, rLo
, env
, e
);
3929 vex_printf("\n"); ppIRExpr(e
); vex_printf("\n");
3931 vassert(hregClass(*rHi
) == HRcVec128
);
3932 vassert(hregClass(*rLo
) == HRcVec128
);
3933 vassert(hregIsVirtual(*rHi
));
3934 vassert(hregIsVirtual(*rLo
));
3938 /* DO NOT CALL THIS DIRECTLY */
3939 static void iselDVecExpr_wrk ( /*OUT*/HReg
* rHi
, /*OUT*/HReg
* rLo
,
3940 ISelEnv
* env
, const IRExpr
* e
)
3942 HWord fn
= 0; /* address of helper fn, if required */
3944 IRType ty
= typeOfIRExpr(env
->type_env
, e
);
3945 vassert(ty
== Ity_V256
);
3948 AMD64SseOp op
= Asse_INVALID
;
3950 /* read 256-bit IRTemp */
3951 if (e
->tag
== Iex_RdTmp
) {
3952 lookupIRTempPair( rHi
, rLo
, env
, e
->Iex
.RdTmp
.tmp
);
3956 if (e
->tag
== Iex_Get
) {
3957 HReg vHi
= newVRegV(env
);
3958 HReg vLo
= newVRegV(env
);
3959 HReg rbp
= hregAMD64_RBP();
3960 AMD64AMode
* am0
= AMD64AMode_IR(e
->Iex
.Get
.offset
+ 0, rbp
);
3961 AMD64AMode
* am16
= AMD64AMode_IR(e
->Iex
.Get
.offset
+ 16, rbp
);
3962 addInstr(env
, AMD64Instr_SseLdSt(True
/*load*/, 16, vLo
, am0
));
3963 addInstr(env
, AMD64Instr_SseLdSt(True
/*load*/, 16, vHi
, am16
));
3969 if (e
->tag
== Iex_Load
) {
3970 HReg vHi
= newVRegV(env
);
3971 HReg vLo
= newVRegV(env
);
3972 HReg rA
= iselIntExpr_R(env
, e
->Iex
.Load
.addr
);
3973 AMD64AMode
* am0
= AMD64AMode_IR(0, rA
);
3974 AMD64AMode
* am16
= AMD64AMode_IR(16, rA
);
3975 addInstr(env
, AMD64Instr_SseLdSt(True
/*load*/, 16, vLo
, am0
));
3976 addInstr(env
, AMD64Instr_SseLdSt(True
/*load*/, 16, vHi
, am16
));
3982 if (e
->tag
== Iex_Const
) {
3983 vassert(e
->Iex
.Const
.con
->tag
== Ico_V256
);
3984 switch (e
->Iex
.Const
.con
->Ico
.V256
) {
3986 HReg vHi
= generate_zeroes_V128(env
);
3987 HReg vLo
= newVRegV(env
);
3988 addInstr(env
, mk_vMOVsd_RR(vHi
, vLo
));
3994 break; /* give up. Until such time as is necessary. */
3998 if (e
->tag
== Iex_Unop
) {
3999 switch (e
->Iex
.Unop
.op
) {
4003 iselDVecExpr(&argHi
, &argLo
, env
, e
->Iex
.Unop
.arg
);
4004 *rHi
= do_sse_NotV128(env
, argHi
);
4005 *rLo
= do_sse_NotV128(env
, argLo
);
4009 case Iop_RecipEst32Fx8
: op
= Asse_RCPF
; goto do_32Fx8_unary
;
4010 case Iop_Sqrt32Fx8
: op
= Asse_SQRTF
; goto do_32Fx8_unary
;
4011 case Iop_RSqrtEst32Fx8
: op
= Asse_RSQRTF
; goto do_32Fx8_unary
;
4015 iselDVecExpr(&argHi
, &argLo
, env
, e
->Iex
.Unop
.arg
);
4016 HReg dstHi
= newVRegV(env
);
4017 HReg dstLo
= newVRegV(env
);
4018 addInstr(env
, AMD64Instr_Sse32Fx4(op
, argHi
, dstHi
));
4019 addInstr(env
, AMD64Instr_Sse32Fx4(op
, argLo
, dstLo
));
4025 case Iop_Sqrt64Fx4
: op
= Asse_SQRTF
; goto do_64Fx4_unary
;
4029 iselDVecExpr(&argHi
, &argLo
, env
, e
->Iex
.Unop
.arg
);
4030 HReg dstHi
= newVRegV(env
);
4031 HReg dstLo
= newVRegV(env
);
4032 addInstr(env
, AMD64Instr_Sse64Fx2(op
, argHi
, dstHi
));
4033 addInstr(env
, AMD64Instr_Sse64Fx2(op
, argLo
, dstLo
));
4039 case Iop_CmpNEZ64x4
: {
4040 /* We can use SSE2 instructions for this. */
4041 /* Same scheme as Iop_CmpNEZ64x2, except twice as wide
4042 (obviously). See comment on Iop_CmpNEZ64x2 for
4043 explanation of what's going on here. */
4045 iselDVecExpr(&argHi
, &argLo
, env
, e
->Iex
.Unop
.arg
);
4046 HReg tmpHi
= generate_zeroes_V128(env
);
4047 HReg tmpLo
= newVRegV(env
);
4048 addInstr(env
, mk_vMOVsd_RR(tmpHi
, tmpLo
));
4049 HReg dstHi
= newVRegV(env
);
4050 HReg dstLo
= newVRegV(env
);
4051 addInstr(env
, AMD64Instr_SseReRg(Asse_CMPEQ32
, argHi
, tmpHi
));
4052 addInstr(env
, AMD64Instr_SseReRg(Asse_CMPEQ32
, argLo
, tmpLo
));
4053 tmpHi
= do_sse_NotV128(env
, tmpHi
);
4054 tmpLo
= do_sse_NotV128(env
, tmpLo
);
4055 addInstr(env
, AMD64Instr_SseShuf(0xB1, tmpHi
, dstHi
));
4056 addInstr(env
, AMD64Instr_SseShuf(0xB1, tmpLo
, dstLo
));
4057 addInstr(env
, AMD64Instr_SseReRg(Asse_OR
, tmpHi
, dstHi
));
4058 addInstr(env
, AMD64Instr_SseReRg(Asse_OR
, tmpLo
, dstLo
));
4064 case Iop_CmpNEZ32x8
: op
= Asse_CMPEQ32
; goto do_CmpNEZ_vector
;
4065 case Iop_CmpNEZ16x16
: op
= Asse_CMPEQ16
; goto do_CmpNEZ_vector
;
4066 case Iop_CmpNEZ8x32
: op
= Asse_CMPEQ8
; goto do_CmpNEZ_vector
;
4070 iselDVecExpr(&argHi
, &argLo
, env
, e
->Iex
.Unop
.arg
);
4071 HReg tmpHi
= newVRegV(env
);
4072 HReg tmpLo
= newVRegV(env
);
4073 HReg zero
= generate_zeroes_V128(env
);
4075 addInstr(env
, mk_vMOVsd_RR(argHi
, tmpHi
));
4076 addInstr(env
, mk_vMOVsd_RR(argLo
, tmpLo
));
4077 addInstr(env
, AMD64Instr_SseReRg(op
, zero
, tmpHi
));
4078 addInstr(env
, AMD64Instr_SseReRg(op
, zero
, tmpLo
));
4079 dstHi
= do_sse_NotV128(env
, tmpHi
);
4080 dstLo
= do_sse_NotV128(env
, tmpLo
);
4086 case Iop_F16toF32x8
: {
4087 if (env
->hwcaps
& VEX_HWCAPS_AMD64_F16C
) {
4088 HReg src
= iselVecExpr(env
, e
->Iex
.Unop
.arg
);
4089 HReg srcCopy
= newVRegV(env
);
4090 HReg dstHi
= newVRegV(env
);
4091 HReg dstLo
= newVRegV(env
);
4092 // Copy src, since we'll need to modify it.
4093 addInstr(env
, mk_vMOVsd_RR(src
, srcCopy
));
4094 addInstr(env
, AMD64Instr_Sse32Fx4(Asse_F16toF32
, srcCopy
, dstLo
));
4095 addInstr(env
, AMD64Instr_SseShiftN(Asse_SHR128
, 64, srcCopy
));
4096 addInstr(env
, AMD64Instr_Sse32Fx4(Asse_F16toF32
, srcCopy
, dstHi
));
4106 } /* switch (e->Iex.Unop.op) */
4107 } /* if (e->tag == Iex_Unop) */
4109 if (e
->tag
== Iex_Binop
) {
4110 switch (e
->Iex
.Binop
.op
) {
4112 case Iop_Max64Fx4
: op
= Asse_MAXF
; goto do_64Fx4
;
4113 case Iop_Min64Fx4
: op
= Asse_MINF
; goto do_64Fx4
;
4116 HReg argLhi
, argLlo
, argRhi
, argRlo
;
4117 iselDVecExpr(&argLhi
, &argLlo
, env
, e
->Iex
.Binop
.arg1
);
4118 iselDVecExpr(&argRhi
, &argRlo
, env
, e
->Iex
.Binop
.arg2
);
4119 HReg dstHi
= newVRegV(env
);
4120 HReg dstLo
= newVRegV(env
);
4121 addInstr(env
, mk_vMOVsd_RR(argLhi
, dstHi
));
4122 addInstr(env
, mk_vMOVsd_RR(argLlo
, dstLo
));
4123 addInstr(env
, AMD64Instr_Sse64Fx2(op
, argRhi
, dstHi
));
4124 addInstr(env
, AMD64Instr_Sse64Fx2(op
, argRlo
, dstLo
));
4130 case Iop_Max32Fx8
: op
= Asse_MAXF
; goto do_32Fx8
;
4131 case Iop_Min32Fx8
: op
= Asse_MINF
; goto do_32Fx8
;
4134 HReg argLhi
, argLlo
, argRhi
, argRlo
;
4135 iselDVecExpr(&argLhi
, &argLlo
, env
, e
->Iex
.Binop
.arg1
);
4136 iselDVecExpr(&argRhi
, &argRlo
, env
, e
->Iex
.Binop
.arg2
);
4137 HReg dstHi
= newVRegV(env
);
4138 HReg dstLo
= newVRegV(env
);
4139 addInstr(env
, mk_vMOVsd_RR(argLhi
, dstHi
));
4140 addInstr(env
, mk_vMOVsd_RR(argLlo
, dstLo
));
4141 addInstr(env
, AMD64Instr_Sse32Fx4(op
, argRhi
, dstHi
));
4142 addInstr(env
, AMD64Instr_Sse32Fx4(op
, argRlo
, dstLo
));
4148 case Iop_AndV256
: op
= Asse_AND
; goto do_SseReRg
;
4149 case Iop_OrV256
: op
= Asse_OR
; goto do_SseReRg
;
4150 case Iop_XorV256
: op
= Asse_XOR
; goto do_SseReRg
;
4151 case Iop_Add8x32
: op
= Asse_ADD8
; goto do_SseReRg
;
4152 case Iop_Add16x16
: op
= Asse_ADD16
; goto do_SseReRg
;
4153 case Iop_Add32x8
: op
= Asse_ADD32
; goto do_SseReRg
;
4154 case Iop_Add64x4
: op
= Asse_ADD64
; goto do_SseReRg
;
4155 case Iop_QAdd8Sx32
: op
= Asse_QADD8S
; goto do_SseReRg
;
4156 case Iop_QAdd16Sx16
: op
= Asse_QADD16S
; goto do_SseReRg
;
4157 case Iop_QAdd8Ux32
: op
= Asse_QADD8U
; goto do_SseReRg
;
4158 case Iop_QAdd16Ux16
: op
= Asse_QADD16U
; goto do_SseReRg
;
4159 case Iop_Avg8Ux32
: op
= Asse_AVG8U
; goto do_SseReRg
;
4160 case Iop_Avg16Ux16
: op
= Asse_AVG16U
; goto do_SseReRg
;
4161 case Iop_CmpEQ8x32
: op
= Asse_CMPEQ8
; goto do_SseReRg
;
4162 case Iop_CmpEQ16x16
: op
= Asse_CMPEQ16
; goto do_SseReRg
;
4163 case Iop_CmpEQ32x8
: op
= Asse_CMPEQ32
; goto do_SseReRg
;
4164 case Iop_CmpGT8Sx32
: op
= Asse_CMPGT8S
; goto do_SseReRg
;
4165 case Iop_CmpGT16Sx16
: op
= Asse_CMPGT16S
; goto do_SseReRg
;
4166 case Iop_CmpGT32Sx8
: op
= Asse_CMPGT32S
; goto do_SseReRg
;
4167 case Iop_Max16Sx16
: op
= Asse_MAX16S
; goto do_SseReRg
;
4168 case Iop_Max8Ux32
: op
= Asse_MAX8U
; goto do_SseReRg
;
4169 case Iop_Min16Sx16
: op
= Asse_MIN16S
; goto do_SseReRg
;
4170 case Iop_Min8Ux32
: op
= Asse_MIN8U
; goto do_SseReRg
;
4171 case Iop_MulHi16Ux16
: op
= Asse_MULHI16U
; goto do_SseReRg
;
4172 case Iop_MulHi16Sx16
: op
= Asse_MULHI16S
; goto do_SseReRg
;
4173 case Iop_Mul16x16
: op
= Asse_MUL16
; goto do_SseReRg
;
4174 case Iop_Sub8x32
: op
= Asse_SUB8
; goto do_SseReRg
;
4175 case Iop_Sub16x16
: op
= Asse_SUB16
; goto do_SseReRg
;
4176 case Iop_Sub32x8
: op
= Asse_SUB32
; goto do_SseReRg
;
4177 case Iop_Sub64x4
: op
= Asse_SUB64
; goto do_SseReRg
;
4178 case Iop_QSub8Sx32
: op
= Asse_QSUB8S
; goto do_SseReRg
;
4179 case Iop_QSub16Sx16
: op
= Asse_QSUB16S
; goto do_SseReRg
;
4180 case Iop_QSub8Ux32
: op
= Asse_QSUB8U
; goto do_SseReRg
;
4181 case Iop_QSub16Ux16
: op
= Asse_QSUB16U
; goto do_SseReRg
;
4184 HReg argLhi
, argLlo
, argRhi
, argRlo
;
4185 iselDVecExpr(&argLhi
, &argLlo
, env
, e
->Iex
.Binop
.arg1
);
4186 iselDVecExpr(&argRhi
, &argRlo
, env
, e
->Iex
.Binop
.arg2
);
4187 HReg dstHi
= newVRegV(env
);
4188 HReg dstLo
= newVRegV(env
);
4189 addInstr(env
, mk_vMOVsd_RR(argLhi
, dstHi
));
4190 addInstr(env
, mk_vMOVsd_RR(argLlo
, dstLo
));
4191 addInstr(env
, AMD64Instr_SseReRg(op
, argRhi
, dstHi
));
4192 addInstr(env
, AMD64Instr_SseReRg(op
, argRlo
, dstLo
));
4198 case Iop_ShlN16x16
: laneBits
= 16; op
= Asse_SHL16
; goto do_SseShift
;
4199 case Iop_ShlN32x8
: laneBits
= 32; op
= Asse_SHL32
; goto do_SseShift
;
4200 case Iop_ShlN64x4
: laneBits
= 64; op
= Asse_SHL64
; goto do_SseShift
;
4201 case Iop_SarN16x16
: laneBits
= 16; op
= Asse_SAR16
; goto do_SseShift
;
4202 case Iop_SarN32x8
: laneBits
= 32; op
= Asse_SAR32
; goto do_SseShift
;
4203 case Iop_ShrN16x16
: laneBits
= 16; op
= Asse_SHR16
; goto do_SseShift
;
4204 case Iop_ShrN32x8
: laneBits
= 32; op
= Asse_SHR32
; goto do_SseShift
;
4205 case Iop_ShrN64x4
: laneBits
= 64; op
= Asse_SHR64
; goto do_SseShift
;
4207 HReg dstHi
= newVRegV(env
);
4208 HReg dstLo
= newVRegV(env
);
4209 HReg gregHi
, gregLo
;
4210 iselDVecExpr(&gregHi
, &gregLo
, env
, e
->Iex
.Binop
.arg1
);
4211 /* If it's a shift by an in-range immediate, generate two single
4213 if (e
->Iex
.Binop
.arg2
->tag
== Iex_Const
) {
4214 IRConst
* c
= e
->Iex
.Binop
.arg2
->Iex
.Const
.con
;
4215 vassert(c
->tag
== Ico_U8
);
4216 UInt shift
= c
->Ico
.U8
;
4217 if (shift
< laneBits
) {
4218 addInstr(env
, mk_vMOVsd_RR(gregHi
, dstHi
));
4219 addInstr(env
, AMD64Instr_SseShiftN(op
, shift
, dstHi
));
4220 addInstr(env
, mk_vMOVsd_RR(gregLo
, dstLo
));
4221 addInstr(env
, AMD64Instr_SseShiftN(op
, shift
, dstLo
));
4227 /* Otherwise we have to do it the longwinded way. */
4228 AMD64RMI
* rmi
= iselIntExpr_RMI(env
, e
->Iex
.Binop
.arg2
);
4229 AMD64AMode
* rsp0
= AMD64AMode_IR(0, hregAMD64_RSP());
4230 HReg ereg
= newVRegV(env
);
4231 addInstr(env
, AMD64Instr_Push(AMD64RMI_Imm(0)));
4232 addInstr(env
, AMD64Instr_Push(rmi
));
4233 addInstr(env
, AMD64Instr_SseLdSt(True
/*load*/, 16, ereg
, rsp0
));
4234 addInstr(env
, mk_vMOVsd_RR(gregHi
, dstHi
));
4235 addInstr(env
, AMD64Instr_SseReRg(op
, ereg
, dstHi
));
4236 addInstr(env
, mk_vMOVsd_RR(gregLo
, dstLo
));
4237 addInstr(env
, AMD64Instr_SseReRg(op
, ereg
, dstLo
));
4238 add_to_rsp(env
, 16);
4244 case Iop_V128HLtoV256
: {
4245 // Curiously, there doesn't seem to be any benefit to be had here by
4246 // checking whether arg1 and arg2 are the same, in the style of how
4247 // (eg) 64HLtoV128 is handled elsewhere in this file.
4248 *rHi
= iselVecExpr(env
, e
->Iex
.Binop
.arg1
);
4249 *rLo
= iselVecExpr(env
, e
->Iex
.Binop
.arg2
);
4253 case Iop_Mul32x8
: fn
= (HWord
)h_generic_calc_Mul32x4
;
4254 goto do_SseAssistedBinary
;
4255 case Iop_Max32Sx8
: fn
= (HWord
)h_generic_calc_Max32Sx4
;
4256 goto do_SseAssistedBinary
;
4257 case Iop_Min32Sx8
: fn
= (HWord
)h_generic_calc_Min32Sx4
;
4258 goto do_SseAssistedBinary
;
4259 case Iop_Max32Ux8
: fn
= (HWord
)h_generic_calc_Max32Ux4
;
4260 goto do_SseAssistedBinary
;
4261 case Iop_Min32Ux8
: fn
= (HWord
)h_generic_calc_Min32Ux4
;
4262 goto do_SseAssistedBinary
;
4263 case Iop_Max16Ux16
: fn
= (HWord
)h_generic_calc_Max16Ux8
;
4264 goto do_SseAssistedBinary
;
4265 case Iop_Min16Ux16
: fn
= (HWord
)h_generic_calc_Min16Ux8
;
4266 goto do_SseAssistedBinary
;
4267 case Iop_Max8Sx32
: fn
= (HWord
)h_generic_calc_Max8Sx16
;
4268 goto do_SseAssistedBinary
;
4269 case Iop_Min8Sx32
: fn
= (HWord
)h_generic_calc_Min8Sx16
;
4270 goto do_SseAssistedBinary
;
4271 case Iop_CmpEQ64x4
: fn
= (HWord
)h_generic_calc_CmpEQ64x2
;
4272 goto do_SseAssistedBinary
;
4273 case Iop_CmpGT64Sx4
: fn
= (HWord
)h_generic_calc_CmpGT64Sx2
;
4274 goto do_SseAssistedBinary
;
4275 do_SseAssistedBinary
: {
4276 /* RRRufff! RRRufff code is what we're generating here. Oh
4279 HReg dstHi
= newVRegV(env
);
4280 HReg dstLo
= newVRegV(env
);
4281 HReg argLhi
, argLlo
, argRhi
, argRlo
;
4282 iselDVecExpr(&argLhi
, &argLlo
, env
, e
->Iex
.Binop
.arg1
);
4283 iselDVecExpr(&argRhi
, &argRlo
, env
, e
->Iex
.Binop
.arg2
);
4284 HReg argp
= newVRegI(env
);
4285 /* subq $160, %rsp -- make a space*/
4286 sub_from_rsp(env
, 160);
4287 /* leaq 48(%rsp), %r_argp -- point into it */
4288 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
4290 /* andq $-16, %r_argp -- 16-align the pointer */
4291 addInstr(env
, AMD64Instr_Alu64R(Aalu_AND
,
4292 AMD64RMI_Imm( ~(UInt
)15 ),
4294 /* Prepare 3 arg regs:
4295 leaq 0(%r_argp), %rdi
4296 leaq 16(%r_argp), %rsi
4297 leaq 32(%r_argp), %rdx
4299 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(0, argp
),
4301 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(16, argp
),
4303 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(32, argp
),
4305 /* Store the two high args, at (%rsi) and (%rdx):
4306 movupd %argLhi, 0(%rsi)
4307 movupd %argRhi, 0(%rdx)
4309 addInstr(env
, AMD64Instr_SseLdSt(False
/*!isLoad*/, 16, argLhi
,
4310 AMD64AMode_IR(0, hregAMD64_RSI())));
4311 addInstr(env
, AMD64Instr_SseLdSt(False
/*!isLoad*/, 16, argRhi
,
4312 AMD64AMode_IR(0, hregAMD64_RDX())));
4313 /* Store the two low args, at 48(%rsi) and 48(%rdx):
4314 movupd %argLlo, 48(%rsi)
4315 movupd %argRlo, 48(%rdx)
4317 addInstr(env
, AMD64Instr_SseLdSt(False
/*!isLoad*/, 16, argLlo
,
4318 AMD64AMode_IR(48, hregAMD64_RSI())));
4319 addInstr(env
, AMD64Instr_SseLdSt(False
/*!isLoad*/, 16, argRlo
,
4320 AMD64AMode_IR(48, hregAMD64_RDX())));
4321 /* call the helper */
4322 addInstr(env
, AMD64Instr_Call( Acc_ALWAYS
, (ULong
)fn
, 3,
4323 mk_RetLoc_simple(RLPri_None
) ));
4324 /* Prepare 3 arg regs:
4325 leaq 48(%r_argp), %rdi
4326 leaq 64(%r_argp), %rsi
4327 leaq 80(%r_argp), %rdx
4329 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(48, argp
),
4331 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(64, argp
),
4333 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(80, argp
),
4335 /* call the helper */
4336 addInstr(env
, AMD64Instr_Call( Acc_ALWAYS
, (ULong
)fn
, 3,
4337 mk_RetLoc_simple(RLPri_None
) ));
4338 /* fetch the result from memory, using %r_argp, which the
4339 register allocator will keep alive across the call. */
4340 addInstr(env
, AMD64Instr_SseLdSt(True
/*isLoad*/, 16, dstHi
,
4341 AMD64AMode_IR(0, argp
)));
4342 addInstr(env
, AMD64Instr_SseLdSt(True
/*isLoad*/, 16, dstLo
,
4343 AMD64AMode_IR(48, argp
)));
4344 /* and finally, clear the space */
4345 add_to_rsp(env
, 160);
4351 case Iop_Perm32x8
: fn
= (HWord
)h_generic_calc_Perm32x8
;
4352 goto do_SseAssistedBinary256
;
4353 do_SseAssistedBinary256
: {
4354 /* RRRufff! RRRufff code is what we're generating here. Oh
4357 HReg dstHi
= newVRegV(env
);
4358 HReg dstLo
= newVRegV(env
);
4359 HReg argLhi
, argLlo
, argRhi
, argRlo
;
4360 iselDVecExpr(&argLhi
, &argLlo
, env
, e
->Iex
.Binop
.arg1
);
4361 iselDVecExpr(&argRhi
, &argRlo
, env
, e
->Iex
.Binop
.arg2
);
4362 HReg argp
= newVRegI(env
);
4363 /* subq $160, %rsp -- make a space*/
4364 sub_from_rsp(env
, 160);
4365 /* leaq 48(%rsp), %r_argp -- point into it */
4366 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
4368 /* andq $-16, %r_argp -- 16-align the pointer */
4369 addInstr(env
, AMD64Instr_Alu64R(Aalu_AND
,
4370 AMD64RMI_Imm( ~(UInt
)15 ),
4372 /* Prepare 3 arg regs:
4373 leaq 0(%r_argp), %rdi
4374 leaq 32(%r_argp), %rsi
4375 leaq 64(%r_argp), %rdx
4377 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(0, argp
),
4379 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(32, argp
),
4381 addInstr(env
, AMD64Instr_Lea64(AMD64AMode_IR(64, argp
),
4383 /* Store the two args, at (%rsi) and (%rdx):
4384 movupd %argLlo, 0(%rsi)
4385 movupd %argLhi, 16(%rsi)
4386 movupd %argRlo, 0(%rdx)
4387 movupd %argRhi, 16(%rdx)
4389 addInstr(env
, AMD64Instr_SseLdSt(False
/*!isLoad*/, 16, argLlo
,
4390 AMD64AMode_IR(0, hregAMD64_RSI())));
4391 addInstr(env
, AMD64Instr_SseLdSt(False
/*!isLoad*/, 16, argLhi
,
4392 AMD64AMode_IR(16, hregAMD64_RSI())));
4393 addInstr(env
, AMD64Instr_SseLdSt(False
/*!isLoad*/, 16, argRlo
,
4394 AMD64AMode_IR(0, hregAMD64_RDX())));
4395 addInstr(env
, AMD64Instr_SseLdSt(False
/*!isLoad*/, 16, argRhi
,
4396 AMD64AMode_IR(16, hregAMD64_RDX())));
4397 /* call the helper */
4398 addInstr(env
, AMD64Instr_Call( Acc_ALWAYS
, (ULong
)fn
, 3,
4399 mk_RetLoc_simple(RLPri_None
) ));
4400 /* fetch the result from memory, using %r_argp, which the
4401 register allocator will keep alive across the call. */
4402 addInstr(env
, AMD64Instr_SseLdSt(True
/*isLoad*/, 16, dstLo
,
4403 AMD64AMode_IR(0, argp
)));
4404 addInstr(env
, AMD64Instr_SseLdSt(True
/*isLoad*/, 16, dstHi
,
4405 AMD64AMode_IR(16, argp
)));
4406 /* and finally, clear the space */
4407 add_to_rsp(env
, 160);
4413 case Iop_I32StoF32x8
:
4414 case Iop_F32toI32Sx8
: {
4416 iselDVecExpr(&argHi
, &argLo
, env
, e
->Iex
.Binop
.arg2
);
4417 HReg dstHi
= newVRegV(env
);
4418 HReg dstLo
= newVRegV(env
);
4420 = e
->Iex
.Binop
.op
== Iop_I32StoF32x8
? Asse_I2F
: Asse_F2I
;
4421 set_SSE_rounding_mode(env
, e
->Iex
.Binop
.arg1
);
4422 addInstr(env
, AMD64Instr_Sse32Fx4(mop
, argHi
, dstHi
));
4423 addInstr(env
, AMD64Instr_Sse32Fx4(mop
, argLo
, dstLo
));
4424 set_SSE_rounding_default(env
);
4432 } /* switch (e->Iex.Binop.op) */
4433 } /* if (e->tag == Iex_Binop) */
4435 if (e
->tag
== Iex_Triop
) {
4436 IRTriop
*triop
= e
->Iex
.Triop
.details
;
4437 switch (triop
->op
) {
4439 case Iop_Add64Fx4
: op
= Asse_ADDF
; goto do_64Fx4_w_rm
;
4440 case Iop_Sub64Fx4
: op
= Asse_SUBF
; goto do_64Fx4_w_rm
;
4441 case Iop_Mul64Fx4
: op
= Asse_MULF
; goto do_64Fx4_w_rm
;
4442 case Iop_Div64Fx4
: op
= Asse_DIVF
; goto do_64Fx4_w_rm
;
4445 HReg argLhi
, argLlo
, argRhi
, argRlo
;
4446 iselDVecExpr(&argLhi
, &argLlo
, env
, triop
->arg2
);
4447 iselDVecExpr(&argRhi
, &argRlo
, env
, triop
->arg3
);
4448 HReg dstHi
= newVRegV(env
);
4449 HReg dstLo
= newVRegV(env
);
4450 addInstr(env
, mk_vMOVsd_RR(argLhi
, dstHi
));
4451 addInstr(env
, mk_vMOVsd_RR(argLlo
, dstLo
));
4452 /* XXXROUNDINGFIXME */
4453 /* set roundingmode here */
4454 addInstr(env
, AMD64Instr_Sse64Fx2(op
, argRhi
, dstHi
));
4455 addInstr(env
, AMD64Instr_Sse64Fx2(op
, argRlo
, dstLo
));
4461 case Iop_Add32Fx8
: op
= Asse_ADDF
; goto do_32Fx8_w_rm
;
4462 case Iop_Sub32Fx8
: op
= Asse_SUBF
; goto do_32Fx8_w_rm
;
4463 case Iop_Mul32Fx8
: op
= Asse_MULF
; goto do_32Fx8_w_rm
;
4464 case Iop_Div32Fx8
: op
= Asse_DIVF
; goto do_32Fx8_w_rm
;
4467 HReg argLhi
, argLlo
, argRhi
, argRlo
;
4468 iselDVecExpr(&argLhi
, &argLlo
, env
, triop
->arg2
);
4469 iselDVecExpr(&argRhi
, &argRlo
, env
, triop
->arg3
);
4470 HReg dstHi
= newVRegV(env
);
4471 HReg dstLo
= newVRegV(env
);
4472 addInstr(env
, mk_vMOVsd_RR(argLhi
, dstHi
));
4473 addInstr(env
, mk_vMOVsd_RR(argLlo
, dstLo
));
4474 /* XXXROUNDINGFIXME */
4475 /* set roundingmode here */
4476 addInstr(env
, AMD64Instr_Sse32Fx4(op
, argRhi
, dstHi
));
4477 addInstr(env
, AMD64Instr_Sse32Fx4(op
, argRlo
, dstLo
));
4485 } /* switch (triop->op) */
4486 } /* if (e->tag == Iex_Triop) */
4489 if (e
->tag
== Iex_Qop
&& e
->Iex
.Qop
.details
->op
== Iop_64x4toV256
) {
4490 const IRExpr
* arg1
= e
->Iex
.Qop
.details
->arg1
;
4491 const IRExpr
* arg2
= e
->Iex
.Qop
.details
->arg2
;
4492 const IRExpr
* arg3
= e
->Iex
.Qop
.details
->arg3
;
4493 const IRExpr
* arg4
= e
->Iex
.Qop
.details
->arg4
;
4494 // If the args are trivially the same (tmp or const), use the same
4495 // source register for all four, and only one movq since those are
4496 // (relatively) expensive.
4497 if (areAtomsAndEqual(arg1
, arg2
)
4498 && areAtomsAndEqual(arg1
, arg3
) && areAtomsAndEqual(arg1
, arg4
)) {
4499 HReg q3
= iselIntExpr_R(env
, e
->Iex
.Qop
.details
->arg1
);
4500 HReg tmp
= newVRegV(env
);
4501 HReg dst
= newVRegV(env
);
4502 addInstr(env
, AMD64Instr_SseMOVQ(q3
, dst
, True
/*toXMM*/));
4503 addInstr(env
, mk_vMOVsd_RR(dst
, tmp
));
4504 addInstr(env
, AMD64Instr_SseShiftN(Asse_SHL128
, 64, dst
));
4505 addInstr(env
, AMD64Instr_SseReRg(Asse_OR
, tmp
, dst
));
4509 /* arg1 is the most significant (Q3), arg4 the least (Q0) */
4510 HReg q3
= iselIntExpr_R(env
, arg1
);
4511 HReg q2
= iselIntExpr_R(env
, arg2
);
4512 HReg q1
= iselIntExpr_R(env
, arg3
);
4513 HReg q0
= iselIntExpr_R(env
, arg4
);
4514 HReg tmp
= newVRegV(env
);
4515 HReg dstHi
= newVRegV(env
);
4516 HReg dstLo
= newVRegV(env
);
4517 addInstr(env
, AMD64Instr_SseMOVQ(q3
, dstHi
, True
/*toXMM*/));
4518 addInstr(env
, AMD64Instr_SseShiftN(Asse_SHL128
, 64, dstHi
));
4519 addInstr(env
, AMD64Instr_SseMOVQ(q2
, tmp
, True
/*toXMM*/));
4520 addInstr(env
, AMD64Instr_SseReRg(Asse_OR
, tmp
, dstHi
));
4521 addInstr(env
, AMD64Instr_SseMOVQ(q1
, dstLo
, True
/*toXMM*/));
4522 addInstr(env
, AMD64Instr_SseShiftN(Asse_SHL128
, 64, dstLo
));
4523 addInstr(env
, AMD64Instr_SseMOVQ(q0
, tmp
, True
/*toXMM*/));
4524 addInstr(env
, AMD64Instr_SseReRg(Asse_OR
, tmp
, dstLo
));
4531 if (e
->tag
== Iex_ITE
) {
4532 HReg r1Hi
, r1Lo
, r0Hi
, r0Lo
;
4533 iselDVecExpr(&r1Hi
, &r1Lo
, env
, e
->Iex
.ITE
.iftrue
);
4534 iselDVecExpr(&r0Hi
, &r0Lo
, env
, e
->Iex
.ITE
.iffalse
);
4535 HReg dstHi
= newVRegV(env
);
4536 HReg dstLo
= newVRegV(env
);
4537 addInstr(env
, mk_vMOVsd_RR(r1Hi
,dstHi
));
4538 addInstr(env
, mk_vMOVsd_RR(r1Lo
,dstLo
));
4539 AMD64CondCode cc
= iselCondCode(env
, e
->Iex
.ITE
.cond
);
4540 addInstr(env
, AMD64Instr_SseCMov(cc
^ 1, r0Hi
, dstHi
));
4541 addInstr(env
, AMD64Instr_SseCMov(cc
^ 1, r0Lo
, dstLo
));
4548 vex_printf("iselDVecExpr (amd64, subarch = %s): can't reduce\n",
4549 LibVEX_ppVexHwCaps(VexArchAMD64
, env
->hwcaps
));
4551 vpanic("iselDVecExpr_wrk");
4555 /*---------------------------------------------------------*/
4556 /*--- ISEL: Statements ---*/
4557 /*---------------------------------------------------------*/
4559 static void iselStmt ( ISelEnv
* env
, IRStmt
* stmt
)
4561 if (vex_traceflags
& VEX_TRACE_VCODE
) {
4562 vex_printf("\n-- ");
4567 switch (stmt
->tag
) {
4569 /* --------- LOADG (guarded load) --------- */
4571 IRLoadG
* lg
= stmt
->Ist
.LoadG
.details
;
4572 if (lg
->end
!= Iend_LE
)
4575 UChar szB
= 0; /* invalid */
4577 case ILGop_Ident32
: szB
= 4; break;
4578 case ILGop_Ident64
: szB
= 8; break;
4579 case ILGop_IdentV128
: szB
= 16; break;
4586 = iselIntExpr_AMode(env
, lg
->addr
);
4588 = szB
== 16 ? iselVecExpr(env
, lg
->alt
)
4589 : iselIntExpr_R(env
, lg
->alt
);
4591 = lookupIRTemp(env
, lg
->dst
);
4593 /* Get the alt value into the dst. We'll do a conditional load
4594 which overwrites it -- or not -- with loaded data. */
4596 addInstr(env
, mk_vMOVsd_RR(rAlt
, rDst
));
4598 addInstr(env
, mk_iMOVsd_RR(rAlt
, rDst
));
4600 AMD64CondCode cc
= iselCondCode(env
, lg
->guard
);
4602 addInstr(env
, AMD64Instr_SseCLoad(cc
, amAddr
, rDst
));
4604 addInstr(env
, AMD64Instr_CLoad(cc
, szB
, amAddr
, rDst
));
4609 /* --------- STOREG (guarded store) --------- */
4611 IRStoreG
* sg
= stmt
->Ist
.StoreG
.details
;
4612 if (sg
->end
!= Iend_LE
)
4615 UChar szB
= 0; /* invalid */
4616 switch (typeOfIRExpr(env
->type_env
, sg
->data
)) {
4617 case Ity_I32
: szB
= 4; break;
4618 case Ity_I64
: szB
= 8; break;
4619 case Ity_V128
: szB
= 16; break;
4626 = iselIntExpr_AMode(env
, sg
->addr
);
4628 = szB
== 16 ? iselVecExpr(env
, sg
->data
)
4629 : iselIntExpr_R(env
, sg
->data
);
4631 = iselCondCode(env
, sg
->guard
);
4633 addInstr(env
, AMD64Instr_SseCStore(cc
, rSrc
, amAddr
));
4635 addInstr(env
, AMD64Instr_CStore(cc
, szB
, rSrc
, amAddr
));
4640 /* --------- STORE --------- */
4642 IRType tya
= typeOfIRExpr(env
->type_env
, stmt
->Ist
.Store
.addr
);
4643 IRType tyd
= typeOfIRExpr(env
->type_env
, stmt
->Ist
.Store
.data
);
4644 IREndness end
= stmt
->Ist
.Store
.end
;
4646 if (tya
!= Ity_I64
|| end
!= Iend_LE
)
4649 if (tyd
== Ity_I64
) {
4650 AMD64AMode
* am
= iselIntExpr_AMode(env
, stmt
->Ist
.Store
.addr
);
4651 AMD64RI
* ri
= iselIntExpr_RI(env
, stmt
->Ist
.Store
.data
);
4652 addInstr(env
, AMD64Instr_Alu64M(Aalu_MOV
,ri
,am
));
4655 if (tyd
== Ity_I8
|| tyd
== Ity_I16
|| tyd
== Ity_I32
) {
4656 AMD64AMode
* am
= iselIntExpr_AMode(env
, stmt
->Ist
.Store
.addr
);
4657 HReg r
= iselIntExpr_R(env
, stmt
->Ist
.Store
.data
);
4658 addInstr(env
, AMD64Instr_Store(
4659 toUChar(tyd
==Ity_I8
? 1 : (tyd
==Ity_I16
? 2 : 4)),
4663 if (tyd
== Ity_F64
) {
4664 AMD64AMode
* am
= iselIntExpr_AMode(env
, stmt
->Ist
.Store
.addr
);
4665 HReg r
= iselDblExpr(env
, stmt
->Ist
.Store
.data
);
4666 addInstr(env
, AMD64Instr_SseLdSt(False
/*store*/, 8, r
, am
));
4669 if (tyd
== Ity_F32
) {
4670 AMD64AMode
* am
= iselIntExpr_AMode(env
, stmt
->Ist
.Store
.addr
);
4671 HReg r
= iselFltExpr(env
, stmt
->Ist
.Store
.data
);
4672 addInstr(env
, AMD64Instr_SseLdSt(False
/*store*/, 4, r
, am
));
4675 if (tyd
== Ity_V128
) {
4676 AMD64AMode
* am
= iselIntExpr_AMode(env
, stmt
->Ist
.Store
.addr
);
4677 HReg r
= iselVecExpr(env
, stmt
->Ist
.Store
.data
);
4678 addInstr(env
, AMD64Instr_SseLdSt(False
/*store*/, 16, r
, am
));
4681 if (tyd
== Ity_V256
) {
4682 HReg rA
= iselIntExpr_R(env
, stmt
->Ist
.Store
.addr
);
4683 AMD64AMode
* am0
= AMD64AMode_IR(0, rA
);
4684 AMD64AMode
* am16
= AMD64AMode_IR(16, rA
);
4686 iselDVecExpr(&vHi
, &vLo
, env
, stmt
->Ist
.Store
.data
);
4687 addInstr(env
, AMD64Instr_SseLdSt(False
/*store*/, 16, vLo
, am0
));
4688 addInstr(env
, AMD64Instr_SseLdSt(False
/*store*/, 16, vHi
, am16
));
4694 /* --------- PUT --------- */
4696 IRType ty
= typeOfIRExpr(env
->type_env
, stmt
->Ist
.Put
.data
);
4697 if (ty
== Ity_I64
) {
4698 /* We're going to write to memory, so compute the RHS into an
4700 AMD64RI
* ri
= iselIntExpr_RI(env
, stmt
->Ist
.Put
.data
);
4705 AMD64AMode_IR(stmt
->Ist
.Put
.offset
,
4710 if (ty
== Ity_I8
|| ty
== Ity_I16
|| ty
== Ity_I32
) {
4711 HReg r
= iselIntExpr_R(env
, stmt
->Ist
.Put
.data
);
4712 addInstr(env
, AMD64Instr_Store(
4713 toUChar(ty
==Ity_I8
? 1 : (ty
==Ity_I16
? 2 : 4)),
4715 AMD64AMode_IR(stmt
->Ist
.Put
.offset
,
4719 if (ty
== Ity_F32
) {
4720 HReg f32
= iselFltExpr(env
, stmt
->Ist
.Put
.data
);
4721 AMD64AMode
* am
= AMD64AMode_IR(stmt
->Ist
.Put
.offset
, hregAMD64_RBP());
4722 set_SSE_rounding_default(env
); /* paranoia */
4723 addInstr(env
, AMD64Instr_SseLdSt( False
/*store*/, 4, f32
, am
));
4726 if (ty
== Ity_F64
) {
4727 HReg f64
= iselDblExpr(env
, stmt
->Ist
.Put
.data
);
4728 AMD64AMode
* am
= AMD64AMode_IR( stmt
->Ist
.Put
.offset
,
4730 addInstr(env
, AMD64Instr_SseLdSt( False
/*store*/, 8, f64
, am
));
4733 if (ty
== Ity_V128
) {
4734 HReg vec
= iselVecExpr(env
, stmt
->Ist
.Put
.data
);
4735 AMD64AMode
* am
= AMD64AMode_IR(stmt
->Ist
.Put
.offset
,
4737 addInstr(env
, AMD64Instr_SseLdSt(False
/*store*/, 16, vec
, am
));
4740 if (ty
== Ity_V256
) {
4742 iselDVecExpr(&vHi
, &vLo
, env
, stmt
->Ist
.Put
.data
);
4743 HReg rbp
= hregAMD64_RBP();
4744 AMD64AMode
* am0
= AMD64AMode_IR(stmt
->Ist
.Put
.offset
+ 0, rbp
);
4745 AMD64AMode
* am16
= AMD64AMode_IR(stmt
->Ist
.Put
.offset
+ 16, rbp
);
4746 addInstr(env
, AMD64Instr_SseLdSt(False
/*store*/, 16, vLo
, am0
));
4747 addInstr(env
, AMD64Instr_SseLdSt(False
/*store*/, 16, vHi
, am16
));
4753 /* --------- Indexed PUT --------- */
4755 IRPutI
*puti
= stmt
->Ist
.PutI
.details
;
4758 = genGuestArrayOffset(
4760 puti
->ix
, puti
->bias
);
4762 IRType ty
= typeOfIRExpr(env
->type_env
, puti
->data
);
4763 if (ty
== Ity_F64
) {
4764 HReg val
= iselDblExpr(env
, puti
->data
);
4765 addInstr(env
, AMD64Instr_SseLdSt( False
/*store*/, 8, val
, am
));
4769 HReg r
= iselIntExpr_R(env
, puti
->data
);
4770 addInstr(env
, AMD64Instr_Store( 1, r
, am
));
4773 if (ty
== Ity_I64
) {
4774 AMD64RI
* ri
= iselIntExpr_RI(env
, puti
->data
);
4775 addInstr(env
, AMD64Instr_Alu64M( Aalu_MOV
, ri
, am
));
4781 /* --------- TMP --------- */
4783 IRTemp tmp
= stmt
->Ist
.WrTmp
.tmp
;
4784 IRType ty
= typeOfIRTemp(env
->type_env
, tmp
);
4786 /* optimisation: if stmt->Ist.WrTmp.data is Add64(..,..),
4787 compute it into an AMode and then use LEA. This usually
4788 produces fewer instructions, often because (for memcheck
4789 created IR) we get t = address-expression, (t is later used
4790 twice) and so doing this naturally turns address-expression
4791 back into an AMD64 amode. */
4793 && stmt
->Ist
.WrTmp
.data
->tag
== Iex_Binop
4794 && stmt
->Ist
.WrTmp
.data
->Iex
.Binop
.op
== Iop_Add64
) {
4795 AMD64AMode
* am
= iselIntExpr_AMode(env
, stmt
->Ist
.WrTmp
.data
);
4796 HReg dst
= lookupIRTemp(env
, tmp
);
4797 if (am
->tag
== Aam_IR
&& am
->Aam
.IR
.imm
== 0) {
4798 /* Hmm, iselIntExpr_AMode wimped out and just computed the
4799 value into a register. Just emit a normal reg-reg move
4800 so reg-alloc can coalesce it away in the usual way. */
4801 HReg src
= am
->Aam
.IR
.reg
;
4802 addInstr(env
, AMD64Instr_Alu64R(Aalu_MOV
, AMD64RMI_Reg(src
), dst
));
4804 addInstr(env
, AMD64Instr_Lea64(am
,dst
));
4809 if (ty
== Ity_I64
|| ty
== Ity_I32
4810 || ty
== Ity_I16
|| ty
== Ity_I8
) {
4811 AMD64RMI
* rmi
= iselIntExpr_RMI(env
, stmt
->Ist
.WrTmp
.data
);
4812 HReg dst
= lookupIRTemp(env
, tmp
);
4813 addInstr(env
, AMD64Instr_Alu64R(Aalu_MOV
,rmi
,dst
));
4816 if (ty
== Ity_I128
) {
4817 HReg rHi
, rLo
, dstHi
, dstLo
;
4818 iselInt128Expr(&rHi
,&rLo
, env
, stmt
->Ist
.WrTmp
.data
);
4819 lookupIRTempPair( &dstHi
, &dstLo
, env
, tmp
);
4820 addInstr(env
, mk_iMOVsd_RR(rHi
,dstHi
) );
4821 addInstr(env
, mk_iMOVsd_RR(rLo
,dstLo
) );
4825 AMD64CondCode cond
= iselCondCode(env
, stmt
->Ist
.WrTmp
.data
);
4826 HReg dst
= lookupIRTemp(env
, tmp
);
4827 addInstr(env
, AMD64Instr_Set64(cond
, dst
));
4830 if (ty
== Ity_F64
) {
4831 HReg dst
= lookupIRTemp(env
, tmp
);
4832 HReg src
= iselDblExpr(env
, stmt
->Ist
.WrTmp
.data
);
4833 addInstr(env
, mk_vMOVsd_RR(src
, dst
));
4836 if (ty
== Ity_F32
) {
4837 HReg dst
= lookupIRTemp(env
, tmp
);
4838 HReg src
= iselFltExpr(env
, stmt
->Ist
.WrTmp
.data
);
4839 addInstr(env
, mk_vMOVsd_RR(src
, dst
));
4842 if (ty
== Ity_V128
) {
4843 HReg dst
= lookupIRTemp(env
, tmp
);
4844 HReg src
= iselVecExpr(env
, stmt
->Ist
.WrTmp
.data
);
4845 addInstr(env
, mk_vMOVsd_RR(src
, dst
));
4848 if (ty
== Ity_V256
) {
4849 HReg rHi
, rLo
, dstHi
, dstLo
;
4850 iselDVecExpr(&rHi
,&rLo
, env
, stmt
->Ist
.WrTmp
.data
);
4851 lookupIRTempPair( &dstHi
, &dstLo
, env
, tmp
);
4852 addInstr(env
, mk_vMOVsd_RR(rHi
,dstHi
) );
4853 addInstr(env
, mk_vMOVsd_RR(rLo
,dstLo
) );
4859 /* --------- Call to DIRTY helper --------- */
4861 IRDirty
* d
= stmt
->Ist
.Dirty
.details
;
4863 /* Figure out the return type, if any. */
4864 IRType retty
= Ity_INVALID
;
4865 if (d
->tmp
!= IRTemp_INVALID
)
4866 retty
= typeOfIRTemp(env
->type_env
, d
->tmp
);
4868 /* Throw out any return types we don't know about. */
4869 Bool retty_ok
= False
;
4871 case Ity_INVALID
: /* function doesn't return anything */
4872 case Ity_I64
: case Ity_I32
: case Ity_I16
: case Ity_I8
:
4873 case Ity_V128
: case Ity_V256
:
4874 retty_ok
= True
; break;
4879 break; /* will go to stmt_fail: */
4881 /* Marshal args, do the call, and set the return value to
4882 0x555..555 if this is a conditional call that returns a value
4883 and the call is skipped. */
4885 RetLoc rloc
= mk_RetLoc_INVALID();
4886 doHelperCall( &addToSp
, &rloc
, env
, d
->guard
, d
->cee
, retty
, d
->args
);
4887 vassert(is_sane_RetLoc(rloc
));
4889 /* Now figure out what to do with the returned value, if any. */
4892 /* No return value. Nothing to do. */
4893 vassert(d
->tmp
== IRTemp_INVALID
);
4894 vassert(rloc
.pri
== RLPri_None
);
4895 vassert(addToSp
== 0);
4898 case Ity_I64
: case Ity_I32
: case Ity_I16
: case Ity_I8
: {
4899 /* The returned value is in %rax. Park it in the register
4900 associated with tmp. */
4901 vassert(rloc
.pri
== RLPri_Int
);
4902 vassert(addToSp
== 0);
4903 HReg dst
= lookupIRTemp(env
, d
->tmp
);
4904 addInstr(env
, mk_iMOVsd_RR(hregAMD64_RAX(),dst
) );
4908 /* The returned value is on the stack, and rloc.spOff
4909 tells us where. Fish it off the stack and then move
4910 the stack pointer upwards to clear it, as directed by
4912 vassert(rloc
.pri
== RLPri_V128SpRel
);
4913 vassert(addToSp
>= 16);
4914 HReg dst
= lookupIRTemp(env
, d
->tmp
);
4915 AMD64AMode
* am
= AMD64AMode_IR(rloc
.spOff
, hregAMD64_RSP());
4916 addInstr(env
, AMD64Instr_SseLdSt( True
/*load*/, 16, dst
, am
));
4917 add_to_rsp(env
, addToSp
);
4921 /* See comments for Ity_V128. */
4922 vassert(rloc
.pri
== RLPri_V256SpRel
);
4923 vassert(addToSp
>= 32);
4925 lookupIRTempPair(&dstHi
, &dstLo
, env
, d
->tmp
);
4926 AMD64AMode
* amLo
= AMD64AMode_IR(rloc
.spOff
, hregAMD64_RSP());
4927 addInstr(env
, AMD64Instr_SseLdSt( True
/*load*/, 16, dstLo
, amLo
));
4928 AMD64AMode
* amHi
= AMD64AMode_IR(rloc
.spOff
+16, hregAMD64_RSP());
4929 addInstr(env
, AMD64Instr_SseLdSt( True
/*load*/, 16, dstHi
, amHi
));
4930 add_to_rsp(env
, addToSp
);
4940 /* --------- MEM FENCE --------- */
4942 switch (stmt
->Ist
.MBE
.event
) {
4944 addInstr(env
, AMD64Instr_MFence());
4951 /* --------- ACAS --------- */
4953 if (stmt
->Ist
.CAS
.details
->oldHi
== IRTemp_INVALID
) {
4954 /* "normal" singleton CAS */
4956 IRCAS
* cas
= stmt
->Ist
.CAS
.details
;
4957 IRType ty
= typeOfIRExpr(env
->type_env
, cas
->dataLo
);
4958 /* get: cas->expd into %rax, and cas->data into %rbx */
4959 AMD64AMode
* am
= iselIntExpr_AMode(env
, cas
->addr
);
4960 HReg rData
= iselIntExpr_R(env
, cas
->dataLo
);
4961 HReg rExpd
= iselIntExpr_R(env
, cas
->expdLo
);
4962 HReg rOld
= lookupIRTemp(env
, cas
->oldLo
);
4963 vassert(cas
->expdHi
== NULL
);
4964 vassert(cas
->dataHi
== NULL
);
4965 addInstr(env
, mk_iMOVsd_RR(rExpd
, rOld
));
4966 addInstr(env
, mk_iMOVsd_RR(rExpd
, hregAMD64_RAX()));
4967 addInstr(env
, mk_iMOVsd_RR(rData
, hregAMD64_RBX()));
4969 case Ity_I64
: sz
= 8; break;
4970 case Ity_I32
: sz
= 4; break;
4971 case Ity_I16
: sz
= 2; break;
4972 case Ity_I8
: sz
= 1; break;
4973 default: goto unhandled_cas
;
4975 addInstr(env
, AMD64Instr_ACAS(am
, sz
));
4976 addInstr(env
, AMD64Instr_CMov64(Acc_NZ
, hregAMD64_RAX(), rOld
));
4981 IRCAS
* cas
= stmt
->Ist
.CAS
.details
;
4982 IRType ty
= typeOfIRExpr(env
->type_env
, cas
->dataLo
);
4983 /* only 32-bit and 64-bit allowed in this case */
4984 /* get: cas->expdLo into %rax, and cas->dataLo into %rbx */
4985 /* get: cas->expdHi into %rdx, and cas->dataHi into %rcx */
4986 AMD64AMode
* am
= iselIntExpr_AMode(env
, cas
->addr
);
4987 HReg rDataHi
= iselIntExpr_R(env
, cas
->dataHi
);
4988 HReg rDataLo
= iselIntExpr_R(env
, cas
->dataLo
);
4989 HReg rExpdHi
= iselIntExpr_R(env
, cas
->expdHi
);
4990 HReg rExpdLo
= iselIntExpr_R(env
, cas
->expdLo
);
4991 HReg rOldHi
= lookupIRTemp(env
, cas
->oldHi
);
4992 HReg rOldLo
= lookupIRTemp(env
, cas
->oldLo
);
4995 if (!(env
->hwcaps
& VEX_HWCAPS_AMD64_CX16
))
4996 goto unhandled_cas
; /* we'd have to generate
4997 cmpxchg16b, but the host
4998 doesn't support that */
5007 addInstr(env
, mk_iMOVsd_RR(rExpdHi
, rOldHi
));
5008 addInstr(env
, mk_iMOVsd_RR(rExpdLo
, rOldLo
));
5009 addInstr(env
, mk_iMOVsd_RR(rExpdHi
, hregAMD64_RDX()));
5010 addInstr(env
, mk_iMOVsd_RR(rExpdLo
, hregAMD64_RAX()));
5011 addInstr(env
, mk_iMOVsd_RR(rDataHi
, hregAMD64_RCX()));
5012 addInstr(env
, mk_iMOVsd_RR(rDataLo
, hregAMD64_RBX()));
5013 addInstr(env
, AMD64Instr_DACAS(am
, sz
));
5014 addInstr(env
, AMD64Instr_CMov64(Acc_NZ
, hregAMD64_RDX(), rOldHi
));
5015 addInstr(env
, AMD64Instr_CMov64(Acc_NZ
, hregAMD64_RAX(), rOldLo
));
5021 /* --------- INSTR MARK --------- */
5022 /* Doesn't generate any executable code ... */
5026 /* --------- ABI HINT --------- */
5027 /* These have no meaning (denotation in the IR) and so we ignore
5028 them ... if any actually made it this far. */
5032 /* --------- NO-OP --------- */
5036 /* --------- EXIT --------- */
5038 if (stmt
->Ist
.Exit
.dst
->tag
!= Ico_U64
)
5039 vpanic("iselStmt(amd64): Ist_Exit: dst is not a 64-bit value");
5041 AMD64CondCode cc
= iselCondCode(env
, stmt
->Ist
.Exit
.guard
);
5042 AMD64AMode
* amRIP
= AMD64AMode_IR(stmt
->Ist
.Exit
.offsIP
,
5045 /* Case: boring transfer to known address */
5046 if (stmt
->Ist
.Exit
.jk
== Ijk_Boring
) {
5047 if (env
->chainingAllowed
) {
5048 /* .. almost always true .. */
5049 /* Skip the event check at the dst if this is a forwards
5052 = ((Addr64
)stmt
->Ist
.Exit
.dst
->Ico
.U64
) > env
->max_ga
;
5053 if (0) vex_printf("%s", toFastEP
? "Y" : ",");
5054 addInstr(env
, AMD64Instr_XDirect(stmt
->Ist
.Exit
.dst
->Ico
.U64
,
5055 amRIP
, cc
, toFastEP
));
5057 /* .. very occasionally .. */
5058 /* We can't use chaining, so ask for an assisted transfer,
5059 as that's the only alternative that is allowable. */
5060 HReg r
= iselIntExpr_R(env
, IRExpr_Const(stmt
->Ist
.Exit
.dst
));
5061 addInstr(env
, AMD64Instr_XAssisted(r
, amRIP
, cc
, Ijk_Boring
));
5066 /* Case: assisted transfer to arbitrary address */
5067 switch (stmt
->Ist
.Exit
.jk
) {
5068 /* Keep this list in sync with that in iselNext below */
5075 case Ijk_Sys_syscall
:
5076 case Ijk_Sys_int210
:
5077 case Ijk_InvalICache
:
5080 HReg r
= iselIntExpr_R(env
, IRExpr_Const(stmt
->Ist
.Exit
.dst
));
5081 addInstr(env
, AMD64Instr_XAssisted(r
, amRIP
, cc
, stmt
->Ist
.Exit
.jk
));
5088 /* Do we ever expect to see any other kind? */
5096 vpanic("iselStmt(amd64)");
/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/
5104 static void iselNext ( ISelEnv
* env
,
5105 IRExpr
* next
, IRJumpKind jk
, Int offsIP
)
5107 if (vex_traceflags
& VEX_TRACE_VCODE
) {
5108 vex_printf( "\n-- PUT(%d) = ", offsIP
);
5110 vex_printf( "; exit-");
5115 /* Case: boring transfer to known address */
5116 if (next
->tag
== Iex_Const
) {
5117 IRConst
* cdst
= next
->Iex
.Const
.con
;
5118 vassert(cdst
->tag
== Ico_U64
);
5119 if (jk
== Ijk_Boring
|| jk
== Ijk_Call
) {
5120 /* Boring transfer to known address */
5121 AMD64AMode
* amRIP
= AMD64AMode_IR(offsIP
, hregAMD64_RBP());
5122 if (env
->chainingAllowed
) {
5123 /* .. almost always true .. */
5124 /* Skip the event check at the dst if this is a forwards
5127 = ((Addr64
)cdst
->Ico
.U64
) > env
->max_ga
;
5128 if (0) vex_printf("%s", toFastEP
? "X" : ".");
5129 addInstr(env
, AMD64Instr_XDirect(cdst
->Ico
.U64
,
5133 /* .. very occasionally .. */
5134 /* We can't use chaining, so ask for an indirect transfer,
5135 as that's the cheapest alternative that is
5137 HReg r
= iselIntExpr_R(env
, next
);
5138 addInstr(env
, AMD64Instr_XAssisted(r
, amRIP
, Acc_ALWAYS
,
5145 /* Case: call/return (==boring) transfer to any address */
5147 case Ijk_Boring
: case Ijk_Ret
: case Ijk_Call
: {
5148 HReg r
= iselIntExpr_R(env
, next
);
5149 AMD64AMode
* amRIP
= AMD64AMode_IR(offsIP
, hregAMD64_RBP());
5150 if (env
->chainingAllowed
) {
5151 addInstr(env
, AMD64Instr_XIndir(r
, amRIP
, Acc_ALWAYS
));
5153 addInstr(env
, AMD64Instr_XAssisted(r
, amRIP
, Acc_ALWAYS
,
5162 /* Case: assisted transfer to arbitrary address */
5164 /* Keep this list in sync with that for Ist_Exit above */
5171 case Ijk_Sys_syscall
:
5172 case Ijk_Sys_int210
:
5173 case Ijk_InvalICache
:
5175 HReg r
= iselIntExpr_R(env
, next
);
5176 AMD64AMode
* amRIP
= AMD64AMode_IR(offsIP
, hregAMD64_RBP());
5177 addInstr(env
, AMD64Instr_XAssisted(r
, amRIP
, Acc_ALWAYS
, jk
));
5184 vex_printf( "\n-- PUT(%d) = ", offsIP
);
5186 vex_printf( "; exit-");
5189 vassert(0); // are we expecting any other kind?
/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire SB to amd64 code. */
5199 HInstrArray
* iselSB_AMD64 ( const IRSB
* bb
,
5201 const VexArchInfo
* archinfo_host
,
5202 const VexAbiInfo
* vbi
/*UNUSED*/,
5203 Int offs_Host_EvC_Counter
,
5204 Int offs_Host_EvC_FailAddr
,
5205 Bool chainingAllowed
,
5212 UInt hwcaps_host
= archinfo_host
->hwcaps
;
5213 AMD64AMode
*amCounter
, *amFailAddr
;
5216 vassert(arch_host
== VexArchAMD64
);
5217 vassert(0 == (hwcaps_host
5218 & ~(VEX_HWCAPS_AMD64_SSE3
5219 | VEX_HWCAPS_AMD64_SSSE3
5220 | VEX_HWCAPS_AMD64_CX16
5221 | VEX_HWCAPS_AMD64_LZCNT
5222 | VEX_HWCAPS_AMD64_AVX
5223 | VEX_HWCAPS_AMD64_RDTSCP
5224 | VEX_HWCAPS_AMD64_BMI
5225 | VEX_HWCAPS_AMD64_AVX2
5226 | VEX_HWCAPS_AMD64_F16C
5227 | VEX_HWCAPS_AMD64_RDRAND
)));
5229 /* Check that the host's endianness is as expected. */
5230 vassert(archinfo_host
->endness
== VexEndnessLE
);
5232 /* Make up an initial environment to use. */
5233 env
= LibVEX_Alloc_inline(sizeof(ISelEnv
));
5236 /* Set up output code array. */
5237 env
->code
= newHInstrArray();
5239 /* Copy BB's type env. */
5240 env
->type_env
= bb
->tyenv
;
5242 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
5243 change as we go along. */
5244 env
->n_vregmap
= bb
->tyenv
->types_used
;
5245 env
->vregmap
= LibVEX_Alloc_inline(env
->n_vregmap
* sizeof(HReg
));
5246 env
->vregmapHI
= LibVEX_Alloc_inline(env
->n_vregmap
* sizeof(HReg
));
5248 /* and finally ... */
5249 env
->chainingAllowed
= chainingAllowed
;
5250 env
->hwcaps
= hwcaps_host
;
5251 env
->max_ga
= max_ga
;
5253 /* For each IR temporary, allocate a suitably-kinded virtual
5256 for (i
= 0; i
< env
->n_vregmap
; i
++) {
5257 hregHI
= hreg
= INVALID_HREG
;
5258 switch (bb
->tyenv
->types
[i
]) {
5260 case Ity_I8
: case Ity_I16
: case Ity_I32
: case Ity_I64
:
5261 hreg
= mkHReg(True
, HRcInt64
, 0, j
++);
5264 hreg
= mkHReg(True
, HRcInt64
, 0, j
++);
5265 hregHI
= mkHReg(True
, HRcInt64
, 0, j
++);
5270 hreg
= mkHReg(True
, HRcVec128
, 0, j
++);
5273 hreg
= mkHReg(True
, HRcVec128
, 0, j
++);
5274 hregHI
= mkHReg(True
, HRcVec128
, 0, j
++);
5277 ppIRType(bb
->tyenv
->types
[i
]);
5278 vpanic("iselBB(amd64): IRTemp type");
5280 env
->vregmap
[i
] = hreg
;
5281 env
->vregmapHI
[i
] = hregHI
;
5285 /* The very first instruction must be an event check. */
5286 amCounter
= AMD64AMode_IR(offs_Host_EvC_Counter
, hregAMD64_RBP());
5287 amFailAddr
= AMD64AMode_IR(offs_Host_EvC_FailAddr
, hregAMD64_RBP());
5288 addInstr(env
, AMD64Instr_EvCheck(amCounter
, amFailAddr
));
5290 /* Possibly a block counter increment (for profiling). At this
5291 point we don't know the address of the counter, so just pretend
5292 it is zero. It will have to be patched later, but before this
5293 translation is used, by a call to LibVEX_patchProfCtr. */
5295 addInstr(env
, AMD64Instr_ProfInc());
5298 /* Ok, finally we can iterate over the statements. */
5299 for (i
= 0; i
< bb
->stmts_used
; i
++)
5301 iselStmt(env
, bb
->stmts
[i
]);
5303 iselNext(env
, bb
->next
, bb
->jumpkind
, bb
->offsIP
);
5305 /* record the number of vregs we used. */
5306 env
->code
->n_vregs
= env
->vreg_ctr
;
/*---------------------------------------------------------------*/
/*--- end                                   host_amd64_isel.c ---*/
/*---------------------------------------------------------------*/