Bug 439685 compiler warning in callgrind/main.c
[valgrind.git] / VEX / priv / host_ppc_isel.c
blob5ee6d1b6da370764465d3b6075a540869c5a9a6d
3 /*---------------------------------------------------------------*/
4 /*--- begin host_ppc_isel.c ---*/
5 /*---------------------------------------------------------------*/
7 /*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
11 Copyright (C) 2004-2017 OpenWorks LLP
12 info@open-works.net
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, see <http://www.gnu.org/licenses/>.
27 The GNU General Public License is contained in the file COPYING.
29 Neither the names of the U.S. Department of Energy nor the
30 University of California nor the names of its contributors may be
31 used to endorse or promote products derived from this software
32 without prior written permission.
35 #include "libvex_basictypes.h"
36 #include "libvex_ir.h"
37 #include "libvex.h"
39 #include "ir_match.h"
40 #include "main_util.h"
41 #include "main_globals.h"
42 #include "host_generic_regs.h"
43 #include "host_generic_simd64.h"
44 #include "host_ppc_defs.h"
46 /* GPR register class for ppc32/64 */
47 #define HRcGPR(_mode64) ((_mode64) ? HRcInt64 : HRcInt32)
50 /*---------------------------------------------------------*/
51 /*--- Register Usage Conventions ---*/
52 /*---------------------------------------------------------*/
54 Integer Regs
55 ------------
56 GPR0 Reserved
57 GPR1 Stack Pointer
58 GPR2 not used - TOC pointer
59 GPR3:10 Allocateable
60 GPR11 if mode64: not used - calls by ptr / env ptr for some langs
61 GPR12 if mode64: not used - exceptions / global linkage code
62 GPR13 not used - Thread-specific pointer
63 GPR14:28 Allocateable
64 GPR29 Unused by us (reserved for the dispatcher)
65 GPR30 AltiVec temp spill register
66 GPR31 GuestStatePointer
68 Of Allocateable regs:
69 if (mode64)
70 GPR3:10 Caller-saved regs
71 else
72 GPR3:12 Caller-saved regs
73 GPR14:29 Callee-saved regs
75 GPR3 [Return | Parameter] - carrying reg
76 GPR4:10 Parameter-carrying regs
79 Floating Point Regs
80 -------------------
81 FPR0:31 Allocateable
83 FPR0 Caller-saved - scratch reg
84 if (mode64)
85 FPR1:13 Caller-saved - param & return regs
86 else
87 FPR1:8 Caller-saved - param & return regs
88 FPR9:13 Caller-saved regs
89 FPR14:31 Callee-saved regs
92 Vector Regs (on processors with the VMX feature)
93 -----------
94 VR0-VR1 Volatile scratch registers
95 VR2-VR13 Volatile vector parameters registers
96 VR14-VR19 Volatile scratch registers
97 VR20-VR31 Non-volatile registers
98 VRSAVE Non-volatile 32-bit register
102 /*---------------------------------------------------------*/
103 /*--- PPC FP Status & Control Register Conventions ---*/
104 /*---------------------------------------------------------*/
106 Vex-generated code expects to run with the FPU set as follows: all
107 exceptions masked. The rounding mode is set appropriately before
108 each floating point insn emitted (or left unchanged if known to be
109 correct already). There are a few fp insns (fmr,fneg,fabs,fnabs),
110 which are unaffected by the rm and so the rounding mode is not set
111 prior to them.
113 At least on MPC7447A (Mac Mini), frsqrte is also not affected by
114 rounding mode. At some point the ppc docs get sufficiently vague
115 that the only way to find out is to write test programs.
117 /* Notes on the FP instruction set, 6 Feb 06.
119 What exns -> CR1 ? Sets FPRF ? Observes RM ?
120 -------------------------------------------------------------
122 fmr[.] if . n n
123 fneg[.] if . n n
124 fabs[.] if . n n
125 fnabs[.] if . n n
127 fadd[.] if . y y
128 fadds[.] if . y y
129 fcfid[.] (Si64->dbl) if . y y
130 fcfidU[.] (Ui64->dbl) if . y y
131 fcfids[.] (Si64->sngl) if . Y Y
132 fcfidus[.] (Ui64->sngl) if . Y Y
133 fcmpo (cmp, result n n n
134 fcmpu to crfD) n n n
135 fctid[.] (dbl->i64) if . ->undef y
136 fctidz[.] (dbl->i64) if . ->undef rounds-to-zero
137 fctiw[.] (dbl->i32) if . ->undef y
138 fctiwz[.] (dbl->i32) if . ->undef rounds-to-zero
139 fdiv[.] if . y y
140 fdivs[.] if . y y
141 fmadd[.] if . y y
142 fmadds[.] if . y y
143 fmsub[.] if . y y
144 fmsubs[.] if . y y
145 fmul[.] if . y y
146 fmuls[.] if . y y
148 (note: for fnm*, rounding happens before final negation)
149 fnmadd[.] if . y y
150 fnmadds[.] if . y y
151 fnmsub[.] if . y y
152 fnmsubs[.] if . y y
154 fre[.] if . y y
155 fres[.] if . y y
157 frsqrte[.] if . y apparently not
159 fsqrt[.] if . y y
160 fsqrts[.] if . y y
161 fsub[.] if . y y
162 fsubs[.] if . y y
165 fpscr: bits 30-31 (ibm) is RM
166 24-29 (ibm) are exnmasks/non-IEEE bit, all zero
167 15-19 (ibm) is FPRF: class, <, =, >, UNord
169 ppc fe(guest) makes fpscr read as all zeros except RM (and maybe FPRF
170 in future)
172 mcrfs - move fpscr field to CR field
173 mtfsfi[.] - 4 bit imm moved to fpscr field
174 mtfsf[.] - move frS[low 1/2] to fpscr but using 8-bit field mask
175 mtfsb1[.] - set given fpscr bit
176 mtfsb0[.] - clear given fpscr bit
177 mffs[.] - move all fpscr to frD[low 1/2]
179 For [.] presumably cr1 is set with exn summary bits, as per
180 main FP insns
182 A single precision store truncates/denormalises the in-register value,
183 but does not round it. This is so that flds followed by fsts is
184 always the identity.
188 /*---------------------------------------------------------*/
189 /*--- misc helpers ---*/
190 /*---------------------------------------------------------*/
192 /* These are duplicated in guest-ppc/toIR.c */
193 static IRExpr* unop ( IROp op, IRExpr* a )
195 return IRExpr_Unop(op, a);
198 static IRExpr* mkU32 ( UInt i )
200 return IRExpr_Const(IRConst_U32(i));
203 static IRExpr* bind ( Int binder )
205 return IRExpr_Binder(binder);
208 static Bool isZeroU8 ( IRExpr* e )
210 return e->tag == Iex_Const
211 && e->Iex.Const.con->tag == Ico_U8
212 && e->Iex.Const.con->Ico.U8 == 0;
216 /*---------------------------------------------------------*/
217 /*--- ISelEnv ---*/
218 /*---------------------------------------------------------*/
220 /* This carries around:
222 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
223 might encounter. This is computed before insn selection starts,
224 and does not change.
226 - A mapping from IRTemp to HReg. This tells the insn selector
227 which virtual register(s) are associated with each IRTemp
228 temporary. This is computed before insn selection starts, and
229 does not change. We expect this mapping to map precisely the
230 same set of IRTemps as the type mapping does.
232 - vregmapLo holds the primary register for the IRTemp.
233 - vregmapMedLo holds the secondary register for the IRTemp,
234 if any is needed. That's only for Ity_I64 temps
235 in 32 bit mode or Ity_I128 temps in 64-bit mode.
236 - vregmapMedHi is only for dealing with Ity_I128 temps in
237 32 bit mode. It holds bits 95:64 (Intel numbering)
238 of the IRTemp.
239 - vregmapHi is also only for dealing with Ity_I128 temps
240 in 32 bit mode. It holds the most significant bits
241 (127:96 in Intel numbering) of the IRTemp.
243 - The code array, that is, the insns selected so far.
245 - A counter, for generating new virtual registers.
247 - The host subarchitecture we are selecting insns for.
248 This is set at the start and does not change.
250 - A Bool to tell us if the host is 32 or 64bit.
251 This is set at the start and does not change.
253 - An IRExpr*, which may be NULL, holding the IR expression (an
254 IRRoundingMode-encoded value) to which the FPU's rounding mode
255 was most recently set. Setting to NULL is always safe. Used to
256 avoid redundant settings of the FPU's rounding mode, as
257 described in set_FPU_rounding_mode below.
259 - A VexMiscInfo*, needed for knowing how to generate
260 function calls for this target.
262 - The maximum guest address of any guest insn in this block.
263 Actually, the address of the highest-addressed byte from any
264 insn in this block. Is set at the start and does not change.
265 This is used for detecting jumps which are definitely
266 forward-edges from this block, and therefore can be made
267 (chained) to the fast entry point of the destination, thereby
268 avoiding the destination's event check.
271 typedef
272 struct {
273 /* Constant -- are set at the start and do not change. */
274 IRTypeEnv* type_env;
275 // 64-bit mode 32-bit mode
276 HReg* vregmapLo; // Low 64-bits [63:0] Low 32-bits [31:0]
277 HReg* vregmapMedLo; // high 64-bits[127:64] Next 32-bits [63:32]
278 HReg* vregmapMedHi; // unused Next 32-bits [95:64]
279 HReg* vregmapHi; // unused highest 32-bits [127:96]
280 Int n_vregmap;
282 /* 27 Jan 06: Not currently used, but should be */
283 UInt hwcaps;
285 Bool mode64;
287 const VexAbiInfo* vbi; // unused
289 Bool chainingAllowed;
290 Addr64 max_ga;
292 /* These are modified as we go along. */
293 HInstrArray* code;
294 Int vreg_ctr;
296 IRExpr* previous_rm;
298 ISelEnv;
301 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
303 vassert(tmp >= 0);
304 vassert(tmp < env->n_vregmap);
305 return env->vregmapLo[tmp];
308 static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
309 ISelEnv* env, IRTemp tmp )
311 vassert(tmp >= 0);
312 vassert(tmp < env->n_vregmap);
313 vassert(! hregIsInvalid(env->vregmapMedLo[tmp]));
314 *vrLO = env->vregmapLo[tmp];
315 *vrHI = env->vregmapMedLo[tmp];
318 /* Only for use in 32-bit mode */
319 static void lookupIRTempQuad ( HReg* vrHi, HReg* vrMedHi, HReg* vrMedLo,
320 HReg* vrLo, ISelEnv* env, IRTemp tmp )
322 vassert(!env->mode64);
323 vassert(tmp >= 0);
324 vassert(tmp < env->n_vregmap);
325 vassert(! hregIsInvalid(env->vregmapMedLo[tmp]));
326 *vrHi = env->vregmapHi[tmp];
327 *vrMedHi = env->vregmapMedHi[tmp];
328 *vrMedLo = env->vregmapMedLo[tmp];
329 *vrLo = env->vregmapLo[tmp];
332 static void addInstr ( ISelEnv* env, PPCInstr* instr )
334 addHInstr(env->code, instr);
335 if (vex_traceflags & VEX_TRACE_VCODE) {
336 ppPPCInstr(instr, env->mode64);
337 vex_printf("\n");
341 static HReg newVRegI ( ISelEnv* env )
343 HReg reg
344 = mkHReg(True/*vreg*/, HRcGPR(env->mode64), 0/*enc*/, env->vreg_ctr);
345 env->vreg_ctr++;
346 return reg;
349 static HReg newVRegF ( ISelEnv* env )
351 HReg reg = mkHReg(True/*vreg*/, HRcFlt64, 0/*enc*/, env->vreg_ctr);
352 env->vreg_ctr++;
353 return reg;
356 static HReg newVRegV ( ISelEnv* env )
358 HReg reg = mkHReg(True/*vreg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
359 env->vreg_ctr++;
360 return reg;
364 /*---------------------------------------------------------*/
365 /*--- ISEL: Forward declarations ---*/
366 /*---------------------------------------------------------*/
368 /* These are organised as iselXXX and iselXXX_wrk pairs. The
369 iselXXX_wrk do the real work, but are not to be called directly.
370 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
371 checks that all returned registers are virtual. You should not
372 call the _wrk version directly.
374 'Word' refers to the size of the native machine word, that is,
375 32-bit int in 32-bit mode and 64-bit int in 64-bit mode. '2Word'
376 therefore refers to a double-width (64/128-bit) quantity in two
377 integer registers.
379 /* 32-bit mode: compute an I8/I16/I32 into a GPR.
380 64-bit mode: compute an I8/I16/I32/I64 into a GPR. */
381 static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
382 IREndness IEndianess );
383 static HReg iselWordExpr_R ( ISelEnv* env, const IRExpr* e,
384 IREndness IEndianess );
386 /* 32-bit mode: Compute an I8/I16/I32 into a RH
387 (reg-or-halfword-immediate).
388 64-bit mode: Compute an I8/I16/I32/I64 into a RH
389 (reg-or-halfword-immediate).
390 It's important to specify whether the immediate is to be regarded
391 as signed or not. If yes, this will never return -32768 as an
392 immediate; this guarantees that all signed immediates that are
393 returned can have their sign inverted if need be.
395 static PPCRH* iselWordExpr_RH_wrk ( ISelEnv* env,
396 Bool syned, const IRExpr* e,
397 IREndness IEndianess );
398 static PPCRH* iselWordExpr_RH ( ISelEnv* env,
399 Bool syned, const IRExpr* e,
400 IREndness IEndianess );
402 /* 32-bit mode: compute an I32 into a RI (reg or 32-bit immediate).
403 64-bit mode: compute an I64 into a RI (reg or 64-bit immediate). */
404 static PPCRI* iselWordExpr_RI_wrk ( ISelEnv* env, const IRExpr* e,
405 IREndness IEndianess );
406 static PPCRI* iselWordExpr_RI ( ISelEnv* env, const IRExpr* e,
407 IREndness IEndianess );
409 /* In 32 bit mode ONLY, compute an I8 into a
410 reg-or-5-bit-unsigned-immediate, the latter being an immediate in
411 the range 1 .. 31 inclusive. Used for doing shift amounts. */
412 static PPCRH* iselWordExpr_RH5u_wrk ( ISelEnv* env, const IRExpr* e,
413 IREndness IEndianess );
414 static PPCRH* iselWordExpr_RH5u ( ISelEnv* env, const IRExpr* e,
415 IREndness IEndianess );
417 /* In 64-bit mode ONLY, compute an I8 into a
418 reg-or-6-bit-unsigned-immediate, the latter being an immediate in
419 the range 1 .. 63 inclusive. Used for doing shift amounts. */
420 static PPCRH* iselWordExpr_RH6u_wrk ( ISelEnv* env, const IRExpr* e,
421 IREndness IEndianess );
422 static PPCRH* iselWordExpr_RH6u ( ISelEnv* env, const IRExpr* e,
423 IREndness IEndianess );
425 /* 32-bit mode: compute an I32 into an AMode.
426 64-bit mode: compute an I64 into an AMode.
428 Requires to know (xferTy) the type of data to be loaded/stored
429 using this amode. That is so that, for 64-bit code generation, any
430 PPCAMode_IR returned will have an index (immediate offset) field
431 that is guaranteed to be 4-aligned, if there is any chance that the
432 amode is to be used in ld/ldu/lda/std/stdu.
434 Since there are no such restrictions on 32-bit insns, xferTy is
435 ignored for 32-bit code generation. */
436 static PPCAMode* iselWordExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e,
437 IRType xferTy,
438 IREndness IEndianess );
439 static PPCAMode* iselWordExpr_AMode ( ISelEnv* env, const IRExpr* e,
440 IRType xferTy,
441 IREndness IEndianess );
443 static void iselInt128Expr_to_32x4_wrk ( HReg* rHi, HReg* rMedHi,
444 HReg* rMedLo, HReg* rLo,
445 ISelEnv* env, const IRExpr* e,
446 IREndness IEndianess );
447 static void iselInt128Expr_to_32x4 ( HReg* rHi, HReg* rMedHi,
448 HReg* rMedLo, HReg* rLo,
449 ISelEnv* env, const IRExpr* e,
450 IREndness IEndianess );
453 /* 32-bit mode ONLY: compute an I64 into a GPR pair. */
454 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
455 ISelEnv* env, const IRExpr* e,
456 IREndness IEndianess );
457 static void iselInt64Expr ( HReg* rHi, HReg* rLo,
458 ISelEnv* env, const IRExpr* e,
459 IREndness IEndianess );
461 /* 64-bit mode ONLY: compute an I128 into a GPR64 pair. */
462 static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
463 ISelEnv* env, const IRExpr* e,
464 IREndness IEndianess );
466 static void iselInt128Expr ( HReg* rHi, HReg* rLo,
467 ISelEnv* env, const IRExpr* e,
468 IREndness IEndianess );
470 static PPCCondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e,
471 IREndness IEndianess );
472 static PPCCondCode iselCondCode ( ISelEnv* env, const IRExpr* e,
473 IREndness IEndianess );
475 static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e,
476 IREndness IEndianess );
477 static HReg iselDblExpr ( ISelEnv* env, const IRExpr* e,
478 IREndness IEndianess );
480 static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e,
481 IREndness IEndianess );
482 static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e,
483 IREndness IEndianess );
485 static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e,
486 IREndness IEndianess );
487 static HReg iselVecExpr ( ISelEnv* env, const IRExpr* e,
488 IREndness IEndianess );
490 /* 64-bit mode ONLY. */
491 static HReg iselDfp32Expr_wrk ( ISelEnv* env, const IRExpr* e,
492 IREndness IEndianess );
493 static HReg iselDfp32Expr ( ISelEnv* env, const IRExpr* e,
494 IREndness IEndianess );
495 static HReg iselDfp64Expr_wrk ( ISelEnv* env, const IRExpr* e,
496 IREndness IEndianess );
497 static HReg iselDfp64Expr ( ISelEnv* env, const IRExpr* e,
498 IREndness IEndianess );
499 static HReg iselFp128Expr_wrk ( ISelEnv* env, const IRExpr* e,
500 IREndness IEndianess);
501 static HReg iselFp128Expr ( ISelEnv* env, const IRExpr* e,
502 IREndness IEndianess);
504 /* 64-bit mode ONLY: compute an D128 into a GPR64 pair. */
505 static void iselDfp128Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
506 const IRExpr* e, IREndness IEndianess );
507 static void iselDfp128Expr ( HReg* rHi, HReg* rLo, ISelEnv* env,
508 const IRExpr* e, IREndness IEndianess );
510 /*---------------------------------------------------------*/
511 /*--- ISEL: Misc helpers ---*/
512 /*---------------------------------------------------------*/
514 /* Make an int reg-reg move. */
516 static PPCInstr* mk_iMOVds_RR ( HReg r_dst, HReg r_src )
518 vassert(hregClass(r_dst) == hregClass(r_src));
519 vassert(hregClass(r_src) == HRcInt32 ||
520 hregClass(r_src) == HRcInt64);
521 return PPCInstr_Alu(Palu_OR, r_dst, r_src, PPCRH_Reg(r_src));
524 /* Advance/retreat %r1 by n. */
526 static void add_to_sp ( ISelEnv* env, UInt n )
528 HReg sp = StackFramePtr(env->mode64);
529 vassert(n <= 1024 && (n%16) == 0);
530 addInstr(env, PPCInstr_Alu( Palu_ADD, sp, sp,
531 PPCRH_Imm(True,toUShort(n)) ));
534 static void sub_from_sp ( ISelEnv* env, UInt n )
536 HReg sp = StackFramePtr(env->mode64);
537 vassert(n <= 1024 && (n%16) == 0);
538 addInstr(env, PPCInstr_Alu( Palu_SUB, sp, sp,
539 PPCRH_Imm(True,toUShort(n)) ));
543 returns a quadword aligned address on the stack
544 - copies SP, adds 16bytes, aligns to quadword.
545 use sub_from_sp(32) before calling this,
546 as expects to have 32 bytes to play with.
548 static HReg get_sp_aligned16 ( ISelEnv* env )
550 HReg r = newVRegI(env);
551 HReg align16 = newVRegI(env);
552 addInstr(env, mk_iMOVds_RR(r, StackFramePtr(env->mode64)));
553 // add 16
554 addInstr(env, PPCInstr_Alu( Palu_ADD, r, r,
555 PPCRH_Imm(True,toUShort(16)) ));
556 // mask to quadword
557 addInstr(env,
558 PPCInstr_LI(align16, 0xFFFFFFFFFFFFFFF0ULL, env->mode64));
559 addInstr(env, PPCInstr_Alu(Palu_AND, r,r, PPCRH_Reg(align16)));
560 return r;
565 /* Load 2*I32 regs to fp reg */
566 static HReg mk_LoadRR32toFPR ( ISelEnv* env,
567 HReg r_srcHi, HReg r_srcLo )
569 HReg fr_dst = newVRegF(env);
570 PPCAMode *am_addr0, *am_addr1;
572 vassert(!env->mode64);
573 vassert(hregClass(r_srcHi) == HRcInt32);
574 vassert(hregClass(r_srcLo) == HRcInt32);
576 sub_from_sp( env, 16 ); // Move SP down 16 bytes
577 am_addr0 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
578 am_addr1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
580 // store hi,lo as Ity_I32's
581 addInstr(env, PPCInstr_Store( 4, am_addr0, r_srcHi, env->mode64 ));
582 addInstr(env, PPCInstr_Store( 4, am_addr1, r_srcLo, env->mode64 ));
584 // load as float
585 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, am_addr0));
587 add_to_sp( env, 16 ); // Reset SP
588 return fr_dst;
591 /* Load I64 reg to fp reg */
592 static HReg mk_LoadR64toFPR ( ISelEnv* env, HReg r_src )
594 HReg fr_dst = newVRegF(env);
595 PPCAMode *am_addr0;
597 vassert(env->mode64);
598 vassert(hregClass(r_src) == HRcInt64);
600 sub_from_sp( env, 16 ); // Move SP down 16 bytes
601 am_addr0 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
603 // store as Ity_I64
604 addInstr(env, PPCInstr_Store( 8, am_addr0, r_src, env->mode64 ));
606 // load as float
607 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, am_addr0));
609 add_to_sp( env, 16 ); // Reset SP
610 return fr_dst;
614 /* Given an amode, return one which references 4 bytes further
615 along. */
617 static PPCAMode* advance4 ( ISelEnv* env, PPCAMode* am )
619 PPCAMode* am4 = dopyPPCAMode( am );
620 if (am4->tag == Pam_IR
621 && am4->Pam.IR.index + 4 <= 32767) {
622 am4->Pam.IR.index += 4;
623 } else {
624 vpanic("advance4(ppc,host)");
626 return am4;
630 /* Given a guest-state array descriptor, an index expression and a
631 bias, generate a PPCAMode pointing at the relevant piece of
632 guest state. */
633 static
634 PPCAMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
635 IRExpr* off, Int bias, IREndness IEndianess )
637 HReg rtmp, roff;
638 Int elemSz = sizeofIRType(descr->elemTy);
639 Int nElems = descr->nElems;
640 Int shift = 0;
642 /* MAX is somewhat arbitrarily, needs to be at least
643 3 times the size of VexGuestPPC64State */
644 #define MAX 6500
646 /* Throw out any cases we don't need. In theory there might be a
647 day where we need to handle others, but not today. */
649 if (nElems != 16 && nElems != 32)
650 vpanic("genGuestArrayOffset(ppc host)(1)");
652 switch (elemSz) {
653 case 4: shift = 2; break;
654 case 8: shift = 3; break;
655 default: vpanic("genGuestArrayOffset(ppc host)(2)");
658 if (bias < -100 || bias > 100) /* somewhat arbitrarily */
659 vpanic("genGuestArrayOffset(ppc host)(3)");
660 if (descr->base < 0 || descr->base > MAX) { /* somewhat arbitrarily */
661 vex_printf("ERROR: descr->base = %d, is greater then maximum = %d\n",
662 descr->base, MAX);
663 vpanic("genGuestArrayOffset(ppc host)(4)");
666 /* Compute off into a reg, %off. Then return:
668 addi %tmp, %off, bias (if bias != 0)
669 andi %tmp, nElems-1
670 sldi %tmp, shift
671 addi %tmp, %tmp, base
672 ... Baseblockptr + %tmp ...
674 roff = iselWordExpr_R(env, off, IEndianess);
675 rtmp = newVRegI(env);
676 addInstr(env, PPCInstr_Alu(
677 Palu_ADD,
678 rtmp, roff,
679 PPCRH_Imm(True/*signed*/, toUShort(bias))));
680 addInstr(env, PPCInstr_Alu(
681 Palu_AND,
682 rtmp, rtmp,
683 PPCRH_Imm(False/*unsigned*/, toUShort(nElems-1))));
684 addInstr(env, PPCInstr_Shft(
685 Pshft_SHL,
686 env->mode64 ? False : True/*F:64-bit, T:32-bit shift*/,
687 rtmp, rtmp,
688 PPCRH_Imm(False/*unsigned*/, toUShort(shift))));
689 addInstr(env, PPCInstr_Alu(
690 Palu_ADD,
691 rtmp, rtmp,
692 PPCRH_Imm(True/*signed*/, toUShort(descr->base))));
693 return
694 PPCAMode_RR( GuestStatePtr(env->mode64), rtmp );
695 #undef MAX
699 /*---------------------------------------------------------*/
700 /*--- ISEL: Function call helpers ---*/
701 /*---------------------------------------------------------*/
703 /* Used only in doHelperCall. See big comment in doHelperCall re
704 handling of register-parameter args. This function figures out
705 whether evaluation of an expression might require use of a fixed
706 register. If in doubt return True (safe but suboptimal).
708 static
709 Bool mightRequireFixedRegs ( IRExpr* e )
711 switch (e->tag) {
712 case Iex_RdTmp: case Iex_Const: case Iex_Get:
713 return False;
714 default:
715 return True;
720 /* Do a complete function call. |guard| is a Ity_Bit expression
721 indicating whether or not the call happens. If guard==NULL, the
722 call is unconditional. |retloc| is set to indicate where the
723 return value is after the call. The caller (of this fn) must
724 generate code to add |stackAdjustAfterCall| to the stack pointer
725 after the call is done. */
727 static
728 void doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall,
729 /*OUT*/RetLoc* retloc,
730 ISelEnv* env,
731 IRExpr* guard,
732 IRCallee* cee, IRType retTy, IRExpr** args,
733 IREndness IEndianess)
735 PPCCondCode cc;
736 HReg argregs[PPC_N_REGPARMS];
737 HReg tmpregs[PPC_N_REGPARMS];
738 Bool go_fast;
739 Int n_args, i, argreg;
740 UInt argiregs;
741 Bool mode64 = env->mode64;
743 /* Set default returns. We'll update them later if needed. */
744 *stackAdjustAfterCall = 0;
745 *retloc = mk_RetLoc_INVALID();
747 /* These are used for cross-checking that IR-level constraints on
748 the use of IRExpr_VECRET() and IRExpr_GSPTR() are observed. */
749 UInt nVECRETs = 0;
750 UInt nGSPTRs = 0;
752 /* Marshal args for a call and do the call.
754 This function only deals with a tiny set of possibilities, which
755 cover all helpers in practice. The restrictions are that only
756 arguments in registers are supported, hence only PPC_N_REGPARMS x
757 (mode32:32 | mode64:64) integer bits in total can be passed.
758 In fact the only supported arg type is (mode32:I32 | mode64:I64).
760 The return type can be I{64,32,16,8} or V{128,256}. In the
761 latter two cases, it is expected that |args| will contain the
762 special node IRExpr_VECRET(), in which case this routine
763 generates code to allocate space on the stack for the vector
764 return value. Since we are not passing any scalars on the
765 stack, it is enough to preallocate the return space before
766 marshalling any arguments, in this case.
768 |args| may also contain IRExpr_GSPTR(), in which case the value
769 in the guest state pointer register is passed as the
770 corresponding argument.
772 Generating code which is both efficient and correct when
773 parameters are to be passed in registers is difficult, for the
774 reasons elaborated in detail in comments attached to
775 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
776 of the method described in those comments.
778 The problem is split into two cases: the fast scheme and the
779 slow scheme. In the fast scheme, arguments are computed
780 directly into the target (real) registers. This is only safe
781 when we can be sure that computation of each argument will not
782 trash any real registers set by computation of any other
783 argument.
785 In the slow scheme, all args are first computed into vregs, and
786 once they are all done, they are moved to the relevant real
787 regs. This always gives correct code, but it also gives a bunch
788 of vreg-to-rreg moves which are usually redundant but are hard
789 for the register allocator to get rid of.
791 To decide which scheme to use, all argument expressions are
792 first examined. If they are all so simple that it is clear they
793 will be evaluated without use of any fixed registers, use the
794 fast scheme, else use the slow scheme. Note also that only
795 unconditional calls may use the fast scheme, since having to
796 compute a condition expression could itself trash real
797 registers.
799 Note this requires being able to examine an expression and
800 determine whether or not evaluation of it might use a fixed
801 register. That requires knowledge of how the rest of this insn
802 selector works. Currently just the following 3 are regarded as
803 safe -- hopefully they cover the majority of arguments in
804 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
807 /* Note that the cee->regparms field is meaningless on PPC32/64 host
808 (since there is only one calling convention) and so we always
809 ignore it. */
811 n_args = 0;
812 for (i = 0; args[i]; i++)
813 n_args++;
815 if (n_args > PPC_N_REGPARMS) {
816 vpanic("doHelperCall(PPC): cannot currently handle > 8 args");
817 // PPC_N_REGPARMS
820 /* This is kind of stupid .. the arrays are sized as PPC_N_REGPARMS
821 but we then assume that that value is 8. */
822 vassert(PPC_N_REGPARMS == 8);
824 argregs[0] = hregPPC_GPR3(mode64);
825 argregs[1] = hregPPC_GPR4(mode64);
826 argregs[2] = hregPPC_GPR5(mode64);
827 argregs[3] = hregPPC_GPR6(mode64);
828 argregs[4] = hregPPC_GPR7(mode64);
829 argregs[5] = hregPPC_GPR8(mode64);
830 argregs[6] = hregPPC_GPR9(mode64);
831 argregs[7] = hregPPC_GPR10(mode64);
832 argiregs = 0;
834 tmpregs[0] = tmpregs[1] = tmpregs[2] =
835 tmpregs[3] = tmpregs[4] = tmpregs[5] =
836 tmpregs[6] = tmpregs[7] = INVALID_HREG;
838 /* First decide which scheme (slow or fast) is to be used. First
839 assume the fast scheme, and select slow if any contraindications
840 (wow) appear. */
842 go_fast = True;
844 /* We'll need space on the stack for the return value. Avoid
845 possible complications with nested calls by using the slow
846 scheme. */
847 if (retTy == Ity_V128 || retTy == Ity_V256)
848 go_fast = False;
850 if (go_fast && guard) {
851 if (guard->tag == Iex_Const
852 && guard->Iex.Const.con->tag == Ico_U1
853 && guard->Iex.Const.con->Ico.U1 == True) {
854 /* unconditional */
855 } else {
856 /* Not manifestly unconditional -- be conservative. */
857 go_fast = False;
861 if (go_fast) {
862 for (i = 0; i < n_args; i++) {
863 IRExpr* arg = args[i];
864 if (UNLIKELY(arg->tag == Iex_GSPTR)) {
865 /* that's OK */
867 else if (UNLIKELY(arg->tag == Iex_VECRET)) {
868 /* This implies ill-formed IR, since if the IR was
869 well-formed, the return-type test above would have
870 filtered it out. */
871 vpanic("doHelperCall(PPC): invalid IR");
873 else if (mightRequireFixedRegs(arg)) {
874 go_fast = False;
875 break;
880 /* At this point the scheme to use has been established. Generate
881 code to get the arg values into the argument rregs. */
883 if (go_fast) {
885 /* FAST SCHEME */
886 argreg = 0;
888 for (i = 0; i < n_args; i++) {
889 IRExpr* arg = args[i];
890 vassert(argreg < PPC_N_REGPARMS);
892 if (arg->tag == Iex_GSPTR) {
893 argiregs |= (1 << (argreg+3));
894 addInstr(env, mk_iMOVds_RR( argregs[argreg],
895 GuestStatePtr(mode64) ));
896 argreg++;
897 } else {
898 vassert(arg->tag != Iex_VECRET);
899 IRType ty = typeOfIRExpr(env->type_env, arg);
900 vassert(ty == Ity_I32 || ty == Ity_I64);
901 if (!mode64) {
902 if (ty == Ity_I32) {
903 argiregs |= (1 << (argreg+3));
904 addInstr(env,
905 mk_iMOVds_RR( argregs[argreg],
906 iselWordExpr_R(env, arg,
907 IEndianess) ));
908 } else { // Ity_I64 in 32-bit mode
909 HReg rHi, rLo;
910 if ((argreg%2) == 1)
911 // ppc32 ELF abi spec for passing LONG_LONG
912 argreg++; // XXX: odd argreg => even rN
913 vassert(argreg < PPC_N_REGPARMS-1);
914 iselInt64Expr(&rHi,&rLo, env, arg, IEndianess);
915 argiregs |= (1 << (argreg+3));
916 addInstr(env, mk_iMOVds_RR( argregs[argreg++], rHi ));
917 argiregs |= (1 << (argreg+3));
918 addInstr(env, mk_iMOVds_RR( argregs[argreg], rLo));
920 } else { // mode64
921 argiregs |= (1 << (argreg+3));
922 addInstr(env, mk_iMOVds_RR( argregs[argreg],
923 iselWordExpr_R(env, arg,
924 IEndianess) ));
926 argreg++;
927 } /* if (arg == IRExprP__BBPR) */
930 /* Fast scheme only applies for unconditional calls. Hence: */
931 cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
933 } else {
935 /* SLOW SCHEME; move via temporaries */
936 argreg = 0;
938 /* If we have a vector return type, allocate a place for it on
939 the stack and record its address. Rather than figure out the
940 complexities of PPC{32,64} ELF ABI stack frame layout, simply
941 drop the SP by 1024 and allocate the return point in the
942 middle. I think this should comfortably clear any ABI
943 mandated register save areas. Note that it doesn't maintain
944 the backchain as it should, since we're not doing st{d,w}u to
945 adjust the SP, but .. that doesn't seem to be a big deal.
946 Since we're not expecting to have to unwind out of here. */
947 HReg r_vecRetAddr = INVALID_HREG;
948 if (retTy == Ity_V128) {
949 r_vecRetAddr = newVRegI(env);
950 sub_from_sp(env, 512);
951 addInstr(env, mk_iMOVds_RR( r_vecRetAddr, StackFramePtr(mode64) ));
952 sub_from_sp(env, 512);
954 else if (retTy == Ity_V256) {
955 vassert(0); //ATC
956 r_vecRetAddr = newVRegI(env);
957 sub_from_sp(env, 512);
958 addInstr(env, mk_iMOVds_RR( r_vecRetAddr, StackFramePtr(mode64) ));
959 sub_from_sp(env, 512);
962 vassert(n_args >= 0 && n_args <= 8);
963 for (i = 0; i < n_args; i++) {
964 IRExpr* arg = args[i];
965 vassert(argreg < PPC_N_REGPARMS);
966 if (UNLIKELY(arg->tag == Iex_GSPTR)) {
967 tmpregs[argreg] = newVRegI(env);
968 addInstr(env, mk_iMOVds_RR( tmpregs[argreg],
969 GuestStatePtr(mode64) ));
970 nGSPTRs++;
972 else if (UNLIKELY(arg->tag == Iex_VECRET)) {
973 /* We stashed the address of the return slot earlier, so just
974 retrieve it now. */
975 vassert(!hregIsInvalid(r_vecRetAddr));
976 tmpregs[i] = r_vecRetAddr;
977 nVECRETs++;
979 else {
980 IRType ty = typeOfIRExpr(env->type_env, arg);
981 vassert(ty == Ity_I32 || ty == Ity_I64);
982 if (!mode64) {
983 if (ty == Ity_I32) {
984 tmpregs[argreg] = iselWordExpr_R(env, arg, IEndianess);
985 } else { // Ity_I64 in 32-bit mode
986 HReg rHi, rLo;
987 if ((argreg%2) == 1)
988 // ppc32 ELF abi spec for passing LONG_LONG
989 argreg++; // XXX: odd argreg => even rN
990 vassert(argreg < PPC_N_REGPARMS-1);
991 iselInt64Expr(&rHi,&rLo, env, arg, IEndianess);
992 tmpregs[argreg++] = rHi;
993 tmpregs[argreg] = rLo;
995 } else { // mode64
996 tmpregs[argreg] = iselWordExpr_R(env, arg, IEndianess);
999 argreg++;
1002 /* Now we can compute the condition. We can't do it earlier
1003 because the argument computations could trash the condition
1004 codes. Be a bit clever to handle the common case where the
1005 guard is 1:Bit. */
1006 cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
1007 if (guard) {
1008 if (guard->tag == Iex_Const
1009 && guard->Iex.Const.con->tag == Ico_U1
1010 && guard->Iex.Const.con->Ico.U1 == True) {
1011 /* unconditional -- do nothing */
1012 } else {
1013 cc = iselCondCode( env, guard, IEndianess );
1017 /* Move the args to their final destinations. */
1018 for (i = 0; i < argreg; i++) {
1019 if (hregIsInvalid(tmpregs[i])) // Skip invalid regs
1020 continue;
1021 /* None of these insns, including any spill code that might
1022 be generated, may alter the condition codes. */
1023 argiregs |= (1 << (i+3));
1024 addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
1029 /* Do final checks, set the return values, and generate the call
1030 instruction proper. */
1031 if (retTy == Ity_V128 || retTy == Ity_V256) {
1032 vassert(nVECRETs == 1);
1033 } else {
1034 vassert(nVECRETs == 0);
1037 vassert(nGSPTRs == 0 || nGSPTRs == 1);
1039 vassert(*stackAdjustAfterCall == 0);
1040 vassert(is_RetLoc_INVALID(*retloc));
1041 switch (retTy) {
1042 case Ity_INVALID:
1043 /* Function doesn't return a value. */
1044 *retloc = mk_RetLoc_simple(RLPri_None);
1045 break;
1046 case Ity_I64:
1047 *retloc = mk_RetLoc_simple(mode64 ? RLPri_Int : RLPri_2Int);
1048 break;
1049 case Ity_I32: case Ity_I16: case Ity_I8:
1050 *retloc = mk_RetLoc_simple(RLPri_Int);
1051 break;
1052 case Ity_V128:
1053 /* Result is 512 bytes up the stack, and after it has been
1054 retrieved, adjust SP upwards by 1024. */
1055 *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 512);
1056 *stackAdjustAfterCall = 1024;
1057 break;
1058 case Ity_V256:
1059 vassert(0); // ATC
1060 /* Ditto */
1061 *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 512);
1062 *stackAdjustAfterCall = 1024;
1063 break;
1064 default:
1065 /* IR can denote other possible return types, but we don't
1066 handle those here. */
1067 vassert(0);
1070 /* Finally, generate the call itself. This needs the *retloc value
1071 set in the switch above, which is why it's at the end. */
1073 Addr64 target = mode64 ? (Addr)cee->addr
1074 : toUInt((Addr)(cee->addr));
1075 addInstr(env, PPCInstr_Call( cc, target, argiregs, *retloc ));
1079 /*---------------------------------------------------------*/
1080 /*--- ISEL: FP rounding mode helpers ---*/
1081 /*---------------------------------------------------------*/
1083 ///* Set FPU's rounding mode to the default */
1084 //static
1085 //void set_FPU_rounding_default ( ISelEnv* env )
1087 // HReg fr_src = newVRegF(env);
1088 // HReg r_src = newVRegI(env);
1090 // /* Default rounding mode = 0x0
1091 // Only supporting the rounding-mode bits - the rest of FPSCR is 0x0
1092 // - so we can set the whole register at once (faster)
1093 // note: upper 32 bits ignored by FpLdFPSCR
1094 // */
1095 // addInstr(env, PPCInstr_LI(r_src, 0x0, env->mode64));
1096 // if (env->mode64) {
1097 // fr_src = mk_LoadR64toFPR( env, r_src ); // 1*I64 -> F64
1098 // } else {
1099 // fr_src = mk_LoadRR32toFPR( env, r_src, r_src ); // 2*I32 -> F64
1100 // }
1101 // addInstr(env, PPCInstr_FpLdFPSCR( fr_src ));
1104 /* Convert IR rounding mode to PPC encoding */
1105 static HReg roundModeIRtoPPC ( ISelEnv* env, HReg r_rmIR )
1108 rounding mode | PPC | IR
1109 -----------------------------------------------
1110 to nearest, ties to even | 000 | 000
1111 to zero | 001 | 011
1112 to +infinity | 010 | 010
1113 to -infinity | 011 | 001
1114 +++++ Below are the extended rounding modes for decimal floating point +++++
1115 to nearest, ties away from 0 | 100 | 100
1116 to nearest, ties toward 0 | 101 | 111
1117 to away from 0 | 110 | 110
1118 to prepare for shorter precision | 111 | 101
1120 HReg r_rmPPC = newVRegI(env);
1121 HReg r_tmp1 = newVRegI(env);
1122 HReg r_tmp2 = newVRegI(env);
1124 vassert(hregClass(r_rmIR) == HRcGPR(env->mode64));
1126 // r_rmPPC = XOR(r_rmIR, r_rmIR << 1) & 3
1128 // slwi tmp1, r_rmIR, 1
1129 // xor tmp1, r_rmIR, tmp1
1130 // andi r_rmPPC, tmp1, 3
1132 addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1133 r_tmp1, r_rmIR, PPCRH_Imm(False,1)));
1135 addInstr( env, PPCInstr_Alu( Palu_AND,
1136 r_tmp2, r_tmp1, PPCRH_Imm( False, 3 ) ) );
1138 addInstr( env, PPCInstr_Alu( Palu_XOR,
1139 r_rmPPC, r_rmIR, PPCRH_Reg( r_tmp2 ) ) );
1141 return r_rmPPC;
1145 /* Set the FPU's rounding mode: 'mode' is an I32-typed expression
1146 denoting a value in the range 0 .. 7, indicating a round mode
1147 encoded as per type IRRoundingMode. Set the PPC FPSCR to have the
1148 same rounding. When the dfp_rm arg is True, set the decimal
1149 floating point rounding mode bits (29:31); otherwise, set the
1150 binary floating point rounding mode bits (62:63).
1152 For speed & simplicity, we're setting the *entire* FPSCR here.
1154 Setting the rounding mode is expensive. So this function tries to
1155 avoid repeatedly setting the rounding mode to the same thing by
1156 first comparing 'mode' to the 'mode' tree supplied in the previous
1157 call to this function, if any. (The previous value is stored in
1158 env->previous_rm.) If 'mode' is a single IR temporary 't' and
1159 env->previous_rm is also just 't', then the setting is skipped.
1161 This is safe because of the SSA property of IR: an IR temporary can
1162 only be defined once and so will have the same value regardless of
1163 where it appears in the block. Cool stuff, SSA.
1165 A safety condition: all attempts to set the RM must be aware of
1166 this mechanism - by being routed through the functions here.
1168 Of course this only helps if blocks where the RM is set more than
1169 once and it is set to the same value each time, *and* that value is
1170 held in the same IR temporary each time. In order to assure the
1171 latter as much as possible, the IR optimiser takes care to do CSE
1172 on any block with any sign of floating point activity.
1174 static
1175 void _set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode, Bool dfp_rm,
1176 IREndness IEndianess )
1178 HReg fr_src = newVRegF(env);
1179 HReg r_src;
1181 vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);
1183 /* Do we need to do anything? */
1184 if (env->previous_rm
1185 && env->previous_rm->tag == Iex_RdTmp
1186 && mode->tag == Iex_RdTmp
1187 && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
1188 /* no - setting it to what it was before. */
1189 vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
1190 return;
1193 /* No luck - we better set it, and remember what we set it to. */
1194 env->previous_rm = mode;
1196 /* Only supporting the rounding-mode bits - the rest of FPSCR is
1197 0x0 - so we can set the whole register at once (faster). */
1199 // Resolve rounding mode and convert to PPC representation
1200 r_src = roundModeIRtoPPC( env, iselWordExpr_R(env, mode, IEndianess) );
1202 // gpr -> fpr
1203 if (env->mode64) {
1204 if (dfp_rm) {
1205 HReg r_tmp1 = newVRegI( env );
1206 addInstr( env,
1207 PPCInstr_Shft( Pshft_SHL, False/*64bit shift*/,
1208 r_tmp1, r_src, PPCRH_Imm( False, 32 ) ) );
1209 fr_src = mk_LoadR64toFPR( env, r_tmp1 );
1210 } else {
1211 fr_src = mk_LoadR64toFPR( env, r_src ); // 1*I64 -> F64
1213 } else {
1214 if (dfp_rm) {
1215 HReg r_zero = newVRegI( env );
1216 addInstr( env, PPCInstr_LI( r_zero, 0, env->mode64 ) );
1217 fr_src = mk_LoadRR32toFPR( env, r_src, r_zero );
1218 } else {
1219 fr_src = mk_LoadRR32toFPR( env, r_src, r_src ); // 2*I32 -> F64
1223 // Move to FPSCR
1224 addInstr(env, PPCInstr_FpLdFPSCR( fr_src, dfp_rm ));
1227 static void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode,
1228 IREndness IEndianess )
1230 _set_FPU_rounding_mode(env, mode, False, IEndianess);
1233 static void set_FPU_DFP_rounding_mode ( ISelEnv* env, IRExpr* mode,
1234 IREndness IEndianess )
1236 _set_FPU_rounding_mode(env, mode, True, IEndianess);
1239 static
1240 Bool FPU_rounding_mode_isOdd (IRExpr* mode) {
1241 /* If the rounding mode is set to odd, the the expr must be a constant U8
1242 * value equal to 8. Otherwise, it must be a bin op expressiong that
1243 * calculates the value.
1246 if (mode->tag != Iex_Const)
1247 return False;
1249 vassert(mode->Iex.Const.con->tag == Ico_U32);
1250 vassert(mode->Iex.Const.con->Ico.U32 == 0x8);
1251 return True;
1254 /*---------------------------------------------------------*/
1255 /*--- ISEL: vector helpers ---*/
1256 /*---------------------------------------------------------*/
1258 /* Generate all-zeroes into a new vector register.
1260 static HReg generate_zeroes_V128 ( ISelEnv* env )
1262 HReg dst = newVRegV(env);
1263 addInstr(env, PPCInstr_AvBinary(Pav_XOR, dst, dst, dst));
1264 return dst;
1267 /* Generate all-ones into a new vector register.
1269 static HReg generate_ones_V128 ( ISelEnv* env )
1271 HReg dst = newVRegV(env);
1272 PPCVI5s * src = PPCVI5s_Imm(-1);
1273 addInstr(env, PPCInstr_AvSplat(8, dst, src));
1274 return dst;
1279 Generates code for AvSplat
1280 - takes in IRExpr* of type 8|16|32
1281 returns vector reg of duplicated lanes of input
1282 - uses AvSplat(imm) for imms up to simm6.
1283 otherwise must use store reg & load vector
1285 static HReg mk_AvDuplicateRI( ISelEnv* env, IRExpr* e, IREndness IEndianess )
1287 HReg r_src;
1288 HReg dst = newVRegV(env);
1289 PPCRI* ri = iselWordExpr_RI(env, e, IEndianess);
1290 IRType ty = typeOfIRExpr(env->type_env,e);
1291 UInt sz = (ty == Ity_I8) ? 8 : (ty == Ity_I16) ? 16 : 32;
1292 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
1294 /* special case: immediate */
1295 if (ri->tag == Pri_Imm) {
1296 Int simm32 = (Int)ri->Pri.Imm;
1298 /* figure out if it's do-able with imm splats. */
1299 if (simm32 >= -32 && simm32 <= 31) {
1300 Char simm6 = (Char)simm32;
1301 if (simm6 > 15) { /* 16:31 inclusive */
1302 HReg v1 = newVRegV(env);
1303 HReg v2 = newVRegV(env);
1304 addInstr(env, PPCInstr_AvSplat(sz, v1, PPCVI5s_Imm(-16)));
1305 addInstr(env, PPCInstr_AvSplat(sz, v2, PPCVI5s_Imm(simm6-16)));
1306 addInstr(env,
1307 (sz== 8) ? PPCInstr_AvBin8x16(Pav_SUBU, dst, v2, v1) :
1308 (sz==16) ? PPCInstr_AvBin16x8(Pav_SUBU, dst, v2, v1)
1309 : PPCInstr_AvBin32x4(Pav_SUBU, dst, v2, v1) );
1310 return dst;
1312 if (simm6 < -16) { /* -32:-17 inclusive */
1313 HReg v1 = newVRegV(env);
1314 HReg v2 = newVRegV(env);
1315 addInstr(env, PPCInstr_AvSplat(sz, v1, PPCVI5s_Imm(-16)));
1316 addInstr(env, PPCInstr_AvSplat(sz, v2, PPCVI5s_Imm(simm6+16)));
1317 addInstr(env,
1318 (sz== 8) ? PPCInstr_AvBin8x16(Pav_ADDU, dst, v2, v1) :
1319 (sz==16) ? PPCInstr_AvBin16x8(Pav_ADDU, dst, v2, v1)
1320 : PPCInstr_AvBin32x4(Pav_ADDU, dst, v2, v1) );
1321 return dst;
1323 /* simplest form: -16:15 inclusive */
1324 addInstr(env, PPCInstr_AvSplat(sz, dst, PPCVI5s_Imm(simm6)));
1325 return dst;
1328 /* no luck; use the Slow way. */
1329 r_src = newVRegI(env);
1330 addInstr(env, PPCInstr_LI(r_src, (Long)simm32, env->mode64));
1332 else {
1333 r_src = ri->Pri.Reg;
1337 /* Store r_src multiple times (sz dependent); then load the dest vector. */
1338 HReg r_aligned16;
1339 PPCAMode *am_offset, *am_offset_zero;
1341 sub_from_sp( env, 32 ); // Move SP down
1342 /* Get a 16-aligned address within our stack space */
1343 r_aligned16 = get_sp_aligned16( env );
1345 Int i;
1346 Int stride = (sz == 8) ? 1 : (sz == 16) ? 2 : 4;
1347 UChar num_bytes_to_store = stride;
1348 am_offset_zero = PPCAMode_IR( 0, r_aligned16 );
1349 am_offset = am_offset_zero;
1350 for (i = 0; i < 16; i+=stride, am_offset = PPCAMode_IR( i, r_aligned16)) {
1351 addInstr(env, PPCInstr_Store( num_bytes_to_store, am_offset, r_src, env->mode64 ));
1354 /* Effectively splat the r_src value to dst */
1355 addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 16, dst, am_offset_zero ) );
1356 add_to_sp( env, 32 ); // Reset SP
1358 return dst;
1363 /* for each lane of vSrc: lane == nan ? laneX = all 1's : all 0's */
1364 static HReg isNan ( ISelEnv* env, HReg vSrc, IREndness IEndianess )
1366 HReg zeros, msk_exp, msk_mnt, expt, mnts, vIsNan;
1368 vassert(hregClass(vSrc) == HRcVec128);
1370 zeros = mk_AvDuplicateRI(env, mkU32(0), IEndianess);
1371 msk_exp = mk_AvDuplicateRI(env, mkU32(0x7F800000), IEndianess);
1372 msk_mnt = mk_AvDuplicateRI(env, mkU32(0x7FFFFF), IEndianess);
1373 expt = newVRegV(env);
1374 mnts = newVRegV(env);
1375 vIsNan = newVRegV(env);
1377 /* 32bit float => sign(1) | exponent(8) | mantissa(23)
1378 nan => exponent all ones, mantissa > 0 */
1380 addInstr(env, PPCInstr_AvBinary(Pav_AND, expt, vSrc, msk_exp));
1381 addInstr(env, PPCInstr_AvBin32x4(Pav_CMPEQU, expt, expt, msk_exp));
1382 addInstr(env, PPCInstr_AvBinary(Pav_AND, mnts, vSrc, msk_mnt));
1383 addInstr(env, PPCInstr_AvBin32x4(Pav_CMPGTU, mnts, mnts, zeros));
1384 addInstr(env, PPCInstr_AvBinary(Pav_AND, vIsNan, expt, mnts));
1385 return vIsNan;
1389 /*---------------------------------------------------------*/
1390 /*--- ISEL: Integer expressions (64/32/16/8 bit) ---*/
1391 /*---------------------------------------------------------*/
1393 /* Select insns for an integer-typed expression, and add them to the
1394 code list. Return a reg holding the result. This reg will be a
1395 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
1396 want to modify it, ask for a new vreg, copy it in there, and modify
1397 the copy. The register allocator will do its best to map both
1398 vregs to the same real register, so the copies will often disappear
1399 later in the game.
1401 This should handle expressions of 64, 32, 16 and 8-bit type.
1402 All results are returned in a (mode64 ? 64bit : 32bit) register.
1403 For 16- and 8-bit expressions, the upper (32/48/56 : 16/24) bits
1404 are arbitrary, so you should mask or sign extend partial values
1405 if necessary.
1408 static HReg iselWordExpr_R ( ISelEnv* env, const IRExpr* e,
1409 IREndness IEndianess )
1411 HReg r = iselWordExpr_R_wrk(env, e, IEndianess);
1412 /* sanity checks ... */
1413 # if 0
1414 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1415 # endif
1417 vassert(hregClass(r) == HRcGPR(env->mode64));
1418 vassert(hregIsVirtual(r));
1419 return r;
1422 /* DO NOT CALL THIS DIRECTLY ! */
1423 static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
1424 IREndness IEndianess )
1426 Bool mode64 = env->mode64;
1427 MatchInfo mi;
1428 DECLARE_PATTERN(p_32to1_then_1Uto8);
1430 IRType ty = typeOfIRExpr(env->type_env,e);
1431 vassert(ty == Ity_I8 || ty == Ity_I16 ||
1432 ty == Ity_I32 || ((ty == Ity_I64) && mode64));
1434 switch (e->tag) {
1436 /* --------- TEMP --------- */
1437 case Iex_RdTmp:
1438 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1440 /* --------- LOAD --------- */
1441 case Iex_Load: {
1442 HReg r_dst;
1443 PPCAMode* am_addr;
1444 if (e->Iex.Load.end != IEndianess)
1445 goto irreducible;
1446 r_dst = newVRegI(env);
1447 am_addr = iselWordExpr_AMode( env, e->Iex.Load.addr, ty/*of xfer*/,
1448 IEndianess );
1449 addInstr(env, PPCInstr_Load( toUChar(sizeofIRType(ty)),
1450 r_dst, am_addr, mode64 ));
1451 return r_dst;
1452 /*NOTREACHED*/
1455 /* --------- BINARY OP --------- */
1456 case Iex_Binop: {
1457 PPCAluOp aluOp;
1458 PPCShftOp shftOp;
1460 /* Is it an addition or logical style op? */
1461 switch (e->Iex.Binop.op) {
1462 case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
1463 aluOp = Palu_ADD; break;
1464 case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
1465 aluOp = Palu_SUB; break;
1466 case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
1467 aluOp = Palu_AND; break;
1468 case Iop_Or8: case Iop_Or16: case Iop_Or32: case Iop_Or64:
1469 aluOp = Palu_OR; break;
1470 case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
1471 aluOp = Palu_XOR; break;
1472 default:
1473 aluOp = Palu_INVALID; break;
1475 /* For commutative ops we assume any literal
1476 values are on the second operand. */
1477 if (aluOp != Palu_INVALID) {
1478 HReg r_dst = newVRegI(env);
1479 HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1480 PPCRH* ri_srcR = NULL;
1481 /* get right arg into an RH, in the appropriate way */
1482 switch (aluOp) {
1483 case Palu_ADD: case Palu_SUB:
1484 ri_srcR = iselWordExpr_RH(env, True/*signed*/,
1485 e->Iex.Binop.arg2, IEndianess);
1486 break;
1487 case Palu_AND: case Palu_OR: case Palu_XOR:
1488 ri_srcR = iselWordExpr_RH(env, False/*signed*/,
1489 e->Iex.Binop.arg2, IEndianess);
1490 break;
1491 default:
1492 vpanic("iselWordExpr_R_wrk-aluOp-arg2");
1494 addInstr(env, PPCInstr_Alu(aluOp, r_dst, r_srcL, ri_srcR));
1495 return r_dst;
1498 /* a shift? */
1499 switch (e->Iex.Binop.op) {
1500 case Iop_Shl8: case Iop_Shl16: case Iop_Shl32: case Iop_Shl64:
1501 shftOp = Pshft_SHL; break;
1502 case Iop_Shr8: case Iop_Shr16: case Iop_Shr32: case Iop_Shr64:
1503 shftOp = Pshft_SHR; break;
1504 case Iop_Sar8: case Iop_Sar16: case Iop_Sar32: case Iop_Sar64:
1505 shftOp = Pshft_SAR; break;
1506 default:
1507 shftOp = Pshft_INVALID; break;
1509 /* we assume any literal values are on the second operand. */
1510 if (shftOp != Pshft_INVALID) {
1511 HReg r_dst = newVRegI(env);
1512 HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1513 PPCRH* ri_srcR = NULL;
1514 /* get right arg into an RH, in the appropriate way */
1515 switch (shftOp) {
1516 case Pshft_SHL: case Pshft_SHR: case Pshft_SAR:
1517 if (!mode64)
1518 ri_srcR = iselWordExpr_RH5u(env, e->Iex.Binop.arg2, IEndianess);
1519 else
1520 ri_srcR = iselWordExpr_RH6u(env, e->Iex.Binop.arg2, IEndianess);
1521 break;
1522 default:
1523 vpanic("iselIntExpr_R_wrk-shftOp-arg2");
1525 /* widen the left arg if needed */
1526 if (shftOp == Pshft_SHR || shftOp == Pshft_SAR) {
1527 if (ty == Ity_I8 || ty == Ity_I16) {
1528 PPCRH* amt = PPCRH_Imm(False,
1529 toUShort(ty == Ity_I8 ? 24 : 16));
1530 HReg tmp = newVRegI(env);
1531 addInstr(env, PPCInstr_Shft(Pshft_SHL,
1532 True/*32bit shift*/,
1533 tmp, r_srcL, amt));
1534 addInstr(env, PPCInstr_Shft(shftOp,
1535 True/*32bit shift*/,
1536 tmp, tmp, amt));
1537 r_srcL = tmp;
1540 /* Only 64 expressions need 64bit shifts,
1541 32bit shifts are fine for all others */
1542 if (ty == Ity_I64) {
1543 vassert(mode64);
1544 addInstr(env, PPCInstr_Shft(shftOp, False/*64bit shift*/,
1545 r_dst, r_srcL, ri_srcR));
1546 } else {
1547 addInstr(env, PPCInstr_Shft(shftOp, True/*32bit shift*/,
1548 r_dst, r_srcL, ri_srcR));
1550 return r_dst;
1553 /* How about a div? */
1554 if (e->Iex.Binop.op == Iop_DivS32 ||
1555 e->Iex.Binop.op == Iop_DivU32 ||
1556 e->Iex.Binop.op == Iop_DivS32E ||
1557 e->Iex.Binop.op == Iop_DivU32E) {
1558 Bool syned = toBool((e->Iex.Binop.op == Iop_DivS32) || (e->Iex.Binop.op == Iop_DivS32E));
1559 HReg r_dst = newVRegI(env);
1560 HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1561 HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1562 addInstr( env,
1563 PPCInstr_Div( ( ( e->Iex.Binop.op == Iop_DivU32E )
1564 || ( e->Iex.Binop.op == Iop_DivS32E ) ) ? True
1565 : False,
1566 syned,
1567 True/*32bit div*/,
1568 r_dst,
1569 r_srcL,
1570 r_srcR ) );
1571 return r_dst;
1573 if (e->Iex.Binop.op == Iop_DivS64 ||
1574 e->Iex.Binop.op == Iop_DivU64 || e->Iex.Binop.op == Iop_DivS64E
1575 || e->Iex.Binop.op == Iop_DivU64E ) {
1576 Bool syned = toBool((e->Iex.Binop.op == Iop_DivS64) ||(e->Iex.Binop.op == Iop_DivS64E));
1577 HReg r_dst = newVRegI(env);
1578 HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1579 HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1580 vassert(mode64);
1581 addInstr( env,
1582 PPCInstr_Div( ( ( e->Iex.Binop.op == Iop_DivS64E )
1583 || ( e->Iex.Binop.op
1584 == Iop_DivU64E ) ) ? True
1585 : False,
1586 syned,
1587 False/*64bit div*/,
1588 r_dst,
1589 r_srcL,
1590 r_srcR ) );
1591 return r_dst;
1594 /* No? Anyone for a mul? */
1595 if (e->Iex.Binop.op == Iop_Mul32
1596 || e->Iex.Binop.op == Iop_Mul64) {
1597 Bool syned = False;
1598 Bool sz32 = (e->Iex.Binop.op != Iop_Mul64);
1599 HReg r_dst = newVRegI(env);
1600 HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1601 HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1602 addInstr(env, PPCInstr_MulL(syned, False/*lo32*/, sz32,
1603 r_dst, r_srcL, r_srcR));
1604 return r_dst;
1607 /* 32 x 32 -> 64 multiply */
1608 if (mode64
1609 && (e->Iex.Binop.op == Iop_MullU32
1610 || e->Iex.Binop.op == Iop_MullS32)) {
1611 HReg tLo = newVRegI(env);
1612 HReg tHi = newVRegI(env);
1613 HReg r_dst = newVRegI(env);
1614 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS32);
1615 HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1616 HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1617 addInstr(env, PPCInstr_MulL(False/*signedness irrelevant*/,
1618 False/*lo32*/, True/*32bit mul*/,
1619 tLo, r_srcL, r_srcR));
1620 addInstr(env, PPCInstr_MulL(syned,
1621 True/*hi32*/, True/*32bit mul*/,
1622 tHi, r_srcL, r_srcR));
1623 addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
1624 r_dst, tHi, PPCRH_Imm(False,32)));
1625 addInstr(env, PPCInstr_Alu(Palu_OR,
1626 r_dst, r_dst, PPCRH_Reg(tLo)));
1627 return r_dst;
1630 /* El-mutanto 3-way compare? */
1631 if (e->Iex.Binop.op == Iop_CmpORD32S
1632 || e->Iex.Binop.op == Iop_CmpORD32U) {
1633 Bool syned = toBool(e->Iex.Binop.op == Iop_CmpORD32S);
1634 HReg dst = newVRegI(env);
1635 HReg srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1636 PPCRH* srcR = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2,
1637 IEndianess);
1638 addInstr(env, PPCInstr_Cmp(syned, True/*32bit cmp*/,
1639 7/*cr*/, srcL, srcR));
1640 addInstr(env, PPCInstr_MfCR(dst));
1641 addInstr(env, PPCInstr_Alu(Palu_AND, dst, dst,
1642 PPCRH_Imm(False,7<<1)));
1643 return dst;
1646 if (e->Iex.Binop.op == Iop_CmpORD64S
1647 || e->Iex.Binop.op == Iop_CmpORD64U) {
1648 Bool syned = toBool(e->Iex.Binop.op == Iop_CmpORD64S);
1649 HReg dst = newVRegI(env);
1650 HReg srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1651 PPCRH* srcR = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2,
1652 IEndianess);
1653 vassert(mode64);
1654 addInstr(env, PPCInstr_Cmp(syned, False/*64bit cmp*/,
1655 7/*cr*/, srcL, srcR));
1656 addInstr(env, PPCInstr_MfCR(dst));
1657 addInstr(env, PPCInstr_Alu(Palu_AND, dst, dst,
1658 PPCRH_Imm(False,7<<1)));
1659 return dst;
1662 if (e->Iex.Binop.op == Iop_Max32U) {
1663 HReg r1 = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1664 HReg r2 = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1665 HReg rdst = newVRegI(env);
1666 PPCCondCode cc = mk_PPCCondCode( Pct_TRUE, Pcf_7LT );
1667 addInstr(env, mk_iMOVds_RR(rdst, r1));
1668 addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
1669 7/*cr*/, rdst, PPCRH_Reg(r2)));
1670 addInstr(env, PPCInstr_CMov(cc, rdst, PPCRI_Reg(r2)));
1671 return rdst;
1674 if (e->Iex.Binop.op == Iop_32HLto64) {
1675 HReg r_Hi = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1676 HReg r_Lo = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1677 HReg r_Tmp = newVRegI(env);
1678 HReg r_dst = newVRegI(env);
1679 HReg msk = newVRegI(env);
1680 vassert(mode64);
1681 /* r_dst = OR( r_Hi<<32, r_Lo ) */
1682 addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
1683 r_dst, r_Hi, PPCRH_Imm(False,32)));
1684 addInstr(env, PPCInstr_LI(msk, 0xFFFFFFFF, mode64));
1685 addInstr(env, PPCInstr_Alu( Palu_AND, r_Tmp, r_Lo,
1686 PPCRH_Reg(msk) ));
1687 addInstr(env, PPCInstr_Alu( Palu_OR, r_dst, r_dst,
1688 PPCRH_Reg(r_Tmp) ));
1689 return r_dst;
1692 if ((e->Iex.Binop.op == Iop_CmpF64) ||
1693 (e->Iex.Binop.op == Iop_CmpD64) ||
1694 (e->Iex.Binop.op == Iop_CmpD128)) {
1695 HReg fr_srcL;
1696 HReg fr_srcL_lo;
1697 HReg fr_srcR;
1698 HReg fr_srcR_lo;
1700 HReg r_ccPPC = newVRegI(env);
1701 HReg r_ccIR = newVRegI(env);
1702 HReg r_ccIR_b0 = newVRegI(env);
1703 HReg r_ccIR_b2 = newVRegI(env);
1704 HReg r_ccIR_b6 = newVRegI(env);
1706 if (e->Iex.Binop.op == Iop_CmpF64) {
1707 fr_srcL = iselDblExpr(env, e->Iex.Binop.arg1, IEndianess);
1708 fr_srcR = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
1709 addInstr(env, PPCInstr_FpCmp(r_ccPPC, fr_srcL, fr_srcR));
1711 } else if (e->Iex.Binop.op == Iop_CmpD64) {
1712 fr_srcL = iselDfp64Expr(env, e->Iex.Binop.arg1, IEndianess);
1713 fr_srcR = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
1714 addInstr(env, PPCInstr_Dfp64Cmp(r_ccPPC, fr_srcL, fr_srcR));
1716 } else { // e->Iex.Binop.op == Iop_CmpD128
1717 iselDfp128Expr(&fr_srcL, &fr_srcL_lo, env, e->Iex.Binop.arg1,
1718 IEndianess);
1719 iselDfp128Expr(&fr_srcR, &fr_srcR_lo, env, e->Iex.Binop.arg2,
1720 IEndianess);
1721 addInstr(env, PPCInstr_Dfp128Cmp(r_ccPPC, fr_srcL, fr_srcL_lo,
1722 fr_srcR, fr_srcR_lo));
1725 /* Map compare result from PPC to IR,
1726 conforming to CmpF64 definition. */
1728 FP cmp result | PPC | IR
1729 --------------------------
1730 UN | 0x1 | 0x45
1731 EQ | 0x2 | 0x40
1732 GT | 0x4 | 0x00
1733 LT | 0x8 | 0x01
1736 // r_ccIR_b0 = r_ccPPC[0] | r_ccPPC[3]
1737 addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32bit shift*/,
1738 r_ccIR_b0, r_ccPPC,
1739 PPCRH_Imm(False,0x3)));
1740 addInstr(env, PPCInstr_Alu(Palu_OR, r_ccIR_b0,
1741 r_ccPPC, PPCRH_Reg(r_ccIR_b0)));
1742 addInstr(env, PPCInstr_Alu(Palu_AND, r_ccIR_b0,
1743 r_ccIR_b0, PPCRH_Imm(False,0x1)));
1745 // r_ccIR_b2 = r_ccPPC[0]
1746 addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1747 r_ccIR_b2, r_ccPPC,
1748 PPCRH_Imm(False,0x2)));
1749 addInstr(env, PPCInstr_Alu(Palu_AND, r_ccIR_b2,
1750 r_ccIR_b2, PPCRH_Imm(False,0x4)));
1752 // r_ccIR_b6 = r_ccPPC[0] | r_ccPPC[1]
1753 addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32bit shift*/,
1754 r_ccIR_b6, r_ccPPC,
1755 PPCRH_Imm(False,0x1)));
1756 addInstr(env, PPCInstr_Alu(Palu_OR, r_ccIR_b6,
1757 r_ccPPC, PPCRH_Reg(r_ccIR_b6)));
1758 addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1759 r_ccIR_b6, r_ccIR_b6,
1760 PPCRH_Imm(False,0x6)));
1761 addInstr(env, PPCInstr_Alu(Palu_AND, r_ccIR_b6,
1762 r_ccIR_b6, PPCRH_Imm(False,0x40)));
1764 // r_ccIR = r_ccIR_b0 | r_ccIR_b2 | r_ccIR_b6
1765 addInstr(env, PPCInstr_Alu(Palu_OR, r_ccIR,
1766 r_ccIR_b0, PPCRH_Reg(r_ccIR_b2)));
1767 addInstr(env, PPCInstr_Alu(Palu_OR, r_ccIR,
1768 r_ccIR, PPCRH_Reg(r_ccIR_b6)));
1769 return r_ccIR;
1772 if ( e->Iex.Binop.op == Iop_F64toI32S ||
1773 e->Iex.Binop.op == Iop_F64toI32U ) {
1774 /* This works in both mode64 and mode32. */
1775 HReg r1 = StackFramePtr(env->mode64);
1776 PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
1777 HReg fsrc = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
1778 HReg ftmp = newVRegF(env);
1779 HReg idst = newVRegI(env);
1781 /* Set host rounding mode */
1782 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
1784 sub_from_sp( env, 16 );
1785 addInstr(env, PPCInstr_FpCftI(False/*F->I*/, True/*int32*/,
1786 e->Iex.Binop.op == Iop_F64toI32S ? True/*syned*/
1787 : False,
1788 True/*flt64*/,
1789 ftmp, fsrc));
1790 addInstr(env, PPCInstr_FpSTFIW(r1, ftmp));
1791 addInstr(env, PPCInstr_Load(4, idst, zero_r1, mode64));
1793 /* in 64-bit mode we need to sign-widen idst. */
1794 if (mode64)
1795 addInstr(env, PPCInstr_Unary(Pun_EXTSW, idst, idst));
1797 add_to_sp( env, 16 );
1799 ///* Restore default FPU rounding. */
1800 //set_FPU_rounding_default( env );
1801 return idst;
1804 if (e->Iex.Binop.op == Iop_F64toI64S || e->Iex.Binop.op == Iop_F64toI64U ) {
1805 if (mode64) {
1806 HReg r1 = StackFramePtr(env->mode64);
1807 PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
1808 HReg fsrc = iselDblExpr(env, e->Iex.Binop.arg2,
1809 IEndianess);
1810 HReg idst = newVRegI(env);
1811 HReg ftmp = newVRegF(env);
1813 /* Set host rounding mode */
1814 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
1816 sub_from_sp( env, 16 );
1817 addInstr(env, PPCInstr_FpCftI(False/*F->I*/, False/*int64*/,
1818 ( e->Iex.Binop.op == Iop_F64toI64S ) ? True
1819 : False,
1820 True, ftmp, fsrc));
1821 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
1822 addInstr(env, PPCInstr_Load(8, idst, zero_r1, True/*mode64*/));
1823 add_to_sp( env, 16 );
1825 ///* Restore default FPU rounding. */
1826 //set_FPU_rounding_default( env );
1827 return idst;
1831 if (e->Iex.Binop.op == Iop_D64toI64S ) {
1832 HReg r1 = StackFramePtr(env->mode64);
1833 PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
1834 HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
1835 HReg idst = newVRegI(env);
1836 HReg ftmp = newVRegF(env);
1838 /* Set host rounding mode */
1839 set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
1840 addInstr(env, PPCInstr_Dfp64Unary(Pfp_DCTFIX, ftmp, fr_src));
1841 sub_from_sp( env, 16 );
1842 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
1843 addInstr(env, PPCInstr_Load(8, idst, zero_r1, mode64));
1845 add_to_sp( env, 16 );
1847 ///* Restore default FPU rounding. */
1848 //set_FPU_rounding_default( env );
1849 return idst;
1852 if (e->Iex.Binop.op == Iop_D128toI64S ) {
1853 PPCFpOp fpop = Pfp_DCTFIXQ;
1854 HReg r_srcHi = newVRegF(env);
1855 HReg r_srcLo = newVRegF(env);
1856 HReg idst = newVRegI(env);
1857 HReg ftmp = newVRegF(env);
1858 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
1860 set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
1861 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
1862 IEndianess);
1863 addInstr(env, PPCInstr_DfpD128toD64(fpop, ftmp, r_srcHi, r_srcLo));
1865 // put the D64 result into an integer register
1866 sub_from_sp( env, 16 );
1867 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
1868 addInstr(env, PPCInstr_Load(8, idst, zero_r1, True/*mode64*/));
1869 add_to_sp( env, 16 );
1870 return idst;
1872 break;
1875 /* --------- UNARY OP --------- */
1876 case Iex_Unop: {
1877 IROp op_unop = e->Iex.Unop.op;
1879 /* 1Uto8(32to1(expr32)) */
1880 DEFINE_PATTERN(p_32to1_then_1Uto8,
1881 unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1882 if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1883 const IRExpr* expr32 = mi.bindee[0];
1884 HReg r_dst = newVRegI(env);
1885 HReg r_src = iselWordExpr_R(env, expr32, IEndianess);
1886 addInstr(env, PPCInstr_Alu(Palu_AND, r_dst,
1887 r_src, PPCRH_Imm(False,1)));
1888 return r_dst;
1891 /* 16Uto32(LDbe:I16(expr32)) */
1893 DECLARE_PATTERN(p_LDbe16_then_16Uto32);
1894 DEFINE_PATTERN(p_LDbe16_then_16Uto32,
1895 unop(Iop_16Uto32,
1896 IRExpr_Load(IEndianess,Ity_I16,bind(0))) );
1897 if (matchIRExpr(&mi,p_LDbe16_then_16Uto32,e)) {
1898 HReg r_dst = newVRegI(env);
1899 PPCAMode* amode
1900 = iselWordExpr_AMode( env, mi.bindee[0], Ity_I16/*xfer*/,
1901 IEndianess );
1902 addInstr(env, PPCInstr_Load(2,r_dst,amode, mode64));
1903 return r_dst;
1907 switch (op_unop) {
1908 case Iop_8Uto16:
1909 case Iop_8Uto32:
1910 case Iop_8Uto64:
1911 case Iop_16Uto32:
1912 case Iop_16Uto64: {
1913 HReg r_dst = newVRegI(env);
1914 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1915 UShort mask = toUShort(op_unop==Iop_16Uto64 ? 0xFFFF :
1916 op_unop==Iop_16Uto32 ? 0xFFFF : 0xFF);
1917 addInstr(env, PPCInstr_Alu(Palu_AND,r_dst,r_src,
1918 PPCRH_Imm(False,mask)));
1919 return r_dst;
1921 case Iop_32Uto64: {
1922 HReg r_dst = newVRegI(env);
1923 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1924 vassert(mode64);
1925 addInstr(env,
1926 PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
1927 r_dst, r_src, PPCRH_Imm(False,32)));
1928 addInstr(env,
1929 PPCInstr_Shft(Pshft_SHR, False/*64bit shift*/,
1930 r_dst, r_dst, PPCRH_Imm(False,32)));
1931 return r_dst;
1933 case Iop_8Sto16:
1934 case Iop_8Sto32:
1935 case Iop_16Sto32: {
1936 HReg r_dst = newVRegI(env);
1937 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1938 UShort amt = toUShort(op_unop==Iop_16Sto32 ? 16 : 24);
1939 addInstr(env,
1940 PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1941 r_dst, r_src, PPCRH_Imm(False,amt)));
1942 addInstr(env,
1943 PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
1944 r_dst, r_dst, PPCRH_Imm(False,amt)));
1945 return r_dst;
1947 case Iop_8Sto64:
1948 case Iop_16Sto64: {
1949 HReg r_dst = newVRegI(env);
1950 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1951 UShort amt = toUShort(op_unop==Iop_8Sto64 ? 56 : 48);
1952 vassert(mode64);
1953 addInstr(env,
1954 PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
1955 r_dst, r_src, PPCRH_Imm(False,amt)));
1956 addInstr(env,
1957 PPCInstr_Shft(Pshft_SAR, False/*64bit shift*/,
1958 r_dst, r_dst, PPCRH_Imm(False,amt)));
1959 return r_dst;
1961 case Iop_32Sto64: {
1962 HReg r_dst = newVRegI(env);
1963 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1964 vassert(mode64);
1965 /* According to the IBM docs, in 64 bit mode, srawi r,r,0
1966 sign extends the lower 32 bits into the upper 32 bits. */
1967 addInstr(env,
1968 PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
1969 r_dst, r_src, PPCRH_Imm(False,0)));
1970 return r_dst;
1972 case Iop_Not8:
1973 case Iop_Not16:
1974 case Iop_Not32:
1975 case Iop_Not64: {
1976 if (op_unop == Iop_Not64) vassert(mode64);
1977 HReg r_dst = newVRegI(env);
1978 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1979 addInstr(env, PPCInstr_Unary(Pun_NOT,r_dst,r_src));
1980 return r_dst;
1982 case Iop_64HIto32: {
1983 if (!mode64) {
1984 HReg rHi, rLo;
1985 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
1986 return rHi; /* and abandon rLo .. poor wee thing :-) */
1987 } else {
1988 HReg r_dst = newVRegI(env);
1989 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1990 addInstr(env,
1991 PPCInstr_Shft(Pshft_SHR, False/*64bit shift*/,
1992 r_dst, r_src, PPCRH_Imm(False,32)));
1993 return r_dst;
1996 case Iop_64to32: {
1997 if (!mode64) {
1998 HReg rHi, rLo;
1999 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
2000 return rLo; /* similar stupid comment to the above ... */
2001 } else {
2002 /* This is a no-op. */
2003 return iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2006 case Iop_64to16: {
2007 if (mode64) { /* This is a no-op. */
2008 return iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2010 break; /* evidently not used in 32-bit mode */
2012 case Iop_16HIto8:
2013 case Iop_32HIto16: {
2014 HReg r_dst = newVRegI(env);
2015 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2016 UShort shift = toUShort(op_unop == Iop_16HIto8 ? 8 : 16);
2017 addInstr(env,
2018 PPCInstr_Shft(Pshft_SHR, True/*32bit shift*/,
2019 r_dst, r_src, PPCRH_Imm(False,shift)));
2020 return r_dst;
2022 case Iop_128HIto64:
2023 if (mode64) {
2024 HReg rHi, rLo;
2025 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
2026 return rHi; /* and abandon rLo .. poor wee thing :-) */
2028 break;
2029 case Iop_128to64:
2030 if (mode64) {
2031 HReg rHi, rLo;
2032 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
2033 return rLo; /* similar stupid comment to the above ... */
2035 break;
2036 case Iop_1Uto64:
2037 case Iop_1Uto32:
2038 case Iop_1Uto8:
2039 if ((op_unop != Iop_1Uto64) || mode64) {
2040 HReg r_dst = newVRegI(env);
2041 PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
2042 addInstr(env, PPCInstr_Set(cond,r_dst));
2043 return r_dst;
2045 break;
2046 case Iop_1Sto8:
2047 case Iop_1Sto16:
2048 case Iop_1Sto32: {
2049 /* could do better than this, but for now ... */
2050 HReg r_dst = newVRegI(env);
2051 PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
2052 addInstr(env, PPCInstr_Set(cond,r_dst));
2053 addInstr(env,
2054 PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
2055 r_dst, r_dst, PPCRH_Imm(False,31)));
2056 addInstr(env,
2057 PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
2058 r_dst, r_dst, PPCRH_Imm(False,31)));
2059 return r_dst;
2061 case Iop_1Sto64:
2062 if (mode64) {
2063 /* could do better than this, but for now ... */
2064 HReg r_dst = newVRegI(env);
2065 PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
2066 addInstr(env, PPCInstr_Set(cond,r_dst));
2067 addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
2068 r_dst, r_dst, PPCRH_Imm(False,63)));
2069 addInstr(env, PPCInstr_Shft(Pshft_SAR, False/*64bit shift*/,
2070 r_dst, r_dst, PPCRH_Imm(False,63)));
2071 return r_dst;
2073 break;
2075 case Iop_Clz32: case Iop_ClzNat32:
2076 case Iop_Clz64: case Iop_ClzNat64: {
2077 // cntlz is available even in the most basic (earliest) ppc
2078 // variants, so it's safe to generate it unconditionally.
2079 HReg r_src, r_dst;
2080 PPCUnaryOp op_clz = (op_unop == Iop_Clz32 || op_unop == Iop_ClzNat32)
2081 ? Pun_CLZ32 : Pun_CLZ64;
2082 if ((op_unop == Iop_Clz64 || op_unop == Iop_ClzNat64) && !mode64)
2083 goto irreducible;
2084 /* Count leading zeroes. */
2085 r_dst = newVRegI(env);
2086 r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2087 addInstr(env, PPCInstr_Unary(op_clz,r_dst,r_src));
2088 return r_dst;
2091 //case Iop_Ctz32:
2092 case Iop_CtzNat32:
2093 //case Iop_Ctz64:
2094 case Iop_CtzNat64:
2096 // Generate code using Clz, because we can't assume the host has
2097 // Ctz. In particular, part of the fix for bug 386945 involves
2098 // creating a Ctz in ir_opt.c from smaller fragments.
2099 PPCUnaryOp op_clz = Pun_CLZ64;
2100 Int WS = 64;
2101 if (op_unop == Iop_Ctz32 || op_unop == Iop_CtzNat32) {
2102 op_clz = Pun_CLZ32;
2103 WS = 32;
2105 /* Compute ctz(arg) = wordsize - clz(~arg & (arg - 1)), thusly:
2106 t1 = arg - 1
2107 t2 = not arg
2108 t2 = t2 & t1
2109 t2 = clz t2
2110 t1 = WS
2111 t2 = t1 - t2
2112 // result in t2
2114 HReg arg = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2115 HReg t1 = newVRegI(env);
2116 HReg t2 = newVRegI(env);
2117 addInstr(env, PPCInstr_Alu(Palu_SUB, t1, arg, PPCRH_Imm(True, 1)));
2118 addInstr(env, PPCInstr_Unary(Pun_NOT, t2, arg));
2119 addInstr(env, PPCInstr_Alu(Palu_AND, t2, t2, PPCRH_Reg(t1)));
2120 addInstr(env, PPCInstr_Unary(op_clz, t2, t2));
2121 addInstr(env, PPCInstr_LI(t1, WS, False/*!64-bit imm*/));
2122 addInstr(env, PPCInstr_Alu(Palu_SUB, t2, t1, PPCRH_Reg(t2)));
2123 return t2;
2126 case Iop_PopCount64: {
2127 // popcnt{x,d} is only available in later arch revs (ISA 3.0,
2128 // maybe) so it's not really correct to emit it here without a caps
2129 // check for the host.
2130 if (mode64) {
2131 HReg r_dst = newVRegI(env);
2132 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2133 addInstr(env, PPCInstr_Unary(Pun_POP64, r_dst, r_src));
2134 return r_dst;
2136 // We don't expect to be required to handle this in 32-bit mode.
2137 break;
2140 case Iop_PopCount32: {
2141 // Similar comment as for Ctz just above applies -- we really
2142 // should have a caps check here.
2144 HReg r_dst = newVRegI(env);
2145 // This actually generates popcntw, which in 64 bit mode does a
2146 // 32-bit count individually for both low and high halves of the
2147 // word. Per the comment at the top of iselIntExpr_R, in the 64
2148 // bit mode case, the user of this result is required to ignore
2149 // the upper 32 bits of the result. In 32 bit mode this is all
2150 // moot. It is however unclear from the PowerISA 3.0 docs that
2151 // the instruction exists in 32 bit mode; however our own front
2152 // end (guest_ppc_toIR.c) accepts it, so I guess it does exist.
2153 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2154 addInstr(env, PPCInstr_Unary(Pun_POP32, r_dst, r_src));
2155 return r_dst;
2158 case Iop_Reverse8sIn32_x1: {
2159 // A bit of a mouthful, but simply .. 32-bit byte swap.
2160 // This is pretty rubbish code. We could do vastly better if
2161 // rotates, and better, rotate-inserts, were allowed. Note that
2162 // even on a 64 bit target, the right shifts must be done as 32-bit
2163 // so as to introduce zero bits in the right places. So it seems
2164 // simplest to do the whole sequence in 32-bit insns.
2166 r = <argument> // working temporary, initial byte order ABCD
2167 Mask = 00FF00FF
2168 nMask = not Mask
2169 tHi = and r, Mask
2170 tHi = shl tHi, 8
2171 tLo = and r, nMask
2172 tLo = shr tLo, 8
2173 r = or tHi, tLo // now r has order BADC
2174 and repeat for 16 bit chunks ..
2175 Mask = 0000FFFF
2176 nMask = not Mask
2177 tHi = and r, Mask
2178 tHi = shl tHi, 16
2179 tLo = and r, nMask
2180 tLo = shr tLo, 16
2181 r = or tHi, tLo // now r has order DCBA
2183 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2184 HReg rr = newVRegI(env);
2185 HReg rMask = newVRegI(env);
2186 HReg rnMask = newVRegI(env);
2187 HReg rtHi = newVRegI(env);
2188 HReg rtLo = newVRegI(env);
2189 // Copy r_src since we need to modify it
2190 addInstr(env, mk_iMOVds_RR(rr, r_src));
2191 // Swap within 16-bit lanes
2192 addInstr(env, PPCInstr_LI(rMask, 0x00FF00FFULL,
2193 False/* !64bit imm*/));
2194 addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
2195 addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
2196 addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32 bit shift*/,
2197 rtHi, rtHi,
2198 PPCRH_Imm(False/*!signed imm*/, 8)));
2199 addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
2200 addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32 bit shift*/,
2201 rtLo, rtLo,
2202 PPCRH_Imm(False/*!signed imm*/, 8)));
2203 addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
2204 // And now swap the two 16-bit chunks
2205 addInstr(env, PPCInstr_LI(rMask, 0x0000FFFFULL,
2206 False/* !64bit imm*/));
2207 addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
2208 addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
2209 addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32 bit shift*/,
2210 rtHi, rtHi,
2211 PPCRH_Imm(False/*!signed imm*/, 16)));
2212 addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
2213 addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32 bit shift*/,
2214 rtLo, rtLo,
2215 PPCRH_Imm(False/*!signed imm*/, 16)));
2216 addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
2217 return rr;
2220 case Iop_Reverse8sIn64_x1: {
2221 /* See Iop_Reverse8sIn32_x1, but extended to 64bit.
2222 Can only be used in 64bit mode. */
2223 vassert (mode64);
2225 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2226 HReg rr = newVRegI(env);
2227 HReg rMask = newVRegI(env);
2228 HReg rnMask = newVRegI(env);
2229 HReg rtHi = newVRegI(env);
2230 HReg rtLo = newVRegI(env);
2232 // Copy r_src since we need to modify it
2233 addInstr(env, mk_iMOVds_RR(rr, r_src));
2235 // r = (r & 0x00FF00FF00FF00FF) << 8 | (r & 0xFF00FF00FF00FF00) >> 8
2236 addInstr(env, PPCInstr_LI(rMask, 0x00FF00FF00FF00FFULL,
2237 True/* 64bit imm*/));
2238 addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
2239 addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
2240 addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
2241 rtHi, rtHi,
2242 PPCRH_Imm(False/*!signed imm*/, 8)));
2243 addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
2244 addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
2245 rtLo, rtLo,
2246 PPCRH_Imm(False/*!signed imm*/, 8)));
2247 addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
2249 // r = (r & 0x0000FFFF0000FFFF) << 16 | (r & 0xFFFF0000FFFF0000) >> 16
2250 addInstr(env, PPCInstr_LI(rMask, 0x0000FFFF0000FFFFULL,
                                  True/* 64bit imm*/));
2252 addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
2253 addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
2254 addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
2255 rtHi, rtHi,
2256 PPCRH_Imm(False/*!signed imm*/, 16)));
2257 addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
2258 addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
2259 rtLo, rtLo,
2260 PPCRH_Imm(False/*!signed imm*/, 16)));
2261 addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
2263 // r = (r & 0x00000000FFFFFFFF) << 32 | (r & 0xFFFFFFFF00000000) >> 32
2264 /* We don't need to mask anymore, just two more shifts and an or. */
2265 addInstr(env, mk_iMOVds_RR(rtLo, rr));
2266 addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
2267 rtLo, rtLo,
2268 PPCRH_Imm(False/*!signed imm*/, 32)));
2269 addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
2270 rr, rr,
2271 PPCRH_Imm(False/*!signed imm*/, 32)));
2272 addInstr(env, PPCInstr_Alu(Palu_OR, rr, rr, PPCRH_Reg(rtLo)));
2274 return rr;
2277 case Iop_Left8:
2278 case Iop_Left16:
2279 case Iop_Left32:
2280 case Iop_Left64: {
2281 HReg r_src, r_dst;
2282 if (op_unop == Iop_Left64 && !mode64)
2283 goto irreducible;
2284 r_dst = newVRegI(env);
2285 r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2286 addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src));
2287 addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src)));
2288 return r_dst;
2291 case Iop_CmpwNEZ32: {
2292 HReg r_dst = newVRegI(env);
2293 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2294 addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src));
2295 addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src)));
2296 addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
2297 r_dst, r_dst, PPCRH_Imm(False, 31)));
2298 return r_dst;
2301 case Iop_CmpwNEZ64: {
2302 HReg r_dst = newVRegI(env);
2303 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2304 if (!mode64) goto irreducible;
2305 addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src));
2306 addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src)));
2307 addInstr(env, PPCInstr_Shft(Pshft_SAR, False/*64bit shift*/,
2308 r_dst, r_dst, PPCRH_Imm(False, 63)));
2309 return r_dst;
2312 case Iop_V128to32: {
2313 HReg r_aligned16;
2314 HReg dst = newVRegI(env);
2315 HReg vec = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
2316 PPCAMode *am_off0, *am_off_word0;
2317 sub_from_sp( env, 32 ); // Move SP down 32 bytes
2319 // get a quadword aligned address within our stack space
2320 r_aligned16 = get_sp_aligned16( env );
2321 am_off0 = PPCAMode_IR( 0, r_aligned16 );
2323 /* Note that the store below (done via PPCInstr_AvLdSt) uses
2324 * stvx, which stores the vector in proper LE format,
2325 * with byte zero (far right byte of the register in LE format)
2326 * stored at the lowest memory address. Therefore, to obtain
2327 * integer word zero, we need to use that lowest memory address
2328 * as the base for the load.
2330 if (IEndianess == Iend_LE)
2331 am_off_word0 = am_off0;
2332 else
2333 am_off_word0 = PPCAMode_IR( 12,r_aligned16 );
2335 // store vec, load low word to dst
2336 addInstr(env,
2337 PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
2338 addInstr(env,
2339 PPCInstr_Load( 4, dst, am_off_word0, mode64 ));
2341 add_to_sp( env, 32 ); // Reset SP
2342 return dst;
2345 case Iop_V128to64:
2346 case Iop_V128HIto64:
2347 if (mode64) {
2348 HReg r_aligned16;
2349 HReg dst = newVRegI(env);
2350 HReg vec = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
2351 PPCAMode *am_off0, *am_off8, *am_off_arg;
2352 sub_from_sp( env, 32 ); // Move SP down 32 bytes
2354 // get a quadword aligned address within our stack space
2355 r_aligned16 = get_sp_aligned16( env );
2356 am_off0 = PPCAMode_IR( 0, r_aligned16 );
2357 am_off8 = PPCAMode_IR( 8 ,r_aligned16 );
2359 // store vec, load low word or high to dst
2360 addInstr(env,
2361 PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
2362 if (IEndianess == Iend_LE) {
2363 if (op_unop == Iop_V128HIto64)
2364 am_off_arg = am_off8;
2365 else
2366 am_off_arg = am_off0;
2367 } else {
2368 if (op_unop == Iop_V128HIto64)
2369 am_off_arg = am_off0;
2370 else
2371 am_off_arg = am_off8;
2373 addInstr(env,
2374 PPCInstr_Load(
2375 8, dst,
2376 am_off_arg,
2377 mode64 ));
2379 add_to_sp( env, 32 ); // Reset SP
2380 return dst;
2382 break;
2383 case Iop_16to8:
2384 case Iop_32to8:
2385 case Iop_32to16:
2386 case Iop_64to8:
2387 /* These are no-ops. */
2388 return iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2390 /* ReinterpF64asI64(e) */
2391 /* Given an IEEE754 double, produce an I64 with the same bit
2392 pattern. */
2393 case Iop_ReinterpF64asI64:
2394 if (mode64) {
2395 PPCAMode *am_addr;
2396 HReg fr_src = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
2397 HReg r_dst = newVRegI(env);
2399 sub_from_sp( env, 16 ); // Move SP down 16 bytes
2400 am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
2402 // store as F64
2403 addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
2404 fr_src, am_addr ));
2405 // load as Ity_I64
2406 addInstr(env, PPCInstr_Load( 8, r_dst, am_addr, mode64 ));
2408 add_to_sp( env, 16 ); // Reset SP
2409 return r_dst;
2411 break;
2413 /* ReinterpF32asI32(e) */
2414 /* Given an IEEE754 float, produce an I32 with the same bit
2415 pattern. */
2416 case Iop_ReinterpF32asI32: {
2417 /* I believe this generates correct code for both 32- and
2418 64-bit hosts. */
2419 PPCAMode *am_addr;
2420 HReg fr_src = iselFltExpr(env, e->Iex.Unop.arg, IEndianess);
2421 HReg r_dst = newVRegI(env);
2423 sub_from_sp( env, 16 ); // Move SP down 16 bytes
2424 am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
2426 // store as F32
2427 addInstr(env, PPCInstr_FpLdSt( False/*store*/, 4,
2428 fr_src, am_addr ));
2429 // load as Ity_I32
2430 addInstr(env, PPCInstr_Load( 4, r_dst, am_addr, mode64 ));
2432 add_to_sp( env, 16 ); // Reset SP
2433 return r_dst;
2435 break;
2437 case Iop_ReinterpD64asI64:
2438 if (mode64) {
2439 PPCAMode *am_addr;
2440 HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
2441 HReg r_dst = newVRegI(env);
2443 sub_from_sp( env, 16 ); // Move SP down 16 bytes
2444 am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
2446 // store as D64
2447 addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
2448 fr_src, am_addr ));
2449 // load as Ity_I64
2450 addInstr(env, PPCInstr_Load( 8, r_dst, am_addr, mode64 ));
2451 add_to_sp( env, 16 ); // Reset SP
2452 return r_dst;
2454 break;
2456 case Iop_BCDtoDPB: {
2457 /* the following is only valid in 64 bit mode */
2458 if (!mode64) break;
2460 PPCCondCode cc;
2461 UInt argiregs;
2462 HReg argregs[1];
2463 HReg r_dst = newVRegI(env);
2464 Int argreg;
2466 argiregs = 0;
2467 argreg = 0;
2468 argregs[0] = hregPPC_GPR3(mode64);
2470 argiregs |= (1 << (argreg+3));
2471 addInstr(env, mk_iMOVds_RR( argregs[argreg++],
2472 iselWordExpr_R(env, e->Iex.Unop.arg,
2473 IEndianess) ) );
2475 cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
2476 if (IEndianess == Iend_LE) {
2477 addInstr(env, PPCInstr_Call( cc, (Addr)h_calc_BCDtoDPB,
2478 argiregs,
2479 mk_RetLoc_simple(RLPri_Int)) );
2480 } else {
2481 HWord* fdescr;
2482 fdescr = (HWord*)h_calc_BCDtoDPB;
2483 addInstr(env, PPCInstr_Call( cc, (Addr64)(fdescr[0]),
2484 argiregs,
2485 mk_RetLoc_simple(RLPri_Int)) );
2488 addInstr(env, mk_iMOVds_RR(r_dst, argregs[0]));
2489 return r_dst;
2492 case Iop_DPBtoBCD: {
2493 /* the following is only valid in 64 bit mode */
2494 if (!mode64) break;
2496 PPCCondCode cc;
2497 UInt argiregs;
2498 HReg argregs[1];
2499 HReg r_dst = newVRegI(env);
2500 Int argreg;
2502 argiregs = 0;
2503 argreg = 0;
2504 argregs[0] = hregPPC_GPR3(mode64);
2506 argiregs |= (1 << (argreg+3));
2507 addInstr(env, mk_iMOVds_RR( argregs[argreg++],
2508 iselWordExpr_R(env, e->Iex.Unop.arg,
2509 IEndianess) ) );
2511 cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
2513 if (IEndianess == Iend_LE) {
2514 addInstr(env, PPCInstr_Call( cc, (Addr)h_calc_DPBtoBCD,
2515 argiregs,
2516 mk_RetLoc_simple(RLPri_Int) ) );
2517 } else {
2518 HWord* fdescr;
2519 fdescr = (HWord*)h_calc_DPBtoBCD;
2520 addInstr(env, PPCInstr_Call( cc, (Addr64)(fdescr[0]),
2521 argiregs,
2522 mk_RetLoc_simple(RLPri_Int) ) );
2525 addInstr(env, mk_iMOVds_RR(r_dst, argregs[0]));
2526 return r_dst;
2528 case Iop_F32toF16x4_DEP: {
2529 HReg vdst = newVRegV(env); /* V128 */
2530 HReg dst = newVRegI(env); /* I64*/
2531 HReg r0 = newVRegI(env); /* I16*/
2532 HReg r1 = newVRegI(env); /* I16*/
2533 HReg r2 = newVRegI(env); /* I16*/
2534 HReg r3 = newVRegI(env); /* I16*/
2535 HReg vsrc = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
2536 PPCAMode *am_off0, *am_off2, *am_off4, *am_off6, *am_off8;
2537 PPCAMode *am_off10, *am_off12, *am_off14;
2538 HReg r_aligned16;
2540 sub_from_sp( env, 32 ); // Move SP down
2542 /* issue instruction */
2543 addInstr(env, PPCInstr_AvUnary(Pav_F32toF16x4, vdst, vsrc));
2545 /* Get a quadword aligned address within our stack space */
2546 r_aligned16 = get_sp_aligned16( env );
2547 am_off0 = PPCAMode_IR( 0, r_aligned16 );
2548 am_off2 = PPCAMode_IR( 2, r_aligned16 );
2549 am_off4 = PPCAMode_IR( 4, r_aligned16 );
2550 am_off6 = PPCAMode_IR( 6, r_aligned16 );
2551 am_off8 = PPCAMode_IR( 8, r_aligned16 );
2552 am_off10 = PPCAMode_IR( 10, r_aligned16 );
2553 am_off12 = PPCAMode_IR( 12, r_aligned16 );
2554 am_off14 = PPCAMode_IR( 14, r_aligned16 );
2556 /* Store v128 result to stack. */
2557 addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, vdst, am_off0));
2559 /* fetch four I16 from V128, store into contiguous I64 via stack, */
2560 if (IEndianess == Iend_LE) {
2561 addInstr(env, PPCInstr_Load( 2, r3, am_off12, mode64));
2562 addInstr(env, PPCInstr_Load( 2, r2, am_off8, mode64));
2563 addInstr(env, PPCInstr_Load( 2, r1, am_off4, mode64));
2564 addInstr(env, PPCInstr_Load( 2, r0, am_off0, mode64));
2565 } else {
2566 addInstr(env, PPCInstr_Load( 2, r0, am_off14, mode64));
2567 addInstr(env, PPCInstr_Load( 2, r1, am_off10, mode64));
2568 addInstr(env, PPCInstr_Load( 2, r2, am_off6, mode64));
2569 addInstr(env, PPCInstr_Load( 2, r3, am_off2, mode64));
2572 /* store in contiguous 64-bit values */
2573 addInstr(env, PPCInstr_Store( 2, am_off6, r3, mode64));
2574 addInstr(env, PPCInstr_Store( 2, am_off4, r2, mode64));
2575 addInstr(env, PPCInstr_Store( 2, am_off2, r1, mode64));
2576 addInstr(env, PPCInstr_Store( 2, am_off0, r0, mode64));
2578 /* Fetch I64 */
2579 addInstr(env, PPCInstr_Load(8, dst, am_off0, mode64));
2581 add_to_sp( env, 32 ); // Reset SP
2582 return dst;
2585 default:
2586 break;
2589 switch (e->Iex.Unop.op) {
2590 case Iop_ExtractExpD64: {
2592 HReg fr_dst = newVRegI(env);
2593 HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
2594 HReg tmp = newVRegF(env);
2595 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
2596 addInstr(env, PPCInstr_Dfp64Unary(Pfp_DXEX, tmp, fr_src));
2598 // put the D64 result into a integer register
2599 sub_from_sp( env, 16 );
2600 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
2601 addInstr(env, PPCInstr_Load(8, fr_dst, zero_r1, env->mode64));
2602 add_to_sp( env, 16 );
2603 return fr_dst;
2605 case Iop_ExtractExpD128: {
2606 HReg fr_dst = newVRegI(env);
2607 HReg r_srcHi;
2608 HReg r_srcLo;
2609 HReg tmp = newVRegF(env);
2610 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
2612 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Unop.arg,
2613 IEndianess);
2614 addInstr(env, PPCInstr_ExtractExpD128(Pfp_DXEXQ, tmp,
2615 r_srcHi, r_srcLo));
2617 sub_from_sp( env, 16 );
2618 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
2619 addInstr(env, PPCInstr_Load(8, fr_dst, zero_r1, env->mode64));
2620 add_to_sp( env, 16 );
2621 return fr_dst;
2623 default:
2624 break;
2627 break;
2630 /* --------- GET --------- */
2631 case Iex_Get: {
2632 if (ty == Ity_I8 || ty == Ity_I16 ||
2633 ty == Ity_I32 || ((ty == Ity_I64) && mode64)) {
2634 HReg r_dst = newVRegI(env);
2635 PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
2636 GuestStatePtr(mode64) );
2637 addInstr(env, PPCInstr_Load( toUChar(sizeofIRType(ty)),
2638 r_dst, am_addr, mode64 ));
2639 return r_dst;
2641 break;
2644 case Iex_GetI: {
2645 PPCAMode* src_am
2646 = genGuestArrayOffset( env, e->Iex.GetI.descr,
2647 e->Iex.GetI.ix, e->Iex.GetI.bias,
2648 IEndianess );
2649 HReg r_dst = newVRegI(env);
2650 if (mode64 && ty == Ity_I64) {
2651 addInstr(env, PPCInstr_Load( toUChar(8),
2652 r_dst, src_am, mode64 ));
2653 return r_dst;
2655 if ((!mode64) && ty == Ity_I32) {
2656 addInstr(env, PPCInstr_Load( toUChar(4),
2657 r_dst, src_am, mode64 ));
2658 return r_dst;
2660 break;
2663 /* --------- CCALL --------- */
2664 case Iex_CCall: {
2665 vassert(ty == e->Iex.CCall.retty); /* well-formedness of IR */
2667 /* be very restrictive for now. Only 32/64-bit ints allowed for
2668 args, and 32 bits or host machine word for return type. */
2669 if (!(ty == Ity_I32 || (mode64 && ty == Ity_I64)))
2670 goto irreducible;
2672 /* Marshal args, do the call, clear stack. */
2673 UInt addToSp = 0;
2674 RetLoc rloc = mk_RetLoc_INVALID();
2675 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2676 e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args,
2677 IEndianess );
2678 vassert(is_sane_RetLoc(rloc));
2679 vassert(rloc.pri == RLPri_Int);
2680 vassert(addToSp == 0);
2682 /* GPR3 now holds the destination address from Pin_Goto */
2683 HReg r_dst = newVRegI(env);
2684 addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
2685 return r_dst;
2688 /* --------- LITERAL --------- */
2689 /* 32/16/8-bit literals */
2690 case Iex_Const: {
2691 Long l;
2692 HReg r_dst = newVRegI(env);
2693 IRConst* con = e->Iex.Const.con;
2694 switch (con->tag) {
2695 case Ico_U64: if (!mode64) goto irreducible;
2696 l = (Long) con->Ico.U64; break;
2697 case Ico_U32: l = (Long)(Int) con->Ico.U32; break;
2698 case Ico_U16: l = (Long)(Int)(Short)con->Ico.U16; break;
2699 case Ico_U8: l = (Long)(Int)(Char )con->Ico.U8; break;
2700 default: vpanic("iselIntExpr_R.const(ppc)");
2702 addInstr(env, PPCInstr_LI(r_dst, (ULong)l, mode64));
2703 return r_dst;
2706 /* --------- MULTIPLEX --------- */
2707 case Iex_ITE: { // VFD
2708 if ((ty == Ity_I8 || ty == Ity_I16 ||
2709 ty == Ity_I32 || ((ty == Ity_I64) && mode64)) &&
2710 typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
2711 PPCRI* r1 = iselWordExpr_RI(env, e->Iex.ITE.iftrue, IEndianess);
2712 HReg r0 = iselWordExpr_R(env, e->Iex.ITE.iffalse, IEndianess);
2713 HReg r_dst = newVRegI(env);
2714 addInstr(env, mk_iMOVds_RR(r_dst,r0));
2715 PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond, IEndianess);
2716 addInstr(env, PPCInstr_CMov(cc, r_dst, r1));
2717 return r_dst;
2719 break;
2722 default:
2723 break;
2724 } /* switch (e->tag) */
2727 /* We get here if no pattern matched. */
2728 irreducible:
2729 ppIRExpr(e);
2730 vpanic("iselIntExpr_R(ppc): cannot reduce tree");
2734 /*---------------------------------------------------------*/
2735 /*--- ISEL: Integer expression auxiliaries ---*/
2736 /*---------------------------------------------------------*/
2738 /* --------------------- AMODEs --------------------- */
2740 /* Return an AMode which computes the value of the specified
2741 expression, possibly also adding insns to the code list as a
2742 result. The expression may only be a word-size one.
2745 static Bool uInt_fits_in_16_bits ( UInt u )
2747 /* Is u the same as the sign-extend of its lower 16 bits? */
2748 UInt v = u & 0xFFFF;
2750 v = (Int)(v << 16) >> 16; /* sign extend */
2752 return u == v;
2755 static Bool uLong_fits_in_16_bits ( ULong u )
2757 /* Is u the same as the sign-extend of its lower 16 bits? */
2758 ULong v = u & 0xFFFFULL;
2760 v = (Long)(v << 48) >> 48; /* sign extend */
2762 return u == v;
2765 static Bool uLong_is_4_aligned ( ULong u )
2767 return toBool((u & 3ULL) == 0);
2770 static Bool sane_AMode ( ISelEnv* env, PPCAMode* am )
2772 Bool mode64 = env->mode64;
2773 switch (am->tag) {
2774 case Pam_IR:
2775 /* Using uInt_fits_in_16_bits in 64-bit mode seems a bit bogus,
2776 somehow, but I think it's OK. */
2777 return toBool( hregClass(am->Pam.IR.base) == HRcGPR(mode64) &&
2778 hregIsVirtual(am->Pam.IR.base) &&
2779 uInt_fits_in_16_bits(am->Pam.IR.index) );
2780 case Pam_RR:
2781 return toBool( hregClass(am->Pam.RR.base) == HRcGPR(mode64) &&
2782 hregIsVirtual(am->Pam.RR.base) &&
2783 hregClass(am->Pam.RR.index) == HRcGPR(mode64) &&
2784 hregIsVirtual(am->Pam.RR.index) );
2785 default:
2786 vpanic("sane_AMode: unknown ppc amode tag");
2790 static
2791 PPCAMode* iselWordExpr_AMode ( ISelEnv* env, const IRExpr* e, IRType xferTy,
2792 IREndness IEndianess )
2794 PPCAMode* am = iselWordExpr_AMode_wrk(env, e, xferTy, IEndianess);
2795 vassert(sane_AMode(env, am));
2796 return am;
/* DO NOT CALL THIS DIRECTLY ! */
/* Worker for iselWordExpr_AMode.  Tries, in order:
     Add(expr, const)  with an encodable 16-bit displacement -> IR form
     Add(expr, expr)                                         -> RR form
     anything else     -> evaluate to a register, IR form with disp 0
   NOTE: the const-displacement match must be tried before the generic
   Add match, otherwise Add(expr,const) would always land in RR form. */
static PPCAMode* iselWordExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e,
                                          IRType xferTy, IREndness IEndianess )
{
   IRType ty = typeOfIRExpr(env->type_env,e);

   if (env->mode64) {

      /* If the data load/store type is I32 or I64, this amode might
         be destined for use in ld/ldu/lwa/st/stu.  In which case
         insist that if it comes out as an _IR, the immediate must
         have its bottom two bits be zero.  This does assume that for
         any other type (I8/I16/I128/F32/F64/V128) the amode will not
         be parked in any such instruction.  But that seems a
         reasonable assumption.  */
      Bool aligned4imm = toBool(xferTy == Ity_I32 || xferTy == Ity_I64);

      vassert(ty == Ity_I64);

      /* Add64(expr,i), where i == sign-extend of (i & 0xFFFF) */
      if (e->tag == Iex_Binop
          && e->Iex.Binop.op == Iop_Add64
          && e->Iex.Binop.arg2->tag == Iex_Const
          && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64
          && (aligned4imm ? uLong_is_4_aligned(e->Iex.Binop.arg2
                                               ->Iex.Const.con->Ico.U64)
                          : True)
          && uLong_fits_in_16_bits(e->Iex.Binop.arg2
                                   ->Iex.Const.con->Ico.U64)) {
         return PPCAMode_IR( (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64,
                             iselWordExpr_R(env, e->Iex.Binop.arg1,
                                            IEndianess) );
      }

      /* Add64(expr,expr) */
      if (e->tag == Iex_Binop
          && e->Iex.Binop.op == Iop_Add64) {
         HReg r_base = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
         HReg r_idx  = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
         return PPCAMode_RR( r_idx, r_base );
      }

   } else {

      vassert(ty == Ity_I32);

      /* Add32(expr,i), where i == sign-extend of (i & 0xFFFF) */
      /* 32-bit mode has no ld/ldu/lwa-style alignment constraint on
         the displacement, so no aligned4imm check here. */
      if (e->tag == Iex_Binop
          && e->Iex.Binop.op == Iop_Add32
          && e->Iex.Binop.arg2->tag == Iex_Const
          && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
          && uInt_fits_in_16_bits(e->Iex.Binop.arg2
                                  ->Iex.Const.con->Ico.U32)) {
         return PPCAMode_IR( (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32,
                             iselWordExpr_R(env, e->Iex.Binop.arg1,
                                            IEndianess) );
      }

      /* Add32(expr,expr) */
      if (e->tag == Iex_Binop
          && e->Iex.Binop.op == Iop_Add32) {
         HReg r_base = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
         HReg r_idx  = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
         return PPCAMode_RR( r_idx, r_base );
      }

   }

   /* Doesn't match anything in particular.  Generate it into
      a register and use that. */
   return PPCAMode_IR( 0, iselWordExpr_R(env,e,IEndianess) );
}
2873 /* --------------------- RH --------------------- */
2875 /* Compute an I8/I16/I32 (and I64, in 64-bit mode) into a RH
2876 (reg-or-halfword-immediate). It's important to specify whether the
2877 immediate is to be regarded as signed or not. If yes, this will
   never return -32768 as an immediate; this guarantees that all
   signed immediates that are returned can have their sign inverted if
   need be. */
2882 static PPCRH* iselWordExpr_RH ( ISelEnv* env, Bool syned, const IRExpr* e,
2883 IREndness IEndianess )
2885 PPCRH* ri = iselWordExpr_RH_wrk(env, syned, e, IEndianess);
2886 /* sanity checks ... */
2887 switch (ri->tag) {
2888 case Prh_Imm:
2889 vassert(ri->Prh.Imm.syned == syned);
2890 if (syned)
2891 vassert(ri->Prh.Imm.imm16 != 0x8000);
2892 return ri;
2893 case Prh_Reg:
2894 vassert(hregClass(ri->Prh.Reg.reg) == HRcGPR(env->mode64));
2895 vassert(hregIsVirtual(ri->Prh.Reg.reg));
2896 return ri;
2897 default:
2898 vpanic("iselIntExpr_RH: unknown ppc RH tag");
/* DO NOT CALL THIS DIRECTLY ! */
/* Worker for iselWordExpr_RH: return an immediate form when the
   expression is a constant that fits the requested (un)signed 16-bit
   range, otherwise fall back to evaluating it into a register. */
static PPCRH* iselWordExpr_RH_wrk ( ISelEnv* env, Bool syned, const IRExpr* e,
                                    IREndness IEndianess )
{
   ULong u;
   Long l;
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I8 || ty == Ity_I16 ||
           ty == Ity_I32 || ((ty == Ity_I64) && env->mode64));

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      IRConst* con = e->Iex.Const.con;
      /* What value are we aiming to generate? */
      switch (con->tag) {
      /* Note: Not sign-extending - we carry 'syned' around */
      case Ico_U64: vassert(env->mode64);
                    u = con->Ico.U64; break;
      case Ico_U32: u = 0xFFFFFFFF & con->Ico.U32; break;
      case Ico_U16: u = 0x0000FFFF & con->Ico.U16; break;
      case Ico_U8:  u = 0x000000FF & con->Ico.U8; break;
      default: vpanic("iselIntExpr_RH.Iex_Const(ppch)");
      }
      l = (Long)u;
      /* Now figure out if it's representable. */
      if (!syned && u <= 65535) {
         return PPCRH_Imm(False/*unsigned*/, toUShort(u & 0xFFFF));
      }
      /* Lower bound is -32767, not -32768, so that the returned
         immediate can always be safely negated (see the header
         comment on iselWordExpr_RH). */
      if (syned && l >= -32767 && l <= 32767) {
         return PPCRH_Imm(True/*signed*/, toUShort(u & 0xFFFF));
      }
      /* no luck; use the Slow Way. */
   }

   /* default case: calculate into a register and return that */
   return PPCRH_Reg( iselWordExpr_R ( env, e, IEndianess ) );
}
2941 /* --------------------- RIs --------------------- */
/* Calculate an expression into a PPCRI operand.  As with
   iselIntExpr_R, the expression can have type 32, 16 or 8 bits, or,
   in 64-bit mode, 64 bits. */
2947 static PPCRI* iselWordExpr_RI ( ISelEnv* env, const IRExpr* e,
2948 IREndness IEndianess )
2950 PPCRI* ri = iselWordExpr_RI_wrk(env, e, IEndianess);
2951 /* sanity checks ... */
2952 switch (ri->tag) {
2953 case Pri_Imm:
2954 return ri;
2955 case Pri_Reg:
2956 vassert(hregClass(ri->Pri.Reg) == HRcGPR(env->mode64));
2957 vassert(hregIsVirtual(ri->Pri.Reg));
2958 return ri;
2959 default:
2960 vpanic("iselIntExpr_RI: unknown ppc RI tag");
2964 /* DO NOT CALL THIS DIRECTLY ! */
2965 static PPCRI* iselWordExpr_RI_wrk ( ISelEnv* env, const IRExpr* e,
2966 IREndness IEndianess )
2968 Long l;
2969 IRType ty = typeOfIRExpr(env->type_env,e);
2970 vassert(ty == Ity_I8 || ty == Ity_I16 ||
2971 ty == Ity_I32 || ((ty == Ity_I64) && env->mode64));
2973 /* special case: immediate */
2974 if (e->tag == Iex_Const) {
2975 IRConst* con = e->Iex.Const.con;
2976 switch (con->tag) {
2977 case Ico_U64: vassert(env->mode64);
2978 l = (Long) con->Ico.U64; break;
2979 case Ico_U32: l = (Long)(Int) con->Ico.U32; break;
2980 case Ico_U16: l = (Long)(Int)(Short)con->Ico.U16; break;
2981 case Ico_U8: l = (Long)(Int)(Char )con->Ico.U8; break;
2982 default: vpanic("iselIntExpr_RI.Iex_Const(ppch)");
2984 return PPCRI_Imm((ULong)l);
2987 /* default case: calculate into a register and return that */
2988 return PPCRI_Reg( iselWordExpr_R ( env, e, IEndianess ) );
2992 /* --------------------- RH5u --------------------- */
2994 /* Compute an I8 into a reg-or-5-bit-unsigned-immediate, the latter
2995 being an immediate in the range 1 .. 31 inclusive. Used for doing
2996 shift amounts. Only used in 32-bit mode. */
2998 static PPCRH* iselWordExpr_RH5u ( ISelEnv* env, const IRExpr* e,
2999 IREndness IEndianess )
3001 PPCRH* ri;
3002 vassert(!env->mode64);
3003 ri = iselWordExpr_RH5u_wrk(env, e, IEndianess);
3004 /* sanity checks ... */
3005 switch (ri->tag) {
3006 case Prh_Imm:
3007 vassert(ri->Prh.Imm.imm16 >= 1 && ri->Prh.Imm.imm16 <= 31);
3008 vassert(!ri->Prh.Imm.syned);
3009 return ri;
3010 case Prh_Reg:
3011 vassert(hregClass(ri->Prh.Reg.reg) == HRcGPR(env->mode64));
3012 vassert(hregIsVirtual(ri->Prh.Reg.reg));
3013 return ri;
3014 default:
3015 vpanic("iselIntExpr_RH5u: unknown ppc RI tag");
3019 /* DO NOT CALL THIS DIRECTLY ! */
3020 static PPCRH* iselWordExpr_RH5u_wrk ( ISelEnv* env, const IRExpr* e,
3021 IREndness IEndianess )
3023 IRType ty = typeOfIRExpr(env->type_env,e);
3024 vassert(ty == Ity_I8);
3026 /* special case: immediate */
3027 if (e->tag == Iex_Const
3028 && e->Iex.Const.con->tag == Ico_U8
3029 && e->Iex.Const.con->Ico.U8 >= 1
3030 && e->Iex.Const.con->Ico.U8 <= 31) {
3031 return PPCRH_Imm(False/*unsigned*/, e->Iex.Const.con->Ico.U8);
3034 /* default case: calculate into a register and return that */
3035 return PPCRH_Reg( iselWordExpr_R ( env, e, IEndianess ) );
3039 /* --------------------- RH6u --------------------- */
3041 /* Compute an I8 into a reg-or-6-bit-unsigned-immediate, the latter
3042 being an immediate in the range 1 .. 63 inclusive. Used for doing
3043 shift amounts. Only used in 64-bit mode. */
3045 static PPCRH* iselWordExpr_RH6u ( ISelEnv* env, const IRExpr* e,
3046 IREndness IEndianess )
3048 PPCRH* ri;
3049 vassert(env->mode64);
3050 ri = iselWordExpr_RH6u_wrk(env, e, IEndianess);
3051 /* sanity checks ... */
3052 switch (ri->tag) {
3053 case Prh_Imm:
3054 vassert(ri->Prh.Imm.imm16 >= 1 && ri->Prh.Imm.imm16 <= 63);
3055 vassert(!ri->Prh.Imm.syned);
3056 return ri;
3057 case Prh_Reg:
3058 vassert(hregClass(ri->Prh.Reg.reg) == HRcGPR(env->mode64));
3059 vassert(hregIsVirtual(ri->Prh.Reg.reg));
3060 return ri;
3061 default:
3062 vpanic("iselIntExpr_RH6u: unknown ppc64 RI tag");
3066 /* DO NOT CALL THIS DIRECTLY ! */
3067 static PPCRH* iselWordExpr_RH6u_wrk ( ISelEnv* env, const IRExpr* e,
3068 IREndness IEndianess )
3070 IRType ty = typeOfIRExpr(env->type_env,e);
3071 vassert(ty == Ity_I8);
3073 /* special case: immediate */
3074 if (e->tag == Iex_Const
3075 && e->Iex.Const.con->tag == Ico_U8
3076 && e->Iex.Const.con->Ico.U8 >= 1
3077 && e->Iex.Const.con->Ico.U8 <= 63) {
3078 return PPCRH_Imm(False/*unsigned*/, e->Iex.Const.con->Ico.U8);
3081 /* default case: calculate into a register and return that */
3082 return PPCRH_Reg( iselWordExpr_R ( env, e, IEndianess ) );
3086 /* --------------------- CONDCODE --------------------- */
/* Generate code to evaluate a bit-typed expression, returning the
   condition code which would correspond when the expression would
   notionally have returned 1. */
/* Thin wrapper over iselCondCode_wrk: a PPCCondCode carries no
   register whose class/virtualness could be checked, so there is no
   post-condition to assert here. */
static PPCCondCode iselCondCode ( ISelEnv* env, const IRExpr* e,
                                  IREndness IEndianess )
{
   /* Uh, there's nothing we can sanity check here, unfortunately. */
   return iselCondCode_wrk(env,e, IEndianess);
}
/* DO NOT CALL THIS DIRECTLY ! */
/* Worker for iselCondCode.  Every comparison emitted here targets
   condition-register field 7 (the 7 passed to PPCInstr_Cmp), and the
   returned PPCCondCode names the CR7 flag to test plus the sense
   (Pct_TRUE/Pct_FALSE) in which to test it. */
static PPCCondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e,
                                      IREndness IEndianess )
{
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);

   /* Constant 1:Bit */
   if (e->tag == Iex_Const) {
      // Make a compare that will always be true (or always false):
      vassert(e->Iex.Const.con->Ico.U1 == True || e->Iex.Const.con->Ico.U1 == False);
      HReg r_zero = newVRegI(env);
      addInstr(env, PPCInstr_LI(r_zero, 0, env->mode64));
      /* cmp r_zero,r_zero always sets CR7.EQ ... */
      addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
                                 7/*cr*/, r_zero, PPCRH_Reg(r_zero)));
      /* ... so test it TRUE for constant 1, FALSE for constant 0. */
      return mk_PPCCondCode( e->Iex.Const.con->Ico.U1 ? Pct_TRUE : Pct_FALSE,
                             Pcf_7EQ );
   }

   /* Not1(...) */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
      /* Generate code for the arg, and negate the test condition */
      PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
      cond.test = invertCondTest(cond.test);
      return cond;
   }

   /* --- patterns rooted at: 32to1 or 64to1 --- */

   /* 32to1, 64to1 */
   if (e->tag == Iex_Unop &&
       (e->Iex.Unop.op == Iop_32to1 || e->Iex.Unop.op == Iop_64to1)) {
      HReg src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
      HReg tmp = newVRegI(env);
      /* could do better, probably -- andi. */
      /* Only bit 0 of the source is significant: mask it, then test
         (src & 1) == 1. */
      addInstr(env, PPCInstr_Alu(Palu_AND, tmp,
                                 src, PPCRH_Imm(False,1)));
      addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
                                 7/*cr*/, tmp, PPCRH_Imm(False,1)));
      return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
   }

   /* --- patterns rooted at: CmpNEZ8 --- */

   /* CmpNEZ8(x) */
   /* Note this cloned as CmpNE8(x,0) below. */
   /* could do better -- andi. */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ8) {
      HReg arg = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
      HReg tmp = newVRegI(env);
      addInstr(env, PPCInstr_Alu(Palu_AND, tmp, arg,
                                 PPCRH_Imm(False,0xFF)));
      addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
                                 7/*cr*/, tmp, PPCRH_Imm(False,0)));
      /* nonzero <=> not EQ */
      return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
   }

   /* --- patterns rooted at: CmpNEZ32 --- */

   /* CmpNEZ32(x) */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ32) {
      HReg r1 = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
      addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
                                 7/*cr*/, r1, PPCRH_Imm(False,0)));
      return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
   }

   /* --- patterns rooted at: Cmp*32* --- */

   /* Cmp*32*(x,y) */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ32
           || e->Iex.Binop.op == Iop_CmpNE32
           || e->Iex.Binop.op == Iop_CmpLT32S
           || e->Iex.Binop.op == Iop_CmpLT32U
           || e->Iex.Binop.op == Iop_CmpLE32S
           || e->Iex.Binop.op == Iop_CmpLE32U)) {
      Bool syned = (e->Iex.Binop.op == Iop_CmpLT32S ||
                    e->Iex.Binop.op == Iop_CmpLE32S);
      HReg   r1  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
      PPCRH* ri2 = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2, IEndianess);
      addInstr(env, PPCInstr_Cmp(syned, True/*32bit cmp*/,
                                 7/*cr*/, r1, ri2));

      switch (e->Iex.Binop.op) {
      case Iop_CmpEQ32:  return mk_PPCCondCode( Pct_TRUE,  Pcf_7EQ );
      case Iop_CmpNE32:  return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
      case Iop_CmpLT32U: case Iop_CmpLT32S:
         return mk_PPCCondCode( Pct_TRUE, Pcf_7LT );
      case Iop_CmpLE32U: case Iop_CmpLE32S:
         /* LE is expressed as "not GT". */
         return mk_PPCCondCode( Pct_FALSE, Pcf_7GT );
      default: vpanic("iselCondCode(ppc): CmpXX32");
      }
   }

   /* --- patterns rooted at: CmpNEZ64 --- */

   /* CmpNEZ64 */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ64) {
      if (!env->mode64) {
         /* 32-bit mode: the I64 lives in a register pair; OR the
            halves together, then test the result against zero. */
         HReg hi, lo;
         HReg tmp = newVRegI(env);
         iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg, IEndianess );
         addInstr(env, PPCInstr_Alu(Palu_OR, tmp, lo, PPCRH_Reg(hi)));
         addInstr(env, PPCInstr_Cmp(False/*sign*/, True/*32bit cmp*/,
                                    7/*cr*/, tmp,PPCRH_Imm(False,0)));
         return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
      } else {  // mode64
         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
         addInstr(env, PPCInstr_Cmp(False/*sign*/, False/*64bit cmp*/,
                                    7/*cr*/, r_src,PPCRH_Imm(False,0)));
         return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
      }
   }

   /* --- patterns rooted at: Cmp*64* --- */

   /* Cmp*64*(x,y) */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ64
           || e->Iex.Binop.op == Iop_CmpNE64
           || e->Iex.Binop.op == Iop_CmpLT64S
           || e->Iex.Binop.op == Iop_CmpLT64U
           || e->Iex.Binop.op == Iop_CmpLE64S
           || e->Iex.Binop.op == Iop_CmpLE64U)) {
      Bool syned = (e->Iex.Binop.op == Iop_CmpLT64S ||
                    e->Iex.Binop.op == Iop_CmpLE64S);
      HReg   r1  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
      PPCRH* ri2 = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2, IEndianess);
      /* 64-bit comparisons only arise in 64-bit mode. */
      vassert(env->mode64);
      addInstr(env, PPCInstr_Cmp(syned, False/*64bit cmp*/,
                                 7/*cr*/, r1, ri2));

      switch (e->Iex.Binop.op) {
      case Iop_CmpEQ64:  return mk_PPCCondCode( Pct_TRUE,  Pcf_7EQ );
      case Iop_CmpNE64:  return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
      case Iop_CmpLT64U: case Iop_CmpLT64S:
         return mk_PPCCondCode( Pct_TRUE, Pcf_7LT );
      case Iop_CmpLE64U: case Iop_CmpLE64S:
         /* LE is expressed as "not GT". */
         return mk_PPCCondCode( Pct_FALSE, Pcf_7GT );
      default: vpanic("iselCondCode(ppc): CmpXX64");
      }
   }

   /* --- patterns rooted at: CmpNE8 --- */

   /* CmpNE8(x,0) */
   /* Note this is a direct copy of CmpNEZ8 above. */
   /* could do better -- andi. */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_CmpNE8
       && isZeroU8(e->Iex.Binop.arg2)) {
      HReg arg = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
      HReg tmp = newVRegI(env);
      addInstr(env, PPCInstr_Alu(Palu_AND, tmp, arg,
                                 PPCRH_Imm(False,0xFF)));
      addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
                                 7/*cr*/, tmp, PPCRH_Imm(False,0)));
      return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
   }

   /* var */
   if (e->tag == Iex_RdTmp) {
      /* Only bit 0 of the temp is defined; mask and test it. */
      HReg r_src      = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      HReg src_masked = newVRegI(env);
      addInstr(env,
               PPCInstr_Alu(Palu_AND, src_masked,
                            r_src, PPCRH_Imm(False,1)));
      addInstr(env,
               PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
                            7/*cr*/, src_masked, PPCRH_Imm(False,1)));
      return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
   }

   /* --- And1(x,y), Or1(x,y) --- */
   /* FIXME: We could (and probably should) do a lot better here, by using the
      iselCondCode_C/_R scheme used in the amd64 insn selector. */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_And1 || e->Iex.Binop.op == Iop_Or1)) {
      /* Materialise both condition codes as 0/1 integers, combine
         them with AND/OR, then turn the result back into a condcode. */
      HReg x_as_int = newVRegI(env);
      PPCCondCode cc_x = iselCondCode(env, e->Iex.Binop.arg1, IEndianess);
      addInstr(env, PPCInstr_Set(cc_x, x_as_int));

      HReg y_as_int = newVRegI(env);
      PPCCondCode cc_y = iselCondCode(env, e->Iex.Binop.arg2, IEndianess);
      addInstr(env, PPCInstr_Set(cc_y, y_as_int));

      HReg tmp = newVRegI(env);
      PPCAluOp op = e->Iex.Binop.op == Iop_And1 ? Palu_AND : Palu_OR;
      addInstr(env, PPCInstr_Alu(op, tmp, x_as_int, PPCRH_Reg(y_as_int)));

      addInstr(env, PPCInstr_Alu(Palu_AND, tmp, tmp, PPCRH_Imm(False,1)));
      addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
                                 7/*cr*/, tmp, PPCRH_Imm(False,1)));
      return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
   }

   vex_printf("iselCondCode(ppc): No such tag(%u)\n", e->tag);
   ppIRExpr(e);
   vpanic("iselCondCode(ppc)");
}
3305 /*---------------------------------------------------------*/
3306 /*--- ISEL: Integer expressions (128 bit) ---*/
3307 /*---------------------------------------------------------*/
3309 /* 64-bit mode ONLY: compute a 128-bit value into a register pair,
3310 which is returned as the first two parameters. As with
3311 iselWordExpr_R, these may be either real or virtual regs; in any
3312 case they must not be changed by subsequent code emitted by the
3313 caller. */
3315 static void iselInt128Expr ( HReg* rHi, HReg* rLo, ISelEnv* env,
3316 const IRExpr* e, IREndness IEndianess )
3318 vassert(env->mode64);
3319 iselInt128Expr_wrk(rHi, rLo, env, e, IEndianess);
3320 # if 0
3321 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3322 # endif
3323 vassert(hregClass(*rHi) == HRcGPR(env->mode64));
3324 vassert(hregIsVirtual(*rHi));
3325 vassert(hregClass(*rLo) == HRcGPR(env->mode64));
3326 vassert(hregIsVirtual(*rLo));
/* DO NOT CALL THIS DIRECTLY ! */
/* Worker for iselInt128Expr (64-bit mode only): produce the 128-bit
   value as a hi/lo pair of 64-bit virtual registers. */
static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
                                 const IRExpr* e, IREndness IEndianess )
{
   Bool mode64 = env->mode64;

   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);

   /* read 128-bit IRTemp */
   if (e->tag == Iex_RdTmp) {
      lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
      return;
   }

   /* 128-bit GET */
   if (e->tag == Iex_Get) {
      PPCAMode* am_addr  = PPCAMode_IR( e->Iex.Get.offset,
                                        GuestStatePtr(mode64) );
      /* am_addr4 = am_addr + 4; two 8-byte loads cover the 16 bytes */
      PPCAMode* am_addr4 = advance4(env, am_addr);
      HReg tLo = newVRegI(env);
      HReg tHi = newVRegI(env);

      addInstr(env, PPCInstr_Load( 8, tHi, am_addr,  mode64));
      addInstr(env, PPCInstr_Load( 8, tLo, am_addr4, mode64));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }

   /* --------- BINARY ops --------- */
   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
      /* 64 x 64 -> 128 multiply */
      case Iop_MullU64:
      case Iop_MullS64: {
         HReg tLo    = newVRegI(env);
         HReg tHi    = newVRegI(env);
         Bool syned  = toBool(e->Iex.Binop.op == Iop_MullS64);
         HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
         HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
         /* The low 64 bits are the same for signed and unsigned
            multiply; only the high half depends on 'syned'. */
         addInstr(env, PPCInstr_MulL(False/*signedness irrelevant*/,
                                     False/*lo64*/, False/*64bit mul*/,
                                     tLo, r_srcL, r_srcR));
         addInstr(env, PPCInstr_MulL(syned,
                                     True/*hi64*/, False/*64bit mul*/,
                                     tHi, r_srcL, r_srcR));
         *rHi = tHi;
         *rLo = tLo;
         return;
      }

      /* 64HLto128(e1,e2) */
      case Iop_64HLto128:
         *rHi = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
         *rLo = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
         return;

      case Iop_D128toI128S: {
         HReg srcHi = INVALID_HREG;
         HReg srcLo = INVALID_HREG;
         HReg dstLo = newVRegI(env);
         HReg dstHi = newVRegI(env);
         HReg tmp   = newVRegV(env);
         PPCAMode* am_addr;
         PPCAMode* am_addr4;

         /* Get the DF128 value, store in two 64-bit halves */
         iselDfp128Expr( &srcHi, &srcLo, env, e->Iex.Binop.arg2, IEndianess );

         sub_from_sp( env, 16 );     // Move SP down 16 bytes
         am_addr  = PPCAMode_IR( 0, StackFramePtr(mode64) );
         am_addr4 = advance4(env, am_addr);

         addInstr(env, PPCInstr_XFormUnary994(Px_DFPTOIQS, tmp, srcHi, srcLo));

         // store the result in the VSR
         addInstr(env, PPCInstr_AvLdSt( False/*store*/, 16, tmp, am_addr ));

         // load the two Ity_64 values
         addInstr(env, PPCInstr_Load( 8, dstHi, am_addr,  mode64 ));
         addInstr(env, PPCInstr_Load( 8, dstLo, am_addr4, mode64 ));

         *rHi = dstHi;
         *rLo = dstLo;

         add_to_sp( env, 16 );       // Reset SP
         return;
      }

      default:
         break;
      }
   } /* if (e->tag == Iex_Binop) */

   /* --------- UNARY ops --------- */
   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
      case Iop_ReinterpV128asI128:
      case Iop_ReinterpF128asI128: {
         /* Bounce the 128-bit value through the stack: store it from
            the vector/FP side, reload as two 64-bit integers. */
         HReg src;
         HReg dstLo = newVRegI(env);
         HReg dstHi = newVRegI(env);
         PPCAMode* am_addr;
         PPCAMode* am_addr4;

         if (e->Iex.Unop.op == Iop_ReinterpF128asI128)
            src = iselFp128Expr(env, e->Iex.Unop.arg, IEndianess);
         else
            src = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);

         sub_from_sp( env, 16 );     // Move SP down 16 bytes
         am_addr  = PPCAMode_IR( 0, StackFramePtr(mode64) );
         am_addr4 = advance4(env, am_addr);

         // store the Ity_F128 value
         addInstr(env, PPCInstr_AvLdSt( False/*store*/, 16, src, am_addr ));

         // load the two Ity_64 values
         addInstr(env, PPCInstr_Load( 8, dstHi, am_addr,  mode64 ));
         addInstr(env, PPCInstr_Load( 8, dstLo, am_addr4, mode64 ));

         *rHi = dstHi;
         *rLo = dstLo;
         add_to_sp( env, 16 );       // Reset SP
         return;
      }
      default:
         break;
      }
   } /* if (e->tag == Iex_Unop) */

   vex_printf("iselInt128Expr(ppc64): No such tag(%u)\n", e->tag);
   ppIRExpr(e);
   vpanic("iselInt128Expr(ppc64)");
}
3468 /*---------------------------------------------------------*/
3469 /*--- ISEL: Integer expressions (64 bit) ---*/
3470 /*---------------------------------------------------------*/
3472 /* 32-bit mode ONLY: compute a 128-bit value into a register quad */
3473 static void iselInt128Expr_to_32x4 ( HReg* rHi, HReg* rMedHi, HReg* rMedLo,
3474 HReg* rLo, ISelEnv* env, const IRExpr* e,
3475 IREndness IEndianess )
3477 vassert(!env->mode64);
3478 iselInt128Expr_to_32x4_wrk(rHi, rMedHi, rMedLo, rLo, env, e, IEndianess);
3479 # if 0
3480 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3481 # endif
3482 vassert(hregClass(*rHi) == HRcInt32);
3483 vassert(hregIsVirtual(*rHi));
3484 vassert(hregClass(*rMedHi) == HRcInt32);
3485 vassert(hregIsVirtual(*rMedHi));
3486 vassert(hregClass(*rMedLo) == HRcInt32);
3487 vassert(hregIsVirtual(*rMedLo));
3488 vassert(hregClass(*rLo) == HRcInt32);
3489 vassert(hregIsVirtual(*rLo));
3492 static void iselInt128Expr_to_32x4_wrk ( HReg* rHi, HReg* rMedHi,
3493 HReg* rMedLo, HReg* rLo,
3494 ISelEnv* env, const IRExpr* e,
3495 IREndness IEndianess )
3497 vassert(e);
3498 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
3500 /* read 128-bit IRTemp */
3501 if (e->tag == Iex_RdTmp) {
3502 lookupIRTempQuad( rHi, rMedHi, rMedLo, rLo, env, e->Iex.RdTmp.tmp);
3503 return;
3506 if (e->tag == Iex_Binop) {
3508 IROp op_binop = e->Iex.Binop.op;
3509 switch (op_binop) {
3510 case Iop_64HLto128:
3511 iselInt64Expr(rHi, rMedHi, env, e->Iex.Binop.arg1, IEndianess);
3512 iselInt64Expr(rMedLo, rLo, env, e->Iex.Binop.arg2, IEndianess);
3513 return;
3514 default:
3515 vex_printf("iselInt128Expr_to_32x4_wrk: Binop case 0x%x not found\n",
3516 op_binop);
3517 break;
3521 vex_printf("iselInt128Expr_to_32x4_wrk: e->tag 0x%x not found\n", e->tag);
3522 return;
3525 /* 32-bit mode ONLY: compute a 64-bit value into a register pair,
3526 which is returned as the first two parameters. As with
3527 iselIntExpr_R, these may be either real or virtual regs; in any
3528 case they must not be changed by subsequent code emitted by the
3529 caller. */
3531 static void iselInt64Expr ( HReg* rHi, HReg* rLo,
3532 ISelEnv* env, const IRExpr* e,
3533 IREndness IEndianess )
3535 vassert(!env->mode64);
3536 iselInt64Expr_wrk(rHi, rLo, env, e, IEndianess);
3537 # if 0
3538 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3539 # endif
3540 vassert(hregClass(*rHi) == HRcInt32);
3541 vassert(hregIsVirtual(*rHi));
3542 vassert(hregClass(*rLo) == HRcInt32);
3543 vassert(hregIsVirtual(*rLo));
3546 /* DO NOT CALL THIS DIRECTLY ! */
3547 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
3548 ISelEnv* env, const IRExpr* e,
3549 IREndness IEndianess )
3551 vassert(e);
3552 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
3554 /* 64-bit load */
3555 if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
3556 HReg tLo = newVRegI(env);
3557 HReg tHi = newVRegI(env);
3558 HReg r_addr = iselWordExpr_R(env, e->Iex.Load.addr, IEndianess);
3559 vassert(!env->mode64);
3560 addInstr(env, PPCInstr_Load( 4/*byte-load*/,
3561 tHi, PPCAMode_IR( 0, r_addr ),
3562 False/*32-bit insn please*/) );
3563 addInstr(env, PPCInstr_Load( 4/*byte-load*/,
3564 tLo, PPCAMode_IR( 4, r_addr ),
3565 False/*32-bit insn please*/) );
3566 *rHi = tHi;
3567 *rLo = tLo;
3568 return;
3571 /* 64-bit literal */
3572 if (e->tag == Iex_Const) {
3573 ULong w64 = e->Iex.Const.con->Ico.U64;
3574 UInt wHi = ((UInt)(w64 >> 32)) & 0xFFFFFFFF;
3575 UInt wLo = ((UInt)w64) & 0xFFFFFFFF;
3576 HReg tLo = newVRegI(env);
3577 HReg tHi = newVRegI(env);
3578 vassert(e->Iex.Const.con->tag == Ico_U64);
3579 addInstr(env, PPCInstr_LI(tHi, (Long)(Int)wHi, False/*mode32*/));
3580 addInstr(env, PPCInstr_LI(tLo, (Long)(Int)wLo, False/*mode32*/));
3581 *rHi = tHi;
3582 *rLo = tLo;
3583 return;
3586 /* read 64-bit IRTemp */
3587 if (e->tag == Iex_RdTmp) {
3588 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
3589 return;
3592 /* 64-bit GET */
3593 if (e->tag == Iex_Get) {
3594 PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
3595 GuestStatePtr(False/*mode32*/) );
3596 PPCAMode* am_addr4 = advance4(env, am_addr);
3597 HReg tLo = newVRegI(env);
3598 HReg tHi = newVRegI(env);
3599 addInstr(env, PPCInstr_Load( 4, tHi, am_addr, False/*mode32*/ ));
3600 addInstr(env, PPCInstr_Load( 4, tLo, am_addr4, False/*mode32*/ ));
3601 *rHi = tHi;
3602 *rLo = tLo;
3603 return;
3606 /* --------- CCALL --------- */
3607 if(e->tag == Iex_CCall) {
3608 IRType ty = typeOfIRExpr(env->type_env,e);
3609 Bool mode64 = env->mode64;
3611 vassert(ty == e->Iex.CCall.retty); /* well-formedness of IR */
3613 /* be very restrictive for now. Only 32-bit ints allowed for
3614 args, and 32 bits or host machine word for return type. */
3615 vassert(!(ty == Ity_I32 || (mode64 && ty == Ity_I64)));
3617 /* Marshal args, do the call, clear stack. */
3618 UInt addToSp = 0;
3619 RetLoc rloc = mk_RetLoc_INVALID();
3620 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
3621 e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args,
3622 IEndianess );
3623 vassert(is_sane_RetLoc(rloc));
3625 vassert(rloc.pri == RLPri_2Int);
3626 vassert(addToSp == 0);
3628 /* GPR3 now holds the destination address from Pin_Goto */
3629 HReg r_dst = newVRegI(env);
3630 addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
3631 *rHi = r_dst;
3632 *rLo = r_dst;
3633 return;
3636 /* 64-bit ITE */
3637 if (e->tag == Iex_ITE) { // VFD
3638 HReg e0Lo, e0Hi, eXLo, eXHi;
3639 iselInt64Expr(&eXHi, &eXLo, env, e->Iex.ITE.iftrue, IEndianess);
3640 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.ITE.iffalse, IEndianess);
3641 HReg tLo = newVRegI(env);
3642 HReg tHi = newVRegI(env);
3643 addInstr(env, mk_iMOVds_RR(tHi,e0Hi));
3644 addInstr(env, mk_iMOVds_RR(tLo,e0Lo));
3645 PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond, IEndianess);
3646 addInstr(env, PPCInstr_CMov(cc,tHi,PPCRI_Reg(eXHi)));
3647 addInstr(env, PPCInstr_CMov(cc,tLo,PPCRI_Reg(eXLo)));
3648 *rHi = tHi;
3649 *rLo = tLo;
3650 return;
3653 /* --------- BINARY ops --------- */
3654 if (e->tag == Iex_Binop) {
3655 IROp op_binop = e->Iex.Binop.op;
3656 switch (op_binop) {
3657 /* 32 x 32 -> 64 multiply */
3658 case Iop_MullU32:
3659 case Iop_MullS32: {
3660 HReg tLo = newVRegI(env);
3661 HReg tHi = newVRegI(env);
3662 Bool syned = toBool(op_binop == Iop_MullS32);
3663 HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1,
3664 IEndianess);
3665 HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2,
3666 IEndianess);
3667 addInstr(env, PPCInstr_MulL(False/*signedness irrelevant*/,
3668 False/*lo32*/, True/*32bit mul*/,
3669 tLo, r_srcL, r_srcR));
3670 addInstr(env, PPCInstr_MulL(syned,
3671 True/*hi32*/, True/*32bit mul*/,
3672 tHi, r_srcL, r_srcR));
3673 *rHi = tHi;
3674 *rLo = tLo;
3675 return;
3678 /* Or64/And64/Xor64 */
3679 case Iop_Or64:
3680 case Iop_And64:
3681 case Iop_Xor64: {
3682 HReg xLo, xHi, yLo, yHi;
3683 HReg tLo = newVRegI(env);
3684 HReg tHi = newVRegI(env);
3685 PPCAluOp op = (op_binop == Iop_Or64) ? Palu_OR :
3686 (op_binop == Iop_And64) ? Palu_AND : Palu_XOR;
3687 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1, IEndianess);
3688 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2, IEndianess);
3689 addInstr(env, PPCInstr_Alu(op, tHi, xHi, PPCRH_Reg(yHi)));
3690 addInstr(env, PPCInstr_Alu(op, tLo, xLo, PPCRH_Reg(yLo)));
3691 *rHi = tHi;
3692 *rLo = tLo;
3693 return;
3696 /* Add64 */
3697 case Iop_Add64: {
3698 HReg xLo, xHi, yLo, yHi;
3699 HReg tLo = newVRegI(env);
3700 HReg tHi = newVRegI(env);
3701 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1, IEndianess);
3702 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2, IEndianess);
3703 addInstr(env, PPCInstr_AddSubC( True/*add*/, True /*set carry*/,
3704 tLo, xLo, yLo));
3705 addInstr(env, PPCInstr_AddSubC( True/*add*/, False/*read carry*/,
3706 tHi, xHi, yHi));
3707 *rHi = tHi;
3708 *rLo = tLo;
3709 return;
3712 /* 32HLto64(e1,e2) */
3713 case Iop_32HLto64:
3714 *rHi = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
3715 *rLo = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
3716 return;
3718 /* F64toI64[S|U] */
3719 case Iop_F64toI64S: case Iop_F64toI64U: {
3720 HReg tLo = newVRegI(env);
3721 HReg tHi = newVRegI(env);
3722 HReg r1 = StackFramePtr(env->mode64);
3723 PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
3724 PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
3725 HReg fsrc = iselDblExpr(env, e->Iex.Binop.arg2,
3726 IEndianess);
3727 HReg ftmp = newVRegF(env);
3729 vassert(!env->mode64);
3730 /* Set host rounding mode */
3731 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
3733 sub_from_sp( env, 16 );
3734 addInstr(env, PPCInstr_FpCftI(False/*F->I*/, False/*int64*/,
3735 (op_binop == Iop_F64toI64S) ? True : False,
3736 True, ftmp, fsrc));
3737 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
3738 addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3739 addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3740 add_to_sp( env, 16 );
3742 ///* Restore default FPU rounding. */
3743 //set_FPU_rounding_default( env );
3744 *rHi = tHi;
3745 *rLo = tLo;
3746 return;
3748 case Iop_D64toI64S: {
3749 HReg tLo = newVRegI(env);
3750 HReg tHi = newVRegI(env);
3751 HReg r1 = StackFramePtr(env->mode64);
3752 PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
3753 PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
3754 HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
3755 HReg tmp = newVRegF(env);
3757 vassert(!env->mode64);
3758 set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
3759 addInstr(env, PPCInstr_Dfp64Unary(Pfp_DCTFIX, tmp, fr_src));
3761 sub_from_sp( env, 16 );
3762 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
3763 addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3764 addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3765 add_to_sp( env, 16 );
3766 *rHi = tHi;
3767 *rLo = tLo;
3768 return;
3770 case Iop_D128toI64S: {
3771 PPCFpOp fpop = Pfp_DCTFIXQ;
3772 HReg r_srcHi = newVRegF(env);
3773 HReg r_srcLo = newVRegF(env);
3774 HReg tLo = newVRegI(env);
3775 HReg tHi = newVRegI(env);
3776 HReg ftmp = newVRegF(env);
3777 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
3778 PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
3780 set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
3781 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
3782 IEndianess);
3783 addInstr(env, PPCInstr_DfpD128toD64(fpop, ftmp, r_srcHi, r_srcLo));
3785 // put the D64 result into an integer register pair
3786 sub_from_sp( env, 16 );
3787 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
3788 addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3789 addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3790 add_to_sp( env, 16 );
3791 *rHi = tHi;
3792 *rLo = tLo;
3793 return;
3795 default:
3796 break;
3798 } /* if (e->tag == Iex_Binop) */
3801 /* --------- UNARY ops --------- */
3802 if (e->tag == Iex_Unop) {
3803 switch (e->Iex.Unop.op) {
3805 /* CmpwNEZ64(e) */
3806 case Iop_CmpwNEZ64: {
3807 HReg argHi, argLo;
3808 HReg tmp1 = newVRegI(env);
3809 HReg tmp2 = newVRegI(env);
3810 iselInt64Expr(&argHi, &argLo, env, e->Iex.Unop.arg, IEndianess);
3811 /* tmp1 = argHi | argLo */
3812 addInstr(env, PPCInstr_Alu(Palu_OR, tmp1, argHi, PPCRH_Reg(argLo)));
3813 /* tmp2 = (tmp1 | -tmp1) >>s 31 */
3814 addInstr(env, PPCInstr_Unary(Pun_NEG,tmp2,tmp1));
3815 addInstr(env, PPCInstr_Alu(Palu_OR, tmp2, tmp2, PPCRH_Reg(tmp1)));
3816 addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
3817 tmp2, tmp2, PPCRH_Imm(False, 31)));
3818 *rHi = tmp2;
3819 *rLo = tmp2; /* yes, really tmp2 */
3820 return;
3823 /* Left64 */
3824 case Iop_Left64: {
3825 HReg argHi, argLo;
3826 HReg zero32 = newVRegI(env);
3827 HReg resHi = newVRegI(env);
3828 HReg resLo = newVRegI(env);
3829 iselInt64Expr(&argHi, &argLo, env, e->Iex.Unop.arg, IEndianess);
3830 vassert(env->mode64 == False);
3831 addInstr(env, PPCInstr_LI(zero32, 0, env->mode64));
3832 /* resHi:resLo = - argHi:argLo */
3833 addInstr(env, PPCInstr_AddSubC( False/*sub*/, True/*set carry*/,
3834 resLo, zero32, argLo ));
3835 addInstr(env, PPCInstr_AddSubC( False/*sub*/, False/*read carry*/,
3836 resHi, zero32, argHi ));
3837 /* resHi:resLo |= srcHi:srcLo */
3838 addInstr(env, PPCInstr_Alu(Palu_OR, resLo, resLo, PPCRH_Reg(argLo)));
3839 addInstr(env, PPCInstr_Alu(Palu_OR, resHi, resHi, PPCRH_Reg(argHi)));
3840 *rHi = resHi;
3841 *rLo = resLo;
3842 return;
3845 /* 32Sto64(e) */
3846 case Iop_32Sto64: {
3847 HReg tHi = newVRegI(env);
3848 HReg src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
3849 addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
3850 tHi, src, PPCRH_Imm(False,31)));
3851 *rHi = tHi;
3852 *rLo = src;
3853 return;
3855 case Iop_ExtractExpD64: {
3856 HReg tmp = newVRegF(env);
3857 HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
3858 HReg tLo = newVRegI(env);
3859 HReg tHi = newVRegI(env);
3860 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
3861 PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
3863 addInstr(env, PPCInstr_Dfp64Unary(Pfp_DXEX, tmp, fr_src));
3865 // put the D64 result into a integer register pair
3866 sub_from_sp( env, 16 );
3867 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
3868 addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3869 addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3870 add_to_sp( env, 16 );
3871 *rHi = tHi;
3872 *rLo = tLo;
3873 return;
3875 case Iop_ExtractExpD128: {
3876 HReg r_srcHi;
3877 HReg r_srcLo;
3878 HReg tmp = newVRegF(env);
3879 HReg tLo = newVRegI(env);
3880 HReg tHi = newVRegI(env);
3881 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
3882 PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
3884 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Unop.arg, IEndianess);
3885 addInstr(env, PPCInstr_ExtractExpD128(Pfp_DXEXQ, tmp,
3886 r_srcHi, r_srcLo));
3888 // put the D64 result into a integer register pair
3889 sub_from_sp( env, 16 );
3890 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
3891 addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3892 addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3893 add_to_sp( env, 16 );
3894 *rHi = tHi;
3895 *rLo = tLo;
3896 return;
3899 /* 32Uto64(e) */
3900 case Iop_32Uto64: {
3901 HReg tHi = newVRegI(env);
3902 HReg tLo = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
3903 addInstr(env, PPCInstr_LI(tHi, 0, False/*mode32*/));
3904 *rHi = tHi;
3905 *rLo = tLo;
3906 return;
3909 case Iop_128to64: {
3910 /* Narrow, return the low 64-bit half as a 32-bit
3911 * register pair */
3912 HReg r_Hi = INVALID_HREG;
3913 HReg r_MedHi = INVALID_HREG;
3914 HReg r_MedLo = INVALID_HREG;
3915 HReg r_Lo = INVALID_HREG;
3917 iselInt128Expr_to_32x4(&r_Hi, &r_MedHi, &r_MedLo, &r_Lo,
3918 env, e->Iex.Unop.arg, IEndianess);
3919 *rHi = r_MedLo;
3920 *rLo = r_Lo;
3921 return;
3924 case Iop_128HIto64: {
3925 /* Narrow, return the high 64-bit half as a 32-bit
3926 * register pair */
3927 HReg r_Hi = INVALID_HREG;
3928 HReg r_MedHi = INVALID_HREG;
3929 HReg r_MedLo = INVALID_HREG;
3930 HReg r_Lo = INVALID_HREG;
3932 iselInt128Expr_to_32x4(&r_Hi, &r_MedHi, &r_MedLo, &r_Lo,
3933 env, e->Iex.Unop.arg, IEndianess);
3934 *rHi = r_Hi;
3935 *rLo = r_MedHi;
3936 return;
3939 /* V128{HI}to64 */
3940 case Iop_V128HIto64:
3941 case Iop_V128to64: {
3942 HReg r_aligned16;
3943 Int off = e->Iex.Unop.op==Iop_V128HIto64 ? 0 : 8;
3944 HReg tLo = newVRegI(env);
3945 HReg tHi = newVRegI(env);
3946 HReg vec = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
3947 PPCAMode *am_off0, *am_offLO, *am_offHI;
3948 sub_from_sp( env, 32 ); // Move SP down 32 bytes
3950 // get a quadword aligned address within our stack space
3951 r_aligned16 = get_sp_aligned16( env );
3952 am_off0 = PPCAMode_IR( 0, r_aligned16 );
3953 am_offHI = PPCAMode_IR( off, r_aligned16 );
3954 am_offLO = PPCAMode_IR( off+4, r_aligned16 );
3956 // store as Vec128
3957 addInstr(env,
3958 PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
3960 // load hi,lo words (of hi/lo half of vec) as Ity_I32's
3961 addInstr(env,
3962 PPCInstr_Load( 4, tHi, am_offHI, False/*mode32*/ ));
3963 addInstr(env,
3964 PPCInstr_Load( 4, tLo, am_offLO, False/*mode32*/ ));
3966 add_to_sp( env, 32 ); // Reset SP
3967 *rHi = tHi;
3968 *rLo = tLo;
3969 return;
3972 /* could do better than this, but for now ... */
3973 case Iop_1Sto64: {
3974 HReg tLo = newVRegI(env);
3975 HReg tHi = newVRegI(env);
3976 PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
3977 addInstr(env, PPCInstr_Set(cond,tLo));
3978 addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
3979 tLo, tLo, PPCRH_Imm(False,31)));
3980 addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
3981 tLo, tLo, PPCRH_Imm(False,31)));
3982 addInstr(env, mk_iMOVds_RR(tHi, tLo));
3983 *rHi = tHi;
3984 *rLo = tLo;
3985 return;
3988 case Iop_Not64: {
3989 HReg xLo, xHi;
3990 HReg tmpLo = newVRegI(env);
3991 HReg tmpHi = newVRegI(env);
3992 iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg, IEndianess);
3993 addInstr(env, PPCInstr_Unary(Pun_NOT,tmpLo,xLo));
3994 addInstr(env, PPCInstr_Unary(Pun_NOT,tmpHi,xHi));
3995 *rHi = tmpHi;
3996 *rLo = tmpLo;
3997 return;
4000 /* ReinterpF64asI64(e) */
4001 /* Given an IEEE754 double, produce an I64 with the same bit
4002 pattern. */
4003 case Iop_ReinterpF64asI64: {
4004 PPCAMode *am_addr0, *am_addr1;
4005 HReg fr_src = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
4006 HReg r_dstLo = newVRegI(env);
4007 HReg r_dstHi = newVRegI(env);
4009 sub_from_sp( env, 16 ); // Move SP down 16 bytes
4010 am_addr0 = PPCAMode_IR( 0, StackFramePtr(False/*mode32*/) );
4011 am_addr1 = PPCAMode_IR( 4, StackFramePtr(False/*mode32*/) );
4013 // store as F64
4014 addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
4015 fr_src, am_addr0 ));
4017 // load hi,lo as Ity_I32's
4018 addInstr(env, PPCInstr_Load( 4, r_dstHi,
4019 am_addr0, False/*mode32*/ ));
4020 addInstr(env, PPCInstr_Load( 4, r_dstLo,
4021 am_addr1, False/*mode32*/ ));
4022 *rHi = r_dstHi;
4023 *rLo = r_dstLo;
4025 add_to_sp( env, 16 ); // Reset SP
4026 return;
4029 case Iop_ReinterpD64asI64: {
4030 HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
4031 PPCAMode *am_addr0, *am_addr1;
4032 HReg r_dstLo = newVRegI(env);
4033 HReg r_dstHi = newVRegI(env);
4036 sub_from_sp( env, 16 ); // Move SP down 16 bytes
4037 am_addr0 = PPCAMode_IR( 0, StackFramePtr(False/*mode32*/) );
4038 am_addr1 = PPCAMode_IR( 4, StackFramePtr(False/*mode32*/) );
4040 // store as D64
4041 addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
4042 fr_src, am_addr0 ));
4044 // load hi,lo as Ity_I32's
4045 addInstr(env, PPCInstr_Load( 4, r_dstHi,
4046 am_addr0, False/*mode32*/ ));
4047 addInstr(env, PPCInstr_Load( 4, r_dstLo,
4048 am_addr1, False/*mode32*/ ));
4049 *rHi = r_dstHi;
4050 *rLo = r_dstLo;
4052 add_to_sp( env, 16 ); // Reset SP
4054 return;
4057 case Iop_BCDtoDPB: {
4058 PPCCondCode cc;
4059 UInt argiregs;
4060 HReg argregs[2];
4061 Int argreg;
4062 HReg tLo = newVRegI(env);
4063 HReg tHi = newVRegI(env);
4064 HReg tmpHi;
4065 HReg tmpLo;
4066 Bool mode64 = env->mode64;
4068 argregs[0] = hregPPC_GPR3(mode64);
4069 argregs[1] = hregPPC_GPR4(mode64);
4071 argiregs = 0;
4072 argreg = 0;
4074 iselInt64Expr( &tmpHi, &tmpLo, env, e->Iex.Unop.arg, IEndianess );
4076 argiregs |= ( 1 << (argreg+3 ) );
4077 addInstr( env, mk_iMOVds_RR( argregs[argreg++], tmpHi ) );
4079 argiregs |= ( 1 << (argreg+3 ) );
4080 addInstr( env, mk_iMOVds_RR( argregs[argreg], tmpLo ) );
4082 cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
4084 if (IEndianess == Iend_LE) {
4085 addInstr( env, PPCInstr_Call( cc, (Addr)h_calc_BCDtoDPB,
4086 argiregs,
4087 mk_RetLoc_simple(RLPri_2Int) ) );
4088 } else {
4089 Addr64 target;
4090 target = mode64 ? (Addr)h_calc_BCDtoDPB :
4091 toUInt( (Addr)h_calc_BCDtoDPB );
4092 addInstr( env, PPCInstr_Call( cc, target,
4093 argiregs,
4094 mk_RetLoc_simple(RLPri_2Int) ) );
4097 addInstr( env, mk_iMOVds_RR( tHi, argregs[argreg-1] ) );
4098 addInstr( env, mk_iMOVds_RR( tLo, argregs[argreg] ) );
4100 *rHi = tHi;
4101 *rLo = tLo;
4102 return;
4105 case Iop_DPBtoBCD: {
4106 PPCCondCode cc;
4107 UInt argiregs;
4108 HReg argregs[2];
4109 Int argreg;
4110 HReg tLo = newVRegI(env);
4111 HReg tHi = newVRegI(env);
4112 HReg tmpHi;
4113 HReg tmpLo;
4114 Bool mode64 = env->mode64;
4116 argregs[0] = hregPPC_GPR3(mode64);
4117 argregs[1] = hregPPC_GPR4(mode64);
4119 argiregs = 0;
4120 argreg = 0;
4122 iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Unop.arg, IEndianess);
4124 argiregs |= (1 << (argreg+3));
4125 addInstr(env, mk_iMOVds_RR( argregs[argreg++], tmpHi ));
4127 argiregs |= (1 << (argreg+3));
4128 addInstr(env, mk_iMOVds_RR( argregs[argreg], tmpLo));
4130 cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
4132 if (IEndianess == Iend_LE) {
4133 addInstr(env, PPCInstr_Call( cc, (Addr)h_calc_DPBtoBCD,
4134 argiregs,
4135 mk_RetLoc_simple(RLPri_2Int) ) );
4136 } else {
4137 Addr64 target;
4138 target = mode64 ? (Addr)h_calc_DPBtoBCD :
4139 toUInt( (Addr)h_calc_DPBtoBCD );
4140 addInstr(env, PPCInstr_Call( cc, target, argiregs,
4141 mk_RetLoc_simple(RLPri_2Int) ) );
4144 addInstr(env, mk_iMOVds_RR(tHi, argregs[argreg-1]));
4145 addInstr(env, mk_iMOVds_RR(tLo, argregs[argreg]));
4147 *rHi = tHi;
4148 *rLo = tLo;
4149 return;
4152 default:
4153 break;
4155 } /* if (e->tag == Iex_Unop) */
4157 vex_printf("iselInt64Expr(ppc): No such tag(%u)\n", e->tag);
4158 ppIRExpr(e);
4159 vpanic("iselInt64Expr(ppc)");
4163 /*---------------------------------------------------------*/
4164 /*--- ISEL: Floating point expressions (32 bit) ---*/
4165 /*---------------------------------------------------------*/
4167 /* Nothing interesting here; really just wrappers for
4168 64-bit stuff. */
4170 static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e, IREndness IEndianess )
4172 HReg r = iselFltExpr_wrk( env, e, IEndianess );
4173 # if 0
4174 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
4175 # endif
4176 vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
4177 vassert(hregIsVirtual(r));
4178 return r;
/* DO NOT CALL THIS DIRECTLY */
/* Worker for iselFltExpr: select instructions computing the Ity_F32
   expression 'e' into an FP register (64-bit, per PPC convention),
   which is returned.  Falls through to vpanic for unhandled shapes. */
static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e,
                              IREndness IEndianess )
{
   Bool        mode64 = env->mode64;

   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_F32);

   /* Value already computed into an IR temporary: reuse its register. */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* 4-byte FP load; the hardware widens the F32 into the F64-format
      register on load. */
   if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
      PPCAMode* am_addr;
      HReg r_dst = newVRegF(env);
      vassert(e->Iex.Load.ty == Ity_F32);
      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_F32/*xfer*/,
                                   IEndianess);
      addInstr(env, PPCInstr_FpLdSt(True/*load*/, 4, r_dst, am_addr));
      return r_dst;
   }

   /* Read an F32 field of the guest state, again via a 4-byte FP load. */
   if (e->tag == Iex_Get) {
      HReg r_dst = newVRegF(env);
      PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
                                       GuestStatePtr(env->mode64) );
      addInstr(env, PPCInstr_FpLdSt( True/*load*/, 4, r_dst, am_addr ));
      return r_dst;
   }

   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_TruncF64asF32) {
      /* This is quite subtle.  The only way to do the relevant
         truncation is to do a single-precision store and then a
         double precision load to get it back into a register.  The
         problem is, if the data is then written to memory a second
         time, as in

            STbe(...) = TruncF64asF32(...)

         then will the second truncation further alter the value?  The
         answer is no: flds (as generated here) followed by fsts
         (generated for the STbe) is the identity function on 32-bit
         floats, so we are safe.

         Another upshot of this is that if iselStmt can see the
         entirety of

            STbe(...) = TruncF64asF32(arg)

         then it can short circuit having to deal with TruncF64asF32
         individually; instead just compute arg into a 64-bit FP
         register and do 'fsts' (since that itself does the
         truncation).

         We generate pretty poor code here (should be ok both for
         32-bit and 64-bit mode); but it is expected that for the most
         part the latter optimisation will apply and hence this code
         will not often be used.
      */
      HReg fsrc = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
      HReg fdst = newVRegF(env);
      PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );

      sub_from_sp( env, 16 );
      // store as F32, hence truncating
      addInstr(env, PPCInstr_FpLdSt( False/*store*/, 4,
                                     fsrc, zero_r1 ));
      // and reload.  Good huh?! (sigh)
      addInstr(env, PPCInstr_FpLdSt( True/*load*/, 4,
                                     fdst, zero_r1 ));
      add_to_sp( env, 16 );
      return fdst;
   }

   /* I64UtoF32(rm, i64): unsigned 64-bit int -> F32.  Done by pushing
      the integer through memory into an FP register and converting
      with FpCftI; arg1 carries the IR rounding mode. */
   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64UtoF32) {
      if (mode64) {
         HReg fdst = newVRegF(env);
         HReg isrc = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
         HReg r1   = StackFramePtr(env->mode64);
         PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );

         /* Set host rounding mode */
         set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );

         sub_from_sp( env, 16 );

         addInstr(env, PPCInstr_Store(8, zero_r1, isrc, True/*mode64*/));
         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
         addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
                                       False, False,
                                       fdst, fdst));

         add_to_sp( env, 16 );

         ///* Restore default FPU rounding. */
         //set_FPU_rounding_default( env );
         return fdst;
      } else {
         /* 32-bit mode: the I64 source arrives as a register pair, so
            store both halves before the FP reload/convert. */
         HReg fdst = newVRegF(env);
         HReg isrcHi, isrcLo;
         HReg r1 = StackFramePtr(env->mode64);
         PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
         PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );

         iselInt64Expr(&isrcHi, &isrcLo, env, e->Iex.Binop.arg2, IEndianess);

         /* Set host rounding mode */
         set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );

         sub_from_sp( env, 16 );

         addInstr(env, PPCInstr_Store(4, zero_r1, isrcHi, False/*mode32*/));
         addInstr(env, PPCInstr_Store(4, four_r1, isrcLo, False/*mode32*/));
         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
         addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
                                       False, False,
                                       fdst, fdst));

         add_to_sp( env, 16 );

         ///* Restore default FPU rounding. */
         //set_FPU_rounding_default( env );
         return fdst;
      }
   }

   vex_printf("iselFltExpr(ppc): No such tag(%u)\n", e->tag);
   ppIRExpr(e);
   vpanic("iselFltExpr_wrk(ppc)");
}
4316 /*---------------------------------------------------------*/
4317 /*--- ISEL: Floating point expressions (64 bit) ---*/
4318 /*---------------------------------------------------------*/
4320 /* Compute a 64-bit floating point value into a register, the identity
4321 of which is returned. As with iselIntExpr_R, the reg may be either
4322 real or virtual; in any case it must not be changed by subsequent
4323 code emitted by the caller. */
4325 /* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm:
4327 Type S (1 bit) E (11 bits) F (52 bits)
4328 ---- --------- ----------- -----------
4329 signalling NaN u 2047 (max) .0uuuuu---u
4330 (with at least
4331 one 1 bit)
4332 quiet NaN u 2047 (max) .1uuuuu---u
4334 negative infinity 1 2047 (max) .000000---0
4336 positive infinity 0 2047 (max) .000000---0
4338 negative zero 1 0 .000000---0
4340 positive zero 0 0 .000000---0
4343 static HReg iselDblExpr ( ISelEnv* env, const IRExpr* e, IREndness IEndianess )
4345 HReg r = iselDblExpr_wrk( env, e, IEndianess );
4346 # if 0
4347 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
4348 # endif
4349 vassert(hregClass(r) == HRcFlt64);
4350 vassert(hregIsVirtual(r));
4351 return r;
4354 /* DO NOT CALL THIS DIRECTLY */
4355 static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e,
4356 IREndness IEndianess )
4358 Bool mode64 = env->mode64;
4359 IRType ty = typeOfIRExpr(env->type_env,e);
4360 vassert(e);
4361 vassert(ty == Ity_F64);
4363 if (e->tag == Iex_RdTmp) {
4364 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
4367 /* --------- LITERAL --------- */
4368 if (e->tag == Iex_Const) {
4369 union { UInt u32x2[2]; ULong u64; Double f64; } u;
4370 vassert(sizeof(u) == 8);
4371 vassert(sizeof(u.u64) == 8);
4372 vassert(sizeof(u.f64) == 8);
4373 vassert(sizeof(u.u32x2) == 8);
4375 if (e->Iex.Const.con->tag == Ico_F64) {
4376 u.f64 = e->Iex.Const.con->Ico.F64;
4378 else if (e->Iex.Const.con->tag == Ico_F64i) {
4379 u.u64 = e->Iex.Const.con->Ico.F64i;
4381 else
4382 vpanic("iselDblExpr(ppc): const");
4384 if (!mode64) {
4385 HReg r_srcHi = newVRegI(env);
4386 HReg r_srcLo = newVRegI(env);
4387 addInstr(env, PPCInstr_LI(r_srcHi, u.u32x2[0], mode64));
4388 addInstr(env, PPCInstr_LI(r_srcLo, u.u32x2[1], mode64));
4389 return mk_LoadRR32toFPR( env, r_srcHi, r_srcLo );
4390 } else { // mode64
4391 HReg r_src = newVRegI(env);
4392 addInstr(env, PPCInstr_LI(r_src, u.u64, mode64));
4393 return mk_LoadR64toFPR( env, r_src ); // 1*I64 -> F64
4397 /* --------- LOAD --------- */
4398 if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
4399 HReg r_dst = newVRegF(env);
4400 PPCAMode* am_addr;
4401 vassert(e->Iex.Load.ty == Ity_F64);
4402 am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_F64/*xfer*/,
4403 IEndianess);
4404 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dst, am_addr));
4405 return r_dst;
4408 /* --------- GET --------- */
4409 if (e->tag == Iex_Get) {
4410 HReg r_dst = newVRegF(env);
4411 PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
4412 GuestStatePtr(mode64) );
4413 addInstr(env, PPCInstr_FpLdSt( True/*load*/, 8, r_dst, am_addr ));
4414 return r_dst;
4417 /* --------- OPS --------- */
4418 if (e->tag == Iex_Qop) {
4419 PPCFpOp fpop = Pfp_INVALID;
4420 switch (e->Iex.Qop.details->op) {
4421 case Iop_MAddF64: fpop = Pfp_MADDD; break;
4422 case Iop_MAddF64r32: fpop = Pfp_MADDS; break;
4423 case Iop_MSubF64: fpop = Pfp_MSUBD; break;
4424 case Iop_MSubF64r32: fpop = Pfp_MSUBS; break;
4425 default: break;
4427 if (fpop != Pfp_INVALID) {
4428 HReg r_dst = newVRegF(env);
4429 HReg r_srcML = iselDblExpr(env, e->Iex.Qop.details->arg2,
4430 IEndianess);
4431 HReg r_srcMR = iselDblExpr(env, e->Iex.Qop.details->arg3,
4432 IEndianess);
4433 HReg r_srcAcc = iselDblExpr(env, e->Iex.Qop.details->arg4,
4434 IEndianess);
4435 set_FPU_rounding_mode( env, e->Iex.Qop.details->arg1, IEndianess );
4436 addInstr(env, PPCInstr_FpMulAcc(fpop, r_dst,
4437 r_srcML, r_srcMR, r_srcAcc));
4438 return r_dst;
4442 if (e->tag == Iex_Triop) {
4443 IRTriop *triop = e->Iex.Triop.details;
4444 PPCFpOp fpop = Pfp_INVALID;
4445 switch (triop->op) {
4446 case Iop_AddF64: fpop = Pfp_ADDD; break;
4447 case Iop_SubF64: fpop = Pfp_SUBD; break;
4448 case Iop_MulF64: fpop = Pfp_MULD; break;
4449 case Iop_DivF64: fpop = Pfp_DIVD; break;
4450 case Iop_AddF64r32: fpop = Pfp_ADDS; break;
4451 case Iop_SubF64r32: fpop = Pfp_SUBS; break;
4452 case Iop_MulF64r32: fpop = Pfp_MULS; break;
4453 case Iop_DivF64r32: fpop = Pfp_DIVS; break;
4454 default: break;
4456 if (fpop != Pfp_INVALID) {
4457 HReg r_dst = newVRegF(env);
4458 HReg r_srcL = iselDblExpr(env, triop->arg2, IEndianess);
4459 HReg r_srcR = iselDblExpr(env, triop->arg3, IEndianess);
4460 set_FPU_rounding_mode( env, triop->arg1, IEndianess );
4461 addInstr(env, PPCInstr_FpBinary(fpop, r_dst, r_srcL, r_srcR));
4462 return r_dst;
4466 if (e->tag == Iex_Binop) {
4467 PPCFpOp fpop = Pfp_INVALID;
4468 switch (e->Iex.Binop.op) {
4469 case Iop_SqrtF64: fpop = Pfp_SQRT; break;
4470 default: break;
4472 if (fpop == Pfp_SQRT) {
4473 HReg fr_dst = newVRegF(env);
4474 HReg fr_src = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
4475 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4476 addInstr(env, PPCInstr_FpUnary(fpop, fr_dst, fr_src));
4477 return fr_dst;
4481 if (e->tag == Iex_Binop) {
4483 if (e->Iex.Binop.op == Iop_F128toF64) {
4484 HReg fr_dst = newVRegF(env);
4485 HReg fr_src = iselFp128Expr(env, e->Iex.Binop.arg2, IEndianess);
4486 HReg tmp = newVRegV(env);
4487 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4488 PPCAMode* eight_r1 = PPCAMode_IR( 8, StackFramePtr(env->mode64) );
4489 PPCFpOp fpop = Pfp_INVALID;
4491 if (FPU_rounding_mode_isOdd(e->Iex.Binop.arg1)) {
4492 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
4493 fpop = Pfp_FPQTODRNDODD;
4494 } else {
4495 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4496 fpop = Pfp_FPQTOD;
4499 addInstr(env, PPCInstr_Fp128Unary(fpop, tmp, fr_src));
4501 /* result is in a 128-bit vector register, move to 64-bit reg to
4502 * match the Iop specification. The result will get moved back
4503 * to a 128-bit register and stored once the value is returned.
4505 sub_from_sp( env, 16 );
4506 addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, tmp, zero_r1));
4507 if (IEndianess == Iend_LE)
4508 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, eight_r1));
4509 else
4510 /* High 64-bits stored at lower address */
4511 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, zero_r1));
4513 add_to_sp( env, 16 );
4515 return fr_dst;
4518 if (e->Iex.Binop.op == Iop_RoundF64toF32) {
4519 HReg r_dst = newVRegF(env);
4520 HReg r_src = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
4521 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4522 addInstr(env, PPCInstr_FpRSP(r_dst, r_src));
4523 //set_FPU_rounding_default( env );
4524 return r_dst;
4527 if (e->Iex.Binop.op == Iop_I64StoF64 || e->Iex.Binop.op == Iop_I64UtoF64) {
4528 if (mode64) {
4529 HReg fdst = newVRegF(env);
4530 HReg isrc = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
4531 HReg r1 = StackFramePtr(env->mode64);
4532 PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
4534 /* Set host rounding mode */
4535 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4537 sub_from_sp( env, 16 );
4539 addInstr(env, PPCInstr_Store(8, zero_r1, isrc, True/*mode64*/));
4540 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
4541 addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
4542 e->Iex.Binop.op == Iop_I64StoF64,
4543 True/*fdst is 64 bit*/,
4544 fdst, fdst));
4546 add_to_sp( env, 16 );
4548 ///* Restore default FPU rounding. */
4549 //set_FPU_rounding_default( env );
4550 return fdst;
4551 } else {
4552 /* 32-bit mode */
4553 HReg fdst = newVRegF(env);
4554 HReg isrcHi, isrcLo;
4555 HReg r1 = StackFramePtr(env->mode64);
4556 PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
4557 PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
4559 iselInt64Expr(&isrcHi, &isrcLo, env, e->Iex.Binop.arg2,
4560 IEndianess);
4562 /* Set host rounding mode */
4563 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4565 sub_from_sp( env, 16 );
4567 addInstr(env, PPCInstr_Store(4, zero_r1, isrcHi, False/*mode32*/));
4568 addInstr(env, PPCInstr_Store(4, four_r1, isrcLo, False/*mode32*/));
4569 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
4570 addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
4571 e->Iex.Binop.op == Iop_I64StoF64,
4572 True/*fdst is 64 bit*/,
4573 fdst, fdst));
4575 add_to_sp( env, 16 );
4577 ///* Restore default FPU rounding. */
4578 //set_FPU_rounding_default( env );
4579 return fdst;
4585 if (e->tag == Iex_Unop) {
4586 PPCFpOp fpop = Pfp_INVALID;
4587 switch (e->Iex.Unop.op) {
4588 case Iop_NegF64: fpop = Pfp_NEG; break;
4589 case Iop_AbsF64: fpop = Pfp_ABS; break;
4590 case Iop_RSqrtEst5GoodF64: fpop = Pfp_RSQRTE; break;
4591 case Iop_RoundF64toF64_NegINF: fpop = Pfp_FRIM; break;
4592 case Iop_RoundF64toF64_PosINF: fpop = Pfp_FRIP; break;
4593 case Iop_RoundF64toF64_NEAREST: fpop = Pfp_FRIN; break;
4594 case Iop_RoundF64toF64_ZERO: fpop = Pfp_FRIZ; break;
4595 default: break;
4597 if (fpop != Pfp_INVALID) {
4598 HReg fr_dst = newVRegF(env);
4599 HReg fr_src = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
4600 addInstr(env, PPCInstr_FpUnary(fpop, fr_dst, fr_src));
4601 return fr_dst;
4605 if (e->tag == Iex_Unop) {
4606 switch (e->Iex.Unop.op) {
4607 case Iop_F128HItoF64:
4608 case Iop_F128LOtoF64:
4610 /* put upper/lower 64-bits of F128 into an F64. */
4611 HReg r_aligned16;
4612 HReg fdst = newVRegF(env);
4613 HReg fsrc = iselFp128Expr(env, e->Iex.Unop.arg, IEndianess);
4614 PPCAMode *am_off0, *am_off8, *am_off_arg;
4615 sub_from_sp( env, 32 ); // Move SP down 32 bytes
4617 // get a quadword aligned address within our stack space
4618 r_aligned16 = get_sp_aligned16( env );
4619 am_off0 = PPCAMode_IR( 0, r_aligned16 );
4620 am_off8 = PPCAMode_IR( 8 ,r_aligned16 );
4622 /* store 128-bit floating point value to memory, load low word
4623 * or high to 64-bit destination floating point register
4625 addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, fsrc, am_off0));
4626 if (IEndianess == Iend_LE) {
4627 if (e->Iex.Binop.op == Iop_F128HItoF64)
4628 am_off_arg = am_off8;
4629 else
4630 am_off_arg = am_off0;
4631 } else {
4632 if (e->Iex.Binop.op == Iop_F128HItoF64)
4633 am_off_arg = am_off0;
4634 else
4635 am_off_arg = am_off8;
4637 addInstr(env,
4638 PPCInstr_FpLdSt( True /*load*/,
4639 8, fdst,
4640 am_off_arg ));
4641 add_to_sp( env, 32 ); // Reset SP
4642 return fdst;
4644 case Iop_ReinterpI64asF64: {
4645 /* Given an I64, produce an IEEE754 double with the same
4646 bit pattern. */
4647 if (!mode64) {
4648 HReg r_srcHi, r_srcLo;
4649 iselInt64Expr( &r_srcHi, &r_srcLo, env, e->Iex.Unop.arg,
4650 IEndianess);
4651 return mk_LoadRR32toFPR( env, r_srcHi, r_srcLo );
4652 } else {
4653 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
4654 return mk_LoadR64toFPR( env, r_src );
4658 case Iop_F32toF64: {
4659 if (e->Iex.Unop.arg->tag == Iex_Unop &&
4660 e->Iex.Unop.arg->Iex.Unop.op == Iop_ReinterpI32asF32 ) {
4661 e = e->Iex.Unop.arg;
4663 HReg src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
4664 HReg fr_dst = newVRegF(env);
4665 PPCAMode *am_addr;
4667 sub_from_sp( env, 16 ); // Move SP down 16 bytes
4668 am_addr = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4670 // store src as Ity_I32's
4671 addInstr(env, PPCInstr_Store( 4, am_addr, src, env->mode64 ));
4673 // load single precision float, but the end results loads into a
4674 // 64-bit FP register -- i.e., F64.
4675 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 4, fr_dst, am_addr));
4677 add_to_sp( env, 16 ); // Reset SP
4678 return fr_dst;
4682 /* this is a no-op */
4683 HReg res = iselFltExpr(env, e->Iex.Unop.arg, IEndianess);
4684 return res;
4686 default:
4687 break;
4691 /* --------- MULTIPLEX --------- */
4692 if (e->tag == Iex_ITE) { // VFD
4693 if (ty == Ity_F64
4694 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
4695 HReg fr1 = iselDblExpr(env, e->Iex.ITE.iftrue, IEndianess);
4696 HReg fr0 = iselDblExpr(env, e->Iex.ITE.iffalse, IEndianess);
4697 HReg fr_dst = newVRegF(env);
4698 addInstr(env, PPCInstr_FpUnary( Pfp_MOV, fr_dst, fr0 ));
4699 PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond, IEndianess);
4700 addInstr(env, PPCInstr_FpCMov( cc, fr_dst, fr1 ));
4701 return fr_dst;
4705 vex_printf("iselDblExpr(ppc): No such tag(%u)\n", e->tag);
4706 ppIRExpr(e);
4707 vpanic("iselDblExpr_wrk(ppc)");
4710 static HReg iselDfp32Expr(ISelEnv* env, const IRExpr* e, IREndness IEndianess)
4712 HReg r = iselDfp32Expr_wrk( env, e, IEndianess );
4713 vassert(hregClass(r) == HRcFlt64);
4714 vassert( hregIsVirtual(r) );
4715 return r;
4718 /* DO NOT CALL THIS DIRECTLY */
4719 static HReg iselDfp32Expr_wrk(ISelEnv* env, const IRExpr* e,
4720 IREndness IEndianess)
4722 Bool mode64 = env->mode64;
4723 IRType ty = typeOfIRExpr( env->type_env, e );
4725 vassert( e );
4726 vassert( ty == Ity_D32 );
4728 /* --------- GET --------- */
4729 if (e->tag == Iex_Get) {
4730 HReg r_dst = newVRegF( env );
4731 PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
4732 GuestStatePtr(mode64) );
4733 addInstr( env, PPCInstr_FpLdSt( True/*load*/, 8, r_dst, am_addr ) );
4734 return r_dst;
4737 /* --------- LOAD --------- */
4738 if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
4739 PPCAMode* am_addr;
4740 HReg r_dst = newVRegF(env);
4741 vassert(e->Iex.Load.ty == Ity_D32);
4742 am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_D32/*xfer*/,
4743 IEndianess);
4744 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 4, r_dst, am_addr));
4745 return r_dst;
4748 /* --------- OPS --------- */
4749 if (e->tag == Iex_Binop) {
4750 if (e->Iex.Binop.op == Iop_D64toD32) {
4751 HReg fr_dst = newVRegF(env);
4752 HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
4753 set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4754 addInstr(env, PPCInstr_Dfp64Unary(Pfp_DRSP, fr_dst, fr_src));
4755 return fr_dst;
4759 ppIRExpr( e );
4760 vpanic( "iselDfp32Expr_wrk(ppc)" );
4763 static HReg iselFp128Expr( ISelEnv* env, const IRExpr* e, IREndness IEndianess )
4765 HReg r = iselFp128Expr_wrk( env, e, IEndianess );
4766 vassert(hregClass(r) == HRcVec128);
4767 vassert(hregIsVirtual(r));
4768 return r;
4771 /* DO NOT CALL THIS DIRECTLY */
4772 static HReg iselFp128Expr_wrk( ISelEnv* env, const IRExpr* e,
4773 IREndness IEndianess)
4775 Bool mode64 = env->mode64;
4776 PPCFpOp fpop = Pfp_INVALID;
4777 IRType ty = typeOfIRExpr(env->type_env,e);
4779 vassert(e);
4780 vassert( ty == Ity_F128 );
4782 /* read 128-bit IRTemp */
4783 if (e->tag == Iex_RdTmp) {
4784 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
4787 if (e->tag == Iex_Get) {
4788 /* Guest state vectors are 16byte aligned,
4789 so don't need to worry here */
4790 HReg dst = newVRegV(env);
4792 addInstr(env,
4793 PPCInstr_AvLdSt( True/*load*/, 16, dst,
4794 PPCAMode_IR( e->Iex.Get.offset,
4795 GuestStatePtr(mode64) )));
4796 return dst;
4799 if (e->tag == Iex_Unop) {
4800 switch (e->Iex.Unop.op) {
4801 case Iop_TruncF128toI64S:
4802 fpop = Pfp_TRUNCFPQTOISD; goto do_Un_F128;
4803 case Iop_TruncF128toI32S:
4804 fpop = Pfp_TRUNCFPQTOISW; goto do_Un_F128;
4805 case Iop_TruncF128toI64U:
4806 fpop = Pfp_TRUNCFPQTOIUD; goto do_Un_F128;
4807 case Iop_TruncF128toI32U:
4808 fpop = Pfp_TRUNCFPQTOIUW; goto do_Un_F128;
4809 case Iop_TruncF128toI128U:
4810 fpop = Pfp_TRUNCFPQTOIUQ; goto do_Un_F128;
4811 case Iop_TruncF128toI128S:
4812 fpop = Pfp_TRUNCFPQTOISQ; goto do_Un_F128;
4814 do_Un_F128: {
4815 HReg r_dst = newVRegV(env);
4816 HReg r_src = iselFp128Expr(env, e->Iex.Unop.arg, IEndianess);
4817 addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, r_src));
4818 return r_dst;
4821 case Iop_F64toF128: {
4822 fpop = Pfp_FPDTOQ;
4823 HReg r_dst = newVRegV(env);
4824 HReg r_src = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
4825 HReg v128tmp = newVRegV(env);
4826 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4828 /* value is in 64-bit float reg, need to move to 128-bit vector reg */
4829 sub_from_sp( env, 16 );
4830 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, r_src, zero_r1));
4831 addInstr(env, PPCInstr_AvLdSt(True/*load*/, 16, v128tmp, zero_r1));
4832 add_to_sp( env, 16 );
4834 addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, v128tmp));
4835 return r_dst;
4838 case Iop_I64StoF128:
4839 fpop = Pfp_IDSTOQ; goto do_Un_int_F128;
4840 case Iop_I64UtoF128:
4841 fpop = Pfp_IDUTOQ; goto do_Un_int_F128;
4843 do_Un_int_F128: {
4844 HReg r_dst = newVRegV(env);
4845 HReg tmp = newVRegV(env);
4846 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
4847 PPCAMode *am_offhi, *am_offlo;
4848 HReg r_aligned16;
4850 /* source is in a 64-bit integer reg, move to 128-bit float reg
4851 * do this via the stack (easy, convenient, etc).
4853 sub_from_sp( env, 32 ); // Move SP down
4855 /* Get a quadword aligned address within our stack space */
4856 r_aligned16 = get_sp_aligned16( env );
4858 am_offlo = PPCAMode_IR( 0, r_aligned16 );
4859 am_offhi = PPCAMode_IR( 8, r_aligned16 );
4861 /* Inst only uses the upper 64-bit of the source */
4862 addInstr(env, PPCInstr_Load(8, r_src, am_offhi, mode64));
4864 /* Fetch result back from stack. */
4865 addInstr(env, PPCInstr_AvLdSt(True/*load*/, 16, tmp, am_offlo));
4867 add_to_sp( env, 32 ); // Reset SP
4869 addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, tmp));
4870 return r_dst;
4873 case Iop_ReinterpI128asF128:
4875 PPCAMode* am_addr;
4876 PPCAMode* am_addr4;
4877 HReg rHi = INVALID_HREG;
4878 HReg rLo = INVALID_HREG;
4879 HReg dst = newVRegV(env);
4881 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
4883 sub_from_sp( env, 16 ); // Move SP down 16 bytes
4884 am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
4885 am_addr4 = advance4(env, am_addr);
4887 // store the two 64-bit pars
4888 addInstr(env, PPCInstr_Store( 8, am_addr, rHi, mode64 ));
4889 addInstr(env, PPCInstr_Store( 8, am_addr4, rLo, mode64 ));
4891 // load as Ity_F128
4892 addInstr(env, PPCInstr_AvLdSt( True/*fetch*/, 16, dst, am_addr ));
4894 add_to_sp( env, 16 ); // Reset SP
4895 return dst;
4898 default:
4899 break;
4900 } /* switch (e->Iex.Unop.op) */
4901 } /* if (e->tag == Iex_Unop) */
4903 if (e->tag == Iex_Binop) {
4904 switch (e->Iex.Binop.op) {
4906 case Iop_F64HLtoF128:
4908 HReg dst = newVRegV(env);
4909 HReg r_src_hi = iselDblExpr(env, e->Iex.Binop.arg1, IEndianess);
4910 HReg r_src_lo = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
4911 PPCAMode *am_offhi, *am_offlo;
4912 HReg r_aligned16;
4914 /* do this via the stack (easy, convenient, etc) */
4915 sub_from_sp( env, 16 ); // Move SP down
4917 /* Get a quadword aligned address within our stack space */
4918 r_aligned16 = get_sp_aligned16( env );
4920 am_offlo = PPCAMode_IR( 0, r_aligned16 );
4921 am_offhi = PPCAMode_IR( 8, r_aligned16 );
4923 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8,
4924 r_src_lo, am_offlo));
4925 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8,
4926 r_src_hi, am_offhi));
4928 /* Fetch result back from stack. */
4929 addInstr(env, PPCInstr_AvLdSt(True/*load*/, 16,
4930 dst, am_offlo));
4932 add_to_sp( env, 16 ); // Reset SP
4933 return dst;
4935 case Iop_F128toI128S:
4937 HReg dst = newVRegV(env);
4938 HReg r_src = iselFp128Expr(env, e->Iex.Binop.arg2, IEndianess);
4939 PPCRI* rm = iselWordExpr_RI(env, e->Iex.Binop.arg1, IEndianess);
4940 /* Note: rm is a set of three bit fields that specify the
4941 * rounding mode and which of the two instructions to issue.
4943 addInstr(env, PPCInstr_AvBinaryInt(Pav_F128toI128S, dst,
4944 r_src, rm));
4945 return dst;
4947 case Iop_RndF128:
4949 HReg dst = newVRegV(env);
4950 HReg r_src = iselFp128Expr(env, e->Iex.Binop.arg2, IEndianess);
4951 PPCRI* rm = iselWordExpr_RI(env, e->Iex.Binop.arg1, IEndianess);
4952 /* Note: rm is a set of three bit fields that specify the
4953 * rounding mode and which of the two instructions to issue.
4955 addInstr(env, PPCInstr_AvBinaryInt(Pav_ROUNDFPQ, dst,
4956 r_src, rm));
4957 return dst;
4959 case Iop_SqrtF128:
4960 if (FPU_rounding_mode_isOdd(e->Iex.Binop.arg1)) {
4961 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
4962 fpop = Pfp_FPSQRTQRNDODD;
4963 goto do_Bin_F128;
4964 } else {
4965 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4966 fpop = Pfp_FPSQRTQ;
4967 goto do_Bin_F128;
4969 case Iop_F128toF32:
4970 if (FPU_rounding_mode_isOdd(e->Iex.Binop.arg1)) {
4971 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
4972 fpop = Pfp_FPQTOWRNDODD;
4973 goto do_Bin_F128;
4974 } else {
4975 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4976 fpop = Pfp_FPQTOW;
4977 goto do_Bin_F128;
4979 do_Bin_F128: {
4980 HReg r_dst = newVRegV(env);
4981 HReg r_src = iselFp128Expr(env, e->Iex.Binop.arg2, IEndianess);
4982 addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, r_src));
4983 return r_dst;
4986 case Iop_I128StoF128:
4987 fpop = Pfp_IQSTOQ; goto do_Un_I128_F128_DFP_conversions;
4988 case Iop_I128UtoF128:
4989 fpop = Pfp_IQUTOQ; goto do_Un_I128_F128_DFP_conversions;
4990 do_Un_I128_F128_DFP_conversions: {
4991 PPCAMode* am_addr;
4992 PPCAMode* am_addr4;
4993 HReg rHi, rLo;
4994 HReg r_tmp = newVRegV(env);
4995 HReg r_dst = newVRegV(env);
4997 iselInt128Expr(&rHi,&rLo, env, e->Iex.Binop.arg2, IEndianess);
4999 /* Set host rounding mode for the conversion instruction */
5000 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
5002 sub_from_sp( env, 16 );
5004 am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
5005 am_addr4 = advance4(env, am_addr);
5007 // store the two 64-bit halfs of the I128
5008 addInstr(env, PPCInstr_Store( 8, am_addr, rHi, mode64 ));
5009 addInstr(env, PPCInstr_Store( 8, am_addr4, rLo, mode64 ));
5011 /* Fetch the I128 into an V128 register */
5012 addInstr(env, PPCInstr_AvLdSt( True/*fetch*/, 16, r_tmp, am_addr ));
5013 addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, r_tmp));
5015 add_to_sp( env, 16 ); // Reset SP
5017 return r_dst;
5020 default:
5021 break;
5022 } /* switch (e->Iex.Binop.op) */
5023 } /* if (e->tag == Iex_Binop) */
5025 if (e->tag == Iex_Triop) {
5026 IRTriop *triop = e->Iex.Triop.details;
5028 switch (triop->op) {
5029 case Iop_AddF128:
5030 if (FPU_rounding_mode_isOdd(triop->arg1)) {
5031 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5032 fpop = Pfp_FPADDQRNDODD; goto do_Tri_F128;
5033 } else {
5034 set_FPU_rounding_mode( env, triop->arg1, IEndianess );
5035 fpop = Pfp_FPADDQ; goto do_Tri_F128;
5037 case Iop_SubF128:
5038 if (FPU_rounding_mode_isOdd(triop->arg1)) {
5039 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5040 fpop = Pfp_FPSUBQRNDODD; goto do_Tri_F128;
5041 } else {
5042 set_FPU_rounding_mode( env, triop->arg1, IEndianess );
5043 fpop = Pfp_FPSUBQ; goto do_Tri_F128;
5045 case Iop_MulF128:
5046 if (FPU_rounding_mode_isOdd(triop->arg1)) {
5047 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5048 fpop = Pfp_FPMULQRNDODD; goto do_Tri_F128;
5049 } else {
5050 set_FPU_rounding_mode( env, triop->arg1, IEndianess );
5051 fpop = Pfp_FPMULQ; goto do_Tri_F128;
5053 case Iop_DivF128:
5054 if (FPU_rounding_mode_isOdd(triop->arg1)) {
5055 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5056 fpop = Pfp_FPDIVQRNDODD; goto do_Tri_F128;
5057 } else {
5058 set_FPU_rounding_mode( env, triop->arg1, IEndianess );
5059 fpop = Pfp_FPDIVQ; goto do_Tri_F128;
5061 case Iop_MAddF128:
5062 if (FPU_rounding_mode_isOdd(triop->arg1)) {
5063 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5064 fpop = Pfp_FPMULADDQRNDODD; goto do_Tri_F128;
5065 } else {
5066 set_FPU_rounding_mode( env, triop->arg1, IEndianess );
5067 fpop = Pfp_FPMULADDQ; goto do_Tri_F128;
5070 do_Tri_F128: {
5071 HReg r_dst = newVRegV(env);
5072 HReg r_srcL = iselFp128Expr(env, triop->arg2, IEndianess);
5073 HReg r_srcR = iselFp128Expr(env, triop->arg3, IEndianess);
5075 addInstr(env, PPCInstr_Fp128Binary(fpop, r_dst, r_srcL, r_srcR));
5076 return r_dst;
5079 default:
5080 break;
5081 } /* switch (e->Iex.Triop.op) */
5083 } /* if (e->tag == Iex_Trinop) */
5085 if (e->tag == Iex_Qop) {
5086 IRQop *qop = e->Iex.Qop.details;
5088 switch (qop->op) {
5089 case Iop_MAddF128:
5090 if (FPU_rounding_mode_isOdd(qop->arg1)) {
5091 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5092 fpop = Pfp_FPMULADDQRNDODD; goto do_Quad_F128;
5093 } else {
5094 set_FPU_rounding_mode( env, qop->arg1, IEndianess );
5095 fpop = Pfp_FPMULADDQ; goto do_Quad_F128;
5097 case Iop_MSubF128:
5098 if (FPU_rounding_mode_isOdd(qop->arg1)) {
5099 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5100 fpop = Pfp_FPMULSUBQRNDODD; goto do_Quad_F128;
5101 } else {
5102 set_FPU_rounding_mode( env, qop->arg1, IEndianess );
5103 fpop = Pfp_FPMULSUBQ; goto do_Quad_F128;
5105 case Iop_NegMAddF128:
5106 if (FPU_rounding_mode_isOdd(qop->arg1)) {
5107 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5108 fpop = Pfp_FPNEGMULADDQRNDODD; goto do_Quad_F128;
5109 } else {
5110 set_FPU_rounding_mode( env, qop->arg1, IEndianess );
5111 fpop = Pfp_FPNEGMULADDQ; goto do_Quad_F128;
5113 case Iop_NegMSubF128:
5114 if (FPU_rounding_mode_isOdd(qop->arg1)) {
5115 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5116 fpop = Pfp_FPNEGMULSUBQRNDODD; goto do_Quad_F128;
5117 } else {
5118 set_FPU_rounding_mode( env, qop->arg1, IEndianess );
5119 fpop = Pfp_FPNEGMULSUBQ; goto do_Quad_F128;
5122 do_Quad_F128: {
5123 HReg r_dst = iselFp128Expr(env, qop->arg3,
5124 IEndianess);
5125 HReg r_srcL = iselFp128Expr(env, qop->arg2,
5126 IEndianess);
5127 HReg r_srcR = iselFp128Expr(env, qop->arg4,
5128 IEndianess);
5130 addInstr(env, PPCInstr_Fp128Ternary(fpop, r_dst, r_srcL, r_srcR));
5131 return r_dst;
5134 default:
5135 break;
5137 } /* if (e->tag == Iex_Qop) */
5139 ppIRExpr( e );
5140 vpanic( "iselFp128Expr(ppc64)" );
5143 static HReg iselDfp64Expr(ISelEnv* env, const IRExpr* e, IREndness IEndianess)
5145 HReg r = iselDfp64Expr_wrk( env, e, IEndianess );
5146 vassert(hregClass(r) == HRcFlt64);
5147 vassert( hregIsVirtual(r) );
5148 return r;
5151 /* DO NOT CALL THIS DIRECTLY */
/* Select instructions to compute a D64 (64-bit decimal floating point)
   expression into a fresh virtual FP register, which is returned.
   Worker for iselDfp64Expr -- do not call directly. */
static HReg iselDfp64Expr_wrk(ISelEnv* env, const IRExpr* e,
                              IREndness IEndianess)
{
   Bool   mode64 = env->mode64;
   IRType ty     = typeOfIRExpr( env->type_env, e );
   HReg   r_dstHi, r_dstLo;

   vassert( e );
   vassert( ty == Ity_D64 );

   /* --------- TEMP --------- */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp( env, e->Iex.RdTmp.tmp );
   }

   /* --------- GET --------- */
   if (e->tag == Iex_Get) {
      /* Read a D64 out of the guest state: plain 8-byte FP load. */
      HReg r_dst = newVRegF( env );
      PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
                                       GuestStatePtr(mode64) );
      addInstr( env, PPCInstr_FpLdSt( True/*load*/, 8, r_dst, am_addr ) );
      return r_dst;
   }

   /* --------- LOAD (matching endianness only) --------- */
   if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
      PPCAMode* am_addr;
      HReg r_dst = newVRegF(env);
      vassert(e->Iex.Load.ty == Ity_D64);
      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_D64/*xfer*/,
                                   IEndianess);
      addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dst, am_addr));
      return r_dst;
   }

   /* --------- OPS --------- */
   /* NOTE(review): any Qop reaching here returns a fresh, uninitialized
    * vreg.  Presumably no D64-typed Qops are ever generated -- confirm
    * this branch is intentionally a dummy. */
   if (e->tag == Iex_Qop) {
      HReg r_dst = newVRegF( env );
      return r_dst;
   }

   if (e->tag == Iex_Unop) {
      HReg fr_dst = newVRegF(env);
      switch (e->Iex.Unop.op) {
      case Iop_ReinterpI64asD64: {
         /* Given an I64, produce an IEEE754 DFP with the same
            bit pattern. */
         if (!mode64) {
            HReg r_srcHi, r_srcLo;
            iselInt64Expr( &r_srcHi, &r_srcLo, env, e->Iex.Unop.arg,
                           IEndianess);
            return mk_LoadRR32toFPR( env, r_srcHi, r_srcLo );
         } else {
            HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
            return mk_LoadR64toFPR( env, r_src );
         }
      }
      case Iop_D32toD64: {
         /* Widen D32 -> D64 (dctdp). */
         HReg fr_src = iselDfp32Expr(env, e->Iex.Unop.arg, IEndianess);
         addInstr(env, PPCInstr_Dfp64Unary(Pfp_DCTDP, fr_dst, fr_src));
         return fr_dst;
      }
      case Iop_D128HItoD64:
         /* Select the whole D128 pair, return the high half only. */
         iselDfp128Expr( &r_dstHi, &r_dstLo, env, e->Iex.Unop.arg,
                         IEndianess );
         return r_dstHi;
      case Iop_D128LOtoD64:
         /* Select the whole D128 pair, return the low half only. */
         iselDfp128Expr( &r_dstHi, &r_dstLo, env, e->Iex.Unop.arg,
                         IEndianess );
         return r_dstLo;
      case Iop_InsertExpD64: {
         /* NOTE(review): this case sits in the Unop switch yet reads
          * e->Iex.Binop.arg1/arg2.  It only works because Unop.arg and
          * Binop.arg1 occupy the same slot of the IRExpr union -- and
          * arg2 would be garbage for a genuine Unop.  Confirm whether
          * InsertExpD64 can actually arrive here as a Unop; the Binop
          * switch below also handles it. */
         HReg fr_srcL = iselDblExpr(env, e->Iex.Binop.arg1, IEndianess);
         HReg fr_srcR = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);

         addInstr(env, PPCInstr_Dfp64Binary(Pfp_DIEX, fr_dst, fr_srcL,
                                            fr_srcR));
         return fr_dst;
      }
      default:
         /* Falls out of the Unop block; ends at the vpanic below. */
         vex_printf( "ERROR: iselDfp64Expr_wrk, UNKNOWN unop case %d\n",
                     (Int)e->Iex.Unop.op );
      }
   }

   if (e->tag == Iex_Binop) {
      PPCFpOp fpop = Pfp_INVALID;
      HReg fr_dst = newVRegF(env);

      switch (e->Iex.Binop.op) {
      case Iop_D128toD64:     fpop = Pfp_DRDPQ;  break;
      case Iop_D64toD32:      fpop = Pfp_DRSP;   break;
      case Iop_I64StoD64:     fpop = Pfp_DCFFIX; break;
      case Iop_RoundD64toInt: fpop = Pfp_DRINTN; break;
      default: break;
      }
      if (fpop == Pfp_DRDPQ) {
         /* D128 -> D64 narrowing, rounding mode from arg1. */
         HReg r_srcHi = newVRegF(env);
         HReg r_srcLo = newVRegF(env);

         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
                        IEndianess);
         addInstr(env, PPCInstr_DfpD128toD64(fpop, fr_dst, r_srcHi, r_srcLo));
         return fr_dst;

      } else if (fpop == Pfp_DRINTN) {
         HReg fr_src = newVRegF(env);
         PPCRI* r_rmc = iselWordExpr_RI(env, e->Iex.Binop.arg1, IEndianess);

         /* NOTE, this IOP takes a DFP value and rounds to the
          * nearest floating point integer value, i.e. fractional part
          * is zero.  The result is a decimal floating point number.
          * The INT in the name is a bit misleading.
          */
         fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
         addInstr(env, PPCInstr_DfpRound(fr_dst, fr_src, r_rmc));
         return fr_dst;

      } else if (fpop == Pfp_DRSP) {
         /* D64 -> D32 narrowing, rounding mode from arg1. */
         HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
         addInstr(env, PPCInstr_Dfp64Unary(fpop, fr_dst, fr_src));
         return fr_dst;

      } else if (fpop == Pfp_DCFFIX) {
         /* I64 -> D64: bounce the integer through the stack into an FP
            register, then convert (dcffix). */
         HReg fr_src = newVRegF(env);
         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );

         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
         sub_from_sp( env, 16 );

         // put the I64 value into a floating point register
         if (mode64) {
            HReg tmp = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);

            addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
         } else {
            HReg tmpHi, tmpLo;
            PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );

            iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Binop.arg2,
                          IEndianess);
            addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*mode32*/));
            addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*mode32*/));
         }

         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_src, zero_r1));
         addInstr(env, PPCInstr_Dfp64Unary(fpop, fr_dst, fr_src));
         add_to_sp( env, 16 );
         return fr_dst;
      }

      switch (e->Iex.Binop.op) {
      /* shift instructions D64, I32 -> D64 */
      case Iop_ShlD64: fpop = Pfp_DSCLI; break;
      case Iop_ShrD64: fpop = Pfp_DSCRI; break;
      default: break;
      }
      if (fpop != Pfp_INVALID) {
         HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg1, IEndianess);
         PPCRI* shift = iselWordExpr_RI(env, e->Iex.Binop.arg2, IEndianess);

         /* shift value must be an immediate value */
         vassert(shift->tag == Pri_Imm);

         addInstr(env, PPCInstr_DfpShift(fpop, fr_dst, fr_src, shift));
         return fr_dst;
      }

      switch (e->Iex.Binop.op) {
      case Iop_InsertExpD64:
         fpop = Pfp_DIEX;
         break;
      default: break;
      }
      if (fpop != Pfp_INVALID) {
         /* Insert exponent (arg1, an I64) into significand (arg2, a D64).
            The I64 is passed through the stack into an FP register. */
         HReg fr_srcL = newVRegF(env);
         HReg fr_srcR = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
         sub_from_sp( env, 16 );

         if (env->mode64) {
            // put the I64 value into a floating point reg
            HReg tmp = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);

            addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
         } else {
            // put the I64 register pair into a floating point reg
            HReg tmpHi;
            HReg tmpLo;
            PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );

            iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Binop.arg1,
                          IEndianess);
            addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*!mode64*/));
            addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*!mode64*/));
         }

         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_srcL, zero_r1));
         addInstr(env, PPCInstr_Dfp64Binary(fpop, fr_dst, fr_srcL,
                                            fr_srcR));
         add_to_sp( env, 16 );
         return fr_dst;
      }
   }

   if (e->tag == Iex_Triop) {
      IRTriop *triop = e->Iex.Triop.details;
      PPCFpOp fpop = Pfp_INVALID;

      /* Rounding-mode-taking arithmetic: arg1 = rounding mode. */
      switch (triop->op) {
      case Iop_AddD64:
         fpop = Pfp_DFPADD;
         break;
      case Iop_SubD64:
         fpop = Pfp_DFPSUB;
         break;
      case Iop_MulD64:
         fpop = Pfp_DFPMUL;
         break;
      case Iop_DivD64:
         fpop = Pfp_DFPDIV;
         break;
      default:
         break;
      }
      if (fpop != Pfp_INVALID) {
         HReg r_dst  = newVRegF( env );
         HReg r_srcL = iselDfp64Expr( env, triop->arg2, IEndianess );
         HReg r_srcR = iselDfp64Expr( env, triop->arg3, IEndianess );

         set_FPU_DFP_rounding_mode( env, triop->arg1, IEndianess );
         addInstr( env, PPCInstr_Dfp64Binary( fpop, r_dst, r_srcL, r_srcR ) );
         return r_dst;
      }

      switch (triop->op) {
      case Iop_QuantizeD64:          fpop = Pfp_DQUA;  break;
      case Iop_SignificanceRoundD64: fpop = Pfp_RRDTR; break;
      default: break;
      }
      if (fpop == Pfp_DQUA) {
         /* arg1 = RMC, arg2/arg3 = operands; RMC encoded in the insn. */
         HReg r_dst  = newVRegF(env);
         HReg r_srcL = iselDfp64Expr(env, triop->arg2, IEndianess);
         HReg r_srcR = iselDfp64Expr(env, triop->arg3, IEndianess);
         PPCRI* rmc  = iselWordExpr_RI(env, triop->arg1, IEndianess);
         addInstr(env, PPCInstr_DfpQuantize(fpop, r_dst, r_srcL, r_srcR,
                                            rmc));
         return r_dst;

      } else if (fpop == Pfp_RRDTR) {
         HReg r_dst  = newVRegF(env);
         HReg r_srcL = newVRegF(env);
         HReg r_srcR = iselDfp64Expr(env, triop->arg3, IEndianess);
         PPCRI* rmc  = iselWordExpr_RI(env, triop->arg1, IEndianess);
         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
         HReg i8_val = iselWordExpr_R(env, triop->arg2, IEndianess);

         /* Move I8 to float register to issue instruction */
         sub_from_sp( env, 16 );
         if (mode64)
            addInstr(env, PPCInstr_Store(8, zero_r1, i8_val, True/*mode64*/));
         else
            addInstr(env, PPCInstr_Store(4, zero_r1, i8_val, False/*mode32*/));

         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_srcL, zero_r1));
         add_to_sp( env, 16 );

         // will set TE and RMC when issuing instruction
         addInstr(env, PPCInstr_DfpQuantize(fpop, r_dst, r_srcL, r_srcR, rmc));
         return r_dst;
      }
   }

   ppIRExpr( e );
   vpanic( "iselDfp64Expr_wrk(ppc)" );
}
5427 static void iselDfp128Expr(HReg* rHi, HReg* rLo, ISelEnv* env, const IRExpr* e,
5428 IREndness IEndianess)
5430 iselDfp128Expr_wrk( rHi, rLo, env, e, IEndianess );
5431 vassert( hregIsVirtual(*rHi) );
5432 vassert( hregIsVirtual(*rLo) );
5435 /* DO NOT CALL THIS DIRECTLY */
5436 static void iselDfp128Expr_wrk(HReg* rHi, HReg *rLo, ISelEnv* env,
5437 const IRExpr* e, IREndness IEndianess)
5439 vassert( e );
5440 vassert( typeOfIRExpr(env->type_env,e) == Ity_D128 );
5442 /* read 128-bit IRTemp */
5443 if (e->tag == Iex_RdTmp) {
5444 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp );
5445 return;
5448 if (e->tag == Iex_Unop) {
5449 HReg r_dstHi = newVRegF(env);
5450 HReg r_dstLo = newVRegF(env);
5452 if (e->Iex.Unop.op == Iop_I64StoD128) {
5453 HReg fr_src = newVRegF(env);
5454 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5456 // put the I64 value into a floating point reg
5457 if (env->mode64) {
5458 HReg tmp = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
5459 addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
5460 } else {
5461 HReg tmpHi, tmpLo;
5462 PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
5464 iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Unop.arg,
5465 IEndianess);
5466 addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*mode32*/));
5467 addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*mode32*/));
5470 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_src, zero_r1));
5471 addInstr(env, PPCInstr_DfpI64StoD128(Pfp_DCFFIXQ, r_dstHi, r_dstLo,
5472 fr_src));
5475 if (e->Iex.Unop.op == Iop_D64toD128) {
5476 HReg r_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
5478 /* Source is 64bit, result is 128 bit. High 64bit source arg,
5479 * is ignored by the instruction. Set high arg to r_src just
5480 * to meet the vassert tests.
5482 addInstr(env, PPCInstr_Dfp128Unary(Pfp_DCTQPQ, r_dstHi, r_dstLo,
5483 r_src, r_src));
5485 *rHi = r_dstHi;
5486 *rLo = r_dstLo;
5487 return;
5490 /* --------- OPS --------- */
5491 if (e->tag == Iex_Binop) {
5492 HReg r_srcHi;
5493 HReg r_srcLo;
5495 switch (e->Iex.Binop.op) {
5496 case Iop_D64HLtoD128:
5497 r_srcHi = iselDfp64Expr( env, e->Iex.Binop.arg1, IEndianess );
5498 r_srcLo = iselDfp64Expr( env, e->Iex.Binop.arg2, IEndianess );
5499 *rHi = r_srcHi;
5500 *rLo = r_srcLo;
5501 return;
5502 break;
5503 case Iop_D128toD64: {
5504 PPCFpOp fpop = Pfp_DRDPQ;
5505 HReg fr_dst = newVRegF(env);
5507 set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
5508 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
5509 IEndianess);
5510 addInstr(env, PPCInstr_DfpD128toD64(fpop, fr_dst, r_srcHi, r_srcLo));
5512 /* Need to meet the interface spec but the result is
5513 * just 64-bits so send the result back in both halfs.
5515 *rHi = fr_dst;
5516 *rLo = fr_dst;
5517 return;
5519 case Iop_ShlD128:
5520 case Iop_ShrD128: {
5521 HReg fr_dst_hi = newVRegF(env);
5522 HReg fr_dst_lo = newVRegF(env);
5523 PPCRI* shift = iselWordExpr_RI(env, e->Iex.Binop.arg2, IEndianess);
5524 PPCFpOp fpop = Pfp_DSCLIQ; /* fix later if necessary */
5526 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg1,
5527 IEndianess);
5529 if (e->Iex.Binop.op == Iop_ShrD128)
5530 fpop = Pfp_DSCRIQ;
5532 addInstr(env, PPCInstr_DfpShift128(fpop, fr_dst_hi, fr_dst_lo,
5533 r_srcHi, r_srcLo, shift));
5535 *rHi = fr_dst_hi;
5536 *rLo = fr_dst_lo;
5537 return;
5539 case Iop_RoundD128toInt: {
5540 HReg r_dstHi = newVRegF(env);
5541 HReg r_dstLo = newVRegF(env);
5542 PPCRI* r_rmc = iselWordExpr_RI(env, e->Iex.Binop.arg1, IEndianess);
5544 // will set R and RMC when issuing instruction
5545 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
5546 IEndianess);
5548 addInstr(env, PPCInstr_DfpRound128(r_dstHi, r_dstLo,
5549 r_srcHi, r_srcLo, r_rmc));
5550 *rHi = r_dstHi;
5551 *rLo = r_dstLo;
5552 return;
5554 case Iop_InsertExpD128: {
5555 HReg r_dstHi = newVRegF(env);
5556 HReg r_dstLo = newVRegF(env);
5557 HReg r_srcL = newVRegF(env);
5558 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5559 r_srcHi = newVRegF(env);
5560 r_srcLo = newVRegF(env);
5562 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
5563 IEndianess);
5565 /* Move I64 to float register to issue instruction */
5566 if (env->mode64) {
5567 HReg tmp = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
5568 addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
5569 } else {
5570 HReg tmpHi, tmpLo;
5571 PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
5573 iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Unop.arg,
5574 IEndianess);
5575 addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*mode32*/));
5576 addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*mode32*/));
5579 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_srcL, zero_r1));
5580 addInstr(env, PPCInstr_InsertExpD128(Pfp_DIEXQ,
5581 r_dstHi, r_dstLo,
5582 r_srcL, r_srcHi, r_srcLo));
5583 *rHi = r_dstHi;
5584 *rLo = r_dstLo;
5585 return;
5588 case Iop_I128StoD128: {
5589 HReg tmpF128 = newVRegV(env);
5590 HReg FdstHi = newVRegF(env);
5591 HReg FdstLo = newVRegF(env);
5592 HReg srcLo = newVRegI(env);
5593 HReg srcHi = newVRegI(env);
5594 PPCAMode* am_addr;
5595 PPCAMode* am_addr4;
5597 set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
5599 // Get the I128 value, store into a VSR register
5600 iselInt128Expr(&srcHi, &srcLo, env, e->Iex.Binop.arg2, IEndianess);
5602 sub_from_sp( env, 16 ); // Move SP down 16 bytes
5603 am_addr = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5604 am_addr4 = advance4(env, am_addr);
5606 addInstr(env, PPCInstr_Store( 8, am_addr, srcHi, env->mode64 ));
5607 addInstr(env, PPCInstr_Store( 8, am_addr4, srcLo, env->mode64 ));
5609 // load as Ity_F128
5610 addInstr(env, PPCInstr_AvLdSt( True/*fetch*/, 16, tmpF128, am_addr ));
5612 // do conversion
5613 addInstr( env, PPCInstr_XFormUnary994( Px_IQSTODFP, FdstHi, FdstLo,
5614 tmpF128 ) );
5616 *rHi = FdstHi;
5617 *rLo = FdstLo;
5618 add_to_sp( env, 16 ); // Reset SP
5619 return;
5622 default:
5623 vex_printf( "ERROR: iselDfp128Expr_wrk, UNKNOWN binop case %d\n",
5624 (Int)e->Iex.Binop.op );
5625 break;
5629 if (e->tag == Iex_Triop) {
5630 IRTriop *triop = e->Iex.Triop.details;
5631 PPCFpOp fpop = Pfp_INVALID;
5632 HReg r_dstHi = newVRegF(env);
5633 HReg r_dstLo = newVRegF(env);
5635 switch (triop->op) {
5636 case Iop_AddD128:
5637 fpop = Pfp_DFPADDQ;
5638 break;
5639 case Iop_SubD128:
5640 fpop = Pfp_DFPSUBQ;
5641 break;
5642 case Iop_MulD128:
5643 fpop = Pfp_DFPMULQ;
5644 break;
5645 case Iop_DivD128:
5646 fpop = Pfp_DFPDIVQ;
5647 break;
5648 default:
5649 break;
5652 if (fpop != Pfp_INVALID) {
5653 HReg r_srcRHi = newVRegV( env );
5654 HReg r_srcRLo = newVRegV( env );
5656 /* dst will be used to pass in the left operand and get the result. */
5657 iselDfp128Expr( &r_dstHi, &r_dstLo, env, triop->arg2, IEndianess );
5658 iselDfp128Expr( &r_srcRHi, &r_srcRLo, env, triop->arg3, IEndianess );
5659 set_FPU_DFP_rounding_mode( env, triop->arg1, IEndianess );
5660 addInstr( env,
5661 PPCInstr_Dfp128Binary( fpop, r_dstHi, r_dstLo,
5662 r_srcRHi, r_srcRLo ) );
5663 *rHi = r_dstHi;
5664 *rLo = r_dstLo;
5665 return;
5667 switch (triop->op) {
5668 case Iop_QuantizeD128: fpop = Pfp_DQUAQ; break;
5669 case Iop_SignificanceRoundD128: fpop = Pfp_DRRNDQ; break;
5670 default: break;
5672 if (fpop == Pfp_DQUAQ) {
5673 HReg r_srcHi = newVRegF(env);
5674 HReg r_srcLo = newVRegF(env);
5675 PPCRI* rmc = iselWordExpr_RI(env, triop->arg1, IEndianess);
5677 /* dst will be used to pass in the left operand and get the result */
5678 iselDfp128Expr(&r_dstHi, &r_dstLo, env, triop->arg2, IEndianess);
5679 iselDfp128Expr(&r_srcHi, &r_srcLo, env, triop->arg3, IEndianess);
5681 // will set RMC when issuing instruction
5682 addInstr(env, PPCInstr_DfpQuantize128(fpop, r_dstHi, r_dstLo,
5683 r_srcHi, r_srcLo, rmc));
5684 *rHi = r_dstHi;
5685 *rLo = r_dstLo;
5686 return;
5688 } else if (fpop == Pfp_DRRNDQ) {
5689 HReg r_srcHi = newVRegF(env);
5690 HReg r_srcLo = newVRegF(env);
5691 PPCRI* rmc = iselWordExpr_RI(env, triop->arg1, IEndianess);
5692 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5693 PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
5694 HReg i8_val = iselWordExpr_R(env, triop->arg2, IEndianess);
5695 HReg r_zero = newVRegI( env );
5697 iselDfp128Expr(&r_srcHi, &r_srcLo, env, triop->arg3, IEndianess);
5699 /* dst will be used to pass in the left operand and get the result */
5700 /* Move I8 to float register to issue instruction. Note, the
5701 * instruction only looks at the bottom 6 bits so we really don't
5702 * have to clear the upper bits since the iselWordExpr_R sets the
5703 * bottom 8-bits.
5705 sub_from_sp( env, 16 );
5707 if (env->mode64)
5708 addInstr(env, PPCInstr_Store(4, four_r1, i8_val, True/*mode64*/));
5709 else
5710 addInstr(env, PPCInstr_Store(4, four_r1, i8_val, False/*mode32*/));
5712 /* Have to write to the upper bits to ensure they have been
5713 * initialized. The instruction ignores all but the lower 6-bits.
5715 addInstr( env, PPCInstr_LI( r_zero, 0, env->mode64 ) );
5716 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dstHi, zero_r1));
5717 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dstLo, zero_r1));
5719 add_to_sp( env, 16 );
5721 // will set RMC when issuing instruction
5722 addInstr(env, PPCInstr_DfpQuantize128(fpop, r_dstHi, r_dstLo,
5723 r_srcHi, r_srcLo, rmc));
5724 *rHi = r_dstHi;
5725 *rLo = r_dstLo;
5726 return;
5730 ppIRExpr( e );
5731 vpanic( "iselDfp128Expr(ppc64)" );
5735 /*---------------------------------------------------------*/
5736 /*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/
5737 /*---------------------------------------------------------*/
5739 static HReg iselVecExpr ( ISelEnv* env, const IRExpr* e, IREndness IEndianess )
5741 HReg r = iselVecExpr_wrk( env, e, IEndianess );
5742 # if 0
5743 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5744 # endif
5745 vassert(hregClass(r) == HRcVec128);
5746 vassert(hregIsVirtual(r));
5747 return r;
5750 /* DO NOT CALL THIS DIRECTLY */
5751 static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e,
5752 IREndness IEndianess )
5754 Bool mode64 = env->mode64;
5755 PPCAvOp op = Pav_INVALID;
5756 PPCAvFpOp fpop = Pavfp_INVALID;
5757 PPCAvOpBin128 opav128 = Pav_INVALIDBinary128;
5758 PPCAvOpTri128 optri128 = Pav_INVALIDTri128;
5759 IRType ty = typeOfIRExpr(env->type_env,e);
5760 vassert(e);
5761 vassert(ty == Ity_V128);
5763 if (e->tag == Iex_ITE) {
5764 HReg r1 = iselVecExpr( env, e->Iex.ITE.iftrue, IEndianess );
5765 HReg r0 = iselVecExpr( env, e->Iex.ITE.iffalse, IEndianess );
5766 HReg r_dst = newVRegV(env);
5768 // Use OR operator to do move r1 to r_dst
5769 addInstr(env, PPCInstr_AvBinary( Pav_OR, r_dst, r0, r0));
5770 PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond, IEndianess);
5771 addInstr(env, PPCInstr_AvCMov(cc, r_dst, r1));
5772 return r_dst;
5775 if (e->tag == Iex_RdTmp) {
5776 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5779 if (e->tag == Iex_Get) {
5780 /* Guest state vectors are 16byte aligned,
5781 so don't need to worry here */
5782 HReg dst = newVRegV(env);
5783 addInstr(env,
5784 PPCInstr_AvLdSt( True/*load*/, 16, dst,
5785 PPCAMode_IR( e->Iex.Get.offset,
5786 GuestStatePtr(mode64) )));
5787 return dst;
5790 if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
5791 /* Need to be able to do V128 unaligned loads. The BE unaligned load
5792 * can be accomplised using the following code sequece from the ISA.
5793 * It uses the lvx instruction that does two aligned loads and then
5794 * permute the data to store the required data as if it had been an
5795 * unaligned load.
5797 * lvx Vhi,0,Rb # load MSQ, using the unaligned address in Rb
5798 * lvsl Vp, 0,Rb # Set permute control vector
5799 * addi Rb,Rb,15 # Address of LSQ
5800 * lvx Vlo,0,Rb # load LSQ
5801 * vperm Vt,Vhi,Vlo,Vp # align the data as requested
5804 HReg Vhi = newVRegV(env);
5805 HReg Vlo = newVRegV(env);
5806 HReg Vp = newVRegV(env);
5807 HReg v_dst = newVRegV(env);
5808 HReg rB;
5809 HReg rB_plus_15 = newVRegI(env);
5811 vassert(e->Iex.Load.ty == Ity_V128);
5812 rB = iselWordExpr_R( env, e->Iex.Load.addr, IEndianess );
5814 // lvx Vhi, 0, Rb
5815 addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, Vhi,
5816 PPCAMode_IR(0, rB)) );
5818 if (IEndianess == Iend_LE)
5819 // lvsr Vp, 0, Rb
5820 addInstr(env, PPCInstr_AvSh( False/*right shift*/, Vp,
5821 PPCAMode_IR(0, rB)) );
5822 else
5823 // lvsl Vp, 0, Rb
5824 addInstr(env, PPCInstr_AvSh( True/*left shift*/, Vp,
5825 PPCAMode_IR(0, rB)) );
5827 // addi Rb_plus_15, Rb, 15
5828 addInstr(env, PPCInstr_Alu( Palu_ADD, rB_plus_15,
5829 rB, PPCRH_Imm(True, toUShort(15))) );
5831 // lvx Vlo, 0, Rb_plus_15
5832 addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, Vlo,
5833 PPCAMode_IR(0, rB_plus_15)) );
5835 if (IEndianess == Iend_LE)
5836 // vperm Vt, Vhi, Vlo, Vp
5837 addInstr(env, PPCInstr_AvPerm( v_dst, Vlo, Vhi, Vp ));
5838 else
5839 // vperm Vt, Vhi, Vlo, Vp
5840 addInstr(env, PPCInstr_AvPerm( v_dst, Vhi, Vlo, Vp ));
5842 return v_dst;
5845 if (e->tag == Iex_Unop) {
5846 switch (e->Iex.Unop.op) {
5848 case Iop_F16toF64x2:
5850 HReg dst = newVRegV(env);
5851 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5852 /* Note: PPC only coverts the 16-bt value in the upper word
5853 * to a 64-bit value stored in the upper word. The
5854 * contents of the lower word is undefined.
5856 addInstr(env, PPCInstr_AvUnary(Pav_F16toF64x2, dst, arg));
5857 return dst;
5860 case Iop_F64toF16x2_DEP:
5862 HReg dst = newVRegV(env);
5863 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5864 /* Note: PPC only coverts the 64-bt value in the upper 64-bit of V128
5865 * to a 16-bit value stored in the upper 64-bits of the result
5866 * V128. The contents of the lower 64-bits is undefined.
5868 addInstr(env, PPCInstr_AvUnary(Pav_F64toF16x2, dst, arg));
5869 return dst;
5872 case Iop_F16toF32x4:
5874 HReg src = newVRegV(env);
5875 HReg dst = newVRegV(env);
5876 HReg arg = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
5877 PPCAMode *am_off0, *am_off8;
5878 HReg r_aligned16;
5880 vassert(mode64);
5881 /* need to put I64 src into upper 64-bits of vector register,
5882 use stack */
5883 sub_from_sp( env, 32 ); // Move SP down
5885 /* Get a quadword aligned address within our stack space */
5886 r_aligned16 = get_sp_aligned16( env );
5887 am_off0 = PPCAMode_IR( 0, r_aligned16 );
5888 am_off8 = PPCAMode_IR( 8, r_aligned16 );
5890 /* Store I64 to stack */
5892 if (IEndianess == Iend_LE) {
5893 addInstr(env, PPCInstr_Store( 8, am_off8, arg, mode64 ));
5894 } else {
5895 addInstr(env, PPCInstr_Store( 8, am_off0, arg, mode64 ));
5898 /* Fetch new v128 src back from stack. */
5899 addInstr(env, PPCInstr_AvLdSt(True/*ld*/, 16, src, am_off0));
5901 /* issue instruction */
5902 addInstr(env, PPCInstr_AvUnary(Pav_F16toF32x4, dst, src));
5903 add_to_sp( env, 32 ); // Reset SP
5905 return dst;
5908 case Iop_F32toF16x4_DEP:
5910 HReg dst = newVRegI(env);
5911 HReg tmp = newVRegV(env);
5912 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5913 PPCAMode *am_off0, *am_off8;
5914 HReg r_aligned16;
5916 /* Instruction returns a V128, the Iop_F32toF16x4 needs to return
5917 * I64. Move the upper 64-bits from the instruction to an I64 via
5918 * the stack and return it.
5920 sub_from_sp( env, 32 ); // Move SP down
5922 addInstr(env, PPCInstr_AvUnary(Pav_F32toF16x4, tmp, arg));
5924 /* Get a quadword aligned address within our stack space */
5925 r_aligned16 = get_sp_aligned16( env );
5926 am_off0 = PPCAMode_IR( 0, r_aligned16 );
5927 am_off8 = PPCAMode_IR( 8, r_aligned16 );
5929 /* Store v128 tmp to stack. */
5930 addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, tmp, am_off0));
5932 /* Fetch I64 from stack */
5933 if (IEndianess == Iend_LE) {
5934 addInstr(env, PPCInstr_Load( 8, dst, am_off8, mode64 ));
5935 } else {
5936 addInstr(env, PPCInstr_Load( 8, dst, am_off0, mode64 ));
5939 add_to_sp( env, 32 ); // Reset SP
5940 return dst;
5943 case Iop_NotV128: {
5944 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5945 HReg dst = newVRegV(env);
5946 addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, arg));
5947 return dst;
5950 case Iop_CmpNEZ8x16: {
5951 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5952 HReg zero = newVRegV(env);
5953 HReg dst = newVRegV(env);
5954 addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
5955 addInstr(env, PPCInstr_AvBin8x16(Pav_CMPEQU, dst, arg, zero));
5956 addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
5957 return dst;
5960 case Iop_CmpNEZ16x8: {
5961 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5962 HReg zero = newVRegV(env);
5963 HReg dst = newVRegV(env);
5964 addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
5965 addInstr(env, PPCInstr_AvBin16x8(Pav_CMPEQU, dst, arg, zero));
5966 addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
5967 return dst;
5970 case Iop_CmpNEZ32x4: {
5971 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5972 HReg zero = newVRegV(env);
5973 HReg dst = newVRegV(env);
5974 addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
5975 addInstr(env, PPCInstr_AvBin32x4(Pav_CMPEQU, dst, arg, zero));
5976 addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
5977 return dst;
5980 case Iop_CmpNEZ64x2: {
5981 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5982 HReg zero = newVRegV(env);
5983 HReg dst = newVRegV(env);
5984 addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
5985 addInstr(env, PPCInstr_AvBin64x2(Pav_CMPEQU, dst, arg, zero));
5986 addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
5987 return dst;
5990 case Iop_RecipEst32Fx4: fpop = Pavfp_RCPF; goto do_32Fx4_unary;
5991 case Iop_RSqrtEst32Fx4: fpop = Pavfp_RSQRTF; goto do_32Fx4_unary;
5992 case Iop_Log2_32Fx4: fpop = Pavfp_Log2; goto do_32Fx4_unary;
5993 case Iop_Exp2_32Fx4: fpop = Pavfp_Exp2; goto do_32Fx4_unary;
5994 case Iop_I32UtoF32x4_DEP: fpop = Pavfp_CVTU2F; goto do_32Fx4_unary;
5995 case Iop_I32StoF32x4_DEP: fpop = Pavfp_CVTS2F; goto do_32Fx4_unary;
5996 case Iop_QF32toI32Ux4_RZ: fpop = Pavfp_QCVTF2U; goto do_32Fx4_unary;
5997 case Iop_QF32toI32Sx4_RZ: fpop = Pavfp_QCVTF2S; goto do_32Fx4_unary;
5998 case Iop_RoundF32x4_RM: fpop = Pavfp_ROUNDM; goto do_32Fx4_unary;
5999 case Iop_RoundF32x4_RP: fpop = Pavfp_ROUNDP; goto do_32Fx4_unary;
6000 case Iop_RoundF32x4_RN: fpop = Pavfp_ROUNDN; goto do_32Fx4_unary;
6001 case Iop_RoundF32x4_RZ: fpop = Pavfp_ROUNDZ; goto do_32Fx4_unary;
6002 do_32Fx4_unary:
6004 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
6005 HReg dst = newVRegV(env);
6006 addInstr(env, PPCInstr_AvUn32Fx4(fpop, dst, arg));
6007 return dst;
6010 case Iop_32UtoV128: {
6011 HReg r_aligned16, r_zeros;
6012 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
6013 HReg dst = newVRegV(env);
6014 PPCAMode *am_off0, *am_off4, *am_off8, *am_off12;
6015 sub_from_sp( env, 32 ); // Move SP down
6017 /* Get a quadword aligned address within our stack space */
6018 r_aligned16 = get_sp_aligned16( env );
6019 am_off0 = PPCAMode_IR( 0, r_aligned16 );
6020 am_off4 = PPCAMode_IR( 4, r_aligned16 );
6021 am_off8 = PPCAMode_IR( 8, r_aligned16 );
6022 am_off12 = PPCAMode_IR( 12, r_aligned16 );
6024 /* Store zeros */
6025 r_zeros = newVRegI(env);
6026 addInstr(env, PPCInstr_LI(r_zeros, 0x0, mode64));
6027 if (IEndianess == Iend_LE)
6028 addInstr(env, PPCInstr_Store( 4, am_off0, r_src, mode64 ));
6029 else
6030 addInstr(env, PPCInstr_Store( 4, am_off0, r_zeros, mode64 ));
6031 addInstr(env, PPCInstr_Store( 4, am_off4, r_zeros, mode64 ));
6032 addInstr(env, PPCInstr_Store( 4, am_off8, r_zeros, mode64 ));
6034 /* Store r_src in low word of quadword-aligned mem */
6035 if (IEndianess == Iend_LE)
6036 addInstr(env, PPCInstr_Store( 4, am_off12, r_zeros, mode64 ));
6037 else
6038 addInstr(env, PPCInstr_Store( 4, am_off12, r_src, mode64 ));
6040 /* Load word into low word of quadword vector reg */
6041 if (IEndianess == Iend_LE)
6042 addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 4, dst, am_off0 ));
6043 else
6044 addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 4, dst, am_off12 ));
6046 add_to_sp( env, 32 ); // Reset SP
6047 return dst;
6050 case Iop_Dup8x16:
6051 case Iop_Dup16x8:
6052 case Iop_Dup32x4:
6053 return mk_AvDuplicateRI(env, e->Iex.Unop.arg, IEndianess);
6055 case Iop_CipherSV128: op = Pav_CIPHERSUBV128; goto do_AvCipherV128Un;
6056 do_AvCipherV128Un: {
6057 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
6058 HReg dst = newVRegV(env);
6059 addInstr(env, PPCInstr_AvCipherV128Unary(op, dst, arg));
6060 return dst;
6063 case Iop_Clz8x16: op = Pav_ZEROCNTBYTE; goto do_zerocnt;
6064 case Iop_Clz16x8: op = Pav_ZEROCNTHALF; goto do_zerocnt;
6065 case Iop_Clz32x4: op = Pav_ZEROCNTWORD; goto do_zerocnt;
6066 case Iop_Clz64x2: op = Pav_ZEROCNTDBL; goto do_zerocnt;
6067 case Iop_Ctz8x16: op = Pav_TRAILINGZEROCNTBYTE; goto do_zerocnt;
6068 case Iop_Ctz16x8: op = Pav_TRAILINGZEROCNTHALF; goto do_zerocnt;
6069 case Iop_Ctz32x4: op = Pav_TRAILINGZEROCNTWORD; goto do_zerocnt;
6070 case Iop_Ctz64x2: op = Pav_TRAILINGZEROCNTDBL; goto do_zerocnt;
6071 case Iop_PwBitMtxXpose64x2: op = Pav_BITMTXXPOSE; goto do_zerocnt;
6072 do_zerocnt:
6074 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
6075 HReg dst = newVRegV(env);
6076 addInstr(env, PPCInstr_AvUnary(op, dst, arg));
6077 return dst;
6080 /* BCD Iops */
6081 case Iop_BCD128toI128S:
6083 HReg dst = newVRegV(env);
6084 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
6085 addInstr(env, PPCInstr_AvUnary( Pav_BCD128toI128S, dst, arg ) );
6086 return dst;
6089 case Iop_MulI128by10: op = Pav_MulI128by10; goto do_MulI128;
6090 case Iop_MulI128by10Carry: op = Pav_MulI128by10Carry; goto do_MulI128;
6091 do_MulI128: {
6092 HReg dst = newVRegV(env);
6093 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
6094 addInstr(env, PPCInstr_AvUnary(op, dst, arg));
6095 return dst;
6098 case Iop_ReinterpI128asV128: {
6099 PPCAMode* am_addr;
6100 PPCAMode* am_addr4;
6101 HReg rHi, rLo;
6102 HReg dst = newVRegV(env);
6104 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
6106 sub_from_sp( env, 16 ); // Move SP down 16 bytes
6107 am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
6108 am_addr4 = advance4(env, am_addr);
6110 // store the two 64-bit parts
6111 addInstr(env, PPCInstr_Store( 8, am_addr, rHi, mode64 ));
6112 addInstr(env, PPCInstr_Store( 8, am_addr4, rLo, mode64 ));
6114 // load as Ity_V128
6115 addInstr(env, PPCInstr_AvLdSt( True/*fetch*/, 16, dst, am_addr ));
6117 add_to_sp( env, 16 ); // Reset SP
6118 return dst;
6121 default:
6122 break;
6123 } /* switch (e->Iex.Unop.op) */
6124 } /* if (e->tag == Iex_Unop) */
6126 if (e->tag == Iex_Binop) {
6127 switch (e->Iex.Binop.op) {
6129 case Iop_64HLtoV128: {
6130 if (!mode64) {
6131 HReg r3, r2, r1, r0, r_aligned16;
6132 PPCAMode *am_off0, *am_off4, *am_off8, *am_off12;
6133 HReg dst = newVRegV(env);
6134 /* do this via the stack (easy, convenient, etc) */
6135 sub_from_sp( env, 32 ); // Move SP down
6137 // get a quadword aligned address within our stack space
6138 r_aligned16 = get_sp_aligned16( env );
6139 am_off0 = PPCAMode_IR( 0, r_aligned16 );
6140 am_off4 = PPCAMode_IR( 4, r_aligned16 );
6141 am_off8 = PPCAMode_IR( 8, r_aligned16 );
6142 am_off12 = PPCAMode_IR( 12, r_aligned16 );
6144 /* Do the less significant 64 bits */
6145 iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2, IEndianess);
6146 addInstr(env, PPCInstr_Store( 4, am_off12, r0, mode64 ));
6147 addInstr(env, PPCInstr_Store( 4, am_off8, r1, mode64 ));
6148 /* Do the more significant 64 bits */
6149 iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1, IEndianess);
6150 addInstr(env, PPCInstr_Store( 4, am_off4, r2, mode64 ));
6151 addInstr(env, PPCInstr_Store( 4, am_off0, r3, mode64 ));
6153 /* Fetch result back from stack. */
6154 addInstr(env, PPCInstr_AvLdSt(True/*ld*/, 16, dst, am_off0));
6156 add_to_sp( env, 32 ); // Reset SP
6157 return dst;
6158 } else {
6159 HReg rHi = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
6160 HReg rLo = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
6161 HReg dst = newVRegV(env);
6162 HReg r_aligned16;
6163 PPCAMode *am_off0, *am_off8;
6164 /* do this via the stack (easy, convenient, etc) */
6165 sub_from_sp( env, 32 ); // Move SP down
6167 // get a quadword aligned address within our stack space
6168 r_aligned16 = get_sp_aligned16( env );
6169 am_off0 = PPCAMode_IR( 0, r_aligned16 );
6170 am_off8 = PPCAMode_IR( 8, r_aligned16 );
6172 /* Store 2*I64 to stack */
6173 if (IEndianess == Iend_LE) {
6174 addInstr(env, PPCInstr_Store( 8, am_off0, rLo, mode64 ));
6175 addInstr(env, PPCInstr_Store( 8, am_off8, rHi, mode64 ));
6176 } else {
6177 addInstr(env, PPCInstr_Store( 8, am_off0, rHi, mode64 ));
6178 addInstr(env, PPCInstr_Store( 8, am_off8, rLo, mode64 ));
6180 /* Fetch result back from stack. */
6181 addInstr(env, PPCInstr_AvLdSt(True/*ld*/, 16, dst, am_off0));
6183 add_to_sp( env, 32 ); // Reset SP
6184 return dst;
6188 case Iop_Max32Fx4: fpop = Pavfp_MAXF; goto do_32Fx4;
6189 case Iop_Min32Fx4: fpop = Pavfp_MINF; goto do_32Fx4;
6190 case Iop_CmpEQ32Fx4: fpop = Pavfp_CMPEQF; goto do_32Fx4;
6191 case Iop_CmpGT32Fx4: fpop = Pavfp_CMPGTF; goto do_32Fx4;
6192 case Iop_CmpGE32Fx4: fpop = Pavfp_CMPGEF; goto do_32Fx4;
6193 do_32Fx4:
6195 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6196 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6197 HReg dst = newVRegV(env);
6198 addInstr(env, PPCInstr_AvBin32Fx4(fpop, dst, argL, argR));
6199 return dst;
6202 case Iop_CmpLE32Fx4: {
6203 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6204 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6205 HReg dst = newVRegV(env);
6207 /* stay consistent with native ppc compares:
6208 if a left/right lane holds a nan, return zeros for that lane
6209 so: le == NOT(gt OR isNan)
6211 HReg isNanLR = newVRegV(env);
6212 HReg isNanL = isNan(env, argL, IEndianess);
6213 HReg isNanR = isNan(env, argR, IEndianess);
6214 addInstr(env, PPCInstr_AvBinary(Pav_OR, isNanLR,
6215 isNanL, isNanR));
6217 addInstr(env, PPCInstr_AvBin32Fx4(Pavfp_CMPGTF, dst,
6218 argL, argR));
6219 addInstr(env, PPCInstr_AvBinary(Pav_OR, dst, dst, isNanLR));
6220 addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
6221 return dst;
6224 case Iop_AndV128: op = Pav_AND; goto do_AvBin;
6225 case Iop_OrV128: op = Pav_OR; goto do_AvBin;
6226 case Iop_XorV128: op = Pav_XOR; goto do_AvBin;
6227 do_AvBin: {
6228 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6229 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6230 HReg dst = newVRegV(env);
6231 addInstr(env, PPCInstr_AvBinary(op, dst, arg1, arg2));
6232 return dst;
6235 case Iop_Shl8x16: op = Pav_SHL; goto do_AvBin8x16;
6236 case Iop_Shr8x16: op = Pav_SHR; goto do_AvBin8x16;
6237 case Iop_Sar8x16: op = Pav_SAR; goto do_AvBin8x16;
6238 case Iop_Rol8x16: op = Pav_ROTL; goto do_AvBin8x16;
6239 case Iop_InterleaveHI8x16: op = Pav_MRGHI; goto do_AvBin8x16;
6240 case Iop_InterleaveLO8x16: op = Pav_MRGLO; goto do_AvBin8x16;
6241 case Iop_Add8x16: op = Pav_ADDU; goto do_AvBin8x16;
6242 case Iop_QAdd8Ux16: op = Pav_QADDU; goto do_AvBin8x16;
6243 case Iop_QAdd8Sx16: op = Pav_QADDS; goto do_AvBin8x16;
6244 case Iop_Sub8x16: op = Pav_SUBU; goto do_AvBin8x16;
6245 case Iop_QSub8Ux16: op = Pav_QSUBU; goto do_AvBin8x16;
6246 case Iop_QSub8Sx16: op = Pav_QSUBS; goto do_AvBin8x16;
6247 case Iop_Avg8Ux16: op = Pav_AVGU; goto do_AvBin8x16;
6248 case Iop_Avg8Sx16: op = Pav_AVGS; goto do_AvBin8x16;
6249 case Iop_Max8Ux16: op = Pav_MAXU; goto do_AvBin8x16;
6250 case Iop_Max8Sx16: op = Pav_MAXS; goto do_AvBin8x16;
6251 case Iop_Min8Ux16: op = Pav_MINU; goto do_AvBin8x16;
6252 case Iop_Min8Sx16: op = Pav_MINS; goto do_AvBin8x16;
6253 case Iop_MullEven8Ux16: op = Pav_OMULU; goto do_AvBin8x16;
6254 case Iop_MullEven8Sx16: op = Pav_OMULS; goto do_AvBin8x16;
6255 case Iop_CmpEQ8x16: op = Pav_CMPEQU; goto do_AvBin8x16;
6256 case Iop_CmpGT8Ux16: op = Pav_CMPGTU; goto do_AvBin8x16;
6257 case Iop_CmpGT8Sx16: op = Pav_CMPGTS; goto do_AvBin8x16;
6258 case Iop_PolynomialMulAdd8x16: op = Pav_POLYMULADD; goto do_AvBin8x16;
6259 do_AvBin8x16: {
6260 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6261 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6262 HReg dst = newVRegV(env);
6263 addInstr(env, PPCInstr_AvBin8x16(op, dst, arg1, arg2));
6264 return dst;
6267 case Iop_Shl16x8: op = Pav_SHL; goto do_AvBin16x8;
6268 case Iop_Shr16x8: op = Pav_SHR; goto do_AvBin16x8;
6269 case Iop_Sar16x8: op = Pav_SAR; goto do_AvBin16x8;
6270 case Iop_Rol16x8: op = Pav_ROTL; goto do_AvBin16x8;
6271 case Iop_NarrowBin16to8x16: op = Pav_PACKUU; goto do_AvBin16x8;
6272 case Iop_QNarrowBin16Uto8Ux16: op = Pav_QPACKUU; goto do_AvBin16x8;
6273 case Iop_QNarrowBin16Sto8Sx16: op = Pav_QPACKSS; goto do_AvBin16x8;
6274 case Iop_InterleaveHI16x8: op = Pav_MRGHI; goto do_AvBin16x8;
6275 case Iop_InterleaveLO16x8: op = Pav_MRGLO; goto do_AvBin16x8;
6276 case Iop_Add16x8: op = Pav_ADDU; goto do_AvBin16x8;
6277 case Iop_QAdd16Ux8: op = Pav_QADDU; goto do_AvBin16x8;
6278 case Iop_QAdd16Sx8: op = Pav_QADDS; goto do_AvBin16x8;
6279 case Iop_Sub16x8: op = Pav_SUBU; goto do_AvBin16x8;
6280 case Iop_QSub16Ux8: op = Pav_QSUBU; goto do_AvBin16x8;
6281 case Iop_QSub16Sx8: op = Pav_QSUBS; goto do_AvBin16x8;
6282 case Iop_Avg16Ux8: op = Pav_AVGU; goto do_AvBin16x8;
6283 case Iop_Avg16Sx8: op = Pav_AVGS; goto do_AvBin16x8;
6284 case Iop_Max16Ux8: op = Pav_MAXU; goto do_AvBin16x8;
6285 case Iop_Max16Sx8: op = Pav_MAXS; goto do_AvBin16x8;
6286 case Iop_Min16Ux8: op = Pav_MINU; goto do_AvBin16x8;
6287 case Iop_Min16Sx8: op = Pav_MINS; goto do_AvBin16x8;
6288 case Iop_MullEven16Ux8: op = Pav_OMULU; goto do_AvBin16x8;
6289 case Iop_MullEven16Sx8: op = Pav_OMULS; goto do_AvBin16x8;
6290 case Iop_CmpEQ16x8: op = Pav_CMPEQU; goto do_AvBin16x8;
6291 case Iop_CmpGT16Ux8: op = Pav_CMPGTU; goto do_AvBin16x8;
6292 case Iop_CmpGT16Sx8: op = Pav_CMPGTS; goto do_AvBin16x8;
6293 case Iop_PolynomialMulAdd16x8: op = Pav_POLYMULADD; goto do_AvBin16x8;
6294 do_AvBin16x8: {
6295 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6296 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6297 HReg dst = newVRegV(env);
6298 addInstr(env, PPCInstr_AvBin16x8(op, dst, arg1, arg2));
6299 return dst;
6302 case Iop_Shl32x4: op = Pav_SHL; goto do_AvBin32x4;
6303 case Iop_Shr32x4: op = Pav_SHR; goto do_AvBin32x4;
6304 case Iop_Sar32x4: op = Pav_SAR; goto do_AvBin32x4;
6305 case Iop_Rol32x4: op = Pav_ROTL; goto do_AvBin32x4;
6306 case Iop_NarrowBin32to16x8: op = Pav_PACKUU; goto do_AvBin32x4;
6307 case Iop_QNarrowBin32Uto16Ux8: op = Pav_QPACKUU; goto do_AvBin32x4;
6308 case Iop_QNarrowBin32Sto16Sx8: op = Pav_QPACKSS; goto do_AvBin32x4;
6309 case Iop_InterleaveHI32x4: op = Pav_MRGHI; goto do_AvBin32x4;
6310 case Iop_InterleaveLO32x4: op = Pav_MRGLO; goto do_AvBin32x4;
6311 case Iop_Add32x4: op = Pav_ADDU; goto do_AvBin32x4;
6312 case Iop_QAdd32Ux4: op = Pav_QADDU; goto do_AvBin32x4;
6313 case Iop_QAdd32Sx4: op = Pav_QADDS; goto do_AvBin32x4;
6314 case Iop_Sub32x4: op = Pav_SUBU; goto do_AvBin32x4;
6315 case Iop_QSub32Ux4: op = Pav_QSUBU; goto do_AvBin32x4;
6316 case Iop_QSub32Sx4: op = Pav_QSUBS; goto do_AvBin32x4;
6317 case Iop_Avg32Ux4: op = Pav_AVGU; goto do_AvBin32x4;
6318 case Iop_Avg32Sx4: op = Pav_AVGS; goto do_AvBin32x4;
6319 case Iop_Max32Ux4: op = Pav_MAXU; goto do_AvBin32x4;
6320 case Iop_Max32Sx4: op = Pav_MAXS; goto do_AvBin32x4;
6321 case Iop_Min32Ux4: op = Pav_MINU; goto do_AvBin32x4;
6322 case Iop_Min32Sx4: op = Pav_MINS; goto do_AvBin32x4;
6323 case Iop_Mul32x4: op = Pav_MULU; goto do_AvBin32x4;
6324 case Iop_MullEven32Ux4: op = Pav_OMULU; goto do_AvBin32x4;
6325 case Iop_MullEven32Sx4: op = Pav_OMULS; goto do_AvBin32x4;
6326 case Iop_CmpEQ32x4: op = Pav_CMPEQU; goto do_AvBin32x4;
6327 case Iop_CmpGT32Ux4: op = Pav_CMPGTU; goto do_AvBin32x4;
6328 case Iop_CmpGT32Sx4: op = Pav_CMPGTS; goto do_AvBin32x4;
6329 case Iop_CatOddLanes32x4: op = Pav_CATODD; goto do_AvBin32x4;
6330 case Iop_CatEvenLanes32x4: op = Pav_CATEVEN; goto do_AvBin32x4;
6331 case Iop_PolynomialMulAdd32x4: op = Pav_POLYMULADD; goto do_AvBin32x4;
6332 do_AvBin32x4: {
6333 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6334 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6335 HReg dst = newVRegV(env);
6336 addInstr(env, PPCInstr_AvBin32x4(op, dst, arg1, arg2));
6337 return dst;
6340 case Iop_Shl64x2: op = Pav_SHL; goto do_AvBin64x2;
6341 case Iop_Shr64x2: op = Pav_SHR; goto do_AvBin64x2;
6342 case Iop_Sar64x2: op = Pav_SAR; goto do_AvBin64x2;
6343 case Iop_Rol64x2: op = Pav_ROTL; goto do_AvBin64x2;
6344 case Iop_NarrowBin64to32x4: op = Pav_PACKUU; goto do_AvBin64x2;
6345 case Iop_QNarrowBin64Sto32Sx4: op = Pav_QPACKSS; goto do_AvBin64x2;
6346 case Iop_QNarrowBin64Uto32Ux4: op = Pav_QPACKUU; goto do_AvBin64x2;
6347 case Iop_InterleaveHI64x2: op = Pav_MRGHI; goto do_AvBin64x2;
6348 case Iop_InterleaveLO64x2: op = Pav_MRGLO; goto do_AvBin64x2;
6349 case Iop_Add64x2: op = Pav_ADDU; goto do_AvBin64x2;
6350 case Iop_Sub64x2: op = Pav_SUBU; goto do_AvBin64x2;
6351 case Iop_Max64Ux2: op = Pav_MAXU; goto do_AvBin64x2;
6352 case Iop_Max64Sx2: op = Pav_MAXS; goto do_AvBin64x2;
6353 case Iop_Min64Ux2: op = Pav_MINU; goto do_AvBin64x2;
6354 case Iop_Min64Sx2: op = Pav_MINS; goto do_AvBin64x2;
6355 case Iop_CmpEQ64x2: op = Pav_CMPEQU; goto do_AvBin64x2;
6356 case Iop_CmpGT64Ux2: op = Pav_CMPGTU; goto do_AvBin64x2;
6357 case Iop_CmpGT64Sx2: op = Pav_CMPGTS; goto do_AvBin64x2;
6358 case Iop_PolynomialMulAdd64x2: op = Pav_POLYMULADD; goto do_AvBin64x2;
6359 do_AvBin64x2: {
6360 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6361 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6362 HReg dst = newVRegV(env);
6363 addInstr(env, PPCInstr_AvBin64x2(op, dst, arg1, arg2));
6364 return dst;
6367 case Iop_ShlN8x16: op = Pav_SHL; goto do_AvShift8x16;
6368 case Iop_SarN8x16: op = Pav_SAR; goto do_AvShift8x16;
6369 do_AvShift8x16: {
6370 HReg r_src = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6371 HReg dst = newVRegV(env);
6372 HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
6373 addInstr(env, PPCInstr_AvBin8x16(op, dst, r_src, v_shft));
6374 return dst;
6377 case Iop_ShlN16x8: op = Pav_SHL; goto do_AvShift16x8;
6378 case Iop_ShrN16x8: op = Pav_SHR; goto do_AvShift16x8;
6379 case Iop_SarN16x8: op = Pav_SAR; goto do_AvShift16x8;
6380 do_AvShift16x8: {
6381 HReg r_src = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6382 HReg dst = newVRegV(env);
6383 HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
6384 addInstr(env, PPCInstr_AvBin16x8(op, dst, r_src, v_shft));
6385 return dst;
6388 case Iop_ShlN32x4: op = Pav_SHL; goto do_AvShift32x4;
6389 case Iop_ShrN32x4: op = Pav_SHR; goto do_AvShift32x4;
6390 case Iop_SarN32x4: op = Pav_SAR; goto do_AvShift32x4;
6391 do_AvShift32x4: {
6392 HReg r_src = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6393 HReg dst = newVRegV(env);
6394 HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
6395 addInstr(env, PPCInstr_AvBin32x4(op, dst, r_src, v_shft));
6396 return dst;
6399 case Iop_ShlN64x2: op = Pav_SHL; goto do_AvShift64x2;
6400 case Iop_ShrN64x2: op = Pav_SHR; goto do_AvShift64x2;
6401 case Iop_SarN64x2: op = Pav_SAR; goto do_AvShift64x2;
6402 do_AvShift64x2: {
6403 HReg r_src = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6404 HReg dst = newVRegV(env);
6405 HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
6406 addInstr(env, PPCInstr_AvBin64x2(op, dst, r_src, v_shft));
6407 return dst;
6410 case Iop_ShrV128: op = Pav_SHR; goto do_AvShiftV128;
6411 case Iop_ShlV128: op = Pav_SHL; goto do_AvShiftV128;
6412 do_AvShiftV128: {
6413 HReg dst = newVRegV(env);
6414 HReg r_src = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6415 HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
6416 /* Note: shift value gets masked by 127 */
6417 addInstr(env, PPCInstr_AvBinary(op, dst, r_src, v_shft));
6418 return dst;
6421 case Iop_Perm8x16: {
6422 HReg dst = newVRegV(env);
6423 HReg v_src = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6424 HReg v_ctl = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6425 addInstr(env, PPCInstr_AvPerm(dst, v_src, v_src, v_ctl));
6426 return dst;
6429 case Iop_CipherV128: op = Pav_CIPHERV128; goto do_AvCipherV128;
6430 case Iop_CipherLV128: op = Pav_CIPHERLV128; goto do_AvCipherV128;
6431 case Iop_NCipherV128: op = Pav_NCIPHERV128; goto do_AvCipherV128;
6432 case Iop_NCipherLV128:op = Pav_NCIPHERLV128; goto do_AvCipherV128;
6433 do_AvCipherV128: {
6434 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6435 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6436 HReg dst = newVRegV(env);
6437 addInstr(env, PPCInstr_AvCipherV128Binary(op, dst, arg1, arg2));
6438 return dst;
6441 case Iop_SHA256:op = Pav_SHA256; goto do_AvHashV128;
6442 case Iop_SHA512:op = Pav_SHA512; goto do_AvHashV128;
6443 do_AvHashV128: {
6444 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6445 HReg dst = newVRegV(env);
6446 PPCRI* s_field = iselWordExpr_RI(env, e->Iex.Binop.arg2, IEndianess);
6447 addInstr(env, PPCInstr_AvHashV128Binary(op, dst, arg1, s_field));
6448 return dst;
6451 /* BCD Iops */
6452 case Iop_I128StoBCD128:
6454 HReg dst = newVRegV(env);
6455 HReg arg = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6456 PPCRI* ps = iselWordExpr_RI(env, e->Iex.Binop.arg2, IEndianess);
6458 addInstr(env, PPCInstr_AvBinaryInt( Pav_I128StoBCD128, dst, arg,
6459 ps ) );
6460 return dst;
6463 case Iop_MulI128by10E: op = Pav_MulI128by10E; goto do_MulI128E;
6464 case Iop_MulI128by10ECarry: op = Pav_MulI128by10ECarry; goto do_MulI128E;
6465 do_MulI128E: {
6466 HReg dst = newVRegV(env);
6467 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6468 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6469 addInstr(env, PPCInstr_AvBinary(op, dst, argL, argR));
6470 return dst;
6473 case Iop_BCDAdd:op = Pav_BCDAdd; goto do_AvBCDV128;
6474 case Iop_BCDSub:op = Pav_BCDSub; goto do_AvBCDV128;
6475 do_AvBCDV128: {
6476 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6477 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6478 HReg dst = newVRegV(env);
6479 addInstr(env, PPCInstr_AvBCDV128Binary(op, dst, arg1, arg2));
6480 return dst;
6483 case Iop_DivU128: opav128 = Pav_DivU128; goto do_IntArithBinaryI128;
6484 case Iop_DivS128: opav128 = Pav_DivS128; goto do_IntArithBinaryI128;
6485 case Iop_DivU128E: opav128 = Pav_DivU128E; goto do_IntArithBinaryI128;
6486 case Iop_DivS128E: opav128 = Pav_DivS128E; goto do_IntArithBinaryI128;
6487 case Iop_ModU128: opav128 = Pav_ModU128; goto do_IntArithBinaryI128;
6488 case Iop_ModS128: opav128 = Pav_ModS128; goto do_IntArithBinaryI128;
6489 do_IntArithBinaryI128: {
6490 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6491 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6492 HReg dst = newVRegV(env);
6493 addInstr(env, PPCInstr_AvBinaryInt128(opav128, dst, arg1, arg2));
6494 return dst;
6497 default:
6498 break;
6499 } /* switch (e->Iex.Binop.op) */
6500 } /* if (e->tag == Iex_Binop) */
6502 if (e->tag == Iex_Triop) {
6503 IRTriop *triop = e->Iex.Triop.details;
6504 switch (triop->op) {
6505 case Iop_Add32Fx4: fpop = Pavfp_ADDF; goto do_32Fx4_with_rm;
6506 case Iop_Sub32Fx4: fpop = Pavfp_SUBF; goto do_32Fx4_with_rm;
6507 case Iop_Mul32Fx4: fpop = Pavfp_MULF; goto do_32Fx4_with_rm;
6508 do_32Fx4_with_rm:
6510 HReg argL = iselVecExpr(env, triop->arg2, IEndianess);
6511 HReg argR = iselVecExpr(env, triop->arg3, IEndianess);
6512 HReg dst = newVRegV(env);
6513 /* FIXME: this is bogus, in the sense that Altivec ignores
6514 FPSCR.RM, at least for some FP operations. So setting the
6515 RM is pointless. This is only really correct in the case
6516 where the RM is known, at JIT time, to be Irrm_NEAREST,
6517 since -- at least for Altivec FP add/sub/mul -- the
6518 emitted insn is hardwired to round to nearest. */
6519 set_FPU_rounding_mode(env, triop->arg1, IEndianess);
6520 addInstr(env, PPCInstr_AvBin32Fx4(fpop, dst, argL, argR));
6521 return dst;
6524 case Iop_2xMultU64Add128CarryOut:
6525 optri128 = Pav_2xMultU64Add128CarryOut; goto do_IntArithTrinaryI128;
6526 do_IntArithTrinaryI128: {
6527 HReg arg1 = iselVecExpr(env, triop->arg1, IEndianess);
6528 HReg arg2 = iselVecExpr(env, triop->arg2, IEndianess);
6529 HReg arg3 = iselVecExpr(env, triop->arg3, IEndianess);
6530 HReg dst = newVRegV(env);
6531 addInstr(env, PPCInstr_AvTernaryInt128(optri128, dst, arg1, arg2,
6532 arg3));
6533 return dst;
6536 default:
6537 break;
6538 } /* switch (e->Iex.Triop.op) */
6539 } /* if (e->tag == Iex_Triop) */
6542 if (e->tag == Iex_Const ) {
6543 vassert(e->Iex.Const.con->tag == Ico_V128);
6544 if (e->Iex.Const.con->Ico.V128 == 0x0000) {
6545 return generate_zeroes_V128(env);
6547 else if (e->Iex.Const.con->Ico.V128 == 0xffff) {
6548 return generate_ones_V128(env);
6552 vex_printf("iselVecExpr(ppc) (subarch = %s): can't reduce\n",
6553 LibVEX_ppVexHwCaps(mode64 ? VexArchPPC64 : VexArchPPC32,
6554 env->hwcaps));
6555 ppIRExpr(e);
6556 vpanic("iselVecExpr_wrk(ppc)");
6560 /*---------------------------------------------------------*/
6561 /*--- ISEL: Statements ---*/
6562 /*---------------------------------------------------------*/
6564 static void iselStmt ( ISelEnv* env, IRStmt* stmt, IREndness IEndianess )
6566 Bool mode64 = env->mode64;
6567 if (vex_traceflags & VEX_TRACE_VCODE) {
6568 vex_printf("\n -- ");
6569 ppIRStmt(stmt);
6570 vex_printf("\n");
6573 switch (stmt->tag) {
6575 /* --------- STORE --------- */
6576 case Ist_Store: {
6577 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
6578 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
6579 IREndness end = stmt->Ist.Store.end;
6581 if (end != IEndianess)
6582 goto stmt_fail;
6583 if (!mode64 && (tya != Ity_I32))
6584 goto stmt_fail;
6585 if (mode64 && (tya != Ity_I64))
6586 goto stmt_fail;
6588 if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32 ||
6589 (mode64 && (tyd == Ity_I64))) {
6590 PPCAMode* am_addr
6591 = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6592 IEndianess);
6593 HReg r_src = iselWordExpr_R(env, stmt->Ist.Store.data, IEndianess);
6594 addInstr(env, PPCInstr_Store( toUChar(sizeofIRType(tyd)),
6595 am_addr, r_src, mode64 ));
6596 return;
6598 if (tyd == Ity_F64) {
6599 PPCAMode* am_addr
6600 = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6601 IEndianess);
6602 HReg fr_src = iselDblExpr(env, stmt->Ist.Store.data, IEndianess);
6603 addInstr(env,
6604 PPCInstr_FpLdSt(False/*store*/, 8, fr_src, am_addr));
6605 return;
6607 if (tyd == Ity_F32) {
6608 PPCAMode* am_addr
6609 = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6610 IEndianess);
6611 HReg fr_src = iselFltExpr(env, stmt->Ist.Store.data, IEndianess);
6612 addInstr(env,
6613 PPCInstr_FpLdSt(False/*store*/, 4, fr_src, am_addr));
6614 return;
6616 if (tyd == Ity_D64) {
6617 PPCAMode* am_addr
6618 = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6619 IEndianess);
6620 HReg fr_src = iselDfp64Expr(env, stmt->Ist.Store.data, IEndianess);
6621 addInstr(env,
6622 PPCInstr_FpLdSt(False/*store*/, 8, fr_src, am_addr));
6623 return;
6625 if (tyd == Ity_D32) {
6626 PPCAMode* am_addr
6627 = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6628 IEndianess);
6629 HReg fr_src = iselDfp32Expr(env, stmt->Ist.Store.data, IEndianess);
6630 addInstr(env,
6631 PPCInstr_FpLdSt(False/*store*/, 4, fr_src, am_addr));
6632 return;
6634 if (tyd == Ity_V128) {
6635 PPCAMode* am_addr
6636 = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6637 IEndianess);
6638 HReg v_src = iselVecExpr(env, stmt->Ist.Store.data, IEndianess);
6639 addInstr(env,
6640 PPCInstr_AvLdSt(False/*store*/, 16, v_src, am_addr));
6641 return;
6643 if (tyd == Ity_I64 && !mode64) {
6644 /* Just calculate the address in the register. Life is too
6645 short to arse around trying and possibly failing to adjust
6646 the offset in a 'reg+offset' style amode. */
6647 HReg rHi32, rLo32;
6648 HReg r_addr = iselWordExpr_R(env, stmt->Ist.Store.addr, IEndianess);
6649 iselInt64Expr( &rHi32, &rLo32, env, stmt->Ist.Store.data,
6650 IEndianess );
6651 addInstr(env, PPCInstr_Store( 4/*byte-store*/,
6652 PPCAMode_IR( 0, r_addr ),
6653 rHi32,
6654 False/*32-bit insn please*/) );
6655 addInstr(env, PPCInstr_Store( 4/*byte-store*/,
6656 PPCAMode_IR( 4, r_addr ),
6657 rLo32,
6658 False/*32-bit insn please*/) );
6659 return;
6661 break;
6664 /* --------- PUT --------- */
6665 case Ist_Put: {
6666 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
6667 if (ty == Ity_I8 || ty == Ity_I16 ||
6668 ty == Ity_I32 || ((ty == Ity_I64) && mode64)) {
6669 HReg r_src = iselWordExpr_R(env, stmt->Ist.Put.data, IEndianess);
6670 PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6671 GuestStatePtr(mode64) );
6672 addInstr(env, PPCInstr_Store( toUChar(sizeofIRType(ty)),
6673 am_addr, r_src, mode64 ));
6674 return;
6676 if (!mode64 && ty == Ity_I64) {
6677 HReg rHi, rLo;
6678 PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6679 GuestStatePtr(mode64) );
6680 PPCAMode* am_addr4 = advance4(env, am_addr);
6681 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.Put.data, IEndianess);
6682 addInstr(env, PPCInstr_Store( 4, am_addr, rHi, mode64 ));
6683 addInstr(env, PPCInstr_Store( 4, am_addr4, rLo, mode64 ));
6684 return;
6686 if (ty == Ity_I128) {
6687 HReg rHi, rLo;
6688 PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6689 GuestStatePtr(mode64) );
6690 PPCAMode* am_addr4 = advance4(env, am_addr);
6692 iselInt128Expr(&rHi,&rLo, env, stmt->Ist.Put.data, IEndianess);
6693 addInstr(env, PPCInstr_Store( 4, am_addr, rHi, mode64 ));
6694 addInstr(env, PPCInstr_Store( 4, am_addr4, rLo, mode64 ));
6695 return;
6697 if (ty == Ity_F128) {
6698 /* Guest state vectors are 16byte aligned,
6699 so don't need to worry here */
6700 HReg v_src = iselFp128Expr(env, stmt->Ist.Put.data, IEndianess);
6702 PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6703 GuestStatePtr(mode64) );
6704 addInstr(env,
6705 PPCInstr_AvLdSt(False/*store*/, 16, v_src, am_addr));
6706 return;
6708 if (ty == Ity_V128) {
6709 /* Guest state vectors are 16byte aligned,
6710 so don't need to worry here */
6711 HReg v_src = iselVecExpr(env, stmt->Ist.Put.data, IEndianess);
6712 PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6713 GuestStatePtr(mode64) );
6714 addInstr(env,
6715 PPCInstr_AvLdSt(False/*store*/, 16, v_src, am_addr));
6716 return;
6718 if (ty == Ity_F64) {
6719 HReg fr_src = iselDblExpr(env, stmt->Ist.Put.data, IEndianess);
6720 PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6721 GuestStatePtr(mode64) );
6722 addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
6723 fr_src, am_addr ));
6724 return;
6726 if (ty == Ity_D32) {
6727 /* The 32-bit value is stored in a 64-bit register */
6728 HReg fr_src = iselDfp32Expr( env, stmt->Ist.Put.data, IEndianess );
6729 PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6730 GuestStatePtr(mode64) );
6731 addInstr( env, PPCInstr_FpLdSt( False/*store*/, 8,
6732 fr_src, am_addr ) );
6733 return;
6735 if (ty == Ity_D64) {
6736 HReg fr_src = iselDfp64Expr( env, stmt->Ist.Put.data, IEndianess );
6737 PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6738 GuestStatePtr(mode64) );
6739 addInstr( env, PPCInstr_FpLdSt( False/*store*/, 8, fr_src, am_addr ) );
6740 return;
6742 break;
6745 /* --------- Indexed PUT --------- */
6746 case Ist_PutI: {
6747 IRPutI *puti = stmt->Ist.PutI.details;
6749 PPCAMode* dst_am
6750 = genGuestArrayOffset(
6751 env, puti->descr,
6752 puti->ix, puti->bias,
6753 IEndianess );
6754 IRType ty = typeOfIRExpr(env->type_env, puti->data);
6755 if (mode64 && ty == Ity_I64) {
6756 HReg r_src = iselWordExpr_R(env, puti->data, IEndianess);
6757 addInstr(env, PPCInstr_Store( toUChar(8),
6758 dst_am, r_src, mode64 ));
6759 return;
6761 if ((!mode64) && ty == Ity_I32) {
6762 HReg r_src = iselWordExpr_R(env, puti->data, IEndianess);
6763 addInstr(env, PPCInstr_Store( toUChar(4),
6764 dst_am, r_src, mode64 ));
6765 return;
6767 break;
6770 /* --------- TMP --------- */
6771 case Ist_WrTmp: {
6772 IRTemp tmp = stmt->Ist.WrTmp.tmp;
6773 IRType ty = typeOfIRTemp(env->type_env, tmp);
6774 if (ty == Ity_I8 || ty == Ity_I16 ||
6775 ty == Ity_I32 || ((ty == Ity_I64) && mode64)) {
6776 HReg r_dst = lookupIRTemp(env, tmp);
6777 HReg r_src = iselWordExpr_R(env, stmt->Ist.WrTmp.data, IEndianess);
6778 addInstr(env, mk_iMOVds_RR( r_dst, r_src ));
6779 return;
6781 if (!mode64 && ty == Ity_I64) {
6782 HReg r_srcHi, r_srcLo, r_dstHi, r_dstLo;
6784 iselInt64Expr(&r_srcHi,&r_srcLo, env, stmt->Ist.WrTmp.data,
6785 IEndianess);
6786 lookupIRTempPair( &r_dstHi, &r_dstLo, env, tmp);
6787 addInstr(env, mk_iMOVds_RR(r_dstHi, r_srcHi) );
6788 addInstr(env, mk_iMOVds_RR(r_dstLo, r_srcLo) );
6789 return;
6791 if (mode64 && ty == Ity_I128) {
6792 HReg r_srcHi, r_srcLo, r_dstHi, r_dstLo;
6793 iselInt128Expr(&r_srcHi,&r_srcLo, env, stmt->Ist.WrTmp.data,
6794 IEndianess);
6795 lookupIRTempPair( &r_dstHi, &r_dstLo, env, tmp);
6796 addInstr(env, mk_iMOVds_RR(r_dstHi, r_srcHi) );
6797 addInstr(env, mk_iMOVds_RR(r_dstLo, r_srcLo) );
6798 return;
6800 if (!mode64 && ty == Ity_I128) {
6801 HReg r_srcHi = INVALID_HREG;
6802 HReg r_srcMedHi = INVALID_HREG;
6803 HReg r_srcMedLo = INVALID_HREG;
6804 HReg r_srcLo = INVALID_HREG;
6805 HReg r_dstHi, r_dstMedHi, r_dstMedLo, r_dstLo;
6807 iselInt128Expr_to_32x4(&r_srcHi, &r_srcMedHi,
6808 &r_srcMedLo, &r_srcLo,
6809 env, stmt->Ist.WrTmp.data, IEndianess);
6811 lookupIRTempQuad( &r_dstHi, &r_dstMedHi, &r_dstMedLo,
6812 &r_dstLo, env, tmp);
6814 addInstr(env, mk_iMOVds_RR(r_dstHi, r_srcHi) );
6815 addInstr(env, mk_iMOVds_RR(r_dstMedHi, r_srcMedHi) );
6816 addInstr(env, mk_iMOVds_RR(r_dstMedLo, r_srcMedLo) );
6817 addInstr(env, mk_iMOVds_RR(r_dstLo, r_srcLo) );
6818 return;
6820 if (ty == Ity_I1) {
6821 PPCCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data,
6822 IEndianess);
6823 HReg r_dst = lookupIRTemp(env, tmp);
6824 addInstr(env, PPCInstr_Set(cond, r_dst));
6825 return;
6827 if (ty == Ity_F64) {
6828 HReg fr_dst = lookupIRTemp(env, tmp);
6829 HReg fr_src = iselDblExpr(env, stmt->Ist.WrTmp.data, IEndianess);
6830 addInstr(env, PPCInstr_FpUnary(Pfp_MOV, fr_dst, fr_src));
6831 return;
6833 if (ty == Ity_F32) {
6834 HReg fr_dst = lookupIRTemp(env, tmp);
6835 HReg fr_src = iselFltExpr(env, stmt->Ist.WrTmp.data, IEndianess);
6836 addInstr(env, PPCInstr_FpUnary(Pfp_MOV, fr_dst, fr_src));
6837 return;
6839 if (ty == Ity_D32) {
6840 HReg fr_dst = lookupIRTemp(env, tmp);
6841 HReg fr_src = iselDfp32Expr(env, stmt->Ist.WrTmp.data, IEndianess);
6842 addInstr(env, PPCInstr_Dfp64Unary(Pfp_MOV, fr_dst, fr_src));
6843 return;
6845 if (ty == Ity_F128) {
6846 HReg v_dst = lookupIRTemp(env, tmp);
6847 HReg v_src = iselFp128Expr(env, stmt->Ist.WrTmp.data, IEndianess);
6848 addInstr(env, PPCInstr_AvUnary(Pav_MOV, v_dst, v_src));
6849 return;
6851 if (ty == Ity_V128) {
6852 HReg v_dst = lookupIRTemp(env, tmp);
6853 HReg v_src = iselVecExpr(env, stmt->Ist.WrTmp.data, IEndianess);
6854 addInstr(env, PPCInstr_AvUnary(Pav_MOV, v_dst, v_src));
6855 return;
6857 if (ty == Ity_D64) {
6858 HReg fr_dst = lookupIRTemp( env, tmp );
6859 HReg fr_src = iselDfp64Expr( env, stmt->Ist.WrTmp.data, IEndianess );
6860 addInstr( env, PPCInstr_Dfp64Unary( Pfp_MOV, fr_dst, fr_src ) );
6861 return;
6863 if (ty == Ity_D128) {
6864 HReg fr_srcHi, fr_srcLo, fr_dstHi, fr_dstLo;
6865 // lookupDfp128IRTempPair( &fr_dstHi, &fr_dstLo, env, tmp );
6866 lookupIRTempPair( &fr_dstHi, &fr_dstLo, env, tmp );
6867 iselDfp128Expr( &fr_srcHi, &fr_srcLo, env, stmt->Ist.WrTmp.data,
6868 IEndianess );
6869 addInstr( env, PPCInstr_Dfp64Unary( Pfp_MOV, fr_dstHi, fr_srcHi ) );
6870 addInstr( env, PPCInstr_Dfp64Unary( Pfp_MOV, fr_dstLo, fr_srcLo ) );
6871 return;
6873 break;
6876 /* --------- Load Linked or Store Conditional --------- */
6877 case Ist_LLSC: {
6878 IRTemp res = stmt->Ist.LLSC.result;
6879 IRType tyRes = typeOfIRTemp(env->type_env, res);
6880 IRType tyAddr = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.addr);
6882 if (stmt->Ist.LLSC.end != IEndianess)
6883 goto stmt_fail;
6884 if (!mode64 && (tyAddr != Ity_I32))
6885 goto stmt_fail;
6886 if (mode64 && (tyAddr != Ity_I64))
6887 goto stmt_fail;
6889 if (stmt->Ist.LLSC.storedata == NULL) {
6890 /* LL */
6891 HReg r_addr = iselWordExpr_R( env, stmt->Ist.LLSC.addr, IEndianess );
6892 HReg r_dst = lookupIRTemp(env, res);
6893 if (tyRes == Ity_I8) {
6894 addInstr(env, PPCInstr_LoadL( 1, r_dst, r_addr, mode64 ));
6895 return;
6897 if (tyRes == Ity_I16) {
6898 addInstr(env, PPCInstr_LoadL( 2, r_dst, r_addr, mode64 ));
6899 return;
6901 if (tyRes == Ity_I32) {
6902 addInstr(env, PPCInstr_LoadL( 4, r_dst, r_addr, mode64 ));
6903 return;
6905 if (tyRes == Ity_I64 && mode64) {
6906 addInstr(env, PPCInstr_LoadL( 8, r_dst, r_addr, mode64 ));
6907 return;
6909 /* fallthru */;
6910 } else {
6911 /* SC */
6912 HReg r_res = lookupIRTemp(env, res); /* :: Ity_I1 */
6913 HReg r_a = iselWordExpr_R(env, stmt->Ist.LLSC.addr, IEndianess);
6914 HReg r_src = iselWordExpr_R(env, stmt->Ist.LLSC.storedata,
6915 IEndianess);
6916 HReg r_tmp = newVRegI(env);
6917 IRType tyData = typeOfIRExpr(env->type_env,
6918 stmt->Ist.LLSC.storedata);
6919 vassert(tyRes == Ity_I1);
6920 if (tyData == Ity_I8 || tyData == Ity_I16 || tyData == Ity_I32 ||
6921 (tyData == Ity_I64 && mode64)) {
6922 int size = 0;
6924 if (tyData == Ity_I64)
6925 size = 8;
6926 else if (tyData == Ity_I32)
6927 size = 4;
6928 else if (tyData == Ity_I16)
6929 size = 2;
6930 else if (tyData == Ity_I8)
6931 size = 1;
6933 addInstr(env, PPCInstr_StoreC( size,
6934 r_a, r_src, mode64 ));
6935 addInstr(env, PPCInstr_MfCR( r_tmp ));
6936 addInstr(env, PPCInstr_Shft(
6937 Pshft_SHR,
6938 env->mode64 ? False : True
6939 /*F:64-bit, T:32-bit shift*/,
6940 r_tmp, r_tmp,
6941 PPCRH_Imm(False/*unsigned*/, 29)));
6942 /* Probably unnecessary, since the IR dest type is Ity_I1,
6943 and so we are entitled to leave whatever junk we like
6944 drifting round in the upper 31 or 63 bits of r_res.
6945 However, for the sake of conservativeness .. */
6946 addInstr(env, PPCInstr_Alu(
6947 Palu_AND,
6948 r_res, r_tmp,
6949 PPCRH_Imm(False/*signed*/, 1)));
6950 return;
6952 /* fallthru */
6954 goto stmt_fail;
6955 /*NOTREACHED*/
6958 /* --------- Call to DIRTY helper --------- */
6959 case Ist_Dirty: {
6960 IRDirty* d = stmt->Ist.Dirty.details;
6962 /* Figure out the return type, if any. */
6963 IRType retty = Ity_INVALID;
6964 if (d->tmp != IRTemp_INVALID)
6965 retty = typeOfIRTemp(env->type_env, d->tmp);
6967 /* Throw out any return types we don't know about. The set of
6968 acceptable return types is the same in both 32- and 64-bit
6969 mode, so we don't need to inspect mode64 to make a
6970 decision. */
6971 Bool retty_ok = False;
6972 switch (retty) {
6973 case Ity_INVALID: /* function doesn't return anything */
6974 case Ity_V128:
6975 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
6976 retty_ok = True; break;
6977 default:
6978 break;
6980 if (!retty_ok)
6981 break; /* will go to stmt_fail: */
6983 /* Marshal args, do the call, clear stack, set the return value
6984 to 0x555..555 if this is a conditional call that returns a
6985 value and the call is skipped. */
6986 UInt addToSp = 0;
6987 RetLoc rloc = mk_RetLoc_INVALID();
6988 doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args,
6989 IEndianess );
6990 vassert(is_sane_RetLoc(rloc));
6992 /* Now figure out what to do with the returned value, if any. */
6993 switch (retty) {
6994 case Ity_INVALID: {
6995 /* No return value. Nothing to do. */
6996 vassert(d->tmp == IRTemp_INVALID);
6997 vassert(rloc.pri == RLPri_None);
6998 vassert(addToSp == 0);
6999 return;
7001 case Ity_I32: case Ity_I16: case Ity_I8: {
7002 /* The returned value is in %r3. Park it in the register
7003 associated with tmp. */
7004 HReg r_dst = lookupIRTemp(env, d->tmp);
7005 addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
7006 vassert(rloc.pri == RLPri_Int);
7007 vassert(addToSp == 0);
7008 return;
7010 case Ity_I64:
7011 if (mode64) {
7012 /* The returned value is in %r3. Park it in the register
7013 associated with tmp. */
7014 HReg r_dst = lookupIRTemp(env, d->tmp);
7015 addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
7016 vassert(rloc.pri == RLPri_Int);
7017 vassert(addToSp == 0);
7018 } else {
7019 /* The returned value is in %r3:%r4. Park it in the
7020 register-pair associated with tmp. */
7021 HReg r_dstHi = INVALID_HREG;
7022 HReg r_dstLo = INVALID_HREG;
7023 lookupIRTempPair( &r_dstHi, &r_dstLo, env, d->tmp);
7024 addInstr(env, mk_iMOVds_RR(r_dstHi, hregPPC_GPR3(mode64)));
7025 addInstr(env, mk_iMOVds_RR(r_dstLo, hregPPC_GPR4(mode64)));
7026 vassert(rloc.pri == RLPri_2Int);
7027 vassert(addToSp == 0);
7029 return;
7030 case Ity_V128: {
7031 /* The returned value is on the stack, and *retloc tells
7032 us where. Fish it off the stack and then move the
7033 stack pointer upwards to clear it, as directed by
7034 doHelperCall. */
7035 vassert(rloc.pri == RLPri_V128SpRel);
7036 vassert(addToSp >= 16);
7037 HReg dst = lookupIRTemp(env, d->tmp);
7038 PPCAMode* am = PPCAMode_IR(rloc.spOff, StackFramePtr(mode64));
7039 addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, dst, am ));
7040 add_to_sp(env, addToSp);
7041 return;
7043 default:
7044 /*NOTREACHED*/
7045 vassert(0);
7049 /* --------- MEM FENCE --------- */
7050 case Ist_MBE:
7051 switch (stmt->Ist.MBE.event) {
7052 case Imbe_Fence:
7053 addInstr(env, PPCInstr_MFence());
7054 return;
7055 default:
7056 break;
7058 break;
7060 /* --------- INSTR MARK --------- */
7061 /* Doesn't generate any executable code ... */
7062 case Ist_IMark:
7063 return;
7065 /* --------- ABI HINT --------- */
7066 /* These have no meaning (denotation in the IR) and so we ignore
7067 them ... if any actually made it this far. */
7068 case Ist_AbiHint:
7069 return;
7071 /* --------- NO-OP --------- */
7072 /* Fairly self-explanatory, wouldn't you say? */
7073 case Ist_NoOp:
7074 return;
7076 /* --------- EXIT --------- */
7077 case Ist_Exit: {
7078 IRConst* dst = stmt->Ist.Exit.dst;
7079 if (!mode64 && dst->tag != Ico_U32)
7080 vpanic("iselStmt(ppc): Ist_Exit: dst is not a 32-bit value");
7081 if (mode64 && dst->tag != Ico_U64)
7082 vpanic("iselStmt(ppc64): Ist_Exit: dst is not a 64-bit value");
7084 PPCCondCode cc = iselCondCode(env, stmt->Ist.Exit.guard, IEndianess);
7085 PPCAMode* amCIA = PPCAMode_IR(stmt->Ist.Exit.offsIP,
7086 hregPPC_GPR31(mode64));
7088 /* Case: boring transfer to known address */
7089 if (stmt->Ist.Exit.jk == Ijk_Boring
7090 || stmt->Ist.Exit.jk == Ijk_Call
7091 /* || stmt->Ist.Exit.jk == Ijk_Ret */) {
7092 if (env->chainingAllowed) {
7093 /* .. almost always true .. */
7094 /* Skip the event check at the dst if this is a forwards
7095 edge. */
7096 Bool toFastEP
7097 = mode64
7098 ? (((Addr64)stmt->Ist.Exit.dst->Ico.U64) > (Addr64)env->max_ga)
7099 : (((Addr32)stmt->Ist.Exit.dst->Ico.U32) > (Addr32)env->max_ga);
7100 if (0) vex_printf("%s", toFastEP ? "Y" : ",");
7101 addInstr(env, PPCInstr_XDirect(
7102 mode64 ? (Addr64)stmt->Ist.Exit.dst->Ico.U64
7103 : (Addr64)stmt->Ist.Exit.dst->Ico.U32,
7104 amCIA, cc, toFastEP));
7105 } else {
7106 /* .. very occasionally .. */
7107 /* We can't use chaining, so ask for an assisted transfer,
7108 as that's the only alternative that is allowable. */
7109 HReg r = iselWordExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst),
7110 IEndianess);
7111 addInstr(env, PPCInstr_XAssisted(r, amCIA, cc, Ijk_Boring));
7113 return;
7116 /* Case: assisted transfer to arbitrary address */
7117 switch (stmt->Ist.Exit.jk) {
7118 /* Keep this list in sync with that in iselNext below */
7119 case Ijk_ClientReq:
7120 case Ijk_EmFail:
7121 case Ijk_EmWarn:
7122 case Ijk_NoDecode:
7123 case Ijk_NoRedir:
7124 case Ijk_SigBUS:
7125 case Ijk_SigTRAP:
7126 case Ijk_Sys_syscall:
7127 case Ijk_InvalICache:
7129 HReg r = iselWordExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst),
7130 IEndianess);
7131 addInstr(env, PPCInstr_XAssisted(r, amCIA, cc,
7132 stmt->Ist.Exit.jk));
7133 return;
7135 default:
7136 break;
7139 /* Do we ever expect to see any other kind? */
7140 goto stmt_fail;
7143 default: break;
7145 stmt_fail:
7146 ppIRStmt(stmt);
7147 vpanic("iselStmt(ppc)");
7151 /*---------------------------------------------------------*/
7152 /*--- ISEL: Basic block terminators (Nexts) ---*/
7153 /*---------------------------------------------------------*/
7155 static void iselNext ( ISelEnv* env,
7156 IRExpr* next, IRJumpKind jk, Int offsIP,
7157 IREndness IEndianess)
7159 if (vex_traceflags & VEX_TRACE_VCODE) {
7160 vex_printf( "\n-- PUT(%d) = ", offsIP);
7161 ppIRExpr( next );
7162 vex_printf( "; exit-");
7163 ppIRJumpKind(jk);
7164 vex_printf( "\n");
7167 PPCCondCode always = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
7169 /* Case: boring transfer to known address */
7170 if (next->tag == Iex_Const) {
7171 IRConst* cdst = next->Iex.Const.con;
7172 vassert(cdst->tag == (env->mode64 ? Ico_U64 :Ico_U32));
7173 if (jk == Ijk_Boring || jk == Ijk_Call) {
7174 /* Boring transfer to known address */
7175 PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
7176 if (env->chainingAllowed) {
7177 /* .. almost always true .. */
7178 /* Skip the event check at the dst if this is a forwards
7179 edge. */
7180 Bool toFastEP
7181 = env->mode64
7182 ? (((Addr64)cdst->Ico.U64) > (Addr64)env->max_ga)
7183 : (((Addr32)cdst->Ico.U32) > (Addr32)env->max_ga);
7184 if (0) vex_printf("%s", toFastEP ? "X" : ".");
7185 addInstr(env, PPCInstr_XDirect(
7186 env->mode64 ? (Addr64)cdst->Ico.U64
7187 : (Addr64)cdst->Ico.U32,
7188 amCIA, always, toFastEP));
7189 } else {
7190 /* .. very occasionally .. */
7191 /* We can't use chaining, so ask for an assisted transfer,
7192 as that's the only alternative that is allowable. */
7193 HReg r = iselWordExpr_R(env, next, IEndianess);
7194 addInstr(env, PPCInstr_XAssisted(r, amCIA, always,
7195 Ijk_Boring));
7197 return;
7201 /* Case: call/return (==boring) transfer to any address */
7202 switch (jk) {
7203 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
7204 HReg r = iselWordExpr_R(env, next, IEndianess);
7205 PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
7206 if (env->chainingAllowed) {
7207 addInstr(env, PPCInstr_XIndir(r, amCIA, always));
7208 } else {
7209 addInstr(env, PPCInstr_XAssisted(r, amCIA, always,
7210 Ijk_Boring));
7212 return;
7214 default:
7215 break;
7218 /* Case: assisted transfer to arbitrary address */
7219 switch (jk) {
7220 /* Keep this list in sync with that for Ist_Exit above */
7221 case Ijk_ClientReq:
7222 case Ijk_EmFail:
7223 case Ijk_EmWarn:
7224 case Ijk_NoDecode:
7225 case Ijk_NoRedir:
7226 case Ijk_SigBUS:
7227 case Ijk_SigTRAP:
7228 case Ijk_Sys_syscall:
7229 case Ijk_InvalICache:
7231 HReg r = iselWordExpr_R(env, next, IEndianess);
7232 PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
7233 addInstr(env, PPCInstr_XAssisted(r, amCIA, always, jk));
7234 return;
7236 default:
7237 break;
7240 vex_printf( "\n-- PUT(%d) = ", offsIP);
7241 ppIRExpr( next );
7242 vex_printf( "; exit-");
7243 ppIRJumpKind(jk);
7244 vex_printf( "\n");
7245 vassert(0); // are we expecting any other kind?
7249 /*---------------------------------------------------------*/
7250 /*--- Insn selector top-level ---*/
7251 /*---------------------------------------------------------*/
7253 /* Translate an entire SB to ppc code. */
7254 HInstrArray* iselSB_PPC ( const IRSB* bb,
7255 VexArch arch_host,
7256 const VexArchInfo* archinfo_host,
7257 const VexAbiInfo* vbi,
7258 Int offs_Host_EvC_Counter,
7259 Int offs_Host_EvC_FailAddr,
7260 Bool chainingAllowed,
7261 Bool addProfInc,
7262 Addr max_ga)
7265 Int i, j;
7266 HReg hregLo, hregMedLo, hregMedHi, hregHi;
7267 ISelEnv* env;
7268 UInt hwcaps_host = archinfo_host->hwcaps;
7269 Bool mode64 = False;
7270 UInt mask32, mask64;
7271 PPCAMode *amCounter, *amFailAddr;
7272 IREndness IEndianess;
7274 vassert(arch_host == VexArchPPC32 || arch_host == VexArchPPC64);
7275 mode64 = arch_host == VexArchPPC64;
7277 /* do some sanity checks,
7278 * Note: no 32-bit support for ISA 3.0, ISA 3.1
7280 mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V
7281 | VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX | VEX_HWCAPS_PPC32_VX
7282 | VEX_HWCAPS_PPC32_DFP | VEX_HWCAPS_PPC32_ISA2_07;
7284 mask64 = VEX_HWCAPS_PPC64_V | VEX_HWCAPS_PPC64_FX
7285 | VEX_HWCAPS_PPC64_GX | VEX_HWCAPS_PPC64_VX | VEX_HWCAPS_PPC64_DFP
7286 | VEX_HWCAPS_PPC64_ISA2_07 | VEX_HWCAPS_PPC64_ISA3_0
7287 | VEX_HWCAPS_PPC64_ISA3_1;
7289 if (mode64) {
7290 vassert((hwcaps_host & mask32) == 0);
7291 } else {
7292 vassert((hwcaps_host & mask64) == 0);
7295 /* Check that the host's endianness is as expected. */
7296 vassert((archinfo_host->endness == VexEndnessBE) ||
7297 (archinfo_host->endness == VexEndnessLE));
7299 if (archinfo_host->endness == VexEndnessBE)
7300 IEndianess = Iend_BE;
7301 else
7302 IEndianess = Iend_LE;
7304 /* Make up an initial environment to use. */
7305 env = LibVEX_Alloc_inline(sizeof(ISelEnv));
7306 env->vreg_ctr = 0;
7308 /* Are we being ppc32 or ppc64? */
7309 env->mode64 = mode64;
7311 /* Set up output code array. */
7312 env->code = newHInstrArray();
7314 /* Copy BB's type env. */
7315 env->type_env = bb->tyenv;
7317 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
7318 * change as we go along.
7320 * vregmap2 and vregmap3 are only used in 32 bit mode
7321 * for supporting I128 in 32-bit mode
7323 env->n_vregmap = bb->tyenv->types_used;
7324 env->vregmapLo = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
7325 env->vregmapMedLo = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
7326 if (mode64) {
7327 env->vregmapMedHi = NULL;
7328 env->vregmapHi = NULL;
7329 } else {
7330 env->vregmapMedHi = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
7331 env->vregmapHi = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
7334 /* and finally ... */
7335 env->chainingAllowed = chainingAllowed;
7336 env->max_ga = max_ga;
7337 env->hwcaps = hwcaps_host;
7338 env->previous_rm = NULL;
7339 env->vbi = vbi;
7341 /* For each IR temporary, allocate a suitably-kinded virtual
7342 register. */
7343 j = 0;
7344 for (i = 0; i < env->n_vregmap; i++) {
7345 hregLo = hregMedLo = hregMedHi = hregHi = INVALID_HREG;
7346 switch (bb->tyenv->types[i]) {
7347 case Ity_I1:
7348 case Ity_I8:
7349 case Ity_I16:
7350 case Ity_I32:
7351 if (mode64) {
7352 hregLo = mkHReg(True, HRcInt64, 0, j++);
7353 } else {
7354 hregLo = mkHReg(True, HRcInt32, 0, j++);
7356 break;
7357 case Ity_I64:
7358 if (mode64) {
7359 hregLo = mkHReg(True, HRcInt64, 0, j++);
7360 } else {
7361 hregLo = mkHReg(True, HRcInt32, 0, j++);
7362 hregMedLo = mkHReg(True, HRcInt32, 0, j++);
7364 break;
7365 case Ity_I128:
7366 if (mode64) {
7367 hregLo = mkHReg(True, HRcInt64, 0, j++);
7368 hregMedLo = mkHReg(True, HRcInt64, 0, j++);
7369 } else {
7370 hregLo = mkHReg(True, HRcInt32, 0, j++);
7371 hregMedLo = mkHReg(True, HRcInt32, 0, j++);
7372 hregMedHi = mkHReg(True, HRcInt32, 0, j++);
7373 hregHi = mkHReg(True, HRcInt32, 0, j++);
7375 break;
7376 case Ity_F32:
7377 case Ity_F64:
7378 hregLo = mkHReg(True, HRcFlt64, 0, j++);
7379 break;
7380 case Ity_F128:
7381 case Ity_V128:
7382 hregLo = mkHReg(True, HRcVec128, 0, j++);
7383 break;
7384 case Ity_D32:
7385 case Ity_D64:
7386 hregLo = mkHReg(True, HRcFlt64, 0, j++);
7387 break;
7388 case Ity_D128:
7389 hregLo = mkHReg(True, HRcFlt64, 0, j++);
7390 hregMedLo = mkHReg(True, HRcFlt64, 0, j++);
7391 break;
7392 default:
7393 ppIRType(bb->tyenv->types[i]);
7394 vpanic("iselBB(ppc): IRTemp type");
7396 env->vregmapLo[i] = hregLo;
7397 env->vregmapMedLo[i] = hregMedLo;
7398 if (!mode64) {
7399 env->vregmapMedHi[i] = hregMedHi;
7400 env->vregmapHi[i] = hregHi;
7403 env->vreg_ctr = j;
7405 /* The very first instruction must be an event check. */
7406 amCounter = PPCAMode_IR(offs_Host_EvC_Counter, hregPPC_GPR31(mode64));
7407 amFailAddr = PPCAMode_IR(offs_Host_EvC_FailAddr, hregPPC_GPR31(mode64));
7408 addInstr(env, PPCInstr_EvCheck(amCounter, amFailAddr));
7410 /* Possibly a block counter increment (for profiling). At this
7411 point we don't know the address of the counter, so just pretend
7412 it is zero. It will have to be patched later, but before this
7413 translation is used, by a call to LibVEX_patchProfCtr. */
7414 if (addProfInc) {
7415 addInstr(env, PPCInstr_ProfInc());
7418 /* Ok, finally we can iterate over the statements. */
7419 for (i = 0; i < bb->stmts_used; i++)
7420 iselStmt(env, bb->stmts[i], IEndianess);
7422 iselNext(env, bb->next, bb->jumpkind, bb->offsIP, IEndianess);
7424 /* record the number of vregs we used. */
7425 env->code->n_vregs = env->vreg_ctr;
7426 return env->code;
7430 /*---------------------------------------------------------------*/
7431 /*--- end host_ppc_isel.c ---*/
7432 /*---------------------------------------------------------------*/