VEX/priv/host_ppc_isel.c

   1
   2
   3 /*---------------------------------------------------------------*/
   4 /*--- begin                                   host_ppc_isel.c ---*/
   5 /*---------------------------------------------------------------*/
   6
   7 /*
   8    This file is part of Valgrind, a dynamic binary instrumentation
   9    framework.
  10
  11    Copyright (C) 2004-2017 OpenWorks LLP
  12       info@open-works.net
  13
  14    This program is free software; you can redistribute it and/or
  15    modify it under the terms of the GNU General Public License as
  16    published by the Free Software Foundation; either version 2 of the
  17    License, or (at your option) any later version.
  18
  19    This program is distributed in the hope that it will be useful, but
  20    WITHOUT ANY WARRANTY; without even the implied warranty of
  21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  22    General Public License for more details.
  23
  24    You should have received a copy of the GNU General Public License
  25    along with this program; if not, see <http://www.gnu.org/licenses/>.
  26
  27    The GNU General Public License is contained in the file COPYING.
  28
  29    Neither the names of the U.S. Department of Energy nor the
  30    University of California nor the names of its contributors may be
  31    used to endorse or promote products derived from this software
  32    without prior written permission.
  33 */
  34
  35 #include "libvex_basictypes.h"
  36 #include "libvex_ir.h"
  37 #include "libvex.h"
  38
  39 #include "ir_match.h"
  40 #include "main_util.h"
  41 #include "main_globals.h"
  42 #include "host_generic_regs.h"
  43 #include "host_generic_simd64.h"
  44 #include "host_ppc_defs.h"
  45
  46 /* GPR register class for ppc32/64 */
  47 #define HRcGPR(_mode64) ((_mode64) ? HRcInt64 : HRcInt32)
  48
  49
  50 /*---------------------------------------------------------*/
  51 /*--- Register Usage Conventions                        ---*/
  52 /*---------------------------------------------------------*/
  53 /*
  54   Integer Regs
  55   ------------
  56   GPR0       Reserved
  57   GPR1       Stack Pointer
  58   GPR2       not used - TOC pointer
  59   GPR3:10    Allocateable
  60   GPR11      if mode64: not used - calls by ptr / env ptr for some langs
  61   GPR12      if mode64: not used - exceptions / global linkage code
  62   GPR13      not used - Thread-specific pointer
  63   GPR14:28   Allocateable
  64   GPR29      Unused by us (reserved for the dispatcher)
  65   GPR30      AltiVec temp spill register
  66   GPR31      GuestStatePointer
  67
  68   Of Allocateable regs:
  69   if (mode64)
  70     GPR3:10  Caller-saved regs
  71   else
  72     GPR3:12  Caller-saved regs
  73   GPR14:29   Callee-saved regs
  74
  75   GPR3       [Return | Parameter] - carrying reg
  76   GPR4:10    Parameter-carrying regs
  77
  78
  79   Floating Point Regs
  80   -------------------
  81   FPR0:31    Allocateable
  82
  83   FPR0       Caller-saved - scratch reg
  84   if (mode64)
  85     FPR1:13  Caller-saved - param & return regs
  86   else
  87     FPR1:8   Caller-saved - param & return regs
  88     FPR9:13  Caller-saved regs
  89   FPR14:31   Callee-saved regs
  90
  91
  92   Vector Regs (on processors with the VMX feature)
  93   -----------
  94   VR0-VR1    Volatile scratch registers
  95   VR2-VR13   Volatile vector parameters registers
  96   VR14-VR19  Volatile scratch registers
  97   VR20-VR31  Non-volatile registers
  98   VRSAVE     Non-volatile 32-bit register
  99 */
 100
 101
 102 /*---------------------------------------------------------*/
 103 /*--- PPC FP Status & Control Register Conventions      ---*/
 104 /*---------------------------------------------------------*/
 105 /*
 106   Vex-generated code expects to run with the FPU set as follows: all
 107   exceptions masked.  The rounding mode is set appropriately before
 108   each floating point insn emitted (or left unchanged if known to be
 109   correct already).  There are a few fp insns (fmr,fneg,fabs,fnabs),
 110   which are unaffected by the rm and so the rounding mode is not set
 111   prior to them.
 112
 113   At least on MPC7447A (Mac Mini), frsqrte is also not affected by
 114   rounding mode.  At some point the ppc docs get sufficiently vague
 115   that the only way to find out is to write test programs.
 116 */
 117 /* Notes on the FP instruction set, 6 Feb 06.
 118
 119 What                 exns -> CR1 ?   Sets FPRF ?   Observes RM ?
 120 -------------------------------------------------------------
 121
 122 fmr[.]                   if .             n             n
 123 fneg[.]                  if .             n             n
 124 fabs[.]                  if .             n             n
 125 fnabs[.]                 if .             n             n
 126
 127 fadd[.]                  if .             y             y
 128 fadds[.]                 if .             y             y
 129 fcfid[.] (Si64->dbl)     if .             y             y
 130 fcfidU[.] (Ui64->dbl)    if .             y             y
 131 fcfids[.] (Si64->sngl)   if .             Y             Y
 132 fcfidus[.] (Ui64->sngl)  if .             Y             Y
 133 fcmpo (cmp, result       n                n             n
 134 fcmpu  to crfD)          n                n             n
 135 fctid[.]  (dbl->i64)     if .       ->undef             y
 136 fctidz[.] (dbl->i64)     if .       ->undef    rounds-to-zero
 137 fctiw[.]  (dbl->i32)     if .       ->undef             y
 138 fctiwz[.] (dbl->i32)     if .       ->undef    rounds-to-zero
 139 fdiv[.]                  if .             y             y
 140 fdivs[.]                 if .             y             y
 141 fmadd[.]                 if .             y             y
 142 fmadds[.]                if .             y             y
 143 fmsub[.]                 if .             y             y
 144 fmsubs[.]                if .             y             y
 145 fmul[.]                  if .             y             y
 146 fmuls[.]                 if .             y             y
 147
 148 (note: for fnm*, rounding happens before final negation)
 149 fnmadd[.]                if .             y             y
 150 fnmadds[.]               if .             y             y
 151 fnmsub[.]                if .             y             y
 152 fnmsubs[.]               if .             y             y
 153
 154 fre[.]                   if .             y             y
 155 fres[.]                  if .             y             y
 156
 157 frsqrte[.]               if .             y       apparently not
 158
 159 fsqrt[.]                 if .             y             y
 160 fsqrts[.]                if .             y             y
 161 fsub[.]                  if .             y             y
 162 fsubs[.]                 if .             y             y
 163
 164
 165 fpscr: bits 30-31 (ibm) is RM
 166             24-29 (ibm) are exnmasks/non-IEEE bit, all zero
 167             15-19 (ibm) is FPRF: class, <, =, >, UNord
 168
 169 ppc fe(guest) makes fpscr read as all zeros except RM (and maybe FPRF
 170 in future)
 171
 172 mcrfs     - move fpscr field to CR field
 173 mtfsfi[.] - 4 bit imm moved to fpscr field
 174 mtfsf[.]  - move frS[low 1/2] to fpscr but using 8-bit field mask
 175 mtfsb1[.] - set given fpscr bit
 176 mtfsb0[.] - clear given fpscr bit
 177 mffs[.]   - move all fpscr to frD[low 1/2]
 178
 179 For [.] presumably cr1 is set with exn summary bits, as per
 180 main FP insns
 181
 182 A single precision store truncates/denormalises the in-register value,
 183 but does not round it.  This is so that flds followed by fsts is
 184 always the identity.
 185 */
 186
 187
 188 /*---------------------------------------------------------*/
 189 /*--- misc helpers                                      ---*/
 190 /*---------------------------------------------------------*/
 191
 192 /* These are duplicated in guest-ppc/toIR.c */
 193 static IRExpr* unop ( IROp op, IRExpr* a )
 194 {
 195    return IRExpr_Unop(op, a);
 196 }
 197
 198 static IRExpr* mkU32 ( UInt i )
 199 {
 200    return IRExpr_Const(IRConst_U32(i));
 201 }
 202
 203 static IRExpr* bind ( Int binder )
 204 {
 205    return IRExpr_Binder(binder);
 206 }
 207
 208 static Bool isZeroU8 ( IRExpr* e )
 209 {
 210    return e->tag == Iex_Const
 211           && e->Iex.Const.con->tag == Ico_U8
 212           && e->Iex.Const.con->Ico.U8 == 0;
 213 }
 214
 215
 216 /*---------------------------------------------------------*/
 217 /*--- ISelEnv                                           ---*/
 218 /*---------------------------------------------------------*/
 219
 220 /* This carries around:
 221
 222    - A mapping from IRTemp to IRType, giving the type of any IRTemp we
 223      might encounter.  This is computed before insn selection starts,
 224      and does not change.
 225
 226    - A mapping from IRTemp to HReg.  This tells the insn selector
 227      which virtual register(s) are associated with each IRTemp
 228      temporary.  This is computed before insn selection starts, and
 229      does not change.  We expect this mapping to map precisely the
 230      same set of IRTemps as the type mapping does.
 231
 232          - vregmapLo    holds the primary register for the IRTemp.
 233          - vregmapMedLo holds the secondary register for the IRTemp,
 234               if any is needed.  That's only for Ity_I64 temps
 235               in 32 bit mode or Ity_I128 temps in 64-bit mode.
 236          - vregmapMedHi is only for dealing with Ity_I128 temps in
 237               32 bit mode.  It holds bits 95:64 (Intel numbering)
 238               of the IRTemp.
 239          - vregmapHi is also only for dealing with Ity_I128 temps
 240               in 32 bit mode.  It holds the most significant bits
 241               (127:96 in Intel numbering) of the IRTemp.
 242
 243     - The code array, that is, the insns selected so far.
 244
 245     - A counter, for generating new virtual registers.
 246
 247     - The host subarchitecture we are selecting insns for.
 248       This is set at the start and does not change.
 249
 250     - A Bool to tell us if the host is 32 or 64bit.
 251       This is set at the start and does not change.
 252
 253     - An IRExpr*, which may be NULL, holding the IR expression (an
 254       IRRoundingMode-encoded value) to which the FPU's rounding mode
 255       was most recently set.  Setting to NULL is always safe.  Used to
 256       avoid redundant settings of the FPU's rounding mode, as
 257       described in set_FPU_rounding_mode below.
 258
 259     - A VexMiscInfo*, needed for knowing how to generate
 260       function calls for this target.
 261
 262     - The maximum guest address of any guest insn in this block.
 263       Actually, the address of the highest-addressed byte from any
 264       insn in this block.  Is set at the start and does not change.
 265       This is used for detecting jumps which are definitely
 266       forward-edges from this block, and therefore can be made
 267       (chained) to the fast entry point of the destination, thereby
 268       avoiding the destination's event check.
 269 */
 270
 271 typedef
 272    struct {
 273       /* Constant -- are set at the start and do not change. */
 274       IRTypeEnv* type_env;
 275                               //    64-bit mode              32-bit mode
 276       HReg*    vregmapLo;     // Low 64-bits [63:0]    Low 32-bits     [31:0]
 277       HReg*    vregmapMedLo;  // high 64-bits[127:64]  Next 32-bits    [63:32]
 278       HReg*    vregmapMedHi;  // unused                Next 32-bits    [95:64]
 279       HReg*    vregmapHi;     // unused                highest 32-bits [127:96]
 280       Int      n_vregmap;
 281
 282       /* 27 Jan 06: Not currently used, but should be */
 283       UInt         hwcaps;
 284
 285       Bool         mode64;
 286
 287       const VexAbiInfo*  vbi;   // unused
 288
 289       Bool         chainingAllowed;
 290       Addr64       max_ga;
 291
 292       /* These are modified as we go along. */
 293       HInstrArray* code;
 294       Int          vreg_ctr;
 295
 296       IRExpr*      previous_rm;
 297    }
 298    ISelEnv;
 299
 300
 301 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
 302 {
 303    vassert(tmp < env->n_vregmap);
 304    return env->vregmapLo[tmp];
 305 }
 306
 307 static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
 308                                ISelEnv* env, IRTemp tmp )
 309 {
 310    vassert(tmp < env->n_vregmap);
 311    vassert(! hregIsInvalid(env->vregmapMedLo[tmp]));
 312    *vrLO = env->vregmapLo[tmp];
 313    *vrHI = env->vregmapMedLo[tmp];
 314 }
 315
 316 /* Only for used in 32-bit mode */
 317 static void lookupIRTempQuad ( HReg* vrHi, HReg* vrMedHi, HReg* vrMedLo,
 318                                HReg* vrLo, ISelEnv* env, IRTemp tmp )
 319 {
 320    vassert(!env->mode64);
 321    vassert(tmp < env->n_vregmap);
 322    vassert(! hregIsInvalid(env->vregmapMedLo[tmp]));
 323    *vrHi    = env->vregmapHi[tmp];
 324    *vrMedHi = env->vregmapMedHi[tmp];
 325    *vrMedLo = env->vregmapMedLo[tmp];
 326    *vrLo    = env->vregmapLo[tmp];
 327 }
 328
 329 static void addInstr ( ISelEnv* env, PPCInstr* instr )
 330 {
 331    addHInstr(env->code, instr);
 332    if (vex_traceflags & VEX_TRACE_VCODE) {
 333       ppPPCInstr(instr, env->mode64);
 334       vex_printf("\n");
 335    }
 336 }
 337
 338 static HReg newVRegI ( ISelEnv* env )
 339 {
 340    HReg reg
 341       = mkHReg(True/*vreg*/, HRcGPR(env->mode64), 0/*enc*/, env->vreg_ctr);
 342    env->vreg_ctr++;
 343    return reg;
 344 }
 345
 346 static HReg newVRegF ( ISelEnv* env )
 347 {
 348    HReg reg = mkHReg(True/*vreg*/, HRcFlt64, 0/*enc*/, env->vreg_ctr);
 349    env->vreg_ctr++;
 350    return reg;
 351 }
 352
 353 static HReg newVRegV ( ISelEnv* env )
 354 {
 355    HReg reg = mkHReg(True/*vreg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
 356    env->vreg_ctr++;
 357    return reg;
 358 }
 359
 360
 361 /*---------------------------------------------------------*/
 362 /*--- ISEL: Forward declarations                        ---*/
 363 /*---------------------------------------------------------*/
 364
 365 /* These are organised as iselXXX and iselXXX_wrk pairs.  The
 366    iselXXX_wrk do the real work, but are not to be called directly.
 367    For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
 368    checks that all returned registers are virtual.  You should not
 369    call the _wrk version directly.
 370
 371    'Word' refers to the size of the native machine word, that is,
 372    32-bit int in 32-bit mode and 64-bit int in 64-bit mode.  '2Word'
 373    therefore refers to a double-width (64/128-bit) quantity in two
 374    integer registers.
 375 */
 376 /* 32-bit mode: compute an I8/I16/I32 into a GPR.
 377    64-bit mode: compute an I8/I16/I32/I64 into a GPR. */
 378 static HReg          iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
 379                                           IREndness IEndianess );
 380 static HReg          iselWordExpr_R     ( ISelEnv* env, const IRExpr* e,
 381                                           IREndness IEndianess );
 382
 383 /* 32-bit mode: Compute an I8/I16/I32 into a RH
 384                 (reg-or-halfword-immediate).
 385    64-bit mode: Compute an I8/I16/I32/I64 into a RH
 386                 (reg-or-halfword-immediate).
 387    It's important to specify whether the immediate is to be regarded
 388    as signed or not.  If yes, this will never return -32768 as an
 389    immediate; this guaranteed that all signed immediates that are
 390    return can have their sign inverted if need be.
 391 */
 392 static PPCRH*        iselWordExpr_RH_wrk ( ISelEnv* env,
 393                                            Bool syned, const IRExpr* e,
 394                                            IREndness IEndianess );
 395 static PPCRH*        iselWordExpr_RH     ( ISelEnv* env,
 396                                            Bool syned, const IRExpr* e,
 397                                            IREndness IEndianess );
 398
 399 /* 32-bit mode: compute an I32 into a RI (reg or 32-bit immediate).
 400    64-bit mode: compute an I64 into a RI (reg or 64-bit immediate). */
 401 static PPCRI*        iselWordExpr_RI_wrk ( ISelEnv* env, const IRExpr* e,
 402                                            IREndness IEndianess );
 403 static PPCRI*        iselWordExpr_RI     ( ISelEnv* env, const IRExpr* e,
 404                                            IREndness IEndianess );
 405
 406 /* In 32 bit mode ONLY, compute an I8 into a
 407    reg-or-5-bit-unsigned-immediate, the latter being an immediate in
 408    the range 1 .. 31 inclusive.  Used for doing shift amounts. */
 409 static PPCRH*        iselWordExpr_RH5u_wrk ( ISelEnv* env, const IRExpr* e,
 410                                              IREndness IEndianess );
 411 static PPCRH*        iselWordExpr_RH5u     ( ISelEnv* env, const IRExpr* e,
 412                                              IREndness IEndianess );
 413
 414 /* In 64-bit mode ONLY, compute an I8 into a
 415    reg-or-6-bit-unsigned-immediate, the latter being an immediate in
 416    the range 1 .. 63 inclusive.  Used for doing shift amounts. */
 417 static PPCRH*        iselWordExpr_RH6u_wrk ( ISelEnv* env, const IRExpr* e,
 418                                              IREndness IEndianess );
 419 static PPCRH*        iselWordExpr_RH6u     ( ISelEnv* env, const IRExpr* e,
 420                                              IREndness IEndianess );
 421
 422 /* 32-bit mode: compute an I32 into an AMode.
 423    64-bit mode: compute an I64 into an AMode.
 424
 425    Requires to know (xferTy) the type of data to be loaded/stored
 426    using this amode.  That is so that, for 64-bit code generation, any
 427    PPCAMode_IR returned will have an index (immediate offset) field
 428    that is guaranteed to be 4-aligned, if there is any chance that the
 429    amode is to be used in ld/ldu/lda/std/stdu.
 430
 431    Since there are no such restrictions on 32-bit insns, xferTy is
 432    ignored for 32-bit code generation. */
 433 static PPCAMode*     iselWordExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e,
 434                                               IRType xferTy,
 435                                               IREndness IEndianess );
 436 static PPCAMode*     iselWordExpr_AMode     ( ISelEnv* env, const IRExpr* e,
 437                                               IRType xferTy,
 438                                               IREndness IEndianess );
 439
 440 static void iselInt128Expr_to_32x4_wrk ( HReg* rHi, HReg* rMedHi,
 441                                          HReg* rMedLo, HReg* rLo,
 442                                          ISelEnv* env, const IRExpr* e,
 443                                          IREndness IEndianess );
 444 static void iselInt128Expr_to_32x4     ( HReg* rHi, HReg* rMedHi,
 445                                          HReg* rMedLo, HReg* rLo,
 446                                          ISelEnv* env, const IRExpr* e,
 447                                          IREndness IEndianess );
 448
 449
 450 /* 32-bit mode ONLY: compute an I64 into a GPR pair. */
 451 static void          iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
 452                                          ISelEnv* env, const IRExpr* e,
 453                                          IREndness IEndianess );
 454 static void          iselInt64Expr     ( HReg* rHi, HReg* rLo,
 455                                          ISelEnv* env, const IRExpr* e,
 456                                          IREndness IEndianess );
 457
 458 /* 64-bit mode ONLY: compute an I128 into a GPR64 pair. */
 459 static void          iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
 460                                           ISelEnv* env, const IRExpr* e,
 461                                           IREndness IEndianess );
 462
 463 static void          iselInt128Expr     ( HReg* rHi, HReg* rLo,
 464                                           ISelEnv* env, const IRExpr* e,
 465                                           IREndness IEndianess );
 466
 467 static PPCCondCode   iselCondCode_wrk ( ISelEnv* env, const IRExpr* e,
 468                                         IREndness IEndianess );
 469 static PPCCondCode   iselCondCode     ( ISelEnv* env, const IRExpr* e,
 470                                         IREndness IEndianess );
 471
 472 static HReg          iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e,
 473                                        IREndness IEndianess );
 474 static HReg          iselDblExpr     ( ISelEnv* env, const IRExpr* e,
 475                                        IREndness IEndianess );
 476
 477 static HReg          iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e,
 478                                        IREndness IEndianess );
 479 static HReg          iselFltExpr     ( ISelEnv* env, const IRExpr* e,
 480                                        IREndness IEndianess );
 481
 482 static HReg          iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e,
 483                                        IREndness IEndianess );
 484 static HReg          iselVecExpr     ( ISelEnv* env, const IRExpr* e,
 485                                        IREndness IEndianess );
 486
 487 /* 64-bit mode ONLY. */
 488 static HReg          iselDfp32Expr_wrk ( ISelEnv* env, const IRExpr* e,
 489                                          IREndness IEndianess );
 490 static HReg          iselDfp32Expr     ( ISelEnv* env, const IRExpr* e,
 491                                          IREndness IEndianess );
 492 static HReg          iselDfp64Expr_wrk ( ISelEnv* env, const IRExpr* e,
 493                                          IREndness IEndianess );
 494 static HReg          iselDfp64Expr     ( ISelEnv* env, const IRExpr* e,
 495                                          IREndness IEndianess );
 496 static HReg iselFp128Expr_wrk ( ISelEnv* env, const IRExpr* e,
 497                                 IREndness IEndianess);
 498 static HReg iselFp128Expr     ( ISelEnv* env, const IRExpr* e,
 499                                 IREndness IEndianess);
 500
 501 /* 64-bit mode ONLY: compute an D128 into a GPR64 pair. */
 502 static void iselDfp128Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
 503                                  const IRExpr* e, IREndness IEndianess );
 504 static void iselDfp128Expr     ( HReg* rHi, HReg* rLo, ISelEnv* env,
 505                                  const IRExpr* e, IREndness IEndianess );
 506
 507 /*---------------------------------------------------------*/
 508 /*--- ISEL: Misc helpers                                ---*/
 509 /*---------------------------------------------------------*/
 510
 511 /* Make an int reg-reg move. */
 512
 513 static PPCInstr* mk_iMOVds_RR ( HReg r_dst, HReg r_src )
 514 {
 515    vassert(hregClass(r_dst) == hregClass(r_src));
 516    vassert(hregClass(r_src) ==  HRcInt32 ||
 517            hregClass(r_src) ==  HRcInt64);
 518    return PPCInstr_Alu(Palu_OR, r_dst, r_src, PPCRH_Reg(r_src));
 519 }
 520
 521 /* Advance/retreat %r1 by n. */
 522
 523 static void add_to_sp ( ISelEnv* env, UInt n )
 524 {
 525    HReg sp = StackFramePtr(env->mode64);
 526    vassert(n <= 1024 && (n%16) == 0);
 527    addInstr(env, PPCInstr_Alu( Palu_ADD, sp, sp,
 528                                PPCRH_Imm(True,toUShort(n)) ));
 529 }
 530
 531 static void sub_from_sp ( ISelEnv* env, UInt n )
 532 {
 533    HReg sp = StackFramePtr(env->mode64);
 534    vassert(n <= 1024 && (n%16) == 0);
 535    addInstr(env, PPCInstr_Alu( Palu_SUB, sp, sp,
 536                                PPCRH_Imm(True,toUShort(n)) ));
 537 }
 538
 539 /*
 540   returns a quadword aligned address on the stack
 541    - copies SP, adds 16bytes, aligns to quadword.
 542   use sub_from_sp(32) before calling this,
 543   as expects to have 32 bytes to play with.
 544 */
 545 static HReg get_sp_aligned16 ( ISelEnv* env )
 546 {
 547    HReg       r = newVRegI(env);
 548    HReg align16 = newVRegI(env);
 549    addInstr(env, mk_iMOVds_RR(r, StackFramePtr(env->mode64)));
 550    // add 16
 551    addInstr(env, PPCInstr_Alu( Palu_ADD, r, r,
 552                                PPCRH_Imm(True,toUShort(16)) ));
 553    // mask to quadword
 554    addInstr(env,
 555             PPCInstr_LI(align16, 0xFFFFFFFFFFFFFFF0ULL, env->mode64));
 556    addInstr(env, PPCInstr_Alu(Palu_AND, r,r, PPCRH_Reg(align16)));
 557    return r;
 558 }
 559
 560
 561
 562 /* Load 2*I32 regs to fp reg */
 563 static HReg mk_LoadRR32toFPR ( ISelEnv* env,
 564                                HReg r_srcHi, HReg r_srcLo )
 565 {
 566    HReg fr_dst = newVRegF(env);
 567    PPCAMode *am_addr0, *am_addr1;
 568
 569    vassert(!env->mode64);
 570    vassert(hregClass(r_srcHi) == HRcInt32);
 571    vassert(hregClass(r_srcLo) == HRcInt32);
 572
 573    sub_from_sp( env, 16 );        // Move SP down 16 bytes
 574    am_addr0 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
 575    am_addr1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
 576
 577    // store hi,lo as Ity_I32's
 578    addInstr(env, PPCInstr_Store( 4, am_addr0, r_srcHi, env->mode64 ));
 579    addInstr(env, PPCInstr_Store( 4, am_addr1, r_srcLo, env->mode64 ));
 580
 581    // load as float
 582    addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, am_addr0));
 583
 584    add_to_sp( env, 16 );          // Reset SP
 585    return fr_dst;
 586 }
 587
 588 /* Load I64 reg to fp reg */
 589 static HReg mk_LoadR64toFPR ( ISelEnv* env, HReg r_src )
 590 {
 591    HReg fr_dst = newVRegF(env);
 592    PPCAMode *am_addr0;
 593
 594    vassert(env->mode64);
 595    vassert(hregClass(r_src) == HRcInt64);
 596
 597    sub_from_sp( env, 16 );        // Move SP down 16 bytes
 598    am_addr0 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
 599
 600    // store as Ity_I64
 601    addInstr(env, PPCInstr_Store( 8, am_addr0, r_src, env->mode64 ));
 602
 603    // load as float
 604    addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, am_addr0));
 605
 606    add_to_sp( env, 16 );          // Reset SP
 607    return fr_dst;
 608 }
 609
 610
 611 /* Given an amode, return one which references 4 bytes further
 612    along. */
 613
 614 static PPCAMode* advance4 ( ISelEnv* env, PPCAMode* am )
 615 {
 616    PPCAMode* am4 = dopyPPCAMode( am );
 617    if (am4->tag == Pam_IR
 618        && am4->Pam.IR.index + 4 <= 32767) {
 619       am4->Pam.IR.index += 4;
 620    } else {
 621       vpanic("advance4(ppc,host)");
 622    }
 623    return am4;
 624 }
 625
 626
 627 /* Given a guest-state array descriptor, an index expression and a
 628    bias, generate a PPCAMode pointing at the relevant piece of
 629    guest state.  */
 630 static
 631 PPCAMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
 632                                 IRExpr* off, Int bias, IREndness IEndianess )
 633 {
 634    HReg rtmp, roff;
 635    Int  elemSz = sizeofIRType(descr->elemTy);
 636    Int  nElems = descr->nElems;
 637    Int  shift  = 0;
 638
 639    /* MAX is somewhat arbitrarily, needs to be at least
 640       3 times the size of VexGuestPPC64State */
 641 #define MAX 6500
 642
 643    /* Throw out any cases we don't need.  In theory there might be a
 644       day where we need to handle others, but not today. */
 645
 646    if (nElems != 16 && nElems != 32)
 647       vpanic("genGuestArrayOffset(ppc host)(1)");
 648
 649    switch (elemSz) {
 650       case 4:  shift = 2; break;
 651       case 8:  shift = 3; break;
 652       default: vpanic("genGuestArrayOffset(ppc host)(2)");
 653    }
 654
 655    if (bias < -100 || bias > 100) /* somewhat arbitrarily */
 656       vpanic("genGuestArrayOffset(ppc host)(3)");
 657    if (descr->base < 0 || descr->base > MAX) { /* somewhat arbitrarily */
 658       vex_printf("ERROR: descr->base = %d, is greater then maximum = %d\n",
 659                  descr->base, MAX);
 660       vpanic("genGuestArrayOffset(ppc host)(4)");
 661    }
 662
 663    /* Compute off into a reg, %off.  Then return:
 664
 665          addi %tmp, %off, bias (if bias != 0)
 666          andi %tmp, nElems-1
 667          sldi %tmp, shift
 668          addi %tmp, %tmp, base
 669          ... Baseblockptr + %tmp ...
 670    */
 671    roff = iselWordExpr_R(env, off, IEndianess);
 672    rtmp = newVRegI(env);
 673    addInstr(env, PPCInstr_Alu(
 674                     Palu_ADD,
 675                     rtmp, roff,
 676                     PPCRH_Imm(True/*signed*/, toUShort(bias))));
 677    addInstr(env, PPCInstr_Alu(
 678                     Palu_AND,
 679                     rtmp, rtmp,
 680                     PPCRH_Imm(False/*unsigned*/, toUShort(nElems-1))));
 681    addInstr(env, PPCInstr_Shft(
 682                     Pshft_SHL,
 683                     env->mode64 ? False : True/*F:64-bit, T:32-bit shift*/,
 684                     rtmp, rtmp,
 685                     PPCRH_Imm(False/*unsigned*/, toUShort(shift))));
 686    addInstr(env, PPCInstr_Alu(
 687                     Palu_ADD,
 688                     rtmp, rtmp,
 689                     PPCRH_Imm(True/*signed*/, toUShort(descr->base))));
 690    return
 691       PPCAMode_RR( GuestStatePtr(env->mode64), rtmp );
 692 #undef MAX
 693 }
 694
 695
 696 /*---------------------------------------------------------*/
 697 /*--- ISEL: Function call helpers                       ---*/
 698 /*---------------------------------------------------------*/
 699
 700 /* Used only in doHelperCall.  See big comment in doHelperCall re
 701    handling of register-parameter args.  This function figures out
 702    whether evaluation of an expression might require use of a fixed
 703    register.  If in doubt return True (safe but suboptimal).
 704 */
 705 static
 706 Bool mightRequireFixedRegs ( IRExpr* e )
 707 {
 708    switch (e->tag) {
 709    case Iex_RdTmp: case Iex_Const: case Iex_Get:
 710       return False;
 711    default:
 712       return True;
 713    }
 714 }
 715
 716
 717 /* Do a complete function call.  |guard| is a Ity_Bit expression
 718    indicating whether or not the call happens.  If guard==NULL, the
 719    call is unconditional.  |retloc| is set to indicate where the
 720    return value is after the call.  The caller (of this fn) must
 721    generate code to add |stackAdjustAfterCall| to the stack pointer
 722    after the call is done. */
 723
 724 static
 725 void doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
 726                     /*OUT*/RetLoc* retloc,
 727                     ISelEnv* env,
 728                     IRExpr* guard,
 729                     IRCallee* cee, IRType retTy, IRExpr** args,
 730                     IREndness IEndianess)
 731 {
 732    PPCCondCode cc;
 733    HReg        argregs[PPC_N_REGPARMS];
 734    HReg        tmpregs[PPC_N_REGPARMS];
 735    Bool        go_fast;
 736    Int         n_args, i, argreg;
 737    UInt        argiregs;
 738    Bool        mode64 = env->mode64;
 739
 740    /* Set default returns.  We'll update them later if needed. */
 741    *stackAdjustAfterCall = 0;
 742    *retloc               = mk_RetLoc_INVALID();
 743
 744    /* These are used for cross-checking that IR-level constraints on
 745       the use of IRExpr_VECRET() and IRExpr_GSPTR() are observed. */
 746    UInt nVECRETs = 0;
 747    UInt nGSPTRs  = 0;
 748
 749    /* Marshal args for a call and do the call.
 750
 751       This function only deals with a tiny set of possibilities, which
 752       cover all helpers in practice.  The restrictions are that only
 753       arguments in registers are supported, hence only PPC_N_REGPARMS x
 754       (mode32:32 | mode64:64) integer bits in total can be passed.
 755       In fact the only supported arg type is (mode32:I32 | mode64:I64).
 756
 757       The return type can be I{64,32,16,8} or V{128,256}.  In the
 758       latter two cases, it is expected that |args| will contain the
 759       special node IRExpr_VECRET(), in which case this routine
 760       generates code to allocate space on the stack for the vector
 761       return value.  Since we are not passing any scalars on the
 762       stack, it is enough to preallocate the return space before
 763       marshalling any arguments, in this case.
 764
 765       |args| may also contain IRExpr_GSPTR(), in which case the value
 766       in the guest state pointer register is passed as the
 767       corresponding argument.
 768
 769       Generating code which is both efficient and correct when
 770       parameters are to be passed in registers is difficult, for the
 771       reasons elaborated in detail in comments attached to
 772       doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
 773       of the method described in those comments.
 774
 775       The problem is split into two cases: the fast scheme and the
 776       slow scheme.  In the fast scheme, arguments are computed
 777       directly into the target (real) registers.  This is only safe
 778       when we can be sure that computation of each argument will not
 779       trash any real registers set by computation of any other
 780       argument.
 781
 782       In the slow scheme, all args are first computed into vregs, and
 783       once they are all done, they are moved to the relevant real
 784       regs.  This always gives correct code, but it also gives a bunch
 785       of vreg-to-rreg moves which are usually redundant but are hard
 786       for the register allocator to get rid of.
 787
 788       To decide which scheme to use, all argument expressions are
 789       first examined.  If they are all so simple that it is clear they
 790       will be evaluated without use of any fixed registers, use the
 791       fast scheme, else use the slow scheme.  Note also that only
 792       unconditional calls may use the fast scheme, since having to
 793       compute a condition expression could itself trash real
 794       registers.
 795
 796       Note this requires being able to examine an expression and
 797       determine whether or not evaluation of it might use a fixed
 798       register.  That requires knowledge of how the rest of this insn
 799       selector works.  Currently just the following 3 are regarded as
 800       safe -- hopefully they cover the majority of arguments in
 801       practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
 802    */
 803
 804    /* Note that the cee->regparms field is meaningless on PPC32/64 host
 805       (since there is only one calling convention) and so we always
 806       ignore it. */
 807
 808    n_args = 0;
 809    for (i = 0; args[i]; i++)
 810       n_args++;
 811
 812    if (n_args > PPC_N_REGPARMS) {
 813       vpanic("doHelperCall(PPC): cannot currently handle > 8 args");
 814       // PPC_N_REGPARMS
 815    }
 816
 817    /* This is kind of stupid .. the arrays are sized as PPC_N_REGPARMS
 818       but we then assume that that value is 8. */
 819    vassert(PPC_N_REGPARMS == 8);
 820
 821    argregs[0] = hregPPC_GPR3(mode64);
 822    argregs[1] = hregPPC_GPR4(mode64);
 823    argregs[2] = hregPPC_GPR5(mode64);
 824    argregs[3] = hregPPC_GPR6(mode64);
 825    argregs[4] = hregPPC_GPR7(mode64);
 826    argregs[5] = hregPPC_GPR8(mode64);
 827    argregs[6] = hregPPC_GPR9(mode64);
 828    argregs[7] = hregPPC_GPR10(mode64);
 829    argiregs = 0;
 830
 831    tmpregs[0] = tmpregs[1] = tmpregs[2] =
 832    tmpregs[3] = tmpregs[4] = tmpregs[5] =
 833    tmpregs[6] = tmpregs[7] = INVALID_HREG;
 834
 835    /* First decide which scheme (slow or fast) is to be used.  First
 836       assume the fast scheme, and select slow if any contraindications
 837       (wow) appear. */
 838
 839    go_fast = True;
 840
 841    /* We'll need space on the stack for the return value.  Avoid
 842       possible complications with nested calls by using the slow
 843       scheme. */
 844    if (retTy == Ity_V128 || retTy == Ity_V256)
 845       go_fast = False;
 846
 847    if (go_fast && guard) {
 848       if (guard->tag == Iex_Const
 849           && guard->Iex.Const.con->tag == Ico_U1
 850           && guard->Iex.Const.con->Ico.U1 == True) {
 851          /* unconditional */
 852       } else {
 853          /* Not manifestly unconditional -- be conservative. */
 854          go_fast = False;
 855       }
 856    }
 857
 858    if (go_fast) {
 859       for (i = 0; i < n_args; i++) {
 860          IRExpr* arg = args[i];
 861          if (UNLIKELY(arg->tag == Iex_GSPTR)) {
 862             /* that's OK */
 863          }
 864          else if (UNLIKELY(arg->tag == Iex_VECRET)) {
 865             /* This implies ill-formed IR, since if the IR was
 866                well-formed, the return-type test above would have
 867                filtered it out. */
 868             vpanic("doHelperCall(PPC): invalid IR");
 869          }
 870          else if (mightRequireFixedRegs(arg)) {
 871             go_fast = False;
 872             break;
 873          }
 874       }
 875    }
 876
 877    /* At this point the scheme to use has been established.  Generate
 878       code to get the arg values into the argument rregs. */
 879
 880    if (go_fast) {
 881
 882       /* FAST SCHEME */
 883       argreg = 0;
 884
 885       for (i = 0; i < n_args; i++) {
 886          IRExpr* arg = args[i];
 887          vassert(argreg < PPC_N_REGPARMS);
 888
 889          if (arg->tag == Iex_GSPTR) {
 890             argiregs |= (1 << (argreg+3));
 891             addInstr(env, mk_iMOVds_RR( argregs[argreg],
 892                                         GuestStatePtr(mode64) ));
 893             argreg++;
 894          } else {
 895             vassert(arg->tag != Iex_VECRET);
 896             IRType ty = typeOfIRExpr(env->type_env, arg);
 897             vassert(ty == Ity_I32 || ty == Ity_I64);
 898             if (!mode64) {
 899                if (ty == Ity_I32) {
 900                   argiregs |= (1 << (argreg+3));
 901                   addInstr(env,
 902                            mk_iMOVds_RR( argregs[argreg],
 903                                          iselWordExpr_R(env, arg,
 904                                                         IEndianess) ));
 905                } else { // Ity_I64 in 32-bit mode
 906                   HReg rHi, rLo;
 907                   if ((argreg%2) == 1)
 908                                  // ppc32 ELF abi spec for passing LONG_LONG
 909                      argreg++;   // XXX: odd argreg => even rN
 910                   vassert(argreg < PPC_N_REGPARMS-1);
 911                   iselInt64Expr(&rHi,&rLo, env, arg, IEndianess);
 912                   argiregs |= (1 << (argreg+3));
 913                   addInstr(env, mk_iMOVds_RR( argregs[argreg++], rHi ));
 914                   argiregs |= (1 << (argreg+3));
 915                   addInstr(env, mk_iMOVds_RR( argregs[argreg], rLo));
 916                }
 917             } else { // mode64
 918                argiregs |= (1 << (argreg+3));
 919                addInstr(env, mk_iMOVds_RR( argregs[argreg],
 920                                            iselWordExpr_R(env, arg,
 921                                                           IEndianess) ));
 922             }
 923             argreg++;
 924          } /* if (arg == IRExprP__BBPR) */
 925       }
 926
 927       /* Fast scheme only applies for unconditional calls.  Hence: */
 928       cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
 929
 930    } else {
 931
 932       /* SLOW SCHEME; move via temporaries */
 933       argreg = 0;
 934
 935       /* If we have a vector return type, allocate a place for it on
 936          the stack and record its address.  Rather than figure out the
 937          complexities of PPC{32,64} ELF ABI stack frame layout, simply
 938          drop the SP by 1024 and allocate the return point in the
 939          middle.  I think this should comfortably clear any ABI
 940          mandated register save areas.  Note that it doesn't maintain
 941          the backchain as it should, since we're not doing st{d,w}u to
 942          adjust the SP, but .. that doesn't seem to be a big deal.
 943          Since we're not expecting to have to unwind out of here. */
 944       HReg r_vecRetAddr = INVALID_HREG;
 945       if (retTy == Ity_V128) {
 946          r_vecRetAddr = newVRegI(env);
 947          sub_from_sp(env, 512);
 948          addInstr(env, mk_iMOVds_RR( r_vecRetAddr, StackFramePtr(mode64) ));
 949          sub_from_sp(env, 512);
 950       }
 951       else if (retTy == Ity_V256) {
 952          vassert(0); //ATC
 953          r_vecRetAddr = newVRegI(env);
 954          sub_from_sp(env, 512);
 955          addInstr(env, mk_iMOVds_RR( r_vecRetAddr, StackFramePtr(mode64) ));
 956          sub_from_sp(env, 512);
 957       }
 958
 959       vassert(n_args >= 0 && n_args <= 8);
 960       for (i = 0; i < n_args; i++) {
 961          IRExpr* arg = args[i];
 962          vassert(argreg < PPC_N_REGPARMS);
 963          if (UNLIKELY(arg->tag == Iex_GSPTR)) {
 964             tmpregs[argreg] = newVRegI(env);
 965             addInstr(env, mk_iMOVds_RR( tmpregs[argreg],
 966                                         GuestStatePtr(mode64) ));
 967             nGSPTRs++;
 968          }
 969          else if (UNLIKELY(arg->tag == Iex_VECRET)) {
 970             /* We stashed the address of the return slot earlier, so just
 971                retrieve it now. */
 972             vassert(!hregIsInvalid(r_vecRetAddr));
 973             tmpregs[i] = r_vecRetAddr;
 974             nVECRETs++;
 975          }
 976          else {
 977             IRType ty = typeOfIRExpr(env->type_env, arg);
 978             vassert(ty == Ity_I32 || ty == Ity_I64);
 979             if (!mode64) {
 980                if (ty == Ity_I32) {
 981                   tmpregs[argreg] = iselWordExpr_R(env, arg, IEndianess);
 982                } else { // Ity_I64 in 32-bit mode
 983                   HReg rHi, rLo;
 984                   if ((argreg%2) == 1)
 985                                 // ppc32 ELF abi spec for passing LONG_LONG
 986                      argreg++;  // XXX: odd argreg => even rN
 987                   vassert(argreg < PPC_N_REGPARMS-1);
 988                   iselInt64Expr(&rHi,&rLo, env, arg, IEndianess);
 989                   tmpregs[argreg++] = rHi;
 990                   tmpregs[argreg]   = rLo;
 991                }
 992             } else { // mode64
 993                tmpregs[argreg] = iselWordExpr_R(env, arg, IEndianess);
 994             }
 995          }
 996          argreg++;
 997       }
 998
 999       /* Now we can compute the condition.  We can't do it earlier
1000          because the argument computations could trash the condition
1001          codes.  Be a bit clever to handle the common case where the
1002          guard is 1:Bit. */
1003       cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
1004       if (guard) {
1005          if (guard->tag == Iex_Const
1006              && guard->Iex.Const.con->tag == Ico_U1
1007              && guard->Iex.Const.con->Ico.U1 == True) {
1008             /* unconditional -- do nothing */
1009          } else {
1010             cc = iselCondCode( env, guard, IEndianess );
1011          }
1012       }
1013
1014       /* Move the args to their final destinations. */
1015       for (i = 0; i < argreg; i++) {
1016          if (hregIsInvalid(tmpregs[i]))  // Skip invalid regs
1017             continue;
1018          /* None of these insns, including any spill code that might
1019             be generated, may alter the condition codes. */
1020          argiregs |= (1 << (i+3));
1021          addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
1022       }
1023
1024    }
1025
1026    /* Do final checks, set the return values, and generate the call
1027       instruction proper. */
1028    if (retTy == Ity_V128 || retTy == Ity_V256) {
1029       vassert(nVECRETs == 1);
1030    } else {
1031       vassert(nVECRETs == 0);
1032    }
1033
1034    vassert(nGSPTRs == 0 || nGSPTRs == 1);
1035
1036    vassert(*stackAdjustAfterCall == 0);
1037    vassert(is_RetLoc_INVALID(*retloc));
1038    switch (retTy) {
1039       case Ity_INVALID:
1040          /* Function doesn't return a value. */
1041          *retloc = mk_RetLoc_simple(RLPri_None);
1042          break;
1043       case Ity_I64:
1044          *retloc = mk_RetLoc_simple(mode64 ? RLPri_Int : RLPri_2Int);
1045          break;
1046       case Ity_I32: case Ity_I16: case Ity_I8:
1047          *retloc = mk_RetLoc_simple(RLPri_Int);
1048          break;
1049       case Ity_V128:
1050          /* Result is 512 bytes up the stack, and after it has been
1051             retrieved, adjust SP upwards by 1024. */
1052          *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 512);
1053          *stackAdjustAfterCall = 1024;
1054          break;
1055       case Ity_V256:
1056          vassert(0); // ATC
1057          /* Ditto */
1058          *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 512);
1059          *stackAdjustAfterCall = 1024;
1060          break;
1061       default:
1062          /* IR can denote other possible return types, but we don't
1063             handle those here. */
1064          vassert(0);
1065    }
1066
1067    /* Finally, generate the call itself.  This needs the *retloc value
1068       set in the switch above, which is why it's at the end. */
1069
1070    Addr64 target = mode64 ? (Addr)cee->addr
1071                           : toUInt((Addr)(cee->addr));
1072    addInstr(env, PPCInstr_Call( cc, target, argiregs, *retloc ));
1073 }
1074
1075
1076 /*---------------------------------------------------------*/
1077 /*--- ISEL: FP rounding mode helpers                    ---*/
1078 /*---------------------------------------------------------*/
1079
1080 ///* Set FPU's rounding mode to the default */
1081 //static
1082 //void set_FPU_rounding_default ( ISelEnv* env )
1083 //{
1084 //   HReg fr_src = newVRegF(env);
1085 //   HReg r_src  = newVRegI(env);
1086 //
1087 //   /* Default rounding mode = 0x0
1088 //      Only supporting the rounding-mode bits - the rest of FPSCR is 0x0
1089 //       - so we can set the whole register at once (faster)
1090 //      note: upper 32 bits ignored by FpLdFPSCR
1091 //   */
1092 //   addInstr(env, PPCInstr_LI(r_src, 0x0, env->mode64));
1093 //   if (env->mode64) {
1094 //      fr_src = mk_LoadR64toFPR( env, r_src );         // 1*I64 -> F64
1095 //   } else {
1096 //      fr_src = mk_LoadRR32toFPR( env, r_src, r_src ); // 2*I32 -> F64
1097 //   }
1098 //   addInstr(env, PPCInstr_FpLdFPSCR( fr_src ));
1099 //}
1100
1101 /* Convert IR rounding mode to PPC encoding */
1102 static HReg roundModeIRtoPPC ( ISelEnv* env, HReg r_rmIR )
1103 {
1104    /*
1105    rounding mode                     | PPC  |  IR
1106    -----------------------------------------------
1107    to nearest, ties to even          | 000  | 000
1108    to zero                           | 001  | 011
1109    to +infinity                      | 010  | 010
1110    to -infinity                      | 011  | 001
1111    +++++ Below are the extended rounding modes for decimal floating point +++++
1112    to nearest, ties away from 0      | 100  | 100
1113    to nearest, ties toward 0         | 101  | 111
1114    to away from 0                    | 110  | 110
1115    to prepare for shorter precision  | 111  | 101
1116    */
1117    HReg r_rmPPC = newVRegI(env);
1118    HReg r_tmp1  = newVRegI(env);
1119    HReg r_tmp2  = newVRegI(env);
1120
1121    vassert(hregClass(r_rmIR) == HRcGPR(env->mode64));
1122
1123    // r_rmPPC = XOR(r_rmIR, r_rmIR << 1) & 3
1124    //
1125    // slwi  tmp1,    r_rmIR, 1
1126    // xor   tmp1,    r_rmIR, tmp1
1127    // andi  r_rmPPC, tmp1, 3
1128
1129    addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1130                                r_tmp1, r_rmIR, PPCRH_Imm(False,1)));
1131
1132    addInstr( env, PPCInstr_Alu( Palu_AND,
1133                                 r_tmp2, r_tmp1, PPCRH_Imm( False, 3 ) ) );
1134
1135    addInstr( env, PPCInstr_Alu( Palu_XOR,
1136                                 r_rmPPC, r_rmIR, PPCRH_Reg( r_tmp2 ) ) );
1137
1138    return r_rmPPC;
1139 }
1140
1141
1142 /* Set the FPU's rounding mode: 'mode' is an I32-typed expression
1143    denoting a value in the range 0 .. 7, indicating a round mode
1144    encoded as per type IRRoundingMode.  Set the PPC FPSCR to have the
1145    same rounding.  When the dfp_rm arg is True, set the decimal
1146    floating point rounding mode bits (29:31); otherwise, set the
1147    binary floating point rounding mode bits (62:63).
1148
1149    For speed & simplicity, we're setting the *entire* FPSCR here.
1150
1151    Setting the rounding mode is expensive.  So this function tries to
1152    avoid repeatedly setting the rounding mode to the same thing by
1153    first comparing 'mode' to the 'mode' tree supplied in the previous
1154    call to this function, if any.  (The previous value is stored in
1155    env->previous_rm.)  If 'mode' is a single IR temporary 't' and
1156    env->previous_rm is also just 't', then the setting is skipped.
1157
1158    This is safe because of the SSA property of IR: an IR temporary can
1159    only be defined once and so will have the same value regardless of
1160    where it appears in the block.  Cool stuff, SSA.
1161
1162    A safety condition: all attempts to set the RM must be aware of
1163    this mechanism - by being routed through the functions here.
1164
1165    Of course this only helps if blocks where the RM is set more than
1166    once and it is set to the same value each time, *and* that value is
1167    held in the same IR temporary each time.  In order to assure the
1168    latter as much as possible, the IR optimiser takes care to do CSE
1169    on any block with any sign of floating point activity.
1170 */
1171 static
1172 void _set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode, Bool dfp_rm,
1173                               IREndness IEndianess )
1174 {
1175    HReg fr_src = newVRegF(env);
1176    HReg r_src;
1177
1178    vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);
1179
1180    /* Do we need to do anything? */
1181    if (env->previous_rm
1182        && env->previous_rm->tag == Iex_RdTmp
1183        && mode->tag == Iex_RdTmp
1184        && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
1185       /* no - setting it to what it was before.  */
1186       vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
1187       return;
1188    }
1189
1190    /* No luck - we better set it, and remember what we set it to. */
1191    env->previous_rm = mode;
1192
1193    /* Only supporting the rounding-mode bits - the rest of FPSCR is
1194       0x0 - so we can set the whole register at once (faster). */
1195
1196    // Resolve rounding mode and convert to PPC representation
1197    r_src = roundModeIRtoPPC( env, iselWordExpr_R(env, mode, IEndianess) );
1198
1199    // gpr -> fpr
1200    if (env->mode64) {
1201       if (dfp_rm) {
1202          HReg r_tmp1 = newVRegI( env );
1203          addInstr( env,
1204                    PPCInstr_Shft( Pshft_SHL, False/*64bit shift*/,
1205                                   r_tmp1, r_src, PPCRH_Imm( False, 32 ) ) );
1206          fr_src = mk_LoadR64toFPR( env, r_tmp1 );
1207       } else {
1208          fr_src = mk_LoadR64toFPR( env, r_src ); // 1*I64 -> F64
1209       }
1210    } else {
1211       if (dfp_rm) {
1212          HReg r_zero = newVRegI( env );
1213          addInstr( env, PPCInstr_LI( r_zero, 0, env->mode64 ) );
1214          fr_src = mk_LoadRR32toFPR( env, r_src, r_zero );
1215       } else {
1216          fr_src = mk_LoadRR32toFPR( env, r_src, r_src ); // 2*I32 -> F64
1217       }
1218    }
1219
1220    // Move to FPSCR
1221    addInstr(env, PPCInstr_FpLdFPSCR( fr_src, dfp_rm ));
1222 }
1223
1224 static void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode,
1225                                     IREndness IEndianess )
1226 {
1227    _set_FPU_rounding_mode(env, mode, False, IEndianess);
1228 }
1229
1230 static void set_FPU_DFP_rounding_mode ( ISelEnv* env, IRExpr* mode,
1231                                         IREndness IEndianess )
1232 {
1233    _set_FPU_rounding_mode(env, mode, True, IEndianess);
1234 }
1235
1236 static
1237 Bool FPU_rounding_mode_isOdd (IRExpr* mode) {
1238    /* If the rounding mode is set to odd, the the expr must be a constant U8
1239     * value equal to 8.  Otherwise, it must be a bin op expressiong that
1240     * calculates the value.
1241     */
1242
1243    if (mode->tag != Iex_Const)
1244       return False;
1245
1246    vassert(mode->Iex.Const.con->tag == Ico_U32);
1247    vassert(mode->Iex.Const.con->Ico.U32 == 0x8);
1248    return True;
1249 }
1250
1251 /*---------------------------------------------------------*/
1252 /*--- ISEL: vector helpers                              ---*/
1253 /*---------------------------------------------------------*/
1254
1255 /* Generate all-zeroes into a new vector register.
1256 */
1257 static HReg generate_zeroes_V128 ( ISelEnv* env )
1258 {
1259    HReg dst = newVRegV(env);
1260    addInstr(env, PPCInstr_AvBinary(Pav_XOR, dst, dst, dst));
1261    return dst;
1262 }
1263
1264 /* Generate all-ones into a new vector register.
1265 */
1266 static HReg generate_ones_V128 ( ISelEnv* env )
1267 {
1268    HReg dst = newVRegV(env);
1269    PPCVI5s * src = PPCVI5s_Imm(-1);
1270    addInstr(env, PPCInstr_AvSplat(8, dst, src));
1271    return dst;
1272 }
1273
1274
1275 /*
1276   Generates code for AvSplat
1277   - takes in IRExpr* of type 8|16|32
1278     returns vector reg of duplicated lanes of input
1279   - uses AvSplat(imm) for imms up to simm6.
1280     otherwise must use store reg & load vector
1281 */
1282 static HReg mk_AvDuplicateRI( ISelEnv* env, IRExpr* e, IREndness IEndianess )
1283 {
1284    HReg   r_src;
1285    HReg   dst = newVRegV(env);
1286    PPCRI* ri  = iselWordExpr_RI(env, e, IEndianess);
1287    IRType ty  = typeOfIRExpr(env->type_env,e);
1288    UInt   sz  = (ty == Ity_I8) ? 8 : (ty == Ity_I16) ? 16 : 32;
1289    vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
1290
1291    /* special case: immediate */
1292    if (ri->tag == Pri_Imm) {
1293       Int simm32 = (Int)ri->Pri.Imm;
1294
1295       /* figure out if it's do-able with imm splats. */
1296       if (simm32 >= -32 && simm32 <= 31) {
1297          Char simm6 = (Char)simm32;
1298          if (simm6 > 15) {           /* 16:31 inclusive */
1299             HReg v1 = newVRegV(env);
1300             HReg v2 = newVRegV(env);
1301             addInstr(env, PPCInstr_AvSplat(sz, v1, PPCVI5s_Imm(-16)));
1302             addInstr(env, PPCInstr_AvSplat(sz, v2, PPCVI5s_Imm(simm6-16)));
1303             addInstr(env,
1304                (sz== 8) ? PPCInstr_AvBin8x16(Pav_SUBU, dst, v2, v1) :
1305                (sz==16) ? PPCInstr_AvBin16x8(Pav_SUBU, dst, v2, v1)
1306                         : PPCInstr_AvBin32x4(Pav_SUBU, dst, v2, v1) );
1307             return dst;
1308          }
1309          if (simm6 < -16) {          /* -32:-17 inclusive */
1310             HReg v1 = newVRegV(env);
1311             HReg v2 = newVRegV(env);
1312             addInstr(env, PPCInstr_AvSplat(sz, v1, PPCVI5s_Imm(-16)));
1313             addInstr(env, PPCInstr_AvSplat(sz, v2, PPCVI5s_Imm(simm6+16)));
1314             addInstr(env,
1315                (sz== 8) ? PPCInstr_AvBin8x16(Pav_ADDU, dst, v2, v1) :
1316                (sz==16) ? PPCInstr_AvBin16x8(Pav_ADDU, dst, v2, v1)
1317                         : PPCInstr_AvBin32x4(Pav_ADDU, dst, v2, v1) );
1318             return dst;
1319          }
1320          /* simplest form:              -16:15 inclusive */
1321          addInstr(env, PPCInstr_AvSplat(sz, dst, PPCVI5s_Imm(simm6)));
1322          return dst;
1323       }
1324
1325       /* no luck; use the Slow way. */
1326       r_src = newVRegI(env);
1327       addInstr(env, PPCInstr_LI(r_src, (Long)simm32, env->mode64));
1328    }
1329    else {
1330       r_src = ri->Pri.Reg;
1331    }
1332
1333    {
1334       /* Store r_src multiple times (sz dependent); then load the dest vector. */
1335       HReg r_aligned16;
1336       PPCAMode *am_offset, *am_offset_zero;
1337
1338       sub_from_sp( env, 32 );     // Move SP down
1339       /* Get a 16-aligned address within our stack space */
1340       r_aligned16 = get_sp_aligned16( env );
1341
1342       Int i;
1343       Int stride = (sz == 8) ? 1 : (sz == 16) ? 2 : 4;
1344       UChar num_bytes_to_store = stride;
1345       am_offset_zero = PPCAMode_IR( 0, r_aligned16 );
1346       am_offset = am_offset_zero;
1347       for (i = 0; i < 16; i+=stride, am_offset = PPCAMode_IR( i, r_aligned16)) {
1348          addInstr(env, PPCInstr_Store( num_bytes_to_store, am_offset, r_src, env->mode64 ));
1349       }
1350
1351       /* Effectively splat the r_src value to dst */
1352       addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 16, dst, am_offset_zero ) );
1353       add_to_sp( env, 32 );       // Reset SP
1354
1355       return dst;
1356    }
1357 }
1358
1359
1360 /* for each lane of vSrc: lane == nan ? laneX = all 1's : all 0's */
1361 static HReg isNan ( ISelEnv* env, HReg vSrc, IREndness IEndianess )
1362 {
1363    HReg zeros, msk_exp, msk_mnt, expt, mnts, vIsNan;
1364
1365    vassert(hregClass(vSrc) == HRcVec128);
1366
1367    zeros   = mk_AvDuplicateRI(env, mkU32(0), IEndianess);
1368    msk_exp = mk_AvDuplicateRI(env, mkU32(0x7F800000), IEndianess);
1369    msk_mnt = mk_AvDuplicateRI(env, mkU32(0x7FFFFF), IEndianess);
1370    expt    = newVRegV(env);
1371    mnts    = newVRegV(env);
1372    vIsNan  = newVRegV(env);
1373
1374    /* 32bit float => sign(1) | exponent(8) | mantissa(23)
1375       nan => exponent all ones, mantissa > 0 */
1376
1377    addInstr(env, PPCInstr_AvBinary(Pav_AND, expt, vSrc, msk_exp));
1378    addInstr(env, PPCInstr_AvBin32x4(Pav_CMPEQU, expt, expt, msk_exp));
1379    addInstr(env, PPCInstr_AvBinary(Pav_AND, mnts, vSrc, msk_mnt));
1380    addInstr(env, PPCInstr_AvBin32x4(Pav_CMPGTU, mnts, mnts, zeros));
1381    addInstr(env, PPCInstr_AvBinary(Pav_AND, vIsNan, expt, mnts));
1382    return vIsNan;
1383 }
1384
1385
1386 /*---------------------------------------------------------*/
1387 /*--- ISEL: Integer expressions (64/32/16/8 bit)        ---*/
1388 /*---------------------------------------------------------*/
1389
1390 /* Select insns for an integer-typed expression, and add them to the
1391    code list.  Return a reg holding the result.  This reg will be a
1392    virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
1393    want to modify it, ask for a new vreg, copy it in there, and modify
1394    the copy.  The register allocator will do its best to map both
1395    vregs to the same real register, so the copies will often disappear
1396    later in the game.
1397
1398    This should handle expressions of 64, 32, 16 and 8-bit type.
1399    All results are returned in a (mode64 ? 64bit : 32bit) register.
1400    For 16- and 8-bit expressions, the upper (32/48/56 : 16/24) bits
1401    are arbitrary, so you should mask or sign extend partial values
1402    if necessary.
1403 */
1404
1405 static HReg iselWordExpr_R ( ISelEnv* env, const IRExpr* e,
1406                              IREndness IEndianess )
1407 {
1408    HReg r = iselWordExpr_R_wrk(env, e, IEndianess);
1409    /* sanity checks ... */
1410 #  if 0
1411    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1412 #  endif
1413
1414    vassert(hregClass(r) == HRcGPR(env->mode64));
1415    vassert(hregIsVirtual(r));
1416    return r;
1417 }
1418
1419 /* DO NOT CALL THIS DIRECTLY ! */
1420 static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
1421                                  IREndness IEndianess )
1422 {
1423    Bool mode64 = env->mode64;
1424    MatchInfo mi;
1425    DECLARE_PATTERN(p_32to1_then_1Uto8);
1426
1427    IRType ty = typeOfIRExpr(env->type_env,e);
1428    vassert(ty == Ity_I8 || ty == Ity_I16 ||
1429            ty == Ity_I32 || ((ty == Ity_I64) && mode64));
1430
1431    switch (e->tag) {
1432
1433    /* --------- TEMP --------- */
1434    case Iex_RdTmp:
1435       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1436
1437    /* --------- LOAD --------- */
1438    case Iex_Load: {
1439       HReg      r_dst;
1440       PPCAMode* am_addr;
1441       if (e->Iex.Load.end != IEndianess)
1442          goto irreducible;
1443       r_dst   = newVRegI(env);
1444       am_addr = iselWordExpr_AMode( env, e->Iex.Load.addr, ty/*of xfer*/,
1445                                     IEndianess );
1446       addInstr(env, PPCInstr_Load( toUChar(sizeofIRType(ty)),
1447                                    r_dst, am_addr, mode64 ));
1448       return r_dst;
1449       /*NOTREACHED*/
1450    }
1451
1452    /* --------- BINARY OP --------- */
1453    case Iex_Binop: {
1454       PPCAluOp  aluOp;
1455       PPCShftOp shftOp;
1456
1457       /* Is it an addition or logical style op? */
1458       switch (e->Iex.Binop.op) {
1459       case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
1460          aluOp = Palu_ADD; break;
1461       case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
1462          aluOp = Palu_SUB; break;
1463       case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
1464          aluOp = Palu_AND; break;
1465       case Iop_Or8:  case Iop_Or16:  case Iop_Or32:  case Iop_Or64:
1466          aluOp = Palu_OR; break;
1467       case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
1468          aluOp = Palu_XOR; break;
1469       default:
1470          aluOp = Palu_INVALID; break;
1471       }
1472       /* For commutative ops we assume any literal
1473          values are on the second operand. */
1474       if (aluOp != Palu_INVALID) {
1475          HReg   r_dst   = newVRegI(env);
1476          HReg   r_srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1477          PPCRH* ri_srcR = NULL;
1478          /* get right arg into an RH, in the appropriate way */
1479          switch (aluOp) {
1480          case Palu_ADD: case Palu_SUB:
1481             ri_srcR = iselWordExpr_RH(env, True/*signed*/,
1482                                       e->Iex.Binop.arg2, IEndianess);
1483             break;
1484          case Palu_AND: case Palu_OR: case Palu_XOR:
1485             ri_srcR = iselWordExpr_RH(env, False/*signed*/,
1486                                       e->Iex.Binop.arg2, IEndianess);
1487             break;
1488          default:
1489             vpanic("iselWordExpr_R_wrk-aluOp-arg2");
1490          }
1491          addInstr(env, PPCInstr_Alu(aluOp, r_dst, r_srcL, ri_srcR));
1492          return r_dst;
1493       }
1494
1495       /* a shift? */
1496       switch (e->Iex.Binop.op) {
1497       case Iop_Shl8: case Iop_Shl16: case Iop_Shl32: case Iop_Shl64:
1498          shftOp = Pshft_SHL; break;
1499       case Iop_Shr8: case Iop_Shr16: case Iop_Shr32: case Iop_Shr64:
1500          shftOp = Pshft_SHR; break;
1501       case Iop_Sar8: case Iop_Sar16: case Iop_Sar32: case Iop_Sar64:
1502          shftOp = Pshft_SAR; break;
1503       default:
1504          shftOp = Pshft_INVALID; break;
1505       }
1506       /* we assume any literal values are on the second operand. */
1507       if (shftOp != Pshft_INVALID) {
1508          HReg   r_dst   = newVRegI(env);
1509          HReg   r_srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1510          PPCRH* ri_srcR = NULL;
1511          /* get right arg into an RH, in the appropriate way */
1512          switch (shftOp) {
1513          case Pshft_SHL: case Pshft_SHR: case Pshft_SAR:
1514             if (!mode64)
1515                ri_srcR = iselWordExpr_RH5u(env, e->Iex.Binop.arg2, IEndianess);
1516             else
1517                ri_srcR = iselWordExpr_RH6u(env, e->Iex.Binop.arg2, IEndianess);
1518             break;
1519          default:
1520             vpanic("iselIntExpr_R_wrk-shftOp-arg2");
1521          }
1522          /* widen the left arg if needed */
1523          if (shftOp == Pshft_SHR || shftOp == Pshft_SAR) {
1524             if (ty == Ity_I8 || ty == Ity_I16) {
1525                PPCRH* amt = PPCRH_Imm(False,
1526                                       toUShort(ty == Ity_I8 ? 24 : 16));
1527                HReg   tmp = newVRegI(env);
1528                addInstr(env, PPCInstr_Shft(Pshft_SHL,
1529                                            True/*32bit shift*/,
1530                                            tmp, r_srcL, amt));
1531                addInstr(env, PPCInstr_Shft(shftOp,
1532                                            True/*32bit shift*/,
1533                                            tmp, tmp,    amt));
1534                r_srcL = tmp;
1535             }
1536          }
1537          /* Only 64 expressions need 64bit shifts,
1538             32bit shifts are fine for all others */
1539          if (ty == Ity_I64) {
1540             vassert(mode64);
1541             addInstr(env, PPCInstr_Shft(shftOp, False/*64bit shift*/,
1542                                         r_dst, r_srcL, ri_srcR));
1543          } else {
1544             addInstr(env, PPCInstr_Shft(shftOp, True/*32bit shift*/,
1545                                         r_dst, r_srcL, ri_srcR));
1546          }
1547          return r_dst;
1548       }
1549
1550       /* How about a div? */
1551       if (e->Iex.Binop.op == Iop_DivS32 ||
1552           e->Iex.Binop.op == Iop_DivU32 ||
1553           e->Iex.Binop.op == Iop_DivS32E ||
1554           e->Iex.Binop.op == Iop_DivU32E) {
1555          Bool syned  = toBool((e->Iex.Binop.op == Iop_DivS32) || (e->Iex.Binop.op == Iop_DivS32E));
1556          HReg r_dst  = newVRegI(env);
1557          HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1558          HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1559          addInstr( env,
1560                       PPCInstr_Div( ( ( e->Iex.Binop.op == Iop_DivU32E )
1561                                              || ( e->Iex.Binop.op == Iop_DivS32E ) ) ? True
1562                                                                                      : False,
1563                                     syned,
1564                                     True/*32bit div*/,
1565                                     r_dst,
1566                                     r_srcL,
1567                                     r_srcR ) );
1568          return r_dst;
1569       }
1570       if (e->Iex.Binop.op == Iop_DivS64 ||
1571           e->Iex.Binop.op == Iop_DivU64 || e->Iex.Binop.op == Iop_DivS64E
1572           || e->Iex.Binop.op == Iop_DivU64E ) {
1573          Bool syned  = toBool((e->Iex.Binop.op == Iop_DivS64) ||(e->Iex.Binop.op == Iop_DivS64E));
1574          HReg r_dst  = newVRegI(env);
1575          HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1576          HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1577          vassert(mode64);
1578          addInstr( env,
1579                       PPCInstr_Div( ( ( e->Iex.Binop.op == Iop_DivS64E )
1580                                              || ( e->Iex.Binop.op
1581                                                       == Iop_DivU64E ) ) ? True
1582                                                                          : False,
1583                                     syned,
1584                                     False/*64bit div*/,
1585                                     r_dst,
1586                                     r_srcL,
1587                                     r_srcR ) );
1588          return r_dst;
1589       }
1590
1591       /* No? Anyone for a mul? */
1592       if (e->Iex.Binop.op == Iop_Mul32
1593           || e->Iex.Binop.op == Iop_Mul64) {
1594          Bool syned       = False;
1595          Bool sz32        = (e->Iex.Binop.op != Iop_Mul64);
1596          HReg r_dst       = newVRegI(env);
1597          HReg r_srcL      = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1598          HReg r_srcR      = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1599          addInstr(env, PPCInstr_MulL(syned, False/*lo32*/, sz32,
1600                                      r_dst, r_srcL, r_srcR));
1601          return r_dst;
1602       }
1603
1604       /* 32 x 32 -> 64 multiply */
1605       if (mode64
1606           && (e->Iex.Binop.op == Iop_MullU32
1607               || e->Iex.Binop.op == Iop_MullS32)) {
1608          HReg tLo    = newVRegI(env);
1609          HReg tHi    = newVRegI(env);
1610          HReg r_dst  = newVRegI(env);
1611          Bool syned  = toBool(e->Iex.Binop.op == Iop_MullS32);
1612          HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1613          HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1614          addInstr(env, PPCInstr_MulL(False/*signedness irrelevant*/,
1615                                      False/*lo32*/, True/*32bit mul*/,
1616                                      tLo, r_srcL, r_srcR));
1617          addInstr(env, PPCInstr_MulL(syned,
1618                                      True/*hi32*/, True/*32bit mul*/,
1619                                      tHi, r_srcL, r_srcR));
1620          addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
1621                                      r_dst, tHi, PPCRH_Imm(False,32)));
1622          addInstr(env, PPCInstr_Alu(Palu_OR,
1623                                     r_dst, r_dst, PPCRH_Reg(tLo)));
1624          return r_dst;
1625       }
1626
1627       /* El-mutanto 3-way compare? */
1628       if (e->Iex.Binop.op == Iop_CmpORD32S
1629           || e->Iex.Binop.op == Iop_CmpORD32U) {
1630          Bool   syned = toBool(e->Iex.Binop.op == Iop_CmpORD32S);
1631          HReg   dst   = newVRegI(env);
1632          HReg   srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1633          PPCRH* srcR  = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2,
1634                                         IEndianess);
1635          addInstr(env, PPCInstr_Cmp(syned, True/*32bit cmp*/,
1636                                     7/*cr*/, srcL, srcR));
1637          addInstr(env, PPCInstr_MfCR(dst));
1638          addInstr(env, PPCInstr_Alu(Palu_AND, dst, dst,
1639                                     PPCRH_Imm(False,7<<1)));
1640          return dst;
1641       }
1642
1643       if (e->Iex.Binop.op == Iop_CmpORD64S
1644           || e->Iex.Binop.op == Iop_CmpORD64U) {
1645          Bool   syned = toBool(e->Iex.Binop.op == Iop_CmpORD64S);
1646          HReg   dst   = newVRegI(env);
1647          HReg   srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1648          PPCRH* srcR  = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2,
1649                                         IEndianess);
1650          vassert(mode64);
1651          addInstr(env, PPCInstr_Cmp(syned, False/*64bit cmp*/,
1652                                     7/*cr*/, srcL, srcR));
1653          addInstr(env, PPCInstr_MfCR(dst));
1654          addInstr(env, PPCInstr_Alu(Palu_AND, dst, dst,
1655                                     PPCRH_Imm(False,7<<1)));
1656          return dst;
1657       }
1658
1659       if (e->Iex.Binop.op == Iop_Max32U) {
1660          HReg        r1   = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1661          HReg        r2   = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1662          HReg        rdst = newVRegI(env);
1663          PPCCondCode cc   = mk_PPCCondCode( Pct_TRUE, Pcf_7LT );
1664          addInstr(env, mk_iMOVds_RR(rdst, r1));
1665          addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
1666                                     7/*cr*/, rdst, PPCRH_Reg(r2)));
1667          addInstr(env, PPCInstr_CMov(cc, rdst, PPCRI_Reg(r2)));
1668          return rdst;
1669       }
1670
1671       if (e->Iex.Binop.op == Iop_32HLto64) {
1672          HReg   r_Hi  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1673          HReg   r_Lo  = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1674          HReg   r_Tmp = newVRegI(env);
1675          HReg   r_dst = newVRegI(env);
1676          HReg   msk   = newVRegI(env);
1677          vassert(mode64);
1678          /* r_dst = OR( r_Hi<<32, r_Lo ) */
1679          addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
1680                                      r_dst, r_Hi, PPCRH_Imm(False,32)));
1681          addInstr(env, PPCInstr_LI(msk, 0xFFFFFFFF, mode64));
1682          addInstr(env, PPCInstr_Alu( Palu_AND, r_Tmp, r_Lo,
1683                                      PPCRH_Reg(msk) ));
1684          addInstr(env, PPCInstr_Alu( Palu_OR, r_dst, r_dst,
1685                                      PPCRH_Reg(r_Tmp) ));
1686          return r_dst;
1687       }
1688
1689       if ((e->Iex.Binop.op == Iop_CmpF64) ||
1690           (e->Iex.Binop.op == Iop_CmpD64) ||
1691           (e->Iex.Binop.op == Iop_CmpD128)) {
1692          HReg fr_srcL;
1693          HReg fr_srcL_lo;
1694          HReg fr_srcR;
1695          HReg fr_srcR_lo;
1696
1697          HReg r_ccPPC   = newVRegI(env);
1698          HReg r_ccIR    = newVRegI(env);
1699          HReg r_ccIR_b0 = newVRegI(env);
1700          HReg r_ccIR_b2 = newVRegI(env);
1701          HReg r_ccIR_b6 = newVRegI(env);
1702
1703          if (e->Iex.Binop.op == Iop_CmpF64) {
1704             fr_srcL = iselDblExpr(env, e->Iex.Binop.arg1, IEndianess);
1705             fr_srcR = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
1706             addInstr(env, PPCInstr_FpCmp(r_ccPPC, fr_srcL, fr_srcR));
1707
1708          } else if (e->Iex.Binop.op == Iop_CmpD64) {
1709             fr_srcL = iselDfp64Expr(env, e->Iex.Binop.arg1, IEndianess);
1710             fr_srcR = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
1711             addInstr(env, PPCInstr_Dfp64Cmp(r_ccPPC, fr_srcL, fr_srcR));
1712
1713          } else {    //  e->Iex.Binop.op == Iop_CmpD128
1714             iselDfp128Expr(&fr_srcL, &fr_srcL_lo, env, e->Iex.Binop.arg1,
1715                            IEndianess);
1716             iselDfp128Expr(&fr_srcR, &fr_srcR_lo, env, e->Iex.Binop.arg2,
1717                            IEndianess);
1718             addInstr(env, PPCInstr_Dfp128Cmp(r_ccPPC, fr_srcL, fr_srcL_lo,
1719                                              fr_srcR, fr_srcR_lo));
1720          }
1721
1722          /* Map compare result from PPC to IR,
1723             conforming to CmpF64 definition. */
1724          /*
1725            FP cmp result | PPC | IR
1726            --------------------------
1727            UN            | 0x1 | 0x45
1728            EQ            | 0x2 | 0x40
1729            GT            | 0x4 | 0x00
1730            LT            | 0x8 | 0x01
1731          */
1732
1733          // r_ccIR_b0 = r_ccPPC[0] | r_ccPPC[3]
1734          addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32bit shift*/,
1735                                      r_ccIR_b0, r_ccPPC,
1736                                      PPCRH_Imm(False,0x3)));
1737          addInstr(env, PPCInstr_Alu(Palu_OR,  r_ccIR_b0,
1738                                     r_ccPPC,   PPCRH_Reg(r_ccIR_b0)));
1739          addInstr(env, PPCInstr_Alu(Palu_AND, r_ccIR_b0,
1740                                     r_ccIR_b0, PPCRH_Imm(False,0x1)));
1741
1742          // r_ccIR_b2 = r_ccPPC[0]
1743          addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1744                                      r_ccIR_b2, r_ccPPC,
1745                                      PPCRH_Imm(False,0x2)));
1746          addInstr(env, PPCInstr_Alu(Palu_AND, r_ccIR_b2,
1747                                     r_ccIR_b2, PPCRH_Imm(False,0x4)));
1748
1749          // r_ccIR_b6 = r_ccPPC[0] | r_ccPPC[1]
1750          addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32bit shift*/,
1751                                      r_ccIR_b6, r_ccPPC,
1752                                      PPCRH_Imm(False,0x1)));
1753          addInstr(env, PPCInstr_Alu(Palu_OR,  r_ccIR_b6,
1754                                     r_ccPPC, PPCRH_Reg(r_ccIR_b6)));
1755          addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1756                                      r_ccIR_b6, r_ccIR_b6,
1757                                      PPCRH_Imm(False,0x6)));
1758          addInstr(env, PPCInstr_Alu(Palu_AND, r_ccIR_b6,
1759                                     r_ccIR_b6, PPCRH_Imm(False,0x40)));
1760
1761          // r_ccIR = r_ccIR_b0 | r_ccIR_b2 | r_ccIR_b6
1762          addInstr(env, PPCInstr_Alu(Palu_OR, r_ccIR,
1763                                     r_ccIR_b0, PPCRH_Reg(r_ccIR_b2)));
1764          addInstr(env, PPCInstr_Alu(Palu_OR, r_ccIR,
1765                                     r_ccIR,    PPCRH_Reg(r_ccIR_b6)));
1766          return r_ccIR;
1767       }
1768
1769       if ( e->Iex.Binop.op == Iop_F64toI32S ||
1770                e->Iex.Binop.op == Iop_F64toI32U ) {
1771          /* This works in both mode64 and mode32. */
1772          HReg      r1      = StackFramePtr(env->mode64);
1773          PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
1774          HReg      fsrc    = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
1775          HReg      ftmp    = newVRegF(env);
1776          HReg      idst    = newVRegI(env);
1777
1778          /* Set host rounding mode */
1779          set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
1780
1781          sub_from_sp( env, 16 );
1782          addInstr(env, PPCInstr_FpCftI(False/*F->I*/, True/*int32*/,
1783                                        e->Iex.Binop.op == Iop_F64toI32S ? True/*syned*/
1784                                                                      : False,
1785                                        True/*flt64*/,
1786                                        ftmp, fsrc));
1787          addInstr(env, PPCInstr_FpSTFIW(r1, ftmp));
1788          addInstr(env, PPCInstr_Load(4, idst, zero_r1, mode64));
1789
1790          /* in 64-bit mode we need to sign-widen idst. */
1791          if (mode64)
1792             addInstr(env, PPCInstr_Unary(Pun_EXTSW, idst, idst));
1793
1794          add_to_sp( env, 16 );
1795
1796          ///* Restore default FPU rounding. */
1797          //set_FPU_rounding_default( env );
1798          return idst;
1799       }
1800
1801       if (e->Iex.Binop.op == Iop_F64toI64S || e->Iex.Binop.op == Iop_F64toI64U ) {
1802          if (mode64) {
1803             HReg      r1      = StackFramePtr(env->mode64);
1804             PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
1805             HReg      fsrc    = iselDblExpr(env, e->Iex.Binop.arg2,
1806                                             IEndianess);
1807             HReg      idst    = newVRegI(env);
1808             HReg      ftmp    = newVRegF(env);
1809
1810             /* Set host rounding mode */
1811             set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
1812
1813             sub_from_sp( env, 16 );
1814             addInstr(env, PPCInstr_FpCftI(False/*F->I*/, False/*int64*/,
1815                                           ( e->Iex.Binop.op == Iop_F64toI64S ) ? True
1816                                                                             : False,
1817                                           True, ftmp, fsrc));
1818             addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
1819             addInstr(env, PPCInstr_Load(8, idst, zero_r1, True/*mode64*/));
1820             add_to_sp( env, 16 );
1821
1822             ///* Restore default FPU rounding. */
1823             //set_FPU_rounding_default( env );
1824             return idst;
1825          }
1826       }
1827
1828       if (e->Iex.Binop.op == Iop_D64toI64S ) {
1829          HReg      r1      = StackFramePtr(env->mode64);
1830          PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
1831          HReg      fr_src  = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
1832          HReg      idst    = newVRegI(env);
1833          HReg      ftmp    = newVRegF(env);
1834
1835          /* Set host rounding mode */
1836          set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
1837          addInstr(env, PPCInstr_Dfp64Unary(Pfp_DCTFIX, ftmp, fr_src));
1838          sub_from_sp( env, 16 );
1839          addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
1840          addInstr(env, PPCInstr_Load(8, idst, zero_r1, mode64));
1841
1842          add_to_sp( env, 16 );
1843
1844          ///* Restore default FPU rounding. */
1845          //set_FPU_rounding_default( env );
1846          return idst;
1847       }
1848
1849       if (e->Iex.Binop.op == Iop_D128toI64S ) {
1850          PPCFpOp fpop = Pfp_DCTFIXQ;
1851          HReg r_srcHi = newVRegF(env);
1852          HReg r_srcLo = newVRegF(env);
1853          HReg idst    = newVRegI(env);
1854          HReg ftmp    = newVRegF(env);
1855          PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
1856
1857          set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
1858          iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
1859                         IEndianess);
1860          addInstr(env, PPCInstr_DfpD128toD64(fpop, ftmp, r_srcHi, r_srcLo));
1861
1862          // put the D64 result into an integer register
1863          sub_from_sp( env, 16 );
1864          addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
1865          addInstr(env, PPCInstr_Load(8, idst, zero_r1, True/*mode64*/));
1866          add_to_sp( env, 16 );
1867          return idst;
1868       }
1869       break;
1870    }
1871
1872    /* --------- UNARY OP --------- */
1873    case Iex_Unop: {
1874       IROp op_unop = e->Iex.Unop.op;
1875
1876       /* 1Uto8(32to1(expr32)) */
1877       DEFINE_PATTERN(p_32to1_then_1Uto8,
1878                      unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1879       if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1880          const IRExpr* expr32 = mi.bindee[0];
1881          HReg r_dst = newVRegI(env);
1882          HReg r_src = iselWordExpr_R(env, expr32, IEndianess);
1883          addInstr(env, PPCInstr_Alu(Palu_AND, r_dst,
1884                                     r_src, PPCRH_Imm(False,1)));
1885          return r_dst;
1886       }
1887
1888       /* 16Uto32(LDbe:I16(expr32)) */
1889       {
1890          DECLARE_PATTERN(p_LDbe16_then_16Uto32);
1891          DEFINE_PATTERN(p_LDbe16_then_16Uto32,
1892                         unop(Iop_16Uto32,
1893                              IRExpr_Load(IEndianess,Ity_I16,bind(0))) );
1894          if (matchIRExpr(&mi,p_LDbe16_then_16Uto32,e)) {
1895             HReg r_dst = newVRegI(env);
1896             PPCAMode* amode
1897                = iselWordExpr_AMode( env, mi.bindee[0], Ity_I16/*xfer*/,
1898                                      IEndianess );
1899             addInstr(env, PPCInstr_Load(2,r_dst,amode, mode64));
1900             return r_dst;
1901          }
1902       }
1903
1904       switch (op_unop) {
1905       case Iop_8Uto16:
1906       case Iop_8Uto32:
1907       case Iop_8Uto64:
1908       case Iop_16Uto32:
1909       case Iop_16Uto64: {
1910          HReg   r_dst = newVRegI(env);
1911          HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1912          UShort mask  = toUShort(op_unop==Iop_16Uto64 ? 0xFFFF :
1913                                  op_unop==Iop_16Uto32 ? 0xFFFF : 0xFF);
1914          addInstr(env, PPCInstr_Alu(Palu_AND,r_dst,r_src,
1915                                     PPCRH_Imm(False,mask)));
1916          return r_dst;
1917       }
1918       case Iop_32Uto64: {
1919          HReg r_dst = newVRegI(env);
1920          HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1921          vassert(mode64);
1922          addInstr(env,
1923                   PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
1924                                 r_dst, r_src, PPCRH_Imm(False,32)));
1925          addInstr(env,
1926                   PPCInstr_Shft(Pshft_SHR, False/*64bit shift*/,
1927                                 r_dst, r_dst, PPCRH_Imm(False,32)));
1928          return r_dst;
1929       }
1930       case Iop_8Sto16:
1931       case Iop_8Sto32:
1932       case Iop_16Sto32: {
1933          HReg   r_dst = newVRegI(env);
1934          HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1935          UShort amt   = toUShort(op_unop==Iop_16Sto32 ? 16 : 24);
1936          addInstr(env,
1937                   PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1938                                 r_dst, r_src, PPCRH_Imm(False,amt)));
1939          addInstr(env,
1940                   PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
1941                                 r_dst, r_dst, PPCRH_Imm(False,amt)));
1942          return r_dst;
1943       }
1944       case Iop_8Sto64:
1945       case Iop_16Sto64: {
1946          HReg   r_dst = newVRegI(env);
1947          HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1948          UShort amt   = toUShort(op_unop==Iop_8Sto64  ? 56 : 48);
1949          vassert(mode64);
1950          addInstr(env,
1951                   PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
1952                                 r_dst, r_src, PPCRH_Imm(False,amt)));
1953          addInstr(env,
1954                   PPCInstr_Shft(Pshft_SAR, False/*64bit shift*/,
1955                                 r_dst, r_dst, PPCRH_Imm(False,amt)));
1956          return r_dst;
1957       }
1958       case Iop_32Sto64: {
1959          HReg   r_dst = newVRegI(env);
1960          HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1961          vassert(mode64);
1962          /* According to the IBM docs, in 64 bit mode, srawi r,r,0
1963             sign extends the lower 32 bits into the upper 32 bits. */
1964          addInstr(env,
1965                   PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
1966                                 r_dst, r_src, PPCRH_Imm(False,0)));
1967          return r_dst;
1968       }
1969       case Iop_Not8:
1970       case Iop_Not16:
1971       case Iop_Not32:
1972       case Iop_Not64: {
1973          if (op_unop == Iop_Not64) vassert(mode64);
1974          HReg r_dst = newVRegI(env);
1975          HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1976          addInstr(env, PPCInstr_Unary(Pun_NOT,r_dst,r_src));
1977          return r_dst;
1978       }
1979       case Iop_64HIto32: {
1980          if (!mode64) {
1981             HReg rHi, rLo;
1982             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
1983             return rHi; /* and abandon rLo .. poor wee thing :-) */
1984          } else {
1985             HReg   r_dst = newVRegI(env);
1986             HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1987             addInstr(env,
1988                      PPCInstr_Shft(Pshft_SHR, False/*64bit shift*/,
1989                                    r_dst, r_src, PPCRH_Imm(False,32)));
1990             return r_dst;
1991          }
1992       }
1993       case Iop_64to32: {
1994          if (!mode64) {
1995             HReg rHi, rLo;
1996             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
1997             return rLo; /* similar stupid comment to the above ... */
1998          } else {
1999             /* This is a no-op. */
2000             return iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2001          }
2002       }
2003       case Iop_64to16: {
2004          if (mode64) { /* This is a no-op. */
2005             return iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2006          }
2007          break; /* evidently not used in 32-bit mode */
2008       }
2009       case Iop_16HIto8:
2010       case Iop_32HIto16: {
2011          HReg   r_dst = newVRegI(env);
2012          HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2013          UShort shift = toUShort(op_unop == Iop_16HIto8 ? 8 : 16);
2014          addInstr(env,
2015                   PPCInstr_Shft(Pshft_SHR, True/*32bit shift*/,
2016                                 r_dst, r_src, PPCRH_Imm(False,shift)));
2017          return r_dst;
2018       }
2019       case Iop_128HIto64:
2020          if (mode64) {
2021             HReg rHi, rLo;
2022             iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
2023             return rHi; /* and abandon rLo .. poor wee thing :-) */
2024          }
2025          break;
2026       case Iop_128to64:
2027          if (mode64) {
2028             HReg rHi, rLo;
2029             iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
2030             return rLo; /* similar stupid comment to the above ... */
2031          }
2032          break;
2033       case Iop_1Uto64:
2034       case Iop_1Uto32:
2035       case Iop_1Uto8:
2036          if ((op_unop != Iop_1Uto64) || mode64) {
2037             HReg        r_dst = newVRegI(env);
2038             PPCCondCode cond  = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
2039             addInstr(env, PPCInstr_Set(cond,r_dst));
2040             return r_dst;
2041          }
2042          break;
2043       case Iop_1Sto8:
2044       case Iop_1Sto16:
2045       case Iop_1Sto32: {
2046          /* could do better than this, but for now ... */
2047          HReg        r_dst = newVRegI(env);
2048          PPCCondCode cond  = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
2049          addInstr(env, PPCInstr_Set(cond,r_dst));
2050          addInstr(env,
2051                   PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
2052                                 r_dst, r_dst, PPCRH_Imm(False,31)));
2053          addInstr(env,
2054                   PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
2055                                 r_dst, r_dst, PPCRH_Imm(False,31)));
2056          return r_dst;
2057       }
2058       case Iop_1Sto64:
2059          if (mode64) {
2060             /* could do better than this, but for now ... */
2061             HReg        r_dst = newVRegI(env);
2062             PPCCondCode cond  = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
2063             addInstr(env, PPCInstr_Set(cond,r_dst));
2064             addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
2065                                         r_dst, r_dst, PPCRH_Imm(False,63)));
2066             addInstr(env, PPCInstr_Shft(Pshft_SAR, False/*64bit shift*/,
2067                                         r_dst, r_dst, PPCRH_Imm(False,63)));
2068             return r_dst;
2069          }
2070          break;
2071
2072       case Iop_Clz32: case Iop_ClzNat32:
2073       case Iop_Clz64: case Iop_ClzNat64: {
2074          // cntlz is available even in the most basic (earliest) ppc
2075          // variants, so it's safe to generate it unconditionally.
2076          HReg r_src, r_dst;
2077          PPCUnaryOp op_clz = (op_unop == Iop_Clz32 || op_unop == Iop_ClzNat32)
2078                                 ? Pun_CLZ32 : Pun_CLZ64;
2079          if ((op_unop == Iop_Clz64 || op_unop == Iop_ClzNat64) && !mode64)
2080             goto irreducible;
2081          /* Count leading zeroes. */
2082          r_dst = newVRegI(env);
2083          r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2084          addInstr(env, PPCInstr_Unary(op_clz,r_dst,r_src));
2085          return r_dst;
2086       }
2087
2088       //case Iop_Ctz32:
2089       case Iop_CtzNat32:
2090       //case Iop_Ctz64:
2091       case Iop_CtzNat64:
2092       {
2093          // Generate code using Clz, because we can't assume the host has
2094          // Ctz.  In particular, part of the fix for bug 386945 involves
2095          // creating a Ctz in ir_opt.c from smaller fragments.
2096          PPCUnaryOp op_clz = Pun_CLZ64;
2097          Int WS = 64;
2098          if (op_unop == Iop_Ctz32 || op_unop == Iop_CtzNat32) {
2099             op_clz = Pun_CLZ32;
2100             WS = 32;
2101          }
2102          /* Compute ctz(arg) = wordsize - clz(~arg & (arg - 1)), thusly:
2103             t1 = arg - 1
2104             t2 = not arg
2105             t2 = t2 & t1
2106             t2 = clz t2
2107             t1 = WS
2108             t2 = t1 - t2
2109             // result in t2
2110          */
2111          HReg arg = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2112          HReg t1 = newVRegI(env);
2113          HReg t2 = newVRegI(env);
2114          addInstr(env, PPCInstr_Alu(Palu_SUB, t1, arg, PPCRH_Imm(True, 1)));
2115          addInstr(env, PPCInstr_Unary(Pun_NOT, t2, arg));
2116          addInstr(env, PPCInstr_Alu(Palu_AND, t2, t2, PPCRH_Reg(t1)));
2117          addInstr(env, PPCInstr_Unary(op_clz, t2, t2));
2118          addInstr(env, PPCInstr_LI(t1, WS, False/*!64-bit imm*/));
2119          addInstr(env, PPCInstr_Alu(Palu_SUB, t2, t1, PPCRH_Reg(t2)));
2120          return t2;
2121       }
2122
2123       case Iop_PopCount64: {
2124          // popcnt{x,d} is only available in later arch revs (ISA 3.0,
2125          // maybe) so it's not really correct to emit it here without a caps
2126          // check for the host.
2127          if (mode64) {
2128             HReg r_dst = newVRegI(env);
2129             HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2130             addInstr(env, PPCInstr_Unary(Pun_POP64, r_dst, r_src));
2131             return r_dst;
2132          }
2133          // We don't expect to be required to handle this in 32-bit mode.
2134          break;
2135       }
2136
2137       case Iop_PopCount32: {
2138          // Similar comment as for Ctz just above applies -- we really
2139          // should have a caps check here.
2140
2141         HReg r_dst = newVRegI(env);
2142         // This actually generates popcntw, which in 64 bit mode does a
2143         // 32-bit count individually for both low and high halves of the
2144         // word.  Per the comment at the top of iselIntExpr_R, in the 64
2145         // bit mode case, the user of this result is required to ignore
2146         // the upper 32 bits of the result.  In 32 bit mode this is all
2147         // moot.  It is however unclear from the PowerISA 3.0 docs that
2148         // the instruction exists in 32 bit mode; however our own front
2149         // end (guest_ppc_toIR.c) accepts it, so I guess it does exist.
2150         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2151         addInstr(env, PPCInstr_Unary(Pun_POP32, r_dst, r_src));
2152         return r_dst;
2153       }
2154
2155       case Iop_Reverse8sIn32_x1: {
2156          // A bit of a mouthful, but simply .. 32-bit byte swap.
2157          // This is pretty rubbish code.  We could do vastly better if
2158          // rotates, and better, rotate-inserts, were allowed.  Note that
2159          // even on a 64 bit target, the right shifts must be done as 32-bit
2160          // so as to introduce zero bits in the right places.  So it seems
2161          // simplest to do the whole sequence in 32-bit insns.
2162          /*
2163             r     = <argument>  // working temporary, initial byte order ABCD
2164             Mask  = 00FF00FF
2165             nMask = not Mask
2166             tHi   = and r, Mask
2167             tHi   = shl tHi, 8
2168             tLo   = and r, nMask
2169             tLo   = shr tLo, 8
2170             r     = or tHi, tLo  // now r has order BADC
2171             and repeat for 16 bit chunks ..
2172             Mask  = 0000FFFF
2173             nMask = not Mask
2174             tHi   = and r, Mask
2175             tHi   = shl tHi, 16
2176             tLo   = and r, nMask
2177             tLo   = shr tLo, 16
2178             r     = or tHi, tLo  // now r has order DCBA
2179          */
2180          HReg r_src  = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2181          HReg rr     = newVRegI(env);
2182          HReg rMask  = newVRegI(env);
2183          HReg rnMask = newVRegI(env);
2184          HReg rtHi   = newVRegI(env);
2185          HReg rtLo   = newVRegI(env);
2186          // Copy r_src since we need to modify it
2187          addInstr(env, mk_iMOVds_RR(rr, r_src));
2188          // Swap within 16-bit lanes
2189          addInstr(env, PPCInstr_LI(rMask, 0x00FF00FFULL,
2190                                    False/* !64bit imm*/));
2191          addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
2192          addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
2193          addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32 bit shift*/,
2194                                      rtHi, rtHi,
2195                                      PPCRH_Imm(False/*!signed imm*/, 8)));
2196          addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
2197          addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32 bit shift*/,
2198                                      rtLo, rtLo,
2199                                      PPCRH_Imm(False/*!signed imm*/, 8)));
2200          addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
2201          // And now swap the two 16-bit chunks
2202          addInstr(env, PPCInstr_LI(rMask, 0x0000FFFFULL,
2203                                    False/* !64bit imm*/));
2204          addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
2205          addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
2206          addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32 bit shift*/,
2207                                      rtHi, rtHi,
2208                                      PPCRH_Imm(False/*!signed imm*/, 16)));
2209          addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
2210          addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32 bit shift*/,
2211                                      rtLo, rtLo,
2212                                      PPCRH_Imm(False/*!signed imm*/, 16)));
2213          addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
2214          return rr;
2215       }
2216
2217       case Iop_Reverse8sIn64_x1: {
2218          /* See Iop_Reverse8sIn32_x1, but extended to 64bit.
2219             Can only be used in 64bit mode.  */
2220          vassert (mode64);
2221
2222          HReg r_src  = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2223          HReg rr     = newVRegI(env);
2224          HReg rMask  = newVRegI(env);
2225          HReg rnMask = newVRegI(env);
2226          HReg rtHi   = newVRegI(env);
2227          HReg rtLo   = newVRegI(env);
2228
2229          // Copy r_src since we need to modify it
2230          addInstr(env, mk_iMOVds_RR(rr, r_src));
2231
2232          // r = (r & 0x00FF00FF00FF00FF) << 8 | (r & 0xFF00FF00FF00FF00) >> 8
2233          addInstr(env, PPCInstr_LI(rMask, 0x00FF00FF00FF00FFULL,
2234                                    True/* 64bit imm*/));
2235          addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
2236          addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
2237          addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
2238                                      rtHi, rtHi,
2239                                      PPCRH_Imm(False/*!signed imm*/, 8)));
2240          addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
2241          addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
2242                                      rtLo, rtLo,
2243                                      PPCRH_Imm(False/*!signed imm*/, 8)));
2244          addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
2245
2246          // r = (r & 0x0000FFFF0000FFFF) << 16 | (r & 0xFFFF0000FFFF0000) >> 16
2247          addInstr(env, PPCInstr_LI(rMask, 0x0000FFFF0000FFFFULL,
2248                                    True/* !64bit imm*/));
2249          addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
2250          addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
2251          addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
2252                                      rtHi, rtHi,
2253                                      PPCRH_Imm(False/*!signed imm*/, 16)));
2254          addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
2255          addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
2256                                      rtLo, rtLo,
2257                                      PPCRH_Imm(False/*!signed imm*/, 16)));
2258          addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
2259
2260          // r = (r & 0x00000000FFFFFFFF) << 32 | (r & 0xFFFFFFFF00000000) >> 32
2261          /* We don't need to mask anymore, just two more shifts and an or.  */
2262          addInstr(env, mk_iMOVds_RR(rtLo, rr));
2263          addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
2264                                      rtLo, rtLo,
2265                                      PPCRH_Imm(False/*!signed imm*/, 32)));
2266          addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
2267                                      rr, rr,
2268                                      PPCRH_Imm(False/*!signed imm*/, 32)));
2269          addInstr(env, PPCInstr_Alu(Palu_OR, rr, rr, PPCRH_Reg(rtLo)));
2270
2271          return rr;
2272       }
2273
2274       case Iop_Left8:
2275       case Iop_Left16:
2276       case Iop_Left32:
2277       case Iop_Left64: {
2278          HReg r_src, r_dst;
2279          if (op_unop == Iop_Left64 && !mode64)
2280             goto irreducible;
2281          r_dst = newVRegI(env);
2282          r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2283          addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src));
2284          addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src)));
2285          return r_dst;
2286       }
2287
2288       case Iop_CmpwNEZ32: {
2289          HReg r_dst = newVRegI(env);
2290          HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2291          addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src));
2292          addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src)));
2293          addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
2294                                      r_dst, r_dst, PPCRH_Imm(False, 31)));
2295          return r_dst;
2296       }
2297
2298       case Iop_CmpwNEZ64: {
2299          HReg r_dst = newVRegI(env);
2300          HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2301          if (!mode64) goto irreducible;
2302          addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src));
2303          addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src)));
2304          addInstr(env, PPCInstr_Shft(Pshft_SAR, False/*64bit shift*/,
2305                                      r_dst, r_dst, PPCRH_Imm(False, 63)));
2306          return r_dst;
2307       }
2308
2309       case Iop_V128to32: {
2310          HReg        r_aligned16;
2311          HReg        dst  = newVRegI(env);
2312          HReg        vec  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
2313          PPCAMode *am_off0, *am_off_word0;
2314          sub_from_sp( env, 32 );     // Move SP down 32 bytes
2315
2316          // get a quadword aligned address within our stack space
2317          r_aligned16 = get_sp_aligned16( env );
2318          am_off0  = PPCAMode_IR( 0, r_aligned16 );
2319
2320          /* Note that the store below (done via PPCInstr_AvLdSt) uses
2321           * stvx, which stores the vector in proper LE format,
2322           * with byte zero (far right byte of the register in LE format)
2323           * stored at the lowest memory address.  Therefore, to obtain
2324           * integer word zero, we need to use that lowest memory address
2325           * as the base for the load.
2326           */
2327          if (IEndianess == Iend_LE)
2328             am_off_word0 = am_off0;
2329          else
2330             am_off_word0 = PPCAMode_IR( 12,r_aligned16 );
2331
2332          // store vec, load low word to dst
2333          addInstr(env,
2334                   PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
2335          addInstr(env,
2336                   PPCInstr_Load( 4, dst, am_off_word0, mode64 ));
2337
2338          add_to_sp( env, 32 );       // Reset SP
2339          return dst;
2340       }
2341
2342       case Iop_V128to64:
2343       case Iop_V128HIto64:
2344          if (mode64) {
2345             HReg     r_aligned16;
2346             HReg     dst = newVRegI(env);
2347             HReg     vec = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
2348             PPCAMode *am_off0, *am_off8, *am_off_arg;
2349             sub_from_sp( env, 32 );     // Move SP down 32 bytes
2350
2351             // get a quadword aligned address within our stack space
2352             r_aligned16 = get_sp_aligned16( env );
2353             am_off0 = PPCAMode_IR( 0, r_aligned16 );
2354             am_off8 = PPCAMode_IR( 8 ,r_aligned16 );
2355
2356             // store vec, load low word or high to dst
2357             addInstr(env,
2358                      PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
2359             if (IEndianess == Iend_LE) {
2360                if (op_unop == Iop_V128HIto64)
2361                   am_off_arg = am_off8;
2362                else
2363                   am_off_arg = am_off0;
2364             } else {
2365                if (op_unop == Iop_V128HIto64)
2366                   am_off_arg = am_off0;
2367                else
2368                   am_off_arg = am_off8;
2369             }
2370             addInstr(env,
2371                      PPCInstr_Load(
2372                         8, dst,
2373                         am_off_arg,
2374                         mode64 ));
2375
2376             add_to_sp( env, 32 );       // Reset SP
2377             return dst;
2378          }
2379          break;
2380       case Iop_16to8:
2381       case Iop_32to8:
2382       case Iop_32to16:
2383       case Iop_64to8:
2384          /* These are no-ops. */
2385          return iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2386
2387       /* ReinterpF64asI64(e) */
2388       /* Given an IEEE754 double, produce an I64 with the same bit
2389          pattern. */
2390       case Iop_ReinterpF64asI64:
2391          if (mode64) {
2392             PPCAMode *am_addr;
2393             HReg fr_src = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
2394             HReg r_dst  = newVRegI(env);
2395
2396             sub_from_sp( env, 16 );     // Move SP down 16 bytes
2397             am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
2398
2399             // store as F64
2400             addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
2401                                            fr_src, am_addr ));
2402             // load as Ity_I64
2403             addInstr(env, PPCInstr_Load( 8, r_dst, am_addr, mode64 ));
2404
2405             add_to_sp( env, 16 );       // Reset SP
2406             return r_dst;
2407          }
2408          break;
2409
2410       /* ReinterpF32asI32(e) */
2411       /* Given an IEEE754 float, produce an I32 with the same bit
2412          pattern. */
2413       case Iop_ReinterpF32asI32: {
2414          /* I believe this generates correct code for both 32- and
2415             64-bit hosts. */
2416          PPCAMode *am_addr;
2417          HReg fr_src = iselFltExpr(env, e->Iex.Unop.arg, IEndianess);
2418          HReg r_dst  = newVRegI(env);
2419
2420          sub_from_sp( env, 16 );     // Move SP down 16 bytes
2421          am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
2422
2423          // store as F32
2424          addInstr(env, PPCInstr_FpLdSt( False/*store*/, 4,
2425                                         fr_src, am_addr ));
2426          // load as Ity_I32
2427          addInstr(env, PPCInstr_Load( 4, r_dst, am_addr, mode64 ));
2428
2429          add_to_sp( env, 16 );       // Reset SP
2430          return r_dst;
2431       }
2432       break;
2433
2434       case Iop_ReinterpD64asI64:
2435          if (mode64) {
2436             PPCAMode *am_addr;
2437             HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
2438             HReg r_dst  = newVRegI(env);
2439
2440             sub_from_sp( env, 16 );     // Move SP down 16 bytes
2441             am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
2442
2443             // store as D64
2444             addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
2445                                            fr_src, am_addr ));
2446             // load as Ity_I64
2447             addInstr(env, PPCInstr_Load( 8, r_dst, am_addr, mode64 ));
2448             add_to_sp( env, 16 );       // Reset SP
2449             return r_dst;
2450          }
2451          break;
2452
2453       case Iop_BCDtoDPB: {
2454          /* the following is only valid in 64 bit mode */
2455          if (!mode64) break;
2456
2457          PPCCondCode cc;
2458          UInt        argiregs;
2459          HReg        argregs[1];
2460          HReg        r_dst  = newVRegI(env);
2461          Int         argreg;
2462
2463          argiregs = 0;
2464          argreg = 0;
2465          argregs[0] = hregPPC_GPR3(mode64);
2466
2467          argiregs |= (1 << (argreg+3));
2468          addInstr(env, mk_iMOVds_RR( argregs[argreg++],
2469                                      iselWordExpr_R(env, e->Iex.Unop.arg,
2470                                                     IEndianess) ) );
2471
2472          cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
2473          if (IEndianess == Iend_LE) {
2474              addInstr(env, PPCInstr_Call( cc, (Addr)h_calc_BCDtoDPB,
2475                                           argiregs,
2476                                           mk_RetLoc_simple(RLPri_Int)) );
2477          } else {
2478              HWord*      fdescr;
2479              fdescr = (HWord*)h_calc_BCDtoDPB;
2480              addInstr(env, PPCInstr_Call( cc, (Addr64)(fdescr[0]),
2481                                           argiregs,
2482                                           mk_RetLoc_simple(RLPri_Int)) );
2483          }
2484
2485          addInstr(env, mk_iMOVds_RR(r_dst, argregs[0]));
2486          return r_dst;
2487       }
2488
2489       case Iop_DPBtoBCD: {
2490          /* the following is only valid in 64 bit mode */
2491          if (!mode64) break;
2492
2493          PPCCondCode cc;
2494          UInt        argiregs;
2495          HReg        argregs[1];
2496          HReg        r_dst  = newVRegI(env);
2497          Int         argreg;
2498
2499          argiregs = 0;
2500          argreg = 0;
2501          argregs[0] = hregPPC_GPR3(mode64);
2502
2503          argiregs |= (1 << (argreg+3));
2504          addInstr(env, mk_iMOVds_RR( argregs[argreg++],
2505                                      iselWordExpr_R(env, e->Iex.Unop.arg,
2506                                                     IEndianess) ) );
2507
2508          cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
2509
2510         if (IEndianess == Iend_LE) {
2511             addInstr(env, PPCInstr_Call( cc, (Addr)h_calc_DPBtoBCD,
2512                                          argiregs,
2513                                          mk_RetLoc_simple(RLPri_Int) ) );
2514         } else {
2515             HWord*      fdescr;
2516             fdescr = (HWord*)h_calc_DPBtoBCD;
2517             addInstr(env, PPCInstr_Call( cc, (Addr64)(fdescr[0]),
2518                                          argiregs,
2519                                          mk_RetLoc_simple(RLPri_Int) ) );
2520          }
2521
2522          addInstr(env, mk_iMOVds_RR(r_dst, argregs[0]));
2523          return r_dst;
2524       }
2525       case Iop_F32toF16x4_DEP: {
2526          HReg vdst = newVRegV(env);    /* V128 */
2527          HReg dst  = newVRegI(env);    /* I64*/
2528          HReg r0 = newVRegI(env);    /* I16*/
2529          HReg r1 = newVRegI(env);    /* I16*/
2530          HReg r2 = newVRegI(env);    /* I16*/
2531          HReg r3 = newVRegI(env);    /* I16*/
2532          HReg vsrc  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
2533          PPCAMode *am_off0, *am_off2, *am_off4, *am_off6, *am_off8;
2534          PPCAMode *am_off10, *am_off12, *am_off14;
2535          HReg r_aligned16;
2536
2537          sub_from_sp( env, 32 );     // Move SP down
2538
2539          /* issue instruction */
2540          addInstr(env, PPCInstr_AvUnary(Pav_F32toF16x4, vdst, vsrc));
2541
2542          /* Get a  quadword aligned address within our stack space */
2543          r_aligned16 = get_sp_aligned16( env );
2544          am_off0  = PPCAMode_IR( 0, r_aligned16 );
2545          am_off2  = PPCAMode_IR( 2, r_aligned16 );
2546          am_off4  = PPCAMode_IR( 4, r_aligned16 );
2547          am_off6  = PPCAMode_IR( 6, r_aligned16 );
2548          am_off8  = PPCAMode_IR( 8, r_aligned16 );
2549          am_off10 = PPCAMode_IR( 10, r_aligned16 );
2550          am_off12 = PPCAMode_IR( 12, r_aligned16 );
2551          am_off14 = PPCAMode_IR( 14, r_aligned16 );
2552
2553          /* Store v128 result to stack. */
2554          addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, vdst, am_off0));
2555
2556          /* fetch four I16 from V128, store into contiguous I64 via stack,  */
2557          if (IEndianess == Iend_LE) {
2558             addInstr(env, PPCInstr_Load( 2, r3, am_off12, mode64));
2559             addInstr(env, PPCInstr_Load( 2, r2, am_off8, mode64));
2560             addInstr(env, PPCInstr_Load( 2, r1, am_off4, mode64));
2561             addInstr(env, PPCInstr_Load( 2, r0, am_off0, mode64));
2562          } else {
2563             addInstr(env, PPCInstr_Load( 2, r0, am_off14, mode64));
2564             addInstr(env, PPCInstr_Load( 2, r1, am_off10, mode64));
2565             addInstr(env, PPCInstr_Load( 2, r2, am_off6, mode64));
2566             addInstr(env, PPCInstr_Load( 2, r3, am_off2, mode64));
2567          }
2568
2569          /* store in contiguous 64-bit values */
2570          addInstr(env, PPCInstr_Store( 2, am_off6, r3, mode64));
2571          addInstr(env, PPCInstr_Store( 2, am_off4, r2, mode64));
2572          addInstr(env, PPCInstr_Store( 2, am_off2, r1, mode64));
2573          addInstr(env, PPCInstr_Store( 2, am_off0, r0, mode64));
2574
2575          /* Fetch I64 */
2576          addInstr(env, PPCInstr_Load(8, dst, am_off0, mode64));
2577
2578          add_to_sp( env, 32 );          // Reset SP
2579          return dst;
2580       }
2581
2582       default:
2583          break;
2584       }
2585
2586      switch (e->Iex.Unop.op) {
2587         case Iop_ExtractExpD64: {
2588
2589             HReg fr_dst = newVRegI(env);
2590             HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
2591             HReg tmp    = newVRegF(env);
2592             PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
2593             addInstr(env, PPCInstr_Dfp64Unary(Pfp_DXEX, tmp, fr_src));
2594
2595             // put the D64 result into a integer register
2596             sub_from_sp( env, 16 );
2597             addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
2598             addInstr(env, PPCInstr_Load(8, fr_dst, zero_r1, env->mode64));
2599             add_to_sp( env, 16 );
2600             return fr_dst;
2601          }
2602          case Iop_ExtractExpD128: {
2603             HReg fr_dst = newVRegI(env);
2604             HReg r_srcHi;
2605             HReg r_srcLo;
2606             HReg tmp    = newVRegF(env);
2607             PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
2608
2609             iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Unop.arg,
2610                            IEndianess);
2611             addInstr(env, PPCInstr_ExtractExpD128(Pfp_DXEXQ, tmp,
2612                                                   r_srcHi, r_srcLo));
2613
2614             sub_from_sp( env, 16 );
2615             addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
2616             addInstr(env, PPCInstr_Load(8, fr_dst, zero_r1, env->mode64));
2617             add_to_sp( env, 16 );
2618             return fr_dst;
2619          }
2620          default:
2621             break;
2622       }
2623
2624       break;
2625    }
2626
2627    /* --------- GET --------- */
2628    case Iex_Get: {
2629       if (ty == Ity_I8  || ty == Ity_I16 ||
2630           ty == Ity_I32 || ((ty == Ity_I64) && mode64)) {
2631          HReg r_dst = newVRegI(env);
2632          PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
2633                                           GuestStatePtr(mode64) );
2634          addInstr(env, PPCInstr_Load( toUChar(sizeofIRType(ty)),
2635                                       r_dst, am_addr, mode64 ));
2636          return r_dst;
2637       }
2638       break;
2639    }
2640
2641    case Iex_GetI: {
2642       PPCAMode* src_am
2643          = genGuestArrayOffset( env, e->Iex.GetI.descr,
2644                                 e->Iex.GetI.ix, e->Iex.GetI.bias,
2645                                 IEndianess );
2646       HReg r_dst = newVRegI(env);
2647       if (mode64 && ty == Ity_I64) {
2648          addInstr(env, PPCInstr_Load( toUChar(8),
2649                                       r_dst, src_am, mode64 ));
2650          return r_dst;
2651       }
2652       if ((!mode64) && ty == Ity_I32) {
2653          addInstr(env, PPCInstr_Load( toUChar(4),
2654                                       r_dst, src_am, mode64 ));
2655          return r_dst;
2656       }
2657       break;
2658    }
2659
2660    /* --------- CCALL --------- */
2661    case Iex_CCall: {
2662       vassert(ty == e->Iex.CCall.retty); /* well-formedness of IR */
2663
2664       /* be very restrictive for now.  Only 32/64-bit ints allowed for
2665          args, and 32 bits or host machine word for return type. */
2666       if (!(ty == Ity_I32 || (mode64 && ty == Ity_I64)))
2667          goto irreducible;
2668
2669       /* Marshal args, do the call, clear stack. */
2670       UInt   addToSp = 0;
2671       RetLoc rloc    = mk_RetLoc_INVALID();
2672       doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2673                     e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args,
2674                     IEndianess );
2675       vassert(is_sane_RetLoc(rloc));
2676       vassert(rloc.pri == RLPri_Int);
2677       vassert(addToSp == 0);
2678
2679       /* GPR3 now holds the destination address from Pin_Goto */
2680       HReg r_dst = newVRegI(env);
2681       addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
2682       return r_dst;
2683    }
2684
2685    /* --------- LITERAL --------- */
2686    /* 32/16/8-bit literals */
2687    case Iex_Const: {
2688       Long l;
2689       HReg r_dst = newVRegI(env);
2690       IRConst* con = e->Iex.Const.con;
2691       switch (con->tag) {
2692          case Ico_U64: if (!mode64) goto irreducible;
2693                        l = (Long)            con->Ico.U64; break;
2694          case Ico_U32: l = (Long)(Int)       con->Ico.U32; break;
2695          case Ico_U16: l = (Long)(Int)(Short)con->Ico.U16; break;
2696          case Ico_U8:  l = (Long)(Int)(Char )con->Ico.U8;  break;
2697          default:      vpanic("iselIntExpr_R.const(ppc)");
2698       }
2699       addInstr(env, PPCInstr_LI(r_dst, (ULong)l, mode64));
2700       return r_dst;
2701    }
2702
2703    /* --------- MULTIPLEX --------- */
2704    case Iex_ITE: { // VFD
2705       if ((ty == Ity_I8  || ty == Ity_I16 ||
2706            ty == Ity_I32 || ((ty == Ity_I64) && mode64)) &&
2707           typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
2708          PPCRI* r1    = iselWordExpr_RI(env, e->Iex.ITE.iftrue, IEndianess);
2709          HReg   r0    = iselWordExpr_R(env, e->Iex.ITE.iffalse, IEndianess);
2710          HReg   r_dst = newVRegI(env);
2711          addInstr(env, mk_iMOVds_RR(r_dst,r0));
2712          PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond, IEndianess);
2713          addInstr(env, PPCInstr_CMov(cc, r_dst, r1));
2714          return r_dst;
2715       }
2716       break;
2717    }
2718
2719    default:
2720       break;
2721    } /* switch (e->tag) */
2722
2723
2724    /* We get here if no pattern matched. */
2725  irreducible:
2726    ppIRExpr(e);
2727    vpanic("iselIntExpr_R(ppc): cannot reduce tree");
2728 }
2729
2730
2731 /*---------------------------------------------------------*/
2732 /*--- ISEL: Integer expression auxiliaries              ---*/
2733 /*---------------------------------------------------------*/
2734
2735 /* --------------------- AMODEs --------------------- */
2736
2737 /* Return an AMode which computes the value of the specified
2738    expression, possibly also adding insns to the code list as a
2739    result.  The expression may only be a word-size one.
2740 */
2741
2742 static Bool uInt_fits_in_16_bits ( UInt u )
2743 {
2744    /* Is u the same as the sign-extend of its lower 16 bits? */
2745    UInt v = u & 0xFFFF;
2746
2747    v = (Int)(v << 16) >> 16;   /* sign extend */
2748
2749    return u == v;
2750 }
2751
2752 static Bool uLong_fits_in_16_bits ( ULong u )
2753 {
2754    /* Is u the same as the sign-extend of its lower 16 bits? */
2755    ULong v = u & 0xFFFFULL;
2756
2757    v = (Long)(v << 48) >> 48;   /* sign extend */
2758
2759    return u == v;
2760 }
2761
2762 static Bool uLong_is_4_aligned ( ULong u )
2763 {
2764    return toBool((u & 3ULL) == 0);
2765 }
2766
2767 static Bool sane_AMode ( ISelEnv* env, PPCAMode* am )
2768 {
2769    Bool mode64 = env->mode64;
2770    switch (am->tag) {
2771    case Pam_IR:
2772       /* Using uInt_fits_in_16_bits in 64-bit mode seems a bit bogus,
2773          somehow, but I think it's OK. */
2774       return toBool( hregClass(am->Pam.IR.base) == HRcGPR(mode64) &&
2775                      hregIsVirtual(am->Pam.IR.base) &&
2776                      uInt_fits_in_16_bits(am->Pam.IR.index) );
2777    case Pam_RR:
2778       return toBool( hregClass(am->Pam.RR.base) == HRcGPR(mode64) &&
2779                      hregIsVirtual(am->Pam.RR.base) &&
2780                      hregClass(am->Pam.RR.index) == HRcGPR(mode64) &&
2781                      hregIsVirtual(am->Pam.RR.index) );
2782    default:
2783       vpanic("sane_AMode: unknown ppc amode tag");
2784    }
2785 }
2786
2787 static
2788 PPCAMode* iselWordExpr_AMode ( ISelEnv* env, const IRExpr* e, IRType xferTy,
2789                                IREndness IEndianess )
2790 {
2791    PPCAMode* am = iselWordExpr_AMode_wrk(env, e, xferTy, IEndianess);
2792    vassert(sane_AMode(env, am));
2793    return am;
2794 }
2795
2796 /* DO NOT CALL THIS DIRECTLY ! */
2797 static PPCAMode* iselWordExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e,
2798                                           IRType xferTy, IREndness IEndianess )
2799 {
2800    IRType ty = typeOfIRExpr(env->type_env,e);
2801
2802    if (env->mode64) {
2803
2804       /* If the data load/store type is I32 or I64, this amode might
2805          be destined for use in ld/ldu/lwa/st/stu.  In which case
2806          insist that if it comes out as an _IR, the immediate must
2807          have its bottom two bits be zero.  This does assume that for
2808          any other type (I8/I16/I128/F32/F64/V128) the amode will not
2809          be parked in any such instruction.  But that seems a
2810          reasonable assumption.  */
2811       Bool aligned4imm = toBool(xferTy == Ity_I32 || xferTy == Ity_I64);
2812
2813       vassert(ty == Ity_I64);
2814
2815       /* Add64(expr,i), where i == sign-extend of (i & 0xFFFF) */
2816       if (e->tag == Iex_Binop
2817           && e->Iex.Binop.op == Iop_Add64
2818           && e->Iex.Binop.arg2->tag == Iex_Const
2819           && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64
2820           && (aligned4imm  ? uLong_is_4_aligned(e->Iex.Binop.arg2
2821                                                  ->Iex.Const.con->Ico.U64)
2822                            : True)
2823           && uLong_fits_in_16_bits(e->Iex.Binop.arg2
2824                                     ->Iex.Const.con->Ico.U64)) {
2825          return PPCAMode_IR( (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64,
2826                              iselWordExpr_R(env, e->Iex.Binop.arg1,
2827                                             IEndianess) );
2828       }
2829
2830       /* Add64(expr,expr) */
2831       if (e->tag == Iex_Binop
2832           && e->Iex.Binop.op == Iop_Add64) {
2833          HReg r_base = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
2834          HReg r_idx  = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
2835          return PPCAMode_RR( r_idx, r_base );
2836       }
2837
2838    } else {
2839
2840       vassert(ty == Ity_I32);
2841
2842       /* Add32(expr,i), where i == sign-extend of (i & 0xFFFF) */
2843       if (e->tag == Iex_Binop
2844           && e->Iex.Binop.op == Iop_Add32
2845           && e->Iex.Binop.arg2->tag == Iex_Const
2846           && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
2847           && uInt_fits_in_16_bits(e->Iex.Binop.arg2
2848                                    ->Iex.Const.con->Ico.U32)) {
2849          return PPCAMode_IR( (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32,
2850                              iselWordExpr_R(env, e->Iex.Binop.arg1,
2851                                             IEndianess) );
2852       }
2853
2854       /* Add32(expr,expr) */
2855       if (e->tag == Iex_Binop
2856           && e->Iex.Binop.op == Iop_Add32) {
2857          HReg r_base = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
2858          HReg r_idx  = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
2859          return PPCAMode_RR( r_idx, r_base );
2860       }
2861
2862    }
2863
2864    /* Doesn't match anything in particular.  Generate it into
2865       a register and use that. */
2866    return PPCAMode_IR( 0, iselWordExpr_R(env,e,IEndianess) );
2867 }
2868
2869
2870 /* --------------------- RH --------------------- */
2871
2872 /* Compute an I8/I16/I32 (and I64, in 64-bit mode) into a RH
2873    (reg-or-halfword-immediate).  It's important to specify whether the
2874    immediate is to be regarded as signed or not.  If yes, this will
2875    never return -32768 as an immediate; this guaranteed that all
2876    signed immediates that are return can have their sign inverted if
2877    need be. */
2878
2879 static PPCRH* iselWordExpr_RH ( ISelEnv* env, Bool syned, const IRExpr* e,
2880                                 IREndness IEndianess )
2881 {
2882   PPCRH* ri = iselWordExpr_RH_wrk(env, syned, e, IEndianess);
2883    /* sanity checks ... */
2884    switch (ri->tag) {
2885    case Prh_Imm:
2886       vassert(ri->Prh.Imm.syned == syned);
2887       if (syned)
2888          vassert(ri->Prh.Imm.imm16 != 0x8000);
2889       return ri;
2890    case Prh_Reg:
2891       vassert(hregClass(ri->Prh.Reg.reg) == HRcGPR(env->mode64));
2892       vassert(hregIsVirtual(ri->Prh.Reg.reg));
2893       return ri;
2894    default:
2895       vpanic("iselIntExpr_RH: unknown ppc RH tag");
2896    }
2897 }
2898
2899 /* DO NOT CALL THIS DIRECTLY ! */
2900 static PPCRH* iselWordExpr_RH_wrk ( ISelEnv* env, Bool syned, const IRExpr* e,
2901                                     IREndness IEndianess )
2902 {
2903    ULong u;
2904    Long  l;
2905    IRType ty = typeOfIRExpr(env->type_env,e);
2906    vassert(ty == Ity_I8  || ty == Ity_I16 ||
2907            ty == Ity_I32 || ((ty == Ity_I64) && env->mode64));
2908
2909    /* special case: immediate */
2910    if (e->tag == Iex_Const) {
2911       IRConst* con = e->Iex.Const.con;
2912       /* What value are we aiming to generate? */
2913       switch (con->tag) {
2914       /* Note: Not sign-extending - we carry 'syned' around */
2915       case Ico_U64: vassert(env->mode64);
2916                     u =              con->Ico.U64; break;
2917       case Ico_U32: u = 0xFFFFFFFF & con->Ico.U32; break;
2918       case Ico_U16: u = 0x0000FFFF & con->Ico.U16; break;
2919       case Ico_U8:  u = 0x000000FF & con->Ico.U8; break;
2920       default:      vpanic("iselIntExpr_RH.Iex_Const(ppch)");
2921       }
2922       l = (Long)u;
2923       /* Now figure out if it's representable. */
2924       if (!syned && u <= 65535) {
2925          return PPCRH_Imm(False/*unsigned*/, toUShort(u & 0xFFFF));
2926       }
2927       if (syned && l >= -32767 && l <= 32767) {
2928          return PPCRH_Imm(True/*signed*/, toUShort(u & 0xFFFF));
2929       }
2930       /* no luck; use the Slow Way. */
2931    }
2932
2933    /* default case: calculate into a register and return that */
2934    return PPCRH_Reg( iselWordExpr_R ( env, e, IEndianess ) );
2935 }
2936
2937
2938 /* --------------------- RIs --------------------- */
2939
2940 /* Calculate an expression into an PPCRI operand.  As with
2941    iselIntExpr_R, the expression can have type 32, 16 or 8 bits, or,
2942    in 64-bit mode, 64 bits. */
2943
2944 static PPCRI* iselWordExpr_RI ( ISelEnv* env, const IRExpr* e,
2945                                 IREndness IEndianess )
2946 {
2947    PPCRI* ri = iselWordExpr_RI_wrk(env, e, IEndianess);
2948    /* sanity checks ... */
2949    switch (ri->tag) {
2950    case Pri_Imm:
2951       return ri;
2952    case Pri_Reg:
2953       vassert(hregClass(ri->Pri.Reg) == HRcGPR(env->mode64));
2954       vassert(hregIsVirtual(ri->Pri.Reg));
2955       return ri;
2956    default:
2957       vpanic("iselIntExpr_RI: unknown ppc RI tag");
2958    }
2959 }
2960
2961 /* DO NOT CALL THIS DIRECTLY ! */
2962 static PPCRI* iselWordExpr_RI_wrk ( ISelEnv* env, const IRExpr* e,
2963                                     IREndness IEndianess )
2964 {
2965    Long  l;
2966    IRType ty = typeOfIRExpr(env->type_env,e);
2967    vassert(ty == Ity_I8  || ty == Ity_I16 ||
2968            ty == Ity_I32 || ((ty == Ity_I64) && env->mode64));
2969
2970    /* special case: immediate */
2971    if (e->tag == Iex_Const) {
2972       IRConst* con = e->Iex.Const.con;
2973       switch (con->tag) {
2974       case Ico_U64: vassert(env->mode64);
2975                     l = (Long)            con->Ico.U64; break;
2976       case Ico_U32: l = (Long)(Int)       con->Ico.U32; break;
2977       case Ico_U16: l = (Long)(Int)(Short)con->Ico.U16; break;
2978       case Ico_U8:  l = (Long)(Int)(Char )con->Ico.U8;  break;
2979       default:      vpanic("iselIntExpr_RI.Iex_Const(ppch)");
2980       }
2981       return PPCRI_Imm((ULong)l);
2982    }
2983
2984    /* default case: calculate into a register and return that */
2985    return PPCRI_Reg( iselWordExpr_R ( env, e, IEndianess ) );
2986 }
2987
2988
2989 /* --------------------- RH5u --------------------- */
2990
2991 /* Compute an I8 into a reg-or-5-bit-unsigned-immediate, the latter
2992    being an immediate in the range 1 .. 31 inclusive.  Used for doing
2993    shift amounts.  Only used in 32-bit mode. */
2994
2995 static PPCRH* iselWordExpr_RH5u ( ISelEnv* env, const IRExpr* e,
2996                                   IREndness IEndianess )
2997 {
2998    PPCRH* ri;
2999    vassert(!env->mode64);
3000    ri = iselWordExpr_RH5u_wrk(env, e, IEndianess);
3001    /* sanity checks ... */
3002    switch (ri->tag) {
3003    case Prh_Imm:
3004       vassert(ri->Prh.Imm.imm16 >= 1 && ri->Prh.Imm.imm16 <= 31);
3005       vassert(!ri->Prh.Imm.syned);
3006       return ri;
3007    case Prh_Reg:
3008       vassert(hregClass(ri->Prh.Reg.reg) == HRcGPR(env->mode64));
3009       vassert(hregIsVirtual(ri->Prh.Reg.reg));
3010       return ri;
3011    default:
3012       vpanic("iselIntExpr_RH5u: unknown ppc RI tag");
3013    }
3014 }
3015
3016 /* DO NOT CALL THIS DIRECTLY ! */
3017 static PPCRH* iselWordExpr_RH5u_wrk ( ISelEnv* env, const IRExpr* e,
3018                                       IREndness IEndianess )
3019 {
3020    IRType ty = typeOfIRExpr(env->type_env,e);
3021    vassert(ty == Ity_I8);
3022
3023    /* special case: immediate */
3024    if (e->tag == Iex_Const
3025        && e->Iex.Const.con->tag == Ico_U8
3026        && e->Iex.Const.con->Ico.U8 >= 1
3027        && e->Iex.Const.con->Ico.U8 <= 31) {
3028       return PPCRH_Imm(False/*unsigned*/, e->Iex.Const.con->Ico.U8);
3029    }
3030
3031    /* default case: calculate into a register and return that */
3032    return PPCRH_Reg( iselWordExpr_R ( env, e, IEndianess ) );
3033 }
3034
3035
3036 /* --------------------- RH6u --------------------- */
3037
3038 /* Compute an I8 into a reg-or-6-bit-unsigned-immediate, the latter
3039    being an immediate in the range 1 .. 63 inclusive.  Used for doing
3040    shift amounts.  Only used in 64-bit mode. */
3041
3042 static PPCRH* iselWordExpr_RH6u ( ISelEnv* env, const IRExpr* e,
3043                                   IREndness IEndianess )
3044 {
3045    PPCRH* ri;
3046    vassert(env->mode64);
3047    ri = iselWordExpr_RH6u_wrk(env, e, IEndianess);
3048    /* sanity checks ... */
3049    switch (ri->tag) {
3050    case Prh_Imm:
3051       vassert(ri->Prh.Imm.imm16 >= 1 && ri->Prh.Imm.imm16 <= 63);
3052       vassert(!ri->Prh.Imm.syned);
3053       return ri;
3054    case Prh_Reg:
3055       vassert(hregClass(ri->Prh.Reg.reg) == HRcGPR(env->mode64));
3056       vassert(hregIsVirtual(ri->Prh.Reg.reg));
3057       return ri;
3058    default:
3059       vpanic("iselIntExpr_RH6u: unknown ppc64 RI tag");
3060    }
3061 }
3062
3063 /* DO NOT CALL THIS DIRECTLY ! */
3064 static PPCRH* iselWordExpr_RH6u_wrk ( ISelEnv* env, const IRExpr* e,
3065                                       IREndness IEndianess )
3066 {
3067    IRType ty = typeOfIRExpr(env->type_env,e);
3068    vassert(ty == Ity_I8);
3069
3070    /* special case: immediate */
3071    if (e->tag == Iex_Const
3072        && e->Iex.Const.con->tag == Ico_U8
3073        && e->Iex.Const.con->Ico.U8 >= 1
3074        && e->Iex.Const.con->Ico.U8 <= 63) {
3075       return PPCRH_Imm(False/*unsigned*/, e->Iex.Const.con->Ico.U8);
3076    }
3077
3078    /* default case: calculate into a register and return that */
3079    return PPCRH_Reg( iselWordExpr_R ( env, e, IEndianess ) );
3080 }
3081
3082
3083 /* --------------------- CONDCODE --------------------- */
3084
3085 /* Generate code to evaluated a bit-typed expression, returning the
3086    condition code which would correspond when the expression would
3087    notionally have returned 1. */
3088
3089 static PPCCondCode iselCondCode ( ISelEnv* env, const IRExpr* e,
3090                                   IREndness IEndianess )
3091 {
3092    /* Uh, there's nothing we can sanity check here, unfortunately. */
3093    return iselCondCode_wrk(env,e, IEndianess);
3094 }
3095
3096 /* DO NOT CALL THIS DIRECTLY ! */
3097 static PPCCondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e,
3098                                       IREndness IEndianess )
3099 {
3100    vassert(e);
3101    vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
3102
3103    /* Constant 1:Bit */
3104    if (e->tag == Iex_Const) {
3105       // Make a compare that will always be true (or always false):
3106       vassert(e->Iex.Const.con->Ico.U1 == True || e->Iex.Const.con->Ico.U1 == False);
3107       HReg r_zero = newVRegI(env);
3108       addInstr(env, PPCInstr_LI(r_zero, 0, env->mode64));
3109       addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
3110                                  7/*cr*/, r_zero, PPCRH_Reg(r_zero)));
3111       return mk_PPCCondCode( e->Iex.Const.con->Ico.U1 ? Pct_TRUE : Pct_FALSE,
3112                              Pcf_7EQ );
3113    }
3114
3115    /* Not1(...) */
3116    if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
3117       /* Generate code for the arg, and negate the test condition */
3118       PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
3119       cond.test = invertCondTest(cond.test);
3120       return cond;
3121    }
3122
3123    /* --- patterns rooted at: 32to1 or 64to1 --- */
3124
3125    /* 32to1, 64to1 */
3126    if (e->tag == Iex_Unop &&
3127        (e->Iex.Unop.op == Iop_32to1 || e->Iex.Unop.op == Iop_64to1)) {
3128       HReg src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
3129       HReg tmp = newVRegI(env);
3130       /* could do better, probably -- andi. */
3131       addInstr(env, PPCInstr_Alu(Palu_AND, tmp,
3132                                  src, PPCRH_Imm(False,1)));
3133       addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
3134                                  7/*cr*/, tmp, PPCRH_Imm(False,1)));
3135       return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
3136    }
3137
3138    /* --- patterns rooted at: CmpNEZ8 --- */
3139
3140    /* CmpNEZ8(x) */
3141    /* Note this cloned as CmpNE8(x,0) below. */
3142    /* could do better -- andi. */
3143    if (e->tag == Iex_Unop
3144        && e->Iex.Unop.op == Iop_CmpNEZ8) {
3145       HReg arg = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
3146       HReg tmp = newVRegI(env);
3147       addInstr(env, PPCInstr_Alu(Palu_AND, tmp, arg,
3148                                  PPCRH_Imm(False,0xFF)));
3149       addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
3150                                  7/*cr*/, tmp, PPCRH_Imm(False,0)));
3151       return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
3152    }
3153
3154    /* --- patterns rooted at: CmpNEZ32 --- */
3155
3156    /* CmpNEZ32(x) */
3157    if (e->tag == Iex_Unop
3158        && e->Iex.Unop.op == Iop_CmpNEZ32) {
3159       HReg r1 = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
3160       addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
3161                                  7/*cr*/, r1, PPCRH_Imm(False,0)));
3162       return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
3163    }
3164
3165    /* --- patterns rooted at: Cmp*32* --- */
3166
3167    /* Cmp*32*(x,y) */
3168    if (e->tag == Iex_Binop
3169        && (e->Iex.Binop.op == Iop_CmpEQ32
3170            || e->Iex.Binop.op == Iop_CmpNE32
3171            || e->Iex.Binop.op == Iop_CmpLT32S
3172            || e->Iex.Binop.op == Iop_CmpLT32U
3173            || e->Iex.Binop.op == Iop_CmpLE32S
3174            || e->Iex.Binop.op == Iop_CmpLE32U)) {
3175       Bool syned = (e->Iex.Binop.op == Iop_CmpLT32S ||
3176                     e->Iex.Binop.op == Iop_CmpLE32S);
3177       HReg   r1  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
3178       PPCRH* ri2 = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2, IEndianess);
3179       addInstr(env, PPCInstr_Cmp(syned, True/*32bit cmp*/,
3180                                  7/*cr*/, r1, ri2));
3181
3182       switch (e->Iex.Binop.op) {
3183       case Iop_CmpEQ32:  return mk_PPCCondCode( Pct_TRUE,  Pcf_7EQ );
3184       case Iop_CmpNE32:  return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
3185       case Iop_CmpLT32U: case Iop_CmpLT32S:
3186          return mk_PPCCondCode( Pct_TRUE,  Pcf_7LT );
3187       case Iop_CmpLE32U: case Iop_CmpLE32S:
3188          return mk_PPCCondCode( Pct_FALSE, Pcf_7GT );
3189       default: vpanic("iselCondCode(ppc): CmpXX32");
3190       }
3191    }
3192
3193    /* --- patterns rooted at: CmpNEZ64 --- */
3194
3195    /* CmpNEZ64 */
3196    if (e->tag == Iex_Unop
3197        && e->Iex.Unop.op == Iop_CmpNEZ64) {
3198       if (!env->mode64) {
3199          HReg hi, lo;
3200          HReg tmp = newVRegI(env);
3201          iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg, IEndianess );
3202          addInstr(env, PPCInstr_Alu(Palu_OR, tmp, lo, PPCRH_Reg(hi)));
3203          addInstr(env, PPCInstr_Cmp(False/*sign*/, True/*32bit cmp*/,
3204                                     7/*cr*/, tmp,PPCRH_Imm(False,0)));
3205          return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
3206       } else {  // mode64
3207          HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
3208          addInstr(env, PPCInstr_Cmp(False/*sign*/, False/*64bit cmp*/,
3209                                     7/*cr*/, r_src,PPCRH_Imm(False,0)));
3210          return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
3211       }
3212    }
3213
3214    /* --- patterns rooted at: Cmp*64* --- */
3215
3216    /* Cmp*64*(x,y) */
3217    if (e->tag == Iex_Binop
3218        && (e->Iex.Binop.op == Iop_CmpEQ64
3219            || e->Iex.Binop.op == Iop_CmpNE64
3220            || e->Iex.Binop.op == Iop_CmpLT64S
3221            || e->Iex.Binop.op == Iop_CmpLT64U
3222            || e->Iex.Binop.op == Iop_CmpLE64S
3223            || e->Iex.Binop.op == Iop_CmpLE64U)) {
3224       Bool   syned = (e->Iex.Binop.op == Iop_CmpLT64S ||
3225                       e->Iex.Binop.op == Iop_CmpLE64S);
3226       HReg    r1 = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
3227       PPCRH* ri2 = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2, IEndianess);
3228       vassert(env->mode64);
3229       addInstr(env, PPCInstr_Cmp(syned, False/*64bit cmp*/,
3230                                  7/*cr*/, r1, ri2));
3231
3232       switch (e->Iex.Binop.op) {
3233       case Iop_CmpEQ64:  return mk_PPCCondCode( Pct_TRUE,  Pcf_7EQ );
3234       case Iop_CmpNE64:  return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
3235       case Iop_CmpLT64U:  case Iop_CmpLT64S:
3236          return mk_PPCCondCode( Pct_TRUE,  Pcf_7LT );
3237       case Iop_CmpLE64U: case Iop_CmpLE64S:
3238          return mk_PPCCondCode( Pct_FALSE, Pcf_7GT );
3239       default: vpanic("iselCondCode(ppc): CmpXX64");
3240       }
3241    }
3242
3243    /* --- patterns rooted at: CmpNE8 --- */
3244
3245    /* CmpNE8(x,0) */
3246    /* Note this is a direct copy of CmpNEZ8 above. */
3247    /* could do better -- andi. */
3248    if (e->tag == Iex_Binop
3249        && e->Iex.Binop.op == Iop_CmpNE8
3250        && isZeroU8(e->Iex.Binop.arg2)) {
3251       HReg arg = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
3252       HReg tmp = newVRegI(env);
3253       addInstr(env, PPCInstr_Alu(Palu_AND, tmp, arg,
3254                                  PPCRH_Imm(False,0xFF)));
3255       addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
3256                                  7/*cr*/, tmp, PPCRH_Imm(False,0)));
3257       return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
3258    }
3259
3260    /* var */
3261    if (e->tag == Iex_RdTmp) {
3262       HReg r_src      = lookupIRTemp(env, e->Iex.RdTmp.tmp);
3263       HReg src_masked = newVRegI(env);
3264       addInstr(env,
3265                PPCInstr_Alu(Palu_AND, src_masked,
3266                             r_src, PPCRH_Imm(False,1)));
3267       addInstr(env,
3268                PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
3269                             7/*cr*/, src_masked, PPCRH_Imm(False,1)));
3270       return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
3271    }
3272
3273    /* --- And1(x,y), Or1(x,y) --- */
3274    /* FIXME: We could (and probably should) do a lot better here, by using the
3275       iselCondCode_C/_R scheme used in the amd64 insn selector. */
3276    if (e->tag == Iex_Binop
3277         && (e->Iex.Binop.op == Iop_And1 || e->Iex.Binop.op == Iop_Or1)) {
3278       HReg x_as_int = newVRegI(env);
3279       PPCCondCode cc_x = iselCondCode(env, e->Iex.Binop.arg1, IEndianess);
3280       addInstr(env, PPCInstr_Set(cc_x, x_as_int));
3281
3282       HReg y_as_int = newVRegI(env);
3283       PPCCondCode cc_y = iselCondCode(env, e->Iex.Binop.arg2, IEndianess);
3284       addInstr(env, PPCInstr_Set(cc_y, y_as_int));
3285
3286       HReg tmp = newVRegI(env);
3287       PPCAluOp op = e->Iex.Binop.op == Iop_And1 ? Palu_AND : Palu_OR;
3288       addInstr(env, PPCInstr_Alu(op, tmp, x_as_int, PPCRH_Reg(y_as_int)));
3289
3290       addInstr(env, PPCInstr_Alu(Palu_AND, tmp, tmp, PPCRH_Imm(False,1)));
3291       addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
3292                                  7/*cr*/, tmp, PPCRH_Imm(False,1)));
3293       return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
3294    }
3295
3296    vex_printf("iselCondCode(ppc): No such tag(%u)\n", e->tag);
3297    ppIRExpr(e);
3298    vpanic("iselCondCode(ppc)");
3299 }
3300
3301
3302 /*---------------------------------------------------------*/
3303 /*--- ISEL: Integer expressions (128 bit)               ---*/
3304 /*---------------------------------------------------------*/
3305
3306 /* 64-bit mode ONLY: compute a 128-bit value into a register pair,
3307    which is returned as the first two parameters.  As with
3308    iselWordExpr_R, these may be either real or virtual regs; in any
3309    case they must not be changed by subsequent code emitted by the
3310    caller.  */
3311
3312 static void iselInt128Expr ( HReg* rHi, HReg* rLo, ISelEnv* env,
3313                              const IRExpr* e, IREndness IEndianess )
3314 {
3315    vassert(env->mode64);
3316    iselInt128Expr_wrk(rHi, rLo, env, e, IEndianess);
3317 #  if 0
3318    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3319 #  endif
3320    vassert(hregClass(*rHi) == HRcGPR(env->mode64));
3321    vassert(hregIsVirtual(*rHi));
3322    vassert(hregClass(*rLo) == HRcGPR(env->mode64));
3323    vassert(hregIsVirtual(*rLo));
3324 }
3325
3326 /* DO NOT CALL THIS DIRECTLY ! */
3327 static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
3328                                  const IRExpr* e, IREndness IEndianess )
3329 {
3330    Bool mode64 = env->mode64;
3331
3332    vassert(e);
3333    vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
3334
3335    /* read 128-bit IRTemp */
3336    if (e->tag == Iex_RdTmp) {
3337       lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
3338       return;
3339    }
3340
3341    /* 128-bit GET */
3342    if (e->tag == Iex_Get) {
3343       PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
3344                                        GuestStatePtr(mode64) );
3345       PPCAMode* am_addr4 = advance4(env, am_addr);
3346       HReg tLo = newVRegI(env);
3347       HReg tHi = newVRegI(env);
3348
3349       addInstr(env, PPCInstr_Load( 8, tHi, am_addr,  mode64));
3350       addInstr(env, PPCInstr_Load( 8, tLo, am_addr4, mode64));
3351       *rHi = tHi;
3352       *rLo = tLo;
3353       return;
3354    }
3355
3356    /* --------- BINARY ops --------- */
3357    if (e->tag == Iex_Binop) {
3358       switch (e->Iex.Binop.op) {
3359       /* 64 x 64 -> 128 multiply */
3360       case Iop_MullU64:
3361       case Iop_MullS64: {
3362          HReg     tLo     = newVRegI(env);
3363          HReg     tHi     = newVRegI(env);
3364          Bool     syned   = toBool(e->Iex.Binop.op == Iop_MullS64);
3365          HReg     r_srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
3366          HReg     r_srcR  = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
3367          addInstr(env, PPCInstr_MulL(False/*signedness irrelevant*/,
3368                                      False/*lo64*/, False/*64bit mul*/,
3369                                      tLo, r_srcL, r_srcR));
3370          addInstr(env, PPCInstr_MulL(syned,
3371                                      True/*hi64*/, False/*64bit mul*/,
3372                                      tHi, r_srcL, r_srcR));
3373          *rHi = tHi;
3374          *rLo = tLo;
3375          return;
3376       }
3377
3378       /* 64HLto128(e1,e2) */
3379       case Iop_64HLto128:
3380          *rHi = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
3381          *rLo = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
3382          return;
3383
3384       case Iop_D128toI128S: {
3385          HReg srcHi = INVALID_HREG;
3386          HReg srcLo = INVALID_HREG;
3387          HReg dstLo = newVRegI(env);
3388          HReg dstHi = newVRegI(env);
3389          HReg tmp = newVRegV(env);
3390          PPCAMode* am_addr;
3391          PPCAMode* am_addr4;
3392
3393          /* Get the DF128 value, store in two 64-bit halves */
3394          iselDfp128Expr( &srcHi, &srcLo, env, e->Iex.Binop.arg2, IEndianess );
3395
3396          sub_from_sp( env, 16 );     // Move SP down 16 bytes
3397          am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
3398          am_addr4 = advance4(env, am_addr);
3399
3400          addInstr(env, PPCInstr_XFormUnary994(Px_DFPTOIQS, tmp, srcHi, srcLo));
3401
3402          // store the result in the VSR
3403          addInstr(env, PPCInstr_AvLdSt( False/*store*/, 16, tmp, am_addr ));
3404
3405          // load the two Ity_64 values
3406          addInstr(env, PPCInstr_Load( 8, dstHi, am_addr,  mode64 ));
3407          addInstr(env, PPCInstr_Load( 8, dstLo, am_addr4, mode64 ));
3408
3409          *rHi = dstHi;
3410          *rLo = dstLo;
3411
3412          add_to_sp( env, 16 );       // Reset SP
3413          return;
3414       }
3415
3416       default:
3417          break;
3418       }
3419    } /* if (e->tag == Iex_Binop) */
3420
3421
3422    /* --------- UNARY ops --------- */
3423    if (e->tag == Iex_Unop) {
3424       switch (e->Iex.Unop.op) {
3425       case Iop_ReinterpV128asI128:
3426       case Iop_ReinterpF128asI128: {
3427          HReg     src;
3428          HReg     dstLo = newVRegI(env);
3429          HReg     dstHi = newVRegI(env);
3430          PPCAMode* am_addr;
3431          PPCAMode* am_addr4;
3432
3433          if (e->Iex.Unop.op == Iop_ReinterpF128asI128)
3434             src = iselFp128Expr(env, e->Iex.Unop.arg, IEndianess);
3435          else
3436             src = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
3437
3438          sub_from_sp( env, 16 );     // Move SP down 16 bytes
3439          am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
3440          am_addr4 = advance4(env, am_addr);
3441
3442          // store the Ity_F128 value
3443          addInstr(env, PPCInstr_AvLdSt( False/*store*/, 16, src, am_addr ));
3444
3445          // load the two Ity_64 values
3446          addInstr(env, PPCInstr_Load( 8, dstHi, am_addr,  mode64 ));
3447          addInstr(env, PPCInstr_Load( 8, dstLo, am_addr4, mode64 ));
3448
3449          *rHi = dstHi;
3450          *rLo = dstLo;
3451          add_to_sp( env, 16 );       // Reset SP
3452          return;
3453       }
3454       default:
3455          break;
3456       }
3457    } /* if (e->tag == Iex_Unop) */
3458
3459    vex_printf("iselInt128Expr(ppc64): No such tag(%u)\n", e->tag);
3460    ppIRExpr(e);
3461    vpanic("iselInt128Expr(ppc64)");
3462 }
3463
3464
3465 /*---------------------------------------------------------*/
3466 /*--- ISEL: Integer expressions (64 bit)                ---*/
3467 /*---------------------------------------------------------*/
3468
3469 /* 32-bit mode ONLY: compute a 128-bit value into a register quad */
3470 static void iselInt128Expr_to_32x4 ( HReg* rHi, HReg* rMedHi, HReg* rMedLo,
3471                                      HReg* rLo, ISelEnv* env, const IRExpr* e,
3472                                      IREndness IEndianess )
3473 {
3474    vassert(!env->mode64);
3475    iselInt128Expr_to_32x4_wrk(rHi, rMedHi, rMedLo, rLo, env, e, IEndianess);
3476 #  if 0
3477    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3478 #  endif
3479    vassert(hregClass(*rHi) == HRcInt32);
3480    vassert(hregIsVirtual(*rHi));
3481    vassert(hregClass(*rMedHi) == HRcInt32);
3482    vassert(hregIsVirtual(*rMedHi));
3483    vassert(hregClass(*rMedLo) == HRcInt32);
3484    vassert(hregIsVirtual(*rMedLo));
3485    vassert(hregClass(*rLo) == HRcInt32);
3486    vassert(hregIsVirtual(*rLo));
3487 }
3488
3489 static void iselInt128Expr_to_32x4_wrk ( HReg* rHi, HReg* rMedHi,
3490                                          HReg* rMedLo, HReg* rLo,
3491                                          ISelEnv* env, const IRExpr* e,
3492                                          IREndness IEndianess )
3493 {
3494    vassert(e);
3495    vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
3496
3497    /* read 128-bit IRTemp */
3498    if (e->tag == Iex_RdTmp) {
3499       lookupIRTempQuad( rHi, rMedHi, rMedLo, rLo, env, e->Iex.RdTmp.tmp);
3500       return;
3501    }
3502
3503    if (e->tag == Iex_Binop) {
3504
3505       IROp op_binop = e->Iex.Binop.op;
3506       switch (op_binop) {
3507       case Iop_64HLto128:
3508          iselInt64Expr(rHi, rMedHi, env, e->Iex.Binop.arg1, IEndianess);
3509          iselInt64Expr(rMedLo, rLo, env, e->Iex.Binop.arg2, IEndianess);
3510          return;
3511       default:
3512          vex_printf("iselInt128Expr_to_32x4_wrk: Binop case 0x%x not found\n",
3513                     op_binop);
3514          break;
3515       }
3516    }
3517
3518    vex_printf("iselInt128Expr_to_32x4_wrk: e->tag 0x%x not found\n", e->tag);
3519    return;
3520 }
3521
3522 /* 32-bit mode ONLY: compute a 64-bit value into a register pair,
3523    which is returned as the first two parameters.  As with
3524    iselIntExpr_R, these may be either real or virtual regs; in any
3525    case they must not be changed by subsequent code emitted by the
3526    caller.  */
3527
3528 static void iselInt64Expr ( HReg* rHi, HReg* rLo,
3529                             ISelEnv* env, const IRExpr* e,
3530                             IREndness IEndianess )
3531 {
3532    vassert(!env->mode64);
3533    iselInt64Expr_wrk(rHi, rLo, env, e, IEndianess);
3534 #  if 0
3535    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3536 #  endif
3537    vassert(hregClass(*rHi) == HRcInt32);
3538    vassert(hregIsVirtual(*rHi));
3539    vassert(hregClass(*rLo) == HRcInt32);
3540    vassert(hregIsVirtual(*rLo));
3541 }
3542
3543 /* DO NOT CALL THIS DIRECTLY ! */
3544 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
3545                                 ISelEnv* env, const IRExpr* e,
3546                                 IREndness IEndianess )
3547 {
3548    vassert(e);
3549    vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
3550
3551    /* 64-bit load */
3552    if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
3553       HReg tLo    = newVRegI(env);
3554       HReg tHi    = newVRegI(env);
3555       HReg r_addr = iselWordExpr_R(env, e->Iex.Load.addr, IEndianess);
3556       vassert(!env->mode64);
3557       addInstr(env, PPCInstr_Load( 4/*byte-load*/,
3558                                    tHi, PPCAMode_IR( 0, r_addr ),
3559                                    False/*32-bit insn please*/) );
3560       addInstr(env, PPCInstr_Load( 4/*byte-load*/,
3561                                    tLo, PPCAMode_IR( 4, r_addr ),
3562                                    False/*32-bit insn please*/) );
3563       *rHi = tHi;
3564       *rLo = tLo;
3565       return;
3566    }
3567
3568    /* 64-bit literal */
3569    if (e->tag == Iex_Const) {
3570       ULong w64 = e->Iex.Const.con->Ico.U64;
3571       UInt  wHi = ((UInt)(w64 >> 32)) & 0xFFFFFFFF;
3572       UInt  wLo = ((UInt)w64) & 0xFFFFFFFF;
3573       HReg  tLo = newVRegI(env);
3574       HReg  tHi = newVRegI(env);
3575       vassert(e->Iex.Const.con->tag == Ico_U64);
3576       addInstr(env, PPCInstr_LI(tHi, (Long)(Int)wHi, False/*mode32*/));
3577       addInstr(env, PPCInstr_LI(tLo, (Long)(Int)wLo, False/*mode32*/));
3578       *rHi = tHi;
3579       *rLo = tLo;
3580       return;
3581    }
3582
3583    /* read 64-bit IRTemp */
3584    if (e->tag == Iex_RdTmp) {
3585       lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
3586       return;
3587    }
3588
3589    /* 64-bit GET */
3590    if (e->tag == Iex_Get) {
3591       PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
3592                                        GuestStatePtr(False/*mode32*/) );
3593       PPCAMode* am_addr4 = advance4(env, am_addr);
3594       HReg tLo = newVRegI(env);
3595       HReg tHi = newVRegI(env);
3596       addInstr(env, PPCInstr_Load( 4, tHi, am_addr,  False/*mode32*/ ));
3597       addInstr(env, PPCInstr_Load( 4, tLo, am_addr4, False/*mode32*/ ));
3598       *rHi = tHi;
3599       *rLo = tLo;
3600       return;
3601    }
3602
3603    /* --------- CCALL --------- */
3604    if(e->tag == Iex_CCall) {
3605       IRType ty = typeOfIRExpr(env->type_env,e);
3606       Bool mode64 = env->mode64;
3607
3608       vassert(ty == e->Iex.CCall.retty); /* well-formedness of IR */
3609
3610       /* be very restrictive for now.  Only 32-bit ints allowed for
3611          args, and 32 bits or host machine word for return type. */
3612       vassert(!(ty == Ity_I32 || (mode64 && ty == Ity_I64)));
3613
3614       /* Marshal args, do the call, clear stack. */
3615       UInt   addToSp = 0;
3616       RetLoc rloc    = mk_RetLoc_INVALID();
3617       doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
3618                     e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args,
3619                     IEndianess );
3620       vassert(is_sane_RetLoc(rloc));
3621
3622       vassert(rloc.pri == RLPri_2Int);
3623       vassert(addToSp == 0);
3624
3625       /* GPR3 now holds the destination address from Pin_Goto */
3626       HReg r_dst = newVRegI(env);
3627       addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
3628       *rHi = r_dst;
3629       *rLo = r_dst;
3630       return;
3631    }
3632
3633    /* 64-bit ITE */
3634    if (e->tag == Iex_ITE) { // VFD
3635       HReg e0Lo, e0Hi, eXLo, eXHi;
3636       iselInt64Expr(&eXHi, &eXLo, env, e->Iex.ITE.iftrue, IEndianess);
3637       iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.ITE.iffalse, IEndianess);
3638       HReg tLo = newVRegI(env);
3639       HReg tHi = newVRegI(env);
3640       addInstr(env, mk_iMOVds_RR(tHi,e0Hi));
3641       addInstr(env, mk_iMOVds_RR(tLo,e0Lo));
3642       PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond, IEndianess);
3643       addInstr(env, PPCInstr_CMov(cc,tHi,PPCRI_Reg(eXHi)));
3644       addInstr(env, PPCInstr_CMov(cc,tLo,PPCRI_Reg(eXLo)));
3645       *rHi = tHi;
3646       *rLo = tLo;
3647       return;
3648    }
3649
3650    /* --------- BINARY ops --------- */
3651    if (e->tag == Iex_Binop) {
3652       IROp op_binop = e->Iex.Binop.op;
3653       switch (op_binop) {
3654          /* 32 x 32 -> 64 multiply */
3655          case Iop_MullU32:
3656          case Iop_MullS32: {
3657             HReg     tLo     = newVRegI(env);
3658             HReg     tHi     = newVRegI(env);
3659             Bool     syned   = toBool(op_binop == Iop_MullS32);
3660             HReg     r_srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1,
3661                                               IEndianess);
3662             HReg     r_srcR  = iselWordExpr_R(env, e->Iex.Binop.arg2,
3663                                               IEndianess);
3664             addInstr(env, PPCInstr_MulL(False/*signedness irrelevant*/,
3665                                         False/*lo32*/, True/*32bit mul*/,
3666                                         tLo, r_srcL, r_srcR));
3667             addInstr(env, PPCInstr_MulL(syned,
3668                                         True/*hi32*/, True/*32bit mul*/,
3669                                         tHi, r_srcL, r_srcR));
3670             *rHi = tHi;
3671             *rLo = tLo;
3672             return;
3673          }
3674
3675          /* Or64/And64/Xor64 */
3676          case Iop_Or64:
3677          case Iop_And64:
3678          case Iop_Xor64: {
3679             HReg xLo, xHi, yLo, yHi;
3680             HReg tLo = newVRegI(env);
3681             HReg tHi = newVRegI(env);
3682             PPCAluOp op = (op_binop == Iop_Or64) ? Palu_OR :
3683                           (op_binop == Iop_And64) ? Palu_AND : Palu_XOR;
3684             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1, IEndianess);
3685             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2, IEndianess);
3686             addInstr(env, PPCInstr_Alu(op, tHi, xHi, PPCRH_Reg(yHi)));
3687             addInstr(env, PPCInstr_Alu(op, tLo, xLo, PPCRH_Reg(yLo)));
3688             *rHi = tHi;
3689             *rLo = tLo;
3690             return;
3691          }
3692
3693          /* Add64 */
3694          case Iop_Add64: {
3695             HReg xLo, xHi, yLo, yHi;
3696             HReg tLo = newVRegI(env);
3697             HReg tHi = newVRegI(env);
3698             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1, IEndianess);
3699             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2, IEndianess);
3700             addInstr(env, PPCInstr_AddSubC( True/*add*/, True /*set carry*/,
3701                                             tLo, xLo, yLo));
3702             addInstr(env, PPCInstr_AddSubC( True/*add*/, False/*read carry*/,
3703                                             tHi, xHi, yHi));
3704             *rHi = tHi;
3705             *rLo = tLo;
3706             return;
3707          }
3708
3709          /* 32HLto64(e1,e2) */
3710          case Iop_32HLto64:
3711             *rHi = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
3712             *rLo = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
3713             return;
3714
3715          /* F64toI64[S|U] */
3716          case Iop_F64toI64S: case Iop_F64toI64U: {
3717             HReg      tLo     = newVRegI(env);
3718             HReg      tHi     = newVRegI(env);
3719             HReg      r1      = StackFramePtr(env->mode64);
3720             PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
3721             PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
3722             HReg      fsrc    = iselDblExpr(env, e->Iex.Binop.arg2,
3723                                             IEndianess);
3724             HReg      ftmp    = newVRegF(env);
3725
3726             vassert(!env->mode64);
3727             /* Set host rounding mode */
3728             set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
3729
3730             sub_from_sp( env, 16 );
3731             addInstr(env, PPCInstr_FpCftI(False/*F->I*/, False/*int64*/,
3732                                           (op_binop == Iop_F64toI64S) ? True : False,
3733                                           True, ftmp, fsrc));
3734             addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
3735             addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3736             addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3737             add_to_sp( env, 16 );
3738
3739             ///* Restore default FPU rounding. */
3740             //set_FPU_rounding_default( env );
3741             *rHi = tHi;
3742             *rLo = tLo;
3743             return;
3744          }
3745          case Iop_D64toI64S: {
3746             HReg      tLo     = newVRegI(env);
3747             HReg      tHi     = newVRegI(env);
3748             HReg      r1      = StackFramePtr(env->mode64);
3749             PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
3750             PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
3751             HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
3752             HReg tmp    = newVRegF(env);
3753
3754             vassert(!env->mode64);
3755             set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
3756             addInstr(env, PPCInstr_Dfp64Unary(Pfp_DCTFIX, tmp, fr_src));
3757
3758             sub_from_sp( env, 16 );
3759             addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
3760             addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3761             addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3762             add_to_sp( env, 16 );
3763             *rHi = tHi;
3764             *rLo = tLo;
3765             return;
3766          }
3767          case Iop_D128toI64S: {
3768             PPCFpOp fpop = Pfp_DCTFIXQ;
3769             HReg r_srcHi = newVRegF(env);
3770             HReg r_srcLo = newVRegF(env);
3771             HReg tLo     = newVRegI(env);
3772             HReg tHi     = newVRegI(env);
3773             HReg ftmp    = newVRegF(env);
3774             PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
3775             PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
3776
3777             set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
3778             iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
3779                            IEndianess);
3780             addInstr(env, PPCInstr_DfpD128toD64(fpop, ftmp, r_srcHi, r_srcLo));
3781
3782             // put the D64 result into an integer register pair
3783             sub_from_sp( env, 16 );
3784             addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
3785             addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3786             addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3787             add_to_sp( env, 16 );
3788             *rHi = tHi;
3789             *rLo = tLo;
3790             return;
3791          }
3792          default:
3793             break;
3794       }
3795    } /* if (e->tag == Iex_Binop) */
3796
3797
3798    /* --------- UNARY ops --------- */
3799    if (e->tag == Iex_Unop) {
3800       switch (e->Iex.Unop.op) {
3801
3802       /* CmpwNEZ64(e) */
3803       case Iop_CmpwNEZ64: {
3804          HReg argHi, argLo;
3805          HReg tmp1  = newVRegI(env);
3806          HReg tmp2  = newVRegI(env);
3807          iselInt64Expr(&argHi, &argLo, env, e->Iex.Unop.arg, IEndianess);
3808          /* tmp1 = argHi | argLo */
3809          addInstr(env, PPCInstr_Alu(Palu_OR, tmp1, argHi, PPCRH_Reg(argLo)));
3810          /* tmp2 = (tmp1 | -tmp1) >>s 31 */
3811          addInstr(env, PPCInstr_Unary(Pun_NEG,tmp2,tmp1));
3812          addInstr(env, PPCInstr_Alu(Palu_OR, tmp2, tmp2, PPCRH_Reg(tmp1)));
3813          addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
3814                                      tmp2, tmp2, PPCRH_Imm(False, 31)));
3815          *rHi = tmp2;
3816          *rLo = tmp2; /* yes, really tmp2 */
3817          return;
3818       }
3819
3820       /* Left64 */
3821       case Iop_Left64: {
3822          HReg argHi, argLo;
3823          HReg zero32 = newVRegI(env);
3824          HReg resHi  = newVRegI(env);
3825          HReg resLo  = newVRegI(env);
3826          iselInt64Expr(&argHi, &argLo, env, e->Iex.Unop.arg, IEndianess);
3827          vassert(env->mode64 == False);
3828          addInstr(env, PPCInstr_LI(zero32, 0, env->mode64));
3829          /* resHi:resLo = - argHi:argLo */
3830          addInstr(env, PPCInstr_AddSubC( False/*sub*/, True/*set carry*/,
3831                                          resLo, zero32, argLo ));
3832          addInstr(env, PPCInstr_AddSubC( False/*sub*/, False/*read carry*/,
3833                                          resHi, zero32, argHi ));
3834          /* resHi:resLo |= srcHi:srcLo */
3835          addInstr(env, PPCInstr_Alu(Palu_OR, resLo, resLo, PPCRH_Reg(argLo)));
3836          addInstr(env, PPCInstr_Alu(Palu_OR, resHi, resHi, PPCRH_Reg(argHi)));
3837          *rHi = resHi;
3838          *rLo = resLo;
3839          return;
3840       }
3841
3842       /* 32Sto64(e) */
3843       case Iop_32Sto64: {
3844          HReg tHi = newVRegI(env);
3845          HReg src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
3846          addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
3847                                      tHi, src, PPCRH_Imm(False,31)));
3848          *rHi = tHi;
3849          *rLo = src;
3850          return;
3851       }
3852       case Iop_ExtractExpD64: {
3853          HReg tmp    = newVRegF(env);
3854          HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
3855          HReg      tLo     = newVRegI(env);
3856          HReg      tHi     = newVRegI(env);
3857          PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
3858          PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
3859
3860          addInstr(env, PPCInstr_Dfp64Unary(Pfp_DXEX, tmp, fr_src));
3861
3862          // put the D64 result into a integer register pair
3863          sub_from_sp( env, 16 );
3864          addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
3865          addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3866          addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3867          add_to_sp( env, 16 );
3868          *rHi = tHi;
3869          *rLo = tLo;
3870          return;
3871       }
3872       case Iop_ExtractExpD128: {
3873          HReg      r_srcHi;
3874          HReg      r_srcLo;
3875          HReg      tmp     = newVRegF(env);
3876          HReg      tLo     = newVRegI(env);
3877          HReg      tHi     = newVRegI(env);
3878          PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
3879          PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
3880
3881          iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Unop.arg, IEndianess);
3882          addInstr(env, PPCInstr_ExtractExpD128(Pfp_DXEXQ, tmp,
3883                                                   r_srcHi, r_srcLo));
3884
3885          // put the D64 result into a integer register pair
3886          sub_from_sp( env, 16 );
3887          addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
3888          addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3889          addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3890          add_to_sp( env, 16 );
3891          *rHi = tHi;
3892          *rLo = tLo;
3893          return;
3894       }
3895
3896       /* 32Uto64(e) */
3897       case Iop_32Uto64: {
3898          HReg tHi = newVRegI(env);
3899          HReg tLo = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
3900          addInstr(env, PPCInstr_LI(tHi, 0, False/*mode32*/));
3901          *rHi = tHi;
3902          *rLo = tLo;
3903          return;
3904       }
3905
3906       case Iop_128to64: {
3907          /* Narrow, return the low 64-bit half as a 32-bit
3908           * register pair */
3909          HReg r_Hi    = INVALID_HREG;
3910          HReg r_MedHi = INVALID_HREG;
3911          HReg r_MedLo = INVALID_HREG;
3912          HReg r_Lo    = INVALID_HREG;
3913
3914          iselInt128Expr_to_32x4(&r_Hi, &r_MedHi, &r_MedLo, &r_Lo,
3915                                 env, e->Iex.Unop.arg, IEndianess);
3916          *rHi = r_MedLo;
3917          *rLo = r_Lo;
3918          return;
3919       }
3920
3921       case Iop_128HIto64: {
3922          /* Narrow, return the high 64-bit half as a 32-bit
3923           *  register pair */
3924          HReg r_Hi    = INVALID_HREG;
3925          HReg r_MedHi = INVALID_HREG;
3926          HReg r_MedLo = INVALID_HREG;
3927          HReg r_Lo    = INVALID_HREG;
3928
3929          iselInt128Expr_to_32x4(&r_Hi, &r_MedHi, &r_MedLo, &r_Lo,
3930                                 env, e->Iex.Unop.arg, IEndianess);
3931          *rHi = r_Hi;
3932          *rLo = r_MedHi;
3933          return;
3934       }
3935
3936       /* V128{HI}to64 */
3937       case Iop_V128HIto64:
3938       case Iop_V128to64: {
3939          HReg r_aligned16;
3940          Int  off = e->Iex.Unop.op==Iop_V128HIto64 ? 0 : 8;
3941          HReg tLo = newVRegI(env);
3942          HReg tHi = newVRegI(env);
3943          HReg vec = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
3944          PPCAMode *am_off0, *am_offLO, *am_offHI;
3945          sub_from_sp( env, 32 );     // Move SP down 32 bytes
3946
3947          // get a quadword aligned address within our stack space
3948          r_aligned16 = get_sp_aligned16( env );
3949          am_off0  = PPCAMode_IR( 0,     r_aligned16 );
3950          am_offHI = PPCAMode_IR( off,   r_aligned16 );
3951          am_offLO = PPCAMode_IR( off+4, r_aligned16 );
3952
3953          // store as Vec128
3954          addInstr(env,
3955                   PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
3956
3957          // load hi,lo words (of hi/lo half of vec) as Ity_I32's
3958          addInstr(env,
3959                   PPCInstr_Load( 4, tHi, am_offHI, False/*mode32*/ ));
3960          addInstr(env,
3961                   PPCInstr_Load( 4, tLo, am_offLO, False/*mode32*/ ));
3962
3963          add_to_sp( env, 32 );       // Reset SP
3964          *rHi = tHi;
3965          *rLo = tLo;
3966          return;
3967       }
3968
3969       /* could do better than this, but for now ... */
3970       case Iop_1Sto64: {
3971          HReg tLo = newVRegI(env);
3972          HReg tHi = newVRegI(env);
3973          PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
3974          addInstr(env, PPCInstr_Set(cond,tLo));
3975          addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
3976                                      tLo, tLo, PPCRH_Imm(False,31)));
3977          addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
3978                                      tLo, tLo, PPCRH_Imm(False,31)));
3979          addInstr(env, mk_iMOVds_RR(tHi, tLo));
3980          *rHi = tHi;
3981          *rLo = tLo;
3982          return;
3983       }
3984
3985       case Iop_Not64: {
3986          HReg xLo, xHi;
3987          HReg tmpLo = newVRegI(env);
3988          HReg tmpHi = newVRegI(env);
3989          iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg, IEndianess);
3990          addInstr(env, PPCInstr_Unary(Pun_NOT,tmpLo,xLo));
3991          addInstr(env, PPCInstr_Unary(Pun_NOT,tmpHi,xHi));
3992          *rHi = tmpHi;
3993          *rLo = tmpLo;
3994          return;
3995       }
3996
3997       /* ReinterpF64asI64(e) */
3998       /* Given an IEEE754 double, produce an I64 with the same bit
3999          pattern. */
4000       case Iop_ReinterpF64asI64: {
4001          PPCAMode *am_addr0, *am_addr1;
4002          HReg fr_src  = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
4003          HReg r_dstLo = newVRegI(env);
4004          HReg r_dstHi = newVRegI(env);
4005
4006          sub_from_sp( env, 16 );     // Move SP down 16 bytes
4007          am_addr0 = PPCAMode_IR( 0, StackFramePtr(False/*mode32*/) );
4008          am_addr1 = PPCAMode_IR( 4, StackFramePtr(False/*mode32*/) );
4009
4010          // store as F64
4011          addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
4012                                         fr_src, am_addr0 ));
4013
4014          // load hi,lo as Ity_I32's
4015          addInstr(env, PPCInstr_Load( 4, r_dstHi,
4016                                       am_addr0, False/*mode32*/ ));
4017          addInstr(env, PPCInstr_Load( 4, r_dstLo,
4018                                       am_addr1, False/*mode32*/ ));
4019          *rHi = r_dstHi;
4020          *rLo = r_dstLo;
4021
4022          add_to_sp( env, 16 );       // Reset SP
4023          return;
4024       }
4025
4026       case Iop_ReinterpD64asI64: {
4027          HReg fr_src  = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
4028          PPCAMode *am_addr0, *am_addr1;
4029          HReg r_dstLo = newVRegI(env);
4030          HReg r_dstHi = newVRegI(env);
4031
4032
4033          sub_from_sp( env, 16 );     // Move SP down 16 bytes
4034          am_addr0 = PPCAMode_IR( 0, StackFramePtr(False/*mode32*/) );
4035          am_addr1 = PPCAMode_IR( 4, StackFramePtr(False/*mode32*/) );
4036
4037          // store as D64
4038          addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
4039                                         fr_src, am_addr0 ));
4040
4041          // load hi,lo as Ity_I32's
4042          addInstr(env, PPCInstr_Load( 4, r_dstHi,
4043                                       am_addr0, False/*mode32*/ ));
4044          addInstr(env, PPCInstr_Load( 4, r_dstLo,
4045                                       am_addr1, False/*mode32*/ ));
4046          *rHi = r_dstHi;
4047          *rLo = r_dstLo;
4048
4049          add_to_sp( env, 16 );       // Reset SP
4050
4051          return;
4052       }
4053
4054       case Iop_BCDtoDPB: {
4055          PPCCondCode cc;
4056          UInt        argiregs;
4057          HReg        argregs[2];
4058          Int         argreg;
4059          HReg        tLo = newVRegI(env);
4060          HReg        tHi = newVRegI(env);
4061          HReg        tmpHi;
4062          HReg        tmpLo;
4063          Bool        mode64 = env->mode64;
4064
4065          argregs[0] = hregPPC_GPR3(mode64);
4066          argregs[1] = hregPPC_GPR4(mode64);
4067
4068          argiregs = 0;
4069          argreg = 0;
4070
4071          iselInt64Expr( &tmpHi, &tmpLo, env, e->Iex.Unop.arg, IEndianess );
4072
4073          argiregs |= ( 1 << (argreg+3 ) );
4074          addInstr( env, mk_iMOVds_RR( argregs[argreg++], tmpHi ) );
4075
4076          argiregs |= ( 1 << (argreg+3 ) );
4077          addInstr( env, mk_iMOVds_RR( argregs[argreg], tmpLo ) );
4078
4079          cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
4080
4081          if (IEndianess == Iend_LE) {
4082              addInstr( env, PPCInstr_Call( cc, (Addr)h_calc_BCDtoDPB,
4083                                            argiregs,
4084                                            mk_RetLoc_simple(RLPri_2Int) ) );
4085          } else {
4086              Addr64 target;
4087              target = mode64 ? (Addr)h_calc_BCDtoDPB :
4088                toUInt( (Addr)h_calc_BCDtoDPB );
4089              addInstr( env, PPCInstr_Call( cc, target,
4090                                            argiregs,
4091                                            mk_RetLoc_simple(RLPri_2Int) ) );
4092          }
4093
4094          addInstr( env, mk_iMOVds_RR( tHi, argregs[argreg-1] ) );
4095          addInstr( env, mk_iMOVds_RR( tLo, argregs[argreg] ) );
4096
4097          *rHi = tHi;
4098          *rLo = tLo;
4099          return;
4100       }
4101
4102       case Iop_DPBtoBCD: {
4103          PPCCondCode cc;
4104          UInt        argiregs;
4105          HReg        argregs[2];
4106          Int         argreg;
4107          HReg        tLo = newVRegI(env);
4108          HReg        tHi = newVRegI(env);
4109          HReg        tmpHi;
4110          HReg        tmpLo;
4111          Bool        mode64 = env->mode64;
4112
4113          argregs[0] = hregPPC_GPR3(mode64);
4114          argregs[1] = hregPPC_GPR4(mode64);
4115
4116          argiregs = 0;
4117          argreg = 0;
4118
4119          iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Unop.arg, IEndianess);
4120
4121          argiregs |= (1 << (argreg+3));
4122          addInstr(env, mk_iMOVds_RR( argregs[argreg++], tmpHi ));
4123
4124          argiregs |= (1 << (argreg+3));
4125          addInstr(env, mk_iMOVds_RR( argregs[argreg], tmpLo));
4126
4127          cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
4128
4129          if (IEndianess == Iend_LE) {
4130              addInstr(env, PPCInstr_Call( cc, (Addr)h_calc_DPBtoBCD,
4131                                           argiregs,
4132                                           mk_RetLoc_simple(RLPri_2Int) ) );
4133          } else {
4134              Addr64 target;
4135              target = mode64 ? (Addr)h_calc_DPBtoBCD :
4136                toUInt( (Addr)h_calc_DPBtoBCD );
4137              addInstr(env, PPCInstr_Call( cc, target, argiregs,
4138                                           mk_RetLoc_simple(RLPri_2Int) ) );
4139          }
4140
4141          addInstr(env, mk_iMOVds_RR(tHi, argregs[argreg-1]));
4142          addInstr(env, mk_iMOVds_RR(tLo, argregs[argreg]));
4143
4144          *rHi = tHi;
4145          *rLo = tLo;
4146          return;
4147       }
4148
4149       default:
4150          break;
4151       }
4152    } /* if (e->tag == Iex_Unop) */
4153
4154    vex_printf("iselInt64Expr(ppc): No such tag(%u)\n", e->tag);
4155    ppIRExpr(e);
4156    vpanic("iselInt64Expr(ppc)");
4157 }
4158
4159
4160 /*---------------------------------------------------------*/
4161 /*--- ISEL: Floating point expressions (32 bit)         ---*/
4162 /*---------------------------------------------------------*/
4163
4164 /* Nothing interesting here; really just wrappers for
4165    64-bit stuff. */
4166
4167 static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e, IREndness IEndianess )
4168 {
4169   HReg r = iselFltExpr_wrk( env, e, IEndianess );
4170 #  if 0
4171    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
4172 #  endif
4173    vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
4174    vassert(hregIsVirtual(r));
4175    return r;
4176 }
4177
4178 /* DO NOT CALL THIS DIRECTLY */
4179 static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e,
4180                               IREndness IEndianess )
4181 {
4182    Bool        mode64 = env->mode64;
4183
4184    IRType ty = typeOfIRExpr(env->type_env,e);
4185    vassert(ty == Ity_F32);
4186
4187    if (e->tag == Iex_RdTmp) {
4188       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
4189    }
4190
4191    if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
4192       PPCAMode* am_addr;
4193       HReg r_dst = newVRegF(env);
4194       vassert(e->Iex.Load.ty == Ity_F32);
4195       am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_F32/*xfer*/,
4196                                    IEndianess);
4197       addInstr(env, PPCInstr_FpLdSt(True/*load*/, 4, r_dst, am_addr));
4198       return r_dst;
4199    }
4200
4201    if (e->tag == Iex_Get) {
4202       HReg r_dst = newVRegF(env);
4203       PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
4204                                        GuestStatePtr(env->mode64) );
4205       addInstr(env, PPCInstr_FpLdSt( True/*load*/, 4, r_dst, am_addr ));
4206       return r_dst;
4207    }
4208
4209    if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_TruncF64asF32) {
4210       /* This is quite subtle.  The only way to do the relevant
4211          truncation is to do a single-precision store and then a
4212          double precision load to get it back into a register.  The
4213          problem is, if the data is then written to memory a second
4214          time, as in
4215
4216             STbe(...) = TruncF64asF32(...)
4217
4218          then will the second truncation further alter the value?  The
4219          answer is no: flds (as generated here) followed by fsts
4220          (generated for the STbe) is the identity function on 32-bit
4221          floats, so we are safe.
4222
4223          Another upshot of this is that if iselStmt can see the
4224          entirety of
4225
4226             STbe(...) = TruncF64asF32(arg)
4227
4228          then it can short circuit having to deal with TruncF64asF32
4229          individually; instead just compute arg into a 64-bit FP
4230          register and do 'fsts' (since that itself does the
4231          truncation).
4232
4233          We generate pretty poor code here (should be ok both for
4234          32-bit and 64-bit mode); but it is expected that for the most
4235          part the latter optimisation will apply and hence this code
4236          will not often be used.
4237       */
4238       HReg      fsrc    = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
4239       HReg      fdst    = newVRegF(env);
4240       PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4241
4242       sub_from_sp( env, 16 );
4243       // store as F32, hence truncating
4244       addInstr(env, PPCInstr_FpLdSt( False/*store*/, 4,
4245                                      fsrc, zero_r1 ));
4246       // and reload.  Good huh?! (sigh)
4247       addInstr(env, PPCInstr_FpLdSt( True/*load*/, 4,
4248                                      fdst, zero_r1 ));
4249       add_to_sp( env, 16 );
4250       return fdst;
4251    }
4252
4253    if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64UtoF32) {
4254       if (mode64) {
4255          HReg fdst = newVRegF(env);
4256          HReg isrc = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
4257          HReg r1   = StackFramePtr(env->mode64);
4258          PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
4259
4260          /* Set host rounding mode */
4261          set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4262
4263          sub_from_sp( env, 16 );
4264
4265          addInstr(env, PPCInstr_Store(8, zero_r1, isrc, True/*mode64*/));
4266          addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
4267          addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
4268                                        False, False,
4269                                        fdst, fdst));
4270
4271          add_to_sp( env, 16 );
4272
4273          ///* Restore default FPU rounding. */
4274          //set_FPU_rounding_default( env );
4275          return fdst;
4276       } else {
4277          /* 32-bit mode */
4278          HReg fdst = newVRegF(env);
4279          HReg isrcHi, isrcLo;
4280          HReg r1   = StackFramePtr(env->mode64);
4281          PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
4282          PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
4283
4284          iselInt64Expr(&isrcHi, &isrcLo, env, e->Iex.Binop.arg2, IEndianess);
4285
4286          /* Set host rounding mode */
4287          set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4288
4289          sub_from_sp( env, 16 );
4290
4291          addInstr(env, PPCInstr_Store(4, zero_r1, isrcHi, False/*mode32*/));
4292          addInstr(env, PPCInstr_Store(4, four_r1, isrcLo, False/*mode32*/));
4293          addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
4294          addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
4295                                        False, False,
4296                                        fdst, fdst));
4297
4298          add_to_sp( env, 16 );
4299
4300          ///* Restore default FPU rounding. */
4301          //set_FPU_rounding_default( env );
4302          return fdst;
4303       }
4304
4305    }
4306
4307    vex_printf("iselFltExpr(ppc): No such tag(%u)\n", e->tag);
4308    ppIRExpr(e);
4309    vpanic("iselFltExpr_wrk(ppc)");
4310 }
4311
4312
4313 /*---------------------------------------------------------*/
4314 /*--- ISEL: Floating point expressions (64 bit)         ---*/
4315 /*---------------------------------------------------------*/
4316
4317 /* Compute a 64-bit floating point value into a register, the identity
4318    of which is returned.  As with iselIntExpr_R, the reg may be either
4319    real or virtual; in any case it must not be changed by subsequent
4320    code emitted by the caller.  */
4321
4322 /* IEEE 754 formats.  From http://www.freesoft.org/CIE/RFC/1832/32.htm:
4323
4324     Type                  S (1 bit)   E (11 bits)   F (52 bits)
4325     ----                  ---------   -----------   -----------
4326     signalling NaN        u           2047 (max)    .0uuuuu---u
4327                                                     (with at least
4328                                                      one 1 bit)
4329     quiet NaN             u           2047 (max)    .1uuuuu---u
4330
4331     negative infinity     1           2047 (max)    .000000---0
4332
4333     positive infinity     0           2047 (max)    .000000---0
4334
4335     negative zero         1           0             .000000---0
4336
4337     positive zero         0           0             .000000---0
4338 */
4339
4340 static HReg iselDblExpr ( ISelEnv* env, const IRExpr* e, IREndness IEndianess )
4341 {
4342    HReg r = iselDblExpr_wrk( env, e, IEndianess );
4343 #  if 0
4344    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
4345 #  endif
4346    vassert(hregClass(r) == HRcFlt64);
4347    vassert(hregIsVirtual(r));
4348    return r;
4349 }
4350
4351 /* DO NOT CALL THIS DIRECTLY */
4352 static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e,
4353                               IREndness IEndianess )
4354 {
4355    Bool mode64 = env->mode64;
4356    IRType ty = typeOfIRExpr(env->type_env,e);
4357    vassert(e);
4358    vassert(ty == Ity_F64);
4359
4360    if (e->tag == Iex_RdTmp) {
4361       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
4362    }
4363
4364    /* --------- LITERAL --------- */
4365    if (e->tag == Iex_Const) {
4366       union { UInt u32x2[2]; ULong u64; Double f64; } u;
4367       vassert(sizeof(u) == 8);
4368       vassert(sizeof(u.u64) == 8);
4369       vassert(sizeof(u.f64) == 8);
4370       vassert(sizeof(u.u32x2) == 8);
4371
4372       if (e->Iex.Const.con->tag == Ico_F64) {
4373          u.f64 = e->Iex.Const.con->Ico.F64;
4374       }
4375       else if (e->Iex.Const.con->tag == Ico_F64i) {
4376          u.u64 = e->Iex.Const.con->Ico.F64i;
4377       }
4378       else
4379          vpanic("iselDblExpr(ppc): const");
4380
4381       if (!mode64) {
4382          HReg r_srcHi = newVRegI(env);
4383          HReg r_srcLo = newVRegI(env);
4384          addInstr(env, PPCInstr_LI(r_srcHi, u.u32x2[0], mode64));
4385          addInstr(env, PPCInstr_LI(r_srcLo, u.u32x2[1], mode64));
4386          return mk_LoadRR32toFPR( env, r_srcHi, r_srcLo );
4387       } else { // mode64
4388          HReg r_src = newVRegI(env);
4389          addInstr(env, PPCInstr_LI(r_src, u.u64, mode64));
4390          return mk_LoadR64toFPR( env, r_src );         // 1*I64 -> F64
4391       }
4392    }
4393
4394    /* --------- LOAD --------- */
4395    if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
4396       HReg r_dst = newVRegF(env);
4397       PPCAMode* am_addr;
4398       vassert(e->Iex.Load.ty == Ity_F64);
4399       am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_F64/*xfer*/,
4400                                    IEndianess);
4401       addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dst, am_addr));
4402       return r_dst;
4403    }
4404
4405    /* --------- GET --------- */
4406    if (e->tag == Iex_Get) {
4407       HReg r_dst = newVRegF(env);
4408       PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
4409                                        GuestStatePtr(mode64) );
4410       addInstr(env, PPCInstr_FpLdSt( True/*load*/, 8, r_dst, am_addr ));
4411       return r_dst;
4412    }
4413
4414    /* --------- OPS --------- */
4415    if (e->tag == Iex_Qop) {
4416       PPCFpOp fpop = Pfp_INVALID;
4417       switch (e->Iex.Qop.details->op) {
4418          case Iop_MAddF64:    fpop = Pfp_MADDD; break;
4419          case Iop_MAddF64r32: fpop = Pfp_MADDS; break;
4420          case Iop_MSubF64:    fpop = Pfp_MSUBD; break;
4421          case Iop_MSubF64r32: fpop = Pfp_MSUBS; break;
4422          default: break;
4423       }
4424       if (fpop != Pfp_INVALID) {
4425          HReg r_dst  = newVRegF(env);
4426          HReg r_srcML  = iselDblExpr(env, e->Iex.Qop.details->arg2,
4427                                      IEndianess);
4428          HReg r_srcMR  = iselDblExpr(env, e->Iex.Qop.details->arg3,
4429                                      IEndianess);
4430          HReg r_srcAcc = iselDblExpr(env, e->Iex.Qop.details->arg4,
4431                                      IEndianess);
4432          set_FPU_rounding_mode( env, e->Iex.Qop.details->arg1, IEndianess );
4433          addInstr(env, PPCInstr_FpMulAcc(fpop, r_dst,
4434                                                r_srcML, r_srcMR, r_srcAcc));
4435          return r_dst;
4436       }
4437    }
4438
4439    if (e->tag == Iex_Triop) {
4440       IRTriop *triop = e->Iex.Triop.details;
4441       PPCFpOp fpop = Pfp_INVALID;
4442       switch (triop->op) {
4443          case Iop_AddF64:    fpop = Pfp_ADDD; break;
4444          case Iop_SubF64:    fpop = Pfp_SUBD; break;
4445          case Iop_MulF64:    fpop = Pfp_MULD; break;
4446          case Iop_DivF64:    fpop = Pfp_DIVD; break;
4447          case Iop_AddF64r32: fpop = Pfp_ADDS; break;
4448          case Iop_SubF64r32: fpop = Pfp_SUBS; break;
4449          case Iop_MulF64r32: fpop = Pfp_MULS; break;
4450          case Iop_DivF64r32: fpop = Pfp_DIVS; break;
4451          default: break;
4452       }
4453       if (fpop != Pfp_INVALID) {
4454          HReg r_dst  = newVRegF(env);
4455          HReg r_srcL = iselDblExpr(env, triop->arg2, IEndianess);
4456          HReg r_srcR = iselDblExpr(env, triop->arg3, IEndianess);
4457          set_FPU_rounding_mode( env, triop->arg1, IEndianess );
4458          addInstr(env, PPCInstr_FpBinary(fpop, r_dst, r_srcL, r_srcR));
4459          return r_dst;
4460       }
4461    }
4462
4463    if (e->tag == Iex_Binop) {
4464       PPCFpOp fpop = Pfp_INVALID;
4465       switch (e->Iex.Binop.op) {
4466       case Iop_SqrtF64:   fpop = Pfp_SQRT;   break;
4467       default: break;
4468       }
4469       if (fpop == Pfp_SQRT) {
4470          HReg fr_dst = newVRegF(env);
4471          HReg fr_src = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
4472          set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4473          addInstr(env, PPCInstr_FpUnary(fpop, fr_dst, fr_src));
4474          return fr_dst;
4475       }
4476    }
4477
4478    if (e->tag == Iex_Binop) {
4479
4480       if (e->Iex.Binop.op == Iop_F128toF64) {
4481          HReg fr_dst = newVRegF(env);
4482          HReg fr_src = iselFp128Expr(env, e->Iex.Binop.arg2, IEndianess);
4483          HReg tmp = newVRegV(env);
4484          PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4485          PPCAMode* eight_r1 = PPCAMode_IR( 8, StackFramePtr(env->mode64) );
4486          PPCFpOp fpop = Pfp_INVALID;
4487
4488          if (FPU_rounding_mode_isOdd(e->Iex.Binop.arg1)) {
4489             /* use rounding mode specified by RN. Issue inst with R0 = 0 */
4490             fpop = Pfp_FPQTODRNDODD;
4491          } else {
4492             set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4493             fpop = Pfp_FPQTOD;
4494          }
4495
4496          addInstr(env, PPCInstr_Fp128Unary(fpop, tmp, fr_src));
4497
4498          /* result is in a 128-bit vector register, move to 64-bit reg to
4499           * match the Iop specification.  The result will get moved back
4500           * to a 128-bit register and stored once the value is returned.
4501           */
4502          sub_from_sp( env, 16 );
4503          addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, tmp, zero_r1));
4504          if (IEndianess == Iend_LE)
4505             addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, eight_r1));
4506          else
4507             /* High 64-bits stored at lower address */
4508             addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, zero_r1));
4509
4510          add_to_sp( env, 16 );
4511
4512          return fr_dst;
4513       }
4514
4515       if (e->Iex.Binop.op == Iop_RoundF64toF32) {
4516          HReg r_dst = newVRegF(env);
4517          HReg r_src = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
4518          set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4519          addInstr(env, PPCInstr_FpRSP(r_dst, r_src));
4520          //set_FPU_rounding_default( env );
4521          return r_dst;
4522       }
4523
4524       if (e->Iex.Binop.op == Iop_I64StoF64 || e->Iex.Binop.op == Iop_I64UtoF64) {
4525          if (mode64) {
4526             HReg fdst = newVRegF(env);
4527             HReg isrc = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
4528             HReg r1   = StackFramePtr(env->mode64);
4529             PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
4530
4531             /* Set host rounding mode */
4532             set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4533
4534             sub_from_sp( env, 16 );
4535
4536             addInstr(env, PPCInstr_Store(8, zero_r1, isrc, True/*mode64*/));
4537             addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
4538             addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
4539                                           e->Iex.Binop.op == Iop_I64StoF64,
4540                                           True/*fdst is 64 bit*/,
4541                                           fdst, fdst));
4542
4543             add_to_sp( env, 16 );
4544
4545             ///* Restore default FPU rounding. */
4546             //set_FPU_rounding_default( env );
4547             return fdst;
4548          } else {
4549             /* 32-bit mode */
4550             HReg fdst = newVRegF(env);
4551             HReg isrcHi, isrcLo;
4552             HReg r1   = StackFramePtr(env->mode64);
4553             PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
4554             PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
4555
4556             iselInt64Expr(&isrcHi, &isrcLo, env, e->Iex.Binop.arg2,
4557                           IEndianess);
4558
4559             /* Set host rounding mode */
4560             set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4561
4562             sub_from_sp( env, 16 );
4563
4564             addInstr(env, PPCInstr_Store(4, zero_r1, isrcHi, False/*mode32*/));
4565             addInstr(env, PPCInstr_Store(4, four_r1, isrcLo, False/*mode32*/));
4566             addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
4567             addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
4568                                           e->Iex.Binop.op == Iop_I64StoF64,
4569                                           True/*fdst is 64 bit*/,
4570                                           fdst, fdst));
4571
4572             add_to_sp( env, 16 );
4573
4574             ///* Restore default FPU rounding. */
4575             //set_FPU_rounding_default( env );
4576             return fdst;
4577          }
4578       }
4579
4580    }
4581
4582    if (e->tag == Iex_Unop) {
4583       PPCFpOp fpop = Pfp_INVALID;
4584       switch (e->Iex.Unop.op) {
4585          case Iop_NegF64:     fpop = Pfp_NEG; break;
4586          case Iop_AbsF64:     fpop = Pfp_ABS; break;
4587          case Iop_RSqrtEst5GoodF64:      fpop = Pfp_RSQRTE; break;
4588          case Iop_RoundF64toF64_NegINF:  fpop = Pfp_FRIM; break;
4589          case Iop_RoundF64toF64_PosINF:  fpop = Pfp_FRIP; break;
4590          case Iop_RoundF64toF64_NEAREST: fpop = Pfp_FRIN; break;
4591          case Iop_RoundF64toF64_ZERO:    fpop = Pfp_FRIZ; break;
4592          default: break;
4593       }
4594       if (fpop != Pfp_INVALID) {
4595          HReg fr_dst = newVRegF(env);
4596          HReg fr_src = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
4597          addInstr(env, PPCInstr_FpUnary(fpop, fr_dst, fr_src));
4598          return fr_dst;
4599       }
4600    }
4601
4602    if (e->tag == Iex_Unop) {
4603       switch (e->Iex.Unop.op) {
4604       case Iop_F128HItoF64:
4605       case Iop_F128LOtoF64:
4606          {
4607             /* put upper/lower 64-bits of F128 into an F64. */
4608             HReg     r_aligned16;
4609             HReg     fdst = newVRegF(env);
4610             HReg     fsrc = iselFp128Expr(env, e->Iex.Unop.arg, IEndianess);
4611             PPCAMode *am_off0, *am_off8, *am_off_arg;
4612             sub_from_sp( env, 32 );     // Move SP down 32 bytes
4613
4614             // get a quadword aligned address within our stack space
4615             r_aligned16 = get_sp_aligned16( env );
4616             am_off0 = PPCAMode_IR( 0, r_aligned16 );
4617             am_off8 = PPCAMode_IR( 8 ,r_aligned16 );
4618
4619             /* store 128-bit floating point value to memory, load low word
4620              * or high to 64-bit destination floating point register
4621              */
4622             addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, fsrc, am_off0));
4623             if (IEndianess == Iend_LE) {
4624                if (e->Iex.Binop.op == Iop_F128HItoF64)
4625                   am_off_arg = am_off8;
4626                else
4627                   am_off_arg = am_off0;
4628             } else {
4629                if (e->Iex.Binop.op == Iop_F128HItoF64)
4630                   am_off_arg = am_off0;
4631                else
4632                   am_off_arg = am_off8;
4633             }
4634             addInstr(env,
4635                     PPCInstr_FpLdSt( True /*load*/,
4636                                       8, fdst,
4637                                       am_off_arg ));
4638             add_to_sp( env, 32 );       // Reset SP
4639             return fdst;
4640          }
4641          case Iop_ReinterpI64asF64: {
4642             /* Given an I64, produce an IEEE754 double with the same
4643                bit pattern. */
4644             if (!mode64) {
4645                HReg r_srcHi, r_srcLo;
4646                iselInt64Expr( &r_srcHi, &r_srcLo, env, e->Iex.Unop.arg,
4647                                IEndianess);
4648                return mk_LoadRR32toFPR( env, r_srcHi, r_srcLo );
4649             } else {
4650                HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
4651                return mk_LoadR64toFPR( env, r_src );
4652             }
4653          }
4654
4655          case Iop_F32toF64: {
4656             if (e->Iex.Unop.arg->tag == Iex_Unop &&
4657                      e->Iex.Unop.arg->Iex.Unop.op == Iop_ReinterpI32asF32 ) {
4658                e = e->Iex.Unop.arg;
4659
4660                HReg src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
4661                HReg fr_dst = newVRegF(env);
4662                PPCAMode *am_addr;
4663
4664                sub_from_sp( env, 16 );        // Move SP down 16 bytes
4665                am_addr = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4666
4667                // store src as Ity_I32's
4668                addInstr(env, PPCInstr_Store( 4, am_addr, src, env->mode64 ));
4669
4670                // load single precision float, but the end results loads into a
4671                // 64-bit FP register -- i.e., F64.
4672                addInstr(env, PPCInstr_FpLdSt(True/*load*/, 4, fr_dst, am_addr));
4673
4674                add_to_sp( env, 16 );          // Reset SP
4675                return fr_dst;
4676             }
4677
4678
4679             /* this is a no-op */
4680             HReg res = iselFltExpr(env, e->Iex.Unop.arg, IEndianess);
4681             return res;
4682          }
4683          default:
4684             break;
4685       }
4686    }
4687
4688    /* --------- MULTIPLEX --------- */
4689    if (e->tag == Iex_ITE) { // VFD
4690       if (ty == Ity_F64
4691           && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
4692          HReg fr1    = iselDblExpr(env, e->Iex.ITE.iftrue, IEndianess);
4693          HReg fr0    = iselDblExpr(env, e->Iex.ITE.iffalse, IEndianess);
4694          HReg fr_dst = newVRegF(env);
4695          addInstr(env, PPCInstr_FpUnary( Pfp_MOV, fr_dst, fr0 ));
4696          PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond, IEndianess);
4697          addInstr(env, PPCInstr_FpCMov( cc, fr_dst, fr1 ));
4698          return fr_dst;
4699       }
4700    }
4701
4702    vex_printf("iselDblExpr(ppc): No such tag(%u)\n", e->tag);
4703    ppIRExpr(e);
4704    vpanic("iselDblExpr_wrk(ppc)");
4705 }
4706
4707 static HReg iselDfp32Expr(ISelEnv* env, const IRExpr* e, IREndness IEndianess)
4708 {
4709    HReg r = iselDfp32Expr_wrk( env, e, IEndianess );
4710    vassert(hregClass(r) == HRcFlt64);
4711    vassert( hregIsVirtual(r) );
4712    return r;
4713 }
4714
4715 /* DO NOT CALL THIS DIRECTLY */
4716 static HReg iselDfp32Expr_wrk(ISelEnv* env, const IRExpr* e,
4717                               IREndness IEndianess)
4718 {
4719    Bool mode64 = env->mode64;
4720    IRType ty = typeOfIRExpr( env->type_env, e );
4721
4722    vassert( e );
4723    vassert( ty == Ity_D32 );
4724
4725    /* --------- GET --------- */
4726    if (e->tag == Iex_Get) {
4727       HReg r_dst = newVRegF( env );
4728       PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
4729                                        GuestStatePtr(mode64) );
4730       addInstr( env, PPCInstr_FpLdSt( True/*load*/, 8, r_dst, am_addr ) );
4731       return r_dst;
4732    }
4733
4734    /* --------- LOAD --------- */
4735    if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
4736       PPCAMode* am_addr;
4737       HReg r_dst = newVRegF(env);
4738       vassert(e->Iex.Load.ty == Ity_D32);
4739       am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_D32/*xfer*/,
4740                                    IEndianess);
4741       addInstr(env, PPCInstr_FpLdSt(True/*load*/, 4, r_dst, am_addr));
4742       return r_dst;
4743    }
4744
4745    /* --------- OPS --------- */
4746    if (e->tag == Iex_Binop) {
4747       if (e->Iex.Binop.op == Iop_D64toD32) {
4748          HReg fr_dst = newVRegF(env);
4749          HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
4750          set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4751          addInstr(env, PPCInstr_Dfp64Unary(Pfp_DRSP, fr_dst, fr_src));
4752          return fr_dst;
4753       }
4754    }
4755
4756    ppIRExpr( e );
4757    vpanic( "iselDfp32Expr_wrk(ppc)" );
4758 }
4759
4760 static HReg iselFp128Expr( ISelEnv* env, const IRExpr* e, IREndness IEndianess )
4761 {
4762    HReg r = iselFp128Expr_wrk( env, e, IEndianess );
4763    vassert(hregClass(r) == HRcVec128);
4764    vassert(hregIsVirtual(r));
4765    return r;
4766 }
4767
4768 /* DO NOT CALL THIS DIRECTLY */
4769 static HReg iselFp128Expr_wrk( ISelEnv* env, const IRExpr* e,
4770                                IREndness IEndianess)
4771 {
4772    Bool mode64 = env->mode64;
4773    PPCFpOp fpop = Pfp_INVALID;
4774    IRType  ty = typeOfIRExpr(env->type_env,e);
4775
4776    vassert(e);
4777    vassert( ty == Ity_F128 );
4778
4779    /* read 128-bit IRTemp */
4780    if (e->tag == Iex_RdTmp) {
4781       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
4782    }
4783
4784   if (e->tag == Iex_Get) {
4785       /* Guest state vectors are 16byte aligned,
4786          so don't need to worry here */
4787       HReg dst = newVRegV(env);
4788
4789       addInstr(env,
4790                PPCInstr_AvLdSt( True/*load*/, 16, dst,
4791                                 PPCAMode_IR( e->Iex.Get.offset,
4792                                              GuestStatePtr(mode64) )));
4793       return dst;
4794    }
4795
4796    if (e->tag == Iex_Unop) {
4797       switch (e->Iex.Unop.op) {
4798       case Iop_TruncF128toI64S:
4799          fpop = Pfp_TRUNCFPQTOISD; goto do_Un_F128;
4800       case Iop_TruncF128toI32S:
4801          fpop = Pfp_TRUNCFPQTOISW; goto do_Un_F128;
4802       case Iop_TruncF128toI64U:
4803          fpop = Pfp_TRUNCFPQTOIUD; goto do_Un_F128;
4804       case Iop_TruncF128toI32U:
4805          fpop = Pfp_TRUNCFPQTOIUW; goto do_Un_F128;
4806       case Iop_TruncF128toI128U:
4807          fpop = Pfp_TRUNCFPQTOIUQ; goto do_Un_F128;
4808       case Iop_TruncF128toI128S:
4809          fpop = Pfp_TRUNCFPQTOISQ; goto do_Un_F128;
4810
4811       do_Un_F128: {
4812          HReg r_dst = newVRegV(env);
4813          HReg r_src = iselFp128Expr(env, e->Iex.Unop.arg, IEndianess);
4814          addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, r_src));
4815          return r_dst;
4816       }
4817
4818       case Iop_F64toF128: {
4819          fpop = Pfp_FPDTOQ;
4820          HReg r_dst = newVRegV(env);
4821          HReg r_src = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
4822          HReg v128tmp = newVRegV(env);
4823          PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4824
4825          /* value is in 64-bit float reg, need to move to 128-bit vector reg */
4826          sub_from_sp( env, 16 );
4827          addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, r_src, zero_r1));
4828          addInstr(env, PPCInstr_AvLdSt(True/*load*/, 16, v128tmp, zero_r1));
4829          add_to_sp( env, 16 );
4830
4831          addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, v128tmp));
4832          return r_dst;
4833       }
4834
4835       case Iop_I64StoF128:
4836          fpop = Pfp_IDSTOQ; goto do_Un_int_F128;
4837       case Iop_I64UtoF128:
4838          fpop = Pfp_IDUTOQ; goto do_Un_int_F128;
4839
4840       do_Un_int_F128: {
4841          HReg r_dst = newVRegV(env);
4842          HReg tmp = newVRegV(env);
4843          HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
4844          PPCAMode *am_offhi, *am_offlo;
4845          HReg r_aligned16;
4846
4847          /* source is in a 64-bit integer reg, move to 128-bit float reg
4848           * do this via the stack (easy, convenient, etc).
4849           */
4850          sub_from_sp( env, 32 );        // Move SP down
4851
4852          /* Get a quadword aligned address within our stack space */
4853          r_aligned16 = get_sp_aligned16( env );
4854
4855          am_offlo  = PPCAMode_IR( 0,  r_aligned16 );
4856          am_offhi  = PPCAMode_IR( 8,  r_aligned16 );
4857
4858          /* Inst only uses the upper 64-bit of the source */
4859          addInstr(env, PPCInstr_Load(8, r_src, am_offhi, mode64));
4860
4861          /* Fetch result back from stack. */
4862          addInstr(env, PPCInstr_AvLdSt(True/*load*/, 16, tmp, am_offlo));
4863
4864          add_to_sp( env, 32 );          // Reset SP
4865
4866          addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, tmp));
4867          return r_dst;
4868       }
4869
4870       case Iop_ReinterpI128asF128:
4871       {
4872          PPCAMode* am_addr;
4873          PPCAMode* am_addr4;
4874          HReg rHi = INVALID_HREG;
4875          HReg rLo = INVALID_HREG;
4876          HReg dst  = newVRegV(env);
4877
4878          iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
4879
4880          sub_from_sp( env, 16 );     // Move SP down 16 bytes
4881          am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
4882          am_addr4 = advance4(env, am_addr);
4883
4884          // store the two 64-bit pars
4885          addInstr(env, PPCInstr_Store( 8, am_addr,  rHi, mode64 ));
4886          addInstr(env, PPCInstr_Store( 8, am_addr4, rLo, mode64 ));
4887
4888          // load as Ity_F128
4889          addInstr(env, PPCInstr_AvLdSt( True/*fetch*/, 16, dst, am_addr ));
4890
4891          add_to_sp( env, 16 );       // Reset SP
4892          return dst;
4893       }
4894
4895       default:
4896          break;
4897       } /* switch (e->Iex.Unop.op) */
4898    } /* if (e->tag == Iex_Unop) */
4899
4900    if (e->tag == Iex_Binop) {
4901       switch (e->Iex.Binop.op) {
4902
4903       case Iop_F64HLtoF128:
4904          {
4905             HReg dst    = newVRegV(env);
4906             HReg r_src_hi = iselDblExpr(env, e->Iex.Binop.arg1, IEndianess);
4907             HReg r_src_lo = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
4908             PPCAMode *am_offhi, *am_offlo;
4909             HReg r_aligned16;
4910
4911             /* do this via the stack (easy, convenient, etc) */
4912             sub_from_sp( env, 16 );        // Move SP down
4913
4914             /* Get a quadword aligned address within our stack space */
4915             r_aligned16 = get_sp_aligned16( env );
4916
4917             am_offlo  = PPCAMode_IR( 0,  r_aligned16 );
4918             am_offhi  = PPCAMode_IR( 8,  r_aligned16 );
4919
4920             addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8,
4921                                           r_src_lo, am_offlo));
4922             addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8,
4923                                           r_src_hi, am_offhi));
4924
4925             /* Fetch result back from stack. */
4926             addInstr(env, PPCInstr_AvLdSt(True/*load*/, 16,
4927                                           dst, am_offlo));
4928
4929             add_to_sp( env, 16 );          // Reset SP
4930             return dst;
4931          }
4932       case Iop_F128toI128S:
4933          {
4934             HReg dst    = newVRegV(env);
4935             HReg r_src  = iselFp128Expr(env, e->Iex.Binop.arg2, IEndianess);
4936             PPCRI* rm = iselWordExpr_RI(env, e->Iex.Binop.arg1, IEndianess);
4937             /* Note: rm is a set of three bit fields that specify the
4938              * rounding mode and which of the two instructions to issue.
4939              */
4940             addInstr(env, PPCInstr_AvBinaryInt(Pav_F128toI128S, dst,
4941                                                r_src, rm));
4942             return dst;
4943          }
4944       case Iop_RndF128:
4945          {
4946             HReg dst    = newVRegV(env);
4947             HReg r_src  = iselFp128Expr(env, e->Iex.Binop.arg2, IEndianess);
4948             PPCRI* rm = iselWordExpr_RI(env, e->Iex.Binop.arg1, IEndianess);
4949             /* Note: rm is a set of three bit fields that specify the
4950              * rounding mode and which of the two instructions to issue.
4951              */
4952             addInstr(env, PPCInstr_AvBinaryInt(Pav_ROUNDFPQ, dst,
4953                                                r_src, rm));
4954             return dst;
4955          }
4956       case Iop_SqrtF128:
4957          if (FPU_rounding_mode_isOdd(e->Iex.Binop.arg1)) {
4958             /* use rounding mode specified by RN. Issue inst with R0 = 0 */
4959             fpop = Pfp_FPSQRTQRNDODD;
4960             goto do_Bin_F128;
4961          } else {
4962             set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4963             fpop = Pfp_FPSQRTQ;
4964             goto do_Bin_F128;
4965          }
4966       case Iop_F128toF32:
4967          if (FPU_rounding_mode_isOdd(e->Iex.Binop.arg1)) {
4968             /* use rounding mode specified by RN. Issue inst with R0 = 0 */
4969             fpop = Pfp_FPQTOWRNDODD;
4970             goto do_Bin_F128;
4971          } else {
4972             set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4973             fpop = Pfp_FPQTOW;
4974             goto do_Bin_F128;
4975          }
4976       do_Bin_F128: {
4977          HReg r_dst = newVRegV(env);
4978          HReg r_src = iselFp128Expr(env, e->Iex.Binop.arg2, IEndianess);
4979          addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, r_src));
4980          return r_dst;
4981       }
4982
4983       case Iop_I128StoF128:
4984          fpop = Pfp_IQSTOQ; goto do_Un_I128_F128_DFP_conversions;
4985       case Iop_I128UtoF128:
4986          fpop = Pfp_IQUTOQ; goto do_Un_I128_F128_DFP_conversions;
4987       do_Un_I128_F128_DFP_conversions: {
4988          PPCAMode* am_addr;
4989          PPCAMode* am_addr4;
4990          HReg rHi, rLo;
4991          HReg r_tmp = newVRegV(env);
4992          HReg r_dst = newVRegV(env);
4993
4994          iselInt128Expr(&rHi,&rLo, env, e->Iex.Binop.arg2, IEndianess);
4995
4996          /* Set host rounding mode for the conversion instruction */
4997          set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4998
4999          sub_from_sp( env, 16 );
5000
5001          am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
5002          am_addr4 = advance4(env, am_addr);
5003
5004          // store the two 64-bit halfs of the I128
5005          addInstr(env, PPCInstr_Store( 8, am_addr,  rHi, mode64 ));
5006          addInstr(env, PPCInstr_Store( 8, am_addr4, rLo, mode64 ));
5007
5008          /* Fetch the I128 into an V128 register */
5009          addInstr(env, PPCInstr_AvLdSt( True/*fetch*/, 16, r_tmp, am_addr ));
5010          addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, r_tmp));
5011
5012          add_to_sp( env, 16 );       // Reset SP
5013
5014          return r_dst;
5015       }
5016
5017       default:
5018          break;
5019       } /* switch (e->Iex.Binop.op) */
5020    } /* if (e->tag == Iex_Binop) */
5021
5022    if (e->tag == Iex_Triop) {
5023       IRTriop *triop = e->Iex.Triop.details;
5024
5025       switch (triop->op) {
5026       case Iop_AddF128:
5027          if (FPU_rounding_mode_isOdd(triop->arg1)) {
5028             /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5029             fpop = Pfp_FPADDQRNDODD; goto do_Tri_F128;
5030          } else {
5031             set_FPU_rounding_mode( env, triop->arg1, IEndianess );
5032             fpop = Pfp_FPADDQ; goto do_Tri_F128;
5033          }
5034       case Iop_SubF128:
5035          if (FPU_rounding_mode_isOdd(triop->arg1)) {
5036             /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5037             fpop = Pfp_FPSUBQRNDODD; goto do_Tri_F128;
5038          } else {
5039             set_FPU_rounding_mode( env, triop->arg1, IEndianess );
5040             fpop = Pfp_FPSUBQ; goto do_Tri_F128;
5041          }
5042       case Iop_MulF128:
5043          if (FPU_rounding_mode_isOdd(triop->arg1)) {
5044             /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5045             fpop = Pfp_FPMULQRNDODD; goto do_Tri_F128;
5046          } else {
5047             set_FPU_rounding_mode( env, triop->arg1, IEndianess );
5048             fpop = Pfp_FPMULQ; goto do_Tri_F128;
5049          }
5050       case Iop_DivF128:
5051          if (FPU_rounding_mode_isOdd(triop->arg1)) {
5052             /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5053             fpop = Pfp_FPDIVQRNDODD; goto do_Tri_F128;
5054          } else {
5055             set_FPU_rounding_mode( env, triop->arg1, IEndianess );
5056             fpop = Pfp_FPDIVQ; goto do_Tri_F128;
5057          }
5058       case Iop_MAddF128:
5059          if (FPU_rounding_mode_isOdd(triop->arg1)) {
5060             /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5061             fpop = Pfp_FPMULADDQRNDODD; goto do_Tri_F128;
5062          } else {
5063             set_FPU_rounding_mode( env, triop->arg1, IEndianess );
5064             fpop = Pfp_FPMULADDQ; goto do_Tri_F128;
5065          }
5066
5067    do_Tri_F128: {
5068          HReg r_dst  = newVRegV(env);
5069          HReg r_srcL = iselFp128Expr(env, triop->arg2, IEndianess);
5070          HReg r_srcR = iselFp128Expr(env, triop->arg3, IEndianess);
5071
5072          addInstr(env, PPCInstr_Fp128Binary(fpop, r_dst, r_srcL, r_srcR));
5073          return r_dst;
5074       }
5075
5076       default:
5077          break;
5078       } /* switch (e->Iex.Triop.op) */
5079
5080    } /* if (e->tag == Iex_Trinop) */
5081
5082    if (e->tag == Iex_Qop) {
5083       IRQop *qop = e->Iex.Qop.details;
5084
5085       switch (qop->op) {
5086       case Iop_MAddF128:
5087          if (FPU_rounding_mode_isOdd(qop->arg1)) {
5088             /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5089             fpop = Pfp_FPMULADDQRNDODD; goto do_Quad_F128;
5090          } else {
5091             set_FPU_rounding_mode( env, qop->arg1, IEndianess );
5092             fpop = Pfp_FPMULADDQ; goto do_Quad_F128;
5093          }
5094       case Iop_MSubF128:
5095          if (FPU_rounding_mode_isOdd(qop->arg1)) {
5096             /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5097             fpop = Pfp_FPMULSUBQRNDODD; goto do_Quad_F128;
5098          } else {
5099             set_FPU_rounding_mode( env, qop->arg1, IEndianess );
5100             fpop = Pfp_FPMULSUBQ; goto do_Quad_F128;
5101          }
5102       case Iop_NegMAddF128:
5103          if (FPU_rounding_mode_isOdd(qop->arg1)) {
5104             /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5105             fpop = Pfp_FPNEGMULADDQRNDODD; goto do_Quad_F128;
5106          } else {
5107             set_FPU_rounding_mode( env, qop->arg1, IEndianess );
5108             fpop = Pfp_FPNEGMULADDQ; goto do_Quad_F128;
5109          }
5110       case Iop_NegMSubF128:
5111          if (FPU_rounding_mode_isOdd(qop->arg1)) {
5112             /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5113             fpop = Pfp_FPNEGMULSUBQRNDODD; goto do_Quad_F128;
5114          } else {
5115             set_FPU_rounding_mode( env, qop->arg1, IEndianess );
5116             fpop = Pfp_FPNEGMULSUBQ; goto do_Quad_F128;
5117          }
5118
5119       do_Quad_F128: {
5120          HReg r_dst = iselFp128Expr(env, qop->arg3,
5121                                     IEndianess);
5122          HReg r_srcL = iselFp128Expr(env, qop->arg2,
5123                                      IEndianess);
5124          HReg r_srcR = iselFp128Expr(env, qop->arg4,
5125                                      IEndianess);
5126
5127          addInstr(env, PPCInstr_Fp128Ternary(fpop, r_dst, r_srcL, r_srcR));
5128          return r_dst;
5129          }
5130
5131       default:
5132          break;
5133       }
5134    }   /* if (e->tag == Iex_Qop) */
5135
5136    ppIRExpr( e );
5137    vpanic( "iselFp128Expr(ppc64)" );
5138 }
5139
5140 static HReg iselDfp64Expr(ISelEnv* env, const IRExpr* e, IREndness IEndianess)
5141 {
5142    HReg r = iselDfp64Expr_wrk( env, e, IEndianess );
5143    vassert(hregClass(r) == HRcFlt64);
5144    vassert( hregIsVirtual(r) );
5145    return r;
5146 }
5147
5148 /* DO NOT CALL THIS DIRECTLY */
5149 static HReg iselDfp64Expr_wrk(ISelEnv* env, const IRExpr* e,
5150                               IREndness IEndianess)
5151 {
5152    Bool mode64 = env->mode64;
5153    IRType ty = typeOfIRExpr( env->type_env, e );
5154    HReg r_dstHi, r_dstLo;
5155
5156    vassert( e );
5157    vassert( ty == Ity_D64 );
5158
5159    if (e->tag == Iex_RdTmp) {
5160       return lookupIRTemp( env, e->Iex.RdTmp.tmp );
5161    }
5162
5163    /* --------- GET --------- */
5164    if (e->tag == Iex_Get) {
5165       HReg r_dst = newVRegF( env );
5166       PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
5167                                        GuestStatePtr(mode64) );
5168       addInstr( env, PPCInstr_FpLdSt( True/*load*/, 8, r_dst, am_addr ) );
5169       return r_dst;
5170    }
5171
5172    if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
5173       PPCAMode* am_addr;
5174       HReg r_dst = newVRegF(env);
5175       vassert(e->Iex.Load.ty == Ity_D64);
5176       am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_D64/*xfer*/,
5177                                    IEndianess);
5178       addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dst, am_addr));
5179       return r_dst;
5180    }
5181
5182    /* --------- OPS --------- */
5183    if (e->tag == Iex_Qop) {
5184       HReg r_dst = newVRegF( env );
5185       return r_dst;
5186    }
5187
5188    if (e->tag == Iex_Unop) {
5189       HReg fr_dst = newVRegF(env);
5190       switch (e->Iex.Unop.op) {
5191       case Iop_ReinterpI64asD64: {
5192          /* Given an I64, produce an IEEE754 DFP with the same
5193                bit pattern. */
5194          if (!mode64) {
5195             HReg r_srcHi, r_srcLo;
5196             iselInt64Expr( &r_srcHi, &r_srcLo, env, e->Iex.Unop.arg,
5197                            IEndianess);
5198             return mk_LoadRR32toFPR( env, r_srcHi, r_srcLo );
5199          } else {
5200             HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
5201             return mk_LoadR64toFPR( env, r_src );
5202          }
5203       }
5204       case Iop_D32toD64: {
5205          HReg fr_src = iselDfp32Expr(env, e->Iex.Unop.arg, IEndianess);
5206          addInstr(env, PPCInstr_Dfp64Unary(Pfp_DCTDP, fr_dst, fr_src));
5207          return fr_dst;
5208       }
5209       case Iop_D128HItoD64:
5210          iselDfp128Expr( &r_dstHi, &r_dstLo, env, e->Iex.Unop.arg,
5211                          IEndianess );
5212          return r_dstHi;
5213       case Iop_D128LOtoD64:
5214          iselDfp128Expr( &r_dstHi, &r_dstLo, env, e->Iex.Unop.arg,
5215                          IEndianess );
5216          return r_dstLo;
5217       case Iop_InsertExpD64: {
5218          HReg fr_srcL = iselDblExpr(env, e->Iex.Binop.arg1, IEndianess);
5219          HReg fr_srcR = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
5220
5221          addInstr(env, PPCInstr_Dfp64Binary(Pfp_DIEX, fr_dst, fr_srcL,
5222                                             fr_srcR));
5223          return fr_dst;
5224        }
5225       default:
5226          vex_printf( "ERROR: iselDfp64Expr_wrk, UNKNOWN unop case %d\n",
5227                      (Int)e->Iex.Unop.op );
5228       }
5229    }
5230
5231    if (e->tag == Iex_Binop) {
5232       PPCFpOp fpop = Pfp_INVALID;
5233       HReg fr_dst = newVRegF(env);
5234
5235       switch (e->Iex.Binop.op) {
5236       case Iop_D128toD64:     fpop = Pfp_DRDPQ;  break;
5237       case Iop_D64toD32:      fpop = Pfp_DRSP;   break;
5238       case Iop_I64StoD64:     fpop = Pfp_DCFFIX; break;
5239       case Iop_RoundD64toInt: fpop = Pfp_DRINTN; break;
5240       default: break;
5241       }
5242       if (fpop == Pfp_DRDPQ) {
5243          HReg r_srcHi = newVRegF(env);
5244          HReg r_srcLo = newVRegF(env);
5245
5246          set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
5247          iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
5248                         IEndianess);
5249          addInstr(env, PPCInstr_DfpD128toD64(fpop, fr_dst, r_srcHi, r_srcLo));
5250          return fr_dst;
5251
5252       } else if (fpop == Pfp_DRINTN) {
5253          HReg fr_src = newVRegF(env);
5254          PPCRI* r_rmc = iselWordExpr_RI(env, e->Iex.Binop.arg1, IEndianess);
5255
5256          /* NOTE, this IOP takes a DFP value and rounds to the
5257           * neares floating point integer value, i.e. fractional part
5258           * is zero.  The result is a decimal floating point number.
5259           * the INT in the name is a bit misleading.
5260           */
5261          fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
5262          addInstr(env, PPCInstr_DfpRound(fr_dst, fr_src, r_rmc));
5263          return fr_dst;
5264
5265       } else if (fpop == Pfp_DRSP) {
5266          HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
5267          set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
5268          addInstr(env, PPCInstr_Dfp64Unary(fpop, fr_dst, fr_src));
5269          return fr_dst;
5270
5271       } else if (fpop == Pfp_DCFFIX) {
5272          HReg fr_src = newVRegF(env);
5273          PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5274
5275          set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
5276          sub_from_sp( env, 16 );
5277
5278          // put the I64 value into a floating point register
5279          if (mode64) {
5280            HReg tmp = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
5281
5282            addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
5283          } else {
5284             HReg tmpHi, tmpLo;
5285             PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
5286
5287             iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Binop.arg2,
5288                           IEndianess);
5289             addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*mode32*/));
5290             addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*mode32*/));
5291          }
5292
5293          addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8,  fr_src, zero_r1));
5294          addInstr(env, PPCInstr_Dfp64Unary(fpop, fr_dst, fr_src));
5295          add_to_sp( env, 16 );
5296          return fr_dst;
5297       }
5298
5299       switch (e->Iex.Binop.op) {
5300       /* shift instructions D64, I32 -> D64 */
5301       case Iop_ShlD64: fpop = Pfp_DSCLI; break;
5302       case Iop_ShrD64: fpop = Pfp_DSCRI; break;
5303       default: break;
5304       }
5305       if (fpop != Pfp_INVALID) {
5306          HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg1, IEndianess);
5307          PPCRI* shift = iselWordExpr_RI(env, e->Iex.Binop.arg2, IEndianess);
5308
5309          /* shift value must be an immediate value */
5310          vassert(shift->tag == Pri_Imm);
5311
5312          addInstr(env, PPCInstr_DfpShift(fpop, fr_dst, fr_src, shift));
5313          return fr_dst;
5314       }
5315
5316       switch (e->Iex.Binop.op) {
5317       case Iop_InsertExpD64:
5318          fpop = Pfp_DIEX;
5319          break;
5320       default:  break;
5321       }
5322       if (fpop != Pfp_INVALID) {
5323          HReg fr_srcL = newVRegF(env);
5324          HReg fr_srcR = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
5325          PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5326          sub_from_sp( env, 16 );
5327
5328          if (env->mode64) {
5329             // put the I64 value into a floating point reg
5330             HReg tmp = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
5331
5332             addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
5333          } else {
5334             // put the I64 register pair into a floating point reg
5335             HReg tmpHi;
5336             HReg tmpLo;
5337             PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
5338
5339             iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Binop.arg1,
5340                           IEndianess);
5341             addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*!mode64*/));
5342             addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*!mode64*/));
5343          }
5344          addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_srcL, zero_r1));
5345          addInstr(env, PPCInstr_Dfp64Binary(fpop, fr_dst, fr_srcL,
5346                                             fr_srcR));
5347          add_to_sp( env, 16 );
5348          return fr_dst;
5349       }
5350    }
5351
5352    if (e->tag == Iex_Triop) {
5353       IRTriop *triop = e->Iex.Triop.details;
5354       PPCFpOp fpop = Pfp_INVALID;
5355
5356       switch (triop->op) {
5357       case Iop_AddD64:
5358          fpop = Pfp_DFPADD;
5359          break;
5360       case Iop_SubD64:
5361          fpop = Pfp_DFPSUB;
5362          break;
5363       case Iop_MulD64:
5364          fpop = Pfp_DFPMUL;
5365          break;
5366       case Iop_DivD64:
5367          fpop = Pfp_DFPDIV;
5368          break;
5369       default:
5370          break;
5371       }
5372       if (fpop != Pfp_INVALID) {
5373          HReg r_dst = newVRegF( env );
5374          HReg r_srcL = iselDfp64Expr( env, triop->arg2, IEndianess );
5375          HReg r_srcR = iselDfp64Expr( env, triop->arg3, IEndianess );
5376
5377          set_FPU_DFP_rounding_mode( env, triop->arg1, IEndianess );
5378          addInstr( env, PPCInstr_Dfp64Binary( fpop, r_dst, r_srcL, r_srcR ) );
5379          return r_dst;
5380       }
5381
5382       switch (triop->op) {
5383       case Iop_QuantizeD64:          fpop = Pfp_DQUA;  break;
5384       case Iop_SignificanceRoundD64: fpop = Pfp_RRDTR; break;
5385       default: break;
5386       }
5387       if (fpop == Pfp_DQUA) {
5388          HReg r_dst = newVRegF(env);
5389          HReg r_srcL = iselDfp64Expr(env, triop->arg2, IEndianess);
5390          HReg r_srcR = iselDfp64Expr(env, triop->arg3, IEndianess);
5391          PPCRI* rmc  = iselWordExpr_RI(env, triop->arg1, IEndianess);
5392          addInstr(env, PPCInstr_DfpQuantize(fpop, r_dst, r_srcL, r_srcR,
5393                                             rmc));
5394          return r_dst;
5395
5396       } else if (fpop == Pfp_RRDTR) {
5397          HReg r_dst = newVRegF(env);
5398          HReg r_srcL = newVRegF(env);
5399          HReg r_srcR = iselDfp64Expr(env, triop->arg3, IEndianess);
5400          PPCRI* rmc  = iselWordExpr_RI(env, triop->arg1, IEndianess);
5401          PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5402          HReg i8_val = iselWordExpr_R(env, triop->arg2, IEndianess);
5403
5404          /* Move I8 to float register to issue instruction */
5405          sub_from_sp( env, 16 );
5406          if (mode64)
5407             addInstr(env, PPCInstr_Store(8, zero_r1, i8_val, True/*mode64*/));
5408          else
5409             addInstr(env, PPCInstr_Store(4, zero_r1, i8_val, False/*mode32*/));
5410
5411          addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_srcL, zero_r1));
5412          add_to_sp( env, 16 );
5413
5414          // will set TE and RMC when issuing instruction
5415          addInstr(env, PPCInstr_DfpQuantize(fpop, r_dst, r_srcL, r_srcR, rmc));
5416          return r_dst;
5417       }
5418    }
5419
5420    ppIRExpr( e );
5421    vpanic( "iselDfp64Expr_wrk(ppc)" );
5422 }
5423
5424 static void iselDfp128Expr(HReg* rHi, HReg* rLo, ISelEnv* env, const IRExpr* e,
5425                            IREndness IEndianess)
5426 {
5427    iselDfp128Expr_wrk( rHi, rLo, env, e, IEndianess );
5428    vassert( hregIsVirtual(*rHi) );
5429    vassert( hregIsVirtual(*rLo) );
5430 }
5431
5432 /* DO NOT CALL THIS DIRECTLY */
5433 static void iselDfp128Expr_wrk(HReg* rHi, HReg *rLo, ISelEnv* env,
5434                                const IRExpr* e, IREndness IEndianess)
5435 {
5436    vassert( e );
5437    vassert( typeOfIRExpr(env->type_env,e) == Ity_D128 );
5438
5439    /* read 128-bit IRTemp */
5440    if (e->tag == Iex_RdTmp) {
5441       lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp );
5442       return;
5443    }
5444
5445    if (e->tag == Iex_Unop) {
5446       HReg r_dstHi = newVRegF(env);
5447       HReg r_dstLo = newVRegF(env);
5448
5449       if (e->Iex.Unop.op == Iop_I64StoD128) {
5450          HReg fr_src = newVRegF(env);
5451          PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5452
5453          // put the I64 value into a floating point reg
5454          if (env->mode64) {
5455             HReg tmp   = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
5456             addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
5457          } else {
5458             HReg tmpHi, tmpLo;
5459             PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
5460
5461             iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Unop.arg,
5462                           IEndianess);
5463             addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*mode32*/));
5464             addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*mode32*/));
5465          }
5466
5467          addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_src, zero_r1));
5468          addInstr(env, PPCInstr_DfpI64StoD128(Pfp_DCFFIXQ, r_dstHi, r_dstLo,
5469                                               fr_src));
5470       }
5471
5472       if (e->Iex.Unop.op == Iop_D64toD128) {
5473          HReg r_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
5474
5475          /* Source is 64bit, result is 128 bit.  High 64bit source arg,
5476           * is ignored by the instruction.  Set high arg to r_src just
5477           * to meet the vassert tests.
5478           */
5479          addInstr(env, PPCInstr_Dfp128Unary(Pfp_DCTQPQ, r_dstHi, r_dstLo,
5480                                             r_src, r_src));
5481       }
5482       *rHi = r_dstHi;
5483       *rLo = r_dstLo;
5484       return;
5485    }
5486
5487    /* --------- OPS --------- */
5488    if (e->tag == Iex_Binop) {
5489       HReg r_srcHi;
5490       HReg r_srcLo;
5491
5492       switch (e->Iex.Binop.op) {
5493       case Iop_D64HLtoD128:
5494          r_srcHi = iselDfp64Expr( env, e->Iex.Binop.arg1, IEndianess );
5495          r_srcLo = iselDfp64Expr( env, e->Iex.Binop.arg2, IEndianess );
5496          *rHi = r_srcHi;
5497          *rLo = r_srcLo;
5498          return;
5499          break;
5500       case Iop_D128toD64: {
5501          PPCFpOp fpop = Pfp_DRDPQ;
5502          HReg fr_dst  = newVRegF(env);
5503
5504          set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
5505          iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
5506                         IEndianess);
5507          addInstr(env, PPCInstr_DfpD128toD64(fpop, fr_dst, r_srcHi, r_srcLo));
5508
5509          /* Need to meet the interface spec but the result is
5510           * just 64-bits so send the result back in both halfs.
5511           */
5512          *rHi = fr_dst;
5513          *rLo = fr_dst;
5514          return;
5515       }
5516       case Iop_ShlD128:
5517       case Iop_ShrD128: {
5518          HReg fr_dst_hi = newVRegF(env);
5519          HReg fr_dst_lo = newVRegF(env);
5520          PPCRI* shift = iselWordExpr_RI(env, e->Iex.Binop.arg2, IEndianess);
5521          PPCFpOp fpop = Pfp_DSCLIQ;  /* fix later if necessary */
5522
5523          iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg1,
5524                         IEndianess);
5525
5526          if (e->Iex.Binop.op == Iop_ShrD128)
5527             fpop = Pfp_DSCRIQ;
5528
5529          addInstr(env, PPCInstr_DfpShift128(fpop, fr_dst_hi, fr_dst_lo,
5530                                             r_srcHi, r_srcLo, shift));
5531
5532          *rHi = fr_dst_hi;
5533          *rLo = fr_dst_lo;
5534          return;
5535       }
5536       case Iop_RoundD128toInt: {
5537          HReg r_dstHi = newVRegF(env);
5538          HReg r_dstLo = newVRegF(env);
5539          PPCRI* r_rmc = iselWordExpr_RI(env, e->Iex.Binop.arg1, IEndianess);
5540
5541          // will set R and RMC when issuing instruction
5542          iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
5543                         IEndianess);
5544
5545          addInstr(env, PPCInstr_DfpRound128(r_dstHi, r_dstLo,
5546                                             r_srcHi, r_srcLo, r_rmc));
5547          *rHi = r_dstHi;
5548          *rLo = r_dstLo;
5549          return;
5550       }
5551       case Iop_InsertExpD128: {
5552          HReg r_dstHi = newVRegF(env);
5553          HReg r_dstLo = newVRegF(env);
5554          HReg r_srcL  = newVRegF(env);
5555          PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5556          r_srcHi = newVRegF(env);
5557          r_srcLo = newVRegF(env);
5558
5559          iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
5560                         IEndianess);
5561
5562          /* Move I64 to float register to issue instruction */
5563          if (env->mode64) {
5564             HReg tmp = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
5565             addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
5566          } else {
5567             HReg tmpHi, tmpLo;
5568             PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
5569
5570             iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Unop.arg,
5571                           IEndianess);
5572             addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*mode32*/));
5573             addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*mode32*/));
5574          }
5575
5576          addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_srcL, zero_r1));
5577          addInstr(env, PPCInstr_InsertExpD128(Pfp_DIEXQ,
5578                                               r_dstHi, r_dstLo,
5579                                               r_srcL, r_srcHi, r_srcLo));
5580          *rHi = r_dstHi;
5581          *rLo = r_dstLo;
5582          return;
5583       }
5584
5585       case Iop_I128StoD128: {
5586          HReg tmpF128 = newVRegV(env);
5587          HReg FdstHi = newVRegF(env);
5588          HReg FdstLo = newVRegF(env);
5589          HReg srcLo = newVRegI(env);
5590          HReg srcHi = newVRegI(env);
5591          PPCAMode* am_addr;
5592          PPCAMode* am_addr4;
5593
5594          set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
5595
5596          // Get the I128 value, store into a VSR register
5597          iselInt128Expr(&srcHi, &srcLo, env, e->Iex.Binop.arg2, IEndianess);
5598
5599          sub_from_sp( env, 16 );     // Move SP down 16 bytes
5600          am_addr = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5601          am_addr4 = advance4(env, am_addr);
5602
5603          addInstr(env, PPCInstr_Store( 8, am_addr,  srcHi, env->mode64 ));
5604          addInstr(env, PPCInstr_Store( 8, am_addr4, srcLo, env->mode64 ));
5605
5606          // load as Ity_F128
5607          addInstr(env, PPCInstr_AvLdSt( True/*fetch*/, 16, tmpF128, am_addr ));
5608
5609          // do conversion
5610          addInstr( env, PPCInstr_XFormUnary994( Px_IQSTODFP, FdstHi, FdstLo,
5611                                                 tmpF128 ) );
5612
5613          *rHi = FdstHi;
5614          *rLo = FdstLo;
5615          add_to_sp( env, 16 );       // Reset SP
5616          return;
5617       }
5618
5619       default:
5620          vex_printf( "ERROR: iselDfp128Expr_wrk, UNKNOWN binop case %d\n",
5621                      (Int)e->Iex.Binop.op );
5622          break;
5623       }
5624    }
5625
5626    if (e->tag == Iex_Triop) {
5627       IRTriop *triop = e->Iex.Triop.details;
5628       PPCFpOp fpop = Pfp_INVALID;
5629       HReg r_dstHi = newVRegF(env);
5630       HReg r_dstLo = newVRegF(env);
5631
5632       switch (triop->op) {
5633       case Iop_AddD128:
5634          fpop = Pfp_DFPADDQ;
5635          break;
5636       case Iop_SubD128:
5637          fpop = Pfp_DFPSUBQ;
5638          break;
5639       case Iop_MulD128:
5640          fpop = Pfp_DFPMULQ;
5641          break;
5642       case Iop_DivD128:
5643          fpop = Pfp_DFPDIVQ;
5644          break;
5645       default:
5646          break;
5647       }
5648
5649       if (fpop != Pfp_INVALID) {
5650          HReg r_srcRHi = newVRegV( env );
5651          HReg r_srcRLo = newVRegV( env );
5652
5653          /* dst will be used to pass in the left operand and get the result. */
5654          iselDfp128Expr( &r_dstHi, &r_dstLo, env, triop->arg2, IEndianess );
5655          iselDfp128Expr( &r_srcRHi, &r_srcRLo, env, triop->arg3, IEndianess );
5656          set_FPU_DFP_rounding_mode( env, triop->arg1, IEndianess );
5657          addInstr( env,
5658                    PPCInstr_Dfp128Binary( fpop, r_dstHi, r_dstLo,
5659                                           r_srcRHi, r_srcRLo ) );
5660          *rHi = r_dstHi;
5661          *rLo = r_dstLo;
5662          return;
5663       }
5664       switch (triop->op) {
5665       case Iop_QuantizeD128:          fpop = Pfp_DQUAQ;  break;
5666       case Iop_SignificanceRoundD128: fpop = Pfp_DRRNDQ; break;
5667       default: break;
5668       }
5669       if (fpop == Pfp_DQUAQ) {
5670          HReg r_srcHi = newVRegF(env);
5671          HReg r_srcLo = newVRegF(env);
5672          PPCRI* rmc = iselWordExpr_RI(env, triop->arg1, IEndianess);
5673
5674          /* dst will be used to pass in the left operand and get the result */
5675          iselDfp128Expr(&r_dstHi, &r_dstLo, env, triop->arg2, IEndianess);
5676          iselDfp128Expr(&r_srcHi, &r_srcLo, env, triop->arg3, IEndianess);
5677
5678          // will set RMC when issuing instruction
5679          addInstr(env, PPCInstr_DfpQuantize128(fpop, r_dstHi, r_dstLo,
5680                                                r_srcHi, r_srcLo, rmc));
5681         *rHi = r_dstHi;
5682         *rLo = r_dstLo;
5683          return;
5684
5685       } else if (fpop == Pfp_DRRNDQ) {
5686          HReg r_srcHi = newVRegF(env);
5687          HReg r_srcLo = newVRegF(env);
5688          PPCRI* rmc = iselWordExpr_RI(env, triop->arg1, IEndianess);
5689          PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5690          PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
5691          HReg i8_val = iselWordExpr_R(env, triop->arg2, IEndianess);
5692          HReg r_zero = newVRegI( env );
5693
5694          iselDfp128Expr(&r_srcHi, &r_srcLo, env, triop->arg3, IEndianess);
5695
5696          /* dst will be used to pass in the left operand and get the result */
5697          /* Move I8 to float register to issue instruction.  Note, the
5698           * instruction only looks at the bottom 6 bits so we really don't
5699           * have to clear the upper bits since the iselWordExpr_R sets the
5700           * bottom 8-bits.
5701           */
5702          sub_from_sp( env, 16 );
5703
5704          if (env->mode64)
5705             addInstr(env, PPCInstr_Store(4, four_r1, i8_val, True/*mode64*/));
5706          else
5707             addInstr(env, PPCInstr_Store(4, four_r1, i8_val, False/*mode32*/));
5708
5709          /* Have to write to the upper bits to ensure they have been
5710           * initialized. The instruction ignores all but the lower 6-bits.
5711           */
5712          addInstr( env, PPCInstr_LI( r_zero, 0, env->mode64 ) );
5713          addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dstHi, zero_r1));
5714          addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dstLo, zero_r1));
5715
5716          add_to_sp( env, 16 );
5717
5718          // will set RMC when issuing instruction
5719          addInstr(env, PPCInstr_DfpQuantize128(fpop, r_dstHi, r_dstLo,
5720                                                r_srcHi, r_srcLo, rmc));
5721          *rHi = r_dstHi;
5722          *rLo = r_dstLo;
5723          return;
5724       }
5725  }
5726
5727    ppIRExpr( e );
5728    vpanic( "iselDfp128Expr(ppc64)" );
5729 }
5730
5731
5732 /*---------------------------------------------------------*/
5733 /*--- ISEL: SIMD (Vector) expressions, 128 bit.         ---*/
5734 /*---------------------------------------------------------*/
5735
5736 static HReg iselVecExpr ( ISelEnv* env, const IRExpr* e, IREndness IEndianess )
5737 {
5738    HReg r = iselVecExpr_wrk( env, e, IEndianess );
5739 #  if 0
5740    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5741 #  endif
5742    vassert(hregClass(r) == HRcVec128);
5743    vassert(hregIsVirtual(r));
5744    return r;
5745 }
5746
5747 /* DO NOT CALL THIS DIRECTLY */
5748 static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e,
5749                               IREndness IEndianess )
5750 {
5751    Bool mode64 = env->mode64;
5752    PPCAvOp op = Pav_INVALID;
5753    PPCAvFpOp fpop = Pavfp_INVALID;
5754    PPCAvOpBin128 opav128 = Pav_INVALIDBinary128;
5755    PPCAvOpTri128 optri128 = Pav_INVALIDTri128;
5756    IRType  ty = typeOfIRExpr(env->type_env,e);
5757    vassert(e);
5758    vassert(ty == Ity_V128);
5759
5760    if (e->tag == Iex_ITE) {
5761       HReg r1 = iselVecExpr( env, e->Iex.ITE.iftrue, IEndianess );
5762       HReg r0 = iselVecExpr( env, e->Iex.ITE.iffalse, IEndianess );
5763       HReg r_dst = newVRegV(env);
5764
5765       // Use OR operator to do move r1 to r_dst
5766       addInstr(env, PPCInstr_AvBinary( Pav_OR, r_dst, r0, r0));
5767       PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond, IEndianess);
5768       addInstr(env, PPCInstr_AvCMov(cc, r_dst, r1));
5769       return r_dst;
5770    }
5771
5772    if (e->tag == Iex_RdTmp) {
5773       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5774    }
5775
5776    if (e->tag == Iex_Get) {
5777       /* Guest state vectors are 16byte aligned,
5778          so don't need to worry here */
5779       HReg dst = newVRegV(env);
5780       addInstr(env,
5781                PPCInstr_AvLdSt( True/*load*/, 16, dst,
5782                                 PPCAMode_IR( e->Iex.Get.offset,
5783                                              GuestStatePtr(mode64) )));
5784       return dst;
5785    }
5786
5787    if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
5788       /* Need to be able to do V128 unaligned loads. The BE unaligned load
5789        * can be accomplised using the following code sequece from the ISA.
5790        * It uses the lvx instruction that does two aligned loads and then
5791        * permute the data to store the required data as if it had been an
5792        * unaligned load.
5793        *
5794        *   lvx  Vhi,0,Rb        # load MSQ, using the unaligned address in Rb
5795        *   lvsl Vp, 0,Rb        # Set permute control vector
5796        *   addi Rb,Rb,15        # Address of LSQ
5797        *   lvx  Vlo,0,Rb        # load LSQ
5798        *   vperm Vt,Vhi,Vlo,Vp  # align the data as requested
5799        */
5800
5801       HReg Vhi   = newVRegV(env);
5802       HReg Vlo   = newVRegV(env);
5803       HReg Vp    = newVRegV(env);
5804       HReg v_dst = newVRegV(env);
5805       HReg rB;
5806       HReg rB_plus_15 = newVRegI(env);
5807
5808       vassert(e->Iex.Load.ty == Ity_V128);
5809       rB = iselWordExpr_R( env, e->Iex.Load.addr, IEndianess );
5810
5811       // lvx  Vhi, 0, Rb
5812       addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, Vhi,
5813                                      PPCAMode_IR(0, rB)) );
5814
5815       if (IEndianess == Iend_LE)
5816          // lvsr Vp, 0, Rb
5817          addInstr(env, PPCInstr_AvSh( False/*right shift*/, Vp,
5818                                       PPCAMode_IR(0, rB)) );
5819       else
5820          // lvsl Vp, 0, Rb
5821          addInstr(env, PPCInstr_AvSh( True/*left shift*/, Vp,
5822                                       PPCAMode_IR(0, rB)) );
5823
5824       // addi Rb_plus_15, Rb, 15
5825       addInstr(env, PPCInstr_Alu( Palu_ADD, rB_plus_15,
5826                                   rB, PPCRH_Imm(True, toUShort(15))) );
5827
5828       // lvx  Vlo, 0, Rb_plus_15
5829       addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, Vlo,
5830                                      PPCAMode_IR(0, rB_plus_15)) );
5831
5832       if (IEndianess == Iend_LE)
5833          // vperm Vt, Vhi, Vlo, Vp
5834          addInstr(env, PPCInstr_AvPerm( v_dst, Vlo, Vhi, Vp ));
5835       else
5836          // vperm Vt, Vhi, Vlo, Vp
5837          addInstr(env, PPCInstr_AvPerm( v_dst, Vhi, Vlo, Vp ));
5838
5839       return v_dst;
5840    }
5841
5842    if (e->tag == Iex_Unop) {
5843       switch (e->Iex.Unop.op) {
5844
5845       case Iop_F16toF64x2:
5846          {
5847             HReg dst = newVRegV(env);
5848             HReg arg  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5849             /* Note: PPC only coverts the 16-bt value in the upper word
5850              *       to a 64-bit value stored in the upper word.  The
5851              *       contents of the lower word is undefined.
5852              */
5853             addInstr(env, PPCInstr_AvUnary(Pav_F16toF64x2, dst, arg));
5854             return dst;
5855          }
5856
5857       case Iop_F64toF16x2_DEP:
5858          {
5859             HReg dst = newVRegV(env);
5860             HReg arg  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5861             /* Note: PPC only coverts the 64-bt value in the upper 64-bit of V128
5862              * to a 16-bit value stored in the upper 64-bits of the result
5863              * V128.  The contents of the lower 64-bits is undefined.
5864              */
5865             addInstr(env, PPCInstr_AvUnary(Pav_F64toF16x2, dst, arg));
5866             return dst;
5867          }
5868
5869       case Iop_F16toF32x4:
5870          {
5871             HReg src = newVRegV(env);
5872             HReg dst = newVRegV(env);
5873             HReg arg = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
5874             PPCAMode *am_off0, *am_off8;
5875             HReg r_aligned16;
5876
5877             vassert(mode64);
5878             /* need to put I64 src into upper 64-bits of vector register,
5879                use stack */
5880             sub_from_sp( env, 32 );     // Move SP down
5881
5882             /* Get a quadword aligned address within our stack space */
5883             r_aligned16 = get_sp_aligned16( env );
5884             am_off0  = PPCAMode_IR( 0, r_aligned16 );
5885             am_off8  = PPCAMode_IR( 8, r_aligned16 );
5886
5887             /* Store I64 to stack */
5888
5889             if (IEndianess == Iend_LE) {
5890                addInstr(env, PPCInstr_Store( 8, am_off8, arg, mode64 ));
5891             } else {
5892                addInstr(env, PPCInstr_Store( 8, am_off0, arg, mode64 ));
5893             }
5894
5895             /* Fetch new v128 src back from stack. */
5896             addInstr(env, PPCInstr_AvLdSt(True/*ld*/, 16, src, am_off0));
5897
5898             /* issue instruction */
5899             addInstr(env, PPCInstr_AvUnary(Pav_F16toF32x4, dst, src));
5900             add_to_sp( env, 32 );          // Reset SP
5901
5902             return dst;
5903          }
5904
5905       case Iop_F32toF16x4_DEP:
5906          {
5907             HReg dst = newVRegI(env);
5908             HReg tmp = newVRegV(env);
5909             HReg arg  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5910             PPCAMode *am_off0, *am_off8;
5911             HReg r_aligned16;
5912
5913             /* Instruction returns a V128, the Iop_F32toF16x4 needs to return
5914              * I64.  Move the upper 64-bits from the instruction to an I64 via
5915              * the stack and return it.
5916              */
5917             sub_from_sp( env, 32 );     // Move SP down
5918
5919             addInstr(env, PPCInstr_AvUnary(Pav_F32toF16x4, tmp, arg));
5920
5921             /* Get a quadword aligned address within our stack space */
5922             r_aligned16 = get_sp_aligned16( env );
5923             am_off0  = PPCAMode_IR( 0, r_aligned16 );
5924             am_off8  = PPCAMode_IR( 8, r_aligned16 );
5925
5926             /* Store v128 tmp to stack. */
5927             addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, tmp, am_off0));
5928
5929             /* Fetch I64 from stack */
5930             if (IEndianess == Iend_LE) {
5931                addInstr(env, PPCInstr_Load( 8, dst, am_off8, mode64 ));
5932             } else {
5933                addInstr(env, PPCInstr_Load( 8, dst, am_off0, mode64 ));
5934             }
5935
5936             add_to_sp( env, 32 );          // Reset SP
5937             return dst;
5938          }
5939
5940       case Iop_NotV128: {
5941          HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5942          HReg dst = newVRegV(env);
5943          addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, arg));
5944          return dst;
5945       }
5946
5947       case Iop_CmpNEZ8x16: {
5948          HReg arg  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5949          HReg zero = newVRegV(env);
5950          HReg dst  = newVRegV(env);
5951          addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
5952          addInstr(env, PPCInstr_AvBin8x16(Pav_CMPEQU, dst, arg, zero));
5953          addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
5954          return dst;
5955       }
5956
5957       case Iop_CmpNEZ16x8: {
5958          HReg arg  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5959          HReg zero = newVRegV(env);
5960          HReg dst  = newVRegV(env);
5961          addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
5962          addInstr(env, PPCInstr_AvBin16x8(Pav_CMPEQU, dst, arg, zero));
5963          addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
5964          return dst;
5965       }
5966
5967       case Iop_CmpNEZ32x4: {
5968          HReg arg  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5969          HReg zero = newVRegV(env);
5970          HReg dst  = newVRegV(env);
5971          addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
5972          addInstr(env, PPCInstr_AvBin32x4(Pav_CMPEQU, dst, arg, zero));
5973          addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
5974          return dst;
5975       }
5976
5977       case Iop_CmpNEZ64x2: {
5978          HReg arg  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5979          HReg zero = newVRegV(env);
5980          HReg dst  = newVRegV(env);
5981          addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
5982          addInstr(env, PPCInstr_AvBin64x2(Pav_CMPEQU, dst, arg, zero));
5983          addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
5984          return dst;
5985       }
5986
5987       case Iop_RecipEst32Fx4: fpop = Pavfp_RCPF;    goto do_32Fx4_unary;
5988       case Iop_RSqrtEst32Fx4: fpop = Pavfp_RSQRTF;  goto do_32Fx4_unary;
5989       case Iop_Log2_32Fx4:    fpop = Pavfp_Log2;    goto do_32Fx4_unary;
5990       case Iop_Exp2_32Fx4:    fpop = Pavfp_Exp2;    goto do_32Fx4_unary;
5991       case Iop_I32UtoF32x4_DEP: fpop = Pavfp_CVTU2F;  goto do_32Fx4_unary;
5992       case Iop_I32StoF32x4_DEP: fpop = Pavfp_CVTS2F;  goto do_32Fx4_unary;
5993       case Iop_QF32toI32Ux4_RZ: fpop = Pavfp_QCVTF2U; goto do_32Fx4_unary;
5994       case Iop_QF32toI32Sx4_RZ: fpop = Pavfp_QCVTF2S; goto do_32Fx4_unary;
5995       case Iop_RoundF32x4_RM: fpop = Pavfp_ROUNDM;  goto do_32Fx4_unary;
5996       case Iop_RoundF32x4_RP: fpop = Pavfp_ROUNDP;  goto do_32Fx4_unary;
5997       case Iop_RoundF32x4_RN: fpop = Pavfp_ROUNDN;  goto do_32Fx4_unary;
5998       case Iop_RoundF32x4_RZ: fpop = Pavfp_ROUNDZ;  goto do_32Fx4_unary;
5999       do_32Fx4_unary:
6000       {
6001          HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
6002          HReg dst = newVRegV(env);
6003          addInstr(env, PPCInstr_AvUn32Fx4(fpop, dst, arg));
6004          return dst;
6005       }
6006
6007       case Iop_32UtoV128: {
6008          HReg r_aligned16, r_zeros;
6009          HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
6010          HReg   dst = newVRegV(env);
6011          PPCAMode *am_off0, *am_off4, *am_off8, *am_off12;
6012          sub_from_sp( env, 32 );     // Move SP down
6013
6014          /* Get a quadword aligned address within our stack space */
6015          r_aligned16 = get_sp_aligned16( env );
6016          am_off0  = PPCAMode_IR( 0,  r_aligned16 );
6017          am_off4  = PPCAMode_IR( 4,  r_aligned16 );
6018          am_off8  = PPCAMode_IR( 8,  r_aligned16 );
6019          am_off12 = PPCAMode_IR( 12, r_aligned16 );
6020
6021          /* Store zeros */
6022          r_zeros = newVRegI(env);
6023          addInstr(env, PPCInstr_LI(r_zeros, 0x0, mode64));
6024          if (IEndianess == Iend_LE)
6025             addInstr(env, PPCInstr_Store( 4, am_off0, r_src, mode64 ));
6026          else
6027             addInstr(env, PPCInstr_Store( 4, am_off0, r_zeros, mode64 ));
6028          addInstr(env, PPCInstr_Store( 4, am_off4, r_zeros, mode64 ));
6029          addInstr(env, PPCInstr_Store( 4, am_off8, r_zeros, mode64 ));
6030
6031          /* Store r_src in low word of quadword-aligned mem */
6032          if (IEndianess == Iend_LE)
6033             addInstr(env, PPCInstr_Store( 4, am_off12, r_zeros, mode64 ));
6034          else
6035             addInstr(env, PPCInstr_Store( 4, am_off12, r_src, mode64 ));
6036
6037          /* Load word into low word of quadword vector reg */
6038          if (IEndianess == Iend_LE)
6039             addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 4, dst, am_off0 ));
6040          else
6041             addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 4, dst, am_off12 ));
6042
6043          add_to_sp( env, 32 );       // Reset SP
6044          return dst;
6045       }
6046
6047       case Iop_Dup8x16:
6048       case Iop_Dup16x8:
6049       case Iop_Dup32x4:
6050          return mk_AvDuplicateRI(env, e->Iex.Unop.arg, IEndianess);
6051
6052       case Iop_CipherSV128: op = Pav_CIPHERSUBV128; goto do_AvCipherV128Un;
6053       do_AvCipherV128Un: {
6054          HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
6055          HReg dst = newVRegV(env);
6056          addInstr(env, PPCInstr_AvCipherV128Unary(op, dst, arg));
6057          return dst;
6058       }
6059
6060       case Iop_Clz8x16: op = Pav_ZEROCNTBYTE;   goto do_zerocnt;
6061       case Iop_Clz16x8: op = Pav_ZEROCNTHALF;   goto do_zerocnt;
6062       case Iop_Clz32x4: op = Pav_ZEROCNTWORD;   goto do_zerocnt;
6063       case Iop_Clz64x2: op = Pav_ZEROCNTDBL;    goto do_zerocnt;
6064       case Iop_Ctz8x16: op = Pav_TRAILINGZEROCNTBYTE; goto do_zerocnt;
6065       case Iop_Ctz16x8: op = Pav_TRAILINGZEROCNTHALF; goto do_zerocnt;
6066       case Iop_Ctz32x4: op = Pav_TRAILINGZEROCNTWORD; goto do_zerocnt;
6067       case Iop_Ctz64x2: op = Pav_TRAILINGZEROCNTDBL;  goto do_zerocnt;
6068       case Iop_PwBitMtxXpose64x2: op = Pav_BITMTXXPOSE;  goto do_zerocnt;
6069       do_zerocnt:
6070       {
6071         HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
6072         HReg dst = newVRegV(env);
6073         addInstr(env, PPCInstr_AvUnary(op, dst, arg));
6074         return dst;
6075       }
6076
6077       /* BCD Iops */
6078       case Iop_BCD128toI128S:
6079          {
6080             HReg dst  = newVRegV(env);
6081             HReg arg  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
6082             addInstr(env, PPCInstr_AvUnary( Pav_BCD128toI128S, dst, arg ) );
6083             return dst;
6084          }
6085
6086       case Iop_MulI128by10:       op = Pav_MulI128by10;      goto do_MulI128;
6087       case Iop_MulI128by10Carry:  op = Pav_MulI128by10Carry; goto do_MulI128;
6088       do_MulI128: {
6089             HReg dst = newVRegV(env);
6090             HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
6091             addInstr(env, PPCInstr_AvUnary(op, dst, arg));
6092             return dst;
6093          }
6094
6095        case Iop_ReinterpI128asV128: {
6096           PPCAMode* am_addr;
6097           PPCAMode* am_addr4;
6098           HReg rHi, rLo;
6099           HReg dst  = newVRegV(env);
6100
6101           iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
6102
6103           sub_from_sp( env, 16 );     // Move SP down 16 bytes
6104           am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
6105           am_addr4 = advance4(env, am_addr);
6106
6107           // store the two 64-bit pars
6108           addInstr(env, PPCInstr_Store( 8, am_addr,  rHi, mode64 ));
6109           addInstr(env, PPCInstr_Store( 8, am_addr4, rLo, mode64 ));
6110
6111           // load as Ity_V128
6112           addInstr(env, PPCInstr_AvLdSt( True/*fetch*/, 16, dst, am_addr ));
6113
6114           add_to_sp( env, 16 );       // Reset SP
6115           return dst;
6116         }
6117
6118       default:
6119          break;
6120       } /* switch (e->Iex.Unop.op) */
6121    } /* if (e->tag == Iex_Unop) */
6122
6123    if (e->tag == Iex_Binop) {
6124       switch (e->Iex.Binop.op) {
6125
6126       case Iop_64HLtoV128: {
6127          if (!mode64) {
6128             HReg     r3, r2, r1, r0, r_aligned16;
6129             PPCAMode *am_off0, *am_off4, *am_off8, *am_off12;
6130             HReg     dst = newVRegV(env);
6131             /* do this via the stack (easy, convenient, etc) */
6132             sub_from_sp( env, 32 );        // Move SP down
6133
6134             // get a quadword aligned address within our stack space
6135             r_aligned16 = get_sp_aligned16( env );
6136             am_off0  = PPCAMode_IR( 0,  r_aligned16 );
6137             am_off4  = PPCAMode_IR( 4,  r_aligned16 );
6138             am_off8  = PPCAMode_IR( 8,  r_aligned16 );
6139             am_off12 = PPCAMode_IR( 12, r_aligned16 );
6140
6141             /* Do the less significant 64 bits */
6142             iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2, IEndianess);
6143             addInstr(env, PPCInstr_Store( 4, am_off12, r0, mode64 ));
6144             addInstr(env, PPCInstr_Store( 4, am_off8,  r1, mode64 ));
6145             /* Do the more significant 64 bits */
6146             iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1, IEndianess);
6147             addInstr(env, PPCInstr_Store( 4, am_off4, r2, mode64 ));
6148             addInstr(env, PPCInstr_Store( 4, am_off0, r3, mode64 ));
6149
6150             /* Fetch result back from stack. */
6151             addInstr(env, PPCInstr_AvLdSt(True/*ld*/, 16, dst, am_off0));
6152
6153             add_to_sp( env, 32 );          // Reset SP
6154             return dst;
6155          } else {
6156             HReg     rHi = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
6157             HReg     rLo = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
6158             HReg     dst = newVRegV(env);
6159             HReg     r_aligned16;
6160             PPCAMode *am_off0, *am_off8;
6161             /* do this via the stack (easy, convenient, etc) */
6162             sub_from_sp( env, 32 );        // Move SP down
6163
6164             // get a quadword aligned address within our stack space
6165             r_aligned16 = get_sp_aligned16( env );
6166             am_off0  = PPCAMode_IR( 0,  r_aligned16 );
6167             am_off8  = PPCAMode_IR( 8,  r_aligned16 );
6168
6169             /* Store 2*I64 to stack */
6170             if (IEndianess == Iend_LE) {
6171                addInstr(env, PPCInstr_Store( 8, am_off0, rLo, mode64 ));
6172                addInstr(env, PPCInstr_Store( 8, am_off8, rHi, mode64 ));
6173             } else {
6174                addInstr(env, PPCInstr_Store( 8, am_off0, rHi, mode64 ));
6175                addInstr(env, PPCInstr_Store( 8, am_off8, rLo, mode64 ));
6176             }
6177             /* Fetch result back from stack. */
6178             addInstr(env, PPCInstr_AvLdSt(True/*ld*/, 16, dst, am_off0));
6179
6180             add_to_sp( env, 32 );          // Reset SP
6181             return dst;
6182          }
6183       }
6184
6185       case Iop_Max32Fx4:   fpop = Pavfp_MAXF;   goto do_32Fx4;
6186       case Iop_Min32Fx4:   fpop = Pavfp_MINF;   goto do_32Fx4;
6187       case Iop_CmpEQ32Fx4: fpop = Pavfp_CMPEQF; goto do_32Fx4;
6188       case Iop_CmpGT32Fx4: fpop = Pavfp_CMPGTF; goto do_32Fx4;
6189       case Iop_CmpGE32Fx4: fpop = Pavfp_CMPGEF; goto do_32Fx4;
6190       do_32Fx4:
6191       {
6192          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6193          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6194          HReg dst = newVRegV(env);
6195          addInstr(env, PPCInstr_AvBin32Fx4(fpop, dst, argL, argR));
6196          return dst;
6197       }
6198
6199       case Iop_CmpLE32Fx4: {
6200          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6201          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6202          HReg dst = newVRegV(env);
6203
6204          /* stay consistent with native ppc compares:
6205             if a left/right lane holds a nan, return zeros for that lane
6206             so: le == NOT(gt OR isNan)
6207           */
6208          HReg isNanLR = newVRegV(env);
6209          HReg isNanL = isNan(env, argL, IEndianess);
6210          HReg isNanR = isNan(env, argR, IEndianess);
6211          addInstr(env, PPCInstr_AvBinary(Pav_OR, isNanLR,
6212                                          isNanL, isNanR));
6213
6214          addInstr(env, PPCInstr_AvBin32Fx4(Pavfp_CMPGTF, dst,
6215                                            argL, argR));
6216          addInstr(env, PPCInstr_AvBinary(Pav_OR, dst, dst, isNanLR));
6217          addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
6218          return dst;
6219       }
6220
6221       case Iop_AndV128:    op = Pav_AND;      goto do_AvBin;
6222       case Iop_OrV128:     op = Pav_OR;       goto do_AvBin;
6223       case Iop_XorV128:    op = Pav_XOR;      goto do_AvBin;
6224       do_AvBin: {
6225          HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6226          HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6227          HReg dst  = newVRegV(env);
6228          addInstr(env, PPCInstr_AvBinary(op, dst, arg1, arg2));
6229          return dst;
6230       }
6231
6232       case Iop_Shl8x16:    op = Pav_SHL;    goto do_AvBin8x16;
6233       case Iop_Shr8x16:    op = Pav_SHR;    goto do_AvBin8x16;
6234       case Iop_Sar8x16:    op = Pav_SAR;    goto do_AvBin8x16;
6235       case Iop_Rol8x16:    op = Pav_ROTL;   goto do_AvBin8x16;
6236       case Iop_InterleaveHI8x16: op = Pav_MRGHI;  goto do_AvBin8x16;
6237       case Iop_InterleaveLO8x16: op = Pav_MRGLO;  goto do_AvBin8x16;
6238       case Iop_Add8x16:    op = Pav_ADDU;   goto do_AvBin8x16;
6239       case Iop_QAdd8Ux16:  op = Pav_QADDU;  goto do_AvBin8x16;
6240       case Iop_QAdd8Sx16:  op = Pav_QADDS;  goto do_AvBin8x16;
6241       case Iop_Sub8x16:    op = Pav_SUBU;   goto do_AvBin8x16;
6242       case Iop_QSub8Ux16:  op = Pav_QSUBU;  goto do_AvBin8x16;
6243       case Iop_QSub8Sx16:  op = Pav_QSUBS;  goto do_AvBin8x16;
6244       case Iop_Avg8Ux16:   op = Pav_AVGU;   goto do_AvBin8x16;
6245       case Iop_Avg8Sx16:   op = Pav_AVGS;   goto do_AvBin8x16;
6246       case Iop_Max8Ux16:   op = Pav_MAXU;   goto do_AvBin8x16;
6247       case Iop_Max8Sx16:   op = Pav_MAXS;   goto do_AvBin8x16;
6248       case Iop_Min8Ux16:   op = Pav_MINU;   goto do_AvBin8x16;
6249       case Iop_Min8Sx16:   op = Pav_MINS;   goto do_AvBin8x16;
6250       case Iop_MullEven8Ux16: op = Pav_OMULU;  goto do_AvBin8x16;
6251       case Iop_MullEven8Sx16: op = Pav_OMULS;  goto do_AvBin8x16;
6252       case Iop_CmpEQ8x16:  op = Pav_CMPEQU; goto do_AvBin8x16;
6253       case Iop_CmpGT8Ux16: op = Pav_CMPGTU; goto do_AvBin8x16;
6254       case Iop_CmpGT8Sx16: op = Pav_CMPGTS; goto do_AvBin8x16;
6255       case Iop_PolynomialMulAdd8x16: op = Pav_POLYMULADD; goto do_AvBin8x16;
6256       do_AvBin8x16: {
6257          HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6258          HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6259          HReg dst  = newVRegV(env);
6260          addInstr(env, PPCInstr_AvBin8x16(op, dst, arg1, arg2));
6261          return dst;
6262       }
6263
6264       case Iop_Shl16x8:    op = Pav_SHL;    goto do_AvBin16x8;
6265       case Iop_Shr16x8:    op = Pav_SHR;    goto do_AvBin16x8;
6266       case Iop_Sar16x8:    op = Pav_SAR;    goto do_AvBin16x8;
6267       case Iop_Rol16x8:    op = Pav_ROTL;   goto do_AvBin16x8;
6268       case Iop_NarrowBin16to8x16:    op = Pav_PACKUU;  goto do_AvBin16x8;
6269       case Iop_QNarrowBin16Uto8Ux16: op = Pav_QPACKUU; goto do_AvBin16x8;
6270       case Iop_QNarrowBin16Sto8Sx16: op = Pav_QPACKSS; goto do_AvBin16x8;
6271       case Iop_InterleaveHI16x8:  op = Pav_MRGHI;  goto do_AvBin16x8;
6272       case Iop_InterleaveLO16x8:  op = Pav_MRGLO;  goto do_AvBin16x8;
6273       case Iop_Add16x8:    op = Pav_ADDU;   goto do_AvBin16x8;
6274       case Iop_QAdd16Ux8:  op = Pav_QADDU;  goto do_AvBin16x8;
6275       case Iop_QAdd16Sx8:  op = Pav_QADDS;  goto do_AvBin16x8;
6276       case Iop_Sub16x8:    op = Pav_SUBU;   goto do_AvBin16x8;
6277       case Iop_QSub16Ux8:  op = Pav_QSUBU;  goto do_AvBin16x8;
6278       case Iop_QSub16Sx8:  op = Pav_QSUBS;  goto do_AvBin16x8;
6279       case Iop_Avg16Ux8:   op = Pav_AVGU;   goto do_AvBin16x8;
6280       case Iop_Avg16Sx8:   op = Pav_AVGS;   goto do_AvBin16x8;
6281       case Iop_Max16Ux8:   op = Pav_MAXU;   goto do_AvBin16x8;
6282       case Iop_Max16Sx8:   op = Pav_MAXS;   goto do_AvBin16x8;
6283       case Iop_Min16Ux8:   op = Pav_MINU;   goto do_AvBin16x8;
6284       case Iop_Min16Sx8:   op = Pav_MINS;   goto do_AvBin16x8;
6285       case Iop_MullEven16Ux8: op = Pav_OMULU;  goto do_AvBin16x8;
6286       case Iop_MullEven16Sx8: op = Pav_OMULS;  goto do_AvBin16x8;
6287       case Iop_CmpEQ16x8:  op = Pav_CMPEQU; goto do_AvBin16x8;
6288       case Iop_CmpGT16Ux8: op = Pav_CMPGTU; goto do_AvBin16x8;
6289       case Iop_CmpGT16Sx8: op = Pav_CMPGTS; goto do_AvBin16x8;
6290       case Iop_PolynomialMulAdd16x8: op = Pav_POLYMULADD; goto do_AvBin16x8;
6291       do_AvBin16x8: {
6292          HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6293          HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6294          HReg dst  = newVRegV(env);
6295          addInstr(env, PPCInstr_AvBin16x8(op, dst, arg1, arg2));
6296          return dst;
6297       }
6298
6299       case Iop_Shl32x4:    op = Pav_SHL;    goto do_AvBin32x4;
6300       case Iop_Shr32x4:    op = Pav_SHR;    goto do_AvBin32x4;
6301       case Iop_Sar32x4:    op = Pav_SAR;    goto do_AvBin32x4;
6302       case Iop_Rol32x4:    op = Pav_ROTL;   goto do_AvBin32x4;
6303       case Iop_NarrowBin32to16x8:    op = Pav_PACKUU;  goto do_AvBin32x4;
6304       case Iop_QNarrowBin32Uto16Ux8: op = Pav_QPACKUU; goto do_AvBin32x4;
6305       case Iop_QNarrowBin32Sto16Sx8: op = Pav_QPACKSS; goto do_AvBin32x4;
6306       case Iop_InterleaveHI32x4:  op = Pav_MRGHI;  goto do_AvBin32x4;
6307       case Iop_InterleaveLO32x4:  op = Pav_MRGLO;  goto do_AvBin32x4;
6308       case Iop_Add32x4:    op = Pav_ADDU;   goto do_AvBin32x4;
6309       case Iop_QAdd32Ux4:  op = Pav_QADDU;  goto do_AvBin32x4;
6310       case Iop_QAdd32Sx4:  op = Pav_QADDS;  goto do_AvBin32x4;
6311       case Iop_Sub32x4:    op = Pav_SUBU;   goto do_AvBin32x4;
6312       case Iop_QSub32Ux4:  op = Pav_QSUBU;  goto do_AvBin32x4;
6313       case Iop_QSub32Sx4:  op = Pav_QSUBS;  goto do_AvBin32x4;
6314       case Iop_Avg32Ux4:   op = Pav_AVGU;   goto do_AvBin32x4;
6315       case Iop_Avg32Sx4:   op = Pav_AVGS;   goto do_AvBin32x4;
6316       case Iop_Max32Ux4:   op = Pav_MAXU;   goto do_AvBin32x4;
6317       case Iop_Max32Sx4:   op = Pav_MAXS;   goto do_AvBin32x4;
6318       case Iop_Min32Ux4:   op = Pav_MINU;   goto do_AvBin32x4;
6319       case Iop_Min32Sx4:   op = Pav_MINS;   goto do_AvBin32x4;
6320       case Iop_Mul32x4:    op = Pav_MULU;   goto do_AvBin32x4;
6321       case Iop_MullEven32Ux4: op = Pav_OMULU;  goto do_AvBin32x4;
6322       case Iop_MullEven32Sx4: op = Pav_OMULS;  goto do_AvBin32x4;
6323       case Iop_CmpEQ32x4:  op = Pav_CMPEQU; goto do_AvBin32x4;
6324       case Iop_CmpGT32Ux4: op = Pav_CMPGTU; goto do_AvBin32x4;
6325       case Iop_CmpGT32Sx4: op = Pav_CMPGTS; goto do_AvBin32x4;
6326       case Iop_CatOddLanes32x4:  op = Pav_CATODD;  goto do_AvBin32x4;
6327       case Iop_CatEvenLanes32x4: op = Pav_CATEVEN; goto do_AvBin32x4;
6328       case Iop_PolynomialMulAdd32x4: op = Pav_POLYMULADD; goto do_AvBin32x4;
6329       do_AvBin32x4: {
6330          HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6331          HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6332          HReg dst  = newVRegV(env);
6333          addInstr(env, PPCInstr_AvBin32x4(op, dst, arg1, arg2));
6334          return dst;
6335       }
6336
6337       case Iop_Shl64x2:    op = Pav_SHL;    goto do_AvBin64x2;
6338       case Iop_Shr64x2:    op = Pav_SHR;    goto do_AvBin64x2;
6339       case Iop_Sar64x2:    op = Pav_SAR;    goto do_AvBin64x2;
6340       case Iop_Rol64x2:    op = Pav_ROTL;   goto do_AvBin64x2;
6341       case Iop_NarrowBin64to32x4:    op = Pav_PACKUU;  goto do_AvBin64x2;
6342       case Iop_QNarrowBin64Sto32Sx4: op = Pav_QPACKSS; goto do_AvBin64x2;
6343       case Iop_QNarrowBin64Uto32Ux4: op = Pav_QPACKUU; goto do_AvBin64x2;
6344       case Iop_InterleaveHI64x2:  op = Pav_MRGHI;  goto do_AvBin64x2;
6345       case Iop_InterleaveLO64x2:  op = Pav_MRGLO;  goto do_AvBin64x2;
6346       case Iop_Add64x2:    op = Pav_ADDU;   goto do_AvBin64x2;
6347       case Iop_Sub64x2:    op = Pav_SUBU;   goto do_AvBin64x2;
6348       case Iop_Max64Ux2:   op = Pav_MAXU;   goto do_AvBin64x2;
6349       case Iop_Max64Sx2:   op = Pav_MAXS;   goto do_AvBin64x2;
6350       case Iop_Min64Ux2:   op = Pav_MINU;   goto do_AvBin64x2;
6351       case Iop_Min64Sx2:   op = Pav_MINS;   goto do_AvBin64x2;
6352       case Iop_CmpEQ64x2:  op = Pav_CMPEQU; goto do_AvBin64x2;
6353       case Iop_CmpGT64Ux2: op = Pav_CMPGTU; goto do_AvBin64x2;
6354       case Iop_CmpGT64Sx2: op = Pav_CMPGTS; goto do_AvBin64x2;
6355       case Iop_PolynomialMulAdd64x2: op = Pav_POLYMULADD; goto do_AvBin64x2;
6356       do_AvBin64x2: {
6357          HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6358          HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6359          HReg dst  = newVRegV(env);
6360          addInstr(env, PPCInstr_AvBin64x2(op, dst, arg1, arg2));
6361          return dst;
6362       }
6363
6364       case Iop_ShlN8x16: op = Pav_SHL; goto do_AvShift8x16;
6365       case Iop_SarN8x16: op = Pav_SAR; goto do_AvShift8x16;
6366       do_AvShift8x16: {
6367          HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6368          HReg dst    = newVRegV(env);
6369          HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
6370          addInstr(env, PPCInstr_AvBin8x16(op, dst, r_src, v_shft));
6371          return dst;
6372       }
6373
6374       case Iop_ShlN16x8: op = Pav_SHL; goto do_AvShift16x8;
6375       case Iop_ShrN16x8: op = Pav_SHR; goto do_AvShift16x8;
6376       case Iop_SarN16x8: op = Pav_SAR; goto do_AvShift16x8;
6377       do_AvShift16x8: {
6378          HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6379          HReg dst    = newVRegV(env);
6380          HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
6381          addInstr(env, PPCInstr_AvBin16x8(op, dst, r_src, v_shft));
6382          return dst;
6383       }
6384
6385       case Iop_ShlN32x4: op = Pav_SHL; goto do_AvShift32x4;
6386       case Iop_ShrN32x4: op = Pav_SHR; goto do_AvShift32x4;
6387       case Iop_SarN32x4: op = Pav_SAR; goto do_AvShift32x4;
6388       do_AvShift32x4: {
6389          HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6390          HReg dst    = newVRegV(env);
6391          HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
6392          addInstr(env, PPCInstr_AvBin32x4(op, dst, r_src, v_shft));
6393          return dst;
6394       }
6395
6396       case Iop_ShlN64x2: op = Pav_SHL; goto do_AvShift64x2;
6397       case Iop_ShrN64x2: op = Pav_SHR; goto do_AvShift64x2;
6398       case Iop_SarN64x2: op = Pav_SAR; goto do_AvShift64x2;
6399       do_AvShift64x2: {
6400          HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6401          HReg dst    = newVRegV(env);
6402          HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
6403          addInstr(env, PPCInstr_AvBin64x2(op, dst, r_src, v_shft));
6404          return dst;
6405       }
6406
6407       case Iop_ShrV128: op = Pav_SHR; goto do_AvShiftV128;
6408       case Iop_ShlV128: op = Pav_SHL; goto do_AvShiftV128;
6409       do_AvShiftV128: {
6410          HReg dst    = newVRegV(env);
6411          HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6412          HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
6413          /* Note: shift value gets masked by 127 */
6414          addInstr(env, PPCInstr_AvBinary(op, dst, r_src, v_shft));
6415          return dst;
6416       }
6417
6418       case Iop_Perm8x16: {
6419          HReg dst   = newVRegV(env);
6420          HReg v_src = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6421          HReg v_ctl = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6422          addInstr(env, PPCInstr_AvPerm(dst, v_src, v_src, v_ctl));
6423          return dst;
6424       }
6425
6426       case Iop_CipherV128:  op = Pav_CIPHERV128;   goto do_AvCipherV128;
6427       case Iop_CipherLV128: op = Pav_CIPHERLV128;  goto do_AvCipherV128;
6428       case Iop_NCipherV128: op = Pav_NCIPHERV128;  goto do_AvCipherV128;
6429       case Iop_NCipherLV128:op = Pav_NCIPHERLV128; goto do_AvCipherV128;
6430       do_AvCipherV128: {
6431          HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6432          HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6433          HReg dst  = newVRegV(env);
6434          addInstr(env, PPCInstr_AvCipherV128Binary(op, dst, arg1, arg2));
6435          return dst;
6436       }
6437
6438       case Iop_SHA256:op = Pav_SHA256; goto do_AvHashV128;
6439       case Iop_SHA512:op = Pav_SHA512; goto do_AvHashV128;
6440       do_AvHashV128: {
6441          HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6442          HReg dst  = newVRegV(env);
6443          PPCRI* s_field = iselWordExpr_RI(env, e->Iex.Binop.arg2, IEndianess);
6444          addInstr(env, PPCInstr_AvHashV128Binary(op, dst, arg1, s_field));
6445          return dst;
6446       }
6447
6448       /* BCD Iops */
6449       case Iop_I128StoBCD128:
6450          {
6451             HReg dst = newVRegV(env);
6452             HReg arg = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6453             PPCRI* ps = iselWordExpr_RI(env, e->Iex.Binop.arg2, IEndianess);
6454
6455             addInstr(env, PPCInstr_AvBinaryInt( Pav_I128StoBCD128, dst, arg,
6456                                                 ps ) );
6457             return dst;
6458          }
6459
6460       case Iop_MulI128by10E:       op = Pav_MulI128by10E;      goto do_MulI128E;
6461       case Iop_MulI128by10ECarry:  op = Pav_MulI128by10ECarry; goto do_MulI128E;
6462       do_MulI128E: {
6463             HReg dst  = newVRegV(env);
6464             HReg argL = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6465             HReg argR = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6466             addInstr(env, PPCInstr_AvBinary(op, dst, argL, argR));
6467             return dst;
6468          }
6469
6470       case Iop_BCDAdd:op = Pav_BCDAdd; goto do_AvBCDV128;
6471       case Iop_BCDSub:op = Pav_BCDSub; goto do_AvBCDV128;
6472       do_AvBCDV128: {
6473          HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6474          HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6475          HReg dst  = newVRegV(env);
6476          addInstr(env, PPCInstr_AvBCDV128Binary(op, dst, arg1, arg2));
6477          return dst;
6478       }
6479
6480       case Iop_DivU128: opav128 = Pav_DivU128; goto do_IntArithBinaryI128;
6481       case Iop_DivS128: opav128 = Pav_DivS128; goto do_IntArithBinaryI128;
6482       case Iop_DivU128E: opav128 = Pav_DivU128E; goto do_IntArithBinaryI128;
6483       case Iop_DivS128E: opav128 = Pav_DivS128E; goto do_IntArithBinaryI128;
6484       case Iop_ModU128:  opav128 = Pav_ModU128;  goto do_IntArithBinaryI128;
6485       case Iop_ModS128:  opav128 = Pav_ModS128;  goto do_IntArithBinaryI128;
6486       do_IntArithBinaryI128: {
6487          HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6488          HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6489          HReg dst  = newVRegV(env);
6490          addInstr(env, PPCInstr_AvBinaryInt128(opav128, dst, arg1, arg2));
6491          return dst;
6492       }
6493
6494       default:
6495          break;
6496       } /* switch (e->Iex.Binop.op) */
6497    } /* if (e->tag == Iex_Binop) */
6498
6499    if (e->tag == Iex_Triop) {
6500       IRTriop *triop = e->Iex.Triop.details;
6501       switch (triop->op) {
6502       case Iop_Add32Fx4: fpop = Pavfp_ADDF; goto do_32Fx4_with_rm;
6503       case Iop_Sub32Fx4: fpop = Pavfp_SUBF; goto do_32Fx4_with_rm;
6504       case Iop_Mul32Fx4: fpop = Pavfp_MULF; goto do_32Fx4_with_rm;
6505       do_32Fx4_with_rm:
6506       {
6507          HReg argL = iselVecExpr(env, triop->arg2, IEndianess);
6508          HReg argR = iselVecExpr(env, triop->arg3, IEndianess);
6509          HReg dst  = newVRegV(env);
6510          /* FIXME: this is bogus, in the sense that Altivec ignores
6511             FPSCR.RM, at least for some FP operations.  So setting the
6512             RM is pointless.  This is only really correct in the case
6513             where the RM is known, at JIT time, to be Irrm_NEAREST,
6514             since -- at least for Altivec FP add/sub/mul -- the
6515             emitted insn is hardwired to round to nearest. */
6516          set_FPU_rounding_mode(env, triop->arg1, IEndianess);
6517          addInstr(env, PPCInstr_AvBin32Fx4(fpop, dst, argL, argR));
6518          return dst;
6519       }
6520
6521       case Iop_2xMultU64Add128CarryOut:
6522          optri128 = Pav_2xMultU64Add128CarryOut;  goto do_IntArithTrinaryI128;
6523       do_IntArithTrinaryI128: {
6524          HReg arg1 = iselVecExpr(env, triop->arg1, IEndianess);
6525          HReg arg2 = iselVecExpr(env, triop->arg2, IEndianess);
6526          HReg arg3 = iselVecExpr(env, triop->arg3, IEndianess);
6527          HReg dst  = newVRegV(env);
6528          addInstr(env, PPCInstr_AvTernaryInt128(optri128, dst, arg1, arg2,
6529                                                 arg3));
6530          return dst;
6531       }
6532
6533       default:
6534          break;
6535       } /* switch (e->Iex.Triop.op) */
6536    } /* if (e->tag == Iex_Trinop) */
6537
6538
6539    if (e->tag == Iex_Const ) {
6540       vassert(e->Iex.Const.con->tag == Ico_V128);
6541       if (e->Iex.Const.con->Ico.V128 == 0x0000) {
6542          return generate_zeroes_V128(env);
6543       }
6544       else if (e->Iex.Const.con->Ico.V128 == 0xffff) {
6545          return generate_ones_V128(env);
6546       }
6547    }
6548
6549    vex_printf("iselVecExpr(ppc) (subarch = %s): can't reduce\n",
6550               LibVEX_ppVexHwCaps(mode64 ? VexArchPPC64 : VexArchPPC32,
6551                                  env->hwcaps));
6552    ppIRExpr(e);
6553    vpanic("iselVecExpr_wrk(ppc)");
6554 }
6555
6556
6557 /*---------------------------------------------------------*/
6558 /*--- ISEL: Statements                                  ---*/
6559 /*---------------------------------------------------------*/
6560
6561 static void iselStmt ( ISelEnv* env, IRStmt* stmt, IREndness IEndianess )
6562 {
6563    Bool mode64 = env->mode64;
6564    if (vex_traceflags & VEX_TRACE_VCODE) {
6565       vex_printf("\n -- ");
6566       ppIRStmt(stmt);
6567       vex_printf("\n");
6568    }
6569
6570    switch (stmt->tag) {
6571
6572    /* --------- STORE --------- */
6573    case Ist_Store: {
6574       IRType    tya   = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
6575       IRType    tyd   = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
6576       IREndness end   = stmt->Ist.Store.end;
6577
6578       if (end != IEndianess)
6579          goto stmt_fail;
6580       if (!mode64 && (tya != Ity_I32))
6581          goto stmt_fail;
6582       if (mode64 && (tya != Ity_I64))
6583          goto stmt_fail;
6584
6585       if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32 ||
6586           (mode64 && (tyd == Ity_I64))) {
6587          PPCAMode* am_addr
6588             = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6589                                  IEndianess);
6590          HReg r_src = iselWordExpr_R(env, stmt->Ist.Store.data, IEndianess);
6591          addInstr(env, PPCInstr_Store( toUChar(sizeofIRType(tyd)),
6592                                        am_addr, r_src, mode64 ));
6593          return;
6594       }
6595       if (tyd == Ity_F64) {
6596          PPCAMode* am_addr
6597             = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6598                                  IEndianess);
6599          HReg fr_src = iselDblExpr(env, stmt->Ist.Store.data, IEndianess);
6600          addInstr(env,
6601                   PPCInstr_FpLdSt(False/*store*/, 8, fr_src, am_addr));
6602          return;
6603       }
6604       if (tyd == Ity_F32) {
6605          PPCAMode* am_addr
6606             = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6607                                  IEndianess);
6608          HReg fr_src = iselFltExpr(env, stmt->Ist.Store.data, IEndianess);
6609          addInstr(env,
6610                   PPCInstr_FpLdSt(False/*store*/, 4, fr_src, am_addr));
6611          return;
6612       }
6613       if (tyd == Ity_D64) {
6614          PPCAMode* am_addr
6615             = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6616                                  IEndianess);
6617          HReg fr_src = iselDfp64Expr(env, stmt->Ist.Store.data, IEndianess);
6618          addInstr(env,
6619                   PPCInstr_FpLdSt(False/*store*/, 8, fr_src, am_addr));
6620          return;
6621       }
6622       if (tyd == Ity_D32) {
6623          PPCAMode* am_addr
6624             = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6625                                  IEndianess);
6626          HReg fr_src = iselDfp32Expr(env, stmt->Ist.Store.data, IEndianess);
6627          addInstr(env,
6628                   PPCInstr_FpLdSt(False/*store*/, 4, fr_src, am_addr));
6629          return;
6630       }
6631       if (tyd == Ity_V128) {
6632          PPCAMode* am_addr
6633             = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6634                                  IEndianess);
6635          HReg v_src = iselVecExpr(env, stmt->Ist.Store.data, IEndianess);
6636          addInstr(env,
6637                   PPCInstr_AvLdSt(False/*store*/, 16, v_src, am_addr));
6638          return;
6639       }
6640       if (tyd == Ity_I64 && !mode64) {
6641          /* Just calculate the address in the register.  Life is too
6642             short to arse around trying and possibly failing to adjust
6643             the offset in a 'reg+offset' style amode. */
6644          HReg rHi32, rLo32;
6645          HReg r_addr = iselWordExpr_R(env, stmt->Ist.Store.addr, IEndianess);
6646          iselInt64Expr( &rHi32, &rLo32, env, stmt->Ist.Store.data,
6647                         IEndianess );
6648          addInstr(env, PPCInstr_Store( 4/*byte-store*/,
6649                                        PPCAMode_IR( 0, r_addr ),
6650                                        rHi32,
6651                                        False/*32-bit insn please*/) );
6652          addInstr(env, PPCInstr_Store( 4/*byte-store*/,
6653                                        PPCAMode_IR( 4, r_addr ),
6654                                        rLo32,
6655                                        False/*32-bit insn please*/) );
6656          return;
6657       }
6658       break;
6659    }
6660
6661    /* --------- PUT --------- */
6662    case Ist_Put: {
6663       IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
6664       if (ty == Ity_I8  || ty == Ity_I16 ||
6665           ty == Ity_I32 || ((ty == Ity_I64) && mode64)) {
6666          HReg r_src = iselWordExpr_R(env, stmt->Ist.Put.data, IEndianess);
6667          PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6668                                           GuestStatePtr(mode64) );
6669          addInstr(env, PPCInstr_Store( toUChar(sizeofIRType(ty)),
6670                                        am_addr, r_src, mode64 ));
6671          return;
6672       }
6673       if (!mode64 && ty == Ity_I64) {
6674          HReg rHi, rLo;
6675          PPCAMode* am_addr  = PPCAMode_IR( stmt->Ist.Put.offset,
6676                                            GuestStatePtr(mode64) );
6677          PPCAMode* am_addr4 = advance4(env, am_addr);
6678          iselInt64Expr(&rHi,&rLo, env, stmt->Ist.Put.data, IEndianess);
6679          addInstr(env, PPCInstr_Store( 4, am_addr,  rHi, mode64 ));
6680          addInstr(env, PPCInstr_Store( 4, am_addr4, rLo, mode64 ));
6681          return;
6682       }
6683       if (ty == Ity_I128) {
6684          HReg rHi, rLo;
6685          PPCAMode* am_addr  = PPCAMode_IR( stmt->Ist.Put.offset,
6686                                            GuestStatePtr(mode64) );
6687          PPCAMode* am_addr4 = advance4(env, am_addr);
6688
6689          iselInt128Expr(&rHi,&rLo, env, stmt->Ist.Put.data, IEndianess);
6690          addInstr(env, PPCInstr_Store( 4, am_addr,  rHi, mode64 ));
6691          addInstr(env, PPCInstr_Store( 4, am_addr4, rLo, mode64 ));
6692          return;
6693       }
6694       if (ty == Ity_F128) {
6695          /* Guest state vectors are 16byte aligned,
6696             so don't need to worry here */
6697          HReg v_src = iselFp128Expr(env, stmt->Ist.Put.data, IEndianess);
6698
6699          PPCAMode* am_addr  = PPCAMode_IR( stmt->Ist.Put.offset,
6700                                            GuestStatePtr(mode64) );
6701          addInstr(env,
6702                   PPCInstr_AvLdSt(False/*store*/, 16, v_src, am_addr));
6703          return;
6704       }
6705       if (ty == Ity_V128) {
6706          /* Guest state vectors are 16byte aligned,
6707             so don't need to worry here */
6708          HReg v_src = iselVecExpr(env, stmt->Ist.Put.data, IEndianess);
6709          PPCAMode* am_addr  = PPCAMode_IR( stmt->Ist.Put.offset,
6710                                            GuestStatePtr(mode64) );
6711          addInstr(env,
6712                   PPCInstr_AvLdSt(False/*store*/, 16, v_src, am_addr));
6713          return;
6714       }
6715       if (ty == Ity_F64) {
6716          HReg fr_src = iselDblExpr(env, stmt->Ist.Put.data, IEndianess);
6717          PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6718                                           GuestStatePtr(mode64) );
6719          addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
6720                                         fr_src, am_addr ));
6721          return;
6722       }
6723       if (ty == Ity_D32) {
6724          /* The 32-bit value is stored in a 64-bit register */
6725          HReg fr_src = iselDfp32Expr( env, stmt->Ist.Put.data, IEndianess );
6726          PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6727                                           GuestStatePtr(mode64) );
6728          addInstr( env, PPCInstr_FpLdSt( False/*store*/, 8,
6729                                          fr_src, am_addr ) );
6730          return;
6731       }
6732       if (ty == Ity_D64) {
6733          HReg fr_src = iselDfp64Expr( env, stmt->Ist.Put.data, IEndianess );
6734          PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6735                                           GuestStatePtr(mode64) );
6736          addInstr( env, PPCInstr_FpLdSt( False/*store*/, 8, fr_src, am_addr ) );
6737          return;
6738       }
6739       break;
6740    }
6741
6742    /* --------- Indexed PUT --------- */
6743    case Ist_PutI: {
6744       IRPutI *puti = stmt->Ist.PutI.details;
6745
6746       PPCAMode* dst_am
6747          = genGuestArrayOffset(
6748               env, puti->descr,
6749               puti->ix, puti->bias,
6750               IEndianess );
6751       IRType ty = typeOfIRExpr(env->type_env, puti->data);
6752       if (mode64 && ty == Ity_I64) {
6753          HReg r_src = iselWordExpr_R(env, puti->data, IEndianess);
6754          addInstr(env, PPCInstr_Store( toUChar(8),
6755                                        dst_am, r_src, mode64 ));
6756          return;
6757       }
6758       if ((!mode64) && ty == Ity_I32) {
6759          HReg r_src = iselWordExpr_R(env, puti->data, IEndianess);
6760          addInstr(env, PPCInstr_Store( toUChar(4),
6761                                        dst_am, r_src, mode64 ));
6762          return;
6763       }
6764       break;
6765    }
6766
6767    /* --------- TMP --------- */
6768    case Ist_WrTmp: {
6769       IRTemp tmp = stmt->Ist.WrTmp.tmp;
6770       IRType ty = typeOfIRTemp(env->type_env, tmp);
6771       if (ty == Ity_I8  || ty == Ity_I16 ||
6772           ty == Ity_I32 || ((ty == Ity_I64) && mode64)) {
6773          HReg r_dst = lookupIRTemp(env, tmp);
6774          HReg r_src = iselWordExpr_R(env, stmt->Ist.WrTmp.data, IEndianess);
6775          addInstr(env, mk_iMOVds_RR( r_dst, r_src ));
6776          return;
6777       }
6778       if (!mode64 && ty == Ity_I64) {
6779          HReg r_srcHi, r_srcLo, r_dstHi, r_dstLo;
6780
6781          iselInt64Expr(&r_srcHi,&r_srcLo, env, stmt->Ist.WrTmp.data,
6782                        IEndianess);
6783          lookupIRTempPair( &r_dstHi, &r_dstLo, env, tmp);
6784          addInstr(env, mk_iMOVds_RR(r_dstHi, r_srcHi) );
6785          addInstr(env, mk_iMOVds_RR(r_dstLo, r_srcLo) );
6786          return;
6787       }
6788       if (mode64 && ty == Ity_I128) {
6789          HReg r_srcHi, r_srcLo, r_dstHi, r_dstLo;
6790          iselInt128Expr(&r_srcHi,&r_srcLo, env, stmt->Ist.WrTmp.data,
6791                         IEndianess);
6792          lookupIRTempPair( &r_dstHi, &r_dstLo, env, tmp);
6793          addInstr(env, mk_iMOVds_RR(r_dstHi, r_srcHi) );
6794          addInstr(env, mk_iMOVds_RR(r_dstLo, r_srcLo) );
6795          return;
6796       }
6797       if (!mode64 && ty == Ity_I128) {
6798          HReg r_srcHi    = INVALID_HREG;
6799          HReg r_srcMedHi = INVALID_HREG;
6800          HReg r_srcMedLo = INVALID_HREG;
6801          HReg r_srcLo    = INVALID_HREG;
6802          HReg r_dstHi, r_dstMedHi, r_dstMedLo, r_dstLo;
6803
6804          iselInt128Expr_to_32x4(&r_srcHi, &r_srcMedHi,
6805                                 &r_srcMedLo, &r_srcLo,
6806                                 env, stmt->Ist.WrTmp.data, IEndianess);
6807
6808          lookupIRTempQuad( &r_dstHi, &r_dstMedHi, &r_dstMedLo,
6809                            &r_dstLo, env, tmp);
6810
6811          addInstr(env, mk_iMOVds_RR(r_dstHi,    r_srcHi) );
6812          addInstr(env, mk_iMOVds_RR(r_dstMedHi, r_srcMedHi) );
6813          addInstr(env, mk_iMOVds_RR(r_dstMedLo, r_srcMedLo) );
6814          addInstr(env, mk_iMOVds_RR(r_dstLo,    r_srcLo) );
6815          return;
6816       }
6817       if (ty == Ity_I1) {
6818          PPCCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data,
6819                                          IEndianess);
6820          HReg r_dst = lookupIRTemp(env, tmp);
6821          addInstr(env, PPCInstr_Set(cond, r_dst));
6822          return;
6823       }
6824       if (ty == Ity_F64) {
6825          HReg fr_dst = lookupIRTemp(env, tmp);
6826          HReg fr_src = iselDblExpr(env, stmt->Ist.WrTmp.data, IEndianess);
6827          addInstr(env, PPCInstr_FpUnary(Pfp_MOV, fr_dst, fr_src));
6828          return;
6829       }
6830       if (ty == Ity_F32) {
6831          HReg fr_dst = lookupIRTemp(env, tmp);
6832          HReg fr_src = iselFltExpr(env, stmt->Ist.WrTmp.data, IEndianess);
6833          addInstr(env, PPCInstr_FpUnary(Pfp_MOV, fr_dst, fr_src));
6834          return;
6835       }
6836       if (ty == Ity_D32) {
6837          HReg fr_dst = lookupIRTemp(env, tmp);
6838          HReg fr_src = iselDfp32Expr(env, stmt->Ist.WrTmp.data, IEndianess);
6839          addInstr(env, PPCInstr_Dfp64Unary(Pfp_MOV, fr_dst, fr_src));
6840          return;
6841       }
6842       if (ty == Ity_F128) {
6843          HReg v_dst = lookupIRTemp(env, tmp);
6844          HReg v_src = iselFp128Expr(env, stmt->Ist.WrTmp.data, IEndianess);
6845          addInstr(env, PPCInstr_AvUnary(Pav_MOV, v_dst, v_src));
6846          return;
6847       }
6848       if (ty == Ity_V128) {
6849          HReg v_dst = lookupIRTemp(env, tmp);
6850          HReg v_src = iselVecExpr(env, stmt->Ist.WrTmp.data, IEndianess);
6851          addInstr(env, PPCInstr_AvUnary(Pav_MOV, v_dst, v_src));
6852          return;
6853       }
6854       if (ty == Ity_D64) {
6855          HReg fr_dst = lookupIRTemp( env, tmp );
6856          HReg fr_src = iselDfp64Expr( env, stmt->Ist.WrTmp.data, IEndianess );
6857          addInstr( env, PPCInstr_Dfp64Unary( Pfp_MOV, fr_dst, fr_src ) );
6858          return;
6859       }
6860       if (ty == Ity_D128) {
6861          HReg fr_srcHi, fr_srcLo, fr_dstHi, fr_dstLo;
6862          //         lookupDfp128IRTempPair( &fr_dstHi, &fr_dstLo, env, tmp );
6863          lookupIRTempPair( &fr_dstHi, &fr_dstLo, env, tmp );
6864          iselDfp128Expr( &fr_srcHi, &fr_srcLo, env, stmt->Ist.WrTmp.data,
6865                          IEndianess );
6866          addInstr( env, PPCInstr_Dfp64Unary( Pfp_MOV, fr_dstHi, fr_srcHi ) );
6867          addInstr( env, PPCInstr_Dfp64Unary( Pfp_MOV, fr_dstLo, fr_srcLo ) );
6868          return;
6869       }
6870       break;
6871    }
6872
6873    /* --------- Load Linked or Store Conditional --------- */
6874    case Ist_LLSC: {
6875       IRTemp res    = stmt->Ist.LLSC.result;
6876       IRType tyRes  = typeOfIRTemp(env->type_env, res);
6877       IRType tyAddr = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.addr);
6878
6879       if (stmt->Ist.LLSC.end != IEndianess)
6880          goto stmt_fail;
6881       if (!mode64 && (tyAddr != Ity_I32))
6882          goto stmt_fail;
6883       if (mode64 && (tyAddr != Ity_I64))
6884          goto stmt_fail;
6885
6886       if (stmt->Ist.LLSC.storedata == NULL) {
6887          /* LL */
6888          HReg r_addr = iselWordExpr_R( env, stmt->Ist.LLSC.addr, IEndianess );
6889          HReg r_dst  = lookupIRTemp(env, res);
6890          if (tyRes == Ity_I8) {
6891             addInstr(env, PPCInstr_LoadL( 1, r_dst, r_addr, mode64 ));
6892             return;
6893          }
6894          if (tyRes == Ity_I16) {
6895             addInstr(env, PPCInstr_LoadL( 2, r_dst, r_addr, mode64 ));
6896             return;
6897          }
6898          if (tyRes == Ity_I32) {
6899             addInstr(env, PPCInstr_LoadL( 4, r_dst, r_addr, mode64 ));
6900             return;
6901          }
6902          if (tyRes == Ity_I64 && mode64) {
6903             addInstr(env, PPCInstr_LoadL( 8, r_dst, r_addr, mode64 ));
6904             return;
6905          }
6906          /* fallthru */;
6907       } else {
6908          /* SC */
6909          HReg   r_res  = lookupIRTemp(env, res); /* :: Ity_I1 */
6910          HReg   r_a    = iselWordExpr_R(env, stmt->Ist.LLSC.addr, IEndianess);
6911          HReg   r_src  = iselWordExpr_R(env, stmt->Ist.LLSC.storedata,
6912                                         IEndianess);
6913          HReg   r_tmp  = newVRegI(env);
6914          IRType tyData = typeOfIRExpr(env->type_env,
6915                                       stmt->Ist.LLSC.storedata);
6916          vassert(tyRes == Ity_I1);
6917          if (tyData == Ity_I8 || tyData == Ity_I16 || tyData == Ity_I32 ||
6918             (tyData == Ity_I64 && mode64)) {
6919             int size = 0;
6920
6921             if (tyData == Ity_I64)
6922                size = 8;
6923             else if (tyData == Ity_I32)
6924                size = 4;
6925             else if (tyData == Ity_I16)
6926                size = 2;
6927             else if (tyData == Ity_I8)
6928                size = 1;
6929
6930             addInstr(env, PPCInstr_StoreC( size,
6931                                            r_a, r_src, mode64 ));
6932             addInstr(env, PPCInstr_MfCR( r_tmp ));
6933             addInstr(env, PPCInstr_Shft(
6934                              Pshft_SHR,
6935                              env->mode64 ? False : True
6936                                 /*F:64-bit, T:32-bit shift*/,
6937                              r_tmp, r_tmp,
6938                              PPCRH_Imm(False/*unsigned*/, 29)));
6939             /* Probably unnecessary, since the IR dest type is Ity_I1,
6940                and so we are entitled to leave whatever junk we like
6941                drifting round in the upper 31 or 63 bits of r_res.
6942                However, for the sake of conservativeness .. */
6943             addInstr(env, PPCInstr_Alu(
6944                              Palu_AND,
6945                              r_res, r_tmp,
6946                              PPCRH_Imm(False/*signed*/, 1)));
6947             return;
6948          }
6949          /* fallthru */
6950       }
6951       goto stmt_fail;
6952       /*NOTREACHED*/
6953    }
6954
6955    /* --------- Call to DIRTY helper --------- */
6956    case Ist_Dirty: {
6957       IRDirty* d = stmt->Ist.Dirty.details;
6958
6959       /* Figure out the return type, if any. */
6960       IRType retty = Ity_INVALID;
6961       if (d->tmp != IRTemp_INVALID)
6962          retty = typeOfIRTemp(env->type_env, d->tmp);
6963
6964       /* Throw out any return types we don't know about.  The set of
6965          acceptable return types is the same in both 32- and 64-bit
6966          mode, so we don't need to inspect mode64 to make a
6967          decision. */
6968       Bool retty_ok = False;
6969       switch (retty) {
6970          case Ity_INVALID: /* function doesn't return anything */
6971          case Ity_V128:
6972          case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
6973             retty_ok = True; break;
6974          default:
6975             break;
6976       }
6977       if (!retty_ok)
6978          break; /* will go to stmt_fail: */
6979
6980       /* Marshal args, do the call, clear stack, set the return value
6981          to 0x555..555 if this is a conditional call that returns a
6982          value and the call is skipped. */
6983       UInt   addToSp = 0;
6984       RetLoc rloc    = mk_RetLoc_INVALID();
6985       doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args,
6986                     IEndianess );
6987       vassert(is_sane_RetLoc(rloc));
6988
6989       /* Now figure out what to do with the returned value, if any. */
6990       switch (retty) {
6991          case Ity_INVALID: {
6992             /* No return value.  Nothing to do. */
6993             vassert(d->tmp == IRTemp_INVALID);
6994             vassert(rloc.pri == RLPri_None);
6995             vassert(addToSp == 0);
6996             return;
6997          }
6998          case Ity_I32: case Ity_I16: case Ity_I8: {
6999             /* The returned value is in %r3.  Park it in the register
7000                associated with tmp. */
7001             HReg r_dst = lookupIRTemp(env, d->tmp);
7002             addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
7003             vassert(rloc.pri == RLPri_Int);
7004             vassert(addToSp == 0);
7005             return;
7006          }
7007          case Ity_I64:
7008             if (mode64) {
7009                /* The returned value is in %r3.  Park it in the register
7010                   associated with tmp. */
7011                HReg r_dst = lookupIRTemp(env, d->tmp);
7012                addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
7013                vassert(rloc.pri == RLPri_Int);
7014                vassert(addToSp == 0);
7015             } else {
7016                /* The returned value is in %r3:%r4.  Park it in the
7017                   register-pair associated with tmp. */
7018                HReg r_dstHi = INVALID_HREG;
7019                HReg r_dstLo = INVALID_HREG;
7020                lookupIRTempPair( &r_dstHi, &r_dstLo, env, d->tmp);
7021                addInstr(env, mk_iMOVds_RR(r_dstHi, hregPPC_GPR3(mode64)));
7022                addInstr(env, mk_iMOVds_RR(r_dstLo, hregPPC_GPR4(mode64)));
7023                vassert(rloc.pri == RLPri_2Int);
7024                vassert(addToSp == 0);
7025             }
7026             return;
7027          case Ity_V128: {
7028             /* The returned value is on the stack, and *retloc tells
7029                us where.  Fish it off the stack and then move the
7030                stack pointer upwards to clear it, as directed by
7031                doHelperCall. */
7032             vassert(rloc.pri == RLPri_V128SpRel);
7033             vassert(addToSp >= 16);
7034             HReg      dst = lookupIRTemp(env, d->tmp);
7035             PPCAMode* am  = PPCAMode_IR(rloc.spOff, StackFramePtr(mode64));
7036             addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, dst, am ));
7037             add_to_sp(env, addToSp);
7038             return;
7039          }
7040          default:
7041             /*NOTREACHED*/
7042             vassert(0);
7043       }
7044    }
7045
7046    /* --------- MEM FENCE --------- */
7047    case Ist_MBE:
7048       switch (stmt->Ist.MBE.event) {
7049          case Imbe_Fence:
7050             addInstr(env, PPCInstr_MFence());
7051             return;
7052          default:
7053             break;
7054       }
7055       break;
7056
7057    /* --------- INSTR MARK --------- */
7058    /* Doesn't generate any executable code ... */
7059    case Ist_IMark:
7060        return;
7061
7062    /* --------- ABI HINT --------- */
7063    /* These have no meaning (denotation in the IR) and so we ignore
7064       them ... if any actually made it this far. */
7065    case Ist_AbiHint:
7066        return;
7067
7068    /* --------- NO-OP --------- */
7069    /* Fairly self-explanatory, wouldn't you say? */
7070    case Ist_NoOp:
7071        return;
7072
7073    /* --------- EXIT --------- */
7074    case Ist_Exit: {
7075       IRConst* dst = stmt->Ist.Exit.dst;
7076       if (!mode64 && dst->tag != Ico_U32)
7077          vpanic("iselStmt(ppc): Ist_Exit: dst is not a 32-bit value");
7078       if (mode64 && dst->tag != Ico_U64)
7079          vpanic("iselStmt(ppc64): Ist_Exit: dst is not a 64-bit value");
7080
7081       PPCCondCode cc    = iselCondCode(env, stmt->Ist.Exit.guard, IEndianess);
7082       PPCAMode*   amCIA = PPCAMode_IR(stmt->Ist.Exit.offsIP,
7083                                       hregPPC_GPR31(mode64));
7084
7085       /* Case: boring transfer to known address */
7086       if (stmt->Ist.Exit.jk == Ijk_Boring
7087           || stmt->Ist.Exit.jk == Ijk_Call
7088           /* || stmt->Ist.Exit.jk == Ijk_Ret */) {
7089          if (env->chainingAllowed) {
7090             /* .. almost always true .. */
7091             /* Skip the event check at the dst if this is a forwards
7092                edge. */
7093             Bool toFastEP
7094                = mode64
7095                ? (((Addr64)stmt->Ist.Exit.dst->Ico.U64) > (Addr64)env->max_ga)
7096                : (((Addr32)stmt->Ist.Exit.dst->Ico.U32) > (Addr32)env->max_ga);
7097             if (0) vex_printf("%s", toFastEP ? "Y" : ",");
7098             addInstr(env, PPCInstr_XDirect(
7099                              mode64 ? (Addr64)stmt->Ist.Exit.dst->Ico.U64
7100                                     : (Addr64)stmt->Ist.Exit.dst->Ico.U32,
7101                              amCIA, cc, toFastEP));
7102          } else {
7103             /* .. very occasionally .. */
7104             /* We can't use chaining, so ask for an assisted transfer,
7105                as that's the only alternative that is allowable. */
7106             HReg r = iselWordExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst),
7107                                     IEndianess);
7108             addInstr(env, PPCInstr_XAssisted(r, amCIA, cc, Ijk_Boring));
7109          }
7110          return;
7111       }
7112
7113       /* Case: assisted transfer to arbitrary address */
7114       switch (stmt->Ist.Exit.jk) {
7115          /* Keep this list in sync with that in iselNext below */
7116          case Ijk_ClientReq:
7117          case Ijk_EmFail:
7118          case Ijk_EmWarn:
7119          case Ijk_NoDecode:
7120          case Ijk_NoRedir:
7121          case Ijk_SigBUS:
7122          case Ijk_SigTRAP:
7123          case Ijk_Sys_syscall:
7124          case Ijk_InvalICache:
7125          {
7126             HReg r = iselWordExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst),
7127                                     IEndianess);
7128             addInstr(env, PPCInstr_XAssisted(r, amCIA, cc,
7129                                              stmt->Ist.Exit.jk));
7130             return;
7131          }
7132          default:
7133             break;
7134       }
7135
7136       /* Do we ever expect to see any other kind? */
7137       goto stmt_fail;
7138    }
7139
7140    default: break;
7141    }
7142   stmt_fail:
7143    ppIRStmt(stmt);
7144    vpanic("iselStmt(ppc)");
7145 }
7146
7147
7148 /*---------------------------------------------------------*/
7149 /*--- ISEL: Basic block terminators (Nexts)             ---*/
7150 /*---------------------------------------------------------*/
7151
7152 static void iselNext ( ISelEnv* env,
7153                        IRExpr* next, IRJumpKind jk, Int offsIP,
7154                        IREndness IEndianess)
7155 {
7156    if (vex_traceflags & VEX_TRACE_VCODE) {
7157       vex_printf( "\n-- PUT(%d) = ", offsIP);
7158       ppIRExpr( next );
7159       vex_printf( "; exit-");
7160       ppIRJumpKind(jk);
7161       vex_printf( "\n");
7162    }
7163
7164    PPCCondCode always = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
7165
7166    /* Case: boring transfer to known address */
7167    if (next->tag == Iex_Const) {
7168       IRConst* cdst = next->Iex.Const.con;
7169       vassert(cdst->tag == (env->mode64 ? Ico_U64 :Ico_U32));
7170       if (jk == Ijk_Boring || jk == Ijk_Call) {
7171          /* Boring transfer to known address */
7172          PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
7173          if (env->chainingAllowed) {
7174             /* .. almost always true .. */
7175             /* Skip the event check at the dst if this is a forwards
7176                edge. */
7177             Bool toFastEP
7178                = env->mode64
7179                ? (((Addr64)cdst->Ico.U64) > (Addr64)env->max_ga)
7180                : (((Addr32)cdst->Ico.U32) > (Addr32)env->max_ga);
7181             if (0) vex_printf("%s", toFastEP ? "X" : ".");
7182             addInstr(env, PPCInstr_XDirect(
7183                              env->mode64 ? (Addr64)cdst->Ico.U64
7184                                          : (Addr64)cdst->Ico.U32,
7185                              amCIA, always, toFastEP));
7186          } else {
7187             /* .. very occasionally .. */
7188             /* We can't use chaining, so ask for an assisted transfer,
7189                as that's the only alternative that is allowable. */
7190             HReg r = iselWordExpr_R(env, next, IEndianess);
7191             addInstr(env, PPCInstr_XAssisted(r, amCIA, always,
7192                                              Ijk_Boring));
7193          }
7194          return;
7195       }
7196    }
7197
7198    /* Case: call/return (==boring) transfer to any address */
7199    switch (jk) {
7200       case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
7201          HReg       r     = iselWordExpr_R(env, next, IEndianess);
7202          PPCAMode*  amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
7203          if (env->chainingAllowed) {
7204             addInstr(env, PPCInstr_XIndir(r, amCIA, always));
7205          } else {
7206             addInstr(env, PPCInstr_XAssisted(r, amCIA, always,
7207                                              Ijk_Boring));
7208          }
7209          return;
7210       }
7211       default:
7212          break;
7213    }
7214
7215    /* Case: assisted transfer to arbitrary address */
7216    switch (jk) {
7217       /* Keep this list in sync with that for Ist_Exit above */
7218       case Ijk_ClientReq:
7219       case Ijk_EmFail:
7220       case Ijk_EmWarn:
7221       case Ijk_NoDecode:
7222       case Ijk_NoRedir:
7223       case Ijk_SigBUS:
7224       case Ijk_SigTRAP:
7225       case Ijk_Sys_syscall:
7226       case Ijk_InvalICache:
7227       {
7228          HReg      r     = iselWordExpr_R(env, next, IEndianess);
7229          PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
7230          addInstr(env, PPCInstr_XAssisted(r, amCIA, always, jk));
7231          return;
7232       }
7233       default:
7234          break;
7235    }
7236
7237    vex_printf( "\n-- PUT(%d) = ", offsIP);
7238    ppIRExpr( next );
7239    vex_printf( "; exit-");
7240    ppIRJumpKind(jk);
7241    vex_printf( "\n");
7242    vassert(0); // are we expecting any other kind?
7243 }
7244
7245
7246 /*---------------------------------------------------------*/
7247 /*--- Insn selector top-level                           ---*/
7248 /*---------------------------------------------------------*/
7249
7250 /* Translate an entire SB to ppc code. */
7251 HInstrArray* iselSB_PPC ( const IRSB* bb,
7252                           VexArch      arch_host,
7253                           const VexArchInfo* archinfo_host,
7254                           const VexAbiInfo*  vbi,
7255                           Int offs_Host_EvC_Counter,
7256                           Int offs_Host_EvC_FailAddr,
7257                           Bool chainingAllowed,
7258                           Bool addProfInc,
7259                           Addr max_ga)
7260
7261 {
7262    Int       i, j;
7263    HReg      hregLo, hregMedLo, hregMedHi, hregHi;
7264    ISelEnv*  env;
7265    UInt      hwcaps_host = archinfo_host->hwcaps;
7266    Bool      mode64 = False;
7267    UInt      mask32, mask64;
7268    PPCAMode *amCounter, *amFailAddr;
7269    IREndness IEndianess;
7270
7271    vassert(arch_host == VexArchPPC32 || arch_host == VexArchPPC64);
7272    mode64 = arch_host == VexArchPPC64;
7273
7274    /* do some sanity checks,
7275     * Note: no 32-bit support for ISA 3.0, ISA 3.1
7276     */
7277    mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V
7278             | VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX | VEX_HWCAPS_PPC32_VX
7279             | VEX_HWCAPS_PPC32_DFP | VEX_HWCAPS_PPC32_ISA2_07;
7280
7281    mask64 = VEX_HWCAPS_PPC64_V | VEX_HWCAPS_PPC64_FX
7282             | VEX_HWCAPS_PPC64_GX | VEX_HWCAPS_PPC64_VX | VEX_HWCAPS_PPC64_DFP
7283             | VEX_HWCAPS_PPC64_ISA2_07 | VEX_HWCAPS_PPC64_ISA3_0
7284             | VEX_HWCAPS_PPC64_ISA3_1;
7285
7286    if (mode64) {
7287       vassert((hwcaps_host & mask32) == 0);
7288    } else {
7289       vassert((hwcaps_host & mask64) == 0);
7290    }
7291
7292    /* Check that the host's endianness is as expected. */
7293    vassert((archinfo_host->endness == VexEndnessBE) ||
7294            (archinfo_host->endness == VexEndnessLE));
7295
7296    if (archinfo_host->endness == VexEndnessBE)
7297      IEndianess = Iend_BE;
7298    else
7299      IEndianess = Iend_LE;
7300
7301    /* Make up an initial environment to use. */
7302    env = LibVEX_Alloc_inline(sizeof(ISelEnv));
7303    env->vreg_ctr = 0;
7304
7305    /* Are we being ppc32 or ppc64? */
7306    env->mode64 = mode64;
7307
7308    /* Set up output code array. */
7309    env->code = newHInstrArray();
7310
7311    /* Copy BB's type env. */
7312    env->type_env = bb->tyenv;
7313
7314    /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
7315     * change as we go along.
7316     *
7317     * vregmap2 and vregmap3 are only used in 32 bit mode
7318     * for supporting I128 in 32-bit mode
7319     */
7320    env->n_vregmap = bb->tyenv->types_used;
7321    env->vregmapLo    = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
7322    env->vregmapMedLo = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
7323    if (mode64) {
7324       env->vregmapMedHi = NULL;
7325       env->vregmapHi    = NULL;
7326    } else {
7327       env->vregmapMedHi = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
7328       env->vregmapHi    = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
7329    }
7330
7331    /* and finally ... */
7332    env->chainingAllowed = chainingAllowed;
7333    env->max_ga          = max_ga;
7334    env->hwcaps          = hwcaps_host;
7335    env->previous_rm     = NULL;
7336    env->vbi             = vbi;
7337
7338    /* For each IR temporary, allocate a suitably-kinded virtual
7339       register. */
7340    j = 0;
7341    for (i = 0; i < env->n_vregmap; i++) {
7342       hregLo = hregMedLo = hregMedHi = hregHi = INVALID_HREG;
7343       switch (bb->tyenv->types[i]) {
7344       case Ity_I1:
7345       case Ity_I8:
7346       case Ity_I16:
7347       case Ity_I32:
7348          if (mode64) {
7349             hregLo = mkHReg(True, HRcInt64, 0, j++);
7350          } else {
7351             hregLo = mkHReg(True, HRcInt32, 0, j++);
7352          }
7353          break;
7354       case Ity_I64:
7355          if (mode64) {
7356             hregLo    = mkHReg(True, HRcInt64, 0, j++);
7357          } else {
7358             hregLo    = mkHReg(True, HRcInt32, 0, j++);
7359             hregMedLo = mkHReg(True, HRcInt32, 0, j++);
7360          }
7361          break;
7362       case Ity_I128:
7363          if (mode64) {
7364             hregLo    = mkHReg(True, HRcInt64, 0, j++);
7365             hregMedLo = mkHReg(True, HRcInt64, 0, j++);
7366          } else {
7367             hregLo    = mkHReg(True, HRcInt32, 0, j++);
7368             hregMedLo = mkHReg(True, HRcInt32, 0, j++);
7369             hregMedHi = mkHReg(True, HRcInt32, 0, j++);
7370             hregHi    = mkHReg(True, HRcInt32, 0, j++);
7371          }
7372          break;
7373       case Ity_F32:
7374       case Ity_F64:
7375          hregLo = mkHReg(True, HRcFlt64, 0, j++);
7376          break;
7377       case Ity_F128:
7378       case Ity_V128:
7379          hregLo = mkHReg(True, HRcVec128, 0, j++);
7380          break;
7381       case Ity_D32:
7382       case Ity_D64:
7383          hregLo = mkHReg(True, HRcFlt64, 0, j++);
7384          break;
7385       case Ity_D128:
7386          hregLo    = mkHReg(True, HRcFlt64, 0, j++);
7387          hregMedLo = mkHReg(True, HRcFlt64, 0, j++);
7388          break;
7389       default:
7390          ppIRType(bb->tyenv->types[i]);
7391          vpanic("iselBB(ppc): IRTemp type");
7392       }
7393       env->vregmapLo[i]    = hregLo;
7394       env->vregmapMedLo[i] = hregMedLo;
7395       if (!mode64) {
7396          env->vregmapMedHi[i] = hregMedHi;
7397          env->vregmapHi[i]    = hregHi;
7398       }
7399    }
7400    env->vreg_ctr = j;
7401
7402    /* The very first instruction must be an event check. */
7403    amCounter  = PPCAMode_IR(offs_Host_EvC_Counter, hregPPC_GPR31(mode64));
7404    amFailAddr = PPCAMode_IR(offs_Host_EvC_FailAddr, hregPPC_GPR31(mode64));
7405    addInstr(env, PPCInstr_EvCheck(amCounter, amFailAddr));
7406
7407    /* Possibly a block counter increment (for profiling).  At this
7408       point we don't know the address of the counter, so just pretend
7409       it is zero.  It will have to be patched later, but before this
7410       translation is used, by a call to LibVEX_patchProfCtr. */
7411    if (addProfInc) {
7412       addInstr(env, PPCInstr_ProfInc());
7413    }
7414
7415    /* Ok, finally we can iterate over the statements. */
7416    for (i = 0; i < bb->stmts_used; i++)
7417       iselStmt(env, bb->stmts[i], IEndianess);
7418
7419    iselNext(env, bb->next, bb->jumpkind, bb->offsIP, IEndianess);
7420
7421    /* record the number of vregs we used. */
7422    env->code->n_vregs = env->vreg_ctr;
7423    return env->code;
7424 }
7425
7426
7427 /*---------------------------------------------------------------*/
7428 /*--- end                                     host_ppc_isel.c ---*/
7429 /*---------------------------------------------------------------*/