Bug 439685 compiler warning in callgrind/main.c
[valgrind.git] / VEX / priv / host_ppc_isel.c
blob5ee6d1b6da370764465d3b6075a540869c5a9a6d
3 /*---------------------------------------------------------------*/
4 /*--- begin host_ppc_isel.c ---*/
5 /*---------------------------------------------------------------*/
7 /*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
11 Copyright (C) 2004-2017 OpenWorks LLP
12 info@open-works.net
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, see <http://www.gnu.org/licenses/>.
27 The GNU General Public License is contained in the file COPYING.
29 Neither the names of the U.S. Department of Energy nor the
30 University of California nor the names of its contributors may be
31 used to endorse or promote products derived from this software
32 without prior written permission.
35 #include "libvex_basictypes.h"
36 #include "libvex_ir.h"
37 #include "libvex.h"
39 #include "ir_match.h"
40 #include "main_util.h"
41 #include "main_globals.h"
42 #include "host_generic_regs.h"
43 #include "host_generic_simd64.h"
44 #include "host_ppc_defs.h"
46 /* GPR register class for ppc32/64 */
47 #define HRcGPR(_mode64) ((_mode64) ? HRcInt64 : HRcInt32)
50 /*---------------------------------------------------------*/
51 /*--- Register Usage Conventions ---*/
52 /*---------------------------------------------------------*/
54 Integer Regs
55 ------------
56 GPR0 Reserved
57 GPR1 Stack Pointer
58 GPR2 not used - TOC pointer
59 GPR3:10 Allocateable
60 GPR11 if mode64: not used - calls by ptr / env ptr for some langs
61 GPR12 if mode64: not used - exceptions / global linkage code
62 GPR13 not used - Thread-specific pointer
63 GPR14:28 Allocateable
64 GPR29 Unused by us (reserved for the dispatcher)
65 GPR30 AltiVec temp spill register
66 GPR31 GuestStatePointer
68 Of Allocateable regs:
69 if (mode64)
70 GPR3:10 Caller-saved regs
71 else
72 GPR3:12 Caller-saved regs
73 GPR14:29 Callee-saved regs
75 GPR3 [Return | Parameter] - carrying reg
76 GPR4:10 Parameter-carrying regs
79 Floating Point Regs
80 -------------------
81 FPR0:31 Allocateable
83 FPR0 Caller-saved - scratch reg
84 if (mode64)
85 FPR1:13 Caller-saved - param & return regs
86 else
87 FPR1:8 Caller-saved - param & return regs
88 FPR9:13 Caller-saved regs
89 FPR14:31 Callee-saved regs
92 Vector Regs (on processors with the VMX feature)
93 -----------
94 VR0-VR1 Volatile scratch registers
95 VR2-VR13 Volatile vector parameters registers
96 VR14-VR19 Volatile scratch registers
97 VR20-VR31 Non-volatile registers
98 VRSAVE Non-volatile 32-bit register
102 /*---------------------------------------------------------*/
103 /*--- PPC FP Status & Control Register Conventions ---*/
104 /*---------------------------------------------------------*/
106 Vex-generated code expects to run with the FPU set as follows: all
107 exceptions masked. The rounding mode is set appropriately before
108 each floating point insn emitted (or left unchanged if known to be
109 correct already). There are a few fp insns (fmr,fneg,fabs,fnabs),
110 which are unaffected by the rm and so the rounding mode is not set
111 prior to them.
113 At least on MPC7447A (Mac Mini), frsqrte is also not affected by
114 rounding mode. At some point the ppc docs get sufficiently vague
115 that the only way to find out is to write test programs.
117 /* Notes on the FP instruction set, 6 Feb 06.
119 What exns -> CR1 ? Sets FPRF ? Observes RM ?
120 -------------------------------------------------------------
122 fmr[.] if . n n
123 fneg[.] if . n n
124 fabs[.] if . n n
125 fnabs[.] if . n n
127 fadd[.] if . y y
128 fadds[.] if . y y
129 fcfid[.] (Si64->dbl) if . y y
130 fcfidU[.] (Ui64->dbl) if . y y
131 fcfids[.] (Si64->sngl) if . Y Y
132 fcfidus[.] (Ui64->sngl) if . Y Y
133 fcmpo (cmp, result n n n
134 fcmpu to crfD) n n n
135 fctid[.] (dbl->i64) if . ->undef y
136 fctidz[.] (dbl->i64) if . ->undef rounds-to-zero
137 fctiw[.] (dbl->i32) if . ->undef y
138 fctiwz[.] (dbl->i32) if . ->undef rounds-to-zero
139 fdiv[.] if . y y
140 fdivs[.] if . y y
141 fmadd[.] if . y y
142 fmadds[.] if . y y
143 fmsub[.] if . y y
144 fmsubs[.] if . y y
145 fmul[.] if . y y
146 fmuls[.] if . y y
148 (note: for fnm*, rounding happens before final negation)
149 fnmadd[.] if . y y
150 fnmadds[.] if . y y
151 fnmsub[.] if . y y
152 fnmsubs[.] if . y y
154 fre[.] if . y y
155 fres[.] if . y y
157 frsqrte[.] if . y apparently not
159 fsqrt[.] if . y y
160 fsqrts[.] if . y y
161 fsub[.] if . y y
162 fsubs[.] if . y y
165 fpscr: bits 30-31 (ibm) is RM
166 24-29 (ibm) are exnmasks/non-IEEE bit, all zero
167 15-19 (ibm) is FPRF: class, <, =, >, UNord
169 ppc fe(guest) makes fpscr read as all zeros except RM (and maybe FPRF
170 in future)
172 mcrfs - move fpscr field to CR field
173 mtfsfi[.] - 4 bit imm moved to fpscr field
174 mtfsf[.] - move frS[low 1/2] to fpscr but using 8-bit field mask
175 mtfsb1[.] - set given fpscr bit
176 mtfsb0[.] - clear given fpscr bit
177 mffs[.] - move all fpscr to frD[low 1/2]
179 For [.] presumably cr1 is set with exn summary bits, as per
180 main FP insns
182 A single precision store truncates/denormalises the in-register value,
183 but does not round it. This is so that flds followed by fsts is
184 always the identity.
188 /*---------------------------------------------------------*/
189 /*--- misc helpers ---*/
190 /*---------------------------------------------------------*/
192 /* These are duplicated in guest-ppc/toIR.c */
193 static IRExpr* unop ( IROp op, IRExpr* a )
195 return IRExpr_Unop(op, a);
198 static IRExpr* mkU32 ( UInt i )
200 return IRExpr_Const(IRConst_U32(i));
203 static IRExpr* bind ( Int binder )
205 return IRExpr_Binder(binder);
208 static Bool isZeroU8 ( IRExpr* e )
210 return e->tag == Iex_Const
211 && e->Iex.Const.con->tag == Ico_U8
212 && e->Iex.Const.con->Ico.U8 == 0;
216 /*---------------------------------------------------------*/
217 /*--- ISelEnv ---*/
218 /*---------------------------------------------------------*/
220 /* This carries around:
222 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
223 might encounter. This is computed before insn selection starts,
224 and does not change.
226 - A mapping from IRTemp to HReg. This tells the insn selector
227 which virtual register(s) are associated with each IRTemp
228 temporary. This is computed before insn selection starts, and
229 does not change. We expect this mapping to map precisely the
230 same set of IRTemps as the type mapping does.
232 - vregmapLo holds the primary register for the IRTemp.
233 - vregmapMedLo holds the secondary register for the IRTemp,
234 if any is needed. That's only for Ity_I64 temps
235 in 32 bit mode or Ity_I128 temps in 64-bit mode.
236 - vregmapMedHi is only for dealing with Ity_I128 temps in
237 32 bit mode. It holds bits 95:64 (Intel numbering)
238 of the IRTemp.
239 - vregmapHi is also only for dealing with Ity_I128 temps
240 in 32 bit mode. It holds the most significant bits
241 (127:96 in Intel numbering) of the IRTemp.
243 - The code array, that is, the insns selected so far.
245 - A counter, for generating new virtual registers.
247 - The host subarchitecture we are selecting insns for.
248 This is set at the start and does not change.
250 - A Bool to tell us if the host is 32 or 64bit.
251 This is set at the start and does not change.
253 - An IRExpr*, which may be NULL, holding the IR expression (an
254 IRRoundingMode-encoded value) to which the FPU's rounding mode
255 was most recently set. Setting to NULL is always safe. Used to
256 avoid redundant settings of the FPU's rounding mode, as
257 described in set_FPU_rounding_mode below.
259 - A VexMiscInfo*, needed for knowing how to generate
260 function calls for this target.
262 - The maximum guest address of any guest insn in this block.
263 Actually, the address of the highest-addressed byte from any
264 insn in this block. Is set at the start and does not change.
265 This is used for detecting jumps which are definitely
266 forward-edges from this block, and therefore can be made
267 (chained) to the fast entry point of the destination, thereby
268 avoiding the destination's event check.
271 typedef
272 struct {
273 /* Constant -- are set at the start and do not change. */
274 IRTypeEnv* type_env;
275 // 64-bit mode 32-bit mode
276 HReg* vregmapLo; // Low 64-bits [63:0] Low 32-bits [31:0]
277 HReg* vregmapMedLo; // high 64-bits[127:64] Next 32-bits [63:32]
278 HReg* vregmapMedHi; // unused Next 32-bits [95:64]
279 HReg* vregmapHi; // unused highest 32-bits [127:96]
280 Int n_vregmap;
282 /* 27 Jan 06: Not currently used, but should be */
283 UInt hwcaps;
285 Bool mode64;
287 const VexAbiInfo* vbi; // unused
289 Bool chainingAllowed;
290 Addr64 max_ga;
292 /* These are modified as we go along. */
293 HInstrArray* code;
294 Int vreg_ctr;
296 IRExpr* previous_rm;
298 ISelEnv;
301 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
303 vassert(tmp >= 0);
304 vassert(tmp < env->n_vregmap);
305 return env->vregmapLo[tmp];
308 static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
309 ISelEnv* env, IRTemp tmp )
311 vassert(tmp >= 0);
312 vassert(tmp < env->n_vregmap);
313 vassert(! hregIsInvalid(env->vregmapMedLo[tmp]));
314 *vrLO = env->vregmapLo[tmp];
315 *vrHI = env->vregmapMedLo[tmp];
318 /* Only for use in 32-bit mode */
319 static void lookupIRTempQuad ( HReg* vrHi, HReg* vrMedHi, HReg* vrMedLo,
320 HReg* vrLo, ISelEnv* env, IRTemp tmp )
322 vassert(!env->mode64);
323 vassert(tmp >= 0);
324 vassert(tmp < env->n_vregmap);
325 vassert(! hregIsInvalid(env->vregmapMedLo[tmp]));
326 *vrHi = env->vregmapHi[tmp];
327 *vrMedHi = env->vregmapMedHi[tmp];
328 *vrMedLo = env->vregmapMedLo[tmp];
329 *vrLo = env->vregmapLo[tmp];
332 static void addInstr ( ISelEnv* env, PPCInstr* instr )
334 addHInstr(env->code, instr);
335 if (vex_traceflags & VEX_TRACE_VCODE) {
336 ppPPCInstr(instr, env->mode64);
337 vex_printf("\n");
341 static HReg newVRegI ( ISelEnv* env )
343 HReg reg
344 = mkHReg(True/*vreg*/, HRcGPR(env->mode64), 0/*enc*/, env->vreg_ctr);
345 env->vreg_ctr++;
346 return reg;
349 static HReg newVRegF ( ISelEnv* env )
351 HReg reg = mkHReg(True/*vreg*/, HRcFlt64, 0/*enc*/, env->vreg_ctr);
352 env->vreg_ctr++;
353 return reg;
356 static HReg newVRegV ( ISelEnv* env )
358 HReg reg = mkHReg(True/*vreg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
359 env->vreg_ctr++;
360 return reg;
364 /*---------------------------------------------------------*/
365 /*--- ISEL: Forward declarations ---*/
366 /*---------------------------------------------------------*/
368 /* These are organised as iselXXX and iselXXX_wrk pairs. The
369 iselXXX_wrk do the real work, but are not to be called directly.
370 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
371 checks that all returned registers are virtual. You should not
372 call the _wrk version directly.
374 'Word' refers to the size of the native machine word, that is,
375 32-bit int in 32-bit mode and 64-bit int in 64-bit mode. '2Word'
376 therefore refers to a double-width (64/128-bit) quantity in two
377 integer registers.
379 /* 32-bit mode: compute an I8/I16/I32 into a GPR.
380 64-bit mode: compute an I8/I16/I32/I64 into a GPR. */
381 static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
382 IREndness IEndianess );
383 static HReg iselWordExpr_R ( ISelEnv* env, const IRExpr* e,
384 IREndness IEndianess );
386 /* 32-bit mode: Compute an I8/I16/I32 into a RH
387 (reg-or-halfword-immediate).
388 64-bit mode: Compute an I8/I16/I32/I64 into a RH
389 (reg-or-halfword-immediate).
390 It's important to specify whether the immediate is to be regarded
391 as signed or not. If yes, this will never return -32768 as an
392 immediate; this guarantees that all signed immediates that are
393 returned can have their sign inverted if need be.
395 static PPCRH* iselWordExpr_RH_wrk ( ISelEnv* env,
396 Bool syned, const IRExpr* e,
397 IREndness IEndianess );
398 static PPCRH* iselWordExpr_RH ( ISelEnv* env,
399 Bool syned, const IRExpr* e,
400 IREndness IEndianess );
402 /* 32-bit mode: compute an I32 into a RI (reg or 32-bit immediate).
403 64-bit mode: compute an I64 into a RI (reg or 64-bit immediate). */
404 static PPCRI* iselWordExpr_RI_wrk ( ISelEnv* env, const IRExpr* e,
405 IREndness IEndianess );
406 static PPCRI* iselWordExpr_RI ( ISelEnv* env, const IRExpr* e,
407 IREndness IEndianess );
409 /* In 32 bit mode ONLY, compute an I8 into a
410 reg-or-5-bit-unsigned-immediate, the latter being an immediate in
411 the range 1 .. 31 inclusive. Used for doing shift amounts. */
412 static PPCRH* iselWordExpr_RH5u_wrk ( ISelEnv* env, const IRExpr* e,
413 IREndness IEndianess );
414 static PPCRH* iselWordExpr_RH5u ( ISelEnv* env, const IRExpr* e,
415 IREndness IEndianess );
417 /* In 64-bit mode ONLY, compute an I8 into a
418 reg-or-6-bit-unsigned-immediate, the latter being an immediate in
419 the range 1 .. 63 inclusive. Used for doing shift amounts. */
420 static PPCRH* iselWordExpr_RH6u_wrk ( ISelEnv* env, const IRExpr* e,
421 IREndness IEndianess );
422 static PPCRH* iselWordExpr_RH6u ( ISelEnv* env, const IRExpr* e,
423 IREndness IEndianess );
425 /* 32-bit mode: compute an I32 into an AMode.
426 64-bit mode: compute an I64 into an AMode.
428 Requires to know (xferTy) the type of data to be loaded/stored
429 using this amode. That is so that, for 64-bit code generation, any
430 PPCAMode_IR returned will have an index (immediate offset) field
431 that is guaranteed to be 4-aligned, if there is any chance that the
432 amode is to be used in ld/ldu/lda/std/stdu.
434 Since there are no such restrictions on 32-bit insns, xferTy is
435 ignored for 32-bit code generation. */
436 static PPCAMode* iselWordExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e,
437 IRType xferTy,
438 IREndness IEndianess );
439 static PPCAMode* iselWordExpr_AMode ( ISelEnv* env, const IRExpr* e,
440 IRType xferTy,
441 IREndness IEndianess );
443 static void iselInt128Expr_to_32x4_wrk ( HReg* rHi, HReg* rMedHi,
444 HReg* rMedLo, HReg* rLo,
445 ISelEnv* env, const IRExpr* e,
446 IREndness IEndianess );
447 static void iselInt128Expr_to_32x4 ( HReg* rHi, HReg* rMedHi,
448 HReg* rMedLo, HReg* rLo,
449 ISelEnv* env, const IRExpr* e,
450 IREndness IEndianess );
453 /* 32-bit mode ONLY: compute an I64 into a GPR pair. */
454 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
455 ISelEnv* env, const IRExpr* e,
456 IREndness IEndianess );
457 static void iselInt64Expr ( HReg* rHi, HReg* rLo,
458 ISelEnv* env, const IRExpr* e,
459 IREndness IEndianess );
461 /* 64-bit mode ONLY: compute an I128 into a GPR64 pair. */
462 static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
463 ISelEnv* env, const IRExpr* e,
464 IREndness IEndianess );
466 static void iselInt128Expr ( HReg* rHi, HReg* rLo,
467 ISelEnv* env, const IRExpr* e,
468 IREndness IEndianess );
470 static PPCCondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e,
471 IREndness IEndianess );
472 static PPCCondCode iselCondCode ( ISelEnv* env, const IRExpr* e,
473 IREndness IEndianess );
475 static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e,
476 IREndness IEndianess );
477 static HReg iselDblExpr ( ISelEnv* env, const IRExpr* e,
478 IREndness IEndianess );
480 static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e,
481 IREndness IEndianess );
482 static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e,
483 IREndness IEndianess );
485 static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e,
486 IREndness IEndianess );
487 static HReg iselVecExpr ( ISelEnv* env, const IRExpr* e,
488 IREndness IEndianess );
490 /* 64-bit mode ONLY. */
491 static HReg iselDfp32Expr_wrk ( ISelEnv* env, const IRExpr* e,
492 IREndness IEndianess );
493 static HReg iselDfp32Expr ( ISelEnv* env, const IRExpr* e,
494 IREndness IEndianess );
495 static HReg iselDfp64Expr_wrk ( ISelEnv* env, const IRExpr* e,
496 IREndness IEndianess );
497 static HReg iselDfp64Expr ( ISelEnv* env, const IRExpr* e,
498 IREndness IEndianess );
499 static HReg iselFp128Expr_wrk ( ISelEnv* env, const IRExpr* e,
500 IREndness IEndianess);
501 static HReg iselFp128Expr ( ISelEnv* env, const IRExpr* e,
502 IREndness IEndianess);
504 /* 64-bit mode ONLY: compute an D128 into a GPR64 pair. */
505 static void iselDfp128Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
506 const IRExpr* e, IREndness IEndianess );
507 static void iselDfp128Expr ( HReg* rHi, HReg* rLo, ISelEnv* env,
508 const IRExpr* e, IREndness IEndianess );
510 /*---------------------------------------------------------*/
511 /*--- ISEL: Misc helpers ---*/
512 /*---------------------------------------------------------*/
514 /* Make an int reg-reg move. */
516 static PPCInstr* mk_iMOVds_RR ( HReg r_dst, HReg r_src )
518 vassert(hregClass(r_dst) == hregClass(r_src));
519 vassert(hregClass(r_src) == HRcInt32 ||
520 hregClass(r_src) == HRcInt64);
521 return PPCInstr_Alu(Palu_OR, r_dst, r_src, PPCRH_Reg(r_src));
524 /* Advance/retreat %r1 by n. */
526 static void add_to_sp ( ISelEnv* env, UInt n )
528 HReg sp = StackFramePtr(env->mode64);
529 vassert(n <= 1024 && (n%16) == 0);
530 addInstr(env, PPCInstr_Alu( Palu_ADD, sp, sp,
531 PPCRH_Imm(True,toUShort(n)) ));
534 static void sub_from_sp ( ISelEnv* env, UInt n )
536 HReg sp = StackFramePtr(env->mode64);
537 vassert(n <= 1024 && (n%16) == 0);
538 addInstr(env, PPCInstr_Alu( Palu_SUB, sp, sp,
539 PPCRH_Imm(True,toUShort(n)) ));
543 returns a quadword aligned address on the stack
544 - copies SP, adds 16bytes, aligns to quadword.
545 use sub_from_sp(32) before calling this,
546 as expects to have 32 bytes to play with.
548 static HReg get_sp_aligned16 ( ISelEnv* env )
550 HReg r = newVRegI(env);
551 HReg align16 = newVRegI(env);
552 addInstr(env, mk_iMOVds_RR(r, StackFramePtr(env->mode64)));
553 // add 16
554 addInstr(env, PPCInstr_Alu( Palu_ADD, r, r,
555 PPCRH_Imm(True,toUShort(16)) ));
556 // mask to quadword
557 addInstr(env,
558 PPCInstr_LI(align16, 0xFFFFFFFFFFFFFFF0ULL, env->mode64));
559 addInstr(env, PPCInstr_Alu(Palu_AND, r,r, PPCRH_Reg(align16)));
560 return r;
565 /* Load 2*I32 regs to fp reg */
566 static HReg mk_LoadRR32toFPR ( ISelEnv* env,
567 HReg r_srcHi, HReg r_srcLo )
569 HReg fr_dst = newVRegF(env);
570 PPCAMode *am_addr0, *am_addr1;
572 vassert(!env->mode64);
573 vassert(hregClass(r_srcHi) == HRcInt32);
574 vassert(hregClass(r_srcLo) == HRcInt32);
576 sub_from_sp( env, 16 ); // Move SP down 16 bytes
577 am_addr0 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
578 am_addr1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
580 // store hi,lo as Ity_I32's
581 addInstr(env, PPCInstr_Store( 4, am_addr0, r_srcHi, env->mode64 ));
582 addInstr(env, PPCInstr_Store( 4, am_addr1, r_srcLo, env->mode64 ));
584 // load as float
585 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, am_addr0));
587 add_to_sp( env, 16 ); // Reset SP
588 return fr_dst;
591 /* Load I64 reg to fp reg */
592 static HReg mk_LoadR64toFPR ( ISelEnv* env, HReg r_src )
594 HReg fr_dst = newVRegF(env);
595 PPCAMode *am_addr0;
597 vassert(env->mode64);
598 vassert(hregClass(r_src) == HRcInt64);
600 sub_from_sp( env, 16 ); // Move SP down 16 bytes
601 am_addr0 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
603 // store as Ity_I64
604 addInstr(env, PPCInstr_Store( 8, am_addr0, r_src, env->mode64 ));
606 // load as float
607 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, am_addr0));
609 add_to_sp( env, 16 ); // Reset SP
610 return fr_dst;
614 /* Given an amode, return one which references 4 bytes further
615 along. */
617 static PPCAMode* advance4 ( ISelEnv* env, PPCAMode* am )
619 PPCAMode* am4 = dopyPPCAMode( am );
620 if (am4->tag == Pam_IR
621 && am4->Pam.IR.index + 4 <= 32767) {
622 am4->Pam.IR.index += 4;
623 } else {
624 vpanic("advance4(ppc,host)");
626 return am4;
630 /* Given a guest-state array descriptor, an index expression and a
631 bias, generate a PPCAMode pointing at the relevant piece of
632 guest state. */
633 static
634 PPCAMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
635 IRExpr* off, Int bias, IREndness IEndianess )
637 HReg rtmp, roff;
638 Int elemSz = sizeofIRType(descr->elemTy);
639 Int nElems = descr->nElems;
640 Int shift = 0;
642 /* MAX is somewhat arbitrarily, needs to be at least
643 3 times the size of VexGuestPPC64State */
644 #define MAX 6500
646 /* Throw out any cases we don't need. In theory there might be a
647 day where we need to handle others, but not today. */
649 if (nElems != 16 && nElems != 32)
650 vpanic("genGuestArrayOffset(ppc host)(1)");
652 switch (elemSz) {
653 case 4: shift = 2; break;
654 case 8: shift = 3; break;
655 default: vpanic("genGuestArrayOffset(ppc host)(2)");
658 if (bias < -100 || bias > 100) /* somewhat arbitrarily */
659 vpanic("genGuestArrayOffset(ppc host)(3)");
660 if (descr->base < 0 || descr->base > MAX) { /* somewhat arbitrarily */
661 vex_printf("ERROR: descr->base = %d, is greater then maximum = %d\n",
662 descr->base, MAX);
663 vpanic("genGuestArrayOffset(ppc host)(4)");
666 /* Compute off into a reg, %off. Then return:
668 addi %tmp, %off, bias (if bias != 0)
669 andi %tmp, nElems-1
670 sldi %tmp, shift
671 addi %tmp, %tmp, base
672 ... Baseblockptr + %tmp ...
674 roff = iselWordExpr_R(env, off, IEndianess);
675 rtmp = newVRegI(env);
676 addInstr(env, PPCInstr_Alu(
677 Palu_ADD,
678 rtmp, roff,
679 PPCRH_Imm(True/*signed*/, toUShort(bias))));
680 addInstr(env, PPCInstr_Alu(
681 Palu_AND,
682 rtmp, rtmp,
683 PPCRH_Imm(False/*unsigned*/, toUShort(nElems-1))));
684 addInstr(env, PPCInstr_Shft(
685 Pshft_SHL,
686 env->mode64 ? False : True/*F:64-bit, T:32-bit shift*/,
687 rtmp, rtmp,
688 PPCRH_Imm(False/*unsigned*/, toUShort(shift))));
689 addInstr(env, PPCInstr_Alu(
690 Palu_ADD,
691 rtmp, rtmp,
692 PPCRH_Imm(True/*signed*/, toUShort(descr->base))));
693 return
694 PPCAMode_RR( GuestStatePtr(env->mode64), rtmp );
695 #undef MAX
699 /*---------------------------------------------------------*/
700 /*--- ISEL: Function call helpers ---*/
701 /*---------------------------------------------------------*/
703 /* Used only in doHelperCall. See big comment in doHelperCall re
704 handling of register-parameter args. This function figures out
705 whether evaluation of an expression might require use of a fixed
706 register. If in doubt return True (safe but suboptimal).
708 static
709 Bool mightRequireFixedRegs ( IRExpr* e )
711 switch (e->tag) {
712 case Iex_RdTmp: case Iex_Const: case Iex_Get:
713 return False;
714 default:
715 return True;
720 /* Do a complete function call. |guard| is a Ity_Bit expression
721 indicating whether or not the call happens. If guard==NULL, the
722 call is unconditional. |retloc| is set to indicate where the
723 return value is after the call. The caller (of this fn) must
724 generate code to add |stackAdjustAfterCall| to the stack pointer
725 after the call is done. */
727 static
728 void doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall,
729 /*OUT*/RetLoc* retloc,
730 ISelEnv* env,
731 IRExpr* guard,
732 IRCallee* cee, IRType retTy, IRExpr** args,
733 IREndness IEndianess)
735 PPCCondCode cc;
736 HReg argregs[PPC_N_REGPARMS];
737 HReg tmpregs[PPC_N_REGPARMS];
738 Bool go_fast;
739 Int n_args, i, argreg;
740 UInt argiregs;
741 Bool mode64 = env->mode64;
743 /* Set default returns. We'll update them later if needed. */
744 *stackAdjustAfterCall = 0;
745 *retloc = mk_RetLoc_INVALID();
747 /* These are used for cross-checking that IR-level constraints on
748 the use of IRExpr_VECRET() and IRExpr_GSPTR() are observed. */
749 UInt nVECRETs = 0;
750 UInt nGSPTRs = 0;
752 /* Marshal args for a call and do the call.
754 This function only deals with a tiny set of possibilities, which
755 cover all helpers in practice. The restrictions are that only
756 arguments in registers are supported, hence only PPC_N_REGPARMS x
757 (mode32:32 | mode64:64) integer bits in total can be passed.
758 In fact the only supported arg type is (mode32:I32 | mode64:I64).
760 The return type can be I{64,32,16,8} or V{128,256}. In the
761 latter two cases, it is expected that |args| will contain the
762 special node IRExpr_VECRET(), in which case this routine
763 generates code to allocate space on the stack for the vector
764 return value. Since we are not passing any scalars on the
765 stack, it is enough to preallocate the return space before
766 marshalling any arguments, in this case.
768 |args| may also contain IRExpr_GSPTR(), in which case the value
769 in the guest state pointer register is passed as the
770 corresponding argument.
772 Generating code which is both efficient and correct when
773 parameters are to be passed in registers is difficult, for the
774 reasons elaborated in detail in comments attached to
775 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
776 of the method described in those comments.
778 The problem is split into two cases: the fast scheme and the
779 slow scheme. In the fast scheme, arguments are computed
780 directly into the target (real) registers. This is only safe
781 when we can be sure that computation of each argument will not
782 trash any real registers set by computation of any other
783 argument.
785 In the slow scheme, all args are first computed into vregs, and
786 once they are all done, they are moved to the relevant real
787 regs. This always gives correct code, but it also gives a bunch
788 of vreg-to-rreg moves which are usually redundant but are hard
789 for the register allocator to get rid of.
791 To decide which scheme to use, all argument expressions are
792 first examined. If they are all so simple that it is clear they
793 will be evaluated without use of any fixed registers, use the
794 fast scheme, else use the slow scheme. Note also that only
795 unconditional calls may use the fast scheme, since having to
796 compute a condition expression could itself trash real
797 registers.
799 Note this requires being able to examine an expression and
800 determine whether or not evaluation of it might use a fixed
801 register. That requires knowledge of how the rest of this insn
802 selector works. Currently just the following 3 are regarded as
803 safe -- hopefully they cover the majority of arguments in
804 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
807 /* Note that the cee->regparms field is meaningless on PPC32/64 host
808 (since there is only one calling convention) and so we always
809 ignore it. */
811 n_args = 0;
812 for (i = 0; args[i]; i++)
813 n_args++;
815 if (n_args > PPC_N_REGPARMS) {
816 vpanic("doHelperCall(PPC): cannot currently handle > 8 args");
817 // PPC_N_REGPARMS
820 /* This is kind of stupid .. the arrays are sized as PPC_N_REGPARMS
821 but we then assume that that value is 8. */
822 vassert(PPC_N_REGPARMS == 8);
824 argregs[0] = hregPPC_GPR3(mode64);
825 argregs[1] = hregPPC_GPR4(mode64);
826 argregs[2] = hregPPC_GPR5(mode64);
827 argregs[3] = hregPPC_GPR6(mode64);
828 argregs[4] = hregPPC_GPR7(mode64);
829 argregs[5] = hregPPC_GPR8(mode64);
830 argregs[6] = hregPPC_GPR9(mode64);
831 argregs[7] = hregPPC_GPR10(mode64);
832 argiregs = 0;
834 tmpregs[0] = tmpregs[1] = tmpregs[2] =
835 tmpregs[3] = tmpregs[4] = tmpregs[5] =
836 tmpregs[6] = tmpregs[7] = INVALID_HREG;
838 /* First decide which scheme (slow or fast) is to be used. First
839 assume the fast scheme, and select slow if any contraindications
840 (wow) appear. */
842 go_fast = True;
844 /* We'll need space on the stack for the return value. Avoid
845 possible complications with nested calls by using the slow
846 scheme. */
847 if (retTy == Ity_V128 || retTy == Ity_V256)
848 go_fast = False;
850 if (go_fast && guard) {
851 if (guard->tag == Iex_Const
852 && guard->Iex.Const.con->tag == Ico_U1
853 && guard->Iex.Const.con->Ico.U1 == True) {
854 /* unconditional */
855 } else {
856 /* Not manifestly unconditional -- be conservative. */
857 go_fast = False;
861 if (go_fast) {
862 for (i = 0; i < n_args; i++) {
863 IRExpr* arg = args[i];
864 if (UNLIKELY(arg->tag == Iex_GSPTR)) {
865 /* that's OK */
867 else if (UNLIKELY(arg->tag == Iex_VECRET)) {
868 /* This implies ill-formed IR, since if the IR was
869 well-formed, the return-type test above would have
870 filtered it out. */
871 vpanic("doHelperCall(PPC): invalid IR");
873 else if (mightRequireFixedRegs(arg)) {
874 go_fast = False;
875 break;
880 /* At this point the scheme to use has been established. Generate
881 code to get the arg values into the argument rregs. */
883 if (go_fast) {
885 /* FAST SCHEME */
886 argreg = 0;
888 for (i = 0; i < n_args; i++) {
889 IRExpr* arg = args[i];
890 vassert(argreg < PPC_N_REGPARMS);
892 if (arg->tag == Iex_GSPTR) {
893 argiregs |= (1 << (argreg+3));
894 addInstr(env, mk_iMOVds_RR( argregs[argreg],
895 GuestStatePtr(mode64) ));
896 argreg++;
897 } else {
898 vassert(arg->tag != Iex_VECRET);
899 IRType ty = typeOfIRExpr(env->type_env, arg);
900 vassert(ty == Ity_I32 || ty == Ity_I64);
901 if (!mode64) {
902 if (ty == Ity_I32) {
903 argiregs |= (1 << (argreg+3));
904 addInstr(env,
905 mk_iMOVds_RR( argregs[argreg],
906 iselWordExpr_R(env, arg,
907 IEndianess) ));
908 } else { // Ity_I64 in 32-bit mode
909 HReg rHi, rLo;
910 if ((argreg%2) == 1)
911 // ppc32 ELF abi spec for passing LONG_LONG
912 argreg++; // XXX: odd argreg => even rN
913 vassert(argreg < PPC_N_REGPARMS-1);
914 iselInt64Expr(&rHi,&rLo, env, arg, IEndianess);
915 argiregs |= (1 << (argreg+3));
916 addInstr(env, mk_iMOVds_RR( argregs[argreg++], rHi ));
917 argiregs |= (1 << (argreg+3));
918 addInstr(env, mk_iMOVds_RR( argregs[argreg], rLo));
920 } else { // mode64
921 argiregs |= (1 << (argreg+3));
922 addInstr(env, mk_iMOVds_RR( argregs[argreg],
923 iselWordExpr_R(env, arg,
924 IEndianess) ));
926 argreg++;
927 } /* if (arg == IRExprP__BBPR) */
930 /* Fast scheme only applies for unconditional calls. Hence: */
931 cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
933 } else {
935 /* SLOW SCHEME; move via temporaries */
936 argreg = 0;
938 /* If we have a vector return type, allocate a place for it on
939 the stack and record its address. Rather than figure out the
940 complexities of PPC{32,64} ELF ABI stack frame layout, simply
941 drop the SP by 1024 and allocate the return point in the
942 middle. I think this should comfortably clear any ABI
943 mandated register save areas. Note that it doesn't maintain
944 the backchain as it should, since we're not doing st{d,w}u to
945 adjust the SP, but .. that doesn't seem to be a big deal.
946 Since we're not expecting to have to unwind out of here. */
947 HReg r_vecRetAddr = INVALID_HREG;
948 if (retTy == Ity_V128) {
949 r_vecRetAddr = newVRegI(env);
950 sub_from_sp(env, 512);
951 addInstr(env, mk_iMOVds_RR( r_vecRetAddr, StackFramePtr(mode64) ));
952 sub_from_sp(env, 512);
954 else if (retTy == Ity_V256) {
955 vassert(0); //ATC
956 r_vecRetAddr = newVRegI(env);
957 sub_from_sp(env, 512);
958 addInstr(env, mk_iMOVds_RR( r_vecRetAddr, StackFramePtr(mode64) ));
959 sub_from_sp(env, 512);
962 vassert(n_args >= 0 && n_args <= 8);
963 for (i = 0; i < n_args; i++) {
964 IRExpr* arg = args[i];
965 vassert(argreg < PPC_N_REGPARMS);
966 if (UNLIKELY(arg->tag == Iex_GSPTR)) {
967 tmpregs[argreg] = newVRegI(env);
968 addInstr(env, mk_iMOVds_RR( tmpregs[argreg],
969 GuestStatePtr(mode64) ));
970 nGSPTRs++;
972 else if (UNLIKELY(arg->tag == Iex_VECRET)) {
973 /* We stashed the address of the return slot earlier, so just
974 retrieve it now. */
975 vassert(!hregIsInvalid(r_vecRetAddr));
976 tmpregs[i] = r_vecRetAddr;
977 nVECRETs++;
979 else {
980 IRType ty = typeOfIRExpr(env->type_env, arg);
981 vassert(ty == Ity_I32 || ty == Ity_I64);
982 if (!mode64) {
983 if (ty == Ity_I32) {
984 tmpregs[argreg] = iselWordExpr_R(env, arg, IEndianess);
985 } else { // Ity_I64 in 32-bit mode
986 HReg rHi, rLo;
987 if ((argreg%2) == 1)
988 // ppc32 ELF abi spec for passing LONG_LONG
989 argreg++; // XXX: odd argreg => even rN
990 vassert(argreg < PPC_N_REGPARMS-1);
991 iselInt64Expr(&rHi,&rLo, env, arg, IEndianess);
992 tmpregs[argreg++] = rHi;
993 tmpregs[argreg] = rLo;
995 } else { // mode64
996 tmpregs[argreg] = iselWordExpr_R(env, arg, IEndianess);
999 argreg++;
1002 /* Now we can compute the condition. We can't do it earlier
1003 because the argument computations could trash the condition
1004 codes. Be a bit clever to handle the common case where the
1005 guard is 1:Bit. */
1006 cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
1007 if (guard) {
1008 if (guard->tag == Iex_Const
1009 && guard->Iex.Const.con->tag == Ico_U1
1010 && guard->Iex.Const.con->Ico.U1 == True) {
1011 /* unconditional -- do nothing */
1012 } else {
1013 cc = iselCondCode( env, guard, IEndianess );
1017 /* Move the args to their final destinations. */
1018 for (i = 0; i < argreg; i++) {
1019 if (hregIsInvalid(tmpregs[i])) // Skip invalid regs
1020 continue;
1021 /* None of these insns, including any spill code that might
1022 be generated, may alter the condition codes. */
1023 argiregs |= (1 << (i+3));
1024 addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
1029 /* Do final checks, set the return values, and generate the call
1030 instruction proper. */
1031 if (retTy == Ity_V128 || retTy == Ity_V256) {
1032 vassert(nVECRETs == 1);
1033 } else {
1034 vassert(nVECRETs == 0);
1037 vassert(nGSPTRs == 0 || nGSPTRs == 1);
1039 vassert(*stackAdjustAfterCall == 0);
1040 vassert(is_RetLoc_INVALID(*retloc));
1041 switch (retTy) {
1042 case Ity_INVALID:
1043 /* Function doesn't return a value. */
1044 *retloc = mk_RetLoc_simple(RLPri_None);
1045 break;
1046 case Ity_I64:
1047 *retloc = mk_RetLoc_simple(mode64 ? RLPri_Int : RLPri_2Int);
1048 break;
1049 case Ity_I32: case Ity_I16: case Ity_I8:
1050 *retloc = mk_RetLoc_simple(RLPri_Int);
1051 break;
1052 case Ity_V128:
1053 /* Result is 512 bytes up the stack, and after it has been
1054 retrieved, adjust SP upwards by 1024. */
1055 *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 512);
1056 *stackAdjustAfterCall = 1024;
1057 break;
1058 case Ity_V256:
1059 vassert(0); // ATC
1060 /* Ditto */
1061 *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 512);
1062 *stackAdjustAfterCall = 1024;
1063 break;
1064 default:
1065 /* IR can denote other possible return types, but we don't
1066 handle those here. */
1067 vassert(0);
1070 /* Finally, generate the call itself. This needs the *retloc value
1071 set in the switch above, which is why it's at the end. */
1073 Addr64 target = mode64 ? (Addr)cee->addr
1074 : toUInt((Addr)(cee->addr));
1075 addInstr(env, PPCInstr_Call( cc, target, argiregs, *retloc ));
1079 /*---------------------------------------------------------*/
1080 /*--- ISEL: FP rounding mode helpers ---*/
1081 /*---------------------------------------------------------*/
1083 ///* Set FPU's rounding mode to the default */
1084 //static
1085 //void set_FPU_rounding_default ( ISelEnv* env )
1087 // HReg fr_src = newVRegF(env);
1088 // HReg r_src = newVRegI(env);
1090 // /* Default rounding mode = 0x0
1091 // Only supporting the rounding-mode bits - the rest of FPSCR is 0x0
1092 // - so we can set the whole register at once (faster)
1093 // note: upper 32 bits ignored by FpLdFPSCR
1094 // */
1095 // addInstr(env, PPCInstr_LI(r_src, 0x0, env->mode64));
1096 // if (env->mode64) {
1097 // fr_src = mk_LoadR64toFPR( env, r_src ); // 1*I64 -> F64
1098 // } else {
1099 // fr_src = mk_LoadRR32toFPR( env, r_src, r_src ); // 2*I32 -> F64
1100 // }
1101 // addInstr(env, PPCInstr_FpLdFPSCR( fr_src ));
1104 /* Convert IR rounding mode to PPC encoding */
1105 static HReg roundModeIRtoPPC ( ISelEnv* env, HReg r_rmIR )
1108 rounding mode | PPC | IR
1109 -----------------------------------------------
1110 to nearest, ties to even | 000 | 000
1111 to zero | 001 | 011
1112 to +infinity | 010 | 010
1113 to -infinity | 011 | 001
1114 +++++ Below are the extended rounding modes for decimal floating point +++++
1115 to nearest, ties away from 0 | 100 | 100
1116 to nearest, ties toward 0 | 101 | 111
1117 to away from 0 | 110 | 110
1118 to prepare for shorter precision | 111 | 101
1120 HReg r_rmPPC = newVRegI(env);
1121 HReg r_tmp1 = newVRegI(env);
1122 HReg r_tmp2 = newVRegI(env);
1124 vassert(hregClass(r_rmIR) == HRcGPR(env->mode64));
1126 // r_rmPPC = XOR(r_rmIR, r_rmIR << 1) & 3
1128 // slwi tmp1, r_rmIR, 1
1129 // xor tmp1, r_rmIR, tmp1
1130 // andi r_rmPPC, tmp1, 3
1132 addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1133 r_tmp1, r_rmIR, PPCRH_Imm(False,1)));
1135 addInstr( env, PPCInstr_Alu( Palu_AND,
1136 r_tmp2, r_tmp1, PPCRH_Imm( False, 3 ) ) );
1138 addInstr( env, PPCInstr_Alu( Palu_XOR,
1139 r_rmPPC, r_rmIR, PPCRH_Reg( r_tmp2 ) ) );
1141 return r_rmPPC;
1145 /* Set the FPU's rounding mode: 'mode' is an I32-typed expression
1146 denoting a value in the range 0 .. 7, indicating a round mode
1147 encoded as per type IRRoundingMode. Set the PPC FPSCR to have the
1148 same rounding. When the dfp_rm arg is True, set the decimal
1149 floating point rounding mode bits (29:31); otherwise, set the
1150 binary floating point rounding mode bits (62:63).
1152 For speed & simplicity, we're setting the *entire* FPSCR here.
1154 Setting the rounding mode is expensive. So this function tries to
1155 avoid repeatedly setting the rounding mode to the same thing by
1156 first comparing 'mode' to the 'mode' tree supplied in the previous
1157 call to this function, if any. (The previous value is stored in
1158 env->previous_rm.) If 'mode' is a single IR temporary 't' and
1159 env->previous_rm is also just 't', then the setting is skipped.
1161 This is safe because of the SSA property of IR: an IR temporary can
1162 only be defined once and so will have the same value regardless of
1163 where it appears in the block. Cool stuff, SSA.
1165 A safety condition: all attempts to set the RM must be aware of
1166 this mechanism - by being routed through the functions here.
1168 Of course this only helps if blocks where the RM is set more than
1169 once and it is set to the same value each time, *and* that value is
1170 held in the same IR temporary each time. In order to assure the
1171 latter as much as possible, the IR optimiser takes care to do CSE
1172 on any block with any sign of floating point activity.
1174 static
1175 void _set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode, Bool dfp_rm,
1176 IREndness IEndianess )
1178 HReg fr_src = newVRegF(env);
1179 HReg r_src;
1181 vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);
1183 /* Do we need to do anything? */
1184 if (env->previous_rm
1185 && env->previous_rm->tag == Iex_RdTmp
1186 && mode->tag == Iex_RdTmp
1187 && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
1188 /* no - setting it to what it was before. */
1189 vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
1190 return;
1193 /* No luck - we better set it, and remember what we set it to. */
1194 env->previous_rm = mode;
1196 /* Only supporting the rounding-mode bits - the rest of FPSCR is
1197 0x0 - so we can set the whole register at once (faster). */
1199 // Resolve rounding mode and convert to PPC representation
1200 r_src = roundModeIRtoPPC( env, iselWordExpr_R(env, mode, IEndianess) );
1202 // gpr -> fpr
1203 if (env->mode64) {
1204 if (dfp_rm) {
1205 HReg r_tmp1 = newVRegI( env );
1206 addInstr( env,
1207 PPCInstr_Shft( Pshft_SHL, False/*64bit shift*/,
1208 r_tmp1, r_src, PPCRH_Imm( False, 32 ) ) );
1209 fr_src = mk_LoadR64toFPR( env, r_tmp1 );
1210 } else {
1211 fr_src = mk_LoadR64toFPR( env, r_src ); // 1*I64 -> F64
1213 } else {
1214 if (dfp_rm) {
1215 HReg r_zero = newVRegI( env );
1216 addInstr( env, PPCInstr_LI( r_zero, 0, env->mode64 ) );
1217 fr_src = mk_LoadRR32toFPR( env, r_src, r_zero );
1218 } else {
1219 fr_src = mk_LoadRR32toFPR( env, r_src, r_src ); // 2*I32 -> F64
1223 // Move to FPSCR
1224 addInstr(env, PPCInstr_FpLdFPSCR( fr_src, dfp_rm ));
1227 static void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode,
1228 IREndness IEndianess )
1230 _set_FPU_rounding_mode(env, mode, False, IEndianess);
1233 static void set_FPU_DFP_rounding_mode ( ISelEnv* env, IRExpr* mode,
1234 IREndness IEndianess )
1236 _set_FPU_rounding_mode(env, mode, True, IEndianess);
1239 static
1240 Bool FPU_rounding_mode_isOdd (IRExpr* mode) {
1241 /* If the rounding mode is set to odd, the the expr must be a constant U8
1242 * value equal to 8. Otherwise, it must be a bin op expressiong that
1243 * calculates the value.
1246 if (mode->tag != Iex_Const)
1247 return False;
1249 vassert(mode->Iex.Const.con->tag == Ico_U32);
1250 vassert(mode->Iex.Const.con->Ico.U32 == 0x8);
1251 return True;
1254 /*---------------------------------------------------------*/
1255 /*--- ISEL: vector helpers ---*/
1256 /*---------------------------------------------------------*/
1258 /* Generate all-zeroes into a new vector register.
1260 static HReg generate_zeroes_V128 ( ISelEnv* env )
1262 HReg dst = newVRegV(env);
1263 addInstr(env, PPCInstr_AvBinary(Pav_XOR, dst, dst, dst));
1264 return dst;
1267 /* Generate all-ones into a new vector register.
1269 static HReg generate_ones_V128 ( ISelEnv* env )
1271 HReg dst = newVRegV(env);
1272 PPCVI5s * src = PPCVI5s_Imm(-1);
1273 addInstr(env, PPCInstr_AvSplat(8, dst, src));
1274 return dst;
1279 Generates code for AvSplat
1280 - takes in IRExpr* of type 8|16|32
1281 returns vector reg of duplicated lanes of input
1282 - uses AvSplat(imm) for imms up to simm6.
1283 otherwise must use store reg & load vector
1285 static HReg mk_AvDuplicateRI( ISelEnv* env, IRExpr* e, IREndness IEndianess )
1287 HReg r_src;
1288 HReg dst = newVRegV(env);
1289 PPCRI* ri = iselWordExpr_RI(env, e, IEndianess);
1290 IRType ty = typeOfIRExpr(env->type_env,e);
1291 UInt sz = (ty == Ity_I8) ? 8 : (ty == Ity_I16) ? 16 : 32;
1292 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
1294 /* special case: immediate */
1295 if (ri->tag == Pri_Imm) {
1296 Int simm32 = (Int)ri->Pri.Imm;
1298 /* figure out if it's do-able with imm splats. */
1299 if (simm32 >= -32 && simm32 <= 31) {
1300 Char simm6 = (Char)simm32;
1301 if (simm6 > 15) { /* 16:31 inclusive */
1302 HReg v1 = newVRegV(env);
1303 HReg v2 = newVRegV(env);
1304 addInstr(env, PPCInstr_AvSplat(sz, v1, PPCVI5s_Imm(-16)));
1305 addInstr(env, PPCInstr_AvSplat(sz, v2, PPCVI5s_Imm(simm6-16)));
1306 addInstr(env,
1307 (sz== 8) ? PPCInstr_AvBin8x16(Pav_SUBU, dst, v2, v1) :
1308 (sz==16) ? PPCInstr_AvBin16x8(Pav_SUBU, dst, v2, v1)
1309 : PPCInstr_AvBin32x4(Pav_SUBU, dst, v2, v1) );
1310 return dst;
1312 if (simm6 < -16) { /* -32:-17 inclusive */
1313 HReg v1 = newVRegV(env);
1314 HReg v2 = newVRegV(env);
1315 addInstr(env, PPCInstr_AvSplat(sz, v1, PPCVI5s_Imm(-16)));
1316 addInstr(env, PPCInstr_AvSplat(sz, v2, PPCVI5s_Imm(simm6+16)));
1317 addInstr(env,
1318 (sz== 8) ? PPCInstr_AvBin8x16(Pav_ADDU, dst, v2, v1) :
1319 (sz==16) ? PPCInstr_AvBin16x8(Pav_ADDU, dst, v2, v1)
1320 : PPCInstr_AvBin32x4(Pav_ADDU, dst, v2, v1) );
1321 return dst;
1323 /* simplest form: -16:15 inclusive */
1324 addInstr(env, PPCInstr_AvSplat(sz, dst, PPCVI5s_Imm(simm6)));
1325 return dst;
1328 /* no luck; use the Slow way. */
1329 r_src = newVRegI(env);
1330 addInstr(env, PPCInstr_LI(r_src, (Long)simm32, env->mode64));
1332 else {
1333 r_src = ri->Pri.Reg;
1337 /* Store r_src multiple times (sz dependent); then load the dest vector. */
1338 HReg r_aligned16;
1339 PPCAMode *am_offset, *am_offset_zero;
1341 sub_from_sp( env, 32 ); // Move SP down
1342 /* Get a 16-aligned address within our stack space */
1343 r_aligned16 = get_sp_aligned16( env );
1345 Int i;
1346 Int stride = (sz == 8) ? 1 : (sz == 16) ? 2 : 4;
1347 UChar num_bytes_to_store = stride;
1348 am_offset_zero = PPCAMode_IR( 0, r_aligned16 );
1349 am_offset = am_offset_zero;
1350 for (i = 0; i < 16; i+=stride, am_offset = PPCAMode_IR( i, r_aligned16)) {
1351 addInstr(env, PPCInstr_Store( num_bytes_to_store, am_offset, r_src, env->mode64 ));
1354 /* Effectively splat the r_src value to dst */
1355 addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 16, dst, am_offset_zero ) );
1356 add_to_sp( env, 32 ); // Reset SP
1358 return dst;
1363 /* for each lane of vSrc: lane == nan ? laneX = all 1's : all 0's */
1364 static HReg isNan ( ISelEnv* env, HReg vSrc, IREndness IEndianess )
1366 HReg zeros, msk_exp, msk_mnt, expt, mnts, vIsNan;
1368 vassert(hregClass(vSrc) == HRcVec128);
1370 zeros = mk_AvDuplicateRI(env, mkU32(0), IEndianess);
1371 msk_exp = mk_AvDuplicateRI(env, mkU32(0x7F800000), IEndianess);
1372 msk_mnt = mk_AvDuplicateRI(env, mkU32(0x7FFFFF), IEndianess);
1373 expt = newVRegV(env);
1374 mnts = newVRegV(env);
1375 vIsNan = newVRegV(env);
1377 /* 32bit float => sign(1) | exponent(8) | mantissa(23)
1378 nan => exponent all ones, mantissa > 0 */
1380 addInstr(env, PPCInstr_AvBinary(Pav_AND, expt, vSrc, msk_exp));
1381 addInstr(env, PPCInstr_AvBin32x4(Pav_CMPEQU, expt, expt, msk_exp));
1382 addInstr(env, PPCInstr_AvBinary(Pav_AND, mnts, vSrc, msk_mnt));
1383 addInstr(env, PPCInstr_AvBin32x4(Pav_CMPGTU, mnts, mnts, zeros));
1384 addInstr(env, PPCInstr_AvBinary(Pav_AND, vIsNan, expt, mnts));
1385 return vIsNan;
1389 /*---------------------------------------------------------*/
1390 /*--- ISEL: Integer expressions (64/32/16/8 bit) ---*/
1391 /*---------------------------------------------------------*/
1393 /* Select insns for an integer-typed expression, and add them to the
1394 code list. Return a reg holding the result. This reg will be a
1395 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
1396 want to modify it, ask for a new vreg, copy it in there, and modify
1397 the copy. The register allocator will do its best to map both
1398 vregs to the same real register, so the copies will often disappear
1399 later in the game.
1401 This should handle expressions of 64, 32, 16 and 8-bit type.
1402 All results are returned in a (mode64 ? 64bit : 32bit) register.
1403 For 16- and 8-bit expressions, the upper (32/48/56 : 16/24) bits
1404 are arbitrary, so you should mask or sign extend partial values
1405 if necessary.
1408 static HReg iselWordExpr_R ( ISelEnv* env, const IRExpr* e,
1409 IREndness IEndianess )
1411 HReg r = iselWordExpr_R_wrk(env, e, IEndianess);
1412 /* sanity checks ... */
1413 # if 0
1414 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1415 # endif
1417 vassert(hregClass(r) == HRcGPR(env->mode64));
1418 vassert(hregIsVirtual(r));
1419 return r;
1422 /* DO NOT CALL THIS DIRECTLY ! */
1423 static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
1424 IREndness IEndianess )
1426 Bool mode64 = env->mode64;
1427 MatchInfo mi;
1428 DECLARE_PATTERN(p_32to1_then_1Uto8);
1430 IRType ty = typeOfIRExpr(env->type_env,e);
1431 vassert(ty == Ity_I8 || ty == Ity_I16 ||
1432 ty == Ity_I32 || ((ty == Ity_I64) && mode64));
1434 switch (e->tag) {
1436 /* --------- TEMP --------- */
1437 case Iex_RdTmp:
1438 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1440 /* --------- LOAD --------- */
1441 case Iex_Load: {
1442 HReg r_dst;
1443 PPCAMode* am_addr;
1444 if (e->Iex.Load.end != IEndianess)
1445 goto irreducible;
1446 r_dst = newVRegI(env);
1447 am_addr = iselWordExpr_AMode( env, e->Iex.Load.addr, ty/*of xfer*/,
1448 IEndianess );
1449 addInstr(env, PPCInstr_Load( toUChar(sizeofIRType(ty)),
1450 r_dst, am_addr, mode64 ));
1451 return r_dst;
1452 /*NOTREACHED*/
1455 /* --------- BINARY OP --------- */
1456 case Iex_Binop: {
1457 PPCAluOp aluOp;
1458 PPCShftOp shftOp;
1460 /* Is it an addition or logical style op? */
1461 switch (e->Iex.Binop.op) {
1462 case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
1463 aluOp = Palu_ADD; break;
1464 case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
1465 aluOp = Palu_SUB; break;
1466 case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
1467 aluOp = Palu_AND; break;
1468 case Iop_Or8: case Iop_Or16: case Iop_Or32: case Iop_Or64:
1469 aluOp = Palu_OR; break;
1470 case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
1471 aluOp = Palu_XOR; break;
1472 default:
1473 aluOp = Palu_INVALID; break;
1475 /* For commutative ops we assume any literal
1476 values are on the second operand. */
1477 if (aluOp != Palu_INVALID) {
1478 HReg r_dst = newVRegI(env);
1479 HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1480 PPCRH* ri_srcR = NULL;
1481 /* get right arg into an RH, in the appropriate way */
1482 switch (aluOp) {
1483 case Palu_ADD: case Palu_SUB:
1484 ri_srcR = iselWordExpr_RH(env, True/*signed*/,
1485 e->Iex.Binop.arg2, IEndianess);
1486 break;
1487 case Palu_AND: case Palu_OR: case Palu_XOR:
1488 ri_srcR = iselWordExpr_RH(env, False/*signed*/,
1489 e->Iex.Binop.arg2, IEndianess);
1490 break;
1491 default:
1492 vpanic("iselWordExpr_R_wrk-aluOp-arg2");
1494 addInstr(env, PPCInstr_Alu(aluOp, r_dst, r_srcL, ri_srcR));
1495 return r_dst;
1498 /* a shift? */
1499 switch (e->Iex.Binop.op) {
1500 case Iop_Shl8: case Iop_Shl16: case Iop_Shl32: case Iop_Shl64:
1501 shftOp = Pshft_SHL; break;
1502 case Iop_Shr8: case Iop_Shr16: case Iop_Shr32: case Iop_Shr64:
1503 shftOp = Pshft_SHR; break;
1504 case Iop_Sar8: case Iop_Sar16: case Iop_Sar32: case Iop_Sar64:
1505 shftOp = Pshft_SAR; break;
1506 default:
1507 shftOp = Pshft_INVALID; break;
1509 /* we assume any literal values are on the second operand. */
1510 if (shftOp != Pshft_INVALID) {
1511 HReg r_dst = newVRegI(env);
1512 HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1513 PPCRH* ri_srcR = NULL;
1514 /* get right arg into an RH, in the appropriate way */
1515 switch (shftOp) {
1516 case Pshft_SHL: case Pshft_SHR: case Pshft_SAR:
1517 if (!mode64)
1518 ri_srcR = iselWordExpr_RH5u(env, e->Iex.Binop.arg2, IEndianess);
1519 else
1520 ri_srcR = iselWordExpr_RH6u(env, e->Iex.Binop.arg2, IEndianess);
1521 break;
1522 default:
1523 vpanic("iselIntExpr_R_wrk-shftOp-arg2");
1525 /* widen the left arg if needed */
1526 if (shftOp == Pshft_SHR || shftOp == Pshft_SAR) {
1527 if (ty == Ity_I8 || ty == Ity_I16) {
1528 PPCRH* amt = PPCRH_Imm(False,
1529 toUShort(ty == Ity_I8 ? 24 : 16));
1530 HReg tmp = newVRegI(env);
1531 addInstr(env, PPCInstr_Shft(Pshft_SHL,
1532 True/*32bit shift*/,
1533 tmp, r_srcL, amt));
1534 addInstr(env, PPCInstr_Shft(shftOp,
1535 True/*32bit shift*/,
1536 tmp, tmp, amt));
1537 r_srcL = tmp;
1540 /* Only 64 expressions need 64bit shifts,
1541 32bit shifts are fine for all others */
1542 if (ty == Ity_I64) {
1543 vassert(mode64);
1544 addInstr(env, PPCInstr_Shft(shftOp, False/*64bit shift*/,
1545 r_dst, r_srcL, ri_srcR));
1546 } else {
1547 addInstr(env, PPCInstr_Shft(shftOp, True/*32bit shift*/,
1548 r_dst, r_srcL, ri_srcR));
1550 return r_dst;
1553 /* How about a div? */
1554 if (e->Iex.Binop.op == Iop_DivS32 ||
1555 e->Iex.Binop.op == Iop_DivU32 ||
1556 e->Iex.Binop.op == Iop_DivS32E ||
1557 e->Iex.Binop.op == Iop_DivU32E) {
1558 Bool syned = toBool((e->Iex.Binop.op == Iop_DivS32) || (e->Iex.Binop.op == Iop_DivS32E));
1559 HReg r_dst = newVRegI(env);
1560 HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1561 HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1562 addInstr( env,
1563 PPCInstr_Div( ( ( e->Iex.Binop.op == Iop_DivU32E )
1564 || ( e->Iex.Binop.op == Iop_DivS32E ) ) ? True
1565 : False,
1566 syned,
1567 True/*32bit div*/,
1568 r_dst,
1569 r_srcL,
1570 r_srcR ) );
1571 return r_dst;
1573 if (e->Iex.Binop.op == Iop_DivS64 ||
1574 e->Iex.Binop.op == Iop_DivU64 || e->Iex.Binop.op == Iop_DivS64E
1575 || e->Iex.Binop.op == Iop_DivU64E ) {
1576 Bool syned = toBool((e->Iex.Binop.op == Iop_DivS64) ||(e->Iex.Binop.op == Iop_DivS64E));
1577 HReg r_dst = newVRegI(env);
1578 HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1579 HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1580 vassert(mode64);
1581 addInstr( env,
1582 PPCInstr_Div( ( ( e->Iex.Binop.op == Iop_DivS64E )
1583 || ( e->Iex.Binop.op
1584 == Iop_DivU64E ) ) ? True
1585 : False,
1586 syned,
1587 False/*64bit div*/,
1588 r_dst,
1589 r_srcL,
1590 r_srcR ) );
1591 return r_dst;
1594 /* No? Anyone for a mul? */
1595 if (e->Iex.Binop.op == Iop_Mul32
1596 || e->Iex.Binop.op == Iop_Mul64) {
1597 Bool syned = False;
1598 Bool sz32 = (e->Iex.Binop.op != Iop_Mul64);
1599 HReg r_dst = newVRegI(env);
1600 HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1601 HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1602 addInstr(env, PPCInstr_MulL(syned, False/*lo32*/, sz32,
1603 r_dst, r_srcL, r_srcR));
1604 return r_dst;
1607 /* 32 x 32 -> 64 multiply */
1608 if (mode64
1609 && (e->Iex.Binop.op == Iop_MullU32
1610 || e->Iex.Binop.op == Iop_MullS32)) {
1611 HReg tLo = newVRegI(env);
1612 HReg tHi = newVRegI(env);
1613 HReg r_dst = newVRegI(env);
1614 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS32);
1615 HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1616 HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1617 addInstr(env, PPCInstr_MulL(False/*signedness irrelevant*/,
1618 False/*lo32*/, True/*32bit mul*/,
1619 tLo, r_srcL, r_srcR));
1620 addInstr(env, PPCInstr_MulL(syned,
1621 True/*hi32*/, True/*32bit mul*/,
1622 tHi, r_srcL, r_srcR));
1623 addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
1624 r_dst, tHi, PPCRH_Imm(False,32)));
1625 addInstr(env, PPCInstr_Alu(Palu_OR,
1626 r_dst, r_dst, PPCRH_Reg(tLo)));
1627 return r_dst;
1630 /* El-mutanto 3-way compare? */
1631 if (e->Iex.Binop.op == Iop_CmpORD32S
1632 || e->Iex.Binop.op == Iop_CmpORD32U) {
1633 Bool syned = toBool(e->Iex.Binop.op == Iop_CmpORD32S);
1634 HReg dst = newVRegI(env);
1635 HReg srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1636 PPCRH* srcR = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2,
1637 IEndianess);
1638 addInstr(env, PPCInstr_Cmp(syned, True/*32bit cmp*/,
1639 7/*cr*/, srcL, srcR));
1640 addInstr(env, PPCInstr_MfCR(dst));
1641 addInstr(env, PPCInstr_Alu(Palu_AND, dst, dst,
1642 PPCRH_Imm(False,7<<1)));
1643 return dst;
1646 if (e->Iex.Binop.op == Iop_CmpORD64S
1647 || e->Iex.Binop.op == Iop_CmpORD64U) {
1648 Bool syned = toBool(e->Iex.Binop.op == Iop_CmpORD64S);
1649 HReg dst = newVRegI(env);
1650 HReg srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1651 PPCRH* srcR = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2,
1652 IEndianess);
1653 vassert(mode64);
1654 addInstr(env, PPCInstr_Cmp(syned, False/*64bit cmp*/,
1655 7/*cr*/, srcL, srcR));
1656 addInstr(env, PPCInstr_MfCR(dst));
1657 addInstr(env, PPCInstr_Alu(Palu_AND, dst, dst,
1658 PPCRH_Imm(False,7<<1)));
1659 return dst;
1662 if (e->Iex.Binop.op == Iop_Max32U) {
1663 HReg r1 = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1664 HReg r2 = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1665 HReg rdst = newVRegI(env);
1666 PPCCondCode cc = mk_PPCCondCode( Pct_TRUE, Pcf_7LT );
1667 addInstr(env, mk_iMOVds_RR(rdst, r1));
1668 addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
1669 7/*cr*/, rdst, PPCRH_Reg(r2)));
1670 addInstr(env, PPCInstr_CMov(cc, rdst, PPCRI_Reg(r2)));
1671 return rdst;
1674 if (e->Iex.Binop.op == Iop_32HLto64) {
1675 HReg r_Hi = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1676 HReg r_Lo = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1677 HReg r_Tmp = newVRegI(env);
1678 HReg r_dst = newVRegI(env);
1679 HReg msk = newVRegI(env);
1680 vassert(mode64);
1681 /* r_dst = OR( r_Hi<<32, r_Lo ) */
1682 addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
1683 r_dst, r_Hi, PPCRH_Imm(False,32)));
1684 addInstr(env, PPCInstr_LI(msk, 0xFFFFFFFF, mode64));
1685 addInstr(env, PPCInstr_Alu( Palu_AND, r_Tmp, r_Lo,
1686 PPCRH_Reg(msk) ));
1687 addInstr(env, PPCInstr_Alu( Palu_OR, r_dst, r_dst,
1688 PPCRH_Reg(r_Tmp) ));
1689 return r_dst;
1692 if ((e->Iex.Binop.op == Iop_CmpF64) ||
1693 (e->Iex.Binop.op == Iop_CmpD64) ||
1694 (e->Iex.Binop.op == Iop_CmpD128)) {
1695 HReg fr_srcL;
1696 HReg fr_srcL_lo;
1697 HReg fr_srcR;
1698 HReg fr_srcR_lo;
1700 HReg r_ccPPC = newVRegI(env);
1701 HReg r_ccIR = newVRegI(env);
1702 HReg r_ccIR_b0 = newVRegI(env);
1703 HReg r_ccIR_b2 = newVRegI(env);
1704 HReg r_ccIR_b6 = newVRegI(env);
1706 if (e->Iex.Binop.op == Iop_CmpF64) {
1707 fr_srcL = iselDblExpr(env, e->Iex.Binop.arg1, IEndianess);
1708 fr_srcR = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
1709 addInstr(env, PPCInstr_FpCmp(r_ccPPC, fr_srcL, fr_srcR));
1711 } else if (e->Iex.Binop.op == Iop_CmpD64) {
1712 fr_srcL = iselDfp64Expr(env, e->Iex.Binop.arg1, IEndianess);
1713 fr_srcR = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
1714 addInstr(env, PPCInstr_Dfp64Cmp(r_ccPPC, fr_srcL, fr_srcR));
1716 } else { // e->Iex.Binop.op == Iop_CmpD128
1717 iselDfp128Expr(&fr_srcL, &fr_srcL_lo, env, e->Iex.Binop.arg1,
1718 IEndianess);
1719 iselDfp128Expr(&fr_srcR, &fr_srcR_lo, env, e->Iex.Binop.arg2,
1720 IEndianess);
1721 addInstr(env, PPCInstr_Dfp128Cmp(r_ccPPC, fr_srcL, fr_srcL_lo,
1722 fr_srcR, fr_srcR_lo));
1725 /* Map compare result from PPC to IR,
1726 conforming to CmpF64 definition. */
1728 FP cmp result | PPC | IR
1729 --------------------------
1730 UN | 0x1 | 0x45
1731 EQ | 0x2 | 0x40
1732 GT | 0x4 | 0x00
1733 LT | 0x8 | 0x01
1736 // r_ccIR_b0 = r_ccPPC[0] | r_ccPPC[3]
1737 addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32bit shift*/,
1738 r_ccIR_b0, r_ccPPC,
1739 PPCRH_Imm(False,0x3)));
1740 addInstr(env, PPCInstr_Alu(Palu_OR, r_ccIR_b0,
1741 r_ccPPC, PPCRH_Reg(r_ccIR_b0)));
1742 addInstr(env, PPCInstr_Alu(Palu_AND, r_ccIR_b0,
1743 r_ccIR_b0, PPCRH_Imm(False,0x1)));
1745 // r_ccIR_b2 = r_ccPPC[0]
1746 addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1747 r_ccIR_b2, r_ccPPC,
1748 PPCRH_Imm(False,0x2)));
1749 addInstr(env, PPCInstr_Alu(Palu_AND, r_ccIR_b2,
1750 r_ccIR_b2, PPCRH_Imm(False,0x4)));
1752 // r_ccIR_b6 = r_ccPPC[0] | r_ccPPC[1]
1753 addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32bit shift*/,
1754 r_ccIR_b6, r_ccPPC,
1755 PPCRH_Imm(False,0x1)));
1756 addInstr(env, PPCInstr_Alu(Palu_OR, r_ccIR_b6,
1757 r_ccPPC, PPCRH_Reg(r_ccIR_b6)));
1758 addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1759 r_ccIR_b6, r_ccIR_b6,
1760 PPCRH_Imm(False,0x6)));
1761 addInstr(env, PPCInstr_Alu(Palu_AND, r_ccIR_b6,
1762 r_ccIR_b6, PPCRH_Imm(False,0x40)));
1764 // r_ccIR = r_ccIR_b0 | r_ccIR_b2 | r_ccIR_b6
1765 addInstr(env, PPCInstr_Alu(Palu_OR, r_ccIR,
1766 r_ccIR_b0, PPCRH_Reg(r_ccIR_b2)));
1767 addInstr(env, PPCInstr_Alu(Palu_OR, r_ccIR,
1768 r_ccIR, PPCRH_Reg(r_ccIR_b6)));
1769 return r_ccIR;
1772 if ( e->Iex.Binop.op == Iop_F64toI32S ||
1773 e->Iex.Binop.op == Iop_F64toI32U ) {
1774 /* This works in both mode64 and mode32. */
1775 HReg r1 = StackFramePtr(env->mode64);
1776 PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
1777 HReg fsrc = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
1778 HReg ftmp = newVRegF(env);
1779 HReg idst = newVRegI(env);
1781 /* Set host rounding mode */
1782 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
1784 sub_from_sp( env, 16 );
1785 addInstr(env, PPCInstr_FpCftI(False/*F->I*/, True/*int32*/,
1786 e->Iex.Binop.op == Iop_F64toI32S ? True/*syned*/
1787 : False,
1788 True/*flt64*/,
1789 ftmp, fsrc));
1790 addInstr(env, PPCInstr_FpSTFIW(r1, ftmp));
1791 addInstr(env, PPCInstr_Load(4, idst, zero_r1, mode64));
1793 /* in 64-bit mode we need to sign-widen idst. */
1794 if (mode64)
1795 addInstr(env, PPCInstr_Unary(Pun_EXTSW, idst, idst));
1797 add_to_sp( env, 16 );
1799 ///* Restore default FPU rounding. */
1800 //set_FPU_rounding_default( env );
1801 return idst;
1804 if (e->Iex.Binop.op == Iop_F64toI64S || e->Iex.Binop.op == Iop_F64toI64U ) {
1805 if (mode64) {
1806 HReg r1 = StackFramePtr(env->mode64);
1807 PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
1808 HReg fsrc = iselDblExpr(env, e->Iex.Binop.arg2,
1809 IEndianess);
1810 HReg idst = newVRegI(env);
1811 HReg ftmp = newVRegF(env);
1813 /* Set host rounding mode */
1814 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
1816 sub_from_sp( env, 16 );
1817 addInstr(env, PPCInstr_FpCftI(False/*F->I*/, False/*int64*/,
1818 ( e->Iex.Binop.op == Iop_F64toI64S ) ? True
1819 : False,
1820 True, ftmp, fsrc));
1821 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
1822 addInstr(env, PPCInstr_Load(8, idst, zero_r1, True/*mode64*/));
1823 add_to_sp( env, 16 );
1825 ///* Restore default FPU rounding. */
1826 //set_FPU_rounding_default( env );
1827 return idst;
1831 if (e->Iex.Binop.op == Iop_D64toI64S ) {
1832 HReg r1 = StackFramePtr(env->mode64);
1833 PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
1834 HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
1835 HReg idst = newVRegI(env);
1836 HReg ftmp = newVRegF(env);
1838 /* Set host rounding mode */
1839 set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
1840 addInstr(env, PPCInstr_Dfp64Unary(Pfp_DCTFIX, ftmp, fr_src));
1841 sub_from_sp( env, 16 );
1842 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
1843 addInstr(env, PPCInstr_Load(8, idst, zero_r1, mode64));
1845 add_to_sp( env, 16 );
1847 ///* Restore default FPU rounding. */
1848 //set_FPU_rounding_default( env );
1849 return idst;
1852 if (e->Iex.Binop.op == Iop_D128toI64S ) {
1853 PPCFpOp fpop = Pfp_DCTFIXQ;
1854 HReg r_srcHi = newVRegF(env);
1855 HReg r_srcLo = newVRegF(env);
1856 HReg idst = newVRegI(env);
1857 HReg ftmp = newVRegF(env);
1858 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
1860 set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
1861 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
1862 IEndianess);
1863 addInstr(env, PPCInstr_DfpD128toD64(fpop, ftmp, r_srcHi, r_srcLo));
1865 // put the D64 result into an integer register
1866 sub_from_sp( env, 16 );
1867 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
1868 addInstr(env, PPCInstr_Load(8, idst, zero_r1, True/*mode64*/));
1869 add_to_sp( env, 16 );
1870 return idst;
1872 break;
1875 /* --------- UNARY OP --------- */
1876 case Iex_Unop: {
1877 IROp op_unop = e->Iex.Unop.op;
1879 /* 1Uto8(32to1(expr32)) */
1880 DEFINE_PATTERN(p_32to1_then_1Uto8,
1881 unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1882 if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1883 const IRExpr* expr32 = mi.bindee[0];
1884 HReg r_dst = newVRegI(env);
1885 HReg r_src = iselWordExpr_R(env, expr32, IEndianess);
1886 addInstr(env, PPCInstr_Alu(Palu_AND, r_dst,
1887 r_src, PPCRH_Imm(False,1)));
1888 return r_dst;
1891 /* 16Uto32(LDbe:I16(expr32)) */
1893 DECLARE_PATTERN(p_LDbe16_then_16Uto32);
1894 DEFINE_PATTERN(p_LDbe16_then_16Uto32,
1895 unop(Iop_16Uto32,
1896 IRExpr_Load(IEndianess,Ity_I16,bind(0))) );
1897 if (matchIRExpr(&mi,p_LDbe16_then_16Uto32,e)) {
1898 HReg r_dst = newVRegI(env);
1899 PPCAMode* amode
1900 = iselWordExpr_AMode( env, mi.bindee[0], Ity_I16/*xfer*/,
1901 IEndianess );
1902 addInstr(env, PPCInstr_Load(2,r_dst,amode, mode64));
1903 return r_dst;
1907 switch (op_unop) {
1908 case Iop_8Uto16:
1909 case Iop_8Uto32:
1910 case Iop_8Uto64:
1911 case Iop_16Uto32:
1912 case Iop_16Uto64: {
1913 HReg r_dst = newVRegI(env);
1914 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1915 UShort mask = toUShort(op_unop==Iop_16Uto64 ? 0xFFFF :
1916 op_unop==Iop_16Uto32 ? 0xFFFF : 0xFF);
1917 addInstr(env, PPCInstr_Alu(Palu_AND,r_dst,r_src,
1918 PPCRH_Imm(False,mask)));
1919 return r_dst;
1921 case Iop_32Uto64: {
1922 HReg r_dst = newVRegI(env);
1923 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1924 vassert(mode64);
1925 addInstr(env,
1926 PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
1927 r_dst, r_src, PPCRH_Imm(False,32)));
1928 addInstr(env,
1929 PPCInstr_Shft(Pshft_SHR, False/*64bit shift*/,
1930 r_dst, r_dst, PPCRH_Imm(False,32)));
1931 return r_dst;
1933 case Iop_8Sto16:
1934 case Iop_8Sto32:
1935 case Iop_16Sto32: {
1936 HReg r_dst = newVRegI(env);
1937 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1938 UShort amt = toUShort(op_unop==Iop_16Sto32 ? 16 : 24);
1939 addInstr(env,
1940 PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1941 r_dst, r_src, PPCRH_Imm(False,amt)));
1942 addInstr(env,
1943 PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
1944 r_dst, r_dst, PPCRH_Imm(False,amt)));
1945 return r_dst;
1947 case Iop_8Sto64:
1948 case Iop_16Sto64: {
1949 HReg r_dst = newVRegI(env);
1950 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1951 UShort amt = toUShort(op_unop==Iop_8Sto64 ? 56 : 48);
1952 vassert(mode64);
1953 addInstr(env,
1954 PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
1955 r_dst, r_src, PPCRH_Imm(False,amt)));
1956 addInstr(env,
1957 PPCInstr_Shft(Pshft_SAR, False/*64bit shift*/,
1958 r_dst, r_dst, PPCRH_Imm(False,amt)));
1959 return r_dst;
1961 case Iop_32Sto64: {
1962 HReg r_dst = newVRegI(env);
1963 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1964 vassert(mode64);
1965 /* According to the IBM docs, in 64 bit mode, srawi r,r,0
1966 sign extends the lower 32 bits into the upper 32 bits. */
1967 addInstr(env,
1968 PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
1969 r_dst, r_src, PPCRH_Imm(False,0)));
1970 return r_dst;
1972 case Iop_Not8:
1973 case Iop_Not16:
1974 case Iop_Not32:
1975 case Iop_Not64: {
1976 if (op_unop == Iop_Not64) vassert(mode64);
1977 HReg r_dst = newVRegI(env);
1978 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1979 addInstr(env, PPCInstr_Unary(Pun_NOT,r_dst,r_src));
1980 return r_dst;
1982 case Iop_64HIto32: {
1983 if (!mode64) {
1984 HReg rHi, rLo;
1985 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
1986 return rHi; /* and abandon rLo .. poor wee thing :-) */
1987 } else {
1988 HReg r_dst = newVRegI(env);
1989 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1990 addInstr(env,
1991 PPCInstr_Shft(Pshft_SHR, False/*64bit shift*/,
1992 r_dst, r_src, PPCRH_Imm(False,32)));
1993 return r_dst;
1996 case Iop_64to32: {
1997 if (!mode64) {
1998 HReg rHi, rLo;
1999 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
2000 return rLo; /* similar stupid comment to the above ... */
2001 } else {
2002 /* This is a no-op. */
2003 return iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2006 case Iop_64to16: {
2007 if (mode64) { /* This is a no-op. */
2008 return iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2010 break; /* evidently not used in 32-bit mode */
2012 case Iop_16HIto8:
2013 case Iop_32HIto16: {
2014 HReg r_dst = newVRegI(env);
2015 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2016 UShort shift = toUShort(op_unop == Iop_16HIto8 ? 8 : 16);
2017 addInstr(env,
2018 PPCInstr_Shft(Pshft_SHR, True/*32bit shift*/,
2019 r_dst, r_src, PPCRH_Imm(False,shift)));
2020 return r_dst;
2022 case Iop_128HIto64:
2023 if (mode64) {
2024 HReg rHi, rLo;
2025 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
2026 return rHi; /* and abandon rLo .. poor wee thing :-) */
2028 break;
2029 case Iop_128to64:
2030 if (mode64) {
2031 HReg rHi, rLo;
2032 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
2033 return rLo; /* similar stupid comment to the above ... */
2035 break;
2036 case Iop_1Uto64:
2037 case Iop_1Uto32:
2038 case Iop_1Uto8:
2039 if ((op_unop != Iop_1Uto64) || mode64) {
2040 HReg r_dst = newVRegI(env);
2041 PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
2042 addInstr(env, PPCInstr_Set(cond,r_dst));
2043 return r_dst;
2045 break;
2046 case Iop_1Sto8:
2047 case Iop_1Sto16:
2048 case Iop_1Sto32: {
2049 /* could do better than this, but for now ... */
2050 HReg r_dst = newVRegI(env);
2051 PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
2052 addInstr(env, PPCInstr_Set(cond,r_dst));
2053 addInstr(env,
2054 PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
2055 r_dst, r_dst, PPCRH_Imm(False,31)));
2056 addInstr(env,
2057 PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
2058 r_dst, r_dst, PPCRH_Imm(False,31)));
2059 return r_dst;
2061 case Iop_1Sto64:
2062 if (mode64) {
2063 /* could do better than this, but for now ... */
2064 HReg r_dst = newVRegI(env);
2065 PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
2066 addInstr(env, PPCInstr_Set(cond,r_dst));
2067 addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
2068 r_dst, r_dst, PPCRH_Imm(False,63)));
2069 addInstr(env, PPCInstr_Shft(Pshft_SAR, False/*64bit shift*/,
2070 r_dst, r_dst, PPCRH_Imm(False,63)));
2071 return r_dst;
2073 break;
2075 case Iop_Clz32: case Iop_ClzNat32:
2076 case Iop_Clz64: case Iop_ClzNat64: {
2077 // cntlz is available even in the most basic (earliest) ppc
2078 // variants, so it's safe to generate it unconditionally.
2079 HReg r_src, r_dst;
2080 PPCUnaryOp op_clz = (op_unop == Iop_Clz32 || op_unop == Iop_ClzNat32)
2081 ? Pun_CLZ32 : Pun_CLZ64;
2082 if ((op_unop == Iop_Clz64 || op_unop == Iop_ClzNat64) && !mode64)
2083 goto irreducible;
2084 /* Count leading zeroes. */
2085 r_dst = newVRegI(env);
2086 r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2087 addInstr(env, PPCInstr_Unary(op_clz,r_dst,r_src));
2088 return r_dst;
2091 //case Iop_Ctz32:
2092 case Iop_CtzNat32:
2093 //case Iop_Ctz64:
2094 case Iop_CtzNat64:
2096 // Generate code using Clz, because we can't assume the host has
2097 // Ctz. In particular, part of the fix for bug 386945 involves
2098 // creating a Ctz in ir_opt.c from smaller fragments.
2099 PPCUnaryOp op_clz = Pun_CLZ64;
2100 Int WS = 64;
2101 if (op_unop == Iop_Ctz32 || op_unop == Iop_CtzNat32) {
2102 op_clz = Pun_CLZ32;
2103 WS = 32;
2105 /* Compute ctz(arg) = wordsize - clz(~arg & (arg - 1)), thusly:
2106 t1 = arg - 1
2107 t2 = not arg
2108 t2 = t2 & t1
2109 t2 = clz t2
2110 t1 = WS
2111 t2 = t1 - t2
2112 // result in t2
2114 HReg arg = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2115 HReg t1 = newVRegI(env);
2116 HReg t2 = newVRegI(env);
2117 addInstr(env, PPCInstr_Alu(Palu_SUB, t1, arg, PPCRH_Imm(True, 1)));
2118 addInstr(env, PPCInstr_Unary(Pun_NOT, t2, arg));
2119 addInstr(env, PPCInstr_Alu(Palu_AND, t2, t2, PPCRH_Reg(t1)));
2120 addInstr(env, PPCInstr_Unary(op_clz, t2, t2));
2121 addInstr(env, PPCInstr_LI(t1, WS, False/*!64-bit imm*/));
2122 addInstr(env, PPCInstr_Alu(Palu_SUB, t2, t1, PPCRH_Reg(t2)));
2123 return t2;
2126 case Iop_PopCount64: {
2127 // popcnt{x,d} is only available in later arch revs (ISA 3.0,
2128 // maybe) so it's not really correct to emit it here without a caps
2129 // check for the host.
2130 if (mode64) {
2131 HReg r_dst = newVRegI(env);
2132 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2133 addInstr(env, PPCInstr_Unary(Pun_POP64, r_dst, r_src));
2134 return r_dst;
2136 // We don't expect to be required to handle this in 32-bit mode.
2137 break;
2140 case Iop_PopCount32: {
2141 // Similar comment as for Ctz just above applies -- we really
2142 // should have a caps check here.
2144 HReg r_dst = newVRegI(env);
2145 // This actually generates popcntw, which in 64 bit mode does a
2146 // 32-bit count individually for both low and high halves of the
2147 // word. Per the comment at the top of iselIntExpr_R, in the 64
2148 // bit mode case, the user of this result is required to ignore
2149 // the upper 32 bits of the result. In 32 bit mode this is all
2150 // moot. It is however unclear from the PowerISA 3.0 docs that
2151 // the instruction exists in 32 bit mode; however our own front
2152 // end (guest_ppc_toIR.c) accepts it, so I guess it does exist.
2153 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2154 addInstr(env, PPCInstr_Unary(Pun_POP32, r_dst, r_src));
2155 return r_dst;
2158 case Iop_Reverse8sIn32_x1: {
2159 // A bit of a mouthful, but simply .. 32-bit byte swap.
2160 // This is pretty rubbish code. We could do vastly better if
2161 // rotates, and better, rotate-inserts, were allowed. Note that
2162 // even on a 64 bit target, the right shifts must be done as 32-bit
2163 // so as to introduce zero bits in the right places. So it seems
2164 // simplest to do the whole sequence in 32-bit insns.
2166 r = <argument> // working temporary, initial byte order ABCD
2167 Mask = 00FF00FF
2168 nMask = not Mask
2169 tHi = and r, Mask
2170 tHi = shl tHi, 8
2171 tLo = and r, nMask
2172 tLo = shr tLo, 8
2173 r = or tHi, tLo // now r has order BADC
2174 and repeat for 16 bit chunks ..
2175 Mask = 0000FFFF
2176 nMask = not Mask
2177 tHi = and r, Mask
2178 tHi = shl tHi, 16
2179 tLo = and r, nMask
2180 tLo = shr tLo, 16
2181 r = or tHi, tLo // now r has order DCBA
2183 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2184 HReg rr = newVRegI(env);
2185 HReg rMask = newVRegI(env);
2186 HReg rnMask = newVRegI(env);
2187 HReg rtHi = newVRegI(env);
2188 HReg rtLo = newVRegI(env);
2189 // Copy r_src since we need to modify it
2190 addInstr(env, mk_iMOVds_RR(rr, r_src));
2191 // Swap within 16-bit lanes
2192 addInstr(env, PPCInstr_LI(rMask, 0x00FF00FFULL,
2193 False/* !64bit imm*/));
2194 addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
2195 addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
2196 addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32 bit shift*/,
2197 rtHi, rtHi,
2198 PPCRH_Imm(False/*!signed imm*/, 8)));
2199 addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
2200 addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32 bit shift*/,
2201 rtLo, rtLo,
2202 PPCRH_Imm(False/*!signed imm*/, 8)));
2203 addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
2204 // And now swap the two 16-bit chunks
2205 addInstr(env, PPCInstr_LI(rMask, 0x0000FFFFULL,
2206 False/* !64bit imm*/));
2207 addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
2208 addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
2209 addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32 bit shift*/,
2210 rtHi, rtHi,
2211 PPCRH_Imm(False/*!signed imm*/, 16)));
2212 addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
2213 addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32 bit shift*/,
2214 rtLo, rtLo,
2215 PPCRH_Imm(False/*!signed imm*/, 16)));
2216 addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
2217 return rr;
2220 case Iop_Reverse8sIn64_x1: {
2221 /* See Iop_Reverse8sIn32_x1, but extended to 64bit.
2222 Can only be used in 64bit mode. */
2223 vassert (mode64);
2225 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2226 HReg rr = newVRegI(env);
2227 HReg rMask = newVRegI(env);
2228 HReg rnMask = newVRegI(env);
2229 HReg rtHi = newVRegI(env);
2230 HReg rtLo = newVRegI(env);
2232 // Copy r_src since we need to modify it
2233 addInstr(env, mk_iMOVds_RR(rr, r_src));
2235 // r = (r & 0x00FF00FF00FF00FF) << 8 | (r & 0xFF00FF00FF00FF00) >> 8
2236 addInstr(env, PPCInstr_LI(rMask, 0x00FF00FF00FF00FFULL,
2237 True/* 64bit imm*/));
2238 addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
2239 addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
2240 addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
2241 rtHi, rtHi,
2242 PPCRH_Imm(False/*!signed imm*/, 8)));
2243 addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
2244 addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
2245 rtLo, rtLo,
2246 PPCRH_Imm(False/*!signed imm*/, 8)));
2247 addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
2249 // r = (r & 0x0000FFFF0000FFFF) << 16 | (r & 0xFFFF0000FFFF0000) >> 16
2250 addInstr(env, PPCInstr_LI(rMask, 0x0000FFFF0000FFFFULL,
                                  True/* 64bit imm*/));
2252 addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
2253 addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
2254 addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
2255 rtHi, rtHi,
2256 PPCRH_Imm(False/*!signed imm*/, 16)));
2257 addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
2258 addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
2259 rtLo, rtLo,
2260 PPCRH_Imm(False/*!signed imm*/, 16)));
2261 addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
2263 // r = (r & 0x00000000FFFFFFFF) << 32 | (r & 0xFFFFFFFF00000000) >> 32
2264 /* We don't need to mask anymore, just two more shifts and an or. */
2265 addInstr(env, mk_iMOVds_RR(rtLo, rr));
2266 addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
2267 rtLo, rtLo,
2268 PPCRH_Imm(False/*!signed imm*/, 32)));
2269 addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
2270 rr, rr,
2271 PPCRH_Imm(False/*!signed imm*/, 32)));
2272 addInstr(env, PPCInstr_Alu(Palu_OR, rr, rr, PPCRH_Reg(rtLo)));
2274 return rr;
2277 case Iop_Left8:
2278 case Iop_Left16:
2279 case Iop_Left32:
2280 case Iop_Left64: {
2281 HReg r_src, r_dst;
2282 if (op_unop == Iop_Left64 && !mode64)
2283 goto irreducible;
2284 r_dst = newVRegI(env);
2285 r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2286 addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src));
2287 addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src)));
2288 return r_dst;
2291 case Iop_CmpwNEZ32: {
2292 HReg r_dst = newVRegI(env);
2293 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2294 addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src));
2295 addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src)));
2296 addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
2297 r_dst, r_dst, PPCRH_Imm(False, 31)));
2298 return r_dst;
2301 case Iop_CmpwNEZ64: {
2302 HReg r_dst = newVRegI(env);
2303 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2304 if (!mode64) goto irreducible;
2305 addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src));
2306 addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src)));
2307 addInstr(env, PPCInstr_Shft(Pshft_SAR, False/*64bit shift*/,
2308 r_dst, r_dst, PPCRH_Imm(False, 63)));
2309 return r_dst;
2312 case Iop_V128to32: {
2313 HReg r_aligned16;
2314 HReg dst = newVRegI(env);
2315 HReg vec = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
2316 PPCAMode *am_off0, *am_off_word0;
2317 sub_from_sp( env, 32 ); // Move SP down 32 bytes
2319 // get a quadword aligned address within our stack space
2320 r_aligned16 = get_sp_aligned16( env );
2321 am_off0 = PPCAMode_IR( 0, r_aligned16 );
2323 /* Note that the store below (done via PPCInstr_AvLdSt) uses
2324 * stvx, which stores the vector in proper LE format,
2325 * with byte zero (far right byte of the register in LE format)
2326 * stored at the lowest memory address. Therefore, to obtain
2327 * integer word zero, we need to use that lowest memory address
2328 * as the base for the load.
2330 if (IEndianess == Iend_LE)
2331 am_off_word0 = am_off0;
2332 else
2333 am_off_word0 = PPCAMode_IR( 12,r_aligned16 );
2335 // store vec, load low word to dst
2336 addInstr(env,
2337 PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
2338 addInstr(env,
2339 PPCInstr_Load( 4, dst, am_off_word0, mode64 ));
2341 add_to_sp( env, 32 ); // Reset SP
2342 return dst;
2345 case Iop_V128to64:
2346 case Iop_V128HIto64:
2347 if (mode64) {
2348 HReg r_aligned16;
2349 HReg dst = newVRegI(env);
2350 HReg vec = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
2351 PPCAMode *am_off0, *am_off8, *am_off_arg;
2352 sub_from_sp( env, 32 ); // Move SP down 32 bytes
2354 // get a quadword aligned address within our stack space
2355 r_aligned16 = get_sp_aligned16( env );
2356 am_off0 = PPCAMode_IR( 0, r_aligned16 );
2357 am_off8 = PPCAMode_IR( 8 ,r_aligned16 );
2359 // store vec, load low word or high to dst
2360 addInstr(env,
2361 PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
2362 if (IEndianess == Iend_LE) {
2363 if (op_unop == Iop_V128HIto64)
2364 am_off_arg = am_off8;
2365 else
2366 am_off_arg = am_off0;
2367 } else {
2368 if (op_unop == Iop_V128HIto64)
2369 am_off_arg = am_off0;
2370 else
2371 am_off_arg = am_off8;
2373 addInstr(env,
2374 PPCInstr_Load(
2375 8, dst,
2376 am_off_arg,
2377 mode64 ));
2379 add_to_sp( env, 32 ); // Reset SP
2380 return dst;
2382 break;
2383 case Iop_16to8:
2384 case Iop_32to8:
2385 case Iop_32to16:
2386 case Iop_64to8:
2387 /* These are no-ops. */
2388 return iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2390 /* ReinterpF64asI64(e) */
2391 /* Given an IEEE754 double, produce an I64 with the same bit
2392 pattern. */
2393 case Iop_ReinterpF64asI64:
2394 if (mode64) {
2395 PPCAMode *am_addr;
2396 HReg fr_src = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
2397 HReg r_dst = newVRegI(env);
2399 sub_from_sp( env, 16 ); // Move SP down 16 bytes
2400 am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
2402 // store as F64
2403 addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
2404 fr_src, am_addr ));
2405 // load as Ity_I64
2406 addInstr(env, PPCInstr_Load( 8, r_dst, am_addr, mode64 ));
2408 add_to_sp( env, 16 ); // Reset SP
2409 return r_dst;
2411 break;
2413 /* ReinterpF32asI32(e) */
2414 /* Given an IEEE754 float, produce an I32 with the same bit
2415 pattern. */
2416 case Iop_ReinterpF32asI32: {
2417 /* I believe this generates correct code for both 32- and
2418 64-bit hosts. */
2419 PPCAMode *am_addr;
2420 HReg fr_src = iselFltExpr(env, e->Iex.Unop.arg, IEndianess);
2421 HReg r_dst = newVRegI(env);
2423 sub_from_sp( env, 16 ); // Move SP down 16 bytes
2424 am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
2426 // store as F32
2427 addInstr(env, PPCInstr_FpLdSt( False/*store*/, 4,
2428 fr_src, am_addr ));
2429 // load as Ity_I32
2430 addInstr(env, PPCInstr_Load( 4, r_dst, am_addr, mode64 ));
2432 add_to_sp( env, 16 ); // Reset SP
2433 return r_dst;
2435 break;
2437 case Iop_ReinterpD64asI64:
2438 if (mode64) {
2439 PPCAMode *am_addr;
2440 HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
2441 HReg r_dst = newVRegI(env);
2443 sub_from_sp( env, 16 ); // Move SP down 16 bytes
2444 am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
2446 // store as D64
2447 addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
2448 fr_src, am_addr ));
2449 // load as Ity_I64
2450 addInstr(env, PPCInstr_Load( 8, r_dst, am_addr, mode64 ));
2451 add_to_sp( env, 16 ); // Reset SP
2452 return r_dst;
2454 break;
2456 case Iop_BCDtoDPB: {
2457 /* the following is only valid in 64 bit mode */
2458 if (!mode64) break;
2460 PPCCondCode cc;
2461 UInt argiregs;
2462 HReg argregs[1];
2463 HReg r_dst = newVRegI(env);
2464 Int argreg;
2466 argiregs = 0;
2467 argreg = 0;
2468 argregs[0] = hregPPC_GPR3(mode64);
2470 argiregs |= (1 << (argreg+3));
2471 addInstr(env, mk_iMOVds_RR( argregs[argreg++],
2472 iselWordExpr_R(env, e->Iex.Unop.arg,
2473 IEndianess) ) );
2475 cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
2476 if (IEndianess == Iend_LE) {
2477 addInstr(env, PPCInstr_Call( cc, (Addr)h_calc_BCDtoDPB,
2478 argiregs,
2479 mk_RetLoc_simple(RLPri_Int)) );
2480 } else {
2481 HWord* fdescr;
2482 fdescr = (HWord*)h_calc_BCDtoDPB;
2483 addInstr(env, PPCInstr_Call( cc, (Addr64)(fdescr[0]),
2484 argiregs,
2485 mk_RetLoc_simple(RLPri_Int)) );
2488 addInstr(env, mk_iMOVds_RR(r_dst, argregs[0]));
2489 return r_dst;
2492 case Iop_DPBtoBCD: {
2493 /* the following is only valid in 64 bit mode */
2494 if (!mode64) break;
2496 PPCCondCode cc;
2497 UInt argiregs;
2498 HReg argregs[1];
2499 HReg r_dst = newVRegI(env);
2500 Int argreg;
2502 argiregs = 0;
2503 argreg = 0;
2504 argregs[0] = hregPPC_GPR3(mode64);
2506 argiregs |= (1 << (argreg+3));
2507 addInstr(env, mk_iMOVds_RR( argregs[argreg++],
2508 iselWordExpr_R(env, e->Iex.Unop.arg,
2509 IEndianess) ) );
2511 cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
2513 if (IEndianess == Iend_LE) {
2514 addInstr(env, PPCInstr_Call( cc, (Addr)h_calc_DPBtoBCD,
2515 argiregs,
2516 mk_RetLoc_simple(RLPri_Int) ) );
2517 } else {
2518 HWord* fdescr;
2519 fdescr = (HWord*)h_calc_DPBtoBCD;
2520 addInstr(env, PPCInstr_Call( cc, (Addr64)(fdescr[0]),
2521 argiregs,
2522 mk_RetLoc_simple(RLPri_Int) ) );
2525 addInstr(env, mk_iMOVds_RR(r_dst, argregs[0]));
2526 return r_dst;
2528 case Iop_F32toF16x4_DEP: {
2529 HReg vdst = newVRegV(env); /* V128 */
2530 HReg dst = newVRegI(env); /* I64*/
2531 HReg r0 = newVRegI(env); /* I16*/
2532 HReg r1 = newVRegI(env); /* I16*/
2533 HReg r2 = newVRegI(env); /* I16*/
2534 HReg r3 = newVRegI(env); /* I16*/
2535 HReg vsrc = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
2536 PPCAMode *am_off0, *am_off2, *am_off4, *am_off6, *am_off8;
2537 PPCAMode *am_off10, *am_off12, *am_off14;
2538 HReg r_aligned16;
2540 sub_from_sp( env, 32 ); // Move SP down
2542 /* issue instruction */
2543 addInstr(env, PPCInstr_AvUnary(Pav_F32toF16x4, vdst, vsrc));
2545 /* Get a quadword aligned address within our stack space */
2546 r_aligned16 = get_sp_aligned16( env );
2547 am_off0 = PPCAMode_IR( 0, r_aligned16 );
2548 am_off2 = PPCAMode_IR( 2, r_aligned16 );
2549 am_off4 = PPCAMode_IR( 4, r_aligned16 );
2550 am_off6 = PPCAMode_IR( 6, r_aligned16 );
2551 am_off8 = PPCAMode_IR( 8, r_aligned16 );
2552 am_off10 = PPCAMode_IR( 10, r_aligned16 );
2553 am_off12 = PPCAMode_IR( 12, r_aligned16 );
2554 am_off14 = PPCAMode_IR( 14, r_aligned16 );
2556 /* Store v128 result to stack. */
2557 addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, vdst, am_off0));
2559 /* fetch four I16 from V128, store into contiguous I64 via stack, */
2560 if (IEndianess == Iend_LE) {
2561 addInstr(env, PPCInstr_Load( 2, r3, am_off12, mode64));
2562 addInstr(env, PPCInstr_Load( 2, r2, am_off8, mode64));
2563 addInstr(env, PPCInstr_Load( 2, r1, am_off4, mode64));
2564 addInstr(env, PPCInstr_Load( 2, r0, am_off0, mode64));
2565 } else {
2566 addInstr(env, PPCInstr_Load( 2, r0, am_off14, mode64));
2567 addInstr(env, PPCInstr_Load( 2, r1, am_off10, mode64));
2568 addInstr(env, PPCInstr_Load( 2, r2, am_off6, mode64));
2569 addInstr(env, PPCInstr_Load( 2, r3, am_off2, mode64));
2572 /* store in contiguous 64-bit values */
2573 addInstr(env, PPCInstr_Store( 2, am_off6, r3, mode64));
2574 addInstr(env, PPCInstr_Store( 2, am_off4, r2, mode64));
2575 addInstr(env, PPCInstr_Store( 2, am_off2, r1, mode64));
2576 addInstr(env, PPCInstr_Store( 2, am_off0, r0, mode64));
2578 /* Fetch I64 */
2579 addInstr(env, PPCInstr_Load(8, dst, am_off0, mode64));
2581 add_to_sp( env, 32 ); // Reset SP
2582 return dst;
2585 default:
2586 break;
2589 switch (e->Iex.Unop.op) {
2590 case Iop_ExtractExpD64: {
2592 HReg fr_dst = newVRegI(env);
2593 HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
2594 HReg tmp = newVRegF(env);
2595 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
2596 addInstr(env, PPCInstr_Dfp64Unary(Pfp_DXEX, tmp, fr_src));
2598 // put the D64 result into a integer register
2599 sub_from_sp( env, 16 );
2600 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
2601 addInstr(env, PPCInstr_Load(8, fr_dst, zero_r1, env->mode64));
2602 add_to_sp( env, 16 );
2603 return fr_dst;
2605 case Iop_ExtractExpD128: {
2606 HReg fr_dst = newVRegI(env);
2607 HReg r_srcHi;
2608 HReg r_srcLo;
2609 HReg tmp = newVRegF(env);
2610 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
2612 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Unop.arg,
2613 IEndianess);
2614 addInstr(env, PPCInstr_ExtractExpD128(Pfp_DXEXQ, tmp,
2615 r_srcHi, r_srcLo));
2617 sub_from_sp( env, 16 );
2618 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
2619 addInstr(env, PPCInstr_Load(8, fr_dst, zero_r1, env->mode64));
2620 add_to_sp( env, 16 );
2621 return fr_dst;
2623 default:
2624 break;
2627 break;
2630 /* --------- GET --------- */
2631 case Iex_Get: {
2632 if (ty == Ity_I8 || ty == Ity_I16 ||
2633 ty == Ity_I32 || ((ty == Ity_I64) && mode64)) {
2634 HReg r_dst = newVRegI(env);
2635 PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
2636 GuestStatePtr(mode64) );
2637 addInstr(env, PPCInstr_Load( toUChar(sizeofIRType(ty)),
2638 r_dst, am_addr, mode64 ));
2639 return r_dst;
2641 break;
2644 case Iex_GetI: {
2645 PPCAMode* src_am
2646 = genGuestArrayOffset( env, e->Iex.GetI.descr,
2647 e->Iex.GetI.ix, e->Iex.GetI.bias,
2648 IEndianess );
2649 HReg r_dst = newVRegI(env);
2650 if (mode64 && ty == Ity_I64) {
2651 addInstr(env, PPCInstr_Load( toUChar(8),
2652 r_dst, src_am, mode64 ));
2653 return r_dst;
2655 if ((!mode64) && ty == Ity_I32) {
2656 addInstr(env, PPCInstr_Load( toUChar(4),
2657 r_dst, src_am, mode64 ));
2658 return r_dst;
2660 break;
2663 /* --------- CCALL --------- */
2664 case Iex_CCall: {
2665 vassert(ty == e->Iex.CCall.retty); /* well-formedness of IR */
2667 /* be very restrictive for now. Only 32/64-bit ints allowed for
2668 args, and 32 bits or host machine word for return type. */
2669 if (!(ty == Ity_I32 || (mode64 && ty == Ity_I64)))
2670 goto irreducible;
2672 /* Marshal args, do the call, clear stack. */
2673 UInt addToSp = 0;
2674 RetLoc rloc = mk_RetLoc_INVALID();
2675 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2676 e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args,
2677 IEndianess );
2678 vassert(is_sane_RetLoc(rloc));
2679 vassert(rloc.pri == RLPri_Int);
2680 vassert(addToSp == 0);
2682 /* GPR3 now holds the destination address from Pin_Goto */
2683 HReg r_dst = newVRegI(env);
2684 addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
2685 return r_dst;
2688 /* --------- LITERAL --------- */
2689 /* 32/16/8-bit literals */
2690 case Iex_Const: {
2691 Long l;
2692 HReg r_dst = newVRegI(env);
2693 IRConst* con = e->Iex.Const.con;
2694 switch (con->tag) {
2695 case Ico_U64: if (!mode64) goto irreducible;
2696 l = (Long) con->Ico.U64; break;
2697 case Ico_U32: l = (Long)(Int) con->Ico.U32; break;
2698 case Ico_U16: l = (Long)(Int)(Short)con->Ico.U16; break;
2699 case Ico_U8: l = (Long)(Int)(Char )con->Ico.U8; break;
2700 default: vpanic("iselIntExpr_R.const(ppc)");
2702 addInstr(env, PPCInstr_LI(r_dst, (ULong)l, mode64));
2703 return r_dst;
2706 /* --------- MULTIPLEX --------- */
2707 case Iex_ITE: { // VFD
2708 if ((ty == Ity_I8 || ty == Ity_I16 ||
2709 ty == Ity_I32 || ((ty == Ity_I64) && mode64)) &&
2710 typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
2711 PPCRI* r1 = iselWordExpr_RI(env, e->Iex.ITE.iftrue, IEndianess);
2712 HReg r0 = iselWordExpr_R(env, e->Iex.ITE.iffalse, IEndianess);
2713 HReg r_dst = newVRegI(env);
2714 addInstr(env, mk_iMOVds_RR(r_dst,r0));
2715 PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond, IEndianess);
2716 addInstr(env, PPCInstr_CMov(cc, r_dst, r1));
2717 return r_dst;
2719 break;
2722 default:
2723 break;
2724 } /* switch (e->tag) */
2727 /* We get here if no pattern matched. */
2728 irreducible:
2729 ppIRExpr(e);
2730 vpanic("iselIntExpr_R(ppc): cannot reduce tree");
2734 /*---------------------------------------------------------*/
2735 /*--- ISEL: Integer expression auxiliaries ---*/
2736 /*---------------------------------------------------------*/
2738 /* --------------------- AMODEs --------------------- */
2740 /* Return an AMode which computes the value of the specified
2741 expression, possibly also adding insns to the code list as a
2742 result. The expression may only be a word-size one.
2745 static Bool uInt_fits_in_16_bits ( UInt u )
2747 /* Is u the same as the sign-extend of its lower 16 bits? */
2748 UInt v = u & 0xFFFF;
2750 v = (Int)(v << 16) >> 16; /* sign extend */
2752 return u == v;
2755 static Bool uLong_fits_in_16_bits ( ULong u )
2757 /* Is u the same as the sign-extend of its lower 16 bits? */
2758 ULong v = u & 0xFFFFULL;
2760 v = (Long)(v << 48) >> 48; /* sign extend */
2762 return u == v;
2765 static Bool uLong_is_4_aligned ( ULong u )
2767 return toBool((u & 3ULL) == 0);
2770 static Bool sane_AMode ( ISelEnv* env, PPCAMode* am )
2772 Bool mode64 = env->mode64;
2773 switch (am->tag) {
2774 case Pam_IR:
2775 /* Using uInt_fits_in_16_bits in 64-bit mode seems a bit bogus,
2776 somehow, but I think it's OK. */
2777 return toBool( hregClass(am->Pam.IR.base) == HRcGPR(mode64) &&
2778 hregIsVirtual(am->Pam.IR.base) &&
2779 uInt_fits_in_16_bits(am->Pam.IR.index) );
2780 case Pam_RR:
2781 return toBool( hregClass(am->Pam.RR.base) == HRcGPR(mode64) &&
2782 hregIsVirtual(am->Pam.RR.base) &&
2783 hregClass(am->Pam.RR.index) == HRcGPR(mode64) &&
2784 hregIsVirtual(am->Pam.RR.index) );
2785 default:
2786 vpanic("sane_AMode: unknown ppc amode tag");
2790 static
2791 PPCAMode* iselWordExpr_AMode ( ISelEnv* env, const IRExpr* e, IRType xferTy,
2792 IREndness IEndianess )
2794 PPCAMode* am = iselWordExpr_AMode_wrk(env, e, xferTy, IEndianess);
2795 vassert(sane_AMode(env, am));
2796 return am;
/* DO NOT CALL THIS DIRECTLY ! */
/* Worker for iselWordExpr_AMode.  Tries, in order:
     Add(expr, const)  with an encodable 16-bit displacement -> IR form
     Add(expr, expr)                                         -> RR form
     anything else     -> evaluate to a register, IR form with disp 0
   NOTE: the const-displacement match must be tried before the generic
   Add match, otherwise Add(expr,const) would always land in RR form. */
static PPCAMode* iselWordExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e,
                                          IRType xferTy, IREndness IEndianess )
{
   IRType ty = typeOfIRExpr(env->type_env,e);

   if (env->mode64) {

      /* If the data load/store type is I32 or I64, this amode might
         be destined for use in ld/ldu/lwa/st/stu.  In which case
         insist that if it comes out as an _IR, the immediate must
         have its bottom two bits be zero.  This does assume that for
         any other type (I8/I16/I128/F32/F64/V128) the amode will not
         be parked in any such instruction.  But that seems a
         reasonable assumption.  */
      Bool aligned4imm = toBool(xferTy == Ity_I32 || xferTy == Ity_I64);

      vassert(ty == Ity_I64);

      /* Add64(expr,i), where i == sign-extend of (i & 0xFFFF) */
      if (e->tag == Iex_Binop
          && e->Iex.Binop.op == Iop_Add64
          && e->Iex.Binop.arg2->tag == Iex_Const
          && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64
          && (aligned4imm ? uLong_is_4_aligned(e->Iex.Binop.arg2
                                               ->Iex.Const.con->Ico.U64)
                          : True)
          && uLong_fits_in_16_bits(e->Iex.Binop.arg2
                                   ->Iex.Const.con->Ico.U64)) {
         return PPCAMode_IR( (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64,
                             iselWordExpr_R(env, e->Iex.Binop.arg1,
                                            IEndianess) );
      }

      /* Add64(expr,expr) */
      if (e->tag == Iex_Binop
          && e->Iex.Binop.op == Iop_Add64) {
         HReg r_base = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
         HReg r_idx  = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
         return PPCAMode_RR( r_idx, r_base );
      }

   } else {

      vassert(ty == Ity_I32);

      /* Add32(expr,i), where i == sign-extend of (i & 0xFFFF) */
      /* 32-bit mode has no ld/ldu/lwa-style alignment constraint on
         the displacement, so no aligned4imm check here. */
      if (e->tag == Iex_Binop
          && e->Iex.Binop.op == Iop_Add32
          && e->Iex.Binop.arg2->tag == Iex_Const
          && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
          && uInt_fits_in_16_bits(e->Iex.Binop.arg2
                                  ->Iex.Const.con->Ico.U32)) {
         return PPCAMode_IR( (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32,
                             iselWordExpr_R(env, e->Iex.Binop.arg1,
                                            IEndianess) );
      }

      /* Add32(expr,expr) */
      if (e->tag == Iex_Binop
          && e->Iex.Binop.op == Iop_Add32) {
         HReg r_base = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
         HReg r_idx  = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
         return PPCAMode_RR( r_idx, r_base );
      }

   }

   /* Doesn't match anything in particular.  Generate it into
      a register and use that. */
   return PPCAMode_IR( 0, iselWordExpr_R(env,e,IEndianess) );
}
2873 /* --------------------- RH --------------------- */
2875 /* Compute an I8/I16/I32 (and I64, in 64-bit mode) into a RH
2876 (reg-or-halfword-immediate). It's important to specify whether the
2877 immediate is to be regarded as signed or not. If yes, this will
   never return -32768 as an immediate; this guarantees that all
   signed immediates that are returned can have their sign inverted if
   need be. */
2882 static PPCRH* iselWordExpr_RH ( ISelEnv* env, Bool syned, const IRExpr* e,
2883 IREndness IEndianess )
2885 PPCRH* ri = iselWordExpr_RH_wrk(env, syned, e, IEndianess);
2886 /* sanity checks ... */
2887 switch (ri->tag) {
2888 case Prh_Imm:
2889 vassert(ri->Prh.Imm.syned == syned);
2890 if (syned)
2891 vassert(ri->Prh.Imm.imm16 != 0x8000);
2892 return ri;
2893 case Prh_Reg:
2894 vassert(hregClass(ri->Prh.Reg.reg) == HRcGPR(env->mode64));
2895 vassert(hregIsVirtual(ri->Prh.Reg.reg));
2896 return ri;
2897 default:
2898 vpanic("iselIntExpr_RH: unknown ppc RH tag");
/* DO NOT CALL THIS DIRECTLY ! */
/* Worker for iselWordExpr_RH: return an immediate form when the
   expression is a constant that fits the requested (un)signed 16-bit
   range, otherwise fall back to evaluating it into a register. */
static PPCRH* iselWordExpr_RH_wrk ( ISelEnv* env, Bool syned, const IRExpr* e,
                                    IREndness IEndianess )
{
   ULong u;
   Long l;
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I8 || ty == Ity_I16 ||
           ty == Ity_I32 || ((ty == Ity_I64) && env->mode64));

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      IRConst* con = e->Iex.Const.con;
      /* What value are we aiming to generate? */
      switch (con->tag) {
      /* Note: Not sign-extending - we carry 'syned' around */
      case Ico_U64: vassert(env->mode64);
                    u = con->Ico.U64; break;
      case Ico_U32: u = 0xFFFFFFFF & con->Ico.U32; break;
      case Ico_U16: u = 0x0000FFFF & con->Ico.U16; break;
      case Ico_U8:  u = 0x000000FF & con->Ico.U8; break;
      default: vpanic("iselIntExpr_RH.Iex_Const(ppch)");
      }
      l = (Long)u;
      /* Now figure out if it's representable. */
      if (!syned && u <= 65535) {
         return PPCRH_Imm(False/*unsigned*/, toUShort(u & 0xFFFF));
      }
      /* Lower bound is -32767, not -32768, so that the returned
         immediate can always be safely negated (see the header
         comment on iselWordExpr_RH). */
      if (syned && l >= -32767 && l <= 32767) {
         return PPCRH_Imm(True/*signed*/, toUShort(u & 0xFFFF));
      }
      /* no luck; use the Slow Way. */
   }

   /* default case: calculate into a register and return that */
   return PPCRH_Reg( iselWordExpr_R ( env, e, IEndianess ) );
}
2941 /* --------------------- RIs --------------------- */
/* Calculate an expression into a PPCRI operand.  As with
   iselIntExpr_R, the expression can have type 32, 16 or 8 bits, or,
   in 64-bit mode, 64 bits. */
2947 static PPCRI* iselWordExpr_RI ( ISelEnv* env, const IRExpr* e,
2948 IREndness IEndianess )
2950 PPCRI* ri = iselWordExpr_RI_wrk(env, e, IEndianess);
2951 /* sanity checks ... */
2952 switch (ri->tag) {
2953 case Pri_Imm:
2954 return ri;
2955 case Pri_Reg:
2956 vassert(hregClass(ri->Pri.Reg) == HRcGPR(env->mode64));
2957 vassert(hregIsVirtual(ri->Pri.Reg));
2958 return ri;
2959 default:
2960 vpanic("iselIntExpr_RI: unknown ppc RI tag");
2964 /* DO NOT CALL THIS DIRECTLY ! */
2965 static PPCRI* iselWordExpr_RI_wrk ( ISelEnv* env, const IRExpr* e,
2966 IREndness IEndianess )
2968 Long l;
2969 IRType ty = typeOfIRExpr(env->type_env,e);
2970 vassert(ty == Ity_I8 || ty == Ity_I16 ||
2971 ty == Ity_I32 || ((ty == Ity_I64) && env->mode64));
2973 /* special case: immediate */
2974 if (e->tag == Iex_Const) {
2975 IRConst* con = e->Iex.Const.con;
2976 switch (con->tag) {
2977 case Ico_U64: vassert(env->mode64);
2978 l = (Long) con->Ico.U64; break;
2979 case Ico_U32: l = (Long)(Int) con->Ico.U32; break;
2980 case Ico_U16: l = (Long)(Int)(Short)con->Ico.U16; break;
2981 case Ico_U8: l = (Long)(Int)(Char )con->Ico.U8; break;
2982 default: vpanic("iselIntExpr_RI.Iex_Const(ppch)");
2984 return PPCRI_Imm((ULong)l);
2987 /* default case: calculate into a register and return that */
2988 return PPCRI_Reg( iselWordExpr_R ( env, e, IEndianess ) );
2992 /* --------------------- RH5u --------------------- */
2994 /* Compute an I8 into a reg-or-5-bit-unsigned-immediate, the latter
2995 being an immediate in the range 1 .. 31 inclusive. Used for doing
2996 shift amounts. Only used in 32-bit mode. */
2998 static PPCRH* iselWordExpr_RH5u ( ISelEnv* env, const IRExpr* e,
2999 IREndness IEndianess )
3001 PPCRH* ri;
3002 vassert(!env->mode64);
3003 ri = iselWordExpr_RH5u_wrk(env, e, IEndianess);
3004 /* sanity checks ... */
3005 switch (ri->tag) {
3006 case Prh_Imm:
3007 vassert(ri->Prh.Imm.imm16 >= 1 && ri->Prh.Imm.imm16 <= 31);
3008 vassert(!ri->Prh.Imm.syned);
3009 return ri;
3010 case Prh_Reg:
3011 vassert(hregClass(ri->Prh.Reg.reg) == HRcGPR(env->mode64));
3012 vassert(hregIsVirtual(ri->Prh.Reg.reg));
3013 return ri;
3014 default:
3015 vpanic("iselIntExpr_RH5u: unknown ppc RI tag");
3019 /* DO NOT CALL THIS DIRECTLY ! */
3020 static PPCRH* iselWordExpr_RH5u_wrk ( ISelEnv* env, const IRExpr* e,
3021 IREndness IEndianess )
3023 IRType ty = typeOfIRExpr(env->type_env,e);
3024 vassert(ty == Ity_I8);
3026 /* special case: immediate */
3027 if (e->tag == Iex_Const
3028 && e->Iex.Const.con->tag == Ico_U8
3029 && e->Iex.Const.con->Ico.U8 >= 1
3030 && e->Iex.Const.con->Ico.U8 <= 31) {
3031 return PPCRH_Imm(False/*unsigned*/, e->Iex.Const.con->Ico.U8);
3034 /* default case: calculate into a register and return that */
3035 return PPCRH_Reg( iselWordExpr_R ( env, e, IEndianess ) );
3039 /* --------------------- RH6u --------------------- */
3041 /* Compute an I8 into a reg-or-6-bit-unsigned-immediate, the latter
3042 being an immediate in the range 1 .. 63 inclusive. Used for doing
3043 shift amounts. Only used in 64-bit mode. */
3045 static PPCRH* iselWordExpr_RH6u ( ISelEnv* env, const IRExpr* e,
3046 IREndness IEndianess )
3048 PPCRH* ri;
3049 vassert(env->mode64);
3050 ri = iselWordExpr_RH6u_wrk(env, e, IEndianess);
3051 /* sanity checks ... */
3052 switch (ri->tag) {
3053 case Prh_Imm:
3054 vassert(ri->Prh.Imm.imm16 >= 1 && ri->Prh.Imm.imm16 <= 63);
3055 vassert(!ri->Prh.Imm.syned);
3056 return ri;
3057 case Prh_Reg:
3058 vassert(hregClass(ri->Prh.Reg.reg) == HRcGPR(env->mode64));
3059 vassert(hregIsVirtual(ri->Prh.Reg.reg));
3060 return ri;
3061 default:
3062 vpanic("iselIntExpr_RH6u: unknown ppc64 RI tag");
3066 /* DO NOT CALL THIS DIRECTLY ! */
3067 static PPCRH* iselWordExpr_RH6u_wrk ( ISelEnv* env, const IRExpr* e,
3068 IREndness IEndianess )
3070 IRType ty = typeOfIRExpr(env->type_env,e);
3071 vassert(ty == Ity_I8);
3073 /* special case: immediate */
3074 if (e->tag == Iex_Const
3075 && e->Iex.Const.con->tag == Ico_U8
3076 && e->Iex.Const.con->Ico.U8 >= 1
3077 && e->Iex.Const.con->Ico.U8 <= 63) {
3078 return PPCRH_Imm(False/*unsigned*/, e->Iex.Const.con->Ico.U8);
3081 /* default case: calculate into a register and return that */
3082 return PPCRH_Reg( iselWordExpr_R ( env, e, IEndianess ) );
3086 /* --------------------- CONDCODE --------------------- */
/* Generate code to evaluate a bit-typed expression, returning the
   condition code which would correspond when the expression would
   notionally have returned 1. */
/* Thin wrapper over iselCondCode_wrk: a PPCCondCode carries no
   register whose class/virtualness could be checked, so there is no
   post-condition to assert here. */
static PPCCondCode iselCondCode ( ISelEnv* env, const IRExpr* e,
                                  IREndness IEndianess )
{
   /* Uh, there's nothing we can sanity check here, unfortunately. */
   return iselCondCode_wrk(env,e, IEndianess);
}
/* DO NOT CALL THIS DIRECTLY ! */
/* Worker for iselCondCode.  Every comparison emitted here targets
   condition-register field 7 (the 7 passed to PPCInstr_Cmp), and the
   returned PPCCondCode names the CR7 flag to test plus the sense
   (Pct_TRUE/Pct_FALSE) in which to test it. */
static PPCCondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e,
                                      IREndness IEndianess )
{
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);

   /* Constant 1:Bit */
   if (e->tag == Iex_Const) {
      // Make a compare that will always be true (or always false):
      vassert(e->Iex.Const.con->Ico.U1 == True || e->Iex.Const.con->Ico.U1 == False);
      HReg r_zero = newVRegI(env);
      addInstr(env, PPCInstr_LI(r_zero, 0, env->mode64));
      /* cmp r_zero,r_zero always sets CR7.EQ ... */
      addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
                                 7/*cr*/, r_zero, PPCRH_Reg(r_zero)));
      /* ... so test it TRUE for constant 1, FALSE for constant 0. */
      return mk_PPCCondCode( e->Iex.Const.con->Ico.U1 ? Pct_TRUE : Pct_FALSE,
                             Pcf_7EQ );
   }

   /* Not1(...) */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
      /* Generate code for the arg, and negate the test condition */
      PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
      cond.test = invertCondTest(cond.test);
      return cond;
   }

   /* --- patterns rooted at: 32to1 or 64to1 --- */

   /* 32to1, 64to1 */
   if (e->tag == Iex_Unop &&
       (e->Iex.Unop.op == Iop_32to1 || e->Iex.Unop.op == Iop_64to1)) {
      HReg src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
      HReg tmp = newVRegI(env);
      /* could do better, probably -- andi. */
      /* Only bit 0 of the source is significant: mask it, then test
         (src & 1) == 1. */
      addInstr(env, PPCInstr_Alu(Palu_AND, tmp,
                                 src, PPCRH_Imm(False,1)));
      addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
                                 7/*cr*/, tmp, PPCRH_Imm(False,1)));
      return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
   }

   /* --- patterns rooted at: CmpNEZ8 --- */

   /* CmpNEZ8(x) */
   /* Note this cloned as CmpNE8(x,0) below. */
   /* could do better -- andi. */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ8) {
      HReg arg = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
      HReg tmp = newVRegI(env);
      addInstr(env, PPCInstr_Alu(Palu_AND, tmp, arg,
                                 PPCRH_Imm(False,0xFF)));
      addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
                                 7/*cr*/, tmp, PPCRH_Imm(False,0)));
      /* nonzero <=> not EQ */
      return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
   }

   /* --- patterns rooted at: CmpNEZ32 --- */

   /* CmpNEZ32(x) */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ32) {
      HReg r1 = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
      addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
                                 7/*cr*/, r1, PPCRH_Imm(False,0)));
      return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
   }

   /* --- patterns rooted at: Cmp*32* --- */

   /* Cmp*32*(x,y) */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ32
           || e->Iex.Binop.op == Iop_CmpNE32
           || e->Iex.Binop.op == Iop_CmpLT32S
           || e->Iex.Binop.op == Iop_CmpLT32U
           || e->Iex.Binop.op == Iop_CmpLE32S
           || e->Iex.Binop.op == Iop_CmpLE32U)) {
      Bool syned = (e->Iex.Binop.op == Iop_CmpLT32S ||
                    e->Iex.Binop.op == Iop_CmpLE32S);
      HReg   r1  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
      PPCRH* ri2 = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2, IEndianess);
      addInstr(env, PPCInstr_Cmp(syned, True/*32bit cmp*/,
                                 7/*cr*/, r1, ri2));

      switch (e->Iex.Binop.op) {
      case Iop_CmpEQ32:  return mk_PPCCondCode( Pct_TRUE,  Pcf_7EQ );
      case Iop_CmpNE32:  return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
      case Iop_CmpLT32U: case Iop_CmpLT32S:
         return mk_PPCCondCode( Pct_TRUE, Pcf_7LT );
      case Iop_CmpLE32U: case Iop_CmpLE32S:
         /* LE is expressed as "not GT". */
         return mk_PPCCondCode( Pct_FALSE, Pcf_7GT );
      default: vpanic("iselCondCode(ppc): CmpXX32");
      }
   }

   /* --- patterns rooted at: CmpNEZ64 --- */

   /* CmpNEZ64 */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ64) {
      if (!env->mode64) {
         /* 32-bit mode: the I64 lives in a register pair; OR the
            halves together, then test the result against zero. */
         HReg hi, lo;
         HReg tmp = newVRegI(env);
         iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg, IEndianess );
         addInstr(env, PPCInstr_Alu(Palu_OR, tmp, lo, PPCRH_Reg(hi)));
         addInstr(env, PPCInstr_Cmp(False/*sign*/, True/*32bit cmp*/,
                                    7/*cr*/, tmp,PPCRH_Imm(False,0)));
         return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
      } else {  // mode64
         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
         addInstr(env, PPCInstr_Cmp(False/*sign*/, False/*64bit cmp*/,
                                    7/*cr*/, r_src,PPCRH_Imm(False,0)));
         return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
      }
   }

   /* --- patterns rooted at: Cmp*64* --- */

   /* Cmp*64*(x,y) */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ64
           || e->Iex.Binop.op == Iop_CmpNE64
           || e->Iex.Binop.op == Iop_CmpLT64S
           || e->Iex.Binop.op == Iop_CmpLT64U
           || e->Iex.Binop.op == Iop_CmpLE64S
           || e->Iex.Binop.op == Iop_CmpLE64U)) {
      Bool syned = (e->Iex.Binop.op == Iop_CmpLT64S ||
                    e->Iex.Binop.op == Iop_CmpLE64S);
      HReg   r1  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
      PPCRH* ri2 = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2, IEndianess);
      /* 64-bit comparisons only arise in 64-bit mode. */
      vassert(env->mode64);
      addInstr(env, PPCInstr_Cmp(syned, False/*64bit cmp*/,
                                 7/*cr*/, r1, ri2));

      switch (e->Iex.Binop.op) {
      case Iop_CmpEQ64:  return mk_PPCCondCode( Pct_TRUE,  Pcf_7EQ );
      case Iop_CmpNE64:  return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
      case Iop_CmpLT64U: case Iop_CmpLT64S:
         return mk_PPCCondCode( Pct_TRUE, Pcf_7LT );
      case Iop_CmpLE64U: case Iop_CmpLE64S:
         /* LE is expressed as "not GT". */
         return mk_PPCCondCode( Pct_FALSE, Pcf_7GT );
      default: vpanic("iselCondCode(ppc): CmpXX64");
      }
   }

   /* --- patterns rooted at: CmpNE8 --- */

   /* CmpNE8(x,0) */
   /* Note this is a direct copy of CmpNEZ8 above. */
   /* could do better -- andi. */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_CmpNE8
       && isZeroU8(e->Iex.Binop.arg2)) {
      HReg arg = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
      HReg tmp = newVRegI(env);
      addInstr(env, PPCInstr_Alu(Palu_AND, tmp, arg,
                                 PPCRH_Imm(False,0xFF)));
      addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
                                 7/*cr*/, tmp, PPCRH_Imm(False,0)));
      return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
   }

   /* var */
   if (e->tag == Iex_RdTmp) {
      /* Only bit 0 of the temp is defined; mask and test it. */
      HReg r_src      = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      HReg src_masked = newVRegI(env);
      addInstr(env,
               PPCInstr_Alu(Palu_AND, src_masked,
                            r_src, PPCRH_Imm(False,1)));
      addInstr(env,
               PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
                            7/*cr*/, src_masked, PPCRH_Imm(False,1)));
      return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
   }

   /* --- And1(x,y), Or1(x,y) --- */
   /* FIXME: We could (and probably should) do a lot better here, by using the
      iselCondCode_C/_R scheme used in the amd64 insn selector. */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_And1 || e->Iex.Binop.op == Iop_Or1)) {
      /* Materialise both condition codes as 0/1 integers, combine
         them with AND/OR, then turn the result back into a condcode. */
      HReg x_as_int = newVRegI(env);
      PPCCondCode cc_x = iselCondCode(env, e->Iex.Binop.arg1, IEndianess);
      addInstr(env, PPCInstr_Set(cc_x, x_as_int));

      HReg y_as_int = newVRegI(env);
      PPCCondCode cc_y = iselCondCode(env, e->Iex.Binop.arg2, IEndianess);
      addInstr(env, PPCInstr_Set(cc_y, y_as_int));

      HReg tmp = newVRegI(env);
      PPCAluOp op = e->Iex.Binop.op == Iop_And1 ? Palu_AND : Palu_OR;
      addInstr(env, PPCInstr_Alu(op, tmp, x_as_int, PPCRH_Reg(y_as_int)));

      addInstr(env, PPCInstr_Alu(Palu_AND, tmp, tmp, PPCRH_Imm(False,1)));
      addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
                                 7/*cr*/, tmp, PPCRH_Imm(False,1)));
      return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
   }

   vex_printf("iselCondCode(ppc): No such tag(%u)\n", e->tag);
   ppIRExpr(e);
   vpanic("iselCondCode(ppc)");
}
3305 /*---------------------------------------------------------*/
3306 /*--- ISEL: Integer expressions (128 bit) ---*/
3307 /*---------------------------------------------------------*/
3309 /* 64-bit mode ONLY: compute a 128-bit value into a register pair,
3310 which is returned as the first two parameters. As with
3311 iselWordExpr_R, these may be either real or virtual regs; in any
3312 case they must not be changed by subsequent code emitted by the
3313 caller. */
3315 static void iselInt128Expr ( HReg* rHi, HReg* rLo, ISelEnv* env,
3316 const IRExpr* e, IREndness IEndianess )
3318 vassert(env->mode64);
3319 iselInt128Expr_wrk(rHi, rLo, env, e, IEndianess);
3320 # if 0
3321 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3322 # endif
3323 vassert(hregClass(*rHi) == HRcGPR(env->mode64));
3324 vassert(hregIsVirtual(*rHi));
3325 vassert(hregClass(*rLo) == HRcGPR(env->mode64));
3326 vassert(hregIsVirtual(*rLo));
/* DO NOT CALL THIS DIRECTLY ! */
/* Worker for iselInt128Expr (64-bit mode only): produce the 128-bit
   value as a hi/lo pair of 64-bit virtual registers. */
static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
                                 const IRExpr* e, IREndness IEndianess )
{
   Bool mode64 = env->mode64;

   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);

   /* read 128-bit IRTemp */
   if (e->tag == Iex_RdTmp) {
      lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
      return;
   }

   /* 128-bit GET */
   if (e->tag == Iex_Get) {
      PPCAMode* am_addr  = PPCAMode_IR( e->Iex.Get.offset,
                                        GuestStatePtr(mode64) );
      /* am_addr4 = am_addr + 4; two 8-byte loads cover the 16 bytes */
      PPCAMode* am_addr4 = advance4(env, am_addr);
      HReg tLo = newVRegI(env);
      HReg tHi = newVRegI(env);

      addInstr(env, PPCInstr_Load( 8, tHi, am_addr,  mode64));
      addInstr(env, PPCInstr_Load( 8, tLo, am_addr4, mode64));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }

   /* --------- BINARY ops --------- */
   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
      /* 64 x 64 -> 128 multiply */
      case Iop_MullU64:
      case Iop_MullS64: {
         HReg tLo    = newVRegI(env);
         HReg tHi    = newVRegI(env);
         Bool syned  = toBool(e->Iex.Binop.op == Iop_MullS64);
         HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
         HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
         /* The low 64 bits are the same for signed and unsigned
            multiply; only the high half depends on 'syned'. */
         addInstr(env, PPCInstr_MulL(False/*signedness irrelevant*/,
                                     False/*lo64*/, False/*64bit mul*/,
                                     tLo, r_srcL, r_srcR));
         addInstr(env, PPCInstr_MulL(syned,
                                     True/*hi64*/, False/*64bit mul*/,
                                     tHi, r_srcL, r_srcR));
         *rHi = tHi;
         *rLo = tLo;
         return;
      }

      /* 64HLto128(e1,e2) */
      case Iop_64HLto128:
         *rHi = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
         *rLo = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
         return;

      case Iop_D128toI128S: {
         HReg srcHi = INVALID_HREG;
         HReg srcLo = INVALID_HREG;
         HReg dstLo = newVRegI(env);
         HReg dstHi = newVRegI(env);
         HReg tmp   = newVRegV(env);
         PPCAMode* am_addr;
         PPCAMode* am_addr4;

         /* Get the DF128 value, store in two 64-bit halves */
         iselDfp128Expr( &srcHi, &srcLo, env, e->Iex.Binop.arg2, IEndianess );

         sub_from_sp( env, 16 );     // Move SP down 16 bytes
         am_addr  = PPCAMode_IR( 0, StackFramePtr(mode64) );
         am_addr4 = advance4(env, am_addr);

         addInstr(env, PPCInstr_XFormUnary994(Px_DFPTOIQS, tmp, srcHi, srcLo));

         // store the result in the VSR
         addInstr(env, PPCInstr_AvLdSt( False/*store*/, 16, tmp, am_addr ));

         // load the two Ity_64 values
         addInstr(env, PPCInstr_Load( 8, dstHi, am_addr,  mode64 ));
         addInstr(env, PPCInstr_Load( 8, dstLo, am_addr4, mode64 ));

         *rHi = dstHi;
         *rLo = dstLo;

         add_to_sp( env, 16 );       // Reset SP
         return;
      }

      default:
         break;
      }
   } /* if (e->tag == Iex_Binop) */

   /* --------- UNARY ops --------- */
   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
      case Iop_ReinterpV128asI128:
      case Iop_ReinterpF128asI128: {
         /* Bounce the 128-bit value through the stack: store it from
            the vector/FP side, reload as two 64-bit integers. */
         HReg src;
         HReg dstLo = newVRegI(env);
         HReg dstHi = newVRegI(env);
         PPCAMode* am_addr;
         PPCAMode* am_addr4;

         if (e->Iex.Unop.op == Iop_ReinterpF128asI128)
            src = iselFp128Expr(env, e->Iex.Unop.arg, IEndianess);
         else
            src = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);

         sub_from_sp( env, 16 );     // Move SP down 16 bytes
         am_addr  = PPCAMode_IR( 0, StackFramePtr(mode64) );
         am_addr4 = advance4(env, am_addr);

         // store the Ity_F128 value
         addInstr(env, PPCInstr_AvLdSt( False/*store*/, 16, src, am_addr ));

         // load the two Ity_64 values
         addInstr(env, PPCInstr_Load( 8, dstHi, am_addr,  mode64 ));
         addInstr(env, PPCInstr_Load( 8, dstLo, am_addr4, mode64 ));

         *rHi = dstHi;
         *rLo = dstLo;
         add_to_sp( env, 16 );       // Reset SP
         return;
      }
      default:
         break;
      }
   } /* if (e->tag == Iex_Unop) */

   vex_printf("iselInt128Expr(ppc64): No such tag(%u)\n", e->tag);
   ppIRExpr(e);
   vpanic("iselInt128Expr(ppc64)");
}
3468 /*---------------------------------------------------------*/
3469 /*--- ISEL: Integer expressions (64 bit) ---*/
3470 /*---------------------------------------------------------*/
3472 /* 32-bit mode ONLY: compute a 128-bit value into a register quad */
3473 static void iselInt128Expr_to_32x4 ( HReg* rHi, HReg* rMedHi, HReg* rMedLo,
3474 HReg* rLo, ISelEnv* env, const IRExpr* e,
3475 IREndness IEndianess )
3477 vassert(!env->mode64);
3478 iselInt128Expr_to_32x4_wrk(rHi, rMedHi, rMedLo, rLo, env, e, IEndianess);
3479 # if 0
3480 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3481 # endif
3482 vassert(hregClass(*rHi) == HRcInt32);
3483 vassert(hregIsVirtual(*rHi));
3484 vassert(hregClass(*rMedHi) == HRcInt32);
3485 vassert(hregIsVirtual(*rMedHi));
3486 vassert(hregClass(*rMedLo) == HRcInt32);
3487 vassert(hregIsVirtual(*rMedLo));
3488 vassert(hregClass(*rLo) == HRcInt32);
3489 vassert(hregIsVirtual(*rLo));
3492 static void iselInt128Expr_to_32x4_wrk ( HReg* rHi, HReg* rMedHi,
3493 HReg* rMedLo, HReg* rLo,
3494 ISelEnv* env, const IRExpr* e,
3495 IREndness IEndianess )
3497 vassert(e);
3498 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
3500 /* read 128-bit IRTemp */
3501 if (e->tag == Iex_RdTmp) {
3502 lookupIRTempQuad( rHi, rMedHi, rMedLo, rLo, env, e->Iex.RdTmp.tmp);
3503 return;
3506 if (e->tag == Iex_Binop) {
3508 IROp op_binop = e->Iex.Binop.op;
3509 switch (op_binop) {
3510 case Iop_64HLto128:
3511 iselInt64Expr(rHi, rMedHi, env, e->Iex.Binop.arg1, IEndianess);
3512 iselInt64Expr(rMedLo, rLo, env, e->Iex.Binop.arg2, IEndianess);
3513 return;
3514 default:
3515 vex_printf("iselInt128Expr_to_32x4_wrk: Binop case 0x%x not found\n",
3516 op_binop);
3517 break;
3521 vex_printf("iselInt128Expr_to_32x4_wrk: e->tag 0x%x not found\n", e->tag);
3522 return;
3525 /* 32-bit mode ONLY: compute a 64-bit value into a register pair,
3526 which is returned as the first two parameters. As with
3527 iselIntExpr_R, these may be either real or virtual regs; in any
3528 case they must not be changed by subsequent code emitted by the
3529 caller. */
3531 static void iselInt64Expr ( HReg* rHi, HReg* rLo,
3532 ISelEnv* env, const IRExpr* e,
3533 IREndness IEndianess )
3535 vassert(!env->mode64);
3536 iselInt64Expr_wrk(rHi, rLo, env, e, IEndianess);
3537 # if 0
3538 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3539 # endif
3540 vassert(hregClass(*rHi) == HRcInt32);
3541 vassert(hregIsVirtual(*rHi));
3542 vassert(hregClass(*rLo) == HRcInt32);
3543 vassert(hregIsVirtual(*rLo));
3546 /* DO NOT CALL THIS DIRECTLY ! */
3547 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
3548 ISelEnv* env, const IRExpr* e,
3549 IREndness IEndianess )
3551 vassert(e);
3552 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
3554 /* 64-bit load */
3555 if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
3556 HReg tLo = newVRegI(env);
3557 HReg tHi = newVRegI(env);
3558 HReg r_addr = iselWordExpr_R(env, e->Iex.Load.addr, IEndianess);
3559 vassert(!env->mode64);
3560 addInstr(env, PPCInstr_Load( 4/*byte-load*/,
3561 tHi, PPCAMode_IR( 0, r_addr ),
3562 False/*32-bit insn please*/) );
3563 addInstr(env, PPCInstr_Load( 4/*byte-load*/,
3564 tLo, PPCAMode_IR( 4, r_addr ),
3565 False/*32-bit insn please*/) );
3566 *rHi = tHi;
3567 *rLo = tLo;
3568 return;
3571 /* 64-bit literal */
3572 if (e->tag == Iex_Const) {
3573 ULong w64 = e->Iex.Const.con->Ico.U64;
3574 UInt wHi = ((UInt)(w64 >> 32)) & 0xFFFFFFFF;
3575 UInt wLo = ((UInt)w64) & 0xFFFFFFFF;
3576 HReg tLo = newVRegI(env);
3577 HReg tHi = newVRegI(env);
3578 vassert(e->Iex.Const.con->tag == Ico_U64);
3579 addInstr(env, PPCInstr_LI(tHi, (Long)(Int)wHi, False/*mode32*/));
3580 addInstr(env, PPCInstr_LI(tLo, (Long)(Int)wLo, False/*mode32*/));
3581 *rHi = tHi;
3582 *rLo = tLo;
3583 return;
3586 /* read 64-bit IRTemp */
3587 if (e->tag == Iex_RdTmp) {
3588 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
3589 return;
3592 /* 64-bit GET */
3593 if (e->tag == Iex_Get) {
3594 PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
3595 GuestStatePtr(False/*mode32*/) );
3596 PPCAMode* am_addr4 = advance4(env, am_addr);
3597 HReg tLo = newVRegI(env);
3598 HReg tHi = newVRegI(env);
3599 addInstr(env, PPCInstr_Load( 4, tHi, am_addr, False/*mode32*/ ));
3600 addInstr(env, PPCInstr_Load( 4, tLo, am_addr4, False/*mode32*/ ));
3601 *rHi = tHi;
3602 *rLo = tLo;
3603 return;
3606 /* --------- CCALL --------- */
3607 if(e->tag == Iex_CCall) {
3608 IRType ty = typeOfIRExpr(env->type_env,e);
3609 Bool mode64 = env->mode64;
3611 vassert(ty == e->Iex.CCall.retty); /* well-formedness of IR */
3613 /* be very restrictive for now. Only 32-bit ints allowed for
3614 args, and 32 bits or host machine word for return type. */
3615 vassert(!(ty == Ity_I32 || (mode64 && ty == Ity_I64)));
3617 /* Marshal args, do the call, clear stack. */
3618 UInt addToSp = 0;
3619 RetLoc rloc = mk_RetLoc_INVALID();
3620 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
3621 e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args,
3622 IEndianess );
3623 vassert(is_sane_RetLoc(rloc));
3625 vassert(rloc.pri == RLPri_2Int);
3626 vassert(addToSp == 0);
3628 /* GPR3 now holds the destination address from Pin_Goto */
3629 HReg r_dst = newVRegI(env);
3630 addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
3631 *rHi = r_dst;
3632 *rLo = r_dst;
3633 return;
3636 /* 64-bit ITE */
3637 if (e->tag == Iex_ITE) { // VFD
3638 HReg e0Lo, e0Hi, eXLo, eXHi;
3639 iselInt64Expr(&eXHi, &eXLo, env, e->Iex.ITE.iftrue, IEndianess);
3640 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.ITE.iffalse, IEndianess);
3641 HReg tLo = newVRegI(env);
3642 HReg tHi = newVRegI(env);
3643 addInstr(env, mk_iMOVds_RR(tHi,e0Hi));
3644 addInstr(env, mk_iMOVds_RR(tLo,e0Lo));
3645 PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond, IEndianess);
3646 addInstr(env, PPCInstr_CMov(cc,tHi,PPCRI_Reg(eXHi)));
3647 addInstr(env, PPCInstr_CMov(cc,tLo,PPCRI_Reg(eXLo)));
3648 *rHi = tHi;
3649 *rLo = tLo;
3650 return;
3653 /* --------- BINARY ops --------- */
3654 if (e->tag == Iex_Binop) {
3655 IROp op_binop = e->Iex.Binop.op;
3656 switch (op_binop) {
3657 /* 32 x 32 -> 64 multiply */
3658 case Iop_MullU32:
3659 case Iop_MullS32: {
3660 HReg tLo = newVRegI(env);
3661 HReg tHi = newVRegI(env);
3662 Bool syned = toBool(op_binop == Iop_MullS32);
3663 HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1,
3664 IEndianess);
3665 HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2,
3666 IEndianess);
3667 addInstr(env, PPCInstr_MulL(False/*signedness irrelevant*/,
3668 False/*lo32*/, True/*32bit mul*/,
3669 tLo, r_srcL, r_srcR));
3670 addInstr(env, PPCInstr_MulL(syned,
3671 True/*hi32*/, True/*32bit mul*/,
3672 tHi, r_srcL, r_srcR));
3673 *rHi = tHi;
3674 *rLo = tLo;
3675 return;
3678 /* Or64/And64/Xor64 */
3679 case Iop_Or64:
3680 case Iop_And64:
3681 case Iop_Xor64: {
3682 HReg xLo, xHi, yLo, yHi;
3683 HReg tLo = newVRegI(env);
3684 HReg tHi = newVRegI(env);
3685 PPCAluOp op = (op_binop == Iop_Or64) ? Palu_OR :
3686 (op_binop == Iop_And64) ? Palu_AND : Palu_XOR;
3687 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1, IEndianess);
3688 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2, IEndianess);
3689 addInstr(env, PPCInstr_Alu(op, tHi, xHi, PPCRH_Reg(yHi)));
3690 addInstr(env, PPCInstr_Alu(op, tLo, xLo, PPCRH_Reg(yLo)));
3691 *rHi = tHi;
3692 *rLo = tLo;
3693 return;
3696 /* Add64 */
3697 case Iop_Add64: {
3698 HReg xLo, xHi, yLo, yHi;
3699 HReg tLo = newVRegI(env);
3700 HReg tHi = newVRegI(env);
3701 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1, IEndianess);
3702 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2, IEndianess);
3703 addInstr(env, PPCInstr_AddSubC( True/*add*/, True /*set carry*/,
3704 tLo, xLo, yLo));
3705 addInstr(env, PPCInstr_AddSubC( True/*add*/, False/*read carry*/,
3706 tHi, xHi, yHi));
3707 *rHi = tHi;
3708 *rLo = tLo;
3709 return;
3712 /* 32HLto64(e1,e2) */
3713 case Iop_32HLto64:
3714 *rHi = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
3715 *rLo = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
3716 return;
3718 /* F64toI64[S|U] */
3719 case Iop_F64toI64S: case Iop_F64toI64U: {
3720 HReg tLo = newVRegI(env);
3721 HReg tHi = newVRegI(env);
3722 HReg r1 = StackFramePtr(env->mode64);
3723 PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
3724 PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
3725 HReg fsrc = iselDblExpr(env, e->Iex.Binop.arg2,
3726 IEndianess);
3727 HReg ftmp = newVRegF(env);
3729 vassert(!env->mode64);
3730 /* Set host rounding mode */
3731 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
3733 sub_from_sp( env, 16 );
3734 addInstr(env, PPCInstr_FpCftI(False/*F->I*/, False/*int64*/,
3735 (op_binop == Iop_F64toI64S) ? True : False,
3736 True, ftmp, fsrc));
3737 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
3738 addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3739 addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3740 add_to_sp( env, 16 );
3742 ///* Restore default FPU rounding. */
3743 //set_FPU_rounding_default( env );
3744 *rHi = tHi;
3745 *rLo = tLo;
3746 return;
3748 case Iop_D64toI64S: {
3749 HReg tLo = newVRegI(env);
3750 HReg tHi = newVRegI(env);
3751 HReg r1 = StackFramePtr(env->mode64);
3752 PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
3753 PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
3754 HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
3755 HReg tmp = newVRegF(env);
3757 vassert(!env->mode64);
3758 set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
3759 addInstr(env, PPCInstr_Dfp64Unary(Pfp_DCTFIX, tmp, fr_src));
3761 sub_from_sp( env, 16 );
3762 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
3763 addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3764 addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3765 add_to_sp( env, 16 );
3766 *rHi = tHi;
3767 *rLo = tLo;
3768 return;
3770 case Iop_D128toI64S: {
3771 PPCFpOp fpop = Pfp_DCTFIXQ;
3772 HReg r_srcHi = newVRegF(env);
3773 HReg r_srcLo = newVRegF(env);
3774 HReg tLo = newVRegI(env);
3775 HReg tHi = newVRegI(env);
3776 HReg ftmp = newVRegF(env);
3777 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
3778 PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
3780 set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
3781 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
3782 IEndianess);
3783 addInstr(env, PPCInstr_DfpD128toD64(fpop, ftmp, r_srcHi, r_srcLo));
3785 // put the D64 result into an integer register pair
3786 sub_from_sp( env, 16 );
3787 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
3788 addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3789 addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3790 add_to_sp( env, 16 );
3791 *rHi = tHi;
3792 *rLo = tLo;
3793 return;
3795 default:
3796 break;
3798 } /* if (e->tag == Iex_Binop) */
3801 /* --------- UNARY ops --------- */
3802 if (e->tag == Iex_Unop) {
3803 switch (e->Iex.Unop.op) {
3805 /* CmpwNEZ64(e) */
3806 case Iop_CmpwNEZ64: {
3807 HReg argHi, argLo;
3808 HReg tmp1 = newVRegI(env);
3809 HReg tmp2 = newVRegI(env);
3810 iselInt64Expr(&argHi, &argLo, env, e->Iex.Unop.arg, IEndianess);
3811 /* tmp1 = argHi | argLo */
3812 addInstr(env, PPCInstr_Alu(Palu_OR, tmp1, argHi, PPCRH_Reg(argLo)));
3813 /* tmp2 = (tmp1 | -tmp1) >>s 31 */
3814 addInstr(env, PPCInstr_Unary(Pun_NEG,tmp2,tmp1));
3815 addInstr(env, PPCInstr_Alu(Palu_OR, tmp2, tmp2, PPCRH_Reg(tmp1)));
3816 addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
3817 tmp2, tmp2, PPCRH_Imm(False, 31)));
3818 *rHi = tmp2;
3819 *rLo = tmp2; /* yes, really tmp2 */
3820 return;
3823 /* Left64 */
3824 case Iop_Left64: {
3825 HReg argHi, argLo;
3826 HReg zero32 = newVRegI(env);
3827 HReg resHi = newVRegI(env);
3828 HReg resLo = newVRegI(env);
3829 iselInt64Expr(&argHi, &argLo, env, e->Iex.Unop.arg, IEndianess);
3830 vassert(env->mode64 == False);
3831 addInstr(env, PPCInstr_LI(zero32, 0, env->mode64));
3832 /* resHi:resLo = - argHi:argLo */
3833 addInstr(env, PPCInstr_AddSubC( False/*sub*/, True/*set carry*/,
3834 resLo, zero32, argLo ));
3835 addInstr(env, PPCInstr_AddSubC( False/*sub*/, False/*read carry*/,
3836 resHi, zero32, argHi ));
3837 /* resHi:resLo |= srcHi:srcLo */
3838 addInstr(env, PPCInstr_Alu(Palu_OR, resLo, resLo, PPCRH_Reg(argLo)));
3839 addInstr(env, PPCInstr_Alu(Palu_OR, resHi, resHi, PPCRH_Reg(argHi)));
3840 *rHi = resHi;
3841 *rLo = resLo;
3842 return;
3845 /* 32Sto64(e) */
3846 case Iop_32Sto64: {
3847 HReg tHi = newVRegI(env);
3848 HReg src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
3849 addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
3850 tHi, src, PPCRH_Imm(False,31)));
3851 *rHi = tHi;
3852 *rLo = src;
3853 return;
3855 case Iop_ExtractExpD64: {
3856 HReg tmp = newVRegF(env);
3857 HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
3858 HReg tLo = newVRegI(env);
3859 HReg tHi = newVRegI(env);
3860 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
3861 PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
3863 addInstr(env, PPCInstr_Dfp64Unary(Pfp_DXEX, tmp, fr_src));
3865 // put the D64 result into a integer register pair
3866 sub_from_sp( env, 16 );
3867 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
3868 addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3869 addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3870 add_to_sp( env, 16 );
3871 *rHi = tHi;
3872 *rLo = tLo;
3873 return;
3875 case Iop_ExtractExpD128: {
3876 HReg r_srcHi;
3877 HReg r_srcLo;
3878 HReg tmp = newVRegF(env);
3879 HReg tLo = newVRegI(env);
3880 HReg tHi = newVRegI(env);
3881 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
3882 PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
3884 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Unop.arg, IEndianess);
3885 addInstr(env, PPCInstr_ExtractExpD128(Pfp_DXEXQ, tmp,
3886 r_srcHi, r_srcLo));
3888 // put the D64 result into a integer register pair
3889 sub_from_sp( env, 16 );
3890 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
3891 addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3892 addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3893 add_to_sp( env, 16 );
3894 *rHi = tHi;
3895 *rLo = tLo;
3896 return;
3899 /* 32Uto64(e) */
3900 case Iop_32Uto64: {
3901 HReg tHi = newVRegI(env);
3902 HReg tLo = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
3903 addInstr(env, PPCInstr_LI(tHi, 0, False/*mode32*/));
3904 *rHi = tHi;
3905 *rLo = tLo;
3906 return;
3909 case Iop_128to64: {
3910 /* Narrow, return the low 64-bit half as a 32-bit
3911 * register pair */
3912 HReg r_Hi = INVALID_HREG;
3913 HReg r_MedHi = INVALID_HREG;
3914 HReg r_MedLo = INVALID_HREG;
3915 HReg r_Lo = INVALID_HREG;
3917 iselInt128Expr_to_32x4(&r_Hi, &r_MedHi, &r_MedLo, &r_Lo,
3918 env, e->Iex.Unop.arg, IEndianess);
3919 *rHi = r_MedLo;
3920 *rLo = r_Lo;
3921 return;
3924 case Iop_128HIto64: {
3925 /* Narrow, return the high 64-bit half as a 32-bit
3926 * register pair */
3927 HReg r_Hi = INVALID_HREG;
3928 HReg r_MedHi = INVALID_HREG;
3929 HReg r_MedLo = INVALID_HREG;
3930 HReg r_Lo = INVALID_HREG;
3932 iselInt128Expr_to_32x4(&r_Hi, &r_MedHi, &r_MedLo, &r_Lo,
3933 env, e->Iex.Unop.arg, IEndianess);
3934 *rHi = r_Hi;
3935 *rLo = r_MedHi;
3936 return;
3939 /* V128{HI}to64 */
3940 case Iop_V128HIto64:
3941 case Iop_V128to64: {
3942 HReg r_aligned16;
3943 Int off = e->Iex.Unop.op==Iop_V128HIto64 ? 0 : 8;
3944 HReg tLo = newVRegI(env);
3945 HReg tHi = newVRegI(env);
3946 HReg vec = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
3947 PPCAMode *am_off0, *am_offLO, *am_offHI;
3948 sub_from_sp( env, 32 ); // Move SP down 32 bytes
3950 // get a quadword aligned address within our stack space
3951 r_aligned16 = get_sp_aligned16( env );
3952 am_off0 = PPCAMode_IR( 0, r_aligned16 );
3953 am_offHI = PPCAMode_IR( off, r_aligned16 );
3954 am_offLO = PPCAMode_IR( off+4, r_aligned16 );
3956 // store as Vec128
3957 addInstr(env,
3958 PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
3960 // load hi,lo words (of hi/lo half of vec) as Ity_I32's
3961 addInstr(env,
3962 PPCInstr_Load( 4, tHi, am_offHI, False/*mode32*/ ));
3963 addInstr(env,
3964 PPCInstr_Load( 4, tLo, am_offLO, False/*mode32*/ ));
3966 add_to_sp( env, 32 ); // Reset SP
3967 *rHi = tHi;
3968 *rLo = tLo;
3969 return;
3972 /* could do better than this, but for now ... */
3973 case Iop_1Sto64: {
3974 HReg tLo = newVRegI(env);
3975 HReg tHi = newVRegI(env);
3976 PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
3977 addInstr(env, PPCInstr_Set(cond,tLo));
3978 addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
3979 tLo, tLo, PPCRH_Imm(False,31)));
3980 addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
3981 tLo, tLo, PPCRH_Imm(False,31)));
3982 addInstr(env, mk_iMOVds_RR(tHi, tLo));
3983 *rHi = tHi;
3984 *rLo = tLo;
3985 return;
3988 case Iop_Not64: {
3989 HReg xLo, xHi;
3990 HReg tmpLo = newVRegI(env);
3991 HReg tmpHi = newVRegI(env);
3992 iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg, IEndianess);
3993 addInstr(env, PPCInstr_Unary(Pun_NOT,tmpLo,xLo));
3994 addInstr(env, PPCInstr_Unary(Pun_NOT,tmpHi,xHi));
3995 *rHi = tmpHi;
3996 *rLo = tmpLo;
3997 return;
4000 /* ReinterpF64asI64(e) */
4001 /* Given an IEEE754 double, produce an I64 with the same bit
4002 pattern. */
4003 case Iop_ReinterpF64asI64: {
4004 PPCAMode *am_addr0, *am_addr1;
4005 HReg fr_src = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
4006 HReg r_dstLo = newVRegI(env);
4007 HReg r_dstHi = newVRegI(env);
4009 sub_from_sp( env, 16 ); // Move SP down 16 bytes
4010 am_addr0 = PPCAMode_IR( 0, StackFramePtr(False/*mode32*/) );
4011 am_addr1 = PPCAMode_IR( 4, StackFramePtr(False/*mode32*/) );
4013 // store as F64
4014 addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
4015 fr_src, am_addr0 ));
4017 // load hi,lo as Ity_I32's
4018 addInstr(env, PPCInstr_Load( 4, r_dstHi,
4019 am_addr0, False/*mode32*/ ));
4020 addInstr(env, PPCInstr_Load( 4, r_dstLo,
4021 am_addr1, False/*mode32*/ ));
4022 *rHi = r_dstHi;
4023 *rLo = r_dstLo;
4025 add_to_sp( env, 16 ); // Reset SP
4026 return;
4029 case Iop_ReinterpD64asI64: {
4030 HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
4031 PPCAMode *am_addr0, *am_addr1;
4032 HReg r_dstLo = newVRegI(env);
4033 HReg r_dstHi = newVRegI(env);
4036 sub_from_sp( env, 16 ); // Move SP down 16 bytes
4037 am_addr0 = PPCAMode_IR( 0, StackFramePtr(False/*mode32*/) );
4038 am_addr1 = PPCAMode_IR( 4, StackFramePtr(False/*mode32*/) );
4040 // store as D64
4041 addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
4042 fr_src, am_addr0 ));
4044 // load hi,lo as Ity_I32's
4045 addInstr(env, PPCInstr_Load( 4, r_dstHi,
4046 am_addr0, False/*mode32*/ ));
4047 addInstr(env, PPCInstr_Load( 4, r_dstLo,
4048 am_addr1, False/*mode32*/ ));
4049 *rHi = r_dstHi;
4050 *rLo = r_dstLo;
4052 add_to_sp( env, 16 ); // Reset SP
4054 return;
4057 case Iop_BCDtoDPB: {
4058 PPCCondCode cc;
4059 UInt argiregs;
4060 HReg argregs[2];
4061 Int argreg;
4062 HReg tLo = newVRegI(env);
4063 HReg tHi = newVRegI(env);
4064 HReg tmpHi;
4065 HReg tmpLo;
4066 Bool mode64 = env->mode64;
4068 argregs[0] = hregPPC_GPR3(mode64);
4069 argregs[1] = hregPPC_GPR4(mode64);
4071 argiregs = 0;
4072 argreg = 0;
4074 iselInt64Expr( &tmpHi, &tmpLo, env, e->Iex.Unop.arg, IEndianess );
4076 argiregs |= ( 1 << (argreg+3 ) );
4077 addInstr( env, mk_iMOVds_RR( argregs[argreg++], tmpHi ) );
4079 argiregs |= ( 1 << (argreg+3 ) );
4080 addInstr( env, mk_iMOVds_RR( argregs[argreg], tmpLo ) );
4082 cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
4084 if (IEndianess == Iend_LE) {
4085 addInstr( env, PPCInstr_Call( cc, (Addr)h_calc_BCDtoDPB,
4086 argiregs,
4087 mk_RetLoc_simple(RLPri_2Int) ) );
4088 } else {
4089 Addr64 target;
4090 target = mode64 ? (Addr)h_calc_BCDtoDPB :
4091 toUInt( (Addr)h_calc_BCDtoDPB );
4092 addInstr( env, PPCInstr_Call( cc, target,
4093 argiregs,
4094 mk_RetLoc_simple(RLPri_2Int) ) );
4097 addInstr( env, mk_iMOVds_RR( tHi, argregs[argreg-1] ) );
4098 addInstr( env, mk_iMOVds_RR( tLo, argregs[argreg] ) );
4100 *rHi = tHi;
4101 *rLo = tLo;
4102 return;
4105 case Iop_DPBtoBCD: {
4106 PPCCondCode cc;
4107 UInt argiregs;
4108 HReg argregs[2];
4109 Int argreg;
4110 HReg tLo = newVRegI(env);
4111 HReg tHi = newVRegI(env);
4112 HReg tmpHi;
4113 HReg tmpLo;
4114 Bool mode64 = env->mode64;
4116 argregs[0] = hregPPC_GPR3(mode64);
4117 argregs[1] = hregPPC_GPR4(mode64);
4119 argiregs = 0;
4120 argreg = 0;
4122 iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Unop.arg, IEndianess);
4124 argiregs |= (1 << (argreg+3));
4125 addInstr(env, mk_iMOVds_RR( argregs[argreg++], tmpHi ));
4127 argiregs |= (1 << (argreg+3));
4128 addInstr(env, mk_iMOVds_RR( argregs[argreg], tmpLo));
4130 cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
4132 if (IEndianess == Iend_LE) {
4133 addInstr(env, PPCInstr_Call( cc, (Addr)h_calc_DPBtoBCD,
4134 argiregs,
4135 mk_RetLoc_simple(RLPri_2Int) ) );
4136 } else {
4137 Addr64 target;
4138 target = mode64 ? (Addr)h_calc_DPBtoBCD :
4139 toUInt( (Addr)h_calc_DPBtoBCD );
4140 addInstr(env, PPCInstr_Call( cc, target, argiregs,
4141 mk_RetLoc_simple(RLPri_2Int) ) );
4144 addInstr(env, mk_iMOVds_RR(tHi, argregs[argreg-1]));
4145 addInstr(env, mk_iMOVds_RR(tLo, argregs[argreg]));
4147 *rHi = tHi;
4148 *rLo = tLo;
4149 return;
4152 default:
4153 break;
4155 } /* if (e->tag == Iex_Unop) */
4157 vex_printf("iselInt64Expr(ppc): No such tag(%u)\n", e->tag);
4158 ppIRExpr(e);
4159 vpanic("iselInt64Expr(ppc)");
4163 /*---------------------------------------------------------*/
4164 /*--- ISEL: Floating point expressions (32 bit) ---*/
4165 /*---------------------------------------------------------*/
4167 /* Nothing interesting here; really just wrappers for
4168 64-bit stuff. */
4170 static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e, IREndness IEndianess )
4172 HReg r = iselFltExpr_wrk( env, e, IEndianess );
4173 # if 0
4174 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
4175 # endif
4176 vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
4177 vassert(hregIsVirtual(r));
4178 return r;
/* DO NOT CALL THIS DIRECTLY */
/* Worker for iselFltExpr: select instructions computing the Ity_F32
   expression 'e' into an FP register (64-bit, per PPC convention),
   which is returned.  Falls through to vpanic for unhandled shapes. */
static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e,
                              IREndness IEndianess )
{
   Bool        mode64 = env->mode64;

   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_F32);

   /* Value already computed into an IR temporary: reuse its register. */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* 4-byte FP load; the hardware widens the F32 into the F64-format
      register on load. */
   if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
      PPCAMode* am_addr;
      HReg r_dst = newVRegF(env);
      vassert(e->Iex.Load.ty == Ity_F32);
      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_F32/*xfer*/,
                                   IEndianess);
      addInstr(env, PPCInstr_FpLdSt(True/*load*/, 4, r_dst, am_addr));
      return r_dst;
   }

   /* Read an F32 field of the guest state, again via a 4-byte FP load. */
   if (e->tag == Iex_Get) {
      HReg r_dst = newVRegF(env);
      PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
                                       GuestStatePtr(env->mode64) );
      addInstr(env, PPCInstr_FpLdSt( True/*load*/, 4, r_dst, am_addr ));
      return r_dst;
   }

   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_TruncF64asF32) {
      /* This is quite subtle.  The only way to do the relevant
         truncation is to do a single-precision store and then a
         double precision load to get it back into a register.  The
         problem is, if the data is then written to memory a second
         time, as in

            STbe(...) = TruncF64asF32(...)

         then will the second truncation further alter the value?  The
         answer is no: flds (as generated here) followed by fsts
         (generated for the STbe) is the identity function on 32-bit
         floats, so we are safe.

         Another upshot of this is that if iselStmt can see the
         entirety of

            STbe(...) = TruncF64asF32(arg)

         then it can short circuit having to deal with TruncF64asF32
         individually; instead just compute arg into a 64-bit FP
         register and do 'fsts' (since that itself does the
         truncation).

         We generate pretty poor code here (should be ok both for
         32-bit and 64-bit mode); but it is expected that for the most
         part the latter optimisation will apply and hence this code
         will not often be used.
      */
      HReg fsrc = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
      HReg fdst = newVRegF(env);
      PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );

      sub_from_sp( env, 16 );
      // store as F32, hence truncating
      addInstr(env, PPCInstr_FpLdSt( False/*store*/, 4,
                                     fsrc, zero_r1 ));
      // and reload.  Good huh?! (sigh)
      addInstr(env, PPCInstr_FpLdSt( True/*load*/, 4,
                                     fdst, zero_r1 ));
      add_to_sp( env, 16 );
      return fdst;
   }

   /* I64UtoF32(rm, i64): unsigned 64-bit int -> F32.  Done by pushing
      the integer through memory into an FP register and converting
      with FpCftI; arg1 carries the IR rounding mode. */
   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64UtoF32) {
      if (mode64) {
         HReg fdst = newVRegF(env);
         HReg isrc = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
         HReg r1   = StackFramePtr(env->mode64);
         PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );

         /* Set host rounding mode */
         set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );

         sub_from_sp( env, 16 );

         addInstr(env, PPCInstr_Store(8, zero_r1, isrc, True/*mode64*/));
         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
         addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
                                       False, False,
                                       fdst, fdst));

         add_to_sp( env, 16 );

         ///* Restore default FPU rounding. */
         //set_FPU_rounding_default( env );
         return fdst;
      } else {
         /* 32-bit mode: the I64 source arrives as a register pair, so
            store both halves before the FP reload/convert. */
         HReg fdst = newVRegF(env);
         HReg isrcHi, isrcLo;
         HReg r1 = StackFramePtr(env->mode64);
         PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
         PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );

         iselInt64Expr(&isrcHi, &isrcLo, env, e->Iex.Binop.arg2, IEndianess);

         /* Set host rounding mode */
         set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );

         sub_from_sp( env, 16 );

         addInstr(env, PPCInstr_Store(4, zero_r1, isrcHi, False/*mode32*/));
         addInstr(env, PPCInstr_Store(4, four_r1, isrcLo, False/*mode32*/));
         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
         addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
                                       False, False,
                                       fdst, fdst));

         add_to_sp( env, 16 );

         ///* Restore default FPU rounding. */
         //set_FPU_rounding_default( env );
         return fdst;
      }
   }

   vex_printf("iselFltExpr(ppc): No such tag(%u)\n", e->tag);
   ppIRExpr(e);
   vpanic("iselFltExpr_wrk(ppc)");
}
4316 /*---------------------------------------------------------*/
4317 /*--- ISEL: Floating point expressions (64 bit) ---*/
4318 /*---------------------------------------------------------*/
4320 /* Compute a 64-bit floating point value into a register, the identity
4321 of which is returned. As with iselIntExpr_R, the reg may be either
4322 real or virtual; in any case it must not be changed by subsequent
4323 code emitted by the caller. */
4325 /* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm:
4327 Type S (1 bit) E (11 bits) F (52 bits)
4328 ---- --------- ----------- -----------
4329 signalling NaN u 2047 (max) .0uuuuu---u
4330 (with at least
4331 one 1 bit)
4332 quiet NaN u 2047 (max) .1uuuuu---u
4334 negative infinity 1 2047 (max) .000000---0
4336 positive infinity 0 2047 (max) .000000---0
4338 negative zero 1 0 .000000---0
4340 positive zero 0 0 .000000---0
4343 static HReg iselDblExpr ( ISelEnv* env, const IRExpr* e, IREndness IEndianess )
4345 HReg r = iselDblExpr_wrk( env, e, IEndianess );
4346 # if 0
4347 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
4348 # endif
4349 vassert(hregClass(r) == HRcFlt64);
4350 vassert(hregIsVirtual(r));
4351 return r;
4354 /* DO NOT CALL THIS DIRECTLY */
4355 static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e,
4356 IREndness IEndianess )
4358 Bool mode64 = env->mode64;
4359 IRType ty = typeOfIRExpr(env->type_env,e);
4360 vassert(e);
4361 vassert(ty == Ity_F64);
4363 if (e->tag == Iex_RdTmp) {
4364 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
4367 /* --------- LITERAL --------- */
4368 if (e->tag == Iex_Const) {
4369 union { UInt u32x2[2]; ULong u64; Double f64; } u;
4370 vassert(sizeof(u) == 8);
4371 vassert(sizeof(u.u64) == 8);
4372 vassert(sizeof(u.f64) == 8);
4373 vassert(sizeof(u.u32x2) == 8);
4375 if (e->Iex.Const.con->tag == Ico_F64) {
4376 u.f64 = e->Iex.Const.con->Ico.F64;
4378 else if (e->Iex.Const.con->tag == Ico_F64i) {
4379 u.u64 = e->Iex.Const.con->Ico.F64i;
4381 else
4382 vpanic("iselDblExpr(ppc): const");
4384 if (!mode64) {
4385 HReg r_srcHi = newVRegI(env);
4386 HReg r_srcLo = newVRegI(env);
4387 addInstr(env, PPCInstr_LI(r_srcHi, u.u32x2[0], mode64));
4388 addInstr(env, PPCInstr_LI(r_srcLo, u.u32x2[1], mode64));
4389 return mk_LoadRR32toFPR( env, r_srcHi, r_srcLo );
4390 } else { // mode64
4391 HReg r_src = newVRegI(env);
4392 addInstr(env, PPCInstr_LI(r_src, u.u64, mode64));
4393 return mk_LoadR64toFPR( env, r_src ); // 1*I64 -> F64
4397 /* --------- LOAD --------- */
4398 if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
4399 HReg r_dst = newVRegF(env);
4400 PPCAMode* am_addr;
4401 vassert(e->Iex.Load.ty == Ity_F64);
4402 am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_F64/*xfer*/,
4403 IEndianess);
4404 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dst, am_addr));
4405 return r_dst;
4408 /* --------- GET --------- */
4409 if (e->tag == Iex_Get) {
4410 HReg r_dst = newVRegF(env);
4411 PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
4412 GuestStatePtr(mode64) );
4413 addInstr(env, PPCInstr_FpLdSt( True/*load*/, 8, r_dst, am_addr ));
4414 return r_dst;
4417 /* --------- OPS --------- */
4418 if (e->tag == Iex_Qop) {
4419 PPCFpOp fpop = Pfp_INVALID;
4420 switch (e->Iex.Qop.details->op) {
4421 case Iop_MAddF64: fpop = Pfp_MADDD; break;
4422 case Iop_MAddF64r32: fpop = Pfp_MADDS; break;
4423 case Iop_MSubF64: fpop = Pfp_MSUBD; break;
4424 case Iop_MSubF64r32: fpop = Pfp_MSUBS; break;
4425 default: break;
4427 if (fpop != Pfp_INVALID) {
4428 HReg r_dst = newVRegF(env);
4429 HReg r_srcML = iselDblExpr(env, e->Iex.Qop.details->arg2,
4430 IEndianess);
4431 HReg r_srcMR = iselDblExpr(env, e->Iex.Qop.details->arg3,
4432 IEndianess);
4433 HReg r_srcAcc = iselDblExpr(env, e->Iex.Qop.details->arg4,
4434 IEndianess);
4435 set_FPU_rounding_mode( env, e->Iex.Qop.details->arg1, IEndianess );
4436 addInstr(env, PPCInstr_FpMulAcc(fpop, r_dst,
4437 r_srcML, r_srcMR, r_srcAcc));
4438 return r_dst;
4442 if (e->tag == Iex_Triop) {
4443 IRTriop *triop = e->Iex.Triop.details;
4444 PPCFpOp fpop = Pfp_INVALID;
4445 switch (triop->op) {
4446 case Iop_AddF64: fpop = Pfp_ADDD; break;
4447 case Iop_SubF64: fpop = Pfp_SUBD; break;
4448 case Iop_MulF64: fpop = Pfp_MULD; break;
4449 case Iop_DivF64: fpop = Pfp_DIVD; break;
4450 case Iop_AddF64r32: fpop = Pfp_ADDS; break;
4451 case Iop_SubF64r32: fpop = Pfp_SUBS; break;
4452 case Iop_MulF64r32: fpop = Pfp_MULS; break;
4453 case Iop_DivF64r32: fpop = Pfp_DIVS; break;
4454 default: break;
4456 if (fpop != Pfp_INVALID) {
4457 HReg r_dst = newVRegF(env);
4458 HReg r_srcL = iselDblExpr(env, triop->arg2, IEndianess);
4459 HReg r_srcR = iselDblExpr(env, triop->arg3, IEndianess);
4460 set_FPU_rounding_mode( env, triop->arg1, IEndianess );
4461 addInstr(env, PPCInstr_FpBinary(fpop, r_dst, r_srcL, r_srcR));
4462 return r_dst;
4466 if (e->tag == Iex_Binop) {
4467 PPCFpOp fpop = Pfp_INVALID;
4468 switch (e->Iex.Binop.op) {
4469 case Iop_SqrtF64: fpop = Pfp_SQRT; break;
4470 default: break;
4472 if (fpop == Pfp_SQRT) {
4473 HReg fr_dst = newVRegF(env);
4474 HReg fr_src = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
4475 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4476 addInstr(env, PPCInstr_FpUnary(fpop, fr_dst, fr_src));
4477 return fr_dst;
4481 if (e->tag == Iex_Binop) {
4483 if (e->Iex.Binop.op == Iop_F128toF64) {
4484 HReg fr_dst = newVRegF(env);
4485 HReg fr_src = iselFp128Expr(env, e->Iex.Binop.arg2, IEndianess);
4486 HReg tmp = newVRegV(env);
4487 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4488 PPCAMode* eight_r1 = PPCAMode_IR( 8, StackFramePtr(env->mode64) );
4489 PPCFpOp fpop = Pfp_INVALID;
4491 if (FPU_rounding_mode_isOdd(e->Iex.Binop.arg1)) {
4492 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
4493 fpop = Pfp_FPQTODRNDODD;
4494 } else {
4495 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4496 fpop = Pfp_FPQTOD;
4499 addInstr(env, PPCInstr_Fp128Unary(fpop, tmp, fr_src));
4501 /* result is in a 128-bit vector register, move to 64-bit reg to
4502 * match the Iop specification. The result will get moved back
4503 * to a 128-bit register and stored once the value is returned.
4505 sub_from_sp( env, 16 );
4506 addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, tmp, zero_r1));
4507 if (IEndianess == Iend_LE)
4508 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, eight_r1));
4509 else
4510 /* High 64-bits stored at lower address */
4511 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, zero_r1));
4513 add_to_sp( env, 16 );
4515 return fr_dst;
4518 if (e->Iex.Binop.op == Iop_RoundF64toF32) {
4519 HReg r_dst = newVRegF(env);
4520 HReg r_src = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
4521 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4522 addInstr(env, PPCInstr_FpRSP(r_dst, r_src));
4523 //set_FPU_rounding_default( env );
4524 return r_dst;
4527 if (e->Iex.Binop.op == Iop_I64StoF64 || e->Iex.Binop.op == Iop_I64UtoF64) {
4528 if (mode64) {
4529 HReg fdst = newVRegF(env);
4530 HReg isrc = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
4531 HReg r1 = StackFramePtr(env->mode64);
4532 PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
4534 /* Set host rounding mode */
4535 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4537 sub_from_sp( env, 16 );
4539 addInstr(env, PPCInstr_Store(8, zero_r1, isrc, True/*mode64*/));
4540 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
4541 addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
4542 e->Iex.Binop.op == Iop_I64StoF64,
4543 True/*fdst is 64 bit*/,
4544 fdst, fdst));
4546 add_to_sp( env, 16 );
4548 ///* Restore default FPU rounding. */
4549 //set_FPU_rounding_default( env );
4550 return fdst;
4551 } else {
4552 /* 32-bit mode */
4553 HReg fdst = newVRegF(env);
4554 HReg isrcHi, isrcLo;
4555 HReg r1 = StackFramePtr(env->mode64);
4556 PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
4557 PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
4559 iselInt64Expr(&isrcHi, &isrcLo, env, e->Iex.Binop.arg2,
4560 IEndianess);
4562 /* Set host rounding mode */
4563 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4565 sub_from_sp( env, 16 );
4567 addInstr(env, PPCInstr_Store(4, zero_r1, isrcHi, False/*mode32*/));
4568 addInstr(env, PPCInstr_Store(4, four_r1, isrcLo, False/*mode32*/));
4569 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
4570 addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
4571 e->Iex.Binop.op == Iop_I64StoF64,
4572 True/*fdst is 64 bit*/,
4573 fdst, fdst));
4575 add_to_sp( env, 16 );
4577 ///* Restore default FPU rounding. */
4578 //set_FPU_rounding_default( env );
4579 return fdst;
4585 if (e->tag == Iex_Unop) {
4586 PPCFpOp fpop = Pfp_INVALID;
4587 switch (e->Iex.Unop.op) {
4588 case Iop_NegF64: fpop = Pfp_NEG; break;
4589 case Iop_AbsF64: fpop = Pfp_ABS; break;
4590 case Iop_RSqrtEst5GoodF64: fpop = Pfp_RSQRTE; break;
4591 case Iop_RoundF64toF64_NegINF: fpop = Pfp_FRIM; break;
4592 case Iop_RoundF64toF64_PosINF: fpop = Pfp_FRIP; break;
4593 case Iop_RoundF64toF64_NEAREST: fpop = Pfp_FRIN; break;
4594 case Iop_RoundF64toF64_ZERO: fpop = Pfp_FRIZ; break;
4595 default: break;
4597 if (fpop != Pfp_INVALID) {
4598 HReg fr_dst = newVRegF(env);
4599 HReg fr_src = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
4600 addInstr(env, PPCInstr_FpUnary(fpop, fr_dst, fr_src));
4601 return fr_dst;
4605 if (e->tag == Iex_Unop) {
4606 switch (e->Iex.Unop.op) {
4607 case Iop_F128HItoF64:
4608 case Iop_F128LOtoF64:
4610 /* put upper/lower 64-bits of F128 into an F64. */
4611 HReg r_aligned16;
4612 HReg fdst = newVRegF(env);
4613 HReg fsrc = iselFp128Expr(env, e->Iex.Unop.arg, IEndianess);
4614 PPCAMode *am_off0, *am_off8, *am_off_arg;
4615 sub_from_sp( env, 32 ); // Move SP down 32 bytes
4617 // get a quadword aligned address within our stack space
4618 r_aligned16 = get_sp_aligned16( env );
4619 am_off0 = PPCAMode_IR( 0, r_aligned16 );
4620 am_off8 = PPCAMode_IR( 8 ,r_aligned16 );
4622 /* store 128-bit floating point value to memory, load low word
4623 * or high to 64-bit destination floating point register
4625 addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, fsrc, am_off0));
4626 if (IEndianess == Iend_LE) {
4627 if (e->Iex.Binop.op == Iop_F128HItoF64)
4628 am_off_arg = am_off8;
4629 else
4630 am_off_arg = am_off0;
4631 } else {
4632 if (e->Iex.Binop.op == Iop_F128HItoF64)
4633 am_off_arg = am_off0;
4634 else
4635 am_off_arg = am_off8;
4637 addInstr(env,
4638 PPCInstr_FpLdSt( True /*load*/,
4639 8, fdst,
4640 am_off_arg ));
4641 add_to_sp( env, 32 ); // Reset SP
4642 return fdst;
4644 case Iop_ReinterpI64asF64: {
4645 /* Given an I64, produce an IEEE754 double with the same
4646 bit pattern. */
4647 if (!mode64) {
4648 HReg r_srcHi, r_srcLo;
4649 iselInt64Expr( &r_srcHi, &r_srcLo, env, e->Iex.Unop.arg,
4650 IEndianess);
4651 return mk_LoadRR32toFPR( env, r_srcHi, r_srcLo );
4652 } else {
4653 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
4654 return mk_LoadR64toFPR( env, r_src );
4658 case Iop_F32toF64: {
4659 if (e->Iex.Unop.arg->tag == Iex_Unop &&
4660 e->Iex.Unop.arg->Iex.Unop.op == Iop_ReinterpI32asF32 ) {
4661 e = e->Iex.Unop.arg;
4663 HReg src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
4664 HReg fr_dst = newVRegF(env);
4665 PPCAMode *am_addr;
4667 sub_from_sp( env, 16 ); // Move SP down 16 bytes
4668 am_addr = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4670 // store src as Ity_I32's
4671 addInstr(env, PPCInstr_Store( 4, am_addr, src, env->mode64 ));
4673 // load single precision float, but the end results loads into a
4674 // 64-bit FP register -- i.e., F64.
4675 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 4, fr_dst, am_addr));
4677 add_to_sp( env, 16 ); // Reset SP
4678 return fr_dst;
4682 /* this is a no-op */
4683 HReg res = iselFltExpr(env, e->Iex.Unop.arg, IEndianess);
4684 return res;
4686 default:
4687 break;
4691 /* --------- MULTIPLEX --------- */
4692 if (e->tag == Iex_ITE) { // VFD
4693 if (ty == Ity_F64
4694 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
4695 HReg fr1 = iselDblExpr(env, e->Iex.ITE.iftrue, IEndianess);
4696 HReg fr0 = iselDblExpr(env, e->Iex.ITE.iffalse, IEndianess);
4697 HReg fr_dst = newVRegF(env);
4698 addInstr(env, PPCInstr_FpUnary( Pfp_MOV, fr_dst, fr0 ));
4699 PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond, IEndianess);
4700 addInstr(env, PPCInstr_FpCMov( cc, fr_dst, fr1 ));
4701 return fr_dst;
4705 vex_printf("iselDblExpr(ppc): No such tag(%u)\n", e->tag);
4706 ppIRExpr(e);
4707 vpanic("iselDblExpr_wrk(ppc)");
4710 static HReg iselDfp32Expr(ISelEnv* env, const IRExpr* e, IREndness IEndianess)
4712 HReg r = iselDfp32Expr_wrk( env, e, IEndianess );
4713 vassert(hregClass(r) == HRcFlt64);
4714 vassert( hregIsVirtual(r) );
4715 return r;
4718 /* DO NOT CALL THIS DIRECTLY */
4719 static HReg iselDfp32Expr_wrk(ISelEnv* env, const IRExpr* e,
4720 IREndness IEndianess)
4722 Bool mode64 = env->mode64;
4723 IRType ty = typeOfIRExpr( env->type_env, e );
4725 vassert( e );
4726 vassert( ty == Ity_D32 );
4728 /* --------- GET --------- */
4729 if (e->tag == Iex_Get) {
4730 HReg r_dst = newVRegF( env );
4731 PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
4732 GuestStatePtr(mode64) );
4733 addInstr( env, PPCInstr_FpLdSt( True/*load*/, 8, r_dst, am_addr ) );
4734 return r_dst;
4737 /* --------- LOAD --------- */
4738 if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
4739 PPCAMode* am_addr;
4740 HReg r_dst = newVRegF(env);
4741 vassert(e->Iex.Load.ty == Ity_D32);
4742 am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_D32/*xfer*/,
4743 IEndianess);
4744 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 4, r_dst, am_addr));
4745 return r_dst;
4748 /* --------- OPS --------- */
4749 if (e->tag == Iex_Binop) {
4750 if (e->Iex.Binop.op == Iop_D64toD32) {
4751 HReg fr_dst = newVRegF(env);
4752 HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
4753 set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4754 addInstr(env, PPCInstr_Dfp64Unary(Pfp_DRSP, fr_dst, fr_src));
4755 return fr_dst;
4759 ppIRExpr( e );
4760 vpanic( "iselDfp32Expr_wrk(ppc)" );
4763 static HReg iselFp128Expr( ISelEnv* env, const IRExpr* e, IREndness IEndianess )
4765 HReg r = iselFp128Expr_wrk( env, e, IEndianess );
4766 vassert(hregClass(r) == HRcVec128);
4767 vassert(hregIsVirtual(r));
4768 return r;
4771 /* DO NOT CALL THIS DIRECTLY */
4772 static HReg iselFp128Expr_wrk( ISelEnv* env, const IRExpr* e,
4773 IREndness IEndianess)
4775 Bool mode64 = env->mode64;
4776 PPCFpOp fpop = Pfp_INVALID;
4777 IRType ty = typeOfIRExpr(env->type_env,e);
4779 vassert(e);
4780 vassert( ty == Ity_F128 );
4782 /* read 128-bit IRTemp */
4783 if (e->tag == Iex_RdTmp) {
4784 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
4787 if (e->tag == Iex_Get) {
4788 /* Guest state vectors are 16byte aligned,
4789 so don't need to worry here */
4790 HReg dst = newVRegV(env);
4792 addInstr(env,
4793 PPCInstr_AvLdSt( True/*load*/, 16, dst,
4794 PPCAMode_IR( e->Iex.Get.offset,
4795 GuestStatePtr(mode64) )));
4796 return dst;
4799 if (e->tag == Iex_Unop) {
4800 switch (e->Iex.Unop.op) {
4801 case Iop_TruncF128toI64S:
4802 fpop = Pfp_TRUNCFPQTOISD; goto do_Un_F128;
4803 case Iop_TruncF128toI32S:
4804 fpop = Pfp_TRUNCFPQTOISW; goto do_Un_F128;
4805 case Iop_TruncF128toI64U:
4806 fpop = Pfp_TRUNCFPQTOIUD; goto do_Un_F128;
4807 case Iop_TruncF128toI32U:
4808 fpop = Pfp_TRUNCFPQTOIUW; goto do_Un_F128;
4809 case Iop_TruncF128toI128U:
4810 fpop = Pfp_TRUNCFPQTOIUQ; goto do_Un_F128;
4811 case Iop_TruncF128toI128S:
4812 fpop = Pfp_TRUNCFPQTOISQ; goto do_Un_F128;
4814 do_Un_F128: {
4815 HReg r_dst = newVRegV(env);
4816 HReg r_src = iselFp128Expr(env, e->Iex.Unop.arg, IEndianess);
4817 addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, r_src));
4818 return r_dst;
4821 case Iop_F64toF128: {
4822 fpop = Pfp_FPDTOQ;
4823 HReg r_dst = newVRegV(env);
4824 HReg r_src = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
4825 HReg v128tmp = newVRegV(env);
4826 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4828 /* value is in 64-bit float reg, need to move to 128-bit vector reg */
4829 sub_from_sp( env, 16 );
4830 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, r_src, zero_r1));
4831 addInstr(env, PPCInstr_AvLdSt(True/*load*/, 16, v128tmp, zero_r1));
4832 add_to_sp( env, 16 );
4834 addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, v128tmp));
4835 return r_dst;
4838 case Iop_I64StoF128:
4839 fpop = Pfp_IDSTOQ; goto do_Un_int_F128;
4840 case Iop_I64UtoF128:
4841 fpop = Pfp_IDUTOQ; goto do_Un_int_F128;
4843 do_Un_int_F128: {
4844 HReg r_dst = newVRegV(env);
4845 HReg tmp = newVRegV(env);
4846 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
4847 PPCAMode *am_offhi, *am_offlo;
4848 HReg r_aligned16;
4850 /* source is in a 64-bit integer reg, move to 128-bit float reg
4851 * do this via the stack (easy, convenient, etc).
4853 sub_from_sp( env, 32 ); // Move SP down
4855 /* Get a quadword aligned address within our stack space */
4856 r_aligned16 = get_sp_aligned16( env );
4858 am_offlo = PPCAMode_IR( 0, r_aligned16 );
4859 am_offhi = PPCAMode_IR( 8, r_aligned16 );
4861 /* Inst only uses the upper 64-bit of the source */
4862 addInstr(env, PPCInstr_Load(8, r_src, am_offhi, mode64));
4864 /* Fetch result back from stack. */
4865 addInstr(env, PPCInstr_AvLdSt(True/*load*/, 16, tmp, am_offlo));
4867 add_to_sp( env, 32 ); // Reset SP
4869 addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, tmp));
4870 return r_dst;
4873 case Iop_ReinterpI128asF128:
4875 PPCAMode* am_addr;
4876 PPCAMode* am_addr4;
4877 HReg rHi = INVALID_HREG;
4878 HReg rLo = INVALID_HREG;
4879 HReg dst = newVRegV(env);
4881 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
4883 sub_from_sp( env, 16 ); // Move SP down 16 bytes
4884 am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
4885 am_addr4 = advance4(env, am_addr);
4887 // store the two 64-bit pars
4888 addInstr(env, PPCInstr_Store( 8, am_addr, rHi, mode64 ));
4889 addInstr(env, PPCInstr_Store( 8, am_addr4, rLo, mode64 ));
4891 // load as Ity_F128
4892 addInstr(env, PPCInstr_AvLdSt( True/*fetch*/, 16, dst, am_addr ));
4894 add_to_sp( env, 16 ); // Reset SP
4895 return dst;
4898 default:
4899 break;
4900 } /* switch (e->Iex.Unop.op) */
4901 } /* if (e->tag == Iex_Unop) */
4903 if (e->tag == Iex_Binop) {
4904 switch (e->Iex.Binop.op) {
4906 case Iop_F64HLtoF128:
4908 HReg dst = newVRegV(env);
4909 HReg r_src_hi = iselDblExpr(env, e->Iex.Binop.arg1, IEndianess);
4910 HReg r_src_lo = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
4911 PPCAMode *am_offhi, *am_offlo;
4912 HReg r_aligned16;
4914 /* do this via the stack (easy, convenient, etc) */
4915 sub_from_sp( env, 16 ); // Move SP down
4917 /* Get a quadword aligned address within our stack space */
4918 r_aligned16 = get_sp_aligned16( env );
4920 am_offlo = PPCAMode_IR( 0, r_aligned16 );
4921 am_offhi = PPCAMode_IR( 8, r_aligned16 );
4923 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8,
4924 r_src_lo, am_offlo));
4925 addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8,
4926 r_src_hi, am_offhi));
4928 /* Fetch result back from stack. */
4929 addInstr(env, PPCInstr_AvLdSt(True/*load*/, 16,
4930 dst, am_offlo));
4932 add_to_sp( env, 16 ); // Reset SP
4933 return dst;
4935 case Iop_F128toI128S:
4937 HReg dst = newVRegV(env);
4938 HReg r_src = iselFp128Expr(env, e->Iex.Binop.arg2, IEndianess);
4939 PPCRI* rm = iselWordExpr_RI(env, e->Iex.Binop.arg1, IEndianess);
4940 /* Note: rm is a set of three bit fields that specify the
4941 * rounding mode and which of the two instructions to issue.
4943 addInstr(env, PPCInstr_AvBinaryInt(Pav_F128toI128S, dst,
4944 r_src, rm));
4945 return dst;
4947 case Iop_RndF128:
4949 HReg dst = newVRegV(env);
4950 HReg r_src = iselFp128Expr(env, e->Iex.Binop.arg2, IEndianess);
4951 PPCRI* rm = iselWordExpr_RI(env, e->Iex.Binop.arg1, IEndianess);
4952 /* Note: rm is a set of three bit fields that specify the
4953 * rounding mode and which of the two instructions to issue.
4955 addInstr(env, PPCInstr_AvBinaryInt(Pav_ROUNDFPQ, dst,
4956 r_src, rm));
4957 return dst;
4959 case Iop_SqrtF128:
4960 if (FPU_rounding_mode_isOdd(e->Iex.Binop.arg1)) {
4961 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
4962 fpop = Pfp_FPSQRTQRNDODD;
4963 goto do_Bin_F128;
4964 } else {
4965 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4966 fpop = Pfp_FPSQRTQ;
4967 goto do_Bin_F128;
4969 case Iop_F128toF32:
4970 if (FPU_rounding_mode_isOdd(e->Iex.Binop.arg1)) {
4971 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
4972 fpop = Pfp_FPQTOWRNDODD;
4973 goto do_Bin_F128;
4974 } else {
4975 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4976 fpop = Pfp_FPQTOW;
4977 goto do_Bin_F128;
4979 do_Bin_F128: {
4980 HReg r_dst = newVRegV(env);
4981 HReg r_src = iselFp128Expr(env, e->Iex.Binop.arg2, IEndianess);
4982 addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, r_src));
4983 return r_dst;
4986 case Iop_I128StoF128:
4987 fpop = Pfp_IQSTOQ; goto do_Un_I128_F128_DFP_conversions;
4988 case Iop_I128UtoF128:
4989 fpop = Pfp_IQUTOQ; goto do_Un_I128_F128_DFP_conversions;
4990 do_Un_I128_F128_DFP_conversions: {
4991 PPCAMode* am_addr;
4992 PPCAMode* am_addr4;
4993 HReg rHi, rLo;
4994 HReg r_tmp = newVRegV(env);
4995 HReg r_dst = newVRegV(env);
4997 iselInt128Expr(&rHi,&rLo, env, e->Iex.Binop.arg2, IEndianess);
4999 /* Set host rounding mode for the conversion instruction */
5000 set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
5002 sub_from_sp( env, 16 );
5004 am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
5005 am_addr4 = advance4(env, am_addr);
5007 // store the two 64-bit halfs of the I128
5008 addInstr(env, PPCInstr_Store( 8, am_addr, rHi, mode64 ));
5009 addInstr(env, PPCInstr_Store( 8, am_addr4, rLo, mode64 ));
5011 /* Fetch the I128 into an V128 register */
5012 addInstr(env, PPCInstr_AvLdSt( True/*fetch*/, 16, r_tmp, am_addr ));
5013 addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, r_tmp));
5015 add_to_sp( env, 16 ); // Reset SP
5017 return r_dst;
5020 default:
5021 break;
5022 } /* switch (e->Iex.Binop.op) */
5023 } /* if (e->tag == Iex_Binop) */
5025 if (e->tag == Iex_Triop) {
5026 IRTriop *triop = e->Iex.Triop.details;
5028 switch (triop->op) {
5029 case Iop_AddF128:
5030 if (FPU_rounding_mode_isOdd(triop->arg1)) {
5031 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5032 fpop = Pfp_FPADDQRNDODD; goto do_Tri_F128;
5033 } else {
5034 set_FPU_rounding_mode( env, triop->arg1, IEndianess );
5035 fpop = Pfp_FPADDQ; goto do_Tri_F128;
5037 case Iop_SubF128:
5038 if (FPU_rounding_mode_isOdd(triop->arg1)) {
5039 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5040 fpop = Pfp_FPSUBQRNDODD; goto do_Tri_F128;
5041 } else {
5042 set_FPU_rounding_mode( env, triop->arg1, IEndianess );
5043 fpop = Pfp_FPSUBQ; goto do_Tri_F128;
5045 case Iop_MulF128:
5046 if (FPU_rounding_mode_isOdd(triop->arg1)) {
5047 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5048 fpop = Pfp_FPMULQRNDODD; goto do_Tri_F128;
5049 } else {
5050 set_FPU_rounding_mode( env, triop->arg1, IEndianess );
5051 fpop = Pfp_FPMULQ; goto do_Tri_F128;
5053 case Iop_DivF128:
5054 if (FPU_rounding_mode_isOdd(triop->arg1)) {
5055 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5056 fpop = Pfp_FPDIVQRNDODD; goto do_Tri_F128;
5057 } else {
5058 set_FPU_rounding_mode( env, triop->arg1, IEndianess );
5059 fpop = Pfp_FPDIVQ; goto do_Tri_F128;
5061 case Iop_MAddF128:
5062 if (FPU_rounding_mode_isOdd(triop->arg1)) {
5063 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5064 fpop = Pfp_FPMULADDQRNDODD; goto do_Tri_F128;
5065 } else {
5066 set_FPU_rounding_mode( env, triop->arg1, IEndianess );
5067 fpop = Pfp_FPMULADDQ; goto do_Tri_F128;
5070 do_Tri_F128: {
5071 HReg r_dst = newVRegV(env);
5072 HReg r_srcL = iselFp128Expr(env, triop->arg2, IEndianess);
5073 HReg r_srcR = iselFp128Expr(env, triop->arg3, IEndianess);
5075 addInstr(env, PPCInstr_Fp128Binary(fpop, r_dst, r_srcL, r_srcR));
5076 return r_dst;
5079 default:
5080 break;
5081 } /* switch (e->Iex.Triop.op) */
5083 } /* if (e->tag == Iex_Trinop) */
5085 if (e->tag == Iex_Qop) {
5086 IRQop *qop = e->Iex.Qop.details;
5088 switch (qop->op) {
5089 case Iop_MAddF128:
5090 if (FPU_rounding_mode_isOdd(qop->arg1)) {
5091 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5092 fpop = Pfp_FPMULADDQRNDODD; goto do_Quad_F128;
5093 } else {
5094 set_FPU_rounding_mode( env, qop->arg1, IEndianess );
5095 fpop = Pfp_FPMULADDQ; goto do_Quad_F128;
5097 case Iop_MSubF128:
5098 if (FPU_rounding_mode_isOdd(qop->arg1)) {
5099 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5100 fpop = Pfp_FPMULSUBQRNDODD; goto do_Quad_F128;
5101 } else {
5102 set_FPU_rounding_mode( env, qop->arg1, IEndianess );
5103 fpop = Pfp_FPMULSUBQ; goto do_Quad_F128;
5105 case Iop_NegMAddF128:
5106 if (FPU_rounding_mode_isOdd(qop->arg1)) {
5107 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5108 fpop = Pfp_FPNEGMULADDQRNDODD; goto do_Quad_F128;
5109 } else {
5110 set_FPU_rounding_mode( env, qop->arg1, IEndianess );
5111 fpop = Pfp_FPNEGMULADDQ; goto do_Quad_F128;
5113 case Iop_NegMSubF128:
5114 if (FPU_rounding_mode_isOdd(qop->arg1)) {
5115 /* use rounding mode specified by RN. Issue inst with R0 = 0 */
5116 fpop = Pfp_FPNEGMULSUBQRNDODD; goto do_Quad_F128;
5117 } else {
5118 set_FPU_rounding_mode( env, qop->arg1, IEndianess );
5119 fpop = Pfp_FPNEGMULSUBQ; goto do_Quad_F128;
5122 do_Quad_F128: {
5123 HReg r_dst = iselFp128Expr(env, qop->arg3,
5124 IEndianess);
5125 HReg r_srcL = iselFp128Expr(env, qop->arg2,
5126 IEndianess);
5127 HReg r_srcR = iselFp128Expr(env, qop->arg4,
5128 IEndianess);
5130 addInstr(env, PPCInstr_Fp128Ternary(fpop, r_dst, r_srcL, r_srcR));
5131 return r_dst;
5134 default:
5135 break;
5137 } /* if (e->tag == Iex_Qop) */
5139 ppIRExpr( e );
5140 vpanic( "iselFp128Expr(ppc64)" );
5143 static HReg iselDfp64Expr(ISelEnv* env, const IRExpr* e, IREndness IEndianess)
5145 HReg r = iselDfp64Expr_wrk( env, e, IEndianess );
5146 vassert(hregClass(r) == HRcFlt64);
5147 vassert( hregIsVirtual(r) );
5148 return r;
5151 /* DO NOT CALL THIS DIRECTLY */
/* Select instructions to compute a D64 (64-bit decimal floating point)
   expression into a fresh virtual FP register, which is returned.
   Worker for iselDfp64Expr -- do not call directly. */
static HReg iselDfp64Expr_wrk(ISelEnv* env, const IRExpr* e,
                              IREndness IEndianess)
{
   Bool   mode64 = env->mode64;
   IRType ty     = typeOfIRExpr( env->type_env, e );
   HReg   r_dstHi, r_dstLo;

   vassert( e );
   vassert( ty == Ity_D64 );

   /* --------- TEMP --------- */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp( env, e->Iex.RdTmp.tmp );
   }

   /* --------- GET --------- */
   if (e->tag == Iex_Get) {
      /* Read a D64 out of the guest state: plain 8-byte FP load. */
      HReg r_dst = newVRegF( env );
      PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
                                       GuestStatePtr(mode64) );
      addInstr( env, PPCInstr_FpLdSt( True/*load*/, 8, r_dst, am_addr ) );
      return r_dst;
   }

   /* --------- LOAD (matching endianness only) --------- */
   if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
      PPCAMode* am_addr;
      HReg r_dst = newVRegF(env);
      vassert(e->Iex.Load.ty == Ity_D64);
      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_D64/*xfer*/,
                                   IEndianess);
      addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dst, am_addr));
      return r_dst;
   }

   /* --------- OPS --------- */
   /* NOTE(review): any Qop reaching here returns a fresh, uninitialized
    * vreg.  Presumably no D64-typed Qops are ever generated -- confirm
    * this branch is intentionally a dummy. */
   if (e->tag == Iex_Qop) {
      HReg r_dst = newVRegF( env );
      return r_dst;
   }

   if (e->tag == Iex_Unop) {
      HReg fr_dst = newVRegF(env);
      switch (e->Iex.Unop.op) {
      case Iop_ReinterpI64asD64: {
         /* Given an I64, produce an IEEE754 DFP with the same
            bit pattern. */
         if (!mode64) {
            HReg r_srcHi, r_srcLo;
            iselInt64Expr( &r_srcHi, &r_srcLo, env, e->Iex.Unop.arg,
                           IEndianess);
            return mk_LoadRR32toFPR( env, r_srcHi, r_srcLo );
         } else {
            HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
            return mk_LoadR64toFPR( env, r_src );
         }
      }
      case Iop_D32toD64: {
         /* Widen D32 -> D64 (dctdp). */
         HReg fr_src = iselDfp32Expr(env, e->Iex.Unop.arg, IEndianess);
         addInstr(env, PPCInstr_Dfp64Unary(Pfp_DCTDP, fr_dst, fr_src));
         return fr_dst;
      }
      case Iop_D128HItoD64:
         /* Select the whole D128 pair, return the high half only. */
         iselDfp128Expr( &r_dstHi, &r_dstLo, env, e->Iex.Unop.arg,
                         IEndianess );
         return r_dstHi;
      case Iop_D128LOtoD64:
         /* Select the whole D128 pair, return the low half only. */
         iselDfp128Expr( &r_dstHi, &r_dstLo, env, e->Iex.Unop.arg,
                         IEndianess );
         return r_dstLo;
      case Iop_InsertExpD64: {
         /* NOTE(review): this case sits in the Unop switch yet reads
          * e->Iex.Binop.arg1/arg2.  It only works because Unop.arg and
          * Binop.arg1 occupy the same slot of the IRExpr union -- and
          * arg2 would be garbage for a genuine Unop.  Confirm whether
          * InsertExpD64 can actually arrive here as a Unop; the Binop
          * switch below also handles it. */
         HReg fr_srcL = iselDblExpr(env, e->Iex.Binop.arg1, IEndianess);
         HReg fr_srcR = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);

         addInstr(env, PPCInstr_Dfp64Binary(Pfp_DIEX, fr_dst, fr_srcL,
                                            fr_srcR));
         return fr_dst;
      }
      default:
         /* Falls out of the Unop block; ends at the vpanic below. */
         vex_printf( "ERROR: iselDfp64Expr_wrk, UNKNOWN unop case %d\n",
                     (Int)e->Iex.Unop.op );
      }
   }

   if (e->tag == Iex_Binop) {
      PPCFpOp fpop = Pfp_INVALID;
      HReg fr_dst = newVRegF(env);

      switch (e->Iex.Binop.op) {
      case Iop_D128toD64:     fpop = Pfp_DRDPQ;  break;
      case Iop_D64toD32:      fpop = Pfp_DRSP;   break;
      case Iop_I64StoD64:     fpop = Pfp_DCFFIX; break;
      case Iop_RoundD64toInt: fpop = Pfp_DRINTN; break;
      default: break;
      }
      if (fpop == Pfp_DRDPQ) {
         /* D128 -> D64 narrowing, rounding mode from arg1. */
         HReg r_srcHi = newVRegF(env);
         HReg r_srcLo = newVRegF(env);

         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
                        IEndianess);
         addInstr(env, PPCInstr_DfpD128toD64(fpop, fr_dst, r_srcHi, r_srcLo));
         return fr_dst;

      } else if (fpop == Pfp_DRINTN) {
         HReg fr_src = newVRegF(env);
         PPCRI* r_rmc = iselWordExpr_RI(env, e->Iex.Binop.arg1, IEndianess);

         /* NOTE, this IOP takes a DFP value and rounds to the
          * nearest floating point integer value, i.e. fractional part
          * is zero.  The result is a decimal floating point number.
          * The INT in the name is a bit misleading.
          */
         fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
         addInstr(env, PPCInstr_DfpRound(fr_dst, fr_src, r_rmc));
         return fr_dst;

      } else if (fpop == Pfp_DRSP) {
         /* D64 -> D32 narrowing, rounding mode from arg1. */
         HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
         addInstr(env, PPCInstr_Dfp64Unary(fpop, fr_dst, fr_src));
         return fr_dst;

      } else if (fpop == Pfp_DCFFIX) {
         /* I64 -> D64: bounce the integer through the stack into an FP
            register, then convert (dcffix). */
         HReg fr_src = newVRegF(env);
         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );

         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
         sub_from_sp( env, 16 );

         // put the I64 value into a floating point register
         if (mode64) {
            HReg tmp = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);

            addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
         } else {
            HReg tmpHi, tmpLo;
            PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );

            iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Binop.arg2,
                          IEndianess);
            addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*mode32*/));
            addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*mode32*/));
         }

         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_src, zero_r1));
         addInstr(env, PPCInstr_Dfp64Unary(fpop, fr_dst, fr_src));
         add_to_sp( env, 16 );
         return fr_dst;
      }

      switch (e->Iex.Binop.op) {
      /* shift instructions D64, I32 -> D64 */
      case Iop_ShlD64: fpop = Pfp_DSCLI; break;
      case Iop_ShrD64: fpop = Pfp_DSCRI; break;
      default: break;
      }
      if (fpop != Pfp_INVALID) {
         HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg1, IEndianess);
         PPCRI* shift = iselWordExpr_RI(env, e->Iex.Binop.arg2, IEndianess);

         /* shift value must be an immediate value */
         vassert(shift->tag == Pri_Imm);

         addInstr(env, PPCInstr_DfpShift(fpop, fr_dst, fr_src, shift));
         return fr_dst;
      }

      switch (e->Iex.Binop.op) {
      case Iop_InsertExpD64:
         fpop = Pfp_DIEX;
         break;
      default: break;
      }
      if (fpop != Pfp_INVALID) {
         /* Insert exponent (arg1, an I64) into significand (arg2, a D64).
            The I64 is passed through the stack into an FP register. */
         HReg fr_srcL = newVRegF(env);
         HReg fr_srcR = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
         sub_from_sp( env, 16 );

         if (env->mode64) {
            // put the I64 value into a floating point reg
            HReg tmp = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);

            addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
         } else {
            // put the I64 register pair into a floating point reg
            HReg tmpHi;
            HReg tmpLo;
            PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );

            iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Binop.arg1,
                          IEndianess);
            addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*!mode64*/));
            addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*!mode64*/));
         }

         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_srcL, zero_r1));
         addInstr(env, PPCInstr_Dfp64Binary(fpop, fr_dst, fr_srcL,
                                            fr_srcR));
         add_to_sp( env, 16 );
         return fr_dst;
      }
   }

   if (e->tag == Iex_Triop) {
      IRTriop *triop = e->Iex.Triop.details;
      PPCFpOp fpop = Pfp_INVALID;

      /* Rounding-mode-taking arithmetic: arg1 = rounding mode. */
      switch (triop->op) {
      case Iop_AddD64:
         fpop = Pfp_DFPADD;
         break;
      case Iop_SubD64:
         fpop = Pfp_DFPSUB;
         break;
      case Iop_MulD64:
         fpop = Pfp_DFPMUL;
         break;
      case Iop_DivD64:
         fpop = Pfp_DFPDIV;
         break;
      default:
         break;
      }
      if (fpop != Pfp_INVALID) {
         HReg r_dst  = newVRegF( env );
         HReg r_srcL = iselDfp64Expr( env, triop->arg2, IEndianess );
         HReg r_srcR = iselDfp64Expr( env, triop->arg3, IEndianess );

         set_FPU_DFP_rounding_mode( env, triop->arg1, IEndianess );
         addInstr( env, PPCInstr_Dfp64Binary( fpop, r_dst, r_srcL, r_srcR ) );
         return r_dst;
      }

      switch (triop->op) {
      case Iop_QuantizeD64:          fpop = Pfp_DQUA;  break;
      case Iop_SignificanceRoundD64: fpop = Pfp_RRDTR; break;
      default: break;
      }
      if (fpop == Pfp_DQUA) {
         /* arg1 = RMC, arg2/arg3 = operands; RMC encoded in the insn. */
         HReg r_dst  = newVRegF(env);
         HReg r_srcL = iselDfp64Expr(env, triop->arg2, IEndianess);
         HReg r_srcR = iselDfp64Expr(env, triop->arg3, IEndianess);
         PPCRI* rmc  = iselWordExpr_RI(env, triop->arg1, IEndianess);
         addInstr(env, PPCInstr_DfpQuantize(fpop, r_dst, r_srcL, r_srcR,
                                            rmc));
         return r_dst;

      } else if (fpop == Pfp_RRDTR) {
         HReg r_dst  = newVRegF(env);
         HReg r_srcL = newVRegF(env);
         HReg r_srcR = iselDfp64Expr(env, triop->arg3, IEndianess);
         PPCRI* rmc  = iselWordExpr_RI(env, triop->arg1, IEndianess);
         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
         HReg i8_val = iselWordExpr_R(env, triop->arg2, IEndianess);

         /* Move I8 to float register to issue instruction */
         sub_from_sp( env, 16 );
         if (mode64)
            addInstr(env, PPCInstr_Store(8, zero_r1, i8_val, True/*mode64*/));
         else
            addInstr(env, PPCInstr_Store(4, zero_r1, i8_val, False/*mode32*/));

         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_srcL, zero_r1));
         add_to_sp( env, 16 );

         // will set TE and RMC when issuing instruction
         addInstr(env, PPCInstr_DfpQuantize(fpop, r_dst, r_srcL, r_srcR, rmc));
         return r_dst;
      }
   }

   ppIRExpr( e );
   vpanic( "iselDfp64Expr_wrk(ppc)" );
}
5427 static void iselDfp128Expr(HReg* rHi, HReg* rLo, ISelEnv* env, const IRExpr* e,
5428 IREndness IEndianess)
5430 iselDfp128Expr_wrk( rHi, rLo, env, e, IEndianess );
5431 vassert( hregIsVirtual(*rHi) );
5432 vassert( hregIsVirtual(*rLo) );
5435 /* DO NOT CALL THIS DIRECTLY */
5436 static void iselDfp128Expr_wrk(HReg* rHi, HReg *rLo, ISelEnv* env,
5437 const IRExpr* e, IREndness IEndianess)
5439 vassert( e );
5440 vassert( typeOfIRExpr(env->type_env,e) == Ity_D128 );
5442 /* read 128-bit IRTemp */
5443 if (e->tag == Iex_RdTmp) {
5444 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp );
5445 return;
5448 if (e->tag == Iex_Unop) {
5449 HReg r_dstHi = newVRegF(env);
5450 HReg r_dstLo = newVRegF(env);
5452 if (e->Iex.Unop.op == Iop_I64StoD128) {
5453 HReg fr_src = newVRegF(env);
5454 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5456 // put the I64 value into a floating point reg
5457 if (env->mode64) {
5458 HReg tmp = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
5459 addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
5460 } else {
5461 HReg tmpHi, tmpLo;
5462 PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
5464 iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Unop.arg,
5465 IEndianess);
5466 addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*mode32*/));
5467 addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*mode32*/));
5470 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_src, zero_r1));
5471 addInstr(env, PPCInstr_DfpI64StoD128(Pfp_DCFFIXQ, r_dstHi, r_dstLo,
5472 fr_src));
5475 if (e->Iex.Unop.op == Iop_D64toD128) {
5476 HReg r_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
5478 /* Source is 64bit, result is 128 bit. High 64bit source arg,
5479 * is ignored by the instruction. Set high arg to r_src just
5480 * to meet the vassert tests.
5482 addInstr(env, PPCInstr_Dfp128Unary(Pfp_DCTQPQ, r_dstHi, r_dstLo,
5483 r_src, r_src));
5485 *rHi = r_dstHi;
5486 *rLo = r_dstLo;
5487 return;
5490 /* --------- OPS --------- */
5491 if (e->tag == Iex_Binop) {
5492 HReg r_srcHi;
5493 HReg r_srcLo;
5495 switch (e->Iex.Binop.op) {
5496 case Iop_D64HLtoD128:
5497 r_srcHi = iselDfp64Expr( env, e->Iex.Binop.arg1, IEndianess );
5498 r_srcLo = iselDfp64Expr( env, e->Iex.Binop.arg2, IEndianess );
5499 *rHi = r_srcHi;
5500 *rLo = r_srcLo;
5501 return;
5502 break;
5503 case Iop_D128toD64: {
5504 PPCFpOp fpop = Pfp_DRDPQ;
5505 HReg fr_dst = newVRegF(env);
5507 set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
5508 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
5509 IEndianess);
5510 addInstr(env, PPCInstr_DfpD128toD64(fpop, fr_dst, r_srcHi, r_srcLo));
5512 /* Need to meet the interface spec but the result is
5513 * just 64-bits so send the result back in both halfs.
5515 *rHi = fr_dst;
5516 *rLo = fr_dst;
5517 return;
5519 case Iop_ShlD128:
5520 case Iop_ShrD128: {
5521 HReg fr_dst_hi = newVRegF(env);
5522 HReg fr_dst_lo = newVRegF(env);
5523 PPCRI* shift = iselWordExpr_RI(env, e->Iex.Binop.arg2, IEndianess);
5524 PPCFpOp fpop = Pfp_DSCLIQ; /* fix later if necessary */
5526 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg1,
5527 IEndianess);
5529 if (e->Iex.Binop.op == Iop_ShrD128)
5530 fpop = Pfp_DSCRIQ;
5532 addInstr(env, PPCInstr_DfpShift128(fpop, fr_dst_hi, fr_dst_lo,
5533 r_srcHi, r_srcLo, shift));
5535 *rHi = fr_dst_hi;
5536 *rLo = fr_dst_lo;
5537 return;
5539 case Iop_RoundD128toInt: {
5540 HReg r_dstHi = newVRegF(env);
5541 HReg r_dstLo = newVRegF(env);
5542 PPCRI* r_rmc = iselWordExpr_RI(env, e->Iex.Binop.arg1, IEndianess);
5544 // will set R and RMC when issuing instruction
5545 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
5546 IEndianess);
5548 addInstr(env, PPCInstr_DfpRound128(r_dstHi, r_dstLo,
5549 r_srcHi, r_srcLo, r_rmc));
5550 *rHi = r_dstHi;
5551 *rLo = r_dstLo;
5552 return;
5554 case Iop_InsertExpD128: {
5555 HReg r_dstHi = newVRegF(env);
5556 HReg r_dstLo = newVRegF(env);
5557 HReg r_srcL = newVRegF(env);
5558 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5559 r_srcHi = newVRegF(env);
5560 r_srcLo = newVRegF(env);
5562 iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
5563 IEndianess);
5565 /* Move I64 to float register to issue instruction */
5566 if (env->mode64) {
5567 HReg tmp = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
5568 addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
5569 } else {
5570 HReg tmpHi, tmpLo;
5571 PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
5573 iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Unop.arg,
5574 IEndianess);
5575 addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*mode32*/));
5576 addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*mode32*/));
5579 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_srcL, zero_r1));
5580 addInstr(env, PPCInstr_InsertExpD128(Pfp_DIEXQ,
5581 r_dstHi, r_dstLo,
5582 r_srcL, r_srcHi, r_srcLo));
5583 *rHi = r_dstHi;
5584 *rLo = r_dstLo;
5585 return;
5588 case Iop_I128StoD128: {
5589 HReg tmpF128 = newVRegV(env);
5590 HReg FdstHi = newVRegF(env);
5591 HReg FdstLo = newVRegF(env);
5592 HReg srcLo = newVRegI(env);
5593 HReg srcHi = newVRegI(env);
5594 PPCAMode* am_addr;
5595 PPCAMode* am_addr4;
5597 set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
5599 // Get the I128 value, store into a VSR register
5600 iselInt128Expr(&srcHi, &srcLo, env, e->Iex.Binop.arg2, IEndianess);
5602 sub_from_sp( env, 16 ); // Move SP down 16 bytes
5603 am_addr = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5604 am_addr4 = advance4(env, am_addr);
5606 addInstr(env, PPCInstr_Store( 8, am_addr, srcHi, env->mode64 ));
5607 addInstr(env, PPCInstr_Store( 8, am_addr4, srcLo, env->mode64 ));
5609 // load as Ity_F128
5610 addInstr(env, PPCInstr_AvLdSt( True/*fetch*/, 16, tmpF128, am_addr ));
5612 // do conversion
5613 addInstr( env, PPCInstr_XFormUnary994( Px_IQSTODFP, FdstHi, FdstLo,
5614 tmpF128 ) );
5616 *rHi = FdstHi;
5617 *rLo = FdstLo;
5618 add_to_sp( env, 16 ); // Reset SP
5619 return;
5622 default:
5623 vex_printf( "ERROR: iselDfp128Expr_wrk, UNKNOWN binop case %d\n",
5624 (Int)e->Iex.Binop.op );
5625 break;
5629 if (e->tag == Iex_Triop) {
5630 IRTriop *triop = e->Iex.Triop.details;
5631 PPCFpOp fpop = Pfp_INVALID;
5632 HReg r_dstHi = newVRegF(env);
5633 HReg r_dstLo = newVRegF(env);
5635 switch (triop->op) {
5636 case Iop_AddD128:
5637 fpop = Pfp_DFPADDQ;
5638 break;
5639 case Iop_SubD128:
5640 fpop = Pfp_DFPSUBQ;
5641 break;
5642 case Iop_MulD128:
5643 fpop = Pfp_DFPMULQ;
5644 break;
5645 case Iop_DivD128:
5646 fpop = Pfp_DFPDIVQ;
5647 break;
5648 default:
5649 break;
5652 if (fpop != Pfp_INVALID) {
5653 HReg r_srcRHi = newVRegV( env );
5654 HReg r_srcRLo = newVRegV( env );
5656 /* dst will be used to pass in the left operand and get the result. */
5657 iselDfp128Expr( &r_dstHi, &r_dstLo, env, triop->arg2, IEndianess );
5658 iselDfp128Expr( &r_srcRHi, &r_srcRLo, env, triop->arg3, IEndianess );
5659 set_FPU_DFP_rounding_mode( env, triop->arg1, IEndianess );
5660 addInstr( env,
5661 PPCInstr_Dfp128Binary( fpop, r_dstHi, r_dstLo,
5662 r_srcRHi, r_srcRLo ) );
5663 *rHi = r_dstHi;
5664 *rLo = r_dstLo;
5665 return;
5667 switch (triop->op) {
5668 case Iop_QuantizeD128: fpop = Pfp_DQUAQ; break;
5669 case Iop_SignificanceRoundD128: fpop = Pfp_DRRNDQ; break;
5670 default: break;
5672 if (fpop == Pfp_DQUAQ) {
5673 HReg r_srcHi = newVRegF(env);
5674 HReg r_srcLo = newVRegF(env);
5675 PPCRI* rmc = iselWordExpr_RI(env, triop->arg1, IEndianess);
5677 /* dst will be used to pass in the left operand and get the result */
5678 iselDfp128Expr(&r_dstHi, &r_dstLo, env, triop->arg2, IEndianess);
5679 iselDfp128Expr(&r_srcHi, &r_srcLo, env, triop->arg3, IEndianess);
5681 // will set RMC when issuing instruction
5682 addInstr(env, PPCInstr_DfpQuantize128(fpop, r_dstHi, r_dstLo,
5683 r_srcHi, r_srcLo, rmc));
5684 *rHi = r_dstHi;
5685 *rLo = r_dstLo;
5686 return;
5688 } else if (fpop == Pfp_DRRNDQ) {
5689 HReg r_srcHi = newVRegF(env);
5690 HReg r_srcLo = newVRegF(env);
5691 PPCRI* rmc = iselWordExpr_RI(env, triop->arg1, IEndianess);
5692 PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5693 PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
5694 HReg i8_val = iselWordExpr_R(env, triop->arg2, IEndianess);
5695 HReg r_zero = newVRegI( env );
5697 iselDfp128Expr(&r_srcHi, &r_srcLo, env, triop->arg3, IEndianess);
5699 /* dst will be used to pass in the left operand and get the result */
5700 /* Move I8 to float register to issue instruction. Note, the
5701 * instruction only looks at the bottom 6 bits so we really don't
5702 * have to clear the upper bits since the iselWordExpr_R sets the
5703 * bottom 8-bits.
5705 sub_from_sp( env, 16 );
5707 if (env->mode64)
5708 addInstr(env, PPCInstr_Store(4, four_r1, i8_val, True/*mode64*/));
5709 else
5710 addInstr(env, PPCInstr_Store(4, four_r1, i8_val, False/*mode32*/));
5712 /* Have to write to the upper bits to ensure they have been
5713 * initialized. The instruction ignores all but the lower 6-bits.
5715 addInstr( env, PPCInstr_LI( r_zero, 0, env->mode64 ) );
5716 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dstHi, zero_r1));
5717 addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dstLo, zero_r1));
5719 add_to_sp( env, 16 );
5721 // will set RMC when issuing instruction
5722 addInstr(env, PPCInstr_DfpQuantize128(fpop, r_dstHi, r_dstLo,
5723 r_srcHi, r_srcLo, rmc));
5724 *rHi = r_dstHi;
5725 *rLo = r_dstLo;
5726 return;
5730 ppIRExpr( e );
5731 vpanic( "iselDfp128Expr(ppc64)" );
5735 /*---------------------------------------------------------*/
5736 /*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/
5737 /*---------------------------------------------------------*/
5739 static HReg iselVecExpr ( ISelEnv* env, const IRExpr* e, IREndness IEndianess )
5741 HReg r = iselVecExpr_wrk( env, e, IEndianess );
5742 # if 0
5743 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5744 # endif
5745 vassert(hregClass(r) == HRcVec128);
5746 vassert(hregIsVirtual(r));
5747 return r;
5750 /* DO NOT CALL THIS DIRECTLY */
5751 static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e,
5752 IREndness IEndianess )
5754 Bool mode64 = env->mode64;
5755 PPCAvOp op = Pav_INVALID;
5756 PPCAvFpOp fpop = Pavfp_INVALID;
5757 PPCAvOpBin128 opav128 = Pav_INVALIDBinary128;
5758 PPCAvOpTri128 optri128 = Pav_INVALIDTri128;
5759 IRType ty = typeOfIRExpr(env->type_env,e);
5760 vassert(e);
5761 vassert(ty == Ity_V128);
5763 if (e->tag == Iex_ITE) {
5764 HReg r1 = iselVecExpr( env, e->Iex.ITE.iftrue, IEndianess );
5765 HReg r0 = iselVecExpr( env, e->Iex.ITE.iffalse, IEndianess );
5766 HReg r_dst = newVRegV(env);
5768 // Use OR operator to do move r1 to r_dst
5769 addInstr(env, PPCInstr_AvBinary( Pav_OR, r_dst, r0, r0));
5770 PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond, IEndianess);
5771 addInstr(env, PPCInstr_AvCMov(cc, r_dst, r1));
5772 return r_dst;
5775 if (e->tag == Iex_RdTmp) {
5776 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5779 if (e->tag == Iex_Get) {
5780 /* Guest state vectors are 16byte aligned,
5781 so don't need to worry here */
5782 HReg dst = newVRegV(env);
5783 addInstr(env,
5784 PPCInstr_AvLdSt( True/*load*/, 16, dst,
5785 PPCAMode_IR( e->Iex.Get.offset,
5786 GuestStatePtr(mode64) )));
5787 return dst;
5790 if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
5791 /* Need to be able to do V128 unaligned loads. The BE unaligned load
5792 * can be accomplised using the following code sequece from the ISA.
5793 * It uses the lvx instruction that does two aligned loads and then
5794 * permute the data to store the required data as if it had been an
5795 * unaligned load.
5797 * lvx Vhi,0,Rb # load MSQ, using the unaligned address in Rb
5798 * lvsl Vp, 0,Rb # Set permute control vector
5799 * addi Rb,Rb,15 # Address of LSQ
5800 * lvx Vlo,0,Rb # load LSQ
5801 * vperm Vt,Vhi,Vlo,Vp # align the data as requested
5804 HReg Vhi = newVRegV(env);
5805 HReg Vlo = newVRegV(env);
5806 HReg Vp = newVRegV(env);
5807 HReg v_dst = newVRegV(env);
5808 HReg rB;
5809 HReg rB_plus_15 = newVRegI(env);
5811 vassert(e->Iex.Load.ty == Ity_V128);
5812 rB = iselWordExpr_R( env, e->Iex.Load.addr, IEndianess );
5814 // lvx Vhi, 0, Rb
5815 addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, Vhi,
5816 PPCAMode_IR(0, rB)) );
5818 if (IEndianess == Iend_LE)
5819 // lvsr Vp, 0, Rb
5820 addInstr(env, PPCInstr_AvSh( False/*right shift*/, Vp,
5821 PPCAMode_IR(0, rB)) );
5822 else
5823 // lvsl Vp, 0, Rb
5824 addInstr(env, PPCInstr_AvSh( True/*left shift*/, Vp,
5825 PPCAMode_IR(0, rB)) );
5827 // addi Rb_plus_15, Rb, 15
5828 addInstr(env, PPCInstr_Alu( Palu_ADD, rB_plus_15,
5829 rB, PPCRH_Imm(True, toUShort(15))) );
5831 // lvx Vlo, 0, Rb_plus_15
5832 addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, Vlo,
5833 PPCAMode_IR(0, rB_plus_15)) );
5835 if (IEndianess == Iend_LE)
5836 // vperm Vt, Vhi, Vlo, Vp
5837 addInstr(env, PPCInstr_AvPerm( v_dst, Vlo, Vhi, Vp ));
5838 else
5839 // vperm Vt, Vhi, Vlo, Vp
5840 addInstr(env, PPCInstr_AvPerm( v_dst, Vhi, Vlo, Vp ));
5842 return v_dst;
5845 if (e->tag == Iex_Unop) {
5846 switch (e->Iex.Unop.op) {
5848 case Iop_F16toF64x2:
5850 HReg dst = newVRegV(env);
5851 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5852 /* Note: PPC only coverts the 16-bt value in the upper word
5853 * to a 64-bit value stored in the upper word. The
5854 * contents of the lower word is undefined.
5856 addInstr(env, PPCInstr_AvUnary(Pav_F16toF64x2, dst, arg));
5857 return dst;
5860 case Iop_F64toF16x2_DEP:
5862 HReg dst = newVRegV(env);
5863 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5864 /* Note: PPC only coverts the 64-bt value in the upper 64-bit of V128
5865 * to a 16-bit value stored in the upper 64-bits of the result
5866 * V128. The contents of the lower 64-bits is undefined.
5868 addInstr(env, PPCInstr_AvUnary(Pav_F64toF16x2, dst, arg));
5869 return dst;
5872 case Iop_F16toF32x4:
5874 HReg src = newVRegV(env);
5875 HReg dst = newVRegV(env);
5876 HReg arg = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
5877 PPCAMode *am_off0, *am_off8;
5878 HReg r_aligned16;
5880 vassert(mode64);
5881 /* need to put I64 src into upper 64-bits of vector register,
5882 use stack */
5883 sub_from_sp( env, 32 ); // Move SP down
5885 /* Get a quadword aligned address within our stack space */
5886 r_aligned16 = get_sp_aligned16( env );
5887 am_off0 = PPCAMode_IR( 0, r_aligned16 );
5888 am_off8 = PPCAMode_IR( 8, r_aligned16 );
5890 /* Store I64 to stack */
5892 if (IEndianess == Iend_LE) {
5893 addInstr(env, PPCInstr_Store( 8, am_off8, arg, mode64 ));
5894 } else {
5895 addInstr(env, PPCInstr_Store( 8, am_off0, arg, mode64 ));
5898 /* Fetch new v128 src back from stack. */
5899 addInstr(env, PPCInstr_AvLdSt(True/*ld*/, 16, src, am_off0));
5901 /* issue instruction */
5902 addInstr(env, PPCInstr_AvUnary(Pav_F16toF32x4, dst, src));
5903 add_to_sp( env, 32 ); // Reset SP
5905 return dst;
5908 case Iop_F32toF16x4_DEP:
5910 HReg dst = newVRegI(env);
5911 HReg tmp = newVRegV(env);
5912 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5913 PPCAMode *am_off0, *am_off8;
5914 HReg r_aligned16;
5916 /* Instruction returns a V128, the Iop_F32toF16x4 needs to return
5917 * I64. Move the upper 64-bits from the instruction to an I64 via
5918 * the stack and return it.
5920 sub_from_sp( env, 32 ); // Move SP down
5922 addInstr(env, PPCInstr_AvUnary(Pav_F32toF16x4, tmp, arg));
5924 /* Get a quadword aligned address within our stack space */
5925 r_aligned16 = get_sp_aligned16( env );
5926 am_off0 = PPCAMode_IR( 0, r_aligned16 );
5927 am_off8 = PPCAMode_IR( 8, r_aligned16 );
5929 /* Store v128 tmp to stack. */
5930 addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, tmp, am_off0));
5932 /* Fetch I64 from stack */
5933 if (IEndianess == Iend_LE) {
5934 addInstr(env, PPCInstr_Load( 8, dst, am_off8, mode64 ));
5935 } else {
5936 addInstr(env, PPCInstr_Load( 8, dst, am_off0, mode64 ));
5939 add_to_sp( env, 32 ); // Reset SP
5940 return dst;
5943 case Iop_NotV128: {
5944 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5945 HReg dst = newVRegV(env);
5946 addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, arg));
5947 return dst;
5950 case Iop_CmpNEZ8x16: {
5951 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5952 HReg zero = newVRegV(env);
5953 HReg dst = newVRegV(env);
5954 addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
5955 addInstr(env, PPCInstr_AvBin8x16(Pav_CMPEQU, dst, arg, zero));
5956 addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
5957 return dst;
5960 case Iop_CmpNEZ16x8: {
5961 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5962 HReg zero = newVRegV(env);
5963 HReg dst = newVRegV(env);
5964 addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
5965 addInstr(env, PPCInstr_AvBin16x8(Pav_CMPEQU, dst, arg, zero));
5966 addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
5967 return dst;
5970 case Iop_CmpNEZ32x4: {
5971 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5972 HReg zero = newVRegV(env);
5973 HReg dst = newVRegV(env);
5974 addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
5975 addInstr(env, PPCInstr_AvBin32x4(Pav_CMPEQU, dst, arg, zero));
5976 addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
5977 return dst;
5980 case Iop_CmpNEZ64x2: {
5981 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5982 HReg zero = newVRegV(env);
5983 HReg dst = newVRegV(env);
5984 addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
5985 addInstr(env, PPCInstr_AvBin64x2(Pav_CMPEQU, dst, arg, zero));
5986 addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
5987 return dst;
5990 case Iop_RecipEst32Fx4: fpop = Pavfp_RCPF; goto do_32Fx4_unary;
5991 case Iop_RSqrtEst32Fx4: fpop = Pavfp_RSQRTF; goto do_32Fx4_unary;
5992 case Iop_Log2_32Fx4: fpop = Pavfp_Log2; goto do_32Fx4_unary;
5993 case Iop_Exp2_32Fx4: fpop = Pavfp_Exp2; goto do_32Fx4_unary;
5994 case Iop_I32UtoF32x4_DEP: fpop = Pavfp_CVTU2F; goto do_32Fx4_unary;
5995 case Iop_I32StoF32x4_DEP: fpop = Pavfp_CVTS2F; goto do_32Fx4_unary;
5996 case Iop_QF32toI32Ux4_RZ: fpop = Pavfp_QCVTF2U; goto do_32Fx4_unary;
5997 case Iop_QF32toI32Sx4_RZ: fpop = Pavfp_QCVTF2S; goto do_32Fx4_unary;
5998 case Iop_RoundF32x4_RM: fpop = Pavfp_ROUNDM; goto do_32Fx4_unary;
5999 case Iop_RoundF32x4_RP: fpop = Pavfp_ROUNDP; goto do_32Fx4_unary;
6000 case Iop_RoundF32x4_RN: fpop = Pavfp_ROUNDN; goto do_32Fx4_unary;
6001 case Iop_RoundF32x4_RZ: fpop = Pavfp_ROUNDZ; goto do_32Fx4_unary;
6002 do_32Fx4_unary:
6004 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
6005 HReg dst = newVRegV(env);
6006 addInstr(env, PPCInstr_AvUn32Fx4(fpop, dst, arg));
6007 return dst;
6010 case Iop_32UtoV128: {
6011 HReg r_aligned16, r_zeros;
6012 HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
6013 HReg dst = newVRegV(env);
6014 PPCAMode *am_off0, *am_off4, *am_off8, *am_off12;
6015 sub_from_sp( env, 32 ); // Move SP down
6017 /* Get a quadword aligned address within our stack space */
6018 r_aligned16 = get_sp_aligned16( env );
6019 am_off0 = PPCAMode_IR( 0, r_aligned16 );
6020 am_off4 = PPCAMode_IR( 4, r_aligned16 );
6021 am_off8 = PPCAMode_IR( 8, r_aligned16 );
6022 am_off12 = PPCAMode_IR( 12, r_aligned16 );
6024 /* Store zeros */
6025 r_zeros = newVRegI(env);
6026 addInstr(env, PPCInstr_LI(r_zeros, 0x0, mode64));
6027 if (IEndianess == Iend_LE)
6028 addInstr(env, PPCInstr_Store( 4, am_off0, r_src, mode64 ));
6029 else
6030 addInstr(env, PPCInstr_Store( 4, am_off0, r_zeros, mode64 ));
6031 addInstr(env, PPCInstr_Store( 4, am_off4, r_zeros, mode64 ));
6032 addInstr(env, PPCInstr_Store( 4, am_off8, r_zeros, mode64 ));
6034 /* Store r_src in low word of quadword-aligned mem */
6035 if (IEndianess == Iend_LE)
6036 addInstr(env, PPCInstr_Store( 4, am_off12, r_zeros, mode64 ));
6037 else
6038 addInstr(env, PPCInstr_Store( 4, am_off12, r_src, mode64 ));
6040 /* Load word into low word of quadword vector reg */
6041 if (IEndianess == Iend_LE)
6042 addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 4, dst, am_off0 ));
6043 else
6044 addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 4, dst, am_off12 ));
6046 add_to_sp( env, 32 ); // Reset SP
6047 return dst;
6050 case Iop_Dup8x16:
6051 case Iop_Dup16x8:
6052 case Iop_Dup32x4:
6053 return mk_AvDuplicateRI(env, e->Iex.Unop.arg, IEndianess);
6055 case Iop_CipherSV128: op = Pav_CIPHERSUBV128; goto do_AvCipherV128Un;
6056 do_AvCipherV128Un: {
6057 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
6058 HReg dst = newVRegV(env);
6059 addInstr(env, PPCInstr_AvCipherV128Unary(op, dst, arg));
6060 return dst;
6063 case Iop_Clz8x16: op = Pav_ZEROCNTBYTE; goto do_zerocnt;
6064 case Iop_Clz16x8: op = Pav_ZEROCNTHALF; goto do_zerocnt;
6065 case Iop_Clz32x4: op = Pav_ZEROCNTWORD; goto do_zerocnt;
6066 case Iop_Clz64x2: op = Pav_ZEROCNTDBL; goto do_zerocnt;
6067 case Iop_Ctz8x16: op = Pav_TRAILINGZEROCNTBYTE; goto do_zerocnt;
6068 case Iop_Ctz16x8: op = Pav_TRAILINGZEROCNTHALF; goto do_zerocnt;
6069 case Iop_Ctz32x4: op = Pav_TRAILINGZEROCNTWORD; goto do_zerocnt;
6070 case Iop_Ctz64x2: op = Pav_TRAILINGZEROCNTDBL; goto do_zerocnt;
6071 case Iop_PwBitMtxXpose64x2: op = Pav_BITMTXXPOSE; goto do_zerocnt;
6072 do_zerocnt:
6074 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
6075 HReg dst = newVRegV(env);
6076 addInstr(env, PPCInstr_AvUnary(op, dst, arg));
6077 return dst;
6080 /* BCD Iops */
6081 case Iop_BCD128toI128S:
6083 HReg dst = newVRegV(env);
6084 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
6085 addInstr(env, PPCInstr_AvUnary( Pav_BCD128toI128S, dst, arg ) );
6086 return dst;
6089 case Iop_MulI128by10: op = Pav_MulI128by10; goto do_MulI128;
6090 case Iop_MulI128by10Carry: op = Pav_MulI128by10Carry; goto do_MulI128;
6091 do_MulI128: {
6092 HReg dst = newVRegV(env);
6093 HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
6094 addInstr(env, PPCInstr_AvUnary(op, dst, arg));
6095 return dst;
6098 case Iop_ReinterpI128asV128: {
6099 PPCAMode* am_addr;
6100 PPCAMode* am_addr4;
6101 HReg rHi, rLo;
6102 HReg dst = newVRegV(env);
6104 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
6106 sub_from_sp( env, 16 ); // Move SP down 16 bytes
6107 am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
6108 am_addr4 = advance4(env, am_addr);
6110 // store the two 64-bit parts
6111 addInstr(env, PPCInstr_Store( 8, am_addr, rHi, mode64 ));
6112 addInstr(env, PPCInstr_Store( 8, am_addr4, rLo, mode64 ));
6114 // load as Ity_V128
6115 addInstr(env, PPCInstr_AvLdSt( True/*fetch*/, 16, dst, am_addr ));
6117 add_to_sp( env, 16 ); // Reset SP
6118 return dst;
6121 default:
6122 break;
6123 } /* switch (e->Iex.Unop.op) */
6124 } /* if (e->tag == Iex_Unop) */
6126 if (e->tag == Iex_Binop) {
6127 switch (e->Iex.Binop.op) {
6129 case Iop_64HLtoV128: {
6130 if (!mode64) {
6131 HReg r3, r2, r1, r0, r_aligned16;
6132 PPCAMode *am_off0, *am_off4, *am_off8, *am_off12;
6133 HReg dst = newVRegV(env);
6134 /* do this via the stack (easy, convenient, etc) */
6135 sub_from_sp( env, 32 ); // Move SP down
6137 // get a quadword aligned address within our stack space
6138 r_aligned16 = get_sp_aligned16( env );
6139 am_off0 = PPCAMode_IR( 0, r_aligned16 );
6140 am_off4 = PPCAMode_IR( 4, r_aligned16 );
6141 am_off8 = PPCAMode_IR( 8, r_aligned16 );
6142 am_off12 = PPCAMode_IR( 12, r_aligned16 );
6144 /* Do the less significant 64 bits */
6145 iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2, IEndianess);
6146 addInstr(env, PPCInstr_Store( 4, am_off12, r0, mode64 ));
6147 addInstr(env, PPCInstr_Store( 4, am_off8, r1, mode64 ));
6148 /* Do the more significant 64 bits */
6149 iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1, IEndianess);
6150 addInstr(env, PPCInstr_Store( 4, am_off4, r2, mode64 ));
6151 addInstr(env, PPCInstr_Store( 4, am_off0, r3, mode64 ));
6153 /* Fetch result back from stack. */
6154 addInstr(env, PPCInstr_AvLdSt(True/*ld*/, 16, dst, am_off0));
6156 add_to_sp( env, 32 ); // Reset SP
6157 return dst;
6158 } else {
6159 HReg rHi = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
6160 HReg rLo = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
6161 HReg dst = newVRegV(env);
6162 HReg r_aligned16;
6163 PPCAMode *am_off0, *am_off8;
6164 /* do this via the stack (easy, convenient, etc) */
6165 sub_from_sp( env, 32 ); // Move SP down
6167 // get a quadword aligned address within our stack space
6168 r_aligned16 = get_sp_aligned16( env );
6169 am_off0 = PPCAMode_IR( 0, r_aligned16 );
6170 am_off8 = PPCAMode_IR( 8, r_aligned16 );
6172 /* Store 2*I64 to stack */
6173 if (IEndianess == Iend_LE) {
6174 addInstr(env, PPCInstr_Store( 8, am_off0, rLo, mode64 ));
6175 addInstr(env, PPCInstr_Store( 8, am_off8, rHi, mode64 ));
6176 } else {
6177 addInstr(env, PPCInstr_Store( 8, am_off0, rHi, mode64 ));
6178 addInstr(env, PPCInstr_Store( 8, am_off8, rLo, mode64 ));
6180 /* Fetch result back from stack. */
6181 addInstr(env, PPCInstr_AvLdSt(True/*ld*/, 16, dst, am_off0));
6183 add_to_sp( env, 32 ); // Reset SP
6184 return dst;
6188 case Iop_Max32Fx4: fpop = Pavfp_MAXF; goto do_32Fx4;
6189 case Iop_Min32Fx4: fpop = Pavfp_MINF; goto do_32Fx4;
6190 case Iop_CmpEQ32Fx4: fpop = Pavfp_CMPEQF; goto do_32Fx4;
6191 case Iop_CmpGT32Fx4: fpop = Pavfp_CMPGTF; goto do_32Fx4;
6192 case Iop_CmpGE32Fx4: fpop = Pavfp_CMPGEF; goto do_32Fx4;
6193 do_32Fx4:
6195 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6196 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6197 HReg dst = newVRegV(env);
6198 addInstr(env, PPCInstr_AvBin32Fx4(fpop, dst, argL, argR));
6199 return dst;
6202 case Iop_CmpLE32Fx4: {
6203 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6204 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6205 HReg dst = newVRegV(env);
6207 /* stay consistent with native ppc compares:
6208 if a left/right lane holds a nan, return zeros for that lane
6209 so: le == NOT(gt OR isNan)
6211 HReg isNanLR = newVRegV(env);
6212 HReg isNanL = isNan(env, argL, IEndianess);
6213 HReg isNanR = isNan(env, argR, IEndianess);
6214 addInstr(env, PPCInstr_AvBinary(Pav_OR, isNanLR,
6215 isNanL, isNanR));
6217 addInstr(env, PPCInstr_AvBin32Fx4(Pavfp_CMPGTF, dst,
6218 argL, argR));
6219 addInstr(env, PPCInstr_AvBinary(Pav_OR, dst, dst, isNanLR));
6220 addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
6221 return dst;
6224 case Iop_AndV128: op = Pav_AND; goto do_AvBin;
6225 case Iop_OrV128: op = Pav_OR; goto do_AvBin;
6226 case Iop_XorV128: op = Pav_XOR; goto do_AvBin;
6227 do_AvBin: {
6228 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6229 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6230 HReg dst = newVRegV(env);
6231 addInstr(env, PPCInstr_AvBinary(op, dst, arg1, arg2));
6232 return dst;
6235 case Iop_Shl8x16: op = Pav_SHL; goto do_AvBin8x16;
6236 case Iop_Shr8x16: op = Pav_SHR; goto do_AvBin8x16;
6237 case Iop_Sar8x16: op = Pav_SAR; goto do_AvBin8x16;
6238 case Iop_Rol8x16: op = Pav_ROTL; goto do_AvBin8x16;
6239 case Iop_InterleaveHI8x16: op = Pav_MRGHI; goto do_AvBin8x16;
6240 case Iop_InterleaveLO8x16: op = Pav_MRGLO; goto do_AvBin8x16;
6241 case Iop_Add8x16: op = Pav_ADDU; goto do_AvBin8x16;
6242 case Iop_QAdd8Ux16: op = Pav_QADDU; goto do_AvBin8x16;
6243 case Iop_QAdd8Sx16: op = Pav_QADDS; goto do_AvBin8x16;
6244 case Iop_Sub8x16: op = Pav_SUBU; goto do_AvBin8x16;
6245 case Iop_QSub8Ux16: op = Pav_QSUBU; goto do_AvBin8x16;
6246 case Iop_QSub8Sx16: op = Pav_QSUBS; goto do_AvBin8x16;
6247 case Iop_Avg8Ux16: op = Pav_AVGU; goto do_AvBin8x16;
6248 case Iop_Avg8Sx16: op = Pav_AVGS; goto do_AvBin8x16;
6249 case Iop_Max8Ux16: op = Pav_MAXU; goto do_AvBin8x16;
6250 case Iop_Max8Sx16: op = Pav_MAXS; goto do_AvBin8x16;
6251 case Iop_Min8Ux16: op = Pav_MINU; goto do_AvBin8x16;
6252 case Iop_Min8Sx16: op = Pav_MINS; goto do_AvBin8x16;
6253 case Iop_MullEven8Ux16: op = Pav_OMULU; goto do_AvBin8x16;
6254 case Iop_MullEven8Sx16: op = Pav_OMULS; goto do_AvBin8x16;
6255 case Iop_CmpEQ8x16: op = Pav_CMPEQU; goto do_AvBin8x16;
6256 case Iop_CmpGT8Ux16: op = Pav_CMPGTU; goto do_AvBin8x16;
6257 case Iop_CmpGT8Sx16: op = Pav_CMPGTS; goto do_AvBin8x16;
6258 case Iop_PolynomialMulAdd8x16: op = Pav_POLYMULADD; goto do_AvBin8x16;
6259 do_AvBin8x16: {
6260 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6261 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6262 HReg dst = newVRegV(env);
6263 addInstr(env, PPCInstr_AvBin8x16(op, dst, arg1, arg2));
6264 return dst;
6267 case Iop_Shl16x8: op = Pav_SHL; goto do_AvBin16x8;
6268 case Iop_Shr16x8: op = Pav_SHR; goto do_AvBin16x8;
6269 case Iop_Sar16x8: op = Pav_SAR; goto do_AvBin16x8;
6270 case Iop_Rol16x8: op = Pav_ROTL; goto do_AvBin16x8;
6271 case Iop_NarrowBin16to8x16: op = Pav_PACKUU; goto do_AvBin16x8;
6272 case Iop_QNarrowBin16Uto8Ux16: op = Pav_QPACKUU; goto do_AvBin16x8;
6273 case Iop_QNarrowBin16Sto8Sx16: op = Pav_QPACKSS; goto do_AvBin16x8;
6274 case Iop_InterleaveHI16x8: op = Pav_MRGHI; goto do_AvBin16x8;
6275 case Iop_InterleaveLO16x8: op = Pav_MRGLO; goto do_AvBin16x8;
6276 case Iop_Add16x8: op = Pav_ADDU; goto do_AvBin16x8;
6277 case Iop_QAdd16Ux8: op = Pav_QADDU; goto do_AvBin16x8;
6278 case Iop_QAdd16Sx8: op = Pav_QADDS; goto do_AvBin16x8;
6279 case Iop_Sub16x8: op = Pav_SUBU; goto do_AvBin16x8;
6280 case Iop_QSub16Ux8: op = Pav_QSUBU; goto do_AvBin16x8;
6281 case Iop_QSub16Sx8: op = Pav_QSUBS; goto do_AvBin16x8;
6282 case Iop_Avg16Ux8: op = Pav_AVGU; goto do_AvBin16x8;
6283 case Iop_Avg16Sx8: op = Pav_AVGS; goto do_AvBin16x8;
6284 case Iop_Max16Ux8: op = Pav_MAXU; goto do_AvBin16x8;
6285 case Iop_Max16Sx8: op = Pav_MAXS; goto do_AvBin16x8;
6286 case Iop_Min16Ux8: op = Pav_MINU; goto do_AvBin16x8;
6287 case Iop_Min16Sx8: op = Pav_MINS; goto do_AvBin16x8;
6288 case Iop_MullEven16Ux8: op = Pav_OMULU; goto do_AvBin16x8;
6289 case Iop_MullEven16Sx8: op = Pav_OMULS; goto do_AvBin16x8;
6290 case Iop_CmpEQ16x8: op = Pav_CMPEQU; goto do_AvBin16x8;
6291 case Iop_CmpGT16Ux8: op = Pav_CMPGTU; goto do_AvBin16x8;
6292 case Iop_CmpGT16Sx8: op = Pav_CMPGTS; goto do_AvBin16x8;
6293 case Iop_PolynomialMulAdd16x8: op = Pav_POLYMULADD; goto do_AvBin16x8;
6294 do_AvBin16x8: {
6295 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6296 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6297 HReg dst = newVRegV(env);
6298 addInstr(env, PPCInstr_AvBin16x8(op, dst, arg1, arg2));
6299 return dst;
6302 case Iop_Shl32x4: op = Pav_SHL; goto do_AvBin32x4;
6303 case Iop_Shr32x4: op = Pav_SHR; goto do_AvBin32x4;
6304 case Iop_Sar32x4: op = Pav_SAR; goto do_AvBin32x4;
6305 case Iop_Rol32x4: op = Pav_ROTL; goto do_AvBin32x4;
6306 case Iop_NarrowBin32to16x8: op = Pav_PACKUU; goto do_AvBin32x4;
6307 case Iop_QNarrowBin32Uto16Ux8: op = Pav_QPACKUU; goto do_AvBin32x4;
6308 case Iop_QNarrowBin32Sto16Sx8: op = Pav_QPACKSS; goto do_AvBin32x4;
6309 case Iop_InterleaveHI32x4: op = Pav_MRGHI; goto do_AvBin32x4;
6310 case Iop_InterleaveLO32x4: op = Pav_MRGLO; goto do_AvBin32x4;
6311 case Iop_Add32x4: op = Pav_ADDU; goto do_AvBin32x4;
6312 case Iop_QAdd32Ux4: op = Pav_QADDU; goto do_AvBin32x4;
6313 case Iop_QAdd32Sx4: op = Pav_QADDS; goto do_AvBin32x4;
6314 case Iop_Sub32x4: op = Pav_SUBU; goto do_AvBin32x4;
6315 case Iop_QSub32Ux4: op = Pav_QSUBU; goto do_AvBin32x4;
6316 case Iop_QSub32Sx4: op = Pav_QSUBS; goto do_AvBin32x4;
6317 case Iop_Avg32Ux4: op = Pav_AVGU; goto do_AvBin32x4;
6318 case Iop_Avg32Sx4: op = Pav_AVGS; goto do_AvBin32x4;
6319 case Iop_Max32Ux4: op = Pav_MAXU; goto do_AvBin32x4;
6320 case Iop_Max32Sx4: op = Pav_MAXS; goto do_AvBin32x4;
6321 case Iop_Min32Ux4: op = Pav_MINU; goto do_AvBin32x4;
6322 case Iop_Min32Sx4: op = Pav_MINS; goto do_AvBin32x4;
6323 case Iop_Mul32x4: op = Pav_MULU; goto do_AvBin32x4;
6324 case Iop_MullEven32Ux4: op = Pav_OMULU; goto do_AvBin32x4;
6325 case Iop_MullEven32Sx4: op = Pav_OMULS; goto do_AvBin32x4;
6326 case Iop_CmpEQ32x4: op = Pav_CMPEQU; goto do_AvBin32x4;
6327 case Iop_CmpGT32Ux4: op = Pav_CMPGTU; goto do_AvBin32x4;
6328 case Iop_CmpGT32Sx4: op = Pav_CMPGTS; goto do_AvBin32x4;
6329 case Iop_CatOddLanes32x4: op = Pav_CATODD; goto do_AvBin32x4;
6330 case Iop_CatEvenLanes32x4: op = Pav_CATEVEN; goto do_AvBin32x4;
6331 case Iop_PolynomialMulAdd32x4: op = Pav_POLYMULADD; goto do_AvBin32x4;
6332 do_AvBin32x4: {
6333 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6334 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6335 HReg dst = newVRegV(env);
6336 addInstr(env, PPCInstr_AvBin32x4(op, dst, arg1, arg2));
6337 return dst;
6340 case Iop_Shl64x2: op = Pav_SHL; goto do_AvBin64x2;
6341 case Iop_Shr64x2: op = Pav_SHR; goto do_AvBin64x2;
6342 case Iop_Sar64x2: op = Pav_SAR; goto do_AvBin64x2;
6343 case Iop_Rol64x2: op = Pav_ROTL; goto do_AvBin64x2;
6344 case Iop_NarrowBin64to32x4: op = Pav_PACKUU; goto do_AvBin64x2;
6345 case Iop_QNarrowBin64Sto32Sx4: op = Pav_QPACKSS; goto do_AvBin64x2;
6346 case Iop_QNarrowBin64Uto32Ux4: op = Pav_QPACKUU; goto do_AvBin64x2;
6347 case Iop_InterleaveHI64x2: op = Pav_MRGHI; goto do_AvBin64x2;
6348 case Iop_InterleaveLO64x2: op = Pav_MRGLO; goto do_AvBin64x2;
6349 case Iop_Add64x2: op = Pav_ADDU; goto do_AvBin64x2;
6350 case Iop_Sub64x2: op = Pav_SUBU; goto do_AvBin64x2;
6351 case Iop_Max64Ux2: op = Pav_MAXU; goto do_AvBin64x2;
6352 case Iop_Max64Sx2: op = Pav_MAXS; goto do_AvBin64x2;
6353 case Iop_Min64Ux2: op = Pav_MINU; goto do_AvBin64x2;
6354 case Iop_Min64Sx2: op = Pav_MINS; goto do_AvBin64x2;
6355 case Iop_CmpEQ64x2: op = Pav_CMPEQU; goto do_AvBin64x2;
6356 case Iop_CmpGT64Ux2: op = Pav_CMPGTU; goto do_AvBin64x2;
6357 case Iop_CmpGT64Sx2: op = Pav_CMPGTS; goto do_AvBin64x2;
6358 case Iop_PolynomialMulAdd64x2: op = Pav_POLYMULADD; goto do_AvBin64x2;
6359 do_AvBin64x2: {
6360 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6361 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6362 HReg dst = newVRegV(env);
6363 addInstr(env, PPCInstr_AvBin64x2(op, dst, arg1, arg2));
6364 return dst;
6367 case Iop_ShlN8x16: op = Pav_SHL; goto do_AvShift8x16;
6368 case Iop_SarN8x16: op = Pav_SAR; goto do_AvShift8x16;
6369 do_AvShift8x16: {
6370 HReg r_src = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6371 HReg dst = newVRegV(env);
6372 HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
6373 addInstr(env, PPCInstr_AvBin8x16(op, dst, r_src, v_shft));
6374 return dst;
6377 case Iop_ShlN16x8: op = Pav_SHL; goto do_AvShift16x8;
6378 case Iop_ShrN16x8: op = Pav_SHR; goto do_AvShift16x8;
6379 case Iop_SarN16x8: op = Pav_SAR; goto do_AvShift16x8;
6380 do_AvShift16x8: {
6381 HReg r_src = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6382 HReg dst = newVRegV(env);
6383 HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
6384 addInstr(env, PPCInstr_AvBin16x8(op, dst, r_src, v_shft));
6385 return dst;
6388 case Iop_ShlN32x4: op = Pav_SHL; goto do_AvShift32x4;
6389 case Iop_ShrN32x4: op = Pav_SHR; goto do_AvShift32x4;
6390 case Iop_SarN32x4: op = Pav_SAR; goto do_AvShift32x4;
6391 do_AvShift32x4: {
6392 HReg r_src = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6393 HReg dst = newVRegV(env);
6394 HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
6395 addInstr(env, PPCInstr_AvBin32x4(op, dst, r_src, v_shft));
6396 return dst;
6399 case Iop_ShlN64x2: op = Pav_SHL; goto do_AvShift64x2;
6400 case Iop_ShrN64x2: op = Pav_SHR; goto do_AvShift64x2;
6401 case Iop_SarN64x2: op = Pav_SAR; goto do_AvShift64x2;
6402 do_AvShift64x2: {
6403 HReg r_src = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6404 HReg dst = newVRegV(env);
6405 HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
6406 addInstr(env, PPCInstr_AvBin64x2(op, dst, r_src, v_shft));
6407 return dst;
6410 case Iop_ShrV128: op = Pav_SHR; goto do_AvShiftV128;
6411 case Iop_ShlV128: op = Pav_SHL; goto do_AvShiftV128;
6412 do_AvShiftV128: {
6413 HReg dst = newVRegV(env);
6414 HReg r_src = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6415 HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
6416 /* Note: shift value gets masked by 127 */
6417 addInstr(env, PPCInstr_AvBinary(op, dst, r_src, v_shft));
6418 return dst;
6421 case Iop_Perm8x16: {
6422 HReg dst = newVRegV(env);
6423 HReg v_src = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6424 HReg v_ctl = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6425 addInstr(env, PPCInstr_AvPerm(dst, v_src, v_src, v_ctl));
6426 return dst;
6429 case Iop_CipherV128: op = Pav_CIPHERV128; goto do_AvCipherV128;
6430 case Iop_CipherLV128: op = Pav_CIPHERLV128; goto do_AvCipherV128;
6431 case Iop_NCipherV128: op = Pav_NCIPHERV128; goto do_AvCipherV128;
6432 case Iop_NCipherLV128:op = Pav_NCIPHERLV128; goto do_AvCipherV128;
6433 do_AvCipherV128: {
6434 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6435 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6436 HReg dst = newVRegV(env);
6437 addInstr(env, PPCInstr_AvCipherV128Binary(op, dst, arg1, arg2));
6438 return dst;
6441 case Iop_SHA256:op = Pav_SHA256; goto do_AvHashV128;
6442 case Iop_SHA512:op = Pav_SHA512; goto do_AvHashV128;
6443 do_AvHashV128: {
6444 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6445 HReg dst = newVRegV(env);
6446 PPCRI* s_field = iselWordExpr_RI(env, e->Iex.Binop.arg2, IEndianess);
6447 addInstr(env, PPCInstr_AvHashV128Binary(op, dst, arg1, s_field));
6448 return dst;
6451 /* BCD Iops */
6452 case Iop_I128StoBCD128:
6454 HReg dst = newVRegV(env);
6455 HReg arg = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6456 PPCRI* ps = iselWordExpr_RI(env, e->Iex.Binop.arg2, IEndianess);
6458 addInstr(env, PPCInstr_AvBinaryInt( Pav_I128StoBCD128, dst, arg,
6459 ps ) );
6460 return dst;
6463 case Iop_MulI128by10E: op = Pav_MulI128by10E; goto do_MulI128E;
6464 case Iop_MulI128by10ECarry: op = Pav_MulI128by10ECarry; goto do_MulI128E;
6465 do_MulI128E: {
6466 HReg dst = newVRegV(env);
6467 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6468 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6469 addInstr(env, PPCInstr_AvBinary(op, dst, argL, argR));
6470 return dst;
6473 case Iop_BCDAdd:op = Pav_BCDAdd; goto do_AvBCDV128;
6474 case Iop_BCDSub:op = Pav_BCDSub; goto do_AvBCDV128;
6475 do_AvBCDV128: {
6476 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6477 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6478 HReg dst = newVRegV(env);
6479 addInstr(env, PPCInstr_AvBCDV128Binary(op, dst, arg1, arg2));
6480 return dst;
6483 case Iop_DivU128: opav128 = Pav_DivU128; goto do_IntArithBinaryI128;
6484 case Iop_DivS128: opav128 = Pav_DivS128; goto do_IntArithBinaryI128;
6485 case Iop_DivU128E: opav128 = Pav_DivU128E; goto do_IntArithBinaryI128;
6486 case Iop_DivS128E: opav128 = Pav_DivS128E; goto do_IntArithBinaryI128;
6487 case Iop_ModU128: opav128 = Pav_ModU128; goto do_IntArithBinaryI128;
6488 case Iop_ModS128: opav128 = Pav_ModS128; goto do_IntArithBinaryI128;
6489 do_IntArithBinaryI128: {
6490 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6491 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6492 HReg dst = newVRegV(env);
6493 addInstr(env, PPCInstr_AvBinaryInt128(opav128, dst, arg1, arg2));
6494 return dst;
6497 default:
6498 break;
6499 } /* switch (e->Iex.Binop.op) */
6500 } /* if (e->tag == Iex_Binop) */
6502 if (e->tag == Iex_Triop) {
6503 IRTriop *triop = e->Iex.Triop.details;
6504 switch (triop->op) {
6505 case Iop_Add32Fx4: fpop = Pavfp_ADDF; goto do_32Fx4_with_rm;
6506 case Iop_Sub32Fx4: fpop = Pavfp_SUBF; goto do_32Fx4_with_rm;
6507 case Iop_Mul32Fx4: fpop = Pavfp_MULF; goto do_32Fx4_with_rm;
6508 do_32Fx4_with_rm:
6510 HReg argL = iselVecExpr(env, triop->arg2, IEndianess);
6511 HReg argR = iselVecExpr(env, triop->arg3, IEndianess);
6512 HReg dst = newVRegV(env);
6513 /* FIXME: this is bogus, in the sense that Altivec ignores
6514 FPSCR.RM, at least for some FP operations. So setting the
6515 RM is pointless. This is only really correct in the case
6516 where the RM is known, at JIT time, to be Irrm_NEAREST,
6517 since -- at least for Altivec FP add/sub/mul -- the
6518 emitted insn is hardwired to round to nearest. */
6519 set_FPU_rounding_mode(env, triop->arg1, IEndianess);
6520 addInstr(env, PPCInstr_AvBin32Fx4(fpop, dst, argL, argR));
6521 return dst;
6524 case Iop_2xMultU64Add128CarryOut:
6525 optri128 = Pav_2xMultU64Add128CarryOut; goto do_IntArithTrinaryI128;
6526 do_IntArithTrinaryI128: {
6527 HReg arg1 = iselVecExpr(env, triop->arg1, IEndianess);
6528 HReg arg2 = iselVecExpr(env, triop->arg2, IEndianess);
6529 HReg arg3 = iselVecExpr(env, triop->arg3, IEndianess);
6530 HReg dst = newVRegV(env);
6531 addInstr(env, PPCInstr_AvTernaryInt128(optri128, dst, arg1, arg2,
6532 arg3));
6533 return dst;
6536 default:
6537 break;
6538 } /* switch (e->Iex.Triop.op) */
6539 } /* if (e->tag == Iex_Triop) */
6542 if (e->tag == Iex_Const ) {
6543 vassert(e->Iex.Const.con->tag == Ico_V128);
6544 if (e->Iex.Const.con->Ico.V128 == 0x0000) {
6545 return generate_zeroes_V128(env);
6547 else if (e->Iex.Const.con->Ico.V128 == 0xffff) {
6548 return generate_ones_V128(env);
6552 vex_printf("iselVecExpr(ppc) (subarch = %s): can't reduce\n",
6553 LibVEX_ppVexHwCaps(mode64 ? VexArchPPC64 : VexArchPPC32,
6554 env->hwcaps));
6555 ppIRExpr(e);
6556 vpanic("iselVecExpr_wrk(ppc)");
6560 /*---------------------------------------------------------*/
6561 /*--- ISEL: Statements ---*/
6562 /*---------------------------------------------------------*/
6564 static void iselStmt ( ISelEnv* env, IRStmt* stmt, IREndness IEndianess )
6566 Bool mode64 = env->mode64;
6567 if (vex_traceflags & VEX_TRACE_VCODE) {
6568 vex_printf("\n -- ");
6569 ppIRStmt(stmt);
6570 vex_printf("\n");
6573 switch (stmt->tag) {
6575 /* --------- STORE --------- */
6576 case Ist_Store: {
6577 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
6578 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
6579 IREndness end = stmt->Ist.Store.end;
6581 if (end != IEndianess)
6582 goto stmt_fail;
6583 if (!mode64 && (tya != Ity_I32))
6584 goto stmt_fail;
6585 if (mode64 && (tya != Ity_I64))
6586 goto stmt_fail;
6588 if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32 ||
6589 (mode64 && (tyd == Ity_I64))) {
6590 PPCAMode* am_addr
6591 = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6592 IEndianess);
6593 HReg r_src = iselWordExpr_R(env, stmt->Ist.Store.data, IEndianess);
6594 addInstr(env, PPCInstr_Store( toUChar(sizeofIRType(tyd)),
6595 am_addr, r_src, mode64 ));
6596 return;
6598 if (tyd == Ity_F64) {
6599 PPCAMode* am_addr
6600 = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6601 IEndianess);
6602 HReg fr_src = iselDblExpr(env, stmt->Ist.Store.data, IEndianess);
6603 addInstr(env,
6604 PPCInstr_FpLdSt(False/*store*/, 8, fr_src, am_addr));
6605 return;
6607 if (tyd == Ity_F32) {
6608 PPCAMode* am_addr
6609 = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6610 IEndianess);
6611 HReg fr_src = iselFltExpr(env, stmt->Ist.Store.data, IEndianess);
6612 addInstr(env,
6613 PPCInstr_FpLdSt(False/*store*/, 4, fr_src, am_addr));
6614 return;
6616 if (tyd == Ity_D64) {
6617 PPCAMode* am_addr
6618 = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6619 IEndianess);
6620 HReg fr_src = iselDfp64Expr(env, stmt->Ist.Store.data, IEndianess);
6621 addInstr(env,
6622 PPCInstr_FpLdSt(False/*store*/, 8, fr_src, am_addr));
6623 return;
6625 if (tyd == Ity_D32) {
6626 PPCAMode* am_addr
6627 = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6628 IEndianess);
6629 HReg fr_src = iselDfp32Expr(env, stmt->Ist.Store.data, IEndianess);
6630 addInstr(env,
6631 PPCInstr_FpLdSt(False/*store*/, 4, fr_src, am_addr));
6632 return;
6634 if (tyd == Ity_V128) {
6635 PPCAMode* am_addr
6636 = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6637 IEndianess);
6638 HReg v_src = iselVecExpr(env, stmt->Ist.Store.data, IEndianess);
6639 addInstr(env,
6640 PPCInstr_AvLdSt(False/*store*/, 16, v_src, am_addr));
6641 return;
6643 if (tyd == Ity_I64 && !mode64) {
6644 /* Just calculate the address in the register. Life is too
6645 short to arse around trying and possibly failing to adjust
6646 the offset in a 'reg+offset' style amode. */
6647 HReg rHi32, rLo32;
6648 HReg r_addr = iselWordExpr_R(env, stmt->Ist.Store.addr, IEndianess);
6649 iselInt64Expr( &rHi32, &rLo32, env, stmt->Ist.Store.data,
6650 IEndianess );
6651 addInstr(env, PPCInstr_Store( 4/*byte-store*/,
6652 PPCAMode_IR( 0, r_addr ),
6653 rHi32,
6654 False/*32-bit insn please*/) );
6655 addInstr(env, PPCInstr_Store( 4/*byte-store*/,
6656 PPCAMode_IR( 4, r_addr ),
6657 rLo32,
6658 False/*32-bit insn please*/) );
6659 return;
6661 break;
6664 /* --------- PUT --------- */
6665 case Ist_Put: {
6666 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
6667 if (ty == Ity_I8 || ty == Ity_I16 ||
6668 ty == Ity_I32 || ((ty == Ity_I64) && mode64)) {
6669 HReg r_src = iselWordExpr_R(env, stmt->Ist.Put.data, IEndianess);
6670 PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6671 GuestStatePtr(mode64) );
6672 addInstr(env, PPCInstr_Store( toUChar(sizeofIRType(ty)),
6673 am_addr, r_src, mode64 ));
6674 return;
6676 if (!mode64 && ty == Ity_I64) {
6677 HReg rHi, rLo;
6678 PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6679 GuestStatePtr(mode64) );
6680 PPCAMode* am_addr4 = advance4(env, am_addr);
6681 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.Put.data, IEndianess);
6682 addInstr(env, PPCInstr_Store( 4, am_addr, rHi, mode64 ));
6683 addInstr(env, PPCInstr_Store( 4, am_addr4, rLo, mode64 ));
6684 return;
6686 if (ty == Ity_I128) {
6687 HReg rHi, rLo;
6688 PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6689 GuestStatePtr(mode64) );
6690 PPCAMode* am_addr4 = advance4(env, am_addr);
6692 iselInt128Expr(&rHi,&rLo, env, stmt->Ist.Put.data, IEndianess);
6693 addInstr(env, PPCInstr_Store( 4, am_addr, rHi, mode64 ));
6694 addInstr(env, PPCInstr_Store( 4, am_addr4, rLo, mode64 ));
6695 return;
6697 if (ty == Ity_F128) {
6698 /* Guest state vectors are 16byte aligned,
6699 so don't need to worry here */
6700 HReg v_src = iselFp128Expr(env, stmt->Ist.Put.data, IEndianess);
6702 PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6703 GuestStatePtr(mode64) );
6704 addInstr(env,
6705 PPCInstr_AvLdSt(False/*store*/, 16, v_src, am_addr));
6706 return;
6708 if (ty == Ity_V128) {
6709 /* Guest state vectors are 16byte aligned,
6710 so don't need to worry here */
6711 HReg v_src = iselVecExpr(env, stmt->Ist.Put.data, IEndianess);
6712 PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6713 GuestStatePtr(mode64) );
6714 addInstr(env,
6715 PPCInstr_AvLdSt(False/*store*/, 16, v_src, am_addr));
6716 return;
6718 if (ty == Ity_F64) {
6719 HReg fr_src = iselDblExpr(env, stmt->Ist.Put.data, IEndianess);
6720 PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6721 GuestStatePtr(mode64) );
6722 addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
6723 fr_src, am_addr ));
6724 return;
6726 if (ty == Ity_D32) {
6727 /* The 32-bit value is stored in a 64-bit register */
6728 HReg fr_src = iselDfp32Expr( env, stmt->Ist.Put.data, IEndianess );
6729 PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6730 GuestStatePtr(mode64) );
6731 addInstr( env, PPCInstr_FpLdSt( False/*store*/, 8,
6732 fr_src, am_addr ) );
6733 return;
6735 if (ty == Ity_D64) {
6736 HReg fr_src = iselDfp64Expr( env, stmt->Ist.Put.data, IEndianess );
6737 PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6738 GuestStatePtr(mode64) );
6739 addInstr( env, PPCInstr_FpLdSt( False/*store*/, 8, fr_src, am_addr ) );
6740 return;
6742 break;
6745 /* --------- Indexed PUT --------- */
6746 case Ist_PutI: {
6747 IRPutI *puti = stmt->Ist.PutI.details;
6749 PPCAMode* dst_am
6750 = genGuestArrayOffset(
6751 env, puti->descr,
6752 puti->ix, puti->bias,
6753 IEndianess );
6754 IRType ty = typeOfIRExpr(env->type_env, puti->data);
6755 if (mode64 && ty == Ity_I64) {
6756 HReg r_src = iselWordExpr_R(env, puti->data, IEndianess);
6757 addInstr(env, PPCInstr_Store( toUChar(8),
6758 dst_am, r_src, mode64 ));
6759 return;
6761 if ((!mode64) && ty == Ity_I32) {
6762 HReg r_src = iselWordExpr_R(env, puti->data, IEndianess);
6763 addInstr(env, PPCInstr_Store( toUChar(4),
6764 dst_am, r_src, mode64 ));
6765 return;
6767 break;
6770 /* --------- TMP --------- */
6771 case Ist_WrTmp: {
6772 IRTemp tmp = stmt->Ist.WrTmp.tmp;
6773 IRType ty = typeOfIRTemp(env->type_env, tmp);
6774 if (ty == Ity_I8 || ty == Ity_I16 ||
6775 ty == Ity_I32 || ((ty == Ity_I64) && mode64)) {
6776 HReg r_dst = lookupIRTemp(env, tmp);
6777 HReg r_src = iselWordExpr_R(env, stmt->Ist.WrTmp.data, IEndianess);
6778 addInstr(env, mk_iMOVds_RR( r_dst, r_src ));
6779 return;
6781 if (!mode64 && ty == Ity_I64) {
6782 HReg r_srcHi, r_srcLo, r_dstHi, r_dstLo;
6784 iselInt64Expr(&r_srcHi,&r_srcLo, env, stmt->Ist.WrTmp.data,
6785 IEndianess);
6786 lookupIRTempPair( &r_dstHi, &r_dstLo, env, tmp);
6787 addInstr(env, mk_iMOVds_RR(r_dstHi, r_srcHi) );
6788 addInstr(env, mk_iMOVds_RR(r_dstLo, r_srcLo) );
6789 return;
6791 if (mode64 && ty == Ity_I128) {
6792 HReg r_srcHi, r_srcLo, r_dstHi, r_dstLo;
6793 iselInt128Expr(&r_srcHi,&r_srcLo, env, stmt->Ist.WrTmp.data,
6794 IEndianess);
6795 lookupIRTempPair( &r_dstHi, &r_dstLo, env, tmp);
6796 addInstr(env, mk_iMOVds_RR(r_dstHi, r_srcHi) );
6797 addInstr(env, mk_iMOVds_RR(r_dstLo, r_srcLo) );
6798 return;
6800 if (!mode64 && ty == Ity_I128) {
6801 HReg r_srcHi = INVALID_HREG;
6802 HReg r_srcMedHi = INVALID_HREG;
6803 HReg r_srcMedLo = INVALID_HREG;
6804 HReg r_srcLo = INVALID_HREG;
6805 HReg r_dstHi, r_dstMedHi, r_dstMedLo, r_dstLo;
6807 iselInt128Expr_to_32x4(&r_srcHi, &r_srcMedHi,
6808 &r_srcMedLo, &r_srcLo,
6809 env, stmt->Ist.WrTmp.data, IEndianess);
6811 lookupIRTempQuad( &r_dstHi, &r_dstMedHi, &r_dstMedLo,
6812 &r_dstLo, env, tmp);
6814 addInstr(env, mk_iMOVds_RR(r_dstHi, r_srcHi) );
6815 addInstr(env, mk_iMOVds_RR(r_dstMedHi, r_srcMedHi) );
6816 addInstr(env, mk_iMOVds_RR(r_dstMedLo, r_srcMedLo) );
6817 addInstr(env, mk_iMOVds_RR(r_dstLo, r_srcLo) );
6818 return;
6820 if (ty == Ity_I1) {
6821 PPCCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data,
6822 IEndianess);
6823 HReg r_dst = lookupIRTemp(env, tmp);
6824 addInstr(env, PPCInstr_Set(cond, r_dst));
6825 return;
6827 if (ty == Ity_F64) {
6828 HReg fr_dst = lookupIRTemp(env, tmp);
6829 HReg fr_src = iselDblExpr(env, stmt->Ist.WrTmp.data, IEndianess);
6830 addInstr(env, PPCInstr_FpUnary(Pfp_MOV, fr_dst, fr_src));
6831 return;
6833 if (ty == Ity_F32) {
6834 HReg fr_dst = lookupIRTemp(env, tmp);
6835 HReg fr_src = iselFltExpr(env, stmt->Ist.WrTmp.data, IEndianess);
6836 addInstr(env, PPCInstr_FpUnary(Pfp_MOV, fr_dst, fr_src));
6837 return;
6839 if (ty == Ity_D32) {
6840 HReg fr_dst = lookupIRTemp(env, tmp);
6841 HReg fr_src = iselDfp32Expr(env, stmt->Ist.WrTmp.data, IEndianess);
6842 addInstr(env, PPCInstr_Dfp64Unary(Pfp_MOV, fr_dst, fr_src));
6843 return;
6845 if (ty == Ity_F128) {
6846 HReg v_dst = lookupIRTemp(env, tmp);
6847 HReg v_src = iselFp128Expr(env, stmt->Ist.WrTmp.data, IEndianess);
6848 addInstr(env, PPCInstr_AvUnary(Pav_MOV, v_dst, v_src));
6849 return;
6851 if (ty == Ity_V128) {
6852 HReg v_dst = lookupIRTemp(env, tmp);
6853 HReg v_src = iselVecExpr(env, stmt->Ist.WrTmp.data, IEndianess);
6854 addInstr(env, PPCInstr_AvUnary(Pav_MOV, v_dst, v_src));
6855 return;
6857 if (ty == Ity_D64) {
6858 HReg fr_dst = lookupIRTemp( env, tmp );
6859 HReg fr_src = iselDfp64Expr( env, stmt->Ist.WrTmp.data, IEndianess );
6860 addInstr( env, PPCInstr_Dfp64Unary( Pfp_MOV, fr_dst, fr_src ) );
6861 return;
6863 if (ty == Ity_D128) {
6864 HReg fr_srcHi, fr_srcLo, fr_dstHi, fr_dstLo;
6865 // lookupDfp128IRTempPair( &fr_dstHi, &fr_dstLo, env, tmp );
6866 lookupIRTempPair( &fr_dstHi, &fr_dstLo, env, tmp );
6867 iselDfp128Expr( &fr_srcHi, &fr_srcLo, env, stmt->Ist.WrTmp.data,
6868 IEndianess );
6869 addInstr( env, PPCInstr_Dfp64Unary( Pfp_MOV, fr_dstHi, fr_srcHi ) );
6870 addInstr( env, PPCInstr_Dfp64Unary( Pfp_MOV, fr_dstLo, fr_srcLo ) );
6871 return;
6873 break;
6876 /* --------- Load Linked or Store Conditional --------- */
6877 case Ist_LLSC: {
6878 IRTemp res = stmt->Ist.LLSC.result;
6879 IRType tyRes = typeOfIRTemp(env->type_env, res);
6880 IRType tyAddr = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.addr);
6882 if (stmt->Ist.LLSC.end != IEndianess)
6883 goto stmt_fail;
6884 if (!mode64 && (tyAddr != Ity_I32))
6885 goto stmt_fail;
6886 if (mode64 && (tyAddr != Ity_I64))
6887 goto stmt_fail;
6889 if (stmt->Ist.LLSC.storedata == NULL) {
6890 /* LL */
6891 HReg r_addr = iselWordExpr_R( env, stmt->Ist.LLSC.addr, IEndianess );
6892 HReg r_dst = lookupIRTemp(env, res);
6893 if (tyRes == Ity_I8) {
6894 addInstr(env, PPCInstr_LoadL( 1, r_dst, r_addr, mode64 ));
6895 return;
6897 if (tyRes == Ity_I16) {
6898 addInstr(env, PPCInstr_LoadL( 2, r_dst, r_addr, mode64 ));
6899 return;
6901 if (tyRes == Ity_I32) {
6902 addInstr(env, PPCInstr_LoadL( 4, r_dst, r_addr, mode64 ));
6903 return;
6905 if (tyRes == Ity_I64 && mode64) {
6906 addInstr(env, PPCInstr_LoadL( 8, r_dst, r_addr, mode64 ));
6907 return;
6909 /* fallthru */;
6910 } else {
6911 /* SC */
6912 HReg r_res = lookupIRTemp(env, res); /* :: Ity_I1 */
6913 HReg r_a = iselWordExpr_R(env, stmt->Ist.LLSC.addr, IEndianess);
6914 HReg r_src = iselWordExpr_R(env, stmt->Ist.LLSC.storedata,
6915 IEndianess);
6916 HReg r_tmp = newVRegI(env);
6917 IRType tyData = typeOfIRExpr(env->type_env,
6918 stmt->Ist.LLSC.storedata);
6919 vassert(tyRes == Ity_I1);
6920 if (tyData == Ity_I8 || tyData == Ity_I16 || tyData == Ity_I32 ||
6921 (tyData == Ity_I64 && mode64)) {
6922 int size = 0;
6924 if (tyData == Ity_I64)
6925 size = 8;
6926 else if (tyData == Ity_I32)
6927 size = 4;
6928 else if (tyData == Ity_I16)
6929 size = 2;
6930 else if (tyData == Ity_I8)
6931 size = 1;
6933 addInstr(env, PPCInstr_StoreC( size,
6934 r_a, r_src, mode64 ));
6935 addInstr(env, PPCInstr_MfCR( r_tmp ));
6936 addInstr(env, PPCInstr_Shft(
6937 Pshft_SHR,
6938 env->mode64 ? False : True
6939 /*F:64-bit, T:32-bit shift*/,
6940 r_tmp, r_tmp,
6941 PPCRH_Imm(False/*unsigned*/, 29)));
6942 /* Probably unnecessary, since the IR dest type is Ity_I1,
6943 and so we are entitled to leave whatever junk we like
6944 drifting round in the upper 31 or 63 bits of r_res.
6945 However, for the sake of conservativeness .. */
6946 addInstr(env, PPCInstr_Alu(
6947 Palu_AND,
6948 r_res, r_tmp,
6949 PPCRH_Imm(False/*signed*/, 1)));
6950 return;
6952 /* fallthru */
6954 goto stmt_fail;
6955 /*NOTREACHED*/
6958 /* --------- Call to DIRTY helper --------- */
6959 case Ist_Dirty: {
6960 IRDirty* d = stmt->Ist.Dirty.details;
6962 /* Figure out the return type, if any. */
6963 IRType retty = Ity_INVALID;
6964 if (d->tmp != IRTemp_INVALID)
6965 retty = typeOfIRTemp(env->type_env, d->tmp);
6967 /* Throw out any return types we don't know about. The set of
6968 acceptable return types is the same in both 32- and 64-bit
6969 mode, so we don't need to inspect mode64 to make a
6970 decision. */
6971 Bool retty_ok = False;
6972 switch (retty) {
6973 case Ity_INVALID: /* function doesn't return anything */
6974 case Ity_V128:
6975 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
6976 retty_ok = True; break;
6977 default:
6978 break;
6980 if (!retty_ok)
6981 break; /* will go to stmt_fail: */
6983 /* Marshal args, do the call, clear stack, set the return value
6984 to 0x555..555 if this is a conditional call that returns a
6985 value and the call is skipped. */
6986 UInt addToSp = 0;
6987 RetLoc rloc = mk_RetLoc_INVALID();
6988 doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args,
6989 IEndianess );
6990 vassert(is_sane_RetLoc(rloc));
6992 /* Now figure out what to do with the returned value, if any. */
6993 switch (retty) {
6994 case Ity_INVALID: {
6995 /* No return value. Nothing to do. */
6996 vassert(d->tmp == IRTemp_INVALID);
6997 vassert(rloc.pri == RLPri_None);
6998 vassert(addToSp == 0);
6999 return;
7001 case Ity_I32: case Ity_I16: case Ity_I8: {
7002 /* The returned value is in %r3. Park it in the register
7003 associated with tmp. */
7004 HReg r_dst = lookupIRTemp(env, d->tmp);
7005 addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
7006 vassert(rloc.pri == RLPri_Int);
7007 vassert(addToSp == 0);
7008 return;
7010 case Ity_I64:
7011 if (mode64) {
7012 /* The returned value is in %r3. Park it in the register
7013 associated with tmp. */
7014 HReg r_dst = lookupIRTemp(env, d->tmp);
7015 addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
7016 vassert(rloc.pri == RLPri_Int);
7017 vassert(addToSp == 0);
7018 } else {
7019 /* The returned value is in %r3:%r4. Park it in the
7020 register-pair associated with tmp. */
7021 HReg r_dstHi = INVALID_HREG;
7022 HReg r_dstLo = INVALID_HREG;
7023 lookupIRTempPair( &r_dstHi, &r_dstLo, env, d->tmp);
7024 addInstr(env, mk_iMOVds_RR(r_dstHi, hregPPC_GPR3(mode64)));
7025 addInstr(env, mk_iMOVds_RR(r_dstLo, hregPPC_GPR4(mode64)));
7026 vassert(rloc.pri == RLPri_2Int);
7027 vassert(addToSp == 0);
7029 return;
7030 case Ity_V128: {
7031 /* The returned value is on the stack, and *retloc tells
7032 us where. Fish it off the stack and then move the
7033 stack pointer upwards to clear it, as directed by
7034 doHelperCall. */
7035 vassert(rloc.pri == RLPri_V128SpRel);
7036 vassert(addToSp >= 16);
7037 HReg dst = lookupIRTemp(env, d->tmp);
7038 PPCAMode* am = PPCAMode_IR(rloc.spOff, StackFramePtr(mode64));
7039 addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, dst, am ));
7040 add_to_sp(env, addToSp);
7041 return;
7043 default:
7044 /*NOTREACHED*/
7045 vassert(0);
7049 /* --------- MEM FENCE --------- */
7050 case Ist_MBE:
7051 switch (stmt->Ist.MBE.event) {
7052 case Imbe_Fence:
7053 addInstr(env, PPCInstr_MFence());
7054 return;
7055 default:
7056 break;
7058 break;
7060 /* --------- INSTR MARK --------- */
7061 /* Doesn't generate any executable code ... */
7062 case Ist_IMark:
7063 return;
7065 /* --------- ABI HINT --------- */
7066 /* These have no meaning (denotation in the IR) and so we ignore
7067 them ... if any actually made it this far. */
7068 case Ist_AbiHint:
7069 return;
7071 /* --------- NO-OP --------- */
7072 /* Fairly self-explanatory, wouldn't you say? */
7073 case Ist_NoOp:
7074 return;
7076 /* --------- EXIT --------- */
7077 case Ist_Exit: {
7078 IRConst* dst = stmt->Ist.Exit.dst;
7079 if (!mode64 && dst->tag != Ico_U32)
7080 vpanic("iselStmt(ppc): Ist_Exit: dst is not a 32-bit value");
7081 if (mode64 && dst->tag != Ico_U64)
7082 vpanic("iselStmt(ppc64): Ist_Exit: dst is not a 64-bit value");
7084 PPCCondCode cc = iselCondCode(env, stmt->Ist.Exit.guard, IEndianess);
7085 PPCAMode* amCIA = PPCAMode_IR(stmt->Ist.Exit.offsIP,
7086 hregPPC_GPR31(mode64));
7088 /* Case: boring transfer to known address */
7089 if (stmt->Ist.Exit.jk == Ijk_Boring
7090 || stmt->Ist.Exit.jk == Ijk_Call
7091 /* || stmt->Ist.Exit.jk == Ijk_Ret */) {
7092 if (env->chainingAllowed) {
7093 /* .. almost always true .. */
7094 /* Skip the event check at the dst if this is a forwards
7095 edge. */
7096 Bool toFastEP
7097 = mode64
7098 ? (((Addr64)stmt->Ist.Exit.dst->Ico.U64) > (Addr64)env->max_ga)
7099 : (((Addr32)stmt->Ist.Exit.dst->Ico.U32) > (Addr32)env->max_ga);
7100 if (0) vex_printf("%s", toFastEP ? "Y" : ",");
7101 addInstr(env, PPCInstr_XDirect(
7102 mode64 ? (Addr64)stmt->Ist.Exit.dst->Ico.U64
7103 : (Addr64)stmt->Ist.Exit.dst->Ico.U32,
7104 amCIA, cc, toFastEP));
7105 } else {
7106 /* .. very occasionally .. */
7107 /* We can't use chaining, so ask for an assisted transfer,
7108 as that's the only alternative that is allowable. */
7109 HReg r = iselWordExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst),
7110 IEndianess);
7111 addInstr(env, PPCInstr_XAssisted(r, amCIA, cc, Ijk_Boring));
7113 return;
7116 /* Case: assisted transfer to arbitrary address */
7117 switch (stmt->Ist.Exit.jk) {
7118 /* Keep this list in sync with that in iselNext below */
7119 case Ijk_ClientReq:
7120 case Ijk_EmFail:
7121 case Ijk_EmWarn:
7122 case Ijk_NoDecode:
7123 case Ijk_NoRedir:
7124 case Ijk_SigBUS:
7125 case Ijk_SigTRAP:
7126 case Ijk_Sys_syscall:
7127 case Ijk_InvalICache:
7129 HReg r = iselWordExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst),
7130 IEndianess);
7131 addInstr(env, PPCInstr_XAssisted(r, amCIA, cc,
7132 stmt->Ist.Exit.jk));
7133 return;
7135 default:
7136 break;
7139 /* Do we ever expect to see any other kind? */
7140 goto stmt_fail;
7143 default: break;
7145 stmt_fail:
7146 ppIRStmt(stmt);
7147 vpanic("iselStmt(ppc)");
7151 /*---------------------------------------------------------*/
7152 /*--- ISEL: Basic block terminators (Nexts) ---*/
7153 /*---------------------------------------------------------*/
7155 static void iselNext ( ISelEnv* env,
7156 IRExpr* next, IRJumpKind jk, Int offsIP,
7157 IREndness IEndianess)
7159 if (vex_traceflags & VEX_TRACE_VCODE) {
7160 vex_printf( "\n-- PUT(%d) = ", offsIP);
7161 ppIRExpr( next );
7162 vex_printf( "; exit-");
7163 ppIRJumpKind(jk);
7164 vex_printf( "\n");
7167 PPCCondCode always = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
7169 /* Case: boring transfer to known address */
7170 if (next->tag == Iex_Const) {
7171 IRConst* cdst = next->Iex.Const.con;
7172 vassert(cdst->tag == (env->mode64 ? Ico_U64 :Ico_U32));
7173 if (jk == Ijk_Boring || jk == Ijk_Call) {
7174 /* Boring transfer to known address */
7175 PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
7176 if (env->chainingAllowed) {
7177 /* .. almost always true .. */
7178 /* Skip the event check at the dst if this is a forwards
7179 edge. */
7180 Bool toFastEP
7181 = env->mode64
7182 ? (((Addr64)cdst->Ico.U64) > (Addr64)env->max_ga)
7183 : (((Addr32)cdst->Ico.U32) > (Addr32)env->max_ga);
7184 if (0) vex_printf("%s", toFastEP ? "X" : ".");
7185 addInstr(env, PPCInstr_XDirect(
7186 env->mode64 ? (Addr64)cdst->Ico.U64
7187 : (Addr64)cdst->Ico.U32,
7188 amCIA, always, toFastEP));
7189 } else {
7190 /* .. very occasionally .. */
7191 /* We can't use chaining, so ask for an assisted transfer,
7192 as that's the only alternative that is allowable. */
7193 HReg r = iselWordExpr_R(env, next, IEndianess);
7194 addInstr(env, PPCInstr_XAssisted(r, amCIA, always,
7195 Ijk_Boring));
7197 return;
7201 /* Case: call/return (==boring) transfer to any address */
7202 switch (jk) {
7203 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
7204 HReg r = iselWordExpr_R(env, next, IEndianess);
7205 PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
7206 if (env->chainingAllowed) {
7207 addInstr(env, PPCInstr_XIndir(r, amCIA, always));
7208 } else {
7209 addInstr(env, PPCInstr_XAssisted(r, amCIA, always,
7210 Ijk_Boring));
7212 return;
7214 default:
7215 break;
7218 /* Case: assisted transfer to arbitrary address */
7219 switch (jk) {
7220 /* Keep this list in sync with that for Ist_Exit above */
7221 case Ijk_ClientReq:
7222 case Ijk_EmFail:
7223 case Ijk_EmWarn:
7224 case Ijk_NoDecode:
7225 case Ijk_NoRedir:
7226 case Ijk_SigBUS:
7227 case Ijk_SigTRAP:
7228 case Ijk_Sys_syscall:
7229 case Ijk_InvalICache:
7231 HReg r = iselWordExpr_R(env, next, IEndianess);
7232 PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
7233 addInstr(env, PPCInstr_XAssisted(r, amCIA, always, jk));
7234 return;
7236 default:
7237 break;
7240 vex_printf( "\n-- PUT(%d) = ", offsIP);
7241 ppIRExpr( next );
7242 vex_printf( "; exit-");
7243 ppIRJumpKind(jk);
7244 vex_printf( "\n");
7245 vassert(0); // are we expecting any other kind?
7249 /*---------------------------------------------------------*/
7250 /*--- Insn selector top-level ---*/
7251 /*---------------------------------------------------------*/
7253 /* Translate an entire SB to ppc code. */
7254 HInstrArray* iselSB_PPC ( const IRSB* bb,
7255 VexArch arch_host,
7256 const VexArchInfo* archinfo_host,
7257 const VexAbiInfo* vbi,
7258 Int offs_Host_EvC_Counter,
7259 Int offs_Host_EvC_FailAddr,
7260 Bool chainingAllowed,
7261 Bool addProfInc,
7262 Addr max_ga)
7265 Int i, j;
7266 HReg hregLo, hregMedLo, hregMedHi, hregHi;
7267 ISelEnv* env;
7268 UInt hwcaps_host = archinfo_host->hwcaps;
7269 Bool mode64 = False;
7270 UInt mask32, mask64;
7271 PPCAMode *amCounter, *amFailAddr;
7272 IREndness IEndianess;
7274 vassert(arch_host == VexArchPPC32 || arch_host == VexArchPPC64);
7275 mode64 = arch_host == VexArchPPC64;
7277 /* do some sanity checks,
7278 * Note: no 32-bit support for ISA 3.0, ISA 3.1
7280 mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V
7281 | VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX | VEX_HWCAPS_PPC32_VX
7282 | VEX_HWCAPS_PPC32_DFP | VEX_HWCAPS_PPC32_ISA2_07;
7284 mask64 = VEX_HWCAPS_PPC64_V | VEX_HWCAPS_PPC64_FX
7285 | VEX_HWCAPS_PPC64_GX | VEX_HWCAPS_PPC64_VX | VEX_HWCAPS_PPC64_DFP
7286 | VEX_HWCAPS_PPC64_ISA2_07 | VEX_HWCAPS_PPC64_ISA3_0
7287 | VEX_HWCAPS_PPC64_ISA3_1;
7289 if (mode64) {
7290 vassert((hwcaps_host & mask32) == 0);
7291 } else {
7292 vassert((hwcaps_host & mask64) == 0);
7295 /* Check that the host's endianness is as expected. */
7296 vassert((archinfo_host->endness == VexEndnessBE) ||
7297 (archinfo_host->endness == VexEndnessLE));
7299 if (archinfo_host->endness == VexEndnessBE)
7300 IEndianess = Iend_BE;
7301 else
7302 IEndianess = Iend_LE;
7304 /* Make up an initial environment to use. */
7305 env = LibVEX_Alloc_inline(sizeof(ISelEnv));
7306 env->vreg_ctr = 0;
7308 /* Are we being ppc32 or ppc64? */
7309 env->mode64 = mode64;
7311 /* Set up output code array. */
7312 env->code = newHInstrArray();
7314 /* Copy BB's type env. */
7315 env->type_env = bb->tyenv;
7317 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
7318 * change as we go along.
7320 * vregmap2 and vregmap3 are only used in 32 bit mode
7321 * for supporting I128 in 32-bit mode
7323 env->n_vregmap = bb->tyenv->types_used;
7324 env->vregmapLo = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
7325 env->vregmapMedLo = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
7326 if (mode64) {
7327 env->vregmapMedHi = NULL;
7328 env->vregmapHi = NULL;
7329 } else {
7330 env->vregmapMedHi = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
7331 env->vregmapHi = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
7334 /* and finally ... */
7335 env->chainingAllowed = chainingAllowed;
7336 env->max_ga = max_ga;
7337 env->hwcaps = hwcaps_host;
7338 env->previous_rm = NULL;
7339 env->vbi = vbi;
7341 /* For each IR temporary, allocate a suitably-kinded virtual
7342 register. */
7343 j = 0;
7344 for (i = 0; i < env->n_vregmap; i++) {
7345 hregLo = hregMedLo = hregMedHi = hregHi = INVALID_HREG;
7346 switch (bb->tyenv->types[i]) {
7347 case Ity_I1:
7348 case Ity_I8:
7349 case Ity_I16:
7350 case Ity_I32:
7351 if (mode64) {
7352 hregLo = mkHReg(True, HRcInt64, 0, j++);
7353 } else {
7354 hregLo = mkHReg(True, HRcInt32, 0, j++);
7356 break;
7357 case Ity_I64:
7358 if (mode64) {
7359 hregLo = mkHReg(True, HRcInt64, 0, j++);
7360 } else {
7361 hregLo = mkHReg(True, HRcInt32, 0, j++);
7362 hregMedLo = mkHReg(True, HRcInt32, 0, j++);
7364 break;
7365 case Ity_I128:
7366 if (mode64) {
7367 hregLo = mkHReg(True, HRcInt64, 0, j++);
7368 hregMedLo = mkHReg(True, HRcInt64, 0, j++);
7369 } else {
7370 hregLo = mkHReg(True, HRcInt32, 0, j++);
7371 hregMedLo = mkHReg(True, HRcInt32, 0, j++);
7372 hregMedHi = mkHReg(True, HRcInt32, 0, j++);
7373 hregHi = mkHReg(True, HRcInt32, 0, j++);
7375 break;
7376 case Ity_F32:
7377 case Ity_F64:
7378 hregLo = mkHReg(True, HRcFlt64, 0, j++);
7379 break;
7380 case Ity_F128:
7381 case Ity_V128:
7382 hregLo = mkHReg(True, HRcVec128, 0, j++);
7383 break;
7384 case Ity_D32:
7385 case Ity_D64:
7386 hregLo = mkHReg(True, HRcFlt64, 0, j++);
7387 break;
7388 case Ity_D128:
7389 hregLo = mkHReg(True, HRcFlt64, 0, j++);
7390 hregMedLo = mkHReg(True, HRcFlt64, 0, j++);
7391 break;
7392 default:
7393 ppIRType(bb->tyenv->types[i]);
7394 vpanic("iselBB(ppc): IRTemp type");
7396 env->vregmapLo[i] = hregLo;
7397 env->vregmapMedLo[i] = hregMedLo;
7398 if (!mode64) {
7399 env->vregmapMedHi[i] = hregMedHi;
7400 env->vregmapHi[i] = hregHi;
7403 env->vreg_ctr = j;
7405 /* The very first instruction must be an event check. */
7406 amCounter = PPCAMode_IR(offs_Host_EvC_Counter, hregPPC_GPR31(mode64));
7407 amFailAddr = PPCAMode_IR(offs_Host_EvC_FailAddr, hregPPC_GPR31(mode64));
7408 addInstr(env, PPCInstr_EvCheck(amCounter, amFailAddr));
7410 /* Possibly a block counter increment (for profiling). At this
7411 point we don't know the address of the counter, so just pretend
7412 it is zero. It will have to be patched later, but before this
7413 translation is used, by a call to LibVEX_patchProfCtr. */
7414 if (addProfInc) {
7415 addInstr(env, PPCInstr_ProfInc());
7418 /* Ok, finally we can iterate over the statements. */
7419 for (i = 0; i < bb->stmts_used; i++)
7420 iselStmt(env, bb->stmts[i], IEndianess);
7422 iselNext(env, bb->next, bb->jumpkind, bb->offsIP, IEndianess);
7424 /* record the number of vregs we used. */
7425 env->code->n_vregs = env->vreg_ctr;
7426 return env->code;
7430 /*---------------------------------------------------------------*/
7431 /*--- end host_ppc_isel.c ---*/
7432 /*---------------------------------------------------------------*/