js/src/nanojit/NativePPC.h

   1 /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
   2 /* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
   3 /* ***** BEGIN LICENSE BLOCK *****
   4  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   5  *
   6  * The contents of this file are subject to the Mozilla Public License Version
   7  * 1.1 (the "License"); you may not use this file except in compliance with
   8  * the License. You may obtain a copy of the License at
   9  * http://www.mozilla.org/MPL/
  10  *
  11  * Software distributed under the License is distributed on an "AS IS" basis,
  12  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  13  * for the specific language governing rights and limitations under the
  14  * License.
  15  *
  16  * The Original Code is [Open Source Virtual Machine].
  17  *
  18  * The Initial Developer of the Original Code is
  19  * Adobe System Incorporated.
  20  * Portions created by the Initial Developer are Copyright (C) 2008
  21  * the Initial Developer. All Rights Reserved.
  22  *
  23  * Contributor(s):
  24  *   Adobe AS3 Team
  25  *
  26  * Alternatively, the contents of this file may be used under the terms of
  27  * either the GNU General Public License Version 2 or later (the "GPL"), or
  28  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  29  * in which case the provisions of the GPL or the LGPL are applicable instead
  30  * of those above. If you wish to allow use of your version of this file only
  31  * under the terms of either the GPL or the LGPL, and not to allow others to
  32  * use your version of this file under the terms of the MPL, indicate your
  33  * decision by deleting the provisions above and replace them with the notice
  34  * and other provisions required by the GPL or the LGPL. If you do not delete
  35  * the provisions above, a recipient may use your version of this file under
  36  * the terms of any one of the MPL, the GPL or the LGPL.
  37  *
  38  * ***** END LICENSE BLOCK ***** */
  39
  40 #ifndef __nanojit_NativePPC__
  41 #define __nanojit_NativePPC__
  42
  43 #ifdef PERFM
  44 #define DOPROF
  45 #include "../vprof/vprof.h"
  46 #define count_instr() _nvprof("ppc",1)
  47 #define count_prolog() _nvprof("ppc-prolog",1); count_instr();
  48 #define count_imt() _nvprof("ppc-imt",1) count_instr()
  49 #else
  50 #define count_instr()
  51 #define count_prolog()
  52 #define count_imt()
  53 #endif
  54
  55 #include "NativeCommon.h"
  56
  57 namespace nanojit
  58 {
  59 #define NJ_MAX_STACK_ENTRY              4096
  60 #define NJ_ALIGN_STACK                  16
  61
  62 #define NJ_JTBL_SUPPORTED               1
  63 #define NJ_EXPANDED_LOADSTORE_SUPPORTED 0
  64 #define NJ_F2I_SUPPORTED                0
  65 #define NJ_SOFTFLOAT_SUPPORTED          0
  66 #define NJ_DIVI_SUPPORTED               0
  67
  68     enum ConditionRegister {
  69         CR0 = 0,
  70         CR1 = 1,
  71         CR2 = 2,
  72         CR3 = 3,
  73         CR4 = 4,
  74         CR5 = 5,
  75         CR6 = 6,
  76         CR7 = 7,
  77     };
  78
  79     enum ConditionBit {
  80         COND_lt = 0, // msb of CR
  81         COND_gt = 1,
  82         COND_eq = 2,
  83         COND_so = 3, // lsb of CR
  84         COND_un = 3,
  85     };
  86
  87     // this is the BO field in condition instructions
  88     enum ConditionOption {
  89         BO_true = 12, // branch if true
  90         BO_false = 4, // branch if false
  91     };
  92
  93     static const Register
  94         // general purpose 32bit regs
  95         R0   = { 0 },   // scratch or the value 0, excluded from regalloc
  96         SP   = { 1 },   // stack pointer, excluded from regalloc
  97         R2   = { 2 },   // scratch on MacOSX, rtoc pointer elsewhere
  98         R3   = { 3 },   // this, return value, MSW of int64 return
  99         R4   = { 4 },   // param, LSW of int64 return
 100         R5   = { 5 },   // param
 101         R6   = { 6 },   // param
 102         R7   = { 7 },   // param
 103         R8   = { 8 },   // param
 104         R9   = { 9 },   // param
 105         R10  = { 10 },  // param
 106         R11  = { 11 },  // scratch in leaf funcs, outgoing arg ptr otherwise
 107         R12  = { 12 },  // scratch
 108         R13  = { 13 },  // ppc32: saved, ppc64: thread-specific storage
 109         R14  = { 14 },  // saved
 110         R15  = { 15 },
 111         R16  = { 16 },
 112         R17  = { 17 },
 113         R18  = { 18 },
 114         R19  = { 19 },
 115         R20  = { 20 },
 116         R21  = { 21 },
 117         R22  = { 22 },
 118         R23  = { 23 },
 119         R24  = { 24 },
 120         R25  = { 25 },
 121         R26  = { 26 },
 122         R27  = { 27 },
 123         R28  = { 28 },
 124         R29  = { 29 },
 125         R30  = { 30 },
 126         R31  = { 31 },   // excluded from regalloc since we use it as FP
 127         FP  = R31,
 128
 129         // FP regs
 130         F0  = { 32 },   // scratch, excluded from reg alloc
 131         F1  = { 33 },   // param, double return value
 132         F2  = { 34 },   // param
 133         F3  = { 35 },   // param
 134         F4  = { 36 },   // param
 135         F5  = { 37 },   // param
 136         F6  = { 38 },   // param
 137         F7  = { 39 },   // param
 138         F8  = { 40 },   // param
 139         F9  = { 41 },   // param
 140         F10 = { 42 },   // param
 141         F11 = { 43 },   // param
 142         F12 = { 44 },   // param
 143         F13 = { 45 },   // param
 144         F14 = { 46 },   // F14-31 saved
 145         F15 = { 47 },
 146         F16 = { 48 },
 147         F17 = { 49 },
 148         F18 = { 50 },
 149         F19 = { 51 },
 150         F20 = { 52 },
 151         F21 = { 53 },
 152         F22 = { 54 },
 153         F23 = { 55 },
 154         F24 = { 56 },
 155         F25 = { 57 },
 156         F26 = { 58 },
 157         F27 = { 59 },
 158         F28 = { 60 },
 159         F29 = { 61 },
 160         F30 = { 62 },
 161         F31 = { 63 },
 162
 163         // special purpose registers (SPR)
 164         Rxer = { 1 },
 165         Rlr  = { 8 },
 166         Rctr = { 9 },
 167
 168         deprecated_UnknownReg = { 127 };    // XXX: remove eventually, see bug 538924
 169
 170     static const uint32_t FirstRegNum = 0; // R0
 171     static const uint32_t LastRegNum = 64; // F31
 172
 173     enum PpcOpcode {
 174         // opcodes
 175         PPC_add     = 0x7C000214, // add
 176         PPC_addo    = 0x7C000614, // add & OE=1 (can set OV)
 177         PPC_addi    = 0x38000000, // add immediate
 178         PPC_addis   = 0x3C000000, // add immediate shifted
 179         PPC_and     = 0x7C000038, // and
 180         PPC_andc    = 0x7C000078, // and with compliment
 181         PPC_andi    = 0x70000000, // and immediate
 182         PPC_andis   = 0x74000000, // and immediate shifted
 183         PPC_b       = 0x48000000, // branch
 184         PPC_bc      = 0x40000000, // branch conditional
 185         PPC_bcctr   = 0x4C000420, // branch conditional to count register
 186         PPC_cmp     = 0x7C000000, // compare
 187         PPC_cmpi    = 0x2C000000, // compare immediate
 188         PPC_cmpl    = 0x7C000040, // compare logical
 189         PPC_cmpli   = 0x28000000, // compare logical immediate
 190         PPC_cror    = 0x4C000382, // condition register or
 191         PPC_extsw   = 0x7C0007B4, // extend sign word
 192         PPC_fadd    = 0xFC00002A, // floating add (double precision)
 193         PPC_fcfid   = 0xFC00069C, // floating convert from integer doubleword
 194         PPC_fcmpu   = 0xFC000000, // floating compare unordered
 195         PPC_fdiv    = 0xFC000024, // floating divide (double precision)
 196         PPC_fmr     = 0xFC000090, // floating move register (double precision)
 197         PPC_fmul    = 0xFC000032, // floating multiply (double precision)
 198         PPC_fneg    = 0xFC000050, // floating negate
 199         PPC_fsub    = 0xFC000028, // floating subtract (double precision)
 200         PPC_lbz     = 0x88000000, // load byte and zero
 201         PPC_lbzx    = 0x7C0000AE, // load byte and zero indexed
 202         PPC_ld      = 0xE8000000, // load doubleword
 203         PPC_ldx     = 0x7C00002A, // load doubleword indexed
 204         PPC_lfd     = 0xC8000000, // load floating point double
 205         PPC_lfdx    = 0x7C0004AE, // load floating-point double indexed
 206         PPC_lhz     = 0xA0000000, // load halfword and zero
 207         PPC_lhzx    = 0x7C00022E, // load halfword and zero indexed
 208         PPC_lwz     = 0x80000000, // load word and zero
 209         PPC_lwzx    = 0x7C00002E, // load word and zero indexed
 210         PPC_mfcr    = 0x7C000026, // move from condition register
 211         PPC_mfspr   = 0x7C0002A6, // move from spr (special purpose register)
 212         PPC_mtspr   = 0x7C0003A6, // move to spr
 213         PPC_mulli   = 0x1C000000, // multiply low immediate
 214         PPC_mullw   = 0x7C0001D6, // multiply low word
 215         PPC_neg     = 0x7C0000D0, // negate
 216         PPC_nor     = 0x7C0000F8, // nor
 217         PPC_or      = 0x7C000378, // or
 218         PPC_ori     = 0x60000000, // or immediate
 219         PPC_oris    = 0x64000000, // or immediate shifted
 220         PPC_rlwinm  = 0x54000000, // rotate left word then and with mask
 221         PPC_rldicl  = 0x78000000, // rotate left doubleword immediate then clear left
 222         PPC_rldicr  = 0x78000004, // rotate left doubleword immediate then clear right
 223         PPC_rldimi  = 0x7800000C, // rotate left doubleword immediate then mask insert
 224         PPC_sld     = 0x7C000036, // shift left doubleword
 225         PPC_slw     = 0x7C000030, // shift left word
 226         PPC_srad    = 0x7C000634, // shift right algebraic doubleword (sign ext)
 227         PPC_sradi   = 0x7C000674, // shift right algebraic doubleword immediate
 228         PPC_sraw    = 0x7C000630, // shift right algebraic word (sign ext)
 229         PPC_srawi   = 0x7C000670, // shift right algebraic word immediate
 230         PPC_srd     = 0x7C000436, // shift right doubleword (zero ext)
 231         PPC_srw     = 0x7C000430, // shift right word (zero ext)
 232         PPC_stb     = 0x98000000, // store byte
 233         PPC_stbx    = 0x7C0001AE, // store byte indexed
 234         PPC_std     = 0xF8000000, // store doubleword
 235         PPC_stdu    = 0xF8000001, // store doubleword with update
 236         PPC_stdux   = 0x7C00016A, // store doubleword with update indexed
 237         PPC_stdx    = 0x7C00012A, // store doubleword indexed
 238         PPC_stfd    = 0xD8000000, // store floating-point double
 239         PPC_stfdx   = 0x7C0005AE, // store floating-point double indexed
 240         PPC_stw     = 0x90000000, // store word
 241         PPC_stwu    = 0x94000000, // store word with update
 242         PPC_stwux   = 0x7C00016E, // store word with update indexed
 243         PPC_stwx    = 0x7C00012E, // store word indexed
 244         PPC_subf    = 0x7C000050, // subtract from
 245         PPC_xor     = 0x7C000278, // xor
 246         PPC_xori    = 0x68000000, // xor immediate
 247         PPC_xoris   = 0x6C000000, // xor immediate shifted
 248
 249         // simplified mnemonics
 250         PPC_mr = PPC_or,
 251         PPC_not = PPC_nor,
 252         PPC_nop = PPC_ori,
 253     };
 254
 255     typedef uint64_t RegisterMask;
 256
 257     static const RegisterMask GpRegs = 0xffffffff;
 258     static const RegisterMask FpRegs = 0xffffffff00000000LL;
 259     // R31 is a saved reg too, but we use it as our Frame ptr FP
 260 #ifdef NANOJIT_64BIT
 261     // R13 reserved for thread-specific storage on ppc64-darwin
 262     static const RegisterMask SavedRegs = 0x7fffc000; // R14-R30 saved
 263     static const int NumSavedRegs = 17; // R14-R30
 264 #else
 265     static const RegisterMask SavedRegs = 0x7fffe000; // R13-R30 saved
 266     static const int NumSavedRegs = 18; // R13-R30
 267 #endif
 268
 269     static inline bool IsGpReg(Register r) {
 270         return r <= R31;
 271     }
 272     static inline bool IsFpReg(Register r) {
 273         return r >= F0;
 274     }
 275
 276     verbose_only( extern const char* regNames[]; )
 277
 278     #define DECLARE_PLATFORM_STATS()
 279     #define DECLARE_PLATFORM_REGALLOC()
 280
 281 #ifdef NANOJIT_64BIT
 282     #define DECL_PPC64()\
 283         void asm_qbinop(LIns*);
 284 #else
 285     #define DECL_PPC64()
 286 #endif
 287
 288     #define DECLARE_PLATFORM_ASSEMBLER()                                    \
 289         const static Register argRegs[8], retRegs[2];                       \
 290         void underrunProtect(int bytes);                                    \
 291         void nativePageReset();                                             \
 292         void nativePageSetup();                                             \
 293         bool hardenNopInsertion(const Config& /*c*/) { return false; }      \
 294         void br(NIns *addr, int link);                                      \
 295         void br_far(NIns *addr, int link);                                  \
 296         void asm_regarg(ArgType, LIns*, Register);                          \
 297         void asm_li(Register r, int32_t imm);                               \
 298         void asm_li32(Register r, int32_t imm);                             \
 299         void asm_li64(Register r, uint64_t imm);                            \
 300         void asm_cmp(LOpcode op, LIns *a, LIns *b, ConditionRegister);      \
 301         NIns* asm_branch_far(bool onfalse, LIns *cond, NIns * const targ);  \
 302         NIns* asm_branch_near(bool onfalse, LIns *cond, NIns * const targ); \
 303         int  max_param_size; /* bytes */                                    \
 304         DECL_PPC64()
 305
 306     const int LARGEST_UNDERRUN_PROT = 9*4;  // largest value passed to underrunProtect
 307
 308     typedef uint32_t NIns;
 309
 310     // Bytes of icache to flush after Assembler::patch
 311     const size_t LARGEST_BRANCH_PATCH = 4 * sizeof(NIns);
 312
 313     #define EMIT1(ins, fmt, ...) do {\
 314         underrunProtect(4);\
 315         *(--_nIns) = (NIns) (ins);\
 316         asm_output(fmt, ##__VA_ARGS__);\
 317         } while (0) /* no semi */
 318
 319     #define GPR(r) REGNUM(r)
 320     #define FPR(r) (REGNUM(r) & 31)
 321
 322     #define Bx(li,aa,lk) EMIT1(PPC_b | ((li)&0xffffff)<<2 | (aa)<<1 | (lk),\
 323         "b%s%s %p", (lk)?"l":"", (aa)?"a":"", _nIns+(li))
 324
 325     #define B(li)   Bx(li,0,0)
 326     #define BA(li)  Bx(li,1,0)
 327     #define BL(li)  Bx(li,0,1)
 328     #define BLA(li) Bx(li,1,1)
 329
 330     #define BCx(op,bo,bit,cr,bd,aa,lk) EMIT1(PPC_bc | (bo)<<21 | (4*(cr)+COND_##bit)<<16 |\
 331         ((bd)&0x3fff)<<2 | (aa)<<1 | (lk),\
 332         "%s%s%s cr%d,%p", #op, (lk)?"l":"", (aa)?"a":"", (cr), _nIns+(bd))
 333
 334     #define BLT(cr,bd) BCx(blt, BO_true,  lt, cr, bd, 0, 0)
 335     #define BGT(cr,bd) BCx(bgt, BO_true,  gt, cr, bd, 0, 0)
 336     #define BEQ(cr,bd) BCx(beq, BO_true,  eq, cr, bd, 0, 0)
 337     #define BGE(cr,bd) BCx(bge, BO_false, lt, cr, bd, 0, 0)
 338     #define BLE(cr,bd) BCx(ble, BO_false, gt, cr, bd, 0, 0)
 339     #define BNE(cr,bd) BCx(bne, BO_false, eq, cr, bd, 0, 0)
 340     #define BNG(cr,bd) BCx(bng, BO_false, gt, cr, bd, 0, 0)
 341     #define BNL(cr,bd) BCx(bnl, BO_false, lt, cr, bd, 0, 0)
 342
 343     #define BCCTRx(op, bo, bit, cr, lk) EMIT1(PPC_bcctr | (bo)<<21 | (4*(cr)+COND_##bit)<<16 | (lk)&1,\
 344         "%sctr%s cr%d", #op, (lk)?"l":"", (cr))
 345
 346     #define BLTCTR(cr) BCCTRx(blt, BO_true,  lt, cr, 0)
 347     #define BGTCTR(cr) BCCTRx(bgt, BO_true,  gt, cr, 0)
 348     #define BEQCTR(cr) BCCTRx(beq, BO_true,  eq, cr, 0)
 349     #define BGECTR(cr) BCCTRx(bge, BO_false, lt, cr, 0)
 350     #define BLECTR(cr) BCCTRx(ble, BO_false, gt, cr, 0)
 351     #define BNECTR(cr) BCCTRx(bne, BO_false, eq, cr, 0)
 352     #define BNGCTR(cr) BCCTRx(bng, BO_false, gt, cr, 0)
 353     #define BNLCTR(cr) BCCTRx(bnl, BO_false, lt, cr, 0)
 354
 355     #define Simple(asm,op) EMIT1(op, "%s", #asm)
 356
 357     #define BCTR(link) EMIT1(0x4E800420 | (link), "bctr%s", (link) ? "l" : "")
 358     #define BCTRL() BCTR(1)
 359
 360     #define BLR()   EMIT1(0x4E800020, "blr")
 361     #define NOP()   EMIT1(PPC_nop, "nop") /* ori 0,0,0 */
 362
 363     #define ALU2(op, rd, ra, rb, rc) EMIT1(PPC_##op | GPR(rd)<<21 | GPR(ra)<<16 | GPR(rb)<<11 | (rc),\
 364         "%s%s %s,%s,%s", #op, (rc)?".":"", gpn(rd), gpn(ra), gpn(rb))
 365     #define BITALU2(op, ra, rs, rb, rc) EMIT1(PPC_##op | GPR(rs)<<21 | GPR(ra)<<16 | GPR(rb)<<11 | (rc),\
 366         "%s%s %s,%s,%s", #op, (rc)?".":"", gpn(ra), gpn(rs), gpn(rb))
 367     #define FPUAB(op, d, a, b, rc) EMIT1(PPC_##op | FPR(d)<<21 | FPR(a)<<16 | FPR(b)<<11 | (rc),\
 368         "%s%s %s,%s,%s", #op, (rc)?".":"", gpn(d), gpn(a), gpn(b))
 369     #define FPUAC(op, d, a, c, rc) EMIT1(PPC_##op | FPR(d)<<21 | FPR(a)<<16 | FPR(c)<<6 | (rc),\
 370         "%s%s %s,%s,%s", #op, (rc)?".":"", gpn(d), gpn(a), gpn(c))
 371
 372     #define ADD(rd,ra,rb)   ALU2(add,  rd, ra, rb, 0)
 373     #define ADD_(rd,ra,rb)  ALU2(add,  rd, ra, rb, 1)
 374     #define ADDO(rd,ra,rb)  ALU2(addo, rd, ra, rb, 0)
 375     #define ADDO_(rd,ra,rb) ALU2(addo, rd, ra, rb, 1)
 376     #define SUBF(rd,ra,rb)  ALU2(subf, rd, ra, rb, 0)
 377     #define SUBF_(rd,ra,rb) ALU2(subf, rd, ra, rb, 1)
 378
 379     #define AND(rd,rs,rb)   BITALU2(and,  rd, rs, rb, 0)
 380     #define AND_(rd,rs,rb)  BITALU2(and,  rd, rs, rb, 1)
 381     #define OR(rd,rs,rb)    BITALU2(or,   rd, rs, rb, 0)
 382     #define OR_(rd,rs,rb)   BITALU2(or,   rd, rs, rb, 1)
 383     #define NOR(rd,rs,rb)   BITALU2(nor,  rd, rs, rb, 0)
 384     #define NOR_(rd,rs,rb)  BITALU2(nor,  rd, rs, rb, 1)
 385     #define SLW(rd,rs,rb)   BITALU2(slw,  rd, rs, rb, 0)
 386     #define SLW_(rd,rs,rb)  BITALU2(slw,  rd, rs, rb, 1)
 387     #define SRW(rd,rs,rb)   BITALU2(srw,  rd, rs, rb, 0)
 388     #define SRW_(rd,rs,rb)  BITALU2(srw,  rd, rs, rb, 1)
 389     #define SRAW(rd,rs,rb)  BITALU2(sraw, rd, rs, rb, 0)
 390     #define SRAW_(rd,rs,rb) BITALU2(sraw, rd, rs, rb, 1)
 391     #define XOR(rd,rs,rb)   BITALU2(xor,  rd, rs, rb, 0)
 392     #define XOR_(rd,rs,rb)  BITALU2(xor,  rd, rs, rb, 1)
 393
 394     #define SLD(rd,rs,rb)   BITALU2(sld,  rd, rs, rb, 0)
 395     #define SRD(rd,rs,rb)   BITALU2(srd,  rd, rs, rb, 0)
 396     #define SRAD(rd,rs,rb)  BITALU2(srad, rd, rs, rb, 0)
 397
 398     #define FADD(rd,ra,rb)  FPUAB(fadd, rd, ra, rb, 0)
 399     #define FADD_(rd,ra,rb) FPUAB(fadd, rd, ra, rb, 1)
 400     #define FDIV(rd,ra,rb)  FPUAB(fdiv, rd, ra, rb, 0)
 401     #define FDIV_(rd,ra,rb) FPUAB(fdiv, rd, ra, rb, 1)
 402     #define FMUL(rd,ra,rb)  FPUAC(fmul, rd, ra, rb, 0)
 403     #define FMUL_(rd,ra,rb) FPUAC(fmul, rd, ra, rb, 1)
 404     #define FSUB(rd,ra,rb)  FPUAB(fsub, rd, ra, rb, 0)
 405     #define FSUB_(rd,ra,rb) FPUAB(fsub, rd, ra, rb, 1)
 406
 407     #define MULLI(rd,ra,simm) EMIT1(PPC_mulli | GPR(rd)<<21 | GPR(ra)<<16 | uint16_t(simm),\
 408         "mulli %s,%s,%d", gpn(rd), gpn(ra), int16_t(simm))
 409     #define MULLW(rd,ra,rb) EMIT1(PPC_mullw | GPR(rd)<<21 | GPR(ra)<<16 | GPR(rb)<<11,\
 410         "mullw %s,%s,%s", gpn(rd), gpn(ra), gpn(rb))
 411
 412     // same as ALU2 with rs=rb, for simplified mnemonics
 413     #define ALU1(op, ra, rs, rc) EMIT1(PPC_##op | GPR(rs)<<21 | GPR(ra)<<16 | GPR(rs)<<11 | (rc),\
 414         "%s%s %s,%s", #op, (rc)?".":"", gpn(ra), gpn(rs))
 415
 416     #define MR(rd, rs)    ALU1(mr,    rd, rs, 0)   // or   rd,rs,rs
 417     #define MR_(rd, rs)   ALU1(mr,    rd, rs, 1)   // or.  rd,rs,rs
 418     #define NOT(rd, rs)   ALU1(not,   rd, rs, 0)   // nor  rd,rs,rs
 419     #define NOT_(rd, rs)  ALU1(not,   rd, rs, 0)   // nor. rd,rs,rs
 420
 421     #define EXTSW(rd, rs) EMIT1(PPC_extsw | GPR(rs)<<21 | GPR(rd)<<16,\
 422         "extsw %s,%s", gpn(rd), gpn(rs))
 423
 424     #define NEG(rd, rs)  EMIT1(PPC_neg | GPR(rd)<<21 | GPR(rs)<<16, "neg %s,%s", gpn(rd), gpn(rs))
 425     #define FNEG(rd,rs)  EMIT1(PPC_fneg | FPR(rd)<<21 | FPR(rs)<<11, "fneg %s,%s", gpn(rd), gpn(rs))
 426     #define FMR(rd,rb)   EMIT1(PPC_fmr  | FPR(rd)<<21 | FPR(rb)<<11, "fmr %s,%s", gpn(rd), gpn(rb))
 427     #define FCFID(rd,rs) EMIT1(PPC_fcfid | FPR(rd)<<21 | FPR(rs)<<11, "fcfid %s,%s", gpn(rd), gpn(rs))
 428
 429     #define JMP(addr) br(addr, 0)
 430
 431     #define SPR(spr) (REGNUM(R##spr)>>5|(REGNUM(R##spr)&31)<<5)
 432     #define MTSPR(spr,rs) EMIT1(PPC_mtspr | GPR(rs)<<21 | SPR(spr)<<11,\
 433         "mt%s %s", #spr, gpn(rs))
 434     #define MFSPR(rd,spr) EMIT1(PPC_mfspr | GPR(rd)<<21 | SPR(spr)<<11,\
 435         "mf%s %s", #spr, gpn(rd))
 436
 437     #define MTXER(r) MTSPR(xer, r)
 438     #define MTLR(r)  MTSPR(lr,  r)
 439     #define MTCTR(r) MTSPR(ctr, r)
 440
 441     #define MFXER(r) MFSPR(r, xer)
 442     #define MFLR(r)  MFSPR(r, lr)
 443     #define MFCTR(r) MFSPR(r, ctr)
 444
 445     #define MEMd(op, r, d, a) do {\
 446         NanoAssert(isS16(d));\
 447         EMIT1(PPC_##op | GPR(r)<<21 | GPR(a)<<16 | uint16_t(d), "%s %s,%d(%s)", #op, gpn(r), int16_t(d), gpn(a));\
 448         } while(0) /* no addr */
 449
 450     #define FMEMd(op, r, d, b) do {\
 451         NanoAssert(isS16(d));\
 452         EMIT1(PPC_##op | FPR(r)<<21 | GPR(b)<<16 | uint16_t(d), "%s %s,%d(%s)", #op, gpn(r), int16_t(d), gpn(b));\
 453         } while(0) /* no addr */
 454
 455     #define MEMx(op, r, a, b) EMIT1(PPC_##op | GPR(r)<<21 | GPR(a)<<16 | GPR(b)<<11,\
 456         "%s %s,%s,%s", #op, gpn(r), gpn(a), gpn(b))
 457     #define FMEMx(op, r, a, b) EMIT1(PPC_##op | FPR(r)<<21 | GPR(a)<<16 | GPR(b)<<11,\
 458         "%s %s,%s,%s", #op, gpn(r), gpn(a), gpn(b))
 459
 460     #define MEMux(op, rs, ra, rb) EMIT1(PPC_##op | GPR(rs)<<21 | GPR(ra)<<16 | GPR(rb)<<11,\
 461                 "%s %s,%s,%s", #op, gpn(rs), gpn(ra), gpn(rb))
 462
 463     #define LBZ(r,  d, b) MEMd(lbz,  r, d, b)
 464     #define LHZ(r,  d, b) MEMd(lhz,  r, d, b)
 465     #define LWZ(r,  d, b) MEMd(lwz,  r, d, b)
 466     #define LD(r,   d, b) MEMd(ld,   r, d, b)
 467     #define LBZX(r, a, b) MEMx(lbzx, r, a, b)
 468     #define LHZX(r, a, b) MEMx(lhzx, r, a, b)
 469     #define LWZX(r, a, b) MEMx(lwzx, r, a, b)
 470     #define LDX(r,  a, b) MEMx(ldx,  r, a, b)
 471
 472     // store word (32-bit integer)
 473     #define STW(r,  d, b)     MEMd(stw,    r, d, b)
 474     #define STWU(r, d, b)     MEMd(stwu,   r, d, b)
 475     #define STWX(s, a, b)     MEMx(stwx,   s, a, b)
 476     #define STWUX(s, a, b)    MEMux(stwux, s, a, b)
 477
 478     // store byte
 479     #define STB(r,  d, b)     MEMd(stb,    r, d, b)
 480     #define STBX(s, a, b)     MEMx(stbx,   s, a, b)
 481
 482     // store double (64-bit float)
 483     #define STD(r,  d, b)     MEMd(std,    r, d, b)
 484     #define STDU(r, d, b)     MEMd(stdu,   r, d, b)
 485     #define STDX(s, a, b)     MEMx(stdx,   s, a, b)
 486     #define STDUX(s, a, b)    MEMux(stdux, s, a, b)
 487
 488 #ifdef NANOJIT_64BIT
 489     #define LP(r, d, b)       LD(r, d, b)
 490     #define STP(r, d, b)      STD(r, d, b)
 491     #define STPU(r, d, b)     STDU(r, d, b)
 492     #define STPX(s, a, b)     STDX(s, a, b)
 493     #define STPUX(s, a, b)    STDUX(s, a, b)
 494 #else
 495     #define LP(r, d, b)       LWZ(r, d, b)
 496     #define STP(r, d, b)      STW(r, d, b)
 497     #define STPU(r, d, b)     STWU(r, d, b)
 498     #define STPX(s, a, b)     STWX(s, a, b)
 499     #define STPUX(s, a, b)    STWUX(s, a, b)
 500 #endif
 501
 502     #define LFD(r,  d, b) FMEMd(lfd,  r, d, b)
 503     #define LFDX(r, a, b) FMEMx(lfdx, r, a, b)
 504     #define STFD(r, d, b) FMEMd(stfd, r, d, b)
 505     #define STFDX(s, a, b) FMEMx(stfdx, s, a, b)
 506
 507     #define ALUI(op,rd,ra,d) EMIT1(PPC_##op | GPR(rd)<<21 | GPR(ra)<<16 | uint16_t(d),\
 508                 "%s %s,%s,%d (0x%x)", #op, gpn(rd), gpn(ra), int16_t(d), int16_t(d))
 509
 510     #define ADDI(rd,ra,d)  ALUI(addi,  rd, ra, d)
 511     #define ADDIS(rd,ra,d) ALUI(addis, rd, ra, d)
 512
 513     // bitwise operators have different src/dest registers
 514     #define BITALUI(op,rd,ra,d) EMIT1(PPC_##op | GPR(ra)<<21 | GPR(rd)<<16 | uint16_t(d),\
 515                 "%s %s,%s,%u (0x%x)", #op, gpn(rd), gpn(ra), uint16_t(d), uint16_t(d))
 516
 517     #define ANDI(rd,ra,d)  BITALUI(andi,  rd, ra, d)
 518     #define ORI(rd,ra,d)   BITALUI(ori,   rd, ra, d)
 519     #define ORIS(rd,ra,d)  BITALUI(oris,  rd, ra, d)
 520     #define XORI(rd,ra,d)  BITALUI(xori,  rd, ra, d)
 521     #define XORIS(rd,ra,d) BITALUI(xoris, rd, ra, d)
 522
 523     #define SUBI(rd,ra,d) EMIT1(PPC_addi | GPR(rd)<<21 | GPR(ra)<<16 | uint16_t(-(d)),\
 524         "subi %s,%s,%d", gpn(rd), gpn(ra), (d))
 525
 526     #define LI(rd,v) EMIT1(PPC_addi | GPR(rd)<<21 | uint16_t(v),\
 527         "li %s,%d (0x%x)", gpn(rd), int16_t(v), int16_t(v)) /* addi rd,0,v */
 528
 529     #define LIS(rd,v) EMIT1(PPC_addis | GPR(rd)<<21 | uint16_t(v),\
 530         "lis %s,%d (0x%x)", gpn(rd), int16_t(v), int16_t(v)<<16) /* addis, rd,0,v */
 531
 532     #define MTCR(rs) /* mtcrf 0xff,rs */
 533     #define MFCR(rd) EMIT1(PPC_mfcr | GPR(rd)<<21, "mfcr %s", gpn(rd))
 534
 535     #define CMPx(op, crfd, ra, rb, l) EMIT1(PPC_##op | (crfd)<<23 | (l)<<21 | GPR(ra)<<16 | GPR(rb)<<11,\
 536         "%s%c cr%d,%s,%s", #op, (l)?'d':'w', (crfd), gpn(ra), gpn(rb))
 537
 538     #define CMPW(cr, ra, rb)   CMPx(cmp,    cr, ra, rb, 0)
 539     #define CMPLW(cr, ra, rb)  CMPx(cmpl,   cr, ra, rb, 0)
 540     #define CMPD(cr, ra, rb)   CMPx(cmp,    cr, ra, rb, 1)
 541     #define CMPLD(cr, ra, rb)  CMPx(cmpl,   cr, ra, rb, 1)
 542
 543     #define CMPxI(cr, ra, simm, l) EMIT1(PPC_cmpi | (cr)<<23 | (l)<<21 | GPR(ra)<<16 | uint16_t(simm),\
 544         "cmp%ci cr%d,%s,%d (0x%x)", (l)?'d':'w', (cr), gpn(ra), int16_t(simm), int16_t(simm))
 545
 546     #define CMPWI(cr, ra, simm) CMPxI(cr, ra, simm, 0)
 547     #define CMPDI(cr, ra, simm) CMPxI(cr, ra, simm, 1)
 548
 549     #define CMPLxI(cr, ra, uimm, l) EMIT1(PPC_cmpli | (cr)<<23 | (l)<<21 | GPR(ra)<<16 | uint16_t(uimm),\
 550         "cmp%ci cr%d,%s,%d (0x%x)", (l)?'d':'w', (cr), gpn(ra), uint16_t(uimm), uint16_t(uimm))
 551
 552     #define CMPLWI(cr, ra, uimm) CMPLxI(cr, ra, uimm, 0)
 553     #define CMPLDI(cr, ra, uimm) CMPLxI(cr, ra, uimm, 1)
 554
 555     #define FCMPx(op, crfd, ra, rb) EMIT1(PPC_##op | (crfd)<<23 | FPR(ra)<<16 | FPR(rb)<<11,\
 556         "%s cr%d,%s,%s", #op, (crfd), gpn(ra), gpn(rb))
 557
 558     #define FCMPU(cr, ra, rb) FCMPx(fcmpu, cr, ra, rb)
 559
 560     #define CROR(cr,d,a,b) EMIT1(PPC_cror | (4*(cr)+COND_##d)<<21 | (4*(cr)+COND_##a)<<16 | (4*(cr)+COND_##b)<<11,\
 561         "cror %d,%d,%d", 4*(cr)+COND_##d, 4*(cr)+COND_##a, 4*(cr)+COND_##b)
 562
 563     #define RLWINM(rd,rs,sh,mb,me) EMIT1(PPC_rlwinm | GPR(rs)<<21 | GPR(rd)<<16 | (sh)<<11 | (mb)<<6 | (me)<<1,\
 564         "rlwinm %s,%s,%d,%d,%d", gpn(rd), gpn(rs), (sh), (mb), (me))
 565
 566     #define LO5(sh) ((sh) & 31)
 567     #define BIT6(sh) (((sh) >> 5) & 1)
 568     #define SPLITMB(mb) (LO5(mb)<<1 | BIT6(mb))
 569
 570     #define RLDICL(rd,rs,sh,mb) \
 571         EMIT1(PPC_rldicl | GPR(rs)<<21 | GPR(rd)<<16 | LO5(sh)<<11 | SPLITMB(mb)<<5 | BIT6(sh)<<1,\
 572         "rldicl %s,%s,%d,%d", gpn(rd), gpn(rs), (sh), (mb))
 573
 574     // clrldi d,s,n => rldicl d,s,0,n
 575     #define CLRLDI(rd,rs,n) \
 576         EMIT1(PPC_rldicl | GPR(rs)<<21 | GPR(rd)<<16 | SPLITMB(n)<<5,\
 577         "clrldi %s,%s,%d", gpn(rd), gpn(rs), (n))
 578
 579     #define RLDIMI(rd,rs,sh,mb) \
 580         EMIT1(PPC_rldimi | GPR(rs)<<21 | GPR(rd)<<16 | LO5(sh)<<11 | SPLITMB(mb)<<5 | BIT6(sh)<<1,\
 581         "rldimi %s,%s,%d,%d", gpn(rd), gpn(rs), (sh), (mb))
 582
 583     // insrdi rD,rS,n,b => rldimi rD,rS,64-(b+n),b: insert n bit value into rD starting at b
 584     #define INSRDI(rd,rs,n,b) \
 585         EMIT1(PPC_rldimi | GPR(rs)<<21 | GPR(rd)<<16 | LO5(64-((b)+(n)))<<11 | SPLITMB(b)<<5 | BIT6(64-((b)+(n)))<<1,\
 586         "insrdi %s,%s,%d,%d", gpn(rd), gpn(rs), (n), (b))
 587
 588     #define EXTRWI(rd,rs,n,b) EMIT1(PPC_rlwinm | GPR(rs)<<21 | GPR(rd)<<16 | ((n)+(b))<<11 | (32-(n))<<6 | 31<<1,\
 589         "extrwi %s,%s,%d,%d", gpn(rd), gpn(rs), (n), (b))
 590
 591     // sldi rd,rs,n (n<64) => rldicr rd,rs,n,63-n
 592     #define SLDI(rd,rs,n) EMIT1(PPC_rldicr | GPR(rs)<<21 | GPR(rd)<<16 | LO5(n)<<11 | SPLITMB(63-(n))<<5 | BIT6(n)<<1,\
 593         "sldi %s,%s,%d", gpn(rd), gpn(rs), (n))
 594
 595     #define SLWI(rd,rs,n) EMIT1(PPC_rlwinm | GPR(rs)<<21 | GPR(rd)<<16 | (n)<<11 | 0<<6 | (31-(n))<<1,\
 596         "slwi %s,%s,%d", gpn(rd), gpn(rs), (n))
 597     #define SRWI(rd,rs,n) EMIT1(PPC_rlwinm | GPR(rs)<<21 | GPR(rd)<<16 | (32-(n))<<11 | (n)<<6 | 31<<1,\
 598         "slwi %s,%s,%d", gpn(rd), gpn(rs), (n))
 599     #define SRAWI(rd,rs,n) EMIT1(PPC_srawi | GPR(rs)<<21 | GPR(rd)<<16 | (n)<<11,\
 600         "srawi %s,%s,%d", gpn(rd), gpn(rs), (n))
 601
 602 } // namespace nanojit
 603
 604 #endif // __nanojit_NativePPC__