/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is [Open Source Virtual Machine].
 *
 * The Initial Developer of the Original Code is
 * Adobe System Incorporated.
 * Portions created by the Initial Developer are Copyright (C) 2004-2007
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Adobe AS3 Team
 *   Vladimir Vukicevic <vladimir@pobox.com>
 *   Jacob Bramley <Jacob.Bramley@arm.com>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
#ifndef __nanojit_NativeARM__
#define __nanojit_NativeARM__

#ifdef PERFM
#include "../vprof/vprof.h"
#define count_instr()   _nvprof("arm",1)
#define count_prolog()  _nvprof("arm-prolog",1); count_instr();
#define count_imt()     _nvprof("arm-imt",1); count_instr()
#else
#define count_instr()
#define count_prolog()
#define count_imt()
#endif
#ifdef DEBUG
#define ARM_ARCH_AT_LEAST(wanted) (_config.arm_arch >= (wanted))
#define ARM_VFP (_config.arm_vfp)
#else
/* Note: non-DEBUG builds will ignore arm_arch if it is lower than
   NJ_COMPILER_ARM_ARCH, and will ignore arm_vfp if NJ_COMPILER_ARM_ARCH
   is 7 or greater. */
#define ARM_ARCH_AT_LEAST(wanted) \
    ((NJ_COMPILER_ARM_ARCH >= (wanted)) || (_config.arm_arch >= (wanted)))
#define ARM_VFP ((NJ_COMPILER_ARM_ARCH >= 7) || (_config.arm_vfp))
#endif
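
// Usage sketch (illustrative, not part of the build): emitters can guard
// architecture-specific encodings on these tests, e.g.
//
//     if (ARM_ARCH_AT_LEAST(7)) {
//         // emit an ARMv6T2/v7-only encoding (e.g. MOVW/MOVT, below)
//     } else {
//         // fall back to a constant-pool load
//     }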
namespace nanojit
{

#if defined VMCFG_DOUBLE_MSW_FIRST || defined _MSC_VER
#  undef  NJ_ARM_EABI
#else
#  define NJ_ARM_EABI 1
#endif

// GCC defines __ARM_PCS_VFP if it uses hardware floating point ABI
// See http://gcc.gnu.org/viewcvs?view=revision&revision=162637
#ifdef __ARM_PCS_VFP
#  define NJ_ARM_EABI_HARD_FLOAT 1
#endif

#ifdef NJ_ARM_EABI_HARD_FLOAT
#  define ARM_EABI_HARD true
#else
#  define ARM_EABI_HARD false
#endif
// only d0-d7 are used; in addition, we'll use d0 as s0-s1 for i2d/u2f/etc.
#define NJ_VFP_MAX_REGISTERS            8
#define NJ_MAX_REGISTERS                (11 + NJ_VFP_MAX_REGISTERS)
#define NJ_MAX_STACK_ENTRY              4096
#define NJ_MAX_PARAMETERS               16
#define NJ_ALIGN_STACK                  8

#define NJ_JTBL_SUPPORTED               1
#define NJ_EXPANDED_LOADSTORE_SUPPORTED 1
#define NJ_F2I_SUPPORTED                1
#define NJ_SOFTFLOAT_SUPPORTED          1
#define NJ_DIVI_SUPPORTED               0

#define NJ_CONSTANT_POOLS
const int NJ_MAX_CPOOL_OFFSET = 4096;
const int NJ_CPOOL_SIZE = 16;

const int LARGEST_UNDERRUN_PROT = 32;  // largest value passed to underrunProtect

typedef int NIns;

// Bytes of icache to flush after Assembler::patch
const size_t LARGEST_BRANCH_PATCH = 2 * sizeof(NIns);
/* ARM registers */
typedef uint32_t Register;
static const Register
    R0  = { 0 },
    R1  = { 1 },
    R2  = { 2 },
    R3  = { 3 },
    R4  = { 4 },
    R5  = { 5 },
    R6  = { 6 },
    R7  = { 7 },
    R8  = { 8 },
    R9  = { 9 },
    R10 = { 10 },
    FP  = { 11 },
    IP  = { 12 },
    SP  = { 13 },
    LR  = { 14 },
    PC  = { 15 },

    // VFP regs (we currently only use D0-D7 and S0)
    D0 = { 16 },
    D1 = { 17 },
    D2 = { 18 },
    D3 = { 19 },
    D4 = { 20 },
    D5 = { 21 },
    D6 = { 22 },
    D7 = { 23 },
    // D8-D15 are callee-saved registers that we don't currently handle.

    FirstFloatReg = D0,
    LastFloatReg = D7,

    deprecated_UnknownReg = { 32 },     // XXX: remove eventually, see bug 538924

    // S0 overlaps with D0 and is hard-coded into i2d and u2f operations
    S0  = { 24 },

    SBZ = { 0 };    // Used for 'should-be-zero' fields in instructions with
                    // unused register fields.

/* winmo builds error with C2057 and C2229 on usage of First/LastRegNum as R0/D7 */
static const uint32_t FirstRegNum = 0;  /* R0 */
static const uint32_t LastRegNum = 23;  /* D7 */
}
#define NJ_USE_UINT32_REGISTER 1
#include "NativeCommon.h"

namespace nanojit
{

/* ARM condition codes */
typedef enum {
    EQ = 0x0, // Equal
    NE = 0x1, // Not Equal
    CS = 0x2, // Carry Set (or HS)
    HS = 0x2,
    CC = 0x3, // Carry Clear (or LO)
    LO = 0x3,
    MI = 0x4, // MInus
    PL = 0x5, // PLus
    VS = 0x6, // oVerflow Set
    VC = 0x7, // oVerflow Clear
    HI = 0x8, // HIgher
    LS = 0x9, // Lower or Same
    GE = 0xA, // Greater or Equal
    LT = 0xB, // Less Than
    GT = 0xC, // Greater Than
    LE = 0xD, // Less or Equal
    AL = 0xE, // ALways

    // Note that condition code NV is unpredictable on ARMv3 and ARMv4, and has
    // special meaning for ARMv5 onwards. As such, it should never be used in
    // an instruction encoding unless the special (ARMv5+) meaning is required.
    NV = 0xF  // NeVer
} ConditionCode;
#define IsCond(cc) (((cc) >= EQ) && ((cc) <= AL))

// Bit 0 of the condition code can be flipped to obtain the opposite condition.
// However, this won't work for AL because its opposite (NV) has special
// meaning.
#define OppositeCond(cc) ((ConditionCode)((unsigned int)(cc)^0x1))
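
// For example, since paired codes differ only in bit 0:
//     OppositeCond(EQ) == NE     (0x0 ^ 0x1 == 0x1)
//     OppositeCond(GE) == LT     (0xA ^ 0x1 == 0xB)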
typedef int RegisterMask;
typedef struct _FragInfo {
    RegisterMask    needRestoring;
    NIns*           epilogue;
} FragInfo;

typedef struct _ParameterRegisters {
    int stkd;
    Register r;
#ifdef NJ_ARM_EABI_HARD_FLOAT
    Register float_r;
#endif
} ParameterRegisters;

#ifdef NJ_ARM_EABI_HARD_FLOAT
#define init_params(a,b,c) { (a), (b), (c) }
#else
#define init_params(a,b,c) { (a), (b) }
#endif

// D0-D7 are not saved; D8-D15 are, but we don't use those,
// so we don't have to worry about saving/restoring them
static const RegisterMask SavedFpRegs = 0;
static const RegisterMask SavedRegs = 1<<R4 | 1<<R5 | 1<<R6 | 1<<R7 | 1<<R8 | 1<<R9 | 1<<R10;
static const int NumSavedRegs = 7;

static const RegisterMask FpRegs = 1<<D0 | 1<<D1 | 1<<D2 | 1<<D3 | 1<<D4 | 1<<D5 | 1<<D6 | 1<<D7;
static const RegisterMask GpRegs = 0xFFFF;
static const RegisterMask AllowableFlagRegs = 1<<R0 | 1<<R1 | 1<<R2 | 1<<R3 | 1<<R4 | 1<<R5 | 1<<R6 | 1<<R7 | 1<<R8 | 1<<R9 | 1<<R10;

#define isU12(offs) (((offs) & 0xfff) == (offs))

#define IsFpReg(_r)     ((rmask((Register)_r) & (FpRegs)) != 0)
#define IsGpReg(_r)     ((rmask((Register)_r) & (GpRegs)) != 0)
#define FpRegNum(_fpr)  ((_fpr) - FirstFloatReg)

#define firstreg()      R0
// only good for normal regs
#define imm2register(c) (Register)(c-1)
verbose_only( extern const char* regNames[]; )
verbose_only( extern const char* condNames[]; )
verbose_only( extern const char* shiftNames[]; )

// abstract to platform specific calls
#define nExtractPlatformFlags(x)    0

#define DECLARE_PLATFORM_STATS()

#define DECLARE_PLATFORM_REGALLOC()

#ifdef DEBUG
# define DECLARE_PLATFORM_ASSEMBLER_DEBUG() \
    inline bool         isOp2Imm(uint32_t literal); \
    inline uint32_t     decOp2Imm(uint32_t enc);
#else
// define stubs, for code that defines NJ_VERBOSE without DEBUG
# define DECLARE_PLATFORM_ASSEMBLER_DEBUG() \
    inline bool         isOp2Imm(uint32_t) { return true; } \
    inline uint32_t     decOp2Imm(uint32_t) { return 0; }
#endif
#define DECLARE_PLATFORM_ASSEMBLER() \
    DECLARE_PLATFORM_ASSEMBLER_DEBUG() \
    const static Register argRegs[4], retRegs[2]; \
    void        BranchWithLink(NIns* addr); \
    inline void BLX(Register addr, bool chk = true); \
    void        JMP_far(NIns*); \
    void        B_cond_chk(ConditionCode, NIns*, bool); \
    void        underrunProtect(int bytes); \
    void        nativePageReset(); \
    void        nativePageSetup(); \
    bool        hardenNopInsertion(const Config& /*c*/) { return false; } \
    void        asm_immd_nochk(Register, int32_t, int32_t); \
    void        asm_regarg(ArgType, LIns*, Register); \
    void        asm_stkarg(LIns* p, int stkd); \
    void        asm_cmpi(Register, int32_t imm); \
    void        asm_ldr_chk(Register d, Register b, int32_t off, bool chk); \
    int32_t     asm_str(Register rt, Register rr, int32_t off); \
    void        asm_cmp(LIns *cond); \
    void        asm_cmpd(LIns *cond); \
    void        asm_ld_imm(Register d, int32_t imm, bool chk = true); \
    void        asm_arg(ArgType ty, LIns* arg, ParameterRegisters& params); \
    void        asm_arg_64(LIns* arg, ParameterRegisters& params); \
    void        asm_add_imm(Register rd, Register rn, int32_t imm, int stat = 0); \
    void        asm_sub_imm(Register rd, Register rn, int32_t imm, int stat = 0); \
    void        asm_and_imm(Register rd, Register rn, int32_t imm, int stat = 0); \
    void        asm_orr_imm(Register rd, Register rn, int32_t imm, int stat = 0); \
    void        asm_eor_imm(Register rd, Register rn, int32_t imm, int stat = 0); \
    inline bool     encOp2Imm(uint32_t literal, uint32_t * enc); \
    inline uint32_t CountLeadingZeroes(uint32_t data); \
    int *       _nSlot; \
    int *       _nExitSlot; \
    int         max_out_args; /* bytes */
#define IMM32(imm)  *(--_nIns) = (NIns)((imm));

#define OP_IMM  (1<<25)
#define OP_STAT (1<<20)

#define COND_AL ((uint32_t)AL<<28)

typedef enum {
    LSL_imm = 0, // LSL #c - Logical Shift Left
    LSL_reg = 1, // LSL Rc - Logical Shift Left
    LSR_imm = 2, // LSR #c - Logical Shift Right
    LSR_reg = 3, // LSR Rc - Logical Shift Right
    ASR_imm = 4, // ASR #c - Arithmetic Shift Right
    ASR_reg = 5, // ASR Rc - Arithmetic Shift Right
    ROR_imm = 6, // Rotate Right (c != 0)
    RRX     = 6, // Rotate Right one bit with extend (c == 0)
    ROR_reg = 7  // Rotate Right
} ShiftOperator;
#define IsShift(sh) (((sh) >= LSL_imm) && ((sh) <= ROR_reg))
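
// These selectors plug into the operand-2 field of the ALU macros below;
// for instance, LSLi(R0, R1, 4) (defined later in this file) expands to an
// ALUr_shi with LSL_imm and emits "movs r0, r1, LSL #4".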
#define LD32_size 8

#define BEGIN_NATIVE_CODE(x) \
    { DWORD* _nIns = (uint8_t*)x

#define END_NATIVE_CODE(x) \
    (x) = (dictwordp*)_nIns; }

// BX
#define BX(_r) do { \
        underrunProtect(4); \
        NanoAssert(IsGpReg(_r)); \
        *(--_nIns) = (NIns)( COND_AL | (0x12<<20) | (0xFFF<<8) | (1<<4) | (_r)); \
        asm_output("bx %s", gpn(_r)); } while(0)
/*
 * ALU operations
 */

enum {
    ARM_and = 0,
    ARM_eor = 1,
    ARM_sub = 2,
    ARM_rsb = 3,
    ARM_add = 4,
    ARM_adc = 5,
    ARM_sbc = 6,
    ARM_rsc = 7,
    ARM_tst = 8,
    ARM_teq = 9,
    ARM_cmp = 10,
    ARM_cmn = 11,
    ARM_orr = 12,
    ARM_mov = 13,
    ARM_bic = 14,
    ARM_mvn = 15
};

#define IsOp(op) (((ARM_##op) >= ARM_and) && ((ARM_##op) <= ARM_mvn))

// ALU operation with register and 8-bit immediate arguments
//  S      - bit, 0 or 1, whether the CPSR register is updated
//  rd     - destination register
//  rl     - first (left) operand register
//  op2imm - operand 2 immediate. Use encOp2Imm (from NativeARM.cpp) to calculate this.
#define ALUi(cond, op, S, rd, rl, op2imm)   ALUi_chk(cond, op, S, rd, rl, op2imm, 1)
#define ALUi_chk(cond, op, S, rd, rl, op2imm, chk) do {\
        if (chk) underrunProtect(4);\
        NanoAssert(IsCond(cond));\
        NanoAssert(IsOp(op));\
        NanoAssert(((S)==0) || ((S)==1));\
        NanoAssert(IsGpReg(rd) && IsGpReg(rl));\
        NanoAssert(isOp2Imm(op2imm));\
        *(--_nIns) = (NIns) ((cond)<<28 | OP_IMM | (ARM_##op)<<21 | (S)<<20 | (rl)<<16 | (rd)<<12 | (op2imm));\
        if (ARM_##op == ARM_mov || ARM_##op == ARM_mvn) { \
            asm_output("%s%s%s %s, #0x%X", #op, condNames[cond], (S)?"s":"", gpn(rd), decOp2Imm(op2imm));\
        } else if (ARM_##op >= ARM_tst && ARM_##op <= ARM_cmn) { \
            NanoAssert(S==1);\
            asm_output("%s%s %s, #0x%X", #op, condNames[cond], gpn(rl), decOp2Imm(op2imm));\
        } else { \
            asm_output("%s%s%s %s, %s, #0x%X", #op, condNames[cond], (S)?"s":"", gpn(rd), gpn(rl), decOp2Imm(op2imm));\
        } \
    } while (0)
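
// Illustrative sketch (assumes an Assembler context, as all of these macros
// do): materialise a constant that fits ARM's rotated 8-bit immediate form,
// falling back to a full load otherwise.
//
//     uint32_t enc;
//     if (encOp2Imm(0x3FC00, &enc))        // 0xFF ror 22: encodable
//         ALUi(AL, mov, 0, R0, SBZ, enc);  // mov r0, #0x3FC00
//     else
//         asm_ld_imm(R0, 0x3FC00);         // constant-pool / MOVW+MOVT load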
// ALU operation with two register arguments
//  S  - bit, 0 or 1, whether the CPSR register is updated
//  rd - destination register
//  rl - first (left) operand register
//  rr - second (right) operand register
#define ALUr(cond, op, S, rd, rl, rr)   ALUr_chk(cond, op, S, rd, rl, rr, 1)
#define ALUr_chk(cond, op, S, rd, rl, rr, chk) do {\
        if (chk) underrunProtect(4);\
        NanoAssert(IsCond(cond));\
        NanoAssert(IsOp(op));\
        NanoAssert(((S)==0) || ((S)==1));\
        NanoAssert(IsGpReg(rd) && IsGpReg(rl) && IsGpReg(rr));\
        *(--_nIns) = (NIns) ((cond)<<28 |(ARM_##op)<<21 | (S)<<20 | (rl)<<16 | (rd)<<12 | (rr));\
        if (ARM_##op == ARM_mov || ARM_##op == ARM_mvn) { \
            asm_output("%s%s%s %s, %s", #op, condNames[cond], (S)?"s":"", gpn(rd), gpn(rr));\
        } else if (ARM_##op >= ARM_tst && ARM_##op <= ARM_cmn) { \
            NanoAssert(S==1);\
            asm_output("%s%s %s, %s", #op, condNames[cond], gpn(rl), gpn(rr));\
        } else { \
            asm_output("%s%s%s %s, %s, %s", #op, condNames[cond], (S)?"s":"", gpn(rd), gpn(rl), gpn(rr));\
        } \
    } while (0)
// ALU operation with two register arguments, with rr operated on by a shift and shift immediate
//  S   - bit, 0 or 1, whether the CPSR register is updated
//  rd  - destination register
//  rl  - first (left) operand register
//  rr  - second (right) operand register
//  sh  - a ShiftOperator
//  imm - immediate argument to shift operator, 5 bits (0..31)
#define ALUr_shi(cond, op, S, rd, rl, rr, sh, imm) do {\
        underrunProtect(4);\
        NanoAssert(IsCond(cond));\
        NanoAssert(IsOp(op));\
        NanoAssert(((S)==0) || ((S)==1));\
        NanoAssert(IsGpReg(rd) && IsGpReg(rl) && IsGpReg(rr));\
        NanoAssert(IsShift(sh));\
        NanoAssert((imm)>=0 && (imm)<32);\
        *(--_nIns) = (NIns) ((cond)<<28 |(ARM_##op)<<21 | (S)<<20 | (rl)<<16 | (rd)<<12 | (imm)<<7 | (sh)<<4 | (rr));\
        if (ARM_##op == ARM_mov || ARM_##op == ARM_mvn) { \
            NanoAssert(rl==0); \
            asm_output("%s%s%s %s, %s, %s #%d", #op, condNames[cond], (S)?"s":"", gpn(rd), gpn(rr), shiftNames[sh], (imm));\
        } else if (ARM_##op >= ARM_tst && ARM_##op <= ARM_cmn) { \
            NanoAssert(S==1);\
            NanoAssert(rd==0);\
            asm_output("%s%s %s, %s, %s #%d", #op, condNames[cond], gpn(rl), gpn(rr), shiftNames[sh], (imm));\
        } else { \
            asm_output("%s%s%s %s, %s, %s, %s #%d", #op, condNames[cond], (S)?"s":"", gpn(rd), gpn(rl), gpn(rr), shiftNames[sh], (imm));\
        } \
    } while (0)
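
// For example, the scaled-index add "add r0, r1, r2, LSL #2"
// (r0 = r1 + r2*4) is emitted as:
//
//     ALUr_shi(AL, add, 0, R0, R1, R2, LSL_imm, 2);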
// ALU operation with two register arguments, with rr operated on by a shift and shift register
//  S  - bit, 0 or 1, whether the CPSR register is updated
//  rd - destination register
//  rl - first (left) operand register
//  rr - second (right) operand register
//  sh - a ShiftOperator
//  rs - shift operand register
#define ALUr_shr(cond, op, S, rd, rl, rr, sh, rs) do {\
        underrunProtect(4);\
        NanoAssert(IsCond(cond));\
        NanoAssert(IsOp(op));\
        NanoAssert(((S)==0) || ((S)==1));\
        NanoAssert(IsGpReg(rd) && IsGpReg(rl) && IsGpReg(rr) && IsGpReg(rs));\
        NanoAssert(IsShift(sh));\
        *(--_nIns) = (NIns) ((cond)<<28 |(ARM_##op)<<21 | (S)<<20 | (rl)<<16 | (rd)<<12 | (rs)<<8 | (sh)<<4 | (rr));\
        if (ARM_##op == ARM_mov || ARM_##op == ARM_mvn) { \
            asm_output("%s%s%s %s, %s, %s %s", #op, condNames[cond], (S)?"s":"", gpn(rd), gpn(rr), shiftNames[sh], gpn(rs));\
        } else if (ARM_##op >= ARM_tst && ARM_##op <= ARM_cmn) { \
            NanoAssert(S==1);\
            asm_output("%s%s %s, %s, %s %s", #op, condNames[cond], gpn(rl), gpn(rr), shiftNames[sh], gpn(rs));\
        } else { \
            asm_output("%s%s%s %s, %s, %s, %s %s", #op, condNames[cond], (S)?"s":"", gpn(rd), gpn(rl), gpn(rr), shiftNames[sh], gpn(rs));\
        } \
    } while (0)
// --------
// Basic arithmetic operations.
// --------
// Argument naming conventions for these macros:
//  _d      Destination register.
//  _l      First (left) operand.
//  _r      Second (right) operand.
//  _op2imm An operand 2 immediate value. Use encOp2Imm to calculate this.
//  _s      Set to 1 to update the status flags (for subsequent conditional
//          tests). Otherwise, set to 0.

// _d = _l + decOp2Imm(_op2imm)
#define ADDis(_d,_l,_op2imm,_s) ALUi(AL, add, _s, _d, _l, _op2imm)
#define ADDi(_d,_l,_op2imm)     ALUi(AL, add,  0, _d, _l, _op2imm)

// _d = _l & ~decOp2Imm(_op2imm)
#define BICis(_d,_l,_op2imm,_s) ALUi(AL, bic, _s, _d, _l, _op2imm)
#define BICi(_d,_l,_op2imm)     ALUi(AL, bic,  0, _d, _l, _op2imm)

// _d = _l - decOp2Imm(_op2imm)
#define SUBis(_d,_l,_op2imm,_s) ALUi(AL, sub, _s, _d, _l, _op2imm)
#define SUBi(_d,_l,_op2imm)     ALUi(AL, sub,  0, _d, _l, _op2imm)

// _d = _l & decOp2Imm(_op2imm)
#define ANDis(_d,_l,_op2imm,_s) ALUi(AL, and, _s, _d, _l, _op2imm)
#define ANDi(_d,_l,_op2imm)     ALUi(AL, and,  0, _d, _l, _op2imm)

// _d = _l | decOp2Imm(_op2imm)
#define ORRis(_d,_l,_op2imm,_s) ALUi(AL, orr, _s, _d, _l, _op2imm)
#define ORRi(_d,_l,_op2imm)     ALUi(AL, orr,  0, _d, _l, _op2imm)

// _d = _l ^ decOp2Imm(_op2imm)
#define EORis(_d,_l,_op2imm,_s) ALUi(AL, eor, _s, _d, _l, _op2imm)
#define EORi(_d,_l,_op2imm)     ALUi(AL, eor,  0, _d, _l, _op2imm)

// _d = _l | _r
#define ORRs(_d,_l,_r,_s)   ALUr(AL, orr, _s, _d, _l, _r)
#define ORR(_d,_l,_r)       ALUr(AL, orr,  0, _d, _l, _r)

// _d = _l & _r
#define ANDs(_d,_l,_r,_s)   ALUr(AL, and, _s, _d, _l, _r)
#define AND(_d,_l,_r)       ALUr(AL, and,  0, _d, _l, _r)

// _d = _l ^ _r
#define EORs(_d,_l,_r,_s)   ALUr(AL, eor, _s, _d, _l, _r)
#define EOR(_d,_l,_r)       ALUr(AL, eor,  0, _d, _l, _r)

// _d = _l + _r
#define ADDs(_d,_l,_r,_s)   ALUr(AL, add, _s, _d, _l, _r)
#define ADD(_d,_l,_r)       ALUr(AL, add,  0, _d, _l, _r)

// _d = _l - _r
#define SUBs(_d,_l,_r,_s)   ALUr(AL, sub, _s, _d, _l, _r)
#define SUB(_d,_l,_r)       ALUr(AL, sub,  0, _d, _l, _r)
// --------
// Other operations.
// --------

// [_d_hi,_d] = _l * _r
#define SMULL(_d, _d_hi, _l, _r) do { \
        underrunProtect(4); \
        NanoAssert(ARM_ARCH_AT_LEAST(6) || ((_d   ) != (_l))); \
        NanoAssert(ARM_ARCH_AT_LEAST(6) || ((_d_hi) != (_l))); \
        NanoAssert(IsGpReg(_d) && IsGpReg(_d_hi) && IsGpReg(_l) && IsGpReg(_r)); \
        NanoAssert(((_d) != PC) && ((_d_hi) != PC) && ((_l) != PC) && ((_r) != PC)); \
        *(--_nIns) = (NIns)( COND_AL | 0xc00090 | (_d_hi)<<16 | (_d)<<12 | (_r)<<8 | (_l) ); \
        asm_output("smull %s, %s, %s, %s",gpn(_d),gpn(_d_hi),gpn(_l),gpn(_r)); \
    } while(0)

// _d = _l * _r
#define MUL(_d, _l, _r) do { \
        underrunProtect(4); \
        NanoAssert(ARM_ARCH_AT_LEAST(6) || ((_d) != (_l))); \
        NanoAssert(IsGpReg(_d) && IsGpReg(_l) && IsGpReg(_r)); \
        NanoAssert(((_d) != PC) && ((_l) != PC) && ((_r) != PC)); \
        *(--_nIns) = (NIns)( COND_AL | (_d)<<16 | (_r)<<8 | 0x90 | (_l) ); \
        asm_output("mul %s, %s, %s",gpn(_d),gpn(_l),gpn(_r)); \
    } while(0)
// RSBS _d, _r
// _d = 0 - _r
#define RSBS(_d,_r) ALUi(AL, rsb, 1, _d, _r, 0)

// MVN
// _d = ~_r (one's complement)
#define MVN(_d,_r)                          ALUr(AL, mvn, 0, _d, 0, _r)
#define MVNis_chk(_d,_op2imm,_stat,_chk)    ALUi_chk(AL, mvn, _stat, _d, 0, _op2imm, _chk)
#define MVNis(_d,_op2imm,_stat)             MVNis_chk(_d,_op2imm,_stat,1)
// Logical Shift Right (LSR) shifts the bits right, filling with zeroes
// (no sign extension).
// MOVS _d, _r, LSR <_s>
// _d = _r >> _s
#define LSR(_d,_r,_s) ALUr_shr(AL, mov, 1, _d, 0, _r, LSR_reg, _s)

// Logical Shift Right (LSR) shifts the bits right, filling with zeroes
// (no sign extension).
// MOVS _d, _r, LSR #(_imm & 0x1f)
// _d = _r >> (_imm & 0x1f)
#define LSRi(_d,_r,_imm)  ALUr_shi(AL, mov, 1, _d, 0, _r, LSR_imm, (_imm & 0x1f))

// Arithmetic Shift Right (ASR) maintains the sign extension.
// MOVS _d, _r, ASR <_s>
// _d = _r >> _s
#define ASR(_d,_r,_s) ALUr_shr(AL, mov, 1, _d, 0, _r, ASR_reg, _s)

// Arithmetic Shift Right (ASR) maintains the sign extension.
// MOVS _d, _r, ASR #(_imm & 0x1f)
// _d = _r >> (_imm & 0x1f)
#define ASRi(_d,_r,_imm)  ALUr_shi(AL, mov, 1, _d, 0, _r, ASR_imm, (_imm & 0x1f))

// Logical Shift Left (LSL).
// MOVS _d, _r, LSL <_s>
// _d = _r << _s
#define LSL(_d, _r, _s) ALUr_shr(AL, mov, 1, _d, 0, _r, LSL_reg, _s)

// Logical Shift Left (LSL).
// MOVS _d, _r, LSL #(_imm & 0x1f)
// _d = _r << (_imm & 0x1f)
#define LSLi(_d, _r, _imm)  ALUr_shi(AL, mov, 1, _d, 0, _r, LSL_imm, (_imm & 0x1f))
// TST
#define TST(_l,_r)      ALUr(AL, tst, 1, 0, _l, _r)
#define TSTi(_d,_imm)   ALUi(AL, tst, 1, 0, _d, _imm)

// CMP
#define CMP(_l,_r)  ALUr(AL, cmp, 1, 0, _l, _r)
#define CMN(_l,_r)  ALUr(AL, cmn, 1, 0, _l, _r)

// MOV
#define MOVis_chk(_d,_op2imm,_stat,_chk)    ALUi_chk(AL, mov, _stat, _d, 0, _op2imm, _chk)
#define MOVis(_d,_op2imm,_stat)             MOVis_chk(_d,_op2imm,_stat,1)
#define MOVi(_d,_op2imm)                    MOVis(_d,_op2imm,0)

#define MOV_cond(_cond,_d,_s)               ALUr(_cond, mov, 0, _d, 0, _s)

#define MOV(dr,sr)                          MOV_cond(AL, dr, sr)
// _d = [_b+off]
#define LDR(_d,_b,_off)         asm_ldr_chk(_d,_b,_off,1)
#define LDR_nochk(_d,_b,_off)   asm_ldr_chk(_d,_b,_off,0)

// _d = [_b + _x<<_s]
#define LDR_scaled(_d, _b, _x, _s) do { \
        NanoAssert(((_s)&31) == _s);\
        NanoAssert(IsGpReg(_d) && IsGpReg(_b) && IsGpReg(_x));\
        underrunProtect(4);\
        *(--_nIns) = (NIns)(COND_AL | (0x79<<20) | ((_b)<<16) | ((_d)<<12) | ((_s)<<7) | (_x));\
        asm_output("ldr %s, [%s, +%s, LSL #%d]", gpn(_d), gpn(_b), gpn(_x), (_s));\
    } while (0)
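
// For example, indexing a word array ("ldr r0, [r1, +r2, LSL #2]",
// i.e. r0 = base[index] with base in r1 and index in r2):
//
//     LDR_scaled(R0, R1, R2, 2);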
// _d = #_imm
#define LDi(_d,_imm) asm_ld_imm(_d,_imm)

// MOVW and MOVT are ARMv6T2 or newer only

// MOVW -- writes _imm into _d, zero-extends.
#define MOVWi_cond_chk(_cond,_d,_imm,_chk) do { \
        NanoAssert(isU16(_imm)); \
        NanoAssert(IsGpReg(_d)); \
        NanoAssert(IsCond(_cond)); \
        if (_chk) underrunProtect(4); \
        *(--_nIns) = (NIns)( (_cond)<<28 | 3<<24 | 0<<20 | (((_imm)>>12)&0xf)<<16 | (_d)<<12 | ((_imm)&0xfff) ); \
        asm_output("movw%s %s, #0x%x", condNames[_cond], gpn(_d), (_imm)); \
    } while (0)

#define MOVWi(_d,_imm)              MOVWi_cond_chk(AL, _d, _imm, 1)
#define MOVWi_chk(_d,_imm,_chk)     MOVWi_cond_chk(AL, _d, _imm, _chk)
#define MOVWi_cond(_cond,_d,_imm)   MOVWi_cond_chk(_cond, _d, _imm, 1)

// MOVT -- writes _imm into top halfword of _d, does not affect bottom halfword
#define MOVTi_cond_chk(_cond,_d,_imm,_chk) do { \
        NanoAssert(isU16(_imm)); \
        NanoAssert(IsGpReg(_d)); \
        NanoAssert(IsCond(_cond)); \
        if (_chk) underrunProtect(4); \
        *(--_nIns) = (NIns)( (_cond)<<28 | 3<<24 | 4<<20 | (((_imm)>>12)&0xf)<<16 | (_d)<<12 | ((_imm)&0xfff) ); \
        asm_output("movt%s %s, #0x%x", condNames[_cond], gpn(_d), (_imm)); \
    } while (0)

#define MOVTi(_d,_imm)              MOVTi_cond_chk(AL, _d, _imm, 1)
#define MOVTi_chk(_d,_imm,_chk)     MOVTi_cond_chk(AL, _d, _imm, _chk)
#define MOVTi_cond(_cond,_d,_imm)   MOVTi_cond_chk(_cond, _d, _imm, 1)
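
// Because this assembler emits backwards (each macro decrements _nIns), a
// full 32-bit constant load on ARMv6T2+ is produced by invoking MOVT before
// MOVW, which yields "movw; movt" in program order. A sketch:
//
//     MOVTi(R0, 0xDEAD);  // written first, executes second
//     MOVWi(R0, 0xBEEF);  // written second, executes first
//
// leaving r0 == 0xDEADBEEF.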
// i386 compat, for Assembler.cpp
#define MR(d,s)                 MOV(d,s)
#define ST(base,offset,reg)     STR(reg,base,offset)

// Load a byte (8 bits). The offset range is ±4095.
#define LDRB(_d,_n,_off) do { \
        NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
        underrunProtect(4); \
        if (_off < 0) { \
            NanoAssert(isU12(-(_off))); \
            *(--_nIns) = (NIns)( COND_AL | (0x55<<20) | ((_n)<<16) | ((_d)<<12) | ((-(_off))&0xfff) ); \
        } else { \
            NanoAssert(isU12(_off)); \
            *(--_nIns) = (NIns)( COND_AL | (0x5D<<20) | ((_n)<<16) | ((_d)<<12) | ((_off)&0xfff) ); \
        } \
        asm_output("ldrb %s, [%s,#%d]", gpn(_d),gpn(_n),(_off)); \
    } while(0)
// Load a byte (8 bits), sign-extend to 32 bits. The offset range is
// ±255 (different from LDRB, same as LDRH/LDRSH).
#define LDRSB(_d,_n,_off) do { \
        NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
        underrunProtect(4); \
        if (_off < 0) { \
            NanoAssert(isU8(-(_off))); \
            *(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_n)<<16) | ((_d)<<12) | ((0xD)<<4) | (((-(_off))&0xf0)<<4) | ((-(_off))&0xf) ); \
        } else { \
            NanoAssert(isU8(_off)); \
            *(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_n)<<16) | ((_d)<<12) | ((0xD)<<4) | (((_off)&0xf0)<<4) | ((_off)&0xf) ); \
        } \
        asm_output("ldrsb %s, [%s,#%d]", gpn(_d),gpn(_n),(_off)); \
    } while(0)
// Load and zero-extend a half word (16 bits). The offset range is ±255, and
// must be aligned to two bytes on some architectures (the caller is responsible
// for ensuring appropriate alignment).
#define LDRH(_d,_n,_off) do { \
        NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
        underrunProtect(4); \
        if (_off < 0) { \
            NanoAssert(isU8(-(_off))); \
            *(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_n)<<16) | ((_d)<<12) | ((0xB)<<4) | (((-(_off))&0xf0)<<4) | ((-(_off))&0xf) ); \
        } else { \
            NanoAssert(isU8(_off)); \
            *(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_n)<<16) | ((_d)<<12) | ((0xB)<<4) | (((_off)&0xf0)<<4) | ((_off)&0xf) ); \
        } \
        asm_output("ldrh %s, [%s,#%d]", gpn(_d),gpn(_n),(_off)); \
    } while(0)
// Load and sign-extend a half word (16 bits). The offset range is ±255, and
// must be aligned to two bytes on some architectures (the caller is responsible
// for ensuring appropriate alignment).
#define LDRSH(_d,_n,_off) do { \
        NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
        underrunProtect(4); \
        if (_off < 0) { \
            NanoAssert(isU8(-(_off))); \
            *(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_n)<<16) | ((_d)<<12) | ((0xF)<<4) | (((-(_off))&0xf0)<<4) | ((-(_off))&0xf) ); \
        } else { \
            NanoAssert(isU8(_off)); \
            *(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_n)<<16) | ((_d)<<12) | ((0xF)<<4) | (((_off)&0xf0)<<4) | ((_off)&0xf) ); \
        } \
        asm_output("ldrsh %s, [%s,#%d]", gpn(_d),gpn(_n),(_off)); \
    } while(0)
// Valid offset for STR and STRB is +/- 4095; STRH only has +/- 255.
#define STR(_d,_n,_off) do { \
        NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
        NanoAssert(isU12(_off) || isU12(-(_off))); \
        underrunProtect(4); \
        if ((_off)<0)   *(--_nIns) = (NIns)( COND_AL | (0x50<<20) | ((_n)<<16) | ((_d)<<12) | ((-(_off))&0xFFF) ); \
        else            *(--_nIns) = (NIns)( COND_AL | (0x58<<20) | ((_n)<<16) | ((_d)<<12) | ((_off)&0xFFF) ); \
        asm_output("str %s, [%s, #%d]", gpn(_d), gpn(_n), (_off)); \
    } while(0)

#define STRB(_d,_n,_off) do { \
        NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
        NanoAssert(isU12(_off) || isU12(-(_off))); \
        underrunProtect(4); \
        if ((_off)<0)   *(--_nIns) = (NIns)( COND_AL | (0x54<<20) | ((_n)<<16) | ((_d)<<12) | ((-(_off))&0xFFF) ); \
        else            *(--_nIns) = (NIns)( COND_AL | (0x5C<<20) | ((_n)<<16) | ((_d)<<12) | ((_off)&0xFFF) ); \
        asm_output("strb %s, [%s, #%d]", gpn(_d), gpn(_n), (_off)); \
    } while(0)
// Only +/- 255 range, unlike STRB/STR.
#define STRH(_d,_n,_off) do { \
        NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
        underrunProtect(4); \
        if ((_off)<0) { \
            NanoAssert(isU8(-(_off))); \
            *(--_nIns) = (NIns)( COND_AL | (0x14<<20) | ((_n)<<16) | ((_d)<<12) | (((-(_off))&0xF0)<<4) | (0xB<<4) | ((-(_off))&0xF) ); \
        } else { \
            NanoAssert(isU8(_off)); \
            *(--_nIns) = (NIns)( COND_AL | (0x1C<<20) | ((_n)<<16) | ((_d)<<12) | (((_off)&0xF0)<<4) | (0xB<<4) | ((_off)&0xF) ); \
        } \
        asm_output("strh %s, [%s, #%d]", gpn(_d), gpn(_n), (_off)); \
    } while(0)
// Encode a breakpoint. The ID is not important and is ignored by the
// processor, but it can be useful as a marker when debugging emitted code.
#define BKPT_insn       ((NIns)( COND_AL | (0x12<<20) | (0x7<<4) ))
#define BKPTi_insn(id)  ((NIns)(BKPT_insn | ((id << 4) & 0xfff00) | (id & 0xf)))

#define BKPT_nochk()    BKPTi_nochk(0)
#define BKPTi_nochk(id) do { \
        NanoAssert((id & 0xffff) == id); \
        *(--_nIns) = BKPTi_insn(id); \
    } while (0)
// STMFD SP!, {reg}
#define PUSHr(_r) do { \
        underrunProtect(4); \
        NanoAssert(IsGpReg(_r)); \
        *(--_nIns) = (NIns)( COND_AL | (0x92<<20) | (SP<<16) | rmask(_r) ); \
        asm_output("push %s",gpn(_r)); } while (0)

// STMFD SP!,{reglist}
#define PUSH_mask(_mask) do { \
        underrunProtect(4); \
        NanoAssert(isU16(_mask)); \
        *(--_nIns) = (NIns)( COND_AL | (0x92<<20) | (SP<<16) | (_mask) ); \
        asm_output("push %x", (_mask));} while (0)

// LDMFD SP!,{reg}
#define POPr(_r) do { \
        underrunProtect(4); \
        NanoAssert(IsGpReg(_r)); \
        *(--_nIns) = (NIns)( COND_AL | (0x8B<<20) | (SP<<16) | rmask(_r) ); \
        asm_output("pop %s",gpn(_r));} while (0)

// LDMFD SP!,{reglist}
#define POP_mask(_mask) do { \
        underrunProtect(4); \
        NanoAssert(isU16(_mask)); \
        *(--_nIns) = (NIns)( COND_AL | (0x8B<<20) | (SP<<16) | (_mask) ); \
        asm_output("pop %x", (_mask));} while (0)
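
// For example, a prologue saving the callee-saved registers plus LR
// ("push {r4-r10, lr}") can be emitted with
//
//     PUSH_mask(SavedRegs | rmask(LR));
//
// and the matching epilogue/return with POP_mask(SavedRegs | rmask(PC)).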
// PC always points to current instruction + 8, so when calculating pc-relative
// offsets, use PC+8.
#define PC_OFFSET_FROM(target,frompc) ((intptr_t)(target) - ((intptr_t)(frompc) + 8))
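
// Branch-offset sketch (roughly what B_cond_chk does in NativeARM.cpp):
// compute the offset from the branch being written at _nIns-1, then check
// that offs>>2 fits the signed 24-bit field of B/BL before encoding it.
//
//     intptr_t offs = PC_OFFSET_FROM(_t, _nIns-1);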
#define B_cond(_c,_t) \
    B_cond_chk(_c,_t,1)

#define B_nochk(_t) \
    B_cond_chk(AL,_t,0)

#define B(t)    B_cond(AL,t)
#define BHI(t)  B_cond(HI,t)
#define BLS(t)  B_cond(LS,t)
#define BHS(t)  B_cond(HS,t)
#define BLO(t)  B_cond(LO,t)
#define BEQ(t)  B_cond(EQ,t)
#define BNE(t)  B_cond(NE,t)
#define BLT(t)  B_cond(LT,t)
#define BGE(t)  B_cond(GE,t)
#define BLE(t)  B_cond(LE,t)
#define BGT(t)  B_cond(GT,t)
#define BVS(t)  B_cond(VS,t)
#define BVC(t)  B_cond(VC,t)
#define BCC(t)  B_cond(CC,t)
#define BCS(t)  B_cond(CS,t)

#define JMP(t)          B(t)
#define JMP_nochk(t)    B_nochk(t)
// MOV(cond) _r, #1
// MOV(!cond) _r, #0
#define SET(_r,_cond) do { \
        ConditionCode _opp = OppositeCond(_cond); \
        underrunProtect(8); \
        *(--_nIns) = (NIns)( ( _opp<<28) | (0x3A<<20) | ((_r)<<12) | (0) ); \
        *(--_nIns) = (NIns)( (_cond<<28) | (0x3A<<20) | ((_r)<<12) | (1) ); \
        asm_output("mov%s %s, #1", condNames[_cond], gpn(_r)); \
        asm_output("mov%s %s, #0", condNames[_opp], gpn(_r)); \
    } while (0)

#define SETEQ(r)    SET(r,EQ)
#define SETNE(r)    SET(r,NE)
#define SETLT(r)    SET(r,LT)
#define SETLE(r)    SET(r,LE)
#define SETGT(r)    SET(r,GT)
#define SETGE(r)    SET(r,GE)
#define SETLO(r)    SET(r,LO)
#define SETLS(r)    SET(r,LS)
#define SETHI(r)    SET(r,HI)
#define SETHS(r)    SET(r,HS)
#define SETVS(r)    SET(r,VS)
#define SETCS(r)    SET(r,CS)
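
// For example, to compute (r0 < r1), signed, as a 0/1 value in r2, invoke
// the macros in reverse program order (the generated code executes the cmp
// first, then the conditional movs):
//
//     SETLT(R2);     // movlt r2, #1 / movge r2, #0
//     CMP(R0, R1);   // cmp r0, r1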
// Load and sign extend a 16-bit value into a reg
#define MOVSX(_d,_off,_b) do { \
        if ((_off)>=0) { \
            if ((_off)<256) { \
                underrunProtect(4); \
                *(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_b)<<16) | ((_d)<<12) | ((((_off)>>4)&0xF)<<8) | (0xF<<4) | ((_off)&0xF) ); \
            } else if ((_off)<=510) { \
                underrunProtect(8); \
                int rem = (_off) - 255; \
                NanoAssert(rem<256); \
                *(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_d)<<16) | ((_d)<<12) | ((((rem)>>4)&0xF)<<8) | (0xF<<4) | ((rem)&0xF) ); \
                *(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | ((_b)<<16) | ((_d)<<12) | (0xFF) ); \
            } else { \
                underrunProtect(16); \
                int rem = (_off) & 3; \
                *(--_nIns) = (NIns)( COND_AL | (0x19<<20) | ((_b)<<16) | ((_d)<<12) | (0xF<<4) | (_d) ); \
                asm_output("ldrsh %s,[%s, #%d]",gpn(_d), gpn(_b), (_off)); \
                *(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | ((_d)<<16) | ((_d)<<12) | rem ); \
                *(--_nIns) = (NIns)( COND_AL | (0x1A<<20) | ((_d)<<12) | (2<<7)| (_d) ); \
                *(--_nIns) = (NIns)( COND_AL | (0x3B<<20) | ((_d)<<12) | (((_off)>>2)&0xFF) ); \
                asm_output("mov %s,%d",gpn(_d),(_off)); \
            } \
        } else { \
            if ((_off)>-256) { \
                underrunProtect(4); \
                *(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_b)<<16) | ((_d)<<12) | ((((-(_off))>>4)&0xF)<<8) | (0xF<<4) | ((-(_off))&0xF) ); \
                asm_output("ldrsh %s,[%s, #%d]",gpn(_d), gpn(_b), (_off)); \
            } else if ((_off)>=-510){ \
                underrunProtect(8); \
                int rem = -(_off) - 255; \
                NanoAssert(rem<256); \
                *(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_d)<<16) | ((_d)<<12) | ((((rem)>>4)&0xF)<<8) | (0xF<<4) | ((rem)&0xF) ); \
                *(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<22) | ((_b)<<16) | ((_d)<<12) | (0xFF) ); \
            } else NanoAssert(0); \
        } \
    } while(0)
/*
 * VFP
 */

#define FMDRR(_Dm,_Rd,_Rn) do { \
        underrunProtect(4); \
        NanoAssert(ARM_VFP); \
        NanoAssert(IsFpReg(_Dm) && IsGpReg(_Rd) && IsGpReg(_Rn)); \
        *(--_nIns) = (NIns)( COND_AL | (0xC4<<20) | ((_Rn)<<16) | ((_Rd)<<12) | (0xB1<<4) | (FpRegNum(_Dm)) ); \
        asm_output("fmdrr %s,%s,%s", gpn(_Dm), gpn(_Rd), gpn(_Rn)); \
    } while (0)

#define FMRRD(_Rd,_Rn,_Dm) do { \
        underrunProtect(4); \
        NanoAssert(ARM_VFP); \
        NanoAssert(IsGpReg(_Rd) && IsGpReg(_Rn) && IsFpReg(_Dm)); \
        *(--_nIns) = (NIns)( COND_AL | (0xC5<<20) | ((_Rn)<<16) | ((_Rd)<<12) | (0xB1<<4) | (FpRegNum(_Dm)) ); \
        asm_output("fmrrd %s,%s,%s", gpn(_Rd), gpn(_Rn), gpn(_Dm)); \
    } while (0)

#define FMRDH(_Rd,_Dn) do { \
        underrunProtect(4); \
        NanoAssert(ARM_VFP); \
        NanoAssert(IsGpReg(_Rd) && IsFpReg(_Dn)); \
        *(--_nIns) = (NIns)( COND_AL | (0xE3<<20) | (FpRegNum(_Dn)<<16) | ((_Rd)<<12) | (0xB<<8) | (1<<4) ); \
        asm_output("fmrdh %s,%s", gpn(_Rd), gpn(_Dn)); \
    } while (0)

#define FMRDL(_Rd,_Dn) do { \
        underrunProtect(4); \
        NanoAssert(ARM_VFP); \
        NanoAssert(IsGpReg(_Rd) && IsFpReg(_Dn)); \
        *(--_nIns) = (NIns)( COND_AL | (0xE1<<20) | (FpRegNum(_Dn)<<16) | ((_Rd)<<12) | (0xB<<8) | (1<<4) ); \
        asm_output("fmrdl %s,%s", gpn(_Rd), gpn(_Dn)); \
    } while (0)
#define FSTD_allowD7(_Dd,_Rn,_offs,_allowD7) do { \
        underrunProtect(4); \
        NanoAssert(ARM_VFP); \
        NanoAssert(((_offs)%4) == 0); \
        NanoAssert(isU8((_offs)/4) || isU8(-(_offs)/4)); \
        NanoAssert(IsFpReg(_Dd) && IsGpReg(_Rn)); \
        int negflag = 1<<23; \
        intptr_t offs = (_offs); \
        if (_offs < 0) { \
            negflag = 0<<23; \
            offs = -(offs); \
        } \
        *(--_nIns) = (NIns)( COND_AL | (0xD0<<20) | ((_Rn)<<16) | (FpRegNum(_Dd)<<12) | (0xB<<8) | negflag | ((offs>>2)&0xff) ); \
        asm_output("fstd %s,%s(%d)", gpn(_Dd), gpn(_Rn), _offs); \
    } while (0)

#define FSTD(_Dd,_Rn,_offs) \
    FSTD_allowD7(_Dd,_Rn,_offs,0)

#define FLDD_chk(_Dd,_Rn,_offs,_chk) do { \
        if(_chk) underrunProtect(4); \
        NanoAssert(ARM_VFP); \
        NanoAssert(((_offs)%4) == 0); \
        NanoAssert(isU8((_offs)/4) || isU8(-(_offs)/4)); \
        NanoAssert(IsFpReg(_Dd) && !IsFpReg(_Rn)); \
        int negflag = 1<<23; \
        intptr_t offs = (_offs); \
        if (_offs < 0) { \
            negflag = 0<<23; \
            offs = -(offs); \
        } \
        *(--_nIns) = (NIns)( COND_AL | (0xD1<<20) | ((_Rn)<<16) | (FpRegNum(_Dd)<<12) | (0xB<<8) | negflag | ((offs>>2)&0xff) ); \
        asm_output("fldd %s,%s(%d)", gpn(_Dd), gpn(_Rn), _offs); \
    } while (0)
#define FLDD(_Dd,_Rn,_offs) FLDD_chk(_Dd,_Rn,_offs,1)
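
// Spill/reload sketch: store D0 to a stack slot and load it back
// ("fstd d0, [fp, #-8]" / "fldd d0, [fp, #-8]"). Offsets must be
// multiples of 4 within +/-1020:
//
//     FSTD(D0, FP, -8);
//     FLDD(D0, FP, -8);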
#define FUITOD(_Dd,_Sm) do { \
        underrunProtect(4); \
        NanoAssert(ARM_VFP); \
        NanoAssert(IsFpReg(_Dd) && ((_Sm) == S0)); \
        *(--_nIns) = (NIns)( COND_AL | (0xEB8<<16) | (FpRegNum(_Dd)<<12) | (0x2D<<6) | (0<<5) | (0x0) ); \
        asm_output("fuitod %s,%s", gpn(_Dd), gpn(_Sm)); \
    } while (0)

#define FNEGD(_Dd,_Dm) do { \
        underrunProtect(4); \
        NanoAssert(ARM_VFP); \
        NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dm)); \
        *(--_nIns) = (NIns)( COND_AL | (0xEB1<<16) | (FpRegNum(_Dd)<<12) | (0xB4<<4) | (FpRegNum(_Dm)) ); \
        asm_output("fnegd %s,%s", gpn(_Dd), gpn(_Dm)); \
    } while (0)

#define FADDD(_Dd,_Dn,_Dm) do { \
        underrunProtect(4); \
        NanoAssert(ARM_VFP); \
        NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dn) && IsFpReg(_Dm)); \
        *(--_nIns) = (NIns)( COND_AL | (0xE3<<20) | (FpRegNum(_Dn)<<16) | (FpRegNum(_Dd)<<12) | (0xB0<<4) | (FpRegNum(_Dm)) ); \
        asm_output("faddd %s,%s,%s", gpn(_Dd), gpn(_Dn), gpn(_Dm)); \
    } while (0)

#define FSUBD(_Dd,_Dn,_Dm) do { \
        underrunProtect(4); \
        NanoAssert(ARM_VFP); \
        NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dn) && IsFpReg(_Dm)); \
        *(--_nIns) = (NIns)( COND_AL | (0xE3<<20) | (FpRegNum(_Dn)<<16) | (FpRegNum(_Dd)<<12) | (0xB4<<4) | (FpRegNum(_Dm)) ); \
        asm_output("fsubd %s,%s,%s", gpn(_Dd), gpn(_Dn), gpn(_Dm)); \
    } while (0)

#define FMULD(_Dd,_Dn,_Dm) do { \
        underrunProtect(4); \
        NanoAssert(ARM_VFP); \
        NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dn) && IsFpReg(_Dm)); \
        *(--_nIns) = (NIns)( COND_AL | (0xE2<<20) | (FpRegNum(_Dn)<<16) | (FpRegNum(_Dd)<<12) | (0xB0<<4) | (FpRegNum(_Dm)) ); \
        asm_output("fmuld %s,%s,%s", gpn(_Dd), gpn(_Dn), gpn(_Dm)); \
    } while (0)

#define FDIVD(_Dd,_Dn,_Dm) do { \
        underrunProtect(4); \
        NanoAssert(ARM_VFP); \
        NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dn) && IsFpReg(_Dm)); \
        *(--_nIns) = (NIns)( COND_AL | (0xE8<<20) | (FpRegNum(_Dn)<<16) | (FpRegNum(_Dd)<<12) | (0xB0<<4) | (FpRegNum(_Dm)) ); \
        asm_output("fdivd %s,%s,%s", gpn(_Dd), gpn(_Dn), gpn(_Dm)); \
    } while (0)

#define FMSTAT() do { \
        underrunProtect(4); \
        NanoAssert(ARM_VFP); \
        *(--_nIns) = (NIns)( COND_AL | 0x0EF1FA10); \
        asm_output("fmstat"); \
    } while (0)

#define FCMPD(_Dd,_Dm,_E) do { \
        underrunProtect(4); \
        NanoAssert(ARM_VFP); \
        NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dm)); \
        NanoAssert(((_E)==0) || ((_E)==1)); \
        *(--_nIns) = (NIns)( COND_AL | (0xEB4<<16) | (FpRegNum(_Dd)<<12) | (0xB<<8) | ((_E)<<7) | (0x4<<4) | (FpRegNum(_Dm)) ); \
        asm_output("fcmp%sd %s,%s", (((_E)==1)?"e":""), gpn(_Dd), gpn(_Dm)); \
    } while (0)

#define FCPYD_cond(_cond,_Dd,_Dm) do { \
        underrunProtect(4); \
        NanoAssert(ARM_VFP); \
        NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dm)); \
        NanoAssert(IsCond(_cond)); \
        *(--_nIns) = (NIns)( ((_cond)<<28) | (0xEB0<<16) | (FpRegNum(_Dd)<<12) | (0xB4<<4) | (FpRegNum(_Dm)) ); \
        asm_output("fcpyd%s %s,%s", condNames[_cond], gpn(_Dd), gpn(_Dm)); \
    } while (0)
#define FCPYD(_Dd,_Dm) FCPYD_cond(AL,_Dd,_Dm)

#define FMRS(_Rd,_Sn) do { \
        underrunProtect(4); \
        NanoAssert(ARM_VFP); \
        NanoAssert(((_Sn) == S0) && IsGpReg(_Rd)); \
        *(--_nIns) = (NIns)( COND_AL | (0xE1<<20) | (0x0<<16) | ((_Rd)<<12) | (0xA<<8) | (0<<7) | (0x1<<4) ); \
        asm_output("fmrs %s,%s", gpn(_Rd), gpn(_Sn)); \
    } while (0)
/*
 * The following instructions can only be used with S0 as the
 * single-precision register; that limitation can be removed if
 * needed, but we'd have to teach NJ about all the single precision
 * regs, and their encoding is strange (top 4 bits usually in a block,
 * low bit elsewhere).
 */

#define FSITOD(_Dd,_Sm) do { \
        underrunProtect(4); \
        NanoAssert(ARM_VFP); \
        NanoAssert(IsFpReg(_Dd) && ((_Sm) == S0)); \
        *(--_nIns) = (NIns)( COND_AL | (0xEB8<<16) | (FpRegNum(_Dd)<<12) | (0x2F<<6) | (0<<5) | (0x0) ); \
        asm_output("fsitod %s,%s", gpn(_Dd), gpn(_Sm)); \
    } while (0)

#define FMSR(_Sn,_Rd) do { \
        underrunProtect(4); \
        NanoAssert(ARM_VFP); \
        NanoAssert(((_Sn) == S0) && IsGpReg(_Rd)); \
        *(--_nIns) = (NIns)( COND_AL | (0xE0<<20) | (0x0<<16) | ((_Rd)<<12) | (0xA<<8) | (0<<7) | (0x1<<4) ); \
        asm_output("fmsr %s,%s", gpn(_Sn), gpn(_Rd)); \
    } while (0)
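
// int-to-double sketch (the shape of the i2d lowering): the generated code
// executes "fmsr s0, r0" then "fsitod d0, s0", so the macros are invoked
// in the opposite order:
//
//     FSITOD(D0, S0);  // executes second: d0 = (double)s0
//     FMSR(S0, R0);    // executes first:  s0 = r0 (bits moved to VFP)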
#define FCVTSD(_Sd,_Dm) do { \
        underrunProtect(4); \
        NanoAssert(ARM_VFP); \
        NanoAssert(((_Sd) == S0) && IsFpReg(_Dm)); \
        *(--_nIns) = (NIns)( COND_AL | (0xEB7<<16) | (0x0<<12) | (0xBC<<4) | (FpRegNum(_Dm)) ); \
        asm_output("[0x%08x] fcvtsd s0,%s", *_nIns, gpn(_Dm)); \
    } while (0)

#define FCVTDS(_Dd,_Sm) do { \
        underrunProtect(4); \
        NanoAssert(ARM_VFP); \
        NanoAssert(((_Sm) == S0) && IsFpReg(_Dd)); \
        *(--_nIns) = (NIns)( COND_AL | (0xEB7<<16) | (FpRegNum(_Dd)<<12) | (0xAC<<4) | (0x0) ); \
        asm_output("fcvtds %s,s0", gpn(_Dd)); \
    } while(0)

#define FLDS(_Sd,_Rn,_offs) do { \
        underrunProtect(4); \
        NanoAssert(ARM_VFP); \
        NanoAssert(((_Sd) == S0) && !IsFpReg(_Rn)); \
        NanoAssert(((_offs)%4) == 0); \
        NanoAssert((isU8((_offs)/4)) || isU8(-(_offs)/4)); \
        int addflag = 1<<23; \
        intptr_t offs = (_offs); \
        if (offs < 0) { \
            addflag = 0; \
            offs = -offs; \
        } \
        *(--_nIns) = (NIns)( COND_AL | (0xD1<<20) | ((_Rn)<<16) | (0x0<<12) | (0xA << 8) | addflag | ((offs>>2)&0xff) ); \
        asm_output("flds s0, [%s, #%d]", gpn(_Rn), (_offs)); \
    } while (0)

#define FSTS(_Sd,_Rn,_offs) do { \
        underrunProtect(4); \
        NanoAssert(ARM_VFP); \
        NanoAssert(((_Sd) == S0) && !IsFpReg(_Rn)); \
        NanoAssert(((_offs)%4) == 0); \
        NanoAssert((isU8((_offs)/4)) || isU8(-(_offs)/4)); \
        int addflag = 1<<23; \
        intptr_t offs = (_offs); \
        if (offs < 0) { \
            addflag = 0; \
            offs = -offs; \
        } \
        *(--_nIns) = (NIns)( COND_AL | (0xD0<<20) | ((_Rn)<<16) | (0x0<<12) | (0xA << 8) | addflag | ((offs>>2)&0xff) ); \
        asm_output("fsts s0, [%s, #%d]", gpn(_Rn), (_offs)); \
    } while (0)

#define FTOSID(_Sd,_Dm) do { \
        underrunProtect(4); \
        NanoAssert(ARM_VFP); \
        NanoAssert(((_Sd) == S0) && IsFpReg(_Dm)); \
        *(--_nIns) = (NIns)( COND_AL | (0xEBD<<16) | (0x0<<12) | (0xB4<<4) | FpRegNum(_Dm) ); \
        asm_output("ftosid s0, %s", gpn(_Dm)); \
    } while (0)
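
// double-to-int sketch (the shape of the d2i lowering): program order is
// "ftosid s0, d0" then "fmrs r0, s0", emitted in reverse:
//
//     FMRS(R0, S0);    // executes second: r0 = s0 (move bits to core reg)
//     FTOSID(S0, D0);  // executes first:  convert d0 to a signed int in s0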
} // namespace nanojit

#endif // __nanojit_NativeARM__