/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is [Open Source Virtual Machine].
 *
 * The Initial Developer of the Original Code is
 * Adobe System Incorporated.
 * Portions created by the Initial Developer are Copyright (C) 2004-2007
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Adobe AS3 Team
 *   Vladimir Vukicevic <vladimir@pobox.com>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
#ifndef __nanojit_NativeARM__
#define __nanojit_NativeARM__

#ifdef PERFM
#include "../vprof/vprof.h"
#define count_instr()   _nvprof("arm",1)
#define count_prolog()  _nvprof("arm-prolog",1); count_instr();
#define count_imt()     _nvprof("arm-imt",1); count_instr()
#else
#define count_instr()
#define count_prolog()
#define count_imt()
#endif
namespace nanojit
{

#if defined VMCFG_DOUBLE_MSW_FIRST || defined _MSC_VER
#  undef NJ_ARM_EABI
#else
#  define NJ_ARM_EABI 1
#endif

// only d0-d6 are actually used; we'll use d7 as s14-s15 for i2f/u2f/etc.
#define NJ_VFP_MAX_REGISTERS            8
#define NJ_MAX_REGISTERS                (11 + NJ_VFP_MAX_REGISTERS)
#define NJ_MAX_STACK_ENTRY              4096
#define NJ_MAX_PARAMETERS               16
#define NJ_ALIGN_STACK                  8

#define NJ_JTBL_SUPPORTED               1
#define NJ_EXPANDED_LOADSTORE_SUPPORTED 1
#define NJ_F2I_SUPPORTED                1
#define NJ_SOFTFLOAT_SUPPORTED          1

#define NJ_CONSTANT_POOLS
const int NJ_MAX_CPOOL_OFFSET = 4096;
const int NJ_CPOOL_SIZE = 16;

const int LARGEST_UNDERRUN_PROT = 32;  // largest value passed to underrunProtect

typedef int NIns;

// Bytes of icache to flush after Assembler::patch
const size_t LARGEST_BRANCH_PATCH = 2 * sizeof(NIns);
/* ARM registers */
typedef enum {
    R0  = 0,
    R1  = 1,
    R2  = 2,
    R3  = 3,
    R4  = 4,
    R5  = 5,
    R6  = 6,
    R7  = 7,
    R8  = 8,
    R9  = 9,
    R10 = 10,
    FP  = 11,
    IP  = 12,
    SP  = 13,
    LR  = 14,
    PC  = 15,

    // VFP regs (we currently only use D0-D6 and S14)
    D0 = 16,
    D1 = 17,
    D2 = 18,
    D3 = 19,
    D4 = 20,
    D5 = 21,
    D6 = 22,
    // S14 overlaps with D7 and is hard-coded into i2f and u2f operations, but
    // D7 is still listed here for completeness and to facilitate assertions.
    D7 = 23,

    FirstFloatReg = D0,
    LastFloatReg = D6,

    FirstReg = R0,
    LastReg = D6,
    deprecated_UnknownReg = 32,     // XXX: remove eventually, see bug 538924

    S14 = 24
} Register;
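// Added note for clarity (not in the original source): register masks
// (RegisterMask, defined below) use these enum values as bit indices, so
// GpRegs == 0xFFFF covers R0..PC in bits 0-15, and 1<<D0 sets bit 16.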
/* ARM condition codes */
typedef enum {
    EQ = 0x0, // Equal
    NE = 0x1, // Not Equal
    CS = 0x2, // Carry Set (or HS)
    HS = 0x2,
    CC = 0x3, // Carry Clear (or LO)
    LO = 0x3,
    MI = 0x4, // MInus
    PL = 0x5, // PLus
    VS = 0x6, // oVerflow Set
    VC = 0x7, // oVerflow Clear
    HI = 0x8, // HIgher
    LS = 0x9, // Lower or Same
    GE = 0xA, // Greater or Equal
    LT = 0xB, // Less Than
    GT = 0xC, // Greater Than
    LE = 0xD, // Less or Equal
    AL = 0xE, // ALways

    // Note that condition code NV is unpredictable on ARMv3 and ARMv4, and has
    // special meaning for ARMv5 onwards. As such, it should never be used in
    // an instruction encoding unless the special (ARMv5+) meaning is required.
    NV = 0xF  // NeVer
} ConditionCode;
#define IsCond(cc)          (((cc) >= EQ) && ((cc) <= AL))

// Bit 0 of the condition code can be flipped to obtain the opposite condition.
// However, this won't work for AL because its opposite (NV) has special
// meaning.
#define OppositeCond(cc)    ((ConditionCode)((unsigned int)(cc)^0x1))
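// For illustration (comment added, not in the original source): flipping bit 0
// pairs the conditions as EQ<->NE, CS<->CC, MI<->PL, VS<->VC, HI<->LS,
// GE<->LT and GT<->LE. For example, OppositeCond(EQ) == NE and
// OppositeCond(GE) == LT.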
typedef int RegisterMask;
typedef struct _FragInfo {
    RegisterMask    needRestoring;
    NIns*           epilogue;
} FragInfo;

// D0-D6 are not saved; D7-D15 are, but we don't use those,
// so we don't have to worry about saving/restoring them
static const RegisterMask SavedFpRegs = 0;
static const RegisterMask SavedRegs = 1<<R4 | 1<<R5 | 1<<R6 | 1<<R7 | 1<<R8 | 1<<R9 | 1<<R10;
static const int NumSavedRegs = 7;

static const RegisterMask FpRegs = 1<<D0 | 1<<D1 | 1<<D2 | 1<<D3 | 1<<D4 | 1<<D5 | 1<<D6; // no D7; S14-S15 are used for i2f/u2f.
static const RegisterMask GpRegs = 0xFFFF;
static const RegisterMask AllowableFlagRegs = 1<<R0 | 1<<R1 | 1<<R2 | 1<<R3 | 1<<R4 | 1<<R5 | 1<<R6 | 1<<R7 | 1<<R8 | 1<<R9 | 1<<R10;

#define isU12(offs) (((offs) & 0xfff) == (offs))
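// For example (comment added for illustration): isU12(4095) holds, while
// isU12(4096) and isU12(-1) do not, since only non-negative values that fit
// in 12 bits satisfy ((offs) & 0xfff) == (offs).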
#define IsFpReg(_r)     ((rmask((Register)_r) & (FpRegs)) != 0)
#define IsGpReg(_r)     ((rmask((Register)_r) & (GpRegs)) != 0)
#define FpRegNum(_fpr)  ((_fpr) - FirstFloatReg)

#define firstreg()      R0
// only good for normal regs
#define imm2register(c) (Register)(c-1)

verbose_only( extern const char* regNames[]; )
verbose_only( extern const char* condNames[]; )
verbose_only( extern const char* shiftNames[]; )
// abstract to platform specific calls
#define nExtractPlatformFlags(x)    0

#define DECLARE_PLATFORM_STATS()

#define DECLARE_PLATFORM_REGALLOC()

#ifdef DEBUG
# define DECLARE_PLATFORM_ASSEMBLER_DEBUG()                             \
    inline bool         isOp2Imm(uint32_t literal);                     \
    inline uint32_t     decOp2Imm(uint32_t enc);
#else
// define stubs, for code that defines NJ_VERBOSE without DEBUG
# define DECLARE_PLATFORM_ASSEMBLER_DEBUG()                             \
    inline bool         isOp2Imm(uint32_t ) { return true; }            \
    inline uint32_t     decOp2Imm(uint32_t ) { return 0; }
#endif

#define DECLARE_PLATFORM_ASSEMBLER()                                                \
    DECLARE_PLATFORM_ASSEMBLER_DEBUG()                                              \
    const static Register argRegs[4], retRegs[2];                                   \
    void        BranchWithLink(NIns* addr);                                         \
    inline void BLX(Register addr, bool chk = true);                                \
    void        JMP_far(NIns*);                                                     \
    void        B_cond_chk(ConditionCode, NIns*, bool);                             \
    void        underrunProtect(int bytes);                                         \
    void        nativePageReset();                                                  \
    void        nativePageSetup();                                                  \
    void        asm_immf_nochk(Register, int32_t, int32_t);                         \
    void        asm_regarg(ArgType, LInsp, Register);                               \
    void        asm_stkarg(LInsp p, int stkd);                                      \
    void        asm_cmpi(Register, int32_t imm);                                    \
    void        asm_ldr_chk(Register d, Register b, int32_t off, bool chk);         \
    void        asm_cmp(LIns *cond);                                                \
    void        asm_fcmp(LIns *cond);                                               \
    void        asm_ld_imm(Register d, int32_t imm, bool chk = true);               \
    void        asm_arg(ArgType ty, LInsp arg, Register& r, int& stkd);             \
    void        asm_arg_64(LInsp arg, Register& r, int& stkd);                      \
    void        asm_add_imm(Register rd, Register rn, int32_t imm, int stat = 0);   \
    void        asm_sub_imm(Register rd, Register rn, int32_t imm, int stat = 0);   \
    void        asm_and_imm(Register rd, Register rn, int32_t imm, int stat = 0);   \
    void        asm_orr_imm(Register rd, Register rn, int32_t imm, int stat = 0);   \
    void        asm_eor_imm(Register rd, Register rn, int32_t imm, int stat = 0);   \
    inline bool     encOp2Imm(uint32_t literal, uint32_t * enc);                    \
    inline uint32_t CountLeadingZeroes(uint32_t data);                              \
    int *       _nSlot;                                                             \
    int *       _nExitSlot;                                                         \
    bool        blx_lr_bug;                                                         \
    int         max_out_args; /* bytes */
#define IMM32(imm)  *(--_nIns) = (NIns)((imm));

#define OP_IMM  (1<<25)
#define OP_STAT (1<<20)

#define COND_AL ((uint32_t)AL<<28)

typedef enum {
    LSL_imm = 0, // LSL #c - Logical Shift Left
    LSL_reg = 1, // LSL Rc - Logical Shift Left
    LSR_imm = 2, // LSR #c - Logical Shift Right
    LSR_reg = 3, // LSR Rc - Logical Shift Right
    ASR_imm = 4, // ASR #c - Arithmetic Shift Right
    ASR_reg = 5, // ASR Rc - Arithmetic Shift Right
    ROR_imm = 6, // Rotate Right (c != 0)
    RRX     = 6, // Rotate Right one bit with extend (c == 0)
    ROR_reg = 7  // Rotate Right
} ShiftOperator;
#define IsShift(sh)    (((sh) >= LSL_imm) && ((sh) <= ROR_reg))

#define LD32_size 8
#define BEGIN_NATIVE_CODE(x)                    \
    { DWORD* _nIns = (uint8_t*)x

#define END_NATIVE_CODE(x)                      \
    (x) = (dictwordp*)_nIns; }

// BX
#define BX(_r)  do {                                                    \
        underrunProtect(4);                                             \
        NanoAssert(IsGpReg(_r));                                        \
        *(--_nIns) = (NIns)( COND_AL | (0x12<<20) | (0xFFF<<8) | (1<<4) | (_r)); \
        asm_output("bx %s", gpn(_r)); } while(0)
/*
 * ALU operations
 */

enum {
    ARM_and = 0,
    ARM_eor = 1,
    ARM_sub = 2,
    ARM_rsb = 3,
    ARM_add = 4,
    ARM_adc = 5,
    ARM_sbc = 6,
    ARM_rsc = 7,
    ARM_tst = 8,
    ARM_teq = 9,
    ARM_cmp = 10,
    ARM_cmn = 11,
    ARM_orr = 12,
    ARM_mov = 13,
    ARM_bic = 14,
    ARM_mvn = 15
};
#define IsOp(op)  (((ARM_##op) >= ARM_and) && ((ARM_##op) <= ARM_mvn))

// ALU operation with register and 8-bit immediate arguments
//  S      - bit, 0 or 1, whether the CPSR register is updated
//  rd     - destination register
//  rl     - first (left) operand register
//  op2imm - operand 2 immediate. Use encOp2Imm (from NativeARM.cpp) to calculate this.
#define ALUi(cond, op, S, rd, rl, op2imm)   ALUi_chk(cond, op, S, rd, rl, op2imm, 1)
#define ALUi_chk(cond, op, S, rd, rl, op2imm, chk) do {\
        if (chk) underrunProtect(4);\
        NanoAssert(IsCond(cond));\
        NanoAssert(IsOp(op));\
        NanoAssert(((S)==0) || ((S)==1));\
        NanoAssert(IsGpReg(rd) && IsGpReg(rl));\
        NanoAssert(isOp2Imm(op2imm));\
        *(--_nIns) = (NIns) ((cond)<<28 | OP_IMM | (ARM_##op)<<21 | (S)<<20 | (rl)<<16 | (rd)<<12 | (op2imm));\
        if (ARM_##op == ARM_mov || ARM_##op == ARM_mvn) {               \
            asm_output("%s%s%s %s, #0x%X", #op, condNames[cond], (S)?"s":"", gpn(rd), decOp2Imm(op2imm));\
        } else if (ARM_##op >= ARM_tst && ARM_##op <= ARM_cmn) {        \
            NanoAssert(S==1);\
            asm_output("%s%s %s, #0x%X", #op, condNames[cond], gpn(rl), decOp2Imm(op2imm));\
        } else {                                                        \
            asm_output("%s%s%s %s, %s, #0x%X", #op, condNames[cond], (S)?"s":"", gpn(rd), gpn(rl), decOp2Imm(op2imm));\
        }                                                               \
    } while (0)
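// Illustrative note (comment added, not from the original source): an ARM
// operand-2 immediate is an 8-bit value rotated right by an even amount
// (0..30), packed as a 4-bit rotate field plus an 8-bit immediate. For
// example, 0x3FC (0xFF << 2) is encodable, while 0x101 is not, because its
// set bits cannot fit in any 8-bit window under an even rotation; encOp2Imm
// reports whether a given literal can be encoded this way.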
// ALU operation with two register arguments
//  S   - bit, 0 or 1, whether the CPSR register is updated
//  rd  - destination register
//  rl  - first (left) operand register
//  rr  - second (right) operand register
#define ALUr(cond, op, S, rd, rl, rr)   ALUr_chk(cond, op, S, rd, rl, rr, 1)
#define ALUr_chk(cond, op, S, rd, rl, rr, chk) do {\
        if (chk) underrunProtect(4);\
        NanoAssert(IsCond(cond));\
        NanoAssert(IsOp(op));\
        NanoAssert(((S)==0) || ((S)==1));\
        NanoAssert(IsGpReg(rd) && IsGpReg(rl) && IsGpReg(rr));\
        *(--_nIns) = (NIns) ((cond)<<28 |(ARM_##op)<<21 | (S)<<20 | (rl)<<16 | (rd)<<12 | (rr));\
        if (ARM_##op == ARM_mov || ARM_##op == ARM_mvn) {               \
            asm_output("%s%s%s %s, %s", #op, condNames[cond], (S)?"s":"", gpn(rd), gpn(rr));\
        } else if (ARM_##op >= ARM_tst && ARM_##op <= ARM_cmn) {        \
            NanoAssert(S==1);\
            asm_output("%s%s %s, %s", #op, condNames[cond], gpn(rl), gpn(rr));\
        } else {                                                        \
            asm_output("%s%s%s %s, %s, %s", #op, condNames[cond], (S)?"s":"", gpn(rd), gpn(rl), gpn(rr));\
        }                                                               \
    } while (0)
// ALU operation with two register arguments, with rr operated on by a shift and shift immediate
//  S   - bit, 0 or 1, whether the CPSR register is updated
//  rd  - destination register
//  rl  - first (left) operand register
//  rr  - second (right) operand register
//  sh  - a ShiftOperator
//  imm - immediate argument to shift operator, 5 bits (0..31)
#define ALUr_shi(cond, op, S, rd, rl, rr, sh, imm) do {\
        underrunProtect(4);\
        NanoAssert(IsCond(cond));\
        NanoAssert(IsOp(op));\
        NanoAssert(((S)==0) || ((S)==1));\
        NanoAssert(IsGpReg(rd) && IsGpReg(rl) && IsGpReg(rr));\
        NanoAssert(IsShift(sh));\
        NanoAssert((imm)>=0 && (imm)<32);\
        *(--_nIns) = (NIns) ((cond)<<28 |(ARM_##op)<<21 | (S)<<20 | (rl)<<16 | (rd)<<12 | (imm)<<7 | (sh)<<4 | (rr));\
        if (ARM_##op == ARM_mov || ARM_##op == ARM_mvn) {               \
            NanoAssert(rl==0);                                          \
            asm_output("%s%s%s %s, %s, %s #%d", #op, condNames[cond], (S)?"s":"", gpn(rd), gpn(rr), shiftNames[sh], (imm));\
        } else if (ARM_##op >= ARM_tst && ARM_##op <= ARM_cmn) {        \
            NanoAssert(S==1);\
            NanoAssert(rd==0);\
            asm_output("%s%s %s, %s, %s #%d", #op, condNames[cond], gpn(rl), gpn(rr), shiftNames[sh], (imm));\
        } else {                                                        \
            asm_output("%s%s%s %s, %s, %s, %s #%d", #op, condNames[cond], (S)?"s":"", gpn(rd), gpn(rl), gpn(rr), shiftNames[sh], (imm));\
        }                                                               \
    } while (0)
// ALU operation with two register arguments, with rr operated on by a shift and shift register
//  S   - bit, 0 or 1, whether the CPSR register is updated
//  rd  - destination register
//  rl  - first (left) operand register
//  rr  - second (right) operand register
//  sh  - a ShiftOperator
//  rs  - shift operand register
#define ALUr_shr(cond, op, S, rd, rl, rr, sh, rs) do {\
        underrunProtect(4);\
        NanoAssert(IsCond(cond));\
        NanoAssert(IsOp(op));\
        NanoAssert(((S)==0) || ((S)==1));\
        NanoAssert(IsGpReg(rd) && IsGpReg(rl) && IsGpReg(rr) && IsGpReg(rs));\
        NanoAssert(IsShift(sh));\
        *(--_nIns) = (NIns) ((cond)<<28 |(ARM_##op)<<21 | (S)<<20 | (rl)<<16 | (rd)<<12 | (rs)<<8 | (sh)<<4 | (rr));\
        if (ARM_##op == ARM_mov || ARM_##op == ARM_mvn) {               \
            asm_output("%s%s%s %s, %s, %s %s", #op, condNames[cond], (S)?"s":"", gpn(rd), gpn(rr), shiftNames[sh], gpn(rs));\
        } else if (ARM_##op >= ARM_tst && ARM_##op <= ARM_cmn) {        \
            NanoAssert(S==1);\
            asm_output("%s%s %s, %s, %s %s", #op, condNames[cond], gpn(rl), gpn(rr), shiftNames[sh], gpn(rs));\
        } else {                                                        \
            asm_output("%s%s%s %s, %s, %s, %s %s", #op, condNames[cond], (S)?"s":"", gpn(rd), gpn(rl), gpn(rr), shiftNames[sh], gpn(rs));\
        }                                                               \
    } while (0)
// --------
// Basic arithmetic operations.
// --------
// Argument naming conventions for these macros:
//  _d      Destination register.
//  _l      First (left) operand.
//  _r      Second (right) operand.
//  _op2imm An operand 2 immediate value. Use encOp2Imm to calculate this.
//  _s      Set to 1 to update the status flags (for subsequent conditional
//          tests). Otherwise, set to 0.

// _d = _l + decOp2Imm(_op2imm)
#define ADDis(_d,_l,_op2imm,_s) ALUi(AL, add, _s, _d, _l, _op2imm)
#define ADDi(_d,_l,_op2imm)     ALUi(AL, add,  0, _d, _l, _op2imm)

// _d = _l & ~decOp2Imm(_op2imm)
#define BICis(_d,_l,_op2imm,_s) ALUi(AL, bic, _s, _d, _l, _op2imm)
#define BICi(_d,_l,_op2imm)     ALUi(AL, bic,  0, _d, _l, _op2imm)

// _d = _l - decOp2Imm(_op2imm)
#define SUBis(_d,_l,_op2imm,_s) ALUi(AL, sub, _s, _d, _l, _op2imm)
#define SUBi(_d,_l,_op2imm)     ALUi(AL, sub,  0, _d, _l, _op2imm)

// _d = _l & decOp2Imm(_op2imm)
#define ANDis(_d,_l,_op2imm,_s) ALUi(AL, and, _s, _d, _l, _op2imm)
#define ANDi(_d,_l,_op2imm)     ALUi(AL, and,  0, _d, _l, _op2imm)

// _d = _l | decOp2Imm(_op2imm)
#define ORRis(_d,_l,_op2imm,_s) ALUi(AL, orr, _s, _d, _l, _op2imm)
#define ORRi(_d,_l,_op2imm)     ALUi(AL, orr,  0, _d, _l, _op2imm)

// _d = _l ^ decOp2Imm(_op2imm)
#define EORis(_d,_l,_op2imm,_s) ALUi(AL, eor, _s, _d, _l, _op2imm)
#define EORi(_d,_l,_op2imm)     ALUi(AL, eor,  0, _d, _l, _op2imm)

// _d = _l | _r
#define ORRs(_d,_l,_r,_s)   ALUr(AL, orr, _s, _d, _l, _r)
#define ORR(_d,_l,_r)       ALUr(AL, orr,  0, _d, _l, _r)

// _d = _l & _r
#define ANDs(_d,_l,_r,_s)   ALUr(AL, and, _s, _d, _l, _r)
#define AND(_d,_l,_r)       ALUr(AL, and,  0, _d, _l, _r)

// _d = _l ^ _r
#define EORs(_d,_l,_r,_s)   ALUr(AL, eor, _s, _d, _l, _r)
#define EOR(_d,_l,_r)       ALUr(AL, eor,  0, _d, _l, _r)

// _d = _l + _r
#define ADDs(_d,_l,_r,_s)   ALUr(AL, add, _s, _d, _l, _r)
#define ADD(_d,_l,_r)       ALUr(AL, add,  0, _d, _l, _r)

// _d = _l - _r
#define SUBs(_d,_l,_r,_s)   ALUr(AL, sub, _s, _d, _l, _r)
#define SUB(_d,_l,_r)       ALUr(AL, sub,  0, _d, _l, _r)
// --------
// Other operations.
// --------

// [_d_hi,_d] = _l * _r
#define SMULL(_d, _d_hi, _l, _r)  do {                                                  \
        underrunProtect(4);                                                             \
        NanoAssert((_config.arm_arch >= 6) || ((_d   ) != (_l)));                       \
        NanoAssert((_config.arm_arch >= 6) || ((_d_hi) != (_l)));                       \
        NanoAssert(IsGpReg(_d) && IsGpReg(_d_hi) && IsGpReg(_l) && IsGpReg(_r));        \
        NanoAssert(((_d) != PC) && ((_d_hi) != PC) && ((_l) != PC) && ((_r) != PC));    \
        *(--_nIns) = (NIns)( COND_AL | 0xc00090 | (_d_hi)<<16 | (_d)<<12 | (_r)<<8 | (_l) ); \
        asm_output("smull %s, %s, %s, %s",gpn(_d),gpn(_d_hi),gpn(_l),gpn(_r));          \
    } while(0)

// _d = _l * _r
#define MUL(_d, _l, _r)  do {                                               \
        underrunProtect(4);                                                 \
        NanoAssert((_config.arm_arch >= 6) || ((_d) != (_l)));              \
        NanoAssert(IsGpReg(_d) && IsGpReg(_l) && IsGpReg(_r));              \
        NanoAssert(((_d) != PC) && ((_l) != PC) && ((_r) != PC));           \
        *(--_nIns) = (NIns)( COND_AL | (_d)<<16 | (_r)<<8 | 0x90 | (_l) );  \
        asm_output("mul %s, %s, %s",gpn(_d),gpn(_l),gpn(_r));               \
    } while(0)
// RSBS _d, _r
// _d = 0 - _r
#define RSBS(_d,_r) ALUi(AL, rsb, 1, _d, _r, 0)

// MVN
// _d = ~_r (one's complement)
#define MVN(_d,_r)                          ALUr(AL, mvn, 0, _d, 0, _r)
#define MVNis_chk(_d,_op2imm,_stat,_chk)    ALUi_chk(AL, mvn, _stat, _d, 0, _op2imm, _chk)
#define MVNis(_d,_op2imm,_stat)             MVNis_chk(_d,_op2imm,_stat,1);
// Logical Shift Right (LSR) shifts the bits right without preserving the sign bit.
// MOVS _d, _r, LSR <_s>
// _d = _r >> _s
#define LSR(_d,_r,_s)   ALUr_shr(AL, mov, 1, _d, 0, _r, LSR_reg, _s)

// Logical Shift Right (LSR) shifts the bits right without preserving the sign bit.
// MOVS _d, _r, LSR #(_imm & 0x1f)
// _d = _r >> (_imm & 0x1f)
#define LSRi(_d,_r,_imm)    ALUr_shi(AL, mov, 1, _d, 0, _r, LSR_imm, (_imm & 0x1f))

// Arithmetic Shift Right (ASR) maintains the sign extension.
// MOVS _d, _r, ASR <_s>
// _d = _r >> _s
#define ASR(_d,_r,_s)   ALUr_shr(AL, mov, 1, _d, 0, _r, ASR_reg, _s)

// Arithmetic Shift Right (ASR) maintains the sign extension.
// MOVS _d, _r, ASR #(_imm & 0x1f)
// _d = _r >> (_imm & 0x1f)
#define ASRi(_d,_r,_imm)    ALUr_shi(AL, mov, 1, _d, 0, _r, ASR_imm, (_imm & 0x1f))

// Logical Shift Left (LSL).
// MOVS _d, _r, LSL <_s>
// _d = _r << _s
#define LSL(_d, _r, _s)     ALUr_shr(AL, mov, 1, _d, 0, _r, LSL_reg, _s)

// Logical Shift Left (LSL).
// MOVS _d, _r, LSL #(_imm & 0x1f)
// _d = _r << (_imm & 0x1f)
#define LSLi(_d, _r, _imm)  ALUr_shi(AL, mov, 1, _d, 0, _r, LSL_imm, (_imm & 0x1f))
// TST
#define TST(_l,_r)      ALUr(AL, tst, 1, 0, _l, _r)
#define TSTi(_d,_imm)   ALUi(AL, tst, 1, 0, _d, _imm)

// CMP
#define CMP(_l,_r)  ALUr(AL, cmp, 1, 0, _l, _r)
#define CMN(_l,_r)  ALUr(AL, cmn, 1, 0, _l, _r)

// MOV
#define MOVis_chk(_d,_op2imm,_stat,_chk)    ALUi_chk(AL, mov, _stat, _d, 0, _op2imm, _chk)
#define MOVis(_d,_op2imm,_stat)             MOVis_chk(_d,_op2imm,_stat,1)
#define MOVi(_d,_op2imm)                    MOVis(_d,_op2imm,0);

#define MOV_cond(_cond,_d,_s)               ALUr(_cond, mov, 0, _d, 0, _s)

#define MOV(dr,sr)   MOV_cond(AL, dr, sr)
#define MOVEQ(dr,sr) MOV_cond(EQ, dr, sr)
#define MOVNE(dr,sr) MOV_cond(NE, dr, sr)
#define MOVLT(dr,sr) MOV_cond(LT, dr, sr)
#define MOVLE(dr,sr) MOV_cond(LE, dr, sr)
#define MOVGT(dr,sr) MOV_cond(GT, dr, sr)
#define MOVGE(dr,sr) MOV_cond(GE, dr, sr)
#define MOVLO(dr,sr) MOV_cond(LO, dr, sr)   // Equivalent to MOVCC
#define MOVCC(dr,sr) MOV_cond(CC, dr, sr)   // Equivalent to MOVLO
#define MOVLS(dr,sr) MOV_cond(LS, dr, sr)
#define MOVHI(dr,sr) MOV_cond(HI, dr, sr)
#define MOVHS(dr,sr) MOV_cond(HS, dr, sr)   // Equivalent to MOVCS
#define MOVCS(dr,sr) MOV_cond(CS, dr, sr)   // Equivalent to MOVHS
#define MOVVC(dr,sr) MOV_cond(VC, dr, sr)   // overflow clear
// _d = [_b+off]
#define LDR(_d,_b,_off)         asm_ldr_chk(_d,_b,_off,1)
#define LDR_nochk(_d,_b,_off)   asm_ldr_chk(_d,_b,_off,0)

// _d = [_b + _x<<_s]
#define LDR_scaled(_d, _b, _x, _s) do {                                 \
        NanoAssert(((_s)&31) == _s);\
        NanoAssert(IsGpReg(_d) && IsGpReg(_b) && IsGpReg(_x));\
        underrunProtect(4);\
        *(--_nIns) = (NIns)(COND_AL | (0x79<<20) | ((_b)<<16) | ((_d)<<12) | ((_s)<<7) | (_x));\
        asm_output("ldr %s, [%s, +%s, LSL #%d]", gpn(_d), gpn(_b), gpn(_x), (_s));\
    } while (0)

// _d = #_imm
#define LDi(_d,_imm) asm_ld_imm(_d,_imm)
// MOVW and MOVT are ARMv6T2 or newer only

// MOVW -- writes _imm into _d, zero-extends.
#define MOVWi_cond_chk(_cond,_d,_imm,_chk) do {                         \
        NanoAssert(isU16(_imm));                                        \
        NanoAssert(IsGpReg(_d));                                        \
        NanoAssert(IsCond(_cond));                                      \
        if (_chk) underrunProtect(4);                                   \
        *(--_nIns) = (NIns)( (_cond)<<28 | 3<<24 | 0<<20 | (((_imm)>>12)&0xf)<<16 | (_d)<<12 | ((_imm)&0xfff) ); \
        asm_output("movw%s %s, #0x%x", condNames[_cond], gpn(_d), (_imm)); \
    } while (0)

#define MOVWi(_d,_imm)              MOVWi_cond_chk(AL, _d, _imm, 1)
#define MOVWi_chk(_d,_imm,_chk)     MOVWi_cond_chk(AL, _d, _imm, _chk)
#define MOVWi_cond(_cond,_d,_imm)   MOVWi_cond_chk(_cond, _d, _imm, 1)

// MOVT -- writes _imm into top halfword of _d, does not affect bottom halfword
#define MOVTi_cond_chk(_cond,_d,_imm,_chk) do {                         \
        NanoAssert(isU16(_imm));                                        \
        NanoAssert(IsGpReg(_d));                                        \
        NanoAssert(IsCond(_cond));                                      \
        if (_chk) underrunProtect(4);                                   \
        *(--_nIns) = (NIns)( (_cond)<<28 | 3<<24 | 4<<20 | (((_imm)>>12)&0xf)<<16 | (_d)<<12 | ((_imm)&0xfff) ); \
        asm_output("movt%s %s, #0x%x", condNames[_cond], gpn(_d), (_imm)); \
    } while (0)

#define MOVTi(_d,_imm)              MOVTi_cond_chk(AL, _d, _imm, 1)
#define MOVTi_chk(_d,_imm,_chk)     MOVTi_cond_chk(AL, _d, _imm, _chk)
#define MOVTi_cond(_cond,_d,_imm)   MOVTi_cond_chk(_cond, _d, _imm, 1)
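// Illustrative note (comment added, not from the original source): on ARMv6T2
// and later a full 32-bit constant can be materialized with a movw/movt pair,
// i.e. MOVWi(_d, imm & 0xffff) for the low halfword followed in program order
// by MOVTi(_d, (imm >> 16) & 0xffff) for the high halfword. Because code is
// emitted backwards (each write pre-decrements _nIns), the MOVTi macro would
// be invoked first to obtain that program order.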
// i386 compat, for Assembler.cpp
#define MR(d,s)                 MOV(d,s)
#define ST(base,offset,reg)     STR(reg,base,offset)

// Load a byte (8 bits). The offset range is ±4095.
#define LDRB(_d,_n,_off) do {                                           \
        NanoAssert(IsGpReg(_d) && IsGpReg(_n));                         \
        underrunProtect(4);                                             \
        if (_off < 0) {                                                 \
            NanoAssert(isU12(-_off));                                   \
            *(--_nIns) = (NIns)( COND_AL | (0x55<<20) | ((_n)<<16) | ((_d)<<12) | ((-_off)&0xfff) ); \
        } else {                                                        \
            NanoAssert(isU12(_off));                                    \
            *(--_nIns) = (NIns)( COND_AL | (0x5D<<20) | ((_n)<<16) | ((_d)<<12) | ((_off)&0xfff) ); \
        }                                                               \
        asm_output("ldrb %s, [%s,#%d]", gpn(_d),gpn(_n),(_off));        \
    } while(0)
// Load a byte (8 bits), sign-extend to 32 bits. The offset range is
// ±255 (different from LDRB, same as LDRH/LDRSH)
#define LDRSB(_d,_n,_off) do {                                          \
        NanoAssert(IsGpReg(_d) && IsGpReg(_n));                         \
        underrunProtect(4);                                             \
        if (_off < 0) {                                                 \
            NanoAssert(isU8(-_off));                                    \
            *(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_n)<<16) | ((_d)<<12) | ((0xD)<<4) | (((-_off)&0xf0)<<4) | ((-_off)&0xf) ); \
        } else {                                                        \
            NanoAssert(isU8(_off));                                     \
            *(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_n)<<16) | ((_d)<<12) | ((0xD)<<4) | (((_off)&0xf0)<<4) | ((_off)&0xf) ); \
        }                                                               \
        asm_output("ldrsb %s, [%s,#%d]", gpn(_d),gpn(_n),(_off));       \
    } while(0)
// Load a half word (16 bits), zero-extend to 32 bits. The offset range is ±255, and
// must be aligned to two bytes on some architectures (the caller is responsible
// for ensuring appropriate alignment)
#define LDRH(_d,_n,_off) do {                                           \
        NanoAssert(IsGpReg(_d) && IsGpReg(_n));                         \
        underrunProtect(4);                                             \
        if (_off < 0) {                                                 \
            NanoAssert(isU8(-_off));                                    \
            *(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_n)<<16) | ((_d)<<12) | ((0xB)<<4) | (((-_off)&0xf0)<<4) | ((-_off)&0xf) ); \
        } else {                                                        \
            NanoAssert(isU8(_off));                                     \
            *(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_n)<<16) | ((_d)<<12) | ((0xB)<<4) | (((_off)&0xf0)<<4) | ((_off)&0xf) ); \
        }                                                               \
        asm_output("ldrh %s, [%s,#%d]", gpn(_d),gpn(_n),(_off));        \
    } while(0)
// Load and sign-extend a half word (16 bits). The offset range is ±255, and
// must be aligned to two bytes on some architectures (the caller is responsible
// for ensuring appropriate alignment)
#define LDRSH(_d,_n,_off) do {                                          \
        NanoAssert(IsGpReg(_d) && IsGpReg(_n));                         \
        underrunProtect(4);                                             \
        if (_off < 0) {                                                 \
            NanoAssert(isU8(-_off));                                    \
            *(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_n)<<16) | ((_d)<<12) | ((0xF)<<4) | (((-_off)&0xf0)<<4) | ((-_off)&0xf) ); \
        } else {                                                        \
            NanoAssert(isU8(_off));                                     \
            *(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_n)<<16) | ((_d)<<12) | ((0xF)<<4) | (((_off)&0xf0)<<4) | ((_off)&0xf) ); \
        }                                                               \
        asm_output("ldrsh %s, [%s,#%d]", gpn(_d),gpn(_n),(_off));       \
    } while(0)
// Valid offset for STR and STRB is +/- 4095, STRH only has +/- 255
#define STR(_d,_n,_off) do {                                            \
        NanoAssert(IsGpReg(_d) && IsGpReg(_n));                         \
        NanoAssert(isU12(_off) || isU12(-_off));                        \
        underrunProtect(4);                                             \
        if ((_off)<0)   *(--_nIns) = (NIns)( COND_AL | (0x50<<20) | ((_n)<<16) | ((_d)<<12) | ((-(_off))&0xFFF) ); \
        else            *(--_nIns) = (NIns)( COND_AL | (0x58<<20) | ((_n)<<16) | ((_d)<<12) | ((_off)&0xFFF) ); \
        asm_output("str %s, [%s, #%d]", gpn(_d), gpn(_n), (_off));      \
    } while(0)

#define STRB(_d,_n,_off) do {                                           \
        NanoAssert(IsGpReg(_d) && IsGpReg(_n));                         \
        NanoAssert(isU12(_off) || isU12(-_off));                        \
        underrunProtect(4);                                             \
        if ((_off)<0)   *(--_nIns) = (NIns)( COND_AL | (0x54<<20) | ((_n)<<16) | ((_d)<<12) | ((-(_off))&0xFFF) ); \
        else            *(--_nIns) = (NIns)( COND_AL | (0x5C<<20) | ((_n)<<16) | ((_d)<<12) | ((_off)&0xFFF) ); \
        asm_output("strb %s, [%s, #%d]", gpn(_d), gpn(_n), (_off));     \
    } while(0)

// Only +/- 255 range, unlike STRB/STR
#define STRH(_d,_n,_off) do {                                           \
        NanoAssert(IsGpReg(_d) && IsGpReg(_n));                         \
        underrunProtect(4);                                             \
        if ((_off)<0) {                                                 \
            NanoAssert(isU8(-_off));                                    \
            *(--_nIns) = (NIns)( COND_AL | (0x14<<20) | ((_n)<<16) | ((_d)<<12) | (((-(_off))&0xF0)<<4) | (0xB<<4) | ((-(_off))&0xF) ); \
        } else {                                                        \
            NanoAssert(isU8(_off));                                     \
            *(--_nIns) = (NIns)( COND_AL | (0x1C<<20) | ((_n)<<16) | ((_d)<<12) | (((_off)&0xF0)<<4) | (0xB<<4) | ((_off)&0xF) ); \
        }                                                               \
        asm_output("strh %s, [%s, #%d]", gpn(_d), gpn(_n), (_off));     \
    } while(0)
// Encode a breakpoint. The ID is not important and is ignored by the
// processor, but it can be useful as a marker when debugging emitted code.
#define BKPT_insn       ((NIns)( COND_AL | (0x12<<20) | (0x7<<4) ))
#define BKPTi_insn(id)  ((NIns)(BKPT_insn | ((id << 4) & 0xfff00) | (id & 0xf)))

#define BKPT_nochk()    BKPTi_nochk(0)
#define BKPTi_nochk(id) do {                                            \
        NanoAssert((id & 0xffff) == id);                                \
        *(--_nIns) = BKPTi_insn(id);                                    \
    } while (0)
// STMFD SP!, {reg}
#define PUSHr(_r) do {                                                  \
        underrunProtect(4);                                             \
        NanoAssert(IsGpReg(_r));                                        \
        *(--_nIns) = (NIns)( COND_AL | (0x92<<20) | (SP<<16) | rmask(_r) ); \
        asm_output("push %s",gpn(_r)); } while (0)

// STMFD SP!,{reglist}
#define PUSH_mask(_mask) do {                                           \
        underrunProtect(4);                                             \
        NanoAssert(isU16(_mask));                                       \
        *(--_nIns) = (NIns)( COND_AL | (0x92<<20) | (SP<<16) | (_mask) ); \
        asm_output("push %x", (_mask));} while (0)

// LDMFD SP!,{reg}
#define POPr(_r) do {                                                   \
        underrunProtect(4);                                             \
        NanoAssert(IsGpReg(_r));                                        \
        *(--_nIns) = (NIns)( COND_AL | (0x8B<<20) | (SP<<16) | rmask(_r) ); \
        asm_output("pop %s",gpn(_r));} while (0)

// LDMFD SP!,{reglist}
#define POP_mask(_mask) do {                                            \
        underrunProtect(4);                                             \
        NanoAssert(isU16(_mask));                                       \
        *(--_nIns) = (NIns)( COND_AL | (0x8B<<20) | (SP<<16) | (_mask) ); \
        asm_output("pop %x", (_mask));} while (0)
// PC always points to current instruction + 8, so when calculating pc-relative
// offsets, use PC+8.
#define PC_OFFSET_FROM(target,frompc) ((intptr_t)(target) - ((intptr_t)(frompc) + 8))
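// Worked example (comment added for illustration, not from the original
// source): for a branch instruction located at 0x8000 targeting 0x8010, the
// pipeline-adjusted offset is PC_OFFSET_FROM(0x8010, 0x8000) =
// 0x8010 - (0x8000 + 8) = 8 bytes, which a B/BL encoding would then store as
// a word offset (8 >> 2 = 2).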
#define B_cond(_c,_t)                           \
    B_cond_chk(_c,_t,1)

#define B_nochk(_t)                             \
    B_cond_chk(AL,_t,0)

#define B(t)    B_cond(AL,t)
#define BHI(t)  B_cond(HI,t)
#define BLS(t)  B_cond(LS,t)
#define BHS(t)  B_cond(HS,t)
#define BLO(t)  B_cond(LO,t)
#define BEQ(t)  B_cond(EQ,t)
#define BNE(t)  B_cond(NE,t)
#define BLT(t)  B_cond(LT,t)
#define BGE(t)  B_cond(GE,t)
#define BLE(t)  B_cond(LE,t)
#define BGT(t)  B_cond(GT,t)
#define BVS(t)  B_cond(VS,t)
#define BVC(t)  B_cond(VC,t)
#define BCC(t)  B_cond(CC,t)
#define BCS(t)  B_cond(CS,t)

#define JMP(t)          B(t)
#define JMP_nochk(t)    B_nochk(t)
// MOV(cond) _r, #1
// MOV(!cond) _r, #0
#define SET(_r,_cond) do {                                              \
    ConditionCode _opp = OppositeCond(_cond);                           \
    underrunProtect(8);                                                 \
    *(--_nIns) = (NIns)( ( _opp<<28) | (0x3A<<20) | ((_r)<<12) | (0) ); \
    *(--_nIns) = (NIns)( (_cond<<28) | (0x3A<<20) | ((_r)<<12) | (1) ); \
    asm_output("mov%s %s, #1", condNames[_cond], gpn(_r));              \
    asm_output("mov%s %s, #0", condNames[_opp], gpn(_r));               \
    } while (0)
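// Usage sketch (comment added for illustration, not from the original
// source): after a CMP has set the flags, SET(R0,EQ) emits the pair
// "moveq r0, #1" / "movne r0, #0" in program order, leaving a 0/1 boolean in
// R0. The SETxx aliases below simply fix the condition.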
#define SETEQ(r)    SET(r,EQ)
#define SETNE(r)    SET(r,NE)
#define SETLT(r)    SET(r,LT)
#define SETLE(r)    SET(r,LE)
#define SETGT(r)    SET(r,GT)
#define SETGE(r)    SET(r,GE)
#define SETLO(r)    SET(r,LO)
#define SETLS(r)    SET(r,LS)
#define SETHI(r)    SET(r,HI)
#define SETHS(r)    SET(r,HS)
#define SETVS(r)    SET(r,VS)
#define SETCS(r)    SET(r,CS)
// Load and sign extend a 16-bit value into a reg
#define MOVSX(_d,_off,_b) do {                                          \
        if ((_off)>=0) {                                                \
            if ((_off)<256) {                                           \
                underrunProtect(4);                                     \
                *(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_b)<<16) | ((_d)<<12) | ((((_off)>>4)&0xF)<<8) | (0xF<<4) | ((_off)&0xF) ); \
            } else if ((_off)<=510) {                                   \
                underrunProtect(8);                                     \
                int rem = (_off) - 255;                                 \
                NanoAssert(rem<256);                                    \
                *(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_d)<<16) | ((_d)<<12) | ((((rem)>>4)&0xF)<<8) | (0xF<<4) | ((rem)&0xF) ); \
                *(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | ((_b)<<16) | ((_d)<<12) | (0xFF) ); \
            } else {                                                    \
                underrunProtect(16);                                    \
                int rem = (_off) & 3;                                   \
                *(--_nIns) = (NIns)( COND_AL | (0x19<<20) | ((_b)<<16) | ((_d)<<12) | (0xF<<4) | (_d) ); \
                asm_output("ldrsh %s,[%s, #%d]",gpn(_d), gpn(_b), (_off)); \
                *(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | ((_d)<<16) | ((_d)<<12) | rem ); \
                *(--_nIns) = (NIns)( COND_AL | (0x1A<<20) | ((_d)<<12) | (2<<7)| (_d) ); \
                *(--_nIns) = (NIns)( COND_AL | (0x3B<<20) | ((_d)<<12) | (((_off)>>2)&0xFF) ); \
                asm_output("mov %s,%d",gpn(_d),(_off));                 \
            }                                                           \
        } else {                                                        \
            if ((_off)>-256) {                                          \
                underrunProtect(4);                                     \
                *(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_b)<<16) | ((_d)<<12) | ((((-(_off))>>4)&0xF)<<8) | (0xF<<4) | ((-(_off))&0xF) ); \
                asm_output("ldrsh %s,[%s, #%d]",gpn(_d), gpn(_b), (_off)); \
            } else if ((_off)>=-510) {                                  \
                underrunProtect(8);                                     \
                int rem = -(_off) - 255;                                \
                NanoAssert(rem<256);                                    \
                *(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_d)<<16) | ((_d)<<12) | ((((rem)>>4)&0xF)<<8) | (0xF<<4) | ((rem)&0xF) ); \
                *(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<22) | ((_b)<<16) | ((_d)<<12) | (0xFF) ); \
            } else NanoAssert(0);                                       \
        }                                                               \
    } while(0)
/*
 * VFP
 */

#define FMDRR(_Dm,_Rd,_Rn) do {                                         \
        underrunProtect(4);                                             \
        NanoAssert(_config.arm_vfp);                                    \
        NanoAssert(IsFpReg(_Dm) && IsGpReg(_Rd) && IsGpReg(_Rn));       \
        *(--_nIns) = (NIns)( COND_AL | (0xC4<<20) | ((_Rn)<<16) | ((_Rd)<<12) | (0xB1<<4) | (FpRegNum(_Dm)) ); \
        asm_output("fmdrr %s,%s,%s", gpn(_Dm), gpn(_Rd), gpn(_Rn));     \
    } while (0)

#define FMRRD(_Rd,_Rn,_Dm) do {                                         \
        underrunProtect(4);                                             \
        NanoAssert(_config.arm_vfp);                                    \
        NanoAssert(IsGpReg(_Rd) && IsGpReg(_Rn) && IsFpReg(_Dm));       \
        *(--_nIns) = (NIns)( COND_AL | (0xC5<<20) | ((_Rn)<<16) | ((_Rd)<<12) | (0xB1<<4) | (FpRegNum(_Dm)) ); \
        asm_output("fmrrd %s,%s,%s", gpn(_Rd), gpn(_Rn), gpn(_Dm));     \
    } while (0)

#define FMRDH(_Rd,_Dn) do {                                             \
        underrunProtect(4);                                             \
        NanoAssert(_config.arm_vfp);                                    \
        NanoAssert(IsGpReg(_Rd) && IsFpReg(_Dn));                       \
        *(--_nIns) = (NIns)( COND_AL | (0xE3<<20) | (FpRegNum(_Dn)<<16) | ((_Rd)<<12) | (0xB<<8) | (1<<4) ); \
        asm_output("fmrdh %s,%s", gpn(_Rd), gpn(_Dn));                  \
    } while (0)

#define FMRDL(_Rd,_Dn) do {                                             \
        underrunProtect(4);                                             \
        NanoAssert(_config.arm_vfp);                                    \
        NanoAssert(IsGpReg(_Rd) && IsFpReg(_Dn));                       \
        *(--_nIns) = (NIns)( COND_AL | (0xE1<<20) | (FpRegNum(_Dn)<<16) | ((_Rd)<<12) | (0xB<<8) | (1<<4) ); \
        asm_output("fmrdl %s,%s", gpn(_Rd), gpn(_Dn));                  \
    } while (0)
#define FSTD_allowD7(_Dd,_Rn,_offs,_allowD7) do {                       \
        underrunProtect(4);                                             \
        NanoAssert(_config.arm_vfp);                                    \
        NanoAssert((((_offs) & 3) == 0) && isS8((_offs) >> 2));         \
        NanoAssert((IsFpReg(_Dd) || ((_allowD7) && (_Dd) == D7)) && !IsFpReg(_Rn)); \
        int negflag = 1<<23;                                            \
        intptr_t offs = (_offs);                                        \
        if (_offs < 0) {                                                \
            negflag = 0<<23;                                            \
            offs = -(offs);                                             \
        }                                                               \
        *(--_nIns) = (NIns)( COND_AL | (0xD0<<20) | ((_Rn)<<16) | (FpRegNum(_Dd)<<12) | (0xB<<8) | negflag | ((offs>>2)&0xff) ); \
        asm_output("fstd %s,%s(%d)", gpn(_Dd), gpn(_Rn), _offs);        \
    } while (0)

#define FSTD(_Dd,_Rn,_offs) \
        FSTD_allowD7(_Dd,_Rn,_offs,0)

#define FLDD_chk(_Dd,_Rn,_offs,_chk) do {                               \
        if(_chk) underrunProtect(4);                                    \
        NanoAssert(_config.arm_vfp);                                    \
        NanoAssert((((_offs) & 3) == 0) && isS8((_offs) >> 2));         \
        NanoAssert(IsFpReg(_Dd) && !IsFpReg(_Rn));                      \
        int negflag = 1<<23;                                            \
        intptr_t offs = (_offs);                                        \
        if (_offs < 0) {                                                \
            negflag = 0<<23;                                            \
            offs = -(offs);                                             \
        }                                                               \
        *(--_nIns) = (NIns)( COND_AL | (0xD1<<20) | ((_Rn)<<16) | (FpRegNum(_Dd)<<12) | (0xB<<8) | negflag | ((offs>>2)&0xff) ); \
        asm_output("fldd %s,%s(%d)", gpn(_Dd), gpn(_Rn), _offs);        \
    } while (0)
#define FLDD(_Dd,_Rn,_offs) FLDD_chk(_Dd,_Rn,_offs,1)
#define FUITOD(_Dd,_Sm) do {                                            \
        underrunProtect(4);                                             \
        NanoAssert(_config.arm_vfp);                                    \
        NanoAssert(IsFpReg(_Dd) && ((_Sm) == S14));                     \
        *(--_nIns) = (NIns)( COND_AL | (0xEB8<<16) | (FpRegNum(_Dd)<<12) | (0x2D<<6) | (0<<5) | (0x7) ); \
        asm_output("fuitod %s,%s", gpn(_Dd), gpn(_Sm));                 \
    } while (0)

#define FNEGD(_Dd,_Dm) do {                                             \
        underrunProtect(4);                                             \
        NanoAssert(_config.arm_vfp);                                    \
        NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dm));                       \
        *(--_nIns) = (NIns)( COND_AL | (0xEB1<<16) | (FpRegNum(_Dd)<<12) | (0xB4<<4) | (FpRegNum(_Dm)) ); \
        asm_output("fnegd %s,%s", gpn(_Dd), gpn(_Dm));                  \
    } while (0)
#define FADDD(_Dd,_Dn,_Dm) do {                                         \
        underrunProtect(4);                                             \
        NanoAssert(_config.arm_vfp);                                    \
        NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dn) && IsFpReg(_Dm));       \
        *(--_nIns) = (NIns)( COND_AL | (0xE3<<20) | (FpRegNum(_Dn)<<16) | (FpRegNum(_Dd)<<12) | (0xB0<<4) | (FpRegNum(_Dm)) ); \
        asm_output("faddd %s,%s,%s", gpn(_Dd), gpn(_Dn), gpn(_Dm));     \
    } while (0)

#define FSUBD(_Dd,_Dn,_Dm) do {                                         \
        underrunProtect(4);                                             \
        NanoAssert(_config.arm_vfp);                                    \
        NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dn) && IsFpReg(_Dm));       \
        *(--_nIns) = (NIns)( COND_AL | (0xE3<<20) | (FpRegNum(_Dn)<<16) | (FpRegNum(_Dd)<<12) | (0xB4<<4) | (FpRegNum(_Dm)) ); \
        asm_output("fsubd %s,%s,%s", gpn(_Dd), gpn(_Dn), gpn(_Dm));     \
    } while (0)

#define FMULD(_Dd,_Dn,_Dm) do {                                         \
        underrunProtect(4);                                             \
        NanoAssert(_config.arm_vfp);                                    \
        NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dn) && IsFpReg(_Dm));       \
        *(--_nIns) = (NIns)( COND_AL | (0xE2<<20) | (FpRegNum(_Dn)<<16) | (FpRegNum(_Dd)<<12) | (0xB0<<4) | (FpRegNum(_Dm)) ); \
        asm_output("fmuld %s,%s,%s", gpn(_Dd), gpn(_Dn), gpn(_Dm));     \
    } while (0)

#define FDIVD(_Dd,_Dn,_Dm) do {                                         \
        underrunProtect(4);                                             \
        NanoAssert(_config.arm_vfp);                                    \
        NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dn) && IsFpReg(_Dm));       \
        *(--_nIns) = (NIns)( COND_AL | (0xE8<<20) | (FpRegNum(_Dn)<<16) | (FpRegNum(_Dd)<<12) | (0xB0<<4) | (FpRegNum(_Dm)) ); \
        asm_output("fdivd %s,%s,%s", gpn(_Dd), gpn(_Dn), gpn(_Dm));     \
    } while (0)
#define FMSTAT() do {                                                   \
        underrunProtect(4);                                             \
        NanoAssert(_config.arm_vfp);                                    \
        *(--_nIns) = (NIns)( COND_AL | 0x0EF1FA10);                     \
        asm_output("fmstat");                                           \
    } while (0)

#define FCMPD(_Dd,_Dm,_E) do {                                          \
        underrunProtect(4);                                             \
        NanoAssert(_config.arm_vfp);                                    \
        NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dm));                       \
        NanoAssert(((_E)==0) || ((_E)==1));                             \
        *(--_nIns) = (NIns)( COND_AL | (0xEB4<<16) | (FpRegNum(_Dd)<<12) | (0xB<<8) | ((_E)<<7) | (0x4<<4) | (FpRegNum(_Dm)) ); \
        asm_output("fcmp%sd %s,%s", (((_E)==1)?"e":""), gpn(_Dd), gpn(_Dm)); \
    } while (0)

#define FCPYD(_Dd,_Dm) do {                                             \
        underrunProtect(4);                                             \
        NanoAssert(_config.arm_vfp);                                    \
        NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dm));                       \
        *(--_nIns) = (NIns)( COND_AL | (0xEB0<<16) | (FpRegNum(_Dd)<<12) | (0xB4<<4) | (FpRegNum(_Dm)) ); \
        asm_output("fcpyd %s,%s", gpn(_Dd), gpn(_Dm));                  \
    } while (0)

#define FMRS(_Rd,_Sn) do {                                              \
        underrunProtect(4);                                             \
        NanoAssert(_config.arm_vfp);                                    \
        NanoAssert(((_Sn) == S14) && IsGpReg(_Rd));                     \
        *(--_nIns) = (NIns)( COND_AL | (0xE1<<20) | (0x7<<16) | ((_Rd)<<12) | (0xA<<8) | (0<<7) | (0x1<<4) ); \
        asm_output("fmrs %s,%s", gpn(_Rd), gpn(_Sn));                   \
    } while (0)
/*
 * The following instructions can only be used with S14 as the
 * single-precision register; that limitation can be removed if
 * needed, but we'd have to teach NJ about all the single precision
 * regs, and their encoding is strange (top 4 bits usually in a block,
 * low bit elsewhere).
 */

#define FSITOD(_Dd,_Sm) do {                                            \
        underrunProtect(4);                                             \
        NanoAssert(_config.arm_vfp);                                    \
        NanoAssert(IsFpReg(_Dd) && ((_Sm) == S14));                     \
        *(--_nIns) = (NIns)( COND_AL | (0xEB8<<16) | (FpRegNum(_Dd)<<12) | (0x2F<<6) | (0<<5) | (0x7) ); \
        asm_output("fsitod %s,%s", gpn(_Dd), gpn(_Sm));                 \
    } while (0)

#define FMSR(_Sn,_Rd) do {                                              \
        underrunProtect(4);                                             \
        NanoAssert(_config.arm_vfp);                                    \
        NanoAssert(((_Sn) == S14) && IsGpReg(_Rd));                     \
        *(--_nIns) = (NIns)( COND_AL | (0xE0<<20) | (0x7<<16) | ((_Rd)<<12) | (0xA<<8) | (0<<7) | (0x1<<4) ); \
        asm_output("fmsr %s,%s", gpn(_Sn), gpn(_Rd));                   \
    } while (0)
#define FCVTSD(_Sd,_Dm) do {                                            \
        underrunProtect(4);                                             \
        NanoAssert(_config.arm_vfp);                                    \
        NanoAssert(((_Sd) == S14) && IsFpReg(_Dm));                     \
        *(--_nIns) = (NIns)( COND_AL | (0xEB7<<16) | (0x7<<12) | (0xBC<<4) | (FpRegNum(_Dm)) ); \
        asm_output("[0x%08x] fcvtsd s14,%s", *_nIns, gpn(_Dm));         \
    } while (0)

#define FCVTDS_allowD7(_Dd,_Sm,_allowD7) do {                           \
        underrunProtect(4);                                             \
        NanoAssert(_config.arm_vfp);                                    \
        NanoAssert(((_Sm) == S14) && (IsFpReg(_Dd) || ((_allowD7) && (_Dd) == D7))); \
        *(--_nIns) = (NIns)( COND_AL | (0xEB7<<16) | (FpRegNum(_Dd)<<12) | (0xAC<<4) | (0x7) ); \
        asm_output("[0x%08x] fcvtds %s,s14", *_nIns, gpn(_Dd));         \
    } while(0)

#define FCVTDS(_Dd,_Sm) \
    FCVTDS_allowD7(_Dd,_Sm,0)

#define FLDS(_Sd,_Rn,_offs) do {                                        \
        underrunProtect(4);                                             \
        NanoAssert(_config.arm_vfp);                                    \
        NanoAssert(((_Sd) == S14) && !IsFpReg(_Rn));                    \
        NanoAssert((((_offs) & 3) == 0) && isS8((_offs) >> 2));         \
        int addflag = 1<<23;                                            \
        intptr_t offs = (_offs);                                        \
        if (offs < 0) {                                                 \
            addflag = 0;                                                \
            offs = -offs;                                               \
        }                                                               \
        *(--_nIns) = (NIns)( COND_AL | (0xD1<<20) | ((_Rn)<<16) | (0x7<<12) | (0xA<<8) | addflag | ((offs>>2)&0xff) ); \
        asm_output("[0x%08x] flds s14, [%s, #%d]", *_nIns, gpn(_Rn), (_offs)); \
    } while (0)

#define FSTS(_Sd,_Rn,_offs) do {                                        \
        underrunProtect(4);                                             \
        NanoAssert(_config.arm_vfp);                                    \
        NanoAssert(((_Sd) == S14) && !IsFpReg(_Rn));                    \
        NanoAssert((((_offs) & 3) == 0) && isS8((_offs) >> 2));         \
        int addflag = 1<<23;                                            \
        intptr_t offs = (_offs);                                        \
        if (offs < 0) {                                                 \
            addflag = 0;                                                \
            offs = -offs;                                               \
        }                                                               \
        *(--_nIns) = (NIns)( COND_AL | (0xD0<<20) | ((_Rn)<<16) | (0x7<<12) | (0xA<<8) | addflag | ((offs>>2)&0xff) ); \
        asm_output("[0x%08x] fsts s14, [%s, #%d]", *_nIns, gpn(_Rn), (_offs)); \
    } while (0)

#define FTOSID(_Sd,_Dm) do {                                            \
        underrunProtect(4);                                             \
        NanoAssert(_config.arm_vfp);                                    \
        NanoAssert(((_Sd) == S14) && IsFpReg(_Dm));                     \
        *(--_nIns) = (NIns)( COND_AL | (0xEBD<<16) | (0x7<<12) | (0xB4<<4) | FpRegNum(_Dm) ); \
        asm_output("ftosid s14, %s", gpn(_Dm));                         \
    } while (0)
} // namespace nanojit
#endif // __nanojit_NativeARM__