/* [tamarin-stm.git] nanojit/Nativei386.h
 * Bug 555045: Add regression test (r=cpeyer)
 * blob 2571d041b6e0c9efe87791f8043460e3efb36fd0
 */
/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is [Open Source Virtual Machine].
 *
 * The Initial Developer of the Original Code is
 * Adobe System Incorporated.
 * Portions created by the Initial Developer are Copyright (C) 2004-2007
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Adobe AS3 Team
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
41 #ifndef __nanojit_Nativei386__
42 #define __nanojit_Nativei386__
44 #ifdef PERFM
45 #define DOPROF
46 #include "../vprof/vprof.h"
47 #define count_instr() _nvprof("x86",1)
48 #define count_ret() _nvprof("x86-ret",1); count_instr();
49 #define count_push() _nvprof("x86-push",1); count_instr();
50 #define count_pop() _nvprof("x86-pop",1); count_instr();
51 #define count_st() _nvprof("x86-st",1); count_instr();
52 #define count_stq() _nvprof("x86-stq",1); count_instr();
53 #define count_ld() _nvprof("x86-ld",1); count_instr();
54 #define count_ldq() _nvprof("x86-ldq",1); count_instr();
55 #define count_call() _nvprof("x86-call",1); count_instr();
56 #define count_calli() _nvprof("x86-calli",1); count_instr();
57 #define count_prolog() _nvprof("x86-prolog",1); count_instr();
58 #define count_alu() _nvprof("x86-alu",1); count_instr();
59 #define count_mov() _nvprof("x86-mov",1); count_instr();
60 #define count_fpu() _nvprof("x86-fpu",1); count_instr();
61 #define count_jmp() _nvprof("x86-jmp",1); count_instr();
62 #define count_jcc() _nvprof("x86-jcc",1); count_instr();
63 #define count_fpuld() _nvprof("x86-ldq",1); _nvprof("x86-fpu",1); count_instr()
64 #define count_aluld() _nvprof("x86-ld",1); _nvprof("x86-alu",1); count_instr()
65 #define count_alust() _nvprof("x86-ld",1); _nvprof("x86-alu",1); _nvprof("x86-st",1); count_instr()
66 #define count_pushld() _nvprof("x86-ld",1); _nvprof("x86-push",1); count_instr()
67 #define count_imt() _nvprof("x86-imt",1) count_instr()
68 #else
69 #define count_instr()
70 #define count_ret()
71 #define count_push()
72 #define count_pop()
73 #define count_st()
74 #define count_stq()
75 #define count_ld()
76 #define count_ldq()
77 #define count_call()
78 #define count_calli()
79 #define count_prolog()
80 #define count_alu()
81 #define count_mov()
82 #define count_fpu()
83 #define count_jmp()
84 #define count_jcc()
85 #define count_fpuld()
86 #define count_aluld()
87 #define count_alust()
88 #define count_pushld()
89 #define count_imt()
90 #endif
92 namespace nanojit
94 const int NJ_MAX_REGISTERS = 24; // gpregs, x87 regs, xmm regs
96 #define NJ_MAX_STACK_ENTRY 4096
97 #define NJ_MAX_PARAMETERS 1
99 #define NJ_USES_QUAD_CONSTANTS 1
101 #define NJ_JTBL_SUPPORTED 1
102 #define NJ_EXPANDED_LOADSTORE_SUPPORTED 1
103 #define NJ_F2I_SUPPORTED 1
104 #define NJ_SOFTFLOAT_SUPPORTED 0
106 // Preserve a 16-byte stack alignment, to support the use of
107 // SSE instructions like MOVDQA (if not by Tamarin itself,
108 // then by the C functions it calls).
109 const int NJ_ALIGN_STACK = 16;
111 const int32_t LARGEST_UNDERRUN_PROT = 32; // largest value passed to underrunProtect
113 typedef uint8_t NIns;
115 // Bytes of icache to flush after patch
116 const size_t LARGEST_BRANCH_PATCH = 16 * sizeof(NIns);
118 // These are used as register numbers in various parts of the code
119 typedef enum
121 // general purpose 32bit regs
122 EAX = 0, // return value, scratch
123 ECX = 1, // this/arg0, scratch
124 EDX = 2, // arg1, return-msw, scratch
125 EBX = 3,
126 ESP = 4, // stack pointer
127 EBP = 5, // frame pointer
128 ESI = 6,
129 EDI = 7,
131 SP = ESP, // alias SP to ESP for convenience
132 FP = EBP, // alias FP to EBP for convenience
134 // SSE regs come before X87 so we prefer them
135 XMM0 = 8,
136 XMM1 = 9,
137 XMM2 = 10,
138 XMM3 = 11,
139 XMM4 = 12,
140 XMM5 = 13,
141 XMM6 = 14,
142 XMM7 = 15,
144 // X87 regs
145 FST0 = 16,
147 FirstReg = 0,
148 LastReg = 16,
149 deprecated_UnknownReg = 17, // XXX: remove eventually, see bug 538924
150 UnspecifiedReg = 17
152 Register;
154 typedef int RegisterMask;
156 static const int NumSavedRegs = 3;
157 static const RegisterMask SavedRegs = 1<<EBX | 1<<EDI | 1<<ESI;
158 static const RegisterMask GpRegs = SavedRegs | 1<<EAX | 1<<ECX | 1<<EDX;
159 static const RegisterMask XmmRegs = 1<<XMM0|1<<XMM1|1<<XMM2|1<<XMM3|1<<XMM4|1<<XMM5|1<<XMM6|1<<XMM7;
160 static const RegisterMask x87Regs = 1<<FST0;
161 static const RegisterMask FpRegs = x87Regs | XmmRegs;
162 static const RegisterMask ScratchRegs = 1<<EAX | 1<<ECX | 1<<EDX | FpRegs;
164 static const RegisterMask AllowableFlagRegs = 1<<EAX |1<<ECX | 1<<EDX | 1<<EBX;
166 #define _rmask_(r) (1<<(r))
167 #define _is_xmm_reg_(r) ((_rmask_(r)&XmmRegs)!=0)
168 #define _is_x87_reg_(r) ((_rmask_(r)&x87Regs)!=0)
169 #define _is_fp_reg_(r) ((_rmask_(r)&FpRegs)!=0)
170 #define _is_gp_reg_(r) ((_rmask_(r)&GpRegs)!=0)
172 verbose_only( extern const char* regNames[]; )
174 #define DECLARE_PLATFORM_STATS()
176 #define DECLARE_PLATFORM_REGALLOC()
178 #define JCC32 0x0f
179 #define JMP8 0xeb
180 #define JMP32 0xe9
182 #define DECLARE_PLATFORM_ASSEMBLER() \
183 const static Register argRegs[2], retRegs[2]; \
184 int32_t max_stk_args;\
185 void nativePageReset();\
186 void nativePageSetup();\
187 void underrunProtect(int);\
188 void asm_immi(Register r, int32_t val, bool canClobberCCs);\
189 void asm_stkarg(LInsp p, int32_t& stkd);\
190 void asm_farg(LInsp, int32_t& stkd);\
191 void asm_arg(ArgType ty, LInsp p, Register r, int32_t& stkd);\
192 void asm_pusharg(LInsp);\
193 void asm_fcmp(LIns *cond);\
194 NIns* asm_fbranch(bool, LIns*, NIns*);\
195 void asm_cmp(LIns *cond); \
196 void asm_div_mod(LIns *cond); \
197 void asm_load(int d, Register r); \
198 void asm_immf(Register r, uint64_t q, double d, bool canClobberCCs); \
199 void IMM8(int32_t i) { \
200 _nIns -= 1; \
201 *((int8_t*)_nIns) = (int8_t)(i); \
202 }; \
203 void IMM16(int32_t i) { \
204 _nIns -= 2; \
205 *((int16_t*)_nIns) = (int16_t)(i); \
206 }; \
207 void IMM32(int32_t i) { \
208 _nIns -= 4; \
209 *((int32_t*)_nIns) = (int32_t)(i); \
210 }; \
211 void MODRMs(int32_t r, int32_t d, Register b, int32_t l, int32_t i); \
212 void MODRMm(int32_t r, int32_t d, Register b); \
213 void MODRMSIB(Register reg, Register base, int32_t index, int32_t scale, int32_t disp); \
214 void MODRMdm(int32_t r, int32_t addr); \
215 void MODRM(int32_t d, int32_t s) { \
216 NanoAssert((unsigned(d))<8 && (unsigned(s))<8); \
217 *(--_nIns) = (uint8_t) ( 3<<6 | d<<3 | s ); \
218 }; \
219 void ALU0(int32_t o); \
220 void ALUm(int32_t c, int32_t r, int32_t d, Register b); \
221 void ALUdm(int32_t c, int32_t r, int32_t addr); \
222 void ALUsib(int32_t c, Register r, Register base, int32_t index, int32_t scale, int32_t disp); \
223 void ALUm16(int32_t c, int32_t r, int32_t d, Register b); \
224 void ALU2dm(int32_t c, int32_t r, int32_t addr); \
225 void ALU2m(int32_t c, int32_t r, int32_t d, Register b); \
226 void ALU2sib(int32_t c, Register r, Register base, int32_t index, int32_t scale, int32_t disp); \
227 void ALU(int32_t c, int32_t d, int32_t s) { \
228 underrunProtect(2); \
229 MODRM(d,s); \
230 *(--_nIns) = uint8_t(c); \
231 }; \
232 void ALUi(int32_t c, int32_t r, int32_t i); \
233 void ALUmi(int32_t c, int32_t d, Register b, int32_t i); \
234 void ALU2(int32_t c, int32_t d, int32_t s); \
235 void LAHF(); \
236 void SAHF(); \
237 void OR(Register l, Register r); \
238 void AND(Register l, Register r); \
239 void XOR(Register l, Register r); \
240 void ADD(Register l, Register r); \
241 void SUB(Register l, Register r); \
242 void MUL(Register l, Register r); \
243 void DIV(Register r); \
244 void NOT(Register r); \
245 void NEG(Register r); \
246 void SHR(Register r, Register s); \
247 void SAR(Register r, Register s); \
248 void SHL(Register r, Register s); \
249 void SHIFT(int32_t c, Register r, int32_t i); \
250 void SHLi(Register r, int32_t i); \
251 void SHRi(Register r, int32_t i); \
252 void SARi(Register r, int32_t i); \
253 void MOVZX8(Register d, Register s); \
254 void SUBi(Register r, int32_t i); \
255 void ADDi(Register r, int32_t i); \
256 void ANDi(Register r, int32_t i); \
257 void ORi(Register r, int32_t i); \
258 void XORi(Register r, int32_t i); \
259 void ADDmi(int32_t d, Register b, int32_t i); \
260 void TEST(Register d, Register s); \
261 void CMP(Register l, Register r); \
262 void CMPi(Register r, int32_t i); \
263 void MR(Register d, Register s) { \
264 count_mov(); \
265 ALU(0x8b,d,s); \
266 asm_output("mov %s,%s",gpn(d),gpn(s)); \
267 }; \
268 void LEA(Register r, int32_t d, Register b); \
269 void LEAmi4(Register r, int32_t d, int32_t i); \
270 void CDQ(); \
271 void INCLi(int32_t p); \
272 void SETE( Register r); \
273 void SETNP(Register r); \
274 void SETL( Register r); \
275 void SETLE(Register r); \
276 void SETG( Register r); \
277 void SETGE(Register r); \
278 void SETB( Register r); \
279 void SETBE(Register r); \
280 void SETA( Register r); \
281 void SETAE(Register r); \
282 void SETO( Register r); \
283 void MREQ(Register d, Register s); \
284 void MRNE(Register d, Register s); \
285 void MRL( Register d, Register s); \
286 void MRLE(Register d, Register s); \
287 void MRG( Register d, Register s); \
288 void MRGE(Register d, Register s); \
289 void MRB( Register d, Register s); \
290 void MRBE(Register d, Register s); \
291 void MRA( Register d, Register s); \
292 void MRAE(Register d, Register s); \
293 void MRNO(Register d, Register s); \
294 void LD(Register reg, int32_t disp, Register base); \
295 void LDdm(Register reg, int32_t addr); \
296 void LDsib(Register reg, int32_t disp, Register base, int32_t index, int32_t scale); \
297 void LD16S(Register r, int32_t d, Register b); \
298 void LD16Sdm(Register r, int32_t addr); \
299 void LD16Ssib(Register r, int32_t disp, Register base, int32_t index, int32_t scale); \
300 void LD16Z(Register r, int32_t d, Register b); \
301 void LD16Zdm(Register r, int32_t addr); \
302 void LD16Zsib(Register r, int32_t disp, Register base, int32_t index, int32_t scale); \
303 void LD8Z(Register r, int32_t d, Register b); \
304 void LD8Zdm(Register r, int32_t addr); \
305 void LD8Zsib(Register r, int32_t disp, Register base, int32_t ndex, int32_t scale); \
306 void LD8S(Register r, int32_t d, Register b); \
307 void LD8Sdm(Register r, int32_t addr); \
308 void LD8Ssib(Register r, int32_t disp, Register base, int32_t index, int32_t scale); \
309 void LDi(Register r, int32_t i); \
310 void ST8(Register base, int32_t disp, Register reg); \
311 void ST16(Register base, int32_t disp, Register reg); \
312 void ST(Register base, int32_t disp, Register reg); \
313 void ST8i(Register base, int32_t disp, int32_t imm); \
314 void ST16i(Register base, int32_t disp, int32_t imm); \
315 void STi(Register base, int32_t disp, int32_t imm); \
316 void RET(); \
317 void NOP(); \
318 void INT3(); \
319 void PUSHi(int32_t i); \
320 void PUSHi32(int32_t i); \
321 void PUSHr(Register r); \
322 void PUSHm(int32_t d, Register b); \
323 void POPr(Register r); \
324 void JCC(int32_t o, NIns* t, const char* n); \
325 void JMP_long(NIns* t); \
326 void JMP(NIns* t) { \
327 count_jmp(); \
328 underrunProtect(5); \
329 intptr_t tt = t ? (intptr_t)t - (intptr_t)_nIns : 0; \
330 if (t && isS8(tt)) { \
331 _nIns -= 2; \
332 _nIns[0] = JMP8; \
333 _nIns[1] = uint8_t(tt & 0xff); \
334 } else { \
335 IMM32(tt); \
336 *(--_nIns) = JMP32; \
338 asm_output("jmp %p", t); \
339 }; \
340 void JMP_indirect(Register r); \
341 void JMP_indexed(Register x, int32_t ss, NIns** addr); \
342 void JE(NIns* t); \
343 void JNE(NIns* t); \
344 void JP(NIns* t); \
345 void JNP(NIns* t); \
346 void JB(NIns* t); \
347 void JNB(NIns* t); \
348 void JBE(NIns* t); \
349 void JNBE(NIns* t); \
350 void JA(NIns* t); \
351 void JNA(NIns* t); \
352 void JAE(NIns* t); \
353 void JNAE(NIns* t); \
354 void JL(NIns* t); \
355 void JNL(NIns* t); \
356 void JLE(NIns* t); \
357 void JNLE(NIns* t); \
358 void JG(NIns* t); \
359 void JNG(NIns* t); \
360 void JGE(NIns* t); \
361 void JNGE(NIns* t); \
362 void JO(NIns* t); \
363 void JNO(NIns* t); \
364 void SSE(int32_t c, int32_t d, int32_t s); \
365 void SSEm(int32_t c, int32_t r, int32_t d, Register b); \
366 void LDSDm(Register r, const double* addr); \
367 void SSE_LDSD(Register r, int32_t d, Register b); \
368 void SSE_LDQ( Register r, int32_t d, Register b); \
369 void SSE_LDSS(Register r, int32_t d, Register b); \
370 void SSE_STSD(int32_t d, Register b, Register r); \
371 void SSE_STQ( int32_t d, Register b, Register r); \
372 void SSE_STSS(int32_t d, Register b, Register r); \
373 void SSE_CVTSI2SD(Register xr, Register gr); \
374 void SSE_CVTSD2SI(Register gr, Register xr); \
375 void SSE_CVTSD2SS(Register xr, Register gr); \
376 void SSE_CVTSS2SD(Register xr, Register gr); \
377 void SSE_CVTDQ2PD(Register d, Register r); \
378 void SSE_MOVD(Register d, Register s); \
379 void SSE_MOVSD(Register rd, Register rs); \
380 void SSE_MOVDm(Register d, Register b, Register xrs); \
381 void SSE_ADDSD(Register rd, Register rs); \
382 void SSE_ADDSDm(Register r, const double* addr); \
383 void SSE_SUBSD(Register rd, Register rs); \
384 void SSE_MULSD(Register rd, Register rs); \
385 void SSE_DIVSD(Register rd, Register rs); \
386 void SSE_UCOMISD(Register rl, Register rr); \
387 void SSE_CVTSI2SDm(Register xr, Register d, Register b); \
388 void SSE_XORPD(Register r, const uint32_t* maskaddr); \
389 void SSE_XORPDr(Register rd, Register rs); \
390 void FPUc(int32_t o); \
391 void FPU(int32_t o, Register r) { \
392 underrunProtect(2); \
393 *(--_nIns) = uint8_t(((uint8_t)(o)&0xff) | (r&7)); \
394 *(--_nIns) = (uint8_t)((o>>8)&0xff); \
395 }; \
396 void FPUm(int32_t o, int32_t d, Register b); \
397 void FPUdm(int32_t o, const double* const m); \
398 void TEST_AH(int32_t i); \
399 void TEST_AX(int32_t i); \
400 void FNSTSW_AX(); \
401 void FCHS(); \
402 void FLD1(); \
403 void FLDZ(); \
404 void FFREE(Register r); \
405 void FST32(bool p, int32_t d, Register b); \
406 void FSTQ(bool p, int32_t d, Register b); \
407 void FSTPQ(int32_t d, Register b); \
408 void FCOM(bool p, int32_t d, Register b); \
409 void FCOMdm(bool p, const double* dm); \
410 void FLD32(int32_t d, Register b); \
411 void FLDQ(int32_t d, Register b); \
412 void FLDQdm(const double* dm); \
413 void FILDQ(int32_t d, Register b); \
414 void FILD(int32_t d, Register b); \
415 void FIST(bool p, int32_t d, Register b); \
416 void FADD( int32_t d, Register b); \
417 void FSUB( int32_t d, Register b); \
418 void FSUBR(int32_t d, Register b); \
419 void FMUL( int32_t d, Register b); \
420 void FDIV( int32_t d, Register b); \
421 void FDIVR(int32_t d, Register b); \
422 void FADDdm( const double *dm); \
423 void FSUBRdm(const double* dm); \
424 void FMULdm( const double* dm); \
425 void FDIVRdm(const double* dm); \
426 void FINCSTP(); \
427 void FSTP(Register r) { \
428 count_fpu(); \
429 FPU(0xddd8, r); \
430 asm_output("fstp %s",gpn(r)); fpu_pop(); \
431 }; \
432 void FCOMP(); \
433 void FCOMPP(); \
434 void FLDr(Register r); \
435 void EMMS(); \
436 void CALL(const CallInfo* ci); \
437 void CALLr(const CallInfo* ci, Register r);
442 #endif // __nanojit_Nativei386__