nanojit/NativeX64.h

   1 /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
   2 /* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
   3 /* ***** BEGIN LICENSE BLOCK *****
   4  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   5  *
   6  * The contents of this file are subject to the Mozilla Public License Version
   7  * 1.1 (the "License"); you may not use this file except in compliance with
   8  * the License. You may obtain a copy of the License at
   9  * http://www.mozilla.org/MPL/
  10  *
  11  * Software distributed under the License is distributed on an "AS IS" basis,
  12  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  13  * for the specific language governing rights and limitations under the
  14  * License.
  15  *
  16  * The Original Code is [Open Source Virtual Machine].
  17  *
  18  * The Initial Developer of the Original Code is
  19  * Adobe System Incorporated.
  20  * Portions created by the Initial Developer are Copyright (C) 2008
  21  * the Initial Developer. All Rights Reserved.
  22  *
  23  * Contributor(s):
  24  *   Adobe AS3 Team
  25  *
  26  * Alternatively, the contents of this file may be used under the terms of
  27  * either the GNU General Public License Version 2 or later (the "GPL"), or
  28  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  29  * in which case the provisions of the GPL or the LGPL are applicable instead
  30  * of those above. If you wish to allow use of your version of this file only
  31  * under the terms of either the GPL or the LGPL, and not to allow others to
  32  * use your version of this file under the terms of the MPL, indicate your
  33  * decision by deleting the provisions above and replace them with the notice
  34  * and other provisions required by the GPL or the LGPL. If you do not delete
  35  * the provisions above, a recipient may use your version of this file under
  36  * the terms of any one of the MPL, the GPL or the LGPL.
  37  *
  38  * ***** END LICENSE BLOCK ***** */
  39
  40 #ifndef __nanojit_NativeX64__
  41 #define __nanojit_NativeX64__
  42
   43 #ifndef NANOJIT_64BIT // configuration check: this backend refuses to build unless the 64-bit flag is defined
   44 #error "NANOJIT_64BIT must be defined for X64 backend"
   45 #endif
  46
  47 #ifdef PERFM
  48 #define DOPROF
  49 #include "../vprof/vprof.h"
  50 #define count_instr() _nvprof("x64",1)
  51 #define count_prolog() _nvprof("x64-prolog",1); count_instr();
  52 #define count_imt() _nvprof("x64-imt",1) count_instr()
  53 #else
  54 #define count_instr()
  55 #define count_prolog()
  56 #define count_imt()
  57 #endif
  58
  59 namespace nanojit
  60 {
   61 #define NJ_MAX_STACK_ENTRY              4096 // backend stack limit; the unit (entries vs bytes) is not visible in this header -- TODO confirm
   62 #define NJ_ALIGN_STACK                  16 // presumably the required stack alignment in bytes -- confirm against the prologue code
   63
   64 #define NJ_JTBL_SUPPORTED               1 // feature switches follow: 1 = available on this backend, 0 = not; presumably jump tables (cf. JMPX/JMPXB)
   65 #define NJ_EXPANDED_LOADSTORE_SUPPORTED 1 // presumably the 8/16-bit and float load/store forms (cf. MOVZX8M, MOVSX16M, MOVSSRM)
   66 #define NJ_F2I_SUPPORTED                1 // presumably double-to-int conversion (cf. CVTSD2SI)
   67 #define NJ_SOFTFLOAT_SUPPORTED          0 // soft-float is switched off for this backend
  68
   69     enum Register { // register numbers: 0-15 general-purpose, 16-31 XMM; each value is also used as a bit index in RegisterMask
   70         RAX = 0, // 1st int return, # of sse varargs
   71         RCX = 1, // 4th int arg
   72         RDX = 2, // 3rd int arg 2nd return
   73         RBX = 3, // saved
   74         RSP = 4, // stack ptr
   75         RBP = 5, // frame ptr, saved, sib reqd
   76         RSI = 6, // 2nd int arg
   77         RDI = 7, // 1st int arg
   78         R8  = 8, // 5th int arg
   79         R9  = 9, // 6th int arg
   80         R10 = 10, // scratch
   81         R11 = 11, // scratch
   82         R12 = 12, // saved
   83         R13 = 13, // saved, sib reqd like rbp
   84         R14 = 14, // saved
   85         R15 = 15, // saved
   86
   87         XMM0  = 16, // 1st double arg, return
   88         XMM1  = 17, // 2nd double arg, return
   89         XMM2  = 18, // 3rd double arg
   90         XMM3  = 19, // 4th double arg
   91         XMM4  = 20, // 5th double arg
   92         XMM5  = 21, // 6th double arg
   93         XMM6  = 22, // 7th double arg
   94         XMM7  = 23, // 8th double arg
   95         XMM8  = 24, // scratch
   96         XMM9  = 25, // scratch
   97         XMM10 = 26, // scratch
   98         XMM11 = 27, // scratch
   99         XMM12 = 28, // scratch
  100         XMM13 = 29, // scratch
  101         XMM14 = 30, // scratch
  102         XMM15 = 31, // scratch
  103
  104         FP = RBP, // alias: the frame pointer is RBP
  105
  106         FirstReg = RAX, // lowest register number
  107         LastReg = XMM15, // highest register number
  108
  109         deprecated_UnknownReg = 32,        // XXX: remove eventually, see bug 538924
  110         UnspecifiedReg = 32 // sentinel one past LastReg; same value as deprecated_UnknownReg
  111     };
 112
 113 /*
  114  * Micro-templating variable-length opcodes, idea first
  115  * described by Mike Pall of LuaJIT.
 116  *
 117  * X86-64 opcode encodings:  LSB encodes the length of the
 118  * opcode in bytes, remaining bytes are encoded as 1-7 bytes
 119  * in a single uint64_t value.  The value is written as a single
 120  * store into the code stream, and the code pointer is decremented
 121  * by the length.  each successive instruction partially overlaps
 122  * the previous one.
 123  *
 124  * emit methods below are able to encode mod/rm, sib, rex, and
 125  * register and small immediate values into these opcode values
 126  * without much branchy code.
 127  *
 128  * these opcodes encapsulate all the const parts of the instruction.
 129  * for example, the alu-immediate opcodes (add, sub, etc) encode
 130  * part of their opcode in the R field of the mod/rm byte;  this
 131  * hardcoded value is in the constant below, and the R argument
 132  * to emitrr() is 0.  In a few cases, a whole instruction is encoded
 133  * this way (eg callrax).
 134  *
 135  * when a disp32, imm32, or imm64 suffix can't fit in an 8-byte
 136  * opcode, then it is written into the code separately and not counted
 137  * in the opcode length.
 138  */
 139
  140     enum X64Opcode // instruction templates: lowest byte = length in bytes, instruction bytes packed into the most-significant bytes
  141 #if defined(_MSC_VER) && _MSC_VER >= 1400
  142 #pragma warning(disable:4480) // nonstandard extension used: specifying underlying type for enum
  143           : uint64_t
  144 #endif
  145     {
  146         // 64bit opcode constants
  147         //              msb        lsb len
  148         X64_addqrr  = 0xC003480000000003LL, // 64bit add r += b
  149         X64_addqri  = 0xC081480000000003LL, // 64bit add r += int64(imm32)
  150         X64_addqr8  = 0x00C0834800000004LL, // 64bit add r += int64(imm8)
  151         X64_andqri  = 0xE081480000000003LL, // 64bit and r &= int64(imm32)
  152         X64_andqr8  = 0x00E0834800000004LL, // 64bit and r &= int64(imm8)
  153         X64_orqri   = 0xC881480000000003LL, // 64bit or  r |= int64(imm32)
  154         X64_orqr8   = 0x00C8834800000004LL, // 64bit or  r |= int64(imm8)
  155         X64_xorqri  = 0xF081480000000003LL, // 64bit xor r ^= int64(imm32)
  156         X64_xorqr8  = 0x00F0834800000004LL, // 64bit xor r ^= int64(imm8)
  157         X64_addlri  = 0xC081400000000003LL, // 32bit add r += imm32
  158         X64_addlr8  = 0x00C0834000000004LL, // 32bit add r += imm8
  159         X64_andlri  = 0xE081400000000003LL, // 32bit and r &= imm32
  160         X64_andlr8  = 0x00E0834000000004LL, // 32bit and r &= imm8
  161         X64_orlri   = 0xC881400000000003LL, // 32bit or  r |= imm32
  162         X64_orlr8   = 0x00C8834000000004LL, // 32bit or  r |= imm8
  163         X64_sublri  = 0xE881400000000003LL, // 32bit sub r -= imm32
  164         X64_sublr8  = 0x00E8834000000004LL, // 32bit sub r -= imm8
  165         X64_xorlri  = 0xF081400000000003LL, // 32bit xor r ^= imm32
  166         X64_xorlr8  = 0x00F0834000000004LL, // 32bit xor r ^= imm8
  167         X64_addrr   = 0xC003400000000003LL, // 32bit add r += b
  168         X64_andqrr  = 0xC023480000000003LL, // 64bit and r &= b
  169         X64_andrr   = 0xC023400000000003LL, // 32bit and r &= b
  170         X64_call    = 0x00000000E8000005LL, // near call
  171         X64_callrax = 0xD0FF000000000002LL, // indirect call to addr in rax (no REX)
  172         X64_cmovqno = 0xC0410F4800000004LL, // 64bit conditional mov if (no overflow) r = b
  173         X64_cmovqnae= 0xC0420F4800000004LL, // 64bit conditional mov if (uint <)  r = b
  174         X64_cmovqnb = 0xC0430F4800000004LL, // 64bit conditional mov if (uint >=) r = b
  175         X64_cmovqne = 0xC0450F4800000004LL, // 64bit conditional mov if (!=)      r = b
  176         X64_cmovqna = 0xC0460F4800000004LL, // 64bit conditional mov if (uint <=) r = b
  177         X64_cmovqnbe= 0xC0470F4800000004LL, // 64bit conditional mov if (uint >)  r = b
  178         X64_cmovqnge= 0xC04C0F4800000004LL, // 64bit conditional mov if (int <)   r = b
  179         X64_cmovqnl = 0xC04D0F4800000004LL, // 64bit conditional mov if (int >=)  r = b
  180         X64_cmovqng = 0xC04E0F4800000004LL, // 64bit conditional mov if (int <=)  r = b
  181         X64_cmovqnle= 0xC04F0F4800000004LL, // 64bit conditional mov if (int >)   r = b
  182         X64_cmovno  = 0xC0410F4000000004LL, // 32bit conditional mov if (no overflow) r = b
  183         X64_cmovnae = 0xC0420F4000000004LL, // 32bit conditional mov if (uint <)  r = b
  184         X64_cmovnb  = 0xC0430F4000000004LL, // 32bit conditional mov if (uint >=) r = b
  185         X64_cmovne  = 0xC0450F4000000004LL, // 32bit conditional mov if (!=)      r = b
  186         X64_cmovna  = 0xC0460F4000000004LL, // 32bit conditional mov if (uint <=) r = b
  187         X64_cmovnbe = 0xC0470F4000000004LL, // 32bit conditional mov if (uint >)  r = b
  188         X64_cmovnge = 0xC04C0F4000000004LL, // 32bit conditional mov if (int <)   r = b
  189         X64_cmovnl  = 0xC04D0F4000000004LL, // 32bit conditional mov if (int >=)  r = b
  190         X64_cmovng  = 0xC04E0F4000000004LL, // 32bit conditional mov if (int <=)  r = b
  191         X64_cmovnle = 0xC04F0F4000000004LL, // 32bit conditional mov if (int >)   r = b
  192         X64_cmplr   = 0xC03B400000000003LL, // 32bit compare r,b
  193         X64_cmpqr   = 0xC03B480000000003LL, // 64bit compare r,b
  194         X64_cmplri  = 0xF881400000000003LL, // 32bit compare r,imm32
  195         X64_cmpqri  = 0xF881480000000003LL, // 64bit compare r,int64(imm32)
  196         X64_cmplr8  = 0x00F8834000000004LL, // 32bit compare r,imm8
  197         X64_cmpqr8  = 0x00F8834800000004LL, // 64bit compare r,int64(imm8)
  198         X64_cvtsi2sd= 0xC02A0F40F2000005LL, // convert int32 to double r = (double) b
  199         X64_cvtsq2sd= 0xC02A0F48F2000005LL, // convert int64 to double r = (double) b
  200         X64_cvtss2sd= 0xC05A0F40F3000005LL, // convert float to double r = (double) b
  201         X64_cvtsd2ss= 0xC05A0F40F2000005LL, // convert double to float r = (float) b
  202         X64_cvtsd2si= 0xC02D0F40F2000005LL, // convert double to int32 r = (int32) b
  203         X64_divsd   = 0xC05E0F40F2000005LL, // divide scalar double r /= b
  204         X64_mulsd   = 0xC0590F40F2000005LL, // multiply scalar double r *= b
  205         X64_addsd   = 0xC0580F40F2000005LL, // add scalar double r += b
  206         X64_idiv    = 0xF8F7400000000003LL, // 32bit signed div (rax = rdx:rax/r, rdx=rdx:rax%r)
  207         X64_imul    = 0xC0AF0F4000000004LL, // 32bit signed mul r *= b
  208         X64_imuli   = 0xC069400000000003LL, // 32bit signed mul r = b * imm32
  209         X64_imul8   = 0x00C06B4000000004LL, // 32bit signed mul r = b * imm8
  210         X64_jmpi    = 0x0000000025FF0006LL, // jump *0(rip)
  211         X64_jmp     = 0x00000000E9000005LL, // jump near rel32
  212         X64_jmp8    = 0x00EB000000000002LL, // jump near rel8
  213         X64_jo      = 0x00000000800F0006LL, // jump near if overflow
  214         X64_jb      = 0x00000000820F0006LL, // jump near if below (uint <)
  215         X64_jae     = 0x00000000830F0006LL, // jump near if above or equal (uint >=)
  216         X64_ja      = 0x00000000870F0006LL, // jump near if above (uint >)
  217         X64_jbe     = 0x00000000860F0006LL, // jump near if below or equal (uint <=)
  218         X64_je      = 0x00000000840F0006LL, // near jump if equal
  219         X64_jl      = 0x000000008C0F0006LL, // jump near if less (int <)
  220         X64_jge     = 0x000000008D0F0006LL, // jump near if greater or equal (int >=)
  221         X64_jg      = 0x000000008F0F0006LL, // jump near if greater (int >)
  222         X64_jle     = 0x000000008E0F0006LL, // jump near if less or equal (int <=)
  223         X64_jp      = 0x000000008A0F0006LL, // jump near if parity (PF == 1)
  224         X64_jneg    = 0x0000000001000000LL, // xor with this mask to negate the condition
  225         X64_jo8     = 0x0070000000000002LL, // jump near if overflow
  226         X64_jb8     = 0x0072000000000002LL, // jump near if below (uint <)
  227         X64_jae8    = 0x0073000000000002LL, // jump near if above or equal (uint >=)
  228         X64_ja8     = 0x0077000000000002LL, // jump near if above (uint >)
  229         X64_jbe8    = 0x0076000000000002LL, // jump near if below or equal (uint <=)
  230         X64_je8     = 0x0074000000000002LL, // near jump if equal
  231         X64_jne8    = 0x0075000000000002LL, // jump near if not equal
  232         X64_jl8     = 0x007C000000000002LL, // jump near if less (int <)
  233         X64_jge8    = 0x007D000000000002LL, // jump near if greater or equal (int >=)
  234         X64_jg8     = 0x007F000000000002LL, // jump near if greater (int >)
  235         X64_jle8    = 0x007E000000000002LL, // jump near if less or equal (int <=)
  236         X64_jp8     = 0x007A000000000002LL, // jump near if parity (PF == 1)
  237         X64_jnp8    = 0x007B000000000002LL, // jump near if not parity (PF == 0)
  238         X64_jneg8   = 0x0001000000000000LL, // xor with this mask to negate the condition
  239         X64_leaqrm  = 0x00000000808D4807LL, // 64bit load effective addr reg <- disp32+base
  240         X64_learm   = 0x00000000808D4007LL, // 32bit load effective addr reg <- disp32+base
  241         X64_learip  = 0x00000000058D4807LL, // 64bit RIP-relative lea. reg <- disp32+rip (modrm = 00rrr101 = 05)
  242         X64_movlr   = 0xC08B400000000003LL, // 32bit mov r <- b
  243         X64_movbmr  = 0x0000000080884007LL, // 8bit store r -> [b+d32]
  244         X64_movsmr  = 0x8089406600000004LL, // 16bit store r -> [b+d32]
  245         X64_movlmr  = 0x0000000080894007LL, // 32bit store r -> [b+d32]
  246         X64_movlrm  = 0x00000000808B4007LL, // 32bit load r <- [b+d32]
  247         X64_movqmr  = 0x0000000080894807LL, // 64bit store gpr -> [b+d32]
  248         X64_movqspr = 0x0024448948000005LL, // 64bit store gpr -> [rsp+d8] (sib required)
  249         X64_movqr   = 0xC08B480000000003LL, // 64bit mov r <- b
  250         X64_movqi   = 0xB848000000000002LL, // 64bit mov r <- imm64
  251         X64_movi    = 0xB840000000000002LL, // 32bit mov r <- imm32
  252         X64_movqi32 = 0xC0C7480000000003LL, // 64bit mov r <- int64(imm32)
  253         X64_movapsr = 0xC0280F4000000004LL, // 128bit mov xmm <- xmm
  254         X64_movqrx  = 0xC07E0F4866000005LL, // 64bit mov b <- xmm-r (reverses the usual r/b order)
  255         X64_movqxr  = 0xC06E0F4866000005LL, // 64bit mov b -> xmm-r
  256         X64_movqrm  = 0x00000000808B4807LL, // 64bit load r <- [b+d32]
  257         X64_movsdrr = 0xC0100F40F2000005LL, // 64bit mov xmm-r <- xmm-b (upper 64bits unchanged)
  258         X64_movsdrm = 0x80100F40F2000005LL, // 64bit load xmm-r <- [b+d32] (upper 64 cleared)
  259         X64_movsdmr = 0x80110F40F2000005LL, // 64bit store xmm-r -> [b+d32]
  260         X64_movssrm = 0x80100F40F3000005LL, // 32bit load xmm-r <- [b+d32] (upper 96 cleared)
  261         X64_movssmr = 0x80110F40F3000005LL, // 32bit store xmm-r -> [b+d32]
  262         X64_movsxdr = 0xC063480000000003LL, // sign extend i32 to i64 r = (int64)(int32) b
  263         X64_movzx8  = 0xC0B60F4000000004LL, // zero extend i8 to i64 r = (uint64)(uint8) b
  264         X64_movzx8m = 0x80B60F4000000004LL, // zero extend i8 load to i32 r <- [b+d32]
  265         X64_movzx16m= 0x80B70F4000000004LL, // zero extend i16 load to i32 r <- [b+d32]
  266         X64_movsx8m = 0x80BE0F4000000004LL, // sign extend i8 load to i32 r <- [b+d32]
  267         X64_movsx16m= 0x80BF0F4000000004LL, // sign extend i16 load to i32 r <- [b+d32]
  268         X64_neg     = 0xD8F7400000000003LL, // 32bit two's complement b = -b
  269         X64_nop1    = 0x9000000000000001LL, // one byte NOP
  270         X64_nop2    = 0x9066000000000002LL, // two byte NOP
  271         X64_nop3    = 0x001F0F0000000003LL, // three byte NOP
  272         X64_nop4    = 0x00401F0F00000004LL, // four byte NOP
  273         X64_nop5    = 0x0000441F0F000005LL, // five byte NOP
  274         X64_nop6    = 0x0000441F0F660006LL, // six byte NOP
  275         X64_nop7    = 0x00000000801F0F07LL, // seven byte NOP
  276         X64_not     = 0xD0F7400000000003LL, // 32bit ones complement b = ~b
  277         X64_orlrr   = 0xC00B400000000003LL, // 32bit or r |= b
  278         X64_orqrr   = 0xC00B480000000003LL, // 64bit or r |= b
  279         X64_popr    = 0x5840000000000002LL, // 64bit pop r <- [rsp++]
  280         X64_pushr   = 0x5040000000000002LL, // 64bit push r -> [--rsp]
  281         X64_pxor    = 0xC0EF0F4066000005LL, // 128bit xor xmm-r ^= xmm-b
  282         X64_ret     = 0xC300000000000001LL, // near return from called procedure
  283         X64_sete    = 0xC0940F4000000004LL, // set byte if equal (ZF == 1)
  284         X64_seto    = 0xC0900F4000000004LL, // set byte if overflow (OF == 1)
  285         X64_setc    = 0xC0920F4000000004LL, // set byte if carry (CF == 1)
  286         X64_setl    = 0xC09C0F4000000004LL, // set byte if less (int <) (SF != OF)
  287         X64_setle   = 0xC09E0F4000000004LL, // set byte if less or equal (int <=) (ZF == 1 || SF != OF)
  288         X64_setg    = 0xC09F0F4000000004LL, // set byte if greater (int >) (ZF == 0 && SF == OF)
  289         X64_setge   = 0xC09D0F4000000004LL, // set byte if greater or equal (int >=) (SF == OF)
  290         X64_seta    = 0xC0970F4000000004LL, // set byte if above (uint >) (CF == 0 && ZF == 0)
  291         X64_setae   = 0xC0930F4000000004LL, // set byte if above or equal (uint >=) (CF == 0)
  292         X64_setb    = 0xC0920F4000000004LL, // set byte if below (uint <) (CF == 1)
  293         X64_setbe   = 0xC0960F4000000004LL, // set byte if below or equal (uint <=) (ZF == 1 || CF == 1)
  294         X64_subsd   = 0xC05C0F40F2000005LL, // subtract scalar double r -= b
  295         X64_shl     = 0xE0D3400000000003LL, // 32bit left shift r <<= rcx
  296         X64_shlq    = 0xE0D3480000000003LL, // 64bit left shift r <<= rcx
  297         X64_shr     = 0xE8D3400000000003LL, // 32bit uint right shift r >>= rcx
  298         X64_shrq    = 0xE8D3480000000003LL, // 64bit uint right shift r >>= rcx
  299         X64_sar     = 0xF8D3400000000003LL, // 32bit int right shift r >>= rcx
  300         X64_sarq    = 0xF8D3480000000003LL, // 64bit int right shift r >>= rcx
  301         X64_shli    = 0x00E0C14000000004LL, // 32bit left shift r <<= imm8
  302         X64_shlqi   = 0x00E0C14800000004LL, // 64bit left shift r <<= imm8
  303         X64_sari    = 0x00F8C14000000004LL, // 32bit int right shift r >>= imm8
  304         X64_sarqi   = 0x00F8C14800000004LL, // 64bit int right shift r >>= imm8
  305         X64_shri    = 0x00E8C14000000004LL, // 32bit uint right shift r >>= imm8
  306         X64_shrqi   = 0x00E8C14800000004LL, // 64bit uint right shift r >>= imm8
  307         X64_subqrr  = 0xC02B480000000003LL, // 64bit sub r -= b
  308         X64_subrr   = 0xC02B400000000003LL, // 32bit sub r -= b
  309         X64_subqri  = 0xE881480000000003LL, // 64bit sub r -= int64(imm32)
  310         X64_subqr8  = 0x00E8834800000004LL, // 64bit sub r -= int64(imm8)
  311         X64_ucomisd = 0xC02E0F4066000005LL, // unordered compare scalar double
  312         X64_xorqrr  = 0xC033480000000003LL, // 64bit xor r ^= b
  313         X64_xorrr   = 0xC033400000000003LL, // 32bit xor r ^= b
  314         X64_xorpd   = 0xC0570F4066000005LL, // 128bit xor xmm (two packed doubles)
  315         X64_xorps   = 0xC0570F4000000004LL, // 128bit xor xmm (four packed singles), one byte shorter
  316         X64_xorpsm  = 0x05570F4000000004LL, // 128bit xor xmm, [rip+disp32]
  317         X64_xorpsa  = 0x2504570F40000005LL, // 128bit xor xmm, [disp32]
  318         X64_inclmRAX= 0x00FF000000000002LL, // incl (%rax)
  319         X64_jmpx    = 0xC524ff4000000004LL, // jmp [d32+x*8]
  320         X64_jmpxb   = 0xC024ff4000000004LL, // jmp [b+x*8]
  321
  322         X86_and8r   = 0xC022000000000002LL, // and rl,rh
  323         X86_sete    = 0xC0940F0000000003LL, // no-rex version of X64_sete
  324         X86_setnp   = 0xC09B0F0000000003LL  // no-rex set byte if odd parity (ordered fcmp result) (PF == 0)
  325     };
 326
  327     typedef uint32_t RegisterMask; // set of registers as a bit mask: bit N stands for the Register whose value is N
  328
  329     static const RegisterMask GpRegs = 0xffff; // bits 0-15: RAX..R15
  330     static const RegisterMask FpRegs = 0xffff0000; // bits 16-31: XMM0..XMM15
  331 #ifdef _MSC_VER // Microsoft compiler: RSI/RDI are also treated as saved, and 4 argument registers are used
  332     static const RegisterMask SavedRegs = 1<<RBX | 1<<RSI | 1<<RDI | 1<<R12 | 1<<R13 | 1<<R14 | 1<<R15;
  333     static const int NumSavedRegs = 7; // rbx, rsi, rdi, r12-15
  334     static const int NumArgRegs = 4;
  335 #else // any other compiler: 5 saved registers and 6 argument registers
  336     static const RegisterMask SavedRegs = 1<<RBX | 1<<R12 | 1<<R13 | 1<<R14 | 1<<R15;
  337     static const int NumSavedRegs = 5; // rbx, r12-15
  338     static const int NumArgRegs = 6;
  339 #endif
  340     // Warning:  when talking about single byte registers, RSP/RBP/RSI/RDI are
  341     // actually synonyms for AH/CH/DH/BH.  So this value means "any
  342     // single-byte GpReg except AH/CH/DH/BH".
  343     static const int SingleByteStoreRegs = GpRegs & ~(1<<RSP | 1<<RBP | 1<<RSI | 1<<RDI); // evaluates to 0xff0f; note it is typed int, not RegisterMask
 344
 345     static inline bool IsFpReg(Register r) {
 346         return ((1<<r) & FpRegs) != 0;
 347     }
 348     static inline bool IsGpReg(Register r) {
 349         return ((1<<r) & GpRegs) != 0;
 350     }
 351
  352     verbose_only( extern const char* regNames[]; ) // register-name tables defined elsewhere; presumably only declared in verbose builds (verbose_only is not defined in this header)
  353     verbose_only( extern const char* gpRegNames32[]; ) // presumably the 32-bit names of the general-purpose registers
  354     verbose_only( extern const char* gpRegNames8[]; ) // presumably the low-byte names of the general-purpose registers
  355     verbose_only( extern const char* gpRegNames8hi[]; ) // presumably the AH/CH/DH/BH-style names -- confirm against the definition
 356
  357     #define DECLARE_PLATFORM_STATS() // platform hook with an empty expansion: this backend contributes nothing here
  358     #define DECLARE_PLATFORM_REGALLOC() // platform hook with an empty expansion: this backend contributes nothing here
 359
  360     #define DECLARE_PLATFORM_ASSEMBLER() /* x64-specific member declarations; presumably expanded inside the Assembler class -- confirm at the use site */ \
  361         const static Register argRegs[NumArgRegs], retRegs[1];              \
  362         void underrunProtect(ptrdiff_t bytes);                              \
  363         void nativePageReset();                                             \
  364         void nativePageSetup();                                             \
  365         void asm_qbinop(LIns*);                                             \
  366         void MR(Register, Register);\
  367         void JMP(NIns*);\
  368         void JMPl(NIns*);\
  369         void emit(uint64_t op);\
  370         void emit8(uint64_t op, int64_t val);\
  371         void emit_target8(size_t underrun, uint64_t op, NIns* target);\
  372         void emit_target32(size_t underrun, uint64_t op, NIns* target);\
  373         void emit_target64(size_t underrun, uint64_t op, NIns* target); \
  374         void emitrr(uint64_t op, Register r, Register b);\
  375         void emitrxb(uint64_t op, Register r, Register x, Register b);\
  376         void emitxb(uint64_t op, Register x, Register b) { emitrxb(op, (Register)0, x, b); } /* forwards to emitrxb with r = 0 */ \
  377         void emitrr8(uint64_t op, Register r, Register b);\
  378         void emitr(uint64_t op, Register b) { emitrr(op, (Register)0, b); } /* forwards to emitrr with r = 0 */ \
  379         void emitr8(uint64_t op, Register b) { emitrr8(op, (Register)0, b); } /* forwards to emitrr8 with r = 0 */ \
  380         void emitprr(uint64_t op, Register r, Register b);\
  381         void emitrm8(uint64_t op, Register r, int32_t d, Register b);\
  382         void emitrm(uint64_t op, Register r, int32_t d, Register b);\
  383         void emitrm_wide(uint64_t op, Register r, int32_t d, Register b);\
  384         uint64_t emit_disp32(uint64_t op, int32_t d);\
  385         void emitprm(uint64_t op, Register r, int32_t d, Register b);\
  386         void emitrr_imm(uint64_t op, Register r, Register b, int32_t imm);\
  387         void emitr_imm64(uint64_t op, Register r, uint64_t imm);\
  388         void emitrxb_imm(uint64_t op, Register r, Register x, Register b, int32_t imm);\
  389         void emitr_imm(uint64_t op, Register r, int32_t imm) { emitrr_imm(op, (Register)0, r, imm); } /* forwards to emitrr_imm with r = 0; the register goes in the b slot */ \
  390         void emitr_imm8(uint64_t op, Register b, int32_t imm8);\
  391         void emitxm_abs(uint64_t op, Register r, int32_t addr32);\
  392         void emitxm_rel(uint64_t op, Register r, NIns* addr64);\
  393         bool isTargetWithinS8(NIns* target);\
  394         bool isTargetWithinS32(NIns* target);\
  395         void asm_immi(Register r, int32_t v, bool canClobberCCs);\
  396         void asm_immq(Register r, uint64_t v, bool canClobberCCs);\
  397         void asm_immf(Register r, uint64_t v, bool canClobberCCs);\
  398         void asm_regarg(ArgType, LIns*, Register);\
  399         void asm_stkarg(ArgType, LIns*, int);\
  400         void asm_shift(LIns*);\
  401         void asm_shift_imm(LIns*);\
  402         void asm_arith_imm(LIns*);\
  403         void beginOp1Regs(LIns *ins, RegisterMask allow, Register &rr, Register &ra);\
  404         void beginOp2Regs(LIns *ins, RegisterMask allow, Register &rr, Register &ra, Register &rb);\
  405         void endOpRegs(LIns *ins, Register rr, Register ra);\
  406         void beginLoadRegs(LIns *ins, RegisterMask allow, Register &rr, int32_t &d, Register &rb);\
  407         void endLoadRegs(LIns *ins);\
  408         void dis(NIns *p, int bytes);\
  409         void asm_cmp(LIns*);\
  410         void asm_cmp_imm(LIns*);\
  411         void asm_fcmp(LIns*, LIns*);\
  412         NIns* asm_fbranch(bool, LIns*, NIns*);\
  413         void asm_div(LIns *ins);\
  414         void asm_div_mod(LIns *ins);\
  415         int max_stk_used;\
  416         void PUSHR(Register r); /* from here on the names mirror the X64Opcode constants; presumably one emitter per instruction form */ \
  417         void POPR(Register r);\
  418         void NOT(Register r);\
  419         void NEG(Register r);\
  420         void IDIV(Register r);\
  421         void SHR(Register r);\
  422         void SAR(Register r);\
  423         void SHL(Register r);\
  424         void SHRQ(Register r);\
  425         void SARQ(Register r);\
  426         void SHLQ(Register r);\
  427         void SHRI(Register r, int i);\
  428         void SARI(Register r, int i);\
  429         void SHLI(Register r, int i);\
  430         void SHRQI(Register r, int i);\
  431         void SARQI(Register r, int i);\
  432         void SHLQI(Register r, int i);\
  433         void SETE(Register r);\
  434         void SETL(Register r);\
  435         void SETLE(Register r);\
  436         void SETG(Register r);\
  437         void SETGE(Register r);\
  438         void SETB(Register r);\
  439         void SETBE(Register r);\
  440         void SETA(Register r);\
  441         void SETAE(Register r);\
  442         void SETO(Register r);\
  443         void ADDRR(Register l, Register r);\
  444         void SUBRR(Register l, Register r);\
  445         void ANDRR(Register l, Register r);\
  446         void ORLRR(Register l, Register r);\
  447         void XORRR(Register l, Register r);\
  448         void IMUL(Register l, Register r);\
  449         void CMPLR(Register l, Register r);\
  450         void MOVLR(Register l, Register r);\
  451         void ADDQRR(Register l, Register r);\
  452         void SUBQRR(Register l, Register r);\
  453         void ANDQRR(Register l, Register r);\
  454         void ORQRR(Register l, Register r);\
  455         void XORQRR(Register l, Register r);\
  456         void CMPQR(Register l, Register r);\
  457         void MOVQR(Register l, Register r);\
  458         void MOVAPSR(Register l, Register r);\
  459         void CMOVNO(Register l, Register r);\
  460         void CMOVNE(Register l, Register r);\
  461         void CMOVNL(Register l, Register r);\
  462         void CMOVNLE(Register l, Register r);\
  463         void CMOVNG(Register l, Register r);\
  464         void CMOVNGE(Register l, Register r);\
  465         void CMOVNB(Register l, Register r);\
  466         void CMOVNBE(Register l, Register r);\
  467         void CMOVNA(Register l, Register r);\
  468         void CMOVNAE(Register l, Register r);\
  469         void CMOVQNO(Register l, Register r);\
  470         void CMOVQNE(Register l, Register r);\
  471         void CMOVQNL(Register l, Register r);\
  472         void CMOVQNLE(Register l, Register r);\
  473         void CMOVQNG(Register l, Register r);\
  474         void CMOVQNGE(Register l, Register r);\
  475         void CMOVQNB(Register l, Register r);\
  476         void CMOVQNBE(Register l, Register r);\
  477         void CMOVQNA(Register l, Register r);\
  478         void CMOVQNAE(Register l, Register r);\
  479         void MOVSXDR(Register l, Register r);\
  480         void MOVZX8(Register l, Register r);\
  481         void XORPS(Register r);\
  482         void XORPS(Register l, Register r);\
  483         void DIVSD(Register l, Register r);\
  484         void MULSD(Register l, Register r);\
  485         void ADDSD(Register l, Register r);\
  486         void SUBSD(Register l, Register r);\
  487         void CVTSQ2SD(Register l, Register r);\
  488         void CVTSI2SD(Register l, Register r);\
  489         void CVTSS2SD(Register l, Register r);\
  490         void CVTSD2SS(Register l, Register r);\
  491         void CVTSD2SI(Register l, Register r);\
  492         void UCOMISD(Register l, Register r);\
  493         void MOVQRX(Register l, Register r);\
  494         void MOVQXR(Register l, Register r);\
  495         void MOVI(Register r, int32_t i32);\
  496         void ADDLRI(Register r, int32_t i32);\
  497         void SUBLRI(Register r, int32_t i32);\
  498         void ANDLRI(Register r, int32_t i32);\
  499         void ORLRI(Register r, int32_t i32);\
  500         void XORLRI(Register r, int32_t i32);\
  501         void CMPLRI(Register r, int32_t i32);\
  502         void ADDQRI(Register r, int32_t i32);\
  503         void SUBQRI(Register r, int32_t i32);\
  504         void ANDQRI(Register r, int32_t i32);\
  505         void ORQRI(Register r, int32_t i32);\
  506         void XORQRI(Register r, int32_t i32);\
  507         void CMPQRI(Register r, int32_t i32);\
  508         void MOVQI32(Register r, int32_t i32);\
  509         void ADDLR8(Register r, int32_t i8);\
  510         void SUBLR8(Register r, int32_t i8);\
  511         void ANDLR8(Register r, int32_t i8);\
  512         void ORLR8(Register r, int32_t i8);\
  513         void XORLR8(Register r, int32_t i8);\
  514         void CMPLR8(Register r, int32_t i8);\
  515         void ADDQR8(Register r, int32_t i8);\
  516         void SUBQR8(Register r, int32_t i8);\
  517         void ANDQR8(Register r, int32_t i8);\
  518         void ORQR8(Register r, int32_t i8);\
  519         void XORQR8(Register r, int32_t i8);\
  520         void CMPQR8(Register r, int32_t i8);\
  521         void IMULI(Register l, Register r, int32_t i32);\
  522         void MOVQI(Register r, uint64_t u64);\
  523         void LEARIP(Register r, int32_t d);\
  524         void LEAQRM(Register r, int d, Register b);\
  525         void MOVLRM(Register r, int d, Register b);\
  526         void MOVQRM(Register r, int d, Register b);\
  527         void MOVBMR(Register r, int d, Register b);\
  528         void MOVSMR(Register r, int d, Register b);\
  529         void MOVLMR(Register r, int d, Register b);\
  530         void MOVQMR(Register r, int d, Register b);\
  531         void MOVZX8M(Register r, int d, Register b);\
  532         void MOVZX16M(Register r, int d, Register b);\
  533         void MOVSX8M(Register r, int d, Register b);\
  534         void MOVSX16M(Register r, int d, Register b);\
  535         void MOVSDRM(Register r, int d, Register b);\
  536         void MOVSDMR(Register r, int d, Register b);\
  537         void MOVSSMR(Register r, int d, Register b);\
  538         void MOVSSRM(Register r, int d, Register b);\
  539         void JMP8(size_t n, NIns* t);\
  540         void JMP32(size_t n, NIns* t);\
  541         void JMP64(size_t n, NIns* t);\
  542         void JMPX(Register indexreg, NIns** table);\
  543         void JMPXB(Register indexreg, Register tablereg);\
  544         void JO(size_t n, NIns* t);\
  545         void JE(size_t n, NIns* t);\
  546         void JL(size_t n, NIns* t);\
  547         void JLE(size_t n, NIns* t);\
  548         void JG(size_t n, NIns* t);\
  549         void JGE(size_t n, NIns* t);\
  550         void JB(size_t n, NIns* t);\
  551         void JBE(size_t n, NIns* t);\
  552         void JA(size_t n, NIns* t);\
  553         void JAE(size_t n, NIns* t);\
  554         void JP(size_t n, NIns* t);\
  555         void JNO(size_t n, NIns* t);\
  556         void JNE(size_t n, NIns* t);\
  557         void JNL(size_t n, NIns* t);\
  558         void JNLE(size_t n, NIns* t);\
  559         void JNG(size_t n, NIns* t);\
  560         void JNGE(size_t n, NIns* t);\
  561         void JNB(size_t n, NIns* t);\
  562         void JNBE(size_t n, NIns* t);\
  563         void JNA(size_t n, NIns* t);\
  564         void JNAE(size_t n, NIns* t);\
  565         void JO8(size_t n, NIns* t);\
  566         void JE8(size_t n, NIns* t);\
  567         void JL8(size_t n, NIns* t);\
  568         void JLE8(size_t n, NIns* t);\
  569         void JG8(size_t n, NIns* t);\
  570         void JGE8(size_t n, NIns* t);\
  571         void JB8(size_t n, NIns* t);\
  572         void JBE8(size_t n, NIns* t);\
  573         void JA8(size_t n, NIns* t);\
  574         void JAE8(size_t n, NIns* t);\
  575         void JP8(size_t n, NIns* t);\
  576         void JNO8(size_t n, NIns* t);\
  577         void JNE8(size_t n, NIns* t);\
  578         void JNL8(size_t n, NIns* t);\
  579         void JNLE8(size_t n, NIns* t);\
  580         void JNG8(size_t n, NIns* t);\
  581         void JNGE8(size_t n, NIns* t);\
  582         void JNB8(size_t n, NIns* t);\
  583         void JNBE8(size_t n, NIns* t);\
  584         void JNA8(size_t n, NIns* t);\
  585         void JNAE8(size_t n, NIns* t);\
  586         void CALL(size_t n, NIns* t);\
  587         void CALLRAX();\
  588         void RET();\
  589         void MOVQSPR(int d, Register r);\
  590         void XORPSA(Register r, int32_t i32);\
  591         void XORPSM(Register r, NIns* a64);\
  592         void X86_AND8R(Register r);\
  593         void X86_SETNP(Register r);\
  594         void X86_SETE(Register r); /* last declaration: the macro ends at the empty line that follows this continuation */ \
 595
  596     const int LARGEST_UNDERRUN_PROT = 32;  // largest value passed to underrunProtect (whose parameter is a byte count)
  597
  598     typedef uint8_t NIns; // unit of native code: one byte, so NIns* walks the code stream byte by byte
  599
  600     // Bytes of icache to flush after Assembler::patch
  601     const size_t LARGEST_BRANCH_PATCH = 16 * sizeof(NIns); // 16 bytes, since NIns is a single byte
 602
 603 } // namespace nanojit
 604
 605 #endif // __nanojit_NativeX64__