1 /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
2 /* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
3 /* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
16 * The Original Code is [Open Source Virtual Machine].
18 * The Initial Developer of the Original Code is
19 * Adobe System Incorporated.
20 * Portions created by the Initial Developer are Copyright (C) 2008
21 * the Initial Developer. All Rights Reserved.
26 * Alternatively, the contents of this file may be used under the terms of
27 * either the GNU General Public License Version 2 or later (the "GPL"), or
28 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
40 #ifndef __nanojit_NativeX64__
41 #define __nanojit_NativeX64__
44 #error "NANOJIT_64BIT must be defined for X64 backend"
49 #include "../vprof/vprof.h"
50 #define count_instr() _nvprof("x64",1)
51 #define count_prolog() _nvprof("x64-prolog",1); count_instr();
// Profile counter for IMT stubs (presumably "imt" = interface method table —
// confirm against callers).  Bumps the x64-imt counter and the overall
// instruction counter.  Fix: the original was missing the ';' between the two
// calls (compare count_prolog above), making every expansion a syntax error.
#define count_imt() _nvprof("x64-imt",1); count_instr()
55 #define count_prolog()
// Backend sizing parameters.
const int NJ_LOG2_PAGE_SIZE = 12;   // code pages are 2^12 = 4096 bytes (4K)

// Upper bound on stack entries handled by this backend — NOTE(review):
// exact meaning depends on users of this constant elsewhere in nanojit.
#define NJ_MAX_STACK_ENTRY 256

// Stack alignment kept by generated code (16 bytes, as the x86-64 ABI expects).
#define NJ_ALIGN_STACK 16
66 RAX
= 0, // 1st int return, # of sse varargs
67 RCX
= 1, // 4th int arg
68 RDX
= 2, // 3rd int arg 2nd return
71 RBP
= 5, // frame ptr, saved, sib reqd
72 RSI
= 6, // 2nd int arg
73 RDI
= 7, // 1st int arg
74 R8
= 8, // 5th int arg
75 R9
= 9, // 6th int arg
79 R13
= 13, // saved, sib reqd like rbp
83 XMM0
= 16, // 1st double arg, return
84 XMM1
= 17, // 2nd double arg, return
85 XMM2
= 18, // 3rd double arg
86 XMM3
= 19, // 4th double arg
87 XMM4
= 20, // 5th double arg
88 XMM5
= 21, // 6th double arg
89 XMM6
= 22, // 7th double arg
90 XMM7
= 23, // 8th double arg
93 XMM10
= 26, // scratch
94 XMM11
= 27, // scratch
95 XMM12
= 28, // scratch
96 XMM13
= 29, // scratch
97 XMM14
= 30, // scratch
98 XMM15
= 31, // scratch
 * Micro-templating of variable-length opcodes, an idea first
 * described by Mike Pall of LuaJIT.
110 * X86-64 opcode encodings: LSB encodes the length of the
111 * opcode in bytes, remaining bytes are encoded as 1-7 bytes
112 * in a single uint64_t value. The value is written as a single
113 * store into the code stream, and the code pointer is decremented
114 * by the length. each successive instruction partially overlaps
117 * emit methods below are able to encode mod/rm, sib, rex, and
118 * register and small immediate values into these opcode values
119 * without much branchy code.
121 * these opcodes encapsulate all the const parts of the instruction.
122 * for example, the alu-immediate opcodes (add, sub, etc) encode
123 * part of their opcode in the R field of the mod/rm byte; this
124 * hardcoded value is in the constant below, and the R argument
125 * to emitrr() is 0. In a few cases, a whole instruction is encoded
126 * this way (eg callrax).
128 * when a disp32, imm32, or imm64 suffix can't fit in an 8-byte
129 * opcode, then it is written into the code separately and not counted
130 * in the opcode length.
134 #if defined(_MSC_VER) && _MSC_VER >= 1400
135 #pragma warning(disable:4480) // nonstandard extension used: specifying underlying type for enum
139 // 64bit opcode constants
141 X64_addqrr
= 0xC003480000000003LL
, // 64bit add r += b
142 X64_addqri
= 0xC081480000000003LL
, // 64bit add r += int64(imm32)
143 X64_addqr8
= 0x00C0834800000004LL
, // 64bit add r += int64(imm8)
144 X64_andqri
= 0xE081480000000003LL
, // 64bit and r &= int64(imm32)
145 X64_andqr8
= 0x00E0834800000004LL
, // 64bit and r &= int64(imm8)
146 X64_orqri
= 0xC881480000000003LL
, // 64bit or r |= int64(imm32)
147 X64_orqr8
= 0x00C8834800000004LL
, // 64bit or r |= int64(imm8)
148 X64_xorqri
= 0xF081480000000003LL
, // 64bit xor r ^= int64(imm32)
149 X64_xorqr8
= 0x00F0834800000004LL
, // 64bit xor r ^= int64(imm8)
150 X64_addlri
= 0xC081400000000003LL
, // 32bit add r += imm32
151 X64_addlr8
= 0x00C0834000000004LL
, // 32bit add r += imm8
152 X64_andlri
= 0xE081400000000003LL
, // 32bit and r &= imm32
153 X64_andlr8
= 0x00E0834000000004LL
, // 32bit and r &= imm8
154 X64_orlri
= 0xC881400000000003LL
, // 32bit or r |= imm32
155 X64_orlr8
= 0x00C8834000000004LL
, // 32bit or r |= imm8
156 X64_sublri
= 0xE881400000000003LL
, // 32bit sub r -= imm32
157 X64_sublr8
= 0x00E8834000000004LL
, // 32bit sub r -= imm8
158 X64_xorlri
= 0xF081400000000003LL
, // 32bit xor r ^= imm32
159 X64_xorlr8
= 0x00F0834000000004LL
, // 32bit xor r ^= imm8
160 X64_addrr
= 0xC003400000000003LL
, // 32bit add r += b
161 X64_andqrr
= 0xC023480000000003LL
, // 64bit and r &= b
162 X64_andrr
= 0xC023400000000003LL
, // 32bit and r &= b
163 X64_call
= 0x00000000E8000005LL
, // near call
164 X64_callrax
= 0xD0FF000000000002LL
, // indirect call to addr in rax (no REX)
165 X64_cmovqne
= 0xC0450F4800000004LL
, // 64bit conditional mov if (c) r = b
166 X64_cmplr
= 0xC03B400000000003LL
, // 32bit compare r,b
167 X64_cmpqr
= 0xC03B480000000003LL
, // 64bit compare r,b
168 X64_cmplri
= 0xF881400000000003LL
, // 32bit compare r,imm32
169 X64_cmpqri
= 0xF881480000000003LL
, // 64bit compare r,int64(imm32)
170 X64_cmplr8
= 0x00F8834000000004LL
, // 32bit compare r,imm8
171 X64_cmpqr8
= 0x00F8834800000004LL
, // 64bit compare r,int64(imm8)
172 X64_cvtsi2sd
= 0xC02A0F40F2000005LL
, // convert int32 to double r = (double) b
173 X64_cvtsq2sd
= 0xC02A0F48F2000005LL
, // convert int64 to double r = (double) b
174 X64_divsd
= 0xC05E0F40F2000005LL
, // divide scalar double r /= b
175 X64_mulsd
= 0xC0590F40F2000005LL
, // multiply scalar double r *= b
176 X64_addsd
= 0xC0580F40F2000005LL
, // add scalar double r += b
177 X64_imul
= 0xC0AF0F4000000004LL
, // 32bit signed mul r *= b
178 X64_imuli
= 0xC069400000000003LL
, // 32bit signed mul r = b * imm32
179 X64_imul8
= 0x00C06B4000000004LL
, // 32bit signed mul r = b * imm8
180 X64_jmp
= 0x00000000E9000005LL
, // jump near rel32
181 X64_jmp8
= 0x00EB000000000002LL
, // jump near rel8
182 X64_jb
= 0x00000000820F0006LL
, // jump near if below (uint <)
183 X64_jae
= 0x00000000830F0006LL
, // jump near if above or equal (uint >=)
184 X64_ja
= 0x00000000870F0006LL
, // jump near if above (uint >)
185 X64_jbe
= 0x00000000860F0006LL
, // jump near if below or equal (uint <=)
186 X64_je
= 0x00000000840F0006LL
, // near jump if equal
187 X64_jne
= 0x00000000850F0006LL
, // jump near if not equal
188 X64_jl
= 0x000000008C0F0006LL
, // jump near if less (int <)
189 X64_jge
= 0x000000008D0F0006LL
, // jump near if greater or equal (int >=)
190 X64_jg
= 0x000000008F0F0006LL
, // jump near if greater (int >)
191 X64_jle
= 0x000000008E0F0006LL
, // jump near if less or equal (int <=)
192 X64_jp
= 0x000000008A0F0006LL
, // jump near if parity (PF == 1)
193 X64_jnp
= 0x000000008B0F0006LL
, // jump near if not parity (PF == 0)
194 X64_jneg
= 0x0000000001000000LL
, // xor with this mask to negate the condition
195 X64_jb8
= 0x0072000000000002LL
, // jump near if below (uint <)
196 X64_jae8
= 0x0073000000000002LL
, // jump near if above or equal (uint >=)
197 X64_ja8
= 0x0077000000000002LL
, // jump near if above (uint >)
198 X64_jbe8
= 0x0076000000000002LL
, // jump near if below or equal (uint <=)
199 X64_je8
= 0x0074000000000002LL
, // near jump if equal
200 X64_jne8
= 0x0075000000000002LL
, // jump near if not equal
201 X64_jl8
= 0x007C000000000002LL
, // jump near if less (int <)
202 X64_jge8
= 0x007D000000000002LL
, // jump near if greater or equal (int >=)
203 X64_jg8
= 0x007F000000000002LL
, // jump near if greater (int >)
204 X64_jle8
= 0x007E000000000002LL
, // jump near if less or equal (int <=)
205 X64_jp8
= 0x007A000000000002LL
, // jump near if parity (PF == 1)
206 X64_jnp8
= 0x007B000000000002LL
, // jump near if not parity (PF == 0)
207 X64_jneg8
= 0x0001000000000000LL
, // xor with this mask to negate the condition
208 X64_leaqrm
= 0x00000000808D4807LL
, // 64bit load effective addr reg <- disp32+base
209 X64_learm
= 0x00000000808D4007LL
, // 32bit load effective addr reg <- disp32+base
210 X64_movlr
= 0xC08B400000000003LL
, // 32bit mov r <- b
211 X64_movlmr
= 0x0000000080894007LL
, // 32bit store r -> [b+d32]
212 X64_movlrm
= 0x00000000808B4007LL
, // 32bit load r <- [b+d32]
213 X64_movqmr
= 0x0000000080894807LL
, // 64bit store gpr -> [b+d32]
214 X64_movqspr
= 0x0024448948000005LL
, // 64bit store gpr -> [rsp+d32] (sib required)
215 X64_movqr
= 0xC08B480000000003LL
, // 64bit mov r <- b
216 X64_movqi
= 0xB848000000000002LL
, // 64bit mov r <- imm64
217 X64_movi
= 0xB840000000000002LL
, // 32bit mov r <- imm32
218 X64_movqi32
= 0xC0C7480000000003LL
, // 64bit mov r <- int64(imm32)
219 X64_movapsr
= 0xC0280F4000000004LL
, // 128bit mov xmm <- xmm
220 X64_movqrx
= 0xC07E0F4866000005LL
, // 64bit mov b <- xmm-r
221 X64_movqxr
= 0xC06E0F4866000005LL
, // 64bit mov b -> xmm-r
222 X64_movqrm
= 0x00000000808B4807LL
, // 64bit load r <- [b+d32]
223 X64_movsdrr
= 0xC0100F40F2000005LL
, // 64bit mov xmm-r <- xmm-b (upper 64bits unchanged)
224 X64_movsdrm
= 0x80100F40F2000005LL
, // 64bit load xmm-r <- [b+d32] (upper 64 cleared)
225 X64_movsdmr
= 0x80110F40F2000005LL
, // 64bit store xmm-r -> [b+d32]
226 X64_movsxdr
= 0xC063480000000003LL
, // sign extend i32 to i64 r = (int64)(int32) b
227 X64_movzx8
= 0xC0B60F4000000004LL
, // zero extend i8 to i64 r = (uint64)(uint8) b
228 X64_movzx8m
= 0x80B60F4000000004LL
, // zero extend i8 load to i32 r <- [b+d32]
229 X64_movzx16m
= 0x80B70F4000000004LL
, // zero extend i16 load to i32 r <- [b+d32]
230 X64_neg
= 0xD8F7400000000003LL
, // 32bit two's complement b = -b
231 X64_nop1
= 0x9000000000000001LL
, // one byte NOP
232 X64_nop2
= 0x9066000000000002LL
, // two byte NOP
233 X64_nop3
= 0x001F0F0000000003LL
, // three byte NOP
234 X64_nop4
= 0x00401F0F00000004LL
, // four byte NOP
235 X64_nop5
= 0x0000441F0F000005LL
, // five byte NOP
236 X64_nop6
= 0x0000441F0F660006LL
, // six byte NOP
237 X64_nop7
= 0x00000000801F0F07LL
, // seven byte NOP
238 X64_not
= 0xD0F7400000000003LL
, // 32bit one's complement b = ~b
239 X64_orlrr
= 0xC00B400000000003LL
, // 32bit or r |= b
240 X64_orqrr
= 0xC00B480000000003LL
, // 64bit or r |= b
241 X64_popr
= 0x5840000000000002LL
, // 64bit pop r <- [rsp++]
242 X64_pushr
= 0x5040000000000002LL
, // 64bit push r -> [--rsp]
243 X64_pxor
= 0xC0EF0F4066000005LL
, // 128bit xor xmm-r ^= xmm-b
244 X64_ret
= 0xC300000000000001LL
, // near return from called procedure
245 X64_sete
= 0xC0940F4000000004LL
, // set byte if equal (ZF == 1)
246 X64_seto
= 0xC0900F4000000004LL
, // set byte if overflow (OF == 1)
247 X64_setc
= 0xC0920F4000000004LL
, // set byte if carry (CF == 1)
248 X64_setl
= 0xC09C0F4000000004LL
, // set byte if less (int <) (SF != OF)
249 X64_setle
= 0xC09E0F4000000004LL
, // set byte if less or equal (int <=) (ZF == 1 || SF != OF)
250 X64_setg
= 0xC09F0F4000000004LL
, // set byte if greater (int >) (ZF == 0 && SF == OF)
251 X64_setge
= 0xC09D0F4000000004LL
, // set byte if greater or equal (int >=) (SF == OF)
252 X64_seta
= 0xC0970F4000000004LL
, // set byte if above (uint >) (CF == 0 && ZF == 0)
253 X64_setae
= 0xC0930F4000000004LL
, // set byte if above or equal (uint >=) (CF == 0)
254 X64_setb
= 0xC0920F4000000004LL
, // set byte if below (uint <) (CF == 1)
255 X64_setbe
= 0xC0960F4000000004LL
, // set byte if below or equal (uint <=) (ZF == 1 || CF == 1)
256 X64_subsd
= 0xC05C0F40F2000005LL
, // subtract scalar double r -= b
257 X64_shl
= 0xE0D3400000000003LL
, // 32bit left shift r <<= rcx
258 X64_shlq
= 0xE0D3480000000003LL
, // 64bit left shift r <<= rcx
259 X64_shr
= 0xE8D3400000000003LL
, // 32bit uint right shift r >>= rcx
260 X64_shrq
= 0xE8D3480000000003LL
, // 64bit uint right shift r >>= rcx
261 X64_sar
= 0xF8D3400000000003LL
, // 32bit int right shift r >>= rcx
262 X64_sarq
= 0xF8D3480000000003LL
, // 64bit int right shift r >>= rcx
263 X64_shli
= 0x00E0C14000000004LL
, // 32bit left shift r <<= imm8
264 X64_shlqi
= 0x00E0C14800000004LL
, // 64bit left shift r <<= imm8
265 X64_sari
= 0x00F8C14000000004LL
, // 32bit int right shift r >>= imm8
266 X64_sarqi
= 0x00F8C14800000004LL
, // 64bit int right shift r >>= imm8
267 X64_shri
= 0x00E8C14000000004LL
, // 32bit uint right shift r >>= imm8
268 X64_shrqi
= 0x00E8C14800000004LL
, // 64bit uint right shift r >>= imm8
269 X64_subqrr
= 0xC02B480000000003LL
, // 64bit sub r -= b
270 X64_subrr
= 0xC02B400000000003LL
, // 32bit sub r -= b
271 X64_subqri
= 0xE881480000000003LL
, // 64bit sub r -= int64(imm32)
272 X64_subqr8
= 0x00E8834800000004LL
, // 64bit sub r -= int64(imm8)
273 X64_ucomisd
= 0xC02E0F4066000005LL
, // unordered compare scalar double
274 X64_xorqrr
= 0xC033480000000003LL
, // 64bit xor r ^= b
275 X64_xorrr
= 0xC033400000000003LL
, // 32bit xor r ^= b
276 X64_xorpd
= 0xC0570F4066000005LL
, // 128bit xor xmm (two packed doubles)
277 X64_xorps
= 0xC0570F4000000004LL
, // 128bit xor xmm (four packed singles), one byte shorter
278 X64_xorpsm
= 0x05570F4000000004LL
, // 128bit xor xmm, [rip+disp32]
279 X64_xorpsa
= 0x2504570F40000005LL
, // 128bit xor xmm, [disp32]
281 X86_and8r
= 0xC022000000000002LL
, // and rl,rh
282 X86_sete
= 0xC0940F0000000003LL
, // no-rex version of X64_sete
283 X86_setnp
= 0xC09B0F0000000003LL
// no-rex set byte if odd parity (ordered fcmp result) (PF == 0)
286 typedef uint32_t RegisterMask
;
288 static const RegisterMask GpRegs
= 0xffff;
289 static const RegisterMask FpRegs
= 0xffff0000;
290 static const bool CalleeRegsNeedExplicitSaving
= true;
292 static const RegisterMask SavedRegs
= 1<<RBX
| 1<<RSI
| 1<<RDI
| 1<<R12
| 1<<R13
| 1<<R14
| 1<<R15
;
293 static const int NumSavedRegs
= 7; // rbx, rsi, rdi, r12-15
294 static const int NumArgRegs
= 4;
296 static const RegisterMask SavedRegs
= 1<<RBX
| 1<<R12
| 1<<R13
| 1<<R14
| 1<<R15
;
297 static const int NumSavedRegs
= 5; // rbx, r12-15
298 static const int NumArgRegs
= 6;
301 static inline bool IsFpReg(Register r
) {
302 return ((1<<r
) & FpRegs
) != 0;
304 static inline bool IsGpReg(Register r
) {
305 return ((1<<r
) & GpRegs
) != 0;
308 verbose_only( extern const char* regNames
[]; )
310 #define DECLARE_PLATFORM_STATS()
311 #define DECLARE_PLATFORM_REGALLOC()
313 #define DECLARE_PLATFORM_ASSEMBLER() \
314 const static Register argRegs[NumArgRegs], retRegs[1]; \
315 void underrunProtect(ptrdiff_t bytes); \
316 void nativePageReset(); \
317 void nativePageSetup(); \
318 void asm_qbinop(LIns*); \
319 void MR(Register, Register);\
322 void emit(uint64_t op);\
323 void emit8(uint64_t op, int64_t val);\
324 void emit32(uint64_t op, int64_t val);\
325 void emitrr(uint64_t op, Register r, Register b);\
326 void emitrr8(uint64_t op, Register r, Register b);\
327 void emitr(uint64_t op, Register b) { emitrr(op, (Register)0, b); }\
328 void emitr8(uint64_t op, Register b) { emitrr8(op, (Register)0, b); }\
329 void emitprr(uint64_t op, Register r, Register b);\
330 void emitrm(uint64_t op, Register r, int32_t d, Register b);\
331 void emitrm_wide(uint64_t op, Register r, int32_t d, Register b);\
332 uint64_t emit_disp32(uint64_t op, int32_t d);\
333 void emitprm(uint64_t op, Register r, int32_t d, Register b);\
334 void emitrr_imm(uint64_t op, Register r, Register b, int32_t imm);\
335 void emitr_imm(uint64_t op, Register r, int32_t imm) { emitrr_imm(op, (Register)0, r, imm); }\
336 void emitr_imm8(uint64_t op, Register b, int32_t imm8);\
337 void emit_int(Register r, int32_t v);\
338 void emit_quad(Register r, uint64_t v);\
339 void asm_regarg(ArgSize, LIns*, Register);\
340 void asm_stkarg(ArgSize, LIns*, int);\
341 void asm_shift(LIns*);\
342 void asm_shift_imm(LIns*);\
343 void asm_arith_imm(LIns*);\
344 void regalloc_unary(LIns *ins, RegisterMask allow, Register &rr, Register &ra);\
345 void regalloc_binary(LIns *ins, RegisterMask allow, Register &rr, Register &ra, Register &rb);\
346 void regalloc_load(LIns *ins, Register &rr, int32_t &d, Register &rb);\
347 void dis(NIns *p, int bytes);\
348 void asm_cmp(LIns*);\
349 void asm_cmp_imm(LIns*);\
350 void fcmp(LIns*, LIns*);\
351 NIns* asm_fbranch(bool, LIns*, NIns*);\
354 #define swapptrs() { NIns* _tins = _nIns; _nIns=_nExitIns; _nExitIns=_tins; }
// Largest byte count ever passed to underrunProtect().
const int LARGEST_UNDERRUN_PROT = 32;

// Native instructions on x64 are variable-length, so the instruction
// stream is addressed byte-by-byte.
typedef uint8_t NIns;
360 inline Register
nextreg(Register r
) {
361 return Register(r
+1);
364 } // namespace nanojit
366 #endif // __nanojit_NativeX64__