tagged release 0.6.4
[parrot.git] / src / jit / amd64 / jit_emit.h
blobccfe1c9f6e392d2108561a176b106873392fb558
1 /*
2 * Copyright (C) 2007-2008, The Perl Foundation.
3 */
5 /*
6 * jit_emit.h
8 * amd64
10 * $Id$
12 =head1 NAME
14 src/jit/amd64/jit_emit.h - AMD64 JIT Generation
16 =head1 DESCRIPTION
18 Provide the support for a JIT on the AMD64 architecture.
20 =cut
24 #ifndef PARROT_JIT_AMD64_JIT_EMIT_H_GUARD
25 #define PARROT_JIT_AMD64_JIT_EMIT_H_GUARD
29 hex to binary converter
30 perl -ne '@a=split;push@b,unpack"B*",chr hex foreach@a;print"@b\n";@b=()'
32 src/jit/amd64/jit_emit.h copied to src/jit_emit.h
33 src/jit/amd64/exec_dep.h copied to src/exec_dep.h
34 src/jit/amd64/core.jit used to build src/jit_cpu.h
35 src/jit/amd64/core.jit used to build src/exec_cpu.h
37 src/exec_start.c #define JIT_EMIT 1
38 src/exec.c #define JIT_EMIT 1
39 src/jit.c #define JIT_EMIT 0
40 src/jit_cpu.c #define JIT_EMIT 2
42 REX Byte
43 0100 wrxb
44 w => make 64 bits
45 r => use upper 8 registers for REG
46 x => high bit of index in SIB
47 b => use upper 8 registers for r/m, and for the base in SIB
49 ModRM
50 mm REG R/m
51 mm => mode
52 00 no displacement
53 01 disp8
54 10 disp16/32
55 11 only regs
56 xxx/yyy
57 0 EAX AX AL SS0 MM0
58 1 ECX CX CL SS1 MM1
59 2 EDX DX DL SS2 MM2
60 3 EBX BX BL SS3 MM3
61 4 ESP SP AH SS4 MM4
62 5 EBP BP CH SS5 MM5
63 6 ESI SI DH SS6 MM6
64 7 EDI DI BH SS7 MM7
66 SIB
67 ssiiibbb
68 scale
69 index
70 base
71 [index * 2**scale + base]
74 For calling functions use: RDI, RSI, RDX, RCX, R8, and R9
75 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, and XMM7
76 For varargs, RAX is used for the number of SSE regs
77 Extras are pushed onto the stack
78 The return is in RAX
80 R11 is a scratch register, not preserved, not used in calling
81 R10 is used for a static chain pointer
83 RBP, RBX, and R12->R15 are preserved
87 #include <unistd.h>
88 #include <limits.h>
90 void Parrot_jit_begin(Parrot_jit_info_t *, Interp *);
91 static const char div_by_zero[] = "Divide by zero";
92 static const int mxcsr = 0x7fa0; /* Add 6000 to mxcsr */
95 /* This is used for testing whether or not keeping these two in registers is an
96 * improvement or not. This file may need to be expanded further to know for
97 * sure. But so far, there appears a 20 second improvement on my 2GHz. */
98 #undef USE_OP_MAP_AND_CODE_START
102 * define all the available cpu registers
103 * reserve some for special purposes
/*
 * All available CPU integer registers, with some reserved for special
 * purposes.  Enumerator values 0..15 match the hardware register
 * numbers used in ModRM/SIB/REX encoding, so R8..R15 need a REX
 * extension bit (value & 8).
 */
typedef enum {
    RAX,
    ISR2 = RAX,         /* integer scratch register 2 (RAX is also the
                         * return-value / varargs SSE-count register) */
    RCX,
    RDX,
    RBX,
    RSP,
    RBP,
    RSI,
    RDI,
    R8,
    R9,
    R10,
    R11,
    ISR1 = R11,         /* integer scratch register 1 (not preserved,
                         * not used in the calling convention) */
    R12,
    R13,
#ifdef USE_OP_MAP_AND_CODE_START
    CODE_START = R13,   /* caches interp->code->base.data */
#endif
    R14,
#ifdef USE_OP_MAP_AND_CODE_START
    OP_MAP = R14,       /* caches jit_info->arena.op_map */
#endif
    R15,
    INTERP = R15        /* the Interp * is pinned in R15 */
} amd64_iregister_t;
137 =head2 Register usage
139 =over 4
141 =item RAX
143 Return values, second scratch
145 =item RCX
147 Allocated, unpreserved
149 =item RDX
151 Allocated, unpreserved
153 =item RBX
155 Parrot register frame pointer
157 =item RSP
159 Stack pointer
161 =item RBP
163 Base pointer
165 =item RSI
167 Allocated, unpreserved
169 =item RDI
171 Allocated, unpreserved
173 =item R8
175 Allocated, unpreserved
177 =item R9
179 Allocated, unpreserved
181 =item R10
183 Allocated, unpreserved
185 =item R11
187 Scratch
189 =item R12
191 Allocated, preserved
193 =item R13
195 Allocated, preserved, or code_start
197 =item R14
199 Allocated, preserved, or op_map
201 =item R15
203 Interp
205 =item XMM0
207 Scratch
209 =item XMM1-XMM15
211 Allocated, all unpreserved
213 =back
215 =cut
/* The Parrot register frame pointer lives in RBX. */
#define Parrot_jit_emit_get_base_reg_no(pc) RBX

/*
 * SSE registers, numbered 0..15 to match the hardware encoding.
 * XMM0 and XMM1 are reserved as float scratch registers FSR1/FSR2.
 */
typedef enum {
    XMM0,
    FSR1 = XMM0,
    XMM1,
    FSR2 = XMM1,
    XMM2,  XMM3,  XMM4,  XMM5,  XMM6,  XMM7,
    XMM8,  XMM9,  XMM10, XMM11, XMM12, XMM13, XMM14, XMM15
} amd64_fregister_t;

/* Branch fixup types; a plain call can be handled without a fixup. */
enum { JIT_AMD64BRANCH, JIT_AMD64JUMP, JIT_AMD64CALL };
237 * now define macros for all possible (and implemented) operations
239 * Parrot defines JIT_EMIT to 1 or 2, when this file is included in
240 * exec_cpu.c or jit_cpu.c
245 =head2 Macros
247 Most of the functionality is provided by macros instead of functions.
249 =over 4
251 =cut
/* Readable binary literals for ModRM / SIB field values. */
#define b00 0
#define b01 1
#define b10 2
#define b11 3

#define b000 0
#define b001 1
#define b010 2
#define b011 3
#define b100 4
#define b101 5
#define b110 6
#define b111 7
269 /* rex.[wrxb], incomplete but oh well */
272 =item C<emit_rex64(pc, reg, rm)>
274 The REX prefix, setting REX.W making the instruction 64 bit.
276 =item C<emit_rex(pc, reg, rm)>
278 The REX prefix, only emitted if using an extended register.
280 =cut
/* REX prefix with REX.W set: makes the operation 64 bits wide and
 * carries the high bit of the ModRM reg (REX.R) and r/m (REX.B)
 * register numbers. */
# define emit_rex64(pc, reg, rm) \
    *((pc)++) = (char)(0x48 | (((reg) & 8) >> 1) | (((rm) & 8) >> 3))

/* REX prefix without REX.W; only emitted when an extended register
 * (R8..R15 / XMM8..XMM15) is involved. */
# define emit_rex(pc, dst, src) { \
    if ((dst) & 8 || (src) & 8) \
        *((pc)++) = (char)(0x40 | (((dst) & 8) >> 1) | (((src) & 8) >> 3)); }

/* One-byte NOP (0x90); a 0x66 prefix can be used for wider padding. */
# define emit_nop(pc) { \
    *((pc)++) = (char)(0x90); }

/* ModRM byte: mode (2 bits), reg (3 bits), r/m (3 bits). */
# define emit_modrm(pc, mod, dst, src) { \
    *((pc)++) = (char)(((mod) << 6) | (((dst) & 7) << 3) | ((src) & 7)); }

/* SIB byte: scale (2 bits), index (3 bits), base (3 bits);
 * addresses [index * 2**scale + base]. */
# define emit_sib(pc, scale, index, base) { \
    *((pc)++) = (char)(((scale) << 6) | (((index) & 7) << 3) | ((base) & 7)); }
300 /* 0xXX +rq */
301 # define emit_op_r(op, pc, reg) { \
302 emit_rex64((pc), 0x0, (reg)); \
303 *((pc)++) = (char)((op) | ((reg) & 7)); \
306 # define emit_64op_r(op, pc, reg) { \
307 emit_rex((pc), 0x0, (reg)); \
308 *((pc)++) = (char)((op) | ((reg) & 7)); \
311 /* 0xXX /r */
312 # define emit_op_r_r(op, pc, dst, src) { \
313 emit_rex64((pc), (dst), (src)); \
314 *((pc)++) = (char) (op); \
315 emit_modrm((pc), b11, (dst), (src)); \
318 # define emit_op_r_mr(op, pc, dst, src, disp) { \
319 emit_rex64((pc), (dst), (src)); \
320 *((pc)++) = (char) (op); \
321 if ((disp) == 0) { \
322 emit_modrm((pc), b00, (dst), (src)); \
324 else if (is8bit(disp)) { \
325 emit_modrm((pc), b01, (dst), (src)); \
326 *((pc)++) = (char)(disp); \
328 else { \
329 emit_modrm((pc), b10, (dst), (src)); \
330 *(int *)(pc) = (int)(disp); \
331 (pc) += 4; \
335 # define emit_op_i(op, pc, imm) { \
336 *((pc)++) = (char)(op); \
337 *(int *)(pc) = (int)(imm); \
338 (pc) += 4; \
341 # define emit_op_r_i(pc, op, op2, code, dst, imm) { \
342 emit_rex64((pc), 0x0, (dst)); \
343 if (is8bit(imm)) { \
344 *((pc)++) = (char) (op); \
345 emit_modrm((pc), b11, (code), (dst)); \
346 *((pc)++) = (char)(imm); \
348 else { \
349 *((pc)++) = (char) (op2); \
350 emit_modrm((pc), b11, (code), (dst)); \
351 *(int *)(pc) = (int)(imm); \
352 (pc) += 4; \
356 # define emit_op_mr_i(pc, op, op2, code, dst, disp, imm) { \
357 emit_rex64((pc), 0x0, (dst)); \
358 if (is8bit(imm)) { \
359 *((pc)++) = (char) (op); \
360 if ((disp) == 0) { \
361 emit_modrm((pc), b00, (code), (dst)); \
363 else if (is8bit(disp)) { \
364 emit_modrm((pc), b01, (code), (dst)); \
365 *((pc)++) = (char)(disp); \
367 else { \
368 emit_modrm((pc), b10, (code), (dst)); \
369 *(int *)(pc) = (int)(disp); \
370 (pc) += 4; \
372 *((pc)++) = (char)(imm); \
374 else { \
375 *((pc)++) = (char) (op2); \
376 if ((disp) == 0) { \
377 emit_modrm((pc), b00, (code), (dst)); \
379 else if (is8bit(disp)) { \
380 emit_modrm((pc), b01, (code), (dst)); \
381 *((pc)++) = (char)(disp); \
383 else { \
384 emit_modrm((pc), b10, (code), (dst)); \
385 *(int *)(pc) = (int)(disp); \
386 (pc) += 4; \
388 *(int *)(pc) = (int)(imm); \
389 (pc) += 4; \
393 /* Test for zero, then call this, and it'll throw a real_exception if you try
394 * to divide by zero */
395 # define emit_div_check_zero(pc) { \
396 char *sav_ptr; \
397 emit_jcc((pc), jcc_jnz, 0x00); \
398 sav_ptr = (char *)((pc) - 1); \
399 emit_mov_r_r((pc), RDI, INTERP); \
400 emit_mov_r_i((pc), RSI, 0); \
401 emit_mov_r_i((pc), RDX, E_ZeroDivisionError); \
402 emit_mov_r_i((pc), RCX, div_by_zero); \
403 /* We must explicitly zero out RAX, since RAX is used in calling
404 * conventions for va_arg functions, and real_exception is a va_arg
405 * function */ \
406 emit_xor_r_r((pc), RAX, RAX); \
407 /* This assumes that jit_info is defined, if it's not, the code's not "consistent" */ \
408 call_func(jit_info, (void (*)(void)) real_exception); \
409 *sav_ptr = (char)((pc) - sav_ptr - 1); \
/* Integer ALU operations.  Every group follows the same pattern:
 *   _r_i   reg op= imm          (0x83 = imm8 form, 0x81 = imm32 form,
 *                                third argument is the /digit)
 *   _mr_i  [reg+disp] op= imm
 *   _r_r   dst op= src
 *   _r_mr  reg op= [reg+disp]   ("load" opcode form, 0x?B / 0x?3)
 *   _mr_r  [reg+disp] op= reg   ("store" opcode form, 0x?9 / 0x?1)
 */
# define emit_cmp_r_i(pc, dst, imm)         emit_op_r_i((pc), 0x83, 0x81, 0x7, (dst), (imm))
# define emit_cmp_mr_i(pc, dst, disp, imm)  emit_op_mr_i((pc), 0x83, 0x81, 0x7, (dst), (disp), (imm))
# define emit_cmp_r_r(pc, dst, src)         emit_op_r_r(0x3b, (pc), (dst), (src))
# define emit_cmp_r_mr(pc, dst, src, disp)  emit_op_r_mr(0x3b, (pc), (dst), (src), (disp))
# define emit_cmp_mr_r(pc, dst, disp, src)  emit_op_r_mr(0x39, (pc), (src), (dst), (disp))

# define emit_add_r_i(pc, dst, imm)         emit_op_r_i((pc), 0x83, 0x81, 0x0, (dst), (imm))
# define emit_add_mr_i(pc, dst, disp, imm)  emit_op_mr_i((pc), 0x83, 0x81, 0x0, (dst), (disp), (imm))
# define emit_add_r_r(pc, dst, src)         emit_op_r_r(0x03, (pc), (dst), (src))
# define emit_add_r_mr(pc, dst, src, disp)  emit_op_r_mr(0x03, (pc), (dst), (src), (disp))
# define emit_add_mr_r(pc, dst, disp, src)  emit_op_r_mr(0x01, (pc), (src), (dst), (disp))

/* FIX: the two memory-form SUB opcodes were swapped (0x29/0x2B reversed
 * relative to every other group).  0x2B is SUB r64, r/m64 (register
 * destination); 0x29 is SUB r/m64, r64 (memory destination). */
# define emit_sub_r_i(pc, dst, imm)         emit_op_r_i((pc), 0x83, 0x81, 0x5, (dst), (imm))
# define emit_sub_mr_i(pc, dst, disp, imm)  emit_op_mr_i((pc), 0x83, 0x81, 0x5, (dst), (disp), (imm))
# define emit_sub_r_r(pc, dst, src)         emit_op_r_r(0x2b, (pc), (dst), (src))
# define emit_sub_r_mr(pc, dst, src, disp)  emit_op_r_mr(0x2b, (pc), (dst), (src), (disp))
# define emit_sub_mr_r(pc, dst, disp, src)  emit_op_r_mr(0x29, (pc), (src), (dst), (disp))

/* XCHG (0x87) is symmetric, so the same opcode serves all forms. */
# define emit_xchg_r_r(pc, dst, src)        emit_op_r_r(0x87, (pc), (dst), (src))
# define emit_xchg_r_mr(pc, dst, src, disp) emit_op_r_mr(0x87, (pc), (dst), (src), (disp))
# define emit_xchg_mr_r(pc, dst, disp, src) emit_op_r_mr(0x87, (pc), (src), (dst), (disp))

# define emit_xor_r_i(pc, dst, imm)         emit_op_r_i((pc), 0x83, 0x81, 0x6, (dst), (imm))
# define emit_xor_mr_i(pc, dst, disp, imm)  emit_op_mr_i((pc), 0x83, 0x81, 0x6, (dst), (disp), (imm))
# define emit_xor_r_r(pc, dst, src)         emit_op_r_r(0x33, (pc), (dst), (src))
# define emit_xor_r_mr(pc, dst, src, disp)  emit_op_r_mr(0x33, (pc), (dst), (src), (disp))
# define emit_xor_mr_r(pc, dst, disp, src)  emit_op_r_mr(0x31, (pc), (src), (dst), (disp))

# define emit_and_r_i(pc, dst, imm)         emit_op_r_i((pc), 0x83, 0x81, 0x4, (dst), (imm))
# define emit_and_mr_i(pc, dst, disp, imm)  emit_op_mr_i((pc), 0x83, 0x81, 0x4, (dst), (disp), (imm))
# define emit_and_r_r(pc, dst, src)         emit_op_r_r(0x23, (pc), (dst), (src))
# define emit_and_r_mr(pc, dst, src, disp)  emit_op_r_mr(0x23, (pc), (dst), (src), (disp))
# define emit_and_mr_r(pc, dst, disp, src)  emit_op_r_mr(0x21, (pc), (src), (dst), (disp))

# define emit_or_r_i(pc, dst, imm)          emit_op_r_i((pc), 0x83, 0x81, 0x1, (dst), (imm))
# define emit_or_mr_i(pc, dst, disp, imm)   emit_op_mr_i((pc), 0x83, 0x81, 0x1, (dst), (disp), (imm))
# define emit_or_r_r(pc, dst, src)          emit_op_r_r(0x0b, (pc), (dst), (src))
# define emit_or_r_mr(pc, dst, src, disp)   emit_op_r_mr(0x0b, (pc), (dst), (src), (disp))
# define emit_or_mr_r(pc, dst, disp, src)   emit_op_r_mr(0x09, (pc), (src), (dst), (disp))
453 # define emit_imul_r_r(pc, dst, src) { \
454 emit_rex64((pc), (dst), (src)); \
455 *((pc)++) = (char) 0x0f; \
456 *((pc)++) = (char) 0xaf; \
457 emit_modrm((pc), b11, (dst), (src)); \
459 # define emit_imul_r_mr(pc, dst, src, disp) { \
460 emit_rex64((pc), (dst), (src)); \
461 *((pc)++) = (char) 0x0f; \
462 *((pc)++) = (char) 0xaf; \
463 if ((disp) == 0) { \
464 emit_modrm((pc), b00, (dst), (src)); \
466 else if (is8bit(disp)) { \
467 emit_modrm((pc), b01, (dst), (src)); \
468 *((pc)++) = (char)(disp); \
470 else { \
471 emit_modrm((pc), b10, (dst), (src)); \
472 *(int *)(pc) = (int)(disp); \
473 (pc) += 4; \
477 # define emit_idiv_r_r(pc, dst, src) { \
478 emit_xor_r_r((pc), RDX, RDX); \
479 emit_test_r((pc), (src)); \
480 emit_div_check_zero(pc); \
481 emit_op_r_r(0xf7, (pc), 0x7, (src)); \
483 # define emit_idiv_r_mr(pc, dst, src, disp) { \
484 emit_xor_r_r((pc), RDX, RDX); \
485 emit_mov_r_mr((pc), ISR1, (src), (disp)); \
486 emit_test_r((pc), ISR1); \
487 emit_div_check_zero(pc); \
488 emit_op_r_mr(0xf7, (pc), 0x7, (src), (disp)); \
/* abs: intended to clear the sign bit.
 * FIXME(review): ~(1L << 63) does not fit in a 32-bit immediate, and
 * emit_and_r_i stores the immediate with *(int *) -- truncating it to
 * 0xffffffff, which sign-extends to -1 and makes this AND a no-op.
 * A correct version must load the mask into a scratch register first. */
# define emit_abs_r(pc, reg)  emit_and_r_i((pc), (reg), ~(1L << 63))

# define emit_neg_r(pc, reg)  emit_op_r_r(0xf7, (pc), 0x2, (reg))  /* NEG r64 (0xF7 /3 is NOT) */
# define emit_not_r(pc, reg)  emit_op_r_r(0xf7, (pc), 0x3, (reg))  /* NOT r64 */
# define emit_inc_r(pc, reg)  emit_op_r_r(0xff, (pc), 0x0, (reg))  /* INC r64 */
# define emit_dec_r(pc, reg)  emit_op_r_r(0xff, (pc), 0x1, (reg))  /* DEC r64 */
498 /* This needs a fixup it seems... call_r doesn't */
499 # define emit_call_i(pc, imm) emit_op_i(0xe8, (pc), (imm))
500 # define emit_call_r(pc, reg) { \
501 emit_rex64((pc), 0x0, (reg)); \
502 *(pc)++ = (char)0xff; \
503 emit_modrm((pc), b11, 0x2, (reg)); }
505 # define emit_jmp_r_r(pc, reg1, reg2) { \
506 emit_rex((pc), (reg1), (reg2)); \
507 *((pc)++) = (char)0xff; \
508 emit_modrm((pc), b00, 0x4, b100); \
509 emit_sib((pc), b00, (reg1), (reg2)); \
512 # define emit_jmp_i(pc, imm) emit_op_i(0xe9, (pc), (imm))
513 # define emit_jmp_i_fixup(ji, imm) { \
514 opcode_t opcode; \
515 opcode = jit_info->op_i + (imm); \
516 Parrot_jit_newfixup(jit_info); \
517 jit_info->arena.fixups->type = JIT_AMD64JUMP; \
518 jit_info->arena.fixups->param.opcode = opcode; \
519 if (jit_info->optimizer->cur_section->branch_target == \
520 jit_info->optimizer->cur_section) \
521 jit_info->arena.fixups->skip = \
522 jit_info->optimizer->cur_section->branch_target->load_size; \
523 emit_jmp_i(jit_info->native_ptr, 0xdead); \
526 # define emit_leave(pc) *((pc)++) = (char)0xc9;
527 # define emit_ret(pc) *((pc)++) = (char)0xc3;
529 # define emit_mov_r_r(pc, dst, src) \
530 emit_op_r_r(0x8B, (pc), (dst), (src))
532 /* mov [reg + offs], imm */
533 # define emit_mov_mr_i(pc, reg, offs, imm) {\
534 if (is32bit(imm)) { \
535 emit_rex64((pc), 0x0, (reg)); \
536 *((pc)++) = (char) 0xc7; \
537 if ((offs) == 0) { \
538 emit_modrm((pc), b00, 0x0, (reg)); \
540 else if (is8bit(offs)) { \
541 emit_modrm((pc), b01, 0x0, (reg)); \
542 *((pc)++) = (char)(offs); \
544 else { \
545 emit_modrm((pc), b10, 0x0, (reg)); \
546 *(int *)(pc) = (int)(offs); \
547 (pc) += 4; \
549 *(int *)(pc) = (int)(imm); \
550 (pc) += 4; \
552 else { \
553 emit_mov_r_i((pc), ISR1, (imm)); \
554 emit_mov_mr_r((pc), (reg), (offs), ISR1); \
558 /* mov reg, imm */
559 # define emit_mov_r_i(pc, reg, imm) {\
560 emit_op_r(0xb8, (pc), (reg)); \
561 *(long *)(pc) = (long)(imm); \
562 (pc) += 8; \
565 /* push reg */
566 # define emit_push_r(pc, reg) emit_64op_r(0x50, (pc), (reg))
567 /* pop reg */
568 # define emit_pop_r(pc, reg) emit_64op_r(0x58, (pc), (reg))
570 /* push imm */
571 # define emit_push_i(pc, imm) emit_op_i(0x68, (pc), (imm))
/* Immediate-size classification.  The casts go through long because an
 * unsigned operand would otherwise make the signed comparisons fail
 * ("(unsigned)0 is not an 8 bit value" without the cast). */
# define is8bit(c)  (((long)(c)) >= -128 && ((long)(c)) <= 127)
# define is32bit(c) (((long)(c)) >= -2147483648L && ((long)(c)) <= 2147483647L)
577 # define emit_get_int_from_stack(pc, dst, disp) \
578 emit_mov_r_mr((pc), (dst), RBP, (disp))
580 # define emit_send_int_to_stack(pc, src, disp) \
581 emit_mov_mr_r((pc), RBP, (disp), (src))
583 /* mov dst, [src + disp] */
584 # define emit_mov_r_mr(pc, dst, src, disp) \
585 emit_op_r_mr(0x8b, (pc), (dst), (src), (disp))
586 /* mov [dst + disp], src */
587 # define emit_mov_mr_r(pc, dst, disp, src) \
588 emit_op_r_mr(0x89, (pc), (src), (dst), (disp))
590 /* lea dst, [src + disp] */
591 # define emit_lea_r_mr(pc, dst, src, disp) \
592 emit_op_r_mr(0x8d, (pc), (src), (dst), (disp))
594 /* push rbp
595 * mov rbp, rsp */
596 /* move rsp to rbp; set rbp to rsp */
597 # define jit_emit_stack_frame_enter(pc) { \
598 emit_push_r((pc), RBP); \
599 emit_mov_r_r((pc), RBP, RSP); \
602 /* pop rbp */
603 # define jit_emit_stack_frame_leave(pc) { \
604 emit_pop_r((pc), RBP); \
607 # define emit_jcc(pc, code, disp) { \
608 if (is8bit(disp)) { \
609 *((pc)++) = (char) 0x70 | (code); \
610 *((pc)++) = (char) (disp); \
612 else { \
613 *((pc)++) = (char) 0x0f; \
614 *((pc)++) = (char) 0x80 | (code); \
615 *(int *)(pc) = (int)(disp); \
616 (pc) += 4; \
620 # define emit_jcc_fixup(ji, code, imm) { \
621 opcode_t opcode; \
622 opcode = (ji)->op_i + (imm); \
623 Parrot_jit_newfixup(ji); \
624 (ji)->arena.fixups->type = JIT_AMD64BRANCH; \
625 (ji)->arena.fixups->param.opcode = opcode; \
626 if ((ji)->optimizer->cur_section->branch_target == \
627 (ji)->optimizer->cur_section) \
628 (ji)->arena.fixups->skip = \
629 (ji)->optimizer->cur_section->branch_target->load_size; \
630 emit_jcc((ji)->native_ptr, (code), 0xdead); \
/*
 * Condition codes for emit_jcc / emit_jcc_fixup.  The enumerator values
 * are the x86 "tttn" condition codes, OR-ed into the 0x70 (rel8) or
 * 0x0F 0x80 (rel32) opcode bytes.  Aliases share one value.
 */
typedef enum {
    jcc_jo,              /* 0x0: overflow */
    jcc_jno,             /* 0x1: not overflow */
    jcc_jb,              /* 0x2: below */
    jcc_jc   = jcc_jb,   /*      carry */
    jcc_jnae = jcc_jb,   /*      not above or equal */
    jcc_jnb,             /* 0x3: not below */
    jcc_jnc  = jcc_jnb,  /*      not carry */
    jcc_jae  = jcc_jnb,  /*      above or equal */
    jcc_jz,              /* 0x4: zero */
    jcc_je   = jcc_jz,   /*      equal */
    jcc_jnz,             /* 0x5: not zero */
    jcc_jne  = jcc_jnz,  /*      not equal */
    jcc_jbe,             /* 0x6: below or equal */
    jcc_jna  = jcc_jbe,  /*      not above */
    jcc_jnbe,            /* 0x7: not below or equal */
    jcc_ja   = jcc_jnbe, /*      above */
    jcc_js,              /* 0x8: sign */
    jcc_jns,             /* 0x9: not sign */
    jcc_jp,              /* 0xa: parity */
    jcc_jpe  = jcc_jp,   /*      parity even */
    jcc_jnp,             /* 0xb: not parity */
    jcc_jpo  = jcc_jnp,  /*      parity odd */
    jcc_jl,              /* 0xc: less (signed) */
    jcc_jnge = jcc_jl,   /*      not greater or equal */
    jcc_jnl,             /* 0xd: not less */
    jcc_jge  = jcc_jnl,  /*      greater or equal */
    jcc_jle,             /* 0xe: less or equal */
    jcc_jng  = jcc_jle,  /*      not greater */
    jcc_jnle,            /* 0xf: not less or equal */
    jcc_jg   = jcc_jnle  /*      greater */
} amd64_jcc_t;
666 # define emit_test_r(pc, reg) \
667 emit_op_r_r(0x85, (pc), (reg), (reg))
669 # define emit_test_r_r(pc, dst, src) \
670 emit_op_r_r(0x85, (pc), (src), (dst))
672 /* pop r15
673 * pop r14
674 * pop r13
675 * pop r12
676 * pop rbx
677 * pop rbp
678 * ret */
679 # define jit_emit_end(pc) { \
680 emit_pop_r((pc), R15); \
681 emit_pop_r((pc), R14); \
682 emit_pop_r((pc), R13); \
683 emit_pop_r((pc), R12); \
684 emit_pop_r((pc), RBX); \
685 emit_pop_r((pc), RBP); \
686 emit_leave(pc); \
687 emit_ret(pc); \
691 /**************************************
692 * Floating Point *
693 **************************************/
695 # define emit_op_x_x(prefix, op, pc, dst, src) { \
696 *((pc)++) = (char) (prefix); \
697 emit_rex((pc), (dst), (src)); \
698 *((pc)++) = (char) 0x0f; \
699 *((pc)++) = (char) (op); \
700 emit_modrm((pc), b11, (dst), (src)); \
703 # define emit_op64_x_x(prefix, op, pc, dst, src) { \
704 *((pc)++) = (char) (prefix); \
705 emit_rex64((pc), (dst), (src)); \
706 *((pc)++) = (char) 0x0f; \
707 *((pc)++) = (char) (op); \
708 emit_modrm((pc), b11, (dst), (src)); \
711 # define emit_op_x_mx(prefix, op, pc, dst, src, offs) { \
712 *((pc)++) = (char) (prefix); \
713 emit_rex((pc), (dst), (src)); \
714 *((pc)++) = (char) 0x0f; \
715 *((pc)++) = (char) (op); \
716 if ((offs) == 0) { \
717 emit_modrm((pc), b00, (dst), (src)); \
719 else if (is8bit(offs)) { \
720 emit_modrm((pc), b01, (dst), (src)); \
721 *((pc)++) = (char)(long)(offs); \
723 else { \
724 emit_modrm((pc), b10, (dst), (src)); \
725 *(int *)(pc) = (int)(long)(offs); \
726 (pc) += 4; \
730 # define emit_op64_x_mx(prefix, op, pc, dst, src, offs) { \
731 *((pc)++) = (char) (prefix); \
732 emit_rex64((pc), (dst), (src)); \
733 *((pc)++) = (char) 0x0f; \
734 *((pc)++) = (char) (op); \
735 if ((offs) == 0 || (src) == RBP) { \
736 emit_modrm((pc), b00, (dst), (src)); \
738 else if (is8bit(offs)) { \
739 emit_modrm((pc), b01, (dst), (src)); \
740 *((pc)++) = (char)(long)(offs); \
742 else { \
743 emit_modrm((pc), b10, (dst), (src)); \
744 *(int *)(pc) = (int)(long)(offs); \
745 (pc) += 4; \
749 # define emit_mov_x_x(pc, dst, src) emit_op_x_x(0x66, 0x28, (pc), (dst), (src))
751 # define emit_mov_x_mx(pc, dst, src, offs) emit_op_x_mx(0xf2, 0x10, (pc), (dst), (src), (offs))
752 # define emit_mov_mx_x(pc, dst, offs, src) emit_op_x_mx(0xf2, 0x11, (pc), (src), (dst), (offs))
754 /* Intended to zero a register */
755 # define emit_movhlps_x_x(pc, dst, src) { \
756 emit_rex((pc), (src), (dst)); \
757 *((pc)++) = (char) 0x0f; \
758 *((pc)++) = (char) 0x12; \
759 emit_modrm((pc), b11, (src), (dst)); \
762 # define emit_movlhps_x_x(pc, dst, src) { \
763 emit_rex((pc), (src), (dst)); \
764 *((pc)++) = (char) 0x0f; \
765 *((pc)++) = (char) 0x16; \
766 emit_modrm((pc), b11, (src), (dst)); \
769 # define emit_movd_r_x(pc, dst, src) { \
770 *((pc)++) = (char) 0x66; \
771 emit_rex64((pc), (dst), (src)); \
772 *((pc)++) = (char) 0x0f; \
773 *((pc)++) = (char) 0x7e; \
774 emit_modrm((pc), b11, (dst), (src)); \
777 # define emit_movd_x_r(pc, dst, src) { \
778 *((pc)++) = (char) 0x66; \
779 emit_rex64((pc), (dst), (src)); \
780 *((pc)++) = (char) 0x0f; \
781 *((pc)++) = (char) 0x6e; \
782 emit_modrm((pc), b11, (dst), (src)); \
785 # define emit_test_x(pc, reg) { \
786 emit_xor_x_x((pc), FSR2, FSR2); \
787 emit_comisd_x_x((pc), (reg), FSR2); \
790 # define emit_comisd_x_x(pc, dst, src) emit_op_x_x(0x66, 0x2f, (pc), (dst), (src))
791 # define emit_comisd_x_mx(pc, dst, src, offs) emit_op_x_mx(0x66, 0x2f, (pc), (dst), (src), (offs))
793 # define emit_add_x_x(pc, dst, src) emit_op_x_x(0xf2, 0x58, (pc), (dst), (src))
794 # define emit_add_x_mx(pc, dst, src, offs) emit_op_x_mx(0xf2, 0x58, (pc), (dst), (src), (offs))
796 # define emit_sub_x_x(pc, dst, src) emit_op_x_x(0xf2, 0x5c, (pc), (dst), (src))
797 # define emit_sub_x_mx(pc, dst, src, offs) emit_op_x_mx(0xf2, 0x5c, (pc), (dst), (src), (offs))
799 # define emit_and_x_x(pc, dst, src) emit_op_x_x(0x66, 0x54, (pc), (dst), (src))
800 # define emit_and_x_mx(pc, dst, src, offs) emit_op_x_mx(0x66, 0x54, (pc), (dst), (src), (offs))
802 # define emit_xor_x_x(pc, dst, src) emit_op_x_x(0x66, 0x57, (pc), (dst), (src))
803 # define emit_xor_x_mx(pc, dst, src, offs) emit_op_x_mx(0x66, 0x57, (pc), (dst), (src), (offs))
805 # define emit_mul_x_x(pc, dst, src) emit_op_x_x(0xf2, 0x59, (pc), (dst), (src))
806 # define emit_mul_x_mx(pc, dst, src, offs) emit_op_x_mx(0xf2, 0x59, (pc), (dst), (src), (offs))
808 /* I tried to do a check for FSR1 == dst, such as from core.jit, but the bugs
809 * to track down, the hackish things to do(movhlpd and movlhpd were used, but I
810 * gave up and settled on the cleaner and likely faster overall method of
811 * getting an FSR2 and going down to 14 mapped registers.
813 # define emit_div_x_x(pc, dst, src) { \
814 emit_movhlps_x_x((pc), FSR2, FSR2); \
815 emit_comisd_x_x((pc), FSR2, (src)); \
816 emit_div_check_zero((pc)); \
817 emit_op_x_x(0xf2, 0x5e, (pc), (dst), (src)); \
819 # define emit_div_x_mx(pc, dst, src, offs) { \
820 emit_movhlps_x_x((pc), FSR2, FSR2); \
821 emit_comisd_x_mx((pc), FSR2, (src), (offs)); \
822 emit_div_check_zero((pc)); \
823 emit_op_x_mx(0xf2, 0x5e, (pc), (dst), (src), (offs)); \
826 # define emit_sqrt_x_x(pc, dst, src) emit_op_x_x(0xf2, 0x51, (pc), (dst), (src))
827 # define emit_sqrt_x_mx(pc, dst, src, offs) emit_op_x_mx(0xf2, 0x51, (pc), (dst), (src), (offs))
829 # define emit_cvtsi2sd_x_mr(pc, dst, src, offs) emit_op64_x_mx(0xf2, 0x2a, (pc), (dst), (src), (offs))
830 # define emit_cvtsi2sd_x_r(pc, dst, src) emit_op64_x_x(0xf2, 0x2a, (pc), (dst), (src))
832 /* Truncate */
833 # define emit_cvttsd2si_r_mx(pc, dst, src, offs) emit_op64_x_mx(0xf2, 0x2c, (pc), (dst), (src), (offs))
834 # define emit_cvttsd2si_r_x(pc, dst, src) emit_op64_x_x(0xf2, 0x2c, (pc), (dst), (src))
836 /* Round */
837 # define emit_cvtsd2si_r_mx(pc, dst, src, offs) emit_op64_x_mx(0xf2, 0x2d, (pc), (dst), (src), (offs))
838 # define emit_cvtsd2si_r_x(pc, dst, src) emit_op64_x_x(0xf2, 0x2d, (pc), (dst), (src))
840 # define emit_ldmxcsr(pc) { \
841 emit_xor_r_r((pc), ISR2, ISR2); \
842 *((pc)++) = (char) 0x0f; \
843 *((pc)++) = (char) 0xAE; \
844 emit_modrm((pc), b10, 0x2, ISR2); \
845 *(int *)(pc) = (int)&mxcsr; \
846 (pc) += 4; \
848 /*********************************************************/
850 #ifdef JIT_EMIT
852 # ifdef USE_OP_MAP_AND_CODE_START
853 /* These two can be mixed together just like in the i386 jit. All the places I
854 * can see this being called require it to be included, but for the moment I'm
855 * keeping it as these macros. */
857 * emit code that gets interp->code->jit_info->arena->op_map
858 * and sets the OP_MAP register
860 # define jit_emit_load_op_map(pc) { \
861 emit_mov_r_mr((pc), OP_MAP, INTERP, (long)offsetof(Interp, code)); \
862 emit_mov_r_mr((pc), OP_MAP, OP_MAP, (long)offsetof(PackFile_ByteCode, jit_info)); \
863 emit_lea_r_mr((pc), OP_MAP, OP_MAP, (long)offsetof(Parrot_jit_info_t, arena)); \
864 emit_mov_r_mr((pc), OP_MAP, OP_MAP, (long)offsetof(Parrot_jit_arena_t, op_map)); \
868 * emit code that gets interp->code->base.data
869 * and sets the CODE_START register
871 # define jit_emit_load_code_start(pc) { \
872 emit_mov_r_mr((pc), CODE_START, INTERP, (long)offsetof(Interp, code)); \
873 emit_mov_r_mr((pc), CODE_START, CODE_START, (long)offsetof(PackFile_Segment, data)); \
876 # endif /* USE_OP_MAP_AND_CODE_START */
879 * emit code that calls a Parrot opcode function
881 static void call_func(Parrot_jit_info_t *jit_info, void *addr) {
882 if ((long)addr > (long)INT_MAX) {
883 /* Move the address into our scratch register R11
884 * We cannot use just the immediate form of call because the address
885 * will be too large if we're using a shared parrot, but will be ok on
886 * a static parrot.
888 * This will most likely only be used on shared libraries.
890 /* Absolute near call to R11 */
891 emit_mov_r_i(jit_info->native_ptr, R11, addr);
892 emit_call_r(jit_info->native_ptr, R11);
894 else {
895 /* Call with an immediate value. Mainly for a static parrot, and
896 * debugging */
897 Parrot_jit_newfixup(jit_info);
898 jit_info->arena.fixups->type = JIT_AMD64CALL;
899 jit_info->arena.fixups->param.fptr = D2FPTR(addr);
900 emit_call_i(jit_info->native_ptr, 0xdead);
904 /* Jump to RAX, which needs to be set before calling this */
905 static void
906 Parrot_emit_jump_to_rax(Parrot_jit_info_t *jit_info, Interp *interp)
/* NOTE(review): this listing dropped several physical lines (blank
 * lines, closing braces, comment delimiters); the brace closing the
 * "if (!jit_info->objfile)" below is not visible, so the exact extent
 * of the guarded code cannot be determined here.  Confirm against the
 * pristine file before modifying. */
908 if (!jit_info->objfile) {
909 # ifdef USE_OP_MAP_AND_CODE_START
910 /* Get interp->code->base.data */
911 jit_emit_load_code_start(jit_info->native_ptr);
912 emit_sub_r_r(jit_info->native_ptr, RAX, CODE_START);
914 /* Get interp->code->jit_info->arena->op_map */
915 jit_emit_load_op_map(jit_info->native_ptr);
916 # else
917 /* emit code that gets interp->code->base.data */
918 emit_mov_r_mr(jit_info->native_ptr, RCX, INTERP, (long)offsetof(Interp, code));
919 emit_mov_r_mr(jit_info->native_ptr, RDX, RCX, (long)offsetof(PackFile_Segment, data));
920 emit_sub_r_r(jit_info->native_ptr, RAX, RDX);
922 /* Reuse interp->code in RCX, get interp->code->jit_info->arena->op_map */
923 emit_mov_r_mr(jit_info->native_ptr, RDX, RCX, (long)offsetof(PackFile_ByteCode, jit_info))
924 emit_lea_r_mr(jit_info->native_ptr, RDX, RDX, (long)offsetof(Parrot_jit_info_t, arena));
925 emit_mov_r_mr(jit_info->native_ptr, RDX, RDX, (long)offsetof(Parrot_jit_arena_t, op_map));
926 # endif
928 /* Base pointer */
929 emit_mov_r_mr(jit_info->native_ptr, RBX, INTERP, (long)offsetof(Interp, ctx.bp));
931 # ifdef USE_OP_MAP_AND_CODE_START
932 emit_jmp_r_r(jit_info->native_ptr, RAX, OP_MAP);
933 # else
934 emit_jmp_r_r(jit_info->native_ptr, RAX, RDX);
935 # endif
939 #endif /* JIT_EMIT */
941 #if JIT_EMIT == 2
944 * emit code that calls a core.ops function from src/core_ops.c,
945 * the generated code is the translation of this:
947 * PC = ((INTERP->op_func_table)[*PC])(PC,INTERP)
951 extern int jit_op_count(void);
953 void
954 Parrot_jit_normal_op(Parrot_jit_info_t *jit_info,
955 Interp *interp)
957 int cur_op = *jit_info->cur_op;
958 static int check;
960 if (cur_op >= jit_op_count()) {
961 cur_op = CORE_OPS_wrapper__;
964 /* check every eight ops, could be changed to 16, or 32, or ... */
965 if ((++check & 0x7) == 0) {
966 emit_mov_r_i(jit_info->native_ptr, RDI, jit_info->cur_op);
967 emit_mov_r_r(jit_info->native_ptr, RSI, INTERP);
968 call_func(jit_info, (void (*)(void))interp->op_func_table[CORE_OPS_check_events]);
971 emit_mov_r_i(jit_info->native_ptr, RDI, jit_info->cur_op);
972 emit_mov_r_r(jit_info->native_ptr, RSI, INTERP);
974 call_func(jit_info, (void (*)(void))interp->op_func_table[cur_op]);
979 * emit code for a branching parrot opcode. All cached registers
980 * need recalculation, as a branch can go into different code segments
981 * with different code start and different jit_info
983 void
984 Parrot_jit_cpcf_op(Parrot_jit_info_t *jit_info,
985 Interp *interp)
987 Parrot_jit_normal_op(jit_info, interp);
988 Parrot_emit_jump_to_rax(jit_info, interp);
992 * release stack frame end exit see core.jit
994 static void Parrot_end_jit(Parrot_jit_info_t *, Interp *);
996 # undef Parrot_jit_restart_op
998 * emit code that might leave the JIT runcore
999 * see ppc or i386
1001 void
1002 Parrot_jit_restart_op(Parrot_jit_info_t *jit_info,
1003 Interp *interp)
1005 char *sav_ptr;
1007 Parrot_jit_normal_op(jit_info, interp);
1008 emit_test_r(jit_info->native_ptr, RAX);
1010 /* Quick fixup, but we know it's 12, anyway it needs to be a byte */
1011 emit_jcc(jit_info->native_ptr, jcc_jnz, 0x00);
1012 sav_ptr = (char *)(jit_info->native_ptr - 1);
1013 Parrot_end_jit(jit_info, interp);
1014 *sav_ptr = (char)(jit_info->native_ptr - sav_ptr - 1);
1016 Parrot_emit_jump_to_rax(jit_info, interp);
1019 #endif /* JIT_EMIT == 2 */
1021 #if JIT_EMIT == 0
1023 # define REQUIRES_CONSTANT_POOL 0
1026 * emit stack frame according to ABI
1027 * preserve mapped registers according to ABI
1028 * load INTERP, OP_MAP, CODE_START, BP registers
1029 * then run the code at pc
1031 * the function is called as
1032 * runops(interp, pc)
1034 * at runtime
1036 void
1037 Parrot_jit_begin(Parrot_jit_info_t *jit_info,
1038 Interp *interp)
1040 jit_emit_stack_frame_enter(jit_info->native_ptr);
1042 /* Saved registers */
1043 /* push rbp
1044 * push rbx
1045 * push r12
1046 * push r13
1047 * push r14
1048 * push r15 */
1049 emit_push_r(jit_info->native_ptr, RBP);
1050 emit_push_r(jit_info->native_ptr, RBX);
1051 emit_push_r(jit_info->native_ptr, R12);
1052 emit_push_r(jit_info->native_ptr, R13);
1053 emit_push_r(jit_info->native_ptr, R14);
1054 emit_push_r(jit_info->native_ptr, R15);
1055 /* When our "function" gets called, RDI will be the interp,
1056 * and RSI will be jit_info->native_ptr */
1057 /* Set R15 to interp */
1058 /* mov r15, rdi */
1059 /* emit_ldmxcsr(jit_info->native_ptr); */
1060 emit_mov_r_r(jit_info->native_ptr, R15, RDI);
1061 emit_mov_r_r(jit_info->native_ptr, RAX, RSI);
1063 Parrot_emit_jump_to_rax(jit_info, interp);
1069 * fix up all emitted branches
1070 * see ppc or i386
1072 static void
1073 Parrot_jit_dofixup(Parrot_jit_info_t *jit_info,
1074 Interp *interp)
1076 Parrot_jit_fixup_t *fixup, *next;
1077 char *fixup_ptr;
1079 fixup = jit_info->arena.fixups;
1081 while (fixup) {
1082 switch (fixup->type) {
1083 case JIT_AMD64BRANCH:
1084 fixup_ptr = Parrot_jit_fixup_target(jit_info, fixup) + 2;
1085 *(int *)(fixup_ptr) =
1086 jit_info->arena.op_map[fixup->param.opcode].offset
1087 - (fixup->native_offset + 6) + fixup->skip;
1088 break;
1089 case JIT_AMD64JUMP:
1090 fixup_ptr = Parrot_jit_fixup_target(jit_info, fixup) + 1;
1091 *(int *)(fixup_ptr) =
1092 jit_info->arena.op_map[fixup->param.opcode].offset
1093 - (fixup->native_offset + 5) + fixup->skip;
1094 break;
1095 case JIT_AMD64CALL:
1096 fixup_ptr = jit_info->arena.start + fixup->native_offset + 1;
1097 *(int *)(fixup_ptr) = (int)(long)fixup->param.fptr -
1098 (int)(long)fixup_ptr - 4;
1099 break;
1100 default:
1101 real_exception(interp, NULL, JIT_ERROR,
1102 "Unknown fixup type: %d\n", fixup->type);
1104 next = fixup->next;
1105 free(fixup);
1106 fixup = next;
1108 jit_info->arena.fixups = NULL;
1111 static void
1112 Parrot_jit_begin_sub(Parrot_jit_info_t *jit_info,
1113 Interp *interp)
1115 /* NOT CALLED CURRENTLY */
1121 * define interface functions for register -> parrot register moves
1122 * and v.v.
1125 /* set mem to reg */
1126 static void
1127 jit_mov_mx_x(Interp *interp, Parrot_jit_info_t *jit_info,
1128 int base_reg, INTVAL offs, int src_reg)
1130 emit_mov_mx_x(jit_info->native_ptr, base_reg, offs, src_reg);
1133 static void
1134 jit_mov_mr_r(Interp *interp, Parrot_jit_info_t *jit_info,
1135 int base_reg, INTVAL offs, int src_reg)
1137 emit_mov_mr_r(jit_info->native_ptr, base_reg, offs, src_reg);
1140 /* set reg to mem */
1141 static void
1142 jit_mov_x_mx(Interp *interp, Parrot_jit_info_t *jit_info,
1143 int dst_reg, int base_reg, INTVAL offs)
1145 emit_mov_x_mx(jit_info->native_ptr, dst_reg, base_reg, offs);
1148 static void
1149 jit_mov_r_mr(Interp *interp, Parrot_jit_info_t *jit_info,
1150 int dst_reg, int base_reg, INTVAL offs)
1152 emit_mov_r_mr(jit_info->native_ptr, dst_reg, base_reg, offs);
1156 * define how many int and float registers can be used by the
1157 * jit core
1160 # define INT_REGISTERS_TO_MAP 10
1161 # define FLOAT_REGISTERS_TO_MAP 15
1164 * enumerate these mapped registers
1165 * please note that you have to preserve registers in
1166 * Parrot_jit_begin according to the ABI of the architecture
1169 static const char intval_map[INT_REGISTERS_TO_MAP] =
1172 /* Preserved, we'd have more, but keeping code_start, op_map, interp,
1173 * and the base pointer in registers takes away four, not to mention
1174 * RBP which is used for easier debugging. That's five registers used
1175 * for one reason or another at the moment. I'm not sure if it's worth
1176 * it yet. */
1178 * RAX ISR2
1179 * RDX for idiv, TODO: handle corner cases, i.e. steal i386 code
1180 * RBX for Interp->ctx.bp
1181 * RBP for debugging, can add it to the preserved list
1182 * R12
1183 * R13 for CODE_START
1184 * R14 for OP_MAP
1185 * R15 for INTERP
1187 R12,
1188 # ifndef USE_OP_MAP_AND_CODE_START
1189 R13, R14,
1190 # endif
1191 /* Unpreserved */
1192 RCX, RSI, RDI, R8, R9, R10, RDX
1195 static const char floatval_map[FLOAT_REGISTERS_TO_MAP] =
1197 XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
1198 XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15
1203 * define arch specific details in jit_arch_info
1206 static const jit_arch_info arch_info = {
1207 /* CPU <- Parrot reg move functions */
1208 jit_mov_r_mr,
1209 jit_mov_x_mx,
1210 /* Parrot <- CPU reg move functions */
1211 jit_mov_mr_r,
1212 jit_mov_mx_x,
1213 Parrot_jit_dofixup,
1214 (jit_arch_f)0,
1216 /* JIT_CODE_FILE */
1218 Parrot_jit_begin, /* emit code prologue */
1219 # ifdef USE_OP_MAP_AND_CODE_START
1222 # else
1223 9, /* mapped int */
1224 3, /* preserved int */
1225 # endif
1226 intval_map, /* which ints mapped */
1227 14, /* mapped float */
1228 0, /* preserved float */
1229 floatval_map /* which floats mapped */
1231 /* JIT_CODE_SUB */
1233 Parrot_jit_begin_sub,
1236 intval_map,
1239 floatval_map
1241 /* JIT_CODE_SUB_REGS_ONLY */
1243 /*Parrot_jit_begin_sub_regs*/0, /* emit code prologue */
1246 intval_map,
1249 floatval_map
1254 const jit_arch_info *
1255 Parrot_jit_init(Interp *interp)
1257 return &arch_info;
1261 #endif /* JIT_EMIT == 0 */
1262 #endif /* PARROT_JIT_AMD64_JIT_EMIT_H_GUARD */
1266 =back
1268 =cut
1273 * Local variables:
1274 * c-file-style: "parrot"
1275 * End:
1276 * vim: expandtab shiftwidth=4: