 * Copyright (C) 2007-2008, The Perl Foundation.

src/jit/amd64/jit_emit.h - AMD64 JIT Generation
Provides support for a JIT on the AMD64 architecture.
#ifndef PARROT_JIT_AMD64_JIT_EMIT_H_GUARD
#define PARROT_JIT_AMD64_JIT_EMIT_H_GUARD

hex to binary converter:
perl -ne '@a=split;push@b,unpack"B*",chr hex foreach@a;print"@b\n";@b=()'

src/jit/amd64/jit_emit.h copied to src/jit_emit.h
src/jit/amd64/exec_dep.h copied to src/exec_dep.h
src/jit/amd64/core.jit used to build src/jit_cpu.h
src/jit/amd64/core.jit used to build src/exec_cpu.h
src/exec_start.c    #define JIT_EMIT 1
src/exec.c          #define JIT_EMIT 1
src/jit.c           #define JIT_EMIT 0
src/jit_cpu.c       #define JIT_EMIT 2
r => use upper 8 registers for REG
x => high bit of index in SIB
b => use upper 8 registers for r/m, and for the base in SIB
[index * 2**scale + base]
For calling functions, integer arguments use: RDI, RSI, RDX, RCX, R8, and R9;
floating-point arguments use XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, and XMM7.
For varargs, RAX holds the number of SSE registers used.
Extra arguments are pushed onto the stack.

R11 is a scratch register, not preserved, not used in argument passing.
R10 is used for a static chain pointer.

RBP, RBX, and R12-R15 are preserved.
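As a rough worked example of this convention (an assumption based on the ABI
summary above, mirroring the divide-by-zero check emitted later in this file),
a varargs call like real_exception(interp, NULL, E_ZeroDivisionError, msg)
would be set up as:

    RDI = interp                  first integer argument
    RSI = NULL                    second integer argument
    RDX = E_ZeroDivisionError     third integer argument
    RCX = msg                     fourth integer argument
    RAX = 0                       number of SSE registers used (varargs)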
void Parrot_jit_begin(Parrot_jit_info_t *, Interp *);
static const char div_by_zero[] = "Divide by zero";
static const int mxcsr = 0x7fa0; /* Add 6000 to mxcsr */
/* This is used for testing whether keeping these two in registers is an
 * improvement. This file may need to be expanded further to know for sure,
 * but so far there appears to be a 20-second improvement on my 2GHz machine. */
#undef USE_OP_MAP_AND_CODE_START
 * define all the available cpu registers
 * reserve some for special purposes
    ISR2 = RAX,     /* Not like it's good for anything else */
#ifdef USE_OP_MAP_AND_CODE_START
#ifdef USE_OP_MAP_AND_CODE_START
=head2 Register usage

Return values, second scratch
Allocated, unpreserved
Allocated, unpreserved
Parrot register frame pointer
Allocated, unpreserved
Allocated, unpreserved
Allocated, unpreserved
Allocated, unpreserved
Allocated, unpreserved
Allocated, preserved, or code_start
Allocated, preserved, or op_map
Allocated, all unpreserved
#define Parrot_jit_emit_get_base_reg_no(pc) RBX
    XMM2,  XMM3,  XMM4,  XMM5,  XMM6,  XMM7,
    XMM8,  XMM9,  XMM10, XMM11, XMM12, XMM13, XMM14, XMM15

/* Call can be handled without a fixup */
enum { JIT_AMD64BRANCH, JIT_AMD64JUMP, JIT_AMD64CALL };
 * now define macros for all possible (and implemented) operations
 * Parrot defines JIT_EMIT to 1 or 2 when this file is included in
 * exec_cpu.c or jit_cpu.c

Most of the functionality is provided by macros instead of functions.
/* rex.[wrxb], incomplete but oh well */

=item C<emit_rex64(pc, reg, rm)>

Emits the REX prefix with REX.W set, making the instruction 64-bit.

=item C<emit_rex(pc, reg, rm)>

Emits the REX prefix only when an extended register (R8-R15, XMM8-XMM15) is used.

# define emit_rex64(pc, reg, rm) \
    *((pc)++) = (char)(0x48 | (((reg) & 8) >> 1) | (((rm) & 8) >> 3))

# define emit_rex(pc, dst, src) { \
    if ((dst) & 8 || (src) & 8) \
        *((pc)++) = (char)(0x40 | (((dst) & 8) >> 1) | (((src) & 8) >> 3)); }
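/* A rough worked example (assuming the register enum uses the hardware
 * encodings, e.g. RAX = 0 and R9 = 9): emit_rex64(pc, R9, RAX) emits
 * 0x48 | 0x04 | 0x00 = 0x4C, i.e. REX.W + REX.R, while emit_rex(pc, RAX, RCX)
 * emits nothing because neither register is extended. */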
/* A 0x66 prefix can be put in front of this NOP when more padding is needed */
# define emit_nop(pc) { \
    *((pc)++) = (char)(0x90); }

# define emit_modrm(pc, mod, dst, src) { \
    *((pc)++) = (char)(((mod) << 6) | (((dst) & 7) << 3) | ((src) & 7)); }

# define emit_sib(pc, scale, index, base) { \
    *((pc)++) = (char)(((scale) << 6) | (((index) & 7) << 3) | ((base) & 7)); }
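/* A rough worked example (assuming b11 == 3, b00 == 0, and hardware register
 * numbers, e.g. RAX = 0, RCX = 1, RDX = 2, RBX = 3):
 * emit_modrm(pc, b11, RCX, RDX) emits 0xCA (mod=11, reg=rcx, rm=rdx), and
 * emit_sib(pc, b00, RAX, RBX) emits 0x03, i.e. scale 1, index RAX, base RBX,
 * which addresses [rbx + rax*1]. */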
# define emit_op_r(op, pc, reg) { \
    emit_rex64((pc), 0x0, (reg)); \
    *((pc)++) = (char)((op) | ((reg) & 7)); \

# define emit_64op_r(op, pc, reg) { \
    emit_rex((pc), 0x0, (reg)); \
    *((pc)++) = (char)((op) | ((reg) & 7)); \

# define emit_op_r_r(op, pc, dst, src) { \
    emit_rex64((pc), (dst), (src)); \
    *((pc)++) = (char) (op); \
    emit_modrm((pc), b11, (dst), (src)); \

# define emit_op_r_mr(op, pc, dst, src, disp) { \
    emit_rex64((pc), (dst), (src)); \
    *((pc)++) = (char) (op); \
        emit_modrm((pc), b00, (dst), (src)); \
    else if (is8bit(disp)) { \
        emit_modrm((pc), b01, (dst), (src)); \
        *((pc)++) = (char)(disp); \
        emit_modrm((pc), b10, (dst), (src)); \
        *(int *)(pc) = (int)(disp); \
# define emit_op_i(op, pc, imm) { \
    *((pc)++) = (char)(op); \
    *(int *)(pc) = (int)(imm); \

# define emit_op_r_i(pc, op, op2, code, dst, imm) { \
    emit_rex64((pc), 0x0, (dst)); \
        *((pc)++) = (char) (op); \
        emit_modrm((pc), b11, (code), (dst)); \
        *((pc)++) = (char)(imm); \
        *((pc)++) = (char) (op2); \
        emit_modrm((pc), b11, (code), (dst)); \
        *(int *)(pc) = (int)(imm); \

# define emit_op_mr_i(pc, op, op2, code, dst, disp, imm) { \
    emit_rex64((pc), 0x0, (dst)); \
        *((pc)++) = (char) (op); \
            emit_modrm((pc), b00, (code), (dst)); \
        else if (is8bit(disp)) { \
            emit_modrm((pc), b01, (code), (dst)); \
            *((pc)++) = (char)(disp); \
            emit_modrm((pc), b10, (code), (dst)); \
            *(int *)(pc) = (int)(disp); \
        *((pc)++) = (char)(imm); \
        *((pc)++) = (char) (op2); \
            emit_modrm((pc), b00, (code), (dst)); \
        else if (is8bit(disp)) { \
            emit_modrm((pc), b01, (code), (dst)); \
            *((pc)++) = (char)(disp); \
            emit_modrm((pc), b10, (code), (dst)); \
            *(int *)(pc) = (int)(disp); \
        *(int *)(pc) = (int)(imm); \
/* Test the divisor for zero, then call this; it will throw a real_exception
 * if you try to divide by zero */
# define emit_div_check_zero(pc) { \
    emit_jcc((pc), jcc_jnz, 0x00); \
    sav_ptr = (char *)((pc) - 1); \
    emit_mov_r_r((pc), RDI, INTERP); \
    emit_mov_r_i((pc), RSI, 0); \
    emit_mov_r_i((pc), RDX, E_ZeroDivisionError); \
    emit_mov_r_i((pc), RCX, div_by_zero); \
    /* We must explicitly zero out RAX, since RAX is used in the calling \
     * conventions for va_arg functions, and real_exception is a va_arg function */ \
    emit_xor_r_r((pc), RAX, RAX); \
    /* This assumes that jit_info is defined; if it's not, the code is not "consistent" */ \
    call_func(jit_info, (void (*)(void)) real_exception); \
    *sav_ptr = (char)((pc) - sav_ptr - 1); \
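/* How the forward patch above works (descriptive note, not original text):
 * emit_jcc() first emits a jnz with a placeholder 0 displacement, and sav_ptr
 * remembers the address of that displacement byte; after the argument setup
 * and the call to real_exception have been emitted, the byte at sav_ptr is
 * overwritten with the actual distance, so a non-zero divisor skips the whole
 * exception sequence. */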
# define emit_cmp_r_i(pc, dst, imm) emit_op_r_i((pc), 0x83, 0x81, 0x7, (dst), (imm))
# define emit_cmp_mr_i(pc, dst, disp, imm) emit_op_mr_i((pc), 0x83, 0x81, 0x7, (dst), (disp), (imm))
# define emit_cmp_r_r(pc, dst, src) emit_op_r_r(0x3b, (pc), (dst), (src))
# define emit_cmp_r_mr(pc, dst, src, disp) emit_op_r_mr(0x3b, (pc), (dst), (src), (disp))
# define emit_cmp_mr_r(pc, dst, disp, src) emit_op_r_mr(0x39, (pc), (src), (dst), (disp))

# define emit_add_r_i(pc, dst, imm) emit_op_r_i((pc), 0x83, 0x81, 0x0, (dst), (imm))
# define emit_add_mr_i(pc, dst, disp, imm) emit_op_mr_i((pc), 0x83, 0x81, 0x0, (dst), (disp), (imm))
# define emit_add_r_r(pc, dst, src) emit_op_r_r(0x03, (pc), (dst), (src))
# define emit_add_r_mr(pc, dst, src, disp) emit_op_r_mr(0x03, (pc), (dst), (src), (disp))
# define emit_add_mr_r(pc, dst, disp, src) emit_op_r_mr(0x01, (pc), (src), (dst), (disp))
# define emit_sub_r_i(pc, dst, imm) emit_op_r_i((pc), 0x83, 0x81, 0x5, (dst), (imm))
# define emit_sub_mr_i(pc, dst, disp, imm) emit_op_mr_i((pc), 0x83, 0x81, 0x5, (dst), (disp), (imm))
# define emit_sub_r_r(pc, dst, src) emit_op_r_r(0x2b, (pc), (dst), (src))
# define emit_sub_r_mr(pc, dst, src, disp) emit_op_r_mr(0x2b, (pc), (dst), (src), (disp))
# define emit_sub_mr_r(pc, dst, disp, src) emit_op_r_mr(0x29, (pc), (src), (dst), (disp))
# define emit_xchg_r_r(pc, dst, src) emit_op_r_r(0x87, (pc), (dst), (src))
# define emit_xchg_r_mr(pc, dst, src, disp) emit_op_r_mr(0x87, (pc), (dst), (src), (disp))
# define emit_xchg_mr_r(pc, dst, disp, src) emit_op_r_mr(0x87, (pc), (src), (dst), (disp))

# define emit_xor_r_i(pc, dst, imm) emit_op_r_i((pc), 0x83, 0x81, 0x6, (dst), (imm))
# define emit_xor_mr_i(pc, dst, disp, imm) emit_op_mr_i((pc), 0x83, 0x81, 0x6, (dst), (disp), (imm))
# define emit_xor_r_r(pc, dst, src) emit_op_r_r(0x33, (pc), (dst), (src))
# define emit_xor_r_mr(pc, dst, src, disp) emit_op_r_mr(0x33, (pc), (dst), (src), (disp))
# define emit_xor_mr_r(pc, dst, disp, src) emit_op_r_mr(0x31, (pc), (src), (dst), (disp))

# define emit_and_r_i(pc, dst, imm) emit_op_r_i((pc), 0x83, 0x81, 0x4, (dst), (imm))
# define emit_and_mr_i(pc, dst, disp, imm) emit_op_mr_i((pc), 0x83, 0x81, 0x4, (dst), (disp), (imm))
# define emit_and_r_r(pc, dst, src) emit_op_r_r(0x23, (pc), (dst), (src))
# define emit_and_r_mr(pc, dst, src, disp) emit_op_r_mr(0x23, (pc), (dst), (src), (disp))
# define emit_and_mr_r(pc, dst, disp, src) emit_op_r_mr(0x21, (pc), (src), (dst), (disp))

# define emit_or_r_i(pc, dst, imm) emit_op_r_i((pc), 0x83, 0x81, 0x1, (dst), (imm))
# define emit_or_mr_i(pc, dst, disp, imm) emit_op_mr_i((pc), 0x83, 0x81, 0x1, (dst), (disp), (imm))
# define emit_or_r_r(pc, dst, src) emit_op_r_r(0x0b, (pc), (dst), (src))
# define emit_or_r_mr(pc, dst, src, disp) emit_op_r_mr(0x0b, (pc), (dst), (src), (disp))
# define emit_or_mr_r(pc, dst, disp, src) emit_op_r_mr(0x09, (pc), (src), (dst), (disp))
# define emit_imul_r_r(pc, dst, src) { \
    emit_rex64((pc), (dst), (src)); \
    *((pc)++) = (char) 0x0f; \
    *((pc)++) = (char) 0xaf; \
    emit_modrm((pc), b11, (dst), (src)); \

# define emit_imul_r_mr(pc, dst, src, disp) { \
    emit_rex64((pc), (dst), (src)); \
    *((pc)++) = (char) 0x0f; \
    *((pc)++) = (char) 0xaf; \
        emit_modrm((pc), b00, (dst), (src)); \
    else if (is8bit(disp)) { \
        emit_modrm((pc), b01, (dst), (src)); \
        *((pc)++) = (char)(disp); \
        emit_modrm((pc), b10, (dst), (src)); \
        *(int *)(pc) = (int)(disp); \
# define emit_idiv_r_r(pc, dst, src) { \
    emit_xor_r_r((pc), RDX, RDX); \
    emit_test_r((pc), (src)); \
    emit_div_check_zero(pc); \
    emit_op_r_r(0xf7, (pc), 0x7, (src)); \

# define emit_idiv_r_mr(pc, dst, src, disp) { \
    emit_xor_r_r((pc), RDX, RDX); \
    emit_mov_r_mr((pc), ISR1, (src), (disp)); \
    emit_test_r((pc), ISR1); \
    emit_div_check_zero(pc); \
    emit_op_r_mr(0xf7, (pc), 0x7, (src), (disp)); \
# define emit_abs_r(pc, reg) emit_and_r_i((pc), (reg), ~(1L << 63))

# define emit_neg_r(pc, reg) emit_op_r_r(0xf7, (pc), 0x3, (reg))
# define emit_not_r(pc, reg) emit_op_r_r(0xf7, (pc), 0x2, (reg))
# define emit_inc_r(pc, reg) emit_op_r_r(0xff, (pc), 0x0, (reg))
# define emit_dec_r(pc, reg) emit_op_r_r(0xff, (pc), 0x1, (reg))
/* This needs a fixup, it seems... call_r doesn't */
# define emit_call_i(pc, imm) emit_op_i(0xe8, (pc), (imm))
# define emit_call_r(pc, reg) { \
    emit_rex64((pc), 0x0, (reg)); \
    *(pc)++ = (char)0xff; \
    emit_modrm((pc), b11, 0x2, (reg)); }
# define emit_jmp_r_r(pc, reg1, reg2) { \
    emit_rex((pc), (reg1), (reg2)); \
    *((pc)++) = (char)0xff; \
    emit_modrm((pc), b00, 0x4, b100); \
    emit_sib((pc), b00, (reg1), (reg2)); \

# define emit_jmp_i(pc, imm) emit_op_i(0xe9, (pc), (imm))
# define emit_jmp_i_fixup(ji, imm) { \
    opcode = jit_info->op_i + (imm); \
    Parrot_jit_newfixup(jit_info); \
    jit_info->arena.fixups->type = JIT_AMD64JUMP; \
    jit_info->arena.fixups->param.opcode = opcode; \
    if (jit_info->optimizer->cur_section->branch_target == \
        jit_info->optimizer->cur_section) \
        jit_info->arena.fixups->skip = \
            jit_info->optimizer->cur_section->branch_target->load_size; \
    emit_jmp_i(jit_info->native_ptr, 0xdead); \

# define emit_leave(pc) *((pc)++) = (char)0xc9;
# define emit_ret(pc)   *((pc)++) = (char)0xc3;
# define emit_mov_r_r(pc, dst, src) \
    emit_op_r_r(0x8B, (pc), (dst), (src))

/* mov [reg + offs], imm */
# define emit_mov_mr_i(pc, reg, offs, imm) { \
    if (is32bit(imm)) { \
        emit_rex64((pc), 0x0, (reg)); \
        *((pc)++) = (char) 0xc7; \
            emit_modrm((pc), b00, 0x0, (reg)); \
        else if (is8bit(offs)) { \
            emit_modrm((pc), b01, 0x0, (reg)); \
            *((pc)++) = (char)(offs); \
            emit_modrm((pc), b10, 0x0, (reg)); \
            *(int *)(pc) = (int)(offs); \
        *(int *)(pc) = (int)(imm); \
        emit_mov_r_i((pc), ISR1, (imm)); \
        emit_mov_mr_r((pc), (reg), (offs), ISR1); \

# define emit_mov_r_i(pc, reg, imm) { \
    emit_op_r(0xb8, (pc), (reg)); \
    *(long *)(pc) = (long)(imm); \
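/* Descriptive note (not original text): emit_mov_r_i emits the REX.W form of
 * B8+rd with a full 8-byte immediate, i.e. a "movabs". Assuming hardware
 * register numbers, emit_mov_r_i(pc, RAX, x) produces 48 B8 <imm64> and
 * emit_mov_r_i(pc, R11, x) produces 49 BB <imm64>, which is how call_func()
 * below loads a 64-bit function address before an indirect call. */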
# define emit_push_r(pc, reg) emit_64op_r(0x50, (pc), (reg))
# define emit_pop_r(pc, reg)  emit_64op_r(0x58, (pc), (reg))
# define emit_push_i(pc, imm) emit_op_i(0x68, (pc), (imm))

/* did you know that (unsigned)0 is not an 8-bit value? */
# define is8bit(c)  (((long)(c)) >= -128 && ((long)(c)) <= 127)
# define is32bit(c) (((long)(c)) >= -2147483648 && ((long)(c)) <= 2147483647)
# define emit_get_int_from_stack(pc, dst, disp) \
    emit_mov_r_mr((pc), (dst), RBP, (disp))

# define emit_send_int_to_stack(pc, src, disp) \
    emit_mov_mr_r((pc), RBP, (disp), (src))

/* mov dst, [src + disp] */
# define emit_mov_r_mr(pc, dst, src, disp) \
    emit_op_r_mr(0x8b, (pc), (dst), (src), (disp))

/* mov [dst + disp], src */
# define emit_mov_mr_r(pc, dst, disp, src) \
    emit_op_r_mr(0x89, (pc), (src), (dst), (disp))
/* lea dst, [src + disp] */
# define emit_lea_r_mr(pc, dst, src, disp) \
    emit_op_r_mr(0x8d, (pc), (dst), (src), (disp))
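/* A rough worked example (assuming hardware register numbers, RAX = 0 and
 * RBX = 3): emit_mov_r_mr(pc, RAX, RBX, 16) picks the 8-bit displacement form
 * and emits 48 8B 43 10, i.e. mov rax, [rbx+16]; the corresponding
 * emit_mov_mr_r store uses the 0x89 form with the same operand encoding. */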
/* push rbp, then set rbp to rsp (standard frame prologue) */
# define jit_emit_stack_frame_enter(pc) { \
    emit_push_r((pc), RBP); \
    emit_mov_r_r((pc), RBP, RSP); \

# define jit_emit_stack_frame_leave(pc) { \
    emit_pop_r((pc), RBP); \
# define emit_jcc(pc, code, disp) { \
    if (is8bit(disp)) { \
        *((pc)++) = (char) 0x70 | (code); \
        *((pc)++) = (char) (disp); \
        *((pc)++) = (char) 0x0f; \
        *((pc)++) = (char) 0x80 | (code); \
        *(int *)(pc) = (int)(disp); \

# define emit_jcc_fixup(ji, code, imm) { \
    opcode = (ji)->op_i + (imm); \
    Parrot_jit_newfixup(ji); \
    (ji)->arena.fixups->type = JIT_AMD64BRANCH; \
    (ji)->arena.fixups->param.opcode = opcode; \
    if ((ji)->optimizer->cur_section->branch_target == \
        (ji)->optimizer->cur_section) \
        (ji)->arena.fixups->skip = \
            (ji)->optimizer->cur_section->branch_target->load_size; \
    emit_jcc((ji)->native_ptr, (code), 0xdead); \
    jcc_jo,                 /* Jump if overflow */
    jcc_jno,                /* Jump if not overflow */
    jcc_jb,                 /* Jump if below */
    jcc_jc   = jcc_jb,      /* Jump if carry */
    jcc_jnae = jcc_jb,      /* Jump if not above or equal */
    jcc_jnb,                /* Jump if not below */
    jcc_jnc  = jcc_jnb,     /* Jump if not carry */
    jcc_jae  = jcc_jnb,     /* Jump if above or equal */
    jcc_jz,                 /* Jump if zero */
    jcc_je   = jcc_jz,      /* Jump if equal */
    jcc_jnz,                /* Jump if not zero */
    jcc_jne  = jcc_jnz,     /* Jump if not equal */
    jcc_jbe,                /* Jump if below or equal */
    jcc_jna  = jcc_jbe,     /* Jump if not above */
    jcc_jnbe,               /* Jump if not below or equal */
    jcc_ja   = jcc_jnbe,    /* Jump if above */
    jcc_js,                 /* Jump if sign */
    jcc_jns,                /* Jump if not sign */
    jcc_jp,                 /* Jump if parity */
    jcc_jpe  = jcc_jp,      /* Jump if parity even */
    jcc_jnp,                /* Jump if not parity */
    jcc_jpo  = jcc_jnp,     /* Jump if parity odd */
    jcc_jl,                 /* Jump if less */
    jcc_jnge = jcc_jl,      /* Jump if not greater or equal */
    jcc_jnl,                /* Jump if not less */
    jcc_jge  = jcc_jnl,     /* Jump if greater or equal */
    jcc_jle,                /* Jump if less or equal */
    jcc_jng  = jcc_jle,     /* Jump if not greater */
    jcc_jnle,               /* Jump if not less or equal */
    jcc_jg   = jcc_jnle     /* Jump if greater */
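/* A rough worked example (descriptive note, not original text): the enum
 * values above are the x86 condition-code nibbles, so emit_jcc(pc, jcc_jnz, 5)
 * takes the 8-bit path and emits 75 05 (jnz with a rel8 of 5), while a
 * displacement that does not fit in 8 bits emits 0F 85 followed by a rel32. */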
# define emit_test_r(pc, reg) \
    emit_op_r_r(0x85, (pc), (reg), (reg))

# define emit_test_r_r(pc, dst, src) \
    emit_op_r_r(0x85, (pc), (src), (dst))

# define jit_emit_end(pc) { \
    emit_pop_r((pc), R15); \
    emit_pop_r((pc), R14); \
    emit_pop_r((pc), R13); \
    emit_pop_r((pc), R12); \
    emit_pop_r((pc), RBX); \
    emit_pop_r((pc), RBP); \
/**************************************
 **************************************/

# define emit_op_x_x(prefix, op, pc, dst, src) { \
    *((pc)++) = (char) (prefix); \
    emit_rex((pc), (dst), (src)); \
    *((pc)++) = (char) 0x0f; \
    *((pc)++) = (char) (op); \
    emit_modrm((pc), b11, (dst), (src)); \

# define emit_op64_x_x(prefix, op, pc, dst, src) { \
    *((pc)++) = (char) (prefix); \
    emit_rex64((pc), (dst), (src)); \
    *((pc)++) = (char) 0x0f; \
    *((pc)++) = (char) (op); \
    emit_modrm((pc), b11, (dst), (src)); \
# define emit_op_x_mx(prefix, op, pc, dst, src, offs) { \
    *((pc)++) = (char) (prefix); \
    emit_rex((pc), (dst), (src)); \
    *((pc)++) = (char) 0x0f; \
    *((pc)++) = (char) (op); \
        emit_modrm((pc), b00, (dst), (src)); \
    else if (is8bit(offs)) { \
        emit_modrm((pc), b01, (dst), (src)); \
        *((pc)++) = (char)(long)(offs); \
        emit_modrm((pc), b10, (dst), (src)); \
        *(int *)(pc) = (int)(long)(offs); \
# define emit_op64_x_mx(prefix, op, pc, dst, src, offs) { \
    *((pc)++) = (char) (prefix); \
    emit_rex64((pc), (dst), (src)); \
    *((pc)++) = (char) 0x0f; \
    *((pc)++) = (char) (op); \
    if ((offs) == 0 && (src) != RBP) { \
        emit_modrm((pc), b00, (dst), (src)); \
    else if (is8bit(offs)) { \
        emit_modrm((pc), b01, (dst), (src)); \
        *((pc)++) = (char)(long)(offs); \
        emit_modrm((pc), b10, (dst), (src)); \
        *(int *)(pc) = (int)(long)(offs); \
# define emit_mov_x_x(pc, dst, src) emit_op_x_x(0x66, 0x28, (pc), (dst), (src))

# define emit_mov_x_mx(pc, dst, src, offs) emit_op_x_mx(0xf2, 0x10, (pc), (dst), (src), (offs))
# define emit_mov_mx_x(pc, dst, offs, src) emit_op_x_mx(0xf2, 0x11, (pc), (src), (dst), (offs))

/* Intended to zero a register */
# define emit_movhlps_x_x(pc, dst, src) { \
    emit_rex((pc), (src), (dst)); \
    *((pc)++) = (char) 0x0f; \
    *((pc)++) = (char) 0x12; \
    emit_modrm((pc), b11, (src), (dst)); \

# define emit_movlhps_x_x(pc, dst, src) { \
    emit_rex((pc), (src), (dst)); \
    *((pc)++) = (char) 0x0f; \
    *((pc)++) = (char) 0x16; \
    emit_modrm((pc), b11, (src), (dst)); \
# define emit_movd_r_x(pc, dst, src) { \
    *((pc)++) = (char) 0x66; \
    emit_rex64((pc), (dst), (src)); \
    *((pc)++) = (char) 0x0f; \
    *((pc)++) = (char) 0x7e; \
    emit_modrm((pc), b11, (dst), (src)); \

# define emit_movd_x_r(pc, dst, src) { \
    *((pc)++) = (char) 0x66; \
    emit_rex64((pc), (dst), (src)); \
    *((pc)++) = (char) 0x0f; \
    *((pc)++) = (char) 0x6e; \
    emit_modrm((pc), b11, (dst), (src)); \

# define emit_test_x(pc, reg) { \
    emit_xor_x_x((pc), FSR2, FSR2); \
    emit_comisd_x_x((pc), (reg), FSR2); \

# define emit_comisd_x_x(pc, dst, src) emit_op_x_x(0x66, 0x2f, (pc), (dst), (src))
# define emit_comisd_x_mx(pc, dst, src, offs) emit_op_x_mx(0x66, 0x2f, (pc), (dst), (src), (offs))

# define emit_add_x_x(pc, dst, src) emit_op_x_x(0xf2, 0x58, (pc), (dst), (src))
# define emit_add_x_mx(pc, dst, src, offs) emit_op_x_mx(0xf2, 0x58, (pc), (dst), (src), (offs))
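/* A rough worked example (assuming XMM1 = 1 and XMM2 = 2 in the register
 * enum): emit_add_x_x(pc, XMM1, XMM2) emits F2 0F 58 CA, i.e. addsd xmm1, xmm2;
 * no REX byte is emitted because neither register is extended. */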
# define emit_sub_x_x(pc, dst, src) emit_op_x_x(0xf2, 0x5c, (pc), (dst), (src))
# define emit_sub_x_mx(pc, dst, src, offs) emit_op_x_mx(0xf2, 0x5c, (pc), (dst), (src), (offs))

# define emit_and_x_x(pc, dst, src) emit_op_x_x(0x66, 0x54, (pc), (dst), (src))
# define emit_and_x_mx(pc, dst, src, offs) emit_op_x_mx(0x66, 0x54, (pc), (dst), (src), (offs))

# define emit_xor_x_x(pc, dst, src) emit_op_x_x(0x66, 0x57, (pc), (dst), (src))
# define emit_xor_x_mx(pc, dst, src, offs) emit_op_x_mx(0x66, 0x57, (pc), (dst), (src), (offs))

# define emit_mul_x_x(pc, dst, src) emit_op_x_x(0xf2, 0x59, (pc), (dst), (src))
# define emit_mul_x_mx(pc, dst, src, offs) emit_op_x_mx(0xf2, 0x59, (pc), (dst), (src), (offs))
/* I tried to do a check for FSR1 == dst, such as from core.jit, but there were
 * too many bugs to track down and too many hackish things to do (movhlps and
 * movlhps were used). I gave up and settled on the cleaner and likely faster
 * overall method of adding an FSR2 and going down to 14 mapped registers. */

# define emit_div_x_x(pc, dst, src) { \
    emit_movhlps_x_x((pc), FSR2, FSR2); \
    emit_comisd_x_x((pc), FSR2, (src)); \
    emit_div_check_zero((pc)); \
    emit_op_x_x(0xf2, 0x5e, (pc), (dst), (src)); \

# define emit_div_x_mx(pc, dst, src, offs) { \
    emit_movhlps_x_x((pc), FSR2, FSR2); \
    emit_comisd_x_mx((pc), FSR2, (src), (offs)); \
    emit_div_check_zero((pc)); \
    emit_op_x_mx(0xf2, 0x5e, (pc), (dst), (src), (offs)); \
# define emit_sqrt_x_x(pc, dst, src) emit_op_x_x(0xf2, 0x51, (pc), (dst), (src))
# define emit_sqrt_x_mx(pc, dst, src, offs) emit_op_x_mx(0xf2, 0x51, (pc), (dst), (src), (offs))

# define emit_cvtsi2sd_x_mr(pc, dst, src, offs) emit_op64_x_mx(0xf2, 0x2a, (pc), (dst), (src), (offs))
# define emit_cvtsi2sd_x_r(pc, dst, src) emit_op64_x_x(0xf2, 0x2a, (pc), (dst), (src))

# define emit_cvttsd2si_r_mx(pc, dst, src, offs) emit_op64_x_mx(0xf2, 0x2c, (pc), (dst), (src), (offs))
# define emit_cvttsd2si_r_x(pc, dst, src) emit_op64_x_x(0xf2, 0x2c, (pc), (dst), (src))

# define emit_cvtsd2si_r_mx(pc, dst, src, offs) emit_op64_x_mx(0xf2, 0x2d, (pc), (dst), (src), (offs))
# define emit_cvtsd2si_r_x(pc, dst, src) emit_op64_x_x(0xf2, 0x2d, (pc), (dst), (src))

# define emit_ldmxcsr(pc) { \
    emit_xor_r_r((pc), ISR2, ISR2); \
    *((pc)++) = (char) 0x0f; \
    *((pc)++) = (char) 0xAE; \
    emit_modrm((pc), b10, 0x2, ISR2); \
    *(int *)(pc) = (int)&mxcsr; \
/*********************************************************/

# ifdef USE_OP_MAP_AND_CODE_START
/* These two can be mixed together just like in the i386 jit. All the places I
 * can see this being called require it to be included, but for the moment I'm
 * keeping it as these macros. */

 * emit code that gets interp->code->jit_info->arena->op_map
 * and sets the OP_MAP register
# define jit_emit_load_op_map(pc) { \
    emit_mov_r_mr((pc), OP_MAP, INTERP, (long)offsetof(Interp, code)); \
    emit_mov_r_mr((pc), OP_MAP, OP_MAP, (long)offsetof(PackFile_ByteCode, jit_info)); \
    emit_lea_r_mr((pc), OP_MAP, OP_MAP, (long)offsetof(Parrot_jit_info_t, arena)); \
    emit_mov_r_mr((pc), OP_MAP, OP_MAP, (long)offsetof(Parrot_jit_arena_t, op_map)); \

 * emit code that gets interp->code->base.data
 * and sets the CODE_START register
# define jit_emit_load_code_start(pc) { \
    emit_mov_r_mr((pc), CODE_START, INTERP, (long)offsetof(Interp, code)); \
    emit_mov_r_mr((pc), CODE_START, CODE_START, (long)offsetof(PackFile_Segment, data)); \

# endif /* USE_OP_MAP_AND_CODE_START */
 * emit code that calls a Parrot opcode function
static void call_func(Parrot_jit_info_t *jit_info, void *addr) {
    if ((long)addr > (long)INT_MAX) {
        /* Move the address into our scratch register R11
         * We cannot use just the immediate form of call because the address
         * will be too large if we're using a shared parrot, but will be ok on
         * This will most likely only be used on shared libraries.
        /* Absolute near call to R11 */
        emit_mov_r_i(jit_info->native_ptr, R11, addr);
        emit_call_r(jit_info->native_ptr, R11);
        /* Call with an immediate value. Mainly for a static parrot, and
        Parrot_jit_newfixup(jit_info);
        jit_info->arena.fixups->type = JIT_AMD64CALL;
        jit_info->arena.fixups->param.fptr = D2FPTR(addr);
        emit_call_i(jit_info->native_ptr, 0xdead);
/* Jump to RAX, which needs to be set before calling this */
Parrot_emit_jump_to_rax(Parrot_jit_info_t *jit_info, Interp *interp)
    if (!jit_info->objfile) {
# ifdef USE_OP_MAP_AND_CODE_START
        /* Get interp->code->base.data */
        jit_emit_load_code_start(jit_info->native_ptr);
        emit_sub_r_r(jit_info->native_ptr, RAX, CODE_START);

        /* Get interp->code->jit_info->arena->op_map */
        jit_emit_load_op_map(jit_info->native_ptr);

        /* emit code that gets interp->code->base.data */
        emit_mov_r_mr(jit_info->native_ptr, RCX, INTERP, (long)offsetof(Interp, code));
        emit_mov_r_mr(jit_info->native_ptr, RDX, RCX, (long)offsetof(PackFile_Segment, data));
        emit_sub_r_r(jit_info->native_ptr, RAX, RDX);

        /* Reuse interp->code in RCX, get interp->code->jit_info->arena->op_map */
        emit_mov_r_mr(jit_info->native_ptr, RDX, RCX, (long)offsetof(PackFile_ByteCode, jit_info));
        emit_lea_r_mr(jit_info->native_ptr, RDX, RDX, (long)offsetof(Parrot_jit_info_t, arena));
        emit_mov_r_mr(jit_info->native_ptr, RDX, RDX, (long)offsetof(Parrot_jit_arena_t, op_map));

    emit_mov_r_mr(jit_info->native_ptr, RBX, INTERP, (long)offsetof(Interp, ctx.bp));

# ifdef USE_OP_MAP_AND_CODE_START
    emit_jmp_r_r(jit_info->native_ptr, RAX, OP_MAP);
    emit_jmp_r_r(jit_info->native_ptr, RAX, RDX);

#endif /* JIT_EMIT */
 * emit code that calls a core.ops function from src/core_ops.c,
 * the generated code is the translation of this:
 *     PC = ((INTERP->op_func_table)[*PC])(PC, INTERP)
extern int jit_op_count(void);
Parrot_jit_normal_op(Parrot_jit_info_t *jit_info,
    int cur_op = *jit_info->cur_op;

    if (cur_op >= jit_op_count()) {
        cur_op = CORE_OPS_wrapper__;

    /* check every eight ops, could be changed to 16, or 32, or ... */
    if ((++check & 0x7) == 0) {
        emit_mov_r_i(jit_info->native_ptr, RDI, jit_info->cur_op);
        emit_mov_r_r(jit_info->native_ptr, RSI, INTERP);
        call_func(jit_info, (void (*)(void))interp->op_func_table[CORE_OPS_check_events]);

    emit_mov_r_i(jit_info->native_ptr, RDI, jit_info->cur_op);
    emit_mov_r_r(jit_info->native_ptr, RSI, INTERP);
    call_func(jit_info, (void (*)(void))interp->op_func_table[cur_op]);
 * emit code for a branching parrot opcode. All cached registers
 * need recalculation, as a branch can go into different code segments
 * with different code start and different jit_info
Parrot_jit_cpcf_op(Parrot_jit_info_t *jit_info,
    Parrot_jit_normal_op(jit_info, interp);
    Parrot_emit_jump_to_rax(jit_info, interp);
 * release the stack frame and exit; see core.jit
static void Parrot_end_jit(Parrot_jit_info_t *, Interp *);
# undef Parrot_jit_restart_op

 * emit code that might leave the JIT runcore
Parrot_jit_restart_op(Parrot_jit_info_t *jit_info,
    Parrot_jit_normal_op(jit_info, interp);
    emit_test_r(jit_info->native_ptr, RAX);

    /* Quick fixup; we know the displacement is 12, but it has to be patched as a byte anyway */
    emit_jcc(jit_info->native_ptr, jcc_jnz, 0x00);
    sav_ptr = (char *)(jit_info->native_ptr - 1);
    Parrot_end_jit(jit_info, interp);
    *sav_ptr = (char)(jit_info->native_ptr - sav_ptr - 1);

    Parrot_emit_jump_to_rax(jit_info, interp);

#endif /* JIT_EMIT == 2 */
# define REQUIRES_CONSTANT_POOL 0

 * emit stack frame according to ABI
 * preserve mapped registers according to ABI
 * load INTERP, OP_MAP, CODE_START, BP registers
 * then run the code at pc
 * the function is called as
 * runops(interp, pc)
Parrot_jit_begin(Parrot_jit_info_t *jit_info,
    jit_emit_stack_frame_enter(jit_info->native_ptr);

    /* Saved registers */
    emit_push_r(jit_info->native_ptr, RBP);
    emit_push_r(jit_info->native_ptr, RBX);
    emit_push_r(jit_info->native_ptr, R12);
    emit_push_r(jit_info->native_ptr, R13);
    emit_push_r(jit_info->native_ptr, R14);
    emit_push_r(jit_info->native_ptr, R15);

    /* When our "function" gets called, RDI will be the interp,
     * and RSI will be jit_info->native_ptr */
    /* Set R15 to interp */
    /* emit_ldmxcsr(jit_info->native_ptr); */
    emit_mov_r_r(jit_info->native_ptr, R15, RDI);
    emit_mov_r_r(jit_info->native_ptr, RAX, RSI);

    Parrot_emit_jump_to_rax(jit_info, interp);
 * fix up all emitted branches
Parrot_jit_dofixup(Parrot_jit_info_t *jit_info,
    Parrot_jit_fixup_t *fixup, *next;

    fixup = jit_info->arena.fixups;

        switch (fixup->type) {
        case JIT_AMD64BRANCH:
            fixup_ptr = Parrot_jit_fixup_target(jit_info, fixup) + 2;
            *(int *)(fixup_ptr) =
                jit_info->arena.op_map[fixup->param.opcode].offset
                - (fixup->native_offset + 6) + fixup->skip;
            fixup_ptr = Parrot_jit_fixup_target(jit_info, fixup) + 1;
            *(int *)(fixup_ptr) =
                jit_info->arena.op_map[fixup->param.opcode].offset
                - (fixup->native_offset + 5) + fixup->skip;
            fixup_ptr = jit_info->arena.start + fixup->native_offset + 1;
            *(int *)(fixup_ptr) = (int)(long)fixup->param.fptr -
                                  (int)(long)fixup_ptr - 4;
            real_exception(interp, NULL, JIT_ERROR,
                "Unknown fixup type: %d\n", fixup->type);

    jit_info->arena.fixups = NULL;
Parrot_jit_begin_sub(Parrot_jit_info_t *jit_info,
    /* NOT CALLED CURRENTLY */
 * define interface functions for register -> parrot register moves

/* set mem to reg */
jit_mov_mx_x(Interp *interp, Parrot_jit_info_t *jit_info,
             int base_reg, INTVAL offs, int src_reg)
    emit_mov_mx_x(jit_info->native_ptr, base_reg, offs, src_reg);

jit_mov_mr_r(Interp *interp, Parrot_jit_info_t *jit_info,
             int base_reg, INTVAL offs, int src_reg)
    emit_mov_mr_r(jit_info->native_ptr, base_reg, offs, src_reg);

/* set reg to mem */
jit_mov_x_mx(Interp *interp, Parrot_jit_info_t *jit_info,
             int dst_reg, int base_reg, INTVAL offs)
    emit_mov_x_mx(jit_info->native_ptr, dst_reg, base_reg, offs);

jit_mov_r_mr(Interp *interp, Parrot_jit_info_t *jit_info,
             int dst_reg, int base_reg, INTVAL offs)
    emit_mov_r_mr(jit_info->native_ptr, dst_reg, base_reg, offs);
 * define how many int and float registers can be used by the JIT
# define INT_REGISTERS_TO_MAP 10
# define FLOAT_REGISTERS_TO_MAP 15
 * enumerate these mapped registers
 * please note that you have to preserve registers in
 * Parrot_jit_begin according to the ABI of the architecture
static const char intval_map[INT_REGISTERS_TO_MAP] =
    /* Preserved, we'd have more, but keeping code_start, op_map, interp,
     * and the base pointer in registers takes away four, not to mention
     * RBP which is used for easier debugging. That's five registers used
     * for one reason or another at the moment. I'm not sure if it's worth
     * RDX for idiv, TODO: handle corner cases, i.e. steal i386 code
     * RBX for Interp->ctx.bp
     * RBP for debugging, can add it to the preserved list
     * R13 for CODE_START
# ifndef USE_OP_MAP_AND_CODE_START
    RCX, RSI, RDI, R8, R9, R10, RDX
static const char floatval_map[FLOAT_REGISTERS_TO_MAP] =
    XMM2,  XMM3,  XMM4,  XMM5,  XMM6,  XMM7,
    XMM8,  XMM9,  XMM10, XMM11, XMM12, XMM13, XMM14, XMM15
 * define arch specific details in jit_arch_info
static const jit_arch_info arch_info = {
    /* CPU <- Parrot reg move functions */
    /* Parrot <- CPU reg move functions */
    Parrot_jit_begin,               /* emit code prologue */
# ifdef USE_OP_MAP_AND_CODE_START
    3,                              /* preserved int */
    intval_map,                     /* which ints mapped */
    14,                             /* mapped float */
    0,                              /* preserved float */
    floatval_map                    /* which floats mapped */
    Parrot_jit_begin_sub,
    /* JIT_CODE_SUB_REGS_ONLY */
    /*Parrot_jit_begin_sub_regs*/ 0, /* emit code prologue */
const jit_arch_info *
Parrot_jit_init(Interp *interp)

#endif /* JIT_EMIT == 0 */
#endif /* PARROT_JIT_AMD64_JIT_EMIT_H_GUARD */

 * c-file-style: "parrot"
 * vim: expandtab shiftwidth=4: