1 // This file is a fragment of the yjit.o compilation unit. See yjit.c.
8 #include "yjit_iface.h"
10 #include "yjit_codegen.h"
12 // For exiting from YJIT frame from branch_stub_hit().
13 // Filled by gen_code_for_exit_from_stub().
14 static uint8_t *code_for_exit_from_stub
= NULL
;
17 Get an operand for the adjusted stack pointer address
20 ctx_sp_opnd(ctx_t
*ctx
, int32_t offset_bytes
)
22 int32_t offset
= (ctx
->sp_offset
* sizeof(VALUE
)) + offset_bytes
;
23 return mem_opnd(64, REG_SP
, offset
);
27 Push one new value on the temp stack with an explicit mapping
28 Return a pointer to the new stack top
31 ctx_stack_push_mapping(ctx_t
*ctx
, temp_type_mapping_t mapping
)
33 // If type propagation is disabled, store no types
34 if (rb_yjit_opts
.no_type_prop
) {
35 mapping
.type
= TYPE_UNKNOWN
;
38 // Keep track of the type and mapping of the value
39 if (ctx
->stack_size
< MAX_TEMP_TYPES
) {
40 ctx
->temp_mapping
[ctx
->stack_size
] = mapping
.mapping
;
41 ctx
->temp_types
[ctx
->stack_size
] = mapping
.type
;
43 RUBY_ASSERT(mapping
.mapping
.kind
!= TEMP_LOCAL
|| mapping
.mapping
.idx
< MAX_LOCAL_TYPES
);
44 RUBY_ASSERT(mapping
.mapping
.kind
!= TEMP_STACK
|| mapping
.mapping
.idx
== 0);
45 RUBY_ASSERT(mapping
.mapping
.kind
!= TEMP_SELF
|| mapping
.mapping
.idx
== 0);
51 // SP points just above the topmost value
52 int32_t offset
= (ctx
->sp_offset
- 1) * sizeof(VALUE
);
53 return mem_opnd(64, REG_SP
, offset
);
58 Push one new value on the temp stack
59 Return a pointer to the new stack top
62 ctx_stack_push(ctx_t
*ctx
, val_type_t type
)
64 temp_type_mapping_t mapping
= { MAP_STACK
, type
};
65 return ctx_stack_push_mapping(ctx
, mapping
);
69 Push the self value on the stack
72 ctx_stack_push_self(ctx_t
*ctx
)
74 temp_type_mapping_t mapping
= { MAP_SELF
, TYPE_UNKNOWN
};
75 return ctx_stack_push_mapping(ctx
, mapping
);
79 Push a local variable on the stack
82 ctx_stack_push_local(ctx_t
*ctx
, size_t local_idx
)
84 if (local_idx
>= MAX_LOCAL_TYPES
) {
85 return ctx_stack_push(ctx
, TYPE_UNKNOWN
);
88 temp_type_mapping_t mapping
= {
89 (temp_mapping_t
){ .kind
= TEMP_LOCAL
, .idx
= local_idx
},
93 return ctx_stack_push_mapping(ctx
, mapping
);
97 Pop N values off the stack
98 Return a pointer to the stack top before the pop operation
101 ctx_stack_pop(ctx_t
*ctx
, size_t n
)
103 RUBY_ASSERT(n
<= ctx
->stack_size
);
105 // SP points just above the topmost value
106 int32_t offset
= (ctx
->sp_offset
- 1) * sizeof(VALUE
);
107 x86opnd_t top
= mem_opnd(64, REG_SP
, offset
);
109 // Clear the types of the popped values
110 for (size_t i
= 0; i
< n
; ++i
)
112 size_t idx
= ctx
->stack_size
- i
- 1;
113 if (idx
< MAX_TEMP_TYPES
) {
114 ctx
->temp_types
[idx
] = TYPE_UNKNOWN
;
115 ctx
->temp_mapping
[idx
] = MAP_STACK
;
119 ctx
->stack_size
-= n
;
126 Get an operand pointing to a slot on the temp stack
129 ctx_stack_opnd(ctx_t
*ctx
, int32_t idx
)
131 // SP points just above the topmost value
132 int32_t offset
= (ctx
->sp_offset
- 1 - idx
) * sizeof(VALUE
);
133 x86opnd_t opnd
= mem_opnd(64, REG_SP
, offset
);
139 Get the type of an instruction operand
142 ctx_get_opnd_type(const ctx_t
*ctx
, insn_opnd_t opnd
)
145 return ctx
->self_type
;
147 RUBY_ASSERT(opnd
.idx
< ctx
->stack_size
);
148 int stack_idx
= ctx
->stack_size
- 1 - opnd
.idx
;
150 // If outside of tracked range, do nothing
151 if (stack_idx
>= MAX_TEMP_TYPES
)
154 temp_mapping_t mapping
= ctx
->temp_mapping
[stack_idx
];
156 switch (mapping
.kind
) {
158 return ctx
->self_type
;
161 return ctx
->temp_types
[ctx
->stack_size
- 1 - opnd
.idx
];
164 RUBY_ASSERT(mapping
.idx
< MAX_LOCAL_TYPES
);
165 return ctx
->local_types
[mapping
.idx
];
168 rb_bug("unreachable");
171 static int type_diff(val_type_t src
, val_type_t dst
);
// Overwrite dest with src, asserting the upgrade is compatible.
// do/while(false) makes the macro a single statement.
#define UPGRADE_TYPE(dest, src) do { \
    RUBY_ASSERT(type_diff((src), (dest)) != INT_MAX); \
    (dest) = (src); \
} while (false)
179 Upgrade (or "learn") the type of an instruction operand
180 This value must be compatible and at least as specific as the previously known type.
181 If this value originated from self, or an lvar, the learned type will be
182 propagated back to its source.
185 ctx_upgrade_opnd_type(ctx_t
*ctx
, insn_opnd_t opnd
, val_type_t type
)
187 // If type propagation is disabled, store no types
188 if (rb_yjit_opts
.no_type_prop
)
192 UPGRADE_TYPE(ctx
->self_type
, type
);
196 RUBY_ASSERT(opnd
.idx
< ctx
->stack_size
);
197 int stack_idx
= ctx
->stack_size
- 1 - opnd
.idx
;
199 // If outside of tracked range, do nothing
200 if (stack_idx
>= MAX_TEMP_TYPES
)
203 temp_mapping_t mapping
= ctx
->temp_mapping
[stack_idx
];
205 switch (mapping
.kind
) {
207 UPGRADE_TYPE(ctx
->self_type
, type
);
211 UPGRADE_TYPE(ctx
->temp_types
[stack_idx
], type
);
215 RUBY_ASSERT(mapping
.idx
< MAX_LOCAL_TYPES
);
216 UPGRADE_TYPE(ctx
->local_types
[mapping
.idx
], type
);
222 Get both the type and mapping (where the value originates) of an operand.
223 This is can be used with ctx_stack_push_mapping or ctx_set_opnd_mapping to copy
224 a stack value's type while maintaining the mapping.
226 static temp_type_mapping_t
227 ctx_get_opnd_mapping(const ctx_t
*ctx
, insn_opnd_t opnd
)
229 temp_type_mapping_t type_mapping
;
230 type_mapping
.type
= ctx_get_opnd_type(ctx
, opnd
);
233 type_mapping
.mapping
= MAP_SELF
;
237 RUBY_ASSERT(opnd
.idx
< ctx
->stack_size
);
238 int stack_idx
= ctx
->stack_size
- 1 - opnd
.idx
;
240 if (stack_idx
< MAX_TEMP_TYPES
) {
241 type_mapping
.mapping
= ctx
->temp_mapping
[stack_idx
];
244 // We can't know the source of this stack operand, so we assume it is
245 // a stack-only temporary. type will be UNKNOWN
246 RUBY_ASSERT(type_mapping
.type
.type
== ETYPE_UNKNOWN
);
247 type_mapping
.mapping
= MAP_STACK
;
254 Overwrite both the type and mapping of a stack operand.
257 ctx_set_opnd_mapping(ctx_t
*ctx
, insn_opnd_t opnd
, temp_type_mapping_t type_mapping
)
259 // self is always MAP_SELF
260 RUBY_ASSERT(!opnd
.is_self
);
262 RUBY_ASSERT(opnd
.idx
< ctx
->stack_size
);
263 int stack_idx
= ctx
->stack_size
- 1 - opnd
.idx
;
265 // If type propagation is disabled, store no types
266 if (rb_yjit_opts
.no_type_prop
)
269 // If outside of tracked range, do nothing
270 if (stack_idx
>= MAX_TEMP_TYPES
)
273 ctx
->temp_mapping
[stack_idx
] = type_mapping
.mapping
;
275 // Only used when mapping == MAP_STACK
276 ctx
->temp_types
[stack_idx
] = type_mapping
.type
;
280 Set the type of a local variable
283 ctx_set_local_type(ctx_t
*ctx
, size_t idx
, val_type_t type
)
285 // If type propagation is disabled, store no types
286 if (rb_yjit_opts
.no_type_prop
)
289 if (idx
>= MAX_LOCAL_TYPES
)
292 // If any values on the stack map to this local we must detach them
293 for (int i
= 0; i
< MAX_TEMP_TYPES
; i
++) {
294 temp_mapping_t
*mapping
= &ctx
->temp_mapping
[i
];
295 if (mapping
->kind
== TEMP_LOCAL
&& mapping
->idx
== idx
) {
296 ctx
->temp_types
[i
] = ctx
->local_types
[mapping
->idx
];
297 *mapping
= MAP_STACK
;
301 ctx
->local_types
[idx
] = type
;
304 // Erase local variable type information
305 // eg: because of a call we can't track
307 ctx_clear_local_types(ctx_t
*ctx
)
309 // When clearing local types we must detach any stack mappings to those
310 // locals. Even if local values may have changed, stack values will not.
311 for (int i
= 0; i
< MAX_TEMP_TYPES
; i
++) {
312 temp_mapping_t
*mapping
= &ctx
->temp_mapping
[i
];
313 if (mapping
->kind
== TEMP_LOCAL
) {
314 RUBY_ASSERT(mapping
->idx
< MAX_LOCAL_TYPES
);
315 ctx
->temp_types
[i
] = ctx
->local_types
[mapping
->idx
];
316 *mapping
= MAP_STACK
;
318 RUBY_ASSERT(mapping
->kind
== TEMP_STACK
|| mapping
->kind
== TEMP_SELF
);
320 memset(&ctx
->local_types
, 0, sizeof(ctx
->local_types
));
324 /* This returns an appropriate val_type_t based on a known value */
326 yjit_type_of_value(VALUE val
)
328 if (SPECIAL_CONST_P(val
)) {
332 else if (NIL_P(val
)) {
335 else if (val
== Qtrue
) {
338 else if (val
== Qfalse
) {
341 else if (STATIC_SYM_P(val
)) {
342 return TYPE_STATIC_SYMBOL
;
344 else if (FLONUM_P(val
)) {
349 UNREACHABLE_RETURN(TYPE_IMM
);
353 switch (BUILTIN_TYPE(val
)) {
361 // generic heap object
367 /* The name of a type, for debugging */
368 RBIMPL_ATTR_MAYBE_UNUSED()
370 yjit_type_name(val_type_t type
)
372 RUBY_ASSERT(!(type
.is_imm
&& type
.is_heap
));
377 return "unknown immediate";
379 else if (type
.is_heap
) {
380 return "unknown heap";
405 UNREACHABLE_RETURN("");
409 Compute a difference between two value types
410 Returns 0 if the two are the same
411 Returns > 0 if different but compatible
412 Returns INT_MAX if incompatible
415 type_diff(val_type_t src
, val_type_t dst
)
417 RUBY_ASSERT(!src
.is_heap
|| !src
.is_imm
);
418 RUBY_ASSERT(!dst
.is_heap
|| !dst
.is_imm
);
420 // If dst assumes heap but src doesn't
421 if (dst
.is_heap
&& !src
.is_heap
)
424 // If dst assumes imm but src doesn't
425 if (dst
.is_imm
&& !src
.is_imm
)
428 // If dst assumes known type different from src
429 if (dst
.type
!= ETYPE_UNKNOWN
&& dst
.type
!= src
.type
)
432 if (dst
.is_heap
!= src
.is_heap
)
435 if (dst
.is_imm
!= src
.is_imm
)
438 if (dst
.type
!= src
.type
)
445 Compute a difference score for two context objects
446 Returns 0 if the two contexts are the same
447 Returns > 0 if different but compatible
448 Returns INT_MAX if incompatible
451 ctx_diff(const ctx_t
*src
, const ctx_t
*dst
)
453 // Can only lookup the first version in the chain
454 if (dst
->chain_depth
!= 0)
457 // Blocks with depth > 0 always produce new versions
458 // Sidechains cannot overlap
459 if (src
->chain_depth
!= 0)
462 if (dst
->stack_size
!= src
->stack_size
)
465 if (dst
->sp_offset
!= src
->sp_offset
)
471 // Check the type of self
472 int self_diff
= type_diff(src
->self_type
, dst
->self_type
);
474 if (self_diff
== INT_MAX
)
479 // For each local type we track
480 for (size_t i
= 0; i
< MAX_LOCAL_TYPES
; ++i
)
482 val_type_t t_src
= src
->local_types
[i
];
483 val_type_t t_dst
= dst
->local_types
[i
];
484 int temp_diff
= type_diff(t_src
, t_dst
);
486 if (temp_diff
== INT_MAX
)
492 // For each value on the temp stack
493 for (size_t i
= 0; i
< src
->stack_size
; ++i
)
495 temp_type_mapping_t m_src
= ctx_get_opnd_mapping(src
, OPND_STACK(i
));
496 temp_type_mapping_t m_dst
= ctx_get_opnd_mapping(dst
, OPND_STACK(i
));
498 if (m_dst
.mapping
.kind
!= m_src
.mapping
.kind
) {
499 if (m_dst
.mapping
.kind
== TEMP_STACK
) {
500 // We can safely drop information about the source of the temp
508 else if (m_dst
.mapping
.idx
!= m_src
.mapping
.idx
) {
512 int temp_diff
= type_diff(m_src
.type
, m_dst
.type
);
514 if (temp_diff
== INT_MAX
)
523 // Get all blocks for a particular place in an iseq.
524 static rb_yjit_block_array_t
525 yjit_get_version_array(const rb_iseq_t
*iseq
, unsigned idx
)
527 struct rb_iseq_constant_body
*body
= iseq
->body
;
529 if (rb_darray_size(body
->yjit_blocks
) == 0) {
533 RUBY_ASSERT((unsigned)rb_darray_size(body
->yjit_blocks
) == body
->iseq_size
);
534 return rb_darray_get(body
->yjit_blocks
, idx
);
537 // Count the number of block versions matching a given blockid
538 static size_t get_num_versions(blockid_t blockid
)
540 return rb_darray_size(yjit_get_version_array(blockid
.iseq
, blockid
.idx
));
543 // Keep track of a block version. Block should be fully constructed.
545 add_block_version(block_t
*block
)
547 const blockid_t blockid
= block
->blockid
;
548 const rb_iseq_t
*iseq
= blockid
.iseq
;
549 struct rb_iseq_constant_body
*body
= iseq
->body
;
551 // Function entry blocks must have stack size 0
552 RUBY_ASSERT(!(block
->blockid
.idx
== 0 && block
->ctx
.stack_size
> 0));
554 // Ensure yjit_blocks is initialized for this iseq
555 if (rb_darray_size(body
->yjit_blocks
) == 0) {
556 // Initialize yjit_blocks to be as wide as body->iseq_encoded
557 int32_t casted
= (int32_t)body
->iseq_size
;
558 if ((unsigned)casted
!= body
->iseq_size
) {
559 rb_bug("iseq too large");
561 if (!rb_darray_make(&body
->yjit_blocks
, casted
)) {
562 rb_bug("allocation failed");
566 // First block compiled for this iseq
567 yjit_runtime_counters
.compiled_iseq_count
++;
571 RUBY_ASSERT((int32_t)blockid
.idx
< rb_darray_size(body
->yjit_blocks
));
572 rb_yjit_block_array_t
*block_array_ref
= rb_darray_ref(body
->yjit_blocks
, blockid
.idx
);
575 if (!rb_darray_append(block_array_ref
, block
)) {
576 rb_bug("allocation failed");
580 // By writing the new block to the iseq, the iseq now
581 // contains new references to Ruby objects. Run write barriers.
582 cme_dependency_t
*cme_dep
;
583 rb_darray_foreach(block
->cme_dependencies
, cme_dependency_idx
, cme_dep
) {
584 RB_OBJ_WRITTEN(iseq
, Qundef
, cme_dep
->receiver_klass
);
585 RB_OBJ_WRITTEN(iseq
, Qundef
, cme_dep
->callee_cme
);
588 // Run write barriers for all objects in generated code.
589 uint32_t *offset_element
;
590 rb_darray_foreach(block
->gc_object_offsets
, offset_idx
, offset_element
) {
591 uint32_t offset_to_value
= *offset_element
;
592 uint8_t *value_address
= cb_get_ptr(cb
, offset_to_value
);
595 memcpy(&object
, value_address
, SIZEOF_VALUE
);
596 RB_OBJ_WRITTEN(iseq
, Qundef
, object
);
601 yjit_runtime_counters
.compiled_block_count
++;
606 branch_code_size(const branch_t
*branch
)
608 return branch
->end_addr
- branch
->start_addr
;
611 // Generate code for a branch, possibly rewriting and changing the size of it
613 regenerate_branch(codeblock_t
*cb
, branch_t
*branch
)
615 if (branch
->start_addr
< cb_get_ptr(cb
, yjit_codepage_frozen_bytes
)) {
616 // Generating this branch would modify frozen bytes. Do nothing.
620 const uint32_t old_write_pos
= cb
->write_pos
;
621 const bool branch_terminates_block
= branch
->end_addr
== branch
->block
->end_addr
;
623 RUBY_ASSERT(branch
->dst_addrs
[0] != NULL
);
625 cb_set_write_ptr(cb
, branch
->start_addr
);
626 branch
->gen_fn(cb
, branch
->dst_addrs
[0], branch
->dst_addrs
[1], branch
->shape
);
627 branch
->end_addr
= cb_get_write_ptr(cb
);
629 if (branch_terminates_block
) {
631 branch
->block
->end_addr
= branch
->end_addr
;
634 // cb->write_pos is both a write cursor and a marker for the end of
635 // everything written out so far. Leave cb->write_pos at the end of the
636 // block before returning. This function only ever bump or retain the end
637 // of block marker since that's what the majority of callers want. When the
638 // branch sits at the very end of the codeblock and it shrinks after
639 // regeneration, it's up to the caller to drop bytes off the end to
640 // not leave a gap and implement branch->shape.
641 if (old_write_pos
> cb
->write_pos
) {
642 // We rewound cb->write_pos to generate the branch, now restore it.
643 cb_set_pos(cb
, old_write_pos
);
646 // The branch sits at the end of cb and consumed some memory.
647 // Keep cb->write_pos.
651 // Create a new outgoing branch entry for a block
653 make_branch_entry(block_t
*block
, const ctx_t
*src_ctx
, branchgen_fn gen_fn
)
655 RUBY_ASSERT(block
!= NULL
);
657 // Allocate and zero-initialize
658 branch_t
*branch
= calloc(1, sizeof(branch_t
));
660 branch
->block
= block
;
661 (void)src_ctx
; // Unused for now
662 branch
->gen_fn
= gen_fn
;
663 branch
->shape
= SHAPE_DEFAULT
;
665 // Add to the list of outgoing branches for the block
666 rb_darray_append(&block
->outgoing
, branch
);
671 // Retrieve a basic block version for an (iseq, idx) tuple
673 find_block_version(blockid_t blockid
, const ctx_t
*ctx
)
675 rb_yjit_block_array_t versions
= yjit_get_version_array(blockid
.iseq
, blockid
.idx
);
678 block_t
*best_version
= NULL
;
679 int best_diff
= INT_MAX
;
681 // For each version matching the blockid
682 rb_darray_for(versions
, idx
) {
683 block_t
*version
= rb_darray_get(versions
, idx
);
684 int diff
= ctx_diff(ctx
, &version
->ctx
);
686 // Note that we always prefer the first matching
687 // version because of inline-cache chains
688 if (diff
< best_diff
) {
689 best_version
= version
;
694 // If greedy versioning is enabled
695 if (rb_yjit_opts
.greedy_versioning
)
697 // If we're below the version limit, don't settle for an imperfect match
698 if ((uint32_t)rb_darray_size(versions
) + 1 < rb_yjit_opts
.max_versions
&& best_diff
> 0) {
706 // Produce a generic context when the block version limit is hit for a blockid
707 // Note that this will mutate the ctx argument
709 limit_block_versions(blockid_t blockid
, const ctx_t
*ctx
)
711 // Guard chains implement limits separately, do nothing
712 if (ctx
->chain_depth
> 0)
715 // If this block version we're about to add will hit the version limit
716 if (get_num_versions(blockid
) + 1 >= rb_yjit_opts
.max_versions
) {
717 // Produce a generic context that stores no type information,
718 // but still respects the stack_size and sp_offset constraints.
719 // This new context will then match all future requests.
720 ctx_t generic_ctx
= DEFAULT_CTX
;
721 generic_ctx
.stack_size
= ctx
->stack_size
;
722 generic_ctx
.sp_offset
= ctx
->sp_offset
;
724 // Mutate the incoming context
731 static void yjit_free_block(block_t
*block
);
732 static void block_array_remove(rb_yjit_block_array_t block_array
, block_t
*block
);
734 // Immediately compile a series of block versions at a starting point and
735 // return the starting block.
737 gen_block_version(blockid_t blockid
, const ctx_t
*start_ctx
, rb_execution_context_t
*ec
)
739 // Small array to keep track of all the blocks compiled per invocation. We
740 // tend to have small batches since we often break up compilation with lazy
741 // stubs. Compilation is successful only if the whole batch is successful.
742 enum { MAX_PER_BATCH
= 64 };
743 block_t
*batch
[MAX_PER_BATCH
];
744 int compiled_count
= 0;
745 bool batch_success
= true;
748 // Generate code for the first block
749 block
= gen_single_block(blockid
, start_ctx
, ec
);
752 add_block_version(block
);
754 batch
[compiled_count
] = block
;
757 batch_success
= block
;
759 // For each successor block to compile
760 while (batch_success
) {
761 // If the previous block compiled doesn't have outgoing branches, stop
762 if (rb_darray_size(block
->outgoing
) == 0) {
766 // Get the last outgoing branch from the previous block. Blocks can use
767 // gen_direct_jump() to request a block to be placed immediately after.
768 branch_t
*last_branch
= rb_darray_back(block
->outgoing
);
770 // If there is no next block to compile, stop
771 if (last_branch
->dst_addrs
[0] || last_branch
->dst_addrs
[1]) {
775 if (last_branch
->targets
[0].iseq
== NULL
) {
776 rb_bug("invalid target for last branch");
779 // Generate code for the current block using context from the last branch.
780 blockid_t requested_id
= last_branch
->targets
[0];
781 const ctx_t
*requested_ctx
= &last_branch
->target_ctxs
[0];
783 batch_success
= compiled_count
< MAX_PER_BATCH
;
785 block
= gen_single_block(requested_id
, requested_ctx
, ec
);
786 batch_success
= block
;
789 // If the batch failed, stop
790 if (!batch_success
) {
794 // Connect the last branch and the new block
795 last_branch
->dst_addrs
[0] = block
->start_addr
;
796 rb_darray_append(&block
->incoming
, last_branch
);
797 last_branch
->blocks
[0] = block
;
799 // This block should immediately follow the last branch
800 RUBY_ASSERT(block
->start_addr
== last_branch
->end_addr
);
803 add_block_version(block
);
805 batch
[compiled_count
] = block
;
810 // Success. Return first block in the batch.
811 RUBY_ASSERT(compiled_count
> 0);
815 // The batch failed. Free everything in the batch
816 for (int block_idx
= 0; block_idx
< compiled_count
; block_idx
++) {
817 block_t
*const to_free
= batch
[block_idx
];
819 // Undo add_block_version()
820 rb_yjit_block_array_t versions
= yjit_get_version_array(to_free
->blockid
.iseq
, to_free
->blockid
.idx
);
821 block_array_remove(versions
, to_free
);
824 yjit_free_block(to_free
);
828 yjit_runtime_counters
.compilation_failure
++;
834 // Generate a block version that is an entry point inserted into an iseq
836 gen_entry_point(const rb_iseq_t
*iseq
, uint32_t insn_idx
, rb_execution_context_t
*ec
)
838 // If we aren't at PC 0, don't generate code
840 if (iseq
->body
->iseq_encoded
!= ec
->cfp
->pc
) {
844 // The entry context makes no assumptions about types
845 blockid_t blockid
= { iseq
, insn_idx
};
848 // Write the interpreter entry prologue. Might be NULL when out of memory.
849 uint8_t *code_ptr
= yjit_entry_prologue(cb
, iseq
);
851 // Try to generate code for the entry block
852 block_t
*block
= gen_block_version(blockid
, &DEFAULT_CTX
, ec
);
854 cb_mark_all_executable(ocb
);
855 cb_mark_all_executable(cb
);
857 // If we couldn't generate any code
858 if (!block
|| block
->end_idx
== insn_idx
) {
865 // Called by the generated code when a branch stub is executed
866 // Triggers compilation of branches and code patching
868 branch_stub_hit(branch_t
*branch
, const uint32_t target_idx
, rb_execution_context_t
*ec
)
870 uint8_t *dst_addr
= NULL
;
872 // Stop other ractors since we are going to patch machine code.
873 // This is how the GC does it.
877 const ptrdiff_t branch_size_on_entry
= branch_code_size(branch
);
879 RUBY_ASSERT(branch
!= NULL
);
880 RUBY_ASSERT(target_idx
< 2);
881 blockid_t target
= branch
->targets
[target_idx
];
882 const ctx_t
*target_ctx
= &branch
->target_ctxs
[target_idx
];
884 // If this branch has already been patched, return the dst address
885 // Note: ractors can cause the same stub to be hit multiple times
886 if (branch
->blocks
[target_idx
]) {
887 dst_addr
= branch
->dst_addrs
[target_idx
];
893 // Generated code do stack operations without modifying cfp->sp, while the
894 // cfp->sp tells the GC what values on the stack to root. Generated code
895 // generally takes care of updating cfp->sp when it calls runtime routines that
896 // could trigger GC, but it's inconvenient to do it before calling this function.
897 // So we do it here instead.
898 VALUE
*const original_interp_sp
= ec
->cfp
->sp
;
899 ec
->cfp
->sp
+= target_ctx
->sp_offset
;
901 // Update the PC in the current CFP, because it
902 // may be out of sync in JITted code
903 ec
->cfp
->pc
= yjit_iseq_pc_at_idx(target
.iseq
, target
.idx
);
905 // Try to find an existing compiled version of this block
906 block_t
*p_block
= find_block_version(target
, target_ctx
);
908 // If this block hasn't yet been compiled
910 const uint8_t branch_old_shape
= branch
->shape
;
911 bool branch_modified
= false;
913 // If the new block can be generated right after the branch (at cb->write_pos)
914 if (cb_get_write_ptr(cb
) == branch
->end_addr
) {
915 // This branch should be terminating its block
916 RUBY_ASSERT(branch
->end_addr
== branch
->block
->end_addr
);
918 // Change the branch shape to indicate the target block will be placed next
919 branch
->shape
= (uint8_t)target_idx
;
921 // Rewrite the branch with the new, potentially more compact shape
922 regenerate_branch(cb
, branch
);
923 branch_modified
= true;
925 // Ensure that the branch terminates the codeblock just like
926 // before entering this if block. This drops bytes off the end
927 // in case we shrank the branch when regenerating.
928 cb_set_write_ptr(cb
, branch
->end_addr
);
931 // Compile the new block version
932 p_block
= gen_block_version(target
, target_ctx
, ec
);
934 if (!p_block
&& branch_modified
) {
935 // We couldn't generate a new block for the branch, but we modified the branch.
936 // Restore the branch by regenerating it.
937 branch
->shape
= branch_old_shape
;
938 regenerate_branch(cb
, branch
);
943 // Branch shape should reflect layout
944 RUBY_ASSERT(!(branch
->shape
== (uint8_t)target_idx
&& p_block
->start_addr
!= branch
->end_addr
));
946 // Add this branch to the list of incoming branches for the target
947 rb_darray_append(&p_block
->incoming
, branch
);
949 // Update the branch target address
950 dst_addr
= p_block
->start_addr
;
951 branch
->dst_addrs
[target_idx
] = dst_addr
;
953 // Mark this branch target as patched (no longer a stub)
954 branch
->blocks
[target_idx
] = p_block
;
956 // Rewrite the branch with the new jump target address
957 regenerate_branch(cb
, branch
);
959 // Restore interpreter sp, since the code hitting the stub expects the original.
960 ec
->cfp
->sp
= original_interp_sp
;
963 // Failed to service the stub by generating a new block so now we
964 // need to exit to the interpreter at the stubbed location. We are
965 // intentionally *not* restoring original_interp_sp. At the time of
966 // writing, reconstructing interpreter state only involves setting
967 // cfp->sp and cfp->pc. We set both before trying to generate the
968 // block. All there is left to do to exit is to pop the native
969 // frame. We do that in code_for_exit_from_stub.
970 dst_addr
= code_for_exit_from_stub
;
973 cb_mark_all_executable(ocb
);
974 cb_mark_all_executable(cb
);
977 const ptrdiff_t new_branch_size
= branch_code_size(branch
);
978 RUBY_ASSERT_ALWAYS(new_branch_size
>= 0);
979 RUBY_ASSERT_ALWAYS(new_branch_size
<= branch_size_on_entry
&& "branch stubs should not enlarge branches");
983 // Return a pointer to the compiled block version
987 // Get a version or stub corresponding to a branch target
996 //fprintf(stderr, "get_branch_target, block (%p, %d)\n", target.iseq, target.idx);
998 block_t
*p_block
= find_block_version(target
, ctx
);
1000 // If the block already exists
1002 // Add an incoming branch for this version
1003 rb_darray_append(&p_block
->incoming
, branch
);
1004 branch
->blocks
[target_idx
] = p_block
;
1006 // Return a pointer to the compiled code
1007 return p_block
->start_addr
;
1010 // Do we have enough memory for a stub?
1011 const long MAX_CODE_SIZE
= 64;
1012 if (ocb
->write_pos
+ MAX_CODE_SIZE
>= cb
->mem_size
) {
1016 // Generate an outlined stub that will call branch_stub_hit()
1017 uint8_t *stub_addr
= cb_get_ptr(ocb
, ocb
->write_pos
);
1019 // Call branch_stub_hit(branch_idx, target_idx, ec)
1020 mov(ocb
, C_ARG_REGS
[2], REG_EC
);
1021 mov(ocb
, C_ARG_REGS
[1], imm_opnd(target_idx
));
1022 mov(ocb
, C_ARG_REGS
[0], const_ptr_opnd(branch
));
1023 call_ptr(ocb
, REG0
, (void *)&branch_stub_hit
);
1025 // Jump to the address returned by the
1026 // branch_stub_hit call
1029 RUBY_ASSERT(cb_get_ptr(ocb
, ocb
->write_pos
) - stub_addr
<= MAX_CODE_SIZE
);
1037 const ctx_t
*src_ctx
,
1045 RUBY_ASSERT(target0
.iseq
!= NULL
);
1047 branch_t
*branch
= make_branch_entry(jit
->block
, src_ctx
, gen_fn
);
1048 branch
->targets
[0] = target0
;
1049 branch
->targets
[1] = target1
;
1050 branch
->target_ctxs
[0] = *ctx0
;
1051 branch
->target_ctxs
[1] = ctx1
? *ctx1
:DEFAULT_CTX
;
1053 // Get the branch targets or stubs
1054 branch
->dst_addrs
[0] = get_branch_target(target0
, ctx0
, branch
, 0);
1055 branch
->dst_addrs
[1] = ctx1
? get_branch_target(target1
, ctx1
, branch
, 1):NULL
;
1057 // Call the branch generation function
1058 branch
->start_addr
= cb_get_write_ptr(cb
);
1059 regenerate_branch(cb
, branch
);
1063 gen_jump_branch(codeblock_t
*cb
, uint8_t *target0
, uint8_t *target1
, uint8_t shape
)
1074 jmp_ptr(cb
, target0
);
1086 RUBY_ASSERT(target0
.iseq
!= NULL
);
1088 branch_t
*branch
= make_branch_entry(jit
->block
, ctx
, gen_jump_branch
);
1089 branch
->targets
[0] = target0
;
1090 branch
->target_ctxs
[0] = *ctx
;
1092 block_t
*p_block
= find_block_version(target0
, ctx
);
1094 // If the version already exists
1096 rb_darray_append(&p_block
->incoming
, branch
);
1098 branch
->dst_addrs
[0] = p_block
->start_addr
;
1099 branch
->blocks
[0] = p_block
;
1100 branch
->shape
= SHAPE_DEFAULT
;
1102 // Call the branch generation function
1103 branch
->start_addr
= cb_get_write_ptr(cb
);
1104 gen_jump_branch(cb
, branch
->dst_addrs
[0], NULL
, SHAPE_DEFAULT
);
1105 branch
->end_addr
= cb_get_write_ptr(cb
);
1108 // This NULL target address signals gen_block_version() to compile the
1109 // target block right after this one (fallthrough).
1110 branch
->dst_addrs
[0] = NULL
;
1111 branch
->shape
= SHAPE_NEXT0
;
1112 branch
->start_addr
= cb_get_write_ptr(cb
);
1113 branch
->end_addr
= cb_get_write_ptr(cb
);
1117 // Create a stub to force the code up to this point to be executed
1124 //fprintf(stderr, "defer compilation at (%p, %d) depth=%d\n", block->blockid.iseq, insn_idx, cur_ctx->chain_depth);
1126 if (cur_ctx
->chain_depth
!= 0) {
1127 rb_bug("double defer");
1130 ctx_t next_ctx
= *cur_ctx
;
1132 if (next_ctx
.chain_depth
>= UINT8_MAX
) {
1133 rb_bug("max block version chain depth reached");
1136 next_ctx
.chain_depth
+= 1;
1138 branch_t
*branch
= make_branch_entry(jit
->block
, cur_ctx
, gen_jump_branch
);
1140 // Get the branch targets or stubs
1141 branch
->target_ctxs
[0] = next_ctx
;
1142 branch
->targets
[0] = (blockid_t
){ jit
->block
->blockid
.iseq
, jit
->insn_idx
};
1143 branch
->dst_addrs
[0] = get_branch_target(branch
->targets
[0], &next_ctx
, branch
, 0);
1145 // Call the branch generation function
1146 codeblock_t
*cb
= jit
->cb
;
1147 branch
->start_addr
= cb_get_write_ptr(cb
);
1148 gen_jump_branch(cb
, branch
->dst_addrs
[0], NULL
, SHAPE_DEFAULT
);
1149 branch
->end_addr
= cb_get_write_ptr(cb
);
1152 // Remove all references to a block then free it.
1154 yjit_free_block(block_t
*block
)
1156 yjit_unlink_method_lookup_dependency(block
);
1157 yjit_block_assumptions_free(block
);
1159 // Remove this block from the predecessor's targets
1160 rb_darray_for(block
->incoming
, incoming_idx
) {
1161 // Branch from the predecessor to us
1162 branch_t
*pred_branch
= rb_darray_get(block
->incoming
, incoming_idx
);
1164 // If this is us, nullify the target block
1165 for (size_t succ_idx
= 0; succ_idx
< 2; succ_idx
++) {
1166 if (pred_branch
->blocks
[succ_idx
] == block
) {
1167 pred_branch
->blocks
[succ_idx
] = NULL
;
1172 // For each outgoing branch
1173 rb_darray_for(block
->outgoing
, branch_idx
) {
1174 branch_t
*out_branch
= rb_darray_get(block
->outgoing
, branch_idx
);
1176 // For each successor block
1177 for (size_t succ_idx
= 0; succ_idx
< 2; succ_idx
++) {
1178 block_t
*succ
= out_branch
->blocks
[succ_idx
];
1183 // Remove this block from the successor's incoming list
1184 rb_darray_for(succ
->incoming
, incoming_idx
) {
1185 branch_t
*pred_branch
= rb_darray_get(succ
->incoming
, incoming_idx
);
1186 if (pred_branch
== out_branch
) {
1187 rb_darray_remove_unordered(succ
->incoming
, incoming_idx
);
1193 // Free the outgoing branch entry
1197 rb_darray_free(block
->incoming
);
1198 rb_darray_free(block
->outgoing
);
1199 rb_darray_free(block
->gc_object_offsets
);
1204 // Remove a block version
1206 block_array_remove(rb_yjit_block_array_t block_array
, block_t
*block
)
1209 rb_darray_foreach(block_array
, idx
, element
) {
1210 if (*element
== block
) {
1211 rb_darray_remove_unordered(block_array
, idx
);
1219 // Some runtime checks for integrity of a program location
1221 verify_blockid(const blockid_t blockid
)
1223 const rb_iseq_t
*const iseq
= blockid
.iseq
;
1224 RUBY_ASSERT_ALWAYS(IMEMO_TYPE_P(iseq
, imemo_iseq
));
1225 RUBY_ASSERT_ALWAYS(blockid
.idx
< iseq
->body
->iseq_size
);
1228 // Invalidate one specific block version
1230 invalidate_block_version(block_t
*block
)
1232 ASSERT_vm_locking();
1234 // TODO: want to assert that all other ractors are stopped here. Can't patch
1235 // machine code that some other thread is running.
1237 verify_blockid(block
->blockid
);
1239 const rb_iseq_t
*iseq
= block
->blockid
.iseq
;
1241 //fprintf(stderr, "invalidating block (%p, %d)\n", block->blockid.iseq, block->blockid.idx);
1242 //fprintf(stderr, "block=%p\n", block);
1244 // Remove this block from the version array
1245 rb_yjit_block_array_t versions
= yjit_get_version_array(iseq
, block
->blockid
.idx
);
1246 block_array_remove(versions
, block
);
1248 // Get a pointer to the generated code for this block
1249 uint8_t *code_ptr
= block
->start_addr
;
1251 // Make the the start of the block do an exit. This handles OOM situations
1252 // and some cases where we can't efficiently patch incoming branches.
1253 // Do this first, since in case there is a fallthrough branch into this
1254 // block, the patching loop below can overwrite the start of the block.
1255 // In those situations, there is hopefully no jumps to the start of the block
1256 // after patching as the start of the block would be in the middle of something
1257 // generated by branch_t::gen_fn.
1259 RUBY_ASSERT_ALWAYS(block
->entry_exit
&& "block invalidation requires an exit");
1260 if (block
->entry_exit
== block
->start_addr
) {
1261 // Some blocks exit on entry. Patching a jump to the entry at the
1262 // entry makes an infinite loop.
1264 else if (block
->start_addr
>= cb_get_ptr(cb
, yjit_codepage_frozen_bytes
)) { // Don't patch frozen code region
1265 // Patch in a jump to block->entry_exit.
1266 uint32_t cur_pos
= cb
->write_pos
;
1267 cb_set_write_ptr(cb
, block
->start_addr
);
1268 jmp_ptr(cb
, block
->entry_exit
);
1269 RUBY_ASSERT_ALWAYS(cb_get_ptr(cb
, cb
->write_pos
) < block
->end_addr
&& "invalidation wrote past end of block");
1270 cb_set_pos(cb
, cur_pos
);
1274 // For each incoming branch
1275 rb_darray_for(block
->incoming
, incoming_idx
) {
1276 branch_t
*branch
= rb_darray_get(block
->incoming
, incoming_idx
);
1277 uint32_t target_idx
= (branch
->dst_addrs
[0] == code_ptr
)? 0:1;
1278 RUBY_ASSERT(branch
->dst_addrs
[target_idx
] == code_ptr
);
1279 RUBY_ASSERT(branch
->blocks
[target_idx
] == block
);
1281 // Mark this target as being a stub
1282 branch
->blocks
[target_idx
] = NULL
;
1284 // Don't patch frozen code region
1285 if (branch
->start_addr
< cb_get_ptr(cb
, yjit_codepage_frozen_bytes
)) {
1289 // Create a stub for this branch target
1290 uint8_t *branch_target
= get_branch_target(
1297 if (!branch_target
) {
1298 // We were unable to generate a stub (e.g. OOM). Use the block's
1299 // exit instead of a stub for the block. It's important that we
1300 // still patch the branch in this situation so stubs are unique
1301 // to branches. Think about what could go wrong if we run out of
1302 // memory in the middle of this loop.
1303 branch_target
= block
->entry_exit
;
1306 branch
->dst_addrs
[target_idx
] = branch_target
;
1308 // Check if the invalidated block immediately follows
1309 bool target_next
= (block
->start_addr
== branch
->end_addr
);
1312 // The new block will no longer be adjacent.
1313 // Note that we could be enlarging the branch and writing into the
1314 // start of the block being invalidated.
1315 branch
->shape
= SHAPE_DEFAULT
;
1318 // Rewrite the branch with the new jump target address
1319 regenerate_branch(cb
, branch
);
1321 if (target_next
&& branch
->end_addr
> block
->end_addr
) {
1322 fprintf(stderr
, "branch_block_idx=%u block_idx=%u over=%td block_size=%td\n",
1323 branch
->block
->blockid
.idx
,
1325 branch
->end_addr
- block
->end_addr
,
1326 block
->end_addr
- block
->start_addr
);
1327 yjit_print_iseq(branch
->block
->blockid
.iseq
);
1328 rb_bug("yjit invalidate rewrote branch past end of invalidated block");
1332 // Clear out the JIT func so that we can recompile later and so the
1333 // interpreter will run the iseq
1336 // Only clear the jit_func when we're invalidating the JIT entry block.
1337 // We only support compiling iseqs from index 0 right now. So entry
1338 // points will always have an instruction index of 0. We'll need to
1339 // change this in the future when we support optional parameters because
1340 // they enter the function with a non-zero PC
1341 if (block
->blockid
.idx
== 0) {
1342 iseq
->body
->jit_func
= 0;
1347 // May want to recompile a new entry point (for interpreter entry blocks)
1348 // This isn't necessary for correctness
1351 // Call continuation addresses on the stack can also be atomically replaced by jumps going to the stub.
1353 yjit_free_block(block
);
1356 yjit_runtime_counters
.invalidation_count
++;
1359 cb_mark_all_executable(ocb
);
1360 cb_mark_all_executable(cb
);
1362 // fprintf(stderr, "invalidation done\n");
1366 yjit_init_core(void)
1368 gen_code_for_exit_from_stub();