// yjit_core.c
// This file is a fragment of the yjit.o compilation unit. See yjit.c.
#include "internal.h"
#include "vm_sync.h"
#include "builtin.h"

#include "yjit.h"
#include "yjit_asm.h"
#include "yjit_iface.h"
#include "yjit_core.h"
#include "yjit_codegen.h"

// For exiting from YJIT frame from branch_stub_hit().
// Filled by gen_code_for_exit_from_stub().
static uint8_t *code_for_exit_from_stub = NULL;

/*
Get an operand for the adjusted stack pointer address
*/
static x86opnd_t
ctx_sp_opnd(ctx_t *ctx, int32_t offset_bytes)
{
    int32_t offset = (ctx->sp_offset * sizeof(VALUE)) + offset_bytes;
    return mem_opnd(64, REG_SP, offset);
}

/*
Push one new value on the temp stack with an explicit mapping
Return a pointer to the new stack top
*/
static x86opnd_t
ctx_stack_push_mapping(ctx_t *ctx, temp_type_mapping_t mapping)
{
    // If type propagation is disabled, store no types
    if (rb_yjit_opts.no_type_prop) {
        mapping.type = TYPE_UNKNOWN;
    }

    // Keep track of the type and mapping of the value
    if (ctx->stack_size < MAX_TEMP_TYPES) {
        ctx->temp_mapping[ctx->stack_size] = mapping.mapping;
        ctx->temp_types[ctx->stack_size] = mapping.type;

        RUBY_ASSERT(mapping.mapping.kind != TEMP_LOCAL || mapping.mapping.idx < MAX_LOCAL_TYPES);
        RUBY_ASSERT(mapping.mapping.kind != TEMP_STACK || mapping.mapping.idx == 0);
        RUBY_ASSERT(mapping.mapping.kind != TEMP_SELF || mapping.mapping.idx == 0);
    }

    ctx->stack_size += 1;
    ctx->sp_offset += 1;

    // SP points just above the topmost value
    int32_t offset = (ctx->sp_offset - 1) * sizeof(VALUE);
    return mem_opnd(64, REG_SP, offset);
}

/*
Push one new value on the temp stack
Return a pointer to the new stack top
*/
static x86opnd_t
ctx_stack_push(ctx_t *ctx, val_type_t type)
{
    temp_type_mapping_t mapping = { MAP_STACK, type };
    return ctx_stack_push_mapping(ctx, mapping);
}

/*
Push the self value on the stack
*/
static x86opnd_t
ctx_stack_push_self(ctx_t *ctx)
{
    temp_type_mapping_t mapping = { MAP_SELF, TYPE_UNKNOWN };
    return ctx_stack_push_mapping(ctx, mapping);
}

/*
Push a local variable on the stack
*/
static x86opnd_t
ctx_stack_push_local(ctx_t *ctx, size_t local_idx)
{
    if (local_idx >= MAX_LOCAL_TYPES) {
        return ctx_stack_push(ctx, TYPE_UNKNOWN);
    }

    temp_type_mapping_t mapping = {
        (temp_mapping_t){ .kind = TEMP_LOCAL, .idx = local_idx },
        TYPE_UNKNOWN
    };

    return ctx_stack_push_mapping(ctx, mapping);
}

/*
Pop N values off the stack
Return a pointer to the stack top before the pop operation
*/
static x86opnd_t
ctx_stack_pop(ctx_t *ctx, size_t n)
{
    RUBY_ASSERT(n <= ctx->stack_size);

    // SP points just above the topmost value
    int32_t offset = (ctx->sp_offset - 1) * sizeof(VALUE);
    x86opnd_t top = mem_opnd(64, REG_SP, offset);

    // Clear the types of the popped values
    for (size_t i = 0; i < n; ++i)
    {
        size_t idx = ctx->stack_size - i - 1;
        if (idx < MAX_TEMP_TYPES) {
            ctx->temp_types[idx] = TYPE_UNKNOWN;
            ctx->temp_mapping[idx] = MAP_STACK;
        }
    }

    ctx->stack_size -= n;
    ctx->sp_offset -= n;

    return top;
}

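/*
Illustrative sketch of the SP bookkeeping above (hypothetical numbers, not
taken from a real trace): with sp_offset == 1 and stack_size == 1, a push
bumps both to 2 and returns the operand [REG_SP + 1 * sizeof(VALUE)], i.e.
the slot just below the new SP. A subsequent ctx_stack_pop(ctx, 1) returns
that same operand and then restores sp_offset and stack_size to 1.
*/
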
/*
Get an operand pointing to a slot on the temp stack
*/
static x86opnd_t
ctx_stack_opnd(ctx_t *ctx, int32_t idx)
{
    // SP points just above the topmost value
    int32_t offset = (ctx->sp_offset - 1 - idx) * sizeof(VALUE);
    x86opnd_t opnd = mem_opnd(64, REG_SP, offset);

    return opnd;
}

/*
Get the type of an instruction operand
*/
static val_type_t
ctx_get_opnd_type(const ctx_t *ctx, insn_opnd_t opnd)
{
    if (opnd.is_self)
        return ctx->self_type;

    RUBY_ASSERT(opnd.idx < ctx->stack_size);
    int stack_idx = ctx->stack_size - 1 - opnd.idx;

    // If outside of tracked range, do nothing
    if (stack_idx >= MAX_TEMP_TYPES)
        return TYPE_UNKNOWN;

    temp_mapping_t mapping = ctx->temp_mapping[stack_idx];

    switch (mapping.kind) {
      case TEMP_SELF:
        return ctx->self_type;

      case TEMP_STACK:
        return ctx->temp_types[ctx->stack_size - 1 - opnd.idx];

      case TEMP_LOCAL:
        RUBY_ASSERT(mapping.idx < MAX_LOCAL_TYPES);
        return ctx->local_types[mapping.idx];
    }

    rb_bug("unreachable");
}

static int type_diff(val_type_t src, val_type_t dst);

#define UPGRADE_TYPE(dest, src) do { \
    RUBY_ASSERT(type_diff((src), (dest)) != INT_MAX); \
    (dest) = (src); \
} while (false)

/*
Upgrade (or "learn") the type of an instruction operand
This value must be compatible and at least as specific as the previously known type.
If this value originated from self, or an lvar, the learned type will be
propagated back to its source.
*/
static void
ctx_upgrade_opnd_type(ctx_t *ctx, insn_opnd_t opnd, val_type_t type)
{
    // If type propagation is disabled, store no types
    if (rb_yjit_opts.no_type_prop)
        return;

    if (opnd.is_self) {
        UPGRADE_TYPE(ctx->self_type, type);
        return;
    }

    RUBY_ASSERT(opnd.idx < ctx->stack_size);
    int stack_idx = ctx->stack_size - 1 - opnd.idx;

    // If outside of tracked range, do nothing
    if (stack_idx >= MAX_TEMP_TYPES)
        return;

    temp_mapping_t mapping = ctx->temp_mapping[stack_idx];

    switch (mapping.kind) {
      case TEMP_SELF:
        UPGRADE_TYPE(ctx->self_type, type);
        break;

      case TEMP_STACK:
        UPGRADE_TYPE(ctx->temp_types[stack_idx], type);
        break;

      case TEMP_LOCAL:
        RUBY_ASSERT(mapping.idx < MAX_LOCAL_TYPES);
        UPGRADE_TYPE(ctx->local_types[mapping.idx], type);
        break;
    }
}

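/*
Hypothetical usage sketch (mirrors how codegen typically learns from a guard;
the real call sites live in yjit_codegen.c): after emitting a check that the
top of the stack is a fixnum, codegen can record what it learned with

    ctx_upgrade_opnd_type(ctx, OPND_STACK(0), TYPE_FIXNUM);

If that slot is mapped to a local or to self, the local/self type is upgraded
instead, so later uses of the same source also benefit.
*/
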
/*
Get both the type and mapping (where the value originates) of an operand.
This can be used with ctx_stack_push_mapping or ctx_set_opnd_mapping to copy
a stack value's type while maintaining the mapping.
*/
static temp_type_mapping_t
ctx_get_opnd_mapping(const ctx_t *ctx, insn_opnd_t opnd)
{
    temp_type_mapping_t type_mapping;
    type_mapping.type = ctx_get_opnd_type(ctx, opnd);

    if (opnd.is_self) {
        type_mapping.mapping = MAP_SELF;
        return type_mapping;
    }

    RUBY_ASSERT(opnd.idx < ctx->stack_size);
    int stack_idx = ctx->stack_size - 1 - opnd.idx;

    if (stack_idx < MAX_TEMP_TYPES) {
        type_mapping.mapping = ctx->temp_mapping[stack_idx];
    }
    else {
        // We can't know the source of this stack operand, so we assume it is
        // a stack-only temporary. type will be UNKNOWN
        RUBY_ASSERT(type_mapping.type.type == ETYPE_UNKNOWN);
        type_mapping.mapping = MAP_STACK;
    }

    return type_mapping;
}

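/*
Hypothetical sketch of the copy pattern described above (dup-style
instructions in yjit_codegen.c do something along these lines):

    temp_type_mapping_t mapping = ctx_get_opnd_mapping(ctx, OPND_STACK(0));
    ctx_stack_push_mapping(ctx, mapping); // new slot keeps the source's type and origin
*/
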
/*
Overwrite both the type and mapping of a stack operand.
*/
static void
ctx_set_opnd_mapping(ctx_t *ctx, insn_opnd_t opnd, temp_type_mapping_t type_mapping)
{
    // self is always MAP_SELF
    RUBY_ASSERT(!opnd.is_self);

    RUBY_ASSERT(opnd.idx < ctx->stack_size);
    int stack_idx = ctx->stack_size - 1 - opnd.idx;

    // If type propagation is disabled, store no types
    if (rb_yjit_opts.no_type_prop)
        return;

    // If outside of tracked range, do nothing
    if (stack_idx >= MAX_TEMP_TYPES)
        return;

    ctx->temp_mapping[stack_idx] = type_mapping.mapping;

    // Only used when mapping == MAP_STACK
    ctx->temp_types[stack_idx] = type_mapping.type;
}

/*
Set the type of a local variable
*/
static void
ctx_set_local_type(ctx_t *ctx, size_t idx, val_type_t type)
{
    // If type propagation is disabled, store no types
    if (rb_yjit_opts.no_type_prop)
        return;

    if (idx >= MAX_LOCAL_TYPES)
        return;

    // If any values on the stack map to this local we must detach them
    for (int i = 0; i < MAX_TEMP_TYPES; i++) {
        temp_mapping_t *mapping = &ctx->temp_mapping[i];
        if (mapping->kind == TEMP_LOCAL && mapping->idx == idx) {
            ctx->temp_types[i] = ctx->local_types[mapping->idx];
            *mapping = MAP_STACK;
        }
    }

    ctx->local_types[idx] = type;
}

// Erase local variable type information
// e.g. because of a call we can't track
static void
ctx_clear_local_types(ctx_t *ctx)
{
    // When clearing local types we must detach any stack mappings to those
    // locals. Even if local values may have changed, stack values will not.
    for (int i = 0; i < MAX_TEMP_TYPES; i++) {
        temp_mapping_t *mapping = &ctx->temp_mapping[i];
        if (mapping->kind == TEMP_LOCAL) {
            RUBY_ASSERT(mapping->idx < MAX_LOCAL_TYPES);
            ctx->temp_types[i] = ctx->local_types[mapping->idx];
            *mapping = MAP_STACK;
        }
        RUBY_ASSERT(mapping->kind == TEMP_STACK || mapping->kind == TEMP_SELF);
    }
    memset(&ctx->local_types, 0, sizeof(ctx->local_types));
}

/* This returns an appropriate val_type_t based on a known value */
static val_type_t
yjit_type_of_value(VALUE val)
{
    if (SPECIAL_CONST_P(val)) {
        if (FIXNUM_P(val)) {
            return TYPE_FIXNUM;
        }
        else if (NIL_P(val)) {
            return TYPE_NIL;
        }
        else if (val == Qtrue) {
            return TYPE_TRUE;
        }
        else if (val == Qfalse) {
            return TYPE_FALSE;
        }
        else if (STATIC_SYM_P(val)) {
            return TYPE_STATIC_SYMBOL;
        }
        else if (FLONUM_P(val)) {
            return TYPE_FLONUM;
        }
        else {
            RUBY_ASSERT(false);
            UNREACHABLE_RETURN(TYPE_IMM);
        }
    }
    else {
        switch (BUILTIN_TYPE(val)) {
          case T_ARRAY:
            return TYPE_ARRAY;
          case T_HASH:
            return TYPE_HASH;
          case T_STRING:
            return TYPE_STRING;
          default:
            // generic heap object
            return TYPE_HEAP;
        }
    }
}

/* The name of a type, for debugging */
RBIMPL_ATTR_MAYBE_UNUSED()
static const char *
yjit_type_name(val_type_t type)
{
    RUBY_ASSERT(!(type.is_imm && type.is_heap));

    switch (type.type) {
      case ETYPE_UNKNOWN:
        if (type.is_imm) {
            return "unknown immediate";
        }
        else if (type.is_heap) {
            return "unknown heap";
        }
        else {
            return "unknown";
        }
      case ETYPE_NIL:
        return "nil";
      case ETYPE_TRUE:
        return "true";
      case ETYPE_FALSE:
        return "false";
      case ETYPE_FIXNUM:
        return "fixnum";
      case ETYPE_FLONUM:
        return "flonum";
      case ETYPE_ARRAY:
        return "array";
      case ETYPE_HASH:
        return "hash";
      case ETYPE_SYMBOL:
        return "symbol";
      case ETYPE_STRING:
        return "string";
    }

    UNREACHABLE_RETURN("");
}

/*
Compute a difference between two value types
Returns 0 if the two are the same
Returns > 0 if different but compatible
Returns INT_MAX if incompatible
*/
static int
type_diff(val_type_t src, val_type_t dst)
{
    RUBY_ASSERT(!src.is_heap || !src.is_imm);
    RUBY_ASSERT(!dst.is_heap || !dst.is_imm);

    // If dst assumes heap but src doesn't
    if (dst.is_heap && !src.is_heap)
        return INT_MAX;

    // If dst assumes imm but src doesn't
    if (dst.is_imm && !src.is_imm)
        return INT_MAX;

    // If dst assumes known type different from src
    if (dst.type != ETYPE_UNKNOWN && dst.type != src.type)
        return INT_MAX;

    if (dst.is_heap != src.is_heap)
        return 1;

    if (dst.is_imm != src.is_imm)
        return 1;

    if (dst.type != src.type)
        return 1;

    return 0;
}

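/*
Worked example of the scoring (illustrative field values only): with
src = { .type = ETYPE_FIXNUM, .is_imm = true } and dst = TYPE_UNKNOWN (no
flags set), none of the incompatibility checks fire and the is_imm mismatch
makes the function return 1: the destination is compatible but less specific.
Swapping the arguments yields INT_MAX, because dst would then assume is_imm
while src guarantees nothing.
*/
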
/*
Compute a difference score for two context objects
Returns 0 if the two contexts are the same
Returns > 0 if different but compatible
Returns INT_MAX if incompatible
*/
static int
ctx_diff(const ctx_t *src, const ctx_t *dst)
{
    // Can only look up the first version in the chain
    if (dst->chain_depth != 0)
        return INT_MAX;

    // Blocks with depth > 0 always produce new versions
    // Sidechains cannot overlap
    if (src->chain_depth != 0)
        return INT_MAX;

    if (dst->stack_size != src->stack_size)
        return INT_MAX;

    if (dst->sp_offset != src->sp_offset)
        return INT_MAX;

    // Difference sum
    int diff = 0;

    // Check the type of self
    int self_diff = type_diff(src->self_type, dst->self_type);

    if (self_diff == INT_MAX)
        return INT_MAX;

    diff += self_diff;

    // For each local type we track
    for (size_t i = 0; i < MAX_LOCAL_TYPES; ++i)
    {
        val_type_t t_src = src->local_types[i];
        val_type_t t_dst = dst->local_types[i];
        int temp_diff = type_diff(t_src, t_dst);

        if (temp_diff == INT_MAX)
            return INT_MAX;

        diff += temp_diff;
    }

    // For each value on the temp stack
    for (size_t i = 0; i < src->stack_size; ++i)
    {
        temp_type_mapping_t m_src = ctx_get_opnd_mapping(src, OPND_STACK(i));
        temp_type_mapping_t m_dst = ctx_get_opnd_mapping(dst, OPND_STACK(i));

        if (m_dst.mapping.kind != m_src.mapping.kind) {
            if (m_dst.mapping.kind == TEMP_STACK) {
                // We can safely drop information about the source of the temp
                // stack operand.
                diff += 1;
            }
            else {
                return INT_MAX;
            }
        }
        else if (m_dst.mapping.idx != m_src.mapping.idx) {
            return INT_MAX;
        }

        int temp_diff = type_diff(m_src.type, m_dst.type);

        if (temp_diff == INT_MAX)
            return INT_MAX;

        diff += temp_diff;
    }

    return diff;
}

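/*
Illustrative example (hypothetical contexts): two contexts that agree on
chain_depth, stack_size and sp_offset, where src knows local 0 is a fixnum
and dst leaves it unknown, score a diff of 1 and can share a version.
Reversing src and dst yields INT_MAX, because the destination would assume
more than the source can prove, and find_block_version() must not reuse it.
*/
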
// Get all blocks for a particular place in an iseq.
static rb_yjit_block_array_t
yjit_get_version_array(const rb_iseq_t *iseq, unsigned idx)
{
    struct rb_iseq_constant_body *body = iseq->body;

    if (rb_darray_size(body->yjit_blocks) == 0) {
        return NULL;
    }

    RUBY_ASSERT((unsigned)rb_darray_size(body->yjit_blocks) == body->iseq_size);
    return rb_darray_get(body->yjit_blocks, idx);
}

// Count the number of block versions matching a given blockid
static size_t get_num_versions(blockid_t blockid)
{
    return rb_darray_size(yjit_get_version_array(blockid.iseq, blockid.idx));
}

// Keep track of a block version. Block should be fully constructed.
static void
add_block_version(block_t *block)
{
    const blockid_t blockid = block->blockid;
    const rb_iseq_t *iseq = blockid.iseq;
    struct rb_iseq_constant_body *body = iseq->body;

    // Function entry blocks must have stack size 0
    RUBY_ASSERT(!(block->blockid.idx == 0 && block->ctx.stack_size > 0));

    // Ensure yjit_blocks is initialized for this iseq
    if (rb_darray_size(body->yjit_blocks) == 0) {
        // Initialize yjit_blocks to be as wide as body->iseq_encoded
        int32_t casted = (int32_t)body->iseq_size;
        if ((unsigned)casted != body->iseq_size) {
            rb_bug("iseq too large");
        }
        if (!rb_darray_make(&body->yjit_blocks, casted)) {
            rb_bug("allocation failed");
        }

#if YJIT_STATS
        // First block compiled for this iseq
        yjit_runtime_counters.compiled_iseq_count++;
#endif
    }

    RUBY_ASSERT((int32_t)blockid.idx < rb_darray_size(body->yjit_blocks));
    rb_yjit_block_array_t *block_array_ref = rb_darray_ref(body->yjit_blocks, blockid.idx);

    // Add the new block
    if (!rb_darray_append(block_array_ref, block)) {
        rb_bug("allocation failed");
    }

    {
        // By writing the new block to the iseq, the iseq now
        // contains new references to Ruby objects. Run write barriers.
        cme_dependency_t *cme_dep;
        rb_darray_foreach(block->cme_dependencies, cme_dependency_idx, cme_dep) {
            RB_OBJ_WRITTEN(iseq, Qundef, cme_dep->receiver_klass);
            RB_OBJ_WRITTEN(iseq, Qundef, cme_dep->callee_cme);
        }

        // Run write barriers for all objects in generated code.
        uint32_t *offset_element;
        rb_darray_foreach(block->gc_object_offsets, offset_idx, offset_element) {
            uint32_t offset_to_value = *offset_element;
            uint8_t *value_address = cb_get_ptr(cb, offset_to_value);

            VALUE object;
            memcpy(&object, value_address, SIZEOF_VALUE);
            RB_OBJ_WRITTEN(iseq, Qundef, object);
        }
    }

#if YJIT_STATS
    yjit_runtime_counters.compiled_block_count++;
#endif
}

static ptrdiff_t
branch_code_size(const branch_t *branch)
{
    return branch->end_addr - branch->start_addr;
}

// Generate code for a branch, possibly rewriting and changing the size of it
static void
regenerate_branch(codeblock_t *cb, branch_t *branch)
{
    if (branch->start_addr < cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
        // Generating this branch would modify frozen bytes. Do nothing.
        return;
    }

    const uint32_t old_write_pos = cb->write_pos;
    const bool branch_terminates_block = branch->end_addr == branch->block->end_addr;

    RUBY_ASSERT(branch->dst_addrs[0] != NULL);

    cb_set_write_ptr(cb, branch->start_addr);
    branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
    branch->end_addr = cb_get_write_ptr(cb);

    if (branch_terminates_block) {
        // Adjust block size
        branch->block->end_addr = branch->end_addr;
    }

    // cb->write_pos is both a write cursor and a marker for the end of
    // everything written out so far. Leave cb->write_pos at the end of the
    // block before returning. This function only ever bumps or retains the end
    // of block marker since that's what the majority of callers want. When the
    // branch sits at the very end of the codeblock and it shrinks after
    // regeneration, it's up to the caller to drop bytes off the end to
    // not leave a gap and to implement branch->shape.
    if (old_write_pos > cb->write_pos) {
        // We rewound cb->write_pos to generate the branch, now restore it.
        cb_set_pos(cb, old_write_pos);
    }
    else {
        // The branch sits at the end of cb and consumed some memory.
        // Keep cb->write_pos.
    }
}

// Create a new outgoing branch entry for a block
static branch_t*
make_branch_entry(block_t *block, const ctx_t *src_ctx, branchgen_fn gen_fn)
{
    RUBY_ASSERT(block != NULL);

    // Allocate and zero-initialize
    branch_t *branch = calloc(1, sizeof(branch_t));

    branch->block = block;
    (void)src_ctx; // Unused for now
    branch->gen_fn = gen_fn;
    branch->shape = SHAPE_DEFAULT;

    // Add to the list of outgoing branches for the block
    rb_darray_append(&block->outgoing, branch);

    return branch;
}

// Retrieve a basic block version for an (iseq, idx) tuple
static block_t *
find_block_version(blockid_t blockid, const ctx_t *ctx)
{
    rb_yjit_block_array_t versions = yjit_get_version_array(blockid.iseq, blockid.idx);

    // Best match found
    block_t *best_version = NULL;
    int best_diff = INT_MAX;

    // For each version matching the blockid
    rb_darray_for(versions, idx) {
        block_t *version = rb_darray_get(versions, idx);
        int diff = ctx_diff(ctx, &version->ctx);

        // Note that we always prefer the first matching
        // version because of inline-cache chains
        if (diff < best_diff) {
            best_version = version;
            best_diff = diff;
        }
    }

    // If greedy versioning is enabled
    if (rb_yjit_opts.greedy_versioning)
    {
        // If we're below the version limit, don't settle for an imperfect match
        if ((uint32_t)rb_darray_size(versions) + 1 < rb_yjit_opts.max_versions && best_diff > 0) {
            return NULL;
        }
    }

    return best_version;
}

// Produce a generic context when the block version limit is hit for a blockid
// Note that callers should use the returned context in place of the one passed in
static ctx_t
limit_block_versions(blockid_t blockid, const ctx_t *ctx)
{
    // Guard chains implement limits separately, do nothing
    if (ctx->chain_depth > 0)
        return *ctx;

    // If the block version we're about to add would hit the version limit
    if (get_num_versions(blockid) + 1 >= rb_yjit_opts.max_versions) {
        // Produce a generic context that stores no type information,
        // but still respects the stack_size and sp_offset constraints.
        // This new context will then match all future requests.
        ctx_t generic_ctx = DEFAULT_CTX;
        generic_ctx.stack_size = ctx->stack_size;
        generic_ctx.sp_offset = ctx->sp_offset;

        // Substitute the generic context for the incoming one
        return generic_ctx;
    }

    return *ctx;
}

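/*
Illustrative scenario (hypothetical numbers): with the version limit set to 4
(the --yjit-max-versions option) and three versions already present for a
blockid, the context for the fourth version is replaced by the generic one
above. Because the generic context keeps only stack_size and sp_offset, every
later request with the same stack_size and sp_offset diffs to a finite score
against it and reuses that version instead of minting a fifth.
*/
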
static void yjit_free_block(block_t *block);
static void block_array_remove(rb_yjit_block_array_t block_array, block_t *block);

// Immediately compile a series of block versions at a starting point and
// return the starting block.
static block_t *
gen_block_version(blockid_t blockid, const ctx_t *start_ctx, rb_execution_context_t *ec)
{
    // Small array to keep track of all the blocks compiled per invocation. We
    // tend to have small batches since we often break up compilation with lazy
    // stubs. Compilation is successful only if the whole batch is successful.
    enum { MAX_PER_BATCH = 64 };
    block_t *batch[MAX_PER_BATCH];
    int compiled_count = 0;
    bool batch_success = true;
    block_t *block;

    // Generate code for the first block
    block = gen_single_block(blockid, start_ctx, ec);
    if (block) {
        // Track the block
        add_block_version(block);

        batch[compiled_count] = block;
        compiled_count++;
    }
    batch_success = block;

    // For each successor block to compile
    while (batch_success) {
        // If the previous block compiled doesn't have outgoing branches, stop
        if (rb_darray_size(block->outgoing) == 0) {
            break;
        }

        // Get the last outgoing branch from the previous block. Blocks can use
        // gen_direct_jump() to request a block to be placed immediately after.
        branch_t *last_branch = rb_darray_back(block->outgoing);

        // If there is no next block to compile, stop
        if (last_branch->dst_addrs[0] || last_branch->dst_addrs[1]) {
            break;
        }

        if (last_branch->targets[0].iseq == NULL) {
            rb_bug("invalid target for last branch");
        }

        // Generate code for the current block using context from the last branch.
        blockid_t requested_id = last_branch->targets[0];
        const ctx_t *requested_ctx = &last_branch->target_ctxs[0];

        batch_success = compiled_count < MAX_PER_BATCH;
        if (batch_success) {
            block = gen_single_block(requested_id, requested_ctx, ec);
            batch_success = block;
        }

        // If the batch failed, stop
        if (!batch_success) {
            break;
        }

        // Connect the last branch and the new block
        last_branch->dst_addrs[0] = block->start_addr;
        rb_darray_append(&block->incoming, last_branch);
        last_branch->blocks[0] = block;

        // This block should immediately follow the last branch
        RUBY_ASSERT(block->start_addr == last_branch->end_addr);

        // Track the block
        add_block_version(block);

        batch[compiled_count] = block;
        compiled_count++;
    }

    if (batch_success) {
        // Success. Return first block in the batch.
        RUBY_ASSERT(compiled_count > 0);
        return batch[0];
    }
    else {
        // The batch failed. Free everything in the batch
        for (int block_idx = 0; block_idx < compiled_count; block_idx++) {
            block_t *const to_free = batch[block_idx];

            // Undo add_block_version()
            rb_yjit_block_array_t versions = yjit_get_version_array(to_free->blockid.iseq, to_free->blockid.idx);
            block_array_remove(versions, to_free);

            // Deallocate
            yjit_free_block(to_free);
        }

#if YJIT_STATS
        yjit_runtime_counters.compilation_failure++;
#endif
        return NULL;
    }
}

// Generate a block version that is an entry point inserted into an iseq
static uint8_t *
gen_entry_point(const rb_iseq_t *iseq, uint32_t insn_idx, rb_execution_context_t *ec)
{
    // If we aren't at PC 0, don't generate code
    // See yjit_pc_guard
    if (iseq->body->iseq_encoded != ec->cfp->pc) {
        return NULL;
    }

    // The entry context makes no assumptions about types
    blockid_t blockid = { iseq, insn_idx };

    rb_vm_barrier();
    // Write the interpreter entry prologue. Might be NULL when out of memory.
    uint8_t *code_ptr = yjit_entry_prologue(cb, iseq);

    // Try to generate code for the entry block
    block_t *block = gen_block_version(blockid, &DEFAULT_CTX, ec);

    cb_mark_all_executable(ocb);
    cb_mark_all_executable(cb);

    // If we couldn't generate any code
    if (!block || block->end_idx == insn_idx) {
        return NULL;
    }

    return code_ptr;
}

// Called by the generated code when a branch stub is executed
// Triggers compilation of branches and code patching
static uint8_t *
branch_stub_hit(branch_t *branch, const uint32_t target_idx, rb_execution_context_t *ec)
{
    uint8_t *dst_addr = NULL;

    // Stop other ractors since we are going to patch machine code.
    // This is how the GC does it.
    RB_VM_LOCK_ENTER();
    rb_vm_barrier();

    const ptrdiff_t branch_size_on_entry = branch_code_size(branch);

    RUBY_ASSERT(branch != NULL);
    RUBY_ASSERT(target_idx < 2);
    blockid_t target = branch->targets[target_idx];
    const ctx_t *target_ctx = &branch->target_ctxs[target_idx];

    // If this branch has already been patched, return the dst address
    // Note: ractors can cause the same stub to be hit multiple times
    if (branch->blocks[target_idx]) {
        dst_addr = branch->dst_addrs[target_idx];
    }
    else {
        rb_vm_barrier();

        // :stub-sp-flush:
        // Generated code does stack operations without modifying cfp->sp, while
        // cfp->sp tells the GC what values on the stack to root. Generated code
        // generally takes care of updating cfp->sp when it calls runtime routines that
        // could trigger GC, but it's inconvenient to do it before calling this function.
        // So we do it here instead.
        VALUE *const original_interp_sp = ec->cfp->sp;
        ec->cfp->sp += target_ctx->sp_offset;

        // Update the PC in the current CFP, because it
        // may be out of sync in JITted code
        ec->cfp->pc = yjit_iseq_pc_at_idx(target.iseq, target.idx);

        // Try to find an existing compiled version of this block
        block_t *p_block = find_block_version(target, target_ctx);

        // If this block hasn't yet been compiled
        if (!p_block) {
            const uint8_t branch_old_shape = branch->shape;
            bool branch_modified = false;

            // If the new block can be generated right after the branch (at cb->write_pos)
            if (cb_get_write_ptr(cb) == branch->end_addr) {
                // This branch should be terminating its block
                RUBY_ASSERT(branch->end_addr == branch->block->end_addr);

                // Change the branch shape to indicate the target block will be placed next
                branch->shape = (uint8_t)target_idx;

                // Rewrite the branch with the new, potentially more compact shape
                regenerate_branch(cb, branch);
                branch_modified = true;

                // Ensure that the branch terminates the codeblock just like
                // before entering this if block. This drops bytes off the end
                // in case we shrank the branch when regenerating.
                cb_set_write_ptr(cb, branch->end_addr);
            }

            // Compile the new block version
            p_block = gen_block_version(target, target_ctx, ec);

            if (!p_block && branch_modified) {
                // We couldn't generate a new block for the branch, but we modified the branch.
                // Restore the branch by regenerating it.
                branch->shape = branch_old_shape;
                regenerate_branch(cb, branch);
            }
        }

        if (p_block) {
            // Branch shape should reflect layout
            RUBY_ASSERT(!(branch->shape == (uint8_t)target_idx && p_block->start_addr != branch->end_addr));

            // Add this branch to the list of incoming branches for the target
            rb_darray_append(&p_block->incoming, branch);

            // Update the branch target address
            dst_addr = p_block->start_addr;
            branch->dst_addrs[target_idx] = dst_addr;

            // Mark this branch target as patched (no longer a stub)
            branch->blocks[target_idx] = p_block;

            // Rewrite the branch with the new jump target address
            regenerate_branch(cb, branch);

            // Restore interpreter sp, since the code hitting the stub expects the original.
            ec->cfp->sp = original_interp_sp;
        }
        else {
            // Failed to service the stub by generating a new block so now we
            // need to exit to the interpreter at the stubbed location. We are
            // intentionally *not* restoring original_interp_sp. At the time of
            // writing, reconstructing interpreter state only involves setting
            // cfp->sp and cfp->pc. We set both before trying to generate the
            // block. All there is left to do to exit is to pop the native
            // frame. We do that in code_for_exit_from_stub.
            dst_addr = code_for_exit_from_stub;
        }

        cb_mark_all_executable(ocb);
        cb_mark_all_executable(cb);
    }

    const ptrdiff_t new_branch_size = branch_code_size(branch);
    RUBY_ASSERT_ALWAYS(new_branch_size >= 0);
    RUBY_ASSERT_ALWAYS(new_branch_size <= branch_size_on_entry && "branch stubs should not enlarge branches");

    RB_VM_LOCK_LEAVE();

    // Return a pointer to the compiled block version
    return dst_addr;
}

// Get a version or stub corresponding to a branch target
static uint8_t *
get_branch_target(
    blockid_t target,
    const ctx_t *ctx,
    branch_t *branch,
    uint32_t target_idx
)
{
    //fprintf(stderr, "get_branch_target, block (%p, %d)\n", target.iseq, target.idx);

    block_t *p_block = find_block_version(target, ctx);

    // If the block already exists
    if (p_block) {
        // Add an incoming branch for this version
        rb_darray_append(&p_block->incoming, branch);
        branch->blocks[target_idx] = p_block;

        // Return a pointer to the compiled code
        return p_block->start_addr;
    }

    // Do we have enough memory for a stub?
    const long MAX_CODE_SIZE = 64;
    if (ocb->write_pos + MAX_CODE_SIZE >= cb->mem_size) {
        return NULL;
    }

    // Generate an outlined stub that will call branch_stub_hit()
    uint8_t *stub_addr = cb_get_ptr(ocb, ocb->write_pos);

    // Call branch_stub_hit(branch, target_idx, ec)
    mov(ocb, C_ARG_REGS[2], REG_EC);
    mov(ocb, C_ARG_REGS[1], imm_opnd(target_idx));
    mov(ocb, C_ARG_REGS[0], const_ptr_opnd(branch));
    call_ptr(ocb, REG0, (void *)&branch_stub_hit);

    // Jump to the address returned by the
    // branch_stub_hit call
    jmp_rm(ocb, RAX);

    RUBY_ASSERT(cb_get_ptr(ocb, ocb->write_pos) - stub_addr <= MAX_CODE_SIZE);

    return stub_addr;
}

static void
gen_branch(
    jitstate_t *jit,
    const ctx_t *src_ctx,
    blockid_t target0,
    const ctx_t *ctx0,
    blockid_t target1,
    const ctx_t *ctx1,
    branchgen_fn gen_fn
)
{
    RUBY_ASSERT(target0.iseq != NULL);

    branch_t *branch = make_branch_entry(jit->block, src_ctx, gen_fn);
    branch->targets[0] = target0;
    branch->targets[1] = target1;
    branch->target_ctxs[0] = *ctx0;
    branch->target_ctxs[1] = ctx1? *ctx1:DEFAULT_CTX;

    // Get the branch targets or stubs
    branch->dst_addrs[0] = get_branch_target(target0, ctx0, branch, 0);
    branch->dst_addrs[1] = ctx1? get_branch_target(target1, ctx1, branch, 1):NULL;

    // Call the branch generation function
    branch->start_addr = cb_get_write_ptr(cb);
    regenerate_branch(cb, branch);
}

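/*
Hypothetical call sketch (loosely modelled on how two-way branches such as
branchif are lowered in yjit_codegen.c; the names here are placeholders):

    gen_branch(jit, ctx, jump_block, ctx, next_block, ctx, gen_branchif_branch);

Both targets get either a compiled version or an outlined stub, and the
generated jump is written immediately at the current write position.
*/
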
static void
gen_jump_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
{
    switch (shape) {
      case SHAPE_NEXT0:
        break;

      case SHAPE_NEXT1:
        RUBY_ASSERT(false);
        break;

      case SHAPE_DEFAULT:
        jmp_ptr(cb, target0);
        break;
    }
}

static void
gen_direct_jump(
    jitstate_t *jit,
    const ctx_t *ctx,
    blockid_t target0
)
{
    RUBY_ASSERT(target0.iseq != NULL);

    branch_t *branch = make_branch_entry(jit->block, ctx, gen_jump_branch);
    branch->targets[0] = target0;
    branch->target_ctxs[0] = *ctx;

    block_t *p_block = find_block_version(target0, ctx);

    // If the version already exists
    if (p_block) {
        rb_darray_append(&p_block->incoming, branch);

        branch->dst_addrs[0] = p_block->start_addr;
        branch->blocks[0] = p_block;
        branch->shape = SHAPE_DEFAULT;

        // Call the branch generation function
        branch->start_addr = cb_get_write_ptr(cb);
        gen_jump_branch(cb, branch->dst_addrs[0], NULL, SHAPE_DEFAULT);
        branch->end_addr = cb_get_write_ptr(cb);
    }
    else {
        // This NULL target address signals gen_block_version() to compile the
        // target block right after this one (fallthrough).
        branch->dst_addrs[0] = NULL;
        branch->shape = SHAPE_NEXT0;
        branch->start_addr = cb_get_write_ptr(cb);
        branch->end_addr = cb_get_write_ptr(cb);
    }
}

// Create a stub to force the code up to this point to be executed
static void
defer_compilation(
    jitstate_t *jit,
    ctx_t *cur_ctx
)
{
    //fprintf(stderr, "defer compilation at (%p, %d) depth=%d\n", block->blockid.iseq, insn_idx, cur_ctx->chain_depth);

    if (cur_ctx->chain_depth != 0) {
        rb_bug("double defer");
    }

    ctx_t next_ctx = *cur_ctx;

    if (next_ctx.chain_depth >= UINT8_MAX) {
        rb_bug("max block version chain depth reached");
    }

    next_ctx.chain_depth += 1;

    branch_t *branch = make_branch_entry(jit->block, cur_ctx, gen_jump_branch);

    // Get the branch targets or stubs
    branch->target_ctxs[0] = next_ctx;
    branch->targets[0] = (blockid_t){ jit->block->blockid.iseq, jit->insn_idx };
    branch->dst_addrs[0] = get_branch_target(branch->targets[0], &next_ctx, branch, 0);

    // Call the branch generation function
    codeblock_t *cb = jit->cb;
    branch->start_addr = cb_get_write_ptr(cb);
    gen_jump_branch(cb, branch->dst_addrs[0], NULL, SHAPE_DEFAULT);
    branch->end_addr = cb_get_write_ptr(cb);
}

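/*
Rough usage sketch: codegen calls defer_compilation(jit, ctx) when it wants to
see a concrete receiver or operand type before committing to code. The branch
above jumps to a stub; when the stub is hit, branch_stub_hit() compiles a
fresh version of the same (iseq, insn_idx) block with chain_depth bumped, so
the deferred block is generated once the runtime context can be inspected.
*/
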
// Remove all references to a block then free it.
static void
yjit_free_block(block_t *block)
{
    yjit_unlink_method_lookup_dependency(block);
    yjit_block_assumptions_free(block);

    // Remove this block from the predecessor's targets
    rb_darray_for(block->incoming, incoming_idx) {
        // Branch from the predecessor to us
        branch_t *pred_branch = rb_darray_get(block->incoming, incoming_idx);

        // If this is us, nullify the target block
        for (size_t succ_idx = 0; succ_idx < 2; succ_idx++) {
            if (pred_branch->blocks[succ_idx] == block) {
                pred_branch->blocks[succ_idx] = NULL;
            }
        }
    }

    // For each outgoing branch
    rb_darray_for(block->outgoing, branch_idx) {
        branch_t *out_branch = rb_darray_get(block->outgoing, branch_idx);

        // For each successor block
        for (size_t succ_idx = 0; succ_idx < 2; succ_idx++) {
            block_t *succ = out_branch->blocks[succ_idx];

            if (succ == NULL)
                continue;

            // Remove this block from the successor's incoming list
            rb_darray_for(succ->incoming, incoming_idx) {
                branch_t *pred_branch = rb_darray_get(succ->incoming, incoming_idx);
                if (pred_branch == out_branch) {
                    rb_darray_remove_unordered(succ->incoming, incoming_idx);
                    break;
                }
            }
        }

        // Free the outgoing branch entry
        free(out_branch);
    }

    rb_darray_free(block->incoming);
    rb_darray_free(block->outgoing);
    rb_darray_free(block->gc_object_offsets);

    free(block);
}

// Remove a block version
static void
block_array_remove(rb_yjit_block_array_t block_array, block_t *block)
{
    block_t **element;
    rb_darray_foreach(block_array, idx, element) {
        if (*element == block) {
            rb_darray_remove_unordered(block_array, idx);
            return;
        }
    }

    RUBY_ASSERT(false);
}

// Some runtime checks for integrity of a program location
static void
verify_blockid(const blockid_t blockid)
{
    const rb_iseq_t *const iseq = blockid.iseq;
    RUBY_ASSERT_ALWAYS(IMEMO_TYPE_P(iseq, imemo_iseq));
    RUBY_ASSERT_ALWAYS(blockid.idx < iseq->body->iseq_size);
}

// Invalidate one specific block version
static void
invalidate_block_version(block_t *block)
{
    ASSERT_vm_locking();

    // TODO: want to assert that all other ractors are stopped here. Can't patch
    // machine code that some other thread is running.

    verify_blockid(block->blockid);

    const rb_iseq_t *iseq = block->blockid.iseq;

    //fprintf(stderr, "invalidating block (%p, %d)\n", block->blockid.iseq, block->blockid.idx);
    //fprintf(stderr, "block=%p\n", block);

    // Remove this block from the version array
    rb_yjit_block_array_t versions = yjit_get_version_array(iseq, block->blockid.idx);
    block_array_remove(versions, block);

    // Get a pointer to the generated code for this block
    uint8_t *code_ptr = block->start_addr;

    // Make the start of the block do an exit. This handles OOM situations
    // and some cases where we can't efficiently patch incoming branches.
    // Do this first, since in case there is a fallthrough branch into this
    // block, the patching loop below can overwrite the start of the block.
    // In those situations, there are hopefully no jumps to the start of the block
    // after patching as the start of the block would be in the middle of something
    // generated by branch_t::gen_fn.
    {
        RUBY_ASSERT_ALWAYS(block->entry_exit && "block invalidation requires an exit");
        if (block->entry_exit == block->start_addr) {
            // Some blocks exit on entry. Patching a jump to the entry at the
            // entry makes an infinite loop.
        }
        else if (block->start_addr >= cb_get_ptr(cb, yjit_codepage_frozen_bytes)) { // Don't patch frozen code region
            // Patch in a jump to block->entry_exit.
            uint32_t cur_pos = cb->write_pos;
            cb_set_write_ptr(cb, block->start_addr);
            jmp_ptr(cb, block->entry_exit);
            RUBY_ASSERT_ALWAYS(cb_get_ptr(cb, cb->write_pos) < block->end_addr && "invalidation wrote past end of block");
            cb_set_pos(cb, cur_pos);
        }
    }

    // For each incoming branch
    rb_darray_for(block->incoming, incoming_idx) {
        branch_t *branch = rb_darray_get(block->incoming, incoming_idx);
        uint32_t target_idx = (branch->dst_addrs[0] == code_ptr)? 0:1;
        RUBY_ASSERT(branch->dst_addrs[target_idx] == code_ptr);
        RUBY_ASSERT(branch->blocks[target_idx] == block);

        // Mark this target as being a stub
        branch->blocks[target_idx] = NULL;

        // Don't patch frozen code region
        if (branch->start_addr < cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
            continue;
        }

        // Create a stub for this branch target
        uint8_t *branch_target = get_branch_target(
            block->blockid,
            &block->ctx,
            branch,
            target_idx
        );

        if (!branch_target) {
            // We were unable to generate a stub (e.g. OOM). Use the block's
            // exit instead of a stub for the block. It's important that we
            // still patch the branch in this situation so stubs are unique
            // to branches. Think about what could go wrong if we run out of
            // memory in the middle of this loop.
            branch_target = block->entry_exit;
        }

        branch->dst_addrs[target_idx] = branch_target;

        // Check if the invalidated block immediately follows
        bool target_next = (block->start_addr == branch->end_addr);

        if (target_next) {
            // The new block will no longer be adjacent.
            // Note that we could be enlarging the branch and writing into the
            // start of the block being invalidated.
            branch->shape = SHAPE_DEFAULT;
        }

        // Rewrite the branch with the new jump target address
        regenerate_branch(cb, branch);

        if (target_next && branch->end_addr > block->end_addr) {
            fprintf(stderr, "branch_block_idx=%u block_idx=%u over=%td block_size=%td\n",
                    branch->block->blockid.idx,
                    block->blockid.idx,
                    branch->end_addr - block->end_addr,
                    block->end_addr - block->start_addr);
            yjit_print_iseq(branch->block->blockid.iseq);
            rb_bug("yjit invalidate rewrote branch past end of invalidated block");
        }
    }

    // Clear out the JIT func so that we can recompile later and so the
    // interpreter will run the iseq

#if JIT_ENABLED
    // Only clear the jit_func when we're invalidating the JIT entry block.
    // We only support compiling iseqs from index 0 right now. So entry
    // points will always have an instruction index of 0. We'll need to
    // change this in the future when we support optional parameters because
    // they enter the function with a non-zero PC.
    if (block->blockid.idx == 0) {
        iseq->body->jit_func = 0;
    }
#endif

    // TODO:
    // May want to recompile a new entry point (for interpreter entry blocks)
    // This isn't necessary for correctness

    // FIXME:
    // Call continuation addresses on the stack can also be atomically replaced by jumps going to the stub.

    yjit_free_block(block);

#if YJIT_STATS
    yjit_runtime_counters.invalidation_count++;
#endif

    cb_mark_all_executable(ocb);
    cb_mark_all_executable(cb);

    // fprintf(stderr, "invalidation done\n");
}

static void
yjit_init_core(void)
{
    gen_code_for_exit_from_stub();
}