1 // This file is a fragment of the yjit.o compilation unit. See yjit.c.
2 #include "internal.h"
3 #include "gc.h"
4 #include "internal/compile.h"
5 #include "internal/class.h"
6 #include "internal/hash.h"
7 #include "internal/object.h"
8 #include "internal/sanitizers.h"
9 #include "internal/string.h"
10 #include "internal/struct.h"
11 #include "internal/variable.h"
12 #include "internal/re.h"
13 #include "probes.h"
14 #include "probes_helper.h"
15 #include "yjit.h"
16 #include "yjit_iface.h"
17 #include "yjit_core.h"
18 #include "yjit_codegen.h"
19 #include "yjit_asm.h"
21 // Map from YARV opcodes to code generation functions
22 static codegen_fn gen_fns[VM_INSTRUCTION_SIZE] = { NULL };
24 // Map from method entries to code generation functions
25 static st_table *yjit_method_codegen_table = NULL;
27 // Code for exiting back to the interpreter from the leave instruction
28 static void *leave_exit_code;
30 // Code for the full logic of returning from a C method and exiting to the interpreter
31 static uint32_t outline_full_cfunc_return_pos;
33 // For implementing global code invalidation
34 struct codepage_patch {
35 uint32_t inline_patch_pos;
36 uint32_t outlined_target_pos;
39 typedef rb_darray(struct codepage_patch) patch_array_t;
41 static patch_array_t global_inval_patches = NULL;
43 // Print the current source location for debugging purposes
44 RBIMPL_ATTR_MAYBE_UNUSED()
45 static void
46 jit_print_loc(jitstate_t *jit, const char *msg)
48 char *ptr;
49 long len;
50 VALUE path = rb_iseq_path(jit->iseq);
51 RSTRING_GETMEM(path, ptr, len);
52 fprintf(stderr, "%s %.*s:%u\n", msg, (int)len, ptr, rb_iseq_line_no(jit->iseq, jit->insn_idx));
55 // dump an object for debugging purposes
56 RBIMPL_ATTR_MAYBE_UNUSED()
57 static void
58 jit_obj_info_dump(codeblock_t *cb, x86opnd_t opnd) {
59 push_regs(cb);
60 mov(cb, C_ARG_REGS[0], opnd);
61 call_ptr(cb, REG0, (void *)rb_obj_info_dump);
62 pop_regs(cb);
65 // Get the current instruction's opcode
66 static int
67 jit_get_opcode(jitstate_t *jit)
69 return jit->opcode;
72 // Get the index of the next instruction
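// (insn_len() counts the opcode slot plus its operands; for example, an
// instruction with one operand occupies two VALUE slots, so the next index
// is insn_idx + 2.)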
73 static uint32_t
74 jit_next_insn_idx(jitstate_t *jit)
76 return jit->insn_idx + insn_len(jit_get_opcode(jit));
79 // Get an instruction argument by index
80 static VALUE
81 jit_get_arg(jitstate_t *jit, size_t arg_idx)
83 RUBY_ASSERT(arg_idx + 1 < (size_t)insn_len(jit_get_opcode(jit)));
84 return *(jit->pc + arg_idx + 1);
87 // Load a VALUE into a register and keep track of the reference if it is on the GC heap.
88 static void
89 jit_mov_gc_ptr(jitstate_t *jit, codeblock_t *cb, x86opnd_t reg, VALUE ptr)
91 RUBY_ASSERT(reg.type == OPND_REG && reg.num_bits == 64);
93 // Load the pointer constant into the specified register
94 mov(cb, reg, const_ptr_opnd((void*)ptr));
96 // The pointer immediate is encoded as the last part of the mov written out
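// (On x86-64, `mov reg64, imm64` places the 8-byte immediate at the end of
// the encoding, so its offset inside the code block is write_pos - sizeof(VALUE).
// Recording that offset lets the GC hooks find the embedded VALUE later.)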
97 uint32_t ptr_offset = cb->write_pos - sizeof(VALUE);
99 if (!SPECIAL_CONST_P(ptr)) {
100 if (!rb_darray_append(&jit->block->gc_object_offsets, ptr_offset)) {
101 rb_bug("allocation failed");
106 // Check if we are compiling the instruction at the stub PC
107 // Meaning we are compiling the instruction that is next to execute
108 static bool
109 jit_at_current_insn(jitstate_t *jit)
111 const VALUE *ec_pc = jit->ec->cfp->pc;
112 return (ec_pc == jit->pc);
115 // Peek at the nth topmost value on the Ruby stack.
116 // Returns the topmost value when n == 0.
117 static VALUE
118 jit_peek_at_stack(jitstate_t *jit, ctx_t *ctx, int n)
120 RUBY_ASSERT(jit_at_current_insn(jit));
122 // Note: this does not account for ctx->sp_offset because
123 // this is only available when hitting a stub, and while
124 // hitting a stub, cfp->sp needs to be up to date in case
125 // codegen functions trigger GC. See :stub-sp-flush:.
126 VALUE *sp = jit->ec->cfp->sp;
128 return *(sp - 1 - n);
131 static VALUE
132 jit_peek_at_self(jitstate_t *jit, ctx_t *ctx)
134 return jit->ec->cfp->self;
137 RBIMPL_ATTR_MAYBE_UNUSED()
138 static VALUE
139 jit_peek_at_local(jitstate_t *jit, ctx_t *ctx, int n)
141 RUBY_ASSERT(jit_at_current_insn(jit));
143 int32_t local_table_size = jit->iseq->body->local_table_size;
144 RUBY_ASSERT(n < (int)jit->iseq->body->local_table_size);
146 const VALUE *ep = jit->ec->cfp->ep;
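// Locals sit below the VM_ENV_DATA_SIZE environment slots on the EP.
// Worked example, assuming VM_ENV_DATA_SIZE == 3 and local_table_size == 2:
// n == 0 reads ep[-4] and n == 1 reads ep[-3].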
147 return ep[-VM_ENV_DATA_SIZE - local_table_size + n + 1];
150 // Save the incremented PC on the CFP
151 // This is necessary when callees can raise or allocate
152 static void
153 jit_save_pc(jitstate_t *jit, x86opnd_t scratch_reg)
155 codeblock_t *cb = jit->cb;
156 mov(cb, scratch_reg, const_ptr_opnd(jit->pc + insn_len(jit->opcode)));
157 mov(cb, mem_opnd(64, REG_CFP, offsetof(rb_control_frame_t, pc)), scratch_reg);
160 // Save the current SP on the CFP
161 // This realigns the interpreter SP with the JIT SP
162 // Note: this will change the current value of REG_SP,
163 // which could invalidate memory operands
164 static void
165 jit_save_sp(jitstate_t *jit, ctx_t *ctx)
167 if (ctx->sp_offset != 0) {
168 x86opnd_t stack_pointer = ctx_sp_opnd(ctx, 0);
169 codeblock_t *cb = jit->cb;
170 lea(cb, REG_SP, stack_pointer);
171 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG_SP);
172 ctx->sp_offset = 0;
176 // jit_save_pc() + jit_save_sp(). Should be used before calling a routine that
177 // could:
178 // - Perform GC allocation
179 // - Take the VM lock through RB_VM_LOCK_ENTER()
180 // - Perform Ruby method call
181 static void
182 jit_prepare_routine_call(jitstate_t *jit, ctx_t *ctx, x86opnd_t scratch_reg)
184 jit->record_boundary_patch_point = true;
185 jit_save_pc(jit, scratch_reg);
186 jit_save_sp(jit, ctx);
188 // In case the routine calls Ruby methods, it can set local variables
189 // through Kernel#binding and other means.
190 ctx_clear_local_types(ctx);
193 // Record the current codeblock write position for rewriting into a jump into
194 // the outlined block later. Used to implement global code invalidation.
195 static void
196 record_global_inval_patch(const codeblock_t *cb, uint32_t outline_block_target_pos)
198 struct codepage_patch patch_point = { cb->write_pos, outline_block_target_pos };
199 if (!rb_darray_append(&global_inval_patches, patch_point)) rb_bug("allocation failed");
202 static bool jit_guard_known_klass(jitstate_t *jit, ctx_t *ctx, VALUE known_klass, insn_opnd_t insn_opnd, VALUE sample_instance, const int max_chain_depth, uint8_t *side_exit);
204 #if YJIT_STATS
206 // Add a comment at the current position in the code block
207 static void
208 _add_comment(codeblock_t *cb, const char *comment_str)
210 // We can't add comments to the outlined code block
211 if (cb == ocb)
212 return;
214 // Avoid adding duplicate comment strings (can happen due to deferred codegen)
215 size_t num_comments = rb_darray_size(yjit_code_comments);
216 if (num_comments > 0) {
217 struct yjit_comment last_comment = rb_darray_get(yjit_code_comments, num_comments - 1);
218 if (last_comment.offset == cb->write_pos && strcmp(last_comment.comment, comment_str) == 0) {
219 return;
223 struct yjit_comment new_comment = (struct yjit_comment){ cb->write_pos, comment_str };
224 rb_darray_append(&yjit_code_comments, new_comment);
227 // Comments for generated machine code
228 #define ADD_COMMENT(cb, comment) _add_comment((cb), (comment))
230 // Verify the ctx's types and mappings against the compile-time stack, self,
231 // and locals.
232 static void
233 verify_ctx(jitstate_t *jit, ctx_t *ctx)
235 // Only able to check types when at current insn
236 RUBY_ASSERT(jit_at_current_insn(jit));
238 VALUE self_val = jit_peek_at_self(jit, ctx);
239 if (type_diff(yjit_type_of_value(self_val), ctx->self_type) == INT_MAX) {
240 rb_bug("verify_ctx: ctx type (%s) incompatible with actual value of self: %s", yjit_type_name(ctx->self_type), rb_obj_info(self_val));
243 for (int i = 0; i < ctx->stack_size && i < MAX_TEMP_TYPES; i++) {
244 temp_type_mapping_t learned = ctx_get_opnd_mapping(ctx, OPND_STACK(i));
245 VALUE val = jit_peek_at_stack(jit, ctx, i);
246 val_type_t detected = yjit_type_of_value(val);
248 if (learned.mapping.kind == TEMP_SELF) {
249 if (self_val != val) {
250 rb_bug("verify_ctx: stack value was mapped to self, but values did not match\n"
251 " stack: %s\n"
252 " self: %s",
253 rb_obj_info(val),
254 rb_obj_info(self_val));
258 if (learned.mapping.kind == TEMP_LOCAL) {
259 int local_idx = learned.mapping.idx;
260 VALUE local_val = jit_peek_at_local(jit, ctx, local_idx);
261 if (local_val != val) {
262 rb_bug("verify_ctx: stack value was mapped to local, but values did not match\n"
263 " stack: %s\n"
264 " local %i: %s",
265 rb_obj_info(val),
266 local_idx,
267 rb_obj_info(local_val));
271 if (type_diff(detected, learned.type) == INT_MAX) {
272 rb_bug("verify_ctx: ctx type (%s) incompatible with actual value on stack: %s", yjit_type_name(learned.type), rb_obj_info(val));
276 int32_t local_table_size = jit->iseq->body->local_table_size;
277 for (int i = 0; i < local_table_size && i < MAX_TEMP_TYPES; i++) {
278 val_type_t learned = ctx->local_types[i];
279 VALUE val = jit_peek_at_local(jit, ctx, i);
280 val_type_t detected = yjit_type_of_value(val);
282 if (type_diff(detected, learned) == INT_MAX) {
283 rb_bug("verify_ctx: ctx type (%s) incompatible with actual value of local: %s", yjit_type_name(learned), rb_obj_info(val));
288 #else
290 #define ADD_COMMENT(cb, comment) ((void)0)
291 #define verify_ctx(jit, ctx) ((void)0)
293 #endif // if YJIT_STATS
295 #if YJIT_STATS
297 // Increment a profiling counter with counter_name
298 #define GEN_COUNTER_INC(cb, counter_name) _gen_counter_inc(cb, &(yjit_runtime_counters . counter_name))
299 static void
300 _gen_counter_inc(codeblock_t *cb, int64_t *counter)
302 if (!rb_yjit_opts.gen_stats) return;
304 // Use REG1 because there might be a return value in REG0
305 mov(cb, REG1, const_ptr_opnd(counter));
306 cb_write_lock_prefix(cb); // for ractors.
307 add(cb, mem_opnd(64, REG1, 0), imm_opnd(1));
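// (The lock prefix makes the memory add a single atomic read-modify-write,
// so increments from code running on multiple Ractors are not lost.)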
310 // Increment a counter then take an existing side exit.
311 #define COUNTED_EXIT(jit, side_exit, counter_name) _counted_side_exit(jit, side_exit, &(yjit_runtime_counters . counter_name))
312 static uint8_t *
313 _counted_side_exit(jitstate_t* jit, uint8_t *existing_side_exit, int64_t *counter)
315 if (!rb_yjit_opts.gen_stats) return existing_side_exit;
317 uint8_t *start = cb_get_ptr(jit->ocb, jit->ocb->write_pos);
318 _gen_counter_inc(jit->ocb, counter);
319 jmp_ptr(jit->ocb, existing_side_exit);
320 return start;
323 #else
325 #define GEN_COUNTER_INC(cb, counter_name) ((void)0)
326 #define COUNTED_EXIT(jit, side_exit, counter_name) side_exit
328 #endif // if YJIT_STATS
330 // Generate an exit to return to the interpreter
331 static uint32_t
332 yjit_gen_exit(VALUE *exit_pc, ctx_t *ctx, codeblock_t *cb)
334 const uint32_t code_pos = cb->write_pos;
336 ADD_COMMENT(cb, "exit to interpreter");
338 // Generate the code to exit to the interpreter
339 // Write the adjusted SP back into the CFP
340 if (ctx->sp_offset != 0) {
341 x86opnd_t stack_pointer = ctx_sp_opnd(ctx, 0);
342 lea(cb, REG_SP, stack_pointer);
343 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG_SP);
346 // Update CFP->PC
347 mov(cb, RAX, const_ptr_opnd(exit_pc));
348 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, pc), RAX);
350 // Accumulate stats about interpreter exits
351 #if YJIT_STATS
352 if (rb_yjit_opts.gen_stats) {
353 mov(cb, RDI, const_ptr_opnd(exit_pc));
354 call_ptr(cb, RSI, (void *)&yjit_count_side_exit_op);
356 #endif
358 pop(cb, REG_SP);
359 pop(cb, REG_EC);
360 pop(cb, REG_CFP);
362 mov(cb, RAX, imm_opnd(Qundef));
363 ret(cb);
365 return code_pos;
368 // Generate a continuation for gen_leave() that exits to the interpreter at REG_CFP->pc.
369 static uint8_t *
370 yjit_gen_leave_exit(codeblock_t *cb)
372 uint8_t *code_ptr = cb_get_ptr(cb, cb->write_pos);
374 // Note, gen_leave() fully reconstructs interpreter state and leaves the
375 // return value in RAX before coming here.
377 // Every exit to the interpreter should be counted
378 GEN_COUNTER_INC(cb, leave_interp_return);
380 pop(cb, REG_SP);
381 pop(cb, REG_EC);
382 pop(cb, REG_CFP);
384 ret(cb);
386 return code_ptr;
389 // Fill code_for_exit_from_stub. This is used by branch_stub_hit() to exit
390 // to the interpreter when it cannot service a stub by generating new code.
391 // Before coming here, branch_stub_hit() takes care of fully reconstructing
392 // interpreter state.
393 static void
394 gen_code_for_exit_from_stub(void)
396 codeblock_t *cb = ocb;
397 code_for_exit_from_stub = cb_get_ptr(cb, cb->write_pos);
399 GEN_COUNTER_INC(cb, exit_from_branch_stub);
401 pop(cb, REG_SP);
402 pop(cb, REG_EC);
403 pop(cb, REG_CFP);
405 mov(cb, RAX, imm_opnd(Qundef));
406 ret(cb);
409 // :side-exit:
410 // Get an exit for the current instruction in the outlined block. The code
411 // for each instruction often begins with several guards before proceeding
412 // to do work. When guards fail, an option we have is to exit to the
413 // interpreter at an instruction boundary. The piece of code that takes
414 // care of reconstructing interpreter state and exiting out of generated
415 // code is called the side exit.
417 // No guards change the logic for reconstructing interpreter state at the
418 // moment, so there is one unique side exit for each context. Note that
419 // it's incorrect to jump to the side exit after any ctx stack push/pop operations
420 // since they change the logic required for reconstructing interpreter state.
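// Typical usage in a codegen function looks roughly like this (sketch):
//
//   uint8_t *side_exit = yjit_side_exit(jit, ctx);
//   ... emit a guard ...
//   jnz_ptr(cb, side_exit); // fall back to the interpreter on guard failure
//
// See gen_setlocal_wc0() below for a concrete instance of this pattern.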
421 static uint8_t *
422 yjit_side_exit(jitstate_t *jit, ctx_t *ctx)
424 if (!jit->side_exit_for_pc) {
425 codeblock_t *ocb = jit->ocb;
426 uint32_t pos = yjit_gen_exit(jit->pc, ctx, ocb);
427 jit->side_exit_for_pc = cb_get_ptr(ocb, pos);
430 return jit->side_exit_for_pc;
433 // Ensure that there is an exit for the start of the block being compiled.
434 // Block invalidation uses this exit.
435 static void
436 jit_ensure_block_entry_exit(jitstate_t *jit)
438 block_t *block = jit->block;
439 if (block->entry_exit) return;
441 if (jit->insn_idx == block->blockid.idx) {
442 // We are compiling the first instruction in the block.
443 // Generate the exit with the cache in jitstate.
444 block->entry_exit = yjit_side_exit(jit, &block->ctx);
446 else {
447 VALUE *pc = yjit_iseq_pc_at_idx(block->blockid.iseq, block->blockid.idx);
448 uint32_t pos = yjit_gen_exit(pc, &block->ctx, ocb);
449 block->entry_exit = cb_get_ptr(ocb, pos);
453 // Generate a runtime guard that ensures the PC is at the start of the iseq,
454 // otherwise take a side exit. This is to handle the situation of optional
455 // parameters. When a function with optional parameters is called, the entry
456 // PC for the method isn't necessarily 0, but we always generate code that
457 // assumes the entry point is 0.
458 static void
459 yjit_pc_guard(codeblock_t *cb, const rb_iseq_t *iseq)
461 RUBY_ASSERT(cb != NULL);
463 mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, pc));
464 mov(cb, REG1, const_ptr_opnd(iseq->body->iseq_encoded));
465 xor(cb, REG0, REG1);
467 // xor should impact ZF, so we can jz here
468 uint32_t pc_is_zero = cb_new_label(cb, "pc_is_zero");
469 jz_label(cb, pc_is_zero);
471 // We're not starting at the first PC, so we need to exit.
472 GEN_COUNTER_INC(cb, leave_start_pc_non_zero);
474 pop(cb, REG_SP);
475 pop(cb, REG_EC);
476 pop(cb, REG_CFP);
478 mov(cb, RAX, imm_opnd(Qundef));
479 ret(cb);
481 // PC should be at the beginning
482 cb_write_label(cb, pc_is_zero);
483 cb_link_labels(cb);
486 // The code we generate in gen_send_cfunc() doesn't fire the c_return TracePoint event
487 // like the interpreter. When tracing for c_return is enabled, we patch the code after
488 // the C method return to call into this to fire the event.
489 static void
490 full_cfunc_return(rb_execution_context_t *ec, VALUE return_value)
492 rb_control_frame_t *cfp = ec->cfp;
493 RUBY_ASSERT_ALWAYS(cfp == GET_EC()->cfp);
494 const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(cfp);
496 RUBY_ASSERT_ALWAYS(RUBYVM_CFUNC_FRAME_P(cfp));
497 RUBY_ASSERT_ALWAYS(me->def->type == VM_METHOD_TYPE_CFUNC);
499 // CHECK_CFP_CONSISTENCY("full_cfunc_return"); TODO revive this
501 // Pop the C func's frame and fire the c_return TracePoint event
502 // Note that this is the same order as vm_call_cfunc_with_frame().
503 rb_vm_pop_frame(ec);
504 EXEC_EVENT_HOOK(ec, RUBY_EVENT_C_RETURN, cfp->self, me->def->original_id, me->called_id, me->owner, return_value);
505 // Note, this deviates from the interpreter in that users need to enable
506 // a c_return TracePoint for this DTrace hook to work. A reasonable change
507 // since the Ruby return event works this way as well.
508 RUBY_DTRACE_CMETHOD_RETURN_HOOK(ec, me->owner, me->def->original_id);
510 // Push return value into the caller's stack. We know that it's a frame that
511 // uses cfp->sp because we are patching a call done with gen_send_cfunc().
512 ec->cfp->sp[0] = return_value;
513 ec->cfp->sp++;
516 // Landing code for when c_return tracing is enabled. See full_cfunc_return().
517 static void
518 gen_full_cfunc_return(void)
520 codeblock_t *cb = ocb;
521 outline_full_cfunc_return_pos = ocb->write_pos;
523 // This chunk of code expects REG_EC to be filled properly and
524 // RAX to contain the return value of the C method.
526 // Call full_cfunc_return()
527 mov(cb, C_ARG_REGS[0], REG_EC);
528 mov(cb, C_ARG_REGS[1], RAX);
529 call_ptr(cb, REG0, (void *)full_cfunc_return);
531 // Count the exit
532 GEN_COUNTER_INC(cb, traced_cfunc_return);
534 // Return to the interpreter
535 pop(cb, REG_SP);
536 pop(cb, REG_EC);
537 pop(cb, REG_CFP);
539 mov(cb, RAX, imm_opnd(Qundef));
540 ret(cb);
544 // Compile an interpreter entry block to be inserted into an iseq.
545 // Returns `NULL` if compilation fails.
547 static uint8_t *
548 yjit_entry_prologue(codeblock_t *cb, const rb_iseq_t *iseq)
550 RUBY_ASSERT(cb != NULL);
552 enum { MAX_PROLOGUE_SIZE = 1024 };
554 // Check if we have enough executable memory
555 if (cb->write_pos + MAX_PROLOGUE_SIZE >= cb->mem_size) {
556 return NULL;
559 const uint32_t old_write_pos = cb->write_pos;
561 // Align the current write position to cache line boundaries
562 cb_align_pos(cb, 64);
564 uint8_t *code_ptr = cb_get_ptr(cb, cb->write_pos);
565 ADD_COMMENT(cb, "yjit entry");
567 push(cb, REG_CFP);
568 push(cb, REG_EC);
569 push(cb, REG_SP);
571 // We are passed EC and CFP
572 mov(cb, REG_EC, C_ARG_REGS[0]);
573 mov(cb, REG_CFP, C_ARG_REGS[1]);
575 // Load the current SP from the CFP into REG_SP
576 mov(cb, REG_SP, member_opnd(REG_CFP, rb_control_frame_t, sp));
578 // Setup cfp->jit_return
579 // TODO: this could use an IP relative LEA instead of an 8 byte immediate
580 mov(cb, REG0, const_ptr_opnd(leave_exit_code));
581 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, jit_return), REG0);
583 // We're compiling iseqs that we *expect* to start at `insn_idx`. But in
584 // the case of optional parameters, the interpreter can set the pc to a
585 // different location depending on the optional parameters. If an iseq
586 // has optional parameters, we'll add a runtime check that the PC we've
587 // compiled for is the same PC that the interpreter wants us to run with.
588 // If they don't match, then we'll take a side exit.
589 if (iseq->body->param.flags.has_opt) {
590 yjit_pc_guard(cb, iseq);
593 // Verify MAX_PROLOGUE_SIZE
594 RUBY_ASSERT_ALWAYS(cb->write_pos - old_write_pos <= MAX_PROLOGUE_SIZE);
596 return code_ptr;
599 // Generate code to check for interrupts and take a side-exit.
600 // Warning: this function clobbers REG0
601 static void
602 yjit_check_ints(codeblock_t *cb, uint8_t *side_exit)
604 // Check for interrupts
605 // see RUBY_VM_CHECK_INTS(ec) macro
606 ADD_COMMENT(cb, "RUBY_VM_CHECK_INTS(ec)");
607 mov(cb, REG0_32, member_opnd(REG_EC, rb_execution_context_t, interrupt_mask));
608 not(cb, REG0_32);
609 test(cb, member_opnd(REG_EC, rb_execution_context_t, interrupt_flag), REG0_32);
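// (the not + test above computes interrupt_flag & ~interrupt_mask; a nonzero
// result means an unmasked interrupt is pending, so we take the side exit)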
610 jnz_ptr(cb, side_exit);
613 // Generate a stubbed unconditional jump to the next bytecode instruction.
614 // Blocks that are part of a guard chain can use this to share the same successor.
615 static void
616 jit_jump_to_next_insn(jitstate_t *jit, const ctx_t *current_context)
618 // Reset the depth since in current usages we only ever jump to
619 // chain_depth > 0 from the same instruction.
620 ctx_t reset_depth = *current_context;
621 reset_depth.chain_depth = 0;
623 blockid_t jump_block = { jit->iseq, jit_next_insn_idx(jit) };
625 // We are at the end of the current instruction. Record the boundary.
626 if (jit->record_boundary_patch_point) {
627 uint32_t exit_pos = yjit_gen_exit(jit->pc + insn_len(jit->opcode), &reset_depth, jit->ocb);
628 record_global_inval_patch(jit->cb, exit_pos);
629 jit->record_boundary_patch_point = false;
632 // Generate the jump instruction
633 gen_direct_jump(
634 jit,
635 &reset_depth,
636 jump_block
640 // Compile a sequence of bytecode instructions for a given basic block version.
641 // Part of gen_block_version().
642 static block_t *
643 gen_single_block(blockid_t blockid, const ctx_t *start_ctx, rb_execution_context_t *ec)
645 RUBY_ASSERT(cb != NULL);
646 verify_blockid(blockid);
648 // Allocate the new block
649 block_t *block = calloc(1, sizeof(block_t));
650 if (!block) {
651 return NULL;
654 // Copy the starting context to avoid mutating it
655 ctx_t ctx_copy = *start_ctx;
656 ctx_t *ctx = &ctx_copy;
658 // Limit the number of specialized versions for this block
659 *ctx = limit_block_versions(blockid, ctx);
661 // Save the starting context on the block.
662 block->blockid = blockid;
663 block->ctx = *ctx;
665 RUBY_ASSERT(!(blockid.idx == 0 && start_ctx->stack_size > 0));
667 const rb_iseq_t *iseq = block->blockid.iseq;
668 const unsigned int iseq_size = iseq->body->iseq_size;
669 uint32_t insn_idx = block->blockid.idx;
670 const uint32_t starting_insn_idx = insn_idx;
672 // Initialize a JIT state object
673 jitstate_t jit = {
674 .cb = cb,
675 .ocb = ocb,
676 .block = block,
677 .iseq = iseq,
678 .ec = ec
681 // Mark the start position of the block
682 block->start_addr = cb_get_write_ptr(cb);
684 // For each instruction to compile
685 while (insn_idx < iseq_size) {
686 // Get the current pc and opcode
687 VALUE *pc = yjit_iseq_pc_at_idx(iseq, insn_idx);
688 int opcode = yjit_opcode_at_pc(iseq, pc);
689 RUBY_ASSERT(opcode >= 0 && opcode < VM_INSTRUCTION_SIZE);
691 // opt_getinlinecache wants to be in a block all on its own. Cut the block short
692 // if we run into it. See gen_opt_getinlinecache() for details.
693 if (opcode == BIN(opt_getinlinecache) && insn_idx > starting_insn_idx) {
694 jit_jump_to_next_insn(&jit, ctx);
695 break;
698 // Set the current instruction
699 jit.insn_idx = insn_idx;
700 jit.opcode = opcode;
701 jit.pc = pc;
702 jit.side_exit_for_pc = NULL;
704 // If previous instruction requested to record the boundary
705 if (jit.record_boundary_patch_point) {
706 // Generate an exit to this instruction and record it
707 uint32_t exit_pos = yjit_gen_exit(jit.pc, ctx, ocb);
708 record_global_inval_patch(cb, exit_pos);
709 jit.record_boundary_patch_point = false;
712 // Verify our existing assumption (DEBUG)
713 if (jit_at_current_insn(&jit)) {
714 verify_ctx(&jit, ctx);
717 // Lookup the codegen function for this instruction
718 codegen_fn gen_fn = gen_fns[opcode];
719 codegen_status_t status = YJIT_CANT_COMPILE;
720 if (gen_fn) {
721 if (0) {
722 fprintf(stderr, "compiling %d: %s\n", insn_idx, insn_name(opcode));
723 print_str(cb, insn_name(opcode));
726 // :count-placement:
727 // Count bytecode instructions that execute in generated code.
728 // Note that the increment happens even when the output takes a side exit.
729 GEN_COUNTER_INC(cb, exec_instruction);
731 // Add a comment for the name of the YARV instruction
732 ADD_COMMENT(cb, insn_name(opcode));
734 // Call the code generation function
735 status = gen_fn(&jit, ctx, cb);
738 // If we can't compile this instruction
739 // exit to the interpreter and stop compiling
740 if (status == YJIT_CANT_COMPILE) {
741 // TODO: if the codegen function makes changes to ctx and then returns YJIT_CANT_COMPILE,
742 // the exit this generates would be wrong. We could save a copy of the entry context
743 // and assert that ctx is the same here.
744 uint32_t exit_off = yjit_gen_exit(jit.pc, ctx, cb);
746 // If this is the first instruction in the block, then we can use
747 // the exit for block->entry_exit.
748 if (insn_idx == block->blockid.idx) {
749 block->entry_exit = cb_get_ptr(cb, exit_off);
751 break;
754 // For now, reset the chain depth after each instruction as only the
755 // first instruction in the block can concern itself with the depth.
756 ctx->chain_depth = 0;
758 // Move to the next instruction to compile
759 insn_idx += insn_len(opcode);
761 // If the instruction terminates this block
762 if (status == YJIT_END_BLOCK) {
763 break;
767 // Mark the end position of the block
768 block->end_addr = cb_get_write_ptr(cb);
770 // Store the index of the last instruction in the block
771 block->end_idx = insn_idx;
773 // We currently can't handle cases where the request is for a block that
774 // doesn't go to the next instruction.
775 RUBY_ASSERT(!jit.record_boundary_patch_point);
777 // If code for the block doesn't fit, free the block and fail.
778 if (cb->dropped_bytes || ocb->dropped_bytes) {
779 yjit_free_block(block);
780 return NULL;
783 if (YJIT_DUMP_MODE >= 2) {
784 // Dump list of compiled instructions
785 fprintf(stderr, "Compiled the following for iseq=%p:\n", (void *)iseq);
786 for (uint32_t idx = block->blockid.idx; idx < insn_idx; ) {
787 int opcode = yjit_opcode_at_pc(iseq, yjit_iseq_pc_at_idx(iseq, idx));
788 fprintf(stderr, " %04d %s\n", idx, insn_name(opcode));
789 idx += insn_len(opcode);
793 return block;
796 static codegen_status_t gen_opt_send_without_block(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb);
798 static codegen_status_t
799 gen_nop(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
801 // Do nothing
802 return YJIT_KEEP_COMPILING;
805 static codegen_status_t
806 gen_dup(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
808 // Get the top value and its type
809 x86opnd_t dup_val = ctx_stack_pop(ctx, 0);
810 temp_type_mapping_t mapping = ctx_get_opnd_mapping(ctx, OPND_STACK(0));
812 // Push the same value on top
813 x86opnd_t loc0 = ctx_stack_push_mapping(ctx, mapping);
814 mov(cb, REG0, dup_val);
815 mov(cb, loc0, REG0);
817 return YJIT_KEEP_COMPILING;
820 // duplicate stack top n elements
821 static codegen_status_t
822 gen_dupn(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
824 rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
826 // In practice, this seems to be used only for n == 2
827 if (n != 2) {
828 return YJIT_CANT_COMPILE;
831 x86opnd_t opnd1 = ctx_stack_opnd(ctx, 1);
832 x86opnd_t opnd0 = ctx_stack_opnd(ctx, 0);
833 temp_type_mapping_t mapping1 = ctx_get_opnd_mapping(ctx, OPND_STACK(1));
834 temp_type_mapping_t mapping0 = ctx_get_opnd_mapping(ctx, OPND_STACK(0));
836 x86opnd_t dst1 = ctx_stack_push_mapping(ctx, mapping1);
837 mov(cb, REG0, opnd1);
838 mov(cb, dst1, REG0);
840 x86opnd_t dst0 = ctx_stack_push_mapping(ctx, mapping0);
841 mov(cb, REG0, opnd0);
842 mov(cb, dst0, REG0);
844 return YJIT_KEEP_COMPILING;
847 static void
848 stack_swap(ctx_t *ctx, codeblock_t *cb, int offset0, int offset1, x86opnd_t reg0, x86opnd_t reg1)
850 x86opnd_t opnd0 = ctx_stack_opnd(ctx, offset0);
851 x86opnd_t opnd1 = ctx_stack_opnd(ctx, offset1);
853 temp_type_mapping_t mapping0 = ctx_get_opnd_mapping(ctx, OPND_STACK(offset0));
854 temp_type_mapping_t mapping1 = ctx_get_opnd_mapping(ctx, OPND_STACK(offset1));
856 mov(cb, reg0, opnd0);
857 mov(cb, reg1, opnd1);
858 mov(cb, opnd0, reg1);
859 mov(cb, opnd1, reg0);
861 ctx_set_opnd_mapping(ctx, OPND_STACK(offset0), mapping1);
862 ctx_set_opnd_mapping(ctx, OPND_STACK(offset1), mapping0);
865 // Swap top 2 stack entries
866 static codegen_status_t
867 gen_swap(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
869 stack_swap(ctx, cb, 0, 1, REG0, REG1);
870 return YJIT_KEEP_COMPILING;
873 // set Nth stack entry to stack top
874 static codegen_status_t
875 gen_setn(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
877 rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
879 // Set the destination
880 x86opnd_t top_val = ctx_stack_pop(ctx, 0);
881 x86opnd_t dst_opnd = ctx_stack_opnd(ctx, (int32_t)n);
882 mov(cb, REG0, top_val);
883 mov(cb, dst_opnd, REG0);
885 temp_type_mapping_t mapping = ctx_get_opnd_mapping(ctx, OPND_STACK(0));
886 ctx_set_opnd_mapping(ctx, OPND_STACK(n), mapping);
888 return YJIT_KEEP_COMPILING;
891 // get nth stack value, then push it
892 static codegen_status_t
893 gen_topn(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
895 int32_t n = (int32_t)jit_get_arg(jit, 0);
897 // Get top n type / operand
898 x86opnd_t top_n_val = ctx_stack_opnd(ctx, n);
899 temp_type_mapping_t mapping = ctx_get_opnd_mapping(ctx, OPND_STACK(n));
901 x86opnd_t loc0 = ctx_stack_push_mapping(ctx, mapping);
902 mov(cb, REG0, top_n_val);
903 mov(cb, loc0, REG0);
905 return YJIT_KEEP_COMPILING;
908 static codegen_status_t
909 gen_pop(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
911 // Decrement SP
912 ctx_stack_pop(ctx, 1);
913 return YJIT_KEEP_COMPILING;
916 // Pop n values off the stack
917 static codegen_status_t
918 gen_adjuststack(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
920 rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
921 ctx_stack_pop(ctx, n);
922 return YJIT_KEEP_COMPILING;
925 // new array initialized from top N values
926 static codegen_status_t
927 gen_newarray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
929 rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
931 // Save the PC and SP because we are allocating
932 jit_prepare_routine_call(jit, ctx, REG0);
934 x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(int32_t)(sizeof(VALUE) * (uint32_t)n));
936 // call rb_ec_ary_new_from_values(struct rb_execution_context_struct *ec, long n, const VALUE *elts);
937 mov(cb, C_ARG_REGS[0], REG_EC);
938 mov(cb, C_ARG_REGS[1], imm_opnd(n));
939 lea(cb, C_ARG_REGS[2], values_ptr);
940 call_ptr(cb, REG0, (void *)rb_ec_ary_new_from_values);
942 ctx_stack_pop(ctx, n);
943 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_ARRAY);
944 mov(cb, stack_ret, RAX);
946 return YJIT_KEEP_COMPILING;
949 // dup array
950 static codegen_status_t
951 gen_duparray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
953 VALUE ary = jit_get_arg(jit, 0);
955 // Save the PC and SP because we are allocating
956 jit_prepare_routine_call(jit, ctx, REG0);
958 // call rb_ary_resurrect(VALUE ary);
959 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], ary);
960 call_ptr(cb, REG0, (void *)rb_ary_resurrect);
962 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_ARRAY);
963 mov(cb, stack_ret, RAX);
965 return YJIT_KEEP_COMPILING;
968 // dup hash
969 static codegen_status_t
970 gen_duphash(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
972 VALUE hash = jit_get_arg(jit, 0);
974 // Save the PC and SP because we are allocating
975 jit_prepare_routine_call(jit, ctx, REG0);
977 // call rb_hash_resurrect(VALUE hash);
978 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], hash);
979 call_ptr(cb, REG0, (void *)rb_hash_resurrect);
981 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HASH);
982 mov(cb, stack_ret, RAX);
984 return YJIT_KEEP_COMPILING;
987 VALUE rb_vm_splat_array(VALUE flag, VALUE ary);
989 // call to_a on the array on the stack
990 static codegen_status_t
991 gen_splatarray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
993 VALUE flag = (VALUE) jit_get_arg(jit, 0);
995 // Save the PC and SP because the callee may allocate
996 // Note that this modifies REG_SP, which is why we do it first
997 jit_prepare_routine_call(jit, ctx, REG0);
999 // Get the operands from the stack
1000 x86opnd_t ary_opnd = ctx_stack_pop(ctx, 1);
1002 // Call rb_vm_splat_array(flag, ary)
1003 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], flag);
1004 mov(cb, C_ARG_REGS[1], ary_opnd);
1005 call_ptr(cb, REG1, (void *) rb_vm_splat_array);
1007 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_ARRAY);
1008 mov(cb, stack_ret, RAX);
1010 return YJIT_KEEP_COMPILING;
1013 // new range initialized from top 2 values
1014 static codegen_status_t
1015 gen_newrange(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1017 rb_num_t flag = (rb_num_t)jit_get_arg(jit, 0);
1019 // rb_range_new() allocates and can raise
1020 jit_prepare_routine_call(jit, ctx, REG0);
1022 // val = rb_range_new(low, high, (int)flag);
1023 mov(cb, C_ARG_REGS[0], ctx_stack_opnd(ctx, 1));
1024 mov(cb, C_ARG_REGS[1], ctx_stack_opnd(ctx, 0));
1025 mov(cb, C_ARG_REGS[2], imm_opnd(flag));
1026 call_ptr(cb, REG0, (void *)rb_range_new);
1028 ctx_stack_pop(ctx, 2);
1029 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HEAP);
1030 mov(cb, stack_ret, RAX);
1032 return YJIT_KEEP_COMPILING;
1035 static void
1036 guard_object_is_heap(codeblock_t *cb, x86opnd_t object_opnd, ctx_t *ctx, uint8_t *side_exit)
1038 ADD_COMMENT(cb, "guard object is heap");
1040 // Test that the object is not an immediate
1041 test(cb, object_opnd, imm_opnd(RUBY_IMMEDIATE_MASK));
1042 jnz_ptr(cb, side_exit);
1044 // Test that the object is not false or nil
1045 cmp(cb, object_opnd, imm_opnd(Qnil));
1046 RUBY_ASSERT(Qfalse < Qnil);
1047 jbe_ptr(cb, side_exit);
1050 static inline void
1051 guard_object_is_array(codeblock_t *cb, x86opnd_t object_opnd, x86opnd_t flags_opnd, ctx_t *ctx, uint8_t *side_exit)
1053 ADD_COMMENT(cb, "guard object is array");
1055 // Pull out the type mask
1056 mov(cb, flags_opnd, member_opnd(object_opnd, struct RBasic, flags));
1057 and(cb, flags_opnd, imm_opnd(RUBY_T_MASK));
1059 // Compare the result with T_ARRAY
1060 cmp(cb, flags_opnd, imm_opnd(T_ARRAY));
1061 jne_ptr(cb, side_exit);
1064 // push enough nils onto the stack to fill out an array
1065 static codegen_status_t
1066 gen_expandarray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1068 int flag = (int) jit_get_arg(jit, 1);
1070 // If this instruction has the splat flag, then bail out.
1071 if (flag & 0x01) {
1072 GEN_COUNTER_INC(cb, expandarray_splat);
1073 return YJIT_CANT_COMPILE;
1076 // If this instruction has the postarg flag, then bail out.
1077 if (flag & 0x02) {
1078 GEN_COUNTER_INC(cb, expandarray_postarg);
1079 return YJIT_CANT_COMPILE;
1082 uint8_t *side_exit = yjit_side_exit(jit, ctx);
1084 // num is the number of requested values. If there aren't enough in the
1085 // array then we're going to push on nils.
1086 int num = (int)jit_get_arg(jit, 0);
1087 val_type_t array_type = ctx_get_opnd_type(ctx, OPND_STACK(0));
1088 x86opnd_t array_opnd = ctx_stack_pop(ctx, 1);
1090 if (array_type.type == ETYPE_NIL) {
1091 // special case for a, b = nil pattern
1092 // push N nils onto the stack
1093 for (int i = 0; i < num; i++) {
1094 x86opnd_t push = ctx_stack_push(ctx, TYPE_NIL);
1095 mov(cb, push, imm_opnd(Qnil));
1097 return YJIT_KEEP_COMPILING;
1100 // Move the array from the stack into REG0 and check that it's an array.
1101 mov(cb, REG0, array_opnd);
1102 guard_object_is_heap(cb, REG0, ctx, COUNTED_EXIT(jit, side_exit, expandarray_not_array));
1103 guard_object_is_array(cb, REG0, REG1, ctx, COUNTED_EXIT(jit, side_exit, expandarray_not_array));
1105 // If we don't actually want any values, then just return.
1106 if (num == 0) {
1107 return YJIT_KEEP_COMPILING;
1110 // Pull out the embed flag to check if it's an embedded array.
1111 x86opnd_t flags_opnd = member_opnd(REG0, struct RBasic, flags);
1112 mov(cb, REG1, flags_opnd);
1114 // Move the length of the embedded array into REG1.
1115 and(cb, REG1, imm_opnd(RARRAY_EMBED_LEN_MASK));
1116 shr(cb, REG1, imm_opnd(RARRAY_EMBED_LEN_SHIFT));
1118 // Conditionally move the length of the heap array into REG1.
1119 test(cb, flags_opnd, imm_opnd(RARRAY_EMBED_FLAG));
1120 cmovz(cb, REG1, member_opnd(REG0, struct RArray, as.heap.len));
1122 // Only handle the case where the number of values in the array is greater
1123 // than or equal to the number of values requested.
1124 cmp(cb, REG1, imm_opnd(num));
1125 jl_ptr(cb, COUNTED_EXIT(jit, side_exit, expandarray_rhs_too_small));
1127 // Load the address of the embedded array into REG1.
1128 // (struct RArray *)(obj)->as.ary
1129 lea(cb, REG1, member_opnd(REG0, struct RArray, as.ary));
1131 // Conditionally load the address of the heap array into REG1.
1132 // (struct RArray *)(obj)->as.heap.ptr
1133 test(cb, flags_opnd, imm_opnd(RARRAY_EMBED_FLAG));
1134 cmovz(cb, REG1, member_opnd(REG0, struct RArray, as.heap.ptr));
1136 // Loop backward through the array and push each element onto the stack.
1137 for (int32_t i = (int32_t) num - 1; i >= 0; i--) {
1138 x86opnd_t top = ctx_stack_push(ctx, TYPE_UNKNOWN);
1139 mov(cb, REG0, mem_opnd(64, REG1, i * SIZEOF_VALUE));
1140 mov(cb, top, REG0);
1143 return YJIT_KEEP_COMPILING;
1146 // new hash initialized from top N values
1147 static codegen_status_t
1148 gen_newhash(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1150 int32_t num = (int32_t)jit_get_arg(jit, 0);
1152 // Save the PC and SP because we are allocating
1153 jit_prepare_routine_call(jit, ctx, REG0);
1155 if (num) {
1156 // val = rb_hash_new_with_size(num / 2);
1157 mov(cb, C_ARG_REGS[0], imm_opnd(num / 2));
1158 call_ptr(cb, REG0, (void *)rb_hash_new_with_size);
1160 // save the allocated hash as we want to push it after insertion
1161 push(cb, RAX);
1162 push(cb, RAX); // alignment
1164 // rb_hash_bulk_insert(num, STACK_ADDR_FROM_TOP(num), val);
1165 mov(cb, C_ARG_REGS[0], imm_opnd(num));
1166 lea(cb, C_ARG_REGS[1], ctx_stack_opnd(ctx, num - 1));
1167 mov(cb, C_ARG_REGS[2], RAX);
1168 call_ptr(cb, REG0, (void *)rb_hash_bulk_insert);
1170 pop(cb, RAX); // alignment
1171 pop(cb, RAX);
1173 ctx_stack_pop(ctx, num);
1174 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HASH);
1175 mov(cb, stack_ret, RAX);
1177 else {
1178 // val = rb_hash_new();
1179 call_ptr(cb, REG0, (void *)rb_hash_new);
1181 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HASH);
1182 mov(cb, stack_ret, RAX);
1185 return YJIT_KEEP_COMPILING;
1188 // Push a constant value to the stack, including type information.
1189 // The constant may be a heap object or a special constant.
1190 static void
1191 jit_putobject(jitstate_t *jit, ctx_t *ctx, VALUE arg)
1193 val_type_t val_type = yjit_type_of_value(arg);
1194 x86opnd_t stack_top = ctx_stack_push(ctx, val_type);
1196 if (SPECIAL_CONST_P(arg)) {
1197 // Immediates will not move and do not need to be tracked for GC
1198 // Thanks to this we can mov directly to memory when possible.
1200 // NOTE: VALUE -> int64_t cast below is implementation defined.
1201 // Hopefully it preserves the bit pattern or raises a signal.
1202 // See N1256 section 6.3.1.3.
1203 x86opnd_t imm = imm_opnd((int64_t)arg);
1205 // 64-bit immediates can't be directly written to memory
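// For example, INT2FIX(1) == 3 fits in 32 bits and can be stored straight to
// the stack slot, while a flonum or a large Fixnum such as INT2FIX(1LL << 40)
// does not, and has to go through REG0 below.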
1206 if (imm.num_bits <= 32) {
1207 mov(cb, stack_top, imm);
1209 else {
1210 mov(cb, REG0, imm);
1211 mov(cb, stack_top, REG0);
1214 else {
1215 // Load the value to push into REG0
1216 // Note that this value may get moved by the GC
1217 jit_mov_gc_ptr(jit, cb, REG0, arg);
1219 // Write argument at SP
1220 mov(cb, stack_top, REG0);
1224 static codegen_status_t
1225 gen_putnil(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1227 jit_putobject(jit, ctx, Qnil);
1228 return YJIT_KEEP_COMPILING;
1231 static codegen_status_t
1232 gen_putobject(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1234 VALUE arg = jit_get_arg(jit, 0);
1236 jit_putobject(jit, ctx, arg);
1237 return YJIT_KEEP_COMPILING;
1240 static codegen_status_t
1241 gen_putstring(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1243 VALUE put_val = jit_get_arg(jit, 0);
1245 // Save the PC and SP because the callee will allocate
1246 jit_prepare_routine_call(jit, ctx, REG0);
1248 mov(cb, C_ARG_REGS[0], REG_EC);
1249 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], put_val);
1250 call_ptr(cb, REG0, (void *)rb_ec_str_resurrect);
1252 x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_STRING);
1253 mov(cb, stack_top, RAX);
1255 return YJIT_KEEP_COMPILING;
1258 static codegen_status_t
1259 gen_putobject_int2fix(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1261 int opcode = jit_get_opcode(jit);
1262 int cst_val = (opcode == BIN(putobject_INT2FIX_0_))? 0:1;
1264 jit_putobject(jit, ctx, INT2FIX(cst_val));
1265 return YJIT_KEEP_COMPILING;
1268 static codegen_status_t
1269 gen_putself(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1271 // Load self from CFP
1272 mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, self));
1274 // Write it on the stack
1275 x86opnd_t stack_top = ctx_stack_push_self(ctx);
1276 mov(cb, stack_top, REG0);
1278 return YJIT_KEEP_COMPILING;
1281 static codegen_status_t
1282 gen_putspecialobject(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1284 enum vm_special_object_type type = (enum vm_special_object_type)jit_get_arg(jit, 0);
1286 if (type == VM_SPECIAL_OBJECT_VMCORE) {
1287 x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_HEAP);
1288 jit_mov_gc_ptr(jit, cb, REG0, rb_mRubyVMFrozenCore);
1289 mov(cb, stack_top, REG0);
1290 return YJIT_KEEP_COMPILING;
1292 else {
1293 // TODO: implement for VM_SPECIAL_OBJECT_CBASE and
1294 // VM_SPECIAL_OBJECT_CONST_BASE
1295 return YJIT_CANT_COMPILE;
1299 // Get EP at level from CFP
1300 static void
1301 gen_get_ep(codeblock_t *cb, x86opnd_t reg, uint32_t level)
1303 // Load environment pointer EP from CFP
1304 mov(cb, reg, member_opnd(REG_CFP, rb_control_frame_t, ep));
1306 while (level--) {
1307 // Get the previous EP from the current EP
1308 // See GET_PREV_EP(ep) macro
1309 // VALUE *prev_ep = ((VALUE *)((ep)[VM_ENV_DATA_INDEX_SPECVAL] & ~0x03))
1310 mov(cb, reg, mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_SPECVAL));
1311 and(cb, reg, imm_opnd(~0x03));
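// Note: the load above indexes off REG0 rather than `reg`; all callers in
// this file currently pass REG0, so the two coincide. The & ~0x03 mirrors
// GET_PREV_EP(), stripping the low tag bits from the specval slot to recover
// the previous EP pointer.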
1315 // Compute the local table index of a variable from its index relative to the
1316 // environment object.
1317 static uint32_t
1318 slot_to_local_idx(const rb_iseq_t *iseq, int32_t slot_idx)
1320 // Layout illustration
1321 // This is an array of VALUE
1322 // | VM_ENV_DATA_SIZE |
1323 // v v
1324 // low addr <+-------+-------+-------+-------+------------------+
1325 // |local 0|local 1| ... |local n| .... |
1326 // +-------+-------+-------+-------+------------------+
1327 // ^ ^ ^ ^
1328 // +-------+---local_table_size----+ cfp->ep--+
1329 // | |
1330 // +------------------slot_idx----------------+
1332 // See usages of local_var_name() from iseq.c for similar calculation.
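// Worked example, assuming VM_ENV_DATA_SIZE == 3: with local_table_size == 2,
// slot_idx == 3 gives op == 0 and local_idx == 1, while slot_idx == 4 gives
// local_idx == 0.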
1334 // FIXME: unsigned to signed cast below can truncate
1335 int32_t local_table_size = iseq->body->local_table_size;
1336 int32_t op = slot_idx - VM_ENV_DATA_SIZE;
1337 int32_t local_idx = local_table_size - op - 1;
1338 RUBY_ASSERT(local_idx >= 0 && local_idx < local_table_size);
1339 return (uint32_t)local_idx;
1342 static codegen_status_t
1343 gen_getlocal_wc0(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1345 // Compute the offset from BP to the local
1346 // TODO: Type is lindex_t in the interpreter. The following cast can truncate.
1347 // Not in the mood to dance around signed multiplication UB at the moment...
1348 int32_t slot_idx = (int32_t)jit_get_arg(jit, 0);
1349 const int32_t offs = -(SIZEOF_VALUE * slot_idx);
1350 uint32_t local_idx = slot_to_local_idx(jit->iseq, slot_idx);
1352 // Load environment pointer EP (level 0) from CFP
1353 gen_get_ep(cb, REG0, 0);
1355 // Load the local from the EP
1356 mov(cb, REG0, mem_opnd(64, REG0, offs));
1358 // Write the local at SP
1359 x86opnd_t stack_top = ctx_stack_push_local(ctx, local_idx);
1360 mov(cb, stack_top, REG0);
1362 return YJIT_KEEP_COMPILING;
1365 static codegen_status_t
1366 gen_getlocal_generic(ctx_t *ctx, uint32_t local_idx, uint32_t level)
1368 gen_get_ep(cb, REG0, level);
1370 // Load the local from the block
1371 // val = *(vm_get_ep(GET_EP(), level) - idx);
1372 const int32_t offs = -(int32_t)(SIZEOF_VALUE * local_idx);
1373 mov(cb, REG0, mem_opnd(64, REG0, offs));
1375 // Write the local at SP
1376 x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_UNKNOWN);
1377 mov(cb, stack_top, REG0);
1379 return YJIT_KEEP_COMPILING;
1382 static codegen_status_t
1383 gen_getlocal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1385 int32_t idx = (int32_t)jit_get_arg(jit, 0);
1386 int32_t level = (int32_t)jit_get_arg(jit, 1);
1387 return gen_getlocal_generic(ctx, idx, level);
1390 static codegen_status_t
1391 gen_getlocal_wc1(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1393 int32_t idx = (int32_t)jit_get_arg(jit, 0);
1394 return gen_getlocal_generic(ctx, idx, 1);
1397 static codegen_status_t
1398 gen_setlocal_wc0(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1401 vm_env_write(const VALUE *ep, int index, VALUE v)
1403 VALUE flags = ep[VM_ENV_DATA_INDEX_FLAGS];
1404 if (LIKELY((flags & VM_ENV_FLAG_WB_REQUIRED) == 0)) {
1405 VM_STACK_ENV_WRITE(ep, index, v);
1407 else {
1408 vm_env_write_slowpath(ep, index, v);
1413 int32_t slot_idx = (int32_t)jit_get_arg(jit, 0);
1414 uint32_t local_idx = slot_to_local_idx(jit->iseq, slot_idx);
1416 // Load environment pointer EP (level 0) from CFP
1417 gen_get_ep(cb, REG0, 0);
1419 // flags & VM_ENV_FLAG_WB_REQUIRED
1420 x86opnd_t flags_opnd = mem_opnd(64, REG0, sizeof(VALUE) * VM_ENV_DATA_INDEX_FLAGS);
1421 test(cb, flags_opnd, imm_opnd(VM_ENV_FLAG_WB_REQUIRED));
1423 // Create a side-exit to fall back to the interpreter
1424 uint8_t *side_exit = yjit_side_exit(jit, ctx);
1426 // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0
1427 jnz_ptr(cb, side_exit);
1429 // Set the type of the local variable in the context
1430 val_type_t temp_type = ctx_get_opnd_type(ctx, OPND_STACK(0));
1431 ctx_set_local_type(ctx, local_idx, temp_type);
1433 // Pop the value to write from the stack
1434 x86opnd_t stack_top = ctx_stack_pop(ctx, 1);
1435 mov(cb, REG1, stack_top);
1437 // Write the value at the environment pointer
1438 const int32_t offs = -8 * slot_idx;
1439 mov(cb, mem_opnd(64, REG0, offs), REG1);
1441 return YJIT_KEEP_COMPILING;
1444 // Push Qtrue or Qfalse depending on whether the given keyword was supplied by
1445 // the caller.
1446 static codegen_status_t
1447 gen_checkkeyword(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1449 // When a keyword is unspecified past index 32, a hash will be used
1450 // instead. This can only happen in iseqs taking more than 32 keywords.
1451 if (jit->iseq->body->param.keyword->num >= 32) {
1452 return YJIT_CANT_COMPILE;
1455 // The EP offset to the undefined bits local
1456 int32_t bits_offset = (int32_t)jit_get_arg(jit, 0);
1458 // The index of the keyword we want to check
1459 int32_t index = (int32_t)jit_get_arg(jit, 1);
1461 // Load environment pointer EP
1462 gen_get_ep(cb, REG0, 0);
1464 // VALUE kw_bits = *(ep - bits);
1465 x86opnd_t bits_opnd = mem_opnd(64, REG0, sizeof(VALUE) * -bits_offset);
1467 // unsigned int b = (unsigned int)FIX2ULONG(kw_bits);
1468 // if ((b & (0x01 << idx))) {
1470 // We can skip the FIX2ULONG conversion by shifting the bit we test
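// (kw_bits is a Fixnum, i.e. (value << 1) | 1, so bit `idx` of the unboxed
// value lives at bit `idx + 1` of the VALUE; e.g. index == 3 tests 0x10)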
1471 int64_t bit_test = 0x01 << (index + 1);
1472 test(cb, bits_opnd, imm_opnd(bit_test));
1473 mov(cb, REG0, imm_opnd(Qfalse));
1474 mov(cb, REG1, imm_opnd(Qtrue));
1475 cmovz(cb, REG0, REG1);
1477 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_IMM);
1478 mov(cb, stack_ret, REG0);
1480 return YJIT_KEEP_COMPILING;
1483 static codegen_status_t
1484 gen_setlocal_generic(jitstate_t *jit, ctx_t *ctx, uint32_t local_idx, uint32_t level)
1486 // Load environment pointer EP at level
1487 gen_get_ep(cb, REG0, level);
1489 // flags & VM_ENV_FLAG_WB_REQUIRED
1490 x86opnd_t flags_opnd = mem_opnd(64, REG0, sizeof(VALUE) * VM_ENV_DATA_INDEX_FLAGS);
1491 test(cb, flags_opnd, imm_opnd(VM_ENV_FLAG_WB_REQUIRED));
1493 // Create a side-exit to fall back to the interpreter
1494 uint8_t *side_exit = yjit_side_exit(jit, ctx);
1496 // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0
1497 jnz_ptr(cb, side_exit);
1499 // Pop the value to write from the stack
1500 x86opnd_t stack_top = ctx_stack_pop(ctx, 1);
1501 mov(cb, REG1, stack_top);
1503 // Write the value at the environment pointer
1504 const int32_t offs = -(int32_t)(SIZEOF_VALUE * local_idx);
1505 mov(cb, mem_opnd(64, REG0, offs), REG1);
1507 return YJIT_KEEP_COMPILING;
1510 static codegen_status_t
1511 gen_setlocal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1513 int32_t idx = (int32_t)jit_get_arg(jit, 0);
1514 int32_t level = (int32_t)jit_get_arg(jit, 1);
1515 return gen_setlocal_generic(jit, ctx, idx, level);
1518 static codegen_status_t
1519 gen_setlocal_wc1(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1521 int32_t idx = (int32_t)jit_get_arg(jit, 0);
1522 return gen_setlocal_generic(jit, ctx, idx, 1);
1525 static void
1526 gen_jnz_to_target0(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
1528 switch (shape) {
1529 case SHAPE_NEXT0:
1530 case SHAPE_NEXT1:
1531 RUBY_ASSERT(false);
1532 break;
1534 case SHAPE_DEFAULT:
1535 jnz_ptr(cb, target0);
1536 break;
1540 static void
1541 gen_jz_to_target0(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
1543 switch (shape) {
1544 case SHAPE_NEXT0:
1545 case SHAPE_NEXT1:
1546 RUBY_ASSERT(false);
1547 break;
1549 case SHAPE_DEFAULT:
1550 jz_ptr(cb, target0);
1551 break;
1555 static void
1556 gen_jbe_to_target0(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
1558 switch (shape) {
1559 case SHAPE_NEXT0:
1560 case SHAPE_NEXT1:
1561 RUBY_ASSERT(false);
1562 break;
1564 case SHAPE_DEFAULT:
1565 jbe_ptr(cb, target0);
1566 break;
1570 enum jcc_kinds {
1571 JCC_JNE,
1572 JCC_JNZ,
1573 JCC_JZ,
1574 JCC_JE,
1575 JCC_JBE,
1576 JCC_JNA,
1579 // Generate a jump to a stub that recompiles the current YARV instruction on failure.
1580 // When depth_limit is exceeded, generate a jump to a side exit.
1581 static void
1582 jit_chain_guard(enum jcc_kinds jcc, jitstate_t *jit, const ctx_t *ctx, uint8_t depth_limit, uint8_t *side_exit)
1584 branchgen_fn target0_gen_fn;
1586 switch (jcc) {
1587 case JCC_JNE:
1588 case JCC_JNZ:
1589 target0_gen_fn = gen_jnz_to_target0;
1590 break;
1591 case JCC_JZ:
1592 case JCC_JE:
1593 target0_gen_fn = gen_jz_to_target0;
1594 break;
1595 case JCC_JBE:
1596 case JCC_JNA:
1597 target0_gen_fn = gen_jbe_to_target0;
1598 break;
1599 default:
1600 rb_bug("yjit: unimplemented jump kind");
1601 break;
1604 if (ctx->chain_depth < depth_limit) {
1605 ctx_t deeper = *ctx;
1606 deeper.chain_depth++;
1608 gen_branch(
1609 jit,
1610 ctx,
1611 (blockid_t) { jit->iseq, jit->insn_idx },
1612 &deeper,
1613 BLOCKID_NULL,
1614 NULL,
1615 target0_gen_fn
1618 else {
1619 target0_gen_fn(cb, side_exit, NULL, SHAPE_DEFAULT);
1623 enum {
1624 GETIVAR_MAX_DEPTH = 10, // up to 5 different classes, and embedded or not for each
1625 OPT_AREF_MAX_CHAIN_DEPTH = 2, // hashes and arrays
1626 SEND_MAX_DEPTH = 5, // up to 5 different classes
1629 VALUE rb_vm_set_ivar_idx(VALUE obj, uint32_t idx, VALUE val);
1631 // Codegen for setting an instance variable.
1632 // Preconditions:
1633 // - receiver is in REG0
1634 // - receiver has the same class as CLASS_OF(comptime_receiver)
1635 // - no stack push or pops to ctx since the entry to the codegen of the instruction being compiled
1636 static codegen_status_t
1637 gen_set_ivar(jitstate_t *jit, ctx_t *ctx, VALUE recv, VALUE klass, ID ivar_name)
1639 // Save the PC and SP because the callee may allocate
1640 // Note that this modifies REG_SP, which is why we do it first
1641 jit_prepare_routine_call(jit, ctx, REG0);
1643 // Get the operands from the stack
1644 x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
1645 x86opnd_t recv_opnd = ctx_stack_pop(ctx, 1);
1647 uint32_t ivar_index = rb_obj_ensure_iv_index_mapping(recv, ivar_name);
1649 // Call rb_vm_set_ivar_idx with the receiver, the index of the ivar, and the value
1650 mov(cb, C_ARG_REGS[0], recv_opnd);
1651 mov(cb, C_ARG_REGS[1], imm_opnd(ivar_index));
1652 mov(cb, C_ARG_REGS[2], val_opnd);
1653 call_ptr(cb, REG0, (void *)rb_vm_set_ivar_idx);
1655 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_UNKNOWN);
1656 mov(cb, out_opnd, RAX);
1658 return YJIT_KEEP_COMPILING;
1661 // Codegen for getting an instance variable.
1662 // Preconditions:
1663 // - receiver is in REG0
1664 // - receiver has the same class as CLASS_OF(comptime_receiver)
1665 // - no stack push or pops to ctx since the entry to the codegen of the instruction being compiled
1666 static codegen_status_t
1667 gen_get_ivar(jitstate_t *jit, ctx_t *ctx, const int max_chain_depth, VALUE comptime_receiver, ID ivar_name, insn_opnd_t reg0_opnd, uint8_t *side_exit)
1669 VALUE comptime_val_klass = CLASS_OF(comptime_receiver);
1670 const ctx_t starting_context = *ctx; // make a copy for use with jit_chain_guard
1672 // If the class uses the default allocator, instances should all be T_OBJECT
1673 // NOTE: This assumes nobody changes the allocator of the class after allocation.
1674 // Eventually, we can encode whether an object is T_OBJECT or not
1675 // inside object shapes.
1676 if (!RB_TYPE_P(comptime_receiver, T_OBJECT) ||
1677 rb_get_alloc_func(comptime_val_klass) != rb_class_allocate_instance) {
1678 // General case. Call rb_ivar_get().
1679 // VALUE rb_ivar_get(VALUE obj, ID id)
1680 ADD_COMMENT(cb, "call rb_ivar_get()");
1682 // The function could raise exceptions.
1683 jit_prepare_routine_call(jit, ctx, REG1);
1685 mov(cb, C_ARG_REGS[0], REG0);
1686 mov(cb, C_ARG_REGS[1], imm_opnd((int64_t)ivar_name));
1687 call_ptr(cb, REG1, (void *)rb_ivar_get);
1689 if (!reg0_opnd.is_self) {
1690 (void)ctx_stack_pop(ctx, 1);
1692 // Push the ivar on the stack
1693 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_UNKNOWN);
1694 mov(cb, out_opnd, RAX);
1696 // Jump to next instruction. This allows guard chains to share the same successor.
1697 jit_jump_to_next_insn(jit, ctx);
1698 return YJIT_END_BLOCK;
1702 // FIXME:
1703 // This check was added because of a failure in a test involving the
1704 // Nokogiri Document class where we see a T_DATA that still has the default
1705 // allocator.
1706 // Aaron Patterson argues that this is a bug in the C extension, because
1707 // people could call .allocate() on the class and still get a T_OBJECT
1708 // For now I added an extra dynamic check that the receiver is T_OBJECT
1709 // so we can safely pass all the tests in Shopify Core.
1711 // Guard that the receiver is T_OBJECT
1712 // #define RB_BUILTIN_TYPE(x) (int)(((struct RBasic*)(x))->flags & RUBY_T_MASK)
1713 ADD_COMMENT(cb, "guard receiver is T_OBJECT");
1714 mov(cb, REG1, member_opnd(REG0, struct RBasic, flags));
1715 and(cb, REG1, imm_opnd(RUBY_T_MASK));
1716 cmp(cb, REG1, imm_opnd(T_OBJECT));
1717 jit_chain_guard(JCC_JNE, jit, &starting_context, max_chain_depth, side_exit);
1720 // FIXME: Mapping the index could fail when there are too many ivar names. If we're
1721 // compiling for a branch stub, that can cause the exception to be thrown from the
1722 // wrong PC.
1723 uint32_t ivar_index = rb_obj_ensure_iv_index_mapping(comptime_receiver, ivar_name);
1725 // Pop receiver if it's on the temp stack
1726 if (!reg0_opnd.is_self) {
1727 (void)ctx_stack_pop(ctx, 1);
1730 // Compile time self is embedded and the ivar index lands within the object
1731 if (RB_FL_TEST_RAW(comptime_receiver, ROBJECT_EMBED) && ivar_index < ROBJECT_EMBED_LEN_MAX) {
1732 // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
1734 // Guard that self is embedded
1735 // TODO: BT and JC is shorter
1736 ADD_COMMENT(cb, "guard embedded getivar");
1737 x86opnd_t flags_opnd = member_opnd(REG0, struct RBasic, flags);
1738 test(cb, flags_opnd, imm_opnd(ROBJECT_EMBED));
1739 jit_chain_guard(JCC_JZ, jit, &starting_context, max_chain_depth, COUNTED_EXIT(jit, side_exit, getivar_megamorphic));
1741 // Load the variable
1742 x86opnd_t ivar_opnd = mem_opnd(64, REG0, offsetof(struct RObject, as.ary) + ivar_index * SIZEOF_VALUE);
1743 mov(cb, REG1, ivar_opnd);
1745 // Guard that the variable is not Qundef
1746 cmp(cb, REG1, imm_opnd(Qundef));
1747 mov(cb, REG0, imm_opnd(Qnil));
1748 cmove(cb, REG1, REG0);
1750 // Push the ivar on the stack
1751 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_UNKNOWN);
1752 mov(cb, out_opnd, REG1);
1754 else {
1755 // Compile time value is *not* embedded.
1757 // Guard that value is *not* embedded
1758 // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
1759 ADD_COMMENT(cb, "guard extended getivar");
1760 x86opnd_t flags_opnd = member_opnd(REG0, struct RBasic, flags);
1761 test(cb, flags_opnd, imm_opnd(ROBJECT_EMBED));
1762 jit_chain_guard(JCC_JNZ, jit, &starting_context, max_chain_depth, COUNTED_EXIT(jit, side_exit, getivar_megamorphic));
1764 // check that the extended table is big enough
1765 if (ivar_index >= ROBJECT_EMBED_LEN_MAX + 1) {
1766 // Check that the slot is inside the extended table (num_slots > index)
1767 x86opnd_t num_slots = mem_opnd(32, REG0, offsetof(struct RObject, as.heap.numiv));
1768 cmp(cb, num_slots, imm_opnd(ivar_index));
1769 jle_ptr(cb, COUNTED_EXIT(jit, side_exit, getivar_idx_out_of_range));
1772 // Get a pointer to the extended table
1773 x86opnd_t tbl_opnd = mem_opnd(64, REG0, offsetof(struct RObject, as.heap.ivptr));
1774 mov(cb, REG0, tbl_opnd);
1776 // Read the ivar from the extended table
1777 x86opnd_t ivar_opnd = mem_opnd(64, REG0, sizeof(VALUE) * ivar_index);
1778 mov(cb, REG0, ivar_opnd);
1780 // Check that the ivar is not Qundef
1781 cmp(cb, REG0, imm_opnd(Qundef));
1782 mov(cb, REG1, imm_opnd(Qnil));
1783 cmove(cb, REG0, REG1);
1785 // Push the ivar on the stack
1786 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_UNKNOWN);
1787 mov(cb, out_opnd, REG0);
1790 // Jump to next instruction. This allows guard chains to share the same successor.
1791 jit_jump_to_next_insn(jit, ctx);
1792 return YJIT_END_BLOCK;
1795 static codegen_status_t
1796 gen_getinstancevariable(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1798 // Defer compilation so we can specialize on a runtime `self`
1799 if (!jit_at_current_insn(jit)) {
1800 defer_compilation(jit, ctx);
1801 return YJIT_END_BLOCK;
1804 ID ivar_name = (ID)jit_get_arg(jit, 0);
1806 VALUE comptime_val = jit_peek_at_self(jit, ctx);
1807 VALUE comptime_val_klass = CLASS_OF(comptime_val);
1809 // Generate a side exit
1810 uint8_t *side_exit = yjit_side_exit(jit, ctx);
1812 // Guard that the receiver has the same class as the one from compile time.
1813 mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, self));
1815 jit_guard_known_klass(jit, ctx, comptime_val_klass, OPND_SELF, comptime_val, GETIVAR_MAX_DEPTH, side_exit);
1817 return gen_get_ivar(jit, ctx, GETIVAR_MAX_DEPTH, comptime_val, ivar_name, OPND_SELF, side_exit);
1820 void rb_vm_setinstancevariable(const rb_iseq_t *iseq, VALUE obj, ID id, VALUE val, IVC ic);
1822 static codegen_status_t
1823 gen_setinstancevariable(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1825 ID id = (ID)jit_get_arg(jit, 0);
1826 IVC ic = (IVC)jit_get_arg(jit, 1);
1828 // Save the PC and SP because the callee may allocate
1829 // Note that this modifies REG_SP, which is why we do it first
1830 jit_prepare_routine_call(jit, ctx, REG0);
1832 // Get the operands from the stack
1833 x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
1835 // Call rb_vm_setinstancevariable(iseq, obj, id, val, ic);
1836 mov(cb, C_ARG_REGS[1], member_opnd(REG_CFP, rb_control_frame_t, self));
1837 mov(cb, C_ARG_REGS[3], val_opnd);
1838 mov(cb, C_ARG_REGS[2], imm_opnd(id));
1839 mov(cb, C_ARG_REGS[4], const_ptr_opnd(ic));
1840 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], (VALUE)jit->iseq);
1841 call_ptr(cb, REG0, (void *)rb_vm_setinstancevariable);
1843 return YJIT_KEEP_COMPILING;
1846 bool rb_vm_defined(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, rb_num_t op_type, VALUE obj, VALUE v);
1848 static codegen_status_t
1849 gen_defined(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1851 rb_num_t op_type = (rb_num_t)jit_get_arg(jit, 0);
1852 VALUE obj = (VALUE)jit_get_arg(jit, 1);
1853 VALUE pushval = (VALUE)jit_get_arg(jit, 2);
1855 // Save the PC and SP because the callee may allocate
1856 // Note that this modifies REG_SP, which is why we do it first
1857 jit_prepare_routine_call(jit, ctx, REG0);
1859 // Get the operands from the stack
1860 x86opnd_t v_opnd = ctx_stack_pop(ctx, 1);
1862 // Call vm_defined(ec, reg_cfp, op_type, obj, v)
1863 mov(cb, C_ARG_REGS[0], REG_EC);
1864 mov(cb, C_ARG_REGS[1], REG_CFP);
1865 mov(cb, C_ARG_REGS[2], imm_opnd(op_type));
1866 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[3], (VALUE)obj);
1867 mov(cb, C_ARG_REGS[4], v_opnd);
1868 call_ptr(cb, REG0, (void *)rb_vm_defined);
1870 // if (vm_defined(ec, GET_CFP(), op_type, obj, v)) {
1871 // val = pushval;
1872 // }
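// rb_vm_defined() returns a C bool, so only the low byte of RAX (AL) matters:
// if AL is non-zero the expression is defined and we select pushval, otherwise
// we keep Qnil.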
1873 jit_mov_gc_ptr(jit, cb, REG1, (VALUE)pushval);
1874 cmp(cb, AL, imm_opnd(0));
1875 mov(cb, RAX, imm_opnd(Qnil));
1876 cmovnz(cb, RAX, REG1);
1878 // Push the return value onto the stack
1879 val_type_t out_type = SPECIAL_CONST_P(pushval) ? TYPE_IMM : TYPE_UNKNOWN;
1880 x86opnd_t stack_ret = ctx_stack_push(ctx, out_type);
1881 mov(cb, stack_ret, RAX);
1883 return YJIT_KEEP_COMPILING;
1886 static codegen_status_t
1887 gen_checktype(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1889 enum ruby_value_type type_val = (enum ruby_value_type)jit_get_arg(jit, 0);
1890 // Only three types are emitted by compile.c
1891 if (type_val == T_STRING || type_val == T_ARRAY || type_val == T_HASH) {
1892 val_type_t val_type = ctx_get_opnd_type(ctx, OPND_STACK(0));
1893 x86opnd_t val = ctx_stack_pop(ctx, 1);
1895 x86opnd_t stack_ret;
1897 // Check if we know from type information
1898 if ((type_val == T_STRING && val_type.type == ETYPE_STRING) ||
1899 (type_val == T_ARRAY && val_type.type == ETYPE_ARRAY) ||
1900 (type_val == T_HASH && val_type.type == ETYPE_HASH)) {
1901 // guaranteed type match
1902 stack_ret = ctx_stack_push(ctx, TYPE_TRUE);
1903 mov(cb, stack_ret, imm_opnd(Qtrue));
1904 return YJIT_KEEP_COMPILING;
1906 else if (val_type.is_imm || val_type.type != ETYPE_UNKNOWN) {
1907 // guaranteed not to match T_STRING/T_ARRAY/T_HASH
1908 stack_ret = ctx_stack_push(ctx, TYPE_FALSE);
1909 mov(cb, stack_ret, imm_opnd(Qfalse));
1910 return YJIT_KEEP_COMPILING;
1913 mov(cb, REG0, val);
1914 mov(cb, REG1, imm_opnd(Qfalse));
1916 uint32_t ret = cb_new_label(cb, "ret");
1918 if (!val_type.is_heap) {
1919 // if (SPECIAL_CONST_P(val)) {
1920 // Return Qfalse via REG1 if not on heap
1921 test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK));
1922 jnz_label(cb, ret);
1923 cmp(cb, REG0, imm_opnd(Qnil));
1924 jbe_label(cb, ret);
1927 // Check type on object
1928 mov(cb, REG0, mem_opnd(64, REG0, offsetof(struct RBasic, flags)));
1929 and(cb, REG0, imm_opnd(RUBY_T_MASK));
1930 cmp(cb, REG0, imm_opnd(type_val));
1931 mov(cb, REG0, imm_opnd(Qtrue));
1932 // REG1 contains Qfalse from above
1933 cmove(cb, REG1, REG0);
1935 cb_write_label(cb, ret);
1936 stack_ret = ctx_stack_push(ctx, TYPE_IMM);
1937 mov(cb, stack_ret, REG1);
1938 cb_link_labels(cb);
1940 return YJIT_KEEP_COMPILING;
1942 else {
1943 return YJIT_CANT_COMPILE;
1947 static codegen_status_t
1948 gen_concatstrings(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1950 rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
1952 // Save the PC and SP because we are allocating
1953 jit_prepare_routine_call(jit, ctx, REG0);
1955 x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(int32_t)(sizeof(VALUE) * (uint32_t)n));
1957 // call rb_str_concat_literals(long n, const VALUE *strings);
1958 mov(cb, C_ARG_REGS[0], imm_opnd(n));
1959 lea(cb, C_ARG_REGS[1], values_ptr);
1960 call_ptr(cb, REG0, (void *)rb_str_concat_literals);
1962 ctx_stack_pop(ctx, n);
1963 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_STRING);
1964 mov(cb, stack_ret, RAX);
1966 return YJIT_KEEP_COMPILING;
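// Guard that the two topmost stack operands are fixnums. If the recorded types
// rule that out, jump straight to the side exit; otherwise emit runtime checks
// as needed and upgrade the recorded operand types to fixnum.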
1969 static void
1970 guard_two_fixnums(ctx_t *ctx, uint8_t *side_exit)
1972 // Get the stack operand types
1973 val_type_t arg1_type = ctx_get_opnd_type(ctx, OPND_STACK(0));
1974 val_type_t arg0_type = ctx_get_opnd_type(ctx, OPND_STACK(1));
1976 if (arg0_type.is_heap || arg1_type.is_heap) {
1977 jmp_ptr(cb, side_exit);
1978 return;
1981 if (arg0_type.type != ETYPE_FIXNUM && arg0_type.type != ETYPE_UNKNOWN) {
1982 jmp_ptr(cb, side_exit);
1983 return;
1986 if (arg1_type.type != ETYPE_FIXNUM && arg1_type.type != ETYPE_UNKNOWN) {
1987 jmp_ptr(cb, side_exit);
1988 return;
1991 RUBY_ASSERT(!arg0_type.is_heap);
1992 RUBY_ASSERT(!arg1_type.is_heap);
1993 RUBY_ASSERT(arg0_type.type == ETYPE_FIXNUM || arg0_type.type == ETYPE_UNKNOWN);
1994 RUBY_ASSERT(arg1_type.type == ETYPE_FIXNUM || arg1_type.type == ETYPE_UNKNOWN);
1996 // Get stack operands without popping them
1997 x86opnd_t arg1 = ctx_stack_opnd(ctx, 0);
1998 x86opnd_t arg0 = ctx_stack_opnd(ctx, 1);
2000 // If not fixnums, fall back
2001 if (arg0_type.type != ETYPE_FIXNUM) {
2002 ADD_COMMENT(cb, "guard arg0 fixnum");
2003 test(cb, arg0, imm_opnd(RUBY_FIXNUM_FLAG));
2004 jz_ptr(cb, side_exit);
2006 if (arg1_type.type != ETYPE_FIXNUM) {
2007 ADD_COMMENT(cb, "guard arg1 fixnum");
2008 test(cb, arg1, imm_opnd(RUBY_FIXNUM_FLAG));
2009 jz_ptr(cb, side_exit);
2012 // Set stack types in context
2013 ctx_upgrade_opnd_type(ctx, OPND_STACK(0), TYPE_FIXNUM);
2014 ctx_upgrade_opnd_type(ctx, OPND_STACK(1), TYPE_FIXNUM);
2017 // Conditional move operation used by comparison operators
2018 typedef void (*cmov_fn)(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1);
2020 static codegen_status_t
2021 gen_fixnum_cmp(jitstate_t *jit, ctx_t *ctx, cmov_fn cmov_op)
2023 // Defer compilation so we can specialize based on a runtime receiver
2024 if (!jit_at_current_insn(jit)) {
2025 defer_compilation(jit, ctx);
2026 return YJIT_END_BLOCK;
2029 VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2030 VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2032 if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2033 // Create a side-exit to fall back to the interpreter
2034 // Note: we generate the side-exit before popping operands from the stack
2035 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2037 if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_LT)) {
2038 return YJIT_CANT_COMPILE;
2041 // Check that both operands are fixnums
2042 guard_two_fixnums(ctx, side_exit);
2044 // Get the operands from the stack
2045 x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2046 x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2048 // Compare the arguments
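// Fixnum tagging ((n << 1) | 1) preserves signed ordering, so the tagged
// values can be compared directly without untagging.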
2049 xor(cb, REG0_32, REG0_32); // REG0 = Qfalse
2050 mov(cb, REG1, arg0);
2051 cmp(cb, REG1, arg1);
2052 mov(cb, REG1, imm_opnd(Qtrue));
2053 cmov_op(cb, REG0, REG1);
2055 // Push the output on the stack
2056 x86opnd_t dst = ctx_stack_push(ctx, TYPE_UNKNOWN);
2057 mov(cb, dst, REG0);
2059 return YJIT_KEEP_COMPILING;
2061 else {
2062 return gen_opt_send_without_block(jit, ctx, cb);
2066 static codegen_status_t
2067 gen_opt_lt(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2069 return gen_fixnum_cmp(jit, ctx, cmovl);
2072 static codegen_status_t
2073 gen_opt_le(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2075 return gen_fixnum_cmp(jit, ctx, cmovle);
2078 static codegen_status_t
2079 gen_opt_ge(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2081 return gen_fixnum_cmp(jit, ctx, cmovge);
2084 static codegen_status_t
2085 gen_opt_gt(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2087 return gen_fixnum_cmp(jit, ctx, cmovg);
2090 // Implements specialized equality for either two fixnums or two strings
2091 // Returns true if code was generated, otherwise false
2092 static bool
2093 gen_equality_specialized(jitstate_t *jit, ctx_t *ctx, uint8_t *side_exit)
2095 VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2096 VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2098 x86opnd_t a_opnd = ctx_stack_opnd(ctx, 1);
2099 x86opnd_t b_opnd = ctx_stack_opnd(ctx, 0);
2101 if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2102 if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_EQ)) {
2103 // if overridden, emit the generic version
2104 return false;
2107 guard_two_fixnums(ctx, side_exit);
2109 mov(cb, REG0, a_opnd);
2110 cmp(cb, REG0, b_opnd);
2112 mov(cb, REG0, imm_opnd(Qfalse));
2113 mov(cb, REG1, imm_opnd(Qtrue));
2114 cmove(cb, REG0, REG1);
2116 // Push the output on the stack
2117 ctx_stack_pop(ctx, 2);
2118 x86opnd_t dst = ctx_stack_push(ctx, TYPE_IMM);
2119 mov(cb, dst, REG0);
2121 return true;
2123 else if (CLASS_OF(comptime_a) == rb_cString &&
2124 CLASS_OF(comptime_b) == rb_cString) {
2125 if (!assume_bop_not_redefined(jit, STRING_REDEFINED_OP_FLAG, BOP_EQ)) {
2126 // if overridden, emit the generic version
2127 return false;
2130 // Load a and b in preparation for call later
2131 mov(cb, C_ARG_REGS[0], a_opnd);
2132 mov(cb, C_ARG_REGS[1], b_opnd);
2134 // Guard that a is a String
2135 mov(cb, REG0, C_ARG_REGS[0]);
2136 jit_guard_known_klass(jit, ctx, rb_cString, OPND_STACK(1), comptime_a, SEND_MAX_DEPTH, side_exit);
2138 uint32_t ret = cb_new_label(cb, "ret");
2140 // If they are equal by identity, return true
2141 cmp(cb, C_ARG_REGS[0], C_ARG_REGS[1]);
2142 mov(cb, RAX, imm_opnd(Qtrue));
2143 je_label(cb, ret);
2145 // Otherwise guard that b is a T_STRING (from type info) or String (from runtime guard)
2146 if (ctx_get_opnd_type(ctx, OPND_STACK(0)).type != ETYPE_STRING) {
2147 mov(cb, REG0, C_ARG_REGS[1]);
2148 // Note: any T_STRING is valid here, but we check for ::String for simplicity
2149 jit_guard_known_klass(jit, ctx, rb_cString, OPND_STACK(0), comptime_b, SEND_MAX_DEPTH, side_exit);
2152 // Call rb_str_eql_internal(a, b)
2153 call_ptr(cb, REG0, (void *)rb_str_eql_internal);
2155 // Push the output on the stack
2156 cb_write_label(cb, ret);
2157 ctx_stack_pop(ctx, 2);
2158 x86opnd_t dst = ctx_stack_push(ctx, TYPE_IMM);
2159 mov(cb, dst, RAX);
2160 cb_link_labels(cb);
2162 return true;
2164 else {
2165 return false;
2169 static codegen_status_t
2170 gen_opt_eq(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2172 // Defer compilation so we can specialize based on a runtime receiver
2173 if (!jit_at_current_insn(jit)) {
2174 defer_compilation(jit, ctx);
2175 return YJIT_END_BLOCK;
2178 // Create a side-exit to fall back to the interpreter
2179 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2181 if (gen_equality_specialized(jit, ctx, side_exit)) {
2182 jit_jump_to_next_insn(jit, ctx);
2183 return YJIT_END_BLOCK;
2185 else {
2186 return gen_opt_send_without_block(jit, ctx, cb);
2190 static codegen_status_t gen_send_general(jitstate_t *jit, ctx_t *ctx, struct rb_call_data *cd, rb_iseq_t *block);
2192 static codegen_status_t
2193 gen_opt_neq(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2195 // opt_neq is passed two rb_call_data as arguments:
2196 // first for ==, second for !=
2197 struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 1);
2198 return gen_send_general(jit, ctx, cd, NULL);
2201 static codegen_status_t
2202 gen_opt_aref(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2204 struct rb_call_data * cd = (struct rb_call_data *)jit_get_arg(jit, 0);
2205 int32_t argc = (int32_t)vm_ci_argc(cd->ci);
2207 // Only JIT one arg calls like `ary[6]`
2208 if (argc != 1) {
2209 GEN_COUNTER_INC(cb, oaref_argc_not_one);
2210 return YJIT_CANT_COMPILE;
2213 // Defer compilation so we can specialize based on a runtime receiver
2214 if (!jit_at_current_insn(jit)) {
2215 defer_compilation(jit, ctx);
2216 return YJIT_END_BLOCK;
2219 // Remember the context on entry for adding guard chains
2220 const ctx_t starting_context = *ctx;
2222 // Specialize based on compile-time values
2223 VALUE comptime_idx = jit_peek_at_stack(jit, ctx, 0);
2224 VALUE comptime_recv = jit_peek_at_stack(jit, ctx, 1);
2226 // Create a side-exit to fall back to the interpreter
2227 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2229 if (CLASS_OF(comptime_recv) == rb_cArray && RB_FIXNUM_P(comptime_idx)) {
2230 if (!assume_bop_not_redefined(jit, ARRAY_REDEFINED_OP_FLAG, BOP_AREF)) {
2231 return YJIT_CANT_COMPILE;
2234 // Pop the stack operands
2235 x86opnd_t idx_opnd = ctx_stack_pop(ctx, 1);
2236 x86opnd_t recv_opnd = ctx_stack_pop(ctx, 1);
2237 mov(cb, REG0, recv_opnd);
2239 // if (SPECIAL_CONST_P(recv)) {
2240 // Bail if receiver is not a heap object
2241 test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK));
2242 jnz_ptr(cb, side_exit);
2243 cmp(cb, REG0, imm_opnd(Qfalse));
2244 je_ptr(cb, side_exit);
2245 cmp(cb, REG0, imm_opnd(Qnil));
2246 je_ptr(cb, side_exit);
2248 // Bail if recv has a class other than ::Array.
2249 // BOP_AREF check above is only good for ::Array.
2250 mov(cb, REG1, mem_opnd(64, REG0, offsetof(struct RBasic, klass)));
2251 mov(cb, REG0, const_ptr_opnd((void *)rb_cArray));
2252 cmp(cb, REG0, REG1);
2253 jit_chain_guard(JCC_JNE, jit, &starting_context, OPT_AREF_MAX_CHAIN_DEPTH, side_exit);
2255 // Bail if idx is not a FIXNUM
2256 mov(cb, REG1, idx_opnd);
2257 test(cb, REG1, imm_opnd(RUBY_FIXNUM_FLAG));
2258 jz_ptr(cb, COUNTED_EXIT(jit, side_exit, oaref_arg_not_fixnum));
2260 // Call VALUE rb_ary_entry_internal(VALUE ary, long offset).
2261 // It never raises or allocates, so we don't need to write to cfp->pc.
2263 mov(cb, RDI, recv_opnd);
2264 sar(cb, REG1, imm_opnd(1)); // Convert fixnum to long (FIX2LONG)
2265 mov(cb, RSI, REG1);
2266 call_ptr(cb, REG0, (void *)rb_ary_entry_internal);
2268 // Push the return value onto the stack
2269 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2270 mov(cb, stack_ret, RAX);
2273 // Jump to next instruction. This allows guard chains to share the same successor.
2274 jit_jump_to_next_insn(jit, ctx);
2275 return YJIT_END_BLOCK;
2277 else if (CLASS_OF(comptime_recv) == rb_cHash) {
2278 if (!assume_bop_not_redefined(jit, HASH_REDEFINED_OP_FLAG, BOP_AREF)) {
2279 return YJIT_CANT_COMPILE;
2282 x86opnd_t key_opnd = ctx_stack_opnd(ctx, 0);
2283 x86opnd_t recv_opnd = ctx_stack_opnd(ctx, 1);
2285 // Guard that the receiver is a hash
2286 mov(cb, REG0, recv_opnd);
2287 jit_guard_known_klass(jit, ctx, rb_cHash, OPND_STACK(1), comptime_recv, OPT_AREF_MAX_CHAIN_DEPTH, side_exit);
2289 // Setup arguments for rb_hash_aref().
2290 mov(cb, C_ARG_REGS[0], REG0);
2291 mov(cb, C_ARG_REGS[1], key_opnd);
2293 // Prepare to call rb_hash_aref(). It might call #hash on the key.
2294 jit_prepare_routine_call(jit, ctx, REG0);
2296 call_ptr(cb, REG0, (void *)rb_hash_aref);
2298 // Pop the key and the receiver
2299 (void)ctx_stack_pop(ctx, 2);
2301 // Push the return value onto the stack
2302 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2303 mov(cb, stack_ret, RAX);
2305 // Jump to next instruction. This allows guard chains to share the same successor.
2306 jit_jump_to_next_insn(jit, ctx);
2307 return YJIT_END_BLOCK;
2309 else {
2310 // General case. Call the [] method.
2311 return gen_opt_send_without_block(jit, ctx, cb);
2315 static codegen_status_t
2316 gen_opt_aset(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2318 // Defer compilation so we can specialize on a runtime `self`
2319 if (!jit_at_current_insn(jit)) {
2320 defer_compilation(jit, ctx);
2321 return YJIT_END_BLOCK;
2324 VALUE comptime_recv = jit_peek_at_stack(jit, ctx, 2);
2325 VALUE comptime_key = jit_peek_at_stack(jit, ctx, 1);
2327 // Get the operands from the stack
2328 x86opnd_t recv = ctx_stack_opnd(ctx, 2);
2329 x86opnd_t key = ctx_stack_opnd(ctx, 1);
2330 x86opnd_t val = ctx_stack_opnd(ctx, 0);
2332 if (CLASS_OF(comptime_recv) == rb_cArray && FIXNUM_P(comptime_key)) {
2333 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2335 // Guard receiver is an Array
2336 mov(cb, REG0, recv);
2337 jit_guard_known_klass(jit, ctx, rb_cArray, OPND_STACK(2), comptime_recv, SEND_MAX_DEPTH, side_exit);
2339 // Guard key is a fixnum
2340 mov(cb, REG0, key);
2341 jit_guard_known_klass(jit, ctx, rb_cInteger, OPND_STACK(1), comptime_key, SEND_MAX_DEPTH, side_exit);
2343 // Call rb_ary_store
2344 mov(cb, C_ARG_REGS[0], recv);
2345 mov(cb, C_ARG_REGS[1], key);
2346 sar(cb, C_ARG_REGS[1], imm_opnd(1)); // FIX2LONG(key)
2347 mov(cb, C_ARG_REGS[2], val);
2349 // We might allocate or raise
2350 jit_prepare_routine_call(jit, ctx, REG0);
2352 call_ptr(cb, REG0, (void *)rb_ary_store);
2354 // rb_ary_store returns void
2355 // stored value should still be on stack
2356 mov(cb, REG0, ctx_stack_opnd(ctx, 0));
2358 // Push the return value onto the stack
2359 ctx_stack_pop(ctx, 3);
2360 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2361 mov(cb, stack_ret, REG0);
2363 jit_jump_to_next_insn(jit, ctx);
2364 return YJIT_END_BLOCK;
2366 else if (CLASS_OF(comptime_recv) == rb_cHash) {
2367 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2369 // Guard receiver is a Hash
2370 mov(cb, REG0, recv);
2371 jit_guard_known_klass(jit, ctx, rb_cHash, OPND_STACK(2), comptime_recv, SEND_MAX_DEPTH, side_exit);
2373 // Call rb_hash_aset
2374 mov(cb, C_ARG_REGS[0], recv);
2375 mov(cb, C_ARG_REGS[1], key);
2376 mov(cb, C_ARG_REGS[2], val);
2378 // We might allocate or raise
2379 jit_prepare_routine_call(jit, ctx, REG0);
2381 call_ptr(cb, REG0, (void *)rb_hash_aset);
2383 // Push the return value onto the stack
2384 ctx_stack_pop(ctx, 3);
2385 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2386 mov(cb, stack_ret, RAX);
2388 jit_jump_to_next_insn(jit, ctx);
2389 return YJIT_END_BLOCK;
2391 else {
2392 return gen_opt_send_without_block(jit, ctx, cb);
2396 static codegen_status_t
2397 gen_opt_and(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2399 // Defer compilation so we can specialize on a runtime `self`
2400 if (!jit_at_current_insn(jit)) {
2401 defer_compilation(jit, ctx);
2402 return YJIT_END_BLOCK;
2405 VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2406 VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2408 if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2409 // Create a side-exit to fall back to the interpreter
2410 // Note: we generate the side-exit before popping operands from the stack
2411 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2413 if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_AND)) {
2414 return YJIT_CANT_COMPILE;
2417 // Check that both operands are fixnums
2418 guard_two_fixnums(ctx, side_exit);
2420 // Get the operands and destination from the stack
2421 x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2422 x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2424 // Do the bitwise AND: arg0 & arg1
2425 mov(cb, REG0, arg0);
2426 and(cb, REG0, arg1);
2428 // Push the output on the stack
2429 x86opnd_t dst = ctx_stack_push(ctx, TYPE_FIXNUM);
2430 mov(cb, dst, REG0);
2432 return YJIT_KEEP_COMPILING;
2434 else {
2435 // Delegate to send, call the method on the recv
2436 return gen_opt_send_without_block(jit, ctx, cb);
2440 static codegen_status_t
2441 gen_opt_or(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2443 // Defer compilation so we can specialize on a runtime `self`
2444 if (!jit_at_current_insn(jit)) {
2445 defer_compilation(jit, ctx);
2446 return YJIT_END_BLOCK;
2449 VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2450 VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2452 if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2453 // Create a side-exit to fall back to the interpreter
2454 // Note: we generate the side-exit before popping operands from the stack
2455 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2457 if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_OR)) {
2458 return YJIT_CANT_COMPILE;
2461 // Check that both operands are fixnums
2462 guard_two_fixnums(ctx, side_exit);
2464 // Get the operands and destination from the stack
2465 x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2466 x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2468 // Do the bitwise OR: arg0 | arg1
2469 mov(cb, REG0, arg0);
2470 or(cb, REG0, arg1);
2472 // Push the output on the stack
2473 x86opnd_t dst = ctx_stack_push(ctx, TYPE_FIXNUM);
2474 mov(cb, dst, REG0);
2476 return YJIT_KEEP_COMPILING;
2478 else {
2479 // Delegate to send, call the method on the recv
2480 return gen_opt_send_without_block(jit, ctx, cb);
2484 static codegen_status_t
2485 gen_opt_minus(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2487 // Defer compilation so we can specialize on a runtime `self`
2488 if (!jit_at_current_insn(jit)) {
2489 defer_compilation(jit, ctx);
2490 return YJIT_END_BLOCK;
2493 VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2494 VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2496 if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2497 // Create a side-exit to fall back to the interpreter
2498 // Note: we generate the side-exit before popping operands from the stack
2499 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2501 if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_MINUS)) {
2502 return YJIT_CANT_COMPILE;
2505 // Check that both operands are fixnums
2506 guard_two_fixnums(ctx, side_exit);
2508 // Get the operands and destination from the stack
2509 x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2510 x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2512 // Subtract arg0 - arg1 and test for overflow
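// With tagged fixnums, arg0 - arg1 == ((a - b) << 1): the tag bits cancel out,
// so we re-add 1 below to restore the tag. Signed overflow means the result
// no longer fits in a fixnum, so we side-exit.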
2513 mov(cb, REG0, arg0);
2514 sub(cb, REG0, arg1);
2515 jo_ptr(cb, side_exit);
2516 add(cb, REG0, imm_opnd(1));
2518 // Push the output on the stack
2519 x86opnd_t dst = ctx_stack_push(ctx, TYPE_FIXNUM);
2520 mov(cb, dst, REG0);
2522 return YJIT_KEEP_COMPILING;
2524 else {
2525 // Delegate to send, call the method on the recv
2526 return gen_opt_send_without_block(jit, ctx, cb);
2530 static codegen_status_t
2531 gen_opt_plus(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2533 // Defer compilation so we can specialize on a runtime `self`
2534 if (!jit_at_current_insn(jit)) {
2535 defer_compilation(jit, ctx);
2536 return YJIT_END_BLOCK;
2539 VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2540 VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2542 if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2543 // Create a side-exit to fall back to the interpreter
2544 // Note: we generate the side-exit before popping operands from the stack
2545 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2547 if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS)) {
2548 return YJIT_CANT_COMPILE;
2551 // Check that both operands are fixnums
2552 guard_two_fixnums(ctx, side_exit);
2554 // Get the operands and destination from the stack
2555 x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2556 x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2558 // Add arg0 + arg1 and test for overflow
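// With tagged fixnums, (arg0 - 1) + arg1 == ((a + b) << 1) | 1, which is
// already the tagged sum. Signed overflow means the result does not fit in a
// fixnum, so we side-exit.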
2559 mov(cb, REG0, arg0);
2560 sub(cb, REG0, imm_opnd(1));
2561 add(cb, REG0, arg1);
2562 jo_ptr(cb, side_exit);
2564 // Push the output on the stack
2565 x86opnd_t dst = ctx_stack_push(ctx, TYPE_FIXNUM);
2566 mov(cb, dst, REG0);
2568 return YJIT_KEEP_COMPILING;
2570 else {
2571 // Delegate to send, call the method on the recv
2572 return gen_opt_send_without_block(jit, ctx, cb);
2576 static codegen_status_t
2577 gen_opt_mult(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2579 // Delegate to send, call the method on the recv
2580 return gen_opt_send_without_block(jit, ctx, cb);
2583 static codegen_status_t
2584 gen_opt_div(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2586 // Delegate to send, call the method on the recv
2587 return gen_opt_send_without_block(jit, ctx, cb);
2590 VALUE rb_vm_opt_mod(VALUE recv, VALUE obj);
2592 static codegen_status_t
2593 gen_opt_mod(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2595 // Save the PC and SP because the callee may allocate bignums
2596 // Note that this modifies REG_SP, which is why we do it first
2597 jit_prepare_routine_call(jit, ctx, REG0);
2599 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2601 // Get the operands from the stack
2602 x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2603 x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2605 // Call rb_vm_opt_mod(VALUE recv, VALUE obj)
2606 mov(cb, C_ARG_REGS[0], arg0);
2607 mov(cb, C_ARG_REGS[1], arg1);
2608 call_ptr(cb, REG0, (void *)rb_vm_opt_mod);
2610 // If val == Qundef, bail to do a method call
2611 cmp(cb, RAX, imm_opnd(Qundef));
2612 je_ptr(cb, side_exit);
2614 // Push the return value onto the stack
2615 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2616 mov(cb, stack_ret, RAX);
2618 return YJIT_KEEP_COMPILING;
2621 static codegen_status_t
2622 gen_opt_ltlt(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2624 // Delegate to send, call the method on the recv
2625 return gen_opt_send_without_block(jit, ctx, cb);
2628 static codegen_status_t
2629 gen_opt_nil_p(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2631 // Delegate to send, call the method on the recv
2632 return gen_opt_send_without_block(jit, ctx, cb);
2635 static codegen_status_t
2636 gen_opt_empty_p(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2638 // Delegate to send, call the method on the recv
2639 return gen_opt_send_without_block(jit, ctx, cb);
2642 static codegen_status_t
2643 gen_opt_str_freeze(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2645 if (!assume_bop_not_redefined(jit, STRING_REDEFINED_OP_FLAG, BOP_FREEZE)) {
2646 return YJIT_CANT_COMPILE;
2649 VALUE str = jit_get_arg(jit, 0);
2650 jit_mov_gc_ptr(jit, cb, REG0, str);
2652 // Push the return value onto the stack
2653 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_STRING);
2654 mov(cb, stack_ret, REG0);
2656 return YJIT_KEEP_COMPILING;
2659 static codegen_status_t
2660 gen_opt_str_uminus(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2662 if (!assume_bop_not_redefined(jit, STRING_REDEFINED_OP_FLAG, BOP_UMINUS)) {
2663 return YJIT_CANT_COMPILE;
2666 VALUE str = jit_get_arg(jit, 0);
2667 jit_mov_gc_ptr(jit, cb, REG0, str);
2669 // Push the return value onto the stack
2670 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_STRING);
2671 mov(cb, stack_ret, REG0);
2673 return YJIT_KEEP_COMPILING;
2676 static codegen_status_t
2677 gen_opt_not(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2679 return gen_opt_send_without_block(jit, ctx, cb);
2682 static codegen_status_t
2683 gen_opt_size(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2685 return gen_opt_send_without_block(jit, ctx, cb);
2688 static codegen_status_t
2689 gen_opt_length(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2691 return gen_opt_send_without_block(jit, ctx, cb);
2694 static codegen_status_t
2695 gen_opt_regexpmatch2(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2697 return gen_opt_send_without_block(jit, ctx, cb);
2700 static codegen_status_t
2701 gen_opt_case_dispatch(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2703 // Normally this instruction would look up the key in a hash and jump to an
2704 // offset based on that.
2705 // Instead we can take the fallback case and continue with the next
2706 // instruction.
2707 // We'd hope that our jitted code will be sufficiently fast without the
2708 // hash lookup, at least for small hashes, but it's worth revisiting this
2709 // assumption in the future.
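// For example, a `case` whose `when` clauses are all static literals compiles
// to opt_case_dispatch followed by the equivalent sequential `when` checks, so
// taking the fallback path here is still correct, just potentially slower.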
2711 ctx_stack_pop(ctx, 1);
2713 return YJIT_KEEP_COMPILING; // continue with the next instruction
2716 static void
2717 gen_branchif_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
2719 switch (shape) {
2720 case SHAPE_NEXT0:
2721 jz_ptr(cb, target1);
2722 break;
2724 case SHAPE_NEXT1:
2725 jnz_ptr(cb, target0);
2726 break;
2728 case SHAPE_DEFAULT:
2729 jnz_ptr(cb, target0);
2730 jmp_ptr(cb, target1);
2731 break;
2735 static codegen_status_t
2736 gen_branchif(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2738 int32_t jump_offset = (int32_t)jit_get_arg(jit, 0);
2740 // Check for interrupts, but only on backward branches that may create loops
2741 if (jump_offset < 0) {
2742 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2743 yjit_check_ints(cb, side_exit);
2746 // Test if any bit (outside of the Qnil bit) is on
2747 // RUBY_Qfalse /* ...0000 0000 */
2748 // RUBY_Qnil /* ...0000 1000 */
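// val & ~Qnil is zero only for Qfalse (0b0000) and Qnil (0b1000), so the zero
// flag is set exactly when the value is falsey.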
2749 x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
2750 test(cb, val_opnd, imm_opnd(~Qnil));
2752 // Get the branch target instruction offsets
2753 uint32_t next_idx = jit_next_insn_idx(jit);
2754 uint32_t jump_idx = next_idx + jump_offset;
2755 blockid_t next_block = { jit->iseq, next_idx };
2756 blockid_t jump_block = { jit->iseq, jump_idx };
2758 // Generate the branch instructions
2759 gen_branch(
2760 jit,
2761 ctx,
2762 jump_block,
2763 ctx,
2764 next_block,
2765 ctx,
2766 gen_branchif_branch
2769 return YJIT_END_BLOCK;
2772 static void
2773 gen_branchunless_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
2775 switch (shape) {
2776 case SHAPE_NEXT0:
2777 jnz_ptr(cb, target1);
2778 break;
2780 case SHAPE_NEXT1:
2781 jz_ptr(cb, target0);
2782 break;
2784 case SHAPE_DEFAULT:
2785 jz_ptr(cb, target0);
2786 jmp_ptr(cb, target1);
2787 break;
2791 static codegen_status_t
2792 gen_branchunless(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2794 int32_t jump_offset = (int32_t)jit_get_arg(jit, 0);
2796 // Check for interrupts, but only on backward branches that may create loops
2797 if (jump_offset < 0) {
2798 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2799 yjit_check_ints(cb, side_exit);
2802 // Test if any bit (outside of the Qnil bit) is on
2803 // RUBY_Qfalse /* ...0000 0000 */
2804 // RUBY_Qnil /* ...0000 1000 */
2805 x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
2806 test(cb, val_opnd, imm_opnd(~Qnil));
2808 // Get the branch target instruction offsets
2809 uint32_t next_idx = jit_next_insn_idx(jit);
2810 uint32_t jump_idx = next_idx + jump_offset;
2811 blockid_t next_block = { jit->iseq, next_idx };
2812 blockid_t jump_block = { jit->iseq, jump_idx };
2814 // Generate the branch instructions
2815 gen_branch(
2816 jit,
2817 ctx,
2818 jump_block,
2819 ctx,
2820 next_block,
2821 ctx,
2822 gen_branchunless_branch
2825 return YJIT_END_BLOCK;
2828 static void
2829 gen_branchnil_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
2831 switch (shape) {
2832 case SHAPE_NEXT0:
2833 jne_ptr(cb, target1);
2834 break;
2836 case SHAPE_NEXT1:
2837 je_ptr(cb, target0);
2838 break;
2840 case SHAPE_DEFAULT:
2841 je_ptr(cb, target0);
2842 jmp_ptr(cb, target1);
2843 break;
2847 static codegen_status_t
2848 gen_branchnil(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2850 int32_t jump_offset = (int32_t)jit_get_arg(jit, 0);
2852 // Check for interrupts, but only on backward branches that may create loops
2853 if (jump_offset < 0) {
2854 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2855 yjit_check_ints(cb, side_exit);
2858 // Test if the value is Qnil
2859 // RUBY_Qnil /* ...0000 1000 */
2860 x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
2861 cmp(cb, val_opnd, imm_opnd(Qnil));
2863 // Get the branch target instruction offsets
2864 uint32_t next_idx = jit_next_insn_idx(jit);
2865 uint32_t jump_idx = next_idx + jump_offset;
2866 blockid_t next_block = { jit->iseq, next_idx };
2867 blockid_t jump_block = { jit->iseq, jump_idx };
2869 // Generate the branch instructions
2870 gen_branch(
2871 jit,
2872 ctx,
2873 jump_block,
2874 ctx,
2875 next_block,
2876 ctx,
2877 gen_branchnil_branch
2880 return YJIT_END_BLOCK;
2883 static codegen_status_t
2884 gen_jump(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2886 int32_t jump_offset = (int32_t)jit_get_arg(jit, 0);
2888 // Check for interrupts, but only on backward branches that may create loops
2889 if (jump_offset < 0) {
2890 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2891 yjit_check_ints(cb, side_exit);
2894 // Get the branch target instruction offsets
2895 uint32_t jump_idx = jit_next_insn_idx(jit) + jump_offset;
2896 blockid_t jump_block = { jit->iseq, jump_idx };
2898 // Generate the jump instruction
2899 gen_direct_jump(
2900 jit,
2901 ctx,
2902 jump_block
2905 return YJIT_END_BLOCK;
2909 Guard that self or a stack operand has the same class as `known_klass`, using
2910 `sample_instance` to speculate about the shape of the runtime value.
2911 FIXNUM and on-heap integers are treated as if they have distinct classes, and
2912 the guard generated for one will fail for the other.
2914 Recompile as a contingency if possible, or take a side exit as a last resort.
2916 static bool
2917 jit_guard_known_klass(jitstate_t *jit, ctx_t *ctx, VALUE known_klass, insn_opnd_t insn_opnd, VALUE sample_instance, const int max_chain_depth, uint8_t *side_exit)
2919 val_type_t val_type = ctx_get_opnd_type(ctx, insn_opnd);
2921 if (known_klass == rb_cNilClass) {
2922 RUBY_ASSERT(!val_type.is_heap);
2923 if (val_type.type != ETYPE_NIL) {
2924 RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2926 ADD_COMMENT(cb, "guard object is nil");
2927 cmp(cb, REG0, imm_opnd(Qnil));
2928 jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
2930 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_NIL);
2933 else if (known_klass == rb_cTrueClass) {
2934 RUBY_ASSERT(!val_type.is_heap);
2935 if (val_type.type != ETYPE_TRUE) {
2936 RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2938 ADD_COMMENT(cb, "guard object is true");
2939 cmp(cb, REG0, imm_opnd(Qtrue));
2940 jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
2942 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_TRUE);
2945 else if (known_klass == rb_cFalseClass) {
2946 RUBY_ASSERT(!val_type.is_heap);
2947 if (val_type.type != ETYPE_FALSE) {
2948 RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2950 ADD_COMMENT(cb, "guard object is false");
2951 STATIC_ASSERT(qfalse_is_zero, Qfalse == 0);
2952 test(cb, REG0, REG0);
2953 jit_chain_guard(JCC_JNZ, jit, ctx, max_chain_depth, side_exit);
2955 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_FALSE);
2958 else if (known_klass == rb_cInteger && FIXNUM_P(sample_instance)) {
2959 RUBY_ASSERT(!val_type.is_heap);
2960 // We will guard fixnum and bignum as though they were separate classes
2961 // BIGNUM can be handled by the general else case below
2962 if (val_type.type != ETYPE_FIXNUM || !val_type.is_imm) {
2963 RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2965 ADD_COMMENT(cb, "guard object is fixnum");
2966 test(cb, REG0, imm_opnd(RUBY_FIXNUM_FLAG));
2967 jit_chain_guard(JCC_JZ, jit, ctx, max_chain_depth, side_exit);
2968 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_FIXNUM);
2971 else if (known_klass == rb_cSymbol && STATIC_SYM_P(sample_instance)) {
2972 RUBY_ASSERT(!val_type.is_heap);
2973 // We will guard STATIC vs DYNAMIC as though they were separate classes
2974 // DYNAMIC symbols can be handled by the general else case below
2975 if (val_type.type != ETYPE_SYMBOL || !val_type.is_imm) {
2976 RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2978 ADD_COMMENT(cb, "guard object is static symbol");
2979 STATIC_ASSERT(special_shift_is_8, RUBY_SPECIAL_SHIFT == 8);
2980 cmp(cb, REG0_8, imm_opnd(RUBY_SYMBOL_FLAG));
2981 jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
2982 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_STATIC_SYMBOL);
2985 else if (known_klass == rb_cFloat && FLONUM_P(sample_instance)) {
2986 RUBY_ASSERT(!val_type.is_heap);
2987 if (val_type.type != ETYPE_FLONUM || !val_type.is_imm) {
2988 RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2990 // We will guard flonum vs heap float as though they were separate classes
2991 ADD_COMMENT(cb, "guard object is flonum");
2992 mov(cb, REG1, REG0);
2993 and(cb, REG1, imm_opnd(RUBY_FLONUM_MASK));
2994 cmp(cb, REG1, imm_opnd(RUBY_FLONUM_FLAG));
2995 jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
2996 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_FLONUM);
2999 else if (FL_TEST(known_klass, FL_SINGLETON) && sample_instance == rb_attr_get(known_klass, id__attached__)) {
3000 // Singleton classes are attached to one specific object, so we can
3001 // avoid one memory access (and potentially the is_heap check) by
3002 // looking for the expected object directly.
3003 // Note that in case the sample instance has a singleton class that
3004 // doesn't attach to the sample instance, it means the sample instance
3005 // has an empty singleton class that hasn't been materialized yet. In
3006 // this case, comparing against the sample instance doesn't guarantee
3007 // that its singleton class is empty, so we can't avoid the memory
3008 // access. As an example, `Object.new.singleton_class` is an object in
3009 // this situation.
3010 ADD_COMMENT(cb, "guard known object with singleton class");
3011 // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the object.
3012 jit_mov_gc_ptr(jit, cb, REG1, sample_instance);
3013 cmp(cb, REG0, REG1);
3014 jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
3016 else {
3017 RUBY_ASSERT(!val_type.is_imm);
3019 // Check that the receiver is a heap object
3020 // Note: if we get here, the class doesn't have immediate instances.
3021 if (!val_type.is_heap) {
3022 ADD_COMMENT(cb, "guard not immediate");
3023 RUBY_ASSERT(Qfalse < Qnil);
3024 test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK));
3025 jit_chain_guard(JCC_JNZ, jit, ctx, max_chain_depth, side_exit);
3026 cmp(cb, REG0, imm_opnd(Qnil));
3027 jit_chain_guard(JCC_JBE, jit, ctx, max_chain_depth, side_exit);
3029 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_HEAP);
3032 x86opnd_t klass_opnd = mem_opnd(64, REG0, offsetof(struct RBasic, klass));
3034 // Bail if receiver class is different from known_klass
3035 // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the class.
3036 ADD_COMMENT(cb, "guard known class");
3037 jit_mov_gc_ptr(jit, cb, REG1, known_klass);
3038 cmp(cb, klass_opnd, REG1);
3039 jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
3042 return true;
3045 // Generate ancestry guard for protected callee.
3046 // Calls to protected callees only go through when self.is_a?(klass_that_defines_the_callee).
3047 static void
3048 jit_protected_callee_ancestry_guard(jitstate_t *jit, codeblock_t *cb, const rb_callable_method_entry_t *cme, uint8_t *side_exit)
3050 // See vm_call_method().
3051 mov(cb, C_ARG_REGS[0], member_opnd(REG_CFP, rb_control_frame_t, self));
3052 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], cme->defined_class);
3053 // Note: PC isn't written to the current control frame as rb_obj_is_kind_of() shouldn't raise.
3054 // VALUE rb_obj_is_kind_of(VALUE obj, VALUE klass);
3055 call_ptr(cb, REG0, (void *)&rb_obj_is_kind_of);
3056 test(cb, RAX, RAX);
3057 jz_ptr(cb, COUNTED_EXIT(jit, side_exit, send_se_protected_check_failed));
3060 // Return true when the codegen function generates code.
3061 // known_recv_klass is non-NULL when the caller has used jit_guard_known_klass().
3062 // See yjit_reg_method().
3063 typedef bool (*method_codegen_t)(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass);
3065 // Register a specialized codegen function for a particular method. Note that
3066 // if the function returns true, the code it generates runs without a
3067 // control frame and without interrupt checks. To avoid creating observable
3068 // behavior changes, the codegen function should only target simple code paths
3069 // that do not allocate and do not make method calls.
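// A hypothetical registration (the real ones are made during codegen
// initialization) would look like:
//     yjit_reg_method(rb_cNilClass, "nil?", jit_rb_true);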
3070 static void
3071 yjit_reg_method(VALUE klass, const char *mid_str, method_codegen_t gen_fn)
3073 ID mid = rb_intern(mid_str);
3074 const rb_method_entry_t *me = rb_method_entry_at(klass, mid);
3076 if (!me) {
3077 rb_bug("undefined optimized method: %s", rb_id2name(mid));
3080 // For now, only cfuncs are supported
3081 RUBY_ASSERT(me && me->def);
3082 RUBY_ASSERT(me->def->type == VM_METHOD_TYPE_CFUNC);
3084 st_insert(yjit_method_codegen_table, (st_data_t)me->def->method_serial, (st_data_t)gen_fn);
3087 // Codegen for rb_obj_not().
3088 // Note: the caller is responsible for generating all the right guards, including
3089 // arity guards.
3090 static bool
3091 jit_rb_obj_not(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3093 const val_type_t recv_opnd = ctx_get_opnd_type(ctx, OPND_STACK(0));
3095 if (recv_opnd.type == ETYPE_NIL || recv_opnd.type == ETYPE_FALSE) {
3096 ADD_COMMENT(cb, "rb_obj_not(nil_or_false)");
3097 ctx_stack_pop(ctx, 1);
3098 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_TRUE);
3099 mov(cb, out_opnd, imm_opnd(Qtrue));
3101 else if (recv_opnd.is_heap || recv_opnd.type != ETYPE_UNKNOWN) {
3102 // Note: recv_opnd.type != ETYPE_NIL && recv_opnd.type != ETYPE_FALSE.
3103 ADD_COMMENT(cb, "rb_obj_not(truthy)");
3104 ctx_stack_pop(ctx, 1);
3105 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_FALSE);
3106 mov(cb, out_opnd, imm_opnd(Qfalse));
3108 else {
3109 // jit_guard_known_klass() already ran on the receiver, which should
3110 // have deduced the type of the receiver. This case should be
3111 // rare if not unreachable.
3112 return false;
3114 return true;
3117 // Codegen for rb_true()
3118 static bool
3119 jit_rb_true(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3121 ADD_COMMENT(cb, "nil? == true");
3122 ctx_stack_pop(ctx, 1);
3123 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_TRUE);
3124 mov(cb, stack_ret, imm_opnd(Qtrue));
3125 return true;
3128 // Codegen for rb_false()
3129 static bool
3130 jit_rb_false(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3132 ADD_COMMENT(cb, "nil? == false");
3133 ctx_stack_pop(ctx, 1);
3134 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_FALSE);
3135 mov(cb, stack_ret, imm_opnd(Qfalse));
3136 return true;
3139 // Codegen for rb_obj_equal()
3140 // object identity comparison
3141 static bool
3142 jit_rb_obj_equal(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3144 ADD_COMMENT(cb, "equal?");
3145 x86opnd_t obj1 = ctx_stack_pop(ctx, 1);
3146 x86opnd_t obj2 = ctx_stack_pop(ctx, 1);
3148 mov(cb, REG0, obj1);
3149 cmp(cb, REG0, obj2);
3150 mov(cb, REG0, imm_opnd(Qtrue));
3151 mov(cb, REG1, imm_opnd(Qfalse));
3152 cmovne(cb, REG0, REG1);
3154 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_IMM);
3155 mov(cb, stack_ret, REG0);
3156 return true;
3159 static VALUE
3160 yjit_str_bytesize(VALUE str)
3162 return LONG2NUM(RSTRING_LEN(str));
3165 static bool
3166 jit_rb_str_bytesize(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3168 ADD_COMMENT(cb, "String#bytesize");
3170 x86opnd_t recv = ctx_stack_pop(ctx, 1);
3171 mov(cb, C_ARG_REGS[0], recv);
3172 call_ptr(cb, REG0, (void *)&yjit_str_bytesize);
3174 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_FIXNUM);
3175 mov(cb, out_opnd, RAX);
3177 return true;
3180 // Codegen for rb_str_to_s()
3181 // When String#to_s is called on a String instance, the method returns self and
3182 // most of the overhead comes from setting up the method call. We observed that
3183 // this situation happens a lot in some workloads.
3184 static bool
3185 jit_rb_str_to_s(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *recv_known_klass)
3187 if (recv_known_klass && *recv_known_klass == rb_cString) {
3188 ADD_COMMENT(cb, "to_s on plain string");
3189 // The method returns the receiver, which is already on the stack.
3190 // No stack movement.
3191 return true;
3193 return false;
3196 static bool
3197 jit_thread_s_current(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *recv_known_klass)
3199 ADD_COMMENT(cb, "Thread.current");
3200 ctx_stack_pop(ctx, 1);
3202 // ec->thread_ptr
3203 mov(cb, REG0, member_opnd(REG_EC, rb_execution_context_t, thread_ptr));
3205 // thread->self
3206 mov(cb, REG0, member_opnd(REG0, rb_thread_t, self));
3208 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HEAP);
3209 mov(cb, stack_ret, REG0);
3210 return true;
3213 // Check if we know how to codegen for a particular cfunc method
3214 static method_codegen_t
3215 lookup_cfunc_codegen(const rb_method_definition_t *def)
3217 method_codegen_t gen_fn;
3218 if (st_lookup(yjit_method_codegen_table, def->method_serial, (st_data_t *)&gen_fn)) {
3219 return gen_fn;
3221 return NULL;
3224 // Is anyone listening for :c_call and :c_return events currently?
3225 static bool
3226 c_method_tracing_currently_enabled(const jitstate_t *jit)
3228 rb_event_flag_t tracing_events;
3229 if (rb_multi_ractor_p()) {
3230 tracing_events = ruby_vm_event_enabled_global_flags;
3232 else {
3233 // At the time of writing, events are never removed from
3234 // ruby_vm_event_enabled_global_flags, so always checking it would
3235 // mean we don't compile even after tracing is disabled.
3236 tracing_events = rb_ec_ractor_hooks(jit->ec)->events;
3239 return tracing_events & (RUBY_EVENT_C_CALL | RUBY_EVENT_C_RETURN);
3242 // Called at runtime to build hashes of passed kwargs
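// For a hypothetical call like foo(a: 1, b: 2), keyword_len is 2 and the two
// values sit just below sp in keyword order, so (sp - kw_len)[i] pairs each
// keyword with its value.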
3243 static VALUE
3244 yjit_runtime_build_kwhash(const struct rb_callinfo *ci, const VALUE *sp) {
3245 // similar to args_kw_argv_to_hash
3246 const VALUE *const passed_keywords = vm_ci_kwarg(ci)->keywords;
3247 const int kw_len = vm_ci_kwarg(ci)->keyword_len;
3248 const VALUE h = rb_hash_new_with_size(kw_len);
3250 for (int i = 0; i < kw_len; i++) {
3251 rb_hash_aset(h, passed_keywords[i], (sp - kw_len)[i]);
3253 return h;
3256 static codegen_status_t
3257 gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *recv_known_klass)
3259 const rb_method_cfunc_t *cfunc = UNALIGNED_MEMBER_PTR(cme->def, body.cfunc);
3261 const struct rb_callinfo_kwarg *kw_arg = vm_ci_kwarg(ci);
3262 const int kw_arg_num = kw_arg ? kw_arg->keyword_len : 0;
3264 // Number of args which will be passed through to the callee
3265 // This is adjusted by the kwargs being combined into a hash.
3266 const int passed_argc = kw_arg ? argc - kw_arg_num + 1 : argc;
3268 // If the argument count doesn't match
3269 if (cfunc->argc >= 0 && cfunc->argc != passed_argc) {
3270 GEN_COUNTER_INC(cb, send_cfunc_argc_mismatch);
3271 return YJIT_CANT_COMPILE;
3274 // Don't JIT functions that need C stack arguments for now
3275 if (cfunc->argc >= 0 && passed_argc + 1 > NUM_C_ARG_REGS) {
3276 GEN_COUNTER_INC(cb, send_cfunc_toomany_args);
3277 return YJIT_CANT_COMPILE;
3280 if (c_method_tracing_currently_enabled(jit)) {
3281 // Don't JIT if tracing c_call or c_return
3282 GEN_COUNTER_INC(cb, send_cfunc_tracing);
3283 return YJIT_CANT_COMPILE;
3286 // Delegate to codegen for C methods if we have it.
3288 method_codegen_t known_cfunc_codegen;
3289 if (!kw_arg && (known_cfunc_codegen = lookup_cfunc_codegen(cme->def))) {
3290 if (known_cfunc_codegen(jit, ctx, ci, cme, block, argc, recv_known_klass)) {
3291 // cfunc codegen generated code. Terminate the block so
3292 // there aren't multiple calls in the same block.
3293 jit_jump_to_next_insn(jit, ctx);
3294 return YJIT_END_BLOCK;
3299 // Callee method ID
3300 //ID mid = vm_ci_mid(ci);
3301 //printf("JITting call to C function \"%s\", argc: %lu\n", rb_id2name(mid), argc);
3302 //print_str(cb, "");
3303 //print_str(cb, "calling CFUNC:");
3304 //print_str(cb, rb_id2name(mid));
3305 //print_str(cb, "recv");
3306 //print_ptr(cb, recv);
3308 // Create a side-exit to fall back to the interpreter
3309 uint8_t *side_exit = yjit_side_exit(jit, ctx);
3311 // Check for interrupts
3312 yjit_check_ints(cb, side_exit);
3314 // Stack overflow check
3315 // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
3316 // REG_CFP <= REG_SP + 4 * sizeof(VALUE) + 2 * sizeof(rb_control_frame_t)
3317 lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 4 + 2 * sizeof(rb_control_frame_t)));
3318 cmp(cb, REG_CFP, REG0);
3319 jle_ptr(cb, COUNTED_EXIT(jit, side_exit, send_se_cf_overflow));
3321 // Points to the receiver operand on the stack
3322 x86opnd_t recv = ctx_stack_opnd(ctx, argc);
3324 // Store incremented PC into current control frame in case callee raises.
3325 jit_save_pc(jit, REG0);
3327 if (block) {
3328 // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
3329 // VM_CFP_TO_CAPTURED_BLOCK takes &cfp->self; rb_captured_block->code.iseq aliases
3330 // with cfp->block_code.
3331 jit_mov_gc_ptr(jit, cb, REG0, (VALUE)block);
3332 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, block_code), REG0);
3335 // Increment the stack pointer by 3 (in the callee)
3336 // sp += 3
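// The three slots written below hold the callee's method entry, block handler
// and frame type, matching the layout that vm_push_frame() sets up.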
3337 lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 3));
3339 // Write method entry at sp[-3]
3340 // sp[-3] = me;
3341 // Put compile time cme into REG1. It's assumed to be valid because we are notified when
3342 // any cme we depend on becomes outdated. See rb_yjit_method_lookup_change().
3343 jit_mov_gc_ptr(jit, cb, REG1, (VALUE)cme);
3344 mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
3346 // Write block handler at sp[-2]
3347 // sp[-2] = block_handler;
3348 if (block) {
3349 // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
3350 lea(cb, REG1, member_opnd(REG_CFP, rb_control_frame_t, self));
3351 or(cb, REG1, imm_opnd(1));
3352 mov(cb, mem_opnd(64, REG0, 8 * -2), REG1);
3354 else {
3355 mov(cb, mem_opnd(64, REG0, 8 * -2), imm_opnd(VM_BLOCK_HANDLER_NONE));
3358 // Write env flags at sp[-1]
3359 // sp[-1] = frame_type;
3360 uint64_t frame_type = VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL;
3361 if (kw_arg) {
3362 frame_type |= VM_FRAME_FLAG_CFRAME_KW;
3364 mov(cb, mem_opnd(64, REG0, 8 * -1), imm_opnd(frame_type));
3366 // Allocate a new CFP (ec->cfp--)
3367 sub(
3369 member_opnd(REG_EC, rb_execution_context_t, cfp),
3370 imm_opnd(sizeof(rb_control_frame_t))
3373 // Setup the new frame
3374 // *cfp = (const struct rb_control_frame_struct) {
3375 // .pc = 0,
3376 // .sp = sp,
3377 // .iseq = 0,
3378 // .self = recv,
3379 // .ep = sp - 1,
3380 // .block_code = 0,
3381 // .__bp__ = sp,
3382 // };
3383 mov(cb, REG1, member_opnd(REG_EC, rb_execution_context_t, cfp));
3384 mov(cb, member_opnd(REG1, rb_control_frame_t, pc), imm_opnd(0));
3385 mov(cb, member_opnd(REG1, rb_control_frame_t, sp), REG0);
3386 mov(cb, member_opnd(REG1, rb_control_frame_t, iseq), imm_opnd(0));
3387 mov(cb, member_opnd(REG1, rb_control_frame_t, block_code), imm_opnd(0));
3388 mov(cb, member_opnd(REG1, rb_control_frame_t, __bp__), REG0);
3389 sub(cb, REG0, imm_opnd(sizeof(VALUE)));
3390 mov(cb, member_opnd(REG1, rb_control_frame_t, ep), REG0);
3391 mov(cb, REG0, recv);
3392 mov(cb, member_opnd(REG1, rb_control_frame_t, self), REG0);
3394 // Verify that we are calling the right function
3395 if (YJIT_CHECK_MODE > 0) {
3396 // Call check_cfunc_dispatch
3397 mov(cb, C_ARG_REGS[0], recv);
3398 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], (VALUE)ci);
3399 mov(cb, C_ARG_REGS[2], const_ptr_opnd((void *)cfunc->func));
3400 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[3], (VALUE)cme);
3401 call_ptr(cb, REG0, (void *)&check_cfunc_dispatch);
3404 if (kw_arg) {
3405 // Build a hash from all kwargs passed
3406 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], (VALUE)ci);
3407 lea(cb, C_ARG_REGS[1], ctx_sp_opnd(ctx, 0));
3408 call_ptr(cb, REG0, (void *)&yjit_runtime_build_kwhash);
3410 // Replace the stack location at the start of kwargs with the new hash
3411 x86opnd_t stack_opnd = ctx_stack_opnd(ctx, argc - passed_argc);
3412 mov(cb, stack_opnd, RAX);
3415 // Non-variadic method
3416 if (cfunc->argc >= 0) {
3417 // Copy the arguments from the stack to the C argument registers
3418 // self is the 0th argument and is at index argc from the stack top
3419 for (int32_t i = 0; i < passed_argc + 1; ++i)
3421 x86opnd_t stack_opnd = ctx_stack_opnd(ctx, argc - i);
3422 x86opnd_t c_arg_reg = C_ARG_REGS[i];
3423 mov(cb, c_arg_reg, stack_opnd);
3426 // Variadic method
3427 if (cfunc->argc == -1) {
3428 // The method gets a pointer to the first argument
3429 // rb_f_puts(int argc, VALUE *argv, VALUE recv)
3430 mov(cb, C_ARG_REGS[0], imm_opnd(passed_argc));
3431 lea(cb, C_ARG_REGS[1], ctx_stack_opnd(ctx, argc - 1));
3432 mov(cb, C_ARG_REGS[2], ctx_stack_opnd(ctx, argc));
3434 // Variadic method with Ruby array
3435 if (cfunc->argc == -2) {
3436 // Create a Ruby array from the arguments.
3438 // This follows similar behaviour to vm_call_cfunc_with_frame() and
3439 // call_cfunc_m2(). We use rb_ec_ary_new_from_values() instead of
3440 // rb_ary_new4() since we have REG_EC available.
3442 // Before getting here we will have set the new CFP in the EC, and the
3443 // stack at CFP's SP will contain the values we are inserting into the
3444 // Array, so they will be properly marked if we hit a GC.
3446 // rb_ec_ary_new_from_values(rb_execution_context_t *ec, long n, const VALUE *elts)
3447 mov(cb, C_ARG_REGS[0], REG_EC);
3448 mov(cb, C_ARG_REGS[1], imm_opnd(passed_argc));
3449 lea(cb, C_ARG_REGS[2], ctx_stack_opnd(ctx, argc - 1));
3450 call_ptr(cb, REG0, (void *)rb_ec_ary_new_from_values);
3452 // rb_file_s_join(VALUE recv, VALUE args)
3453 mov(cb, C_ARG_REGS[0], ctx_stack_opnd(ctx, argc));
3454 mov(cb, C_ARG_REGS[1], RAX);
3457 // Pop the C function arguments from the stack (in the caller)
3458 ctx_stack_pop(ctx, argc + 1);
3460 // Write interpreter SP into CFP.
3461 // Needed in case the callee yields to the block.
3462 jit_save_sp(jit, ctx);
3464 // Call the C function
3465 // VALUE ret = (cfunc->func)(recv, argv[0], argv[1]);
3466 // cfunc comes from compile-time cme->def, which we assume to be stable.
3467 // Invalidation logic is in rb_yjit_method_lookup_change()
3468 call_ptr(cb, REG0, (void*)cfunc->func);
3470 // Record code position for TracePoint patching. See full_cfunc_return().
3471 record_global_inval_patch(cb, outline_full_cfunc_return_pos);
3473 // Push the return value on the Ruby stack
3474 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
3475 mov(cb, stack_ret, RAX);
3477 // Pop the stack frame (ec->cfp++)
3478 add(
3480 member_opnd(REG_EC, rb_execution_context_t, cfp),
3481 imm_opnd(sizeof(rb_control_frame_t))
3484 // cfunc calls may corrupt types
3485 ctx_clear_local_types(ctx);
3487 // Note: the return block of gen_send_iseq() has ctx->sp_offset == 1
3488 // which allows for sharing the same successor.
3490 // Jump (fall through) to the call continuation block
3491 // We do this to end the current block after the call
3492 jit_jump_to_next_insn(jit, ctx);
3493 return YJIT_END_BLOCK;
3496 static void
3497 gen_return_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
3499 switch (shape) {
3500 case SHAPE_NEXT0:
3501 case SHAPE_NEXT1:
3502 RUBY_ASSERT(false);
3503 break;
3505 case SHAPE_DEFAULT:
3506 mov(cb, REG0, const_ptr_opnd(target0));
3507 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, jit_return), REG0);
3508 break;
3512 // If true, the iseq is a leaf and can be replaced by a single C call.
3513 static bool
3514 rb_leaf_invokebuiltin_iseq_p(const rb_iseq_t *iseq)
3516 unsigned int invokebuiltin_len = insn_len(BIN(opt_invokebuiltin_delegate_leave));
3517 unsigned int leave_len = insn_len(BIN(leave));
3519 return (iseq->body->iseq_size == (invokebuiltin_len + leave_len) &&
3520 rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[0]) == BIN(opt_invokebuiltin_delegate_leave) &&
3521 rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[invokebuiltin_len]) == BIN(leave) &&
3522 iseq->body->builtin_inline_p
3526 // Return an rb_builtin_function if the iseq contains only that leaf builtin function.
3527 static const struct rb_builtin_function*
3528 rb_leaf_builtin_function(const rb_iseq_t *iseq)
3530 if (!rb_leaf_invokebuiltin_iseq_p(iseq))
3531 return NULL;
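// iseq_encoded[0] is the opt_invokebuiltin_delegate_leave instruction itself
// (checked above), and iseq_encoded[1] is its first operand: the pointer to
// the rb_builtin_function being delegated to.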
3532 return (const struct rb_builtin_function *)iseq->body->iseq_encoded[1];
3535 static codegen_status_t
3536 gen_send_iseq(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, int32_t argc)
3538 const rb_iseq_t *iseq = def_iseq_ptr(cme->def);
3540 // When you have keyword arguments, there is an extra object that gets
3541 // placed on the stack that represents a bitmap of the keywords that were not
3542 // specified at the call site. We need to keep track of the fact that this
3543 // value is present on the stack in order to properly set up the callee's
3544 // stack pointer.
3545 const bool doing_kw_call = iseq->body->param.flags.has_kw;
3546 const bool supplying_kws = vm_ci_flag(ci) & VM_CALL_KWARG;
3548 if (vm_ci_flag(ci) & VM_CALL_TAILCALL) {
3549 // We can't handle tailcalls
3550 GEN_COUNTER_INC(cb, send_iseq_tailcall);
3551 return YJIT_CANT_COMPILE;
3554 // No support for callees with these parameters yet as they require allocation
3555 // or complex handling.
3556 if (iseq->body->param.flags.has_rest ||
3557 iseq->body->param.flags.has_post ||
3558 iseq->body->param.flags.has_kwrest) {
3559 GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3560 return YJIT_CANT_COMPILE;
3563 // If we have keyword arguments being passed to a callee that only takes
3564 // positionals, then we need to allocate a hash. For now we're going to
3565 // call that too complex and bail.
3566 if (supplying_kws && !iseq->body->param.flags.has_kw) {
3567 GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3568 return YJIT_CANT_COMPILE;
3571 // If we have a method accepting no kwargs (**nil), exit if we have passed
3572 // it any kwargs.
3573 if (supplying_kws && iseq->body->param.flags.accepts_no_kwarg) {
3574 GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3575 return YJIT_CANT_COMPILE;
3578 // For computing the number of locals to set up for the callee
3579 int num_params = iseq->body->param.size;
3581 // Block parameter handling. This mirrors setup_parameters_complex().
3582 if (iseq->body->param.flags.has_block) {
3583 if (iseq->body->local_iseq == iseq) {
3584 // Block argument is passed through EP and not set up as a local in
3585 // the callee.
3586 num_params--;
3588 else {
3589 // In this case (param.flags.has_block && local_iseq != iseq),
3590 // the block argument is set up as a local variable and requires
3591 // materialization (allocation). Bail.
3592 GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3593 return YJIT_CANT_COMPILE;
3597 uint32_t start_pc_offset = 0;
3599 const int required_num = iseq->body->param.lead_num;
3601 // This struct represents the metadata about the caller-specified
3602 // keyword arguments.
3603 const struct rb_callinfo_kwarg *kw_arg = vm_ci_kwarg(ci);
3604 const int kw_arg_num = kw_arg ? kw_arg->keyword_len : 0;
3606 // Arity handling and optional parameter setup
3607 const int opts_filled = argc - required_num - kw_arg_num;
3608 const int opt_num = iseq->body->param.opt_num;
3609 const int opts_missing = opt_num - opts_filled;
3611 if (opts_filled < 0 || opts_filled > opt_num) {
3612 GEN_COUNTER_INC(cb, send_iseq_arity_error);
3613 return YJIT_CANT_COMPILE;
3616 // If we have unfilled optional arguments and keyword arguments then we
3617 // would need to adjust the argument locations to account for that.
3618 // For now we aren't handling this case.
3619 if (doing_kw_call && opts_missing > 0) {
3620 GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3621 return YJIT_CANT_COMPILE;
3624 if (opt_num > 0) {
3625 num_params -= opt_num - opts_filled;
3626 start_pc_offset = (uint32_t)iseq->body->param.opt_table[opts_filled];
3629 if (doing_kw_call) {
3630 // Here we're calling a method with keyword arguments and specifying
3631 // keyword arguments at this call site.
3633 // This struct represents the metadata about the callee-specified
3634 // keyword parameters.
3635 const struct rb_iseq_param_keyword *keyword = iseq->body->param.keyword;
3637 int required_kwargs_filled = 0;
3639 if (keyword->num > 30) {
3640 // We have so many keywords that (1 << num) encoded as a FIXNUM
3641 // (which shifts it left one more) no longer fits inside a 32-bit
3642 // immediate.
3643 GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3644 return YJIT_CANT_COMPILE;
3647 // Check that the kwargs being passed are valid
3648 if (supplying_kws) {
3649 // This is the list of keyword arguments that the callee specified
3650 // in its initial declaration.
3651 const ID *callee_kwargs = keyword->table;
3653 // Here we're going to build up a list of the IDs that correspond to
3654 // the caller-specified keyword arguments. If they're not in the
3655 // same order as the order specified in the callee declaration, then
3656 // we're going to need to generate some code to swap values around
3657 // on the stack.
3658 ID *caller_kwargs = ALLOCA_N(VALUE, kw_arg->keyword_len);
3659 for (int kwarg_idx = 0; kwarg_idx < kw_arg->keyword_len; kwarg_idx++)
3660 caller_kwargs[kwarg_idx] = SYM2ID(kw_arg->keywords[kwarg_idx]);
3662 // First, we're going to be sure that the names of every
3663 // caller-specified keyword argument correspond to a name in the
3664 // list of callee-specified keyword parameters.
3665 for (int caller_idx = 0; caller_idx < kw_arg->keyword_len; caller_idx++) {
3666 int callee_idx;
3668 for (callee_idx = 0; callee_idx < keyword->num; callee_idx++) {
3669 if (caller_kwargs[caller_idx] == callee_kwargs[callee_idx]) {
3670 break;
3674 // If the keyword was never found, then we know we have a
3675 // mismatch in the names of the keyword arguments, so we need to
3676 // bail.
3677 if (callee_idx == keyword->num) {
3678 GEN_COUNTER_INC(cb, send_iseq_kwargs_mismatch);
3679 return YJIT_CANT_COMPILE;
3682 // Keep a count to ensure all required kwargs are specified
3683 if (callee_idx < keyword->required_num) {
3684 required_kwargs_filled++;
3689 RUBY_ASSERT(required_kwargs_filled <= keyword->required_num);
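// Example (illustrative): for `def m(a:, b: 0); end` called as `m(b: 1)`, the
// required keyword a: is never supplied, so required_kwargs_filled stays 0 and
// we bail below, leaving the interpreter to raise the ArgumentError.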
3690 if (required_kwargs_filled != keyword->required_num) {
3691 GEN_COUNTER_INC(cb, send_iseq_kwargs_mismatch);
3692 return YJIT_CANT_COMPILE;
3696 // Number of locals that are not parameters
3697 const int num_locals = iseq->body->local_table_size - num_params;
3699 // Create a side-exit to fall back to the interpreter
3700 uint8_t *side_exit = yjit_side_exit(jit, ctx);
3702 // Check for interrupts
3703 yjit_check_ints(cb, side_exit);
3705 const struct rb_builtin_function *leaf_builtin = rb_leaf_builtin_function(iseq);
3707 if (leaf_builtin && !block && leaf_builtin->argc + 1 <= NUM_C_ARG_REGS) {
3708 ADD_COMMENT(cb, "inlined leaf builtin");
3710 // Call the builtin func (ec, recv, arg1, arg2, ...)
3711 mov(cb, C_ARG_REGS[0], REG_EC);
3713 // Copy self and arguments
3714 for (int32_t i = 0; i < leaf_builtin->argc + 1; i++) {
3715 x86opnd_t stack_opnd = ctx_stack_opnd(ctx, leaf_builtin->argc - i);
3716 x86opnd_t c_arg_reg = C_ARG_REGS[i + 1];
3717 mov(cb, c_arg_reg, stack_opnd);
3719 ctx_stack_pop(ctx, leaf_builtin->argc + 1);
3720 call_ptr(cb, REG0, (void *)leaf_builtin->func_ptr);
3722 // Push the return value
3723 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
3724 mov(cb, stack_ret, RAX);
3726 // Note: assuming that the leaf builtin doesn't change local variables here.
3727 // Seems like a safe assumption.
3729 return YJIT_KEEP_COMPILING;
3732 // Stack overflow check
3733 // Note that vm_push_frame checks it against a decremented cfp, hence the multiply by 2.
3734 // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
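// Below, REG0 = sp + (callee locals + callee stack max) + two control frames
// of margin. The value stack and the control frame stack grow toward each
// other in the same allocation, so we side exit when REG_CFP <= REG0, i.e.
// when pushing this frame could run into the control frame region.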
3735 ADD_COMMENT(cb, "stack overflow check");
3736 lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * (num_locals + iseq->body->stack_max) + 2 * sizeof(rb_control_frame_t)));
3737 cmp(cb, REG_CFP, REG0);
3738 jle_ptr(cb, COUNTED_EXIT(jit, side_exit, send_se_cf_overflow));
3740 if (doing_kw_call) {
3741 // Here we're calling a method with keyword arguments and specifying
3742 // keyword arguments at this call site.
3744 // Number of positional arguments the callee expects before the first
3745 // keyword argument
3746 const int args_before_kw = required_num + opt_num;
3748 // This struct represents the metadata about the caller-specified
3749 // keyword arguments.
3750 int caller_keyword_len = 0;
3751 const VALUE *caller_keywords = NULL;
3752 if (vm_ci_kwarg(ci)) {
3753 caller_keyword_len = vm_ci_kwarg(ci)->keyword_len;
3754 caller_keywords = &vm_ci_kwarg(ci)->keywords[0];
3757 // This struct represents the metadata about the callee-specified
3758 // keyword parameters.
3759 const struct rb_iseq_param_keyword *const keyword = iseq->body->param.keyword;
3761 ADD_COMMENT(cb, "keyword args");
3763 // This is the list of keyword arguments that the callee specified
3764 // in its initial declaration.
3765 const ID *callee_kwargs = keyword->table;
3767 int total_kwargs = keyword->num;
3769 // Here we're going to build up a list of the IDs that correspond to
3770 // the caller-specified keyword arguments. If they're not in the
3771 // same order as the order specified in the callee declaration, then
3772 // we're going to need to generate some code to swap values around
3773 // on the stack.
3774 ID *caller_kwargs = ALLOCA_N(VALUE, total_kwargs);
3775 int kwarg_idx;
3776 for (kwarg_idx = 0; kwarg_idx < caller_keyword_len; kwarg_idx++) {
3777 caller_kwargs[kwarg_idx] = SYM2ID(caller_keywords[kwarg_idx]);
3780 int unspecified_bits = 0;
3782 for (int callee_idx = keyword->required_num; callee_idx < total_kwargs; callee_idx++) {
3783 bool already_passed = false;
3784 ID callee_kwarg = callee_kwargs[callee_idx];
3786 for (int caller_idx = 0; caller_idx < caller_keyword_len; caller_idx++) {
3787 if (caller_kwargs[caller_idx] == callee_kwarg) {
3788 already_passed = true;
3789 break;
3793 if (!already_passed) {
3794 // Reserve space on the stack for each default value we'll be
3795 // filling in (which is done in the next loop). Also increments
3796 // argc so that the callee's SP is recorded correctly.
3797 argc++;
3798 x86opnd_t default_arg = ctx_stack_push(ctx, TYPE_UNKNOWN);
3799 VALUE default_value = keyword->default_values[callee_idx - keyword->required_num];
3801 if (default_value == Qundef) {
3802 // Qundef means that this value is not constant and must be
3803 // recalculated at runtime, so we record it in unspecified_bits
3804 // (Qnil is then used as a placeholder instead of Qundef).
3805 unspecified_bits |= 0x01 << (callee_idx - keyword->required_num);
3806 default_value = Qnil;
3809 // GC might move default_value.
3810 jit_mov_gc_ptr(jit, cb, REG0, default_value);
3811 mov(cb, default_arg, REG0);
3813 caller_kwargs[kwarg_idx++] = callee_kwarg;
3816 RUBY_ASSERT(kwarg_idx == total_kwargs);
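// Example (illustrative): for `def m(x: 1, y: rand); end` called as `m(x: 2)`,
// y is unspecified and its default is non-constant (Qundef in default_values),
// so its bit is set in unspecified_bits and Qnil is pushed as a placeholder;
// the callee's checkkeyword instruction later triggers evaluation of the real
// default.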
3818 // Next, we're going to loop through every keyword that was
3819 // specified by the caller and make sure that it's in the correct
3820 // place. If it's not we're going to swap it around with another one.
3821 for (kwarg_idx = 0; kwarg_idx < total_kwargs; kwarg_idx++) {
3822 ID callee_kwarg = callee_kwargs[kwarg_idx];
3824 // If the argument is already in the right order, then we don't
3825 // need to generate any code since the expected value is already
3826 // in the right place on the stack.
3827 if (callee_kwarg == caller_kwargs[kwarg_idx]) continue;
3829 // In this case the argument is not in the right place, so we
3830 // need to find its position where it _should_ be and swap with
3831 // that location.
3832 for (int swap_idx = kwarg_idx + 1; swap_idx < total_kwargs; swap_idx++) {
3833 if (callee_kwarg == caller_kwargs[swap_idx]) {
3834 // First we're going to generate the code that is going
3835 // to perform the actual swapping at runtime.
3836 stack_swap(ctx, cb, argc - 1 - swap_idx - args_before_kw, argc - 1 - kwarg_idx - args_before_kw, REG1, REG0);
3838 // Next we're going to do some bookkeeping on our end so
3839 // that we know the order that the arguments are
3840 // actually in now.
3841 ID tmp = caller_kwargs[kwarg_idx];
3842 caller_kwargs[kwarg_idx] = caller_kwargs[swap_idx];
3843 caller_kwargs[swap_idx] = tmp;
3845 break;
3850 // Keyword arguments cause a special extra local variable to be
3851 // pushed onto the stack that represents the parameters that weren't
3852 // explicitly given a value and have a non-constant default.
3853 mov(cb, ctx_stack_opnd(ctx, -1), imm_opnd(INT2FIX(unspecified_bits)));
3855 // Points to the receiver operand on the stack
3856 x86opnd_t recv = ctx_stack_opnd(ctx, argc);
3858 // Store the updated SP on the current frame (pop arguments and receiver)
3859 ADD_COMMENT(cb, "store caller sp");
3860 lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * -(argc + 1)));
3861 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG0);
3863 // Store the next PC in the current frame
3864 jit_save_pc(jit, REG0);
3866 if (block) {
3867 // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
3868 // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, and rb_captured_block->code.iseq aliases
3869 // with cfp->block_code.
3870 jit_mov_gc_ptr(jit, cb, REG0, (VALUE)block);
3871 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, block_code), REG0);
3874 // Adjust the callee's stack pointer
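// REG0 becomes the callee's SP: the current stack top plus room for the
// keyword bitmap (when doing_kw_call), the callee's non-parameter locals
// (initialized to Qnil below), and the 3-slot frame environment header.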
3875 lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * (3 + num_locals + doing_kw_call)));
3877 // Initialize local variables to Qnil
3878 for (int i = 0; i < num_locals; i++) {
3879 mov(cb, mem_opnd(64, REG0, sizeof(VALUE) * (i - num_locals - 3)), imm_opnd(Qnil));
3882 ADD_COMMENT(cb, "push env");
3883 // Put compile time cme into REG1. It's assumed to be valid because we are notified when
3884 // any cme we depend on becomes outdated. See rb_yjit_method_lookup_change().
3885 jit_mov_gc_ptr(jit, cb, REG1, (VALUE)cme);
3886 // Write method entry at sp[-3]
3887 // sp[-3] = me;
3888 mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
3890 // Write block handler at sp[-2]
3891 // sp[-2] = block_handler;
3892 if (block) {
3893 // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
3894 lea(cb, REG1, member_opnd(REG_CFP, rb_control_frame_t, self));
3895 or(cb, REG1, imm_opnd(1));
3896 mov(cb, mem_opnd(64, REG0, 8 * -2), REG1);
3898 else {
3899 mov(cb, mem_opnd(64, REG0, 8 * -2), imm_opnd(VM_BLOCK_HANDLER_NONE));
3902 // Write env flags at sp[-1]
3903 // sp[-1] = frame_type;
3904 uint64_t frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL;
3905 mov(cb, mem_opnd(64, REG0, 8 * -1), imm_opnd(frame_type));
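// Together sp[-3..-1] form the environment header; once ep is set to sp - 1
// below, these are ep[VM_ENV_DATA_INDEX_ME_CREF], ep[VM_ENV_DATA_INDEX_SPECVAL]
// and ep[VM_ENV_DATA_INDEX_FLAGS] respectively.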
3907 ADD_COMMENT(cb, "push callee CFP");
3908 // Allocate a new CFP (ec->cfp--)
3909 sub(cb, REG_CFP, imm_opnd(sizeof(rb_control_frame_t)));
3910 mov(cb, member_opnd(REG_EC, rb_execution_context_t, cfp), REG_CFP);
3912 // Setup the new frame
3913 // *cfp = (const struct rb_control_frame_struct) {
3914 // .pc = pc,
3915 // .sp = sp,
3916 // .iseq = iseq,
3917 // .self = recv,
3918 // .ep = sp - 1,
3919 // .block_code = 0,
3920 // .__bp__ = sp,
3921 // };
3922 mov(cb, REG1, recv);
3923 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, self), REG1);
3924 mov(cb, REG_SP, REG0); // Switch to the callee's REG_SP
3925 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG0);
3926 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, __bp__), REG0);
3927 sub(cb, REG0, imm_opnd(sizeof(VALUE)));
3928 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, ep), REG0);
3929 jit_mov_gc_ptr(jit, cb, REG0, (VALUE)iseq);
3930 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, iseq), REG0);
3931 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, block_code), imm_opnd(0));
3933 // No need to set cfp->pc since the callee sets it whenever calling into routines
3934 // that could look at it through jit_save_pc().
3935 // mov(cb, REG0, const_ptr_opnd(start_pc));
3936 // mov(cb, member_opnd(REG_CFP, rb_control_frame_t, pc), REG0);
3938 // Stub so we can return to JITted code
3939 blockid_t return_block = { jit->iseq, jit_next_insn_idx(jit) };
3941 // Create a context for the callee
3942 ctx_t callee_ctx = DEFAULT_CTX;
3944 // Set the argument types in the callee's context
3945 for (int32_t arg_idx = 0; arg_idx < argc; ++arg_idx) {
3946 val_type_t arg_type = ctx_get_opnd_type(ctx, OPND_STACK(argc - arg_idx - 1));
3947 ctx_set_local_type(&callee_ctx, arg_idx, arg_type);
3949 val_type_t recv_type = ctx_get_opnd_type(ctx, OPND_STACK(argc));
3950 ctx_upgrade_opnd_type(&callee_ctx, OPND_SELF, recv_type);
3952 // The callee might change locals through Kernel#binding and other means.
3953 ctx_clear_local_types(ctx);
3955 // Pop arguments and receiver in return context, push the return value
3956 // After the return, sp_offset will be 1. The codegen for leave writes
3957 // the return value in case of JIT-to-JIT return.
3958 ctx_t return_ctx = *ctx;
3959 ctx_stack_pop(&return_ctx, argc + 1);
3960 ctx_stack_push(&return_ctx, TYPE_UNKNOWN);
3961 return_ctx.sp_offset = 1;
3962 return_ctx.chain_depth = 0;
3964 // Write the JIT return address on the callee frame
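// gen_return_branch() (above) stores the address of this return_block stub in
// cfp->jit_return on the callee frame; gen_leave() later jumps there so a
// JIT-to-JIT return continues directly in the caller's compiled code.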
3965 gen_branch(
3966 jit,
3967 ctx,
3968 return_block,
3969 &return_ctx,
3970 return_block,
3971 &return_ctx,
3972 gen_return_branch
3975 //print_str(cb, "calling Ruby func:");
3976 //print_str(cb, rb_id2name(vm_ci_mid(ci)));
3978 // Directly jump to the entry point of the callee
3979 gen_direct_jump(
3980 jit,
3981 &callee_ctx,
3982 (blockid_t){ iseq, start_pc_offset }
3985 return YJIT_END_BLOCK;
3988 static codegen_status_t
3989 gen_struct_aref(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, VALUE comptime_recv, VALUE comptime_recv_klass) {
3990 if (vm_ci_argc(ci) != 0) {
3991 return YJIT_CANT_COMPILE;
3994 const unsigned int off = cme->def->body.optimized.index;
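// Example (illustrative): for `S = Struct.new(:a, :b)`, S#b is an optimized
// STRUCT_AREF method with optimized.index == 1, so this compiles down to a
// direct read of the second struct slot with no method call.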
3996 // Confidence checks
3997 RUBY_ASSERT_ALWAYS(RB_TYPE_P(comptime_recv, T_STRUCT));
3998 RUBY_ASSERT_ALWAYS((long)off < RSTRUCT_LEN(comptime_recv));
4000 // We are going to use an encoding that takes a 4-byte immediate which
4001 // limits the offset to INT32_MAX.
4003 uint64_t native_off = (uint64_t)off * (uint64_t)SIZEOF_VALUE;
4004 if (native_off > (uint64_t)INT32_MAX) {
4005 return YJIT_CANT_COMPILE;
4009 // All structs from the same Struct class should have the same
4010 // length. So if our comptime_recv is embedded all runtime
4011 // structs of the same class should be as well, and the same is
4012 // true of the converse.
4013 bool embedded = FL_TEST_RAW(comptime_recv, RSTRUCT_EMBED_LEN_MASK);
4015 ADD_COMMENT(cb, "struct aref");
4017 x86opnd_t recv = ctx_stack_pop(ctx, 1);
4019 mov(cb, REG0, recv);
4021 if (embedded) {
4022 mov(cb, REG0, member_opnd_idx(REG0, struct RStruct, as.ary, off));
4024 else {
4025 mov(cb, REG0, member_opnd(REG0, struct RStruct, as.heap.ptr));
4026 mov(cb, REG0, mem_opnd(64, REG0, SIZEOF_VALUE * off));
4029 x86opnd_t ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4030 mov(cb, ret, REG0);
4032 jit_jump_to_next_insn(jit, ctx);
4033 return YJIT_END_BLOCK;
4036 static codegen_status_t
4037 gen_struct_aset(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, VALUE comptime_recv, VALUE comptime_recv_klass) {
4038 if (vm_ci_argc(ci) != 1) {
4039 return YJIT_CANT_COMPILE;
4042 const unsigned int off = cme->def->body.optimized.index;
4044 // Confidence checks
4045 RUBY_ASSERT_ALWAYS(RB_TYPE_P(comptime_recv, T_STRUCT));
4046 RUBY_ASSERT_ALWAYS((long)off < RSTRUCT_LEN(comptime_recv));
4048 ADD_COMMENT(cb, "struct aset");
4050 x86opnd_t val = ctx_stack_pop(ctx, 1);
4051 x86opnd_t recv = ctx_stack_pop(ctx, 1);
4053 mov(cb, C_ARG_REGS[0], recv);
4054 mov(cb, C_ARG_REGS[1], imm_opnd(off));
4055 mov(cb, C_ARG_REGS[2], val);
4056 call_ptr(cb, REG0, (void *)RSTRUCT_SET);
4058 x86opnd_t ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4059 mov(cb, ret, RAX);
4061 jit_jump_to_next_insn(jit, ctx);
4062 return YJIT_END_BLOCK;
4065 const rb_callable_method_entry_t *
4066 rb_aliased_callable_method_entry(const rb_callable_method_entry_t *me);
4068 static codegen_status_t
4069 gen_send_general(jitstate_t *jit, ctx_t *ctx, struct rb_call_data *cd, rb_iseq_t *block)
4071 // Relevant definitions:
4072 // rb_execution_context_t : vm_core.h
4073 // invoker, cfunc logic : method.h, vm_method.c
4074 // rb_callinfo : vm_callinfo.h
4075 // rb_callable_method_entry_t : method.h
4076 // vm_call_cfunc_with_frame : vm_insnhelper.c
4078 // For a general overview for how the interpreter calls methods,
4079 // see vm_call_method().
4081 const struct rb_callinfo *ci = cd->ci; // info about the call site
4083 int32_t argc = (int32_t)vm_ci_argc(ci);
4084 ID mid = vm_ci_mid(ci);
4086 // Don't JIT calls with keyword splat
4087 if (vm_ci_flag(ci) & VM_CALL_KW_SPLAT) {
4088 GEN_COUNTER_INC(cb, send_kw_splat);
4089 return YJIT_CANT_COMPILE;
4092 // Don't JIT calls that aren't simple
4093 // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block.
4094 if ((vm_ci_flag(ci) & VM_CALL_ARGS_SPLAT) != 0) {
4095 GEN_COUNTER_INC(cb, send_args_splat);
4096 return YJIT_CANT_COMPILE;
4098 if ((vm_ci_flag(ci) & VM_CALL_ARGS_BLOCKARG) != 0) {
4099 GEN_COUNTER_INC(cb, send_block_arg);
4100 return YJIT_CANT_COMPILE;
4103 // Defer compilation so we can specialize on class of receiver
4104 if (!jit_at_current_insn(jit)) {
4105 defer_compilation(jit, ctx);
4106 return YJIT_END_BLOCK;
4109 VALUE comptime_recv = jit_peek_at_stack(jit, ctx, argc);
4110 VALUE comptime_recv_klass = CLASS_OF(comptime_recv);
4112 // Guard that the receiver has the same class as the one from compile time
4113 uint8_t *side_exit = yjit_side_exit(jit, ctx);
4115 // Points to the receiver operand on the stack
4116 x86opnd_t recv = ctx_stack_opnd(ctx, argc);
4117 insn_opnd_t recv_opnd = OPND_STACK(argc);
4118 mov(cb, REG0, recv);
4119 if (!jit_guard_known_klass(jit, ctx, comptime_recv_klass, recv_opnd, comptime_recv, SEND_MAX_DEPTH, side_exit)) {
4120 return YJIT_CANT_COMPILE;
4123 // Do method lookup
4124 const rb_callable_method_entry_t *cme = rb_callable_method_entry(comptime_recv_klass, mid);
4125 if (!cme) {
4126 // TODO: counter
4127 return YJIT_CANT_COMPILE;
4130 switch (METHOD_ENTRY_VISI(cme)) {
4131 case METHOD_VISI_PUBLIC:
4132 // Can always call public methods
4133 break;
4134 case METHOD_VISI_PRIVATE:
4135 if (!(vm_ci_flag(ci) & VM_CALL_FCALL)) {
4136 // Can only call private methods with FCALL callsites.
4137 // (at the moment they are callsites without a receiver or an explicit `self` receiver)
4138 return YJIT_CANT_COMPILE;
4140 break;
4141 case METHOD_VISI_PROTECTED:
4142 jit_protected_callee_ancestry_guard(jit, cb, cme, side_exit);
4143 break;
4144 case METHOD_VISI_UNDEF:
4145 RUBY_ASSERT(false && "cmes should always have a visibility");
4146 break;
4149 // Register block for invalidation
4150 RUBY_ASSERT(cme->called_id == mid);
4151 assume_method_lookup_stable(comptime_recv_klass, cme, jit);
4153 // To handle the aliased method case (VM_METHOD_TYPE_ALIAS)
4154 while (true) {
4155 // switch on the method type
4156 switch (cme->def->type) {
4157 case VM_METHOD_TYPE_ISEQ:
4158 return gen_send_iseq(jit, ctx, ci, cme, block, argc);
4159 case VM_METHOD_TYPE_CFUNC:
4160 return gen_send_cfunc(jit, ctx, ci, cme, block, argc, &comptime_recv_klass);
4161 case VM_METHOD_TYPE_IVAR:
4162 if (argc != 0) {
4163 // Argument count mismatch. Getters take no arguments.
4164 GEN_COUNTER_INC(cb, send_getter_arity);
4165 return YJIT_CANT_COMPILE;
4167 if (c_method_tracing_currently_enabled(jit)) {
4168 // Can't generate code for firing c_call and c_return events
4169 // :attr-tracing:
4170 // Handling the C method tracing events for attr_accessor
4171 // methods is easier than regular C methods as we know the
4172 // "method" we are calling into never enables those tracing
4173 // events. Once global invalidation runs, the code for the
4174 // attr_accessor is invalidated and we exit at the closest
4175 // instruction boundary which is always outside of the body of
4176 // the attr_accessor code.
4177 GEN_COUNTER_INC(cb, send_cfunc_tracing);
4178 return YJIT_CANT_COMPILE;
4181 mov(cb, REG0, recv);
4183 ID ivar_name = cme->def->body.attr.id;
4184 return gen_get_ivar(jit, ctx, SEND_MAX_DEPTH, comptime_recv, ivar_name, recv_opnd, side_exit);
4185 case VM_METHOD_TYPE_ATTRSET:
4186 if ((vm_ci_flag(ci) & VM_CALL_KWARG) != 0) {
4187 GEN_COUNTER_INC(cb, send_attrset_kwargs);
4188 return YJIT_CANT_COMPILE;
4190 else if (argc != 1 || !RB_TYPE_P(comptime_recv, T_OBJECT)) {
4191 GEN_COUNTER_INC(cb, send_ivar_set_method);
4192 return YJIT_CANT_COMPILE;
4194 else if (c_method_tracing_currently_enabled(jit)) {
4195 // Can't generate code for firing c_call and c_return events
4196 // See :attr-tracing:
4197 GEN_COUNTER_INC(cb, send_cfunc_tracing);
4198 return YJIT_CANT_COMPILE;
4200 else {
4201 ID ivar_name = cme->def->body.attr.id;
4202 return gen_set_ivar(jit, ctx, comptime_recv, comptime_recv_klass, ivar_name);
4204 // Block method, e.g. define_method(:foo) { :my_block }
4205 case VM_METHOD_TYPE_BMETHOD:
4206 GEN_COUNTER_INC(cb, send_bmethod);
4207 return YJIT_CANT_COMPILE;
4208 case VM_METHOD_TYPE_ZSUPER:
4209 GEN_COUNTER_INC(cb, send_zsuper_method);
4210 return YJIT_CANT_COMPILE;
4211 case VM_METHOD_TYPE_ALIAS: {
4212 // Retrieve the aliased method and re-enter the switch
4213 cme = rb_aliased_callable_method_entry(cme);
4214 continue;
4216 case VM_METHOD_TYPE_UNDEF:
4217 GEN_COUNTER_INC(cb, send_undef_method);
4218 return YJIT_CANT_COMPILE;
4219 case VM_METHOD_TYPE_NOTIMPLEMENTED:
4220 GEN_COUNTER_INC(cb, send_not_implemented_method);
4221 return YJIT_CANT_COMPILE;
4222 // Send family of methods, e.g. call/apply
4223 case VM_METHOD_TYPE_OPTIMIZED:
4224 switch (cme->def->body.optimized.type) {
4225 case OPTIMIZED_METHOD_TYPE_SEND:
4226 GEN_COUNTER_INC(cb, send_optimized_method_send);
4227 return YJIT_CANT_COMPILE;
4228 case OPTIMIZED_METHOD_TYPE_CALL:
4229 GEN_COUNTER_INC(cb, send_optimized_method_call);
4230 return YJIT_CANT_COMPILE;
4231 case OPTIMIZED_METHOD_TYPE_BLOCK_CALL:
4232 GEN_COUNTER_INC(cb, send_optimized_method_block_call);
4233 return YJIT_CANT_COMPILE;
4234 case OPTIMIZED_METHOD_TYPE_STRUCT_AREF:
4235 return gen_struct_aref(jit, ctx, ci, cme, comptime_recv, comptime_recv_klass);
4236 case OPTIMIZED_METHOD_TYPE_STRUCT_ASET:
4237 return gen_struct_aset(jit, ctx, ci, cme, comptime_recv, comptime_recv_klass);
4238 default:
4239 rb_bug("unknown optimized method type (%d)", cme->def->body.optimized.type);
4240 UNREACHABLE_RETURN(YJIT_CANT_COMPILE);
4242 case VM_METHOD_TYPE_MISSING:
4243 GEN_COUNTER_INC(cb, send_missing_method);
4244 return YJIT_CANT_COMPILE;
4245 case VM_METHOD_TYPE_REFINED:
4246 GEN_COUNTER_INC(cb, send_refined_method);
4247 return YJIT_CANT_COMPILE;
4248 // no default case so compiler issues a warning if this is not exhaustive
4251 // Unreachable
4252 RUBY_ASSERT(false);
4256 static codegen_status_t
4257 gen_opt_send_without_block(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4259 struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 0);
4260 return gen_send_general(jit, ctx, cd, NULL);
4263 static codegen_status_t
4264 gen_send(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4266 struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 0);
4267 rb_iseq_t *block = (rb_iseq_t *)jit_get_arg(jit, 1);
4268 return gen_send_general(jit, ctx, cd, block);
4271 static codegen_status_t
4272 gen_invokesuper(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4274 struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 0);
4275 rb_iseq_t *block = (rb_iseq_t *)jit_get_arg(jit, 1);
4277 // Defer compilation so we can specialize on class of receiver
4278 if (!jit_at_current_insn(jit)) {
4279 defer_compilation(jit, ctx);
4280 return YJIT_END_BLOCK;
4283 const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(jit->ec->cfp);
4284 if (!me) {
4285 return YJIT_CANT_COMPILE;
4288 // FIXME: We should track and invalidate this block when this cme is invalidated
4289 VALUE current_defined_class = me->defined_class;
4290 ID mid = me->def->original_id;
4292 if (me != rb_callable_method_entry(current_defined_class, me->called_id)) {
4293 // Though we likely could generate this call (we are only concerned
4294 // with the method entry remaining valid), assume_method_lookup_stable()
4295 // below requires that the method lookup matches as well.
4296 return YJIT_CANT_COMPILE;
4299 // vm_search_normal_superclass
4300 if (BUILTIN_TYPE(current_defined_class) == T_ICLASS && FL_TEST_RAW(RBASIC(current_defined_class)->klass, RMODULE_IS_REFINEMENT)) {
4301 return YJIT_CANT_COMPILE;
4303 VALUE comptime_superclass = RCLASS_SUPER(RCLASS_ORIGIN(current_defined_class));
4305 const struct rb_callinfo *ci = cd->ci;
4306 int32_t argc = (int32_t)vm_ci_argc(ci);
4308 // Don't JIT calls that aren't simple
4309 // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block.
4310 if ((vm_ci_flag(ci) & VM_CALL_ARGS_SPLAT) != 0) {
4311 GEN_COUNTER_INC(cb, send_args_splat);
4312 return YJIT_CANT_COMPILE;
4314 if ((vm_ci_flag(ci) & VM_CALL_KWARG) != 0) {
4315 GEN_COUNTER_INC(cb, send_keywords);
4316 return YJIT_CANT_COMPILE;
4318 if ((vm_ci_flag(ci) & VM_CALL_KW_SPLAT) != 0) {
4319 GEN_COUNTER_INC(cb, send_kw_splat);
4320 return YJIT_CANT_COMPILE;
4322 if ((vm_ci_flag(ci) & VM_CALL_ARGS_BLOCKARG) != 0) {
4323 GEN_COUNTER_INC(cb, send_block_arg);
4324 return YJIT_CANT_COMPILE;
4327 // Ensure we haven't rebound this method onto an incompatible class.
4328 // In the interpreter we try to avoid making this check by performing some
4329 // cheaper calculations first, but since we specialize on the method entry
4330 // and so only have to do this once at compile time, it is fine to always
4331 // check and side exit.
4332 VALUE comptime_recv = jit_peek_at_stack(jit, ctx, argc);
4333 if (!rb_obj_is_kind_of(comptime_recv, current_defined_class)) {
4334 return YJIT_CANT_COMPILE;
4337 // Do method lookup
4338 const rb_callable_method_entry_t *cme = rb_callable_method_entry(comptime_superclass, mid);
4340 if (!cme) {
4341 return YJIT_CANT_COMPILE;
4344 // Check that we'll be able to write this method dispatch before generating checks
4345 switch (cme->def->type) {
4346 case VM_METHOD_TYPE_ISEQ:
4347 case VM_METHOD_TYPE_CFUNC:
4348 break;
4349 default:
4350 // others unimplemented
4351 return YJIT_CANT_COMPILE;
4354 // Guard that the receiver has the same class as the one from compile time
4355 uint8_t *side_exit = yjit_side_exit(jit, ctx);
4357 if (jit->ec->cfp->ep[VM_ENV_DATA_INDEX_ME_CREF] != (VALUE)me) {
4358 // This will be the case for super within a block
4359 return YJIT_CANT_COMPILE;
4362 ADD_COMMENT(cb, "guard known me");
4363 mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, ep));
4364 x86opnd_t ep_me_opnd = mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_ME_CREF);
4365 jit_mov_gc_ptr(jit, cb, REG1, (VALUE)me);
4366 cmp(cb, ep_me_opnd, REG1);
4367 jne_ptr(cb, COUNTED_EXIT(jit, side_exit, invokesuper_me_changed));
4369 if (!block) {
4370 // Guard no block passed
4371 // rb_vm_frame_block_handler(GET_EC()->cfp) == VM_BLOCK_HANDLER_NONE
4372 // note, we assume VM_ASSERT(VM_ENV_LOCAL_P(ep))
4374 // TODO: this could properly forward the current block handler, but
4375 // would require changes to gen_send_*
4376 ADD_COMMENT(cb, "guard no block given");
4377 // EP is in REG0 from above
4378 x86opnd_t ep_specval_opnd = mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_SPECVAL);
4379 cmp(cb, ep_specval_opnd, imm_opnd(VM_BLOCK_HANDLER_NONE));
4380 jne_ptr(cb, COUNTED_EXIT(jit, side_exit, invokesuper_block));
4383 // Points to the receiver operand on the stack
4384 x86opnd_t recv = ctx_stack_opnd(ctx, argc);
4385 mov(cb, REG0, recv);
4387 // We need to assume that both our current method entry and the super
4388 // method entry we invoke remain stable
4389 assume_method_lookup_stable(current_defined_class, me, jit);
4390 assume_method_lookup_stable(comptime_superclass, cme, jit);
4392 // Method calls may corrupt types
4393 ctx_clear_local_types(ctx);
4395 switch (cme->def->type) {
4396 case VM_METHOD_TYPE_ISEQ:
4397 return gen_send_iseq(jit, ctx, ci, cme, block, argc);
4398 case VM_METHOD_TYPE_CFUNC:
4399 return gen_send_cfunc(jit, ctx, ci, cme, block, argc, NULL);
4400 default:
4401 break;
4404 RUBY_ASSERT_ALWAYS(false);
4407 static codegen_status_t
4408 gen_leave(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4410 // Only the return value should be on the stack
4411 RUBY_ASSERT(ctx->stack_size == 1);
4413 // Create a side-exit to fall back to the interpreter
4414 uint8_t *side_exit = yjit_side_exit(jit, ctx);
4416 // Load environment pointer EP from CFP
4417 mov(cb, REG1, member_opnd(REG_CFP, rb_control_frame_t, ep));
4419 // Check for interrupts
4420 ADD_COMMENT(cb, "check for interrupts");
4421 yjit_check_ints(cb, COUNTED_EXIT(jit, side_exit, leave_se_interrupt));
4423 // Load the return value
4424 mov(cb, REG0, ctx_stack_pop(ctx, 1));
4426 // Pop the current frame (ec->cfp++)
4427 // Note: the return PC is already in the previous CFP
4428 add(cb, REG_CFP, imm_opnd(sizeof(rb_control_frame_t)));
4429 mov(cb, member_opnd(REG_EC, rb_execution_context_t, cfp), REG_CFP);
4431 // Reload REG_SP for the caller and write the return value.
4432 // Top of the stack is REG_SP[0] since the caller has sp_offset=1.
4433 mov(cb, REG_SP, member_opnd(REG_CFP, rb_control_frame_t, sp));
4434 mov(cb, mem_opnd(64, REG_SP, 0), REG0);
4436 // Jump to the JIT return address on the frame that was just popped
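// (jit_return was written by gen_return_branch() when the caller pushed this
// frame; the return value is already at the caller's SP[0], written above.)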
4437 const int32_t offset_to_jit_return = -((int32_t)sizeof(rb_control_frame_t)) + (int32_t)offsetof(rb_control_frame_t, jit_return);
4438 jmp_rm(cb, mem_opnd(64, REG_CFP, offset_to_jit_return));
4440 return YJIT_END_BLOCK;
4443 RUBY_EXTERN rb_serial_t ruby_vm_global_constant_state;
4445 static codegen_status_t
4446 gen_getglobal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4448 ID gid = jit_get_arg(jit, 0);
4450 // Save the PC and SP because we might make a Ruby call for warning
4451 jit_prepare_routine_call(jit, ctx, REG0);
4453 mov(cb, C_ARG_REGS[0], imm_opnd(gid));
4455 call_ptr(cb, REG0, (void *)&rb_gvar_get);
4457 x86opnd_t top = ctx_stack_push(ctx, TYPE_UNKNOWN);
4458 mov(cb, top, RAX);
4460 return YJIT_KEEP_COMPILING;
4463 static codegen_status_t
4464 gen_setglobal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4466 ID gid = jit_get_arg(jit, 0);
4468 // Save the PC and SP because we might make a Ruby call for
4469 // Kernel#trace_var
4470 jit_prepare_routine_call(jit, ctx, REG0);
4472 mov(cb, C_ARG_REGS[0], imm_opnd(gid));
4474 x86opnd_t val = ctx_stack_pop(ctx, 1);
4476 mov(cb, C_ARG_REGS[1], val);
4478 call_ptr(cb, REG0, (void *)&rb_gvar_set);
4480 return YJIT_KEEP_COMPILING;
4483 static codegen_status_t
4484 gen_anytostring(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4486 // Might allocate in rb_obj_as_string_result().
4487 jit_prepare_routine_call(jit, ctx, REG0);
4489 x86opnd_t str = ctx_stack_pop(ctx, 1);
4490 x86opnd_t val = ctx_stack_pop(ctx, 1);
4492 mov(cb, C_ARG_REGS[0], str);
4493 mov(cb, C_ARG_REGS[1], val);
4495 call_ptr(cb, REG0, (void *)&rb_obj_as_string_result);
4497 // Push the return value
4498 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_STRING);
4499 mov(cb, stack_ret, RAX);
4501 return YJIT_KEEP_COMPILING;
4504 static codegen_status_t
4505 gen_objtostring(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4507 if (!jit_at_current_insn(jit)) {
4508 defer_compilation(jit, ctx);
4509 return YJIT_END_BLOCK;
4512 x86opnd_t recv = ctx_stack_opnd(ctx, 0);
4513 VALUE comptime_recv = jit_peek_at_stack(jit, ctx, 0);
4515 if (RB_TYPE_P(comptime_recv, T_STRING)) {
4516 uint8_t *side_exit = yjit_side_exit(jit, ctx);
4518 mov(cb, REG0, recv);
4519 jit_guard_known_klass(jit, ctx, CLASS_OF(comptime_recv), OPND_STACK(0), comptime_recv, SEND_MAX_DEPTH, side_exit);
4520 // No work needed. The string value is already on the top of the stack.
4521 return YJIT_KEEP_COMPILING;
4523 else {
4524 struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 0);
4525 return gen_send_general(jit, ctx, cd, NULL);
4529 static codegen_status_t
4530 gen_toregexp(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4532 rb_num_t opt = jit_get_arg(jit, 0);
4533 rb_num_t cnt = jit_get_arg(jit, 1);
4535 // Save the PC and SP because this allocates an object and could
4536 // raise an exception.
4537 jit_prepare_routine_call(jit, ctx, REG0);
4539 x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(int32_t)(sizeof(VALUE) * (uint32_t)cnt));
4540 ctx_stack_pop(ctx, cnt);
4542 mov(cb, C_ARG_REGS[0], imm_opnd(0));
4543 mov(cb, C_ARG_REGS[1], imm_opnd(cnt));
4544 lea(cb, C_ARG_REGS[2], values_ptr);
4545 call_ptr(cb, REG0, (void *)&rb_ary_tmp_new_from_values);
4547 // Save the array so we can clear it later
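// (pushed twice so the native stack stays 16-byte aligned across the
// following C calls, as the SysV ABI requires)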
4548 push(cb, RAX);
4549 push(cb, RAX); // Alignment
4550 mov(cb, C_ARG_REGS[0], RAX);
4551 mov(cb, C_ARG_REGS[1], imm_opnd(opt));
4552 call_ptr(cb, REG0, (void *)&rb_reg_new_ary);
4554 // The actual regex is in RAX now. Pop the temp array from
4555 // rb_ary_tmp_new_from_values into C arg regs so we can clear it
4556 pop(cb, REG1); // Alignment
4557 pop(cb, C_ARG_REGS[0]);
4559 // The value we want to push on the stack is in RAX right now
4560 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4561 mov(cb, stack_ret, RAX);
4563 // Clear the temp array.
4564 call_ptr(cb, REG0, (void *)&rb_ary_clear);
4566 return YJIT_KEEP_COMPILING;
4569 static codegen_status_t
4570 gen_intern(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4572 // Save the PC and SP because we might allocate
4573 jit_prepare_routine_call(jit, ctx, REG0);
4575 x86opnd_t str = ctx_stack_pop(ctx, 1);
4577 mov(cb, C_ARG_REGS[0], str);
4579 call_ptr(cb, REG0, (void *)&rb_str_intern);
4581 // Push the return value
4582 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4583 mov(cb, stack_ret, RAX);
4585 return YJIT_KEEP_COMPILING;
4588 static codegen_status_t
4589 gen_getspecial(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4591 // This takes two arguments, key and type
4592 // key is only used when type == 0
4593 // A non-zero type determines which type of backref to fetch
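// The two branches below decode it as: odd type => special backref named by
// the character type >> 1 (e.g. $& or $'); even non-zero type => $N with
// N == type >> 1.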
4594 //rb_num_t key = jit_get_arg(jit, 0);
4595 rb_num_t type = jit_get_arg(jit, 1);
4597 if (type == 0) {
4598 // not yet implemented
4599 return YJIT_CANT_COMPILE;
4601 else if (type & 0x01) {
4602 // Fetch a "special" backref based on a char encoded by shifting by 1
4604 // Can raise if matchdata uninitialized
4605 jit_prepare_routine_call(jit, ctx, REG0);
4607 // call rb_backref_get()
4608 ADD_COMMENT(cb, "rb_backref_get");
4609 call_ptr(cb, REG0, (void *)rb_backref_get);
4610 mov(cb, C_ARG_REGS[0], RAX);
4612 switch (type >> 1) {
4613 case '&':
4614 ADD_COMMENT(cb, "rb_reg_last_match");
4615 call_ptr(cb, REG0, (void *)rb_reg_last_match);
4616 break;
4617 case '`':
4618 ADD_COMMENT(cb, "rb_reg_match_pre");
4619 call_ptr(cb, REG0, (void *)rb_reg_match_pre);
4620 break;
4621 case '\'':
4622 ADD_COMMENT(cb, "rb_reg_match_post");
4623 call_ptr(cb, REG0, (void *)rb_reg_match_post);
4624 break;
4625 case '+':
4626 ADD_COMMENT(cb, "rb_reg_match_last");
4627 call_ptr(cb, REG0, (void *)rb_reg_match_last);
4628 break;
4629 default:
4630 rb_bug("invalid back-ref");
4633 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4634 mov(cb, stack_ret, RAX);
4636 return YJIT_KEEP_COMPILING;
4638 else {
4639 // Fetch the N-th match from the last backref based on type shifted by 1
4641 // Can raise if matchdata uninitialized
4642 jit_prepare_routine_call(jit, ctx, REG0);
4644 // call rb_backref_get()
4645 ADD_COMMENT(cb, "rb_backref_get");
4646 call_ptr(cb, REG0, (void *)rb_backref_get);
4648 // rb_reg_nth_match((int)(type >> 1), backref);
4649 ADD_COMMENT(cb, "rb_reg_nth_match");
4650 mov(cb, C_ARG_REGS[0], imm_opnd(type >> 1));
4651 mov(cb, C_ARG_REGS[1], RAX);
4652 call_ptr(cb, REG0, (void *)rb_reg_nth_match);
4654 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4655 mov(cb, stack_ret, RAX);
4657 return YJIT_KEEP_COMPILING;
4661 VALUE
4662 rb_vm_getclassvariable(const rb_iseq_t *iseq, const rb_control_frame_t *cfp, ID id, ICVARC ic);
4664 static codegen_status_t
4665 gen_getclassvariable(jitstate_t* jit, ctx_t* ctx, codeblock_t* cb)
4667 // rb_vm_getclassvariable can raise exceptions.
4668 jit_prepare_routine_call(jit, ctx, REG0);
4670 mov(cb, C_ARG_REGS[0], member_opnd(REG_CFP, rb_control_frame_t, iseq));
4671 mov(cb, C_ARG_REGS[1], REG_CFP);
4672 mov(cb, C_ARG_REGS[2], imm_opnd(jit_get_arg(jit, 0)));
4673 mov(cb, C_ARG_REGS[3], imm_opnd(jit_get_arg(jit, 1)));
4675 call_ptr(cb, REG0, (void *)rb_vm_getclassvariable);
4677 x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_UNKNOWN);
4678 mov(cb, stack_top, RAX);
4680 return YJIT_KEEP_COMPILING;
4683 VALUE
4684 rb_vm_setclassvariable(const rb_iseq_t *iseq, const rb_control_frame_t *cfp, ID id, VALUE val, ICVARC ic);
4686 static codegen_status_t
4687 gen_setclassvariable(jitstate_t* jit, ctx_t* ctx, codeblock_t* cb)
4689 // rb_vm_setclassvariable can raise exceptions.
4690 jit_prepare_routine_call(jit, ctx, REG0);
4692 mov(cb, C_ARG_REGS[0], member_opnd(REG_CFP, rb_control_frame_t, iseq));
4693 mov(cb, C_ARG_REGS[1], REG_CFP);
4694 mov(cb, C_ARG_REGS[2], imm_opnd(jit_get_arg(jit, 0)));
4695 mov(cb, C_ARG_REGS[3], ctx_stack_pop(ctx, 1));
4696 mov(cb, C_ARG_REGS[4], imm_opnd(jit_get_arg(jit, 1)));
4698 call_ptr(cb, REG0, (void *)rb_vm_setclassvariable);
4700 return YJIT_KEEP_COMPILING;
4703 static codegen_status_t
4704 gen_opt_getinlinecache(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4706 VALUE jump_offset = jit_get_arg(jit, 0);
4707 VALUE const_cache_as_value = jit_get_arg(jit, 1);
4708 IC ic = (IC)const_cache_as_value;
4710 // See vm_ic_hit_p(). The same conditions are checked in yjit_constant_ic_update().
4711 struct iseq_inline_constant_cache_entry *ice = ic->entry;
4712 if (!ice || // cache not filled
4713 GET_IC_SERIAL(ice) != ruby_vm_global_constant_state /* cache out of date */) {
4714 // In these cases, leave a block that unconditionally side exits
4715 // for the interpreter to invalidate.
4716 return YJIT_CANT_COMPILE;
4719 // Make sure there is an exit for this block as the interpreter might want
4720 // to invalidate this block from yjit_constant_ic_update().
4721 jit_ensure_block_entry_exit(jit);
4723 if (ice->ic_cref) {
4724 // Cache is keyed on a certain lexical scope. Use the interpreter's cache.
4725 uint8_t *side_exit = yjit_side_exit(jit, ctx);
4727 // Call function to verify the cache. It doesn't allocate or call methods.
4728 bool rb_vm_ic_hit_p(IC ic, const VALUE *reg_ep);
4729 mov(cb, C_ARG_REGS[0], const_ptr_opnd((void *)ic));
4730 mov(cb, C_ARG_REGS[1], member_opnd(REG_CFP, rb_control_frame_t, ep));
4731 call_ptr(cb, REG0, (void *)rb_vm_ic_hit_p);
4733 // Check the result. _Bool is one byte in SysV.
4734 test(cb, AL, AL);
4735 jz_ptr(cb, COUNTED_EXIT(jit, side_exit, opt_getinlinecache_miss));
4737 // Push ic->entry->value
4738 mov(cb, REG0, const_ptr_opnd((void *)ic));
4739 mov(cb, REG0, member_opnd(REG0, struct iseq_inline_constant_cache, entry));
4740 x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_UNKNOWN);
4741 mov(cb, REG0, member_opnd(REG0, struct iseq_inline_constant_cache_entry, value));
4742 mov(cb, stack_top, REG0);
4744 else {
4745 // Optimize for single ractor mode.
4746 // FIXME: This leaks when st_insert raises NoMemoryError
4747 if (!assume_single_ractor_mode(jit)) return YJIT_CANT_COMPILE;
4749 // Invalidate output code on any and all constant writes
4750 // FIXME: This leaks when st_insert raises NoMemoryError
4751 assume_stable_global_constant_state(jit);
4753 jit_putobject(jit, ctx, ice->value);
4756 // Jump over the code for filling the cache
4757 uint32_t jump_idx = jit_next_insn_idx(jit) + (int32_t)jump_offset;
4758 gen_direct_jump(
4759 jit,
4760 ctx,
4761 (blockid_t){ .iseq = jit->iseq, .idx = jump_idx }
4764 return YJIT_END_BLOCK;
4767 // Push the explicit block parameter onto the temporary stack. Part of the
4768 // interpreter's scheme for avoiding Proc allocations when delegating
4769 // explicit block parameters.
4770 static codegen_status_t
4771 gen_getblockparamproxy(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4773 // This mirrors the interpreter code, checking for the case
4774 // where it pushes rb_block_param_proxy.
4775 uint8_t *side_exit = yjit_side_exit(jit, ctx);
4777 // EP level
4778 uint32_t level = (uint32_t)jit_get_arg(jit, 1);
4780 // Load environment pointer EP from CFP
4781 gen_get_ep(cb, REG0, level);
4783 // Bail when VM_ENV_FLAGS(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM) is non-zero
4784 test(cb, mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_FLAGS), imm_opnd(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM));
4785 jnz_ptr(cb, COUNTED_EXIT(jit, side_exit, gbpp_block_param_modified));
4787 // Load the block handler for the current frame
4788 // note, VM_ASSERT(VM_ENV_LOCAL_P(ep))
4789 mov(cb, REG0, mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_SPECVAL));
4791 // Block handler is a tagged pointer. Look at the tag. 0x03 is from VM_BH_ISEQ_BLOCK_P().
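// VM_BH_FROM_ISEQ_BLOCK() tags the captured block pointer with 0x1 (ifunc
// blocks use 0x3), so masking with 0x3 and comparing against 0x1 below accepts
// only iseq block handlers and rejects a null handler as well as proc and
// symbol handlers.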
4792 and(cb, REG0_8, imm_opnd(0x3));
4794 // Bail unless VM_BH_ISEQ_BLOCK_P(bh). This also checks for null.
4795 cmp(cb, REG0_8, imm_opnd(0x1));
4796 jnz_ptr(cb, COUNTED_EXIT(jit, side_exit, gbpp_block_handler_not_iseq));
4798 // Push rb_block_param_proxy. It's a root, so no need to use jit_mov_gc_ptr.
4799 mov(cb, REG0, const_ptr_opnd((void *)rb_block_param_proxy));
4800 RUBY_ASSERT(!SPECIAL_CONST_P(rb_block_param_proxy));
4801 x86opnd_t top = ctx_stack_push(ctx, TYPE_HEAP);
4802 mov(cb, top, REG0);
4804 return YJIT_KEEP_COMPILING;
4807 static codegen_status_t
4808 gen_invokebuiltin(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4810 const struct rb_builtin_function *bf = (struct rb_builtin_function *)jit_get_arg(jit, 0);
4812 // ec, self, and arguments
4813 if (bf->argc + 2 > NUM_C_ARG_REGS) {
4814 return YJIT_CANT_COMPILE;
4817 // If the calls don't allocate, do they need up to date PC, SP?
4818 jit_prepare_routine_call(jit, ctx, REG0);
4820 // Call the builtin func (ec, recv, arg1, arg2, ...)
4821 mov(cb, C_ARG_REGS[0], REG_EC);
4822 mov(cb, C_ARG_REGS[1], member_opnd(REG_CFP, rb_control_frame_t, self));
4824 // Copy arguments from locals
4825 for (int32_t i = 0; i < bf->argc; i++) {
4826 x86opnd_t stack_opnd = ctx_stack_opnd(ctx, bf->argc - i - 1);
4827 x86opnd_t c_arg_reg = C_ARG_REGS[2 + i];
4828 mov(cb, c_arg_reg, stack_opnd);
4831 call_ptr(cb, REG0, (void *)bf->func_ptr);
4833 // Push the return value
4834 ctx_stack_pop(ctx, bf->argc);
4835 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4836 mov(cb, stack_ret, RAX);
4838 return YJIT_KEEP_COMPILING;
4841 // opt_invokebuiltin_delegate calls a builtin function, like
4842 // invokebuiltin does, but instead of taking arguments from the top of the
4843 // stack it uses the argument locals (and self) from the current method.
4844 static codegen_status_t
4845 gen_opt_invokebuiltin_delegate(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4847 const struct rb_builtin_function *bf = (struct rb_builtin_function *)jit_get_arg(jit, 0);
4848 int32_t start_index = (int32_t)jit_get_arg(jit, 1);
4850 // ec, self, and arguments
4851 if (bf->argc + 2 > NUM_C_ARG_REGS) {
4852 return YJIT_CANT_COMPILE;
4855 // If the calls don't allocate, do they need up to date PC, SP?
4856 jit_prepare_routine_call(jit, ctx, REG0);
4858 if (bf->argc > 0) {
4859 // Load environment pointer EP from CFP
4860 mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, ep));
4863 // Call the builtin func (ec, recv, arg1, arg2, ...)
4864 mov(cb, C_ARG_REGS[0], REG_EC);
4865 mov(cb, C_ARG_REGS[1], member_opnd(REG_CFP, rb_control_frame_t, self));
4867 // Copy arguments from locals
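// Locals live just below the environment header on the VM stack, so the
// offset computed below turns local index (start_index + i) into a negative
// displacement from EP.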
4868 for (int32_t i = 0; i < bf->argc; i++) {
4869 const int32_t offs = start_index + i - jit->iseq->body->local_table_size - VM_ENV_DATA_SIZE + 1;
4870 x86opnd_t local_opnd = mem_opnd(64, REG0, offs * SIZEOF_VALUE);
4871 x86opnd_t c_arg_reg = C_ARG_REGS[i + 2];
4872 mov(cb, c_arg_reg, local_opnd);
4874 call_ptr(cb, REG0, (void *)bf->func_ptr);
4876 // Push the return value
4877 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4878 mov(cb, stack_ret, RAX);
4880 return YJIT_KEEP_COMPILING;
4883 static int tracing_invalidate_all_i(void *vstart, void *vend, size_t stride, void *data);
4884 static void invalidate_all_blocks_for_tracing(const rb_iseq_t *iseq);
4886 // Invalidate all generated code and patch C method return code to contain
4887 // logic for firing the c_return TracePoint event. Once rb_vm_barrier()
4888 // returns, all other ractors are pausing inside RB_VM_LOCK_ENTER(), which
4889 // means they are inside a C routine. If there is any generated code on the stack,
4890 // it is waiting for a return from a C routine. For every routine call, we
4891 // patch in an exit after the body of the containing VM instruction. This makes
4892 // it so all the invalidated code exits as soon as execution logically reaches
4893 // the next VM instruction. The interpreter takes care of firing the tracing
4894 // event if it so happens that the next VM instruction has one attached.
4896 // The c_return event needs special handling as our codegen never outputs code
4897 // that contains tracing logic. If we let the normal output code run until the
4898 // start of the next VM instruction by relying on the patching scheme above, we
4899 // would fail to fire the c_return event. The interpreter doesn't fire the
4900 // event at an instruction boundary, so simply exiting to the interpreter isn't
4901 // enough. To handle it, we patch in the full logic at the return address. See
4902 // full_cfunc_return().
4904 // In addition to patching, we prevent future entries into invalidated code by
4905 // removing all live blocks from their iseq.
4906 void
4907 rb_yjit_tracing_invalidate_all(void)
4909 if (!rb_yjit_enabled_p()) return;
4911 // Stop other ractors since we are going to patch machine code.
4912 RB_VM_LOCK_ENTER();
4913 rb_vm_barrier();
4915 // Make it so all live block versions are no longer valid branch targets
4916 rb_objspace_each_objects(tracing_invalidate_all_i, NULL);
4918 // Apply patches
4919 const uint32_t old_pos = cb->write_pos;
4920 rb_darray_for(global_inval_patches, patch_idx) {
4921 struct codepage_patch patch = rb_darray_get(global_inval_patches, patch_idx);
4922 cb_set_pos(cb, patch.inline_patch_pos);
4923 uint8_t *jump_target = cb_get_ptr(ocb, patch.outlined_target_pos);
4924 jmp_ptr(cb, jump_target);
4926 cb_set_pos(cb, old_pos);
4928 // Freeze the invalidated part of the codepage. We only want to wait for
4929 // running instances of the code to exit from now on, so we shouldn't
4930 // change the code. There could be other ractors sleeping in
4931 // branch_stub_hit(), for example. We could harden this by changing memory
4932 // protection on the frozen range.
4933 RUBY_ASSERT_ALWAYS(yjit_codepage_frozen_bytes <= old_pos && "frozen bytes should increase monotonically");
4934 yjit_codepage_frozen_bytes = old_pos;
4936 cb_mark_all_executable(ocb);
4937 cb_mark_all_executable(cb);
4938 RB_VM_LOCK_LEAVE();
4941 static int
4942 tracing_invalidate_all_i(void *vstart, void *vend, size_t stride, void *data)
4944 VALUE v = (VALUE)vstart;
4945 for (; v != (VALUE)vend; v += stride) {
4946 void *ptr = asan_poisoned_object_p(v);
4947 asan_unpoison_object(v, false);
4949 if (rb_obj_is_iseq(v)) {
4950 rb_iseq_t *iseq = (rb_iseq_t *)v;
4951 invalidate_all_blocks_for_tracing(iseq);
4954 asan_poison_object_if(ptr, v);
4956 return 0;
4959 static void
4960 invalidate_all_blocks_for_tracing(const rb_iseq_t *iseq)
4962 struct rb_iseq_constant_body *body = iseq->body;
4963 if (!body) return; // iseq yet to be initialized
4965 ASSERT_vm_locking();
4967 // Empty all blocks on the iseq so we don't compile new blocks that jump to the
4968 // invalidated region.
4969 // TODO Leaking the blocks for now since we might have situations where
4970 // a different ractor is waiting in branch_stub_hit(). If we free the block,
4971 // that ractor can wake up with a dangling block.
4972 rb_darray_for(body->yjit_blocks, version_array_idx) {
4973 rb_yjit_block_array_t version_array = rb_darray_get(body->yjit_blocks, version_array_idx);
4974 rb_darray_for(version_array, version_idx) {
4975 // Stop listening for invalidation events like basic operation redefinition.
4976 block_t *block = rb_darray_get(version_array, version_idx);
4977 yjit_unlink_method_lookup_dependency(block);
4978 yjit_block_assumptions_free(block);
4980 rb_darray_free(version_array);
4982 rb_darray_free(body->yjit_blocks);
4983 body->yjit_blocks = NULL;
4985 #if USE_MJIT
4986 // Reset output code entry point
4987 body->jit_func = NULL;
4988 #endif
4991 static void
4992 yjit_reg_op(int opcode, codegen_fn gen_fn)
4994 RUBY_ASSERT(opcode >= 0 && opcode < VM_INSTRUCTION_SIZE);
4995 // Check that the op wasn't previously registered
4996 RUBY_ASSERT(gen_fns[opcode] == NULL);
4998 gen_fns[opcode] = gen_fn;
5001 void
5002 yjit_init_codegen(void)
5004 // Initialize the code blocks
5005 uint32_t mem_size = rb_yjit_opts.exec_mem_size * 1024 * 1024;
5006 uint8_t *mem_block = alloc_exec_mem(mem_size);
5008 cb = &block;
5009 cb_init(cb, mem_block, mem_size/2);
5011 ocb = &outline_block;
5012 cb_init(ocb, mem_block + mem_size/2, mem_size/2);
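// The executable region is split in half: cb holds the inline code paths and
// ocb the outlined code (side exits and other rarely-taken paths).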
5014 // Generate the interpreter exit code for leave
5015 leave_exit_code = yjit_gen_leave_exit(cb);
5017 // Generate full exit code for C func
5018 gen_full_cfunc_return();
5019 cb_mark_all_executable(cb);
5021 // Map YARV opcodes to the corresponding codegen functions
5022 yjit_reg_op(BIN(nop), gen_nop);
5023 yjit_reg_op(BIN(dup), gen_dup);
5024 yjit_reg_op(BIN(dupn), gen_dupn);
5025 yjit_reg_op(BIN(swap), gen_swap);
5026 yjit_reg_op(BIN(setn), gen_setn);
5027 yjit_reg_op(BIN(topn), gen_topn);
5028 yjit_reg_op(BIN(pop), gen_pop);
5029 yjit_reg_op(BIN(adjuststack), gen_adjuststack);
5030 yjit_reg_op(BIN(newarray), gen_newarray);
5031 yjit_reg_op(BIN(duparray), gen_duparray);
5032 yjit_reg_op(BIN(duphash), gen_duphash);
5033 yjit_reg_op(BIN(splatarray), gen_splatarray);
5034 yjit_reg_op(BIN(expandarray), gen_expandarray);
5035 yjit_reg_op(BIN(newhash), gen_newhash);
5036 yjit_reg_op(BIN(newrange), gen_newrange);
5037 yjit_reg_op(BIN(concatstrings), gen_concatstrings);
5038 yjit_reg_op(BIN(putnil), gen_putnil);
5039 yjit_reg_op(BIN(putobject), gen_putobject);
5040 yjit_reg_op(BIN(putstring), gen_putstring);
5041 yjit_reg_op(BIN(putobject_INT2FIX_0_), gen_putobject_int2fix);
5042 yjit_reg_op(BIN(putobject_INT2FIX_1_), gen_putobject_int2fix);
5043 yjit_reg_op(BIN(putself), gen_putself);
5044 yjit_reg_op(BIN(putspecialobject), gen_putspecialobject);
5045 yjit_reg_op(BIN(getlocal), gen_getlocal);
5046 yjit_reg_op(BIN(getlocal_WC_0), gen_getlocal_wc0);
5047 yjit_reg_op(BIN(getlocal_WC_1), gen_getlocal_wc1);
5048 yjit_reg_op(BIN(setlocal), gen_setlocal);
5049 yjit_reg_op(BIN(setlocal_WC_0), gen_setlocal_wc0);
5050 yjit_reg_op(BIN(setlocal_WC_1), gen_setlocal_wc1);
5051 yjit_reg_op(BIN(getinstancevariable), gen_getinstancevariable);
5052 yjit_reg_op(BIN(setinstancevariable), gen_setinstancevariable);
5053 yjit_reg_op(BIN(defined), gen_defined);
5054 yjit_reg_op(BIN(checktype), gen_checktype);
5055 yjit_reg_op(BIN(checkkeyword), gen_checkkeyword);
5056 yjit_reg_op(BIN(opt_lt), gen_opt_lt);
5057 yjit_reg_op(BIN(opt_le), gen_opt_le);
5058 yjit_reg_op(BIN(opt_ge), gen_opt_ge);
5059 yjit_reg_op(BIN(opt_gt), gen_opt_gt);
5060 yjit_reg_op(BIN(opt_eq), gen_opt_eq);
5061 yjit_reg_op(BIN(opt_neq), gen_opt_neq);
5062 yjit_reg_op(BIN(opt_aref), gen_opt_aref);
5063 yjit_reg_op(BIN(opt_aset), gen_opt_aset);
5064 yjit_reg_op(BIN(opt_and), gen_opt_and);
5065 yjit_reg_op(BIN(opt_or), gen_opt_or);
5066 yjit_reg_op(BIN(opt_minus), gen_opt_minus);
5067 yjit_reg_op(BIN(opt_plus), gen_opt_plus);
5068 yjit_reg_op(BIN(opt_mult), gen_opt_mult);
5069 yjit_reg_op(BIN(opt_div), gen_opt_div);
5070 yjit_reg_op(BIN(opt_mod), gen_opt_mod);
5071 yjit_reg_op(BIN(opt_ltlt), gen_opt_ltlt);
5072 yjit_reg_op(BIN(opt_nil_p), gen_opt_nil_p);
5073 yjit_reg_op(BIN(opt_empty_p), gen_opt_empty_p);
5074 yjit_reg_op(BIN(opt_str_freeze), gen_opt_str_freeze);
5075 yjit_reg_op(BIN(opt_str_uminus), gen_opt_str_uminus);
5076 yjit_reg_op(BIN(opt_not), gen_opt_not);
5077 yjit_reg_op(BIN(opt_size), gen_opt_size);
5078 yjit_reg_op(BIN(opt_length), gen_opt_length);
5079 yjit_reg_op(BIN(opt_regexpmatch2), gen_opt_regexpmatch2);
5080 yjit_reg_op(BIN(opt_getinlinecache), gen_opt_getinlinecache);
5081 yjit_reg_op(BIN(invokebuiltin), gen_invokebuiltin);
5082 yjit_reg_op(BIN(opt_invokebuiltin_delegate), gen_opt_invokebuiltin_delegate);
5083 yjit_reg_op(BIN(opt_invokebuiltin_delegate_leave), gen_opt_invokebuiltin_delegate);
5084 yjit_reg_op(BIN(opt_case_dispatch), gen_opt_case_dispatch);
5085 yjit_reg_op(BIN(branchif), gen_branchif);
5086 yjit_reg_op(BIN(branchunless), gen_branchunless);
5087 yjit_reg_op(BIN(branchnil), gen_branchnil);
5088 yjit_reg_op(BIN(jump), gen_jump);
5089 yjit_reg_op(BIN(getblockparamproxy), gen_getblockparamproxy);
5090 yjit_reg_op(BIN(opt_send_without_block), gen_opt_send_without_block);
5091 yjit_reg_op(BIN(send), gen_send);
5092 yjit_reg_op(BIN(invokesuper), gen_invokesuper);
5093 yjit_reg_op(BIN(leave), gen_leave);
5094 yjit_reg_op(BIN(getglobal), gen_getglobal);
5095 yjit_reg_op(BIN(setglobal), gen_setglobal);
5096 yjit_reg_op(BIN(anytostring), gen_anytostring);
5097 yjit_reg_op(BIN(objtostring), gen_objtostring);
5098 yjit_reg_op(BIN(toregexp), gen_toregexp);
5099 yjit_reg_op(BIN(intern), gen_intern);
5100 yjit_reg_op(BIN(getspecial), gen_getspecial);
5101 yjit_reg_op(BIN(getclassvariable), gen_getclassvariable);
5102 yjit_reg_op(BIN(setclassvariable), gen_setclassvariable);
5104 yjit_method_codegen_table = st_init_numtable();
5106 // Specialization for C methods. See yjit_reg_method() for details.
5107 yjit_reg_method(rb_cBasicObject, "!", jit_rb_obj_not);
5109 yjit_reg_method(rb_cNilClass, "nil?", jit_rb_true);
5110 yjit_reg_method(rb_mKernel, "nil?", jit_rb_false);
5112 yjit_reg_method(rb_cBasicObject, "==", jit_rb_obj_equal);
5113 yjit_reg_method(rb_cBasicObject, "equal?", jit_rb_obj_equal);
5114 yjit_reg_method(rb_mKernel, "eql?", jit_rb_obj_equal);
5115 yjit_reg_method(rb_cModule, "==", jit_rb_obj_equal);
5116 yjit_reg_method(rb_cSymbol, "==", jit_rb_obj_equal);
5117 yjit_reg_method(rb_cSymbol, "===", jit_rb_obj_equal);
5119 // rb_str_to_s() methods in string.c
5120 yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s);
5121 yjit_reg_method(rb_cString, "to_str", jit_rb_str_to_s);
5122 yjit_reg_method(rb_cString, "bytesize", jit_rb_str_bytesize);
5124 // Thread.current
5125 yjit_reg_method(rb_singleton_class(rb_cThread), "current", jit_thread_s_current);