// yjit_codegen.c
1 // This file is a fragment of the yjit.o compilation unit. See yjit.c.
2 #include "internal.h"
3 #include "gc.h"
4 #include "internal/compile.h"
5 #include "internal/class.h"
6 #include "internal/hash.h"
7 #include "internal/object.h"
8 #include "internal/sanitizers.h"
9 #include "internal/string.h"
10 #include "internal/struct.h"
11 #include "internal/variable.h"
12 #include "internal/re.h"
13 #include "probes.h"
14 #include "probes_helper.h"
15 #include "yjit.h"
16 #include "yjit_iface.h"
17 #include "yjit_core.h"
18 #include "yjit_codegen.h"
19 #include "yjit_asm.h"
21 // Map from YARV opcodes to code generation functions
22 static codegen_fn gen_fns[VM_INSTRUCTION_SIZE] = { NULL };
24 // Map from method entries to code generation functions
25 static st_table *yjit_method_codegen_table = NULL;
27 // Code for exiting back to the interpreter from the leave instruction
28 static void *leave_exit_code;
30 // Code for full logic of returning from C method and exiting to the interpreter
31 static uint32_t outline_full_cfunc_return_pos;
33 // For implementing global code invalidation
34 struct codepage_patch {
35 uint32_t inline_patch_pos;
36 uint32_t outlined_target_pos;
39 typedef rb_darray(struct codepage_patch) patch_array_t;
41 static patch_array_t global_inval_patches = NULL;
43 // Print the current source location for debugging purposes
44 RBIMPL_ATTR_MAYBE_UNUSED()
45 static void
46 jit_print_loc(jitstate_t *jit, const char *msg)
48 char *ptr;
49 long len;
50 VALUE path = rb_iseq_path(jit->iseq);
51 RSTRING_GETMEM(path, ptr, len);
52 fprintf(stderr, "%s %.*s:%u\n", msg, (int)len, ptr, rb_iseq_line_no(jit->iseq, jit->insn_idx));
55 // dump an object for debugging purposes
56 RBIMPL_ATTR_MAYBE_UNUSED()
57 static void
58 jit_obj_info_dump(codeblock_t *cb, x86opnd_t opnd) {
59 push_regs(cb);
60 mov(cb, C_ARG_REGS[0], opnd);
61 call_ptr(cb, REG0, (void *)rb_obj_info_dump);
62 pop_regs(cb);
65 // Get the current instruction's opcode
66 static int
67 jit_get_opcode(jitstate_t *jit)
69 return jit->opcode;
72 // Get the index of the next instruction
73 static uint32_t
74 jit_next_insn_idx(jitstate_t *jit)
76 return jit->insn_idx + insn_len(jit_get_opcode(jit));
79 // Get an instruction argument by index
80 static VALUE
81 jit_get_arg(jitstate_t *jit, size_t arg_idx)
83 RUBY_ASSERT(arg_idx + 1 < (size_t)insn_len(jit_get_opcode(jit)));
84 return *(jit->pc + arg_idx + 1);
87 // Load a VALUE into a register and keep track of the reference if it is on the GC heap.
88 static void
89 jit_mov_gc_ptr(jitstate_t *jit, codeblock_t *cb, x86opnd_t reg, VALUE ptr)
91 RUBY_ASSERT(reg.type == OPND_REG && reg.num_bits == 64);
93 // Load the pointer constant into the specified register
94 mov(cb, reg, const_ptr_opnd((void*)ptr));
96 // The pointer immediate is encoded as the last part of the mov written out
97 uint32_t ptr_offset = cb->write_pos - sizeof(VALUE);
99 if (!SPECIAL_CONST_P(ptr)) {
100 if (!rb_darray_append(&jit->block->gc_object_offsets, ptr_offset)) {
101 rb_bug("allocation failed");
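
// Illustrative sketch (assumed, not verbatim from the GC integration): the recorded
// ptr_offset lets GC code later find the 8-byte immediate written by the mov above
// and update it if the referenced object moves, roughly:
//
//   VALUE *slot = (VALUE *)cb_get_ptr(cb, ptr_offset); // offsets come from gc_object_offsets
//   *slot = rb_gc_location(*slot);                     // follow the object if compaction moved it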
106 // Check if we are compiling the instruction at the stub PC
// Meaning we are compiling the instruction that is next to execute
108 static bool
109 jit_at_current_insn(jitstate_t *jit)
111 const VALUE *ec_pc = jit->ec->cfp->pc;
112 return (ec_pc == jit->pc);
115 // Peek at the nth topmost value on the Ruby stack.
116 // Returns the topmost value when n == 0.
117 static VALUE
118 jit_peek_at_stack(jitstate_t *jit, ctx_t *ctx, int n)
120 RUBY_ASSERT(jit_at_current_insn(jit));
122 // Note: this does not account for ctx->sp_offset because
123 // this is only available when hitting a stub, and while
124 // hitting a stub, cfp->sp needs to be up to date in case
125 // codegen functions trigger GC. See :stub-sp-flush:.
126 VALUE *sp = jit->ec->cfp->sp;
128 return *(sp - 1 - n);
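
// For example, with values [.., x, y, z] on the Ruby stack and cfp->sp pointing
// one past z: n == 0 reads z (sp[-1]), n == 1 reads y (sp[-2]), and so on.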
131 static VALUE
132 jit_peek_at_self(jitstate_t *jit, ctx_t *ctx)
134 return jit->ec->cfp->self;
137 RBIMPL_ATTR_MAYBE_UNUSED()
138 static VALUE
139 jit_peek_at_local(jitstate_t *jit, ctx_t *ctx, int n)
141 RUBY_ASSERT(jit_at_current_insn(jit));
143 int32_t local_table_size = jit->iseq->body->local_table_size;
144 RUBY_ASSERT(n < (int)jit->iseq->body->local_table_size);
146 const VALUE *ep = jit->ec->cfp->ep;
147 return ep[-VM_ENV_DATA_SIZE - local_table_size + n + 1];
150 // Save the incremented PC on the CFP
// This is necessary when callees can raise or allocate
152 static void
153 jit_save_pc(jitstate_t *jit, x86opnd_t scratch_reg)
155 codeblock_t *cb = jit->cb;
156 mov(cb, scratch_reg, const_ptr_opnd(jit->pc + insn_len(jit->opcode)));
157 mov(cb, mem_opnd(64, REG_CFP, offsetof(rb_control_frame_t, pc)), scratch_reg);
160 // Save the current SP on the CFP
161 // This realigns the interpreter SP with the JIT SP
162 // Note: this will change the current value of REG_SP,
163 // which could invalidate memory operands
164 static void
165 jit_save_sp(jitstate_t *jit, ctx_t *ctx)
167 if (ctx->sp_offset != 0) {
168 x86opnd_t stack_pointer = ctx_sp_opnd(ctx, 0);
169 codeblock_t *cb = jit->cb;
170 lea(cb, REG_SP, stack_pointer);
171 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG_SP);
172 ctx->sp_offset = 0;
176 // jit_save_pc() + jit_save_sp(). Should be used before calling a routine that
177 // could:
178 // - Perform GC allocation
179 // - Take the VM lock through RB_VM_LOCK_ENTER()
180 // - Perform Ruby method call
181 static void
182 jit_prepare_routine_call(jitstate_t *jit, ctx_t *ctx, x86opnd_t scratch_reg)
184 jit->record_boundary_patch_point = true;
185 jit_save_pc(jit, scratch_reg);
186 jit_save_sp(jit, ctx);
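
// Illustrative usage sketch (assumed pattern, mirroring the codegen functions
// below): call this before emitting a call into a C routine that can allocate or
// raise, then write the routine's return value back onto the YJIT stack.
//
//   jit_prepare_routine_call(jit, ctx, REG0);
//   mov(cb, C_ARG_REGS[0], REG_EC);
//   call_ptr(cb, REG0, (void *)some_allocating_routine); // hypothetical callee
//   x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
//   mov(cb, stack_ret, RAX);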
189 // Record the current codeblock write position for rewriting into a jump into
190 // the outlined block later. Used to implement global code invalidation.
191 static void
192 record_global_inval_patch(const codeblock_t *cb, uint32_t outline_block_target_pos)
194 struct codepage_patch patch_point = { cb->write_pos, outline_block_target_pos };
195 if (!rb_darray_append(&global_inval_patches, patch_point)) rb_bug("allocation failed");
198 static bool jit_guard_known_klass(jitstate_t *jit, ctx_t *ctx, VALUE known_klass, insn_opnd_t insn_opnd, VALUE sample_instance, const int max_chain_depth, uint8_t *side_exit);
200 #if YJIT_STATS
202 // Add a comment at the current position in the code block
203 static void
204 _add_comment(codeblock_t *cb, const char *comment_str)
206 // We can't add comments to the outlined code block
207 if (cb == ocb)
208 return;
210 // Avoid adding duplicate comment strings (can happen due to deferred codegen)
211 size_t num_comments = rb_darray_size(yjit_code_comments);
212 if (num_comments > 0) {
213 struct yjit_comment last_comment = rb_darray_get(yjit_code_comments, num_comments - 1);
214 if (last_comment.offset == cb->write_pos && strcmp(last_comment.comment, comment_str) == 0) {
215 return;
219 struct yjit_comment new_comment = (struct yjit_comment){ cb->write_pos, comment_str };
220 rb_darray_append(&yjit_code_comments, new_comment);
223 // Comments for generated machine code
224 #define ADD_COMMENT(cb, comment) _add_comment((cb), (comment))
226 // Verify the ctx's types and mappings against the compile-time stack, self,
227 // and locals.
228 static void
229 verify_ctx(jitstate_t *jit, ctx_t *ctx)
231 // Only able to check types when at current insn
232 RUBY_ASSERT(jit_at_current_insn(jit));
234 VALUE self_val = jit_peek_at_self(jit, ctx);
235 if (type_diff(yjit_type_of_value(self_val), ctx->self_type) == INT_MAX) {
236 rb_bug("verify_ctx: ctx type (%s) incompatible with actual value of self: %s", yjit_type_name(ctx->self_type), rb_obj_info(self_val));
239 for (int i = 0; i < ctx->stack_size && i < MAX_TEMP_TYPES; i++) {
240 temp_type_mapping_t learned = ctx_get_opnd_mapping(ctx, OPND_STACK(i));
241 VALUE val = jit_peek_at_stack(jit, ctx, i);
242 val_type_t detected = yjit_type_of_value(val);
244 if (learned.mapping.kind == TEMP_SELF) {
245 if (self_val != val) {
246 rb_bug("verify_ctx: stack value was mapped to self, but values did not match\n"
247 " stack: %s\n"
248 " self: %s",
249 rb_obj_info(val),
250 rb_obj_info(self_val));
254 if (learned.mapping.kind == TEMP_LOCAL) {
255 int local_idx = learned.mapping.idx;
256 VALUE local_val = jit_peek_at_local(jit, ctx, local_idx);
257 if (local_val != val) {
258 rb_bug("verify_ctx: stack value was mapped to local, but values did not match\n"
259 " stack: %s\n"
260 " local %i: %s",
261 rb_obj_info(val),
262 local_idx,
263 rb_obj_info(local_val));
267 if (type_diff(detected, learned.type) == INT_MAX) {
268 rb_bug("verify_ctx: ctx type (%s) incompatible with actual value on stack: %s", yjit_type_name(learned.type), rb_obj_info(val));
272 int32_t local_table_size = jit->iseq->body->local_table_size;
273 for (int i = 0; i < local_table_size && i < MAX_TEMP_TYPES; i++) {
274 val_type_t learned = ctx->local_types[i];
275 VALUE val = jit_peek_at_local(jit, ctx, i);
276 val_type_t detected = yjit_type_of_value(val);
278 if (type_diff(detected, learned) == INT_MAX) {
279 rb_bug("verify_ctx: ctx type (%s) incompatible with actual value of local: %s", yjit_type_name(learned), rb_obj_info(val));
284 #else
286 #define ADD_COMMENT(cb, comment) ((void)0)
287 #define verify_ctx(jit, ctx) ((void)0)
289 #endif // if YJIT_STATS
291 #if YJIT_STATS
293 // Increment a profiling counter with counter_name
294 #define GEN_COUNTER_INC(cb, counter_name) _gen_counter_inc(cb, &(yjit_runtime_counters . counter_name))
295 static void
296 _gen_counter_inc(codeblock_t *cb, int64_t *counter)
298 if (!rb_yjit_opts.gen_stats) return;
// Use REG1 because there might be a return value in REG0
301 mov(cb, REG1, const_ptr_opnd(counter));
302 cb_write_lock_prefix(cb); // for ractors.
303 add(cb, mem_opnd(64, REG1, 0), imm_opnd(1));
306 // Increment a counter then take an existing side exit.
307 #define COUNTED_EXIT(jit, side_exit, counter_name) _counted_side_exit(jit, side_exit, &(yjit_runtime_counters . counter_name))
308 static uint8_t *
309 _counted_side_exit(jitstate_t* jit, uint8_t *existing_side_exit, int64_t *counter)
311 if (!rb_yjit_opts.gen_stats) return existing_side_exit;
313 uint8_t *start = cb_get_ptr(jit->ocb, jit->ocb->write_pos);
314 _gen_counter_inc(jit->ocb, counter);
315 jmp_ptr(jit->ocb, existing_side_exit);
316 return start;
319 #else
321 #define GEN_COUNTER_INC(cb, counter_name) ((void)0)
322 #define COUNTED_EXIT(jit, side_exit, counter_name) side_exit
324 #endif // if YJIT_STATS
326 // Generate an exit to return to the interpreter
327 static uint32_t
328 yjit_gen_exit(VALUE *exit_pc, ctx_t *ctx, codeblock_t *cb)
330 const uint32_t code_pos = cb->write_pos;
332 ADD_COMMENT(cb, "exit to interpreter");
// Generate the code to exit to the interpreter
335 // Write the adjusted SP back into the CFP
336 if (ctx->sp_offset != 0) {
337 x86opnd_t stack_pointer = ctx_sp_opnd(ctx, 0);
338 lea(cb, REG_SP, stack_pointer);
339 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG_SP);
342 // Update CFP->PC
343 mov(cb, RAX, const_ptr_opnd(exit_pc));
344 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, pc), RAX);
346 // Accumulate stats about interpreter exits
347 #if YJIT_STATS
348 if (rb_yjit_opts.gen_stats) {
349 mov(cb, RDI, const_ptr_opnd(exit_pc));
350 call_ptr(cb, RSI, (void *)&yjit_count_side_exit_op);
352 #endif
354 pop(cb, REG_SP);
355 pop(cb, REG_EC);
356 pop(cb, REG_CFP);
358 mov(cb, RAX, imm_opnd(Qundef));
359 ret(cb);
361 return code_pos;
364 // Generate a continuation for gen_leave() that exits to the interpreter at REG_CFP->pc.
365 static uint8_t *
366 yjit_gen_leave_exit(codeblock_t *cb)
368 uint8_t *code_ptr = cb_get_ptr(cb, cb->write_pos);
370 // Note, gen_leave() fully reconstructs interpreter state and leaves the
371 // return value in RAX before coming here.
373 // Every exit to the interpreter should be counted
374 GEN_COUNTER_INC(cb, leave_interp_return);
376 pop(cb, REG_SP);
377 pop(cb, REG_EC);
378 pop(cb, REG_CFP);
380 ret(cb);
382 return code_ptr;
385 // Fill code_for_exit_from_stub. This is used by branch_stub_hit() to exit
386 // to the interpreter when it cannot service a stub by generating new code.
387 // Before coming here, branch_stub_hit() takes care of fully reconstructing
388 // interpreter state.
389 static void
390 gen_code_for_exit_from_stub(void)
392 codeblock_t *cb = ocb;
393 code_for_exit_from_stub = cb_get_ptr(cb, cb->write_pos);
395 GEN_COUNTER_INC(cb, exit_from_branch_stub);
397 pop(cb, REG_SP);
398 pop(cb, REG_EC);
399 pop(cb, REG_CFP);
401 mov(cb, RAX, imm_opnd(Qundef));
402 ret(cb);
405 // :side-exit:
406 // Get an exit for the current instruction in the outlined block. The code
407 // for each instruction often begins with several guards before proceeding
408 // to do work. When guards fail, an option we have is to exit to the
409 // interpreter at an instruction boundary. The piece of code that takes
410 // care of reconstructing interpreter state and exiting out of generated
// code is called the side exit.
413 // No guards change the logic for reconstructing interpreter state at the
414 // moment, so there is one unique side exit for each context. Note that
415 // it's incorrect to jump to the side exit after any ctx stack push/pop operations
// since they change the logic required for reconstructing interpreter state.
417 static uint8_t *
418 yjit_side_exit(jitstate_t *jit, ctx_t *ctx)
420 if (!jit->side_exit_for_pc) {
421 codeblock_t *ocb = jit->ocb;
422 uint32_t pos = yjit_gen_exit(jit->pc, ctx, ocb);
423 jit->side_exit_for_pc = cb_get_ptr(ocb, pos);
426 return jit->side_exit_for_pc;
429 // Ensure that there is an exit for the start of the block being compiled.
430 // Block invalidation uses this exit.
431 static void
432 jit_ensure_block_entry_exit(jitstate_t *jit)
434 block_t *block = jit->block;
435 if (block->entry_exit) return;
437 if (jit->insn_idx == block->blockid.idx) {
438 // We are compiling the first instruction in the block.
439 // Generate the exit with the cache in jitstate.
440 block->entry_exit = yjit_side_exit(jit, &block->ctx);
442 else {
443 VALUE *pc = yjit_iseq_pc_at_idx(block->blockid.iseq, block->blockid.idx);
444 uint32_t pos = yjit_gen_exit(pc, &block->ctx, ocb);
445 block->entry_exit = cb_get_ptr(ocb, pos);
449 // Generate a runtime guard that ensures the PC is at the start of the iseq,
// otherwise take a side exit. This is to handle the situation of optional
// parameters. When a function with optional parameters is called, the entry
// PC for the method isn't necessarily 0, but we always generate code that
// assumes the entry point is 0.
454 static void
455 yjit_pc_guard(codeblock_t *cb, const rb_iseq_t *iseq)
457 RUBY_ASSERT(cb != NULL);
459 mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, pc));
460 mov(cb, REG1, const_ptr_opnd(iseq->body->iseq_encoded));
461 xor(cb, REG0, REG1);
463 // xor should impact ZF, so we can jz here
464 uint32_t pc_is_zero = cb_new_label(cb, "pc_is_zero");
465 jz_label(cb, pc_is_zero);
467 // We're not starting at the first PC, so we need to exit.
468 GEN_COUNTER_INC(cb, leave_start_pc_non_zero);
470 pop(cb, REG_SP);
471 pop(cb, REG_EC);
472 pop(cb, REG_CFP);
474 mov(cb, RAX, imm_opnd(Qundef));
475 ret(cb);
477 // PC should be at the beginning
478 cb_write_label(cb, pc_is_zero);
479 cb_link_labels(cb);
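
// Rough C equivalent of the guard emitted above (illustrative only):
//
//   if (cfp->pc != iseq->body->iseq_encoded) {
//       return Qundef; // leave generated code; the interpreter resumes at cfp->pc
//   }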
482 // The code we generate in gen_send_cfunc() doesn't fire the c_return TracePoint event
// like the interpreter. When tracing for c_return is enabled, we patch the code after
// the C method return to call into this to fire the event.
485 static void
486 full_cfunc_return(rb_execution_context_t *ec, VALUE return_value)
488 rb_control_frame_t *cfp = ec->cfp;
489 RUBY_ASSERT_ALWAYS(cfp == GET_EC()->cfp);
490 const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(cfp);
492 RUBY_ASSERT_ALWAYS(RUBYVM_CFUNC_FRAME_P(cfp));
493 RUBY_ASSERT_ALWAYS(me->def->type == VM_METHOD_TYPE_CFUNC);
495 // CHECK_CFP_CONSISTENCY("full_cfunc_return"); TODO revive this
497 // Pop the C func's frame and fire the c_return TracePoint event
498 // Note that this is the same order as vm_call_cfunc_with_frame().
499 rb_vm_pop_frame(ec);
500 EXEC_EVENT_HOOK(ec, RUBY_EVENT_C_RETURN, cfp->self, me->def->original_id, me->called_id, me->owner, return_value);
501 // Note, this deviates from the interpreter in that users need to enable
502 // a c_return TracePoint for this DTrace hook to work. A reasonable change
503 // since the Ruby return event works this way as well.
504 RUBY_DTRACE_CMETHOD_RETURN_HOOK(ec, me->owner, me->def->original_id);
506 // Push return value into the caller's stack. We know that it's a frame that
507 // uses cfp->sp because we are patching a call done with gen_send_cfunc().
508 ec->cfp->sp[0] = return_value;
509 ec->cfp->sp++;
512 // Landing code for when c_return tracing is enabled. See full_cfunc_return().
513 static void
514 gen_full_cfunc_return(void)
516 codeblock_t *cb = ocb;
517 outline_full_cfunc_return_pos = ocb->write_pos;
// This chunk of code expects REG_EC to be filled properly and
520 // RAX to contain the return value of the C method.
522 // Call full_cfunc_return()
523 mov(cb, C_ARG_REGS[0], REG_EC);
524 mov(cb, C_ARG_REGS[1], RAX);
525 call_ptr(cb, REG0, (void *)full_cfunc_return);
527 // Count the exit
528 GEN_COUNTER_INC(cb, traced_cfunc_return);
530 // Return to the interpreter
531 pop(cb, REG_SP);
532 pop(cb, REG_EC);
533 pop(cb, REG_CFP);
535 mov(cb, RAX, imm_opnd(Qundef));
536 ret(cb);
// Compile an interpreter entry block to be inserted into an iseq.
// Returns `NULL` if compilation fails.
543 static uint8_t *
544 yjit_entry_prologue(codeblock_t *cb, const rb_iseq_t *iseq)
546 RUBY_ASSERT(cb != NULL);
548 enum { MAX_PROLOGUE_SIZE = 1024 };
550 // Check if we have enough executable memory
551 if (cb->write_pos + MAX_PROLOGUE_SIZE >= cb->mem_size) {
552 return NULL;
555 const uint32_t old_write_pos = cb->write_pos;
557 // Align the current write position to cache line boundaries
558 cb_align_pos(cb, 64);
560 uint8_t *code_ptr = cb_get_ptr(cb, cb->write_pos);
561 ADD_COMMENT(cb, "yjit entry");
563 push(cb, REG_CFP);
564 push(cb, REG_EC);
565 push(cb, REG_SP);
567 // We are passed EC and CFP
568 mov(cb, REG_EC, C_ARG_REGS[0]);
569 mov(cb, REG_CFP, C_ARG_REGS[1]);
571 // Load the current SP from the CFP into REG_SP
572 mov(cb, REG_SP, member_opnd(REG_CFP, rb_control_frame_t, sp));
574 // Setup cfp->jit_return
575 // TODO: this could use an IP relative LEA instead of an 8 byte immediate
576 mov(cb, REG0, const_ptr_opnd(leave_exit_code));
577 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, jit_return), REG0);
579 // We're compiling iseqs that we *expect* to start at `insn_idx`. But in
580 // the case of optional parameters, the interpreter can set the pc to a
581 // different location depending on the optional parameters. If an iseq
582 // has optional parameters, we'll add a runtime check that the PC we've
583 // compiled for is the same PC that the interpreter wants us to run with.
584 // If they don't match, then we'll take a side exit.
585 if (iseq->body->param.flags.has_opt) {
586 yjit_pc_guard(cb, iseq);
589 // Verify MAX_PROLOGUE_SIZE
590 RUBY_ASSERT_ALWAYS(cb->write_pos - old_write_pos <= MAX_PROLOGUE_SIZE);
592 return code_ptr;
595 // Generate code to check for interrupts and take a side-exit.
596 // Warning: this function clobbers REG0
597 static void
598 yjit_check_ints(codeblock_t *cb, uint8_t *side_exit)
600 // Check for interrupts
601 // see RUBY_VM_CHECK_INTS(ec) macro
602 ADD_COMMENT(cb, "RUBY_VM_CHECK_INTS(ec)");
603 mov(cb, REG0_32, member_opnd(REG_EC, rb_execution_context_t, interrupt_mask));
604 not(cb, REG0_32);
605 test(cb, member_opnd(REG_EC, rb_execution_context_t, interrupt_flag), REG0_32);
606 jnz_ptr(cb, side_exit);
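
// Rough C equivalent of the emitted check (see RUBY_VM_CHECK_INTS()):
//
//   if (ec->interrupt_flag & ~ec->interrupt_mask) goto side_exit;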
609 // Generate a stubbed unconditional jump to the next bytecode instruction.
610 // Blocks that are part of a guard chain can use this to share the same successor.
611 static void
612 jit_jump_to_next_insn(jitstate_t *jit, const ctx_t *current_context)
// Reset the depth since in current usages we only ever jump to
615 // chain_depth > 0 from the same instruction.
616 ctx_t reset_depth = *current_context;
617 reset_depth.chain_depth = 0;
619 blockid_t jump_block = { jit->iseq, jit_next_insn_idx(jit) };
621 // We are at the end of the current instruction. Record the boundary.
622 if (jit->record_boundary_patch_point) {
623 uint32_t exit_pos = yjit_gen_exit(jit->pc + insn_len(jit->opcode), &reset_depth, jit->ocb);
624 record_global_inval_patch(jit->cb, exit_pos);
625 jit->record_boundary_patch_point = false;
628 // Generate the jump instruction
629 gen_direct_jump(
630 jit,
631 &reset_depth,
632 jump_block
636 // Compile a sequence of bytecode instructions for a given basic block version.
637 // Part of gen_block_version().
638 static block_t *
639 gen_single_block(blockid_t blockid, const ctx_t *start_ctx, rb_execution_context_t *ec)
641 RUBY_ASSERT(cb != NULL);
642 verify_blockid(blockid);
644 // Allocate the new block
645 block_t *block = calloc(1, sizeof(block_t));
646 if (!block) {
647 return NULL;
650 // Copy the starting context to avoid mutating it
651 ctx_t ctx_copy = *start_ctx;
652 ctx_t *ctx = &ctx_copy;
654 // Limit the number of specialized versions for this block
655 *ctx = limit_block_versions(blockid, ctx);
657 // Save the starting context on the block.
658 block->blockid = blockid;
659 block->ctx = *ctx;
661 RUBY_ASSERT(!(blockid.idx == 0 && start_ctx->stack_size > 0));
663 const rb_iseq_t *iseq = block->blockid.iseq;
664 const unsigned int iseq_size = iseq->body->iseq_size;
665 uint32_t insn_idx = block->blockid.idx;
666 const uint32_t starting_insn_idx = insn_idx;
668 // Initialize a JIT state object
669 jitstate_t jit = {
670 .cb = cb,
671 .ocb = ocb,
672 .block = block,
673 .iseq = iseq,
674 .ec = ec
677 // Mark the start position of the block
678 block->start_addr = cb_get_write_ptr(cb);
680 // For each instruction to compile
681 while (insn_idx < iseq_size) {
682 // Get the current pc and opcode
683 VALUE *pc = yjit_iseq_pc_at_idx(iseq, insn_idx);
684 int opcode = yjit_opcode_at_pc(iseq, pc);
685 RUBY_ASSERT(opcode >= 0 && opcode < VM_INSTRUCTION_SIZE);
687 // opt_getinlinecache wants to be in a block all on its own. Cut the block short
688 // if we run into it. See gen_opt_getinlinecache() for details.
689 if (opcode == BIN(opt_getinlinecache) && insn_idx > starting_insn_idx) {
690 jit_jump_to_next_insn(&jit, ctx);
691 break;
694 // Set the current instruction
695 jit.insn_idx = insn_idx;
696 jit.opcode = opcode;
697 jit.pc = pc;
698 jit.side_exit_for_pc = NULL;
700 // If previous instruction requested to record the boundary
701 if (jit.record_boundary_patch_point) {
702 // Generate an exit to this instruction and record it
703 uint32_t exit_pos = yjit_gen_exit(jit.pc, ctx, ocb);
704 record_global_inval_patch(cb, exit_pos);
705 jit.record_boundary_patch_point = false;
708 // Verify our existing assumption (DEBUG)
709 if (jit_at_current_insn(&jit)) {
710 verify_ctx(&jit, ctx);
713 // Lookup the codegen function for this instruction
714 codegen_fn gen_fn = gen_fns[opcode];
715 codegen_status_t status = YJIT_CANT_COMPILE;
716 if (gen_fn) {
717 if (0) {
718 fprintf(stderr, "compiling %d: %s\n", insn_idx, insn_name(opcode));
719 print_str(cb, insn_name(opcode));
722 // :count-placement:
723 // Count bytecode instructions that execute in generated code.
// Note that the increment happens even when the output takes a side exit.
725 GEN_COUNTER_INC(cb, exec_instruction);
727 // Add a comment for the name of the YARV instruction
728 ADD_COMMENT(cb, insn_name(opcode));
730 // Call the code generation function
731 status = gen_fn(&jit, ctx, cb);
734 // If we can't compile this instruction
735 // exit to the interpreter and stop compiling
736 if (status == YJIT_CANT_COMPILE) {
// TODO: if the codegen function makes changes to ctx and then returns YJIT_CANT_COMPILE,
738 // the exit this generates would be wrong. We could save a copy of the entry context
739 // and assert that ctx is the same here.
740 uint32_t exit_off = yjit_gen_exit(jit.pc, ctx, cb);
742 // If this is the first instruction in the block, then we can use
743 // the exit for block->entry_exit.
744 if (insn_idx == block->blockid.idx) {
745 block->entry_exit = cb_get_ptr(cb, exit_off);
747 break;
750 // For now, reset the chain depth after each instruction as only the
751 // first instruction in the block can concern itself with the depth.
752 ctx->chain_depth = 0;
754 // Move to the next instruction to compile
755 insn_idx += insn_len(opcode);
757 // If the instruction terminates this block
758 if (status == YJIT_END_BLOCK) {
759 break;
763 // Mark the end position of the block
764 block->end_addr = cb_get_write_ptr(cb);
766 // Store the index of the last instruction in the block
767 block->end_idx = insn_idx;
769 // We currently can't handle cases where the request is for a block that
770 // doesn't go to the next instruction.
771 RUBY_ASSERT(!jit.record_boundary_patch_point);
773 // If code for the block doesn't fit, free the block and fail.
774 if (cb->dropped_bytes || ocb->dropped_bytes) {
775 yjit_free_block(block);
776 return NULL;
779 if (YJIT_DUMP_MODE >= 2) {
// Dump the list of compiled instructions
781 fprintf(stderr, "Compiled the following for iseq=%p:\n", (void *)iseq);
782 for (uint32_t idx = block->blockid.idx; idx < insn_idx; ) {
783 int opcode = yjit_opcode_at_pc(iseq, yjit_iseq_pc_at_idx(iseq, idx));
784 fprintf(stderr, " %04d %s\n", idx, insn_name(opcode));
785 idx += insn_len(opcode);
789 return block;
792 static codegen_status_t gen_opt_send_without_block(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb);
794 static codegen_status_t
795 gen_nop(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
797 // Do nothing
798 return YJIT_KEEP_COMPILING;
801 static codegen_status_t
802 gen_dup(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
804 // Get the top value and its type
805 x86opnd_t dup_val = ctx_stack_pop(ctx, 0);
806 temp_type_mapping_t mapping = ctx_get_opnd_mapping(ctx, OPND_STACK(0));
808 // Push the same value on top
809 x86opnd_t loc0 = ctx_stack_push_mapping(ctx, mapping);
810 mov(cb, REG0, dup_val);
811 mov(cb, loc0, REG0);
813 return YJIT_KEEP_COMPILING;
816 // duplicate stack top n elements
817 static codegen_status_t
818 gen_dupn(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
820 rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
// In practice, this seems to be used only for n == 2
823 if (n != 2) {
824 return YJIT_CANT_COMPILE;
827 x86opnd_t opnd1 = ctx_stack_opnd(ctx, 1);
828 x86opnd_t opnd0 = ctx_stack_opnd(ctx, 0);
829 temp_type_mapping_t mapping1 = ctx_get_opnd_mapping(ctx, OPND_STACK(1));
830 temp_type_mapping_t mapping0 = ctx_get_opnd_mapping(ctx, OPND_STACK(0));
832 x86opnd_t dst1 = ctx_stack_push_mapping(ctx, mapping1);
833 mov(cb, REG0, opnd1);
834 mov(cb, dst1, REG0);
836 x86opnd_t dst0 = ctx_stack_push_mapping(ctx, mapping0);
837 mov(cb, REG0, opnd0);
838 mov(cb, dst0, REG0);
840 return YJIT_KEEP_COMPILING;
843 static void
844 stack_swap(ctx_t *ctx, codeblock_t *cb, int offset0, int offset1, x86opnd_t reg0, x86opnd_t reg1)
846 x86opnd_t opnd0 = ctx_stack_opnd(ctx, offset0);
847 x86opnd_t opnd1 = ctx_stack_opnd(ctx, offset1);
849 temp_type_mapping_t mapping0 = ctx_get_opnd_mapping(ctx, OPND_STACK(offset0));
850 temp_type_mapping_t mapping1 = ctx_get_opnd_mapping(ctx, OPND_STACK(offset1));
852 mov(cb, reg0, opnd0);
853 mov(cb, reg1, opnd1);
854 mov(cb, opnd0, reg1);
855 mov(cb, opnd1, reg0);
857 ctx_set_opnd_mapping(ctx, OPND_STACK(offset0), mapping1);
858 ctx_set_opnd_mapping(ctx, OPND_STACK(offset1), mapping0);
861 // Swap top 2 stack entries
862 static codegen_status_t
863 gen_swap(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
stack_swap(ctx, cb, 0, 1, REG0, REG1);
866 return YJIT_KEEP_COMPILING;
869 // set Nth stack entry to stack top
870 static codegen_status_t
871 gen_setn(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
873 rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
875 // Set the destination
876 x86opnd_t top_val = ctx_stack_pop(ctx, 0);
877 x86opnd_t dst_opnd = ctx_stack_opnd(ctx, (int32_t)n);
878 mov(cb, REG0, top_val);
879 mov(cb, dst_opnd, REG0);
881 temp_type_mapping_t mapping = ctx_get_opnd_mapping(ctx, OPND_STACK(0));
882 ctx_set_opnd_mapping(ctx, OPND_STACK(n), mapping);
884 return YJIT_KEEP_COMPILING;
887 // get nth stack value, then push it
888 static codegen_status_t
889 gen_topn(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
891 int32_t n = (int32_t)jit_get_arg(jit, 0);
893 // Get top n type / operand
894 x86opnd_t top_n_val = ctx_stack_opnd(ctx, n);
895 temp_type_mapping_t mapping = ctx_get_opnd_mapping(ctx, OPND_STACK(n));
897 x86opnd_t loc0 = ctx_stack_push_mapping(ctx, mapping);
898 mov(cb, REG0, top_n_val);
899 mov(cb, loc0, REG0);
901 return YJIT_KEEP_COMPILING;
904 static codegen_status_t
905 gen_pop(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
907 // Decrement SP
908 ctx_stack_pop(ctx, 1);
909 return YJIT_KEEP_COMPILING;
912 // Pop n values off the stack
913 static codegen_status_t
914 gen_adjuststack(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
916 rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
917 ctx_stack_pop(ctx, n);
918 return YJIT_KEEP_COMPILING;
921 // new array initialized from top N values
922 static codegen_status_t
923 gen_newarray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
925 rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
927 // Save the PC and SP because we are allocating
928 jit_prepare_routine_call(jit, ctx, REG0);
930 x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(sizeof(VALUE) * (uint32_t)n));
932 // call rb_ec_ary_new_from_values(struct rb_execution_context_struct *ec, long n, const VALUE *elts);
933 mov(cb, C_ARG_REGS[0], REG_EC);
934 mov(cb, C_ARG_REGS[1], imm_opnd(n));
935 lea(cb, C_ARG_REGS[2], values_ptr);
936 call_ptr(cb, REG0, (void *)rb_ec_ary_new_from_values);
938 ctx_stack_pop(ctx, n);
939 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_ARRAY);
940 mov(cb, stack_ret, RAX);
942 return YJIT_KEEP_COMPILING;
945 // dup array
946 static codegen_status_t
947 gen_duparray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
949 VALUE ary = jit_get_arg(jit, 0);
951 // Save the PC and SP because we are allocating
952 jit_prepare_routine_call(jit, ctx, REG0);
954 // call rb_ary_resurrect(VALUE ary);
955 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], ary);
956 call_ptr(cb, REG0, (void *)rb_ary_resurrect);
958 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_ARRAY);
959 mov(cb, stack_ret, RAX);
961 return YJIT_KEEP_COMPILING;
964 // dup hash
965 static codegen_status_t
966 gen_duphash(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
968 VALUE hash = jit_get_arg(jit, 0);
970 // Save the PC and SP because we are allocating
971 jit_prepare_routine_call(jit, ctx, REG0);
973 // call rb_hash_resurrect(VALUE hash);
974 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], hash);
975 call_ptr(cb, REG0, (void *)rb_hash_resurrect);
977 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HASH);
978 mov(cb, stack_ret, RAX);
980 return YJIT_KEEP_COMPILING;
983 VALUE rb_vm_splat_array(VALUE flag, VALUE ary);
985 // call to_a on the array on the stack
986 static codegen_status_t
987 gen_splatarray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
989 VALUE flag = (VALUE) jit_get_arg(jit, 0);
991 // Save the PC and SP because the callee may allocate
992 // Note that this modifies REG_SP, which is why we do it first
993 jit_prepare_routine_call(jit, ctx, REG0);
995 // Get the operands from the stack
996 x86opnd_t ary_opnd = ctx_stack_pop(ctx, 1);
998 // Call rb_vm_splat_array(flag, ary)
999 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], flag);
1000 mov(cb, C_ARG_REGS[1], ary_opnd);
1001 call_ptr(cb, REG1, (void *) rb_vm_splat_array);
1003 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_ARRAY);
1004 mov(cb, stack_ret, RAX);
1006 return YJIT_KEEP_COMPILING;
1009 // new range initialized from top 2 values
1010 static codegen_status_t
1011 gen_newrange(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1013 rb_num_t flag = (rb_num_t)jit_get_arg(jit, 0);
1015 // rb_range_new() allocates and can raise
1016 jit_prepare_routine_call(jit, ctx, REG0);
1018 // val = rb_range_new(low, high, (int)flag);
1019 mov(cb, C_ARG_REGS[0], ctx_stack_opnd(ctx, 1));
1020 mov(cb, C_ARG_REGS[1], ctx_stack_opnd(ctx, 0));
1021 mov(cb, C_ARG_REGS[2], imm_opnd(flag));
1022 call_ptr(cb, REG0, (void *)rb_range_new);
1024 ctx_stack_pop(ctx, 2);
1025 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HEAP);
1026 mov(cb, stack_ret, RAX);
1028 return YJIT_KEEP_COMPILING;
1031 static void
1032 guard_object_is_heap(codeblock_t *cb, x86opnd_t object_opnd, ctx_t *ctx, uint8_t *side_exit)
1034 ADD_COMMENT(cb, "guard object is heap");
1036 // Test that the object is not an immediate
1037 test(cb, object_opnd, imm_opnd(RUBY_IMMEDIATE_MASK));
1038 jnz_ptr(cb, side_exit);
1040 // Test that the object is not false or nil
1041 cmp(cb, object_opnd, imm_opnd(Qnil));
1042 RUBY_ASSERT(Qfalse < Qnil);
1043 jbe_ptr(cb, side_exit);
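
// Taken together, the two tests above roughly amount to the C check
// `if (SPECIAL_CONST_P(obj)) goto side_exit;`: the mask test rejects Fixnum,
// Flonum, Symbol and other immediates, and the `<= Qnil` comparison rejects
// Qfalse and Qnil.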
1046 static inline void
1047 guard_object_is_array(codeblock_t *cb, x86opnd_t object_opnd, x86opnd_t flags_opnd, ctx_t *ctx, uint8_t *side_exit)
1049 ADD_COMMENT(cb, "guard object is array");
1051 // Pull out the type mask
1052 mov(cb, flags_opnd, member_opnd(object_opnd, struct RBasic, flags));
1053 and(cb, flags_opnd, imm_opnd(RUBY_T_MASK));
1055 // Compare the result with T_ARRAY
1056 cmp(cb, flags_opnd, imm_opnd(T_ARRAY));
1057 jne_ptr(cb, side_exit);
1060 // push enough nils onto the stack to fill out an array
1061 static codegen_status_t
1062 gen_expandarray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1064 int flag = (int) jit_get_arg(jit, 1);
1066 // If this instruction has the splat flag, then bail out.
1067 if (flag & 0x01) {
1068 GEN_COUNTER_INC(cb, expandarray_splat);
1069 return YJIT_CANT_COMPILE;
1072 // If this instruction has the postarg flag, then bail out.
1073 if (flag & 0x02) {
1074 GEN_COUNTER_INC(cb, expandarray_postarg);
1075 return YJIT_CANT_COMPILE;
1078 uint8_t *side_exit = yjit_side_exit(jit, ctx);
1080 // num is the number of requested values. If there aren't enough in the
1081 // array then we're going to push on nils.
1082 int num = (int)jit_get_arg(jit, 0);
1083 val_type_t array_type = ctx_get_opnd_type(ctx, OPND_STACK(0));
1084 x86opnd_t array_opnd = ctx_stack_pop(ctx, 1);
1086 if (array_type.type == ETYPE_NIL) {
1087 // special case for a, b = nil pattern
1088 // push N nils onto the stack
1089 for (int i = 0; i < num; i++) {
1090 x86opnd_t push = ctx_stack_push(ctx, TYPE_NIL);
1091 mov(cb, push, imm_opnd(Qnil));
1093 return YJIT_KEEP_COMPILING;
1096 // Move the array from the stack into REG0 and check that it's an array.
1097 mov(cb, REG0, array_opnd);
1098 guard_object_is_heap(cb, REG0, ctx, COUNTED_EXIT(jit, side_exit, expandarray_not_array));
1099 guard_object_is_array(cb, REG0, REG1, ctx, COUNTED_EXIT(jit, side_exit, expandarray_not_array));
1101 // If we don't actually want any values, then just return.
1102 if (num == 0) {
1103 return YJIT_KEEP_COMPILING;
1106 // Pull out the embed flag to check if it's an embedded array.
1107 x86opnd_t flags_opnd = member_opnd(REG0, struct RBasic, flags);
1108 mov(cb, REG1, flags_opnd);
1110 // Move the length of the embedded array into REG1.
1111 and(cb, REG1, imm_opnd(RARRAY_EMBED_LEN_MASK));
1112 shr(cb, REG1, imm_opnd(RARRAY_EMBED_LEN_SHIFT));
1114 // Conditionally move the length of the heap array into REG1.
1115 test(cb, flags_opnd, imm_opnd(RARRAY_EMBED_FLAG));
1116 cmovz(cb, REG1, member_opnd(REG0, struct RArray, as.heap.len));
1118 // Only handle the case where the number of values in the array is greater
1119 // than or equal to the number of values requested.
1120 cmp(cb, REG1, imm_opnd(num));
1121 jl_ptr(cb, COUNTED_EXIT(jit, side_exit, expandarray_rhs_too_small));
1123 // Load the address of the embedded array into REG1.
1124 // (struct RArray *)(obj)->as.ary
1125 lea(cb, REG1, member_opnd(REG0, struct RArray, as.ary));
1127 // Conditionally load the address of the heap array into REG1.
1128 // (struct RArray *)(obj)->as.heap.ptr
1129 test(cb, flags_opnd, imm_opnd(RARRAY_EMBED_FLAG));
1130 cmovz(cb, REG1, member_opnd(REG0, struct RArray, as.heap.ptr));
1132 // Loop backward through the array and push each element onto the stack.
1133 for (int32_t i = (int32_t) num - 1; i >= 0; i--) {
1134 x86opnd_t top = ctx_stack_push(ctx, TYPE_UNKNOWN);
1135 mov(cb, REG0, mem_opnd(64, REG1, i * SIZEOF_VALUE));
1136 mov(cb, top, REG0);
1139 return YJIT_KEEP_COMPILING;
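
// Illustrative effect (sketch): for Ruby code like `a, b = ary`, num == 2 and the
// loop above pushes ary[1] then ary[0], so ary[0] ends up on top of the stack for
// the instructions that typically follow (e.g. setlocal).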
1142 // new hash initialized from top N values
1143 static codegen_status_t
1144 gen_newhash(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1146 int32_t num = (int32_t)jit_get_arg(jit, 0);
1148 // Save the PC and SP because we are allocating
1149 jit_prepare_routine_call(jit, ctx, REG0);
1151 if (num) {
1152 // val = rb_hash_new_with_size(num / 2);
1153 mov(cb, C_ARG_REGS[0], imm_opnd(num / 2));
1154 call_ptr(cb, REG0, (void *)rb_hash_new_with_size);
1156 // save the allocated hash as we want to push it after insertion
1157 push(cb, RAX);
1158 push(cb, RAX); // alignment
1160 // rb_hash_bulk_insert(num, STACK_ADDR_FROM_TOP(num), val);
1161 mov(cb, C_ARG_REGS[0], imm_opnd(num));
1162 lea(cb, C_ARG_REGS[1], ctx_stack_opnd(ctx, num - 1));
1163 mov(cb, C_ARG_REGS[2], RAX);
1164 call_ptr(cb, REG0, (void *)rb_hash_bulk_insert);
1166 pop(cb, RAX); // alignment
1167 pop(cb, RAX);
1169 ctx_stack_pop(ctx, num);
1170 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HASH);
1171 mov(cb, stack_ret, RAX);
1173 else {
1174 // val = rb_hash_new();
1175 call_ptr(cb, REG0, (void *)rb_hash_new);
1177 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HASH);
1178 mov(cb, stack_ret, RAX);
1181 return YJIT_KEEP_COMPILING;
1184 // Push a constant value to the stack, including type information.
1185 // The constant may be a heap object or a special constant.
1186 static void
1187 jit_putobject(jitstate_t *jit, ctx_t *ctx, VALUE arg)
1189 val_type_t val_type = yjit_type_of_value(arg);
1190 x86opnd_t stack_top = ctx_stack_push(ctx, val_type);
1192 if (SPECIAL_CONST_P(arg)) {
1193 // Immediates will not move and do not need to be tracked for GC
1194 // Thanks to this we can mov directly to memory when possible.
1196 // NOTE: VALUE -> int64_t cast below is implementation defined.
// Hopefully it preserves the bit pattern or raises a signal.
1198 // See N1256 section 6.3.1.3.
1199 x86opnd_t imm = imm_opnd((int64_t)arg);
1201 // 64-bit immediates can't be directly written to memory
1202 if (imm.num_bits <= 32) {
1203 mov(cb, stack_top, imm);
1205 else {
1206 mov(cb, REG0, imm);
1207 mov(cb, stack_top, REG0);
1210 else {
1211 // Load the value to push into REG0
1212 // Note that this value may get moved by the GC
1213 jit_mov_gc_ptr(jit, cb, REG0, arg);
1215 // Write argument at SP
1216 mov(cb, stack_top, REG0);
1220 static codegen_status_t
1221 gen_putnil(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1223 jit_putobject(jit, ctx, Qnil);
1224 return YJIT_KEEP_COMPILING;
1227 static codegen_status_t
1228 gen_putobject(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1230 VALUE arg = jit_get_arg(jit, 0);
1232 jit_putobject(jit, ctx, arg);
1233 return YJIT_KEEP_COMPILING;
1236 static codegen_status_t
1237 gen_putstring(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1239 VALUE put_val = jit_get_arg(jit, 0);
1241 // Save the PC and SP because the callee will allocate
1242 jit_prepare_routine_call(jit, ctx, REG0);
1244 mov(cb, C_ARG_REGS[0], REG_EC);
1245 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], put_val);
1246 call_ptr(cb, REG0, (void *)rb_ec_str_resurrect);
1248 x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_STRING);
1249 mov(cb, stack_top, RAX);
1251 return YJIT_KEEP_COMPILING;
1254 static codegen_status_t
1255 gen_putobject_int2fix(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1257 int opcode = jit_get_opcode(jit);
1258 int cst_val = (opcode == BIN(putobject_INT2FIX_0_))? 0:1;
1260 jit_putobject(jit, ctx, INT2FIX(cst_val));
1261 return YJIT_KEEP_COMPILING;
1264 static codegen_status_t
1265 gen_putself(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1267 // Load self from CFP
1268 mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, self));
1270 // Write it on the stack
1271 x86opnd_t stack_top = ctx_stack_push_self(ctx);
1272 mov(cb, stack_top, REG0);
1274 return YJIT_KEEP_COMPILING;
1277 static codegen_status_t
1278 gen_putspecialobject(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1280 enum vm_special_object_type type = (enum vm_special_object_type)jit_get_arg(jit, 0);
1282 if (type == VM_SPECIAL_OBJECT_VMCORE) {
1283 x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_HEAP);
1284 jit_mov_gc_ptr(jit, cb, REG0, rb_mRubyVMFrozenCore);
1285 mov(cb, stack_top, REG0);
1286 return YJIT_KEEP_COMPILING;
1288 else {
1289 // TODO: implement for VM_SPECIAL_OBJECT_CBASE and
1290 // VM_SPECIAL_OBJECT_CONST_BASE
1291 return YJIT_CANT_COMPILE;
1295 // Get EP at level from CFP
1296 static void
1297 gen_get_ep(codeblock_t *cb, x86opnd_t reg, uint32_t level)
1299 // Load environment pointer EP from CFP
1300 mov(cb, reg, member_opnd(REG_CFP, rb_control_frame_t, ep));
1302 while (level--) {
1303 // Get the previous EP from the current EP
1304 // See GET_PREV_EP(ep) macro
1305 // VALUE *prev_ep = ((VALUE *)((ep)[VM_ENV_DATA_INDEX_SPECVAL] & ~0x03))
1306 mov(cb, reg, mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_SPECVAL));
1307 and(cb, reg, imm_opnd(~0x03));
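
// Roughly the machine-code equivalent of (illustrative):
//
//   const VALUE *ep = cfp->ep;
//   while (level--) ep = (const VALUE *)(ep[VM_ENV_DATA_INDEX_SPECVAL] & ~0x03);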
1311 // Compute the index of a local variable from its slot index
1312 static uint32_t
1313 slot_to_local_idx(const rb_iseq_t *iseq, int32_t slot_idx)
1315 // Convoluted rules from local_var_name() in iseq.c
1316 int32_t local_table_size = iseq->body->local_table_size;
1317 int32_t op = slot_idx - VM_ENV_DATA_SIZE;
int32_t local_idx = local_table_size - op - 1;
1319 RUBY_ASSERT(local_idx >= 0 && local_idx < local_table_size);
1320 return (uint32_t)local_idx;
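
// Worked example (assuming VM_ENV_DATA_SIZE == 3): with a local table of size 2,
// slot_idx 3 maps to local_idx 1 and slot_idx 4 maps to local_idx 0.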
1323 static codegen_status_t
1324 gen_getlocal_wc0(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1326 // Compute the offset from BP to the local
1327 int32_t slot_idx = (int32_t)jit_get_arg(jit, 0);
1328 const int32_t offs = -(SIZEOF_VALUE * slot_idx);
1329 uint32_t local_idx = slot_to_local_idx(jit->iseq, slot_idx);
1331 // Load environment pointer EP (level 0) from CFP
1332 gen_get_ep(cb, REG0, 0);
1334 // Load the local from the EP
1335 mov(cb, REG0, mem_opnd(64, REG0, offs));
1337 // Write the local at SP
1338 x86opnd_t stack_top = ctx_stack_push_local(ctx, local_idx);
1339 mov(cb, stack_top, REG0);
1341 return YJIT_KEEP_COMPILING;
1344 static codegen_status_t
1345 gen_getlocal_generic(ctx_t *ctx, uint32_t local_idx, uint32_t level)
1347 gen_get_ep(cb, REG0, level);
1349 // Load the local from the block
1350 // val = *(vm_get_ep(GET_EP(), level) - idx);
1351 const int32_t offs = -(SIZEOF_VALUE * local_idx);
1352 mov(cb, REG0, mem_opnd(64, REG0, offs));
1354 // Write the local at SP
1355 x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_UNKNOWN);
1356 mov(cb, stack_top, REG0);
1358 return YJIT_KEEP_COMPILING;
1361 static codegen_status_t
1362 gen_getlocal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1364 int32_t idx = (int32_t)jit_get_arg(jit, 0);
1365 int32_t level = (int32_t)jit_get_arg(jit, 1);
1366 return gen_getlocal_generic(ctx, idx, level);
1369 static codegen_status_t
1370 gen_getlocal_wc1(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1372 int32_t idx = (int32_t)jit_get_arg(jit, 0);
1373 return gen_getlocal_generic(ctx, idx, 1);
1376 static codegen_status_t
1377 gen_setlocal_wc0(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
/*
   vm_env_write(const VALUE *ep, int index, VALUE v)
   {
       VALUE flags = ep[VM_ENV_DATA_INDEX_FLAGS];
       if (LIKELY((flags & VM_ENV_FLAG_WB_REQUIRED) == 0)) {
           VM_STACK_ENV_WRITE(ep, index, v);
       }
       else {
           vm_env_write_slowpath(ep, index, v);
       }
   }
*/
1392 int32_t slot_idx = (int32_t)jit_get_arg(jit, 0);
1393 uint32_t local_idx = slot_to_local_idx(jit->iseq, slot_idx);
1395 // Load environment pointer EP (level 0) from CFP
1396 gen_get_ep(cb, REG0, 0);
1398 // flags & VM_ENV_FLAG_WB_REQUIRED
1399 x86opnd_t flags_opnd = mem_opnd(64, REG0, sizeof(VALUE) * VM_ENV_DATA_INDEX_FLAGS);
1400 test(cb, flags_opnd, imm_opnd(VM_ENV_FLAG_WB_REQUIRED));
1402 // Create a side-exit to fall back to the interpreter
1403 uint8_t *side_exit = yjit_side_exit(jit, ctx);
1405 // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0
1406 jnz_ptr(cb, side_exit);
1408 // Set the type of the local variable in the context
1409 val_type_t temp_type = ctx_get_opnd_type(ctx, OPND_STACK(0));
1410 ctx_set_local_type(ctx, local_idx, temp_type);
1412 // Pop the value to write from the stack
1413 x86opnd_t stack_top = ctx_stack_pop(ctx, 1);
1414 mov(cb, REG1, stack_top);
1416 // Write the value at the environment pointer
1417 const int32_t offs = -8 * slot_idx;
1418 mov(cb, mem_opnd(64, REG0, offs), REG1);
1420 return YJIT_KEEP_COMPILING;
1423 // Push Qtrue or Qfalse depending on whether the given keyword was supplied by
1424 // the caller
1425 static codegen_status_t
1426 gen_checkkeyword(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1428 // When a keyword is unspecified past index 32, a hash will be used
1429 // instead. This can only happen in iseqs taking more than 32 keywords.
1430 if (jit->iseq->body->param.keyword->num >= 32) {
1431 return YJIT_CANT_COMPILE;
1434 // The EP offset to the undefined bits local
1435 int32_t bits_offset = (int32_t)jit_get_arg(jit, 0);
1437 // The index of the keyword we want to check
1438 int32_t index = (int32_t)jit_get_arg(jit, 1);
1440 // Load environment pointer EP
1441 gen_get_ep(cb, REG0, 0);
1443 // VALUE kw_bits = *(ep - bits);
1444 x86opnd_t bits_opnd = mem_opnd(64, REG0, sizeof(VALUE) * -bits_offset);
1446 // unsigned int b = (unsigned int)FIX2ULONG(kw_bits);
1447 // if ((b & (0x01 << idx))) {
1449 // We can skip the FIX2ULONG conversion by shifting the bit we test
1450 int64_t bit_test = 0x01 << (index + 1);
1451 test(cb, bits_opnd, imm_opnd(bit_test));
1452 mov(cb, REG0, imm_opnd(Qfalse));
1453 mov(cb, REG1, imm_opnd(Qtrue));
1454 cmovz(cb, REG0, REG1);
1456 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_IMM);
1457 mov(cb, stack_ret, REG0);
1459 return YJIT_KEEP_COMPILING;
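
// Why index + 1: kw_bits is a Fixnum VALUE, i.e. the integer shifted left by one
// bit with the low tag bit set, so untagged bit `index` of FIX2ULONG(kw_bits)
// lives at bit `index + 1` of the raw word. E.g. testing bit 2 of the unboxed
// bits is the same as testing bit 3 of kw_bits itself.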
1462 static codegen_status_t
1463 gen_setlocal_generic(jitstate_t *jit, ctx_t *ctx, uint32_t local_idx, uint32_t level)
1465 // Load environment pointer EP at level
1466 gen_get_ep(cb, REG0, level);
1468 // flags & VM_ENV_FLAG_WB_REQUIRED
1469 x86opnd_t flags_opnd = mem_opnd(64, REG0, sizeof(VALUE) * VM_ENV_DATA_INDEX_FLAGS);
1470 test(cb, flags_opnd, imm_opnd(VM_ENV_FLAG_WB_REQUIRED));
1472 // Create a side-exit to fall back to the interpreter
1473 uint8_t *side_exit = yjit_side_exit(jit, ctx);
1475 // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0
1476 jnz_ptr(cb, side_exit);
1478 // Pop the value to write from the stack
1479 x86opnd_t stack_top = ctx_stack_pop(ctx, 1);
1480 mov(cb, REG1, stack_top);
1482 // Write the value at the environment pointer
1483 const int32_t offs = -(SIZEOF_VALUE * local_idx);
1484 mov(cb, mem_opnd(64, REG0, offs), REG1);
1486 return YJIT_KEEP_COMPILING;
1489 static codegen_status_t
1490 gen_setlocal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1492 int32_t idx = (int32_t)jit_get_arg(jit, 0);
1493 int32_t level = (int32_t)jit_get_arg(jit, 1);
1494 return gen_setlocal_generic(jit, ctx, idx, level);
1497 static codegen_status_t
1498 gen_setlocal_wc1(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1500 int32_t idx = (int32_t)jit_get_arg(jit, 0);
1501 return gen_setlocal_generic(jit, ctx, idx, 1);
1504 static void
1505 gen_jnz_to_target0(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
1507 switch (shape) {
1508 case SHAPE_NEXT0:
1509 case SHAPE_NEXT1:
1510 RUBY_ASSERT(false);
1511 break;
1513 case SHAPE_DEFAULT:
1514 jnz_ptr(cb, target0);
1515 break;
1519 static void
1520 gen_jz_to_target0(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
1522 switch (shape) {
1523 case SHAPE_NEXT0:
1524 case SHAPE_NEXT1:
1525 RUBY_ASSERT(false);
1526 break;
1528 case SHAPE_DEFAULT:
1529 jz_ptr(cb, target0);
1530 break;
1534 static void
1535 gen_jbe_to_target0(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
1537 switch (shape) {
1538 case SHAPE_NEXT0:
1539 case SHAPE_NEXT1:
1540 RUBY_ASSERT(false);
1541 break;
1543 case SHAPE_DEFAULT:
1544 jbe_ptr(cb, target0);
1545 break;
1549 enum jcc_kinds {
1550 JCC_JNE,
1551 JCC_JNZ,
1552 JCC_JZ,
1553 JCC_JE,
1554 JCC_JBE,
1555 JCC_JNA,
1558 // Generate a jump to a stub that recompiles the current YARV instruction on failure.
// When depth_limit is exceeded, generate a jump to a side exit.
1560 static void
1561 jit_chain_guard(enum jcc_kinds jcc, jitstate_t *jit, const ctx_t *ctx, uint8_t depth_limit, uint8_t *side_exit)
1563 branchgen_fn target0_gen_fn;
1565 switch (jcc) {
1566 case JCC_JNE:
1567 case JCC_JNZ:
1568 target0_gen_fn = gen_jnz_to_target0;
1569 break;
1570 case JCC_JZ:
1571 case JCC_JE:
1572 target0_gen_fn = gen_jz_to_target0;
1573 break;
1574 case JCC_JBE:
1575 case JCC_JNA:
1576 target0_gen_fn = gen_jbe_to_target0;
1577 break;
1578 default:
1579 rb_bug("yjit: unimplemented jump kind");
1580 break;
1583 if (ctx->chain_depth < depth_limit) {
1584 ctx_t deeper = *ctx;
1585 deeper.chain_depth++;
1587 gen_branch(
1588 jit,
1589 ctx,
1590 (blockid_t) { jit->iseq, jit->insn_idx },
1591 &deeper,
1592 BLOCKID_NULL,
1593 NULL,
1594 target0_gen_fn
1597 else {
1598 target0_gen_fn(cb, side_exit, NULL, SHAPE_DEFAULT);
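
// Illustrative summary: while chain_depth < depth_limit, a failing guard branches
// to a stub that recompiles this same instruction with chain_depth + 1; once the
// limit is reached, failures jump straight to side_exit instead of producing more
// block versions.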
1602 enum {
1603 GETIVAR_MAX_DEPTH = 10, // up to 5 different classes, and embedded or not for each
1604 OPT_AREF_MAX_CHAIN_DEPTH = 2, // hashes and arrays
1605 SEND_MAX_DEPTH = 5, // up to 5 different classes
1608 VALUE rb_vm_set_ivar_idx(VALUE obj, uint32_t idx, VALUE val);
1610 // Codegen for setting an instance variable.
1611 // Preconditions:
1612 // - receiver is in REG0
1613 // - receiver has the same class as CLASS_OF(comptime_receiver)
1614 // - no stack push or pops to ctx since the entry to the codegen of the instruction being compiled
1615 static codegen_status_t
1616 gen_set_ivar(jitstate_t *jit, ctx_t *ctx, VALUE recv, VALUE klass, ID ivar_name)
1618 // Save the PC and SP because the callee may allocate
1619 // Note that this modifies REG_SP, which is why we do it first
1620 jit_prepare_routine_call(jit, ctx, REG0);
1622 // Get the operands from the stack
1623 x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
1624 x86opnd_t recv_opnd = ctx_stack_pop(ctx, 1);
1626 uint32_t ivar_index = rb_obj_ensure_iv_index_mapping(recv, ivar_name);
1628 // Call rb_vm_set_ivar_idx with the receiver, the index of the ivar, and the value
1629 mov(cb, C_ARG_REGS[0], recv_opnd);
1630 mov(cb, C_ARG_REGS[1], imm_opnd(ivar_index));
1631 mov(cb, C_ARG_REGS[2], val_opnd);
1632 call_ptr(cb, REG0, (void *)rb_vm_set_ivar_idx);
1634 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_UNKNOWN);
1635 mov(cb, out_opnd, RAX);
1637 return YJIT_KEEP_COMPILING;
1640 // Codegen for getting an instance variable.
1641 // Preconditions:
1642 // - receiver is in REG0
1643 // - receiver has the same class as CLASS_OF(comptime_receiver)
1644 // - no stack push or pops to ctx since the entry to the codegen of the instruction being compiled
1645 static codegen_status_t
1646 gen_get_ivar(jitstate_t *jit, ctx_t *ctx, const int max_chain_depth, VALUE comptime_receiver, ID ivar_name, insn_opnd_t reg0_opnd, uint8_t *side_exit)
1648 VALUE comptime_val_klass = CLASS_OF(comptime_receiver);
1649 const ctx_t starting_context = *ctx; // make a copy for use with jit_chain_guard
1651 // If the class uses the default allocator, instances should all be T_OBJECT
1652 // NOTE: This assumes nobody changes the allocator of the class after allocation.
1653 // Eventually, we can encode whether an object is T_OBJECT or not
1654 // inside object shapes.
1655 if (!RB_TYPE_P(comptime_receiver, T_OBJECT) ||
1656 rb_get_alloc_func(comptime_val_klass) != rb_class_allocate_instance) {
1657 // General case. Call rb_ivar_get().
1658 // VALUE rb_ivar_get(VALUE obj, ID id)
1659 ADD_COMMENT(cb, "call rb_ivar_get()");
1661 // The function could raise exceptions.
1662 jit_prepare_routine_call(jit, ctx, REG1);
1664 mov(cb, C_ARG_REGS[0], REG0);
1665 mov(cb, C_ARG_REGS[1], imm_opnd((int64_t)ivar_name));
1666 call_ptr(cb, REG1, (void *)rb_ivar_get);
1668 if (!reg0_opnd.is_self) {
1669 (void)ctx_stack_pop(ctx, 1);
1671 // Push the ivar on the stack
1672 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_UNKNOWN);
1673 mov(cb, out_opnd, RAX);
1675 // Jump to next instruction. This allows guard chains to share the same successor.
1676 jit_jump_to_next_insn(jit, ctx);
1677 return YJIT_END_BLOCK;
1681 // FIXME:
1682 // This check was added because of a failure in a test involving the
1683 // Nokogiri Document class where we see a T_DATA that still has the default
1684 // allocator.
1685 // Aaron Patterson argues that this is a bug in the C extension, because
1686 // people could call .allocate() on the class and still get a T_OBJECT
1687 // For now I added an extra dynamic check that the receiver is T_OBJECT
1688 // so we can safely pass all the tests in Shopify Core.
1690 // Guard that the receiver is T_OBJECT
1691 // #define RB_BUILTIN_TYPE(x) (int)(((struct RBasic*)(x))->flags & RUBY_T_MASK)
1692 ADD_COMMENT(cb, "guard receiver is T_OBJECT");
1693 mov(cb, REG1, member_opnd(REG0, struct RBasic, flags));
1694 and(cb, REG1, imm_opnd(RUBY_T_MASK));
1695 cmp(cb, REG1, imm_opnd(T_OBJECT));
1696 jit_chain_guard(JCC_JNE, jit, &starting_context, max_chain_depth, side_exit);
// FIXME: Mapping the index could fail when there are too many ivar names. If we're
// compiling for a branch stub, that can cause the exception to be thrown from the
1701 // wrong PC.
1702 uint32_t ivar_index = rb_obj_ensure_iv_index_mapping(comptime_receiver, ivar_name);
1704 // Pop receiver if it's on the temp stack
1705 if (!reg0_opnd.is_self) {
1706 (void)ctx_stack_pop(ctx, 1);
1709 // Compile time self is embedded and the ivar index lands within the object
1710 if (RB_FL_TEST_RAW(comptime_receiver, ROBJECT_EMBED) && ivar_index < ROBJECT_EMBED_LEN_MAX) {
1711 // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
1713 // Guard that self is embedded
// TODO: BT and JC are shorter
1715 ADD_COMMENT(cb, "guard embedded getivar");
1716 x86opnd_t flags_opnd = member_opnd(REG0, struct RBasic, flags);
1717 test(cb, flags_opnd, imm_opnd(ROBJECT_EMBED));
1718 jit_chain_guard(JCC_JZ, jit, &starting_context, max_chain_depth, COUNTED_EXIT(jit, side_exit, getivar_megamorphic));
1720 // Load the variable
1721 x86opnd_t ivar_opnd = mem_opnd(64, REG0, offsetof(struct RObject, as.ary) + ivar_index * SIZEOF_VALUE);
1722 mov(cb, REG1, ivar_opnd);
1724 // Guard that the variable is not Qundef
1725 cmp(cb, REG1, imm_opnd(Qundef));
1726 mov(cb, REG0, imm_opnd(Qnil));
1727 cmove(cb, REG1, REG0);
1729 // Push the ivar on the stack
1730 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_UNKNOWN);
1731 mov(cb, out_opnd, REG1);
1733 else {
1734 // Compile time value is *not* embedded.
1736 // Guard that value is *not* embedded
1737 // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
1738 ADD_COMMENT(cb, "guard extended getivar");
1739 x86opnd_t flags_opnd = member_opnd(REG0, struct RBasic, flags);
1740 test(cb, flags_opnd, imm_opnd(ROBJECT_EMBED));
1741 jit_chain_guard(JCC_JNZ, jit, &starting_context, max_chain_depth, COUNTED_EXIT(jit, side_exit, getivar_megamorphic));
1743 // check that the extended table is big enough
1744 if (ivar_index >= ROBJECT_EMBED_LEN_MAX + 1) {
1745 // Check that the slot is inside the extended table (num_slots > index)
1746 x86opnd_t num_slots = mem_opnd(32, REG0, offsetof(struct RObject, as.heap.numiv));
1747 cmp(cb, num_slots, imm_opnd(ivar_index));
1748 jle_ptr(cb, COUNTED_EXIT(jit, side_exit, getivar_idx_out_of_range));
1751 // Get a pointer to the extended table
1752 x86opnd_t tbl_opnd = mem_opnd(64, REG0, offsetof(struct RObject, as.heap.ivptr));
1753 mov(cb, REG0, tbl_opnd);
1755 // Read the ivar from the extended table
1756 x86opnd_t ivar_opnd = mem_opnd(64, REG0, sizeof(VALUE) * ivar_index);
1757 mov(cb, REG0, ivar_opnd);
1759 // Check that the ivar is not Qundef
1760 cmp(cb, REG0, imm_opnd(Qundef));
1761 mov(cb, REG1, imm_opnd(Qnil));
1762 cmove(cb, REG0, REG1);
1764 // Push the ivar on the stack
1765 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_UNKNOWN);
1766 mov(cb, out_opnd, REG0);
1769 // Jump to next instruction. This allows guard chains to share the same successor.
1770 jit_jump_to_next_insn(jit, ctx);
1771 return YJIT_END_BLOCK;
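
// Summary of the fast paths above (sketch): embedded objects load
// ROBJECT(obj)->as.ary[ivar_index], extended objects load
// ROBJECT(obj)->as.heap.ivptr[ivar_index], and an unset slot (Qundef) is pushed
// as Qnil either way.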
1774 static codegen_status_t
1775 gen_getinstancevariable(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1777 // Defer compilation so we can specialize on a runtime `self`
1778 if (!jit_at_current_insn(jit)) {
1779 defer_compilation(jit, ctx);
1780 return YJIT_END_BLOCK;
1783 ID ivar_name = (ID)jit_get_arg(jit, 0);
1785 VALUE comptime_val = jit_peek_at_self(jit, ctx);
1786 VALUE comptime_val_klass = CLASS_OF(comptime_val);
1788 // Generate a side exit
1789 uint8_t *side_exit = yjit_side_exit(jit, ctx);
1791 // Guard that the receiver has the same class as the one from compile time.
1792 mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, self));
1794 jit_guard_known_klass(jit, ctx, comptime_val_klass, OPND_SELF, comptime_val, GETIVAR_MAX_DEPTH, side_exit);
1796 return gen_get_ivar(jit, ctx, GETIVAR_MAX_DEPTH, comptime_val, ivar_name, OPND_SELF, side_exit);
1799 void rb_vm_setinstancevariable(const rb_iseq_t *iseq, VALUE obj, ID id, VALUE val, IVC ic);
1801 static codegen_status_t
1802 gen_setinstancevariable(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1804 ID id = (ID)jit_get_arg(jit, 0);
1805 IVC ic = (IVC)jit_get_arg(jit, 1);
1807 // Save the PC and SP because the callee may allocate
1808 // Note that this modifies REG_SP, which is why we do it first
1809 jit_prepare_routine_call(jit, ctx, REG0);
1811 // Get the operands from the stack
1812 x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
1814 // Call rb_vm_setinstancevariable(iseq, obj, id, val, ic);
1815 mov(cb, C_ARG_REGS[1], member_opnd(REG_CFP, rb_control_frame_t, self));
1816 mov(cb, C_ARG_REGS[3], val_opnd);
1817 mov(cb, C_ARG_REGS[2], imm_opnd(id));
1818 mov(cb, C_ARG_REGS[4], const_ptr_opnd(ic));
1819 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], (VALUE)jit->iseq);
1820 call_ptr(cb, REG0, (void *)rb_vm_setinstancevariable);
1822 return YJIT_KEEP_COMPILING;
1825 bool rb_vm_defined(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, rb_num_t op_type, VALUE obj, VALUE v);
1827 static codegen_status_t
1828 gen_defined(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1830 rb_num_t op_type = (rb_num_t)jit_get_arg(jit, 0);
1831 VALUE obj = (VALUE)jit_get_arg(jit, 1);
1832 VALUE pushval = (VALUE)jit_get_arg(jit, 2);
1834 // Save the PC and SP because the callee may allocate
1835 // Note that this modifies REG_SP, which is why we do it first
1836 jit_prepare_routine_call(jit, ctx, REG0);
1838 // Get the operands from the stack
1839 x86opnd_t v_opnd = ctx_stack_pop(ctx, 1);
1841 // Call vm_defined(ec, reg_cfp, op_type, obj, v)
1842 mov(cb, C_ARG_REGS[0], REG_EC);
1843 mov(cb, C_ARG_REGS[1], REG_CFP);
1844 mov(cb, C_ARG_REGS[2], imm_opnd(op_type));
1845 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[3], (VALUE)obj);
1846 mov(cb, C_ARG_REGS[4], v_opnd);
1847 call_ptr(cb, REG0, (void *)rb_vm_defined);
1849 // if (vm_defined(ec, GET_CFP(), op_type, obj, v)) {
1850 // val = pushval;
1851 // }
1852 jit_mov_gc_ptr(jit, cb, REG1, (VALUE)pushval);
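// rb_vm_defined() returns a C bool, so only the low byte (AL) of the return value is significant.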
1853 cmp(cb, AL, imm_opnd(0));
1854 mov(cb, RAX, imm_opnd(Qnil));
1855 cmovnz(cb, RAX, REG1);
1857 // Push the return value onto the stack
1858 val_type_t out_type = SPECIAL_CONST_P(pushval)? TYPE_IMM:TYPE_UNKNOWN;
1859 x86opnd_t stack_ret = ctx_stack_push(ctx, out_type);
1860 mov(cb, stack_ret, RAX);
1862 return YJIT_KEEP_COMPILING;
1865 static codegen_status_t
1866 gen_checktype(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1868 enum ruby_value_type type_val = (enum ruby_value_type)jit_get_arg(jit, 0);
1869 // Only three types are emitted by compile.c
1870 if (type_val == T_STRING || type_val == T_ARRAY || type_val == T_HASH) {
1871 val_type_t val_type = ctx_get_opnd_type(ctx, OPND_STACK(0));
1872 x86opnd_t val = ctx_stack_pop(ctx, 1);
1874 x86opnd_t stack_ret;
1876 // Check if we know from type information
1877 if ((type_val == T_STRING && val_type.type == ETYPE_STRING) ||
1878 (type_val == T_ARRAY && val_type.type == ETYPE_ARRAY) ||
1879 (type_val == T_HASH && val_type.type == ETYPE_HASH)) {
1880 // guaranteed type match
1881 stack_ret = ctx_stack_push(ctx, TYPE_TRUE);
1882 mov(cb, stack_ret, imm_opnd(Qtrue));
1883 return YJIT_KEEP_COMPILING;
1885 else if (val_type.is_imm || val_type.type != ETYPE_UNKNOWN) {
1886 // guaranteed not to match T_STRING/T_ARRAY/T_HASH
1887 stack_ret = ctx_stack_push(ctx, TYPE_FALSE);
1888 mov(cb, stack_ret, imm_opnd(Qfalse));
1889 return YJIT_KEEP_COMPILING;
1892 mov(cb, REG0, val);
1893 mov(cb, REG1, imm_opnd(Qfalse));
1895 uint32_t ret = cb_new_label(cb, "ret");
1897 if (!val_type.is_heap) {
1898 // if (SPECIAL_CONST_P(val)) {
1899 // Return Qfalse via REG1 if not on heap
1900 test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK));
1901 jnz_label(cb, ret);
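// Qfalse (0) and Qnil (8) have no RUBY_IMMEDIATE_MASK bits set, so catch them with the <= Qnil check below.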
1902 cmp(cb, REG0, imm_opnd(Qnil));
1903 jbe_label(cb, ret);
1906 // Check type on object
1907 mov(cb, REG0, mem_opnd(64, REG0, offsetof(struct RBasic, flags)));
1908 and(cb, REG0, imm_opnd(RUBY_T_MASK));
1909 cmp(cb, REG0, imm_opnd(type_val));
1910 mov(cb, REG0, imm_opnd(Qtrue));
1911 // REG1 contains Qfalse from above
1912 cmove(cb, REG1, REG0);
1914 cb_write_label(cb, ret);
1915 stack_ret = ctx_stack_push(ctx, TYPE_IMM);
1916 mov(cb, stack_ret, REG1);
1917 cb_link_labels(cb);
1919 return YJIT_KEEP_COMPILING;
1921 else {
1922 return YJIT_CANT_COMPILE;
1926 static codegen_status_t
1927 gen_concatstrings(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1929 rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
1931 // Save the PC and SP because we are allocating
1932 jit_prepare_routine_call(jit, ctx, REG0);
1934 x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(sizeof(VALUE) * (uint32_t)n));
1936 // call rb_str_concat_literals(long n, const VALUE *strings);
1937 mov(cb, C_ARG_REGS[0], imm_opnd(n));
1938 lea(cb, C_ARG_REGS[1], values_ptr);
1939 call_ptr(cb, REG0, (void *)rb_str_concat_literals);
1941 ctx_stack_pop(ctx, n);
1942 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_STRING);
1943 mov(cb, stack_ret, RAX);
1945 return YJIT_KEEP_COMPILING;
1948 static void
1949 guard_two_fixnums(ctx_t *ctx, uint8_t *side_exit)
1951 // Get the stack operand types
1952 val_type_t arg1_type = ctx_get_opnd_type(ctx, OPND_STACK(0));
1953 val_type_t arg0_type = ctx_get_opnd_type(ctx, OPND_STACK(1));
1955 if (arg0_type.is_heap || arg1_type.is_heap) {
1956 jmp_ptr(cb, side_exit);
1957 return;
1960 if (arg0_type.type != ETYPE_FIXNUM && arg0_type.type != ETYPE_UNKNOWN) {
1961 jmp_ptr(cb, side_exit);
1962 return;
1965 if (arg1_type.type != ETYPE_FIXNUM && arg1_type.type != ETYPE_UNKNOWN) {
1966 jmp_ptr(cb, side_exit);
1967 return;
1970 RUBY_ASSERT(!arg0_type.is_heap);
1971 RUBY_ASSERT(!arg1_type.is_heap);
1972 RUBY_ASSERT(arg0_type.type == ETYPE_FIXNUM || arg0_type.type == ETYPE_UNKNOWN);
1973 RUBY_ASSERT(arg1_type.type == ETYPE_FIXNUM || arg1_type.type == ETYPE_UNKNOWN);
1975 // Get stack operands without popping them
1976 x86opnd_t arg1 = ctx_stack_opnd(ctx, 0);
1977 x86opnd_t arg0 = ctx_stack_opnd(ctx, 1);
1979 // If not fixnums, fall back
1980 if (arg0_type.type != ETYPE_FIXNUM) {
1981 ADD_COMMENT(cb, "guard arg0 fixnum");
1982 test(cb, arg0, imm_opnd(RUBY_FIXNUM_FLAG));
1983 jz_ptr(cb, side_exit);
1985 if (arg1_type.type != ETYPE_FIXNUM) {
1986 ADD_COMMENT(cb, "guard arg1 fixnum");
1987 test(cb, arg1, imm_opnd(RUBY_FIXNUM_FLAG));
1988 jz_ptr(cb, side_exit);
1991 // Set stack types in context
1992 ctx_upgrade_opnd_type(ctx, OPND_STACK(0), TYPE_FIXNUM);
1993 ctx_upgrade_opnd_type(ctx, OPND_STACK(1), TYPE_FIXNUM);
1996 // Conditional move operation used by comparison operators
1997 typedef void (*cmov_fn)(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1);
1999 static codegen_status_t
2000 gen_fixnum_cmp(jitstate_t *jit, ctx_t *ctx, cmov_fn cmov_op)
2002 // Defer compilation so we can specialize based on a runtime receiver
2003 if (!jit_at_current_insn(jit)) {
2004 defer_compilation(jit, ctx);
2005 return YJIT_END_BLOCK;
2008 VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2009 VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2011 if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2012 // Create a side-exit to fall back to the interpreter
2013 // Note: we generate the side-exit before popping operands from the stack
2014 uint8_t *side_exit = yjit_side_exit(jit, ctx);
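// Note: only BOP_LT redefinition is checked here, even though this helper also backs opt_le, opt_ge and opt_gt.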
2016 if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_LT)) {
2017 return YJIT_CANT_COMPILE;
2020 // Check that both operands are fixnums
2021 guard_two_fixnums(ctx, side_exit);
2023 // Get the operands from the stack
2024 x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2025 x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2027 // Compare the arguments
2028 xor(cb, REG0_32, REG0_32); // REG0 = Qfalse
2029 mov(cb, REG1, arg0);
2030 cmp(cb, REG1, arg1);
2031 mov(cb, REG1, imm_opnd(Qtrue));
2032 cmov_op(cb, REG0, REG1);
2034 // Push the output on the stack
2035 x86opnd_t dst = ctx_stack_push(ctx, TYPE_UNKNOWN);
2036 mov(cb, dst, REG0);
2038 return YJIT_KEEP_COMPILING;
2040 else {
2041 return gen_opt_send_without_block(jit, ctx, cb);
2045 static codegen_status_t
2046 gen_opt_lt(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2048 return gen_fixnum_cmp(jit, ctx, cmovl);
2051 static codegen_status_t
2052 gen_opt_le(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2054 return gen_fixnum_cmp(jit, ctx, cmovle);
2057 static codegen_status_t
2058 gen_opt_ge(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2060 return gen_fixnum_cmp(jit, ctx, cmovge);
2063 static codegen_status_t
2064 gen_opt_gt(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2066 return gen_fixnum_cmp(jit, ctx, cmovg);
2069 // Implements specialized equality for either two fixnum or two strings
2070 // Returns true if code was generated, otherwise false
2071 static bool
2072 gen_equality_specialized(jitstate_t *jit, ctx_t *ctx, uint8_t *side_exit)
2074 VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2075 VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2077 x86opnd_t a_opnd = ctx_stack_opnd(ctx, 1);
2078 x86opnd_t b_opnd = ctx_stack_opnd(ctx, 0);
2080 if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2081 if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_EQ)) {
2082 // if overridden, emit the generic version
2083 return false;
2086 guard_two_fixnums(ctx, side_exit);
2088 mov(cb, REG0, a_opnd);
2089 cmp(cb, REG0, b_opnd);
2091 mov(cb, REG0, imm_opnd(Qfalse));
2092 mov(cb, REG1, imm_opnd(Qtrue));
2093 cmove(cb, REG0, REG1);
2095 // Push the output on the stack
2096 ctx_stack_pop(ctx, 2);
2097 x86opnd_t dst = ctx_stack_push(ctx, TYPE_IMM);
2098 mov(cb, dst, REG0);
2100 return true;
2102 else if (CLASS_OF(comptime_a) == rb_cString &&
2103 CLASS_OF(comptime_b) == rb_cString) {
2104 if (!assume_bop_not_redefined(jit, STRING_REDEFINED_OP_FLAG, BOP_EQ)) {
2105 // if overridden, emit the generic version
2106 return false;
2109 // Load a and b in preparation for call later
2110 mov(cb, C_ARG_REGS[0], a_opnd);
2111 mov(cb, C_ARG_REGS[1], b_opnd);
2113 // Guard that a is a String
2114 mov(cb, REG0, C_ARG_REGS[0]);
2115 jit_guard_known_klass(jit, ctx, rb_cString, OPND_STACK(1), comptime_a, SEND_MAX_DEPTH, side_exit);
2117 uint32_t ret = cb_new_label(cb, "ret");
2119 // If they are equal by identity, return true
2120 cmp(cb, C_ARG_REGS[0], C_ARG_REGS[1]);
2121 mov(cb, RAX, imm_opnd(Qtrue));
2122 je_label(cb, ret);
2124 // Otherwise guard that b is a T_STRING (from type info) or String (from runtime guard)
2125 if (ctx_get_opnd_type(ctx, OPND_STACK(0)).type != ETYPE_STRING) {
2126 mov(cb, REG0, C_ARG_REGS[1]);
2127 // Note: any T_STRING is valid here, but we check for a ::String for simplicity
2128 jit_guard_known_klass(jit, ctx, rb_cString, OPND_STACK(0), comptime_b, SEND_MAX_DEPTH, side_exit);
2131 // Call rb_str_eql_internal(a, b)
2132 call_ptr(cb, REG0, (void *)rb_str_eql_internal);
2134 // Push the output on the stack
2135 cb_write_label(cb, ret);
2136 ctx_stack_pop(ctx, 2);
2137 x86opnd_t dst = ctx_stack_push(ctx, TYPE_IMM);
2138 mov(cb, dst, RAX);
2139 cb_link_labels(cb);
2141 return true;
2143 else {
2144 return false;
2148 static codegen_status_t
2149 gen_opt_eq(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2151 // Defer compilation so we can specialize based on a runtime receiver
2152 if (!jit_at_current_insn(jit)) {
2153 defer_compilation(jit, ctx);
2154 return YJIT_END_BLOCK;
2157 // Create a side-exit to fall back to the interpreter
2158 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2160 if (gen_equality_specialized(jit, ctx, side_exit)) {
2161 jit_jump_to_next_insn(jit, ctx);
2162 return YJIT_END_BLOCK;
2164 else {
2165 return gen_opt_send_without_block(jit, ctx, cb);
2169 static codegen_status_t gen_send_general(jitstate_t *jit, ctx_t *ctx, struct rb_call_data *cd, rb_iseq_t *block);
2171 static codegen_status_t
2172 gen_opt_neq(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2174 // opt_neq is passed two rb_call_data as arguments:
2175 // first for ==, second for !=
2176 struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 1);
2177 return gen_send_general(jit, ctx, cd, NULL);
2180 static codegen_status_t
2181 gen_opt_aref(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2183 struct rb_call_data * cd = (struct rb_call_data *)jit_get_arg(jit, 0);
2184 int32_t argc = (int32_t)vm_ci_argc(cd->ci);
2186 // Only JIT one arg calls like `ary[6]`
2187 if (argc != 1) {
2188 GEN_COUNTER_INC(cb, oaref_argc_not_one);
2189 return YJIT_CANT_COMPILE;
2192 // Defer compilation so we can specialize based on a runtime receiver
2193 if (!jit_at_current_insn(jit)) {
2194 defer_compilation(jit, ctx);
2195 return YJIT_END_BLOCK;
2198 // Remember the context on entry for adding guard chains
2199 const ctx_t starting_context = *ctx;
2201 // Specialize based on compile-time values
2202 VALUE comptime_idx = jit_peek_at_stack(jit, ctx, 0);
2203 VALUE comptime_recv = jit_peek_at_stack(jit, ctx, 1);
2205 // Create a side-exit to fall back to the interpreter
2206 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2208 if (CLASS_OF(comptime_recv) == rb_cArray && RB_FIXNUM_P(comptime_idx)) {
2209 if (!assume_bop_not_redefined(jit, ARRAY_REDEFINED_OP_FLAG, BOP_AREF)) {
2210 return YJIT_CANT_COMPILE;
2213 // Pop the stack operands
2214 x86opnd_t idx_opnd = ctx_stack_pop(ctx, 1);
2215 x86opnd_t recv_opnd = ctx_stack_pop(ctx, 1);
2216 mov(cb, REG0, recv_opnd);
2218 // if (SPECIAL_CONST_P(recv)) {
2219 // Bail if receiver is not a heap object
2220 test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK));
2221 jnz_ptr(cb, side_exit);
2222 cmp(cb, REG0, imm_opnd(Qfalse));
2223 je_ptr(cb, side_exit);
2224 cmp(cb, REG0, imm_opnd(Qnil));
2225 je_ptr(cb, side_exit);
2227 // Bail if recv has a class other than ::Array.
2228 // BOP_AREF check above is only good for ::Array.
2229 mov(cb, REG1, mem_opnd(64, REG0, offsetof(struct RBasic, klass)));
2230 mov(cb, REG0, const_ptr_opnd((void *)rb_cArray));
2231 cmp(cb, REG0, REG1);
2232 jit_chain_guard(JCC_JNE, jit, &starting_context, OPT_AREF_MAX_CHAIN_DEPTH, side_exit);
2234 // Bail if idx is not a FIXNUM
2235 mov(cb, REG1, idx_opnd);
2236 test(cb, REG1, imm_opnd(RUBY_FIXNUM_FLAG));
2237 jz_ptr(cb, COUNTED_EXIT(jit, side_exit, oaref_arg_not_fixnum));
2239 // Call VALUE rb_ary_entry_internal(VALUE ary, long offset).
2240 // It never raises or allocates, so we don't need to write to cfp->pc.
2242 mov(cb, RDI, recv_opnd);
2243 sar(cb, REG1, imm_opnd(1)); // Convert fixnum to int
2244 mov(cb, RSI, REG1);
2245 call_ptr(cb, REG0, (void *)rb_ary_entry_internal);
2247 // Push the return value onto the stack
2248 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2249 mov(cb, stack_ret, RAX);
2252 // Jump to next instruction. This allows guard chains to share the same successor.
2253 jit_jump_to_next_insn(jit, ctx);
2254 return YJIT_END_BLOCK;
2256 else if (CLASS_OF(comptime_recv) == rb_cHash) {
2257 if (!assume_bop_not_redefined(jit, HASH_REDEFINED_OP_FLAG, BOP_AREF)) {
2258 return YJIT_CANT_COMPILE;
2261 x86opnd_t key_opnd = ctx_stack_opnd(ctx, 0);
2262 x86opnd_t recv_opnd = ctx_stack_opnd(ctx, 1);
2264 // Guard that the receiver is a hash
2265 mov(cb, REG0, recv_opnd);
2266 jit_guard_known_klass(jit, ctx, rb_cHash, OPND_STACK(1), comptime_recv, OPT_AREF_MAX_CHAIN_DEPTH, side_exit);
2268 // Setup arguments for rb_hash_aref().
2269 mov(cb, C_ARG_REGS[0], REG0);
2270 mov(cb, C_ARG_REGS[1], key_opnd);
2272 // Prepare to call rb_hash_aref(). It might call #hash on the key.
2273 jit_prepare_routine_call(jit, ctx, REG0);
2275 call_ptr(cb, REG0, (void *)rb_hash_aref);
2277 // Pop the key and the receiver
2278 (void)ctx_stack_pop(ctx, 2);
2280 // Push the return value onto the stack
2281 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2282 mov(cb, stack_ret, RAX);
2284 // Jump to next instruction. This allows guard chains to share the same successor.
2285 jit_jump_to_next_insn(jit, ctx);
2286 return YJIT_END_BLOCK;
2288 else {
2289 // General case. Call the [] method.
2290 return gen_opt_send_without_block(jit, ctx, cb);
2294 static codegen_status_t
2295 gen_opt_aset(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2297 // Defer compilation so we can specialize on the runtime operands
2298 if (!jit_at_current_insn(jit)) {
2299 defer_compilation(jit, ctx);
2300 return YJIT_END_BLOCK;
2303 VALUE comptime_recv = jit_peek_at_stack(jit, ctx, 2);
2304 VALUE comptime_key = jit_peek_at_stack(jit, ctx, 1);
2306 // Get the operands from the stack
2307 x86opnd_t recv = ctx_stack_opnd(ctx, 2);
2308 x86opnd_t key = ctx_stack_opnd(ctx, 1);
2309 x86opnd_t val = ctx_stack_opnd(ctx, 0);
2311 if (CLASS_OF(comptime_recv) == rb_cArray && FIXNUM_P(comptime_key)) {
2312 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2314 // Guard receiver is an Array
2315 mov(cb, REG0, recv);
2316 jit_guard_known_klass(jit, ctx, rb_cArray, OPND_STACK(2), comptime_recv, SEND_MAX_DEPTH, side_exit);
2318 // Guard key is a fixnum
2319 mov(cb, REG0, key);
2320 jit_guard_known_klass(jit, ctx, rb_cInteger, OPND_STACK(1), comptime_key, SEND_MAX_DEPTH, side_exit);
2322 // Call rb_ary_store
2323 mov(cb, C_ARG_REGS[0], recv);
2324 mov(cb, C_ARG_REGS[1], key);
2325 sar(cb, C_ARG_REGS[1], imm_opnd(1)); // FIX2LONG(key)
2326 mov(cb, C_ARG_REGS[2], val);
2328 // We might allocate or raise
2329 jit_prepare_routine_call(jit, ctx, REG0);
2331 call_ptr(cb, REG0, (void *)rb_ary_store);
2333 // rb_ary_store returns void
2334 // stored value should still be on stack
2335 mov(cb, REG0, ctx_stack_opnd(ctx, 0));
2337 // Push the return value onto the stack
2338 ctx_stack_pop(ctx, 3);
2339 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2340 mov(cb, stack_ret, REG0);
2342 jit_jump_to_next_insn(jit, ctx);
2343 return YJIT_END_BLOCK;
2345 else if (CLASS_OF(comptime_recv) == rb_cHash) {
2346 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2348 // Guard receiver is a Hash
2349 mov(cb, REG0, recv);
2350 jit_guard_known_klass(jit, ctx, rb_cHash, OPND_STACK(2), comptime_recv, SEND_MAX_DEPTH, side_exit);
2352 // Call rb_hash_aset
2353 mov(cb, C_ARG_REGS[0], recv);
2354 mov(cb, C_ARG_REGS[1], key);
2355 mov(cb, C_ARG_REGS[2], val);
2357 // We might allocate or raise
2358 jit_prepare_routine_call(jit, ctx, REG0);
2360 call_ptr(cb, REG0, (void *)rb_hash_aset);
2362 // Push the return value onto the stack
2363 ctx_stack_pop(ctx, 3);
2364 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2365 mov(cb, stack_ret, RAX);
2367 jit_jump_to_next_insn(jit, ctx);
2368 return YJIT_END_BLOCK;
2370 else {
2371 return gen_opt_send_without_block(jit, ctx, cb);
2375 static codegen_status_t
2376 gen_opt_and(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2378 // Defer compilation so we can specialize on the runtime operands
2379 if (!jit_at_current_insn(jit)) {
2380 defer_compilation(jit, ctx);
2381 return YJIT_END_BLOCK;
2384 VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2385 VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2387 if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2388 // Create a side-exit to fall back to the interpreter
2389 // Note: we generate the side-exit before popping operands from the stack
2390 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2392 if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_AND)) {
2393 return YJIT_CANT_COMPILE;
2396 // Check that both operands are fixnums
2397 guard_two_fixnums(ctx, side_exit);
2399 // Get the operands and destination from the stack
2400 x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2401 x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2403 // Do the bitwise and arg0 & arg1
2404 mov(cb, REG0, arg0);
2405 and(cb, REG0, arg1);
2407 // Push the output on the stack
2408 x86opnd_t dst = ctx_stack_push(ctx, TYPE_FIXNUM);
2409 mov(cb, dst, REG0);
2411 return YJIT_KEEP_COMPILING;
2413 else {
2414 // Delegate to send, call the method on the recv
2415 return gen_opt_send_without_block(jit, ctx, cb);
2419 static codegen_status_t
2420 gen_opt_or(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2422 // Defer compilation so we can specialize on the runtime operands
2423 if (!jit_at_current_insn(jit)) {
2424 defer_compilation(jit, ctx);
2425 return YJIT_END_BLOCK;
2428 VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2429 VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2431 if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2432 // Create a side-exit to fall back to the interpreter
2433 // Note: we generate the side-exit before popping operands from the stack
2434 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2436 if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_OR)) {
2437 return YJIT_CANT_COMPILE;
2440 // Check that both operands are fixnums
2441 guard_two_fixnums(ctx, side_exit);
2443 // Get the operands and destination from the stack
2444 x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2445 x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2447 // Do the bitwise or arg0 | arg1
2448 mov(cb, REG0, arg0);
2449 or(cb, REG0, arg1);
2451 // Push the output on the stack
2452 x86opnd_t dst = ctx_stack_push(ctx, TYPE_FIXNUM);
2453 mov(cb, dst, REG0);
2455 return YJIT_KEEP_COMPILING;
2457 else {
2458 // Delegate to send, call the method on the recv
2459 return gen_opt_send_without_block(jit, ctx, cb);
2463 static codegen_status_t
2464 gen_opt_minus(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2466 // Defer compilation so we can specialize on the runtime operands
2467 if (!jit_at_current_insn(jit)) {
2468 defer_compilation(jit, ctx);
2469 return YJIT_END_BLOCK;
2472 VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2473 VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2475 if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2476 // Create a side-exit to fall back to the interpreter
2477 // Note: we generate the side-exit before popping operands from the stack
2478 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2480 if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_MINUS)) {
2481 return YJIT_CANT_COMPILE;
2484 // Check that both operands are fixnums
2485 guard_two_fixnums(ctx, side_exit);
2487 // Get the operands and destination from the stack
2488 x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2489 x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2491 // Subtract arg0 - arg1 and test for overflow
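// Fixnums are tagged as (2*n + 1): (2*a+1) - (2*b+1) == 2*(a-b), so the tag bit is re-added below.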
2492 mov(cb, REG0, arg0);
2493 sub(cb, REG0, arg1);
2494 jo_ptr(cb, side_exit);
2495 add(cb, REG0, imm_opnd(1));
2497 // Push the output on the stack
2498 x86opnd_t dst = ctx_stack_push(ctx, TYPE_FIXNUM);
2499 mov(cb, dst, REG0);
2501 return YJIT_KEEP_COMPILING;
2503 else {
2504 // Delegate to send, call the method on the recv
2505 return gen_opt_send_without_block(jit, ctx, cb);
2509 static codegen_status_t
2510 gen_opt_plus(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2512 // Defer compilation so we can specialize on the runtime operands
2513 if (!jit_at_current_insn(jit)) {
2514 defer_compilation(jit, ctx);
2515 return YJIT_END_BLOCK;
2518 VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2519 VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2521 if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2522 // Create a side-exit to fall back to the interpreter
2523 // Note: we generate the side-exit before popping operands from the stack
2524 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2526 if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS)) {
2527 return YJIT_CANT_COMPILE;
2530 // Check that both operands are fixnums
2531 guard_two_fixnums(ctx, side_exit);
2533 // Get the operands and destination from the stack
2534 x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2535 x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2537 // Add arg0 + arg1 and test for overflow
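// (2*a+1) + (2*b+1) == 2*(a+b) + 2, so one tag bit is dropped first to keep the result correctly tagged.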
2538 mov(cb, REG0, arg0);
2539 sub(cb, REG0, imm_opnd(1));
2540 add(cb, REG0, arg1);
2541 jo_ptr(cb, side_exit);
2543 // Push the output on the stack
2544 x86opnd_t dst = ctx_stack_push(ctx, TYPE_FIXNUM);
2545 mov(cb, dst, REG0);
2547 return YJIT_KEEP_COMPILING;
2549 else {
2550 // Delegate to send, call the method on the recv
2551 return gen_opt_send_without_block(jit, ctx, cb);
2555 static codegen_status_t
2556 gen_opt_mult(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2558 // Delegate to send, call the method on the recv
2559 return gen_opt_send_without_block(jit, ctx, cb);
2562 static codegen_status_t
2563 gen_opt_div(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2565 // Delegate to send, call the method on the recv
2566 return gen_opt_send_without_block(jit, ctx, cb);
2569 VALUE rb_vm_opt_mod(VALUE recv, VALUE obj);
2571 static codegen_status_t
2572 gen_opt_mod(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2574 // Save the PC and SP because the callee may allocate bignums
2575 // Note that this modifies REG_SP, which is why we do it first
2576 jit_prepare_routine_call(jit, ctx, REG0);
2578 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2580 // Get the operands from the stack
2581 x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2582 x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2584 // Call rb_vm_opt_mod(VALUE recv, VALUE obj)
2585 mov(cb, C_ARG_REGS[0], arg0);
2586 mov(cb, C_ARG_REGS[1], arg1);
2587 call_ptr(cb, REG0, (void *)rb_vm_opt_mod);
2589 // If val == Qundef, bail to do a method call
2590 cmp(cb, RAX, imm_opnd(Qundef));
2591 je_ptr(cb, side_exit);
2593 // Push the return value onto the stack
2594 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2595 mov(cb, stack_ret, RAX);
2597 return YJIT_KEEP_COMPILING;
2600 static codegen_status_t
2601 gen_opt_ltlt(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2603 // Delegate to send, call the method on the recv
2604 return gen_opt_send_without_block(jit, ctx, cb);
2607 static codegen_status_t
2608 gen_opt_nil_p(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2610 // Delegate to send, call the method on the recv
2611 return gen_opt_send_without_block(jit, ctx, cb);
2614 static codegen_status_t
2615 gen_opt_empty_p(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2617 // Delegate to send, call the method on the recv
2618 return gen_opt_send_without_block(jit, ctx, cb);
2621 static codegen_status_t
2622 gen_opt_str_freeze(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2624 if (!assume_bop_not_redefined(jit, STRING_REDEFINED_OP_FLAG, BOP_FREEZE)) {
2625 return YJIT_CANT_COMPILE;
2628 VALUE str = jit_get_arg(jit, 0);
2629 jit_mov_gc_ptr(jit, cb, REG0, str);
2631 // Push the return value onto the stack
2632 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_STRING);
2633 mov(cb, stack_ret, REG0);
2635 return YJIT_KEEP_COMPILING;
2638 static codegen_status_t
2639 gen_opt_str_uminus(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2641 if (!assume_bop_not_redefined(jit, STRING_REDEFINED_OP_FLAG, BOP_UMINUS)) {
2642 return YJIT_CANT_COMPILE;
2645 VALUE str = jit_get_arg(jit, 0);
2646 jit_mov_gc_ptr(jit, cb, REG0, str);
2648 // Push the return value onto the stack
2649 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_STRING);
2650 mov(cb, stack_ret, REG0);
2652 return YJIT_KEEP_COMPILING;
2655 static codegen_status_t
2656 gen_opt_not(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2658 return gen_opt_send_without_block(jit, ctx, cb);
2661 static codegen_status_t
2662 gen_opt_size(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2664 return gen_opt_send_without_block(jit, ctx, cb);
2667 static codegen_status_t
2668 gen_opt_length(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2670 return gen_opt_send_without_block(jit, ctx, cb);
2673 static codegen_status_t
2674 gen_opt_regexpmatch2(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2676 return gen_opt_send_without_block(jit, ctx, cb);
2679 static codegen_status_t
2680 gen_opt_case_dispatch(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2682 // Normally this instruction would look up the key in a hash and jump to an
2683 // offset based on that.
2684 // Instead we can take the fallback case and continue with the next
2685 // instruction.
2686 // We'd hope that our jitted code will be sufficiently fast without the
2687 // hash lookup, at least for small hashes, but it's worth revisiting this
2688 // assumption in the future.
2690 ctx_stack_pop(ctx, 1);
2692 return YJIT_KEEP_COMPILING; // continue with the next instruction
2695 static void
2696 gen_branchif_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
2698 switch (shape) {
2699 case SHAPE_NEXT0:
2700 jz_ptr(cb, target1);
2701 break;
2703 case SHAPE_NEXT1:
2704 jnz_ptr(cb, target0);
2705 break;
2707 case SHAPE_DEFAULT:
2708 jnz_ptr(cb, target0);
2709 jmp_ptr(cb, target1);
2710 break;
2714 static codegen_status_t
2715 gen_branchif(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2717 int32_t jump_offset = (int32_t)jit_get_arg(jit, 0);
2719 // Check for interrupts, but only on backward branches that may create loops
2720 if (jump_offset < 0) {
2721 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2722 yjit_check_ints(cb, side_exit);
2725 // Test if any bit (outside of the Qnil bit) is on
2726 // RUBY_Qfalse /* ...0000 0000 */
2727 // RUBY_Qnil /* ...0000 1000 */
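// Every truthy value has at least one bit set outside the Qnil bit, so ZF ends up set only for Qfalse and Qnil.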
2728 x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
2729 test(cb, val_opnd, imm_opnd(~Qnil));
2731 // Get the branch target instruction offsets
2732 uint32_t next_idx = jit_next_insn_idx(jit);
2733 uint32_t jump_idx = next_idx + jump_offset;
2734 blockid_t next_block = { jit->iseq, next_idx };
2735 blockid_t jump_block = { jit->iseq, jump_idx };
2737 // Generate the branch instructions
2738 gen_branch(
2739 jit,
2740 ctx,
2741 jump_block,
2742 ctx,
2743 next_block,
2744 ctx,
2745 gen_branchif_branch
2748 return YJIT_END_BLOCK;
2751 static void
2752 gen_branchunless_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
2754 switch (shape) {
2755 case SHAPE_NEXT0:
2756 jnz_ptr(cb, target1);
2757 break;
2759 case SHAPE_NEXT1:
2760 jz_ptr(cb, target0);
2761 break;
2763 case SHAPE_DEFAULT:
2764 jz_ptr(cb, target0);
2765 jmp_ptr(cb, target1);
2766 break;
2770 static codegen_status_t
2771 gen_branchunless(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2773 int32_t jump_offset = (int32_t)jit_get_arg(jit, 0);
2775 // Check for interrupts, but only on backward branches that may create loops
2776 if (jump_offset < 0) {
2777 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2778 yjit_check_ints(cb, side_exit);
2781 // Test if any bit (outside of the Qnil bit) is on
2782 // RUBY_Qfalse /* ...0000 0000 */
2783 // RUBY_Qnil /* ...0000 1000 */
2784 x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
2785 test(cb, val_opnd, imm_opnd(~Qnil));
2787 // Get the branch target instruction offsets
2788 uint32_t next_idx = jit_next_insn_idx(jit);
2789 uint32_t jump_idx = next_idx + jump_offset;
2790 blockid_t next_block = { jit->iseq, next_idx };
2791 blockid_t jump_block = { jit->iseq, jump_idx };
2793 // Generate the branch instructions
2794 gen_branch(
2795 jit,
2796 ctx,
2797 jump_block,
2798 ctx,
2799 next_block,
2800 ctx,
2801 gen_branchunless_branch
2804 return YJIT_END_BLOCK;
2807 static void
2808 gen_branchnil_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
2810 switch (shape) {
2811 case SHAPE_NEXT0:
2812 jne_ptr(cb, target1);
2813 break;
2815 case SHAPE_NEXT1:
2816 je_ptr(cb, target0);
2817 break;
2819 case SHAPE_DEFAULT:
2820 je_ptr(cb, target0);
2821 jmp_ptr(cb, target1);
2822 break;
2826 static codegen_status_t
2827 gen_branchnil(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2829 int32_t jump_offset = (int32_t)jit_get_arg(jit, 0);
2831 // Check for interrupts, but only on backward branches that may create loops
2832 if (jump_offset < 0) {
2833 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2834 yjit_check_ints(cb, side_exit);
2837 // Test if the value is Qnil
2838 // RUBY_Qnil /* ...0000 1000 */
2839 x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
2840 cmp(cb, val_opnd, imm_opnd(Qnil));
2842 // Get the branch target instruction offsets
2843 uint32_t next_idx = jit_next_insn_idx(jit);
2844 uint32_t jump_idx = next_idx + jump_offset;
2845 blockid_t next_block = { jit->iseq, next_idx };
2846 blockid_t jump_block = { jit->iseq, jump_idx };
2848 // Generate the branch instructions
2849 gen_branch(
2850 jit,
2851 ctx,
2852 jump_block,
2853 ctx,
2854 next_block,
2855 ctx,
2856 gen_branchnil_branch
2859 return YJIT_END_BLOCK;
2862 static codegen_status_t
2863 gen_jump(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2865 int32_t jump_offset = (int32_t)jit_get_arg(jit, 0);
2867 // Check for interrupts, but only on backward branches that may create loops
2868 if (jump_offset < 0) {
2869 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2870 yjit_check_ints(cb, side_exit);
2873 // Get the branch target instruction offsets
2874 uint32_t jump_idx = jit_next_insn_idx(jit) + jump_offset;
2875 blockid_t jump_block = { jit->iseq, jump_idx };
2877 // Generate the jump instruction
2878 gen_direct_jump(
2879 jit,
2880 ctx,
2881 jump_block
2884 return YJIT_END_BLOCK;
2888 Guard that self or a stack operand has the same class as `known_klass`, using
2889 `sample_instance` to speculate about the shape of the runtime value.
2890 FIXNUM and on-heap integers are treated as if they have distinct classes, and
2891 the guard generated for one will fail for the other.
2893 Recompile as a contingency if possible, or take a side exit as a last resort.
2895 static bool
2896 jit_guard_known_klass(jitstate_t *jit, ctx_t *ctx, VALUE known_klass, insn_opnd_t insn_opnd, VALUE sample_instance, const int max_chain_depth, uint8_t *side_exit)
2898 val_type_t val_type = ctx_get_opnd_type(ctx, insn_opnd);
2900 if (known_klass == rb_cNilClass) {
2901 RUBY_ASSERT(!val_type.is_heap);
2902 if (val_type.type != ETYPE_NIL) {
2903 RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2905 ADD_COMMENT(cb, "guard object is nil");
2906 cmp(cb, REG0, imm_opnd(Qnil));
2907 jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
2909 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_NIL);
2912 else if (known_klass == rb_cTrueClass) {
2913 RUBY_ASSERT(!val_type.is_heap);
2914 if (val_type.type != ETYPE_TRUE) {
2915 RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2917 ADD_COMMENT(cb, "guard object is true");
2918 cmp(cb, REG0, imm_opnd(Qtrue));
2919 jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
2921 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_TRUE);
2924 else if (known_klass == rb_cFalseClass) {
2925 RUBY_ASSERT(!val_type.is_heap);
2926 if (val_type.type != ETYPE_FALSE) {
2927 RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2929 ADD_COMMENT(cb, "guard object is false");
2930 STATIC_ASSERT(qfalse_is_zero, Qfalse == 0);
2931 test(cb, REG0, REG0);
2932 jit_chain_guard(JCC_JNZ, jit, ctx, max_chain_depth, side_exit);
2934 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_FALSE);
2937 else if (known_klass == rb_cInteger && FIXNUM_P(sample_instance)) {
2938 RUBY_ASSERT(!val_type.is_heap);
2939 // We will guard fixnum and bignum as though they were separate classes
2940 // BIGNUM can be handled by the general else case below
2941 if (val_type.type != ETYPE_FIXNUM || !val_type.is_imm) {
2942 RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2944 ADD_COMMENT(cb, "guard object is fixnum");
2945 test(cb, REG0, imm_opnd(RUBY_FIXNUM_FLAG));
2946 jit_chain_guard(JCC_JZ, jit, ctx, max_chain_depth, side_exit);
2947 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_FIXNUM);
2950 else if (known_klass == rb_cSymbol && STATIC_SYM_P(sample_instance)) {
2951 RUBY_ASSERT(!val_type.is_heap);
2952 // We will guard STATIC vs DYNAMIC as though they were separate classes
2953 // DYNAMIC symbols can be handled by the general else case below
2954 if (val_type.type != ETYPE_SYMBOL || !val_type.is_imm) {
2955 RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2957 ADD_COMMENT(cb, "guard object is static symbol");
2958 STATIC_ASSERT(special_shift_is_8, RUBY_SPECIAL_SHIFT == 8);
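// With RUBY_SPECIAL_SHIFT == 8, a static symbol is (ID << 8) | RUBY_SYMBOL_FLAG, so checking the lowest byte suffices.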
2959 cmp(cb, REG0_8, imm_opnd(RUBY_SYMBOL_FLAG));
2960 jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
2961 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_STATIC_SYMBOL);
2964 else if (known_klass == rb_cFloat && FLONUM_P(sample_instance)) {
2965 RUBY_ASSERT(!val_type.is_heap);
2966 if (val_type.type != ETYPE_FLONUM || !val_type.is_imm) {
2967 RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2969 // We will guard flonum vs heap float as though they were separate classes
2970 ADD_COMMENT(cb, "guard object is flonum");
2971 mov(cb, REG1, REG0);
2972 and(cb, REG1, imm_opnd(RUBY_FLONUM_MASK));
2973 cmp(cb, REG1, imm_opnd(RUBY_FLONUM_FLAG));
2974 jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
2975 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_FLONUM);
2978 else if (FL_TEST(known_klass, FL_SINGLETON) && sample_instance == rb_attr_get(known_klass, id__attached__)) {
2979 // Singleton classes are attached to one specific object, so we can
2980 // avoid one memory access (and potentially the is_heap check) by
2981 // looking for the expected object directly.
2982 // Note that if the sample instance has a singleton class that doesn't
2983 // attach to the sample instance, then the sample instance has an empty
2984 // singleton class that hasn't been materialized yet. In
2985 // this case, comparing against the sample instance doesn't guarantee
2986 // that its singleton class is empty, so we can't avoid the memory
2987 // access. As an example, `Object.new.singleton_class` is an object in
2988 // this situation.
2989 ADD_COMMENT(cb, "guard known object with singleton class");
2990 // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the object.
2991 jit_mov_gc_ptr(jit, cb, REG1, sample_instance);
2992 cmp(cb, REG0, REG1);
2993 jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
2995 else {
2996 RUBY_ASSERT(!val_type.is_imm);
2998 // Check that the receiver is a heap object
2999 // Note: if we get here, the class doesn't have immediate instances.
3000 if (!val_type.is_heap) {
3001 ADD_COMMENT(cb, "guard not immediate");
3002 RUBY_ASSERT(Qfalse < Qnil);
3003 test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK));
3004 jit_chain_guard(JCC_JNZ, jit, ctx, max_chain_depth, side_exit);
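// Qfalse (0) and Qnil (8) have no RUBY_IMMEDIATE_MASK bits set; the <= Qnil comparison below rules them out.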
3005 cmp(cb, REG0, imm_opnd(Qnil));
3006 jit_chain_guard(JCC_JBE, jit, ctx, max_chain_depth, side_exit);
3008 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_HEAP);
3011 x86opnd_t klass_opnd = mem_opnd(64, REG0, offsetof(struct RBasic, klass));
3013 // Bail if receiver class is different from known_klass
3014 // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the class.
3015 ADD_COMMENT(cb, "guard known class");
3016 jit_mov_gc_ptr(jit, cb, REG1, known_klass);
3017 cmp(cb, klass_opnd, REG1);
3018 jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
3021 return true;
3024 // Generate ancestry guard for protected callee.
3025 // Calls to protected callees only go through when self.is_a?(klass_that_defines_the_callee).
3026 static void
3027 jit_protected_callee_ancestry_guard(jitstate_t *jit, codeblock_t *cb, const rb_callable_method_entry_t *cme, uint8_t *side_exit)
3029 // See vm_call_method().
3030 mov(cb, C_ARG_REGS[0], member_opnd(REG_CFP, rb_control_frame_t, self));
3031 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], cme->defined_class);
3032 // Note: PC isn't written to current control frame as rb_is_kind_of() shouldn't raise.
3033 // VALUE rb_obj_is_kind_of(VALUE obj, VALUE klass);
3034 call_ptr(cb, REG0, (void *)&rb_obj_is_kind_of);
3035 test(cb, RAX, RAX);
3036 jz_ptr(cb, COUNTED_EXIT(jit, side_exit, send_se_protected_check_failed));
3039 // Return true when the codegen function generates code.
3040 // known_recv_klass is non-NULL when the caller has used jit_guard_known_klass().
3041 // See yjit_reg_method().
3042 typedef bool (*method_codegen_t)(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass);
3044 // Register a specialized codegen function for a particular method. Note that
3045 // if the function returns true, the code it generates runs without a
3046 // control frame and without interrupt checks. To avoid creating observable
3047 // behavior changes, the codegen function should only target simple code paths
3048 // that do not allocate and do not make method calls.
3049 static void
3050 yjit_reg_method(VALUE klass, const char *mid_str, method_codegen_t gen_fn)
3052 ID mid = rb_intern(mid_str);
3053 const rb_method_entry_t *me = rb_method_entry_at(klass, mid);
3055 if (!me) {
3056 rb_bug("undefined optimized method: %s", rb_id2name(mid));
3059 // For now, only cfuncs are supported
3060 RUBY_ASSERT(me && me->def);
3061 RUBY_ASSERT(me->def->type == VM_METHOD_TYPE_CFUNC);
3063 st_insert(yjit_method_codegen_table, (st_data_t)me->def->method_serial, (st_data_t)gen_fn);
3066 // Codegen for rb_obj_not().
3067 // Note, caller is responsible for generating all the right guards, including
3068 // arity guards.
3069 static bool
3070 jit_rb_obj_not(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3072 const val_type_t recv_opnd = ctx_get_opnd_type(ctx, OPND_STACK(0));
3074 if (recv_opnd.type == ETYPE_NIL || recv_opnd.type == ETYPE_FALSE) {
3075 ADD_COMMENT(cb, "rb_obj_not(nil_or_false)");
3076 ctx_stack_pop(ctx, 1);
3077 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_TRUE);
3078 mov(cb, out_opnd, imm_opnd(Qtrue));
3080 else if (recv_opnd.is_heap || recv_opnd.type != ETYPE_UNKNOWN) {
3081 // Note: recv_opnd.type != ETYPE_NIL && recv_opnd.type != ETYPE_FALSE.
3082 ADD_COMMENT(cb, "rb_obj_not(truthy)");
3083 ctx_stack_pop(ctx, 1);
3084 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_FALSE);
3085 mov(cb, out_opnd, imm_opnd(Qfalse));
3087 else {
3088 // jit_guard_known_klass() already ran on the receiver which should
3089 // have deduced the type of the receiver. This case should be
3090 // rare if not unreachable.
3091 return false;
3093 return true;
3096 // Codegen for rb_true()
3097 static bool
3098 jit_rb_true(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3100 ADD_COMMENT(cb, "nil? == true");
3101 ctx_stack_pop(ctx, 1);
3102 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_TRUE);
3103 mov(cb, stack_ret, imm_opnd(Qtrue));
3104 return true;
3107 // Codegen for rb_false()
3108 static bool
3109 jit_rb_false(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3111 ADD_COMMENT(cb, "nil? == false");
3112 ctx_stack_pop(ctx, 1);
3113 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_FALSE);
3114 mov(cb, stack_ret, imm_opnd(Qfalse));
3115 return true;
3118 // Codegen for rb_obj_equal()
3119 // object identity comparison
3120 static bool
3121 jit_rb_obj_equal(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3123 ADD_COMMENT(cb, "equal?");
3124 x86opnd_t obj1 = ctx_stack_pop(ctx, 1);
3125 x86opnd_t obj2 = ctx_stack_pop(ctx, 1);
3127 mov(cb, REG0, obj1);
3128 cmp(cb, REG0, obj2);
3129 mov(cb, REG0, imm_opnd(Qtrue));
3130 mov(cb, REG1, imm_opnd(Qfalse));
3131 cmovne(cb, REG0, REG1);
3133 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_IMM);
3134 mov(cb, stack_ret, REG0);
3135 return true;
3138 static VALUE
3139 yjit_str_bytesize(VALUE str)
3141 return LONG2NUM(RSTRING_LEN(str));
3144 static bool
3145 jit_rb_str_bytesize(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3147 ADD_COMMENT(cb, "String#bytesize");
3149 x86opnd_t recv = ctx_stack_pop(ctx, 1);
3150 mov(cb, C_ARG_REGS[0], recv);
3151 call_ptr(cb, REG0, (void *)&yjit_str_bytesize);
3153 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_FIXNUM);
3154 mov(cb, out_opnd, RAX);
3156 return true;
3159 // Codegen for rb_str_to_s()
3160 // When String#to_s is called on a String instance, the method returns self and
3161 // most of the overhead comes from setting up the method call. We observed that
3162 // this situation happens a lot in some workloads.
3163 static bool
3164 jit_rb_str_to_s(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *recv_known_klass)
3166 if (recv_known_klass && *recv_known_klass == rb_cString) {
3167 ADD_COMMENT(cb, "to_s on plain string");
3168 // The method returns the receiver, which is already on the stack.
3169 // No stack movement.
3170 return true;
3172 return false;
3175 static bool
3176 jit_thread_s_current(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *recv_known_klass)
3178 ADD_COMMENT(cb, "Thread.current");
3179 ctx_stack_pop(ctx, 1);
3181 // ec->thread_ptr
3182 mov(cb, REG0, member_opnd(REG_EC, rb_execution_context_t, thread_ptr));
3184 // thread->self
3185 mov(cb, REG0, member_opnd(REG0, rb_thread_t, self));
3187 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HEAP);
3188 mov(cb, stack_ret, REG0);
3189 return true;
3192 // Check if we know how to codegen for a particular cfunc method
3193 static method_codegen_t
3194 lookup_cfunc_codegen(const rb_method_definition_t *def)
3196 method_codegen_t gen_fn;
3197 if (st_lookup(yjit_method_codegen_table, def->method_serial, (st_data_t *)&gen_fn)) {
3198 return gen_fn;
3200 return NULL;
3203 // Is anyone listening for :c_call and :c_return events currently?
3204 static bool
3205 c_method_tracing_currently_enabled(const jitstate_t *jit)
3207 rb_event_flag_t tracing_events;
3208 if (rb_multi_ractor_p()) {
3209 tracing_events = ruby_vm_event_enabled_global_flags;
3211 else {
3212 // At the time of writing, events are never removed from
3213 // ruby_vm_event_enabled_global_flags so always checking using it would
3214 // mean we don't compile even after tracing is disabled.
3215 tracing_events = rb_ec_ractor_hooks(jit->ec)->events;
3218 return tracing_events & (RUBY_EVENT_C_CALL | RUBY_EVENT_C_RETURN);
3221 static codegen_status_t
3222 gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *recv_known_klass)
3224 const rb_method_cfunc_t *cfunc = UNALIGNED_MEMBER_PTR(cme->def, body.cfunc);
3226 // If the function expects a Ruby array of arguments
3227 if (cfunc->argc < 0 && cfunc->argc != -1) {
3228 GEN_COUNTER_INC(cb, send_cfunc_ruby_array_varg);
3229 return YJIT_CANT_COMPILE;
3232 // If the argument count doesn't match
3233 if (cfunc->argc >= 0 && cfunc->argc != argc) {
3234 GEN_COUNTER_INC(cb, send_cfunc_argc_mismatch);
3235 return YJIT_CANT_COMPILE;
3238 // Don't JIT functions that need C stack arguments for now
3239 if (cfunc->argc >= 0 && argc + 1 > NUM_C_ARG_REGS) {
3240 GEN_COUNTER_INC(cb, send_cfunc_toomany_args);
3241 return YJIT_CANT_COMPILE;
3244 if (c_method_tracing_currently_enabled(jit)) {
3245 // Don't JIT if tracing c_call or c_return
3246 GEN_COUNTER_INC(cb, send_cfunc_tracing);
3247 return YJIT_CANT_COMPILE;
3250 // Delegate to codegen for C methods if we have it.
3252 method_codegen_t known_cfunc_codegen;
3253 if ((known_cfunc_codegen = lookup_cfunc_codegen(cme->def))) {
3254 if (known_cfunc_codegen(jit, ctx, ci, cme, block, argc, recv_known_klass)) {
3255 // cfunc codegen generated code. Terminate the block so
3256 // there aren't multiple calls in the same block.
3257 jit_jump_to_next_insn(jit, ctx);
3258 return YJIT_END_BLOCK;
3263 // Callee method ID
3264 //ID mid = vm_ci_mid(ci);
3265 //printf("JITting call to C function \"%s\", argc: %lu\n", rb_id2name(mid), argc);
3266 //print_str(cb, "");
3267 //print_str(cb, "calling CFUNC:");
3268 //print_str(cb, rb_id2name(mid));
3269 //print_str(cb, "recv");
3270 //print_ptr(cb, recv);
3272 // Create a side-exit to fall back to the interpreter
3273 uint8_t *side_exit = yjit_side_exit(jit, ctx);
3275 // Check for interrupts
3276 yjit_check_ints(cb, side_exit);
3278 // Stack overflow check
3279 // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
3280 // REG_CFP <= REG_SP + 4 * sizeof(VALUE) + 2 * sizeof(rb_control_frame_t)
3281 lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 4 + 2 * sizeof(rb_control_frame_t)));
3282 cmp(cb, REG_CFP, REG0);
3283 jle_ptr(cb, COUNTED_EXIT(jit, side_exit, send_se_cf_overflow));
3285 // Points to the receiver operand on the stack
3286 x86opnd_t recv = ctx_stack_opnd(ctx, argc);
3288 // Store incremented PC into current control frame in case callee raises.
3289 jit_save_pc(jit, REG0);
3291 if (block) {
3292 // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
3293 // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases
3294 // with cfp->block_code.
3295 jit_mov_gc_ptr(jit, cb, REG0, (VALUE)block);
3296 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, block_code), REG0);
3299 // Increment the stack pointer by 3 (in the callee)
3300 // sp += 3
3301 lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 3));
3303 // Write method entry at sp[-3]
3304 // sp[-3] = me;
3305 // Put compile time cme into REG1. It's assumed to be valid because we are notified when
3306 // any cme we depend on becomes outdated. See rb_yjit_method_lookup_change().
3307 jit_mov_gc_ptr(jit, cb, REG1, (VALUE)cme);
3308 mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
3310 // Write block handler at sp[-2]
3311 // sp[-2] = block_handler;
3312 if (block) {
3313 // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
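// VM_BH_FROM_ISEQ_BLOCK tags the captured block pointer by setting its low bit, reproduced by the or below.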
3314 lea(cb, REG1, member_opnd(REG_CFP, rb_control_frame_t, self));
3315 or(cb, REG1, imm_opnd(1));
3316 mov(cb, mem_opnd(64, REG0, 8 * -2), REG1);
3318 else {
3319 mov(cb, mem_opnd(64, REG0, 8 * -2), imm_opnd(VM_BLOCK_HANDLER_NONE));
3322 // Write env flags at sp[-1]
3323 // sp[-1] = frame_type;
3324 uint64_t frame_type = VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL;
3325 mov(cb, mem_opnd(64, REG0, 8 * -1), imm_opnd(frame_type));
3327 // Allocate a new CFP (ec->cfp--)
3328 sub(
3330 member_opnd(REG_EC, rb_execution_context_t, cfp),
3331 imm_opnd(sizeof(rb_control_frame_t))
3334 // Setup the new frame
3335 // *cfp = (const struct rb_control_frame_struct) {
3336 // .pc = 0,
3337 // .sp = sp,
3338 // .iseq = 0,
3339 // .self = recv,
3340 // .ep = sp - 1,
3341 // .block_code = 0,
3342 // .__bp__ = sp,
3343 // };
3344 mov(cb, REG1, member_opnd(REG_EC, rb_execution_context_t, cfp));
3345 mov(cb, member_opnd(REG1, rb_control_frame_t, pc), imm_opnd(0));
3346 mov(cb, member_opnd(REG1, rb_control_frame_t, sp), REG0);
3347 mov(cb, member_opnd(REG1, rb_control_frame_t, iseq), imm_opnd(0));
3348 mov(cb, member_opnd(REG1, rb_control_frame_t, block_code), imm_opnd(0));
3349 mov(cb, member_opnd(REG1, rb_control_frame_t, __bp__), REG0);
3350 sub(cb, REG0, imm_opnd(sizeof(VALUE)));
3351 mov(cb, member_opnd(REG1, rb_control_frame_t, ep), REG0);
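// With ep = sp - 1, ep[0] is the frame type, ep[-1] the block handler, and ep[-2] the cme written above.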
3352 mov(cb, REG0, recv);
3353 mov(cb, member_opnd(REG1, rb_control_frame_t, self), REG0);
3355 // Verify that we are calling the right function
3356 if (YJIT_CHECK_MODE > 0) {
3357 // Call check_cfunc_dispatch
3358 mov(cb, C_ARG_REGS[0], recv);
3359 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], (VALUE)ci);
3360 mov(cb, C_ARG_REGS[2], const_ptr_opnd((void *)cfunc->func));
3361 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[3], (VALUE)cme);
3362 call_ptr(cb, REG0, (void *)&check_cfunc_dispatch);
3365 // Copy SP into RAX because REG_SP will get overwritten
3366 lea(cb, RAX, ctx_sp_opnd(ctx, 0));
3368 // Pop the C function arguments from the stack (in the caller)
3369 ctx_stack_pop(ctx, argc + 1);
3371 // Write interpreter SP into CFP.
3372 // Needed in case the callee yields to the block.
3373 jit_save_sp(jit, ctx);
3375 // Non-variadic method
3376 if (cfunc->argc >= 0) {
3377 // Copy the arguments from the stack to the C argument registers
3378 // self is the 0th argument and is at index argc from the stack top
3379 for (int32_t i = 0; i < argc + 1; ++i)
3381 x86opnd_t stack_opnd = mem_opnd(64, RAX, -(argc + 1 - i) * SIZEOF_VALUE);
3382 x86opnd_t c_arg_reg = C_ARG_REGS[i];
3383 mov(cb, c_arg_reg, stack_opnd);
3386 // Variadic method
3387 if (cfunc->argc == -1) {
3388 // The method gets a pointer to the first argument
3389 // rb_f_puts(int argc, VALUE *argv, VALUE recv)
3390 mov(cb, C_ARG_REGS[0], imm_opnd(argc));
3391 lea(cb, C_ARG_REGS[1], mem_opnd(64, RAX, -(argc) * SIZEOF_VALUE));
3392 mov(cb, C_ARG_REGS[2], mem_opnd(64, RAX, -(argc + 1) * SIZEOF_VALUE));
3395 // Call the C function
3396 // VALUE ret = (cfunc->func)(recv, argv[0], argv[1]);
3397 // cfunc comes from compile-time cme->def, which we assume to be stable.
3398 // Invalidation logic is in rb_yjit_method_lookup_change()
3399 call_ptr(cb, REG0, (void*)cfunc->func);
3401 // Record code position for TracePoint patching. See full_cfunc_return().
3402 record_global_inval_patch(cb, outline_full_cfunc_return_pos);
3404 // Push the return value on the Ruby stack
3405 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
3406 mov(cb, stack_ret, RAX);
3408 // Pop the stack frame (ec->cfp++)
3409 add(
3411 member_opnd(REG_EC, rb_execution_context_t, cfp),
3412 imm_opnd(sizeof(rb_control_frame_t))
3415 // cfunc calls may corrupt types
3416 ctx_clear_local_types(ctx);
3418 // Note: the return block of gen_send_iseq() has ctx->sp_offset == 1
3419 // which allows for sharing the same successor.
3421 // Jump (fall through) to the call continuation block
3422 // We do this to end the current block after the call
3423 jit_jump_to_next_insn(jit, ctx);
3424 return YJIT_END_BLOCK;
3427 static void
3428 gen_return_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
3430 switch (shape) {
3431 case SHAPE_NEXT0:
3432 case SHAPE_NEXT1:
3433 RUBY_ASSERT(false);
3434 break;
3436 case SHAPE_DEFAULT:
3437 mov(cb, REG0, const_ptr_opnd(target0));
3438 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, jit_return), REG0);
3439 break;
3443 // If true, the iseq is leaf and it can be replaced by a single C call.
3444 static bool
3445 rb_leaf_invokebuiltin_iseq_p(const rb_iseq_t *iseq)
3447 unsigned int invokebuiltin_len = insn_len(BIN(opt_invokebuiltin_delegate_leave));
3448 unsigned int leave_len = insn_len(BIN(leave));
3450 return (iseq->body->iseq_size == (invokebuiltin_len + leave_len) &&
3451 rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[0]) == BIN(opt_invokebuiltin_delegate_leave) &&
3452 rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[invokebuiltin_len]) == BIN(leave) &&
3453 iseq->body->builtin_inline_p
3457 // Return an rb_builtin_function if the iseq contains only that leaf builtin function.
3458 static const struct rb_builtin_function*
3459 rb_leaf_builtin_function(const rb_iseq_t *iseq)
3461 if (!rb_leaf_invokebuiltin_iseq_p(iseq))
3462 return NULL;
3463 return (const struct rb_builtin_function *)iseq->body->iseq_encoded[1];
3466 static codegen_status_t
3467 gen_send_iseq(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, int32_t argc)
3469 const rb_iseq_t *iseq = def_iseq_ptr(cme->def);
3471 // When you have keyword arguments, there is an extra object that gets
3472 // placed on the stack that represents a bitmap of the keywords that were not
3473 // specified at the call site. We need to keep track of the fact that this
3474 // value is present on the stack in order to properly set up the callee's
3475 // stack pointer.
3476 const bool doing_kw_call = iseq->body->param.flags.has_kw;
3477 const bool supplying_kws = vm_ci_flag(ci) & VM_CALL_KWARG;
3479 if (vm_ci_flag(ci) & VM_CALL_TAILCALL) {
3480 // We can't handle tailcalls
3481 GEN_COUNTER_INC(cb, send_iseq_tailcall);
3482 return YJIT_CANT_COMPILE;
3485 // No support for callees with these parameters yet as they require allocation
3486 // or complex handling.
3487 if (iseq->body->param.flags.has_rest ||
3488 iseq->body->param.flags.has_post ||
3489 iseq->body->param.flags.has_kwrest) {
3490 GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3491 return YJIT_CANT_COMPILE;
3494 // If we have keyword arguments being passed to a callee that only takes
3495 // positionals, then we need to allocate a hash. For now we're going to
3496 // call that too complex and bail.
3497 if (supplying_kws && !iseq->body->param.flags.has_kw) {
3498 GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3499 return YJIT_CANT_COMPILE;
3502 // If we have a method accepting no kwargs (**nil), exit if we have passed
3503 // it any kwargs.
3504 if (supplying_kws && iseq->body->param.flags.accepts_no_kwarg) {
3505 GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3506 return YJIT_CANT_COMPILE;
3509 // For computing the number of locals to set up for the callee
3510 int num_params = iseq->body->param.size;
3512 // Block parameter handling. This mirrors setup_parameters_complex().
3513 if (iseq->body->param.flags.has_block) {
3514 if (iseq->body->local_iseq == iseq) {
3515 // Block argument is passed through EP and not set up as a local in
3516 // the callee.
3517 num_params--;
3519 else {
3520 // In this case (param.flags.has_block && local_iseq != iseq),
3521 // the block argument is set up as a local variable and requires
3522 // materialization (allocation). Bail.
3523 GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3524 return YJIT_CANT_COMPILE;
3528 uint32_t start_pc_offset = 0;
3530 const int required_num = iseq->body->param.lead_num;
3532 // This struct represents the metadata about the caller-specified
3533 // keyword arguments.
3534 const struct rb_callinfo_kwarg *kw_arg = vm_ci_kwarg(ci);
3535 const int kw_arg_num = kw_arg ? kw_arg->keyword_len : 0;
3537 // Arity handling and optional parameter setup
3538 const int opts_filled = argc - required_num - kw_arg_num;
3539 const int opt_num = iseq->body->param.opt_num;
3540 const int opts_missing = opt_num - opts_filled;
3542 if (opts_filled < 0 || opts_filled > opt_num) {
3543 GEN_COUNTER_INC(cb, send_iseq_arity_error);
3544 return YJIT_CANT_COMPILE;
3547 // If we have unfilled optional arguments and keyword arguments then we
3548 // would need to adjust the arguments' location to account for that.
3549 // For now we aren't handling this case.
3550 if (doing_kw_call && opts_missing > 0) {
3551 GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3552 return YJIT_CANT_COMPILE;
3555 if (opt_num > 0) {
3556 num_params -= opt_num - opts_filled;
3557 start_pc_offset = (uint32_t)iseq->body->param.opt_table[opts_filled];
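// Illustrative example: for `def m(a, b = 1, c = 2)` called with two positional
// arguments, required_num == 1, opt_num == 2 and opts_filled == 1, so one
// optional parameter is unfilled and the callee starts at opt_table[1],
// skipping only the default-value code for b.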
3560 if (doing_kw_call) {
3561 // Here we're calling a method with keyword arguments and specifying
3562 // keyword arguments at this call site.
3564 // This struct represents the metadata about the callee-specified
3565 // keyword parameters.
3566 const struct rb_iseq_param_keyword *keyword = iseq->body->param.keyword;
3568 int required_kwargs_filled = 0;
3570 if (keyword->num > 30) {
3571 // We have so many keywords that (1 << num) encoded as a FIXNUM
3572 // (which shifts it left one more) no longer fits inside a 32-bit
3573 // immediate.
3574 GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3575 return YJIT_CANT_COMPILE;
3578 // Check that the kwargs being passed are valid
3579 if (supplying_kws) {
3580 // This is the list of keyword arguments that the callee specified
3581 // in its initial declaration.
3582 const ID *callee_kwargs = keyword->table;
3584 // Here we're going to build up a list of the IDs that correspond to
3585 // the caller-specified keyword arguments. If they're not in the
3586 // same order as the order specified in the callee declaration, then
3587 // we're going to need to generate some code to swap values around
3588 // on the stack.
3589 ID *caller_kwargs = ALLOCA_N(VALUE, kw_arg->keyword_len);
3590 for (int kwarg_idx = 0; kwarg_idx < kw_arg->keyword_len; kwarg_idx++)
3591 caller_kwargs[kwarg_idx] = SYM2ID(kw_arg->keywords[kwarg_idx]);
3593 // First, we're going to be sure that the names of every
3594 // caller-specified keyword argument correspond to a name in the
3595 // list of callee-specified keyword parameters.
3596 for (int caller_idx = 0; caller_idx < kw_arg->keyword_len; caller_idx++) {
3597 int callee_idx;
3599 for (callee_idx = 0; callee_idx < keyword->num; callee_idx++) {
3600 if (caller_kwargs[caller_idx] == callee_kwargs[callee_idx]) {
3601 break;
3605 // If the keyword was never found, then we know we have a
3606 // mismatch in the names of the keyword arguments, so we need to
3607 // bail.
3608 if (callee_idx == keyword->num) {
3609 GEN_COUNTER_INC(cb, send_iseq_kwargs_mismatch);
3610 return YJIT_CANT_COMPILE;
3613 // Keep a count to ensure all required kwargs are specified
3614 if (callee_idx < keyword->required_num) {
3615 required_kwargs_filled++;
3620 RUBY_ASSERT(required_kwargs_filled <= keyword->required_num);
3621 if (required_kwargs_filled != keyword->required_num) {
3622 GEN_COUNTER_INC(cb, send_iseq_kwargs_mismatch);
3623 return YJIT_CANT_COMPILE;
3627 // Number of locals that are not parameters
3628 const int num_locals = iseq->body->local_table_size - num_params;
3630 // Create a side-exit to fall back to the interpreter
3631 uint8_t *side_exit = yjit_side_exit(jit, ctx);
3633 // Check for interrupts
3634 yjit_check_ints(cb, side_exit);
3636 const struct rb_builtin_function *leaf_builtin = rb_leaf_builtin_function(iseq);
3638 if (leaf_builtin && !block && leaf_builtin->argc + 1 <= NUM_C_ARG_REGS) {
3639 ADD_COMMENT(cb, "inlined leaf builtin");
3641 // Call the builtin func (ec, recv, arg1, arg2, ...)
3642 mov(cb, C_ARG_REGS[0], REG_EC);
3644 // Copy self and arguments
3645 for (int32_t i = 0; i < leaf_builtin->argc + 1; i++) {
3646 x86opnd_t stack_opnd = ctx_stack_opnd(ctx, leaf_builtin->argc - i);
3647 x86opnd_t c_arg_reg = C_ARG_REGS[i + 1];
3648 mov(cb, c_arg_reg, stack_opnd);
3650 ctx_stack_pop(ctx, leaf_builtin->argc + 1);
3651 call_ptr(cb, REG0, (void *)leaf_builtin->func_ptr);
3653 // Push the return value
3654 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
3655 mov(cb, stack_ret, RAX);
3657 // Note: assuming that the leaf builtin doesn't change local variables here.
3658 // Seems like a safe assumption.
3660 return YJIT_KEEP_COMPILING;
3663 // Stack overflow check
3664 // Note that vm_push_frame checks it against a decremented cfp, hence the multiply by 2.
3665 // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
3666 ADD_COMMENT(cb, "stack overflow check");
3667 lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * (num_locals + iseq->body->stack_max) + 2 * sizeof(rb_control_frame_t)));
3668 cmp(cb, REG_CFP, REG0);
3669 jle_ptr(cb, COUNTED_EXIT(jit, side_exit, send_se_cf_overflow));
3671 if (doing_kw_call) {
3672 // Here we're calling a method with keyword arguments and specifying
3673 // keyword arguments at this call site.
3674 const int lead_num = iseq->body->param.lead_num;
3676 // This struct represents the metadata about the caller-specified
3677 // keyword arguments.
3678 int caller_keyword_len = 0;
3679 const VALUE *caller_keywords = NULL;
3680 if (vm_ci_kwarg(ci)) {
3681 caller_keyword_len = vm_ci_kwarg(ci)->keyword_len;
3682 caller_keywords = &vm_ci_kwarg(ci)->keywords[0];
3685 // This struct represents the metadata about the callee-specified
3686 // keyword parameters.
3687 const struct rb_iseq_param_keyword *const keyword = iseq->body->param.keyword;
3689 ADD_COMMENT(cb, "keyword args");
3691 // This is the list of keyword arguments that the callee specified
3692 // in its initial declaration.
3693 const ID *callee_kwargs = keyword->table;
3695 int total_kwargs = keyword->num;
3697 // Here we're going to build up a list of the IDs that correspond to
3698 // the caller-specified keyword arguments. If they're not in the
3699 // same order as the order specified in the callee declaration, then
3700 // we're going to need to generate some code to swap values around
3701 // on the stack.
3702 ID *caller_kwargs = ALLOCA_N(VALUE, total_kwargs);
3703 int kwarg_idx;
3704 for (kwarg_idx = 0; kwarg_idx < caller_keyword_len; kwarg_idx++) {
3705 caller_kwargs[kwarg_idx] = SYM2ID(caller_keywords[kwarg_idx]);
3708 int unspecified_bits = 0;
3710 for (int callee_idx = keyword->required_num; callee_idx < total_kwargs; callee_idx++) {
3711 bool already_passed = false;
3712 ID callee_kwarg = callee_kwargs[callee_idx];
3714 for (int caller_idx = 0; caller_idx < caller_keyword_len; caller_idx++) {
3715 if (caller_kwargs[caller_idx] == callee_kwarg) {
3716 already_passed = true;
3717 break;
3721 if (!already_passed) {
3722 // Reserve space on the stack for each default value we'll be
3723 // filling in (which is done in the next loop). Also increments
3724 // argc so that the callee's SP is recorded correctly.
3725 argc++;
3726 x86opnd_t default_arg = ctx_stack_push(ctx, TYPE_UNKNOWN);
3727 VALUE default_value = keyword->default_values[callee_idx - keyword->required_num];
3729 if (default_value == Qundef) {
3730 // Qundef means that this value is not constant and must be
3731 // recalculated at runtime, so we record it in unspecified_bits
3732 // (Qnil is then used as a placeholder instead of Qundef).
3733 unspecified_bits |= 0x01 << (callee_idx - keyword->required_num);
3734 default_value = Qnil;
3737 // GC might move default_value.
3738 jit_mov_gc_ptr(jit, cb, REG0, default_value);
3739 mov(cb, default_arg, REG0);
3741 caller_kwargs[kwarg_idx++] = callee_kwarg;
3744 RUBY_ASSERT(kwarg_idx == total_kwargs);
3746 // Next, we're going to loop through every keyword that was
3747 // specified by the caller and make sure that it's in the correct
3748 // place. If it's not we're going to swap it around with another one.
3749 for (kwarg_idx = 0; kwarg_idx < total_kwargs; kwarg_idx++) {
3750 ID callee_kwarg = callee_kwargs[kwarg_idx];
3752 // If the argument is already in the right order, then we don't
3753 // need to generate any code since the expected value is already
3754 // in the right place on the stack.
3755 if (callee_kwarg == caller_kwargs[kwarg_idx]) continue;
3757 // In this case the argument is not in the right place, so we
3758 // need to find its position where it _should_ be and swap with
3759 // that location.
3760 for (int swap_idx = kwarg_idx + 1; swap_idx < total_kwargs; swap_idx++) {
3761 if (callee_kwarg == caller_kwargs[swap_idx]) {
3762 // First we're going to generate the code that is going
3763 // to perform the actual swapping at runtime.
3764 stack_swap(ctx, cb, argc - 1 - swap_idx - lead_num, argc - 1 - kwarg_idx - lead_num, REG1, REG0);
3766 // Next we're going to do some bookkeeping on our end so
3767 // that we know the order that the arguments are
3768 // actually in now.
3769 ID tmp = caller_kwargs[kwarg_idx];
3770 caller_kwargs[kwarg_idx] = caller_kwargs[swap_idx];
3771 caller_kwargs[swap_idx] = tmp;
3773 break;
3778 // Keyword arguments cause a special extra local variable to be
3779 // pushed onto the stack that represents the parameters that weren't
3780 // explicitly given a value and have a non-constant default.
3781 mov(cb, ctx_stack_opnd(ctx, -1), imm_opnd(INT2FIX(unspecified_bits)));
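// Illustrative example: for `def m(x:, y: compute, z: 1)` called with `x:` and
// `z:`, y is unsupplied and has a non-constant default, so bit 0 of
// unspecified_bits is set and Qnil was pushed above as y's placeholder.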
3783 // Points to the receiver operand on the stack
3784 x86opnd_t recv = ctx_stack_opnd(ctx, argc);
3786 // Store the updated SP on the current frame (pop arguments and receiver)
3787 ADD_COMMENT(cb, "store caller sp");
3788 lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * -(argc + 1)));
3789 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG0);
3791 // Store the next PC in the current frame
3792 jit_save_pc(jit, REG0);
3794 if (block) {
3795 // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
3796 // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases
3797 // with cfp->block_code.
3798 jit_mov_gc_ptr(jit, cb, REG0, (VALUE)block);
3799 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, block_code), REG0);
3802 // Adjust the callee's stack pointer
3803 lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * (3 + num_locals + doing_kw_call)));
3805 // Initialize local variables to Qnil
3806 for (int i = 0; i < num_locals; i++) {
3807 mov(cb, mem_opnd(64, REG0, sizeof(VALUE) * (i - num_locals - 3)), imm_opnd(Qnil));
3810 ADD_COMMENT(cb, "push env");
3811 // Put compile time cme into REG1. It's assumed to be valid because we are notified when
3812 // any cme we depend on becomes outdated. See rb_yjit_method_lookup_change().
3813 jit_mov_gc_ptr(jit, cb, REG1, (VALUE)cme);
3814 // Write method entry at sp[-3]
3815 // sp[-3] = me;
3816 mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
3818 // Write block handler at sp[-2]
3819 // sp[-2] = block_handler;
3820 if (block) {
3821 // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
3822 lea(cb, REG1, member_opnd(REG_CFP, rb_control_frame_t, self));
3823 or(cb, REG1, imm_opnd(1));
3824 mov(cb, mem_opnd(64, REG0, 8 * -2), REG1);
3826 else {
3827 mov(cb, mem_opnd(64, REG0, 8 * -2), imm_opnd(VM_BLOCK_HANDLER_NONE));
3830 // Write env flags at sp[-1]
3831 // sp[-1] = frame_type;
3832 uint64_t frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL;
3833 mov(cb, mem_opnd(64, REG0, 8 * -1), imm_opnd(frame_type));
3835 ADD_COMMENT(cb, "push callee CFP");
3836 // Allocate a new CFP (ec->cfp--)
3837 sub(cb, REG_CFP, imm_opnd(sizeof(rb_control_frame_t)));
3838 mov(cb, member_opnd(REG_EC, rb_execution_context_t, cfp), REG_CFP);
3840 // Setup the new frame
3841 // *cfp = (const struct rb_control_frame_struct) {
3842 // .pc = pc,
3843 // .sp = sp,
3844 // .iseq = iseq,
3845 // .self = recv,
3846 // .ep = sp - 1,
3847 // .block_code = 0,
3848 // .__bp__ = sp,
3849 // };
3850 mov(cb, REG1, recv);
3851 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, self), REG1);
3852 mov(cb, REG_SP, REG0); // Switch to the callee's REG_SP
3853 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG0);
3854 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, __bp__), REG0);
3855 sub(cb, REG0, imm_opnd(sizeof(VALUE)));
3856 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, ep), REG0);
3857 jit_mov_gc_ptr(jit, cb, REG0, (VALUE)iseq);
3858 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, iseq), REG0);
3859 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, block_code), imm_opnd(0));
3861 // No need to set cfp->pc since the callee sets it whenever calling into routines
3862 // that could look at it through jit_save_pc().
3863 // mov(cb, REG0, const_ptr_opnd(start_pc));
3864 // mov(cb, member_opnd(REG_CFP, rb_control_frame_t, pc), REG0);
3866 // Stub so we can return to JITted code
3867 blockid_t return_block = { jit->iseq, jit_next_insn_idx(jit) };
3869 // Create a context for the callee
3870 ctx_t callee_ctx = DEFAULT_CTX;
3872 // Set the argument types in the callee's context
3873 for (int32_t arg_idx = 0; arg_idx < argc; ++arg_idx) {
3874 val_type_t arg_type = ctx_get_opnd_type(ctx, OPND_STACK(argc - arg_idx - 1));
3875 ctx_set_local_type(&callee_ctx, arg_idx, arg_type);
3877 val_type_t recv_type = ctx_get_opnd_type(ctx, OPND_STACK(argc));
3878 ctx_upgrade_opnd_type(&callee_ctx, OPND_SELF, recv_type);
3880 // The callee might change locals through Kernel#binding and other means.
3881 ctx_clear_local_types(ctx);
3883 // Pop arguments and receiver in return context, push the return value
3884 // After the return, sp_offset will be 1. The codegen for leave writes
3885 // the return value in case of JIT-to-JIT return.
3886 ctx_t return_ctx = *ctx;
3887 ctx_stack_pop(&return_ctx, argc + 1);
3888 ctx_stack_push(&return_ctx, TYPE_UNKNOWN);
3889 return_ctx.sp_offset = 1;
3890 return_ctx.chain_depth = 0;
3892 // Write the JIT return address on the callee frame
3893 gen_branch(
3894 jit,
3895 ctx,
3896 return_block,
3897 &return_ctx,
3898 return_block,
3899 &return_ctx,
3900 gen_return_branch
3903 //print_str(cb, "calling Ruby func:");
3904 //print_str(cb, rb_id2name(vm_ci_mid(ci)));
3906 // Directly jump to the entry point of the callee
3907 gen_direct_jump(
3908 jit,
3909 &callee_ctx,
3910 (blockid_t){ iseq, start_pc_offset }
3913 return YJIT_END_BLOCK;
3916 static codegen_status_t
3917 gen_struct_aref(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, VALUE comptime_recv, VALUE comptime_recv_klass) {
3918 if (vm_ci_argc(ci) != 0) {
3919 return YJIT_CANT_COMPILE;
3922 const unsigned int off = cme->def->body.optimized.index;
3924 // Confidence checks
3925 RUBY_ASSERT_ALWAYS(RB_TYPE_P(comptime_recv, T_STRUCT));
3926 RUBY_ASSERT_ALWAYS((long)off < RSTRUCT_LEN(comptime_recv));
3928 // We are going to use an encoding that takes a 4-byte immediate which
3929 // limits the offset to INT32_MAX.
3931 uint64_t native_off = (uint64_t)off * (uint64_t)SIZEOF_VALUE;
3932 if (native_off > (uint64_t)INT32_MAX) {
3933 return YJIT_CANT_COMPILE;
3937 // All structs from the same Struct class should have the same
3938 // length. So if our comptime_recv is embedded all runtime
3939 // structs of the same class should be as well, and the same is
3940 // true of the converse.
3941 bool embedded = FL_TEST_RAW(comptime_recv, RSTRUCT_EMBED_LEN_MASK);
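// Embedded structs keep their members inline in as.ary, while larger structs
// store them in a separately allocated buffer reached through as.heap.ptr.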
3943 ADD_COMMENT(cb, "struct aref");
3945 x86opnd_t recv = ctx_stack_pop(ctx, 1);
3947 mov(cb, REG0, recv);
3949 if (embedded) {
3950 mov(cb, REG0, member_opnd_idx(REG0, struct RStruct, as.ary, off));
3952 else {
3953 mov(cb, REG0, member_opnd(REG0, struct RStruct, as.heap.ptr));
3954 mov(cb, REG0, mem_opnd(64, REG0, SIZEOF_VALUE * off));
3957 x86opnd_t ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
3958 mov(cb, ret, REG0);
3960 jit_jump_to_next_insn(jit, ctx);
3961 return YJIT_END_BLOCK;
3964 static codegen_status_t
3965 gen_struct_aset(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, VALUE comptime_recv, VALUE comptime_recv_klass) {
3966 if (vm_ci_argc(ci) != 1) {
3967 return YJIT_CANT_COMPILE;
3970 const unsigned int off = cme->def->body.optimized.index;
3972 // Confidence checks
3973 RUBY_ASSERT_ALWAYS(RB_TYPE_P(comptime_recv, T_STRUCT));
3974 RUBY_ASSERT_ALWAYS((long)off < RSTRUCT_LEN(comptime_recv));
3976 ADD_COMMENT(cb, "struct aset");
3978 x86opnd_t val = ctx_stack_pop(ctx, 1);
3979 x86opnd_t recv = ctx_stack_pop(ctx, 1);
3981 mov(cb, C_ARG_REGS[0], recv);
3982 mov(cb, C_ARG_REGS[1], imm_opnd(off));
3983 mov(cb, C_ARG_REGS[2], val);
3984 call_ptr(cb, REG0, (void *)RSTRUCT_SET);
3986 x86opnd_t ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
3987 mov(cb, ret, RAX);
3989 jit_jump_to_next_insn(jit, ctx);
3990 return YJIT_END_BLOCK;
3993 const rb_callable_method_entry_t *
3994 rb_aliased_callable_method_entry(const rb_callable_method_entry_t *me);
3996 static codegen_status_t
3997 gen_send_general(jitstate_t *jit, ctx_t *ctx, struct rb_call_data *cd, rb_iseq_t *block)
3999 // Relevant definitions:
4000 // rb_execution_context_t : vm_core.h
4001 // invoker, cfunc logic : method.h, vm_method.c
4002 // rb_callinfo : vm_callinfo.h
4003 // rb_callable_method_entry_t : method.h
4004 // vm_call_cfunc_with_frame : vm_insnhelper.c
4006 // For a general overview for how the interpreter calls methods,
4007 // see vm_call_method().
4009 const struct rb_callinfo *ci = cd->ci; // info about the call site
4011 int32_t argc = (int32_t)vm_ci_argc(ci);
4012 ID mid = vm_ci_mid(ci);
4014 // Don't JIT calls with keyword splat
4015 if (vm_ci_flag(ci) & VM_CALL_KW_SPLAT) {
4016 GEN_COUNTER_INC(cb, send_kw_splat);
4017 return YJIT_CANT_COMPILE;
4020 // Don't JIT calls that aren't simple
4021 // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block.
4022 if ((vm_ci_flag(ci) & VM_CALL_ARGS_SPLAT) != 0) {
4023 GEN_COUNTER_INC(cb, send_args_splat);
4024 return YJIT_CANT_COMPILE;
4026 if ((vm_ci_flag(ci) & VM_CALL_ARGS_BLOCKARG) != 0) {
4027 GEN_COUNTER_INC(cb, send_block_arg);
4028 return YJIT_CANT_COMPILE;
4031 // Defer compilation so we can specialize on class of receiver
4032 if (!jit_at_current_insn(jit)) {
4033 defer_compilation(jit, ctx);
4034 return YJIT_END_BLOCK;
4037 VALUE comptime_recv = jit_peek_at_stack(jit, ctx, argc);
4038 VALUE comptime_recv_klass = CLASS_OF(comptime_recv);
4040 // Guard that the receiver has the same class as the one from compile time
4041 uint8_t *side_exit = yjit_side_exit(jit, ctx);
4043 // Points to the receiver operand on the stack
4044 x86opnd_t recv = ctx_stack_opnd(ctx, argc);
4045 insn_opnd_t recv_opnd = OPND_STACK(argc);
4046 mov(cb, REG0, recv);
4047 if (!jit_guard_known_klass(jit, ctx, comptime_recv_klass, recv_opnd, comptime_recv, SEND_MAX_DEPTH, side_exit)) {
4048 return YJIT_CANT_COMPILE;
4051 // Do method lookup
4052 const rb_callable_method_entry_t *cme = rb_callable_method_entry(comptime_recv_klass, mid);
4053 if (!cme) {
4054 // TODO: counter
4055 return YJIT_CANT_COMPILE;
4058 switch (METHOD_ENTRY_VISI(cme)) {
4059 case METHOD_VISI_PUBLIC:
4060 // Can always call public methods
4061 break;
4062 case METHOD_VISI_PRIVATE:
4063 if (!(vm_ci_flag(ci) & VM_CALL_FCALL)) {
4064 // Can only call private methods with FCALL callsites.
4065 // (at the moment these are call sites with no receiver, or with an explicit `self` receiver)
4066 return YJIT_CANT_COMPILE;
4068 break;
4069 case METHOD_VISI_PROTECTED:
4070 jit_protected_callee_ancestry_guard(jit, cb, cme, side_exit);
4071 break;
4072 case METHOD_VISI_UNDEF:
4073 RUBY_ASSERT(false && "cmes should always have a visibility");
4074 break;
4077 // Register block for invalidation
4078 RUBY_ASSERT(cme->called_id == mid);
4079 assume_method_lookup_stable(comptime_recv_klass, cme, jit);
4081 // To handle the aliased method case (VM_METHOD_TYPE_ALIAS)
4082 while (true) {
4083 // switch on the method type
4084 switch (cme->def->type) {
4085 case VM_METHOD_TYPE_ISEQ:
4086 return gen_send_iseq(jit, ctx, ci, cme, block, argc);
4087 case VM_METHOD_TYPE_CFUNC:
4088 if ((vm_ci_flag(ci) & VM_CALL_KWARG) != 0) {
4089 GEN_COUNTER_INC(cb, send_cfunc_kwargs);
4090 return YJIT_CANT_COMPILE;
4092 return gen_send_cfunc(jit, ctx, ci, cme, block, argc, &comptime_recv_klass);
4093 case VM_METHOD_TYPE_IVAR:
4094 if (argc != 0) {
4095 // Argument count mismatch. Getters take no arguments.
4096 GEN_COUNTER_INC(cb, send_getter_arity);
4097 return YJIT_CANT_COMPILE;
4099 if (c_method_tracing_currently_enabled(jit)) {
4100 // Can't generate code for firing c_call and c_return events
4101 // :attr-tracing:
4102 // Handling the C method tracing events for attr_accessor
4103 // methods is easier than regular C methods as we know the
4104 // "method" we are calling into never enables those tracing
4105 // events. Once global invalidation runs, the code for the
4106 // attr_accessor is invalidated and we exit at the closest
4107 // instruction boundary which is always outside of the body of
4108 // the attr_accessor code.
4109 GEN_COUNTER_INC(cb, send_cfunc_tracing);
4110 return YJIT_CANT_COMPILE;
4113 mov(cb, REG0, recv);
4115 ID ivar_name = cme->def->body.attr.id;
4116 return gen_get_ivar(jit, ctx, SEND_MAX_DEPTH, comptime_recv, ivar_name, recv_opnd, side_exit);
4117 case VM_METHOD_TYPE_ATTRSET:
4118 if ((vm_ci_flag(ci) & VM_CALL_KWARG) != 0) {
4119 GEN_COUNTER_INC(cb, send_attrset_kwargs);
4120 return YJIT_CANT_COMPILE;
4122 else if (argc != 1 || !RB_TYPE_P(comptime_recv, T_OBJECT)) {
4123 GEN_COUNTER_INC(cb, send_ivar_set_method);
4124 return YJIT_CANT_COMPILE;
4126 else if (c_method_tracing_currently_enabled(jit)) {
4127 // Can't generate code for firing c_call and c_return events
4128 // See :attr-tracing:
4129 GEN_COUNTER_INC(cb, send_cfunc_tracing);
4130 return YJIT_CANT_COMPILE;
4132 else {
4133 ID ivar_name = cme->def->body.attr.id;
4134 return gen_set_ivar(jit, ctx, comptime_recv, comptime_recv_klass, ivar_name);
4136 // Block method, e.g. define_method(:foo) { :my_block }
4137 case VM_METHOD_TYPE_BMETHOD:
4138 GEN_COUNTER_INC(cb, send_bmethod);
4139 return YJIT_CANT_COMPILE;
4140 case VM_METHOD_TYPE_ZSUPER:
4141 GEN_COUNTER_INC(cb, send_zsuper_method);
4142 return YJIT_CANT_COMPILE;
4143 case VM_METHOD_TYPE_ALIAS: {
4144 // Retrieve the aliased method and re-enter the switch
4145 cme = rb_aliased_callable_method_entry(cme);
4146 continue;
4148 case VM_METHOD_TYPE_UNDEF:
4149 GEN_COUNTER_INC(cb, send_undef_method);
4150 return YJIT_CANT_COMPILE;
4151 case VM_METHOD_TYPE_NOTIMPLEMENTED:
4152 GEN_COUNTER_INC(cb, send_not_implemented_method);
4153 return YJIT_CANT_COMPILE;
4154 // Send family of methods, e.g. call/apply
4155 case VM_METHOD_TYPE_OPTIMIZED:
4156 switch (cme->def->body.optimized.type) {
4157 case OPTIMIZED_METHOD_TYPE_SEND:
4158 GEN_COUNTER_INC(cb, send_optimized_method_send);
4159 return YJIT_CANT_COMPILE;
4160 case OPTIMIZED_METHOD_TYPE_CALL:
4161 GEN_COUNTER_INC(cb, send_optimized_method_call);
4162 return YJIT_CANT_COMPILE;
4163 case OPTIMIZED_METHOD_TYPE_BLOCK_CALL:
4164 GEN_COUNTER_INC(cb, send_optimized_method_block_call);
4165 return YJIT_CANT_COMPILE;
4166 case OPTIMIZED_METHOD_TYPE_STRUCT_AREF:
4167 return gen_struct_aref(jit, ctx, ci, cme, comptime_recv, comptime_recv_klass);
4168 case OPTIMIZED_METHOD_TYPE_STRUCT_ASET:
4169 return gen_struct_aset(jit, ctx, ci, cme, comptime_recv, comptime_recv_klass);
4170 default:
4171 rb_bug("unknown optimized method type (%d)", cme->def->body.optimized.type);
4172 UNREACHABLE_RETURN(YJIT_CANT_COMPILE);
4174 case VM_METHOD_TYPE_MISSING:
4175 GEN_COUNTER_INC(cb, send_missing_method);
4176 return YJIT_CANT_COMPILE;
4177 case VM_METHOD_TYPE_REFINED:
4178 GEN_COUNTER_INC(cb, send_refined_method);
4179 return YJIT_CANT_COMPILE;
4180 // no default case so compiler issues a warning if this is not exhaustive
4183 // Unreachable
4184 RUBY_ASSERT(false);
4188 static codegen_status_t
4189 gen_opt_send_without_block(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4191 struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 0);
4192 return gen_send_general(jit, ctx, cd, NULL);
4195 static codegen_status_t
4196 gen_send(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4198 struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 0);
4199 rb_iseq_t *block = (rb_iseq_t *)jit_get_arg(jit, 1);
4200 return gen_send_general(jit, ctx, cd, block);
4203 static codegen_status_t
4204 gen_invokesuper(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4206 struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 0);
4207 rb_iseq_t *block = (rb_iseq_t *)jit_get_arg(jit, 1);
4209 // Defer compilation so we can specialize on class of receiver
4210 if (!jit_at_current_insn(jit)) {
4211 defer_compilation(jit, ctx);
4212 return YJIT_END_BLOCK;
4215 const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(jit->ec->cfp);
4216 if (!me) {
4217 return YJIT_CANT_COMPILE;
4220 // FIXME: We should track and invalidate this block when this cme is invalidated
4221 VALUE current_defined_class = me->defined_class;
4222 ID mid = me->def->original_id;
4224 if (me != rb_callable_method_entry(current_defined_class, me->called_id)) {
4225 // Though we likely could generate this call, as we are only concerned
4226 // with the method entry remaining valid, assume_method_lookup_stable
4227 // below requires that the method lookup matches as well.
4228 return YJIT_CANT_COMPILE;
4231 // vm_search_normal_superclass
4232 if (BUILTIN_TYPE(current_defined_class) == T_ICLASS && FL_TEST_RAW(RBASIC(current_defined_class)->klass, RMODULE_IS_REFINEMENT)) {
4233 return YJIT_CANT_COMPILE;
4235 VALUE comptime_superclass = RCLASS_SUPER(RCLASS_ORIGIN(current_defined_class));
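// This mirrors vm_search_normal_superclass(): RCLASS_ORIGIN skips any
// prepended modules of the defining class before taking its direct superclass.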
4237 const struct rb_callinfo *ci = cd->ci;
4238 int32_t argc = (int32_t)vm_ci_argc(ci);
4240 // Don't JIT calls that aren't simple
4241 // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block.
4242 if ((vm_ci_flag(ci) & VM_CALL_ARGS_SPLAT) != 0) {
4243 GEN_COUNTER_INC(cb, send_args_splat);
4244 return YJIT_CANT_COMPILE;
4246 if ((vm_ci_flag(ci) & VM_CALL_KWARG) != 0) {
4247 GEN_COUNTER_INC(cb, send_keywords);
4248 return YJIT_CANT_COMPILE;
4250 if ((vm_ci_flag(ci) & VM_CALL_KW_SPLAT) != 0) {
4251 GEN_COUNTER_INC(cb, send_kw_splat);
4252 return YJIT_CANT_COMPILE;
4254 if ((vm_ci_flag(ci) & VM_CALL_ARGS_BLOCKARG) != 0) {
4255 GEN_COUNTER_INC(cb, send_block_arg);
4256 return YJIT_CANT_COMPILE;
4259 // Ensure we haven't rebound this method onto an incompatible class.
4260 // In the interpreter we try to avoid making this check by performing some
4261 // cheaper calculations first, but since we specialize on the method entry
4262 // and so only have to do this once at compile time, it is fine to always
4263 // check and side exit.
4264 VALUE comptime_recv = jit_peek_at_stack(jit, ctx, argc);
4265 if (!rb_obj_is_kind_of(comptime_recv, current_defined_class)) {
4266 return YJIT_CANT_COMPILE;
4269 // Do method lookup
4270 const rb_callable_method_entry_t *cme = rb_callable_method_entry(comptime_superclass, mid);
4272 if (!cme) {
4273 return YJIT_CANT_COMPILE;
4276 // Check that we'll be able to write this method dispatch before generating checks
4277 switch (cme->def->type) {
4278 case VM_METHOD_TYPE_ISEQ:
4279 case VM_METHOD_TYPE_CFUNC:
4280 break;
4281 default:
4282 // others unimplemented
4283 return YJIT_CANT_COMPILE;
4286 // Guard that the receiver has the same class as the one from compile time
4287 uint8_t *side_exit = yjit_side_exit(jit, ctx);
4289 if (jit->ec->cfp->ep[VM_ENV_DATA_INDEX_ME_CREF] != (VALUE)me) {
4290 // This will be the case for super within a block
4291 return YJIT_CANT_COMPILE;
4294 ADD_COMMENT(cb, "guard known me");
4295 mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, ep));
4296 x86opnd_t ep_me_opnd = mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_ME_CREF);
4297 jit_mov_gc_ptr(jit, cb, REG1, (VALUE)me);
4298 cmp(cb, ep_me_opnd, REG1);
4299 jne_ptr(cb, COUNTED_EXIT(jit, side_exit, invokesuper_me_changed));
4301 if (!block) {
4302 // Guard no block passed
4303 // rb_vm_frame_block_handler(GET_EC()->cfp) == VM_BLOCK_HANDLER_NONE
4304 // note, we assume VM_ASSERT(VM_ENV_LOCAL_P(ep))
4306 // TODO: this could properly forward the current block handler, but
4307 // would require changes to gen_send_*
4308 ADD_COMMENT(cb, "guard no block given");
4309 // EP is in REG0 from above
4310 x86opnd_t ep_specval_opnd = mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_SPECVAL);
4311 cmp(cb, ep_specval_opnd, imm_opnd(VM_BLOCK_HANDLER_NONE));
4312 jne_ptr(cb, COUNTED_EXIT(jit, side_exit, invokesuper_block));
4315 // Points to the receiver operand on the stack
4316 x86opnd_t recv = ctx_stack_opnd(ctx, argc);
4317 mov(cb, REG0, recv);
4319 // We need to assume that both our current method entry and the super
4320 // method entry we invoke remain stable
4321 assume_method_lookup_stable(current_defined_class, me, jit);
4322 assume_method_lookup_stable(comptime_superclass, cme, jit);
4324 // Method calls may corrupt types
4325 ctx_clear_local_types(ctx);
4327 switch (cme->def->type) {
4328 case VM_METHOD_TYPE_ISEQ:
4329 return gen_send_iseq(jit, ctx, ci, cme, block, argc);
4330 case VM_METHOD_TYPE_CFUNC:
4331 return gen_send_cfunc(jit, ctx, ci, cme, block, argc, NULL);
4332 default:
4333 break;
4336 RUBY_ASSERT_ALWAYS(false);
4339 static codegen_status_t
4340 gen_leave(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4342 // Only the return value should be on the stack
4343 RUBY_ASSERT(ctx->stack_size == 1);
4345 // Create a side-exit to fall back to the interpreter
4346 uint8_t *side_exit = yjit_side_exit(jit, ctx);
4348 // Load environment pointer EP from CFP
4349 mov(cb, REG1, member_opnd(REG_CFP, rb_control_frame_t, ep));
4351 // Check for interrupts
4352 ADD_COMMENT(cb, "check for interrupts");
4353 yjit_check_ints(cb, COUNTED_EXIT(jit, side_exit, leave_se_interrupt));
4355 // Load the return value
4356 mov(cb, REG0, ctx_stack_pop(ctx, 1));
4358 // Pop the current frame (ec->cfp++)
4359 // Note: the return PC is already in the previous CFP
4360 add(cb, REG_CFP, imm_opnd(sizeof(rb_control_frame_t)));
4361 mov(cb, member_opnd(REG_EC, rb_execution_context_t, cfp), REG_CFP);
4363 // Reload REG_SP for the caller and write the return value.
4364 // Top of the stack is REG_SP[0] since the caller has sp_offset=1.
4365 mov(cb, REG_SP, member_opnd(REG_CFP, rb_control_frame_t, sp));
4366 mov(cb, mem_opnd(64, REG_SP, 0), REG0);
4368 // Jump to the JIT return address on the frame that was just popped
4369 const int32_t offset_to_jit_return = -((int32_t)sizeof(rb_control_frame_t)) + (int32_t)offsetof(rb_control_frame_t, jit_return);
4370 jmp_rm(cb, mem_opnd(64, REG_CFP, offset_to_jit_return));
4372 return YJIT_END_BLOCK;
4375 RUBY_EXTERN rb_serial_t ruby_vm_global_constant_state;
4377 static codegen_status_t
4378 gen_getglobal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4380 ID gid = jit_get_arg(jit, 0);
4382 // Save the PC and SP because we might make a Ruby call for warning
4383 jit_prepare_routine_call(jit, ctx, REG0);
4385 mov(cb, C_ARG_REGS[0], imm_opnd(gid));
4387 call_ptr(cb, REG0, (void *)&rb_gvar_get);
4389 x86opnd_t top = ctx_stack_push(ctx, TYPE_UNKNOWN);
4390 mov(cb, top, RAX);
4392 return YJIT_KEEP_COMPILING;
4395 static codegen_status_t
4396 gen_setglobal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4398 ID gid = jit_get_arg(jit, 0);
4400 // Save the PC and SP because we might make a Ruby call for
4401 // Kernel#set_trace_var
4402 jit_prepare_routine_call(jit, ctx, REG0);
4404 mov(cb, C_ARG_REGS[0], imm_opnd(gid));
4406 x86opnd_t val = ctx_stack_pop(ctx, 1);
4408 mov(cb, C_ARG_REGS[1], val);
4410 call_ptr(cb, REG0, (void *)&rb_gvar_set);
4412 return YJIT_KEEP_COMPILING;
4415 static codegen_status_t
4416 gen_anytostring(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4418 // Save the PC and SP because the call below can allocate and trigger GC
4419 // (rb_obj_as_string_result() may build a fallback string).
4420 jit_prepare_routine_call(jit, ctx, REG0);
4422 x86opnd_t str = ctx_stack_pop(ctx, 1);
4423 x86opnd_t val = ctx_stack_pop(ctx, 1);
4425 mov(cb, C_ARG_REGS[0], str);
4426 mov(cb, C_ARG_REGS[1], val);
4428 call_ptr(cb, REG0, (void *)&rb_obj_as_string_result);
4430 // Push the return value
4431 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_STRING);
4432 mov(cb, stack_ret, RAX);
4434 return YJIT_KEEP_COMPILING;
4437 static codegen_status_t
4438 gen_objtostring(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4440 if (!jit_at_current_insn(jit)) {
4441 defer_compilation(jit, ctx);
4442 return YJIT_END_BLOCK;
4445 x86opnd_t recv = ctx_stack_opnd(ctx, 0);
4446 VALUE comptime_recv = jit_peek_at_stack(jit, ctx, 0);
4448 if (RB_TYPE_P(comptime_recv, T_STRING)) {
4449 uint8_t *side_exit = yjit_side_exit(jit, ctx);
4451 mov(cb, REG0, recv);
4452 jit_guard_known_klass(jit, ctx, CLASS_OF(comptime_recv), OPND_STACK(0), comptime_recv, SEND_MAX_DEPTH, side_exit);
4453 // No work needed. The string value is already on the top of the stack.
4454 return YJIT_KEEP_COMPILING;
4456 else {
4457 struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 0);
4458 return gen_send_general(jit, ctx, cd, NULL);
4462 static codegen_status_t
4463 gen_toregexp(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4465 rb_num_t opt = jit_get_arg(jit, 0);
4466 rb_num_t cnt = jit_get_arg(jit, 1);
4468 // Save the PC and SP because this allocates an object and could
4469 // raise an exception.
4470 jit_prepare_routine_call(jit, ctx, REG0);
4472 x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(sizeof(VALUE) * (uint32_t)cnt));
4473 ctx_stack_pop(ctx, cnt);
4475 mov(cb, C_ARG_REGS[0], imm_opnd(0));
4476 mov(cb, C_ARG_REGS[1], imm_opnd(cnt));
4477 lea(cb, C_ARG_REGS[2], values_ptr);
4478 call_ptr(cb, REG0, (void *)&rb_ary_tmp_new_from_values);
4480 // Save the array so we can clear it later
4481 push(cb, RAX);
4482 push(cb, RAX); // Alignment
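// Two pushes (16 bytes) preserve the 16-byte stack alignment the System V
// x86-64 ABI requires at the upcoming C calls; the duplicate value is popped
// and discarded below.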
4483 mov(cb, C_ARG_REGS[0], RAX);
4484 mov(cb, C_ARG_REGS[1], imm_opnd(opt));
4485 call_ptr(cb, REG0, (void *)&rb_reg_new_ary);
4487 // The actual regex is in RAX now. Pop the temp array from
4488 // rb_ary_tmp_new_from_values into C arg regs so we can clear it
4489 pop(cb, REG1); // Alignment
4490 pop(cb, C_ARG_REGS[0]);
4492 // The value we want to push on the stack is in RAX right now
4493 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4494 mov(cb, stack_ret, RAX);
4496 // Clear the temp array.
4497 call_ptr(cb, REG0, (void *)&rb_ary_clear);
4499 return YJIT_KEEP_COMPILING;
4502 static codegen_status_t
4503 gen_intern(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4505 // Save the PC and SP because we might allocate
4506 jit_prepare_routine_call(jit, ctx, REG0);
4508 x86opnd_t str = ctx_stack_pop(ctx, 1);
4510 mov(cb, C_ARG_REGS[0], str);
4512 call_ptr(cb, REG0, (void *)&rb_str_intern);
4514 // Push the return value
4515 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4516 mov(cb, stack_ret, RAX);
4518 return YJIT_KEEP_COMPILING;
4521 static codegen_status_t
4522 gen_getspecial(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4524 // This takes two arguments, key and type
4525 // key is only used when type == 0
4526 // A non-zero type determines which type of backref to fetch
4527 //rb_num_t key = jit_get_arg(jit, 0);
4528 rb_num_t type = jit_get_arg(jit, 1);
4530 if (type == 0) {
4531 // not yet implemented
4532 return YJIT_CANT_COMPILE;
4534 else if (type & 0x01) {
4535 // Fetch a "special" backref based on a char encoded by shifting by 1
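// For example, `$&` reaches this branch with type == ('&' << 1) | 1.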
4537 // Can raise if matchdata uninitialized
4538 jit_prepare_routine_call(jit, ctx, REG0);
4540 // call rb_backref_get()
4541 ADD_COMMENT(cb, "rb_backref_get");
4542 call_ptr(cb, REG0, (void *)rb_backref_get);
4543 mov(cb, C_ARG_REGS[0], RAX);
4545 switch (type >> 1) {
4546 case '&':
4547 ADD_COMMENT(cb, "rb_reg_last_match");
4548 call_ptr(cb, REG0, (void *)rb_reg_last_match);
4549 break;
4550 case '`':
4551 ADD_COMMENT(cb, "rb_reg_match_pre");
4552 call_ptr(cb, REG0, (void *)rb_reg_match_pre);
4553 break;
4554 case '\'':
4555 ADD_COMMENT(cb, "rb_reg_match_post");
4556 call_ptr(cb, REG0, (void *)rb_reg_match_post);
4557 break;
4558 case '+':
4559 ADD_COMMENT(cb, "rb_reg_match_last");
4560 call_ptr(cb, REG0, (void *)rb_reg_match_last);
4561 break;
4562 default:
4563 rb_bug("invalid back-ref");
4566 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4567 mov(cb, stack_ret, RAX);
4569 return YJIT_KEEP_COMPILING;
4571 else {
4572 // Fetch the N-th match from the last backref based on type shifted by 1
4574 // Can raise if matchdata uninitialized
4575 jit_prepare_routine_call(jit, ctx, REG0);
4577 // call rb_backref_get()
4578 ADD_COMMENT(cb, "rb_backref_get");
4579 call_ptr(cb, REG0, (void *)rb_backref_get);
4581 // rb_reg_nth_match((int)(type >> 1), backref);
4582 ADD_COMMENT(cb, "rb_reg_nth_match");
4583 mov(cb, C_ARG_REGS[0], imm_opnd(type >> 1));
4584 mov(cb, C_ARG_REGS[1], RAX);
4585 call_ptr(cb, REG0, (void *)rb_reg_nth_match);
4587 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4588 mov(cb, stack_ret, RAX);
4590 return YJIT_KEEP_COMPILING;
4594 VALUE
4595 rb_vm_getclassvariable(const rb_iseq_t *iseq, const rb_control_frame_t *cfp, ID id, ICVARC ic);
4597 static codegen_status_t
4598 gen_getclassvariable(jitstate_t* jit, ctx_t* ctx, codeblock_t* cb)
4600 // rb_vm_getclassvariable can raise exceptions.
4601 jit_prepare_routine_call(jit, ctx, REG0);
4603 mov(cb, C_ARG_REGS[0], member_opnd(REG_CFP, rb_control_frame_t, iseq));
4604 mov(cb, C_ARG_REGS[1], REG_CFP);
4605 mov(cb, C_ARG_REGS[2], imm_opnd(jit_get_arg(jit, 0)));
4606 mov(cb, C_ARG_REGS[3], imm_opnd(jit_get_arg(jit, 1)));
4608 call_ptr(cb, REG0, (void *)rb_vm_getclassvariable);
4610 x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_UNKNOWN);
4611 mov(cb, stack_top, RAX);
4613 return YJIT_KEEP_COMPILING;
4616 VALUE
4617 rb_vm_setclassvariable(const rb_iseq_t *iseq, const rb_control_frame_t *cfp, ID id, VALUE val, ICVARC ic);
4619 static codegen_status_t
4620 gen_setclassvariable(jitstate_t* jit, ctx_t* ctx, codeblock_t* cb)
4622 // rb_vm_setclassvariable can raise exceptions.
4623 jit_prepare_routine_call(jit, ctx, REG0);
4625 mov(cb, C_ARG_REGS[0], member_opnd(REG_CFP, rb_control_frame_t, iseq));
4626 mov(cb, C_ARG_REGS[1], REG_CFP);
4627 mov(cb, C_ARG_REGS[2], imm_opnd(jit_get_arg(jit, 0)));
4628 mov(cb, C_ARG_REGS[3], ctx_stack_pop(ctx, 1));
4629 mov(cb, C_ARG_REGS[4], imm_opnd(jit_get_arg(jit, 1)));
4631 call_ptr(cb, REG0, (void *)rb_vm_setclassvariable);
4633 return YJIT_KEEP_COMPILING;
4636 static codegen_status_t
4637 gen_opt_getinlinecache(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4639 VALUE jump_offset = jit_get_arg(jit, 0);
4640 VALUE const_cache_as_value = jit_get_arg(jit, 1);
4641 IC ic = (IC)const_cache_as_value;
4643 // See vm_ic_hit_p(). The same conditions are checked in yjit_constant_ic_update().
4644 struct iseq_inline_constant_cache_entry *ice = ic->entry;
4645 if (!ice || // cache not filled
4646 GET_IC_SERIAL(ice) != ruby_vm_global_constant_state /* cache out of date */) {
4647 // In these cases, leave a block that unconditionally side exits
4648 // for the interpreter to invalidate.
4649 return YJIT_CANT_COMPILE;
4652 // Make sure there is an exit for this block as the interpreter might want
4653 // to invalidate this block from yjit_constant_ic_update().
4654 jit_ensure_block_entry_exit(jit);
4656 if (ice->ic_cref) {
4657 // Cache is keyed on a certain lexical scope. Use the interpreter's cache.
4658 uint8_t *side_exit = yjit_side_exit(jit, ctx);
4660 // Call function to verify the cache. It doesn't allocate or call methods.
4661 bool rb_vm_ic_hit_p(IC ic, const VALUE *reg_ep);
4662 mov(cb, C_ARG_REGS[0], const_ptr_opnd((void *)ic));
4663 mov(cb, C_ARG_REGS[1], member_opnd(REG_CFP, rb_control_frame_t, ep));
4664 call_ptr(cb, REG0, (void *)rb_vm_ic_hit_p);
4666 // Check the result. _Bool is one byte in SysV.
4667 test(cb, AL, AL);
4668 jz_ptr(cb, COUNTED_EXIT(jit, side_exit, opt_getinlinecache_miss));
4670 // Push ic->entry->value
4671 mov(cb, REG0, const_ptr_opnd((void *)ic));
4672 mov(cb, REG0, member_opnd(REG0, struct iseq_inline_constant_cache, entry));
4673 x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_UNKNOWN);
4674 mov(cb, REG0, member_opnd(REG0, struct iseq_inline_constant_cache_entry, value));
4675 mov(cb, stack_top, REG0);
4677 else {
4678 // Optimize for single ractor mode.
4679 // FIXME: This leaks when st_insert raises NoMemoryError
4680 if (!assume_single_ractor_mode(jit)) return YJIT_CANT_COMPILE;
4682 // Invalidate output code on any and all constant writes
4683 // FIXME: This leaks when st_insert raises NoMemoryError
4684 assume_stable_global_constant_state(jit);
4686 jit_putobject(jit, ctx, ice->value);
4689 // Jump over the code for filling the cache
4690 uint32_t jump_idx = jit_next_insn_idx(jit) + (int32_t)jump_offset;
4691 gen_direct_jump(
4692 jit,
4693 ctx,
4694 (blockid_t){ .iseq = jit->iseq, .idx = jump_idx }
4697 return YJIT_END_BLOCK;
4700 // Push the explicit block parameter onto the temporary stack. Part of the
4701 // interpreter's scheme for avoiding Proc allocations when delegating
4702 // explicit block parameters.
4703 static codegen_status_t
4704 gen_getblockparamproxy(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4706 // A mirror of the interpreter code. Checking for the case
4707 // where it's pushing rb_block_param_proxy.
4708 uint8_t *side_exit = yjit_side_exit(jit, ctx);
4710 // EP level
4711 uint32_t level = (uint32_t)jit_get_arg(jit, 1);
4713 // Load environment pointer EP from CFP
4714 gen_get_ep(cb, REG0, level);
4716 // Bail when VM_ENV_FLAGS(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM) is non zero
4717 test(cb, mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_FLAGS), imm_opnd(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM));
4718 jnz_ptr(cb, COUNTED_EXIT(jit, side_exit, gbpp_block_param_modified));
4720 // Load the block handler for the current frame
4721 // note, VM_ASSERT(VM_ENV_LOCAL_P(ep))
4722 mov(cb, REG0, mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_SPECVAL));
4724 // Block handler is a tagged pointer. Look at the tag. 0x03 is from VM_BH_ISEQ_BLOCK_P().
4725 and(cb, REG0_8, imm_opnd(0x3));
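// After masking: 0x1 is the iseq-block tag, 0x3 the ifunc-block tag; Proc and
// Symbol handlers (and VM_BLOCK_HANDLER_NONE) leave other bit patterns.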
4727 // Bail unless VM_BH_ISEQ_BLOCK_P(bh). This also checks for null.
4728 cmp(cb, REG0_8, imm_opnd(0x1));
4729 jnz_ptr(cb, COUNTED_EXIT(jit, side_exit, gbpp_block_handler_not_iseq));
4731 // Push rb_block_param_proxy. It's a root, so no need to use jit_mov_gc_ptr.
4732 mov(cb, REG0, const_ptr_opnd((void *)rb_block_param_proxy));
4733 RUBY_ASSERT(!SPECIAL_CONST_P(rb_block_param_proxy));
4734 x86opnd_t top = ctx_stack_push(ctx, TYPE_HEAP);
4735 mov(cb, top, REG0);
4737 return YJIT_KEEP_COMPILING;
4740 static codegen_status_t
4741 gen_invokebuiltin(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4743 const struct rb_builtin_function *bf = (struct rb_builtin_function *)jit_get_arg(jit, 0);
4745 // ec, self, and arguments
4746 if (bf->argc + 2 > NUM_C_ARG_REGS) {
4747 return YJIT_CANT_COMPILE;
4750 // If the calls don't allocate, do they need up to date PC, SP?
4751 jit_prepare_routine_call(jit, ctx, REG0);
4753 // Call the builtin func (ec, recv, arg1, arg2, ...)
4754 mov(cb, C_ARG_REGS[0], REG_EC);
4755 mov(cb, C_ARG_REGS[1], member_opnd(REG_CFP, rb_control_frame_t, self));
4757 // Copy arguments from locals
4758 for (int32_t i = 0; i < bf->argc; i++) {
4759 x86opnd_t stack_opnd = ctx_stack_opnd(ctx, bf->argc - i - 1);
4760 x86opnd_t c_arg_reg = C_ARG_REGS[2 + i];
4761 mov(cb, c_arg_reg, stack_opnd);
4764 call_ptr(cb, REG0, (void *)bf->func_ptr);
4766 // Push the return value
4767 ctx_stack_pop(ctx, bf->argc);
4768 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4769 mov(cb, stack_ret, RAX);
4771 return YJIT_KEEP_COMPILING;
4774 // opt_invokebuiltin_delegate calls a builtin function, like
4775 // invokebuiltin does, but instead of taking arguments from the top of the
4776 // stack, it uses the argument locals (and self) from the current method.
4777 static codegen_status_t
4778 gen_opt_invokebuiltin_delegate(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4780 const struct rb_builtin_function *bf = (struct rb_builtin_function *)jit_get_arg(jit, 0);
4781 int32_t start_index = (int32_t)jit_get_arg(jit, 1);
4783 // ec, self, and arguments
4784 if (bf->argc + 2 > NUM_C_ARG_REGS) {
4785 return YJIT_CANT_COMPILE;
4788 // If the calls don't allocate, do they need up to date PC, SP?
4789 jit_prepare_routine_call(jit, ctx, REG0);
4791 if (bf->argc > 0) {
4792 // Load environment pointer EP from CFP
4793 mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, ep));
4796 // Call the builtin func (ec, recv, arg1, arg2, ...)
4797 mov(cb, C_ARG_REGS[0], REG_EC);
4798 mov(cb, C_ARG_REGS[1], member_opnd(REG_CFP, rb_control_frame_t, self));
4800 // Copy arguments from locals
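// The delegated arguments are locals of the current method starting at
// start_index; locals sit at negative offsets from EP (held in REG0 here),
// below the VM_ENV_DATA_SIZE metadata slots at the top of the frame.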
4801 for (int32_t i = 0; i < bf->argc; i++) {
4802 const int32_t offs = -jit->iseq->body->local_table_size - VM_ENV_DATA_SIZE + 1 + start_index + i;
4803 x86opnd_t local_opnd = mem_opnd(64, REG0, offs * SIZEOF_VALUE);
4804 x86opnd_t c_arg_reg = C_ARG_REGS[i + 2];
4805 mov(cb, c_arg_reg, local_opnd);
4807 call_ptr(cb, REG0, (void *)bf->func_ptr);
4809 // Push the return value
4810 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4811 mov(cb, stack_ret, RAX);
4813 return YJIT_KEEP_COMPILING;
4816 static int tracing_invalidate_all_i(void *vstart, void *vend, size_t stride, void *data);
4817 static void invalidate_all_blocks_for_tracing(const rb_iseq_t *iseq);
4819 // Invalidate all generated code and patch C method return code to contain
4820 // logic for firing the c_return TracePoint event. Once rb_vm_barrier()
4821 // returns, all other ractors are pausing inside RB_VM_LOCK_ENTER(), which
4822 // means they are inside a C routine. If there is any generated code on-stack,
4823 // it is waiting for a return from a C routine. For every routine call, we
4824 // patch in an exit after the body of the containing VM instruction. This makes
4825 // it so all the invalidated code exits as soon as execution logically reaches
4826 // the next VM instruction. The interpreter takes care of firing the tracing
4827 // event if it so happens that the next VM instruction has one attached.
4829 // The c_return event needs special handling as our codegen never outputs code
4830 // that contains tracing logic. If we let the normal output code run until the
4831 // start of the next VM instruction by relying on the patching scheme above, we
4832 // would fail to fire the c_return event. The interpreter doesn't fire the
4833 // event at an instruction boundary, so simply exiting to the interpreter isn't
4834 // enough. To handle it, we patch in the full logic at the return address. See
4835 // full_cfunc_return().
4837 // In addition to patching, we prevent future entries into invalidated code by
4838 // removing all live blocks from their iseq.
4839 void
4840 rb_yjit_tracing_invalidate_all(void)
4842 if (!rb_yjit_enabled_p()) return;
4844 // Stop other ractors since we are going to patch machine code.
4845 RB_VM_LOCK_ENTER();
4846 rb_vm_barrier();
4848 // Make it so all live block versions are no longer valid branch targets
4849 rb_objspace_each_objects(tracing_invalidate_all_i, NULL);
4851 // Apply patches
4852 const uint32_t old_pos = cb->write_pos;
4853 rb_darray_for(global_inval_patches, patch_idx) {
4854 struct codepage_patch patch = rb_darray_get(global_inval_patches, patch_idx);
4855 cb_set_pos(cb, patch.inline_patch_pos);
4856 uint8_t *jump_target = cb_get_ptr(ocb, patch.outlined_target_pos);
4857 jmp_ptr(cb, jump_target);
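// Each patch overwrites inline code at inline_patch_pos with a jump to the
// corresponding pre-generated exit in the outlined code block (ocb).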
4859 cb_set_pos(cb, old_pos);
4861 // Freeze invalidated part of the codepage. We only want to wait for
4862 // running instances of the code to exit from now on, so we shouldn't
4863 // change the code. There could be other ractors sleeping in
4864 // branch_stub_hit(), for example. We could harden this by changing memory
4865 // protection on the frozen range.
4866 RUBY_ASSERT_ALWAYS(yjit_codepage_frozen_bytes <= old_pos && "frozen bytes should increase monotonically");
4867 yjit_codepage_frozen_bytes = old_pos;
4869 cb_mark_all_executable(ocb);
4870 cb_mark_all_executable(cb);
4871 RB_VM_LOCK_LEAVE();
4874 static int
4875 tracing_invalidate_all_i(void *vstart, void *vend, size_t stride, void *data)
4877 VALUE v = (VALUE)vstart;
4878 for (; v != (VALUE)vend; v += stride) {
4879 void *ptr = asan_poisoned_object_p(v);
4880 asan_unpoison_object(v, false);
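// Temporarily unpoison the heap slot so it can be inspected; the previous
// poisoned state is restored by asan_poison_object_if() below.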
4882 if (rb_obj_is_iseq(v)) {
4883 rb_iseq_t *iseq = (rb_iseq_t *)v;
4884 invalidate_all_blocks_for_tracing(iseq);
4887 asan_poison_object_if(ptr, v);
4889 return 0;
4892 static void
4893 invalidate_all_blocks_for_tracing(const rb_iseq_t *iseq)
4895 struct rb_iseq_constant_body *body = iseq->body;
4896 if (!body) return; // iseq yet to be initialized
4898 ASSERT_vm_locking();
4900 // Empty all blocks on the iseq so we don't compile new blocks that jump to the
4901 // invalidated region.
4902 // TODO Leaking the blocks for now since we might have situations where
4903 // a different ractor is waiting in branch_stub_hit(). If we free the block,
4904 // that ractor can wake up with a dangling block.
4905 rb_darray_for(body->yjit_blocks, version_array_idx) {
4906 rb_yjit_block_array_t version_array = rb_darray_get(body->yjit_blocks, version_array_idx);
4907 rb_darray_for(version_array, version_idx) {
4908 // Stop listening for invalidation events like basic operation redefinition.
4909 block_t *block = rb_darray_get(version_array, version_idx);
4910 yjit_unlink_method_lookup_dependency(block);
4911 yjit_block_assumptions_free(block);
4913 rb_darray_free(version_array);
4915 rb_darray_free(body->yjit_blocks);
4916 body->yjit_blocks = NULL;
4918 #if USE_MJIT
4919 // Reset output code entry point
4920 body->jit_func = NULL;
4921 #endif
4924 static void
4925 yjit_reg_op(int opcode, codegen_fn gen_fn)
4927 RUBY_ASSERT(opcode >= 0 && opcode < VM_INSTRUCTION_SIZE);
4928 // Check that the op wasn't previously registered
4929 RUBY_ASSERT(gen_fns[opcode] == NULL);
4931 gen_fns[opcode] = gen_fn;
4934 void
4935 yjit_init_codegen(void)
4937 // Initialize the code blocks
4938 uint32_t mem_size = rb_yjit_opts.exec_mem_size * 1024 * 1024;
4939 uint8_t *mem_block = alloc_exec_mem(mem_size);
4941 cb = &block;
4942 cb_init(cb, mem_block, mem_size/2);
4944 ocb = &outline_block;
4945 cb_init(ocb, mem_block + mem_size/2, mem_size/2);
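// The executable memory is split in half: cb (the inline code block) holds the
// main generated code, while ocb (the outlined code block) holds side exits
// and stubs that are expected to run rarely.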
4947 // Generate the interpreter exit code for leave
4948 leave_exit_code = yjit_gen_leave_exit(cb);
4950 // Generate full exit code for C func
4951 gen_full_cfunc_return();
4952 cb_mark_all_executable(cb);
4954 // Map YARV opcodes to the corresponding codegen functions
4955 yjit_reg_op(BIN(nop), gen_nop);
4956 yjit_reg_op(BIN(dup), gen_dup);
4957 yjit_reg_op(BIN(dupn), gen_dupn);
4958 yjit_reg_op(BIN(swap), gen_swap);
4959 yjit_reg_op(BIN(setn), gen_setn);
4960 yjit_reg_op(BIN(topn), gen_topn);
4961 yjit_reg_op(BIN(pop), gen_pop);
4962 yjit_reg_op(BIN(adjuststack), gen_adjuststack);
4963 yjit_reg_op(BIN(newarray), gen_newarray);
4964 yjit_reg_op(BIN(duparray), gen_duparray);
4965 yjit_reg_op(BIN(duphash), gen_duphash);
4966 yjit_reg_op(BIN(splatarray), gen_splatarray);
4967 yjit_reg_op(BIN(expandarray), gen_expandarray);
4968 yjit_reg_op(BIN(newhash), gen_newhash);
4969 yjit_reg_op(BIN(newrange), gen_newrange);
4970 yjit_reg_op(BIN(concatstrings), gen_concatstrings);
4971 yjit_reg_op(BIN(putnil), gen_putnil);
4972 yjit_reg_op(BIN(putobject), gen_putobject);
4973 yjit_reg_op(BIN(putstring), gen_putstring);
4974 yjit_reg_op(BIN(putobject_INT2FIX_0_), gen_putobject_int2fix);
4975 yjit_reg_op(BIN(putobject_INT2FIX_1_), gen_putobject_int2fix);
4976 yjit_reg_op(BIN(putself), gen_putself);
4977 yjit_reg_op(BIN(putspecialobject), gen_putspecialobject);
4978 yjit_reg_op(BIN(getlocal), gen_getlocal);
4979 yjit_reg_op(BIN(getlocal_WC_0), gen_getlocal_wc0);
4980 yjit_reg_op(BIN(getlocal_WC_1), gen_getlocal_wc1);
4981 yjit_reg_op(BIN(setlocal), gen_setlocal);
4982 yjit_reg_op(BIN(setlocal_WC_0), gen_setlocal_wc0);
4983 yjit_reg_op(BIN(setlocal_WC_1), gen_setlocal_wc1);
4984 yjit_reg_op(BIN(getinstancevariable), gen_getinstancevariable);
4985 yjit_reg_op(BIN(setinstancevariable), gen_setinstancevariable);
4986 yjit_reg_op(BIN(defined), gen_defined);
4987 yjit_reg_op(BIN(checktype), gen_checktype);
4988 yjit_reg_op(BIN(checkkeyword), gen_checkkeyword);
4989 yjit_reg_op(BIN(opt_lt), gen_opt_lt);
4990 yjit_reg_op(BIN(opt_le), gen_opt_le);
4991 yjit_reg_op(BIN(opt_ge), gen_opt_ge);
4992 yjit_reg_op(BIN(opt_gt), gen_opt_gt);
4993 yjit_reg_op(BIN(opt_eq), gen_opt_eq);
4994 yjit_reg_op(BIN(opt_neq), gen_opt_neq);
4995 yjit_reg_op(BIN(opt_aref), gen_opt_aref);
4996 yjit_reg_op(BIN(opt_aset), gen_opt_aset);
4997 yjit_reg_op(BIN(opt_and), gen_opt_and);
4998 yjit_reg_op(BIN(opt_or), gen_opt_or);
4999 yjit_reg_op(BIN(opt_minus), gen_opt_minus);
5000 yjit_reg_op(BIN(opt_plus), gen_opt_plus);
5001 yjit_reg_op(BIN(opt_mult), gen_opt_mult);
5002 yjit_reg_op(BIN(opt_div), gen_opt_div);
5003 yjit_reg_op(BIN(opt_mod), gen_opt_mod);
5004 yjit_reg_op(BIN(opt_ltlt), gen_opt_ltlt);
5005 yjit_reg_op(BIN(opt_nil_p), gen_opt_nil_p);
5006 yjit_reg_op(BIN(opt_empty_p), gen_opt_empty_p);
5007 yjit_reg_op(BIN(opt_str_freeze), gen_opt_str_freeze);
5008 yjit_reg_op(BIN(opt_str_uminus), gen_opt_str_uminus);
5009 yjit_reg_op(BIN(opt_not), gen_opt_not);
5010 yjit_reg_op(BIN(opt_size), gen_opt_size);
5011 yjit_reg_op(BIN(opt_length), gen_opt_length);
5012 yjit_reg_op(BIN(opt_regexpmatch2), gen_opt_regexpmatch2);
5013 yjit_reg_op(BIN(opt_getinlinecache), gen_opt_getinlinecache);
5014 yjit_reg_op(BIN(invokebuiltin), gen_invokebuiltin);
5015 yjit_reg_op(BIN(opt_invokebuiltin_delegate), gen_opt_invokebuiltin_delegate);
5016 yjit_reg_op(BIN(opt_invokebuiltin_delegate_leave), gen_opt_invokebuiltin_delegate);
5017 yjit_reg_op(BIN(opt_case_dispatch), gen_opt_case_dispatch);
5018 yjit_reg_op(BIN(branchif), gen_branchif);
5019 yjit_reg_op(BIN(branchunless), gen_branchunless);
5020 yjit_reg_op(BIN(branchnil), gen_branchnil);
5021 yjit_reg_op(BIN(jump), gen_jump);
5022 yjit_reg_op(BIN(getblockparamproxy), gen_getblockparamproxy);
5023 yjit_reg_op(BIN(opt_send_without_block), gen_opt_send_without_block);
5024 yjit_reg_op(BIN(send), gen_send);
5025 yjit_reg_op(BIN(invokesuper), gen_invokesuper);
5026 yjit_reg_op(BIN(leave), gen_leave);
5027 yjit_reg_op(BIN(getglobal), gen_getglobal);
5028 yjit_reg_op(BIN(setglobal), gen_setglobal);
5029 yjit_reg_op(BIN(anytostring), gen_anytostring);
5030 yjit_reg_op(BIN(objtostring), gen_objtostring);
5031 yjit_reg_op(BIN(toregexp), gen_toregexp);
5032 yjit_reg_op(BIN(intern), gen_intern);
5033 yjit_reg_op(BIN(getspecial), gen_getspecial);
5034 yjit_reg_op(BIN(getclassvariable), gen_getclassvariable);
5035 yjit_reg_op(BIN(setclassvariable), gen_setclassvariable);
5037 yjit_method_codegen_table = st_init_numtable();
5039 // Specialization for C methods. See yjit_reg_method() for details.
5040 yjit_reg_method(rb_cBasicObject, "!", jit_rb_obj_not);
5042 yjit_reg_method(rb_cNilClass, "nil?", jit_rb_true);
5043 yjit_reg_method(rb_mKernel, "nil?", jit_rb_false);
5045 yjit_reg_method(rb_cBasicObject, "==", jit_rb_obj_equal);
5046 yjit_reg_method(rb_cBasicObject, "equal?", jit_rb_obj_equal);
5047 yjit_reg_method(rb_mKernel, "eql?", jit_rb_obj_equal);
5048 yjit_reg_method(rb_cModule, "==", jit_rb_obj_equal);
5049 yjit_reg_method(rb_cSymbol, "==", jit_rb_obj_equal);
5050 yjit_reg_method(rb_cSymbol, "===", jit_rb_obj_equal);
5052 // rb_str_to_s() methods in string.c
5053 yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s);
5054 yjit_reg_method(rb_cString, "to_str", jit_rb_str_to_s);
5055 yjit_reg_method(rb_cString, "bytesize", jit_rb_str_bytesize);
5057 // Thread.current
5058 yjit_reg_method(rb_singleton_class(rb_cThread), "current", jit_thread_s_current);