yjit_codegen.c
1 // This file is a fragment of the yjit.o compilation unit. See yjit.c.
2 #include "internal.h"
3 #include "gc.h"
4 #include "internal/compile.h"
5 #include "internal/class.h"
6 #include "internal/hash.h"
7 #include "internal/object.h"
8 #include "internal/sanitizers.h"
9 #include "internal/string.h"
10 #include "internal/struct.h"
11 #include "internal/variable.h"
12 #include "internal/re.h"
13 #include "probes.h"
14 #include "probes_helper.h"
15 #include "yjit.h"
16 #include "yjit_iface.h"
17 #include "yjit_core.h"
18 #include "yjit_codegen.h"
19 #include "yjit_asm.h"
21 // Map from YARV opcodes to code generation functions
22 static codegen_fn gen_fns[VM_INSTRUCTION_SIZE] = { NULL };
24 // Map from method entries to code generation functions
25 static st_table *yjit_method_codegen_table = NULL;
27 // Code for exiting back to the interpreter from the leave instruction
28 static void *leave_exit_code;
30 // Code for full logic of returning from C method and exiting to the interpreter
31 static uint32_t outline_full_cfunc_return_pos;
33 // For implementing global code invalidation
34 struct codepage_patch {
35 uint32_t inline_patch_pos;
36 uint32_t outlined_target_pos;
39 typedef rb_darray(struct codepage_patch) patch_array_t;
41 static patch_array_t global_inval_patches = NULL;
43 // Print the current source location for debugging purposes
44 RBIMPL_ATTR_MAYBE_UNUSED()
45 static void
46 jit_print_loc(jitstate_t *jit, const char *msg)
48 char *ptr;
49 long len;
50 VALUE path = rb_iseq_path(jit->iseq);
51 RSTRING_GETMEM(path, ptr, len);
52 fprintf(stderr, "%s %.*s:%u\n", msg, (int)len, ptr, rb_iseq_line_no(jit->iseq, jit->insn_idx));
55 // dump an object for debugging purposes
56 RBIMPL_ATTR_MAYBE_UNUSED()
57 static void
58 jit_obj_info_dump(codeblock_t *cb, x86opnd_t opnd) {
59 push_regs(cb);
60 mov(cb, C_ARG_REGS[0], opnd);
61 call_ptr(cb, REG0, (void *)rb_obj_info_dump);
62 pop_regs(cb);
65 // Get the current instruction's opcode
66 static int
67 jit_get_opcode(jitstate_t *jit)
69 return jit->opcode;
72 // Get the index of the next instruction
73 static uint32_t
74 jit_next_insn_idx(jitstate_t *jit)
76 return jit->insn_idx + insn_len(jit_get_opcode(jit));
79 // Get an instruction argument by index
80 static VALUE
81 jit_get_arg(jitstate_t *jit, size_t arg_idx)
83 RUBY_ASSERT(arg_idx + 1 < (size_t)insn_len(jit_get_opcode(jit)));
84 return *(jit->pc + arg_idx + 1);
87 // Load a VALUE into a register and keep track of the reference if it is on the GC heap.
88 static void
89 jit_mov_gc_ptr(jitstate_t *jit, codeblock_t *cb, x86opnd_t reg, VALUE ptr)
91 RUBY_ASSERT(reg.type == OPND_REG && reg.num_bits == 64);
93 // Load the pointer constant into the specified register
94 mov(cb, reg, const_ptr_opnd((void*)ptr));
96 // The pointer immediate is encoded as the last part of the mov written out
97 uint32_t ptr_offset = cb->write_pos - sizeof(VALUE);
99 if (!SPECIAL_CONST_P(ptr)) {
100 if (!rb_darray_append(&jit->block->gc_object_offsets, ptr_offset)) {
101 rb_bug("allocation failed");
106 // Check if we are compiling the instruction at the stub PC
107 // Meaning we are compiling the instruction that is next to execute
108 static bool
109 jit_at_current_insn(jitstate_t *jit)
111 const VALUE *ec_pc = jit->ec->cfp->pc;
112 return (ec_pc == jit->pc);
115 // Peek at the nth topmost value on the Ruby stack.
116 // Returns the topmost value when n == 0.
117 static VALUE
118 jit_peek_at_stack(jitstate_t *jit, ctx_t *ctx, int n)
120 RUBY_ASSERT(jit_at_current_insn(jit));
122 // Note: this does not account for ctx->sp_offset because
123 // this is only available when hitting a stub, and while
124 // hitting a stub, cfp->sp needs to be up to date in case
125 // codegen functions trigger GC. See :stub-sp-flush:.
126 VALUE *sp = jit->ec->cfp->sp;
128 return *(sp - 1 - n);
131 static VALUE
132 jit_peek_at_self(jitstate_t *jit, ctx_t *ctx)
134 return jit->ec->cfp->self;
137 RBIMPL_ATTR_MAYBE_UNUSED()
138 static VALUE
139 jit_peek_at_local(jitstate_t *jit, ctx_t *ctx, int n)
141 RUBY_ASSERT(jit_at_current_insn(jit));
143 int32_t local_table_size = jit->iseq->body->local_table_size;
144 RUBY_ASSERT(n < (int)jit->iseq->body->local_table_size);
146 const VALUE *ep = jit->ec->cfp->ep;
147 return ep[-VM_ENV_DATA_SIZE - local_table_size + n + 1];
150 // Save the incremented PC on the CFP
151 // This is necessary when callees can raise or allocate
152 static void
153 jit_save_pc(jitstate_t *jit, x86opnd_t scratch_reg)
155 codeblock_t *cb = jit->cb;
156 mov(cb, scratch_reg, const_ptr_opnd(jit->pc + insn_len(jit->opcode)));
157 mov(cb, mem_opnd(64, REG_CFP, offsetof(rb_control_frame_t, pc)), scratch_reg);
160 // Save the current SP on the CFP
161 // This realigns the interpreter SP with the JIT SP
162 // Note: this will change the current value of REG_SP,
163 // which could invalidate memory operands
164 static void
165 jit_save_sp(jitstate_t *jit, ctx_t *ctx)
167 if (ctx->sp_offset != 0) {
168 x86opnd_t stack_pointer = ctx_sp_opnd(ctx, 0);
169 codeblock_t *cb = jit->cb;
170 lea(cb, REG_SP, stack_pointer);
171 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG_SP);
172 ctx->sp_offset = 0;
176 // jit_save_pc() + jit_save_sp(). Should be used before calling a routine that
177 // could:
178 // - Perform GC allocation
179 // - Take the VM lock through RB_VM_LOCK_ENTER()
180 // - Perform Ruby method call
181 static void
182 jit_prepare_routine_call(jitstate_t *jit, ctx_t *ctx, x86opnd_t scratch_reg)
184 jit->record_boundary_patch_point = true;
185 jit_save_pc(jit, scratch_reg);
186 jit_save_sp(jit, ctx);
189 // Record the current codeblock write position for rewriting into a jump into
190 // the outlined block later. Used to implement global code invalidation.
191 static void
192 record_global_inval_patch(const codeblock_t *cb, uint32_t outline_block_target_pos)
194 struct codepage_patch patch_point = { cb->write_pos, outline_block_target_pos };
195 if (!rb_darray_append(&global_inval_patches, patch_point)) rb_bug("allocation failed");
198 static bool jit_guard_known_klass(jitstate_t *jit, ctx_t *ctx, VALUE known_klass, insn_opnd_t insn_opnd, VALUE sample_instance, const int max_chain_depth, uint8_t *side_exit);
200 #if YJIT_STATS
202 // Add a comment at the current position in the code block
203 static void
204 _add_comment(codeblock_t *cb, const char *comment_str)
206 // We can't add comments to the outlined code block
207 if (cb == ocb)
208 return;
210 // Avoid adding duplicate comment strings (can happen due to deferred codegen)
211 size_t num_comments = rb_darray_size(yjit_code_comments);
212 if (num_comments > 0) {
213 struct yjit_comment last_comment = rb_darray_get(yjit_code_comments, num_comments - 1);
214 if (last_comment.offset == cb->write_pos && strcmp(last_comment.comment, comment_str) == 0) {
215 return;
219 struct yjit_comment new_comment = (struct yjit_comment){ cb->write_pos, comment_str };
220 rb_darray_append(&yjit_code_comments, new_comment);
223 // Comments for generated machine code
224 #define ADD_COMMENT(cb, comment) _add_comment((cb), (comment))
226 // Verify the ctx's types and mappings against the compile-time stack, self,
227 // and locals.
228 static void
229 verify_ctx(jitstate_t *jit, ctx_t *ctx)
231 // Only able to check types when at current insn
232 RUBY_ASSERT(jit_at_current_insn(jit));
234 VALUE self_val = jit_peek_at_self(jit, ctx);
235 if (type_diff(yjit_type_of_value(self_val), ctx->self_type) == INT_MAX) {
236 rb_bug("verify_ctx: ctx type (%s) incompatible with actual value of self: %s", yjit_type_name(ctx->self_type), rb_obj_info(self_val));
239 for (int i = 0; i < ctx->stack_size && i < MAX_TEMP_TYPES; i++) {
240 temp_type_mapping_t learned = ctx_get_opnd_mapping(ctx, OPND_STACK(i));
241 VALUE val = jit_peek_at_stack(jit, ctx, i);
242 val_type_t detected = yjit_type_of_value(val);
244 if (learned.mapping.kind == TEMP_SELF) {
245 if (self_val != val) {
246 rb_bug("verify_ctx: stack value was mapped to self, but values did not match\n"
247 " stack: %s\n"
248 " self: %s",
249 rb_obj_info(val),
250 rb_obj_info(self_val));
254 if (learned.mapping.kind == TEMP_LOCAL) {
255 int local_idx = learned.mapping.idx;
256 VALUE local_val = jit_peek_at_local(jit, ctx, local_idx);
257 if (local_val != val) {
258 rb_bug("verify_ctx: stack value was mapped to local, but values did not match\n"
259 " stack: %s\n"
260 " local %i: %s",
261 rb_obj_info(val),
262 local_idx,
263 rb_obj_info(local_val));
267 if (type_diff(detected, learned.type) == INT_MAX) {
268 rb_bug("verify_ctx: ctx type (%s) incompatible with actual value on stack: %s", yjit_type_name(learned.type), rb_obj_info(val));
272 int32_t local_table_size = jit->iseq->body->local_table_size;
273 for (int i = 0; i < local_table_size && i < MAX_TEMP_TYPES; i++) {
274 val_type_t learned = ctx->local_types[i];
275 VALUE val = jit_peek_at_local(jit, ctx, i);
276 val_type_t detected = yjit_type_of_value(val);
278 if (type_diff(detected, learned) == INT_MAX) {
279 rb_bug("verify_ctx: ctx type (%s) incompatible with actual value of local: %s", yjit_type_name(learned), rb_obj_info(val));
284 #else
286 #define ADD_COMMENT(cb, comment) ((void)0)
287 #define verify_ctx(jit, ctx) ((void)0)
289 #endif // if YJIT_STATS
291 #if YJIT_STATS
293 // Increment a profiling counter with counter_name
294 #define GEN_COUNTER_INC(cb, counter_name) _gen_counter_inc(cb, &(yjit_runtime_counters . counter_name))
295 static void
296 _gen_counter_inc(codeblock_t *cb, int64_t *counter)
298 if (!rb_yjit_opts.gen_stats) return;
300 // Use REG1 because there might be a return value in REG0
301 mov(cb, REG1, const_ptr_opnd(counter));
302 cb_write_lock_prefix(cb); // for ractors.
303 add(cb, mem_opnd(64, REG1, 0), imm_opnd(1));
306 // Increment a counter then take an existing side exit.
307 #define COUNTED_EXIT(jit, side_exit, counter_name) _counted_side_exit(jit, side_exit, &(yjit_runtime_counters . counter_name))
308 static uint8_t *
309 _counted_side_exit(jitstate_t* jit, uint8_t *existing_side_exit, int64_t *counter)
311 if (!rb_yjit_opts.gen_stats) return existing_side_exit;
313 uint8_t *start = cb_get_ptr(jit->ocb, jit->ocb->write_pos);
314 _gen_counter_inc(jit->ocb, counter);
315 jmp_ptr(jit->ocb, existing_side_exit);
316 return start;
319 #else
321 #define GEN_COUNTER_INC(cb, counter_name) ((void)0)
322 #define COUNTED_EXIT(jit, side_exit, counter_name) side_exit
324 #endif // if YJIT_STATS
326 // Generate an exit to return to the interpreter
327 static uint32_t
328 yjit_gen_exit(VALUE *exit_pc, ctx_t *ctx, codeblock_t *cb)
330 const uint32_t code_pos = cb->write_pos;
332 ADD_COMMENT(cb, "exit to interpreter");
334 // Generate the code to exit to the interpreter
335 // Write the adjusted SP back into the CFP
336 if (ctx->sp_offset != 0) {
337 x86opnd_t stack_pointer = ctx_sp_opnd(ctx, 0);
338 lea(cb, REG_SP, stack_pointer);
339 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG_SP);
342 // Update CFP->PC
343 mov(cb, RAX, const_ptr_opnd(exit_pc));
344 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, pc), RAX);
346 // Accumulate stats about interpreter exits
347 #if YJIT_STATS
348 if (rb_yjit_opts.gen_stats) {
349 mov(cb, RDI, const_ptr_opnd(exit_pc));
350 call_ptr(cb, RSI, (void *)&yjit_count_side_exit_op);
352 #endif
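// Restore the registers saved in yjit_entry_prologue() and return Qundef so the interpreter resumes from the PC written above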
354 pop(cb, REG_SP);
355 pop(cb, REG_EC);
356 pop(cb, REG_CFP);
358 mov(cb, RAX, imm_opnd(Qundef));
359 ret(cb);
361 return code_pos;
364 // Generate a continuation for gen_leave() that exits to the interpreter at REG_CFP->pc.
365 static uint8_t *
366 yjit_gen_leave_exit(codeblock_t *cb)
368 uint8_t *code_ptr = cb_get_ptr(cb, cb->write_pos);
370 // Note, gen_leave() fully reconstructs interpreter state and leaves the
371 // return value in RAX before coming here.
373 // Every exit to the interpreter should be counted
374 GEN_COUNTER_INC(cb, leave_interp_return);
376 pop(cb, REG_SP);
377 pop(cb, REG_EC);
378 pop(cb, REG_CFP);
380 ret(cb);
382 return code_ptr;
385 // Fill code_for_exit_from_stub. This is used by branch_stub_hit() to exit
386 // to the interpreter when it cannot service a stub by generating new code.
387 // Before coming here, branch_stub_hit() takes care of fully reconstructing
388 // interpreter state.
389 static void
390 gen_code_for_exit_from_stub(void)
392 codeblock_t *cb = ocb;
393 code_for_exit_from_stub = cb_get_ptr(cb, cb->write_pos);
395 GEN_COUNTER_INC(cb, exit_from_branch_stub);
397 pop(cb, REG_SP);
398 pop(cb, REG_EC);
399 pop(cb, REG_CFP);
401 mov(cb, RAX, imm_opnd(Qundef));
402 ret(cb);
405 // :side-exit:
406 // Get an exit for the current instruction in the outlined block. The code
407 // for each instruction often begins with several guards before proceeding
408 // to do work. When guards fail, an option we have is to exit to the
409 // interpreter at an instruction boundary. The piece of code that takes
410 // care of reconstructing interpreter state and exiting out of generated
411 // code is called the side exit.
413 // No guards change the logic for reconstructing interpreter state at the
414 // moment, so there is one unique side exit for each context. Note that
415 // it's incorrect to jump to the side exit after any ctx stack push/pop operations
416 // since they change the logic required for reconstructing interpreter state.
417 static uint8_t *
418 yjit_side_exit(jitstate_t *jit, ctx_t *ctx)
420 if (!jit->side_exit_for_pc) {
421 codeblock_t *ocb = jit->ocb;
422 uint32_t pos = yjit_gen_exit(jit->pc, ctx, ocb);
423 jit->side_exit_for_pc = cb_get_ptr(ocb, pos);
426 return jit->side_exit_for_pc;
429 // Ensure that there is an exit for the start of the block being compiled.
430 // Block invalidation uses this exit.
431 static void
432 jit_ensure_block_entry_exit(jitstate_t *jit)
434 block_t *block = jit->block;
435 if (block->entry_exit) return;
437 if (jit->insn_idx == block->blockid.idx) {
438 // We are compiling the first instruction in the block.
439 // Generate the exit with the cache in jitstate.
440 block->entry_exit = yjit_side_exit(jit, &block->ctx);
442 else {
443 VALUE *pc = yjit_iseq_pc_at_idx(block->blockid.iseq, block->blockid.idx);
444 uint32_t pos = yjit_gen_exit(pc, &block->ctx, ocb);
445 block->entry_exit = cb_get_ptr(ocb, pos);
449 // Generate a runtime guard that ensures the PC is at the start of the iseq,
450 // otherwise take a side exit. This is to handle the situation of optional
451 // parameters. When a function with optional parameters is called, the entry
452 // PC for the method isn't necessarily 0, but we always generate code that
453 // assumes the entry point is 0.
454 static void
455 yjit_pc_guard(codeblock_t *cb, const rb_iseq_t *iseq)
457 RUBY_ASSERT(cb != NULL);
459 mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, pc));
460 mov(cb, REG1, const_ptr_opnd(iseq->body->iseq_encoded));
461 xor(cb, REG0, REG1);
463 // xor sets ZF when the operands are equal, so we can jz here
464 uint32_t pc_is_zero = cb_new_label(cb, "pc_is_zero");
465 jz_label(cb, pc_is_zero);
467 // We're not starting at the first PC, so we need to exit.
468 GEN_COUNTER_INC(cb, leave_start_pc_non_zero);
470 pop(cb, REG_SP);
471 pop(cb, REG_EC);
472 pop(cb, REG_CFP);
474 mov(cb, RAX, imm_opnd(Qundef));
475 ret(cb);
477 // PC should be at the beginning
478 cb_write_label(cb, pc_is_zero);
479 cb_link_labels(cb);
482 // The code we generate in gen_send_cfunc() doesn't fire the c_return TracePoint event
483 // like the interpreter. When tracing for c_return is enabled, we patch the code after
484 // the C method return to call into this to fire the event.
485 static void
486 full_cfunc_return(rb_execution_context_t *ec, VALUE return_value)
488 rb_control_frame_t *cfp = ec->cfp;
489 RUBY_ASSERT_ALWAYS(cfp == GET_EC()->cfp);
490 const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(cfp);
492 RUBY_ASSERT_ALWAYS(RUBYVM_CFUNC_FRAME_P(cfp));
493 RUBY_ASSERT_ALWAYS(me->def->type == VM_METHOD_TYPE_CFUNC);
495 // CHECK_CFP_CONSISTENCY("full_cfunc_return"); TODO revive this
497 // Pop the C func's frame and fire the c_return TracePoint event
498 // Note that this is the same order as vm_call_cfunc_with_frame().
499 rb_vm_pop_frame(ec);
500 EXEC_EVENT_HOOK(ec, RUBY_EVENT_C_RETURN, cfp->self, me->def->original_id, me->called_id, me->owner, return_value);
501 // Note, this deviates from the interpreter in that users need to enable
502 // a c_return TracePoint for this DTrace hook to work. A reasonable change
503 // since the Ruby return event works this way as well.
504 RUBY_DTRACE_CMETHOD_RETURN_HOOK(ec, me->owner, me->def->original_id);
506 // Push return value into the caller's stack. We know that it's a frame that
507 // uses cfp->sp because we are patching a call done with gen_send_cfunc().
508 ec->cfp->sp[0] = return_value;
509 ec->cfp->sp++;
512 // Landing code for when c_return tracing is enabled. See full_cfunc_return().
513 static void
514 gen_full_cfunc_return(void)
516 codeblock_t *cb = ocb;
517 outline_full_cfunc_return_pos = ocb->write_pos;
519 // This chunk of code expects REG_EC to be filled properly and
520 // RAX to contain the return value of the C method.
522 // Call full_cfunc_return()
523 mov(cb, C_ARG_REGS[0], REG_EC);
524 mov(cb, C_ARG_REGS[1], RAX);
525 call_ptr(cb, REG0, (void *)full_cfunc_return);
527 // Count the exit
528 GEN_COUNTER_INC(cb, traced_cfunc_return);
530 // Return to the interpreter
531 pop(cb, REG_SP);
532 pop(cb, REG_EC);
533 pop(cb, REG_CFP);
535 mov(cb, RAX, imm_opnd(Qundef));
536 ret(cb);
540 // Compile an interpreter entry block to be inserted into an iseq
541 // Returns `NULL` if compilation fails.
543 static uint8_t *
544 yjit_entry_prologue(codeblock_t *cb, const rb_iseq_t *iseq)
546 RUBY_ASSERT(cb != NULL);
548 enum { MAX_PROLOGUE_SIZE = 1024 };
550 // Check if we have enough executable memory
551 if (cb->write_pos + MAX_PROLOGUE_SIZE >= cb->mem_size) {
552 return NULL;
555 const uint32_t old_write_pos = cb->write_pos;
557 // Align the current write position to cache line boundaries
558 cb_align_pos(cb, 64);
560 uint8_t *code_ptr = cb_get_ptr(cb, cb->write_pos);
561 ADD_COMMENT(cb, "yjit entry");
563 push(cb, REG_CFP);
564 push(cb, REG_EC);
565 push(cb, REG_SP);
567 // We are passed EC and CFP
568 mov(cb, REG_EC, C_ARG_REGS[0]);
569 mov(cb, REG_CFP, C_ARG_REGS[1]);
571 // Load the current SP from the CFP into REG_SP
572 mov(cb, REG_SP, member_opnd(REG_CFP, rb_control_frame_t, sp));
574 // Setup cfp->jit_return
575 // TODO: this could use an IP relative LEA instead of an 8 byte immediate
576 mov(cb, REG0, const_ptr_opnd(leave_exit_code));
577 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, jit_return), REG0);
579 // We're compiling iseqs that we *expect* to start at `insn_idx`. But in
580 // the case of optional parameters, the interpreter can set the pc to a
581 // different location depending on the optional parameters. If an iseq
582 // has optional parameters, we'll add a runtime check that the PC we've
583 // compiled for is the same PC that the interpreter wants us to run with.
584 // If they don't match, then we'll take a side exit.
585 if (iseq->body->param.flags.has_opt) {
586 yjit_pc_guard(cb, iseq);
589 // Verify MAX_PROLOGUE_SIZE
590 RUBY_ASSERT_ALWAYS(cb->write_pos - old_write_pos <= MAX_PROLOGUE_SIZE);
592 return code_ptr;
595 // Generate code to check for interrupts and take a side-exit.
596 // Warning: this function clobbers REG0
597 static void
598 yjit_check_ints(codeblock_t *cb, uint8_t *side_exit)
600 // Check for interrupts
601 // see RUBY_VM_CHECK_INTS(ec) macro
602 ADD_COMMENT(cb, "RUBY_VM_CHECK_INTS(ec)");
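// Take the side exit when interrupt_flag & ~interrupt_mask is non-zero (a pending, unmasked interrupt)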
603 mov(cb, REG0_32, member_opnd(REG_EC, rb_execution_context_t, interrupt_mask));
604 not(cb, REG0_32);
605 test(cb, member_opnd(REG_EC, rb_execution_context_t, interrupt_flag), REG0_32);
606 jnz_ptr(cb, side_exit);
609 // Generate a stubbed unconditional jump to the next bytecode instruction.
610 // Blocks that are part of a guard chain can use this to share the same successor.
611 static void
612 jit_jump_to_next_insn(jitstate_t *jit, const ctx_t *current_context)
614 // Reset the depth since in current usages we only ever jump to
615 // chain_depth > 0 from the same instruction.
616 ctx_t reset_depth = *current_context;
617 reset_depth.chain_depth = 0;
619 blockid_t jump_block = { jit->iseq, jit_next_insn_idx(jit) };
621 // We are at the end of the current instruction. Record the boundary.
622 if (jit->record_boundary_patch_point) {
623 uint32_t exit_pos = yjit_gen_exit(jit->pc + insn_len(jit->opcode), &reset_depth, jit->ocb);
624 record_global_inval_patch(jit->cb, exit_pos);
625 jit->record_boundary_patch_point = false;
628 // Generate the jump instruction
629 gen_direct_jump(
630 jit,
631 &reset_depth,
632 jump_block
636 // Compile a sequence of bytecode instructions for a given basic block version.
637 // Part of gen_block_version().
638 static block_t *
639 gen_single_block(blockid_t blockid, const ctx_t *start_ctx, rb_execution_context_t *ec)
641 RUBY_ASSERT(cb != NULL);
642 verify_blockid(blockid);
644 // Allocate the new block
645 block_t *block = calloc(1, sizeof(block_t));
646 if (!block) {
647 return NULL;
650 // Copy the starting context to avoid mutating it
651 ctx_t ctx_copy = *start_ctx;
652 ctx_t *ctx = &ctx_copy;
654 // Limit the number of specialized versions for this block
655 *ctx = limit_block_versions(blockid, ctx);
657 // Save the starting context on the block.
658 block->blockid = blockid;
659 block->ctx = *ctx;
661 RUBY_ASSERT(!(blockid.idx == 0 && start_ctx->stack_size > 0));
663 const rb_iseq_t *iseq = block->blockid.iseq;
664 const unsigned int iseq_size = iseq->body->iseq_size;
665 uint32_t insn_idx = block->blockid.idx;
666 const uint32_t starting_insn_idx = insn_idx;
668 // Initialize a JIT state object
669 jitstate_t jit = {
670 .cb = cb,
671 .ocb = ocb,
672 .block = block,
673 .iseq = iseq,
674 .ec = ec
677 // Mark the start position of the block
678 block->start_addr = cb_get_write_ptr(cb);
680 // For each instruction to compile
681 while (insn_idx < iseq_size) {
682 // Get the current pc and opcode
683 VALUE *pc = yjit_iseq_pc_at_idx(iseq, insn_idx);
684 int opcode = yjit_opcode_at_pc(iseq, pc);
685 RUBY_ASSERT(opcode >= 0 && opcode < VM_INSTRUCTION_SIZE);
687 // opt_getinlinecache wants to be in a block all on its own. Cut the block short
688 // if we run into it. See gen_opt_getinlinecache() for details.
689 if (opcode == BIN(opt_getinlinecache) && insn_idx > starting_insn_idx) {
690 jit_jump_to_next_insn(&jit, ctx);
691 break;
694 // Set the current instruction
695 jit.insn_idx = insn_idx;
696 jit.opcode = opcode;
697 jit.pc = pc;
698 jit.side_exit_for_pc = NULL;
700 // If the previous instruction requested to record the boundary
701 if (jit.record_boundary_patch_point) {
702 // Generate an exit to this instruction and record it
703 uint32_t exit_pos = yjit_gen_exit(jit.pc, ctx, ocb);
704 record_global_inval_patch(cb, exit_pos);
705 jit.record_boundary_patch_point = false;
708 // Verify our existing assumption (DEBUG)
709 if (jit_at_current_insn(&jit)) {
710 verify_ctx(&jit, ctx);
713 // Lookup the codegen function for this instruction
714 codegen_fn gen_fn = gen_fns[opcode];
715 codegen_status_t status = YJIT_CANT_COMPILE;
716 if (gen_fn) {
717 if (0) {
718 fprintf(stderr, "compiling %d: %s\n", insn_idx, insn_name(opcode));
719 print_str(cb, insn_name(opcode));
722 // :count-placement:
723 // Count bytecode instructions that execute in generated code.
724 // Note that the increment happens even when the output takes a side exit.
725 GEN_COUNTER_INC(cb, exec_instruction);
727 // Add a comment for the name of the YARV instruction
728 ADD_COMMENT(cb, insn_name(opcode));
730 // Call the code generation function
731 status = gen_fn(&jit, ctx, cb);
734 // If we can't compile this instruction
735 // exit to the interpreter and stop compiling
736 if (status == YJIT_CANT_COMPILE) {
737 // TODO: if the codegen function makes changes to ctx and then returns YJIT_CANT_COMPILE,
738 // the exit this generates would be wrong. We could save a copy of the entry context
739 // and assert that ctx is the same here.
740 uint32_t exit_off = yjit_gen_exit(jit.pc, ctx, cb);
742 // If this is the first instruction in the block, then we can use
743 // the exit for block->entry_exit.
744 if (insn_idx == block->blockid.idx) {
745 block->entry_exit = cb_get_ptr(cb, exit_off);
747 break;
750 // For now, reset the chain depth after each instruction as only the
751 // first instruction in the block can concern itself with the depth.
752 ctx->chain_depth = 0;
754 // Move to the next instruction to compile
755 insn_idx += insn_len(opcode);
757 // If the instruction terminates this block
758 if (status == YJIT_END_BLOCK) {
759 break;
763 // Mark the end position of the block
764 block->end_addr = cb_get_write_ptr(cb);
766 // Store the index of the last instruction in the block
767 block->end_idx = insn_idx;
769 // We currently can't handle cases where the request is for a block that
770 // doesn't go to the next instruction.
771 RUBY_ASSERT(!jit.record_boundary_patch_point);
773 // If code for the block doesn't fit, free the block and fail.
774 if (cb->dropped_bytes || ocb->dropped_bytes) {
775 yjit_free_block(block);
776 return NULL;
779 if (YJIT_DUMP_MODE >= 2) {
780 // Dump the list of compiled instructions
781 fprintf(stderr, "Compiled the following for iseq=%p:\n", (void *)iseq);
782 for (uint32_t idx = block->blockid.idx; idx < insn_idx; ) {
783 int opcode = yjit_opcode_at_pc(iseq, yjit_iseq_pc_at_idx(iseq, idx));
784 fprintf(stderr, " %04d %s\n", idx, insn_name(opcode));
785 idx += insn_len(opcode);
789 return block;
792 static codegen_status_t gen_opt_send_without_block(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb);
794 static codegen_status_t
795 gen_nop(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
797 // Do nothing
798 return YJIT_KEEP_COMPILING;
801 static codegen_status_t
802 gen_dup(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
804 // Get the top value and its type
805 x86opnd_t dup_val = ctx_stack_pop(ctx, 0);
806 temp_type_mapping_t mapping = ctx_get_opnd_mapping(ctx, OPND_STACK(0));
808 // Push the same value on top
809 x86opnd_t loc0 = ctx_stack_push_mapping(ctx, mapping);
810 mov(cb, REG0, dup_val);
811 mov(cb, loc0, REG0);
813 return YJIT_KEEP_COMPILING;
816 // duplicate stack top n elements
817 static codegen_status_t
818 gen_dupn(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
820 rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
822 // In practice, this seems to be used only for n == 2
823 if (n != 2) {
824 return YJIT_CANT_COMPILE;
827 x86opnd_t opnd1 = ctx_stack_opnd(ctx, 1);
828 x86opnd_t opnd0 = ctx_stack_opnd(ctx, 0);
829 temp_type_mapping_t mapping1 = ctx_get_opnd_mapping(ctx, OPND_STACK(1));
830 temp_type_mapping_t mapping0 = ctx_get_opnd_mapping(ctx, OPND_STACK(0));
832 x86opnd_t dst1 = ctx_stack_push_mapping(ctx, mapping1);
833 mov(cb, REG0, opnd1);
834 mov(cb, dst1, REG0);
836 x86opnd_t dst0 = ctx_stack_push_mapping(ctx, mapping0);
837 mov(cb, REG0, opnd0);
838 mov(cb, dst0, REG0);
840 return YJIT_KEEP_COMPILING;
843 static void
844 stack_swap(ctx_t *ctx, codeblock_t *cb, int offset0, int offset1, x86opnd_t reg0, x86opnd_t reg1)
846 x86opnd_t opnd0 = ctx_stack_opnd(ctx, offset0);
847 x86opnd_t opnd1 = ctx_stack_opnd(ctx, offset1);
849 temp_type_mapping_t mapping0 = ctx_get_opnd_mapping(ctx, OPND_STACK(offset0));
850 temp_type_mapping_t mapping1 = ctx_get_opnd_mapping(ctx, OPND_STACK(offset1));
852 mov(cb, reg0, opnd0);
853 mov(cb, reg1, opnd1);
854 mov(cb, opnd0, reg1);
855 mov(cb, opnd1, reg0);
857 ctx_set_opnd_mapping(ctx, OPND_STACK(offset0), mapping1);
858 ctx_set_opnd_mapping(ctx, OPND_STACK(offset1), mapping0);
861 // Swap top 2 stack entries
862 static codegen_status_t
863 gen_swap(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
865 stack_swap(ctx, cb, 0, 1, REG0, REG1);
866 return YJIT_KEEP_COMPILING;
869 // set Nth stack entry to stack top
870 static codegen_status_t
871 gen_setn(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
873 rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
875 // Set the destination
876 x86opnd_t top_val = ctx_stack_pop(ctx, 0);
877 x86opnd_t dst_opnd = ctx_stack_opnd(ctx, (int32_t)n);
878 mov(cb, REG0, top_val);
879 mov(cb, dst_opnd, REG0);
881 temp_type_mapping_t mapping = ctx_get_opnd_mapping(ctx, OPND_STACK(0));
882 ctx_set_opnd_mapping(ctx, OPND_STACK(n), mapping);
884 return YJIT_KEEP_COMPILING;
887 // get nth stack value, then push it
888 static codegen_status_t
889 gen_topn(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
891 int32_t n = (int32_t)jit_get_arg(jit, 0);
893 // Get top n type / operand
894 x86opnd_t top_n_val = ctx_stack_opnd(ctx, n);
895 temp_type_mapping_t mapping = ctx_get_opnd_mapping(ctx, OPND_STACK(n));
897 x86opnd_t loc0 = ctx_stack_push_mapping(ctx, mapping);
898 mov(cb, REG0, top_n_val);
899 mov(cb, loc0, REG0);
901 return YJIT_KEEP_COMPILING;
904 static codegen_status_t
905 gen_pop(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
907 // Decrement SP
908 ctx_stack_pop(ctx, 1);
909 return YJIT_KEEP_COMPILING;
912 // Pop n values off the stack
913 static codegen_status_t
914 gen_adjuststack(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
916 rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
917 ctx_stack_pop(ctx, n);
918 return YJIT_KEEP_COMPILING;
921 // new array initialized from top N values
922 static codegen_status_t
923 gen_newarray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
925 rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
927 // Save the PC and SP because we are allocating
928 jit_prepare_routine_call(jit, ctx, REG0);
930 x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(sizeof(VALUE) * (uint32_t)n));
932 // call rb_ec_ary_new_from_values(struct rb_execution_context_struct *ec, long n, const VALUE *elts);
933 mov(cb, C_ARG_REGS[0], REG_EC);
934 mov(cb, C_ARG_REGS[1], imm_opnd(n));
935 lea(cb, C_ARG_REGS[2], values_ptr);
936 call_ptr(cb, REG0, (void *)rb_ec_ary_new_from_values);
938 ctx_stack_pop(ctx, n);
939 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_ARRAY);
940 mov(cb, stack_ret, RAX);
942 return YJIT_KEEP_COMPILING;
945 // dup array
946 static codegen_status_t
947 gen_duparray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
949 VALUE ary = jit_get_arg(jit, 0);
951 // Save the PC and SP because we are allocating
952 jit_prepare_routine_call(jit, ctx, REG0);
954 // call rb_ary_resurrect(VALUE ary);
955 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], ary);
956 call_ptr(cb, REG0, (void *)rb_ary_resurrect);
958 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_ARRAY);
959 mov(cb, stack_ret, RAX);
961 return YJIT_KEEP_COMPILING;
964 // dup hash
965 static codegen_status_t
966 gen_duphash(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
968 VALUE hash = jit_get_arg(jit, 0);
970 // Save the PC and SP because we are allocating
971 jit_prepare_routine_call(jit, ctx, REG0);
973 // call rb_hash_resurrect(VALUE hash);
974 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], hash);
975 call_ptr(cb, REG0, (void *)rb_hash_resurrect);
977 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HASH);
978 mov(cb, stack_ret, RAX);
980 return YJIT_KEEP_COMPILING;
983 VALUE rb_vm_splat_array(VALUE flag, VALUE ary);
985 // call to_a on the array on the stack
986 static codegen_status_t
987 gen_splatarray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
989 VALUE flag = (VALUE) jit_get_arg(jit, 0);
991 // Save the PC and SP because the callee may allocate
992 // Note that this modifies REG_SP, which is why we do it first
993 jit_prepare_routine_call(jit, ctx, REG0);
995 // Get the operands from the stack
996 x86opnd_t ary_opnd = ctx_stack_pop(ctx, 1);
998 // Call rb_vm_splat_array(flag, ary)
999 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], flag);
1000 mov(cb, C_ARG_REGS[1], ary_opnd);
1001 call_ptr(cb, REG1, (void *) rb_vm_splat_array);
1003 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_ARRAY);
1004 mov(cb, stack_ret, RAX);
1006 return YJIT_KEEP_COMPILING;
1009 // new range initialized from top 2 values
1010 static codegen_status_t
1011 gen_newrange(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1013 rb_num_t flag = (rb_num_t)jit_get_arg(jit, 0);
1015 // rb_range_new() allocates and can raise
1016 jit_prepare_routine_call(jit, ctx, REG0);
1018 // val = rb_range_new(low, high, (int)flag);
1019 mov(cb, C_ARG_REGS[0], ctx_stack_opnd(ctx, 1));
1020 mov(cb, C_ARG_REGS[1], ctx_stack_opnd(ctx, 0));
1021 mov(cb, C_ARG_REGS[2], imm_opnd(flag));
1022 call_ptr(cb, REG0, (void *)rb_range_new);
1024 ctx_stack_pop(ctx, 2);
1025 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HEAP);
1026 mov(cb, stack_ret, RAX);
1028 return YJIT_KEEP_COMPILING;
1031 static void
1032 guard_object_is_heap(codeblock_t *cb, x86opnd_t object_opnd, ctx_t *ctx, uint8_t *side_exit)
1034 ADD_COMMENT(cb, "guard object is heap");
1036 // Test that the object is not an immediate
1037 test(cb, object_opnd, imm_opnd(RUBY_IMMEDIATE_MASK));
1038 jnz_ptr(cb, side_exit);
1040 // Test that the object is not false or nil
1041 cmp(cb, object_opnd, imm_opnd(Qnil));
1042 RUBY_ASSERT(Qfalse < Qnil);
1043 jbe_ptr(cb, side_exit);
1046 static inline void
1047 guard_object_is_array(codeblock_t *cb, x86opnd_t object_opnd, x86opnd_t flags_opnd, ctx_t *ctx, uint8_t *side_exit)
1049 ADD_COMMENT(cb, "guard object is array");
1051 // Pull out the type mask
1052 mov(cb, flags_opnd, member_opnd(object_opnd, struct RBasic, flags));
1053 and(cb, flags_opnd, imm_opnd(RUBY_T_MASK));
1055 // Compare the result with T_ARRAY
1056 cmp(cb, flags_opnd, imm_opnd(T_ARRAY));
1057 jne_ptr(cb, side_exit);
1060 // push enough nils onto the stack to fill out an array
1061 static codegen_status_t
1062 gen_expandarray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1064 int flag = (int) jit_get_arg(jit, 1);
1066 // If this instruction has the splat flag, then bail out.
1067 if (flag & 0x01) {
1068 GEN_COUNTER_INC(cb, expandarray_splat);
1069 return YJIT_CANT_COMPILE;
1072 // If this instruction has the postarg flag, then bail out.
1073 if (flag & 0x02) {
1074 GEN_COUNTER_INC(cb, expandarray_postarg);
1075 return YJIT_CANT_COMPILE;
1078 uint8_t *side_exit = yjit_side_exit(jit, ctx);
1080 // num is the number of requested values. If there aren't enough in the
1081 // array then we're going to push on nils.
1082 int num = (int)jit_get_arg(jit, 0);
1083 val_type_t array_type = ctx_get_opnd_type(ctx, OPND_STACK(0));
1084 x86opnd_t array_opnd = ctx_stack_pop(ctx, 1);
1086 if (array_type.type == ETYPE_NIL) {
1087 // special case for a, b = nil pattern
1088 // push N nils onto the stack
1089 for (int i = 0; i < num; i++) {
1090 x86opnd_t push = ctx_stack_push(ctx, TYPE_NIL);
1091 mov(cb, push, imm_opnd(Qnil));
1093 return YJIT_KEEP_COMPILING;
1096 // Move the array from the stack into REG0 and check that it's an array.
1097 mov(cb, REG0, array_opnd);
1098 guard_object_is_heap(cb, REG0, ctx, COUNTED_EXIT(jit, side_exit, expandarray_not_array));
1099 guard_object_is_array(cb, REG0, REG1, ctx, COUNTED_EXIT(jit, side_exit, expandarray_not_array));
1101 // If we don't actually want any values, then just return.
1102 if (num == 0) {
1103 return YJIT_KEEP_COMPILING;
1106 // Pull out the embed flag to check if it's an embedded array.
1107 x86opnd_t flags_opnd = member_opnd(REG0, struct RBasic, flags);
1108 mov(cb, REG1, flags_opnd);
1110 // Move the length of the embedded array into REG1.
1111 and(cb, REG1, imm_opnd(RARRAY_EMBED_LEN_MASK));
1112 shr(cb, REG1, imm_opnd(RARRAY_EMBED_LEN_SHIFT));
1114 // Conditionally move the length of the heap array into REG1.
1115 test(cb, flags_opnd, imm_opnd(RARRAY_EMBED_FLAG));
1116 cmovz(cb, REG1, member_opnd(REG0, struct RArray, as.heap.len));
1118 // Only handle the case where the number of values in the array is greater
1119 // than or equal to the number of values requested.
1120 cmp(cb, REG1, imm_opnd(num));
1121 jl_ptr(cb, COUNTED_EXIT(jit, side_exit, expandarray_rhs_too_small));
1123 // Load the address of the embedded array into REG1.
1124 // (struct RArray *)(obj)->as.ary
1125 lea(cb, REG1, member_opnd(REG0, struct RArray, as.ary));
1127 // Conditionally load the address of the heap array into REG1.
1128 // (struct RArray *)(obj)->as.heap.ptr
1129 test(cb, flags_opnd, imm_opnd(RARRAY_EMBED_FLAG));
1130 cmovz(cb, REG1, member_opnd(REG0, struct RArray, as.heap.ptr));
1132 // Loop backward through the array and push each element onto the stack.
1133 for (int32_t i = (int32_t) num - 1; i >= 0; i--) {
1134 x86opnd_t top = ctx_stack_push(ctx, TYPE_UNKNOWN);
1135 mov(cb, REG0, mem_opnd(64, REG1, i * SIZEOF_VALUE));
1136 mov(cb, top, REG0);
1139 return YJIT_KEEP_COMPILING;
1142 // new hash initialized from top N values
1143 static codegen_status_t
1144 gen_newhash(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1146 int32_t num = (int32_t)jit_get_arg(jit, 0);
1148 // Save the PC and SP because we are allocating
1149 jit_prepare_routine_call(jit, ctx, REG0);
1151 if (num) {
1152 // val = rb_hash_new_with_size(num / 2);
1153 mov(cb, C_ARG_REGS[0], imm_opnd(num / 2));
1154 call_ptr(cb, REG0, (void *)rb_hash_new_with_size);
1156 // save the allocated hash as we want to push it after insertion
1157 push(cb, RAX);
1158 push(cb, RAX); // alignment
1160 // rb_hash_bulk_insert(num, STACK_ADDR_FROM_TOP(num), val);
1161 mov(cb, C_ARG_REGS[0], imm_opnd(num));
1162 lea(cb, C_ARG_REGS[1], ctx_stack_opnd(ctx, num - 1));
1163 mov(cb, C_ARG_REGS[2], RAX);
1164 call_ptr(cb, REG0, (void *)rb_hash_bulk_insert);
1166 pop(cb, RAX); // alignment
1167 pop(cb, RAX);
1169 ctx_stack_pop(ctx, num);
1170 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HASH);
1171 mov(cb, stack_ret, RAX);
1173 else {
1174 // val = rb_hash_new();
1175 call_ptr(cb, REG0, (void *)rb_hash_new);
1177 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HASH);
1178 mov(cb, stack_ret, RAX);
1181 return YJIT_KEEP_COMPILING;
1184 // Push a constant value to the stack, including type information.
1185 // The constant may be a heap object or a special constant.
1186 static void
1187 jit_putobject(jitstate_t *jit, ctx_t *ctx, VALUE arg)
1189 val_type_t val_type = yjit_type_of_value(arg);
1190 x86opnd_t stack_top = ctx_stack_push(ctx, val_type);
1192 if (SPECIAL_CONST_P(arg)) {
1193 // Immediates will not move and do not need to be tracked for GC
1194 // Thanks to this we can mov directly to memory when possible.
1196 // NOTE: VALUE -> int64_t cast below is implementation defined.
1197 // Hopefully it preserves the bit pattern or raises a signal.
1198 // See N1256 section 6.3.1.3.
1199 x86opnd_t imm = imm_opnd((int64_t)arg);
1201 // 64-bit immediates can't be directly written to memory
1202 if (imm.num_bits <= 32) {
1203 mov(cb, stack_top, imm);
1205 else {
1206 mov(cb, REG0, imm);
1207 mov(cb, stack_top, REG0);
1210 else {
1211 // Load the value to push into REG0
1212 // Note that this value may get moved by the GC
1213 jit_mov_gc_ptr(jit, cb, REG0, arg);
1215 // Write argument at SP
1216 mov(cb, stack_top, REG0);
1220 static codegen_status_t
1221 gen_putnil(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1223 jit_putobject(jit, ctx, Qnil);
1224 return YJIT_KEEP_COMPILING;
1227 static codegen_status_t
1228 gen_putobject(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1230 VALUE arg = jit_get_arg(jit, 0);
1232 jit_putobject(jit, ctx, arg);
1233 return YJIT_KEEP_COMPILING;
1236 static codegen_status_t
1237 gen_putstring(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1239 VALUE put_val = jit_get_arg(jit, 0);
1241 // Save the PC and SP because the callee will allocate
1242 jit_prepare_routine_call(jit, ctx, REG0);
1244 mov(cb, C_ARG_REGS[0], REG_EC);
1245 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], put_val);
1246 call_ptr(cb, REG0, (void *)rb_ec_str_resurrect);
1248 x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_STRING);
1249 mov(cb, stack_top, RAX);
1251 return YJIT_KEEP_COMPILING;
1254 static codegen_status_t
1255 gen_putobject_int2fix(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1257 int opcode = jit_get_opcode(jit);
1258 int cst_val = (opcode == BIN(putobject_INT2FIX_0_))? 0:1;
1260 jit_putobject(jit, ctx, INT2FIX(cst_val));
1261 return YJIT_KEEP_COMPILING;
1264 static codegen_status_t
1265 gen_putself(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1267 // Load self from CFP
1268 mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, self));
1270 // Write it on the stack
1271 x86opnd_t stack_top = ctx_stack_push_self(ctx);
1272 mov(cb, stack_top, REG0);
1274 return YJIT_KEEP_COMPILING;
1277 static codegen_status_t
1278 gen_putspecialobject(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1280 enum vm_special_object_type type = (enum vm_special_object_type)jit_get_arg(jit, 0);
1282 if (type == VM_SPECIAL_OBJECT_VMCORE) {
1283 x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_HEAP);
1284 jit_mov_gc_ptr(jit, cb, REG0, rb_mRubyVMFrozenCore);
1285 mov(cb, stack_top, REG0);
1286 return YJIT_KEEP_COMPILING;
1288 else {
1289 // TODO: implement for VM_SPECIAL_OBJECT_CBASE and
1290 // VM_SPECIAL_OBJECT_CONST_BASE
1291 return YJIT_CANT_COMPILE;
1295 // Get EP at level from CFP
1296 static void
1297 gen_get_ep(codeblock_t *cb, x86opnd_t reg, uint32_t level)
1299 // Load environment pointer EP from CFP
1300 mov(cb, reg, member_opnd(REG_CFP, rb_control_frame_t, ep));
1302 while (level--) {
1303 // Get the previous EP from the current EP
1304 // See GET_PREV_EP(ep) macro
1305 // VALUE *prev_ep = ((VALUE *)((ep)[VM_ENV_DATA_INDEX_SPECVAL] & ~0x03))
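// Note: the load below goes through REG0, so this helper assumes reg == REG0 (true for all current callers)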
1306 mov(cb, reg, mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_SPECVAL));
1307 and(cb, reg, imm_opnd(~0x03));
1311 // Compute the index of a local variable from its slot index
1312 static uint32_t
1313 slot_to_local_idx(const rb_iseq_t *iseq, int32_t slot_idx)
1315 // Convoluted rules from local_var_name() in iseq.c
1316 int32_t local_table_size = iseq->body->local_table_size;
1317 int32_t op = slot_idx - VM_ENV_DATA_SIZE;
1318 int32_t local_idx = local_table_size - op - 1;
1319 RUBY_ASSERT(local_idx >= 0 && local_idx < local_table_size);
1320 return (uint32_t)local_idx;
1323 static codegen_status_t
1324 gen_getlocal_wc0(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1326 // Compute the offset from BP to the local
1327 int32_t slot_idx = (int32_t)jit_get_arg(jit, 0);
1328 const int32_t offs = -(SIZEOF_VALUE * slot_idx);
1329 uint32_t local_idx = slot_to_local_idx(jit->iseq, slot_idx);
1331 // Load environment pointer EP (level 0) from CFP
1332 gen_get_ep(cb, REG0, 0);
1334 // Load the local from the EP
1335 mov(cb, REG0, mem_opnd(64, REG0, offs));
1337 // Write the local at SP
1338 x86opnd_t stack_top = ctx_stack_push_local(ctx, local_idx);
1339 mov(cb, stack_top, REG0);
1341 return YJIT_KEEP_COMPILING;
1344 static codegen_status_t
1345 gen_getlocal_generic(ctx_t *ctx, uint32_t local_idx, uint32_t level)
1347 gen_get_ep(cb, REG0, level);
1349 // Load the local from the block
1350 // val = *(vm_get_ep(GET_EP(), level) - idx);
1351 const int32_t offs = -(SIZEOF_VALUE * local_idx);
1352 mov(cb, REG0, mem_opnd(64, REG0, offs));
1354 // Write the local at SP
1355 x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_UNKNOWN);
1356 mov(cb, stack_top, REG0);
1358 return YJIT_KEEP_COMPILING;
1361 static codegen_status_t
1362 gen_getlocal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1364 int32_t idx = (int32_t)jit_get_arg(jit, 0);
1365 int32_t level = (int32_t)jit_get_arg(jit, 1);
1366 return gen_getlocal_generic(ctx, idx, level);
1369 static codegen_status_t
1370 gen_getlocal_wc1(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1372 int32_t idx = (int32_t)jit_get_arg(jit, 0);
1373 return gen_getlocal_generic(ctx, idx, 1);
1376 static codegen_status_t
1377 gen_setlocal_wc0(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1380 // vm_env_write(const VALUE *ep, int index, VALUE v)
1382 //     VALUE flags = ep[VM_ENV_DATA_INDEX_FLAGS];
1383 //     if (LIKELY((flags & VM_ENV_FLAG_WB_REQUIRED) == 0)) {
1384 //         VM_STACK_ENV_WRITE(ep, index, v);
1386 //     else {
1387 //         vm_env_write_slowpath(ep, index, v);
1392 int32_t slot_idx = (int32_t)jit_get_arg(jit, 0);
1393 uint32_t local_idx = slot_to_local_idx(jit->iseq, slot_idx);
1395 // Load environment pointer EP (level 0) from CFP
1396 gen_get_ep(cb, REG0, 0);
1398 // flags & VM_ENV_FLAG_WB_REQUIRED
1399 x86opnd_t flags_opnd = mem_opnd(64, REG0, sizeof(VALUE) * VM_ENV_DATA_INDEX_FLAGS);
1400 test(cb, flags_opnd, imm_opnd(VM_ENV_FLAG_WB_REQUIRED));
1402 // Create a side-exit to fall back to the interpreter
1403 uint8_t *side_exit = yjit_side_exit(jit, ctx);
1405 // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0
1406 jnz_ptr(cb, side_exit);
1408 // Set the type of the local variable in the context
1409 val_type_t temp_type = ctx_get_opnd_type(ctx, OPND_STACK(0));
1410 ctx_set_local_type(ctx, local_idx, temp_type);
1412 // Pop the value to write from the stack
1413 x86opnd_t stack_top = ctx_stack_pop(ctx, 1);
1414 mov(cb, REG1, stack_top);
1416 // Write the value at the environment pointer
1417 const int32_t offs = -8 * slot_idx;
1418 mov(cb, mem_opnd(64, REG0, offs), REG1);
1420 return YJIT_KEEP_COMPILING;
1423 // Push Qtrue or Qfalse depending on whether the given keyword was supplied by
1424 // the caller
1425 static codegen_status_t
1426 gen_checkkeyword(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1428 // When a keyword is unspecified past index 32, a hash will be used
1429 // instead. This can only happen in iseqs taking more than 32 keywords.
1430 if (jit->iseq->body->param.keyword->num >= 32) {
1431 return YJIT_CANT_COMPILE;
1434 // The EP offset to the undefined bits local
1435 int32_t bits_offset = (int32_t)jit_get_arg(jit, 0);
1437 // The index of the keyword we want to check
1438 int32_t index = (int32_t)jit_get_arg(jit, 1);
1440 // Load environment pointer EP
1441 gen_get_ep(cb, REG0, 0);
1443 // VALUE kw_bits = *(ep - bits);
1444 x86opnd_t bits_opnd = mem_opnd(64, REG0, sizeof(VALUE) * -bits_offset);
1446 // unsigned int b = (unsigned int)FIX2ULONG(kw_bits);
1447 // if ((b & (0x01 << idx))) {
1449 // We can skip the FIX2ULONG conversion by shifting the bit we test
1450 int64_t bit_test = 0x01 << (index + 1);
1451 test(cb, bits_opnd, imm_opnd(bit_test));
1452 mov(cb, REG0, imm_opnd(Qfalse));
1453 mov(cb, REG1, imm_opnd(Qtrue));
1454 cmovz(cb, REG0, REG1);
1456 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_IMM);
1457 mov(cb, stack_ret, REG0);
1459 return YJIT_KEEP_COMPILING;
1462 static codegen_status_t
1463 gen_setlocal_generic(jitstate_t *jit, ctx_t *ctx, uint32_t local_idx, uint32_t level)
1465 // Load environment pointer EP at level
1466 gen_get_ep(cb, REG0, level);
1468 // flags & VM_ENV_FLAG_WB_REQUIRED
1469 x86opnd_t flags_opnd = mem_opnd(64, REG0, sizeof(VALUE) * VM_ENV_DATA_INDEX_FLAGS);
1470 test(cb, flags_opnd, imm_opnd(VM_ENV_FLAG_WB_REQUIRED));
1472 // Create a side-exit to fall back to the interpreter
1473 uint8_t *side_exit = yjit_side_exit(jit, ctx);
1475 // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0
1476 jnz_ptr(cb, side_exit);
1478 // Pop the value to write from the stack
1479 x86opnd_t stack_top = ctx_stack_pop(ctx, 1);
1480 mov(cb, REG1, stack_top);
1482 // Write the value at the environment pointer
1483 const int32_t offs = -(SIZEOF_VALUE * local_idx);
1484 mov(cb, mem_opnd(64, REG0, offs), REG1);
1486 return YJIT_KEEP_COMPILING;
1489 static codegen_status_t
1490 gen_setlocal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1492 int32_t idx = (int32_t)jit_get_arg(jit, 0);
1493 int32_t level = (int32_t)jit_get_arg(jit, 1);
1494 return gen_setlocal_generic(jit, ctx, idx, level);
1497 static codegen_status_t
1498 gen_setlocal_wc1(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1500 int32_t idx = (int32_t)jit_get_arg(jit, 0);
1501 return gen_setlocal_generic(jit, ctx, idx, 1);
1504 static void
1505 gen_jnz_to_target0(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
1507 switch (shape) {
1508 case SHAPE_NEXT0:
1509 case SHAPE_NEXT1:
1510 RUBY_ASSERT(false);
1511 break;
1513 case SHAPE_DEFAULT:
1514 jnz_ptr(cb, target0);
1515 break;
1519 static void
1520 gen_jz_to_target0(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
1522 switch (shape) {
1523 case SHAPE_NEXT0:
1524 case SHAPE_NEXT1:
1525 RUBY_ASSERT(false);
1526 break;
1528 case SHAPE_DEFAULT:
1529 jz_ptr(cb, target0);
1530 break;
1534 static void
1535 gen_jbe_to_target0(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
1537 switch (shape) {
1538 case SHAPE_NEXT0:
1539 case SHAPE_NEXT1:
1540 RUBY_ASSERT(false);
1541 break;
1543 case SHAPE_DEFAULT:
1544 jbe_ptr(cb, target0);
1545 break;
1549 enum jcc_kinds {
1550 JCC_JNE,
1551 JCC_JNZ,
1552 JCC_JZ,
1553 JCC_JE,
1554 JCC_JBE,
1555 JCC_JNA,
1558 // Generate a jump to a stub that recompiles the current YARV instruction on failure.
1559 // When depth_limit is exceeded, generate a jump to a side exit.
1560 static void
1561 jit_chain_guard(enum jcc_kinds jcc, jitstate_t *jit, const ctx_t *ctx, uint8_t depth_limit, uint8_t *side_exit)
1563 branchgen_fn target0_gen_fn;
1565 switch (jcc) {
1566 case JCC_JNE:
1567 case JCC_JNZ:
1568 target0_gen_fn = gen_jnz_to_target0;
1569 break;
1570 case JCC_JZ:
1571 case JCC_JE:
1572 target0_gen_fn = gen_jz_to_target0;
1573 break;
1574 case JCC_JBE:
1575 case JCC_JNA:
1576 target0_gen_fn = gen_jbe_to_target0;
1577 break;
1578 default:
1579 rb_bug("yjit: unimplemented jump kind");
1580 break;
1583 if (ctx->chain_depth < depth_limit) {
1584 ctx_t deeper = *ctx;
1585 deeper.chain_depth++;
1587 gen_branch(
1588 jit,
1589 ctx,
1590 (blockid_t) { jit->iseq, jit->insn_idx },
1591 &deeper,
1592 BLOCKID_NULL,
1593 NULL,
1594 target0_gen_fn
1597 else {
1598 target0_gen_fn(cb, side_exit, NULL, SHAPE_DEFAULT);
1602 enum {
1603 GETIVAR_MAX_DEPTH = 10, // up to 5 different classes, and embedded or not for each
1604 OPT_AREF_MAX_CHAIN_DEPTH = 2, // hashes and arrays
1605 SEND_MAX_DEPTH = 5, // up to 5 different classes
1608 VALUE rb_vm_set_ivar_idx(VALUE obj, uint32_t idx, VALUE val);
1610 // Codegen for setting an instance variable.
1611 // Preconditions:
1612 // - receiver is in REG0
1613 // - receiver has the same class as CLASS_OF(comptime_receiver)
1614 // - no stack push or pops to ctx since the entry to the codegen of the instruction being compiled
1615 static codegen_status_t
1616 gen_set_ivar(jitstate_t *jit, ctx_t *ctx, VALUE recv, VALUE klass, ID ivar_name)
1618 // Save the PC and SP because the callee may allocate
1619 // Note that this modifies REG_SP, which is why we do it first
1620 jit_prepare_routine_call(jit, ctx, REG0);
1622 // Get the operands from the stack
1623 x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
1624 x86opnd_t recv_opnd = ctx_stack_pop(ctx, 1);
1626 uint32_t ivar_index = rb_obj_ensure_iv_index_mapping(recv, ivar_name);
1628 // Call rb_vm_set_ivar_idx with the receiver, the index of the ivar, and the value
1629 mov(cb, C_ARG_REGS[0], recv_opnd);
1630 mov(cb, C_ARG_REGS[1], imm_opnd(ivar_index));
1631 mov(cb, C_ARG_REGS[2], val_opnd);
1632 call_ptr(cb, REG0, (void *)rb_vm_set_ivar_idx);
1634 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_UNKNOWN);
1635 mov(cb, out_opnd, RAX);
1637 return YJIT_KEEP_COMPILING;
1640 // Codegen for getting an instance variable.
1641 // Preconditions:
1642 // - receiver is in REG0
1643 // - receiver has the same class as CLASS_OF(comptime_receiver)
1644 // - no stack push or pops to ctx since the entry to the codegen of the instruction being compiled
1645 static codegen_status_t
1646 gen_get_ivar(jitstate_t *jit, ctx_t *ctx, const int max_chain_depth, VALUE comptime_receiver, ID ivar_name, insn_opnd_t reg0_opnd, uint8_t *side_exit)
1648 VALUE comptime_val_klass = CLASS_OF(comptime_receiver);
1649 const ctx_t starting_context = *ctx; // make a copy for use with jit_chain_guard
1651 // If the class uses the default allocator, instances should all be T_OBJECT
1652 // NOTE: This assumes nobody changes the allocator of the class after allocation.
1653 // Eventually, we can encode whether an object is T_OBJECT or not
1654 // inside object shapes.
1655 if (!RB_TYPE_P(comptime_receiver, T_OBJECT) ||
1656 rb_get_alloc_func(comptime_val_klass) != rb_class_allocate_instance) {
1657 // General case. Call rb_ivar_get().
1658 // VALUE rb_ivar_get(VALUE obj, ID id)
1659 ADD_COMMENT(cb, "call rb_ivar_get()");
1661 // The function could raise exceptions.
1662 jit_prepare_routine_call(jit, ctx, REG1);
1664 mov(cb, C_ARG_REGS[0], REG0);
1665 mov(cb, C_ARG_REGS[1], imm_opnd((int64_t)ivar_name));
1666 call_ptr(cb, REG1, (void *)rb_ivar_get);
1668 if (!reg0_opnd.is_self) {
1669 (void)ctx_stack_pop(ctx, 1);
1671 // Push the ivar on the stack
1672 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_UNKNOWN);
1673 mov(cb, out_opnd, RAX);
1675 // Jump to next instruction. This allows guard chains to share the same successor.
1676 jit_jump_to_next_insn(jit, ctx);
1677 return YJIT_END_BLOCK;
1681 // FIXME:
1682 // This check was added because of a failure in a test involving the
1683 // Nokogiri Document class where we see a T_DATA that still has the default
1684 // allocator.
1685 // Aaron Patterson argues that this is a bug in the C extension, because
1686 // people could call .allocate() on the class and still get a T_OBJECT
1687 // For now I added an extra dynamic check that the receiver is T_OBJECT
1688 // so we can safely pass all the tests in Shopify Core.
1690 // Guard that the receiver is T_OBJECT
1691 // #define RB_BUILTIN_TYPE(x) (int)(((struct RBasic*)(x))->flags & RUBY_T_MASK)
1692 ADD_COMMENT(cb, "guard receiver is T_OBJECT");
1693 mov(cb, REG1, member_opnd(REG0, struct RBasic, flags));
1694 and(cb, REG1, imm_opnd(RUBY_T_MASK));
1695 cmp(cb, REG1, imm_opnd(T_OBJECT));
1696 jit_chain_guard(JCC_JNE, jit, &starting_context, max_chain_depth, side_exit);
1699 // FIXME: Mapping the index could fail when there are too many ivar names. If we're
1700 // compiling for a branch stub, that can cause the exception to be thrown from the
1701 // wrong PC.
1702 uint32_t ivar_index = rb_obj_ensure_iv_index_mapping(comptime_receiver, ivar_name);
1704 // Pop receiver if it's on the temp stack
1705 if (!reg0_opnd.is_self) {
1706 (void)ctx_stack_pop(ctx, 1);
1709 // Compile time self is embedded and the ivar index lands within the object
1710 if (RB_FL_TEST_RAW(comptime_receiver, ROBJECT_EMBED) && ivar_index < ROBJECT_EMBED_LEN_MAX) {
1711 // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
1713 // Guard that self is embedded
1714 // TODO: BT and JC would be shorter
1715 ADD_COMMENT(cb, "guard embedded getivar");
1716 x86opnd_t flags_opnd = member_opnd(REG0, struct RBasic, flags);
1717 test(cb, flags_opnd, imm_opnd(ROBJECT_EMBED));
1718 jit_chain_guard(JCC_JZ, jit, &starting_context, max_chain_depth, COUNTED_EXIT(jit, side_exit, getivar_megamorphic));
1720 // Load the variable
1721 x86opnd_t ivar_opnd = mem_opnd(64, REG0, offsetof(struct RObject, as.ary) + ivar_index * SIZEOF_VALUE);
1722 mov(cb, REG1, ivar_opnd);
1724 // If the variable is Qundef (unset ivar), replace it with Qnil
1725 cmp(cb, REG1, imm_opnd(Qundef));
1726 mov(cb, REG0, imm_opnd(Qnil));
1727 cmove(cb, REG1, REG0);
1729 // Push the ivar on the stack
1730 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_UNKNOWN);
1731 mov(cb, out_opnd, REG1);
1733 else {
1734 // Compile time value is *not* embedded.
1736 // Guard that value is *not* embedded
1737 // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
1738 ADD_COMMENT(cb, "guard extended getivar");
1739 x86opnd_t flags_opnd = member_opnd(REG0, struct RBasic, flags);
1740 test(cb, flags_opnd, imm_opnd(ROBJECT_EMBED));
1741 jit_chain_guard(JCC_JNZ, jit, &starting_context, max_chain_depth, COUNTED_EXIT(jit, side_exit, getivar_megamorphic));
1743 // check that the extended table is big enough
1744 if (ivar_index >= ROBJECT_EMBED_LEN_MAX + 1) {
1745 // Check that the slot is inside the extended table (num_slots > index)
1746 x86opnd_t num_slots = mem_opnd(32, REG0, offsetof(struct RObject, as.heap.numiv));
1747 cmp(cb, num_slots, imm_opnd(ivar_index));
1748 jle_ptr(cb, COUNTED_EXIT(jit, side_exit, getivar_idx_out_of_range));
1751 // Get a pointer to the extended table
1752 x86opnd_t tbl_opnd = mem_opnd(64, REG0, offsetof(struct RObject, as.heap.ivptr));
1753 mov(cb, REG0, tbl_opnd);
1755 // Read the ivar from the extended table
1756 x86opnd_t ivar_opnd = mem_opnd(64, REG0, sizeof(VALUE) * ivar_index);
1757 mov(cb, REG0, ivar_opnd);
1759 // If the ivar is Qundef (unset), replace it with Qnil
1760 cmp(cb, REG0, imm_opnd(Qundef));
1761 mov(cb, REG1, imm_opnd(Qnil));
1762 cmove(cb, REG0, REG1);
1764 // Push the ivar on the stack
1765 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_UNKNOWN);
1766 mov(cb, out_opnd, REG0);
1769 // Jump to next instruction. This allows guard chains to share the same successor.
1770 jit_jump_to_next_insn(jit, ctx);
1771 return YJIT_END_BLOCK;
1774 static codegen_status_t
1775 gen_getinstancevariable(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1777 // Defer compilation so we can specialize on a runtime `self`
1778 if (!jit_at_current_insn(jit)) {
1779 defer_compilation(jit, ctx);
1780 return YJIT_END_BLOCK;
1783 ID ivar_name = (ID)jit_get_arg(jit, 0);
1785 VALUE comptime_val = jit_peek_at_self(jit, ctx);
1786 VALUE comptime_val_klass = CLASS_OF(comptime_val);
1788 // Generate a side exit
1789 uint8_t *side_exit = yjit_side_exit(jit, ctx);
1791 // Guard that the receiver has the same class as the one from compile time.
1792 mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, self));
1794 jit_guard_known_klass(jit, ctx, comptime_val_klass, OPND_SELF, comptime_val, GETIVAR_MAX_DEPTH, side_exit);
1796 return gen_get_ivar(jit, ctx, GETIVAR_MAX_DEPTH, comptime_val, ivar_name, OPND_SELF, side_exit);
1799 void rb_vm_setinstancevariable(const rb_iseq_t *iseq, VALUE obj, ID id, VALUE val, IVC ic);
1801 static codegen_status_t
1802 gen_setinstancevariable(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1804 ID id = (ID)jit_get_arg(jit, 0);
1805 IVC ic = (IVC)jit_get_arg(jit, 1);
1807 // Save the PC and SP because the callee may allocate
1808 // Note that this modifies REG_SP, which is why we do it first
1809 jit_prepare_routine_call(jit, ctx, REG0);
1811 // Get the operands from the stack
1812 x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
1814 // Call rb_vm_setinstancevariable(iseq, obj, id, val, ic);
1815 mov(cb, C_ARG_REGS[1], member_opnd(REG_CFP, rb_control_frame_t, self));
1816 mov(cb, C_ARG_REGS[3], val_opnd);
1817 mov(cb, C_ARG_REGS[2], imm_opnd(id));
1818 mov(cb, C_ARG_REGS[4], const_ptr_opnd(ic));
1819 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], (VALUE)jit->iseq);
1820 call_ptr(cb, REG0, (void *)rb_vm_setinstancevariable);
1822 return YJIT_KEEP_COMPILING;
1825 bool rb_vm_defined(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, rb_num_t op_type, VALUE obj, VALUE v);
1827 static codegen_status_t
1828 gen_defined(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1830 rb_num_t op_type = (rb_num_t)jit_get_arg(jit, 0);
1831 VALUE obj = (VALUE)jit_get_arg(jit, 1);
1832 VALUE pushval = (VALUE)jit_get_arg(jit, 2);
1834 // Save the PC and SP because the callee may allocate
1835 // Note that this modifies REG_SP, which is why we do it first
1836 jit_prepare_routine_call(jit, ctx, REG0);
1838 // Get the operands from the stack
1839 x86opnd_t v_opnd = ctx_stack_pop(ctx, 1);
1841 // Call vm_defined(ec, reg_cfp, op_type, obj, v)
1842 mov(cb, C_ARG_REGS[0], REG_EC);
1843 mov(cb, C_ARG_REGS[1], REG_CFP);
1844 mov(cb, C_ARG_REGS[2], imm_opnd(op_type));
1845 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[3], (VALUE)obj);
1846 mov(cb, C_ARG_REGS[4], v_opnd);
1847 call_ptr(cb, REG0, (void *)rb_vm_defined);
1849 // if (vm_defined(ec, GET_CFP(), op_type, obj, v)) {
1850 // val = pushval;
1851 // }
1852 jit_mov_gc_ptr(jit, cb, REG1, (VALUE)pushval);
1853 cmp(cb, AL, imm_opnd(0));
1854 mov(cb, RAX, imm_opnd(Qnil));
1855 cmovnz(cb, RAX, REG1);
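// Net effect of the sequence above (a sketch, not authoritative): the cmovnz
// implements the branchless equivalent of
//   VALUE val = rb_vm_defined(ec, cfp, op_type, obj, v) ? pushval : Qnil;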
1857 // Push the return value onto the stack
1858 val_type_t out_type = SPECIAL_CONST_P(pushval) ? TYPE_IMM : TYPE_UNKNOWN;
1859 x86opnd_t stack_ret = ctx_stack_push(ctx, out_type);
1860 mov(cb, stack_ret, RAX);
1862 return YJIT_KEEP_COMPILING;
1865 static codegen_status_t
1866 gen_checktype(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1868 enum ruby_value_type type_val = (enum ruby_value_type)jit_get_arg(jit, 0);
1869 // Only three types are emitted by compile.c
1870 if (type_val == T_STRING || type_val == T_ARRAY || type_val == T_HASH) {
1871 val_type_t val_type = ctx_get_opnd_type(ctx, OPND_STACK(0));
1872 x86opnd_t val = ctx_stack_pop(ctx, 1);
1874 x86opnd_t stack_ret;
1876 // Check if we know from type information
1877 if ((type_val == T_STRING && val_type.type == ETYPE_STRING) ||
1878 (type_val == T_ARRAY && val_type.type == ETYPE_ARRAY) ||
1879 (type_val == T_HASH && val_type.type == ETYPE_HASH)) {
1880 // guaranteed type match
1881 stack_ret = ctx_stack_push(ctx, TYPE_TRUE);
1882 mov(cb, stack_ret, imm_opnd(Qtrue));
1883 return YJIT_KEEP_COMPILING;
1885 else if (val_type.is_imm || val_type.type != ETYPE_UNKNOWN) {
1886 // guaranteed not to match T_STRING/T_ARRAY/T_HASH
1887 stack_ret = ctx_stack_push(ctx, TYPE_FALSE);
1888 mov(cb, stack_ret, imm_opnd(Qfalse));
1889 return YJIT_KEEP_COMPILING;
1892 mov(cb, REG0, val);
1893 mov(cb, REG1, imm_opnd(Qfalse));
1895 uint32_t ret = cb_new_label(cb, "ret");
1897 if (!val_type.is_heap) {
1898 // if (SPECIAL_CONST_P(val)) {
1899 // Return Qfalse via REG1 if not on heap
1900 test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK));
1901 jnz_label(cb, ret);
1902 cmp(cb, REG0, imm_opnd(Qnil));
1903 jbe_label(cb, ret);
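// Why this works (illustrative, assuming the usual 64-bit representation):
// immediates such as Fixnum 1 (0x03) have a bit inside RUBY_IMMEDIATE_MASK
// set, while Qfalse (0x00) and Qnil (0x08) do not but compare <= Qnil, so
// anything that reaches the code below must be a heap pointer.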
1906 // Check type on object
1907 mov(cb, REG0, mem_opnd(64, REG0, offsetof(struct RBasic, flags)));
1908 and(cb, REG0, imm_opnd(RUBY_T_MASK));
1909 cmp(cb, REG0, imm_opnd(type_val));
1910 mov(cb, REG0, imm_opnd(Qtrue));
1911 // REG1 contains Qfalse from above
1912 cmove(cb, REG1, REG0);
1914 cb_write_label(cb, ret);
1915 stack_ret = ctx_stack_push(ctx, TYPE_IMM);
1916 mov(cb, stack_ret, REG1);
1917 cb_link_labels(cb);
1919 return YJIT_KEEP_COMPILING;
1921 else {
1922 return YJIT_CANT_COMPILE;
1926 static codegen_status_t
1927 gen_concatstrings(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1929 rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
1931 // Save the PC and SP because we are allocating
1932 jit_prepare_routine_call(jit, ctx, REG0);
1934 x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(sizeof(VALUE) * (uint32_t)n));
1936 // call rb_str_concat_literals(long n, const VALUE *strings);
1937 mov(cb, C_ARG_REGS[0], imm_opnd(n));
1938 lea(cb, C_ARG_REGS[1], values_ptr);
1939 call_ptr(cb, REG0, (void *)rb_str_concat_literals);
1941 ctx_stack_pop(ctx, n);
1942 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_STRING);
1943 mov(cb, stack_ret, RAX);
1945 return YJIT_KEEP_COMPILING;
1948 static void
1949 guard_two_fixnums(ctx_t *ctx, uint8_t *side_exit)
1951 // Get the stack operand types
1952 val_type_t arg1_type = ctx_get_opnd_type(ctx, OPND_STACK(0));
1953 val_type_t arg0_type = ctx_get_opnd_type(ctx, OPND_STACK(1));
1955 if (arg0_type.is_heap || arg1_type.is_heap) {
1956 jmp_ptr(cb, side_exit);
1957 return;
1960 if (arg0_type.type != ETYPE_FIXNUM && arg0_type.type != ETYPE_UNKNOWN) {
1961 jmp_ptr(cb, side_exit);
1962 return;
1965 if (arg1_type.type != ETYPE_FIXNUM && arg1_type.type != ETYPE_UNKNOWN) {
1966 jmp_ptr(cb, side_exit);
1967 return;
1970 RUBY_ASSERT(!arg0_type.is_heap);
1971 RUBY_ASSERT(!arg1_type.is_heap);
1972 RUBY_ASSERT(arg0_type.type == ETYPE_FIXNUM || arg0_type.type == ETYPE_UNKNOWN);
1973 RUBY_ASSERT(arg1_type.type == ETYPE_FIXNUM || arg1_type.type == ETYPE_UNKNOWN);
1975 // Get stack operands without popping them
1976 x86opnd_t arg1 = ctx_stack_opnd(ctx, 0);
1977 x86opnd_t arg0 = ctx_stack_opnd(ctx, 1);
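// Tagging sketch (assuming the usual representation): a Fixnum N is stored
// as (N << 1) | 1, e.g. 7 becomes 0x0f, so testing RUBY_FIXNUM_FLAG (the low
// bit) below is enough to distinguish fixnums from other values.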
1979 // If not fixnums, fall back
1980 if (arg0_type.type != ETYPE_FIXNUM) {
1981 ADD_COMMENT(cb, "guard arg0 fixnum");
1982 test(cb, arg0, imm_opnd(RUBY_FIXNUM_FLAG));
1983 jz_ptr(cb, side_exit);
1985 if (arg1_type.type != ETYPE_FIXNUM) {
1986 ADD_COMMENT(cb, "guard arg1 fixnum");
1987 test(cb, arg1, imm_opnd(RUBY_FIXNUM_FLAG));
1988 jz_ptr(cb, side_exit);
1991 // Set stack types in context
1992 ctx_upgrade_opnd_type(ctx, OPND_STACK(0), TYPE_FIXNUM);
1993 ctx_upgrade_opnd_type(ctx, OPND_STACK(1), TYPE_FIXNUM);
1996 // Conditional move operation used by comparison operators
1997 typedef void (*cmov_fn)(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1);
1999 static codegen_status_t
2000 gen_fixnum_cmp(jitstate_t *jit, ctx_t *ctx, cmov_fn cmov_op)
2002 // Defer compilation so we can specialize based on a runtime receiver
2003 if (!jit_at_current_insn(jit)) {
2004 defer_compilation(jit, ctx);
2005 return YJIT_END_BLOCK;
2008 VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2009 VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2011 if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2012 // Create a side-exit to fall back to the interpreter
2013 // Note: we generate the side-exit before popping operands from the stack
2014 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2016 if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_LT)) {
2017 return YJIT_CANT_COMPILE;
2020 // Check that both operands are fixnums
2021 guard_two_fixnums(ctx, side_exit);
2023 // Get the operands from the stack
2024 x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2025 x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2027 // Compare the arguments
2028 xor(cb, REG0_32, REG0_32); // REG0 = Qfalse
2029 mov(cb, REG1, arg0);
2030 cmp(cb, REG1, arg1);
2031 mov(cb, REG1, imm_opnd(Qtrue));
2032 cmov_op(cb, REG0, REG1);
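// Branchless boolean sketch: REG0 starts at 0, which is Qfalse (see the
// STATIC_ASSERT on Qfalse == 0 elsewhere in this file), and the cmov only
// overwrites it with Qtrue when the comparison holds, i.e. roughly
//   result = (arg0 OP arg1) ? Qtrue : Qfalse;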
2034 // Push the output on the stack
2035 x86opnd_t dst = ctx_stack_push(ctx, TYPE_UNKNOWN);
2036 mov(cb, dst, REG0);
2038 return YJIT_KEEP_COMPILING;
2040 else {
2041 return gen_opt_send_without_block(jit, ctx, cb);
2045 static codegen_status_t
2046 gen_opt_lt(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2048 return gen_fixnum_cmp(jit, ctx, cmovl);
2051 static codegen_status_t
2052 gen_opt_le(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2054 return gen_fixnum_cmp(jit, ctx, cmovle);
2057 static codegen_status_t
2058 gen_opt_ge(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2060 return gen_fixnum_cmp(jit, ctx, cmovge);
2063 static codegen_status_t
2064 gen_opt_gt(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2066 return gen_fixnum_cmp(jit, ctx, cmovg);
2069 // Implements specialized equality for either two fixnum or two strings
2070 // Returns true if code was generated, otherwise false
2071 static bool
2072 gen_equality_specialized(jitstate_t *jit, ctx_t *ctx, uint8_t *side_exit)
2074 VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2075 VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2077 x86opnd_t a_opnd = ctx_stack_opnd(ctx, 1);
2078 x86opnd_t b_opnd = ctx_stack_opnd(ctx, 0);
2080 if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2081 if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_EQ)) {
2082 // if overridden, emit the generic version
2083 return false;
2086 guard_two_fixnums(ctx, side_exit);
2088 mov(cb, REG0, a_opnd);
2089 cmp(cb, REG0, b_opnd);
2091 mov(cb, REG0, imm_opnd(Qfalse));
2092 mov(cb, REG1, imm_opnd(Qtrue));
2093 cmove(cb, REG0, REG1);
2095 // Push the output on the stack
2096 ctx_stack_pop(ctx, 2);
2097 x86opnd_t dst = ctx_stack_push(ctx, TYPE_IMM);
2098 mov(cb, dst, REG0);
2100 return true;
2102 else if (CLASS_OF(comptime_a) == rb_cString &&
2103 CLASS_OF(comptime_b) == rb_cString) {
2104 if (!assume_bop_not_redefined(jit, STRING_REDEFINED_OP_FLAG, BOP_EQ)) {
2105 // if overridden, emit the generic version
2106 return false;
2109 // Load a and b in preparation for call later
2110 mov(cb, C_ARG_REGS[0], a_opnd);
2111 mov(cb, C_ARG_REGS[1], b_opnd);
2113 // Guard that a is a String
2114 mov(cb, REG0, C_ARG_REGS[0]);
2115 jit_guard_known_klass(jit, ctx, rb_cString, OPND_STACK(1), comptime_a, SEND_MAX_DEPTH, side_exit);
2117 uint32_t ret = cb_new_label(cb, "ret");
2119 // If they are equal by identity, return true
2120 cmp(cb, C_ARG_REGS[0], C_ARG_REGS[1]);
2121 mov(cb, RAX, imm_opnd(Qtrue));
2122 je_label(cb, ret);
2124 // Otherwise guard that b is a T_STRING (from type info) or String (from runtime guard)
2125 if (ctx_get_opnd_type(ctx, OPND_STACK(0)).type != ETYPE_STRING) {
2126 mov(cb, REG0, C_ARG_REGS[1]);
2127 // Note: any T_STRING is valid here, but we check for a ::String for simplicity
2128 jit_guard_known_klass(jit, ctx, rb_cString, OPND_STACK(0), comptime_b, SEND_MAX_DEPTH, side_exit);
2131 // Call rb_str_eql_internal(a, b)
2132 call_ptr(cb, REG0, (void *)rb_str_eql_internal);
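// Net effect of this fast path, roughly:
//   result = (a == b) ? Qtrue : rb_str_eql_internal(a, b);
// i.e. identical objects skip the byte-by-byte comparison entirely.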
2134 // Push the output on the stack
2135 cb_write_label(cb, ret);
2136 ctx_stack_pop(ctx, 2);
2137 x86opnd_t dst = ctx_stack_push(ctx, TYPE_IMM);
2138 mov(cb, dst, RAX);
2139 cb_link_labels(cb);
2141 return true;
2143 else {
2144 return false;
2148 static codegen_status_t
2149 gen_opt_eq(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2151 // Defer compilation so we can specialize based on a runtime receiver
2152 if (!jit_at_current_insn(jit)) {
2153 defer_compilation(jit, ctx);
2154 return YJIT_END_BLOCK;
2157 // Create a side-exit to fall back to the interpreter
2158 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2160 if (gen_equality_specialized(jit, ctx, side_exit)) {
2161 jit_jump_to_next_insn(jit, ctx);
2162 return YJIT_END_BLOCK;
2164 else {
2165 return gen_opt_send_without_block(jit, ctx, cb);
2169 static codegen_status_t gen_send_general(jitstate_t *jit, ctx_t *ctx, struct rb_call_data *cd, rb_iseq_t *block);
2171 static codegen_status_t
2172 gen_opt_neq(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2174 // opt_neq is passed two rb_call_data as arguments:
2175 // first for ==, second for !=
2176 struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 1);
2177 return gen_send_general(jit, ctx, cd, NULL);
2180 static codegen_status_t
2181 gen_opt_aref(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2183 struct rb_call_data * cd = (struct rb_call_data *)jit_get_arg(jit, 0);
2184 int32_t argc = (int32_t)vm_ci_argc(cd->ci);
2186 // Only JIT one arg calls like `ary[6]`
2187 if (argc != 1) {
2188 GEN_COUNTER_INC(cb, oaref_argc_not_one);
2189 return YJIT_CANT_COMPILE;
2192 // Defer compilation so we can specialize based on a runtime receiver
2193 if (!jit_at_current_insn(jit)) {
2194 defer_compilation(jit, ctx);
2195 return YJIT_END_BLOCK;
2198 // Remember the context on entry for adding guard chains
2199 const ctx_t starting_context = *ctx;
2201 // Specialize based on compile-time values
2202 VALUE comptime_idx = jit_peek_at_stack(jit, ctx, 0);
2203 VALUE comptime_recv = jit_peek_at_stack(jit, ctx, 1);
2205 // Create a side-exit to fall back to the interpreter
2206 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2208 if (CLASS_OF(comptime_recv) == rb_cArray && RB_FIXNUM_P(comptime_idx)) {
2209 if (!assume_bop_not_redefined(jit, ARRAY_REDEFINED_OP_FLAG, BOP_AREF)) {
2210 return YJIT_CANT_COMPILE;
2213 // Pop the stack operands
2214 x86opnd_t idx_opnd = ctx_stack_pop(ctx, 1);
2215 x86opnd_t recv_opnd = ctx_stack_pop(ctx, 1);
2216 mov(cb, REG0, recv_opnd);
2218 // if (SPECIAL_CONST_P(recv)) {
2219 // Bail if receiver is not a heap object
2220 test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK));
2221 jnz_ptr(cb, side_exit);
2222 cmp(cb, REG0, imm_opnd(Qfalse));
2223 je_ptr(cb, side_exit);
2224 cmp(cb, REG0, imm_opnd(Qnil));
2225 je_ptr(cb, side_exit);
2227 // Bail if recv has a class other than ::Array.
2228 // BOP_AREF check above is only good for ::Array.
2229 mov(cb, REG1, mem_opnd(64, REG0, offsetof(struct RBasic, klass)));
2230 mov(cb, REG0, const_ptr_opnd((void *)rb_cArray));
2231 cmp(cb, REG0, REG1);
2232 jit_chain_guard(JCC_JNE, jit, &starting_context, OPT_AREF_MAX_CHAIN_DEPTH, side_exit);
2234 // Bail if idx is not a FIXNUM
2235 mov(cb, REG1, idx_opnd);
2236 test(cb, REG1, imm_opnd(RUBY_FIXNUM_FLAG));
2237 jz_ptr(cb, COUNTED_EXIT(jit, side_exit, oaref_arg_not_fixnum));
2239 // Call VALUE rb_ary_entry_internal(VALUE ary, long offset).
2240 // It never raises or allocates, so we don't need to write to cfp->pc.
2242 mov(cb, RDI, recv_opnd);
2243 sar(cb, REG1, imm_opnd(1)); // Convert fixnum to int
2244 mov(cb, RSI, REG1);
2245 call_ptr(cb, REG0, (void *)rb_ary_entry_internal);
2247 // Push the return value onto the stack
2248 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2249 mov(cb, stack_ret, RAX);
2252 // Jump to next instruction. This allows guard chains to share the same successor.
2253 jit_jump_to_next_insn(jit, ctx);
2254 return YJIT_END_BLOCK;
2256 else if (CLASS_OF(comptime_recv) == rb_cHash) {
2257 if (!assume_bop_not_redefined(jit, HASH_REDEFINED_OP_FLAG, BOP_AREF)) {
2258 return YJIT_CANT_COMPILE;
2261 x86opnd_t key_opnd = ctx_stack_opnd(ctx, 0);
2262 x86opnd_t recv_opnd = ctx_stack_opnd(ctx, 1);
2264 // Guard that the receiver is a hash
2265 mov(cb, REG0, recv_opnd);
2266 jit_guard_known_klass(jit, ctx, rb_cHash, OPND_STACK(1), comptime_recv, OPT_AREF_MAX_CHAIN_DEPTH, side_exit);
2268 // Setup arguments for rb_hash_aref().
2269 mov(cb, C_ARG_REGS[0], REG0);
2270 mov(cb, C_ARG_REGS[1], key_opnd);
2272 // Prepare to call rb_hash_aref(). It might call #hash on the key.
2273 jit_prepare_routine_call(jit, ctx, REG0);
2275 call_ptr(cb, REG0, (void *)rb_hash_aref);
2277 // Pop the key and the receiver
2278 (void)ctx_stack_pop(ctx, 2);
2280 // Push the return value onto the stack
2281 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2282 mov(cb, stack_ret, RAX);
2284 // Jump to next instruction. This allows guard chains to share the same successor.
2285 jit_jump_to_next_insn(jit, ctx);
2286 return YJIT_END_BLOCK;
2288 else {
2289 // General case. Call the [] method.
2290 return gen_opt_send_without_block(jit, ctx, cb);
2294 static codegen_status_t
2295 gen_opt_aset(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2297 // Defer compilation so we can specialize on a runtime `self`
2298 if (!jit_at_current_insn(jit)) {
2299 defer_compilation(jit, ctx);
2300 return YJIT_END_BLOCK;
2303 VALUE comptime_recv = jit_peek_at_stack(jit, ctx, 2);
2304 VALUE comptime_key = jit_peek_at_stack(jit, ctx, 1);
2306 // Get the operands from the stack
2307 x86opnd_t recv = ctx_stack_opnd(ctx, 2);
2308 x86opnd_t key = ctx_stack_opnd(ctx, 1);
2309 x86opnd_t val = ctx_stack_opnd(ctx, 0);
2311 if (CLASS_OF(comptime_recv) == rb_cArray && FIXNUM_P(comptime_key)) {
2312 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2314 // Guard receiver is an Array
2315 mov(cb, REG0, recv);
2316 jit_guard_known_klass(jit, ctx, rb_cArray, OPND_STACK(2), comptime_recv, SEND_MAX_DEPTH, side_exit);
2318 // Guard key is a fixnum
2319 mov(cb, REG0, key);
2320 jit_guard_known_klass(jit, ctx, rb_cInteger, OPND_STACK(1), comptime_key, SEND_MAX_DEPTH, side_exit);
2322 // Call rb_ary_store
2323 mov(cb, C_ARG_REGS[0], recv);
2324 mov(cb, C_ARG_REGS[1], key);
2325 sar(cb, C_ARG_REGS[1], imm_opnd(1)); // FIX2LONG(key)
2326 mov(cb, C_ARG_REGS[2], val);
2328 // We might allocate or raise
2329 jit_prepare_routine_call(jit, ctx, REG0);
2331 call_ptr(cb, REG0, (void *)rb_ary_store);
2333 // rb_ary_store returns void
2334 // stored value should still be on stack
2335 mov(cb, REG0, ctx_stack_opnd(ctx, 0));
2337 // Push the return value onto the stack
2338 ctx_stack_pop(ctx, 3);
2339 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2340 mov(cb, stack_ret, REG0);
2342 jit_jump_to_next_insn(jit, ctx);
2343 return YJIT_END_BLOCK;
2345 else if (CLASS_OF(comptime_recv) == rb_cHash) {
2346 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2348 // Guard receiver is a Hash
2349 mov(cb, REG0, recv);
2350 jit_guard_known_klass(jit, ctx, rb_cHash, OPND_STACK(2), comptime_recv, SEND_MAX_DEPTH, side_exit);
2352 // Call rb_hash_aset
2353 mov(cb, C_ARG_REGS[0], recv);
2354 mov(cb, C_ARG_REGS[1], key);
2355 mov(cb, C_ARG_REGS[2], val);
2357 // We might allocate or raise
2358 jit_prepare_routine_call(jit, ctx, REG0);
2360 call_ptr(cb, REG0, (void *)rb_hash_aset);
2362 // Push the return value onto the stack
2363 ctx_stack_pop(ctx, 3);
2364 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2365 mov(cb, stack_ret, RAX);
2367 jit_jump_to_next_insn(jit, ctx);
2368 return YJIT_END_BLOCK;
2370 else {
2371 return gen_opt_send_without_block(jit, ctx, cb);
2375 static codegen_status_t
2376 gen_opt_and(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2378 // Defer compilation so we can specialize on a runtime `self`
2379 if (!jit_at_current_insn(jit)) {
2380 defer_compilation(jit, ctx);
2381 return YJIT_END_BLOCK;
2384 VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2385 VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2387 if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2388 // Create a side-exit to fall back to the interpreter
2389 // Note: we generate the side-exit before popping operands from the stack
2390 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2392 if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_AND)) {
2393 return YJIT_CANT_COMPILE;
2396 // Check that both operands are fixnums
2397 guard_two_fixnums(ctx, side_exit);
2399 // Get the operands and destination from the stack
2400 x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2401 x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2403 // Do the bitwise AND: arg0 & arg1
2404 mov(cb, REG0, arg0);
2405 and(cb, REG0, arg1);
2407 // Push the output on the stack
2408 x86opnd_t dst = ctx_stack_push(ctx, TYPE_FIXNUM);
2409 mov(cb, dst, REG0);
2411 return YJIT_KEEP_COMPILING;
2413 else {
2414 // Delegate to send, call the method on the recv
2415 return gen_opt_send_without_block(jit, ctx, cb);
2419 static codegen_status_t
2420 gen_opt_or(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2422 // Defer compilation so we can specialize on a runtime `self`
2423 if (!jit_at_current_insn(jit)) {
2424 defer_compilation(jit, ctx);
2425 return YJIT_END_BLOCK;
2428 VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2429 VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2431 if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2432 // Create a side-exit to fall back to the interpreter
2433 // Note: we generate the side-exit before popping operands from the stack
2434 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2436 if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_OR)) {
2437 return YJIT_CANT_COMPILE;
2440 // Check that both operands are fixnums
2441 guard_two_fixnums(ctx, side_exit);
2443 // Get the operands and destination from the stack
2444 x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2445 x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2447 // Do the bitwise OR: arg0 | arg1
2448 mov(cb, REG0, arg0);
2449 or(cb, REG0, arg1);
2451 // Push the output on the stack
2452 x86opnd_t dst = ctx_stack_push(ctx, TYPE_FIXNUM);
2453 mov(cb, dst, REG0);
2455 return YJIT_KEEP_COMPILING;
2457 else {
2458 // Delegate to send, call the method on the recv
2459 return gen_opt_send_without_block(jit, ctx, cb);
2463 static codegen_status_t
2464 gen_opt_minus(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2466 // Defer compilation so we can specialize on a runtime `self`
2467 if (!jit_at_current_insn(jit)) {
2468 defer_compilation(jit, ctx);
2469 return YJIT_END_BLOCK;
2472 VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2473 VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2475 if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2476 // Create a side-exit to fall back to the interpreter
2477 // Note: we generate the side-exit before popping operands from the stack
2478 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2480 if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_MINUS)) {
2481 return YJIT_CANT_COMPILE;
2484 // Check that both operands are fixnums
2485 guard_two_fixnums(ctx, side_exit);
2487 // Get the operands and destination from the stack
2488 x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2489 x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2491 // Subtract arg0 - arg1 and test for overflow
2492 mov(cb, REG0, arg0);
2493 sub(cb, REG0, arg1);
2494 jo_ptr(cb, side_exit);
2495 add(cb, REG0, imm_opnd(1));
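// Tagged arithmetic sketch: with a stored as 2A+1 and b as 2B+1,
// (2A+1) - (2B+1) = 2(A-B); the jo above catches fixnum overflow and the
// add of 1 restores the tag, giving 2(A-B)+1.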
2497 // Push the output on the stack
2498 x86opnd_t dst = ctx_stack_push(ctx, TYPE_FIXNUM);
2499 mov(cb, dst, REG0);
2501 return YJIT_KEEP_COMPILING;
2503 else {
2504 // Delegate to send, call the method on the recv
2505 return gen_opt_send_without_block(jit, ctx, cb);
2509 static codegen_status_t
2510 gen_opt_plus(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2512 // Defer compilation so we can specialize on a runtime `self`
2513 if (!jit_at_current_insn(jit)) {
2514 defer_compilation(jit, ctx);
2515 return YJIT_END_BLOCK;
2518 VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2519 VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2521 if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2522 // Create a side-exit to fall back to the interpreter
2523 // Note: we generate the side-exit before popping operands from the stack
2524 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2526 if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS)) {
2527 return YJIT_CANT_COMPILE;
2530 // Check that both operands are fixnums
2531 guard_two_fixnums(ctx, side_exit);
2533 // Get the operands and destination from the stack
2534 x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2535 x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2537 // Add arg0 + arg1 and test for overflow
2538 mov(cb, REG0, arg0);
2539 sub(cb, REG0, imm_opnd(1));
2540 add(cb, REG0, arg1);
2541 jo_ptr(cb, side_exit);
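// Tagged arithmetic sketch: with a stored as 2A+1 and b as 2B+1,
// (2A+1) - 1 + (2B+1) = 2(A+B) + 1, which is already the tagged sum, so no
// re-tagging is needed; jo catches fixnum overflow.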
2543 // Push the output on the stack
2544 x86opnd_t dst = ctx_stack_push(ctx, TYPE_FIXNUM);
2545 mov(cb, dst, REG0);
2547 return YJIT_KEEP_COMPILING;
2549 else {
2550 // Delegate to send, call the method on the recv
2551 return gen_opt_send_without_block(jit, ctx, cb);
2555 static codegen_status_t
2556 gen_opt_mult(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2558 // Delegate to send, call the method on the recv
2559 return gen_opt_send_without_block(jit, ctx, cb);
2562 static codegen_status_t
2563 gen_opt_div(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2565 // Delegate to send, call the method on the recv
2566 return gen_opt_send_without_block(jit, ctx, cb);
2569 VALUE rb_vm_opt_mod(VALUE recv, VALUE obj);
2571 static codegen_status_t
2572 gen_opt_mod(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2574 // Save the PC and SP because the callee may allocate bignums
2575 // Note that this modifies REG_SP, which is why we do it first
2576 jit_prepare_routine_call(jit, ctx, REG0);
2578 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2580 // Get the operands from the stack
2581 x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2582 x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2584 // Call rb_vm_opt_mod(VALUE recv, VALUE obj)
2585 mov(cb, C_ARG_REGS[0], arg0);
2586 mov(cb, C_ARG_REGS[1], arg1);
2587 call_ptr(cb, REG0, (void *)rb_vm_opt_mod);
2589 // If val == Qundef, bail to do a method call
2590 cmp(cb, RAX, imm_opnd(Qundef));
2591 je_ptr(cb, side_exit);
2593 // Push the return value onto the stack
2594 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2595 mov(cb, stack_ret, RAX);
2597 return YJIT_KEEP_COMPILING;
2600 static codegen_status_t
2601 gen_opt_ltlt(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2603 // Delegate to send, call the method on the recv
2604 return gen_opt_send_without_block(jit, ctx, cb);
2607 static codegen_status_t
2608 gen_opt_nil_p(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2610 // Delegate to send, call the method on the recv
2611 return gen_opt_send_without_block(jit, ctx, cb);
2614 static codegen_status_t
2615 gen_opt_empty_p(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2617 // Delegate to send, call the method on the recv
2618 return gen_opt_send_without_block(jit, ctx, cb);
2621 static codegen_status_t
2622 gen_opt_str_freeze(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2624 if (!assume_bop_not_redefined(jit, STRING_REDEFINED_OP_FLAG, BOP_FREEZE)) {
2625 return YJIT_CANT_COMPILE;
2628 VALUE str = jit_get_arg(jit, 0);
2629 jit_mov_gc_ptr(jit, cb, REG0, str);
2631 // Push the return value onto the stack
2632 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_STRING);
2633 mov(cb, stack_ret, REG0);
2635 return YJIT_KEEP_COMPILING;
2638 static codegen_status_t
2639 gen_opt_str_uminus(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2641 if (!assume_bop_not_redefined(jit, STRING_REDEFINED_OP_FLAG, BOP_UMINUS)) {
2642 return YJIT_CANT_COMPILE;
2645 VALUE str = jit_get_arg(jit, 0);
2646 jit_mov_gc_ptr(jit, cb, REG0, str);
2648 // Push the return value onto the stack
2649 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_STRING);
2650 mov(cb, stack_ret, REG0);
2652 return YJIT_KEEP_COMPILING;
2655 static codegen_status_t
2656 gen_opt_not(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2658 return gen_opt_send_without_block(jit, ctx, cb);
2661 static codegen_status_t
2662 gen_opt_size(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2664 return gen_opt_send_without_block(jit, ctx, cb);
2667 static codegen_status_t
2668 gen_opt_length(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2670 return gen_opt_send_without_block(jit, ctx, cb);
2673 static codegen_status_t
2674 gen_opt_regexpmatch2(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2676 return gen_opt_send_without_block(jit, ctx, cb);
2679 static codegen_status_t
2680 gen_opt_case_dispatch(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2682 // Normally this instruction would lookup the key in a hash and jump to an
2683 // offset based on that.
2684 // Instead we can take the fallback case and continue with the next
2685 // instruction.
2686 // We'd hope that our jitted code will be sufficiently fast without the
2687 // hash lookup, at least for small hashes, but it's worth revisiting this
2688 // assumption in the future.
2690 ctx_stack_pop(ctx, 1);
2692 return YJIT_KEEP_COMPILING; // continue with the next instruction
2695 static void
2696 gen_branchif_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
2698 switch (shape) {
2699 case SHAPE_NEXT0:
2700 jz_ptr(cb, target1);
2701 break;
2703 case SHAPE_NEXT1:
2704 jnz_ptr(cb, target0);
2705 break;
2707 case SHAPE_DEFAULT:
2708 jnz_ptr(cb, target0);
2709 jmp_ptr(cb, target1);
2710 break;
2714 static codegen_status_t
2715 gen_branchif(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2717 int32_t jump_offset = (int32_t)jit_get_arg(jit, 0);
2719 // Check for interrupts, but only on backward branches that may create loops
2720 if (jump_offset < 0) {
2721 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2722 yjit_check_ints(cb, side_exit);
2725 // Test if any bit (outside of the Qnil bit) is on
2726 // RUBY_Qfalse /* ...0000 0000 */
2727 // RUBY_Qnil /* ...0000 1000 */
2728 x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
2729 test(cb, val_opnd, imm_opnd(~Qnil));
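// Sketch: only Qfalse (0b0000) and Qnil (0b1000) have no bits set outside
// the Qnil bit, so a single test against ~Qnil separates falsey from truthy,
// e.g. Fixnum 1 (0x03) & ~Qnil != 0 while Qnil & ~Qnil == 0.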
2731 // Get the branch target instruction offsets
2732 uint32_t next_idx = jit_next_insn_idx(jit);
2733 uint32_t jump_idx = next_idx + jump_offset;
2734 blockid_t next_block = { jit->iseq, next_idx };
2735 blockid_t jump_block = { jit->iseq, jump_idx };
2737 // Generate the branch instructions
2738 gen_branch(
2739 jit,
2740 ctx,
2741 jump_block,
2742 ctx,
2743 next_block,
2744 ctx,
2745 gen_branchif_branch
2748 return YJIT_END_BLOCK;
2751 static void
2752 gen_branchunless_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
2754 switch (shape) {
2755 case SHAPE_NEXT0:
2756 jnz_ptr(cb, target1);
2757 break;
2759 case SHAPE_NEXT1:
2760 jz_ptr(cb, target0);
2761 break;
2763 case SHAPE_DEFAULT:
2764 jz_ptr(cb, target0);
2765 jmp_ptr(cb, target1);
2766 break;
2770 static codegen_status_t
2771 gen_branchunless(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2773 int32_t jump_offset = (int32_t)jit_get_arg(jit, 0);
2775 // Check for interrupts, but only on backward branches that may create loops
2776 if (jump_offset < 0) {
2777 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2778 yjit_check_ints(cb, side_exit);
2781 // Test if any bit (outside of the Qnil bit) is on
2782 // RUBY_Qfalse /* ...0000 0000 */
2783 // RUBY_Qnil /* ...0000 1000 */
2784 x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
2785 test(cb, val_opnd, imm_opnd(~Qnil));
2787 // Get the branch target instruction offsets
2788 uint32_t next_idx = jit_next_insn_idx(jit);
2789 uint32_t jump_idx = next_idx + jump_offset;
2790 blockid_t next_block = { jit->iseq, next_idx };
2791 blockid_t jump_block = { jit->iseq, jump_idx };
2793 // Generate the branch instructions
2794 gen_branch(
2795 jit,
2796 ctx,
2797 jump_block,
2798 ctx,
2799 next_block,
2800 ctx,
2801 gen_branchunless_branch
2804 return YJIT_END_BLOCK;
2807 static void
2808 gen_branchnil_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
2810 switch (shape) {
2811 case SHAPE_NEXT0:
2812 jne_ptr(cb, target1);
2813 break;
2815 case SHAPE_NEXT1:
2816 je_ptr(cb, target0);
2817 break;
2819 case SHAPE_DEFAULT:
2820 je_ptr(cb, target0);
2821 jmp_ptr(cb, target1);
2822 break;
2826 static codegen_status_t
2827 gen_branchnil(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2829 int32_t jump_offset = (int32_t)jit_get_arg(jit, 0);
2831 // Check for interrupts, but only on backward branches that may create loops
2832 if (jump_offset < 0) {
2833 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2834 yjit_check_ints(cb, side_exit);
2837 // Test if the value is Qnil
2838 // RUBY_Qnil /* ...0000 1000 */
2839 x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
2840 cmp(cb, val_opnd, imm_opnd(Qnil));
2842 // Get the branch target instruction offsets
2843 uint32_t next_idx = jit_next_insn_idx(jit);
2844 uint32_t jump_idx = next_idx + jump_offset;
2845 blockid_t next_block = { jit->iseq, next_idx };
2846 blockid_t jump_block = { jit->iseq, jump_idx };
2848 // Generate the branch instructions
2849 gen_branch(
2850 jit,
2851 ctx,
2852 jump_block,
2853 ctx,
2854 next_block,
2855 ctx,
2856 gen_branchnil_branch
2859 return YJIT_END_BLOCK;
2862 static codegen_status_t
2863 gen_jump(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2865 int32_t jump_offset = (int32_t)jit_get_arg(jit, 0);
2867 // Check for interrupts, but only on backward branches that may create loops
2868 if (jump_offset < 0) {
2869 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2870 yjit_check_ints(cb, side_exit);
2873 // Get the branch target instruction offsets
2874 uint32_t jump_idx = jit_next_insn_idx(jit) + jump_offset;
2875 blockid_t jump_block = { jit->iseq, jump_idx };
2877 // Generate the jump instruction
2878 gen_direct_jump(
2879 jit,
2880 ctx,
2881 jump_block
2884 return YJIT_END_BLOCK;
2888 Guard that self or a stack operand has the same class as `known_klass`, using
2889 `sample_instance` to speculate about the shape of the runtime value.
2890 FIXNUM and on-heap integers are treated as if they have distinct classes, and
2891 the guard generated for one will fail for the other.
2893 Recompile as a contingency if possible, or take a side exit as a last resort.
2895 static bool
2896 jit_guard_known_klass(jitstate_t *jit, ctx_t *ctx, VALUE known_klass, insn_opnd_t insn_opnd, VALUE sample_instance, const int max_chain_depth, uint8_t *side_exit)
2898 val_type_t val_type = ctx_get_opnd_type(ctx, insn_opnd);
2900 if (known_klass == rb_cNilClass) {
2901 RUBY_ASSERT(!val_type.is_heap);
2902 if (val_type.type != ETYPE_NIL) {
2903 RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2905 ADD_COMMENT(cb, "guard object is nil");
2906 cmp(cb, REG0, imm_opnd(Qnil));
2907 jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
2909 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_NIL);
2912 else if (known_klass == rb_cTrueClass) {
2913 RUBY_ASSERT(!val_type.is_heap);
2914 if (val_type.type != ETYPE_TRUE) {
2915 RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2917 ADD_COMMENT(cb, "guard object is true");
2918 cmp(cb, REG0, imm_opnd(Qtrue));
2919 jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
2921 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_TRUE);
2924 else if (known_klass == rb_cFalseClass) {
2925 RUBY_ASSERT(!val_type.is_heap);
2926 if (val_type.type != ETYPE_FALSE) {
2927 RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2929 ADD_COMMENT(cb, "guard object is false");
2930 STATIC_ASSERT(qfalse_is_zero, Qfalse == 0);
2931 test(cb, REG0, REG0);
2932 jit_chain_guard(JCC_JNZ, jit, ctx, max_chain_depth, side_exit);
2934 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_FALSE);
2937 else if (known_klass == rb_cInteger && FIXNUM_P(sample_instance)) {
2938 RUBY_ASSERT(!val_type.is_heap);
2939 // We will guard fixnum and bignum as though they were separate classes
2940 // BIGNUM can be handled by the general else case below
2941 if (val_type.type != ETYPE_FIXNUM || !val_type.is_imm) {
2942 RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2944 ADD_COMMENT(cb, "guard object is fixnum");
2945 test(cb, REG0, imm_opnd(RUBY_FIXNUM_FLAG));
2946 jit_chain_guard(JCC_JZ, jit, ctx, max_chain_depth, side_exit);
2947 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_FIXNUM);
2950 else if (known_klass == rb_cSymbol && STATIC_SYM_P(sample_instance)) {
2951 RUBY_ASSERT(!val_type.is_heap);
2952 // We will guard STATIC vs DYNAMIC as though they were separate classes
2953 // DYNAMIC symbols can be handled by the general else case below
2954 if (val_type.type != ETYPE_SYMBOL || !val_type.is_imm) {
2955 RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2957 ADD_COMMENT(cb, "guard object is static symbol");
2958 STATIC_ASSERT(special_shift_is_8, RUBY_SPECIAL_SHIFT == 8);
2959 cmp(cb, REG0_8, imm_opnd(RUBY_SYMBOL_FLAG));
2960 jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
2961 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_STATIC_SYMBOL);
2964 else if (known_klass == rb_cFloat && FLONUM_P(sample_instance)) {
2965 RUBY_ASSERT(!val_type.is_heap);
2966 if (val_type.type != ETYPE_FLONUM || !val_type.is_imm) {
2967 RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2969 // We will guard flonum vs heap float as though they were separate classes
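// Representation sketch (assuming the usual 64-bit flonum scheme): a flonum
// packs a double into the VALUE with (val & RUBY_FLONUM_MASK) == RUBY_FLONUM_FLAG
// (mask 0x3, flag 0x2), which is exactly what the and/cmp below test.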
2970 ADD_COMMENT(cb, "guard object is flonum");
2971 mov(cb, REG1, REG0);
2972 and(cb, REG1, imm_opnd(RUBY_FLONUM_MASK));
2973 cmp(cb, REG1, imm_opnd(RUBY_FLONUM_FLAG));
2974 jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
2975 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_FLONUM);
2978 else if (FL_TEST(known_klass, FL_SINGLETON) && sample_instance == rb_attr_get(known_klass, id__attached__)) {
2979 // Singleton classes are attached to one specific object, so we can
2980 // avoid one memory access (and potentially the is_heap check) by
2981 // looking for the expected object directly.
2982 // Note that in case the sample instance has a singleton class that
2983 // doesn't attach to the sample instance, it means the sample instance
2984 // has an empty singleton class that hasn't been materialized yet. In
2985 // this case, comparing against the sample instance doesn't guarantee
2986 // that its singleton class is empty, so we can't avoid the memory
2987 // access. As an example, `Object.new.singleton_class` is an object in
2988 // this situation.
2989 ADD_COMMENT(cb, "guard known object with singleton class");
2990 // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the object.
2991 jit_mov_gc_ptr(jit, cb, REG1, sample_instance);
2992 cmp(cb, REG0, REG1);
2993 jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
2995 else {
2996 RUBY_ASSERT(!val_type.is_imm);
2998 // Check that the receiver is a heap object
2999 // Note: if we get here, the class doesn't have immediate instances.
3000 if (!val_type.is_heap) {
3001 ADD_COMMENT(cb, "guard not immediate");
3002 RUBY_ASSERT(Qfalse < Qnil);
3003 test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK));
3004 jit_chain_guard(JCC_JNZ, jit, ctx, max_chain_depth, side_exit);
3005 cmp(cb, REG0, imm_opnd(Qnil));
3006 jit_chain_guard(JCC_JBE, jit, ctx, max_chain_depth, side_exit);
3008 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_HEAP);
3011 x86opnd_t klass_opnd = mem_opnd(64, REG0, offsetof(struct RBasic, klass));
3013 // Bail if receiver class is different from known_klass
3014 // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the class.
3015 ADD_COMMENT(cb, "guard known class");
3016 jit_mov_gc_ptr(jit, cb, REG1, known_klass);
3017 cmp(cb, klass_opnd, REG1);
3018 jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
3021 return true;
3024 // Generate ancestry guard for protected callee.
3025 // Calls to protected callees only go through when self.is_a?(klass_that_defines_the_callee).
3026 static void
3027 jit_protected_callee_ancestry_guard(jitstate_t *jit, codeblock_t *cb, const rb_callable_method_entry_t *cme, uint8_t *side_exit)
3029 // See vm_call_method().
3030 mov(cb, C_ARG_REGS[0], member_opnd(REG_CFP, rb_control_frame_t, self));
3031 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], cme->defined_class);
3032 // Note: PC isn't written to current control frame as rb_is_kind_of() shouldn't raise.
3033 // VALUE rb_obj_is_kind_of(VALUE obj, VALUE klass);
3034 call_ptr(cb, REG0, (void *)&rb_obj_is_kind_of);
3035 test(cb, RAX, RAX);
3036 jz_ptr(cb, COUNTED_EXIT(jit, side_exit, send_se_protected_check_failed));
3039 // Return true when the codegen function generates code.
3040 // known_recv_klass is non-NULL when the caller has used jit_guard_known_klass().
3041 // See yjit_reg_method().
3042 typedef bool (*method_codegen_t)(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass);
3044 // Register a specialized codegen function for a particular method. Note that
3045 // if the function returns true, the code it generates runs without a
3046 // control frame and without interrupt checks. To avoid creating observable
3047 // behavior changes, the codegen function should only target simple code paths
3048 // that do not allocate and do not make method calls.
3049 static void
3050 yjit_reg_method(VALUE klass, const char *mid_str, method_codegen_t gen_fn)
3052 ID mid = rb_intern(mid_str);
3053 const rb_method_entry_t *me = rb_method_entry_at(klass, mid);
3055 if (!me) {
3056 rb_bug("undefined optimized method: %s", rb_id2name(mid));
3059 // For now, only cfuncs are supported
3060 RUBY_ASSERT(me && me->def);
3061 RUBY_ASSERT(me->def->type == VM_METHOD_TYPE_CFUNC);
3063 st_insert(yjit_method_codegen_table, (st_data_t)me->def->method_serial, (st_data_t)gen_fn);
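// For illustration only, a registration site is expected to look something
// like the following (using helpers defined below in this file):
//   yjit_reg_method(rb_cNilClass, "nil?", jit_rb_true);
//   yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s);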
3066 // Codegen for rb_obj_not().
3067 // Note, caller is responsible for generating all the right guards, including
3068 // arity guards.
3069 static bool
3070 jit_rb_obj_not(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3072 const val_type_t recv_opnd = ctx_get_opnd_type(ctx, OPND_STACK(0));
3074 if (recv_opnd.type == ETYPE_NIL || recv_opnd.type == ETYPE_FALSE) {
3075 ADD_COMMENT(cb, "rb_obj_not(nil_or_false)");
3076 ctx_stack_pop(ctx, 1);
3077 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_TRUE);
3078 mov(cb, out_opnd, imm_opnd(Qtrue));
3080 else if (recv_opnd.is_heap || recv_opnd.type != ETYPE_UNKNOWN) {
3081 // Note: recv_opnd.type != ETYPE_NIL && recv_opnd.type != ETYPE_FALSE.
3082 ADD_COMMENT(cb, "rb_obj_not(truthy)");
3083 ctx_stack_pop(ctx, 1);
3084 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_FALSE);
3085 mov(cb, out_opnd, imm_opnd(Qfalse));
3087 else {
3088 // jit_guard_known_klass() already ran on the receiver which should
3089 // have deduced the type of the receiver. This case should be
3090 // rare if not unreachable.
3091 return false;
3093 return true;
3096 // Codegen for rb_true()
3097 static bool
3098 jit_rb_true(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3100 ADD_COMMENT(cb, "nil? == true");
3101 ctx_stack_pop(ctx, 1);
3102 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_TRUE);
3103 mov(cb, stack_ret, imm_opnd(Qtrue));
3104 return true;
3107 // Codegen for rb_false()
3108 static bool
3109 jit_rb_false(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3111 ADD_COMMENT(cb, "nil? == false");
3112 ctx_stack_pop(ctx, 1);
3113 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_FALSE);
3114 mov(cb, stack_ret, imm_opnd(Qfalse));
3115 return true;
3118 // Codegen for rb_obj_equal()
3119 // object identity comparison
3120 static bool
3121 jit_rb_obj_equal(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3123 ADD_COMMENT(cb, "equal?");
3124 x86opnd_t obj1 = ctx_stack_pop(ctx, 1);
3125 x86opnd_t obj2 = ctx_stack_pop(ctx, 1);
3127 mov(cb, REG0, obj1);
3128 cmp(cb, REG0, obj2);
3129 mov(cb, REG0, imm_opnd(Qtrue));
3130 mov(cb, REG1, imm_opnd(Qfalse));
3131 cmovne(cb, REG0, REG1);
3133 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_IMM);
3134 mov(cb, stack_ret, REG0);
3135 return true;
3138 static VALUE
3139 yjit_str_bytesize(VALUE str)
3141 return LONG2NUM(RSTRING_LEN(str));
3144 static bool
3145 jit_rb_str_bytesize(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3147 ADD_COMMENT(cb, "String#bytesize");
3149 x86opnd_t recv = ctx_stack_pop(ctx, 1);
3150 mov(cb, C_ARG_REGS[0], recv);
3151 call_ptr(cb, REG0, (void *)&yjit_str_bytesize);
3153 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_FIXNUM);
3154 mov(cb, out_opnd, RAX);
3156 return true;
3159 // Codegen for rb_str_to_s()
3160 // When String#to_s is called on a String instance, the method returns self and
3161 // most of the overhead comes from setting up the method call. We observed that
3162 // this situation happens a lot in some workloads.
3163 static bool
3164 jit_rb_str_to_s(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *recv_known_klass)
3166 if (recv_known_klass && *recv_known_klass == rb_cString) {
3167 ADD_COMMENT(cb, "to_s on plain string");
3168 // The method returns the receiver, which is already on the stack.
3169 // No stack movement.
3170 return true;
3172 return false;
3175 static bool
3176 jit_thread_s_current(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *recv_known_klass)
3178 ADD_COMMENT(cb, "Thread.current");
3179 ctx_stack_pop(ctx, 1);
3181 // ec->thread_ptr
3182 mov(cb, REG0, member_opnd(REG_EC, rb_execution_context_t, thread_ptr));
3184 // thread->self
3185 mov(cb, REG0, member_opnd(REG0, rb_thread_t, self));
3187 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HEAP);
3188 mov(cb, stack_ret, REG0);
3189 return true;
3192 // Check if we know how to codegen for a particular cfunc method
3193 static method_codegen_t
3194 lookup_cfunc_codegen(const rb_method_definition_t *def)
3196 method_codegen_t gen_fn;
3197 if (st_lookup(yjit_method_codegen_table, def->method_serial, (st_data_t *)&gen_fn)) {
3198 return gen_fn;
3200 return NULL;
3203 // Is anyone listening for :c_call and :c_return events currently?
3204 static bool
3205 c_method_tracing_currently_enabled(const jitstate_t *jit)
3207 rb_event_flag_t tracing_events;
3208 if (rb_multi_ractor_p()) {
3209 tracing_events = ruby_vm_event_enabled_global_flags;
3211 else {
3212 // At the time of writing, events are never removed from
3213 // ruby_vm_event_enabled_global_flags, so if we always checked that set we
3214 // would never compile again even after tracing is disabled.
3215 tracing_events = rb_ec_ractor_hooks(jit->ec)->events;
3218 return tracing_events & (RUBY_EVENT_C_CALL | RUBY_EVENT_C_RETURN);
3221 static codegen_status_t
3222 gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *recv_known_klass)
3224 const rb_method_cfunc_t *cfunc = UNALIGNED_MEMBER_PTR(cme->def, body.cfunc);
3226 // If the function expects a Ruby array of arguments
3227 if (cfunc->argc < 0 && cfunc->argc != -1) {
3228 GEN_COUNTER_INC(cb, send_cfunc_ruby_array_varg);
3229 return YJIT_CANT_COMPILE;
3232 // If the argument count doesn't match
3233 if (cfunc->argc >= 0 && cfunc->argc != argc) {
3234 GEN_COUNTER_INC(cb, send_cfunc_argc_mismatch);
3235 return YJIT_CANT_COMPILE;
3238 // Don't JIT functions that need C stack arguments for now
3239 if (cfunc->argc >= 0 && argc + 1 > NUM_C_ARG_REGS) {
3240 GEN_COUNTER_INC(cb, send_cfunc_toomany_args);
3241 return YJIT_CANT_COMPILE;
3244 if (c_method_tracing_currently_enabled(jit)) {
3245 // Don't JIT if tracing c_call or c_return
3246 GEN_COUNTER_INC(cb, send_cfunc_tracing);
3247 return YJIT_CANT_COMPILE;
3250 // Delegate to codegen for C methods if we have it.
3252 method_codegen_t known_cfunc_codegen;
3253 if ((known_cfunc_codegen = lookup_cfunc_codegen(cme->def))) {
3254 if (known_cfunc_codegen(jit, ctx, ci, cme, block, argc, recv_known_klass)) {
3255 // cfunc codegen generated code. Terminate the block so
3256 // there aren't multiple calls in the same block.
3257 jit_jump_to_next_insn(jit, ctx);
3258 return YJIT_END_BLOCK;
3263 // Callee method ID
3264 //ID mid = vm_ci_mid(ci);
3265 //printf("JITting call to C function \"%s\", argc: %lu\n", rb_id2name(mid), argc);
3266 //print_str(cb, "");
3267 //print_str(cb, "calling CFUNC:");
3268 //print_str(cb, rb_id2name(mid));
3269 //print_str(cb, "recv");
3270 //print_ptr(cb, recv);
3272 // Create a side-exit to fall back to the interpreter
3273 uint8_t *side_exit = yjit_side_exit(jit, ctx);
3275 // Check for interrupts
3276 yjit_check_ints(cb, side_exit);
3278 // Stack overflow check
3279 // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
3280 // REG_CFP <= REG_SP + 4 * sizeof(VALUE) + sizeof(rb_control_frame_t)
3281 lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 4 + 2 * sizeof(rb_control_frame_t)));
3282 cmp(cb, REG_CFP, REG0);
3283 jle_ptr(cb, COUNTED_EXIT(jit, side_exit, send_se_cf_overflow));
3285 // Points to the receiver operand on the stack
3286 x86opnd_t recv = ctx_stack_opnd(ctx, argc);
3288 // Store incremented PC into current control frame in case callee raises.
3289 jit_save_pc(jit, REG0);
3291 if (block) {
3292 // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
3293 // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self; rb_captured_block->code.iseq aliases
3294 // with cfp->block_code.
3295 jit_mov_gc_ptr(jit, cb, REG0, (VALUE)block);
3296 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, block_code), REG0);
3299 // Increment the stack pointer by 3 (in the callee)
3300 // sp += 3
3301 lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 3));
3303 // Write method entry at sp[-3]
3304 // sp[-3] = me;
3305 // Put compile time cme into REG1. It's assumed to be valid because we are notified when
3306 // any cme we depend on becomes outdated. See rb_yjit_method_lookup_change().
3307 jit_mov_gc_ptr(jit, cb, REG1, (VALUE)cme);
3308 mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
3310 // Write block handler at sp[-2]
3311 // sp[-2] = block_handler;
3312 if (block) {
3313 // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
3314 lea(cb, REG1, member_opnd(REG_CFP, rb_control_frame_t, self));
3315 or(cb, REG1, imm_opnd(1));
3316 mov(cb, mem_opnd(64, REG0, 8 * -2), REG1);
3318 else {
3319 mov(cb, mem_opnd(64, REG0, 8 * -2), imm_opnd(VM_BLOCK_HANDLER_NONE));
3322 // Write env flags at sp[-1]
3323 // sp[-1] = frame_type;
3324 uint64_t frame_type = VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL;
3325 mov(cb, mem_opnd(64, REG0, 8 * -1), imm_opnd(frame_type));
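// Sketch of the three slots written above, relative to the callee's sp
// (REG0 points at sp after the lea further up):
//   sp[-3] = cme            -- callable method entry
//   sp[-2] = block_handler  -- or VM_BLOCK_HANDLER_NONE
//   sp[-1] = frame_type     -- VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL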
3327 // Allocate a new CFP (ec->cfp--)
3328 sub(
3330 member_opnd(REG_EC, rb_execution_context_t, cfp),
3331 imm_opnd(sizeof(rb_control_frame_t))
3334 // Setup the new frame
3335 // *cfp = (const struct rb_control_frame_struct) {
3336 // .pc = 0,
3337 // .sp = sp,
3338 // .iseq = 0,
3339 // .self = recv,
3340 // .ep = sp - 1,
3341 // .block_code = 0,
3342 // .__bp__ = sp,
3343 // };
3344 mov(cb, REG1, member_opnd(REG_EC, rb_execution_context_t, cfp));
3345 mov(cb, member_opnd(REG1, rb_control_frame_t, pc), imm_opnd(0));
3346 mov(cb, member_opnd(REG1, rb_control_frame_t, sp), REG0);
3347 mov(cb, member_opnd(REG1, rb_control_frame_t, iseq), imm_opnd(0));
3348 mov(cb, member_opnd(REG1, rb_control_frame_t, block_code), imm_opnd(0));
3349 mov(cb, member_opnd(REG1, rb_control_frame_t, __bp__), REG0);
3350 sub(cb, REG0, imm_opnd(sizeof(VALUE)));
3351 mov(cb, member_opnd(REG1, rb_control_frame_t, ep), REG0);
3352 mov(cb, REG0, recv);
3353 mov(cb, member_opnd(REG1, rb_control_frame_t, self), REG0);
3355 // Verify that we are calling the right function
3356 if (YJIT_CHECK_MODE > 0) {
3357 // Call check_cfunc_dispatch
3358 mov(cb, C_ARG_REGS[0], recv);
3359 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], (VALUE)ci);
3360 mov(cb, C_ARG_REGS[2], const_ptr_opnd((void *)cfunc->func));
3361 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[3], (VALUE)cme);
3362 call_ptr(cb, REG0, (void *)&check_cfunc_dispatch);
3365 // Copy SP into RAX because REG_SP will get overwritten
3366 lea(cb, RAX, ctx_sp_opnd(ctx, 0));
3368 // Pop the C function arguments from the stack (in the caller)
3369 ctx_stack_pop(ctx, argc + 1);
3371 // Write interpreter SP into CFP.
3372 // Needed in case the callee yields to the block.
3373 jit_save_sp(jit, ctx);
3375 // Non-variadic method
3376 if (cfunc->argc >= 0) {
3377 // Copy the arguments from the stack to the C argument registers
3378 // self is the 0th argument and is at index argc from the stack top
3379 for (int32_t i = 0; i < argc + 1; ++i)
3381 x86opnd_t stack_opnd = mem_opnd(64, RAX, -(argc + 1 - i) * SIZEOF_VALUE);
3382 x86opnd_t c_arg_reg = C_ARG_REGS[i];
3383 mov(cb, c_arg_reg, stack_opnd);
3386 // Variadic method
3387 if (cfunc->argc == -1) {
3388 // The method gets a pointer to the first argument
3389 // rb_f_puts(int argc, VALUE *argv, VALUE recv)
3390 mov(cb, C_ARG_REGS[0], imm_opnd(argc));
3391 lea(cb, C_ARG_REGS[1], mem_opnd(64, RAX, -(argc) * SIZEOF_VALUE));
3392 mov(cb, C_ARG_REGS[2], mem_opnd(64, RAX, -(argc + 1) * SIZEOF_VALUE));
3395 // Call the C function
3396 // VALUE ret = (cfunc->func)(recv, argv[0], argv[1]);
3397 // cfunc comes from compile-time cme->def, which we assume to be stable.
3398 // Invalidation logic is in rb_yjit_method_lookup_change()
3399 call_ptr(cb, REG0, (void*)cfunc->func);
3401 // Record code position for TracePoint patching. See full_cfunc_return().
3402 record_global_inval_patch(cb, outline_full_cfunc_return_pos);
3404 // Push the return value on the Ruby stack
3405 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
3406 mov(cb, stack_ret, RAX);
3408 // Pop the stack frame (ec->cfp++)
3409 add(
3411 member_opnd(REG_EC, rb_execution_context_t, cfp),
3412 imm_opnd(sizeof(rb_control_frame_t))
3415 // cfunc calls may corrupt types
3416 ctx_clear_local_types(ctx);
3418 // Note: the return block of gen_send_iseq() has ctx->sp_offset == 1
3419 // which allows for sharing the same successor.
3421 // Jump (fall through) to the call continuation block
3422 // We do this to end the current block after the call
3423 jit_jump_to_next_insn(jit, ctx);
3424 return YJIT_END_BLOCK;
3427 static void
3428 gen_return_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
3430 switch (shape) {
3431 case SHAPE_NEXT0:
3432 case SHAPE_NEXT1:
3433 RUBY_ASSERT(false);
3434 break;
3436 case SHAPE_DEFAULT:
3437 mov(cb, REG0, const_ptr_opnd(target0));
3438 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, jit_return), REG0);
3439 break;
3443 // If true, the iseq is leaf and it can be replaced by a single C call.
3444 static bool
3445 rb_leaf_invokebuiltin_iseq_p(const rb_iseq_t *iseq)
3447 unsigned int invokebuiltin_len = insn_len(BIN(opt_invokebuiltin_delegate_leave));
3448 unsigned int leave_len = insn_len(BIN(leave));
3450 return (iseq->body->iseq_size == (invokebuiltin_len + leave_len) &&
3451 rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[0]) == BIN(opt_invokebuiltin_delegate_leave) &&
3452 rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[invokebuiltin_len]) == BIN(leave) &&
3453 iseq->body->builtin_inline_p
3457 // Return an rb_builtin_function if the iseq contains only that leaf builtin function.
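// Layout sketch of such an iseq (informal): iseq_encoded holds just
//   [0]                  opt_invokebuiltin_delegate_leave
//   [1]                  operand: pointer to the rb_builtin_function
//   [invokebuiltin_len]  leave
// which is why the builtin can be pulled straight out of iseq_encoded[1].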
3458 static const struct rb_builtin_function*
3459 rb_leaf_builtin_function(const rb_iseq_t *iseq)
3461 if (!rb_leaf_invokebuiltin_iseq_p(iseq))
3462 return NULL;
3463 return (const struct rb_builtin_function *)iseq->body->iseq_encoded[1];
3466 static codegen_status_t
3467 gen_send_iseq(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, int32_t argc)
3469 const rb_iseq_t *iseq = def_iseq_ptr(cme->def);
3471 // When you have keyword arguments, there is an extra object that gets
3472 // placed on the stack that represents a bitmap of the keywords that were not
3473 // specified at the call site. We need to keep track of the fact that this
3474 // value is present on the stack in order to properly set up the callee's
3475 // stack pointer.
3476 const bool doing_kw_call = iseq->body->param.flags.has_kw;
3477 const bool supplying_kws = vm_ci_flag(ci) & VM_CALL_KWARG;
3479 if (vm_ci_flag(ci) & VM_CALL_TAILCALL) {
3480 // We can't handle tailcalls
3481 GEN_COUNTER_INC(cb, send_iseq_tailcall);
3482 return YJIT_CANT_COMPILE;
3485 // No support for callees with these parameters yet as they require allocation
3486 // or complex handling.
3487 if (iseq->body->param.flags.has_rest ||
3488 iseq->body->param.flags.has_post ||
3489 iseq->body->param.flags.has_kwrest) {
3490 GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3491 return YJIT_CANT_COMPILE;
3494 // If we have keyword arguments being passed to a callee that only takes
3495 // positionals, then we need to allocate a hash. For now we're going to
3496 // call that too complex and bail.
3497 if (supplying_kws && !iseq->body->param.flags.has_kw) {
3498 GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3499 return YJIT_CANT_COMPILE;
3502 // If we have a method accepting no kwargs (**nil), exit if we have passed
3503 // it any kwargs.
3504 if (supplying_kws && iseq->body->param.flags.accepts_no_kwarg) {
3505 GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3506 return YJIT_CANT_COMPILE;
3509 // For computing number of locals to setup for the callee
3510 int num_params = iseq->body->param.size;
3512 // Block parameter handling. This mirrors setup_parameters_complex().
3513 if (iseq->body->param.flags.has_block) {
3514 if (iseq->body->local_iseq == iseq) {
3515 // Block argument is passed through EP and not set up as a local in
3516 // the callee.
3517 num_params--;
3519 else {
3520 // In this case (param.flags.has_block && local_iseq != iseq),
3521 // the block argument is set up as a local variable and requires
3522 // materialization (allocation). Bail.
3523 GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3524 return YJIT_CANT_COMPILE;
3528 uint32_t start_pc_offset = 0;
3530 const int required_num = iseq->body->param.lead_num;
3532 // This struct represents the metadata about the caller-specified
3533 // keyword arguments.
3534 const struct rb_callinfo_kwarg *kw_arg = vm_ci_kwarg(ci);
3535 const int kw_arg_num = kw_arg ? kw_arg->keyword_len : 0;
3537 // Arity handling and optional parameter setup
3538 const int opts_filled = argc - required_num - kw_arg_num;
3539 const int opt_num = iseq->body->param.opt_num;
3540 const int opts_missing = opt_num - opts_filled;
3542 if (opts_filled < 0 || opts_filled > opt_num) {
3543 GEN_COUNTER_INC(cb, send_iseq_arity_error);
3544 return YJIT_CANT_COMPILE;
3547 // If we have unfilled optional arguments and keyword arguments then we
3548 // would need to adjust the argument locations to account for that.
3549 // For now we aren't handling this case.
3550 if (doing_kw_call && opts_missing > 0) {
3551 GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3552 return YJIT_CANT_COMPILE;
3555 if (opt_num > 0) {
3556 num_params -= opt_num - opts_filled;
3557 start_pc_offset = (uint32_t)iseq->body->param.opt_table[opts_filled];
3560 if (doing_kw_call) {
3561 // Here we're calling a method with keyword arguments and specifying
3562 // keyword arguments at this call site.
3564 // This struct represents the metadata about the callee-specified
3565 // keyword parameters.
3566 const struct rb_iseq_param_keyword *keyword = iseq->body->param.keyword;
3568 int required_kwargs_filled = 0;
3570 if (keyword->num > 30) {
3571 // We have so many keywords that (1 << num) encoded as a FIXNUM
3572 // (which shifts it left one more) no longer fits inside a 32-bit
3573 // immediate.
3574 GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3575 return YJIT_CANT_COMPILE;
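// Rough arithmetic behind the limit above (illustrative): unspecified_bits is built
// further down with bits 0..(num - required_num - 1). With num == 31 it can reach
// 0x40000000 (bit 30), and INT2FIX() encodes that as (0x40000000 << 1) | 1 == 0x80000001,
// which no longer fits in imm_opnd()'s signed 32-bit immediate, while num <= 30 keeps
// every encoded bitmap within INT32_MAX.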
3578 // Check that the kwargs being passed are valid
3579 if (supplying_kws) {
3580 // This is the list of keyword arguments that the callee specified
3581 // in its initial declaration.
3582 const ID *callee_kwargs = keyword->table;
3584 // Here we're going to build up a list of the IDs that correspond to
3585 // the caller-specified keyword arguments. If they're not in the
3586 // same order as the order specified in the callee declaration, then
3587 // we're going to need to generate some code to swap values around
3588 // on the stack.
3589 ID *caller_kwargs = ALLOCA_N(VALUE, kw_arg->keyword_len);
3590 for (int kwarg_idx = 0; kwarg_idx < kw_arg->keyword_len; kwarg_idx++)
3591 caller_kwargs[kwarg_idx] = SYM2ID(kw_arg->keywords[kwarg_idx]);
3593 // First, we're going to be sure that the names of every
3594 // caller-specified keyword argument correspond to a name in the
3595 // list of callee-specified keyword parameters.
3596 for (int caller_idx = 0; caller_idx < kw_arg->keyword_len; caller_idx++) {
3597 int callee_idx;
3599 for (callee_idx = 0; callee_idx < keyword->num; callee_idx++) {
3600 if (caller_kwargs[caller_idx] == callee_kwargs[callee_idx]) {
3601 break;
3605 // If the keyword was never found, then we know we have a
3606 // mismatch in the names of the keyword arguments, so we need to
3607 // bail.
3608 if (callee_idx == keyword->num) {
3609 GEN_COUNTER_INC(cb, send_iseq_kwargs_mismatch);
3610 return YJIT_CANT_COMPILE;
3613 // Keep a count to ensure all required kwargs are specified
3614 if (callee_idx < keyword->required_num) {
3615 required_kwargs_filled++;
3620 RUBY_ASSERT(required_kwargs_filled <= keyword->required_num);
3621 if (required_kwargs_filled != keyword->required_num) {
3622 GEN_COUNTER_INC(cb, send_iseq_kwargs_mismatch);
3623 return YJIT_CANT_COMPILE;
3627 // Number of locals that are not parameters
3628 const int num_locals = iseq->body->local_table_size - num_params;
3630 // Create a side-exit to fall back to the interpreter
3631 uint8_t *side_exit = yjit_side_exit(jit, ctx);
3633 // Check for interrupts
3634 yjit_check_ints(cb, side_exit);
3636 const struct rb_builtin_function *leaf_builtin = rb_leaf_builtin_function(iseq);
3638 if (leaf_builtin && !block && leaf_builtin->argc + 1 <= NUM_C_ARG_REGS) {
3639 ADD_COMMENT(cb, "inlined leaf builtin");
3641 // Call the builtin func (ec, recv, arg1, arg2, ...)
3642 mov(cb, C_ARG_REGS[0], REG_EC);
3644 // Copy self and arguments
3645 for (int32_t i = 0; i < leaf_builtin->argc + 1; i++) {
3646 x86opnd_t stack_opnd = ctx_stack_opnd(ctx, leaf_builtin->argc - i);
3647 x86opnd_t c_arg_reg = C_ARG_REGS[i + 1];
3648 mov(cb, c_arg_reg, stack_opnd);
3650 ctx_stack_pop(ctx, leaf_builtin->argc + 1);
3651 call_ptr(cb, REG0, (void *)leaf_builtin->func_ptr);
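// For example, with leaf_builtin->argc == 2 the copy loop above produces:
//   C_ARG_REGS[1] = stack operand at depth 2 (self)
//   C_ARG_REGS[2] = stack operand at depth 1 (first argument)
//   C_ARG_REGS[3] = stack operand at depth 0 (second argument)
// with REG_EC already in C_ARG_REGS[0], matching the (ec, recv, args...) convention.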
3653 // Push the return value
3654 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
3655 mov(cb, stack_ret, RAX);
3657 // Note: assuming that the leaf builtin doesn't change local variables here.
3658 // Seems like a safe assumption.
3660 return YJIT_KEEP_COMPILING;
3663 // Stack overflow check
3664 // Note that vm_push_frame checks it against a decremented cfp, hence the multiply by 2.
3665 // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
3666 ADD_COMMENT(cb, "stack overflow check");
3667 lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * (num_locals + iseq->body->stack_max) + 2 * sizeof(rb_control_frame_t)));
3668 cmp(cb, REG_CFP, REG0);
3669 jle_ptr(cb, COUNTED_EXIT(jit, side_exit, send_se_cf_overflow));
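// A rough reading of the check above (sizes illustrative): with num_locals == 2 and
// stack_max == 3, REG0 = sp + 5 * sizeof(VALUE) + 2 * sizeof(rb_control_frame_t).
// Control frames grow down toward the value stack, so if REG_CFP <= REG0 the callee's
// frame could collide with the value stack and we side exit instead of pushing it.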
3671 if (doing_kw_call) {
3672 // Here we're calling a method with keyword arguments and specifying
3673 // keyword arguments at this call site.
3675 // Number of positional arguments the callee expects before the first
3676 // keyword argument
3677 const int args_before_kw = required_num + opt_num;
3679 // This struct represents the metadata about the caller-specified
3680 // keyword arguments.
3681 int caller_keyword_len = 0;
3682 const VALUE *caller_keywords = NULL;
3683 if (vm_ci_kwarg(ci)) {
3684 caller_keyword_len = vm_ci_kwarg(ci)->keyword_len;
3685 caller_keywords = &vm_ci_kwarg(ci)->keywords[0];
3688 // This struct represents the metadata about the callee-specified
3689 // keyword parameters.
3690 const struct rb_iseq_param_keyword *const keyword = iseq->body->param.keyword;
3692 ADD_COMMENT(cb, "keyword args");
3694 // This is the list of keyword arguments that the callee specified
3695 // in its initial declaration.
3696 const ID *callee_kwargs = keyword->table;
3698 int total_kwargs = keyword->num;
3700 // Here we're going to build up a list of the IDs that correspond to
3701 // the caller-specified keyword arguments. If they're not in the
3702 // same order as the order specified in the callee declaration, then
3703 // we're going to need to generate some code to swap values around
3704 // on the stack.
3705 ID *caller_kwargs = ALLOCA_N(VALUE, total_kwargs);
3706 int kwarg_idx;
3707 for (kwarg_idx = 0; kwarg_idx < caller_keyword_len; kwarg_idx++) {
3708 caller_kwargs[kwarg_idx] = SYM2ID(caller_keywords[kwarg_idx]);
3711 int unspecified_bits = 0;
3713 for (int callee_idx = keyword->required_num; callee_idx < total_kwargs; callee_idx++) {
3714 bool already_passed = false;
3715 ID callee_kwarg = callee_kwargs[callee_idx];
3717 for (int caller_idx = 0; caller_idx < caller_keyword_len; caller_idx++) {
3718 if (caller_kwargs[caller_idx] == callee_kwarg) {
3719 already_passed = true;
3720 break;
3724 if (!already_passed) {
3725 // Reserve space on the stack for each default value we'll be
3726 // filling in (done just below). Also increment argc so that the
3727 // callee's SP is recorded correctly.
3728 argc++;
3729 x86opnd_t default_arg = ctx_stack_push(ctx, TYPE_UNKNOWN);
3730 VALUE default_value = keyword->default_values[callee_idx - keyword->required_num];
3732 if (default_value == Qundef) {
3733 // Qundef means that this value is not constant and must be
3734 // recalculated at runtime, so we record it in unspecified_bits
3735 // (Qnil is then used as a placeholder instead of Qundef).
3736 unspecified_bits |= 0x01 << (callee_idx - keyword->required_num);
3737 default_value = Qnil;
3740 // GC might move default_value.
3741 jit_mov_gc_ptr(jit, cb, REG0, default_value);
3742 mov(cb, default_arg, REG0);
3744 caller_kwargs[kwarg_idx++] = callee_kwarg;
3747 RUBY_ASSERT(kwarg_idx == total_kwargs);
3749 // Next, we're going to loop through every keyword that was
3750 // specified by the caller and make sure that it's in the correct
3751 // place. If it's not we're going to swap it around with another one.
3752 for (kwarg_idx = 0; kwarg_idx < total_kwargs; kwarg_idx++) {
3753 ID callee_kwarg = callee_kwargs[kwarg_idx];
3755 // If the argument is already in the right order, then we don't
3756 // need to generate any code since the expected value is already
3757 // in the right place on the stack.
3758 if (callee_kwarg == caller_kwargs[kwarg_idx]) continue;
3760 // In this case the argument is not in the right place, so we
3761 // need to find its position where it _should_ be and swap with
3762 // that location.
3763 for (int swap_idx = kwarg_idx + 1; swap_idx < total_kwargs; swap_idx++) {
3764 if (callee_kwarg == caller_kwargs[swap_idx]) {
3765 // First we're going to generate the code that is going
3766 // to perform the actual swapping at runtime.
3767 stack_swap(ctx, cb, argc - 1 - swap_idx - args_before_kw, argc - 1 - kwarg_idx - args_before_kw, REG1, REG0);
3769 // Next we're going to do some bookkeeping on our end so
3770 // that we know the order that the arguments are
3771 // actually in now.
3772 ID tmp = caller_kwargs[kwarg_idx];
3773 caller_kwargs[kwarg_idx] = caller_kwargs[swap_idx];
3774 caller_kwargs[swap_idx] = tmp;
3776 break;
3781 // Keyword arguments cause a special extra local variable to be
3782 // pushed onto the stack that represents the parameters that weren't
3783 // explicitly given a value and have a non-constant default.
3784 mov(cb, ctx_stack_opnd(ctx, -1), imm_opnd(INT2FIX(unspecified_bits)));
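/* The reordering above is effectively a selection pass over the caller's keyword IDs:
 * for each callee slot we find where the expected ID currently sits and swap, with
 * stack_swap() emitting the matching runtime code. A compile-time-only sketch of that
 * bookkeeping (hypothetical helper, not part of this file):
 *
 *     static void
 *     kwarg_reorder_sketch(ID *caller_kwargs, const ID *callee_kwargs, int total_kwargs)
 *     {
 *         for (int i = 0; i < total_kwargs; i++) {
 *             if (caller_kwargs[i] == callee_kwargs[i]) continue;
 *             for (int j = i + 1; j < total_kwargs; j++) {
 *                 if (caller_kwargs[j] == callee_kwargs[i]) {
 *                     ID tmp = caller_kwargs[i];      // mirrors the runtime stack_swap()
 *                     caller_kwargs[i] = caller_kwargs[j];
 *                     caller_kwargs[j] = tmp;
 *                     break;
 *                 }
 *             }
 *         }
 *     }
 */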
3786 // Points to the receiver operand on the stack
3787 x86opnd_t recv = ctx_stack_opnd(ctx, argc);
3789 // Store the updated SP on the current frame (pop arguments and receiver)
3790 ADD_COMMENT(cb, "store caller sp");
3791 lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * -(argc + 1)));
3792 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG0);
3794 // Store the next PC in the current frame
3795 jit_save_pc(jit, REG0);
3797 if (block) {
3798 // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
3799 // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self; rb_captured_block->code.iseq aliases
3800 // with cfp->block_code.
3801 jit_mov_gc_ptr(jit, cb, REG0, (VALUE)block);
3802 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, block_code), REG0);
3805 // Adjust the callee's stack pointer
3806 lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * (3 + num_locals + doing_kw_call)));
3808 // Initialize local variables to Qnil
3809 for (int i = 0; i < num_locals; i++) {
3810 mov(cb, mem_opnd(64, REG0, sizeof(VALUE) * (i - num_locals - 3)), imm_opnd(Qnil));
3813 ADD_COMMENT(cb, "push env");
3814 // Put compile time cme into REG1. It's assumed to be valid because we are notified when
3815 // any cme we depend on becomes outdated. See rb_yjit_method_lookup_change().
3816 jit_mov_gc_ptr(jit, cb, REG1, (VALUE)cme);
3817 // Write method entry at sp[-3]
3818 // sp[-3] = me;
3819 mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
3821 // Write block handler at sp[-2]
3822 // sp[-2] = block_handler;
3823 if (block) {
3824 // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
3825 lea(cb, REG1, member_opnd(REG_CFP, rb_control_frame_t, self));
3826 or(cb, REG1, imm_opnd(1));
3827 mov(cb, mem_opnd(64, REG0, 8 * -2), REG1);
3829 else {
3830 mov(cb, mem_opnd(64, REG0, 8 * -2), imm_opnd(VM_BLOCK_HANDLER_NONE));
3833 // Write env flags at sp[-1]
3834 // sp[-1] = frame_type;
3835 uint64_t frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL;
3836 mov(cb, mem_opnd(64, REG0, 8 * -1), imm_opnd(frame_type));
3838 ADD_COMMENT(cb, "push callee CFP");
3839 // Allocate a new CFP (ec->cfp--)
3840 sub(cb, REG_CFP, imm_opnd(sizeof(rb_control_frame_t)));
3841 mov(cb, member_opnd(REG_EC, rb_execution_context_t, cfp), REG_CFP);
3843 // Setup the new frame
3844 // *cfp = (const struct rb_control_frame_struct) {
3845 // .pc = pc,
3846 // .sp = sp,
3847 // .iseq = iseq,
3848 // .self = recv,
3849 // .ep = sp - 1,
3850 // .block_code = 0,
3851 // .__bp__ = sp,
3852 // };
3853 mov(cb, REG1, recv);
3854 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, self), REG1);
3855 mov(cb, REG_SP, REG0); // Switch to the callee's REG_SP
3856 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG0);
3857 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, __bp__), REG0);
3858 sub(cb, REG0, imm_opnd(sizeof(VALUE)));
3859 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, ep), REG0);
3860 jit_mov_gc_ptr(jit, cb, REG0, (VALUE)iseq);
3861 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, iseq), REG0);
3862 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, block_code), imm_opnd(0));
3864 // No need to set cfp->pc since the callee sets it whenever calling into routines
3865 // that could look at it through jit_save_pc().
3866 // mov(cb, REG0, const_ptr_opnd(start_pc));
3867 // mov(cb, member_opnd(REG_CFP, rb_control_frame_t, pc), REG0);
3869 // Stub so we can return to JITted code
3870 blockid_t return_block = { jit->iseq, jit_next_insn_idx(jit) };
3872 // Create a context for the callee
3873 ctx_t callee_ctx = DEFAULT_CTX;
3875 // Set the argument types in the callee's context
3876 for (int32_t arg_idx = 0; arg_idx < argc; ++arg_idx) {
3877 val_type_t arg_type = ctx_get_opnd_type(ctx, OPND_STACK(argc - arg_idx - 1));
3878 ctx_set_local_type(&callee_ctx, arg_idx, arg_type);
3880 val_type_t recv_type = ctx_get_opnd_type(ctx, OPND_STACK(argc));
3881 ctx_upgrade_opnd_type(&callee_ctx, OPND_SELF, recv_type);
3883 // The callee might change locals through Kernel#binding and other means.
3884 ctx_clear_local_types(ctx);
3886 // Pop arguments and receiver in return context, push the return value
3887 // After the return, sp_offset will be 1. The codegen for leave writes
3888 // the return value in case of JIT-to-JIT return.
3889 ctx_t return_ctx = *ctx;
3890 ctx_stack_pop(&return_ctx, argc + 1);
3891 ctx_stack_push(&return_ctx, TYPE_UNKNOWN);
3892 return_ctx.sp_offset = 1;
3893 return_ctx.chain_depth = 0;
3895 // Write the JIT return address on the callee frame
3896 gen_branch(
3897 jit,
3898 ctx,
3899 return_block,
3900 &return_ctx,
3901 return_block,
3902 &return_ctx,
3903 gen_return_branch
3906 //print_str(cb, "calling Ruby func:");
3907 //print_str(cb, rb_id2name(vm_ci_mid(ci)));
3909 // Directly jump to the entry point of the callee
3910 gen_direct_jump(
3911 jit,
3912 &callee_ctx,
3913 (blockid_t){ iseq, start_pc_offset }
3916 return YJIT_END_BLOCK;
3919 static codegen_status_t
3920 gen_struct_aref(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, VALUE comptime_recv, VALUE comptime_recv_klass) {
3921 if (vm_ci_argc(ci) != 0) {
3922 return YJIT_CANT_COMPILE;
3925 const unsigned int off = cme->def->body.optimized.index;
3927 // Confidence checks
3928 RUBY_ASSERT_ALWAYS(RB_TYPE_P(comptime_recv, T_STRUCT));
3929 RUBY_ASSERT_ALWAYS((long)off < RSTRUCT_LEN(comptime_recv));
3931 // We are going to use an encoding that takes a 4-byte immediate which
3932 // limits the offset to INT32_MAX.
3934 uint64_t native_off = (uint64_t)off * (uint64_t)SIZEOF_VALUE;
3935 if (native_off > (uint64_t)INT32_MAX) {
3936 return YJIT_CANT_COMPILE;
3940 // All structs from the same Struct class should have the same
3941 // length, so if our comptime_recv is embedded, all runtime
3942 // structs of the same class should be as well, and the
3943 // converse also holds.
3944 bool embedded = FL_TEST_RAW(comptime_recv, RSTRUCT_EMBED_LEN_MASK);
3946 ADD_COMMENT(cb, "struct aref");
3948 x86opnd_t recv = ctx_stack_pop(ctx, 1);
3950 mov(cb, REG0, recv);
3952 if (embedded) {
3953 mov(cb, REG0, member_opnd_idx(REG0, struct RStruct, as.ary, off));
3955 else {
3956 mov(cb, REG0, member_opnd(REG0, struct RStruct, as.heap.ptr));
3957 mov(cb, REG0, mem_opnd(64, REG0, SIZEOF_VALUE * off));
3960 x86opnd_t ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
3961 mov(cb, ret, REG0);
3963 jit_jump_to_next_insn(jit, ctx);
3964 return YJIT_END_BLOCK;
3967 static codegen_status_t
3968 gen_struct_aset(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, VALUE comptime_recv, VALUE comptime_recv_klass) {
3969 if (vm_ci_argc(ci) != 1) {
3970 return YJIT_CANT_COMPILE;
3973 const unsigned int off = cme->def->body.optimized.index;
3975 // Confidence checks
3976 RUBY_ASSERT_ALWAYS(RB_TYPE_P(comptime_recv, T_STRUCT));
3977 RUBY_ASSERT_ALWAYS((long)off < RSTRUCT_LEN(comptime_recv));
3979 ADD_COMMENT(cb, "struct aset");
3981 x86opnd_t val = ctx_stack_pop(ctx, 1);
3982 x86opnd_t recv = ctx_stack_pop(ctx, 1);
3984 mov(cb, C_ARG_REGS[0], recv);
3985 mov(cb, C_ARG_REGS[1], imm_opnd(off));
3986 mov(cb, C_ARG_REGS[2], val);
3987 call_ptr(cb, REG0, (void *)RSTRUCT_SET);
3989 x86opnd_t ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
3990 mov(cb, ret, RAX);
3992 jit_jump_to_next_insn(jit, ctx);
3993 return YJIT_END_BLOCK;
3996 const rb_callable_method_entry_t *
3997 rb_aliased_callable_method_entry(const rb_callable_method_entry_t *me);
3999 static codegen_status_t
4000 gen_send_general(jitstate_t *jit, ctx_t *ctx, struct rb_call_data *cd, rb_iseq_t *block)
4002 // Relevant definitions:
4003 // rb_execution_context_t : vm_core.h
4004 // invoker, cfunc logic : method.h, vm_method.c
4005 // rb_callinfo : vm_callinfo.h
4006 // rb_callable_method_entry_t : method.h
4007 // vm_call_cfunc_with_frame : vm_insnhelper.c
4009 // For a general overview for how the interpreter calls methods,
4010 // see vm_call_method().
4012 const struct rb_callinfo *ci = cd->ci; // info about the call site
4014 int32_t argc = (int32_t)vm_ci_argc(ci);
4015 ID mid = vm_ci_mid(ci);
4017 // Don't JIT calls with keyword splat
4018 if (vm_ci_flag(ci) & VM_CALL_KW_SPLAT) {
4019 GEN_COUNTER_INC(cb, send_kw_splat);
4020 return YJIT_CANT_COMPILE;
4023 // Don't JIT calls that aren't simple
4024 // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block.
4025 if ((vm_ci_flag(ci) & VM_CALL_ARGS_SPLAT) != 0) {
4026 GEN_COUNTER_INC(cb, send_args_splat);
4027 return YJIT_CANT_COMPILE;
4029 if ((vm_ci_flag(ci) & VM_CALL_ARGS_BLOCKARG) != 0) {
4030 GEN_COUNTER_INC(cb, send_block_arg);
4031 return YJIT_CANT_COMPILE;
4034 // Defer compilation so we can specialize on class of receiver
4035 if (!jit_at_current_insn(jit)) {
4036 defer_compilation(jit, ctx);
4037 return YJIT_END_BLOCK;
4040 VALUE comptime_recv = jit_peek_at_stack(jit, ctx, argc);
4041 VALUE comptime_recv_klass = CLASS_OF(comptime_recv);
4043 // Guard that the receiver has the same class as the one from compile time
4044 uint8_t *side_exit = yjit_side_exit(jit, ctx);
4046 // Points to the receiver operand on the stack
4047 x86opnd_t recv = ctx_stack_opnd(ctx, argc);
4048 insn_opnd_t recv_opnd = OPND_STACK(argc);
4049 mov(cb, REG0, recv);
4050 if (!jit_guard_known_klass(jit, ctx, comptime_recv_klass, recv_opnd, comptime_recv, SEND_MAX_DEPTH, side_exit)) {
4051 return YJIT_CANT_COMPILE;
4054 // Do method lookup
4055 const rb_callable_method_entry_t *cme = rb_callable_method_entry(comptime_recv_klass, mid);
4056 if (!cme) {
4057 // TODO: counter
4058 return YJIT_CANT_COMPILE;
4061 switch (METHOD_ENTRY_VISI(cme)) {
4062 case METHOD_VISI_PUBLIC:
4063 // Can always call public methods
4064 break;
4065 case METHOD_VISI_PRIVATE:
4066 if (!(vm_ci_flag(ci) & VM_CALL_FCALL)) {
4067 // Can only call private methods with FCALL call sites
4068 // (at the moment, call sites with no receiver or with an explicit `self` receiver).
4069 return YJIT_CANT_COMPILE;
4071 break;
4072 case METHOD_VISI_PROTECTED:
4073 jit_protected_callee_ancestry_guard(jit, cb, cme, side_exit);
4074 break;
4075 case METHOD_VISI_UNDEF:
4076 RUBY_ASSERT(false && "cmes should always have a visibility");
4077 break;
4080 // Register block for invalidation
4081 RUBY_ASSERT(cme->called_id == mid);
4082 assume_method_lookup_stable(comptime_recv_klass, cme, jit);
4084 // To handle the aliased method case (VM_METHOD_TYPE_ALIAS)
4085 while (true) {
4086 // switch on the method type
4087 switch (cme->def->type) {
4088 case VM_METHOD_TYPE_ISEQ:
4089 return gen_send_iseq(jit, ctx, ci, cme, block, argc);
4090 case VM_METHOD_TYPE_CFUNC:
4091 if ((vm_ci_flag(ci) & VM_CALL_KWARG) != 0) {
4092 GEN_COUNTER_INC(cb, send_cfunc_kwargs);
4093 return YJIT_CANT_COMPILE;
4095 return gen_send_cfunc(jit, ctx, ci, cme, block, argc, &comptime_recv_klass);
4096 case VM_METHOD_TYPE_IVAR:
4097 if (argc != 0) {
4098 // Argument count mismatch. Getters take no arguments.
4099 GEN_COUNTER_INC(cb, send_getter_arity);
4100 return YJIT_CANT_COMPILE;
4102 if (c_method_tracing_currently_enabled(jit)) {
4103 // Can't generate code for firing c_call and c_return events
4104 // :attr-tracing:
4105 // Handling the C method tracing events for attr_accessor
4106 // methods is easier than for regular C methods because we know the
4107 // "method" we are calling into never enables those tracing
4108 // events. Once global invalidation runs, the code for the
4109 // attr_accessor is invalidated and we exit at the closest
4110 // instruction boundary which is always outside of the body of
4111 // the attr_accessor code.
4112 GEN_COUNTER_INC(cb, send_cfunc_tracing);
4113 return YJIT_CANT_COMPILE;
4116 mov(cb, REG0, recv);
4118 ID ivar_name = cme->def->body.attr.id;
4119 return gen_get_ivar(jit, ctx, SEND_MAX_DEPTH, comptime_recv, ivar_name, recv_opnd, side_exit);
4120 case VM_METHOD_TYPE_ATTRSET:
4121 if ((vm_ci_flag(ci) & VM_CALL_KWARG) != 0) {
4122 GEN_COUNTER_INC(cb, send_attrset_kwargs);
4123 return YJIT_CANT_COMPILE;
4125 else if (argc != 1 || !RB_TYPE_P(comptime_recv, T_OBJECT)) {
4126 GEN_COUNTER_INC(cb, send_ivar_set_method);
4127 return YJIT_CANT_COMPILE;
4129 else if (c_method_tracing_currently_enabled(jit)) {
4130 // Can't generate code for firing c_call and c_return events
4131 // See :attr-tracing:
4132 GEN_COUNTER_INC(cb, send_cfunc_tracing);
4133 return YJIT_CANT_COMPILE;
4135 else {
4136 ID ivar_name = cme->def->body.attr.id;
4137 return gen_set_ivar(jit, ctx, comptime_recv, comptime_recv_klass, ivar_name);
4139 // Block method, e.g. define_method(:foo) { :my_block }
4140 case VM_METHOD_TYPE_BMETHOD:
4141 GEN_COUNTER_INC(cb, send_bmethod);
4142 return YJIT_CANT_COMPILE;
4143 case VM_METHOD_TYPE_ZSUPER:
4144 GEN_COUNTER_INC(cb, send_zsuper_method);
4145 return YJIT_CANT_COMPILE;
4146 case VM_METHOD_TYPE_ALIAS: {
4147 // Retrieve the aliased method and re-enter the switch
4148 cme = rb_aliased_callable_method_entry(cme);
4149 continue;
4151 case VM_METHOD_TYPE_UNDEF:
4152 GEN_COUNTER_INC(cb, send_undef_method);
4153 return YJIT_CANT_COMPILE;
4154 case VM_METHOD_TYPE_NOTIMPLEMENTED:
4155 GEN_COUNTER_INC(cb, send_not_implemented_method);
4156 return YJIT_CANT_COMPILE;
4157 // Send family of methods, e.g. call/apply
4158 case VM_METHOD_TYPE_OPTIMIZED:
4159 switch (cme->def->body.optimized.type) {
4160 case OPTIMIZED_METHOD_TYPE_SEND:
4161 GEN_COUNTER_INC(cb, send_optimized_method_send);
4162 return YJIT_CANT_COMPILE;
4163 case OPTIMIZED_METHOD_TYPE_CALL:
4164 GEN_COUNTER_INC(cb, send_optimized_method_call);
4165 return YJIT_CANT_COMPILE;
4166 case OPTIMIZED_METHOD_TYPE_BLOCK_CALL:
4167 GEN_COUNTER_INC(cb, send_optimized_method_block_call);
4168 return YJIT_CANT_COMPILE;
4169 case OPTIMIZED_METHOD_TYPE_STRUCT_AREF:
4170 return gen_struct_aref(jit, ctx, ci, cme, comptime_recv, comptime_recv_klass);
4171 case OPTIMIZED_METHOD_TYPE_STRUCT_ASET:
4172 return gen_struct_aset(jit, ctx, ci, cme, comptime_recv, comptime_recv_klass);
4173 default:
4174 rb_bug("unknown optimized method type (%d)", cme->def->body.optimized.type);
4175 UNREACHABLE_RETURN(YJIT_CANT_COMPILE);
4177 case VM_METHOD_TYPE_MISSING:
4178 GEN_COUNTER_INC(cb, send_missing_method);
4179 return YJIT_CANT_COMPILE;
4180 case VM_METHOD_TYPE_REFINED:
4181 GEN_COUNTER_INC(cb, send_refined_method);
4182 return YJIT_CANT_COMPILE;
4183 // no default case so compiler issues a warning if this is not exhaustive
4186 // Unreachable
4187 RUBY_ASSERT(false);
4191 static codegen_status_t
4192 gen_opt_send_without_block(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4194 struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 0);
4195 return gen_send_general(jit, ctx, cd, NULL);
4198 static codegen_status_t
4199 gen_send(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4201 struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 0);
4202 rb_iseq_t *block = (rb_iseq_t *)jit_get_arg(jit, 1);
4203 return gen_send_general(jit, ctx, cd, block);
4206 static codegen_status_t
4207 gen_invokesuper(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4209 struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 0);
4210 rb_iseq_t *block = (rb_iseq_t *)jit_get_arg(jit, 1);
4212 // Defer compilation so we can specialize on class of receiver
4213 if (!jit_at_current_insn(jit)) {
4214 defer_compilation(jit, ctx);
4215 return YJIT_END_BLOCK;
4218 const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(jit->ec->cfp);
4219 if (!me) {
4220 return YJIT_CANT_COMPILE;
4223 // FIXME: We should track and invalidate this block when this cme is invalidated
4224 VALUE current_defined_class = me->defined_class;
4225 ID mid = me->def->original_id;
4227 if (me != rb_callable_method_entry(current_defined_class, me->called_id)) {
4228 // We could likely generate this call since we are only concerned with
4229 // the method entry remaining valid, but assume_method_lookup_stable
4230 // below requires that the method lookup matches as well.
4231 return YJIT_CANT_COMPILE;
4234 // vm_search_normal_superclass
4235 if (BUILTIN_TYPE(current_defined_class) == T_ICLASS && FL_TEST_RAW(RBASIC(current_defined_class)->klass, RMODULE_IS_REFINEMENT)) {
4236 return YJIT_CANT_COMPILE;
4238 VALUE comptime_superclass = RCLASS_SUPER(RCLASS_ORIGIN(current_defined_class));
4240 const struct rb_callinfo *ci = cd->ci;
4241 int32_t argc = (int32_t)vm_ci_argc(ci);
4243 // Don't JIT calls that aren't simple
4244 // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block.
4245 if ((vm_ci_flag(ci) & VM_CALL_ARGS_SPLAT) != 0) {
4246 GEN_COUNTER_INC(cb, send_args_splat);
4247 return YJIT_CANT_COMPILE;
4249 if ((vm_ci_flag(ci) & VM_CALL_KWARG) != 0) {
4250 GEN_COUNTER_INC(cb, send_keywords);
4251 return YJIT_CANT_COMPILE;
4253 if ((vm_ci_flag(ci) & VM_CALL_KW_SPLAT) != 0) {
4254 GEN_COUNTER_INC(cb, send_kw_splat);
4255 return YJIT_CANT_COMPILE;
4257 if ((vm_ci_flag(ci) & VM_CALL_ARGS_BLOCKARG) != 0) {
4258 GEN_COUNTER_INC(cb, send_block_arg);
4259 return YJIT_CANT_COMPILE;
4262 // Ensure we haven't rebound this method onto an incompatible class.
4263 // In the interpreter we try to avoid making this check by performing some
4264 // cheaper calculations first, but since we specialize on the method entry
4265 // we only have to do this once at compile time, so it is fine to always
4266 // check and side exit.
4267 VALUE comptime_recv = jit_peek_at_stack(jit, ctx, argc);
4268 if (!rb_obj_is_kind_of(comptime_recv, current_defined_class)) {
4269 return YJIT_CANT_COMPILE;
4272 // Do method lookup
4273 const rb_callable_method_entry_t *cme = rb_callable_method_entry(comptime_superclass, mid);
4275 if (!cme) {
4276 return YJIT_CANT_COMPILE;
4279 // Check that we'll be able to write this method dispatch before generating checks
4280 switch (cme->def->type) {
4281 case VM_METHOD_TYPE_ISEQ:
4282 case VM_METHOD_TYPE_CFUNC:
4283 break;
4284 default:
4285 // others unimplemented
4286 return YJIT_CANT_COMPILE;
4289 // Guard that the receiver has the same class as the one from compile time
4290 uint8_t *side_exit = yjit_side_exit(jit, ctx);
4292 if (jit->ec->cfp->ep[VM_ENV_DATA_INDEX_ME_CREF] != (VALUE)me) {
4293 // This will be the case for super within a block
4294 return YJIT_CANT_COMPILE;
4297 ADD_COMMENT(cb, "guard known me");
4298 mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, ep));
4299 x86opnd_t ep_me_opnd = mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_ME_CREF);
4300 jit_mov_gc_ptr(jit, cb, REG1, (VALUE)me);
4301 cmp(cb, ep_me_opnd, REG1);
4302 jne_ptr(cb, COUNTED_EXIT(jit, side_exit, invokesuper_me_changed));
4304 if (!block) {
4305 // Guard no block passed
4306 // rb_vm_frame_block_handler(GET_EC()->cfp) == VM_BLOCK_HANDLER_NONE
4307 // note, we assume VM_ASSERT(VM_ENV_LOCAL_P(ep))
4309 // TODO: this could properly forward the current block handler, but
4310 // would require changes to gen_send_*
4311 ADD_COMMENT(cb, "guard no block given");
4312 // EP is in REG0 from above
4313 x86opnd_t ep_specval_opnd = mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_SPECVAL);
4314 cmp(cb, ep_specval_opnd, imm_opnd(VM_BLOCK_HANDLER_NONE));
4315 jne_ptr(cb, COUNTED_EXIT(jit, side_exit, invokesuper_block));
4318 // Points to the receiver operand on the stack
4319 x86opnd_t recv = ctx_stack_opnd(ctx, argc);
4320 mov(cb, REG0, recv);
4322 // We need to assume that both our current method entry and the super
4323 // method entry we invoke remain stable
4324 assume_method_lookup_stable(current_defined_class, me, jit);
4325 assume_method_lookup_stable(comptime_superclass, cme, jit);
4327 // Method calls may corrupt types
4328 ctx_clear_local_types(ctx);
4330 switch (cme->def->type) {
4331 case VM_METHOD_TYPE_ISEQ:
4332 return gen_send_iseq(jit, ctx, ci, cme, block, argc);
4333 case VM_METHOD_TYPE_CFUNC:
4334 return gen_send_cfunc(jit, ctx, ci, cme, block, argc, NULL);
4335 default:
4336 break;
4339 RUBY_ASSERT_ALWAYS(false);
4342 static codegen_status_t
4343 gen_leave(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4345 // Only the return value should be on the stack
4346 RUBY_ASSERT(ctx->stack_size == 1);
4348 // Create a side-exit to fall back to the interpreter
4349 uint8_t *side_exit = yjit_side_exit(jit, ctx);
4351 // Load environment pointer EP from CFP
4352 mov(cb, REG1, member_opnd(REG_CFP, rb_control_frame_t, ep));
4354 // Check for interrupts
4355 ADD_COMMENT(cb, "check for interrupts");
4356 yjit_check_ints(cb, COUNTED_EXIT(jit, side_exit, leave_se_interrupt));
4358 // Load the return value
4359 mov(cb, REG0, ctx_stack_pop(ctx, 1));
4361 // Pop the current frame (ec->cfp++)
4362 // Note: the return PC is already in the previous CFP
4363 add(cb, REG_CFP, imm_opnd(sizeof(rb_control_frame_t)));
4364 mov(cb, member_opnd(REG_EC, rb_execution_context_t, cfp), REG_CFP);
4366 // Reload REG_SP for the caller and write the return value.
4367 // Top of the stack is REG_SP[0] since the caller has sp_offset=1.
4368 mov(cb, REG_SP, member_opnd(REG_CFP, rb_control_frame_t, sp));
4369 mov(cb, mem_opnd(64, REG_SP, 0), REG0);
4371 // Jump to the JIT return address on the frame that was just popped
4372 const int32_t offset_to_jit_return = -((int32_t)sizeof(rb_control_frame_t)) + (int32_t)offsetof(rb_control_frame_t, jit_return);
4373 jmp_rm(cb, mem_opnd(64, REG_CFP, offset_to_jit_return));
4375 return YJIT_END_BLOCK;
4378 RUBY_EXTERN rb_serial_t ruby_vm_global_constant_state;
4380 static codegen_status_t
4381 gen_getglobal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4383 ID gid = jit_get_arg(jit, 0);
4385 // Save the PC and SP because we might make a Ruby call for warning
4386 jit_prepare_routine_call(jit, ctx, REG0);
4388 mov(cb, C_ARG_REGS[0], imm_opnd(gid));
4390 call_ptr(cb, REG0, (void *)&rb_gvar_get);
4392 x86opnd_t top = ctx_stack_push(ctx, TYPE_UNKNOWN);
4393 mov(cb, top, RAX);
4395 return YJIT_KEEP_COMPILING;
4398 static codegen_status_t
4399 gen_setglobal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4401 ID gid = jit_get_arg(jit, 0);
4403 // Save the PC and SP because we might make a Ruby call for
4404 // Kernel#set_trace_var
4405 jit_prepare_routine_call(jit, ctx, REG0);
4407 mov(cb, C_ARG_REGS[0], imm_opnd(gid));
4409 x86opnd_t val = ctx_stack_pop(ctx, 1);
4411 mov(cb, C_ARG_REGS[1], val);
4413 call_ptr(cb, REG0, (void *)&rb_gvar_set);
4415 return YJIT_KEEP_COMPILING;
4418 static codegen_status_t
4419 gen_anytostring(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4421 // Save the PC and SP because rb_obj_as_string_result() below may
4422 // allocate a new string
4423 jit_prepare_routine_call(jit, ctx, REG0);
4425 x86opnd_t str = ctx_stack_pop(ctx, 1);
4426 x86opnd_t val = ctx_stack_pop(ctx, 1);
4428 mov(cb, C_ARG_REGS[0], str);
4429 mov(cb, C_ARG_REGS[1], val);
4431 call_ptr(cb, REG0, (void *)&rb_obj_as_string_result);
4433 // Push the return value
4434 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_STRING);
4435 mov(cb, stack_ret, RAX);
4437 return YJIT_KEEP_COMPILING;
4440 static codegen_status_t
4441 gen_objtostring(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4443 if (!jit_at_current_insn(jit)) {
4444 defer_compilation(jit, ctx);
4445 return YJIT_END_BLOCK;
4448 x86opnd_t recv = ctx_stack_opnd(ctx, 0);
4449 VALUE comptime_recv = jit_peek_at_stack(jit, ctx, 0);
4451 if (RB_TYPE_P(comptime_recv, T_STRING)) {
4452 uint8_t *side_exit = yjit_side_exit(jit, ctx);
4454 mov(cb, REG0, recv);
4455 jit_guard_known_klass(jit, ctx, CLASS_OF(comptime_recv), OPND_STACK(0), comptime_recv, SEND_MAX_DEPTH, side_exit);
4456 // No work needed. The string value is already on the top of the stack.
4457 return YJIT_KEEP_COMPILING;
4459 else {
4460 struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 0);
4461 return gen_send_general(jit, ctx, cd, NULL);
4465 static codegen_status_t
4466 gen_toregexp(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4468 rb_num_t opt = jit_get_arg(jit, 0);
4469 rb_num_t cnt = jit_get_arg(jit, 1);
4471 // Save the PC and SP because this allocates an object and could
4472 // raise an exception.
4473 jit_prepare_routine_call(jit, ctx, REG0);
4475 x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(sizeof(VALUE) * (uint32_t)cnt));
4476 ctx_stack_pop(ctx, cnt);
4478 mov(cb, C_ARG_REGS[0], imm_opnd(0));
4479 mov(cb, C_ARG_REGS[1], imm_opnd(cnt));
4480 lea(cb, C_ARG_REGS[2], values_ptr);
4481 call_ptr(cb, REG0, (void *)&rb_ary_tmp_new_from_values);
4483 // Save the array so we can clear it later
4484 push(cb, RAX);
4485 push(cb, RAX); // Alignment
4486 mov(cb, C_ARG_REGS[0], RAX);
4487 mov(cb, C_ARG_REGS[1], imm_opnd(opt));
4488 call_ptr(cb, REG0, (void *)&rb_reg_new_ary);
4490 // The actual regex is in RAX now. Pop the temp array from
4491 // rb_ary_tmp_new_from_values into C arg regs so we can clear it
4492 pop(cb, REG1); // Alignment
4493 pop(cb, C_ARG_REGS[0]);
4495 // The value we want to push on the stack is in RAX right now
4496 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4497 mov(cb, stack_ret, RAX);
4499 // Clear the temp array.
4500 call_ptr(cb, REG0, (void *)&rb_ary_clear);
4502 return YJIT_KEEP_COMPILING;
4505 static codegen_status_t
4506 gen_intern(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4508 // Save the PC and SP because we might allocate
4509 jit_prepare_routine_call(jit, ctx, REG0);
4511 x86opnd_t str = ctx_stack_pop(ctx, 1);
4513 mov(cb, C_ARG_REGS[0], str);
4515 call_ptr(cb, REG0, (void *)&rb_str_intern);
4517 // Push the return value
4518 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4519 mov(cb, stack_ret, RAX);
4521 return YJIT_KEEP_COMPILING;
4524 static codegen_status_t
4525 gen_getspecial(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4527 // This takes two arguments, key and type
4528 // key is only used when type == 0
4529 // A non-zero type determines which type of backref to fetch
4530 //rb_num_t key = jit_get_arg(jit, 0);
4531 rb_num_t type = jit_get_arg(jit, 1);
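// Worked example of the encoding this function expects: for $& the operand is
// (('&' << 1) | 0x01), so the low bit selects the special-character path below and
// (type >> 1) recovers '&'; for $1 the operand is (1 << 1) with the low bit clear,
// so (type >> 1) == 1 selects the first capture group via rb_reg_nth_match().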
4533 if (type == 0) {
4534 // not yet implemented
4535 return YJIT_CANT_COMPILE;
4537 else if (type & 0x01) {
4538 // Fetch a "special" backref based on a char encoded by shifting by 1
4540 // Can raise if matchdata uninitialized
4541 jit_prepare_routine_call(jit, ctx, REG0);
4543 // call rb_backref_get()
4544 ADD_COMMENT(cb, "rb_backref_get");
4545 call_ptr(cb, REG0, (void *)rb_backref_get);
4546 mov(cb, C_ARG_REGS[0], RAX);
4548 switch (type >> 1) {
4549 case '&':
4550 ADD_COMMENT(cb, "rb_reg_last_match");
4551 call_ptr(cb, REG0, (void *)rb_reg_last_match);
4552 break;
4553 case '`':
4554 ADD_COMMENT(cb, "rb_reg_match_pre");
4555 call_ptr(cb, REG0, (void *)rb_reg_match_pre);
4556 break;
4557 case '\'':
4558 ADD_COMMENT(cb, "rb_reg_match_post");
4559 call_ptr(cb, REG0, (void *)rb_reg_match_post);
4560 break;
4561 case '+':
4562 ADD_COMMENT(cb, "rb_reg_match_last");
4563 call_ptr(cb, REG0, (void *)rb_reg_match_last);
4564 break;
4565 default:
4566 rb_bug("invalid back-ref");
4569 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4570 mov(cb, stack_ret, RAX);
4572 return YJIT_KEEP_COMPILING;
4574 else {
4575 // Fetch the N-th match from the last backref based on type shifted by 1
4577 // Can raise if matchdata uninitialized
4578 jit_prepare_routine_call(jit, ctx, REG0);
4580 // call rb_backref_get()
4581 ADD_COMMENT(cb, "rb_backref_get");
4582 call_ptr(cb, REG0, (void *)rb_backref_get);
4584 // rb_reg_nth_match((int)(type >> 1), backref);
4585 ADD_COMMENT(cb, "rb_reg_nth_match");
4586 mov(cb, C_ARG_REGS[0], imm_opnd(type >> 1));
4587 mov(cb, C_ARG_REGS[1], RAX);
4588 call_ptr(cb, REG0, (void *)rb_reg_nth_match);
4590 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4591 mov(cb, stack_ret, RAX);
4593 return YJIT_KEEP_COMPILING;
4597 VALUE
4598 rb_vm_getclassvariable(const rb_iseq_t *iseq, const rb_control_frame_t *cfp, ID id, ICVARC ic);
4600 static codegen_status_t
4601 gen_getclassvariable(jitstate_t* jit, ctx_t* ctx, codeblock_t* cb)
4603 // rb_vm_getclassvariable can raise exceptions.
4604 jit_prepare_routine_call(jit, ctx, REG0);
4606 mov(cb, C_ARG_REGS[0], member_opnd(REG_CFP, rb_control_frame_t, iseq));
4607 mov(cb, C_ARG_REGS[1], REG_CFP);
4608 mov(cb, C_ARG_REGS[2], imm_opnd(jit_get_arg(jit, 0)));
4609 mov(cb, C_ARG_REGS[3], imm_opnd(jit_get_arg(jit, 1)));
4611 call_ptr(cb, REG0, (void *)rb_vm_getclassvariable);
4613 x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_UNKNOWN);
4614 mov(cb, stack_top, RAX);
4616 return YJIT_KEEP_COMPILING;
4619 VALUE
4620 rb_vm_setclassvariable(const rb_iseq_t *iseq, const rb_control_frame_t *cfp, ID id, VALUE val, ICVARC ic);
4622 static codegen_status_t
4623 gen_setclassvariable(jitstate_t* jit, ctx_t* ctx, codeblock_t* cb)
4625 // rb_vm_setclassvariable can raise exceptions.
4626 jit_prepare_routine_call(jit, ctx, REG0);
4628 mov(cb, C_ARG_REGS[0], member_opnd(REG_CFP, rb_control_frame_t, iseq));
4629 mov(cb, C_ARG_REGS[1], REG_CFP);
4630 mov(cb, C_ARG_REGS[2], imm_opnd(jit_get_arg(jit, 0)));
4631 mov(cb, C_ARG_REGS[3], ctx_stack_pop(ctx, 1));
4632 mov(cb, C_ARG_REGS[4], imm_opnd(jit_get_arg(jit, 1)));
4634 call_ptr(cb, REG0, (void *)rb_vm_setclassvariable);
4636 return YJIT_KEEP_COMPILING;
4639 static codegen_status_t
4640 gen_opt_getinlinecache(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4642 VALUE jump_offset = jit_get_arg(jit, 0);
4643 VALUE const_cache_as_value = jit_get_arg(jit, 1);
4644 IC ic = (IC)const_cache_as_value;
4646 // See vm_ic_hit_p(). The same conditions are checked in yjit_constant_ic_update().
4647 struct iseq_inline_constant_cache_entry *ice = ic->entry;
4648 if (!ice || // cache not filled
4649 GET_IC_SERIAL(ice) != ruby_vm_global_constant_state /* cache out of date */) {
4650 // In these cases, leave a block that unconditionally side exits
4651 // for the interpreter to invalidate.
4652 return YJIT_CANT_COMPILE;
4655 // Make sure there is an exit for this block as the interpreter might want
4656 // to invalidate this block from yjit_constant_ic_update().
4657 jit_ensure_block_entry_exit(jit);
4659 if (ice->ic_cref) {
4660 // Cache is keyed on a certain lexical scope. Use the interpreter's cache.
4661 uint8_t *side_exit = yjit_side_exit(jit, ctx);
4663 // Call function to verify the cache. It doesn't allocate or call methods.
4664 bool rb_vm_ic_hit_p(IC ic, const VALUE *reg_ep);
4665 mov(cb, C_ARG_REGS[0], const_ptr_opnd((void *)ic));
4666 mov(cb, C_ARG_REGS[1], member_opnd(REG_CFP, rb_control_frame_t, ep));
4667 call_ptr(cb, REG0, (void *)rb_vm_ic_hit_p);
4669 // Check the result. _Bool is one byte in SysV.
4670 test(cb, AL, AL);
4671 jz_ptr(cb, COUNTED_EXIT(jit, side_exit, opt_getinlinecache_miss));
4673 // Push ic->entry->value
4674 mov(cb, REG0, const_ptr_opnd((void *)ic));
4675 mov(cb, REG0, member_opnd(REG0, struct iseq_inline_constant_cache, entry));
4676 x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_UNKNOWN);
4677 mov(cb, REG0, member_opnd(REG0, struct iseq_inline_constant_cache_entry, value));
4678 mov(cb, stack_top, REG0);
4680 else {
4681 // Optimize for single ractor mode.
4682 // FIXME: This leaks when st_insert raises NoMemoryError
4683 if (!assume_single_ractor_mode(jit)) return YJIT_CANT_COMPILE;
4685 // Invalidate output code on any and all constant writes
4686 // FIXME: This leaks when st_insert raises NoMemoryError
4687 assume_stable_global_constant_state(jit);
4689 jit_putobject(jit, ctx, ice->value);
4692 // Jump over the code for filling the cache
4693 uint32_t jump_idx = jit_next_insn_idx(jit) + (int32_t)jump_offset;
4694 gen_direct_jump(
4695 jit,
4696 ctx,
4697 (blockid_t){ .iseq = jit->iseq, .idx = jump_idx }
4700 return YJIT_END_BLOCK;
4703 // Push the explicit block parameter onto the temporary stack. Part of the
4704 // interpreter's scheme for avoiding Proc allocations when delegating
4705 // explicit block parameters.
4706 static codegen_status_t
4707 gen_getblockparamproxy(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4709 // A mirror of the interpreter code. Checking for the case
4710 // where it's pushing rb_block_param_proxy.
4711 uint8_t *side_exit = yjit_side_exit(jit, ctx);
4713 // EP level
4714 uint32_t level = (uint32_t)jit_get_arg(jit, 1);
4716 // Load environment pointer EP from CFP
4717 gen_get_ep(cb, REG0, level);
4719 // Bail when VM_ENV_FLAGS(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM) is non zero
4720 test(cb, mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_FLAGS), imm_opnd(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM));
4721 jnz_ptr(cb, COUNTED_EXIT(jit, side_exit, gbpp_block_param_modified));
4723 // Load the block handler for the current frame
4724 // note, VM_ASSERT(VM_ENV_LOCAL_P(ep))
4725 mov(cb, REG0, mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_SPECVAL));
4727 // Block handler is a tagged pointer. Look at the tag. 0x03 is from VM_BH_ISEQ_BLOCK_P().
4728 and(cb, REG0_8, imm_opnd(0x3));
4730 // Bail unless VM_BH_ISEQ_BLOCK_P(bh). This also checks for null.
4731 cmp(cb, REG0_8, imm_opnd(0x1));
4732 jnz_ptr(cb, COUNTED_EXIT(jit, side_exit, gbpp_block_handler_not_iseq));
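// Tagging recap (cf. the `or REG1, 1` when pushing an iseq block handler in
// gen_send_iseq): iseq block handlers are (captured block pointer | 0x01), ifunc
// handlers use 0x03, and VM_BLOCK_HANDLER_NONE is 0, so checking (bh & 0x3) == 0x1
// both selects iseq blocks and rejects the no-block case.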
4734 // Push rb_block_param_proxy. It's a root, so no need to use jit_mov_gc_ptr.
4735 mov(cb, REG0, const_ptr_opnd((void *)rb_block_param_proxy));
4736 RUBY_ASSERT(!SPECIAL_CONST_P(rb_block_param_proxy));
4737 x86opnd_t top = ctx_stack_push(ctx, TYPE_HEAP);
4738 mov(cb, top, REG0);
4740 return YJIT_KEEP_COMPILING;
4743 static codegen_status_t
4744 gen_invokebuiltin(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4746 const struct rb_builtin_function *bf = (struct rb_builtin_function *)jit_get_arg(jit, 0);
4748 // ec, self, and arguments
4749 if (bf->argc + 2 > NUM_C_ARG_REGS) {
4750 return YJIT_CANT_COMPILE;
4753 // If the calls don't allocate, do they need up to date PC, SP?
4754 jit_prepare_routine_call(jit, ctx, REG0);
4756 // Call the builtin func (ec, recv, arg1, arg2, ...)
4757 mov(cb, C_ARG_REGS[0], REG_EC);
4758 mov(cb, C_ARG_REGS[1], member_opnd(REG_CFP, rb_control_frame_t, self));
4760 // Copy arguments from locals
4761 for (int32_t i = 0; i < bf->argc; i++) {
4762 x86opnd_t stack_opnd = ctx_stack_opnd(ctx, bf->argc - i - 1);
4763 x86opnd_t c_arg_reg = C_ARG_REGS[2 + i];
4764 mov(cb, c_arg_reg, stack_opnd);
4767 call_ptr(cb, REG0, (void *)bf->func_ptr);
4769 // Push the return value
4770 ctx_stack_pop(ctx, bf->argc);
4771 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4772 mov(cb, stack_ret, RAX);
4774 return YJIT_KEEP_COMPILING;
4777 // opt_invokebuiltin_delegate calls a builtin function, like
4778 // invokebuiltin does, but instead of taking arguments from the top of the
4779 // stack uses the argument locals (and self) from the current method.
4780 static codegen_status_t
4781 gen_opt_invokebuiltin_delegate(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4783 const struct rb_builtin_function *bf = (struct rb_builtin_function *)jit_get_arg(jit, 0);
4784 int32_t start_index = (int32_t)jit_get_arg(jit, 1);
4786 // ec, self, and arguments
4787 if (bf->argc + 2 > NUM_C_ARG_REGS) {
4788 return YJIT_CANT_COMPILE;
4791 // If the calls don't allocate, do they need up to date PC, SP?
4792 jit_prepare_routine_call(jit, ctx, REG0);
4794 if (bf->argc > 0) {
4795 // Load environment pointer EP from CFP
4796 mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, ep));
4799 // Call the builtin func (ec, recv, arg1, arg2, ...)
4800 mov(cb, C_ARG_REGS[0], REG_EC);
4801 mov(cb, C_ARG_REGS[1], member_opnd(REG_CFP, rb_control_frame_t, self));
4803 // Copy arguments from locals
4804 for (int32_t i = 0; i < bf->argc; i++) {
4805 const int32_t offs = -jit->iseq->body->local_table_size - VM_ENV_DATA_SIZE + 1 + start_index + i;
4806 x86opnd_t local_opnd = mem_opnd(64, REG0, offs * SIZEOF_VALUE);
4807 x86opnd_t c_arg_reg = C_ARG_REGS[i + 2];
4808 mov(cb, c_arg_reg, local_opnd);
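// Worked example of the offset above (values illustrative): with a local table of 4
// slots, VM_ENV_DATA_SIZE == 3, start_index == 0 and i == 0, offs == -4 - 3 + 1 == -6,
// so the first delegated argument is loaded from EP[-6], just below the env data slots.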
4810 call_ptr(cb, REG0, (void *)bf->func_ptr);
4812 // Push the return value
4813 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4814 mov(cb, stack_ret, RAX);
4816 return YJIT_KEEP_COMPILING;
4819 static int tracing_invalidate_all_i(void *vstart, void *vend, size_t stride, void *data);
4820 static void invalidate_all_blocks_for_tracing(const rb_iseq_t *iseq);
4822 // Invalidate all generated code and patch C method return code to contain
4823 // logic for firing the c_return TracePoint event. Once rb_vm_barrier()
4824 // returns, all other ractors are pausing inside RB_VM_LOCK_ENTER(), which
4825 // means they are inside a C routine. If there is any generated code on the stack,
4826 // it is waiting for a return from a C routine. For every routine call, we
4827 // patch in an exit after the body of the containing VM instruction. This makes
4828 // it so all the invalidated code exits as soon as execution logically reaches
4829 // the next VM instruction. The interpreter takes care of firing the tracing
4830 // event if it so happens that the next VM instruction has one attached.
4832 // The c_return event needs special handling as our codegen never outputs code
4833 // that contains tracing logic. If we let the normal output code run until the
4834 // start of the next VM instruction by relying on the patching scheme above, we
4835 // would fail to fire the c_return event. The interpreter doesn't fire the
4836 // event at an instruction boundary, so simply exiting to the interpreter isn't
4837 // enough. To handle it, we patch in the full logic at the return address. See
4838 // full_cfunc_return().
4840 // In addition to patching, we prevent future entries into invalidated code by
4841 // removing all live blocks from their iseq.
4842 void
4843 rb_yjit_tracing_invalidate_all(void)
4845 if (!rb_yjit_enabled_p()) return;
4847 // Stop other ractors since we are going to patch machine code.
4848 RB_VM_LOCK_ENTER();
4849 rb_vm_barrier();
4851 // Make it so all live block versions are no longer valid branch targets
4852 rb_objspace_each_objects(tracing_invalidate_all_i, NULL);
4854 // Apply patches
4855 const uint32_t old_pos = cb->write_pos;
4856 rb_darray_for(global_inval_patches, patch_idx) {
4857 struct codepage_patch patch = rb_darray_get(global_inval_patches, patch_idx);
4858 cb_set_pos(cb, patch.inline_patch_pos);
4859 uint8_t *jump_target = cb_get_ptr(ocb, patch.outlined_target_pos);
4860 jmp_ptr(cb, jump_target);
4862 cb_set_pos(cb, old_pos);
4864 // Freeze invalidated part of the codepage. We only want to wait for
4865 // running instances of the code to exit from now on, so we shouldn't
4866 // change the code. There could be other ractors sleeping in
4867 // branch_stub_hit(), for example. We could harden this by changing memory
4868 // protection on the frozen range.
4869 RUBY_ASSERT_ALWAYS(yjit_codepage_frozen_bytes <= old_pos && "frozen bytes should increase monotonically");
4870 yjit_codepage_frozen_bytes = old_pos;
4872 cb_mark_all_executable(ocb);
4873 cb_mark_all_executable(cb);
4874 RB_VM_LOCK_LEAVE();
4877 static int
4878 tracing_invalidate_all_i(void *vstart, void *vend, size_t stride, void *data)
4880 VALUE v = (VALUE)vstart;
4881 for (; v != (VALUE)vend; v += stride) {
4882 void *ptr = asan_poisoned_object_p(v);
4883 asan_unpoison_object(v, false);
4885 if (rb_obj_is_iseq(v)) {
4886 rb_iseq_t *iseq = (rb_iseq_t *)v;
4887 invalidate_all_blocks_for_tracing(iseq);
4890 asan_poison_object_if(ptr, v);
4892 return 0;
4895 static void
4896 invalidate_all_blocks_for_tracing(const rb_iseq_t *iseq)
4898 struct rb_iseq_constant_body *body = iseq->body;
4899 if (!body) return; // iseq yet to be initialized
4901 ASSERT_vm_locking();
4903 // Empty all blocks on the iseq so we don't compile new blocks that jump to the
4904 // invalidated region.
4905 // TODO Leaking the blocks for now since we might have situations where
4906 // a different ractor is waiting in branch_stub_hit(). If we free the block
4907 // that ractor can wake up with a dangling block.
4908 rb_darray_for(body->yjit_blocks, version_array_idx) {
4909 rb_yjit_block_array_t version_array = rb_darray_get(body->yjit_blocks, version_array_idx);
4910 rb_darray_for(version_array, version_idx) {
4911 // Stop listening for invalidation events like basic operation redefinition.
4912 block_t *block = rb_darray_get(version_array, version_idx);
4913 yjit_unlink_method_lookup_dependency(block);
4914 yjit_block_assumptions_free(block);
4916 rb_darray_free(version_array);
4918 rb_darray_free(body->yjit_blocks);
4919 body->yjit_blocks = NULL;
4921 #if USE_MJIT
4922 // Reset output code entry point
4923 body->jit_func = NULL;
4924 #endif
4927 static void
4928 yjit_reg_op(int opcode, codegen_fn gen_fn)
4930 RUBY_ASSERT(opcode >= 0 && opcode < VM_INSTRUCTION_SIZE);
4931 // Check that the op wasn't previously registered
4932 RUBY_ASSERT(gen_fns[opcode] == NULL);
4934 gen_fns[opcode] = gen_fn;
4937 void
4938 yjit_init_codegen(void)
4940 // Initialize the code blocks
4941 uint32_t mem_size = rb_yjit_opts.exec_mem_size * 1024 * 1024;
4942 uint8_t *mem_block = alloc_exec_mem(mem_size);
4944 cb = &block;
4945 cb_init(cb, mem_block, mem_size/2);
4947 ocb = &outline_block;
4948 cb_init(ocb, mem_block + mem_size/2, mem_size/2);
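// For example, with rb_yjit_opts.exec_mem_size == 256 this requests 256 MiB of
// executable memory and splits it evenly: the first 128 MiB backs the inline code
// block (cb) and the second 128 MiB backs the outlined code block (ocb).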
4950 // Generate the interpreter exit code for leave
4951 leave_exit_code = yjit_gen_leave_exit(cb);
4953 // Generate full exit code for C func
4954 gen_full_cfunc_return();
4955 cb_mark_all_executable(cb);
4957 // Map YARV opcodes to the corresponding codegen functions
4958 yjit_reg_op(BIN(nop), gen_nop);
4959 yjit_reg_op(BIN(dup), gen_dup);
4960 yjit_reg_op(BIN(dupn), gen_dupn);
4961 yjit_reg_op(BIN(swap), gen_swap);
4962 yjit_reg_op(BIN(setn), gen_setn);
4963 yjit_reg_op(BIN(topn), gen_topn);
4964 yjit_reg_op(BIN(pop), gen_pop);
4965 yjit_reg_op(BIN(adjuststack), gen_adjuststack);
4966 yjit_reg_op(BIN(newarray), gen_newarray);
4967 yjit_reg_op(BIN(duparray), gen_duparray);
4968 yjit_reg_op(BIN(duphash), gen_duphash);
4969 yjit_reg_op(BIN(splatarray), gen_splatarray);
4970 yjit_reg_op(BIN(expandarray), gen_expandarray);
4971 yjit_reg_op(BIN(newhash), gen_newhash);
4972 yjit_reg_op(BIN(newrange), gen_newrange);
4973 yjit_reg_op(BIN(concatstrings), gen_concatstrings);
4974 yjit_reg_op(BIN(putnil), gen_putnil);
4975 yjit_reg_op(BIN(putobject), gen_putobject);
4976 yjit_reg_op(BIN(putstring), gen_putstring);
4977 yjit_reg_op(BIN(putobject_INT2FIX_0_), gen_putobject_int2fix);
4978 yjit_reg_op(BIN(putobject_INT2FIX_1_), gen_putobject_int2fix);
4979 yjit_reg_op(BIN(putself), gen_putself);
4980 yjit_reg_op(BIN(putspecialobject), gen_putspecialobject);
4981 yjit_reg_op(BIN(getlocal), gen_getlocal);
4982 yjit_reg_op(BIN(getlocal_WC_0), gen_getlocal_wc0);
4983 yjit_reg_op(BIN(getlocal_WC_1), gen_getlocal_wc1);
4984 yjit_reg_op(BIN(setlocal), gen_setlocal);
4985 yjit_reg_op(BIN(setlocal_WC_0), gen_setlocal_wc0);
4986 yjit_reg_op(BIN(setlocal_WC_1), gen_setlocal_wc1);
4987 yjit_reg_op(BIN(getinstancevariable), gen_getinstancevariable);
4988 yjit_reg_op(BIN(setinstancevariable), gen_setinstancevariable);
4989 yjit_reg_op(BIN(defined), gen_defined);
4990 yjit_reg_op(BIN(checktype), gen_checktype);
4991 yjit_reg_op(BIN(checkkeyword), gen_checkkeyword);
4992 yjit_reg_op(BIN(opt_lt), gen_opt_lt);
4993 yjit_reg_op(BIN(opt_le), gen_opt_le);
4994 yjit_reg_op(BIN(opt_ge), gen_opt_ge);
4995 yjit_reg_op(BIN(opt_gt), gen_opt_gt);
4996 yjit_reg_op(BIN(opt_eq), gen_opt_eq);
4997 yjit_reg_op(BIN(opt_neq), gen_opt_neq);
4998 yjit_reg_op(BIN(opt_aref), gen_opt_aref);
4999 yjit_reg_op(BIN(opt_aset), gen_opt_aset);
5000 yjit_reg_op(BIN(opt_and), gen_opt_and);
5001 yjit_reg_op(BIN(opt_or), gen_opt_or);
5002 yjit_reg_op(BIN(opt_minus), gen_opt_minus);
5003 yjit_reg_op(BIN(opt_plus), gen_opt_plus);
5004 yjit_reg_op(BIN(opt_mult), gen_opt_mult);
5005 yjit_reg_op(BIN(opt_div), gen_opt_div);
5006 yjit_reg_op(BIN(opt_mod), gen_opt_mod);
5007 yjit_reg_op(BIN(opt_ltlt), gen_opt_ltlt);
5008 yjit_reg_op(BIN(opt_nil_p), gen_opt_nil_p);
5009 yjit_reg_op(BIN(opt_empty_p), gen_opt_empty_p);
5010 yjit_reg_op(BIN(opt_str_freeze), gen_opt_str_freeze);
5011 yjit_reg_op(BIN(opt_str_uminus), gen_opt_str_uminus);
5012 yjit_reg_op(BIN(opt_not), gen_opt_not);
5013 yjit_reg_op(BIN(opt_size), gen_opt_size);
5014 yjit_reg_op(BIN(opt_length), gen_opt_length);
5015 yjit_reg_op(BIN(opt_regexpmatch2), gen_opt_regexpmatch2);
5016 yjit_reg_op(BIN(opt_getinlinecache), gen_opt_getinlinecache);
5017 yjit_reg_op(BIN(invokebuiltin), gen_invokebuiltin);
5018 yjit_reg_op(BIN(opt_invokebuiltin_delegate), gen_opt_invokebuiltin_delegate);
5019 yjit_reg_op(BIN(opt_invokebuiltin_delegate_leave), gen_opt_invokebuiltin_delegate);
5020 yjit_reg_op(BIN(opt_case_dispatch), gen_opt_case_dispatch);
5021 yjit_reg_op(BIN(branchif), gen_branchif);
5022 yjit_reg_op(BIN(branchunless), gen_branchunless);
5023 yjit_reg_op(BIN(branchnil), gen_branchnil);
5024 yjit_reg_op(BIN(jump), gen_jump);
5025 yjit_reg_op(BIN(getblockparamproxy), gen_getblockparamproxy);
5026 yjit_reg_op(BIN(opt_send_without_block), gen_opt_send_without_block);
5027 yjit_reg_op(BIN(send), gen_send);
5028 yjit_reg_op(BIN(invokesuper), gen_invokesuper);
5029 yjit_reg_op(BIN(leave), gen_leave);
5030 yjit_reg_op(BIN(getglobal), gen_getglobal);
5031 yjit_reg_op(BIN(setglobal), gen_setglobal);
5032 yjit_reg_op(BIN(anytostring), gen_anytostring);
5033 yjit_reg_op(BIN(objtostring), gen_objtostring);
5034 yjit_reg_op(BIN(toregexp), gen_toregexp);
5035 yjit_reg_op(BIN(intern), gen_intern);
5036 yjit_reg_op(BIN(getspecial), gen_getspecial);
5037 yjit_reg_op(BIN(getclassvariable), gen_getclassvariable);
5038 yjit_reg_op(BIN(setclassvariable), gen_setclassvariable);
5040 yjit_method_codegen_table = st_init_numtable();
5042 // Specialization for C methods. See yjit_reg_method() for details.
5043 yjit_reg_method(rb_cBasicObject, "!", jit_rb_obj_not);
5045 yjit_reg_method(rb_cNilClass, "nil?", jit_rb_true);
5046 yjit_reg_method(rb_mKernel, "nil?", jit_rb_false);
5048 yjit_reg_method(rb_cBasicObject, "==", jit_rb_obj_equal);
5049 yjit_reg_method(rb_cBasicObject, "equal?", jit_rb_obj_equal);
5050 yjit_reg_method(rb_mKernel, "eql?", jit_rb_obj_equal);
5051 yjit_reg_method(rb_cModule, "==", jit_rb_obj_equal);
5052 yjit_reg_method(rb_cSymbol, "==", jit_rb_obj_equal);
5053 yjit_reg_method(rb_cSymbol, "===", jit_rb_obj_equal);
5055 // rb_str_to_s() methods in string.c
5056 yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s);
5057 yjit_reg_method(rb_cString, "to_str", jit_rb_str_to_s);
5058 yjit_reg_method(rb_cString, "bytesize", jit_rb_str_bytesize);
5060 // Thread.current
5061 yjit_reg_method(rb_singleton_class(rb_cThread), "current", jit_thread_s_current);