1 /**********************************************************************
3 rjit_c.c - C helpers for RJIT
5 Copyright (C) 2017 Takashi Kokubun <k0kubun@ruby-lang.org>.
7 **********************************************************************/
9 #include "rjit.h" // defines USE_RJIT
14 #include "include/ruby/assert.h"
15 #include "include/ruby/debug.h"
17 #include "internal/compile.h"
18 #include "internal/fixnum.h"
19 #include "internal/hash.h"
20 #include "internal/sanitizers.h"
21 #include "internal/gc.h"
22 #include "internal/proc.h"
24 #include "vm_insnhelper.h"
26 #include "probes_helper.h"
29 #include "insns_info.inc"
31 // For mmap(), sysconf()
39 #if defined(MAP_FIXED_NOREPLACE) && defined(_SC_PAGESIZE)
40 // Align the current write position to a multiple of bytes
// ptr:      address to be aligned.
// multiple: alignment boundary in bytes.
// The visible statements compute the remainder of the address modulo
// `multiple` and the padding (`multiple - rem`) needed to reach the next
// boundary.
// NOTE(review): the address is narrowed to uint32_t before the modulo, so the
// remainder equals the full-width one only when `multiple` divides 2^32
// (e.g. a power-of-two page size).
42 align_ptr(uint8_t *ptr
, uint32_t multiple
)
44 // Compute the pointer modulo the given alignment boundary
45 uint32_t rem
= ((uint32_t)(uintptr_t)ptr
) % multiple
;
47 // If the pointer is already aligned, stop
51 // Pad the pointer by the necessary amount to align it
52 uint32_t pad
= multiple
- rem
;
58 // Address space reservation. Memory pages are mapped on an as needed basis.
59 // See the Rust mm module for details.
// mem_size: size of the region to reserve.
// Two strategies are visible, selected at compile time:
//  - With MAP_FIXED_NOREPLACE and _SC_PAGESIZE: start at this function's own
//    address aligned to the page size and probe upward in 4 MiB steps while
//    the candidate stays below cfunc_sample_addr + INT32_MAX.
//  - Otherwise: pass this function's address as a hint and, on MAP_FAILED,
//    retry without the hint.
// If the mapping still failed, the error is printed with perror(); the ENOMEM
// case is singled out before rb_bug("mmap failed").
// NOTE(review): the mmap() call heads are not visible in this listing; the
// description relies on the visible flag arguments and the existing comments.
61 rjit_reserve_addr_space(uint32_t mem_size
)
67 #if defined(MAP_FIXED_NOREPLACE) && defined(_SC_PAGESIZE)
68 uint32_t const page_size
= (uint32_t)sysconf(_SC_PAGESIZE
);
69 uint8_t *const cfunc_sample_addr
= (void *)&rjit_reserve_addr_space
;
70 uint8_t *const probe_region_end
= cfunc_sample_addr
+ INT32_MAX
;
71 // Align the requested address to page size
72 uint8_t *req_addr
= align_ptr(cfunc_sample_addr
, page_size
);
74 // Probe for addresses close to this function using MAP_FIXED_NOREPLACE
75 // to improve odds of being in range for 32-bit relative call instructions.
81 MAP_PRIVATE
| MAP_ANONYMOUS
| MAP_FIXED_NOREPLACE
,
86 // If we succeeded, stop
87 if (mem_block
!= MAP_FAILED
) {
// Advance the candidate address by 4 MiB for the next probe.
92 req_addr
+= 4 * 1024 * 1024;
93 } while (req_addr
< probe_region_end
);
95 // On MacOS and other platforms
97 // Try to map a chunk of memory as executable
99 (void *)rjit_reserve_addr_space
,
102 MAP_PRIVATE
| MAP_ANONYMOUS
,
109 if (mem_block
== MAP_FAILED
) {
110 // Try again without the address hint (e.g., valgrind)
115 MAP_PRIVATE
| MAP_ANONYMOUS
,
121 // Check that the memory mapping was successful
122 if (mem_block
== MAP_FAILED
) {
// NOTE(review): the perror() prefix below still says "yjit" although this is
// the RJIT helper file.
123 perror("ruby: yjit: mmap:");
124 if(errno
== ENOMEM
) {
125 // No crash report if it's only insufficient memory
128 rb_bug("mmap failed");
133 // Windows not supported for now
// Make a memory region readable and writable.
// rb_mem_block: Ruby integer holding the start address (read with NUM2SIZET).
// rb_mem_size:  Ruby integer holding the region size (read with NUM2UINT).
// Returns a Ruby boolean: true when mprotect() returned 0.
// ec and self are not used by the visible statements.
139 mprotect_write(rb_execution_context_t
*ec
, VALUE self
, VALUE rb_mem_block
, VALUE rb_mem_size
)
141 void *mem_block
= (void *)NUM2SIZET(rb_mem_block
);
142 uint32_t mem_size
= NUM2UINT(rb_mem_size
);
143 return RBOOL(mprotect(mem_block
, mem_size
, PROT_READ
| PROT_WRITE
) == 0);
// Make a memory region readable and executable.
// rb_mem_block: Ruby integer holding the start address (read with NUM2SIZET).
// rb_mem_size:  Ruby integer holding the region size (read with NUM2UINT).
// Returns Qfalse without calling mprotect() when the size is 0. A failing
// mprotect() is fatal: rb_bug() is called with the address, the size, and the
// errno message.
// NOTE(review): the return value on success is not visible in this listing.
147 mprotect_exec(rb_execution_context_t
*ec
, VALUE self
, VALUE rb_mem_block
, VALUE rb_mem_size
)
149 void *mem_block
= (void *)NUM2SIZET(rb_mem_block
);
150 uint32_t mem_size
= NUM2UINT(rb_mem_size
);
151 if (mem_size
== 0) return Qfalse
; // Some platforms return an error for mem_size 0.
// mprotect() returns non-zero on failure.
153 if (mprotect(mem_block
, mem_size
, PROT_READ
| PROT_EXEC
)) {
154 rb_bug("Couldn't make JIT page (%p, %lu bytes) executable, errno: %s",
155 mem_block
, (unsigned long)mem_size
, strerror(errno
));
// Invoke the proc referred to by `recv`.
// Resolves the proc pointer with GetProcPtr() and forwards ec, argc, argv,
// kw_splat and block_handler unchanged to rb_vm_invoke_proc(), returning its
// result.
// NOTE(review): `recv` is declared as VALUE * here while GetProcPtr() is
// given a plain VALUE in rjit_get_proc_ptr() — confirm this is intended.
161 rjit_optimized_call(VALUE
*recv
, rb_execution_context_t
*ec
, int argc
, VALUE
*argv
, int kw_splat
, VALUE block_handler
)
164 GetProcPtr(recv
, proc
);
165 return rb_vm_invoke_proc(ec
, proc
, argc
, argv
, kw_splat
, block_handler
);
// Negation of rb_str_eql_internal(): returns Qfalse when it reports Qtrue for
// (str1, str2), and Qtrue for any other result.
169 rjit_str_neq_internal(VALUE str1
, VALUE str2
)
171 return rb_str_eql_internal(str1
, str2
) == Qtrue
? Qfalse
: Qtrue
;
// Append the bytes of str2 (RSTRING_PTR / RSTRING_LEN) to str1 via
// rb_str_cat() and return that call's result.
175 rjit_str_simple_append(VALUE str1
, VALUE str2
)
177 return rb_str_cat(str1
, RSTRING_PTR(str2
), RSTRING_LEN(str2
));
// Return rb_ary_subseq(ary, beg, len) where len is the full length of `ary`,
// i.e. the requested length is the whole array length regardless of `beg`.
181 rjit_rb_ary_subseq_length(VALUE ary
, long beg
)
183 long len
= RARRAY_LEN(ary
);
184 return rb_ary_subseq(ary
, beg
, len
);
// Build a Hash from the keyword arguments of a call site.
// ci: call info; its kwarg table (vm_ci_kwarg) supplies the keyword names.
// sp: stack pointer; the keyword values are read from the kw_len slots
//     immediately below it (sp[-kw_len] .. sp[-1]).
// NOTE(review): kw_arg is dereferenced without a NULL check, so callers
// presumably guarantee that `ci` carries keyword arguments — confirm.
188 rjit_build_kwhash(const struct rb_callinfo
*ci
, VALUE
*sp
)
190 const struct rb_callinfo_kwarg
*kw_arg
= vm_ci_kwarg(ci
);
191 int kw_len
= kw_arg
->keyword_len
;
192 VALUE hash
= rb_hash_new_with_size(kw_len
);
// Pair the i-th keyword name with the i-th value below sp.
194 for (int i
= 0; i
< kw_len
; i
++) {
195 VALUE key
= kw_arg
->keywords
[i
];
196 VALUE val
= *(sp
- kw_len
+ i
);
197 rb_hash_aset(hash
, key
, val
);
202 // The code we generate in gen_send_cfunc() doesn't fire the c_return TracePoint event
203 // like the interpreter. When tracing for c_return is enabled, we patch the code after
204 // the C method return to call into this to fire the event.
// ec:           execution context whose current frame is the C function frame.
// return_value: value returned by the C method; passed to the event hook and
//               then stored at ec->cfp->sp[0].
206 rjit_full_cfunc_return(rb_execution_context_t
*ec
, VALUE return_value
)
208 rb_control_frame_t
*cfp
= ec
->cfp
;
// Sanity checks: `ec` is the current execution context's frame, and that frame
// is a C function frame whose method entry is a CFUNC.
209 RUBY_ASSERT_ALWAYS(cfp
== GET_EC()->cfp
);
210 const rb_callable_method_entry_t
*me
= rb_vm_frame_method_entry(cfp
);
212 RUBY_ASSERT_ALWAYS(RUBYVM_CFUNC_FRAME_P(cfp
));
213 RUBY_ASSERT_ALWAYS(me
->def
->type
== VM_METHOD_TYPE_CFUNC
);
215 // CHECK_CFP_CONSISTENCY("full_cfunc_return"); TODO revive this
217 // Pop the C func's frame and fire the c_return TracePoint event
218 // Note that this is the same order as vm_call_cfunc_with_frame().
220 EXEC_EVENT_HOOK(ec
, RUBY_EVENT_C_RETURN
, cfp
->self
, me
->def
->original_id
, me
->called_id
, me
->owner
, return_value
);
221 // Note, this deviates from the interpreter in that users need to enable
222 // a c_return TracePoint for this DTrace hook to work. A reasonable change
223 // since the Ruby return event works this way as well.
224 RUBY_DTRACE_CMETHOD_RETURN_HOOK(ec
, me
->owner
, me
->def
->original_id
);
226 // Push return value into the caller's stack. We know that it's a frame that
227 // uses cfp->sp because we are patching a call done with gen_send_cfunc().
228 ec
->cfp
->sp
[0] = return_value
;
// Resolve the proc pointer behind the Ruby object `procv` using GetProcPtr().
// NOTE(review): the declaration of `proc` and the return statement are not
// visible in this listing.
233 rjit_get_proc_ptr(VALUE procv
)
236 GetProcPtr(procv
, proc
);
240 // Use the same buffer size as Stackprof.
241 #define BUFF_LEN 2048
243 extern VALUE rb_rjit_raw_samples
;
244 extern VALUE rb_rjit_line_samples
;
// Record the current Ruby stack for a side exit into rb_rjit_raw_samples and
// rb_rjit_line_samples.
// exit_pc: PC of the exiting instruction; the opcode is decoded from the
//          encoded handler stored there.
// Each sample occupies stack_length + 3 entries in both arrays: the stack
// length, one entry per frame, the insn, and a count. When the sample at the
// tail of rb_rjit_raw_samples has the same stack length, it is compared frame
// by frame and, if already seen, only its count is incremented.
// NOTE(review): several loop and branch lines are missing from this listing;
// the summary follows the visible statements and existing comments.
247 rjit_record_exit_stack(const VALUE
*exit_pc
)
249 // Let Primitive.rjit_stop_stats stop this
250 if (!rb_rjit_call_p
) return;
252 // Get the opcode from the encoded insn handler at this PC
253 int insn
= rb_vm_insn_addr2opcode((void *)*exit_pc
);
255 // Create 2 array buffers to be used to collect frames and lines.
256 VALUE frames_buffer
[BUFF_LEN
] = { 0 };
257 int lines_buffer
[BUFF_LEN
] = { 0 };
259 // Records call frame and line information for each method entry into two
260 // temporary buffers. Returns the number of times we added to the buffer (ie
261 // the length of the stack).
263 // Call frame info is stored in the frames_buffer, line number information
264 // in the lines_buffer. The first argument is the start point and the second
265 // argument is the buffer limit, set at 2048.
266 int stack_length
= rb_profile_frames(0, BUFF_LEN
, frames_buffer
, lines_buffer
);
267 int samples_length
= stack_length
+ 3; // 3: length, insn, count
269 // If rb_rjit_raw_samples holds at least as many entries as the current sample
270 // needs, we might have seen this stack trace previously.
271 int prev_stack_len_index
= (int)RARRAY_LEN(rb_rjit_raw_samples
) - samples_length
;
272 VALUE prev_stack_len_obj
;
273 if (RARRAY_LEN(rb_rjit_raw_samples
) >= samples_length
&& FIXNUM_P(prev_stack_len_obj
= RARRAY_AREF(rb_rjit_raw_samples
, prev_stack_len_index
))) {
274 int prev_stack_len
= NUM2INT(prev_stack_len_obj
);
275 int idx
= stack_length
- 1;
276 int prev_frame_idx
= 0;
277 bool seen_already
= true;
279 // If the previous stack length and current stack length are equal,
280 // loop and compare the current frame to the previous frame. If they are
281 // not equal, set seen_already to false and break out of the loop.
282 if (prev_stack_len
== stack_length
) {
284 VALUE current_frame
= frames_buffer
[idx
];
285 VALUE prev_frame
= RARRAY_AREF(rb_rjit_raw_samples
, prev_stack_len_index
+ prev_frame_idx
+ 1);
287 // If the current frame and previous frame are not equal, set
288 // seen_already to false and break out of the loop.
289 if (current_frame
!= prev_frame
) {
290 seen_already
= false;
298 // If we know we've seen this stack before, increment the counter by 1.
// The count is the last element of both sample arrays.
300 int prev_idx
= (int)RARRAY_LEN(rb_rjit_raw_samples
) - 1;
301 int prev_count
= NUM2INT(RARRAY_AREF(rb_rjit_raw_samples
, prev_idx
));
302 int new_count
= prev_count
+ 1;
304 rb_ary_store(rb_rjit_raw_samples
, prev_idx
, INT2NUM(new_count
));
305 rb_ary_store(rb_rjit_line_samples
, prev_idx
, INT2NUM(new_count
));
// New sample: start with the stack length in both arrays.
311 rb_ary_push(rb_rjit_raw_samples
, INT2NUM(stack_length
));
312 rb_ary_push(rb_rjit_line_samples
, INT2NUM(stack_length
));
// Frames are read starting from the last buffer index.
314 int idx
= stack_length
- 1;
317 VALUE frame
= frames_buffer
[idx
];
318 int line
= lines_buffer
[idx
];
320 rb_ary_push(rb_rjit_raw_samples
, frame
);
321 rb_ary_push(rb_rjit_line_samples
, INT2NUM(line
));
326 // Push the insn value into the rb_rjit_raw_samples array.
327 rb_ary_push(rb_rjit_raw_samples
, INT2NUM(insn
));
329 // Push the current line onto the rb_rjit_line_samples array. This
330 // points to the line in insns.def.
// NOTE(review): the value pushed is the current array length minus one.
331 int line
= (int)RARRAY_LEN(rb_rjit_line_samples
) - 1;
332 rb_ary_push(rb_rjit_line_samples
, INT2NUM(line
));
334 // Push number of times seen onto the stack, which is 1
335 // because it's the first time we've seen it.
336 rb_ary_push(rb_rjit_raw_samples
, INT2NUM(1));
337 rb_ary_push(rb_rjit_line_samples
, INT2NUM(1));
340 // For a given raw_sample (frame), set the hash with the caller's
341 // name, file, and line number. Return the hash with collected frame_info.
// hash:  frames table, keyed by the frame's VALUE converted to a Ruby integer.
// frame: profile frame as returned through rb_profile_frames().
// A frame whose id already maps to a truthy entry is detected up front.
// Otherwise a frame_info hash is created with :name, :file, zeroed :samples
// and :total_samples, empty :edges and :lines, and :line when the first line
// number is not 0.
343 rjit_add_frame(VALUE hash
, VALUE frame
)
345 VALUE frame_id
= SIZET2NUM(frame
);
// Already registered?
347 if (RTEST(rb_hash_aref(hash
, frame_id
))) {
351 VALUE frame_info
= rb_hash_new();
352 // Full label for the frame
353 VALUE name
= rb_profile_frame_full_label(frame
);
354 // Absolute path of the frame from rb_iseq_realpath
355 VALUE file
= rb_profile_frame_absolute_path(frame
);
356 // Line number of the frame
357 VALUE line
= rb_profile_frame_first_lineno(frame
);
359 // If absolute path isn't available use the rb_iseq_path
361 file
= rb_profile_frame_path(frame
);
364 rb_hash_aset(frame_info
, ID2SYM(rb_intern("name")), name
);
365 rb_hash_aset(frame_info
, ID2SYM(rb_intern("file")), file
);
366 rb_hash_aset(frame_info
, ID2SYM(rb_intern("samples")), INT2NUM(0));
367 rb_hash_aset(frame_info
, ID2SYM(rb_intern("total_samples")), INT2NUM(0));
368 rb_hash_aset(frame_info
, ID2SYM(rb_intern("edges")), rb_hash_new());
369 rb_hash_aset(frame_info
, ID2SYM(rb_intern("lines")), rb_hash_new());
// Only record :line when a first line number is known (non-zero).
371 if (line
!= INT2FIX(0)) {
372 rb_hash_aset(frame_info
, ID2SYM(rb_intern("line")), line
);
375 rb_hash_aset(hash
, frame_id
, frame_info
);
// Convert the recorded exit samples into a result Hash with three keys:
//   :raw    - copy of rb_rjit_raw_samples with frames converted by SIZET2NUM
//   :lines  - copy of rb_rjit_line_samples
//   :frames - frame-id => frame_info table built with rjit_add_frame()
// Each sample is read as: stack length, that many frames, insn, count.
// NOTE(review): the declaration and the increments of `idx` are not visible in
// this listing.
380 rjit_exit_traces(void)
382 int samples_len
= (int)RARRAY_LEN(rb_rjit_raw_samples
);
// Both sample arrays are expected to have the same length.
383 RUBY_ASSERT(samples_len
== RARRAY_LEN(rb_rjit_line_samples
));
385 VALUE result
= rb_hash_new();
386 VALUE raw_samples
= rb_ary_new_capa(samples_len
);
387 VALUE line_samples
= rb_ary_new_capa(samples_len
);
388 VALUE frames
= rb_hash_new();
391 // While the index is less than samples_len, parse rb_rjit_raw_samples and
392 // rb_rjit_line_samples, then add casted values to raw_samples and line_samples array.
393 while (idx
< samples_len
) {
// First entry of a sample: the number of frames that follow.
394 int num
= NUM2INT(RARRAY_AREF(rb_rjit_raw_samples
, idx
));
395 int line_num
= NUM2INT(RARRAY_AREF(rb_rjit_line_samples
, idx
));
398 rb_ary_push(raw_samples
, SIZET2NUM(num
));
399 rb_ary_push(line_samples
, INT2NUM(line_num
));
401 // Loop over the `num` frames of this sample and add each to the
402 // frames hash. Also push the current value onto the raw_samples
403 // and line_samples array respectively.
404 for (int o
= 0; o
< num
; o
++) {
405 rjit_add_frame(frames
, RARRAY_AREF(rb_rjit_raw_samples
, idx
));
406 rb_ary_push(raw_samples
, SIZET2NUM(RARRAY_AREF(rb_rjit_raw_samples
, idx
)));
407 rb_ary_push(line_samples
, RARRAY_AREF(rb_rjit_line_samples
, idx
));
411 // insn BIN and lineno
412 rb_ary_push(raw_samples
, RARRAY_AREF(rb_rjit_raw_samples
, idx
));
413 rb_ary_push(line_samples
, RARRAY_AREF(rb_rjit_line_samples
, idx
));
416 // Number of times seen
417 rb_ary_push(raw_samples
, RARRAY_AREF(rb_rjit_raw_samples
, idx
));
418 rb_ary_push(line_samples
, RARRAY_AREF(rb_rjit_line_samples
, idx
));
422 // Add the raw_samples, line_samples, and frames to the results
424 rb_hash_aset(result
, ID2SYM(rb_intern("raw")), raw_samples
);
425 rb_hash_aset(result
, ID2SYM(rb_intern("lines")), line_samples
);
426 rb_hash_aset(result
, ID2SYM(rb_intern("frames")), frames
);
431 // An offsetof implementation that works for unnamed struct and union.
432 // Multiplying by 8 for compatibility with libclang's offsetof.
433 #define OFFSETOF(ptr, member) RB_SIZE2NUM(((char *)&ptr.member - (char*)&ptr) * 8)
435 #define SIZEOF(type) RB_SIZE2NUM(sizeof(type))
436 #define SIGNED_TYPE_P(type) RBOOL((type)(-1) < (type)(1))
438 // Insn side exit counters
439 static size_t rjit_insn_exits
[VM_INSTRUCTION_SIZE
] = { 0 };
441 // macOS: brew install capstone
442 // Ubuntu/Debian: apt-get install libcapstone-dev
443 // Fedora: dnf -y install capstone-devel
444 #ifdef HAVE_LIBCAPSTONE
445 #include <capstone/capstone.h>
448 // Return an array of [address, mnemonic, op_str]
// from: Ruby integer, start address of the machine code to disassemble.
// to:   Ruby integer, end address; `to - from` bytes are disassembled.
// test: when truthy, reported addresses start from 0 instead of `from`.
// Without HAVE_LIBCAPSTONE the disassembly section is compiled out and
// `result` stays an empty array. Raises RuntimeError when cs_open() fails.
// ec and self are not used by the visible statements.
450 dump_disasm(rb_execution_context_t
*ec
, VALUE self
, VALUE from
, VALUE to
, VALUE test
)
452 VALUE result
= rb_ary_new();
453 #ifdef HAVE_LIBCAPSTONE
454 // Prepare for calling cs_disasm
// The handle is opened for x86 in 64-bit mode.
456 if (cs_open(CS_ARCH_X86
, CS_MODE_64
, &handle
) != CS_ERR_OK
) {
457 rb_raise(rb_eRuntimeError
, "failed to make Capstone handle");
459 size_t from_addr
= NUM2SIZET(from
);
460 size_t to_addr
= NUM2SIZET(to
);
462 // Call cs_disasm and convert results to a Ruby array
464 size_t base_addr
= RTEST(test
) ? 0 : from_addr
; // On tests, start from 0 for output stability.
// A count argument of 0 asks Capstone to disassemble as many instructions as
// fit in the buffer.
465 size_t count
= cs_disasm(handle
, (const uint8_t *)from_addr
, to_addr
- from_addr
, base_addr
, 0, &insns
);
466 for (size_t i
= 0; i
< count
; i
++) {
467 VALUE vals
= rb_ary_new_from_args(3, LONG2NUM(insns
[i
].address
), rb_str_new2(insns
[i
].mnemonic
), rb_str_new2(insns
[i
].op_str
));
468 rb_ary_push(result
, vals
);
471 // Free memory used by capstone
472 cs_free(insns
, count
);
478 // Same as `RubyVM::RJIT.enabled?`, but this is used before it's defined.
// Returns the rb_rjit_enabled flag as a Ruby boolean. ec and self are not
// used by the visible statements.
480 rjit_enabled_p(rb_execution_context_t
*ec
, VALUE self
)
482 return RBOOL(rb_rjit_enabled
);
// Callback passed to rb_objspace_each_objects() by rjit_for_each_iseq().
// vstart/vend: bounds of the object slots to scan.
// stride:      distance in bytes between consecutive slots.
// data:        the Ruby block (a VALUE) to call for every iseq found.
// Each slot is unpoisoned for ASAN while inspected and re-poisoned afterwards
// if it had been poisoned. For every slot that is an iseq, the block is
// called with the wrapper returned by rb_rjit_iseq_new().
// NOTE(review): the return statement is not visible in this listing.
486 for_each_iseq_i(void *vstart
, void *vend
, size_t stride
, void *data
)
488 VALUE block
= (VALUE
)data
;
489 VALUE v
= (VALUE
)vstart
;
490 for (; v
!= (VALUE
)vend
; v
+= stride
) {
// Remember whether the slot was poisoned so it can be restored below.
491 void *ptr
= asan_poisoned_object_p(v
);
492 asan_unpoison_object(v
, false);
494 if (rb_obj_is_iseq(v
)) {
495 extern VALUE
rb_rjit_iseq_new(rb_iseq_t
*iseq
);
496 rb_iseq_t
*iseq
= (rb_iseq_t
*)v
;
497 rb_funcall(block
, rb_intern("call"), 1, rb_rjit_iseq_new(iseq
));
500 asan_poison_object_if(ptr
, v
);
// Walk every object in the object space with for_each_iseq_i(), passing
// `block` through as the callback's data pointer. ec and self are not used by
// the visible statements.
// NOTE(review): the return statement is not visible in this listing.
506 rjit_for_each_iseq(rb_execution_context_t
*ec
, VALUE self
, VALUE block
)
508 rb_objspace_each_objects(for_each_iseq_i
, (void *)block
);
512 // bindgen references
513 extern ID
rb_get_symbol_id(VALUE name
);
514 extern VALUE
rb_fix_aref(VALUE fix
, VALUE idx
);
515 extern VALUE
rb_str_getbyte(VALUE str
, VALUE index
);
516 extern VALUE
rb_vm_concat_array(VALUE ary1
, VALUE ary2st
);
517 extern VALUE
rb_vm_get_ev_const(rb_execution_context_t
*ec
, VALUE orig_klass
, ID id
, VALUE allow_nil
);
518 extern VALUE
rb_vm_getclassvariable(const rb_iseq_t
*iseq
, const rb_control_frame_t
*cfp
, ID id
, ICVARC ic
);
519 extern VALUE
rb_vm_opt_newarray_min(rb_execution_context_t
*ec
, rb_num_t num
, const VALUE
*ptr
);
520 extern VALUE
rb_vm_opt_newarray_max(rb_execution_context_t
*ec
, rb_num_t num
, const VALUE
*ptr
);
521 extern VALUE
rb_vm_opt_newarray_hash(rb_execution_context_t
*ec
, rb_num_t num
, const VALUE
*ptr
);
522 extern VALUE
rb_vm_splat_array(VALUE flag
, VALUE array
);
523 extern bool rb_simple_iseq_p(const rb_iseq_t
*iseq
);
524 extern bool rb_vm_defined(rb_execution_context_t
*ec
, rb_control_frame_t
*reg_cfp
, rb_num_t op_type
, VALUE obj
, VALUE v
);
525 extern bool rb_vm_ic_hit_p(IC ic
, const VALUE
*reg_ep
);
526 extern rb_event_flag_t rb_rjit_global_events
;
527 extern void rb_vm_setinstancevariable(const rb_iseq_t
*iseq
, VALUE obj
, ID id
, VALUE val
, IVC ic
);
528 extern VALUE
rb_vm_throw(const rb_execution_context_t
*ec
, rb_control_frame_t
*reg_cfp
, rb_num_t throw_state
, VALUE throwobj
);
529 extern VALUE
rb_reg_new_ary(VALUE ary
, int opt
);
530 extern void rb_vm_setclassvariable(const rb_iseq_t
*iseq
, const rb_control_frame_t
*cfp
, ID id
, VALUE val
, ICVARC ic
);
531 extern VALUE
rb_str_bytesize(VALUE str
);
532 extern const rb_callable_method_entry_t
*rb_callable_method_entry_or_negative(VALUE klass
, ID mid
);
533 extern VALUE
rb_vm_yield_with_cfunc(rb_execution_context_t
*ec
, const struct rb_captured_block
*captured
, int argc
, const VALUE
*argv
);
534 extern VALUE
rb_vm_set_ivar_id(VALUE obj
, ID id
, VALUE val
);
535 extern VALUE
rb_ary_unshift_m(int argc
, VALUE
*argv
, VALUE ary
);
536 extern void* rb_rjit_entry_stub_hit(VALUE branch_stub
);
537 extern void* rb_rjit_branch_stub_hit(VALUE branch_stub
, int sp_offset
, int target0_p
);
538 extern uint64_t rb_vm_insns_count
;
540 #include "rjit_c.rbinc"