[ruby/etc] bump up to 1.3.1
[ruby-80x24.org.git] / mjit_worker.c
blob879237eacca9d3970649ef16caaf07c9daeca36f
1 /**********************************************************************
3 mjit_worker.c - Worker for MRI method JIT compiler
5 Copyright (C) 2017 Vladimir Makarov <vmakarov@redhat.com>.
7 **********************************************************************/
9 // NOTE: All functions in this file are executed on MJIT worker. So don't
10 // call Ruby methods (C functions that may call rb_funcall) or trigger
11 // GC (using ZALLOC, xmalloc, xfree, etc.) in this file.
13 /* However, note that calling `free` for resources `xmalloc`-ed in mjit.c,
14 which is currently done in some places, is sometimes problematic in the
15 following situations:
17 * malloc library could be different between interpreter and extensions
18 on Windows (perhaps not applicable to MJIT because CC is the same)
19 * xmalloc -> free leaks extra space used for USE_GC_MALLOC_OBJ_INFO_DETAILS
20 (not enabled by default)
22 ...in short, it's usually not a problem in MJIT. But maybe it's worth
23 fixing for consistency or for USE_GC_MALLOC_OBJ_INFO_DETAILS support.
26 /* We utilize widely used C compilers (GCC and LLVM Clang) to
27 implement MJIT. We feed them a C code generated from ISEQ. The
28 industrial C compilers are slower than regular JIT engines.
29 Generated code performance of the used C compilers has a higher
30 priority over the compilation speed.
32 So our major goal is to minimize the ISEQ compilation time when we
33 use a widely used optimization level (-O2). It is achieved by
35 o Using a precompiled version of the header
36 o Keeping all files in `/tmp`. On modern Linux `/tmp` is a file
37 system in memory. So it is pretty fast
38 o Implementing MJIT as a multi-threaded code because we want to
39 compile ISEQs in parallel with iseq execution to speed up Ruby
40 code execution. MJIT has one thread (*worker*) to do
41 parallel compilations:
42 o It prepares a precompiled code of the minimized header.
43 It starts at the MRI execution start
44 o It generates PIC object files of ISEQs
45 o It takes one JIT unit from a priority queue unless it is empty.
46 o It translates the JIT unit ISEQ into C-code using the precompiled
47 header, calls CC and loads PIC code when it is ready
48 o Currently MJIT puts ISEQ in the queue when ISEQ is called
49 o MJIT can reorder ISEQs in the queue if some ISEQ has been called
50 many times and its compilation did not start yet
51 o MRI reuses the machine code if it already exists for ISEQ
52 o The machine code we generate can stop and switch to the ISEQ
53 interpretation if some condition is not satisfied as the machine
54 code can be speculative or some exception raises
55 o Speculative machine code can be canceled.
57 Here is a diagram showing the MJIT organization:
59 _______
60 |header |
61 |_______|
62 | MRI building
63 --------------|----------------------------------------
64 | MRI execution
66 _____________|_____
67 | | |
68 | ___V__ | CC ____________________
69 | | |----------->| precompiled header |
70 | | | | |____________________|
71 | | | | |
72 | | MJIT | | |
73 | | | | |
74 | | | | ____V___ CC __________
75 | |______|----------->| C code |--->| .so file |
76 | | |________| |__________|
77 | | |
78 | | |
79 | MRI machine code |<-----------------------------
80 |___________________| loading
84 #ifdef __sun
85 #define __EXTENSIONS__ 1
86 #endif
88 #include "vm_core.h"
89 #include "vm_callinfo.h"
90 #include "mjit.h"
91 #include "gc.h"
92 #include "ruby_assert.h"
93 #include "ruby/debug.h"
94 #include "ruby/thread.h"
95 #include "ruby/version.h"
96 #include "builtin.h"
97 #include "insns.inc"
98 #include "insns_info.inc"
99 #include "internal/compile.h"
101 #ifdef _WIN32
102 #include <winsock2.h>
103 #include <windows.h>
104 #else
105 #include <sys/wait.h>
106 #include <sys/time.h>
107 #include <dlfcn.h>
108 #endif
109 #include <errno.h>
110 #ifdef HAVE_FCNTL_H
111 #include <fcntl.h>
112 #endif
113 #ifdef HAVE_SYS_PARAM_H
114 # include <sys/param.h>
115 #endif
116 #include "dln.h"
118 #include "ruby/util.h"
119 #undef strdup // ruby_strdup may trigger GC
121 #ifndef MAXPATHLEN
122 # define MAXPATHLEN 1024
123 #endif
125 #ifdef _WIN32
126 #define dlopen(name,flag) ((void*)LoadLibrary(name))
127 #define dlerror() strerror(rb_w32_map_errno(GetLastError()))
128 #define dlsym(handle,name) ((void*)GetProcAddress((handle),(name)))
129 #define dlclose(handle) (!FreeLibrary(handle))
130 #define RTLD_NOW -1
132 #define waitpid(pid,stat_loc,options) (WaitForSingleObject((HANDLE)(pid), INFINITE), GetExitCodeProcess((HANDLE)(pid), (LPDWORD)(stat_loc)), CloseHandle((HANDLE)pid), (pid))
133 #define WIFEXITED(S) ((S) != STILL_ACTIVE)
134 #define WEXITSTATUS(S) (S)
135 #define WIFSIGNALED(S) (0)
136 typedef intptr_t pid_t;
137 #endif
139 // Atomically set function pointer if possible.
140 #define MJIT_ATOMIC_SET(var, val) (void)ATOMIC_PTR_EXCHANGE(var, val)
142 #define MJIT_TMP_PREFIX "_ruby_mjit_"
144 // JIT compaction requires the header transformation because linking multiple .o files
145 // doesn't work without having `static` in the same function definitions. We currently
146 // don't support transforming the MJIT header on Windows.
147 #ifdef _WIN32
148 # define USE_JIT_COMPACTION 0
149 #else
150 # define USE_JIT_COMPACTION 1
151 #endif
153 // The unit structure that holds metadata of ISeq for MJIT.
154 struct rb_mjit_unit {
155 struct list_node unode;
156 // Unique order number of unit.
157 int id;
158 // Dlopen handle of the loaded object file.
159 void *handle;
160 rb_iseq_t *iseq;
161 #if defined(_WIN32)
162 // DLL cannot be removed while loaded on Windows. If this is set, it'll be lazily deleted.
163 char *so_file;
164 #endif
165 // Only used by unload_units. Flag to check this unit is currently on stack or not.
166 bool used_code_p;
167 // True if this is still in active_units but it's to be lazily removed
168 bool stale_p;
169 // mjit_compile's optimization switches
170 struct rb_mjit_compile_info compile_info;
171 // captured CC values, they should be marked with iseq.
172 const struct rb_callcache **cc_entries;
173 unsigned int cc_entries_size; // iseq->body->ci_size + ones of inlined iseqs
176 // Linked list of struct rb_mjit_unit.
177 struct rb_mjit_unit_list {
178 struct list_head head;
179 int length; // the list length
182 extern void rb_native_mutex_lock(rb_nativethread_lock_t *lock);
183 extern void rb_native_mutex_unlock(rb_nativethread_lock_t *lock);
184 extern void rb_native_mutex_initialize(rb_nativethread_lock_t *lock);
185 extern void rb_native_mutex_destroy(rb_nativethread_lock_t *lock);
187 extern void rb_native_cond_initialize(rb_nativethread_cond_t *cond);
188 extern void rb_native_cond_destroy(rb_nativethread_cond_t *cond);
189 extern void rb_native_cond_signal(rb_nativethread_cond_t *cond);
190 extern void rb_native_cond_broadcast(rb_nativethread_cond_t *cond);
191 extern void rb_native_cond_wait(rb_nativethread_cond_t *cond, rb_nativethread_lock_t *mutex);
193 // process.c
194 extern rb_pid_t ruby_waitpid_locked(rb_vm_t *, rb_pid_t, int *status, int options, rb_nativethread_cond_t *cond);
196 // A copy of MJIT portion of MRI options since MJIT initialization. We
197 // need them as MJIT threads still can work when the most MRI data were
198 // freed.
199 struct mjit_options mjit_opts;
201 // true if MJIT is enabled.
202 bool mjit_enabled = false;
203 // true if JIT-ed code should be called. When `ruby_vm_event_enabled_global_flags & ISEQ_TRACE_EVENTS`
204 // and `mjit_call_p == false`, any JIT-ed code execution is cancelled as soon as possible.
205 bool mjit_call_p = false;
207 // Priority queue of iseqs waiting for JIT compilation.
208 // This variable holds the list head of the queue.
209 static struct rb_mjit_unit_list unit_queue = { LIST_HEAD_INIT(unit_queue.head) };
210 // List of units which are successfully compiled.
211 static struct rb_mjit_unit_list active_units = { LIST_HEAD_INIT(active_units.head) };
212 // List of compacted so files which will be cleaned up by `free_list()` in `mjit_finish()`.
213 static struct rb_mjit_unit_list compact_units = { LIST_HEAD_INIT(compact_units.head) };
214 // List of units before recompilation and just waiting for dlclose().
215 static struct rb_mjit_unit_list stale_units = { LIST_HEAD_INIT(stale_units.head) };
216 // The number of so far processed ISEQs, used to generate unique id.
217 static int current_unit_num;
218 // A mutex for conditionals and critical sections.
219 static rb_nativethread_lock_t mjit_engine_mutex;
220 // A thread conditional to wake up `mjit_finish` at the end of PCH thread.
221 static rb_nativethread_cond_t mjit_pch_wakeup;
222 // A thread conditional to wake up the client if there is a change in
223 // executed unit status.
224 static rb_nativethread_cond_t mjit_client_wakeup;
225 // A thread conditional to wake up a worker if we have something
226 // to add or we need to stop MJIT engine.
227 static rb_nativethread_cond_t mjit_worker_wakeup;
228 // A thread conditional to wake up workers at the end of GC.
229 static rb_nativethread_cond_t mjit_gc_wakeup;
230 // Greater than 0 when GC is working.
231 static int in_gc = 0;
232 // True when JIT is working.
233 static bool in_jit = false;
234 // True when active_units has at least one stale_p=true unit.
235 static bool pending_stale_p = false;
236 // The times when unload_units is requested. unload_units is called after some requests.
237 static int unload_requests = 0;
238 // The total number of unloaded units.
239 static int total_unloads = 0;
240 // Set to true to stop worker.
241 static bool stop_worker_p;
242 // Set to true if worker is stopped.
243 static bool worker_stopped = true;
245 // Path of "/tmp", which can be changed to $TMP in MinGW.
246 static char *tmp_dir;
248 // Used C compiler path.
249 static const char *cc_path;
250 // Used C compiler flags.
251 static const char **cc_common_args;
252 // Used C compiler flags added by --mjit-debug=...
253 static char **cc_added_args;
254 // Name of the precompiled header file.
255 static char *pch_file;
256 // The process id which should delete the pch_file on mjit_finish.
257 static rb_pid_t pch_owner_pid;
258 // Status of the precompiled header creation. The status is
259 // shared by the workers and the pch thread.
260 static enum {PCH_NOT_READY, PCH_FAILED, PCH_SUCCESS} pch_status;
262 #ifndef _MSC_VER
263 // Name of the header file.
264 static char *header_file;
265 #endif
267 #ifdef _WIN32
268 // Linker option to enable libruby.
269 static char *libruby_pathflag;
270 #endif
272 #include "mjit_config.h"
274 #if defined(__GNUC__) && \
275 (!defined(__clang__) || \
276 (defined(__clang__) && (defined(__FreeBSD__) || defined(__GLIBC__))))
277 # define GCC_PIC_FLAGS "-Wfatal-errors", "-fPIC", "-shared", "-w", "-pipe",
278 # define MJIT_CFLAGS_PIPE 1
279 #else
280 # define GCC_PIC_FLAGS /* empty */
281 # define MJIT_CFLAGS_PIPE 0
282 #endif
284 // Use `-nodefaultlibs -nostdlib` for GCC where possible, which does not work on mingw, cygwin, AIX, and OpenBSD.
285 // This seems to improve MJIT performance on GCC.
286 #if defined __GNUC__ && !defined __clang__ && !defined(_WIN32) && !defined(__CYGWIN__) && !defined(_AIX) && !defined(__OpenBSD__)
287 # define GCC_NOSTDLIB_FLAGS "-nodefaultlibs", "-nostdlib",
288 #else
289 # define GCC_NOSTDLIB_FLAGS // empty
290 #endif
292 static const char *const CC_COMMON_ARGS[] = {
293 MJIT_CC_COMMON MJIT_CFLAGS GCC_PIC_FLAGS
294 NULL
297 static const char *const CC_DEBUG_ARGS[] = {MJIT_DEBUGFLAGS NULL};
298 static const char *const CC_OPTIMIZE_ARGS[] = {MJIT_OPTFLAGS NULL};
300 static const char *const CC_LDSHARED_ARGS[] = {MJIT_LDSHARED GCC_PIC_FLAGS NULL};
301 static const char *const CC_DLDFLAGS_ARGS[] = {MJIT_DLDFLAGS NULL};
302 // `CC_LINKER_ARGS` are linker flags which must be passed to `-c` as well.
303 static const char *const CC_LINKER_ARGS[] = {
304 #if defined __GNUC__ && !defined __clang__ && !defined(__OpenBSD__)
305 "-nostartfiles",
306 #endif
307 GCC_NOSTDLIB_FLAGS NULL
310 static const char *const CC_LIBS[] = {
311 #if defined(_WIN32) || defined(__CYGWIN__)
312 MJIT_LIBS // mswin, mingw, cygwin
313 #endif
314 #if defined __GNUC__ && !defined __clang__
315 # if defined(_WIN32)
316 "-lmsvcrt", // mingw
317 # endif
318 "-lgcc", // mingw, cygwin, and GCC platforms using `-nodefaultlibs -nostdlib`
319 #endif
320 #if defined __ANDROID__
321 "-lm", // to avoid 'cannot locate symbol "modf" referenced by .../_ruby_mjit_XXX.so"'
322 #endif
323 NULL
326 #define CC_CODEFLAG_ARGS (mjit_opts.debug ? CC_DEBUG_ARGS : CC_OPTIMIZE_ARGS)
328 // Print the arguments according to FORMAT to stderr only if MJIT
329 // verbose option value is more or equal to LEVEL.
330 PRINTF_ARGS(static void, 2, 3)
331 verbose(int level, const char *format, ...)
333 if (mjit_opts.verbose >= level) {
334 va_list args;
335 size_t len = strlen(format);
336 char *full_format = alloca(sizeof(char) * (len + 2));
338 // Creating `format + '\n'` to atomically print format and '\n'.
339 memcpy(full_format, format, len);
340 full_format[len] = '\n';
341 full_format[len+1] = '\0';
343 va_start(args, format);
344 vfprintf(stderr, full_format, args);
345 va_end(args);
349 PRINTF_ARGS(static void, 1, 2)
350 mjit_warning(const char *format, ...)
352 if (mjit_opts.warnings || mjit_opts.verbose) {
353 va_list args;
355 fprintf(stderr, "MJIT warning: ");
356 va_start(args, format);
357 vfprintf(stderr, format, args);
358 va_end(args);
359 fprintf(stderr, "\n");
363 // Add unit node to the tail of doubly linked `list`. It should be not in
364 // the list before.
365 static void
366 add_to_list(struct rb_mjit_unit *unit, struct rb_mjit_unit_list *list)
368 (void)RB_DEBUG_COUNTER_INC_IF(mjit_length_unit_queue, list == &unit_queue);
369 (void)RB_DEBUG_COUNTER_INC_IF(mjit_length_active_units, list == &active_units);
370 (void)RB_DEBUG_COUNTER_INC_IF(mjit_length_compact_units, list == &compact_units);
371 (void)RB_DEBUG_COUNTER_INC_IF(mjit_length_stale_units, list == &stale_units);
373 list_add_tail(&list->head, &unit->unode);
374 list->length++;
377 static void
378 remove_from_list(struct rb_mjit_unit *unit, struct rb_mjit_unit_list *list)
380 #if USE_DEBUG_COUNTER
381 rb_debug_counter_add(RB_DEBUG_COUNTER_mjit_length_unit_queue, -1, list == &unit_queue);
382 rb_debug_counter_add(RB_DEBUG_COUNTER_mjit_length_active_units, -1, list == &active_units);
383 rb_debug_counter_add(RB_DEBUG_COUNTER_mjit_length_compact_units, -1, list == &compact_units);
384 rb_debug_counter_add(RB_DEBUG_COUNTER_mjit_length_stale_units, -1, list == &stale_units);
385 #endif
387 list_del(&unit->unode);
388 list->length--;
// Delete `filename`; emit an MJIT warning with the errno text on failure.
static void
remove_file(const char *filename)
{
    if (remove(filename) == 0) return;

    mjit_warning("failed to remove \"%s\": %s", filename, strerror(errno));
}
// Lazily delete the .so file recorded on `unit` (Windows only; a no-op elsewhere).
static void
clean_temp_files(struct rb_mjit_unit *unit)
{
#if defined(_WIN32)
    char *pending = unit->so_file;
    if (pending == NULL) return;

    // Detach the name from the unit before deleting the file.
    unit->so_file = NULL;
    // unit->so_file is set only when mjit_opts.save_temps is false.
    remove_file(pending);
    free(pending);
#endif
}
415 // This is called in the following situations:
416 // 1) On dequeue or `unload_units()`, associated ISeq is already GCed.
417 // 2) The unit is not called often and unloaded by `unload_units()`.
418 // 3) Freeing lists on `mjit_finish()`.
420 // `jit_func` value does not matter for 1 and 3 since the unit won't be used anymore.
421 // For the situation 2, this sets the ISeq's JIT state to NOT_COMPILED_JIT_ISEQ_FUNC
422 // to prevent the situation that the same methods are continuously compiled.
423 static void
424 free_unit(struct rb_mjit_unit *unit)
426 if (unit->iseq) { // ISeq is not GCed
427 unit->iseq->body->jit_func = (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC;
428 unit->iseq->body->jit_unit = NULL;
430 if (unit->cc_entries) {
431 void *entries = (void *)unit->cc_entries;
432 free(entries);
434 if (unit->handle && dlclose(unit->handle)) { // handle is NULL if it's in queue
435 mjit_warning("failed to close handle for u%d: %s", unit->id, dlerror());
437 clean_temp_files(unit);
438 free(unit);
441 // Start a critical section. Use message `msg` to print debug info at `level`.
442 static inline void
443 CRITICAL_SECTION_START(int level, const char *msg)
445 verbose(level, "Locking %s", msg);
446 rb_native_mutex_lock(&mjit_engine_mutex);
447 verbose(level, "Locked %s", msg);
450 // Finish the current critical section. Use message `msg` to print
451 // debug info at `level`.
452 static inline void
453 CRITICAL_SECTION_FINISH(int level, const char *msg)
455 verbose(level, "Unlocked %s", msg);
456 rb_native_mutex_unlock(&mjit_engine_mutex);
459 static int
460 sprint_uniq_filename(char *str, size_t size, unsigned long id, const char *prefix, const char *suffix)
462 return snprintf(str, size, "%s/%sp%"PRI_PIDT_PREFIX"uu%lu%s", tmp_dir, prefix, getpid(), id, suffix);
// Return the current time in milliseconds as a double.
#ifdef __APPLE__
double ruby_real_ms_time(void);
# define real_ms_time() ruby_real_ms_time()
#else
static double
real_ms_time(void)
{
# ifdef HAVE_CLOCK_GETTIME
    // Use the monotonic clock when the platform defines it.
#  ifdef CLOCK_MONOTONIC
    const clockid_t clock_id = CLOCK_MONOTONIC;
#  else
    const clockid_t clock_id = CLOCK_REALTIME;
#  endif
    struct timespec ts;

    clock_gettime(clock_id, &ts);
    return ts.tv_nsec / 1000000.0 + ts.tv_sec * 1000.0;
# else
    struct timeval now;

    gettimeofday(&now, NULL);
    return now.tv_usec / 1000.0 + now.tv_sec * 1000.0;
# endif
}
#endif
492 // Return the best unit from list. The best is the first
493 // high priority unit or the unit whose iseq has the biggest number
494 // of calls so far.
495 static struct rb_mjit_unit *
496 get_from_list(struct rb_mjit_unit_list *list)
498 while (in_gc) {
499 verbose(3, "Waiting wakeup from GC");
500 rb_native_cond_wait(&mjit_gc_wakeup, &mjit_engine_mutex);
502 in_jit = true; // Lock GC
504 // Find iseq with max total_calls
505 struct rb_mjit_unit *unit = NULL, *next, *best = NULL;
506 list_for_each_safe(&list->head, unit, next, unode) {
507 if (unit->iseq == NULL) { // ISeq is GCed.
508 remove_from_list(unit, list);
509 free_unit(unit);
510 continue;
513 if (best == NULL || best->iseq->body->total_calls < unit->iseq->body->total_calls) {
514 best = unit;
518 in_jit = false; // Unlock GC
519 verbose(3, "Sending wakeup signal to client in a mjit-worker for GC");
520 rb_native_cond_signal(&mjit_client_wakeup);
522 if (best) {
523 remove_from_list(best, list);
525 return best;
// Count the entries of the NULL-terminated array `args`, not counting the
// terminating NULL itself.
static size_t
args_len(char *const *args)
{
    size_t count = 0;

    while (args[count] != NULL) {
        count++;
    }
    return count;
}
539 // Concatenate `num` passed NULL-terminated arrays of strings, put the
540 // result (with NULL end marker) into the heap, and return the result.
541 static char **
542 form_args(int num, ...)
544 va_list argp;
545 size_t len, n;
546 int i;
547 char **args, **res, **tmp;
549 va_start(argp, num);
550 res = NULL;
551 for (i = len = 0; i < num; i++) {
552 args = va_arg(argp, char **);
553 n = args_len(args);
554 if ((tmp = (char **)realloc(res, sizeof(char *) * (len + n + 1))) == NULL) {
555 free(res);
556 res = NULL;
557 break;
559 res = tmp;
560 MEMCPY(res + len, args, char *, n + 1);
561 len += n;
563 va_end(argp);
564 return res;
567 COMPILER_WARNING_PUSH
568 #if __has_warning("-Wdeprecated-declarations") || RBIMPL_COMPILER_IS(GCC)
569 COMPILER_WARNING_IGNORED(-Wdeprecated-declarations)
570 #endif
571 // Start an OS process of absolute executable path with arguments `argv`.
572 // Return PID of the process.
573 static pid_t
574 start_process(const char *abspath, char *const *argv)
576 // Not calling non-async-signal-safe functions between vfork
577 // and execv for safety
578 int dev_null = rb_cloexec_open(ruby_null_device, O_WRONLY, 0);
579 if (dev_null < 0) {
580 verbose(1, "MJIT: Failed to open a null device: %s", strerror(errno));
581 return -1;
583 if (mjit_opts.verbose >= 2) {
584 const char *arg;
585 fprintf(stderr, "Starting process: %s", abspath);
586 for (int i = 0; (arg = argv[i]) != NULL; i++)
587 fprintf(stderr, " %s", arg);
588 fprintf(stderr, "\n");
591 pid_t pid;
592 #ifdef _WIN32
593 extern HANDLE rb_w32_start_process(const char *abspath, char *const *argv, int out_fd);
594 int out_fd = 0;
595 if (mjit_opts.verbose <= 1) {
596 // Discard cl.exe's outputs like:
597 // _ruby_mjit_p12u3.c
598 // Creating library C:.../_ruby_mjit_p12u3.lib and object C:.../_ruby_mjit_p12u3.exp
599 out_fd = dev_null;
602 pid = (pid_t)rb_w32_start_process(abspath, argv, out_fd);
603 if (pid == 0) {
604 verbose(1, "MJIT: Failed to create process: %s", dlerror());
605 return -1;
607 #else
608 if ((pid = vfork()) == 0) { /* TODO: reuse some function in process.c */
609 umask(0077);
610 if (mjit_opts.verbose == 0) {
611 // CC can be started in a thread using a file which has been
612 // already removed while MJIT is finishing. Discard the
613 // messages about missing files.
614 dup2(dev_null, STDERR_FILENO);
615 dup2(dev_null, STDOUT_FILENO);
617 (void)close(dev_null);
618 pid = execv(abspath, argv); // Pid will be negative on an error
619 // Even if we successfully found CC to compile PCH we still can
620 // fail with loading the CC in very rare cases for some reasons.
621 // Stop the forked process in this case.
622 verbose(1, "MJIT: Error in execv: %s", abspath);
623 _exit(1);
625 #endif
626 (void)close(dev_null);
627 return pid;
629 COMPILER_WARNING_POP
631 // Execute an OS process of executable PATH with arguments ARGV.
632 // Return -1 or -2 if failed to execute, otherwise exit code of the process.
633 // TODO: Use a similar function in process.c
634 static int
635 exec_process(const char *path, char *const argv[])
637 int stat, exit_code = -2;
638 rb_vm_t *vm = WAITPID_USE_SIGCHLD ? GET_VM() : 0;
639 rb_nativethread_cond_t cond;
641 if (vm) {
642 rb_native_cond_initialize(&cond);
643 rb_native_mutex_lock(&vm->waitpid_lock);
646 pid_t pid = start_process(path, argv);
647 for (;pid > 0;) {
648 pid_t r = vm ? ruby_waitpid_locked(vm, pid, &stat, 0, &cond)
649 : waitpid(pid, &stat, 0);
650 if (r == -1) {
651 if (errno == EINTR) continue;
652 fprintf(stderr, "[%"PRI_PIDT_PREFIX"d] waitpid(%lu): %s (SIGCHLD=%d,%u)\n",
653 getpid(), (unsigned long)pid, strerror(errno),
654 RUBY_SIGCHLD, SIGCHLD_LOSSY);
655 break;
657 else if (r == pid) {
658 if (WIFEXITED(stat)) {
659 exit_code = WEXITSTATUS(stat);
660 break;
662 else if (WIFSIGNALED(stat)) {
663 exit_code = -1;
664 break;
669 if (vm) {
670 rb_native_mutex_unlock(&vm->waitpid_lock);
671 rb_native_cond_destroy(&cond);
673 return exit_code;
// Dispose of `so_file`. On Windows the name is duplicated onto `unit` so
// that `clean_temp_files()` can delete it later; elsewhere the file is
// removed immediately.
static void
remove_so_file(const char *so_file, struct rb_mjit_unit *unit)
{
#if defined(_WIN32)
    // Windows can't remove files while it's used.
    char *deferred = strdup(so_file); // lazily delete on `clean_temp_files()`
    unit->so_file = deferred;
    if (deferred == NULL) {
        mjit_warning("failed to allocate memory to lazily remove '%s': %s", so_file, strerror(errno));
    }
#else
    remove_file(so_file);
#endif
}
689 // Print _mjitX, but make a human-readable funcname when --mjit-debug is used
690 static void
691 sprint_funcname(char *funcname, const struct rb_mjit_unit *unit)
693 const rb_iseq_t *iseq = unit->iseq;
694 if (iseq == NULL || (!mjit_opts.debug && !mjit_opts.debug_flags)) {
695 sprintf(funcname, "_mjit%d", unit->id);
696 return;
699 // Generate a short path
700 const char *path = RSTRING_PTR(rb_iseq_path(iseq));
701 const char *lib = "/lib/";
702 const char *version = "/" STRINGIZE(RUBY_API_VERSION_MAJOR) "." STRINGIZE(RUBY_API_VERSION_MINOR) "." STRINGIZE(RUBY_API_VERSION_TEENY) "/";
703 while (strstr(path, lib)) // skip "/lib/"
704 path = strstr(path, lib) + strlen(lib);
705 while (strstr(path, version)) // skip "/x.y.z/"
706 path = strstr(path, version) + strlen(version);
708 // Annotate all-normalized method names
709 const char *method = RSTRING_PTR(iseq->body->location.label);
710 if (!strcmp(method, "[]")) method = "AREF";
711 if (!strcmp(method, "[]=")) method = "ASET";
713 // Print and normalize
714 sprintf(funcname, "_mjit%d_%s_%s", unit->id, path, method);
715 for (size_t i = 0; i < strlen(funcname); i++) {
716 char c = funcname[i];
717 if (!(('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9') || c == '_')) {
718 funcname[i] = '_';
723 static const rb_iseq_t **compiling_iseqs = NULL;
725 static bool
726 set_compiling_iseqs(const rb_iseq_t *iseq)
728 compiling_iseqs = calloc(iseq->body->iseq_size + 2, sizeof(rb_iseq_t *)); // 2: 1 (unit->iseq) + 1 (NULL end)
729 if (compiling_iseqs == NULL)
730 return false;
732 compiling_iseqs[0] = iseq;
733 int i = 1;
735 unsigned int pos = 0;
736 while (pos < iseq->body->iseq_size) {
737 int insn = rb_vm_insn_decode(iseq->body->iseq_encoded[pos]);
738 if (insn == BIN(opt_send_without_block) || insn == BIN(opt_size)) {
739 CALL_DATA cd = (CALL_DATA)iseq->body->iseq_encoded[pos + 1];
740 extern const rb_iseq_t *rb_mjit_inlinable_iseq(const struct rb_callinfo *ci, const struct rb_callcache *cc);
741 const rb_iseq_t *iseq = rb_mjit_inlinable_iseq(cd->ci, cd->cc);
742 if (iseq != NULL) {
743 compiling_iseqs[i] = iseq;
744 i++;
747 pos += insn_len(insn);
749 return true;
752 static void
753 free_compiling_iseqs(void)
755 RBIMPL_WARNING_PUSH();
756 #ifdef _MSC_VER
757 RBIMPL_WARNING_IGNORED(4090); /* suppress false warning by MSVC */
758 #endif
759 free(compiling_iseqs);
760 RBIMPL_WARNING_POP();
761 compiling_iseqs = NULL;
764 bool
765 rb_mjit_compiling_iseq_p(const rb_iseq_t *iseq)
767 assert(compiling_iseqs != NULL);
768 int i = 0;
769 while (compiling_iseqs[i]) {
770 if (compiling_iseqs[i] == iseq) return true;
771 i++;
773 return false;
776 static const int c_file_access_mode =
777 #ifdef O_BINARY
778 O_BINARY|
779 #endif
780 O_WRONLY|O_EXCL|O_CREAT;
782 #define append_str2(p, str, len) ((char *)memcpy((p), str, (len))+(len))
783 #define append_str(p, str) append_str2(p, str, sizeof(str)-1)
784 #define append_lit(p, str) append_str2(p, str, rb_strlen_lit(str))
786 #ifdef _MSC_VER
787 // Compile C file to so. It returns true if it succeeds. (mswin)
788 static bool
789 compile_c_to_so(const char *c_file, const char *so_file)
791 const char *files[] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, "-link", libruby_pathflag, NULL };
792 char *p;
794 // files[0] = "-Fe*.dll"
795 files[0] = p = alloca(sizeof(char) * (rb_strlen_lit("-Fe") + strlen(so_file) + 1));
796 p = append_lit(p, "-Fe");
797 p = append_str2(p, so_file, strlen(so_file));
798 *p = '\0';
800 // files[1] = "-Fo*.obj"
801 // We don't need .obj file, but it's somehow created to cwd without -Fo and we want to control the output directory.
802 files[1] = p = alloca(sizeof(char) * (rb_strlen_lit("-Fo") + strlen(so_file) - rb_strlen_lit(DLEXT) + rb_strlen_lit(".obj") + 1));
803 char *obj_file = p = append_lit(p, "-Fo");
804 p = append_str2(p, so_file, strlen(so_file) - rb_strlen_lit(DLEXT));
805 p = append_lit(p, ".obj");
806 *p = '\0';
808 // files[2] = "-Yu*.pch"
809 files[2] = p = alloca(sizeof(char) * (rb_strlen_lit("-Yu") + strlen(pch_file) + 1));
810 p = append_lit(p, "-Yu");
811 p = append_str2(p, pch_file, strlen(pch_file));
812 *p = '\0';
814 // files[3] = "C:/.../rb_mjit_header-*.obj"
815 files[3] = p = alloca(sizeof(char) * (strlen(pch_file) + 1));
816 p = append_str2(p, pch_file, strlen(pch_file) - strlen(".pch"));
817 p = append_lit(p, ".obj");
818 *p = '\0';
820 // files[4] = "-Tc*.c"
821 files[4] = p = alloca(sizeof(char) * (rb_strlen_lit("-Tc") + strlen(c_file) + 1));
822 p = append_lit(p, "-Tc");
823 p = append_str2(p, c_file, strlen(c_file));
824 *p = '\0';
826 // files[5] = "-Fd*.pdb"
827 // Generate .pdb file in temporary directory instead of cwd.
828 files[5] = p = alloca(sizeof(char) * (rb_strlen_lit("-Fd") + strlen(so_file) - rb_strlen_lit(DLEXT) + rb_strlen_lit(".pdb") + 1));
829 p = append_lit(p, "-Fd");
830 p = append_str2(p, so_file, strlen(so_file) - rb_strlen_lit(DLEXT));
831 p = append_lit(p, ".pdb");
832 *p = '\0';
834 // files[6] = "-Z7"
835 // Put this last to override any debug options that came previously.
836 files[6] = p = alloca(sizeof(char) * rb_strlen_lit("-Z7") + 1);
837 p = append_lit(p, "-Z7");
838 *p = '\0';
840 char **args = form_args(5, CC_LDSHARED_ARGS, CC_CODEFLAG_ARGS,
841 files, CC_LIBS, CC_DLDFLAGS_ARGS);
842 if (args == NULL)
843 return false;
845 int exit_code = exec_process(cc_path, args);
846 free(args);
848 if (exit_code == 0) {
849 // remove never-used files (.obj, .lib, .exp, .pdb). XXX: Is there any way not to generate this?
850 if (!mjit_opts.save_temps) {
851 char *before_dot;
852 remove_file(obj_file);
854 before_dot = obj_file + strlen(obj_file) - rb_strlen_lit(".obj");
855 append_lit(before_dot, ".lib"); remove_file(obj_file);
856 append_lit(before_dot, ".exp"); remove_file(obj_file);
857 append_lit(before_dot, ".pdb"); remove_file(obj_file);
860 else {
861 verbose(2, "compile_c_to_so: compile error: %d", exit_code);
863 return exit_code == 0;
865 #else // _MSC_VER
867 // The function producing the pre-compiled header.
868 static void
869 make_pch(void)
871 const char *rest_args[] = {
872 # ifdef __clang__
873 "-emit-pch",
874 "-c",
875 # endif
876 // -nodefaultlibs is a linker flag, but it may affect cc1 behavior on Gentoo, which should NOT be changed on pch:
877 // https://gitweb.gentoo.org/proj/gcc-patches.git/tree/7.3.0/gentoo/13_all_default-ssp-fix.patch
878 GCC_NOSTDLIB_FLAGS
879 "-o", pch_file, header_file,
880 NULL,
883 verbose(2, "Creating precompiled header");
884 char **args = form_args(4, cc_common_args, CC_CODEFLAG_ARGS, cc_added_args, rest_args);
885 if (args == NULL) {
886 mjit_warning("making precompiled header failed on forming args");
887 CRITICAL_SECTION_START(3, "in make_pch");
888 pch_status = PCH_FAILED;
889 CRITICAL_SECTION_FINISH(3, "in make_pch");
890 return;
893 int exit_code = exec_process(cc_path, args);
894 free(args);
896 CRITICAL_SECTION_START(3, "in make_pch");
897 if (exit_code == 0) {
898 pch_status = PCH_SUCCESS;
900 else {
901 mjit_warning("Making precompiled header failed on compilation. Stopping MJIT worker...");
902 pch_status = PCH_FAILED;
904 /* wakeup `mjit_finish` */
905 rb_native_cond_broadcast(&mjit_pch_wakeup);
906 CRITICAL_SECTION_FINISH(3, "in make_pch");
909 // Compile .c file to .so file. It returns true if it succeeds. (non-mswin)
910 // Not compiling .c to .so directly because it fails on MinGW, and this helps
911 // to generate no .dSYM on macOS.
912 static bool
913 compile_c_to_so(const char *c_file, const char *so_file)
915 char* o_file = alloca(strlen(c_file) + 1);
916 strcpy(o_file, c_file);
917 o_file[strlen(c_file) - 1] = 'o';
919 const char *o_args[] = {
920 "-o", o_file, c_file,
921 # ifdef __clang__
922 "-include-pch", pch_file,
923 # endif
924 "-c", NULL
926 char **args = form_args(5, cc_common_args, CC_CODEFLAG_ARGS, cc_added_args, o_args, CC_LINKER_ARGS);
927 if (args == NULL) return false;
928 int exit_code = exec_process(cc_path, args);
929 free(args);
930 if (exit_code != 0) {
931 verbose(2, "compile_c_to_so: failed to compile .c to .o: %d", exit_code);
932 return false;
935 const char *so_args[] = {
936 "-o", so_file,
937 # ifdef _WIN32
938 libruby_pathflag,
939 # endif
940 o_file, NULL
942 args = form_args(6, CC_LDSHARED_ARGS, CC_CODEFLAG_ARGS, so_args, CC_LIBS, CC_DLDFLAGS_ARGS, CC_LINKER_ARGS);
943 if (args == NULL) return false;
944 exit_code = exec_process(cc_path, args);
945 free(args);
946 if (!mjit_opts.save_temps) remove_file(o_file);
947 if (exit_code != 0) {
948 verbose(2, "compile_c_to_so: failed to link .o to .so: %d", exit_code);
950 return exit_code == 0;
952 #endif // _MSC_VER
954 #if USE_JIT_COMPACTION
955 static void compile_prelude(FILE *f);
957 static bool
958 compile_compact_jit_code(char* c_file)
960 FILE *f;
961 int fd = rb_cloexec_open(c_file, c_file_access_mode, 0600);
962 if (fd < 0 || (f = fdopen(fd, "w")) == NULL) {
963 int e = errno;
964 if (fd >= 0) (void)close(fd);
965 verbose(1, "Failed to fopen '%s', giving up JIT for it (%s)", c_file, strerror(e));
966 return false;
969 compile_prelude(f);
971 // wait until mjit_gc_exit_hook is called
972 CRITICAL_SECTION_START(3, "before mjit_compile to wait GC finish");
973 while (in_gc) {
974 verbose(3, "Waiting wakeup from GC");
975 rb_native_cond_wait(&mjit_gc_wakeup, &mjit_engine_mutex);
977 // We need to check again here because we could've waited on GC above
978 bool iseq_gced = false;
979 struct rb_mjit_unit *child_unit = 0, *next;
980 list_for_each_safe(&active_units.head, child_unit, next, unode) {
981 if (child_unit->iseq == NULL) { // ISeq is GC-ed
982 iseq_gced = true;
983 verbose(1, "JIT compaction: A method for JIT code u%d is obsoleted. Compaction will be skipped.", child_unit->id);
984 remove_from_list(child_unit, &active_units);
985 free_unit(child_unit); // unload it without waiting for throttled unload_units to retry compaction quickly
988 in_jit = !iseq_gced;
989 CRITICAL_SECTION_FINISH(3, "before mjit_compile to wait GC finish");
990 if (!in_jit) {
991 fclose(f);
992 if (!mjit_opts.save_temps)
993 remove_file(c_file);
994 return false;
997 // This entire loop lock GC so that we do not need to consider a case that
998 // ISeq is GC-ed in a middle of re-compilation. It takes 3~4ms with 100 methods
999 // on my machine. It's not too bad compared to compilation time of C (7200~8000ms),
1000 // but it might be larger if we use a larger --jit-max-cache.
1002 // TODO: Consider using a more granular lock after we implement inlining across
1003 // compacted functions (not done yet).
1004 bool success = true;
1005 list_for_each(&active_units.head, child_unit, unode) {
1006 CRITICAL_SECTION_START(3, "before set_compiling_iseqs");
1007 success &= set_compiling_iseqs(child_unit->iseq);
1008 CRITICAL_SECTION_FINISH(3, "after set_compiling_iseqs");
1009 if (!success) continue;
1011 char funcname[MAXPATHLEN];
1012 sprint_funcname(funcname, child_unit);
1014 long iseq_lineno = 0;
1015 if (FIXNUM_P(child_unit->iseq->body->location.first_lineno))
1016 // FIX2INT may fallback to rb_num2long(), which is a method call and dangerous in MJIT worker. So using only FIX2LONG.
1017 iseq_lineno = FIX2LONG(child_unit->iseq->body->location.first_lineno);
1018 const char *sep = "@";
1019 const char *iseq_label = RSTRING_PTR(child_unit->iseq->body->location.label);
1020 const char *iseq_path = RSTRING_PTR(rb_iseq_path(child_unit->iseq));
1021 if (!iseq_label) iseq_label = sep = "";
1022 fprintf(f, "\n/* %s%s%s:%ld */\n", iseq_label, sep, iseq_path, iseq_lineno);
1023 success &= mjit_compile(f, child_unit->iseq, funcname, child_unit->id);
1025 CRITICAL_SECTION_START(3, "before compiling_iseqs free");
1026 free_compiling_iseqs();
1027 CRITICAL_SECTION_FINISH(3, "after compiling_iseqs free");
1030 // release blocking mjit_gc_start_hook
1031 CRITICAL_SECTION_START(3, "after mjit_compile to wakeup client for GC");
1032 in_jit = false;
1033 verbose(3, "Sending wakeup signal to client in a mjit-worker for GC");
1034 rb_native_cond_signal(&mjit_client_wakeup);
1035 CRITICAL_SECTION_FINISH(3, "in worker to wakeup client for GC");
1037 fclose(f);
1038 return success;
1041 // Compile all cached .c files and build a single .so file. Reload all JIT func from it.
1042 // This improves the code locality for better performance in terms of iTLB and iCache.
1043 static void
1044 compact_all_jit_code(void)
1046 struct rb_mjit_unit *unit, *cur = 0;
1047 static const char c_ext[] = ".c";
1048 static const char so_ext[] = DLEXT;
1049 char c_file[MAXPATHLEN], so_file[MAXPATHLEN];
1051 // Abnormal use case of rb_mjit_unit that doesn't have ISeq
1052 unit = calloc(1, sizeof(struct rb_mjit_unit)); // To prevent GC, don't use ZALLOC
1053 if (unit == NULL) return;
1054 unit->id = current_unit_num++;
1055 sprint_uniq_filename(c_file, (int)sizeof(c_file), unit->id, MJIT_TMP_PREFIX, c_ext);
1056 sprint_uniq_filename(so_file, (int)sizeof(so_file), unit->id, MJIT_TMP_PREFIX, so_ext);
1058 bool success = compile_compact_jit_code(c_file);
1059 double start_time = real_ms_time();
1060 if (success) {
1061 success = compile_c_to_so(c_file, so_file);
1062 if (!mjit_opts.save_temps)
1063 remove_file(c_file);
1065 double end_time = real_ms_time();
1067 if (success) {
1068 void *handle = dlopen(so_file, RTLD_NOW);
1069 if (handle == NULL) {
1070 mjit_warning("failure in loading code from compacted '%s': %s", so_file, dlerror());
1071 free(unit);
1072 return;
1074 unit->handle = handle;
1076 // lazily dlclose handle (and .so file for win32) on `mjit_finish()`.
1077 add_to_list(unit, &compact_units);
1079 if (!mjit_opts.save_temps)
1080 remove_so_file(so_file, unit);
1082 CRITICAL_SECTION_START(3, "in compact_all_jit_code to read list");
1083 list_for_each(&active_units.head, cur, unode) {
1084 void *func;
1085 char funcname[MAXPATHLEN];
1086 sprint_funcname(funcname, cur);
1088 if ((func = dlsym(handle, funcname)) == NULL) {
1089 mjit_warning("skipping to reload '%s' from '%s': %s", funcname, so_file, dlerror());
1090 continue;
1093 if (cur->iseq) { // Check whether GCed or not
1094 // Usage of jit_code might be not in a critical section.
1095 MJIT_ATOMIC_SET(cur->iseq->body->jit_func, (mjit_func_t)func);
1098 CRITICAL_SECTION_FINISH(3, "in compact_all_jit_code to read list");
1099 verbose(1, "JIT compaction (%.1fms): Compacted %d methods %s -> %s", end_time - start_time, active_units.length, c_file, so_file);
1101 else {
1102 free(unit);
1103 verbose(1, "JIT compaction failure (%.1fms): Failed to compact methods", end_time - start_time);
1106 #endif // USE_JIT_COMPACTION
1108 static void *
1109 load_func_from_so(const char *so_file, const char *funcname, struct rb_mjit_unit *unit)
1111 void *handle, *func;
1113 handle = dlopen(so_file, RTLD_NOW);
1114 if (handle == NULL) {
1115 mjit_warning("failure in loading code from '%s': %s", so_file, dlerror());
1116 return (void *)NOT_COMPILED_JIT_ISEQ_FUNC;
1119 func = dlsym(handle, funcname);
1120 unit->handle = handle;
1121 return func;
1124 #ifndef __clang__
// Return a pointer to the end of the header name in `s`.
// With GCC-compatible compilers, a trailing ".gch" is excluded from the name as
// long as something precedes it; otherwise the end of the string is returned.
static const char *
header_name_end(const char *s)
{
    const size_t len = strlen(s);
# ifdef __GNUC__ // don't chomp .pch for mswin
    static const char gch_ext[] = ".gch";
    const size_t ext_len = sizeof(gch_ext) - 1;

    // chomp .gch suffix (a name consisting solely of ".gch" is kept whole)
    if (len > ext_len && strcmp(s + (len - ext_len), gch_ext) == 0) {
        return s + (len - ext_len);
    }
# endif
    return s + len;
}
1139 #endif
1141 // Print platform-specific prerequisites in generated code.
1142 static void
1143 compile_prelude(FILE *f)
1145 #ifndef __clang__ // -include-pch is used for Clang
1146 const char *s = pch_file;
1147 const char *e = header_name_end(s);
1149 fprintf(f, "#include \"");
1150 // print pch_file except .gch for gcc, but keep .pch for mswin
1151 for (; s < e; s++) {
1152 switch (*s) {
1153 case '\\': case '"':
1154 fputc('\\', f);
1156 fputc(*s, f);
1158 fprintf(f, "\"\n");
1159 #endif
1161 #ifdef _WIN32
1162 fprintf(f, "void _pei386_runtime_relocator(void){}\n");
1163 fprintf(f, "int __stdcall DllMainCRTStartup(void* hinstDLL, unsigned int fdwReason, void* lpvReserved) { return 1; }\n");
1164 #endif
1167 // Compile ISeq in UNIT and return function pointer of JIT-ed code.
1168 // It may return NOT_COMPILED_JIT_ISEQ_FUNC if something went wrong.
1169 static mjit_func_t
1170 convert_unit_to_func(struct rb_mjit_unit *unit)
1172 static const char c_ext[] = ".c";
1173 static const char so_ext[] = DLEXT;
1174 char c_file[MAXPATHLEN], so_file[MAXPATHLEN], funcname[MAXPATHLEN];
1176 sprint_uniq_filename(c_file, (int)sizeof(c_file), unit->id, MJIT_TMP_PREFIX, c_ext);
1177 sprint_uniq_filename(so_file, (int)sizeof(so_file), unit->id, MJIT_TMP_PREFIX, so_ext);
1178 sprint_funcname(funcname, unit);
1180 FILE *f;
1181 int fd = rb_cloexec_open(c_file, c_file_access_mode, 0600);
1182 if (fd < 0 || (f = fdopen(fd, "w")) == NULL) {
1183 int e = errno;
1184 if (fd >= 0) (void)close(fd);
1185 verbose(1, "Failed to fopen '%s', giving up JIT for it (%s)", c_file, strerror(e));
1186 return (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC;
1189 // print #include of MJIT header, etc.
1190 compile_prelude(f);
1192 // wait until mjit_gc_exit_hook is called
1193 CRITICAL_SECTION_START(3, "before mjit_compile to wait GC finish");
1194 while (in_gc) {
1195 verbose(3, "Waiting wakeup from GC");
1196 rb_native_cond_wait(&mjit_gc_wakeup, &mjit_engine_mutex);
1198 // We need to check again here because we could've waited on GC above
1199 in_jit = (unit->iseq != NULL);
1200 if (in_jit)
1201 in_jit &= set_compiling_iseqs(unit->iseq);
1202 CRITICAL_SECTION_FINISH(3, "before mjit_compile to wait GC finish");
1203 if (!in_jit) {
1204 fclose(f);
1205 if (!mjit_opts.save_temps)
1206 remove_file(c_file);
1207 return (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC;
1210 // To make MJIT worker thread-safe against GC.compact, copy ISeq values while `in_jit` is true.
1211 long iseq_lineno = 0;
1212 if (FIXNUM_P(unit->iseq->body->location.first_lineno))
1213 // FIX2INT may fallback to rb_num2long(), which is a method call and dangerous in MJIT worker. So using only FIX2LONG.
1214 iseq_lineno = FIX2LONG(unit->iseq->body->location.first_lineno);
1215 char *iseq_label = alloca(RSTRING_LEN(unit->iseq->body->location.label) + 1);
1216 char *iseq_path = alloca(RSTRING_LEN(rb_iseq_path(unit->iseq)) + 1);
1217 strcpy(iseq_label, RSTRING_PTR(unit->iseq->body->location.label));
1218 strcpy(iseq_path, RSTRING_PTR(rb_iseq_path(unit->iseq)));
1220 verbose(2, "start compilation: %s@%s:%ld -> %s", iseq_label, iseq_path, iseq_lineno, c_file);
1221 fprintf(f, "/* %s@%s:%ld */\n\n", iseq_label, iseq_path, iseq_lineno);
1222 bool success = mjit_compile(f, unit->iseq, funcname, unit->id);
1224 // release blocking mjit_gc_start_hook
1225 CRITICAL_SECTION_START(3, "after mjit_compile to wakeup client for GC");
1226 free_compiling_iseqs();
1227 in_jit = false;
1228 verbose(3, "Sending wakeup signal to client in a mjit-worker for GC");
1229 rb_native_cond_signal(&mjit_client_wakeup);
1230 CRITICAL_SECTION_FINISH(3, "in worker to wakeup client for GC");
1232 fclose(f);
1233 if (!success) {
1234 if (!mjit_opts.save_temps)
1235 remove_file(c_file);
1236 verbose(1, "JIT failure: %s@%s:%ld -> %s", iseq_label, iseq_path, iseq_lineno, c_file);
1237 return (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC;
1240 double start_time = real_ms_time();
1241 success = compile_c_to_so(c_file, so_file);
1242 if (!mjit_opts.save_temps)
1243 remove_file(c_file);
1244 double end_time = real_ms_time();
1246 if (!success) {
1247 verbose(2, "Failed to generate so: %s", so_file);
1248 return (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC;
1251 void *func = load_func_from_so(so_file, funcname, unit);
1252 if (!mjit_opts.save_temps)
1253 remove_so_file(so_file, unit);
1255 if ((uintptr_t)func > (uintptr_t)LAST_JIT_ISEQ_FUNC) {
1256 verbose(1, "JIT success (%.1fms): %s@%s:%ld -> %s",
1257 end_time - start_time, iseq_label, iseq_path, iseq_lineno, c_file);
1259 return (mjit_func_t)func;
1262 // To see cc_entries using index returned by `mjit_capture_cc_entries` in mjit_compile.c
1263 const struct rb_callcache **
1264 mjit_iseq_cc_entries(const struct rb_iseq_constant_body *const body)
1266 return body->jit_unit->cc_entries;
1269 // Capture cc entries of `captured_iseq` and append them to `compiled_iseq->jit_unit->cc_entries`.
1270 // This is needed when `captured_iseq` is inlined by `compiled_iseq` and GC needs to mark inlined cc.
1272 // Index to refer to `compiled_iseq->jit_unit->cc_entries` is returned instead of the address
1273 // because old addresses may be invalidated by `realloc` later. -1 is returned on failure.
1275 // This assumes that it's safe to reference cc without acquiring GVL.
1277 mjit_capture_cc_entries(const struct rb_iseq_constant_body *compiled_iseq, const struct rb_iseq_constant_body *captured_iseq)
1279 struct rb_mjit_unit *unit = compiled_iseq->jit_unit;
1280 unsigned int new_entries_size = unit->cc_entries_size + captured_iseq->ci_size;
1281 VM_ASSERT(captured_iseq->ci_size > 0);
1283 // Allocate new cc_entries and append them to unit->cc_entries
1284 const struct rb_callcache **cc_entries;
1285 int cc_entries_index = unit->cc_entries_size;
1286 if (unit->cc_entries_size == 0) {
1287 VM_ASSERT(unit->cc_entries == NULL);
1288 unit->cc_entries = cc_entries = malloc(sizeof(struct rb_callcache *) * new_entries_size);
1289 if (cc_entries == NULL) return -1;
1291 else {
1292 void *cc_ptr = (void *)unit->cc_entries; // get rid of bogus warning by VC
1293 cc_entries = realloc(cc_ptr, sizeof(struct rb_callcache *) * new_entries_size);
1294 if (cc_entries == NULL) return -1;
1295 unit->cc_entries = cc_entries;
1296 cc_entries += cc_entries_index;
1298 unit->cc_entries_size = new_entries_size;
1300 // Capture cc to cc_enties
1301 for (unsigned int i = 0; i < captured_iseq->ci_size; i++) {
1302 cc_entries[i] = captured_iseq->call_data[i].cc;
1305 return cc_entries_index;
1308 // Set up field `used_code_p` for unit iseqs whose iseq on the stack of ec.
1309 static void
1310 mark_ec_units(rb_execution_context_t *ec)
1312 const rb_control_frame_t *cfp;
1314 if (ec->vm_stack == NULL)
1315 return;
1316 for (cfp = RUBY_VM_END_CONTROL_FRAME(ec) - 1; ; cfp = RUBY_VM_NEXT_CONTROL_FRAME(cfp)) {
1317 const rb_iseq_t *iseq;
1318 if (cfp->pc && (iseq = cfp->iseq) != NULL
1319 && imemo_type((VALUE) iseq) == imemo_iseq
1320 && (iseq->body->jit_unit) != NULL) {
1321 iseq->body->jit_unit->used_code_p = true;
1324 if (cfp == ec->cfp)
1325 break; // reached the most recent cfp
1329 // MJIT info related to an existing continutaion.
1330 struct mjit_cont {
1331 rb_execution_context_t *ec; // continuation ec
1332 struct mjit_cont *prev, *next; // used to form lists
// Doubly linked list of registered continuations. This is used to detect
// units which are in use in unload_units.
1337 static struct mjit_cont *first_cont;
1339 // Unload JIT code of some units to satisfy the maximum permitted
1340 // number of units with a loaded code.
1341 static void
1342 unload_units(void)
1344 struct rb_mjit_unit *unit = 0, *next;
1345 struct mjit_cont *cont;
1346 int units_num = active_units.length;
1348 // For now, we don't unload units when ISeq is GCed. We should
1349 // unload such ISeqs first here.
1350 list_for_each_safe(&active_units.head, unit, next, unode) {
1351 if (unit->iseq == NULL) { // ISeq is GCed.
1352 remove_from_list(unit, &active_units);
1353 free_unit(unit);
1357 // Detect units which are in use and can't be unloaded.
1358 list_for_each(&active_units.head, unit, unode) {
1359 assert(unit->iseq != NULL && unit->handle != NULL);
1360 unit->used_code_p = false;
1362 // All threads have a root_fiber which has a mjit_cont. Other normal fibers also
1363 // have a mjit_cont. Thus we can check ISeqs in use by scanning ec of mjit_conts.
1364 for (cont = first_cont; cont != NULL; cont = cont->next) {
1365 mark_ec_units(cont->ec);
1367 // TODO: check stale_units and unload unused ones! (note that the unit is not associated to ISeq anymore)
1369 // Unload units whose total_calls is smaller than any total_calls in unit_queue.
1370 // TODO: make the algorithm more efficient
1371 long unsigned prev_queue_calls = -1;
1372 while (true) {
1373 // Calculate the next max total_calls in unit_queue
1374 long unsigned max_queue_calls = 0;
1375 list_for_each(&unit_queue.head, unit, unode) {
1376 if (unit->iseq != NULL && max_queue_calls < unit->iseq->body->total_calls
1377 && unit->iseq->body->total_calls < prev_queue_calls) {
1378 max_queue_calls = unit->iseq->body->total_calls;
1381 prev_queue_calls = max_queue_calls;
1383 bool unloaded_p = false;
1384 list_for_each_safe(&active_units.head, unit, next, unode) {
1385 if (unit->used_code_p) // We can't unload code on stack.
1386 continue;
1388 if (max_queue_calls > unit->iseq->body->total_calls) {
1389 verbose(2, "Unloading unit %d (calls=%lu, threshold=%lu)",
1390 unit->id, unit->iseq->body->total_calls, max_queue_calls);
1391 assert(unit->handle != NULL);
1392 remove_from_list(unit, &active_units);
1393 free_unit(unit);
1394 unloaded_p = true;
1397 if (!unloaded_p) break;
1400 if (units_num > active_units.length) {
1401 verbose(1, "Too many JIT code -- %d units unloaded", units_num - active_units.length);
1402 total_unloads += units_num - active_units.length;
1406 static void mjit_add_iseq_to_process(const rb_iseq_t *iseq, const struct rb_mjit_compile_info *compile_info, bool worker_p);
1408 // The function implementing a worker. It is executed in a separate
1409 // thread by rb_thread_create_mjit_thread. It compiles precompiled header
1410 // and then compiles requested ISeqs.
1411 void
1412 mjit_worker(void)
1414 // Allow only `max_cache_size / 100` times (default: 100) of compaction.
1415 // Note: GC of compacted code has not been implemented yet.
1416 int max_compact_size = mjit_opts.max_cache_size / 100;
1417 if (max_compact_size < 10) max_compact_size = 10;
1419 // Run unload_units after it's requested `max_cache_size / 10` (default: 10) times.
1420 // This throttles the call to mitigate locking in unload_units. It also throttles JIT compaction.
1421 int throttle_threshold = mjit_opts.max_cache_size / 10;
1423 #ifndef _MSC_VER
1424 if (pch_status == PCH_NOT_READY) {
1425 make_pch();
1427 #endif
1428 if (pch_status == PCH_FAILED) {
1429 mjit_enabled = false;
1430 CRITICAL_SECTION_START(3, "in worker to update worker_stopped");
1431 worker_stopped = true;
1432 verbose(3, "Sending wakeup signal to client in a mjit-worker");
1433 rb_native_cond_signal(&mjit_client_wakeup);
1434 CRITICAL_SECTION_FINISH(3, "in worker to update worker_stopped");
1435 return; // TODO: do the same thing in the latter half of mjit_finish
1438 // main worker loop
1439 while (!stop_worker_p) {
1440 struct rb_mjit_unit *unit;
1442 // Wait until a unit becomes available
1443 CRITICAL_SECTION_START(3, "in worker dequeue");
1444 while ((list_empty(&unit_queue.head) || active_units.length >= mjit_opts.max_cache_size) && !stop_worker_p) {
1445 rb_native_cond_wait(&mjit_worker_wakeup, &mjit_engine_mutex);
1446 verbose(3, "Getting wakeup from client");
1448 // Lazily move active_units to stale_units to avoid race conditions around active_units with compaction
1449 if (pending_stale_p) {
1450 pending_stale_p = false;
1451 struct rb_mjit_unit *next;
1452 list_for_each_safe(&active_units.head, unit, next, unode) {
1453 if (unit->stale_p) {
1454 unit->stale_p = false;
1455 remove_from_list(unit, &active_units);
1456 add_to_list(unit, &stale_units);
1457 // Lazily put it to unit_queue as well to avoid race conditions on jit_unit with mjit_compile.
1458 mjit_add_iseq_to_process(unit->iseq, &unit->iseq->body->jit_unit->compile_info, true);
1463 // Unload some units as needed
1464 if (unload_requests >= throttle_threshold) {
1465 while (in_gc) {
1466 verbose(3, "Waiting wakeup from GC");
1467 rb_native_cond_wait(&mjit_gc_wakeup, &mjit_engine_mutex);
1469 in_jit = true; // Lock GC
1471 RB_DEBUG_COUNTER_INC(mjit_unload_units);
1472 unload_units();
1473 unload_requests = 0;
1475 in_jit = false; // Unlock GC
1476 verbose(3, "Sending wakeup signal to client in a mjit-worker for GC");
1477 rb_native_cond_signal(&mjit_client_wakeup);
1479 if (active_units.length == mjit_opts.max_cache_size && mjit_opts.wait) { // Sometimes all methods may be in use
1480 mjit_opts.max_cache_size++; // avoid infinite loop on `rb_mjit_wait_call`. Note that --jit-wait is just for testing.
1481 verbose(1, "No units can be unloaded -- incremented max-cache-size to %d for --jit-wait", mjit_opts.max_cache_size);
1484 unit = get_from_list(&unit_queue);
1485 CRITICAL_SECTION_FINISH(3, "in worker dequeue");
1487 if (unit) {
1488 // JIT compile
1489 mjit_func_t func = convert_unit_to_func(unit);
1490 (void)RB_DEBUG_COUNTER_INC_IF(mjit_compile_failures, func == (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC);
1492 CRITICAL_SECTION_START(3, "in jit func replace");
1493 while (in_gc) { // Make sure we're not GC-ing when touching ISeq
1494 verbose(3, "Waiting wakeup from GC");
1495 rb_native_cond_wait(&mjit_gc_wakeup, &mjit_engine_mutex);
1497 if (unit->iseq) { // Check whether GCed or not
1498 if ((uintptr_t)func > (uintptr_t)LAST_JIT_ISEQ_FUNC) {
1499 add_to_list(unit, &active_units);
1501 // Usage of jit_code might be not in a critical section.
1502 MJIT_ATOMIC_SET(unit->iseq->body->jit_func, func);
1504 else {
1505 free_unit(unit);
1507 CRITICAL_SECTION_FINISH(3, "in jit func replace");
1509 #if USE_JIT_COMPACTION
1510 // Combine .o files to one .so and reload all jit_func to improve memory locality.
1511 if (compact_units.length < max_compact_size
1512 && ((!mjit_opts.wait && unit_queue.length == 0 && active_units.length > 1)
1513 || (active_units.length == mjit_opts.max_cache_size && compact_units.length * throttle_threshold <= total_unloads))) { // throttle compaction by total_unloads
1514 compact_all_jit_code();
1516 #endif
1520 // To keep mutex unlocked when it is destroyed by mjit_finish, don't wrap CRITICAL_SECTION here.
1521 worker_stopped = true;