Draft NEWS for sbcl-2.4.7
[sbcl.git] / src / runtime / thread.c
blobd99cb38a17c842426935af74ba61d7041306779d
1 /*
2 * This software is part of the SBCL system. See the README file for
3 * more information.
5 * This software is derived from the CMU CL system, which was
6 * written at Carnegie Mellon University and released into the
7 * public domain. The software is in the public domain and is
8 * provided with absolutely no warranty. See the COPYING and CREDITS
9 * files for more information.
12 #ifdef __linux__
13 #define _GNU_SOURCE // for pthread_setname_np()
14 #endif
15 #include "genesis/sbcl.h"
17 #include <stdlib.h>
18 #include <stdio.h>
19 #include <string.h>
20 #ifndef LISP_FEATURE_WIN32
21 #include <sched.h>
22 #endif
23 #include <stddef.h>
24 #include <errno.h>
25 #include <sys/types.h>
26 #ifndef LISP_FEATURE_WIN32
27 #include <sys/wait.h>
28 #endif
30 #include "runtime.h"
31 #include "validate.h" /* for BINDING_STACK_SIZE etc */
32 #include "thread.h"
33 #include "genesis/thread.h"
34 #include "arch.h"
35 #include "target-arch-os.h"
36 #include "os.h"
37 #include "globals.h"
38 #include "genesis/cons.h"
39 #include "genesis/symbol.h"
40 #include "genesis/instance.h"
41 #include "genesis/vector.h"
42 #include "interr.h" /* for lose() */
43 #include "gc.h"
44 #include "pseudo-atomic.h"
45 #include "interrupt.h"
46 #include "lispregs.h"
47 #include "atomiclog.inc"
49 #ifdef LISP_FEATURE_SB_THREAD
51 #if defined LISP_FEATURE_OPENBSD || defined LISP_FEATURE_FREEBSD || defined LISP_FEATURE_DRAGONFLY
52 #include <pthread_np.h>
53 #endif
55 #ifdef LISP_FEATURE_SUNOS
56 #include <thread.h>
57 #endif
58 #endif
/* Bytes reserved per thread for dynamically-assigned special-variable
 * (TLS) bindings. */
int dynamic_values_bytes = 4096 * sizeof(lispobj); // same for all threads
// exposed to lisp for pthread_create if not C_STACK_IS_CONTROL_STACK
os_vm_size_t thread_alien_stack_size = ALIEN_STACK_SIZE;
// Head of the doubly-linked list of all live lisp threads.
struct thread *all_threads;

#ifdef LISP_FEATURE_SB_THREAD

/* Per-thread pointer to the current 'struct thread': either a compiler
 * TLS variable or a pthread key, depending on build features. */
#ifdef LISP_FEATURE_GCC_TLS
__thread struct thread *current_thread;
#elif !defined LISP_FEATURE_WIN32
pthread_key_t current_thread = 0;
#endif

/* Locks guarding the all_threads list, the thread-struct recycle bin,
 * and GC entry. Windows uses CRITICAL_SECTIONs (initialized in
 * create_main_lisp_thread); elsewhere, statically-initialized mutexes. */
#ifdef LISP_FEATURE_WIN32
CRITICAL_SECTION all_threads_lock;
static CRITICAL_SECTION recyclebin_lock;
static CRITICAL_SECTION in_gc_lock;
#else
pthread_mutex_t all_threads_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t recyclebin_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t in_gc_lock = PTHREAD_MUTEX_INITIALIZER;
#endif

#endif

#if defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64)
extern lispobj call_into_lisp_first_time(lispobj fun, lispobj *args, int nargs);
#endif
89 static void
90 link_thread(struct thread *th)
92 if (all_threads) all_threads->prev=th;
93 th->next=all_threads;
94 th->prev=0;
95 all_threads=th;
98 #ifdef LISP_FEATURE_SB_THREAD
99 static void
100 unlink_thread(struct thread *th)
102 if (th->prev)
103 th->prev->next = th->next;
104 else
105 all_threads = th->next;
106 if (th->next)
107 th->next->prev = th->prev;
110 /* Not safe in general, but if your thread names are all
111 * simple-base-string and won't move, this is slightly ok */
112 char* vm_thread_name(struct thread* th)
114 if (!th) return "non-lisp";
115 struct thread_instance *lispthread = (void*)INSTANCE(th->lisp_thread);
116 lispobj name = lispthread->_name;
117 if (simple_base_string_p(name)) return vector_sap(name);
118 return "?";
/* Atomically read thread->state_word.state with a full memory barrier.
 * The compare-and-swap with old==new==-1 can never match a real state,
 * so it only returns the current value; the __sync builtin supplies
 * the required fence. */
#define get_thread_state(thread) \
 (int)__sync_val_compare_and_swap(&thread->state_word.state, -1, -1)
124 #ifndef LISP_FEATURE_SB_SAFEPOINT
/* Transition 'thread' to 'state' (STATE_RUNNING / STATE_STOPPED /
 * STATE_DEAD), posting the appropriate "state changed" semaphore once
 * per waiter so that threads blocked in thread_wait_until_not() wake.
 * 'signals_already_blocked' lets a caller that has already masked the
 * blockable signals (e.g. a foreign thread) skip the mask/unmask pair. */
void
set_thread_state(struct thread *thread,
                 char state,
                 bool signals_already_blocked) // for foreign thread
{
    struct extra_thread_data *semaphores = thread_extra_data(thread);
    int i, waitcount = 0;
    sigset_t old;
    // If we've already masked the blockable signals we can avoid two syscalls here.
    if (!signals_already_blocked)
        block_blockable_signals(&old);
    os_sem_wait(&semaphores->state_sem); // take the per-thread state lock
    if (thread->state_word.state != state) {
        // Leaving RUNNING: wake everyone waiting for "not running".
        if ((STATE_STOPPED==state) ||
            (STATE_DEAD==state)) {
            waitcount = semaphores->state_not_running_waitcount;
            semaphores->state_not_running_waitcount = 0;
            for (i=0; i<waitcount; i++)
                os_sem_post(&semaphores->state_not_running_sem);
        }
        // Leaving STOPPED: wake everyone waiting for "not stopped".
        if ((STATE_RUNNING==state) ||
            (STATE_DEAD==state)) {
            waitcount = semaphores->state_not_stopped_waitcount;
            semaphores->state_not_stopped_waitcount = 0;
            for (i=0; i<waitcount; i++)
                os_sem_post(&semaphores->state_not_stopped_sem);
        }
        thread->state_word.state = state;
    }
    os_sem_post(&semaphores->state_sem);
    if (!signals_already_blocked)
        thread_sigmask(SIG_SETMASK, &old, NULL);
}
// Wait until "thread's" state is something other than 'undesired_state'
// and return whatever the new state is.
int thread_wait_until_not(int undesired_state,
                          struct thread *thread)
{
    struct extra_thread_data *semaphores = thread_extra_data(thread);
    sigset_t old;
    os_sem_t *wait_sem;
    block_blockable_signals(&old);
  start:
    os_sem_wait(&semaphores->state_sem);
    /* "The following functions synchronize memory with respect to other threads:
     *  ... pthread_mutex_lock() ... "
     * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_11
     * But we still have to ensure no compiler reordering. */
    int ending_state = get_thread_state(thread);
    if (ending_state == undesired_state) {
        // Register as a waiter on the matching "left that state" semaphore.
        // The waitcount is consumed by set_thread_state, which posts once
        // per registered waiter.
        switch (undesired_state) {
        case STATE_RUNNING:
            wait_sem = &semaphores->state_not_running_sem;
            semaphores->state_not_running_waitcount++;
            break;
        case STATE_STOPPED:
            wait_sem = &semaphores->state_not_stopped_sem;
            semaphores->state_not_stopped_waitcount++;
            break;
        default:
            lose("thread_wait_until_not: invalid argument %x", ending_state);
        }
    } else {
        wait_sem = NULL;
    }
    os_sem_post(&semaphores->state_sem);
    if (wait_sem) {
        os_sem_wait(wait_sem);
        // Re-check under the lock: the state may have changed again.
        goto start;
    }
    thread_sigmask(SIG_SETMASK, &old, NULL);
    return ending_state;
}
201 #endif /* sb-safepoint */
202 #endif /* sb-thread */
/* Per-platform retrieval of the kernel-assigned thread ID (not the
 * pthread handle). Falls back to 0 where no mechanism is known. */
#ifdef LISP_FEATURE_WIN32
#define sb_GetTID() GetCurrentThreadId()
#elif defined __linux__
// gettid() was added in glibc 2.30 but we support older glibc
int sb_GetTID() { return syscall(SYS_gettid); }
#elif defined __DragonFly__
#include <sys/lwp.h>
lwpid_t sb_GetTID() { return lwp_gettid(); }
#elif defined __FreeBSD__
#include <sys/thr.h>
int sb_GetTID()
{
    long id;
    thr_self(&id);
    // man thr_self(2) says: the thread identifier is an integer in the range
    // from PID_MAX + 2 (100001) to INT_MAX. So casting to int is safe.
    return (int)id;
}
#elif defined __OpenBSD__
int sb_GetTID()
{
    return getthrid();
}
#elif defined __APPLE__ && defined LISP_FEATURE_SB_THREAD
int sb_GetTID() {
    // NOTE(review): this returns the Mach port for the thread rather than
    // a kernel TID proper — adequate as a unique nonzero ID; confirm.
    return pthread_mach_thread_np(pthread_self());
}
#else
#define sb_GetTID() 0
#endif
/* Our futex-based lisp mutex needs an OS-assigned unique ID.
 * Why not use pthread_self? I think the reason is that that on linux,
 * the TID is 4 bytes, and the futex lock word is 4 bytes.
 * If the unique ID needed 8 bytes, there could be spurious aliasing
 * that would make the code behave incorrectly. */
static int get_nonzero_tid()
{
    const int tid = sb_GetTID();
#ifdef LISP_FEATURE_SB_FUTEX
    // Only assert validity when futexes are in use; without them
    // (macOS etc) a zero TID is harmless.
    gc_assert(tid != 0);
#endif
    return tid;
}
// Because creation is synchronized by *MAKE-THREAD-LOCK*
// we only need a single 'attributes' object.
#if defined LISP_FEATURE_SB_THREAD && !defined LISP_FEATURE_WIN32
pthread_attr_t new_lisp_thread_attr;
#define init_shared_attr_object() (pthread_attr_init(&new_lisp_thread_attr)==0)
#else
// No attribute object needed; "initialization" trivially succeeds.
#define init_shared_attr_object() (1)
#endif
struct thread *alloc_thread_struct(void*);

/* Record the calling OS thread's identity into thread->os_thread.
 * - win32: duplicate the pseudo-handle into a real handle
 * - GS_SEG: additionally point the %gs segment base at the struct thread */
#ifdef LISP_FEATURE_WIN32
#define ASSOCIATE_OS_THREAD(thread) \
 DuplicateHandle(GetCurrentProcess(), GetCurrentThread(), \
                 GetCurrentProcess(), (LPHANDLE)&thread->os_thread, 0, TRUE, \
                 DUPLICATE_SAME_ACCESS)
#elif defined LISP_FEATURE_GS_SEG
#include <asm/prctl.h>
#include <sys/prctl.h>
extern int arch_prctl(int code, unsigned long *addr);
#define ASSOCIATE_OS_THREAD(thread) arch_prctl(ARCH_SET_GS, (uword_t*)thread), \
 thread->os_thread = thread_self()
#else
#define ASSOCIATE_OS_THREAD(thread) thread->os_thread = thread_self()
#endif
#ifdef LISP_FEATURE_WIN32
// Need a function callable from assembly code, where the inline one won't do.
void* read_current_thread() {
    return get_sb_vm_thread();
}
#endif

#if defined LISP_FEATURE_DARWIN && defined LISP_FEATURE_SB_THREAD
// Flag key used by detach_os_thread to suppress handling of a
// STOP_FOR_GC signal it is deliberately draining (defined elsewhere).
extern pthread_key_t ignore_stop_for_gc;
#endif

/* Enable GC timing statistics by default on 64-bit threaded linux builds,
 * unless building ldb or already requested explicitly. */
#if !defined COLLECT_GC_STATS && !defined STANDALONE_LDB && \
  defined LISP_FEATURE_LINUX && defined LISP_FEATURE_SB_THREAD && defined LISP_FEATURE_64_BIT
#define COLLECT_GC_STATS
#endif
291 #ifdef COLLECT_GC_STATS
292 __attribute__((unused)) static struct timespec gc_start_time;
293 __attribute__((unused)) static long stw_elapsed,
294 stw_min_duration = LONG_MAX, stw_max_duration, stw_sum_duration,
295 gc_min_duration = LONG_MAX, gc_max_duration, gc_sum_duration;
296 int show_gc_stats, n_gcs_done;
297 static void summarize_gc_stats(void) {
298 // TODO: also collect things like number of root pages,bytes scanned
299 // and number of pages,bytes copied on average per GC cycle.
300 if (show_gc_stats && n_gcs_done)
301 fprintf(stderr,
302 "\nGC: stw_delay=%ld,%ld,%ld \u00B5s (min,avg,max) pause=%ld,%ld,%ld \u00B5s (sum=%ld) over %d GCs\n",
303 stw_min_duration/1000, stw_sum_duration/n_gcs_done/1000, stw_max_duration/1000,
304 gc_min_duration/1000, gc_sum_duration/n_gcs_done/1000, gc_max_duration/1000,
305 gc_sum_duration/1000, n_gcs_done);
307 void reset_gc_stats() { // after sb-posix:fork
308 stw_min_duration = LONG_MAX; stw_max_duration = stw_sum_duration = 0;
309 gc_min_duration = LONG_MAX; gc_max_duration = gc_sum_duration = 0;
310 n_gcs_done = 0;
311 show_gc_stats = 1; // won't show if never called reset
313 #endif
315 #ifdef ATOMIC_LOGGING
316 #define THREAD_NAME_MAP_MAX 20 /* KLUDGE */
317 struct {
318 pthread_t thread;
319 char *name; // strdup'ed
320 } thread_name_map[THREAD_NAME_MAP_MAX];
321 int thread_name_map_count;
323 char* thread_name_from_pthread(pthread_t pointer){
324 int i;
325 for(i=0; i<thread_name_map_count; ++i)
326 if (thread_name_map[i].thread == pointer) return thread_name_map[i].name;
327 return 0;
329 #endif
/* Build the 'struct thread' for the initial (main) thread, perform
 * arch/OS per-thread initialization, install guard pages, and invoke
 * 'function' (a lisp object) as the toplevel. Only returns if the
 * toplevel returns, in which case the thread is torn down. */
void create_main_lisp_thread(lispobj function) {
#ifdef LISP_FEATURE_WIN32
    // CRITICAL_SECTIONs have no static initializer, unlike pthread mutexes.
    InitializeCriticalSection(&all_threads_lock);
    InitializeCriticalSection(&recyclebin_lock);
    InitializeCriticalSection(&in_gc_lock);
#endif
    struct thread *th = alloc_thread_struct(0);
    if (!th || arch_os_thread_init(th)==0 || !init_shared_attr_object())
        lose("can't create initial thread");
    th->state_word.sprof_enable = 1;
#if defined LISP_FEATURE_SB_THREAD && !defined LISP_FEATURE_GCC_TLS && !defined LISP_FEATURE_WIN32
    pthread_key_create(&current_thread, 0);
#endif
#if defined LISP_FEATURE_DARWIN && defined LISP_FEATURE_SB_THREAD
    pthread_key_create(&ignore_stop_for_gc, 0);
#endif
#if defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64)
    __attribute__((unused)) lispobj *args = NULL;
#endif
    ASSOCIATE_OS_THREAD(th);
    ASSIGN_CURRENT_THREAD(th);
#if defined THREADS_USING_GCSIGNAL && \
    (defined LISP_FEATURE_PPC || defined LISP_FEATURE_PPC64 || defined LISP_FEATURE_ARM64 || defined LISP_FEATURE_RISCV)
    /* SIG_STOP_FOR_GC defaults to blocked on PPC? */
    unblock_gc_stop_signal();
#endif
    link_thread(th);
    th->os_kernel_tid = get_nonzero_tid();

    // Arm the stack guard pages (hard guards first, then soft guards).
#ifndef LISP_FEATURE_WIN32
    protect_control_stack_hard_guard_page(1, NULL);
#endif
    protect_binding_stack_hard_guard_page(1, NULL);
    protect_alien_stack_hard_guard_page(1, NULL);
#ifndef LISP_FEATURE_WIN32
    protect_control_stack_guard_page(1, NULL);
#endif
    protect_binding_stack_guard_page(1, NULL);
    protect_alien_stack_guard_page(1, NULL);

#if defined(LISP_FEATURE_DARWIN) && defined(LISP_FEATURE_X86_64)
    set_thread_stack(th->control_stack_end);
#endif

#ifdef COLLECT_GC_STATS
    atexit(summarize_gc_stats);
#endif
    /* WIN32 has a special stack arrangement, calling
     * call_into_lisp_first_time will put the new stack in the middle
     * of the current stack */
#if !(defined(LISP_FEATURE_WIN32) && !defined(OS_THREAD_STACK)) \
    && (defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64))
    call_into_lisp_first_time(function,args,0);
#else
    funcall0(function);
#endif
    // If we end up returning, clean up the initial thread.
#ifdef LISP_FEATURE_SB_THREAD
    unlink_thread(th);
#else
    all_threads = NULL;
#endif
    arch_os_thread_cleanup(th);
    ASSIGN_CURRENT_THREAD(NULL);
}
/* Repair per-thread state in the child after fork(): the child has a
 * fresh kernel TID, and some OS- and GC-level resources must be
 * re-created since fork() only preserves the calling thread. */
void sb_posix_after_fork() { // for use by sb-posix:fork
    struct thread* th = get_sb_vm_thread();
    th->os_kernel_tid = get_nonzero_tid();
#ifdef LISP_FEATURE_DARWIN
    extern void darwin_reinit();
    darwin_reinit();
#endif
#ifdef LISP_FEATURE_MARK_REGION_GC
    // The parallel-GC worker pool does not survive fork; rebuild it.
    extern void thread_pool_init();
    thread_pool_init();
#endif
}
410 #ifdef LISP_FEATURE_SB_THREAD
412 void free_thread_struct(struct thread *th)
414 struct extra_thread_data *extra_data = thread_extra_data(th);
415 if (extra_data->arena_savearea) free(extra_data->arena_savearea);
416 os_deallocate((os_vm_address_t) th->os_address, THREAD_STRUCT_SIZE);
/* Note: scribble must be stack-allocated */
/* Finish initializing 'th' from inside its own OS thread: set the
 * current-thread pointer, run arch init, optionally arm guard pages
 * (per the GUARD_* bits in 'guardp'), and link into all_threads. */
static void
init_new_thread(struct thread *th,
                init_thread_data __attribute__((unused)) *scribble,
                int guardp)
{
    ASSIGN_CURRENT_THREAD(th);
    if(arch_os_thread_init(th)==0) {
        /* FIXME: handle error */
        lose("arch_os_thread_init failed");
    }

#define GUARD_CONTROL_STACK 1
#define GUARD_BINDING_STACK 2
#define GUARD_ALIEN_STACK   4

#ifndef LISP_FEATURE_WIN32
    if (guardp & GUARD_CONTROL_STACK)
        protect_control_stack_guard_page(1, NULL);
#endif
    if (guardp & GUARD_BINDING_STACK)
        protect_binding_stack_guard_page(1, NULL);
    if (guardp & GUARD_ALIEN_STACK)
        protect_alien_stack_guard_page(1, NULL);

    /* Since GC can only know about this thread from the all_threads
     * list and we're just adding this thread to it, there is no
     * danger of deadlocking even with SIG_STOP_FOR_GC blocked (which
     * it is not). */
#ifdef LISP_FEATURE_SB_SAFEPOINT
    // '&scribble' approximates the C stack pointer for the safepoint machinery.
    csp_around_foreign_call(th) = (lispobj)scribble;
#endif
    __attribute__((unused)) int lock_ret = mutex_acquire(&all_threads_lock);
    gc_assert(lock_ret);
    link_thread(th);
    ignore_value(mutex_release(&all_threads_lock));

    /* Kludge: Changed the order of some steps between the safepoint/
     * non-safepoint versions of this code. Can we unify this more? */
#ifdef LISP_FEATURE_SB_SAFEPOINT
    // Don't proceed while a GC is in progress.
    WITH_GC_STATE_LOCK {
        gc_state_wait(GC_NONE);
    }
    push_gcing_safety(&scribble->safety);
#endif
}
// Global list onto which dying threads push their remembered-set vectors
// (linked through data[1] of each vector; see unregister_thread).
// Presumably consumed later by GC — TODO confirm the consumer. (PERMGEN)
lispobj remset_transfer_list;
/* Tear down 'th' as a lisp thread: hand off its remembered set, close
 * its GC allocation regions, mark it DEAD (acknowledging any pending
 * stop-the-world), unlink it from all_threads, and destroy per-thread
 * OS resources. Runs with all blockable signals masked. */
static void
unregister_thread(struct thread *th,
                  init_thread_data __attribute__((unused)) *scribble)
{
    block_blockable_signals(0);
#ifdef LISP_FEATURE_PERMGEN
    // Lock-free push of this thread's remset onto remset_transfer_list:
    // link through data[1], then CAS the list head until it sticks.
    lispobj my_remset = th->remset;
    if (my_remset) {
        lispobj tail = remset_transfer_list;
        while (1) {
            VECTOR(my_remset)->data[1] = tail;
            lispobj actual_old = __sync_val_compare_and_swap(
                &remset_transfer_list, tail, my_remset);
            if (actual_old == tail) break;
            tail = actual_old; // lost the race; retry with the new head
        }
        th->remset = 0;
    }
#endif
    gc_close_thread_regions(th, LOCK_PAGE_TABLE|CONSUME_REMAINDER);
#ifdef LISP_FEATURE_SB_SAFEPOINT
    pop_gcing_safety(&scribble->safety);
#else
    /* This state change serves to "acknowledge" any stop-the-world
     * signal received while the STOP_FOR_GC signal is blocked */
    set_thread_state(th, STATE_DEAD, 1);
#endif
    /* SIG_STOP_FOR_GC is blocked and GC might be waiting for this
     * thread, but since we are either exiting lisp code as a lisp
     * thread that is dying, or exiting lisp code to return to
     * former status as a C thread, it won't wait long. */
    __attribute__((unused)) int lock_ret = mutex_acquire(&all_threads_lock);
    gc_assert(lock_ret);
    unlink_thread(th);
    lock_ret = mutex_release(&all_threads_lock);
    gc_assert(lock_ret);

    arch_os_thread_cleanup(th);

    // Destroy the per-thread semaphores that lived in the extra data.
    __attribute__((unused)) struct extra_thread_data *semaphores = thread_extra_data(th);
#ifdef LISP_FEATURE_UNIX
    os_sem_destroy(&semaphores->sprof_sem);
#endif
#ifndef LISP_FEATURE_SB_SAFEPOINT
    os_sem_destroy(&semaphores->state_sem);
    os_sem_destroy(&semaphores->state_not_running_sem);
    os_sem_destroy(&semaphores->state_not_stopped_sem);
#endif

#if defined(LISP_FEATURE_WIN32)
    int i;
    for (i = 0; i<NUM_PRIVATE_EVENTS; ++i)
        CloseHandle(thread_private_events(th,i));
#endif

    /* Undo the association of the current pthread to its `struct thread',
     * such that we can call get_sb_vm_thread() later in this
     * thread and cleanly get back NULL. */
    /* FIXME: what if, after we blocked signals, someone uses INTERRUPT-THREAD
     * on this thread? It's no longer a lisp thread; I suspect the signal
     * will be redirected to a lisp thread.
     * Can anything else go wrong with other signals? Nothing else should
     * direct signals specifically to this thread. Per-process signals are ok
     * because the kernel picks a thread in which a signal isn't blocked */
    ASSIGN_CURRENT_THREAD(NULL);
}
/* this is the first thing that runs in the child (which is why the
 * silly calling convention). Basically it calls the user's requested
 * lisp function after doing arch_os_thread_init and whatever other
 * bookkeeping needs to be done */
#ifdef LISP_FEATURE_WIN32
__stdcall unsigned int new_thread_trampoline(LPVOID arg)
#else
void* new_thread_trampoline(void* arg)
#endif
{
    struct thread* th = arg;
    ASSOCIATE_OS_THREAD(th);

#ifdef LISP_FEATURE_SB_SAFEPOINT
    init_thread_data scribble;
    // This "scribble" thing is really quite pointless because the original sigset_t
    // was passed in the thread's startup info (unless no signals at all were blocked).
    // And when terminating, why does anyone care what the signal mask was???
    // Well, there's a big "however": '&scribble' is no mere pass-by-reference arg-
    // it is actually used as an approximation of the C stack pointer.
#define SCRIBBLE &scribble
#else
#define SCRIBBLE 0
#endif
    // 'th->lisp_thread' remains valid despite not being in all_threads
    // due to the pinning via *STARTING-THREADS*.
    struct thread_instance *lispthread = (void*)native_pointer(th->lisp_thread);
    // Ephemeral (internal) threads are not counted as user threads.
    if (lispthread->_ephemeral_p == LISP_T) th->state_word.user_thread_p = 0;

#ifdef ATOMIC_LOGGING
    char* string = strdup((char*)VECTOR(name)->data); // FIXME: no such var as 'name'
    int index = __sync_fetch_and_add(&thread_name_map_count, 1);
    gc_assert(index < THREAD_NAME_MAP_MAX);
    thread_name_map[index].thread = pthread_self();
    thread_name_map[index].name = string;
#endif

    struct vector* startup_info = VECTOR(lispthread->startup_info); // 'lispthread' is pinned
    gc_assert(header_widetag(startup_info->header) == SIMPLE_VECTOR_WIDETAG);
    lispobj startfun = startup_info->data[0]; // 'startup_info' is pinned
    gc_assert(functionp(startfun));
    // GC can benefit from knowing the _effective_ end of the ambiguous root range.
    // Nothing at a higher address than &arg needs to be scanned for ambiguous roots.
    // For x86 + linux this optimization skips over about 800 words in the stack scan,
    // and for x86-64 it skips about 550 words as observed via:
    //   fprintf(stderr, "%d non-lisp stack words\n",
    //           (int)((lispobj*)th->control_stack_end - (lispobj*)&arg));
    // ADDRESS_SANITIZER doesn't allow this optimization.
    // Both of these assertions fail with the sanitizer enabled:
    //   gc_assert(th->control_stack_start <= (lispobj*)&arg
    //             && (lispobj*)&arg <= th->control_stack_end);
    //   gc_assert(th->control_stack_start <= (lispobj*)&startup_info
    //             && (lispobj*)&startup_info <= th->control_stack_end);
    // It seems to subvert the "&" and "*" operators in a way that only it understands,
    // while the stack pointer register is unperturbed.
    // (gencgc takes '&raise' for the current thread, but it disables the sanitizers)
    //
    // A stop-for-GC signal that hits after init_new_thread() releases the all_threads lock
    // and returns control here needs to see in the interrupt context a stack pointer
    // strictly below the computed th->control_stack_end. So make sure the value we pick
    // is strictly above any value of SP that the interrupt context could have.
#if defined LISP_FEATURE_C_STACK_IS_CONTROL_STACK && !defined ADDRESS_SANITIZER \
    && !defined LISP_FEATURE_SB_SAFEPOINT
    th->control_stack_end = (lispobj*)&arg + 1;
#endif
    th->os_kernel_tid = get_nonzero_tid();
    init_new_thread(th, SCRIBBLE, 0);
    // Passing the untagged pointer ensures 2 things:
    // - that the pinning mechanism works as designed, and not just by accident.
    // - that the initial stack does not contain a lisp pointer after it is not needed.
    //   (a regression test asserts that not even a THREAD instance is on the stack)
    funcall1(startfun, (lispobj)lispthread); // both pinned
    // Close the GC region and unlink from all_threads
    unregister_thread(th, SCRIBBLE);

    return 0;
}
// This receives a VECTOR-SAP
/* Set the OS-visible name of the calling thread from 'name', which
 * points directly at the data of a lisp string vector (so the vector
 * header can be recovered by subtracting the data offset). Each OS
 * imposes its own length limit; names that exceed it are skipped. */
void sb_set_os_thread_name(char* name)
{
    __attribute__((unused)) struct vector* v = (void*)(name - offsetof(struct vector,data));
    /* Potentially set the externally-visible name of this thread,
     * and for a whole pile of crazy, look at get_max_thread_name_length_impl() in
     * https://github.com/llvm-mirror/llvm/blob/394ea6522c69c2668bf328fc923e1a11cd785265/lib/Support/Unix/Threading.inc
     * which among other things, suggests that Linux might not even have the syscall */
#ifdef LISP_FEATURE_LINUX
    /* "The thread name is a meaningful C language string, whose length is
     *  restricted to 16 characters, including the terminating null byte ('\0').
     *  The pthread_setname_np() function can fail with the following error:
     *  ERANGE The length of the string ... exceeds the allowed limit." */
    if (vector_len(v) <= 15) pthread_setname_np(pthread_self(), name);
#endif
#ifdef LISP_FEATURE_NETBSD
    /* This constant is an upper bound on the length including the NUL.
     * Exceeding it will fail the call. It happens to be 32.
     * Also, don't want to printf-format a name containing a '%' */
    if (vector_len(v) < PTHREAD_MAX_NAMELEN_NP) pthread_setname_np(pthread_self(), "%s", name);
#endif
#if defined LISP_FEATURE_FREEBSD || defined LISP_FEATURE_OPENBSD
    /* Some places document that the length limit is either 16 or 32,
     * but my testing showed that 12.1 seems to accept any length */
    pthread_set_name_np(pthread_self(), name);
#endif
#if defined LISP_FEATURE_DARWIN && !defined LISP_FEATURE_AVOID_PTHREAD_SETNAME_NP
    // Darwin's pthread_setname_np names only the calling thread.
    if (vector_len(v) < 64) pthread_setname_np(name);
#endif
}
647 #ifdef LISP_FEATURE_OS_THREAD_STACK
648 extern void* funcall1_switching_stack(void*, void *(*fun)(void *));
650 void* new_thread_trampoline_switch_stack(void* th) {
651 return funcall1_switching_stack(th, new_thread_trampoline);
653 #endif
655 static struct thread* recyclebin_threads;
656 static struct thread* get_recyclebin_item()
658 struct thread* result = 0;
659 __attribute__((unused)) int rc = mutex_acquire(&recyclebin_lock);
660 gc_assert(rc);
661 if (recyclebin_threads) {
662 result = recyclebin_threads;
663 recyclebin_threads = result->next;
665 ignore_value(mutex_release(&recyclebin_lock));
666 return result ? result->os_address : 0;
668 static void put_recyclebin_item(struct thread* th)
670 __attribute__((unused)) int rc = mutex_acquire(&recyclebin_lock);
671 gc_assert(rc);
672 th->next = recyclebin_threads;
673 recyclebin_threads = th;
674 ignore_value(mutex_release(&recyclebin_lock));
/* Free every thread struct sitting in the recycle bin. Best-effort:
 * if the lock is already held (e.g. recursive GC) do nothing. */
void empty_thread_recyclebin()
{
    if (!recyclebin_threads) return;
    sigset_t old;
    block_deferrable_signals(&old);
    // no big deal if already locked (recursive GC?)
    // NOTE(review): TryEnterCriticalSection on a pthread_mutex_t suggests
    // it is a portability macro over pthread_mutex_trylock on non-Windows
    // builds — confirm in the runtime headers.
    if (TryEnterCriticalSection(&recyclebin_lock)) {
        struct thread* this = recyclebin_threads;
        while (this) {
            // Save ->next before freeing the node it lives in.
            struct thread* next = this->next;
            free_thread_struct(this);
            this = next;
        }
        recyclebin_threads = 0;
        ignore_value(mutex_release(&recyclebin_lock));
    }
    thread_sigmask(SIG_SETMASK, &old, 0);
}
/* Adopt the calling foreign (non-lisp-created) thread as a lisp thread:
 * allocate (or recycle) its 'struct thread', discover the OS-provided
 * C stack bounds, and register it via init_new_thread. */
static void attach_os_thread(init_thread_data *scribble)
{
#ifndef LISP_FEATURE_WIN32 // native threads have no signal mask
    block_deferrable_signals(&scribble->oldset);
#endif
    void* recycled_memory = get_recyclebin_item();
    struct thread *th = alloc_thread_struct(recycled_memory);

#ifndef LISP_FEATURE_SB_SAFEPOINT
    /* new-lisp-thread-trampoline doesn't like when the GC signal is blocked */
    /* FIXME: could be done using a single call to pthread_sigmask
       together with blocking the deferrable signals above. */
    unblock_gc_stop_signal();
#endif

    th->os_kernel_tid = get_nonzero_tid();
    /* win32: While ASSOCIATE_OS_THREAD performs a relatively expensive DuplicateHandle(),
     * simplicity here is preferable to the complexity entailed by memoizing the handle
     * in a TLS slot and registering a waiter on the foreign thread to close to handle.
     * In contrast to the previous approach, the new handle is closed in detach_os_thread(),
     * and if C calls lisp again in this thread... then lather, rinse, repeat.
     * A benchmark based on 'fcb-threads.impure' shows that we're still 8x faster
     * at callback entry than the code as it was prior to git rev 91f86339b4 */
    ASSOCIATE_OS_THREAD(th);

#if !defined(LISP_FEATURE_WIN32) && defined(LISP_FEATURE_C_STACK_IS_CONTROL_STACK)
    /* On windows, arch_os_thread_init will take care of finding the
     * stack. */
    void *stack_addr;
    size_t stack_size;
# ifdef LISP_FEATURE_OPENBSD
    stack_t stack;
    pthread_stackseg_np(th->os_thread, &stack);
    stack_size = stack.ss_size;
    stack_addr = (void*)((size_t)stack.ss_sp - stack_size);
# elif defined LISP_FEATURE_SUNOS
    stack_t stack;
    thr_stksegment(&stack);
    stack_size = stack.ss_size;
    stack_addr = (void*)((size_t)stack.ss_sp - stack_size);
# elif defined(LISP_FEATURE_DARWIN)
    // Darwin reports the stack's high address; the base is addr - size.
    stack_size = pthread_get_stacksize_np(th->os_thread);
    stack_addr = (char*)pthread_get_stackaddr_np(th->os_thread) - stack_size;
# else
    pthread_attr_t attr;
    pthread_attr_init(&attr);
#  if defined LISP_FEATURE_FREEBSD || defined LISP_FEATURE_DRAGONFLY
    pthread_attr_get_np(th->os_thread, &attr);
#  else
    // Local prototype: pthread_getattr_np is a GNU extension that may
    // not be declared by the visible headers.
    int pthread_getattr_np(pthread_t, pthread_attr_t *);
    pthread_getattr_np(th->os_thread, &attr);
#  endif
    pthread_attr_getstack(&attr, &stack_addr, &stack_size);
    pthread_attr_destroy(&attr);
# endif
    th->control_stack_start = stack_addr;
    th->control_stack_end = (void *) (((uintptr_t) stack_addr) + stack_size);
#endif

    /* We don't protect the control stack when adopting a foreign thread
     * because we wouldn't know where to put the guard */
    init_new_thread(th, scribble,
                    /* recycled memory already had mprotect() done,
                     * so avoid 2 syscalls when possible */
                    recycled_memory ? 0 : GUARD_BINDING_STACK|GUARD_ALIEN_STACK);
}
/* Undo attach_os_thread: unregister the adopted thread, drain any
 * pending stop-for-GC signal, recycle the thread struct, and restore
 * the caller's original signal mask. */
static void detach_os_thread(init_thread_data *scribble)
{
    struct thread *th = get_sb_vm_thread();

#if defined(LISP_FEATURE_WIN32)
    // Close the handle duplicated by ASSOCIATE_OS_THREAD.
    CloseHandle((HANDLE)th->os_thread);
#endif

    unregister_thread(th, scribble);

    /* We have to clear a STOP_FOR_GC signal if pending. Consider:
     * - on entry to unregister_thread, we block all signals
     * - simultaneously some other thread decides that it needs to initiate a GC
     * - that thread observes that this thread exists in all_threads and sends
     *   STOP_FOR_GC, so it becomes pending but undeliverable in this thread
     * - immediately after blocking signals, we change state to DEAD,
     *   which allows the GCing thread to ignore this thread
     *   (it sees the state change criterion as having been satisfied)
     * - the GCing thread releases the all_threads lock
     * - this thread acquires the lock and removes itself from all_threads,
     *   and indicates that it is no longer a lisp thread
     * - but STOP_FOR_GC is pending because it was in the blocked set.
     * Bad things happen unless we clear the pending GC signal. */
#if !defined LISP_FEATURE_SB_SAFEPOINT
    sigset_t pending;
    sigpending(&pending);
    if (sigismember(&pending, SIG_STOP_FOR_GC)) {
#ifdef LISP_FEATURE_DARWIN
        /* sigwait is not reliable on macOS, but sigsuspend is. It unfortunately
         * requires that the signal be delivered, so set a flag to ignore it.
         * If you don't believe the preceding statement, try enabling the other
         * branch of this #ifdef and running fcb-threads.impure.lisp which will
         * sporadically fail with "Can't handle sig31 in non-lisp thread".
         * So either sigpending was sometimes lying (hence we didn't try to clear
         * the signal), or else sigwait did not dequeue the signal. Clearly the
         * latter must be true, because if only the former were true, then we
         * would also see the test fail with sigsuspend */
        sigset_t blockmask;
        sigfillset(&blockmask);
        sigdelset(&blockmask, SIG_STOP_FOR_GC);
        pthread_setspecific(ignore_stop_for_gc, (void*)1);
        /* sigsuspend takes the mask of signals to block */
        sigsuspend(&blockmask);
        pthread_setspecific(ignore_stop_for_gc, 0);
        sigpending(&pending);
        if (sigismember(&pending, SIG_STOP_FOR_GC)) lose("clear stop-for-GC did not work");
#else
        __attribute__((unused)) int sig, rc;
        /* sigwait takes the mask of signals to allow through */
        rc = sigwait(&gc_sigset, &sig);
        gc_assert(rc == 0 && sig == SIG_STOP_FOR_GC);
#endif
    }
#endif
    put_recyclebin_item(th);
#ifndef LISP_FEATURE_WIN32 // native threads have no signal mask
    thread_sigmask(SIG_SETMASK, &scribble->oldset, 0);
#endif
}
#if defined(LISP_FEATURE_X86_64) && !defined(LISP_FEATURE_WIN32)
extern void funcall_alien_callback(lispobj arg1, lispobj arg2, lispobj arg0,
                                   struct thread* thread)
  __attribute__((sysv_abi));
#endif

/* This function's address is assigned into a static symbol's value slot,
 * so it has to look like a fixnum. lp#1991485 */
/* Entry point for alien callbacks: dispatch the callback into lisp.
 * If the caller is not (yet) a lisp thread, temporarily adopt it via
 * attach_os_thread / detach_os_thread around the call. */
void __attribute__((aligned(8)))
callback_wrapper_trampoline(
#if !(defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64))
    /* On the x86oid backends, the assembly wrapper happens to not pass
     * in ENTER_ALIEN_CALLBACK explicitly for safepoints. However, the
     * platforms with precise GC are tricky enough already, and I want
     * to minimize the read-time conditionals. For those platforms, I'm
     * only replacing funcall3 with callback_wrapper_trampoline while
     * keeping the arguments unchanged. --DFL */
    lispobj __attribute__((__unused__)) fun,
#endif
    lispobj arg0, lispobj arg1, lispobj arg2)
{
    struct thread* th = get_sb_vm_thread();
    if (!th) { /* callback invoked in non-lisp thread */
        init_thread_data scribble;
        attach_os_thread(&scribble);

        WITH_GC_AT_SAFEPOINTS_ONLY()
        {
            funcall3(StaticSymbolFunction(ENTER_FOREIGN_CALLBACK), arg0,arg1,arg2);
        }
        detach_os_thread(&scribble);
        return;
    }

#ifdef LISP_FEATURE_WIN32
    /* arg2 is the pointer to a return value, which sits on the stack */
    thread_extra_data(th)->carried_base_pointer = (os_context_register_t) *(((void**)arg2)-1);
#endif

    WITH_GC_AT_SAFEPOINTS_ONLY()
    {
#if defined(LISP_FEATURE_X86_64) && !defined(LISP_FEATURE_WIN32)
        funcall_alien_callback(arg1, arg2, arg0, th);
#else
        funcall3(StaticSymbolFunction(ENTER_ALIEN_CALLBACK), arg0,arg1,arg2);
#endif
    }
}
872 #endif /* LISP_FEATURE_SB_THREAD */
874 /* this is called from any other thread to create the new one, and
875 * initialize all parts of it that can be initialized from another
876 * thread
878 * The allocated memory will be laid out as depicted below.
879 * Left-to-right is in order of lowest to highest address:
881 * ______ spaces as obtained from OS
882 * / ___ aligned_spaces
883 * / /
884 * (0) (1) (2) (3) (4) (5) (6)
885 * | | CONTROL | BINDING | ALIEN | CSP | thread | |
886 * | | STACK | STACK | STACK | PAGE | structure | altstack |
887 * |...|------------------------------------------------------------|
888 * 2MiB 1MiB 1MiB (*) (**)
890 * | Lisp TLS | (**) altstack |
891 * |-----------------------------------|----------|--------------|
892 * | thread + struct + dynamically | extra | sigstack |
893 * | header thread assigned TLS | data | |
894 * +---------+-------------------------|----------+--------------|
895 * | | <--- TLS_SIZE words --> | ~1kb | 32*SIGSTKSZ |
896 * ^ thread base
898 * (1) = control stack start. default size shown
899 * (2) = binding stack start. size = BINDING_STACK_SIZE
900 * (3) = alien stack start. size = ALIEN_STACK_SIZE
901 * (4) = C safepoint page. size = BACKEND_PAGE_BYTES or 0
902 * (5) = per_thread_data. size = (THREAD_HEADER_SLOTS+TLS_SIZE) words
903 * (6) = arbitrarily-sized "extra" data and signal stack.
905 * (0) and (1) may coincide; (4) and (5) may coincide
907 * - Lisp TLS overlaps 'struct thread' so that the first N (~30) words
908 * have preassigned TLS indices.
910 * - "extra" data are not in 'struct thread' because placing them there
911 * makes it tough to calculate addresses in 'struct thread' from Lisp.
912 * (Every 'struct thread' slot has a known size)
 * On sb-safepoint builds one page before the thread base is used for the foreign calls safepoint.
 */
917 struct thread *
918 alloc_thread_struct(void* spaces) {
919 /* Allocate the thread structure in one fell swoop as there is no way to recover
920 * from failing to obtain contiguous memory. Note that the OS may have a smaller
921 * alignment granularity than BACKEND_PAGE_BYTES so we may have to adjust the
922 * result to make it conform to our guard page alignment requirement. */
923 bool zeroize_stack = 0;
924 if (spaces) {
925 // If reusing memory from a previously exited thread, start by removing
926 // some old junk from the stack. This is imperfect since we only clear a little
927 // at the top, but doing so enables diagnosing some garbage-retention issues
928 // using a fine-toothed comb. It would not be possible at all to diagnose
929 // if any newly started thread could refer a dead thread's heap objects.
930 zeroize_stack = 1;
931 } else {
932 spaces = os_alloc_gc_space(THREAD_STRUCT_CORE_SPACE_ID, MOVABLE,
933 NULL, THREAD_STRUCT_SIZE);
934 if (!spaces) return NULL;
936 /* Aligning up is safe as THREAD_STRUCT_SIZE has
937 * THREAD_ALIGNMENT_BYTES padding. */
938 char *aligned_spaces = PTR_ALIGN_UP(spaces, THREAD_ALIGNMENT_BYTES);
939 char* csp_page = aligned_spaces + thread_control_stack_size +
940 BINDING_STACK_SIZE + ALIEN_STACK_SIZE;
942 // Refer to the ASCII art in the block comment above
943 struct thread *th = (void*)(csp_page + THREAD_CSP_PAGE_SIZE
944 + THREAD_HEADER_SLOTS*N_WORD_BYTES);
946 #ifdef LISP_FEATURE_SB_SAFEPOINT
947 // Out of caution I'm supposing that the last thread to use this memory
948 // might have left this page as read-only. Could it? I have no idea.
949 os_protect(csp_page, THREAD_CSP_PAGE_SIZE, OS_VM_PROT_READ|OS_VM_PROT_WRITE);
950 #endif
952 #ifdef LISP_FEATURE_SB_THREAD
953 memset(th, 0, sizeof *th);
954 lispobj* ptr = (lispobj*)(th + 1);
955 lispobj* end = (lispobj*)((char*)th + dynamic_values_bytes);
956 memset(ptr, NO_TLS_VALUE_MARKER & 0xFF, (char*)end-(char*)ptr);
957 th->tls_size = dynamic_values_bytes;
958 #endif
960 __attribute((unused)) lispobj* tls = (lispobj*)th;
961 #ifdef THREAD_T_NIL_CONSTANTS_SLOT
962 tls[THREAD_T_NIL_CONSTANTS_SLOT] = (NIL << 32) | LISP_T;
963 #endif
964 #ifdef LISP_FEATURE_LINKAGE_SPACE
965 tls[THREAD_LINKAGE_TABLE_SLOT] = (lispobj)linkage_space;
966 tls[THREAD_ALIEN_LINKAGE_TABLE_BASE_SLOT] = (lispobj)ALIEN_LINKAGE_SPACE_START;
967 #endif
968 #if defined LISP_FEATURE_X86_64 && defined LISP_FEATURE_LINUX
969 tls[THREAD_MSAN_XOR_CONSTANT_SLOT] = 0x500000000000;
970 #endif
971 #ifdef LAYOUT_OF_FUNCTION
972 tls[THREAD_FUNCTION_LAYOUT_SLOT] = LAYOUT_OF_FUNCTION << 32;
973 #endif
974 #ifdef THREAD_TEXT_CARD_MARKS_SLOT
975 extern unsigned int* text_page_touched_bits;
976 tls[THREAD_TEXT_SPACE_ADDR_SLOT] = TEXT_SPACE_START;
977 tls[THREAD_TEXT_CARD_COUNT_SLOT] = text_space_size / IMMOBILE_CARD_BYTES;
978 tls[THREAD_TEXT_CARD_MARKS_SLOT] = (lispobj)text_page_touched_bits;
979 #endif
981 th->os_address = spaces;
982 th->control_stack_start = (lispobj*)aligned_spaces;
983 th->binding_stack_start=
984 (lispobj*)((char*)th->control_stack_start+thread_control_stack_size);
985 th->control_stack_end = th->binding_stack_start;
987 if (zeroize_stack) {
988 #if GENCGC_IS_PRECISE
989 /* Clear the entire control stack. Without this I was able to induce a GC failure
990 * in a test which hammered on thread creation for hours. The control stack is
991 * scavenged before the heap, so a stale word could point to the start (or middle)
992 * of an object using a bad lowtag, for whatever object formerly was there.
993 * Then a wrong transport function would be called and (if it worked at all) would
994 * place a wrongly tagged FP into a word that might not be the base of an object.
995 * Assume for simplicity (as is true) that stacks grow upward if GENCGC_IS_PRECISE.
996 * This could just call scrub_thread_control_stack but the comment there says that
997 * it's a lame algorithm and only mostly right - it stops after (1<<12) words
998 * and checks if the next is nonzero, looping again if it isn't.
999 * There's no reason not to be exactly right here instead of probably right */
1000 memset((char*)th->control_stack_start, 0,
1001 // take off 2 pages because of the soft and hard guard pages
1002 thread_control_stack_size - 2*os_vm_page_size);
1003 #else
1004 /* This is a little wasteful of cycles to pre-zero the pthread overhead (which in glibc
1005 * resides at the highest stack addresses) comprising about 5kb, below which is the lisp
1006 * stack. We don't need to zeroize above the lisp stack end, but we don't know exactly
1007 * where that will be. Zeroizing more than necessary is conservative, and helps ensure
1008 * that garbage retention from reused stacks does not pose a huge problem. */
1009 memset((char*)th->control_stack_end - 16384, 0, 16384);
1010 #endif
1013 th->state_word.control_stack_guard_page_protected = 1;
1014 th->alien_stack_start=
1015 (lispobj*)((char*)th->binding_stack_start+BINDING_STACK_SIZE);
1016 set_binding_stack_pointer(th,th->binding_stack_start);
1017 th->this = th;
1018 th->os_kernel_tid = 0;
1019 th->os_thread = 0;
1020 // Once allocated, the allocation profiling buffer sticks around.
1021 // If present and enabled, assign into the new thread.
1022 extern int alloc_profiling;
1023 th->profile_data = (uword_t*)(alloc_profiling ? alloc_profile_buffer : 0);
1025 struct extra_thread_data *extra_data = thread_extra_data(th);
1026 memset(extra_data, 0, sizeof *extra_data);
1028 #if defined LISP_FEATURE_SB_THREAD && !defined LISP_FEATURE_SB_SAFEPOINT
1029 os_sem_init(&extra_data->state_sem, 1);
1030 os_sem_init(&extra_data->state_not_running_sem, 0);
1031 os_sem_init(&extra_data->state_not_stopped_sem, 0);
1032 #endif
1033 #if defined LISP_FEATURE_UNIX && defined LISP_FEATURE_SB_THREAD
1034 os_sem_init(&extra_data->sprof_sem, 0);
1035 #endif
1036 extra_data->sprof_lock = 0;
1037 th->sprof_data = 0;
1039 th->state_word.state = STATE_RUNNING;
1040 th->state_word.sprof_enable = 0;
1041 th->state_word.user_thread_p = 1;
1043 lispobj* alien_stack_end = (lispobj*)((char*)th->alien_stack_start + ALIEN_STACK_SIZE);
1044 #if defined LISP_FEATURE_X86 || defined LISP_FEATURE_X86_64
1045 // Alien-stack-pointer is predecremented upon use
1046 th->alien_stack_pointer = alien_stack_end;
1047 #else
1048 // I do not know the convention for alien-stack-pointer
1049 th->alien_stack_pointer = alien_stack_end - 1;
1050 #endif
1052 #ifdef HAVE_THREAD_PSEUDO_ATOMIC_BITS_SLOT
1053 memset(&th->pseudo_atomic_bits, 0, sizeof th->pseudo_atomic_bits);
1054 #elif defined LISP_FEATURE_GENERATIONAL
1055 clear_pseudo_atomic_atomic(th);
1056 clear_pseudo_atomic_interrupted(th);
1057 #endif
1059 INIT_THREAD_REGIONS(th);
1060 #ifdef LISP_FEATURE_SB_THREAD
1061 /* This parallels the same logic in globals.c for the
1062 * single-threaded foreign_function_call_active, KLUDGE and
1063 * all. */
1064 #if defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64)
1065 th->ffcall_active_p = 0;
1066 #elif !defined(LISP_FEATURE_ARM64) // uses control_stack_start
1067 th->ffcall_active_p = 1;
1068 #endif
1069 #endif
1071 #ifndef LISP_FEATURE_SB_THREAD
1072 /* the tls-points-into-struct-thread trick is only good for threaded
1073 * sbcl, because unithread sbcl doesn't have tls. So, we copy the
1074 * appropriate values from struct thread here, and make sure that
1075 * we use the appropriate SymbolValue macros to access any of the
1076 * variable quantities from the C runtime. It's not quite OAOOM,
1077 * it just feels like it */
1078 SetSymbolValue(BINDING_STACK_START,(lispobj)th->binding_stack_start,th);
1079 SetSymbolValue(CONTROL_STACK_START,(lispobj)th->control_stack_start,th);
1080 SetSymbolValue(CONTROL_STACK_END,(lispobj)th->control_stack_end,th);
1081 #if defined(LISP_FEATURE_X86) || defined (LISP_FEATURE_X86_64)
1082 SetSymbolValue(ALIEN_STACK_POINTER,(lispobj)th->alien_stack_pointer,th);
1083 #endif
1084 #endif
1085 #ifndef LISP_FEATURE_C_STACK_IS_CONTROL_STACK
1086 access_control_stack_pointer(th)=th->control_stack_start;
1087 access_control_frame_pointer(th)=0;
1088 #endif
1090 thread_interrupt_data(th).pending_handler = 0;
1091 thread_interrupt_data(th).gc_blocked_deferrables = 0;
1092 #if HAVE_ALLOCATION_TRAP_CONTEXT
1093 thread_interrupt_data(th).allocation_trap_context = 0;
1094 #endif
1095 #if defined LISP_FEATURE_PPC64
1096 /* Storing a 0 into code coverage mark bytes or GC card mark bytes
1097 * can be done from the low byte of the thread base register.
1098 * The thread alignment is BACKEND_PAGE_BYTES (from thread.h), but seeing as this is
1099 * a similar-but-different requirement, it pays to double-check */
1100 if ((lispobj)th & 0xFF) lose("Thread struct not at least 256-byte-aligned");
1101 #endif
1103 #ifdef LISP_FEATURE_SB_THREAD
1104 // This macro is the same as "write_TLS(sym,val,th)" but can't be spelled thus.
1105 // 'sym' would get substituted prior to token pasting, so you end up with a bad
1106 // token "(*)_tlsindex" because all symbols are #defined to "(*)" so that #ifdef
1107 // remains meaningful to the preprocessor, while use of 'sym' itself yields
1108 // a deliberate syntax error if you try to compile an expression involving it.
1109 # define INITIALIZE_TLS(sym,val) write_TLS_index(sym##_tlsindex, val, th, _ignored_)
1110 #else
1111 # define INITIALIZE_TLS(sym,val) SYMBOL(sym)->value = val
1112 #endif
1113 #include "genesis/thread-init.inc"
1114 th->no_tls_value_marker = NO_TLS_VALUE_MARKER;
1116 #if defined(LISP_FEATURE_WIN32)
1117 int i;
1118 for (i = 0; i<NUM_PRIVATE_EVENTS; ++i)
1119 thread_private_events(th,i) = CreateEvent(NULL,FALSE,FALSE,NULL);
1120 thread_extra_data(th)->synchronous_io_handle_and_flag = 0;
1121 #endif
1122 th->stepping = 0;
1123 th->card_table = (lispobj)gc_card_mark;
1124 return th;
1126 #ifdef LISP_FEATURE_SB_THREAD
1127 #ifdef LISP_FEATURE_WIN32
1128 uword_t create_thread(struct thread* th)
1130 unsigned int tid;
1131 struct extra_thread_data *data = thread_extra_data(th);
1132 data->blocked_signal_set = deferrable_sigset;
1133 // It's somewhat customary in the win32 API to start threads as suspended.
1134 th->os_thread =
1135 _beginthreadex(NULL, thread_control_stack_size, new_thread_trampoline, th,
1136 CREATE_SUSPENDED | STACK_SIZE_PARAM_IS_A_RESERVATION, &tid);
1137 bool success = th->os_thread != 0;
1138 if (success) {
1139 th->os_kernel_tid = tid;
1140 ResumeThread((HANDLE)th->os_thread);
1142 return success;
1144 #endif
1146 int try_acquire_gc_lock() { return TryEnterCriticalSection(&in_gc_lock); }
1147 int release_gc_lock() { return mutex_release(&in_gc_lock); }
1149 static __attribute__((unused)) struct timespec stw_begin_realtime, stw_begin_cputime;
/* Return (end - begin) as a scalar duration: nanoseconds when 'long' is
 * 64 bits wide, else microseconds so that a 32-bit long cannot overflow
 * for the pause lengths we measure. */
long timespec_diff(struct timespec* begin, struct timespec* end)
{
#ifdef LISP_FEATURE_64_BIT
    return (end->tv_sec - begin->tv_sec) * 1000000000L + (end->tv_nsec - begin->tv_nsec);
#else
    return (end->tv_sec - begin->tv_sec) * 1000000L + (end->tv_nsec - begin->tv_nsec) / 1000;
#endif
}
1158 #ifdef MEASURE_STOP_THE_WORLD_PAUSE
1159 void thread_accrue_stw_time(struct thread* th,
1160 struct timespec* begin_real,
1161 struct timespec* begin_cpu)
1163 /* A non-Lisp thread calling into Lisp via DEFINE-ALIEN-CALLABLE
1164 * can receive SIG_STOP_FOR_GC as soon as it has a 'struct thread'
1165 * and _before_ a thread instance has been consed */
1166 if (th->lisp_thread) {
1167 struct timespec now;
1168 clock_gettime(CLOCK_MONOTONIC, &now);
1169 unsigned long elapsed = timespec_diff(begin_real, &now);
1170 struct thread_instance* ti = (void*)INSTANCE(th->lisp_thread);
1171 if (elapsed > ti->uw_max_stw_pause) ti->uw_max_stw_pause = elapsed;
1172 ti->uw_sum_stw_pause += elapsed;
1173 ++ti->uw_ct_stw_pauses;
1174 if (begin_cpu) {
1175 #ifdef CLOCK_THREAD_CPUTIME_ID
1176 clock_gettime(CLOCK_THREAD_CPUTIME_ID, &now);
1177 ti->uw_gc_virtual_time += timespec_diff(begin_cpu, &now);
1178 #endif
1182 #endif
1184 /* stopping the world is a two-stage process. From this thread we signal
1185 * all the others with SIG_STOP_FOR_GC. The handler for this signal does
1186 * the usual pseudo-atomic checks (we don't want to stop a thread while
1187 * it's in the middle of allocation) then waits for another SIG_STOP_FOR_GC.
 * (With SB-SAFEPOINT, see the definitions in safepoint.c instead.)
 */
1192 #if !defined LISP_FEATURE_SB_SAFEPOINT && !defined STANDALONE_LDB
1194 /* To avoid deadlocks when gc stops the world all clients of each
1195 * mutex must enable or disable SIG_STOP_FOR_GC for the duration of
1196 * holding the lock, but they must agree on which.
1197 * [The preceding remark is probably wrong - STOP_FOR_GC is a signal
1198 * that is directed to a thread, so the "wrong" thread would never
1199 * respond to someone else's STOP_FOR_GC. I'm leaving the comment
 * just in case someone can decipher it and decide to delete it]
 * A note about ESRCH: technically ESRCH can happen if an OS thread ceases
 * to exist, while the thread library has a representation of the thread
 * because pthread_join() wasn't invoked on it yet.
 * ESRCH can't occur for us because:
1206 * - if a thread was still linked in all_threads at the acquire of all_threads lock,
1207 * then that thread can't make progress in its termination code, because it's
1208 * waiting on the lock. If it changed its state to DEAD, but we perceived it as
1209 * RUNNING, it now must be blocked on the all_threads_lock and it can't disappear.
1210 * - ESRCH is not guaranteed to be returned anyway, and Linux man page doesn't even
1211 * list it as a possible outcome of pthread_kill.
1212 * Also, there used to be assertion that "thread_state(p)==STATE_DEAD)" on ESRCH
1213 * error, but that's saying that there is still memory backing 'struct thread'
1214 * (so that dereferencing was valid), but if dereferencing was valid, then the thread
1215 * can't have died (i.e. if ESRCH could be returned, then that implies that
1216 * the memory shouldn't be there) */
1218 void gc_stop_the_world()
1220 #ifdef MEASURE_STOP_THE_WORLD_PAUSE
1221 /* The thread performing stop-the-world does not use sig_stop_for_gc_handler on itself,
1222 * so it would not accrue time spent stopped. Force it to, by considering it "paused"
1223 * from the moment it wants to stop all other threads. */
1224 clock_gettime(CLOCK_MONOTONIC, &stw_begin_realtime);
1225 #endif
1226 #ifdef CLOCK_THREAD_CPUTIME_ID
1227 clock_gettime(CLOCK_THREAD_CPUTIME_ID, &stw_begin_cputime);
1228 #endif
1229 struct thread *th, *me = get_sb_vm_thread();
1230 int rc;
1232 /* Keep threads from registering with GC while the world is stopped. */
1233 rc = mutex_acquire(&all_threads_lock);
1234 gc_assert(rc);
1236 /* stop all other threads by sending them SIG_STOP_FOR_GC */
1237 for_each_thread(th) {
1238 if (th != me) {
1239 gc_assert(th->os_thread != 0);
1240 struct extra_thread_data *semaphores = thread_extra_data(th);
1241 os_sem_wait(&semaphores->state_sem);
1242 int state = get_thread_state(th);
1243 if (state == STATE_RUNNING) {
1244 rc = pthread_kill(th->os_thread,SIG_STOP_FOR_GC);
1245 /* This used to bogusly check for ESRCH.
1246 * I changed the ESRCH case to just fall into lose() */
1247 if (rc) lose("cannot suspend thread %p: %d, %s",
1248 // KLUDGE: assume that os_thread can be cast as pointer.
1249 // See comment in 'interr.h' about that.
1250 (void*)th->os_thread, rc, strerror(rc));
1252 os_sem_post(&semaphores->state_sem);
1255 for_each_thread(th) {
1256 if (th != me) {
1257 __attribute__((unused)) int state = thread_wait_until_not(STATE_RUNNING, th);
1258 gc_assert(state != STATE_RUNNING);
1261 event0("/gc_stop_the_world:end");
1264 /* pthread_kill is not guaranteed to be reentrant, prevent
1265 * gc_stop_the_world from interrupting another pthread_kill */
1266 int sb_thread_kill (pthread_t thread, int sig) {
1267 sigset_t old;
1268 block_blockable_signals(&old);
1269 int ret = pthread_kill(thread, sig);
1270 thread_sigmask(SIG_SETMASK, &old, NULL);
1271 return ret;
1275 void gc_start_the_world()
1277 #ifdef COLLECT_GC_STATS
1278 struct timespec gc_end_time;
1279 clock_gettime(CLOCK_MONOTONIC, &gc_end_time);
1280 long gc_elapsed = (gc_end_time.tv_sec - gc_start_time.tv_sec)*1000000000L
1281 + (gc_end_time.tv_nsec - gc_start_time.tv_nsec);
1282 if (stw_elapsed < 0 || gc_elapsed < 0) {
1283 char errmsg[] = "GC: Negative times?\n";
1284 ignore_value(write(2, errmsg, sizeof errmsg-1));
1285 } else {
1286 stw_sum_duration += stw_elapsed;
1287 if (stw_elapsed < stw_min_duration) stw_min_duration = stw_elapsed;
1288 if (stw_elapsed > stw_max_duration) stw_max_duration = stw_elapsed;
1289 gc_sum_duration += gc_elapsed;
1290 if (gc_elapsed < gc_min_duration) gc_min_duration = gc_elapsed;
1291 if (gc_elapsed > gc_max_duration) gc_max_duration = gc_elapsed;
1292 ++n_gcs_done;
1294 #endif
1295 struct thread *th, *me = get_sb_vm_thread();
1296 __attribute__((unused)) int lock_ret;
1297 /* if a resumed thread creates a new thread before we're done with
1298 * this loop, the new thread will be suspended waiting to acquire
1299 * the all_threads lock */
1300 for_each_thread(th) {
1301 gc_assert(th->os_thread);
1302 if (th != me) {
1303 /* I don't know if a normal load is fine here. I think we can't read
1304 * any value other than what was already observed?
1305 * No harm in being cautious though with regard to compiler reordering */
1306 int state = get_thread_state(th);
1307 if (state != STATE_DEAD) {
1308 if(state != STATE_STOPPED)
1309 lose("gc_start_the_world: bad thread state %x", state);
1310 set_thread_state(th, STATE_RUNNING, 0);
1315 lock_ret = mutex_release(&all_threads_lock);
1316 gc_assert(lock_ret);
1317 #ifdef MEASURE_STOP_THE_WORLD_PAUSE
1318 thread_accrue_stw_time(me, &stw_begin_realtime, &stw_begin_cputime);
1319 #endif
1322 #endif /* !LISP_FEATURE_SB_SAFEPOINT */
1323 #elif !defined STANDALONE_LDB
1324 // no threads
/* Single-threaded runtime: there are no other threads to suspend or resume,
 * so stopping/starting the world is a no-op. */
void gc_stop_the_world() {}
void gc_start_the_world() {}
1327 #endif /* !LISP_FEATURE_SB_THREAD */
/* Yield the processor to another runnable thread; returns sched_yield's
 * result on threaded builds, 0 (trivial success) otherwise. */
int
thread_yield()
{
#ifdef LISP_FEATURE_SB_THREAD
    return sched_yield();
#else
    return 0;
#endif
}
1339 #ifdef LISP_FEATURE_SB_SAFEPOINT
1340 /* If the thread id given does not belong to a running thread (it has
1341 * exited or never even existed) pthread_kill _may_ fail with ESRCH,
1342 * but it is also allowed to just segfault, see
1343 * <http://udrepper.livejournal.com/16844.html>.
1345 * Relying on thread ids can easily backfire since ids are recycled
1346 * (NPTL recycles them extremely fast) so a signal can be sent to
1347 * another process if the one it was sent to exited.
1349 * For these reasons, we must make sure that the thread is still alive
1350 * when the pthread_kill is called and return if the thread is
1351 * exiting.
1353 * Note (DFL, 2011-06-22): At the time of writing, this function is only
1354 * used for INTERRUPT-THREAD, hence the wake_thread special-case for
1355 * Windows is OK. */
1356 void wake_thread(struct thread_instance* lispthread)
1358 #ifdef LISP_FEATURE_WIN32
1359 /* META: why is this comment about safepoint builds mentioning
1360 * gc_stop_the_world() ? Never the twain shall meet. */
1362 /* Kludge (on safepoint builds): At the moment, this isn't just
1363 * an optimization; rather it masks the fact that
1364 * gc_stop_the_world() grabs the all_threads mutex without
1365 * releasing it, and since we're not using recursive pthread
1366 * mutexes, the pthread_mutex_lock() around the all_threads loop
1367 * would go wrong. Why are we running interruptions while
1368 * stopping the world though? Test case is (:ASYNC-UNWIND
1369 * :SPECIALS), especially with s/10/100/ in both loops. */
1371 /* Frequent special case: resignalling to self. The idea is
1372 * that leave_region safepoint will acknowledge the signal, so
1373 * there is no need to take locks, roll thread to safepoint
1374 * etc. */
1375 struct thread* thread = (void*)lispthread->uw_primitive_thread;
1376 if (thread == get_sb_vm_thread()) {
1377 sb_pthr_kill(thread, 1); // can't fail
1378 check_pending_thruptions(NULL);
1379 return;
1381 // block_deferrables + mutex_lock looks very unnecessary here,
1382 // but without them, make-target-contrib hangs in bsd-sockets.
1383 sigset_t oldset;
1384 block_deferrable_signals(&oldset);
1385 mutex_acquire(&all_threads_lock);
1386 sb_pthr_kill(thread, 1); // can't fail
1387 # ifdef LISP_FEATURE_SB_SAFEPOINT
1388 wake_thread_impl(lispthread);
1389 # endif
1390 mutex_release(&all_threads_lock);
1391 thread_sigmask(SIG_SETMASK,&oldset,0);
1392 #elif defined LISP_FEATURE_SB_SAFEPOINT
1393 wake_thread_impl(lispthread);
1394 #else
1395 pthread_kill(lispthread->uw_os_thread, SIGURG);
1396 #endif
1398 #endif
1400 #ifdef LISP_FEATURE_ULTRAFUTEX
1401 extern int futex_wake(int *lock_word, int n);
1402 void lispmutex_wake_waiter()
1404 struct lispmutex* m = (void*)INSTANCE(read_TLS(CURRENT_MUTEX, get_sb_vm_thread()));
1405 // The lock word is in the least-significant half of the state word if 64-bit.
1406 // See the definition of MUTEX-STATE-ADDRESS which adds 4 if #+big-endian.
1407 int* word =
1408 #ifdef LISP_FEATURE_BIG_ENDIAN
1410 #endif
1411 (int*)&m->uw_state;
1412 *word = 0; // slam 0 in, meaning uncontested
1413 futex_wake(word, 1);
1415 #endif