Workaround next problem with MSAN and amend previous comment
[sbcl.git] / src / runtime / linux-os.c
blob6410c93717c080cd190a3aa25704caee85b815e2
1 /*
2 * the Linux incarnation of OS-dependent routines. See also
3 * $(sbcl_arch)-linux-os.c
5 * This file (along with os.h) exports an OS-independent interface to
6 * the operating system VM facilities. Surprise surprise, this
7 * interface looks a lot like the Mach interface (but simpler in some
8 * places). For some operating systems, a subset of these functions
9 * will have to be emulated.
13 * This software is part of the SBCL system. See the README file for
14 * more information.
16 * This software is derived from the CMU CL system, which was
17 * written at Carnegie Mellon University and released into the
18 * public domain. The software is in the public domain and is
19 * provided with absolutely no warranty. See the COPYING and CREDITS
20 * files for more information.
23 #include <stdio.h>
24 #include <sys/param.h>
25 #include <sys/file.h>
26 #include "sbcl.h"
27 #include "./signal.h"
28 #include "os.h"
29 #include "arch.h"
30 #include "globals.h"
31 #include "interrupt.h"
32 #include "interr.h"
33 #include "lispregs.h"
34 #include "runtime.h"
35 #include "genesis/static-symbols.h"
36 #include "genesis/fdefn.h"
38 #include <sys/socket.h>
39 #include <sys/utsname.h>
40 #include <errno.h>
42 #include <sys/types.h>
43 #include <signal.h>
44 /* #include <sys/sysinfo.h> */
45 #include <sys/time.h>
46 #include <sys/stat.h>
47 #include <unistd.h>
48 #include <linux/version.h>
50 #include "validate.h"
51 #include "thread.h"
52 #include "gc.h"
53 #if defined LISP_FEATURE_GENCGC
54 #include "gencgc-internal.h"
55 #else
56 #include "cheneygc-internal.h"
57 #endif
58 #include <fcntl.h>
59 #ifdef LISP_FEATURE_SB_WTIMER
60 # include <sys/timerfd.h>
61 #endif
63 #ifdef LISP_FEATURE_X86
64 /* Prototype for personality(2). Done inline here since the header file
65 * for this isn't available on old versions of glibc. */
66 int personality (unsigned long);
67 #define ADDR_NO_RANDOMIZE 0x0040000
68 #else
69 #include <sys/personality.h>
70 #endif
/* Page size used by the runtime's VM routines; os_init() assigns it
 * from BACKEND_PAGE_BYTES. */
size_t os_vm_page_size;
74 #if defined(LISP_FEATURE_SB_THREAD) && defined(LISP_FEATURE_SB_FUTEX) && !defined(LISP_FEATURE_SB_PTHREAD_FUTEX)
75 #include <sys/syscall.h>
76 #include <unistd.h>
77 #include <errno.h>
/* Values taken from the kernel's linux/futex.h. This header file
   doesn't exist in userspace, which is our excuse for not grovelling
   them automatically. */
#define FUTEX_WAIT 0
#define FUTEX_WAKE 1
/* This is also copied from linux/futex.h so that a binary compiled on
 * a not so recent Linux system can still take advantage of private
 * futexes when available. */
#define FUTEX_WAIT_PRIVATE (0+128)
#define FUTEX_WAKE_PRIVATE (1+128)
#define FUTEX_FD (2)
#define FUTEX_REQUEUE (3)
/* Nonzero when the kernel accepted a FUTEX_WAIT_PRIVATE probe; set by
 * futex_init(). Not static so that Lisp may query it. */
boolean futex_private_supported_p;
95 static inline int
96 futex_wait_op()
98 return (futex_private_supported_p ? FUTEX_WAIT_PRIVATE : FUTEX_WAIT);
101 static inline int
102 futex_wake_op()
104 return (futex_private_supported_p ? FUTEX_WAKE_PRIVATE : FUTEX_WAKE);
/* Issue the raw futex system call.
 *
 * futex is the address of the futex word, op the FUTEX_* operation,
 * val the operation's value argument and rel an optional relative
 * timeout (may be NULL). Returns whatever syscall() returns, narrowed
 * to int. */
static inline int sys_futex(void *futex, int op, int val, struct timespec *rel)
{
    long status = syscall(SYS_futex, futex, op, val, rel);

    return (int) status;
}
112 static void
113 futex_init()
115 int x = 0;
116 sys_futex(&x, FUTEX_WAIT, 1, 0);
117 if (errno == ENOSYS)
118 lose("This version of SBCL is compiled with threading support, but your kernel\n"
119 "is too old to support this. Please use a more recent kernel or\n"
120 "a version of SBCL without threading support.\n");
121 sys_futex(&x, FUTEX_WAIT_PRIVATE, 1, 0);
122 if (errno == EWOULDBLOCK) {
123 futex_private_supported_p = 1;
124 } else {
125 futex_private_supported_p = 0;
126 SHOW("No futex private suppport\n");
/* Wait on *lock_word for as long as it still holds oldval.
 *
 * A negative sec means wait without a timeout; otherwise sec/usec give
 * the relative timeout.
 *
 * Returns 0 when the futex call succeeded, 1 on timeout, 2 when
 * interrupted by a signal, and -1 for anything else (EWOULDBLOCK and
 * others), in which case the caller needs to check the lock. */
int
futex_wait(int *lock_word, int oldval, long sec, unsigned long usec)
{
    struct timespec timeout;
    struct timespec *timeout_arg = NULL;

    if (sec >= 0) {
        timeout.tv_sec = sec;
        timeout.tv_nsec = usec * 1000;
        timeout_arg = &timeout;
    }

    if (sys_futex(lock_word, futex_wait_op(), oldval, timeout_arg) == 0)
        return 0;

    switch (errno) {
    case ETIMEDOUT:
        return 1;
    case EINTR:
        return 2;
    default:
        /* EWOULDBLOCK and others, need to check the lock */
        return -1;
    }
}
/* Wake waiters blocked on lock_word; n is passed through as the futex
 * call's value argument. Returns the result of the futex call. */
int
futex_wake(int *lock_word, int n)
{
    int wake_op = futex_wake_op();

    return sys_futex(lock_word, wake_op, n, 0);
}
160 #endif
/* Set to 1 by os_init() on SPARC builds when the running kernel
 * predates 2.4, to enable workarounds for kernel bugs in signal
 * handling. */
int linux_sparc_siginfo_bug = 0;
165 #ifdef LISP_FEATURE_SB_THREAD
/* Report whether the C library's pthread implementation identifies
 * itself as NPTL.
 *
 * Returns 1 when the _CS_GNU_LIBPTHREAD_VERSION string contains
 * "NPTL", 0 when it does not or when the string is unavailable. */
int
isnptl (void)
{
    size_t needed = confstr (_CS_GNU_LIBPTHREAD_VERSION, NULL, 0);
    char *version;

    if (needed == 0)
        return 0;

    /* The first call only measured the string; fetch it now. */
    version = alloca (needed);
    confstr (_CS_GNU_LIBPTHREAD_VERSION, version, needed);

    return strstr (version, "NPTL") != NULL;
}
179 #endif
181 void
182 os_init(char *argv[], char *envp[])
184 /* Conduct various version checks: do we have enough mmap(), is
185 * this a sparc running 2.2, can we do threads? */
186 struct utsname name;
187 int major_version;
188 int minor_version;
189 int patch_version;
190 char *p;
191 uname(&name);
193 p=name.release;
194 major_version = atoi(p);
195 minor_version = patch_version = 0;
196 p=strchr(p,'.');
197 if (p != NULL) {
198 minor_version = atoi(++p);
199 p=strchr(p,'.');
200 if (p != NULL)
201 patch_version = atoi(++p);
204 if (major_version<2) {
205 lose("linux kernel version too old: major version=%d (can't run in version < 2.0.0)\n",
206 major_version);
208 if (!(major_version>2 || minor_version >= 4)) {
209 #ifdef LISP_FEATURE_SPARC
210 FSHOW((stderr,"linux kernel %d.%d predates 2.4;\n enabling workarounds for SPARC kernel bugs in signal handling.\n", major_version,minor_version));
211 linux_sparc_siginfo_bug = 1;
212 #endif
214 #ifdef LISP_FEATURE_SB_THREAD
215 #if defined(LISP_FEATURE_SB_FUTEX) && !defined(LISP_FEATURE_SB_PTHREAD_FUTEX)
216 futex_init();
217 #endif
218 if(! isnptl()) {
219 lose("This version of SBCL only works correctly with the NPTL threading\n"
220 "library. Please use a newer glibc, use an older SBCL, or stop using\n"
221 "LD_ASSUME_KERNEL\n");
223 #endif
225 /* Don't use getpagesize(), since it's not constant across Linux
226 * kernel versions on some architectures (for example PPC). FIXME:
227 * possibly the same should be done on other architectures too.
229 os_vm_page_size = BACKEND_PAGE_BYTES;
231 /* KLUDGE: Disable memory randomization on new Linux kernels
232 * by setting a personality flag and re-executing. (We need
233 * to re-execute, since the memory maps that can conflict with
234 * the SBCL spaces have already been done at this point).
236 * Since randomization is currently implemented only on x86 kernels,
237 * don't do this trick on other platforms.
239 #if defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64)
240 if ((major_version == 2
241 /* Some old kernels will apparently lose unsupported personality flags
242 * on exec() */
243 && ((minor_version == 6 && patch_version >= 11)
244 || (minor_version > 6)
245 /* This is what RHEL 3 reports */
246 || (minor_version == 4 && patch_version > 20)))
247 || major_version >= 3)
249 int pers = personality(0xffffffffUL);
250 if (!(pers & ADDR_NO_RANDOMIZE)) {
251 int retval = personality(pers | ADDR_NO_RANDOMIZE);
252 /* Allegedly some Linux kernels (the reported case was
253 * "hardened Linux 2.6.7") won't set the new personality,
254 * but nor will they return -1 for an error. So as a
255 * workaround query the new personality...
257 int newpers = personality(0xffffffffUL);
258 /* ... and don't re-execute if either the setting resulted
259 * in an error or if the value didn't change. Otherwise
260 * this might result in an infinite loop.
263 if (!getenv("SBCL_IS_RESTARTING") &&
264 retval != -1 && newpers != pers) {
265 /* Use /proc/self/exe instead of trying to figure out
266 * the executable path from PATH and argv[0], since
267 * that's unreliable. We follow the symlink instead of
268 * executing the file directly in order to prevent top
269 * from displaying the name of the process as "exe". */
270 char runtime[PATH_MAX+1];
271 int i = readlink("/proc/self/exe", runtime, PATH_MAX);
272 if (i != -1) {
273 environ = envp;
274 setenv("SBCL_IS_RESTARTING", "T", 1);
275 runtime[i] = '\0';
276 execv(runtime, argv);
279 /* Either changing the personality or execve() failed. Either
280 * way we might as well continue, and hope that the random
281 * memory maps are ok this time around.
283 fprintf(stderr, "WARNING:\
284 \nCouldn't re-execute SBCL with proper personality flags (/proc isn't mounted? setuid?)\
285 \nTrying to continue anyway.\n");
286 } else if (getenv("SBCL_IS_RESTARTING")) {
287 /* We restarted due to previously enabled ASLR. Now,
288 * reenable it for fork()'ed children. */
289 int pers = personality(0xffffffffUL);
290 personality(pers & ~ADDR_NO_RANDOMIZE);
292 unsetenv("SBCL_IS_RESTARTING");
295 #ifdef LISP_FEATURE_X86
296 /* Use SSE detector. Recent versions of Linux enable SSE support
297 * on SSE capable CPUs. */
298 /* FIXME: Are there any old versions that does not support SSE? */
299 fast_bzero_pointer = fast_bzero_detect;
300 #endif
301 #endif
#ifdef LISP_FEATURE_ALPHA
/* The Alpha is a 64 bit CPU. SBCL is a 32 bit application. Due to all
 * the places that assume we can get a pointer into a fixnum with no
 * information loss, we have to make sure it allocates all its ram in the
 * 0-2Gb region. */

/* Address os_validate() uses when the caller gives none; it is advanced
 * past each successful mapping. */
static void * under_2gb_free_pointer=DYNAMIC_1_SPACE_END;
#endif
314 os_vm_address_t
315 os_validate(boolean movable, os_vm_address_t addr, os_vm_size_t len)
317 int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
318 os_vm_address_t actual;
320 #ifdef LISP_FEATURE_ALPHA
321 if (!addr) {
322 addr=under_2gb_free_pointer;
324 #endif
325 actual = mmap(addr, len, OS_VM_PROT_ALL, flags, -1, 0);
326 if (actual == MAP_FAILED) {
327 perror("mmap");
328 return 0; /* caller should check this */
331 if (!movable && (addr!=actual)) {
332 fprintf(stderr, "mmap: wanted %lu bytes at %p, actually mapped at %p\n",
333 (unsigned long) len, addr, actual);
334 return 0;
337 #ifdef LISP_FEATURE_ALPHA
339 len=(len+(os_vm_page_size-1))&(~(os_vm_page_size-1));
340 under_2gb_free_pointer+=len;
341 #endif
343 return actual;
346 void
347 os_invalidate(os_vm_address_t addr, os_vm_size_t len)
349 if (munmap(addr,len) == -1) {
350 perror("munmap");
354 void
355 os_protect(os_vm_address_t address, os_vm_size_t length, os_vm_prot_t prot)
357 if (mprotect(address, length, prot) == -1) {
358 if (errno == ENOMEM) {
359 lose("An mprotect call failed with ENOMEM. This probably means that the maximum amount\n"
360 "of separate memory mappings was exceeded. To fix the problem, either increase\n"
361 "the maximum with e.g. 'echo 262144 > /proc/sys/vm/max_map_count' or recompile\n"
362 "SBCL with a larger value for GENCGC-CARD-BYTES in\n"
363 "'src/compiler/target/backend-parms.lisp'.");
364 } else {
365 perror("mprotect");
/*
 * any OS-dependent special low-level handling for signals
 */

/*
 * The GC needs to be hooked into whatever signal is raised for
 * page fault on this OS.
 */
378 static void
379 fallback_sigsegv_handler(int signal, siginfo_t *info, os_context_t *context)
381 // This calls corruption_warning_and_maybe_lose.
382 lisp_memory_fault_error(context, arch_get_bad_addr(signal, info, context));
/* Handler that sigsegv_handler() calls when none of its checks claims
 * the fault; defaults to fallback_sigsegv_handler. */
void (*sbcl_fallback_sigsegv_handler) // Settable by user.
     (int, siginfo_t*, os_context_t*) = fallback_sigsegv_handler;
388 static void
389 sigsegv_handler(int signal, siginfo_t *info, os_context_t *context)
391 os_vm_address_t addr = arch_get_bad_addr(signal, info, context);
393 #ifdef LISP_FEATURE_ALPHA
394 /* Alpha stuff: This is the end of a pseudo-atomic section during
395 which a signal was received. We must deal with the pending
396 interrupt (see also interrupt.c, ../code/interrupt.lisp)
398 (how we got here: when interrupting, we set bit 63 in reg_ALLOC.
399 At the end of the atomic section we tried to write to reg_ALLOC,
400 got a SIGSEGV (there's nothing mapped there) so ended up here. */
401 if (addr != NULL &&
402 *os_context_register_addr(context, reg_ALLOC) & (1L<<63)) {
403 *os_context_register_addr(context, reg_ALLOC) -= (1L<<63);
404 interrupt_handle_pending(context);
405 return;
407 #endif
409 #ifdef LISP_FEATURE_SB_SAFEPOINT
410 if (!handle_safepoint_violation(context, addr))
411 #endif
413 #ifdef LISP_FEATURE_GENCGC
414 if (!gencgc_handle_wp_violation(addr))
415 #else
416 if (!cheneygc_handle_wp_violation(context, addr))
417 #endif
418 if (!handle_guard_page_triggered(context, addr))
419 sbcl_fallback_sigsegv_handler(signal, info, context);
422 void
423 os_install_interrupt_handlers(void)
425 if (INSTALL_SIG_MEMORY_FAULT_HANDLER) {
426 undoably_install_low_level_interrupt_handler(SIG_MEMORY_FAULT,
427 sigsegv_handler);
430 /* OAOOM c.f. sunos-os.c.
431 * Should we have a reusable function gc_install_interrupt_handlers? */
432 #ifdef LISP_FEATURE_SB_THREAD
433 # ifdef LISP_FEATURE_SB_SAFEPOINT
434 # ifdef LISP_FEATURE_SB_THRUPTION
435 undoably_install_low_level_interrupt_handler(SIGPIPE, thruption_handler);
436 # endif
437 # else
438 undoably_install_low_level_interrupt_handler(SIG_STOP_FOR_GC,
439 sig_stop_for_gc_handler);
440 # endif
441 #endif
444 char *
445 os_get_runtime_executable_path(int external)
447 /* XXX: If this code is compiled with MSAN, all is well.
448 But if this code is compiled without MSAN, there is a false positive
449 in copied_string() unless we zero-initialize path[].
450 Basically if you want sanitization, the right thing is to compile
451 *all* the source code with the sanitizer, not just some of it. */
452 char path[PATH_MAX + 1] = {0};
453 int size;
455 size = readlink("/proc/self/exe", path, sizeof(path)-1);
456 if (size < 0)
457 return NULL;
458 else
459 path[size] = '\0';
461 return copied_string(path);
464 #ifdef LISP_FEATURE_SB_WTIMER
/*
 * Waitable timer implementation for the safepoint-based (SIGALRM-free)
 * timer facility using timerfd_create().
 */
470 os_create_wtimer()
472 int fd = timerfd_create(CLOCK_MONOTONIC, 0);
473 if (fd == -1)
474 lose("os_create_wtimer: timerfd_create");
476 /* Cannot count on TFD_CLOEXEC availability, so do it manually: */
477 if (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1)
478 lose("os_create_wtimer: fcntl");
480 return fd;
/* Block until the timer behind fd fires, by reading 8 bytes from it.
 *
 * Returns 0 after a complete read and -1 when the read was interrupted
 * by a signal (EINTR). Any other failure, or a short read, goes to
 * lose(). */
int
os_wait_for_wtimer(int fd)
{
    unsigned char expirations[8];
    int nread = read(fd, expirations, sizeof(expirations));

    if (nread == -1 && errno == EINTR)
        return -1;
    if (nread == -1)
        lose("os_wtimer_listen failed");
    if (nread != sizeof(expirations))
        lose("os_wtimer_listen read too little");
    return 0;
}
/* Close the timer descriptor fd; calls lose() if close() fails. */
void
os_close_wtimer(int fd)
{
    int status = close(fd);

    if (status == -1)
        lose("os_close_wtimer failed");
}
/* Arm the timer behind fd with a relative expiry of sec seconds plus
 * nsec nanoseconds and a zero interval. Calls lose() if
 * timerfd_settime() fails. */
void
os_set_wtimer(int fd, int sec, int nsec)
{
    struct itimerspec spec = {
        .it_interval = { .tv_sec = 0, .tv_nsec = 0 },
        .it_value = { .tv_sec = sec, .tv_nsec = nsec },
    };

    if (timerfd_settime(fd, 0, &spec, 0) == -1)
        lose("timerfd_settime");
}
/* Cancel the timer behind fd by setting its expiry to zero seconds and
 * zero nanoseconds. */
void
os_cancel_wtimer(int fd)
{
    const int no_seconds = 0;
    const int no_nanoseconds = 0;

    os_set_wtimer(fd, no_seconds, no_nanoseconds);
}
520 #endif