Add a declaration
[sbcl.git] / src / runtime / linux-os.c
blob33a3683c0a779e24d0017200d2581182e4e5bfa4
/*
 * the Linux incarnation of OS-dependent routines. See also
 * $(sbcl_arch)-linux-os.c
 *
 * This file (along with os.h) exports an OS-independent interface to
 * the operating system VM facilities. Surprise surprise, this
 * interface looks a lot like the Mach interface (but simpler in some
 * places). For some operating systems, a subset of these functions
 * will have to be emulated.
 */

/*
 * This software is part of the SBCL system. See the README file for
 * more information.
 *
 * This software is derived from the CMU CL system, which was
 * written at Carnegie Mellon University and released into the
 * public domain. The software is in the public domain and is
 * provided with absolutely no warranty. See the COPYING and CREDITS
 * files for more information.
 */
23 #include <stdio.h>
24 #include <sys/param.h>
25 #include <sys/file.h>
26 #include "sbcl.h"
27 #include "./signal.h"
28 #include "os.h"
29 #include "arch.h"
30 #include "globals.h"
31 #include "interrupt.h"
32 #include "interr.h"
33 #include "lispregs.h"
34 #include "runtime.h"
35 #include "genesis/static-symbols.h"
36 #include "genesis/fdefn.h"
38 #include <sys/socket.h>
39 #include <sys/utsname.h>
40 #include <errno.h>
42 #include <sys/types.h>
43 #include <signal.h>
44 /* #include <sys/sysinfo.h> */
45 #include <sys/time.h>
46 #include <sys/stat.h>
47 #include <unistd.h>
48 #include <linux/version.h>
50 #include "validate.h"
51 #include "thread.h"
52 #include "gc.h"
53 #if defined LISP_FEATURE_GENCGC
54 #include "gencgc-internal.h"
55 #else
56 #include "cheneygc-internal.h"
57 #endif
58 #include <fcntl.h>
59 #ifdef LISP_FEATURE_SB_WTIMER
60 # include <sys/timerfd.h>
61 #endif
63 #ifdef LISP_FEATURE_X86
64 /* Prototype for personality(2). Done inline here since the header file
65 * for this isn't available on old versions of glibc. */
66 int personality (unsigned long);
67 #else
68 #include <sys/personality.h>
69 #endif
/* Page size used by the VM routines in this file; os_init() assigns it
 * from BACKEND_PAGE_BYTES rather than asking the kernel. */
71 size_t os_vm_page_size;
73 #if defined(LISP_FEATURE_SB_THREAD) && defined(LISP_FEATURE_SB_FUTEX) && !defined(LISP_FEATURE_SB_PTHREAD_FUTEX)
74 #include <sys/syscall.h>
75 #include <unistd.h>
76 #include <errno.h>
78 /* values taken from the kernel's linux/futex.h. This header file
79 doesn't exist in userspace, which is our excuse for not grovelling
80 them automatically */
81 #define FUTEX_WAIT 0
82 #define FUTEX_WAKE 1
83 /* This is also copied from linux/futex.h so that a binary compiled on
84 * a not so recent Linux system can still take advantage of private
85 * futexes when available.*/
86 #define FUTEX_WAIT_PRIVATE (0+128)
87 #define FUTEX_WAKE_PRIVATE (1+128)
88 #define FUTEX_FD (2)
89 #define FUTEX_REQUEUE (3)
91 /* Not static so that Lisp may query it. */
92 boolean futex_private_supported_p;
94 static inline int
95 futex_wait_op()
97 return (futex_private_supported_p ? FUTEX_WAIT_PRIVATE : FUTEX_WAIT);
100 static inline int
101 futex_wake_op()
103 return (futex_private_supported_p ? FUTEX_WAKE_PRIVATE : FUTEX_WAKE);
/* Thin wrapper around the raw futex system call.  All four arguments
 * are handed to syscall() unchanged, and whatever syscall() returns
 * is passed back converted to int. */
static inline int
sys_futex(void *futex, int op, int val, struct timespec *rel)
{
    long status = syscall(SYS_futex, futex, op, val, rel);

    return status;
}
111 static void
112 futex_init()
114 int x = 0;
115 sys_futex(&x, FUTEX_WAIT, 1, 0);
116 if (errno == ENOSYS)
117 lose("This version of SBCL is compiled with threading support, but your kernel\n"
118 "is too old to support this. Please use a more recent kernel or\n"
119 "a version of SBCL without threading support.\n");
120 sys_futex(&x, FUTEX_WAIT_PRIVATE, 1, 0);
121 if (errno == EWOULDBLOCK) {
122 futex_private_supported_p = 1;
123 } else {
124 futex_private_supported_p = 0;
125 SHOW("No futex private suppport\n");
/* Block on the futex at lock_word, passing oldval to the kernel as the
 * value the word is expected to hold.
 *
 * A negative sec means no timeout; otherwise the wait is limited to
 * sec seconds plus usec microseconds.
 *
 * Returns 0 if the futex call succeeded, 1 if it failed with
 * ETIMEDOUT, 2 if it failed with EINTR, and -1 for any other failure
 * (EWOULDBLOCK among them), in which case the caller has to check the
 * lock again. */
int
futex_wait(int *lock_word, int oldval, long sec, unsigned long usec)
{
    struct timespec limit;
    struct timespec *rel = NULL;

    if (sec >= 0) {
        limit.tv_sec = sec;
        limit.tv_nsec = usec * 1000;
        rel = &limit;
    }

    if (sys_futex(lock_word, futex_wait_op(), oldval, rel) == 0)
        return 0;

    switch (errno) {
    case ETIMEDOUT:
        return 1;
    case EINTR:
        return 2;
    default:
        /* EWOULDBLOCK and others, need to check the lock */
        return -1;
    }
}
/* Issue a futex wake on lock_word with count n, using whichever wake
 * operation futex_wake_op() selects, and return the result of the
 * underlying futex call. */
int
futex_wake(int *lock_word, int n)
{
    int op = futex_wake_op();

    return sys_futex(lock_word, op, n, 0);
}
159 #endif
/* Nonzero when workarounds for SPARC kernel bugs in signal handling
 * are wanted; os_init() sets it on SPARC builds if the running kernel
 * predates 2.4. */
162 int linux_sparc_siginfo_bug = 0;
/* This variable was in real use for a few months, basically for
 * storing autodetected information about whether the Linux
 * installation was recent enough to support SBCL threads, and for
 * making some run-time decisions based on that. But this turned out
 * to be unstable, so now we just flat-out refuse to start on the old
 * installations when thread support has been compiled in.
 *
 * Unfortunately, in the meanwhile Slime started depending on this
 * variable for deciding which communication style to use. So even
 * though this variable looks unused, it shouldn't be deleted until
 * it's no longer used in the versions of Slime that people are
 * likely to download first. -- JES, 2006-06-07
 */
177 int linux_no_threads_p = 0;
179 #ifdef LISP_FEATURE_SB_THREAD
/* Return 1 if the version string that confstr() reports for
 * _CS_GNU_LIBPTHREAD_VERSION contains "NPTL", and 0 otherwise
 * (including when confstr() reports no string at all). */
int
isnptl (void)
{
    char *version;
    size_t len = confstr (_CS_GNU_LIBPTHREAD_VERSION, NULL, 0);

    /* The first call only asks how large a buffer is needed. */
    if (len == 0)
        return 0;

    version = alloca (len);
    confstr (_CS_GNU_LIBPTHREAD_VERSION, version, len);

    if (strstr (version, "NPTL") == NULL)
        return 0;
    return 1;
}
193 #endif
195 void
196 os_init(char *argv[], char *envp[])
198 /* Conduct various version checks: do we have enough mmap(), is
199 * this a sparc running 2.2, can we do threads? */
200 struct utsname name;
201 int major_version;
202 int minor_version;
203 int patch_version;
204 char *p;
205 uname(&name);
207 p=name.release;
208 major_version = atoi(p);
209 minor_version = patch_version = 0;
210 p=strchr(p,'.');
211 if (p != NULL) {
212 minor_version = atoi(++p);
213 p=strchr(p,'.');
214 if (p != NULL)
215 patch_version = atoi(++p);
218 if (major_version<2) {
219 lose("linux kernel version too old: major version=%d (can't run in version < 2.0.0)\n",
220 major_version);
222 if (!(major_version>2 || minor_version >= 4)) {
223 #ifdef LISP_FEATURE_SPARC
224 FSHOW((stderr,"linux kernel %d.%d predates 2.4;\n enabling workarounds for SPARC kernel bugs in signal handling.\n", major_version,minor_version));
225 linux_sparc_siginfo_bug = 1;
226 #endif
228 #ifdef LISP_FEATURE_SB_THREAD
229 #if defined(LISP_FEATURE_SB_FUTEX) && !defined(LISP_FEATURE_SB_PTHREAD_FUTEX)
230 futex_init();
231 #endif
232 if(! isnptl()) {
233 lose("This version of SBCL only works correctly with the NPTL threading\n"
234 "library. Please use a newer glibc, use an older SBCL, or stop using\n"
235 "LD_ASSUME_KERNEL\n");
237 #endif
239 /* Don't use getpagesize(), since it's not constant across Linux
240 * kernel versions on some architectures (for example PPC). FIXME:
241 * possibly the same should be done on other architectures too.
243 os_vm_page_size = BACKEND_PAGE_BYTES;
245 /* KLUDGE: Disable memory randomization on new Linux kernels
246 * by setting a personality flag and re-executing. (We need
247 * to re-execute, since the memory maps that can conflict with
248 * the SBCL spaces have already been done at this point).
250 * Since randomization is currently implemented only on x86 kernels,
251 * don't do this trick on other platforms.
253 #if defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64)
254 if ((major_version == 2
255 /* Some old kernels will apparently lose unsupported personality flags
256 * on exec() */
257 && ((minor_version == 6 && patch_version >= 11)
258 || (minor_version > 6)
259 /* This is what RHEL 3 reports */
260 || (minor_version == 4 && patch_version > 20)))
261 || major_version >= 3)
263 int pers = personality(0xffffffffUL);
264 /* 0x40000 aka. ADDR_NO_RANDOMIZE */
265 if (!(pers & 0x40000)) {
266 int retval = personality(pers | 0x40000);
267 /* Allegedly some Linux kernels (the reported case was
268 * "hardened Linux 2.6.7") won't set the new personality,
269 * but nor will they return -1 for an error. So as a
270 * workaround query the new personality...
272 int newpers = personality(0xffffffffUL);
273 /* ... and don't re-execute if either the setting resulted
274 * in an error or if the value didn't change. Otherwise
275 * this might result in an infinite loop.
278 if (!getenv("SBCL_IS_RESTARTING") &&
279 retval != -1 && newpers != pers) {
280 /* Use /proc/self/exe instead of trying to figure out
281 * the executable path from PATH and argv[0], since
282 * that's unreliable. We follow the symlink instead of
283 * executing the file directly in order to prevent top
284 * from displaying the name of the process as "exe". */
285 char runtime[PATH_MAX+1];
286 int i = readlink("/proc/self/exe", runtime, PATH_MAX);
287 if (i != -1) {
288 environ = envp;
289 setenv("SBCL_IS_RESTARTING", "T", 1);
290 runtime[i] = '\0';
291 execv(runtime, argv);
294 /* Either changing the personality or execve() failed. Either
295 * way we might as well continue, and hope that the random
296 * memory maps are ok this time around.
298 fprintf(stderr, "WARNING:\
299 \nCouldn't re-execute SBCL with proper personality flags (/proc isn't mounted? setuid?)\
300 \nTrying to continue anyway.\n");
301 } else {
302 unsetenv("SBCL_IS_RESTARTING");
305 #ifdef LISP_FEATURE_X86
306 /* Use SSE detector. Recent versions of Linux enable SSE support
307 * on SSE capable CPUs. */
308 /* FIXME: Are there any old versions that does not support SSE? */
309 fast_bzero_pointer = fast_bzero_detect;
310 #endif
311 #endif
315 #ifdef LISP_FEATURE_ALPHA
316 /* The Alpha is a 64 bit CPU. SBCL is a 32 bit application. Due to all
317 * the places that assume we can get a pointer into a fixnum with no
318 * information loss, we have to make sure it allocates all its ram in the
319 * 0-2Gb region. */
321 static void * under_2gb_free_pointer=DYNAMIC_1_SPACE_END;
322 #endif
324 os_vm_address_t
325 os_validate(os_vm_address_t addr, os_vm_size_t len)
327 int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
328 os_vm_address_t actual;
330 #ifdef LISP_FEATURE_ALPHA
331 if (!addr) {
332 addr=under_2gb_free_pointer;
334 #endif
335 actual = mmap(addr, len, OS_VM_PROT_ALL, flags, -1, 0);
336 if (actual == MAP_FAILED) {
337 perror("mmap");
338 return 0; /* caller should check this */
341 if (addr && (addr!=actual)) {
342 fprintf(stderr, "mmap: wanted %lu bytes at %p, actually mapped at %p\n",
343 (unsigned long) len, addr, actual);
344 return 0;
347 #ifdef LISP_FEATURE_ALPHA
349 len=(len+(os_vm_page_size-1))&(~(os_vm_page_size-1));
350 under_2gb_free_pointer+=len;
351 #endif
353 return actual;
356 void
357 os_invalidate(os_vm_address_t addr, os_vm_size_t len)
359 if (munmap(addr,len) == -1) {
360 perror("munmap");
364 os_vm_address_t
365 os_map(int fd, int offset, os_vm_address_t addr, os_vm_size_t len)
367 os_vm_address_t actual;
369 actual = mmap(addr, len, OS_VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
370 fd, (off_t) offset);
371 if (actual == MAP_FAILED || (addr && (addr != actual))) {
372 perror("mmap");
373 lose("unexpected mmap(..) failure\n");
376 return actual;
379 void
380 os_protect(os_vm_address_t address, os_vm_size_t length, os_vm_prot_t prot)
382 if (mprotect(address, length, prot) == -1) {
383 if (errno == ENOMEM) {
384 lose("An mprotect call failed with ENOMEM. This probably means that the maximum amount\n"
385 "of separate memory mappings was exceeded. To fix the problem, either increase\n"
386 "the maximum with e.g. 'echo 262144 > /proc/sys/vm/max_map_count' or recompile\n"
387 "SBCL with a larger value for GENCGC-CARD-BYTES in\n"
388 "'src/compiler/target/backend-parms.lisp'.");
389 } else {
390 perror("mprotect");
395 boolean
396 is_valid_lisp_addr(os_vm_address_t addr)
398 struct thread *th;
399 size_t ad = (size_t) addr;
401 if ((READ_ONLY_SPACE_START <= ad && ad < READ_ONLY_SPACE_END)
402 || (STATIC_SPACE_START <= ad && ad < STATIC_SPACE_END)
403 #if defined LISP_FEATURE_GENCGC
404 || (DYNAMIC_SPACE_START <= ad && ad < DYNAMIC_SPACE_END)
405 #else
406 || (DYNAMIC_0_SPACE_START <= ad && ad < DYNAMIC_0_SPACE_END)
407 || (DYNAMIC_1_SPACE_START <= ad && ad < DYNAMIC_1_SPACE_END)
408 #endif
410 return 1;
411 for_each_thread(th) {
412 if((size_t)(th->control_stack_start) <= ad
413 && ad < (size_t)(th->control_stack_end))
414 return 1;
415 if((size_t)(th->binding_stack_start) <= ad
416 && ad < (size_t)(th->binding_stack_start + BINDING_STACK_SIZE))
417 return 1;
419 return 0;
/*
 * any OS-dependent special low-level handling for signals
 */
427 * The GC needs to be hooked into whatever signal is raised for
428 * page fault on this OS.
430 static void
431 sigsegv_handler(int signal, siginfo_t *info, os_context_t *context)
433 os_vm_address_t addr = arch_get_bad_addr(signal, info, context);
435 #ifdef LISP_FEATURE_ALPHA
436 /* Alpha stuff: This is the end of a pseudo-atomic section during
437 which a signal was received. We must deal with the pending
438 interrupt (see also interrupt.c, ../code/interrupt.lisp)
440 (how we got here: when interrupting, we set bit 63 in reg_ALLOC.
441 At the end of the atomic section we tried to write to reg_ALLOC,
442 got a SIGSEGV (there's nothing mapped there) so ended up here. */
443 if (addr != NULL &&
444 *os_context_register_addr(context, reg_ALLOC) & (1L<<63)) {
445 *os_context_register_addr(context, reg_ALLOC) -= (1L<<63);
446 interrupt_handle_pending(context);
447 return;
449 #endif
451 #ifdef LISP_FEATURE_SB_SAFEPOINT
452 if (!handle_safepoint_violation(context, addr))
453 #endif
455 #ifdef LISP_FEATURE_GENCGC
456 if (!gencgc_handle_wp_violation(addr))
457 #else
458 if (!cheneygc_handle_wp_violation(context, addr))
459 #endif
460 if (!handle_guard_page_triggered(context, addr))
461 lisp_memory_fault_error(context, addr);
464 void
465 os_install_interrupt_handlers(void)
467 undoably_install_low_level_interrupt_handler(SIG_MEMORY_FAULT,
468 sigsegv_handler);
470 /* OAOOM c.f. sunos-os.c.
471 * Should we have a reusable function gc_install_interrupt_handlers? */
472 #ifdef LISP_FEATURE_SB_THREAD
473 # ifdef LISP_FEATURE_SB_SAFEPOINT
474 # ifdef LISP_FEATURE_SB_THRUPTION
475 undoably_install_low_level_interrupt_handler(SIGPIPE, thruption_handler);
476 # endif
477 # else
478 undoably_install_low_level_interrupt_handler(SIG_STOP_FOR_GC,
479 sig_stop_for_gc_handler);
480 # endif
481 #endif
484 char *
485 os_get_runtime_executable_path(int external)
487 char path[PATH_MAX + 1];
488 int size;
490 size = readlink("/proc/self/exe", path, sizeof(path)-1);
491 if (size < 0)
492 return NULL;
493 else
494 path[size] = '\0';
496 return copied_string(path);
499 #ifdef LISP_FEATURE_SB_WTIMER
/*
 * Waitable timer implementation for the safepoint-based (SIGALRM-free)
 * timer facility using timerfd_create().
 */
505 os_create_wtimer()
507 int fd = timerfd_create(CLOCK_MONOTONIC, 0);
508 if (fd == -1)
509 lose("os_create_wtimer: timerfd_create");
511 /* Cannot count on TFD_CLOEXEC availability, so do it manually: */
512 if (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1)
513 lose("os_create_wtimer: fcntl");
515 return fd;
/* Wait on the timer fd by reading 8 bytes from it.
 *
 * Returns 0 once a full 8-byte read has completed and -1 if the read
 * was interrupted (EINTR).  Any other read error, and any short read,
 * ends in lose(). */
int
os_wait_for_wtimer(int fd)
{
    unsigned char expirations[8];
    int got = read(fd, expirations, sizeof(expirations));

    if (got == -1 && errno == EINTR)
        return -1;
    if (got == -1)
        lose("os_wtimer_listen failed");

    if (got != sizeof(expirations))
        lose("os_wtimer_listen read too little");
    return 0;
}
/* Close the timer file descriptor fd; a failing close() ends in
 * lose(). */
void
os_close_wtimer(int fd)
{
    int status = close(fd);

    if (status == -1)
        lose("os_close_wtimer failed");
}
/* Arm the timer fd to expire once, sec seconds and nsec nanoseconds
 * from now (relative time, zero interval).  A failing
 * timerfd_settime() ends in lose(). */
void
os_set_wtimer(int fd, int sec, int nsec)
{
    struct itimerspec when;

    /* No periodic reload, just the single expiration. */
    when.it_interval.tv_sec = 0;
    when.it_interval.tv_nsec = 0;
    when.it_value.tv_sec = sec;
    when.it_value.tv_nsec = nsec;

    if (timerfd_settime(fd, 0, &when, 0) == -1)
        lose("timerfd_settime");
}
/* Cancel the timer fd by setting its expiration time to zero seconds
 * and zero nanoseconds through os_set_wtimer(). */
void
os_cancel_wtimer(int fd)
{
    const int no_seconds = 0;
    const int no_nanoseconds = 0;

    os_set_wtimer(fd, no_seconds, no_nanoseconds);
}
555 #endif