x86-64: Integrate Paul Khuong's interleaved raw slot feature.
[sbcl.git] / src / runtime / bsd-os.c
blob7007e3994292a375d9b17327e9f5b3e9c555dd4f
/*
 * OS-dependent routines for BSD-ish systems
 *
 * This file (along with os.h) exports an OS-independent interface to
 * the operating system VM facilities. This interface looks a lot like
 * the Mach interface (but simpler in some places). For some operating
 * systems, a subset of these functions will have to be emulated.
 */

/*
 * This software is part of the SBCL system. See the README file for
 * more information.
 *
 * This software is derived from the CMU CL system, which was
 * written at Carnegie Mellon University and released into the
 * public domain. The software is in the public domain and is
 * provided with absolutely no warranty. See the COPYING and CREDITS
 * files for more information.
 */
21 #include <stdio.h>
22 #include <sys/param.h>
23 #include <sys/file.h>
24 #include <unistd.h>
25 #include <utime.h>
26 #include <assert.h>
27 #include <errno.h>
28 #include "sbcl.h"
29 #include "./signal.h"
30 #include "os.h"
31 #include "arch.h"
32 #include "globals.h"
33 #include "interrupt.h"
34 #include "interr.h"
35 #include "lispregs.h"
36 #include "thread.h"
37 #include "runtime.h"
38 #include "genesis/static-symbols.h"
39 #include "genesis/fdefn.h"
41 #include <sys/types.h>
42 #include <signal.h>
43 /* #include <sys/sysinfo.h> */
44 #include "validate.h"
45 #if defined LISP_FEATURE_GENCGC
46 #include "gencgc-internal.h"
47 #endif
49 #if defined(LISP_FEATURE_SB_WTIMER) && !defined(LISP_FEATURE_DARWIN)
50 # include <sys/event.h>
51 #endif
54 os_vm_size_t os_vm_page_size;
56 #ifdef __NetBSD__
57 #include <sys/resource.h>
58 #include <sys/sysctl.h>
59 #include <string.h>
60 #include <sys/stat.h> /* For the stat-family wrappers. */
61 #include <dirent.h> /* For the opendir()/readdir() wrappers */
62 #include <sys/socket.h> /* For the socket() wrapper */
63 static void netbsd_init();
64 static os_vm_size_t max_allocation_size;
65 #endif /* __NetBSD__ */
67 #if defined LISP_FEATURE_FREEBSD
68 #include <sys/sysctl.h>
69 #if defined(LISP_FEATURE_SB_THREAD) && !defined(LISP_FEATURE_SB_PTHREAD_FUTEX)
70 #include <sys/umtx.h>
71 #endif
73 static void freebsd_init();
74 #endif /* __FreeBSD__ */
76 #ifdef __DragonFly__
77 #include <sys/sysctl.h>
79 static void dragonfly_init();
80 #endif /* __DragonFly__ */
82 #ifdef __OpenBSD__
83 #include <sys/types.h>
84 #include <sys/resource.h>
85 #include <sys/stat.h>
86 #include <sys/sysctl.h>
87 #include <dlfcn.h>
88 #ifdef LISP_FEATURE_X86
89 #include <machine/cpu.h>
90 #endif
92 static void openbsd_init();
93 #endif
95 void
96 os_init(char *argv[], char *envp[])
98 os_vm_page_size = BACKEND_PAGE_BYTES;
100 #ifdef __NetBSD__
101 netbsd_init();
102 #elif defined(LISP_FEATURE_FREEBSD)
103 freebsd_init();
104 #elif defined(__OpenBSD__)
105 openbsd_init();
106 #elif defined(LISP_FEATURE_DARWIN)
107 darwin_init();
108 #elif defined(__DragonFly__)
109 dragonfly_init();
110 #endif
113 sigset_t *
114 os_context_sigmask_addr(os_context_t *context)
116 /* (Unlike most of the other context fields that we access, the
117 * signal mask field is a field of the basic, outermost context
118 * struct itself both in FreeBSD 4.0 and in OpenBSD 2.6.) */
119 #if defined(LISP_FEATURE_FREEBSD) || defined(__NetBSD__) || defined(LISP_FEATURE_DARWIN) \
120 || defined(__DragonFly__)
121 return &context->uc_sigmask;
122 #elif defined (__OpenBSD__)
123 return &context->sc_mask;
124 #else
125 #error unsupported BSD variant
126 #endif
129 os_vm_address_t
130 os_validate(os_vm_address_t addr, os_vm_size_t len)
132 int flags = MAP_PRIVATE | MAP_ANON;
134 if (addr)
135 flags |= MAP_FIXED;
137 #ifdef __NetBSD__
138 if (addr) {
139 os_vm_address_t curaddr = addr;
141 while (len > 0) {
142 os_vm_address_t resaddr;
143 os_vm_size_t curlen = MIN(max_allocation_size, len);
145 resaddr = mmap(curaddr, curlen, OS_VM_PROT_ALL, flags, -1, 0);
147 if (resaddr == (os_vm_address_t) - 1) {
148 perror("mmap");
150 while (curaddr > addr) {
151 curaddr -= max_allocation_size;
152 munmap(curaddr, max_allocation_size);
155 return NULL;
158 curaddr += curlen;
159 len -= curlen;
161 } else {
162 #endif
163 addr = mmap(addr, len, OS_VM_PROT_ALL, flags, -1, 0);
164 #ifdef __NetBSD__
166 #endif
168 if (addr == MAP_FAILED) {
169 perror("mmap");
170 return NULL;
173 return addr;
176 void
177 os_invalidate(os_vm_address_t addr, os_vm_size_t len)
179 if (munmap(addr, len) == -1)
180 perror("munmap");
183 os_vm_address_t
184 os_map(int fd, int offset, os_vm_address_t addr, os_vm_size_t len)
186 addr = mmap(addr, len,
187 OS_VM_PROT_ALL,
188 MAP_PRIVATE | MAP_FILE | MAP_FIXED,
189 fd, (off_t) offset);
191 if (addr == MAP_FAILED) {
192 perror("mmap");
193 lose("unexpected mmap(..) failure\n");
196 return addr;
199 void
200 os_protect(os_vm_address_t address, os_vm_size_t length, os_vm_prot_t prot)
202 if (mprotect(address, length, prot) == -1) {
203 perror("mprotect");
207 static boolean
208 in_range_p(os_vm_address_t a, lispobj sbeg, size_t slen)
210 char* beg = (char*) sbeg;
211 char* end = (char*) sbeg + slen;
212 char* adr = (char*) a;
213 return (adr >= beg && adr < end);
216 boolean
217 is_valid_lisp_addr(os_vm_address_t addr)
219 struct thread *th;
221 if (in_range_p(addr, READ_ONLY_SPACE_START, READ_ONLY_SPACE_SIZE) ||
222 in_range_p(addr, STATIC_SPACE_START, STATIC_SPACE_SIZE) ||
223 in_range_p(addr, DYNAMIC_SPACE_START, dynamic_space_size))
224 return 1;
225 for_each_thread(th) {
226 if (((os_vm_address_t)th->control_stack_start <= addr) &&
227 (addr < (os_vm_address_t)th->control_stack_end))
228 return 1;
229 if (in_range_p(addr, (lispobj) th->binding_stack_start,
230 BINDING_STACK_SIZE))
231 return 1;
233 return 0;
/*
 * any OS-dependent special low-level handling for signals
 */
240 #if defined LISP_FEATURE_GENCGC
243 * The GENCGC needs to be hooked into whatever signal is raised for
244 * page fault on this OS.
247 void
248 memory_fault_handler(int signal, siginfo_t *siginfo, os_context_t *context)
250 void *fault_addr = arch_get_bad_addr(signal, siginfo, context);
252 #if defined(LISP_FEATURE_RESTORE_TLS_SEGMENT_REGISTER_FROM_CONTEXT)
253 FSHOW_SIGNAL((stderr, "/ TLS: restoring fs: %p in memory_fault_handler\n",
254 *CONTEXT_ADDR_FROM_STEM(fs)));
255 os_restore_tls_segment_register(context);
256 #endif
258 FSHOW((stderr, "Memory fault at: %p, PC: %p\n", fault_addr, *os_context_pc_addr(context)));
260 #ifdef LISP_FEATURE_SB_SAFEPOINT
261 if (!handle_safepoint_violation(context, fault_addr))
262 #endif
264 if (!gencgc_handle_wp_violation(fault_addr))
265 if(!handle_guard_page_triggered(context,fault_addr))
266 lisp_memory_fault_error(context, fault_addr);
#if defined(LISP_FEATURE_MACH_EXCEPTION_HANDLER)
/*
 * Memory-fault signal handler installed when the Mach exception
 * handler feature is enabled: a fault arriving through this path is
 * always treated as fatal.
 */
void mach_error_memory_fault_handler(int signal, siginfo_t *siginfo,
                                     os_context_t *context)
{
    lose("Unhandled memory fault. Exiting.");
}
#endif
277 void
278 os_install_interrupt_handlers(void)
280 SHOW("os_install_interrupt_handlers()/bsd-os/defined(GENCGC)");
281 #if defined(LISP_FEATURE_MACH_EXCEPTION_HANDLER)
282 undoably_install_low_level_interrupt_handler(SIG_MEMORY_FAULT,
283 mach_error_memory_fault_handler);
284 #else
285 undoably_install_low_level_interrupt_handler(SIG_MEMORY_FAULT,
286 #if defined(LISP_FEATURE_FREEBSD) && !defined(__GLIBC__)
287 (__siginfohandler_t *)
288 #endif
289 memory_fault_handler);
290 #endif
292 #ifdef LISP_FEATURE_SB_THREAD
293 # ifdef LISP_FEATURE_SB_SAFEPOINT
294 # ifdef LISP_FEATURE_SB_THRUPTION
295 undoably_install_low_level_interrupt_handler(SIGPIPE, thruption_handler);
296 # endif
297 # else
298 undoably_install_low_level_interrupt_handler(SIG_STOP_FOR_GC,
299 sig_stop_for_gc_handler);
300 # endif
301 #endif
302 SHOW("leaving os_install_interrupt_handlers()");
305 #else /* Currently PPC/Darwin/Cheney only */
307 static void
308 sigsegv_handler(int signal, siginfo_t *info, os_context_t *context)
310 #if 0
311 unsigned int pc = (unsigned int *)(*os_context_pc_addr(context));
312 #endif
313 os_vm_address_t addr;
315 addr = arch_get_bad_addr(signal, info, context);
316 if (!cheneygc_handle_wp_violation(context, addr))
317 if (!handle_guard_page_triggered(context, addr))
318 interrupt_handle_now(signal, info, context);
321 void
322 os_install_interrupt_handlers(void)
324 SHOW("os_install_interrupt_handlers()/bsd-os/!defined(GENCGC)");
325 undoably_install_low_level_interrupt_handler(SIG_MEMORY_FAULT,
326 sigsegv_handler);
329 #endif /* defined GENCGC */
331 #ifdef __NetBSD__
332 static void netbsd_init()
334 struct rlimit rl;
335 int mib[2], osrev;
336 size_t len;
338 /* Are we running on a sufficiently functional kernel? */
339 mib[0] = CTL_KERN;
340 mib[1] = KERN_OSREV;
342 len = sizeof(osrev);
343 sysctl(mib, 2, &osrev, &len, NULL, 0);
345 /* If we're older than 2.0... */
346 if (osrev < 200000000) {
347 fprintf(stderr, "osrev = %d (needed at least 200000000).\n", osrev);
348 lose("NetBSD kernel too old to run sbcl.\n");
351 /* NetBSD counts mmap()ed space against the process's data size limit,
352 * so yank it up. This might be a nasty thing to do? */
353 getrlimit (RLIMIT_DATA, &rl);
354 if (rl.rlim_cur < rl.rlim_max) {
355 rl.rlim_cur = rl.rlim_max;
356 if (setrlimit (RLIMIT_DATA, &rl) < 0) {
357 fprintf (stderr,
358 "RUNTIME WARNING: unable to raise process data size limit:\n\
359 %s.\n\
360 The system may fail to start.\n",
361 strerror(errno));
364 max_allocation_size = (os_vm_size_t)((rl.rlim_cur / 2) &
365 ~(32 * 1024 * 1024));
367 #ifdef LISP_FEATURE_X86
369 size_t len;
370 int sse;
372 len = sizeof(sse);
373 if (sysctlbyname("machdep.sse", &sse, &len,
374 NULL, 0) == 0 && sse != 0) {
375 /* Use the SSE detector */
376 fast_bzero_pointer = fast_bzero_detect;
379 #endif /* LISP_FEATURE_X86 */
382 /* Various routines in NetBSD's C library are compatibility wrappers
383 for old versions. Programs must be processed by the C toolchain in
384 order to get up-to-date definitions of such routines. */
385 /* The stat-family, opendir, and readdir are used only in sb-posix, as
386 of 2007-01-16. -- RMK */
/* Out-of-line wrapper so callers get the current stat() definition.
 * Returns whatever stat() returns. */
int
_stat(const char *path, struct stat *sb)
{
    return stat(path, sb);
}
/* Out-of-line wrapper so callers get the current lstat() definition.
 * Returns whatever lstat() returns. */
int
_lstat(const char *path, struct stat *sb)
{
    return lstat(path, sb);
}
/* Out-of-line wrapper so callers get the current fstat() definition.
 * Returns whatever fstat() returns. */
int
_fstat(int fd, struct stat *sb)
{
    return fstat(fd, sb);
}
/* Out-of-line wrapper so callers get the current opendir() definition.
 * Returns whatever opendir() returns. */
DIR *
_opendir(const char *filename)
{
    return opendir(filename);
}
/* Out-of-line wrapper so callers get the current readdir() definition.
 * Returns whatever readdir() returns. */
struct dirent *
_readdir(DIR *dirp)
{
    return readdir(dirp);
}
/* Out-of-line wrapper so callers get the current utime() definition.
 * Returns whatever utime() returns. */
int
_utime(const char *file, const struct utimbuf *timep)
{
    return utime(file, timep);
}
/* Used in sb-bsd-sockets.
 * Out-of-line wrapper; returns whatever socket() returns. */
int
_socket(int domain, int type, int protocol)
{
    return socket(domain, type, protocol);
}
426 #endif /* __NetBSD__ */
428 #if defined(LISP_FEATURE_FREEBSD)
429 #ifndef __GLIBC__
430 extern int getosreldate(void);
431 #endif
/* Signal number used for memory faults; chosen in freebsd_init(). */
int sig_memory_fault;

/*
 * FreeBSD-specific startup work: pick the memory-fault signal and, on
 * x86, decide whether the SSE-detecting bzero may be used.
 */
static void freebsd_init()
{
    /* Memory fault signal on FreeBSD was changed from SIGBUS to
     * SIGSEGV. */
#ifdef __GLIBC__
    sig_memory_fault = SIGSEGV;
#else
    sig_memory_fault = (getosreldate() < 700004) ? SIGBUS : SIGSEGV;
#endif

    /* Quote from sbcl-devel (NIIMI Satoshi): "Some OSes, like FreeBSD
     * 4.x with GENERIC kernel, does not enable SSE support even on
     * SSE capable CPUs". Detect this situation and skip the
     * fast_bzero sse/base selection logic that's normally done in
     * x86-assem.S.
     */
#ifdef LISP_FEATURE_X86
    {
        int instruction_sse;
        size_t len = sizeof(instruction_sse);
        int rc = sysctlbyname("hw.instruction_sse", &instruction_sse, &len,
                              NULL, 0);

        if (rc == 0 && instruction_sse != 0) {
            /* Use the SSE detector */
            fast_bzero_pointer = fast_bzero_detect;
        }
    }
#endif /* LISP_FEATURE_X86 */
}
469 #if defined(LISP_FEATURE_SB_THREAD) && defined(LISP_FEATURE_SB_FUTEX) \
470 && !defined(LISP_FEATURE_SB_PTHREAD_FUTEX)
/*
 * Wait on `lock_word` via umtx_wait() while it still holds `oldval`.
 *
 * sec, usec  timeout; a negative `sec` passes a NULL timeout to
 *            umtx_wait() (presumably "no deadline" -- confirm against
 *            the umtx documentation).
 *
 * Returns 0 when umtx_wait() reports success, 1 on ETIMEDOUT, 2 on
 * EINTR, and -1 for anything else (EWOULDBLOCK and others), in which
 * case the caller needs to re-check the lock.
 */
int
futex_wait(int *lock_word, long oldval, long sec, unsigned long usec)
{
    struct timespec timeout;
    int ret;

    if (sec < 0) {
        ret = umtx_wait((void *)lock_word, oldval, NULL);
    } else {
        /* Carry whole seconds out of usec so tv_nsec always stays
         * below one second and the multiplication cannot wrap. */
        timeout.tv_sec = sec + (long)(usec / 1000000UL);
        timeout.tv_nsec = (long)(usec % 1000000UL) * 1000;
        ret = umtx_wait((void *)lock_word, oldval, &timeout);
    }

    switch (ret) {
    case 0:
        return 0;
    case ETIMEDOUT:
        return 1;
    case EINTR:
        return 2;
    default:
        /* EWOULDBLOCK and others, need to check the lock */
        return -1;
    }
}
/*
 * Hand `lock_word` and the count `n` to umtx_wake() and return its
 * result unchanged.
 */
int
futex_wake(int *lock_word, int n)
{
    void *word = (void *)lock_word;

    return umtx_wake(word, n);
}
503 #endif
504 #endif /* __FreeBSD__ */
506 #ifdef __DragonFly__
/*
 * DragonFly-specific startup work: on x86, select the SSE-detecting
 * bzero when the hw.instruction_sse sysctl reports SSE support.
 * Does nothing on other architectures.
 */
static void dragonfly_init()
{
#ifdef LISP_FEATURE_X86
    int instruction_sse;
    size_t len = sizeof(instruction_sse);
    int rc = sysctlbyname("hw.instruction_sse", &instruction_sse, &len,
                          NULL, 0);

    if (rc == 0 && instruction_sse != 0) {
        /* Use the SSE detector */
        fast_bzero_pointer = fast_bzero_detect;
    }
#endif /* LISP_FEATURE_X86 */
}
523 #if defined(LISP_FEATURE_SB_THREAD) && defined(LISP_FEATURE_SB_FUTEX) \
524 && !defined(LISP_FEATURE_SB_PTHREAD_FUTEX)
/*
 * Wait on `lock_word` via umtx_sleep() while it still holds `oldval`.
 *
 * sec, usec  timeout, combined into a single microsecond count; a
 *            negative `sec` passes 0 to umtx_sleep() (presumably
 *            "sleep without a timeout" -- confirm against umtx(2)).
 *
 * Returns 0 when umtx_sleep() returns 0; otherwise 1 for EWOULDBLOCK
 * (timed out), 2 for EINTR, and -1 for any other errno.
 *
 * NOTE(review): the microsecond count is narrowed to int, so large
 * `sec` values do not fit, and a request of exactly zero yields the
 * same argument as the negative-`sec` case.  Behaviour kept as is.
 */
int
futex_wait(int *lock_word, long oldval, long sec, unsigned long usec)
{
    int timeout_usec = 0;

    if (sec >= 0)
        timeout_usec = usec + 1000000 * sec;

    if (umtx_sleep(lock_word, oldval, timeout_usec) == 0)
        return 0;

    switch (errno) {
    case EWOULDBLOCK:   /* Operation timed out */
        return 1;
    case EINTR:
        return 2;
    default:            /* Such as EINVAL or EBUSY */
        return -1;
    }
}
/*
 * Hand `lock_word` and the count `n` to umtx_wakeup() and return its
 * result unchanged.
 */
int
futex_wake(int *lock_word, int n)
{
    int rc = umtx_wakeup(lock_word, n);

    return rc;
}
555 #endif
556 #endif /* __DragonFly__ */
558 #ifdef LISP_FEATURE_DARWIN
559 /* defined in ppc-darwin-os.c instead */
560 #elif defined(LISP_FEATURE_FREEBSD)
561 #ifndef KERN_PROC_PATHNAME
562 #define KERN_PROC_PATHNAME 12
563 #endif
565 char *
566 os_get_runtime_executable_path(int external)
568 char path[PATH_MAX + 1];
570 #ifndef __GLIBC__
571 if (getosreldate() >= 600024) {
572 #endif
573 /* KERN_PROC_PATHNAME is available */
574 size_t len = PATH_MAX + 1;
575 int mib[4];
577 mib[0] = CTL_KERN;
578 mib[1] = KERN_PROC;
579 mib[2] = KERN_PROC_PATHNAME;
580 mib[3] = -1;
581 if (sysctl(mib, 4, &path, &len, NULL, 0) != 0)
582 return NULL;
583 #ifndef __GLIBC__
584 } else {
585 int size;
586 size = readlink("/proc/curproc/file", path, sizeof(path) - 1);
587 if (size < 0)
588 return NULL;
589 path[size] = '\0';
591 #endif
592 if (strcmp(path, "unknown") == 0)
593 return NULL;
594 return copied_string(path);
596 #elif defined(LISP_FEATURE_DRAGONFLY)
597 char *
598 os_get_runtime_executable_path(int external)
600 char path[PATH_MAX + 1];
601 int size = readlink("/proc/curproc/file", path, sizeof(path) - 1);
602 if (size < 0)
603 return NULL;
604 path[size] = '\0';
606 if (strcmp(path, "unknown") == 0)
607 return NULL;
608 return copied_string(path);
610 #elif defined(LISP_FEATURE_NETBSD) || defined(LISP_FEATURE_OPENBSD)
/*
 * Return a copy of the literal path "/proc/curproc/file" when the
 * caller did not ask for an external path and that file can be
 * stat()ed; otherwise return NULL.
 */
char *
os_get_runtime_executable_path(int external)
{
    struct stat sb;

    if (external)
        return NULL;

    if (stat("/proc/curproc/file", &sb) != 0)
        return NULL;

    return copied_string("/proc/curproc/file");
}
619 #else /* Not DARWIN or FREEBSD or NETBSD or OPENBSD or DragonFly */
/*
 * Fallback for platforms without a lookup method in this file:
 * the executable path is never known, so always return NULL.
 */
char *
os_get_runtime_executable_path(int external)
{
    return NULL;
}
625 #endif
627 #ifdef __OpenBSD__
629 int openbsd_use_fxsave = 0;
631 void
632 openbsd_init()
634 #ifdef LISP_FEATURE_X86
635 int mib[2];
636 size_t size;
637 #endif
639 * Show a warning if it looks like the memory available after
640 * allocating the spaces won't be at least this much.
642 #ifdef LISP_FEATURE_X86_64
643 const int wantfree = 64 * 1024 * 1024;
644 #else
645 const int wantfree = 32 * 1024 * 1024;
646 #endif
647 struct rlimit rl;
649 #ifdef LISP_FEATURE_X86
650 /* Save the machdep.osfxsr sysctl for use by os_restore_fp_control() */
651 mib[0] = CTL_MACHDEP;
652 mib[1] = CPU_OSFXSR;
653 size = sizeof (openbsd_use_fxsave);
654 sysctl(mib, 2, &openbsd_use_fxsave, &size, NULL, 0);
655 #endif
657 /* OpenBSD, like NetBSD, counts mmap()ed space against the
658 * process's data size limit. If the soft limit is lower than the
659 * hard limit then try to yank it up, this lets users in the
660 * "staff" or "daemon" login classes run sbcl with larger dynamic
661 * space sizes.
663 getrlimit (RLIMIT_DATA, &rl);
664 if (rl.rlim_cur < rl.rlim_max) {
665 rl.rlim_cur = rl.rlim_max;
666 if (setrlimit (RLIMIT_DATA, &rl) < 0) {
667 fprintf (stderr,
668 "RUNTIME WARNING: unable to raise process data size limit:\n\
669 %s.\n\
670 The system may fail to start.\n",
671 strerror(errno));
676 * Display a (hopefully) helpful warning if it looks like we won't
677 * be able to allocate enough memory.
679 getrlimit (RLIMIT_DATA, &rl);
680 if (dynamic_space_size + READ_ONLY_SPACE_SIZE + STATIC_SPACE_SIZE +
681 LINKAGE_TABLE_SPACE_SIZE + wantfree > rl.rlim_cur)
682 fprintf (stderr,
683 "RUNTIME WARNING: data size resource limit may be too low,\n"
684 " try decreasing the dynamic space size with --dynamic-space-size\n"
685 " or raising the datasize or datasize-max limits in /etc/login.conf\n");
/* OpenBSD's dlsym() relies on the gcc builtin
 * __builtin_return_address(0) returning an address in the
 * executable's text segment, but when called from lisp it will return
 * an address in the dynamic space.  Work around this by calling this
 * wrapper function instead.  Note that tail-call optimization will
 * defeat this, disable it by saving the dlsym() return value in a
 * volatile variable.
 *
 * handle, symbol  passed to dlsym() unchanged.
 * Returns whatever dlsym() returns.
 */
void *
os_dlsym(void *handle, const char *symbol)
{
    void * volatile ret = dlsym(handle, symbol);
    return ret;
}
703 #endif
705 #if defined(LISP_FEATURE_SB_WTIMER) && !defined(LISP_FEATURE_DARWIN)
/*
 * Waitable timer implementation for the safepoint-based (SIGALRM-free)
 * timer facility using kqueue.
 */

/*
 * Create the kqueue that backs a waitable timer and return its
 * descriptor.  Failure to create the queue is fatal (lose()).
 */
int
os_create_wtimer()
{
    int kq = kqueue();

    if (kq == -1) {
        lose("os_create_wtimer: kqueue");
    }
    return kq;
}
720 os_wait_for_wtimer(int kq)
722 struct kevent ev;
723 int n;
724 if ( (n = kevent(kq, 0, 0, &ev, 1, 0)) == -1) {
725 if (errno != EINTR)
726 lose("os_wtimer_listen failed");
727 n = 0;
729 return n != 1;
/*
 * Close the timer queue descriptor `kq`; a failing close() is fatal
 * (lose()).
 */
void
os_close_wtimer(int kq)
{
    int rc = close(kq);

    if (rc == -1) {
        lose("os_close_wtimer failed");
    }
}
739 void
740 os_set_wtimer(int kq, int sec, int nsec)
742 long long msec
743 = ((long long) sec) * 1000 + (long long) (nsec+999999) / 1000000;
744 if (msec > INT_MAX) msec = INT_MAX;
746 struct kevent ev;
747 EV_SET(&ev, 1, EVFILT_TIMER, EV_ADD|EV_ENABLE|EV_ONESHOT, 0, (int)msec, 0);
748 if (kevent(kq, &ev, 1, 0, 0, 0) == -1)
749 perror("os_set_wtimer: kevent");
752 void
753 os_cancel_wtimer(int kq)
755 struct kevent ev;
756 EV_SET(&ev, 1, EVFILT_TIMER, EV_DISABLE, 0, 0, 0);
757 if (kevent(kq, &ev, 1, 0, 0, 0) == -1 && errno != ENOENT)
758 perror("os_cancel_wtimer: kevent");
760 #endif