2 * the Linux incarnation of OS-dependent routines. See also
3 * $(sbcl_arch)-linux-os.c
5 * This file (along with os.h) exports an OS-independent interface to
6 * the operating system VM facilities. Surprise surprise, this
7 * interface looks a lot like the Mach interface (but simpler in some
8 * places). For some operating systems, a subset of these functions
9 * will have to be emulated.
13 * This software is part of the SBCL system. See the README file for
16 * This software is derived from the CMU CL system, which was
17 * written at Carnegie Mellon University and released into the
18 * public domain. The software is in the public domain and is
19 * provided with absolutely no warranty. See the COPYING and CREDITS
20 * files for more information.
24 #include <sys/param.h>
30 #include "interrupt.h"
34 #include <sys/socket.h>
35 #include <sys/utsname.h>
37 #include <sys/types.h>
39 /* #include <sys/sysinfo.h> */
46 size_t os_vm_page_size
;
56 /* Early versions of Linux don't support the mmap(..) functionality
65 major_version
= atoi(name
.release
);
66 if (major_version
< 2) {
67 lose("linux major version=%d (can't run in version < 2.0.0)",
71 /* KLUDGE: This will break if Linux moves to a uname() version number
72 * that has more than one digit initially -- CSR, 2002-02-12 */
73 minor_version
= atoi(name
.release
+2);
74 if (minor_version
< 4) {
75 FSHOW((stderr
,"linux minor version=%d;\n enabling workarounds for SPARC kernel bugs in signal handling.\n", minor_version
));
81 os_vm_page_size
= getpagesize();
82 /* This could just as well be in arch_init(), but it's not. */
84 /* FIXME: This used to be here. However, I have just removed it
85 with no apparent ill effects (it may be that earlier kernels
86 started up a process with a different set of traps, or
87 something?) Find out what this was meant to do, and reenable it
88 or delete it if possible. -- CSR, 2002-07-15 */
89 /* SET_FPU_CONTROL_WORD(0x1372|4|8|16|32); no interrupts */
93 /* In Debian CMU CL ca. 2.4.9, it was possible to get an infinite
94 * cascade of errors from do_mmap(..). This variable is a counter to
95 * prevent that; when it counts down to zero, an error in do_mmap
96 * causes the low-level monitor to be called. */
97 int n_do_mmap_ignorable_errors
= 3;
99 /* Return 0 for success. */
101 do_mmap(os_vm_address_t
*addr
, os_vm_size_t len
, int flags
)
103 /* We *must* have the memory where we expect it. */
104 os_vm_address_t old_addr
= *addr
;
106 *addr
= mmap(*addr
, len
, OS_VM_PROT_ALL
, flags
, -1, 0);
107 if (*addr
== MAP_FAILED
||
108 ((old_addr
!= NULL
) && (*addr
!= old_addr
))) {
110 "/retryable error in allocating memory from the OS\n"
111 "(addr=0x%lx, len=0x%lx, flags=0x%lx)\n",
115 if (n_do_mmap_ignorable_errors
> 0) {
116 --n_do_mmap_ignorable_errors
;
118 lose("too many errors in allocating memory from the OS");
127 os_validate(os_vm_address_t addr
, os_vm_size_t len
)
130 int flags
= MAP_PRIVATE
| MAP_ANONYMOUS
| MAP_FIXED
;
131 os_vm_address_t base_addr
= addr
;
133 /* KLUDGE: It looks as though this code allocates memory
134 * in chunks of size no larger than 'magic', but why? What
135 * is the significance of 0x1000000 here? Also, can it be
136 * right that if the first few 'do_mmap' calls succeed,
137 * then one fails, we leave the memory allocated by the
138 * first few in place even while we return a code for
139 * complete failure? -- WHN 19991020
141 * Peter Van Eynde writes (20000211)
142 * This was done because the kernel would only check for
143 * overcommit for every allocation separately. So if you
144 * had 16MB of free mem+swap you could allocate 16M. And
145 * again, and again, etc.
146 * This in [Linux] 2.X could be bad as they changed the memory
147 * system. A side effect was/is (I don't really know) that
148 * programs with a lot of memory mappings run slower. But
149 * of course for 2.2.2X we now have the NO_RESERVE flag that
152 * FIXME: The logic is also flaky w.r.t. failed
153 * allocations. If we make one or more successful calls to
154 * do_mmap(..) before one fails, then we've allocated
155 * memory, and we should ensure that it gets deallocated
156 * sometime somehow. If this function's response to any
157 * failed do_mmap(..) is to give up and return NULL (as in
158 * sbcl-0.6.7), then any failed do_mmap(..) after any
159 * successful do_mmap(..) causes a memory leak. */
160 int magic
= 0x1000000;
162 if (do_mmap(&addr
, len
, flags
)) {
167 if (do_mmap(&addr
, magic
, flags
)) {
176 int flags
= MAP_PRIVATE
| MAP_ANONYMOUS
;
177 if (do_mmap(&addr
, len
, flags
)) {
186 os_invalidate(os_vm_address_t addr
, os_vm_size_t len
)
188 if (munmap(addr
,len
) == -1) {
194 os_map(int fd
, int offset
, os_vm_address_t addr
, os_vm_size_t len
)
196 addr
= mmap(addr
, len
,
198 MAP_PRIVATE
| MAP_FILE
| MAP_FIXED
,
201 if (addr
== MAP_FAILED
) {
203 lose("unexpected mmap(..) failure");
210 os_protect(os_vm_address_t address
, os_vm_size_t length
, os_vm_prot_t prot
)
212 if (mprotect(address
, length
, prot
) == -1) {
217 /* FIXME: Now that FOO_END, rather than FOO_SIZE, is the fundamental
218 * description of a space, we could probably punt this and just do
219 * (FOO_START <= x && x < FOO_END) everywhere it's called. */
221 in_range_p(os_vm_address_t a
, lispobj sbeg
, size_t slen
)
223 char* beg
= (char*)((long)sbeg
);
224 char* end
= (char*)((long)sbeg
) + slen
;
225 char* adr
= (char*)a
;
226 return (adr
>= beg
&& adr
< end
);
230 is_valid_lisp_addr(os_vm_address_t addr
)
233 if(in_range_p(addr
, READ_ONLY_SPACE_START
, READ_ONLY_SPACE_SIZE
) ||
234 in_range_p(addr
, STATIC_SPACE_START
, STATIC_SPACE_SIZE
) ||
235 in_range_p(addr
, DYNAMIC_SPACE_START
, DYNAMIC_SPACE_SIZE
))
237 for_each_thread(th
) {
238 if(in_range_p(addr
, th
->control_stack_start
,
239 THREAD_CONTROL_STACK_SIZE
) ||
240 in_range_p(addr
, th
->binding_stack_start
,
248 * any OS-dependent special low-level handling for signals
252 #if defined LISP_FEATURE_GENCGC
255 * The GENCGC needs to be hooked into whatever signal is raised for
256 * page fault on this OS.
259 sigsegv_handler(int signal
, siginfo_t
*info
, void* void_context
)
261 os_context_t
*context
= arch_os_get_context(&void_context
);
262 void* fault_addr
= (void*)context
->uc_mcontext
.cr2
;
263 if (!gencgc_handle_wp_violation(fault_addr
))
264 if(!handle_control_stack_guard_triggered(context
,fault_addr
))
265 interrupt_handle_now(signal
, info
, void_context
);
271 sigsegv_handler(int signal
, siginfo_t
*info
, void* void_context
)
273 os_context_t
*context
= arch_os_get_context(&void_context
);
274 os_vm_address_t addr
;
276 addr
= arch_get_bad_addr(signal
,info
,context
);
278 *os_context_register_addr(context
,reg_ALLOC
) & (1L<<63)){
280 /* Alpha stuff: This is the end of a pseudo-atomic section
281 * during which a signal was received. We must deal with the
282 * pending interrupt (see also interrupt.c,
283 * ../code/interrupt.lisp)
285 /* (how we got here: when interrupting, we set bit 63 in
286 * reg_ALLOC. At the end of the atomic section we tried to
287 * write to reg_ALLOC, got a SIGSEGV (there's nothing mapped
288 * there) so ended up here
290 *os_context_register_addr(context
,reg_ALLOC
) -= (1L<<63);
291 interrupt_handle_pending(context
);
293 if(!interrupt_maybe_gc(signal
, info
, context
))
294 if(!handle_control_stack_guard_triggered(context
,addr
))
295 interrupt_handle_now(signal
, info
, context
);
300 void sigcont_handler(int signal
, siginfo_t
*info
, void *void_context
)
302 /* we need to have a handler installed for this signal so that
303 * sigwaitinfo() for it actually returns at the appropriate time
308 os_install_interrupt_handlers(void)
310 undoably_install_low_level_interrupt_handler(SIG_MEMORY_FAULT
,
312 undoably_install_low_level_interrupt_handler(SIGCONT
,