2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2003 Peter Wemm
4 * Copyright (c) 2002 Doug Rabson
5 * Copyright (c) 1998-1999 Andrew Gallatin
6 * Copyright (c) 1994-1996 Søren Schmidt
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer
14 * in this position and unchanged.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * $FreeBSD: src/sys/amd64/linux32/linux32_sysvec.c,v 1.29 2007/05/14 22:40:04 jhb Exp $
36 #include "opt_compat.h"
39 #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
42 #define __ELF_WORD_SIZE 32
44 #include <sys/param.h>
45 #include <sys/systm.h>
47 #include <sys/imgact.h>
48 #include <sys/imgact_elf.h>
49 #include <sys/kernel.h>
51 #include <sys/malloc.h>
52 #include <sys/module.h>
53 #include <sys/mutex.h>
55 #include <sys/resourcevar.h>
56 #include <sys/signalvar.h>
57 #include <sys/sysctl.h>
58 #include <sys/syscallsubr.h>
59 #include <sys/sysent.h>
60 #include <sys/sysproto.h>
61 #include <sys/vnode.h>
62 #include <sys/eventhandler.h>
66 #include <vm/vm_extern.h>
67 #include <vm/vm_map.h>
68 #include <vm/vm_object.h>
69 #include <vm/vm_page.h>
70 #include <vm/vm_param.h>
72 #include <machine/cpu.h>
73 #include <machine/md_var.h>
74 #include <machine/pcb.h>
75 #include <machine/specialreg.h>
77 #include <amd64/linux32/linux.h>
78 #include <amd64/linux32/linux32_proto.h>
79 #include <compat/linux/linux_emul.h>
80 #include <compat/linux/linux_mib.h>
81 #include <compat/linux/linux_signal.h>
82 #include <compat/linux/linux_util.h>
/*
 * Module bookkeeping: declare version 1 of the linux module and define the
 * M_LINUX malloc type used for Linux mode structures.
 */
84 MODULE_VERSION(linux
, 1);
86 MALLOC_DEFINE(M_LINUX
, "linux", "Linux mode structures");
/*
 * AUXARGS_ENTRY_32(pos, id, val): emit one auxiliary-vector entry as two
 * consecutive words, id first and then val, each written with
 * suword32(pos++, ...).  pos is advanced as a side effect, so it must be a
 * modifiable pointer and must not have side effects of its own.
 */
88 #define AUXARGS_ENTRY_32(pos, id, val) \
90 suword32(pos++, id); \
91 suword32(pos++, val); \
/*
 * SHELLMAGIC is the value that exec_linux_imgact_try() compares against the
 * first short of the image header to recognise a script starting with the
 * two bytes # and !.  The constant is chosen according to BYTE_ORDER.
 */
94 #if BYTE_ORDER == LITTLE_ENDIAN
95 #define SHELLMAGIC 0x2123 /* #! */
97 #define SHELLMAGIC 0x2321
/*
 * Pseudo syscall numbers for the two signal-delivery routines.  Both alias
 * syscall number 0 so that ldebug(sendsig) and ldebug(rt_sendsig) can be
 * used in linux_sendsig() and linux_rt_sendsig().
 * NOTE(review): presumably ldebug() pastes its argument onto a
 * LINUX_SYS_linux_ prefix; confirm against the ldebug() definition.
 */
101 * Allow the sendsig functions to use the ldebug() facility
102 * even though they are not syscalls themselves. Map them
103 * to syscall 0. This is slightly less bogus than using
106 #define LINUX_SYS_linux_rt_sendsig 0
107 #define LINUX_SYS_linux_sendsig 0
/*
 * Symbols provided by other translation units:
 *  - linux_sigcode / linux_szsigcode: a code blob and its size (presumably
 *    the signal trampoline installed through the sysentvec; confirm).
 *  - linux_sysent: the syscall table, LINUX_SYS_MAXSYSCALL entries.
 *  - linux_ioctl_handler_set / linux_device_handler_set: linker sets that
 *    linux_elf_modevent() walks with SET_FOREACH to register and unregister
 *    handlers.
 */
109 extern char linux_sigcode
[];
110 extern int linux_szsigcode
;
112 extern struct sysent linux_sysent
[LINUX_SYS_MAXSYSCALL
];
114 SET_DECLARE(linux_ioctl_handler_set
, struct linux_ioctl_handler
);
115 SET_DECLARE(linux_device_handler_set
, struct linux_device_handler
);
/*
 * Forward declarations of the file-local (static) routines defined further
 * down in this file.
 * NOTE(review): the tail of the linux_prepsyscall() prototype is missing
 * from this listing.
 */
117 static int elf_linux_fixup(register_t
**stack_base
,
118 struct image_params
*iparams
);
119 static register_t
*linux_copyout_strings(struct image_params
*imgp
);
120 static void linux_prepsyscall(struct trapframe
*tf
, int *args
, u_int
*code
,
122 static void linux_sendsig(sig_t catcher
, ksiginfo_t
*ksi
, sigset_t
*mask
);
123 static void exec_linux_setregs(struct thread
*td
, u_long entry
,
124 u_long stack
, u_long ps_strings
);
125 static void linux32_fixlimit(struct rlimit
*rl
, int which
);
/*
 * futex_list and futex_sx are defined elsewhere; linux_elf_modevent()
 * initialises them with LIST_INIT() and sx_init() and destroys the lock
 * again on the teardown path.
 *
 * The three eventhandler tags store the results of the
 * EVENTHANDLER_REGISTER() calls for process_exit, schedtail and
 * process_exec so that the same handlers can be passed to
 * EVENTHANDLER_DEREGISTER() later.
 */
127 extern LIST_HEAD(futex_list
, futex
) futex_list
;
128 extern struct sx futex_sx
;
130 static eventhandler_tag linux_exit_tag
;
131 static eventhandler_tag linux_schedtail_tag
;
132 static eventhandler_tag linux_exec_tag
;
/*
 * bsd_to_linux_errno[]: ELAST + 1 entries, indexed by the FreeBSD errno
 * value.  Each slot holds the matching Linux errno already negated, since
 * Linux syscalls report errors as negative numbers.  Most low values map to
 * themselves; the exceptions are spelled out entry by entry.
 * NOTE(review): the closing rows of the initializer are missing from this
 * listing.
 */
135 * Linux syscalls return negative errno's, we do positive and map them
137 * FreeBSD: src/sys/sys/errno.h
138 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
139 * linux-2.6.17.8/include/asm-generic/errno.h
141 static int bsd_to_linux_errno
[ELAST
+ 1] = {
142 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
143 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
144 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
145 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
146 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
147 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
148 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
149 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
150 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
/*
 * bsd_to_linux_signal[]: LINUX_SIGTBLSZ entries translating a FreeBSD signal
 * to the Linux signal number.  The table is listed in elf_linux_sysvec;
 * assuming that slot is sv_sigtbl, the sendsig routines index it with
 * _SIG_IDX(sig).  Two slots hold 0 (presumably signals with no Linux
 * counterpart; confirm).
 */
154 int bsd_to_linux_signal
[LINUX_SIGTBLSZ
] = {
155 LINUX_SIGHUP
, LINUX_SIGINT
, LINUX_SIGQUIT
, LINUX_SIGILL
,
156 LINUX_SIGTRAP
, LINUX_SIGABRT
, 0, LINUX_SIGFPE
,
157 LINUX_SIGKILL
, LINUX_SIGBUS
, LINUX_SIGSEGV
, LINUX_SIGSYS
,
158 LINUX_SIGPIPE
, LINUX_SIGALRM
, LINUX_SIGTERM
, LINUX_SIGURG
,
159 LINUX_SIGSTOP
, LINUX_SIGTSTP
, LINUX_SIGCONT
, LINUX_SIGCHLD
,
160 LINUX_SIGTTIN
, LINUX_SIGTTOU
, LINUX_SIGIO
, LINUX_SIGXCPU
,
161 LINUX_SIGXFSZ
, LINUX_SIGVTALRM
, LINUX_SIGPROF
, LINUX_SIGWINCH
,
162 0, LINUX_SIGUSR1
, LINUX_SIGUSR2
/*
 * linux_to_bsd_signal[]: the reverse direction, LINUX_SIGTBLSZ entries
 * giving the FreeBSD signal for each Linux signal slot.  The mapping is not
 * one to one: SIGBUS and SIGURG each appear in two slots.
 */
165 int linux_to_bsd_signal
[LINUX_SIGTBLSZ
] = {
166 SIGHUP
, SIGINT
, SIGQUIT
, SIGILL
,
167 SIGTRAP
, SIGABRT
, SIGBUS
, SIGFPE
,
168 SIGKILL
, SIGUSR1
, SIGSEGV
, SIGUSR2
,
169 SIGPIPE
, SIGALRM
, SIGTERM
, SIGBUS
,
170 SIGCHLD
, SIGCONT
, SIGSTOP
, SIGTSTP
,
171 SIGTTIN
, SIGTTOU
, SIGURG
, SIGXCPU
,
172 SIGXFSZ
, SIGVTALRM
, SIGPROF
, SIGWINCH
,
173 SIGIO
, SIGURG
, SIGSYS
/*
 * _bsd_to_linux_trapcode[]: indexed by the FreeBSD trap type (named in the
 * comment on each entry) and yielding the trap number stored in sc_trapno
 * by the sendsig routines.  LINUX_T_UNKNOWN (255) fills the slots that have
 * no named trap.
 *
 * bsd_to_linux_trapcode(code) compares code against the element count of
 * the table before indexing it.
 * NOTE(review): entries 3, 18-22 and 28, and the out-of-range arm of the
 * macro, are missing from this listing.
 */
176 #define LINUX_T_UNKNOWN 255
177 static int _bsd_to_linux_trapcode
[] = {
178 LINUX_T_UNKNOWN
, /* 0 */
179 6, /* 1 T_PRIVINFLT */
180 LINUX_T_UNKNOWN
, /* 2 */
182 LINUX_T_UNKNOWN
, /* 4 */
183 LINUX_T_UNKNOWN
, /* 5 */
184 16, /* 6 T_ARITHTRAP */
185 254, /* 7 T_ASTFLT */
186 LINUX_T_UNKNOWN
, /* 8 */
187 13, /* 9 T_PROTFLT */
188 1, /* 10 T_TRCTRAP */
189 LINUX_T_UNKNOWN
, /* 11 */
190 14, /* 12 T_PAGEFLT */
191 LINUX_T_UNKNOWN
, /* 13 */
192 17, /* 14 T_ALIGNFLT */
193 LINUX_T_UNKNOWN
, /* 15 */
194 LINUX_T_UNKNOWN
, /* 16 */
195 LINUX_T_UNKNOWN
, /* 17 */
201 8, /* 23 T_DOUBLEFLT */
202 9, /* 24 T_FPOPFLT */
203 10, /* 25 T_TSSFLT */
204 11, /* 26 T_SEGNPFLT */
205 12, /* 27 T_STKFLT */
207 19, /* 29 T_XMMFLT */
208 15 /* 30 T_RESERVED */
210 #define bsd_to_linux_trapcode(code) \
211 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
212 _bsd_to_linux_trapcode[(code)]: \
/*
 * 32-bit layout of the ps_strings block.  linux_copyout_strings() treats
 * LINUX32_PS_STRINGS as a pointer to this structure and fills all four
 * fields with suword32(); the two string pointers are therefore stored as
 * u_int32_t values rather than native pointers.
 */
215 struct linux32_ps_strings
{
216 u_int32_t ps_argvstr
; /* first of 0 or more argument strings */
217 u_int ps_nargvstr
; /* the number of argument strings */
218 u_int32_t ps_envstr
; /* first of 0 or more environment strings */
219 u_int ps_nenvstr
; /* the number of environment strings */
/*
 * translate_traps(signal, trap_code): hook for adjusting which signal is
 * reported for a trap.  The only logic visible here is the initial test
 * that separates SIGBUS from every other signal.
 * NOTE(review): the rest of the body, including how trap_code is used and
 * what is returned, is missing from this listing.
 */
223 * If FreeBSD & Linux have a difference of opinion about what a trap
224 * means, deal with it here.
229 translate_traps(int signal
, int trap_code
)
231 if (signal
!= SIGBUS
)
/*
 * elf_linux_fixup(stack_base, imgp): write the ELF auxiliary vector for a
 * 32-bit Linux image and finish the initial stack.
 *
 *  - Asserts that the current thread belongs to imgp->proc and that the
 *    process does not have P_SA set.
 *  - pos starts (argc + envc + 2) 32-bit slots above *stack_base, i.e. past
 *    the argument and environment pointer arrays and their terminators.
 *  - Emits the AT_* entries with AUXARGS_ENTRY_32(); AT_EXECFD is written
 *    only when args->execfd is not -1, and the list ends with AT_NULL.
 *  - Frees imgp->auxargs (M_TEMP) and clears the pointer.
 *  - Stores argc through base with suword32() and writes base back to
 *    *stack_base.
 *
 * NOTE(review): AT_EUID and AT_EGID are filled from the saved IDs
 * (cr_svuid, cr_svgid) rather than the effective ones; confirm that this is
 * intended.
 * NOTE(review): local declarations, the condition guarding AT_DEBUG, any
 * adjustment of base before the final store, and the return statement are
 * missing from this listing.
 */
245 elf_linux_fixup(register_t
**stack_base
, struct image_params
*imgp
)
251 KASSERT(curthread
->td_proc
== imgp
->proc
&&
252 (curthread
->td_proc
->p_flag
& P_SA
) == 0,
253 ("unsafe elf_linux_fixup(), should be curproc"));
254 base
= (Elf32_Addr
*)*stack_base
;
255 args
= (Elf32_Auxargs
*)imgp
->auxargs
;
256 pos
= base
+ (imgp
->args
->argc
+ imgp
->args
->envc
+ 2);
259 AUXARGS_ENTRY_32(pos
, AT_DEBUG
, 1);
260 if (args
->execfd
!= -1)
261 AUXARGS_ENTRY_32(pos
, AT_EXECFD
, args
->execfd
);
262 AUXARGS_ENTRY_32(pos
, AT_PHDR
, args
->phdr
);
263 AUXARGS_ENTRY_32(pos
, AT_PHENT
, args
->phent
);
264 AUXARGS_ENTRY_32(pos
, AT_PHNUM
, args
->phnum
);
265 AUXARGS_ENTRY_32(pos
, AT_PAGESZ
, args
->pagesz
);
266 AUXARGS_ENTRY_32(pos
, AT_FLAGS
, args
->flags
);
267 AUXARGS_ENTRY_32(pos
, AT_ENTRY
, args
->entry
);
268 AUXARGS_ENTRY_32(pos
, AT_BASE
, args
->base
);
269 AUXARGS_ENTRY_32(pos
, AT_UID
, imgp
->proc
->p_ucred
->cr_ruid
);
270 AUXARGS_ENTRY_32(pos
, AT_EUID
, imgp
->proc
->p_ucred
->cr_svuid
);
271 AUXARGS_ENTRY_32(pos
, AT_GID
, imgp
->proc
->p_ucred
->cr_rgid
);
272 AUXARGS_ENTRY_32(pos
, AT_EGID
, imgp
->proc
->p_ucred
->cr_svgid
);
273 AUXARGS_ENTRY_32(pos
, AT_NULL
, 0);
275 free(imgp
->auxargs
, M_TEMP
);
276 imgp
->auxargs
= NULL
;
279 suword32(base
, (uint32_t)imgp
->args
->argc
);
280 *stack_base
= (register_t
*)base
;
/*
 * Externally defined values used by the signal and exec paths below:
 *  - _ucode32sel is loaded into tf_cs and _udatasel into tf_ss, pcb_ds and
 *    pcb_es when control is redirected to 32-bit user code.
 *  - linux_sznonrtsigcode is the offset that linux_rt_sendsig() adds to the
 *    start of the sigcode area to form the handler entry rip.
 */
284 extern int _ucodesel
, _ucode32sel
, _udatasel
;
285 extern unsigned long linux_sznonrtsigcode
;
/*
 * linux_rt_sendsig(catcher, ksi, mask): deliver signal ksi->ksi_signo to the
 * user handler catcher using a Linux RT signal frame (struct
 * l_rt_sigframe).
 *
 * Locking: the process lock and psp->ps_mtx must be held on entry (both are
 * asserted).  ps_mtx is dropped once the frame address has been chosen and
 * is taken again at the end of the function.
 *
 * Steps visible in this listing:
 *  1. Choose the frame address fp: the top of the alternate signal stack
 *     when TDP_ALTSTACK is set, the thread is not already on it and the
 *     signal is in ps_sigonstack; the other assignment places the frame one
 *     l_rt_sigframe below the current tf_rsp.
 *  2. Translate sig through sv_sigtbl when the table exists and sig is
 *     within sv_sigsize.
 *  3. Zero a local frame and fill in the handler, the user addresses of the
 *     siginfo and ucontext members inside fp, the siginfo fields, the
 *     alternate stack description, the converted signal mask and the
 *     register context.  General registers come from the trapframe; gs, fs,
 *     es and ds are read from the live segment registers.
 *  4. copyout() the frame to fp.
 *  5. Redirect the thread: tf_rsp = fp, tf_rip = LINUX32_PS_STRINGS minus
 *     the sigcode size plus linux_sznonrtsigcode, PSL_T cleared, cs set to
 *     _ucode32sel, ss/ds/es set to _udatasel.
 *
 * NOTE(review): local declarations, the assignments of regs and psp, the
 * else keyword, the body of the copyout failure path and the debug guards
 * are missing from this listing.
 */
288 linux_rt_sendsig(sig_t catcher
, ksiginfo_t
*ksi
, sigset_t
*mask
)
290 struct thread
*td
= curthread
;
291 struct proc
*p
= td
->td_proc
;
293 struct trapframe
*regs
;
294 struct l_rt_sigframe
*fp
, frame
;
299 sig
= ksi
->ksi_signo
;
300 code
= ksi
->ksi_code
;
301 PROC_LOCK_ASSERT(p
, MA_OWNED
);
303 mtx_assert(&psp
->ps_mtx
, MA_OWNED
);
305 oonstack
= sigonstack(regs
->tf_rsp
);
308 if (ldebug(rt_sendsig
))
309 printf(ARGS(rt_sendsig
, "%p, %d, %p, %u"),
310 catcher
, sig
, (void*)mask
, code
);
313 * Allocate space for the signal handler context.
315 if ((td
->td_pflags
& TDP_ALTSTACK
) && !oonstack
&&
316 SIGISMEMBER(psp
->ps_sigonstack
, sig
)) {
317 fp
= (struct l_rt_sigframe
*)(td
->td_sigstk
.ss_sp
+
318 td
->td_sigstk
.ss_size
- sizeof(struct l_rt_sigframe
));
320 fp
= (struct l_rt_sigframe
*)regs
->tf_rsp
- 1;
321 mtx_unlock(&psp
->ps_mtx
);
324 * Build the argument list for the signal handler.
326 if (p
->p_sysent
->sv_sigtbl
)
327 if (sig
<= p
->p_sysent
->sv_sigsize
)
328 sig
= p
->p_sysent
->sv_sigtbl
[_SIG_IDX(sig
)];
330 bzero(&frame
, sizeof(frame
));
332 frame
.sf_handler
= PTROUT(catcher
);
334 frame
.sf_siginfo
= PTROUT(&fp
->sf_si
);
335 frame
.sf_ucontext
= PTROUT(&fp
->sf_sc
);
337 /* Fill in POSIX parts */
338 frame
.sf_si
.lsi_signo
= sig
;
339 frame
.sf_si
.lsi_code
= code
;
340 frame
.sf_si
.lsi_addr
= PTROUT(ksi
->ksi_addr
);
343 * Build the signal context to be used by sigreturn.
345 frame
.sf_sc
.uc_flags
= 0; /* XXX ??? */
346 frame
.sf_sc
.uc_link
= 0; /* XXX ??? */
348 frame
.sf_sc
.uc_stack
.ss_sp
= PTROUT(td
->td_sigstk
.ss_sp
);
349 frame
.sf_sc
.uc_stack
.ss_size
= td
->td_sigstk
.ss_size
;
350 frame
.sf_sc
.uc_stack
.ss_flags
= (td
->td_pflags
& TDP_ALTSTACK
)
351 ? ((oonstack
) ? LINUX_SS_ONSTACK
: 0) : LINUX_SS_DISABLE
;
354 bsd_to_linux_sigset(mask
, &frame
.sf_sc
.uc_sigmask
);
356 frame
.sf_sc
.uc_mcontext
.sc_mask
= frame
.sf_sc
.uc_sigmask
.__bits
[0];
357 frame
.sf_sc
.uc_mcontext
.sc_gs
= rgs();
358 frame
.sf_sc
.uc_mcontext
.sc_fs
= rfs();
359 __asm
__volatile("movl %%es,%0" :
360 "=rm" (frame
.sf_sc
.uc_mcontext
.sc_es
));
361 __asm
__volatile("movl %%ds,%0" :
362 "=rm" (frame
.sf_sc
.uc_mcontext
.sc_ds
));
363 frame
.sf_sc
.uc_mcontext
.sc_edi
= regs
->tf_rdi
;
364 frame
.sf_sc
.uc_mcontext
.sc_esi
= regs
->tf_rsi
;
365 frame
.sf_sc
.uc_mcontext
.sc_ebp
= regs
->tf_rbp
;
366 frame
.sf_sc
.uc_mcontext
.sc_ebx
= regs
->tf_rbx
;
367 frame
.sf_sc
.uc_mcontext
.sc_edx
= regs
->tf_rdx
;
368 frame
.sf_sc
.uc_mcontext
.sc_ecx
= regs
->tf_rcx
;
369 frame
.sf_sc
.uc_mcontext
.sc_eax
= regs
->tf_rax
;
370 frame
.sf_sc
.uc_mcontext
.sc_eip
= regs
->tf_rip
;
371 frame
.sf_sc
.uc_mcontext
.sc_cs
= regs
->tf_cs
;
372 frame
.sf_sc
.uc_mcontext
.sc_eflags
= regs
->tf_rflags
;
373 frame
.sf_sc
.uc_mcontext
.sc_esp_at_signal
= regs
->tf_rsp
;
374 frame
.sf_sc
.uc_mcontext
.sc_ss
= regs
->tf_ss
;
375 frame
.sf_sc
.uc_mcontext
.sc_err
= regs
->tf_err
;
376 frame
.sf_sc
.uc_mcontext
.sc_trapno
= bsd_to_linux_trapcode(code
);
379 if (ldebug(rt_sendsig
))
380 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
381 frame
.sf_sc
.uc_stack
.ss_flags
, td
->td_sigstk
.ss_sp
,
382 td
->td_sigstk
.ss_size
, frame
.sf_sc
.uc_mcontext
.sc_mask
);
385 if (copyout(&frame
, fp
, sizeof(frame
)) != 0) {
387 * Process has trashed its stack; give it an illegal
388 * instruction to halt it in its tracks.
391 if (ldebug(rt_sendsig
))
392 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
400 * Build context to run handler in.
402 regs
->tf_rsp
= PTROUT(fp
);
403 regs
->tf_rip
= LINUX32_PS_STRINGS
- *(p
->p_sysent
->sv_szsigcode
) +
404 linux_sznonrtsigcode
;
405 regs
->tf_rflags
&= ~PSL_T
;
406 regs
->tf_cs
= _ucode32sel
;
407 regs
->tf_ss
= _udatasel
;
409 td
->td_pcb
->pcb_ds
= _udatasel
;
411 td
->td_pcb
->pcb_es
= _udatasel
;
412 /* leave user %fs and %gs untouched */
414 mtx_lock(&psp
->ps_mtx
);
/*
 * linux_sendsig(catcher, ksi, mask): deliver a signal to a 32-bit Linux
 * process using the classic (non-RT) frame, struct l_sigframe.
 *
 * Locking: the process lock and psp->ps_mtx must be held on entry (both are
 * asserted).  ps_mtx is dropped after the frame address is chosen and taken
 * again at the end.
 *
 * Steps visible in this listing:
 *  1. If the signal is a member of ps_siginfo (handler installed with
 *     SA_SIGINFO), hand the delivery to linux_rt_sendsig().
 *  2. Otherwise choose the frame address exactly as the RT variant does,
 *     using sizeof(struct l_sigframe).
 *  3. Translate sig through sv_sigtbl, zero a local frame, and record the
 *     handler, the register context and the converted mask.  Word 0 of the
 *     Linux mask goes to sc_mask and the remaining LINUX_NSIG_WORDS - 1
 *     words to sf_extramask[].
 *  4. copyout() the frame, then redirect the thread to the start of the
 *     sigcode area (LINUX32_PS_STRINGS minus the sigcode size) with
 *     tf_rsp = fp, PSL_T cleared, cs = _ucode32sel and ss/ds/es = _udatasel.
 *
 * NOTE(review): local declarations, the return after the RT hand-off, the
 * else keyword, the copyout failure body and the debug guards are missing
 * from this listing.
 */
419 * Send an interrupt to process.
421 * Stack is set up to allow sigcode stored
422 * in u. to call routine, followed by kcall
423 * to sigreturn routine below. After sigreturn
424 * resets the signal mask, the stack, and the
425 * frame pointer, it returns to the user
429 linux_sendsig(sig_t catcher
, ksiginfo_t
*ksi
, sigset_t
*mask
)
431 struct thread
*td
= curthread
;
432 struct proc
*p
= td
->td_proc
;
434 struct trapframe
*regs
;
435 struct l_sigframe
*fp
, frame
;
440 sig
= ksi
->ksi_signo
;
441 code
= ksi
->ksi_code
;
442 PROC_LOCK_ASSERT(p
, MA_OWNED
);
444 mtx_assert(&psp
->ps_mtx
, MA_OWNED
);
445 if (SIGISMEMBER(psp
->ps_siginfo
, sig
)) {
446 /* Signal handler installed with SA_SIGINFO. */
447 linux_rt_sendsig(catcher
, ksi
, mask
);
452 oonstack
= sigonstack(regs
->tf_rsp
);
456 printf(ARGS(sendsig
, "%p, %d, %p, %u"),
457 catcher
, sig
, (void*)mask
, code
);
461 * Allocate space for the signal handler context.
463 if ((td
->td_pflags
& TDP_ALTSTACK
) && !oonstack
&&
464 SIGISMEMBER(psp
->ps_sigonstack
, sig
)) {
465 fp
= (struct l_sigframe
*)(td
->td_sigstk
.ss_sp
+
466 td
->td_sigstk
.ss_size
- sizeof(struct l_sigframe
));
468 fp
= (struct l_sigframe
*)regs
->tf_rsp
- 1;
469 mtx_unlock(&psp
->ps_mtx
);
473 * Build the argument list for the signal handler.
475 if (p
->p_sysent
->sv_sigtbl
)
476 if (sig
<= p
->p_sysent
->sv_sigsize
)
477 sig
= p
->p_sysent
->sv_sigtbl
[_SIG_IDX(sig
)];
479 bzero(&frame
, sizeof(frame
));
481 frame
.sf_handler
= PTROUT(catcher
);
484 bsd_to_linux_sigset(mask
, &lmask
);
487 * Build the signal context to be used by sigreturn.
489 frame
.sf_sc
.sc_mask
= lmask
.__bits
[0];
490 frame
.sf_sc
.sc_gs
= rgs();
491 frame
.sf_sc
.sc_fs
= rfs();
492 __asm
__volatile("movl %%es,%0" : "=rm" (frame
.sf_sc
.sc_es
));
493 __asm
__volatile("movl %%ds,%0" : "=rm" (frame
.sf_sc
.sc_ds
));
494 frame
.sf_sc
.sc_edi
= regs
->tf_rdi
;
495 frame
.sf_sc
.sc_esi
= regs
->tf_rsi
;
496 frame
.sf_sc
.sc_ebp
= regs
->tf_rbp
;
497 frame
.sf_sc
.sc_ebx
= regs
->tf_rbx
;
498 frame
.sf_sc
.sc_edx
= regs
->tf_rdx
;
499 frame
.sf_sc
.sc_ecx
= regs
->tf_rcx
;
500 frame
.sf_sc
.sc_eax
= regs
->tf_rax
;
501 frame
.sf_sc
.sc_eip
= regs
->tf_rip
;
502 frame
.sf_sc
.sc_cs
= regs
->tf_cs
;
503 frame
.sf_sc
.sc_eflags
= regs
->tf_rflags
;
504 frame
.sf_sc
.sc_esp_at_signal
= regs
->tf_rsp
;
505 frame
.sf_sc
.sc_ss
= regs
->tf_ss
;
506 frame
.sf_sc
.sc_err
= regs
->tf_err
;
507 frame
.sf_sc
.sc_trapno
= bsd_to_linux_trapcode(code
);
509 for (i
= 0; i
< (LINUX_NSIG_WORDS
-1); i
++)
510 frame
.sf_extramask
[i
] = lmask
.__bits
[i
+1];
512 if (copyout(&frame
, fp
, sizeof(frame
)) != 0) {
514 * Process has trashed its stack; give it an illegal
515 * instruction to halt it in its tracks.
522 * Build context to run handler in.
524 regs
->tf_rsp
= PTROUT(fp
);
525 regs
->tf_rip
= LINUX32_PS_STRINGS
- *(p
->p_sysent
->sv_szsigcode
);
526 regs
->tf_rflags
&= ~PSL_T
;
527 regs
->tf_cs
= _ucode32sel
;
528 regs
->tf_ss
= _udatasel
;
530 td
->td_pcb
->pcb_ds
= _udatasel
;
532 td
->td_pcb
->pcb_es
= _udatasel
;
533 /* leave user %fs and %gs untouched */
535 mtx_lock(&psp
->ps_mtx
);
/*
 * linux_sigreturn(td, args): return from a handler that was entered through
 * linux_sendsig().
 *
 * Steps visible in this listing:
 *  1. copyin() the struct l_sigframe from the user address args->sfp.
 *  2. Validate the saved eflags: ignoring PSL_RF, they may differ from the
 *     current tf_rflags only in bits covered by PSL_USERCHANGE.
 *  3. Validate the saved cs: its privilege level must be SEL_UPL.  If not,
 *     a SIGBUS (BUS_OBJERR, trap T_PROTFLT, address tf_rip) is posted with
 *     trapsignal().
 *  4. Rebuild the Linux signal mask from sc_mask plus sf_extramask[],
 *     convert it into td->td_sigmask and apply SIG_CANTMASK().
 *  5. Restore the general registers, rip, cs, rflags, rsp and ss from the
 *     frame; segment selectors other than cs and ss are not written here.
 *  6. Return EJUSTRETURN.
 *
 * NOTE(review): local declarations, the assignment of regs, the values
 * returned on the failure paths and any locking around the mask update are
 * missing from this listing.
 */
539 * System call to cleanup state after a signal
540 * has been taken. Reset signal mask and
541 * stack state from context left by sendsig (above).
542 * Return to previous pc and psl as specified by
543 * context left by sendsig. Check carefully to
544 * make sure that the user has not modified the
545 * psl to gain improper privileges or to cause
549 linux_sigreturn(struct thread
*td
, struct linux_sigreturn_args
*args
)
551 struct proc
*p
= td
->td_proc
;
552 struct l_sigframe frame
;
553 struct trapframe
*regs
;
561 if (ldebug(sigreturn
))
562 printf(ARGS(sigreturn
, "%p"), (void *)args
->sfp
);
565 * The trampoline code hands us the sigframe.
566 * It is unsafe to keep track of it ourselves, in the event that a
567 * program jumps out of a signal handler.
569 if (copyin(args
->sfp
, &frame
, sizeof(frame
)) != 0)
573 * Check for security violations.
575 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
576 eflags
= frame
.sf_sc
.sc_eflags
;
578 * XXX do allow users to change the privileged flag PSL_RF. The
579 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
580 * sometimes set it there too. tf_eflags is kept in the signal
581 * context during signal handling and there is no other place
582 * to remember it, so the PSL_RF bit may be corrupted by the
583 * signal handler without us knowing. Corruption of the PSL_RF
584 * bit at worst causes one more or one less debugger trap, so
585 * allowing it is fairly harmless.
587 if (!EFLAGS_SECURE(eflags
& ~PSL_RF
, regs
->tf_rflags
& ~PSL_RF
))
591 * Don't allow users to load a valid privileged %cs. Let the
592 * hardware check for invalid selectors, excess privilege in
593 * other selectors, invalid %eip's and invalid %esp's.
595 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
596 if (!CS_SECURE(frame
.sf_sc
.sc_cs
)) {
597 ksiginfo_init_trap(&ksi
);
598 ksi
.ksi_signo
= SIGBUS
;
599 ksi
.ksi_code
= BUS_OBJERR
;
600 ksi
.ksi_trapno
= T_PROTFLT
;
601 ksi
.ksi_addr
= (void *)regs
->tf_rip
;
602 trapsignal(td
, &ksi
);
606 lmask
.__bits
[0] = frame
.sf_sc
.sc_mask
;
607 for (i
= 0; i
< (LINUX_NSIG_WORDS
-1); i
++)
608 lmask
.__bits
[i
+1] = frame
.sf_extramask
[i
];
610 linux_to_bsd_sigset(&lmask
, &td
->td_sigmask
);
611 SIG_CANTMASK(td
->td_sigmask
);
616 * Restore signal context.
618 /* Selectors were restored by the trampoline. */
619 regs
->tf_rdi
= frame
.sf_sc
.sc_edi
;
620 regs
->tf_rsi
= frame
.sf_sc
.sc_esi
;
621 regs
->tf_rbp
= frame
.sf_sc
.sc_ebp
;
622 regs
->tf_rbx
= frame
.sf_sc
.sc_ebx
;
623 regs
->tf_rdx
= frame
.sf_sc
.sc_edx
;
624 regs
->tf_rcx
= frame
.sf_sc
.sc_ecx
;
625 regs
->tf_rax
= frame
.sf_sc
.sc_eax
;
626 regs
->tf_rip
= frame
.sf_sc
.sc_eip
;
627 regs
->tf_cs
= frame
.sf_sc
.sc_cs
;
628 regs
->tf_rflags
= eflags
;
629 regs
->tf_rsp
= frame
.sf_sc
.sc_esp_at_signal
;
630 regs
->tf_ss
= frame
.sf_sc
.sc_ss
;
632 return (EJUSTRETURN
);
/*
 * linux_rt_sigreturn(td, args): return from a handler that was entered
 * through linux_rt_sendsig().
 *
 * Steps visible in this listing:
 *  1. copyin() the struct l_ucontext from the user address args->ucp and
 *     point context at its uc_mcontext member.
 *  2. Apply the same eflags and cs checks as linux_sigreturn(): eflags may
 *     change only within PSL_USERCHANGE (PSL_RF ignored) and cs must have
 *     privilege level SEL_UPL, otherwise SIGBUS is posted via trapsignal().
 *  3. Convert uc.uc_sigmask into td->td_sigmask and apply SIG_CANTMASK().
 *  4. Restore the general registers, rip, cs, rflags, rsp and ss from the
 *     saved context.
 *  5. Rebuild a native stack_t from lss and call kern_sigaltstack(); its
 *     result is deliberately discarded.
 *  6. Return EJUSTRETURN.
 *
 * NOTE(review): local declarations, the assignments of regs and lss
 * (presumably lss refers to uc.uc_stack; confirm), the values returned on
 * the failure paths and any locking around the mask update are missing from
 * this listing.
 */
636 * System call to cleanup state after a signal
637 * has been taken. Reset signal mask and
638 * stack state from context left by rt_sendsig (above).
639 * Return to previous pc and psl as specified by
640 * context left by sendsig. Check carefully to
641 * make sure that the user has not modified the
642 * psl to gain improper privileges or to cause
646 linux_rt_sigreturn(struct thread
*td
, struct linux_rt_sigreturn_args
*args
)
648 struct proc
*p
= td
->td_proc
;
649 struct l_ucontext uc
;
650 struct l_sigcontext
*context
;
653 struct trapframe
*regs
;
660 if (ldebug(rt_sigreturn
))
661 printf(ARGS(rt_sigreturn
, "%p"), (void *)args
->ucp
);
664 * The trampoline code hands us the ucontext.
665 * It is unsafe to keep track of it ourselves, in the event that a
666 * program jumps out of a signal handler.
668 if (copyin(args
->ucp
, &uc
, sizeof(uc
)) != 0)
671 context
= &uc
.uc_mcontext
;
674 * Check for security violations.
676 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
677 eflags
= context
->sc_eflags
;
679 * XXX do allow users to change the privileged flag PSL_RF. The
680 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
681 * sometimes set it there too. tf_eflags is kept in the signal
682 * context during signal handling and there is no other place
683 * to remember it, so the PSL_RF bit may be corrupted by the
684 * signal handler without us knowing. Corruption of the PSL_RF
685 * bit at worst causes one more or one less debugger trap, so
686 * allowing it is fairly harmless.
688 if (!EFLAGS_SECURE(eflags
& ~PSL_RF
, regs
->tf_rflags
& ~PSL_RF
))
692 * Don't allow users to load a valid privileged %cs. Let the
693 * hardware check for invalid selectors, excess privilege in
694 * other selectors, invalid %eip's and invalid %esp's.
696 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
697 if (!CS_SECURE(context
->sc_cs
)) {
698 ksiginfo_init_trap(&ksi
);
699 ksi
.ksi_signo
= SIGBUS
;
700 ksi
.ksi_code
= BUS_OBJERR
;
701 ksi
.ksi_trapno
= T_PROTFLT
;
702 ksi
.ksi_addr
= (void *)regs
->tf_rip
;
703 trapsignal(td
, &ksi
);
708 linux_to_bsd_sigset(&uc
.uc_sigmask
, &td
->td_sigmask
);
709 SIG_CANTMASK(td
->td_sigmask
);
714 * Restore signal context
716 /* Selectors were restored by the trampoline. */
717 regs
->tf_rdi
= context
->sc_edi
;
718 regs
->tf_rsi
= context
->sc_esi
;
719 regs
->tf_rbp
= context
->sc_ebp
;
720 regs
->tf_rbx
= context
->sc_ebx
;
721 regs
->tf_rdx
= context
->sc_edx
;
722 regs
->tf_rcx
= context
->sc_ecx
;
723 regs
->tf_rax
= context
->sc_eax
;
724 regs
->tf_rip
= context
->sc_eip
;
725 regs
->tf_cs
= context
->sc_cs
;
726 regs
->tf_rflags
= eflags
;
727 regs
->tf_rsp
= context
->sc_esp_at_signal
;
728 regs
->tf_ss
= context
->sc_ss
;
731 * call sigaltstack & ignore results..
734 ss
.ss_sp
= PTRIN(lss
->ss_sp
);
735 ss
.ss_size
= lss
->ss_size
;
736 ss
.ss_flags
= linux_to_bsd_sigaltstack(lss
->ss_flags
);
739 if (ldebug(rt_sigreturn
))
740 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
741 ss
.ss_flags
, ss
.ss_sp
, ss
.ss_size
, context
->sc_mask
);
743 (void)kern_sigaltstack(td
, &ss
, NULL
);
745 return (EJUSTRETURN
);
/*
 * linux_prepsyscall(tf, args, code, params): gather the arguments of a
 * Linux syscall from the trapframe.  The six argument slots are loaded, in
 * order, from rbx, rcx, rdx, rsi, rdi and rbp, each narrowed to int.
 * *params is set to NULL, which the existing comment describes as meaning
 * that no copyin is needed.  code is not touched in the visible lines.
 */
752 linux_prepsyscall(struct trapframe
*tf
, int *args
, u_int
*code
, caddr_t
*params
)
754 args
[0] = tf
->tf_rbx
;
755 args
[1] = tf
->tf_rcx
;
756 args
[2] = tf
->tf_rdx
;
757 args
[3] = tf
->tf_rsi
;
758 args
[4] = tf
->tf_rdi
;
759 args
[5] = tf
->tf_rbp
; /* Unconfirmed */
760 *params
= NULL
; /* no copyin */
/*
 * exec_linux_imgact_try(imgp): image-activator hook listed in
 * elf_linux_sysvec.
 *
 * When the first short of the image header equals SHELLMAGIC, the standard
 * exec_shell_imgact() is run.  If it succeeds, linux_emul_convpath() is
 * asked for an alternate path for imgp->interpreter_name.  The result,
 * rpath, replaces the interpreter name in place when its length including
 * the terminating NUL is at most MAXSHELLCMDLEN; a longer path leaves the
 * original name untouched.
 *
 * NOTE(review): local declarations, the check that rpath was actually
 * returned, the release of rpath and the return value are missing from this
 * listing.
 */
764 * If a linux binary is exec'ing something, try this image activator
765 * first. We override standard shell script execution in order to
766 * be able to modify the interpreter path. We only do this if a linux
767 * binary is doing the exec, so we do not create an EXEC module for it.
769 static int exec_linux_imgact_try(struct image_params
*iparams
);
772 exec_linux_imgact_try(struct image_params
*imgp
)
774 const char *head
= (const char *)imgp
->image_header
;
779 * The interpreter for shell scripts run from a linux binary needs
780 * to be located in /compat/linux if possible in order to recursively
781 * maintain linux path emulation.
783 if (((const short *)head
)[0] == SHELLMAGIC
) {
785 * Run our normal shell image activator. If it succeeds attempt
786 * to use the alternate path for the interpreter. If an alternate
787 * path is found, use our stringspace to store it.
789 if ((error
= exec_shell_imgact(imgp
)) == 0) {
790 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp
->proc
),
791 imgp
->interpreter_name
, UIO_SYSSPACE
, &rpath
, 0);
793 len
= strlen(rpath
) + 1;
795 if (len
<= MAXSHELLCMDLEN
) {
796 memcpy(imgp
->interpreter_name
, rpath
, len
);
/*
 * exec_linux_setregs(td, entry, stack, ps_strings): reset the register
 * state of td for a freshly exec-ed 32-bit Linux image.
 *
 *  - Zeroes the MSR_FSBASE and MSR_KGSBASE model-specific registers.
 *  - Points pcb_ds, pcb_es, pcb_fs and pcb_gs at _udatasel.
 *  - Clears the whole trapframe, then sets rip = entry, rsp = stack,
 *    ss = _udatasel, cs = _ucode32sel and rbx = ps_strings.
 *  - Sets CR0_MP and CR0_TS in cr0.
 *  - Marks the pcb with PCB_FULLCTX so the return goes through doreti, and
 *    clears td_retval[1].
 *
 * NOTE(review): tf_rflags is read after the bzero() of the trapframe, so
 * the (tf_rflags & PSL_T) term is always 0 and the result is plain
 * PSL_USER.  If the trace flag is meant to survive exec it has to be saved
 * before the bzero().
 * NOTE(review): the K and R style parameter declarations and some
 * statements are missing from this listing.
 */
806 * Clear registers on exec
807 * XXX copied from ia32_signal.c.
810 exec_linux_setregs(td
, entry
, stack
, ps_strings
)
816 struct trapframe
*regs
= td
->td_frame
;
817 struct pcb
*pcb
= td
->td_pcb
;
820 wrmsr(MSR_FSBASE
, 0);
821 wrmsr(MSR_KGSBASE
, 0); /* User value while we're in the kernel */
829 pcb
->pcb_ds
= _udatasel
;
830 pcb
->pcb_es
= _udatasel
;
831 pcb
->pcb_fs
= _udatasel
;
832 pcb
->pcb_gs
= _udatasel
;
834 bzero((char *)regs
, sizeof(struct trapframe
));
835 regs
->tf_rip
= entry
;
836 regs
->tf_rsp
= stack
;
837 regs
->tf_rflags
= PSL_USER
| (regs
->tf_rflags
& PSL_T
);
838 regs
->tf_ss
= _udatasel
;
839 regs
->tf_cs
= _ucode32sel
;
840 regs
->tf_rbx
= ps_strings
;
841 load_cr0(rcr0() | CR0_MP
| CR0_TS
);
844 /* Return via doreti so that we can change to a different %cs */
845 pcb
->pcb_flags
|= PCB_FULLCTX
;
846 td
->td_retval
[1] = 0;
/*
 * linux_copyout_strings(imgp): lay out the top of the new user stack for a
 * 32-bit Linux image and return the initial stack base.
 *
 * Layout, from LINUX32_PS_STRINGS downward:
 *  - the linux32_ps_strings block itself (arginfo);
 *  - the sigcode, copied out to arginfo - sigcodesz;
 *  - SPARE_USRSPACE bytes;
 *  - the argument and environment strings, at destp;
 *  - the vector table at vectp: argc + envc + 2 32-bit slots, plus
 *    imgp->auxarg_size further slots in the branch that reserves room for
 *    the auxiliary vector.
 *
 * After copying the strings, the routine fills the four ps_strings fields
 * and then walks the strings, storing the 32-bit user address of each
 * argument and each environment string into the vector table, with a zero
 * word between the two groups.
 *
 * NOTE(review): the results of copyout() and suword32() are not checked in
 * the visible code.
 * NOTE(review): local declarations, the conditions selecting between the
 * two vectp computations, the assignment of stack_base, the loop bodies
 * that advance destp, and the final terminator store are missing from this
 * listing.
 */
850 * XXX copied from ia32_sysvec.c.
853 linux_copyout_strings(struct image_params
*imgp
)
857 char *stringp
, *destp
;
858 u_int32_t
*stack_base
;
859 struct linux32_ps_strings
*arginfo
;
863 * Calculate string base and vector table pointers.
864 * Also deal with signal trampoline code for this exec type.
866 arginfo
= (struct linux32_ps_strings
*)LINUX32_PS_STRINGS
;
867 sigcodesz
= *(imgp
->proc
->p_sysent
->sv_szsigcode
);
868 destp
= (caddr_t
)arginfo
- sigcodesz
- SPARE_USRSPACE
-
869 roundup((ARG_MAX
- imgp
->args
->stringspace
), sizeof(char *));
875 copyout(imgp
->proc
->p_sysent
->sv_sigcode
,
876 ((caddr_t
)arginfo
- sigcodesz
), sigcodesz
);
879 * If we have a valid auxargs ptr, prepare some room
884 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
885 * lower compatibility.
887 imgp
->auxarg_size
= (imgp
->auxarg_size
) ? imgp
->auxarg_size
890 * The '+ 2' is for the null pointers at the end of each of
891 * the arg and env vector sets,and imgp->auxarg_size is room
892 * for argument of Runtime loader.
894 vectp
= (u_int32_t
*) (destp
- (imgp
->args
->argc
+ imgp
->args
->envc
+ 2 +
895 imgp
->auxarg_size
) * sizeof(u_int32_t
));
899 * The '+ 2' is for the null pointers at the end of each of
900 * the arg and env vector sets
902 vectp
= (u_int32_t
*)
903 (destp
- (imgp
->args
->argc
+ imgp
->args
->envc
+ 2) * sizeof(u_int32_t
));
906 * vectp also becomes our initial stack base
910 stringp
= imgp
->args
->begin_argv
;
911 argc
= imgp
->args
->argc
;
912 envc
= imgp
->args
->envc
;
914 * Copy out strings - arguments and environment.
916 copyout(stringp
, destp
, ARG_MAX
- imgp
->args
->stringspace
);
919 * Fill in "ps_strings" struct for ps, w, etc.
921 suword32(&arginfo
->ps_argvstr
, (u_int32_t
)(intptr_t)vectp
);
922 suword32(&arginfo
->ps_nargvstr
, argc
);
925 * Fill in argument portion of vector table.
927 for (; argc
> 0; --argc
) {
928 suword32(vectp
++, (u_int32_t
)(intptr_t)destp
);
929 while (*stringp
++ != 0)
934 /* a null vector table pointer separates the argp's from the envp's */
935 suword32(vectp
++, 0);
937 suword32(&arginfo
->ps_envstr
, (u_int32_t
)(intptr_t)vectp
);
938 suword32(&arginfo
->ps_nenvstr
, envc
);
941 * Fill in environment portion of vector table.
943 for (; envc
> 0; --envc
) {
944 suword32(vectp
++, (u_int32_t
)(intptr_t)destp
);
945 while (*stringp
++ != 0)
950 /* end of vector table is a null pointer */
953 return ((register_t
*)stack_base
);
/*
 * Sysctl tree compat.linux32 (described as 32-bit Linux emulation) with
 * three read-write u_long knobs: maxdsiz, maxssiz and maxvmem.  Each is
 * backed by a static variable initialised from the matching LINUX32_MAX*
 * constant and is consulted by linux32_fixlimit(), where a value of 0
 * disables the clamp.  The knobs are registered with empty description
 * strings.
 */
956 SYSCTL_NODE(_compat
, OID_AUTO
, linux32
, CTLFLAG_RW
, 0,
957 "32-bit Linux emulation");
959 static u_long linux32_maxdsiz
= LINUX32_MAXDSIZ
;
960 SYSCTL_ULONG(_compat_linux32
, OID_AUTO
, maxdsiz
, CTLFLAG_RW
,
961 &linux32_maxdsiz
, 0, "");
962 static u_long linux32_maxssiz
= LINUX32_MAXSSIZ
;
963 SYSCTL_ULONG(_compat_linux32
, OID_AUTO
, maxssiz
, CTLFLAG_RW
,
964 &linux32_maxssiz
, 0, "");
965 static u_long linux32_maxvmem
= LINUX32_MAXVMEM
;
966 SYSCTL_ULONG(_compat_linux32
, OID_AUTO
, maxvmem
, CTLFLAG_RW
,
967 &linux32_maxvmem
, 0, "");
/*
 * linux32_fixlimit(rl, which): clamp a resource limit for 32-bit Linux
 * processes.  Each of the three stanzas lowers rl->rlim_cur and
 * rl->rlim_max to the corresponding compat.linux32 tunable (maxdsiz,
 * maxssiz, maxvmem) when they exceed it; a tunable value of 0 leaves the
 * limit alone.
 * NOTE(review): the dispatch on which is missing from this listing;
 * presumably each stanza is selected by one resource identifier (data,
 * stack and virtual memory respectively); confirm against the full file.
 */
970 linux32_fixlimit(struct rlimit
*rl
, int which
)
975 if (linux32_maxdsiz
!= 0) {
976 if (rl
->rlim_cur
> linux32_maxdsiz
)
977 rl
->rlim_cur
= linux32_maxdsiz
;
978 if (rl
->rlim_max
> linux32_maxdsiz
)
979 rl
->rlim_max
= linux32_maxdsiz
;
983 if (linux32_maxssiz
!= 0) {
984 if (rl
->rlim_cur
> linux32_maxssiz
)
985 rl
->rlim_cur
= linux32_maxssiz
;
986 if (rl
->rlim_max
> linux32_maxssiz
)
987 rl
->rlim_max
= linux32_maxssiz
;
991 if (linux32_maxvmem
!= 0) {
992 if (rl
->rlim_cur
> linux32_maxvmem
)
993 rl
->rlim_cur
= linux32_maxvmem
;
994 if (rl
->rlim_max
> linux32_maxvmem
)
995 rl
->rlim_max
= linux32_maxvmem
;
/*
 * elf_linux_sysvec: the system entry vector for 32-bit Linux ELF processes.
 * The initializer is positional.  Only four of its entries are present in
 * this listing: the syscall count LINUX_SYS_MAXSYSCALL, the signal table
 * bsd_to_linux_signal, the image-activator hook exec_linux_imgact_try and
 * the stack set-up routine linux_copyout_strings.
 * NOTE(review): all other initializers are missing here; check the member
 * order of struct sysentvec before editing.
 */
1001 struct sysentvec elf_linux_sysvec
= {
1002 LINUX_SYS_MAXSYSCALL
,
1006 bsd_to_linux_signal
,
1017 exec_linux_imgact_try
,
1025 linux_copyout_strings
,
/*
 * ELF brand descriptors for Linux binaries.  The only fields present in
 * this listing are the interpreter paths: /lib/ld-linux.so.1 for
 * linux_brand and /lib/ld-linux.so.2 for linux_glibc2brand.
 *
 * linux_brandlist is the array of brand pointers that linux_elf_modevent()
 * walks until it meets a NULL entry.
 * NOTE(review): the remaining brand fields and the contents of the list are
 * missing from this listing.
 */
1030 static Elf32_Brandinfo linux_brand
= {
1035 "/lib/ld-linux.so.1",
1041 static Elf32_Brandinfo linux_glibc2brand
= {
1046 "/lib/ld-linux.so.2",
1052 Elf32_Brandinfo
*linux_brandlist
[] = {
/*
 * linux_elf_modevent(mod, type, data): module event handler for the Linux
 * ELF emulation.
 *
 * Set-up path, as far as it is visible here:
 *  - insert every brand in linux_brandlist with elf32_insert_brand_entry();
 *  - register every ioctl and device handler found in the two linker sets;
 *  - initialise emul_lock, emul_shared_lock, futex_list and futex_sx;
 *  - register the process_exit, schedtail and process_exec event handlers
 *    and remember their tags;
 *  - print that the handler was installed, or that the brand could not be
 *    inserted.
 *
 * Teardown path:
 *  - check every brand with elf32_brand_inuse();
 *  - remove every brand with elf32_remove_brand_entry();
 *  - unregister the ioctl and device handlers, destroy the three locks and
 *    deregister the three event handlers;
 *  - print that the handler was removed, or that it could not be
 *    deinstalled.
 *
 * NOTE(review): the switch on type, the error bookkeeping that selects
 * between the success and failure messages, the loop increments and the
 * return value are missing from this listing.
 */
1059 linux_elf_modevent(module_t mod
, int type
, void *data
)
1061 Elf32_Brandinfo
**brandinfo
;
1063 struct linux_ioctl_handler
**lihp
;
1064 struct linux_device_handler
**ldhp
;
1070 for (brandinfo
= &linux_brandlist
[0]; *brandinfo
!= NULL
;
1072 if (elf32_insert_brand_entry(*brandinfo
) < 0)
1075 SET_FOREACH(lihp
, linux_ioctl_handler_set
)
1076 linux_ioctl_register_handler(*lihp
);
1077 SET_FOREACH(ldhp
, linux_device_handler_set
)
1078 linux_device_register_handler(*ldhp
);
1079 mtx_init(&emul_lock
, "emuldata lock", NULL
, MTX_DEF
);
1080 sx_init(&emul_shared_lock
, "emuldata->shared lock");
1081 LIST_INIT(&futex_list
);
1082 sx_init(&futex_sx
, "futex protection lock");
1083 linux_exit_tag
= EVENTHANDLER_REGISTER(process_exit
, linux_proc_exit
,
1085 linux_schedtail_tag
= EVENTHANDLER_REGISTER(schedtail
, linux_schedtail
,
1087 linux_exec_tag
= EVENTHANDLER_REGISTER(process_exec
, linux_proc_exec
,
1090 printf("Linux ELF exec handler installed\n");
1092 printf("cannot insert Linux ELF brand handler\n");
1095 for (brandinfo
= &linux_brandlist
[0]; *brandinfo
!= NULL
;
1097 if (elf32_brand_inuse(*brandinfo
))
1100 for (brandinfo
= &linux_brandlist
[0];
1101 *brandinfo
!= NULL
; ++brandinfo
)
1102 if (elf32_remove_brand_entry(*brandinfo
) < 0)
1106 SET_FOREACH(lihp
, linux_ioctl_handler_set
)
1107 linux_ioctl_unregister_handler(*lihp
);
1108 SET_FOREACH(ldhp
, linux_device_handler_set
)
1109 linux_device_unregister_handler(*ldhp
);
1110 mtx_destroy(&emul_lock
);
1111 sx_destroy(&emul_shared_lock
);
1112 sx_destroy(&futex_sx
);
1113 EVENTHANDLER_DEREGISTER(process_exit
, linux_exit_tag
);
1114 EVENTHANDLER_DEREGISTER(schedtail
, linux_schedtail_tag
);
1115 EVENTHANDLER_DEREGISTER(process_exec
, linux_exec_tag
);
1117 printf("Linux ELF exec handler removed\n");
1119 printf("Could not deinstall ELF interpreter entry\n");
/*
 * Module glue: linux_elf_mod is the moduledata record, and DECLARE_MODULE
 * registers it under the name linuxelf at SI_SUB_EXEC with order
 * SI_ORDER_ANY.
 * NOTE(review): the fields of linux_elf_mod are missing from this listing;
 * presumably they name the module and point at linux_elf_modevent().
 */
1127 static moduledata_t linux_elf_mod
= {
1133 DECLARE_MODULE(linuxelf
, linux_elf_mod
, SI_SUB_EXEC
, SI_ORDER_ANY
);