leftover from 1:1 Userland threading stage 2.11/4
[dragonfly.git] / sys / emulation / linux / i386 / linux_sysvec.c
blob7f114af6254b618ba55d9305507f44ccade53576
1 /*-
2 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software withough specific prior written permission
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 * $FreeBSD: src/sys/i386/linux/linux_sysvec.c,v 1.55.2.9 2002/01/12 11:03:30 bde Exp $
29 * $DragonFly: src/sys/emulation/linux/i386/linux_sysvec.c,v 1.28 2007/02/06 05:56:03 y0netan1 Exp $
32 /* XXX we use functions that might not exist. */
33 #include "opt_compat.h"
35 #ifndef COMPAT_43
36 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
37 #endif
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/imgact.h>
42 #include <sys/imgact_aout.h>
43 #include <sys/imgact_elf.h>
44 #include <sys/kern_syscall.h>
45 #include <sys/lock.h>
46 #include <sys/malloc.h>
47 #include <sys/proc.h>
48 #include <sys/signalvar.h>
49 #include <sys/sysent.h>
50 #include <sys/sysproto.h>
52 #include <vm/vm.h>
53 #include <vm/vm_param.h>
54 #include <vm/vm_page.h>
55 #include <vm/vm_extern.h>
56 #include <sys/exec.h>
57 #include <sys/kernel.h>
58 #include <sys/module.h>
59 #include <machine/cpu.h>
61 #include "linux.h"
62 #include "linux_proto.h"
63 #include "../linux_signal.h"
64 #include "../linux_util.h"
66 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
/*
 * The two characters "#!" that start a shell script, read as one
 * 16-bit integer.  The numeric value depends on the host byte order.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC	0x2123 /* #! */
#else
#define SHELLMAGIC	0x2321
#endif
/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves.  Map them
 * to syscall 0.  This is slightly less bogus than using
 * ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0
83 extern char linux_sigcode[];
84 extern int linux_szsigcode;
86 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
88 static int linux_fixup (register_t **stack_base,
89 struct image_params *iparams);
90 static int elf_linux_fixup (register_t **stack_base,
91 struct image_params *iparams);
92 static void linux_prepsyscall (struct trapframe *tf, int *args,
93 u_int *code, caddr_t *params);
94 static void linux_sendsig (sig_t catcher, int sig, sigset_t *mask,
95 u_long code);
98 * Linux syscalls return negative errno's, we do positive and map them
100 static int bsd_to_linux_errno[ELAST + 1] = {
101 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
102 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
103 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
104 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
105 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
106 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
107 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
108 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
109 -6, -6, -43, -42, -75, -6, -84
112 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
113 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
114 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
115 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
116 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
117 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
118 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
119 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
120 0, LINUX_SIGUSR1, LINUX_SIGUSR2
123 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
124 SIGHUP, SIGINT, SIGQUIT, SIGILL,
125 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
126 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
127 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
128 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
129 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
130 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
131 SIGIO, SIGURG, 0
/* Value reported to Linux for native trap codes that have no mapping. */
#define LINUX_T_UNKNOWN  255

/*
 * Native trap code -> Linux trap number.  The index is the native trap
 * code (see the trailing comment on each row).
 */
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Bounds-checked lookup: codes past the end of the table yield
 * LINUX_T_UNKNOWN.  Note that (code) is evaluated twice.
 */
#define bsd_to_linux_trapcode(code) \
    ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
     _bsd_to_linux_trapcode[(code)]: \
     LINUX_T_UNKNOWN)
174 * If FreeBSD & Linux have a difference of opinion about what a trap
175 * means, deal with it here.
177 static int
178 translate_traps(int signal, int trap_code)
180 if (signal != SIGBUS)
181 return signal;
182 switch (trap_code) {
183 case T_PROTFLT:
184 case T_TSSFLT:
185 case T_DOUBLEFLT:
186 case T_PAGEFLT:
187 return SIGSEGV;
188 default:
189 return signal;
193 static int
194 linux_fixup(register_t **stack_base, struct image_params *imgp)
196 register_t *argv, *envp;
198 argv = *stack_base;
199 envp = *stack_base + (imgp->args->argc + 1);
200 (*stack_base)--;
201 **stack_base = (intptr_t)(void *)envp;
202 (*stack_base)--;
203 **stack_base = (intptr_t)(void *)argv;
204 (*stack_base)--;
205 **stack_base = imgp->args->argc;
206 return 0;
209 static int
210 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
212 Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
213 register_t *pos;
215 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
217 if (args->trace) {
218 AUXARGS_ENTRY(pos, AT_DEBUG, 1);
220 if (args->execfd != -1) {
221 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
223 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
224 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
225 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
226 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
227 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
228 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
229 AUXARGS_ENTRY(pos, AT_BASE, args->base);
230 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
231 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
232 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
233 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
234 AUXARGS_ENTRY(pos, AT_NULL, 0);
236 kfree(imgp->auxargs, M_TEMP);
237 imgp->auxargs = NULL;
239 (*stack_base)--;
240 **stack_base = (long)imgp->args->argc;
241 return 0;
/* User code/data segment selectors loaded into the handler's trapframe. */
extern int _ucodesel, _udatasel;
/* Added to the sigcode start address to locate the rt trampoline entry. */
extern unsigned long linux_sznonrtsigcode;
247 static void
248 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
250 struct proc *p = curproc;
251 struct lwp *lp = curthread->td_lwp;
252 struct trapframe *regs;
253 struct l_rt_sigframe *fp, frame;
254 int oonstack;
256 regs = lp->lwp_md.md_regs;
257 oonstack = lp->lwp_sigstk.ss_flags & SS_ONSTACK;
259 #ifdef DEBUG
260 if (ldebug(rt_sendsig))
261 kprintf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
262 catcher, sig, (void*)mask, code);
263 #endif
265 * Allocate space for the signal handler context.
267 if ((lp->lwp_flag & LWP_ALTSTACK) && !oonstack &&
268 SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
269 fp = (struct l_rt_sigframe *)(lp->lwp_sigstk.ss_sp +
270 lp->lwp_sigstk.ss_size - sizeof(struct l_rt_sigframe));
271 lp->lwp_sigstk.ss_flags |= SS_ONSTACK;
272 } else
273 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
276 * grow() will return FALSE if the fp will not fit inside the stack
277 * and the stack can not be grown. useracc will return FALSE
278 * if access is denied.
280 if ((grow_stack (p, (int)fp) == FALSE) ||
281 !useracc((caddr_t)fp, sizeof (struct l_rt_sigframe),
282 VM_PROT_WRITE)) {
284 * Process has trashed its stack; give it an illegal
285 * instruction to halt it in its tracks.
287 SIGACTION(p, SIGILL) = SIG_DFL;
288 SIGDELSET(p->p_sigignore, SIGILL);
289 SIGDELSET(p->p_sigcatch, SIGILL);
290 SIGDELSET(lp->lwp_sigmask, SIGILL);
291 #ifdef DEBUG
292 if (ldebug(rt_sendsig))
293 kprintf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
294 fp, oonstack);
295 #endif
296 ksignal(p, SIGILL);
297 return;
301 * Build the argument list for the signal handler.
303 if (p->p_sysent->sv_sigtbl)
304 if (sig <= p->p_sysent->sv_sigsize)
305 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
307 frame.sf_handler = catcher;
308 frame.sf_sig = sig;
309 frame.sf_siginfo = &fp->sf_si;
310 frame.sf_ucontext = &fp->sf_sc;
312 /* Fill siginfo structure. */
313 frame.sf_si.lsi_signo = sig;
314 frame.sf_si.lsi_code = code;
315 frame.sf_si.lsi_addr = (void *)regs->tf_err;
318 * Build the signal context to be used by sigreturn.
320 frame.sf_sc.uc_flags = 0; /* XXX ??? */
321 frame.sf_sc.uc_link = NULL; /* XXX ??? */
323 frame.sf_sc.uc_stack.ss_sp = lp->lwp_sigstk.ss_sp;
324 frame.sf_sc.uc_stack.ss_size = lp->lwp_sigstk.ss_size;
325 frame.sf_sc.uc_stack.ss_flags = (lp->lwp_flag & LWP_ALTSTACK)
326 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
328 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
330 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
331 frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs;
332 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
333 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
334 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
335 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
336 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
337 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
338 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
339 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
340 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
341 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
342 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
343 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
344 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
345 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
346 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
347 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
348 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
350 #ifdef DEBUG
351 if (ldebug(rt_sendsig))
352 kprintf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
353 frame.sf_sc.uc_stack.ss_flags, lp->lwp_sigstk.ss_sp,
354 lp->lwp_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
355 #endif
357 if (copyout(&frame, fp, sizeof(frame)) != 0) {
359 * Process has trashed its stack; give it an illegal
360 * instruction to halt it in its tracks.
362 sigexit(p, SIGILL);
363 /* NOTREACHED */
367 * Build context to run handler in.
369 regs->tf_esp = (int)fp;
370 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
371 linux_sznonrtsigcode;
372 regs->tf_eflags &= ~(PSL_T | PSL_VM);
373 regs->tf_cs = _ucodesel;
374 regs->tf_ds = _udatasel;
375 regs->tf_es = _udatasel;
376 /* allow %fs and %gs to be inherited by the signal handler */
378 regs->tf_fs = _udatasel;
379 regs->tf_gs = _udatasel;
381 regs->tf_ss = _udatasel;
386 * Send an interrupt to process.
388 * Stack is set up to allow sigcode stored
389 * in u. to call routine, followed by kcall
390 * to sigreturn routine below. After sigreturn
391 * resets the signal mask, the stack, and the
392 * frame pointer, it returns to the user
393 * specified pc, psl.
396 static void
397 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
399 struct proc *p = curproc;
400 struct lwp *lp = curthread->td_lwp;
401 struct trapframe *regs;
402 struct l_sigframe *fp, frame;
403 l_sigset_t lmask;
404 int oonstack, i;
406 if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
407 /* Signal handler installed with SA_SIGINFO. */
408 linux_rt_sendsig(catcher, sig, mask, code);
409 return;
412 regs = lp->lwp_md.md_regs;
413 oonstack = lp->lwp_sigstk.ss_flags & SS_ONSTACK;
415 #ifdef DEBUG
416 if (ldebug(sendsig))
417 kprintf(ARGS(sendsig, "%p, %d, %p, %lu"),
418 catcher, sig, (void*)mask, code);
419 #endif
422 * Allocate space for the signal handler context.
424 if ((lp->lwp_flag & LWP_ALTSTACK) && !oonstack &&
425 SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
426 fp = (struct l_sigframe *)(lp->lwp_sigstk.ss_sp +
427 lp->lwp_sigstk.ss_size - sizeof(struct l_sigframe));
428 lp->lwp_sigstk.ss_flags |= SS_ONSTACK;
429 } else
430 fp = (struct l_sigframe *)regs->tf_esp - 1;
433 * grow() will return FALSE if the fp will not fit inside the stack
434 * and the stack can not be grown. useracc will return FALSE
435 * if access is denied.
437 if ((grow_stack (p, (int)fp) == FALSE) ||
438 !useracc((caddr_t)fp, sizeof (struct l_sigframe),
439 VM_PROT_WRITE)) {
441 * Process has trashed its stack; give it an illegal
442 * instruction to halt it in its tracks.
444 SIGACTION(p, SIGILL) = SIG_DFL;
445 SIGDELSET(p->p_sigignore, SIGILL);
446 SIGDELSET(p->p_sigcatch, SIGILL);
447 SIGDELSET(lp->lwp_sigmask, SIGILL);
448 ksignal(p, SIGILL);
449 return;
453 * Build the argument list for the signal handler.
455 if (p->p_sysent->sv_sigtbl)
456 if (sig <= p->p_sysent->sv_sigsize)
457 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
459 frame.sf_handler = catcher;
460 frame.sf_sig = sig;
462 bsd_to_linux_sigset(mask, &lmask);
465 * Build the signal context to be used by sigreturn.
467 frame.sf_sc.sc_mask = lmask.__bits[0];
468 frame.sf_sc.sc_gs = regs->tf_gs;
469 frame.sf_sc.sc_fs = regs->tf_fs;
470 frame.sf_sc.sc_es = regs->tf_es;
471 frame.sf_sc.sc_ds = regs->tf_ds;
472 frame.sf_sc.sc_edi = regs->tf_edi;
473 frame.sf_sc.sc_esi = regs->tf_esi;
474 frame.sf_sc.sc_ebp = regs->tf_ebp;
475 frame.sf_sc.sc_ebx = regs->tf_ebx;
476 frame.sf_sc.sc_edx = regs->tf_edx;
477 frame.sf_sc.sc_ecx = regs->tf_ecx;
478 frame.sf_sc.sc_eax = regs->tf_eax;
479 frame.sf_sc.sc_eip = regs->tf_eip;
480 frame.sf_sc.sc_cs = regs->tf_cs;
481 frame.sf_sc.sc_eflags = regs->tf_eflags;
482 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
483 frame.sf_sc.sc_ss = regs->tf_ss;
484 frame.sf_sc.sc_err = regs->tf_err;
485 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
487 bzero(&frame.sf_fpstate, sizeof(struct l_fpstate));
489 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
490 frame.sf_extramask[i] = lmask.__bits[i+1];
492 if (copyout(&frame, fp, sizeof(frame)) != 0) {
494 * Process has trashed its stack; give it an illegal
495 * instruction to halt it in its tracks.
497 sigexit(p, SIGILL);
498 /* NOTREACHED */
502 * Build context to run handler in.
504 regs->tf_esp = (int)fp;
505 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
506 regs->tf_eflags &= ~(PSL_T | PSL_VM);
507 regs->tf_cs = _ucodesel;
508 regs->tf_ds = _udatasel;
509 regs->tf_es = _udatasel;
510 /* Allow %fs and %gs to be inherited by the signal handler */
512 regs->tf_fs = _udatasel;
513 regs->tf_gs = _udatasel;
515 regs->tf_ss = _udatasel;
519 * System call to cleanup state after a signal
520 * has been taken. Reset signal mask and
521 * stack state from context left by sendsig (above).
522 * Return to previous pc and psl as specified by
523 * context left by sendsig. Check carefully to
524 * make sure that the user has not modified the
525 * psl to gain improper privileges or to cause
526 * a machine fault.
529 sys_linux_sigreturn(struct linux_sigreturn_args *args)
531 struct lwp *lp = curthread->td_lwp;
532 struct l_sigframe frame;
533 struct trapframe *regs;
534 l_sigset_t lmask;
535 int eflags, i;
537 regs = lp->lwp_md.md_regs;
539 #ifdef DEBUG
540 if (ldebug(sigreturn))
541 kprintf(ARGS(sigreturn, "%p"), (void *)args->sfp);
542 #endif
544 * The trampoline code hands us the sigframe.
545 * It is unsafe to keep track of it ourselves, in the event that a
546 * program jumps out of a signal handler.
548 if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
549 return (EFAULT);
552 * Check for security violations.
554 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
555 eflags = frame.sf_sc.sc_eflags;
557 * XXX do allow users to change the privileged flag PSL_RF. The
558 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
559 * sometimes set it there too. tf_eflags is kept in the signal
560 * context during signal handling and there is no other place
561 * to remember it, so the PSL_RF bit may be corrupted by the
562 * signal handler without us knowing. Corruption of the PSL_RF
563 * bit at worst causes one more or one less debugger trap, so
564 * allowing it is fairly harmless.
566 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
567 return(EINVAL);
571 * Don't allow users to load a valid privileged %cs. Let the
572 * hardware check for invalid selectors, excess privilege in
573 * other selectors, invalid %eip's and invalid %esp's.
575 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
576 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
577 trapsignal(lp, SIGBUS, T_PROTFLT);
578 return(EINVAL);
581 lp->lwp_sigstk.ss_flags &= ~SS_ONSTACK;
582 lmask.__bits[0] = frame.sf_sc.sc_mask;
583 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
584 lmask.__bits[i+1] = frame.sf_extramask[i];
585 linux_to_bsd_sigset(&lmask, &lp->lwp_sigmask);
586 SIG_CANTMASK(lp->lwp_sigmask);
589 * Restore signal context.
591 /* %gs was restored by the trampoline. */
592 regs->tf_fs = frame.sf_sc.sc_fs;
593 regs->tf_es = frame.sf_sc.sc_es;
594 regs->tf_ds = frame.sf_sc.sc_ds;
595 regs->tf_edi = frame.sf_sc.sc_edi;
596 regs->tf_esi = frame.sf_sc.sc_esi;
597 regs->tf_ebp = frame.sf_sc.sc_ebp;
598 regs->tf_ebx = frame.sf_sc.sc_ebx;
599 regs->tf_edx = frame.sf_sc.sc_edx;
600 regs->tf_ecx = frame.sf_sc.sc_ecx;
601 regs->tf_eax = frame.sf_sc.sc_eax;
602 regs->tf_eip = frame.sf_sc.sc_eip;
603 regs->tf_cs = frame.sf_sc.sc_cs;
604 regs->tf_eflags = eflags;
605 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
606 regs->tf_ss = frame.sf_sc.sc_ss;
608 return (EJUSTRETURN);
612 * System call to cleanup state after a signal
613 * has been taken. Reset signal mask and
614 * stack state from context left by rt_sendsig (above).
615 * Return to previous pc and psl as specified by
616 * context left by sendsig. Check carefully to
617 * make sure that the user has not modified the
618 * psl to gain improper privileges or to cause
619 * a machine fault.
622 sys_linux_rt_sigreturn(struct linux_rt_sigreturn_args *args)
624 struct lwp *lp = curthread->td_lwp;
625 struct l_ucontext uc;
626 struct l_sigcontext *context;
627 l_stack_t *lss;
628 stack_t ss;
629 struct trapframe *regs;
630 int eflags;
632 regs = lp->lwp_md.md_regs;
634 #ifdef DEBUG
635 if (ldebug(rt_sigreturn))
636 kprintf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
637 #endif
639 * The trampoline code hands us the ucontext.
640 * It is unsafe to keep track of it ourselves, in the event that a
641 * program jumps out of a signal handler.
643 if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
644 return (EFAULT);
646 context = &uc.uc_mcontext;
649 * Check for security violations.
651 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
652 eflags = context->sc_eflags;
654 * XXX do allow users to change the privileged flag PSL_RF. The
655 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
656 * sometimes set it there too. tf_eflags is kept in the signal
657 * context during signal handling and there is no other place
658 * to remember it, so the PSL_RF bit may be corrupted by the
659 * signal handler without us knowing. Corruption of the PSL_RF
660 * bit at worst causes one more or one less debugger trap, so
661 * allowing it is fairly harmless.
663 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
664 return(EINVAL);
668 * Don't allow users to load a valid privileged %cs. Let the
669 * hardware check for invalid selectors, excess privilege in
670 * other selectors, invalid %eip's and invalid %esp's.
672 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
673 if (!CS_SECURE(context->sc_cs)) {
674 trapsignal(lp, SIGBUS, T_PROTFLT);
675 return(EINVAL);
678 lp->lwp_sigstk.ss_flags &= ~SS_ONSTACK;
679 linux_to_bsd_sigset(&uc.uc_sigmask, &lp->lwp_sigmask);
680 SIG_CANTMASK(lp->lwp_sigmask);
683 * Restore signal context
685 /* %gs was restored by the trampoline. */
686 regs->tf_fs = context->sc_fs;
687 regs->tf_es = context->sc_es;
688 regs->tf_ds = context->sc_ds;
689 regs->tf_edi = context->sc_edi;
690 regs->tf_esi = context->sc_esi;
691 regs->tf_ebp = context->sc_ebp;
692 regs->tf_ebx = context->sc_ebx;
693 regs->tf_edx = context->sc_edx;
694 regs->tf_ecx = context->sc_ecx;
695 regs->tf_eax = context->sc_eax;
696 regs->tf_eip = context->sc_eip;
697 regs->tf_cs = context->sc_cs;
698 regs->tf_eflags = eflags;
699 regs->tf_esp = context->sc_esp_at_signal;
700 regs->tf_ss = context->sc_ss;
703 * call sigaltstack & ignore results..
705 lss = &uc.uc_stack;
706 ss.ss_sp = lss->ss_sp;
707 ss.ss_size = lss->ss_size;
708 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
710 #ifdef DEBUG
711 if (ldebug(rt_sigreturn))
712 kprintf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
713 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
714 #endif
715 kern_sigaltstack(&ss, NULL);
717 return (EJUSTRETURN);
721 * Prep arguments.
723 * MUST BE MPSAFE
725 static void
726 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
728 args[0] = tf->tf_ebx;
729 args[1] = tf->tf_ecx;
730 args[2] = tf->tf_edx;
731 args[3] = tf->tf_esi;
732 args[4] = tf->tf_edi;
733 args[5] = tf->tf_ebp;
734 *params = NULL; /* no copyin */
738 * If a linux binary is exec'ing something, try this image activator
739 * first. We override standard shell script execution in order to
740 * be able to modify the interpreter path. We only do this if a linux
741 * binary is doing the exec, so we do not create an EXEC module for it.
743 static int exec_linux_imgact_try (struct image_params *iparams);
745 static int
746 exec_linux_imgact_try(struct image_params *imgp)
748 const char *head = (const char *)imgp->image_header;
749 int error = -1;
752 * The interpreter for shell scripts run from a linux binary needs
753 * to be located in /compat/linux if possible in order to recursively
754 * maintain linux path emulation.
756 if (((const short *)head)[0] == SHELLMAGIC) {
758 * Run our normal shell image activator. If it succeeds attempt
759 * to use the alternate path for the interpreter. If an alternate
760 * path is found, use our stringspace to store it.
762 if ((error = exec_shell_imgact(imgp)) == 0) {
763 linux_translate_path(imgp->interpreter_name,
764 MAXSHELLCMDLEN);
767 return(error);
770 struct sysentvec linux_sysvec = {
771 LINUX_SYS_MAXSYSCALL,
772 linux_sysent,
773 0xff,
774 LINUX_SIGTBLSZ,
775 bsd_to_linux_signal,
776 ELAST + 1,
777 bsd_to_linux_errno,
778 translate_traps,
779 linux_fixup,
780 linux_sendsig,
781 linux_sigcode,
782 &linux_szsigcode,
783 linux_prepsyscall,
784 "Linux a.out",
785 NULL,
786 exec_linux_imgact_try,
787 LINUX_MINSIGSTKSZ
790 struct sysentvec elf_linux_sysvec = {
791 LINUX_SYS_MAXSYSCALL,
792 linux_sysent,
793 0xff,
794 LINUX_SIGTBLSZ,
795 bsd_to_linux_signal,
796 ELAST + 1,
797 bsd_to_linux_errno,
798 translate_traps,
799 elf_linux_fixup,
800 linux_sendsig,
801 linux_sigcode,
802 &linux_szsigcode,
803 linux_prepsyscall,
804 "Linux ELF",
805 elf_coredump,
806 exec_linux_imgact_try,
807 LINUX_MINSIGSTKSZ
810 static int linux_match_abi_note(const Elf_Note *abi_note);
811 static int linux_match_suse_note(const Elf_Note *abi_note);
813 static Elf32_Brandinfo linux_brand = {
814 ELFOSABI_LINUX,
815 "Linux",
816 linux_match_abi_note,
817 "/compat/linux",
818 "/lib/ld-linux.so.1",
819 &elf_linux_sysvec
822 static Elf32_Brandinfo linux_glibc2brand = {
823 ELFOSABI_LINUX,
824 "Linux",
825 linux_match_abi_note,
826 "/compat/linux",
827 "/lib/ld-linux.so.2",
828 &elf_linux_sysvec
831 static Elf32_Brandinfo linux_suse_brand = {
832 ELFOSABI_LINUX,
833 "Linux",
834 linux_match_suse_note,
835 "/compat/linux",
836 "/lib/ld-linux.so.2",
837 &elf_linux_sysvec
840 Elf32_Brandinfo *linux_brandlist[] = {
841 &linux_brand,
842 &linux_glibc2brand,
843 &linux_suse_brand,
844 NULL
847 static int
848 linux_match_abi_note(const Elf_Note *abi_note)
850 const char *abi_name = (const char *)
851 ((const uint8_t *)abi_note + sizeof(*abi_note));
852 const uint32_t *descr = (const uint32_t *)
853 ((const uint8_t *)abi_name + abi_note->n_namesz);
855 if (abi_note->n_namesz != sizeof("GNU"))
856 return(FALSE);
857 if (memcmp(abi_name, "GNU", sizeof("GNU")))
858 return(FALSE);
859 if (abi_note->n_descsz < sizeof(uint32_t))
860 return(FALSE);
862 if (*descr != 0)
863 return(FALSE);
864 return(TRUE);
867 static int
868 linux_match_suse_note(const Elf_Note *abi_note)
870 const char *abi_name = (const char *)
871 ((const uint8_t *)abi_note + sizeof(*abi_note));
873 if (abi_note->n_namesz != sizeof("SuSE"))
874 return(FALSE);
875 if (memcmp(abi_name, "SuSE", sizeof("SuSE")))
876 return(FALSE);
878 return(TRUE);
881 static int
882 linux_elf_modevent(module_t mod, int type, void *data)
884 Elf32_Brandinfo **brandinfo;
885 int error;
887 error = 0;
889 switch(type) {
890 case MOD_LOAD:
891 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
892 ++brandinfo)
893 if (elf_insert_brand_entry(*brandinfo) < 0)
894 error = EINVAL;
895 if (error == 0) {
896 if (bootverbose)
897 kprintf("Linux ELF exec handler installed\n");
898 } else
899 kprintf("cannot insert Linux ELF brand handler\n");
900 break;
901 case MOD_UNLOAD:
902 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
903 ++brandinfo)
904 if (elf_brand_inuse(*brandinfo))
905 error = EBUSY;
906 if (error == 0) {
907 for (brandinfo = &linux_brandlist[0];
908 *brandinfo != NULL; ++brandinfo)
909 if (elf_remove_brand_entry(*brandinfo) < 0)
910 error = EINVAL;
912 if (error == 0) {
913 if (bootverbose)
914 kprintf("Linux ELF exec handler removed\n");
915 } else
916 kprintf("Could not deinstall ELF interpreter entry\n");
917 break;
918 default:
919 break;
921 return error;
924 static moduledata_t linux_elf_mod = {
925 "linuxelf",
926 linux_elf_modevent,
930 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);