41a5a1a73cc5b98ccfd22b5233fe69a02084fb01
[dragonfly.git] / sys / emulation / linux / i386 / linux_sysvec.c
blob41a5a1a73cc5b98ccfd22b5233fe69a02084fb01
1 /*-
2 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software withough specific prior written permission
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 * $FreeBSD: src/sys/i386/linux/linux_sysvec.c,v 1.55.2.9 2002/01/12 11:03:30 bde Exp $
29 * $DragonFly: src/sys/emulation/linux/i386/linux_sysvec.c,v 1.31 2008/04/21 15:47:53 dillon Exp $
32 /* XXX we use functions that might not exist. */
33 #include "opt_compat.h"
35 #ifndef COMPAT_43
36 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
37 #endif
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/imgact.h>
42 #include <sys/imgact_aout.h>
43 #include <sys/imgact_elf.h>
44 #include <sys/kern_syscall.h>
45 #include <sys/lock.h>
46 #include <sys/malloc.h>
47 #include <sys/proc.h>
48 #include <sys/signalvar.h>
49 #include <sys/sysent.h>
50 #include <sys/sysproto.h>
52 #include <vm/vm.h>
53 #include <vm/vm_param.h>
54 #include <vm/vm_page.h>
55 #include <vm/vm_extern.h>
56 #include <sys/exec.h>
57 #include <sys/kernel.h>
58 #include <sys/module.h>
59 #include <machine/cpu.h>
61 #include "linux.h"
62 #include "linux_proto.h"
63 #include "../linux_signal.h"
64 #include "../linux_util.h"
66 MODULE_VERSION(linux, 1);
68 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
70 #if BYTE_ORDER == LITTLE_ENDIAN
71 #define SHELLMAGIC 0x2123 /* #! */
72 #else
73 #define SHELLMAGIC 0x2321
74 #endif
77 * Allow the sendsig functions to use the ldebug() facility
78 * even though they are not syscalls themselves. Map them
79 * to syscall 0. This is slightly less bogus than using
80 * ldebug(sigreturn).
82 #define LINUX_SYS_linux_rt_sendsig 0
83 #define LINUX_SYS_linux_sendsig 0
85 extern char linux_sigcode[];
86 extern int linux_szsigcode;
88 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
90 static int linux_fixup (register_t **stack_base,
91 struct image_params *iparams);
92 static int elf_linux_fixup (register_t **stack_base,
93 struct image_params *iparams);
94 static void linux_prepsyscall (struct trapframe *tf, int *args,
95 u_int *code, caddr_t *params);
96 static void linux_sendsig (sig_t catcher, int sig, sigset_t *mask,
97 u_long code);
100 * Linux syscalls return negative errno's, we do positive and map them
102 static int bsd_to_linux_errno[ELAST + 1] = {
103 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
104 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
105 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
106 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
107 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
108 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
109 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
110 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
111 -6, -6, -43, -42, -75, -6, -84
114 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
115 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
116 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
117 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
118 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
119 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
120 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
121 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
122 0, LINUX_SIGUSR1, LINUX_SIGUSR2
125 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
126 SIGHUP, SIGINT, SIGQUIT, SIGILL,
127 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
128 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
129 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
130 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
131 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
132 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
133 SIGIO, SIGURG, 0
/* Value reported when a native trap has no Linux trap number listed. */
#define LINUX_T_UNKNOWN  255

/*
 * Native trap number -> Linux trap number.  The array index is the
 * native trap code (named in the per-entry comment where one exists);
 * the value is what gets stored in the Linux signal context's trapno.
 */
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/*  0 */
	6,			/*  1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/*  2 */
	3,			/*  3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/*  4 */
	LINUX_T_UNKNOWN,	/*  5 */
	16,			/*  6  T_ARITHTRAP */
	254,			/*  7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/*  8 */
	13,			/*  9  T_PROTFLT */
	1,			/* 10  T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12  T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14  T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18  T_DIVIDE */
	2,			/* 19  T_NMI */
	4,			/* 20  T_OFLOW */
	5,			/* 21  T_BOUND */
	7,			/* 22  T_DNA */
	8,			/* 23  T_DOUBLEFLT */
	9,			/* 24  T_FPOPFLT */
	10,			/* 25  T_TSSFLT */
	11,			/* 26  T_SEGNPFLT */
	12,			/* 27  T_STKFLT */
	18,			/* 28  T_MCHK */
	19,			/* 29  T_XMMFLT */
	15			/* 30  T_RESERVED */
};

/*
 * Bounds-checked lookup: codes past the end of the table yield
 * LINUX_T_UNKNOWN.  Note that the argument is evaluated twice.
 */
#define bsd_to_linux_trapcode(code)					\
	((code) < sizeof(_bsd_to_linux_trapcode) /			\
	    sizeof(_bsd_to_linux_trapcode[0])				\
	    ? _bsd_to_linux_trapcode[(code)]				\
	    : LINUX_T_UNKNOWN)

176 * If FreeBSD & Linux have a difference of opinion about what a trap
177 * means, deal with it here.
179 static int
180 translate_traps(int signal, int trap_code)
182 if (signal != SIGBUS)
183 return signal;
184 switch (trap_code) {
185 case T_PROTFLT:
186 case T_TSSFLT:
187 case T_DOUBLEFLT:
188 case T_PAGEFLT:
189 return SIGSEGV;
190 default:
191 return signal;
195 static int
196 linux_fixup(register_t **stack_base, struct image_params *imgp)
198 register_t *argv, *envp;
200 argv = *stack_base;
201 envp = *stack_base + (imgp->args->argc + 1);
202 (*stack_base)--;
203 **stack_base = (intptr_t)(void *)envp;
204 (*stack_base)--;
205 **stack_base = (intptr_t)(void *)argv;
206 (*stack_base)--;
207 **stack_base = imgp->args->argc;
208 return 0;
211 static int
212 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
214 Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
215 register_t *pos;
217 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
219 if (args->trace) {
220 AUXARGS_ENTRY(pos, AT_DEBUG, 1);
222 if (args->execfd != -1) {
223 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
225 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
226 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
227 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
228 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
229 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
230 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
231 AUXARGS_ENTRY(pos, AT_BASE, args->base);
232 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
233 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
234 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
235 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
236 AUXARGS_ENTRY(pos, AT_NULL, 0);
238 kfree(imgp->auxargs, M_TEMP);
239 imgp->auxargs = NULL;
241 (*stack_base)--;
242 **stack_base = (long)imgp->args->argc;
243 return 0;
/*
 * Segment selectors loaded into the trapframe before a signal handler
 * is entered (code selector for %cs, data selector for the others).
 */
extern int _ucodesel;
extern int _udatasel;

/*
 * Added to the start address of the signal trampoline to obtain the
 * entry point used for rt-style signal delivery.
 */
extern unsigned long linux_sznonrtsigcode;

249 static void
250 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
252 struct proc *p = curproc;
253 struct lwp *lp = curthread->td_lwp;
254 struct trapframe *regs;
255 struct l_rt_sigframe *fp, frame;
256 int oonstack;
258 regs = lp->lwp_md.md_regs;
259 oonstack = lp->lwp_sigstk.ss_flags & SS_ONSTACK;
261 #ifdef DEBUG
262 if (ldebug(rt_sendsig))
263 kprintf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
264 catcher, sig, (void*)mask, code);
265 #endif
267 * Allocate space for the signal handler context.
269 if ((lp->lwp_flag & LWP_ALTSTACK) && !oonstack &&
270 SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
271 fp = (struct l_rt_sigframe *)(lp->lwp_sigstk.ss_sp +
272 lp->lwp_sigstk.ss_size - sizeof(struct l_rt_sigframe));
273 lp->lwp_sigstk.ss_flags |= SS_ONSTACK;
274 } else
275 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
278 * grow() will return FALSE if the fp will not fit inside the stack
279 * and the stack can not be grown. useracc will return FALSE
280 * if access is denied.
282 if ((vm_map_growstack(p, (vm_offset_t)fp) != KERN_SUCCESS) ||
283 !useracc((caddr_t)fp, sizeof (struct l_rt_sigframe),
284 VM_PROT_WRITE)) {
286 * Process has trashed its stack; give it an illegal
287 * instruction to halt it in its tracks.
289 SIGACTION(p, SIGILL) = SIG_DFL;
290 SIGDELSET(p->p_sigignore, SIGILL);
291 SIGDELSET(p->p_sigcatch, SIGILL);
292 SIGDELSET(lp->lwp_sigmask, SIGILL);
293 #ifdef DEBUG
294 if (ldebug(rt_sendsig))
295 kprintf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
296 fp, oonstack);
297 #endif
298 lwpsignal(p, lp, SIGILL);
299 return;
303 * Build the argument list for the signal handler.
305 if (p->p_sysent->sv_sigtbl)
306 if (sig <= p->p_sysent->sv_sigsize)
307 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
309 frame.sf_handler = catcher;
310 frame.sf_sig = sig;
311 frame.sf_siginfo = &fp->sf_si;
312 frame.sf_ucontext = &fp->sf_sc;
314 /* Fill siginfo structure. */
315 frame.sf_si.lsi_signo = sig;
316 frame.sf_si.lsi_code = code;
317 frame.sf_si.lsi_addr = (void *)regs->tf_err;
320 * Build the signal context to be used by sigreturn.
322 frame.sf_sc.uc_flags = 0; /* XXX ??? */
323 frame.sf_sc.uc_link = NULL; /* XXX ??? */
325 frame.sf_sc.uc_stack.ss_sp = lp->lwp_sigstk.ss_sp;
326 frame.sf_sc.uc_stack.ss_size = lp->lwp_sigstk.ss_size;
327 frame.sf_sc.uc_stack.ss_flags = (lp->lwp_flag & LWP_ALTSTACK)
328 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
330 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
332 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
333 frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs;
334 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
335 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
336 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
337 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
338 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
339 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
340 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
341 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
342 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
343 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
344 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
345 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
346 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
347 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
348 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
349 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
350 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
352 #ifdef DEBUG
353 if (ldebug(rt_sendsig))
354 kprintf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
355 frame.sf_sc.uc_stack.ss_flags, lp->lwp_sigstk.ss_sp,
356 lp->lwp_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
357 #endif
359 if (copyout(&frame, fp, sizeof(frame)) != 0) {
361 * Process has trashed its stack; give it an illegal
362 * instruction to halt it in its tracks.
364 sigexit(lp, SIGILL);
365 /* NOTREACHED */
369 * Build context to run handler in.
371 regs->tf_esp = (int)fp;
372 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
373 linux_sznonrtsigcode;
376 * i386 abi specifies that the direction flag must be cleared
377 * on function entry
379 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
381 regs->tf_cs = _ucodesel;
382 regs->tf_ds = _udatasel;
383 regs->tf_es = _udatasel;
384 /* allow %fs and %gs to be inherited by the signal handler */
386 regs->tf_fs = _udatasel;
387 regs->tf_gs = _udatasel;
389 regs->tf_ss = _udatasel;
394 * Send an interrupt to process.
396 * Stack is set up to allow sigcode stored
397 * in u. to call routine, followed by kcall
398 * to sigreturn routine below. After sigreturn
399 * resets the signal mask, the stack, and the
400 * frame pointer, it returns to the user
401 * specified pc, psl.
404 static void
405 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
407 struct proc *p = curproc;
408 struct lwp *lp = curthread->td_lwp;
409 struct trapframe *regs;
410 struct l_sigframe *fp, frame;
411 l_sigset_t lmask;
412 int oonstack, i;
414 if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
415 /* Signal handler installed with SA_SIGINFO. */
416 linux_rt_sendsig(catcher, sig, mask, code);
417 return;
420 regs = lp->lwp_md.md_regs;
421 oonstack = lp->lwp_sigstk.ss_flags & SS_ONSTACK;
423 #ifdef DEBUG
424 if (ldebug(sendsig))
425 kprintf(ARGS(sendsig, "%p, %d, %p, %lu"),
426 catcher, sig, (void*)mask, code);
427 #endif
430 * Allocate space for the signal handler context.
432 if ((lp->lwp_flag & LWP_ALTSTACK) && !oonstack &&
433 SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
434 fp = (struct l_sigframe *)(lp->lwp_sigstk.ss_sp +
435 lp->lwp_sigstk.ss_size - sizeof(struct l_sigframe));
436 lp->lwp_sigstk.ss_flags |= SS_ONSTACK;
437 } else
438 fp = (struct l_sigframe *)regs->tf_esp - 1;
441 * grow() will return FALSE if the fp will not fit inside the stack
442 * and the stack can not be grown. useracc will return FALSE
443 * if access is denied.
445 if ((vm_map_growstack(p, (vm_offset_t)fp) != KERN_SUCCESS) ||
446 !useracc((caddr_t)fp, sizeof (struct l_sigframe),
447 VM_PROT_WRITE)) {
449 * Process has trashed its stack; give it an illegal
450 * instruction to halt it in its tracks.
452 SIGACTION(p, SIGILL) = SIG_DFL;
453 SIGDELSET(p->p_sigignore, SIGILL);
454 SIGDELSET(p->p_sigcatch, SIGILL);
455 SIGDELSET(lp->lwp_sigmask, SIGILL);
456 lwpsignal(p, lp, SIGILL);
457 return;
461 * Build the argument list for the signal handler.
463 if (p->p_sysent->sv_sigtbl)
464 if (sig <= p->p_sysent->sv_sigsize)
465 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
467 frame.sf_handler = catcher;
468 frame.sf_sig = sig;
470 bsd_to_linux_sigset(mask, &lmask);
473 * Build the signal context to be used by sigreturn.
475 frame.sf_sc.sc_mask = lmask.__bits[0];
476 frame.sf_sc.sc_gs = regs->tf_gs;
477 frame.sf_sc.sc_fs = regs->tf_fs;
478 frame.sf_sc.sc_es = regs->tf_es;
479 frame.sf_sc.sc_ds = regs->tf_ds;
480 frame.sf_sc.sc_edi = regs->tf_edi;
481 frame.sf_sc.sc_esi = regs->tf_esi;
482 frame.sf_sc.sc_ebp = regs->tf_ebp;
483 frame.sf_sc.sc_ebx = regs->tf_ebx;
484 frame.sf_sc.sc_edx = regs->tf_edx;
485 frame.sf_sc.sc_ecx = regs->tf_ecx;
486 frame.sf_sc.sc_eax = regs->tf_eax;
487 frame.sf_sc.sc_eip = regs->tf_eip;
488 frame.sf_sc.sc_cs = regs->tf_cs;
489 frame.sf_sc.sc_eflags = regs->tf_eflags;
490 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
491 frame.sf_sc.sc_ss = regs->tf_ss;
492 frame.sf_sc.sc_err = regs->tf_err;
493 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
495 bzero(&frame.sf_fpstate, sizeof(struct l_fpstate));
497 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
498 frame.sf_extramask[i] = lmask.__bits[i+1];
500 if (copyout(&frame, fp, sizeof(frame)) != 0) {
502 * Process has trashed its stack; give it an illegal
503 * instruction to halt it in its tracks.
505 sigexit(lp, SIGILL);
506 /* NOTREACHED */
510 * Build context to run handler in.
512 regs->tf_esp = (int)fp;
513 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
516 * i386 abi specifies that the direction flag must be cleared
517 * on function entry
519 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
521 regs->tf_cs = _ucodesel;
522 regs->tf_ds = _udatasel;
523 regs->tf_es = _udatasel;
524 /* Allow %fs and %gs to be inherited by the signal handler */
526 regs->tf_fs = _udatasel;
527 regs->tf_gs = _udatasel;
529 regs->tf_ss = _udatasel;
533 * System call to cleanup state after a signal
534 * has been taken. Reset signal mask and
535 * stack state from context left by sendsig (above).
536 * Return to previous pc and psl as specified by
537 * context left by sendsig. Check carefully to
538 * make sure that the user has not modified the
539 * psl to gain improper privileges or to cause
540 * a machine fault.
542 * MPSAFE
545 sys_linux_sigreturn(struct linux_sigreturn_args *args)
547 struct lwp *lp = curthread->td_lwp;
548 struct l_sigframe frame;
549 struct trapframe *regs;
550 l_sigset_t lmask;
551 int eflags, i;
553 regs = lp->lwp_md.md_regs;
555 #ifdef DEBUG
556 if (ldebug(sigreturn))
557 kprintf(ARGS(sigreturn, "%p"), (void *)args->sfp);
558 #endif
560 * The trampoline code hands us the sigframe.
561 * It is unsafe to keep track of it ourselves, in the event that a
562 * program jumps out of a signal handler.
564 if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
565 return (EFAULT);
568 * Check for security violations.
570 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
571 eflags = frame.sf_sc.sc_eflags;
573 * XXX do allow users to change the privileged flag PSL_RF. The
574 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
575 * sometimes set it there too. tf_eflags is kept in the signal
576 * context during signal handling and there is no other place
577 * to remember it, so the PSL_RF bit may be corrupted by the
578 * signal handler without us knowing. Corruption of the PSL_RF
579 * bit at worst causes one more or one less debugger trap, so
580 * allowing it is fairly harmless.
582 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
583 return(EINVAL);
587 * Don't allow users to load a valid privileged %cs. Let the
588 * hardware check for invalid selectors, excess privilege in
589 * other selectors, invalid %eip's and invalid %esp's.
591 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
592 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
593 trapsignal(lp, SIGBUS, T_PROTFLT);
594 return(EINVAL);
597 lp->lwp_sigstk.ss_flags &= ~SS_ONSTACK;
598 lmask.__bits[0] = frame.sf_sc.sc_mask;
599 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
600 lmask.__bits[i+1] = frame.sf_extramask[i];
601 linux_to_bsd_sigset(&lmask, &lp->lwp_sigmask);
602 SIG_CANTMASK(lp->lwp_sigmask);
605 * Restore signal context.
607 /* %gs was restored by the trampoline. */
608 regs->tf_fs = frame.sf_sc.sc_fs;
609 regs->tf_es = frame.sf_sc.sc_es;
610 regs->tf_ds = frame.sf_sc.sc_ds;
611 regs->tf_edi = frame.sf_sc.sc_edi;
612 regs->tf_esi = frame.sf_sc.sc_esi;
613 regs->tf_ebp = frame.sf_sc.sc_ebp;
614 regs->tf_ebx = frame.sf_sc.sc_ebx;
615 regs->tf_edx = frame.sf_sc.sc_edx;
616 regs->tf_ecx = frame.sf_sc.sc_ecx;
617 regs->tf_eax = frame.sf_sc.sc_eax;
618 regs->tf_eip = frame.sf_sc.sc_eip;
619 regs->tf_cs = frame.sf_sc.sc_cs;
620 regs->tf_eflags = eflags;
621 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
622 regs->tf_ss = frame.sf_sc.sc_ss;
624 return (EJUSTRETURN);
628 * System call to cleanup state after a signal
629 * has been taken. Reset signal mask and
630 * stack state from context left by rt_sendsig (above).
631 * Return to previous pc and psl as specified by
632 * context left by sendsig. Check carefully to
633 * make sure that the user has not modified the
634 * psl to gain improper privileges or to cause
635 * a machine fault.
637 * MPSAFE
640 sys_linux_rt_sigreturn(struct linux_rt_sigreturn_args *args)
642 struct lwp *lp = curthread->td_lwp;
643 struct l_ucontext uc;
644 struct l_sigcontext *context;
645 l_stack_t *lss;
646 stack_t ss;
647 struct trapframe *regs;
648 int eflags;
650 regs = lp->lwp_md.md_regs;
652 #ifdef DEBUG
653 if (ldebug(rt_sigreturn))
654 kprintf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
655 #endif
657 * The trampoline code hands us the ucontext.
658 * It is unsafe to keep track of it ourselves, in the event that a
659 * program jumps out of a signal handler.
661 if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
662 return (EFAULT);
664 context = &uc.uc_mcontext;
667 * Check for security violations.
669 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
670 eflags = context->sc_eflags;
672 * XXX do allow users to change the privileged flag PSL_RF. The
673 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
674 * sometimes set it there too. tf_eflags is kept in the signal
675 * context during signal handling and there is no other place
676 * to remember it, so the PSL_RF bit may be corrupted by the
677 * signal handler without us knowing. Corruption of the PSL_RF
678 * bit at worst causes one more or one less debugger trap, so
679 * allowing it is fairly harmless.
681 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
682 return(EINVAL);
686 * Don't allow users to load a valid privileged %cs. Let the
687 * hardware check for invalid selectors, excess privilege in
688 * other selectors, invalid %eip's and invalid %esp's.
690 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
691 if (!CS_SECURE(context->sc_cs)) {
692 trapsignal(lp, SIGBUS, T_PROTFLT);
693 return(EINVAL);
696 lp->lwp_sigstk.ss_flags &= ~SS_ONSTACK;
697 linux_to_bsd_sigset(&uc.uc_sigmask, &lp->lwp_sigmask);
698 SIG_CANTMASK(lp->lwp_sigmask);
701 * Restore signal context
703 /* %gs was restored by the trampoline. */
704 regs->tf_fs = context->sc_fs;
705 regs->tf_es = context->sc_es;
706 regs->tf_ds = context->sc_ds;
707 regs->tf_edi = context->sc_edi;
708 regs->tf_esi = context->sc_esi;
709 regs->tf_ebp = context->sc_ebp;
710 regs->tf_ebx = context->sc_ebx;
711 regs->tf_edx = context->sc_edx;
712 regs->tf_ecx = context->sc_ecx;
713 regs->tf_eax = context->sc_eax;
714 regs->tf_eip = context->sc_eip;
715 regs->tf_cs = context->sc_cs;
716 regs->tf_eflags = eflags;
717 regs->tf_esp = context->sc_esp_at_signal;
718 regs->tf_ss = context->sc_ss;
721 * call sigaltstack & ignore results..
723 lss = &uc.uc_stack;
724 ss.ss_sp = lss->ss_sp;
725 ss.ss_size = lss->ss_size;
726 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
728 #ifdef DEBUG
729 if (ldebug(rt_sigreturn))
730 kprintf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
731 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
732 #endif
733 kern_sigaltstack(&ss, NULL);
735 return (EJUSTRETURN);
739 * Prep arguments.
741 * MUST BE MPSAFE
743 static void
744 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
746 args[0] = tf->tf_ebx;
747 args[1] = tf->tf_ecx;
748 args[2] = tf->tf_edx;
749 args[3] = tf->tf_esi;
750 args[4] = tf->tf_edi;
751 args[5] = tf->tf_ebp;
752 *params = NULL; /* no copyin */
756 * If a linux binary is exec'ing something, try this image activator
757 * first. We override standard shell script execution in order to
758 * be able to modify the interpreter path. We only do this if a linux
759 * binary is doing the exec, so we do not create an EXEC module for it.
761 static int exec_linux_imgact_try (struct image_params *iparams);
763 static int
764 exec_linux_imgact_try(struct image_params *imgp)
766 const char *head = (const char *)imgp->image_header;
767 int error = -1;
770 * The interpreter for shell scripts run from a linux binary needs
771 * to be located in /compat/linux if possible in order to recursively
772 * maintain linux path emulation.
774 if (((const short *)head)[0] == SHELLMAGIC) {
776 * Run our normal shell image activator. If it succeeds attempt
777 * to use the alternate path for the interpreter. If an alternate
778 * path is found, use our stringspace to store it.
780 if ((error = exec_shell_imgact(imgp)) == 0) {
781 linux_translate_path(imgp->interpreter_name,
782 MAXSHELLCMDLEN);
785 return(error);
788 struct sysentvec linux_sysvec = {
789 LINUX_SYS_MAXSYSCALL,
790 linux_sysent,
791 0xff,
792 LINUX_SIGTBLSZ,
793 bsd_to_linux_signal,
794 ELAST + 1,
795 bsd_to_linux_errno,
796 translate_traps,
797 linux_fixup,
798 linux_sendsig,
799 linux_sigcode,
800 &linux_szsigcode,
801 linux_prepsyscall,
802 "Linux a.out",
803 NULL,
804 exec_linux_imgact_try,
805 LINUX_MINSIGSTKSZ
808 struct sysentvec elf_linux_sysvec = {
809 LINUX_SYS_MAXSYSCALL,
810 linux_sysent,
811 0xff,
812 LINUX_SIGTBLSZ,
813 bsd_to_linux_signal,
814 ELAST + 1,
815 bsd_to_linux_errno,
816 translate_traps,
817 elf_linux_fixup,
818 linux_sendsig,
819 linux_sigcode,
820 &linux_szsigcode,
821 linux_prepsyscall,
822 "Linux ELF",
823 elf_coredump,
824 exec_linux_imgact_try,
825 LINUX_MINSIGSTKSZ
828 static int linux_match_abi_note(const Elf_Note *abi_note);
829 static int linux_match_suse_note(const Elf_Note *abi_note);
831 static Elf32_Brandinfo linux_brand = {
832 ELFOSABI_LINUX,
833 "Linux",
834 linux_match_abi_note,
835 "/compat/linux",
836 "/lib/ld-linux.so.1",
837 &elf_linux_sysvec
840 static Elf32_Brandinfo linux_glibc2brand = {
841 ELFOSABI_LINUX,
842 "Linux",
843 linux_match_abi_note,
844 "/compat/linux",
845 "/lib/ld-linux.so.2",
846 &elf_linux_sysvec
849 static Elf32_Brandinfo linux_suse_brand = {
850 ELFOSABI_LINUX,
851 "Linux",
852 linux_match_suse_note,
853 "/compat/linux",
854 "/lib/ld-linux.so.2",
855 &elf_linux_sysvec
858 Elf32_Brandinfo *linux_brandlist[] = {
859 &linux_brand,
860 &linux_glibc2brand,
861 &linux_suse_brand,
862 NULL
865 static int
866 linux_match_abi_note(const Elf_Note *abi_note)
868 const char *abi_name = (const char *)
869 ((const uint8_t *)abi_note + sizeof(*abi_note));
870 const uint32_t *descr = (const uint32_t *)
871 ((const uint8_t *)abi_name + abi_note->n_namesz);
873 if (abi_note->n_namesz != sizeof("GNU"))
874 return(FALSE);
875 if (memcmp(abi_name, "GNU", sizeof("GNU")))
876 return(FALSE);
877 if (abi_note->n_descsz < sizeof(uint32_t))
878 return(FALSE);
880 if (*descr != 0)
881 return(FALSE);
882 return(TRUE);
885 static int
886 linux_match_suse_note(const Elf_Note *abi_note)
888 const char *abi_name = (const char *)
889 ((const uint8_t *)abi_note + sizeof(*abi_note));
891 if (abi_note->n_namesz != sizeof("SuSE"))
892 return(FALSE);
893 if (memcmp(abi_name, "SuSE", sizeof("SuSE")))
894 return(FALSE);
896 return(TRUE);
899 static int
900 linux_elf_modevent(module_t mod, int type, void *data)
902 Elf32_Brandinfo **brandinfo;
903 int error;
905 error = 0;
907 switch(type) {
908 case MOD_LOAD:
909 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
910 ++brandinfo)
911 if (elf_insert_brand_entry(*brandinfo) < 0)
912 error = EINVAL;
913 if (error == 0) {
914 if (bootverbose)
915 kprintf("Linux ELF exec handler installed\n");
916 } else
917 kprintf("cannot insert Linux ELF brand handler\n");
918 break;
919 case MOD_UNLOAD:
920 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
921 ++brandinfo)
922 if (elf_brand_inuse(*brandinfo))
923 error = EBUSY;
924 if (error == 0) {
925 for (brandinfo = &linux_brandlist[0];
926 *brandinfo != NULL; ++brandinfo)
927 if (elf_remove_brand_entry(*brandinfo) < 0)
928 error = EINVAL;
930 if (error == 0) {
931 if (bootverbose)
932 kprintf("Linux ELF exec handler removed\n");
933 } else
934 kprintf("Could not deinstall ELF interpreter entry\n");
935 break;
936 default:
937 break;
939 return error;
942 static moduledata_t linux_elf_mod = {
943 "linuxelf",
944 linux_elf_modevent,
948 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);