2 * Copyright (c) 1994-1996 Søren Schmidt
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/systm.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_aout.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
45 #include <sys/signalvar.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysctl.h>
48 #include <sys/sysent.h>
49 #include <sys/sysproto.h>
50 #include <sys/vnode.h>
51 #include <sys/eventhandler.h>
55 #include <vm/vm_extern.h>
56 #include <vm/vm_map.h>
57 #include <vm/vm_object.h>
58 #include <vm/vm_page.h>
59 #include <vm/vm_param.h>
61 #include <machine/cpu.h>
62 #include <machine/cputypes.h>
63 #include <machine/md_var.h>
64 #include <machine/pcb.h>
66 #include <i386/linux/linux.h>
67 #include <i386/linux/linux_proto.h>
68 #include <compat/linux/linux_emul.h>
69 #include <compat/linux/linux_futex.h>
70 #include <compat/linux/linux_ioctl.h>
71 #include <compat/linux/linux_mib.h>
72 #include <compat/linux/linux_misc.h>
73 #include <compat/linux/linux_signal.h>
74 #include <compat/linux/linux_util.h>
75 #include <compat/linux/linux_vdso.h>
77 MODULE_VERSION(linux
, 1);
/*
 * The two leading bytes of a script ("#!", i.e. 0x23 0x21) read as one
 * 16-bit value.  The constant depends on host byte order; it is compared
 * against the first short of the image header in exec_linux_imgact_try().
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define	SHELLMAGIC	0x2123	/* #! */
#else
#define	SHELLMAGIC	0x2321
#endif
86 SYSCTL_PROC(_compat_linux
, OID_AUTO
, debug
,
87 CTLTYPE_STRING
| CTLFLAG_RW
,
88 0, 0, linux_sysctl_debug
, "A",
89 "Linux debugging control");
93 * Allow the sendsig functions to use the ldebug() facility
94 * even though they are not syscalls themselves. Map them
95 * to syscall 0. This is slightly less bogus than using
/*
 * Pseudo syscall numbers for the two signal-delivery routines, so that the
 * ldebug() facility can be used from them; both are mapped to syscall 0.
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0

/*
 * Location of struct ps_strings: immediately below LINUX_USRSTACK.  Used as
 * .sv_psstrings of elf_linux_sysvec.
 */
#define	LINUX_PS_STRINGS	(LINUX_USRSTACK - sizeof(struct ps_strings))
103 static int linux_szsigcode
;
104 static vm_object_t linux_shared_page_obj
;
105 static char *linux_shared_page_mapping
;
106 extern char _binary_linux_locore_o_start
;
107 extern char _binary_linux_locore_o_end
;
109 extern struct sysent linux_sysent
[LINUX_SYS_MAXSYSCALL
];
111 SET_DECLARE(linux_ioctl_handler_set
, struct linux_ioctl_handler
);
113 static int linux_fixup(register_t
**stack_base
,
114 struct image_params
*iparams
);
115 static int elf_linux_fixup(register_t
**stack_base
,
116 struct image_params
*iparams
);
117 static void linux_sendsig(sig_t catcher
, ksiginfo_t
*ksi
, sigset_t
*mask
);
118 static void exec_linux_setregs(struct thread
*td
,
119 struct image_params
*imgp
, u_long stack
);
120 static register_t
*linux_copyout_strings(struct image_params
*imgp
);
121 static boolean_t
linux_trans_osrel(const Elf_Note
*note
, int32_t *osrel
);
122 static void linux_vdso_install(void *param
);
123 static void linux_vdso_deinstall(void *param
);
125 static int linux_szplatform
;
126 const char *linux_kplatform
;
128 static eventhandler_tag linux_exit_tag
;
129 static eventhandler_tag linux_exec_tag
;
130 static eventhandler_tag linux_thread_dtor_tag
;
133 * Linux syscalls return negative errno's, we do positive and map them
135 * FreeBSD: src/sys/sys/errno.h
136 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
137 * linux-2.6.17.8/include/asm-generic/errno.h
139 static int bsd_to_linux_errno
[ELAST
+ 1] = {
140 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
141 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
142 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
143 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
144 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
145 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
146 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
147 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
148 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
152 #define LINUX_T_UNKNOWN 255
153 static int _bsd_to_linux_trapcode
[] = {
154 LINUX_T_UNKNOWN
, /* 0 */
155 6, /* 1 T_PRIVINFLT */
156 LINUX_T_UNKNOWN
, /* 2 */
158 LINUX_T_UNKNOWN
, /* 4 */
159 LINUX_T_UNKNOWN
, /* 5 */
160 16, /* 6 T_ARITHTRAP */
161 254, /* 7 T_ASTFLT */
162 LINUX_T_UNKNOWN
, /* 8 */
163 13, /* 9 T_PROTFLT */
164 1, /* 10 T_TRCTRAP */
165 LINUX_T_UNKNOWN
, /* 11 */
166 14, /* 12 T_PAGEFLT */
167 LINUX_T_UNKNOWN
, /* 13 */
168 17, /* 14 T_ALIGNFLT */
169 LINUX_T_UNKNOWN
, /* 15 */
170 LINUX_T_UNKNOWN
, /* 16 */
171 LINUX_T_UNKNOWN
, /* 17 */
177 8, /* 23 T_DOUBLEFLT */
178 9, /* 24 T_FPOPFLT */
179 10, /* 25 T_TSSFLT */
180 11, /* 26 T_SEGNPFLT */
181 12, /* 27 T_STKFLT */
183 19, /* 29 T_XMMFLT */
184 15 /* 30 T_RESERVED */
186 #define bsd_to_linux_trapcode(code) \
187 ((code)<nitems(_bsd_to_linux_trapcode)? \
188 _bsd_to_linux_trapcode[(code)]: \
/*
 * vDSO symbols used as integer addresses in this file:
 *   linux_sigcode    - loaded into tf_eip by linux_sendsig()
 *   linux_rt_sigcode - loaded into tf_eip by linux_rt_sendsig()
 *   linux_vsyscall   - exported as LINUX_AT_SYSINFO by elf_linux_fixup()
 */
LINUX_VDSO_SYM_INTPTR(linux_sigcode);
LINUX_VDSO_SYM_INTPTR(linux_rt_sigcode);
LINUX_VDSO_SYM_INTPTR(linux_vsyscall);
196 * If FreeBSD & Linux have a difference of opinion about what a trap
197 * means, deal with it here.
202 translate_traps(int signal
, int trap_code
)
204 if (signal
!= SIGBUS
)
218 linux_fixup(register_t
**stack_base
, struct image_params
*imgp
)
220 register_t
*argv
, *envp
;
223 envp
= *stack_base
+ (imgp
->args
->argc
+ 1);
225 suword(*stack_base
, (intptr_t)(void *)envp
);
227 suword(*stack_base
, (intptr_t)(void *)argv
);
229 suword(*stack_base
, imgp
->args
->argc
);
234 elf_linux_fixup(register_t
**stack_base
, struct image_params
*imgp
)
238 Elf32_Addr
*uplatform
;
239 struct ps_strings
*arginfo
;
243 KASSERT(curthread
->td_proc
== imgp
->proc
,
244 ("unsafe elf_linux_fixup(), should be curproc"));
247 issetugid
= imgp
->proc
->p_flag
& P_SUGID
? 1 : 0;
248 arginfo
= (struct ps_strings
*)p
->p_sysent
->sv_psstrings
;
249 uplatform
= (Elf32_Addr
*)((caddr_t
)arginfo
- linux_szplatform
);
250 args
= (Elf32_Auxargs
*)imgp
->auxargs
;
251 pos
= *stack_base
+ (imgp
->args
->argc
+ imgp
->args
->envc
+ 2);
253 AUXARGS_ENTRY(pos
, LINUX_AT_SYSINFO_EHDR
,
254 imgp
->proc
->p_sysent
->sv_shared_page_base
);
255 AUXARGS_ENTRY(pos
, LINUX_AT_SYSINFO
, linux_vsyscall
);
256 AUXARGS_ENTRY(pos
, LINUX_AT_HWCAP
, cpu_feature
);
259 * Do not export AT_CLKTCK when emulating a Linux kernel prior to 2.4.0,
260 * as it first appeared in 2.4.0-rc7.
261 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
262 * glibc falls back to the hard-coded CLK_TCK value when aux entry
264 * Also see linux_times() implementation.
266 if (linux_kernver(curthread
) >= LINUX_KERNVER_2004000
)
267 AUXARGS_ENTRY(pos
, LINUX_AT_CLKTCK
, stclohz
);
268 AUXARGS_ENTRY(pos
, AT_PHDR
, args
->phdr
);
269 AUXARGS_ENTRY(pos
, AT_PHENT
, args
->phent
);
270 AUXARGS_ENTRY(pos
, AT_PHNUM
, args
->phnum
);
271 AUXARGS_ENTRY(pos
, AT_PAGESZ
, args
->pagesz
);
272 AUXARGS_ENTRY(pos
, AT_FLAGS
, args
->flags
);
273 AUXARGS_ENTRY(pos
, AT_ENTRY
, args
->entry
);
274 AUXARGS_ENTRY(pos
, AT_BASE
, args
->base
);
275 AUXARGS_ENTRY(pos
, LINUX_AT_SECURE
, issetugid
);
276 AUXARGS_ENTRY(pos
, AT_UID
, imgp
->proc
->p_ucred
->cr_ruid
);
277 AUXARGS_ENTRY(pos
, AT_EUID
, imgp
->proc
->p_ucred
->cr_svuid
);
278 AUXARGS_ENTRY(pos
, AT_GID
, imgp
->proc
->p_ucred
->cr_rgid
);
279 AUXARGS_ENTRY(pos
, AT_EGID
, imgp
->proc
->p_ucred
->cr_svgid
);
280 AUXARGS_ENTRY(pos
, LINUX_AT_PLATFORM
, PTROUT(uplatform
));
281 AUXARGS_ENTRY(pos
, LINUX_AT_RANDOM
, imgp
->canary
);
282 if (imgp
->execpathp
!= 0)
283 AUXARGS_ENTRY(pos
, LINUX_AT_EXECFN
, imgp
->execpathp
);
284 if (args
->execfd
!= -1)
285 AUXARGS_ENTRY(pos
, AT_EXECFD
, args
->execfd
);
286 AUXARGS_ENTRY(pos
, AT_NULL
, 0);
288 free(imgp
->auxargs
, M_TEMP
);
289 imgp
->auxargs
= NULL
;
292 suword(*stack_base
, (register_t
)imgp
->args
->argc
);
297 * Copied from kern/kern_exec.c
300 linux_copyout_strings(struct image_params
*imgp
)
304 char *stringp
, *destp
;
305 register_t
*stack_base
;
306 struct ps_strings
*arginfo
;
307 char canary
[LINUX_AT_RANDOM_LEN
];
312 * Calculate string base and vector table pointers.
315 if (imgp
->execpath
!= NULL
&& imgp
->auxargs
!= NULL
)
316 execpath_len
= strlen(imgp
->execpath
) + 1;
319 arginfo
= (struct ps_strings
*)p
->p_sysent
->sv_psstrings
;
320 destp
= (caddr_t
)arginfo
- SPARE_USRSPACE
- linux_szplatform
-
321 roundup(sizeof(canary
), sizeof(char *)) -
322 roundup(execpath_len
, sizeof(char *)) -
323 roundup(ARG_MAX
- imgp
->args
->stringspace
, sizeof(char *));
326 * install LINUX_PLATFORM
328 copyout(linux_kplatform
, ((caddr_t
)arginfo
- linux_szplatform
),
331 if (execpath_len
!= 0) {
332 imgp
->execpathp
= (uintptr_t)arginfo
-
333 linux_szplatform
- execpath_len
;
334 copyout(imgp
->execpath
, (void *)imgp
->execpathp
, execpath_len
);
338 * Prepare the canary for SSP.
340 arc4rand(canary
, sizeof(canary
), 0);
341 imgp
->canary
= (uintptr_t)arginfo
- linux_szplatform
-
342 roundup(execpath_len
, sizeof(char *)) -
343 roundup(sizeof(canary
), sizeof(char *));
344 copyout(canary
, (void *)imgp
->canary
, sizeof(canary
));
347 * If we have a valid auxargs ptr, prepare some room
352 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
353 * lower compatibility.
355 imgp
->auxarg_size
= (imgp
->auxarg_size
) ? imgp
->auxarg_size
:
356 (LINUX_AT_COUNT
* 2);
358 * The '+ 2' is for the null pointers at the end of each of
359 * the arg and env vector sets, and imgp->auxarg_size is room
360 * for the arguments of the runtime loader.
362 vectp
= (char **)(destp
- (imgp
->args
->argc
+
363 imgp
->args
->envc
+ 2 + imgp
->auxarg_size
) * sizeof(char *));
366 * The '+ 2' is for the null pointers at the end of each of
367 * the arg and env vector sets
369 vectp
= (char **)(destp
- (imgp
->args
->argc
+ imgp
->args
->envc
+ 2) *
374 * vectp also becomes our initial stack base
376 stack_base
= (register_t
*)vectp
;
378 stringp
= imgp
->args
->begin_argv
;
379 argc
= imgp
->args
->argc
;
380 envc
= imgp
->args
->envc
;
383 * Copy out strings - arguments and environment.
385 copyout(stringp
, destp
, ARG_MAX
- imgp
->args
->stringspace
);
388 * Fill in "ps_strings" struct for ps, w, etc.
390 suword(&arginfo
->ps_argvstr
, (long)(intptr_t)vectp
);
391 suword(&arginfo
->ps_nargvstr
, argc
);
394 * Fill in argument portion of vector table.
396 for (; argc
> 0; --argc
) {
397 suword(vectp
++, (long)(intptr_t)destp
);
398 while (*stringp
++ != 0)
403 /* a null vector table pointer separates the argp's from the envp's */
406 suword(&arginfo
->ps_envstr
, (long)(intptr_t)vectp
);
407 suword(&arginfo
->ps_nenvstr
, envc
);
410 * Fill in environment portion of vector table.
412 for (; envc
> 0; --envc
) {
413 suword(vectp
++, (long)(intptr_t)destp
);
414 while (*stringp
++ != 0)
419 /* end of vector table is a null pointer */
426 linux_rt_sendsig(sig_t catcher
, ksiginfo_t
*ksi
, sigset_t
*mask
)
428 struct thread
*td
= curthread
;
429 struct proc
*p
= td
->td_proc
;
431 struct trapframe
*regs
;
432 struct l_rt_sigframe
*fp
, frame
;
436 sig
= ksi
->ksi_signo
;
437 code
= ksi
->ksi_code
;
438 PROC_LOCK_ASSERT(p
, MA_OWNED
);
440 mtx_assert(&psp
->ps_mtx
, MA_OWNED
);
442 oonstack
= sigonstack(regs
->tf_esp
);
445 if (ldebug(rt_sendsig
))
446 printf(ARGS(rt_sendsig
, "%p, %d, %p, %u"),
447 catcher
, sig
, (void*)mask
, code
);
450 * Allocate space for the signal handler context.
452 if ((td
->td_pflags
& TDP_ALTSTACK
) && !oonstack
&&
453 SIGISMEMBER(psp
->ps_sigonstack
, sig
)) {
454 fp
= (struct l_rt_sigframe
*)((uintptr_t)td
->td_sigstk
.ss_sp
+
455 td
->td_sigstk
.ss_size
- sizeof(struct l_rt_sigframe
));
457 fp
= (struct l_rt_sigframe
*)regs
->tf_esp
- 1;
458 mtx_unlock(&psp
->ps_mtx
);
461 * Build the argument list for the signal handler.
463 sig
= bsd_to_linux_signal(sig
);
465 bzero(&frame
, sizeof(frame
));
467 frame
.sf_handler
= catcher
;
469 frame
.sf_siginfo
= &fp
->sf_si
;
470 frame
.sf_ucontext
= &fp
->sf_sc
;
472 /* Fill in POSIX parts */
473 ksiginfo_to_lsiginfo(ksi
, &frame
.sf_si
, sig
);
476 * Build the signal context to be used by sigreturn.
478 frame
.sf_sc
.uc_flags
= 0; /* XXX ??? */
479 frame
.sf_sc
.uc_link
= NULL
; /* XXX ??? */
481 frame
.sf_sc
.uc_stack
.ss_sp
= td
->td_sigstk
.ss_sp
;
482 frame
.sf_sc
.uc_stack
.ss_size
= td
->td_sigstk
.ss_size
;
483 frame
.sf_sc
.uc_stack
.ss_flags
= (td
->td_pflags
& TDP_ALTSTACK
)
484 ? ((oonstack
) ? LINUX_SS_ONSTACK
: 0) : LINUX_SS_DISABLE
;
487 bsd_to_linux_sigset(mask
, &frame
.sf_sc
.uc_sigmask
);
489 frame
.sf_sc
.uc_mcontext
.sc_mask
= frame
.sf_sc
.uc_sigmask
.__mask
;
490 frame
.sf_sc
.uc_mcontext
.sc_gs
= rgs();
491 frame
.sf_sc
.uc_mcontext
.sc_fs
= regs
->tf_fs
;
492 frame
.sf_sc
.uc_mcontext
.sc_es
= regs
->tf_es
;
493 frame
.sf_sc
.uc_mcontext
.sc_ds
= regs
->tf_ds
;
494 frame
.sf_sc
.uc_mcontext
.sc_edi
= regs
->tf_edi
;
495 frame
.sf_sc
.uc_mcontext
.sc_esi
= regs
->tf_esi
;
496 frame
.sf_sc
.uc_mcontext
.sc_ebp
= regs
->tf_ebp
;
497 frame
.sf_sc
.uc_mcontext
.sc_ebx
= regs
->tf_ebx
;
498 frame
.sf_sc
.uc_mcontext
.sc_esp
= regs
->tf_esp
;
499 frame
.sf_sc
.uc_mcontext
.sc_edx
= regs
->tf_edx
;
500 frame
.sf_sc
.uc_mcontext
.sc_ecx
= regs
->tf_ecx
;
501 frame
.sf_sc
.uc_mcontext
.sc_eax
= regs
->tf_eax
;
502 frame
.sf_sc
.uc_mcontext
.sc_eip
= regs
->tf_eip
;
503 frame
.sf_sc
.uc_mcontext
.sc_cs
= regs
->tf_cs
;
504 frame
.sf_sc
.uc_mcontext
.sc_eflags
= regs
->tf_eflags
;
505 frame
.sf_sc
.uc_mcontext
.sc_esp_at_signal
= regs
->tf_esp
;
506 frame
.sf_sc
.uc_mcontext
.sc_ss
= regs
->tf_ss
;
507 frame
.sf_sc
.uc_mcontext
.sc_err
= regs
->tf_err
;
508 frame
.sf_sc
.uc_mcontext
.sc_cr2
= (register_t
)ksi
->ksi_addr
;
509 frame
.sf_sc
.uc_mcontext
.sc_trapno
= bsd_to_linux_trapcode(code
);
512 if (ldebug(rt_sendsig
))
513 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
514 frame
.sf_sc
.uc_stack
.ss_flags
, td
->td_sigstk
.ss_sp
,
515 td
->td_sigstk
.ss_size
, frame
.sf_sc
.uc_mcontext
.sc_mask
);
518 if (copyout(&frame
, fp
, sizeof(frame
)) != 0) {
520 * Process has trashed its stack; give it an illegal
521 * instruction to halt it in its tracks.
524 if (ldebug(rt_sendsig
))
525 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
533 * Build context to run handler in.
535 regs
->tf_esp
= (int)fp
;
536 regs
->tf_eip
= linux_rt_sigcode
;
537 regs
->tf_eflags
&= ~(PSL_T
| PSL_VM
| PSL_D
);
538 regs
->tf_cs
= _ucodesel
;
539 regs
->tf_ds
= _udatasel
;
540 regs
->tf_es
= _udatasel
;
541 regs
->tf_fs
= _udatasel
;
542 regs
->tf_ss
= _udatasel
;
544 mtx_lock(&psp
->ps_mtx
);
549 * Send an interrupt to process.
551 * Stack is set up to allow sigcode stored
552 * in u. to call routine, followed by kcall
553 * to sigreturn routine below. After sigreturn
554 * resets the signal mask, the stack, and the
555 * frame pointer, it returns to the user
559 linux_sendsig(sig_t catcher
, ksiginfo_t
*ksi
, sigset_t
*mask
)
561 struct thread
*td
= curthread
;
562 struct proc
*p
= td
->td_proc
;
564 struct trapframe
*regs
;
565 struct l_sigframe
*fp
, frame
;
570 PROC_LOCK_ASSERT(p
, MA_OWNED
);
572 sig
= ksi
->ksi_signo
;
573 code
= ksi
->ksi_code
;
574 mtx_assert(&psp
->ps_mtx
, MA_OWNED
);
575 if (SIGISMEMBER(psp
->ps_siginfo
, sig
)) {
576 /* Signal handler installed with SA_SIGINFO. */
577 linux_rt_sendsig(catcher
, ksi
, mask
);
581 oonstack
= sigonstack(regs
->tf_esp
);
585 printf(ARGS(sendsig
, "%p, %d, %p, %u"),
586 catcher
, sig
, (void*)mask
, code
);
590 * Allocate space for the signal handler context.
592 if ((td
->td_pflags
& TDP_ALTSTACK
) && !oonstack
&&
593 SIGISMEMBER(psp
->ps_sigonstack
, sig
)) {
594 fp
= (struct l_sigframe
*)((uintptr_t)td
->td_sigstk
.ss_sp
+
595 td
->td_sigstk
.ss_size
- sizeof(struct l_sigframe
));
597 fp
= (struct l_sigframe
*)regs
->tf_esp
- 1;
598 mtx_unlock(&psp
->ps_mtx
);
602 * Build the argument list for the signal handler.
604 sig
= bsd_to_linux_signal(sig
);
606 bzero(&frame
, sizeof(frame
));
608 frame
.sf_handler
= catcher
;
611 bsd_to_linux_sigset(mask
, &lmask
);
614 * Build the signal context to be used by sigreturn.
616 frame
.sf_sc
.sc_mask
= lmask
.__mask
;
617 frame
.sf_sc
.sc_gs
= rgs();
618 frame
.sf_sc
.sc_fs
= regs
->tf_fs
;
619 frame
.sf_sc
.sc_es
= regs
->tf_es
;
620 frame
.sf_sc
.sc_ds
= regs
->tf_ds
;
621 frame
.sf_sc
.sc_edi
= regs
->tf_edi
;
622 frame
.sf_sc
.sc_esi
= regs
->tf_esi
;
623 frame
.sf_sc
.sc_ebp
= regs
->tf_ebp
;
624 frame
.sf_sc
.sc_ebx
= regs
->tf_ebx
;
625 frame
.sf_sc
.sc_esp
= regs
->tf_esp
;
626 frame
.sf_sc
.sc_edx
= regs
->tf_edx
;
627 frame
.sf_sc
.sc_ecx
= regs
->tf_ecx
;
628 frame
.sf_sc
.sc_eax
= regs
->tf_eax
;
629 frame
.sf_sc
.sc_eip
= regs
->tf_eip
;
630 frame
.sf_sc
.sc_cs
= regs
->tf_cs
;
631 frame
.sf_sc
.sc_eflags
= regs
->tf_eflags
;
632 frame
.sf_sc
.sc_esp_at_signal
= regs
->tf_esp
;
633 frame
.sf_sc
.sc_ss
= regs
->tf_ss
;
634 frame
.sf_sc
.sc_err
= regs
->tf_err
;
635 frame
.sf_sc
.sc_cr2
= (register_t
)ksi
->ksi_addr
;
636 frame
.sf_sc
.sc_trapno
= bsd_to_linux_trapcode(ksi
->ksi_trapno
);
638 frame
.sf_extramask
[0] = lmask
.__mask
;
640 if (copyout(&frame
, fp
, sizeof(frame
)) != 0) {
642 * Process has trashed its stack; give it an illegal
643 * instruction to halt it in its tracks.
650 * Build context to run handler in.
652 regs
->tf_esp
= (int)fp
;
653 regs
->tf_eip
= linux_sigcode
;
654 regs
->tf_eflags
&= ~(PSL_T
| PSL_VM
| PSL_D
);
655 regs
->tf_cs
= _ucodesel
;
656 regs
->tf_ds
= _udatasel
;
657 regs
->tf_es
= _udatasel
;
658 regs
->tf_fs
= _udatasel
;
659 regs
->tf_ss
= _udatasel
;
661 mtx_lock(&psp
->ps_mtx
);
665 * System call to cleanup state after a signal
666 * has been taken. Reset signal mask and
667 * stack state from context left by sendsig (above).
668 * Return to previous pc and psl as specified by
669 * context left by sendsig. Check carefully to
670 * make sure that the user has not modified the
671 * psl to gain improper privileges or to cause
675 linux_sigreturn(struct thread
*td
, struct linux_sigreturn_args
*args
)
677 struct l_sigframe frame
;
678 struct trapframe
*regs
;
687 if (ldebug(sigreturn
))
688 printf(ARGS(sigreturn
, "%p"), (void *)args
->sfp
);
691 * The trampoline code hands us the sigframe.
692 * It is unsafe to keep track of it ourselves, in the event that a
693 * program jumps out of a signal handler.
695 if (copyin(args
->sfp
, &frame
, sizeof(frame
)) != 0)
699 * Check for security violations.
701 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
702 eflags
= frame
.sf_sc
.sc_eflags
;
703 if (!EFLAGS_SECURE(eflags
, regs
->tf_eflags
))
707 * Don't allow users to load a valid privileged %cs. Let the
708 * hardware check for invalid selectors, excess privilege in
709 * other selectors, invalid %eip's and invalid %esp's.
711 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
712 if (!CS_SECURE(frame
.sf_sc
.sc_cs
)) {
713 ksiginfo_init_trap(&ksi
);
714 ksi
.ksi_signo
= SIGBUS
;
715 ksi
.ksi_code
= BUS_OBJERR
;
716 ksi
.ksi_trapno
= T_PROTFLT
;
717 ksi
.ksi_addr
= (void *)regs
->tf_eip
;
718 trapsignal(td
, &ksi
);
722 lmask
.__mask
= frame
.sf_sc
.sc_mask
;
723 linux_to_bsd_sigset(&lmask
, &bmask
);
724 kern_sigprocmask(td
, SIG_SETMASK
, &bmask
, NULL
, 0);
727 * Restore signal context.
729 /* %gs was restored by the trampoline. */
730 regs
->tf_fs
= frame
.sf_sc
.sc_fs
;
731 regs
->tf_es
= frame
.sf_sc
.sc_es
;
732 regs
->tf_ds
= frame
.sf_sc
.sc_ds
;
733 regs
->tf_edi
= frame
.sf_sc
.sc_edi
;
734 regs
->tf_esi
= frame
.sf_sc
.sc_esi
;
735 regs
->tf_ebp
= frame
.sf_sc
.sc_ebp
;
736 regs
->tf_ebx
= frame
.sf_sc
.sc_ebx
;
737 regs
->tf_edx
= frame
.sf_sc
.sc_edx
;
738 regs
->tf_ecx
= frame
.sf_sc
.sc_ecx
;
739 regs
->tf_eax
= frame
.sf_sc
.sc_eax
;
740 regs
->tf_eip
= frame
.sf_sc
.sc_eip
;
741 regs
->tf_cs
= frame
.sf_sc
.sc_cs
;
742 regs
->tf_eflags
= eflags
;
743 regs
->tf_esp
= frame
.sf_sc
.sc_esp_at_signal
;
744 regs
->tf_ss
= frame
.sf_sc
.sc_ss
;
746 return (EJUSTRETURN
);
750 * System call to cleanup state after a signal
751 * has been taken. Reset signal mask and
752 * stack state from context left by rt_sendsig (above).
753 * Return to previous pc and psl as specified by
754 * context left by sendsig. Check carefully to
755 * make sure that the user has not modified the
756 * psl to gain improper privileges or to cause
760 linux_rt_sigreturn(struct thread
*td
, struct linux_rt_sigreturn_args
*args
)
762 struct l_ucontext uc
;
763 struct l_sigcontext
*context
;
767 struct trapframe
*regs
;
774 if (ldebug(rt_sigreturn
))
775 printf(ARGS(rt_sigreturn
, "%p"), (void *)args
->ucp
);
778 * The trampoline code hands us the ucontext.
779 * It is unsafe to keep track of it ourselves, in the event that a
780 * program jumps out of a signal handler.
782 if (copyin(args
->ucp
, &uc
, sizeof(uc
)) != 0)
785 context
= &uc
.uc_mcontext
;
788 * Check for security violations.
790 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
791 eflags
= context
->sc_eflags
;
792 if (!EFLAGS_SECURE(eflags
, regs
->tf_eflags
))
796 * Don't allow users to load a valid privileged %cs. Let the
797 * hardware check for invalid selectors, excess privilege in
798 * other selectors, invalid %eip's and invalid %esp's.
800 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
801 if (!CS_SECURE(context
->sc_cs
)) {
802 ksiginfo_init_trap(&ksi
);
803 ksi
.ksi_signo
= SIGBUS
;
804 ksi
.ksi_code
= BUS_OBJERR
;
805 ksi
.ksi_trapno
= T_PROTFLT
;
806 ksi
.ksi_addr
= (void *)regs
->tf_eip
;
807 trapsignal(td
, &ksi
);
811 linux_to_bsd_sigset(&uc
.uc_sigmask
, &bmask
);
812 kern_sigprocmask(td
, SIG_SETMASK
, &bmask
, NULL
, 0);
815 * Restore signal context
817 /* %gs was restored by the trampoline. */
818 regs
->tf_fs
= context
->sc_fs
;
819 regs
->tf_es
= context
->sc_es
;
820 regs
->tf_ds
= context
->sc_ds
;
821 regs
->tf_edi
= context
->sc_edi
;
822 regs
->tf_esi
= context
->sc_esi
;
823 regs
->tf_ebp
= context
->sc_ebp
;
824 regs
->tf_ebx
= context
->sc_ebx
;
825 regs
->tf_edx
= context
->sc_edx
;
826 regs
->tf_ecx
= context
->sc_ecx
;
827 regs
->tf_eax
= context
->sc_eax
;
828 regs
->tf_eip
= context
->sc_eip
;
829 regs
->tf_cs
= context
->sc_cs
;
830 regs
->tf_eflags
= eflags
;
831 regs
->tf_esp
= context
->sc_esp_at_signal
;
832 regs
->tf_ss
= context
->sc_ss
;
835 * call sigaltstack & ignore results..
838 ss
.ss_sp
= lss
->ss_sp
;
839 ss
.ss_size
= lss
->ss_size
;
840 ss
.ss_flags
= linux_to_bsd_sigaltstack(lss
->ss_flags
);
843 if (ldebug(rt_sigreturn
))
844 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
845 ss
.ss_flags
, ss
.ss_sp
, ss
.ss_size
, context
->sc_mask
);
847 (void)kern_sigaltstack(td
, &ss
, NULL
);
849 return (EJUSTRETURN
);
853 linux_fetch_syscall_args(struct thread
*td
, struct syscall_args
*sa
)
856 struct trapframe
*frame
;
859 frame
= td
->td_frame
;
861 sa
->code
= frame
->tf_eax
;
862 sa
->args
[0] = frame
->tf_ebx
;
863 sa
->args
[1] = frame
->tf_ecx
;
864 sa
->args
[2] = frame
->tf_edx
;
865 sa
->args
[3] = frame
->tf_esi
;
866 sa
->args
[4] = frame
->tf_edi
;
867 sa
->args
[5] = frame
->tf_ebp
; /* Unconfirmed */
869 if (sa
->code
>= p
->p_sysent
->sv_size
)
871 sa
->callp
= &p
->p_sysent
->sv_table
[p
->p_sysent
->sv_size
- 1];
873 sa
->callp
= &p
->p_sysent
->sv_table
[sa
->code
];
874 sa
->narg
= sa
->callp
->sy_narg
;
876 td
->td_retval
[0] = 0;
877 td
->td_retval
[1] = frame
->tf_edx
;
883 * If a linux binary is exec'ing something, try this image activator
884 * first. We override standard shell script execution in order to
885 * be able to modify the interpreter path. We only do this if a linux
886 * binary is doing the exec, so we do not create an EXEC module for it.
/* Forward declaration; installed as .sv_imgact_try in both sysentvecs. */
static int	exec_linux_imgact_try(struct image_params *iparams);
891 exec_linux_imgact_try(struct image_params
*imgp
)
893 const char *head
= (const char *)imgp
->image_header
;
898 * The interpreter for shell scripts run from a linux binary needs
899 * to be located in /compat/linux if possible in order to recursively
900 * maintain linux path emulation.
902 if (((const short *)head
)[0] == SHELLMAGIC
) {
904 * Run our normal shell image activator. If it succeeds attempt
905 * to use the alternate path for the interpreter. If an alternate
906 * path is found, use our stringspace to store it.
908 if ((error
= exec_shell_imgact(imgp
)) == 0) {
909 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp
->proc
),
910 imgp
->interpreter_name
, UIO_SYSSPACE
, &rpath
, 0, AT_FDCWD
);
912 imgp
->args
->fname_buf
=
913 imgp
->interpreter_name
= rpath
;
920 * exec_setregs may initialize some registers differently than Linux
921 * does, thus potentially confusing Linux binaries. If necessary, we
922 * override the exec_setregs default(s) here.
925 exec_linux_setregs(struct thread
*td
, struct image_params
*imgp
, u_long stack
)
927 struct pcb
*pcb
= td
->td_pcb
;
929 exec_setregs(td
, imgp
, stack
);
931 /* Linux sets %gs to 0, we default to _udatasel */
935 pcb
->pcb_initial_npxcw
= __LINUX_NPXCW__
;
939 linux_get_machine(const char **dst
)
957 struct sysentvec linux_sysvec
= {
958 .sv_size
= LINUX_SYS_MAXSYSCALL
,
959 .sv_table
= linux_sysent
,
961 .sv_errsize
= ELAST
+ 1,
962 .sv_errtbl
= bsd_to_linux_errno
,
963 .sv_transtrap
= translate_traps
,
964 .sv_fixup
= linux_fixup
,
965 .sv_sendsig
= linux_sendsig
,
966 .sv_sigcode
= &_binary_linux_locore_o_start
,
967 .sv_szsigcode
= &linux_szsigcode
,
968 .sv_name
= "Linux a.out",
970 .sv_imgact_try
= exec_linux_imgact_try
,
971 .sv_minsigstksz
= LINUX_MINSIGSTKSZ
,
972 .sv_pagesize
= PAGE_SIZE
,
973 .sv_minuser
= VM_MIN_ADDRESS
,
974 .sv_maxuser
= VM_MAXUSER_ADDRESS
,
975 .sv_usrstack
= LINUX_USRSTACK
,
976 .sv_psstrings
= PS_STRINGS
,
977 .sv_stackprot
= VM_PROT_ALL
,
978 .sv_copyout_strings
= exec_copyout_strings
,
979 .sv_setregs
= exec_linux_setregs
,
982 .sv_flags
= SV_ABI_LINUX
| SV_AOUT
| SV_IA32
| SV_ILP32
,
983 .sv_set_syscall_retval
= cpu_set_syscall_retval
,
984 .sv_fetch_syscall_args
= linux_fetch_syscall_args
,
985 .sv_syscallnames
= NULL
,
986 .sv_shared_page_base
= LINUX_SHAREDPAGE
,
987 .sv_shared_page_len
= PAGE_SIZE
,
988 .sv_schedtail
= linux_schedtail
,
989 .sv_thread_detach
= linux_thread_detach
,
992 INIT_SYSENTVEC(aout_sysvec
, &linux_sysvec
);
994 struct sysentvec elf_linux_sysvec
= {
995 .sv_size
= LINUX_SYS_MAXSYSCALL
,
996 .sv_table
= linux_sysent
,
998 .sv_errsize
= ELAST
+ 1,
999 .sv_errtbl
= bsd_to_linux_errno
,
1000 .sv_transtrap
= translate_traps
,
1001 .sv_fixup
= elf_linux_fixup
,
1002 .sv_sendsig
= linux_sendsig
,
1003 .sv_sigcode
= &_binary_linux_locore_o_start
,
1004 .sv_szsigcode
= &linux_szsigcode
,
1005 .sv_name
= "Linux ELF",
1006 .sv_coredump
= elf32_coredump
,
1007 .sv_imgact_try
= exec_linux_imgact_try
,
1008 .sv_minsigstksz
= LINUX_MINSIGSTKSZ
,
1009 .sv_pagesize
= PAGE_SIZE
,
1010 .sv_minuser
= VM_MIN_ADDRESS
,
1011 .sv_maxuser
= VM_MAXUSER_ADDRESS
,
1012 .sv_usrstack
= LINUX_USRSTACK
,
1013 .sv_psstrings
= LINUX_PS_STRINGS
,
1014 .sv_stackprot
= VM_PROT_ALL
,
1015 .sv_copyout_strings
= linux_copyout_strings
,
1016 .sv_setregs
= exec_linux_setregs
,
1017 .sv_fixlimit
= NULL
,
1019 .sv_flags
= SV_ABI_LINUX
| SV_IA32
| SV_ILP32
| SV_SHP
,
1020 .sv_set_syscall_retval
= cpu_set_syscall_retval
,
1021 .sv_fetch_syscall_args
= linux_fetch_syscall_args
,
1022 .sv_syscallnames
= NULL
,
1023 .sv_shared_page_base
= LINUX_SHAREDPAGE
,
1024 .sv_shared_page_len
= PAGE_SIZE
,
1025 .sv_schedtail
= linux_schedtail
,
1026 .sv_thread_detach
= linux_thread_detach
,
1031 linux_vdso_install(void *param
)
1034 linux_szsigcode
= (&_binary_linux_locore_o_end
-
1035 &_binary_linux_locore_o_start
);
1037 if (linux_szsigcode
> elf_linux_sysvec
.sv_shared_page_len
)
1038 panic("Linux invalid vdso size\n");
1040 __elfN(linux_vdso_fixup
)(&elf_linux_sysvec
);
1042 linux_shared_page_obj
= __elfN(linux_shared_page_init
)
1043 (&linux_shared_page_mapping
);
1045 __elfN(linux_vdso_reloc
)(&elf_linux_sysvec
, LINUX_SHAREDPAGE
);
1047 bcopy(elf_linux_sysvec
.sv_sigcode
, linux_shared_page_mapping
,
1049 elf_linux_sysvec
.sv_shared_page_obj
= linux_shared_page_obj
;
1051 SYSINIT(elf_linux_vdso_init
, SI_SUB_EXEC
, SI_ORDER_ANY
,
1052 (sysinit_cfunc_t
)linux_vdso_install
, NULL
);
1055 linux_vdso_deinstall(void *param
)
1058 __elfN(linux_shared_page_fini
)(linux_shared_page_obj
);
1060 SYSUNINIT(elf_linux_vdso_uninit
, SI_SUB_EXEC
, SI_ORDER_FIRST
,
1061 (sysinit_cfunc_t
)linux_vdso_deinstall
, NULL
);
/*
 * Brand-note identification: the vendor name stored in linux_brandnote
 * (its size, including the NUL, is used as n_namesz) and the value that
 * linux_trans_osrel() requires in the first descriptor word.
 */
static char	GNU_ABI_VENDOR[] = "GNU";
static int	GNULINUX_ABI_DESC = 0;
1067 linux_trans_osrel(const Elf_Note
*note
, int32_t *osrel
)
1069 const Elf32_Word
*desc
;
1072 p
= (uintptr_t)(note
+ 1);
1073 p
+= roundup2(note
->n_namesz
, sizeof(Elf32_Addr
));
1075 desc
= (const Elf32_Word
*)p
;
1076 if (desc
[0] != GNULINUX_ABI_DESC
)
1080 * For Linux we encode osrel as follows (see linux_mib.c):
1081 * VVVMMMIII (version, major, minor).
1083 *osrel
= desc
[1] * 1000000 + desc
[2] * 1000 + desc
[3];
1088 static Elf_Brandnote linux_brandnote
= {
1089 .hdr
.n_namesz
= sizeof(GNU_ABI_VENDOR
),
1090 .hdr
.n_descsz
= 16, /* XXX at least 16 */
1092 .vendor
= GNU_ABI_VENDOR
,
1093 .flags
= BN_TRANSLATE_OSREL
,
1094 .trans_osrel
= linux_trans_osrel
1097 static Elf32_Brandinfo linux_brand
= {
1098 .brand
= ELFOSABI_LINUX
,
1100 .compat_3_brand
= "Linux",
1101 .emul_path
= "/compat/linux",
1102 .interp_path
= "/lib/ld-linux.so.1",
1103 .sysvec
= &elf_linux_sysvec
,
1104 .interp_newpath
= NULL
,
1105 .brand_note
= &linux_brandnote
,
1106 .flags
= BI_CAN_EXEC_DYN
| BI_BRAND_NOTE
1109 static Elf32_Brandinfo linux_glibc2brand
= {
1110 .brand
= ELFOSABI_LINUX
,
1112 .compat_3_brand
= "Linux",
1113 .emul_path
= "/compat/linux",
1114 .interp_path
= "/lib/ld-linux.so.2",
1115 .sysvec
= &elf_linux_sysvec
,
1116 .interp_newpath
= NULL
,
1117 .brand_note
= &linux_brandnote
,
1118 .flags
= BI_CAN_EXEC_DYN
| BI_BRAND_NOTE
1121 Elf32_Brandinfo
*linux_brandlist
[] = {
1128 linux_elf_modevent(module_t mod
, int type
, void *data
)
1130 Elf32_Brandinfo
**brandinfo
;
1132 struct linux_ioctl_handler
**lihp
;
1138 for (brandinfo
= &linux_brandlist
[0]; *brandinfo
!= NULL
;
1140 if (elf32_insert_brand_entry(*brandinfo
) < 0)
1143 SET_FOREACH(lihp
, linux_ioctl_handler_set
)
1144 linux_ioctl_register_handler(*lihp
);
1145 LIST_INIT(&futex_list
);
1146 mtx_init(&futex_mtx
, "ftllk", NULL
, MTX_DEF
);
1147 linux_exit_tag
= EVENTHANDLER_REGISTER(process_exit
, linux_proc_exit
,
1149 linux_exec_tag
= EVENTHANDLER_REGISTER(process_exec
, linux_proc_exec
,
1151 linux_thread_dtor_tag
= EVENTHANDLER_REGISTER(thread_dtor
,
1152 linux_thread_dtor
, NULL
, EVENTHANDLER_PRI_ANY
);
1153 linux_get_machine(&linux_kplatform
);
1154 linux_szplatform
= roundup(strlen(linux_kplatform
) + 1,
1156 linux_osd_jail_register();
1157 stclohz
= (stathz
? stathz
: hz
);
1159 printf("Linux ELF exec handler installed\n");
1161 printf("cannot insert Linux ELF brand handler\n");
1164 for (brandinfo
= &linux_brandlist
[0]; *brandinfo
!= NULL
;
1166 if (elf32_brand_inuse(*brandinfo
))
1169 for (brandinfo
= &linux_brandlist
[0];
1170 *brandinfo
!= NULL
; ++brandinfo
)
1171 if (elf32_remove_brand_entry(*brandinfo
) < 0)
1175 SET_FOREACH(lihp
, linux_ioctl_handler_set
)
1176 linux_ioctl_unregister_handler(*lihp
);
1177 mtx_destroy(&futex_mtx
);
1178 EVENTHANDLER_DEREGISTER(process_exit
, linux_exit_tag
);
1179 EVENTHANDLER_DEREGISTER(process_exec
, linux_exec_tag
);
1180 EVENTHANDLER_DEREGISTER(thread_dtor
, linux_thread_dtor_tag
);
1181 linux_osd_jail_deregister();
1183 printf("Linux ELF exec handler removed\n");
1185 printf("Could not deinstall ELF interpreter entry\n");
1188 return (EOPNOTSUPP
);
1193 static moduledata_t linux_elf_mod
= {
1199 DECLARE_MODULE_TIED(linuxelf
, linux_elf_mod
, SI_SUB_EXEC
, SI_ORDER_ANY
);
1200 FEATURE(linux
, "Linux 32bit support");