2 * Copyright (c) 2000 Marcel Moolenaar
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/capsicum.h>
36 #include <sys/fcntl.h>
37 #include <sys/imgact.h>
39 #include <sys/malloc.h>
41 #include <sys/mutex.h>
45 #include <sys/queue.h>
46 #include <sys/resource.h>
47 #include <sys/resourcevar.h>
48 #include <sys/signalvar.h>
49 #include <sys/syscallsubr.h>
50 #include <sys/sysproto.h>
51 #include <sys/unistd.h>
53 #include <sys/sched.h>
55 #include <machine/frame.h>
56 #include <machine/psl.h>
57 #include <machine/segments.h>
58 #include <machine/sysarch.h>
62 #include <vm/vm_map.h>
64 #include <i386/linux/linux.h>
65 #include <i386/linux/linux_proto.h>
66 #include <compat/linux/linux_ipc.h>
67 #include <compat/linux/linux_misc.h>
68 #include <compat/linux/linux_mmap.h>
69 #include <compat/linux/linux_signal.h>
70 #include <compat/linux/linux_util.h>
71 #include <compat/linux/linux_emul.h>
73 #include <i386/include/pcb.h> /* needed for pcb definition in linux_set_thread_area */
75 #include "opt_posix.h"
77 extern struct sysentvec elf32_freebsd_sysvec
; /* defined in i386/i386/elf_machdep.c */
85 l_uint read_exec_only
:1;
86 l_uint limit_in_pages
:1;
87 l_uint seg_not_present
:1;
91 struct l_old_select_argv
{
96 struct l_timeval
*timeout
;
101 linux_execve(struct thread
*td
, struct linux_execve_args
*args
)
103 struct image_args eargs
;
107 LCONVPATHEXIST(td
, args
->path
, &newpath
);
111 printf(ARGS(execve
, "%s"), newpath
);
114 error
= exec_copyin_args(&eargs
, newpath
, UIO_SYSSPACE
,
115 args
->argp
, args
->envp
);
116 free(newpath
, M_TEMP
);
118 error
= linux_common_execve(td
, &eargs
);
122 struct l_ipc_kludge
{
123 struct l_msgbuf
*msgp
;
128 linux_ipc(struct thread
*td
, struct linux_ipc_args
*args
)
131 switch (args
->what
& 0xFFFF) {
133 struct linux_semop_args a
;
135 a
.semid
= args
->arg1
;
137 a
.nsops
= args
->arg2
;
138 return (linux_semop(td
, &a
));
141 struct linux_semget_args a
;
144 a
.nsems
= args
->arg2
;
145 a
.semflg
= args
->arg3
;
146 return (linux_semget(td
, &a
));
149 struct linux_semctl_args a
;
152 a
.semid
= args
->arg1
;
153 a
.semnum
= args
->arg2
;
155 error
= copyin(args
->ptr
, &a
.arg
, sizeof(a
.arg
));
158 return (linux_semctl(td
, &a
));
161 struct linux_msgsnd_args a
;
163 a
.msqid
= args
->arg1
;
165 a
.msgsz
= args
->arg2
;
166 a
.msgflg
= args
->arg3
;
167 return (linux_msgsnd(td
, &a
));
170 struct linux_msgrcv_args a
;
172 a
.msqid
= args
->arg1
;
173 a
.msgsz
= args
->arg2
;
174 a
.msgflg
= args
->arg3
;
175 if ((args
->what
>> 16) == 0) {
176 struct l_ipc_kludge tmp
;
179 if (args
->ptr
== NULL
)
181 error
= copyin(args
->ptr
, &tmp
, sizeof(tmp
));
185 a
.msgtyp
= tmp
.msgtyp
;
188 a
.msgtyp
= args
->arg5
;
190 return (linux_msgrcv(td
, &a
));
193 struct linux_msgget_args a
;
196 a
.msgflg
= args
->arg2
;
197 return (linux_msgget(td
, &a
));
200 struct linux_msgctl_args a
;
202 a
.msqid
= args
->arg1
;
205 return (linux_msgctl(td
, &a
));
208 struct linux_shmat_args a
;
210 a
.shmid
= args
->arg1
;
211 a
.shmaddr
= args
->ptr
;
212 a
.shmflg
= args
->arg2
;
213 a
.raddr
= (l_ulong
*)args
->arg3
;
214 return (linux_shmat(td
, &a
));
217 struct linux_shmdt_args a
;
219 a
.shmaddr
= args
->ptr
;
220 return (linux_shmdt(td
, &a
));
223 struct linux_shmget_args a
;
227 a
.shmflg
= args
->arg3
;
228 return (linux_shmget(td
, &a
));
231 struct linux_shmctl_args a
;
233 a
.shmid
= args
->arg1
;
236 return (linux_shmctl(td
, &a
));
246 linux_old_select(struct thread
*td
, struct linux_old_select_args
*args
)
248 struct l_old_select_argv linux_args
;
249 struct linux_select_args newsel
;
253 if (ldebug(old_select
))
254 printf(ARGS(old_select
, "%p"), args
->ptr
);
257 error
= copyin(args
->ptr
, &linux_args
, sizeof(linux_args
));
261 newsel
.nfds
= linux_args
.nfds
;
262 newsel
.readfds
= linux_args
.readfds
;
263 newsel
.writefds
= linux_args
.writefds
;
264 newsel
.exceptfds
= linux_args
.exceptfds
;
265 newsel
.timeout
= linux_args
.timeout
;
266 return (linux_select(td
, &newsel
));
270 linux_set_cloned_tls(struct thread
*td
, void *desc
)
272 struct segment_descriptor sd
;
273 struct l_user_desc info
;
277 error
= copyin(desc
, &info
, sizeof(struct l_user_desc
));
279 printf(LMSG("copyin failed!"));
281 idx
= info
.entry_number
;
284 * looks like we're getting the idx we returned
285 * in the set_thread_area() syscall
287 if (idx
!= 6 && idx
!= 3) {
288 printf(LMSG("resetting idx!"));
292 /* this doesn't happen in practice */
294 /* we might copy out the entry_number as 3 */
295 info
.entry_number
= 3;
296 error
= copyout(&info
, desc
, sizeof(struct l_user_desc
));
298 printf(LMSG("copyout failed!"));
301 a
[0] = LINUX_LDT_entry_a(&info
);
302 a
[1] = LINUX_LDT_entry_b(&info
);
304 memcpy(&sd
, &a
, sizeof(a
));
307 printf("Segment created in clone with "
308 "CLONE_SETTLS: lobase: %x, hibase: %x, "
309 "lolimit: %x, hilimit: %x, type: %i, "
310 "dpl: %i, p: %i, xx: %i, def32: %i, "
311 "gran: %i\n", sd
.sd_lobase
, sd
.sd_hibase
,
312 sd
.sd_lolimit
, sd
.sd_hilimit
, sd
.sd_type
,
313 sd
.sd_dpl
, sd
.sd_p
, sd
.sd_xx
,
314 sd
.sd_def32
, sd
.sd_gran
);
318 td
->td_pcb
->pcb_gsd
= sd
;
319 td
->td_pcb
->pcb_gs
= GSEL(GUGS_SEL
, SEL_UPL
);
326 linux_set_upcall_kse(struct thread
*td
, register_t stack
)
330 td
->td_frame
->tf_esp
= stack
;
333 * The newly created Linux thread returns
334 * to the user space by the same path that a parent does.
336 td
->td_frame
->tf_eax
= 0;
341 linux_mmap2(struct thread
*td
, struct linux_mmap2_args
*args
)
346 printf(ARGS(mmap2
, "%p, %d, %d, 0x%08x, %d, %d"),
347 (void *)args
->addr
, args
->len
, args
->prot
,
348 args
->flags
, args
->fd
, args
->pgoff
);
351 return (linux_mmap_common(td
, args
->addr
, args
->len
, args
->prot
,
352 args
->flags
, args
->fd
, (uint64_t)(uint32_t)args
->pgoff
*
357 linux_mmap(struct thread
*td
, struct linux_mmap_args
*args
)
360 struct l_mmap_argv linux_args
;
362 error
= copyin(args
->ptr
, &linux_args
, sizeof(linux_args
));
368 printf(ARGS(mmap
, "%p, %d, %d, 0x%08x, %d, %d"),
369 (void *)linux_args
.addr
, linux_args
.len
, linux_args
.prot
,
370 linux_args
.flags
, linux_args
.fd
, linux_args
.pgoff
);
373 return (linux_mmap_common(td
, linux_args
.addr
, linux_args
.len
,
374 linux_args
.prot
, linux_args
.flags
, linux_args
.fd
,
375 (uint32_t)linux_args
.pgoff
));
379 linux_mprotect(struct thread
*td
, struct linux_mprotect_args
*uap
)
382 return (linux_mprotect_common(td
, PTROUT(uap
->addr
), uap
->len
, uap
->prot
));
386 linux_ioperm(struct thread
*td
, struct linux_ioperm_args
*args
)
389 struct i386_ioperm_args iia
;
391 iia
.start
= args
->start
;
392 iia
.length
= args
->length
;
393 iia
.enable
= args
->enable
;
394 error
= i386_set_ioperm(td
, &iia
);
399 linux_iopl(struct thread
*td
, struct linux_iopl_args
*args
)
403 if (args
->level
< 0 || args
->level
> 3)
405 if ((error
= priv_check(td
, PRIV_IO
)) != 0)
407 if ((error
= securelevel_gt(td
->td_ucred
, 0)) != 0)
409 td
->td_frame
->tf_eflags
= (td
->td_frame
->tf_eflags
& ~PSL_IOPL
) |
410 (args
->level
* (PSL_IOPL
/ 3));
415 linux_modify_ldt(struct thread
*td
, struct linux_modify_ldt_args
*uap
)
418 struct i386_ldt_args ldt
;
419 struct l_descriptor ld
;
420 union descriptor desc
;
424 case 0x00: /* read_ldt */
426 ldt
.descs
= uap
->ptr
;
427 ldt
.num
= uap
->bytecount
/ sizeof(union descriptor
);
428 error
= i386_get_ldt(td
, &ldt
);
429 td
->td_retval
[0] *= sizeof(union descriptor
);
431 case 0x02: /* read_default_ldt = 0 */
432 size
= 5*sizeof(struct l_desc_struct
);
433 if (size
> uap
->bytecount
)
434 size
= uap
->bytecount
;
435 for (written
= error
= 0; written
< size
&& error
== 0; written
++)
436 error
= subyte((char *)uap
->ptr
+ written
, 0);
437 td
->td_retval
[0] = written
;
439 case 0x01: /* write_ldt */
440 case 0x11: /* write_ldt */
441 if (uap
->bytecount
!= sizeof(ld
))
444 error
= copyin(uap
->ptr
, &ld
, sizeof(ld
));
448 ldt
.start
= ld
.entry_number
;
451 desc
.sd
.sd_lolimit
= (ld
.limit
& 0x0000ffff);
452 desc
.sd
.sd_hilimit
= (ld
.limit
& 0x000f0000) >> 16;
453 desc
.sd
.sd_lobase
= (ld
.base_addr
& 0x00ffffff);
454 desc
.sd
.sd_hibase
= (ld
.base_addr
& 0xff000000) >> 24;
455 desc
.sd
.sd_type
= SDT_MEMRO
| ((ld
.read_exec_only
^ 1) << 1) |
458 desc
.sd
.sd_p
= (ld
.seg_not_present
^ 1);
460 desc
.sd
.sd_def32
= ld
.seg_32bit
;
461 desc
.sd
.sd_gran
= ld
.limit_in_pages
;
462 error
= i386_set_ldt(td
, &ldt
, &desc
);
469 if (error
== EOPNOTSUPP
) {
470 printf("linux: modify_ldt needs kernel option USER_LDT\n");
478 linux_sigaction(struct thread
*td
, struct linux_sigaction_args
*args
)
481 l_sigaction_t act
, oact
;
485 if (ldebug(sigaction
))
486 printf(ARGS(sigaction
, "%d, %p, %p"),
487 args
->sig
, (void *)args
->nsa
, (void *)args
->osa
);
490 if (args
->nsa
!= NULL
) {
491 error
= copyin(args
->nsa
, &osa
, sizeof(l_osigaction_t
));
494 act
.lsa_handler
= osa
.lsa_handler
;
495 act
.lsa_flags
= osa
.lsa_flags
;
496 act
.lsa_restorer
= osa
.lsa_restorer
;
497 LINUX_SIGEMPTYSET(act
.lsa_mask
);
498 act
.lsa_mask
.__mask
= osa
.lsa_mask
;
501 error
= linux_do_sigaction(td
, args
->sig
, args
->nsa
? &act
: NULL
,
502 args
->osa
? &oact
: NULL
);
504 if (args
->osa
!= NULL
&& !error
) {
505 osa
.lsa_handler
= oact
.lsa_handler
;
506 osa
.lsa_flags
= oact
.lsa_flags
;
507 osa
.lsa_restorer
= oact
.lsa_restorer
;
508 osa
.lsa_mask
= oact
.lsa_mask
.__mask
;
509 error
= copyout(&osa
, args
->osa
, sizeof(l_osigaction_t
));
516 * Linux has two extra args, restart and oldmask. We don't use these,
517 * but it seems that "restart" is actually a context pointer that
518 * enables the signal to happen with a different register set.
521 linux_sigsuspend(struct thread
*td
, struct linux_sigsuspend_args
*args
)
527 if (ldebug(sigsuspend
))
528 printf(ARGS(sigsuspend
, "%08lx"), (unsigned long)args
->mask
);
531 LINUX_SIGEMPTYSET(mask
);
532 mask
.__mask
= args
->mask
;
533 linux_to_bsd_sigset(&mask
, &sigmask
);
534 return (kern_sigsuspend(td
, sigmask
));
538 linux_rt_sigsuspend(struct thread
*td
, struct linux_rt_sigsuspend_args
*uap
)
545 if (ldebug(rt_sigsuspend
))
546 printf(ARGS(rt_sigsuspend
, "%p, %d"),
547 (void *)uap
->newset
, uap
->sigsetsize
);
550 if (uap
->sigsetsize
!= sizeof(l_sigset_t
))
553 error
= copyin(uap
->newset
, &lmask
, sizeof(l_sigset_t
));
557 linux_to_bsd_sigset(&lmask
, &sigmask
);
558 return (kern_sigsuspend(td
, sigmask
));
562 linux_pause(struct thread
*td
, struct linux_pause_args
*args
)
564 struct proc
*p
= td
->td_proc
;
569 printf(ARGS(pause
, ""));
573 sigmask
= td
->td_sigmask
;
575 return (kern_sigsuspend(td
, sigmask
));
579 linux_sigaltstack(struct thread
*td
, struct linux_sigaltstack_args
*uap
)
586 if (ldebug(sigaltstack
))
587 printf(ARGS(sigaltstack
, "%p, %p"), uap
->uss
, uap
->uoss
);
590 if (uap
->uss
!= NULL
) {
591 error
= copyin(uap
->uss
, &lss
, sizeof(l_stack_t
));
595 ss
.ss_sp
= lss
.ss_sp
;
596 ss
.ss_size
= lss
.ss_size
;
597 ss
.ss_flags
= linux_to_bsd_sigaltstack(lss
.ss_flags
);
599 error
= kern_sigaltstack(td
, (uap
->uss
!= NULL
) ? &ss
: NULL
,
600 (uap
->uoss
!= NULL
) ? &oss
: NULL
);
601 if (!error
&& uap
->uoss
!= NULL
) {
602 lss
.ss_sp
= oss
.ss_sp
;
603 lss
.ss_size
= oss
.ss_size
;
604 lss
.ss_flags
= bsd_to_linux_sigaltstack(oss
.ss_flags
);
605 error
= copyout(&lss
, uap
->uoss
, sizeof(l_stack_t
));
612 linux_ftruncate64(struct thread
*td
, struct linux_ftruncate64_args
*args
)
614 struct ftruncate_args sa
;
617 if (ldebug(ftruncate64
))
618 printf(ARGS(ftruncate64
, "%u, %jd"), args
->fd
,
619 (intmax_t)args
->length
);
623 sa
.length
= args
->length
;
624 return sys_ftruncate(td
, &sa
);
628 linux_set_thread_area(struct thread
*td
, struct linux_set_thread_area_args
*args
)
630 struct l_user_desc info
;
634 struct segment_descriptor sd
;
636 error
= copyin(args
->desc
, &info
, sizeof(struct l_user_desc
));
641 if (ldebug(set_thread_area
))
642 printf(ARGS(set_thread_area
, "%i, %x, %x, %i, %i, %i, %i, %i, %i\n"),
650 info
.seg_not_present
,
654 idx
= info
.entry_number
;
656 * Semantics of linux version: every thread in the system has an array of
657 * 3 tls descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. This
658 * syscall loads one of the selected tls descriptors with a value and
659 * also loads GDT descriptors 6, 7 and 8 with the content of the
660 * per-thread descriptors.
662 * Semantics of fbsd version: I think we can ignore that linux has 3
663 * per-thread descriptors and use just the 1st one. The tls_array[]
664 * is used only in set/get-thread_area() syscalls and for loading the
665 * GDT descriptors. In fbsd we use just one GDT descriptor for TLS so
666 * we will load just one.
668 * XXX: this doesn't work when a user space process tries to use more
669 * than 1 TLS segment. Comment in the linux sources says wine might do
674 * we support just GLIBC TLS now
675 * we should let 3 proceed as well because we use this segment so
676 * if code does two subsequent calls it should succeed
678 if (idx
!= 6 && idx
!= -1 && idx
!= 3)
682 * we have to copy out the GDT entry we use
683 * FreeBSD uses GDT entry #3 for storing %gs so load that
685 * XXX: what if a user space program doesn't check this value and tries
688 idx
= info
.entry_number
= 3;
689 error
= copyout(&info
, args
->desc
, sizeof(struct l_user_desc
));
693 if (LINUX_LDT_empty(&info
)) {
697 a
[0] = LINUX_LDT_entry_a(&info
);
698 a
[1] = LINUX_LDT_entry_b(&info
);
701 memcpy(&sd
, &a
, sizeof(a
));
703 if (ldebug(set_thread_area
))
704 printf("Segment created in set_thread_area: lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, def32: %i, gran: %i\n", sd
.sd_lobase
,
716 /* this is taken from i386 version of cpu_set_user_tls() */
719 td
->td_pcb
->pcb_gsd
= sd
;
720 PCPU_GET(fsgs_gdt
)[1] = sd
;
721 load_gs(GSEL(GUGS_SEL
, SEL_UPL
));
728 linux_get_thread_area(struct thread
*td
, struct linux_get_thread_area_args
*args
)
731 struct l_user_desc info
;
734 struct l_desc_struct desc
;
735 struct segment_descriptor sd
;
738 if (ldebug(get_thread_area
))
739 printf(ARGS(get_thread_area
, "%p"), args
->desc
);
742 error
= copyin(args
->desc
, &info
, sizeof(struct l_user_desc
));
746 idx
= info
.entry_number
;
747 /* XXX: I am not sure if we want 3 to be allowed too. */
748 if (idx
!= 6 && idx
!= 3)
753 memset(&info
, 0, sizeof(info
));
755 sd
= PCPU_GET(fsgs_gdt
)[1];
757 memcpy(&desc
, &sd
, sizeof(desc
));
759 info
.entry_number
= idx
;
760 info
.base_addr
= LINUX_GET_BASE(&desc
);
761 info
.limit
= LINUX_GET_LIMIT(&desc
);
762 info
.seg_32bit
= LINUX_GET_32BIT(&desc
);
763 info
.contents
= LINUX_GET_CONTENTS(&desc
);
764 info
.read_exec_only
= !LINUX_GET_WRITABLE(&desc
);
765 info
.limit_in_pages
= LINUX_GET_LIMIT_PAGES(&desc
);
766 info
.seg_not_present
= !LINUX_GET_PRESENT(&desc
);
767 info
.useable
= LINUX_GET_USEABLE(&desc
);
769 error
= copyout(&info
, args
->desc
, sizeof(struct l_user_desc
));
776 /* XXX: this won't work with module - convert it */
778 linux_mq_open(struct thread
*td
, struct linux_mq_open_args
*args
)
780 #ifdef P1003_1B_MQUEUE
781 return sys_kmq_open(td
, (struct kmq_open_args
*) args
);
788 linux_mq_unlink(struct thread
*td
, struct linux_mq_unlink_args
*args
)
790 #ifdef P1003_1B_MQUEUE
791 return sys_kmq_unlink(td
, (struct kmq_unlink_args
*) args
);
798 linux_mq_timedsend(struct thread
*td
, struct linux_mq_timedsend_args
*args
)
800 #ifdef P1003_1B_MQUEUE
801 return sys_kmq_timedsend(td
, (struct kmq_timedsend_args
*) args
);
808 linux_mq_timedreceive(struct thread
*td
, struct linux_mq_timedreceive_args
*args
)
810 #ifdef P1003_1B_MQUEUE
811 return sys_kmq_timedreceive(td
, (struct kmq_timedreceive_args
*) args
);
818 linux_mq_notify(struct thread
*td
, struct linux_mq_notify_args
*args
)
820 #ifdef P1003_1B_MQUEUE
821 return sys_kmq_notify(td
, (struct kmq_notify_args
*) args
);
828 linux_mq_getsetattr(struct thread
*td
, struct linux_mq_getsetattr_args
*args
)
830 #ifdef P1003_1B_MQUEUE
831 return sys_kmq_setattr(td
, (struct kmq_setattr_args
*) args
);