From a1f82243320e334d4011b8515135447478cc19c5 Mon Sep 17 00:00:00 2001 From: Alex Hornung Date: Fri, 12 Mar 2010 23:08:40 +0000 Subject: [PATCH] linux emulation - Major update * Major update to all components of the linux emulation layer (linuxulator) and linprocfs. * Many old system calls have been overhauled and new ones added. * Tested with java 1.6.18, apache tomcat 6, opera, Acrobat Reader, ... * NOTE: Flash doesn't work yet, and I haven't been able to figure out why. I'll continue my work on the linuxulator part time now, and hopefully eventually fix the issue(s). Some parts of this have been taken from FreeBSD and NetBSD. Overall the new stuff isn't as well organized as it should be; most of the new defines are in linux.h as I didn't bother splitting it up into independent headers. Once we plan on porting the linuxulator to amd64, this has to be taken care of, so especially machine independent bits are broken out of the machine dependent sys/emulation/linux/i386. --- sys/emulation/linux/Makefile | 13 +- sys/emulation/linux/i386/linprocfs/linprocfs.h | 25 +- .../linux/i386/linprocfs/linprocfs_misc.c | 289 ++++++- .../linux/i386/linprocfs/linprocfs_subr.c | 43 +- .../linux/i386/linprocfs/linprocfs_vnops.c | 492 +++++++++++- sys/emulation/linux/i386/linux.h | 154 +++- sys/emulation/linux/i386/linux_dummy.c | 2 +- sys/emulation/linux/i386/linux_machdep.c | 392 ++++++++-- sys/emulation/linux/i386/linux_proto.h | 342 ++++++++ sys/emulation/linux/i386/linux_support.s | 155 ++++ sys/emulation/linux/i386/linux_syscall.h | 45 +- sys/emulation/linux/i386/linux_sysent.c | 102 ++- sys/emulation/linux/i386/linux_sysvec.c | 30 +- sys/emulation/linux/i386/linux_union.h | 38 + sys/emulation/linux/i386/syscalls.master | 134 +++- sys/emulation/linux/linux_emuldata.c | 310 ++++++++ sys/emulation/linux/linux_emuldata.h | 110 +++ sys/emulation/linux/linux_epoll.c | 248 ++++++ sys/emulation/linux/linux_epoll.h | 55 ++ sys/emulation/linux/linux_file.c | 342 +++++++- 
sys/emulation/linux/linux_futex.c | 856 +++++++++++++++++++++ sys/emulation/linux/linux_futex.h | 87 +++ sys/emulation/linux/linux_ioctl.c | 67 ++ sys/emulation/linux/linux_ioctl.h | 10 +- sys/emulation/linux/linux_ipc.c | 467 ++++++++--- sys/emulation/linux/linux_ipc.h | 202 ++++- sys/emulation/linux/linux_mib.c | 2 +- sys/emulation/linux/linux_misc.c | 467 ++++++++++- sys/emulation/linux/linux_signal.c | 76 +- sys/emulation/linux/linux_socket.c | 114 ++- sys/emulation/linux/linux_socket.h | 6 + sys/emulation/linux/linux_stats.c | 58 +- sys/emulation/linux/linux_time.c | 232 ++++++ sys/emulation/linux/linux_util.c | 12 +- sys/platform/pc32/conf/files | 6 + 35 files changed, 5720 insertions(+), 263 deletions(-) create mode 100644 sys/emulation/linux/i386/linux_support.s create mode 100644 sys/emulation/linux/linux_emuldata.c create mode 100644 sys/emulation/linux/linux_emuldata.h create mode 100644 sys/emulation/linux/linux_epoll.c create mode 100644 sys/emulation/linux/linux_epoll.h create mode 100644 sys/emulation/linux/linux_futex.c create mode 100644 sys/emulation/linux/linux_futex.h create mode 100644 sys/emulation/linux/linux_time.c diff --git a/sys/emulation/linux/Makefile b/sys/emulation/linux/Makefile index 0f9af643ed..7a8974b37d 100644 --- a/sys/emulation/linux/Makefile +++ b/sys/emulation/linux/Makefile @@ -5,11 +5,14 @@ ARCH= arch_linux KMOD= linux -SRCS= linux_dummy.c linux_file.c linux_getcwd.c linux_ioctl.c linux_ipc.c \ - linux_machdep.c linux_mib.c linux_misc.c linux_signal.c linux_socket.c \ +SRCS= linux_dummy.c linux_emuldata.c linux_epoll.c \ + linux_file.c linux_futex.c linux_getcwd.c linux_ioctl.c \ + linux_ipc.c \ + linux_machdep.c linux_mib.c linux_misc.c linux_time.c linux_signal.c \ + linux_socket.c \ linux_stats.c linux_sysctl.c linux_sysent.c linux_sysvec.c \ linux_util.c opt_compat.h opt_global.h opt_vmpage.h -OBJS= linux_locore.o +OBJS= linux_support.o linux_locore.o SUBDIR= i386/linprocfs .if ${MACHINE_ARCH} == "i386" @@ -28,6 +31,10 
@@ linux_locore.o: linux_locore.s linux_assym.h ${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \ ${.IMPSRC} -o ${.TARGET} +linux_support.o: linux_support.s linux_assym.h + ${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \ + ${.IMPSRC} -o ${.TARGET} + linux_genassym.o: linux_genassym.c linux.h @ ${CC} -c ${CFLAGS:N-fno-common} ${.IMPSRC} diff --git a/sys/emulation/linux/i386/linprocfs/linprocfs.h b/sys/emulation/linux/i386/linprocfs/linprocfs.h index cff12cd2e4..047da47006 100644 --- a/sys/emulation/linux/i386/linprocfs/linprocfs.h +++ b/sys/emulation/linux/i386/linprocfs/linprocfs.h @@ -58,7 +58,22 @@ typedef enum { Pstat, /* kernel/system statistics */ Puptime, /* system uptime */ Pversion, /* system version */ - Ploadavg /* system load average */ + Ploadavg, /* system load average */ + Pnet, /* the net sub-directory */ + Pnetdev, /* net devices */ + Psys, /* the sys sub-directory */ + Psyskernel, /* the sys/kernel sub-directory */ + Pdevices, /* devices */ + Posrelease, /* osrelease */ + Postype, /* ostype */ + Ppidmax, /* pid_max */ + Pcwd, + Pprocroot, + Pfd, + Pcmdline, + Penviron, + Pmaps, + Pstatm, } pfstype; /* @@ -136,7 +151,13 @@ int linprocfs_doversion (struct proc *, struct proc *, struct pfsnode *pfsp, str int linprocfs_doprocstat (struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio); int linprocfs_doprocstatus (struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio); int linprocfs_doloadavg (struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio); - +int linprocfs_donetdev (struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio); +int linprocfs_dodevices (struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio); +int linprocfs_doosrelease (struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio); +int linprocfs_doostype (struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio); +int linprocfs_dopidmax (struct proc *, struct proc *, struct pfsnode *pfsp, 
struct uio *uio); +int linprocfs_domaps(struct proc *curp, struct proc *p, struct pfsnode *pfs, struct uio *uio); +int linprocfs_dostatm(struct proc *curp, struct proc *p, struct pfsnode *pfs, struct uio *uio); /* functions to check whether or not files should be displayed */ int linprocfs_validfile (struct proc *); diff --git a/sys/emulation/linux/i386/linprocfs/linprocfs_misc.c b/sys/emulation/linux/i386/linprocfs/linprocfs_misc.c index 6be0e01e9d..6aa0591be7 100644 --- a/sys/emulation/linux/i386/linprocfs/linprocfs_misc.c +++ b/sys/emulation/linux/i386/linprocfs/linprocfs_misc.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include @@ -67,8 +68,12 @@ #include #include #include +#include #include "linprocfs.h" +#include "../linux.h" +#include "../../linux_ioctl.h" +#include "../../linux_mib.h" /* * Various conversion macros @@ -318,11 +323,59 @@ linprocfs_doversion(struct proc *curp, struct proc *p, struct pfsnode *pfs, return (uiomove_frombuf(ps, xlen, uio)); } +#define B2P(x) ((x) >> PAGE_SHIFT) /* bytes to pages */ +int +linprocfs_dostatm(struct proc *curp, struct proc *p, struct pfsnode *pfs, + struct uio *uio) +{ + char *ps, psbuf[1024]; + struct kinfo_proc kp; + + fill_kinfo_proc(p, &kp); + + ps = psbuf; + ps += ksprintf(ps, "%d", p->p_pid); +#define PS_ADD(name, fmt, arg) ps += ksprintf(ps, " " fmt, arg) + PS_ADD("", "%ju", B2P((uintmax_t)(kp.kp_vm_tsize + kp.kp_vm_dsize + kp.kp_vm_ssize))); + PS_ADD("", "%ju", (uintmax_t)kp.kp_vm_rssize); + PS_ADD("", "%ju", (uintmax_t)0); /* XXX */ + PS_ADD("", "%ju", (uintmax_t)kp.kp_vm_tsize); + PS_ADD("", "%ju", (uintmax_t)kp.kp_vm_dsize); + PS_ADD("", "%ju", (uintmax_t)kp.kp_vm_ssize); + PS_ADD("", "%ju", (uintmax_t)0); /* XXX */ +#undef PS_ADD + ps += ksprintf(ps, "\n"); + + return (uiomove_frombuf(psbuf, ps - psbuf, uio)); +} + +#define P2K(x) ((x) << (PAGE_SHIFT - 10)) /* pages to kbytes */ int linprocfs_doprocstat(struct proc *curp, struct proc *p, struct pfsnode *pfs, struct uio *uio) { + 
vm_map_t map = &p->p_vmspace->vm_map; + vm_map_entry_t entry; + vm_offset_t start, end; char *ps, psbuf[1024]; + struct kinfo_proc kp; + + fill_kinfo_proc(p, &kp); + + start = 0; + end = 0; + vm_map_lock_read(map); + for (entry = map->header.next; entry != &map->header; + entry = entry->next) { + if (entry->maptype != VM_MAPTYPE_NORMAL && + entry->maptype != VM_MAPTYPE_VPAGETABLE) { + continue; + } + /* Assuming that text is the first entry */ + start = entry->start; + end = entry->end; + } + vm_map_unlock_read(map); ps = psbuf; ps += ksprintf(ps, "%d", p->p_pid); @@ -333,27 +386,27 @@ linprocfs_doprocstat(struct proc *curp, struct proc *p, struct pfsnode *pfs, PS_ADD("pgrp", "%d", p->p_pgid); PS_ADD("session", "%d", p->p_session->s_sid); PS_ADD("tty", "%d", 0); /* XXX */ - PS_ADD("tpgid", "%d", 0); /* XXX */ + PS_ADD("tpgid", "%d", kp.kp_tpgid); /* XXX */ PS_ADD("flags", "%u", 0); /* XXX */ - PS_ADD("minflt", "%u", 0); /* XXX */ - PS_ADD("cminflt", "%u", 0); /* XXX */ - PS_ADD("majflt", "%u", 0); /* XXX */ - PS_ADD("cminflt", "%u", 0); /* XXX */ - PS_ADD("utime", "%d", 0); /* XXX */ - PS_ADD("stime", "%d", 0); /* XXX */ - PS_ADD("cutime", "%d", 0); /* XXX */ - PS_ADD("cstime", "%d", 0); /* XXX */ - PS_ADD("counter", "%d", 0); /* XXX */ + PS_ADD("minflt", "%lu", kp.kp_ru.ru_minflt); /* XXX */ + PS_ADD("cminflt", "%lu", kp.kp_cru.ru_minflt); /* XXX */ + PS_ADD("majflt", "%lu", kp.kp_ru.ru_majflt); /* XXX */ + PS_ADD("cmajflt", "%lu", kp.kp_cru.ru_majflt); /* XXX */ + PS_ADD("utime", "%d", T2J(tvtohz_high(&kp.kp_ru.ru_utime))); /* XXX */ + PS_ADD("stime", "%d", T2J(tvtohz_high(&kp.kp_ru.ru_stime))); /* XXX */ + PS_ADD("cutime", "%d", T2J(tvtohz_high(&kp.kp_cru.ru_utime))); /* XXX */ + PS_ADD("cstime", "%d", T2J(tvtohz_high(&kp.kp_cru.ru_stime))); /* XXX */ PS_ADD("priority", "%d", 0); /* XXX */ + PS_ADD("nice", "%d", kp.kp_nice); PS_ADD("timeout", "%u", 0); /* XXX */ PS_ADD("itrealvalue", "%u", 0); /* XXX */ - PS_ADD("starttime", "%d", 0); /* XXX */ - PS_ADD("vsize", 
"%u", 0); /* XXX */ - PS_ADD("rss", "%u", 0); /* XXX */ - PS_ADD("rlim", "%u", 0); /* XXX */ - PS_ADD("startcode", "%u", 0); /* XXX */ - PS_ADD("endcode", "%u", 0); /* XXX */ - PS_ADD("startstack", "%u", 0); /* XXX */ + PS_ADD("starttime", "%d", T2J(tvtohz_high(&kp.kp_start))); /* XXX */ + PS_ADD("vsize", "%ju", P2K((uintmax_t)(kp.kp_vm_tsize + kp.kp_vm_dsize + kp.kp_vm_ssize))); /* XXX: not sure */ + PS_ADD("rss", "%ju", (uintmax_t)kp.kp_vm_rssize); /* XXX */ + PS_ADD("rlim", "%lu", kp.kp_ru.ru_maxrss); /* XXX */ + PS_ADD("startcode", "%lu", start); /* XXX */ + PS_ADD("endcode", "%lu", end); /* XXX */ + PS_ADD("startstack", "%lu", (u_long)p->p_vmspace->vm_minsaddr); /* XXX */ PS_ADD("kstkesp", "%u", 0); /* XXX */ PS_ADD("kstkeip", "%u", 0); /* XXX */ PS_ADD("signal", "%d", 0); /* XXX */ @@ -361,6 +414,12 @@ linprocfs_doprocstat(struct proc *curp, struct proc *p, struct pfsnode *pfs, PS_ADD("sigignore", "%d", 0); /* XXX */ PS_ADD("sigcatch", "%d", 0); /* XXX */ PS_ADD("wchan", "%u", 0); /* XXX */ + PS_ADD("nswap", "%lu", kp.kp_ru.ru_nswap); /* XXX */ + PS_ADD("cnswap", "%lu", kp.kp_cru.ru_nswap); /* XXX */ + PS_ADD("exitsignal", "%d", 0); /* XXX */ + PS_ADD("processor", "%u", kp.kp_lwp.kl_cpuid); /* XXX */ + PS_ADD("rt_priority", "%u", 0); /* XXX */ /* >= 2.5.19 */ + PS_ADD("policy", "%u", kp.kp_nice); /* XXX */ /* >= 2.5.19 */ #undef PS_ADD ps += ksprintf(ps, "\n"); @@ -483,3 +542,199 @@ linprocfs_doloadavg(struct proc *curp, struct proc *p, return(uiomove_frombuf(psbuf, ps - psbuf, uio)); } +int +linprocfs_donetdev(struct proc *curp, struct proc *p, struct pfsnode *pfs, + struct uio *uio) +{ + struct sbuf *sb; + char ifname[16]; /* XXX LINUX_IFNAMSIZ */ + struct ifnet *ifp; + int error; + + sb = sbuf_new_auto(); + + sbuf_printf(sb, "%6s|%58s|%s\n%6s|%58s|%58s\n", + "Inter-", " Receive", " Transmit", " face", + "bytes packets errs drop fifo frame compressed", + "bytes packets errs drop fifo frame compressed"); + + crit_enter(); + TAILQ_FOREACH(ifp, &ifnet, 
if_link) { + linux_ifname(ifp, ifname, sizeof ifname); + sbuf_printf(sb, "%6.6s:", ifname); + sbuf_printf(sb, "%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu ", + 0UL, 0UL, 0UL, 0UL, 0UL, 0UL, 0UL, 0UL); + sbuf_printf(sb, "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", + 0UL, 0UL, 0UL, 0UL, 0UL, 0UL, 0UL, 0UL); + } + crit_exit(); + sbuf_finish(sb); + error = uiomove_frombuf(sbuf_data(sb), sbuf_len(sb), uio); + sbuf_delete(sb); + return (error); +} + +int +linprocfs_dodevices(struct proc *curp, struct proc *p, struct pfsnode *pfs, + struct uio *uio) +{ + return 0; +} + +int +linprocfs_doosrelease(struct proc *curp, struct proc *p, struct pfsnode *pfs, + struct uio *uio) +{ + char *osrelease; + + osrelease = linux_get_osrelease(curthread); + return(uiomove_frombuf(osrelease, strlen(osrelease)+1, uio)); +} + +int +linprocfs_doostype(struct proc *curp, struct proc *p, struct pfsnode *pfs, + struct uio *uio) +{ + char *osname; + + osname = linux_get_osname(curthread); + return(uiomove_frombuf(osname, strlen(osname)+1, uio)); +} + +int +linprocfs_dopidmax(struct proc *curp, struct proc *p, struct pfsnode *pfs, + struct uio *uio) +{ + char buf[32]; + + ksnprintf(buf, sizeof(buf), "%d", PID_MAX); + return(uiomove_frombuf(buf, strlen(buf)+1, uio)); + return 0; +} + +int +linprocfs_domaps(struct proc *curp, struct proc *p, struct pfsnode *pfs, + struct uio *uio) +{ + int len; + int error; + vm_map_t map = &p->p_vmspace->vm_map; + vm_map_entry_t entry; + vm_ooffset_t off = 0; + char mebuffer[256]; + char *name = "", *freename = NULL; + struct vnode *vp; + struct vattr vat; + int major, minor; + ino_t ino; + + if (uio->uio_rw != UIO_READ) + return (EOPNOTSUPP); + + if (uio->uio_offset != 0) + return (0); + + error = 0; + vm_map_lock_read(map); + for (entry = map->header.next; + ((uio->uio_resid > 0) && (entry != &map->header)); + entry = entry->next) { + vm_object_t obj, tobj, lobj; + vm_offset_t ostart; + name = ""; + freename = NULL; + ino = 0; + if (entry->maptype != 
VM_MAPTYPE_NORMAL && + entry->maptype != VM_MAPTYPE_VPAGETABLE) { + continue; + } + /* + * Use map->hint as a poor man's ripout detector. + */ + map->hint = entry; + ostart = entry->start; + obj = entry->object.vm_object; + + for( lobj = tobj = obj; tobj; tobj = tobj->backing_object) + lobj = tobj; + + if (lobj) { + off = IDX_TO_OFF(lobj->size); + if (lobj->type == OBJT_VNODE) { + vp = lobj->handle; + if (vp) + vref(vp); + } else { + vp = NULL; + } + + if (vp) { + vn_fullpath(curproc, vp, &name, &freename); + vn_lock(vp, LK_SHARED | LK_RETRY); + VOP_GETATTR(vp, &vat); + ino = vat.va_fileid; + major = vat.va_rmajor; + minor = vat.va_rminor; + vput(vp); + } + } + if (freename == NULL) { + if (entry->eflags & MAP_ENTRY_STACK) + name = "[stack]"; + } + + /* + * format: + * start-end access offset major:minor inode [.text file] + */ + ksnprintf(mebuffer, sizeof(mebuffer), + "%08lx-%08lx %s%s%s%s %08llx %02x:%02x %llu%s%s\n", + (u_long)entry->start, (u_long)entry->end, + (entry->protection & VM_PROT_READ)?"r":"-", + (entry->protection & VM_PROT_WRITE)?"w":"-", + (entry->protection & VM_PROT_EXECUTE)?"x":"-", + "p", + off, /* offset */ + 0, /* major */ + 0, /* minor */ + ino, /* inode */ + *name ? " " : "", + name); + + if (freename) + kfree(freename, M_TEMP); + + len = strlen(mebuffer); + if (len > uio->uio_resid) { + error = EFBIG; + break; + } + + /* + * We cannot safely hold the map locked while accessing + * userspace as a VM fault might recurse the locked map. + */ + vm_map_unlock_read(map); + error = uiomove(mebuffer, len, uio); + vm_map_lock_read(map); + if (error) + break; + + /* + * We use map->hint as a poor man's ripout detector. If + * it does not match the entry we set it to prior to + * unlocking the map the entry MIGHT now be stale. In + * this case we do an expensive lookup to find our place + * in the iteration again. 
+ */ + if (map->hint != entry) { + vm_map_entry_t reentry; + + vm_map_lookup_entry(map, ostart, &reentry); + entry = reentry; + } + } + vm_map_unlock_read(map); + + return error; +} diff --git a/sys/emulation/linux/i386/linprocfs/linprocfs_subr.c b/sys/emulation/linux/i386/linprocfs/linprocfs_subr.c index 2058332f95..e1f52a86df 100644 --- a/sys/emulation/linux/i386/linprocfs/linprocfs_subr.c +++ b/sys/emulation/linux/i386/linprocfs/linprocfs_subr.c @@ -147,11 +147,15 @@ loop: switch (pfs_type) { case Proot: /* /proc = dr-xr-xr-x */ + vsetflags(vp, VROOT); + /* fallthrough */ + case Pnet: + case Psys: + case Psyskernel: pfs->pfs_mode = (VREAD|VEXEC) | (VREAD|VEXEC) >> 3 | (VREAD|VEXEC) >> 6; vp->v_type = VDIR; - vsetflags(vp, VROOT); break; case Pself: /* /proc/self = lr--r--r-- */ @@ -169,6 +173,9 @@ loop: break; case Pexe: + case Pcwd: + case Pprocroot: + case Pfd: pfs->pfs_mode = (VREAD|VEXEC) | (VREAD|VEXEC) >> 3 | (VREAD|VEXEC) >> 6; @@ -183,14 +190,22 @@ loop: case Pprocstat: case Pprocstatus: + case Pcmdline: + case Penviron: + case Pstatm: /* fallthrough */ - + case Pmaps: case Pmeminfo: case Pcpuinfo: case Pstat: case Puptime: case Pversion: case Ploadavg: + case Pdevices: + case Pnetdev: + case Posrelease: + case Postype: + case Ppidmax: pfs->pfs_mode = (VREAD) | (VREAD >> 3) | (VREAD >> 6); @@ -267,7 +282,6 @@ linprocfs_rw(struct vop_read_args *ap) tsleep(&pfs->pfs_lockowner, 0, "pfslck", 0); } pfs->pfs_lockowner = curthread; - switch (pfs->pfs_type) { case Pmem: rtval = procfs_domem(curp, lp, pfs, uio); @@ -296,6 +310,27 @@ linprocfs_rw(struct vop_read_args *ap) case Ploadavg: rtval = linprocfs_doloadavg(curp, p, pfs, uio); break; + case Pnetdev: + rtval = linprocfs_donetdev(curp, p, pfs, uio); + break; + case Pdevices: + rtval = linprocfs_dodevices(curp, p, pfs, uio); + break; + case Posrelease: + rtval = linprocfs_doosrelease(curp, p, pfs, uio); + break; + case Postype: + rtval = linprocfs_doostype(curp, p, pfs, uio); + break; + case Ppidmax: + rtval = 
linprocfs_dopidmax(curp, p, pfs, uio); + break; + case Pmaps: + rtval = linprocfs_domaps(curp, p, pfs, uio); + break; + case Pstatm: + rtval = linprocfs_dostatm(curp, p, pfs, uio); + break; default: rtval = EOPNOTSUPP; break; @@ -367,7 +402,7 @@ void linprocfs_init(void) { lwkt_token_init(&pfs_token); -} +} void linprocfs_exit(struct thread *td) diff --git a/sys/emulation/linux/i386/linprocfs/linprocfs_vnops.c b/sys/emulation/linux/i386/linprocfs/linprocfs_vnops.c index 14bc7036d1..a0bde59805 100644 --- a/sys/emulation/linux/i386/linprocfs/linprocfs_vnops.c +++ b/sys/emulation/linux/i386/linprocfs/linprocfs_vnops.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include "linprocfs.h" #include @@ -86,6 +87,9 @@ static int linprocfs_setattr (struct vop_setattr_args *); static int linprocfs_readdir_proc(struct vop_readdir_args *); static int linprocfs_readdir_root(struct vop_readdir_args *); +static int linprocfs_readdir_net(struct vop_readdir_args *ap); +static int linprocfs_readdir_sys(struct vop_readdir_args *ap); +static int linprocfs_readdir_syskernel(struct vop_readdir_args *ap); /* * procfs vnode operations. 
@@ -136,9 +140,20 @@ static struct proc_target { { DT_DIR, N("."), Pproc, NULL }, { DT_DIR, N(".."), Proot, NULL }, { DT_REG, N("mem"), Pmem, NULL }, + { DT_LNK, N("exe"), Pexe, NULL }, + { DT_LNK, N("cwd"), Pcwd, NULL }, + { DT_LNK, N("root"), Pprocroot, NULL }, + { DT_LNK, N("fd"), Pfd, NULL }, + { DT_REG, N("stat"), Pprocstat, NULL }, { DT_REG, N("status"), Pprocstatus, NULL }, + { DT_REG, N("maps"), Pmaps, NULL }, + { DT_REG, N("statm"), Pstatm, NULL }, +#if 0 + { DT_REG, N("cmdline"), Pcmdline, NULL }, + { DT_REG, N("environ"), Penviron, NULL }, +#endif #undef N }; static const int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]); @@ -466,6 +481,9 @@ linprocfs_getattr(struct vop_getattr_args *ap) switch (pfs->pfs_type) { case Proot: + case Pnet: + case Psys: + case Psyskernel: /* * Set nlink to 1 to tell fts(3) we don't actually know. */ @@ -491,7 +509,8 @@ linprocfs_getattr(struct vop_getattr_args *ap) case Pexe: { char *fullpath, *freepath; - error = vn_fullpath(procp, NULL, &fullpath, &freepath); + error = cache_fullpath(procp, &procp->p_textnch, &fullpath, &freepath); + /* error = vn_fullpath(procp, NULL, &fullpath, &freepath); */ if (error == 0) { vap->va_size = strlen(fullpath); kfree(freepath, M_TEMP); @@ -502,6 +521,45 @@ linprocfs_getattr(struct vop_getattr_args *ap) vap->va_bytes = vap->va_size; break; } + case Pcwd: { + char *fullpath, *freepath; + error = cache_fullpath(procp, &procp->p_fd->fd_ncdir, &fullpath, &freepath); + if (error == 0) { + vap->va_size = strlen(fullpath); + kfree(freepath, M_TEMP); + } else { + vap->va_size = sizeof("unknown") - 1; + error = 0; + } + vap->va_bytes = vap->va_size; + break; + } + case Pprocroot: { + struct nchandle *nchp; + char *fullpath, *freepath; + nchp = jailed(procp->p_ucred) ? 
&procp->p_fd->fd_njdir : &procp->p_fd->fd_nrdir; + error = cache_fullpath(procp, nchp, &fullpath, &freepath); + if (error == 0) { + vap->va_size = strlen(fullpath); + kfree(freepath, M_TEMP); + } else { + vap->va_size = sizeof("unknown") - 1; + error = 0; + } + vap->va_bytes = vap->va_size; + break; + } + case Pfd: { + if (procp == curproc) { + vap->va_size = sizeof("/dev/fd") - 1; + error = 0; + } else { + vap->va_size = sizeof("unknown") - 1; + error = 0; + } + vap->va_bytes = vap->va_size; + break; + } case Pmeminfo: case Pcpuinfo: @@ -509,6 +567,11 @@ linprocfs_getattr(struct vop_getattr_args *ap) case Puptime: case Pversion: case Ploadavg: + case Pnetdev: + case Pdevices: + case Posrelease: + case Postype: + case Ppidmax: vap->va_bytes = vap->va_size = 0; vap->va_uid = 0; vap->va_gid = 0; @@ -528,6 +591,10 @@ linprocfs_getattr(struct vop_getattr_args *ap) case Pprocstat: case Pprocstatus: + case Pcmdline: + case Penviron: + case Pmaps: + case Pstatm: vap->va_bytes = vap->va_size = 0; /* uid, gid are already set */ break; @@ -651,6 +718,50 @@ linprocfs_lookup(struct vop_old_lookup_args *ap) pfs = VTOPFS(dvp); switch (pfs->pfs_type) { + case Psys: + if (cnp->cn_flags & CNP_ISDOTDOT) { + error = linprocfs_root(dvp->v_mount, vpp); + goto out; + } + if (CNEQ(cnp, "kernel", 6)) { + error = linprocfs_allocvp(dvp->v_mount, vpp, 0, Psyskernel); + goto out; + } + break; + case Pnet: + if (cnp->cn_flags & CNP_ISDOTDOT) { + error = linprocfs_root(dvp->v_mount, vpp); + goto out; + } + if (CNEQ(cnp, "dev", 3)) { + error = linprocfs_allocvp(dvp->v_mount, vpp, 0, Pnetdev); + goto out; + } + break; + case Psyskernel: + if (cnp->cn_flags & CNP_ISDOTDOT) { + /* XXX: this is wrong, wrong, wrong. 
*/ + error = linprocfs_root(dvp->v_mount, vpp); + goto out; + } + if (CNEQ(cnp, "osrelease", 9)) { + error = linprocfs_allocvp(dvp->v_mount, vpp, 0, Posrelease); + goto out; + } + if (CNEQ(cnp, "ostype", 6)) { + error = linprocfs_allocvp(dvp->v_mount, vpp, 0, Postype); + goto out; + } + if (CNEQ(cnp, "pid_max", 7)) { + error = linprocfs_allocvp(dvp->v_mount, vpp, 0, Ppidmax); + goto out; + } + if (CNEQ(cnp, "version", 7)) { + error = linprocfs_allocvp(dvp->v_mount, vpp, 0, Pversion); + goto out; + } + break; + case Proot: if (cnp->cn_flags & CNP_ISDOTDOT) return (EIO); @@ -683,6 +794,14 @@ linprocfs_lookup(struct vop_old_lookup_args *ap) error = linprocfs_allocvp(dvp->v_mount, vpp, 0, Ploadavg); goto out; } + if (CNEQ(cnp, "net", 3)) { + error = linprocfs_allocvp(dvp->v_mount, vpp, 0, Pnet); + goto out; + } + if (CNEQ(cnp, "sys", 3)) { + error = linprocfs_allocvp(dvp->v_mount, vpp, 0, Psys); + goto out; + } pid = atopid(pname, cnp->cn_namelen); if (pid == NO_PID) @@ -807,6 +926,15 @@ linprocfs_readdir(struct vop_readdir_args *ap) */ error = linprocfs_readdir_root(ap); break; + case Pnet: + error = linprocfs_readdir_net(ap); + break; + case Psys: + error = linprocfs_readdir_sys(ap); + break; + case Psyskernel: + error = linprocfs_readdir_syskernel(ap); + break; default: error = ENOTDIR; break; @@ -870,7 +998,7 @@ linprocfs_readdir_root(struct vop_readdir_args *ap) { struct linprocfs_readdir_root_info info; struct uio *uio = ap->a_uio; - int res; + int res = 0; info.error = 0; info.i = uio->uio_offset; @@ -878,7 +1006,7 @@ linprocfs_readdir_root(struct vop_readdir_args *ap) info.uio = uio; info.cred = ap->a_cred; - while (info.pcnt < 9) { + while (info.pcnt < 11) { res = linprocfs_readdir_root_callback(NULL, &info); if (res < 0) break; @@ -964,7 +1092,26 @@ linprocfs_readdir_root_callback(struct proc *p, void *data) d_name = "loadavg"; d_type = DT_REG; break; - + case 9: + d_ino = PROCFS_FILENO(0, Pnet); + d_namlen = 3; + d_name = "net"; + d_type = DT_DIR; + break; + 
case 10: + d_ino = PROCFS_FILENO(0, Psys); + d_namlen = 3; + d_name = "sys"; + d_type = DT_DIR; + break; +#if 0 + case 11: + d_ino = PROCFS_FILENO(0, Pdevices); + d_namlen = 7; + d_name = "devices"; + d_type = DT_REG; + break; +#endif default: /* * Ignore processes that aren't in our prison @@ -1015,6 +1162,292 @@ linprocfs_readdir_root_callback(struct proc *p, void *data) } /* + * Scan the root directory by scanning all process + */ +static int linprocfs_readdir_net_callback(struct proc *p, void *data); + +static int +linprocfs_readdir_net(struct vop_readdir_args *ap) +{ + struct linprocfs_readdir_root_info info; + struct uio *uio = ap->a_uio; + int res; + + info.error = 0; + info.i = uio->uio_offset; + info.pcnt = 0; + info.uio = uio; + info.cred = ap->a_cred; + + while (info.pcnt < 3) { + res = linprocfs_readdir_net_callback(NULL, &info); + if (res < 0) + break; + } + + uio->uio_offset = info.i; + return(info.error); +} + +static int +linprocfs_readdir_net_callback(struct proc *p, void *data) +{ + struct linprocfs_readdir_root_info *info = data; + int retval; + struct uio *uio = info->uio; + ino_t d_ino; + const char *d_name; + size_t d_namlen; + uint8_t d_type; + + switch (info->pcnt) { + case 0: /* `.' */ + d_ino = PROCFS_FILENO(0, Pnet); + d_name = "."; + d_namlen = 1; + d_type = DT_DIR; + break; + case 1: /* `..' 
*/ + d_ino = PROCFS_FILENO(0, Proot); + d_name = ".."; + d_namlen = 2; + d_type = DT_DIR; + break; + + case 2: + d_ino = PROCFS_FILENO(0, Pnetdev); /* same type lookup allocates for "dev" */ + d_namlen = 3; + d_name = "dev"; + d_type = DT_REG; + break; + default: + d_ino = 0; + d_namlen = 0; + d_name = NULL; + d_type = DT_REG; + break; + } + + /* + * Skip entries we have already read + */ + if (info->pcnt < info->i) { + ++info->pcnt; + return(0); + } + retval = vop_write_dirent(&info->error, info->uio, + d_ino, d_type, d_namlen, d_name); + if (retval == 0) { + ++info->pcnt; /* iterate proc candidates scanned */ + ++info->i; /* iterate entries written */ + } + if (retval || info->error || uio->uio_resid <= 0) + return(-1); + return(0); +} + + + + + + + +/* + * Read the sys directory: a fixed list of entries, no process scan + */ +static int linprocfs_readdir_sys_callback(struct proc *p, void *data); + +static int +linprocfs_readdir_sys(struct vop_readdir_args *ap) +{ + struct linprocfs_readdir_root_info info; + struct uio *uio = ap->a_uio; + int res; + + info.error = 0; + info.i = uio->uio_offset; + info.pcnt = 0; + info.uio = uio; + info.cred = ap->a_cred; + + while (info.pcnt < 3) { + res = linprocfs_readdir_sys_callback(NULL, &info); + if (res < 0) + break; + } + + uio->uio_offset = info.i; + return(info.error); +} + +static int +linprocfs_readdir_sys_callback(struct proc *p, void *data) +{ + struct linprocfs_readdir_root_info *info = data; + int retval; + struct uio *uio = info->uio; + ino_t d_ino; + const char *d_name; + size_t d_namlen; + uint8_t d_type; + + switch (info->pcnt) { + case 0: /* `.' */ + d_ino = PROCFS_FILENO(0, Psys); + d_name = "."; + d_namlen = 1; + d_type = DT_DIR; + break; + case 1: /* `..' 
*/ + d_ino = PROCFS_FILENO(0, Proot); + d_name = ".."; + d_namlen = 2; + d_type = DT_DIR; + break; + + case 2: + d_ino = PROCFS_FILENO(0, Psyskernel); + d_namlen = 6; + d_name = "kernel"; + d_type = DT_DIR; + break; + default: + d_ino = 0; + d_namlen = 0; + d_name = NULL; + d_type = DT_REG; + break; + } + + /* + * Skip processes we have already read + */ + if (info->pcnt < info->i) { + ++info->pcnt; + return(0); + } + retval = vop_write_dirent(&info->error, info->uio, + d_ino, d_type, d_namlen, d_name); + if (retval == 0) { + ++info->pcnt; /* iterate proc candidates scanned */ + ++info->i; /* iterate entries written */ + } + if (retval || info->error || uio->uio_resid <= 0) + return(-1); + return(0); +} + + + + + +/* + * Scan the root directory by scanning all process + */ +static int linprocfs_readdir_syskernel_callback(struct proc *p, void *data); + +static int +linprocfs_readdir_syskernel(struct vop_readdir_args *ap) +{ + struct linprocfs_readdir_root_info info; + struct uio *uio = ap->a_uio; + int res; + + info.error = 0; + info.i = uio->uio_offset; + info.pcnt = 0; + info.uio = uio; + info.cred = ap->a_cred; + + while (info.pcnt < 6) { + res = linprocfs_readdir_syskernel_callback(NULL, &info); + if (res < 0) + break; + } + + uio->uio_offset = info.i; + return(info.error); +} + +static int +linprocfs_readdir_syskernel_callback(struct proc *p, void *data) +{ + struct linprocfs_readdir_root_info *info = data; + int retval; + struct uio *uio = info->uio; + ino_t d_ino; + const char *d_name; + size_t d_namlen; + uint8_t d_type; + + switch (info->pcnt) { + case 0: /* `.' */ + d_ino = PROCFS_FILENO(0, Psyskernel); + d_name = "."; + d_namlen = 1; + d_type = DT_DIR; + break; + case 1: /* `..' 
*/ + d_ino = PROCFS_FILENO(0, Psys); + d_name = ".."; + d_namlen = 2; + d_type = DT_DIR; + break; + + case 2: + d_ino = PROCFS_FILENO(0, Posrelease); + d_namlen = 9; + d_name = "osrelease"; + d_type = DT_REG; + break; + + case 3: + d_ino = PROCFS_FILENO(0, Postype); + d_namlen = 6; /* length of "ostype" */ + d_name = "ostype"; + d_type = DT_REG; + break; + + case 4: + d_ino = PROCFS_FILENO(0, Pversion); + d_namlen = 7; + d_name = "version"; + d_type = DT_REG; + break; + + case 5: + d_ino = PROCFS_FILENO(0, Ppidmax); + d_namlen = 7; + d_name = "pid_max"; + d_type = DT_REG; + break; + default: + d_ino = 0; + d_namlen = 0; + d_name = NULL; + d_type = DT_REG; + break; + } + + /* + * Skip entries we have already read + */ + if (info->pcnt < info->i) { + ++info->pcnt; + return(0); + } + retval = vop_write_dirent(&info->error, info->uio, + d_ino, d_type, d_namlen, d_name); + if (retval == 0) { + ++info->pcnt; /* iterate proc candidates scanned */ + ++info->i; /* iterate entries written */ + } + if (retval || info->error || uio->uio_resid <= 0) + return(-1); + return(0); +} + +/* * readlink reads the link of `self' or `exe' */ static int @@ -1023,6 +1456,7 @@ linprocfs_readlink(struct vop_readlink_args *ap) char buf[16]; /* should be enough */ struct proc *procp; struct vnode *vp = ap->a_vp; + struct nchandle *nchp; struct pfsnode *pfs = VTOPFS(vp); char *fullpath, *freepath; int error, len; @@ -1047,13 +1481,61 @@ linprocfs_readlink(struct vop_readlink_args *ap) return (uiomove("unknown", sizeof("unknown") - 1, ap->a_uio)); } - error = vn_fullpath(procp, NULL, &fullpath, &freepath); + error = cache_fullpath(procp, &procp->p_textnch, &fullpath, &freepath); + if (error != 0) + return (uiomove("unknown", sizeof("unknown") - 1, + ap->a_uio)); + error = uiomove(fullpath, strlen(fullpath), ap->a_uio); + kfree(freepath, M_TEMP); + return (error); + case Pcwd: + procp = PFIND(pfs->pfs_pid); + if (procp == NULL || procp->p_ucred == NULL) { + kprintf("linprocfs_readlink: pid %d disappeared\n", + 
pfs->pfs_pid); + return (uiomove("unknown", sizeof("unknown") - 1, + ap->a_uio)); + } + error = cache_fullpath(procp, &procp->p_fd->fd_ncdir, &fullpath, &freepath); + if (error != 0) + return (uiomove("unknown", sizeof("unknown") - 1, + ap->a_uio)); + error = uiomove(fullpath, strlen(fullpath), ap->a_uio); + kfree(freepath, M_TEMP); + return (error); + case Pprocroot: + procp = PFIND(pfs->pfs_pid); + if (procp == NULL || procp->p_ucred == NULL) { + kprintf("linprocfs_readlink: pid %d disappeared\n", + pfs->pfs_pid); + return (uiomove("unknown", sizeof("unknown") - 1, + ap->a_uio)); + } + nchp = jailed(procp->p_ucred) ? &procp->p_fd->fd_njdir : &procp->p_fd->fd_nrdir; + error = cache_fullpath(procp, nchp, &fullpath, &freepath); if (error != 0) return (uiomove("unknown", sizeof("unknown") - 1, ap->a_uio)); error = uiomove(fullpath, strlen(fullpath), ap->a_uio); kfree(freepath, M_TEMP); return (error); + case Pfd: + procp = PFIND(pfs->pfs_pid); + if (procp == NULL || procp->p_ucred == NULL) { + kprintf("linprocfs_readlink: pid %d disappeared\n", + pfs->pfs_pid); + return (uiomove("unknown", sizeof("unknown") - 1, + ap->a_uio)); + } + if (procp == curproc) { + return (uiomove("/dev/fd", sizeof("/dev/fd") - 1, + ap->a_uio)); + } else { + return (uiomove("unknown", sizeof("unknown") - 1, + ap->a_uio)); + } + /* notreached */ + break; default: return (EINVAL); } diff --git a/sys/emulation/linux/i386/linux.h b/sys/emulation/linux/i386/linux.h index 4386f738df..2e9a238054 100644 --- a/sys/emulation/linux/i386/linux.h +++ b/sys/emulation/linux/i386/linux.h @@ -44,10 +44,17 @@ extern u_char linux_debug_map[]; #define ARGS(nm, fmt) "linux(%ld): "#nm"("fmt")\n", (long)(curthread->td_proc ? curthread->td_proc->p_pid : -1) #define LMSG(fmt) "linux(%ld): "fmt"\n", (long)(curthread->td_proc ? 
curthread->td_proc->p_pid : -1) +#if 0 +#define LINUX_DEBUG 1 +#endif + #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_LINUX); #endif +#define PTRIN(v) (void *)(v) +#define PTROUT(v) (l_uintptr_t)(v) + /* * Provide a separate set of types for the Linux types. */ @@ -60,6 +67,7 @@ typedef uint32_t l_ulong; typedef uint64_t l_ulonglong; typedef unsigned short l_ushort; +typedef l_ulong l_uintptr_t; typedef char *l_caddr_t; typedef l_long l_clock_t; typedef l_int l_daddr_t; @@ -77,6 +85,8 @@ typedef l_long l_suseconds_t; typedef l_long l_time_t; typedef l_uint l_uid_t; typedef l_ushort l_uid16_t; +typedef l_int l_timer_t; +typedef l_int l_mqd_t; typedef struct { l_int val[2]; @@ -143,8 +153,8 @@ struct l_rlimit { * stat family of syscalls */ struct l_timespec { - l_ulong tv_sec; - l_ulong tv_nsec; + l_time_t tv_sec; + l_long tv_nsec; }; struct l_newstat { @@ -187,6 +197,19 @@ struct l_stat64 { l_ulonglong st_ino; }; +struct l_statfs64 { + l_int f_type; + l_int f_bsize; + uint64_t f_blocks; + uint64_t f_bfree; + uint64_t f_bavail; + uint64_t f_files; + uint64_t f_ffree; + l_fsid_t f_fsid; + l_int f_namelen; + l_int f_spare[6]; +}; + struct l_new_utsname { char sysname[LINUX_MAX_UTSNAME]; char nodename[LINUX_MAX_UTSNAME]; @@ -231,7 +254,7 @@ struct l_new_utsname { #define LINUX_SIGIO 29 #define LINUX_SIGPOLL LINUX_SIGIO #define LINUX_SIGPWR 30 -#define LINUX_SIGUNUSED 31 +#define LINUX_SIGSYS 31 #define LINUX_SIGTBLSZ 31 #define LINUX_NSIG_WORDS 2 @@ -596,6 +619,13 @@ union l_semun { #define LINUX_SO_NO_CHECK 11 #define LINUX_SO_PRIORITY 12 #define LINUX_SO_LINGER 13 +#define LINUX_SO_PEERCRED 17 +#define LINUX_SO_RCVLOWAT 18 +#define LINUX_SO_SNDLOWAT 19 +#define LINUX_SO_RCVTIMEO 20 +#define LINUX_SO_SNDTIMEO 21 +#define LINUX_SO_TIMESTAMP 29 +#define LINUX_SO_ACCEPTCONN 30 #define LINUX_IP_TOS 1 #define LINUX_IP_TTL 2 @@ -638,6 +668,7 @@ struct l_ifreq { struct l_sockaddr ifru_hwaddr; l_short ifru_flags[1]; l_int ifru_metric; + l_int ifru_ivalue; l_int ifru_mtu; struct 
l_ifmap ifru_map; char ifru_slave[LINUX_IFNAMSIZ]; @@ -647,6 +678,8 @@ struct l_ifreq { #define ifr_name ifr_ifrn.ifrn_name /* interface name */ #define ifr_hwaddr ifr_ifru.ifru_hwaddr /* MAC address */ +#define ifr_ifindex ifr_ifru.ifru_ivalue /* Interface index */ +#define ifr_ifmetric ifr_ifru.ifru_metric /* Interface metric */ /* * poll() @@ -669,4 +702,119 @@ struct l_pollfd { l_short revents; }; +struct l_user_desc { + l_uint entry_number; + l_uint base_addr; + l_uint limit; + l_uint seg_32bit:1; + l_uint contents:2; + l_uint read_exec_only:1; + l_uint limit_in_pages:1; + l_uint seg_not_present:1; + l_uint useable:1; +}; + +struct l_desc_struct { + unsigned long a, b; +}; + + +#define LINUX_LOWERWORD 0x0000ffff + +/* + * Macros which do the same thing as those in Linux include/asm-um/ldt-i386.h. + * These convert a Linux user space descriptor to a machine one. + */ +#define LINUX_LDT_entry_a(info) \ + ((((info)->base_addr & LINUX_LOWERWORD) << 16) | \ + ((info)->limit & LINUX_LOWERWORD)) + +#define LINUX_ENTRY_B_READ_EXEC_ONLY 9 +#define LINUX_ENTRY_B_CONTENTS 10 +#define LINUX_ENTRY_B_SEG_NOT_PRESENT 15 +#define LINUX_ENTRY_B_BASE_ADDR 16 +#define LINUX_ENTRY_B_USEABLE 20 +#define LINUX_ENTRY_B_SEG32BIT 22 +#define LINUX_ENTRY_B_LIMIT 23 + +#define LINUX_LDT_entry_b(info) \ + (((info)->base_addr & 0xff000000) | \ + ((info)->limit & 0xf0000) | \ + ((info)->contents << LINUX_ENTRY_B_CONTENTS) | \ + (((info)->seg_not_present == 0) << LINUX_ENTRY_B_SEG_NOT_PRESENT) | \ + (((info)->base_addr & 0x00ff0000) >> LINUX_ENTRY_B_BASE_ADDR) | \ + (((info)->read_exec_only == 0) << LINUX_ENTRY_B_READ_EXEC_ONLY) | \ + ((info)->seg_32bit << LINUX_ENTRY_B_SEG32BIT) | \ + ((info)->useable << LINUX_ENTRY_B_USEABLE) | \ + ((info)->limit_in_pages << LINUX_ENTRY_B_LIMIT) | 0x7000) + +#define LINUX_LDT_empty(info) \ + ((info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->seg_not_present == 1 && \ + (info)->read_exec_only == 1 && \ +
(info)->seg_32bit == 0 && \ + (info)->limit_in_pages == 0 && \ + (info)->useable == 0) + +/* + * Macros for converting segments. + * They do the same as those in arch/i386/kernel/process.c in Linux. + */ +#define LINUX_GET_BASE(desc) \ + ((((desc)->a >> 16) & LINUX_LOWERWORD) | \ + (((desc)->b << 16) & 0x00ff0000) | \ + ((desc)->b & 0xff000000)) + +#define LINUX_GET_LIMIT(desc) \ + (((desc)->a & LINUX_LOWERWORD) | \ + ((desc)->b & 0xf0000)) + +#define LINUX_GET_32BIT(desc) \ + (((desc)->b >> LINUX_ENTRY_B_SEG32BIT) & 1) +#define LINUX_GET_CONTENTS(desc) \ + (((desc)->b >> LINUX_ENTRY_B_CONTENTS) & 3) +#define LINUX_GET_WRITABLE(desc) \ + (((desc)->b >> LINUX_ENTRY_B_READ_EXEC_ONLY) & 1) +#define LINUX_GET_LIMIT_PAGES(desc) \ + (((desc)->b >> LINUX_ENTRY_B_LIMIT) & 1) +#define LINUX_GET_PRESENT(desc) \ + (((desc)->b >> LINUX_ENTRY_B_SEG_NOT_PRESENT) & 1) +#define LINUX_GET_USEABLE(desc) \ + (((desc)->b >> LINUX_ENTRY_B_USEABLE) & 1) + +#define LINUX_CLOCK_REALTIME 0 +#define LINUX_CLOCK_MONOTONIC 1 +#define LINUX_CLOCK_PROCESS_CPUTIME_ID 2 +#define LINUX_CLOCK_THREAD_CPUTIME_ID 3 +#define LINUX_CLOCK_REALTIME_HR 4 +#define LINUX_CLOCK_MONOTONIC_HR 5 + +#define LINUX_CLONE_VM 0x00000100 +#define LINUX_CLONE_FS 0x00000200 +#define LINUX_CLONE_FILES 0x00000400 +#define LINUX_CLONE_SIGHAND 0x00000800 +#define LINUX_CLONE_PID 0x00001000 /* No longer exist in Linux */ +#define LINUX_CLONE_VFORK 0x00004000 +#define LINUX_CLONE_PARENT 0x00008000 +#define LINUX_CLONE_THREAD 0x00010000 +#define LINUX_CLONE_SETTLS 0x00080000 +#define LINUX_CLONE_PARENT_SETTID 0x00100000 +#define LINUX_CLONE_CHILD_CLEARTID 0x00200000 +#define LINUX_CLONE_CHILD_SETTID 0x01000000 + +#define LINUX_THREADING_FLAGS \ + (LINUX_CLONE_VM | LINUX_CLONE_FS | LINUX_CLONE_FILES | \ + LINUX_CLONE_SIGHAND | LINUX_CLONE_THREAD) + +#define LINUX_AT_FDCWD -100 +#define LINUX_AT_SYMLINK_NOFOLLOW 0x100 +#define LINUX_AT_REMOVEDIR 0x200 + +#define LINUX_MREMAP_MAYMOVE 1 +#define LINUX_MREMAP_FIXED 2 + +#define 
LINUX_UTIME_NOW (-1) +#define LINUX_UTIME_OMIT (-2) #endif /* !_EMULATION_LINUX_ARCH_LINUX_H_ */ diff --git a/sys/emulation/linux/i386/linux_dummy.c b/sys/emulation/linux/i386/linux_dummy.c index 7c8f8c91fe..c3dd683fc1 100644 --- a/sys/emulation/linux/i386/linux_dummy.c +++ b/sys/emulation/linux/i386/linux_dummy.c @@ -69,8 +69,8 @@ DUMMY(setfsuid); DUMMY(setfsgid); DUMMY(pivot_root); DUMMY(mincore); -DUMMY(madvise); DUMMY(fadvise64); +DUMMY(statfs64); #define DUMMY_XATTR(s) \ int \ diff --git a/sys/emulation/linux/i386/linux_machdep.c b/sys/emulation/linux/i386/linux_machdep.c index e2edbafe0c..822f8f3217 100644 --- a/sys/emulation/linux/i386/linux_machdep.c +++ b/sys/emulation/linux/i386/linux_machdep.c @@ -40,7 +40,9 @@ #include #include #include +#include #include +#include #include #include @@ -60,6 +62,7 @@ #include "../linux_ipc.h" #include "../linux_signal.h" #include "../linux_util.h" +#include "../linux_emuldata.h" struct l_descriptor { l_uint entry_number; @@ -139,8 +142,11 @@ sys_linux_execve(struct linux_execve_args *args) * Linux will register %edx as an atexit function and we must be * sure to set it to 0. 
XXX */ - if (error == 0) + if (error == 0) { args->sysmsg_result64 = 0; + if (curproc->p_sysent == &elf_linux_sysvec) + error = emuldata_init(curproc, NULL, 0); + } exec_free_args(&exec_args); linux_free_path(&path); @@ -225,6 +231,10 @@ sys_linux_ipc(struct linux_ipc_args *args) a.msqid = args->arg1; a.msgsz = args->arg2; + if (a.msgsz < 0) { + error = EINVAL; + break; + } a.msgflg = args->arg3; a.sysmsg_lresult = 0; if ((args->what >> 16) == 0) { @@ -356,35 +366,80 @@ sys_linux_old_select(struct linux_old_select_args *args) int sys_linux_fork(struct linux_fork_args *args) { + struct lwp *lp = curthread->td_lwp; + struct proc *p2; int error; -#ifdef DEBUG - if (ldebug(fork)) - kprintf(ARGS(fork, "")); -#endif + get_mplock(); + error = fork1(lp, RFFDG | RFPROC | RFPGLOCK, &p2); + if (error == 0) { + emuldata_init(curproc, p2, 0); - if ((error = sys_fork((struct fork_args *)args)) != 0) - return (error); + start_forked_proc(lp, p2); + args->sysmsg_fds[0] = p2->p_pid; + args->sysmsg_fds[1] = 0; + } + rel_mplock(); + /* Are we the child? 
*/ if (args->sysmsg_iresult == 1) args->sysmsg_iresult = 0; - return (0); + + return (error); } /* - * MPSAFE + * MPALMOSTSAFE */ int sys_linux_exit_group(struct linux_exit_group_args *args) { - struct exit_args newargs; - int error; + struct linux_emuldata *em, *e; + int sig; - newargs.sysmsg_iresult = 0; - newargs.rval = args->rval; - error = sys_exit(&newargs); - args->sysmsg_iresult = newargs.sysmsg_iresult; - return (error); + sig = args->rval; + + get_mplock(); + + EMUL_LOCK(); + + em = emuldata_get(curproc); + + if (em->s->refs == 1) { + exit1(W_EXITCODE(0, sig)); + /* notreached */ + + EMUL_UNLOCK(); + + rel_mplock(); + return (0); + } + KKASSERT(em->proc == curproc); + em->flags |= EMUL_DIDKILL; + em->s->flags |= LINUX_LES_INEXITGROUP; + em->s->xstat = W_EXITCODE(0, sig); + + LIST_REMOVE(em, threads); + LIST_INSERT_HEAD(&em->s->threads, em, threads); + + while ((e = LIST_NEXT(em, threads)) != NULL) { + LIST_REMOVE(em, threads); + LIST_INSERT_AFTER(e, em, threads); + if ((e->flags & EMUL_DIDKILL) == 0) { + e->flags |= EMUL_DIDKILL; + KKASSERT(pfind(e->proc->p_pid) == e->proc); + ksignal(e->proc, SIGKILL); + } + } + + + EMUL_UNLOCK(); + + exit1(W_EXITCODE(0, sig)); + rel_mplock(); + /* notreached */ + + return (0); } /* @@ -393,26 +448,26 @@ sys_linux_exit_group(struct linux_exit_group_args *args) int sys_linux_vfork(struct linux_vfork_args *args) { + struct lwp *lp = curthread->td_lwp; + struct proc *p2; int error; -#ifdef DEBUG - if (ldebug(vfork)) - kprintf(ARGS(vfork, "")); -#endif + get_mplock(); + error = fork1(lp, RFFDG | RFPROC | RFPPWAIT | RFMEM | RFPGLOCK, &p2); + if (error == 0) { + emuldata_init(curproc, p2, 0); + + start_forked_proc(lp, p2); + args->sysmsg_fds[0] = p2->p_pid; + args->sysmsg_fds[1] = 0; + } + rel_mplock(); - if ((error = sys_vfork((struct vfork_args *)args)) != 0) - return (error); - /* Are we the child? 
*/ if (args->sysmsg_iresult == 1) args->sysmsg_iresult = 0; - return (0); -} -#define CLONE_VM 0x100 -#define CLONE_FS 0x200 -#define CLONE_FILES 0x400 -#define CLONE_SIGHAND 0x800 -#define CLONE_PID 0x1000 + return (error); +} /* * MPALMOSTSAFE @@ -420,11 +475,16 @@ sys_linux_vfork(struct linux_vfork_args *args) int sys_linux_clone(struct linux_clone_args *args) { + struct segment_descriptor *desc; + struct l_user_desc info; + int idx; + int a[2]; + + struct lwp *lp = curthread->td_lwp; int error, ff = RFPROC; struct proc *p2 = NULL; int exit_signal; vm_offset_t start; - struct rfork_args rf_args; #ifdef DEBUG if (ldebug(clone)) { @@ -434,52 +494,111 @@ sys_linux_clone(struct linux_clone_args *args) kprintf(LMSG("CLONE_PID not yet supported")); } #endif - - if (!args->stack) - return (EINVAL); - exit_signal = args->flags & 0x000000ff; if (exit_signal >= LINUX_NSIG) return (EINVAL); - if (exit_signal <= LINUX_SIGTBLSZ) exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)]; - /* RFTHREAD probably not necessary here, but it shouldn't hurt */ - ff |= RFTHREAD; - - if (args->flags & CLONE_VM) + if (args->flags & LINUX_CLONE_VM) ff |= RFMEM; - if (args->flags & CLONE_SIGHAND) + if (args->flags & LINUX_CLONE_SIGHAND) ff |= RFSIGSHARE; - if (!(args->flags & CLONE_FILES)) + if (!(args->flags & (LINUX_CLONE_FILES | LINUX_CLONE_FS))) ff |= RFFDG; + if ((args->flags & 0xffffff00) == LINUX_THREADING_FLAGS) + ff |= RFTHREAD; + if (args->flags & LINUX_CLONE_VFORK) + ff |= RFPPWAIT; + if (args->flags & LINUX_CLONE_PARENT_SETTID) { + if (args->parent_tidptr == NULL) + return (EINVAL); + } error = 0; start = 0; - rf_args.flags = ff; - rf_args.sysmsg_iresult = 0; get_mplock(); - if ((error = sys_rfork(&rf_args)) == 0) { - args->sysmsg_iresult = rf_args.sysmsg_iresult; + error = fork1(lp, ff | RFPGLOCK, &p2); + if (error) { + rel_mplock(); + return error; + } - p2 = pfind(rf_args.sysmsg_iresult); - if (p2 == NULL) - error = ESRCH; + args->sysmsg_fds[0] = p2 ? 
p2->p_pid : 0; + args->sysmsg_fds[1] = 0; + + if (args->flags & (LINUX_CLONE_PARENT | LINUX_CLONE_THREAD)) + proc_reparent(p2, curproc->p_pptr /* XXX */); + + emuldata_init(curproc, p2, args->flags); + linux_proc_fork(p2, curproc, args->child_tidptr); + /* + * XXX: this can't happen, p2 is never NULL, or else we'd have + * other problems, too (see p2->p_sigparent == ..., + * linux_proc_fork and emuldata_init. + */ + if (p2 == NULL) { + error = ESRCH; + } else { + if (args->flags & LINUX_CLONE_PARENT_SETTID) { + error = copyout(&p2->p_pid, args->parent_tidptr, sizeof(p2->p_pid)); + } } - rel_mplock(); - if (error == 0) { - p2->p_sigparent = exit_signal; + + p2->p_sigparent = exit_signal; + if (args->stack) { ONLY_LWP_IN_PROC(p2)->lwp_md.md_regs->tf_esp = - (unsigned long)args->stack; + (unsigned long)args->stack; + } + + if (args->flags & LINUX_CLONE_SETTLS) { + error = copyin((void *)curthread->td_lwp->lwp_md.md_regs->tf_esi, &info, sizeof(struct l_user_desc)); + if (error) { + kprintf("copyin of tf_esi to info failed\n"); + } else { + idx = info.entry_number; + /* + * We understand both our own entries such as the ones + * we provide on linux_set_thread_area, as well as the + * linux-type entries 6-8. + */ + if ((idx < 6 || idx > 8) && (idx < GTLS_START)) { + kprintf("LINUX_CLONE_SETTLS, invalid idx requested: %d\n", idx); + goto out; + } + if (idx < GTLS_START) { + idx -= 6; + } else { +#ifdef SMP + idx -= (GTLS_START + mycpu->gd_cpuid * NGDT); +#else + idx -= GTLS_START; +#endif + } + if (idx < 0 || idx >= NGTLS) goto out; /* user-supplied slot: never index outside td_tls.tls[] */ + + a[0] = LINUX_LDT_entry_a(&info); + a[1] = LINUX_LDT_entry_b(&info); + if (p2) { + desc = &FIRST_LWP_IN_PROC(p2)->lwp_thread->td_tls.tls[idx]; + memcpy(desc, &a, sizeof(a)); + } else { + kprintf("linux_clone...
we don't have a p2\n"); + } + } + } +out: + if (p2) + start_forked_proc(lp, p2); + rel_mplock(); #ifdef DEBUG - if (ldebug(clone)) - kprintf(LMSG("clone: successful rfork to %ld"), - (long)p2->p_pid); + if (ldebug(clone)) + kprintf(LMSG("clone: successful rfork to %ld"), + (long)p2->p_pid); #endif - } + return (error); } @@ -753,6 +872,7 @@ sys_linux_modify_ldt(struct linux_modify_ldt_args *uap) struct i386_ldt_args *ldt; struct l_descriptor ld; union descriptor *desc; + int size, written; sg = stackgap_init(); @@ -772,6 +892,14 @@ sys_linux_modify_ldt(struct linux_modify_ldt_args *uap) uap->sysmsg_iresult = args.sysmsg_iresult * sizeof(union descriptor); break; + case 0x02: /* read_default_ldt = 0 */ + size = 5*sizeof(struct l_desc_struct); + if (size > uap->bytecount) + size = uap->bytecount; + for (written = error = 0; written < size && error == 0; written++) + error = subyte((char *)uap->ptr + written, 0); + uap->sysmsg_iresult = written; + break; case 0x01: /* write_ldt */ case 0x11: /* write_ldt */ if (uap->bytecount != sizeof(ld)) @@ -982,3 +1110,153 @@ sys_linux_sigaltstack(struct linux_sigaltstack_args *uap) return (error); } + +int +sys_linux_set_thread_area(struct linux_set_thread_area_args *args) +{ + struct segment_descriptor *desc; + struct l_user_desc info; + int error; + int idx; + int a[2]; + int i; + + error = copyin(args->desc, &info, sizeof(struct l_user_desc)); + if (error) + return (EFAULT); + +#ifdef DEBUG + if (ldebug(set_thread_area)) + kprintf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, %i, %i, %i\n"), + info.entry_number, + info.base_addr, + info.limit, + info.seg_32bit, + info.contents, + info.read_exec_only, + info.limit_in_pages, + info.seg_not_present, + info.useable); +#endif + + idx = info.entry_number; + if (idx != -1 && (idx < 6 || idx > 8)) + return (EINVAL); + + if (idx == -1) { + /* -1 means finding the first free TLS entry */ + for (i = 0; i < NGTLS; i++) { + /* + * try to determine if the TLS entry is empty by looking + * 
at the lolimit entry. + */ + if (curthread->td_tls.tls[i].sd_lolimit == 0) { + idx = i; + break; + } + } + + if (idx == -1) { + /* + * By now we should have an index. If not, it means + * that no entry is free, so return ESRCH. + */ + return (ESRCH); + } + } else { + /* translate the index from Linux to ours */ + idx -= 6; + KKASSERT(idx >= 0); + } + + /* Tell the caller about the allocated entry number */ +#if 0 + info.entry_number = idx; +#endif +#ifdef SMP + info.entry_number = GTLS_START + mycpu->gd_cpuid * NGDT + idx; +#else + info.entry_number = GTLS_START + idx; +#endif + + error = copyout(&info, args->desc, sizeof(struct l_user_desc)); + if (error) + return (error); + + if (LINUX_LDT_empty(&info)) { + a[0] = 0; + a[1] = 0; + } else { + a[0] = LINUX_LDT_entry_a(&info); + a[1] = LINUX_LDT_entry_b(&info); + } + + /* + * Update the TLS and the TLS entries in the GDT, but hold a critical + * section as required by set_user_TLS(). + */ + crit_enter(); + desc = &curthread->td_tls.tls[idx]; + memcpy(desc, &a, sizeof(a)); + set_user_TLS(); + crit_exit(); + + return (0); +} + +int +sys_linux_get_thread_area(struct linux_get_thread_area_args *args) +{ + struct segment_descriptor *sd; + struct l_desc_struct desc; + struct l_user_desc info; + int error; + int idx; + +#ifdef DEBUG + if (ldebug(get_thread_area)) + kprintf(ARGS(get_thread_area, "%p"), args->desc); +#endif + + error = copyin(args->desc, &info, sizeof(struct l_user_desc)); + if (error) + return (EFAULT); + + idx = info.entry_number; + if ((idx < 6 || idx > 8) && (idx < GTLS_START)) { + kprintf("sys_linux_get_thread_area, invalid idx requested: %d\n", idx); + return (EINVAL); + } + + memset(&info, 0, sizeof(info)); + + /* translate the index from Linux to ours */ + info.entry_number = idx; + if (idx < GTLS_START) { + idx -= 6; + } else { +#ifdef SMP + idx -= (GTLS_START + mycpu->gd_cpuid * NGDT); +#else + idx -= GTLS_START; +#endif + } + if (idx < 0 || idx >= NGTLS) return (EINVAL); /* user-supplied slot: never read outside td_tls.tls[] */ + + sd = &curthread->td_tls.tls[idx]; +
memcpy(&desc, sd, sizeof(desc)); + info.base_addr = LINUX_GET_BASE(&desc); + info.limit = LINUX_GET_LIMIT(&desc); + info.seg_32bit = LINUX_GET_32BIT(&desc); + info.contents = LINUX_GET_CONTENTS(&desc); + info.read_exec_only = !LINUX_GET_WRITABLE(&desc); + info.limit_in_pages = LINUX_GET_LIMIT_PAGES(&desc); + info.seg_not_present = !LINUX_GET_PRESENT(&desc); + info.useable = LINUX_GET_USEABLE(&desc); + + error = copyout(&info, args->desc, sizeof(struct l_user_desc)); + if (error) + return (EFAULT); + + return (0); +} diff --git a/sys/emulation/linux/i386/linux_proto.h b/sys/emulation/linux/i386/linux_proto.h index 6141090751..8e246be00a 100644 --- a/sys/emulation/linux/i386/linux_proto.h +++ b/sys/emulation/linux/i386/linux_proto.h @@ -323,6 +323,12 @@ struct linux_ustat_args { l_dev_t dev; char dev_[PAD_(l_dev_t)]; struct l_ustat * ubuf; char ubuf_[PAD_(struct l_ustat *)]; }; +struct linux_getppid_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + register_t dummy; +}; struct linux_sigaction_args { #ifdef _KERNEL struct sysmsg sysmsg; @@ -604,6 +610,9 @@ struct linux_clone_args { #endif l_int flags; char flags_[PAD_(l_int)]; void * stack; char stack_[PAD_(void *)]; + void * parent_tidptr; char parent_tidptr_[PAD_(void *)]; + int dummy; char dummy_[PAD_(int)]; + void * child_tidptr; char child_tidptr_[PAD_(void *)]; }; struct linux_newuname_args { #ifdef _KERNEL @@ -1101,6 +1110,12 @@ struct linux_fcntl64_args { l_uint cmd; char cmd_[PAD_(l_uint)]; l_ulong arg; char arg_[PAD_(l_ulong)]; }; +struct linux_gettid_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + register_t dummy; +}; struct linux_setxattr_args { #ifdef _KERNEL struct sysmsg sysmsg; @@ -1173,6 +1188,52 @@ struct linux_fremovexattr_args { #endif register_t dummy; }; +struct linux_tkill_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + int tid; char tid_[PAD_(int)]; + int sig; char sig_[PAD_(int)]; +}; +struct linux_sys_futex_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + 
void * uaddr; char uaddr_[PAD_(void *)]; + int op; char op_[PAD_(int)]; + int val; char val_[PAD_(int)]; + struct l_timespec * timeout; char timeout_[PAD_(struct l_timespec *)]; + void * uaddr2; char uaddr2_[PAD_(void *)]; + int val3; char val3_[PAD_(int)]; +}; +struct linux_sched_setaffinity_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + l_pid_t pid; char pid_[PAD_(l_pid_t)]; + l_uint len; char len_[PAD_(l_uint)]; + l_ulong * user_mask_ptr; char user_mask_ptr_[PAD_(l_ulong *)]; +}; +struct linux_sched_getaffinity_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + l_pid_t pid; char pid_[PAD_(l_pid_t)]; + l_uint len; char len_[PAD_(l_uint)]; + l_ulong * user_mask_ptr; char user_mask_ptr_[PAD_(l_ulong *)]; +}; +struct linux_set_thread_area_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + struct l_user_desc * desc; char desc_[PAD_(struct l_user_desc *)]; +}; +struct linux_get_thread_area_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + struct l_user_desc * desc; char desc_[PAD_(struct l_user_desc *)]; +}; struct linux_fadvise64_args { #ifdef _KERNEL struct sysmsg sysmsg; @@ -1185,6 +1246,249 @@ struct linux_exit_group_args { #endif int rval; char rval_[PAD_(int)]; }; +struct linux_epoll_create_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + l_int size; char size_[PAD_(l_int)]; +}; +struct linux_epoll_ctl_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + l_int epfd; char epfd_[PAD_(l_int)]; + l_int op; char op_[PAD_(l_int)]; + l_int fd; char fd_[PAD_(l_int)]; + struct linux_epoll_event * event; char event_[PAD_(struct linux_epoll_event *)]; +}; +struct linux_epoll_wait_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + l_int epfd; char epfd_[PAD_(l_int)]; + struct linux_epoll_event * events; char events_[PAD_(struct linux_epoll_event *)]; + l_int maxevents; char maxevents_[PAD_(l_int)]; + l_int timeout; char timeout_[PAD_(l_int)]; +}; +struct linux_set_tid_address_args { +#ifdef _KERNEL + struct sysmsg sysmsg; 
+#endif + int * tidptr; char tidptr_[PAD_(int *)]; +}; +struct linux_clock_settime_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + clockid_t which; char which_[PAD_(clockid_t)]; + struct l_timespec * tp; char tp_[PAD_(struct l_timespec *)]; +}; +struct linux_clock_gettime_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + clockid_t which; char which_[PAD_(clockid_t)]; + struct l_timespec * tp; char tp_[PAD_(struct l_timespec *)]; +}; +struct linux_clock_getres_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + clockid_t which; char which_[PAD_(clockid_t)]; + struct l_timespec * tp; char tp_[PAD_(struct l_timespec *)]; +}; +struct linux_clock_nanosleep_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + clockid_t which; char which_[PAD_(clockid_t)]; + int flags; char flags_[PAD_(int)]; + struct l_timespec * rqtp; char rqtp_[PAD_(struct l_timespec *)]; + struct l_timespec * rmtp; char rmtp_[PAD_(struct l_timespec *)]; +}; +struct linux_statfs64_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + register_t dummy; +}; +struct linux_tgkill_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + int tgid; char tgid_[PAD_(int)]; + int pid; char pid_[PAD_(int)]; + int sig; char sig_[PAD_(int)]; +}; +struct linux_utimes_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + char * fname; char fname_[PAD_(char *)]; + struct l_timeval * tptr; char tptr_[PAD_(struct l_timeval *)]; +}; +struct linux_mq_open_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + const char * name; char name_[PAD_(const char *)]; + int oflag; char oflag_[PAD_(int)]; + mode_t mode; char mode_[PAD_(mode_t)]; + struct mq_attr * attr; char attr_[PAD_(struct mq_attr *)]; +}; +struct linux_mq_getsetattr_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + l_mqd_t mqd; char mqd_[PAD_(l_mqd_t)]; + const struct mq_attr * attr; char attr_[PAD_(const struct mq_attr *)]; + struct mq_attr * oattr; char oattr_[PAD_(struct mq_attr *)]; +}; +struct linux_openat_args { 
+#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + l_int dfd; char dfd_[PAD_(l_int)]; + char * path; char path_[PAD_(char *)]; + l_int flags; char flags_[PAD_(l_int)]; + l_int mode; char mode_[PAD_(l_int)]; +}; +struct linux_mkdirat_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + l_int dfd; char dfd_[PAD_(l_int)]; + char * path; char path_[PAD_(char *)]; + l_int mode; char mode_[PAD_(l_int)]; +}; +struct linux_mknodat_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + l_int dfd; char dfd_[PAD_(l_int)]; + char * path; char path_[PAD_(char *)]; + l_int mode; char mode_[PAD_(l_int)]; + l_uint dev; char dev_[PAD_(l_uint)]; +}; +struct linux_fchownat_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + l_int dfd; char dfd_[PAD_(l_int)]; + char * filename; char filename_[PAD_(char *)]; + l_uid16_t uid; char uid_[PAD_(l_uid16_t)]; + l_gid16_t gid; char gid_[PAD_(l_gid16_t)]; + l_int flag; char flag_[PAD_(l_int)]; +}; +struct linux_futimesat_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + l_int dfd; char dfd_[PAD_(l_int)]; + char * fname; char fname_[PAD_(char *)]; + struct l_timeval * tptr; char tptr_[PAD_(struct l_timeval *)]; +}; +struct linux_fstatat64_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + l_int dfd; char dfd_[PAD_(l_int)]; + char * path; char path_[PAD_(char *)]; + struct l_stat64 * statbuf; char statbuf_[PAD_(struct l_stat64 *)]; + l_int flag; char flag_[PAD_(l_int)]; +}; +struct linux_unlinkat_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + l_int dfd; char dfd_[PAD_(l_int)]; + char * path; char path_[PAD_(char *)]; + l_int flag; char flag_[PAD_(l_int)]; +}; +struct linux_renameat_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + l_int olddfd; char olddfd_[PAD_(l_int)]; + char * from; char from_[PAD_(char *)]; + l_int newdfd; char newdfd_[PAD_(l_int)]; + char * to; char to_[PAD_(char *)]; +}; +struct linux_linkat_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + l_int olddfd; char 
olddfd_[PAD_(l_int)]; + char * path; char path_[PAD_(char *)]; + l_int newdfd; char newdfd_[PAD_(l_int)]; + char * to; char to_[PAD_(char *)]; + l_int flags; char flags_[PAD_(l_int)]; +}; +struct linux_symlinkat_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + char * path; char path_[PAD_(char *)]; + l_int newdfd; char newdfd_[PAD_(l_int)]; + char * to; char to_[PAD_(char *)]; +}; +struct linux_readlinkat_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + l_int dfd; char dfd_[PAD_(l_int)]; + char * path; char path_[PAD_(char *)]; + char * buf; char buf_[PAD_(char *)]; + l_int count; char count_[PAD_(l_int)]; +}; +struct linux_fchmodat_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + l_int dfd; char dfd_[PAD_(l_int)]; + char * filename; char filename_[PAD_(char *)]; + l_mode_t mode; char mode_[PAD_(l_mode_t)]; +}; +struct linux_faccessat_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + l_int dfd; char dfd_[PAD_(l_int)]; + char * filename; char filename_[PAD_(char *)]; + l_int mode; char mode_[PAD_(l_int)]; +}; +struct linux_set_robust_list_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + struct linux_robust_list_head * head; char head_[PAD_(struct linux_robust_list_head *)]; + l_size_t len; char len_[PAD_(l_size_t)]; +}; +struct linux_get_robust_list_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + l_int pid; char pid_[PAD_(l_int)]; + struct linux_robust_list_head ** head; char head_[PAD_(struct linux_robust_list_head **)]; + l_size_t * len; char len_[PAD_(l_size_t *)]; +}; +struct linux_getcpu_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + l_uint * pcpu; char pcpu_[PAD_(l_uint *)]; + l_uint * pnode; char pnode_[PAD_(l_uint *)]; + void * ptcache; char ptcache_[PAD_(void *)]; +}; +struct linux_utimensat_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + l_int dfd; char dfd_[PAD_(l_int)]; + char * fname; char fname_[PAD_(char *)]; + struct l_timespec * tptr; char tptr_[PAD_(struct l_timespec *)]; + l_int 
flag; char flag_[PAD_(l_int)]; +}; #ifdef COMPAT_43 @@ -1257,6 +1561,7 @@ int sys_linux_ioctl (struct linux_ioctl_args *); int sys_linux_fcntl (struct linux_fcntl_args *); int sys_linux_olduname (struct linux_olduname_args *); int sys_linux_ustat (struct linux_ustat_args *); +int sys_linux_getppid (struct linux_getppid_args *); int sys_linux_sigaction (struct linux_sigaction_args *); int sys_linux_sgetmask (struct linux_sgetmask_args *); int sys_linux_ssetmask (struct linux_ssetmask_args *); @@ -1367,6 +1672,7 @@ int sys_linux_mincore (struct linux_mincore_args *); int sys_linux_madvise (struct linux_madvise_args *); int sys_linux_getdents64 (struct linux_getdents64_args *); int sys_linux_fcntl64 (struct linux_fcntl64_args *); +int sys_linux_gettid (struct linux_gettid_args *); int sys_linux_setxattr (struct linux_setxattr_args *); int sys_linux_lsetxattr (struct linux_lsetxattr_args *); int sys_linux_fsetxattr (struct linux_fsetxattr_args *); @@ -1379,8 +1685,44 @@ int sys_linux_flistxattr (struct linux_flistxattr_args *); int sys_linux_removexattr (struct linux_removexattr_args *); int sys_linux_lremovexattr (struct linux_lremovexattr_args *); int sys_linux_fremovexattr (struct linux_fremovexattr_args *); +int sys_linux_tkill (struct linux_tkill_args *); +int sys_linux_sys_futex (struct linux_sys_futex_args *); +int sys_linux_sched_setaffinity (struct linux_sched_setaffinity_args *); +int sys_linux_sched_getaffinity (struct linux_sched_getaffinity_args *); +int sys_linux_set_thread_area (struct linux_set_thread_area_args *); +int sys_linux_get_thread_area (struct linux_get_thread_area_args *); int sys_linux_fadvise64 (struct linux_fadvise64_args *); int sys_linux_exit_group (struct linux_exit_group_args *); +int sys_linux_epoll_create (struct linux_epoll_create_args *); +int sys_linux_epoll_ctl (struct linux_epoll_ctl_args *); +int sys_linux_epoll_wait (struct linux_epoll_wait_args *); +int sys_linux_set_tid_address (struct linux_set_tid_address_args *); +int 
sys_linux_clock_settime (struct linux_clock_settime_args *);
+int	sys_linux_clock_gettime (struct linux_clock_gettime_args *);
+int	sys_linux_clock_getres (struct linux_clock_getres_args *);
+int	sys_linux_clock_nanosleep (struct linux_clock_nanosleep_args *);
+int	sys_linux_statfs64 (struct linux_statfs64_args *);
+int	sys_linux_tgkill (struct linux_tgkill_args *);
+int	sys_linux_utimes (struct linux_utimes_args *);
+int	sys_linux_mq_open (struct linux_mq_open_args *);
+int	sys_linux_mq_getsetattr (struct linux_mq_getsetattr_args *);
+int	sys_linux_openat (struct linux_openat_args *);
+int	sys_linux_mkdirat (struct linux_mkdirat_args *);
+int	sys_linux_mknodat (struct linux_mknodat_args *);
+int	sys_linux_fchownat (struct linux_fchownat_args *);
+int	sys_linux_futimesat (struct linux_futimesat_args *);
+int	sys_linux_fstatat64 (struct linux_fstatat64_args *);
+int	sys_linux_unlinkat (struct linux_unlinkat_args *);
+int	sys_linux_renameat (struct linux_renameat_args *);
+int	sys_linux_linkat (struct linux_linkat_args *);
+int	sys_linux_symlinkat (struct linux_symlinkat_args *);
+int	sys_linux_readlinkat (struct linux_readlinkat_args *);
+int	sys_linux_fchmodat (struct linux_fchmodat_args *);
+int	sys_linux_faccessat (struct linux_faccessat_args *);
+int	sys_linux_set_robust_list (struct linux_set_robust_list_args *);
+int	sys_linux_get_robust_list (struct linux_get_robust_list_args *);
+int	sys_linux_getcpu (struct linux_getcpu_args *);
+int	sys_linux_utimensat (struct linux_utimensat_args *);
 
 #endif /* !_LINUX_SYSPROTO_H_ */
 #undef PAD_
diff --git a/sys/emulation/linux/i386/linux_support.s b/sys/emulation/linux/i386/linux_support.s
new file mode 100644
index 0000000000..0bf33e679a
--- /dev/null
+++ b/sys/emulation/linux/i386/linux_support.s
@@ -0,0 +1,179 @@
+/*-
+ * Copyright (c) 2006,2007 Konstantin Belousov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "linux_assym.h"		/* system definitions */
+#include /* miscellaneous asm macros */
+#include
+#include
+#include
+
+#include "assym.s"
+
+/*
+ * A stack-based on-fault routine is used for more complex PCB_ONFAULT
+ * situations (such as memcpy/bcopy/bzero). In this case the on-fault
+ * routine must be pushed on the stack.
+ */
+stack_onfault:
+	ret
+
+/*
+ * Fault handlers.  Every futex_* routine below pushes one of these and
+ * installs stack_onfault in PCB_ONFAULT, so that stack_onfault's ret pops
+ * the pushed address and enters the handler.  The handler's own ret then
+ * returns to the caller of the futex_* routine with EFAULT in %eax.
+ *
+ * futex_fault expects the pcb in %ecx.  futex_fault_decx reloads it first,
+ * for routines that use %ecx as scratch.  futex_fault_pop is reached by a
+ * direct jump (address check failed) and pops the handler slot itself.
+ */
+futex_fault_decx:
+	movl	PCPU(curthread),%ecx
+	movl	TD_PCB(%ecx), %ecx
+futex_fault:
+	movl	$0,PCB_ONFAULT(%ecx)
+	movl	$EFAULT,%eax
+	ret
+
+futex_fault_pop:
+	addl	$4,%esp
+	movl	$0,PCB_ONFAULT(%ecx)
+	movl	$EFAULT,%eax
+	ret
+
+/*
+ * Atomic read-modify-write operations on a 32-bit user word.  With the
+ * handler slot pushed, 8(%esp) is the operand, 12(%esp) the user address
+ * (rejected when above VM_MAX_USER_ADDRESS-4) and 16(%esp) the location
+ * that receives the previous value of the word.  %eax is 0 on success.
+ *
+ * The success path has to drop the handler slot before its ret; a bare
+ * ret would enter the fault handler and report EFAULT even though the
+ * operation completed.
+ */
+ENTRY(futex_xchgl)
+	movl	PCPU(curthread),%ecx
+	movl	TD_PCB(%ecx),%ecx
+	pushl	$futex_fault
+	movl	$stack_onfault, PCB_ONFAULT(%ecx)
+	movl	8(%esp),%eax
+	movl	12(%esp),%edx
+	cmpl	$VM_MAX_USER_ADDRESS-4,%edx
+	ja	futex_fault_pop
+	xchgl	%eax,(%edx)
+	movl	16(%esp),%edx
+	movl	%eax,(%edx)
+	xorl	%eax,%eax
+	movl	%eax,PCB_ONFAULT(%ecx)
+	addl	$4,%esp			/* drop the pushed fault handler */
+	ret
+
+ENTRY(futex_addl)
+	movl	PCPU(curthread),%ecx
+	movl	TD_PCB(%ecx),%ecx
+	pushl	$futex_fault
+	movl	$stack_onfault,PCB_ONFAULT(%ecx)
+	movl	8(%esp),%eax
+	movl	12(%esp),%edx
+	cmpl	$VM_MAX_USER_ADDRESS-4,%edx
+	ja	futex_fault_pop
+#ifdef SMP
+	lock
+#endif
+	xaddl	%eax,(%edx)
+	movl	16(%esp),%edx
+	movl	%eax,(%edx)
+	xorl	%eax,%eax
+	movl	%eax,PCB_ONFAULT(%ecx)
+	addl	$4,%esp			/* drop the pushed fault handler */
+	ret
+
+ENTRY(futex_orl)
+	movl	PCPU(curthread),%ecx
+	movl	TD_PCB(%ecx),%ecx
+	pushl	$futex_fault_decx
+	movl	$stack_onfault,PCB_ONFAULT(%ecx)
+	movl	12(%esp),%edx
+	cmpl	$VM_MAX_USER_ADDRESS-4,%edx
+	ja	futex_fault_pop
+	movl	(%edx),%eax
+1:	movl	%eax,%ecx
+	orl	8(%esp),%ecx
+#ifdef SMP
+	lock
+#endif
+	cmpxchgl %ecx,(%edx)
+	jnz	1b
+futex_tail:
+	movl	16(%esp),%edx
+	movl	%eax,(%edx)
+	xorl	%eax,%eax
+	movl	PCPU(curthread),%ecx
+	movl	TD_PCB(%ecx),%ecx	/* %ecx was scratch; PCB_ONFAULT is a pcb field */
+	movl	%eax,PCB_ONFAULT(%ecx)
+	addl	$4,%esp			/* drop the pushed fault handler */
+	ret
+
+ENTRY(futex_andl)
+	movl	PCPU(curthread),%ecx
+	movl	TD_PCB(%ecx),%ecx
+	pushl	$futex_fault_decx
+	movl	$stack_onfault,PCB_ONFAULT(%ecx)
+	movl	12(%esp),%edx
+	cmpl	$VM_MAX_USER_ADDRESS-4,%edx
+	ja	futex_fault_pop
+	movl	(%edx),%eax
+1:	movl	%eax,%ecx
+	andl	8(%esp),%ecx
+#ifdef SMP
+	lock
+#endif
+	cmpxchgl %ecx,(%edx)
+	jnz	1b
+	jmp	futex_tail
+
+ENTRY(futex_xorl)
+	movl	PCPU(curthread),%ecx
+	movl	TD_PCB(%ecx),%ecx
+	pushl	$futex_fault_decx
+	movl	$stack_onfault,PCB_ONFAULT(%ecx)
+	movl	12(%esp),%edx
+	cmpl	$VM_MAX_USER_ADDRESS-4,%edx
+	ja	futex_fault_pop
+	movl	(%edx),%eax
+1:	movl	%eax,%ecx
+	xorl	8(%esp),%ecx
+#ifdef SMP
+	lock
+#endif
+	cmpxchgl %ecx,(%edx)
+	jnz	1b
+	jmp	futex_tail
diff --git a/sys/emulation/linux/i386/linux_syscall.h b/sys/emulation/linux/i386/linux_syscall.h
index 157ed344ca..85cf4ed38a 100644
--- a/sys/emulation/linux/i386/linux_syscall.h
+++ b/sys/emulation/linux/i386/linux_syscall.h
@@ -59,7 +59,7 @@
 #define	LINUX_SYS_chroot	61
 #define	LINUX_SYS_linux_ustat	62
 #define	LINUX_SYS_dup2	63
-#define	LINUX_SYS_getppid	64
+#define	LINUX_SYS_linux_getppid	64
 #define	LINUX_SYS_getpgrp	65
 #define	LINUX_SYS_setsid	66
 #define	LINUX_SYS_linux_sigaction	67
@@ -211,6 +211,7 @@
 #define	LINUX_SYS_linux_madvise	219
 #define	LINUX_SYS_linux_getdents64	220
 #define	LINUX_SYS_linux_fcntl64	221
+#define	LINUX_SYS_linux_gettid	224
 #define	LINUX_SYS_linux_setxattr	226
 #define	LINUX_SYS_linux_lsetxattr	227
 #define	LINUX_SYS_linux_fsetxattr	228
@@ -223,6 +224,46 @@
 #define	LINUX_SYS_linux_removexattr	235
 #define	LINUX_SYS_linux_lremovexattr	236
 #define	LINUX_SYS_linux_fremovexattr	237
+#define	LINUX_SYS_linux_tkill	238
+#define	LINUX_SYS_linux_sys_futex	240
+#define	LINUX_SYS_linux_sched_setaffinity	241
+#define	LINUX_SYS_linux_sched_getaffinity	242
+#define	LINUX_SYS_linux_set_thread_area	243
+#define	LINUX_SYS_linux_get_thread_area	244
 #define	LINUX_SYS_linux_fadvise64	250
 #define	LINUX_SYS_linux_exit_group	252
-#define	LINUX_SYS_MAXSYSCALL	268
+#define	LINUX_SYS_linux_epoll_create	254
+#define	LINUX_SYS_linux_epoll_ctl	255
+#define	LINUX_SYS_linux_epoll_wait	256
+#define	LINUX_SYS_linux_set_tid_address	258
+#define	LINUX_SYS_linux_clock_settime	264
+#define	LINUX_SYS_linux_clock_gettime	265
+#define	LINUX_SYS_linux_clock_getres	266
+#define	LINUX_SYS_linux_clock_nanosleep	267
+#define	LINUX_SYS_linux_statfs64	268
+#define	LINUX_SYS_linux_tgkill	270
+#define	LINUX_SYS_linux_utimes	271
+#define	LINUX_SYS_linux_mq_open	277
+#define	LINUX_SYS_mq_unlink	278
+#define	LINUX_SYS_mq_timedsend	279
+#define
LINUX_SYS_mq_timedreceive 280 +#define LINUX_SYS_mq_notify 281 +#define LINUX_SYS_linux_mq_getsetattr 282 +#define LINUX_SYS_linux_openat 295 +#define LINUX_SYS_linux_mkdirat 296 +#define LINUX_SYS_linux_mknodat 297 +#define LINUX_SYS_linux_fchownat 298 +#define LINUX_SYS_linux_futimesat 299 +#define LINUX_SYS_linux_fstatat64 300 +#define LINUX_SYS_linux_unlinkat 301 +#define LINUX_SYS_linux_renameat 302 +#define LINUX_SYS_linux_linkat 303 +#define LINUX_SYS_linux_symlinkat 304 +#define LINUX_SYS_linux_readlinkat 305 +#define LINUX_SYS_linux_fchmodat 306 +#define LINUX_SYS_linux_faccessat 307 +#define LINUX_SYS_linux_set_robust_list 311 +#define LINUX_SYS_linux_get_robust_list 312 +#define LINUX_SYS_linux_getcpu 318 +#define LINUX_SYS_linux_utimensat 320 +#define LINUX_SYS_MAXSYSCALL 338 diff --git a/sys/emulation/linux/i386/linux_sysent.c b/sys/emulation/linux/i386/linux_sysent.c index 5e81128d9a..c6948bbdea 100644 --- a/sys/emulation/linux/i386/linux_sysent.c +++ b/sys/emulation/linux/i386/linux_sysent.c @@ -82,7 +82,7 @@ struct sysent linux_sysent[] = { { AS(chroot_args), (sy_call_t *)sys_chroot }, /* 61 = chroot */ { AS(linux_ustat_args), (sy_call_t *)sys_linux_ustat }, /* 62 = linux_ustat */ { AS(dup2_args), (sy_call_t *)sys_dup2 }, /* 63 = dup2 */ - { 0, (sy_call_t *)sys_getppid }, /* 64 = getppid */ + { 0, (sy_call_t *)sys_linux_getppid }, /* 64 = linux_getppid */ { 0, (sy_call_t *)sys_getpgrp }, /* 65 = getpgrp */ { 0, (sy_call_t *)sys_setsid }, /* 66 = setsid */ { AS(linux_sigaction_args), (sy_call_t *)sys_linux_sigaction }, /* 67 = linux_sigaction */ @@ -242,7 +242,7 @@ struct sysent linux_sysent[] = { { AS(linux_fcntl64_args), (sy_call_t *)sys_linux_fcntl64 }, /* 221 = linux_fcntl64 */ { 0, (sy_call_t *)sys_nosys }, /* 222 = none */ { 0, (sy_call_t *)sys_nosys }, /* 223 = none */ - { 0, (sy_call_t *)sys_nosys }, /* 224 = linux_gettid */ + { 0, (sy_call_t *)sys_linux_gettid }, /* 224 = linux_gettid */ { 0, (sy_call_t *)sys_nosys }, /* 225 = 
linux_readahead */ { 0, (sy_call_t *)sys_linux_setxattr }, /* 226 = linux_setxattr */ { 0, (sy_call_t *)sys_linux_lsetxattr }, /* 227 = linux_lsetxattr */ @@ -256,13 +256,13 @@ struct sysent linux_sysent[] = { { 0, (sy_call_t *)sys_linux_removexattr }, /* 235 = linux_removexattr */ { 0, (sy_call_t *)sys_linux_lremovexattr }, /* 236 = linux_lremovexattr */ { 0, (sy_call_t *)sys_linux_fremovexattr }, /* 237 = linux_fremovexattr */ - { 0, (sy_call_t *)sys_nosys }, /* 238 = linux_tkill */ + { AS(linux_tkill_args), (sy_call_t *)sys_linux_tkill }, /* 238 = linux_tkill */ { 0, (sy_call_t *)sys_nosys }, /* 239 = linux_sendfile64 */ - { 0, (sy_call_t *)sys_nosys }, /* 240 = linux_futex */ - { 0, (sy_call_t *)sys_nosys }, /* 241 = linux_sched_setaffinity */ - { 0, (sy_call_t *)sys_nosys }, /* 242 = linux_sched_getaffinity */ - { 0, (sy_call_t *)sys_nosys }, /* 243 = linux_set_thread_area */ - { 0, (sy_call_t *)sys_nosys }, /* 244 = linux_get_thread_area */ + { AS(linux_sys_futex_args), (sy_call_t *)sys_linux_sys_futex }, /* 240 = linux_sys_futex */ + { AS(linux_sched_setaffinity_args), (sy_call_t *)sys_linux_sched_setaffinity }, /* 241 = linux_sched_setaffinity */ + { AS(linux_sched_getaffinity_args), (sy_call_t *)sys_linux_sched_getaffinity }, /* 242 = linux_sched_getaffinity */ + { AS(linux_set_thread_area_args), (sy_call_t *)sys_linux_set_thread_area }, /* 243 = linux_set_thread_area */ + { AS(linux_get_thread_area_args), (sy_call_t *)sys_linux_get_thread_area }, /* 244 = linux_get_thread_area */ { 0, (sy_call_t *)sys_nosys }, /* 245 = linux_io_setup */ { 0, (sy_call_t *)sys_nosys }, /* 246 = linux_io_destroy */ { 0, (sy_call_t *)sys_nosys }, /* 247 = linux_io_getevents */ @@ -272,18 +272,88 @@ struct sysent linux_sysent[] = { { 0, (sy_call_t *)sys_nosys }, /* 251 = */ { AS(linux_exit_group_args), (sy_call_t *)sys_linux_exit_group }, /* 252 = linux_exit_group */ { 0, (sy_call_t *)sys_nosys }, /* 253 = linux_lookup_dcookie */ - { 0, (sy_call_t *)sys_nosys }, /* 254 = 
linux_epoll_create */ - { 0, (sy_call_t *)sys_nosys }, /* 255 = linux_epoll_ctl */ - { 0, (sy_call_t *)sys_nosys }, /* 256 = linux_epoll_wait */ + { AS(linux_epoll_create_args), (sy_call_t *)sys_linux_epoll_create }, /* 254 = linux_epoll_create */ + { AS(linux_epoll_ctl_args), (sy_call_t *)sys_linux_epoll_ctl }, /* 255 = linux_epoll_ctl */ + { AS(linux_epoll_wait_args), (sy_call_t *)sys_linux_epoll_wait }, /* 256 = linux_epoll_wait */ { 0, (sy_call_t *)sys_nosys }, /* 257 = linux_remap_file_pages */ - { 0, (sy_call_t *)sys_nosys }, /* 258 = linux_set_tid_address */ + { AS(linux_set_tid_address_args), (sy_call_t *)sys_linux_set_tid_address }, /* 258 = linux_set_tid_address */ { 0, (sy_call_t *)sys_nosys }, /* 259 = linux_timer_create */ { 0, (sy_call_t *)sys_nosys }, /* 260 = linux_timer_settime */ { 0, (sy_call_t *)sys_nosys }, /* 261 = linux_timer_gettime */ { 0, (sy_call_t *)sys_nosys }, /* 262 = linux_timer_getoverrun */ { 0, (sy_call_t *)sys_nosys }, /* 263 = linux_timer_delete */ - { 0, (sy_call_t *)sys_nosys }, /* 264 = linux_clock_settime */ - { 0, (sy_call_t *)sys_nosys }, /* 265 = linux_clock_gettime */ - { 0, (sy_call_t *)sys_nosys }, /* 266 = linux_clock_getres */ - { 0, (sy_call_t *)sys_nosys }, /* 267 = linux_clock_nanosleep */ + { AS(linux_clock_settime_args), (sy_call_t *)sys_linux_clock_settime }, /* 264 = linux_clock_settime */ + { AS(linux_clock_gettime_args), (sy_call_t *)sys_linux_clock_gettime }, /* 265 = linux_clock_gettime */ + { AS(linux_clock_getres_args), (sy_call_t *)sys_linux_clock_getres }, /* 266 = linux_clock_getres */ + { AS(linux_clock_nanosleep_args), (sy_call_t *)sys_linux_clock_nanosleep }, /* 267 = linux_clock_nanosleep */ + { 0, (sy_call_t *)sys_linux_statfs64 }, /* 268 = linux_statfs64 */ + { 0, (sy_call_t *)sys_nosys }, /* 269 = linux_fstatfs64 */ + { AS(linux_tgkill_args), (sy_call_t *)sys_linux_tgkill }, /* 270 = linux_tgkill */ + { AS(linux_utimes_args), (sy_call_t *)sys_linux_utimes }, /* 271 = linux_utimes */ + { 0, 
(sy_call_t *)sys_nosys }, /* 272 = linux_fadvise64_64 */ + { 0, (sy_call_t *)sys_nosys }, /* 273 = linux_nonexistant */ + { 0, (sy_call_t *)sys_nosys }, /* 274 = linux_mbind */ + { 0, (sy_call_t *)sys_nosys }, /* 275 = linux_get_mempolicy */ + { 0, (sy_call_t *)sys_nosys }, /* 276 = linux_set_mempolicy */ + { AS(linux_mq_open_args), (sy_call_t *)sys_linux_mq_open }, /* 277 = linux_mq_open */ + { AS(mq_unlink_args), (sy_call_t *)sys_mq_unlink }, /* 278 = mq_unlink */ + { AS(mq_timedsend_args), (sy_call_t *)sys_mq_timedsend }, /* 279 = mq_timedsend */ + { AS(mq_timedreceive_args), (sy_call_t *)sys_mq_timedreceive }, /* 280 = mq_timedreceive */ + { AS(mq_notify_args), (sy_call_t *)sys_mq_notify }, /* 281 = mq_notify */ + { AS(linux_mq_getsetattr_args), (sy_call_t *)sys_linux_mq_getsetattr }, /* 282 = linux_mq_getsetattr */ + { 0, (sy_call_t *)sys_nosys }, /* 283 = linux_kexec_load */ + { 0, (sy_call_t *)sys_nosys }, /* 284 = linux_waitid */ + { 0, (sy_call_t *)sys_nosys }, /* 285 = linux_nonexistant2 */ + { 0, (sy_call_t *)sys_nosys }, /* 286 = linux_add_key */ + { 0, (sy_call_t *)sys_nosys }, /* 287 = linux_request_key */ + { 0, (sy_call_t *)sys_nosys }, /* 288 = linux_keyctl */ + { 0, (sy_call_t *)sys_nosys }, /* 289 = linux_ioprio_set */ + { 0, (sy_call_t *)sys_nosys }, /* 290 = linux_ioprio_get */ + { 0, (sy_call_t *)sys_nosys }, /* 291 = linux_inotify_init */ + { 0, (sy_call_t *)sys_nosys }, /* 292 = linux_inotify_add_watch */ + { 0, (sy_call_t *)sys_nosys }, /* 293 = linux_inotify_rm_watch */ + { 0, (sy_call_t *)sys_nosys }, /* 294 = linux_migrate_pages */ + { AS(linux_openat_args), (sy_call_t *)sys_linux_openat }, /* 295 = linux_openat */ + { AS(linux_mkdirat_args), (sy_call_t *)sys_linux_mkdirat }, /* 296 = linux_mkdirat */ + { AS(linux_mknodat_args), (sy_call_t *)sys_linux_mknodat }, /* 297 = linux_mknodat */ + { AS(linux_fchownat_args), (sy_call_t *)sys_linux_fchownat }, /* 298 = linux_fchownat */ + { AS(linux_futimesat_args), (sy_call_t 
*)sys_linux_futimesat }, /* 299 = linux_futimesat */ + { AS(linux_fstatat64_args), (sy_call_t *)sys_linux_fstatat64 }, /* 300 = linux_fstatat64 */ + { AS(linux_unlinkat_args), (sy_call_t *)sys_linux_unlinkat }, /* 301 = linux_unlinkat */ + { AS(linux_renameat_args), (sy_call_t *)sys_linux_renameat }, /* 302 = linux_renameat */ + { AS(linux_linkat_args), (sy_call_t *)sys_linux_linkat }, /* 303 = linux_linkat */ + { AS(linux_symlinkat_args), (sy_call_t *)sys_linux_symlinkat }, /* 304 = linux_symlinkat */ + { AS(linux_readlinkat_args), (sy_call_t *)sys_linux_readlinkat }, /* 305 = linux_readlinkat */ + { AS(linux_fchmodat_args), (sy_call_t *)sys_linux_fchmodat }, /* 306 = linux_fchmodat */ + { AS(linux_faccessat_args), (sy_call_t *)sys_linux_faccessat }, /* 307 = linux_faccessat */ + { 0, (sy_call_t *)sys_nosys }, /* 308 = linux_pselect6 */ + { 0, (sy_call_t *)sys_nosys }, /* 309 = linux_ppoll */ + { 0, (sy_call_t *)sys_nosys }, /* 310 = linux_unshare */ + { AS(linux_set_robust_list_args), (sy_call_t *)sys_linux_set_robust_list }, /* 311 = linux_set_robust_list */ + { AS(linux_get_robust_list_args), (sy_call_t *)sys_linux_get_robust_list }, /* 312 = linux_get_robust_list */ + { 0, (sy_call_t *)sys_nosys }, /* 313 = linux_splice */ + { 0, (sy_call_t *)sys_nosys }, /* 314 = linux_sync_file_range */ + { 0, (sy_call_t *)sys_nosys }, /* 315 = linux_tee */ + { 0, (sy_call_t *)sys_nosys }, /* 316 = linux_vmsplice */ + { 0, (sy_call_t *)sys_nosys }, /* 317 = linux_move_pages */ + { AS(linux_getcpu_args), (sy_call_t *)sys_linux_getcpu }, /* 318 = linux_getcpu */ + { 0, (sy_call_t *)sys_nosys }, /* 319 = linux_epoll_pwait */ + { AS(linux_utimensat_args), (sy_call_t *)sys_linux_utimensat }, /* 320 = linux_utimensat */ + { 0, (sy_call_t *)sys_nosys }, /* 321 = linux_signalfd */ + { 0, (sy_call_t *)sys_nosys }, /* 322 = linux_timerfd_create */ + { 0, (sy_call_t *)sys_nosys }, /* 323 = linux_eventfd */ + { 0, (sy_call_t *)sys_nosys }, /* 324 = linux_fallocate */ + { 0, (sy_call_t 
*)sys_nosys }, /* 325 = linux_timerfd_settime */ + { 0, (sy_call_t *)sys_nosys }, /* 326 = linux_timerfd_gettime */ + { 0, (sy_call_t *)sys_nosys }, /* 327 = linux_signalfd4 */ + { 0, (sy_call_t *)sys_nosys }, /* 328 = linux_eventfd2 */ + { 0, (sy_call_t *)sys_nosys }, /* 329 = linux_epoll_create1 */ + { 0, (sy_call_t *)sys_nosys }, /* 330 = linux_dup3 */ + { 0, (sy_call_t *)sys_nosys }, /* 331 = linux_pipe2 */ + { 0, (sy_call_t *)sys_nosys }, /* 332 = linux_inotify_init1 */ + { 0, (sy_call_t *)sys_nosys }, /* 333 = linux_preadv */ + { 0, (sy_call_t *)sys_nosys }, /* 334 = linux_pwritev */ + { 0, (sy_call_t *)sys_nosys }, /* 335 = linux_rt_tgsigqueueinfo */ + { 0, (sy_call_t *)sys_nosys }, /* 336 = linux_perf_event_open */ + { 0, (sy_call_t *)sys_nosys }, /* 337 = linux_recvmmsg */ }; diff --git a/sys/emulation/linux/i386/linux_sysvec.c b/sys/emulation/linux/i386/linux_sysvec.c index 41a5a1a73c..abe895b96a 100644 --- a/sys/emulation/linux/i386/linux_sysvec.c +++ b/sys/emulation/linux/i386/linux_sysvec.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -62,6 +63,8 @@ #include "linux_proto.h" #include "../linux_signal.h" #include "../linux_util.h" +#include "../linux_futex.h" +#include "../linux_emuldata.h" MODULE_VERSION(linux, 1); @@ -96,6 +99,9 @@ static void linux_prepsyscall (struct trapframe *tf, int *args, static void linux_sendsig (sig_t catcher, int sig, sigset_t *mask, u_long code); +static eventhandler_tag linux_exec_tag; +static eventhandler_tag linux_exit_tag; + /* * Linux syscalls return negative errno's, we do positive and map them */ @@ -114,7 +120,7 @@ static int bsd_to_linux_errno[ELAST + 1] = { int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, - LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0, + LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, LINUX_SIGSTOP, 
LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, @@ -130,7 +136,7 @@ int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, - SIGIO, SIGURG, 0 + SIGIO, SIGURG, SIGSYS }; #define LINUX_T_UNKNOWN 255 @@ -788,7 +794,7 @@ exec_linux_imgact_try(struct image_params *imgp) struct sysentvec linux_sysvec = { LINUX_SYS_MAXSYSCALL, linux_sysent, - 0xff, + 0xffffffff, LINUX_SIGTBLSZ, bsd_to_linux_signal, ELAST + 1, @@ -808,7 +814,7 @@ struct sysentvec linux_sysvec = { struct sysentvec elf_linux_sysvec = { LINUX_SYS_MAXSYSCALL, linux_sysent, - 0xff, + 0xffffffff, LINUX_SIGTBLSZ, bsd_to_linux_signal, ELAST + 1, @@ -913,8 +919,15 @@ linux_elf_modevent(module_t mod, int type, void *data) if (error == 0) { if (bootverbose) kprintf("Linux ELF exec handler installed\n"); - } else + } else { kprintf("cannot insert Linux ELF brand handler\n"); + } + EMUL_LOCKINIT(); + lockinit(&futex_mtx, "linftxs", 0, LK_CANRECURSE); + linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_transition, + NULL, 1000); + linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, emuldata_exit, + NULL, 1000); break; case MOD_UNLOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; @@ -930,8 +943,13 @@ linux_elf_modevent(module_t mod, int type, void *data) if (error == 0) { if (bootverbose) kprintf("Linux ELF exec handler removed\n"); - } else + } else { kprintf("Could not deinstall ELF interpreter entry\n"); + } + EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); + EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); + lockuninit(&futex_mtx); + EMUL_LOCKUNINIT(); break; default: break; diff --git a/sys/emulation/linux/i386/linux_union.h b/sys/emulation/linux/i386/linux_union.h index 98bebeae91..0351fee53d 100644 --- a/sys/emulation/linux/i386/linux_union.h +++ b/sys/emulation/linux/i386/linux_union.h @@ -54,6 +54,7 @@ union sysunion { 
struct linux_fcntl_args linux_fcntl; struct linux_olduname_args linux_olduname; struct linux_ustat_args linux_ustat; + struct linux_getppid_args linux_getppid; struct linux_sigaction_args linux_sigaction; struct linux_sgetmask_args linux_sgetmask; struct linux_ssetmask_args linux_ssetmask; @@ -164,6 +165,7 @@ union sysunion { struct linux_madvise_args linux_madvise; struct linux_getdents64_args linux_getdents64; struct linux_fcntl64_args linux_fcntl64; + struct linux_gettid_args linux_gettid; struct linux_setxattr_args linux_setxattr; struct linux_lsetxattr_args linux_lsetxattr; struct linux_fsetxattr_args linux_fsetxattr; @@ -176,6 +178,42 @@ union sysunion { struct linux_removexattr_args linux_removexattr; struct linux_lremovexattr_args linux_lremovexattr; struct linux_fremovexattr_args linux_fremovexattr; + struct linux_tkill_args linux_tkill; + struct linux_sys_futex_args linux_sys_futex; + struct linux_sched_setaffinity_args linux_sched_setaffinity; + struct linux_sched_getaffinity_args linux_sched_getaffinity; + struct linux_set_thread_area_args linux_set_thread_area; + struct linux_get_thread_area_args linux_get_thread_area; struct linux_fadvise64_args linux_fadvise64; struct linux_exit_group_args linux_exit_group; + struct linux_epoll_create_args linux_epoll_create; + struct linux_epoll_ctl_args linux_epoll_ctl; + struct linux_epoll_wait_args linux_epoll_wait; + struct linux_set_tid_address_args linux_set_tid_address; + struct linux_clock_settime_args linux_clock_settime; + struct linux_clock_gettime_args linux_clock_gettime; + struct linux_clock_getres_args linux_clock_getres; + struct linux_clock_nanosleep_args linux_clock_nanosleep; + struct linux_statfs64_args linux_statfs64; + struct linux_tgkill_args linux_tgkill; + struct linux_utimes_args linux_utimes; + struct linux_mq_open_args linux_mq_open; + struct linux_mq_getsetattr_args linux_mq_getsetattr; + struct linux_openat_args linux_openat; + struct linux_mkdirat_args linux_mkdirat; + struct 
linux_mknodat_args linux_mknodat; + struct linux_fchownat_args linux_fchownat; + struct linux_futimesat_args linux_futimesat; + struct linux_fstatat64_args linux_fstatat64; + struct linux_unlinkat_args linux_unlinkat; + struct linux_renameat_args linux_renameat; + struct linux_linkat_args linux_linkat; + struct linux_symlinkat_args linux_symlinkat; + struct linux_readlinkat_args linux_readlinkat; + struct linux_fchmodat_args linux_fchmodat; + struct linux_faccessat_args linux_faccessat; + struct linux_set_robust_list_args linux_set_robust_list; + struct linux_get_robust_list_args linux_get_robust_list; + struct linux_getcpu_args linux_getcpu; + struct linux_utimensat_args linux_utimensat; }; diff --git a/sys/emulation/linux/i386/syscalls.master b/sys/emulation/linux/i386/syscalls.master index 8d50ae9f68..38716d46f0 100644 --- a/sys/emulation/linux/i386/syscalls.master +++ b/sys/emulation/linux/i386/syscalls.master @@ -112,7 +112,7 @@ 61 NOPROTO LINUX { int chroot(char *path); } 62 STD LINUX { int linux_ustat(l_dev_t dev, struct l_ustat *ubuf); } 63 NOPROTO LINUX { int dup2(u_int from, u_int to); } -64 NOPROTO LINUX { int getppid(void); } +64 STD LINUX { int linux_getppid(void); } 65 NOPROTO LINUX { int getpgrp(void); } 66 NOPROTO LINUX { int setsid(void); } 67 STD LINUX { int linux_sigaction(l_int sig, l_osigaction_t *nsa, \ @@ -194,7 +194,8 @@ l_int arg3, void *ptr, l_long arg5); } 118 NOPROTO LINUX { int fsync(int fd); } 119 STD LINUX { int linux_sigreturn(struct l_sigframe *sfp); } -120 STD LINUX { int linux_clone(l_int flags, void *stack); } +120 STD LINUX { int linux_clone(l_int flags, void *stack, \ + void *parent_tidptr, int dummy, void *child_tidptr); } 121 NOPROTO LINUX { int setdomainname(char *name, int len); } 122 STD LINUX { int linux_newuname(struct l_new_utsname *buf); } 123 STD LINUX { int linux_modify_ldt(l_int func, void *ptr, \ @@ -345,7 +346,7 @@ l_ulong arg); } 222 UNIMPL LINUX none 223 UNIMPL LINUX none -224 UNIMPL LINUX linux_gettid +224 STD 
LINUX { int linux_gettid(void); } 225 UNIMPL LINUX linux_readahead 226 STD LINUX { int linux_setxattr(void); } 227 STD LINUX { int linux_lsetxattr(void); } @@ -359,13 +360,16 @@ 235 STD LINUX { int linux_removexattr(void); } 236 STD LINUX { int linux_lremovexattr(void); } 237 STD LINUX { int linux_fremovexattr(void); } -238 UNIMPL LINUX linux_tkill +238 STD LINUX { int linux_tkill(int tid, int sig); } 239 UNIMPL LINUX linux_sendfile64 -240 UNIMPL LINUX linux_futex -241 UNIMPL LINUX linux_sched_setaffinity -242 UNIMPL LINUX linux_sched_getaffinity -243 UNIMPL LINUX linux_set_thread_area -244 UNIMPL LINUX linux_get_thread_area +240 STD LINUX { int linux_sys_futex(void *uaddr, int op, int val, \ + struct l_timespec *timeout, void *uaddr2, int val3); } +241 STD LINUX { int linux_sched_setaffinity(l_pid_t pid, l_uint len, \ + l_ulong *user_mask_ptr); } +242 STD LINUX { int linux_sched_getaffinity(l_pid_t pid, l_uint len, \ + l_ulong *user_mask_ptr); } +243 STD LINUX { int linux_set_thread_area(struct l_user_desc *desc); } +244 STD LINUX { int linux_get_thread_area(struct l_user_desc *desc); } 245 UNIMPL LINUX linux_io_setup 246 UNIMPL LINUX linux_io_destroy 247 UNIMPL LINUX linux_io_getevents @@ -375,18 +379,112 @@ 251 UNIMPL LINUX 252 STD LINUX { void linux_exit_group(int rval); } 253 UNIMPL LINUX linux_lookup_dcookie -254 UNIMPL LINUX linux_epoll_create -255 UNIMPL LINUX linux_epoll_ctl -256 UNIMPL LINUX linux_epoll_wait +254 STD LINUX { int linux_epoll_create(l_int size); } +255 STD LINUX { int linux_epoll_ctl(l_int epfd, l_int op, l_int fd, \ + struct linux_epoll_event *event); } +256 STD LINUX { int linux_epoll_wait(l_int epfd, struct linux_epoll_event *events, \ + l_int maxevents, l_int timeout); } 257 UNIMPL LINUX linux_remap_file_pages -258 UNIMPL LINUX linux_set_tid_address +258 STD LINUX { int linux_set_tid_address(int *tidptr); } 259 UNIMPL LINUX linux_timer_create 260 UNIMPL LINUX linux_timer_settime 261 UNIMPL LINUX linux_timer_gettime 262 UNIMPL LINUX 
linux_timer_getoverrun 263 UNIMPL LINUX linux_timer_delete -264 UNIMPL LINUX linux_clock_settime -265 UNIMPL LINUX linux_clock_gettime -266 UNIMPL LINUX linux_clock_getres -267 UNIMPL LINUX linux_clock_nanosleep - +264 STD LINUX { int linux_clock_settime(clockid_t which, struct l_timespec *tp); } +265 STD LINUX { int linux_clock_gettime(clockid_t which, struct l_timespec *tp); } +266 STD LINUX { int linux_clock_getres(clockid_t which, struct l_timespec *tp); } +267 STD LINUX { int linux_clock_nanosleep(clockid_t which, int flags, \ + struct l_timespec *rqtp, struct l_timespec *rmtp); } +268 STD LINUX { int linux_statfs64(void); } +269 UNIMPL LINUX linux_fstatfs64 +270 STD LINUX { int linux_tgkill(int tgid, int pid, int sig); } +271 STD LINUX { int linux_utimes(char *fname, \ + struct l_timeval *tptr); } +272 UNIMPL LINUX linux_fadvise64_64 +273 UNIMPL LINUX linux_nonexistant +274 UNIMPL LINUX linux_mbind +275 UNIMPL LINUX linux_get_mempolicy +276 UNIMPL LINUX linux_set_mempolicy +277 STD LINUX { int linux_mq_open(const char *name, int oflag, mode_t mode, \ + struct mq_attr *attr); } +278 NOPROTO LINUX { int mq_unlink(const char *name); } +279 NOPROTO LINUX { int mq_timedsend(l_mqd_t mqd, const char *msg_ptr, \ + size_t msg_len, unsigned int msg_prio, const struct \ + l_timespec *abs_timeout); } +280 NOPROTO LINUX { int mq_timedreceive(l_mqd_t mqd, char *msg_ptr, \ + size_t msg_len, unsigned int msg_prio, const struct \ + l_timespec *abs_timeout); } +281 NOPROTO LINUX { int mq_notify(l_mqd_t mqd, const struct sigevent *notification); } +282 STD LINUX { int linux_mq_getsetattr(l_mqd_t mqd, const struct mq_attr *attr, \ + struct mq_attr *oattr); } +283 UNIMPL LINUX linux_kexec_load +284 UNIMPL LINUX linux_waitid +285 UNIMPL LINUX linux_nonexistant2 +286 UNIMPL LINUX linux_add_key +287 UNIMPL LINUX linux_request_key +288 UNIMPL LINUX linux_keyctl +289 UNIMPL LINUX linux_ioprio_set +290 UNIMPL LINUX linux_ioprio_get +291 UNIMPL LINUX linux_inotify_init +292 UNIMPL LINUX 
linux_inotify_add_watch +293 UNIMPL LINUX linux_inotify_rm_watch +294 UNIMPL LINUX linux_migrate_pages +295 STD LINUX { int linux_openat(l_int dfd, char *path, \ + l_int flags, l_int mode); } +296 STD LINUX { int linux_mkdirat(l_int dfd, char *path, \ + l_int mode); } +297 STD LINUX { int linux_mknodat(l_int dfd, char *path, \ + l_int mode, l_uint dev); } +298 STD LINUX { int linux_fchownat(l_int dfd, char *filename, \ + l_uid16_t uid, l_gid16_t gid, l_int flag); } +299 STD LINUX { int linux_futimesat(l_int dfd, char *fname, \ + struct l_timeval *tptr); } +300 STD LINUX { int linux_fstatat64(l_int dfd, char *path, \ + struct l_stat64 *statbuf, l_int flag); } +301 STD LINUX { int linux_unlinkat(l_int dfd, char *path, \ + l_int flag); } +302 STD LINUX { int linux_renameat(l_int olddfd, char *from, \ + l_int newdfd, char *to); } +303 STD LINUX { int linux_linkat(l_int olddfd, char *path, \ + l_int newdfd, char *to, l_int flags); } +304 STD LINUX { int linux_symlinkat(char *path, l_int newdfd, \ + char *to); } +305 STD LINUX { int linux_readlinkat(l_int dfd, char *path, \ + char *buf, l_int count); } +306 STD LINUX { int linux_fchmodat(l_int dfd, char *filename, \ + l_mode_t mode); } +307 STD LINUX { int linux_faccessat(l_int dfd, char *filename, l_int mode); } +308 UNIMPL LINUX linux_pselect6 +309 UNIMPL LINUX linux_ppoll +310 UNIMPL LINUX linux_unshare +311 STD LINUX { int linux_set_robust_list(struct linux_robust_list_head *head, \ + l_size_t len); } +312 STD LINUX { int linux_get_robust_list(l_int pid, struct linux_robust_list_head **head, \ + l_size_t *len); } +313 UNIMPL LINUX linux_splice +314 UNIMPL LINUX linux_sync_file_range +315 UNIMPL LINUX linux_tee +316 UNIMPL LINUX linux_vmsplice +317 UNIMPL LINUX linux_move_pages +318 STD LINUX { int linux_getcpu(l_uint *pcpu, l_uint *pnode, void *ptcache); } +319 UNIMPL LINUX linux_epoll_pwait +320 STD LINUX { int linux_utimensat(l_int dfd, char *fname, \ + struct l_timespec *tptr, l_int flag); } +321 UNIMPL LINUX 
linux_signalfd
+322	UNIMPL	LINUX	linux_timerfd_create
+323	UNIMPL	LINUX	linux_eventfd
+324	UNIMPL	LINUX	linux_fallocate
+325	UNIMPL	LINUX	linux_timerfd_settime
+326	UNIMPL	LINUX	linux_timerfd_gettime
+327	UNIMPL	LINUX	linux_signalfd4
+328	UNIMPL	LINUX	linux_eventfd2
+329	UNIMPL	LINUX	linux_epoll_create1
+330	UNIMPL	LINUX	linux_dup3
+331	UNIMPL	LINUX	linux_pipe2
+332	UNIMPL	LINUX	linux_inotify_init1
+333	UNIMPL	LINUX	linux_preadv
+334	UNIMPL	LINUX	linux_pwritev
+335	UNIMPL	LINUX	linux_rt_tgsigqueueinfo
+336	UNIMPL	LINUX	linux_perf_event_open
+337	UNIMPL	LINUX	linux_recvmmsg
diff --git a/sys/emulation/linux/linux_emuldata.c b/sys/emulation/linux/linux_emuldata.c
new file mode 100644
index 0000000000..56f57b85e2
--- /dev/null
+++ b/sys/emulation/linux/linux_emuldata.c
@@ -0,0 +1,370 @@
+/*
+ * Copyright (c) 2010 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Alex Hornung
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_compat.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "i386/linux.h"
+#include "i386/linux_proto.h"
+#include "linux_signal.h"
+#include "linux_util.h"
+#include "linux_emuldata.h"
+
+
+struct lock emul_lock;
+
+/*
+ * Return the Linux emulation data attached to process p (p->p_emuldata);
+ * callers treat NULL as "none attached".  The pointer is read under
+ * EMUL_LOCK, which is dropped again before returning.
+ */
+struct linux_emuldata *
+emuldata_get(struct proc *p)
+{
+	struct linux_emuldata *em;
+
+	EMUL_LOCK();
+
+	em = p->p_emuldata;
+
+	EMUL_UNLOCK();
+	return (em);
+}
+
+/*
+ * Record robust_ftx as the robust futex list head of process p, which
+ * must already have emulation data attached.
+ *
+ * NOTE(review): emuldata_get() takes EMUL_LOCK again while it is held
+ * here, so the lock has to allow recursion -- confirm in EMUL_LOCKINIT().
+ */
+void
+emuldata_set_robust(struct proc *p, struct linux_robust_list_head *robust_ftx)
+{
+	struct linux_emuldata *em;
+
+	EMUL_LOCK();
+
+	em = emuldata_get(p);
+	KKASSERT(em != NULL);
+
+	em->robust_futexes = robust_ftx;
+	EMUL_UNLOCK();
+}
+
+/*
+ * Set up Linux emulation data; flags are the Linux clone flags.
+ *
+ * pchild == NULL is the execv* case: the existing emulation data of p is
+ * reset and leaves its current thread group.
+ * pchild != NULL: new emulation data is allocated and attached to pchild;
+ * if p already has emulation data, its tid pointers are inherited.
+ *
+ * With LINUX_CLONE_THREAD the emulation data joins the thread group of p
+ * (p must have emulation data and pchild must be given); otherwise a new
+ * thread group is created with the target process as group_pid.
+ *
+ * Always returns 0.
+ */
+int
+emuldata_init(struct proc *p, struct proc *pchild, int flags)
+{
+	struct linux_emuldata_shared *s;
+	struct linux_emuldata *em, *ep;
+	int error = 0;
+
+	EMUL_LOCK();
+
+	em = emuldata_get(p);
+
+	if (pchild == NULL) {
+		ep = NULL;
+		/* This is the execv* case, where a process gets overwritten */
+		KKASSERT(em != NULL);
+		KKASSERT(em->s != NULL);
+		/*
+		 * Leave the old group's thread list before dropping our
+		 * reference; em is inserted into a thread list again below.
+		 */
+		LIST_REMOVE(em, threads);
+		if (atomic_fetchadd_int(&em->s->refs, -1) == 1) {
+			kfree(em->s, M_LINUX);
+			em->s = NULL;
+		}
+		if (em->s)
+			KKASSERT(em->s->refs >= 0);
+
+		em->parent_tidptr = NULL;
+		em->child_tidptr = NULL;
+		em->clone_flags = 0;
+		em->clear_tid = NULL;
+		em->set_tls = NULL;
+		em->proc = p;
+	} else {
+		ep = em;
+		em = kmalloc(sizeof(*em), M_LINUX, M_WAITOK | M_ZERO);
+	}
+
+	if (flags & LINUX_CLONE_THREAD) {
+		/*
+		 * If CLONE_THREAD is set, the child is placed in the same
+		 * thread group as the calling process.
+		 */
+		KKASSERT(ep != NULL);
+		em->s = ep->s;
+		s = em->s;
+	} else {
+		/* new thread group */
+		s = kmalloc(sizeof(*s), M_LINUX, M_WAITOK | M_ZERO);
+		LIST_INIT(&s->threads);
+		if (pchild)
+			s->group_pid = pchild->p_pid;
+		else
+			s->group_pid = p->p_pid;
+	}
+
+	if (ep != NULL) {
+		em->parent_tidptr = ep->parent_tidptr;
+		em->child_tidptr = ep->child_tidptr;
+#if 0
+		em->clone_flags = ep->clone_flags;
+#endif
+	}
+
+	em->clone_flags = flags;
+
+	atomic_add_int(&s->refs, 1);
+	KKASSERT(s->refs >= 0);
+	em->s = s;
+	LIST_INSERT_HEAD(&s->threads, em, threads);
+
+
+	if (pchild != NULL) {
+		em->proc = pchild;
+		pchild->p_emuldata = em;
+	}
+
+	EMUL_UNLOCK();
+	return (error);
+}
+
+/*
+ * process_exit event handler: tear down the Linux emulation data of p.
+ * Returns immediately for processes not running under elf_linux_sysvec.
+ * Modelled after NetBSD's emuldata_exit.
+ */
+void
+emuldata_exit(void *unused, struct proc *p)
+{
+	struct linux_sys_futex_args cup;
+	struct linux_emuldata *em;
+	int error = 0;
+
+	if (__predict_true(p->p_sysent != &elf_linux_sysvec))
+		return;
+
+	release_futexes(p);
+	EMUL_LOCK();
+
+	em = emuldata_get(p);
+	if (em == NULL) {
+		EMUL_UNLOCK();
+		return;
+	}
+
+	/*
+	 * Members of the thread group other than the leader should
+	 * exit quietly: no zombie stage, no signal. We do that by
+	 * reparenting to init. init will collect us and nobody will
+	 * notice what happened.
+	 */
+	if ((em->s->group_pid != p->p_pid) &&
+	    (em->clone_flags & LINUX_CLONE_THREAD)) {
+		p->p_sigparent = SIGCHLD;
+
+		wakeup((caddr_t) initproc); /* kern_exit seems to do this */
+		proc_reparent(p, initproc); /* XXX: might be dangerous */
+	}
+
+	if (em->clear_tid != NULL) {
+		int tid = 0;
+
+		/*
+		 * Zero the TID word and wake everyone sleeping on it.  The
+		 * wake is only issued when the word could be written.
+		 */
+		error = copyout(&tid, em->clear_tid, sizeof(tid));
+		if (error == 0) {
+			cup.uaddr = em->clear_tid;
+			cup.op = LINUX_FUTEX_WAKE;
+			cup.val = 0x7fffffff; /* Awake everyone */
+			cup.timeout = NULL;
+			cup.uaddr2 = NULL;
+			cup.val3 = 0;
+			error = sys_linux_sys_futex(&cup);
+		}
+		if (error)
+			kprintf("emuldata_exit futex stuff failed miserably\n");
+	}
+
+	LIST_REMOVE(em, threads);
+
+	p->p_emuldata = NULL;
+
+	if ((em->s->group_pid == p->p_pid) &&
+	    (em->s->flags & LINUX_LES_INEXITGROUP)) {
+		p->p_xstat = em->s->xstat;
+	}
+
+	if (atomic_fetchadd_int(&em->s->refs, -1) == 1) {
+		kfree(em->s, M_LINUX);
+		em->s = NULL;
+	}
+	if (em->s)
+		KKASSERT(em->s->refs >= 0);
+
+	EMUL_UNLOCK();
+	kfree(em, M_LINUX);
+}
+
+/*
+ * process_exec event handler: attach fresh emulation data to a process
+ * that runs under elf_linux_sysvec but has none yet.
+ */
+void
+linux_proc_transition(void *unused, struct image_params *imgp)
+{
+	struct proc *p;
+
+	p = imgp->proc;
+	if (__predict_false(imgp->proc->p_sysent == &elf_linux_sysvec &&
+	    imgp->proc->p_emuldata == NULL)) {
+#ifdef LINUX_DEBUG
+		kprintf("timidly hello from proc_transition\n");
+#endif
+		emuldata_init(p, p, 0);
+	}
+}
+
+/*
+ * p_userret hook installed by linux_proc_fork(): when the clone flags
+ * contain LINUX_CLONE_CHILD_SETTID, copy the current process's pid out to
+ * child_tidptr.  The copyout result is ignored.
+ */
+static void
+linux_proc_userret(void)
+{
+	struct proc *p = curproc;
+	struct linux_emuldata *em;
+
+	em = emuldata_get(p);
+	KKASSERT(em != NULL);
+
+	if (em->clone_flags & LINUX_CLONE_CHILD_SETTID) {
+		copyout(&p->p_pid, (int *)em->child_tidptr,
+		    sizeof(p->p_pid));
+	}
+
+	return;
+}
+
+/*
+ * Apply the clone flags stored in p's emulation data: remember
+ * child_tidptr (if given), arm clear_tid for LINUX_CLONE_CHILD_CLEARTID
+ * and install the userret hook for LINUX_CLONE_CHILD_SETTID.
+ */
+void
+linux_proc_fork(struct proc *p, struct proc *parent, void *child_tidptr)
+{
+	struct linux_emuldata *em;
+
+	em = emuldata_get(p);
+	KKASSERT(em != NULL);
+
+	if (child_tidptr != NULL)
+		em->child_tidptr = child_tidptr;
+
+	/* LINUX_CLONE_CHILD_CLEARTID: clear TID in child's memory on exit() */
+	if (em->clone_flags & LINUX_CLONE_CHILD_CLEARTID)
+		em->clear_tid = em->child_tidptr;
+
+	if (em->clone_flags & LINUX_CLONE_CHILD_SETTID)
+		p->p_userret = linux_proc_userret;
+
+	return;
+}
+
+/*
+ * Linux set_tid_address: remember tidptr as the current process's
+ * clear_tid address and return the process's pid in sysmsg_iresult.
+ */
+int
+sys_linux_set_tid_address(struct linux_set_tid_address_args *args)
+{
+	struct linux_emuldata *em;
+
+	EMUL_LOCK();
+
+	em = emuldata_get(curproc);
+	KKASSERT(em != NULL);
+
+	em->clear_tid = args->tidptr;
+	args->sysmsg_iresult = curproc->p_pid;
+
+	EMUL_UNLOCK();
+	return 0;
+}
diff --git a/sys/emulation/linux/linux_emuldata.h b/sys/emulation/linux/linux_emuldata.h
new file mode 100644
index 0000000000..e97611481a
--- /dev/null
+++ b/sys/emulation/linux/linux_emuldata.h
@@ -0,0 +1,110 @@
+/*	$NetBSD: linux_emuldata.h,v 1.16 2008/10/26 16:38:22 christos Exp $	*/
+
+/*-
+ * Copyright (c) 1998,2002 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Eric Haszlakiewicz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.
#ifndef _COMMON_LINUX_EMULDATA_H
#define _COMMON_LINUX_EMULDATA_H

/*
 * This is auxiliary data the linux compat code
 * needs to do its work.  A pointer to it is
 * stored in the p_emuldata field of the proc
 * structure.
 *
 * linux_emuldata_shared is the part that is common to all members of one
 * Linux thread group; it is reference counted through 'refs' and freed by
 * whoever drops the last reference (see emuldata_init/emuldata_exit).
 */
struct linux_emuldata_shared {
	void *	p_break;	/* Processes' idea of break */
	int	refs;		/* Number of linux_emuldata pointing here */
	pid_t	group_pid;	/* PID of Linux process (group of threads) */
	/* List of Linux threads (native processes) in the Linux process */
	LIST_HEAD(, linux_emuldata) threads;
	int	flags;		/* LINUX_LES_* flags, see below */
	int	xstat;		/* Thread group exit code, for exit_group */
};

/* Values for linux_emuldata_shared.flags */
#define LINUX_LES_INEXITGROUP	0x1	/* thread group doing exit_group() */
#define LINUX_LES_USE_NPTL	0x2	/* Need to emulate NPTL threads */

/*
 * Per-process emulation state.  One of these hangs off every process
 * handled by the Linux emulation; 's' points at the state shared with the
 * other members of the same thread group.
 */
struct linux_emuldata {
#if notyet
	sigset_t ps_siginfo;	/* Which signals have a RT handler */
#endif
	int	debugreg[8];	/* GDB information for ptrace - for use, */
				/* see ../arch/i386/linux_ptrace.c */
	struct linux_emuldata_shared *s;	/* Thread group shared data */

	void	*parent_tidptr;	/* Used during clone() */
	void	*child_tidptr;	/* Used during clone() */
	int	clone_flags;	/* Used during clone() */
	int	flags;
	void	*clear_tid;	/* Own TID to clear on exit */
	void	*set_tls;	/* Pointer to child TLS desc in user space */

	/* Robust futex list head, stored by emuldata_set_robust() */
	struct linux_robust_list_head	*robust_futexes;

	/* List of Linux threads (native processes) in the Linux process */
	LIST_ENTRY(linux_emuldata) threads;
	struct proc *proc;	/* backpointer to struct proc */
};

#define EMUL_DIDKILL		0x01

#define LINUX_CHILD_QUIETEXIT	0x1	/* Child will have quietexit set */
#define LINUX_QUIETEXIT		0x2	/* Quiet exit (no zombie state) */

/* Set up / tear down the dedicated emul_lock lockmgr lock. */
#define EMUL_LOCKINIT(x)	lockinit(&emul_lock, "tux_emul", 0, LK_CANRECURSE)
#define EMUL_LOCKUNINIT(x)	lockuninit(&emul_lock)

/* Disabled variant: serialize through the dedicated emul_lock. */
#if 0
#define EMUL_LOCK(x)	lockmgr(&emul_lock, LK_EXCLUSIVE)
#define EMUL_UNLOCK(x)	lockmgr(&emul_lock, LK_RELEASE)
#endif

/*
 * Active variant: the emulation data is serialized with the MP lock, so
 * emul_lock itself is not taken by EMUL_LOCK()/EMUL_UNLOCK().
 */
#define EMUL_LOCK(x)	get_mplock()
#define EMUL_UNLOCK(x)	rel_mplock()

extern struct lock emul_lock;

/* Implemented in linux_emuldata.c */
struct linux_emuldata *emuldata_get(struct proc *p);
void	emuldata_set_robust(struct proc *p, struct linux_robust_list_head *robust_ftx);
int	emuldata_init(struct proc *p, struct proc *pchild, int flags);
void	emuldata_exit(void *unused, struct proc *p);
void	linux_proc_transition(void *unused, struct image_params *imgp);
void	linux_proc_fork(struct proc *p, struct proc *parent, void *child_tidptr);
#endif /* !_COMMON_LINUX_EMULDATA_H */
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "opt_compat.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "i386/linux.h" +#include "i386/linux_proto.h" +#include "linux_signal.h" +#include "linux_util.h" +#include "linux_epoll.h" + + +/* Create a new epoll file descriptor. */ +int +sys_linux_epoll_create(struct linux_epoll_create_args *args) +{ + struct kqueue_args k_args; + + if (args->size <= 0) + return (EINVAL); + /* args->size is unused. Linux ignores it as well. */ + + return (sys_kqueue(&k_args)); +} + +/* Structure converting function from epoll to kevent. 
*/ +static void +linux_epoll_to_kevent(int fd, struct linux_epoll_event *event, struct kevent *kevent) +{ + int filter = 0; + int flags = kevent->flags; + + if (event->events & LINUX_EPOLLIN) + filter |= EVFILT_READ; + if (event->events & LINUX_EPOLLOUT) + filter |= EVFILT_WRITE; + if (event->events & LINUX_EPOLLPRI) + filter |= EVFILT_READ; + if (event->events & LINUX_EPOLLET) + flags |= EV_CLEAR; + if (event->events & LINUX_EPOLLONESHOT) + flags |= EV_ONESHOT; + + EV_SET(kevent, fd, filter, flags, 0, 0, NULL); +} + +/* + * Structure converting function from kevent to epoll. In a case + * this is called on error in registration we store the error in + * event->data and pick it up later in linux_epoll_ctl(). + */ +static void +linux_kevent_to_epoll(struct kevent *kevent, struct linux_epoll_event *event) +{ + if (kevent->flags & EV_ERROR) { + event->data = kevent->data; + return; + } + switch (kevent->filter) { + case EVFILT_READ: + if (kevent->data > 0) + event->events = LINUX_EPOLLIN; + event->data = kevent->ident; + break; + case EVFILT_WRITE: + if (kevent->data > 0) + event->events = LINUX_EPOLLOUT; + event->data = kevent->ident; + break; + } +} + +/* + * Copyout callback used by kevent. This converts kevent + * events to epoll events and copies them back to the + * userspace. This is also called on error on registering + * of the filter. + */ +static int +linux_kev_copyout(void *arg, struct kevent *kevp, int count) +{ + struct kevent_args *uap; + struct linux_epoll_event *eep; + int error, i; + + uap = (struct kevent_args*) arg; + + eep = kmalloc(sizeof(*eep) * count, M_TEMP, M_WAITOK | M_ZERO); + + for (i = 0; i < count; i++) { + linux_kevent_to_epoll(&kevp[i], &eep[i]); + } + + error = copyout(eep, uap->eventlist, count * sizeof(*eep)); + if (error) + uap->eventlist = (struct kevent *)((char *)uap->eventlist + count * sizeof(*eep)); + + kfree(eep, M_TEMP); + return (0); +} + +/* + * Copyin callback used by kevent. 
This copies already + * converted filters to the kevent internal memory. + */ +static int +linux_kev_copyin(void *arg, struct kevent *kevp, int count) +{ + struct kevent_args *uap; + + uap = (struct kevent_args*) arg; + + memcpy(kevp, uap->changelist, count * sizeof(*kevp)); + + uap->changelist += count; + + return (0); +} + +/* + * Load epoll filter, convert it to kevent filter + * and load it into kevent subsystem. + */ +int +sys_linux_epoll_ctl(struct linux_epoll_ctl_args *args) +{ + struct kevent_args k_args; + struct kevent kev; + struct linux_epoll_event le; + int error; + + error = copyin(args->event, &le, sizeof(le)); + if (error) + return (error); +#ifdef DEBUG + if (ldebug(epoll_ctl)) + kprintf(ARGS(epoll_ctl,"%i, %i, %i, %u"), args->epfd, args->op, + args->fd, le.events); +#endif + k_args.fd = args->epfd; + k_args.changelist = &kev; + /* The epoll can register only 1 filter at once. */ + k_args.nchanges = 1; + k_args.eventlist = NULL; + k_args.nevents = 0; + k_args.timeout = NULL; + + switch (args->op) { + case LINUX_EPOLL_CTL_ADD: + kev.flags = EV_ADD | EV_ENABLE; + break; + case LINUX_EPOLL_CTL_MOD: + /* TODO: DELETE && ADD maybe? */ + return (EINVAL); + break; + case LINUX_EPOLL_CTL_DEL: + kev.flags = EV_DELETE | EV_DISABLE; + break; + } + linux_epoll_to_kevent(args->fd, &le, &kev); + + error = kern_kevent(args->epfd, 1, 0, &k_args, linux_kev_copyin, + linux_kev_copyout, NULL); + /* Check if there was an error during registration. */ + if (error == 0 && k_args.sysmsg_result != 0) { + /* The copyout callback stored the error there. */ + error = le.data; + } + + return (error); +} + +/* + * Wait for a filter to be triggered on the epoll file descriptor. */ +int +sys_linux_epoll_wait(struct linux_epoll_wait_args *args) +{ + struct timespec ts; + struct kevent_args k_args; + int error; + + /* Convert from miliseconds to timespec. 
*/ + ts.tv_sec = args->timeout / 1000000; + ts.tv_nsec = (args->timeout % 1000000) * 1000; + + k_args.fd = args->epfd; + k_args.changelist = NULL; + k_args.nchanges = 0; + /* + * We don't mind the bogus type-cast because + * our copyout function knows about this and + * handles it correctly. + */ + k_args.eventlist = (struct kevent *)args->events; + k_args.nevents = args->maxevents; + k_args.timeout = &ts; + + error = kern_kevent(args->epfd, 0, args->maxevents, &k_args, + linux_kev_copyin, linux_kev_copyout, &ts); + + /* translation? */ + return (error); +} diff --git a/sys/emulation/linux/linux_epoll.h b/sys/emulation/linux/linux_epoll.h new file mode 100644 index 0000000000..37cd3eb1f0 --- /dev/null +++ b/sys/emulation/linux/linux_epoll.h @@ -0,0 +1,55 @@ +/*- + * Copyright (c) 2007 Roman Divacky + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
#ifndef _LINUX_EPOLL_H_
#define _LINUX_EPOLL_H_

/* The structure is only declared packed when building for amd64. */
#ifdef __amd64__
#define EPOLL_PACKED	__packed
#else
#define EPOLL_PACKED
#endif

/* Event description as exchanged with Linux userland. */
struct linux_epoll_event {
	uint32_t	events;		/* LINUX_EPOLL* bit mask */
	uint64_t	data;		/* opaque user data */
} EPOLL_PACKED;

/*
 * Bits of linux_epoll_event.events.  The two high bits are built from an
 * unsigned constant: shifting a signed 1 into bit 31 is undefined
 * behaviour and would yield a negative value.
 */
#define LINUX_EPOLLIN		0x001
#define LINUX_EPOLLPRI		0x002
#define LINUX_EPOLLOUT		0x004
#define LINUX_EPOLLONESHOT	(1U << 30)
#define LINUX_EPOLLET		(1U << 31)

/* Operations understood by epoll_ctl() */
#define LINUX_EPOLL_CTL_ADD	1
#define LINUX_EPOLL_CTL_DEL	2
#define LINUX_EPOLL_CTL_MOD	3

/* Largest event count whose array size in bytes still fits into an int */
#define LINUX_MAX_EVENTS	(INT_MAX / sizeof(struct linux_epoll_event))

#endif	/* !_LINUX_EPOLL_H_ */
struct thread *td = curthread; + struct proc *p = td->td_proc; + struct nlookupdata nd; + struct file *fp; + char *path; + int error, flags, dfd; + + if (args->flags & LINUX_O_CREAT) { + error = linux_copyin_path(args->path, &path, + LINUX_PATH_CREATE); + } else { + error = linux_copyin_path(args->path, &path, + LINUX_PATH_EXISTS); + } + if (error) + return (error); + +#ifdef DEBUG + if (ldebug(open)) + kprintf(ARGS(open, "%s, 0x%x, 0x%x"), path, args->flags, + args->mode); +#endif + flags = 0; + if (args->flags & LINUX_O_RDONLY) + flags |= O_RDONLY; + if (args->flags & LINUX_O_WRONLY) + flags |= O_WRONLY; + if (args->flags & LINUX_O_RDWR) + flags |= O_RDWR; + if (args->flags & LINUX_O_NDELAY) + flags |= O_NONBLOCK; + if (args->flags & LINUX_O_APPEND) + flags |= O_APPEND; + if (args->flags & LINUX_O_SYNC) + flags |= O_FSYNC; + if (args->flags & LINUX_O_NONBLOCK) + flags |= O_NONBLOCK; + if (args->flags & LINUX_FASYNC) + flags |= O_ASYNC; + if (args->flags & LINUX_O_CREAT) + flags |= O_CREAT; + if (args->flags & LINUX_O_TRUNC) + flags |= O_TRUNC; + if (args->flags & LINUX_O_EXCL) + flags |= O_EXCL; + if (args->flags & LINUX_O_NOCTTY) + flags |= O_NOCTTY; + + dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; + get_mplock(); + error = nlookup_init_at(&nd, &fp, dfd, path, UIO_SYSSPACE, NLC_FOLLOW); + if (error == 0) { + error = kern_open(&nd, flags, + args->mode, &args->sysmsg_iresult); + } + nlookup_done_at(&nd, fp); if (error == 0 && !(flags & O_NOCTTY) && SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) { @@ -414,7 +497,7 @@ again: error = copyout(&linux_dirent, outp, linuxreclen); } else { if (is64bit) { - linux_dirent64.d_ino = bdp->d_ino; + linux_dirent64.d_ino = INO64TO32(bdp->d_ino); linux_dirent64.d_off = (cookiep) ? 
(l_off_t)*cookiep : (l_off_t)(off + reclen); @@ -557,6 +640,42 @@ sys_linux_unlink(struct linux_unlink_args *args) return(error); } +int +sys_linux_unlinkat(struct linux_unlinkat_args *args) +{ + struct nlookupdata nd; + struct file *fp; + char *path; + int dfd, error; + + if (args->flag & ~LINUX_AT_REMOVEDIR) + return (EINVAL); + + error = linux_copyin_path(args->path, &path, LINUX_PATH_EXISTS); + if (error) { + kprintf("linux_copyin_path says error = %d\n", error); + return (error); + } +#ifdef DEBUG + if (ldebug(unlink)) + kprintf(ARGS(unlink, "%s"), path); +#endif + + dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; + get_mplock(); + error = nlookup_init_at(&nd, &fp, dfd, path, UIO_SYSSPACE, 0); + if (error == 0) { + if (args->flag & LINUX_AT_REMOVEDIR) + error = kern_rmdir(&nd); + else + error = kern_unlink(&nd); + } + nlookup_done_at(&nd, fp); + rel_mplock(); + linux_free_path(&path); + return(error); +} + /* * MPALMOSTSAFE */ @@ -640,6 +759,33 @@ sys_linux_mkdir(struct linux_mkdir_args *args) return(error); } +int +sys_linux_mkdirat(struct linux_mkdirat_args *args) +{ + struct nlookupdata nd; + struct file *fp; + char *path; + int dfd, error; + + error = linux_copyin_path(args->path, &path, LINUX_PATH_CREATE); + if (error) + return (error); +#ifdef DEBUG + if (ldebug(mkdir)) + kprintf(ARGS(mkdir, "%s, %d"), path, args->mode); +#endif + dfd = (args->dfd == LINUX_AT_FDCWD) ? 
AT_FDCWD : args->dfd; + get_mplock(); + error = nlookup_init_at(&nd, &fp, dfd, path, UIO_SYSSPACE, 0); + if (error == 0) + error = kern_mkdir(&nd, args->mode); + nlookup_done_at(&nd, fp); + rel_mplock(); + + linux_free_path(&path); + return(error); +} + /* * MPALMOSTSAFE */ @@ -704,6 +850,43 @@ sys_linux_rename(struct linux_rename_args *args) return(error); } +int +sys_linux_renameat(struct linux_renameat_args *args) +{ + struct nlookupdata fromnd, tond; + struct file *fp, *fp2; + char *from, *to; + int olddfd, newdfd,error; + + error = linux_copyin_path(args->from, &from, LINUX_PATH_EXISTS); + if (error) + return (error); + error = linux_copyin_path(args->to, &to, LINUX_PATH_CREATE); + if (error) { + linux_free_path(&from); + return (error); + } +#ifdef DEBUG + if (ldebug(rename)) + kprintf(ARGS(rename, "%s, %s"), from, to); +#endif + olddfd = (args->olddfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->olddfd; + newdfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd; + get_mplock(); + error = nlookup_init_at(&fromnd, &fp, olddfd, from, UIO_SYSSPACE, 0); + if (error == 0) { + error = nlookup_init_at(&tond, &fp2, newdfd, to, UIO_SYSSPACE, 0); + if (error == 0) + error = kern_rename(&fromnd, &tond); + nlookup_done_at(&tond, fp2); + } + nlookup_done_at(&fromnd, fp); + rel_mplock(); + linux_free_path(&from); + linux_free_path(&to); + return(error); +} + /* * MPALMOSTSAFE */ @@ -741,6 +924,42 @@ sys_linux_symlink(struct linux_symlink_args *args) return(error); } +int +sys_linux_symlinkat(struct linux_symlinkat_args *args) +{ + struct thread *td = curthread; + struct nlookupdata nd; + struct file *fp; + char *path, *link; + int error; + int newdfd, mode; + + error = linux_copyin_path(args->path, &path, LINUX_PATH_EXISTS); + if (error) + return (error); + error = linux_copyin_path(args->to, &link, LINUX_PATH_CREATE); + if (error) { + linux_free_path(&path); + return (error); + } +#ifdef DEBUG + if (ldebug(symlink)) + kprintf(ARGS(symlink, "%s, %s"), path, link); 
+#endif + newdfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd; + get_mplock(); + error = nlookup_init_at(&nd, &fp, newdfd, link, UIO_SYSSPACE, 0); + if (error == 0) { + mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; + error = kern_symlink(&nd, path, mode); + } + nlookup_done_at(&nd, fp); + rel_mplock(); + linux_free_path(&path); + linux_free_path(&link); + return(error); +} + /* * MPALMOSTSAFE */ @@ -771,6 +990,35 @@ sys_linux_readlink(struct linux_readlink_args *args) return(error); } +int +sys_linux_readlinkat(struct linux_readlinkat_args *args) +{ + struct nlookupdata nd; + struct file *fp; + char *path; + int dfd, error; + + error = linux_copyin_path(args->path, &path, LINUX_PATH_EXISTS); + if (error) + return (error); +#ifdef DEBUG + if (ldebug(readlink)) + kprintf(ARGS(readlink, "%s, %p, %d"), path, (void *)args->buf, + args->count); +#endif + dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; + get_mplock(); + error = nlookup_init_at(&nd, &fp, dfd, path, UIO_SYSSPACE, 0); + if (error == 0) { + error = kern_readlink(&nd, args->buf, args->count, + &args->sysmsg_iresult); + } + nlookup_done_at(&nd, fp); + rel_mplock(); + linux_free_path(&path); + return(error); +} + /* * MPALMOSTSAFE */ @@ -904,6 +1152,46 @@ sys_linux_link(struct linux_link_args *args) return(error); } +int +sys_linux_linkat(struct linux_linkat_args *args) +{ + struct nlookupdata nd, linknd; + struct file *fp, *fp2; + char *path, *link; + int olddfd, newdfd, error; + + if (args->flags != 0) + return (EINVAL); + + error = linux_copyin_path(args->path, &path, LINUX_PATH_EXISTS); + if (error) + return (error); + error = linux_copyin_path(args->to, &link, LINUX_PATH_CREATE); + if (error) { + linux_free_path(&path); + return (error); + } +#ifdef DEBUG + if (ldebug(link)) + kprintf(ARGS(link, "%s, %s"), path, link); +#endif + olddfd = (args->olddfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->olddfd; + newdfd = (args->newdfd == LINUX_AT_FDCWD) ? 
AT_FDCWD : args->newdfd; + get_mplock(); + error = nlookup_init_at(&nd, &fp, olddfd, path, UIO_SYSSPACE, NLC_FOLLOW); + if (error == 0) { + error = nlookup_init_at(&linknd, &fp2, newdfd, link, UIO_SYSSPACE, 0); + if (error == 0) + error = kern_link(&nd, &linknd); + nlookup_done_at(&linknd, fp2); + } + nlookup_done_at(&nd, fp); + rel_mplock(); + linux_free_path(&path); + linux_free_path(&link); + return(error); +} + /* * MPSAFE */ @@ -1386,3 +1674,55 @@ sys_linux_lchown(struct linux_lchown_args *args) return(error); } +int +sys_linux_fchmodat(struct linux_fchmodat_args *args) +{ + struct fchmodat_args uap; + int error; + + uap.fd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; + uap.path = args->filename; + uap.mode = args->mode; + uap.flags = 0; + + error = sys_fchmodat(&uap); + + return (error); +} + +int +sys_linux_fchownat(struct linux_fchownat_args *args) +{ + struct fchownat_args uap; + int error; + + if (args->flag & ~LINUX_AT_SYMLINK_NOFOLLOW) + return (EINVAL); + + uap.fd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; + uap.path = args->filename; + uap.uid = args->uid; + uap.gid = args->gid; + uap.flags = (args->flag & LINUX_AT_SYMLINK_NOFOLLOW) == 0 ? 0 : + AT_SYMLINK_NOFOLLOW; + + error = sys_fchownat(&uap); + + return (error); +} + +int +sys_linux_faccessat(struct linux_faccessat_args *args) +{ + struct faccessat_args uap; + int error; + + uap.fd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; + uap.path = args->filename; + uap.amode = args->mode; + uap.flags = 0; + + error = sys_faccessat(&uap); + + return error; +} diff --git a/sys/emulation/linux/linux_futex.c b/sys/emulation/linux/linux_futex.c new file mode 100644 index 0000000000..c4cf129dfe --- /dev/null +++ b/sys/emulation/linux/linux_futex.c @@ -0,0 +1,856 @@ +/* $NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $ */ + +/*- + * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Emmanuel Dreyfus + * 4. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
struct futex;

/*
 * One sleeper on a futex.  The address of this structure doubles as the
 * sleep/wakeup channel (see futex_sleep() and futex_wake()).
 */
struct waiting_proc {
	uint32_t	wp_flags;	/* FUTEX_WP_* flags */
	struct futex	*wp_futex;	/* futex this waiter is queued on;
					 * updated by futex_requeue() */
	TAILQ_ENTRY(waiting_proc) wp_list;	/* link in f_waiting_proc */
};

/*
 * Kernel-side state of one futex, looked up by user address in the global
 * futex_list.
 */
struct futex {
	struct lock	f_lck;		/* per-futex lock, see FUTEX_LOCK() */
	uint32_t	*f_uaddr;	/* address of the futex word; the
					 * lookup key in futex_get0() */
	uint32_t	f_refcount;	/* references; modified with the
					 * futex list lock held */
	LIST_ENTRY(futex) f_list;	/* link in futex_list */
	TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc;
					/* processes sleeping on this futex */
};
kprintf("linux_futex: " b "\n",c,d) +#define LINUX_CTR3(a,b,c,d,e) LINUX_CTR_PREFIX kprintf("linux_futex: " b "\n",c,d,e) +#define LINUX_CTR4(a,b,c,d,e,f) LINUX_CTR_PREFIX kprintf("linux_futex: " b "\n",c,d,e,f) +#define LINUX_CTR5(a,b,c,d,e,f,g) LINUX_CTR_PREFIX kprintf("linux_futex: " b "\n",c,d,e,f,g) + + + +/* flags for futex_get() */ +#define FUTEX_CREATE_WP 0x1 /* create waiting_proc */ +#define FUTEX_DONTCREATE 0x2 /* don't create futex if not exists */ +#define FUTEX_DONTEXISTS 0x4 /* return EINVAL if futex exists */ + +/* wp_flags */ +#define FUTEX_WP_REQUEUED 0x1 /* wp requeued - wp moved from wp_list + * of futex where thread sleep to wp_list + * of another futex. + */ +#define FUTEX_WP_REMOVED 0x2 /* wp is woken up and removed from futex + * wp_list to prevent double wakeup. + */ + +/* support.s */ +int futex_xchgl(int oparg, uint32_t *uaddr, int *oldval); +int futex_addl(int oparg, uint32_t *uaddr, int *oldval); +int futex_orl(int oparg, uint32_t *uaddr, int *oldval); +int futex_andl(int oparg, uint32_t *uaddr, int *oldval); +int futex_xorl(int oparg, uint32_t *uaddr, int *oldval); + +static void +futex_put(struct futex *f, struct waiting_proc *wp) +{ + FUTEX_ASSERT_LOCKED(f); + if (wp != NULL) { + if ((wp->wp_flags & FUTEX_WP_REMOVED) == 0) + TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); + kfree(wp, M_FUTEX_WP); + } + + FUTEXES_LOCK; + if (--f->f_refcount == 0) { + LIST_REMOVE(f, f_list); + FUTEXES_UNLOCK; + FUTEX_UNLOCK(f); + + LINUX_CTR2(sys_futex, "futex_put destroy uaddr %p ref %d", + f->f_uaddr, f->f_refcount); + FUTEX_DESTROY(f); + kfree(f, M_FUTEX); + return; + } + + LINUX_CTR2(sys_futex, "futex_put uaddr %p ref %d", + f->f_uaddr, f->f_refcount); + FUTEXES_UNLOCK; + FUTEX_UNLOCK(f); +} + +static int +futex_get0(uint32_t *uaddr, struct futex **newf, uint32_t flags) +{ + struct futex *f, *tmpf; + + *newf = tmpf = NULL; + +retry: + FUTEXES_LOCK; + LIST_FOREACH(f, &futex_list, f_list) { + if (f->f_uaddr == uaddr) { + if (tmpf != NULL) { + 
FUTEX_UNLOCK(tmpf); + FUTEX_DESTROY(tmpf); + kfree(tmpf, M_FUTEX); + } + if (flags & FUTEX_DONTEXISTS) { + FUTEXES_UNLOCK; + return (EINVAL); + } + + /* + * Increment refcount of the found futex to + * prevent it from deallocation before FUTEX_LOCK() + */ + ++f->f_refcount; + FUTEXES_UNLOCK; + + FUTEX_LOCK(f); + *newf = f; + LINUX_CTR2(sys_futex, "futex_get uaddr %p ref %d", + uaddr, f->f_refcount); + return (0); + } + } + + if (flags & FUTEX_DONTCREATE) { + FUTEXES_UNLOCK; + LINUX_CTR1(sys_futex, "futex_get uaddr %p null", uaddr); + return (0); + } + + if (tmpf == NULL) { + FUTEXES_UNLOCK; + tmpf = kmalloc(sizeof(*tmpf), M_FUTEX, M_WAITOK | M_ZERO); + tmpf->f_uaddr = uaddr; + tmpf->f_refcount = 1; + FUTEX_INIT(tmpf); + TAILQ_INIT(&tmpf->f_waiting_proc); + + /* + * Lock the new futex before an insert into the futex_list + * to prevent futex usage by other. + */ + FUTEX_LOCK(tmpf); + goto retry; + } + + LIST_INSERT_HEAD(&futex_list, tmpf, f_list); + FUTEXES_UNLOCK; + + LINUX_CTR2(sys_futex, "futex_get uaddr %p ref %d new", + uaddr, tmpf->f_refcount); + *newf = tmpf; + return (0); +} + +static int +futex_get(uint32_t *uaddr, struct waiting_proc **wp, struct futex **f, + uint32_t flags) +{ + int error; + + if (flags & FUTEX_CREATE_WP) { + *wp = kmalloc(sizeof(struct waiting_proc), M_FUTEX_WP, M_WAITOK); + (*wp)->wp_flags = 0; + } + error = futex_get0(uaddr, f, flags); + if (error) { + if (flags & FUTEX_CREATE_WP) { + kfree(*wp, M_FUTEX_WP); + *wp = NULL; + } + return (error); + } + if (flags & FUTEX_CREATE_WP) { + TAILQ_INSERT_HEAD(&(*f)->f_waiting_proc, *wp, wp_list); + (*wp)->wp_futex = *f; + } + + return (error); +} + +static int +futex_sleep(struct futex *f, struct waiting_proc *wp, unsigned long timeout) +{ + int error; + + FUTEX_ASSERT_LOCKED(f); + LINUX_CTR4(sys_futex, "futex_sleep enter uaddr %p wp %p timo %ld ref %d", + f->f_uaddr, wp, timeout, f->f_refcount); + error = FUTEX_SLEEP(f, wp, PCATCH, "futex", timeout); +#if 0 + error = ssleep(wp, &f->f_lck, 
PCATCH, "futex", timeout); + error = sx_sleep(wp, &f->f_lck, PCATCH, "futex", timeout); +#endif + if (wp->wp_flags & FUTEX_WP_REQUEUED) { + KASSERT(f != wp->wp_futex, ("futex != wp_futex")); + LINUX_CTR5(sys_futex, "futex_sleep out error %d uaddr %p w" + " %p requeued uaddr %p ref %d", + error, f->f_uaddr, wp, wp->wp_futex->f_uaddr, + wp->wp_futex->f_refcount); + futex_put(f, NULL); + f = wp->wp_futex; + FUTEX_LOCK(f); + } + + futex_put(f, wp); + return (error); +} + +static int +futex_wake(struct futex *f, int n) +{ + struct waiting_proc *wp, *wpt; + int count = 0; + + FUTEX_ASSERT_LOCKED(f); + TAILQ_FOREACH_MUTABLE(wp, &f->f_waiting_proc, wp_list, wpt) { + LINUX_CTR3(sys_futex, "futex_wake uaddr %p wp %p ref %d", + f->f_uaddr, wp, f->f_refcount); + wp->wp_flags |= FUTEX_WP_REMOVED; + TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); + wakeup_one(wp); + if (++count == n) + break; + } + + return (count); +} + +static int +futex_requeue(struct futex *f, int n, struct futex *f2, int n2) +{ + struct waiting_proc *wp, *wpt; + int count = 0; + + FUTEX_ASSERT_LOCKED(f); + FUTEX_ASSERT_LOCKED(f2); + + TAILQ_FOREACH_MUTABLE(wp, &f->f_waiting_proc, wp_list, wpt) { + if (++count <= n) { + LINUX_CTR2(sys_futex, "futex_req_wake uaddr %p wp %p", + f->f_uaddr, wp); + wp->wp_flags |= FUTEX_WP_REMOVED; + TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); + wakeup_one(wp); + } else { + LINUX_CTR3(sys_futex, "futex_requeue uaddr %p wp %p to %p", + f->f_uaddr, wp, f2->f_uaddr); + wp->wp_flags |= FUTEX_WP_REQUEUED; + /* Move wp to wp_list of f2 futex */ + TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); + TAILQ_INSERT_HEAD(&f2->f_waiting_proc, wp, wp_list); + + /* + * Thread which sleeps on wp after waking should + * acquire f2 lock, so increment refcount of f2 to + * prevent it from premature deallocation. 
+			 */
+			wp->wp_futex = f2;
+			FUTEXES_LOCK;
+			++f2->f_refcount;
+			FUTEXES_UNLOCK;
+			if (count - n >= n2)
+				break;
+		}
+	}
+
+	return (count);
+}
+
+/*
+ * Sleep on futex f until woken, interrupted, or until the optional relative
+ * timeout ts (a userland l_timespec, may be NULL for "no timeout") expires.
+ *
+ * The caller passes f locked with wp already queued on it.  Both the futex
+ * reference and wp are consumed on every return path: futex_sleep() releases
+ * them when the sleep ends, and the copyin failure path releases them here.
+ *
+ * Returns 0 when woken, ETIMEDOUT on timeout, or another errno from
+ * copyin()/futex_sleep().
+ */
+static int
+futex_wait(struct futex *f, struct waiting_proc *wp, struct l_timespec *ts)
+{
+	struct l_timespec timeout = {0, 0};
+	struct timeval tv = {0, 0};
+	int timeout_hz;
+	int error;
+
+	if (ts != NULL) {
+		error = copyin(ts, &timeout, sizeof(timeout));
+		if (error) {
+			/*
+			 * We never reach futex_sleep(), so drop the lock,
+			 * the reference and wp ourselves; the caller does
+			 * not clean up after futex_wait().
+			 */
+			futex_put(f, wp);
+			return (error);
+		}
+	}
+
+	/*
+	 * Keep seconds and microseconds in their own fields.  Folding the
+	 * seconds into tv_usec overflows a 32-bit long for timeouts longer
+	 * than about 2147 seconds.
+	 */
+	tv.tv_sec = timeout.tv_sec;
+	tv.tv_usec = timeout.tv_nsec / 1000;
+	timeout_hz = tvtohz_high(&tv);
+
+	/* An all-zero timespec (or ts == NULL) means sleep without timeout. */
+	if (timeout.tv_sec == 0 && timeout.tv_nsec == 0)
+		timeout_hz = 0;
+
+	/*
+	 * If the user process requests a non null timeout,
+	 * make sure we do not turn it into an infinite
+	 * timeout because timeout_hz gets null.
+	 *
+	 * We use a minimal timeout of 1/hz. Maybe it would
+	 * make sense to just return ETIMEDOUT without sleeping.
+	 */
+	if (((timeout.tv_sec != 0) || (timeout.tv_nsec != 0)) &&
+	    (timeout_hz == 0))
+		timeout_hz = 1;
+
+	error = futex_sleep(f, wp, timeout_hz);
+	if (error == EWOULDBLOCK)
+		error = ETIMEDOUT;
+
+	return (error);
+}
+
+static int
+futex_atomic_op(struct proc *p, int encoded_op, uint32_t *uaddr)
+{
+	int op = (encoded_op >> 28) & 7;
+	int cmp = (encoded_op >> 24) & 15;
+	int oparg = (encoded_op << 8) >> 20;
+	int cmparg = (encoded_op << 20) >> 20;
+	int oldval = 0, ret;
+
+	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
+		oparg = 1 << oparg;
+
+#ifdef DEBUG
+	if (ldebug(sys_futex))
+		kprintf("futex_atomic_op: op = %d, cmp = %d, oparg = %x, "
+		       "cmparg = %x, uaddr = %p\n",
+		       op, cmp, oparg, cmparg, uaddr);
+#endif
+	/* XXX: linux verifies access here and returns EFAULT */
+
+	switch (op) {
+	case FUTEX_OP_SET:
+		ret = futex_xchgl(oparg, uaddr, &oldval);
+		break;
+	case FUTEX_OP_ADD:
+		ret = futex_addl(oparg, uaddr, &oldval);
+		break;
+	case FUTEX_OP_OR:
+		ret = futex_orl(oparg, uaddr, &oldval);
+		break;
+	case FUTEX_OP_ANDN:
+		ret = futex_andl(~oparg, uaddr, &oldval);
+		break;
+	case FUTEX_OP_XOR:
+		
ret = futex_xorl(oparg, uaddr, &oldval);
+		break;
+	default:
+		ret = -ENOSYS;
+		break;
+	}
+
+	if (ret)
+		return (ret);
+
+	switch (cmp) {
+	case FUTEX_OP_CMP_EQ:
+		return (oldval == cmparg);
+	case FUTEX_OP_CMP_NE:
+		return (oldval != cmparg);
+	case FUTEX_OP_CMP_LT:
+		return (oldval < cmparg);
+	case FUTEX_OP_CMP_GE:
+		return (oldval >= cmparg);
+	case FUTEX_OP_CMP_LE:
+		return (oldval <= cmparg);
+	case FUTEX_OP_CMP_GT:
+		return (oldval > cmparg);
+	default:
+		return (-ENOSYS);
+	}
+}
+
+/*
+ * Linux futex(2) entry point.  Strips the private flag from args->op and
+ * dispatches on the remaining operation code.  Returns 0 or an errno;
+ * operations that report a count do so through args->sysmsg_iresult.
+ */
+int
+sys_linux_sys_futex(struct linux_sys_futex_args *args)
+{
+	int op_ret, val, ret, nrwake;
+	struct waiting_proc *wp;
+	/*
+	 * f2 must start out NULL: the WAKE_OP case only looks up a second
+	 * futex when uaddr != uaddr2, but afterwards tests f2 != NULL to
+	 * decide whether there is a second futex to wake and release.
+	 */
+	struct futex *f, *f2 = NULL;
+	int error = 0;
+
+	/*
+	 * Our implementation provides only private futexes. Most of the apps
+	 * should use private futexes but don't claim so. Therefore we treat
+	 * all futexes as private by clearing the FUTEX_PRIVATE_FLAG. It works
+	 * in most cases (ie. when futexes are not shared on file descriptor
+	 * or between different processes.).
+	 */
+	args->op = (args->op & ~LINUX_FUTEX_PRIVATE_FLAG);
+
+	switch (args->op) {
+	case LINUX_FUTEX_WAIT:
+		LINUX_CTR2(sys_futex, "WAIT val %d uaddr %p",
+		    args->val, args->uaddr);
+#ifdef DEBUG
+		if (ldebug(sys_futex))
+			kprintf(ARGS(sys_futex, "futex_wait val %d uaddr %p"),
+			    args->val, args->uaddr);
+#endif
+		/* Returns f locked, with a fresh wp queued on it. */
+		error = futex_get(args->uaddr, &wp, &f, FUTEX_CREATE_WP);
+		if (error)
+			return (error);
+		error = copyin(args->uaddr, &val, sizeof(val));
+		if (error) {
+			LINUX_CTR1(sys_futex, "WAIT copyin failed %d",
+			    error);
+			futex_put(f, wp);
+			return (error);
+		}
+		/* Only sleep if the futex word still holds the expected value. */
+		if (val != args->val) {
+			LINUX_CTR3(sys_futex, "WAIT uaddr %p val %d != uval %d",
+			    args->uaddr, args->val, val);
+			futex_put(f, wp);
+			return (EWOULDBLOCK);
+		}
+
+		error = futex_wait(f, wp, args->timeout);
+		break;
+
+	case LINUX_FUTEX_WAKE:
+
+		LINUX_CTR2(sys_futex, "WAKE val %d uaddr %p",
+		    args->val, args->uaddr);
+
+		/*
+		 * XXX: Linux is able to cope with different addresses
+		 * corresponding to the same mapped memory in the 
sleeping + * and waker process(es). + */ +#ifdef DEBUG + if (ldebug(sys_futex)) + kprintf(ARGS(sys_futex, "futex_wake val %d uaddr %p"), + args->val, args->uaddr); +#endif + error = futex_get(args->uaddr, NULL, &f, FUTEX_DONTCREATE); + if (error) + return (error); + if (f == NULL) { + args->sysmsg_iresult = 0; + return (error); + } + args->sysmsg_iresult = futex_wake(f, args->val); + futex_put(f, NULL); + break; + + case LINUX_FUTEX_CMP_REQUEUE: + + LINUX_CTR5(sys_futex, "CMP_REQUEUE uaddr %p " + "val %d val3 %d uaddr2 %p val2 %d", + args->uaddr, args->val, args->val3, args->uaddr2, + (int)(unsigned long)args->timeout); + +#ifdef DEBUG + if (ldebug(sys_futex)) + kprintf(ARGS(sys_futex, "futex_cmp_requeue uaddr %p " + "val %d val3 %d uaddr2 %p val2 %d"), + args->uaddr, args->val, args->val3, args->uaddr2, + (int)(unsigned long)args->timeout); +#endif + /* + * Linux allows this, we would not, it is an incorrect + * usage of declared ABI, so return EINVAL. + */ + if (args->uaddr == args->uaddr2) + return (EINVAL); + error = futex_get0(args->uaddr, &f, 0); + if (error) + return (error); + + /* + * To avoid deadlocks return EINVAL if second futex + * exists at this time. Otherwise create the new futex + * and ignore false positive LOR which thus happens. + * + * Glibc fall back to FUTEX_WAKE in case of any error + * returned by FUTEX_CMP_REQUEUE. 
+ */ + error = futex_get0(args->uaddr2, &f2, FUTEX_DONTEXISTS); + if (error) { + futex_put(f, NULL); + return (error); + } + error = copyin(args->uaddr, &val, sizeof(val)); + if (error) { + LINUX_CTR1(sys_futex, "CMP_REQUEUE copyin failed %d", + error); + futex_put(f2, NULL); + futex_put(f, NULL); + return (error); + } + if (val != args->val3) { + LINUX_CTR2(sys_futex, "CMP_REQUEUE val %d != uval %d", + args->val, val); + futex_put(f2, NULL); + futex_put(f, NULL); + return (EAGAIN); + } + + nrwake = (int)(unsigned long)args->timeout; + args->sysmsg_iresult = futex_requeue(f, args->val, f2, nrwake); + futex_put(f2, NULL); + futex_put(f, NULL); + break; + + case LINUX_FUTEX_WAKE_OP: + + LINUX_CTR5(sys_futex, "WAKE_OP " + "uaddr %p op %d val %x uaddr2 %p val3 %x", + args->uaddr, args->op, args->val, + args->uaddr2, args->val3); + +#ifdef DEBUG + if (ldebug(sys_futex)) + kprintf(ARGS(sys_futex, "futex_wake_op " + "uaddr %p op %d val %x uaddr2 %p val3 %x"), + args->uaddr, args->op, args->val, + args->uaddr2, args->val3); +#endif + error = futex_get0(args->uaddr, &f, 0); + if (error) + return (error); + if (args->uaddr != args->uaddr2) + error = futex_get0(args->uaddr2, &f2, 0); + if (error) { + futex_put(f, NULL); + return (error); + } + + /* + * This function returns positive number as results and + * negative as errors + */ + op_ret = futex_atomic_op(curproc, args->val3, args->uaddr2); + + if (op_ret < 0) { + /* XXX: We don't handle the EFAULT yet. 
*/ + if (op_ret != -EFAULT) { + if (f2 != NULL) + futex_put(f2, NULL); + futex_put(f, NULL); + return (-op_ret); + } + if (f2 != NULL) + futex_put(f2, NULL); + futex_put(f, NULL); + return (EFAULT); + } + + ret = futex_wake(f, args->val); + + if (op_ret > 0) { + op_ret = 0; + nrwake = (int)(unsigned long)args->timeout; + + if (f2 != NULL) + op_ret += futex_wake(f2, nrwake); + else + op_ret += futex_wake(f, nrwake); + ret += op_ret; + + } + if (f2 != NULL) + futex_put(f2, NULL); + futex_put(f, NULL); + args->sysmsg_iresult = ret; + break; + + case LINUX_FUTEX_LOCK_PI: + /* not yet implemented */ + return (ENOSYS); + + case LINUX_FUTEX_UNLOCK_PI: + /* not yet implemented */ + return (ENOSYS); + + case LINUX_FUTEX_TRYLOCK_PI: + /* not yet implemented */ + return (ENOSYS); + + case LINUX_FUTEX_REQUEUE: + + /* + * Glibc does not use this operation since version 2.3.3, + * as it is racy and replaced by FUTEX_CMP_REQUEUE operation. + * Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when + * FUTEX_REQUEUE returned EINVAL. 
+		 */
+		return (EINVAL);
+
+	default:
+		kprintf("linux_sys_futex: unknown op %d\n", args->op);
+		return (ENOSYS);
+	}
+
+	return (error);
+}
+
+/*
+ * Record args->head as the calling process's robust futex list head.
+ * Returns EINVAL when args->len does not match the expected structure size.
+ */
+int
+sys_linux_set_robust_list(struct linux_set_robust_list_args *args)
+{
+#ifdef DEBUG
+	if (ldebug(set_robust_list))
+		kprintf(ARGS(set_robust_list, "head %p len %d"),
+		    args->head, args->len);
+#endif
+
+	if (args->len != sizeof(struct linux_robust_list_head))
+		return (EINVAL);
+
+	emuldata_set_robust(curproc, args->head);
+
+	return (0);
+}
+
+
+
+/*
+ * Report the robust futex list head registered for the process args->pid
+ * (0 means the caller).  The size of the head structure is written to
+ * *args->len and the registered userland pointer itself (NULL when none was
+ * registered) is written to *args->head.
+ *
+ * Returns ESRCH when the process cannot be found, EPERM when the caller is
+ * not privileged enough to inspect another process, and EFAULT/copyout
+ * errors when the result cannot be stored.
+ */
+int
+sys_linux_get_robust_list(struct linux_get_robust_list_args *args)
+{
+	struct linux_emuldata *em;
+	struct linux_robust_list_head *head;
+	l_size_t len = sizeof(struct linux_robust_list_head);
+	int error = 0;
+
+#ifdef DEBUG
+	if (ldebug(get_robust_list))
+		kprintf(ARGS(get_robust_list, ""));
+#endif
+	EMUL_LOCK();
+	if (args->pid == 0) {
+		em = emuldata_get(curproc);
+		KKASSERT(em != NULL);
+	} else {
+		struct proc *p;
+
+		p = pfind(args->pid);
+		if (p == NULL) {
+			EMUL_UNLOCK();
+			return (ESRCH);
+		}
+
+		em = emuldata_get(p);
+		/* XXX: ptrace? p_candebug?*/
+		if (priv_check(curthread, PRIV_CRED_SETUID) ||
+		    priv_check(curthread, PRIV_CRED_SETEUID)/* ||
+		    p_candebug(curproc, p) */) {
+			EMUL_UNLOCK();
+			return (EPERM);
+		}
+		/*
+		 * NOTE(review): assumes emuldata_get() yields NULL for a
+		 * process that has no Linux emulation data -- confirm.
+		 */
+		if (em == NULL) {
+			EMUL_UNLOCK();
+			return (ESRCH);
+		}
+	}
+	head = em->robust_futexes;
+	EMUL_UNLOCK();
+
+	error = copyout(&len, args->len, sizeof(l_size_t));
+	if (error)
+		return (EFAULT);
+
+	/*
+	 * head is a userland address; hand back the pointer value, never the
+	 * memory it points at (which may belong to another process).
+	 */
+	error = copyout(&head, args->head, sizeof(head));
+
+	return (error);
+}
+
+/*
+ * Mark the futex word at userland address uaddr as abandoned if it is owned
+ * by process p: set FUTEX_OWNER_DIED, keep FUTEX_WAITERS, and for non-PI
+ * futexes with waiters wake one of them.  Returns 0 or an errno.
+ */
+static int
+handle_futex_death(struct proc *p, uint32_t *uaddr, int pi)
+{
+	uint32_t uval, nval, mval;
+	struct futex *f;
+	int error;
+
+retry:
+	if (copyin(uaddr, &uval, 4))
+		return (EFAULT);
+	if ((uval & FUTEX_TID_MASK) == p->p_pid) {
+		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
+		nval = casuword((ulong *)uaddr, uval, mval);
+
+		if (nval == -1)
+			return (EFAULT);
+
+		/* The word changed under us; re-read and try again. */
+		if (nval != uval)
+			goto retry;
+
+		if (!pi && (uval & FUTEX_WAITERS)) {
+			error = futex_get(uaddr, NULL, &f,
+			    FUTEX_DONTCREATE);
+			if (error)
+				return (error);
+			if (f != NULL) {
+				futex_wake(f, 1);
+				futex_put(f, NULL);
+			}
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Read one robust-list link from userland address head.  The low bit of the
+ * stored value is returned in *pi and the remaining bits in *entry.
+ */
+static int
+fetch_robust_entry(struct linux_robust_list **entry,
+    struct linux_robust_list **head, int *pi)
+{
+	l_ulong uentry;
+
+	if (copyin((const void *)head, &uentry, sizeof(l_ulong)))
+		return (EFAULT);
+
+	*entry = (void *)(uentry & ~1UL);
+	*pi = uentry & 1;
+
+	return (0);
+}
+
+/* This walks the list of robust futexes releasing them. 
*/
+/*
+ * For every entry on p's robust list (and the pending entry, if any) the
+ * futex word is located at a byte offset of futex_offset from the entry and
+ * passed to handle_futex_death().  The walk stops on the first copyin or
+ * handle_futex_death() failure, and after at most 2048 entries so a
+ * circular list cannot loop forever.
+ */
+void
+release_futexes(struct proc *p)
+{
+	struct linux_robust_list_head *head = NULL;
+	struct linux_robust_list *entry, *next_entry, *pending;
+	unsigned int limit = 2048;
+	int pi, next_pi, pip;	/* fetch_robust_entry() takes int * */
+	struct linux_emuldata *em;
+	l_long futex_offset;
+	int rc;
+
+	EMUL_LOCK();
+	KKASSERT(p != NULL);
+	em = emuldata_get(p);
+	KKASSERT(em != NULL);
+	head = em->robust_futexes;
+	EMUL_UNLOCK();
+
+	if (head == NULL)
+		return;
+
+	if (fetch_robust_entry(&entry, PTRIN(&head->list.next), &pi))
+		return;
+
+	if (copyin(&head->futex_offset, &futex_offset, sizeof(futex_offset)))
+		return;
+
+	if (fetch_robust_entry(&pending, PTRIN(&head->pending_list), &pip))
+		return;
+
+	while (entry != &head->list) {
+		/*
+		 * Fetch the successor before touching the current entry;
+		 * the fetch result is only acted upon afterwards.
+		 */
+		rc = fetch_robust_entry(&next_entry, PTRIN(&entry->next), &next_pi);
+
+		/* The pending entry is handled once, after the loop. */
+		if (entry != pending) {
+			/* futex_offset is in bytes, so offset a byte pointer. */
+			if (handle_futex_death(p,
+			    (uint32_t *)((caddr_t)entry + futex_offset), pi))
+				return;
+		}
+		if (rc)
+			return;
+
+		entry = next_entry;
+		pi = next_pi;
+
+		if (!--limit)
+			break;
+
+#if 0
+		/* XXX: not sure about this yield, was sched_relinquish(curthread); */
+		lwkt_deschedule(curthread);
+		lwkt_yield();
+#endif
+	}
+
+	if (pending) {
+		handle_futex_death(p,
+		    (uint32_t *)((caddr_t)pending + futex_offset), pip);
+	}
+}
diff --git a/sys/emulation/linux/linux_futex.h b/sys/emulation/linux/linux_futex.h
new file mode 100644
index 0000000000..d9335ebdbf
--- /dev/null
+++ b/sys/emulation/linux/linux_futex.h
@@ -0,0 +1,87 @@
+/* $NetBSD: linux_futex.h,v 1.3 2008/10/126 16:38:22 christos Exp $ */
+
+/*-
+ * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Emmanuel Dreyfus + * 4. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _LINUX_FUTEX_H +#define _LINUX_FUTEX_H + +extern LIST_HEAD(futex_list, futex) futex_list; +extern struct lock futex_lock; + +#define LINUX_FUTEX_WAIT 0 +#define LINUX_FUTEX_WAKE 1 +#define LINUX_FUTEX_FD 2 /* unused */ +#define LINUX_FUTEX_REQUEUE 3 +#define LINUX_FUTEX_CMP_REQUEUE 4 +#define LINUX_FUTEX_WAKE_OP 5 + +/* XXX: what are these? 
netbsd doesn't have them */ +#define LINUX_FUTEX_LOCK_PI 6 +#define LINUX_FUTEX_UNLOCK_PI 7 +#define LINUX_FUTEX_TRYLOCK_PI 8 + +#define LINUX_FUTEX_PRIVATE_FLAG 128 + +#define FUTEX_OP_SET 0 /* *(int *)UADDR2 = OPARG; */ +#define FUTEX_OP_ADD 1 /* *(int *)UADDR2 += OPARG; */ +#define FUTEX_OP_OR 2 /* *(int *)UADDR2 |= OPARG; */ +#define FUTEX_OP_ANDN 3 /* *(int *)UADDR2 &= ~OPARG; */ +#define FUTEX_OP_XOR 4 /* *(int *)UADDR2 ^= OPARG; */ + +#define FUTEX_OP_OPARG_SHIFT 8 /* Use (1 << OPARG) instead of OPARG. */ + +#define FUTEX_OP_CMP_EQ 0 /* if (oldval == CMPARG) wake */ +#define FUTEX_OP_CMP_NE 1 /* if (oldval != CMPARG) wake */ +#define FUTEX_OP_CMP_LT 2 /* if (oldval < CMPARG) wake */ +#define FUTEX_OP_CMP_LE 3 /* if (oldval <= CMPARG) wake */ +#define FUTEX_OP_CMP_GT 4 /* if (oldval > CMPARG) wake */ +#define FUTEX_OP_CMP_GE 5 /* if (oldval >= CMPARG) wake */ + +#define FUTEX_WAITERS 0x80000000 +#define FUTEX_OWNER_DIED 0x40000000 +#define FUTEX_TID_MASK 0x3fffffff + + +/* robust futexes */ +struct linux_robust_list { + struct linux_robust_list *next; +}; + +struct linux_robust_list_head { + struct linux_robust_list list; + l_long futex_offset; + struct linux_robust_list *pending_list; +}; + +void release_futexes(struct proc *); +extern struct lock futex_mtx; +#endif /* !_LINUX_FUTEX_H */ diff --git a/sys/emulation/linux/linux_ioctl.c b/sys/emulation/linux/linux_ioctl.c index 39f058c563..2e0f315762 100644 --- a/sys/emulation/linux/linux_ioctl.c +++ b/sys/emulation/linux/linux_ioctl.c @@ -569,6 +569,20 @@ linux_ioctl_TCSETAF(struct file *fp, u_long cmd, u_long ocmd, caddr_t data, stru } static int +linux_ioctl_TIOCLINUX(struct file *fp, u_long cmd, u_long ocmd, caddr_t data, struct ucred *cred) +{ + switch ((u_char)*data) { + case 11: /* LINUX_TIOCLINUX_KERNMSG */ + return 0; + default: + kprintf("Unknown LINUX_TIOCLINUX: %d\n", ((u_char)*data)); + kprintf("cmd = %lu, ocmd = %lu\n", cmd, ocmd); + return 0; + } + return 0; +} + +static int 
linux_ioctl_TCXONC(struct file *fp, u_long cmd, u_long ocmd, caddr_t data, struct ucred *cred) { switch ((u_long)data) { @@ -1048,6 +1062,7 @@ linux_ioctl_SIOCGIFFLAGS(struct file *fp, u_long cmd, u_long ocmd, caddr_t data, char ifname[IFNAMSIZ]; l_short flags; +#if 0 if (fp->f_type != DTYPE_SOCKET) { /* XXX: I doubt this is correct because * we don't translate the ifname and @@ -1055,8 +1070,12 @@ linux_ioctl_SIOCGIFFLAGS(struct file *fp, u_long cmd, u_long ocmd, caddr_t data, */ return (fo_ioctl(fp, SIOCGIFFLAGS, data, cred, NULL)); } +#endif ifp = ifname_linux_to_bsd(ifr->ifr_name, ifname); + if (ifp == NULL) + return (EINVAL); + flags = ifp->if_flags; /* these flags have no Linux equivalent */ flags &= ~(IFF_SMART|IFF_OACTIVE|IFF_SIMPLEX| @@ -1074,6 +1093,47 @@ linux_ioctl_SIOCGIFFLAGS(struct file *fp, u_long cmd, u_long ocmd, caddr_t data, #define ARPHRD_ETHER 1 #define ARPHRD_LOOPBACK 772 +/* XXX: could implement using native ioctl, so only mapping */ +static int +linux_ioctl_SIOCGIFINDEX(struct file *fp, u_long cmd, u_long ocmd, caddr_t data, struct ucred *cred) +{ + struct l_ifreq *ifr = (struct l_ifreq *)data; + struct ifnet *ifp; + char ifname[IFNAMSIZ]; + l_int index; + + ifp = ifname_linux_to_bsd(ifr->ifr_name, ifname); + if (ifp == NULL) + return EINVAL; + +#if DEBUG + kprintf("Interface index: %d\n", ifp->if_index); +#endif + + index = ifp->if_index; + return (copyout(&index, &ifr->ifr_ifindex, sizeof(index))); +} + +static int +linux_ioctl_SIOCGIFMETRIC(struct file *fp, u_long cmd, u_long ocmd, caddr_t data, struct ucred *cred) +{ + struct l_ifreq *ifr = (struct l_ifreq *)data; + struct ifnet *ifp; + char ifname[IFNAMSIZ]; + l_int metric; + + ifp = ifname_linux_to_bsd(ifr->ifr_name, ifname); + if (ifp == NULL) + return EINVAL; + +#if DEBUG + kprintf("Interface metric: %d\n", ifp->if_metric); +#endif + + metric = ifp->if_metric; + return (copyout(&metric, &ifr->ifr_ifmetric, sizeof(metric))); +} + static int linux_ioctl_SIOGIFHWADDR(struct file *fp, 
u_long cmd, u_long ocmd, caddr_t data, struct ucred *cred) { @@ -1085,6 +1145,10 @@ linux_ioctl_SIOGIFHWADDR(struct file *fp, u_long cmd, u_long ocmd, caddr_t data, struct ifaddr_container *ifac; ifp = ifname_linux_to_bsd(ifr->ifr_name, ifname); + if (ifp == NULL) { + return EINVAL; + } + if (ifp->if_type == IFT_LOOP) { bzero(&ifr->ifr_hwaddr, sizeof lsa); ifr->ifr_hwaddr.sa_family = ARPHRD_LOOPBACK; @@ -1186,6 +1250,7 @@ static struct ioctl_map_range linux_ioctl_map_entries[] = { MAPPED_IOCTL_IOW(LINUX_TCSETAF, linux_ioctl_TCSETAF, struct linux_termio), MAPPED_IOCTL_IO(LINUX_TCXONC, linux_ioctl_TCXONC), MAPPED_IOCTL_IO(LINUX_TCFLSH, linux_ioctl_TCFLSH), + MAPPED_IOCTL_IO(LINUX_TIOCLINUX, linux_ioctl_TIOCLINUX), MAPPED_IOCTL_MAP(LINUX_TIOCEXCL, TIOCEXCL), MAPPED_IOCTL_MAP(LINUX_TIOCNXCL, TIOCNXCL), MAPPED_IOCTL_MAP(LINUX_TIOCGPGRP, TIOCGPGRP), @@ -1262,6 +1327,8 @@ static struct ioctl_map_range linux_ioctl_map_entries[] = { MAPPED_IOCTL_MAPF(LINUX_SIOCGIFMTU, SIOCGIFMTU, linux_ioctl_map_ifname), MAPPED_IOCTL_MAPF(LINUX_SIOCSIFMTU, SIOCSIFMTU, linux_ioctl_map_ifname), MAPPED_IOCTL_IOWR(LINUX_SIOCGIFHWADDR, linux_ioctl_SIOGIFHWADDR, struct l_ifreq), + MAPPED_IOCTL_IOR(LINUX_SIOCGIFINDEX, linux_ioctl_SIOCGIFINDEX, struct l_ifreq), + MAPPED_IOCTL_IOR(LINUX_SIOCGIFMETRIC, linux_ioctl_SIOCGIFMETRIC, struct l_ifreq), MAPPED_IOCTL_MAP(LINUX_SIOCADDMULTI, SIOCADDMULTI), MAPPED_IOCTL_MAP(LINUX_SIOCDELMULTI, SIOCDELMULTI), /* diff --git a/sys/emulation/linux/linux_ioctl.h b/sys/emulation/linux/linux_ioctl.h index 5e2cf4c918..004c9edda7 100644 --- a/sys/emulation/linux/linux_ioctl.h +++ b/sys/emulation/linux/linux_ioctl.h @@ -216,7 +216,7 @@ #define LINUX_KBD_MEDIUMRAW 2 /* - * socket + * socket (http://lxr.oss.org.cn/source/include/linux/sockios.h) */ #define LINUX_FIOSETOWN 0x8901 #define LINUX_SIOCSPGRP 0x8902 @@ -232,6 +232,7 @@ #define LINUX_SIOCGIFBRDADDR 0x8919 #define LINUX_SIOCGIFNETMASK 0x891b #define LINUX_SIOCSIFNETMASK 0x891c +#define LINUX_SIOCGIFMETRIC 0x891d /* 
get metric */ #define LINUX_SIOCGIFMTU 0x8921 #define LINUX_SIOCSIFMTU 0x8922 #define LINUX_SIOCSIFNAME 0x8923 @@ -239,9 +240,14 @@ #define LINUX_SIOCGIFHWADDR 0x8927 #define LINUX_SIOCADDMULTI 0x8931 #define LINUX_SIOCDELMULTI 0x8932 +#define LINUX_SIOCGIFINDEX 0x8933 /* name -> if_index mapping */ +#define LINUX_SIOCGIFCOUNT 0x8938 /* get number of devices */ +#define LINUX_SIOCGIFTXQLEN 0x8942 /* Get the tx queue length */ +#define LINUX_SIOCGIFMAP 0x8970 /* Get device parameters */ + #define LINUX_IOCTL_SOCKET_MIN LINUX_FIOSETOWN -#define LINUX_IOCTL_SOCKET_MAX LINUX_SIOCDELMULTI +#define LINUX_IOCTL_SOCKET_MAX LINUX_SIOCGIFMAP /* * Device private ioctl calls diff --git a/sys/emulation/linux/linux_ipc.c b/sys/emulation/linux/linux_ipc.c index fe47a8b52e..5a8b0df4ea 100644 --- a/sys/emulation/linux/linux_ipc.c +++ b/sys/emulation/linux/linux_ipc.c @@ -33,53 +33,39 @@ #include #include #include +#include #include #include #include +#include #include #include "linux_ipc.h" #include "linux_util.h" -struct l_seminfo { - l_int semmap; - l_int semmni; - l_int semmns; - l_int semmnu; - l_int semmsl; - l_int semopm; - l_int semume; - l_int semusz; - l_int semvmx; - l_int semaem; -}; - -struct l_shminfo { - l_int shmmax; - l_int shmmin; - l_int shmmni; - l_int shmseg; - l_int shmall; -}; - -struct l_shm_info { - l_int used_ids; - l_ulong shm_tot; /* total allocated shm */ - l_ulong shm_rss; /* total resident shm */ - l_ulong shm_swp; /* total swapped shm */ - l_ulong swap_attempts; - l_ulong swap_successes; -}; - -struct l_ipc_perm { - l_key_t key; - l_uid16_t uid; - l_gid16_t gid; - l_uid16_t cuid; - l_gid16_t cgid; - l_ushort mode; - l_ushort seq; -}; + +static void +bsd_to_linux_shminfo( struct shminfo *bpp, struct l_shminfo *lpp) +{ + lpp->shmmax = bpp->shmmax; + lpp->shmmin = bpp->shmmin; + lpp->shmmni = bpp->shmmni; + lpp->shmseg = bpp->shmseg; + lpp->shmall = bpp->shmall; +} + +#if 0 +static void +bsd_to_linux_shm_info( struct shm_info *bpp, struct l_shm_info 
*lpp) +{ + lpp->used_ids = bpp->used_ids ; + lpp->shm_tot = bpp->shm_tot ; + lpp->shm_rss = bpp->shm_rss ; + lpp->shm_swp = bpp->shm_swp ; + lpp->swap_attempts = bpp->swap_attempts ; + lpp->swap_successes = bpp->swap_successes ; +} +#endif /* * MPSAFE @@ -111,31 +97,6 @@ bsd_to_linux_ipc_perm(struct ipc_perm *bpp, struct l_ipc_perm *lpp) lpp->seq = bpp->seq; } -struct l_semid_ds { - struct l_ipc_perm sem_perm; - l_time_t sem_otime; - l_time_t sem_ctime; - void *sem_base; - void *sem_pending; - void *sem_pending_last; - void *undo; - l_ushort sem_nsems; -}; - -struct l_shmid_ds { - struct l_ipc_perm shm_perm; - l_int shm_segsz; - l_time_t shm_atime; - l_time_t shm_dtime; - l_time_t shm_ctime; - l_ushort shm_cpid; - l_ushort shm_lpid; - l_short shm_nattch; - l_ushort private1; - void *private2; - void *private3; -}; - /* * MPSAFE */ @@ -176,7 +137,6 @@ linux_to_bsd_shmid_ds(struct l_shmid_ds *lsp, struct shmid_ds *bsp) bsp->shm_atime = lsp->shm_atime; bsp->shm_dtime = lsp->shm_dtime; bsp->shm_ctime = lsp->shm_ctime; - bsp->shm_internal = lsp->private3; /* this goes (yet) SOS */ } /* @@ -186,14 +146,243 @@ static void bsd_to_linux_shmid_ds(struct shmid_ds *bsp, struct l_shmid_ds *lsp) { bsd_to_linux_ipc_perm(&bsp->shm_perm, &lsp->shm_perm); - lsp->shm_segsz = bsp->shm_segsz; + if (bsp->shm_segsz > INT_MAX) + lsp->shm_segsz = INT_MAX; + else + lsp->shm_segsz = bsp->shm_segsz; lsp->shm_lpid = bsp->shm_lpid; lsp->shm_cpid = bsp->shm_cpid; lsp->shm_nattch = bsp->shm_nattch; lsp->shm_atime = bsp->shm_atime; lsp->shm_dtime = bsp->shm_dtime; lsp->shm_ctime = bsp->shm_ctime; - lsp->private3 = bsp->shm_internal; /* this goes (yet) SOS */ + lsp->private3 = 0; +} + +static void +linux_to_bsd_msqid_ds(struct l_msqid_ds *lsp, struct msqid_ds *bsp) +{ + linux_to_bsd_ipc_perm(&lsp->msg_perm, &bsp->msg_perm); + bsp->msg_cbytes = lsp->msg_cbytes; + bsp->msg_qnum = lsp->msg_qnum; + bsp->msg_qbytes = lsp->msg_qbytes; + bsp->msg_lspid = lsp->msg_lspid; + bsp->msg_lrpid = lsp->msg_lrpid; 
+ bsp->msg_stime = lsp->msg_stime; + bsp->msg_rtime = lsp->msg_rtime; + bsp->msg_ctime = lsp->msg_ctime; +} + +static void +bsd_to_linux_msqid_ds(struct msqid_ds *bsp, struct l_msqid_ds *lsp) +{ + bsd_to_linux_ipc_perm(&bsp->msg_perm, &lsp->msg_perm); + lsp->msg_cbytes = bsp->msg_cbytes; + lsp->msg_qnum = bsp->msg_qnum; + lsp->msg_qbytes = bsp->msg_qbytes; + lsp->msg_lspid = bsp->msg_lspid; + lsp->msg_lrpid = bsp->msg_lrpid; + lsp->msg_stime = bsp->msg_stime; + lsp->msg_rtime = bsp->msg_rtime; + lsp->msg_ctime = bsp->msg_ctime; +} + +static void +linux_ipc_perm_to_ipc64_perm(struct l_ipc_perm *in, struct l_ipc64_perm *out) +{ + + /* XXX: do we really need to do something here? */ + out->key = in->key; + out->uid = in->uid; + out->gid = in->gid; + out->cuid = in->cuid; + out->cgid = in->cgid; + out->mode = in->mode; + out->seq = in->seq; +} + +static int +linux_msqid_pullup(l_int ver, struct l_msqid_ds *linux_msqid, caddr_t uaddr) +{ + struct l_msqid64_ds linux_msqid64; + int error; + + if (ver == LINUX_IPC_64) { + error = copyin(uaddr, &linux_msqid64, sizeof(linux_msqid64)); + if (error != 0) + return (error); + + bzero(linux_msqid, sizeof(*linux_msqid)); + + linux_msqid->msg_perm.uid = linux_msqid64.msg_perm.uid; + linux_msqid->msg_perm.gid = linux_msqid64.msg_perm.gid; + linux_msqid->msg_perm.mode = linux_msqid64.msg_perm.mode; + + if (linux_msqid64.msg_qbytes > USHRT_MAX) + linux_msqid->msg_lqbytes = linux_msqid64.msg_qbytes; + else + linux_msqid->msg_qbytes = linux_msqid64.msg_qbytes; + } else { + error = copyin(uaddr, linux_msqid, sizeof(*linux_msqid)); + } + return (error); +} + +static int +linux_msqid_pushdown(l_int ver, struct l_msqid_ds *linux_msqid, caddr_t uaddr) +{ + struct l_msqid64_ds linux_msqid64; + + if (ver == LINUX_IPC_64) { + bzero(&linux_msqid64, sizeof(linux_msqid64)); + + linux_ipc_perm_to_ipc64_perm(&linux_msqid->msg_perm, + &linux_msqid64.msg_perm); + + linux_msqid64.msg_stime = linux_msqid->msg_stime; + linux_msqid64.msg_rtime = 
linux_msqid->msg_rtime; + linux_msqid64.msg_ctime = linux_msqid->msg_ctime; + + if (linux_msqid->msg_cbytes == 0) + linux_msqid64.msg_cbytes = linux_msqid->msg_lcbytes; + else + linux_msqid64.msg_cbytes = linux_msqid->msg_cbytes; + + linux_msqid64.msg_qnum = linux_msqid->msg_qnum; + + if (linux_msqid->msg_qbytes == 0) + linux_msqid64.msg_qbytes = linux_msqid->msg_lqbytes; + else + linux_msqid64.msg_qbytes = linux_msqid->msg_qbytes; + + linux_msqid64.msg_lspid = linux_msqid->msg_lspid; + linux_msqid64.msg_lrpid = linux_msqid->msg_lrpid; + + return (copyout(&linux_msqid64, uaddr, sizeof(linux_msqid64))); + } else { + return (copyout(linux_msqid, uaddr, sizeof(*linux_msqid))); + } +} + +static int +linux_semid_pullup(l_int ver, struct l_semid_ds *linux_semid, caddr_t uaddr) +{ + struct l_semid64_ds linux_semid64; + int error; + + if (ver == LINUX_IPC_64) { + error = copyin(uaddr, &linux_semid64, sizeof(linux_semid64)); + if (error != 0) + return (error); + + bzero(linux_semid, sizeof(*linux_semid)); + + linux_semid->sem_perm.uid = linux_semid64.sem_perm.uid; + linux_semid->sem_perm.gid = linux_semid64.sem_perm.gid; + linux_semid->sem_perm.mode = linux_semid64.sem_perm.mode; + } else { + error = copyin(uaddr, linux_semid, sizeof(*linux_semid)); + } + return (error); +} + +static int +linux_semid_pushdown(l_int ver, struct l_semid_ds *linux_semid, caddr_t uaddr) +{ + struct l_semid64_ds linux_semid64; + + if (ver == LINUX_IPC_64) { + bzero(&linux_semid64, sizeof(linux_semid64)); + + linux_ipc_perm_to_ipc64_perm(&linux_semid->sem_perm, + &linux_semid64.sem_perm); + + linux_semid64.sem_otime = linux_semid->sem_otime; + linux_semid64.sem_ctime = linux_semid->sem_ctime; + linux_semid64.sem_nsems = linux_semid->sem_nsems; + + return (copyout(&linux_semid64, uaddr, sizeof(linux_semid64))); + } else { + return (copyout(linux_semid, uaddr, sizeof(*linux_semid))); + } +} + +static int +linux_shmid_pullup(l_int ver, struct l_shmid_ds *linux_shmid, caddr_t uaddr) +{ + struct 
l_shmid64_ds linux_shmid64; + int error; + + if (ver == LINUX_IPC_64) { + error = copyin(uaddr, &linux_shmid64, sizeof(linux_shmid64)); + if (error != 0) + return (error); + + bzero(linux_shmid, sizeof(*linux_shmid)); + + linux_shmid->shm_perm.uid = linux_shmid64.shm_perm.uid; + linux_shmid->shm_perm.gid = linux_shmid64.shm_perm.gid; + linux_shmid->shm_perm.mode = linux_shmid64.shm_perm.mode; + } else { + error = copyin(uaddr, linux_shmid, sizeof(*linux_shmid)); + } + return (error); +} + +static int +linux_shmid_pushdown(l_int ver, struct l_shmid_ds *linux_shmid, caddr_t uaddr) +{ + struct l_shmid64_ds linux_shmid64; + + /* + * XXX: This is backwards and loses information in shm_nattch + * and shm_segsz. We should probably either expose the BSD + * shmid structure directly and convert it to either the + * non-64 or 64 variant directly or the code should always + * convert to the 64 variant and then truncate values into the + * non-64 variant if needed since the 64 variant has more + * precision. 
+ */ + if (ver == LINUX_IPC_64) { + bzero(&linux_shmid64, sizeof(linux_shmid64)); + + linux_ipc_perm_to_ipc64_perm(&linux_shmid->shm_perm, + &linux_shmid64.shm_perm); + + linux_shmid64.shm_segsz = linux_shmid->shm_segsz; + linux_shmid64.shm_atime = linux_shmid->shm_atime; + linux_shmid64.shm_dtime = linux_shmid->shm_dtime; + linux_shmid64.shm_ctime = linux_shmid->shm_ctime; + linux_shmid64.shm_cpid = linux_shmid->shm_cpid; + linux_shmid64.shm_lpid = linux_shmid->shm_lpid; + linux_shmid64.shm_nattch = linux_shmid->shm_nattch; + + return (copyout(&linux_shmid64, uaddr, sizeof(linux_shmid64))); + } else { + return (copyout(linux_shmid, uaddr, sizeof(*linux_shmid))); + } +} + +static int +linux_shminfo_pushdown(l_int ver, struct l_shminfo *linux_shminfo, + caddr_t uaddr) +{ + struct l_shminfo64 linux_shminfo64; + + if (ver == LINUX_IPC_64) { + bzero(&linux_shminfo64, sizeof(linux_shminfo64)); + + linux_shminfo64.shmmax = linux_shminfo->shmmax; + linux_shminfo64.shmmin = linux_shminfo->shmmin; + linux_shminfo64.shmmni = linux_shminfo->shmmni; + linux_shminfo64.shmseg = linux_shminfo->shmseg; + linux_shminfo64.shmall = linux_shminfo->shmall; + + return (copyout(&linux_shminfo64, uaddr, + sizeof(linux_shminfo64))); + } else { + return (copyout(linux_shminfo, uaddr, sizeof(*linux_shminfo))); + } } /* @@ -256,7 +445,7 @@ linux_semctl(struct linux_semctl_args *args) bsd_args.semnum = args->semnum; bsd_args.arg = unptr; - switch (args->cmd) { + switch (args->cmd & ~LINUX_IPC_64) { case LINUX_IPC_RMID: bsd_args.cmd = IPC_RMID; break; @@ -277,15 +466,19 @@ linux_semctl(struct linux_semctl_args *args) break; case LINUX_IPC_SET: bsd_args.cmd = IPC_SET; - error = copyin((caddr_t)args->arg.buf, &linux_semid, - sizeof(linux_semid)); + error = linux_semid_pullup(args->cmd & LINUX_IPC_64, + &linux_semid, (caddr_t)args->arg.buf); if (error) return (error); unptr->buf = stackgap_alloc(&sg, sizeof(struct semid_ds)); linux_to_bsd_semid_ds(&linux_semid, unptr->buf); break; case 
LINUX_IPC_STAT: - bsd_args.cmd = IPC_STAT; + case LINUX_SEM_STAT: + if ((args->cmd & ~LINUX_IPC_64) == LINUX_IPC_STAT) + bsd_args.cmd = IPC_STAT; + else + bsd_args.cmd = SEM_STAT; unptr->buf = stackgap_alloc(&sg, sizeof(struct semid_ds)); error = sys___semctl(&bsd_args); if (error) @@ -293,8 +486,12 @@ linux_semctl(struct linux_semctl_args *args) args->sysmsg_result = IXSEQ_TO_IPCID(bsd_args.semid, unptr->buf->sem_perm); bsd_to_linux_semid_ds(unptr->buf, &linux_semid); - return copyout(&linux_semid, (caddr_t)args->arg.buf, - sizeof(linux_semid)); + error = linux_semid_pushdown(args->cmd & LINUX_IPC_64, + &linux_semid, (caddr_t)(args->arg.buf)); + if (error == 0) + args->sysmsg_iresult = ((args->cmd & ~LINUX_IPC_64) == SEM_STAT) + ? bsd_args.sysmsg_result : 0; + return (error); case LINUX_IPC_INFO: case LINUX_SEM_INFO: error = copyin((caddr_t)args->arg.buf, &linux_seminfo, @@ -315,11 +512,13 @@ linux_semctl(struct linux_semctl_args *args) args->sysmsg_result = seminfo.semmni; return 0; /* No need for __semctl call */ case LINUX_GETALL: - /* FALLTHROUGH */ + bsd_args.cmd = GETALL; + break; case LINUX_SETALL: - /* FALLTHROUGH */ + bsd_args.cmd = SETALL; + break; default: - uprintf("linux: 'ipc' typ=%d not implemented\n", args->cmd); + uprintf("linux: 'ipc' type=%d not implemented\n", args->cmd & ~LINUX_IPC_64); return EINVAL; } error = sys___semctl(&bsd_args); @@ -336,6 +535,8 @@ linux_msgsnd(struct linux_msgsnd_args *args) struct msgsnd_args bsd_args; int error; + if ((l_long)args->msgsz < 0 || args->msgsz > (l_long)msginfo.msgmax) + return (EINVAL); bsd_args.sysmsg_result = 0; bsd_args.msqid = args->msqid; bsd_args.msgp = args->msgp; @@ -354,7 +555,8 @@ linux_msgrcv(struct linux_msgrcv_args *args) { struct msgrcv_args bsd_args; int error; - + if ((l_long)args->msgsz < 0 || args->msgsz > (l_long)msginfo.msgmax) + return (EINVAL); bsd_args.sysmsg_result = 0; bsd_args.msqid = args->msqid; bsd_args.msgp = args->msgp; @@ -390,15 +592,87 @@ int linux_msgctl(struct 
linux_msgctl_args *args) { struct msgctl_args bsd_args; - int error; + struct l_msqid_ds linux_msqid; + int error, bsd_cmd; + struct msqid_ds *unptr; + caddr_t sg; + + sg = stackgap_init(); + /* Make sure the arg parameter can be copied in. */ + unptr = stackgap_alloc(&sg, sizeof(struct msqid_ds)); + /* unptr is not preloaded: IPC_SET fills it via linux_to_bsd_msqid_ds() and IPC_STAT has sys_msgctl() write it; copying from &args->buf would over-read the args struct. */ + bsd_cmd = args->cmd & ~LINUX_IPC_64; bsd_args.sysmsg_result = 0; bsd_args.msqid = args->msqid; - bsd_args.cmd = args->cmd; - bsd_args.buf = (struct msqid_ds *)args->buf; + bsd_args.cmd = bsd_cmd; + bsd_args.buf = unptr; + switch(bsd_cmd) { + case LINUX_IPC_INFO: + case LINUX_MSG_INFO: { + struct l_msginfo linux_msginfo; + + /* + * XXX MSG_INFO uses the same data structure but returns different + * dynamic counters in msgpool, msgmap, and msgtql fields. + */ + linux_msginfo.msgpool = (long)msginfo.msgmni * + (long)msginfo.msgmnb / 1024L; /* XXX MSG_INFO. */ + linux_msginfo.msgmap = msginfo.msgmnb; /* XXX MSG_INFO. */ + linux_msginfo.msgmax = msginfo.msgmax; + linux_msginfo.msgmnb = msginfo.msgmnb; + linux_msginfo.msgmni = msginfo.msgmni; + linux_msginfo.msgssz = msginfo.msgssz; + linux_msginfo.msgtql = msginfo.msgtql; /* XXX MSG_INFO.
*/ + linux_msginfo.msgseg = msginfo.msgseg; + error = copyout(&linux_msginfo, PTRIN(args->buf), + sizeof(linux_msginfo)); + if (error == 0) + args->sysmsg_iresult = msginfo.msgmni; /* XXX */ + + return (error); + } + +/* + * TODO: implement this + * case LINUX_MSG_STAT: + */ + case LINUX_IPC_STAT: + /* NOTHING */ + break; + + case LINUX_IPC_SET: + error = linux_msqid_pullup(args->cmd & LINUX_IPC_64, + &linux_msqid, (caddr_t)(args->buf)); + if (error) + return (error); + linux_to_bsd_msqid_ds(&linux_msqid, unptr); + break; + + case LINUX_IPC_RMID: + /* NOTHING */ + break; + + default: + return (EINVAL); + break; + } + error = sys_msgctl(&bsd_args); + if (error != 0) + if (bsd_cmd != LINUX_IPC_RMID || error != EINVAL) + return (error); + if (bsd_cmd == LINUX_IPC_STAT) { + bsd_to_linux_msqid_ds(bsd_args.buf, &linux_msqid); + return (linux_msqid_pushdown(args->cmd & LINUX_IPC_64, + &linux_msqid, PTRIN(args->buf))); + } args->sysmsg_result = bsd_args.sysmsg_result; return ((args->cmd == LINUX_IPC_RMID && error == EINVAL) ? 
0 : error); + + + + } /* @@ -463,17 +737,21 @@ linux_shmget(struct linux_shmget_args *args) /* * MPSAFE */ +extern int shm_nused; int linux_shmctl(struct linux_shmctl_args *args) { struct l_shmid_ds linux_shmid; + struct l_shminfo linux_shminfo; + struct l_shm_info linux_shm_info; struct shmctl_args bsd_args; int error; caddr_t sg = stackgap_init(); bsd_args.sysmsg_result = 0; - switch (args->cmd) { + switch (args->cmd & ~LINUX_IPC_64) { case LINUX_IPC_STAT: + case LINUX_SHM_STAT: bsd_args.shmid = args->shmid; bsd_args.cmd = IPC_STAT; bsd_args.buf = (struct shmid_ds*)stackgap_alloc(&sg, sizeof(struct shmid_ds)); @@ -481,11 +759,12 @@ linux_shmctl(struct linux_shmctl_args *args) return error; bsd_to_linux_shmid_ds(bsd_args.buf, &linux_shmid); args->sysmsg_result = bsd_args.sysmsg_result; - return copyout(&linux_shmid, (caddr_t)args->buf, sizeof(linux_shmid)); + return (linux_shmid_pushdown(args->cmd & LINUX_IPC_64, + &linux_shmid, PTRIN(args->buf))); case LINUX_IPC_SET: - if ((error = copyin((caddr_t)args->buf, &linux_shmid, - sizeof(linux_shmid)))) + if ((error = linux_shmid_pullup(args->cmd & LINUX_IPC_64, + &linux_shmid, PTRIN(args->buf)))) return error; bsd_args.buf = (struct shmid_ds*)stackgap_alloc(&sg, sizeof(struct shmid_ds)); linux_to_bsd_shmid_ds(&linux_shmid, bsd_args.buf); @@ -498,20 +777,32 @@ linux_shmctl(struct linux_shmctl_args *args) if (args->buf == NULL) bsd_args.buf = NULL; else { - if ((error = copyin((caddr_t)args->buf, &linux_shmid, - sizeof(linux_shmid)))) + if ((error = linux_shmid_pullup(args->cmd & LINUX_IPC_64, + &linux_shmid, PTRIN(args->buf)))) return error; bsd_args.buf = (struct shmid_ds*)stackgap_alloc(&sg, sizeof(struct shmid_ds)); linux_to_bsd_shmid_ds(&linux_shmid, bsd_args.buf); } break; case LINUX_IPC_INFO: - case LINUX_SHM_STAT: + bsd_to_linux_shminfo(&shminfo, &linux_shminfo); + return (linux_shminfo_pushdown(args->cmd & LINUX_IPC_64, + &linux_shminfo, PTRIN(args->buf))); + break; case LINUX_SHM_INFO: + linux_shm_info.used_ids 
= shm_nused; + linux_shm_info.shm_tot = 0; + linux_shm_info.shm_rss = 0; + linux_shm_info.shm_swp = 0; + linux_shm_info.swap_attempts = 0; + linux_shm_info.swap_successes = 0; + return copyout(&linux_shm_info, PTRIN(args->buf), + sizeof(struct l_shm_info)); + case LINUX_SHM_LOCK: case LINUX_SHM_UNLOCK: default: - uprintf("linux: 'ipc' typ=%d not implemented\n", args->cmd); + uprintf("linux: 'ipc' type=%d not implemented\n", args->cmd & ~LINUX_IPC_64); return EINVAL; } error = sys_shmctl(&bsd_args); diff --git a/sys/emulation/linux/linux_ipc.h b/sys/emulation/linux/linux_ipc.h index b7db664ed9..67d565d2a6 100644 --- a/sys/emulation/linux/linux_ipc.h +++ b/sys/emulation/linux/linux_ipc.h @@ -34,6 +34,18 @@ #ifdef __i386__ + +struct l_msginfo { + l_int msgpool; + l_int msgmap; + l_int msgmax; + l_int msgmnb; + l_int msgmni; + l_int msgssz; + l_int msgtql; + l_ushort msgseg; +}; + struct linux_msgctl_args { struct sysmsg sysmsg; @@ -124,6 +136,193 @@ struct linux_shmget_args l_int shmflg; }; + + +struct l_seminfo { + l_int semmap; + l_int semmni; + l_int semmns; + l_int semmnu; + l_int semmsl; + l_int semopm; + l_int semume; + l_int semusz; + l_int semvmx; + l_int semaem; +}; + + +struct l_shminfo { + l_int shmmax; + l_int shmmin; + l_int shmmni; + l_int shmseg; + l_int shmall; +}; + +struct l_shm_info { + l_int used_ids; + l_ulong shm_tot; /* total allocated shm */ + l_ulong shm_rss; /* total resident shm */ + l_ulong shm_swp; /* total swapped shm */ + l_ulong swap_attempts; + l_ulong swap_successes; +}; + + +struct l_ipc64_perm +{ + l_key_t key; + l_uid_t uid; + l_gid_t gid; + l_uid_t cuid; + l_gid_t cgid; + l_mode_t mode; + l_ushort __pad1; + l_ushort seq; + l_ushort __pad2; + l_ulong __unused1; + l_ulong __unused2; +}; + +/* + * The msqid64_ds structure for i386 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. 
+ * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ + +struct l_msqid64_ds { + struct l_ipc64_perm msg_perm; + l_time_t msg_stime; /* last msgsnd time */ + l_ulong __unused1; + l_time_t msg_rtime; /* last msgrcv time */ + l_ulong __unused2; + l_time_t msg_ctime; /* last change time */ + l_ulong __unused3; + l_ulong msg_cbytes; /* current number of bytes on queue */ + l_ulong msg_qnum; /* number of messages in queue */ + l_ulong msg_qbytes; /* max number of bytes on queue */ + l_pid_t msg_lspid; /* pid of last msgsnd */ + l_pid_t msg_lrpid; /* last receive pid */ + l_ulong __unused4; + l_ulong __unused5; +}; + +/* + * The semid64_ds structure for i386 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ + +struct l_semid64_ds { + struct l_ipc64_perm sem_perm; /* permissions */ + l_time_t sem_otime; /* last semop time */ + l_ulong __unused1; + l_time_t sem_ctime; /* last change time */ + l_ulong __unused2; + l_ulong sem_nsems; /* no. of semaphores in array */ + l_ulong __unused3; + l_ulong __unused4; +}; + +/* + * The shmid64_ds structure for i386 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ + +struct l_shmid64_ds { + struct l_ipc64_perm shm_perm; /* operation perms */ + l_size_t shm_segsz; /* size of segment (bytes) */ + l_time_t shm_atime; /* last attach time */ + l_ulong __unused1; + l_time_t shm_dtime; /* last detach time */ + l_ulong __unused2; + l_time_t shm_ctime; /* last change time */ + l_ulong __unused3; + l_pid_t shm_cpid; /* pid of creator */ + l_pid_t shm_lpid; /* pid of last operator */ + l_ulong shm_nattch; /* no. 
of current attaches */ + l_ulong __unused4; + l_ulong __unused5; +}; + +struct l_shminfo64 { + l_ulong shmmax; + l_ulong shmmin; + l_ulong shmmni; + l_ulong shmseg; + l_ulong shmall; + l_ulong __unused1; + l_ulong __unused2; + l_ulong __unused3; + l_ulong __unused4; +}; + + +struct l_ipc_perm { + l_key_t key; + l_uid16_t uid; + l_gid16_t gid; + l_uid16_t cuid; + l_gid16_t cgid; + l_ushort mode; + l_ushort seq; +}; + +struct l_msqid_ds { + struct l_ipc_perm msg_perm; + l_uintptr_t msg_first; /* first message on queue,unused */ + l_uintptr_t msg_last; /* last message in queue,unused */ + l_time_t msg_stime; /* last msgsnd time */ + l_time_t msg_rtime; /* last msgrcv time */ + l_time_t msg_ctime; /* last change time */ + l_ulong msg_lcbytes; /* Reuse junk fields for 32 bit */ + l_ulong msg_lqbytes; /* ditto */ + l_ushort msg_cbytes; /* current number of bytes on queue */ + l_ushort msg_qnum; /* number of messages in queue */ + l_ushort msg_qbytes; /* max number of bytes on queue */ + l_pid_t msg_lspid; /* pid of last msgsnd */ + l_pid_t msg_lrpid; /* last receive pid */ +}; + +struct l_semid_ds { + struct l_ipc_perm sem_perm; + l_time_t sem_otime; + l_time_t sem_ctime; + void *sem_base; + void *sem_pending; + void *sem_pending_last; + void *undo; + l_ushort sem_nsems; +}; + +struct l_shmid_ds { + struct l_ipc_perm shm_perm; + l_int shm_segsz; + l_time_t shm_atime; + l_time_t shm_dtime; + l_time_t shm_ctime; + l_ushort shm_cpid; + l_ushort shm_lpid; + l_short shm_nattch; + l_ushort private1; + void *private2; + void *private3; +}; + int linux_msgctl (struct linux_msgctl_args *); int linux_msgget (struct linux_msgget_args *); int linux_msgrcv (struct linux_msgrcv_args *); @@ -137,7 +336,8 @@ int linux_shmat (struct linux_shmat_args *); int linux_shmctl (struct linux_shmctl_args *); int linux_shmdt (struct linux_shmdt_args *); int linux_shmget (struct linux_shmget_args *); - +#define LINUX_MSG_INFO 12 +#define LINUX_IPC_64 0x0100 /* New version (support 32-bit UIDs, 
bigger */ #endif /* __i386__ */ #endif /* _LINUX_IPC_H_ */ diff --git a/sys/emulation/linux/linux_mib.c b/sys/emulation/linux/linux_mib.c index 832706299f..4ba7a16501 100644 --- a/sys/emulation/linux/linux_mib.c +++ b/sys/emulation/linux/linux_mib.c @@ -70,7 +70,7 @@ SYSCTL_PROC(_compat_linux, OID_AUTO, osname, 0, 0, linux_sysctl_osname, "A", "Linux kernel OS name"); -static char linux_osrelease[LINUX_MAX_UTSNAME] = "2.4.2"; +static char linux_osrelease[LINUX_MAX_UTSNAME] = "2.6.16"; static int linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS) diff --git a/sys/emulation/linux/linux_misc.c b/sys/emulation/linux/linux_misc.c index 799abc3141..124d8409d8 100644 --- a/sys/emulation/linux/linux_misc.c +++ b/sys/emulation/linux/linux_misc.c @@ -85,6 +85,8 @@ #include #include "linux_mib.h" #include "linux_util.h" +#include "linux_emuldata.h" +#include "i386/linux.h" #define BSD_TO_LINUX_SIGNAL(sig) \ (((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : sig) @@ -108,6 +110,12 @@ struct l_sysinfo { char _f[22]; /* Pads structure to 64 bytes */ }; +int +sys_linux_madvise(struct linux_madvise_args *args) +{ + return 0; +} + /* * MPALMOSTSAFE */ @@ -616,6 +624,20 @@ sys_linux_mremap(struct linux_mremap_args *args) (unsigned long)args->new_len, (unsigned long)args->flags); #endif + if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { + args->sysmsg_resultp = NULL; + return (EINVAL); + } + + /* + * Check for the page alignment. + * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. + */ + if (args->addr & PAGE_MASK) { + args->sysmsg_resultp = NULL; + return (EINVAL); + } + args->new_len = round_page(args->new_len); args->old_len = round_page(args->old_len); @@ -759,6 +781,7 @@ sys_linux_newuname(struct linux_newuname_args *args) return (copyout(&utsname, (caddr_t)args->buf, sizeof(utsname))); } +/* XXX: why would this be i386-only? most of these are wrong! 
*/ #if defined(__i386__) struct l_utimbuf { l_time_t l_actime; @@ -804,6 +827,159 @@ cleanup: linux_free_path(&path); return (error); } + +int +sys_linux_utimes(struct linux_utimes_args *args) +{ + l_timeval ltv[2]; + struct timeval tv[2], *tvp = NULL; + struct nlookupdata nd; + char *path; + int error; + + error = linux_copyin_path(args->fname, &path, LINUX_PATH_EXISTS); + if (error) + return (error); +#ifdef DEBUG + if (ldebug(utimes)) + kprintf(ARGS(utimes, "%s, *"), path); +#endif + + if (args->tptr) { + error = copyin(args->tptr, ltv, sizeof(ltv)); + if (error) + goto cleanup; + tv[0].tv_sec = ltv[0].tv_sec; + tv[0].tv_usec = ltv[0].tv_usec; + tv[1].tv_sec = ltv[1].tv_sec; + tv[1].tv_usec = ltv[1].tv_usec; + tvp = tv; + } + get_mplock(); + error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); + if (error == 0) + error = kern_utimes(&nd, tvp); + nlookup_done(&nd); + rel_mplock(); +cleanup: + linux_free_path(&path); + return (error); +} + +int +sys_linux_futimesat(struct linux_futimesat_args *args) +{ + l_timeval ltv[2]; + struct timeval tv[2], *tvp = NULL; + struct file *fp; + struct nlookupdata nd; + char *path; + int dfd,error; + + error = linux_copyin_path(args->fname, &path, LINUX_PATH_EXISTS); + if (error) + return (error); +#ifdef DEBUG + if (ldebug(futimesat)) + kprintf(ARGS(futimesat, "%s, *"), path); +#endif + if (args->tptr) { + error = copyin(args->tptr, ltv, sizeof(ltv)); + if (error) + goto cleanup; + tv[0].tv_sec = ltv[0].tv_sec; + tv[0].tv_usec = ltv[0].tv_usec; + tv[1].tv_sec = ltv[1].tv_sec; + tv[1].tv_usec = ltv[1].tv_usec; + tvp = tv; + } + dfd = (args->dfd == LINUX_AT_FDCWD) ? 
AT_FDCWD : args->dfd; + get_mplock(); + error = nlookup_init_at(&nd, &fp, dfd, path, UIO_SYSSPACE, NLC_FOLLOW); + if (error == 0) + error = kern_utimes(&nd, tvp); + nlookup_done_at(&nd, fp); + rel_mplock(); +cleanup: + linux_free_path(&path); + return (error); +} + + +int +sys_linux_utimensat(struct linux_utimensat_args *args) +{ + struct l_timespec ltv[2]; + struct timeval tv[2], *tvp = NULL; + struct file *fp; + struct nlookupdata nd; + char *path; + int dfd, flags, error = 0; + + if (args->flag & ~LINUX_AT_SYMLINK_NOFOLLOW) + return (EINVAL); + + if (args->dfd == LINUX_AT_FDCWD && args->fname == NULL) + return (EINVAL); + + if (args->fname) { + error = linux_copyin_path(args->fname, &path, LINUX_PATH_EXISTS); + if (error) + return (error); + } +#ifdef DEBUG + if (ldebug(utimensat)) + kprintf(ARGS(utimensat, "%s, *"), args->fname ? path : "(null)"); /* path is only set when fname != NULL */ +#endif + if (args->tptr) { + error = copyin(args->tptr, ltv, sizeof(ltv)); + if (error) + goto cleanup; + + if (ltv[0].tv_nsec == LINUX_UTIME_NOW) { /* Linux passes UTIME_NOW/UTIME_OMIT in tv_nsec, not tv_sec */ + microtime(&tv[0]); + } else if (ltv[0].tv_nsec == LINUX_UTIME_OMIT) { + /* XXX: this is not right, but will do for now */ + microtime(&tv[0]); + } else { + tv[0].tv_sec = ltv[0].tv_sec; + /* XXX: we lose precision here, as we don't have ns */ + tv[0].tv_usec = ltv[0].tv_nsec/1000; + } + if (ltv[1].tv_nsec == LINUX_UTIME_NOW) { /* same special values apply to the modification time */ + microtime(&tv[1]); + } else if (ltv[1].tv_nsec == LINUX_UTIME_OMIT) { + /* XXX: this is not right, but will do for now */ + microtime(&tv[1]); + } else { + tv[1].tv_sec = ltv[1].tv_sec; + /* XXX: we lose precision here, as we don't have ns */ + tv[1].tv_usec = ltv[1].tv_nsec/1000; + } + tvp = tv; + } + + dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; + flags = (args->flag & LINUX_AT_SYMLINK_NOFOLLOW) ?
0 : NLC_FOLLOW; + + get_mplock(); + if (args->fname) { + error = nlookup_init_at(&nd, &fp, dfd, path, UIO_SYSSPACE, flags); + if (error == 0) + error = kern_utimes(&nd, tvp); + nlookup_done_at(&nd, fp); + } else { + /* Thank you, Linux, for another non-standard "feature" */ + KKASSERT(dfd != AT_FDCWD); + error = kern_futimes(dfd, tvp); + } + rel_mplock(); +cleanup: + if (args->fname) + linux_free_path(&path); + + return (error); +} #endif /* __i386__ */ #define __WCLONE 0x80000000 @@ -923,6 +1099,41 @@ sys_linux_mknod(struct linux_mknod_args *args) return(error); } +int +sys_linux_mknodat(struct linux_mknodat_args *args) +{ + struct nlookupdata nd; + struct file *fp; + char *path; + int dfd, error; + + error = linux_copyin_path(args->path, &path, LINUX_PATH_CREATE); + if (error) + return (error); +#ifdef DEBUG + if (ldebug(mknod)) + kprintf(ARGS(mknod, "%s, %d, %d"), + path, args->mode, args->dev); +#endif + get_mplock(); + dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; + error = nlookup_init_at(&nd, &fp, dfd, path, UIO_SYSSPACE, 0); + if (error == 0) { + if (args->mode & S_IFIFO) { + error = kern_mkfifo(&nd, args->mode); + } else { + error = kern_mknod(&nd, args->mode, + umajor(args->dev), + uminor(args->dev)); + } + } + nlookup_done_at(&nd, fp); + rel_mplock(); + + linux_free_path(&path); + return(error); +} + /* * UGH! This is just about the dumbest idea I've ever heard!! 
* @@ -1366,6 +1577,13 @@ sys_linux_sched_get_priority_min(struct linux_sched_get_priority_min_args *args) #define REBOOT_CAD_ON 0x89abcdef #define REBOOT_CAD_OFF 0 #define REBOOT_HALT 0xcdef0123 +#define REBOOT_RESTART 0x01234567 +#define REBOOT_RESTART2 0xA1B2C3D4 +#define REBOOT_POWEROFF 0x4321FEDC +#define REBOOT_MAGIC1 0xfee1dead +#define REBOOT_MAGIC2 0x28121969 +#define REBOOT_MAGIC2A 0x05121996 +#define REBOOT_MAGIC2B 0x16041998 /* * MPSAFE @@ -1380,9 +1598,33 @@ sys_linux_reboot(struct linux_reboot_args *args) if (ldebug(reboot)) kprintf(ARGS(reboot, "0x%x"), args->cmd); #endif - if (args->cmd == REBOOT_CAD_ON || args->cmd == REBOOT_CAD_OFF) - return (0); - bsd_args.opt = (args->cmd == REBOOT_HALT) ? RB_HALT : 0; + + if ((args->magic1 != REBOOT_MAGIC1) || + ((args->magic2 != REBOOT_MAGIC2) && + (args->magic2 != REBOOT_MAGIC2A) && + (args->magic2 != REBOOT_MAGIC2B))) + return EINVAL; + + switch (args->cmd) { + case REBOOT_CAD_ON: + case REBOOT_CAD_OFF: + return (priv_check(curthread, PRIV_REBOOT)); + /* NOTREACHED */ + case REBOOT_HALT: + bsd_args.opt = RB_HALT; + break; + case REBOOT_RESTART: + case REBOOT_RESTART2: + bsd_args.opt = 0; + break; + case REBOOT_POWEROFF: + bsd_args.opt = RB_POWEROFF; + break; + default: + return EINVAL; + /* NOTREACHED */ + } + bsd_args.sysmsg_result = 0; error = sys_reboot(&bsd_args); @@ -1403,15 +1645,59 @@ sys_linux_reboot(struct linux_reboot_args *args) */ /* - * MPSAFE + * MPALMOSTSAFE */ int sys_linux_getpid(struct linux_getpid_args *args) { - struct thread *td = curthread; - struct proc *p = td->td_proc; + struct linux_emuldata *em; + struct proc *p = curproc; - args->sysmsg_result = p->p_pid; + get_mplock(); + EMUL_LOCK(); + em = emuldata_get(p); + if (em == NULL) /* this should never happen */ + args->sysmsg_result = p->p_pid; + else + args->sysmsg_result = em->s->group_pid; + kprintf("curproc %s requested getpid, return pid = %d\n", curproc->p_comm, (em != NULL) ? em->s->group_pid : p->p_pid); /* em may be NULL here; never dereference it unconditionally */ + EMUL_UNLOCK(); + rel_mplock(); + return (0); +}
+ +/* + * MPALMOSTSAFE + */ +int +sys_linux_getppid(struct linux_getppid_args *args) +{ + struct linux_emuldata *em; + struct proc *parent; + struct proc *p; + + get_mplock(); + EMUL_LOCK(); + em = emuldata_get(curproc); + KKASSERT(em != NULL); + + p = pfind(em->s->group_pid); + /* We are not allowed to fail */ + if (p == NULL) + goto out; + + parent = p->p_pptr; + if (parent->p_sysent == &elf_linux_sysvec) { + em = emuldata_get(parent); + args->sysmsg_result = em->s->group_pid; + kprintf("(a) curproc %s requested getppid, return pid = %d\n", curproc->p_comm, em->s->group_pid); + } else { + args->sysmsg_result = parent->p_pid; + kprintf("(b) curproc %s requested getppid, return pid = %d\n", curproc->p_comm, parent->p_pid); /* log the pid actually returned, not the caller's group pid */ + } +out: + EMUL_UNLOCK(); + rel_mplock(); return (0); } @@ -1464,3 +1750,170 @@ linux_nosys(struct nosys_args *args) /* XXX */ return (ENOSYS); } + +int +sys_linux_mq_open(struct linux_mq_open_args *args) +{ + struct mq_open_args moa; + int error, oflag; + + oflag = 0; + if (args->oflag & LINUX_O_RDONLY) + oflag |= O_RDONLY; + if (args->oflag & LINUX_O_WRONLY) + oflag |= O_WRONLY; + if (args->oflag & LINUX_O_RDWR) + oflag |= O_RDWR; + + if (args->oflag & LINUX_O_NONBLOCK) + oflag |= O_NONBLOCK; + if (args->oflag & LINUX_O_CREAT) + oflag |= O_CREAT; + if (args->oflag & LINUX_O_EXCL) + oflag |= O_EXCL; + + moa.name = args->name; + moa.oflag = oflag; + moa.mode = args->mode; + moa.attr = args->attr; + + error = sys_mq_open(&moa); /* NOTE(review): the descriptor left in moa's sysmsg result is not copied back to args - confirm */ + + return (error); +} + +int +sys_linux_mq_getsetattr(struct linux_mq_getsetattr_args *args) +{ + struct mq_getattr_args gaa; + struct mq_setattr_args saa; + int error; + + gaa.mqdes = args->mqd; + gaa.mqstat = args->oattr; + + saa.mqdes = args->mqd; + saa.mqstat = args->attr; + saa.omqstat = args->oattr; /* old attributes go to omqstat; mqstat must keep the new ones */ + + if (args->attr != NULL) { + error = sys_mq_setattr(&saa); + } else { + error = sys_mq_getattr(&gaa); + } + + return error; +} + +/* + * Get affinity of a process.
+ */ +int +sys_linux_sched_getaffinity(struct linux_sched_getaffinity_args *args) +{ + cpumask_t mask; + struct proc *p; + struct lwp *lp; + int error = 0; + +#ifdef DEBUG + if (ldebug(sched_getaffinity)) + kprintf(ARGS(sched_getaffinity, "%d, %d, *"), args->pid, + args->len); +#endif + if (args->len < sizeof(cpumask_t)) + return (EINVAL); +#if 0 + if ((error = priv_check(curthread, PRIV_SCHED_CPUSET)) != 0) + return (EPERM); +#endif + /* Get the mplock to ensure that the proc is not running */ + get_mplock(); + if (args->pid == 0) { + p = curproc; + } else { + p = pfind(args->pid); + if (p == NULL) { + error = ESRCH; + goto done; + } + } + + lp = FIRST_LWP_IN_PROC(p); + /* + * XXX: if lwp_cpumask is ever changed to support more than + * 32 processors, this needs to be changed to a bcopy. + */ + mask = lp->lwp_cpumask; + if ((error = copyout(&mask, args->user_mask_ptr, sizeof(cpumask_t)))) + error = EFAULT; +done: + rel_mplock(); +#if 0 + if (error == 0) + args->sysmsg_iresult = sizeof(cpumask_t); +#endif + return (error); +} + +/* + * Set affinity of a process. + */ +int +sys_linux_sched_setaffinity(struct linux_sched_setaffinity_args *args) +{ +#ifdef DEBUG + if (ldebug(sched_setaffinity)) + kprintf(ARGS(sched_setaffinity, "%d, %d, *"), args->pid, + args->len); +#endif + /* + * From Linux man page: + * sched_setaffinity() sets the CPU affinity mask of the process + * whose ID is pid to the value specified by mask. If pid is zero, + * then the calling process is used. The argument cpusetsize is + * the length (in bytes) of the data pointed to by mask. Normally + * this argument would be specified as sizeof(cpu_set_t). + * + * If the process specified by pid is not currently running on one + * of the CPUs specified in mask, then that process is migrated to + * one of the CPUs specified in mask. 
+ */ + /* + * About our implementation: I don't think that it is too important + * to have a working implementation, but if it was ever needed, + * the best approach would be to implement the whole mechanism + * properly in kern_usched. + * The idea has to be to change the affinity mask AND migrate the + * lwp to one of the new valid CPUs for the lwp, in case the current + * CPU isn't anymore in the affinity mask passed in. + * For now, we'll just signal success even if we didn't do anything. + */ + return 0; +} + +int +sys_linux_gettid(struct linux_gettid_args *args) +{ + args->sysmsg_iresult = curproc->p_pid; + return 0; +} + +int +sys_linux_getcpu(struct linux_getcpu_args *args) +{ + struct globaldata *gd; + l_uint node = 0; + int error; + + gd = mycpu; + error = copyout(&gd->gd_cpuid, args->pcpu, sizeof(gd->gd_cpuid)); + if (error) + return (error); + /* + * XXX: this should be the NUMA node, but since we don't implement it, + * just return 0 for it. + */ + error = copyout(&node, args->pnode, sizeof(node)); + return (error); +} diff --git a/sys/emulation/linux/linux_signal.c b/sys/emulation/linux/linux_signal.c index 043ff5fc0f..0dfe60a152 100644 --- a/sys/emulation/linux/linux_signal.c +++ b/sys/emulation/linux/linux_signal.c @@ -43,6 +43,7 @@ #include #include +#include "linux_emuldata.h" #include "linux_signal.h" #include "linux_util.h" @@ -387,7 +388,6 @@ sys_linux_kill(struct linux_kill_args *args) if (ldebug(kill)) kprintf(ARGS(kill, "%d, %d"), args->pid, args->signum); #endif - /* * Allow signal 0 as a means to check for privileges */ @@ -406,3 +406,77 @@ sys_linux_kill(struct linux_kill_args *args) return(error); } + +static int +linux_do_tkill(l_int tgid, l_int pid, l_int sig) +{ + struct linux_emuldata *em; + struct proc *p; + int error = 0; + + /* + * Allow signal 0 as a means to check for privileges + */ + if (sig < 0 || sig > LINUX_NSIG) + return (EINVAL); + + if (sig > 0 && sig <= LINUX_SIGTBLSZ) + sig = linux_to_bsd_signal[_SIG_IDX(sig)]; + + 
get_mplock(); + if ((p = pfind(pid)) == NULL) { + if ((p = zpfind(pid)) == NULL) { + rel_mplock(); + return (ESRCH); + } + } + + EMUL_LOCK(); + em = emuldata_get(p); + + if (em == NULL) { + EMUL_UNLOCK(); + rel_mplock(); + return (ESRCH); + } + + if (tgid > 0 && em->s->group_pid != tgid) { + EMUL_UNLOCK(); + rel_mplock(); + return (ESRCH); + } + EMUL_UNLOCK(); + + error = kern_kill(sig, pid, -1); + rel_mplock(); + + return (error); +} + +int +sys_linux_tgkill(struct linux_tgkill_args *args) +{ + +#ifdef DEBUG + if (ldebug(tgkill)) + kprintf(ARGS(tgkill, "%d, %d, %d"), args->tgid, args->pid, args->sig); +#endif + if (args->pid <= 0 || args->tgid <= 0) + return (EINVAL); + + return (linux_do_tkill(args->tgid, args->pid, args->sig)); +} + +int +sys_linux_tkill(struct linux_tkill_args *args) +{ +#ifdef DEBUG + if (ldebug(tkill)) + kprintf(ARGS(tkill, "%i, %i"), args->tid, args->sig); +#endif + if (args->tid <= 0) + return (EINVAL); + + return (linux_do_tkill(0, args->tid, args->sig)); +} + diff --git a/sys/emulation/linux/linux_socket.c b/sys/emulation/linux/linux_socket.c index 0d93fc97d4..a762e426d3 100644 --- a/sys/emulation/linux/linux_socket.c +++ b/sys/emulation/linux/linux_socket.c @@ -41,6 +41,7 @@ #include #include #include +#include #include @@ -224,6 +225,8 @@ linux_to_bsd_so_sockopt(int opt) return (SO_OOBINLINE); case LINUX_SO_LINGER: return (SO_LINGER); + case LINUX_SO_PEERCRED: + return (LOCAL_PEERCRED); } return (-1); } @@ -826,6 +829,16 @@ linux_sendmsg(struct linux_sendmsg_args *args, size_t *res) if (error) return (error); + /* + * XXX: I'm not sure atm how this relates to dragonfly, but + * just in case, I put it in. + * Ping on linux does pass 0 in controllen which is forbidden + * by FreeBSD but seems to be ok on Linux. This needs some + * checking but now it lets ping work. + */ + if (msg.msg_control && msg.msg_controllen == 0) + msg.msg_control = NULL; + /* * Conditionally copyin msg.msg_name. 
*/ @@ -1061,6 +1074,8 @@ linux_setsockopt(struct linux_setsockopt_args *args, int *res) struct linux_setsockopt_args linux_args; struct thread *td = curthread; struct sockopt sopt; + l_timeval linux_tv; + struct timeval tv; int error, name, level; error = copyin(args, &linux_args, sizeof(linux_args)); @@ -1071,6 +1086,28 @@ linux_setsockopt(struct linux_setsockopt_args *args, int *res) switch (level) { case SOL_SOCKET: name = linux_to_bsd_so_sockopt(linux_args.optname); + switch (name) { + case SO_RCVTIMEO: + /* FALLTHROUGH */ + case SO_SNDTIMEO: + error = copyin(linux_args.optval, &linux_tv, + sizeof(linux_tv)); + if (error) + return (error); + tv.tv_sec = linux_tv.tv_sec; + tv.tv_usec = linux_tv.tv_usec; + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = level; + sopt.sopt_name = name; + sopt.sopt_valsize = sizeof(tv); + sopt.sopt_val = &tv; + sopt.sopt_td = td; + return (kern_setsockopt(linux_args.s, &sopt)); + /* NOTREACHED */ + break; + default: + break; + } break; case IPPROTO_IP: name = linux_to_bsd_ip_sockopt(linux_args.optname); @@ -1084,7 +1121,14 @@ linux_setsockopt(struct linux_setsockopt_args *args, int *res) break; } if (name == -1) + return (ENOPROTOOPT); + + if (linux_args.optlen < 0 || linux_args.optlen > SOMAXOPT_SIZE) + return (EINVAL); + if (linux_args.optval != NULL && linux_args.optlen == 0) return (EINVAL); + if (linux_args.optval == NULL && linux_args.optlen != 0) + return (EFAULT); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = level; @@ -1092,9 +1136,6 @@ linux_setsockopt(struct linux_setsockopt_args *args, int *res) sopt.sopt_valsize = linux_args.optlen; sopt.sopt_td = td; - if (sopt.sopt_valsize < 0 || sopt.sopt_valsize > SOMAXOPT_SIZE) - return (EINVAL); - if (linux_args.optval) { sopt.sopt_val = kmalloc(sopt.sopt_valsize, M_TEMP, M_WAITOK); error = copyin(linux_args.optval, sopt.sopt_val, sopt.sopt_valsize); @@ -1129,26 +1170,78 @@ linux_getsockopt(struct linux_getsockopt_args *args, int *res) struct linux_getsockopt_args linux_args; struct 
thread *td = curthread; struct sockopt sopt; + l_timeval linux_tv; + struct timeval tv; + struct xucred xu; + struct l_ucred lxu; int error, name, valsize, level; error = copyin(args, &linux_args, sizeof(linux_args)); if (error) return (error); - if (linux_args.optval) { + if (linux_args.optlen) { error = copyin(linux_args.optlen, &valsize, sizeof(valsize)); if (error) return (error); - if (valsize < 0 || valsize > SOMAXOPT_SIZE) - return (EINVAL); } else { valsize = 0; } + if (valsize < 0 || valsize > SOMAXOPT_SIZE) + return (EINVAL); + if (linux_args.optval != NULL && valsize == 0) + return (EFAULT); + if (linux_args.optval == NULL && valsize != 0) + return (EFAULT); + level = linux_to_bsd_sockopt_level(linux_args.level); switch (level) { case SOL_SOCKET: name = linux_to_bsd_so_sockopt(linux_args.optname); + switch (name) { + case SO_RCVTIMEO: + /* FALLTHROUGH */ + case SO_SNDTIMEO: + sopt.sopt_dir = SOPT_GET; + sopt.sopt_level = level; + sopt.sopt_name = name; + sopt.sopt_valsize = sizeof(tv); + sopt.sopt_td = td; + sopt.sopt_val = &tv; + error = kern_getsockopt(linux_args.s, &sopt); + if (error) + return (error); + linux_tv.tv_sec = tv.tv_sec; + linux_tv.tv_usec = tv.tv_usec; + return (copyout(&linux_tv, linux_args.optval, + sizeof(linux_tv))); + /* NOTREACHED */ + break; + case LOCAL_PEERCRED: + if (valsize != sizeof(lxu)) + return (EINVAL); + sopt.sopt_dir = SOPT_GET; + sopt.sopt_level = level; + sopt.sopt_name = name; + sopt.sopt_valsize = sizeof(xu); + sopt.sopt_td = td; + sopt.sopt_val = &xu; + error = kern_getsockopt(linux_args.s, &sopt); + if (error) + return (error); + /* + * XXX Use 0 for pid as the FreeBSD does not cache peer pid. 
+ */ + lxu.pid = 0; + lxu.uid = xu.cr_uid; + lxu.gid = xu.cr_gid; + return (copyout(&lxu, linux_args.optval, sizeof(lxu))); + /* NOTREACHED */ + break; + default: + break; + } break; case IPPROTO_IP: name = linux_to_bsd_ip_sockopt(linux_args.optname); @@ -1162,7 +1255,9 @@ linux_getsockopt(struct linux_getsockopt_args *args, int *res) break; } if (name == -1) - return (EINVAL); + return (EOPNOTSUPP); + + sopt.sopt_dir = SOPT_GET; sopt.sopt_level = level; @@ -1179,8 +1274,11 @@ linux_getsockopt(struct linux_getsockopt_args *args, int *res) sopt.sopt_val = NULL; } error = kern_getsockopt(linux_args.s, &sopt); - if (error) + if (error) { + if (error == EINVAL) + error = ENOPROTOOPT; goto out; + } valsize = sopt.sopt_valsize; error = copyout(&valsize, linux_args.optlen, sizeof(valsize)); if (error) diff --git a/sys/emulation/linux/linux_socket.h b/sys/emulation/linux/linux_socket.h index 262de95707..54a40cb020 100644 --- a/sys/emulation/linux/linux_socket.h +++ b/sys/emulation/linux/linux_socket.h @@ -50,4 +50,10 @@ #define LINUX_MSG_ERRQUEUE 0x2000 #define LINUX_MSG_NOSIGNAL 0x4000 +struct l_ucred { + uint32_t pid; + uint32_t uid; + uint32_t gid; +}; + #endif /* _LINUX_SOCKET_H_ */ diff --git a/sys/emulation/linux/linux_stats.c b/sys/emulation/linux/linux_stats.c index 0d6cf9efa4..b826865ba0 100644 --- a/sys/emulation/linux/linux_stats.c +++ b/sys/emulation/linux/linux_stats.c @@ -57,9 +57,9 @@ static int newstat_copyout(struct stat *buf, void *ubuf) { struct l_newstat tbuf; - cdev_t dev; int error; + bzero(&tbuf, sizeof(tbuf)); tbuf.st_dev = uminor(buf->st_dev) | (umajor(buf->st_dev) << 8); tbuf.st_ino = INO64TO32(buf->st_ino); tbuf.st_mode = buf->st_mode; @@ -74,22 +74,6 @@ newstat_copyout(struct stat *buf, void *ubuf) tbuf.st_blksize = buf->st_blksize; tbuf.st_blocks = buf->st_blocks; - /* Lie about disk drives which are character devices - * in FreeBSD but block devices under Linux. 
- */ - if (S_ISCHR(tbuf.st_mode) && - (dev = udev2dev(buf->st_rdev, 0)) != NULL) { - if (dev_is_good(dev) && (dev_dflags(dev) & D_DISK)) { - tbuf.st_mode &= ~S_IFMT; - tbuf.st_mode |= S_IFBLK; - - /* XXX this may not be quite right */ - /* Map major number to 0 */ - tbuf.st_dev = uminor(buf->st_dev) & 0xf; - tbuf.st_rdev = buf->st_rdev & 0xff; - } - } - error = copyout(&tbuf, ubuf, sizeof(tbuf)); return (error); } @@ -385,7 +369,7 @@ stat64_copyout(struct stat *buf, void *ubuf) bzero(&lbuf, sizeof(lbuf)); lbuf.st_dev = uminor(buf->st_dev) | (umajor(buf->st_dev) << 8); - lbuf.st_ino = buf->st_ino; + lbuf.st_ino = INO64TO32(buf->st_ino); lbuf.st_mode = buf->st_mode; lbuf.st_nlink = buf->st_nlink; lbuf.st_uid = buf->st_uid; @@ -494,4 +478,42 @@ sys_linux_fstat64(struct linux_fstat64_args *args) return (error); } +int +sys_linux_fstatat64(struct linux_fstatat64_args *args) +{ + CACHE_MPLOCK_DECLARE; + struct nlookupdata nd; + struct file *fp; + struct stat st; + char *path; + int error, flags, dfd; + + if (args->flag & ~LINUX_AT_SYMLINK_NOFOLLOW) + return (EINVAL); + + error = linux_copyin_path(args->path, &path, LINUX_PATH_EXISTS); + if (error) + return (error); +#ifdef DEBUG + if (ldebug(fstatat64)) + kprintf(ARGS(fstatat64, "%s"), path); +#endif + kprintf(ARGS(fstatat64, "%s"), path); + dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; + flags = (args->flag & LINUX_AT_SYMLINK_NOFOLLOW) ? 
0 : NLC_FOLLOW; + + CACHE_GETMPLOCK1(); + error = nlookup_init_at(&nd, &fp, dfd, path, UIO_SYSSPACE, flags); + if (error == 0) { + error = kern_stat(&nd, &st); + if (error == 0) + error = stat64_copyout(&st, args->statbuf); + } + nlookup_done_at(&nd, fp); + CACHE_RELMPLOCK(); + linux_free_path(&path); + return (error); +} + + #endif /* __i386__ */ diff --git a/sys/emulation/linux/linux_time.c b/sys/emulation/linux/linux_time.c new file mode 100644 index 0000000000..e05fff2738 --- /dev/null +++ b/sys/emulation/linux/linux_time.c @@ -0,0 +1,232 @@ +/* $NetBSD: linux_time.c,v 1.14 2006/05/14 03:40:54 christos Exp $ */ + +/*- + * Copyright (c) 2001 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Emmanuel Dreyfus. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#include "opt_compat.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#ifdef __i386__ +#include +#endif + +#include + +#include +#include +#include +#include "linux_mib.h" +#include "linux_util.h" + +static void native_to_linux_timespec(struct l_timespec *, + struct timespec *); +static int linux_to_native_timespec(struct timespec *, + struct l_timespec *); +static int linux_to_native_clockid(clockid_t *, clockid_t); + +static void +native_to_linux_timespec(struct l_timespec *ltp, struct timespec *ntp) +{ + ltp->tv_sec = ntp->tv_sec; + ltp->tv_nsec = ntp->tv_nsec; +} + +static int +linux_to_native_timespec(struct timespec *ntp, struct l_timespec *ltp) +{ + if (ltp->tv_sec < 0 || ltp->tv_nsec > (l_long)999999999L) + return (EINVAL); + ntp->tv_sec = ltp->tv_sec; + ntp->tv_nsec = ltp->tv_nsec; + + return (0); +} + +static int +linux_to_native_clockid(clockid_t *n, clockid_t l) +{ + switch (l) { + case LINUX_CLOCK_REALTIME: + *n = CLOCK_REALTIME; + break; + case 
LINUX_CLOCK_MONOTONIC: + *n = CLOCK_MONOTONIC; + break; + case LINUX_CLOCK_PROCESS_CPUTIME_ID: + case LINUX_CLOCK_THREAD_CPUTIME_ID: + case LINUX_CLOCK_REALTIME_HR: + case LINUX_CLOCK_MONOTONIC_HR: + default: + return (EINVAL); + break; + } + + return (0); +} + +int +sys_linux_clock_gettime(struct linux_clock_gettime_args *args) +{ + struct l_timespec lts; + int error; + clockid_t nwhich = 0; /* XXX: GCC */ + struct timespec tp; + + error = linux_to_native_clockid(&nwhich, args->which); + if (error != 0) + return (error); + error = kern_clock_gettime(nwhich, &tp); + if (error != 0) + return (error); + native_to_linux_timespec(&lts, &tp); + + return (copyout(&lts, args->tp, sizeof lts)); +} + +int +sys_linux_clock_settime(struct linux_clock_settime_args *args) +{ + struct timespec ts; + struct l_timespec lts; + int error; + clockid_t nwhich = 0; /* XXX: GCC */ + + error = linux_to_native_clockid(&nwhich, args->which); + if (error != 0) + return (error); + error = copyin(args->tp, &lts, sizeof lts); + if (error != 0) + return (error); + error = linux_to_native_timespec(&ts, &lts); + if (error != 0) + return (error); + + return (kern_clock_settime(nwhich, &ts)); +} + +int +sys_linux_clock_getres(struct linux_clock_getres_args *args) +{ + struct timespec ts; + struct l_timespec lts; + int error; + clockid_t nwhich = 0; /* XXX: GCC */ + + if (args->tp == NULL) + return (0); + + error = linux_to_native_clockid(&nwhich, args->which); + if (error != 0) + return (error); + error = kern_clock_getres(nwhich, &ts); + if (error != 0) + return (error); + native_to_linux_timespec(&lts, &ts); + + return (copyout(&lts, args->tp, sizeof lts)); +} +extern int nanosleep1(struct timespec *rqt, struct timespec *rmt); + +int +sys_linux_clock_nanosleep(struct linux_clock_nanosleep_args *args) +{ + struct timespec *rmtp; + struct l_timespec lrqts, lrmts; + struct timespec rqts, rmts; + int error; + + if (args->flags != 0) + return (EINVAL); /* XXX deal with TIMER_ABSTIME */ + + if (args->which != 
LINUX_CLOCK_REALTIME) + return (EINVAL); + + error = copyin(args->rqtp, &lrqts, sizeof lrqts); + if (error != 0) + return (error); + + if (args->rmtp != NULL) + rmtp = &rmts; + else + rmtp = NULL; + + error = linux_to_native_timespec(&rqts, &lrqts); + if (error != 0) + return (error); + error = nanosleep1(&rqts, rmtp); + if (error != 0) + return (error); + + if (args->rmtp != NULL) { + native_to_linux_timespec(&lrmts, rmtp); + error = copyout(&lrmts, args->rmtp, sizeof lrmts ); + if (error != 0) + return (error); + } + + return (0); +} diff --git a/sys/emulation/linux/linux_util.c b/sys/emulation/linux/linux_util.c index 42a376aca5..cdf850750c 100644 --- a/sys/emulation/linux/linux_util.c +++ b/sys/emulation/linux/linux_util.c @@ -56,15 +56,23 @@ linux_copyin_path(char *uname, char **kname, int flags) struct vattr vat, vatroot; struct vnode *vp, *vproot; char *buf, *cp; - int error, length, dummy; + int error, length, dummy, byte; buf = (char *) kmalloc(MAXPATHLEN, M_TEMP, M_WAITOK); *kname = buf; /* + * Read a byte and see if uname is a valid address. if not, EFAULT. + */ + byte = fubyte(uname); + if (byte == -1) { + error = EFAULT; + goto done; + } + /* * Don't bother trying to translate if the path is relative. 
*/ - if (fubyte(uname) != '/') + if (byte != '/') goto dont_translate; /* diff --git a/sys/platform/pc32/conf/files b/sys/platform/pc32/conf/files index 7630e3ebd6..17e0734fef 100644 --- a/sys/platform/pc32/conf/files +++ b/sys/platform/pc32/conf/files @@ -37,7 +37,10 @@ ukbdmap.h optional ukbd_dflt_keymap \ emulation/dragonfly12/dfbsd12_getdirentries.c nonoptional compat_df12 emulation/dragonfly12/dfbsd12_stat.c nonoptional compat_df12 # +emulation/linux/linux_emuldata.c optional compat_linux +emulation/linux/linux_epoll.c optional compat_linux emulation/linux/linux_file.c optional compat_linux +emulation/linux/linux_futex.c optional compat_linux emulation/linux/linux_getcwd.c optional compat_linux emulation/linux/linux_ioctl.c optional compat_linux emulation/linux/linux_ipc.c optional compat_linux @@ -47,6 +50,7 @@ emulation/linux/linux_signal.c optional compat_linux emulation/linux/linux_socket.c optional compat_linux emulation/linux/linux_stats.c optional compat_linux emulation/linux/linux_sysctl.c optional compat_linux +emulation/linux/linux_time.c optional compat_linux emulation/linux/linux_uid16.c optional compat_linux emulation/linux/linux_util.c optional compat_linux # @@ -244,6 +248,8 @@ emulation/linux/i386/linux_locore.s optional compat_linux \ dependency "linux_assym.h" emulation/linux/i386/linux_machdep.c optional compat_linux emulation/linux/i386/linux_ptrace.c optional compat_linux +emulation/linux/i386/linux_support.s optional compat_linux \ + dependency "linux_assym.h" emulation/linux/i386/linux_sysent.c optional compat_linux emulation/linux/i386/linux_sysvec.c optional compat_linux # -- 2.11.4.GIT