/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
 * $FreeBSD: src/sys/kern/sys_generic.c,v 1.55.2.10 2001/03/17 10:39:32 peter Exp $
 * $DragonFly: src/sys/kern/sys_generic.c,v 1.46 2007/08/15 03:15:06 dillon Exp $
 */
#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/socketvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/kern_syscall.h>
#include <sys/malloc.h>
#include <sys/mapped_ioctl.h>
#include <sys/poll.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/buf.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <sys/file2.h>

#include <machine/limits.h>
75 static MALLOC_DEFINE(M_IOCTLOPS
, "ioctlops", "ioctl data buffer");
76 static MALLOC_DEFINE(M_IOCTLMAP
, "ioctlmap", "mapped ioctl handler buffer");
77 static MALLOC_DEFINE(M_SELECT
, "select", "select() buffer");
78 MALLOC_DEFINE(M_IOV
, "iov", "large iov's");
80 static int pollscan (struct proc
*, struct pollfd
*, u_int
, int *);
81 static int selscan (struct proc
*, fd_mask
**, fd_mask
**,
83 static int dofileread(int, struct file
*, struct uio
*, int, int *);
84 static int dofilewrite(int, struct file
*, struct uio
*, int, int *);
92 sys_read(struct read_args
*uap
)
94 struct thread
*td
= curthread
;
99 aiov
.iov_base
= uap
->buf
;
100 aiov
.iov_len
= uap
->nbyte
;
101 auio
.uio_iov
= &aiov
;
103 auio
.uio_offset
= -1;
104 auio
.uio_resid
= uap
->nbyte
;
105 auio
.uio_rw
= UIO_READ
;
106 auio
.uio_segflg
= UIO_USERSPACE
;
109 if (auio
.uio_resid
< 0)
112 error
= kern_preadv(uap
->fd
, &auio
, 0, &uap
->sysmsg_result
);
117 * Positioned (Pread) read system call
122 sys_extpread(struct extpread_args
*uap
)
124 struct thread
*td
= curthread
;
130 aiov
.iov_base
= uap
->buf
;
131 aiov
.iov_len
= uap
->nbyte
;
132 auio
.uio_iov
= &aiov
;
134 auio
.uio_offset
= uap
->offset
;
135 auio
.uio_resid
= uap
->nbyte
;
136 auio
.uio_rw
= UIO_READ
;
137 auio
.uio_segflg
= UIO_USERSPACE
;
140 flags
= uap
->flags
& O_FMASK
;
141 if (uap
->offset
!= (off_t
)-1)
144 if (auio
.uio_resid
< 0)
147 error
= kern_preadv(uap
->fd
, &auio
, flags
, &uap
->sysmsg_result
);
152 * Scatter read system call.
157 sys_readv(struct readv_args
*uap
)
159 struct thread
*td
= curthread
;
161 struct iovec aiov
[UIO_SMALLIOV
], *iov
= NULL
;
164 error
= iovec_copyin(uap
->iovp
, &iov
, aiov
, uap
->iovcnt
,
169 auio
.uio_iovcnt
= uap
->iovcnt
;
170 auio
.uio_offset
= -1;
171 auio
.uio_rw
= UIO_READ
;
172 auio
.uio_segflg
= UIO_USERSPACE
;
175 error
= kern_preadv(uap
->fd
, &auio
, 0, &uap
->sysmsg_result
);
177 iovec_free(&iov
, aiov
);
183 * Scatter positioned read system call.
188 sys_extpreadv(struct extpreadv_args
*uap
)
190 struct thread
*td
= curthread
;
192 struct iovec aiov
[UIO_SMALLIOV
], *iov
= NULL
;
196 error
= iovec_copyin(uap
->iovp
, &iov
, aiov
, uap
->iovcnt
,
201 auio
.uio_iovcnt
= uap
->iovcnt
;
202 auio
.uio_offset
= uap
->offset
;
203 auio
.uio_rw
= UIO_READ
;
204 auio
.uio_segflg
= UIO_USERSPACE
;
207 flags
= uap
->flags
& O_FMASK
;
208 if (uap
->offset
!= (off_t
)-1)
211 error
= kern_preadv(uap
->fd
, &auio
, flags
, &uap
->sysmsg_result
);
213 iovec_free(&iov
, aiov
);
221 kern_preadv(int fd
, struct uio
*auio
, int flags
, int *res
)
223 struct thread
*td
= curthread
;
224 struct proc
*p
= td
->td_proc
;
230 fp
= holdfp(p
->p_fd
, fd
, FREAD
);
233 if (flags
& O_FOFFSET
&& fp
->f_type
!= DTYPE_VNODE
) {
235 } else if (auio
->uio_resid
< 0) {
238 error
= dofileread(fd
, fp
, auio
, flags
, res
);
245 * Common code for readv and preadv that reads data in
246 * from a file using the passed in uio, offset, and flags.
248 * MPALMOSTSAFE - ktrace needs help
251 dofileread(int fd
, struct file
*fp
, struct uio
*auio
, int flags
, int *res
)
253 struct thread
*td
= curthread
;
254 struct proc
*p
= td
->td_proc
;
258 struct iovec
*ktriov
= NULL
;
264 * if tracing, save a copy of iovec
266 if (KTRPOINT(td
, KTR_GENIO
)) {
267 int iovlen
= auio
->uio_iovcnt
* sizeof(struct iovec
);
269 MALLOC(ktriov
, struct iovec
*, iovlen
, M_TEMP
, M_WAITOK
);
270 bcopy((caddr_t
)auio
->uio_iov
, (caddr_t
)ktriov
, iovlen
);
274 len
= auio
->uio_resid
;
275 error
= fo_read(fp
, auio
, fp
->f_cred
, flags
);
277 if (auio
->uio_resid
!= len
&& (error
== ERESTART
||
278 error
== EINTR
|| error
== EWOULDBLOCK
))
282 if (ktriov
!= NULL
) {
284 ktruio
.uio_iov
= ktriov
;
285 ktruio
.uio_resid
= len
- auio
->uio_resid
;
287 ktrgenio(p
, fd
, UIO_READ
, &ktruio
, error
);
290 FREE(ktriov
, M_TEMP
);
294 *res
= len
- auio
->uio_resid
;
305 sys_write(struct write_args
*uap
)
307 struct thread
*td
= curthread
;
312 aiov
.iov_base
= (void *)(uintptr_t)uap
->buf
;
313 aiov
.iov_len
= uap
->nbyte
;
314 auio
.uio_iov
= &aiov
;
316 auio
.uio_offset
= -1;
317 auio
.uio_resid
= uap
->nbyte
;
318 auio
.uio_rw
= UIO_WRITE
;
319 auio
.uio_segflg
= UIO_USERSPACE
;
322 if (auio
.uio_resid
< 0)
325 error
= kern_pwritev(uap
->fd
, &auio
, 0, &uap
->sysmsg_result
);
336 sys_extpwrite(struct extpwrite_args
*uap
)
338 struct thread
*td
= curthread
;
344 aiov
.iov_base
= (void *)(uintptr_t)uap
->buf
;
345 aiov
.iov_len
= uap
->nbyte
;
346 auio
.uio_iov
= &aiov
;
348 auio
.uio_offset
= uap
->offset
;
349 auio
.uio_resid
= uap
->nbyte
;
350 auio
.uio_rw
= UIO_WRITE
;
351 auio
.uio_segflg
= UIO_USERSPACE
;
354 flags
= uap
->flags
& O_FMASK
;
355 if (uap
->offset
!= (off_t
)-1)
358 if (auio
.uio_resid
< 0)
361 error
= kern_pwritev(uap
->fd
, &auio
, flags
, &uap
->sysmsg_result
);
370 sys_writev(struct writev_args
*uap
)
372 struct thread
*td
= curthread
;
374 struct iovec aiov
[UIO_SMALLIOV
], *iov
= NULL
;
377 error
= iovec_copyin(uap
->iovp
, &iov
, aiov
, uap
->iovcnt
,
382 auio
.uio_iovcnt
= uap
->iovcnt
;
383 auio
.uio_offset
= -1;
384 auio
.uio_rw
= UIO_WRITE
;
385 auio
.uio_segflg
= UIO_USERSPACE
;
388 error
= kern_pwritev(uap
->fd
, &auio
, 0, &uap
->sysmsg_result
);
390 iovec_free(&iov
, aiov
);
396 * Gather positioned write system call
401 sys_extpwritev(struct extpwritev_args
*uap
)
403 struct thread
*td
= curthread
;
405 struct iovec aiov
[UIO_SMALLIOV
], *iov
= NULL
;
409 error
= iovec_copyin(uap
->iovp
, &iov
, aiov
, uap
->iovcnt
,
414 auio
.uio_iovcnt
= uap
->iovcnt
;
415 auio
.uio_offset
= uap
->offset
;
416 auio
.uio_rw
= UIO_WRITE
;
417 auio
.uio_segflg
= UIO_USERSPACE
;
420 flags
= uap
->flags
& O_FMASK
;
421 if (uap
->offset
!= (off_t
)-1)
424 error
= kern_pwritev(uap
->fd
, &auio
, flags
, &uap
->sysmsg_result
);
426 iovec_free(&iov
, aiov
);
434 kern_pwritev(int fd
, struct uio
*auio
, int flags
, int *res
)
436 struct thread
*td
= curthread
;
437 struct proc
*p
= td
->td_proc
;
443 fp
= holdfp(p
->p_fd
, fd
, FWRITE
);
446 else if ((flags
& O_FOFFSET
) && fp
->f_type
!= DTYPE_VNODE
) {
449 error
= dofilewrite(fd
, fp
, auio
, flags
, res
);
457 * Common code for writev and pwritev that writes data to
458 * a file using the passed in uio, offset, and flags.
460 * MPALMOSTSAFE - ktrace needs help
463 dofilewrite(int fd
, struct file
*fp
, struct uio
*auio
, int flags
, int *res
)
465 struct thread
*td
= curthread
;
466 struct lwp
*lp
= td
->td_lwp
;
467 struct proc
*p
= td
->td_proc
;
471 struct iovec
*ktriov
= NULL
;
477 * if tracing, save a copy of iovec and uio
479 if (KTRPOINT(td
, KTR_GENIO
)) {
480 int iovlen
= auio
->uio_iovcnt
* sizeof(struct iovec
);
482 MALLOC(ktriov
, struct iovec
*, iovlen
, M_TEMP
, M_WAITOK
);
483 bcopy((caddr_t
)auio
->uio_iov
, (caddr_t
)ktriov
, iovlen
);
487 len
= auio
->uio_resid
;
488 if (fp
->f_type
== DTYPE_VNODE
)
490 error
= fo_write(fp
, auio
, fp
->f_cred
, flags
);
492 if (auio
->uio_resid
!= len
&& (error
== ERESTART
||
493 error
== EINTR
|| error
== EWOULDBLOCK
))
495 /* Socket layer is responsible for issuing SIGPIPE. */
496 if (error
== EPIPE
) {
498 lwpsignal(p
, lp
, SIGPIPE
);
503 if (ktriov
!= NULL
) {
505 ktruio
.uio_iov
= ktriov
;
506 ktruio
.uio_resid
= len
- auio
->uio_resid
;
508 ktrgenio(p
, fd
, UIO_WRITE
, &ktruio
, error
);
511 FREE(ktriov
, M_TEMP
);
515 *res
= len
- auio
->uio_resid
;
525 sys_ioctl(struct ioctl_args
*uap
)
527 return(mapped_ioctl(uap
->fd
, uap
->com
, uap
->data
, NULL
));
530 struct ioctl_map_entry
{
532 struct ioctl_map_range
*cmd_ranges
;
533 LIST_ENTRY(ioctl_map_entry
) entries
;
537 * The true heart of all ioctl syscall handlers (native, emulation).
538 * If map != NULL, it will be searched for a matching entry for com,
539 * and appropriate conversions/conversion functions will be utilized.
542 mapped_ioctl(int fd
, u_long com
, caddr_t uspc_data
, struct ioctl_map
*map
)
544 struct thread
*td
= curthread
;
545 struct proc
*p
= td
->td_proc
;
548 struct ioctl_map_range
*iomc
= NULL
;
554 #define STK_PARAMS 128
556 char stkbuf
[STK_PARAMS
];
563 fp
= holdfp(p
->p_fd
, fd
, FREAD
|FWRITE
);
567 if (map
!= NULL
) { /* obey translation map */
569 struct ioctl_map_entry
*e
;
571 maskcmd
= com
& map
->mask
;
573 LIST_FOREACH(e
, &map
->mapping
, entries
) {
574 for (iomc
= e
->cmd_ranges
; iomc
->start
!= 0 ||
575 iomc
->maptocmd
!= 0 || iomc
->wrapfunc
!= NULL
||
576 iomc
->mapfunc
!= NULL
;
578 if (maskcmd
>= iomc
->start
&&
579 maskcmd
<= iomc
->end
)
583 /* Did we find a match? */
584 if (iomc
->start
!= 0 || iomc
->maptocmd
!= 0 ||
585 iomc
->wrapfunc
!= NULL
|| iomc
->mapfunc
!= NULL
)
590 (iomc
->start
== 0 && iomc
->maptocmd
== 0
591 && iomc
->wrapfunc
== NULL
&& iomc
->mapfunc
== NULL
)) {
592 kprintf("%s: 'ioctl' fd=%d, cmd=0x%lx ('%c',%d) not implemented\n",
593 map
->sys
, fd
, maskcmd
,
594 (int)((maskcmd
>> 8) & 0xff),
595 (int)(maskcmd
& 0xff));
601 * If it's a non-range one to one mapping, maptocmd should be
602 * correct. If it's a ranged one to one mapping, we pass the
603 * original value of com, and for a range mapped to a different
604 * range, we always need a mapping function to translate the
605 * ioctl to our native ioctl. Ex. 6500-65ff <-> 9500-95ff
607 if (iomc
->start
== iomc
->end
&& iomc
->maptocmd
== iomc
->maptoend
) {
608 com
= iomc
->maptocmd
;
609 } else if (iomc
->start
== iomc
->maptocmd
&& iomc
->end
== iomc
->maptoend
) {
610 if (iomc
->mapfunc
!= NULL
)
611 com
= iomc
->mapfunc(iomc
->start
, iomc
->end
,
612 iomc
->start
, iomc
->end
,
615 if (iomc
->mapfunc
!= NULL
) {
616 com
= iomc
->mapfunc(iomc
->start
, iomc
->end
,
617 iomc
->maptocmd
, iomc
->maptoend
,
620 kprintf("%s: Invalid mapping for fd=%d, cmd=%#lx ('%c',%d)\n",
621 map
->sys
, fd
, maskcmd
,
622 (int)((maskcmd
>> 8) & 0xff),
623 (int)(maskcmd
& 0xff));
632 error
= fclrfdflags(p
->p_fd
, fd
, UF_EXCLOSE
);
635 error
= fsetfdflags(p
->p_fd
, fd
, UF_EXCLOSE
);
640 * Interpret high order word to find amount of data to be
641 * copied to/from the user's address space.
643 size
= IOCPARM_LEN(com
);
644 if (size
> IOCPARM_MAX
) {
650 if (size
> sizeof (ubuf
.stkbuf
)) {
651 memp
= kmalloc(size
, M_IOCTLOPS
, M_WAITOK
);
656 if ((com
& IOC_IN
) != 0) {
658 error
= copyin(uspc_data
, data
, (u_int
)size
);
661 kfree(memp
, M_IOCTLOPS
);
665 *(caddr_t
*)data
= uspc_data
;
667 } else if ((com
& IOC_OUT
) != 0 && size
) {
669 * Zero the buffer so the user always
670 * gets back something deterministic.
673 } else if ((com
& IOC_VOID
) != 0) {
674 *(caddr_t
*)data
= uspc_data
;
679 if ((tmp
= *(int *)data
))
680 fp
->f_flag
|= FNONBLOCK
;
682 fp
->f_flag
&= ~FNONBLOCK
;
687 if ((tmp
= *(int *)data
))
688 fp
->f_flag
|= FASYNC
;
690 fp
->f_flag
&= ~FASYNC
;
691 error
= fo_ioctl(fp
, FIOASYNC
, (caddr_t
)&tmp
, cred
);
696 * If there is a override function,
697 * call it instead of directly routing the call
699 if (map
!= NULL
&& iomc
->wrapfunc
!= NULL
)
700 error
= iomc
->wrapfunc(fp
, com
, ocom
, data
, cred
);
702 error
= fo_ioctl(fp
, com
, data
, cred
);
704 * Copy any data to user, size was
705 * already set and checked above.
707 if (error
== 0 && (com
& IOC_OUT
) != 0 && size
!= 0)
708 error
= copyout(data
, uspc_data
, (u_int
)size
);
712 kfree(memp
, M_IOCTLOPS
);
719 mapped_ioctl_register_handler(struct ioctl_map_handler
*he
)
721 struct ioctl_map_entry
*ne
;
723 KKASSERT(he
!= NULL
&& he
->map
!= NULL
&& he
->cmd_ranges
!= NULL
&&
724 he
->subsys
!= NULL
&& *he
->subsys
!= '\0');
726 ne
= kmalloc(sizeof(struct ioctl_map_entry
), M_IOCTLMAP
, M_WAITOK
);
728 ne
->subsys
= he
->subsys
;
729 ne
->cmd_ranges
= he
->cmd_ranges
;
731 LIST_INSERT_HEAD(&he
->map
->mapping
, ne
, entries
);
737 mapped_ioctl_unregister_handler(struct ioctl_map_handler
*he
)
739 struct ioctl_map_entry
*ne
;
741 KKASSERT(he
!= NULL
&& he
->map
!= NULL
&& he
->cmd_ranges
!= NULL
);
743 LIST_FOREACH(ne
, &he
->map
->mapping
, entries
) {
744 if (ne
->cmd_ranges
!= he
->cmd_ranges
)
746 LIST_REMOVE(ne
, entries
);
747 kfree(ne
, M_IOCTLMAP
);
753 static int nselcoll
; /* Select collisions since boot */
755 SYSCTL_INT(_kern
, OID_AUTO
, nselcoll
, CTLFLAG_RD
, &nselcoll
, 0, "");
758 * Select system call.
761 sys_select(struct select_args
*uap
)
763 struct lwp
*lp
= curthread
->td_lwp
;
764 struct proc
*p
= curproc
;
767 * The magic 2048 here is chosen to be just enough for FD_SETSIZE
768 * infds with the new FD_SETSIZE of 1024, and more than enough for
769 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
772 fd_mask s_selbits
[howmany(2048, NFDBITS
)];
773 fd_mask
*ibits
[3], *obits
[3], *selbits
, *sbp
;
774 struct timeval atv
, rtv
, ttv
;
775 int ncoll
, error
, timo
;
776 u_int nbufbytes
, ncpbytes
, nfdbits
;
780 if (uap
->nd
> p
->p_fd
->fd_nfiles
)
781 uap
->nd
= p
->p_fd
->fd_nfiles
; /* forgiving; slightly wrong */
784 * Allocate just enough bits for the non-null fd_sets. Use the
785 * preallocated auto buffer if possible.
787 nfdbits
= roundup(uap
->nd
, NFDBITS
);
788 ncpbytes
= nfdbits
/ NBBY
;
791 nbufbytes
+= 2 * ncpbytes
;
793 nbufbytes
+= 2 * ncpbytes
;
795 nbufbytes
+= 2 * ncpbytes
;
796 if (nbufbytes
<= sizeof s_selbits
)
797 selbits
= &s_selbits
[0];
799 selbits
= kmalloc(nbufbytes
, M_SELECT
, M_WAITOK
);
802 * Assign pointers into the bit buffers and fetch the input bits.
803 * Put the output buffers together so that they can be bzeroed
807 #define getbits(name, x) \
809 if (uap->name == NULL) \
812 ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \
814 sbp += ncpbytes / sizeof *sbp; \
815 error = copyin(uap->name, ibits[x], ncpbytes); \
825 bzero(selbits
, nbufbytes
/ 2);
828 error
= copyin((caddr_t
)uap
->tv
, (caddr_t
)&atv
,
832 if (itimerfix(&atv
)) {
836 getmicrouptime(&rtv
);
837 timevaladd(&atv
, &rtv
);
845 lp
->lwp_flag
|= LWP_SELECT
;
846 error
= selscan(p
, ibits
, obits
, uap
->nd
, &uap
->sysmsg_result
);
847 if (error
|| uap
->sysmsg_result
)
849 if (atv
.tv_sec
|| atv
.tv_usec
) {
850 getmicrouptime(&rtv
);
851 if (timevalcmp(&rtv
, &atv
, >=))
854 timevalsub(&ttv
, &rtv
);
855 timo
= ttv
.tv_sec
> 24 * 60 * 60 ?
856 24 * 60 * 60 * hz
: tvtohz_high(&ttv
);
859 if ((lp
->lwp_flag
& LWP_SELECT
) == 0 || nselcoll
!= ncoll
) {
863 lp
->lwp_flag
&= ~LWP_SELECT
;
865 error
= tsleep((caddr_t
)&selwait
, PCATCH
, "select", timo
);
871 lp
->lwp_flag
&= ~LWP_SELECT
;
872 /* select is not restarted after signals... */
873 if (error
== ERESTART
)
875 if (error
== EWOULDBLOCK
)
877 #define putbits(name, x) \
878 if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \
888 if (selbits
!= &s_selbits
[0])
889 kfree(selbits
, M_SELECT
);
894 selscan(struct proc
*p
, fd_mask
**ibits
, fd_mask
**obits
, int nfd
, int *res
)
900 /* Note: backend also returns POLLHUP/POLLERR if appropriate. */
901 static int flag
[3] = { POLLRDNORM
, POLLWRNORM
, POLLRDBAND
};
903 for (msk
= 0; msk
< 3; msk
++) {
904 if (ibits
[msk
] == NULL
)
906 for (i
= 0; i
< nfd
; i
+= NFDBITS
) {
907 bits
= ibits
[msk
][i
/NFDBITS
];
908 /* ffs(int mask) not portable, fd_mask is long */
909 for (fd
= i
; bits
&& fd
< nfd
; fd
++, bits
>>= 1) {
912 fp
= holdfp(p
->p_fd
, fd
, -1);
915 if (fo_poll(fp
, flag
[msk
], fp
->f_cred
)) {
916 obits
[msk
][(fd
)/NFDBITS
] |=
917 ((fd_mask
)1 << ((fd
) % NFDBITS
));
932 sys_poll(struct poll_args
*uap
)
935 struct pollfd smallbits
[32];
936 struct timeval atv
, rtv
, ttv
;
937 int ncoll
, error
= 0, timo
;
940 struct lwp
*lp
= curthread
->td_lwp
;
941 struct proc
*p
= curproc
;
945 * This is kinda bogus. We have fd limits, but that is not
946 * really related to the size of the pollfd array. Make sure
947 * we let the process use at least FD_SETSIZE entries and at
948 * least enough for the current limits. We want to be reasonably
949 * safe, but not overly restrictive.
951 if (nfds
> p
->p_rlimit
[RLIMIT_NOFILE
].rlim_cur
&& nfds
> FD_SETSIZE
)
953 ni
= nfds
* sizeof(struct pollfd
);
954 if (ni
> sizeof(smallbits
))
955 bits
= kmalloc(ni
, M_TEMP
, M_WAITOK
);
958 error
= copyin(uap
->fds
, bits
, ni
);
961 if (uap
->timeout
!= INFTIM
) {
962 atv
.tv_sec
= uap
->timeout
/ 1000;
963 atv
.tv_usec
= (uap
->timeout
% 1000) * 1000;
964 if (itimerfix(&atv
)) {
968 getmicrouptime(&rtv
);
969 timevaladd(&atv
, &rtv
);
977 lp
->lwp_flag
|= LWP_SELECT
;
978 error
= pollscan(p
, bits
, nfds
, &uap
->sysmsg_result
);
979 if (error
|| uap
->sysmsg_result
)
981 if (atv
.tv_sec
|| atv
.tv_usec
) {
982 getmicrouptime(&rtv
);
983 if (timevalcmp(&rtv
, &atv
, >=))
986 timevalsub(&ttv
, &rtv
);
987 timo
= ttv
.tv_sec
> 24 * 60 * 60 ?
988 24 * 60 * 60 * hz
: tvtohz_high(&ttv
);
991 if ((lp
->lwp_flag
& LWP_SELECT
) == 0 || nselcoll
!= ncoll
) {
995 lp
->lwp_flag
&= ~LWP_SELECT
;
996 error
= tsleep((caddr_t
)&selwait
, PCATCH
, "poll", timo
);
1001 lp
->lwp_flag
&= ~LWP_SELECT
;
1002 /* poll is not restarted after signals... */
1003 if (error
== ERESTART
)
1005 if (error
== EWOULDBLOCK
)
1008 error
= copyout(bits
, uap
->fds
, ni
);
1013 if (ni
> sizeof(smallbits
))
1014 kfree(bits
, M_TEMP
);
1019 pollscan(struct proc
*p
, struct pollfd
*fds
, u_int nfd
, int *res
)
1025 for (i
= 0; i
< nfd
; i
++, fds
++) {
1026 if (fds
->fd
>= p
->p_fd
->fd_nfiles
) {
1027 fds
->revents
= POLLNVAL
;
1029 } else if (fds
->fd
< 0) {
1032 fp
= holdfp(p
->p_fd
, fds
->fd
, -1);
1034 fds
->revents
= POLLNVAL
;
1038 * Note: backend also returns POLLHUP and
1039 * POLLERR if appropriate.
1041 fds
->revents
= fo_poll(fp
, fds
->events
,
1043 if (fds
->revents
!= 0)
/*
 * OpenBSD poll system call.
 * XXX this isn't quite a true representation..  OpenBSD uses select ops.
 */
int
sys_openbsd_poll(struct openbsd_poll_args *uap)
{
	return (sys_poll((struct poll_args *)uap));
}
1065 seltrue(cdev_t dev
, int events
)
1067 return (events
& (POLLIN
| POLLOUT
| POLLRDNORM
| POLLWRNORM
));
1071 * Record a select request. A global wait must be used since a process/thread
1072 * might go away after recording its request.
1075 selrecord(struct thread
*selector
, struct selinfo
*sip
)
1078 struct lwp
*lp
= NULL
;
1080 if (selector
->td_lwp
== NULL
)
1081 panic("selrecord: thread needs a process");
1083 if (sip
->si_pid
== selector
->td_proc
->p_pid
&&
1084 sip
->si_tid
== selector
->td_lwp
->lwp_tid
)
1086 if (sip
->si_pid
&& (p
= pfind(sip
->si_pid
)))
1087 lp
= lwp_rb_tree_RB_LOOKUP(&p
->p_lwp_tree
, sip
->si_tid
);
1088 if (lp
!= NULL
&& lp
->lwp_wchan
== (caddr_t
)&selwait
) {
1089 sip
->si_flags
|= SI_COLL
;
1091 sip
->si_pid
= selector
->td_proc
->p_pid
;
1092 sip
->si_tid
= selector
->td_lwp
->lwp_tid
;
1097 * Do a wakeup when a selectable event occurs.
1100 selwakeup(struct selinfo
*sip
)
1103 struct lwp
*lp
= NULL
;
1105 if (sip
->si_pid
== 0)
1107 if (sip
->si_flags
& SI_COLL
) {
1109 sip
->si_flags
&= ~SI_COLL
;
1110 wakeup((caddr_t
)&selwait
); /* YYY fixable */
1112 p
= pfind(sip
->si_pid
);
1116 lp
= lwp_rb_tree_RB_LOOKUP(&p
->p_lwp_tree
, sip
->si_tid
);
1121 if (lp
->lwp_wchan
== (caddr_t
)&selwait
) {
1123 * Flag the process to break the tsleep when
1124 * setrunnable is called, but only call setrunnable
1125 * here if the process is not in a stopped state.
1127 lp
->lwp_flag
|= LWP_BREAKTSLEEP
;
1128 if (p
->p_stat
!= SSTOP
)
1130 } else if (lp
->lwp_flag
& LWP_SELECT
) {
1131 lp
->lwp_flag
&= ~LWP_SELECT
;