2 * Copyright (c) 1982, 1986, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
40 #include "opt_compat.h"
41 #include "opt_ktrace.h"
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/sysproto.h>
46 #include <sys/filedesc.h>
47 #include <sys/filio.h>
48 #include <sys/fcntl.h>
51 #include <sys/signalvar.h>
52 #include <sys/socketvar.h>
54 #include <sys/kernel.h>
56 #include <sys/limits.h>
57 #include <sys/malloc.h>
59 #include <sys/resourcevar.h>
60 #include <sys/selinfo.h>
61 #include <sys/sleepqueue.h>
62 #include <sys/syscallsubr.h>
63 #include <sys/sysctl.h>
64 #include <sys/sysent.h>
65 #include <sys/vnode.h>
68 #include <sys/condvar.h>
70 #include <sys/ktrace.h>
73 #include <security/audit/audit.h>
75 static MALLOC_DEFINE(M_IOCTLOPS
, "ioctlops", "ioctl data buffer");
76 static MALLOC_DEFINE(M_SELECT
, "select", "select() buffer");
77 MALLOC_DEFINE(M_IOV
, "iov", "large iov's");
79 static int pollscan(struct thread
*, struct pollfd
*, u_int
);
80 static int pollrescan(struct thread
*);
81 static int selscan(struct thread
*, fd_mask
**, fd_mask
**, int);
82 static int selrescan(struct thread
*, fd_mask
**, fd_mask
**);
83 static void selfdalloc(struct thread
*, void *);
84 static void selfdfree(struct seltd
*, struct selfd
*);
85 static int dofileread(struct thread
*, int, struct file
*, struct uio
*,
87 static int dofilewrite(struct thread
*, int, struct file
*, struct uio
*,
89 static void doselwakeup(struct selinfo
*, int);
90 static void seltdinit(struct thread
*);
91 static int seltdwait(struct thread
*, int);
92 static void seltdclear(struct thread
*);
95 * One seltd per-thread allocated on demand as needed.
97 * t - protected by st_mtx
98 * k - Only accessed by curthread or read-only
101 STAILQ_HEAD(, selfd
) st_selq
; /* (k) List of selfds. */
102 struct selfd
*st_free1
; /* (k) free fd for read set. */
103 struct selfd
*st_free2
; /* (k) free fd for write set. */
104 struct mtx st_mtx
; /* Protects struct seltd */
105 struct cv st_wait
; /* (t) Wait channel. */
106 int st_flags
; /* (t) SELTD_ flags. */
109 #define SELTD_PENDING 0x0001 /* We have pending events. */
110 #define SELTD_RESCAN 0x0002 /* Doing a rescan. */
113 * One selfd allocated per-thread per-file-descriptor.
114 * f - protected by sf_mtx
117 STAILQ_ENTRY(selfd
) sf_link
; /* (k) fds owned by this td. */
118 TAILQ_ENTRY(selfd
) sf_threads
; /* (f) fds on this selinfo. */
119 struct selinfo
*sf_si
; /* (f) selinfo when linked. */
120 struct mtx
*sf_mtx
; /* Pointer to selinfo mtx. */
121 struct seltd
*sf_td
; /* (k) owning seltd. */
122 void *sf_cookie
; /* (k) fd or pollfd. */
125 static uma_zone_t selfd_zone
;
127 #ifndef _SYS_SYSPROTO_H_
137 struct read_args
*uap
;
143 if (uap
->nbyte
> INT_MAX
)
145 aiov
.iov_base
= uap
->buf
;
146 aiov
.iov_len
= uap
->nbyte
;
147 auio
.uio_iov
= &aiov
;
149 auio
.uio_resid
= uap
->nbyte
;
150 auio
.uio_segflg
= UIO_USERSPACE
;
151 error
= kern_readv(td
, uap
->fd
, &auio
);
156 * Positioned read system call
158 #ifndef _SYS_SYSPROTO_H_
170 struct pread_args
*uap
;
176 if (uap
->nbyte
> INT_MAX
)
178 aiov
.iov_base
= uap
->buf
;
179 aiov
.iov_len
= uap
->nbyte
;
180 auio
.uio_iov
= &aiov
;
182 auio
.uio_resid
= uap
->nbyte
;
183 auio
.uio_segflg
= UIO_USERSPACE
;
184 error
= kern_preadv(td
, uap
->fd
, &auio
, uap
->offset
);
189 freebsd6_pread(td
, uap
)
191 struct freebsd6_pread_args
*uap
;
193 struct pread_args oargs
;
196 oargs
.buf
= uap
->buf
;
197 oargs
.nbyte
= uap
->nbyte
;
198 oargs
.offset
= uap
->offset
;
199 return (pread(td
, &oargs
));
203 * Scatter read system call.
205 #ifndef _SYS_SYSPROTO_H_
213 readv(struct thread
*td
, struct readv_args
*uap
)
218 error
= copyinuio(uap
->iovp
, uap
->iovcnt
, &auio
);
221 error
= kern_readv(td
, uap
->fd
, auio
);
227 kern_readv(struct thread
*td
, int fd
, struct uio
*auio
)
232 error
= fget_read(td
, fd
, &fp
);
235 error
= dofileread(td
, fd
, fp
, auio
, (off_t
)-1, 0);
241 * Scatter positioned read system call.
243 #ifndef _SYS_SYSPROTO_H_
252 preadv(struct thread
*td
, struct preadv_args
*uap
)
257 error
= copyinuio(uap
->iovp
, uap
->iovcnt
, &auio
);
260 error
= kern_preadv(td
, uap
->fd
, auio
, uap
->offset
);
266 kern_preadv(td
, fd
, auio
, offset
)
275 error
= fget_read(td
, fd
, &fp
);
278 if (!(fp
->f_ops
->fo_flags
& DFLAG_SEEKABLE
))
280 else if (offset
< 0 && fp
->f_vnode
->v_type
!= VCHR
)
283 error
= dofileread(td
, fd
, fp
, auio
, offset
, FOF_OFFSET
);
289 * Common code for readv and preadv that reads data in
290 * from a file using the passed in uio, offset, and flags.
293 dofileread(td
, fd
, fp
, auio
, offset
, flags
)
304 struct uio
*ktruio
= NULL
;
307 /* Finish zero length reads right here */
308 if (auio
->uio_resid
== 0) {
309 td
->td_retval
[0] = 0;
312 auio
->uio_rw
= UIO_READ
;
313 auio
->uio_offset
= offset
;
316 if (KTRPOINT(td
, KTR_GENIO
))
317 ktruio
= cloneuio(auio
);
319 cnt
= auio
->uio_resid
;
320 if ((error
= fo_read(fp
, auio
, td
->td_ucred
, flags
, td
))) {
321 if (auio
->uio_resid
!= cnt
&& (error
== ERESTART
||
322 error
== EINTR
|| error
== EWOULDBLOCK
))
325 cnt
-= auio
->uio_resid
;
327 if (ktruio
!= NULL
) {
328 ktruio
->uio_resid
= cnt
;
329 ktrgenio(fd
, UIO_READ
, ktruio
, error
);
332 td
->td_retval
[0] = cnt
;
336 #ifndef _SYS_SYSPROTO_H_
346 struct write_args
*uap
;
352 if (uap
->nbyte
> INT_MAX
)
354 aiov
.iov_base
= (void *)(uintptr_t)uap
->buf
;
355 aiov
.iov_len
= uap
->nbyte
;
356 auio
.uio_iov
= &aiov
;
358 auio
.uio_resid
= uap
->nbyte
;
359 auio
.uio_segflg
= UIO_USERSPACE
;
360 error
= kern_writev(td
, uap
->fd
, &auio
);
365 * Positioned write system call.
367 #ifndef _SYS_SYSPROTO_H_
379 struct pwrite_args
*uap
;
385 if (uap
->nbyte
> INT_MAX
)
387 aiov
.iov_base
= (void *)(uintptr_t)uap
->buf
;
388 aiov
.iov_len
= uap
->nbyte
;
389 auio
.uio_iov
= &aiov
;
391 auio
.uio_resid
= uap
->nbyte
;
392 auio
.uio_segflg
= UIO_USERSPACE
;
393 error
= kern_pwritev(td
, uap
->fd
, &auio
, uap
->offset
);
398 freebsd6_pwrite(td
, uap
)
400 struct freebsd6_pwrite_args
*uap
;
402 struct pwrite_args oargs
;
405 oargs
.buf
= uap
->buf
;
406 oargs
.nbyte
= uap
->nbyte
;
407 oargs
.offset
= uap
->offset
;
408 return (pwrite(td
, &oargs
));
412 * Gather write system call.
414 #ifndef _SYS_SYSPROTO_H_
422 writev(struct thread
*td
, struct writev_args
*uap
)
427 error
= copyinuio(uap
->iovp
, uap
->iovcnt
, &auio
);
430 error
= kern_writev(td
, uap
->fd
, auio
);
436 kern_writev(struct thread
*td
, int fd
, struct uio
*auio
)
441 error
= fget_write(td
, fd
, &fp
);
444 error
= dofilewrite(td
, fd
, fp
, auio
, (off_t
)-1, 0);
450 * Gather positioned write system call.
452 #ifndef _SYS_SYSPROTO_H_
453 struct pwritev_args
{
461 pwritev(struct thread
*td
, struct pwritev_args
*uap
)
466 error
= copyinuio(uap
->iovp
, uap
->iovcnt
, &auio
);
469 error
= kern_pwritev(td
, uap
->fd
, auio
, uap
->offset
);
475 kern_pwritev(td
, fd
, auio
, offset
)
484 error
= fget_write(td
, fd
, &fp
);
487 if (!(fp
->f_ops
->fo_flags
& DFLAG_SEEKABLE
))
489 else if (offset
< 0 && fp
->f_vnode
->v_type
!= VCHR
)
492 error
= dofilewrite(td
, fd
, fp
, auio
, offset
, FOF_OFFSET
);
498 * Common code for writev and pwritev that writes data to
499 * a file using the passed in uio, offset, and flags.
502 dofilewrite(td
, fd
, fp
, auio
, offset
, flags
)
513 struct uio
*ktruio
= NULL
;
516 auio
->uio_rw
= UIO_WRITE
;
518 auio
->uio_offset
= offset
;
520 if (KTRPOINT(td
, KTR_GENIO
))
521 ktruio
= cloneuio(auio
);
523 cnt
= auio
->uio_resid
;
524 if (fp
->f_type
== DTYPE_VNODE
)
526 if ((error
= fo_write(fp
, auio
, td
->td_ucred
, flags
, td
))) {
527 if (auio
->uio_resid
!= cnt
&& (error
== ERESTART
||
528 error
== EINTR
|| error
== EWOULDBLOCK
))
530 /* Socket layer is responsible for issuing SIGPIPE. */
531 if (fp
->f_type
!= DTYPE_SOCKET
&& error
== EPIPE
) {
532 PROC_LOCK(td
->td_proc
);
533 psignal(td
->td_proc
, SIGPIPE
);
534 PROC_UNLOCK(td
->td_proc
);
537 cnt
-= auio
->uio_resid
;
539 if (ktruio
!= NULL
) {
540 ktruio
->uio_resid
= cnt
;
541 ktrgenio(fd
, UIO_WRITE
, ktruio
, error
);
544 td
->td_retval
[0] = cnt
;
549 * Truncate a file given a file descriptor.
551 * Can't use fget_write() here, since must return EINVAL and not EBADF if the
552 * descriptor isn't writable.
555 kern_ftruncate(td
, fd
, length
)
566 error
= fget(td
, fd
, &fp
);
569 AUDIT_ARG(file
, td
->td_proc
, fp
);
570 if (!(fp
->f_flag
& FWRITE
)) {
574 error
= fo_truncate(fp
, length
, td
->td_ucred
, td
);
579 #ifndef _SYS_SYSPROTO_H_
580 struct ftruncate_args
{
589 struct ftruncate_args
*uap
;
592 return (kern_ftruncate(td
, uap
->fd
, uap
->length
));
595 #if defined(COMPAT_43)
596 #ifndef _SYS_SYSPROTO_H_
597 struct oftruncate_args
{
605 struct oftruncate_args
*uap
;
608 return (kern_ftruncate(td
, uap
->fd
, uap
->length
));
610 #endif /* COMPAT_43 */
612 #ifndef _SYS_SYSPROTO_H_
621 ioctl(struct thread
*td
, struct ioctl_args
*uap
)
628 if (uap
->com
> 0xffffffff) {
630 "WARNING pid %d (%s): ioctl sign-extension ioctl %lx\n",
631 td
->td_proc
->p_pid
, td
->td_name
, uap
->com
);
632 uap
->com
&= 0xffffffff;
637 * Interpret high order word to find amount of data to be
638 * copied to/from the user's address space.
640 size
= IOCPARM_LEN(com
);
641 if ((size
> IOCPARM_MAX
) ||
642 ((com
& (IOC_VOID
| IOC_IN
| IOC_OUT
)) == 0) ||
643 #if defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) || defined(COMPAT_43)
644 ((com
& IOC_OUT
) && size
== 0) ||
646 ((com
& (IOC_IN
| IOC_OUT
)) && size
== 0) ||
648 ((com
& IOC_VOID
) && size
> 0 && size
!= sizeof(int)))
652 if (!(com
& IOC_VOID
))
653 data
= malloc((u_long
)size
, M_IOCTLOPS
, M_WAITOK
);
655 /* Integer argument. */
656 arg
= (intptr_t)uap
->data
;
661 data
= (void *)&uap
->data
;
663 error
= copyin(uap
->data
, data
, (u_int
)size
);
666 free(data
, M_IOCTLOPS
);
669 } else if (com
& IOC_OUT
) {
671 * Zero the buffer so the user always
672 * gets back something deterministic.
677 error
= kern_ioctl(td
, uap
->fd
, com
, data
);
679 if (error
== 0 && (com
& IOC_OUT
))
680 error
= copyout(data
, uap
->data
, (u_int
)size
);
683 free(data
, M_IOCTLOPS
);
688 kern_ioctl(struct thread
*td
, int fd
, u_long com
, caddr_t data
)
691 struct filedesc
*fdp
;
695 if ((error
= fget(td
, fd
, &fp
)) != 0)
697 if ((fp
->f_flag
& (FREAD
| FWRITE
)) == 0) {
701 fdp
= td
->td_proc
->p_fd
;
705 fdp
->fd_ofileflags
[fd
] &= ~UF_EXCLOSE
;
706 FILEDESC_XUNLOCK(fdp
);
710 fdp
->fd_ofileflags
[fd
] |= UF_EXCLOSE
;
711 FILEDESC_XUNLOCK(fdp
);
714 if ((tmp
= *(int *)data
))
715 atomic_set_int(&fp
->f_flag
, FNONBLOCK
);
717 atomic_clear_int(&fp
->f_flag
, FNONBLOCK
);
721 if ((tmp
= *(int *)data
))
722 atomic_set_int(&fp
->f_flag
, FASYNC
);
724 atomic_clear_int(&fp
->f_flag
, FASYNC
);
729 error
= fo_ioctl(fp
, com
, data
, td
->td_ucred
, td
);
735 #ifndef _SYS_SYSPROTO_H_
738 fd_set
*in
, *ou
, *ex
;
744 register struct thread
*td
;
745 register struct select_args
*uap
;
747 struct timeval tv
, *tvp
;
750 if (uap
->tv
!= NULL
) {
751 error
= copyin(uap
->tv
, &tv
, sizeof(tv
));
758 return (kern_select(td
, uap
->nd
, uap
->in
, uap
->ou
, uap
->ex
, tvp
));
762 kern_select(struct thread
*td
, int nd
, fd_set
*fd_in
, fd_set
*fd_ou
,
763 fd_set
*fd_ex
, struct timeval
*tvp
)
765 struct filedesc
*fdp
;
767 * The magic 2048 here is chosen to be just enough for FD_SETSIZE
768 * infds with the new FD_SETSIZE of 1024, and more than enough for
769 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
772 fd_mask s_selbits
[howmany(2048, NFDBITS
)];
773 fd_mask
*ibits
[3], *obits
[3], *selbits
, *sbp
;
774 struct timeval atv
, rtv
, ttv
;
776 u_int nbufbytes
, ncpbytes
, nfdbits
;
780 fdp
= td
->td_proc
->p_fd
;
783 if (nd
> td
->td_proc
->p_fd
->fd_nfiles
)
784 nd
= td
->td_proc
->p_fd
->fd_nfiles
; /* forgiving; slightly wrong */
785 FILEDESC_SUNLOCK(fdp
);
788 * Allocate just enough bits for the non-null fd_sets. Use the
789 * preallocated auto buffer if possible.
791 nfdbits
= roundup(nd
, NFDBITS
);
792 ncpbytes
= nfdbits
/ NBBY
;
795 nbufbytes
+= 2 * ncpbytes
;
797 nbufbytes
+= 2 * ncpbytes
;
799 nbufbytes
+= 2 * ncpbytes
;
800 if (nbufbytes
<= sizeof s_selbits
)
801 selbits
= &s_selbits
[0];
803 selbits
= malloc(nbufbytes
, M_SELECT
, M_WAITOK
);
806 * Assign pointers into the bit buffers and fetch the input bits.
807 * Put the output buffers together so that they can be bzeroed
811 #define getbits(name, x) \
816 ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \
818 sbp += ncpbytes / sizeof *sbp; \
819 error = copyin(name, ibits[x], ncpbytes); \
829 bzero(selbits
, nbufbytes
/ 2);
833 if (itimerfix(&atv
)) {
837 getmicrouptime(&rtv
);
838 timevaladd(&atv
, &rtv
);
845 /* Iterate until the timeout expires or descriptors become ready. */
847 error
= selscan(td
, ibits
, obits
, nd
);
848 if (error
|| td
->td_retval
[0] != 0)
850 if (atv
.tv_sec
|| atv
.tv_usec
) {
851 getmicrouptime(&rtv
);
852 if (timevalcmp(&rtv
, &atv
, >=))
855 timevalsub(&ttv
, &rtv
);
856 timo
= ttv
.tv_sec
> 24 * 60 * 60 ?
857 24 * 60 * 60 * hz
: tvtohz(&ttv
);
859 error
= seltdwait(td
, timo
);
862 error
= selrescan(td
, ibits
, obits
);
863 if (error
|| td
->td_retval
[0] != 0)
869 /* select is not restarted after signals... */
870 if (error
== ERESTART
)
872 if (error
== EWOULDBLOCK
)
874 #define putbits(name, x) \
875 if (name && (error2 = copyout(obits[x], name, ncpbytes))) \
885 if (selbits
!= &s_selbits
[0])
886 free(selbits
, M_SELECT
);
892 * Traverse the list of fds attached to this thread's seltd and check for
896 selrescan(struct thread
*td
, fd_mask
**ibits
, fd_mask
**obits
)
905 /* Note: backend also returns POLLHUP/POLLERR if appropriate. */
906 static int flag
[3] = { POLLRDNORM
, POLLWRNORM
, POLLRDBAND
};
907 struct filedesc
*fdp
= td
->td_proc
->p_fd
;
911 STAILQ_FOREACH_SAFE(sfp
, &stp
->st_selq
, sf_link
, sfn
) {
912 fd
= (int)(uintptr_t)sfp
->sf_cookie
;
915 /* If the selinfo wasn't cleared the event didn't fire. */
918 if ((fp
= fget_locked(fdp
, fd
)) == NULL
) {
919 FILEDESC_SUNLOCK(fdp
);
922 for (msk
= 0; msk
< 3; msk
++) {
923 if (ibits
[msk
] == NULL
)
925 if ((ibits
[msk
][fd
/NFDBITS
] &
926 ((fd_mask
) 1 << (fd
% NFDBITS
))) == 0)
928 if (fo_poll(fp
, flag
[msk
], td
->td_ucred
, td
)) {
929 obits
[msk
][(fd
)/NFDBITS
] |=
930 ((fd_mask
)1 << ((fd
) % NFDBITS
));
935 FILEDESC_SUNLOCK(fdp
);
937 td
->td_retval
[0] = n
;
942 * Perform the initial filedescriptor scan and register ourselves with
946 selscan(td
, ibits
, obits
, nfd
)
948 fd_mask
**ibits
, **obits
;
955 /* Note: backend also returns POLLHUP/POLLERR if appropriate. */
956 static int flag
[3] = { POLLRDNORM
, POLLWRNORM
, POLLRDBAND
};
957 struct filedesc
*fdp
= td
->td_proc
->p_fd
;
960 for (msk
= 0; msk
< 3; msk
++) {
961 if (ibits
[msk
] == NULL
)
963 for (i
= 0; i
< nfd
; i
+= NFDBITS
) {
964 bits
= ibits
[msk
][i
/NFDBITS
];
965 /* ffs(int mask) not portable, fd_mask is long */
966 for (fd
= i
; bits
&& fd
< nfd
; fd
++, bits
>>= 1) {
969 if ((fp
= fget_locked(fdp
, fd
)) == NULL
) {
970 FILEDESC_SUNLOCK(fdp
);
973 selfdalloc(td
, (void *)(uintptr_t)fd
);
974 if (fo_poll(fp
, flag
[msk
], td
->td_ucred
,
976 obits
[msk
][(fd
)/NFDBITS
] |=
977 ((fd_mask
)1 << ((fd
) % NFDBITS
));
983 FILEDESC_SUNLOCK(fdp
);
984 td
->td_retval
[0] = n
;
988 #ifndef _SYS_SYSPROTO_H_
998 struct poll_args
*uap
;
1000 struct pollfd
*bits
;
1001 struct pollfd smallbits
[32];
1002 struct timeval atv
, rtv
, ttv
;
1003 int error
= 0, timo
;
1008 if (nfds
> maxfilesperproc
&& nfds
> FD_SETSIZE
)
1010 ni
= nfds
* sizeof(struct pollfd
);
1011 if (ni
> sizeof(smallbits
))
1012 bits
= malloc(ni
, M_TEMP
, M_WAITOK
);
1015 error
= copyin(uap
->fds
, bits
, ni
);
1018 if (uap
->timeout
!= INFTIM
) {
1019 atv
.tv_sec
= uap
->timeout
/ 1000;
1020 atv
.tv_usec
= (uap
->timeout
% 1000) * 1000;
1021 if (itimerfix(&atv
)) {
1025 getmicrouptime(&rtv
);
1026 timevaladd(&atv
, &rtv
);
1033 /* Iterate until the timeout expires or descriptors become ready. */
1035 error
= pollscan(td
, bits
, nfds
);
1036 if (error
|| td
->td_retval
[0] != 0)
1038 if (atv
.tv_sec
|| atv
.tv_usec
) {
1039 getmicrouptime(&rtv
);
1040 if (timevalcmp(&rtv
, &atv
, >=))
1043 timevalsub(&ttv
, &rtv
);
1044 timo
= ttv
.tv_sec
> 24 * 60 * 60 ?
1045 24 * 60 * 60 * hz
: tvtohz(&ttv
);
1047 error
= seltdwait(td
, timo
);
1050 error
= pollrescan(td
);
1051 if (error
|| td
->td_retval
[0] != 0)
1057 /* poll is not restarted after signals... */
1058 if (error
== ERESTART
)
1060 if (error
== EWOULDBLOCK
)
1063 error
= copyout(bits
, uap
->fds
, ni
);
1068 if (ni
> sizeof(smallbits
))
1074 pollrescan(struct thread
*td
)
1080 struct filedesc
*fdp
;
1086 fdp
= td
->td_proc
->p_fd
;
1088 FILEDESC_SLOCK(fdp
);
1089 STAILQ_FOREACH_SAFE(sfp
, &stp
->st_selq
, sf_link
, sfn
) {
1090 fd
= (struct pollfd
*)sfp
->sf_cookie
;
1092 selfdfree(stp
, sfp
);
1093 /* If the selinfo wasn't cleared the event didn't fire. */
1096 fp
= fdp
->fd_ofiles
[fd
->fd
];
1098 fd
->revents
= POLLNVAL
;
1103 * Note: backend also returns POLLHUP and
1104 * POLLERR if appropriate.
1106 fd
->revents
= fo_poll(fp
, fd
->events
, td
->td_ucred
, td
);
1107 if (fd
->revents
!= 0)
1110 FILEDESC_SUNLOCK(fdp
);
1112 td
->td_retval
[0] = n
;
1118 pollscan(td
, fds
, nfd
)
1123 struct filedesc
*fdp
= td
->td_proc
->p_fd
;
1128 FILEDESC_SLOCK(fdp
);
1129 for (i
= 0; i
< nfd
; i
++, fds
++) {
1130 if (fds
->fd
>= fdp
->fd_nfiles
) {
1131 fds
->revents
= POLLNVAL
;
1133 } else if (fds
->fd
< 0) {
1136 fp
= fdp
->fd_ofiles
[fds
->fd
];
1138 fds
->revents
= POLLNVAL
;
1142 * Note: backend also returns POLLHUP and
1143 * POLLERR if appropriate.
1145 selfdalloc(td
, fds
);
1146 fds
->revents
= fo_poll(fp
, fds
->events
,
1148 if (fds
->revents
!= 0)
1153 FILEDESC_SUNLOCK(fdp
);
1154 td
->td_retval
[0] = n
;
1159 * OpenBSD poll system call.
1161 * XXX this isn't quite a true representation.. OpenBSD uses select ops.
1163 #ifndef _SYS_SYSPROTO_H_
1164 struct openbsd_poll_args
{
/*
 * OpenBSD-flavoured poll entry point: forwards straight to poll(),
 * reinterpreting the argument structure as a struct poll_args.
 *
 * td  - calling thread.
 * uap - system call arguments; cast unchanged to struct poll_args.
 *
 * Returns whatever poll() returns.
 */
int
openbsd_poll(struct thread *td, struct openbsd_poll_args *uap)
{

	return (poll(td, (struct poll_args *)uap));
}
1179 * XXX This was created specifically to support netncp and netsmb. This
1180 * allows the caller to specify a socket to wait for events on. It returns
1181 * 0 if any events matched and an error otherwise. There is no way to
1182 * determine which events fired.
1185 selsocket(struct socket
*so
, int events
, struct timeval
*tvp
, struct thread
*td
)
1187 struct timeval atv
, rtv
, ttv
;
1192 if (itimerfix(&atv
))
1194 getmicrouptime(&rtv
);
1195 timevaladd(&atv
, &rtv
);
1204 * Iterate until the timeout expires or the socket becomes ready.
1207 selfdalloc(td
, NULL
);
1208 error
= sopoll(so
, events
, NULL
, td
);
1209 /* error here is actually the ready events. */
1212 if (atv
.tv_sec
|| atv
.tv_usec
) {
1213 getmicrouptime(&rtv
);
1214 if (timevalcmp(&rtv
, &atv
, >=)) {
1216 return (EWOULDBLOCK
);
1219 timevalsub(&ttv
, &rtv
);
1220 timo
= ttv
.tv_sec
> 24 * 60 * 60 ?
1221 24 * 60 * 60 * hz
: tvtohz(&ttv
);
1223 error
= seltdwait(td
, timo
);
1228 /* XXX Duplicates ncp/smb behavior. */
1229 if (error
== ERESTART
)
1235 * Preallocate two selfds associated with 'cookie'. Some fo_poll routines
1236 * have two select sets, one for read and another for write.
1239 selfdalloc(struct thread
*td
, void *cookie
)
1244 if (stp
->st_free1
== NULL
)
1245 stp
->st_free1
= uma_zalloc(selfd_zone
, M_WAITOK
|M_ZERO
);
1246 stp
->st_free1
->sf_td
= stp
;
1247 stp
->st_free1
->sf_cookie
= cookie
;
1248 if (stp
->st_free2
== NULL
)
1249 stp
->st_free2
= uma_zalloc(selfd_zone
, M_WAITOK
|M_ZERO
);
1250 stp
->st_free2
->sf_td
= stp
;
1251 stp
->st_free2
->sf_cookie
= cookie
;
1255 selfdfree(struct seltd
*stp
, struct selfd
*sfp
)
1257 STAILQ_REMOVE(&stp
->st_selq
, sfp
, selfd
, sf_link
);
1258 mtx_lock(sfp
->sf_mtx
);
1260 TAILQ_REMOVE(&sfp
->sf_si
->si_tdlist
, sfp
, sf_threads
);
1261 mtx_unlock(sfp
->sf_mtx
);
1262 uma_zfree(selfd_zone
, sfp
);
1266 * Record a select request.
1269 selrecord(selector
, sip
)
1270 struct thread
*selector
;
1271 struct selinfo
*sip
;
1277 stp
= selector
->td_sel
;
1279 * Don't record when doing a rescan.
1281 if (stp
->st_flags
& SELTD_RESCAN
)
1284 * Grab one of the preallocated descriptors.
1287 if ((sfp
= stp
->st_free1
) != NULL
)
1288 stp
->st_free1
= NULL
;
1289 else if ((sfp
= stp
->st_free2
) != NULL
)
1290 stp
->st_free2
= NULL
;
1292 panic("selrecord: No free selfd on selq");
1293 mtxp
= mtx_pool_find(mtxpool_sleep
, sip
);
1295 * Initialize the sfp and queue it in the thread.
1299 STAILQ_INSERT_TAIL(&stp
->st_selq
, sfp
, sf_link
);
1301 * Now that we've locked the sip, check for initialization.
1304 if (sip
->si_mtx
== NULL
) {
1306 TAILQ_INIT(&sip
->si_tdlist
);
1309 * Add this thread to the list of selfds listening on this selinfo.
1311 TAILQ_INSERT_TAIL(&sip
->si_tdlist
, sfp
, sf_threads
);
1312 mtx_unlock(sip
->si_mtx
);
1315 /* Wake up a selecting thread. */
1318 struct selinfo
*sip
;
1320 doselwakeup(sip
, -1);
/*
 * Wake up a selecting thread, and set its priority.
 *
 * sip - selinfo whose waiters are to be woken.
 * pri - priority handed through to doselwakeup().
 */
void
selwakeuppri(struct selinfo *sip, int pri)
{

	doselwakeup(sip, pri);
}
1333 * Do a wakeup when a selectable event occurs.
1336 doselwakeup(sip
, pri
)
1337 struct selinfo
*sip
;
1344 /* If it's not initialized there can't be any waiters. */
1345 if (sip
->si_mtx
== NULL
)
1348 * Locking the selinfo locks all selfds associated with it.
1350 mtx_lock(sip
->si_mtx
);
1351 TAILQ_FOREACH_SAFE(sfp
, &sip
->si_tdlist
, sf_threads
, sfn
) {
1353 * Once we remove this sfp from the list and clear the
1354 * sf_si seltdclear will know to ignore this si.
1356 TAILQ_REMOVE(&sip
->si_tdlist
, sfp
, sf_threads
);
1359 mtx_lock(&stp
->st_mtx
);
1360 stp
->st_flags
|= SELTD_PENDING
;
1361 cv_broadcastpri(&stp
->st_wait
, pri
);
1362 mtx_unlock(&stp
->st_mtx
);
1364 mtx_unlock(sip
->si_mtx
);
1368 seltdinit(struct thread
*td
)
1372 if ((stp
= td
->td_sel
) != NULL
)
1374 td
->td_sel
= stp
= malloc(sizeof(*stp
), M_SELECT
, M_WAITOK
|M_ZERO
);
1375 mtx_init(&stp
->st_mtx
, "sellck", NULL
, MTX_DEF
);
1376 cv_init(&stp
->st_wait
, "select");
1379 STAILQ_INIT(&stp
->st_selq
);
1383 seltdwait(struct thread
*td
, int timo
)
1390 * An event of interest may occur while we do not hold the seltd
1391 * locked so check the pending flag before we sleep.
1393 mtx_lock(&stp
->st_mtx
);
1395 * Any further calls to selrecord will be a rescan.
1397 stp
->st_flags
|= SELTD_RESCAN
;
1398 if (stp
->st_flags
& SELTD_PENDING
) {
1399 mtx_unlock(&stp
->st_mtx
);
1403 error
= cv_timedwait_sig(&stp
->st_wait
, &stp
->st_mtx
, timo
);
1405 error
= cv_wait_sig(&stp
->st_wait
, &stp
->st_mtx
);
1406 mtx_unlock(&stp
->st_mtx
);
1412 seltdfini(struct thread
*td
)
1420 uma_zfree(selfd_zone
, stp
->st_free1
);
1422 uma_zfree(selfd_zone
, stp
->st_free2
);
1424 free(stp
, M_SELECT
);
1428 * Remove the references to the thread from all of the objects we were
1432 seltdclear(struct thread
*td
)
1439 STAILQ_FOREACH_SAFE(sfp
, &stp
->st_selq
, sf_link
, sfn
)
1440 selfdfree(stp
, sfp
);
1444 static void selectinit(void *);
1445 SYSINIT(select
, SI_SUB_SYSCALLS
, SI_ORDER_ANY
, selectinit
, NULL
);
1447 selectinit(void *dummy __unused
)
1449 selfd_zone
= uma_zcreate("selfd", sizeof(struct selfd
), NULL
, NULL
,
1450 NULL
, NULL
, UMA_ALIGN_PTR
, 0);