/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * Copyright 2004 John-Mark Gurney <jmg@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/kthread.h>
#include <sys/selinfo.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/poll.h>
#include <sys/protosw.h>
#include <sys/sigio.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <vm/uma.h>
static MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");

/*
 * This lock is used if multiple kq locks are required.  This possibly
 * should be made into a per proc lock.
 */
static struct mtx	kq_global;
MTX_SYSINIT(kq_global, &kq_global, "kqueue order", MTX_DEF);
#define KQ_GLOBAL_LOCK(lck, haslck)	do {	\
	if (!haslck)				\
		mtx_lock(lck);			\
	haslck = 1;				\
} while (0)
#define KQ_GLOBAL_UNLOCK(lck, haslck)	do {	\
	if (haslck)				\
		mtx_unlock(lck);		\
	haslck = 0;				\
} while (0)

TASKQUEUE_DEFINE_THREAD(kqueue);
static int	kevent_copyout(void *arg, struct kevent *kevp, int count);
static int	kevent_copyin(void *arg, struct kevent *kevp, int count);
static int	kqueue_register(struct kqueue *kq, struct kevent *kev,
		    struct thread *td, int waitok);
static int	kqueue_acquire(struct file *fp, struct kqueue **kqp);
static void	kqueue_release(struct kqueue *kq, int locked);
static int	kqueue_expand(struct kqueue *kq, struct filterops *fops,
		    uintptr_t ident, int waitok);
static void	kqueue_task(void *arg, int pending);
static int	kqueue_scan(struct kqueue *kq, int maxevents,
		    struct kevent_copyops *k_ops,
		    const struct timespec *timeout,
		    struct kevent *keva, struct thread *td);
static void	kqueue_wakeup(struct kqueue *kq);
static struct filterops *kqueue_fo_find(int filt);
static void	kqueue_fo_release(int filt);

static fo_rdwr_t	kqueue_read;
static fo_rdwr_t	kqueue_write;
static fo_truncate_t	kqueue_truncate;
static fo_ioctl_t	kqueue_ioctl;
static fo_poll_t	kqueue_poll;
static fo_kqfilter_t	kqueue_kqfilter;
static fo_stat_t	kqueue_stat;
static fo_close_t	kqueue_close;

static struct fileops kqueueops = {
	.fo_read = kqueue_read,
	.fo_write = kqueue_write,
	.fo_truncate = kqueue_truncate,
	.fo_ioctl = kqueue_ioctl,
	.fo_poll = kqueue_poll,
	.fo_kqfilter = kqueue_kqfilter,
	.fo_stat = kqueue_stat,
	.fo_close = kqueue_close,
};
static int 	knote_attach(struct knote *kn, struct kqueue *kq);
static void 	knote_drop(struct knote *kn, struct thread *td);
static void 	knote_enqueue(struct knote *kn);
static void 	knote_dequeue(struct knote *kn);
static void 	knote_init(void);
static struct 	knote *knote_alloc(int waitok);
static void 	knote_free(struct knote *kn);

static void	filt_kqdetach(struct knote *kn);
static int	filt_kqueue(struct knote *kn, long hint);
static int	filt_procattach(struct knote *kn);
static void	filt_procdetach(struct knote *kn);
static int	filt_proc(struct knote *kn, long hint);
static int	filt_fileattach(struct knote *kn);
static void	filt_timerexpire(void *knx);
static int	filt_timerattach(struct knote *kn);
static void	filt_timerdetach(struct knote *kn);
static int	filt_timer(struct knote *kn, long hint);

static struct filterops file_filtops =
	{ 1, filt_fileattach, NULL, NULL };
static struct filterops kqread_filtops =
	{ 1, NULL, filt_kqdetach, filt_kqueue };
/* XXX - move to kern_proc.c?  */
static struct filterops proc_filtops =
	{ 0, filt_procattach, filt_procdetach, filt_proc };
static struct filterops timer_filtops =
	{ 0, filt_timerattach, filt_timerdetach, filt_timer };

static uma_zone_t	knote_zone;
static int 		kq_ncallouts = 0;
static int 		kq_calloutmax = (4 * 1024);
SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
    &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue");
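/*
 * Illustrative note (not in the original sources): kq_calloutmax is exported
 * above as the kern.kq_calloutmax sysctl, so the EVFILT_TIMER callout limit
 * can be raised at run time, e.g. `sysctl kern.kq_calloutmax=8192`.
 */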
/* XXX - ensure not KN_INFLUX?? */
#define KNOTE_ACTIVATE(kn, islock) do { 				\
	if ((islock))							\
		mtx_assert(&(kn)->kn_kq->kq_lock, MA_OWNED);		\
	else								\
		KQ_LOCK((kn)->kn_kq);					\
	(kn)->kn_status |= KN_ACTIVE;					\
	if (((kn)->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)	\
		knote_enqueue((kn));					\
	if (!(islock))							\
		KQ_UNLOCK((kn)->kn_kq);					\
} while(0)
#define KQ_LOCK(kq) do {						\
	mtx_lock(&(kq)->kq_lock);					\
} while (0)
#define KQ_FLUX_WAKEUP(kq) do {						\
	if (((kq)->kq_state & KQ_FLUXWAIT) == KQ_FLUXWAIT) {		\
		(kq)->kq_state &= ~KQ_FLUXWAIT;				\
		wakeup((kq));						\
	}								\
} while (0)
#define KQ_UNLOCK_FLUX(kq) do {						\
	KQ_FLUX_WAKEUP(kq);						\
	mtx_unlock(&(kq)->kq_lock);					\
} while (0)
#define KQ_UNLOCK(kq) do {						\
	mtx_unlock(&(kq)->kq_lock);					\
} while (0)
#define KQ_OWNED(kq) do {						\
	mtx_assert(&(kq)->kq_lock, MA_OWNED);				\
} while (0)
#define KQ_NOTOWNED(kq) do {						\
	mtx_assert(&(kq)->kq_lock, MA_NOTOWNED);			\
} while (0)
#define KN_LIST_LOCK(kn) do {						\
	if (kn->kn_knlist != NULL)					\
		kn->kn_knlist->kl_lock(kn->kn_knlist->kl_lockarg);	\
} while (0)
#define KN_LIST_UNLOCK(kn) do {						\
	if (kn->kn_knlist != NULL) 					\
		kn->kn_knlist->kl_unlock(kn->kn_knlist->kl_lockarg);	\
} while (0)
#define	KNL_ASSERT_LOCK(knl, islocked) do {				\
	if (islocked)							\
		KNL_ASSERT_LOCKED(knl);					\
	else								\
		KNL_ASSERT_UNLOCKED(knl);				\
} while (0)
#ifdef INVARIANTS
#define	KNL_ASSERT_LOCKED(knl) do {					\
	if (!knl->kl_locked((knl)->kl_lockarg))				\
		panic("knlist not locked, but should be");		\
} while (0)
#define	KNL_ASSERT_UNLOCKED(knl) do {					\
	if (knl->kl_locked((knl)->kl_lockarg))				\
		panic("knlist locked, but should not be");		\
} while (0)
#else /* !INVARIANTS */
#define	KNL_ASSERT_LOCKED(knl) do {} while(0)
#define	KNL_ASSERT_UNLOCKED(knl) do {} while (0)
#endif /* INVARIANTS */

#define	KN_HASHSIZE		64		/* XXX should be tunable */
#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))
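/*
 * Illustrative worked example (not in the original sources): with a 64-slot
 * table the mask is 0x3f, so KN_HASH(0x1234, 0x3f) computes
 * (0x1234 ^ 0x12) & 0x3f == 0x1226 & 0x3f == 0x26, i.e. hash bucket 38.
 */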
static int
filt_nullattach(struct knote *kn)
{

	return (ENXIO);
};

struct filterops null_filtops =
	{ 0, filt_nullattach, NULL, NULL };

/* XXX - make SYSINIT to add these, and move into respective modules. */
extern struct filterops sig_filtops;
extern struct filterops fs_filtops;

/*
 * Table for all system-defined filters.
 */
static struct mtx	filterops_lock;
MTX_SYSINIT(kqueue_filterops, &filterops_lock, "protect sysfilt_ops",
	MTX_DEF);
static struct {
	struct filterops *for_fop;
	int for_refcnt;
} sysfilt_ops[EVFILT_SYSCOUNT] = {
	{ &file_filtops },			/* EVFILT_READ */
	{ &file_filtops },			/* EVFILT_WRITE */
	{ &null_filtops },			/* EVFILT_AIO */
	{ &file_filtops },			/* EVFILT_VNODE */
	{ &proc_filtops },			/* EVFILT_PROC */
	{ &sig_filtops },			/* EVFILT_SIGNAL */
	{ &timer_filtops },			/* EVFILT_TIMER */
	{ &file_filtops },			/* EVFILT_NETDEV */
	{ &fs_filtops },			/* EVFILT_FS */
	{ &null_filtops },			/* EVFILT_LIO */
};
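/*
 * Illustrative note (not in the original sources): the user-visible filter
 * numbers are small negative values, which is why ~filt is used as the index
 * into sysfilt_ops[] below; e.g. EVFILT_READ is (-1) and ~(-1) == 0, the
 * first entry of the table above.
 */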
/*
 * Simple redirection for all cdevsw style objects to call their fo_kqfilter
 * method.
 */
static int
filt_fileattach(struct knote *kn)
{

	return (fo_kqfilter(kn->kn_fp, kn));
}

/*ARGSUSED*/
static int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	if (kn->kn_filter != EVFILT_READ)
		return (EINVAL);

	kn->kn_status |= KN_KQUEUE;
	kn->kn_fop = &kqread_filtops;
	knlist_add(&kq->kq_sel.si_note, kn, 0);

	return (0);
}

static void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	knlist_remove(&kq->kq_sel.si_note, kn, 0);
}

/*ARGSUSED*/
static int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	kn->kn_data = kq->kq_count;
	return (kn->kn_data > 0);
}
/* XXX - move to kern_proc.c?  */
static int
filt_procattach(struct knote *kn)
{
	struct proc *p;
	int immediate;
	int error;

	immediate = 0;
	p = pfind(kn->kn_id);
	if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) {
		p = zpfind(kn->kn_id);
		immediate = 1;
	} else if (p != NULL && (p->p_flag & P_WEXIT)) {
		immediate = 1;
	}

	if (p == NULL)
		return (ESRCH);
	if ((error = p_cansee(curthread, p)))
		return (error);

	kn->kn_ptr.p_proc = p;
	kn->kn_flags |= EV_CLEAR;		/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;		/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}

	knlist_add(&p->p_klist, kn, 1);

	/*
	 * Immediately activate any exit notes if the target process is a
	 * zombie.  This is necessary to handle the case where the target
	 * process, e.g. a child, dies before the kevent is registered.
	 */
	if (immediate && filt_proc(kn, NOTE_EXIT))
		KNOTE_ACTIVATE(kn, 0);

	PROC_UNLOCK(p);

	return (0);
}
/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process does not exist any more.
 */
/* XXX - move to kern_proc.c?  */
static void
filt_procdetach(struct knote *kn)
{
	struct proc *p;

	p = kn->kn_ptr.p_proc;
	knlist_remove(&p->p_klist, kn, 0);
	kn->kn_ptr.p_proc = NULL;
}
/* XXX - move to kern_proc.c?  */
static int
filt_proc(struct knote *kn, long hint)
{
	struct proc *p = kn->kn_ptr.p_proc;
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * process is gone, so flag the event as finished.
	 */
	if (event == NOTE_EXIT) {
		if (!(kn->kn_status & KN_DETACHED))
			knlist_remove_inevent(&p->p_klist, kn);
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		kn->kn_data = p->p_xstat;
		kn->kn_ptr.p_proc = NULL;
		return (1);
	}

	return (kn->kn_fflags != 0);
}
/*
 * Called when the process forked. It mostly does the same as the
 * knote(), activating all knotes registered to be activated when the
 * process forked. Additionally, for each knote attached to the
 * parent, check whether user wants to track the new process. If so
 * attach a new knote to it, and immediately report an event with the
 * child's pid.
 */
void
knote_fork(struct knlist *list, int pid)
{
	struct kqueue *kq;
	struct knote *kn;
	struct kevent kev;
	int error;

	if (list == NULL)
		return;
	list->kl_lock(list->kl_lockarg);

	SLIST_FOREACH(kn, &list->kl_list, kn_selnext) {
		if ((kn->kn_status & KN_INFLUX) == KN_INFLUX)
			continue;
		kq = kn->kn_kq;
		KQ_LOCK(kq);
		if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
			KQ_UNLOCK(kq);
			continue;
		}

		/*
		 * The same as knote(), activate the event.
		 */
		if ((kn->kn_sfflags & NOTE_TRACK) == 0) {
			kn->kn_status |= KN_HASKQLOCK;
			if (kn->kn_fop->f_event(kn, NOTE_FORK | pid))
				KNOTE_ACTIVATE(kn, 1);
			kn->kn_status &= ~KN_HASKQLOCK;
			KQ_UNLOCK(kq);
			continue;
		}

		/*
		 * The NOTE_TRACK case. In addition to the activation
		 * of the event, we need to register new event to
		 * track the child. Drop the locks in preparation for
		 * the call to kqueue_register().
		 */
		kn->kn_status |= KN_INFLUX;
		KQ_UNLOCK(kq);
		list->kl_unlock(list->kl_lockarg);

		/*
		 * Activate existing knote and register a knote with
		 * new process.
		 */
		kev.ident = pid;
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;		/* parent */
		kev.udata = kn->kn_kevent.udata;/* preserve udata */
		error = kqueue_register(kq, &kev, NULL, 0);
		if (kn->kn_fop->f_event(kn, NOTE_FORK | pid))
			KNOTE_ACTIVATE(kn, 0);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
		KQ_LOCK(kq);
		kn->kn_status &= ~KN_INFLUX;
		KQ_UNLOCK_FLUX(kq);
		list->kl_lock(list->kl_lockarg);
	}
	list->kl_unlock(list->kl_lockarg);
}
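/*
 * Illustrative only (userland, not part of this file): the NOTE_TRACK path
 * above is typically driven by a registration along the lines of
 *
 *	EV_SET(&kev, pid, EVFILT_PROC, EV_ADD,
 *	    NOTE_EXIT | NOTE_FORK | NOTE_TRACK, 0, NULL);
 *
 * after which knote_fork() attaches an EV_FLAG1-marked knote to each new
 * child and reports it with NOTE_CHILD and the parent pid in kn_data.
 */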
static int
timertoticks(intptr_t data)
{
	struct timeval tv;
	int tticks;

	tv.tv_sec = data / 1000;
	tv.tv_usec = (data % 1000) * 1000;
	tticks = tvtohz(&tv);

	return tticks;
}
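/*
 * Illustrative worked example (not in the original sources): data = 1500 (ms)
 * gives tv = { .tv_sec = 1, .tv_usec = 500000 } before the tvtohz()
 * conversion, i.e. 1.5 seconds worth of clock ticks.
 */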
/* XXX - move to kern_timeout.c? */
static void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;
	struct callout *calloutp;

	KNOTE_ACTIVATE(kn, 0);	/* XXX - handle locking */

	if ((kn->kn_flags & EV_ONESHOT) != EV_ONESHOT) {
		calloutp = (struct callout *)kn->kn_hook;
		callout_reset_curcpu(calloutp, timertoticks(kn->kn_sdata),
		    filt_timerexpire, kn);
	}
}
/*
 * data contains amount of time to sleep, in milliseconds
 */
/* XXX - move to kern_timeout.c? */
static int
filt_timerattach(struct knote *kn)
{
	struct callout *calloutp;

	atomic_add_int(&kq_ncallouts, 1);

	if (kq_ncallouts >= kq_calloutmax) {
		atomic_add_int(&kq_ncallouts, -1);
		return (ENOMEM);
	}

	kn->kn_flags |= EV_CLEAR;		/* automatically set */
	kn->kn_status &= ~KN_DETACHED;		/* knlist_add usually sets it */
	MALLOC(calloutp, struct callout *, sizeof(*calloutp),
	    M_KQUEUE, M_WAITOK);
	callout_init(calloutp, CALLOUT_MPSAFE);
	kn->kn_hook = calloutp;
	callout_reset_curcpu(calloutp, timertoticks(kn->kn_sdata),
	    filt_timerexpire, kn);

	return (0);
}
/* XXX - move to kern_timeout.c? */
static void
filt_timerdetach(struct knote *kn)
{
	struct callout *calloutp;

	calloutp = (struct callout *)kn->kn_hook;
	callout_drain(calloutp);
	FREE(calloutp, M_KQUEUE);
	atomic_add_int(&kq_ncallouts, -1);
	kn->kn_status |= KN_DETACHED;	/* knlist_remove usually clears it */
}
/* XXX - move to kern_timeout.c? */
static int
filt_timer(struct knote *kn, long hint)
{

	return (kn->kn_data != 0);
}
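/*
 * Illustrative only (userland, not part of this file): a periodic 500 ms
 * timer event would typically be registered as
 *
 *	struct kevent kev;
 *	EV_SET(&kev, 1, EVFILT_TIMER, EV_ADD | EV_ENABLE, 0, 500, NULL);
 *	kevent(kqfd, &kev, 1, NULL, 0, NULL);
 *
 * where the data field (500) is interpreted by timertoticks() above as a
 * period in milliseconds, and kqfd is a hypothetical kqueue descriptor.
 */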
/* ARGSUSED */
int
kqueue(struct thread *td, struct kqueue_args *uap)
{
	struct filedesc *fdp;
	struct kqueue *kq;
	struct file *fp;
	int fd, error;

	fdp = td->td_proc->p_fd;
	error = falloc(td, &fp, &fd);
	if (error)
		goto done2;

	/* An extra reference on `nfp' has been held for us by falloc(). */
	kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO);
	mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF|MTX_DUPOK);
	TAILQ_INIT(&kq->kq_head);
	knlist_init(&kq->kq_sel.si_note, &kq->kq_lock, NULL, NULL, NULL);
	TASK_INIT(&kq->kq_task, 0, kqueue_task, kq);

	FILEDESC_XLOCK(fdp);
	SLIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list);
	FILEDESC_XUNLOCK(fdp);

	finit(fp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops);
	fdrop(fp, td);

	td->td_retval[0] = fd;
done2:
	return (error);
}
#ifndef _SYS_SYSPROTO_H_
struct kevent_args {
	int	fd;
	const struct kevent *changelist;
	int	nchanges;
	struct	kevent *eventlist;
	int	nevents;
	const struct timespec *timeout;
};
#endif
int
kevent(struct thread *td, struct kevent_args *uap)
{
	struct timespec ts, *tsp;
	struct kevent_copyops k_ops = { uap,
					kevent_copyout,
					kevent_copyin};
	int error;
#ifdef KTRACE
	struct uio ktruio;
	struct iovec ktriov;
	struct uio *ktruioin = NULL;
	struct uio *ktruioout = NULL;
#endif

	if (uap->timeout != NULL) {
		error = copyin(uap->timeout, &ts, sizeof(ts));
		if (error)
			return (error);
		tsp = &ts;
	} else
		tsp = NULL;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO)) {
		ktriov.iov_base = uap->changelist;
		ktriov.iov_len = uap->nchanges * sizeof(struct kevent);
		ktruio = (struct uio){ .uio_iov = &ktriov, .uio_iovcnt = 1,
		    .uio_segflg = UIO_USERSPACE, .uio_rw = UIO_READ,
		    .uio_td = td };
		ktruioin = cloneuio(&ktruio);
		ktriov.iov_base = uap->eventlist;
		ktriov.iov_len = uap->nevents * sizeof(struct kevent);
		ktruioout = cloneuio(&ktruio);
	}
#endif

	error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents,
	    &k_ops, tsp);

#ifdef KTRACE
	if (ktruioin != NULL) {
		ktruioin->uio_resid = uap->nchanges * sizeof(struct kevent);
		ktrgenio(uap->fd, UIO_WRITE, ktruioin, 0);
		ktruioout->uio_resid = td->td_retval[0] * sizeof(struct kevent);
		ktrgenio(uap->fd, UIO_READ, ktruioout, error);
	}
#endif

	return (error);
}
/*
 * Copy 'count' items into the destination list pointed to by uap->eventlist.
 */
static int
kevent_copyout(void *arg, struct kevent *kevp, int count)
{
	struct kevent_args *uap;
	int error;

	KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
	uap = (struct kevent_args *)arg;

	error = copyout(kevp, uap->eventlist, count * sizeof *kevp);
	if (error == 0)
		uap->eventlist += count;
	return (error);
}

/*
 * Copy 'count' items from the list pointed to by uap->changelist.
 */
static int
kevent_copyin(void *arg, struct kevent *kevp, int count)
{
	struct kevent_args *uap;
	int error;

	KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
	uap = (struct kevent_args *)arg;

	error = copyin(uap->changelist, kevp, count * sizeof *kevp);
	if (error == 0)
		uap->changelist += count;
	return (error);
}
int
kern_kevent(struct thread *td, int fd, int nchanges, int nevents,
    struct kevent_copyops *k_ops, const struct timespec *timeout)
{
	struct kevent keva[KQ_NEVENTS];
	struct kevent *kevp, *changes;
	struct kqueue *kq;
	struct file *fp;
	int i, n, nerrors, error;

	if ((error = fget(td, fd, &fp)) != 0)
		return (error);
	if ((error = kqueue_acquire(fp, &kq)) != 0)
		goto done_norel;

	nerrors = 0;

	while (nchanges > 0) {
		n = nchanges > KQ_NEVENTS ? KQ_NEVENTS : nchanges;
		error = k_ops->k_copyin(k_ops->arg, keva, n);
		if (error)
			goto done;
		changes = keva;
		for (i = 0; i < n; i++) {
			kevp = &changes[i];
			if (!kevp->filter)
				continue;
			kevp->flags &= ~EV_SYSFLAGS;
			error = kqueue_register(kq, kevp, td, 1);
			if (error) {
				if (nevents != 0) {
					kevp->flags = EV_ERROR;
					kevp->data = error;
					(void) k_ops->k_copyout(k_ops->arg,
					    kevp, 1);
					nevents--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		nchanges -= n;
	}
	if (nerrors) {
		td->td_retval[0] = nerrors;
		error = 0;
		goto done;
	}

	error = kqueue_scan(kq, nevents, k_ops, timeout, keva, td);
done:
	kqueue_release(kq, 0);
done_norel:
	fdrop(fp, td);
	return (error);
}
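/*
 * Illustrative only (userland, not part of this file): the machinery above
 * is typically driven from user space along the lines of
 *
 *	int kqfd = kqueue();
 *	struct kevent change, event;
 *	EV_SET(&change, sockfd, EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, NULL);
 *	int n = kevent(kqfd, &change, 1, &event, 1, NULL);
 *
 * where sockfd is a hypothetical descriptor being monitored; each returned
 * struct kevent corresponds to one triggered knote copied out by
 * kevent_copyout() from kqueue_scan().
 */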
int
kqueue_add_filteropts(int filt, struct filterops *filtops)
{
	int error;

	error = 0;
	if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) {
		printf(
"trying to add a filterop that is out of range: %d is beyond %d\n",
		    ~filt, EVFILT_SYSCOUNT);
		return EINVAL;
	}
	mtx_lock(&filterops_lock);
	if (sysfilt_ops[~filt].for_fop != &null_filtops &&
	    sysfilt_ops[~filt].for_fop != NULL)
		error = EEXIST;
	else {
		sysfilt_ops[~filt].for_fop = filtops;
		sysfilt_ops[~filt].for_refcnt = 0;
	}
	mtx_unlock(&filterops_lock);

	return (error);
}

int
kqueue_del_filteropts(int filt)
{
	int error;

	error = 0;
	if (filt > 0 || filt + EVFILT_SYSCOUNT < 0)
		return EINVAL;

	mtx_lock(&filterops_lock);
	if (sysfilt_ops[~filt].for_fop == &null_filtops ||
	    sysfilt_ops[~filt].for_fop == NULL)
		error = EINVAL;
	else if (sysfilt_ops[~filt].for_refcnt != 0)
		error = EBUSY;
	else {
		sysfilt_ops[~filt].for_fop = &null_filtops;
		sysfilt_ops[~filt].for_refcnt = 0;
	}
	mtx_unlock(&filterops_lock);

	return error;
}
static struct filterops *
kqueue_fo_find(int filt)
{

	if (filt > 0 || filt + EVFILT_SYSCOUNT < 0)
		return NULL;

	mtx_lock(&filterops_lock);
	sysfilt_ops[~filt].for_refcnt++;
	if (sysfilt_ops[~filt].for_fop == NULL)
		sysfilt_ops[~filt].for_fop = &null_filtops;
	mtx_unlock(&filterops_lock);

	return sysfilt_ops[~filt].for_fop;
}

static void
kqueue_fo_release(int filt)
{

	if (filt > 0 || filt + EVFILT_SYSCOUNT < 0)
		return;

	mtx_lock(&filterops_lock);
	KASSERT(sysfilt_ops[~filt].for_refcnt > 0,
	    ("filter object refcount not valid on release"));
	sysfilt_ops[~filt].for_refcnt--;
	mtx_unlock(&filterops_lock);
}
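/*
 * Illustrative only (not in the original sources): a subsystem providing its
 * own filter would typically register it with something like
 *
 *	static struct filterops foo_filtops =
 *		{ 1, filt_fooattach, filt_foodetach, filt_foo };
 *	kqueue_add_filteropts(EVFILT_FOO, &foo_filtops);
 *
 * where EVFILT_FOO and the filt_foo* routines are hypothetical; the
 * registration is undone with kqueue_del_filteropts(EVFILT_FOO).
 */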
/*
 * A ref to kq (obtained via kqueue_acquire) must be held.  waitok will
 * influence if memory allocation should wait.  Make sure it is 0 if you
 * hold any mutexes.
 */
static int
kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int waitok)
{
	struct filterops *fops;
	struct file *fp;
	struct knote *kn, *tkn;
	int error, filt, event;
	int haskqglobal;

	fp = NULL;
	kn = NULL;
	error = 0;
	haskqglobal = 0;

	filt = kev->filter;
	fops = kqueue_fo_find(filt);
	if (fops == NULL)
		return EINVAL;

	tkn = knote_alloc(waitok);		/* prevent waiting with locks */

findkn:
	if (fops->f_isfd) {
		KASSERT(td != NULL, ("td is NULL"));
		error = fget(td, kev->ident, &fp);
		if (error)
			goto done;

		if ((kev->flags & EV_ADD) == EV_ADD && kqueue_expand(kq, fops,
		    kev->ident, 0) != 0) {
			/* try again */
			fdrop(fp, td);
			fp = NULL;
			error = kqueue_expand(kq, fops, kev->ident, waitok);
			if (error)
				goto done;
			goto findkn;
		}

		if (fp->f_type == DTYPE_KQUEUE) {
			/*
			 * if we add some inteligence about what we are doing,
			 * we should be able to support events on ourselves.
			 * We need to know when we are doing this to prevent
			 * getting both the knlist lock and the kq lock since
			 * they are the same thing.
			 */
			if (fp->f_data == kq) {
				error = EINVAL;
				goto done;
			}

			KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
		}

		KQ_LOCK(kq);
		if (kev->ident < kq->kq_knlistsize) {
			SLIST_FOREACH(kn, &kq->kq_knlist[kev->ident], kn_link)
				if (kev->filter == kn->kn_filter)
					break;
		}
	} else {
		if ((kev->flags & EV_ADD) == EV_ADD)
			kqueue_expand(kq, fops, kev->ident, waitok);

		KQ_LOCK(kq);
		if (kq->kq_knhashmask != 0) {
			struct klist *list;

			list = &kq->kq_knhash[
			    KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
			SLIST_FOREACH(kn, list, kn_link)
				if (kev->ident == kn->kn_id &&
				    kev->filter == kn->kn_filter)
					break;
		}
	}

	/* knote is in the process of changing, wait for it to stablize. */
	if (kn != NULL && (kn->kn_status & KN_INFLUX) == KN_INFLUX) {
		if (fp != NULL) {
			fdrop(fp, td);
			fp = NULL;
		}
		KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
		kq->kq_state |= KQ_FLUXWAIT;
		msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqflxwt", 0);
		goto findkn;
	}

	if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
		KQ_UNLOCK(kq);
		error = ENOENT;
		goto done;
	}

	/*
	 * kn now contains the matching knote, or NULL if no match
	 */
	if (kev->flags & EV_ADD) {
		if (kn == NULL) {
			kn = tkn;
			tkn = NULL;
			if (kn == NULL) {
				KQ_UNLOCK(kq);
				error = ENOMEM;
				goto done;
			}
			kn->kn_fp = fp;
			kn->kn_kq = kq;
			kn->kn_fop = fops;
			/*
			 * apply reference counts to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fops = NULL;
			fp = NULL;

			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;
			kn->kn_kevent.flags &= ~(EV_ADD | EV_DELETE |
			    EV_ENABLE | EV_DISABLE);
			kn->kn_status = KN_INFLUX|KN_DETACHED;

			error = knote_attach(kn, kq);
			KQ_UNLOCK(kq);
			if (error != 0) {
				tkn = kn;
				goto done;
			}

			if ((error = kn->kn_fop->f_attach(kn)) != 0) {
				knote_drop(kn, td);
				goto done;
			}
			KN_LIST_LOCK(kn);
		} else {
			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filter which has already been triggered.
			 */
			kn->kn_status |= KN_INFLUX;
			KQ_UNLOCK(kq);
			KN_LIST_LOCK(kn);
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kn->kn_kevent.udata = kev->udata;
		}

		/*
		 * We can get here with kn->kn_knlist == NULL.
		 * This can happen when the initial attach event decides that
		 * the event is "completed" already.  i.e. filt_procattach
		 * is called on a zombie process.  It will call filt_proc
		 * which will remove it from the list, and NULL kn_knlist.
		 */
		event = kn->kn_fop->f_event(kn, 0);
		KQ_LOCK(kq);
		if (event)
			KNOTE_ACTIVATE(kn, 1);
		kn->kn_status &= ~KN_INFLUX;
		KN_LIST_UNLOCK(kn);
	} else if (kev->flags & EV_DELETE) {
		kn->kn_status |= KN_INFLUX;
		KQ_UNLOCK(kq);
		if (!(kn->kn_status & KN_DETACHED))
			kn->kn_fop->f_detach(kn);
		knote_drop(kn, td);
		goto done;
	}

	if ((kev->flags & EV_DISABLE) &&
	    ((kn->kn_status & KN_DISABLED) == 0)) {
		kn->kn_status |= KN_DISABLED;
	}

	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
		kn->kn_status &= ~KN_DISABLED;
		if ((kn->kn_status & KN_ACTIVE) &&
		    ((kn->kn_status & KN_QUEUED) == 0))
			knote_enqueue(kn);
	}
	KQ_UNLOCK_FLUX(kq);

done:
	KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
	if (fp != NULL)
		fdrop(fp, td);
	if (tkn != NULL)
		knote_free(tkn);
	if (fops != NULL)
		kqueue_fo_release(filt);
	return (error);
}
static int
kqueue_acquire(struct file *fp, struct kqueue **kqp)
{
	int error;
	struct kqueue *kq;

	error = 0;

	kq = fp->f_data;
	if (fp->f_type != DTYPE_KQUEUE || kq == NULL)
		return (EBADF);
	*kqp = kq;
	KQ_LOCK(kq);
	if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) {
		KQ_UNLOCK(kq);
		return (EBADF);
	}
	kq->kq_refcnt++;
	KQ_UNLOCK(kq);

	return error;
}

static void
kqueue_release(struct kqueue *kq, int locked)
{
	if (locked)
		KQ_OWNED(kq);
	else
		KQ_LOCK(kq);
	kq->kq_refcnt--;
	if (kq->kq_refcnt == 1)
		wakeup(&kq->kq_refcnt);
	if (!locked)
		KQ_UNLOCK(kq);
}
static void
kqueue_schedtask(struct kqueue *kq)
{

	KQ_OWNED(kq);
	KASSERT(((kq->kq_state & KQ_TASKDRAIN) != KQ_TASKDRAIN),
	    ("scheduling kqueue task while draining"));

	if ((kq->kq_state & KQ_TASKSCHED) != KQ_TASKSCHED) {
		taskqueue_enqueue(taskqueue_kqueue, &kq->kq_task);
		kq->kq_state |= KQ_TASKSCHED;
	}
}
/*
 * Expand the kq to make sure we have storage for fops/ident pair.
 *
 * Return 0 on success (or no work necessary), return errno on failure.
 *
 * Not calling hashinit w/ waitok (proper malloc flag) should be safe.
 * If kqueue_register is called from a non-fd context, there usually/should
 * be no locks held.
 */
static int
kqueue_expand(struct kqueue *kq, struct filterops *fops, uintptr_t ident,
	int waitok)
{
	struct klist *list, *tmp_knhash;
	u_long tmp_knhashmask;
	int size;
	int fd;
	int mflag = waitok ? M_WAITOK : M_NOWAIT;

	KQ_NOTOWNED(kq);

	if (fops->f_isfd) {
		fd = ident;
		if (kq->kq_knlistsize <= fd) {
			size = kq->kq_knlistsize;
			while (size <= fd)
				size += KQEXTENT;
			MALLOC(list, struct klist *,
			    size * sizeof list, M_KQUEUE, mflag);
			if (list == NULL)
				return ENOMEM;
			KQ_LOCK(kq);
			if (kq->kq_knlistsize > fd) {
				FREE(list, M_KQUEUE);
				list = NULL;
			} else {
				if (kq->kq_knlist != NULL) {
					bcopy(kq->kq_knlist, list,
					    kq->kq_knlistsize * sizeof list);
					FREE(kq->kq_knlist, M_KQUEUE);
					kq->kq_knlist = NULL;
				}
				bzero((caddr_t)list +
				    kq->kq_knlistsize * sizeof list,
				    (size - kq->kq_knlistsize) * sizeof list);
				kq->kq_knlistsize = size;
				kq->kq_knlist = list;
			}
			KQ_UNLOCK(kq);
		}
	} else {
		if (kq->kq_knhashmask == 0) {
			tmp_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
			    &tmp_knhashmask);
			if (tmp_knhash == NULL)
				return ENOMEM;
			KQ_LOCK(kq);
			if (kq->kq_knhashmask == 0) {
				kq->kq_knhash = tmp_knhash;
				kq->kq_knhashmask = tmp_knhashmask;
			} else {
				free(tmp_knhash, M_KQUEUE);
			}
			KQ_UNLOCK(kq);
		}
	}

	KQ_NOTOWNED(kq);
	return 0;
}
static void
kqueue_task(void *arg, int pending)
{
	struct kqueue *kq;
	int haskqglobal;

	haskqglobal = 0;
	kq = arg;

	KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
	KQ_LOCK(kq);

	KNOTE_LOCKED(&kq->kq_sel.si_note, 0);

	kq->kq_state &= ~KQ_TASKSCHED;
	if ((kq->kq_state & KQ_TASKDRAIN) == KQ_TASKDRAIN) {
		wakeup(&kq->kq_state);
	}
	KQ_UNLOCK(kq);
	KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
}
/*
 * Scan, update kn_data (if not ONESHOT), and copyout triggered events.
 * We treat KN_MARKER knotes as if they are INFLUX.
 */
static int
kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops,
    const struct timespec *tsp, struct kevent *keva, struct thread *td)
{
	struct kevent *kevp;
	struct timeval atv, rtv, ttv;
	struct knote *kn, *marker;
	int count, timeout, nkev, error, influx;
	int haskqglobal;

	count = maxevents;
	nkev = 0;
	error = 0;
	haskqglobal = 0;

	if (maxevents == 0)
		goto done_nl;

	if (tsp != NULL) {
		TIMESPEC_TO_TIMEVAL(&atv, tsp);
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done_nl;
		}
		if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
			timeout = -1;
		else
			timeout = atv.tv_sec > 24 * 60 * 60 ?
			    24 * 60 * 60 * hz : tvtohz(&atv);
		getmicrouptime(&rtv);
		timevaladd(&atv, &rtv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
		timeout = 0;
	}
	marker = knote_alloc(1);
	if (marker == NULL) {
		error = ENOMEM;
		goto done_nl;
	}
	marker->kn_status = KN_MARKER;
	KQ_LOCK(kq);
	goto start;

retry:
	if (atv.tv_sec || atv.tv_usec) {
		getmicrouptime(&rtv);
		if (timevalcmp(&rtv, &atv, >=))
			goto done;
		ttv = atv;
		timevalsub(&ttv, &rtv);
		timeout = ttv.tv_sec > 24 * 60 * 60 ?
		    24 * 60 * 60 * hz : tvtohz(&ttv);
	}

start:
	kevp = keva;
	if (kq->kq_count == 0) {
		if (timeout < 0) {
			error = EWOULDBLOCK;
		} else {
			kq->kq_state |= KQ_SLEEP;
			error = msleep(kq, &kq->kq_lock, PSOCK | PCATCH,
			    "kqread", timeout);
		}
		if (error == 0)
			goto retry;
		/* don't restart after signals... */
		if (error == ERESTART)
			error = EINTR;
		else if (error == EWOULDBLOCK)
			error = 0;
		goto done;
	}

	TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe);
	influx = 0;
	while (count) {
		KQ_OWNED(kq);
		kn = TAILQ_FIRST(&kq->kq_head);

		if ((kn->kn_status == KN_MARKER && kn != marker) ||
		    (kn->kn_status & KN_INFLUX) == KN_INFLUX) {
			if (influx) {
				influx = 0;
				KQ_FLUX_WAKEUP(kq);
			}
			kq->kq_state |= KQ_FLUXWAIT;
			error = msleep(kq, &kq->kq_lock, PSOCK,
			    "kqflxwt", 0);
			continue;
		}

		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		if ((kn->kn_status & KN_DISABLED) == KN_DISABLED) {
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			continue;
		}
		if (kn == marker) {
			KQ_FLUX_WAKEUP(kq);
			if (count == maxevents)
				goto retry;
			goto done;
		}
		KASSERT((kn->kn_status & KN_INFLUX) == 0,
		    ("KN_INFLUX set when not suppose to be"));

		if ((kn->kn_flags & EV_ONESHOT) == EV_ONESHOT) {
			kn->kn_status &= ~KN_QUEUED;
			kn->kn_status |= KN_INFLUX;
			kq->kq_count--;
			KQ_UNLOCK(kq);
			/*
			 * We don't need to lock the list since we've marked
			 * it _INFLUX.
			 */
			*kevp = kn->kn_kevent;
			if (!(kn->kn_status & KN_DETACHED))
				kn->kn_fop->f_detach(kn);
			knote_drop(kn, td);
			KQ_LOCK(kq);
			kn = NULL;
		} else {
			kn->kn_status |= KN_INFLUX;
			KQ_UNLOCK(kq);
			if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE)
				KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
			KN_LIST_LOCK(kn);
			if (kn->kn_fop->f_event(kn, 0) == 0) {
				KQ_LOCK(kq);
				KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
				kn->kn_status &=
				    ~(KN_QUEUED | KN_ACTIVE | KN_INFLUX);
				kq->kq_count--;
				KN_LIST_UNLOCK(kn);
				influx = 1;
				continue;
			}
			*kevp = kn->kn_kevent;
			KQ_LOCK(kq);
			KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
			if (kn->kn_flags & EV_CLEAR) {
				kn->kn_data = 0;
				kn->kn_fflags = 0;
				kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
				kq->kq_count--;
			} else
				TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);

			kn->kn_status &= ~(KN_INFLUX);
			KN_LIST_UNLOCK(kn);
			influx = 1;
		}

		/* we are returning a copy to the user */
		kevp++;
		nkev++;
		count--;

		if (nkev == KQ_NEVENTS) {
			influx = 0;
			KQ_UNLOCK_FLUX(kq);
			error = k_ops->k_copyout(k_ops->arg, keva, nkev);
			nkev = 0;
			kevp = keva;
			KQ_LOCK(kq);
			if (error)
				break;
		}
	}
	TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe);
done:
	KQ_OWNED(kq);
	KQ_UNLOCK_FLUX(kq);
	knote_free(marker);
done_nl:
	KQ_NOTOWNED(kq);
	if (nkev != 0)
		error = k_ops->k_copyout(k_ops->arg, keva, nkev);
	td->td_retval[0] = maxevents - count;
	return (error);
}

/*
 * XXX
 * This could be expanded to call kqueue_scan, if desired.
 */
/*ARGSUSED*/
static int
kqueue_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
	int flags, struct thread *td)
{
	return (ENXIO);
}

/*ARGSUSED*/
static int
kqueue_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
	 int flags, struct thread *td)
{
	return (ENXIO);
}

/*ARGSUSED*/
static int
kqueue_truncate(struct file *fp, off_t length, struct ucred *active_cred,
	struct thread *td)
{

	return (EINVAL);
}

/*ARGSUSED*/
static int
kqueue_ioctl(struct file *fp, u_long cmd, void *data,
	struct ucred *active_cred, struct thread *td)
{
	/*
	 * Enabling sigio causes two major problems:
	 * 1) infinite recursion:
	 * Synopsys: kevent is being used to track signals and have FIOASYNC
	 * set.  On receipt of a signal this will cause a kqueue to recurse
	 * into itself over and over.  Sending the sigio causes the kqueue
	 * to become ready, which in turn posts sigio again, forever.
	 * Solution: this can be solved by setting a flag in the kqueue that
	 * we have a SIGIO in progress.
	 * 2) locking problems:
	 * Synopsys: Kqueue is a leaf subsystem, but adding signalling puts
	 * us above the proc and pgrp locks.
	 * Solution: Post a signal using an async mechanism, being sure to
	 * record a generation count in the delivery so that we do not deliver
	 * a signal to the wrong process.
	 *
	 * Note, these two mechanisms are somewhat mutually exclusive!
	 */
#if 0
	struct kqueue *kq;

	kq = fp->f_data;
	switch (cmd) {
	case FIOASYNC:
		if (*(int *)data) {
			kq->kq_state |= KQ_ASYNC;
		} else {
			kq->kq_state &= ~KQ_ASYNC;
		}
		return (0);

	case FIOSETOWN:
		return (fsetown(*(int *)data, &kq->kq_sigio));

	case FIOGETOWN:
		*(int *)data = fgetown(&kq->kq_sigio);
		return (0);
	}
#endif

	return (ENOTTY);
}

/*ARGSUSED*/
static int
kqueue_poll(struct file *fp, int events, struct ucred *active_cred,
	struct thread *td)
{
	struct kqueue *kq;
	int revents = 0;
	int error;

	if ((error = kqueue_acquire(fp, &kq)))
		return POLLERR;

	KQ_LOCK(kq);
	if (events & (POLLIN | POLLRDNORM)) {
		if (kq->kq_count) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(td, &kq->kq_sel);
			if (SEL_WAITING(&kq->kq_sel))
				kq->kq_state |= KQ_SEL;
		}
	}
	kqueue_release(kq, 1);
	KQ_UNLOCK(kq);
	return (revents);
}

/*ARGSUSED*/
static int
kqueue_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
	struct thread *td)
{

	bzero((void *)st, sizeof *st);
	/*
	 * We no longer return kq_count because the unlocked value is useless.
	 * If you spent all this time getting the count, why not spend your
	 * syscall better by calling kevent?
	 *
	 * XXX - This is needed for libc_r.
	 */
	st->st_mode = S_IFIFO;
	return (0);
}

/*ARGSUSED*/
static int
kqueue_close(struct file *fp, struct thread *td)
{
	struct kqueue *kq = fp->f_data;
	struct filedesc *fdp;
	struct knote *kn;
	int i;
	int error;

	if ((error = kqueue_acquire(fp, &kq)))
		return error;

	KQ_LOCK(kq);

	KASSERT((kq->kq_state & KQ_CLOSING) != KQ_CLOSING,
	    ("kqueue already closing"));
	kq->kq_state |= KQ_CLOSING;
	if (kq->kq_refcnt > 1)
		msleep(&kq->kq_refcnt, &kq->kq_lock, PSOCK, "kqclose", 0);

	KASSERT(kq->kq_refcnt == 1, ("other refs are out there!"));
	fdp = td->td_proc->p_fd;

	KASSERT(knlist_empty(&kq->kq_sel.si_note),
	    ("kqueue's knlist not empty"));

	for (i = 0; i < kq->kq_knlistsize; i++) {
		while ((kn = SLIST_FIRST(&kq->kq_knlist[i])) != NULL) {
			if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
				kq->kq_state |= KQ_FLUXWAIT;
				msleep(kq, &kq->kq_lock, PSOCK, "kqclo1", 0);
				continue;
			}
			kn->kn_status |= KN_INFLUX;
			KQ_UNLOCK(kq);
			if (!(kn->kn_status & KN_DETACHED))
				kn->kn_fop->f_detach(kn);
			knote_drop(kn, td);
			KQ_LOCK(kq);
		}
	}
	if (kq->kq_knhashmask != 0) {
		for (i = 0; i <= kq->kq_knhashmask; i++) {
			while ((kn = SLIST_FIRST(&kq->kq_knhash[i])) != NULL) {
				if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
					kq->kq_state |= KQ_FLUXWAIT;
					msleep(kq, &kq->kq_lock, PSOCK,
					    "kqclo2", 0);
					continue;
				}
				kn->kn_status |= KN_INFLUX;
				KQ_UNLOCK(kq);
				if (!(kn->kn_status & KN_DETACHED))
					kn->kn_fop->f_detach(kn);
				knote_drop(kn, td);
				KQ_LOCK(kq);
			}
		}
	}

	if ((kq->kq_state & KQ_TASKSCHED) == KQ_TASKSCHED) {
		kq->kq_state |= KQ_TASKDRAIN;
		msleep(&kq->kq_state, &kq->kq_lock, PSOCK, "kqtqdr", 0);
	}

	if ((kq->kq_state & KQ_SEL) == KQ_SEL) {
		selwakeuppri(&kq->kq_sel, PSOCK);
		if (!SEL_WAITING(&kq->kq_sel))
			kq->kq_state &= ~KQ_SEL;
	}

	KQ_UNLOCK(kq);

	FILEDESC_XLOCK(fdp);
	SLIST_REMOVE(&fdp->fd_kqlist, kq, kqueue, kq_list);
	FILEDESC_XUNLOCK(fdp);

	knlist_destroy(&kq->kq_sel.si_note);
	mtx_destroy(&kq->kq_lock);

	if (kq->kq_knhash != NULL)
		free(kq->kq_knhash, M_KQUEUE);
	if (kq->kq_knlist != NULL)
		free(kq->kq_knlist, M_KQUEUE);

	funsetown(&kq->kq_sigio);
	free(kq, M_KQUEUE);
	fp->f_data = NULL;

	return (0);
}

static void
kqueue_wakeup(struct kqueue *kq)
{
	KQ_OWNED(kq);

	if ((kq->kq_state & KQ_SLEEP) == KQ_SLEEP) {
		kq->kq_state &= ~KQ_SLEEP;
		wakeup(kq);
	}
	if ((kq->kq_state & KQ_SEL) == KQ_SEL) {
		selwakeuppri(&kq->kq_sel, PSOCK);
		if (!SEL_WAITING(&kq->kq_sel))
			kq->kq_state &= ~KQ_SEL;
	}
	if (!knlist_empty(&kq->kq_sel.si_note))
		kqueue_schedtask(kq);
	if ((kq->kq_state & KQ_ASYNC) == KQ_ASYNC) {
		pgsigio(&kq->kq_sigio, SIGIO, 0);
	}
}

/*
 * Walk down a list of knotes, activating them if their event has triggered.
 *
 * There is a possibility to optimize in the case of one kq watching another.
 * Instead of scheduling a task to wake it up, you could pass enough state
 * down the chain to make up the parent kqueue.  Make this code functional
 * XXX
 */
void
knote(struct knlist *list, long hint, int islocked)
{
	struct kqueue *kq;
	struct knote *kn;

	if (list == NULL)
		return;

	KNL_ASSERT_LOCK(list, islocked);

	if (!islocked)
		list->kl_lock(list->kl_lockarg);

	/*
	 * If we unlock the list lock (and set KN_INFLUX), we can eliminate
	 * the kqueue scheduling, but this will introduce four
	 * lock/unlock's for each knote to test.  If we do, continue to use
	 * SLIST_FOREACH, SLIST_FOREACH_SAFE is not safe in our case, it is
	 * only safe if you want to remove the current item, which we are
	 * not doing.
	 */
	SLIST_FOREACH(kn, &list->kl_list, kn_selnext) {
		kq = kn->kn_kq;
		if ((kn->kn_status & KN_INFLUX) != KN_INFLUX) {
			KQ_LOCK(kq);
			if ((kn->kn_status & KN_INFLUX) != KN_INFLUX) {
				kn->kn_status |= KN_HASKQLOCK;
				if (kn->kn_fop->f_event(kn, hint))
					KNOTE_ACTIVATE(kn, 1);
				kn->kn_status &= ~KN_HASKQLOCK;
			}
			KQ_UNLOCK(kq);
		}
		kq = NULL;
	}
	if (!islocked)
		list->kl_unlock(list->kl_lockarg);
}

/*
 * add a knote to a knlist
 */
void
knlist_add(struct knlist *knl, struct knote *kn, int islocked)
{
	KNL_ASSERT_LOCK(knl, islocked);
	KQ_NOTOWNED(kn->kn_kq);
	KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) ==
	    (KN_INFLUX|KN_DETACHED), ("knote not KN_INFLUX and KN_DETACHED"));
	if (!islocked)
		knl->kl_lock(knl->kl_lockarg);
	SLIST_INSERT_HEAD(&knl->kl_list, kn, kn_selnext);
	if (!islocked)
		knl->kl_unlock(knl->kl_lockarg);
	KQ_LOCK(kn->kn_kq);
	kn->kn_knlist = knl;
	kn->kn_status &= ~KN_DETACHED;
	KQ_UNLOCK(kn->kn_kq);
}

static void
knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, int kqislocked)
{
	KASSERT(!(!!kqislocked && !knlislocked), ("kq locked w/o knl locked"));
	KNL_ASSERT_LOCK(knl, knlislocked);
	mtx_assert(&kn->kn_kq->kq_lock, kqislocked ? MA_OWNED : MA_NOTOWNED);
	if (!kqislocked)
		KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) == KN_INFLUX,
		    ("knlist_remove called w/o knote being KN_INFLUX or already removed"));
	if (!knlislocked)
		knl->kl_lock(knl->kl_lockarg);
	SLIST_REMOVE(&knl->kl_list, kn, knote, kn_selnext);
	kn->kn_knlist = NULL;
	if (!knlislocked)
		knl->kl_unlock(knl->kl_lockarg);
	if (!kqislocked)
		KQ_LOCK(kn->kn_kq);
	kn->kn_status |= KN_DETACHED;
	if (!kqislocked)
		KQ_UNLOCK(kn->kn_kq);
}

/*
 * remove all knotes from a specified klist
 */
void
knlist_remove(struct knlist *knl, struct knote *kn, int islocked)
{

	knlist_remove_kq(knl, kn, islocked, 0);
}

/*
 * remove knote from a specified klist while in f_event handler.
 */
void
knlist_remove_inevent(struct knlist *knl, struct knote *kn)
{

	knlist_remove_kq(knl, kn, 1,
	    (kn->kn_status & KN_HASKQLOCK) == KN_HASKQLOCK);
}

int
knlist_empty(struct knlist *knl)
{

	KNL_ASSERT_LOCKED(knl);
	return SLIST_EMPTY(&knl->kl_list);
}

static struct mtx	knlist_lock;
MTX_SYSINIT(knlist_lock, &knlist_lock, "knlist lock for lockless objects",
	MTX_DEF);
static void knlist_mtx_lock(void *arg);
static void knlist_mtx_unlock(void *arg);
static int knlist_mtx_locked(void *arg);

static void
knlist_mtx_lock(void *arg)
{
	mtx_lock((struct mtx *)arg);
}

static void
knlist_mtx_unlock(void *arg)
{
	mtx_unlock((struct mtx *)arg);
}

static int
knlist_mtx_locked(void *arg)
{
	return (mtx_owned((struct mtx *)arg));
}

void
knlist_init(struct knlist *knl, void *lock, void (*kl_lock)(void *),
    void (*kl_unlock)(void *), int (*kl_locked)(void *))
{

	if (lock == NULL)
		knl->kl_lockarg = &knlist_lock;
	else
		knl->kl_lockarg = lock;

	if (kl_lock == NULL)
		knl->kl_lock = knlist_mtx_lock;
	else
		knl->kl_lock = kl_lock;
	if (kl_unlock == NULL)
		knl->kl_unlock = knlist_mtx_unlock;
	else
		knl->kl_unlock = kl_unlock;
	if (kl_locked == NULL)
		knl->kl_locked = knlist_mtx_locked;
	else
		knl->kl_locked = kl_locked;

	SLIST_INIT(&knl->kl_list);
}

void
knlist_destroy(struct knlist *knl)
{

#ifdef INVARIANTS
	/*
	 * if we run across this error, we need to find the offending
	 * driver and have it call knlist_clear.
	 */
	if (!SLIST_EMPTY(&knl->kl_list))
		printf("WARNING: destroying knlist w/ knotes on it!\n");
#endif

	knl->kl_lockarg = knl->kl_lock = knl->kl_unlock = NULL;
	SLIST_INIT(&knl->kl_list);
}

/*
 * Even if we are locked, we may need to drop the lock to allow any influx
 * knotes time to "settle".
 */
void
knlist_cleardel(struct knlist *knl, struct thread *td, int islocked, int killkn)
{
	struct knote *kn, *kn2;
	struct kqueue *kq;

	if (islocked)
		KNL_ASSERT_LOCKED(knl);
	else {
		KNL_ASSERT_UNLOCKED(knl);
again:		/* need to reacquire lock since we have dropped it */
		knl->kl_lock(knl->kl_lockarg);
	}

	SLIST_FOREACH_SAFE(kn, &knl->kl_list, kn_selnext, kn2) {
		kq = kn->kn_kq;
		KQ_LOCK(kq);
		if ((kn->kn_status & KN_INFLUX)) {
			KQ_UNLOCK(kq);
			continue;
		}
		knlist_remove_kq(knl, kn, 1, 1);
		if (killkn) {
			kn->kn_status |= KN_INFLUX | KN_DETACHED;
			KQ_UNLOCK(kq);
			knote_drop(kn, td);
		} else {
			/* Make sure cleared knotes disappear soon */
			kn->kn_flags |= (EV_EOF | EV_ONESHOT);
			KQ_UNLOCK(kq);
		}
		kq = NULL;
	}

	if (!SLIST_EMPTY(&knl->kl_list)) {
		/* there are still KN_INFLUX remaining */
		kn = SLIST_FIRST(&knl->kl_list);
		kq = kn->kn_kq;
		KQ_LOCK(kq);
		KASSERT(kn->kn_status & KN_INFLUX,
		    ("knote removed w/o list lock"));
		knl->kl_unlock(knl->kl_lockarg);
		kq->kq_state |= KQ_FLUXWAIT;
		msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqkclr", 0);
		kq = NULL;
		goto again;
	}

	if (islocked)
		KNL_ASSERT_LOCKED(knl);
	else {
		knl->kl_unlock(knl->kl_lockarg);
		KNL_ASSERT_UNLOCKED(knl);
	}
}

/*
 * Remove all knotes referencing a specified fd must be called with FILEDESC
 * lock.  This prevents a race where a new fd comes along and occupies the
 * entry and we attach a knote to the fd.
 */
void
knote_fdclose(struct thread *td, int fd)
{
	struct filedesc *fdp = td->td_proc->p_fd;
	struct kqueue *kq;
	struct knote *kn;
	int influx;

	FILEDESC_XLOCK_ASSERT(fdp);

	/*
	 * We shouldn't have to worry about new kevents appearing on fd
	 * since filedesc is locked.
	 */
	SLIST_FOREACH(kq, &fdp->fd_kqlist, kq_list) {
		KQ_LOCK(kq);

again:
		influx = 0;
		while (kq->kq_knlistsize > fd &&
		    (kn = SLIST_FIRST(&kq->kq_knlist[fd])) != NULL) {
			if (kn->kn_status & KN_INFLUX) {
				/* someone else might be waiting on our knote */
				if (influx)
					wakeup(kq);
				kq->kq_state |= KQ_FLUXWAIT;
				msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0);
				goto again;
			}
			kn->kn_status |= KN_INFLUX;
			KQ_UNLOCK(kq);
			if (!(kn->kn_status & KN_DETACHED))
				kn->kn_fop->f_detach(kn);
			knote_drop(kn, td);
			influx = 1;
			KQ_LOCK(kq);
		}
		KQ_UNLOCK_FLUX(kq);
	}
}

static int
knote_attach(struct knote *kn, struct kqueue *kq)
{
	struct klist *list;

	KASSERT(kn->kn_status & KN_INFLUX, ("knote not marked INFLUX"));
	KQ_OWNED(kq);

	if (kn->kn_fop->f_isfd) {
		if (kn->kn_id >= kq->kq_knlistsize)
			return ENOMEM;
		list = &kq->kq_knlist[kn->kn_id];
	} else {
		if (kq->kq_knhash == NULL)
			return ENOMEM;
		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
	}

	SLIST_INSERT_HEAD(list, kn, kn_link);

	return 0;
}

/*
 * knote must already have been detached using the f_detach method.
 * no lock need to be held, it is assumed that the KN_INFLUX flag is set
 * to prevent other removal.
 */
static void
knote_drop(struct knote *kn, struct thread *td)
{
	struct kqueue *kq;
	struct klist *list;

	kq = kn->kn_kq;

	KQ_NOTOWNED(kq);
	KASSERT((kn->kn_status & KN_INFLUX) == KN_INFLUX,
	    ("knote_drop called without KN_INFLUX set in kn_status"));

	KQ_LOCK(kq);
	if (kn->kn_fop->f_isfd)
		list = &kq->kq_knlist[kn->kn_id];
	else
		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];

	if (!SLIST_EMPTY(list))
		SLIST_REMOVE(list, kn, knote, kn_link);
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	KQ_UNLOCK_FLUX(kq);

	if (kn->kn_fop->f_isfd) {
		fdrop(kn->kn_fp, td);
		kn->kn_fp = NULL;
	}
	kqueue_fo_release(kn->kn_kevent.filter);
	kn->kn_fop = NULL;
	knote_free(kn);
}

static void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	KQ_OWNED(kn->kn_kq);
	KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued"));

	TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
	kn->kn_status |= KN_QUEUED;
	kq->kq_count++;
	kqueue_wakeup(kq);
}

static void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	KQ_OWNED(kn->kn_kq);
	KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued"));

	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
	kn->kn_status &= ~KN_QUEUED;
	kq->kq_count--;
}

static void
knote_init(void)
{

	knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, 0);
}
SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL);

static struct knote *
knote_alloc(int waitok)
{
	return ((struct knote *)uma_zalloc(knote_zone,
	    (waitok ? M_WAITOK : M_NOWAIT)|M_ZERO));
}

static void
knote_free(struct knote *kn)
{
	if (kn != NULL)
		uma_zfree(knote_zone, kn);
}

/*
 * Register the kev w/ the kq specified by fd.
 */
int
kqfd_register(int fd, struct kevent *kev, struct thread *td, int waitok)
{
	struct kqueue *kq;
	struct file *fp;
	int error;

	if ((error = fget(td, fd, &fp)) != 0)
		return (error);
	if ((error = kqueue_acquire(fp, &kq)) != 0)
		goto noacquire;

	error = kqueue_register(kq, kev, td, waitok);

	kqueue_release(kq, 0);

noacquire:
	fdrop(fp, td);

	return error;
}