2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2011 NetApp, Inc.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * Copyright 2018 Joyent, Inc.
33 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
37 * Micro event library for FreeBSD, designed for a single i/o thread
38 * using kqueue, and having events be persistent by default.
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
45 #ifndef WITHOUT_CAPSICUM
46 #include <capsicum_helpers.h>
57 #include <sys/types.h>
58 #ifndef WITHOUT_CAPSICUM
59 #include <sys/capsicum.h>
62 #include <sys/event.h>
66 #include <sys/siginfo.h>
67 #include <sys/queue.h>
68 #include <sys/debug.h>
74 #include <pthread_np.h>
81 #define EV_ENABLE 0x01
82 #define EV_ADD EV_ENABLE
83 #define EV_DISABLE 0x02
84 #define EV_DELETE 0x04
86 static int mevent_file_poll_interval_ms
= 5000;
89 static pthread_t mevent_tid
;
90 static pthread_once_t mevent_once
= PTHREAD_ONCE_INIT
;
92 static int mevent_timid
= 43;
94 static int mevent_pipefd
[2];
96 static pthread_mutex_t mevent_lmutex
= PTHREAD_MUTEX_INITIALIZER
;
99 void (*me_func
)(int, enum ev_type
, void *);
100 #define me_msecs me_fd
107 enum ev_type me_type
;
110 int me_state
; /* Desired kevent flags. */
114 port_notify_t me_notify
;
115 struct sigevent me_sigev
;
116 boolean_t me_auto_requeue
;
120 void (*mp_func
)(int, enum ev_type
, void *);
124 LIST_ENTRY(mevent
) me_list
;
127 static LIST_HEAD(listhead
, mevent
) global_head
, change_head
;
132 pthread_mutex_lock(&mevent_lmutex
);
138 pthread_mutex_unlock(&mevent_lmutex
);
142 mevent_pipe_read(int fd
, enum ev_type type __unused
, void *param __unused
)
144 char buf
[MEVENT_MAX
];
148 * Drain the pipe read side. The fd is non-blocking so this is
152 status
= read(fd
, buf
, sizeof(buf
));
153 } while (status
== MEVENT_MAX
);
162 * If calling from outside the i/o thread, write a byte on the
163 * pipe to force the i/o thread to exit the blocking kevent call.
165 if (mevent_pipefd
[1] != 0 && pthread_self() != mevent_tid
) {
166 write(mevent_pipefd
[1], &c
, 1);
173 #ifndef WITHOUT_CAPSICUM
184 #ifndef WITHOUT_CAPSICUM
185 cap_rights_init(&rights
, CAP_KQUEUE
);
186 if (caph_rights_limit(mfd
, &rights
) == -1)
187 errx(EX_OSERR
, "Unable to apply rights for sandbox");
190 LIST_INIT(&change_head
);
191 LIST_INIT(&global_head
);
197 mevent_kq_filter(struct mevent
*mevp
)
203 if (mevp
->me_type
== EVF_READ
)
204 retval
= EVFILT_READ
;
206 if (mevp
->me_type
== EVF_WRITE
)
207 retval
= EVFILT_WRITE
;
209 if (mevp
->me_type
== EVF_TIMER
)
210 retval
= EVFILT_TIMER
;
212 if (mevp
->me_type
== EVF_SIGNAL
)
213 retval
= EVFILT_SIGNAL
;
215 if (mevp
->me_type
== EVF_VNODE
)
216 retval
= EVFILT_VNODE
;
222 mevent_kq_flags(struct mevent
*mevp
)
226 retval
= mevp
->me_state
;
228 if (mevp
->me_type
== EVF_VNODE
)
235 mevent_kq_fflags(struct mevent
*mevp
)
241 switch (mevp
->me_type
) {
243 if ((mevp
->me_fflags
& EVFF_ATTRIB
) != 0)
244 retval
|= NOTE_ATTRIB
;
257 mevent_populate(struct mevent
*mevp
, struct kevent
*kev
)
259 if (mevp
->me_type
== EVF_TIMER
) {
260 kev
->ident
= mevp
->me_timid
;
261 kev
->data
= mevp
->me_msecs
;
263 kev
->ident
= mevp
->me_fd
;
266 kev
->filter
= mevent_kq_filter(mevp
);
267 kev
->flags
= mevent_kq_flags(mevp
);
268 kev
->fflags
= mevent_kq_fflags(mevp
);
273 mevent_build(struct kevent
*kev
)
275 struct mevent
*mevp
, *tmpp
;
282 LIST_FOREACH_SAFE(mevp
, &change_head
, me_list
, tmpp
) {
283 if (mevp
->me_closefd
) {
285 * A close of the file descriptor will remove the
290 assert((mevp
->me_state
& EV_ADD
) == 0);
291 mevent_populate(mevp
, &kev
[i
]);
296 LIST_REMOVE(mevp
, me_list
);
298 if (mevp
->me_state
& EV_DELETE
) {
301 LIST_INSERT_HEAD(&global_head
, mevp
, me_list
);
304 assert(i
< MEVENT_MAX
);
313 mevent_handle(struct kevent
*kev
, int numev
)
318 for (i
= 0; i
< numev
; i
++) {
321 /* XXX check for EV_ERROR ? */
323 (*mevp
->me_func
)(mevp
->me_fd
, mevp
->me_type
, mevp
->me_param
);
327 #else /* __FreeBSD__ */
330 mevent_clarify_state(struct mevent
*mevp
)
332 const int state
= mevp
->me_state
;
334 if ((state
& EV_DELETE
) != 0) {
335 /* All other intents are overridden by delete. */
336 mevp
->me_state
= EV_DELETE
;
341 * Without a distinction between EV_ADD and EV_ENABLE in our emulation,
342 * handling the add-disabled case means eliding the portfs operation
343 * when both flags are present.
345 * This is not a concern for subsequent enable/disable operations, as
346 * mevent_update() toggles the flags properly so they are not left in
349 if (state
== (EV_ENABLE
|EV_DISABLE
)) {
350 mevp
->me_state
= EV_DISABLE
;
358 mevent_poll_file_attrib(int fd
, enum ev_type type
, void *param
)
360 struct mevent
*mevp
= param
;
363 if (fstat(mevp
->me_poll
.mp_fd
, &st
) != 0) {
364 (void) fprintf(stderr
, "%s: fstat(%d) failed: %s\n",
365 __func__
, fd
, strerror(errno
));
370 * The only current consumer of file attribute monitoring is
371 * blockif, which wants to know about size changes.
373 if (mevp
->me_poll
.mp_size
!= st
.st_size
) {
374 mevp
->me_poll
.mp_size
= st
.st_size
;
376 (*mevp
->me_poll
.mp_func
)(mevp
->me_poll
.mp_fd
, EVF_VNODE
,
377 mevp
->me_poll
.mp_param
);
382 mevent_update_one_readwrite(struct mevent
*mevp
)
384 int portfd
= mevp
->me_notify
.portnfy_port
;
386 mevp
->me_auto_requeue
= B_FALSE
;
388 switch (mevp
->me_state
) {
391 const int events
= (mevp
->me_type
== EVF_READ
) ?
394 if (port_associate(portfd
, PORT_SOURCE_FD
, mevp
->me_fd
,
395 events
, mevp
) != 0) {
396 (void) fprintf(stderr
,
397 "port_associate fd %d %p failed: %s\n",
398 mevp
->me_fd
, mevp
, strerror(errno
));
405 * A disable that comes in while an event is being
406 * handled will result in an ENOENT.
408 if (port_dissociate(portfd
, PORT_SOURCE_FD
,
409 mevp
->me_fd
) != 0 && errno
!= ENOENT
) {
410 (void) fprintf(stderr
, "port_dissociate "
411 "portfd %d fd %d mevp %p failed: %s\n",
412 portfd
, mevp
->me_fd
, mevp
, strerror(errno
));
416 (void) fprintf(stderr
, "%s: unhandled state %d\n", __func__
,
423 mevent_update_one_timer(struct mevent
*mevp
)
425 mevp
->me_auto_requeue
= B_TRUE
;
427 switch (mevp
->me_state
) {
430 struct itimerspec it
= { 0 };
432 mevp
->me_sigev
.sigev_notify
= SIGEV_PORT
;
433 mevp
->me_sigev
.sigev_value
.sival_ptr
= &mevp
->me_notify
;
435 if (timer_create(CLOCK_REALTIME
, &mevp
->me_sigev
,
436 &mevp
->me_timid
) != 0) {
437 (void) fprintf(stderr
, "timer_create failed: %s",
442 /* The first timeout */
443 it
.it_value
.tv_sec
= mevp
->me_msecs
/ MILLISEC
;
444 it
.it_value
.tv_nsec
=
445 MSEC2NSEC(mevp
->me_msecs
% MILLISEC
);
446 /* Repeat at the same interval */
447 it
.it_interval
= it
.it_value
;
449 if (timer_settime(mevp
->me_timid
, 0, &it
, NULL
) != 0) {
450 (void) fprintf(stderr
, "timer_settime failed: %s",
457 if (timer_delete(mevp
->me_timid
) != 0) {
458 (void) fprintf(stderr
, "timer_delete failed: %s",
464 (void) fprintf(stderr
, "%s: unhandled state %d\n", __func__
,
471 mevent_update_one_vnode(struct mevent
*mevp
)
473 switch (mevp
->me_state
) {
479 if ((mevp
->me_fflags
& EVFF_ATTRIB
) != 0)
480 events
|= FILE_ATTRIB
;
485 * It is tempting to use the PORT_SOURCE_FILE type for this in
486 * conjunction with the FILE_ATTRIB event type. Unfortunately
487 * this event type triggers on any change to the file's
488 * ctime, and therefore for every write as well as attribute
489 * changes. It also does not work for ZVOLs.
491 * Convert this to a timer event and poll for the file
492 * attribute changes that we care about.
495 if (fstat(mevp
->me_fd
, &st
) != 0) {
496 (void) fprintf(stderr
, "fstat(%d) failed: %s\n",
497 mevp
->me_fd
, strerror(errno
));
501 mevp
->me_poll
.mp_fd
= mevp
->me_fd
;
502 mevp
->me_poll
.mp_size
= st
.st_size
;
504 mevp
->me_poll
.mp_func
= mevp
->me_func
;
505 mevp
->me_poll
.mp_param
= mevp
->me_param
;
506 mevp
->me_func
= mevent_poll_file_attrib
;
507 mevp
->me_param
= mevp
;
509 mevp
->me_type
= EVF_TIMER
;
511 mevp
->me_msecs
= mevent_file_poll_interval_ms
;
512 mevent_update_one_timer(mevp
);
519 * These events do not really exist as they are converted to
520 * timers; fall through to abort.
523 (void) fprintf(stderr
, "%s: unhandled state %d\n", __func__
,
530 mevent_update_one(struct mevent
*mevp
)
532 switch (mevp
->me_type
) {
535 mevent_update_one_readwrite(mevp
);
538 mevent_update_one_timer(mevp
);
541 mevent_update_one_vnode(mevp
);
543 case EVF_SIGNAL
: /* EVF_SIGNAL not yet implemented. */
545 (void) fprintf(stderr
, "%s: unhandled event type %d\n",
546 __func__
, mevp
->me_type
);
552 mevent_populate(struct mevent
*mevp
)
554 mevp
->me_notify
.portnfy_port
= mfd
;
555 mevp
->me_notify
.portnfy_user
= mevp
;
559 mevent_update_pending()
561 struct mevent
*mevp
, *tmpp
;
565 LIST_FOREACH_SAFE(mevp
, &change_head
, me_list
, tmpp
) {
566 mevent_populate(mevp
);
567 if (mevp
->me_closefd
) {
569 * A close of the file descriptor will remove the
572 (void) close(mevp
->me_fd
);
575 if (mevent_clarify_state(mevp
)) {
576 mevent_update_one(mevp
);
581 LIST_REMOVE(mevp
, me_list
);
583 if (mevp
->me_state
& EV_DELETE
) {
586 LIST_INSERT_HEAD(&global_head
, mevp
, me_list
);
594 mevent_handle_pe(port_event_t
*pe
)
596 struct mevent
*mevp
= pe
->portev_user
;
598 (*mevp
->me_func
)(mevp
->me_fd
, mevp
->me_type
, mevp
->me_param
);
601 if (!mevp
->me_cq
&& !mevp
->me_auto_requeue
) {
602 mevent_update_one(mevp
);
608 static struct mevent
*
609 mevent_add_state(int tfd
, enum ev_type type
,
610 void (*func
)(int, enum ev_type
, void *), void *param
,
611 int state
, int fflags
)
616 struct mevent
*lp
, *mevp
;
621 if (tfd
< 0 || func
== NULL
) {
627 pthread_once(&mevent_once
, mevent_init
);
632 * Verify that the fd/type tuple is not present in any list
634 LIST_FOREACH(lp
, &global_head
, me_list
) {
635 if (type
!= EVF_TIMER
&& lp
->me_fd
== tfd
&&
636 lp
->me_type
== type
) {
641 LIST_FOREACH(lp
, &change_head
, me_list
) {
642 if (type
!= EVF_TIMER
&& lp
->me_fd
== tfd
&&
643 lp
->me_type
== type
) {
649 * Allocate an entry and populate it.
651 mevp
= calloc(1, sizeof(struct mevent
));
656 if (type
== EVF_TIMER
) {
657 mevp
->me_msecs
= tfd
;
659 mevp
->me_timid
= mevent_timid
++;
665 mevp
->me_type
= type
;
666 mevp
->me_func
= func
;
667 mevp
->me_param
= param
;
668 mevp
->me_state
= state
;
669 mevp
->me_fflags
= fflags
;
672 * Try to add the event. If this fails, report the failure to
676 mevent_populate(mevp
, &kev
);
677 ret
= kevent(mfd
, &kev
, 1, NULL
, 0, NULL
);
683 mevp
->me_state
&= ~EV_ADD
;
685 mevent_populate(mevp
);
686 if (mevent_clarify_state(mevp
))
687 mevent_update_one(mevp
);
690 LIST_INSERT_HEAD(&global_head
, mevp
, me_list
);
699 mevent_add(int tfd
, enum ev_type type
,
700 void (*func
)(int, enum ev_type
, void *), void *param
)
703 return (mevent_add_state(tfd
, type
, func
, param
, EV_ADD
, 0));
707 mevent_add_flags(int tfd
, enum ev_type type
, int fflags
,
708 void (*func
)(int, enum ev_type
, void *), void *param
)
711 return (mevent_add_state(tfd
, type
, func
, param
, EV_ADD
, fflags
));
715 mevent_add_disabled(int tfd
, enum ev_type type
,
716 void (*func
)(int, enum ev_type
, void *), void *param
)
719 return (mevent_add_state(tfd
, type
, func
, param
, EV_ADD
| EV_DISABLE
, 0));
723 mevent_update(struct mevent
*evp
, bool enable
)
730 * It's not possible to enable/disable a deleted event
732 assert((evp
->me_state
& EV_DELETE
) == 0);
734 newstate
= evp
->me_state
;
736 newstate
|= EV_ENABLE
;
737 newstate
&= ~EV_DISABLE
;
739 newstate
|= EV_DISABLE
;
740 newstate
&= ~EV_ENABLE
;
744 * No update needed if state isn't changing
746 if (evp
->me_state
!= newstate
) {
747 evp
->me_state
= newstate
;
750 * Place the entry onto the changed list if not
753 if (evp
->me_cq
== 0) {
755 LIST_REMOVE(evp
, me_list
);
756 LIST_INSERT_HEAD(&change_head
, evp
, me_list
);
767 mevent_enable(struct mevent
*evp
)
770 return (mevent_update(evp
, true));
774 mevent_disable(struct mevent
*evp
)
777 return (mevent_update(evp
, false));
781 mevent_delete_event(struct mevent
*evp
, int closefd
)
786 * Place the entry onto the changed list if not already there, and
787 * mark as to be deleted.
789 if (evp
->me_cq
== 0) {
791 LIST_REMOVE(evp
, me_list
);
792 LIST_INSERT_HEAD(&change_head
, evp
, me_list
);
795 evp
->me_state
= EV_DELETE
;
806 mevent_delete(struct mevent
*evp
)
809 return (mevent_delete_event(evp
, 0));
813 mevent_delete_close(struct mevent
*evp
)
816 return (mevent_delete_event(evp
, 1));
820 mevent_set_name(void)
823 pthread_set_name_np(mevent_tid
, "mevent");
827 mevent_dispatch(void)
830 struct kevent changelist
[MEVENT_MAX
];
831 struct kevent eventlist
[MEVENT_MAX
];
832 struct mevent
*pipev
;
835 struct mevent
*pipev
;
838 #ifndef WITHOUT_CAPSICUM
842 mevent_tid
= pthread_self();
845 pthread_once(&mevent_once
, mevent_init
);
848 * Open the pipe that will be used for other threads to force
849 * the blocking kqueue call to exit by writing to it. Set the
850 * descriptor to non-blocking.
852 ret
= pipe(mevent_pipefd
);
858 #ifndef WITHOUT_CAPSICUM
859 cap_rights_init(&rights
, CAP_EVENT
, CAP_READ
, CAP_WRITE
);
860 if (caph_rights_limit(mevent_pipefd
[0], &rights
) == -1)
861 errx(EX_OSERR
, "Unable to apply rights for sandbox");
862 if (caph_rights_limit(mevent_pipefd
[1], &rights
) == -1)
863 errx(EX_OSERR
, "Unable to apply rights for sandbox");
867 * Add internal event handler for the pipe read fd
869 pipev
= mevent_add(mevent_pipefd
[0], EVF_READ
, mevent_pipe_read
, NULL
);
870 assert(pipev
!= NULL
);
875 * Build changelist if required.
876 * XXX the changelist can be put into the blocking call
877 * to eliminate the extra syscall. Currently better for
880 numev
= mevent_build(changelist
);
882 ret
= kevent(mfd
, changelist
, numev
, NULL
, 0, NULL
);
884 perror("Error return from kevent change");
889 * Block awaiting events
891 ret
= kevent(mfd
, NULL
, 0, eventlist
, MEVENT_MAX
, NULL
);
892 if (ret
== -1 && errno
!= EINTR
) {
893 perror("Error return from kevent monitor");
897 * Handle reported events
899 mevent_handle(eventlist
, ret
);
901 #else /* __FreeBSD__ */
904 /* Handle any pending updates */
905 mevent_update_pending();
907 /* Block awaiting events */
908 ret
= port_get(mfd
, &pev
, NULL
);
911 perror("Error return from port_get");
915 /* Handle reported event */
916 mevent_handle_pe(&pev
);
917 #endif /* __FreeBSD__ */