2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
13 * Copyright 2017 Joyent, Inc.
17 * Support for the eventfd facility, a Linux-borne facility for user-generated
18 * file descriptor-based events.
22 #include <sys/sunddi.h>
23 #include <sys/eventfd.h>
26 #include <sys/sysmacros.h>
27 #include <sys/filio.h>
/*
 * State for a single eventfd instance.  eventfd_open() zero-allocates one
 * of these as soft state for each minor number it hands out, and links it
 * onto the global list through efd_next.
 *
 * NOTE(review): the closing brace of this struct is not visible in this
 * listing.
 */
32 typedef struct eventfd_state eventfd_state_t
;
34 struct eventfd_state
{
35 kmutex_t efd_lock
; /* lock protecting the fields of this instance */
36 boolean_t efd_semaphore
; /* boolean: sema. semantics; toggled by EVENTFDIOC_SEMAPHORE */
37 kcondvar_t efd_cv
; /* condvar that both blocked readers and blocked writers wait on */
38 pollhead_t efd_pollhd
; /* poll head handed back to the poll framework */
39 uint64_t efd_value
; /* current counter value */
40 size_t efd_bwriters
; /* count of blocked writers */
41 eventfd_state_t
*efd_next
; /* next state on global list */
45 * Internal global variables.
/*
 * Module-wide state.  eventfd_lock is the lock taken by open, close,
 * attach and detach around minor-number allocation, soft-state setup and
 * manipulation of the global eventfd_state list.
 */
47 static kmutex_t eventfd_lock
; /* lock protecting the module-wide state below */
48 static dev_info_t
*eventfd_devi
; /* device info */
49 static vmem_t
*eventfd_minor
; /* minor number arena */
50 static void *eventfd_softstate
; /* softstate pointer */
51 static eventfd_state_t
*eventfd_state
; /* global list of state */
/*
 * Open entry point.
 *
 * The minor of the device being opened is compared against
 * EVENTFDMNRN_EVENTFD.  Then, under eventfd_lock, a fresh minor number is
 * taken from the eventfd_minor arena, zeroed soft state is allocated for
 * it, *devp is rewritten to (major, new minor), and the new state is
 * pushed onto the head of the global eventfd_state list.
 *
 * If the soft-state allocation fails, the minor number is returned to the
 * arena and the lock is dropped.
 *
 * NOTE(review): the return statements and the body of the minor check are
 * not visible in this listing, so the error codes returned cannot be
 * confirmed from here.
 */
55 eventfd_open(dev_t
*devp
, int flag
, int otyp
, cred_t
*cred_p
)
57 eventfd_state_t
*state
;
58 major_t major
= getemajor(*devp
);
59 minor_t minor
= getminor(*devp
);
61 if (minor
!= EVENTFDMNRN_EVENTFD
)
/* Minor allocation and list insertion are serialized by the global lock. */
64 mutex_enter(&eventfd_lock
);
/* VM_SLEEP: the allocation waits rather than failing. */
66 minor
= (minor_t
)(uintptr_t)vmem_alloc(eventfd_minor
, 1,
67 VM_BESTFIT
| VM_SLEEP
);
69 if (ddi_soft_state_zalloc(eventfd_softstate
, minor
) != DDI_SUCCESS
) {
/* Undo the minor allocation before leaving the failure path. */
70 vmem_free(eventfd_minor
, (void *)(uintptr_t)minor
, 1);
71 mutex_exit(&eventfd_lock
);
75 state
= ddi_get_soft_state(eventfd_softstate
, minor
);
/* Hand the freshly allocated minor back to the caller through *devp. */
76 *devp
= makedevice(major
, minor
);
/* Push the new state onto the head of the global list. */
78 state
->efd_next
= eventfd_state
;
79 eventfd_state
= state
;
81 mutex_exit(&eventfd_lock
);
/*
 * Read entry point for an eventfd minor.
 *
 * Visible flow: the request size is checked against sizeof (val), the
 * soft state for the minor is looked up, and efd_lock is taken.  While the
 * counter is zero the caller either gives up (FNDELAY or FNONBLOCK set in
 * uio_fmode) or sleeps on efd_cv via cv_wait_sig_swap(); both of those
 * exit paths drop the lock first.  Once the counter is non-zero it is
 * captured in val and oval, the stored counter is updated depending on
 * efd_semaphore, and val is copied out with uiomove() while the lock is
 * still held.  Blocked writers are then woken, the lock is dropped, and
 * POLLWRNORM | POLLOUT is posted only if the pre-read value was
 * EVENTFD_VALMAX.
 *
 * NOTE(review): this listing omits the local declarations, every return
 * statement, and the body of the efd_semaphore branch, so the error codes
 * and the semaphore-mode arithmetic cannot be confirmed from here.
 */
88 eventfd_read(dev_t dev
, uio_t
*uio
, cred_t
*cr
)
90 eventfd_state_t
*state
;
91 minor_t minor
= getminor(dev
);
95 if (uio
->uio_resid
< sizeof (val
))
98 state
= ddi_get_soft_state(eventfd_softstate
, minor
);
100 mutex_enter(&state
->efd_lock
);
/* Nothing to read while the counter is zero. */
102 while (state
->efd_value
== 0) {
103 if (uio
->uio_fmode
& (FNDELAY
|FNONBLOCK
)) {
104 mutex_exit(&state
->efd_lock
);
/* A zero return from cv_wait_sig_swap() takes the lock-dropping exit path. */
108 if (!cv_wait_sig_swap(&state
->efd_cv
, &state
->efd_lock
)) {
109 mutex_exit(&state
->efd_lock
);
115 * We have a non-zero value and we own the lock; our behavior now
116 * depends on whether or not EFD_SEMAPHORE was set when the eventfd
/* oval keeps the pre-read value for the POLLOUT decision made after unlock. */
119 val
= oval
= state
->efd_value
;
121 if (state
->efd_semaphore
) {
125 state
->efd_value
= 0;
128 err
= uiomove(&val
, sizeof (val
), UIO_READ
, uio
);
131 * Wake any writers blocked on this eventfd as this read operation may
132 * have created adequate capacity for their values.
134 if (state
->efd_bwriters
!= 0) {
135 cv_broadcast(&state
->efd_cv
);
137 mutex_exit(&state
->efd_lock
);
140 * It is necessary to emit POLLOUT events only when the eventfd
141 * transitions from EVENTFD_VALMAX to a lower value. At all other
142 * times, it is already considered writable by poll.
144 if (oval
== EVENTFD_VALMAX
) {
145 pollwakeup(&state
->efd_pollhd
, POLLWRNORM
| POLLOUT
);
/*
 * Write entry point for an eventfd minor.
 *
 * Visible flow: the request size is checked against sizeof (val), val is
 * copied in with uiomove(), and val is compared against EVENTFD_VALMAX.
 * With efd_lock held, the caller loops while adding val would push the
 * counter past EVENTFD_VALMAX: it either gives up (FNDELAY or FNONBLOCK
 * set in uio_fmode) or counts itself in efd_bwriters and sleeps on efd_cv.
 * efd_bwriters is decremented again on both the interrupted and the
 * normal wake-up path.  The counter is then advanced by val (the previous
 * value is kept in oval), waiters on efd_cv are woken, the lock is
 * dropped, and POLLRDNORM | POLLIN is posted.
 *
 * NOTE(review): this listing omits the local declarations, every return
 * statement, and any condition guarding the cv_broadcast() and
 * pollwakeup() calls, so the error codes and the exact wake-up conditions
 * cannot be confirmed from here.
 */
153 eventfd_write(dev_t dev
, struct uio
*uio
, cred_t
*credp
)
155 eventfd_state_t
*state
;
156 minor_t minor
= getminor(dev
);
160 if (uio
->uio_resid
< sizeof (val
))
163 if ((err
= uiomove(&val
, sizeof (val
), UIO_WRITE
, uio
)) != 0)
166 if (val
> EVENTFD_VALMAX
)
169 state
= ddi_get_soft_state(eventfd_softstate
, minor
);
171 mutex_enter(&state
->efd_lock
);
/*
 * Phrased as a subtraction so the capacity test itself cannot overflow;
 * this relies on efd_value never exceeding EVENTFD_VALMAX.
 */
173 while (val
> EVENTFD_VALMAX
- state
->efd_value
) {
174 if (uio
->uio_fmode
& (FNDELAY
|FNONBLOCK
)) {
175 mutex_exit(&state
->efd_lock
);
/* Advertise this blocked writer so eventfd_read() knows to broadcast. */
179 state
->efd_bwriters
++;
180 if (!cv_wait_sig_swap(&state
->efd_cv
, &state
->efd_lock
)) {
181 state
->efd_bwriters
--;
182 mutex_exit(&state
->efd_lock
);
185 state
->efd_bwriters
--;
189 * We now know that we can add the value without overflowing.
/* oval records the counter as it was before this write. */
191 state
->efd_value
= (oval
= state
->efd_value
) + val
;
194 * If the value was previously "empty", notify blocked readers that
198 cv_broadcast(&state
->efd_cv
);
200 mutex_exit(&state
->efd_lock
);
203 * Notify pollers as well if the eventfd is now readable.
206 pollwakeup(&state
->efd_pollhd
, POLLRDNORM
| POLLIN
);
/*
 * Poll entry point.
 *
 * Under efd_lock, readiness is derived from the counter: a value above
 * zero is readable (POLLRDNORM | POLLIN) and a value below EVENTFD_VALMAX
 * is writable (POLLWRNORM | POLLOUT).  The result is masked with the
 * requested events and stored in *reventsp.  The poll head is handed back
 * through *phpp when nothing is ready and anyyet is zero, or whenever
 * POLLET is requested.
 *
 * NOTE(review): the declaration (and initial value) of revents and the
 * return statement are not visible in this listing.
 */
214 eventfd_poll(dev_t dev
, short events
, int anyyet
, short *reventsp
,
215 struct pollhead
**phpp
)
217 eventfd_state_t
*state
;
218 minor_t minor
= getminor(dev
);
221 state
= ddi_get_soft_state(eventfd_softstate
, minor
);
223 mutex_enter(&state
->efd_lock
);
225 if (state
->efd_value
> 0)
226 revents
|= POLLRDNORM
| POLLIN
;
228 if (state
->efd_value
< EVENTFD_VALMAX
)
229 revents
|= POLLWRNORM
| POLLOUT
;
/* Report only the events the caller asked about. */
231 *reventsp
= revents
& events
;
232 if ((*reventsp
== 0 && !anyyet
) || (events
& POLLET
)) {
233 *phpp
= &state
->efd_pollhd
;
236 mutex_exit(&state
->efd_lock
);
/*
 * Ioctl entry point.  The only command visible in this listing is
 * EVENTFDIOC_SEMAPHORE, which flips efd_semaphore under efd_lock.
 *
 * NOTE(review): the command is an XOR toggle rather than an explicit
 * set, so issuing it twice restores the previous mode.  The switch
 * header, the return statements, and any other cases are not visible
 * here.
 */
243 eventfd_ioctl(dev_t dev
, int cmd
, intptr_t arg
, int md
, cred_t
*cr
, int *rv
)
245 eventfd_state_t
*state
;
246 minor_t minor
= getminor(dev
);
248 state
= ddi_get_soft_state(eventfd_softstate
, minor
);
251 case EVENTFDIOC_SEMAPHORE
: {
252 mutex_enter(&state
->efd_lock
);
253 state
->efd_semaphore
^= 1;
254 mutex_exit(&state
->efd_lock
);
/*
 * Close entry point.
 *
 * If the poll head still has entries on ph_list, they are woken with
 * POLLERR and the poll head is cleaned.  Then, under eventfd_lock, the
 * state is unlinked from the global eventfd_state list, its soft state is
 * freed, and its minor number is returned to the eventfd_minor arena.
 *
 * NOTE(review): the body of the list-walk loop and the return statement
 * are not visible in this listing.  As shown, the loop terminates only
 * when *sp equals state; confirm how a missing entry is handled.
 */
268 eventfd_close(dev_t dev
, int flag
, int otyp
, cred_t
*cred_p
)
270 eventfd_state_t
*state
, **sp
;
271 minor_t minor
= getminor(dev
);
273 state
= ddi_get_soft_state(eventfd_softstate
, minor
);
275 if (state
->efd_pollhd
.ph_list
!= NULL
) {
276 pollwakeup(&state
->efd_pollhd
, POLLERR
);
277 pollhead_clean(&state
->efd_pollhd
);
280 mutex_enter(&eventfd_lock
);
283 * Remove our state from our global list.
/*
 * sp walks the link fields themselves (starting at the list head), so the
 * unlink below needs no special case for the first element.
 */
285 for (sp
= &eventfd_state
; *sp
!= state
; sp
= &((*sp
)->efd_next
))
288 *sp
= (*sp
)->efd_next
;
/* Release the soft state first, then the minor number that indexed it. */
290 ddi_soft_state_free(eventfd_softstate
, minor
);
291 vmem_free(eventfd_minor
, (void *)(uintptr_t)minor
, 1);
293 mutex_exit(&eventfd_lock
);
/*
 * Attach entry point.
 *
 * Under eventfd_lock this initializes the soft-state framework for
 * eventfd_state_t-sized entries, creates the "eventfd" character minor
 * node (EVENTFDMNRN_EVENTFD), reports the device, and creates the
 * "eventfd_minor" identifier arena that eventfd_open() draws minors
 * from, spanning from EVENTFDMNRN_CLONE upward.  Each failure path logs a
 * CE_NOTE, undoes what was set up so far, drops the lock, and returns
 * DDI_FAILURE.
 *
 * NOTE(review): the dispatch on cmd that leads to the first two return
 * statements is not visible in this listing.  No assignment to
 * eventfd_devi is visible here either, although eventfd_detach() and
 * eventfd_info() read it; confirm that it is set.
 */
299 eventfd_attach(dev_info_t
*devi
, ddi_attach_cmd_t cmd
)
306 return (DDI_SUCCESS
);
309 return (DDI_FAILURE
);
312 mutex_enter(&eventfd_lock
);
314 if (ddi_soft_state_init(&eventfd_softstate
,
315 sizeof (eventfd_state_t
), 0) != 0) {
316 cmn_err(CE_NOTE
, "/dev/eventfd failed to create soft state");
317 mutex_exit(&eventfd_lock
);
318 return (DDI_FAILURE
);
321 if (ddi_create_minor_node(devi
, "eventfd", S_IFCHR
,
322 EVENTFDMNRN_EVENTFD
, DDI_PSEUDO
, NULL
) == DDI_FAILURE
) {
323 cmn_err(CE_NOTE
, "/dev/eventfd couldn't create minor node");
/* Tear down the soft state that was initialized above. */
324 ddi_soft_state_fini(&eventfd_softstate
);
325 mutex_exit(&eventfd_lock
);
326 return (DDI_FAILURE
);
329 ddi_report_dev(devi
);
/* Identifier arena: it hands out minor numbers, not memory. */
332 eventfd_minor
= vmem_create("eventfd_minor", (void *)EVENTFDMNRN_CLONE
,
333 UINT32_MAX
- EVENTFDMNRN_CLONE
, 1, NULL
, NULL
, NULL
, 0,
334 VM_SLEEP
| VMC_IDENTIFIER
);
336 mutex_exit(&eventfd_lock
);
338 return (DDI_SUCCESS
);
/*
 * Detach entry point.
 *
 * Under eventfd_lock this destroys the minor-number arena, removes the
 * minor nodes of eventfd_devi, and tears down the soft-state framework,
 * then returns DDI_SUCCESS.
 *
 * NOTE(review): the dispatch on cmd that leads to the first two return
 * statements is not visible in this listing.  The dip argument is not
 * used in the visible lines; the saved eventfd_devi is used instead.
 */
343 eventfd_detach(dev_info_t
*dip
, ddi_detach_cmd_t cmd
)
350 return (DDI_SUCCESS
);
353 return (DDI_FAILURE
);
356 mutex_enter(&eventfd_lock
);
357 vmem_destroy(eventfd_minor
);
/* A NULL name is passed instead of naming the "eventfd" node explicitly. */
359 ddi_remove_minor_node(eventfd_devi
, NULL
);
362 ddi_soft_state_fini(&eventfd_softstate
);
363 mutex_exit(&eventfd_lock
);
365 return (DDI_SUCCESS
);
/*
 * getinfo entry point.  For DDI_INFO_DEVT2DEVINFO the saved eventfd_devi
 * pointer is stored in *result.
 *
 * NOTE(review): the switch header, the body of the DDI_INFO_DEVT2INSTANCE
 * case, and the return statements are not visible in this listing.
 */
370 eventfd_info(dev_info_t
*dip
, ddi_info_cmd_t infocmd
, void *arg
, void **result
)
375 case DDI_INFO_DEVT2DEVINFO
:
376 *result
= (void *)eventfd_devi
;
379 case DDI_INFO_DEVT2INSTANCE
:
/*
 * Character/block entry-point table for the driver.  It wires up the
 * open, close, read, write, ioctl and poll handlers defined in this file.
 *
 * NOTE(review): several slots and the closing brace of this initializer
 * are not visible in this listing, so the positional layout cannot be
 * checked from here.
 */
389 static struct cb_ops eventfd_cb_ops
= {
390 eventfd_open
, /* open */
391 eventfd_close
, /* close */
392 nulldev
, /* strategy */
395 eventfd_read
, /* read */
396 eventfd_write
, /* write */
397 eventfd_ioctl
, /* ioctl */
401 eventfd_poll
, /* poll */
402 ddi_prop_op
, /* cb_prop_op */
404 D_NEW
| D_MP
/* Driver compatibility flag */
/*
 * Device operations table.  It points at the getinfo, attach and detach
 * handlers defined in this file and at eventfd_cb_ops for the
 * character-device entry points.
 *
 * NOTE(review): some slots and the closing brace of this initializer are
 * not visible in this listing.
 */
407 static struct dev_ops eventfd_ops
= {
408 DEVO_REV
, /* devo_rev */
410 eventfd_info
, /* get_dev_info */
411 nulldev
, /* identify */
413 eventfd_attach
, /* attach */
414 eventfd_detach
, /* detach */
416 &eventfd_cb_ops
, /* driver operations */
417 NULL
, /* bus operations */
418 nodev
, /* dev power */
419 ddi_quiesce_not_needed
, /* quiesce */
/*
 * Loadable-driver linkage record: it names the module "eventfd support"
 * and points at eventfd_ops.
 * NOTE(review): the closing brace is not visible in this listing.
 */
422 static struct modldrv modldrv
= {
423 &mod_driverops
, /* module type (this is a pseudo driver) */
424 "eventfd support", /* name of module */
425 &eventfd_ops
, /* driver ops */
/*
 * Module linkage structure passed to mod_install(), mod_info() and
 * mod_remove() below.
 * NOTE(review): the initializer's contents are not visible in this
 * listing.
 */
428 static struct modlinkage modlinkage
= {
/*
 * Installs the module through its modlinkage and returns the result.
 * NOTE(review): the enclosing function header is not visible in this
 * listing; presumably this is the body of _init() -- confirm.
 */
437 return (mod_install(&modlinkage
));
/*
 * Module information entry point: forwards modinfop to mod_info() for
 * this module's modlinkage and returns the result.
 */
441 _info(struct modinfo
*modinfop
)
443 return (mod_info(&modlinkage
, modinfop
));
/*
 * Removes the module through its modlinkage and returns the result.
 * NOTE(review): the enclosing function header is not visible in this
 * listing; presumably this is the body of _fini() -- confirm.
 */
449 return (mod_remove(&modlinkage
));