2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
13 * Copyright 2016 Joyent, Inc.
17 * Support for the eventfd facility, a Linux-borne facility for user-generated
18 * file descriptor-based events.
22 #include <sys/sunddi.h>
23 #include <sys/eventfd.h>
26 #include <sys/sysmacros.h>
27 #include <sys/filio.h>
/*
 * Per-instance eventfd state. One of these is allocated as DDI soft state
 * for each minor handed out at open time, and all live instances are
 * chained through efd_next. efd_lock guards the counter (efd_value), the
 * semaphore flag and the blocked-writer count; efd_cv is the single
 * condition variable on which both blocked readers and blocked writers
 * wait.
 * NOTE(review): the closing brace of the struct is not present in this
 * extract -- confirm the field list against the full file.
 */
32 typedef struct eventfd_state eventfd_state_t
;
34 struct eventfd_state
{
35 kmutex_t efd_lock
; /* lock protecting state */
36 boolean_t efd_semaphore
; /* boolean: sema. semantics */
37 kcondvar_t efd_cv
; /* condvar */
38 pollhead_t efd_pollhd
; /* poll head */
39 uint64_t efd_value
; /* value */
40 size_t efd_bwriters
; /* count of blocked writers */
41 eventfd_state_t
*efd_next
; /* next state on global list */
45 * Internal global variables.
47 static kmutex_t eventfd_lock
; /* lock protecting state */
48 static dev_info_t
*eventfd_devi
; /* device info */
49 static vmem_t
*eventfd_minor
; /* minor number arena */
50 static void *eventfd_softstate
; /* softstate pointer */
51 static eventfd_state_t
*eventfd_state
; /* global list of state */
/*
 * Open entry point. The minor being opened is compared against
 * EVENTFDMNRN_EVENTFD; for that minor, and under eventfd_lock, a fresh
 * minor number is taken from the eventfd_minor arena, zeroed soft state is
 * allocated for it, *devp is rewritten to name the new minor, and the new
 * state is pushed onto the head of the global eventfd_state list.
 *
 * NOTE(review): the return type, braces and every return statement are
 * absent from this extract, so what is returned for a mismatching minor
 * and for a soft-state allocation failure cannot be confirmed from here --
 * check both paths against the full file.
 */
55 eventfd_open(dev_t
*devp
, int flag
, int otyp
, cred_t
*cred_p
)
57 eventfd_state_t
*state
;
58 major_t major
= getemajor(*devp
);
59 minor_t minor
= getminor(*devp
);
61 if (minor
!= EVENTFDMNRN_EVENTFD
)
64 mutex_enter(&eventfd_lock
);
/* VM_SLEEP: the minor allocation waits rather than failing. */
66 minor
= (minor_t
)(uintptr_t)vmem_alloc(eventfd_minor
, 1,
67 VM_BESTFIT
| VM_SLEEP
);
69 if (ddi_soft_state_zalloc(eventfd_softstate
, minor
) != DDI_SUCCESS
) {
/* Hand the minor number back before bailing out. */
70 vmem_free(eventfd_minor
, (void *)(uintptr_t)minor
, 1);
71 mutex_exit(&eventfd_lock
);
75 state
= ddi_get_soft_state(eventfd_softstate
, minor
);
76 *devp
= makedevice(major
, minor
);
/* Link the new instance at the head of the global list. */
78 state
->efd_next
= eventfd_state
;
79 eventfd_state
= state
;
81 mutex_exit(&eventfd_lock
);
/*
 * Read entry point. The caller's remaining buffer size is checked against
 * sizeof (val), then efd_lock is taken and the routine waits on efd_cv
 * until efd_value is non-zero. If FNDELAY or FNONBLOCK is set in uio_fmode
 * the lock is dropped instead of waiting, and a zero return from
 * cv_wait_sig_swap() also drops the lock. Once a value is available it is
 * captured in val/oval, copied out with uiomove(), any blocked writers are
 * woken via efd_cv, and -- only when the previous value was
 * EVENTFD_VALMAX -- pollers are told the descriptor is writable again.
 *
 * NOTE(review): the declarations of val, oval and err, the body of the
 * efd_semaphore branch, and every return statement are missing from this
 * extract; the error codes for the short-buffer, non-blocking and
 * interrupted paths must be confirmed against the full file.
 */
88 eventfd_read(dev_t dev
, uio_t
*uio
, cred_t
*cr
)
90 eventfd_state_t
*state
;
91 minor_t minor
= getminor(dev
);
95 if (uio
->uio_resid
< sizeof (val
))
98 state
= ddi_get_soft_state(eventfd_softstate
, minor
);
100 mutex_enter(&state
->efd_lock
);
/* Wait until there is a non-zero value to hand back. */
102 while (state
->efd_value
== 0) {
103 if (uio
->uio_fmode
& (FNDELAY
|FNONBLOCK
)) {
104 mutex_exit(&state
->efd_lock
);
108 if (!cv_wait_sig_swap(&state
->efd_cv
, &state
->efd_lock
)) {
109 mutex_exit(&state
->efd_lock
);
115 * We have a non-zero value and we own the lock; our behavior now
116 * depends on whether or not EFD_SEMAPHORE was set when the eventfd
119 val
= oval
= state
->efd_value
;
121 if (state
->efd_semaphore
) {
/* NOTE(review): presumably the non-semaphore arm -- the counter is reset. */
125 state
->efd_value
= 0;
/* efd_lock is not released until after the writer wakeup below. */
128 err
= uiomove(&val
, sizeof (val
), UIO_READ
, uio
);
131 * Wake any writers blocked on this eventfd as this read operation may
132 * have created adequate capacity for their values.
134 if (state
->efd_bwriters
!= 0) {
135 cv_broadcast(&state
->efd_cv
);
137 mutex_exit(&state
->efd_lock
);
140 * It is necessary to emit POLLOUT events only when the eventfd
141 * transitions from EVENTFD_VALMAX to a lower value. At all other
142 * times, it is already considered writable by poll.
144 if (oval
== EVENTFD_VALMAX
) {
145 pollwakeup(&state
->efd_pollhd
, POLLWRNORM
| POLLOUT
);
/*
 * Write entry point. After checking that at least sizeof (val) bytes are
 * supplied, the value is copied in with uiomove() and compared against
 * EVENTFD_VALMAX. Under efd_lock the routine then waits on efd_cv for as
 * long as val exceeds the headroom left below EVENTFD_VALMAX. While
 * waiting, efd_bwriters is raised so that the read path knows to broadcast
 * efd_cv; with FNDELAY/FNONBLOCK set, or when cv_wait_sig_swap() returns
 * zero, the lock is dropped instead. Finally the value is added to
 * efd_value (the previous value is kept in oval), efd_cv is broadcast and
 * pollers are notified that the descriptor is readable.
 *
 * NOTE(review): the declarations of val, oval and err, the conditions (if
 * any) guarding the final cv_broadcast() and pollwakeup(), and every
 * return statement are missing from this extract -- confirm against the
 * full file.
 */
153 eventfd_write(dev_t dev
, struct uio
*uio
, cred_t
*credp
)
155 eventfd_state_t
*state
;
156 minor_t minor
= getminor(dev
);
160 if (uio
->uio_resid
< sizeof (val
))
163 if ((err
= uiomove(&val
, sizeof (val
), UIO_WRITE
, uio
)) != 0)
166 if (val
> EVENTFD_VALMAX
)
169 state
= ddi_get_soft_state(eventfd_softstate
, minor
);
171 mutex_enter(&state
->efd_lock
);
/* Loop while val exceeds the remaining headroom below EVENTFD_VALMAX. */
173 while (val
> EVENTFD_VALMAX
- state
->efd_value
) {
174 if (uio
->uio_fmode
& (FNDELAY
|FNONBLOCK
)) {
175 mutex_exit(&state
->efd_lock
);
/* Count ourselves as a blocked writer for the duration of the wait. */
179 state
->efd_bwriters
++;
180 if (!cv_wait_sig_swap(&state
->efd_cv
, &state
->efd_lock
)) {
181 state
->efd_bwriters
--;
182 mutex_exit(&state
->efd_lock
);
185 state
->efd_bwriters
--;
189 * We now know that we can add the value without overflowing.
191 state
->efd_value
= (oval
= state
->efd_value
) + val
;
194 * If the value was previously "empty", notify blocked readers that
198 cv_broadcast(&state
->efd_cv
);
200 mutex_exit(&state
->efd_lock
);
203 * Notify pollers as well if the eventfd is now readable.
206 pollwakeup(&state
->efd_pollhd
, POLLRDNORM
| POLLIN
);
/*
 * Poll entry point. Under efd_lock, POLLRDNORM|POLLIN is reported when
 * efd_value is greater than zero and POLLWRNORM|POLLOUT when it is below
 * EVENTFD_VALMAX. The computed set masked by `events` is stored in
 * *reventsp; if that result is empty and anyyet is zero, *phpp is pointed
 * at this instance's pollhead.
 *
 * NOTE(review): the declaration/initialisation of revents and the return
 * statement are missing from this extract.
 */
214 eventfd_poll(dev_t dev
, short events
, int anyyet
, short *reventsp
,
215 struct pollhead
**phpp
)
217 eventfd_state_t
*state
;
218 minor_t minor
= getminor(dev
);
221 state
= ddi_get_soft_state(eventfd_softstate
, minor
);
223 mutex_enter(&state
->efd_lock
);
225 if (state
->efd_value
> 0)
226 revents
|= POLLRDNORM
| POLLIN
;
228 if (state
->efd_value
< EVENTFD_VALMAX
)
229 revents
|= POLLWRNORM
| POLLOUT
;
/* Only hand back the pollhead when nothing requested is ready yet. */
231 if (!(*reventsp
= revents
& events
) && !anyyet
)
232 *phpp
= &state
->efd_pollhd
;
234 mutex_exit(&state
->efd_lock
);
/*
 * Ioctl entry point. The only command visible in this extract is
 * EVENTFDIOC_SEMAPHORE, which flips efd_semaphore under efd_lock; note
 * that it toggles the flag on each call rather than setting it to a fixed
 * value.
 *
 * NOTE(review): the switch statement, any further cases and all return
 * values are missing from this extract.
 */
241 eventfd_ioctl(dev_t dev
, int cmd
, intptr_t arg
, int md
, cred_t
*cr
, int *rv
)
243 eventfd_state_t
*state
;
244 minor_t minor
= getminor(dev
);
246 state
= ddi_get_soft_state(eventfd_softstate
, minor
);
249 case EVENTFDIOC_SEMAPHORE
: {
250 mutex_enter(&state
->efd_lock
);
251 state
->efd_semaphore
^= 1;
252 mutex_exit(&state
->efd_lock
);
/*
 * Close entry point. If the instance's pollhead still has entries on its
 * list, they are woken with POLLERR and the pollhead is cleaned. Then,
 * under eventfd_lock, the state is unlinked from the global list and both
 * its soft state and its minor number are released.
 *
 * NOTE(review): the body of the list-walking for loop and the return
 * statement are missing from this extract. As visible here the loop
 * condition only tests *sp != state and has no NULL check of its own, so
 * it depends on the state actually being on the list.
 */
266 eventfd_close(dev_t dev
, int flag
, int otyp
, cred_t
*cred_p
)
268 eventfd_state_t
*state
, **sp
;
269 minor_t minor
= getminor(dev
);
271 state
= ddi_get_soft_state(eventfd_softstate
, minor
);
273 if (state
->efd_pollhd
.ph_list
!= NULL
) {
274 pollwakeup(&state
->efd_pollhd
, POLLERR
);
275 pollhead_clean(&state
->efd_pollhd
);
278 mutex_enter(&eventfd_lock
);
281 * Remove our state from our global list.
/* sp ends up addressing the link that points at our state. */
283 for (sp
= &eventfd_state
; *sp
!= state
; sp
= &((*sp
)->efd_next
))
286 *sp
= (*sp
)->efd_next
;
288 ddi_soft_state_free(eventfd_softstate
, minor
);
289 vmem_free(eventfd_minor
, (void *)(uintptr_t)minor
, 1);
291 mutex_exit(&eventfd_lock
);
/*
 * Attach entry point. Under eventfd_lock it initialises the soft state
 * handle for eventfd_state_t-sized entries, creates the "eventfd"
 * character minor node with minor EVENTFDMNRN_EVENTFD, reports the device,
 * and creates the eventfd_minor arena starting at EVENTFDMNRN_CLONE. If
 * either of the first two steps fails, a CE_NOTE message is logged and
 * DDI_FAILURE is returned; a minor-node failure also tears the soft state
 * handle back down.
 *
 * NOTE(review): the command dispatch that selects between the two early
 * returns is missing from this extract, and no assignment to eventfd_devi
 * is visible here even though other routines read it -- confirm against
 * the full file.
 */
297 eventfd_attach(dev_info_t
*devi
, ddi_attach_cmd_t cmd
)
304 return (DDI_SUCCESS
);
307 return (DDI_FAILURE
);
310 mutex_enter(&eventfd_lock
);
312 if (ddi_soft_state_init(&eventfd_softstate
,
313 sizeof (eventfd_state_t
), 0) != 0) {
314 cmn_err(CE_NOTE
, "/dev/eventfd failed to create soft state");
315 mutex_exit(&eventfd_lock
);
316 return (DDI_FAILURE
);
319 if (ddi_create_minor_node(devi
, "eventfd", S_IFCHR
,
320 EVENTFDMNRN_EVENTFD
, DDI_PSEUDO
, 0) == DDI_FAILURE
) {
321 cmn_err(CE_NOTE
, "/dev/eventfd couldn't create minor node");
322 ddi_soft_state_fini(&eventfd_softstate
);
323 mutex_exit(&eventfd_lock
);
324 return (DDI_FAILURE
);
327 ddi_report_dev(devi
);
/* Identifier arena (VMC_IDENTIFIER) used by open to pick minor numbers. */
330 eventfd_minor
= vmem_create("eventfd_minor", (void *)EVENTFDMNRN_CLONE
,
331 UINT32_MAX
- EVENTFDMNRN_CLONE
, 1, NULL
, NULL
, NULL
, 0,
332 VM_SLEEP
| VMC_IDENTIFIER
);
334 mutex_exit(&eventfd_lock
);
336 return (DDI_SUCCESS
);
/*
 * Detach entry point. Under eventfd_lock it destroys the minor-number
 * arena, removes minor nodes from eventfd_devi (passing a NULL name) and
 * tears down the soft state handle, then returns DDI_SUCCESS.
 *
 * NOTE(review): the command dispatch that selects between the two early
 * returns is missing from this extract. Nothing visible here checks
 * whether instances are still on the global list before the teardown.
 */
341 eventfd_detach(dev_info_t
*dip
, ddi_detach_cmd_t cmd
)
348 return (DDI_SUCCESS
);
351 return (DDI_FAILURE
);
354 mutex_enter(&eventfd_lock
);
355 vmem_destroy(eventfd_minor
);
357 ddi_remove_minor_node(eventfd_devi
, NULL
);
360 ddi_soft_state_fini(&eventfd_softstate
);
361 mutex_exit(&eventfd_lock
);
363 return (DDI_SUCCESS
);
/*
 * getinfo entry point. For DDI_INFO_DEVT2DEVINFO, *result is set to the
 * saved eventfd_devi pointer.
 *
 * NOTE(review): the switch statement, the body of the
 * DDI_INFO_DEVT2INSTANCE case and the return values are missing from this
 * extract.
 */
368 eventfd_info(dev_info_t
*dip
, ddi_info_cmd_t infocmd
, void *arg
, void **result
)
373 case DDI_INFO_DEVT2DEVINFO
:
374 *result
= (void *)eventfd_devi
;
377 case DDI_INFO_DEVT2INSTANCE
:
/*
 * Character/block entry point table. It wires in this driver's open,
 * close, read, write, ioctl and poll routines, uses nulldev for strategy
 * and ddi_prop_op for property operations, and sets the D_NEW | D_MP
 * compatibility flags.
 *
 * NOTE(review): several initialiser slots and the closing brace are
 * missing from this extract, so slot positions cannot be checked against
 * struct cb_ops from here.
 */
387 static struct cb_ops eventfd_cb_ops
= {
388 eventfd_open
, /* open */
389 eventfd_close
, /* close */
390 nulldev
, /* strategy */
393 eventfd_read
, /* read */
394 eventfd_write
, /* write */
395 eventfd_ioctl
, /* ioctl */
399 eventfd_poll
, /* poll */
400 ddi_prop_op
, /* cb_prop_op */
402 D_NEW
| D_MP
/* Driver compatibility flag */
/*
 * Device operations table: hooks up getinfo, attach and detach, points at
 * eventfd_cb_ops for the driver entry points, has no bus operations, and
 * uses ddi_quiesce_not_needed for quiesce.
 *
 * NOTE(review): some initialiser slots and the closing brace are missing
 * from this extract, so slot positions cannot be checked against
 * struct dev_ops from here.
 */
405 static struct dev_ops eventfd_ops
= {
406 DEVO_REV
, /* devo_rev */
408 eventfd_info
, /* get_dev_info */
409 nulldev
, /* identify */
411 eventfd_attach
, /* attach */
412 eventfd_detach
, /* detach */
414 &eventfd_cb_ops
, /* driver operations */
415 NULL
, /* bus operations */
416 nodev
, /* dev power */
417 ddi_quiesce_not_needed
, /* quiesce */
/*
 * Loadable-driver description: ties the module name string to the
 * eventfd_ops table.
 * NOTE(review): the closing brace is missing from this extract.
 */
420 static struct modldrv modldrv
= {
421 &mod_driverops
, /* module type (this is a pseudo driver) */
422 "eventfd support", /* name of module */
423 &eventfd_ops
, /* driver ops */
/*
 * Module linkage object; its address is what gets passed to
 * mod_install(), mod_info() and mod_remove() in this file.
 * NOTE(review): the initialiser contents are missing from this extract.
 */
426 static struct modlinkage modlinkage
= {
/*
 * NOTE(review): the enclosing function header is missing from this
 * extract; presumably this is the module's _init routine. It returns
 * whatever mod_install() reports for modlinkage.
 */
435 return (mod_install(&modlinkage
));
/*
 * Module information entry point: passes modinfop straight to mod_info()
 * together with modlinkage and returns its result.
 * NOTE(review): the return type and braces are missing from this extract.
 */
439 _info(struct modinfo
*modinfop
)
441 return (mod_info(&modlinkage
, modinfop
));
/*
 * NOTE(review): the enclosing function header is missing from this
 * extract; presumably this is the module's _fini routine. It returns
 * whatever mod_remove() reports for modlinkage.
 */
447 return (mod_remove(&modlinkage
));