Merge commit '2bee374f9ad3367948d472f4e3489135fcac9e1c'
[unleashed.git] / usr / src / uts / common / io / eventfd.c
blobd6d17663f3356c45ca4cce34514ab2c11796279d
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2016 Joyent, Inc.
 */

/*
 * Support for the eventfd facility, a Linux-borne facility for user-generated
 * file descriptor-based events.
 */

21 #include <sys/ddi.h>
22 #include <sys/sunddi.h>
23 #include <sys/eventfd.h>
24 #include <sys/conf.h>
25 #include <sys/vmem.h>
26 #include <sys/sysmacros.h>
27 #include <sys/filio.h>
28 #include <sys/stat.h>
29 #include <sys/file.h>
31 struct eventfd_state;
32 typedef struct eventfd_state eventfd_state_t;
34 struct eventfd_state {
35 kmutex_t efd_lock; /* lock protecting state */
36 boolean_t efd_semaphore; /* boolean: sema. semantics */
37 kcondvar_t efd_cv; /* condvar */
38 pollhead_t efd_pollhd; /* poll head */
39 uint64_t efd_value; /* value */
40 size_t efd_bwriters; /* count of blocked writers */
41 eventfd_state_t *efd_next; /* next state on global list */
45 * Internal global variables.
47 static kmutex_t eventfd_lock; /* lock protecting state */
48 static dev_info_t *eventfd_devi; /* device info */
49 static vmem_t *eventfd_minor; /* minor number arena */
50 static void *eventfd_softstate; /* softstate pointer */
51 static eventfd_state_t *eventfd_state; /* global list of state */
53 /*ARGSUSED*/
54 static int
55 eventfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
57 eventfd_state_t *state;
58 major_t major = getemajor(*devp);
59 minor_t minor = getminor(*devp);
61 if (minor != EVENTFDMNRN_EVENTFD)
62 return (ENXIO);
64 mutex_enter(&eventfd_lock);
66 minor = (minor_t)(uintptr_t)vmem_alloc(eventfd_minor, 1,
67 VM_BESTFIT | VM_SLEEP);
69 if (ddi_soft_state_zalloc(eventfd_softstate, minor) != DDI_SUCCESS) {
70 vmem_free(eventfd_minor, (void *)(uintptr_t)minor, 1);
71 mutex_exit(&eventfd_lock);
72 return (0);
75 state = ddi_get_soft_state(eventfd_softstate, minor);
76 *devp = makedevice(major, minor);
78 state->efd_next = eventfd_state;
79 eventfd_state = state;
81 mutex_exit(&eventfd_lock);
83 return (0);
86 /*ARGSUSED*/
87 static int
88 eventfd_read(dev_t dev, uio_t *uio, cred_t *cr)
90 eventfd_state_t *state;
91 minor_t minor = getminor(dev);
92 uint64_t val, oval;
93 int err;
95 if (uio->uio_resid < sizeof (val))
96 return (EINVAL);
98 state = ddi_get_soft_state(eventfd_softstate, minor);
100 mutex_enter(&state->efd_lock);
102 while (state->efd_value == 0) {
103 if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
104 mutex_exit(&state->efd_lock);
105 return (EAGAIN);
108 if (!cv_wait_sig_swap(&state->efd_cv, &state->efd_lock)) {
109 mutex_exit(&state->efd_lock);
110 return (EINTR);
115 * We have a non-zero value and we own the lock; our behavior now
116 * depends on whether or not EFD_SEMAPHORE was set when the eventfd
117 * was created.
119 val = oval = state->efd_value;
121 if (state->efd_semaphore) {
122 state->efd_value--;
123 val = 1;
124 } else {
125 state->efd_value = 0;
128 err = uiomove(&val, sizeof (val), UIO_READ, uio);
131 * Wake any writers blocked on this eventfd as this read operation may
132 * have created adequate capacity for their values.
134 if (state->efd_bwriters != 0) {
135 cv_broadcast(&state->efd_cv);
137 mutex_exit(&state->efd_lock);
140 * It is necessary to emit POLLOUT events only when the eventfd
141 * transitions from EVENTFD_VALMAX to a lower value. At all other
142 * times, it is already considered writable by poll.
144 if (oval == EVENTFD_VALMAX) {
145 pollwakeup(&state->efd_pollhd, POLLWRNORM | POLLOUT);
148 return (err);
151 /*ARGSUSED*/
152 static int
153 eventfd_write(dev_t dev, struct uio *uio, cred_t *credp)
155 eventfd_state_t *state;
156 minor_t minor = getminor(dev);
157 uint64_t val, oval;
158 int err;
160 if (uio->uio_resid < sizeof (val))
161 return (EINVAL);
163 if ((err = uiomove(&val, sizeof (val), UIO_WRITE, uio)) != 0)
164 return (err);
166 if (val > EVENTFD_VALMAX)
167 return (EINVAL);
169 state = ddi_get_soft_state(eventfd_softstate, minor);
171 mutex_enter(&state->efd_lock);
173 while (val > EVENTFD_VALMAX - state->efd_value) {
174 if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
175 mutex_exit(&state->efd_lock);
176 return (EAGAIN);
179 state->efd_bwriters++;
180 if (!cv_wait_sig_swap(&state->efd_cv, &state->efd_lock)) {
181 state->efd_bwriters--;
182 mutex_exit(&state->efd_lock);
183 return (EINTR);
185 state->efd_bwriters--;
189 * We now know that we can add the value without overflowing.
191 state->efd_value = (oval = state->efd_value) + val;
194 * If the value was previously "empty", notify blocked readers that
195 * data is available.
197 if (oval == 0) {
198 cv_broadcast(&state->efd_cv);
200 mutex_exit(&state->efd_lock);
203 * Notify pollers as well if the eventfd is now readable.
205 if (oval == 0) {
206 pollwakeup(&state->efd_pollhd, POLLRDNORM | POLLIN);
209 return (0);
212 /*ARGSUSED*/
213 static int
214 eventfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
215 struct pollhead **phpp)
217 eventfd_state_t *state;
218 minor_t minor = getminor(dev);
219 short revents = 0;
221 state = ddi_get_soft_state(eventfd_softstate, minor);
223 mutex_enter(&state->efd_lock);
225 if (state->efd_value > 0)
226 revents |= POLLRDNORM | POLLIN;
228 if (state->efd_value < EVENTFD_VALMAX)
229 revents |= POLLWRNORM | POLLOUT;
231 if (!(*reventsp = revents & events) && !anyyet)
232 *phpp = &state->efd_pollhd;
234 mutex_exit(&state->efd_lock);
236 return (0);
239 /*ARGSUSED*/
240 static int
241 eventfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
243 eventfd_state_t *state;
244 minor_t minor = getminor(dev);
246 state = ddi_get_soft_state(eventfd_softstate, minor);
248 switch (cmd) {
249 case EVENTFDIOC_SEMAPHORE: {
250 mutex_enter(&state->efd_lock);
251 state->efd_semaphore ^= 1;
252 mutex_exit(&state->efd_lock);
254 return (0);
257 default:
258 break;
261 return (ENOTTY);
264 /*ARGSUSED*/
265 static int
266 eventfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
268 eventfd_state_t *state, **sp;
269 minor_t minor = getminor(dev);
271 state = ddi_get_soft_state(eventfd_softstate, minor);
273 if (state->efd_pollhd.ph_list != NULL) {
274 pollwakeup(&state->efd_pollhd, POLLERR);
275 pollhead_clean(&state->efd_pollhd);
278 mutex_enter(&eventfd_lock);
281 * Remove our state from our global list.
283 for (sp = &eventfd_state; *sp != state; sp = &((*sp)->efd_next))
284 VERIFY(*sp != NULL);
286 *sp = (*sp)->efd_next;
288 ddi_soft_state_free(eventfd_softstate, minor);
289 vmem_free(eventfd_minor, (void *)(uintptr_t)minor, 1);
291 mutex_exit(&eventfd_lock);
293 return (0);
296 static int
297 eventfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
299 switch (cmd) {
300 case DDI_ATTACH:
301 break;
303 case DDI_RESUME:
304 return (DDI_SUCCESS);
306 default:
307 return (DDI_FAILURE);
310 mutex_enter(&eventfd_lock);
312 if (ddi_soft_state_init(&eventfd_softstate,
313 sizeof (eventfd_state_t), 0) != 0) {
314 cmn_err(CE_NOTE, "/dev/eventfd failed to create soft state");
315 mutex_exit(&eventfd_lock);
316 return (DDI_FAILURE);
319 if (ddi_create_minor_node(devi, "eventfd", S_IFCHR,
320 EVENTFDMNRN_EVENTFD, DDI_PSEUDO, 0) == DDI_FAILURE) {
321 cmn_err(CE_NOTE, "/dev/eventfd couldn't create minor node");
322 ddi_soft_state_fini(&eventfd_softstate);
323 mutex_exit(&eventfd_lock);
324 return (DDI_FAILURE);
327 ddi_report_dev(devi);
328 eventfd_devi = devi;
330 eventfd_minor = vmem_create("eventfd_minor", (void *)EVENTFDMNRN_CLONE,
331 UINT32_MAX - EVENTFDMNRN_CLONE, 1, NULL, NULL, NULL, 0,
332 VM_SLEEP | VMC_IDENTIFIER);
334 mutex_exit(&eventfd_lock);
336 return (DDI_SUCCESS);
339 /*ARGSUSED*/
340 static int
341 eventfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
343 switch (cmd) {
344 case DDI_DETACH:
345 break;
347 case DDI_SUSPEND:
348 return (DDI_SUCCESS);
350 default:
351 return (DDI_FAILURE);
354 mutex_enter(&eventfd_lock);
355 vmem_destroy(eventfd_minor);
357 ddi_remove_minor_node(eventfd_devi, NULL);
358 eventfd_devi = NULL;
360 ddi_soft_state_fini(&eventfd_softstate);
361 mutex_exit(&eventfd_lock);
363 return (DDI_SUCCESS);
366 /*ARGSUSED*/
367 static int
368 eventfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
370 int error;
372 switch (infocmd) {
373 case DDI_INFO_DEVT2DEVINFO:
374 *result = (void *)eventfd_devi;
375 error = DDI_SUCCESS;
376 break;
377 case DDI_INFO_DEVT2INSTANCE:
378 *result = NULL;
379 error = DDI_SUCCESS;
380 break;
381 default:
382 error = DDI_FAILURE;
384 return (error);
387 static struct cb_ops eventfd_cb_ops = {
388 eventfd_open, /* open */
389 eventfd_close, /* close */
390 nulldev, /* strategy */
391 nulldev, /* print */
392 nodev, /* dump */
393 eventfd_read, /* read */
394 eventfd_write, /* write */
395 eventfd_ioctl, /* ioctl */
396 nodev, /* devmap */
397 nodev, /* mmap */
398 nodev, /* segmap */
399 eventfd_poll, /* poll */
400 ddi_prop_op, /* cb_prop_op */
401 0, /* streamtab */
402 D_NEW | D_MP /* Driver compatibility flag */
405 static struct dev_ops eventfd_ops = {
406 DEVO_REV, /* devo_rev */
407 0, /* refcnt */
408 eventfd_info, /* get_dev_info */
409 nulldev, /* identify */
410 nulldev, /* probe */
411 eventfd_attach, /* attach */
412 eventfd_detach, /* detach */
413 nodev, /* reset */
414 &eventfd_cb_ops, /* driver operations */
415 NULL, /* bus operations */
416 nodev, /* dev power */
417 ddi_quiesce_not_needed, /* quiesce */
420 static struct modldrv modldrv = {
421 &mod_driverops, /* module type (this is a pseudo driver) */
422 "eventfd support", /* name of module */
423 &eventfd_ops, /* driver ops */
426 static struct modlinkage modlinkage = {
427 MODREV_1,
428 (void *)&modldrv,
429 NULL
433 _init(void)
435 return (mod_install(&modlinkage));
439 _info(struct modinfo *modinfop)
441 return (mod_info(&modlinkage, modinfop));
445 _fini(void)
447 return (mod_remove(&modlinkage));