4 * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org>
8 #include <linux/file.h>
9 #include <linux/poll.h>
10 #include <linux/init.h>
12 #include <linux/sched.h>
13 #include <linux/kernel.h>
14 #include <linux/list.h>
15 #include <linux/spinlock.h>
16 #include <linux/anon_inodes.h>
17 #include <linux/eventfd.h>
18 #include <linux/syscalls.h>
21 wait_queue_head_t wqh
;
23 * Every time that a write(2) is performed on an eventfd, the
24 * value of the __u64 being written is added to "count" and a
25 * wakeup is performed on "wqh". A read(2) will return the "count"
26 * value to userspace, and will reset "count" to zero. The kernel
27 * side eventfd_signal() also adds to the "count" counter and
34 * Adds "n" to the eventfd counter "count". Returns "n" in case of
35 * success, or a value lower than "n" in case of counter overflow.
36 * This function is supposed to be called by the kernel in paths
37 * that do not allow sleeping. In this function we allow the counter
38 * to reach the ULLONG_MAX value, and we signal this as overflow
39 * condition by returning a POLLERR to poll(2).
41 int eventfd_signal(struct file
*file
, int n
)
43 struct eventfd_ctx
*ctx
= file
->private_data
;
48 spin_lock_irqsave(&ctx
->wqh
.lock
, flags
);
49 if (ULLONG_MAX
- ctx
->count
< n
)
50 n
= (int) (ULLONG_MAX
- ctx
->count
);
52 if (waitqueue_active(&ctx
->wqh
))
53 wake_up_locked(&ctx
->wqh
);
54 spin_unlock_irqrestore(&ctx
->wqh
.lock
, flags
);
59 static int eventfd_release(struct inode
*inode
, struct file
*file
)
61 kfree(file
->private_data
);
65 static unsigned int eventfd_poll(struct file
*file
, poll_table
*wait
)
67 struct eventfd_ctx
*ctx
= file
->private_data
;
68 unsigned int events
= 0;
71 poll_wait(file
, &ctx
->wqh
, wait
);
73 spin_lock_irqsave(&ctx
->wqh
.lock
, flags
);
76 if (ctx
->count
== ULLONG_MAX
)
78 if (ULLONG_MAX
- 1 > ctx
->count
)
80 spin_unlock_irqrestore(&ctx
->wqh
.lock
, flags
);
85 static ssize_t
eventfd_read(struct file
*file
, char __user
*buf
, size_t count
,
88 struct eventfd_ctx
*ctx
= file
->private_data
;
91 DECLARE_WAITQUEUE(wait
, current
);
93 if (count
< sizeof(ucnt
))
95 spin_lock_irq(&ctx
->wqh
.lock
);
100 else if (!(file
->f_flags
& O_NONBLOCK
)) {
101 __add_wait_queue(&ctx
->wqh
, &wait
);
103 set_current_state(TASK_INTERRUPTIBLE
);
104 if (ctx
->count
> 0) {
109 if (signal_pending(current
)) {
113 spin_unlock_irq(&ctx
->wqh
.lock
);
115 spin_lock_irq(&ctx
->wqh
.lock
);
117 __remove_wait_queue(&ctx
->wqh
, &wait
);
118 __set_current_state(TASK_RUNNING
);
122 if (waitqueue_active(&ctx
->wqh
))
123 wake_up_locked(&ctx
->wqh
);
125 spin_unlock_irq(&ctx
->wqh
.lock
);
126 if (res
> 0 && put_user(ucnt
, (__u64 __user
*) buf
))
132 static ssize_t
eventfd_write(struct file
*file
, const char __user
*buf
, size_t count
,
135 struct eventfd_ctx
*ctx
= file
->private_data
;
138 DECLARE_WAITQUEUE(wait
, current
);
140 if (count
< sizeof(ucnt
))
142 if (copy_from_user(&ucnt
, buf
, sizeof(ucnt
)))
144 if (ucnt
== ULLONG_MAX
)
146 spin_lock_irq(&ctx
->wqh
.lock
);
148 if (ULLONG_MAX
- ctx
->count
> ucnt
)
150 else if (!(file
->f_flags
& O_NONBLOCK
)) {
151 __add_wait_queue(&ctx
->wqh
, &wait
);
153 set_current_state(TASK_INTERRUPTIBLE
);
154 if (ULLONG_MAX
- ctx
->count
> ucnt
) {
158 if (signal_pending(current
)) {
162 spin_unlock_irq(&ctx
->wqh
.lock
);
164 spin_lock_irq(&ctx
->wqh
.lock
);
166 __remove_wait_queue(&ctx
->wqh
, &wait
);
167 __set_current_state(TASK_RUNNING
);
171 if (waitqueue_active(&ctx
->wqh
))
172 wake_up_locked(&ctx
->wqh
);
174 spin_unlock_irq(&ctx
->wqh
.lock
);
179 static const struct file_operations eventfd_fops
= {
180 .release
= eventfd_release
,
181 .poll
= eventfd_poll
,
182 .read
= eventfd_read
,
183 .write
= eventfd_write
,
186 struct file
*eventfd_fget(int fd
)
192 return ERR_PTR(-EBADF
);
193 if (file
->f_op
!= &eventfd_fops
) {
195 return ERR_PTR(-EINVAL
);
201 asmlinkage
long sys_eventfd(unsigned int count
)
204 struct eventfd_ctx
*ctx
;
206 ctx
= kmalloc(sizeof(*ctx
), GFP_KERNEL
);
210 init_waitqueue_head(&ctx
->wqh
);
214 * When we call this, the initialization must be complete, since
215 * anon_inode_getfd() will install the fd.
217 fd
= anon_inode_getfd("[eventfd]", &eventfd_fops
, ctx
, 0);