4 * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org>
8 #include <linux/file.h>
9 #include <linux/poll.h>
10 #include <linux/init.h>
12 #include <linux/sched.h>
13 #include <linux/kernel.h>
14 #include <linux/list.h>
15 #include <linux/spinlock.h>
16 #include <linux/anon_inodes.h>
17 #include <linux/eventfd.h>
/* Wait queue head of the eventfd context; the functions in this file take its embedded lock (wqh.lock) around their accesses to "count". */
20 wait_queue_head_t wqh
;
22 * Every time that a write(2) is performed on an eventfd, the
23 * value of the __u64 being written is added to "count" and a
24 * wakeup is performed on "wqh". A read(2) will return the "count"
25 * value to userspace, and will reset "count" to zero. The kernel
26 * side eventfd_signal() also adds to the "count" counter and
33 * Adds "n" to the eventfd counter "count". Returns "n" in case of
34 * success, or a value lower than "n" in case of counter overflow.
35 * This function is supposed to be called by the kernel in paths
36 * that do not allow sleeping. In this function we allow the counter
37 * to reach the ULLONG_MAX value, and we signal this as overflow
38 * condition by returning a POLLERR to poll(2).
/*
 * Kernel-side signalling of an eventfd. This listing is partial: several
 * original lines (braces, the "flags" declaration, the counter update and
 * the return) are not present here, so only the visible steps are described.
 */
40 int eventfd_signal(struct file
*file
, int n
)
/* The eventfd context is stored in the file's private_data. */
42 struct eventfd_ctx
*ctx
= file
->private_data
;
/* ctx->count is examined and waiters are woken while holding wqh.lock (irqsave variant). */
47 spin_lock_irqsave(&ctx
->wqh
.lock
, flags
);
/*
 * Clamp "n" to the headroom left before ULLONG_MAX.
 * NOTE(review): "n" is a signed int compared against an unsigned long long
 * expression, so a negative "n" is converted to a huge unsigned value and
 * always takes the clamp branch - confirm callers never pass n < 0.
 */
48 if (ULLONG_MAX
- ctx
->count
< n
)
49 n
= (int) (ULLONG_MAX
- ctx
->count
);
/* Wake waiters only if the queue is non-empty; wake_up_locked() is called with wqh.lock already held (taken above). */
51 if (waitqueue_active(&ctx
->wqh
))
52 wake_up_locked(&ctx
->wqh
);
53 spin_unlock_irqrestore(&ctx
->wqh
.lock
, flags
);
/*
 * Release handler wired into eventfd_fops.release: frees the context that
 * is stored in file->private_data. "inode" is not used by the visible code.
 * (Partial listing - braces and the return statement are not shown.)
 */
58 static int eventfd_release(struct inode
*inode
, struct file
*file
)
60 kfree(file
->private_data
);
/*
 * Poll handler wired into eventfd_fops.poll. Partial listing: the statements
 * that set bits in "events" and the final return are not present here.
 */
64 static unsigned int eventfd_poll(struct file
*file
, poll_table
*wait
)
66 struct eventfd_ctx
*ctx
= file
->private_data
;
/* Event mask under construction; starts empty. */
67 unsigned int events
= 0;
/* Register on the context's wait queue first, then sample ctx->count under the lock below. */
70 poll_wait(file
, &ctx
->wqh
, wait
);
72 spin_lock_irqsave(&ctx
->wqh
.lock
, flags
);
/*
 * count == ULLONG_MAX is the overflow condition; per the eventfd_signal()
 * comment in this file it is reported as POLLERR (the assignment itself is
 * not visible in this listing).
 */
75 if (ctx
->count
== ULLONG_MAX
)
/*
 * True while count is below ULLONG_MAX - 1, i.e. a write of at least 1 would
 * still pass eventfd_write()'s "ULLONG_MAX - count > ucnt" test; presumably
 * marks the file writable - the guarded statement is not visible, TODO confirm.
 */
77 if (ULLONG_MAX
- 1 > ctx
->count
)
79 spin_unlock_irqrestore(&ctx
->wqh
.lock
, flags
);
/*
 * read(2) handler wired into eventfd_fops.read. Partial listing: the last
 * parameter, the declarations of "res" and "ucnt", the non-waiting branch,
 * the sleep call, the counter update and all return statements are not
 * present here. Only the visible steps are annotated.
 */
84 static ssize_t
eventfd_read(struct file
*file
, char __user
*buf
, size_t count
,
87 struct eventfd_ctx
*ctx
= file
->private_data
;
/* Wait-queue entry for the calling task, used only on the blocking path below. */
90 DECLARE_WAITQUEUE(wait
, current
);
/* The user buffer must be able to hold sizeof(ucnt) bytes (outcome of a short buffer is not visible here). */
92 if (count
< sizeof(ucnt
))
94 spin_lock_irq(&ctx
->wqh
.lock
);
/* Blocking path: entered only when the file was not opened/set O_NONBLOCK. */
99 else if (!(file
->f_flags
& O_NONBLOCK
)) {
/* Enqueue ourselves while still holding wqh.lock (double-underscore variant). */
100 __add_wait_queue(&ctx
->wqh
, &wait
);
/* Task state is set to TASK_INTERRUPTIBLE before ctx->count is re-tested. */
102 set_current_state(TASK_INTERRUPTIBLE
);
103 if (ctx
->count
> 0) {
/* A pending signal is checked while waiting (what is returned in that case is not visible here). */
108 if (signal_pending(current
)) {
/* Lock is dropped and re-taken here - presumably around the sleep call, which is not visible in this listing. */
112 spin_unlock_irq(&ctx
->wqh
.lock
);
114 spin_lock_irq(&ctx
->wqh
.lock
);
/* Done waiting: dequeue and restore the running state, still under wqh.lock. */
116 __remove_wait_queue(&ctx
->wqh
, &wait
);
117 __set_current_state(TASK_RUNNING
);
/* Wake other waiters on the same queue, with wqh.lock held; the condition guarding this is not visible here. */
121 if (waitqueue_active(&ctx
->wqh
))
122 wake_up_locked(&ctx
->wqh
);
124 spin_unlock_irq(&ctx
->wqh
.lock
);
/* The value is copied to userspace as a __u64 only when res > 0, and only after the spinlock has been released. */
125 if (res
> 0 && put_user(ucnt
, (__u64 __user
*) buf
))
/*
 * write(2) handler wired into eventfd_fops.write. Partial listing: the last
 * parameter, the declarations of "res" and "ucnt", the sleep call, the
 * counter update and all return statements are not present here. Only the
 * visible steps are annotated.
 */
131 static ssize_t
eventfd_write(struct file
*file
, const char __user
*buf
, size_t count
,
134 struct eventfd_ctx
*ctx
= file
->private_data
;
/* Wait-queue entry for the calling task, used only on the blocking path below. */
137 DECLARE_WAITQUEUE(wait
, current
);
/* The user buffer must supply at least sizeof(ucnt) bytes. */
139 if (count
< sizeof(ucnt
))
/* Fetch the value to add from userspace before any lock is taken. */
141 if (copy_from_user(&ucnt
, buf
, sizeof(ucnt
)))
/*
 * ULLONG_MAX is singled out up front: with the strict ">" test used below,
 * such a value could never fit. The guarded statement is not visible here.
 */
143 if (ucnt
== ULLONG_MAX
)
145 spin_lock_irq(&ctx
->wqh
.lock
);
/* The write fits only if it leaves count strictly below ULLONG_MAX. */
147 if (ULLONG_MAX
- ctx
->count
> ucnt
)
/* Blocking path: entered only when O_NONBLOCK is not set on the file. */
149 else if (!(file
->f_flags
& O_NONBLOCK
)) {
/* Enqueue ourselves while still holding wqh.lock (double-underscore variant). */
150 __add_wait_queue(&ctx
->wqh
, &wait
);
/* Task state is set to TASK_INTERRUPTIBLE before the room test is repeated. */
152 set_current_state(TASK_INTERRUPTIBLE
);
153 if (ULLONG_MAX
- ctx
->count
> ucnt
) {
/* A pending signal is checked while waiting (what is returned in that case is not visible here). */
157 if (signal_pending(current
)) {
/* Lock is dropped and re-taken here - presumably around the sleep call, which is not visible in this listing. */
161 spin_unlock_irq(&ctx
->wqh
.lock
);
163 spin_lock_irq(&ctx
->wqh
.lock
);
/* Done waiting: dequeue and restore the running state, still under wqh.lock. */
165 __remove_wait_queue(&ctx
->wqh
, &wait
);
166 __set_current_state(TASK_RUNNING
);
/* Wake other waiters on the same queue, with wqh.lock held; the condition guarding this is not visible here. */
170 if (waitqueue_active(&ctx
->wqh
))
171 wake_up_locked(&ctx
->wqh
);
173 spin_unlock_irq(&ctx
->wqh
.lock
);
/*
 * File operations table for eventfd files: wires release, poll, read and
 * write to the handlers defined in this file. eventfd_fget() compares a
 * file's f_op against the address of this table to recognise an eventfd.
 * (The closing of the initializer is not present in this listing.)
 */
178 static const struct file_operations eventfd_fops
= {
179 .release
= eventfd_release
,
180 .poll
= eventfd_poll
,
181 .read
= eventfd_read
,
182 .write
= eventfd_write
,
/*
 * Returns a struct file pointer for "fd", or an ERR_PTR() value on failure.
 * Partial listing: the lookup of "file", the cleanup on the -EINVAL path and
 * the success return are not present here.
 */
185 struct file
*eventfd_fget(int fd
)
/* -EBADF path; its guarding condition is not visible (presumably the fd lookup failed - TODO confirm). */
191 return ERR_PTR(-EBADF
);
/* Type check: the file counts as an eventfd only if its f_op is this file's eventfd_fops table. */
192 if (file
->f_op
!= &eventfd_fops
) {
194 return ERR_PTR(-EINVAL
);
200 asmlinkage
long sys_eventfd(unsigned int count
)
203 struct eventfd_ctx
*ctx
;
207 ctx
= kmalloc(sizeof(*ctx
), GFP_KERNEL
);
211 init_waitqueue_head(&ctx
->wqh
);
215 * When we call this, the initialization must be complete, since
216 * anon_inode_getfd() will install the fd.
218 error
= anon_inode_getfd(&fd
, &inode
, &file
, "[eventfd]",