Import 2.3.10pre1
[davej-history.git] / fs / select.c
blob 4cb1a6d5515302c3dec87a9cf1c487df6a295923
/*
 * This file contains the procedures for the handling of select and poll
 *
 * Created for Linux based loosely upon Mathius Lattner's minix
 * patches by Peter MacDonald. Heavily edited by Linus.
 *
 *  4 February 1994
 *     COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
 *     flag set in its personality we do *not* modify the given timeout
 *     parameter to reflect time remaining.
 */
#include <linux/malloc.h>
#include <linux/smp_lock.h>
#include <linux/poll.h>
#include <linux/file.h>

#include <asm/uaccess.h>
#define ROUND_UP(x,y) (((x)+(y)-1)/(y))
#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
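/*
 * ROUND_UP(x,y) divides x by y, rounding up; sys_select() uses it to
 * turn a microsecond count into whole jiffies.  DEFAULT_POLLMASK is
 * the mask assumed for files whose f_op has no poll method: such
 * files are always treated as readable and writable.
 */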
/*
 * Ok, Peter made a complicated, but straightforward multiple_wait() function.
 * I have rewritten this, taking some shortcuts: This code may not be easy to
 * follow, but it should be free of race-conditions, and it's practical. If you
 * understand what I'm doing here, then you understand how the linux
 * sleep/wakeup mechanism works.
 *
 * Two very simple procedures, poll_wait() and free_wait(), do all the
 * work.  poll_wait() is an inline-function defined in <linux/poll.h>,
 * as all select/poll functions have to call it to add an entry to the
 * poll table.
 */
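/*
 * Illustrative sketch of a typical ->poll method (the driver names
 * my_poll, my_queue and my_data_ready are hypothetical):
 *
 *	static DECLARE_WAIT_QUEUE_HEAD(my_queue);
 *	static int my_data_ready;
 *
 *	static unsigned int my_poll(struct file *file, poll_table *wait)
 *	{
 *		poll_wait(file, &my_queue, wait);
 *		return my_data_ready ? (POLLIN | POLLRDNORM) : 0;
 *	}
 *
 * poll_wait() ends up in __pollwait() below, which puts the calling
 * task on my_queue; a later wake_up() on that queue makes the
 * select/poll loop call the ->poll method again.
 */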
/*
 * I rewrote this again to make the poll_table size variable, take some
 * more shortcuts, improve responsiveness, and remove another race that
 * Linus noticed.  -- jrs
 */
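/*
 * free_wait() tears a poll table down again: for every registered
 * entry it removes the task from the wait queue and drops the file
 * reference taken in __pollwait(), then frees each page of the
 * chained table.
 */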
static void free_wait(poll_table * p)
{
        struct poll_table_entry * entry;
        poll_table *old;

        while (p) {
                entry = p->entry + p->nr;
                while (p->nr > 0) {
                        p->nr--;
                        entry--;
                        remove_wait_queue(entry->wait_address, &entry->wait);
                        fput(entry->filp);
                }
                old = p;
                p = p->next;
                free_page((unsigned long) old);
        }
}
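/*
 * __pollwait() is the backend of poll_wait(): it records the file and
 * wait queue in the next free poll_table_entry, takes a reference to
 * the file, and puts the current task on the wait queue.  When a page
 * fills up, another page is allocated and chained via p->next.
 */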
void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
{
        for (;;) {
                if (p->nr < __MAX_POLL_TABLE_ENTRIES) {
                        struct poll_table_entry * entry;
ok_table:
                        entry = p->entry + p->nr;
                        entry->filp = filp;
                        atomic_inc(&filp->f_count);
                        entry->wait_address = wait_address;
                        init_waitqueue_entry(&entry->wait, current);
                        add_wait_queue(wait_address, &entry->wait);
                        p->nr++;
                        return;
                }
                if (p->next == NULL) {
                        poll_table *tmp = (poll_table *) __get_free_page(GFP_KERNEL);
                        if (!tmp)
                                return;
                        tmp->nr = 0;
                        tmp->entry = (struct poll_table_entry *)(tmp + 1);
                        tmp->next = NULL;
                        p->next = tmp;
                        p = tmp;
                        goto ok_table;
                }
                p = p->next;
        }
}
#define __IN(fds, n)		(fds->in + n)
#define __OUT(fds, n)		(fds->out + n)
#define __EX(fds, n)		(fds->ex + n)
#define __RES_IN(fds, n)	(fds->res_in + n)
#define __RES_OUT(fds, n)	(fds->res_out + n)
#define __RES_EX(fds, n)	(fds->res_ex + n)

#define BITS(fds, n)		(*__IN(fds, n)|*__OUT(fds, n)|*__EX(fds, n))
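/*
 * max_select_fd() scans the three input bitmaps: it returns -EBADF if
 * any requested descriptor is not actually open, otherwise the highest
 * requested descriptor plus one, so that the main loop only walks bits
 * that can matter.
 */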
static int max_select_fd(unsigned long n, fd_set_bits *fds)
{
        unsigned long *open_fds;
        unsigned long set;
        int max;

        /* handle the last, incomplete long-word first */
        set = ~(~0UL << (n & (__NFDBITS-1)));
        n /= __NFDBITS;
        open_fds = current->files->open_fds.fds_bits+n;
        max = 0;
        if (set) {
                set &= BITS(fds, n);
                if (set) {
                        if (!(set & ~*open_fds))
                                goto get_max;
                        return -EBADF;
                }
        }
        while (n) {
                open_fds--;
                n--;
                set = BITS(fds, n);
                if (!set)
                        continue;
                if (set & ~*open_fds)
                        return -EBADF;
                if (max)
                        continue;
get_max:
                do {
                        max++;
                        set >>= 1;
                } while (set);
                max += n * __NFDBITS;
        }

        return max;
}
#define BIT(i)		(1UL << ((i)&(__NFDBITS-1)))
#define MEM(i,m)	((m)+(unsigned)(i)/__NFDBITS)
#define ISSET(i,m)	(((i)&*(m)) != 0)
#define SET(i,m)	(*(m) |= (i))

#define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR)
#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
#define POLLEX_SET (POLLPRI)
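/*
 * do_select() is the core loop: it polls every requested descriptor,
 * registering the task on each file's wait queue on the first pass
 * (when there is a timeout to sleep for), and sleeps with
 * schedule_timeout() until a wakeup, a signal or the timeout ends the
 * loop.  wait is set to NULL as soon as a hit is found, and after the
 * first full pass in any case.  POLLIN_SET, POLLOUT_SET and POLLEX_SET
 * translate the poll event bits back into select()'s
 * readable/writable/exception sets.
 */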
int do_select(int n, fd_set_bits *fds, long *timeout)
{
        poll_table *wait_table, *wait;
        int retval, i, off;
        long __timeout = *timeout;

        wait = wait_table = NULL;
        if (__timeout) {
                wait_table = (poll_table *) __get_free_page(GFP_KERNEL);
                if (!wait_table)
                        return -ENOMEM;

                wait_table->nr = 0;
                wait_table->entry = (struct poll_table_entry *)(wait_table + 1);
                wait_table->next = NULL;
                wait = wait_table;
        }

        lock_kernel();

        retval = max_select_fd(n, fds);
        if (retval < 0)
                goto out;
        n = retval;
        retval = 0;
        for (;;) {
                current->state = TASK_INTERRUPTIBLE;
                for (i = 0 ; i < n; i++) {
                        unsigned long bit = BIT(i);
                        unsigned long mask;
                        struct file *file;

                        off = i / __NFDBITS;
                        if (!(bit & BITS(fds, off)))
                                continue;
                        file = fget(i);
                        mask = POLLNVAL;
                        if (file) {
                                mask = DEFAULT_POLLMASK;
                                if (file->f_op && file->f_op->poll)
                                        mask = file->f_op->poll(file, wait);
                                fput(file);
                        }
                        if ((mask & POLLIN_SET) && ISSET(bit, __IN(fds,off))) {
                                SET(bit, __RES_IN(fds,off));
                                retval++;
                                wait = NULL;
                        }
                        if ((mask & POLLOUT_SET) && ISSET(bit, __OUT(fds,off))) {
                                SET(bit, __RES_OUT(fds,off));
                                retval++;
                                wait = NULL;
                        }
                        if ((mask & POLLEX_SET) && ISSET(bit, __EX(fds,off))) {
                                SET(bit, __RES_EX(fds,off));
                                retval++;
                                wait = NULL;
                        }
                }
                wait = NULL;
                if (retval || !__timeout || signal_pending(current))
                        break;
                __timeout = schedule_timeout(__timeout);
        }
        current->state = TASK_RUNNING;

out:
        if (*timeout)
                free_wait(wait_table);

        /*
         * Update the caller's timeout with the time remaining.
         */
        *timeout = __timeout;
        unlock_kernel();
        return retval;
}
/*
 * We can actually return ERESTARTSYS instead of EINTR, but I'd
 * like to be certain this leads to no problems. So I return
 * EINTR just for safety.
 *
 * Update: ERESTARTSYS breaks at least the xview clock binary, so
 * I'm trying ERESTARTNOHAND, which restarts only when you want it to.
 */
#define MAX_SELECT_SECONDS \
	((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
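/*
 * sys_select() copies the user's timeout and fd sets into kernel
 * bitmaps, runs do_select(), writes the remaining time back unless
 * the personality has STICKY_TIMEOUTS set, and copies the result
 * sets out to user space.
 */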
asmlinkage int
sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp)
{
        fd_set_bits fds;
        char *bits;
        long timeout;
        int ret, size;

        timeout = MAX_SCHEDULE_TIMEOUT;
        if (tvp) {
                time_t sec, usec;

                if ((ret = verify_area(VERIFY_READ, tvp, sizeof(*tvp)))
                    || (ret = __get_user(sec, &tvp->tv_sec))
                    || (ret = __get_user(usec, &tvp->tv_usec)))
                        goto out_nofds;

                ret = -EINVAL;
                if (sec < 0 || usec < 0)
                        goto out_nofds;

                if ((unsigned long) sec < MAX_SELECT_SECONDS) {
                        timeout = ROUND_UP(usec, 1000000/HZ);
                        timeout += sec * (unsigned long) HZ;
                }
        }

        ret = -EINVAL;
        if (n < 0)
                goto out_nofds;

        if (n > KFDS_NR)
                n = KFDS_NR;

        /*
         * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
         * and since we use the fd_set representation we need to allocate
         * memory in units of long-words.
         */
        ret = -ENOMEM;
        size = FDS_BYTES(n);
        bits = kmalloc(6 * size, GFP_KERNEL);
        if (!bits)
                goto out_nofds;
        fds.in      = (unsigned long *)  bits;
        fds.out     = (unsigned long *) (bits +   size);
        fds.ex      = (unsigned long *) (bits + 2*size);
        fds.res_in  = (unsigned long *) (bits + 3*size);
        fds.res_out = (unsigned long *) (bits + 4*size);
        fds.res_ex  = (unsigned long *) (bits + 5*size);

        if ((ret = get_fd_set(n, inp, fds.in)) ||
            (ret = get_fd_set(n, outp, fds.out)) ||
            (ret = get_fd_set(n, exp, fds.ex)))
                goto out;
        zero_fd_set(n, fds.res_in);
        zero_fd_set(n, fds.res_out);
        zero_fd_set(n, fds.res_ex);

        ret = do_select(n, &fds, &timeout);

        if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
                time_t sec = 0, usec = 0;
                if (timeout) {
                        sec = timeout / HZ;
                        usec = timeout % HZ;
                        usec *= (1000000/HZ);
                }
                put_user(sec, &tvp->tv_sec);
                put_user(usec, &tvp->tv_usec);
        }

        if (ret < 0)
                goto out;
        if (!ret) {
                ret = -ERESTARTNOHAND;
                if (signal_pending(current))
                        goto out;
                ret = 0;
        }

        set_fd_set(n, inp, fds.res_in);
        set_fd_set(n, outp, fds.res_out);
        set_fd_set(n, exp, fds.res_ex);

out:
        kfree(bits);
out_nofds:
        return ret;
}
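/*
 * do_poll() mirrors the do_select() loop for the pollfd interface:
 * every descriptor is polled once per pass, the returned events are
 * masked with what the caller asked for (plus POLLERR and POLLHUP,
 * which are always reported), and the task sleeps between passes
 * until an event, a signal or the timeout.
 */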
static int do_poll(unsigned int nfds, struct pollfd *fds, poll_table *wait,
                   long timeout)
{
        int count = 0;

        for (;;) {
                unsigned int j;
                struct pollfd * fdpnt;

                current->state = TASK_INTERRUPTIBLE;
                for (fdpnt = fds, j = 0; j < nfds; j++, fdpnt++) {
                        int fd;
                        unsigned int mask;

                        mask = 0;
                        fd = fdpnt->fd;
                        if (fd >= 0) {
                                struct file * file = fget(fd);
                                mask = POLLNVAL;
                                if (file != NULL) {
                                        mask = DEFAULT_POLLMASK;
                                        if (file->f_op && file->f_op->poll)
                                                mask = file->f_op->poll(file, wait);
                                        mask &= fdpnt->events | POLLERR | POLLHUP;
                                        fput(file);
                                }
                                if (mask) {
                                        wait = NULL;
                                        count++;
                                }
                        }
                        fdpnt->revents = mask;
                }

                wait = NULL;
                if (count || !timeout || signal_pending(current))
                        break;
                timeout = schedule_timeout(timeout);
        }
        current->state = TASK_RUNNING;
        return count;
}
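/*
 * sys_poll() validates nfds, converts the millisecond timeout into
 * jiffies, sets up a wait table when the call can block, copies the
 * pollfd array in from user space, runs do_poll(), and copies the
 * revents fields back out.
 */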
asmlinkage int sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout)
{
        int i, fdcount, err, size;
        struct pollfd * fds, *fds1;
        poll_table *wait_table = NULL, *wait = NULL;

        lock_kernel();
        /* Do a sanity check on nfds ... */
        err = -EINVAL;
        if (nfds > NR_OPEN)
                goto out;

        if (timeout) {
                /* Careful about overflow in the intermediate values */
                if ((unsigned long) timeout < MAX_SCHEDULE_TIMEOUT / HZ)
                        timeout = (unsigned long)(timeout*HZ+999)/1000+1;
                else /* Negative or overflow */
                        timeout = MAX_SCHEDULE_TIMEOUT;
        }

        err = -ENOMEM;
        if (timeout) {
                wait_table = (poll_table *) __get_free_page(GFP_KERNEL);
                if (!wait_table)
                        goto out;
                wait_table->nr = 0;
                wait_table->entry = (struct poll_table_entry *)(wait_table + 1);
                wait_table->next = NULL;
                wait = wait_table;
        }

        size = nfds * sizeof(struct pollfd);
        fds = (struct pollfd *) kmalloc(size, GFP_KERNEL);
        if (!fds)
                goto out;

        err = -EFAULT;
        if (copy_from_user(fds, ufds, size))
                goto out_fds;

        fdcount = do_poll(nfds, fds, wait, timeout);

        /* OK, now copy the revents fields back to user space. */
        fds1 = fds;
        for (i = 0; i < (int)nfds; i++, ufds++, fds1++) {
                __put_user(fds1->revents, &ufds->revents);
        }

        err = fdcount;
        if (!fdcount && signal_pending(current))
                err = -EINTR;

out_fds:
        kfree(fds);
out:
        if (wait)
                free_wait(wait_table);
        unlock_kernel();
        return err;
}