/* fs/select.c -- as of Linux 2.4.0-test5-pre3 */

/*
 * This file contains the procedures for the handling of select and poll
 *
 * Created for Linux based loosely upon Mathius Lattner's minix
 * patches by Peter MacDonald. Heavily edited by Linus.
 *
 *  4 February 1994
 *     COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
 *     flag set in its personality we do *not* modify the given timeout
 *     parameter to reflect time remaining.
 *
 *  24 January 2000
 *     Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation
 *     of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian).
 */

#include <linux/malloc.h>
#include <linux/smp_lock.h>
#include <linux/poll.h>
#include <linux/file.h>

#include <asm/uaccess.h>

#define ROUND_UP(x,y) (((x)+(y)-1)/(y))
#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
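
/*
 * Note: DEFAULT_POLLMASK is what gets reported for a file whose f_op has
 * no poll method -- such files are simply treated as always readable and
 * always writable (see do_select() and do_pollfd() below).
 */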

/*
 * Ok, Peter made a complicated, but straightforward multiple_wait() function.
 * I have rewritten this, taking some shortcuts: This code may not be easy to
 * follow, but it should be free of race-conditions, and it's practical. If you
 * understand what I'm doing here, then you understand how the linux
 * sleep/wakeup mechanism works.
 *
 * Two very simple procedures, poll_wait() and poll_freewait(), do all the
 * work. poll_wait() is an inline-function defined in <linux/poll.h>,
 * as all select/poll functions have to call it to add an entry to the
 * poll table.
 */
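
/*
 * Illustrative sketch, not part of this file: a minimal f_op->poll method
 * as a driver might implement it against the machinery below.  It calls
 * poll_wait() to hook the caller onto the device's wait queue (which ends
 * up in __pollwait() via the poll_table), then returns whatever events are
 * ready right now.  "struct example_dev", "example_readable()" and
 * "example_writable()" are hypothetical names used only for illustration.
 */
#if 0
static unsigned int example_poll(struct file *file, poll_table *wait)
{
	struct example_dev *dev = file->private_data;	/* hypothetical device state */
	unsigned int mask = 0;

	/* Register on the device wait queue; never blocks by itself. */
	poll_wait(file, &dev->waitq, wait);

	if (example_readable(dev))
		mask |= POLLIN | POLLRDNORM;
	if (example_writable(dev))
		mask |= POLLOUT | POLLWRNORM;
	return mask;
}
#endif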

void poll_freewait(poll_table* pt)
{
	struct poll_table_page * p = pt->table;
	while (p) {
		struct poll_table_entry * entry;
		struct poll_table_page *old;

		entry = p->entry + p->nr;
		while (p->nr > 0) {
			p->nr--;
			entry--;
			remove_wait_queue(entry->wait_address, &entry->wait);
			fput(entry->filp);
		}
		old = p;
		p = p->next;
		free_page((unsigned long) old);
	}
}

void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
{
	struct poll_table_page *table = p->table;

	if (!table || table->nr >= __MAX_POLL_TABLE_ENTRIES) {
		struct poll_table_page *new_table;

		new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
		if (!new_table) {
			p->error = -ENOMEM;
			__set_current_state(TASK_RUNNING);
			return;
		}
		new_table->nr = 0;
		new_table->entry = (struct poll_table_entry *)(new_table + 1);
		new_table->next = table;
		p->table = new_table;
		table = new_table;
	}

	/* Add a new entry */
	{
		struct poll_table_entry * entry;
		entry = table->entry + table->nr;
		table->nr++;
		get_file(filp);
		entry->filp = filp;
		entry->wait_address = wait_address;
		init_waitqueue_entry(&entry->wait, current);
		add_wait_queue(wait_address, &entry->wait);
	}
}

#define __IN(fds, n)		(fds->in + n)
#define __OUT(fds, n)		(fds->out + n)
#define __EX(fds, n)		(fds->ex + n)
#define __RES_IN(fds, n)	(fds->res_in + n)
#define __RES_OUT(fds, n)	(fds->res_out + n)
#define __RES_EX(fds, n)	(fds->res_ex + n)

#define BITS(fds, n)		(*__IN(fds, n)|*__OUT(fds, n)|*__EX(fds, n))
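
/*
 * max_select_fd() scans the three input bitmaps and returns one more than
 * the highest descriptor the caller is actually interested in, or -EBADF
 * if any requested descriptor is not open.  do_select() uses the result
 * to shrink its scan range.
 */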
static int max_select_fd(unsigned long n, fd_set_bits *fds)
{
	unsigned long *open_fds;
	unsigned long set;
	int max;

	/* handle last incomplete long-word first */
	set = ~(~0UL << (n & (__NFDBITS-1)));
	n /= __NFDBITS;
	open_fds = current->files->open_fds->fds_bits+n;
	max = 0;
	if (set) {
		set &= BITS(fds, n);
		if (set) {
			if (!(set & ~*open_fds))
				goto get_max;
			return -EBADF;
		}
	}
	while (n) {
		open_fds--;
		n--;
		set = BITS(fds, n);
		if (!set)
			continue;
		if (set & ~*open_fds)
			return -EBADF;
		if (max)
			continue;
get_max:
		do {
			max++;
			set >>= 1;
		} while (set);
		max += n * __NFDBITS;
	}

	return max;
}

#define BIT(i)		(1UL << ((i)&(__NFDBITS-1)))
#define MEM(i,m)	((m)+(unsigned)(i)/__NFDBITS)
#define ISSET(i,m)	(((i)&*(m)) != 0)
#define SET(i,m)	(*(m) |= (i))

#define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR)
#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
#define POLLEX_SET (POLLPRI)
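
/*
 * The *_SET masks translate a driver's poll() mask into select()'s three
 * categories: readable, writable and exceptional.  POLLERR counts as both
 * "readable" and "writable", and POLLHUP counts as "readable", so a
 * subsequent read() or write() will not block and can report the condition.
 *
 * do_select() registers on every wait queue during the first scan (while
 * "wait" is non-NULL); once a ready descriptor is found, or one full scan
 * has completed, "wait" is set to NULL so later passes only test readiness
 * without re-registering.
 */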

int do_select(int n, fd_set_bits *fds, long *timeout)
{
	poll_table table, *wait;
	int retval, i, off;
	long __timeout = *timeout;

	read_lock(&current->files->file_lock);
	retval = max_select_fd(n, fds);
	read_unlock(&current->files->file_lock);

	if (retval < 0)
		return retval;
	n = retval;

	poll_initwait(&table);
	wait = &table;
	if (!__timeout)
		wait = NULL;
	retval = 0;
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		for (i = 0 ; i < n; i++) {
			unsigned long bit = BIT(i);
			unsigned long mask;
			struct file *file;

			off = i / __NFDBITS;
			if (!(bit & BITS(fds, off)))
				continue;
			file = fget(i);
			mask = POLLNVAL;
			if (file) {
				mask = DEFAULT_POLLMASK;
				if (file->f_op && file->f_op->poll)
					mask = file->f_op->poll(file, wait);
				fput(file);
			}
			if ((mask & POLLIN_SET) && ISSET(bit, __IN(fds,off))) {
				SET(bit, __RES_IN(fds,off));
				retval++;
				wait = NULL;
			}
			if ((mask & POLLOUT_SET) && ISSET(bit, __OUT(fds,off))) {
				SET(bit, __RES_OUT(fds,off));
				retval++;
				wait = NULL;
			}
			if ((mask & POLLEX_SET) && ISSET(bit, __EX(fds,off))) {
				SET(bit, __RES_EX(fds,off));
				retval++;
				wait = NULL;
			}
		}
		wait = NULL;
		if (retval || !__timeout || signal_pending(current))
			break;
		if (table.error) {
			retval = table.error;
			break;
		}
		__timeout = schedule_timeout(__timeout);
	}
	current->state = TASK_RUNNING;

	poll_freewait(&table);

	/*
	 * Update the caller's timeout with the time remaining.
	 */
	*timeout = __timeout;
	return retval;
}

static void *select_bits_alloc(int size)
{
	return kmalloc(6 * size, GFP_KERNEL);
}

static void select_bits_free(void *bits, int size)
{
	kfree(bits);
}

/*
 * We can actually return ERESTARTSYS instead of EINTR, but I'd
 * like to be certain this leads to no problems. So I return
 * EINTR just for safety.
 *
 * Update: ERESTARTSYS breaks at least the xview clock binary, so
 * I'm trying ERESTARTNOHAND which restarts only when you want it to.
 */
#define MAX_SELECT_SECONDS \
	((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)

asmlinkage long
sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp)
{
	fd_set_bits fds;
	char *bits;
	long timeout;
	int ret, size;

	timeout = MAX_SCHEDULE_TIMEOUT;
	if (tvp) {
		time_t sec, usec;

		if ((ret = verify_area(VERIFY_READ, tvp, sizeof(*tvp)))
		    || (ret = __get_user(sec, &tvp->tv_sec))
		    || (ret = __get_user(usec, &tvp->tv_usec)))
			goto out_nofds;

		ret = -EINVAL;
		if (sec < 0 || usec < 0)
			goto out_nofds;

		if ((unsigned long) sec < MAX_SELECT_SECONDS) {
			timeout = ROUND_UP(usec, 1000000/HZ);
			timeout += sec * (unsigned long) HZ;
		}
	}

	ret = -EINVAL;
	if (n < 0)
		goto out_nofds;

	if (n > current->files->max_fdset)
		n = current->files->max_fdset;

	/*
	 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
	 * and since we use fd sets we need to allocate memory in units of
	 * long-words.
	 */
	ret = -ENOMEM;
	size = FDS_BYTES(n);
	bits = select_bits_alloc(size);
	if (!bits)
		goto out_nofds;
	fds.in      = (unsigned long *)  bits;
	fds.out     = (unsigned long *) (bits +   size);
	fds.ex      = (unsigned long *) (bits + 2*size);
	fds.res_in  = (unsigned long *) (bits + 3*size);
	fds.res_out = (unsigned long *) (bits + 4*size);
	fds.res_ex  = (unsigned long *) (bits + 5*size);

	if ((ret = get_fd_set(n, inp, fds.in)) ||
	    (ret = get_fd_set(n, outp, fds.out)) ||
	    (ret = get_fd_set(n, exp, fds.ex)))
		goto out;
	zero_fd_set(n, fds.res_in);
	zero_fd_set(n, fds.res_out);
	zero_fd_set(n, fds.res_ex);

	ret = do_select(n, &fds, &timeout);

	if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
		time_t sec = 0, usec = 0;
		if (timeout) {
			sec = timeout / HZ;
			usec = timeout % HZ;
			usec *= (1000000/HZ);
		}
		put_user(sec, &tvp->tv_sec);
		put_user(usec, &tvp->tv_usec);
	}

	if (ret < 0)
		goto out;
	if (!ret) {
		ret = -ERESTARTNOHAND;
		if (signal_pending(current))
			goto out;
		ret = 0;
	}

	set_fd_set(n, inp, fds.res_in);
	set_fd_set(n, outp, fds.res_out);
	set_fd_set(n, exp, fds.res_ex);

out:
	select_bits_free(bits, size);
out_nofds:
	return ret;
}
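
/*
 * Illustrative user-space sketch, not part of this file: because
 * sys_select() above writes the remaining time back into *tvp (unless the
 * task's personality has STICKY_TIMEOUTS), a portable caller should reset
 * the timeval before every call.  "fd" is a hypothetical descriptor.
 */
#if 0
	struct timeval tv;
	fd_set rfds;

	for (;;) {
		FD_ZERO(&rfds);
		FD_SET(fd, &rfds);	/* "fd": hypothetical descriptor */
		tv.tv_sec = 5;		/* re-arm: select() may modify tv */
		tv.tv_usec = 0;
		if (select(fd + 1, &rfds, NULL, NULL, &tv) > 0 &&
		    FD_ISSET(fd, &rfds))
			break;		/* fd became readable within 5 seconds */
	}
#endif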

#define POLLFD_PER_PAGE  ((PAGE_SIZE) / sizeof(struct pollfd))

static void do_pollfd(unsigned int num, struct pollfd * fdpage,
	poll_table ** pwait, int *count)
{
	int i;

	for (i = 0; i < num; i++) {
		int fd;
		unsigned int mask;
		struct pollfd *fdp;

		mask = 0;
		fdp = fdpage+i;
		fd = fdp->fd;
		if (fd >= 0) {
			struct file * file = fget(fd);
			mask = POLLNVAL;
			if (file != NULL) {
				mask = DEFAULT_POLLMASK;
				if (file->f_op && file->f_op->poll)
					mask = file->f_op->poll(file, *pwait);
				mask &= fdp->events | POLLERR | POLLHUP;
				fput(file);
			}
			if (mask) {
				*pwait = NULL;
				(*count)++;
			}
		}

		fdp->revents = mask;
	}
}

static int do_poll(unsigned int nfds, unsigned int nchunks, unsigned int nleft,
	struct pollfd *fds[], poll_table *wait, long timeout)
{
	int count = 0;
	poll_table* pt = wait;

	for (;;) {
		unsigned int i;

		set_current_state(TASK_INTERRUPTIBLE);
		for (i=0; i < nchunks; i++)
			do_pollfd(POLLFD_PER_PAGE, fds[i], &pt, &count);
		if (nleft)
			do_pollfd(nleft, fds[nchunks], &pt, &count);
		pt = NULL;
		if (count || !timeout || signal_pending(current))
			break;
		if (wait->error) {
			return wait->error;
		}
		timeout = schedule_timeout(timeout);
	}
	current->state = TASK_RUNNING;
	return count;
}
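
/*
 * sys_poll() copies the caller's pollfd array into kernel space in
 * PAGE_SIZE chunks: "fds" below is a small kmalloc'd array of pointers,
 * each pointing at one page holding up to POLLFD_PER_PAGE entries, with
 * only "nleft" entries used in the final page.  This chunked layout is
 * what removed the old nfds < 16390 limit mentioned in the changelog at
 * the top of the file.
 */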
asmlinkage long sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout)
{
	int i, j, fdcount, err;
	struct pollfd **fds;
	poll_table table, *wait;
	int nchunks, nleft;

	/* Do a sanity check on nfds ... */
	if (nfds > current->files->max_fds)
		return -EINVAL;

	if (timeout) {
		/* Careful about overflow in the intermediate values */
		if ((unsigned long) timeout < MAX_SCHEDULE_TIMEOUT / HZ)
			timeout = (unsigned long)(timeout*HZ+999)/1000+1;
		else /* Negative or overflow */
			timeout = MAX_SCHEDULE_TIMEOUT;
	}

	poll_initwait(&table);
	wait = &table;
	if (!timeout)
		wait = NULL;

	err = -ENOMEM;
	fds = NULL;
	if (nfds != 0) {
		fds = (struct pollfd **)kmalloc(
			(1 + (nfds - 1) / POLLFD_PER_PAGE) * sizeof(struct pollfd *),
			GFP_KERNEL);
		if (fds == NULL)
			goto out;
	}

	nchunks = 0;
	nleft = nfds;
	while (nleft > POLLFD_PER_PAGE) { /* allocate complete PAGE_SIZE chunks */
		fds[nchunks] = (struct pollfd *)__get_free_page(GFP_KERNEL);
		if (fds[nchunks] == NULL)
			goto out_fds;
		nchunks++;
		nleft -= POLLFD_PER_PAGE;
	}
	if (nleft) { /* allocate last PAGE_SIZE chunk, only nleft elements used */
		fds[nchunks] = (struct pollfd *)__get_free_page(GFP_KERNEL);
		if (fds[nchunks] == NULL)
			goto out_fds;
	}

	err = -EFAULT;
	for (i=0; i < nchunks; i++)
		if (copy_from_user(fds[i], ufds + i*POLLFD_PER_PAGE, PAGE_SIZE))
			goto out_fds1;
	if (nleft) {
		if (copy_from_user(fds[nchunks], ufds + nchunks*POLLFD_PER_PAGE,
				   nleft * sizeof(struct pollfd)))
			goto out_fds1;
	}

	fdcount = do_poll(nfds, nchunks, nleft, fds, wait, timeout);

	/* OK, now copy the revents fields back to user space. */
	for (i=0; i < nchunks; i++)
		for (j=0; j < POLLFD_PER_PAGE; j++, ufds++)
			__put_user((fds[i] + j)->revents, &ufds->revents);
	if (nleft)
		for (j=0; j < nleft; j++, ufds++)
			__put_user((fds[nchunks] + j)->revents, &ufds->revents);

	err = fdcount;
	if (!fdcount && signal_pending(current))
		err = -EINTR;

out_fds1:
	if (nleft)
		free_page((unsigned long)(fds[nchunks]));
out_fds:
	for (i=0; i < nchunks; i++)
		free_page((unsigned long)(fds[i]));
	if (nfds != 0)
		kfree(fds);
out:
	poll_freewait(&table);
	return err;
}
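
/*
 * Illustrative user-space sketch, not part of this file: a single poll()
 * call corresponding to the path above.  Note that do_pollfd() masks the
 * returned revents with (events | POLLERR | POLLHUP), so error and hangup
 * conditions are always reported even when not requested.  "fd" is a
 * hypothetical descriptor.
 */
#if 0
	struct pollfd pfd;
	int ret;

	pfd.fd = fd;			/* "fd": hypothetical descriptor */
	pfd.events = POLLIN;		/* only interested in readability */
	pfd.revents = 0;

	ret = poll(&pfd, 1, 5000);	/* timeout in milliseconds */
	if (ret > 0 && (pfd.revents & (POLLIN | POLLHUP | POLLERR)))
		/* readable, or the other end hung up / errored */;
#endif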