This should hopefully fix the warnings reported.
[linux-2.6/linux-mips.git] / fs / select.c
blob33e54a9fae372e860389b6ca10bbac96d98f9a4c
/*
 * This file contains the procedures for the handling of select and poll
 *
 * Created for Linux based loosely upon Mathius Lattner's minix
 * patches by Peter MacDonald. Heavily edited by Linus.
 *
 *  4 February 1994
 *     COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
 *     flag set in its personality we do *not* modify the given timeout
 *     parameter to reflect time remaining.
 *
 *  24 January 2000
 *     Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation
 *     of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian).
 */
17 #include <linux/malloc.h>
18 #include <linux/smp_lock.h>
19 #include <linux/poll.h>
20 #include <linux/file.h>
22 #include <asm/uaccess.h>
/* Integer division rounding up; used to convert microseconds to ticks. */
#define ROUND_UP(x,y) (((x)+(y)-1)/(y))
/* Mask reported for files whose f_op has no ->poll method. */
#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
/*
 * Ok, Peter made a complicated, but straightforward multiple_wait() function.
 * I have rewritten this, taking some shortcuts: This code may not be easy to
 * follow, but it should be free of race-conditions, and it's practical. If you
 * understand what I'm doing here, then you understand how the linux
 * sleep/wakeup mechanism works.
 *
 * Two very simple procedures, poll_wait() and free_wait() make all the
 * work. poll_wait() is an inline-function defined in <linux/poll.h>,
 * as all select/poll functions have to call it to add an entry to the
 * poll table.
 *
 * I rewrote this again to make the poll_table size variable, take some
 * more shortcuts, improve responsiveness, and remove another race that
 * Linus noticed. -- jrs
 */
46 static poll_table* alloc_wait(int nfds)
48 poll_table* out;
49 poll_table* walk;
51 out = (poll_table *) __get_free_page(GFP_KERNEL);
52 if(out==NULL)
53 return NULL;
54 out->nr = 0;
55 out->entry = (struct poll_table_entry *)(out + 1);
56 out->next = NULL;
57 nfds -=__MAX_POLL_TABLE_ENTRIES;
58 walk = out;
59 while(nfds > 0) {
60 poll_table *tmp = (poll_table *) __get_free_page(GFP_KERNEL);
61 if (!tmp) {
62 while(out != NULL) {
63 tmp = out->next;
64 free_page((unsigned long)out);
65 out = tmp;
67 return NULL;
69 tmp->nr = 0;
70 tmp->entry = (struct poll_table_entry *)(tmp + 1);
71 tmp->next = NULL;
72 walk->next = tmp;
73 walk = tmp;
74 nfds -=__MAX_POLL_TABLE_ENTRIES;
76 return out;
79 static void free_wait(poll_table * p)
81 struct poll_table_entry * entry;
82 poll_table *old;
84 while (p) {
85 entry = p->entry + p->nr;
86 while (p->nr > 0) {
87 p->nr--;
88 entry--;
89 remove_wait_queue(entry->wait_address,&entry->wait);
90 fput(entry->filp);
92 old = p;
93 p = p->next;
94 free_page((unsigned long) old);
98 void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
100 for (;;) {
101 if (p->nr < __MAX_POLL_TABLE_ENTRIES) {
102 struct poll_table_entry * entry;
103 entry = p->entry + p->nr;
104 get_file(filp);
105 entry->filp = filp;
106 entry->wait_address = wait_address;
107 init_waitqueue_entry(&entry->wait, current);
108 add_wait_queue(wait_address,&entry->wait);
109 p->nr++;
110 return;
112 p = p->next;
/* Accessors for the n-th longword of each bitmap in an fd_set_bits. */
#define __IN(fds, n)		(fds->in + n)
#define __OUT(fds, n)		(fds->out + n)
#define __EX(fds, n)		(fds->ex + n)
#define __RES_IN(fds, n)	(fds->res_in + n)
#define __RES_OUT(fds, n)	(fds->res_out + n)
#define __RES_EX(fds, n)	(fds->res_ex + n)

/* Union of the three input bitmaps' n-th longwords. */
#define BITS(fds, n)		(*__IN(fds, n)|*__OUT(fds, n)|*__EX(fds, n))
125 static int max_select_fd(unsigned long n, fd_set_bits *fds)
127 unsigned long *open_fds;
128 unsigned long set;
129 int max;
131 /* handle last in-complete long-word first */
132 set = ~(~0UL << (n & (__NFDBITS-1)));
133 n /= __NFDBITS;
134 open_fds = current->files->open_fds->fds_bits+n;
135 max = 0;
136 if (set) {
137 set &= BITS(fds, n);
138 if (set) {
139 if (!(set & ~*open_fds))
140 goto get_max;
141 return -EBADF;
144 while (n) {
145 open_fds--;
146 n--;
147 set = BITS(fds, n);
148 if (!set)
149 continue;
150 if (set & ~*open_fds)
151 return -EBADF;
152 if (max)
153 continue;
154 get_max:
155 do {
156 max++;
157 set >>= 1;
158 } while (set);
159 max += n * __NFDBITS;
162 return max;
/* Bit/word helpers for walking fd bitmaps by fd number. */
#define BIT(i)		(1UL << ((i)&(__NFDBITS-1)))
#define MEM(i,m)	((m)+(unsigned)(i)/__NFDBITS)
#define ISSET(i,m)	(((i)&*(m)) != 0)
#define SET(i,m)	(*(m) |= (i))

/* Poll events that satisfy the read/write/exception select sets. */
#define POLLIN_SET	(POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR)
#define POLLOUT_SET	(POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
#define POLLEX_SET	(POLLPRI)
174 int do_select(int n, fd_set_bits *fds, long *timeout)
176 poll_table *wait, *orig_wait;
177 int retval, i, off;
178 long __timeout = *timeout;
180 orig_wait = wait = NULL;
182 read_lock(&current->files->file_lock);
183 retval = max_select_fd(n, fds);
184 read_unlock(&current->files->file_lock);
186 if (retval < 0)
187 return retval;
188 n = retval;
189 if (__timeout) {
190 orig_wait = wait = alloc_wait(n);
191 if (!wait)
192 return -ENOMEM;
194 retval = 0;
195 for (;;) {
196 set_current_state(TASK_INTERRUPTIBLE);
197 lock_kernel();
198 for (i = 0 ; i < n; i++) {
199 unsigned long bit = BIT(i);
200 unsigned long mask;
201 struct file *file;
203 off = i / __NFDBITS;
204 if (!(bit & BITS(fds, off)))
205 continue;
206 file = fget(i);
207 mask = POLLNVAL;
208 if (file) {
209 mask = DEFAULT_POLLMASK;
210 if (file->f_op && file->f_op->poll)
211 mask = file->f_op->poll(file, wait);
212 fput(file);
214 if ((mask & POLLIN_SET) && ISSET(bit, __IN(fds,off))) {
215 SET(bit, __RES_IN(fds,off));
216 retval++;
217 wait = NULL;
219 if ((mask & POLLOUT_SET) && ISSET(bit, __OUT(fds,off))) {
220 SET(bit, __RES_OUT(fds,off));
221 retval++;
222 wait = NULL;
224 if ((mask & POLLEX_SET) && ISSET(bit, __EX(fds,off))) {
225 SET(bit, __RES_EX(fds,off));
226 retval++;
227 wait = NULL;
230 unlock_kernel();
231 wait = NULL;
232 if (retval || !__timeout || signal_pending(current))
233 break;
234 __timeout = schedule_timeout(__timeout);
236 current->state = TASK_RUNNING;
238 free_wait(orig_wait);
241 * Up-to-date the caller timeout.
243 *timeout = __timeout;
244 return retval;
/*
 * We can actually return ERESTARTSYS instead of EINTR, but I'd
 * like to be certain this leads to no problems. So I return
 * EINTR just for safety.
 *
 * Update: ERESTARTSYS breaks at least the xview clock binary, so
 * I'm trying ERESTARTNOHAND which restarts only when you want to.
 */
/* Largest timeout (in seconds) representable as jiffies without overflow. */
#define MAX_SELECT_SECONDS \
	((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
258 asmlinkage long
259 sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp)
261 fd_set_bits fds;
262 char *bits;
263 long timeout;
264 int ret, size;
266 timeout = MAX_SCHEDULE_TIMEOUT;
267 if (tvp) {
268 time_t sec, usec;
270 if ((ret = verify_area(VERIFY_READ, tvp, sizeof(*tvp)))
271 || (ret = __get_user(sec, &tvp->tv_sec))
272 || (ret = __get_user(usec, &tvp->tv_usec)))
273 goto out_nofds;
275 ret = -EINVAL;
276 if (sec < 0 || usec < 0)
277 goto out_nofds;
279 if ((unsigned long) sec < MAX_SELECT_SECONDS) {
280 timeout = ROUND_UP(usec, 1000000/HZ);
281 timeout += sec * (unsigned long) HZ;
285 ret = -EINVAL;
286 if (n < 0)
287 goto out_nofds;
289 if (n > current->files->max_fdset)
290 n = current->files->max_fdset;
293 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
294 * since we used fdset we need to allocate memory in units of
295 * long-words.
297 ret = -ENOMEM;
298 size = FDS_BYTES(n);
299 bits = kmalloc(6 * size, GFP_KERNEL);
300 if (!bits)
301 goto out_nofds;
302 fds.in = (unsigned long *) bits;
303 fds.out = (unsigned long *) (bits + size);
304 fds.ex = (unsigned long *) (bits + 2*size);
305 fds.res_in = (unsigned long *) (bits + 3*size);
306 fds.res_out = (unsigned long *) (bits + 4*size);
307 fds.res_ex = (unsigned long *) (bits + 5*size);
309 if ((ret = get_fd_set(n, inp, fds.in)) ||
310 (ret = get_fd_set(n, outp, fds.out)) ||
311 (ret = get_fd_set(n, exp, fds.ex)))
312 goto out;
313 zero_fd_set(n, fds.res_in);
314 zero_fd_set(n, fds.res_out);
315 zero_fd_set(n, fds.res_ex);
317 ret = do_select(n, &fds, &timeout);
319 if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
320 time_t sec = 0, usec = 0;
321 if (timeout) {
322 sec = timeout / HZ;
323 usec = timeout % HZ;
324 usec *= (1000000/HZ);
326 put_user(sec, &tvp->tv_sec);
327 put_user(usec, &tvp->tv_usec);
330 if (ret < 0)
331 goto out;
332 if (!ret) {
333 ret = -ERESTARTNOHAND;
334 if (signal_pending(current))
335 goto out;
336 ret = 0;
339 set_fd_set(n, inp, fds.res_in);
340 set_fd_set(n, outp, fds.res_out);
341 set_fd_set(n, exp, fds.res_ex);
343 out:
344 kfree(bits);
345 out_nofds:
346 return ret;
349 #define POLLFD_PER_PAGE ((PAGE_SIZE) / sizeof(struct pollfd))
351 static void do_pollfd(unsigned int num, struct pollfd * fdpage,
352 poll_table ** pwait, int *count)
354 int i;
356 for (i = 0; i < num; i++) {
357 int fd;
358 unsigned int mask;
359 struct pollfd *fdp;
361 mask = 0;
362 fdp = fdpage+i;
363 fd = fdp->fd;
364 if (fd >= 0) {
365 struct file * file = fget(fd);
366 mask = POLLNVAL;
367 if (file != NULL) {
368 mask = DEFAULT_POLLMASK;
369 if (file->f_op && file->f_op->poll)
370 mask = file->f_op->poll(file, *pwait);
371 mask &= fdp->events | POLLERR | POLLHUP;
372 fput(file);
374 if (mask) {
375 *pwait = NULL;
376 (*count)++;
379 fdp->revents = mask;
383 static int do_poll(unsigned int nfds, unsigned int nchunks, unsigned int nleft,
384 struct pollfd *fds[], poll_table *wait, long timeout)
386 int count = 0;
388 for (;;) {
389 unsigned int i;
391 set_current_state(TASK_INTERRUPTIBLE);
392 for (i=0; i < nchunks; i++)
393 do_pollfd(POLLFD_PER_PAGE, fds[i], &wait, &count);
394 if (nleft)
395 do_pollfd(nleft, fds[nchunks], &wait, &count);
396 wait = NULL;
397 if (count || !timeout || signal_pending(current))
398 break;
399 timeout = schedule_timeout(timeout);
401 current->state = TASK_RUNNING;
402 return count;
405 asmlinkage long sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout)
407 int i, j, fdcount, err;
408 struct pollfd **fds;
409 poll_table *wait = NULL;
410 int nchunks, nleft;
412 /* Do a sanity check on nfds ... */
413 if (nfds > current->files->max_fds)
414 return -EINVAL;
416 if (timeout) {
417 /* Careful about overflow in the intermediate values */
418 if ((unsigned long) timeout < MAX_SCHEDULE_TIMEOUT / HZ)
419 timeout = (unsigned long)(timeout*HZ+999)/1000+1;
420 else /* Negative or overflow */
421 timeout = MAX_SCHEDULE_TIMEOUT;
424 if (timeout) {
425 wait = alloc_wait(nfds);
426 if (!wait)
427 return -ENOMEM;
429 err = -ENOMEM;
431 fds = NULL;
432 if (nfds != 0) {
433 fds = (struct pollfd **)kmalloc(
434 (1 + (nfds - 1) / POLLFD_PER_PAGE) * sizeof(struct pollfd *),
435 GFP_KERNEL);
436 if (fds == NULL)
437 goto out;
440 nchunks = 0;
441 nleft = nfds;
442 while (nleft > POLLFD_PER_PAGE) { /* allocate complete PAGE_SIZE chunks */
443 fds[nchunks] = (struct pollfd *)__get_free_page(GFP_KERNEL);
444 if (fds[nchunks] == NULL)
445 goto out_fds;
446 nchunks++;
447 nleft -= POLLFD_PER_PAGE;
449 if (nleft) { /* allocate last PAGE_SIZE chunk, only nleft elements used */
450 fds[nchunks] = (struct pollfd *)__get_free_page(GFP_KERNEL);
451 if (fds[nchunks] == NULL)
452 goto out_fds;
455 err = -EFAULT;
456 for (i=0; i < nchunks; i++)
457 if (copy_from_user(fds[i], ufds + i*POLLFD_PER_PAGE, PAGE_SIZE))
458 goto out_fds1;
459 if (nleft) {
460 if (copy_from_user(fds[nchunks], ufds + nchunks*POLLFD_PER_PAGE,
461 nleft * sizeof(struct pollfd)))
462 goto out_fds1;
465 lock_kernel();
466 fdcount = do_poll(nfds, nchunks, nleft, fds, wait, timeout);
467 unlock_kernel();
469 /* OK, now copy the revents fields back to user space. */
470 for(i=0; i < nchunks; i++)
471 for (j=0; j < POLLFD_PER_PAGE; j++, ufds++)
472 __put_user((fds[i] + j)->revents, &ufds->revents);
473 if (nleft)
474 for (j=0; j < nleft; j++, ufds++)
475 __put_user((fds[nchunks] + j)->revents, &ufds->revents);
477 err = fdcount;
478 if (!fdcount && signal_pending(current))
479 err = -EINTR;
481 out_fds1:
482 if (nleft)
483 free_page((unsigned long)(fds[nchunks]));
484 out_fds:
485 for (i=0; i < nchunks; i++)
486 free_page((unsigned long)(fds[i]));
487 if (nfds != 0)
488 kfree(fds);
489 out:
490 free_wait(wait);
491 return err;