/*
 * fs/select.c  (from linux-2.6/linux-mips.git)
 * Commit: "Fix reported Samba bug."
 * blob be09afdd7b6186e16b866eac997d021efedf3450
 */
/*
 * This file contains the procedures for the handling of select and poll
 *
 * Created for Linux based loosely upon Mathius Lattner's minix
 * patches by Peter MacDonald. Heavily edited by Linus.
 *
 *  4 February 1994
 *     COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
 *     flag set in its personality we do *not* modify the given timeout
 *     parameter to reflect time remaining.
 *
 *  24 January 2000
 *     Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation
 *     of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian).
 */
#include <linux/malloc.h>
#include <linux/smp_lock.h>
#include <linux/poll.h>
#include <linux/file.h>

#include <asm/uaccess.h>
24 #define ROUND_UP(x,y) (((x)+(y)-1)/(y))
25 #define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
/*
 * Ok, Peter made a complicated, but straightforward multiple_wait() function.
 * I have rewritten this, taking some shortcuts: This code may not be easy to
 * follow, but it should be free of race-conditions, and it's practical. If you
 * understand what I'm doing here, then you understand how the linux
 * sleep/wakeup mechanism works.
 *
 * Two very simple procedures, poll_wait() and poll_freewait() make all the
 * work.  poll_wait() is an inline-function defined in <linux/poll.h>,
 * as all select/poll functions have to call it to add an entry to the
 * poll table.
 */
40 void poll_freewait(poll_table* pt)
42 struct poll_table_page * p = pt->table;
43 while (p) {
44 struct poll_table_entry * entry;
45 struct poll_table_page *old;
47 entry = p->entry + p->nr;
48 while (p->nr > 0) {
49 p->nr--;
50 entry--;
51 remove_wait_queue(entry->wait_address,&entry->wait);
52 fput(entry->filp);
54 old = p;
55 p = p->next;
56 free_page((unsigned long) old);
60 void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
62 struct poll_table_page *table = p->table;
64 if (!table || table->nr >= __MAX_POLL_TABLE_ENTRIES) {
65 struct poll_table_page *new_table;
67 new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
68 if (!new_table) {
69 p->error = -ENOMEM;
70 __set_current_state(TASK_RUNNING);
71 return;
73 new_table->nr = 0;
74 new_table->entry = (struct poll_table_entry *)(new_table + 1);
75 new_table->next = table;
76 p->table = new_table;
77 table = new_table;
80 /* Add a new entry */
82 struct poll_table_entry * entry;
83 entry = table->entry + table->nr;
84 table->nr++;
85 get_file(filp);
86 entry->filp = filp;
87 entry->wait_address = wait_address;
88 init_waitqueue_entry(&entry->wait, current);
89 add_wait_queue(wait_address,&entry->wait);
/* Address of the n'th long-word in each of the six fd_set_bits bitmaps. */
#define __IN(fds, n)		(fds->in + n)
#define __OUT(fds, n)		(fds->out + n)
#define __EX(fds, n)		(fds->ex + n)
#define __RES_IN(fds, n)	(fds->res_in + n)
#define __RES_OUT(fds, n)	(fds->res_out + n)
#define __RES_EX(fds, n)	(fds->res_ex + n)

/* Union of the three *input* bitmaps' n'th word: any fd we care about. */
#define BITS(fds, n)		(*__IN(fds, n)|*__OUT(fds, n)|*__EX(fds, n))
102 static int max_select_fd(unsigned long n, fd_set_bits *fds)
104 unsigned long *open_fds;
105 unsigned long set;
106 int max;
108 /* handle last in-complete long-word first */
109 set = ~(~0UL << (n & (__NFDBITS-1)));
110 n /= __NFDBITS;
111 open_fds = current->files->open_fds->fds_bits+n;
112 max = 0;
113 if (set) {
114 set &= BITS(fds, n);
115 if (set) {
116 if (!(set & ~*open_fds))
117 goto get_max;
118 return -EBADF;
121 while (n) {
122 open_fds--;
123 n--;
124 set = BITS(fds, n);
125 if (!set)
126 continue;
127 if (set & ~*open_fds)
128 return -EBADF;
129 if (max)
130 continue;
131 get_max:
132 do {
133 max++;
134 set >>= 1;
135 } while (set);
136 max += n * __NFDBITS;
139 return max;
142 #define BIT(i) (1UL << ((i)&(__NFDBITS-1)))
143 #define MEM(i,m) ((m)+(unsigned)(i)/__NFDBITS)
144 #define ISSET(i,m) (((i)&*(m)) != 0)
145 #define SET(i,m) (*(m) |= (i))
147 #define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR)
148 #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
149 #define POLLEX_SET (POLLPRI)
151 int do_select(int n, fd_set_bits *fds, long *timeout)
153 poll_table table, *wait;
154 int retval, i, off;
155 long __timeout = *timeout;
157 read_lock(&current->files->file_lock);
158 retval = max_select_fd(n, fds);
159 read_unlock(&current->files->file_lock);
161 if (retval < 0)
162 return retval;
163 n = retval;
165 poll_initwait(&table);
166 wait = &table;
167 if (!__timeout)
168 wait = NULL;
169 retval = 0;
170 for (;;) {
171 set_current_state(TASK_INTERRUPTIBLE);
172 for (i = 0 ; i < n; i++) {
173 unsigned long bit = BIT(i);
174 unsigned long mask;
175 struct file *file;
177 off = i / __NFDBITS;
178 if (!(bit & BITS(fds, off)))
179 continue;
180 file = fget(i);
181 mask = POLLNVAL;
182 if (file) {
183 mask = DEFAULT_POLLMASK;
184 if (file->f_op && file->f_op->poll)
185 mask = file->f_op->poll(file, wait);
186 fput(file);
188 if ((mask & POLLIN_SET) && ISSET(bit, __IN(fds,off))) {
189 SET(bit, __RES_IN(fds,off));
190 retval++;
191 wait = NULL;
193 if ((mask & POLLOUT_SET) && ISSET(bit, __OUT(fds,off))) {
194 SET(bit, __RES_OUT(fds,off));
195 retval++;
196 wait = NULL;
198 if ((mask & POLLEX_SET) && ISSET(bit, __EX(fds,off))) {
199 SET(bit, __RES_EX(fds,off));
200 retval++;
201 wait = NULL;
204 wait = NULL;
205 if (retval || !__timeout || signal_pending(current))
206 break;
207 if(table.error) {
208 retval = table.error;
209 break;
211 __timeout = schedule_timeout(__timeout);
213 current->state = TASK_RUNNING;
215 poll_freewait(&table);
218 * Up-to-date the caller timeout.
220 *timeout = __timeout;
221 return retval;
/*
 * We can actually return ERESTARTSYS instead of EINTR, but I'd
 * like to be certain this leads to no problems. So I return
 * EINTR just for safety.
 *
 * Update: ERESTARTSYS breaks at least the xview clock binary, so
 * I'm trying ERESTARTNOHAND, which restarts only when you want it to.
 */
/*
 * Largest timeout (whole seconds) that still converts to jiffies without
 * overflowing; anything larger is treated as "wait forever".
 */
#define MAX_SELECT_SECONDS \
	((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
235 asmlinkage long
236 sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp)
238 fd_set_bits fds;
239 char *bits;
240 long timeout;
241 int ret, size;
243 timeout = MAX_SCHEDULE_TIMEOUT;
244 if (tvp) {
245 time_t sec, usec;
247 if ((ret = verify_area(VERIFY_READ, tvp, sizeof(*tvp)))
248 || (ret = __get_user(sec, &tvp->tv_sec))
249 || (ret = __get_user(usec, &tvp->tv_usec)))
250 goto out_nofds;
252 ret = -EINVAL;
253 if (sec < 0 || usec < 0)
254 goto out_nofds;
256 if ((unsigned long) sec < MAX_SELECT_SECONDS) {
257 timeout = ROUND_UP(usec, 1000000/HZ);
258 timeout += sec * (unsigned long) HZ;
262 ret = -EINVAL;
263 if (n < 0)
264 goto out_nofds;
266 if (n > current->files->max_fdset)
267 n = current->files->max_fdset;
270 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
271 * since we used fdset we need to allocate memory in units of
272 * long-words.
274 ret = -ENOMEM;
275 size = FDS_BYTES(n);
276 bits = kmalloc(6 * size, GFP_KERNEL);
277 if (!bits)
278 goto out_nofds;
279 fds.in = (unsigned long *) bits;
280 fds.out = (unsigned long *) (bits + size);
281 fds.ex = (unsigned long *) (bits + 2*size);
282 fds.res_in = (unsigned long *) (bits + 3*size);
283 fds.res_out = (unsigned long *) (bits + 4*size);
284 fds.res_ex = (unsigned long *) (bits + 5*size);
286 if ((ret = get_fd_set(n, inp, fds.in)) ||
287 (ret = get_fd_set(n, outp, fds.out)) ||
288 (ret = get_fd_set(n, exp, fds.ex)))
289 goto out;
290 zero_fd_set(n, fds.res_in);
291 zero_fd_set(n, fds.res_out);
292 zero_fd_set(n, fds.res_ex);
294 ret = do_select(n, &fds, &timeout);
296 if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
297 time_t sec = 0, usec = 0;
298 if (timeout) {
299 sec = timeout / HZ;
300 usec = timeout % HZ;
301 usec *= (1000000/HZ);
303 put_user(sec, &tvp->tv_sec);
304 put_user(usec, &tvp->tv_usec);
307 if (ret < 0)
308 goto out;
309 if (!ret) {
310 ret = -ERESTARTNOHAND;
311 if (signal_pending(current))
312 goto out;
313 ret = 0;
316 set_fd_set(n, inp, fds.res_in);
317 set_fd_set(n, outp, fds.res_out);
318 set_fd_set(n, exp, fds.res_ex);
320 out:
321 kfree(bits);
322 out_nofds:
323 return ret;
326 #define POLLFD_PER_PAGE ((PAGE_SIZE) / sizeof(struct pollfd))
328 static void do_pollfd(unsigned int num, struct pollfd * fdpage,
329 poll_table ** pwait, int *count)
331 int i;
333 for (i = 0; i < num; i++) {
334 int fd;
335 unsigned int mask;
336 struct pollfd *fdp;
338 mask = 0;
339 fdp = fdpage+i;
340 fd = fdp->fd;
341 if (fd >= 0) {
342 struct file * file = fget(fd);
343 mask = POLLNVAL;
344 if (file != NULL) {
345 mask = DEFAULT_POLLMASK;
346 if (file->f_op && file->f_op->poll)
347 mask = file->f_op->poll(file, *pwait);
348 mask &= fdp->events | POLLERR | POLLHUP;
349 fput(file);
351 if (mask) {
352 *pwait = NULL;
353 (*count)++;
356 fdp->revents = mask;
360 static int do_poll(unsigned int nfds, unsigned int nchunks, unsigned int nleft,
361 struct pollfd *fds[], poll_table *wait, long timeout)
363 int count = 0;
364 poll_table* pt = wait;
366 for (;;) {
367 unsigned int i;
369 set_current_state(TASK_INTERRUPTIBLE);
370 for (i=0; i < nchunks; i++)
371 do_pollfd(POLLFD_PER_PAGE, fds[i], &pt, &count);
372 if (nleft)
373 do_pollfd(nleft, fds[nchunks], &pt, &count);
374 pt = NULL;
375 if (count || !timeout || signal_pending(current))
376 break;
377 if(wait->error) {
378 return wait->error;
380 timeout = schedule_timeout(timeout);
382 current->state = TASK_RUNNING;
383 return count;
386 asmlinkage long sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout)
388 int i, j, fdcount, err;
389 struct pollfd **fds;
390 poll_table table, *wait;
391 int nchunks, nleft;
393 /* Do a sanity check on nfds ... */
394 if (nfds > current->files->max_fds)
395 return -EINVAL;
397 if (timeout) {
398 /* Careful about overflow in the intermediate values */
399 if ((unsigned long) timeout < MAX_SCHEDULE_TIMEOUT / HZ)
400 timeout = (unsigned long)(timeout*HZ+999)/1000+1;
401 else /* Negative or overflow */
402 timeout = MAX_SCHEDULE_TIMEOUT;
405 poll_initwait(&table);
406 wait = &table;
407 if (!timeout)
408 wait = NULL;
410 err = -ENOMEM;
411 fds = NULL;
412 if (nfds != 0) {
413 fds = (struct pollfd **)kmalloc(
414 (1 + (nfds - 1) / POLLFD_PER_PAGE) * sizeof(struct pollfd *),
415 GFP_KERNEL);
416 if (fds == NULL)
417 goto out;
420 nchunks = 0;
421 nleft = nfds;
422 while (nleft > POLLFD_PER_PAGE) { /* allocate complete PAGE_SIZE chunks */
423 fds[nchunks] = (struct pollfd *)__get_free_page(GFP_KERNEL);
424 if (fds[nchunks] == NULL)
425 goto out_fds;
426 nchunks++;
427 nleft -= POLLFD_PER_PAGE;
429 if (nleft) { /* allocate last PAGE_SIZE chunk, only nleft elements used */
430 fds[nchunks] = (struct pollfd *)__get_free_page(GFP_KERNEL);
431 if (fds[nchunks] == NULL)
432 goto out_fds;
435 err = -EFAULT;
436 for (i=0; i < nchunks; i++)
437 if (copy_from_user(fds[i], ufds + i*POLLFD_PER_PAGE, PAGE_SIZE))
438 goto out_fds1;
439 if (nleft) {
440 if (copy_from_user(fds[nchunks], ufds + nchunks*POLLFD_PER_PAGE,
441 nleft * sizeof(struct pollfd)))
442 goto out_fds1;
445 fdcount = do_poll(nfds, nchunks, nleft, fds, wait, timeout);
447 /* OK, now copy the revents fields back to user space. */
448 for(i=0; i < nchunks; i++)
449 for (j=0; j < POLLFD_PER_PAGE; j++, ufds++)
450 __put_user((fds[i] + j)->revents, &ufds->revents);
451 if (nleft)
452 for (j=0; j < nleft; j++, ufds++)
453 __put_user((fds[nchunks] + j)->revents, &ufds->revents);
455 err = fdcount;
456 if (!fdcount && signal_pending(current))
457 err = -EINTR;
459 out_fds1:
460 if (nleft)
461 free_page((unsigned long)(fds[nchunks]));
462 out_fds:
463 for (i=0; i < nchunks; i++)
464 free_page((unsigned long)(fds[i]));
465 if (nfds != 0)
466 kfree(fds);
467 out:
468 poll_freewait(&table);
469 return err;