2.2.0-final
[davej-history.git] / fs / select.c
bloba89425503784d62761d0f08636d0c6573548606e
/*
 * This file contains the procedures for the handling of select and poll
 *
 * Created for Linux based loosely upon Mathius Lattner's minix
 * patches by Peter MacDonald. Heavily edited by Linus.
 *
 *  4 February 1994
 *     COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
 *     flag set in its personality we do *not* modify the given timeout
 *     parameter to reflect time remaining.
 */
13 #include <linux/malloc.h>
14 #include <linux/smp_lock.h>
15 #include <linux/poll.h>
16 #include <linux/file.h>
18 #include <asm/uaccess.h>
/* Divide val by unit, rounding any remainder up to the next whole unit. */
#define ROUND_UP(val, unit)	(((val) + (unit) - 1) / (unit))

/*
 * Event mask assumed for a file that has no f_op, or whose f_op provides
 * no poll method.
 */
#define DEFAULT_POLLMASK	(POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM)
/*
 * Ok, Peter made a complicated, but straightforward multiple_wait() function.
 * I have rewritten this, taking some shortcuts: This code may not be easy to
 * follow, but it should be free of race-conditions, and it's practical. If you
 * understand what I'm doing here, then you understand how the linux
 * sleep/wakeup mechanism works.
 *
 * Two very simple procedures, poll_wait() and free_wait() do all the
 * work. poll_wait() is an inline-function defined in <linux/poll.h>,
 * as all select/poll functions have to call it to add an entry to the
 * poll table.
 */

/*
 * I rewrote this again to make the poll_table size variable, take some
 * more shortcuts, improve responsiveness, and remove another race that
 * Linus noticed.  -- jrs
 */
42 static void free_wait(poll_table * p)
44 struct poll_table_entry * entry;
45 poll_table *old;
47 while (p) {
48 entry = p->entry + p->nr;
49 while (p->nr > 0) {
50 p->nr--;
51 entry--;
52 remove_wait_queue(entry->wait_address,&entry->wait);
53 fput(entry->filp);
55 old = p;
56 p = p->next;
57 free_page((unsigned long) old);
61 void __pollwait(struct file * filp, struct wait_queue ** wait_address, poll_table *p)
63 for (;;) {
64 if (p->nr < __MAX_POLL_TABLE_ENTRIES) {
65 struct poll_table_entry * entry;
66 ok_table:
67 entry = p->entry + p->nr;
68 entry->filp = filp;
69 filp->f_count++;
70 entry->wait_address = wait_address;
71 entry->wait.task = current;
72 entry->wait.next = NULL;
73 add_wait_queue(wait_address,&entry->wait);
74 p->nr++;
75 return;
77 if (p->next == NULL) {
78 poll_table *tmp = (poll_table *) __get_free_page(GFP_KERNEL);
79 if (!tmp)
80 return;
81 tmp->nr = 0;
82 tmp->entry = (struct poll_table_entry *)(tmp + 1);
83 tmp->next = NULL;
84 p->next = tmp;
85 p = tmp;
86 goto ok_table;
88 p = p->next;
/*
 * Accessors for the six bitmaps reachable through an fd_set_bits pointer.
 * Each one yields a pointer to long-word number n of the named bitmap:
 * in/out/ex are the requested sets, res_in/res_out/res_ex the results.
 *
 * Both parameters are parenthesized so that any expression may be passed
 * (for instance "&set" or "a ? b : c") without being torn apart by
 * operator precedence inside the expansion.
 */
#define __IN(fds, n)		((fds)->in + (n))
#define __OUT(fds, n)		((fds)->out + (n))
#define __EX(fds, n)		((fds)->ex + (n))
#define __RES_IN(fds, n)	((fds)->res_in + (n))
#define __RES_OUT(fds, n)	((fds)->res_out + (n))
#define __RES_EX(fds, n)	((fds)->res_ex + (n))

/* Union of the three requested bitmaps for long-word number n. */
#define BITS(fds, n)		(*__IN(fds, n) | *__OUT(fds, n) | *__EX(fds, n))
101 static int max_select_fd(unsigned long n, fd_set_bits *fds)
103 unsigned long *open_fds;
104 unsigned long set;
105 int max;
107 /* handle last in-complete long-word first */
108 set = ~(~0UL << (n & (__NFDBITS-1)));
109 n /= __NFDBITS;
110 open_fds = current->files->open_fds.fds_bits+n;
111 max = 0;
112 if (set) {
113 set &= BITS(fds, n);
114 if (set) {
115 if (!(set & ~*open_fds))
116 goto get_max;
117 return -EBADF;
120 while (n) {
121 open_fds--;
122 n--;
123 set = BITS(fds, n);
124 if (!set)
125 continue;
126 if (set & ~*open_fds)
127 return -EBADF;
128 if (max)
129 continue;
130 get_max:
131 do {
132 max++;
133 set >>= 1;
134 } while (set);
135 max += n * __NFDBITS;
138 return max;
/* Single-bit mask for descriptor fd within the long-word that holds it. */
#define BIT(fd)			(1UL << ((fd) & (__NFDBITS - 1)))
/* Address of the long-word of bitmap map that holds descriptor fd. */
#define MEM(fd, map)		((map) + (unsigned)(fd) / __NFDBITS)
/* True when any bit of mask is set in the long-word that wordp points to. */
#define ISSET(mask, wordp)	((*(wordp) & (mask)) != 0)
/* Switch on the bits of mask in the long-word that wordp points to. */
#define SET(mask, wordp)	(*(wordp) |= (mask))

/*
 * poll() event bits that make a descriptor count as ready in the
 * "in", "out" and "ex" sets of select() respectively.
 */
#define POLLIN_SET	(POLLIN | POLLRDNORM | POLLRDBAND | POLLHUP | POLLERR)
#define POLLOUT_SET	(POLLOUT | POLLWRNORM | POLLWRBAND | POLLERR)
#define POLLEX_SET	(POLLPRI)
150 int do_select(int n, fd_set_bits *fds, long *timeout)
152 poll_table *wait_table, *wait;
153 int retval, i, off;
154 long __timeout = *timeout;
156 wait = wait_table = NULL;
157 if (__timeout) {
158 wait_table = (poll_table *) __get_free_page(GFP_KERNEL);
159 if (!wait_table)
160 return -ENOMEM;
162 wait_table->nr = 0;
163 wait_table->entry = (struct poll_table_entry *)(wait_table + 1);
164 wait_table->next = NULL;
165 wait = wait_table;
168 lock_kernel();
170 retval = max_select_fd(n, fds);
171 if (retval < 0)
172 goto out;
173 n = retval;
174 retval = 0;
175 for (;;) {
176 current->state = TASK_INTERRUPTIBLE;
177 for (i = 0 ; i < n; i++) {
178 unsigned long bit = BIT(i);
179 unsigned long mask;
180 struct file *file;
182 off = i / __NFDBITS;
183 if (!(bit & BITS(fds, off)))
184 continue;
186 * The poll_wait routine will increment f_count if
187 * the file is added to the wait table, so we don't
188 * need to increment it now.
190 file = fcheck(i);
191 mask = POLLNVAL;
192 if (file) {
193 mask = DEFAULT_POLLMASK;
194 if (file->f_op && file->f_op->poll)
195 mask = file->f_op->poll(file, wait);
197 if ((mask & POLLIN_SET) && ISSET(bit, __IN(fds,off))) {
198 SET(bit, __RES_IN(fds,off));
199 retval++;
200 wait = NULL;
202 if ((mask & POLLOUT_SET) && ISSET(bit, __OUT(fds,off))) {
203 SET(bit, __RES_OUT(fds,off));
204 retval++;
205 wait = NULL;
207 if ((mask & POLLEX_SET) && ISSET(bit, __EX(fds,off))) {
208 SET(bit, __RES_EX(fds,off));
209 retval++;
210 wait = NULL;
213 wait = NULL;
214 if (retval || !__timeout || signal_pending(current))
215 break;
216 __timeout = schedule_timeout(__timeout);
218 current->state = TASK_RUNNING;
220 out:
221 if (*timeout)
222 free_wait(wait_table);
225 * Up-to-date the caller timeout.
227 *timeout = __timeout;
228 unlock_kernel();
229 return retval;
233 * We can actually return ERESTARTSYS instead of EINTR, but I'd
234 * like to be certain this leads to no problems. So I return
235 * EINTR just for safety.
237 * Update: ERESTARTSYS breaks at least the xview clock binary, so
238 * I'm trying ERESTARTNOHAND which restart only when you want to.
240 #define MAX_SELECT_SECONDS \
241 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
243 asmlinkage int
244 sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp)
246 fd_set_bits fds;
247 char *bits;
248 long timeout;
249 int ret, size;
251 timeout = MAX_SCHEDULE_TIMEOUT;
252 if (tvp) {
253 time_t sec, usec;
255 if ((ret = verify_area(VERIFY_READ, tvp, sizeof(*tvp)))
256 || (ret = __get_user(sec, &tvp->tv_sec))
257 || (ret = __get_user(usec, &tvp->tv_usec)))
258 goto out_nofds;
260 ret = -EINVAL;
261 if (sec < 0 || usec < 0)
262 goto out_nofds;
264 if ((unsigned long) sec < MAX_SELECT_SECONDS) {
265 timeout = ROUND_UP(usec, 1000000/HZ);
266 timeout += sec * (unsigned long) HZ;
270 ret = -EINVAL;
271 if (n < 0 || n > KFDS_NR)
272 goto out_nofds;
274 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
275 * since we used fdset we need to allocate memory in units of
276 * long-words.
278 ret = -ENOMEM;
279 size = FDS_BYTES(n);
280 bits = kmalloc(6 * size, GFP_KERNEL);
281 if (!bits)
282 goto out_nofds;
283 fds.in = (unsigned long *) bits;
284 fds.out = (unsigned long *) (bits + size);
285 fds.ex = (unsigned long *) (bits + 2*size);
286 fds.res_in = (unsigned long *) (bits + 3*size);
287 fds.res_out = (unsigned long *) (bits + 4*size);
288 fds.res_ex = (unsigned long *) (bits + 5*size);
290 if ((ret = get_fd_set(n, inp, fds.in)) ||
291 (ret = get_fd_set(n, outp, fds.out)) ||
292 (ret = get_fd_set(n, exp, fds.ex)))
293 goto out;
294 zero_fd_set(n, fds.res_in);
295 zero_fd_set(n, fds.res_out);
296 zero_fd_set(n, fds.res_ex);
298 ret = do_select(n, &fds, &timeout);
300 if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
301 time_t sec = 0, usec = 0;
302 if (timeout) {
303 sec = timeout / HZ;
304 usec = timeout % HZ;
305 usec *= (1000000/HZ);
307 put_user(sec, &tvp->tv_sec);
308 put_user(usec, &tvp->tv_usec);
311 if (ret < 0)
312 goto out;
313 if (!ret) {
314 ret = -ERESTARTNOHAND;
315 if (signal_pending(current))
316 goto out;
317 ret = 0;
320 set_fd_set(n, inp, fds.res_in);
321 set_fd_set(n, outp, fds.res_out);
322 set_fd_set(n, exp, fds.res_ex);
324 out:
325 kfree(bits);
326 out_nofds:
327 return ret;
330 static int do_poll(unsigned int nfds, struct pollfd *fds, poll_table *wait,
331 long timeout)
333 int count = 0;
335 for (;;) {
336 unsigned int j;
337 struct pollfd * fdpnt;
339 current->state = TASK_INTERRUPTIBLE;
340 for (fdpnt = fds, j = 0; j < nfds; j++, fdpnt++) {
341 int fd;
342 unsigned int mask;
344 mask = 0;
345 fd = fdpnt->fd;
346 if (fd >= 0) {
347 /* poll_wait increments f_count if needed */
348 struct file * file = fcheck(fd);
349 mask = POLLNVAL;
350 if (file != NULL) {
351 mask = DEFAULT_POLLMASK;
352 if (file->f_op && file->f_op->poll)
353 mask = file->f_op->poll(file, wait);
354 mask &= fdpnt->events | POLLERR | POLLHUP;
356 if (mask) {
357 wait = NULL;
358 count++;
361 fdpnt->revents = mask;
364 wait = NULL;
365 if (count || !timeout || signal_pending(current))
366 break;
367 timeout = schedule_timeout(timeout);
369 current->state = TASK_RUNNING;
370 return count;
373 asmlinkage int sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout)
375 int i, fdcount, err, size;
376 struct pollfd * fds, *fds1;
377 poll_table *wait_table = NULL, *wait = NULL;
379 lock_kernel();
380 /* Do a sanity check on nfds ... */
381 err = -EINVAL;
382 if (nfds > NR_OPEN)
383 goto out;
385 if (timeout) {
386 /* Carefula about overflow in the intermediate values */
387 if ((unsigned long) timeout < MAX_SCHEDULE_TIMEOUT / HZ)
388 timeout = (unsigned long)(timeout*HZ+999)/1000+1;
389 else /* Negative or overflow */
390 timeout = MAX_SCHEDULE_TIMEOUT;
393 err = -ENOMEM;
394 if (timeout) {
395 wait_table = (poll_table *) __get_free_page(GFP_KERNEL);
396 if (!wait_table)
397 goto out;
398 wait_table->nr = 0;
399 wait_table->entry = (struct poll_table_entry *)(wait_table + 1);
400 wait_table->next = NULL;
401 wait = wait_table;
404 size = nfds * sizeof(struct pollfd);
405 fds = (struct pollfd *) kmalloc(size, GFP_KERNEL);
406 if (!fds)
407 goto out;
409 err = -EFAULT;
410 if (copy_from_user(fds, ufds, size))
411 goto out_fds;
413 fdcount = do_poll(nfds, fds, wait, timeout);
415 /* OK, now copy the revents fields back to user space. */
416 fds1 = fds;
417 for(i=0; i < (int)nfds; i++, ufds++, fds1++) {
418 __put_user(fds1->revents, &ufds->revents);
421 err = fdcount;
422 if (!fdcount && signal_pending(current))
423 err = -EINTR;
425 out_fds:
426 kfree(fds);
427 out:
428 if (wait)
429 free_wait(wait_table);
430 unlock_kernel();
431 return err;