/*
 * Import 2.3.12pre9
 * [davej-history.git] / fs / select.c
 * blob 9ca5c6d893ecd7feedb709cf2411e6a8a895cdb7
 */
/*
 * This file contains the procedures for the handling of select and poll
 *
 * Created for Linux based loosely upon Mathius Lattner's minix
 * patches by Peter MacDonald. Heavily edited by Linus.
 *
 *  4 February 1994
 *     COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
 *     flag set in its personality we do *not* modify the given timeout
 *     parameter to reflect time remaining.
 */
13 #include <linux/malloc.h>
14 #include <linux/smp_lock.h>
15 #include <linux/poll.h>
16 #include <linux/file.h>
18 #include <asm/uaccess.h>
/* Integer division of x by y, rounding the quotient up (intended for non-negative x, positive y). */
#define ROUND_UP(x,y) (((x)+(y)-1)/(y))
/* Event mask assumed for a file whose f_op has no ->poll method: readable and writable. */
#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
/*
 * Ok, Peter made a complicated, but straightforward multiple_wait() function.
 * I have rewritten this, taking some shortcuts: This code may not be easy to
 * follow, but it should be free of race-conditions, and it's practical. If you
 * understand what I'm doing here, then you understand how the linux
 * sleep/wakeup mechanism works.
 *
 * Two very simple procedures, poll_wait() and free_wait() make all the
 * work.  poll_wait() is an inline-function defined in <linux/poll.h>,
 * as all select/poll functions have to call it to add an entry to the
 * poll table.
 */

/*
 * I rewrote this again to make the poll_table size variable, take some
 * more shortcuts, improve responsiveness, and remove another race that
 * Linus noticed.  -- jrs
 */
42 static void free_wait(poll_table * p)
44 struct poll_table_entry * entry;
45 poll_table *old;
47 while (p) {
48 entry = p->entry + p->nr;
49 while (p->nr > 0) {
50 p->nr--;
51 entry--;
52 remove_wait_queue(entry->wait_address,&entry->wait);
53 fput(entry->filp);
55 old = p;
56 p = p->next;
57 free_page((unsigned long) old);
61 void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
63 for (;;) {
64 if (p->nr < __MAX_POLL_TABLE_ENTRIES) {
65 struct poll_table_entry * entry;
66 ok_table:
67 entry = p->entry + p->nr;
68 get_file(filp);
69 entry->filp = filp;
70 entry->wait_address = wait_address;
71 init_waitqueue_entry(&entry->wait, current);
72 add_wait_queue(wait_address,&entry->wait);
73 p->nr++;
74 return;
76 if (p->next == NULL) {
77 poll_table *tmp = (poll_table *) __get_free_page(GFP_KERNEL);
78 if (!tmp)
79 return;
80 tmp->nr = 0;
81 tmp->entry = (struct poll_table_entry *)(tmp + 1);
82 tmp->next = NULL;
83 p->next = tmp;
84 p = tmp;
85 goto ok_table;
87 p = p->next;
/*
 * Accessors for word 'n' (an index in unsigned longs) of the six bitmaps
 * reachable through 'fds': the three request sets (in/out/ex) and the
 * three result sets (res_in/res_out/res_ex).  Each yields a pointer to
 * the word.  Arguments are parenthesised so that any expression may be
 * passed for 'fds' and 'n'.
 */
#define __IN(fds, n)		((fds)->in + (n))
#define __OUT(fds, n)		((fds)->out + (n))
#define __EX(fds, n)		((fds)->ex + (n))
#define __RES_IN(fds, n)	((fds)->res_in + (n))
#define __RES_OUT(fds, n)	((fds)->res_out + (n))
#define __RES_EX(fds, n)	((fds)->res_ex + (n))

/* Union of the three request sets for word 'n': every fd asked about in any way. */
#define BITS(fds, n)		(*__IN(fds, n)|*__OUT(fds, n)|*__EX(fds, n))
100 static int max_select_fd(unsigned long n, fd_set_bits *fds)
102 unsigned long *open_fds;
103 unsigned long set;
104 int max;
106 /* handle last in-complete long-word first */
107 set = ~(~0UL << (n & (__NFDBITS-1)));
108 n /= __NFDBITS;
109 open_fds = current->files->open_fds->fds_bits+n;
110 max = 0;
111 if (set) {
112 set &= BITS(fds, n);
113 if (set) {
114 if (!(set & ~*open_fds))
115 goto get_max;
116 return -EBADF;
119 while (n) {
120 open_fds--;
121 n--;
122 set = BITS(fds, n);
123 if (!set)
124 continue;
125 if (set & ~*open_fds)
126 return -EBADF;
127 if (max)
128 continue;
129 get_max:
130 do {
131 max++;
132 set >>= 1;
133 } while (set);
134 max += n * __NFDBITS;
137 return max;
/* Mask selecting descriptor 'i' inside its bitmap word. */
#define BIT(i)		(1UL << ((i)&(__NFDBITS-1)))
/* Pointer to the word of bitmap 'm' that holds descriptor 'i'. */
#define MEM(i,m)	((m)+(unsigned)(i)/__NFDBITS)
/* Non-zero when any bit of mask 'i' is set in the word 'm' points to. */
#define ISSET(i,m)	(((i)&*(m)) != 0)
/* Set the bits of mask 'i' in the word 'm' points to. */
#define SET(i,m)	(*(m) |= (i))

/*
 * Poll event bits that make do_select() report a descriptor in the
 * read, write and exception result sets respectively.
 */
#define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR)
#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
#define POLLEX_SET (POLLPRI)
149 int do_select(int n, fd_set_bits *fds, long *timeout)
151 poll_table *wait_table, *wait;
152 int retval, i, off;
153 long __timeout = *timeout;
155 wait = wait_table = NULL;
156 if (__timeout) {
157 wait_table = (poll_table *) __get_free_page(GFP_KERNEL);
158 if (!wait_table)
159 return -ENOMEM;
161 wait_table->nr = 0;
162 wait_table->entry = (struct poll_table_entry *)(wait_table + 1);
163 wait_table->next = NULL;
164 wait = wait_table;
167 read_lock(&current->files->file_lock);
168 retval = max_select_fd(n, fds);
169 read_unlock(&current->files->file_lock);
171 lock_kernel();
172 if (retval < 0)
173 goto out;
174 n = retval;
175 retval = 0;
176 for (;;) {
177 current->state = TASK_INTERRUPTIBLE;
178 for (i = 0 ; i < n; i++) {
179 unsigned long bit = BIT(i);
180 unsigned long mask;
181 struct file *file;
183 off = i / __NFDBITS;
184 if (!(bit & BITS(fds, off)))
185 continue;
186 file = fget(i);
187 mask = POLLNVAL;
188 if (file) {
189 mask = DEFAULT_POLLMASK;
190 if (file->f_op && file->f_op->poll)
191 mask = file->f_op->poll(file, wait);
192 fput(file);
194 if ((mask & POLLIN_SET) && ISSET(bit, __IN(fds,off))) {
195 SET(bit, __RES_IN(fds,off));
196 retval++;
197 wait = NULL;
199 if ((mask & POLLOUT_SET) && ISSET(bit, __OUT(fds,off))) {
200 SET(bit, __RES_OUT(fds,off));
201 retval++;
202 wait = NULL;
204 if ((mask & POLLEX_SET) && ISSET(bit, __EX(fds,off))) {
205 SET(bit, __RES_EX(fds,off));
206 retval++;
207 wait = NULL;
210 wait = NULL;
211 if (retval || !__timeout || signal_pending(current))
212 break;
213 __timeout = schedule_timeout(__timeout);
215 current->state = TASK_RUNNING;
217 out:
218 if (*timeout)
219 free_wait(wait_table);
222 * Up-to-date the caller timeout.
224 *timeout = __timeout;
225 unlock_kernel();
226 return retval;
/*
 * We can actually return ERESTARTSYS instead of EINTR, but I'd
 * like to be certain this leads to no problems. So I return
 * EINTR just for safety.
 *
 * Update: ERESTARTSYS breaks at least the xview clock binary, so
 * I'm trying ERESTARTNOHAND which restart only when you want to.
 */
/*
 * Upper bound (exclusive, as used by sys_select) on the tv_sec value that
 * is still converted to ticks; anything at or above it keeps the timeout
 * at MAX_SCHEDULE_TIMEOUT.
 */
#define MAX_SELECT_SECONDS \
	((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
240 asmlinkage int
241 sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp)
243 fd_set_bits fds;
244 char *bits;
245 long timeout;
246 int ret, size;
248 timeout = MAX_SCHEDULE_TIMEOUT;
249 if (tvp) {
250 time_t sec, usec;
252 if ((ret = verify_area(VERIFY_READ, tvp, sizeof(*tvp)))
253 || (ret = __get_user(sec, &tvp->tv_sec))
254 || (ret = __get_user(usec, &tvp->tv_usec)))
255 goto out_nofds;
257 ret = -EINVAL;
258 if (sec < 0 || usec < 0)
259 goto out_nofds;
261 if ((unsigned long) sec < MAX_SELECT_SECONDS) {
262 timeout = ROUND_UP(usec, 1000000/HZ);
263 timeout += sec * (unsigned long) HZ;
267 ret = -EINVAL;
268 if (n < 0)
269 goto out_nofds;
271 if (n > current->files->max_fdset + 1)
272 n = current->files->max_fdset + 1;
275 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
276 * since we used fdset we need to allocate memory in units of
277 * long-words.
279 ret = -ENOMEM;
280 size = (n + 8 * sizeof(long) - 1) / (8 * sizeof(long)) * sizeof(long);
281 bits = kmalloc(6 * size, GFP_KERNEL);
282 if (!bits)
283 goto out_nofds;
284 fds.in = (unsigned long *) bits;
285 fds.out = (unsigned long *) (bits + size);
286 fds.ex = (unsigned long *) (bits + 2*size);
287 fds.res_in = (unsigned long *) (bits + 3*size);
288 fds.res_out = (unsigned long *) (bits + 4*size);
289 fds.res_ex = (unsigned long *) (bits + 5*size);
291 if ((ret = get_fd_set(n, inp, fds.in)) ||
292 (ret = get_fd_set(n, outp, fds.out)) ||
293 (ret = get_fd_set(n, exp, fds.ex)))
294 goto out;
295 zero_fd_set(n, fds.res_in);
296 zero_fd_set(n, fds.res_out);
297 zero_fd_set(n, fds.res_ex);
299 ret = do_select(n, &fds, &timeout);
301 if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
302 time_t sec = 0, usec = 0;
303 if (timeout) {
304 sec = timeout / HZ;
305 usec = timeout % HZ;
306 usec *= (1000000/HZ);
308 put_user(sec, &tvp->tv_sec);
309 put_user(usec, &tvp->tv_usec);
312 if (ret < 0)
313 goto out;
314 if (!ret) {
315 ret = -ERESTARTNOHAND;
316 if (signal_pending(current))
317 goto out;
318 ret = 0;
321 set_fd_set(n, inp, fds.res_in);
322 set_fd_set(n, outp, fds.res_out);
323 set_fd_set(n, exp, fds.res_ex);
325 out:
326 kfree(bits);
327 out_nofds:
328 return ret;
331 static int do_poll(unsigned int nfds, struct pollfd *fds, poll_table *wait,
332 long timeout)
334 int count = 0;
336 for (;;) {
337 unsigned int j;
338 struct pollfd * fdpnt;
340 current->state = TASK_INTERRUPTIBLE;
341 for (fdpnt = fds, j = 0; j < nfds; j++, fdpnt++) {
342 int fd;
343 unsigned int mask;
345 mask = 0;
346 fd = fdpnt->fd;
347 if (fd >= 0) {
348 struct file * file = fget(fd);
349 mask = POLLNVAL;
350 if (file != NULL) {
351 mask = DEFAULT_POLLMASK;
352 if (file->f_op && file->f_op->poll)
353 mask = file->f_op->poll(file, wait);
354 mask &= fdpnt->events | POLLERR | POLLHUP;
355 fput(file);
357 if (mask) {
358 wait = NULL;
359 count++;
362 fdpnt->revents = mask;
365 wait = NULL;
366 if (count || !timeout || signal_pending(current))
367 break;
368 timeout = schedule_timeout(timeout);
370 current->state = TASK_RUNNING;
371 return count;
374 asmlinkage int sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout)
376 int i, fdcount, err, size;
377 struct pollfd * fds, *fds1;
378 poll_table *wait_table = NULL, *wait = NULL;
380 lock_kernel();
381 /* Do a sanity check on nfds ... */
382 err = -EINVAL;
383 if (nfds > current->files->max_fds)
384 goto out;
386 if (timeout) {
387 /* Carefula about overflow in the intermediate values */
388 if ((unsigned long) timeout < MAX_SCHEDULE_TIMEOUT / HZ)
389 timeout = (unsigned long)(timeout*HZ+999)/1000+1;
390 else /* Negative or overflow */
391 timeout = MAX_SCHEDULE_TIMEOUT;
394 err = -ENOMEM;
395 if (timeout) {
396 wait_table = (poll_table *) __get_free_page(GFP_KERNEL);
397 if (!wait_table)
398 goto out;
399 wait_table->nr = 0;
400 wait_table->entry = (struct poll_table_entry *)(wait_table + 1);
401 wait_table->next = NULL;
402 wait = wait_table;
405 size = nfds * sizeof(struct pollfd);
406 fds = (struct pollfd *) kmalloc(size, GFP_KERNEL);
407 if (!fds)
408 goto out;
410 err = -EFAULT;
411 if (copy_from_user(fds, ufds, size))
412 goto out_fds;
414 fdcount = do_poll(nfds, fds, wait, timeout);
416 /* OK, now copy the revents fields back to user space. */
417 fds1 = fds;
418 for(i=0; i < (int)nfds; i++, ufds++, fds1++) {
419 __put_user(fds1->revents, &ufds->revents);
422 err = fdcount;
423 if (!fdcount && signal_pending(current))
424 err = -EINTR;
426 out_fds:
427 kfree(fds);
428 out:
429 if (wait)
430 free_wait(wait_table);
431 unlock_kernel();
432 return err;