Import 2.3.7pre9
[davej-history.git] / fs / select.c
blobe47e3b0b4ca070e3e8920bcd4548d2302c7847b9
/*
 * This file contains the procedures for the handling of select and poll
 *
 * Created for Linux based loosely upon Mathius Lattner's minix
 * patches by Peter MacDonald. Heavily edited by Linus.
 *
 *  4 February 1994
 *     COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
 *     flag set in its personality we do *not* modify the given timeout
 *     parameter to reflect time remaining.
 */
13 #include <linux/malloc.h>
14 #include <linux/smp_lock.h>
15 #include <linux/poll.h>
16 #include <linux/file.h>
18 #include <asm/uaccess.h>
/*
 * Integer division of x by y, rounded up (for non-negative x and
 * positive y).  y is expanded twice, so do not pass an expression with
 * side effects.
 */
#define ROUND_UP(x,y) (((x)+(y)-1)/(y))

/* Event mask reported for a file that supplies no poll method. */
#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
/*
 * Ok, Peter made a complicated, but straightforward multiple_wait() function.
 * I have rewritten this, taking some shortcuts: This code may not be easy to
 * follow, but it should be free of race-conditions, and it's practical. If you
 * understand what I'm doing here, then you understand how the linux
 * sleep/wakeup mechanism works.
 *
 * Two very simple procedures, poll_wait() and free_wait() do all the
 * work.  poll_wait() is an inline-function defined in <linux/poll.h>,
 * as all select/poll functions have to call it to add an entry to the
 * poll table.
 */

/*
 * I rewrote this again to make the poll_table size variable, take some
 * more shortcuts, improve responsiveness, and remove another race that
 * Linus noticed.  -- jrs
 */
42 static void free_wait(poll_table * p)
44 struct poll_table_entry * entry;
45 poll_table *old;
47 while (p) {
48 entry = p->entry + p->nr;
49 while (p->nr > 0) {
50 p->nr--;
51 entry--;
52 remove_wait_queue(entry->wait_address,&entry->wait);
53 fput(entry->filp);
55 old = p;
56 p = p->next;
57 free_page((unsigned long) old);
61 void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
63 for (;;) {
64 if (p->nr < __MAX_POLL_TABLE_ENTRIES) {
65 struct poll_table_entry * entry;
66 ok_table:
67 entry = p->entry + p->nr;
68 entry->filp = filp;
69 filp->f_count++;
70 entry->wait_address = wait_address;
71 init_waitqueue_entry(&entry->wait, current);
72 add_wait_queue(wait_address,&entry->wait);
73 p->nr++;
74 return;
76 if (p->next == NULL) {
77 poll_table *tmp = (poll_table *) __get_free_page(GFP_KERNEL);
78 if (!tmp)
79 return;
80 tmp->nr = 0;
81 tmp->entry = (struct poll_table_entry *)(tmp + 1);
82 tmp->next = NULL;
83 p->next = tmp;
84 p = tmp;
85 goto ok_table;
87 p = p->next;
/*
 * Accessors for the six bitmaps carried by an fd_set_bits: each one yields
 * a pointer to long-word n of the named bitmap.  in/out/ex are the
 * requested sets, res_in/res_out/res_ex the corresponding result sets.
 * Both arguments are parenthesised so that any pointer or index
 * expression may be passed.
 */
#define __IN(fds, n)		((fds)->in + (n))
#define __OUT(fds, n)		((fds)->out + (n))
#define __EX(fds, n)		((fds)->ex + (n))
#define __RES_IN(fds, n)	((fds)->res_in + (n))
#define __RES_OUT(fds, n)	((fds)->res_out + (n))
#define __RES_EX(fds, n)	((fds)->res_ex + (n))

/* Union of the requested in, out and ex bits held in long-word n. */
#define BITS(fds, n)		(*__IN(fds, n)|*__OUT(fds, n)|*__EX(fds, n))
100 static int max_select_fd(unsigned long n, fd_set_bits *fds)
102 unsigned long *open_fds;
103 unsigned long set;
104 int max;
106 /* handle last in-complete long-word first */
107 set = ~(~0UL << (n & (__NFDBITS-1)));
108 n /= __NFDBITS;
109 open_fds = current->files->open_fds.fds_bits+n;
110 max = 0;
111 if (set) {
112 set &= BITS(fds, n);
113 if (set) {
114 if (!(set & ~*open_fds))
115 goto get_max;
116 return -EBADF;
119 while (n) {
120 open_fds--;
121 n--;
122 set = BITS(fds, n);
123 if (!set)
124 continue;
125 if (set & ~*open_fds)
126 return -EBADF;
127 if (max)
128 continue;
129 get_max:
130 do {
131 max++;
132 set >>= 1;
133 } while (set);
134 max += n * __NFDBITS;
137 return max;
/* Mask selecting descriptor i inside its long-word. */
#define BIT(i)		(1UL << ((i)&(__NFDBITS-1)))
/* Pointer to the long-word of bitmap m that holds descriptor i. */
#define MEM(i,m)	((m)+(unsigned)(i)/__NFDBITS)
/* Non-zero when mask i has a bit in common with the long-word at m. */
#define ISSET(i,m)	(((i)&*(m)) != 0)
/* Turn on the bits of mask i in the long-word at m. */
#define SET(i,m)	(*(m) |= (i))

/*
 * Poll events that make do_select() report a descriptor in the in, out
 * and ex result sets respectively.
 */
#define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR)
#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
#define POLLEX_SET (POLLPRI)
149 int do_select(int n, fd_set_bits *fds, long *timeout)
151 poll_table *wait_table, *wait;
152 int retval, i, off;
153 long __timeout = *timeout;
155 wait = wait_table = NULL;
156 if (__timeout) {
157 wait_table = (poll_table *) __get_free_page(GFP_KERNEL);
158 if (!wait_table)
159 return -ENOMEM;
161 wait_table->nr = 0;
162 wait_table->entry = (struct poll_table_entry *)(wait_table + 1);
163 wait_table->next = NULL;
164 wait = wait_table;
167 lock_kernel();
169 retval = max_select_fd(n, fds);
170 if (retval < 0)
171 goto out;
172 n = retval;
173 retval = 0;
174 for (;;) {
175 current->state = TASK_INTERRUPTIBLE;
176 for (i = 0 ; i < n; i++) {
177 unsigned long bit = BIT(i);
178 unsigned long mask;
179 struct file *file;
181 off = i / __NFDBITS;
182 if (!(bit & BITS(fds, off)))
183 continue;
185 * The poll_wait routine will increment f_count if
186 * the file is added to the wait table, so we don't
187 * need to increment it now.
189 file = fcheck(i);
190 mask = POLLNVAL;
191 if (file) {
192 mask = DEFAULT_POLLMASK;
193 if (file->f_op && file->f_op->poll)
194 mask = file->f_op->poll(file, wait);
196 if ((mask & POLLIN_SET) && ISSET(bit, __IN(fds,off))) {
197 SET(bit, __RES_IN(fds,off));
198 retval++;
199 wait = NULL;
201 if ((mask & POLLOUT_SET) && ISSET(bit, __OUT(fds,off))) {
202 SET(bit, __RES_OUT(fds,off));
203 retval++;
204 wait = NULL;
206 if ((mask & POLLEX_SET) && ISSET(bit, __EX(fds,off))) {
207 SET(bit, __RES_EX(fds,off));
208 retval++;
209 wait = NULL;
212 wait = NULL;
213 if (retval || !__timeout || signal_pending(current))
214 break;
215 __timeout = schedule_timeout(__timeout);
217 current->state = TASK_RUNNING;
219 out:
220 if (*timeout)
221 free_wait(wait_table);
224 * Up-to-date the caller timeout.
226 *timeout = __timeout;
227 unlock_kernel();
228 return retval;
232 * We can actually return ERESTARTSYS instead of EINTR, but I'd
233 * like to be certain this leads to no problems. So I return
234 * EINTR just for safety.
236 * Update: ERESTARTSYS breaks at least the xview clock binary, so
237 * I'm trying ERESTARTNOHAND which restart only when you want to.
239 #define MAX_SELECT_SECONDS \
240 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
242 asmlinkage int
243 sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp)
245 fd_set_bits fds;
246 char *bits;
247 long timeout;
248 int ret, size;
250 timeout = MAX_SCHEDULE_TIMEOUT;
251 if (tvp) {
252 time_t sec, usec;
254 if ((ret = verify_area(VERIFY_READ, tvp, sizeof(*tvp)))
255 || (ret = __get_user(sec, &tvp->tv_sec))
256 || (ret = __get_user(usec, &tvp->tv_usec)))
257 goto out_nofds;
259 ret = -EINVAL;
260 if (sec < 0 || usec < 0)
261 goto out_nofds;
263 if ((unsigned long) sec < MAX_SELECT_SECONDS) {
264 timeout = ROUND_UP(usec, 1000000/HZ);
265 timeout += sec * (unsigned long) HZ;
269 ret = -EINVAL;
270 if (n < 0)
271 goto out_nofds;
273 if (n > KFDS_NR)
274 n = KFDS_NR;
277 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
278 * since we used fdset we need to allocate memory in units of
279 * long-words.
281 ret = -ENOMEM;
282 size = FDS_BYTES(n);
283 bits = kmalloc(6 * size, GFP_KERNEL);
284 if (!bits)
285 goto out_nofds;
286 fds.in = (unsigned long *) bits;
287 fds.out = (unsigned long *) (bits + size);
288 fds.ex = (unsigned long *) (bits + 2*size);
289 fds.res_in = (unsigned long *) (bits + 3*size);
290 fds.res_out = (unsigned long *) (bits + 4*size);
291 fds.res_ex = (unsigned long *) (bits + 5*size);
293 if ((ret = get_fd_set(n, inp, fds.in)) ||
294 (ret = get_fd_set(n, outp, fds.out)) ||
295 (ret = get_fd_set(n, exp, fds.ex)))
296 goto out;
297 zero_fd_set(n, fds.res_in);
298 zero_fd_set(n, fds.res_out);
299 zero_fd_set(n, fds.res_ex);
301 ret = do_select(n, &fds, &timeout);
303 if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
304 time_t sec = 0, usec = 0;
305 if (timeout) {
306 sec = timeout / HZ;
307 usec = timeout % HZ;
308 usec *= (1000000/HZ);
310 put_user(sec, &tvp->tv_sec);
311 put_user(usec, &tvp->tv_usec);
314 if (ret < 0)
315 goto out;
316 if (!ret) {
317 ret = -ERESTARTNOHAND;
318 if (signal_pending(current))
319 goto out;
320 ret = 0;
323 set_fd_set(n, inp, fds.res_in);
324 set_fd_set(n, outp, fds.res_out);
325 set_fd_set(n, exp, fds.res_ex);
327 out:
328 kfree(bits);
329 out_nofds:
330 return ret;
333 static int do_poll(unsigned int nfds, struct pollfd *fds, poll_table *wait,
334 long timeout)
336 int count = 0;
338 for (;;) {
339 unsigned int j;
340 struct pollfd * fdpnt;
342 current->state = TASK_INTERRUPTIBLE;
343 for (fdpnt = fds, j = 0; j < nfds; j++, fdpnt++) {
344 int fd;
345 unsigned int mask;
347 mask = 0;
348 fd = fdpnt->fd;
349 if (fd >= 0) {
350 /* poll_wait increments f_count if needed */
351 struct file * file = fcheck(fd);
352 mask = POLLNVAL;
353 if (file != NULL) {
354 mask = DEFAULT_POLLMASK;
355 if (file->f_op && file->f_op->poll)
356 mask = file->f_op->poll(file, wait);
357 mask &= fdpnt->events | POLLERR | POLLHUP;
359 if (mask) {
360 wait = NULL;
361 count++;
364 fdpnt->revents = mask;
367 wait = NULL;
368 if (count || !timeout || signal_pending(current))
369 break;
370 timeout = schedule_timeout(timeout);
372 current->state = TASK_RUNNING;
373 return count;
376 asmlinkage int sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout)
378 int i, fdcount, err, size;
379 struct pollfd * fds, *fds1;
380 poll_table *wait_table = NULL, *wait = NULL;
382 lock_kernel();
383 /* Do a sanity check on nfds ... */
384 err = -EINVAL;
385 if (nfds > NR_OPEN)
386 goto out;
388 if (timeout) {
389 /* Carefula about overflow in the intermediate values */
390 if ((unsigned long) timeout < MAX_SCHEDULE_TIMEOUT / HZ)
391 timeout = (unsigned long)(timeout*HZ+999)/1000+1;
392 else /* Negative or overflow */
393 timeout = MAX_SCHEDULE_TIMEOUT;
396 err = -ENOMEM;
397 if (timeout) {
398 wait_table = (poll_table *) __get_free_page(GFP_KERNEL);
399 if (!wait_table)
400 goto out;
401 wait_table->nr = 0;
402 wait_table->entry = (struct poll_table_entry *)(wait_table + 1);
403 wait_table->next = NULL;
404 wait = wait_table;
407 size = nfds * sizeof(struct pollfd);
408 fds = (struct pollfd *) kmalloc(size, GFP_KERNEL);
409 if (!fds)
410 goto out;
412 err = -EFAULT;
413 if (copy_from_user(fds, ufds, size))
414 goto out_fds;
416 fdcount = do_poll(nfds, fds, wait, timeout);
418 /* OK, now copy the revents fields back to user space. */
419 fds1 = fds;
420 for(i=0; i < (int)nfds; i++, ufds++, fds1++) {
421 __put_user(fds1->revents, &ufds->revents);
424 err = fdcount;
425 if (!fdcount && signal_pending(current))
426 err = -EINTR;
428 out_fds:
429 kfree(fds);
430 out:
431 if (wait)
432 free_wait(wait_table);
433 unlock_kernel();
434 return err;