Merge with Linux 2.5.48.
[linux-2.6/linux-mips.git] / fs / pipe.c
blob3d7e2cc23c91bbbed6ea6f9be0596de0f0c636cc
/*
 *  linux/fs/pipe.c
 *
 *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
 */
7 #include <linux/mm.h>
8 #include <linux/file.h>
9 #include <linux/poll.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/init.h>
13 #include <linux/fs.h>
14 #include <linux/mount.h>
15 #include <linux/pipe_fs_i.h>
16 #include <asm/uaccess.h>
17 #include <asm/ioctls.h>
/*
 * We use a start+len construction, which provides full use of the
 * allocated memory.
 * -- Florian Coosmann (FGC)
 *
 * Reads with count = 0 should always return 0.
 * -- Julian Bradfield 1999-06-07.
 *
 * FIFOs and Pipes now generate SIGIO for both readers and writers.
 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
 *
 * pipe_read & write cleanup
 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
 */
34 /* Drop the inode semaphore and wait for a pipe event, atomically */
35 void pipe_wait(struct inode * inode)
37 DEFINE_WAIT(wait);
39 prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE);
40 up(PIPE_SEM(*inode));
41 schedule();
42 finish_wait(PIPE_WAIT(*inode), &wait);
43 down(PIPE_SEM(*inode));
46 static ssize_t
47 pipe_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
49 struct inode *inode = filp->f_dentry->d_inode;
50 int do_wakeup;
51 ssize_t ret;
53 /* pread is not allowed on pipes. */
54 if (unlikely(ppos != &filp->f_pos))
55 return -ESPIPE;
57 /* Null read succeeds. */
58 if (unlikely(count == 0))
59 return 0;
61 do_wakeup = 0;
62 ret = 0;
63 down(PIPE_SEM(*inode));
64 for (;;) {
65 int size = PIPE_LEN(*inode);
66 if (size) {
67 char *pipebuf = PIPE_BASE(*inode) + PIPE_START(*inode);
68 ssize_t chars = PIPE_MAX_RCHUNK(*inode);
70 if (chars > count)
71 chars = count;
72 if (chars > size)
73 chars = size;
75 if (copy_to_user(buf, pipebuf, chars)) {
76 if (!ret) ret = -EFAULT;
77 break;
79 ret += chars;
81 PIPE_START(*inode) += chars;
82 PIPE_START(*inode) &= (PIPE_SIZE - 1);
83 PIPE_LEN(*inode) -= chars;
84 count -= chars;
85 buf += chars;
86 do_wakeup = 1;
88 if (!count)
89 break; /* common path: read succeeded */
90 if (PIPE_LEN(*inode)) /* test for cyclic buffers */
91 continue;
92 if (!PIPE_WRITERS(*inode))
93 break;
94 if (!PIPE_WAITING_WRITERS(*inode)) {
95 /* syscall merging: Usually we must not sleep
96 * if O_NONBLOCK is set, or if we got some data.
97 * But if a writer sleeps in kernel space, then
98 * we can wait for that data without violating POSIX.
100 if (ret)
101 break;
102 if (filp->f_flags & O_NONBLOCK) {
103 ret = -EAGAIN;
104 break;
107 if (signal_pending(current)) {
108 if (!ret) ret = -ERESTARTSYS;
109 break;
111 if (do_wakeup) {
112 wake_up_interruptible_sync(PIPE_WAIT(*inode));
113 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
115 pipe_wait(inode);
117 up(PIPE_SEM(*inode));
118 /* Signal writers asynchronously that there is more room. */
119 if (do_wakeup) {
120 wake_up_interruptible(PIPE_WAIT(*inode));
121 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
123 if (ret > 0)
124 UPDATE_ATIME(inode);
125 return ret;
128 static ssize_t
129 pipe_write(struct file *filp, const char *buf, size_t count, loff_t *ppos)
131 struct inode *inode = filp->f_dentry->d_inode;
132 ssize_t ret;
133 size_t min;
134 int do_wakeup;
136 /* pwrite is not allowed on pipes. */
137 if (unlikely(ppos != &filp->f_pos))
138 return -ESPIPE;
140 /* Null write succeeds. */
141 if (unlikely(count == 0))
142 return 0;
144 do_wakeup = 0;
145 ret = 0;
146 min = count;
147 if (min > PIPE_BUF)
148 min = 1;
149 down(PIPE_SEM(*inode));
150 for (;;) {
151 int free;
152 if (!PIPE_READERS(*inode)) {
153 send_sig(SIGPIPE, current, 0);
154 if (!ret) ret = -EPIPE;
155 break;
157 free = PIPE_FREE(*inode);
158 if (free >= min) {
159 /* transfer data */
160 ssize_t chars = PIPE_MAX_WCHUNK(*inode);
161 char *pipebuf = PIPE_BASE(*inode) + PIPE_END(*inode);
162 /* Always wakeup, even if the copy fails. Otherwise
163 * we lock up (O_NONBLOCK-)readers that sleep due to
164 * syscall merging.
166 do_wakeup = 1;
167 if (chars > count)
168 chars = count;
169 if (chars > free)
170 chars = free;
172 if (copy_from_user(pipebuf, buf, chars)) {
173 if (!ret) ret = -EFAULT;
174 break;
177 ret += chars;
178 PIPE_LEN(*inode) += chars;
179 count -= chars;
180 buf += chars;
182 if (!count)
183 break;
184 if (PIPE_FREE(*inode) && ret) {
185 /* handle cyclic data buffers */
186 min = 1;
187 continue;
189 if (filp->f_flags & O_NONBLOCK) {
190 if (!ret) ret = -EAGAIN;
191 break;
193 if (signal_pending(current)) {
194 if (!ret) ret = -ERESTARTSYS;
195 break;
197 if (do_wakeup) {
198 wake_up_interruptible_sync(PIPE_WAIT(*inode));
199 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
200 do_wakeup = 0;
202 PIPE_WAITING_WRITERS(*inode)++;
203 pipe_wait(inode);
204 PIPE_WAITING_WRITERS(*inode)--;
206 up(PIPE_SEM(*inode));
207 if (do_wakeup) {
208 wake_up_interruptible(PIPE_WAIT(*inode));
209 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
211 if (ret > 0) {
212 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
213 mark_inode_dirty(inode);
215 return ret;
218 static ssize_t
219 bad_pipe_r(struct file *filp, char *buf, size_t count, loff_t *ppos)
221 return -EBADF;
224 static ssize_t
225 bad_pipe_w(struct file *filp, const char *buf, size_t count, loff_t *ppos)
227 return -EBADF;
230 static int
231 pipe_ioctl(struct inode *pino, struct file *filp,
232 unsigned int cmd, unsigned long arg)
234 switch (cmd) {
235 case FIONREAD:
236 return put_user(PIPE_LEN(*pino), (int *)arg);
237 default:
238 return -EINVAL;
242 /* No kernel lock held - fine */
243 static unsigned int
244 pipe_poll(struct file *filp, poll_table *wait)
246 unsigned int mask;
247 struct inode *inode = filp->f_dentry->d_inode;
249 poll_wait(filp, PIPE_WAIT(*inode), wait);
251 /* Reading only -- no need for acquiring the semaphore. */
252 mask = POLLIN | POLLRDNORM;
253 if (PIPE_EMPTY(*inode))
254 mask = POLLOUT | POLLWRNORM;
255 if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
256 mask |= POLLHUP;
257 if (!PIPE_READERS(*inode))
258 mask |= POLLERR;
260 return mask;
263 /* FIXME: most Unices do not set POLLERR for fifos */
264 #define fifo_poll pipe_poll
266 static int
267 pipe_release(struct inode *inode, int decr, int decw)
269 down(PIPE_SEM(*inode));
270 PIPE_READERS(*inode) -= decr;
271 PIPE_WRITERS(*inode) -= decw;
272 if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
273 struct pipe_inode_info *info = inode->i_pipe;
274 inode->i_pipe = NULL;
275 free_page((unsigned long) info->base);
276 kfree(info);
277 } else {
278 wake_up_interruptible(PIPE_WAIT(*inode));
279 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
280 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
282 up(PIPE_SEM(*inode));
284 return 0;
287 static int
288 pipe_read_fasync(int fd, struct file *filp, int on)
290 struct inode *inode = filp->f_dentry->d_inode;
291 int retval;
293 down(PIPE_SEM(*inode));
294 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
295 up(PIPE_SEM(*inode));
297 if (retval < 0)
298 return retval;
300 return 0;
304 static int
305 pipe_write_fasync(int fd, struct file *filp, int on)
307 struct inode *inode = filp->f_dentry->d_inode;
308 int retval;
310 down(PIPE_SEM(*inode));
311 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
312 up(PIPE_SEM(*inode));
314 if (retval < 0)
315 return retval;
317 return 0;
321 static int
322 pipe_rdwr_fasync(int fd, struct file *filp, int on)
324 struct inode *inode = filp->f_dentry->d_inode;
325 int retval;
327 down(PIPE_SEM(*inode));
329 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
331 if (retval >= 0)
332 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
334 up(PIPE_SEM(*inode));
336 if (retval < 0)
337 return retval;
339 return 0;
/*
 * release handler for read-only pipe files: removes filp from the
 * reader fasync list, then drops one reader from the pipe.
 */
static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	/* fd = -1, on = 0: unregister this file. */
	pipe_read_fasync(-1, filp, 0);

	return pipe_release(inode, 1, 0);
}
/*
 * release handler for write-only pipe files: removes filp from the
 * writer fasync list, then drops one writer from the pipe.
 */
static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	/* fd = -1, on = 0: unregister this file. */
	pipe_write_fasync(-1, filp, 0);

	return pipe_release(inode, 0, 1);
}
357 static int
358 pipe_rdwr_release(struct inode *inode, struct file *filp)
360 int decr, decw;
362 pipe_rdwr_fasync(-1, filp, 0);
363 decr = (filp->f_mode & FMODE_READ) != 0;
364 decw = (filp->f_mode & FMODE_WRITE) != 0;
365 return pipe_release(inode, decr, decw);
368 static int
369 pipe_read_open(struct inode *inode, struct file *filp)
371 /* We could have perhaps used atomic_t, but this and friends
372 below are the only places. So it doesn't seem worthwhile. */
373 down(PIPE_SEM(*inode));
374 PIPE_READERS(*inode)++;
375 up(PIPE_SEM(*inode));
377 return 0;
380 static int
381 pipe_write_open(struct inode *inode, struct file *filp)
383 down(PIPE_SEM(*inode));
384 PIPE_WRITERS(*inode)++;
385 up(PIPE_SEM(*inode));
387 return 0;
390 static int
391 pipe_rdwr_open(struct inode *inode, struct file *filp)
393 down(PIPE_SEM(*inode));
394 if (filp->f_mode & FMODE_READ)
395 PIPE_READERS(*inode)++;
396 if (filp->f_mode & FMODE_WRITE)
397 PIPE_WRITERS(*inode)++;
398 up(PIPE_SEM(*inode));
400 return 0;
404 * The file_operations structs are not static because they
405 * are also used in linux/fs/fifo.c to do operations on FIFOs.
407 struct file_operations read_fifo_fops = {
408 .llseek = no_llseek,
409 .read = pipe_read,
410 .write = bad_pipe_w,
411 .poll = fifo_poll,
412 .ioctl = pipe_ioctl,
413 .open = pipe_read_open,
414 .release = pipe_read_release,
415 .fasync = pipe_read_fasync,
418 struct file_operations write_fifo_fops = {
419 .llseek = no_llseek,
420 .read = bad_pipe_r,
421 .write = pipe_write,
422 .poll = fifo_poll,
423 .ioctl = pipe_ioctl,
424 .open = pipe_write_open,
425 .release = pipe_write_release,
426 .fasync = pipe_write_fasync,
429 struct file_operations rdwr_fifo_fops = {
430 .llseek = no_llseek,
431 .read = pipe_read,
432 .write = pipe_write,
433 .poll = fifo_poll,
434 .ioctl = pipe_ioctl,
435 .open = pipe_rdwr_open,
436 .release = pipe_rdwr_release,
437 .fasync = pipe_rdwr_fasync,
440 struct file_operations read_pipe_fops = {
441 .llseek = no_llseek,
442 .read = pipe_read,
443 .write = bad_pipe_w,
444 .poll = pipe_poll,
445 .ioctl = pipe_ioctl,
446 .open = pipe_read_open,
447 .release = pipe_read_release,
448 .fasync = pipe_read_fasync,
451 struct file_operations write_pipe_fops = {
452 .llseek = no_llseek,
453 .read = bad_pipe_r,
454 .write = pipe_write,
455 .poll = pipe_poll,
456 .ioctl = pipe_ioctl,
457 .open = pipe_write_open,
458 .release = pipe_write_release,
459 .fasync = pipe_write_fasync,
462 struct file_operations rdwr_pipe_fops = {
463 .llseek = no_llseek,
464 .read = pipe_read,
465 .write = pipe_write,
466 .poll = pipe_poll,
467 .ioctl = pipe_ioctl,
468 .open = pipe_rdwr_open,
469 .release = pipe_rdwr_release,
470 .fasync = pipe_rdwr_fasync,
473 struct inode* pipe_new(struct inode* inode)
475 unsigned long page;
477 page = __get_free_page(GFP_USER);
478 if (!page)
479 return NULL;
481 inode->i_pipe = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
482 if (!inode->i_pipe)
483 goto fail_page;
485 init_waitqueue_head(PIPE_WAIT(*inode));
486 PIPE_BASE(*inode) = (char*) page;
487 PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
488 PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0;
489 PIPE_WAITING_WRITERS(*inode) = 0;
490 PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
491 *PIPE_FASYNC_READERS(*inode) = *PIPE_FASYNC_WRITERS(*inode) = NULL;
493 return inode;
494 fail_page:
495 free_page(page);
496 return NULL;
499 static struct vfsmount *pipe_mnt;
/* d_delete callback for pipefs dentries: unconditionally returns 1. */
static int pipefs_delete_dentry(struct dentry *dentry)
{
	return 1;
}
504 static struct dentry_operations pipefs_dentry_operations = {
505 .d_delete = pipefs_delete_dentry,
508 static struct inode * get_pipe_inode(void)
510 struct inode *inode = new_inode(pipe_mnt->mnt_sb);
512 if (!inode)
513 goto fail_inode;
515 if(!pipe_new(inode))
516 goto fail_iput;
517 PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
518 inode->i_fop = &rdwr_pipe_fops;
521 * Mark the inode dirty from the very beginning,
522 * that way it will never be moved to the dirty
523 * list because "mark_inode_dirty()" will think
524 * that it already _is_ on the dirty list.
526 inode->i_state = I_DIRTY;
527 inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
528 inode->i_uid = current->fsuid;
529 inode->i_gid = current->fsgid;
530 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
531 inode->i_blksize = PAGE_SIZE;
532 return inode;
534 fail_iput:
535 iput(inode);
536 fail_inode:
537 return NULL;
540 int do_pipe(int *fd)
542 struct qstr this;
543 char name[32];
544 struct dentry *dentry;
545 struct inode * inode;
546 struct file *f1, *f2;
547 int error;
548 int i,j;
550 error = -ENFILE;
551 f1 = get_empty_filp();
552 if (!f1)
553 goto no_files;
555 f2 = get_empty_filp();
556 if (!f2)
557 goto close_f1;
559 inode = get_pipe_inode();
560 if (!inode)
561 goto close_f12;
563 error = get_unused_fd();
564 if (error < 0)
565 goto close_f12_inode;
566 i = error;
568 error = get_unused_fd();
569 if (error < 0)
570 goto close_f12_inode_i;
571 j = error;
573 error = -ENOMEM;
574 sprintf(name, "[%lu]", inode->i_ino);
575 this.name = name;
576 this.len = strlen(name);
577 this.hash = inode->i_ino; /* will go */
578 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
579 if (!dentry)
580 goto close_f12_inode_i_j;
581 dentry->d_op = &pipefs_dentry_operations;
582 d_add(dentry, inode);
583 f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
584 f1->f_dentry = f2->f_dentry = dget(dentry);
586 /* read file */
587 f1->f_pos = f2->f_pos = 0;
588 f1->f_flags = O_RDONLY;
589 f1->f_op = &read_pipe_fops;
590 f1->f_mode = 1;
591 f1->f_version = 0;
593 /* write file */
594 f2->f_flags = O_WRONLY;
595 f2->f_op = &write_pipe_fops;
596 f2->f_mode = 2;
597 f2->f_version = 0;
599 fd_install(i, f1);
600 fd_install(j, f2);
601 fd[0] = i;
602 fd[1] = j;
603 return 0;
605 close_f12_inode_i_j:
606 put_unused_fd(j);
607 close_f12_inode_i:
608 put_unused_fd(i);
609 close_f12_inode:
610 free_page((unsigned long) PIPE_BASE(*inode));
611 kfree(inode->i_pipe);
612 inode->i_pipe = NULL;
613 iput(inode);
614 close_f12:
615 put_filp(f2);
616 close_f1:
617 put_filp(f1);
618 no_files:
619 return error;
623 * pipefs should _never_ be mounted by userland - too much of security hassle,
624 * no real gain from having the whole whorehouse mounted. So we don't need
625 * any operations on the root directory. However, we need a non-trivial
626 * d_name - pipe: will go nicely and kill the special-casing in procfs.
629 static struct super_block *pipefs_get_sb(struct file_system_type *fs_type,
630 int flags, char *dev_name, void *data)
632 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
635 static struct file_system_type pipe_fs_type = {
636 .name = "pipefs",
637 .get_sb = pipefs_get_sb,
638 .kill_sb = kill_anon_super,
641 static int __init init_pipe_fs(void)
643 int err = register_filesystem(&pipe_fs_type);
644 if (!err) {
645 pipe_mnt = kern_mount(&pipe_fs_type);
646 err = PTR_ERR(pipe_mnt);
647 if (IS_ERR(pipe_mnt))
648 unregister_filesystem(&pipe_fs_type);
649 else
650 err = 0;
652 return err;
655 static void __exit exit_pipe_fs(void)
657 unregister_filesystem(&pipe_fs_type);
658 mntput(pipe_mnt);
661 module_init(init_pipe_fs)
662 module_exit(exit_pipe_fs)