4 * Copyright (C) 1991, 1992, 1999 Linus Torvalds
8 #include <linux/file.h>
9 #include <linux/poll.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/init.h>
14 #include <linux/mount.h>
15 #include <linux/pipe_fs_i.h>
16 #include <linux/uio.h>
17 #include <asm/uaccess.h>
18 #include <asm/ioctls.h>
21 * We use a start+len construction, which provides full use of the
23 * -- Florian Coosmann (FGC)
25 * Reads with count = 0 should always return 0.
26 * -- Julian Bradfield 1999-06-07.
28 * FIFOs and Pipes now generate SIGIO for both readers and writers.
29 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
31 * pipe_read & write cleanup
32 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
35 /* Drop the inode semaphore and wait for a pipe event, atomically */
36 void pipe_wait(struct inode
* inode
)
40 prepare_to_wait(PIPE_WAIT(*inode
), &wait
, TASK_INTERRUPTIBLE
);
43 finish_wait(PIPE_WAIT(*inode
), &wait
);
44 down(PIPE_SEM(*inode
));
48 pipe_iov_copy_from_user(void *to
, struct iovec
*iov
, unsigned long len
)
55 copy
= min_t(unsigned long, len
, iov
->iov_len
);
57 if (copy_from_user(to
, iov
->iov_base
, copy
))
61 iov
->iov_base
+= copy
;
68 pipe_iov_copy_to_user(struct iovec
*iov
, const void *from
, unsigned long len
)
75 copy
= min_t(unsigned long, len
, iov
->iov_len
);
77 if (copy_to_user(iov
->iov_base
, from
, copy
))
81 iov
->iov_base
+= copy
;
88 pipe_readv(struct file
*filp
, const struct iovec
*_iov
,
89 unsigned long nr_segs
, loff_t
*ppos
)
91 struct inode
*inode
= filp
->f_dentry
->d_inode
;
94 struct iovec
*iov
= (struct iovec
*)_iov
;
97 /* pread is not allowed on pipes. */
98 if (unlikely(ppos
!= &filp
->f_pos
))
101 total_len
= iov_length(iov
, nr_segs
);
102 /* Null read succeeds. */
103 if (unlikely(total_len
== 0))
108 down(PIPE_SEM(*inode
));
110 int size
= PIPE_LEN(*inode
);
112 char *pipebuf
= PIPE_BASE(*inode
) + PIPE_START(*inode
);
113 ssize_t chars
= PIPE_MAX_RCHUNK(*inode
);
115 if (chars
> total_len
)
120 if (pipe_iov_copy_to_user(iov
, pipebuf
, chars
)) {
121 if (!ret
) ret
= -EFAULT
;
126 PIPE_START(*inode
) += chars
;
127 PIPE_START(*inode
) &= (PIPE_SIZE
- 1);
128 PIPE_LEN(*inode
) -= chars
;
132 break; /* common path: read succeeded */
134 if (PIPE_LEN(*inode
)) /* test for cyclic buffers */
136 if (!PIPE_WRITERS(*inode
))
138 if (!PIPE_WAITING_WRITERS(*inode
)) {
139 /* syscall merging: Usually we must not sleep
140 * if O_NONBLOCK is set, or if we got some data.
141 * But if a writer sleeps in kernel space, then
142 * we can wait for that data without violating POSIX.
146 if (filp
->f_flags
& O_NONBLOCK
) {
151 if (signal_pending(current
)) {
152 if (!ret
) ret
= -ERESTARTSYS
;
156 wake_up_interruptible_sync(PIPE_WAIT(*inode
));
157 kill_fasync(PIPE_FASYNC_WRITERS(*inode
), SIGIO
, POLL_OUT
);
161 up(PIPE_SEM(*inode
));
162 /* Signal writers asynchronously that there is more room. */
164 wake_up_interruptible(PIPE_WAIT(*inode
));
165 kill_fasync(PIPE_FASYNC_WRITERS(*inode
), SIGIO
, POLL_OUT
);
173 pipe_read(struct file
*filp
, char __user
*buf
, size_t count
, loff_t
*ppos
)
175 struct iovec iov
= { .iov_base
= buf
, .iov_len
= count
};
176 return pipe_readv(filp
, &iov
, 1, ppos
);
180 pipe_writev(struct file
*filp
, const struct iovec
*_iov
,
181 unsigned long nr_segs
, loff_t
*ppos
)
183 struct inode
*inode
= filp
->f_dentry
->d_inode
;
187 struct iovec
*iov
= (struct iovec
*)_iov
;
190 /* pwrite is not allowed on pipes. */
191 if (unlikely(ppos
!= &filp
->f_pos
))
194 total_len
= iov_length(iov
, nr_segs
);
195 /* Null write succeeds. */
196 if (unlikely(total_len
== 0))
204 down(PIPE_SEM(*inode
));
207 if (!PIPE_READERS(*inode
)) {
208 send_sig(SIGPIPE
, current
, 0);
209 if (!ret
) ret
= -EPIPE
;
212 free
= PIPE_FREE(*inode
);
215 ssize_t chars
= PIPE_MAX_WCHUNK(*inode
);
216 char *pipebuf
= PIPE_BASE(*inode
) + PIPE_END(*inode
);
217 /* Always wakeup, even if the copy fails. Otherwise
218 * we lock up (O_NONBLOCK-)readers that sleep due to
222 if (chars
> total_len
)
227 if (pipe_iov_copy_from_user(pipebuf
, iov
, chars
)) {
228 if (!ret
) ret
= -EFAULT
;
233 PIPE_LEN(*inode
) += chars
;
238 if (PIPE_FREE(*inode
) && ret
) {
239 /* handle cyclic data buffers */
243 if (filp
->f_flags
& O_NONBLOCK
) {
244 if (!ret
) ret
= -EAGAIN
;
247 if (signal_pending(current
)) {
248 if (!ret
) ret
= -ERESTARTSYS
;
252 wake_up_interruptible_sync(PIPE_WAIT(*inode
));
253 kill_fasync(PIPE_FASYNC_READERS(*inode
), SIGIO
, POLL_IN
);
256 PIPE_WAITING_WRITERS(*inode
)++;
258 PIPE_WAITING_WRITERS(*inode
)--;
260 up(PIPE_SEM(*inode
));
262 wake_up_interruptible(PIPE_WAIT(*inode
));
263 kill_fasync(PIPE_FASYNC_READERS(*inode
), SIGIO
, POLL_IN
);
266 inode_update_time(inode
, 1); /* mtime and ctime */
271 pipe_write(struct file
*filp
, const char __user
*buf
,
272 size_t count
, loff_t
*ppos
)
274 struct iovec iov
= { .iov_base
= (void __user
*)buf
, .iov_len
= count
};
275 return pipe_writev(filp
, &iov
, 1, ppos
);
279 bad_pipe_r(struct file
*filp
, char __user
*buf
, size_t count
, loff_t
*ppos
)
285 bad_pipe_w(struct file
*filp
, const char __user
*buf
, size_t count
, loff_t
*ppos
)
291 pipe_ioctl(struct inode
*pino
, struct file
*filp
,
292 unsigned int cmd
, unsigned long arg
)
296 return put_user(PIPE_LEN(*pino
), (int __user
*)arg
);
302 /* No kernel lock held - fine */
304 pipe_poll(struct file
*filp
, poll_table
*wait
)
307 struct inode
*inode
= filp
->f_dentry
->d_inode
;
309 poll_wait(filp
, PIPE_WAIT(*inode
), wait
);
311 /* Reading only -- no need for acquiring the semaphore. */
312 mask
= POLLIN
| POLLRDNORM
;
313 if (PIPE_EMPTY(*inode
))
314 mask
= POLLOUT
| POLLWRNORM
;
315 if (!PIPE_WRITERS(*inode
) && filp
->f_version
!= PIPE_WCOUNTER(*inode
))
317 if (!PIPE_READERS(*inode
))
323 /* FIXME: most Unices do not set POLLERR for fifos */
324 #define fifo_poll pipe_poll
327 pipe_release(struct inode
*inode
, int decr
, int decw
)
329 down(PIPE_SEM(*inode
));
330 PIPE_READERS(*inode
) -= decr
;
331 PIPE_WRITERS(*inode
) -= decw
;
332 if (!PIPE_READERS(*inode
) && !PIPE_WRITERS(*inode
)) {
333 struct pipe_inode_info
*info
= inode
->i_pipe
;
334 inode
->i_pipe
= NULL
;
335 free_page((unsigned long) info
->base
);
338 wake_up_interruptible(PIPE_WAIT(*inode
));
339 kill_fasync(PIPE_FASYNC_READERS(*inode
), SIGIO
, POLL_IN
);
340 kill_fasync(PIPE_FASYNC_WRITERS(*inode
), SIGIO
, POLL_OUT
);
342 up(PIPE_SEM(*inode
));
348 pipe_read_fasync(int fd
, struct file
*filp
, int on
)
350 struct inode
*inode
= filp
->f_dentry
->d_inode
;
353 down(PIPE_SEM(*inode
));
354 retval
= fasync_helper(fd
, filp
, on
, PIPE_FASYNC_READERS(*inode
));
355 up(PIPE_SEM(*inode
));
365 pipe_write_fasync(int fd
, struct file
*filp
, int on
)
367 struct inode
*inode
= filp
->f_dentry
->d_inode
;
370 down(PIPE_SEM(*inode
));
371 retval
= fasync_helper(fd
, filp
, on
, PIPE_FASYNC_WRITERS(*inode
));
372 up(PIPE_SEM(*inode
));
382 pipe_rdwr_fasync(int fd
, struct file
*filp
, int on
)
384 struct inode
*inode
= filp
->f_dentry
->d_inode
;
387 down(PIPE_SEM(*inode
));
389 retval
= fasync_helper(fd
, filp
, on
, PIPE_FASYNC_READERS(*inode
));
392 retval
= fasync_helper(fd
, filp
, on
, PIPE_FASYNC_WRITERS(*inode
));
394 up(PIPE_SEM(*inode
));
404 pipe_read_release(struct inode
*inode
, struct file
*filp
)
406 pipe_read_fasync(-1, filp
, 0);
407 return pipe_release(inode
, 1, 0);
411 pipe_write_release(struct inode
*inode
, struct file
*filp
)
413 pipe_write_fasync(-1, filp
, 0);
414 return pipe_release(inode
, 0, 1);
418 pipe_rdwr_release(struct inode
*inode
, struct file
*filp
)
422 pipe_rdwr_fasync(-1, filp
, 0);
423 decr
= (filp
->f_mode
& FMODE_READ
) != 0;
424 decw
= (filp
->f_mode
& FMODE_WRITE
) != 0;
425 return pipe_release(inode
, decr
, decw
);
429 pipe_read_open(struct inode
*inode
, struct file
*filp
)
431 /* We could have perhaps used atomic_t, but this and friends
432 below are the only places. So it doesn't seem worthwhile. */
433 down(PIPE_SEM(*inode
));
434 PIPE_READERS(*inode
)++;
435 up(PIPE_SEM(*inode
));
441 pipe_write_open(struct inode
*inode
, struct file
*filp
)
443 down(PIPE_SEM(*inode
));
444 PIPE_WRITERS(*inode
)++;
445 up(PIPE_SEM(*inode
));
451 pipe_rdwr_open(struct inode
*inode
, struct file
*filp
)
453 down(PIPE_SEM(*inode
));
454 if (filp
->f_mode
& FMODE_READ
)
455 PIPE_READERS(*inode
)++;
456 if (filp
->f_mode
& FMODE_WRITE
)
457 PIPE_WRITERS(*inode
)++;
458 up(PIPE_SEM(*inode
));
464 * The file_operations structs are not static because they
465 * are also used in linux/fs/fifo.c to do operations on FIFOs.
467 struct file_operations read_fifo_fops
= {
474 .open
= pipe_read_open
,
475 .release
= pipe_read_release
,
476 .fasync
= pipe_read_fasync
,
479 struct file_operations write_fifo_fops
= {
483 .writev
= pipe_writev
,
486 .open
= pipe_write_open
,
487 .release
= pipe_write_release
,
488 .fasync
= pipe_write_fasync
,
491 struct file_operations rdwr_fifo_fops
= {
496 .writev
= pipe_writev
,
499 .open
= pipe_rdwr_open
,
500 .release
= pipe_rdwr_release
,
501 .fasync
= pipe_rdwr_fasync
,
504 struct file_operations read_pipe_fops
= {
511 .open
= pipe_read_open
,
512 .release
= pipe_read_release
,
513 .fasync
= pipe_read_fasync
,
516 struct file_operations write_pipe_fops
= {
520 .writev
= pipe_writev
,
523 .open
= pipe_write_open
,
524 .release
= pipe_write_release
,
525 .fasync
= pipe_write_fasync
,
528 struct file_operations rdwr_pipe_fops
= {
533 .writev
= pipe_writev
,
536 .open
= pipe_rdwr_open
,
537 .release
= pipe_rdwr_release
,
538 .fasync
= pipe_rdwr_fasync
,
541 struct inode
* pipe_new(struct inode
* inode
)
545 page
= __get_free_page(GFP_USER
);
549 inode
->i_pipe
= kmalloc(sizeof(struct pipe_inode_info
), GFP_KERNEL
);
553 init_waitqueue_head(PIPE_WAIT(*inode
));
554 PIPE_BASE(*inode
) = (char*) page
;
555 PIPE_START(*inode
) = PIPE_LEN(*inode
) = 0;
556 PIPE_READERS(*inode
) = PIPE_WRITERS(*inode
) = 0;
557 PIPE_WAITING_WRITERS(*inode
) = 0;
558 PIPE_RCOUNTER(*inode
) = PIPE_WCOUNTER(*inode
) = 1;
559 *PIPE_FASYNC_READERS(*inode
) = *PIPE_FASYNC_WRITERS(*inode
) = NULL
;
567 static struct vfsmount
*pipe_mnt
;
568 static int pipefs_delete_dentry(struct dentry
*dentry
)
572 static struct dentry_operations pipefs_dentry_operations
= {
573 .d_delete
= pipefs_delete_dentry
,
576 static struct inode
* get_pipe_inode(void)
578 struct inode
*inode
= new_inode(pipe_mnt
->mnt_sb
);
585 PIPE_READERS(*inode
) = PIPE_WRITERS(*inode
) = 1;
586 inode
->i_fop
= &rdwr_pipe_fops
;
589 * Mark the inode dirty from the very beginning,
590 * that way it will never be moved to the dirty
591 * list because "mark_inode_dirty()" will think
592 * that it already _is_ on the dirty list.
594 inode
->i_state
= I_DIRTY
;
595 inode
->i_mode
= S_IFIFO
| S_IRUSR
| S_IWUSR
;
596 inode
->i_uid
= current
->fsuid
;
597 inode
->i_gid
= current
->fsgid
;
598 inode
->i_atime
= inode
->i_mtime
= inode
->i_ctime
= CURRENT_TIME
;
599 inode
->i_blksize
= PAGE_SIZE
;
612 struct dentry
*dentry
;
613 struct inode
* inode
;
614 struct file
*f1
, *f2
;
619 f1
= get_empty_filp();
623 f2
= get_empty_filp();
627 inode
= get_pipe_inode();
631 error
= get_unused_fd();
633 goto close_f12_inode
;
636 error
= get_unused_fd();
638 goto close_f12_inode_i
;
642 sprintf(name
, "[%lu]", inode
->i_ino
);
644 this.len
= strlen(name
);
645 this.hash
= inode
->i_ino
; /* will go */
646 dentry
= d_alloc(pipe_mnt
->mnt_sb
->s_root
, &this);
648 goto close_f12_inode_i_j
;
649 dentry
->d_op
= &pipefs_dentry_operations
;
650 d_add(dentry
, inode
);
651 f1
->f_vfsmnt
= f2
->f_vfsmnt
= mntget(mntget(pipe_mnt
));
652 f1
->f_dentry
= f2
->f_dentry
= dget(dentry
);
653 f1
->f_mapping
= f2
->f_mapping
= inode
->i_mapping
;
656 f1
->f_pos
= f2
->f_pos
= 0;
657 f1
->f_flags
= O_RDONLY
;
658 f1
->f_op
= &read_pipe_fops
;
663 f2
->f_flags
= O_WRONLY
;
664 f2
->f_op
= &write_pipe_fops
;
679 free_page((unsigned long) PIPE_BASE(*inode
));
680 kfree(inode
->i_pipe
);
681 inode
->i_pipe
= NULL
;
692 * pipefs should _never_ be mounted by userland - too much of security hassle,
693 * no real gain from having the whole whorehouse mounted. So we don't need
694 * any operations on the root directory. However, we need a non-trivial
695 * d_name - pipe: will go nicely and kill the special-casing in procfs.
698 static struct super_block
*pipefs_get_sb(struct file_system_type
*fs_type
,
699 int flags
, const char *dev_name
, void *data
)
701 return get_sb_pseudo(fs_type
, "pipe:", NULL
, PIPEFS_MAGIC
);
704 static struct file_system_type pipe_fs_type
= {
706 .get_sb
= pipefs_get_sb
,
707 .kill_sb
= kill_anon_super
,
710 static int __init
init_pipe_fs(void)
712 int err
= register_filesystem(&pipe_fs_type
);
714 pipe_mnt
= kern_mount(&pipe_fs_type
);
715 if (IS_ERR(pipe_mnt
)) {
716 err
= PTR_ERR(pipe_mnt
);
717 unregister_filesystem(&pipe_fs_type
);
723 static void __exit
exit_pipe_fs(void)
725 unregister_filesystem(&pipe_fs_type
);
729 module_init(init_pipe_fs
)
730 module_exit(exit_pipe_fs
)