/*
 *  linux/fs/pipe.c
 *
 *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
 */

#include <linux/mm.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/pipe_fs_i.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>

/*
 * We use a start+len construction, which provides full use of the
 * allocated memory.
 * -- Florian Coosmann (FGC)
 *
 * Reads with count = 0 should always return 0.
 * -- Julian Bradfield 1999-06-07.
 *
 * FIFOs and Pipes now generate SIGIO for both readers and writers.
 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
 *
 * pipe_read & write cleanup
 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
 */

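/*
 * Illustrative sketch, not part of the original file: the start+len
 * ring-buffer arithmetic the note above refers to.  "start" is the read
 * offset, "len" the number of queued bytes, and the buffer size is a
 * power of two (one page).  A single copy can only cover the contiguous
 * span up to the end of the buffer; the callers below additionally clamp
 * the chunk to the queued length, the free space and the user-supplied
 * count.  The helper names here are made up for illustration.
 */
static inline unsigned int example_read_chunk_bound(unsigned int start,
						    unsigned int size)
{
	/* contiguous bytes from the read offset to the end of the buffer,
	 * cf. PIPE_MAX_RCHUNK() */
	return size - start;
}

static inline unsigned int example_write_chunk_bound(unsigned int start,
						     unsigned int len,
						     unsigned int size)
{
	/* the write offset wraps around the power-of-two buffer, cf. PIPE_END() */
	unsigned int end = (start + len) & (size - 1);

	/* contiguous bytes from the write offset to the end of the buffer,
	 * cf. PIPE_MAX_WCHUNK() */
	return size - end;
}
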
/* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct inode * inode)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE);
	up(PIPE_SEM(*inode));
	schedule();
	finish_wait(PIPE_WAIT(*inode), &wait);
	down(PIPE_SEM(*inode));
}

static ssize_t
pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
	struct inode *inode = filp->f_dentry->d_inode;
	int do_wakeup;
	ssize_t ret;

	/* pread is not allowed on pipes. */
	if (unlikely(ppos != &filp->f_pos))
		return -ESPIPE;

	/* Null read succeeds. */
	if (unlikely(count == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	down(PIPE_SEM(*inode));
	for (;;) {
		int size = PIPE_LEN(*inode);
		if (size) {
			char *pipebuf = PIPE_BASE(*inode) + PIPE_START(*inode);
			ssize_t chars = PIPE_MAX_RCHUNK(*inode);

			if (chars > count)
				chars = count;
			if (chars > size)
				chars = size;

			if (copy_to_user(buf, pipebuf, chars)) {
				if (!ret) ret = -EFAULT;
				break;
			}
			ret += chars;

			PIPE_START(*inode) += chars;
			PIPE_START(*inode) &= (PIPE_SIZE - 1);
			PIPE_LEN(*inode) -= chars;
			count -= chars;
			buf += chars;
			do_wakeup = 1;
		}
		if (!count)
			break;	/* common path: read succeeded */
		if (PIPE_LEN(*inode)) /* test for cyclic buffers */
			continue;
		if (!PIPE_WRITERS(*inode))
			break;
		if (!PIPE_WAITING_WRITERS(*inode)) {
			/* syscall merging: Usually we must not sleep
			 * if O_NONBLOCK is set, or if we got some data.
			 * But if a writer sleeps in kernel space, then
			 * we can wait for that data without violating POSIX.
			 * (A stand-alone sketch of this decision follows
			 * pipe_read() below.)
			 */
			if (ret)
				break;
			if (filp->f_flags & O_NONBLOCK) {
				ret = -EAGAIN;
				break;
			}
		}
		if (signal_pending(current)) {
			if (!ret) ret = -ERESTARTSYS;
			break;
		}
		if (do_wakeup) {
			wake_up_interruptible_sync(PIPE_WAIT(*inode));
			kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
		}
		pipe_wait(inode);
	}
	up(PIPE_SEM(*inode));
	/* Signal writers asynchronously that there is more room. */
	if (do_wakeup) {
		wake_up_interruptible(PIPE_WAIT(*inode));
		kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
	}
	if (ret > 0)
		update_atime(inode);
	return ret;
}

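/*
 * Illustrative sketch, not part of the original file: the "syscall
 * merging" decision from pipe_read() above, pulled out as a pure
 * function.  The real loop folds this into its break/continue logic;
 * the name and parameters here are made up for illustration.
 */
static inline int example_reader_should_sleep(ssize_t copied, int nonblock,
					      int writers, int waiting_writers)
{
	if (!writers)
		return 0;	/* no writers left: report what we have (or EOF) */
	if (waiting_writers)
		return 1;	/* a writer is blocked in the kernel, so more
				 * data is coming; waiting for it does not
				 * violate POSIX */
	if (copied || nonblock)
		return 0;	/* already got data, or O_NONBLOCK: return now */
	return 1;		/* empty blocking pipe: sleep for data */
}
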
static ssize_t
pipe_write(struct file *filp, const char __user *buf, size_t count, loff_t *ppos)
{
	struct inode *inode = filp->f_dentry->d_inode;
	ssize_t ret;
	size_t min;
	int do_wakeup;

	/* pwrite is not allowed on pipes. */
	if (unlikely(ppos != &filp->f_pos))
		return -ESPIPE;

	/* Null write succeeds. */
	if (unlikely(count == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	min = count;
	if (min > PIPE_BUF)
		min = 1;
	down(PIPE_SEM(*inode));
	for (;;) {
		int free;
		if (!PIPE_READERS(*inode)) {
			send_sig(SIGPIPE, current, 0);
			if (!ret) ret = -EPIPE;
			break;
		}
		free = PIPE_FREE(*inode);
		if (free >= min) {
			/* transfer data */
			ssize_t chars = PIPE_MAX_WCHUNK(*inode);
			char *pipebuf = PIPE_BASE(*inode) + PIPE_END(*inode);
			/* Always wakeup, even if the copy fails. Otherwise
			 * we lock up (O_NONBLOCK-)readers that sleep due to
			 * syscall merging.
			 */
			do_wakeup = 1;
			if (chars > count)
				chars = count;
			if (chars > free)
				chars = free;

			if (copy_from_user(pipebuf, buf, chars)) {
				if (!ret) ret = -EFAULT;
				break;
			}
			ret += chars;

			PIPE_LEN(*inode) += chars;
			count -= chars;
			buf += chars;
		}
		if (!count)
			break;
		if (PIPE_FREE(*inode) && ret) {
			/* handle cyclic data buffers */
			do_wakeup = 1;
			continue;
		}
		if (filp->f_flags & O_NONBLOCK) {
			if (!ret) ret = -EAGAIN;
			break;
		}
		if (signal_pending(current)) {
			if (!ret) ret = -ERESTARTSYS;
			break;
		}
		if (do_wakeup) {
			wake_up_interruptible_sync(PIPE_WAIT(*inode));
			kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
			do_wakeup = 0;
		}
		PIPE_WAITING_WRITERS(*inode)++;
		pipe_wait(inode);
		PIPE_WAITING_WRITERS(*inode)--;
	}
	up(PIPE_SEM(*inode));
	if (do_wakeup) {
		wake_up_interruptible(PIPE_WAIT(*inode));
		kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
	}
	if (ret > 0) {
		inode->i_ctime = inode->i_mtime = CURRENT_TIME;
		mark_inode_dirty(inode);
	}
	return ret;
}

static ssize_t
bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
	return -EBADF;
}

static ssize_t
bad_pipe_w(struct file *filp, const char __user *buf, size_t count, loff_t *ppos)
{
	return -EBADF;
}

static int
pipe_ioctl(struct inode *pino, struct file *filp,
	   unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
		case FIONREAD:
			return put_user(PIPE_LEN(*pino), (int __user *)arg);
		default:
			return -EINVAL;
	}
}

/* No kernel lock held - fine */
static unsigned int
pipe_poll(struct file *filp, poll_table *wait)
{
	unsigned int mask;
	struct inode *inode = filp->f_dentry->d_inode;

	poll_wait(filp, PIPE_WAIT(*inode), wait);

	/* Reading only -- no need for acquiring the semaphore. */
	mask = POLLIN | POLLRDNORM;
	if (PIPE_EMPTY(*inode))
		mask = POLLOUT | POLLWRNORM;
	if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
		mask |= POLLHUP;
	if (!PIPE_READERS(*inode))
		mask |= POLLERR;

	return mask;
}

/* FIXME: most Unices do not set POLLERR for fifos */
#define fifo_poll pipe_poll

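/*
 * Illustrative userspace sketch, not part of the original file: what the
 * mask computed by pipe_poll() above means to a poll(2) caller.  A reader
 * polling the read end sees POLLIN/POLLRDNORM once data is queued and a
 * hangup indication once every writer has closed; per the FIXME above, a
 * poller on the write end of a reader-less pipe gets POLLERR here even
 * though most Unices do not report it for fifos.
 */
#if 0	/* userspace example, kept out of the kernel build */
#include <poll.h>
#include <stdio.h>

static void example_wait_for_pipe_data(int readfd)
{
	struct pollfd pfd = { .fd = readfd, .events = POLLIN };

	if (poll(&pfd, 1, -1) > 0) {
		if (pfd.revents & POLLIN)
			printf("data queued in the pipe\n");
		if (pfd.revents & POLLHUP)
			printf("all writers have closed their end\n");
	}
}
#endif
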
static int
pipe_release(struct inode *inode, int decr, int decw)
{
	down(PIPE_SEM(*inode));
	PIPE_READERS(*inode) -= decr;
	PIPE_WRITERS(*inode) -= decw;
	if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
		struct pipe_inode_info *info = inode->i_pipe;
		inode->i_pipe = NULL;
		free_page((unsigned long) info->base);
		kfree(info);
	} else {
		wake_up_interruptible(PIPE_WAIT(*inode));
		kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
		kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
	}
	up(PIPE_SEM(*inode));

	return 0;
}

static int
pipe_read_fasync(int fd, struct file *filp, int on)
{
	struct inode *inode = filp->f_dentry->d_inode;
	int retval;

	down(PIPE_SEM(*inode));
	retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
	up(PIPE_SEM(*inode));

	if (retval < 0)
		return retval;

	return 0;
}

static int
pipe_write_fasync(int fd, struct file *filp, int on)
{
	struct inode *inode = filp->f_dentry->d_inode;
	int retval;

	down(PIPE_SEM(*inode));
	retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
	up(PIPE_SEM(*inode));

	if (retval < 0)
		return retval;

	return 0;
}

static int
pipe_rdwr_fasync(int fd, struct file *filp, int on)
{
	struct inode *inode = filp->f_dentry->d_inode;
	int retval;

	down(PIPE_SEM(*inode));

	retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));

	if (retval >= 0)
		retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));

	up(PIPE_SEM(*inode));

	if (retval < 0)
		return retval;

	return 0;
}

static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	pipe_read_fasync(-1, filp, 0);
	return pipe_release(inode, 1, 0);
}

static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	pipe_write_fasync(-1, filp, 0);
	return pipe_release(inode, 0, 1);
}

static int
pipe_rdwr_release(struct inode *inode, struct file *filp)
{
	int decr, decw;

	pipe_rdwr_fasync(-1, filp, 0);
	decr = (filp->f_mode & FMODE_READ) != 0;
	decw = (filp->f_mode & FMODE_WRITE) != 0;
	return pipe_release(inode, decr, decw);
}

static int
pipe_read_open(struct inode *inode, struct file *filp)
{
	/* We could have perhaps used atomic_t, but this and friends
	   below are the only places.  So it doesn't seem worthwhile.  */
	down(PIPE_SEM(*inode));
	PIPE_READERS(*inode)++;
	up(PIPE_SEM(*inode));

	return 0;
}

static int
pipe_write_open(struct inode *inode, struct file *filp)
{
	down(PIPE_SEM(*inode));
	PIPE_WRITERS(*inode)++;
	up(PIPE_SEM(*inode));

	return 0;
}

static int
pipe_rdwr_open(struct inode *inode, struct file *filp)
{
	down(PIPE_SEM(*inode));
	if (filp->f_mode & FMODE_READ)
		PIPE_READERS(*inode)++;
	if (filp->f_mode & FMODE_WRITE)
		PIPE_WRITERS(*inode)++;
	up(PIPE_SEM(*inode));

	return 0;
}

/*
 * The file_operations structs are not static because they
 * are also used in linux/fs/fifo.c to do operations on FIFOs.
 */
struct file_operations read_fifo_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.write		= bad_pipe_w,
	.poll		= fifo_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_read_open,
	.release	= pipe_read_release,
	.fasync		= pipe_read_fasync,
};

struct file_operations write_fifo_fops = {
	.llseek		= no_llseek,
	.read		= bad_pipe_r,
	.write		= pipe_write,
	.poll		= fifo_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_write_open,
	.release	= pipe_write_release,
	.fasync		= pipe_write_fasync,
};

struct file_operations rdwr_fifo_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.write		= pipe_write,
	.poll		= fifo_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_rdwr_open,
	.release	= pipe_rdwr_release,
	.fasync		= pipe_rdwr_fasync,
};

struct file_operations read_pipe_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.write		= bad_pipe_w,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_read_open,
	.release	= pipe_read_release,
	.fasync		= pipe_read_fasync,
};

struct file_operations write_pipe_fops = {
	.llseek		= no_llseek,
	.read		= bad_pipe_r,
	.write		= pipe_write,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_write_open,
	.release	= pipe_write_release,
	.fasync		= pipe_write_fasync,
};

struct file_operations rdwr_pipe_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.write		= pipe_write,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_rdwr_open,
	.release	= pipe_rdwr_release,
	.fasync		= pipe_rdwr_fasync,
};

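/*
 * Illustrative sketch, not part of the original file: the comment above
 * these tables notes that they are shared with linux/fs/fifo.c, which
 * picks one according to the access mode of the open.  Roughly along
 * these lines (this is a sketch, not the actual fifo_open() code):
 */
static inline struct file_operations *example_pick_fifo_fops(struct file *filp)
{
	int readable = (filp->f_mode & FMODE_READ) != 0;
	int writable = (filp->f_mode & FMODE_WRITE) != 0;

	if (readable && writable)
		return &rdwr_fifo_fops;
	return readable ? &read_fifo_fops : &write_fifo_fops;
}
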
struct inode* pipe_new(struct inode* inode)
{
	unsigned long page;

	page = __get_free_page(GFP_USER);
	if (!page)
		return NULL;

	inode->i_pipe = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
	if (!inode->i_pipe)
		goto fail_page;

	init_waitqueue_head(PIPE_WAIT(*inode));
	PIPE_BASE(*inode) = (char*) page;
	PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
	PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0;
	PIPE_WAITING_WRITERS(*inode) = 0;
	PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
	*PIPE_FASYNC_READERS(*inode) = *PIPE_FASYNC_WRITERS(*inode) = NULL;

	return inode;
fail_page:
	free_page(page);
	return NULL;
}

static struct vfsmount *pipe_mnt;

static int pipefs_delete_dentry(struct dentry *dentry)
{
	return 1;
}

static struct dentry_operations pipefs_dentry_operations = {
	.d_delete	= pipefs_delete_dentry,
};

static struct inode * get_pipe_inode(void)
{
	struct inode *inode = new_inode(pipe_mnt->mnt_sb);

	if (!inode)
		goto fail_inode;

	if (!pipe_new(inode))
		goto fail_iput;
	PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
	inode->i_fop = &rdwr_pipe_fops;

	/*
	 * Mark the inode dirty from the very beginning,
	 * that way it will never be moved to the dirty
	 * list because "mark_inode_dirty()" will think
	 * that it already _is_ on the dirty list.
	 */
	inode->i_state = I_DIRTY;
	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
	inode->i_uid = current->fsuid;
	inode->i_gid = current->fsgid;
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	inode->i_blksize = PAGE_SIZE;

	return inode;

fail_iput:
	iput(inode);
fail_inode:
	return NULL;
}

int do_pipe(int *fd)
{
	struct qstr this;
	char name[32];
	struct dentry *dentry;
	struct inode * inode;
	struct file *f1, *f2;
	int error;
	int i, j;

	error = -ENFILE;
	f1 = get_empty_filp();
	if (!f1)
		goto no_files;

	f2 = get_empty_filp();
	if (!f2)
		goto close_f1;

	inode = get_pipe_inode();
	if (!inode)
		goto close_f12;

	error = get_unused_fd();
	if (error < 0)
		goto close_f12_inode;
	i = error;

	error = get_unused_fd();
	if (error < 0)
		goto close_f12_inode_i;
	j = error;

	error = -ENOMEM;
	sprintf(name, "[%lu]", inode->i_ino);
	this.name = name;
	this.len = strlen(name);
	this.hash = inode->i_ino; /* will go */
	dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
	if (!dentry)
		goto close_f12_inode_i_j;
	dentry->d_op = &pipefs_dentry_operations;
	d_add(dentry, inode);
	f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
	f1->f_dentry = f2->f_dentry = dget(dentry);

	/* read file */
	f1->f_pos = f2->f_pos = 0;
	f1->f_flags = O_RDONLY;
	f1->f_op = &read_pipe_fops;
	f1->f_mode = FMODE_READ;
	f1->f_version = 0;

	/* write file */
	f2->f_flags = O_WRONLY;
	f2->f_op = &write_pipe_fops;
	f2->f_mode = FMODE_WRITE;
	f2->f_version = 0;

	fd_install(i, f1);
	fd_install(j, f2);
	fd[0] = i;
	fd[1] = j;
	return 0;

close_f12_inode_i_j:
	put_unused_fd(j);
close_f12_inode_i:
	put_unused_fd(i);
close_f12_inode:
	free_page((unsigned long) PIPE_BASE(*inode));
	kfree(inode->i_pipe);
	inode->i_pipe = NULL;
	iput(inode);
close_f12:
	put_filp(f2);
close_f1:
	put_filp(f1);
no_files:
	return error;
}

/*
 * pipefs should _never_ be mounted by userland - too much of security hassle,
 * no real gain from having the whole whorehouse mounted. So we don't need
 * any operations on the root directory. However, we need a non-trivial
 * d_name - pipe: will go nicely and kill the special-casing in procfs.
 */
static struct super_block *pipefs_get_sb(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
}

static struct file_system_type pipe_fs_type = {
	.name		= "pipefs",
	.get_sb		= pipefs_get_sb,
	.kill_sb	= kill_anon_super,
};

static int __init init_pipe_fs(void)
{
	int err = register_filesystem(&pipe_fs_type);
	if (!err) {
		pipe_mnt = kern_mount(&pipe_fs_type);
		err = PTR_ERR(pipe_mnt);
		if (IS_ERR(pipe_mnt))
			unregister_filesystem(&pipe_fs_type);
		else
			err = 0;
	}
	return err;
}

static void __exit exit_pipe_fs(void)
{
	unregister_filesystem(&pipe_fs_type);
	mntput(pipe_mnt);
}

module_init(init_pipe_fs)
module_exit(exit_pipe_fs)

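/*
 * Illustrative userspace sketch, not part of the original file: the
 * semantics implemented above, as seen through the pipe(2) syscall that
 * do_pipe() backs.  A zero-length read returns 0 straight away, a read
 * on an empty pipe whose writers have all gone returns 0 (EOF), and a
 * write with no readers left raises SIGPIPE / fails with EPIPE.
 */
#if 0	/* userspace example, kept out of the kernel build */
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd[2];
	char buf[16];

	signal(SIGPIPE, SIG_IGN);	/* observe EPIPE instead of dying */

	if (pipe(fd) < 0)
		return 1;
	printf("zero-length read  -> %zd\n", read(fd[0], buf, 0));		/* 0 */
	write(fd[1], "hi", 2);
	printf("read              -> %zd bytes\n", read(fd[0], buf, sizeof(buf)));	/* 2 */
	close(fd[1]);			/* last writer goes away */
	printf("read at EOF       -> %zd\n", read(fd[0], buf, sizeof(buf)));	/* 0 */
	close(fd[0]);

	if (pipe(fd) < 0)
		return 1;
	close(fd[0]);			/* last reader goes away */
	printf("write, no readers -> %zd (EPIPE)\n", write(fd[1], "x", 1));	/* -1 */
	close(fd[1]);
	return 0;
}
#endif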