Ok. I didn't make 2.4.0 in 2000. Tough. I tried, but we had some
[davej-history.git] / fs / pipe.c
blobc7f27a520a2f664ae267ea3a8e3499fd38784fae
/*
 *  linux/fs/pipe.c
 *
 *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
 */
7 #include <linux/mm.h>
8 #include <linux/file.h>
9 #include <linux/poll.h>
10 #include <linux/malloc.h>
11 #include <linux/module.h>
12 #include <linux/init.h>
14 #include <asm/uaccess.h>
/*
 * We use a start+len construction, which provides full use of the
 * allocated memory.
 *  -- Florian Coosmann (FGC)
 *
 * Reads with count = 0 should always return 0.
 *  -- Julian Bradfield 1999-06-07.
 */
25 /* Drop the inode semaphore and wait for a pipe event, atomically */
26 void pipe_wait(struct inode * inode)
28 DECLARE_WAITQUEUE(wait, current);
29 current->state = TASK_INTERRUPTIBLE;
30 add_wait_queue(PIPE_WAIT(*inode), &wait);
31 up(PIPE_SEM(*inode));
32 schedule();
33 remove_wait_queue(PIPE_WAIT(*inode), &wait);
34 current->state = TASK_RUNNING;
35 down(PIPE_SEM(*inode));
38 static ssize_t
39 pipe_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
41 struct inode *inode = filp->f_dentry->d_inode;
42 ssize_t size, read, ret;
44 /* Seeks are not allowed on pipes. */
45 ret = -ESPIPE;
46 read = 0;
47 if (ppos != &filp->f_pos)
48 goto out_nolock;
50 /* Always return 0 on null read. */
51 ret = 0;
52 if (count == 0)
53 goto out_nolock;
55 /* Get the pipe semaphore */
56 ret = -ERESTARTSYS;
57 if (down_interruptible(PIPE_SEM(*inode)))
58 goto out_nolock;
60 if (PIPE_EMPTY(*inode)) {
61 do_more_read:
62 ret = 0;
63 if (!PIPE_WRITERS(*inode))
64 goto out;
66 ret = -EAGAIN;
67 if (filp->f_flags & O_NONBLOCK)
68 goto out;
70 for (;;) {
71 PIPE_WAITING_READERS(*inode)++;
72 pipe_wait(inode);
73 PIPE_WAITING_READERS(*inode)--;
74 ret = -ERESTARTSYS;
75 if (signal_pending(current))
76 goto out;
77 ret = 0;
78 if (!PIPE_EMPTY(*inode))
79 break;
80 if (!PIPE_WRITERS(*inode))
81 goto out;
85 /* Read what data is available. */
86 ret = -EFAULT;
87 while (count > 0 && (size = PIPE_LEN(*inode))) {
88 char *pipebuf = PIPE_BASE(*inode) + PIPE_START(*inode);
89 ssize_t chars = PIPE_MAX_RCHUNK(*inode);
91 if (chars > count)
92 chars = count;
93 if (chars > size)
94 chars = size;
96 if (copy_to_user(buf, pipebuf, chars))
97 goto out;
99 read += chars;
100 PIPE_START(*inode) += chars;
101 PIPE_START(*inode) &= (PIPE_SIZE - 1);
102 PIPE_LEN(*inode) -= chars;
103 count -= chars;
104 buf += chars;
107 /* Cache behaviour optimization */
108 if (!PIPE_LEN(*inode))
109 PIPE_START(*inode) = 0;
111 if (count && PIPE_WAITING_WRITERS(*inode) && !(filp->f_flags & O_NONBLOCK)) {
113 * We know that we are going to sleep: signal
114 * writers synchronously that there is more
115 * room.
117 wake_up_interruptible_sync(PIPE_WAIT(*inode));
118 if (!PIPE_EMPTY(*inode))
119 BUG();
120 goto do_more_read;
122 /* Signal writers asynchronously that there is more room. */
123 wake_up_interruptible(PIPE_WAIT(*inode));
125 ret = read;
126 out:
127 up(PIPE_SEM(*inode));
128 out_nolock:
129 if (read)
130 ret = read;
131 return ret;
134 static ssize_t
135 pipe_write(struct file *filp, const char *buf, size_t count, loff_t *ppos)
137 struct inode *inode = filp->f_dentry->d_inode;
138 ssize_t free, written, ret;
140 /* Seeks are not allowed on pipes. */
141 ret = -ESPIPE;
142 written = 0;
143 if (ppos != &filp->f_pos)
144 goto out_nolock;
146 /* Null write succeeds. */
147 ret = 0;
148 if (count == 0)
149 goto out_nolock;
151 ret = -ERESTARTSYS;
152 if (down_interruptible(PIPE_SEM(*inode)))
153 goto out_nolock;
155 /* No readers yields SIGPIPE. */
156 if (!PIPE_READERS(*inode))
157 goto sigpipe;
159 /* If count <= PIPE_BUF, we have to make it atomic. */
160 free = (count <= PIPE_BUF ? count : 1);
162 /* Wait, or check for, available space. */
163 if (filp->f_flags & O_NONBLOCK) {
164 ret = -EAGAIN;
165 if (PIPE_FREE(*inode) < free)
166 goto out;
167 } else {
168 while (PIPE_FREE(*inode) < free) {
169 PIPE_WAITING_WRITERS(*inode)++;
170 pipe_wait(inode);
171 PIPE_WAITING_WRITERS(*inode)--;
172 ret = -ERESTARTSYS;
173 if (signal_pending(current))
174 goto out;
176 if (!PIPE_READERS(*inode))
177 goto sigpipe;
181 /* Copy into available space. */
182 ret = -EFAULT;
183 while (count > 0) {
184 int space;
185 char *pipebuf = PIPE_BASE(*inode) + PIPE_END(*inode);
186 ssize_t chars = PIPE_MAX_WCHUNK(*inode);
188 if ((space = PIPE_FREE(*inode)) != 0) {
189 if (chars > count)
190 chars = count;
191 if (chars > space)
192 chars = space;
194 if (copy_from_user(pipebuf, buf, chars))
195 goto out;
197 written += chars;
198 PIPE_LEN(*inode) += chars;
199 count -= chars;
200 buf += chars;
201 space = PIPE_FREE(*inode);
202 continue;
205 ret = written;
206 if (filp->f_flags & O_NONBLOCK)
207 break;
209 do {
211 * Synchronous wake-up: it knows that this process
212 * is going to give up this CPU, so it doesnt have
213 * to do idle reschedules.
215 wake_up_interruptible_sync(PIPE_WAIT(*inode));
216 PIPE_WAITING_WRITERS(*inode)++;
217 pipe_wait(inode);
218 PIPE_WAITING_WRITERS(*inode)--;
219 if (signal_pending(current))
220 goto out;
221 if (!PIPE_READERS(*inode))
222 goto sigpipe;
223 } while (!PIPE_FREE(*inode));
224 ret = -EFAULT;
227 /* Signal readers asynchronously that there is more data. */
228 wake_up_interruptible(PIPE_WAIT(*inode));
230 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
231 mark_inode_dirty(inode);
233 out:
234 up(PIPE_SEM(*inode));
235 out_nolock:
236 if (written)
237 ret = written;
238 return ret;
240 sigpipe:
241 if (written)
242 goto out;
243 up(PIPE_SEM(*inode));
244 send_sig(SIGPIPE, current, 0);
245 return -EPIPE;
248 static loff_t
249 pipe_lseek(struct file *file, loff_t offset, int orig)
251 return -ESPIPE;
254 static ssize_t
255 bad_pipe_r(struct file *filp, char *buf, size_t count, loff_t *ppos)
257 return -EBADF;
260 static ssize_t
261 bad_pipe_w(struct file *filp, const char *buf, size_t count, loff_t *ppos)
263 return -EBADF;
266 static int
267 pipe_ioctl(struct inode *pino, struct file *filp,
268 unsigned int cmd, unsigned long arg)
270 switch (cmd) {
271 case FIONREAD:
272 return put_user(PIPE_LEN(*pino), (int *)arg);
273 default:
274 return -EINVAL;
278 /* No kernel lock held - fine */
279 static unsigned int
280 pipe_poll(struct file *filp, poll_table *wait)
282 unsigned int mask;
283 struct inode *inode = filp->f_dentry->d_inode;
285 poll_wait(filp, PIPE_WAIT(*inode), wait);
287 /* Reading only -- no need for acquiring the semaphore. */
288 mask = POLLIN | POLLRDNORM;
289 if (PIPE_EMPTY(*inode))
290 mask = POLLOUT | POLLWRNORM;
291 if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
292 mask |= POLLHUP;
293 if (!PIPE_READERS(*inode))
294 mask |= POLLERR;
296 return mask;
/*
 * FIFOs currently share the pipe poll method.
 * FIXME: most Unices do not set POLLERR for fifos
 */
#define fifo_poll pipe_poll
302 static int
303 pipe_release(struct inode *inode, int decr, int decw)
305 down(PIPE_SEM(*inode));
306 PIPE_READERS(*inode) -= decr;
307 PIPE_WRITERS(*inode) -= decw;
308 if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
309 struct pipe_inode_info *info = inode->i_pipe;
310 inode->i_pipe = NULL;
311 free_page((unsigned long) info->base);
312 kfree(info);
313 } else {
314 wake_up_interruptible(PIPE_WAIT(*inode));
316 up(PIPE_SEM(*inode));
318 return 0;
/* Release method for read-only ends: drops one reader. */
static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	return pipe_release(inode, 1, 0);
}
/* Release method for write-only ends: drops one writer. */
static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	return pipe_release(inode, 0, 1);
}
333 static int
334 pipe_rdwr_release(struct inode *inode, struct file *filp)
336 int decr, decw;
338 decr = (filp->f_mode & FMODE_READ) != 0;
339 decw = (filp->f_mode & FMODE_WRITE) != 0;
340 return pipe_release(inode, decr, decw);
343 static int
344 pipe_read_open(struct inode *inode, struct file *filp)
346 /* We could have perhaps used atomic_t, but this and friends
347 below are the only places. So it doesn't seem worthwhile. */
348 down(PIPE_SEM(*inode));
349 PIPE_READERS(*inode)++;
350 up(PIPE_SEM(*inode));
352 return 0;
355 static int
356 pipe_write_open(struct inode *inode, struct file *filp)
358 down(PIPE_SEM(*inode));
359 PIPE_WRITERS(*inode)++;
360 up(PIPE_SEM(*inode));
362 return 0;
365 static int
366 pipe_rdwr_open(struct inode *inode, struct file *filp)
368 down(PIPE_SEM(*inode));
369 if (filp->f_mode & FMODE_READ)
370 PIPE_READERS(*inode)++;
371 if (filp->f_mode & FMODE_WRITE)
372 PIPE_WRITERS(*inode)++;
373 up(PIPE_SEM(*inode));
375 return 0;
379 * The file_operations structs are not static because they
380 * are also used in linux/fs/fifo.c to do operations on FIFOs.
382 struct file_operations read_fifo_fops = {
383 llseek: pipe_lseek,
384 read: pipe_read,
385 write: bad_pipe_w,
386 poll: fifo_poll,
387 ioctl: pipe_ioctl,
388 open: pipe_read_open,
389 release: pipe_read_release,
392 struct file_operations write_fifo_fops = {
393 llseek: pipe_lseek,
394 read: bad_pipe_r,
395 write: pipe_write,
396 poll: fifo_poll,
397 ioctl: pipe_ioctl,
398 open: pipe_write_open,
399 release: pipe_write_release,
402 struct file_operations rdwr_fifo_fops = {
403 llseek: pipe_lseek,
404 read: pipe_read,
405 write: pipe_write,
406 poll: fifo_poll,
407 ioctl: pipe_ioctl,
408 open: pipe_rdwr_open,
409 release: pipe_rdwr_release,
412 struct file_operations read_pipe_fops = {
413 llseek: pipe_lseek,
414 read: pipe_read,
415 write: bad_pipe_w,
416 poll: pipe_poll,
417 ioctl: pipe_ioctl,
418 open: pipe_read_open,
419 release: pipe_read_release,
422 struct file_operations write_pipe_fops = {
423 llseek: pipe_lseek,
424 read: bad_pipe_r,
425 write: pipe_write,
426 poll: pipe_poll,
427 ioctl: pipe_ioctl,
428 open: pipe_write_open,
429 release: pipe_write_release,
432 struct file_operations rdwr_pipe_fops = {
433 llseek: pipe_lseek,
434 read: pipe_read,
435 write: pipe_write,
436 poll: pipe_poll,
437 ioctl: pipe_ioctl,
438 open: pipe_rdwr_open,
439 release: pipe_rdwr_release,
442 struct inode* pipe_new(struct inode* inode)
444 unsigned long page;
446 page = __get_free_page(GFP_USER);
447 if (!page)
448 return NULL;
450 inode->i_pipe = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
451 if (!inode->i_pipe)
452 goto fail_page;
454 init_waitqueue_head(PIPE_WAIT(*inode));
455 PIPE_BASE(*inode) = (char*) page;
456 PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
457 PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0;
458 PIPE_WAITING_READERS(*inode) = PIPE_WAITING_WRITERS(*inode) = 0;
459 PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
461 return inode;
462 fail_page:
463 free_page(page);
464 return NULL;
/* Kernel-internal mount of pipefs; assigned from kern_mount() in init_pipe_fs(). */
static struct vfsmount *pipe_mnt;
/* d_delete method for pipefs dentries: unconditionally returns 1. */
static int pipefs_delete_dentry(struct dentry *dentry)
{
	return 1;
}
472 static struct dentry_operations pipefs_dentry_operations = {
473 d_delete: pipefs_delete_dentry,
476 static struct inode * get_pipe_inode(void)
478 struct inode *inode = get_empty_inode();
480 if (!inode)
481 goto fail_inode;
483 if(!pipe_new(inode))
484 goto fail_iput;
485 PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
486 inode->i_fop = &rdwr_pipe_fops;
487 inode->i_sb = pipe_mnt->mnt_sb;
490 * Mark the inode dirty from the very beginning,
491 * that way it will never be moved to the dirty
492 * list because "mark_inode_dirty()" will think
493 * that it already _is_ on the dirty list.
495 inode->i_state = I_DIRTY;
496 inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
497 inode->i_uid = current->fsuid;
498 inode->i_gid = current->fsgid;
499 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
500 inode->i_blksize = PAGE_SIZE;
501 return inode;
503 fail_iput:
504 iput(inode);
505 fail_inode:
506 return NULL;
509 int do_pipe(int *fd)
511 struct qstr this;
512 char name[32];
513 struct dentry *dentry;
514 struct inode * inode;
515 struct file *f1, *f2;
516 int error;
517 int i,j;
519 error = -ENFILE;
520 f1 = get_empty_filp();
521 if (!f1)
522 goto no_files;
524 f2 = get_empty_filp();
525 if (!f2)
526 goto close_f1;
528 inode = get_pipe_inode();
529 if (!inode)
530 goto close_f12;
532 error = get_unused_fd();
533 if (error < 0)
534 goto close_f12_inode;
535 i = error;
537 error = get_unused_fd();
538 if (error < 0)
539 goto close_f12_inode_i;
540 j = error;
542 error = -ENOMEM;
543 sprintf(name, "[%lu]", inode->i_ino);
544 this.name = name;
545 this.len = strlen(name);
546 this.hash = inode->i_ino; /* will go */
547 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
548 if (!dentry)
549 goto close_f12_inode_i_j;
550 dentry->d_op = &pipefs_dentry_operations;
551 d_add(dentry, inode);
552 f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
553 f1->f_dentry = f2->f_dentry = dget(dentry);
555 /* read file */
556 f1->f_pos = f2->f_pos = 0;
557 f1->f_flags = O_RDONLY;
558 f1->f_op = &read_pipe_fops;
559 f1->f_mode = 1;
560 f1->f_version = 0;
562 /* write file */
563 f2->f_flags = O_WRONLY;
564 f2->f_op = &write_pipe_fops;
565 f2->f_mode = 2;
566 f2->f_version = 0;
568 fd_install(i, f1);
569 fd_install(j, f2);
570 fd[0] = i;
571 fd[1] = j;
572 return 0;
574 close_f12_inode_i_j:
575 put_unused_fd(j);
576 close_f12_inode_i:
577 put_unused_fd(i);
578 close_f12_inode:
579 free_page((unsigned long) PIPE_BASE(*inode));
580 kfree(inode->i_pipe);
581 inode->i_pipe = NULL;
582 iput(inode);
583 close_f12:
584 put_filp(f2);
585 close_f1:
586 put_filp(f1);
587 no_files:
588 return error;
592 * pipefs should _never_ be mounted by userland - too much of security hassle,
593 * no real gain from having the whole whorehouse mounted. So we don't need
594 * any operations on the root directory. However, we need a non-trivial
595 * d_name - pipe: will go nicely and kill the special-casing in procfs.
597 static int pipefs_statfs(struct super_block *sb, struct statfs *buf)
599 buf->f_type = PIPEFS_MAGIC;
600 buf->f_bsize = 1024;
601 buf->f_namelen = 255;
602 return 0;
605 static struct super_operations pipefs_ops = {
606 statfs: pipefs_statfs,
609 static struct super_block * pipefs_read_super(struct super_block *sb, void *data, int silent)
611 struct inode *root = new_inode(sb);
612 if (!root)
613 return NULL;
614 root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
615 root->i_uid = root->i_gid = 0;
616 root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
617 sb->s_blocksize = 1024;
618 sb->s_blocksize_bits = 10;
619 sb->s_magic = PIPEFS_MAGIC;
620 sb->s_op = &pipefs_ops;
621 sb->s_root = d_alloc(NULL, &(const struct qstr) { "pipe:", 5, 0 });
622 if (!sb->s_root) {
623 iput(root);
624 return NULL;
626 sb->s_root->d_sb = sb;
627 sb->s_root->d_parent = sb->s_root;
628 d_instantiate(sb->s_root, root);
629 return sb;
632 static DECLARE_FSTYPE(pipe_fs_type, "pipefs", pipefs_read_super,
633 FS_NOMOUNT|FS_SINGLE);
635 static int __init init_pipe_fs(void)
637 int err = register_filesystem(&pipe_fs_type);
638 if (!err) {
639 pipe_mnt = kern_mount(&pipe_fs_type);
640 err = PTR_ERR(pipe_mnt);
641 if (IS_ERR(pipe_mnt))
642 unregister_filesystem(&pipe_fs_type);
643 else
644 err = 0;
646 return err;
649 static void __exit exit_pipe_fs(void)
651 unregister_filesystem(&pipe_fs_type);
652 kern_umount(pipe_mnt);
655 module_init(init_pipe_fs)
656 module_exit(exit_pipe_fs)