[NETLINK]: Add a pointer to the Generic Netlink wiki page.
[linux-2.6/cjktty.git] / fs / read_write.c
blobf792000a28e63179ce88fea9bbeba93f48b9dc94
1 /*
2 * linux/fs/read_write.c
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
7 #include <linux/slab.h>
8 #include <linux/stat.h>
9 #include <linux/fcntl.h>
10 #include <linux/file.h>
11 #include <linux/uio.h>
12 #include <linux/smp_lock.h>
13 #include <linux/fsnotify.h>
14 #include <linux/security.h>
15 #include <linux/module.h>
16 #include <linux/syscalls.h>
17 #include <linux/pagemap.h>
18 #include "read_write.h"
20 #include <asm/uaccess.h>
21 #include <asm/unistd.h>
23 const struct file_operations generic_ro_fops = {
24 .llseek = generic_file_llseek,
25 .read = do_sync_read,
26 .aio_read = generic_file_aio_read,
27 .mmap = generic_file_readonly_mmap,
28 .sendfile = generic_file_sendfile,
31 EXPORT_SYMBOL(generic_ro_fops);
33 loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
35 long long retval;
36 struct inode *inode = file->f_mapping->host;
38 mutex_lock(&inode->i_mutex);
39 switch (origin) {
40 case 2:
41 offset += inode->i_size;
42 break;
43 case 1:
44 offset += file->f_pos;
46 retval = -EINVAL;
47 if (offset>=0 && offset<=inode->i_sb->s_maxbytes) {
48 if (offset != file->f_pos) {
49 file->f_pos = offset;
50 file->f_version = 0;
52 retval = offset;
54 mutex_unlock(&inode->i_mutex);
55 return retval;
58 EXPORT_SYMBOL(generic_file_llseek);
60 loff_t remote_llseek(struct file *file, loff_t offset, int origin)
62 long long retval;
64 lock_kernel();
65 switch (origin) {
66 case 2:
67 offset += i_size_read(file->f_dentry->d_inode);
68 break;
69 case 1:
70 offset += file->f_pos;
72 retval = -EINVAL;
73 if (offset>=0 && offset<=file->f_dentry->d_inode->i_sb->s_maxbytes) {
74 if (offset != file->f_pos) {
75 file->f_pos = offset;
76 file->f_version = 0;
78 retval = offset;
80 unlock_kernel();
81 return retval;
83 EXPORT_SYMBOL(remote_llseek);
85 loff_t no_llseek(struct file *file, loff_t offset, int origin)
87 return -ESPIPE;
89 EXPORT_SYMBOL(no_llseek);
91 loff_t default_llseek(struct file *file, loff_t offset, int origin)
93 long long retval;
95 lock_kernel();
96 switch (origin) {
97 case 2:
98 offset += i_size_read(file->f_dentry->d_inode);
99 break;
100 case 1:
101 offset += file->f_pos;
103 retval = -EINVAL;
104 if (offset >= 0) {
105 if (offset != file->f_pos) {
106 file->f_pos = offset;
107 file->f_version = 0;
109 retval = offset;
111 unlock_kernel();
112 return retval;
114 EXPORT_SYMBOL(default_llseek);
116 loff_t vfs_llseek(struct file *file, loff_t offset, int origin)
118 loff_t (*fn)(struct file *, loff_t, int);
120 fn = no_llseek;
121 if (file->f_mode & FMODE_LSEEK) {
122 fn = default_llseek;
123 if (file->f_op && file->f_op->llseek)
124 fn = file->f_op->llseek;
126 return fn(file, offset, origin);
128 EXPORT_SYMBOL(vfs_llseek);
130 asmlinkage off_t sys_lseek(unsigned int fd, off_t offset, unsigned int origin)
132 off_t retval;
133 struct file * file;
134 int fput_needed;
136 retval = -EBADF;
137 file = fget_light(fd, &fput_needed);
138 if (!file)
139 goto bad;
141 retval = -EINVAL;
142 if (origin <= 2) {
143 loff_t res = vfs_llseek(file, offset, origin);
144 retval = res;
145 if (res != (loff_t)retval)
146 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */
148 fput_light(file, fput_needed);
149 bad:
150 return retval;
#ifdef __ARCH_WANT_SYS_LLSEEK
/*
 * llseek entry point taking the offset as two halves and storing the
 * resulting position through the user pointer @result.
 *
 * Returns 0 on success, -EBADF when @fd does not resolve to a file,
 * -EINVAL when @origin is greater than 2, the (negative) vfs_llseek()
 * result truncated to int when the seek fails, and -EFAULT when the new
 * position cannot be copied to @result.
 */
asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high,
			   unsigned long offset_low, loff_t __user * result,
			   unsigned int origin)
{
	int needs_fput;
	int ret;
	loff_t where;
	struct file *filp = fget_light(fd, &needs_fput);

	if (!filp)
		return -EBADF;

	ret = -EINVAL;
	if (origin <= 2) {
		where = vfs_llseek(filp,
				   ((loff_t) offset_high << 32) | offset_low,
				   origin);
		if (where < 0)
			ret = (int)where;
		else if (copy_to_user(result, &where, sizeof(where)))
			ret = -EFAULT;
		else
			ret = 0;
	}

	fput_light(filp, needs_fput);
	return ret;
}
#endif
189 * rw_verify_area doesn't like huge counts. We limit
190 * them to something that fits in "int" so that others
191 * won't have to do range checks all the time.
193 #define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK)
195 int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
197 struct inode *inode;
198 loff_t pos;
200 if (unlikely((ssize_t) count < 0))
201 goto Einval;
202 pos = *ppos;
203 if (unlikely((pos < 0) || (loff_t) (pos + count) < 0))
204 goto Einval;
206 inode = file->f_dentry->d_inode;
207 if (unlikely(inode->i_flock && MANDATORY_LOCK(inode))) {
208 int retval = locks_mandatory_area(
209 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
210 inode, file, pos, count);
211 if (retval < 0)
212 return retval;
214 return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;
216 Einval:
217 return -EINVAL;
220 static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
222 set_current_state(TASK_UNINTERRUPTIBLE);
223 if (!kiocbIsKicked(iocb))
224 schedule();
225 else
226 kiocbClearKicked(iocb);
227 __set_current_state(TASK_RUNNING);
230 ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
232 struct iovec iov = { .iov_base = buf, .iov_len = len };
233 struct kiocb kiocb;
234 ssize_t ret;
236 init_sync_kiocb(&kiocb, filp);
237 kiocb.ki_pos = *ppos;
238 kiocb.ki_left = len;
240 for (;;) {
241 ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
242 if (ret != -EIOCBRETRY)
243 break;
244 wait_on_retry_sync_kiocb(&kiocb);
247 if (-EIOCBQUEUED == ret)
248 ret = wait_on_sync_kiocb(&kiocb);
249 *ppos = kiocb.ki_pos;
250 return ret;
253 EXPORT_SYMBOL(do_sync_read);
255 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
257 ssize_t ret;
259 if (!(file->f_mode & FMODE_READ))
260 return -EBADF;
261 if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
262 return -EINVAL;
263 if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
264 return -EFAULT;
266 ret = rw_verify_area(READ, file, pos, count);
267 if (ret >= 0) {
268 count = ret;
269 ret = security_file_permission (file, MAY_READ);
270 if (!ret) {
271 if (file->f_op->read)
272 ret = file->f_op->read(file, buf, count, pos);
273 else
274 ret = do_sync_read(file, buf, count, pos);
275 if (ret > 0) {
276 fsnotify_access(file->f_dentry);
277 current->rchar += ret;
279 current->syscr++;
283 return ret;
286 EXPORT_SYMBOL(vfs_read);
288 ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
290 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
291 struct kiocb kiocb;
292 ssize_t ret;
294 init_sync_kiocb(&kiocb, filp);
295 kiocb.ki_pos = *ppos;
296 kiocb.ki_left = len;
298 for (;;) {
299 ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
300 if (ret != -EIOCBRETRY)
301 break;
302 wait_on_retry_sync_kiocb(&kiocb);
305 if (-EIOCBQUEUED == ret)
306 ret = wait_on_sync_kiocb(&kiocb);
307 *ppos = kiocb.ki_pos;
308 return ret;
311 EXPORT_SYMBOL(do_sync_write);
313 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
315 ssize_t ret;
317 if (!(file->f_mode & FMODE_WRITE))
318 return -EBADF;
319 if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
320 return -EINVAL;
321 if (unlikely(!access_ok(VERIFY_READ, buf, count)))
322 return -EFAULT;
324 ret = rw_verify_area(WRITE, file, pos, count);
325 if (ret >= 0) {
326 count = ret;
327 ret = security_file_permission (file, MAY_WRITE);
328 if (!ret) {
329 if (file->f_op->write)
330 ret = file->f_op->write(file, buf, count, pos);
331 else
332 ret = do_sync_write(file, buf, count, pos);
333 if (ret > 0) {
334 fsnotify_modify(file->f_dentry);
335 current->wchar += ret;
337 current->syscw++;
341 return ret;
344 EXPORT_SYMBOL(vfs_write);
346 static inline loff_t file_pos_read(struct file *file)
348 return file->f_pos;
351 static inline void file_pos_write(struct file *file, loff_t pos)
353 file->f_pos = pos;
356 asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count)
358 struct file *file;
359 ssize_t ret = -EBADF;
360 int fput_needed;
362 file = fget_light(fd, &fput_needed);
363 if (file) {
364 loff_t pos = file_pos_read(file);
365 ret = vfs_read(file, buf, count, &pos);
366 file_pos_write(file, pos);
367 fput_light(file, fput_needed);
370 return ret;
372 EXPORT_SYMBOL_GPL(sys_read);
374 asmlinkage ssize_t sys_write(unsigned int fd, const char __user * buf, size_t count)
376 struct file *file;
377 ssize_t ret = -EBADF;
378 int fput_needed;
380 file = fget_light(fd, &fput_needed);
381 if (file) {
382 loff_t pos = file_pos_read(file);
383 ret = vfs_write(file, buf, count, &pos);
384 file_pos_write(file, pos);
385 fput_light(file, fput_needed);
388 return ret;
391 asmlinkage ssize_t sys_pread64(unsigned int fd, char __user *buf,
392 size_t count, loff_t pos)
394 struct file *file;
395 ssize_t ret = -EBADF;
396 int fput_needed;
398 if (pos < 0)
399 return -EINVAL;
401 file = fget_light(fd, &fput_needed);
402 if (file) {
403 ret = -ESPIPE;
404 if (file->f_mode & FMODE_PREAD)
405 ret = vfs_read(file, buf, count, &pos);
406 fput_light(file, fput_needed);
409 return ret;
412 asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char __user *buf,
413 size_t count, loff_t pos)
415 struct file *file;
416 ssize_t ret = -EBADF;
417 int fput_needed;
419 if (pos < 0)
420 return -EINVAL;
422 file = fget_light(fd, &fput_needed);
423 if (file) {
424 ret = -ESPIPE;
425 if (file->f_mode & FMODE_PWRITE)
426 ret = vfs_write(file, buf, count, &pos);
427 fput_light(file, fput_needed);
430 return ret;
/*
 * Reduce an iovec's length in-place.  Return the resulting number of segments
 *
 * Walks @iov until the accumulated length reaches @to bytes; the segment
 * in which that happens is trimmed so the total is exactly @to and the
 * number of segments up to and including it is returned.  When the
 * @nr_segs segments together hold fewer than @to bytes nothing is
 * trimmed and @nr_segs is returned.
 */
unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
{
	unsigned long seg = 0;
	size_t len = 0;

	while (seg < nr_segs) {
		seg++;
		/*
		 * len never exceeds 'to' here, so 'to - len' cannot
		 * underflow.  Comparing against the remaining room (rather
		 * than computing len + iov_len) keeps the test correct even
		 * when the sum would wrap around size_t.
		 */
		if (iov->iov_len >= to - len) {
			iov->iov_len = to - len;
			break;
		}
		len += iov->iov_len;
		iov++;
	}
	return seg;
}
453 EXPORT_UNUSED_SYMBOL(iov_shorten); /* June 2006 */
455 ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
456 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
458 struct kiocb kiocb;
459 ssize_t ret;
461 init_sync_kiocb(&kiocb, filp);
462 kiocb.ki_pos = *ppos;
463 kiocb.ki_left = len;
464 kiocb.ki_nbytes = len;
466 for (;;) {
467 ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
468 if (ret != -EIOCBRETRY)
469 break;
470 wait_on_retry_sync_kiocb(&kiocb);
473 if (ret == -EIOCBQUEUED)
474 ret = wait_on_sync_kiocb(&kiocb);
475 *ppos = kiocb.ki_pos;
476 return ret;
479 /* Do it by hand, with file-ops */
480 ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
481 unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
483 struct iovec *vector = iov;
484 ssize_t ret = 0;
486 while (nr_segs > 0) {
487 void __user *base;
488 size_t len;
489 ssize_t nr;
491 base = vector->iov_base;
492 len = vector->iov_len;
493 vector++;
494 nr_segs--;
496 nr = fn(filp, base, len, ppos);
498 if (nr < 0) {
499 if (!ret)
500 ret = nr;
501 break;
503 ret += nr;
504 if (nr != len)
505 break;
508 return ret;
511 /* A write operation does a read from user space and vice versa */
512 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
514 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
515 unsigned long nr_segs, unsigned long fast_segs,
516 struct iovec *fast_pointer,
517 struct iovec **ret_pointer)
519 unsigned long seg;
520 ssize_t ret;
521 struct iovec *iov = fast_pointer;
524 * SuS says "The readv() function *may* fail if the iovcnt argument
525 * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
526 * traditionally returned zero for zero segments, so...
528 if (nr_segs == 0) {
529 ret = 0;
530 goto out;
534 * First get the "struct iovec" from user memory and
535 * verify all the pointers
537 if (nr_segs > UIO_MAXIOV) {
538 ret = -EINVAL;
539 goto out;
541 if (nr_segs > fast_segs) {
542 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
543 if (iov == NULL) {
544 ret = -ENOMEM;
545 goto out;
548 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
549 ret = -EFAULT;
550 goto out;
554 * According to the Single Unix Specification we should return EINVAL
555 * if an element length is < 0 when cast to ssize_t or if the
556 * total length would overflow the ssize_t return value of the
557 * system call.
559 ret = 0;
560 for (seg = 0; seg < nr_segs; seg++) {
561 void __user *buf = iov[seg].iov_base;
562 ssize_t len = (ssize_t)iov[seg].iov_len;
564 /* see if we we're about to use an invalid len or if
565 * it's about to overflow ssize_t */
566 if (len < 0 || (ret + len < ret)) {
567 ret = -EINVAL;
568 goto out;
570 if (unlikely(!access_ok(vrfy_dir(type), buf, len))) {
571 ret = -EFAULT;
572 goto out;
575 ret += len;
577 out:
578 *ret_pointer = iov;
579 return ret;
582 static ssize_t do_readv_writev(int type, struct file *file,
583 const struct iovec __user * uvector,
584 unsigned long nr_segs, loff_t *pos)
586 size_t tot_len;
587 struct iovec iovstack[UIO_FASTIOV];
588 struct iovec *iov = iovstack;
589 ssize_t ret;
590 io_fn_t fn;
591 iov_fn_t fnv;
593 if (!file->f_op) {
594 ret = -EINVAL;
595 goto out;
598 ret = rw_copy_check_uvector(type, uvector, nr_segs,
599 ARRAY_SIZE(iovstack), iovstack, &iov);
600 if (ret <= 0)
601 goto out;
603 tot_len = ret;
604 ret = rw_verify_area(type, file, pos, tot_len);
605 if (ret < 0)
606 goto out;
607 ret = security_file_permission(file, type == READ ? MAY_READ : MAY_WRITE);
608 if (ret)
609 goto out;
611 fnv = NULL;
612 if (type == READ) {
613 fn = file->f_op->read;
614 fnv = file->f_op->aio_read;
615 } else {
616 fn = (io_fn_t)file->f_op->write;
617 fnv = file->f_op->aio_write;
620 if (fnv)
621 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
622 pos, fnv);
623 else
624 ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
626 out:
627 if (iov != iovstack)
628 kfree(iov);
629 if ((ret + (type == READ)) > 0) {
630 if (type == READ)
631 fsnotify_access(file->f_dentry);
632 else
633 fsnotify_modify(file->f_dentry);
635 return ret;
638 ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
639 unsigned long vlen, loff_t *pos)
641 if (!(file->f_mode & FMODE_READ))
642 return -EBADF;
643 if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
644 return -EINVAL;
646 return do_readv_writev(READ, file, vec, vlen, pos);
649 EXPORT_SYMBOL(vfs_readv);
651 ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
652 unsigned long vlen, loff_t *pos)
654 if (!(file->f_mode & FMODE_WRITE))
655 return -EBADF;
656 if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
657 return -EINVAL;
659 return do_readv_writev(WRITE, file, vec, vlen, pos);
662 EXPORT_SYMBOL(vfs_writev);
664 asmlinkage ssize_t
665 sys_readv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen)
667 struct file *file;
668 ssize_t ret = -EBADF;
669 int fput_needed;
671 file = fget_light(fd, &fput_needed);
672 if (file) {
673 loff_t pos = file_pos_read(file);
674 ret = vfs_readv(file, vec, vlen, &pos);
675 file_pos_write(file, pos);
676 fput_light(file, fput_needed);
679 if (ret > 0)
680 current->rchar += ret;
681 current->syscr++;
682 return ret;
685 asmlinkage ssize_t
686 sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen)
688 struct file *file;
689 ssize_t ret = -EBADF;
690 int fput_needed;
692 file = fget_light(fd, &fput_needed);
693 if (file) {
694 loff_t pos = file_pos_read(file);
695 ret = vfs_writev(file, vec, vlen, &pos);
696 file_pos_write(file, pos);
697 fput_light(file, fput_needed);
700 if (ret > 0)
701 current->wchar += ret;
702 current->syscw++;
703 return ret;
706 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
707 size_t count, loff_t max)
709 struct file * in_file, * out_file;
710 struct inode * in_inode, * out_inode;
711 loff_t pos;
712 ssize_t retval;
713 int fput_needed_in, fput_needed_out;
716 * Get input file, and verify that it is ok..
718 retval = -EBADF;
719 in_file = fget_light(in_fd, &fput_needed_in);
720 if (!in_file)
721 goto out;
722 if (!(in_file->f_mode & FMODE_READ))
723 goto fput_in;
724 retval = -EINVAL;
725 in_inode = in_file->f_dentry->d_inode;
726 if (!in_inode)
727 goto fput_in;
728 if (!in_file->f_op || !in_file->f_op->sendfile)
729 goto fput_in;
730 retval = -ESPIPE;
731 if (!ppos)
732 ppos = &in_file->f_pos;
733 else
734 if (!(in_file->f_mode & FMODE_PREAD))
735 goto fput_in;
736 retval = rw_verify_area(READ, in_file, ppos, count);
737 if (retval < 0)
738 goto fput_in;
739 count = retval;
741 retval = security_file_permission (in_file, MAY_READ);
742 if (retval)
743 goto fput_in;
746 * Get output file, and verify that it is ok..
748 retval = -EBADF;
749 out_file = fget_light(out_fd, &fput_needed_out);
750 if (!out_file)
751 goto fput_in;
752 if (!(out_file->f_mode & FMODE_WRITE))
753 goto fput_out;
754 retval = -EINVAL;
755 if (!out_file->f_op || !out_file->f_op->sendpage)
756 goto fput_out;
757 out_inode = out_file->f_dentry->d_inode;
758 retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
759 if (retval < 0)
760 goto fput_out;
761 count = retval;
763 retval = security_file_permission (out_file, MAY_WRITE);
764 if (retval)
765 goto fput_out;
767 if (!max)
768 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
770 pos = *ppos;
771 retval = -EINVAL;
772 if (unlikely(pos < 0))
773 goto fput_out;
774 if (unlikely(pos + count > max)) {
775 retval = -EOVERFLOW;
776 if (pos >= max)
777 goto fput_out;
778 count = max - pos;
781 retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file);
783 if (retval > 0) {
784 current->rchar += retval;
785 current->wchar += retval;
787 current->syscr++;
788 current->syscw++;
790 if (*ppos > max)
791 retval = -EOVERFLOW;
793 fput_out:
794 fput_light(out_file, fput_needed_out);
795 fput_in:
796 fput_light(in_file, fput_needed_in);
797 out:
798 return retval;
801 asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t __user *offset, size_t count)
803 loff_t pos;
804 off_t off;
805 ssize_t ret;
807 if (offset) {
808 if (unlikely(get_user(off, offset)))
809 return -EFAULT;
810 pos = off;
811 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
812 if (unlikely(put_user(pos, offset)))
813 return -EFAULT;
814 return ret;
817 return do_sendfile(out_fd, in_fd, NULL, count, 0);
820 asmlinkage ssize_t sys_sendfile64(int out_fd, int in_fd, loff_t __user *offset, size_t count)
822 loff_t pos;
823 ssize_t ret;
825 if (offset) {
826 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
827 return -EFAULT;
828 ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
829 if (unlikely(put_user(pos, offset)))
830 return -EFAULT;
831 return ret;
834 return do_sendfile(out_fd, in_fd, NULL, count, 0);