Import 2.3.99pre7-7
[davej-history.git] / fs / open.c
blobe23e48194e7dcd1b6d9303675c5e9f3fb50d5e62
1 /*
2 * linux/fs/open.c
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
7 #include <linux/string.h>
8 #include <linux/mm.h>
9 #include <linux/utime.h>
10 #include <linux/file.h>
11 #include <linux/smp_lock.h>
12 #include <linux/quotaops.h>
14 #include <asm/uaccess.h>
16 int vfs_statfs(struct super_block *sb, struct statfs *buf)
18 int retval = -ENODEV;
20 if (sb) {
21 retval = -ENOSYS;
22 if (sb->s_op && sb->s_op->statfs) {
23 memset(buf, 0, sizeof(struct statfs));
24 lock_kernel();
25 retval = sb->s_op->statfs(sb, buf);
26 unlock_kernel();
29 return retval;
33 asmlinkage long sys_statfs(const char * path, struct statfs * buf)
35 struct nameidata nd;
36 int error;
38 error = user_path_walk(path, &nd);
39 if (!error) {
40 struct statfs tmp;
41 error = vfs_statfs(nd.dentry->d_inode->i_sb, &tmp);
42 if (!error && copy_to_user(buf, &tmp, sizeof(struct statfs)))
43 error = -EFAULT;
44 path_release(&nd);
46 return error;
49 asmlinkage long sys_fstatfs(unsigned int fd, struct statfs * buf)
51 struct file * file;
52 struct statfs tmp;
53 int error;
55 error = -EBADF;
56 file = fget(fd);
57 if (!file)
58 goto out;
59 error = vfs_statfs(file->f_dentry->d_inode->i_sb, &tmp);
60 if (!error && copy_to_user(buf, &tmp, sizeof(struct statfs)))
61 error = -EFAULT;
62 fput(file);
63 out:
64 return error;
67 int do_truncate(struct dentry *dentry, loff_t length)
69 struct inode *inode = dentry->d_inode;
70 int error;
71 struct iattr newattrs;
73 /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
74 if (length < 0)
75 return -EINVAL;
77 down(&inode->i_sem);
78 newattrs.ia_size = length;
79 newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
80 error = notify_change(dentry, &newattrs);
81 up(&inode->i_sem);
82 return error;
85 static inline long do_sys_truncate(const char * path, loff_t length)
87 struct nameidata nd;
88 struct inode * inode;
89 int error;
91 error = -EINVAL;
92 if (length < 0) /* sorry, but loff_t says... */
93 goto out;
95 error = user_path_walk(path, &nd);
96 if (error)
97 goto out;
98 inode = nd.dentry->d_inode;
100 error = -EACCES;
101 if (S_ISDIR(inode->i_mode))
102 goto dput_and_out;
104 error = permission(inode,MAY_WRITE);
105 if (error)
106 goto dput_and_out;
108 error = -EROFS;
109 if (IS_RDONLY(inode))
110 goto dput_and_out;
112 error = -EPERM;
113 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
114 goto dput_and_out;
116 lock_kernel();
117 error = get_write_access(inode);
118 if (error)
119 goto dput_and_out;
121 error = locks_verify_truncate(inode, NULL, length);
122 if (!error) {
123 DQUOT_INIT(inode);
124 error = do_truncate(nd.dentry, length);
126 put_write_access(inode);
127 unlock_kernel();
129 dput_and_out:
130 path_release(&nd);
131 out:
132 return error;
135 asmlinkage long sys_truncate(const char * path, unsigned long length)
137 return do_sys_truncate(path, length);
140 static inline long do_sys_ftruncate(unsigned int fd, loff_t length)
142 struct inode * inode;
143 struct dentry *dentry;
144 struct file * file;
145 int error;
147 error = -EINVAL;
148 if (length < 0)
149 goto out;
150 error = -EBADF;
151 file = fget(fd);
152 if (!file)
153 goto out;
154 dentry = file->f_dentry;
155 inode = dentry->d_inode;
156 error = -EACCES;
157 if (S_ISDIR(inode->i_mode) || !(file->f_mode & FMODE_WRITE))
158 goto out_putf;
159 error = -EPERM;
160 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
161 goto out_putf;
163 lock_kernel();
164 error = locks_verify_truncate(inode, file, length);
165 if (!error)
166 error = do_truncate(dentry, length);
167 unlock_kernel();
168 out_putf:
169 fput(file);
170 out:
171 return error;
174 asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length)
176 return do_sys_ftruncate(fd, length);
179 /* LFS versions of truncate are only needed on 32 bit machines */
180 #if BITS_PER_LONG == 32
181 asmlinkage long sys_truncate64(const char * path, loff_t length)
183 return do_sys_truncate(path, length);
186 asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length)
188 return do_sys_ftruncate(fd, length);
190 #endif
192 #if !(defined(__alpha__) || defined(__ia64__))
/*
 * sys_utime() can be implemented in user-level using sys_utimes().
 * Is this for backwards compatibility?  If so, why not move it
 * into the appropriate arch directory (for those architectures that
 * need it).
 */
/* If times==NULL, set access and modification to current time,
 * must be owner or have write permission.
 * Else, update from *times, must be owner or super user.
 */
205 asmlinkage long sys_utime(char * filename, struct utimbuf * times)
207 int error;
208 struct nameidata nd;
209 struct inode * inode;
210 struct iattr newattrs;
212 error = user_path_walk(filename, &nd);
213 if (error)
214 goto out;
215 inode = nd.dentry->d_inode;
217 error = -EROFS;
218 if (IS_RDONLY(inode))
219 goto dput_and_out;
221 /* Don't worry, the checks are done in inode_change_ok() */
222 newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
223 if (times) {
224 error = get_user(newattrs.ia_atime, &times->actime);
225 if (!error)
226 error = get_user(newattrs.ia_mtime, &times->modtime);
227 if (error)
228 goto dput_and_out;
230 newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
231 } else {
232 if (current->fsuid != inode->i_uid &&
233 (error = permission(inode,MAY_WRITE)) != 0)
234 goto dput_and_out;
236 error = notify_change(nd.dentry, &newattrs);
237 dput_and_out:
238 path_release(&nd);
239 out:
240 return error;
243 #endif
/* If times==NULL, set access and modification to current time,
 * must be owner or have write permission.
 * Else, update from *times, must be owner or super user.
 */
249 asmlinkage long sys_utimes(char * filename, struct timeval * utimes)
251 int error;
252 struct nameidata nd;
253 struct inode * inode;
254 struct iattr newattrs;
256 error = user_path_walk(filename, &nd);
258 if (error)
259 goto out;
260 inode = nd.dentry->d_inode;
262 error = -EROFS;
263 if (IS_RDONLY(inode))
264 goto dput_and_out;
266 /* Don't worry, the checks are done in inode_change_ok() */
267 newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
268 if (utimes) {
269 struct timeval times[2];
270 error = -EFAULT;
271 if (copy_from_user(&times, utimes, sizeof(times)))
272 goto dput_and_out;
273 newattrs.ia_atime = times[0].tv_sec;
274 newattrs.ia_mtime = times[1].tv_sec;
275 newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
276 } else {
277 if ((error = permission(inode,MAY_WRITE)) != 0)
278 goto dput_and_out;
280 error = notify_change(nd.dentry, &newattrs);
281 dput_and_out:
282 path_release(&nd);
283 out:
284 return error;
/*
 * access() needs to use the real uid/gid, not the effective uid/gid.
 * We do this by temporarily clearing all FS-related capabilities and
 * switching the fsuid/fsgid around to the real ones.
 */
292 asmlinkage long sys_access(const char * filename, int mode)
294 struct nameidata nd;
295 int old_fsuid, old_fsgid;
296 kernel_cap_t old_cap;
297 int res;
299 if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
300 return -EINVAL;
302 old_fsuid = current->fsuid;
303 old_fsgid = current->fsgid;
304 old_cap = current->cap_effective;
306 current->fsuid = current->uid;
307 current->fsgid = current->gid;
309 /* Clear the capabilities if we switch to a non-root user */
310 if (current->uid)
311 cap_clear(current->cap_effective);
312 else
313 current->cap_effective = current->cap_permitted;
315 res = user_path_walk(filename, &nd);
316 if (!res) {
317 res = permission(nd.dentry->d_inode, mode);
318 /* SuS v2 requires we report a read only fs too */
319 if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode))
320 res = -EROFS;
321 path_release(&nd);
324 current->fsuid = old_fsuid;
325 current->fsgid = old_fsgid;
326 current->cap_effective = old_cap;
328 return res;
331 asmlinkage long sys_chdir(const char * filename)
333 int error;
334 struct nameidata nd;
335 char *name;
337 lock_kernel();
339 name = getname(filename);
340 error = PTR_ERR(name);
341 if (IS_ERR(name))
342 goto out;
344 error = 0;
345 if (path_init(name,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd))
346 error = path_walk(name, &nd);
347 putname(name);
348 if (error)
349 goto out;
351 error = permission(nd.dentry->d_inode,MAY_EXEC);
352 if (error)
353 goto dput_and_out;
355 set_fs_pwd(current->fs, nd.mnt, nd.dentry);
357 dput_and_out:
358 path_release(&nd);
359 out:
360 unlock_kernel();
361 return error;
364 asmlinkage long sys_fchdir(unsigned int fd)
366 struct file *file;
367 struct dentry *dentry;
368 struct inode *inode;
369 struct vfsmount *mnt;
370 int error;
372 error = -EBADF;
373 file = fget(fd);
374 if (!file)
375 goto out;
377 dentry = file->f_dentry;
378 mnt = file->f_vfsmnt;
379 inode = dentry->d_inode;
381 error = -ENOTDIR;
382 if (!S_ISDIR(inode->i_mode))
383 goto out_putf;
385 lock_kernel();
386 error = permission(inode, MAY_EXEC);
387 if (!error)
388 set_fs_pwd(current->fs, mnt, dentry);
389 unlock_kernel();
390 out_putf:
391 fput(file);
392 out:
393 return error;
396 asmlinkage long sys_chroot(const char * filename)
398 int error;
399 struct nameidata nd;
400 char *name;
402 lock_kernel();
404 name = getname(filename);
405 error = PTR_ERR(name);
406 if (IS_ERR(name))
407 goto out;
409 path_init(name, LOOKUP_POSITIVE | LOOKUP_FOLLOW |
410 LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
411 error = path_walk(name, &nd);
412 putname(name);
413 if (error)
414 goto out;
416 error = permission(nd.dentry->d_inode,MAY_EXEC);
417 if (error)
418 goto dput_and_out;
420 error = -EPERM;
421 if (!capable(CAP_SYS_CHROOT))
422 goto dput_and_out;
424 set_fs_root(current->fs, nd.mnt, nd.dentry);
425 set_fs_altroot();
426 error = 0;
427 dput_and_out:
428 path_release(&nd);
429 out:
430 unlock_kernel();
431 return error;
434 asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
436 struct inode * inode;
437 struct dentry * dentry;
438 struct file * file;
439 int err = -EBADF;
440 struct iattr newattrs;
442 file = fget(fd);
443 if (!file)
444 goto out;
446 dentry = file->f_dentry;
447 inode = dentry->d_inode;
449 err = -EROFS;
450 if (IS_RDONLY(inode))
451 goto out_putf;
452 err = -EPERM;
453 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
454 goto out_putf;
455 if (mode == (mode_t) -1)
456 mode = inode->i_mode;
457 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
458 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
459 lock_kernel();
460 err = notify_change(dentry, &newattrs);
461 unlock_kernel();
463 out_putf:
464 fput(file);
465 out:
466 return err;
469 asmlinkage long sys_chmod(const char * filename, mode_t mode)
471 struct nameidata nd;
472 struct inode * inode;
473 int error;
474 struct iattr newattrs;
476 error = user_path_walk(filename, &nd);
477 if (error)
478 goto out;
479 inode = nd.dentry->d_inode;
481 error = -EROFS;
482 if (IS_RDONLY(inode))
483 goto dput_and_out;
485 error = -EPERM;
486 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
487 goto dput_and_out;
489 if (mode == (mode_t) -1)
490 mode = inode->i_mode;
491 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
492 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
493 error = notify_change(nd.dentry, &newattrs);
495 dput_and_out:
496 path_release(&nd);
497 out:
498 return error;
501 static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
503 struct inode * inode;
504 int error;
505 struct iattr newattrs;
507 error = -ENOENT;
508 if (!(inode = dentry->d_inode)) {
509 printk("chown_common: NULL inode\n");
510 goto out;
512 error = -EROFS;
513 if (IS_RDONLY(inode))
514 goto out;
515 error = -EPERM;
516 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
517 goto out;
518 if (user == (uid_t) -1)
519 user = inode->i_uid;
520 if (group == (gid_t) -1)
521 group = inode->i_gid;
522 newattrs.ia_mode = inode->i_mode;
523 newattrs.ia_uid = user;
524 newattrs.ia_gid = group;
525 newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME;
527 * If the user or group of a non-directory has been changed by a
528 * non-root user, remove the setuid bit.
529 * 19981026 David C Niemi <niemi@tux.org>
531 * Changed this to apply to all users, including root, to avoid
532 * some races. This is the behavior we had in 2.0. The check for
533 * non-root was definitely wrong for 2.2 anyway, as it should
534 * have been using CAP_FSETID rather than fsuid -- 19990830 SD.
536 if ((inode->i_mode & S_ISUID) == S_ISUID &&
537 !S_ISDIR(inode->i_mode))
539 newattrs.ia_mode &= ~S_ISUID;
540 newattrs.ia_valid |= ATTR_MODE;
543 * Likewise, if the user or group of a non-directory has been changed
544 * by a non-root user, remove the setgid bit UNLESS there is no group
545 * execute bit (this would be a file marked for mandatory locking).
546 * 19981026 David C Niemi <niemi@tux.org>
548 * Removed the fsuid check (see the comment above) -- 19990830 SD.
550 if (((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
551 && !S_ISDIR(inode->i_mode))
553 newattrs.ia_mode &= ~S_ISGID;
554 newattrs.ia_valid |= ATTR_MODE;
556 error = DQUOT_TRANSFER(dentry, &newattrs);
557 out:
558 return error;
561 asmlinkage long sys_chown(const char * filename, uid_t user, gid_t group)
563 struct nameidata nd;
564 int error;
566 error = user_path_walk(filename, &nd);
567 if (!error) {
568 error = chown_common(nd.dentry, user, group);
569 path_release(&nd);
571 return error;
574 asmlinkage long sys_lchown(const char * filename, uid_t user, gid_t group)
576 struct nameidata nd;
577 int error;
579 error = user_path_walk_link(filename, &nd);
580 if (!error) {
581 error = chown_common(nd.dentry, user, group);
582 path_release(&nd);
584 return error;
588 asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group)
590 struct file * file;
591 int error = -EBADF;
593 file = fget(fd);
594 if (file) {
595 error = chown_common(file->f_dentry, user, group);
596 fput(file);
598 return error;
/*
 * Note that while the flag value (low two bits) for sys_open means:
 *	00 - read-only
 *	01 - write-only
 *	10 - read-write
 *	11 - special
 * it is changed into
 *	00 - no permissions needed
 *	01 - read-permission
 *	10 - write-permission
 *	11 - read-write
 * for the internal routines (ie open_namei()/follow_link() etc). 00 is
 * used by symlinks.
 */
615 struct file *filp_open(const char * filename, int flags, int mode)
617 int namei_flags, error;
618 struct nameidata nd;
620 namei_flags = flags;
621 if ((namei_flags+1) & O_ACCMODE)
622 namei_flags++;
623 if (namei_flags & O_TRUNC)
624 namei_flags |= 2;
626 error = open_namei(filename, namei_flags, mode, &nd);
627 if (!error)
628 return dentry_open(nd.dentry, nd.mnt, flags);
630 return ERR_PTR(error);
633 struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
635 struct file * f;
636 struct inode *inode;
637 int error;
639 error = -ENFILE;
640 f = get_empty_filp();
641 if (!f)
642 goto cleanup_dentry;
643 f->f_flags = flags;
644 f->f_mode = (flags+1) & O_ACCMODE;
645 inode = dentry->d_inode;
646 if (f->f_mode & FMODE_WRITE) {
647 error = get_write_access(inode);
648 if (error)
649 goto cleanup_file;
652 f->f_dentry = dentry;
653 f->f_vfsmnt = mnt;
654 f->f_pos = 0;
655 f->f_reada = 0;
656 f->f_op = inode->i_fop;
657 if (inode->i_sb)
658 file_move(f, &inode->i_sb->s_files);
659 if (f->f_op && f->f_op->open) {
660 error = f->f_op->open(inode,f);
661 if (error)
662 goto cleanup_all;
664 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
666 return f;
668 cleanup_all:
669 if (f->f_mode & FMODE_WRITE)
670 put_write_access(inode);
671 f->f_dentry = NULL;
672 f->f_vfsmnt = NULL;
673 cleanup_file:
674 put_filp(f);
675 cleanup_dentry:
676 dput(dentry);
677 mntput(mnt);
678 return ERR_PTR(error);
/*
 * Find an empty file descriptor entry, and mark it busy.
 */
684 int get_unused_fd(void)
686 struct files_struct * files = current->files;
687 int fd, error;
689 error = -EMFILE;
690 write_lock(&files->file_lock);
692 repeat:
693 fd = find_next_zero_bit(files->open_fds,
694 current->files->max_fdset,
695 files->next_fd);
698 * N.B. For clone tasks sharing a files structure, this test
699 * will limit the total number of files that can be opened.
701 if (fd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
702 goto out;
704 /* Do we need to expand the fdset array? */
705 if (fd >= current->files->max_fdset) {
706 error = expand_fdset(files, fd);
707 if (!error) {
708 error = -EMFILE;
709 goto repeat;
711 goto out;
715 * Check whether we need to expand the fd array.
717 if (fd >= files->max_fds) {
718 error = expand_fd_array(files, fd);
719 if (!error) {
720 error = -EMFILE;
721 goto repeat;
723 goto out;
726 FD_SET(fd, files->open_fds);
727 FD_CLR(fd, files->close_on_exec);
728 files->next_fd = fd + 1;
729 #if 1
730 /* Sanity check */
731 if (files->fd[fd] != NULL) {
732 printk("get_unused_fd: slot %d not NULL!\n", fd);
733 files->fd[fd] = NULL;
735 #endif
736 error = fd;
738 out:
739 write_unlock(&files->file_lock);
740 return error;
743 inline void __put_unused_fd(struct files_struct *files, unsigned int fd)
745 FD_CLR(fd, files->open_fds);
746 if (fd < files->next_fd)
747 files->next_fd = fd;
750 inline void put_unused_fd(unsigned int fd)
752 struct files_struct *files = current->files;
754 write_lock(&files->file_lock);
755 __put_unused_fd(files, fd);
756 write_unlock(&files->file_lock);
759 asmlinkage long sys_open(const char * filename, int flags, int mode)
761 char * tmp;
762 int fd, error;
764 #if BITS_PER_LONG != 32
765 flags |= O_LARGEFILE;
766 #endif
767 tmp = getname(filename);
768 fd = PTR_ERR(tmp);
769 if (!IS_ERR(tmp)) {
770 fd = get_unused_fd();
771 if (fd >= 0) {
772 struct file * f;
773 lock_kernel();
774 f = filp_open(tmp, flags, mode);
775 unlock_kernel();
776 error = PTR_ERR(f);
777 if (IS_ERR(f))
778 goto out_error;
779 fd_install(fd, f);
781 out:
782 putname(tmp);
784 return fd;
786 out_error:
787 put_unused_fd(fd);
788 fd = error;
789 goto out;
792 #ifndef __alpha__
/*
 * For backward compatibility?  Maybe this should be moved
 * into arch/i386 instead?
 */
798 asmlinkage long sys_creat(const char * pathname, int mode)
800 return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
803 #endif
/*
 * "id" is the POSIX thread ID. We use the
 * files pointer for this..
 */
809 int filp_close(struct file *filp, fl_owner_t id)
811 int retval;
813 if (!file_count(filp)) {
814 printk("VFS: Close: file count is 0\n");
815 return 0;
817 retval = 0;
818 if (filp->f_op && filp->f_op->flush)
819 retval = filp->f_op->flush(filp);
820 locks_remove_posix(filp, id);
821 fput(filp);
822 return retval;
/*
 * Careful here! We test whether the file pointer is NULL before
 * releasing the fd. This ensures that one clone task can't release
 * an fd while another clone is opening it.
 *
 * The "release" argument tells us whether or not to mark the fd as free
 * in the open-files bitmap.  dup2 uses this to retain the fd
 * without races.
 */
834 int do_close(unsigned int fd, int release)
836 int error;
837 struct file * filp;
838 struct files_struct * files = current->files;
840 error = -EBADF;
841 write_lock(&files->file_lock);
842 filp = frip(files, fd);
843 if (!filp)
844 goto out_unlock;
845 FD_CLR(fd, files->close_on_exec);
846 if (release)
847 __put_unused_fd(files, fd);
848 write_unlock(&files->file_lock);
849 lock_kernel();
850 error = filp_close(filp, files);
851 unlock_kernel();
852 out:
853 return error;
854 out_unlock:
855 write_unlock(&files->file_lock);
856 goto out;
859 asmlinkage long sys_close(unsigned int fd)
861 return do_close(fd, 1);
/*
 * This routine simulates a hangup on the tty, to arrange that users
 * are given clean terminals at login time.
 */
868 asmlinkage long sys_vhangup(void)
870 if (capable(CAP_SYS_TTY_CONFIG)) {
871 tty_vhangup(current->tty);
872 return 0;
874 return -EPERM;