Import 2.3.26pre2
[davej-history.git] / fs / namei.c
blobeba55a751f745519f2fc218ab332eb1cf2a7eaa8
1 /*
2 * linux/fs/namei.c
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
7 /*
8 * Some corrections by tytso.
9 */
11 /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
12 * lookup logic.
15 #include <linux/mm.h>
16 #include <linux/proc_fs.h>
17 #include <linux/smp_lock.h>
18 #include <linux/quotaops.h>
20 #include <asm/uaccess.h>
21 #include <asm/unaligned.h>
22 #include <asm/semaphore.h>
23 #include <asm/page.h>
24 #include <asm/pgtable.h>
26 #include <asm/namei.h>
/* This can be removed after the beta phase. */
#define CACHE_SUPERVISE	/* debug the correctness of dcache entries */
#undef DEBUG		/* some other debugging */

/*
 * Translate the access-mode bits of an open flag word into a permission
 * mask.  The string literal is a four-entry lookup table (values 0, 4, 2
 * and 6) indexed by (x) & O_ACCMODE.
 */
#define ACC_MODE(x) ("\000\004\002\006"[(x) & O_ACCMODE])
35 /* [Feb-1997 T. Schoebel-Theuer]
36 * Fundamental changes in the pathname lookup mechanisms (namei)
37 * were necessary because of omirr. The reason is that omirr needs
38 * to know the _real_ pathname, not the user-supplied one, in case
39 * of symlinks (and also when transname replacements occur).
41 * The new code replaces the old recursive symlink resolution with
42 * an iterative one (in case of non-nested symlink chains). It does
43 * this with calls to <fs>_follow_link().
44 * As a side effect, dir_namei(), _namei() and follow_link() are now
45 * replaced with a single function lookup_dentry() that can handle all
46 * the special cases of the former code.
48 * With the new dcache, the pathname is stored at each inode, at least as
49 * long as the refcount of the inode is positive. As a side effect, the
50 * size of the dcache depends on the inode cache and thus is dynamic.
52 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
53 * resolution to correspond with current state of the code.
55 * Note that the symlink resolution is not *completely* iterative.
56 * There is still a significant amount of tail- and mid- recursion in
57 * the algorithm. Also, note that <fs>_readlink() is not used in
58 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
59 * may return different results than <fs>_follow_link(). Many virtual
60 * filesystems (including /proc) exhibit this behavior.
63 /* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
64 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
65 * and the name already exists in form of a symlink, try to create the new
66 * name indicated by the symlink. The old code always complained that the
67 * name already exists, due to not following the symlink even if its target
68 * is nonexistent. The new semantics affects also mknod() and link() when
69 * the name is a symlink pointing to a non-existent name.
71 * I don't know which semantics is the right one, since I have no access
72 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
73 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
74 * "old" one. Personally, I think the new semantics is much more logical.
75 * Note that "ln old new" where "new" is a symlink pointing to a non-existing
76 * file does succeed in both HP-UX and SunOs, but not in Solaris
77 * and in the old Linux semantics.
80 /* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
81 * semantics. See the comments in "open_namei" and "do_link" below.
83 * [10-Sep-98 Alan Modra] Another symlink change.
86 /* In order to reduce some races, while at the same time doing additional
87 * checking and hopefully speeding things up, we copy filenames to the
88 * kernel data space before using them..
90 * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
92 static inline int do_getname(const char *filename, char *page)
94 int retval;
95 unsigned long len = PAGE_SIZE;
97 if ((unsigned long) filename >= TASK_SIZE) {
98 if (!segment_eq(get_fs(), KERNEL_DS))
99 return -EFAULT;
100 } else if (TASK_SIZE - (unsigned long) filename < PAGE_SIZE)
101 len = TASK_SIZE - (unsigned long) filename;
103 retval = strncpy_from_user((char *)page, filename, len);
104 if (retval > 0) {
105 if (retval < len)
106 return 0;
107 return -ENAMETOOLONG;
108 } else if (!retval)
109 retval = -ENOENT;
110 return retval;
113 char * getname(const char * filename)
115 char *tmp, *result;
117 result = ERR_PTR(-ENOMEM);
118 tmp = __getname();
119 if (tmp) {
120 int retval = do_getname(filename, tmp);
122 result = tmp;
123 if (retval < 0) {
124 putname(tmp);
125 result = ERR_PTR(retval);
128 return result;
132 * permission()
134 * is used to check for read/write/execute permissions on a file.
135 * We use "fsuid" for this, letting us set arbitrary permissions
136 * for filesystem access without changing the "normal" uids which
137 * are used for other things..
139 int permission(struct inode * inode,int mask)
141 int mode = inode->i_mode;
143 if (inode->i_op && inode->i_op->permission)
144 return inode->i_op->permission(inode, mask);
145 else if ((mask & S_IWOTH) && IS_RDONLY(inode) &&
146 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
147 return -EROFS; /* Nobody gets write access to a read-only fs */
148 else if ((mask & S_IWOTH) && IS_IMMUTABLE(inode))
149 return -EACCES; /* Nobody gets write access to an immutable file */
150 else if (current->fsuid == inode->i_uid)
151 mode >>= 6;
152 else if (in_group_p(inode->i_gid))
153 mode >>= 3;
154 if (((mode & mask & S_IRWXO) == mask) || capable(CAP_DAC_OVERRIDE))
155 return 0;
156 /* read and search access */
157 if ((mask == S_IROTH) ||
158 (S_ISDIR(mode) && !(mask & ~(S_IROTH | S_IXOTH))))
159 if (capable(CAP_DAC_READ_SEARCH))
160 return 0;
161 return -EACCES;
165 * get_write_access() gets write permission for a file.
166 * put_write_access() releases this write permission.
167 * This is used for regular files.
168 * We cannot support write (and maybe mmap read-write shared) accesses and
169 * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
170 * can have the following values:
171 * 0: no writers, no VM_DENYWRITE mappings
172 * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
173 * > 0: (i_writecount) users are writing to the file.
175 * WARNING: as soon as we will move get_write_access(), do_mmap() or
176 * prepare_binfmt() out of the big lock we will need a spinlock protecting
177 * the checks in all 3. For the time being it is not needed.
179 int get_write_access(struct inode * inode)
181 if (atomic_read(&inode->i_writecount) < 0)
182 return -ETXTBSY;
183 atomic_inc(&inode->i_writecount);
184 return 0;
187 void put_write_access(struct inode * inode)
189 atomic_dec(&inode->i_writecount);
193 * "." and ".." are special - ".." especially so because it has to be able
194 * to know about the current root directory and parent relationships
196 static struct dentry * reserved_lookup(struct dentry * parent, struct qstr * name)
198 struct dentry *result = NULL;
199 if (name->name[0] == '.') {
200 switch (name->len) {
201 default:
202 break;
203 case 2:
204 if (name->name[1] != '.')
205 break;
207 if (parent != current->fs->root)
208 parent = parent->d_covers->d_parent;
209 /* fallthrough */
210 case 1:
211 result = parent;
214 return dget(result);
218 * Internal lookup() using the new generic dcache.
220 static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
222 struct dentry * dentry = d_lookup(parent, name);
224 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
225 if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
226 dput(dentry);
227 dentry = NULL;
230 return dentry;
234 * This is called when everything else fails, and we actually have
235 * to go to the low-level filesystem to find out what we should do..
237 * We get the directory semaphore, and after getting that we also
238 * make sure that nobody added the entry to the dcache in the meantime..
240 static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
242 struct dentry * result;
243 struct inode *dir = parent->d_inode;
245 down(&dir->i_sem);
247 * First re-do the cached lookup just in case it was created
248 * while we waited for the directory semaphore..
250 * FIXME! This could use version numbering or similar to
251 * avoid unnecessary cache lookups.
253 result = d_lookup(parent, name);
254 if (!result) {
255 struct dentry * dentry = d_alloc(parent, name);
256 result = ERR_PTR(-ENOMEM);
257 if (dentry) {
258 result = dir->i_op->lookup(dir, dentry);
259 if (result)
260 dput(dentry);
261 else
262 result = dentry;
264 up(&dir->i_sem);
265 return result;
269 * Uhhuh! Nasty case: the cache was re-populated while
270 * we waited on the semaphore. Need to revalidate, but
271 * we're going to return this entry regardless (same
272 * as if it was busy).
274 up(&dir->i_sem);
275 if (result->d_op && result->d_op->d_revalidate)
276 result->d_op->d_revalidate(result, flags);
277 return result;
280 static struct dentry * do_follow_link(struct dentry *base, struct dentry *dentry, unsigned int follow)
282 struct inode * inode = dentry->d_inode;
284 if ((follow & LOOKUP_FOLLOW)
285 && inode && inode->i_op && inode->i_op->follow_link) {
286 if (current->link_count < 5) {
287 struct dentry * result;
289 current->link_count++;
290 /* This eats the base */
291 result = inode->i_op->follow_link(dentry, base, follow);
292 current->link_count--;
293 dput(dentry);
294 return result;
296 dput(dentry);
297 dentry = ERR_PTR(-ELOOP);
299 dput(base);
300 return dentry;
303 static inline struct dentry * follow_mount(struct dentry * dentry)
305 struct dentry * mnt = dentry->d_mounts;
307 if (mnt != dentry) {
308 dget(mnt);
309 dput(dentry);
310 dentry = mnt;
312 return dentry;
316 * Name resolution.
318 * This is the basic name resolution function, turning a pathname
319 * into the final dentry.
321 struct dentry * lookup_dentry(const char * name, struct dentry * base, unsigned int lookup_flags)
323 struct dentry * dentry;
324 struct inode *inode;
326 if (*name == '/') {
327 if (base)
328 dput(base);
329 do {
330 name++;
331 } while (*name == '/');
332 __prefix_lookup_dentry(name, lookup_flags);
333 base = dget(current->fs->root);
334 } else if (!base) {
335 base = dget(current->fs->pwd);
338 if (!*name)
339 goto return_base;
341 inode = base->d_inode;
342 lookup_flags &= LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_SLASHOK;
344 /* At this point we know we have a real path component. */
345 for(;;) {
346 int err;
347 unsigned long hash;
348 struct qstr this;
349 unsigned int flags;
350 unsigned int c;
352 err = permission(inode, MAY_EXEC);
353 dentry = ERR_PTR(err);
354 if (err)
355 break;
357 this.name = name;
358 c = *(const unsigned char *)name;
360 hash = init_name_hash();
361 do {
362 name++;
363 hash = partial_name_hash(c, hash);
364 c = *(const unsigned char *)name;
365 } while (c && (c != '/'));
366 this.len = name - (const char *) this.name;
367 this.hash = end_name_hash(hash);
369 /* remove trailing slashes? */
370 flags = lookup_flags;
371 if (c) {
372 char tmp;
374 flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
375 do {
376 tmp = *++name;
377 } while (tmp == '/');
378 if (tmp)
379 flags |= LOOKUP_CONTINUE;
383 * See if the low-level filesystem might want
384 * to use its own hash..
386 if (base->d_op && base->d_op->d_hash) {
387 int error;
388 error = base->d_op->d_hash(base, &this);
389 if (error < 0) {
390 dentry = ERR_PTR(error);
391 break;
395 /* This does the actual lookups.. */
396 dentry = reserved_lookup(base, &this);
397 if (!dentry) {
398 dentry = cached_lookup(base, &this, flags);
399 if (!dentry) {
400 dentry = real_lookup(base, &this, flags);
401 if (IS_ERR(dentry))
402 break;
406 /* Check mountpoints.. */
407 dentry = follow_mount(dentry);
409 base = do_follow_link(base, dentry, flags);
410 if (IS_ERR(base))
411 goto return_base;
413 inode = base->d_inode;
414 if (flags & LOOKUP_DIRECTORY) {
415 if (!inode)
416 goto no_inode;
417 dentry = ERR_PTR(-ENOTDIR);
418 if (!inode->i_op || !inode->i_op->lookup)
419 break;
420 if (flags & LOOKUP_CONTINUE)
421 continue;
423 return_base:
424 return base;
426 * The case of a nonexisting file is special.
428 * In the middle of a pathname lookup (ie when
429 * LOOKUP_CONTINUE is set), it's an obvious
430 * error and returns ENOENT.
432 * At the end of a pathname lookup it's legal,
433 * and we return a negative dentry. However, we
434 * get here only if there were trailing slashes,
435 * which is legal only if we know it's supposed
436 * to be a directory (ie "mkdir"). Thus the
437 * LOOKUP_SLASHOK flag.
439 no_inode:
440 dentry = ERR_PTR(-ENOENT);
441 if (flags & LOOKUP_CONTINUE)
442 break;
443 if (flags & LOOKUP_SLASHOK)
444 goto return_base;
445 break;
447 dput(base);
448 return dentry;
452 * namei()
454 * is used by most simple commands to get the inode of a specified name.
455 * Open, link etc use their own routines, but this is enough for things
456 * like 'chmod' etc.
458 * namei exists in two versions: namei/lnamei. The only difference is
459 * that namei follows links, while lnamei does not.
461 struct dentry * __namei(const char *pathname, unsigned int lookup_flags)
463 char *name;
464 struct dentry *dentry;
466 name = getname(pathname);
467 dentry = (struct dentry *) name;
468 if (!IS_ERR(name)) {
469 dentry = lookup_dentry(name, NULL, lookup_flags);
470 putname(name);
471 if (!IS_ERR(dentry)) {
472 if (!dentry->d_inode) {
473 dput(dentry);
474 dentry = ERR_PTR(-ENOENT);
478 return dentry;
482 * It's inline, so penalty for filesystems that don't use sticky bit is
483 * minimal.
485 static inline int check_sticky(struct inode *dir, struct inode *inode)
487 if (!(dir->i_mode & S_ISVTX))
488 return 0;
489 if (inode->i_uid == current->fsuid)
490 return 0;
491 if (dir->i_uid == current->fsuid)
492 return 0;
493 return !capable(CAP_FOWNER);
497 * Check whether we can remove a link victim from directory dir, check
498 * whether the type of victim is right.
499 * 1. We can't do it if dir is read-only (done in permission())
500 * 2. We should have write and exec permissions on dir
501 * 3. We can't remove anything from append-only dir
502 * 4. We can't do anything with immutable dir (done in permission())
503 * 5. If the sticky bit on dir is set we should either
504 * a. be owner of dir, or
505 * b. be owner of victim, or
506 * c. have CAP_FOWNER capability
507 * 6. If the victim is append-only or immutable we can't do antyhing with
508 * links pointing to it.
509 * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
510 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
511 * 9. We can't remove a root or mountpoint.
513 static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir)
515 int error;
516 if (!victim->d_inode || victim->d_parent->d_inode != dir)
517 return -ENOENT;
518 error = permission(dir,MAY_WRITE | MAY_EXEC);
519 if (error)
520 return error;
521 if (IS_APPEND(dir))
522 return -EPERM;
523 if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
524 IS_IMMUTABLE(victim->d_inode))
525 return -EPERM;
526 if (isdir) {
527 if (!S_ISDIR(victim->d_inode->i_mode))
528 return -ENOTDIR;
529 if (IS_ROOT(victim))
530 return -EBUSY;
531 if (victim->d_mounts != victim->d_covers)
532 return -EBUSY;
533 } else if (S_ISDIR(victim->d_inode->i_mode))
534 return -EISDIR;
535 return 0;
538 /* Check whether we can create an object with dentry child in directory
539 * dir.
540 * 1. We can't do it if child already exists (open has special treatment for
541 * this case, but since we are inlined it's OK)
542 * 2. We can't do it if dir is read-only (done in permission())
543 * 3. We should have write and exec permissions on dir
544 * 4. We can't do it if dir is immutable (done in permission())
546 static inline int may_create(struct inode *dir, struct dentry *child) {
547 if (child->d_inode)
548 return -EEXIST;
549 return permission(dir,MAY_WRITE | MAY_EXEC);
552 static inline struct dentry *get_parent(struct dentry *dentry)
554 return dget(dentry->d_parent);
557 static inline void unlock_dir(struct dentry *dir)
559 up(&dir->d_inode->i_sem);
560 dput(dir);
/*
 * We need to do a check-parent every time
 * after we have locked the parent - to verify
 * that the parent is still our parent and
 * that we are still hashed onto it..
 *
 * This is required in case two processes race
 * on removing (or moving) the same entry: the
 * parent lock will serialize them, but the
 * other process will be too late..
 *
 * Evaluates to non-zero when (dir) is still (dentry)'s parent and
 * (dentry) is still on a hash list.  Both arguments are fully
 * parenthesized so that any pointer expression may be passed; note that
 * (dentry) is evaluated twice.
 */
#define check_parent(dir, dentry) \
	((dir) == (dentry)->d_parent && !list_empty(&(dentry)->d_hash))
578 * Locking the parent is needed to:
579 * - serialize directory operations
580 * - make sure the parent doesn't change from
581 * under us in the middle of an operation.
583 * NOTE! Right now we'd rather use a "struct inode"
584 * for this, but as I expect things to move toward
585 * using dentries instead for most things it is
586 * probably better to start with the conceptually
587 * better interface of relying on a path of dentries.
589 static inline struct dentry *lock_parent(struct dentry *dentry)
591 struct dentry *dir = dget(dentry->d_parent);
593 down(&dir->d_inode->i_sem);
594 return dir;
598 * Whee.. Deadlock country. Happily there are only two VFS
599 * operations that do this..
601 static inline void double_lock(struct dentry *d1, struct dentry *d2)
603 struct semaphore *s1 = &d1->d_inode->i_sem;
604 struct semaphore *s2 = &d2->d_inode->i_sem;
606 if (s1 != s2) {
607 if ((unsigned long) s1 < (unsigned long) s2) {
608 struct semaphore *tmp = s2;
609 s2 = s1; s1 = tmp;
611 down(s1);
613 down(s2);
616 static inline void double_unlock(struct dentry *d1, struct dentry *d2)
618 struct semaphore *s1 = &d1->d_inode->i_sem;
619 struct semaphore *s2 = &d2->d_inode->i_sem;
621 up(s1);
622 if (s1 != s2)
623 up(s2);
624 dput(d1);
625 dput(d2);
630 * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security
631 * reasons.
633 * O_DIRECTORY translates into forcing a directory lookup.
635 static inline int lookup_flags(unsigned int f)
637 unsigned long retval = LOOKUP_FOLLOW;
639 if (f & O_NOFOLLOW)
640 retval &= ~LOOKUP_FOLLOW;
642 if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
643 retval &= ~LOOKUP_FOLLOW;
645 if (f & O_DIRECTORY)
646 retval |= LOOKUP_DIRECTORY;
648 return retval;
651 int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
653 int error;
655 error = may_create(dir, dentry);
656 if (error)
657 goto exit_lock;
659 error = -EACCES; /* shouldn't it be ENOSYS? */
660 if (!dir->i_op || !dir->i_op->create)
661 goto exit_lock;
663 DQUOT_INIT(dir);
664 error = dir->i_op->create(dir, dentry, mode);
665 exit_lock:
666 return error;
670 * open_namei()
672 * namei for open - this is in fact almost the whole open-routine.
674 * Note that the low bits of "flag" aren't the same as in the open
675 * system call - they are 00 - no permissions needed
676 * 01 - read permission needed
677 * 10 - write permission needed
678 * 11 - read/write permissions needed
679 * which is a lot more logical, and also allows the "no perm" needed
680 * for symlinks (where the permissions are checked later).
682 struct dentry * open_namei(const char * pathname, int flag, int mode)
684 int acc_mode, error;
685 struct inode *inode;
686 struct dentry *dentry;
688 mode &= S_IALLUGO & ~current->fs->umask;
689 mode |= S_IFREG;
691 dentry = lookup_dentry(pathname, NULL, lookup_flags(flag));
692 if (IS_ERR(dentry))
693 return dentry;
695 acc_mode = ACC_MODE(flag);
696 if (flag & O_CREAT) {
697 struct dentry *dir;
699 if (dentry->d_inode) {
700 if (!(flag & O_EXCL))
701 goto nocreate;
702 error = -EEXIST;
703 goto exit;
706 dir = lock_parent(dentry);
707 if (!check_parent(dir, dentry)) {
709 * Really nasty race happened. What's the
710 * right error code? We had a dentry, but
711 * before we could use it it was removed
712 * by somebody else. We could just re-try
713 * everything, I guess.
715 * ENOENT is definitely wrong.
717 error = -ENOENT;
718 unlock_dir(dir);
719 goto exit;
723 * Somebody might have created the file while we
724 * waited for the directory lock.. So we have to
725 * re-do the existence test.
727 if (dentry->d_inode) {
728 error = 0;
729 if (flag & O_EXCL)
730 error = -EEXIST;
731 } else {
732 error = vfs_create(dir->d_inode, dentry,mode);
733 /* Don't check for write permission, don't truncate */
734 acc_mode = 0;
735 flag &= ~O_TRUNC;
737 unlock_dir(dir);
738 if (error)
739 goto exit;
742 nocreate:
743 error = -ENOENT;
744 inode = dentry->d_inode;
745 if (!inode)
746 goto exit;
748 error = -ELOOP;
749 if (S_ISLNK(inode->i_mode))
750 goto exit;
752 error = -EISDIR;
753 if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
754 goto exit;
756 error = permission(inode,acc_mode);
757 if (error)
758 goto exit;
761 * FIFO's, sockets and device files are special: they don't
762 * actually live on the filesystem itself, and as such you
763 * can write to them even if the filesystem is read-only.
765 if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
766 flag &= ~O_TRUNC;
767 } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
768 error = -EACCES;
769 if (IS_NODEV(inode))
770 goto exit;
772 flag &= ~O_TRUNC;
773 } else {
774 error = -EROFS;
775 if (IS_RDONLY(inode) && (flag & 2))
776 goto exit;
779 * An append-only file must be opened in append mode for writing.
781 error = -EPERM;
782 if (IS_APPEND(inode)) {
783 if ((flag & FMODE_WRITE) && !(flag & O_APPEND))
784 goto exit;
785 if (flag & O_TRUNC)
786 goto exit;
789 if (flag & O_TRUNC) {
790 error = get_write_access(inode);
791 if (error)
792 goto exit;
795 * Refuse to truncate files with mandatory locks held on them.
797 error = locks_verify_locked(inode);
798 if (!error) {
799 DQUOT_INIT(inode);
801 error = do_truncate(dentry, 0);
803 put_write_access(inode);
804 if (error)
805 goto exit;
806 } else
807 if (flag & FMODE_WRITE)
808 DQUOT_INIT(inode);
810 return dentry;
812 exit:
813 dput(dentry);
814 return ERR_PTR(error);
817 struct dentry * do_mknod(const char * filename, int mode, dev_t dev)
819 int error;
820 struct dentry *dir;
821 struct dentry *dentry, *retval;
823 mode &= ~current->fs->umask;
824 dentry = lookup_dentry(filename, NULL, LOOKUP_FOLLOW);
825 if (IS_ERR(dentry))
826 return dentry;
828 dir = lock_parent(dentry);
829 error = -ENOENT;
830 if (!check_parent(dir, dentry))
831 goto exit_lock;
833 error = may_create(dir->d_inode, dentry);
834 if (error)
835 goto exit_lock;
837 error = -EPERM;
838 if (!dir->d_inode->i_op || !dir->d_inode->i_op->mknod)
839 goto exit_lock;
841 DQUOT_INIT(dir->d_inode);
842 error = dir->d_inode->i_op->mknod(dir->d_inode, dentry, mode, dev);
843 exit_lock:
844 retval = ERR_PTR(error);
845 if (!error)
846 retval = dget(dentry);
847 unlock_dir(dir);
848 dput(dentry);
849 return retval;
852 asmlinkage long sys_mknod(const char * filename, int mode, dev_t dev)
854 int error;
855 char * tmp;
856 struct dentry * dentry;
858 lock_kernel();
859 error = -EPERM;
860 if (S_ISDIR(mode) || (!S_ISFIFO(mode) && !capable(CAP_MKNOD)))
861 goto out;
862 tmp = getname(filename);
863 error = PTR_ERR(tmp);
864 if (IS_ERR(tmp))
865 goto out;
867 error = -EINVAL;
868 switch (mode & S_IFMT) {
869 case 0:
870 mode |= S_IFREG; /* fallthrough */
871 case S_IFREG:
872 mode &= ~current->fs->umask;
873 dentry = lookup_dentry(filename, NULL, LOOKUP_FOLLOW);
874 if (IS_ERR(dentry))
875 error = PTR_ERR(dentry);
876 else {
877 struct dentry *dir = lock_parent(dentry);
878 error = -ENOENT;
879 if (check_parent(dir, dentry))
880 error = vfs_create(dir->d_inode, dentry, mode);
881 dput(dentry);
883 break;
884 case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
885 dentry = do_mknod(tmp,mode,dev);
886 error = PTR_ERR(dentry);
887 if (!IS_ERR(dentry)) {
888 dput(dentry);
889 error = 0;
891 break;
893 putname(tmp);
895 out:
896 unlock_kernel();
897 return error;
900 static inline int do_mkdir(const char * pathname, int mode)
902 int error;
903 struct dentry *dir;
904 struct dentry *dentry;
906 dentry = lookup_dentry(pathname, NULL, LOOKUP_SLASHOK);
907 error = PTR_ERR(dentry);
908 if (IS_ERR(dentry))
909 goto exit;
912 * EEXIST is kind of a strange error code to
913 * return, but basically if the dentry was moved
914 * or unlinked while we locked the parent, we
915 * do know that it _did_ exist before, and as
916 * such it makes perfect sense.. In contrast,
917 * ENOENT doesn't make sense for mkdir.
919 dir = lock_parent(dentry);
920 error = -EEXIST;
921 if (!check_parent(dir, dentry))
922 goto exit_lock;
924 error = may_create(dir->d_inode, dentry);
925 if (error)
926 goto exit_lock;
928 error = -EPERM;
929 if (!dir->d_inode->i_op || !dir->d_inode->i_op->mkdir)
930 goto exit_lock;
932 DQUOT_INIT(dir->d_inode);
933 mode &= (S_IRWXUGO|S_ISVTX) & ~current->fs->umask;
934 error = dir->d_inode->i_op->mkdir(dir->d_inode, dentry, mode);
936 exit_lock:
937 unlock_dir(dir);
938 dput(dentry);
939 exit:
940 return error;
943 asmlinkage long sys_mkdir(const char * pathname, int mode)
945 int error;
946 char * tmp;
948 lock_kernel();
949 tmp = getname(pathname);
950 error = PTR_ERR(tmp);
951 if (!IS_ERR(tmp)) {
952 error = do_mkdir(tmp,mode);
953 putname(tmp);
955 unlock_kernel();
956 return error;
959 int vfs_rmdir(struct inode *dir, struct dentry *dentry)
961 int error;
963 error = may_delete(dir, dentry, 1);
964 if (error)
965 return error;
967 if (!dir->i_op || !dir->i_op->rmdir)
968 return -EPERM;
970 DQUOT_INIT(dir);
973 * We try to drop the dentry early: we should have
974 * a usage count of 2 if we're the only user of this
975 * dentry, and if that is true (possibly after pruning
976 * the dcache), then we drop the dentry now.
978 * A low-level filesystem can, if it choses, legally
979 * do a
981 * if (!list_empty(&dentry->d_hash))
982 * return -EBUSY;
984 * if it cannot handle the case of removing a directory
985 * that is still in use by something else..
987 switch (dentry->d_count) {
988 default:
989 shrink_dcache_parent(dentry);
990 if (dentry->d_count != 2)
991 break;
992 case 2:
993 d_drop(dentry);
996 error = dir->i_op->rmdir(dir, dentry);
998 return error;
1001 static inline int do_rmdir(const char * name)
1003 int error;
1004 struct dentry *dir;
1005 struct dentry *dentry;
1007 dentry = lookup_dentry(name, NULL, 0);
1008 error = PTR_ERR(dentry);
1009 if (IS_ERR(dentry))
1010 goto exit;
1012 error = -ENOENT;
1013 if (!dentry->d_inode)
1014 goto exit_dput;
1016 dir = dget(dentry->d_parent);
1019 * The dentry->d_count stuff confuses d_delete() enough to
1020 * not kill the inode from under us while it is locked. This
1021 * wouldn't be needed, except the dentry semaphore is really
1022 * in the inode, not in the dentry..
1024 dentry->d_count++;
1025 double_lock(dir, dentry);
1027 error = -ENOENT;
1028 if (check_parent(dir, dentry))
1029 error = vfs_rmdir(dir->d_inode, dentry);
1031 double_unlock(dentry, dir);
1032 exit_dput:
1033 dput(dentry);
1034 exit:
1035 return error;
1038 asmlinkage long sys_rmdir(const char * pathname)
1040 int error;
1041 char * tmp;
1043 lock_kernel();
1044 tmp = getname(pathname);
1045 error = PTR_ERR(tmp);
1046 if (!IS_ERR(tmp)) {
1047 error = do_rmdir(tmp);
1048 putname(tmp);
1050 unlock_kernel();
1051 return error;
1054 int vfs_unlink(struct inode *dir, struct dentry *dentry)
1056 int error;
1058 error = may_delete(dir, dentry, 0);
1059 if (!error) {
1060 error = -EPERM;
1061 if (dir->i_op && dir->i_op->unlink) {
1062 DQUOT_INIT(dir);
1063 error = dir->i_op->unlink(dir, dentry);
1066 return error;
1069 static inline int do_unlink(const char * name)
1071 int error;
1072 struct dentry *dir;
1073 struct dentry *dentry;
1075 dentry = lookup_dentry(name, NULL, 0);
1076 error = PTR_ERR(dentry);
1077 if (IS_ERR(dentry))
1078 goto exit;
1080 dir = lock_parent(dentry);
1081 error = -ENOENT;
1082 if (check_parent(dir, dentry))
1083 error = vfs_unlink(dir->d_inode, dentry);
1085 unlock_dir(dir);
1086 dput(dentry);
1087 exit:
1088 return error;
1091 asmlinkage long sys_unlink(const char * pathname)
1093 int error;
1094 char * tmp;
1096 lock_kernel();
1097 tmp = getname(pathname);
1098 error = PTR_ERR(tmp);
1099 if (!IS_ERR(tmp)) {
1100 error = do_unlink(tmp);
1101 putname(tmp);
1103 unlock_kernel();
1104 return error;
1107 static inline int do_symlink(const char * oldname, const char * newname)
1109 int error;
1110 struct dentry *dir;
1111 struct dentry *dentry;
1113 dentry = lookup_dentry(newname, NULL, 0);
1115 error = PTR_ERR(dentry);
1116 if (IS_ERR(dentry))
1117 goto exit;
1119 dir = lock_parent(dentry);
1120 error = -ENOENT;
1121 if (!check_parent(dir, dentry))
1122 goto exit_lock;
1124 error = may_create(dir->d_inode, dentry);
1125 if (error)
1126 goto exit_lock;
1128 error = -EPERM;
1129 if (!dir->d_inode->i_op || !dir->d_inode->i_op->symlink)
1130 goto exit_lock;
1132 DQUOT_INIT(dir->d_inode);
1133 error = dir->d_inode->i_op->symlink(dir->d_inode, dentry, oldname);
1135 exit_lock:
1136 unlock_dir(dir);
1137 dput(dentry);
1138 exit:
1139 return error;
1142 asmlinkage long sys_symlink(const char * oldname, const char * newname)
1144 int error;
1145 char * from;
1147 lock_kernel();
1148 from = getname(oldname);
1149 error = PTR_ERR(from);
1150 if (!IS_ERR(from)) {
1151 char * to;
1152 to = getname(newname);
1153 error = PTR_ERR(to);
1154 if (!IS_ERR(to)) {
1155 error = do_symlink(from,to);
1156 putname(to);
1158 putname(from);
1160 unlock_kernel();
1161 return error;
1164 static inline int do_link(const char * oldname, const char * newname)
1166 struct dentry *old_dentry, *new_dentry, *dir;
1167 struct inode *inode;
1168 int error;
1171 * Hardlinks are often used in delicate situations. We avoid
1172 * security-related surprises by not following symlinks on the
1173 * newname. --KAB
1175 * We don't follow them on the oldname either to be compatible
1176 * with linux 2.0, and to avoid hard-linking to directories
1177 * and other special files. --ADM
1179 old_dentry = lookup_dentry(oldname, NULL, 0);
1180 error = PTR_ERR(old_dentry);
1181 if (IS_ERR(old_dentry))
1182 goto exit;
1184 new_dentry = lookup_dentry(newname, NULL, 0);
1185 error = PTR_ERR(new_dentry);
1186 if (IS_ERR(new_dentry))
1187 goto exit_old;
1189 dir = lock_parent(new_dentry);
1190 error = -ENOENT;
1191 if (!check_parent(dir, new_dentry))
1192 goto exit_lock;
1194 error = -ENOENT;
1195 inode = old_dentry->d_inode;
1196 if (!inode)
1197 goto exit_lock;
1199 error = may_create(dir->d_inode, new_dentry);
1200 if (error)
1201 goto exit_lock;
1203 error = -EXDEV;
1204 if (dir->d_inode->i_dev != inode->i_dev)
1205 goto exit_lock;
1208 * A link to an append-only or immutable file cannot be created.
1210 error = -EPERM;
1211 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1212 goto exit_lock;
1214 error = -EPERM;
1215 if (!dir->d_inode->i_op || !dir->d_inode->i_op->link)
1216 goto exit_lock;
1218 DQUOT_INIT(dir->d_inode);
1219 error = dir->d_inode->i_op->link(old_dentry, dir->d_inode, new_dentry);
1221 exit_lock:
1222 unlock_dir(dir);
1223 dput(new_dentry);
1224 exit_old:
1225 dput(old_dentry);
1226 exit:
1227 return error;
1230 asmlinkage long sys_link(const char * oldname, const char * newname)
1232 int error;
1233 char * from;
1235 lock_kernel();
1236 from = getname(oldname);
1237 error = PTR_ERR(from);
1238 if (!IS_ERR(from)) {
1239 char * to;
1240 to = getname(newname);
1241 error = PTR_ERR(to);
1242 if (!IS_ERR(to)) {
1243 error = do_link(from,to);
1244 putname(to);
1246 putname(from);
1248 unlock_kernel();
1249 return error;
1252 int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
1253 struct inode *new_dir, struct dentry *new_dentry)
1255 int error;
1256 int need_rehash = 0;
1258 if (old_dentry->d_inode == new_dentry->d_inode)
1259 return 0;
1261 error = may_delete(old_dir, old_dentry, 1);
1262 if (error)
1263 return error;
1265 if (new_dir->i_dev != old_dir->i_dev)
1266 return -EXDEV;
1268 if (!new_dentry->d_inode)
1269 error = may_create(new_dir, new_dentry);
1270 else
1271 error = may_delete(new_dir, new_dentry, 1);
1272 if (error)
1273 return error;
1275 if (!old_dir->i_op || !old_dir->i_op->rename)
1276 return -EPERM;
1279 * If we are going to change the parent - check write permissions,
1280 * we'll need to flip '..'.
1282 if (new_dir != old_dir) {
1283 error = permission(old_dentry->d_inode, MAY_WRITE);
1285 if (error)
1286 return error;
1288 DQUOT_INIT(old_dir);
1289 DQUOT_INIT(new_dir);
1290 down(&old_dir->i_sb->s_vfs_rename_sem);
1291 error = -EINVAL;
1292 if (is_subdir(new_dentry, old_dentry))
1293 goto out_unlock;
1294 if (new_dentry->d_inode) {
1295 error = -EBUSY;
1296 if (d_invalidate(new_dentry)<0)
1297 goto out_unlock;
1298 need_rehash = 1;
1300 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1301 if (need_rehash)
1302 d_rehash(new_dentry);
1303 if (!error)
1304 d_move(old_dentry,new_dentry);
1305 out_unlock:
1306 up(&old_dir->i_sb->s_vfs_rename_sem);
1307 return error;
1310 int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
1311 struct inode *new_dir, struct dentry *new_dentry)
1313 int error;
1315 if (old_dentry->d_inode == new_dentry->d_inode)
1316 return 0;
1318 error = may_delete(old_dir, old_dentry, 0);
1319 if (error)
1320 return error;
1322 if (new_dir->i_dev != old_dir->i_dev)
1323 return -EXDEV;
1325 if (!new_dentry->d_inode)
1326 error = may_create(new_dir, new_dentry);
1327 else
1328 error = may_delete(new_dir, new_dentry, 0);
1329 if (error)
1330 return error;
1332 if (!old_dir->i_op || !old_dir->i_op->rename)
1333 return -EPERM;
1335 DQUOT_INIT(old_dir);
1336 DQUOT_INIT(new_dir);
1337 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1338 if (error)
1339 return error;
1340 /* The following d_move() should become unconditional */
1341 if (!(old_dir->i_sb->s_flags & MS_ODD_RENAME)) {
1342 d_move(old_dentry, new_dentry);
1344 return 0;
1347 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1348 struct inode *new_dir, struct dentry *new_dentry)
1350 if (S_ISDIR(old_dentry->d_inode->i_mode))
1351 return vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
1352 else
1353 return vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
1356 static inline int do_rename(const char * oldname, const char * newname)
1358 int error;
1359 struct dentry * old_dir, * new_dir;
1360 struct dentry * old_dentry, *new_dentry;
1362 old_dentry = lookup_dentry(oldname, NULL, 0);
1364 error = PTR_ERR(old_dentry);
1365 if (IS_ERR(old_dentry))
1366 goto exit;
1368 error = -ENOENT;
1369 if (!old_dentry->d_inode)
1370 goto exit_old;
1373 unsigned int flags = 0;
1374 if (S_ISDIR(old_dentry->d_inode->i_mode))
1375 flags = LOOKUP_SLASHOK;
1376 new_dentry = lookup_dentry(newname, NULL, flags);
1379 error = PTR_ERR(new_dentry);
1380 if (IS_ERR(new_dentry))
1381 goto exit_old;
1383 new_dir = get_parent(new_dentry);
1384 old_dir = get_parent(old_dentry);
1386 double_lock(new_dir, old_dir);
1388 error = -ENOENT;
1389 if (check_parent(old_dir, old_dentry) && check_parent(new_dir, new_dentry))
1390 error = vfs_rename(old_dir->d_inode, old_dentry,
1391 new_dir->d_inode, new_dentry);
1393 double_unlock(new_dir, old_dir);
1394 dput(new_dentry);
1395 exit_old:
1396 dput(old_dentry);
1397 exit:
1398 return error;
1401 asmlinkage long sys_rename(const char * oldname, const char * newname)
1403 int error;
1404 char * from;
1406 lock_kernel();
1407 from = getname(oldname);
1408 error = PTR_ERR(from);
1409 if (!IS_ERR(from)) {
1410 char * to;
1411 to = getname(newname);
1412 error = PTR_ERR(to);
1413 if (!IS_ERR(to)) {
1414 error = do_rename(from,to);
1415 putname(to);
1417 putname(from);
1419 unlock_kernel();
1420 return error;