Import 2.4.0-test3pre5
[davej-history.git] / fs / namei.c
blobcba4fb7754e8ccd626f19f293a78114a668def63
1 /*
2 * linux/fs/namei.c
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
7 /*
8 * Some corrections by tytso.
9 */
11 /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
12 * lookup logic.
14 /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
17 #include <linux/mm.h>
18 #include <linux/proc_fs.h>
19 #include <linux/smp_lock.h>
20 #include <linux/quotaops.h>
21 #include <linux/pagemap.h>
22 #include <linux/dcache.h>
24 #include <asm/uaccess.h>
25 #include <asm/unaligned.h>
26 #include <asm/semaphore.h>
27 #include <asm/page.h>
28 #include <asm/pgtable.h>
30 #include <asm/namei.h>
/*
 * Map the O_ACCMODE bits of an open flag word onto a permission mask
 * by indexing a four-byte string literal:
 *	0 -> 0, 1 -> 4, 2 -> 2, 3 -> 6.
 */
#define ACC_MODE(x) ("\000\004\002\006"[(x) & O_ACCMODE])
34 /* [Feb-1997 T. Schoebel-Theuer]
35 * Fundamental changes in the pathname lookup mechanisms (namei)
36 * were necessary because of omirr. The reason is that omirr needs
37 * to know the _real_ pathname, not the user-supplied one, in case
38 * of symlinks (and also when transname replacements occur).
40 * The new code replaces the old recursive symlink resolution with
41 * an iterative one (in case of non-nested symlink chains). It does
42 * this with calls to <fs>_follow_link().
43 * As a side effect, dir_namei(), _namei() and follow_link() are now
44 * replaced with a single function lookup_dentry() that can handle all
45 * the special cases of the former code.
47 * With the new dcache, the pathname is stored at each inode, at least as
48 * long as the refcount of the inode is positive. As a side effect, the
49 * size of the dcache depends on the inode cache and thus is dynamic.
51 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
52 * resolution to correspond with current state of the code.
54 * Note that the symlink resolution is not *completely* iterative.
55 * There is still a significant amount of tail- and mid- recursion in
56 * the algorithm. Also, note that <fs>_readlink() is not used in
57 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
58 * may return different results than <fs>_follow_link(). Many virtual
59 * filesystems (including /proc) exhibit this behavior.
62 /* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
63 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
64 * and the name already exists in form of a symlink, try to create the new
65 * name indicated by the symlink. The old code always complained that the
66 * name already exists, due to not following the symlink even if its target
67 * is nonexistent. The new semantics affects also mknod() and link() when
68 * the name is a symlink pointing to a non-existent name.
70 * I don't know which semantics is the right one, since I have no access
71 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
72 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
73 * "old" one. Personally, I think the new semantics is much more logical.
74 * Note that "ln old new" where "new" is a symlink pointing to a non-existing
75 * file does succeed in both HP-UX and SunOS, but not in Solaris
76 * and in the old Linux semantics.
79 /* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
80 * semantics. See the comments in "open_namei" and "do_link" below.
82 * [10-Sep-98 Alan Modra] Another symlink change.
85 /* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
86 * inside the path - always follow.
87 * in the last component in creation/removal/renaming - never follow.
88 * if LOOKUP_FOLLOW passed - follow.
89 * if the pathname has trailing slashes - follow.
90 * otherwise - don't follow.
91 * (applied in that order).
93 * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
94 * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
95 * During the 2.4 we need to fix the userland stuff depending on it -
96 * hopefully we will be able to get rid of that wart in 2.5. So far only
97 * XEmacs seems to be relying on it...
100 /* In order to reduce some races, while at the same time doing additional
101 * checking and hopefully speeding things up, we copy filenames to the
102 * kernel data space before using them..
104 * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
106 static inline int do_getname(const char *filename, char *page)
108 int retval;
109 unsigned long len = PAGE_SIZE;
111 if ((unsigned long) filename >= TASK_SIZE) {
112 if (!segment_eq(get_fs(), KERNEL_DS))
113 return -EFAULT;
114 } else if (TASK_SIZE - (unsigned long) filename < PAGE_SIZE)
115 len = TASK_SIZE - (unsigned long) filename;
117 retval = strncpy_from_user((char *)page, filename, len);
118 if (retval > 0) {
119 if (retval < len)
120 return 0;
121 return -ENAMETOOLONG;
122 } else if (!retval)
123 retval = -ENOENT;
124 return retval;
127 char * getname(const char * filename)
129 char *tmp, *result;
131 result = ERR_PTR(-ENOMEM);
132 tmp = __getname();
133 if (tmp) {
134 int retval = do_getname(filename, tmp);
136 result = tmp;
137 if (retval < 0) {
138 putname(tmp);
139 result = ERR_PTR(retval);
142 return result;
146 * permission()
148 * is used to check for read/write/execute permissions on a file.
149 * We use "fsuid" for this, letting us set arbitrary permissions
150 * for filesystem access without changing the "normal" uids which
151 * are used for other things..
153 int permission(struct inode * inode,int mask)
155 int mode = inode->i_mode;
157 if (inode->i_op && inode->i_op->permission) {
158 int retval;
159 lock_kernel();
160 retval = inode->i_op->permission(inode, mask);
161 unlock_kernel();
162 return retval;
165 if ((mask & S_IWOTH) && IS_RDONLY(inode) &&
166 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
167 return -EROFS; /* Nobody gets write access to a read-only fs */
169 if ((mask & S_IWOTH) && IS_IMMUTABLE(inode))
170 return -EACCES; /* Nobody gets write access to an immutable file */
172 if (current->fsuid == inode->i_uid)
173 mode >>= 6;
174 else if (in_group_p(inode->i_gid))
175 mode >>= 3;
177 if (((mode & mask & S_IRWXO) == mask) || capable(CAP_DAC_OVERRIDE))
178 return 0;
180 /* read and search access */
181 if ((mask == S_IROTH) ||
182 (S_ISDIR(mode) && !(mask & ~(S_IROTH | S_IXOTH))))
183 if (capable(CAP_DAC_READ_SEARCH))
184 return 0;
186 return -EACCES;
190 * get_write_access() gets write permission for a file.
191 * put_write_access() releases this write permission.
192 * This is used for regular files.
193 * We cannot support write (and maybe mmap read-write shared) accesses and
194 * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
195 * can have the following values:
196 * 0: no writers, no VM_DENYWRITE mappings
197 * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
198 * > 0: (i_writecount) users are writing to the file.
200 * Normally we operate on that counter with atomic_{inc,dec} and it's safe
201 * except for the cases where we don't hold i_writecount yet. Then we need to
202 * use {get,deny}_write_access() - these functions check the sign and refuse
203 * to do the change if sign is wrong. Exclusion between them is provided by
204 * spinlock (arbitration_lock) and I'll rip the second arsehole to the first
205 * who will try to move it in struct inode - just leave it here.
207 static spinlock_t arbitration_lock = SPIN_LOCK_UNLOCKED;
208 int get_write_access(struct inode * inode)
210 spin_lock(&arbitration_lock);
211 if (atomic_read(&inode->i_writecount) < 0) {
212 spin_unlock(&arbitration_lock);
213 return -ETXTBSY;
215 atomic_inc(&inode->i_writecount);
216 spin_unlock(&arbitration_lock);
217 return 0;
219 int deny_write_access(struct file * file)
221 spin_lock(&arbitration_lock);
222 if (atomic_read(&file->f_dentry->d_inode->i_writecount) > 0) {
223 spin_unlock(&arbitration_lock);
224 return -ETXTBSY;
226 atomic_dec(&file->f_dentry->d_inode->i_writecount);
227 spin_unlock(&arbitration_lock);
228 return 0;
231 void path_release(struct nameidata *nd)
233 dput(nd->dentry);
234 mntput(nd->mnt);
238 * Internal lookup() using the new generic dcache.
239 * SMP-safe
241 static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
243 struct dentry * dentry = d_lookup(parent, name);
245 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
246 if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
247 dput(dentry);
248 dentry = NULL;
251 return dentry;
255 * This is called when everything else fails, and we actually have
256 * to go to the low-level filesystem to find out what we should do..
258 * We get the directory semaphore, and after getting that we also
259 * make sure that nobody added the entry to the dcache in the meantime..
260 * SMP-safe
262 static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
264 struct dentry * result;
265 struct inode *dir = parent->d_inode;
267 down(&dir->i_sem);
269 * First re-do the cached lookup just in case it was created
270 * while we waited for the directory semaphore..
272 * FIXME! This could use version numbering or similar to
273 * avoid unnecessary cache lookups.
275 result = d_lookup(parent, name);
276 if (!result) {
277 struct dentry * dentry = d_alloc(parent, name);
278 result = ERR_PTR(-ENOMEM);
279 if (dentry) {
280 lock_kernel();
281 result = dir->i_op->lookup(dir, dentry);
282 unlock_kernel();
283 if (result)
284 dput(dentry);
285 else
286 result = dentry;
288 up(&dir->i_sem);
289 return result;
293 * Uhhuh! Nasty case: the cache was re-populated while
294 * we waited on the semaphore. Need to revalidate, but
295 * we're going to return this entry regardless (same
296 * as if it was busy).
298 up(&dir->i_sem);
299 if (result->d_op && result->d_op->d_revalidate)
300 result->d_op->d_revalidate(result, flags);
301 return result;
304 static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
306 int err;
307 if (current->link_count >= 32)
308 goto loop;
309 current->link_count++;
310 UPDATE_ATIME(dentry->d_inode);
311 err = dentry->d_inode->i_op->follow_link(dentry, nd);
312 current->link_count--;
313 return err;
314 loop:
315 path_release(nd);
316 return -ELOOP;
319 static inline int __follow_up(struct vfsmount **mnt, struct dentry **base)
321 struct vfsmount *parent;
322 struct dentry *dentry;
323 spin_lock(&dcache_lock);
324 parent=(*mnt)->mnt_parent;
325 if (parent == *mnt) {
326 spin_unlock(&dcache_lock);
327 return 0;
329 mntget(parent);
330 dentry=dget((*mnt)->mnt_mountpoint);
331 spin_unlock(&dcache_lock);
332 dput(*base);
333 *base = dentry;
334 mntput(*mnt);
335 *mnt = parent;
336 return 1;
/*
 * Non-inline entry point: forwards to __follow_up() and returns its
 * result.
 */
int follow_up(struct vfsmount **mnt, struct dentry **dentry)
{
	int moved = __follow_up(mnt, dentry);

	return moved;
}
344 static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
346 struct list_head *p;
347 spin_lock(&dcache_lock);
348 p = (*dentry)->d_vfsmnt.next;
349 while (p != &(*dentry)->d_vfsmnt) {
350 struct vfsmount *tmp;
351 tmp = list_entry(p, struct vfsmount, mnt_clash);
352 if (tmp->mnt_parent == *mnt) {
353 *mnt = mntget(tmp);
354 spin_unlock(&dcache_lock);
355 mntput(tmp->mnt_parent);
356 /* tmp holds the mountpoint, so... */
357 dput(*dentry);
358 *dentry = dget(tmp->mnt_root);
359 return 1;
361 p = p->next;
363 spin_unlock(&dcache_lock);
364 return 0;
/*
 * Non-inline entry point: forwards to __follow_down() and returns its
 * result.
 */
int follow_down(struct vfsmount **mnt, struct dentry **dentry)
{
	int moved = __follow_down(mnt, dentry);

	return moved;
}
372 static inline void follow_dotdot(struct nameidata *nd)
374 while(1) {
375 struct vfsmount *parent;
376 struct dentry *dentry;
377 read_lock(&current->fs->lock);
378 if (nd->dentry == current->fs->root &&
379 nd->mnt == current->fs->rootmnt) {
380 read_unlock(&current->fs->lock);
381 break;
383 read_unlock(&current->fs->lock);
384 spin_lock(&dcache_lock);
385 if (nd->dentry != nd->mnt->mnt_root) {
386 dentry = dget(nd->dentry->d_parent);
387 spin_unlock(&dcache_lock);
388 dput(nd->dentry);
389 nd->dentry = dentry;
390 break;
392 parent=nd->mnt->mnt_parent;
393 if (parent == nd->mnt) {
394 spin_unlock(&dcache_lock);
395 break;
397 mntget(parent);
398 dentry=dget(nd->mnt->mnt_mountpoint);
399 spin_unlock(&dcache_lock);
400 dput(nd->dentry);
401 nd->dentry = dentry;
402 mntput(nd->mnt);
403 nd->mnt = parent;
407 * Name resolution.
409 * This is the basic name resolution function, turning a pathname
410 * into the final dentry.
412 * We expect 'base' to be positive and a directory.
414 int path_walk(const char * name, struct nameidata *nd)
416 struct dentry *dentry;
417 struct inode *inode;
418 int err;
419 unsigned int lookup_flags = nd->flags;
421 while (*name=='/')
422 name++;
423 if (!*name)
424 goto return_base;
426 inode = nd->dentry->d_inode;
427 if (current->link_count)
428 lookup_flags = LOOKUP_FOLLOW;
430 /* At this point we know we have a real path component. */
431 for(;;) {
432 unsigned long hash;
433 struct qstr this;
434 unsigned int c;
436 err = permission(inode, MAY_EXEC);
437 dentry = ERR_PTR(err);
438 if (err)
439 break;
441 this.name = name;
442 c = *(const unsigned char *)name;
444 hash = init_name_hash();
445 do {
446 name++;
447 hash = partial_name_hash(c, hash);
448 c = *(const unsigned char *)name;
449 } while (c && (c != '/'));
450 this.len = name - (const char *) this.name;
451 this.hash = end_name_hash(hash);
453 /* remove trailing slashes? */
454 if (!c)
455 goto last_component;
456 while (*++name == '/');
457 if (!*name)
458 goto last_with_slashes;
461 * "." and ".." are special - ".." especially so because it has
462 * to be able to know about the current root directory and
463 * parent relationships.
465 if (this.name[0] == '.') switch (this.len) {
466 default:
467 break;
468 case 2:
469 if (this.name[1] != '.')
470 break;
471 follow_dotdot(nd);
472 inode = nd->dentry->d_inode;
473 /* fallthrough */
474 case 1:
475 continue;
478 * See if the low-level filesystem might want
479 * to use its own hash..
481 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
482 err = nd->dentry->d_op->d_hash(nd->dentry, &this);
483 if (err < 0)
484 break;
486 /* This does the actual lookups.. */
487 dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
488 if (!dentry) {
489 dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
490 err = PTR_ERR(dentry);
491 if (IS_ERR(dentry))
492 break;
494 /* Check mountpoints.. */
495 while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
498 err = -ENOENT;
499 inode = dentry->d_inode;
500 if (!inode)
501 goto out_dput;
502 err = -ENOTDIR;
503 if (!inode->i_op)
504 goto out_dput;
506 if (inode->i_op->follow_link) {
507 err = do_follow_link(dentry, nd);
508 dput(dentry);
509 if (err)
510 goto return_err;
511 err = -ENOENT;
512 inode = nd->dentry->d_inode;
513 if (!inode)
514 break;
515 err = -ENOTDIR;
516 if (!inode->i_op)
517 break;
518 } else {
519 dput(nd->dentry);
520 nd->dentry = dentry;
522 err = -ENOTDIR;
523 if (!inode->i_op->lookup)
524 break;
525 continue;
526 /* here ends the main loop */
528 last_with_slashes:
529 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
530 last_component:
531 if (lookup_flags & LOOKUP_PARENT)
532 goto lookup_parent;
533 if (this.name[0] == '.') switch (this.len) {
534 default:
535 break;
536 case 2:
537 if (this.name[1] != '.')
538 break;
539 follow_dotdot(nd);
540 inode = nd->dentry->d_inode;
541 /* fallthrough */
542 case 1:
543 goto return_base;
545 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
546 err = nd->dentry->d_op->d_hash(nd->dentry, &this);
547 if (err < 0)
548 break;
550 dentry = cached_lookup(nd->dentry, &this, 0);
551 if (!dentry) {
552 dentry = real_lookup(nd->dentry, &this, 0);
553 err = PTR_ERR(dentry);
554 if (IS_ERR(dentry))
555 break;
557 while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
559 inode = dentry->d_inode;
560 if ((lookup_flags & LOOKUP_FOLLOW)
561 && inode && inode->i_op && inode->i_op->follow_link) {
562 err = do_follow_link(dentry, nd);
563 dput(dentry);
564 if (err)
565 goto return_err;
566 inode = nd->dentry->d_inode;
567 } else {
568 dput(nd->dentry);
569 nd->dentry = dentry;
571 err = -ENOENT;
572 if (!inode)
573 goto no_inode;
574 if (lookup_flags & LOOKUP_DIRECTORY) {
575 err = -ENOTDIR;
576 if (!inode->i_op || !inode->i_op->lookup)
577 break;
579 goto return_base;
580 no_inode:
581 err = -ENOENT;
582 if (lookup_flags & (LOOKUP_POSITIVE|LOOKUP_DIRECTORY))
583 break;
584 goto return_base;
585 lookup_parent:
586 nd->last = this;
587 nd->last_type = LAST_NORM;
588 if (this.name[0] != '.')
589 goto return_base;
590 if (this.len == 1)
591 nd->last_type = LAST_DOT;
592 else if (this.len == 2 && this.name[1] == '.')
593 nd->last_type = LAST_DOTDOT;
594 return_base:
595 return 0;
596 out_dput:
597 dput(dentry);
598 break;
600 path_release(nd);
601 return_err:
602 return err;
605 /* SMP-safe */
606 /* returns 1 if everything is done */
607 static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
609 if (path_walk(name, nd))
610 return 0;
612 if (!nd->dentry->d_inode) {
613 struct nameidata nd_root;
614 nd_root.last_type = LAST_ROOT;
615 nd_root.flags = nd->flags;
616 read_lock(&current->fs->lock);
617 nd_root.mnt = mntget(current->fs->rootmnt);
618 nd_root.dentry = dget(current->fs->root);
619 read_unlock(&current->fs->lock);
620 if (path_walk(name, &nd_root))
621 return 1;
622 if (nd_root.dentry->d_inode) {
623 path_release(nd);
624 nd->dentry = nd_root.dentry;
625 nd->mnt = nd_root.mnt;
626 nd->last = nd_root.last;
627 return 1;
629 path_release(&nd_root);
631 return 1;
634 void set_fs_altroot(void)
636 char *emul = __emul_prefix();
637 struct nameidata nd;
638 struct vfsmount *mnt = NULL, *oldmnt;
639 struct dentry *dentry = NULL, *olddentry;
640 if (emul) {
641 read_lock(&current->fs->lock);
642 nd.mnt = mntget(current->fs->rootmnt);
643 nd.dentry = dget(current->fs->root);
644 read_unlock(&current->fs->lock);
645 nd.flags = LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_POSITIVE;
646 if (path_walk(emul,&nd) == 0) {
647 mnt = nd.mnt;
648 dentry = nd.dentry;
651 write_lock(&current->fs->lock);
652 oldmnt = current->fs->altrootmnt;
653 olddentry = current->fs->altroot;
654 current->fs->altrootmnt = mnt;
655 current->fs->altroot = dentry;
656 write_unlock(&current->fs->lock);
657 if (olddentry) {
658 dput(olddentry);
659 mntput(oldmnt);
663 /* SMP-safe */
664 static inline int
665 walk_init_root(const char *name, struct nameidata *nd)
667 read_lock(&current->fs->lock);
668 if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
669 nd->mnt = mntget(current->fs->altrootmnt);
670 nd->dentry = dget(current->fs->altroot);
671 read_unlock(&current->fs->lock);
672 if (__emul_lookup_dentry(name,nd))
673 return 0;
674 read_lock(&current->fs->lock);
676 nd->mnt = mntget(current->fs->rootmnt);
677 nd->dentry = dget(current->fs->root);
678 read_unlock(&current->fs->lock);
679 return 1;
682 /* SMP-safe */
683 int path_init(const char *name,unsigned int flags,struct nameidata *nd)
685 nd->last_type = LAST_ROOT; /* if there are only slashes... */
686 nd->flags = flags;
687 if (*name=='/')
688 return walk_init_root(name,nd);
689 read_lock(&current->fs->lock);
690 nd->mnt = mntget(current->fs->pwdmnt);
691 nd->dentry = dget(current->fs->pwd);
692 read_unlock(&current->fs->lock);
693 return 1;
697 * Restricted form of lookup. Doesn't follow links, single-component only,
698 * needs parent already locked. Doesn't follow mounts.
699 * SMP-safe.
701 struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
703 struct dentry * dentry;
704 struct inode *inode;
705 int err;
707 inode = base->d_inode;
708 err = permission(inode, MAY_EXEC);
709 dentry = ERR_PTR(err);
710 if (err)
711 goto out;
714 * See if the low-level filesystem might want
715 * to use its own hash..
717 if (base->d_op && base->d_op->d_hash) {
718 err = base->d_op->d_hash(base, name);
719 dentry = ERR_PTR(err);
720 if (err < 0)
721 goto out;
724 dentry = cached_lookup(base, name, 0);
725 if (!dentry) {
726 struct dentry *new = d_alloc(base, name);
727 dentry = ERR_PTR(-ENOMEM);
728 if (!new)
729 goto out;
730 lock_kernel();
731 dentry = inode->i_op->lookup(inode, new);
732 unlock_kernel();
733 if (!dentry)
734 dentry = new;
735 else
736 dput(new);
738 out:
739 return dentry;
742 /* SMP-safe */
743 struct dentry * lookup_one(const char * name, struct dentry * base)
745 unsigned long hash;
746 struct qstr this;
747 unsigned int c;
749 this.name = name;
750 c = *(const unsigned char *)name;
751 if (!c)
752 goto access;
754 hash = init_name_hash();
755 do {
756 name++;
757 if (c == '/')
758 goto access;
759 hash = partial_name_hash(c, hash);
760 c = *(const unsigned char *)name;
761 } while (c);
762 this.len = name - (const char *) this.name;
763 this.hash = end_name_hash(hash);
765 return lookup_hash(&this, base);
766 access:
767 return ERR_PTR(-EACCES);
/*
 * namei()
 *
 * is used by most simple commands to get the inode of a specified name.
 * Open, link etc use their own routines, but this is enough for things
 * like 'chmod' etc.
 *
 * namei exists in two versions: namei/lnamei. The only difference is
 * that namei follows links, while lnamei does not.
 * SMP-safe
 *
 * __user_walk() copies @name in with getname(), resolves it into @nd
 * with path_init() / path_walk() using @flags, and gives the copy
 * back.  Returns 0 or a negative errno.
 */
int __user_walk(const char *name, unsigned flags, struct nameidata *nd)
{
	char *kname = getname(name);
	int err;

	if (IS_ERR(kname))
		return PTR_ERR(kname);

	err = 0;
	if (path_init(kname, flags, nd))
		err = path_walk(kname, nd);
	putname(kname);
	return err;
}
798 * It's inline, so penalty for filesystems that don't use sticky bit is
799 * minimal.
801 static inline int check_sticky(struct inode *dir, struct inode *inode)
803 if (!(dir->i_mode & S_ISVTX))
804 return 0;
805 if (inode->i_uid == current->fsuid)
806 return 0;
807 if (dir->i_uid == current->fsuid)
808 return 0;
809 return !capable(CAP_FOWNER);
813 * Check whether we can remove a link victim from directory dir, check
814 * whether the type of victim is right.
815 * 1. We can't do it if dir is read-only (done in permission())
816 * 2. We should have write and exec permissions on dir
817 * 3. We can't remove anything from append-only dir
818 * 4. We can't do anything with immutable dir (done in permission())
819 * 5. If the sticky bit on dir is set we should either
820 * a. be owner of dir, or
821 * b. be owner of victim, or
822 * c. have CAP_FOWNER capability
823 * 6. If the victim is append-only or immutable we can't do antyhing with
824 * links pointing to it.
825 * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
826 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
827 * 9. We can't remove a root or mountpoint.
829 static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir)
831 int error;
832 if (!victim->d_inode || victim->d_parent->d_inode != dir)
833 return -ENOENT;
834 error = permission(dir,MAY_WRITE | MAY_EXEC);
835 if (error)
836 return error;
837 if (IS_APPEND(dir))
838 return -EPERM;
839 if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
840 IS_IMMUTABLE(victim->d_inode))
841 return -EPERM;
842 if (isdir) {
843 if (!S_ISDIR(victim->d_inode->i_mode))
844 return -ENOTDIR;
845 if (IS_ROOT(victim))
846 return -EBUSY;
847 } else if (S_ISDIR(victim->d_inode->i_mode))
848 return -EISDIR;
849 return 0;
852 /* Check whether we can create an object with dentry child in directory
853 * dir.
854 * 1. We can't do it if child already exists (open has special treatment for
855 * this case, but since we are inlined it's OK)
856 * 2. We can't do it if dir is read-only (done in permission())
857 * 3. We should have write and exec permissions on dir
858 * 4. We can't do it if dir is immutable (done in permission())
860 static inline int may_create(struct inode *dir, struct dentry *child) {
861 if (child->d_inode)
862 return -EEXIST;
863 if (IS_DEADDIR(dir))
864 return -ENOENT;
865 return permission(dir,MAY_WRITE | MAY_EXEC);
869 * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security
870 * reasons.
872 * O_DIRECTORY translates into forcing a directory lookup.
874 static inline int lookup_flags(unsigned int f)
876 unsigned long retval = LOOKUP_FOLLOW;
878 if (f & O_NOFOLLOW)
879 retval &= ~LOOKUP_FOLLOW;
881 if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
882 retval &= ~LOOKUP_FOLLOW;
884 if (f & O_DIRECTORY)
885 retval |= LOOKUP_DIRECTORY;
887 return retval;
890 int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
892 int error;
894 mode &= S_IALLUGO & ~current->fs->umask;
895 mode |= S_IFREG;
897 down(&dir->i_zombie);
898 error = may_create(dir, dentry);
899 if (error)
900 goto exit_lock;
902 error = -EACCES; /* shouldn't it be ENOSYS? */
903 if (!dir->i_op || !dir->i_op->create)
904 goto exit_lock;
906 DQUOT_INIT(dir);
907 lock_kernel();
908 error = dir->i_op->create(dir, dentry, mode);
909 unlock_kernel();
910 exit_lock:
911 up(&dir->i_zombie);
912 return error;
916 * open_namei()
918 * namei for open - this is in fact almost the whole open-routine.
920 * Note that the low bits of "flag" aren't the same as in the open
921 * system call - they are 00 - no permissions needed
922 * 01 - read permission needed
923 * 10 - write permission needed
924 * 11 - read/write permissions needed
925 * which is a lot more logical, and also allows the "no perm" needed
926 * for symlinks (where the permissions are checked later).
927 * SMP-safe
929 int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
931 int acc_mode, error = 0;
932 struct inode *inode;
933 struct dentry *dentry;
934 struct dentry *dir;
935 int count = 0;
937 acc_mode = ACC_MODE(flag);
940 * The simplest case - just a plain lookup.
942 if (!(flag & O_CREAT)) {
943 if (path_init(pathname, lookup_flags(flag), nd))
944 error = path_walk(pathname, nd);
945 if (error)
946 return error;
947 dentry = nd->dentry;
948 goto ok;
952 * Create - we need to know the parent.
954 if (path_init(pathname, LOOKUP_PARENT, nd))
955 error = path_walk(pathname, nd);
956 if (error)
957 return error;
960 * We have the parent and last component. First of all, check
961 * that we are not asked to creat(2) an obvious directory - that
962 * will not do.
964 error = -EISDIR;
965 if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
966 goto exit;
968 dir = nd->dentry;
969 down(&dir->d_inode->i_sem);
970 dentry = lookup_hash(&nd->last, nd->dentry);
972 do_last:
973 error = PTR_ERR(dentry);
974 if (IS_ERR(dentry)) {
975 up(&dir->d_inode->i_sem);
976 goto exit;
979 /* Negative dentry, just create the file */
980 if (!dentry->d_inode) {
981 error = vfs_create(dir->d_inode, dentry, mode);
982 up(&dir->d_inode->i_sem);
983 dput(nd->dentry);
984 nd->dentry = dentry;
985 if (error)
986 goto exit;
987 /* Don't check for write permission, don't truncate */
988 acc_mode = 0;
989 flag &= ~O_TRUNC;
990 goto ok;
994 * It already exists.
996 up(&dir->d_inode->i_sem);
998 error = -EEXIST;
999 if (flag & O_EXCL)
1000 goto exit_dput;
1002 if (d_mountpoint(dentry)) {
1003 error = -ELOOP;
1004 if (flag & O_NOFOLLOW)
1005 goto exit_dput;
1006 do __follow_down(&nd->mnt,&dentry); while(d_mountpoint(dentry));
1008 error = -ENOENT;
1009 if (!dentry->d_inode)
1010 goto exit_dput;
1011 if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
1012 goto do_link;
1014 dput(nd->dentry);
1015 nd->dentry = dentry;
1016 error = -EISDIR;
1017 if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
1018 goto exit;
1020 error = -ENOENT;
1021 inode = dentry->d_inode;
1022 if (!inode)
1023 goto exit;
1025 error = -ELOOP;
1026 if (S_ISLNK(inode->i_mode))
1027 goto exit;
1029 error = -EISDIR;
1030 if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
1031 goto exit;
1033 error = permission(inode,acc_mode);
1034 if (error)
1035 goto exit;
1038 * FIFO's, sockets and device files are special: they don't
1039 * actually live on the filesystem itself, and as such you
1040 * can write to them even if the filesystem is read-only.
1042 if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
1043 flag &= ~O_TRUNC;
1044 } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
1045 error = -EACCES;
1046 if (IS_NODEV(inode))
1047 goto exit;
1049 flag &= ~O_TRUNC;
1050 } else {
1051 error = -EROFS;
1052 if (IS_RDONLY(inode) && (flag & 2))
1053 goto exit;
1056 * An append-only file must be opened in append mode for writing.
1058 error = -EPERM;
1059 if (IS_APPEND(inode)) {
1060 if ((flag & FMODE_WRITE) && !(flag & O_APPEND))
1061 goto exit;
1062 if (flag & O_TRUNC)
1063 goto exit;
1066 if (flag & O_TRUNC) {
1067 error = get_write_access(inode);
1068 if (error)
1069 goto exit;
1072 * Refuse to truncate files with mandatory locks held on them.
1074 error = locks_verify_locked(inode);
1075 if (!error) {
1076 DQUOT_INIT(inode);
1078 error = do_truncate(dentry, 0);
1080 put_write_access(inode);
1081 if (error)
1082 goto exit;
1083 } else
1084 if (flag & FMODE_WRITE)
1085 DQUOT_INIT(inode);
1087 return 0;
1089 exit_dput:
1090 dput(dentry);
1091 exit:
1092 path_release(nd);
1093 return error;
1095 do_link:
1096 error = -ELOOP;
1097 if (flag & O_NOFOLLOW)
1098 goto exit_dput;
1100 * This is subtle. Instead of calling do_follow_link() we do the
1101 * thing by hands. The reason is that this way we have zero link_count
1102 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
1103 * After that we have the parent and last component, i.e.
1104 * we are in the same situation as after the first path_walk().
1105 * Well, almost - if the last component is normal we get its copy
1106 * stored in nd->last.name and we will have to putname() it when we
1107 * are done. Procfs-like symlinks just set LAST_BIND.
1109 UPDATE_ATIME(dentry->d_inode);
1110 error = dentry->d_inode->i_op->follow_link(dentry, nd);
1111 dput(dentry);
1112 if (error)
1113 return error;
1114 if (nd->last_type == LAST_BIND) {
1115 dentry = nd->dentry;
1116 goto ok;
1118 error = -EISDIR;
1119 if (nd->last_type != LAST_NORM)
1120 goto exit;
1121 if (nd->last.name[nd->last.len]) {
1122 putname(nd->last.name);
1123 goto exit;
1125 if (count++==32) {
1126 dentry = nd->dentry;
1127 putname(nd->last.name);
1128 goto ok;
1130 dir = nd->dentry;
1131 down(&dir->d_inode->i_sem);
1132 dentry = lookup_hash(&nd->last, nd->dentry);
1133 putname(nd->last.name);
1134 goto do_last;
1137 /* SMP-safe */
1138 static struct dentry *lookup_create(struct nameidata *nd, int is_dir)
1140 struct dentry *dentry;
1142 down(&nd->dentry->d_inode->i_sem);
1143 dentry = ERR_PTR(-EEXIST);
1144 if (nd->last_type != LAST_NORM)
1145 goto fail;
1146 dentry = lookup_hash(&nd->last, nd->dentry);
1147 if (IS_ERR(dentry))
1148 goto fail;
1149 if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
1150 goto enoent;
1151 return dentry;
1152 enoent:
1153 dput(dentry);
1154 dentry = ERR_PTR(-ENOENT);
1155 fail:
1156 return dentry;
1159 int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1161 int error = -EPERM;
1163 mode &= ~current->fs->umask;
1165 down(&dir->i_zombie);
1166 if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
1167 goto exit_lock;
1169 error = may_create(dir, dentry);
1170 if (error)
1171 goto exit_lock;
1173 error = -EPERM;
1174 if (!dir->i_op || !dir->i_op->mknod)
1175 goto exit_lock;
1177 DQUOT_INIT(dir);
1178 lock_kernel();
1179 error = dir->i_op->mknod(dir, dentry, mode, dev);
1180 unlock_kernel();
1181 exit_lock:
1182 up(&dir->i_zombie);
1183 return error;
1186 asmlinkage long sys_mknod(const char * filename, int mode, dev_t dev)
1188 int error = 0;
1189 char * tmp;
1190 struct dentry * dentry;
1191 struct nameidata nd;
1193 if (S_ISDIR(mode))
1194 return -EPERM;
1195 tmp = getname(filename);
1196 if (IS_ERR(tmp))
1197 return PTR_ERR(tmp);
1199 if (path_init(tmp, LOOKUP_PARENT, &nd))
1200 error = path_walk(tmp, &nd);
1201 if (error)
1202 goto out;
1203 dentry = lookup_create(&nd, 0);
1204 error = PTR_ERR(dentry);
1205 if (!IS_ERR(dentry)) {
1206 switch (mode & S_IFMT) {
1207 case 0: case S_IFREG:
1208 error = vfs_create(nd.dentry->d_inode,dentry,mode);
1209 break;
1210 case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
1211 error = vfs_mknod(nd.dentry->d_inode,dentry,mode,dev);
1212 break;
1213 case S_IFDIR:
1214 error = -EPERM;
1215 break;
1216 default:
1217 error = -EINVAL;
1219 dput(dentry);
1221 up(&nd.dentry->d_inode->i_sem);
1222 path_release(&nd);
1223 out:
1224 putname(tmp);
1226 return error;
1229 int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1231 int error;
1233 down(&dir->i_zombie);
1234 error = may_create(dir, dentry);
1235 if (error)
1236 goto exit_lock;
1238 error = -EPERM;
1239 if (!dir->i_op || !dir->i_op->mkdir)
1240 goto exit_lock;
1242 DQUOT_INIT(dir);
1243 mode &= (S_IRWXUGO|S_ISVTX) & ~current->fs->umask;
1244 lock_kernel();
1245 error = dir->i_op->mkdir(dir, dentry, mode);
1246 unlock_kernel();
1248 exit_lock:
1249 up(&dir->i_zombie);
1250 return error;
1253 asmlinkage long sys_mkdir(const char * pathname, int mode)
1255 int error = 0;
1256 char * tmp;
1258 tmp = getname(pathname);
1259 error = PTR_ERR(tmp);
1260 if (!IS_ERR(tmp)) {
1261 struct dentry *dentry;
1262 struct nameidata nd;
1264 if (path_init(tmp, LOOKUP_PARENT, &nd))
1265 error = path_walk(tmp, &nd);
1266 if (error)
1267 goto out;
1268 dentry = lookup_create(&nd, 1);
1269 error = PTR_ERR(dentry);
1270 if (!IS_ERR(dentry)) {
1271 error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
1272 dput(dentry);
1274 up(&nd.dentry->d_inode->i_sem);
1275 path_release(&nd);
1276 out:
1277 putname(tmp);
1280 return error;
1284 * We try to drop the dentry early: we should have
1285 * a usage count of 2 if we're the only user of this
1286 * dentry, and if that is true (possibly after pruning
1287 * the dcache), then we drop the dentry now.
1289 * A low-level filesystem can, if it chooses, legally
1290 * do a
1292 * if (!d_unhashed(dentry))
1293 * return -EBUSY;
1295 * if it cannot handle the case of removing a directory
1296 * that is still in use by something else..
1298 static void d_unhash(struct dentry *dentry)
1300 dget(dentry);
1301 switch (atomic_read(&dentry->d_count)) {
1302 default:
1303 shrink_dcache_parent(dentry);
1304 if (atomic_read(&dentry->d_count) != 2)
1305 break;
1306 case 2:
1307 d_drop(dentry);
1311 int vfs_rmdir(struct inode *dir, struct dentry *dentry)
1313 int error;
1315 error = may_delete(dir, dentry, 1);
1316 if (error)
1317 return error;
1319 if (!dir->i_op || !dir->i_op->rmdir)
1320 return -EPERM;
1322 DQUOT_INIT(dir);
1324 double_down(&dir->i_zombie, &dentry->d_inode->i_zombie);
1325 d_unhash(dentry);
1326 if (IS_DEADDIR(dir))
1327 error = -ENOENT;
1328 else if (d_mountpoint(dentry))
1329 error = -EBUSY;
1330 else {
1331 lock_kernel();
1332 error = dir->i_op->rmdir(dir, dentry);
1333 unlock_kernel();
1334 if (!error)
1335 dentry->d_inode->i_flags |= S_DEAD;
1337 double_up(&dir->i_zombie, &dentry->d_inode->i_zombie);
1338 if (!error)
1339 d_delete(dentry);
1340 dput(dentry);
1342 return error;
1345 asmlinkage long sys_rmdir(const char * pathname)
1347 int error = 0;
1348 char * name;
1349 struct dentry *dentry;
1350 struct nameidata nd;
1352 name = getname(pathname);
1353 if(IS_ERR(name))
1354 return PTR_ERR(name);
1356 if (path_init(name, LOOKUP_PARENT, &nd))
1357 error = path_walk(name, &nd);
1358 if (error)
1359 goto exit;
1361 switch(nd.last_type) {
1362 case LAST_DOTDOT:
1363 error = -ENOTEMPTY;
1364 goto exit1;
1365 case LAST_ROOT: case LAST_DOT:
1366 error = -EBUSY;
1367 goto exit1;
1369 down(&nd.dentry->d_inode->i_sem);
1370 dentry = lookup_hash(&nd.last, nd.dentry);
1371 error = PTR_ERR(dentry);
1372 if (!IS_ERR(dentry)) {
1373 error = vfs_rmdir(nd.dentry->d_inode, dentry);
1374 dput(dentry);
1376 up(&nd.dentry->d_inode->i_sem);
1377 exit1:
1378 path_release(&nd);
1379 exit:
1380 putname(name);
1381 return error;
1384 int vfs_unlink(struct inode *dir, struct dentry *dentry)
1386 int error;
1388 down(&dir->i_zombie);
1389 error = may_delete(dir, dentry, 0);
1390 if (!error) {
1391 error = -EPERM;
1392 if (dir->i_op && dir->i_op->unlink) {
1393 DQUOT_INIT(dir);
1394 if (d_mountpoint(dentry))
1395 error = -EBUSY;
1396 else {
1397 lock_kernel();
1398 error = dir->i_op->unlink(dir, dentry);
1399 unlock_kernel();
1400 if (!error)
1401 d_delete(dentry);
1405 up(&dir->i_zombie);
1406 return error;
1409 asmlinkage long sys_unlink(const char * pathname)
1411 int error = 0;
1412 char * name;
1413 struct dentry *dentry;
1414 struct nameidata nd;
1416 name = getname(pathname);
1417 if(IS_ERR(name))
1418 return PTR_ERR(name);
1420 if (path_init(name, LOOKUP_PARENT, &nd))
1421 error = path_walk(name, &nd);
1422 if (error)
1423 goto exit;
1424 error = -EISDIR;
1425 if (nd.last_type != LAST_NORM)
1426 goto exit1;
1427 down(&nd.dentry->d_inode->i_sem);
1428 dentry = lookup_hash(&nd.last, nd.dentry);
1429 error = PTR_ERR(dentry);
1430 if (!IS_ERR(dentry)) {
1431 /* Why not before? Because we want correct error value */
1432 if (nd.last.name[nd.last.len])
1433 goto slashes;
1434 error = vfs_unlink(nd.dentry->d_inode, dentry);
1435 exit2:
1436 dput(dentry);
1438 up(&nd.dentry->d_inode->i_sem);
1439 exit1:
1440 path_release(&nd);
1441 exit:
1442 putname(name);
1444 return error;
1446 slashes:
1447 error = !dentry->d_inode ? -ENOENT :
1448 S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
1449 goto exit2;
1452 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
1454 int error;
1456 down(&dir->i_zombie);
1457 error = may_create(dir, dentry);
1458 if (error)
1459 goto exit_lock;
1461 error = -EPERM;
1462 if (!dir->i_op || !dir->i_op->symlink)
1463 goto exit_lock;
1465 DQUOT_INIT(dir);
1466 lock_kernel();
1467 error = dir->i_op->symlink(dir, dentry, oldname);
1468 unlock_kernel();
1470 exit_lock:
1471 up(&dir->i_zombie);
1472 return error;
1475 asmlinkage long sys_symlink(const char * oldname, const char * newname)
1477 int error = 0;
1478 char * from;
1479 char * to;
1481 from = getname(oldname);
1482 if(IS_ERR(from))
1483 return PTR_ERR(from);
1484 to = getname(newname);
1485 error = PTR_ERR(to);
1486 if (!IS_ERR(to)) {
1487 struct dentry *dentry;
1488 struct nameidata nd;
1490 if (path_init(to, LOOKUP_PARENT, &nd))
1491 error = path_walk(to, &nd);
1492 if (error)
1493 goto out;
1494 dentry = lookup_create(&nd, 0);
1495 error = PTR_ERR(dentry);
1496 if (!IS_ERR(dentry)) {
1497 error = vfs_symlink(nd.dentry->d_inode, dentry, from);
1498 dput(dentry);
1500 up(&nd.dentry->d_inode->i_sem);
1501 path_release(&nd);
1502 out:
1503 putname(to);
1505 putname(from);
1506 return error;
1509 int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
1511 struct inode *inode;
1512 int error;
1514 down(&dir->i_zombie);
1515 error = -ENOENT;
1516 inode = old_dentry->d_inode;
1517 if (!inode)
1518 goto exit_lock;
1520 error = may_create(dir, new_dentry);
1521 if (error)
1522 goto exit_lock;
1524 error = -EXDEV;
1525 if (dir->i_dev != inode->i_dev)
1526 goto exit_lock;
1529 * A link to an append-only or immutable file cannot be created.
1531 error = -EPERM;
1532 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1533 goto exit_lock;
1534 if (!dir->i_op || !dir->i_op->link)
1535 goto exit_lock;
1537 DQUOT_INIT(dir);
1538 lock_kernel();
1539 error = dir->i_op->link(old_dentry, dir, new_dentry);
1540 unlock_kernel();
1542 exit_lock:
1543 up(&dir->i_zombie);
1544 return error;
1548 * Hardlinks are often used in delicate situations. We avoid
1549 * security-related surprises by not following symlinks on the
1550 * newname. --KAB
1552 * We don't follow them on the oldname either to be compatible
1553 * with linux 2.0, and to avoid hard-linking to directories
1554 * and other special files. --ADM
1556 asmlinkage long sys_link(const char * oldname, const char * newname)
1558 int error;
1559 char * from;
1560 char * to;
1562 from = getname(oldname);
1563 if(IS_ERR(from))
1564 return PTR_ERR(from);
1565 to = getname(newname);
1566 error = PTR_ERR(to);
1567 if (!IS_ERR(to)) {
1568 struct dentry *new_dentry;
1569 struct nameidata nd, old_nd;
1571 error = 0;
1572 if (path_init(from, LOOKUP_POSITIVE, &old_nd))
1573 error = path_walk(from, &old_nd);
1574 if (error)
1575 goto exit;
1576 if (path_init(to, LOOKUP_PARENT, &nd))
1577 error = path_walk(to, &nd);
1578 if (error)
1579 goto out;
1580 error = -EXDEV;
1581 if (old_nd.mnt != nd.mnt)
1582 goto out;
1583 new_dentry = lookup_create(&nd, 0);
1584 error = PTR_ERR(new_dentry);
1585 if (!IS_ERR(new_dentry)) {
1586 error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
1587 dput(new_dentry);
1589 up(&nd.dentry->d_inode->i_sem);
1590 path_release(&nd);
1591 out:
1592 path_release(&old_nd);
1593 exit:
1594 putname(to);
1596 putname(from);
1598 return error;
1602 * The worst of all namespace operations - renaming directory. "Perverted"
1603 * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
1604 * Problems:
1605 * a) we can get into loop creation. Check is done in is_subdir().
1606 * b) race potential - two innocent renames can create a loop together.
1607 * That's where 4.4 screws up. Current fix: serialization on
1608 * sb->s_vfs_rename_sem. We might be more accurate, but that's another
1609 * story.
1610 * c) we have to lock _three_ objects - parents and victim (if it exists).
1611 * And that - after we got ->i_sem on parents (until then we don't know
1612 * whether the target exists at all, let alone whether it is a directory
1613 * or not). Solution: ->i_zombie. Taken only after ->i_sem. Always taken
1614 * on link creation/removal of any kind. And taken (without ->i_sem) on
1615 * directory that will be removed (both in rmdir() and here).
1616 * d) some filesystems don't support opened-but-unlinked directories,
1617 * either because of layout or because they are not ready to deal with
1618 * all cases correctly. The latter will be fixed (taking this sort of
1619 * stuff into VFS), but the former is not going away. Solution: the same
1620 * trick as in rmdir().
1621 * e) conversion from fhandle to dentry may come in the wrong moment - when
1622 * we are removing the target. Solution: we will have to grab ->i_zombie
1623 * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
1624 * ->i_sem on parents, which works but leads to some truly excessive
1625 * locking].
1627 int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
1628 struct inode *new_dir, struct dentry *new_dentry)
1630 int error;
1631 struct inode *target;
1633 if (old_dentry->d_inode == new_dentry->d_inode)
1634 return 0;
1636 error = may_delete(old_dir, old_dentry, 1);
1637 if (error)
1638 return error;
1640 if (new_dir->i_dev != old_dir->i_dev)
1641 return -EXDEV;
1643 if (!new_dentry->d_inode)
1644 error = may_create(new_dir, new_dentry);
1645 else
1646 error = may_delete(new_dir, new_dentry, 1);
1647 if (error)
1648 return error;
1650 if (!old_dir->i_op || !old_dir->i_op->rename)
1651 return -EPERM;
1654 * If we are going to change the parent - check write permissions,
1655 * we'll need to flip '..'.
1657 if (new_dir != old_dir) {
1658 error = permission(old_dentry->d_inode, MAY_WRITE);
1660 if (error)
1661 return error;
1663 DQUOT_INIT(old_dir);
1664 DQUOT_INIT(new_dir);
1665 down(&old_dir->i_sb->s_vfs_rename_sem);
1666 error = -EINVAL;
1667 if (is_subdir(new_dentry, old_dentry))
1668 goto out_unlock;
1669 target = new_dentry->d_inode;
1670 if (target) { /* Hastur! Hastur! Hastur! */
1671 triple_down(&old_dir->i_zombie,
1672 &new_dir->i_zombie,
1673 &target->i_zombie);
1674 d_unhash(new_dentry);
1675 } else
1676 double_down(&old_dir->i_zombie,
1677 &new_dir->i_zombie);
1678 if (IS_DEADDIR(old_dir)||IS_DEADDIR(new_dir))
1679 error = -ENOENT;
1680 else if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
1681 error = -EBUSY;
1682 else
1683 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1684 if (target) {
1685 if (!error)
1686 target->i_flags |= S_DEAD;
1687 triple_up(&old_dir->i_zombie,
1688 &new_dir->i_zombie,
1689 &target->i_zombie);
1690 d_rehash(new_dentry);
1691 dput(new_dentry);
1692 } else
1693 double_up(&old_dir->i_zombie,
1694 &new_dir->i_zombie);
1696 if (!error)
1697 d_move(old_dentry,new_dentry);
1698 out_unlock:
1699 up(&old_dir->i_sb->s_vfs_rename_sem);
1700 return error;
1703 int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
1704 struct inode *new_dir, struct dentry *new_dentry)
1706 int error;
1708 if (old_dentry->d_inode == new_dentry->d_inode)
1709 return 0;
1711 error = may_delete(old_dir, old_dentry, 0);
1712 if (error)
1713 return error;
1715 if (new_dir->i_dev != old_dir->i_dev)
1716 return -EXDEV;
1718 if (!new_dentry->d_inode)
1719 error = may_create(new_dir, new_dentry);
1720 else
1721 error = may_delete(new_dir, new_dentry, 0);
1722 if (error)
1723 return error;
1725 if (!old_dir->i_op || !old_dir->i_op->rename)
1726 return -EPERM;
1728 DQUOT_INIT(old_dir);
1729 DQUOT_INIT(new_dir);
1730 double_down(&old_dir->i_zombie, &new_dir->i_zombie);
1731 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
1732 error = -EBUSY;
1733 else
1734 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1735 double_up(&old_dir->i_zombie, &new_dir->i_zombie);
1736 if (error)
1737 return error;
1738 /* The following d_move() should become unconditional */
1739 if (!(old_dir->i_sb->s_flags & MS_ODD_RENAME)) {
1740 d_move(old_dentry, new_dentry);
1742 return 0;
1745 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1746 struct inode *new_dir, struct dentry *new_dentry)
1748 if (S_ISDIR(old_dentry->d_inode->i_mode))
1749 return vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
1750 else
1751 return vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
1754 static inline int do_rename(const char * oldname, const char * newname)
1756 int error = 0;
1757 struct dentry * old_dir, * new_dir;
1758 struct dentry * old_dentry, *new_dentry;
1759 struct nameidata oldnd, newnd;
1761 if (path_init(oldname, LOOKUP_PARENT, &oldnd))
1762 error = path_walk(oldname, &oldnd);
1764 if (error)
1765 goto exit;
1767 if (path_init(newname, LOOKUP_PARENT, &newnd))
1768 error = path_walk(newname, &newnd);
1769 if (error)
1770 goto exit1;
1772 error = -EXDEV;
1773 if (oldnd.mnt != newnd.mnt)
1774 goto exit2;
1776 old_dir = oldnd.dentry;
1777 error = -EBUSY;
1778 if (oldnd.last_type != LAST_NORM)
1779 goto exit2;
1781 new_dir = newnd.dentry;
1782 if (newnd.last_type != LAST_NORM)
1783 goto exit2;
1785 double_lock(new_dir, old_dir);
1787 old_dentry = lookup_hash(&oldnd.last, old_dir);
1788 error = PTR_ERR(old_dentry);
1789 if (IS_ERR(old_dentry))
1790 goto exit3;
1791 /* source must exist */
1792 error = -ENOENT;
1793 if (!old_dentry->d_inode)
1794 goto exit4;
1795 /* unless the source is a directory trailing slashes give -ENOTDIR */
1796 if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
1797 error = -ENOTDIR;
1798 if (oldnd.last.name[oldnd.last.len])
1799 goto exit4;
1800 if (newnd.last.name[newnd.last.len])
1801 goto exit4;
1803 new_dentry = lookup_hash(&newnd.last, new_dir);
1804 error = PTR_ERR(new_dentry);
1805 if (IS_ERR(new_dentry))
1806 goto exit4;
1808 lock_kernel();
1809 error = vfs_rename(old_dir->d_inode, old_dentry,
1810 new_dir->d_inode, new_dentry);
1811 unlock_kernel();
1813 dput(new_dentry);
1814 exit4:
1815 dput(old_dentry);
1816 exit3:
1817 double_up(&new_dir->d_inode->i_sem, &old_dir->d_inode->i_sem);
1818 exit2:
1819 path_release(&newnd);
1820 exit1:
1821 path_release(&oldnd);
1822 exit:
1823 return error;
1826 asmlinkage long sys_rename(const char * oldname, const char * newname)
1828 int error;
1829 char * from;
1830 char * to;
1832 from = getname(oldname);
1833 if(IS_ERR(from))
1834 return PTR_ERR(from);
1835 to = getname(newname);
1836 error = PTR_ERR(to);
1837 if (!IS_ERR(to)) {
1838 error = do_rename(from,to);
1839 putname(to);
1841 putname(from);
1842 return error;
1845 int vfs_readlink(struct dentry *dentry, char *buffer, int buflen, const char *link)
1847 int len;
1849 len = PTR_ERR(link);
1850 if (IS_ERR(link))
1851 goto out;
1853 len = strlen(link);
1854 if (len > (unsigned) buflen)
1855 len = buflen;
1856 if (copy_to_user(buffer, link, len))
1857 len = -EFAULT;
1858 out:
1859 return len;
1862 static inline int
1863 __vfs_follow_link(struct nameidata *nd, const char *link)
1865 int res = 0;
1866 char *name;
1867 if (IS_ERR(link))
1868 goto fail;
1870 if (*link == '/') {
1871 path_release(nd);
1872 if (!walk_init_root(link, nd))
1873 /* weird __emul_prefix() stuff did it */
1874 goto out;
1876 res = path_walk(link, nd);
1877 out:
1878 if (current->link_count || res || nd->last_type!=LAST_NORM)
1879 return res;
1881 * If it is an iterative symlinks resolution in open_namei() we
1882 * have to copy the last component. And all that crap because of
1883 * bloody create() on broken symlinks. Furrfu...
1885 name = __getname();
1886 if (IS_ERR(name))
1887 goto fail_name;
1888 strcpy(name, nd->last.name);
1889 nd->last.name = name;
1890 return 0;
1891 fail_name:
1892 link = name;
1893 fail:
1894 path_release(nd);
1895 return PTR_ERR(link);
/*
 * vfs_follow_link - out-of-line wrapper around __vfs_follow_link();
 * arguments and return value are passed through unchanged.
 */
int vfs_follow_link(struct nameidata *nd, const char *link)
{
	int res = __vfs_follow_link(nd, link);

	return res;
}
1903 /* get the link contents into pagecache */
1904 static char *page_getlink(struct dentry * dentry, struct page **ppage)
1906 struct page * page;
1907 struct address_space *mapping = dentry->d_inode->i_mapping;
1908 page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage,
1909 NULL);
1910 if (IS_ERR(page))
1911 goto sync_fail;
1912 wait_on_page(page);
1913 if (!Page_Uptodate(page))
1914 goto async_fail;
1915 *ppage = page;
1916 return (char*) kmap(page);
1918 async_fail:
1919 page_cache_release(page);
1920 return ERR_PTR(-EIO);
1922 sync_fail:
1923 return (char*)page;
1926 int page_readlink(struct dentry *dentry, char *buffer, int buflen)
1928 struct page *page = NULL;
1929 char *s = page_getlink(dentry, &page);
1930 int res = vfs_readlink(dentry,buffer,buflen,s);
1931 if (page) {
1932 kunmap(page);
1933 page_cache_release(page);
1935 return res;
1938 int page_follow_link(struct dentry *dentry, struct nameidata *nd)
1940 struct page *page = NULL;
1941 char *s = page_getlink(dentry, &page);
1942 int res = __vfs_follow_link(nd, s);
1943 if (page) {
1944 kunmap(page);
1945 page_cache_release(page);
1947 return res;
1950 struct inode_operations page_symlink_inode_operations = {
1951 readlink: page_readlink,
1952 follow_link: page_follow_link,