fs/namei.c

   1 /*
   2  *  linux/fs/namei.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6
   7 /*
   8  * Some corrections by tytso.
   9  */
  10
  11 /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
  12  * lookup logic.
  13  */
  14
  15 #include <linux/mm.h>
  16 #include <linux/proc_fs.h>
  17 #include <linux/smp_lock.h>
  18 #include <linux/quotaops.h>
  19 #include <linux/pagemap.h>
  20 #include <linux/dcache.h>
  21
  22 #include <asm/uaccess.h>
  23 #include <asm/unaligned.h>
  24 #include <asm/semaphore.h>
  25 #include <asm/page.h>
  26 #include <asm/pgtable.h>
  27
  28 #include <asm/namei.h>
  29
  30 /* This can be removed after the beta phase. */
  31 #define CACHE_SUPERVISE /* debug the correctness of dcache entries */
  32 #undef DEBUG            /* some other debugging */
  33
  34
  35 #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
  36
  37 /* [Feb-1997 T. Schoebel-Theuer]
  38  * Fundamental changes in the pathname lookup mechanisms (namei)
  39  * were necessary because of omirr.  The reason is that omirr needs
  40  * to know the _real_ pathname, not the user-supplied one, in case
  41  * of symlinks (and also when transname replacements occur).
  42  *
  43  * The new code replaces the old recursive symlink resolution with
  44  * an iterative one (in case of non-nested symlink chains).  It does
  45  * this with calls to <fs>_follow_link().
  46  * As a side effect, dir_namei(), _namei() and follow_link() are now
  47  * replaced with a single function lookup_dentry() that can handle all
  48  * the special cases of the former code.
  49  *
  50  * With the new dcache, the pathname is stored at each inode, at least as
  51  * long as the refcount of the inode is positive.  As a side effect, the
  52  * size of the dcache depends on the inode cache and thus is dynamic.
  53  *
  54  * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
  55  * resolution to correspond with current state of the code.
  56  *
  57  * Note that the symlink resolution is not *completely* iterative.
  58  * There is still a significant amount of tail- and mid- recursion in
  59  * the algorithm.  Also, note that <fs>_readlink() is not used in
  60  * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
  61  * may return different results than <fs>_follow_link().  Many virtual
  62  * filesystems (including /proc) exhibit this behavior.
  63  */
  64
  65 /* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
  66  * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
  67  * and the name already exists in form of a symlink, try to create the new
  68  * name indicated by the symlink. The old code always complained that the
  69  * name already exists, due to not following the symlink even if its target
  70  * is nonexistent.  The new semantics affects also mknod() and link() when
  71  * the name is a symlink pointing to a non-existent name.
  72  *
  73  * I don't know which semantics is the right one, since I have no access
  74  * to standards. But I found by trial that HP-UX 9.0 has the full "new"
  75  * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
  76  * "old" one. Personally, I think the new semantics is much more logical.
  77  * Note that "ln old new" where "new" is a symlink pointing to a non-existing
  78  * file does succeed in both HP-UX and SunOs, but not in Solaris
  79  * and in the old Linux semantics.
  80  */
  81
  82 /* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
  83  * semantics.  See the comments in "open_namei" and "do_link" below.
  84  *
  85  * [10-Sep-98 Alan Modra] Another symlink change.
  86  */
  87
  88 /* In order to reduce some races, while at the same time doing additional
  89  * checking and hopefully speeding things up, we copy filenames to the
  90  * kernel data space before using them..
  91  *
  92  * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
  93  */
  94 static inline int do_getname(const char *filename, char *page)
  95 {
  96         int retval;
  97         unsigned long len = PAGE_SIZE;
  98
  99         if ((unsigned long) filename >= TASK_SIZE) {
 100                 if (!segment_eq(get_fs(), KERNEL_DS))
 101                         return -EFAULT;
 102         } else if (TASK_SIZE - (unsigned long) filename < PAGE_SIZE)
 103                 len = TASK_SIZE - (unsigned long) filename;
 104
 105         retval = strncpy_from_user((char *)page, filename, len);
 106         if (retval > 0) {
 107                 if (retval < len)
 108                         return 0;
 109                 return -ENAMETOOLONG;
 110         } else if (!retval)
 111                 retval = -ENOENT;
 112         return retval;
 113 }
 114
 115 char * getname(const char * filename)
 116 {
 117         char *tmp, *result;
 118
 119         result = ERR_PTR(-ENOMEM);
 120         tmp = __getname();
 121         if (tmp)  {
 122                 int retval = do_getname(filename, tmp);
 123
 124                 result = tmp;
 125                 if (retval < 0) {
 126                         putname(tmp);
 127                         result = ERR_PTR(retval);
 128                 }
 129         }
 130         return result;
 131 }
 132
 133 /*
 134  *      permission()
 135  *
 136  * is used to check for read/write/execute permissions on a file.
 137  * We use "fsuid" for this, letting us set arbitrary permissions
 138  * for filesystem access without changing the "normal" uids which
 139  * are used for other things..
 140  */
 141 int permission(struct inode * inode,int mask)
 142 {
 143         int mode = inode->i_mode;
 144
 145         if (inode->i_op && inode->i_op->permission)
 146                 return inode->i_op->permission(inode, mask);
 147         else if ((mask & S_IWOTH) && IS_RDONLY(inode) &&
 148                  (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
 149                 return -EROFS; /* Nobody gets write access to a read-only fs */
 150         else if ((mask & S_IWOTH) && IS_IMMUTABLE(inode))
 151                 return -EACCES; /* Nobody gets write access to an immutable file */
 152         else if (current->fsuid == inode->i_uid)
 153                 mode >>= 6;
 154         else if (in_group_p(inode->i_gid))
 155                 mode >>= 3;
 156         if (((mode & mask & S_IRWXO) == mask) || capable(CAP_DAC_OVERRIDE))
 157                 return 0;
 158         /* read and search access */
 159         if ((mask == S_IROTH) ||
 160             (S_ISDIR(mode)  && !(mask & ~(S_IROTH | S_IXOTH))))
 161                 if (capable(CAP_DAC_READ_SEARCH))
 162                         return 0;
 163         return -EACCES;
 164 }
 165
 166 /*
 167  * get_write_access() gets write permission for a file.
 168  * put_write_access() releases this write permission.
 169  * This is used for regular files.
 170  * We cannot support write (and maybe mmap read-write shared) accesses and
 171  * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
 172  * can have the following values:
 173  * 0: no writers, no VM_DENYWRITE mappings
 174  * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
 175  * > 0: (i_writecount) users are writing to the file.
 176  *
 177  * WARNING: as soon as we will move get_write_access(), do_mmap() or
 178  * prepare_binfmt() out of the big lock we will need a spinlock protecting
 179  * the checks in all 3. For the time being it is not needed.
 180  */
 181 int get_write_access(struct inode * inode)
 182 {
 183         if (atomic_read(&inode->i_writecount) < 0)
 184                 return -ETXTBSY;
 185         atomic_inc(&inode->i_writecount);
 186         return 0;
 187 }
 188
/*
 * Release a writer reference on 'inode' by decrementing i_writecount;
 * the counterpart of get_write_access().
 */
void put_write_access(struct inode * inode)
{
	atomic_dec(&inode->i_writecount);
}
 193
 194 /*
 195  * Internal lookup() using the new generic dcache.
 196  */
 197 static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
 198 {
 199         struct dentry * dentry = d_lookup(parent, name);
 200
 201         if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
 202                 if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
 203                         dput(dentry);
 204                         dentry = NULL;
 205                 }
 206         }
 207         return dentry;
 208 }
 209
 210 /*
 211  * This is called when everything else fails, and we actually have
 212  * to go to the low-level filesystem to find out what we should do..
 213  *
 214  * We get the directory semaphore, and after getting that we also
 215  * make sure that nobody added the entry to the dcache in the meantime..
 216  */
 217 static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
 218 {
 219         struct dentry * result;
 220         struct inode *dir = parent->d_inode;
 221
 222         down(&dir->i_sem);
 223         /*
 224          * First re-do the cached lookup just in case it was created
 225          * while we waited for the directory semaphore..
 226          *
 227          * FIXME! This could use version numbering or similar to
 228          * avoid unnecessary cache lookups.
 229          */
 230         result = d_lookup(parent, name);
 231         if (!result) {
 232                 struct dentry * dentry = d_alloc(parent, name);
 233                 result = ERR_PTR(-ENOMEM);
 234                 if (dentry) {
 235                         result = dir->i_op->lookup(dir, dentry);
 236                         if (result)
 237                                 dput(dentry);
 238                         else
 239                                 result = dentry;
 240                 }
 241                 up(&dir->i_sem);
 242                 return result;
 243         }
 244
 245         /*
 246          * Uhhuh! Nasty case: the cache was re-populated while
 247          * we waited on the semaphore. Need to revalidate, but
 248          * we're going to return this entry regardless (same
 249          * as if it was busy).
 250          */
 251         up(&dir->i_sem);
 252         if (result->d_op && result->d_op->d_revalidate)
 253                 result->d_op->d_revalidate(result, flags);
 254         return result;
 255 }
 256
 257 static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
 258 {
 259         int err;
 260         if (current->link_count >= 32)
 261                 goto loop;
 262         current->link_count++;
 263         UPDATE_ATIME(dentry->d_inode);
 264         err = dentry->d_inode->i_op->follow_link(dentry, nd);
 265         current->link_count--;
 266         return err;
 267 loop:
 268         dput(nd->dentry);
 269         mntput(nd->mnt);
 270         return -ELOOP;
 271 }
 272
 273 static inline int follow_down(struct dentry ** dentry, struct vfsmount **mnt)
 274 {
 275         struct dentry * parent = dget((*dentry)->d_mounts);
 276         dput(*dentry);
 277         *dentry = parent;
 278         return 1;
 279 }
 280
 281 /*
 282  * Name resolution.
 283  *
 284  * This is the basic name resolution function, turning a pathname
 285  * into the final dentry.
 286  *
 287  * We expect 'base' to be positive and a directory.
 288  */
 289 int walk_name(const char * name, unsigned lookup_flags, struct nameidata *nd)
 290 {
 291         struct dentry *dentry;
 292         struct inode *inode;
 293         int err;
 294
 295         while (*name=='/')
 296                 name++;
 297         if (!*name)
 298                 goto return_base;
 299
 300         inode = nd->dentry->d_inode;
 301         if (current->link_count)
 302                 lookup_flags = LOOKUP_FOLLOW;
 303
 304         lookup_flags &= LOOKUP_FOLLOW | LOOKUP_DIRECTORY |
 305                         LOOKUP_SLASHOK | LOOKUP_POSITIVE | LOOKUP_PARENT;
 306
 307         /* At this point we know we have a real path component. */
 308         for(;;) {
 309                 unsigned long hash;
 310                 struct qstr this;
 311                 unsigned int c;
 312
 313                 err = permission(inode, MAY_EXEC);
 314                 dentry = ERR_PTR(err);
 315                 if (err)
 316                         break;
 317
 318                 this.name = name;
 319                 c = *(const unsigned char *)name;
 320
 321                 hash = init_name_hash();
 322                 do {
 323                         name++;
 324                         hash = partial_name_hash(c, hash);
 325                         c = *(const unsigned char *)name;
 326                 } while (c && (c != '/'));
 327                 this.len = name - (const char *) this.name;
 328                 this.hash = end_name_hash(hash);
 329
 330                 /* remove trailing slashes? */
 331                 if (!c)
 332                         goto last_component;
 333                 while (*++name == '/');
 334                 if (!*name)
 335                         goto last_with_slashes;
 336
 337                 /*
 338                  * "." and ".." are special - ".." especially so because it has
 339                  * to be able to know about the current root directory and
 340                  * parent relationships.
 341                  */
 342                 if (this.name[0] == '.') switch (this.len) {
 343                         default:
 344                                 break;
 345                         case 2:
 346                                 if (this.name[1] != '.')
 347                                         break;
 348                                 if (nd->dentry != current->fs->root) {
 349                                         dentry = dget(nd->dentry->d_covers->d_parent);
 350                                         dput(nd->dentry);
 351                                         nd->dentry = dentry;
 352                                         inode = dentry->d_inode;
 353                                 }
 354                                 /* fallthrough */
 355                         case 1:
 356                                 continue;
 357                 }
 358                 /*
 359                  * See if the low-level filesystem might want
 360                  * to use its own hash..
 361                  */
 362                 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
 363                         err = nd->dentry->d_op->d_hash(nd->dentry, &this);
 364                         if (err < 0)
 365                                 break;
 366                 }
 367                 /* This does the actual lookups.. */
 368                 dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
 369                 if (!dentry) {
 370                         dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
 371                         err = PTR_ERR(dentry);
 372                         if (IS_ERR(dentry))
 373                                 break;
 374                 }
 375                 /* Check mountpoints.. */
 376                 while (d_mountpoint(dentry) && follow_down(&dentry, &nd->mnt))
 377                         ;
 378
 379                 err = -ENOENT;
 380                 inode = dentry->d_inode;
 381                 if (!inode)
 382                         break;
 383                 err = -ENOTDIR;
 384                 if (!inode->i_op)
 385                         break;
 386
 387                 if (inode->i_op->follow_link) {
 388                         err = do_follow_link(dentry, nd);
 389                         dput(dentry);
 390                         if (err)
 391                                 goto return_err;
 392                         err = -ENOENT;
 393                         inode = nd->dentry->d_inode;
 394                         if (!inode)
 395                                 break;
 396                         err = -ENOTDIR;
 397                         if (!inode->i_op)
 398                                 break;
 399                 } else {
 400                         dput(nd->dentry);
 401                         nd->dentry = dentry;
 402                 }
 403                 err = -ENOTDIR;
 404                 if (!inode->i_op->lookup)
 405                         break;
 406                 continue;
 407                 /* here ends the main loop */
 408
 409 last_with_slashes:
 410                 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
 411 last_component:
 412                 if (lookup_flags & LOOKUP_PARENT)
 413                         goto lookup_parent;
 414                 if (this.name[0] == '.') switch (this.len) {
 415                         default:
 416                                 break;
 417                         case 2:
 418                                 if (this.name[1] != '.')
 419                                         break;
 420                                 if (nd->dentry != current->fs->root) {
 421                                         dentry = dget(nd->dentry->d_covers->d_parent);
 422                                         dput(nd->dentry);
 423                                         nd->dentry = dentry;
 424                                         inode = dentry->d_inode;
 425                                 }
 426                                 /* fallthrough */
 427                         case 1:
 428                                 goto return_base;
 429                 }
 430                 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
 431                         err = nd->dentry->d_op->d_hash(nd->dentry, &this);
 432                         if (err < 0)
 433                                 break;
 434                 }
 435                 dentry = cached_lookup(nd->dentry, &this, 0);
 436                 if (!dentry) {
 437                         dentry = real_lookup(nd->dentry, &this, 0);
 438                         err = PTR_ERR(dentry);
 439                         if (IS_ERR(dentry))
 440                                 break;
 441                 }
 442                 while (d_mountpoint(dentry) && follow_down(&dentry, &nd->mnt))
 443                         ;
 444                 inode = dentry->d_inode;
 445                 if ((lookup_flags & LOOKUP_FOLLOW)
 446                     && inode && inode->i_op && inode->i_op->follow_link) {
 447                         err = do_follow_link(dentry, nd);
 448                         dput(dentry);
 449                         if (err)
 450                                 goto return_err;
 451                         inode = nd->dentry->d_inode;
 452                 } else {
 453                         dput(nd->dentry);
 454                         nd->dentry = dentry;
 455                 }
 456                 err = -ENOENT;
 457                 if (!inode)
 458                         goto no_inode;
 459                 if (lookup_flags & LOOKUP_DIRECTORY) {
 460                         err = -ENOTDIR;
 461                         if (!inode->i_op || !inode->i_op->lookup)
 462                                 break;
 463                 }
 464                 goto return_base;
 465 no_inode:
 466                 err = -ENOENT;
 467                 if (lookup_flags & LOOKUP_POSITIVE)
 468                         break;
 469                 if (lookup_flags & LOOKUP_DIRECTORY)
 470                         if (!(lookup_flags & LOOKUP_SLASHOK))
 471                                 break;
 472                 goto return_base;
 473 lookup_parent:
 474                 nd->last = this;
 475 return_base:
 476                 return 0;
 477         }
 478         dput(nd->dentry);
 479         mntput(nd->mnt);
 480 return_err:
 481         return err;
 482 }
 483
 484 /* returns 1 if everything is done */
 485 static int __emul_lookup_dentry(const char *name, int lookup_flags,
 486                 struct nameidata *nd)
 487 {
 488         char *emul = __emul_prefix();
 489
 490         if (!emul)
 491                 return 0;
 492
 493         nd->mnt = mntget(current->fs->rootmnt);
 494         nd->dentry = dget(current->fs->root);
 495         if (walk_name(emul,LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_POSITIVE,nd))
 496                 return 0;
 497         if (walk_name(name, lookup_flags, nd))
 498                 return 0;
 499
 500         if (!nd->dentry->d_inode) {
 501                 struct nameidata nd_root;
 502                 nd_root.last.len = 0;
 503                 nd_root.mnt = mntget(current->fs->rootmnt);
 504                 nd_root.dentry = dget(current->fs->root);
 505                 if (walk_name(name, lookup_flags, &nd_root))
 506                         return 1;
 507                 if (nd_root.dentry->d_inode) {
 508                         dput(nd->dentry);
 509                         mntput(nd->mnt);
 510                         nd->dentry = nd_root.dentry;
 511                         nd->mnt = nd_root.mnt;
 512                         nd->last = nd_root.last;
 513                         return 1;
 514                 }
 515                 dput(nd_root.dentry);
 516                 mntput(nd_root.mnt);
 517         }
 518         return 1;
 519 }
 520
 521 static inline int
 522 walk_init_root(const char *name, unsigned flags, struct nameidata *nd)
 523 {
 524         if (current->personality != PER_LINUX)
 525                 if (__emul_lookup_dentry(name,flags,nd));
 526                         return 0;
 527         nd->mnt = mntget(current->fs->rootmnt);
 528         nd->dentry = dget(current->fs->root);
 529         return 1;
 530 }
 531
 532 int walk_init(const char *name,unsigned int flags,struct nameidata *nd)
 533 {
 534         nd->last.len = 0;
 535         if (*name=='/')
 536                 return walk_init_root(name,flags,nd);
 537         nd->mnt = mntget(current->fs->pwdmnt);
 538         nd->dentry = dget(current->fs->pwd);
 539         return 1;
 540 }
 541
 542 struct dentry * lookup_dentry(const char * name, unsigned int lookup_flags)
 543 {
 544         struct nameidata nd;
 545         int err = 0;
 546
 547         if (walk_init(name, lookup_flags, &nd))
 548                 err = walk_name(name, lookup_flags, &nd);
 549         if (!err) {
 550                 mntput(nd.mnt);
 551                 return nd.dentry;
 552         }
 553         return ERR_PTR(err);
 554 }
 555
 556 /*
 557  * Restricted form of lookup. Doesn't follow links, single-component only,
 558  * needs parent already locked. Doesn't follow mounts.
 559  */
 560 static inline struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
 561 {
 562         struct dentry * dentry;
 563         struct inode *inode;
 564         int err;
 565
 566         inode = base->d_inode;
 567         err = permission(inode, MAY_EXEC);
 568         dentry = ERR_PTR(err);
 569         if (err)
 570                 goto out;
 571
 572         /*
 573          * See if the low-level filesystem might want
 574          * to use its own hash..
 575          */
 576         if (base->d_op && base->d_op->d_hash) {
 577                 err = base->d_op->d_hash(base, name);
 578                 dentry = ERR_PTR(err);
 579                 if (err < 0)
 580                         goto out;
 581         }
 582
 583         dentry = cached_lookup(base, name, 0);
 584         if (!dentry) {
 585                 struct dentry *new = d_alloc(base, name);
 586                 dentry = ERR_PTR(-ENOMEM);
 587                 if (!new)
 588                         goto out;
 589                 dentry = inode->i_op->lookup(inode, new);
 590                 if (!dentry)
 591                         dentry = new;
 592                 else {
 593                         dput(new);
 594                         if (IS_ERR(dentry))
 595                                 goto out;
 596                 }
 597         }
 598
 599 out:
 600         dput(base);
 601         return dentry;
 602 }
 603
 604 struct dentry * lookup_one(const char * name, struct dentry * base)
 605 {
 606         unsigned long hash;
 607         struct qstr this;
 608         unsigned int c;
 609
 610         this.name = name;
 611         c = *(const unsigned char *)name;
 612         if (!c)
 613                 goto access;
 614
 615         hash = init_name_hash();
 616         do {
 617                 name++;
 618                 if (c == '/')
 619                         goto access;
 620                 hash = partial_name_hash(c, hash);
 621                 c = *(const unsigned char *)name;
 622         } while (c);
 623         this.len = name - (const char *) this.name;
 624         this.hash = end_name_hash(hash);
 625
 626         return lookup_hash(&this, base);
 627 access:
 628         return ERR_PTR(-EACCES);
 629 }
 630
 631 /*
 632  *      namei()
 633  *
 634  * is used by most simple commands to get the inode of a specified name.
 635  * Open, link etc use their own routines, but this is enough for things
 636  * like 'chmod' etc.
 637  *
 638  * namei exists in two versions: namei/lnamei. The only difference is
 639  * that namei follows links, while lnamei does not.
 640  */
 641 struct dentry * __namei(const char *pathname, unsigned int lookup_flags)
 642 {
 643         char *name;
 644         struct dentry *dentry;
 645
 646         name = getname(pathname);
 647         dentry = (struct dentry *) name;
 648         if (!IS_ERR(name)) {
 649                 dentry = lookup_dentry(name,lookup_flags|LOOKUP_POSITIVE);
 650                 putname(name);
 651         }
 652         return dentry;
 653 }
 654
 655 /*
 656  * It's inline, so penalty for filesystems that don't use sticky bit is
 657  * minimal.
 658  */
 659 static inline int check_sticky(struct inode *dir, struct inode *inode)
 660 {
 661         if (!(dir->i_mode & S_ISVTX))
 662                 return 0;
 663         if (inode->i_uid == current->fsuid)
 664                 return 0;
 665         if (dir->i_uid == current->fsuid)
 666                 return 0;
 667         return !capable(CAP_FOWNER);
 668 }
 669
 670 /*
 671  *      Check whether we can remove a link victim from directory dir, check
 672  *  whether the type of victim is right.
 673  *  1. We can't do it if dir is read-only (done in permission())
 674  *  2. We should have write and exec permissions on dir
 675  *  3. We can't remove anything from append-only dir
 676  *  4. We can't do anything with immutable dir (done in permission())
 677  *  5. If the sticky bit on dir is set we should either
 678  *      a. be owner of dir, or
 679  *      b. be owner of victim, or
 680  *      c. have CAP_FOWNER capability
 681  *  6. If the victim is append-only or immutable we can't do antyhing with
 682  *     links pointing to it.
 683  *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
 684  *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
 685  *  9. We can't remove a root or mountpoint.
 686  */
 687 static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir)
 688 {
 689         int error;
 690         if (!victim->d_inode || victim->d_parent->d_inode != dir)
 691                 return -ENOENT;
 692         error = permission(dir,MAY_WRITE | MAY_EXEC);
 693         if (error)
 694                 return error;
 695         if (IS_APPEND(dir))
 696                 return -EPERM;
 697         if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
 698             IS_IMMUTABLE(victim->d_inode))
 699                 return -EPERM;
 700         if (isdir) {
 701                 if (!S_ISDIR(victim->d_inode->i_mode))
 702                         return -ENOTDIR;
 703                 if (IS_ROOT(victim))
 704                         return -EBUSY;
 705                 if (d_mountpoint(victim))
 706                         return -EBUSY;
 707         } else if (S_ISDIR(victim->d_inode->i_mode))
 708                 return -EISDIR;
 709         return 0;
 710 }
 711
 712 /*      Check whether we can create an object with dentry child in directory
 713  *  dir.
 714  *  1. We can't do it if child already exists (open has special treatment for
 715  *     this case, but since we are inlined it's OK)
 716  *  2. We can't do it if dir is read-only (done in permission())
 717  *  3. We should have write and exec permissions on dir
 718  *  4. We can't do it if dir is immutable (done in permission())
 719  */
 720 static inline int may_create(struct inode *dir, struct dentry *child) {
 721         if (child->d_inode)
 722                 return -EEXIST;
 723         return permission(dir,MAY_WRITE | MAY_EXEC);
 724 }
 725
 726 /*
 727  * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security
 728  * reasons.
 729  *
 730  * O_DIRECTORY translates into forcing a directory lookup.
 731  */
 732 static inline int lookup_flags(unsigned int f)
 733 {
 734         unsigned long retval = LOOKUP_FOLLOW;
 735
 736         if (f & O_NOFOLLOW)
 737                 retval &= ~LOOKUP_FOLLOW;
 738
 739         if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
 740                 retval &= ~LOOKUP_FOLLOW;
 741
 742         if (f & O_DIRECTORY)
 743                 retval |= LOOKUP_DIRECTORY;
 744
 745         return retval;
 746 }
 747
 748 int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
 749 {
 750         int error;
 751
 752         mode &= S_IALLUGO & ~current->fs->umask;
 753         mode |= S_IFREG;
 754
 755         down(&dir->i_zombie);
 756         error = may_create(dir, dentry);
 757         if (error)
 758                 goto exit_lock;
 759
 760         error = -EACCES;        /* shouldn't it be ENOSYS? */
 761         if (!dir->i_op || !dir->i_op->create)
 762                 goto exit_lock;
 763
 764         DQUOT_INIT(dir);
 765         error = dir->i_op->create(dir, dentry, mode);
 766 exit_lock:
 767         up(&dir->i_zombie);
 768         return error;
 769 }
 770
 771 /*
 772  *      open_namei()
 773  *
 774  * namei for open - this is in fact almost the whole open-routine.
 775  *
 776  * Note that the low bits of "flag" aren't the same as in the open
 777  * system call - they are 00 - no permissions needed
 778  *                        01 - read permission needed
 779  *                        10 - write permission needed
 780  *                        11 - read/write permissions needed
 781  * which is a lot more logical, and also allows the "no perm" needed
 782  * for symlinks (where the permissions are checked later).
 783  */
 784 int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
 785 {
 786         int acc_mode, error = 0;
 787         struct inode *inode;
 788         struct dentry *dentry;
 789
 790         acc_mode = ACC_MODE(flag);
 791         if (!(flag & O_CREAT)) {
 792                 if (walk_init(pathname, lookup_flags(flag), nd))
 793                         error = walk_name(pathname, lookup_flags(flag), nd);
 794                 if (error)
 795                         return error;
 796
 797                 dentry = nd->dentry;
 798         } else {
 799                 struct dentry *dir;
 800
 801                 if (walk_init(pathname, LOOKUP_PARENT, nd))
 802                         error = walk_name(pathname, LOOKUP_PARENT, nd);
 803                 if (error)
 804                         return error;
 805                 /*
 806                  * It's not obvious that open(".", O_CREAT, foo) should
 807                  * fail, but it's even less obvious that it should succeed.
 808                  * Since O_CREAT means an intention to create the thing and
 809                  * open(2) had never created directories, count it as caller's
 810                  * luserdom and let him sod off - -EISDIR it is.
 811                  */
 812                 error = -EISDIR;
 813                 if (!nd->last.len || (nd->last.name[0] == '.' &&
 814                      (nd->last.len == 1 ||
 815                       (nd->last.name[1] == '.' && nd->last.len == 2))))
 816                         goto exit;
 817                 /* same for foo/ */
 818                 if (nd->last.name[nd->last.len])
 819                         goto exit;
 820
 821                 dir = dget(nd->dentry);
 822                 down(&dir->d_inode->i_sem);
 823
 824                 dentry = lookup_hash(&nd->last, dget(nd->dentry));
 825                 error = PTR_ERR(dentry);
 826                 if (IS_ERR(dentry)) {
 827                         up(&dir->d_inode->i_sem);
 828                         dput(dir);
 829                         goto exit;
 830                 }
 831
 832                 if (dentry->d_inode) {
 833                         up(&dir->d_inode->i_sem);
 834                         dput(dir);
 835                         error = -EEXIST;
 836                         if (flag & O_EXCL)
 837                                 goto exit;
 838                         if (dentry->d_inode->i_op &&
 839                             dentry->d_inode->i_op->follow_link) {
 840                                 /*
 841                                  * With O_EXCL it would be -EEXIST.
 842                                  * If symlink is a dangling one it's -ENOENT.
 843                                  * Otherwise we open the object it points to.
 844                                  */
 845                                 error = do_follow_link(dentry, nd);
 846                                 dput(dentry);
 847                                 if (error)
 848                                         return error;
 849                                 dentry = nd->dentry;
 850                         } else {
 851                                 dput(nd->dentry);
 852                                 nd->dentry = dentry;
 853                         }
 854                         error = -EISDIR;
 855                         if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
 856                                 goto exit;
 857                 } else {
 858                         error = vfs_create(dir->d_inode, dentry, mode);
 859                         /* Don't check for write permission, don't truncate */
 860                         acc_mode = 0;
 861                         flag &= ~O_TRUNC;
 862                         dput(nd->dentry);
 863                         nd->dentry = dentry;
 864                         unlock_dir(dir);
 865                         if (error)
 866                                 goto exit;
 867                 }
 868         }
 869
 870         error = -ENOENT;
 871         inode = dentry->d_inode;
 872         if (!inode)
 873                 goto exit;
 874
 875         error = -ELOOP;
 876         if (S_ISLNK(inode->i_mode))
 877                 goto exit;
 878
 879         error = -EISDIR;
 880         if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
 881                 goto exit;
 882
 883         error = permission(inode,acc_mode);
 884         if (error)
 885                 goto exit;
 886
 887         /*
 888          * FIFO's, sockets and device files are special: they don't
 889          * actually live on the filesystem itself, and as such you
 890          * can write to them even if the filesystem is read-only.
 891          */
 892         if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
 893                 flag &= ~O_TRUNC;
 894         } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
 895                 error = -EACCES;
 896                 if (IS_NODEV(inode))
 897                         goto exit;
 898
 899                 flag &= ~O_TRUNC;
 900         } else {
 901                 error = -EROFS;
 902                 if (IS_RDONLY(inode) && (flag & 2))
 903                         goto exit;
 904         }
 905         /*
 906          * An append-only file must be opened in append mode for writing.
 907          */
 908         error = -EPERM;
 909         if (IS_APPEND(inode)) {
 910                 if  ((flag & FMODE_WRITE) && !(flag & O_APPEND))
 911                         goto exit;
 912                 if (flag & O_TRUNC)
 913                         goto exit;
 914         }
 915
 916         if (flag & O_TRUNC) {
 917                 error = get_write_access(inode);
 918                 if (error)
 919                         goto exit;
 920
 921                 /*
 922                  * Refuse to truncate files with mandatory locks held on them.
 923                  */
 924                 error = locks_verify_locked(inode);
 925                 if (!error) {
 926                         DQUOT_INIT(inode);
 927
 928                         error = do_truncate(dentry, 0);
 929                 }
 930                 put_write_access(inode);
 931                 if (error)
 932                         goto exit;
 933         } else
 934                 if (flag & FMODE_WRITE)
 935                         DQUOT_INIT(inode);
 936
 937         return 0;
 938
 939 exit:
 940         dput(nd->dentry);
 941         mntput(nd->mnt);
 942         return error;
 943 }
 944
 945 static struct dentry *lookup_create(const char *name, int is_dir)
 946 {
 947         struct nameidata nd;
 948         struct dentry *dentry;
 949         int err = 0;
 950         if (walk_init(name, LOOKUP_PARENT, &nd))
 951                 err = walk_name(name, LOOKUP_PARENT, &nd);
 952         dentry = ERR_PTR(err);
 953         if (err)
 954                 goto out;
 955         down(&nd.dentry->d_inode->i_sem);
 956         dentry = ERR_PTR(-EEXIST);
 957         if (!nd.last.len || (nd.last.name[0] == '.' &&
 958               (nd.last.len == 1 || (nd.last.name[1] == '.' && nd.last.len == 2))))
 959                 goto fail;
 960         dentry = lookup_hash(&nd.last, dget(nd.dentry));
 961         if (IS_ERR(dentry))
 962                 goto fail;
 963         if (!is_dir && nd.last.name[nd.last.len] && !dentry->d_inode)
 964                 goto enoent;
 965 out_dput:
 966         dput(nd.dentry);
 967         mntput(nd.mnt);
 968 out:
 969         return dentry;
 970 enoent:
 971         dput(dentry);
 972         dentry = ERR_PTR(-ENOENT);
 973 fail:
 974         up(&nd.dentry->d_inode->i_sem);
 975         goto out_dput;
 976 }
 977
 978 int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 979 {
 980         int error = -EPERM;
 981
 982         mode &= ~current->fs->umask;
 983
 984         down(&dir->i_zombie);
 985         if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
 986                 goto exit_lock;
 987
 988         error = may_create(dir, dentry);
 989         if (error)
 990                 goto exit_lock;
 991
 992         error = -EPERM;
 993         if (!dir->i_op || !dir->i_op->mknod)
 994                 goto exit_lock;
 995
 996         DQUOT_INIT(dir);
 997         error = dir->i_op->mknod(dir, dentry, mode, dev);
 998 exit_lock:
 999         up(&dir->i_zombie);
1000         return error;
1001 }
1002
1003 struct dentry * do_mknod(const char * filename, int mode, dev_t dev)
1004 {
1005         int error;
1006         struct dentry *dir;
1007         struct dentry *dentry, *retval;
1008
1009         dentry = lookup_create(filename, 0);
1010         if (IS_ERR(dentry))
1011                 return dentry;
1012
1013         dir = dget(dentry->d_parent);
1014
1015         error = vfs_mknod(dir->d_inode, dentry, mode, dev);
1016
1017         retval = ERR_PTR(error);
1018         if (!error)
1019                 retval = dget(dentry);
1020         unlock_dir(dir);
1021         dput(dentry);
1022         return retval;
1023 }
1024
1025 asmlinkage long sys_mknod(const char * filename, int mode, dev_t dev)
1026 {
1027         int error;
1028         char * tmp;
1029         struct dentry * dentry, *dir;
1030
1031         if (S_ISDIR(mode))
1032                 return -EPERM;
1033         tmp = getname(filename);
1034         if (IS_ERR(tmp))
1035                 return PTR_ERR(tmp);
1036
1037         lock_kernel();
1038         dentry = lookup_create(tmp, 0);
1039         error = PTR_ERR(dentry);
1040         if (IS_ERR(dentry))
1041                 goto out;
1042         dir = dget(dentry->d_parent);
1043         switch (mode & S_IFMT) {
1044         case 0: case S_IFREG:
1045                 error = vfs_create(dir->d_inode, dentry, mode);
1046                 break;
1047         case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
1048                 error = vfs_mknod(dir->d_inode, dentry, mode, dev);
1049                 break;
1050         case S_IFDIR:
1051                 error = -EPERM;
1052                 break;
1053         default:
1054                 error = -EINVAL;
1055         }
1056         unlock_dir(dir);
1057         dput(dentry);
1058 out:
1059         unlock_kernel();
1060         putname(tmp);
1061
1062         return error;
1063 }
1064
1065 int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1066 {
1067         int error;
1068
1069         down(&dir->i_zombie);
1070         error = may_create(dir, dentry);
1071         if (error)
1072                 goto exit_lock;
1073
1074         error = -EPERM;
1075         if (!dir->i_op || !dir->i_op->mkdir)
1076                 goto exit_lock;
1077
1078         DQUOT_INIT(dir);
1079         mode &= (S_IRWXUGO|S_ISVTX) & ~current->fs->umask;
1080         error = dir->i_op->mkdir(dir, dentry, mode);
1081
1082 exit_lock:
1083         up(&dir->i_zombie);
1084         return error;
1085 }
1086
1087 asmlinkage long sys_mkdir(const char * pathname, int mode)
1088 {
1089         int error;
1090         char * tmp;
1091
1092         tmp = getname(pathname);
1093         error = PTR_ERR(tmp);
1094         if (!IS_ERR(tmp)) {
1095                 struct dentry *dir;
1096                 struct dentry *dentry;
1097
1098                 lock_kernel();
1099                 dentry = lookup_create(tmp, 1);
1100                 error = PTR_ERR(dentry);
1101                 if (!IS_ERR(dentry)) {
1102                         dir = dget(dentry->d_parent);
1103                         error = vfs_mkdir(dir->d_inode, dentry, mode);
1104                         unlock_dir(dir);
1105                         dput(dentry);
1106                 }
1107                 unlock_kernel();
1108         }
1109         putname(tmp);
1110
1111         return error;
1112 }
1113
1114 /*
1115  * We try to drop the dentry early: we should have
1116  * a usage count of 2 if we're the only user of this
1117  * dentry, and if that is true (possibly after pruning
1118  * the dcache), then we drop the dentry now.
1119  *
1120  * A low-level filesystem can, if it choses, legally
1121  * do a
1122  *
1123  *      if (!d_unhashed(dentry))
1124  *              return -EBUSY;
1125  *
1126  * if it cannot handle the case of removing a directory
1127  * that is still in use by something else..
1128  */
1129 static void d_unhash(struct dentry *dentry)
1130 {
1131         dget(dentry);
1132         switch (dentry->d_count) {
1133         default:
1134                 shrink_dcache_parent(dentry);
1135                 if (dentry->d_count != 2)
1136                         break;
1137         case 2:
1138                 d_drop(dentry);
1139         }
1140 }
1141
1142 int vfs_rmdir(struct inode *dir, struct dentry *dentry)
1143 {
1144         int error;
1145
1146         error = may_delete(dir, dentry, 1);
1147         if (error)
1148                 return error;
1149
1150         if (!dir->i_op || !dir->i_op->rmdir)
1151                 return -EPERM;
1152
1153         DQUOT_INIT(dir);
1154
1155         double_down(&dir->i_zombie, &dentry->d_inode->i_zombie);
1156         d_unhash(dentry);
1157         error = dir->i_op->rmdir(dir, dentry);
1158         double_up(&dir->i_zombie, &dentry->d_inode->i_zombie);
1159         dput(dentry);
1160
1161         return error;
1162 }
1163
1164 static inline int do_rmdir(const char * name)
1165 {
1166         int error;
1167         struct dentry *dir;
1168         struct dentry *dentry;
1169
1170         dentry = lookup_dentry(name, LOOKUP_POSITIVE);
1171         error = PTR_ERR(dentry);
1172         if (IS_ERR(dentry))
1173                 goto exit;
1174
1175         dir = lock_parent(dentry);
1176         error = -ENOENT;
1177         if (check_parent(dir, dentry))
1178                 error = vfs_rmdir(dir->d_inode, dentry);
1179         unlock_dir(dir);
1180         dput(dentry);
1181 exit:
1182         return error;
1183 }
1184
1185 asmlinkage long sys_rmdir(const char * pathname)
1186 {
1187         int error;
1188         char * tmp;
1189
1190         tmp = getname(pathname);
1191         if(IS_ERR(tmp))
1192                 return PTR_ERR(tmp);
1193         lock_kernel();
1194         error = do_rmdir(tmp);
1195         unlock_kernel();
1196
1197         putname(tmp);
1198
1199         return error;
1200 }
1201
1202 int vfs_unlink(struct inode *dir, struct dentry *dentry)
1203 {
1204         int error;
1205
1206         down(&dir->i_zombie);
1207         error = may_delete(dir, dentry, 0);
1208         if (!error) {
1209                 error = -EPERM;
1210                 if (dir->i_op && dir->i_op->unlink) {
1211                         DQUOT_INIT(dir);
1212                         error = dir->i_op->unlink(dir, dentry);
1213                 }
1214         }
1215         up(&dir->i_zombie);
1216         return error;
1217 }
1218
1219 static int do_unlink(const char * name)
1220 {
1221         int error;
1222         struct dentry *dir;
1223         struct dentry *dentry;
1224
1225         dentry = lookup_dentry(name, LOOKUP_POSITIVE);
1226         error = PTR_ERR(dentry);
1227         if (IS_ERR(dentry))
1228                 goto exit;
1229
1230         dir = lock_parent(dentry);
1231         error = -ENOENT;
1232         if (check_parent(dir, dentry))
1233                 error = vfs_unlink(dir->d_inode, dentry);
1234
1235         unlock_dir(dir);
1236         dput(dentry);
1237 exit:
1238         return error;
1239 }
1240
1241 asmlinkage long sys_unlink(const char * pathname)
1242 {
1243         int error;
1244         char * tmp;
1245
1246         tmp = getname(pathname);
1247         if(IS_ERR(tmp))
1248                 return PTR_ERR(tmp);
1249         lock_kernel();
1250         error = do_unlink(tmp);
1251         unlock_kernel();
1252         putname(tmp);
1253
1254         return error;
1255 }
1256
1257 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
1258 {
1259         int error;
1260
1261         down(&dir->i_zombie);
1262         error = may_create(dir, dentry);
1263         if (error)
1264                 goto exit_lock;
1265
1266         error = -EPERM;
1267         if (!dir->i_op || !dir->i_op->symlink)
1268                 goto exit_lock;
1269
1270         DQUOT_INIT(dir);
1271         error = dir->i_op->symlink(dir, dentry, oldname);
1272
1273 exit_lock:
1274         up(&dir->i_zombie);
1275         return error;
1276 }
1277
1278 asmlinkage long sys_symlink(const char * oldname, const char * newname)
1279 {
1280         int error;
1281         char * from;
1282         char * to;
1283
1284         from = getname(oldname);
1285         if(IS_ERR(from))
1286                 return PTR_ERR(from);
1287         to = getname(newname);
1288         error = PTR_ERR(to);
1289         if (!IS_ERR(to)) {
1290                 struct dentry *dir;
1291                 struct dentry *dentry;
1292
1293                 lock_kernel();
1294                 dentry = lookup_create(to, 0);
1295                 error = PTR_ERR(dentry);
1296                 if (!IS_ERR(dentry)) {
1297                         dir = dget(dentry->d_parent);
1298                         error = vfs_symlink(dir->d_inode, dentry, from);
1299                         unlock_dir(dir);
1300                         dput(dentry);
1301                 }
1302                 unlock_kernel();
1303                 putname(to);
1304         }
1305         putname(from);
1306         return error;
1307 }
1308
1309 int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
1310 {
1311         struct inode *inode;
1312         int error;
1313
1314         down(&dir->i_zombie);
1315         error = -ENOENT;
1316         inode = old_dentry->d_inode;
1317         if (!inode)
1318                 goto exit_lock;
1319
1320         error = may_create(dir, new_dentry);
1321         if (error)
1322                 goto exit_lock;
1323
1324         error = -EXDEV;
1325         if (dir->i_dev != inode->i_dev)
1326                 goto exit_lock;
1327
1328         /*
1329          * A link to an append-only or immutable file cannot be created.
1330          */
1331         error = -EPERM;
1332         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1333                 goto exit_lock;
1334         if (!dir->i_op || !dir->i_op->link)
1335                 goto exit_lock;
1336
1337         DQUOT_INIT(dir);
1338         error = dir->i_op->link(old_dentry, dir, new_dentry);
1339
1340 exit_lock:
1341         up(&dir->i_zombie);
1342         return error;
1343 }
1344
1345 /*
1346  * Hardlinks are often used in delicate situations.  We avoid
1347  * security-related surprises by not following symlinks on the
1348  * newname.  --KAB
1349  *
1350  * We don't follow them on the oldname either to be compatible
1351  * with linux 2.0, and to avoid hard-linking to directories
1352  * and other special files.  --ADM
1353  */
1354 asmlinkage long sys_link(const char * oldname, const char * newname)
1355 {
1356         int error;
1357         char * from;
1358         char * to;
1359
1360         from = getname(oldname);
1361         if(IS_ERR(from))
1362                 return PTR_ERR(from);
1363         to = getname(newname);
1364         error = PTR_ERR(to);
1365         if (!IS_ERR(to)) {
1366                 struct dentry *old_dentry, *new_dentry, *dir;
1367
1368                 lock_kernel();
1369                 old_dentry = lookup_dentry(from, LOOKUP_POSITIVE);
1370                 error = PTR_ERR(old_dentry);
1371                 if (IS_ERR(old_dentry))
1372                         goto exit;
1373
1374                 new_dentry = lookup_create(to, 0);
1375                 error = PTR_ERR(new_dentry);
1376                 if (!IS_ERR(new_dentry)) {
1377                         dir = dget(new_dentry->d_parent);
1378                         error = vfs_link(old_dentry, dir->d_inode, new_dentry);
1379                         unlock_dir(dir);
1380                         dput(new_dentry);
1381                 }
1382                 dput(old_dentry);
1383 exit:
1384                 unlock_kernel();
1385                 putname(to);
1386         }
1387         putname(from);
1388
1389         return error;
1390 }
1391
1392 /*
1393  * The worst of all namespace operations - renaming directory. "Perverted"
1394  * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
1395  * Problems:
1396  *      a) we can get into loop creation. Check is done in is_subdir().
1397  *      b) race potential - two innocent renames can create a loop together.
1398  *         That's where 4.4 screws up. Current fix: serialization on
1399  *         sb->s_vfs_rename_sem. We might be more accurate, but that's another
1400  *         story.
1401  *      c) we have to lock _three_ objects - parents and victim (if it exists).
1402  *         And that - after we got ->i_sem on parents (until then we don't know
1403  *         whether the target exists at all, let alone whether it is a directory
1404  *         or not). Solution: ->i_zombie. Taken only after ->i_sem. Always taken
1405  *         on link creation/removal of any kind. And taken (without ->i_sem) on
1406  *         directory that will be removed (both in rmdir() and here).
1407  *      d) some filesystems don't support opened-but-unlinked directories,
1408  *         either because of layout or because they are not ready to deal with
1409  *         all cases correctly. The latter will be fixed (taking this sort of
1410  *         stuff into VFS), but the former is not going away. Solution: the same
1411  *         trick as in rmdir().
1412  *      e) conversion from fhandle to dentry may come in the wrong moment - when
1413  *         we are removing the target. Solution: we will have to grab ->i_zombie
1414  *         in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
1415  *         ->i_sem on parents, which works but leads to some truely excessive
1416  *         locking].
1417  */
1418 int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
1419                struct inode *new_dir, struct dentry *new_dentry)
1420 {
1421         int error;
1422         struct inode *target;
1423
1424         if (old_dentry->d_inode == new_dentry->d_inode)
1425                 return 0;
1426
1427         error = may_delete(old_dir, old_dentry, 1);
1428         if (error)
1429                 return error;
1430
1431         if (new_dir->i_dev != old_dir->i_dev)
1432                 return -EXDEV;
1433
1434         if (!new_dentry->d_inode)
1435                 error = may_create(new_dir, new_dentry);
1436         else
1437                 error = may_delete(new_dir, new_dentry, 1);
1438         if (error)
1439                 return error;
1440
1441         if (!old_dir->i_op || !old_dir->i_op->rename)
1442                 return -EPERM;
1443
1444         /*
1445          * If we are going to change the parent - check write permissions,
1446          * we'll need to flip '..'.
1447          */
1448         if (new_dir != old_dir) {
1449                 error = permission(old_dentry->d_inode, MAY_WRITE);
1450         }
1451         if (error)
1452                 return error;
1453
1454         DQUOT_INIT(old_dir);
1455         DQUOT_INIT(new_dir);
1456         down(&old_dir->i_sb->s_vfs_rename_sem);
1457         error = -EINVAL;
1458         if (is_subdir(new_dentry, old_dentry))
1459                 goto out_unlock;
1460         target = new_dentry->d_inode;
1461         if (target) { /* Hastur! Hastur! Hastur! */
1462                 triple_down(&old_dir->i_zombie,
1463                             &new_dir->i_zombie,
1464                             &target->i_zombie);
1465                 d_unhash(new_dentry);
1466         } else
1467                 double_down(&old_dir->i_zombie,
1468                             &new_dir->i_zombie);
1469         error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1470         if (target) {
1471                 triple_up(&old_dir->i_zombie,
1472                           &new_dir->i_zombie,
1473                           &target->i_zombie);
1474                 d_rehash(new_dentry);
1475                 dput(new_dentry);
1476         } else
1477                 double_up(&old_dir->i_zombie,
1478                           &new_dir->i_zombie);
1479
1480         if (!error)
1481                 d_move(old_dentry,new_dentry);
1482 out_unlock:
1483         up(&old_dir->i_sb->s_vfs_rename_sem);
1484         return error;
1485 }
1486
1487 int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
1488                struct inode *new_dir, struct dentry *new_dentry)
1489 {
1490         int error;
1491
1492         if (old_dentry->d_inode == new_dentry->d_inode)
1493                 return 0;
1494
1495         error = may_delete(old_dir, old_dentry, 0);
1496         if (error)
1497                 return error;
1498
1499         if (new_dir->i_dev != old_dir->i_dev)
1500                 return -EXDEV;
1501
1502         if (!new_dentry->d_inode)
1503                 error = may_create(new_dir, new_dentry);
1504         else
1505                 error = may_delete(new_dir, new_dentry, 0);
1506         if (error)
1507                 return error;
1508
1509         if (!old_dir->i_op || !old_dir->i_op->rename)
1510                 return -EPERM;
1511
1512         DQUOT_INIT(old_dir);
1513         DQUOT_INIT(new_dir);
1514         double_down(&old_dir->i_zombie, &new_dir->i_zombie);
1515         error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1516         double_up(&old_dir->i_zombie, &new_dir->i_zombie);
1517         if (error)
1518                 return error;
1519         /* The following d_move() should become unconditional */
1520         if (!(old_dir->i_sb->s_flags & MS_ODD_RENAME)) {
1521                 d_move(old_dentry, new_dentry);
1522         }
1523         return 0;
1524 }
1525
1526 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1527                struct inode *new_dir, struct dentry *new_dentry)
1528 {
1529         if (S_ISDIR(old_dentry->d_inode->i_mode))
1530                 return vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
1531         else
1532                 return vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
1533 }
1534
1535 static inline int do_rename(const char * oldname, const char * newname)
1536 {
1537         int error;
1538         struct dentry * old_dir, * new_dir;
1539         struct dentry * old_dentry, *new_dentry;
1540
1541         old_dentry = lookup_dentry(oldname, LOOKUP_POSITIVE);
1542
1543         error = PTR_ERR(old_dentry);
1544         if (IS_ERR(old_dentry))
1545                 goto exit;
1546
1547         {
1548                 unsigned int flags = 0;
1549                 if (S_ISDIR(old_dentry->d_inode->i_mode))
1550                         flags = LOOKUP_SLASHOK;
1551                 new_dentry = lookup_dentry(newname, flags);
1552         }
1553
1554         error = PTR_ERR(new_dentry);
1555         if (IS_ERR(new_dentry))
1556                 goto exit_old;
1557
1558         new_dir = get_parent(new_dentry);
1559         old_dir = get_parent(old_dentry);
1560
1561         double_lock(new_dir, old_dir);
1562
1563         error = -ENOENT;
1564         if (check_parent(old_dir, old_dentry) && check_parent(new_dir, new_dentry))
1565                 error = vfs_rename(old_dir->d_inode, old_dentry,
1566                                    new_dir->d_inode, new_dentry);
1567
1568         double_unlock(new_dir, old_dir);
1569         dput(new_dentry);
1570 exit_old:
1571         dput(old_dentry);
1572 exit:
1573         return error;
1574 }
1575
1576 asmlinkage long sys_rename(const char * oldname, const char * newname)
1577 {
1578         int error;
1579         char * from;
1580         char * to;
1581
1582         from = getname(oldname);
1583         if(IS_ERR(from))
1584                 return PTR_ERR(from);
1585         to = getname(newname);
1586         error = PTR_ERR(to);
1587         if (!IS_ERR(to)) {
1588                 lock_kernel();
1589                 error = do_rename(from,to);
1590                 unlock_kernel();
1591                 putname(to);
1592         }
1593         putname(from);
1594         return error;
1595 }
1596
1597 int vfs_readlink(struct dentry *dentry, char *buffer, int buflen, const char *link)
1598 {
1599         int len;
1600
1601         len = PTR_ERR(link);
1602         if (IS_ERR(link))
1603                 goto out;
1604
1605         len = strlen(link);
1606         if (len > (unsigned) buflen)
1607                 len = buflen;
1608         if (copy_to_user(buffer, link, len))
1609                 len = -EFAULT;
1610 out:
1611         return len;
1612 }
1613
1614 static inline int
1615 __vfs_follow_link(struct nameidata *nd, const char *link)
1616 {
1617         if (IS_ERR(link))
1618                 goto fail;
1619
1620         if (*link == '/') {
1621                 dput(nd->dentry);
1622                 mntput(nd->mnt);
1623                 if (!walk_init_root(link, LOOKUP_FOLLOW, nd))
1624                         /* weird __emul_prefix() stuff did it */
1625                         return 0;
1626         }
1627         return walk_name(link, LOOKUP_FOLLOW, nd);
1628
1629 fail:
1630         dput(nd->dentry);
1631         mntput(nd->mnt);
1632         return PTR_ERR(link);
1633 }
1634
/*
 * Non-inline entry point for __vfs_follow_link(); same arguments and
 * return value.
 */
int vfs_follow_link(struct nameidata *nd, const char *link)
{
        int res;

        res = __vfs_follow_link(nd, link);
        return res;
}
1639
1640 /* get the link contents into pagecache */
1641 static char *page_getlink(struct dentry * dentry, struct page **ppage)
1642 {
1643         struct page * page;
1644         struct address_space *mapping = dentry->d_inode->i_mapping;
1645         page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage,
1646                                 dentry);
1647         if (IS_ERR(page))
1648                 goto sync_fail;
1649         wait_on_page(page);
1650         if (!Page_Uptodate(page))
1651                 goto async_fail;
1652         *ppage = page;
1653         return (char*) kmap(page);
1654
1655 async_fail:
1656         page_cache_release(page);
1657         return ERR_PTR(-EIO);
1658
1659 sync_fail:
1660         return (char*)page;
1661 }
1662
1663 int page_readlink(struct dentry *dentry, char *buffer, int buflen)
1664 {
1665         struct page *page = NULL;
1666         char *s = page_getlink(dentry, &page);
1667         int res = vfs_readlink(dentry,buffer,buflen,s);
1668         if (page) {
1669                 kunmap(page);
1670                 page_cache_release(page);
1671         }
1672         return res;
1673 }
1674
1675 int page_follow_link(struct dentry *dentry, struct nameidata *nd)
1676 {
1677         struct page *page = NULL;
1678         char *s = page_getlink(dentry, &page);
1679         int res = __vfs_follow_link(nd, s);
1680         if (page) {
1681                 kunmap(page);
1682                 page_cache_release(page);
1683         }
1684         return res;
1685 }
1686
1687 struct inode_operations page_symlink_inode_operations = {
1688         readlink:       page_readlink,
1689         follow_link:    page_follow_link,
1690 };