fs/namei.c

   1 /*
   2  *  linux/fs/namei.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6
   7 /*
   8  * Some corrections by tytso.
   9  */
  10
  11 /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
  12  * lookup logic.
  13  */
  14
  15 #include <linux/mm.h>
  16 #include <linux/proc_fs.h>
  17 #include <linux/smp_lock.h>
  18 #include <linux/quotaops.h>
  19
  20 #include <asm/uaccess.h>
  21 #include <asm/unaligned.h>
  22 #include <asm/semaphore.h>
  23 #include <asm/page.h>
  24 #include <asm/pgtable.h>
  25
  26 #include <asm/namei.h>
  27
  28 /* This can be removed after the beta phase. */
  29 #define CACHE_SUPERVISE /* debug the correctness of dcache entries */
  30 #undef DEBUG            /* some other debugging */
  31
  32
  33 #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
  34
  35 /* [Feb-1997 T. Schoebel-Theuer]
  36  * Fundamental changes in the pathname lookup mechanisms (namei)
  37  * were necessary because of omirr.  The reason is that omirr needs
  38  * to know the _real_ pathname, not the user-supplied one, in case
  39  * of symlinks (and also when transname replacements occur).
  40  *
  41  * The new code replaces the old recursive symlink resolution with
  42  * an iterative one (in case of non-nested symlink chains).  It does
  43  * this with calls to <fs>_follow_link().
  44  * As a side effect, dir_namei(), _namei() and follow_link() are now
  45  * replaced with a single function lookup_dentry() that can handle all
  46  * the special cases of the former code.
  47  *
  48  * With the new dcache, the pathname is stored at each inode, at least as
  49  * long as the refcount of the inode is positive.  As a side effect, the
  50  * size of the dcache depends on the inode cache and thus is dynamic.
  51  *
  52  * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
  53  * resolution to correspond with current state of the code.
  54  *
  55  * Note that the symlink resolution is not *completely* iterative.
  56  * There is still a significant amount of tail- and mid- recursion in
  57  * the algorithm.  Also, note that <fs>_readlink() is not used in
  58  * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
  59  * may return different results than <fs>_follow_link().  Many virtual
  60  * filesystems (including /proc) exhibit this behavior.
  61  */
  62
  63 /* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
  64  * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
  65  * and the name already exists in form of a symlink, try to create the new
  66  * name indicated by the symlink. The old code always complained that the
  67  * name already exists, due to not following the symlink even if its target
  68  * is nonexistent.  The new semantics affects also mknod() and link() when
   69  * the name is a symlink pointing to a non-existent name.
  70  *
  71  * I don't know which semantics is the right one, since I have no access
  72  * to standards. But I found by trial that HP-UX 9.0 has the full "new"
  73  * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
  74  * "old" one. Personally, I think the new semantics is much more logical.
  75  * Note that "ln old new" where "new" is a symlink pointing to a non-existing
   76  * file does succeed in both HP-UX and SunOS, but not in Solaris
  77  * and in the old Linux semantics.
  78  */
  79
  80 /* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
  81  * semantics.  See the comments in "open_namei" and "do_link" below.
  82  *
  83  * [10-Sep-98 Alan Modra] Another symlink change.
  84  */
  85
  86 /* In order to reduce some races, while at the same time doing additional
  87  * checking and hopefully speeding things up, we copy filenames to the
  88  * kernel data space before using them..
  89  *
  90  * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
  91  */
  92 static inline int do_getname(const char *filename, char *page)
  93 {
  94         int retval;
  95         unsigned long len = PAGE_SIZE;
  96
  97         if ((unsigned long) filename >= TASK_SIZE) {
  98                 if (!segment_eq(get_fs(), KERNEL_DS))
  99                         return -EFAULT;
 100         } else if (TASK_SIZE - (unsigned long) filename < PAGE_SIZE)
 101                 len = TASK_SIZE - (unsigned long) filename;
 102
 103         retval = strncpy_from_user((char *)page, filename, len);
 104         if (retval > 0) {
 105                 if (retval < len)
 106                         return 0;
 107                 return -ENAMETOOLONG;
 108         } else if (!retval)
 109                 retval = -ENOENT;
 110         return retval;
 111 }
 112
 113 char * getname(const char * filename)
 114 {
 115         char *tmp, *result;
 116
 117         result = ERR_PTR(-ENOMEM);
 118         tmp = __getname();
 119         if (tmp)  {
 120                 int retval = do_getname(filename, tmp);
 121
 122                 result = tmp;
 123                 if (retval < 0) {
 124                         putname(tmp);
 125                         result = ERR_PTR(retval);
 126                 }
 127         }
 128         return result;
 129 }
 130
 131 /*
 132  *      permission()
 133  *
 134  * is used to check for read/write/execute permissions on a file.
 135  * We use "fsuid" for this, letting us set arbitrary permissions
 136  * for filesystem access without changing the "normal" uids which
 137  * are used for other things..
 138  */
 139 int permission(struct inode * inode,int mask)
 140 {
 141         int mode = inode->i_mode;
 142
 143         if (inode->i_op && inode->i_op->permission)
 144                 return inode->i_op->permission(inode, mask);
 145         else if ((mask & S_IWOTH) && IS_RDONLY(inode) &&
 146                  (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
 147                 return -EROFS; /* Nobody gets write access to a read-only fs */
 148         else if ((mask & S_IWOTH) && IS_IMMUTABLE(inode))
 149                 return -EACCES; /* Nobody gets write access to an immutable file */
 150         else if (current->fsuid == inode->i_uid)
 151                 mode >>= 6;
 152         else if (in_group_p(inode->i_gid))
 153                 mode >>= 3;
 154         if (((mode & mask & S_IRWXO) == mask) || capable(CAP_DAC_OVERRIDE))
 155                 return 0;
 156         /* read and search access */
 157         if ((mask == S_IROTH) ||
 158             (S_ISDIR(mode)  && !(mask & ~(S_IROTH | S_IXOTH))))
 159                 if (capable(CAP_DAC_READ_SEARCH))
 160                         return 0;
 161         return -EACCES;
 162 }
 163
 164 /*
 165  * get_write_access() gets write permission for a file.
 166  * put_write_access() releases this write permission.
 167  * This is used for regular files.
 168  * We cannot support write (and maybe mmap read-write shared) accesses and
 169  * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
 170  * can have the following values:
 171  * 0: no writers, no VM_DENYWRITE mappings
 172  * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
 173  * > 0: (i_writecount) users are writing to the file.
 174  *
 175  * WARNING: as soon as we will move get_write_access(), do_mmap() or
 176  * prepare_binfmt() out of the big lock we will need a spinlock protecting
 177  * the checks in all 3. For the time being it is not needed.
 178  */
 179 int get_write_access(struct inode * inode)
 180 {
 181         if (atomic_read(&inode->i_writecount) < 0)
 182                 return -ETXTBSY;
 183         atomic_inc(&inode->i_writecount);
 184         return 0;
 185 }
 186
 187 void put_write_access(struct inode * inode)
 188 {
 189         atomic_dec(&inode->i_writecount);
 190 }
 191
 192 /*
 193  * "." and ".." are special - ".." especially so because it has to be able
 194  * to know about the current root directory and parent relationships
 195  */
 196 static struct dentry * reserved_lookup(struct dentry * parent, struct qstr * name)
 197 {
 198         struct dentry *result = NULL;
 199         if (name->name[0] == '.') {
 200                 switch (name->len) {
 201                 default:
 202                         break;
 203                 case 2:
 204                         if (name->name[1] != '.')
 205                                 break;
 206
 207                         if (parent != current->fs->root)
 208                                 parent = parent->d_covers->d_parent;
 209                         /* fallthrough */
 210                 case 1:
 211                         result = parent;
 212                 }
 213         }
 214         return dget(result);
 215 }
 216
 217 /*
 218  * Internal lookup() using the new generic dcache.
 219  */
 220 static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
 221 {
 222         struct dentry * dentry = d_lookup(parent, name);
 223
 224         if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
 225                 if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
 226                         dput(dentry);
 227                         dentry = NULL;
 228                 }
 229         }
 230         return dentry;
 231 }
 232
 233 /*
 234  * This is called when everything else fails, and we actually have
 235  * to go to the low-level filesystem to find out what we should do..
 236  *
 237  * We get the directory semaphore, and after getting that we also
 238  * make sure that nobody added the entry to the dcache in the meantime..
 239  */
 240 static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
 241 {
 242         struct dentry * result;
 243         struct inode *dir = parent->d_inode;
 244
 245         down(&dir->i_sem);
 246         /*
 247          * First re-do the cached lookup just in case it was created
 248          * while we waited for the directory semaphore..
 249          *
 250          * FIXME! This could use version numbering or similar to
 251          * avoid unnecessary cache lookups.
 252          */
 253         result = cached_lookup(parent, name, flags);
 254         if (!result) {
 255                 struct dentry * dentry = d_alloc(parent, name);
 256                 result = ERR_PTR(-ENOMEM);
 257                 if (dentry) {
 258                         result = dir->i_op->lookup(dir, dentry);
 259                         if (result)
 260                                 dput(dentry);
 261                         else
 262                                 result = dentry;
 263                 }
 264         }
 265         up(&dir->i_sem);
 266         return result;
 267 }
 268
 269 static struct dentry * do_follow_link(struct dentry *base, struct dentry *dentry, unsigned int follow)
 270 {
 271         struct inode * inode = dentry->d_inode;
 272
 273         if ((follow & LOOKUP_FOLLOW)
 274             && inode && inode->i_op && inode->i_op->follow_link) {
 275                 if (current->link_count < 5) {
 276                         struct dentry * result;
 277
 278                         current->link_count++;
 279                         /* This eats the base */
 280                         result = inode->i_op->follow_link(dentry, base, follow);
 281                         current->link_count--;
 282                         dput(dentry);
 283                         return result;
 284                 }
 285                 dput(dentry);
 286                 dentry = ERR_PTR(-ELOOP);
 287         }
 288         dput(base);
 289         return dentry;
 290 }
 291
 292 static inline struct dentry * follow_mount(struct dentry * dentry)
 293 {
 294         struct dentry * mnt = dentry->d_mounts;
 295
 296         if (mnt != dentry) {
 297                 dget(mnt);
 298                 dput(dentry);
 299                 dentry = mnt;
 300         }
 301         return dentry;
 302 }
 303
 304 /*
 305  * Name resolution.
 306  *
 307  * This is the basic name resolution function, turning a pathname
 308  * into the final dentry.
 309  */
 310 struct dentry * lookup_dentry(const char * name, struct dentry * base, unsigned int lookup_flags)
 311 {
 312         struct dentry * dentry;
 313         struct inode *inode;
 314
 315         if (*name == '/') {
 316                 if (base)
 317                         dput(base);
 318                 do {
 319                         name++;
 320                 } while (*name == '/');
 321                 __prefix_lookup_dentry(name, lookup_flags);
 322                 base = dget(current->fs->root);
 323         } else if (!base) {
 324                 base = dget(current->fs->pwd);
 325         }
 326
 327         if (!*name)
 328                 goto return_base;
 329
 330         inode = base->d_inode;
 331         lookup_flags &= LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_SLASHOK;
 332
 333         /* At this point we know we have a real path component. */
 334         for(;;) {
 335                 int err;
 336                 unsigned long hash;
 337                 struct qstr this;
 338                 unsigned int flags;
 339                 unsigned int c;
 340
 341                 err = permission(inode, MAY_EXEC);
 342                 dentry = ERR_PTR(err);
 343                 if (err)
 344                         break;
 345
 346                 this.name = name;
 347                 c = *(const unsigned char *)name;
 348
 349                 hash = init_name_hash();
 350                 do {
 351                         name++;
 352                         hash = partial_name_hash(c, hash);
 353                         c = *(const unsigned char *)name;
 354                 } while (c && (c != '/'));
 355                 this.len = name - (const char *) this.name;
 356                 this.hash = end_name_hash(hash);
 357
 358                 /* remove trailing slashes? */
 359                 flags = lookup_flags;
 360                 if (c) {
 361                         char tmp;
 362
 363                         flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
 364                         do {
 365                                 tmp = *++name;
 366                         } while (tmp == '/');
 367                         if (tmp)
 368                                 flags |= LOOKUP_CONTINUE;
 369                 }
 370
 371                 /*
 372                  * See if the low-level filesystem might want
 373                  * to use its own hash..
 374                  */
 375                 if (base->d_op && base->d_op->d_hash) {
 376                         int error;
 377                         error = base->d_op->d_hash(base, &this);
 378                         if (error < 0) {
 379                                 dentry = ERR_PTR(error);
 380                                 break;
 381                         }
 382                 }
 383
 384                 /* This does the actual lookups.. */
 385                 dentry = reserved_lookup(base, &this);
 386                 if (!dentry) {
 387                         dentry = cached_lookup(base, &this, flags);
 388                         if (!dentry) {
 389                                 dentry = real_lookup(base, &this, flags);
 390                                 if (IS_ERR(dentry))
 391                                         break;
 392                         }
 393                 }
 394
 395                 /* Check mountpoints.. */
 396                 dentry = follow_mount(dentry);
 397
 398                 base = do_follow_link(base, dentry, flags);
 399                 if (IS_ERR(base))
 400                         goto return_base;
 401
 402                 inode = base->d_inode;
 403                 if (flags & LOOKUP_DIRECTORY) {
 404                         if (!inode)
 405                                 goto no_inode;
 406                         dentry = ERR_PTR(-ENOTDIR);
 407                         if (!inode->i_op || !inode->i_op->lookup)
 408                                 break;
 409                         if (flags & LOOKUP_CONTINUE)
 410                                 continue;
 411                 }
 412 return_base:
 413                 return base;
 414 /*
 415  * The case of a nonexisting file is special.
 416  *
 417  * In the middle of a pathname lookup (ie when
 418  * LOOKUP_CONTINUE is set), it's an obvious
 419  * error and returns ENOENT.
 420  *
 421  * At the end of a pathname lookup it's legal,
 422  * and we return a negative dentry. However, we
 423  * get here only if there were trailing slashes,
 424  * which is legal only if we know it's supposed
 425  * to be a directory (ie "mkdir"). Thus the
 426  * LOOKUP_SLASHOK flag.
 427  */
 428 no_inode:
 429                 dentry = ERR_PTR(-ENOENT);
 430                 if (flags & LOOKUP_CONTINUE)
 431                         break;
 432                 if (flags & LOOKUP_SLASHOK)
 433                         goto return_base;
 434                 break;
 435         }
 436         dput(base);
 437         return dentry;
 438 }
 439
 440 /*
 441  *      namei()
 442  *
 443  * is used by most simple commands to get the inode of a specified name.
 444  * Open, link etc use their own routines, but this is enough for things
 445  * like 'chmod' etc.
 446  *
 447  * namei exists in two versions: namei/lnamei. The only difference is
 448  * that namei follows links, while lnamei does not.
 449  */
 450 struct dentry * __namei(const char *pathname, unsigned int lookup_flags)
 451 {
 452         char *name;
 453         struct dentry *dentry;
 454
 455         name = getname(pathname);
 456         dentry = (struct dentry *) name;
 457         if (!IS_ERR(name)) {
 458                 dentry = lookup_dentry(name, NULL, lookup_flags);
 459                 putname(name);
 460                 if (!IS_ERR(dentry)) {
 461                         if (!dentry->d_inode) {
 462                                 dput(dentry);
 463                                 dentry = ERR_PTR(-ENOENT);
 464                         }
 465                 }
 466         }
 467         return dentry;
 468 }
 469
 470 /*
 471  * It's inline, so penalty for filesystems that don't use sticky bit is
 472  * minimal.
 473  */
 474 static inline int check_sticky(struct inode *dir, struct inode *inode)
 475 {
 476         if (!(dir->i_mode & S_ISVTX))
 477                 return 0;
 478         if (inode->i_uid == current->fsuid)
 479                 return 0;
 480         if (dir->i_uid == current->fsuid)
 481                 return 0;
 482         return !capable(CAP_FOWNER);
 483 }
 484
 485 /*
 486  *      Check whether we can remove a link victim from directory dir, check
 487  *  whether the type of victim is right.
 488  *  1. We can't do it if dir is read-only (done in permission())
 489  *  2. We should have write and exec permissions on dir
 490  *  3. We can't remove anything from append-only dir
 491  *  4. We can't do anything with immutable dir (done in permission())
 492  *  5. If the sticky bit on dir is set we should either
 493  *      a. be owner of dir, or
 494  *      b. be owner of victim, or
 495  *      c. have CAP_FOWNER capability
 496  *  6. If the victim is append-only or immutable we can't do antyhing with
 497  *     links pointing to it.
 498  *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
 499  *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
 500  *  9. We can't remove a root or mountpoint.
 501  */
 502 static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir)
 503 {
 504         int error;
 505         if (!victim->d_inode || victim->d_parent->d_inode != dir)
 506                 return -ENOENT;
 507         error = permission(dir,MAY_WRITE | MAY_EXEC);
 508         if (error)
 509                 return error;
 510         if (IS_APPEND(dir))
 511                 return -EPERM;
 512         if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
 513             IS_IMMUTABLE(victim->d_inode))
 514                 return -EPERM;
 515         if (isdir) {
 516                 if (!S_ISDIR(victim->d_inode->i_mode))
 517                         return -ENOTDIR;
 518                 if (IS_ROOT(victim))
 519                         return -EBUSY;
 520                 if (victim->d_mounts != victim->d_covers)
 521                         return -EBUSY;
 522         } else if (S_ISDIR(victim->d_inode->i_mode))
 523                 return -EISDIR;
 524         return 0;
 525 }
 526
 527 /*      Check whether we can create an object with dentry child in directory
 528  *  dir.
 529  *  1. We can't do it if child already exists (open has special treatment for
 530  *     this case, but since we are inlined it's OK)
 531  *  2. We can't do it if dir is read-only (done in permission())
 532  *  3. We should have write and exec permissions on dir
 533  *  4. We can't do it if dir is immutable (done in permission())
 534  */
 535 static inline int may_create(struct inode *dir, struct dentry *child) {
 536         if (child->d_inode)
 537                 return -EEXIST;
 538         return permission(dir,MAY_WRITE | MAY_EXEC);
 539 }
 540
 541 static inline struct dentry *get_parent(struct dentry *dentry)
 542 {
 543         return dget(dentry->d_parent);
 544 }
 545
 546 static inline void unlock_dir(struct dentry *dir)
 547 {
 548         up(&dir->d_inode->i_sem);
 549         dput(dir);
 550 }
 551
 552 /*
 553  * We need to do a check-parent every time
 554  * after we have locked the parent - to verify
 555  * that the parent is still our parent and
 556  * that we are still hashed onto it..
 557  *
 558  * This is requied in case two processes race
 559  * on removing (or moving) the same entry: the
 560  * parent lock will serialize them, but the
 561  * other process will be too late..
 562  */
 563 #define check_parent(dir, dentry) \
 564         ((dir) == (dentry)->d_parent && !list_empty(&dentry->d_hash))
 565
 566 /*
 567  * Locking the parent is needed to:
 568  *  - serialize directory operations
 569  *  - make sure the parent doesn't change from
 570  *    under us in the middle of an operation.
 571  *
 572  * NOTE! Right now we'd rather use a "struct inode"
 573  * for this, but as I expect things to move toward
 574  * using dentries instead for most things it is
 575  * probably better to start with the conceptually
 576  * better interface of relying on a path of dentries.
 577  */
 578 static inline struct dentry *lock_parent(struct dentry *dentry)
 579 {
 580         struct dentry *dir = dget(dentry->d_parent);
 581
 582         down(&dir->d_inode->i_sem);
 583         return dir;
 584 }
 585
 586 /*
 587  * Whee.. Deadlock country. Happily there are only two VFS
 588  * operations that do this..
 589  */
 590 static inline void double_lock(struct dentry *d1, struct dentry *d2)
 591 {
 592         struct semaphore *s1 = &d1->d_inode->i_sem;
 593         struct semaphore *s2 = &d2->d_inode->i_sem;
 594
 595         if (s1 != s2) {
 596                 if ((unsigned long) s1 < (unsigned long) s2) {
 597                         struct semaphore *tmp = s2;
 598                         s2 = s1; s1 = tmp;
 599                 }
 600                 down(s1);
 601         }
 602         down(s2);
 603 }
 604
 605 static inline void double_unlock(struct dentry *d1, struct dentry *d2)
 606 {
 607         struct semaphore *s1 = &d1->d_inode->i_sem;
 608         struct semaphore *s2 = &d2->d_inode->i_sem;
 609
 610         up(s1);
 611         if (s1 != s2)
 612                 up(s2);
 613         dput(d1);
 614         dput(d2);
 615 }
 616
 617
 618 /*
 619  * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security
 620  * reasons.
 621  *
 622  * O_DIRECTORY translates into forcing a directory lookup.
 623  */
 624 static inline int lookup_flags(unsigned int f)
 625 {
 626         unsigned long retval = LOOKUP_FOLLOW;
 627
 628         if (f & O_NOFOLLOW)
 629                 retval &= ~LOOKUP_FOLLOW;
 630
 631         if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
 632                 retval &= ~LOOKUP_FOLLOW;
 633
 634         if (f & O_DIRECTORY)
 635                 retval |= LOOKUP_DIRECTORY;
 636
 637         return retval;
 638 }
 639
 640 int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
 641 {
 642         int error;
 643
 644         error = may_create(dir, dentry);
 645         if (error)
 646                 goto exit_lock;
 647
 648         error = -EACCES;        /* shouldn't it be ENOSYS? */
 649         if (!dir->i_op || !dir->i_op->create)
 650                 goto exit_lock;
 651
 652         DQUOT_INIT(dir);
 653         error = dir->i_op->create(dir, dentry, mode);
 654 exit_lock:
 655         return error;
 656 }
 657
 658 /*
 659  *      open_namei()
 660  *
 661  * namei for open - this is in fact almost the whole open-routine.
 662  *
 663  * Note that the low bits of "flag" aren't the same as in the open
 664  * system call - they are 00 - no permissions needed
 665  *                        01 - read permission needed
 666  *                        10 - write permission needed
 667  *                        11 - read/write permissions needed
 668  * which is a lot more logical, and also allows the "no perm" needed
 669  * for symlinks (where the permissions are checked later).
 670  */
 671 struct dentry * open_namei(const char * pathname, int flag, int mode)
 672 {
 673         int acc_mode, error;
 674         struct inode *inode;
 675         struct dentry *dentry;
 676
 677         mode &= S_IALLUGO & ~current->fs->umask;
 678         mode |= S_IFREG;
 679
 680         dentry = lookup_dentry(pathname, NULL, lookup_flags(flag));
 681         if (IS_ERR(dentry))
 682                 return dentry;
 683
 684         acc_mode = ACC_MODE(flag);
 685         if (flag & O_CREAT) {
 686                 struct dentry *dir;
 687
 688                 if (dentry->d_inode) {
 689                         if (!(flag & O_EXCL))
 690                                 goto nocreate;
 691                         error = -EEXIST;
 692                         goto exit;
 693                 }
 694
 695                 dir = lock_parent(dentry);
 696                 if (!check_parent(dir, dentry)) {
 697                         /*
 698                          * Really nasty race happened. What's the
 699                          * right error code? We had a dentry, but
 700                          * before we could use it it was removed
 701                          * by somebody else. We could just re-try
 702                          * everything, I guess.
 703                          *
 704                          * ENOENT is definitely wrong.
 705                          */
 706                         error = -ENOENT;
 707                         unlock_dir(dir);
 708                         goto exit;
 709                 }
 710
 711                 /*
 712                  * Somebody might have created the file while we
 713                  * waited for the directory lock.. So we have to
 714                  * re-do the existence test.
 715                  */
 716                 if (dentry->d_inode) {
 717                         error = 0;
 718                         if (flag & O_EXCL)
 719                                 error = -EEXIST;
 720                 } else {
 721                         error = vfs_create(dir->d_inode, dentry,mode);
 722                         /* Don't check for write permission, don't truncate */
 723                         acc_mode = 0;
 724                         flag &= ~O_TRUNC;
 725                 }
 726                 unlock_dir(dir);
 727                 if (error)
 728                         goto exit;
 729         }
 730
 731 nocreate:
 732         error = -ENOENT;
 733         inode = dentry->d_inode;
 734         if (!inode)
 735                 goto exit;
 736
 737         error = -ELOOP;
 738         if (S_ISLNK(inode->i_mode))
 739                 goto exit;
 740
 741         error = -EISDIR;
 742         if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
 743                 goto exit;
 744
 745         error = permission(inode,acc_mode);
 746         if (error)
 747                 goto exit;
 748
 749         /*
 750          * FIFO's, sockets and device files are special: they don't
 751          * actually live on the filesystem itself, and as such you
 752          * can write to them even if the filesystem is read-only.
 753          */
 754         if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
 755                 flag &= ~O_TRUNC;
 756         } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
 757                 error = -EACCES;
 758                 if (IS_NODEV(inode))
 759                         goto exit;
 760
 761                 flag &= ~O_TRUNC;
 762         } else {
 763                 error = -EROFS;
 764                 if (IS_RDONLY(inode) && (flag & 2))
 765                         goto exit;
 766         }
 767         /*
 768          * An append-only file must be opened in append mode for writing.
 769          */
 770         error = -EPERM;
 771         if (IS_APPEND(inode)) {
 772                 if  ((flag & FMODE_WRITE) && !(flag & O_APPEND))
 773                         goto exit;
 774                 if (flag & O_TRUNC)
 775                         goto exit;
 776         }
 777
 778         if (flag & O_TRUNC) {
 779                 error = get_write_access(inode);
 780                 if (error)
 781                         goto exit;
 782
 783                 /*
 784                  * Refuse to truncate files with mandatory locks held on them.
 785                  */
 786                 error = locks_verify_locked(inode);
 787                 if (!error) {
 788                         DQUOT_INIT(inode);
 789
 790                         error = do_truncate(dentry, 0);
 791                 }
 792                 put_write_access(inode);
 793                 if (error)
 794                         goto exit;
 795         } else
 796                 if (flag & FMODE_WRITE)
 797                         DQUOT_INIT(inode);
 798
 799         return dentry;
 800
 801 exit:
 802         dput(dentry);
 803         return ERR_PTR(error);
 804 }
 805
 806 struct dentry * do_mknod(const char * filename, int mode, dev_t dev)
 807 {
 808         int error;
 809         struct dentry *dir;
 810         struct dentry *dentry, *retval;
 811
 812         mode &= ~current->fs->umask;
 813         dentry = lookup_dentry(filename, NULL, LOOKUP_FOLLOW);
 814         if (IS_ERR(dentry))
 815                 return dentry;
 816
 817         dir = lock_parent(dentry);
 818         error = -ENOENT;
 819         if (!check_parent(dir, dentry))
 820                 goto exit_lock;
 821
 822         error = may_create(dir->d_inode, dentry);
 823         if (error)
 824                 goto exit_lock;
 825
 826         error = -EPERM;
 827         if (!dir->d_inode->i_op || !dir->d_inode->i_op->mknod)
 828                 goto exit_lock;
 829
 830         DQUOT_INIT(dir->d_inode);
 831         error = dir->d_inode->i_op->mknod(dir->d_inode, dentry, mode, dev);
 832 exit_lock:
 833         retval = ERR_PTR(error);
 834         if (!error)
 835                 retval = dget(dentry);
 836         unlock_dir(dir);
 837         dput(dentry);
 838         return retval;
 839 }
 840
 841 asmlinkage int sys_mknod(const char * filename, int mode, dev_t dev)
 842 {
 843         int error;
 844         char * tmp;
 845         struct dentry * dentry;
 846
 847         lock_kernel();
 848         error = -EPERM;
 849         if (S_ISDIR(mode) || (!S_ISFIFO(mode) && !capable(CAP_SYS_ADMIN)))
 850                 goto out;
 851         tmp = getname(filename);
 852         error = PTR_ERR(tmp);
 853         if (IS_ERR(tmp))
 854                 goto out;
 855
 856         error = -EINVAL;
 857         switch (mode & S_IFMT) {
 858         case 0:
 859                 mode |= S_IFREG;        /* fallthrough */
 860         case S_IFREG:
 861                 mode &= ~current->fs->umask;
 862                 dentry = lookup_dentry(filename, NULL, LOOKUP_FOLLOW);
 863                 if (IS_ERR(dentry))
 864                         error = PTR_ERR(dentry);
 865                 else {
 866                         struct dentry *dir = lock_parent(dentry);
 867                         error = -ENOENT;
 868                         if (check_parent(dir, dentry))
 869                                 error = vfs_create(dir->d_inode, dentry, mode);
 870                         dput(dentry);
 871                 }
 872                 break;
 873         case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
 874                 dentry = do_mknod(tmp,mode,dev);
 875                 error = PTR_ERR(dentry);
 876                 if (!IS_ERR(dentry)) {
 877                         dput(dentry);
 878                         error = 0;
 879                 }
 880                 break;
 881         }
 882         putname(tmp);
 883
 884 out:
 885         unlock_kernel();
 886         return error;
 887 }
 888
 889 static inline int do_mkdir(const char * pathname, int mode)
 890 {
 891         int error;
 892         struct dentry *dir;
 893         struct dentry *dentry;
 894
 895         dentry = lookup_dentry(pathname, NULL, LOOKUP_SLASHOK);
 896         error = PTR_ERR(dentry);
 897         if (IS_ERR(dentry))
 898                 goto exit;
 899
 900         /*
 901          * EEXIST is kind of a strange error code to
 902          * return, but basically if the dentry was moved
 903          * or unlinked while we locked the parent, we
 904          * do know that it _did_ exist before, and as
 905          * such it makes perfect sense.. In contrast,
 906          * ENOENT doesn't make sense for mkdir.
 907          */
 908         dir = lock_parent(dentry);
 909         error = -EEXIST;
 910         if (!check_parent(dir, dentry))
 911                 goto exit_lock;
 912
 913         error = may_create(dir->d_inode, dentry);
 914         if (error)
 915                 goto exit_lock;
 916
 917         error = -EPERM;
 918         if (!dir->d_inode->i_op || !dir->d_inode->i_op->mkdir)
 919                 goto exit_lock;
 920
 921         DQUOT_INIT(dir->d_inode);
 922         mode &= (S_IRWXUGO|S_ISVTX) & ~current->fs->umask;
 923         error = dir->d_inode->i_op->mkdir(dir->d_inode, dentry, mode);
 924
 925 exit_lock:
 926         unlock_dir(dir);
 927         dput(dentry);
 928 exit:
 929         return error;
 930 }
 931
 932 asmlinkage int sys_mkdir(const char * pathname, int mode)
 933 {
 934         int error;
 935         char * tmp;
 936
 937         lock_kernel();
 938         tmp = getname(pathname);
 939         error = PTR_ERR(tmp);
 940         if (!IS_ERR(tmp)) {
 941                 error = do_mkdir(tmp,mode);
 942                 putname(tmp);
 943         }
 944         unlock_kernel();
 945         return error;
 946 }
 947
 948 int vfs_rmdir(struct inode *dir, struct dentry *dentry)
 949 {
 950         int error;
 951
 952         error = may_delete(dir, dentry, 1);
 953         if (error)
 954                 return error;
 955
 956         if (!dir->i_op || !dir->i_op->rmdir)
 957                 return -EPERM;
 958
 959         DQUOT_INIT(dir);
 960
 961         /*
 962          * We try to drop the dentry early: we should have
 963          * a usage count of 2 if we're the only user of this
 964          * dentry, and if that is true (possibly after pruning
 965          * the dcache), then we drop the dentry now.
 966          *
 967          * A low-level filesystem can, if it choses, legally
 968          * do a
 969          *
 970          *      if (!list_empty(&dentry->d_hash))
 971          *              return -EBUSY;
 972          *
 973          * if it cannot handle the case of removing a directory
 974          * that is still in use by something else..
 975          */
 976         switch (dentry->d_count) {
 977         default:
 978                 shrink_dcache_parent(dentry);
 979                 if (dentry->d_count != 2)
 980                         break;
 981         case 2:
 982                 d_drop(dentry);
 983         }
 984
 985         error = dir->i_op->rmdir(dir, dentry);
 986
 987         return error;
 988 }
 989
 990 static inline int do_rmdir(const char * name)
 991 {
 992         int error;
 993         struct dentry *dir;
 994         struct dentry *dentry;
 995
 996         dentry = lookup_dentry(name, NULL, 0);
 997         error = PTR_ERR(dentry);
 998         if (IS_ERR(dentry))
 999                 goto exit;
1000
1001         error = -ENOENT;
1002         if (!dentry->d_inode)
1003                 goto exit_dput;
1004
1005         dir = dget(dentry->d_parent);
1006
1007         /*
1008          * The dentry->d_count stuff confuses d_delete() enough to
1009          * not kill the inode from under us while it is locked. This
1010          * wouldn't be needed, except the dentry semaphore is really
1011          * in the inode, not in the dentry..
1012          */
1013         dentry->d_count++;
1014         double_lock(dir, dentry);
1015
1016         error = -ENOENT;
1017         if (check_parent(dir, dentry))
1018                 error = vfs_rmdir(dir->d_inode, dentry);
1019
1020         double_unlock(dentry, dir);
1021 exit_dput:
1022         dput(dentry);
1023 exit:
1024         return error;
1025 }
1026
1027 asmlinkage int sys_rmdir(const char * pathname)
1028 {
1029         int error;
1030         char * tmp;
1031
1032         lock_kernel();
1033         tmp = getname(pathname);
1034         error = PTR_ERR(tmp);
1035         if (!IS_ERR(tmp)) {
1036                 error = do_rmdir(tmp);
1037                 putname(tmp);
1038         }
1039         unlock_kernel();
1040         return error;
1041 }
1042
1043 int vfs_unlink(struct inode *dir, struct dentry *dentry)
1044 {
1045         int error;
1046
1047         error = may_delete(dir, dentry, 0);
1048         if (!error) {
1049                 error = -EPERM;
1050                 if (dir->i_op && dir->i_op->unlink) {
1051                         DQUOT_INIT(dir);
1052                         error = dir->i_op->unlink(dir, dentry);
1053                 }
1054         }
1055         return error;
1056 }
1057
1058 static inline int do_unlink(const char * name)
1059 {
1060         int error;
1061         struct dentry *dir;
1062         struct dentry *dentry;
1063
1064         dentry = lookup_dentry(name, NULL, 0);
1065         error = PTR_ERR(dentry);
1066         if (IS_ERR(dentry))
1067                 goto exit;
1068
1069         dir = lock_parent(dentry);
1070         error = -ENOENT;
1071         if (check_parent(dir, dentry))
1072                 error = vfs_unlink(dir->d_inode, dentry);
1073
1074         unlock_dir(dir);
1075         dput(dentry);
1076 exit:
1077         return error;
1078 }
1079
1080 asmlinkage int sys_unlink(const char * pathname)
1081 {
1082         int error;
1083         char * tmp;
1084
1085         lock_kernel();
1086         tmp = getname(pathname);
1087         error = PTR_ERR(tmp);
1088         if (!IS_ERR(tmp)) {
1089                 error = do_unlink(tmp);
1090                 putname(tmp);
1091         }
1092         unlock_kernel();
1093         return error;
1094 }
1095
1096 static inline int do_symlink(const char * oldname, const char * newname)
1097 {
1098         int error;
1099         struct dentry *dir;
1100         struct dentry *dentry;
1101
1102         dentry = lookup_dentry(newname, NULL, 0);
1103
1104         error = PTR_ERR(dentry);
1105         if (IS_ERR(dentry))
1106                 goto exit;
1107
1108         dir = lock_parent(dentry);
1109         error = -ENOENT;
1110         if (!check_parent(dir, dentry))
1111                 goto exit_lock;
1112
1113         error = may_create(dir->d_inode, dentry);
1114         if (error)
1115                 goto exit_lock;
1116
1117         error = -EPERM;
1118         if (!dir->d_inode->i_op || !dir->d_inode->i_op->symlink)
1119                 goto exit_lock;
1120
1121         DQUOT_INIT(dir->d_inode);
1122         error = dir->d_inode->i_op->symlink(dir->d_inode, dentry, oldname);
1123
1124 exit_lock:
1125         unlock_dir(dir);
1126         dput(dentry);
1127 exit:
1128         return error;
1129 }
1130
1131 asmlinkage int sys_symlink(const char * oldname, const char * newname)
1132 {
1133         int error;
1134         char * from;
1135
1136         lock_kernel();
1137         from = getname(oldname);
1138         error = PTR_ERR(from);
1139         if (!IS_ERR(from)) {
1140                 char * to;
1141                 to = getname(newname);
1142                 error = PTR_ERR(to);
1143                 if (!IS_ERR(to)) {
1144                         error = do_symlink(from,to);
1145                         putname(to);
1146                 }
1147                 putname(from);
1148         }
1149         unlock_kernel();
1150         return error;
1151 }
1152
1153 static inline int do_link(const char * oldname, const char * newname)
1154 {
1155         struct dentry *old_dentry, *new_dentry, *dir;
1156         struct inode *inode;
1157         int error;
1158
1159         /*
1160          * Hardlinks are often used in delicate situations.  We avoid
1161          * security-related surprises by not following symlinks on the
1162          * newname.  --KAB
1163          *
1164          * We don't follow them on the oldname either to be compatible
1165          * with linux 2.0, and to avoid hard-linking to directories
1166          * and other special files.  --ADM
1167          */
1168         old_dentry = lookup_dentry(oldname, NULL, 0);
1169         error = PTR_ERR(old_dentry);
1170         if (IS_ERR(old_dentry))
1171                 goto exit;
1172
1173         new_dentry = lookup_dentry(newname, NULL, 0);
1174         error = PTR_ERR(new_dentry);
1175         if (IS_ERR(new_dentry))
1176                 goto exit_old;
1177
1178         dir = lock_parent(new_dentry);
1179         error = -ENOENT;
1180         if (!check_parent(dir, new_dentry))
1181                 goto exit_lock;
1182
1183         error = -ENOENT;
1184         inode = old_dentry->d_inode;
1185         if (!inode)
1186                 goto exit_lock;
1187
1188         error = may_create(dir->d_inode, new_dentry);
1189         if (error)
1190                 goto exit_lock;
1191
1192         error = -EXDEV;
1193         if (dir->d_inode->i_dev != inode->i_dev)
1194                 goto exit_lock;
1195
1196         /*
1197          * A link to an append-only or immutable file cannot be created.
1198          */
1199         error = -EPERM;
1200         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1201                 goto exit_lock;
1202
1203         error = -EPERM;
1204         if (!dir->d_inode->i_op || !dir->d_inode->i_op->link)
1205                 goto exit_lock;
1206
1207         DQUOT_INIT(dir->d_inode);
1208         error = dir->d_inode->i_op->link(old_dentry, dir->d_inode, new_dentry);
1209
1210 exit_lock:
1211         unlock_dir(dir);
1212         dput(new_dentry);
1213 exit_old:
1214         dput(old_dentry);
1215 exit:
1216         return error;
1217 }
1218
1219 asmlinkage int sys_link(const char * oldname, const char * newname)
1220 {
1221         int error;
1222         char * from;
1223
1224         lock_kernel();
1225         from = getname(oldname);
1226         error = PTR_ERR(from);
1227         if (!IS_ERR(from)) {
1228                 char * to;
1229                 to = getname(newname);
1230                 error = PTR_ERR(to);
1231                 if (!IS_ERR(to)) {
1232                         error = do_link(from,to);
1233                         putname(to);
1234                 }
1235                 putname(from);
1236         }
1237         unlock_kernel();
1238         return error;
1239 }
1240
1241 int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
1242                struct inode *new_dir, struct dentry *new_dentry)
1243 {
1244         int error;
1245         int need_rehash = 0;
1246
1247         if (old_dentry->d_inode == new_dentry->d_inode)
1248                 return 0;
1249
1250         error = may_delete(old_dir, old_dentry, 1);
1251         if (error)
1252                 return error;
1253
1254         if (new_dir->i_dev != old_dir->i_dev)
1255                 return -EXDEV;
1256
1257         if (!new_dentry->d_inode)
1258                 error = may_create(new_dir, new_dentry);
1259         else
1260                 error = may_delete(new_dir, new_dentry, 1);
1261         if (error)
1262                 return error;
1263
1264         if (!old_dir->i_op || !old_dir->i_op->rename)
1265                 return -EPERM;
1266
1267         /*
1268          * If we are going to change the parent - check write permissions,
1269          * we'll need to flip '..'.
1270          */
1271         if (new_dir != old_dir) {
1272                 error = permission(old_dentry->d_inode, MAY_WRITE);
1273         }
1274         if (error)
1275                 return error;
1276
1277         DQUOT_INIT(old_dir);
1278         DQUOT_INIT(new_dir);
1279         down(&old_dir->i_sb->s_vfs_rename_sem);
1280         error = -EINVAL;
1281         if (is_subdir(new_dentry, old_dentry))
1282                 goto out_unlock;
1283         if (new_dentry->d_inode) {
1284                 error = -EBUSY;
1285                 if (d_invalidate(new_dentry)<0)
1286                         goto out_unlock;
1287                 need_rehash = 1;
1288         }
1289         error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1290         if (need_rehash)
1291                 d_rehash(new_dentry);
1292         if (!error)
1293                 d_move(old_dentry,new_dentry);
1294 out_unlock:
1295         up(&old_dir->i_sb->s_vfs_rename_sem);
1296         return error;
1297 }
1298
1299 int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
1300                struct inode *new_dir, struct dentry *new_dentry)
1301 {
1302         int error;
1303
1304         if (old_dentry->d_inode == new_dentry->d_inode)
1305                 return 0;
1306
1307         error = may_delete(old_dir, old_dentry, 0);
1308         if (error)
1309                 return error;
1310
1311         if (new_dir->i_dev != old_dir->i_dev)
1312                 return -EXDEV;
1313
1314         if (!new_dentry->d_inode)
1315                 error = may_create(new_dir, new_dentry);
1316         else
1317                 error = may_delete(new_dir, new_dentry, 0);
1318         if (error)
1319                 return error;
1320
1321         if (!old_dir->i_op || !old_dir->i_op->rename)
1322                 return -EPERM;
1323
1324         DQUOT_INIT(old_dir);
1325         DQUOT_INIT(new_dir);
1326         error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1327         if (error)
1328                 return error;
1329         /* The following d_move() should become unconditional */
1330         if (!(old_dir->i_sb->s_flags & MS_ODD_RENAME)) {
1331                 d_move(old_dentry, new_dentry);
1332         }
1333         return 0;
1334 }
1335
1336 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1337                struct inode *new_dir, struct dentry *new_dentry)
1338 {
1339         if (S_ISDIR(old_dentry->d_inode->i_mode))
1340                 return vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
1341         else
1342                 return vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
1343 }
1344
1345 static inline int do_rename(const char * oldname, const char * newname)
1346 {
1347         int error;
1348         struct dentry * old_dir, * new_dir;
1349         struct dentry * old_dentry, *new_dentry;
1350
1351         old_dentry = lookup_dentry(oldname, NULL, 0);
1352
1353         error = PTR_ERR(old_dentry);
1354         if (IS_ERR(old_dentry))
1355                 goto exit;
1356
1357         error = -ENOENT;
1358         if (!old_dentry->d_inode)
1359                 goto exit_old;
1360
1361         {
1362                 unsigned int flags = 0;
1363                 if (S_ISDIR(old_dentry->d_inode->i_mode))
1364                         flags = LOOKUP_SLASHOK;
1365                 new_dentry = lookup_dentry(newname, NULL, flags);
1366         }
1367
1368         error = PTR_ERR(new_dentry);
1369         if (IS_ERR(new_dentry))
1370                 goto exit_old;
1371
1372         new_dir = get_parent(new_dentry);
1373         old_dir = get_parent(old_dentry);
1374
1375         double_lock(new_dir, old_dir);
1376
1377         error = -ENOENT;
1378         if (check_parent(old_dir, old_dentry) && check_parent(new_dir, new_dentry))
1379                 error = vfs_rename(old_dir->d_inode, old_dentry,
1380                                    new_dir->d_inode, new_dentry);
1381
1382         double_unlock(new_dir, old_dir);
1383         dput(new_dentry);
1384 exit_old:
1385         dput(old_dentry);
1386 exit:
1387         return error;
1388 }
1389
1390 asmlinkage int sys_rename(const char * oldname, const char * newname)
1391 {
1392         int error;
1393         char * from;
1394
1395         lock_kernel();
1396         from = getname(oldname);
1397         error = PTR_ERR(from);
1398         if (!IS_ERR(from)) {
1399                 char * to;
1400                 to = getname(newname);
1401                 error = PTR_ERR(to);
1402                 if (!IS_ERR(to)) {
1403                         error = do_rename(from,to);
1404                         putname(to);
1405                 }
1406                 putname(from);
1407         }
1408         unlock_kernel();
1409         return error;
1410 }