fs/namei.c

   1 /*
   2  *  linux/fs/namei.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6
   7 /*
   8  * Some corrections by tytso.
   9  */
  10
  11 /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
  12  * lookup logic.
  13  */
  14
  15 #include <linux/mm.h>
  16 #include <linux/proc_fs.h>
  17 #include <linux/smp_lock.h>
  18 #include <linux/quotaops.h>
  19
  20 #include <asm/uaccess.h>
  21 #include <asm/unaligned.h>
  22 #include <asm/semaphore.h>
  23 #include <asm/page.h>
  24 #include <asm/pgtable.h>
  25
/*
 * The bitmask for a lookup event:
 *  - follow links at the end
 *  - require a directory
 *  - ending slashes ok even for nonexistent files
 *  - internal "there are more path components" flag
 */
#define LOOKUP_FOLLOW           (1)     /* follow links at the end */
#define LOOKUP_DIRECTORY        (2)     /* require a directory */
#define LOOKUP_SLASHOK          (4)     /* ending slashes ok even for nonexistent files */
#define LOOKUP_CONTINUE         (8)     /* internal: more path components follow */
  37
  38 #include <asm/namei.h>
  39
  40 /* This can be removed after the beta phase. */
  41 #define CACHE_SUPERVISE /* debug the correctness of dcache entries */
  42 #undef DEBUG            /* some other debugging */
  43
  44
  45 #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
  46
  47 /* [Feb-1997 T. Schoebel-Theuer]
  48  * Fundamental changes in the pathname lookup mechanisms (namei)
  49  * were necessary because of omirr.  The reason is that omirr needs
  50  * to know the _real_ pathname, not the user-supplied one, in case
  51  * of symlinks (and also when transname replacements occur).
  52  *
  53  * The new code replaces the old recursive symlink resolution with
  54  * an iterative one (in case of non-nested symlink chains).  It does
  55  * this with calls to <fs>_follow_link().
  56  * As a side effect, dir_namei(), _namei() and follow_link() are now
  57  * replaced with a single function lookup_dentry() that can handle all
  58  * the special cases of the former code.
  59  *
  60  * With the new dcache, the pathname is stored at each inode, at least as
  61  * long as the refcount of the inode is positive.  As a side effect, the
  62  * size of the dcache depends on the inode cache and thus is dynamic.
  63  *
  64  * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
  65  * resolution to correspond with current state of the code.
  66  *
  67  * Note that the symlink resolution is not *completely* iterative.
  68  * There is still a significant amount of tail- and mid- recursion in
  69  * the algorithm.  Also, note that <fs>_readlink() is not used in
  70  * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
  71  * may return different results than <fs>_follow_link().  Many virtual
  72  * filesystems (including /proc) exhibit this behavior.
  73  */
  74
  75 /* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
  76  * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
  77  * and the name already exists in form of a symlink, try to create the new
  78  * name indicated by the symlink. The old code always complained that the
  79  * name already exists, due to not following the symlink even if its target
  80  * is nonexistent.  The new semantics affects also mknod() and link() when
 * the name is a symlink pointing to a non-existent name.
  82  *
  83  * I don't know which semantics is the right one, since I have no access
  84  * to standards. But I found by trial that HP-UX 9.0 has the full "new"
  85  * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
  86  * "old" one. Personally, I think the new semantics is much more logical.
  87  * Note that "ln old new" where "new" is a symlink pointing to a non-existing
  88  * file does succeed in both HP-UX and SunOs, but not in Solaris
  89  * and in the old Linux semantics.
  90  */
  91
  92 /* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
  93  * semantics.  See the comments in "open_namei" and "do_link" below.
  94  *
  95  * [10-Sep-98 Alan Modra] Another symlink change.
  96  */
  97
  98 /* In order to reduce some races, while at the same time doing additional
  99  * checking and hopefully speeding things up, we copy filenames to the
 100  * kernel data space before using them..
 101  *
 102  * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
 103  */
 104 static inline int do_getname(const char *filename, char *page)
 105 {
 106         int retval;
 107         unsigned long len = PAGE_SIZE;
 108
 109         if ((unsigned long) filename >= TASK_SIZE) {
 110                 if (!segment_eq(get_fs(), KERNEL_DS))
 111                         return -EFAULT;
 112         } else if (TASK_SIZE - (unsigned long) filename < PAGE_SIZE)
 113                 len = TASK_SIZE - (unsigned long) filename;
 114
 115         retval = strncpy_from_user((char *)page, filename, len);
 116         if (retval > 0) {
 117                 if (retval < len)
 118                         return 0;
 119                 return -ENAMETOOLONG;
 120         } else if (!retval)
 121                 retval = -ENOENT;
 122         return retval;
 123 }
 124
 125 char * getname(const char * filename)
 126 {
 127         char *tmp, *result;
 128
 129         result = ERR_PTR(-ENOMEM);
 130         tmp = __getname();
 131         if (tmp)  {
 132                 int retval = do_getname(filename, tmp);
 133
 134                 result = tmp;
 135                 if (retval < 0) {
 136                         putname(tmp);
 137                         result = ERR_PTR(retval);
 138                 }
 139         }
 140         return result;
 141 }
 142
 143 /*
 144  *      permission()
 145  *
 146  * is used to check for read/write/execute permissions on a file.
 147  * We use "fsuid" for this, letting us set arbitrary permissions
 148  * for filesystem access without changing the "normal" uids which
 149  * are used for other things..
 150  */
 151 int permission(struct inode * inode,int mask)
 152 {
 153         int mode = inode->i_mode;
 154
 155         if (inode->i_op && inode->i_op->permission)
 156                 return inode->i_op->permission(inode, mask);
 157         else if ((mask & S_IWOTH) && IS_RDONLY(inode) &&
 158                  (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
 159                 return -EROFS; /* Nobody gets write access to a read-only fs */
 160         else if ((mask & S_IWOTH) && IS_IMMUTABLE(inode))
 161                 return -EACCES; /* Nobody gets write access to an immutable file */
 162         else if (current->fsuid == inode->i_uid)
 163                 mode >>= 6;
 164         else if (in_group_p(inode->i_gid))
 165                 mode >>= 3;
 166         if (((mode & mask & S_IRWXO) == mask) || capable(CAP_DAC_OVERRIDE))
 167                 return 0;
 168         /* read and search access */
 169         if ((mask == S_IROTH) ||
 170             (S_ISDIR(mode)  && !(mask & ~(S_IROTH | S_IXOTH))))
 171                 if (capable(CAP_DAC_READ_SEARCH))
 172                         return 0;
 173         return -EACCES;
 174 }
 175
 176 /*
 177  * get_write_access() gets write permission for a file.
 178  * put_write_access() releases this write permission.
 179  * This is used for regular files.
 180  * We cannot support write (and maybe mmap read-write shared) accesses and
 181  * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
 182  * can have the following values:
 183  * 0: no writers, no VM_DENYWRITE mappings
 184  * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
 185  * > 0: (i_writecount) users are writing to the file.
 186  */
 187 int get_write_access(struct inode * inode)
 188 {
 189         if (inode->i_writecount < 0)
 190                 return -ETXTBSY;
 191         inode->i_writecount++;
 192         return 0;
 193 }
 194
 195 void put_write_access(struct inode * inode)
 196 {
 197         inode->i_writecount--;
 198 }
 199
 200 /*
 201  * "." and ".." are special - ".." especially so because it has to be able
 202  * to know about the current root directory and parent relationships
 203  */
 204 static struct dentry * reserved_lookup(struct dentry * parent, struct qstr * name)
 205 {
 206         struct dentry *result = NULL;
 207         if (name->name[0] == '.') {
 208                 switch (name->len) {
 209                 default:
 210                         break;
 211                 case 2:
 212                         if (name->name[1] != '.')
 213                                 break;
 214
 215                         if (parent != current->fs->root)
 216                                 parent = parent->d_covers->d_parent;
 217                         /* fallthrough */
 218                 case 1:
 219                         result = parent;
 220                 }
 221         }
 222         return dget(result);
 223 }
 224
 225 /*
 226  * Internal lookup() using the new generic dcache.
 227  */
 228 static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name)
 229 {
 230         struct dentry * dentry = d_lookup(parent, name);
 231
 232         if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
 233                 if (!dentry->d_op->d_revalidate(dentry) && !d_invalidate(dentry)) {
 234                         dput(dentry);
 235                         dentry = NULL;
 236                 }
 237         }
 238         return dentry;
 239 }
 240
 241 /*
 242  * This is called when everything else fails, and we actually have
 243  * to go to the low-level filesystem to find out what we should do..
 244  *
 245  * We get the directory semaphore, and after getting that we also
 246  * make sure that nobody added the entry to the dcache in the meantime..
 247  */
 248 static struct dentry * real_lookup(struct dentry * parent, struct qstr * name)
 249 {
 250         struct dentry * result;
 251         struct inode *dir = parent->d_inode;
 252
 253         down(&dir->i_sem);
 254         /*
 255          * First re-do the cached lookup just in case it was created
 256          * while we waited for the directory semaphore..
 257          *
 258          * FIXME! This could use version numbering or similar to
 259          * avoid unnecessary cache lookups.
 260          */
 261         result = cached_lookup(parent, name);
 262         if (!result) {
 263                 struct dentry * dentry = d_alloc(parent, name);
 264                 result = ERR_PTR(-ENOMEM);
 265                 if (dentry) {
 266                         int error = dir->i_op->lookup(dir, dentry);
 267                         result = dentry;
 268                         if (error) {
 269                                 dput(dentry);
 270                                 result = ERR_PTR(error);
 271                         }
 272                 }
 273         }
 274         up(&dir->i_sem);
 275         return result;
 276 }
 277
 278 static struct dentry * do_follow_link(struct dentry *base, struct dentry *dentry, unsigned int follow)
 279 {
 280         struct inode * inode = dentry->d_inode;
 281
 282         if (inode && inode->i_op && inode->i_op->follow_link) {
 283                 if (current->link_count < 5) {
 284                         struct dentry * result;
 285
 286                         current->link_count++;
 287                         /* This eats the base */
 288                         result = inode->i_op->follow_link(dentry, base, follow);
 289                         current->link_count--;
 290                         dput(dentry);
 291                         return result;
 292                 }
 293                 dput(dentry);
 294                 dentry = ERR_PTR(-ELOOP);
 295         }
 296         dput(base);
 297         return dentry;
 298 }
 299
 300 static inline struct dentry * follow_mount(struct dentry * dentry)
 301 {
 302         struct dentry * mnt = dentry->d_mounts;
 303
 304         if (mnt != dentry) {
 305                 dget(mnt);
 306                 dput(dentry);
 307                 dentry = mnt;
 308         }
 309         return dentry;
 310 }
 311
/*
 * Name resolution.
 *
 * This is the basic name resolution function, turning a pathname
 * into the final dentry.
 *
 * name:         pathname to resolve.  A leading '/' restarts the walk at
 *               current->fs->root and drops any "base" passed in.
 * base:         dentry to start from, or NULL to start at current->fs->pwd.
 *               The reference is consumed: it is either dropped or, for an
 *               empty remaining name, returned as the result.
 * lookup_flags: only LOOKUP_FOLLOW, LOOKUP_DIRECTORY and LOOKUP_SLASHOK are
 *               kept for the component walk; other bits are masked off.
 *
 * Returns the resulting dentry or an ERR_PTR() value.  The dentry may have
 * no inode (see the no_inode comment below), so callers that need an
 * existing object must check d_inode themselves.
 */
struct dentry * lookup_dentry(const char * name, struct dentry * base, unsigned int lookup_flags)
{
        struct dentry * dentry;
        struct inode *inode;

        if (*name == '/') {
                if (base)
                        dput(base);
                /* skip every leading slash */
                do {
                        name++;
                } while (*name == '/');
                /* NOTE(review): not defined in this file, presumably from <asm/namei.h> - confirm what it does */
                __prefix_lookup_dentry(name, lookup_flags);
                base = dget(current->fs->root);
        } else if (!base) {
                base = dget(current->fs->pwd);
        }

        /* nothing left to resolve: the starting point is the answer */
        if (!*name)
                goto return_base;

        inode = base->d_inode;
        lookup_flags &= LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_SLASHOK;

        /* At this point we know we have a real path component. */
        for(;;) {
                int err;
                unsigned long hash;
                struct qstr this;
                unsigned int flags;
                unsigned int c;

                /* the directory being searched must grant MAY_EXEC */
                err = permission(inode, MAY_EXEC);
                dentry = ERR_PTR(err);
                if (err)
                        break;

                this.name = name;
                c = *(const unsigned char *)name;

                /* hash the component and advance name to the next '/' or the end */
                hash = init_name_hash();
                do {
                        name++;
                        hash = partial_name_hash(c, hash);
                        c = *(const unsigned char *)name;
                } while (c && (c != '/'));
                this.len = name - (const char *) this.name;
                this.hash = end_name_hash(hash);

                /* remove trailing slashes? */
                flags = lookup_flags;
                if (c) {
                        char tmp;

                        /*
                         * A slash follows this component, so it is treated
                         * as a directory and links are followed.
                         */
                        flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
                        do {
                                tmp = *++name;
                        } while (tmp == '/');
                        /* more text after the slashes: another component follows */
                        if (tmp)
                                flags |= LOOKUP_CONTINUE;
                }

                /*
                 * See if the low-level filesystem might want
                 * to use its own hash..
                 */
                if (base->d_op && base->d_op->d_hash) {
                        int error;
                        error = base->d_op->d_hash(base, &this);
                        if (error < 0) {
                                dentry = ERR_PTR(error);
                                break;
                        }
                }

                /* This does the actual lookups.. */
                /* order: "." and "..", then the dcache, then the filesystem */
                dentry = reserved_lookup(base, &this);
                if (!dentry) {
                        dentry = cached_lookup(base, &this);
                        if (!dentry) {
                                dentry = real_lookup(base, &this);
                                if (IS_ERR(dentry))
                                        break;
                        }
                }

                /* Check mountpoints.. */
                dentry = follow_mount(dentry);

                /* without LOOKUP_FOLLOW the dentry just found is the result */
                if (!(flags & LOOKUP_FOLLOW))
                        break;

                /* consumes both base and dentry; the result becomes the new base */
                base = do_follow_link(base, dentry, flags);
                if (IS_ERR(base))
                        goto return_base;

                inode = base->d_inode;
                if (flags & LOOKUP_DIRECTORY) {
                        if (!inode)
                                goto no_inode;
                        /* an inode without a lookup operation cannot be searched */
                        dentry = ERR_PTR(-ENOTDIR);
                        if (!inode->i_op || !inode->i_op->lookup)
                                break;
                        if (flags & LOOKUP_CONTINUE)
                                continue;
                }
return_base:
                return base;
/*
 * The case of a nonexisting file is special.
 *
 * In the middle of a pathname lookup (ie when
 * LOOKUP_CONTINUE is set), it's an obvious
 * error and returns ENOENT.
 *
 * At the end of a pathname lookup it's legal,
 * and we return a negative dentry. However, we
 * get here only if there were trailing slashes,
 * which is legal only if we know it's supposed
 * to be a directory (ie "mkdir"). Thus the
 * LOOKUP_SLASHOK flag.
 */
no_inode:
                dentry = ERR_PTR(-ENOENT);
                if (flags & LOOKUP_CONTINUE)
                        break;
                if (flags & LOOKUP_SLASHOK)
                        goto return_base;
                break;
        }
        /* every "break" above lands here: drop base and hand back dentry */
        dput(base);
        return dentry;
}
 450
 451 /*
 452  *      namei()
 453  *
 454  * is used by most simple commands to get the inode of a specified name.
 455  * Open, link etc use their own routines, but this is enough for things
 456  * like 'chmod' etc.
 457  *
 458  * namei exists in two versions: namei/lnamei. The only difference is
 459  * that namei follows links, while lnamei does not.
 460  */
 461 struct dentry * __namei(const char *pathname, unsigned int lookup_flags)
 462 {
 463         char *name;
 464         struct dentry *dentry;
 465
 466         name = getname(pathname);
 467         dentry = (struct dentry *) name;
 468         if (!IS_ERR(name)) {
 469                 dentry = lookup_dentry(name, NULL, lookup_flags);
 470                 putname(name);
 471                 if (!IS_ERR(dentry)) {
 472                         if (!dentry->d_inode) {
 473                                 dput(dentry);
 474                                 dentry = ERR_PTR(-ENOENT);
 475                         }
 476                 }
 477         }
 478         return dentry;
 479 }
 480
 481 /*
 482  * It's inline, so penalty for filesystems that don't use sticky bit is
 483  * minimal.
 484  */
 485 static inline int check_sticky(struct inode *dir, struct inode *inode)
 486 {
 487         if (!(dir->i_mode & S_ISVTX))
 488                 return 0;
 489         if (inode->i_uid == current->fsuid)
 490                 return 0;
 491         if (dir->i_uid == current->fsuid)
 492                 return 0;
 493         return !capable(CAP_FOWNER);
 494 }
 495
 496 /*
 497  *      Check whether we can remove a link victim from directory dir, check
 498  *  whether the type of victim is right.
 499  *  1. We can't do it if dir is read-only (done in permission())
 500  *  2. We should have write and exec permissions on dir
 501  *  3. We can't remove anything from append-only dir
 502  *  4. We can't do anything with immutable dir (done in permission())
 503  *  5. If the sticky bit on dir is set we should either
 504  *      a. be owner of dir, or
 505  *      b. be owner of victim, or
 506  *      c. have CAP_FOWNER capability
 507  *  6. If the victim is append-only or immutable we can't do antyhing with
 508  *     links pointing to it.
 509  *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
 510  *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
 511  *  9. We can't remove a root or mountpoint.
 512  */
 513 static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir)
 514 {
 515         int error;
 516         if (!victim->d_inode || victim->d_parent->d_inode != dir)
 517                 return -ENOENT;
 518         error = permission(dir,MAY_WRITE | MAY_EXEC);
 519         if (error)
 520                 return error;
 521         if (IS_APPEND(dir))
 522                 return -EPERM;
 523         if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
 524             IS_IMMUTABLE(victim->d_inode))
 525                 return -EPERM;
 526         if (isdir) {
 527                 if (!S_ISDIR(victim->d_inode->i_mode))
 528                         return -ENOTDIR;
 529                 if (IS_ROOT(victim))
 530                         return -EBUSY;
 531                 if (victim->d_mounts != victim->d_covers)
 532                         return -EBUSY;
 533         } else if (S_ISDIR(victim->d_inode->i_mode))
 534                 return -EISDIR;
 535         return 0;
 536 }
 537
 538 /*      Check whether we can create an object with dentry child in directory
 539  *  dir.
 540  *  1. We can't do it if child already exists (open has special treatment for
 541  *     this case, but since we are inlined it's OK)
 542  *  2. We can't do it if dir is read-only (done in permission())
 543  *  3. We should have write and exec permissions on dir
 544  *  4. We can't do it if dir is immutable (done in permission())
 545  */
 546 static inline int may_create(struct inode *dir, struct dentry *child) {
 547         if (child->d_inode)
 548                 return -EEXIST;
 549         return permission(dir,MAY_WRITE | MAY_EXEC);
 550 }
 551
 552 static inline struct dentry *get_parent(struct dentry *dentry)
 553 {
 554         return dget(dentry->d_parent);
 555 }
 556
 557 static inline void unlock_dir(struct dentry *dir)
 558 {
 559         up(&dir->d_inode->i_sem);
 560         dput(dir);
 561 }
 562
 563 /*
 564  * We need to do a check-parent every time
 565  * after we have locked the parent - to verify
 566  * that the parent is still our parent and
 567  * that we are still hashed onto it..
 568  *
 569  * This is requied in case two processes race
 570  * on removing (or moving) the same entry: the
 571  * parent lock will serialize them, but the
 572  * other process will be too late..
 573  */
 574 #define check_parent(dir, dentry) \
 575         ((dir) == (dentry)->d_parent && !list_empty(&dentry->d_hash))
 576
 577 /*
 578  * Locking the parent is needed to:
 579  *  - serialize directory operations
 580  *  - make sure the parent doesn't change from
 581  *    under us in the middle of an operation.
 582  *
 583  * NOTE! Right now we'd rather use a "struct inode"
 584  * for this, but as I expect things to move toward
 585  * using dentries instead for most things it is
 586  * probably better to start with the conceptually
 587  * better interface of relying on a path of dentries.
 588  */
 589 static inline struct dentry *lock_parent(struct dentry *dentry)
 590 {
 591         struct dentry *dir = dget(dentry->d_parent);
 592
 593         down(&dir->d_inode->i_sem);
 594         return dir;
 595 }
 596
 597 /*
 598  * Whee.. Deadlock country. Happily there are only two VFS
 599  * operations that do this..
 600  */
 601 static inline void double_lock(struct dentry *d1, struct dentry *d2)
 602 {
 603         struct semaphore *s1 = &d1->d_inode->i_sem;
 604         struct semaphore *s2 = &d2->d_inode->i_sem;
 605
 606         if (s1 != s2) {
 607                 if ((unsigned long) s1 < (unsigned long) s2) {
 608                         struct semaphore *tmp = s2;
 609                         s2 = s1; s1 = tmp;
 610                 }
 611                 down(s1);
 612         }
 613         down(s2);
 614 }
 615
 616 static inline void double_unlock(struct dentry *d1, struct dentry *d2)
 617 {
 618         struct semaphore *s1 = &d1->d_inode->i_sem;
 619         struct semaphore *s2 = &d2->d_inode->i_sem;
 620
 621         up(s1);
 622         if (s1 != s2)
 623                 up(s2);
 624         dput(d1);
 625         dput(d2);
 626 }
 627
 628
 629 /*
 630  * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security
 631  * reasons.
 632  *
 633  * O_DIRECTORY translates into forcing a directory lookup.
 634  */
 635 static inline int lookup_flags(unsigned int f)
 636 {
 637         unsigned long retval = LOOKUP_FOLLOW;
 638
 639         if (f & O_NOFOLLOW)
 640                 retval &= ~LOOKUP_FOLLOW;
 641
 642         if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
 643                 retval &= ~LOOKUP_FOLLOW;
 644
 645         if (f & O_DIRECTORY)
 646                 retval |= LOOKUP_DIRECTORY;
 647
 648         return retval;
 649 }
 650
 651 /*
 652  *      open_namei()
 653  *
 654  * namei for open - this is in fact almost the whole open-routine.
 655  *
 656  * Note that the low bits of "flag" aren't the same as in the open
 657  * system call - they are 00 - no permissions needed
 658  *                        01 - read permission needed
 659  *                        10 - write permission needed
 660  *                        11 - read/write permissions needed
 661  * which is a lot more logical, and also allows the "no perm" needed
 662  * for symlinks (where the permissions are checked later).
 663  */
 664 struct dentry * open_namei(const char * pathname, int flag, int mode)
 665 {
 666         int acc_mode, error;
 667         struct inode *inode;
 668         struct dentry *dentry;
 669
 670         mode &= S_IALLUGO & ~current->fs->umask;
 671         mode |= S_IFREG;
 672
 673         dentry = lookup_dentry(pathname, NULL, lookup_flags(flag));
 674         if (IS_ERR(dentry))
 675                 return dentry;
 676
 677         acc_mode = ACC_MODE(flag);
 678         if (flag & O_CREAT) {
 679                 struct dentry *dir;
 680
 681                 if (dentry->d_inode) {
 682                         if (!(flag & O_EXCL))
 683                                 goto nocreate;
 684                         error = -EEXIST;
 685                         goto exit;
 686                 }
 687
 688                 dir = lock_parent(dentry);
 689                 if (!check_parent(dir, dentry)) {
 690                         /*
 691                          * Really nasty race happened. What's the
 692                          * right error code? We had a dentry, but
 693                          * before we could use it it was removed
 694                          * by somebody else. We could just re-try
 695                          * everything, I guess.
 696                          *
 697                          * ENOENT is definitely wrong.
 698                          */
 699                         error = -ENOENT;
 700                         unlock_dir(dir);
 701                         goto exit;
 702                 }
 703
 704                 /*
 705                  * Somebody might have created the file while we
 706                  * waited for the directory lock.. So we have to
 707                  * re-do the existence test.
 708                  */
 709                 if (dentry->d_inode) {
 710                         error = 0;
 711                         if (flag & O_EXCL)
 712                                 error = -EEXIST;
 713                 } else if ((error = may_create(dir->d_inode, dentry)) == 0) {
 714                         if (!dir->d_inode->i_op || !dir->d_inode->i_op->create)
 715                                 error = -EACCES;
 716                         else {
 717                                 DQUOT_INIT(dir->d_inode);
 718                                 error = dir->d_inode->i_op->create(dir->d_inode, dentry, mode);
 719                                 /* Don't check for write permission, don't truncate */
 720                                 acc_mode = 0;
 721                                 flag &= ~O_TRUNC;
 722                         }
 723                 }
 724                 unlock_dir(dir);
 725                 if (error)
 726                         goto exit;
 727         }
 728
 729 nocreate:
 730         error = -ENOENT;
 731         inode = dentry->d_inode;
 732         if (!inode)
 733                 goto exit;
 734
 735         error = -ELOOP;
 736         if (S_ISLNK(inode->i_mode))
 737                 goto exit;
 738
 739         error = -EISDIR;
 740         if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
 741                 goto exit;
 742
 743         error = permission(inode,acc_mode);
 744         if (error)
 745                 goto exit;
 746
 747         /*
 748          * FIFO's, sockets and device files are special: they don't
 749          * actually live on the filesystem itself, and as such you
 750          * can write to them even if the filesystem is read-only.
 751          */
 752         if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
 753                 flag &= ~O_TRUNC;
 754         } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
 755                 error = -EACCES;
 756                 if (IS_NODEV(inode))
 757                         goto exit;
 758
 759                 flag &= ~O_TRUNC;
 760         } else {
 761                 error = -EROFS;
 762                 if (IS_RDONLY(inode) && (flag & 2))
 763                         goto exit;
 764         }
 765         /*
 766          * An append-only file must be opened in append mode for writing.
 767          */
 768         error = -EPERM;
 769         if (IS_APPEND(inode)) {
 770                 if  ((flag & FMODE_WRITE) && !(flag & O_APPEND))
 771                         goto exit;
 772                 if (flag & O_TRUNC)
 773                         goto exit;
 774         }
 775
 776         if (flag & O_TRUNC) {
 777                 error = get_write_access(inode);
 778                 if (error)
 779                         goto exit;
 780
 781                 /*
 782                  * Refuse to truncate files with mandatory locks held on them.
 783                  */
 784                 error = locks_verify_locked(inode);
 785                 if (!error) {
 786                         DQUOT_INIT(inode);
 787
 788                         error = do_truncate(dentry, 0);
 789                 }
 790                 put_write_access(inode);
 791                 if (error)
 792                         goto exit;
 793         } else
 794                 if (flag & FMODE_WRITE)
 795                         DQUOT_INIT(inode);
 796
 797         return dentry;
 798
 799 exit:
 800         dput(dentry);
 801         return ERR_PTR(error);
 802 }
 803
 804 struct dentry * do_mknod(const char * filename, int mode, dev_t dev)
 805 {
 806         int error;
 807         struct dentry *dir;
 808         struct dentry *dentry, *retval;
 809
 810         mode &= ~current->fs->umask;
 811         dentry = lookup_dentry(filename, NULL, LOOKUP_FOLLOW);
 812         if (IS_ERR(dentry))
 813                 return dentry;
 814
 815         dir = lock_parent(dentry);
 816         error = -ENOENT;
 817         if (!check_parent(dir, dentry))
 818                 goto exit_lock;
 819
 820         error = may_create(dir->d_inode, dentry);
 821         if (error)
 822                 goto exit_lock;
 823
 824         error = -EPERM;
 825         if (!dir->d_inode->i_op || !dir->d_inode->i_op->mknod)
 826                 goto exit_lock;
 827
 828         DQUOT_INIT(dir->d_inode);
 829         error = dir->d_inode->i_op->mknod(dir->d_inode, dentry, mode, dev);
 830 exit_lock:
 831         retval = ERR_PTR(error);
 832         if (!error)
 833                 retval = dget(dentry);
 834         unlock_dir(dir);
 835         dput(dentry);
 836         return retval;
 837 }
 838
 839 asmlinkage int sys_mknod(const char * filename, int mode, dev_t dev)
 840 {
 841         int error;
 842         char * tmp;
 843
 844         lock_kernel();
 845         error = -EPERM;
 846         if (S_ISDIR(mode) || (!S_ISFIFO(mode) && !capable(CAP_SYS_ADMIN)))
 847                 goto out;
 848         error = -EINVAL;
 849         switch (mode & S_IFMT) {
 850         case 0:
 851                 mode |= S_IFREG;
 852                 break;
 853         case S_IFREG: case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
 854                 break;
 855         default:
 856                 goto out;
 857         }
 858         tmp = getname(filename);
 859         error = PTR_ERR(tmp);
 860         if (!IS_ERR(tmp)) {
 861                 struct dentry * dentry = do_mknod(tmp,mode,dev);
 862                 putname(tmp);
 863                 error = PTR_ERR(dentry);
 864                 if (!IS_ERR(dentry)) {
 865                         dput(dentry);
 866                         error = 0;
 867                 }
 868         }
 869 out:
 870         unlock_kernel();
 871         return error;
 872 }
 873
 874 /*
 875  * Look out: this function may change a normal dentry
 876  * into a directory dentry (different size)..
 877  */
 878 static inline int do_mkdir(const char * pathname, int mode)
 879 {
 880         int error;
 881         struct dentry *dir;
 882         struct dentry *dentry;
 883
 884         dentry = lookup_dentry(pathname, NULL, LOOKUP_SLASHOK);
 885         error = PTR_ERR(dentry);
 886         if (IS_ERR(dentry))
 887                 goto exit;
 888
 889         /*
 890          * EEXIST is kind of a strange error code to
 891          * return, but basically if the dentry was moved
 892          * or unlinked while we locked the parent, we
 893          * do know that it _did_ exist before, and as
 894          * such it makes perfect sense.. In contrast,
 895          * ENOENT doesn't make sense for mkdir.
 896          */
 897         dir = lock_parent(dentry);
 898         error = -EEXIST;
 899         if (!check_parent(dir, dentry))
 900                 goto exit_lock;
 901
 902         error = may_create(dir->d_inode, dentry);
 903         if (error)
 904                 goto exit_lock;
 905
 906         error = -EPERM;
 907         if (!dir->d_inode->i_op || !dir->d_inode->i_op->mkdir)
 908                 goto exit_lock;
 909
 910         DQUOT_INIT(dir->d_inode);
 911         mode &= 0777 & ~current->fs->umask;
 912         error = dir->d_inode->i_op->mkdir(dir->d_inode, dentry, mode);
 913
 914 exit_lock:
 915         unlock_dir(dir);
 916         dput(dentry);
 917 exit:
 918         return error;
 919 }
 920
 921 asmlinkage int sys_mkdir(const char * pathname, int mode)
 922 {
 923         int error;
 924         char * tmp;
 925
 926         lock_kernel();
 927         tmp = getname(pathname);
 928         error = PTR_ERR(tmp);
 929         if (!IS_ERR(tmp)) {
 930                 error = do_mkdir(tmp,mode);
 931                 putname(tmp);
 932         }
 933         unlock_kernel();
 934         return error;
 935 }
 936
 937 int vfs_rmdir(struct inode *dir, struct dentry *dentry)
 938 {
 939         int error;
 940
 941         error = may_delete(dir, dentry, 1);
 942         if (error)
 943                 return error;
 944
 945         if (!dir->i_op || !dir->i_op->rmdir)
 946                 return -EPERM;
 947
 948         DQUOT_INIT(dir);
 949
 950         /*
 951          * We try to drop the dentry early: we should have
 952          * a usage count of 2 if we're the only user of this
 953          * dentry, and if that is true (possibly after pruning
 954          * the dcache), then we drop the dentry now.
 955          *
 956          * A low-level filesystem can, if it choses, legally
 957          * do a
 958          *
 959          *      if (!list_empty(&dentry->d_hash))
 960          *              return -EBUSY;
 961          *
 962          * if it cannot handle the case of removing a directory
 963          * that is still in use by something else..
 964          */
 965         switch (dentry->d_count) {
 966         default:
 967                 shrink_dcache_parent(dentry);
 968                 if (dentry->d_count != 2)
 969                         break;
 970         case 2:
 971                 d_drop(dentry);
 972         }
 973
 974         error = dir->i_op->rmdir(dir, dentry);
 975
 976         return error;
 977 }
 978
 979 static inline int do_rmdir(const char * name)
 980 {
 981         int error;
 982         struct dentry *dir;
 983         struct dentry *dentry;
 984
 985         dentry = lookup_dentry(name, NULL, 0);
 986         error = PTR_ERR(dentry);
 987         if (IS_ERR(dentry))
 988                 goto exit;
 989
 990         error = -ENOENT;
 991         if (!dentry->d_inode)
 992                 goto exit_dput;
 993
 994         dir = dget(dentry->d_parent);
 995
 996         /*
 997          * The dentry->d_count stuff confuses d_delete() enough to
 998          * not kill the inode from under us while it is locked. This
 999          * wouldn't be needed, except the dentry semaphore is really
1000          * in the inode, not in the dentry..
1001          */
1002         dentry->d_count++;
1003         double_lock(dir, dentry);
1004
1005         error = -ENOENT;
1006         if (check_parent(dir, dentry))
1007                 error = vfs_rmdir(dir->d_inode, dentry);
1008
1009         double_unlock(dentry, dir);
1010 exit_dput:
1011         dput(dentry);
1012 exit:
1013         return error;
1014 }
1015
1016 asmlinkage int sys_rmdir(const char * pathname)
1017 {
1018         int error;
1019         char * tmp;
1020
1021         lock_kernel();
1022         tmp = getname(pathname);
1023         error = PTR_ERR(tmp);
1024         if (!IS_ERR(tmp)) {
1025                 error = do_rmdir(tmp);
1026                 putname(tmp);
1027         }
1028         unlock_kernel();
1029         return error;
1030 }
1031
1032 int vfs_unlink(struct inode *dir, struct dentry *dentry)
1033 {
1034         int error;
1035
1036         error = may_delete(dir, dentry, 0);
1037         if (error)
1038                 goto exit_lock;
1039
1040         if (!dir->i_op || !dir->i_op->unlink)
1041                 goto exit_lock;
1042
1043         DQUOT_INIT(dir);
1044
1045         error = dir->i_op->unlink(dir, dentry);
1046
1047 exit_lock:
1048         return error;
1049 }
1050
1051 static inline int do_unlink(const char * name)
1052 {
1053         int error;
1054         struct dentry *dir;
1055         struct dentry *dentry;
1056
1057         dentry = lookup_dentry(name, NULL, 0);
1058         error = PTR_ERR(dentry);
1059         if (IS_ERR(dentry))
1060                 goto exit;
1061
1062         dir = lock_parent(dentry);
1063         error = -ENOENT;
1064         if (check_parent(dir, dentry))
1065                 error = vfs_unlink(dir->d_inode, dentry);
1066
1067         unlock_dir(dir);
1068         dput(dentry);
1069 exit:
1070         return error;
1071 }
1072
1073 asmlinkage int sys_unlink(const char * pathname)
1074 {
1075         int error;
1076         char * tmp;
1077
1078         lock_kernel();
1079         tmp = getname(pathname);
1080         error = PTR_ERR(tmp);
1081         if (!IS_ERR(tmp)) {
1082                 error = do_unlink(tmp);
1083                 putname(tmp);
1084         }
1085         unlock_kernel();
1086         return error;
1087 }
1088
1089 static inline int do_symlink(const char * oldname, const char * newname)
1090 {
1091         int error;
1092         struct dentry *dir;
1093         struct dentry *dentry;
1094
1095         dentry = lookup_dentry(newname, NULL, 0);
1096
1097         error = PTR_ERR(dentry);
1098         if (IS_ERR(dentry))
1099                 goto exit;
1100
1101         dir = lock_parent(dentry);
1102         error = -ENOENT;
1103         if (!check_parent(dir, dentry))
1104                 goto exit_lock;
1105
1106         error = may_create(dir->d_inode, dentry);
1107         if (error)
1108                 goto exit_lock;
1109
1110         error = -EPERM;
1111         if (!dir->d_inode->i_op || !dir->d_inode->i_op->symlink)
1112                 goto exit_lock;
1113
1114         DQUOT_INIT(dir->d_inode);
1115         error = dir->d_inode->i_op->symlink(dir->d_inode, dentry, oldname);
1116
1117 exit_lock:
1118         unlock_dir(dir);
1119         dput(dentry);
1120 exit:
1121         return error;
1122 }
1123
1124 asmlinkage int sys_symlink(const char * oldname, const char * newname)
1125 {
1126         int error;
1127         char * from;
1128
1129         lock_kernel();
1130         from = getname(oldname);
1131         error = PTR_ERR(from);
1132         if (!IS_ERR(from)) {
1133                 char * to;
1134                 to = getname(newname);
1135                 error = PTR_ERR(to);
1136                 if (!IS_ERR(to)) {
1137                         error = do_symlink(from,to);
1138                         putname(to);
1139                 }
1140                 putname(from);
1141         }
1142         unlock_kernel();
1143         return error;
1144 }
1145
1146 static inline int do_link(const char * oldname, const char * newname)
1147 {
1148         struct dentry *old_dentry, *new_dentry, *dir;
1149         struct inode *inode;
1150         int error;
1151
1152         /*
1153          * Hardlinks are often used in delicate situations.  We avoid
1154          * security-related surprises by not following symlinks on the
1155          * newname.  --KAB
1156          *
1157          * We don't follow them on the oldname either to be compatible
1158          * with linux 2.0, and to avoid hard-linking to directories
1159          * and other special files.  --ADM
1160          */
1161         old_dentry = lookup_dentry(oldname, NULL, 0);
1162         error = PTR_ERR(old_dentry);
1163         if (IS_ERR(old_dentry))
1164                 goto exit;
1165
1166         new_dentry = lookup_dentry(newname, NULL, 0);
1167         error = PTR_ERR(new_dentry);
1168         if (IS_ERR(new_dentry))
1169                 goto exit_old;
1170
1171         dir = lock_parent(new_dentry);
1172         error = -ENOENT;
1173         if (!check_parent(dir, new_dentry))
1174                 goto exit_lock;
1175
1176         error = -ENOENT;
1177         inode = old_dentry->d_inode;
1178         if (!inode)
1179                 goto exit_lock;
1180
1181         error = may_create(dir->d_inode, new_dentry);
1182         if (error)
1183                 goto exit_lock;
1184
1185         error = -EXDEV;
1186         if (dir->d_inode->i_dev != inode->i_dev)
1187                 goto exit_lock;
1188
1189         /*
1190          * A link to an append-only or immutable file cannot be created.
1191          */
1192         error = -EPERM;
1193         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1194                 goto exit_lock;
1195
1196         error = -EPERM;
1197         if (!dir->d_inode->i_op || !dir->d_inode->i_op->link)
1198                 goto exit_lock;
1199
1200         DQUOT_INIT(dir->d_inode);
1201         error = dir->d_inode->i_op->link(old_dentry, dir->d_inode, new_dentry);
1202
1203 exit_lock:
1204         unlock_dir(dir);
1205         dput(new_dentry);
1206 exit_old:
1207         dput(old_dentry);
1208 exit:
1209         return error;
1210 }
1211
1212 asmlinkage int sys_link(const char * oldname, const char * newname)
1213 {
1214         int error;
1215         char * from;
1216
1217         lock_kernel();
1218         from = getname(oldname);
1219         error = PTR_ERR(from);
1220         if (!IS_ERR(from)) {
1221                 char * to;
1222                 to = getname(newname);
1223                 error = PTR_ERR(to);
1224                 if (!IS_ERR(to)) {
1225                         error = do_link(from,to);
1226                         putname(to);
1227                 }
1228                 putname(from);
1229         }
1230         unlock_kernel();
1231         return error;
1232 }
1233
1234 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1235                struct inode *new_dir, struct dentry *new_dentry)
1236 {
1237         int error;
1238         int isdir;
1239
1240         isdir = S_ISDIR(old_dentry->d_inode->i_mode);
1241
1242         error = may_delete(old_dir, old_dentry, isdir); /* XXX */
1243         if (error)
1244                 return error;
1245
1246         if (new_dir->i_dev != old_dir->i_dev)
1247                 return -EXDEV;
1248
1249         if (!new_dentry->d_inode)
1250                 error = may_create(new_dir, new_dentry);
1251         else
1252                 error = may_delete(new_dir, new_dentry, isdir);
1253         if (error)
1254                 return error;
1255
1256         if (!old_dir->i_op || !old_dir->i_op->rename)
1257                 return -EPERM;
1258
1259         DQUOT_INIT(old_dir);
1260         DQUOT_INIT(new_dir);
1261         error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1262
1263         return error;
1264 }
1265
1266 static inline int do_rename(const char * oldname, const char * newname)
1267 {
1268         int error;
1269         struct dentry * old_dir, * new_dir;
1270         struct dentry * old_dentry, *new_dentry;
1271
1272         old_dentry = lookup_dentry(oldname, NULL, 0);
1273
1274         error = PTR_ERR(old_dentry);
1275         if (IS_ERR(old_dentry))
1276                 goto exit;
1277
1278         error = -ENOENT;
1279         if (!old_dentry->d_inode)
1280                 goto exit_old;
1281
1282         {
1283                 unsigned int flags = 0;
1284                 if (S_ISDIR(old_dentry->d_inode->i_mode))
1285                         flags = LOOKUP_SLASHOK;
1286                 new_dentry = lookup_dentry(newname, NULL, flags);
1287         }
1288
1289         error = PTR_ERR(new_dentry);
1290         if (IS_ERR(new_dentry))
1291                 goto exit_old;
1292
1293         new_dir = get_parent(new_dentry);
1294         old_dir = get_parent(old_dentry);
1295
1296         double_lock(new_dir, old_dir);
1297
1298         error = -ENOENT;
1299         if (check_parent(old_dir, old_dentry) && check_parent(new_dir, new_dentry))
1300                 error = vfs_rename(old_dir->d_inode, old_dentry,
1301                                    new_dir->d_inode, new_dentry);
1302
1303         double_unlock(new_dir, old_dir);
1304         dput(new_dentry);
1305 exit_old:
1306         dput(old_dentry);
1307 exit:
1308         return error;
1309 }
1310
1311 asmlinkage int sys_rename(const char * oldname, const char * newname)
1312 {
1313         int error;
1314         char * from;
1315
1316         lock_kernel();
1317         from = getname(oldname);
1318         error = PTR_ERR(from);
1319         if (!IS_ERR(from)) {
1320                 char * to;
1321                 to = getname(newname);
1322                 error = PTR_ERR(to);
1323                 if (!IS_ERR(to)) {
1324                         error = do_rename(from,to);
1325                         putname(to);
1326                 }
1327                 putname(from);
1328         }
1329         unlock_kernel();
1330         return error;
1331 }