fs/namei.c

   1 /*
   2  *  linux/fs/namei.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6
   7 /*
   8  * Some corrections by tytso.
   9  */
  10
  11 /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
  12  * lookup logic.
  13  */
  14 /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
  15  */
  16
  17 #include <linux/mm.h>
  18 #include <linux/proc_fs.h>
  19 #include <linux/smp_lock.h>
  20 #include <linux/quotaops.h>
  21 #include <linux/pagemap.h>
  22 #include <linux/dcache.h>
  23
  24 #include <asm/uaccess.h>
  25 #include <asm/unaligned.h>
  26 #include <asm/semaphore.h>
  27 #include <asm/page.h>
  28 #include <asm/pgtable.h>
  29
  30 #include <asm/namei.h>
  31
  32 #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) /* 4-entry table: (x)&O_ACCMODE of 0,1,2,3 yields 0,4,2,6 (presumably a read/write permission mask - confirm at the callers) */
  33
  34 /* [Feb-1997 T. Schoebel-Theuer]
  35  * Fundamental changes in the pathname lookup mechanisms (namei)
  36  * were necessary because of omirr.  The reason is that omirr needs
  37  * to know the _real_ pathname, not the user-supplied one, in case
  38  * of symlinks (and also when transname replacements occur).
  39  *
  40  * The new code replaces the old recursive symlink resolution with
  41  * an iterative one (in case of non-nested symlink chains).  It does
  42  * this with calls to <fs>_follow_link().
  43  * As a side effect, dir_namei(), _namei() and follow_link() are now
  44  * replaced with a single function lookup_dentry() that can handle all
  45  * the special cases of the former code.
  46  *
  47  * With the new dcache, the pathname is stored at each inode, at least as
  48  * long as the refcount of the inode is positive.  As a side effect, the
  49  * size of the dcache depends on the inode cache and thus is dynamic.
  50  *
  51  * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
  52  * resolution to correspond with current state of the code.
  53  *
  54  * Note that the symlink resolution is not *completely* iterative.
  55  * There is still a significant amount of tail- and mid- recursion in
  56  * the algorithm.  Also, note that <fs>_readlink() is not used in
  57  * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
  58  * may return different results than <fs>_follow_link().  Many virtual
  59  * filesystems (including /proc) exhibit this behavior.
  60  */
  61
  62 /* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
  63  * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
  64  * and the name already exists in form of a symlink, try to create the new
  65  * name indicated by the symlink. The old code always complained that the
  66  * name already exists, due to not following the symlink even if its target
  67  * is nonexistent.  The new semantics affects also mknod() and link() when
  68  * the name is a symlink pointing to a non-existent name.
  69  *
  70  * I don't know which semantics is the right one, since I have no access
  71  * to standards. But I found by trial that HP-UX 9.0 has the full "new"
  72  * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
  73  * "old" one. Personally, I think the new semantics is much more logical.
  74  * Note that "ln old new" where "new" is a symlink pointing to a non-existing
  75  * file does succeed in both HP-UX and SunOS, but not in Solaris
  76  * and in the old Linux semantics.
  77  */
  78
  79 /* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
  80  * semantics.  See the comments in "open_namei" and "do_link" below.
  81  *
  82  * [10-Sep-98 Alan Modra] Another symlink change.
  83  */
  84
  85 /* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
  86  *      inside the path - always follow.
  87  *      in the last component in creation/removal/renaming - never follow.
  88  *      if LOOKUP_FOLLOW passed - follow.
  89  *      if the pathname has trailing slashes - follow.
  90  *      otherwise - don't follow.
  91  * (applied in that order).
  92  */
  93
  94 /*
  95  * Pathnames are copied into kernel memory before use: that narrows some
  96  * races and allows extra checking.  POSIX.1 2.4: "" is invalid (ENOENT).
  97  *
  98  * Returns 0 on success, -ENOENT for an empty name, -ENAMETOOLONG when the
  99  * name fills the whole copy window, -EFAULT for an address at or above
 100  * TASK_SIZE unless get_fs() is KERNEL_DS, else strncpy_from_user()'s error.
 101  */
 102 static inline int do_getname(const char *filename, char *page)
 103 {
 104         unsigned long addr = (unsigned long) filename;
 105         unsigned long limit = PAGE_SIZE;
 106         int copied;
 107
 108         if (addr >= TASK_SIZE && !segment_eq(get_fs(), KERNEL_DS))
 109                 return -EFAULT;
 110         /* never let the copy window extend beyond TASK_SIZE */
 111         if (addr < TASK_SIZE && TASK_SIZE - addr < PAGE_SIZE)
 112                 limit = TASK_SIZE - addr;
 113
 114         copied = strncpy_from_user(page, filename, limit);
 115         if (copied <= 0)
 116                 return copied ? copied : -ENOENT;
 117         /* filling the whole window is reported as "name too long" */
 118         return copied < limit ? 0 : -ENAMETOOLONG;
 119 }
 120
 121 /* Copy a user pathname into a __getname() buffer; ERR_PTR() on failure. */
 122 char * getname(const char * filename)
 123 {
 124         char *page = __getname();
 125         int err;
 126
 127         if (!page)
 128                 return ERR_PTR(-ENOMEM);
 129
 130         err = do_getname(filename, page);
 131         if (err >= 0)
 132                 return page;
 133
 134         /* the copy failed: give the buffer back, hand out the errno */
 135         putname(page);
 136         return ERR_PTR(err);
 137 }
 138
 139 /*
 140  *      permission()
 141  *
 142  * checks read/write/execute access to an inode.  "fsuid" is used so that
 143  * filesystem access rights can be set without touching the "normal" uids.
 144  * Returns 0 when access is granted, -EROFS or -EACCES when it is refused,
 145  * or the result of inode->i_op->permission() when the inode supplies one.
 146  */
 147 int permission(struct inode * inode,int mask)
 148 {
 149         int mode = inode->i_mode;
 150
 151         if (inode->i_op && inode->i_op->permission)
 152                 return inode->i_op->permission(inode, mask);
 153         else if ((mask & S_IWOTH) && IS_RDONLY(inode) &&
 154                  (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
 155                 return -EROFS; /* Nobody gets write access to a read-only fs */
 156         else if ((mask & S_IWOTH) && IS_IMMUTABLE(inode))
 157                 return -EACCES; /* Nobody gets write access to an immutable file */
 158         else if (current->fsuid == inode->i_uid)
 159                 mode >>= 6;
 160         else if (in_group_p(inode->i_gid))
 161                 mode >>= 3;
 162         if (((mode & mask & S_IRWXO) == mask) || capable(CAP_DAC_OVERRIDE))
 163                 return 0;
 164         /* read/search: test the unshifted i_mode, 'mode' may have been shifted */
 165         if ((mask == S_IROTH) ||
 166             (S_ISDIR(inode->i_mode)  && !(mask & ~(S_IROTH | S_IXOTH))))
 167                 if (capable(CAP_DAC_READ_SEARCH))
 168                         return 0;
 169         return -EACCES;
 170 }
 171
 172 /*
 173  * get_write_access() takes write permission on a file and
 174  * put_write_access() gives it back.  Used for regular files.
 175  * Write (and maybe mmap read-write shared) accesses cannot coexist with
 176  * MAP_DENYWRITE mappings, so inode->i_writecount encodes both:
 177  *   0: no writers, no VM_DENYWRITE mappings
 178  * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
 179  * > 0: (i_writecount) users are writing to the file.
 180  *
 181  * WARNING: once get_write_access(), do_mmap() or prepare_binfmt() move
 182  * out of the big lock, a spinlock must protect the checks in all 3.
 183  *
 184  * Returns 0 on success, -ETXTBSY while the count is negative.
 185  */
 186 int get_write_access(struct inode * inode)
 187 {
 188         if (atomic_read(&inode->i_writecount) >= 0) {
 189                 atomic_inc(&inode->i_writecount);
 190                 return 0;
 191         }
 192         return -ETXTBSY;
 193 }
 194 /* Drop one writer reference: atomically decrements inode->i_writecount. */
 195 void put_write_access(struct inode * inode)
 196 {
 197         atomic_dec(&inode->i_writecount);
 198 }
 199
 200 /*
 201  * Internal lookup() using the new generic dcache.  An entry that fails
 202  * d_revalidate() and whose d_invalidate() returns 0 is dropped (NULL).
 203  */
 204 static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
 205 {
 206         struct dentry *found = d_lookup(parent, name);
 207
 208         if (!found || !found->d_op || !found->d_op->d_revalidate)
 209                 return found;
 210         if (found->d_op->d_revalidate(found, flags) || d_invalidate(found))
 211                 return found;
 212         dput(found);
 213         return NULL;
 214 }
 215
 216 /*
 217  * Slow path, used when the cached lookup found nothing: ask the
 218  * low-level filesystem through dir->i_op->lookup().
 219  *
 220  * The directory semaphore is taken first and the dcache is probed once
 221  * more under it, in case the entry was added while we waited.
 222  *
 223  * Returns the dentry, the pointer handed back by ->lookup() when that
 224  * is non-NULL, or ERR_PTR(-ENOMEM) if d_alloc() fails.
 225  */
 226 static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
 227 {
 228         struct inode *dir = parent->d_inode;
 229         struct dentry *found, *fresh, *res;
 230
 231         down(&dir->i_sem);
 232         /*
 233          * FIXME! This could use version numbering or similar to
 234          * avoid unnecessary cache lookups.
 235          */
 236         found = d_lookup(parent, name);
 237         if (found) {
 238                 /*
 239                  * Uhhuh! Nasty case: the cache was re-populated while
 240                  * we waited on the semaphore. Revalidate it, but return
 241                  * this entry regardless (same as if it was busy).
 242                  */
 243                 up(&dir->i_sem);
 244                 if (found->d_op && found->d_op->d_revalidate)
 245                         found->d_op->d_revalidate(found, flags);
 246                 return found;
 247         }
 248
 249         fresh = d_alloc(parent, name);
 250         if (!fresh) {
 251                 up(&dir->i_sem);
 252                 return ERR_PTR(-ENOMEM);
 253         }
 254         res = dir->i_op->lookup(dir, fresh);
 255         if (res)
 256                 dput(fresh);    /* a non-NULL result replaces our dentry */
 257         else
 258                 res = fresh;
 259         up(&dir->i_sem);
 260         return res;
 261 }
 262
 263 /* Follow one symlink via ->follow_link(); at depth 32 release nd and fail with -ELOOP. */
 264 static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
 265 {
 266         int res;
 267         if (current->link_count >= 32) {
 268                 dput(nd->dentry);
 269                 mntput(nd->mnt);
 270                 return -ELOOP;
 271         }
 272         current->link_count++;
 273         UPDATE_ATIME(dentry->d_inode);
 274         res = dentry->d_inode->i_op->follow_link(dentry, nd);
 275         current->link_count--;
 276         return res;
 277 }
 278 /* Climb from *mnt to its parent mount, *base becomes the mountpoint; 0 if *mnt is its own parent. */
 279 static inline int follow_up(struct vfsmount **mnt, struct dentry **base)
 280 {
 281         struct vfsmount *cur = *mnt, *above = cur->mnt_parent;
 282         struct dentry *mountpoint;
 283         if (above == cur)
 284                 return 0;
 285         mountpoint = dget(cur->mnt_mountpoint);
 286         mntget(above);
 287         mntput(cur);
 288         *mnt = above;
 289         dput(*base);
 290         *base = mountpoint;
 291         return 1;
 292 }
 293
 294 /* Cross into the mount on *dentry's d_vfsmnt list whose parent is *mnt; returns 1 if found. */
 295 static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
 296 {
 297         struct list_head *head = &(*dentry)->d_vfsmnt;
 298         struct list_head *pos;
 299         for (pos = head->next; pos != head; pos = pos->next) {
 300                 struct vfsmount *child = list_entry(pos, struct vfsmount, mnt_clash);
 301                 if (child->mnt_parent != *mnt)
 302                         continue;
 303                 *mnt = mntget(child);
 304                 mntput(child->mnt_parent);
 305                 /* child holds the mountpoint, so... */
 306                 dput(*dentry);
 307                 *dentry = dget(child->mnt_root);
 308                 return 1;
 309         }
 310         return 0;
 311 }
 312 /* Non-inline entry point: simply forwards to __follow_down(). */
 313 int follow_down(struct vfsmount **mnt, struct dentry **dentry)
 314 {
 315         return __follow_down(mnt,dentry);
 316 }
 317
 318 /*
 319  * Name resolution.
 320  *
 321  * This is the basic name resolution function, turning a pathname
 322  * into the final dentry.  Returns 0 with the result left in nd, or
 323  * a negative errno.
 324  * We expect 'base' (presumably nd->dentry on entry) to be positive and a directory.
 325  */
 326 int walk_name(const char * name, struct nameidata *nd)
 327 {
 328         struct dentry *dentry;
 329         struct inode *inode;
 330         int err;
 331         unsigned int lookup_flags = nd->flags;
 332
 333         while (*name=='/')      /* skip leading slashes */
 334                 name++;
 335         if (!*name)
 336                 goto return_base;       /* only slashes: nd already is the result */
 337
 338         inode = nd->dentry->d_inode;
 339         if (current->link_count)        /* raised by do_follow_link() while a link is followed */
 340                 lookup_flags = LOOKUP_FOLLOW;
 341
 342         /* At this point we know we have a real path component. */
 343         for(;;) {
 344                 unsigned long hash;
 345                 struct qstr this;
 346                 unsigned int c;
 347
 348                 err = permission(inode, MAY_EXEC);      /* the directory must be searchable */
 349                 dentry = ERR_PTR(err);
 350                 if (err)
 351                         break;
 352
 353                 this.name = name;
 354                 c = *(const unsigned char *)name;
 355
 356                 hash = init_name_hash();
 357                 do {
 358                         name++;
 359                         hash = partial_name_hash(c, hash);
 360                         c = *(const unsigned char *)name;
 361                 } while (c && (c != '/'));      /* the component ends at NUL or '/' */
 362                 this.len = name - (const char *) this.name;
 363                 this.hash = end_name_hash(hash);
 364
 365                 /* remove trailing slashes? */
 366                 if (!c)
 367                         goto last_component;
 368                 while (*++name == '/'); /* skip the whole run of slashes */
 369                 if (!*name)
 370                         goto last_with_slashes;
 371
 372                 /*
 373                  * "." and ".." are special - ".." especially so because it has
 374                  * to be able to know about the current root directory and
 375                  * parent relationships.
 376                  */
 377                 if (this.name[0] == '.') switch (this.len) {
 378                         default:
 379                                 break;
 380                         case 2:
 381                                 if (this.name[1] != '.')
 382                                         break;
 383                                 while (1) {
 384                                         if (nd->dentry == current->fs->root &&
 385                                             nd->mnt == current->fs->rootmnt)
 386                                                 break;  /* at the process root: ".." stays put */
 387                                         if (nd->dentry != nd->mnt->mnt_root) {
 388                                                 dentry = dget(nd->dentry->d_parent);
 389                                                 dput(nd->dentry);
 390                                                 nd->dentry = dentry;
 391                                                 break;
 392                                         }
 393                                         if (!follow_up(&nd->mnt, &nd->dentry))  /* at a mount root: climb to the parent mount, retry */
 394                                                 break;
 395                                 }
 396                                 inode = nd->dentry->d_inode;
 397                                 /* fallthrough */
 398                         case 1:
 399                                 continue;       /* "." (or ".." once handled): go on with the next component */
 400                 }
 401                 /*
 402                  * See if the low-level filesystem might want
 403                  * to use its own hash..
 404                  */
 405                 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
 406                         err = nd->dentry->d_op->d_hash(nd->dentry, &this);
 407                         if (err < 0)
 408                                 break;
 409                 }
 410                 /* This does the actual lookups.. */
 411                 dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
 412                 if (!dentry) {
 413                         dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
 414                         err = PTR_ERR(dentry);
 415                         if (IS_ERR(dentry))
 416                                 break;
 417                 }
 418                 /* Check mountpoints.. */
 419                 while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
 420                         ;
 421
 422                 err = -ENOENT;
 423                 inode = dentry->d_inode;
 424                 if (!inode)
 425                         goto out_dput;  /* an intermediate component must exist */
 426                 err = -ENOTDIR;
 427                 if (!inode->i_op)
 428                         goto out_dput;
 429
 430                 if (inode->i_op->follow_link) {
 431                         err = do_follow_link(dentry, nd);
 432                         dput(dentry);
 433                         if (err)
 434                                 goto return_err;        /* nd is not released here; presumably the follow_link path already did - confirm */
 435                         err = -ENOENT;
 436                         inode = nd->dentry->d_inode;
 437                         if (!inode)
 438                                 break;
 439                         err = -ENOTDIR;
 440                         if (!inode->i_op)
 441                                 break;
 442                 } else {
 443                         dput(nd->dentry);
 444                         nd->dentry = dentry;
 445                 }
 446                 err = -ENOTDIR;
 447                 if (!inode->i_op->lookup)       /* without ->lookup() we cannot descend any further */
 448                         break;
 449                 continue;
 450                 /* here ends the main loop */
 451
 452 last_with_slashes:
 453                 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
 454 last_component:
 455                 if (lookup_flags & LOOKUP_PARENT)       /* caller wants the parent: only record the last component */
 456                         goto lookup_parent;
 457                 if (this.name[0] == '.') switch (this.len) {
 458                         default:
 459                                 break;
 460                         case 2:
 461                                 if (this.name[1] != '.')
 462                                         break;
 463                                 while (1) {
 464                                         if (nd->dentry == current->fs->root &&
 465                                             nd->mnt == current->fs->rootmnt)
 466                                                 break;
 467                                         if (nd->dentry != nd->mnt->mnt_root) {
 468                                                 dentry = dget(nd->dentry->d_parent);
 469                                                 dput(nd->dentry);
 470                                                 nd->dentry = dentry;
 471                                                 break;
 472                                         }
 473                                         if (!follow_up(&nd->mnt, &nd->dentry))
 474                                                 break;
 475                                 }
 476                                 inode = nd->dentry->d_inode;
 477                                 /* fallthrough */
 478                         case 1:
 479                                 goto return_base;
 480                 }
 481                 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
 482                         err = nd->dentry->d_op->d_hash(nd->dentry, &this);
 483                         if (err < 0)
 484                                 break;
 485                 }
 486                 dentry = cached_lookup(nd->dentry, &this, 0);
 487                 if (!dentry) {
 488                         dentry = real_lookup(nd->dentry, &this, 0);
 489                         err = PTR_ERR(dentry);
 490                         if (IS_ERR(dentry))
 491                                 break;
 492                 }
 493                 while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
 494                         ;
 495                 inode = dentry->d_inode;
 496                 if ((lookup_flags & LOOKUP_FOLLOW)
 497                     && inode && inode->i_op && inode->i_op->follow_link) {
 498                         err = do_follow_link(dentry, nd);
 499                         dput(dentry);
 500                         if (err)
 501                                 goto return_err;
 502                         inode = nd->dentry->d_inode;
 503                 } else {
 504                         dput(nd->dentry);
 505                         nd->dentry = dentry;
 506                 }
 507                 err = -ENOENT;
 508                 if (!inode)
 509                         goto no_inode;
 510                 if (lookup_flags & LOOKUP_DIRECTORY) {
 511                         err = -ENOTDIR;
 512                         if (!inode->i_op || !inode->i_op->lookup)
 513                                 break;
 514                 }
 515                 goto return_base;
 516 no_inode:
 517                 err = -ENOENT;
 518                 if (lookup_flags & (LOOKUP_POSITIVE|LOOKUP_DIRECTORY))  /* a negative result is fine unless one of these was asked for */
 519                         break;
 520                 goto return_base;
 521 lookup_parent:
 522                 nd->last = this;
 523                 nd->last_type = LAST_NORM;
 524                 if (this.name[0] != '.')
 525                         goto return_base;
 526                 if (this.len == 1)
 527                         nd->last_type = LAST_DOT;
 528                 else if (this.len == 2 && this.name[1] == '.')
 529                         nd->last_type = LAST_DOTDOT;
 530 return_base:
 531                 return 0;
 532 out_dput:
 533                 dput(dentry);
 534                 break;
 535         }
 536         dput(nd->dentry);       /* error exit: drop the references nd was holding */
 537         mntput(nd->mnt);
 538 return_err:
 539         return err;
 540 }
 541
 542 /* Try 'name' under the alternate root; returns 0 if that walk failed, 1 if everything is done. */
 543 static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
 544 {
 545         struct nameidata real;
 546         nd->mnt = mntget(current->fs->altrootmnt);
 547         nd->dentry = dget(current->fs->altroot);
 548         if (walk_name(name, nd) != 0)
 549                 return 0;
 550         if (nd->dentry->d_inode)
 551                 return 1;
 552         /* negative under the alternate root: retry from the real root */
 553         real.last_type = LAST_ROOT;
 554         real.flags = nd->flags;
 555         real.mnt = mntget(current->fs->rootmnt);
 556         real.dentry = dget(current->fs->root);
 557         if (walk_name(name, &real) != 0)
 558                 return 1;
 559         if (!real.dentry->d_inode) {
 560                 dput(real.dentry);
 561                 mntput(real.mnt);
 562                 return 1;
 563         }
 564         dput(nd->dentry);
 565         mntput(nd->mnt);
 566         nd->dentry = real.dentry;
 567         nd->mnt = real.mnt;
 568         nd->last = real.last;
 569         return 1;
 570 }
 571 /* Resolve __emul_prefix() from the current root and install the result (or NULL) as the alternate root. */
 572 void set_fs_altroot(void)
 573 {
 574         char *prefix = __emul_prefix();
 575         struct vfsmount *new_mnt = NULL, *prev_mnt;
 576         struct dentry *new_root = NULL, *prev_root;
 577         struct nameidata nd;
 578         if (prefix) {
 579                 nd.mnt = mntget(current->fs->rootmnt);
 580                 nd.dentry = dget(current->fs->root);
 581                 nd.flags = LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_POSITIVE;
 582                 if (!walk_name(prefix, &nd)) {
 583                         new_mnt = nd.mnt;
 584                         new_root = nd.dentry;
 585                 }
 586         }
 587         prev_mnt = current->fs->altrootmnt;
 588         prev_root = current->fs->altroot;
 589         current->fs->altrootmnt = new_mnt;
 590         current->fs->altroot = new_root;
 591         if (!prev_root)
 592                 return;
 593         dput(prev_root);
 594         mntput(prev_mnt);
 595 }
 596
 597 /* Start nd at the root; returns 0 if the alternate-root lookup already finished the job. */
 598 static inline int walk_init_root(const char *name, struct nameidata *nd)
 599 {
 600         int try_alt = current->fs->altroot && !(nd->flags & LOOKUP_NOALT);
 601         if (try_alt && __emul_lookup_dentry(name, nd))
 602                 return 0;
 603         nd->mnt = mntget(current->fs->rootmnt);
 604         nd->dentry = dget(current->fs->root);
 605         return 1;
 606 }
 607 /* Prime nd for a walk: names starting with '/' begin at the root, all others at the pwd. */
 608 int walk_init(const char *name,unsigned int flags,struct nameidata *nd)
 609 {
 610         nd->flags = flags;
 611         nd->last_type = LAST_ROOT;      /* in case there are only slashes */
 612         if (name[0] == '/')
 613                 return walk_init_root(name, nd);
 614         nd->mnt = mntget(current->fs->pwdmnt);
 615         nd->dentry = dget(current->fs->pwd);
 616         return 1;
 617 }
 618 /* Resolve 'name' to a dentry (the mount reference is dropped) or an ERR_PTR(). */
 619 struct dentry * lookup_dentry(const char * name, unsigned int lookup_flags)
 620 {
 621         struct nameidata nd;
 622         int err;
 623
 624         /* walk_init() returning 0 means nd is already complete */
 625         err = walk_init(name, lookup_flags, &nd) ? walk_name(name, &nd) : 0;
 626         if (err)
 627                 return ERR_PTR(err);
 628
 629         mntput(nd.mnt);
 630         return nd.dentry;
 631 }
 632
 633 /*
 634  * Restricted form of lookup. Doesn't follow links, single-component only,
 635  * needs parent already locked. Doesn't follow mounts.
 636  *
 637  * Returns the dentry for 'name' in 'base', or an error pointer: the
 638  * permission() or d_hash() failure, -ENOMEM, or whatever non-NULL
 639  * pointer the directory's ->lookup() handed back.
 640  */
 641 static inline struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
 642 {
 643         struct inode *dir = base->d_inode;
 644         struct dentry *found, *fresh;
 645         int err;
 646
 647         err = permission(dir, MAY_EXEC);
 648         if (err)
 649                 return ERR_PTR(err);
 650
 651         /*
 652          * See if the low-level filesystem might want
 653          * to use its own hash..
 654          */
 655         if (base->d_op && base->d_op->d_hash) {
 656                 err = base->d_op->d_hash(base, name);
 657                 if (err < 0)
 658                         return ERR_PTR(err);
 659         }
 660
 661         found = cached_lookup(base, name, 0);
 662         if (found)
 663                 return found;
 664
 665         /* not cached: ask the filesystem with a freshly allocated dentry */
 666         fresh = d_alloc(base, name);
 667         if (!fresh)
 668                 return ERR_PTR(-ENOMEM);
 669         found = dir->i_op->lookup(dir, fresh);
 670         if (!found)
 671                 return fresh;
 672         dput(fresh);
 673         return found;
 674 }
 675
 676 /*
 677  * Look up the single component 'name' in directory 'base' through
 678  * lookup_hash().  An empty name, or one containing a '/', yields
 679  * ERR_PTR(-EACCES).
 680  */
 681 struct dentry * lookup_one(const char * name, struct dentry * base)
 682 {
 683         const unsigned char *p = (const unsigned char *) name;
 684         unsigned long hash;
 685         struct qstr this;
 686
 687         if (!*p)
 688                 return ERR_PTR(-EACCES);
 689
 690         for (hash = init_name_hash(); *p; p++) {
 691                 unsigned int c = *p;
 692
 693                 if (c == '/')
 694                         return ERR_PTR(-EACCES);
 695                 hash = partial_name_hash(c, hash);
 696         }
 697         this.name = name;
 698         this.len = (const char *) p - name;
 699         this.hash = end_name_hash(hash);
 700         return lookup_hash(&this, base);
 701 }
 702
 703 /*
 704  *      __namei()
 705  *
 706  * is used by most simple commands to get the dentry of a specified
 707  * name.  Open, link etc use their own routines, but this is enough
 708  * for things like 'chmod' etc.
 709  *
 710  * namei exists in two versions, namei and lnamei; the only difference
 711  * is that namei follows links while lnamei does not.  Returns the
 712  * dentry, or the error pointer from getname() or lookup_dentry().
 713  */
 714 struct dentry * __namei(const char *pathname, unsigned int lookup_flags)
 715 {
 716         struct dentry *res;
 717         char *kname = getname(pathname);
 718
 719         /* a getname() failure is already an encoded errno: pass it on */
 720         if (IS_ERR(kname))
 721                 return (struct dentry *) kname;
 722         res = lookup_dentry(kname, lookup_flags|LOOKUP_POSITIVE);
 723         putname(kname);
 724         return res;
 725 }
 726
 727 /*
 728  * Sticky-bit check: returns non-zero when 'dir' is sticky and the caller
 729  * owns neither 'inode' nor 'dir' and lacks CAP_FOWNER.  Being inline it
 730  * costs filesystems that don't use the sticky bit next to nothing.
 731  */
 732 static inline int check_sticky(struct inode *dir, struct inode *inode)
 733 {
 734         /* not sticky, or the caller owns the inode or the directory */
 735         if (!(dir->i_mode & S_ISVTX) ||
 736             inode->i_uid == current->fsuid ||
 737             dir->i_uid == current->fsuid)
 738                 return 0;
 739         return !capable(CAP_FOWNER);
 740 }
 741
 742 /*
 743  *      Check whether we can remove a link victim from directory dir, check
 744  *  whether the type of victim is right.
 745  *  1. We can't do it if dir is read-only (done in permission())
 746  *  2. We should have write and exec permissions on dir
 747  *  3. We can't remove anything from append-only dir
 748  *  4. We can't do anything with immutable dir (done in permission())
 749  *  5. If the sticky bit on dir is set we should either
 750  *      a. be owner of dir, or
 751  *      b. be owner of victim, or
 752  *      c. have CAP_FOWNER capability
 753  *  6. If the victim is append-only or immutable we can't do anything with
 754  *     links pointing to it.
 755  *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
 756  *  8. If we were asked to remove a non-directory and victim is one - EISDIR.
 757  *  9. We can't remove a root or mountpoint.
 758  *  Returns 0 when the removal may go ahead, a negative errno otherwise.
 759  */
 760 static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir)
 761 {
 762         struct inode *inode = victim->d_inode;
 763         int error;
 764
 765         /* victim is negative or not a child of dir, or dir is dead */
 766         if (!inode || victim->d_parent->d_inode != dir || IS_DEADDIR(dir))
 767                 return -ENOENT;
 768         error = permission(dir,MAY_WRITE | MAY_EXEC);
 769         if (error)
 770                 return error;
 771
 772         if (IS_APPEND(dir) || check_sticky(dir, inode) ||
 773             IS_APPEND(inode) || IS_IMMUTABLE(inode))
 774                 return -EPERM;
 775
 776         /* finally the type of the victim has to match the request */
 777         if (!isdir)
 778                 return S_ISDIR(inode->i_mode) ? -EISDIR : 0;
 779         if (!S_ISDIR(inode->i_mode))
 780                 return -ENOTDIR;
 781         if (IS_ROOT(victim) || d_mountpoint(victim))
 782                 return -EBUSY;
 783         return 0;
 784 }
 785
 786 /*
 787  * Check whether an object may be created as dentry 'child' in directory
 788  * 'dir': -EEXIST if child is already positive (open treats that case
 789  * specially, which is fine as we are inlined), -ENOENT if dir is dead,
 790  * else the verdict of permission() for write + exec on dir (read-only
 791  * and immutable directories are dealt with in permission()).
 792  */
 793 static inline int may_create(struct inode *dir, struct dentry *child)
 794 {
 795         int error = -EEXIST;
 796
 797         if (!child->d_inode)
 798                 error = IS_DEADDIR(dir) ? -ENOENT : permission(dir, MAY_WRITE | MAY_EXEC);
 799         return error;
 800 }
 801
 802 /*
 803  * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security
 804  * reasons.
 805  *
 806  * O_DIRECTORY translates into forcing a directory lookup.
 807  */
 808 static inline int lookup_flags(unsigned int f)
 809 {
 810         unsigned long retval = LOOKUP_FOLLOW;
 811
 812         if (f & O_NOFOLLOW)
 813                 retval &= ~LOOKUP_FOLLOW;
 814
 815         if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
 816                 retval &= ~LOOKUP_FOLLOW;
 817
 818         if (f & O_DIRECTORY)
 819                 retval |= LOOKUP_DIRECTORY;
 820
 821         return retval;
 822 }
 823
 824 int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
 825 {
 826         int error;
 827
 828         mode &= S_IALLUGO & ~current->fs->umask;
 829         mode |= S_IFREG;
 830
 831         down(&dir->i_zombie);
 832         error = may_create(dir, dentry);
 833         if (error)
 834                 goto exit_lock;
 835
 836         error = -EACCES;        /* shouldn't it be ENOSYS? */
 837         if (!dir->i_op || !dir->i_op->create)
 838                 goto exit_lock;
 839
 840         DQUOT_INIT(dir);
 841         error = dir->i_op->create(dir, dentry, mode);
 842 exit_lock:
 843         up(&dir->i_zombie);
 844         return error;
 845 }
 846
 847 /*
 848  *      open_namei()
 849  *
 850  * namei for open - this is in fact almost the whole open-routine.
 851  *
 852  * Note that the low bits of "flag" aren't the same as in the open
 853  * system call - they are 00 - no permissions needed
 854  *                        01 - read permission needed
 855  *                        10 - write permission needed
 856  *                        11 - read/write permissions needed
 857  * which is a lot more logical, and also allows the "no perm" needed
 858  * for symlinks (where the permissions are checked later).
 859  */
 860 int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
 861 {
 862         int acc_mode, error = 0;
 863         struct inode *inode;
 864         struct dentry *dentry;
 865
 866         acc_mode = ACC_MODE(flag);
 867         if (!(flag & O_CREAT)) {
 868                 if (walk_init(pathname, lookup_flags(flag), nd))
 869                         error = walk_name(pathname, nd);
 870                 if (error)
 871                         return error;
 872
 873                 dentry = nd->dentry;
 874         } else {
 875                 struct dentry *dir;
 876
 877                 if (walk_init(pathname, LOOKUP_PARENT, nd))
 878                         error = walk_name(pathname, nd);
 879                 if (error)
 880                         return error;
 881                 /*
 882                  * It's not obvious that open(".", O_CREAT, foo) should
 883                  * fail, but it's even less obvious that it should succeed.
 884                  * Since O_CREAT means an intention to create the thing and
 885                  * open(2) had never created directories, count it as caller's
 886                  * luserdom and let him sod off - -EISDIR it is.
 887                  */
 888                 error = -EISDIR;
 889                 if (nd->last_type != LAST_NORM)
 890                         goto exit;
 891                 /* same for foo/ */
 892                 if (nd->last.name[nd->last.len])
 893                         goto exit;
 894
 895                 dir = dget(nd->dentry);
 896                 down(&dir->d_inode->i_sem);
 897
 898                 dentry = lookup_hash(&nd->last, nd->dentry);
 899                 error = PTR_ERR(dentry);
 900                 if (IS_ERR(dentry)) {
 901                         up(&dir->d_inode->i_sem);
 902                         dput(dir);
 903                         goto exit;
 904                 }
 905
 906                 if (dentry->d_inode) {
 907                         up(&dir->d_inode->i_sem);
 908                         dput(dir);
 909                         error = -EEXIST;
 910                         if (flag & O_EXCL)
 911                                 goto exit_dput;
 912                         if (dentry->d_inode->i_op &&
 913                             dentry->d_inode->i_op->follow_link) {
 914                                 /*
 915                                  * With O_EXCL it would be -EEXIST.
 916                                  * If symlink is a dangling one it's -ENOENT.
 917                                  * Otherwise we open the object it points to.
 918                                  */
 919                                 error = do_follow_link(dentry, nd);
 920                                 dput(dentry);
 921                                 if (error)
 922                                         return error;
 923                                 dentry = nd->dentry;
 924                         } else {
 925                                 dput(nd->dentry);
 926                                 nd->dentry = dentry;
 927                         }
 928                         error = -EISDIR;
 929                         if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
 930                                 goto exit;
 931                 } else {
 932                         error = vfs_create(dir->d_inode, dentry, mode);
 933                         /* Don't check for write permission, don't truncate */
 934                         acc_mode = 0;
 935                         flag &= ~O_TRUNC;
 936                         dput(nd->dentry);
 937                         nd->dentry = dentry;
 938                         unlock_dir(dir);
 939                         if (error)
 940                                 goto exit;
 941                 }
 942         }
 943
 944         error = -ENOENT;
 945         inode = dentry->d_inode;
 946         if (!inode)
 947                 goto exit;
 948
 949         error = -ELOOP;
 950         if (S_ISLNK(inode->i_mode))
 951                 goto exit;
 952
 953         error = -EISDIR;
 954         if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
 955                 goto exit;
 956
 957         error = permission(inode,acc_mode);
 958         if (error)
 959                 goto exit;
 960
 961         /*
 962          * FIFO's, sockets and device files are special: they don't
 963          * actually live on the filesystem itself, and as such you
 964          * can write to them even if the filesystem is read-only.
 965          */
 966         if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
 967                 flag &= ~O_TRUNC;
 968         } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
 969                 error = -EACCES;
 970                 if (IS_NODEV(inode))
 971                         goto exit;
 972
 973                 flag &= ~O_TRUNC;
 974         } else {
 975                 error = -EROFS;
 976                 if (IS_RDONLY(inode) && (flag & 2))
 977                         goto exit;
 978         }
 979         /*
 980          * An append-only file must be opened in append mode for writing.
 981          */
 982         error = -EPERM;
 983         if (IS_APPEND(inode)) {
 984                 if  ((flag & FMODE_WRITE) && !(flag & O_APPEND))
 985                         goto exit;
 986                 if (flag & O_TRUNC)
 987                         goto exit;
 988         }
 989
 990         if (flag & O_TRUNC) {
 991                 error = get_write_access(inode);
 992                 if (error)
 993                         goto exit;
 994
 995                 /*
 996                  * Refuse to truncate files with mandatory locks held on them.
 997                  */
 998                 error = locks_verify_locked(inode);
 999                 if (!error) {
1000                         DQUOT_INIT(inode);
1001
1002                         error = do_truncate(dentry, 0);
1003                 }
1004                 put_write_access(inode);
1005                 if (error)
1006                         goto exit;
1007         } else
1008                 if (flag & FMODE_WRITE)
1009                         DQUOT_INIT(inode);
1010
1011         return 0;
1012
1013 exit_dput:
1014         dput(dentry);
1015 exit:
1016         dput(nd->dentry);
1017         mntput(nd->mnt);
1018         return error;
1019 }
1020
1021 static struct dentry *lookup_create(const char *name, int is_dir)
1022 {
1023         struct nameidata nd;
1024         struct dentry *dentry;
1025         int err = 0;
1026         if (walk_init(name, LOOKUP_PARENT, &nd))
1027                 err = walk_name(name, &nd);
1028         dentry = ERR_PTR(err);
1029         if (err)
1030                 goto out;
1031         down(&nd.dentry->d_inode->i_sem);
1032         dentry = ERR_PTR(-EEXIST);
1033         if (nd.last_type != LAST_NORM)
1034                 goto fail;
1035         dentry = lookup_hash(&nd.last, nd.dentry);
1036         if (IS_ERR(dentry))
1037                 goto fail;
1038         if (!is_dir && nd.last.name[nd.last.len] && !dentry->d_inode)
1039                 goto enoent;
1040 out_dput:
1041         dput(nd.dentry);
1042         mntput(nd.mnt);
1043 out:
1044         return dentry;
1045 enoent:
1046         dput(dentry);
1047         dentry = ERR_PTR(-ENOENT);
1048 fail:
1049         up(&nd.dentry->d_inode->i_sem);
1050         goto out_dput;
1051 }
1052
1053 int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1054 {
1055         int error = -EPERM;
1056
1057         mode &= ~current->fs->umask;
1058
1059         down(&dir->i_zombie);
1060         if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
1061                 goto exit_lock;
1062
1063         error = may_create(dir, dentry);
1064         if (error)
1065                 goto exit_lock;
1066
1067         error = -EPERM;
1068         if (!dir->i_op || !dir->i_op->mknod)
1069                 goto exit_lock;
1070
1071         DQUOT_INIT(dir);
1072         error = dir->i_op->mknod(dir, dentry, mode, dev);
1073 exit_lock:
1074         up(&dir->i_zombie);
1075         return error;
1076 }
1077
1078 struct dentry * do_mknod(const char * filename, int mode, dev_t dev)
1079 {
1080         int error;
1081         struct dentry *dir;
1082         struct dentry *dentry, *retval;
1083
1084         dentry = lookup_create(filename, 0);
1085         if (IS_ERR(dentry))
1086                 return dentry;
1087
1088         dir = dget(dentry->d_parent);
1089
1090         error = vfs_mknod(dir->d_inode, dentry, mode, dev);
1091
1092         retval = ERR_PTR(error);
1093         if (!error)
1094                 retval = dget(dentry);
1095         unlock_dir(dir);
1096         dput(dentry);
1097         return retval;
1098 }
1099
1100 asmlinkage long sys_mknod(const char * filename, int mode, dev_t dev)
1101 {
1102         int error;
1103         char * tmp;
1104         struct dentry * dentry, *dir;
1105
1106         if (S_ISDIR(mode))
1107                 return -EPERM;
1108         tmp = getname(filename);
1109         if (IS_ERR(tmp))
1110                 return PTR_ERR(tmp);
1111
1112         lock_kernel();
1113         dentry = lookup_create(tmp, 0);
1114         error = PTR_ERR(dentry);
1115         if (IS_ERR(dentry))
1116                 goto out;
1117         dir = dget(dentry->d_parent);
1118         switch (mode & S_IFMT) {
1119         case 0: case S_IFREG:
1120                 error = vfs_create(dir->d_inode, dentry, mode);
1121                 break;
1122         case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
1123                 error = vfs_mknod(dir->d_inode, dentry, mode, dev);
1124                 break;
1125         case S_IFDIR:
1126                 error = -EPERM;
1127                 break;
1128         default:
1129                 error = -EINVAL;
1130         }
1131         unlock_dir(dir);
1132         dput(dentry);
1133 out:
1134         unlock_kernel();
1135         putname(tmp);
1136
1137         return error;
1138 }
1139
1140 int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1141 {
1142         int error;
1143
1144         down(&dir->i_zombie);
1145         error = may_create(dir, dentry);
1146         if (error)
1147                 goto exit_lock;
1148
1149         error = -EPERM;
1150         if (!dir->i_op || !dir->i_op->mkdir)
1151                 goto exit_lock;
1152
1153         DQUOT_INIT(dir);
1154         mode &= (S_IRWXUGO|S_ISVTX) & ~current->fs->umask;
1155         error = dir->i_op->mkdir(dir, dentry, mode);
1156
1157 exit_lock:
1158         up(&dir->i_zombie);
1159         return error;
1160 }
1161
1162 asmlinkage long sys_mkdir(const char * pathname, int mode)
1163 {
1164         int error;
1165         char * tmp;
1166
1167         tmp = getname(pathname);
1168         error = PTR_ERR(tmp);
1169         if (!IS_ERR(tmp)) {
1170                 struct dentry *dir;
1171                 struct dentry *dentry;
1172
1173                 lock_kernel();
1174                 dentry = lookup_create(tmp, 1);
1175                 error = PTR_ERR(dentry);
1176                 if (!IS_ERR(dentry)) {
1177                         dir = dget(dentry->d_parent);
1178                         error = vfs_mkdir(dir->d_inode, dentry, mode);
1179                         unlock_dir(dir);
1180                         dput(dentry);
1181                 }
1182                 unlock_kernel();
1183         }
1184         putname(tmp);
1185
1186         return error;
1187 }
1188
1189 /*
1190  * We try to drop the dentry early: we should have
1191  * a usage count of 2 if we're the only user of this
1192  * dentry, and if that is true (possibly after pruning
1193  * the dcache), then we drop the dentry now.
1194  *
1195  * A low-level filesystem can, if it choses, legally
1196  * do a
1197  *
1198  *      if (!d_unhashed(dentry))
1199  *              return -EBUSY;
1200  *
1201  * if it cannot handle the case of removing a directory
1202  * that is still in use by something else..
1203  */
1204 static void d_unhash(struct dentry *dentry)
1205 {
1206         dget(dentry);
1207         switch (dentry->d_count) {
1208         default:
1209                 shrink_dcache_parent(dentry);
1210                 if (dentry->d_count != 2)
1211                         break;
1212         case 2:
1213                 d_drop(dentry);
1214         }
1215 }
1216
1217 int vfs_rmdir(struct inode *dir, struct dentry *dentry)
1218 {
1219         int error;
1220
1221         error = may_delete(dir, dentry, 1);
1222         if (error)
1223                 return error;
1224
1225         if (!dir->i_op || !dir->i_op->rmdir)
1226                 return -EPERM;
1227
1228         DQUOT_INIT(dir);
1229
1230         double_down(&dir->i_zombie, &dentry->d_inode->i_zombie);
1231         d_unhash(dentry);
1232         error = dir->i_op->rmdir(dir, dentry);
1233         if (!error)
1234                 dentry->d_inode->i_flags |= S_DEAD;
1235         double_up(&dir->i_zombie, &dentry->d_inode->i_zombie);
1236         dput(dentry);
1237
1238         return error;
1239 }
1240
1241 asmlinkage long sys_rmdir(const char * pathname)
1242 {
1243         int error = 0;
1244         char * name;
1245         struct dentry *dentry;
1246         struct nameidata nd;
1247
1248         name = getname(pathname);
1249         if(IS_ERR(name))
1250                 return PTR_ERR(name);
1251         lock_kernel();
1252
1253         if (walk_init(name, LOOKUP_PARENT, &nd))
1254                 error = walk_name(name, &nd);
1255         if (error)
1256                 goto exit;
1257
1258         switch(nd.last_type) {
1259                 case LAST_DOTDOT:
1260                         error = -ENOTEMPTY;
1261                         goto exit1;
1262                 case LAST_ROOT: case LAST_DOT:
1263                         error = -EBUSY;
1264                         goto exit1;
1265         }
1266         down(&nd.dentry->d_inode->i_sem);
1267         dentry = lookup_hash(&nd.last, nd.dentry);
1268         error = PTR_ERR(dentry);
1269         if (!IS_ERR(dentry)) {
1270                 error = vfs_rmdir(nd.dentry->d_inode, dentry);
1271                 dput(dentry);
1272         }
1273         up(&nd.dentry->d_inode->i_sem);
1274 exit1:
1275         dput(nd.dentry);
1276         mntput(nd.mnt);
1277 exit:
1278         unlock_kernel();
1279         putname(name);
1280         return error;
1281 }
1282
1283 int vfs_unlink(struct inode *dir, struct dentry *dentry)
1284 {
1285         int error;
1286
1287         down(&dir->i_zombie);
1288         error = may_delete(dir, dentry, 0);
1289         if (!error) {
1290                 error = -EPERM;
1291                 if (dir->i_op && dir->i_op->unlink) {
1292                         DQUOT_INIT(dir);
1293                         error = dir->i_op->unlink(dir, dentry);
1294                 }
1295         }
1296         up(&dir->i_zombie);
1297         return error;
1298 }
1299
1300 asmlinkage long sys_unlink(const char * pathname)
1301 {
1302         int error = 0;
1303         char * name;
1304         struct dentry *dentry;
1305         struct nameidata nd;
1306
1307         name = getname(pathname);
1308         if(IS_ERR(name))
1309                 return PTR_ERR(name);
1310         lock_kernel();
1311
1312         if (walk_init(name, LOOKUP_PARENT, &nd))
1313                 error = walk_name(name, &nd);
1314         if (error)
1315                 goto exit;
1316         error = -EISDIR;
1317         if (nd.last_type != LAST_NORM)
1318                 goto exit1;
1319         down(&nd.dentry->d_inode->i_sem);
1320         dentry = lookup_hash(&nd.last, nd.dentry);
1321         error = PTR_ERR(dentry);
1322         if (!IS_ERR(dentry)) {
1323                 /* Why not before? Because we want correct error value */
1324                 if (nd.last.name[nd.last.len])
1325                         goto slashes;
1326                 error = vfs_unlink(nd.dentry->d_inode, dentry);
1327         exit2:
1328                 dput(dentry);
1329         }
1330         up(&nd.dentry->d_inode->i_sem);
1331 exit1:
1332         dput(nd.dentry);
1333         mntput(nd.mnt);
1334 exit:
1335         unlock_kernel();
1336         putname(name);
1337
1338         return error;
1339
1340 slashes:
1341         error = !dentry->d_inode ? -ENOENT :
1342                 S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
1343         goto exit2;
1344 }
1345
1346 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
1347 {
1348         int error;
1349
1350         down(&dir->i_zombie);
1351         error = may_create(dir, dentry);
1352         if (error)
1353                 goto exit_lock;
1354
1355         error = -EPERM;
1356         if (!dir->i_op || !dir->i_op->symlink)
1357                 goto exit_lock;
1358
1359         DQUOT_INIT(dir);
1360         error = dir->i_op->symlink(dir, dentry, oldname);
1361
1362 exit_lock:
1363         up(&dir->i_zombie);
1364         return error;
1365 }
1366
1367 asmlinkage long sys_symlink(const char * oldname, const char * newname)
1368 {
1369         int error;
1370         char * from;
1371         char * to;
1372
1373         from = getname(oldname);
1374         if(IS_ERR(from))
1375                 return PTR_ERR(from);
1376         to = getname(newname);
1377         error = PTR_ERR(to);
1378         if (!IS_ERR(to)) {
1379                 struct dentry *dir;
1380                 struct dentry *dentry;
1381
1382                 lock_kernel();
1383                 dentry = lookup_create(to, 0);
1384                 error = PTR_ERR(dentry);
1385                 if (!IS_ERR(dentry)) {
1386                         dir = dget(dentry->d_parent);
1387                         error = vfs_symlink(dir->d_inode, dentry, from);
1388                         unlock_dir(dir);
1389                         dput(dentry);
1390                 }
1391                 unlock_kernel();
1392                 putname(to);
1393         }
1394         putname(from);
1395         return error;
1396 }
1397
1398 int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
1399 {
1400         struct inode *inode;
1401         int error;
1402
1403         down(&dir->i_zombie);
1404         error = -ENOENT;
1405         inode = old_dentry->d_inode;
1406         if (!inode)
1407                 goto exit_lock;
1408
1409         error = may_create(dir, new_dentry);
1410         if (error)
1411                 goto exit_lock;
1412
1413         error = -EXDEV;
1414         if (dir->i_dev != inode->i_dev)
1415                 goto exit_lock;
1416
1417         /*
1418          * A link to an append-only or immutable file cannot be created.
1419          */
1420         error = -EPERM;
1421         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1422                 goto exit_lock;
1423         if (!dir->i_op || !dir->i_op->link)
1424                 goto exit_lock;
1425
1426         DQUOT_INIT(dir);
1427         error = dir->i_op->link(old_dentry, dir, new_dentry);
1428
1429 exit_lock:
1430         up(&dir->i_zombie);
1431         return error;
1432 }
1433
1434 /*
1435  * Hardlinks are often used in delicate situations.  We avoid
1436  * security-related surprises by not following symlinks on the
1437  * newname.  --KAB
1438  *
1439  * We don't follow them on the oldname either to be compatible
1440  * with linux 2.0, and to avoid hard-linking to directories
1441  * and other special files.  --ADM
1442  */
1443 asmlinkage long sys_link(const char * oldname, const char * newname)
1444 {
1445         int error;
1446         char * from;
1447         char * to;
1448
1449         from = getname(oldname);
1450         if(IS_ERR(from))
1451                 return PTR_ERR(from);
1452         to = getname(newname);
1453         error = PTR_ERR(to);
1454         if (!IS_ERR(to)) {
1455                 struct dentry *old_dentry, *new_dentry, *dir;
1456
1457                 lock_kernel();
1458                 old_dentry = lookup_dentry(from, LOOKUP_POSITIVE);
1459                 error = PTR_ERR(old_dentry);
1460                 if (IS_ERR(old_dentry))
1461                         goto exit;
1462
1463                 new_dentry = lookup_create(to, 0);
1464                 error = PTR_ERR(new_dentry);
1465                 if (!IS_ERR(new_dentry)) {
1466                         dir = dget(new_dentry->d_parent);
1467                         error = vfs_link(old_dentry, dir->d_inode, new_dentry);
1468                         unlock_dir(dir);
1469                         dput(new_dentry);
1470                 }
1471                 dput(old_dentry);
1472 exit:
1473                 unlock_kernel();
1474                 putname(to);
1475         }
1476         putname(from);
1477
1478         return error;
1479 }
1480
1481 /*
1482  * The worst of all namespace operations - renaming directory. "Perverted"
1483  * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
1484  * Problems:
1485  *      a) we can get into loop creation. Check is done in is_subdir().
1486  *      b) race potential - two innocent renames can create a loop together.
1487  *         That's where 4.4 screws up. Current fix: serialization on
1488  *         sb->s_vfs_rename_sem. We might be more accurate, but that's another
1489  *         story.
1490  *      c) we have to lock _three_ objects - parents and victim (if it exists).
1491  *         And that - after we got ->i_sem on parents (until then we don't know
1492  *         whether the target exists at all, let alone whether it is a directory
1493  *         or not). Solution: ->i_zombie. Taken only after ->i_sem. Always taken
1494  *         on link creation/removal of any kind. And taken (without ->i_sem) on
1495  *         directory that will be removed (both in rmdir() and here).
1496  *      d) some filesystems don't support opened-but-unlinked directories,
1497  *         either because of layout or because they are not ready to deal with
1498  *         all cases correctly. The latter will be fixed (taking this sort of
1499  *         stuff into VFS), but the former is not going away. Solution: the same
1500  *         trick as in rmdir().
1501  *      e) conversion from fhandle to dentry may come in the wrong moment - when
1502  *         we are removing the target. Solution: we will have to grab ->i_zombie
1503  *         in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
1504  *         ->i_sem on parents, which works but leads to some truly excessive
1505  *         locking].
1506  */
1507 int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
1508                struct inode *new_dir, struct dentry *new_dentry)
1509 {
1510         int error;
1511         struct inode *target;
1512
1513         if (old_dentry->d_inode == new_dentry->d_inode)
1514                 return 0;
1515
1516         error = may_delete(old_dir, old_dentry, 1);
1517         if (error)
1518                 return error;
1519
1520         if (new_dir->i_dev != old_dir->i_dev)
1521                 return -EXDEV;
1522
1523         if (!new_dentry->d_inode)
1524                 error = may_create(new_dir, new_dentry);
1525         else
1526                 error = may_delete(new_dir, new_dentry, 1);
1527         if (error)
1528                 return error;
1529
1530         if (!old_dir->i_op || !old_dir->i_op->rename)
1531                 return -EPERM;
1532
1533         /*
1534          * If we are going to change the parent - check write permissions,
1535          * we'll need to flip '..'.
1536          */
1537         if (new_dir != old_dir) {
1538                 error = permission(old_dentry->d_inode, MAY_WRITE);
1539         }
1540         if (error)
1541                 return error;
1542
1543         DQUOT_INIT(old_dir);
1544         DQUOT_INIT(new_dir);
1545         down(&old_dir->i_sb->s_vfs_rename_sem);
1546         error = -EINVAL;
1547         if (is_subdir(new_dentry, old_dentry))
1548                 goto out_unlock;
1549         target = new_dentry->d_inode;
1550         if (target) { /* Hastur! Hastur! Hastur! */
1551                 triple_down(&old_dir->i_zombie,
1552                             &new_dir->i_zombie,
1553                             &target->i_zombie);
1554                 d_unhash(new_dentry);
1555         } else
1556                 double_down(&old_dir->i_zombie,
1557                             &new_dir->i_zombie);
1558         error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1559         if (target) {
1560                 if (!error)
1561                         target->i_flags |= S_DEAD;
1562                 triple_up(&old_dir->i_zombie,
1563                           &new_dir->i_zombie,
1564                           &target->i_zombie);
1565                 d_rehash(new_dentry);
1566                 dput(new_dentry);
1567         } else
1568                 double_up(&old_dir->i_zombie,
1569                           &new_dir->i_zombie);
1570
1571         if (!error)
1572                 d_move(old_dentry,new_dentry);
1573 out_unlock:
1574         up(&old_dir->i_sb->s_vfs_rename_sem);
1575         return error;
1576 }
1577
1578 int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
1579                struct inode *new_dir, struct dentry *new_dentry)
1580 {
1581         int error;
1582
1583         if (old_dentry->d_inode == new_dentry->d_inode)
1584                 return 0;
1585
1586         error = may_delete(old_dir, old_dentry, 0);
1587         if (error)
1588                 return error;
1589
1590         if (new_dir->i_dev != old_dir->i_dev)
1591                 return -EXDEV;
1592
1593         if (!new_dentry->d_inode)
1594                 error = may_create(new_dir, new_dentry);
1595         else
1596                 error = may_delete(new_dir, new_dentry, 0);
1597         if (error)
1598                 return error;
1599
1600         if (!old_dir->i_op || !old_dir->i_op->rename)
1601                 return -EPERM;
1602
1603         DQUOT_INIT(old_dir);
1604         DQUOT_INIT(new_dir);
1605         double_down(&old_dir->i_zombie, &new_dir->i_zombie);
1606         error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1607         double_up(&old_dir->i_zombie, &new_dir->i_zombie);
1608         if (error)
1609                 return error;
1610         /* The following d_move() should become unconditional */
1611         if (!(old_dir->i_sb->s_flags & MS_ODD_RENAME)) {
1612                 d_move(old_dentry, new_dentry);
1613         }
1614         return 0;
1615 }
1616
1617 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1618                struct inode *new_dir, struct dentry *new_dentry)
1619 {
1620         if (S_ISDIR(old_dentry->d_inode->i_mode))
1621                 return vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
1622         else
1623                 return vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
1624 }
1625
1626 static inline int do_rename(const char * oldname, const char * newname)
1627 {
1628         int error = 0;
1629         struct dentry * old_dir, * new_dir;
1630         struct dentry * old_dentry, *new_dentry;
1631         struct nameidata oldnd, newnd;
1632
1633         if (walk_init(oldname, LOOKUP_PARENT, &oldnd))
1634                 error = walk_name(oldname, &oldnd);
1635
1636         if (error)
1637                 goto exit;
1638
1639         if (walk_init(newname, LOOKUP_PARENT, &newnd))
1640                 error = walk_name(newname, &newnd);
1641         if (error)
1642                 goto exit1;
1643
1644         error = -EXDEV;
1645         if (oldnd.mnt != newnd.mnt)
1646                 goto exit2;
1647
1648         old_dir = oldnd.dentry;
1649         error = -EBUSY;
1650         if (oldnd.last_type != LAST_NORM)
1651                 goto exit2;
1652
1653         new_dir = newnd.dentry;
1654         if (newnd.last_type != LAST_NORM)
1655                 goto exit2;
1656
1657         double_lock(new_dir, old_dir);
1658
1659         old_dentry = lookup_hash(&oldnd.last, old_dir);
1660         error = PTR_ERR(old_dentry);
1661         if (IS_ERR(old_dentry))
1662                 goto exit3;
1663         /* source must exist */
1664         error = -ENOENT;
1665         if (!old_dentry->d_inode)
1666                 goto exit4;
1667         /* unless the source is a directory trailing slashes give -ENOTDIR */
1668         if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
1669                 error = -ENOTDIR;
1670                 if (oldnd.last.name[oldnd.last.len])
1671                         goto exit4;
1672                 if (newnd.last.name[newnd.last.len])
1673                         goto exit4;
1674         }
1675         new_dentry = lookup_hash(&newnd.last, new_dir);
1676         error = PTR_ERR(new_dentry);
1677         if (IS_ERR(new_dentry))
1678                 goto exit4;
1679
1680         error = vfs_rename(old_dir->d_inode, old_dentry,
1681                                    new_dir->d_inode, new_dentry);
1682
1683         dput(new_dentry);
1684 exit4:
1685         dput(old_dentry);
1686 exit3:
1687         double_up(&new_dir->d_inode->i_sem, &old_dir->d_inode->i_sem);
1688 exit2:
1689         dput(newnd.dentry);
1690         mntput(newnd.mnt);
1691 exit1:
1692         dput(oldnd.dentry);
1693         mntput(oldnd.mnt);
1694 exit:
1695         return error;
1696 }
1697
1698 asmlinkage long sys_rename(const char * oldname, const char * newname)
1699 {
1700         int error;
1701         char * from;
1702         char * to;
1703
1704         from = getname(oldname);
1705         if(IS_ERR(from))
1706                 return PTR_ERR(from);
1707         to = getname(newname);
1708         error = PTR_ERR(to);
1709         if (!IS_ERR(to)) {
1710                 lock_kernel();
1711                 error = do_rename(from,to);
1712                 unlock_kernel();
1713                 putname(to);
1714         }
1715         putname(from);
1716         return error;
1717 }
1718
/*
 * vfs_readlink - copy the text of a symlink out to a caller buffer.
 * @dentry:  not used by this function
 * @buffer:  destination, written with copy_to_user()
 * @buflen:  capacity of @buffer in bytes
 * @link:    link text, or an error-encoded pointer
 *
 * At most @buflen bytes are copied and no terminating NUL is written:
 * the byte count is strlen(@link), clamped to @buflen.
 *
 * Returns the number of bytes copied, the error carried in @link when
 * IS_ERR(@link), or -EFAULT if copy_to_user() reports a failure.
 */
int vfs_readlink(struct dentry *dentry, char *buffer, int buflen, const char *link)
{
	int count;

	if (IS_ERR(link))
		return PTR_ERR(link);

	count = strlen(link);
	/* The comparison is deliberately done in unsigned arithmetic. */
	if (count > (unsigned) buflen)
		count = buflen;

	return copy_to_user(buffer, link, count) ? -EFAULT : count;
}
1735
1736 static inline int
1737 __vfs_follow_link(struct nameidata *nd, const char *link)
1738 {
1739         if (IS_ERR(link))
1740                 goto fail;
1741
1742         if (*link == '/') {
1743                 dput(nd->dentry);
1744                 mntput(nd->mnt);
1745                 if (!walk_init_root(link, nd))
1746                         /* weird __emul_prefix() stuff did it */
1747                         return 0;
1748         }
1749         return walk_name(link, nd);
1750
1751 fail:
1752         dput(nd->dentry);
1753         mntput(nd->mnt);
1754         return PTR_ERR(link);
1755 }
1756
/*
 * vfs_follow_link - non-inline wrapper around __vfs_follow_link().
 *
 * Passes @nd and @link through unchanged and returns the result.
 */
int vfs_follow_link(struct nameidata *nd, const char *link)
{
	int ret;

	ret = __vfs_follow_link(nd, link);
	return ret;
}
1761
1762 /* get the link contents into pagecache */
1763 static char *page_getlink(struct dentry * dentry, struct page **ppage)
1764 {
1765         struct page * page;
1766         struct address_space *mapping = dentry->d_inode->i_mapping;
1767         page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage,
1768                                 dentry);
1769         if (IS_ERR(page))
1770                 goto sync_fail;
1771         wait_on_page(page);
1772         if (!Page_Uptodate(page))
1773                 goto async_fail;
1774         *ppage = page;
1775         return (char*) kmap(page);
1776
1777 async_fail:
1778         page_cache_release(page);
1779         return ERR_PTR(-EIO);
1780
1781 sync_fail:
1782         return (char*)page;
1783 }
1784
1785 int page_readlink(struct dentry *dentry, char *buffer, int buflen)
1786 {
1787         struct page *page = NULL;
1788         char *s = page_getlink(dentry, &page);
1789         int res = vfs_readlink(dentry,buffer,buflen,s);
1790         if (page) {
1791                 kunmap(page);
1792                 page_cache_release(page);
1793         }
1794         return res;
1795 }
1796
1797 int page_follow_link(struct dentry *dentry, struct nameidata *nd)
1798 {
1799         struct page *page = NULL;
1800         char *s = page_getlink(dentry, &page);
1801         int res = __vfs_follow_link(nd, s);
1802         if (page) {
1803                 kunmap(page);
1804                 page_cache_release(page);
1805         }
1806         return res;
1807 }
1808
1809 struct inode_operations page_symlink_inode_operations = {
1810         readlink:       page_readlink,
1811         follow_link:    page_follow_link,
1812 };