fs/nfs/dir.c

   1 /*
   2  *  linux/fs/nfs/dir.c
   3  *
   4  *  Copyright (C) 1992  Rick Sladkey
   5  *
   6  *  nfs directory handling functions
   7  *
   8  * 10 Apr 1996  Added silly rename for unlink   --okir
   9  * 28 Sep 1996  Improved directory cache --okir
  10  * 23 Aug 1997  Claus Heine claus@momo.math.rwth-aachen.de
  11  *              Re-implemented silly rename for unlink, newly implemented
  12  *              silly rename for nfs_rename() following the suggestions
  13  *              of Olaf Kirch (okir) found in this file.
  14  *              Following Linus comments on my original hack, this version
  15  *              depends only on the dcache stuff and doesn't touch the inode
  16  *              layer (iput() and friends).
  17  *  6 Jun 1999  Cache readdir lookups in the page cache. -DaveM
  18  */
  19
  20 #define NFS_NEED_XDR_TYPES
  21 #include <linux/sched.h>
  22 #include <linux/errno.h>
  23 #include <linux/stat.h>
  24 #include <linux/fcntl.h>
  25 #include <linux/string.h>
  26 #include <linux/kernel.h>
  27 #include <linux/malloc.h>
  28 #include <linux/mm.h>
  29 #include <linux/sunrpc/clnt.h>
  30 #include <linux/nfs_fs.h>
  31 #include <linux/nfs.h>
  32 #include <linux/pagemap.h>
  33
  34 #include <asm/segment.h>        /* for fs functions */
  35
  36 #define NFS_PARANOIA 1
  37 /* #define NFS_DEBUG_VERBOSE 1 */
  38
  39 static int nfs_safe_remove(struct dentry *);
  40
  41 static int nfs_readdir(struct file *, void *, filldir_t);
  42 static struct dentry *nfs_lookup(struct inode *, struct dentry *);
  43 static int nfs_create(struct inode *, struct dentry *, int);
  44 static int nfs_mkdir(struct inode *, struct dentry *, int);
  45 static int nfs_rmdir(struct inode *, struct dentry *);
  46 static int nfs_unlink(struct inode *, struct dentry *);
  47 static int nfs_symlink(struct inode *, struct dentry *, const char *);
  48 static int nfs_link(struct dentry *, struct inode *, struct dentry *);
  49 static int nfs_mknod(struct inode *, struct dentry *, int, int);
  50 static int nfs_rename(struct inode *, struct dentry *,
  51                       struct inode *, struct dentry *);
  52
  53 struct file_operations nfs_dir_operations = {
  54         read:           generic_read_dir,
  55         readdir:        nfs_readdir,
  56         open:           nfs_open,
  57         release:        nfs_release,
  58 };
  59
  60 struct inode_operations nfs_dir_inode_operations = {
  61         create:         nfs_create,
  62         lookup:         nfs_lookup,
  63         link:           nfs_link,
  64         unlink:         nfs_unlink,
  65         symlink:        nfs_symlink,
  66         mkdir:          nfs_mkdir,
  67         rmdir:          nfs_rmdir,
  68         mknod:          nfs_mknod,
  69         rename:         nfs_rename,
  70         revalidate:     nfs_revalidate,
  71         setattr:        nfs_notify_change,
  72 };
  73
  74 /* Each readdir response is composed of entries which look
  75  * like the following, as per the NFSv2 RFC:
  76  *
  77  *      __u32   not_end                 zero if end of response
  78  *      __u32   file ID                 opaque ino_t
  79  *      __u32   namelen                 size of name string
  80  *      VAR     name string             the string, padded to modulo 4 bytes
  81  *      __u32   cookie                  opaque ID of next entry
  82  *
  83  * When you hit not_end being zero, the next __u32 is non-zero if
   84  * this is the end of the complete set of readdir entries for this
  85  * directory.  This can be used, for example, to initiate pre-fetch.
  86  *
  87  * In order to know what to ask the server for, we only need to know
  88  * the final cookie of the previous page, and offset zero has cookie
  89  * zero, so we cache cookie to page offset translations in chunks.
  90  */
  91 #define COOKIES_PER_CHUNK (8 - ((sizeof(void *) / sizeof(__u32))))
  92 struct nfs_cookie_table {
  93         struct nfs_cookie_table *next;
  94         __u32   cookies[COOKIES_PER_CHUNK];
  95 };
  96 static kmem_cache_t *nfs_cookie_cachep;
  97
  98 /* This whole scheme relies on the fact that dirent cookies
  99  * are monotonically increasing.
 100  *
 101  * Another invariant is that once we have a valid non-zero
 102  * EOF marker cached, we also have the complete set of cookie
 103  * table entries.
 104  *
  105  * We return the page offset associated with the page where
 106  * cookie must be if it exists at all, however if we can not
 107  * figure that out conclusively, we return < 0.
 108  */
 109 static long __nfs_readdir_offset(struct inode *inode, __u32 cookie)
 110 {
 111         struct nfs_cookie_table *p;
 112         unsigned long ret = 0;
 113
 114         for(p = NFS_COOKIES(inode); p != NULL; p = p->next) {
 115                 int i;
 116
 117                 for (i = 0; i < COOKIES_PER_CHUNK; i++) {
 118                         __u32 this_cookie = p->cookies[i];
 119
 120                         /* End of known cookies, EOF is our only hope. */
 121                         if (!this_cookie)
 122                                 goto check_eof;
 123
 124                         /* Next cookie is larger, must be in previous page. */
 125                         if (this_cookie > cookie)
 126                                 return ret;
 127
 128                         ret += 1;
 129
 130                         /* Exact cookie match, it must be in this page :-) */
 131                         if (this_cookie == cookie)
 132                                 return ret;
 133                 }
 134         }
 135 check_eof:
 136         if (NFS_DIREOF(inode) != 0)
 137                 return ret;
 138
 139         return -1L;
 140 }
 141
 142 static __inline__ long nfs_readdir_offset(struct inode *inode, __u32 cookie)
 143 {
 144         /* Cookie zero is always at page offset zero.   Optimize the
 145          * other common case since most directories fit entirely
 146          * in one page.
 147          */
 148         if (!cookie || (!NFS_COOKIES(inode) && NFS_DIREOF(inode)))
 149                 return 0;
 150         return __nfs_readdir_offset(inode, cookie);
 151 }
 152
 153 /* Since a cookie of zero is declared special by the NFS
 154  * protocol, we easily can tell if a cookie in an existing
 155  * table chunk is valid or not.
 156  *
  157  * NOTE: The cookies are indexed off-by-one because cookie
  158  *       zero needs no entry.
 159  */
 160 static __inline__ __u32 *find_cookie(struct inode *inode, unsigned long off)
 161 {
 162         static __u32 cookie_zero = 0;
 163         struct nfs_cookie_table *p;
 164         __u32 *ret;
 165
 166         if (!off)
 167                 return &cookie_zero;
 168         off -= 1;
 169         p = NFS_COOKIES(inode);
 170         while(off >= COOKIES_PER_CHUNK && p) {
 171                 off -= COOKIES_PER_CHUNK;
 172                 p = p->next;
 173         }
 174         ret = NULL;
 175         if (p) {
 176                 ret = &p->cookies[off];
 177                 if (!*ret)
 178                         ret = NULL;
 179         }
 180         return ret;
 181 }
 182
 183 #define NFS_NAMELEN_ALIGN(__len) ((((__len)+3)>>2)<<2)
 184 static int create_cookie(__u32 cookie, unsigned long off, struct inode *inode)
 185 {
 186         struct nfs_cookie_table **cpp;
 187
 188         cpp = (struct nfs_cookie_table **) &NFS_COOKIES(inode);
 189         while (off >= COOKIES_PER_CHUNK && *cpp) {
 190                 off -= COOKIES_PER_CHUNK;
 191                 cpp = &(*cpp)->next;
 192         }
 193         if (*cpp) {
 194                 (*cpp)->cookies[off] = cookie;
 195         } else {
 196                 struct nfs_cookie_table *new;
 197                 int i;
 198
 199                 new = kmem_cache_alloc(nfs_cookie_cachep, SLAB_ATOMIC);
 200                 if(!new)
 201                         return -1;
 202                 *cpp = new;
 203                 new->next = NULL;
 204                 for(i = 0; i < COOKIES_PER_CHUNK; i++) {
 205                         if (i == off) {
 206                                 new->cookies[i] = cookie;
 207                         } else {
 208                                 new->cookies[i] = 0;
 209                         }
 210                 }
 211         }
 212         return 0;
 213 }
 214
 215 static struct page *try_to_get_dirent_page(struct file *, __u32, int);
 216
 217 /* Recover from a revalidation flush.  The case here is that
 218  * the inode for the directory got invalidated somehow, and
 219  * all of our cached information is lost.  In order to get
 220  * a correct cookie for the current readdir request from the
 221  * user, we must (re-)fetch older readdir page cache entries.
 222  *
  223  * Returns < 0 if some error occurs, else it is the page offset
 224  * to fetch.
 225  */
 226 static long refetch_to_readdir_cookie(struct file *file, struct inode *inode)
 227 {
 228         struct page *page;
 229         u32 goal_cookie = file->f_pos;
 230         long cur_off, ret = -1L;
 231
 232 again:
 233         cur_off = 0;
 234         for (;;) {
 235                 page = find_get_page(&inode->i_data, cur_off);
 236                 if (page) {
 237                         if (!Page_Uptodate(page))
 238                                 goto out_error;
 239                 } else {
 240                         __u32 *cp = find_cookie(inode, cur_off);
 241
 242                         if (!cp)
 243                                 goto out_error;
 244
 245                         page = try_to_get_dirent_page(file, *cp, 0);
 246                         if (!page) {
 247                                 if (!cur_off)
 248                                         goto out_error;
 249
 250                                 /* Someone touched the dir on us. */
 251                                 goto again;
 252                         }
 253                 }
 254                 page_cache_release(page);
 255
 256                 if ((ret = nfs_readdir_offset(inode, goal_cookie)) >= 0)
 257                         goto out;
 258
 259                 cur_off += 1;
 260         }
 261 out:
 262         return ret;
 263
 264 out_error:
 265         if (page)
 266                 page_cache_release(page);
 267         goto out;
 268 }
 269
 270 /* Now we cache directories properly, by stuffing the dirent
 271  * data directly in the page cache.
 272  *
 273  * Inode invalidation due to refresh etc. takes care of
 274  * _everything_, no sloppy entry flushing logic, no extraneous
 275  * copying, network direct to page cache, the way it was meant
 276  * to be.
 277  *
 278  * NOTE: Dirent information verification is done always by the
  279  *       page-in of the RPC reply, nowhere else, this simplifies
 280  *       things substantially.
 281  */
 282
 283 static int nfs_dir_filler(struct dentry *dentry, struct page *page)
 284 {
 285         struct nfs_readdirargs rd_args;
 286         struct nfs_readdirres rd_res;
 287         struct inode *inode = dentry->d_inode;
 288         long offset = page->index;
 289         __u32 *cookiep;
 290         int err;
 291
 292         kmap(page);
 293
 294         err = -EIO;
 295         cookiep = find_cookie(inode, offset);
 296         if (!cookiep)
 297                 goto fail;
 298
 299         rd_args.fh = NFS_FH(dentry);
 300         rd_res.buffer = (char *)page_address(page);
 301         rd_res.bufsiz = PAGE_CACHE_SIZE;
 302         rd_res.cookie = *cookiep;
 303         do {
 304                 rd_args.buffer = rd_res.buffer;
 305                 rd_args.bufsiz = rd_res.bufsiz;
 306                 rd_args.cookie = rd_res.cookie;
 307                 err = rpc_call(NFS_CLIENT(inode),
 308                              NFSPROC_READDIR, &rd_args, &rd_res, 0);
 309                 if (err < 0)
 310                         goto fail;
 311         } while(rd_res.bufsiz > 0);
 312
 313         err = -EIO;
 314         if (rd_res.bufsiz < 0)
 315                 NFS_DIREOF(inode) = rd_res.cookie;
 316         else if (create_cookie(rd_res.cookie, offset, inode))
 317                 goto fail;
 318
 319         SetPageUptodate(page);
 320         kunmap(page);
 321         UnlockPage(page);
 322         return 0;
 323 fail:
 324         SetPageError(page);
 325         kunmap(page);
 326         UnlockPage(page);
 327         return err;
 328 }
 329
 330 static struct page *try_to_get_dirent_page(struct file *file, __u32 cookie, int refetch_ok)
 331 {
 332         struct dentry *dentry = file->f_dentry;
 333         struct inode *inode = dentry->d_inode;
 334         struct page *page;
 335         long offset;
 336
 337         if ((offset = nfs_readdir_offset(inode, cookie)) < 0) {
 338                 if (!refetch_ok ||
 339                     (offset = refetch_to_readdir_cookie(file, inode)) < 0) {
 340                         goto fail;
 341                 }
 342         }
 343
 344         page = read_cache_page(&inode->i_data, offset,
 345                                 (filler_t *)nfs_dir_filler, dentry);
 346         if (IS_ERR(page))
 347                 goto fail;
 348         if (!Page_Uptodate(page))
 349                 goto fail2;
 350         return page;
 351
 352 fail2:
 353         page_cache_release(page);
 354 fail:
 355         return NULL;
 356 }
 357
  358 /* Seek up to dirent associated with the passed in cookie,
 359  * then fill in dirents found.  Return the last cookie
 360  * actually given to the user, to update the file position.
 361  */
 362 static __inline__ u32 nfs_do_filldir(__u32 *p, u32 cookie,
 363                                      void *dirent, filldir_t filldir)
 364 {
 365         u32 end;
 366
 367         while((end = *p++) != 0) {
 368                 __u32 fileid, len, skip, this_cookie;
 369                 char *name;
 370
 371                 fileid = *p++;
 372                 len = *p++;
 373                 name = (char *) p;
 374                 skip = NFS_NAMELEN_ALIGN(len);
 375                 p += (skip >> 2);
 376                 this_cookie = *p++;
 377
 378                 if (this_cookie < cookie)
 379                         continue;
 380
 381                 cookie = this_cookie;
 382                 if (filldir(dirent, name, len, cookie, fileid) < 0)
 383                         break;
 384         }
 385
 386         return cookie;
 387 }
 388
 389 /* The file offset position is represented in pure bytes, to
 390  * make the page cache interface straight forward.
 391  *
 392  * However, some way is needed to make the connection between the
 393  * opaque NFS directory entry cookies and our offsets, so a per-inode
 394  * cookie cache table is used.
 395  */
 396 static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 397 {
 398         struct dentry *dentry = filp->f_dentry;
 399         struct inode *inode = dentry->d_inode;
 400         struct page *page;
 401         long offset;
 402         int res;
 403
 404         res = nfs_revalidate_inode(NFS_DSERVER(dentry), dentry);
 405         if (res < 0)
 406                 return res;
 407
 408         if (NFS_DIREOF(inode) && filp->f_pos >= NFS_DIREOF(inode))
 409                 return 0;
 410
 411         if ((offset = nfs_readdir_offset(inode, filp->f_pos)) < 0)
 412                 goto no_dirent_page;
 413
 414         page = find_get_page(&inode->i_data, offset);
 415         if (!page)
 416                 goto no_dirent_page;
 417         if (!Page_Uptodate(page))
 418                 goto dirent_read_error;
 419 success:
 420         kmap(page);
 421         filp->f_pos = nfs_do_filldir((__u32 *) page_address(page),
 422                                      filp->f_pos, dirent, filldir);
 423         kunmap(page);
 424         page_cache_release(page);
 425         return 0;
 426
 427 no_dirent_page:
 428         page = try_to_get_dirent_page(filp, filp->f_pos, 1);
 429         if (!page)
 430                 goto no_page;
 431
 432         if (Page_Uptodate(page))
 433                 goto success;
 434 dirent_read_error:
 435         page_cache_release(page);
 436 no_page:
 437         return -EIO;
 438 }
 439
 440 /* Flush directory cookie and EOF caches for an inode.
 441  * So we don't thrash allocating/freeing cookie tables,
 442  * we keep the cookies around until the inode is
 443  * deleted/reused.
 444  */
 445 __inline__ void nfs_flush_dircache(struct inode *inode)
 446 {
 447         struct nfs_cookie_table *p = NFS_COOKIES(inode);
 448
 449         while (p != NULL) {
 450                 int i;
 451
 452                 for(i = 0; i < COOKIES_PER_CHUNK; i++)
 453                         p->cookies[i] = 0;
 454
 455                 p = p->next;
 456         }
 457         NFS_DIREOF(inode) = 0;
 458 }
 459
 460 /* Free up directory cache state, this happens when
 461  * nfs_delete_inode is called on an NFS directory.
 462  */
 463 void nfs_free_dircache(struct inode *inode)
 464 {
 465         struct nfs_cookie_table *p = NFS_COOKIES(inode);
 466
 467         while (p != NULL) {
 468                 struct nfs_cookie_table *next = p->next;
 469                 kmem_cache_free(nfs_cookie_cachep, p);
 470                 p = next;
 471         }
 472         NFS_COOKIES(inode) = NULL;
 473         NFS_DIREOF(inode) = 0;
 474 }
 475
 476 /*
 477  * Whenever an NFS operation succeeds, we know that the dentry
 478  * is valid, so we update the revalidation timestamp.
 479  */
 480 static inline void nfs_renew_times(struct dentry * dentry)
 481 {
 482         dentry->d_time = jiffies;
 483 }
 484
 485 static inline int nfs_dentry_force_reval(struct dentry *dentry, int flags)
 486 {
 487         struct inode *inode = dentry->d_inode;
 488         unsigned long timeout = NFS_ATTRTIMEO(inode);
 489
 490         /*
 491          * If it's the last lookup in a series, we use a stricter
 492          * cache consistency check by looking at the parent mtime.
 493          *
 494          * If it's been modified in the last hour, be really strict.
 495          * (This still means that we can avoid doing unnecessary
 496          * work on directories like /usr/share/bin etc which basically
 497          * never change).
 498          */
 499         if (!(flags & LOOKUP_CONTINUE)) {
 500                 long diff = CURRENT_TIME - dentry->d_parent->d_inode->i_mtime;
 501
 502                 if (diff < 15*60)
 503                         timeout = 0;
 504         }
 505
 506         return time_after(jiffies,dentry->d_time + timeout);
 507 }
 508
 509 /*
 510  * We judge how long we want to trust negative
 511  * dentries by looking at the parent inode mtime.
 512  *
 513  * If mtime is close to present time, we revalidate
 514  * more often.
 515  */
 516 #define NFS_REVALIDATE_NEGATIVE (1 * HZ)
 517 static inline int nfs_neg_need_reval(struct dentry *dentry)
 518 {
 519         struct inode *dir = dentry->d_parent->d_inode;
 520         unsigned long timeout = NFS_ATTRTIMEO(dir);
 521         long diff = CURRENT_TIME - dir->i_mtime;
 522
 523         if (diff < 5*60 && timeout > NFS_REVALIDATE_NEGATIVE)
 524                 timeout = NFS_REVALIDATE_NEGATIVE;
 525
 526         return time_after(jiffies, dentry->d_time + timeout);
 527 }
 528
 529 /*
 530  * This is called every time the dcache has a lookup hit,
 531  * and we should check whether we can really trust that
 532  * lookup.
 533  *
 534  * NOTE! The hit can be a negative hit too, don't assume
 535  * we have an inode!
 536  *
 537  * If the dentry is older than the revalidation interval,
 538  * we do a new lookup and verify that the dentry is still
 539  * correct.
 540  */
 541 static int nfs_lookup_revalidate(struct dentry * dentry, int flags)
 542 {
 543         struct dentry * parent = dentry->d_parent;
 544         struct inode * inode = dentry->d_inode;
 545         int error;
 546         struct nfs_fh fhandle;
 547         struct nfs_fattr fattr;
 548
 549         /*
 550          * If we don't have an inode, let's look at the parent
 551          * directory mtime to get a hint about how often we
 552          * should validate things..
 553          */
 554         if (!inode) {
 555                 if (nfs_neg_need_reval(dentry))
 556                         goto out_bad;
 557                 goto out_valid;
 558         }
 559
 560         if (is_bad_inode(inode)) {
 561                 dfprintk(VFS, "nfs_lookup_validate: %s/%s has dud inode\n",
 562                         parent->d_name.name, dentry->d_name.name);
 563                 goto out_bad;
 564         }
 565
 566         if (!nfs_dentry_force_reval(dentry, flags))
 567                 goto out_valid;
 568
 569         if (IS_ROOT(dentry)) {
 570                 __nfs_revalidate_inode(NFS_DSERVER(dentry), dentry);
 571                 goto out_valid_renew;
 572         }
 573
 574         /*
 575          * Do a new lookup and check the dentry attributes.
 576          */
 577         error = nfs_proc_lookup(NFS_DSERVER(parent), NFS_FH(parent),
 578                                 dentry->d_name.name, &fhandle, &fattr);
 579         if (error)
 580                 goto out_bad;
 581
 582         /* Inode number matches? */
 583         if (NFS_FSID(inode) != fattr.fsid ||
 584             NFS_FILEID(inode) != fattr.fileid)
 585                 goto out_bad;
 586
 587         /* Filehandle matches? */
 588         if (memcmp(dentry->d_fsdata, &fhandle, sizeof(struct nfs_fh)))
 589                 goto out_bad;
 590
 591         /* Ok, remeber that we successfully checked it.. */
 592         nfs_refresh_inode(inode, &fattr);
 593
 594  out_valid_renew:
 595         nfs_renew_times(dentry);
 596 out_valid:
 597         return 1;
 598 out_bad:
 599         d_drop(dentry);
 600         if (!list_empty(&dentry->d_subdirs))
 601                 shrink_dcache_parent(dentry);
 602         /* Purge readdir caches. */
 603         if (dentry->d_parent->d_inode) {
 604                 nfs_zap_caches(dentry->d_parent->d_inode);
 605                 NFS_CACHEINV(dentry->d_parent->d_inode);
 606         }
 607         return 0;
 608 }
 609
 610 /*
 611  * This is called from dput() when d_count is going to 0.
 612  * We use it to clean up silly-renamed files.
 613  */
 614 static void nfs_dentry_delete(struct dentry *dentry)
 615 {
 616         dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n",
 617                 dentry->d_parent->d_name.name, dentry->d_name.name,
 618                 dentry->d_flags);
 619
 620         if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
 621                 int error;
 622
 623                 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
 624                 /* Unhash it first */
 625                 d_drop(dentry);
 626                 error = nfs_safe_remove(dentry);
 627                 if (error)
 628                         printk("NFS: can't silly-delete %s/%s, error=%d\n",
 629                                 dentry->d_parent->d_name.name,
 630                                 dentry->d_name.name, error);
 631         }
 632
 633 }
 634
 635 static kmem_cache_t *nfs_fh_cachep;
 636
 637 __inline__ struct nfs_fh *nfs_fh_alloc(void)
 638 {
 639         return kmem_cache_alloc(nfs_fh_cachep, SLAB_KERNEL);
 640 }
 641
 642 __inline__ void nfs_fh_free(struct nfs_fh *p)
 643 {
 644         kmem_cache_free(nfs_fh_cachep, p);
 645 }
 646
 647 /*
 648  * Called when the dentry is being freed to release private memory.
 649  */
 650 static void nfs_dentry_release(struct dentry *dentry)
 651 {
 652         if (dentry->d_fsdata)
 653                 nfs_fh_free(dentry->d_fsdata);
 654 }
 655
 656 struct dentry_operations nfs_dentry_operations = {
 657         d_revalidate:   nfs_lookup_revalidate,
 658         d_delete:       nfs_dentry_delete,
 659         d_release:      nfs_dentry_release,
 660 };
 661
 662 #if 0 /* dead code */
 663 #ifdef NFS_PARANOIA
 664 /*
 665  * Display all dentries holding the specified inode.
 666  */
 667 static void show_dentry(struct list_head * dlist)
 668 {
 669         struct list_head *tmp = dlist;
 670
 671         while ((tmp = tmp->next) != dlist) {
 672                 struct dentry * dentry = list_entry(tmp, struct dentry, d_alias);
 673                 const char * unhashed = "";
 674
 675                 if (list_empty(&dentry->d_hash))
 676                         unhashed = "(unhashed)";
 677
 678                 printk("show_dentry: %s/%s, d_count=%d%s\n",
 679                         dentry->d_parent->d_name.name,
 680                         dentry->d_name.name, dentry->d_count,
 681                         unhashed);
 682         }
 683 }
 684 #endif /* NFS_PARANOIA */
 685 #endif /* 0 */
 686
 687 static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry)
 688 {
 689         struct inode *inode;
 690         int error;
 691         struct nfs_fh fhandle;
 692         struct nfs_fattr fattr;
 693
 694         dfprintk(VFS, "NFS: lookup(%s/%s)\n",
 695                 dentry->d_parent->d_name.name, dentry->d_name.name);
 696
 697         error = -ENAMETOOLONG;
 698         if (dentry->d_name.len > NFS_MAXNAMLEN)
 699                 goto out;
 700
 701         error = -ENOMEM;
 702         if (!dentry->d_fsdata) {
 703                 dentry->d_fsdata = nfs_fh_alloc();
 704                 if (!dentry->d_fsdata)
 705                         goto out;
 706         }
 707         dentry->d_op = &nfs_dentry_operations;
 708
 709         error = nfs_proc_lookup(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
 710                                 dentry->d_name.name, &fhandle, &fattr);
 711         inode = NULL;
 712         if (error == -ENOENT)
 713                 goto no_entry;
 714         if (!error) {
 715                 error = -EACCES;
 716                 inode = nfs_fhget(dentry, &fhandle, &fattr);
 717                 if (inode) {
 718             no_entry:
 719                         d_add(dentry, inode);
 720                         nfs_renew_times(dentry);
 721                         error = 0;
 722                 }
 723         }
 724 out:
 725         return ERR_PTR(error);
 726 }
 727
 728 /*
 729  * Code common to create, mkdir, and mknod.
 730  */
 731 static int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
 732                                 struct nfs_fattr *fattr)
 733 {
 734         struct inode *inode;
 735         int error = -EACCES;
 736
 737         inode = nfs_fhget(dentry, fhandle, fattr);
 738         if (inode) {
 739                 d_instantiate(dentry, inode);
 740                 nfs_renew_times(dentry);
 741                 error = 0;
 742         }
 743         return error;
 744 }
 745
 746 /*
 747  * Following a failed create operation, we drop the dentry rather
 748  * than retain a negative dentry. This avoids a problem in the event
 749  * that the operation succeeded on the server, but an error in the
 750  * reply path made it appear to have failed.
 751  */
 752 static int nfs_create(struct inode *dir, struct dentry *dentry, int mode)
 753 {
 754         int error;
 755         struct iattr attr;
 756         struct nfs_fattr fattr;
 757         struct nfs_fh fhandle;
 758
 759         dfprintk(VFS, "NFS: create(%x/%ld, %s\n",
 760                 dir->i_dev, dir->i_ino, dentry->d_name.name);
 761
 762         attr.ia_mode = mode;
 763         attr.ia_valid = ATTR_MODE;
 764
 765         /*
 766          * Invalidate the dir cache before the operation to avoid a race.
 767          */
 768         invalidate_inode_pages(dir);
 769         nfs_flush_dircache(dir);
 770         error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
 771                         dentry->d_name.name, &attr, &fhandle, &fattr);
 772         if (!error)
 773                 error = nfs_instantiate(dentry, &fhandle, &fattr);
 774         if (error)
 775                 d_drop(dentry);
 776         return error;
 777 }
 778
 779 /*
  780  * See comments for nfs_create regarding failed operations.
 781  */
 782 static int nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rdev)
 783 {
 784         int error;
 785         struct iattr attr;
 786         struct nfs_fattr fattr;
 787         struct nfs_fh fhandle;
 788
 789         dfprintk(VFS, "NFS: mknod(%x/%ld, %s\n",
 790                 dir->i_dev, dir->i_ino, dentry->d_name.name);
 791
 792         attr.ia_mode = mode;
 793         attr.ia_valid = ATTR_MODE;
 794         /* FIXME: move this to a special nfs_proc_mknod() */
 795         if (S_ISCHR(mode) || S_ISBLK(mode)) {
 796                 attr.ia_size = rdev; /* get out your barf bag */
 797                 attr.ia_valid |= ATTR_SIZE;
 798         }
 799
 800         invalidate_inode_pages(dir);
 801         nfs_flush_dircache(dir);
 802         error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
 803                                 dentry->d_name.name, &attr, &fhandle, &fattr);
 804         if (!error)
 805                 error = nfs_instantiate(dentry, &fhandle, &fattr);
 806         if (error)
 807                 d_drop(dentry);
 808         return error;
 809 }
 810
 811 /*
  812  * See comments for nfs_create regarding failed operations.
 813  */
 814 static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 815 {
 816         int error;
 817         struct iattr attr;
 818         struct nfs_fattr fattr;
 819         struct nfs_fh fhandle;
 820
 821         dfprintk(VFS, "NFS: mkdir(%x/%ld, %s\n",
 822                 dir->i_dev, dir->i_ino, dentry->d_name.name);
 823
 824         attr.ia_valid = ATTR_MODE;
 825         attr.ia_mode = mode | S_IFDIR;
 826
 827         /*
 828          * Always drop the dentry, we can't always depend on
 829          * the fattr returned by the server (AIX seems to be
 830          * broken). We're better off doing another lookup than
 831          * depending on potentially bogus information.
 832          */
 833         d_drop(dentry);
 834         invalidate_inode_pages(dir);
 835         nfs_flush_dircache(dir);
 836         error = nfs_proc_mkdir(NFS_DSERVER(dentry), NFS_FH(dentry->d_parent),
 837                                 dentry->d_name.name, &attr, &fhandle, &fattr);
 838         if (!error)
 839                 dir->i_nlink++;
 840         return error;
 841 }
 842
 843 static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
 844 {
 845         int error;
 846
 847         dfprintk(VFS, "NFS: rmdir(%x/%ld, %s\n",
 848                 dir->i_dev, dir->i_ino, dentry->d_name.name);
 849
 850         invalidate_inode_pages(dir);
 851         nfs_flush_dircache(dir);
 852         error = nfs_proc_rmdir(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
 853                                 dentry->d_name.name);
 854
 855         /* Update i_nlink and invalidate dentry. */
 856         if (!error) {
 857                 d_drop(dentry);
 858                 if (dir->i_nlink)
 859                         dir->i_nlink--;
 860         }
 861
 862         return error;
 863 }
 864
 865
 866 /*  Note: we copy the code from lookup_dentry() here, only: we have to
 867  *  omit the directory lock. We are already the owner of the lock when
 868  *  we reach here. And "down(&dir->i_sem)" would make us sleep forever
 869  *  ('cause WE have the lock)
 870  *
 871  *  VERY IMPORTANT: calculate the hash for this dentry!!!!!!!!
 872  *  Otherwise the cached lookup DEFINITELY WILL fail. And a new dentry
 873  *  is created. Without the DCACHE_NFSFS_RENAMED flag. And with d_count
 874  *  == 1. And trouble.
 875  *
 876  *  Concerning my choice of the temp name: it is just nice to have
 877  *  i_ino part of the temp name, as this offers another check whether
 878  *  somebody attempts to remove the "silly renamed" dentry itself.
 879  *  Which is something that I consider evil. Your opinion may vary.
 880  *  BUT:
 881  *  Now that I compute the hash value right, it should be possible to simply
 882  *  check for the DCACHE_NFSFS_RENAMED flag in dentry->d_flag instead of
 883  *  doing the string compare.
 884  *  WHICH MEANS:
 885  *  This offers the opportunity to shorten the temp name. Currently, I use
 886  *  the hex representation of i_ino + an event counter. This sums up to
 887  *  as much as 36 characters for a 64 bit machine, and needs 20 chars on
 888  *  a 32 bit machine.
 889  *  QUINTESSENCE
 890  *  The use of i_ino is simply cosmetic. All we need is a unique temp
 891  *  file name for the .nfs files. The event counter seemed to be adequate.
 892  *  And as we retry in case such a file already exists, we are guaranteed
 893  *  to succeed.
 894  */
 895
 896 static
 897 struct dentry *nfs_silly_lookup(struct dentry *parent, char *silly, int slen)
 898 {
 899         struct qstr    sqstr;
 900         struct dentry *sdentry;
 901         struct dentry *res;
 902
 903         sqstr.name = silly;
 904         sqstr.len  = slen;
 905         sqstr.hash = full_name_hash(silly, slen);
 906         sdentry = d_lookup(parent, &sqstr);
 907         if (!sdentry) {
 908                 sdentry = d_alloc(parent, &sqstr);
 909                 if (sdentry == NULL)
 910                         return ERR_PTR(-ENOMEM);
 911                 res = nfs_lookup(parent->d_inode, sdentry);
 912                 if (res) {
 913                         dput(sdentry);
 914                         return res;
 915                 }
 916         }
 917         return sdentry;
 918 }
 919
 920 static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 921 {
 922         static unsigned int sillycounter = 0;
 923         const int      i_inosize  = sizeof(dir->i_ino)*2;
 924         const int      countersize = sizeof(sillycounter)*2;
 925         const int      slen       = strlen(".nfs") + i_inosize + countersize;
 926         char           silly[slen+1];
 927         struct dentry *sdentry;
 928         int            error = -EIO;
 929
 930         dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
 931                 dentry->d_parent->d_name.name, dentry->d_name.name,
 932                 dentry->d_count);
 933
 934         /*
 935          * Note that a silly-renamed file can be deleted once it's
 936          * no longer in use -- it's just an ordinary file now.
 937          */
 938         if (dentry->d_count == 1) {
 939                 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
 940                 goto out;  /* No need to silly rename. */
 941         }
 942
 943 #ifdef NFS_PARANOIA
 944 if (!dentry->d_inode)
 945 printk("NFS: silly-renaming %s/%s, negative dentry??\n",
 946 dentry->d_parent->d_name.name, dentry->d_name.name);
 947 #endif
 948         /*
 949          * We don't allow a dentry to be silly-renamed twice.
 950          */
 951         error = -EBUSY;
 952         if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
 953                 goto out;
 954
 955         sprintf(silly, ".nfs%*.*lx",
 956                 i_inosize, i_inosize, dentry->d_inode->i_ino);
 957
 958         sdentry = NULL;
 959         do {
 960                 char *suffix = silly + slen - countersize;
 961
 962                 dput(sdentry);
 963                 sillycounter++;
 964                 sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
 965
 966                 dfprintk(VFS, "trying to rename %s to %s\n",
 967                          dentry->d_name.name, silly);
 968
 969                 sdentry = nfs_silly_lookup(dentry->d_parent, silly, slen);
 970                 /*
 971                  * N.B. Better to return EBUSY here ... it could be
 972                  * dangerous to delete the file while it's in use.
 973                  */
 974                 if (IS_ERR(sdentry))
 975                         goto out;
 976         } while(sdentry->d_inode != NULL); /* need negative lookup */
 977
 978         invalidate_inode_pages(dir);
 979         nfs_flush_dircache(dir);
 980         error = nfs_proc_rename(NFS_SERVER(dir),
 981                                 NFS_FH(dentry->d_parent), dentry->d_name.name,
 982                                 NFS_FH(dentry->d_parent), silly);
 983         if (!error) {
 984                 nfs_renew_times(dentry);
 985                 d_move(dentry, sdentry);
 986                 dentry->d_flags |= DCACHE_NFSFS_RENAMED;
 987                 /* If we return 0 we don't unlink */
 988         }
 989         dput(sdentry);
 990 out:
 991         return error;
 992 }
 993
 994 /*
 995  * Remove a file after making sure there are no pending writes,
 996  * and after checking that the file has only one user.
 997  *
 998  * We update inode->i_nlink and free the inode prior to the operation
 999  * to avoid possible races if the server reuses the inode.
1000  */
1001 static int nfs_safe_remove(struct dentry *dentry)
1002 {
1003         struct inode *dir = dentry->d_parent->d_inode;
1004         struct inode *inode = dentry->d_inode;
1005         int error, rehash = 0;
1006
1007         dfprintk(VFS, "NFS: safe_remove(%s/%s, %ld)\n",
1008                 dentry->d_parent->d_name.name, dentry->d_name.name,
1009                 inode->i_ino);
1010
1011         /* N.B. not needed now that d_delete is done in advance? */
1012         error = -EBUSY;
1013         if (!inode) {
1014 #ifdef NFS_PARANOIA
1015 printk("nfs_safe_remove: %s/%s already negative??\n",
1016 dentry->d_parent->d_name.name, dentry->d_name.name);
1017 #endif
1018         }
1019
1020         if (dentry->d_count > 1) {
1021 #ifdef NFS_PARANOIA
1022 printk("nfs_safe_remove: %s/%s busy, d_count=%d\n",
1023 dentry->d_parent->d_name.name, dentry->d_name.name, dentry->d_count);
1024 #endif
1025                 goto out;
1026         }
1027         /*
1028          * Unhash the dentry while we remove the file ...
1029          */
1030         if (!list_empty(&dentry->d_hash)) {
1031                 d_drop(dentry);
1032                 rehash = 1;
1033         }
1034         /*
1035          * Update i_nlink and free the inode before unlinking.
1036          */
1037         if (inode) {
1038                 if (inode->i_nlink)
1039                         inode->i_nlink --;
1040                 d_delete(dentry);
1041         }
1042         invalidate_inode_pages(dir);
1043         nfs_flush_dircache(dir);
1044         error = nfs_proc_remove(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
1045                                 dentry->d_name.name);
1046         /*
1047          * Rehash the negative dentry if the operation succeeded.
1048          */
1049         if (!error && rehash)
1050                 d_add(dentry, NULL);
1051 out:
1052         return error;
1053 }
1054
1055 /*  We do silly rename. In case sillyrename() returns -EBUSY, the inode
1056  *  belongs to an active ".nfs..." file and we return -EBUSY.
1057  *
1058  *  If sillyrename() returns 0, we do nothing, otherwise we unlink.
1059  */
1060 static int nfs_unlink(struct inode *dir, struct dentry *dentry)
1061 {
1062         int error;
1063
1064         dfprintk(VFS, "NFS: unlink(%x/%ld, %s)\n",
1065                 dir->i_dev, dir->i_ino, dentry->d_name.name);
1066
1067         error = nfs_sillyrename(dir, dentry);
1068         if (error && error != -EBUSY) {
1069                 error = nfs_safe_remove(dentry);
1070                 if (!error) {
1071                         nfs_renew_times(dentry);
1072                 }
1073         }
1074         return error;
1075 }
1076
1077 static int
1078 nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1079 {
1080         struct iattr attr;
1081         int error;
1082
1083         dfprintk(VFS, "NFS: symlink(%x/%ld, %s, %s)\n",
1084                 dir->i_dev, dir->i_ino, dentry->d_name.name, symname);
1085
1086         error = -ENAMETOOLONG;
1087         if (strlen(symname) > NFS_MAXPATHLEN)
1088                 goto out;
1089
1090 #ifdef NFS_PARANOIA
1091 if (dentry->d_inode)
1092 printk("nfs_proc_symlink: %s/%s not negative!\n",
1093 dentry->d_parent->d_name.name, dentry->d_name.name);
1094 #endif
1095         /*
1096          * Fill in the sattr for the call.
1097          * Note: SunOS 4.1.2 crashes if the mode isn't initialized!
1098          */
1099         attr.ia_valid = ATTR_MODE;
1100         attr.ia_mode = S_IFLNK | S_IRWXUGO;
1101
1102         /*
1103          * Drop the dentry in advance to force a new lookup.
1104          * Since nfs_proc_symlink doesn't return a fattr, we
1105          * can't instantiate the new inode.
1106          */
1107         d_drop(dentry);
1108         invalidate_inode_pages(dir);
1109         nfs_flush_dircache(dir);
1110         error = nfs_proc_symlink(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
1111                                 dentry->d_name.name, symname, &attr);
1112         if (!error) {
1113                 nfs_renew_times(dentry->d_parent);
1114         } else if (error == -EEXIST) {
1115                 printk("nfs_proc_symlink: %s/%s already exists??\n",
1116                         dentry->d_parent->d_name.name, dentry->d_name.name);
1117         }
1118
1119 out:
1120         return error;
1121 }
1122
1123 static int
1124 nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
1125 {
1126         struct inode *inode = old_dentry->d_inode;
1127         int error;
1128
1129         dfprintk(VFS, "NFS: link(%s/%s -> %s/%s)\n",
1130                 old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
1131                 dentry->d_parent->d_name.name, dentry->d_name.name);
1132
1133         /*
1134          * Drop the dentry in advance to force a new lookup.
1135          * Since nfs_proc_link doesn't return a file handle,
1136          * we can't use the existing dentry.
1137          */
1138         d_drop(dentry);
1139         invalidate_inode_pages(dir);
1140         nfs_flush_dircache(dir);
1141         error = nfs_proc_link(NFS_DSERVER(old_dentry), NFS_FH(old_dentry),
1142                                 NFS_FH(dentry->d_parent), dentry->d_name.name);
1143         if (!error) {
1144                 /*
1145                  * Update the link count immediately, as some apps
1146                  * (e.g. pine) test this after making a link.
1147                  */
1148                 inode->i_nlink++;
1149         }
1150         return error;
1151 }
1152
1153 /*
1154  * RENAME
1155  * FIXME: Some nfsds, like the Linux user space nfsd, may generate a
1156  * different file handle for the same inode after a rename (e.g. when
1157  * moving to a different directory). A fail-safe method to do so would
1158  * be to look up old_dir/old_name, create a link to new_dir/new_name and
1159  * rename the old file using the sillyrename stuff. This way, the original
1160  * file in old_dir will go away when the last process iput()s the inode.
1161  *
1162  * FIXED.
1163  *
1164  * It actually works quite well. One needs to have the possibility for
1165  * at least one ".nfs..." file in each directory the file ever gets
1166  * moved or linked to which happens automagically with the new
1167  * implementation that only depends on the dcache stuff instead of
1168  * using the inode layer
1169  *
1170  * Unfortunately, things are a little more complicated than indicated
1171  * above. For a cross-directory move, we want to make sure we can get
1172  * rid of the old inode after the operation.  This means there must be
1173  * no pending writes (if it's a file), and the use count must be 1.
1174  * If these conditions are met, we can drop the dentries before doing
1175  * the rename.
1176  */
1177 static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1178                       struct inode *new_dir, struct dentry *new_dentry)
1179 {
1180         struct inode *old_inode = old_dentry->d_inode;
1181         struct inode *new_inode = new_dentry->d_inode;
1182         struct dentry *dentry = NULL;
1183         int error, rehash = 0;
1184
1185         dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n",
1186                  old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
1187                  new_dentry->d_parent->d_name.name, new_dentry->d_name.name,
1188                  new_dentry->d_count);
1189
1190         /*
1191          * First check whether the target is busy ... we can't
1192          * safely do _any_ rename if the target is in use.
1193          *
1194          * For files, make a copy of the dentry and then do a
1195          * silly-rename. If the silly-rename succeeds, the
1196          * copied dentry is hashed and becomes the new target.
1197          *
1198          * With directories check is done in VFS.
1199          */
1200         error = -EBUSY;
1201         if (new_dentry->d_count > 1 && new_inode) {
1202                 int err;
1203                 /* copy the target dentry's name */
1204                 dentry = d_alloc(new_dentry->d_parent,
1205                                  &new_dentry->d_name);
1206                 if (!dentry)
1207                         goto out;
1208
1209                 /* silly-rename the existing target ... */
1210                 err = nfs_sillyrename(new_dir, new_dentry);
1211                 if (!err) {
1212                         new_dentry = dentry;
1213                         new_inode = NULL;
1214                         /* hash the replacement target */
1215                         d_add(new_dentry, NULL);
1216                 }
1217
1218                 /* dentry still busy? */
1219                 if (new_dentry->d_count > 1) {
1220 #ifdef NFS_PARANOIA
1221                         printk("nfs_rename: target %s/%s busy, d_count=%d\n",
1222                                new_dentry->d_parent->d_name.name,
1223                                new_dentry->d_name.name,
1224                                new_dentry->d_count);
1225 #endif
1226                         goto out;
1227                 }
1228         }
1229
1230         /*
1231          * ... prune child dentries and writebacks if needed.
1232          */
1233         if (old_dentry->d_count > 1) {
1234                 nfs_wb_all(old_inode);
1235                 shrink_dcache_parent(old_dentry);
1236         }
1237
1238         if (new_dentry->d_count > 1 && new_inode) {
1239 #ifdef NFS_PARANOIA
1240                 printk("nfs_rename: new dentry %s/%s busy, d_count=%d\n",
1241                        new_dentry->d_parent->d_name.name,
1242                        new_dentry->d_name.name,
1243                        new_dentry->d_count);
1244 #endif
1245                 goto out;
1246         }
1247
1248         /*
1249          * To prevent any new references to the target during the rename,
1250          * we unhash the dentry and free the inode in advance.
1251          */
1252         if (!list_empty(&new_dentry->d_hash)) {
1253                 d_drop(new_dentry);
1254                 rehash = 1;
1255         }
1256         if (new_inode)
1257                 d_delete(new_dentry);
1258
1259         invalidate_inode_pages(new_dir);
1260         nfs_flush_dircache(new_dir);
1261         invalidate_inode_pages(old_dir);
1262         nfs_flush_dircache(old_dir);
1263         error = nfs_proc_rename(NFS_DSERVER(old_dentry),
1264                         NFS_FH(old_dentry->d_parent), old_dentry->d_name.name,
1265                         NFS_FH(new_dentry->d_parent), new_dentry->d_name.name);
1266
1267         NFS_CACHEINV(old_dir);
1268         NFS_CACHEINV(new_dir);
1269         /* Update the dcache if needed */
1270         if (rehash)
1271                 d_add(new_dentry, NULL);
1272         if (!error && !S_ISDIR(old_inode->i_mode))
1273                 d_move(old_dentry, new_dentry);
1274
1275 out:
1276         /* new dentry created? */
1277         if (dentry)
1278                 dput(dentry);
1279         return error;
1280 }
1281
1282 int nfs_init_fhcache(void)
1283 {
1284         nfs_fh_cachep = kmem_cache_create("nfs_fh",
1285                                           sizeof(struct nfs_fh),
1286                                           0, SLAB_HWCACHE_ALIGN,
1287                                           NULL, NULL);
1288         if (nfs_fh_cachep == NULL)
1289                 return -ENOMEM;
1290
1291         nfs_cookie_cachep = kmem_cache_create("nfs_dcookie",
1292                                               sizeof(struct nfs_cookie_table),
1293                                               0, SLAB_HWCACHE_ALIGN,
1294                                               NULL, NULL);
1295         if (nfs_cookie_cachep == NULL)
1296                 return -ENOMEM;
1297
1298         return 0;
1299 }
1300
1301 /*
1302  * Local variables:
1303  *  version-control: t
1304  *  kept-new-versions: 5
1305  * End:
1306  */