Import 2.3.7pre3
[davej-history.git] / fs / nfs / dir.c
blobc64a0222967230c0035f029064451e074eb9e8bb
/*
 *  linux/fs/nfs/dir.c
 *
 *  Copyright (C) 1992  Rick Sladkey
 *
 *  nfs directory handling functions
 *
 * 10 Apr 1996	Added silly rename for unlink	--okir
 * 28 Sep 1996	Improved directory cache --okir
 * 23 Aug 1997	Claus Heine claus@momo.math.rwth-aachen.de
 *		Re-implemented silly rename for unlink, newly implemented
 *		silly rename for nfs_rename() following the suggestions
 *		of Olaf Kirch (okir) found in this file.
 *		Following Linus' comments on my original hack, this version
 *		depends only on the dcache stuff and doesn't touch the inode
 *		layer (iput() and friends).
 *  6 Jun 1999	Cache readdir lookups in the page cache. -DaveM
 */
20 #define NFS_NEED_XDR_TYPES
21 #include <linux/sched.h>
22 #include <linux/errno.h>
23 #include <linux/stat.h>
24 #include <linux/fcntl.h>
25 #include <linux/string.h>
26 #include <linux/kernel.h>
27 #include <linux/malloc.h>
28 #include <linux/mm.h>
29 #include <linux/sunrpc/clnt.h>
30 #include <linux/nfs_fs.h>
31 #include <linux/nfs.h>
32 #include <linux/pagemap.h>
34 #include <asm/segment.h> /* for fs functions */
36 #define NFS_PARANOIA 1
37 /* #define NFS_DEBUG_VERBOSE 1 */
39 static int nfs_safe_remove(struct dentry *);
41 static ssize_t nfs_dir_read(struct file *, char *, size_t, loff_t *);
42 static int nfs_readdir(struct file *, void *, filldir_t);
43 static struct dentry *nfs_lookup(struct inode *, struct dentry *);
44 static int nfs_create(struct inode *, struct dentry *, int);
45 static int nfs_mkdir(struct inode *, struct dentry *, int);
46 static int nfs_rmdir(struct inode *, struct dentry *);
47 static int nfs_unlink(struct inode *, struct dentry *);
48 static int nfs_symlink(struct inode *, struct dentry *, const char *);
49 static int nfs_link(struct dentry *, struct inode *, struct dentry *);
50 static int nfs_mknod(struct inode *, struct dentry *, int, int);
51 static int nfs_rename(struct inode *, struct dentry *,
52 struct inode *, struct dentry *);
54 static struct file_operations nfs_dir_operations = {
55 NULL, /* lseek - default */
56 nfs_dir_read, /* read - bad */
57 NULL, /* write - bad */
58 nfs_readdir, /* readdir */
59 NULL, /* select - default */
60 NULL, /* ioctl - default */
61 NULL, /* mmap */
62 nfs_open, /* open */
63 NULL, /* flush */
64 nfs_release, /* release */
65 NULL /* fsync */
68 struct inode_operations nfs_dir_inode_operations = {
69 &nfs_dir_operations, /* default directory file-ops */
70 nfs_create, /* create */
71 nfs_lookup, /* lookup */
72 nfs_link, /* link */
73 nfs_unlink, /* unlink */
74 nfs_symlink, /* symlink */
75 nfs_mkdir, /* mkdir */
76 nfs_rmdir, /* rmdir */
77 nfs_mknod, /* mknod */
78 nfs_rename, /* rename */
79 NULL, /* readlink */
80 NULL, /* follow_link */
81 NULL, /* readpage */
82 NULL, /* writepage */
83 NULL, /* bmap */
84 NULL, /* truncate */
85 NULL, /* permission */
86 NULL, /* smap */
87 NULL, /* updatepage */
88 nfs_revalidate, /* revalidate */
91 static ssize_t
92 nfs_dir_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
94 return -EISDIR;
97 /* Each readdir response is composed of entries which look
98 * like the following, as per the NFSv2 RFC:
100 * __u32 not_end zero if end of response
101 * __u32 file ID opaque ino_t
102 * __u32 namelen size of name string
103 * VAR name string the string, padded to modulo 4 bytes
104 * __u32 cookie opaque ID of next entry
106 * When you hit not_end being zero, the next __u32 is non-zero if
107 * this is the end of the complete set of readdir entires for this
108 * directory. This can be used, for example, to initiate pre-fetch.
110 * In order to know what to ask the server for, we only need to know
111 * the final cookie of the previous page, and offset zero has cookie
112 * zero, so we cache cookie to page offset translations in chunks.
114 #define COOKIES_PER_CHUNK (8 - ((sizeof(void *) / sizeof(__u32))))
115 struct nfs_cookie_table {
116 struct nfs_cookie_table *next;
117 __u32 cookies[COOKIES_PER_CHUNK];
119 static kmem_cache_t *nfs_cookie_cachep;
121 /* This whole scheme relies on the fact that dirent cookies
122 * are monotonically increasing.
124 * Another invariant is that once we have a valid non-zero
125 * EOF marker cached, we also have the complete set of cookie
126 * table entries.
128 * We return the page offset assosciated with the page where
129 * cookie must be if it exists at all, however if we can not
130 * figure that out conclusively, we return < 0.
132 static long __nfs_readdir_offset(struct inode *inode, __u32 cookie)
134 struct nfs_cookie_table *p;
135 unsigned long ret = 0;
137 for(p = NFS_COOKIES(inode); p != NULL; p = p->next) {
138 int i;
140 for (i = 0; i < COOKIES_PER_CHUNK; i++) {
141 __u32 this_cookie = p->cookies[i];
143 /* End of known cookies, EOF is our only hope. */
144 if (!this_cookie)
145 goto check_eof;
147 /* Next cookie is larger, must be in previous page. */
148 if (this_cookie > cookie)
149 return ret;
151 ret += 1;
153 /* Exact cookie match, it must be in this page :-) */
154 if (this_cookie == cookie)
155 return ret;
158 check_eof:
159 if (NFS_DIREOF(inode) != 0)
160 return ret;
162 return -1L;
165 static __inline__ long nfs_readdir_offset(struct inode *inode, __u32 cookie)
167 /* Cookie zero is always at page offset zero. Optimize the
168 * other common case since most directories fit entirely
169 * in one page.
171 if (!cookie || (!NFS_COOKIES(inode) && NFS_DIREOF(inode)))
172 return 0;
173 return __nfs_readdir_offset(inode, cookie);
176 /* Since a cookie of zero is declared special by the NFS
177 * protocol, we easily can tell if a cookie in an existing
178 * table chunk is valid or not.
180 * NOTE: The cookies are indexed off-by-one because zero
181 * need not an entry.
183 static __inline__ __u32 *find_cookie(struct inode *inode, unsigned long off)
185 static __u32 cookie_zero = 0;
186 struct nfs_cookie_table *p;
187 __u32 *ret;
189 if (!off)
190 return &cookie_zero;
191 off -= 1;
192 p = NFS_COOKIES(inode);
193 while(off >= COOKIES_PER_CHUNK && p) {
194 off -= COOKIES_PER_CHUNK;
195 p = p->next;
197 ret = NULL;
198 if (p) {
199 ret = &p->cookies[off];
200 if (!*ret)
201 ret = NULL;
203 return ret;
/* Round a name length up to the next multiple of four bytes. */
#define NFS_NAMELEN_ALIGN(__len) (((__len) + 3) & ~3)
207 static int create_cookie(__u32 cookie, unsigned long off, struct inode *inode)
209 struct nfs_cookie_table **cpp;
211 cpp = (struct nfs_cookie_table **) &NFS_COOKIES(inode);
212 while (off >= COOKIES_PER_CHUNK && *cpp) {
213 off -= COOKIES_PER_CHUNK;
214 cpp = &(*cpp)->next;
216 if (*cpp) {
217 (*cpp)->cookies[off] = cookie;
218 } else {
219 struct nfs_cookie_table *new;
220 int i;
222 new = kmem_cache_alloc(nfs_cookie_cachep, SLAB_ATOMIC);
223 if(!new)
224 return -1;
225 *cpp = new;
226 new->next = NULL;
227 for(i = 0; i < COOKIES_PER_CHUNK; i++) {
228 if (i == off) {
229 new->cookies[i] = cookie;
230 } else {
231 new->cookies[i] = 0;
235 return 0;
238 static struct page *try_to_get_dirent_page(struct file *, __u32, int);
240 /* Recover from a revalidation flush. The case here is that
241 * the inode for the directory got invalidated somehow, and
242 * all of our cached information is lost. In order to get
243 * a correct cookie for the current readdir request from the
244 * user, we must (re-)fetch older readdir page cache entries.
246 * Returns < 0 if some error occurrs, else it is the page offset
247 * to fetch.
249 static long refetch_to_readdir_cookie(struct file *file, struct inode *inode)
251 struct page *page;
252 u32 goal_cookie = file->f_pos;
253 long cur_off, ret = -1L;
255 again:
256 cur_off = 0;
257 for (;;) {
258 page = find_get_page(inode, cur_off);
259 if (page) {
260 if (!Page_Uptodate(page))
261 goto out_error;
262 } else {
263 __u32 *cp = find_cookie(inode, cur_off);
265 if (!cp)
266 goto out_error;
268 page = try_to_get_dirent_page(file, *cp, 0);
269 if (!page) {
270 if (!cur_off)
271 goto out_error;
273 /* Someone touched the dir on us. */
274 goto again;
277 page_cache_release(page);
279 if ((ret = nfs_readdir_offset(inode, goal_cookie)) >= 0)
280 goto out;
282 cur_off += 1;
284 out:
285 return ret;
287 out_error:
288 if (page)
289 page_cache_release(page);
290 goto out;
293 /* Now we cache directories properly, by stuffing the dirent
294 * data directly in the page cache.
296 * Inode invalidation due to refresh etc. takes care of
297 * _everything_, no sloppy entry flushing logic, no extraneous
298 * copying, network direct to page cache, the way it was meant
299 * to be.
301 * NOTE: Dirent information verification is done always by the
302 * page-in of the RPC reply, nowhere else, this simplies
303 * things substantially.
305 static struct page *try_to_get_dirent_page(struct file *file, __u32 cookie, int refetch_ok)
307 struct nfs_readdirargs rd_args;
308 struct nfs_readdirres rd_res;
309 struct dentry *dentry = file->f_dentry;
310 struct inode *inode = dentry->d_inode;
311 struct page *page, **hash;
312 unsigned long page_cache;
313 long offset;
314 __u32 *cookiep;
316 page = NULL;
317 page_cache = page_cache_alloc();
318 if (!page_cache)
319 goto out;
321 if ((offset = nfs_readdir_offset(inode, cookie)) < 0) {
322 if (!refetch_ok ||
323 (offset = refetch_to_readdir_cookie(file, inode)) < 0) {
324 page_cache_free(page_cache);
325 goto out;
329 cookiep = find_cookie(inode, offset);
330 if (!cookiep) {
331 /* Gross fatal error. */
332 page_cache_free(page_cache);
333 goto out;
336 hash = page_hash(inode, offset);
337 repeat:
338 page = __find_lock_page(inode, offset, *hash);
339 if (page) {
340 page_cache_free(page_cache);
341 goto unlock_out;
344 page = page_cache_entry(page_cache);
345 if (add_to_page_cache_unique(page, inode, offset, hash)) {
346 page_cache_release(page);
347 goto repeat;
350 rd_args.fh = NFS_FH(dentry);
351 rd_res.buffer = (char *)page_cache;
352 rd_res.bufsiz = PAGE_CACHE_SIZE;
353 rd_res.cookie = *cookiep;
354 do {
355 rd_args.buffer = rd_res.buffer;
356 rd_args.bufsiz = rd_res.bufsiz;
357 rd_args.cookie = rd_res.cookie;
358 if (rpc_call(NFS_CLIENT(inode),
359 NFSPROC_READDIR, &rd_args, &rd_res, 0) < 0)
360 goto error;
361 } while(rd_res.bufsiz > 0);
363 if (rd_res.bufsiz < 0)
364 NFS_DIREOF(inode) = rd_res.cookie;
365 else if (create_cookie(rd_res.cookie, offset, inode))
366 goto error;
368 SetPageUptodate(page);
369 unlock_out:
370 UnlockPage(page);
371 out:
372 return page;
374 error:
375 SetPageError(page);
376 goto unlock_out;
379 /* Seek up to dirent assosciated with the passed in cookie,
380 * then fill in dirents found. Return the last cookie
381 * actually given to the user, to update the file position.
383 static __inline__ u32 nfs_do_filldir(__u32 *p, u32 cookie,
384 void *dirent, filldir_t filldir)
386 u32 end;
388 while((end = *p++) != 0) {
389 __u32 fileid, len, skip, this_cookie;
390 char *name;
392 fileid = *p++;
393 len = *p++;
394 name = (char *) p;
395 skip = NFS_NAMELEN_ALIGN(len);
396 p += (skip >> 2);
397 this_cookie = *p++;
399 if (this_cookie < cookie)
400 continue;
402 cookie = this_cookie;
403 if (filldir(dirent, name, len, cookie, fileid) < 0)
404 break;
407 return cookie;
410 /* The file offset position is represented in pure bytes, to
411 * make the page cache interface straight forward.
413 * However, some way is needed to make the connection between the
414 * opaque NFS directory entry cookies and our offsets, so a per-inode
415 * cookie cache table is used.
417 static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
419 struct dentry *dentry = filp->f_dentry;
420 struct inode *inode = dentry->d_inode;
421 struct page *page, **hash;
422 long offset;
423 int res;
425 res = nfs_revalidate_inode(NFS_DSERVER(dentry), dentry);
426 if (res < 0)
427 return res;
429 if (NFS_DIREOF(inode) && filp->f_pos >= NFS_DIREOF(inode))
430 return 0;
432 if ((offset = nfs_readdir_offset(inode, filp->f_pos)) < 0)
433 goto no_dirent_page;
435 hash = page_hash(inode, offset);
436 page = __find_get_page(inode, offset, *hash);
437 if (!page)
438 goto no_dirent_page;
439 if (!Page_Uptodate(page))
440 goto dirent_read_error;
441 success:
442 filp->f_pos = nfs_do_filldir((__u32 *) page_address(page),
443 filp->f_pos, dirent, filldir);
444 page_cache_release(page);
445 return 0;
447 no_dirent_page:
448 page = try_to_get_dirent_page(filp, filp->f_pos, 1);
449 if (!page)
450 goto no_page;
452 if (Page_Uptodate(page))
453 goto success;
454 dirent_read_error:
455 page_cache_release(page);
456 no_page:
457 return -EIO;
460 /* Flush directory cookie and EOF caches for an inode.
461 * So we don't thrash allocating/freeing cookie tables,
462 * we keep the cookies around until the inode is
463 * deleted/reused.
465 __inline__ void nfs_flush_dircache(struct inode *inode)
467 struct nfs_cookie_table *p = NFS_COOKIES(inode);
469 while (p != NULL) {
470 int i;
472 for(i = 0; i < COOKIES_PER_CHUNK; i++)
473 p->cookies[i] = 0;
475 p = p->next;
477 NFS_DIREOF(inode) = 0;
480 /* Free up directory cache state, this happens when
481 * nfs_delete_inode is called on an NFS directory.
483 void nfs_free_dircache(struct inode *inode)
485 struct nfs_cookie_table *p = NFS_COOKIES(inode);
487 while (p != NULL) {
488 struct nfs_cookie_table *next = p->next;
489 kmem_cache_free(nfs_cookie_cachep, p);
490 p = next;
492 NFS_COOKIES(inode) = NULL;
493 NFS_DIREOF(inode) = 0;
497 * Whenever an NFS operation succeeds, we know that the dentry
498 * is valid, so we update the revalidation timestamp.
500 static inline void nfs_renew_times(struct dentry * dentry)
502 dentry->d_time = jiffies;
505 static inline int nfs_dentry_force_reval(struct dentry *dentry, int flags)
507 struct inode *inode = dentry->d_inode;
508 unsigned long timeout = NFS_ATTRTIMEO(inode);
511 * If it's the last lookup in a series, we use a stricter
512 * cache consistency check by looking at the parent mtime.
514 * If it's been modified in the last hour, be really strict.
515 * (This still means that we can avoid doing unnecessary
516 * work on directories like /usr/share/bin etc which basically
517 * never change).
519 if (!(flags & LOOKUP_CONTINUE)) {
520 long diff = CURRENT_TIME - dentry->d_parent->d_inode->i_mtime;
522 if (diff < 15*60)
523 timeout = 0;
526 return time_after(jiffies,dentry->d_time + timeout);
530 * We judge how long we want to trust negative
531 * dentries by looking at the parent inode mtime.
533 * If mtime is close to present time, we revalidate
534 * more often.
536 static inline int nfs_neg_need_reval(struct dentry *dentry)
538 unsigned long timeout = 30 * HZ;
539 long diff = CURRENT_TIME - dentry->d_parent->d_inode->i_mtime;
541 if (diff < 5*60)
542 timeout = 1 * HZ;
544 return time_after(jiffies, dentry->d_time + timeout);
548 * This is called every time the dcache has a lookup hit,
549 * and we should check whether we can really trust that
550 * lookup.
552 * NOTE! The hit can be a negative hit too, don't assume
553 * we have an inode!
555 * If the dentry is older than the revalidation interval,
556 * we do a new lookup and verify that the dentry is still
557 * correct.
559 static int nfs_lookup_revalidate(struct dentry * dentry, int flags)
561 struct dentry * parent = dentry->d_parent;
562 struct inode * inode = dentry->d_inode;
563 int error;
564 struct nfs_fh fhandle;
565 struct nfs_fattr fattr;
568 * If we don't have an inode, let's look at the parent
569 * directory mtime to get a hint about how often we
570 * should validate things..
572 if (!inode) {
573 if (nfs_neg_need_reval(dentry))
574 goto out_bad;
575 goto out_valid;
578 if (is_bad_inode(inode)) {
579 dfprintk(VFS, "nfs_lookup_validate: %s/%s has dud inode\n",
580 parent->d_name.name, dentry->d_name.name);
581 goto out_bad;
584 if (IS_ROOT(dentry))
585 goto out_valid;
587 if (!nfs_dentry_force_reval(dentry, flags))
588 goto out_valid;
591 * Do a new lookup and check the dentry attributes.
593 error = nfs_proc_lookup(NFS_DSERVER(parent), NFS_FH(parent),
594 dentry->d_name.name, &fhandle, &fattr);
595 if (error)
596 goto out_bad;
598 /* Inode number matches? */
599 if (fattr.fileid != inode->i_ino)
600 goto out_bad;
602 /* Filehandle matches? */
603 if (memcmp(dentry->d_fsdata, &fhandle, sizeof(struct nfs_fh))) {
604 if (dentry->d_count < 2)
605 goto out_bad;
608 /* Ok, remeber that we successfully checked it.. */
609 nfs_renew_times(dentry);
610 nfs_refresh_inode(inode, &fattr);
612 out_valid:
613 return 1;
614 out_bad:
615 /* Purge readdir caches. */
616 if (dentry->d_parent->d_inode) {
617 invalidate_inode_pages(dentry->d_parent->d_inode);
618 nfs_flush_dircache(dentry->d_parent->d_inode);
620 if (inode && S_ISDIR(inode->i_mode)) {
621 invalidate_inode_pages(inode);
622 nfs_flush_dircache(inode);
624 return 0;
628 * This is called from dput() when d_count is going to 0.
629 * We use it to clean up silly-renamed files.
631 static void nfs_dentry_delete(struct dentry *dentry)
633 dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n",
634 dentry->d_parent->d_name.name, dentry->d_name.name,
635 dentry->d_flags);
637 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
638 int error;
640 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
641 /* Unhash it first */
642 d_drop(dentry);
643 error = nfs_safe_remove(dentry);
644 if (error)
645 printk("NFS: can't silly-delete %s/%s, error=%d\n",
646 dentry->d_parent->d_name.name,
647 dentry->d_name.name, error);
650 #ifdef NFS_PARANOIA
652 * Sanity check: if the dentry has been unhashed and the
653 * inode still has users, we could have problems ...
655 if (list_empty(&dentry->d_hash) && dentry->d_inode) {
656 struct inode *inode = dentry->d_inode;
657 int max_count = (S_ISDIR(inode->i_mode) ? 1 : inode->i_nlink);
658 if (inode->i_count > max_count) {
659 printk("nfs_dentry_delete: %s/%s: ino=%ld, count=%d, nlink=%d\n",
660 dentry->d_parent->d_name.name, dentry->d_name.name,
661 inode->i_ino, inode->i_count, inode->i_nlink);
664 #endif
667 static kmem_cache_t *nfs_fh_cachep;
669 __inline__ struct nfs_fh *nfs_fh_alloc(void)
671 return kmem_cache_alloc(nfs_fh_cachep, SLAB_KERNEL);
674 __inline__ void nfs_fh_free(struct nfs_fh *p)
676 kmem_cache_free(nfs_fh_cachep, p);
680 * Called when the dentry is being freed to release private memory.
682 static void nfs_dentry_release(struct dentry *dentry)
684 if (dentry->d_fsdata)
685 nfs_fh_free(dentry->d_fsdata);
688 struct dentry_operations nfs_dentry_operations = {
689 nfs_lookup_revalidate, /* d_revalidate(struct dentry *, int) */
690 NULL, /* d_hash */
691 NULL, /* d_compare */
692 nfs_dentry_delete, /* d_delete(struct dentry *) */
693 nfs_dentry_release, /* d_release(struct dentry *) */
694 NULL /* d_iput */
#ifdef NFS_PARANOIA
/*
 * Debug aid: print every dentry on the given alias list (all dentries
 * holding the specified inode), with its use count and whether it is
 * currently unhashed.
 */
static void show_dentry(struct list_head * dlist)
{
	struct list_head *pos;

	for (pos = dlist->next; pos != dlist; pos = pos->next) {
		struct dentry *alias = list_entry(pos, struct dentry, d_alias);
		const char *unhashed;

		unhashed = list_empty(&alias->d_hash) ? "(unhashed)" : "";

		printk("show_dentry: %s/%s, d_count=%d%s\n",
			alias->d_parent->d_name.name,
			alias->d_name.name, alias->d_count,
			unhashed);
	}
}
#endif
720 static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry)
722 struct inode *inode;
723 int error;
724 struct nfs_fh fhandle;
725 struct nfs_fattr fattr;
727 dfprintk(VFS, "NFS: lookup(%s/%s)\n",
728 dentry->d_parent->d_name.name, dentry->d_name.name);
730 error = -ENAMETOOLONG;
731 if (dentry->d_name.len > NFS_MAXNAMLEN)
732 goto out;
734 error = -ENOMEM;
735 if (!dentry->d_fsdata) {
736 dentry->d_fsdata = nfs_fh_alloc();
737 if (!dentry->d_fsdata)
738 goto out;
740 dentry->d_op = &nfs_dentry_operations;
742 error = nfs_proc_lookup(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
743 dentry->d_name.name, &fhandle, &fattr);
744 inode = NULL;
745 if (error == -ENOENT)
746 goto no_entry;
747 if (!error) {
748 error = -EACCES;
749 inode = nfs_fhget(dentry, &fhandle, &fattr);
750 if (inode) {
751 #ifdef NFS_PARANOIA
752 if (inode->i_count > (S_ISDIR(inode->i_mode) ? 1 : inode->i_nlink)) {
753 printk("nfs_lookup: %s/%s ino=%ld in use, count=%d, nlink=%d\n",
754 dentry->d_parent->d_name.name, dentry->d_name.name,
755 inode->i_ino, inode->i_count, inode->i_nlink);
756 show_dentry(&inode->i_dentry);
758 #endif
759 no_entry:
760 d_add(dentry, inode);
761 nfs_renew_times(dentry);
762 error = 0;
765 out:
766 return ERR_PTR(error);
770 * Code common to create, mkdir, and mknod.
772 static int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
773 struct nfs_fattr *fattr)
775 struct inode *inode;
776 int error = -EACCES;
778 inode = nfs_fhget(dentry, fhandle, fattr);
779 if (inode) {
780 #ifdef NFS_PARANOIA
781 if (inode->i_count > (S_ISDIR(inode->i_mode) ? 1 : inode->i_nlink)) {
782 printk("nfs_instantiate: %s/%s ino=%ld in use, count=%d, nlink=%d\n",
783 dentry->d_parent->d_name.name, dentry->d_name.name,
784 inode->i_ino, inode->i_count, inode->i_nlink);
785 show_dentry(&inode->i_dentry);
787 #endif
788 d_instantiate(dentry, inode);
789 nfs_renew_times(dentry);
790 error = 0;
792 return error;
796 * Following a failed create operation, we drop the dentry rather
797 * than retain a negative dentry. This avoids a problem in the event
798 * that the operation succeeded on the server, but an error in the
799 * reply path made it appear to have failed.
801 static int nfs_create(struct inode *dir, struct dentry *dentry, int mode)
803 int error;
804 struct nfs_sattr sattr;
805 struct nfs_fattr fattr;
806 struct nfs_fh fhandle;
808 dfprintk(VFS, "NFS: create(%x/%ld, %s\n",
809 dir->i_dev, dir->i_ino, dentry->d_name.name);
811 sattr.mode = mode;
812 sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
813 sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
816 * Invalidate the dir cache before the operation to avoid a race.
818 invalidate_inode_pages(dir);
819 nfs_flush_dircache(dir);
820 error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
821 dentry->d_name.name, &sattr, &fhandle, &fattr);
822 if (!error)
823 error = nfs_instantiate(dentry, &fhandle, &fattr);
824 if (error)
825 d_drop(dentry);
826 return error;
830 * See comments for nfs_proc_create regarding failed operations.
832 static int nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rdev)
834 int error;
835 struct nfs_sattr sattr;
836 struct nfs_fattr fattr;
837 struct nfs_fh fhandle;
839 dfprintk(VFS, "NFS: mknod(%x/%ld, %s\n",
840 dir->i_dev, dir->i_ino, dentry->d_name.name);
842 sattr.mode = mode;
843 sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
844 if (S_ISCHR(mode) || S_ISBLK(mode))
845 sattr.size = rdev; /* get out your barf bag */
846 sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
848 invalidate_inode_pages(dir);
849 nfs_flush_dircache(dir);
850 error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
851 dentry->d_name.name, &sattr, &fhandle, &fattr);
852 if (!error)
853 error = nfs_instantiate(dentry, &fhandle, &fattr);
854 if (error)
855 d_drop(dentry);
856 return error;
860 * See comments for nfs_proc_create regarding failed operations.
862 static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
864 int error;
865 struct nfs_sattr sattr;
866 struct nfs_fattr fattr;
867 struct nfs_fh fhandle;
869 dfprintk(VFS, "NFS: mkdir(%x/%ld, %s\n",
870 dir->i_dev, dir->i_ino, dentry->d_name.name);
872 sattr.mode = mode | S_IFDIR;
873 sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
874 sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
877 * Always drop the dentry, we can't always depend on
878 * the fattr returned by the server (AIX seems to be
879 * broken). We're better off doing another lookup than
880 * depending on potentially bogus information.
882 d_drop(dentry);
883 invalidate_inode_pages(dir);
884 nfs_flush_dircache(dir);
885 error = nfs_proc_mkdir(NFS_DSERVER(dentry), NFS_FH(dentry->d_parent),
886 dentry->d_name.name, &sattr, &fhandle, &fattr);
887 return error;
890 static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
892 int error;
894 dfprintk(VFS, "NFS: rmdir(%x/%ld, %s\n",
895 dir->i_dev, dir->i_ino, dentry->d_name.name);
897 #ifdef NFS_PARANOIA
898 if (dentry->d_inode->i_count > 1)
899 printk("nfs_rmdir: %s/%s inode busy?? i_count=%d, i_nlink=%d\n",
900 dentry->d_parent->d_name.name, dentry->d_name.name,
901 dentry->d_inode->i_count, dentry->d_inode->i_nlink);
902 #endif
904 invalidate_inode_pages(dir);
905 nfs_flush_dircache(dir);
906 error = nfs_proc_rmdir(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
907 dentry->d_name.name);
909 /* Update i_nlink and invalidate dentry. */
910 if (!error) {
911 d_drop(dentry);
912 if (dentry->d_inode->i_nlink)
913 dentry->d_inode->i_nlink --;
916 return error;
920 /* Note: we copy the code from lookup_dentry() here, only: we have to
921 * omit the directory lock. We are already the owner of the lock when
922 * we reach here. And "down(&dir->i_sem)" would make us sleep forever
923 * ('cause WE have the lock)
925 * VERY IMPORTANT: calculate the hash for this dentry!!!!!!!!
926 * Otherwise the cached lookup DEFINITELY WILL fail. And a new dentry
927 * is created. Without the DCACHE_NFSFS_RENAMED flag. And with d_count
928 * == 1. And trouble.
930 * Concerning my choice of the temp name: it is just nice to have
931 * i_ino part of the temp name, as this offers another check whether
932 * somebody attempts to remove the "silly renamed" dentry itself.
933 * Which is something that I consider evil. Your opinion may vary.
934 * BUT:
935 * Now that I compute the hash value right, it should be possible to simply
936 * check for the DCACHE_NFSFS_RENAMED flag in dentry->d_flag instead of
937 * doing the string compare.
938 * WHICH MEANS:
939 * This offers the opportunity to shorten the temp name. Currently, I use
940 * the hex representation of i_ino + an event counter. This sums up to
941 * as much as 36 characters for a 64 bit machine, and needs 20 chars on
942 * a 32 bit machine.
943 * QUINTESSENCE
944 * The use of i_ino is simply cosmetic. All we need is a unique temp
945 * file name for the .nfs files. The event counter seemed to be adequate.
946 * And as we retry in case such a file already exists, we are guaranteed
947 * to succeed.
950 static
951 struct dentry *nfs_silly_lookup(struct dentry *parent, char *silly, int slen)
953 struct qstr sqstr;
954 struct dentry *sdentry;
955 struct dentry *res;
957 sqstr.name = silly;
958 sqstr.len = slen;
959 sqstr.hash = full_name_hash(silly, slen);
960 sdentry = d_lookup(parent, &sqstr);
961 if (!sdentry) {
962 sdentry = d_alloc(parent, &sqstr);
963 if (sdentry == NULL)
964 return ERR_PTR(-ENOMEM);
965 res = nfs_lookup(parent->d_inode, sdentry);
966 if (res) {
967 dput(sdentry);
968 return res;
971 return sdentry;
974 static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
976 static unsigned int sillycounter = 0;
977 const int i_inosize = sizeof(dir->i_ino)*2;
978 const int countersize = sizeof(sillycounter)*2;
979 const int slen = strlen(".nfs") + i_inosize + countersize;
980 char silly[slen+1];
981 struct dentry *sdentry;
982 int error = -EIO;
984 dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
985 dentry->d_parent->d_name.name, dentry->d_name.name,
986 dentry->d_count);
989 * Note that a silly-renamed file can be deleted once it's
990 * no longer in use -- it's just an ordinary file now.
992 if (dentry->d_count == 1) {
993 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
994 goto out; /* No need to silly rename. */
997 #ifdef NFS_PARANOIA
998 if (!dentry->d_inode)
999 printk("NFS: silly-renaming %s/%s, negative dentry??\n",
1000 dentry->d_parent->d_name.name, dentry->d_name.name);
1001 #endif
1003 * We don't allow a dentry to be silly-renamed twice.
1005 error = -EBUSY;
1006 if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
1007 goto out;
1009 sprintf(silly, ".nfs%*.*lx",
1010 i_inosize, i_inosize, dentry->d_inode->i_ino);
1012 sdentry = NULL;
1013 do {
1014 char *suffix = silly + slen - countersize;
1016 dput(sdentry);
1017 sillycounter++;
1018 sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
1020 dfprintk(VFS, "trying to rename %s to %s\n",
1021 dentry->d_name.name, silly);
1023 sdentry = nfs_silly_lookup(dentry->d_parent, silly, slen);
1025 * N.B. Better to return EBUSY here ... it could be
1026 * dangerous to delete the file while it's in use.
1028 if (IS_ERR(sdentry))
1029 goto out;
1030 } while(sdentry->d_inode != NULL); /* need negative lookup */
1032 invalidate_inode_pages(dir);
1033 nfs_flush_dircache(dir);
1034 error = nfs_proc_rename(NFS_SERVER(dir),
1035 NFS_FH(dentry->d_parent), dentry->d_name.name,
1036 NFS_FH(dentry->d_parent), silly);
1037 if (!error) {
1038 nfs_renew_times(dentry);
1039 d_move(dentry, sdentry);
1040 dentry->d_flags |= DCACHE_NFSFS_RENAMED;
1041 /* If we return 0 we don't unlink */
1043 dput(sdentry);
1044 out:
1045 return error;
1049 * Remove a file after making sure there are no pending writes,
1050 * and after checking that the file has only one user.
1052 * We update inode->i_nlink and free the inode prior to the operation
1053 * to avoid possible races if the server reuses the inode.
1055 static int nfs_safe_remove(struct dentry *dentry)
1057 struct inode *dir = dentry->d_parent->d_inode;
1058 struct inode *inode = dentry->d_inode;
1059 int error, rehash = 0;
1061 dfprintk(VFS, "NFS: safe_remove(%s/%s, %ld)\n",
1062 dentry->d_parent->d_name.name, dentry->d_name.name,
1063 inode->i_ino);
1065 /* N.B. not needed now that d_delete is done in advance? */
1066 error = -EBUSY;
1067 if (!inode) {
1068 #ifdef NFS_PARANOIA
1069 printk("nfs_safe_remove: %s/%s already negative??\n",
1070 dentry->d_parent->d_name.name, dentry->d_name.name);
1071 #endif
1074 if (dentry->d_count > 1) {
1075 #ifdef NFS_PARANOIA
1076 printk("nfs_safe_remove: %s/%s busy, d_count=%d\n",
1077 dentry->d_parent->d_name.name, dentry->d_name.name, dentry->d_count);
1078 #endif
1079 goto out;
1081 #ifdef NFS_PARANOIA
1082 if (inode && inode->i_count > inode->i_nlink)
1083 printk("nfs_safe_remove: %s/%s inode busy?? i_count=%d, i_nlink=%d\n",
1084 dentry->d_parent->d_name.name, dentry->d_name.name,
1085 inode->i_count, inode->i_nlink);
1086 #endif
1088 * Unhash the dentry while we remove the file ...
1090 if (!list_empty(&dentry->d_hash)) {
1091 d_drop(dentry);
1092 rehash = 1;
1095 * Update i_nlink and free the inode before unlinking.
1097 if (inode) {
1098 if (inode->i_nlink)
1099 inode->i_nlink --;
1100 d_delete(dentry);
1102 invalidate_inode_pages(dir);
1103 nfs_flush_dircache(dir);
1104 error = nfs_proc_remove(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
1105 dentry->d_name.name);
1107 * Rehash the negative dentry if the operation succeeded.
1109 if (!error && rehash)
1110 d_add(dentry, NULL);
1111 out:
1112 return error;
1115 /* We do silly rename. In case sillyrename() returns -EBUSY, the inode
1116 * belongs to an active ".nfs..." file and we return -EBUSY.
1118 * If sillyrename() returns 0, we do nothing, otherwise we unlink.
1120 static int nfs_unlink(struct inode *dir, struct dentry *dentry)
1122 int error;
1124 dfprintk(VFS, "NFS: unlink(%x/%ld, %s)\n",
1125 dir->i_dev, dir->i_ino, dentry->d_name.name);
1127 error = nfs_sillyrename(dir, dentry);
1128 if (error && error != -EBUSY) {
1129 error = nfs_safe_remove(dentry);
1130 if (!error) {
1131 nfs_renew_times(dentry);
1134 return error;
1137 static int
1138 nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1140 struct nfs_sattr sattr;
1141 int error;
1143 dfprintk(VFS, "NFS: symlink(%x/%ld, %s, %s)\n",
1144 dir->i_dev, dir->i_ino, dentry->d_name.name, symname);
1146 error = -ENAMETOOLONG;
1147 if (strlen(symname) > NFS_MAXPATHLEN)
1148 goto out;
1150 #ifdef NFS_PARANOIA
1151 if (dentry->d_inode)
1152 printk("nfs_proc_symlink: %s/%s not negative!\n",
1153 dentry->d_parent->d_name.name, dentry->d_name.name);
1154 #endif
1156 * Fill in the sattr for the call.
1157 * Note: SunOS 4.1.2 crashes if the mode isn't initialized!
1159 sattr.mode = S_IFLNK | S_IRWXUGO;
1160 sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
1161 sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
1164 * Drop the dentry in advance to force a new lookup.
1165 * Since nfs_proc_symlink doesn't return a fattr, we
1166 * can't instantiate the new inode.
1168 d_drop(dentry);
1169 invalidate_inode_pages(dir);
1170 nfs_flush_dircache(dir);
1171 error = nfs_proc_symlink(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
1172 dentry->d_name.name, symname, &sattr);
1173 if (!error) {
1174 nfs_renew_times(dentry->d_parent);
1175 } else if (error == -EEXIST) {
1176 printk("nfs_proc_symlink: %s/%s already exists??\n",
1177 dentry->d_parent->d_name.name, dentry->d_name.name);
1180 out:
1181 return error;
1184 static int
1185 nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
1187 struct inode *inode = old_dentry->d_inode;
1188 int error;
1190 dfprintk(VFS, "NFS: link(%s/%s -> %s/%s)\n",
1191 old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
1192 dentry->d_parent->d_name.name, dentry->d_name.name);
1195 * Drop the dentry in advance to force a new lookup.
1196 * Since nfs_proc_link doesn't return a file handle,
1197 * we can't use the existing dentry.
1199 d_drop(dentry);
1200 invalidate_inode_pages(dir);
1201 nfs_flush_dircache(dir);
1202 error = nfs_proc_link(NFS_DSERVER(old_dentry), NFS_FH(old_dentry),
1203 NFS_FH(dentry->d_parent), dentry->d_name.name);
1204 if (!error) {
1206 * Update the link count immediately, as some apps
1207 * (e.g. pine) test this after making a link.
1209 inode->i_nlink++;
1211 return error;
1215 * RENAME
1216 * FIXME: Some nfsds, like the Linux user space nfsd, may generate a
1217 * different file handle for the same inode after a rename (e.g. when
1218 * moving to a different directory). A fail-safe method to do so would
1219 * be to look up old_dir/old_name, create a link to new_dir/new_name and
1220 * rename the old file using the sillyrename stuff. This way, the original
1221 * file in old_dir will go away when the last process iput()s the inode.
1223 * FIXED.
1225 * It actually works quite well. One needs to have the possibility for
1226 * at least one ".nfs..." file in each directory the file ever gets
1227 * moved or linked to which happens automagically with the new
1228 * implementation that only depends on the dcache stuff instead of
1229 * using the inode layer
1231 * Unfortunately, things are a little more complicated than indicated
1232 * above. For a cross-directory move, we want to make sure we can get
1233 * rid of the old inode after the operation. This means there must be
1234 * no pending writes (if it's a file), and the use count must be 1.
1235 * If these conditions are met, we can drop the dentries before doing
1236 * the rename.
1238 static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1239 struct inode *new_dir, struct dentry *new_dentry)
1241 struct inode *old_inode = old_dentry->d_inode;
1242 struct inode *new_inode = new_dentry->d_inode;
1243 struct dentry *dentry = NULL;
1244 int error, rehash = 0, update = 1;
1246 dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n",
1247 old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
1248 new_dentry->d_parent->d_name.name, new_dentry->d_name.name,
1249 new_dentry->d_count);
1252 * First check whether the target is busy ... we can't
1253 * safely do _any_ rename if the target is in use.
1255 * For files, make a copy of the dentry and then do a
1256 * silly-rename. If the silly-rename succeeds, the
1257 * copied dentry is hashed and becomes the new target.
1259 * With directories check is done in VFS.
1261 error = -EBUSY;
1262 if (new_dentry->d_count > 1 && new_inode) {
1263 int err;
1264 /* copy the target dentry's name */
1265 dentry = d_alloc(new_dentry->d_parent,
1266 &new_dentry->d_name);
1267 if (!dentry)
1268 goto out;
1270 /* silly-rename the existing target ... */
1271 err = nfs_sillyrename(new_dir, new_dentry);
1272 if (!err) {
1273 new_dentry = dentry;
1274 new_inode = NULL;
1275 /* hash the replacement target */
1276 d_add(new_dentry, NULL);
1279 /* dentry still busy? */
1280 if (new_dentry->d_count > 1) {
1281 #ifdef NFS_PARANOIA
1282 printk("nfs_rename: target %s/%s busy, d_count=%d\n",
1283 new_dentry->d_parent->d_name.name,new_dentry->d_name.name,new_dentry->d_count);
1284 #endif
1285 goto out;
1290 * Check for within-directory rename ... no complications.
1292 if (new_dir == old_dir)
1293 goto do_rename;
1295 * Cross-directory move ...
1297 * ... prune child dentries and writebacks if needed.
1299 if (old_dentry->d_count > 1) {
1300 nfs_wb_all(old_inode);
1301 shrink_dcache_parent(old_dentry);
1305 * Now check the use counts ... we can't safely do the
1306 * rename unless we can drop the dentries first.
1308 if (old_dentry->d_count > 1) {
1309 #ifdef NFS_PARANOIA
1310 printk("nfs_rename: old dentry %s/%s busy, d_count=%d\n",
1311 old_dentry->d_parent->d_name.name,old_dentry->d_name.name,old_dentry->d_count);
1312 #endif
1313 goto out;
1315 if (new_dentry->d_count > 1 && new_inode) {
1316 #ifdef NFS_PARANOIA
1317 printk("nfs_rename: new dentry %s/%s busy, d_count=%d\n",
1318 new_dentry->d_parent->d_name.name,new_dentry->d_name.name,new_dentry->d_count);
1319 #endif
1320 goto out;
1323 d_drop(old_dentry);
1324 update = 0;
1326 do_rename:
1328 * To prevent any new references to the target during the rename,
1329 * we unhash the dentry and free the inode in advance.
1331 #ifdef NFS_PARANOIA
1332 if (new_inode &&
1333 new_inode->i_count > (S_ISDIR(new_inode->i_mode) ? 1 : new_inode->i_nlink))
1334 printk("nfs_rename: %s/%s inode busy?? i_count=%d, i_nlink=%d\n",
1335 new_dentry->d_parent->d_name.name, new_dentry->d_name.name,
1336 new_inode->i_count, new_inode->i_nlink);
1337 #endif
1338 if (!list_empty(&new_dentry->d_hash)) {
1339 d_drop(new_dentry);
1340 rehash = update;
1342 if (new_inode) {
1343 d_delete(new_dentry);
1346 invalidate_inode_pages(new_dir);
1347 nfs_flush_dircache(new_dir);
1348 invalidate_inode_pages(old_dir);
1349 nfs_flush_dircache(old_dir);
1350 error = nfs_proc_rename(NFS_DSERVER(old_dentry),
1351 NFS_FH(old_dentry->d_parent), old_dentry->d_name.name,
1352 NFS_FH(new_dentry->d_parent), new_dentry->d_name.name);
1353 if (!error && !S_ISDIR(old_inode->i_mode)) {
1354 /* Update the dcache if needed */
1355 if (rehash)
1356 d_add(new_dentry, NULL);
1357 if (update)
1358 d_move(old_dentry, new_dentry);
1361 out:
1362 /* new dentry created? */
1363 if (dentry)
1364 dput(dentry);
1365 return error;
1368 int nfs_init_fhcache(void)
1370 nfs_fh_cachep = kmem_cache_create("nfs_fh",
1371 sizeof(struct nfs_fh),
1372 0, SLAB_HWCACHE_ALIGN,
1373 NULL, NULL);
1374 if (nfs_fh_cachep == NULL)
1375 return -ENOMEM;
1377 nfs_cookie_cachep = kmem_cache_create("nfs_dcookie",
1378 sizeof(struct nfs_cookie_table),
1379 0, SLAB_HWCACHE_ALIGN,
1380 NULL, NULL);
1381 if (nfs_cookie_cachep == NULL)
1382 return -ENOMEM;
1384 return 0;
1388 * Local variables:
1389 * version-control: t
1390 * kept-new-versions: 5
1391 * End: