Import 2.3.26pre2
[davej-history.git] / fs / dcache.c
blobf96f8cf7f0db90d93d1f9035a24f10f56e58df12
1 /*
2 * fs/dcache.c
4 * Complete reimplementation
5 * (C) 1997 Thomas Schoebel-Theuer,
6 * with heavy changes by Linus Torvalds
7 */
9 /*
10 * Notes on the allocation strategy:
12 * The dcache is a master of the icache - whenever a dcache entry
13 * exists, the inode will always exist. "iput()" is done either when
14 * the dcache entry is deleted or garbage collected.
17 #include <linux/string.h>
18 #include <linux/mm.h>
19 #include <linux/fs.h>
20 #include <linux/malloc.h>
21 #include <linux/slab.h>
22 #include <linux/init.h>
23 #include <linux/smp_lock.h>
25 #include <asm/uaccess.h>
27 #define DCACHE_PARANOIA 1
28 /* #define DCACHE_DEBUG 1 */
30 /* For managing the dcache */
31 extern unsigned long num_physpages, page_cache_size;
32 extern int inodes_stat[];
33 #define nr_inodes (inodes_stat[0])
35 kmem_cache_t *dentry_cache;
38 * This is the single most critical data structure when it comes
39 * to the dcache: the hashtable for lookups. Somebody should try
40 * to make this good - I've just made it work.
42 * This hash-function tries to avoid losing too many bits of hash
43 * information, yet avoid using a prime hash-size or similar.
45 #define D_HASHBITS 14
46 #define D_HASHSIZE (1UL << D_HASHBITS)
47 #define D_HASHMASK (D_HASHSIZE-1)
49 static struct list_head dentry_hashtable[D_HASHSIZE];
50 static LIST_HEAD(dentry_unused);
52 struct {
53 int nr_dentry;
54 int nr_unused;
55 int age_limit; /* age in seconds */
56 int want_pages; /* pages requested by system */
57 int dummy[2];
58 } dentry_stat = {0, 0, 45, 0,};
60 static inline void d_free(struct dentry *dentry)
62 if (dentry->d_op && dentry->d_op->d_release)
63 dentry->d_op->d_release(dentry);
64 if (dname_external(dentry))
65 kfree(dentry->d_name.name);
66 kmem_cache_free(dentry_cache, dentry);
70 * Release the dentry's inode, using the fileystem
71 * d_iput() operation if defined.
73 static inline void dentry_iput(struct dentry * dentry)
75 struct inode *inode = dentry->d_inode;
76 if (inode) {
77 dentry->d_inode = NULL;
78 list_del(&dentry->d_alias);
79 INIT_LIST_HEAD(&dentry->d_alias);
80 if (dentry->d_op && dentry->d_op->d_iput)
81 dentry->d_op->d_iput(dentry, inode);
82 else
83 iput(inode);
88 * dput()
90 * This is complicated by the fact that we do not want to put
91 * dentries that are no longer on any hash chain on the unused
92 * list: we'd much rather just get rid of them immediately.
94 * However, that implies that we have to traverse the dentry
95 * tree upwards to the parents which might _also_ now be
96 * scheduled for deletion (it may have been only waiting for
97 * its last child to go away).
99 * This tail recursion is done by hand as we don't want to depend
100 * on the compiler to always get this right (gcc generally doesn't).
101 * Real recursion would eat up our stack space.
103 void dput(struct dentry *dentry)
105 int count;
107 if (!dentry)
108 return;
110 repeat:
111 count = dentry->d_count - 1;
112 if (count != 0)
113 goto out;
116 * Note that if d_op->d_delete blocks,
117 * the dentry could go back in use.
118 * Each fs will have to watch for this.
120 if (dentry->d_op && dentry->d_op->d_delete) {
121 dentry->d_op->d_delete(dentry);
123 count = dentry->d_count - 1;
124 if (count != 0)
125 goto out;
128 if (!list_empty(&dentry->d_lru)) {
129 dentry_stat.nr_unused--;
130 list_del(&dentry->d_lru);
132 if (list_empty(&dentry->d_hash)) {
133 struct dentry * parent;
135 list_del(&dentry->d_child);
136 dentry_iput(dentry);
137 parent = dentry->d_parent;
138 d_free(dentry);
139 if (dentry == parent)
140 return;
141 dentry = parent;
142 goto repeat;
144 list_add(&dentry->d_lru, &dentry_unused);
145 dentry_stat.nr_unused++;
147 * Update the timestamp
149 dentry->d_reftime = jiffies;
151 out:
152 if (count >= 0) {
153 dentry->d_count = count;
154 return;
157 printk(KERN_CRIT "Negative d_count (%d) for %s/%s\n",
158 count,
159 dentry->d_parent->d_name.name,
160 dentry->d_name.name);
161 *(int *)0 = 0;
165 * Try to invalidate the dentry if it turns out to be
166 * possible. If there are other dentries that can be
167 * reached through this one we can't delete it.
169 int d_invalidate(struct dentry * dentry)
172 * If it's already been dropped, return OK.
174 if (list_empty(&dentry->d_hash))
175 return 0;
177 * Check whether to do a partial shrink_dcache
178 * to get rid of unused child entries.
180 if (!list_empty(&dentry->d_subdirs)) {
181 shrink_dcache_parent(dentry);
185 * Somebody else still using it?
187 * If it's a directory, we can't drop it
188 * for fear of somebody re-populating it
189 * with children (even though dropping it
190 * would make it unreachable from the root,
191 * we might still populate it if it was a
192 * working directory or similar).
194 if (dentry->d_count > 1) {
195 if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
196 return -EBUSY;
199 d_drop(dentry);
200 return 0;
204 * Throw away a dentry - free the inode, dput the parent.
205 * This requires that the LRU list has already been
206 * removed.
208 static inline void prune_one_dentry(struct dentry * dentry)
210 struct dentry * parent;
212 list_del(&dentry->d_hash);
213 list_del(&dentry->d_child);
214 dentry_iput(dentry);
215 parent = dentry->d_parent;
216 d_free(dentry);
217 if (parent != dentry)
218 dput(parent);
222 * Shrink the dcache. This is done when we need
223 * more memory, or simply when we need to unmount
224 * something (at which point we need to unuse
225 * all dentries).
227 void prune_dcache(int count)
229 for (;;) {
230 struct dentry *dentry;
231 struct list_head *tmp = dentry_unused.prev;
233 if (tmp == &dentry_unused)
234 break;
235 dentry_stat.nr_unused--;
236 list_del(tmp);
237 INIT_LIST_HEAD(tmp);
238 dentry = list_entry(tmp, struct dentry, d_lru);
239 if (!dentry->d_count) {
240 prune_one_dentry(dentry);
241 if (!--count)
242 break;
248 * Shrink the dcache for the specified super block.
249 * This allows us to unmount a device without disturbing
250 * the dcache for the other devices.
252 * This implementation makes just two traversals of the
253 * unused list. On the first pass we move the selected
254 * dentries to the most recent end, and on the second
255 * pass we free them. The second pass must restart after
256 * each dput(), but since the target dentries are all at
257 * the end, it's really just a single traversal.
259 void shrink_dcache_sb(struct super_block * sb)
261 struct list_head *tmp, *next;
262 struct dentry *dentry;
265 * Pass one ... move the dentries for the specified
266 * superblock to the most recent end of the unused list.
268 next = dentry_unused.next;
269 while (next != &dentry_unused) {
270 tmp = next;
271 next = tmp->next;
272 dentry = list_entry(tmp, struct dentry, d_lru);
273 if (dentry->d_sb != sb)
274 continue;
275 list_del(tmp);
276 list_add(tmp, &dentry_unused);
280 * Pass two ... free the dentries for this superblock.
282 repeat:
283 next = dentry_unused.next;
284 while (next != &dentry_unused) {
285 tmp = next;
286 next = tmp->next;
287 dentry = list_entry(tmp, struct dentry, d_lru);
288 if (dentry->d_sb != sb)
289 continue;
290 if (dentry->d_count)
291 continue;
292 dentry_stat.nr_unused--;
293 list_del(tmp);
294 INIT_LIST_HEAD(tmp);
295 prune_one_dentry(dentry);
296 goto repeat;
301 * Check whether a root dentry would be in use if all of its
302 * child dentries were freed. This allows a non-destructive
303 * test for unmounting a device.
305 int is_root_busy(struct dentry *root)
307 struct dentry *this_parent = root;
308 struct list_head *next;
309 int count = root->d_count;
311 repeat:
312 next = this_parent->d_subdirs.next;
313 resume:
314 while (next != &this_parent->d_subdirs) {
315 struct list_head *tmp = next;
316 struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
317 next = tmp->next;
318 /* Decrement count for unused children */
319 count += (dentry->d_count - 1);
320 if (!list_empty(&dentry->d_subdirs)) {
321 this_parent = dentry;
322 goto repeat;
324 /* root is busy if any leaf is busy */
325 if (dentry->d_count)
326 return 1;
329 * All done at this level ... ascend and resume the search.
331 if (this_parent != root) {
332 next = this_parent->d_child.next;
333 this_parent = this_parent->d_parent;
334 goto resume;
336 return (count > 1); /* remaining users? */
340 * Search the dentry child list for the specified parent,
341 * and move any unused dentries to the end of the unused
342 * list for prune_dcache(). We descend to the next level
343 * whenever the d_subdirs list is non-empty and continue
344 * searching.
346 static int select_parent(struct dentry * parent)
348 struct dentry *this_parent = parent;
349 struct list_head *next;
350 int found = 0;
352 repeat:
353 next = this_parent->d_subdirs.next;
354 resume:
355 while (next != &this_parent->d_subdirs) {
356 struct list_head *tmp = next;
357 struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
358 next = tmp->next;
359 if (!dentry->d_count) {
360 list_del(&dentry->d_lru);
361 list_add(&dentry->d_lru, dentry_unused.prev);
362 found++;
365 * Descend a level if the d_subdirs list is non-empty.
367 if (!list_empty(&dentry->d_subdirs)) {
368 this_parent = dentry;
369 #ifdef DCACHE_DEBUG
370 printk(KERN_DEBUG "select_parent: descending to %s/%s, found=%d\n",
371 dentry->d_parent->d_name.name, dentry->d_name.name, found);
372 #endif
373 goto repeat;
377 * All done at this level ... ascend and resume the search.
379 if (this_parent != parent) {
380 next = this_parent->d_child.next;
381 this_parent = this_parent->d_parent;
382 #ifdef DCACHE_DEBUG
383 printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n",
384 this_parent->d_parent->d_name.name, this_parent->d_name.name, found);
385 #endif
386 goto resume;
388 return found;
392 * Prune the dcache to remove unused children of the parent dentry.
394 void shrink_dcache_parent(struct dentry * parent)
396 int found;
398 while ((found = select_parent(parent)) != 0)
399 prune_dcache(found);
403 * This is called from kswapd when we think we need some
404 * more memory, but aren't really sure how much. So we
405 * carefully try to free a _bit_ of our dcache, but not
406 * too much.
408 * Priority:
409 * 0 - very urgent: shrink everything
410 * ...
411 * 6 - base-level: try to shrink a bit.
413 int shrink_dcache_memory(int priority, unsigned int gfp_mask)
415 if (gfp_mask & __GFP_IO) {
416 int count = 0;
417 lock_kernel();
418 if (priority)
419 count = dentry_stat.nr_unused / priority;
420 prune_dcache(count);
421 unlock_kernel();
422 /* FIXME: kmem_cache_shrink here should tell us
423 the number of pages freed, and it should
424 work in a __GFP_DMA/__GFP_HIGHMEM behaviour
425 to free only the interesting pages in
426 function of the needs of the current allocation. */
427 kmem_cache_shrink(dentry_cache);
430 return 0;
433 #define NAME_ALLOC_LEN(len) ((len+16) & ~15)
435 struct dentry * d_alloc(struct dentry * parent, const struct qstr *name)
437 char * str;
438 struct dentry *dentry;
440 dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
441 if (!dentry)
442 return NULL;
444 if (name->len > DNAME_INLINE_LEN-1) {
445 str = kmalloc(NAME_ALLOC_LEN(name->len), GFP_KERNEL);
446 if (!str) {
447 kmem_cache_free(dentry_cache, dentry);
448 return NULL;
450 } else
451 str = dentry->d_iname;
453 memcpy(str, name->name, name->len);
454 str[name->len] = 0;
456 dentry->d_count = 1;
457 dentry->d_flags = 0;
458 dentry->d_inode = NULL;
459 dentry->d_parent = NULL;
460 dentry->d_sb = NULL;
461 if (parent) {
462 dentry->d_parent = dget(parent);
463 dentry->d_sb = parent->d_sb;
464 list_add(&dentry->d_child, &parent->d_subdirs);
465 } else
466 INIT_LIST_HEAD(&dentry->d_child);
468 dentry->d_mounts = dentry;
469 dentry->d_covers = dentry;
470 INIT_LIST_HEAD(&dentry->d_hash);
471 INIT_LIST_HEAD(&dentry->d_lru);
472 INIT_LIST_HEAD(&dentry->d_subdirs);
473 INIT_LIST_HEAD(&dentry->d_alias);
475 dentry->d_name.name = str;
476 dentry->d_name.len = name->len;
477 dentry->d_name.hash = name->hash;
478 dentry->d_op = NULL;
479 dentry->d_fsdata = NULL;
480 return dentry;
484 * Fill in inode information in the entry.
486 * This turns negative dentries into productive full members
487 * of society.
489 * NOTE! This assumes that the inode count has been incremented
490 * (or otherwise set) by the caller to indicate that it is now
491 * in use by the dcache..
493 void d_instantiate(struct dentry *entry, struct inode * inode)
495 if (inode)
496 list_add(&entry->d_alias, &inode->i_dentry);
497 entry->d_inode = inode;
500 struct dentry * d_alloc_root(struct inode * root_inode)
502 struct dentry *res = NULL;
504 if (root_inode) {
505 res = d_alloc(NULL, &(const struct qstr) { "/", 1, 0 });
506 if (res) {
507 res->d_sb = root_inode->i_sb;
508 res->d_parent = res;
509 d_instantiate(res, root_inode);
512 return res;
515 static inline struct list_head * d_hash(struct dentry * parent, unsigned long hash)
517 hash += (unsigned long) parent;
518 hash = hash ^ (hash >> D_HASHBITS) ^ (hash >> D_HASHBITS*2);
519 return dentry_hashtable + (hash & D_HASHMASK);
522 struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
524 unsigned int len = name->len;
525 unsigned int hash = name->hash;
526 const unsigned char *str = name->name;
527 struct list_head *head = d_hash(parent,hash);
528 struct list_head *tmp = head->next;
530 for (;;) {
531 struct dentry * dentry = list_entry(tmp, struct dentry, d_hash);
532 if (tmp == head)
533 break;
534 tmp = tmp->next;
535 if (dentry->d_name.hash != hash)
536 continue;
537 if (dentry->d_parent != parent)
538 continue;
539 if (parent->d_op && parent->d_op->d_compare) {
540 if (parent->d_op->d_compare(parent, &dentry->d_name, name))
541 continue;
542 } else {
543 if (dentry->d_name.len != len)
544 continue;
545 if (memcmp(dentry->d_name.name, str, len))
546 continue;
548 return dget(dentry);
550 return NULL;
554 * An insecure source has sent us a dentry, here we verify it.
556 * This is just to make knfsd able to have the dentry pointer
557 * in the NFS file handle.
559 * NOTE! Do _not_ dereference the pointers before we have
560 * validated them. We can test the pointer values, but we
561 * must not actually use them until we have found a valid
562 * copy of the pointer in kernel space..
564 int d_validate(struct dentry *dentry, struct dentry *dparent,
565 unsigned int hash, unsigned int len)
567 struct list_head *base, *lhp;
568 int valid = 1;
570 if (dentry != dparent) {
571 base = d_hash(dparent, hash);
572 lhp = base;
573 while ((lhp = lhp->next) != base) {
574 if (dentry == list_entry(lhp, struct dentry, d_hash))
575 goto out;
577 } else {
579 * Special case: local mount points don't live in
580 * the hashes, so we search the super blocks.
582 struct super_block *sb = sb_entry(super_blocks.next);
584 for (; sb != sb_entry(&super_blocks);
585 sb = sb_entry(sb->s_list.next)) {
586 if (!sb->s_dev)
587 continue;
588 if (sb->s_root == dentry)
589 goto out;
592 valid = 0;
593 out:
594 return valid;
598 * When a file is deleted, we have two options:
599 * - turn this dentry into a negative dentry
600 * - unhash this dentry and free it.
602 * Usually, we want to just turn this into
603 * a negative dentry, but if anybody else is
604 * currently using the dentry or the inode
605 * we can't do that and we fall back on removing
606 * it from the hash queues and waiting for
607 * it to be deleted later when it has no users
609 void d_delete(struct dentry * dentry)
612 * Are we the only user?
614 if (dentry->d_count == 1) {
615 dentry_iput(dentry);
616 return;
620 * If not, just drop the dentry and let dput
621 * pick up the tab..
623 d_drop(dentry);
626 void d_rehash(struct dentry * entry)
628 struct dentry * parent = entry->d_parent;
630 list_add(&entry->d_hash, d_hash(parent, entry->d_name.hash));
633 #define do_switch(x,y) do { \
634 __typeof__ (x) __tmp = x; \
635 x = y; y = __tmp; } while (0)
638 * When switching names, the actual string doesn't strictly have to
639 * be preserved in the target - because we're dropping the target
640 * anyway. As such, we can just do a simple memcpy() to copy over
641 * the new name before we switch.
643 * Note that we have to be a lot more careful about getting the hash
644 * switched - we have to switch the hash value properly even if it
645 * then no longer matches the actual (corrupted) string of the target.
646 * The has value has to match the hash queue that the dentry is on..
648 static inline void switch_names(struct dentry * dentry, struct dentry * target)
650 const unsigned char *old_name, *new_name;
652 memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN);
653 old_name = target->d_name.name;
654 new_name = dentry->d_name.name;
655 if (old_name == target->d_iname)
656 old_name = dentry->d_iname;
657 if (new_name == dentry->d_iname)
658 new_name = target->d_iname;
659 target->d_name.name = new_name;
660 dentry->d_name.name = old_name;
664 * We cannibalize "target" when moving dentry on top of it,
665 * because it's going to be thrown away anyway. We could be more
666 * polite about it, though.
668 * This forceful removal will result in ugly /proc output if
669 * somebody holds a file open that got deleted due to a rename.
670 * We could be nicer about the deleted file, and let it show
671 * up under the name it got deleted rather than the name that
672 * deleted it.
674 * Careful with the hash switch. The hash switch depends on
675 * the fact that any list-entry can be a head of the list.
676 * Think about it.
678 void d_move(struct dentry * dentry, struct dentry * target)
680 if (!dentry->d_inode)
681 printk(KERN_WARNING "VFS: moving negative dcache entry\n");
683 /* Move the dentry to the target hash queue */
684 list_del(&dentry->d_hash);
685 list_add(&dentry->d_hash, &target->d_hash);
687 /* Unhash the target: dput() will then get rid of it */
688 list_del(&target->d_hash);
689 INIT_LIST_HEAD(&target->d_hash);
691 list_del(&dentry->d_child);
692 list_del(&target->d_child);
694 /* Switch the parents and the names.. */
695 switch_names(dentry, target);
696 do_switch(dentry->d_parent, target->d_parent);
697 do_switch(dentry->d_name.len, target->d_name.len);
698 do_switch(dentry->d_name.hash, target->d_name.hash);
700 /* And add them back to the (new) parent lists */
701 list_add(&target->d_child, &target->d_parent->d_subdirs);
702 list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
706 * "buflen" should be PAGE_SIZE or more.
708 char * d_path(struct dentry *dentry, char *buffer, int buflen)
710 char * end = buffer+buflen;
711 char * retval;
712 struct dentry * root = current->fs->root;
714 *--end = '\0';
715 buflen--;
716 if (!IS_ROOT(dentry) && list_empty(&dentry->d_hash)) {
717 buflen -= 10;
718 end -= 10;
719 memcpy(end, " (deleted)", 10);
722 /* Get '/' right */
723 retval = end-1;
724 *retval = '/';
726 for (;;) {
727 struct dentry * parent;
728 int namelen;
730 if (dentry == root)
731 break;
732 dentry = dentry->d_covers;
733 parent = dentry->d_parent;
734 if (dentry == parent)
735 break;
736 namelen = dentry->d_name.len;
737 buflen -= namelen + 1;
738 if (buflen < 0)
739 break;
740 end -= namelen;
741 memcpy(end, dentry->d_name.name, namelen);
742 *--end = '/';
743 retval = end;
744 dentry = parent;
746 return retval;
750 * NOTE! The user-level library version returns a
751 * character pointer. The kernel system call just
752 * returns the length of the buffer filled (which
753 * includes the ending '\0' character), or a negative
754 * error value. So libc would do something like
756 * char *getcwd(char * buf, size_t size)
758 * int retval;
760 * retval = sys_getcwd(buf, size);
761 * if (retval >= 0)
762 * return buf;
763 * errno = -retval;
764 * return NULL;
767 asmlinkage long sys_getcwd(char *buf, unsigned long size)
769 int error;
770 struct dentry *pwd = current->fs->pwd;
772 error = -ENOENT;
773 /* Has the current directory has been unlinked? */
774 if (pwd->d_parent == pwd || !list_empty(&pwd->d_hash)) {
775 char *page = (char *) __get_free_page(GFP_USER);
776 error = -ENOMEM;
777 if (page) {
778 unsigned long len;
779 char * cwd = d_path(pwd, page, PAGE_SIZE);
781 error = -ERANGE;
782 len = PAGE_SIZE + page - cwd;
783 if (len <= size) {
784 error = len;
785 if (copy_to_user(buf, cwd, len))
786 error = -EFAULT;
788 free_page((unsigned long) page);
791 return error;
795 * Test whether new_dentry is a subdirectory of old_dentry.
797 * Trivially implemented using the dcache structure
799 int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry)
801 int result;
803 result = 0;
804 for (;;) {
805 if (new_dentry != old_dentry) {
806 struct dentry * parent = new_dentry->d_parent;
807 if (parent == new_dentry)
808 break;
809 new_dentry = parent;
810 continue;
812 result = 1;
813 break;
815 return result;
819 * Check whether a dentry already exists for the given name,
820 * and return the inode number if it has an inode.
822 * This routine is used to post-process directory listings for
823 * filesystems using synthetic inode numbers, and is necessary
824 * to keep getcwd() working.
826 ino_t find_inode_number(struct dentry *dir, struct qstr *name)
828 struct dentry * dentry;
829 ino_t ino = 0;
832 * Check for a fs-specific hash function. Note that we must
833 * calculate the standard hash first, as the d_op->d_hash()
834 * routine may choose to leave the hash value unchanged.
836 name->hash = full_name_hash(name->name, name->len);
837 if (dir->d_op && dir->d_op->d_hash)
839 if (dir->d_op->d_hash(dir, name) != 0)
840 goto out;
843 dentry = d_lookup(dir, name);
844 if (dentry)
846 if (dentry->d_inode)
847 ino = dentry->d_inode->i_ino;
848 dput(dentry);
850 out:
851 return ino;
854 void __init dcache_init(void)
856 int i;
857 struct list_head *d = dentry_hashtable;
860 * A constructor could be added for stable state like the lists,
861 * but it is probably not worth it because of the cache nature
862 * of the dcache.
863 * If fragmentation is too bad then the SLAB_HWCACHE_ALIGN
864 * flag could be removed here, to hint to the allocator that
865 * it should not try to get multiple page regions.
867 dentry_cache = kmem_cache_create("dentry_cache",
868 sizeof(struct dentry),
870 SLAB_HWCACHE_ALIGN,
871 NULL, NULL);
872 if (!dentry_cache)
873 panic("Cannot create dentry cache");
875 i = D_HASHSIZE;
876 do {
877 INIT_LIST_HEAD(d);
878 d++;
879 i--;
880 } while (i);