4 * Complete reimplementation
5 * (C) 1997 Thomas Schoebel-Theuer
9 * Notes on the allocation strategy:
11 * The dcache is a master of the icache - whenever a dcache entry
12 * exists, the inode will always exist. "iput()" is done either when
13 * the dcache entry is deleted or garbage collected.
16 #include <linux/string.h>
19 #include <linux/malloc.h>
20 #include <linux/slab.h>
21 #include <linux/init.h>
23 #include <asm/uaccess.h>
25 #define DCACHE_PARANOIA 1
26 /* #define DCACHE_DEBUG 1 */
28 /* For managing the dcache */
29 extern unsigned long num_physpages
, page_cache_size
;
30 extern int inodes_stat
[];
31 #define nr_inodes (inodes_stat[0])
33 kmem_cache_t
*dentry_cache
;
36 * This is the single most critical data structure when it comes
37 * to the dcache: the hashtable for lookups. Somebody should try
38 * to make this good - I've just made it work.
40 * This hash-function tries to avoid losing too many bits of hash
41 * information, yet avoid using a prime hash-size or similar.
44 #define D_HASHSIZE (1UL << D_HASHBITS)
45 #define D_HASHMASK (D_HASHSIZE-1)
47 static struct list_head dentry_hashtable
[D_HASHSIZE
];
48 static LIST_HEAD(dentry_unused
);
53 int age_limit
; /* age in seconds */
54 int want_pages
; /* pages requested by system */
56 } dentry_stat
= {0, 0, 45, 0,};
58 static inline void d_free(struct dentry
*dentry
)
60 if (dentry
->d_op
&& dentry
->d_op
->d_release
)
61 dentry
->d_op
->d_release(dentry
);
62 if (dname_external(dentry
))
63 kfree(dentry
->d_name
.name
);
64 kmem_cache_free(dentry_cache
, dentry
);
68 * Release the dentry's inode, using the fileystem
69 * d_iput() operation if defined.
71 static inline void dentry_iput(struct dentry
* dentry
)
73 struct inode
*inode
= dentry
->d_inode
;
75 dentry
->d_inode
= NULL
;
76 list_del(&dentry
->d_alias
);
77 INIT_LIST_HEAD(&dentry
->d_alias
);
78 if (dentry
->d_op
&& dentry
->d_op
->d_iput
)
79 dentry
->d_op
->d_iput(dentry
, inode
);
88 * This is complicated by the fact that we do not want to put
89 * dentries that are no longer on any hash chain on the unused
90 * list: we'd much rather just get rid of them immediately.
92 * However, that implies that we have to traverse the dentry
93 * tree upwards to the parents which might _also_ now be
94 * scheduled for deletion (it may have been only waiting for
95 * its last child to go away).
97 * This tail recursion is done by hand as we don't want to depend
98 * on the compiler to always get this right (gcc generally doesn't).
99 * Real recursion would eat up our stack space.
101 void dput(struct dentry
*dentry
)
109 count
= dentry
->d_count
- 1;
114 * Note that if d_op->d_delete blocks,
115 * the dentry could go back in use.
116 * Each fs will have to watch for this.
118 if (dentry
->d_op
&& dentry
->d_op
->d_delete
) {
119 dentry
->d_op
->d_delete(dentry
);
121 count
= dentry
->d_count
- 1;
126 if (!list_empty(&dentry
->d_lru
)) {
127 dentry_stat
.nr_unused
--;
128 list_del(&dentry
->d_lru
);
130 if (list_empty(&dentry
->d_hash
)) {
131 struct dentry
* parent
;
133 list_del(&dentry
->d_child
);
135 parent
= dentry
->d_parent
;
137 if (dentry
== parent
)
142 list_add(&dentry
->d_lru
, &dentry_unused
);
143 dentry_stat
.nr_unused
++;
145 * Update the timestamp
147 dentry
->d_reftime
= jiffies
;
151 dentry
->d_count
= count
;
155 printk(KERN_CRIT
"Negative d_count (%d) for %s/%s\n",
157 dentry
->d_parent
->d_name
.name
,
158 dentry
->d_name
.name
);
163 * Try to invalidate the dentry if it turns out to be
164 * possible. If there are other users of the dentry we
165 * can't invalidate it.
167 int d_invalidate(struct dentry
* dentry
)
169 /* Check whether to do a partial shrink_dcache */
170 if (!list_empty(&dentry
->d_subdirs
))
171 shrink_dcache_parent(dentry
);
173 if (dentry
->d_count
!= 1)
181 * Select less valuable dentries to be pruned when we need
182 * inodes or memory. The selected dentries are moved to the
183 * old end of the list where prune_dcache() can find them.
185 * Negative dentries are included in the selection so that
186 * they don't accumulate at the end of the list. The count
187 * returned is the total number of dentries selected, which
188 * may be much larger than the requested number of inodes.
190 int select_dcache(int inode_count
, int page_count
)
192 struct list_head
*next
, *tail
= &dentry_unused
;
193 int found
= 0, forward
= 0, young
= 8;
194 int depth
= dentry_stat
.nr_unused
>> 1;
195 unsigned long min_value
= 0, max_value
= 4;
201 while (next
!= &dentry_unused
&& depth
--) {
202 struct list_head
*tmp
= next
;
203 struct dentry
*dentry
= list_entry(tmp
, struct dentry
, d_lru
);
204 struct inode
*inode
= dentry
->d_inode
;
205 unsigned long value
= 0;
210 if (dentry
->d_count
) {
211 dentry_stat
.nr_unused
--;
217 * Check the dentry's age to see whether to change direction.
220 int age
= (jiffies
- dentry
->d_reftime
) / HZ
;
221 if (age
< dentry_stat
.age_limit
) {
224 next
= dentry_unused
.next
;
226 * Update the limits -- we don't want
227 * files with too few or too many pages.
234 printk("select_dcache: %s/%s age=%d, scanning forward\n",
235 dentry
->d_parent
->d_name
.name
, dentry
->d_name
.name
, age
);
243 * Select dentries based on the page cache count ...
244 * should factor in number of uses as well. We take
245 * all negative dentries so that they don't accumulate.
246 * (We skip inodes that aren't immediately available.)
249 value
= inode
->i_nrpages
;
250 if (value
>= max_value
|| value
< min_value
)
252 if (inode
->i_state
|| inode
->i_count
> 1)
257 * Move the selected dentries behind the tail.
259 if (tmp
!= tail
->prev
) {
261 list_add(tmp
, tail
->prev
);
265 if (inode
&& --inode_count
<= 0)
267 if (page_count
&& (page_count
-= value
) <= 0)
274 * Throw away a dentry - free the inode, dput the parent.
275 * This requires that the LRU list has already been
278 static inline void prune_one_dentry(struct dentry
* dentry
)
280 struct dentry
* parent
;
282 list_del(&dentry
->d_hash
);
283 list_del(&dentry
->d_child
);
285 parent
= dentry
->d_parent
;
291 * Shrink the dcache. This is done when we need
292 * more memory, or simply when we need to unmount
293 * something (at which point we need to unuse
296 void prune_dcache(int count
)
299 struct dentry
*dentry
;
300 struct list_head
*tmp
= dentry_unused
.prev
;
302 if (tmp
== &dentry_unused
)
304 dentry_stat
.nr_unused
--;
307 dentry
= list_entry(tmp
, struct dentry
, d_lru
);
308 if (!dentry
->d_count
) {
309 prune_one_dentry(dentry
);
317 * Shrink the dcache for the specified super block.
318 * This allows us to unmount a device without disturbing
319 * the dcache for the other devices.
321 * This implementation makes just two traversals of the
322 * unused list. On the first pass we move the selected
323 * dentries to the most recent end, and on the second
324 * pass we free them. The second pass must restart after
325 * each dput(), but since the target dentries are all at
326 * the end, it's really just a single traversal.
328 void shrink_dcache_sb(struct super_block
* sb
)
330 struct list_head
*tmp
, *next
;
331 struct dentry
*dentry
;
334 * Pass one ... move the dentries for the specified
335 * superblock to the most recent end of the unused list.
337 next
= dentry_unused
.next
;
338 while (next
!= &dentry_unused
) {
341 dentry
= list_entry(tmp
, struct dentry
, d_lru
);
342 if (dentry
->d_sb
!= sb
)
345 list_add(tmp
, &dentry_unused
);
349 * Pass two ... free the dentries for this superblock.
352 next
= dentry_unused
.next
;
353 while (next
!= &dentry_unused
) {
356 dentry
= list_entry(tmp
, struct dentry
, d_lru
);
357 if (dentry
->d_sb
!= sb
)
361 dentry_stat
.nr_unused
--;
364 prune_one_dentry(dentry
);
370 * Check whether a root dentry would be in use if all of its
371 * child dentries were freed. This allows a non-destructive
372 * test for unmounting a device.
374 int is_root_busy(struct dentry
*root
)
376 struct dentry
*this_parent
= root
;
377 struct list_head
*next
;
378 int count
= root
->d_count
;
381 next
= this_parent
->d_subdirs
.next
;
383 while (next
!= &this_parent
->d_subdirs
) {
384 struct list_head
*tmp
= next
;
385 struct dentry
*dentry
= list_entry(tmp
, struct dentry
, d_child
);
387 /* Decrement count for unused children */
388 count
+= (dentry
->d_count
- 1);
389 if (!list_empty(&dentry
->d_subdirs
)) {
390 this_parent
= dentry
;
393 /* root is busy if any leaf is busy */
398 * All done at this level ... ascend and resume the search.
400 if (this_parent
!= root
) {
401 next
= this_parent
->d_child
.next
;
402 this_parent
= this_parent
->d_parent
;
405 return (count
> 1); /* remaining users? */
409 * Search the dentry child list for the specified parent,
410 * and move any unused dentries to the end of the unused
411 * list for prune_dcache(). We descend to the next level
412 * whenever the d_subdirs list is non-empty and continue
415 static int select_parent(struct dentry
* parent
)
417 struct dentry
*this_parent
= parent
;
418 struct list_head
*next
;
422 next
= this_parent
->d_subdirs
.next
;
424 while (next
!= &this_parent
->d_subdirs
) {
425 struct list_head
*tmp
= next
;
426 struct dentry
*dentry
= list_entry(tmp
, struct dentry
, d_child
);
428 if (!dentry
->d_count
) {
429 list_del(&dentry
->d_lru
);
430 list_add(&dentry
->d_lru
, dentry_unused
.prev
);
434 * Descend a level if the d_subdirs list is non-empty.
436 if (!list_empty(&dentry
->d_subdirs
)) {
437 this_parent
= dentry
;
439 printk(KERN_DEBUG
"select_parent: descending to %s/%s, found=%d\n",
440 dentry
->d_parent
->d_name
.name
, dentry
->d_name
.name
, found
);
446 * All done at this level ... ascend and resume the search.
448 if (this_parent
!= parent
) {
449 next
= this_parent
->d_child
.next
;
450 this_parent
= this_parent
->d_parent
;
452 printk(KERN_DEBUG
"select_parent: ascending to %s/%s, found=%d\n",
453 this_parent
->d_parent
->d_name
.name
, this_parent
->d_name
.name
, found
);
461 * Prune the dcache to remove unused children of the parent dentry.
463 void shrink_dcache_parent(struct dentry
* parent
)
467 while ((found
= select_parent(parent
)) != 0)
472 * This is called from kswapd when we think we need some
473 * more memory, but aren't really sure how much. So we
474 * carefully try to free a _bit_ of our dcache, but not
478 * 0 - very urgent: schrink everything
480 * 6 - base-level: try to shrink a bit.
482 void shrink_dcache_memory(int priority
, unsigned int gfp_mask
)
487 #define NAME_ALLOC_LEN(len) ((len+16) & ~15)
489 struct dentry
* d_alloc(struct dentry
* parent
, const struct qstr
*name
)
492 struct dentry
*dentry
;
495 * Prune the dcache if there are too many unused dentries.
497 if (dentry_stat
.nr_unused
> 3*(nr_inodes
>> 1)) {
499 printk("d_alloc: %d unused, pruning dcache\n", dentry_stat
.nr_unused
);
502 free_inode_memory(8);
505 dentry
= kmem_cache_alloc(dentry_cache
, GFP_KERNEL
);
509 if (name
->len
> DNAME_INLINE_LEN
-1) {
510 str
= kmalloc(NAME_ALLOC_LEN(name
->len
), GFP_KERNEL
);
512 kmem_cache_free(dentry_cache
, dentry
);
516 str
= dentry
->d_iname
;
518 memcpy(str
, name
->name
, name
->len
);
523 dentry
->d_inode
= NULL
;
524 dentry
->d_parent
= NULL
;
527 dentry
->d_parent
= dget(parent
);
528 dentry
->d_sb
= parent
->d_sb
;
529 list_add(&dentry
->d_child
, &parent
->d_subdirs
);
531 INIT_LIST_HEAD(&dentry
->d_child
);
533 dentry
->d_mounts
= dentry
;
534 dentry
->d_covers
= dentry
;
535 INIT_LIST_HEAD(&dentry
->d_hash
);
536 INIT_LIST_HEAD(&dentry
->d_lru
);
537 INIT_LIST_HEAD(&dentry
->d_subdirs
);
538 INIT_LIST_HEAD(&dentry
->d_alias
);
540 dentry
->d_name
.name
= str
;
541 dentry
->d_name
.len
= name
->len
;
542 dentry
->d_name
.hash
= name
->hash
;
544 dentry
->d_fsdata
= NULL
;
549 * Fill in inode information in the entry.
551 * This turns negative dentries into productive full members
554 * NOTE! This assumes that the inode count has been incremented
555 * (or otherwise set) by the caller to indicate that it is now
556 * in use by the dcache..
558 void d_instantiate(struct dentry
*entry
, struct inode
* inode
)
561 list_add(&entry
->d_alias
, &inode
->i_dentry
);
562 entry
->d_inode
= inode
;
565 struct dentry
* d_alloc_root(struct inode
* root_inode
, struct dentry
*old_root
)
567 struct dentry
*res
= NULL
;
570 res
= d_alloc(NULL
, &(const struct qstr
) { "/", 1, 0 });
572 res
->d_sb
= root_inode
->i_sb
;
574 d_instantiate(res
, root_inode
);
580 static inline struct list_head
* d_hash(struct dentry
* parent
, unsigned long hash
)
582 hash
+= (unsigned long) parent
;
583 hash
= hash
^ (hash
>> D_HASHBITS
) ^ (hash
>> D_HASHBITS
*2);
584 return dentry_hashtable
+ (hash
& D_HASHMASK
);
587 struct dentry
* d_lookup(struct dentry
* parent
, struct qstr
* name
)
589 unsigned int len
= name
->len
;
590 unsigned int hash
= name
->hash
;
591 const unsigned char *str
= name
->name
;
592 struct list_head
*head
= d_hash(parent
,hash
);
593 struct list_head
*tmp
= head
->next
;
595 while (tmp
!= head
) {
596 struct dentry
* dentry
= list_entry(tmp
, struct dentry
, d_hash
);
599 if (dentry
->d_name
.hash
!= hash
)
601 if (dentry
->d_parent
!= parent
)
603 if (parent
->d_op
&& parent
->d_op
->d_compare
) {
604 if (parent
->d_op
->d_compare(parent
, &dentry
->d_name
, name
))
607 if (dentry
->d_name
.len
!= len
)
609 if (memcmp(dentry
->d_name
.name
, str
, len
))
618 * An insecure source has sent us a dentry, here we verify it.
620 * This is just to make knfsd able to have the dentry pointer
621 * in the NFS file handle.
623 * NOTE! Do _not_ dereference the pointers before we have
624 * validated them. We can test the pointer values, but we
625 * must not actually use them until we have found a valid
626 * copy of the pointer in kernel space..
628 int d_validate(struct dentry
*dentry
, struct dentry
*dparent
,
629 unsigned int hash
, unsigned int len
)
631 struct list_head
*base
, *lhp
;
634 if (dentry
!= dparent
) {
635 base
= d_hash(dparent
, hash
);
637 while ((lhp
= lhp
->next
) != base
) {
638 if (dentry
== list_entry(lhp
, struct dentry
, d_hash
))
643 * Special case: local mount points don't live in
644 * the hashes, so we search the super blocks.
646 struct super_block
*sb
= sb_entry(super_blocks
.next
);
648 for (; sb
!= sb_entry(&super_blocks
);
649 sb
= sb_entry(sb
->s_list
.next
)) {
652 if (sb
->s_root
== dentry
)
662 * When a file is deleted, we have two options:
663 * - turn this dentry into a negative dentry
664 * - unhash this dentry and free it.
666 * Usually, we want to just turn this into
667 * a negative dentry, but if anybody else is
668 * currently using the dentry or the inode
669 * we can't do that and we fall back on removing
670 * it from the hash queues and waiting for
671 * it to be deleted later when it has no users
673 void d_delete(struct dentry
* dentry
)
676 * Are we the only user?
678 if (dentry
->d_count
== 1) {
684 * If not, just drop the dentry and let dput
690 void d_add(struct dentry
* entry
, struct inode
* inode
)
692 struct dentry
* parent
= entry
->d_parent
;
694 list_add(&entry
->d_hash
, d_hash(parent
, entry
->d_name
.hash
));
695 d_instantiate(entry
, inode
);
698 #define do_switch(x,y) do { \
699 __typeof__ (x) __tmp = x; \
700 x = y; y = __tmp; } while (0)
703 * When switching names, the actual string doesn't strictly have to
704 * be preserved in the target - because we're dropping the target
705 * anyway. As such, we can just do a simple memcpy() to copy over
706 * the new name before we switch.
708 * Note that we have to be a lot more careful about getting the hash
709 * switched - we have to switch the hash value properly even if it
710 * then no longer matches the actual (corrupted) string of the target.
711 * The has value has to match the hash queue that the dentry is on..
713 static inline void switch_names(struct dentry
* dentry
, struct dentry
* target
)
715 const unsigned char *old_name
, *new_name
;
717 memcpy(dentry
->d_iname
, target
->d_iname
, DNAME_INLINE_LEN
);
718 old_name
= target
->d_name
.name
;
719 new_name
= dentry
->d_name
.name
;
720 if (old_name
== target
->d_iname
)
721 old_name
= dentry
->d_iname
;
722 if (new_name
== dentry
->d_iname
)
723 new_name
= target
->d_iname
;
724 target
->d_name
.name
= new_name
;
725 dentry
->d_name
.name
= old_name
;
729 * We cannibalize "target" when moving dentry on top of it,
730 * because it's going to be thrown away anyway. We could be more
731 * polite about it, though.
733 * This forceful removal will result in ugly /proc output if
734 * somebody holds a file open that got deleted due to a rename.
735 * We could be nicer about the deleted file, and let it show
736 * up under the name it got deleted rather than the name that
739 * Careful with the hash switch. The hash switch depends on
740 * the fact that any list-entry can be a head of the list.
743 void d_move(struct dentry
* dentry
, struct dentry
* target
)
745 if (!dentry
->d_inode
)
746 printk(KERN_WARNING
"VFS: moving negative dcache entry\n");
748 /* Move the dentry to the target hash queue */
749 list_del(&dentry
->d_hash
);
750 list_add(&dentry
->d_hash
, &target
->d_hash
);
752 /* Unhash the target: dput() will then get rid of it */
753 list_del(&target
->d_hash
);
754 INIT_LIST_HEAD(&target
->d_hash
);
756 list_del(&dentry
->d_child
);
757 list_del(&target
->d_child
);
759 /* Switch the parents and the names.. */
760 switch_names(dentry
, target
);
761 do_switch(dentry
->d_parent
, target
->d_parent
);
762 do_switch(dentry
->d_name
.len
, target
->d_name
.len
);
763 do_switch(dentry
->d_name
.hash
, target
->d_name
.hash
);
765 /* And add them back to the (new) parent lists */
766 list_add(&target
->d_child
, &target
->d_parent
->d_subdirs
);
767 list_add(&dentry
->d_child
, &dentry
->d_parent
->d_subdirs
);
771 * "buflen" should be PAGE_SIZE or more.
773 char * d_path(struct dentry
*dentry
, char *buffer
, int buflen
)
775 char * end
= buffer
+buflen
;
777 struct dentry
* root
= current
->fs
->root
;
781 if (dentry
->d_parent
!= dentry
&& list_empty(&dentry
->d_hash
)) {
784 memcpy(end
, " (deleted)", 10);
792 struct dentry
* parent
;
797 dentry
= dentry
->d_covers
;
798 parent
= dentry
->d_parent
;
799 if (dentry
== parent
)
801 namelen
= dentry
->d_name
.len
;
802 buflen
-= namelen
+ 1;
806 memcpy(end
, dentry
->d_name
.name
, namelen
);
815 * NOTE! The user-level library version returns a
816 * character pointer. The kernel system call just
817 * returns the length of the buffer filled (which
818 * includes the ending '\0' character), or a negative
819 * error value. So libc would do something like
821 * char *getcwd(char * buf, size_t size)
825 * retval = sys_getcwd(buf, size);
832 asmlinkage
int sys_getcwd(char *buf
, unsigned long size
)
835 struct dentry
*pwd
= current
->fs
->pwd
;
838 /* Has the current directory has been unlinked? */
839 if (pwd
->d_parent
== pwd
|| !list_empty(&pwd
->d_hash
)) {
840 char *page
= (char *) __get_free_page(GFP_USER
);
844 char * cwd
= d_path(pwd
, page
, PAGE_SIZE
);
847 len
= PAGE_SIZE
+ page
- cwd
;
850 if (copy_to_user(buf
, cwd
, len
))
853 free_page((unsigned long) page
);
860 * Test whether new_dentry is a subdirectory of old_dentry.
862 * Trivially implemented using the dcache structure
864 int is_subdir(struct dentry
* new_dentry
, struct dentry
* old_dentry
)
870 if (new_dentry
!= old_dentry
) {
871 struct dentry
* parent
= new_dentry
->d_parent
;
872 if (parent
== new_dentry
)
884 * Check whether a dentry already exists for the given name,
885 * and return the inode number if it has an inode.
887 * This routine is used to post-process directory listings for
888 * filesystems using synthetic inode numbers, and is necessary
889 * to keep getcwd() working.
891 ino_t
find_inode_number(struct dentry
*dir
, struct qstr
*name
)
893 struct dentry
* dentry
;
897 * Check for a fs-specific hash function. Note that we must
898 * calculate the standard hash first, as the d_op->d_hash()
899 * routine may choose to leave the hash value unchanged.
901 name
->hash
= full_name_hash(name
->name
, name
->len
);
902 if (dir
->d_op
&& dir
->d_op
->d_hash
)
904 if (dir
->d_op
->d_hash(dir
, name
) != 0)
908 dentry
= d_lookup(dir
, name
);
912 ino
= dentry
->d_inode
->i_ino
;
919 void __init
dcache_init(void)
922 struct list_head
*d
= dentry_hashtable
;
925 * A constructor could be added for stable state like the lists,
926 * but it is probably not worth it because of the cache nature
928 * If fragmentation is too bad then the SLAB_HWCACHE_ALIGN
929 * flag could be removed here, to hint to the allocator that
930 * it should not try to get multiple page regions.
932 dentry_cache
= kmem_cache_create("dentry_cache",
933 sizeof(struct dentry
),
938 panic("Cannot create dentry cache");