4 * Complete reimplementation
5 * (C) 1997 Thomas Schoebel-Theuer,
6 * with heavy changes by Linus Torvalds
10 * Notes on the allocation strategy:
12 * The dcache is a master of the icache - whenever a dcache entry
13 * exists, the inode will always exist. "iput()" is done either when
14 * the dcache entry is deleted or garbage collected.
17 #include <linux/string.h>
20 #include <linux/malloc.h>
21 #include <linux/slab.h>
22 #include <linux/init.h>
23 #include <linux/smp_lock.h>
25 #include <asm/uaccess.h>
27 #define DCACHE_PARANOIA 1
28 /* #define DCACHE_DEBUG 1 */
30 /* For managing the dcache */
31 extern unsigned long num_physpages
, page_cache_size
;
32 extern int inodes_stat
[];
33 #define nr_inodes (inodes_stat[0])
35 kmem_cache_t
*dentry_cache
;
38 * This is the single most critical data structure when it comes
39 * to the dcache: the hashtable for lookups. Somebody should try
40 * to make this good - I've just made it work.
42 * This hash-function tries to avoid losing too many bits of hash
43 * information, yet avoid using a prime hash-size or similar.
46 #define D_HASHSIZE (1UL << D_HASHBITS)
47 #define D_HASHMASK (D_HASHSIZE-1)
49 static struct list_head dentry_hashtable
[D_HASHSIZE
];
50 static LIST_HEAD(dentry_unused
);
55 int age_limit
; /* age in seconds */
56 int want_pages
; /* pages requested by system */
58 } dentry_stat
= {0, 0, 45, 0,};
60 static inline void d_free(struct dentry
*dentry
)
62 if (dentry
->d_op
&& dentry
->d_op
->d_release
)
63 dentry
->d_op
->d_release(dentry
);
64 if (dname_external(dentry
))
65 kfree(dentry
->d_name
.name
);
66 kmem_cache_free(dentry_cache
, dentry
);
70 * Release the dentry's inode, using the fileystem
71 * d_iput() operation if defined.
73 static inline void dentry_iput(struct dentry
* dentry
)
75 struct inode
*inode
= dentry
->d_inode
;
77 dentry
->d_inode
= NULL
;
78 list_del(&dentry
->d_alias
);
79 INIT_LIST_HEAD(&dentry
->d_alias
);
80 if (dentry
->d_op
&& dentry
->d_op
->d_iput
)
81 dentry
->d_op
->d_iput(dentry
, inode
);
90 * This is complicated by the fact that we do not want to put
91 * dentries that are no longer on any hash chain on the unused
92 * list: we'd much rather just get rid of them immediately.
94 * However, that implies that we have to traverse the dentry
95 * tree upwards to the parents which might _also_ now be
96 * scheduled for deletion (it may have been only waiting for
97 * its last child to go away).
99 * This tail recursion is done by hand as we don't want to depend
100 * on the compiler to always get this right (gcc generally doesn't).
101 * Real recursion would eat up our stack space.
103 void dput(struct dentry
*dentry
)
111 count
= dentry
->d_count
- 1;
116 * Note that if d_op->d_delete blocks,
117 * the dentry could go back in use.
118 * Each fs will have to watch for this.
120 if (dentry
->d_op
&& dentry
->d_op
->d_delete
) {
121 dentry
->d_op
->d_delete(dentry
);
123 count
= dentry
->d_count
- 1;
128 if (!list_empty(&dentry
->d_lru
)) {
129 dentry_stat
.nr_unused
--;
130 list_del(&dentry
->d_lru
);
132 if (list_empty(&dentry
->d_hash
)) {
133 struct dentry
* parent
;
135 list_del(&dentry
->d_child
);
137 parent
= dentry
->d_parent
;
139 if (dentry
== parent
)
144 list_add(&dentry
->d_lru
, &dentry_unused
);
145 dentry_stat
.nr_unused
++;
147 * Update the timestamp
149 dentry
->d_reftime
= jiffies
;
153 dentry
->d_count
= count
;
157 printk(KERN_CRIT
"Negative d_count (%d) for %s/%s\n",
159 dentry
->d_parent
->d_name
.name
,
160 dentry
->d_name
.name
);
165 * Try to invalidate the dentry if it turns out to be
166 * possible. If there are other dentries that can be
167 * reached through this one we can't delete it.
169 int d_invalidate(struct dentry
* dentry
)
172 * If it's already been dropped, return OK.
174 if (list_empty(&dentry
->d_hash
))
177 * Check whether to do a partial shrink_dcache
178 * to get rid of unused child entries.
180 if (!list_empty(&dentry
->d_subdirs
)) {
181 shrink_dcache_parent(dentry
);
185 * Somebody else still using it?
187 * If it's a directory, we can't drop it
188 * for fear of somebody re-populating it
189 * with children (even though dropping it
190 * would make it unreachable from the root,
191 * we might still populate it if it was a
192 * working directory or similar).
194 if (dentry
->d_count
> 1) {
195 if (dentry
->d_inode
&& S_ISDIR(dentry
->d_inode
->i_mode
))
204 * Throw away a dentry - free the inode, dput the parent.
205 * This requires that the LRU list has already been
208 static inline void prune_one_dentry(struct dentry
* dentry
)
210 struct dentry
* parent
;
212 list_del(&dentry
->d_hash
);
213 list_del(&dentry
->d_child
);
215 parent
= dentry
->d_parent
;
217 if (parent
!= dentry
)
222 * Shrink the dcache. This is done when we need
223 * more memory, or simply when we need to unmount
224 * something (at which point we need to unuse
227 void prune_dcache(int count
)
230 struct dentry
*dentry
;
231 struct list_head
*tmp
= dentry_unused
.prev
;
233 if (tmp
== &dentry_unused
)
235 dentry_stat
.nr_unused
--;
238 dentry
= list_entry(tmp
, struct dentry
, d_lru
);
239 if (!dentry
->d_count
) {
240 prune_one_dentry(dentry
);
248 * Shrink the dcache for the specified super block.
249 * This allows us to unmount a device without disturbing
250 * the dcache for the other devices.
252 * This implementation makes just two traversals of the
253 * unused list. On the first pass we move the selected
254 * dentries to the most recent end, and on the second
255 * pass we free them. The second pass must restart after
256 * each dput(), but since the target dentries are all at
257 * the end, it's really just a single traversal.
259 void shrink_dcache_sb(struct super_block
* sb
)
261 struct list_head
*tmp
, *next
;
262 struct dentry
*dentry
;
265 * Pass one ... move the dentries for the specified
266 * superblock to the most recent end of the unused list.
268 next
= dentry_unused
.next
;
269 while (next
!= &dentry_unused
) {
272 dentry
= list_entry(tmp
, struct dentry
, d_lru
);
273 if (dentry
->d_sb
!= sb
)
276 list_add(tmp
, &dentry_unused
);
280 * Pass two ... free the dentries for this superblock.
283 next
= dentry_unused
.next
;
284 while (next
!= &dentry_unused
) {
287 dentry
= list_entry(tmp
, struct dentry
, d_lru
);
288 if (dentry
->d_sb
!= sb
)
292 dentry_stat
.nr_unused
--;
295 prune_one_dentry(dentry
);
301 * Check whether a root dentry would be in use if all of its
302 * child dentries were freed. This allows a non-destructive
303 * test for unmounting a device.
305 int is_root_busy(struct dentry
*root
)
307 struct dentry
*this_parent
= root
;
308 struct list_head
*next
;
309 int count
= root
->d_count
;
312 next
= this_parent
->d_subdirs
.next
;
314 while (next
!= &this_parent
->d_subdirs
) {
315 struct list_head
*tmp
= next
;
316 struct dentry
*dentry
= list_entry(tmp
, struct dentry
, d_child
);
318 /* Decrement count for unused children */
319 count
+= (dentry
->d_count
- 1);
320 if (!list_empty(&dentry
->d_subdirs
)) {
321 this_parent
= dentry
;
324 /* root is busy if any leaf is busy */
329 * All done at this level ... ascend and resume the search.
331 if (this_parent
!= root
) {
332 next
= this_parent
->d_child
.next
;
333 this_parent
= this_parent
->d_parent
;
336 return (count
> 1); /* remaining users? */
340 * Search the dentry child list for the specified parent,
341 * and move any unused dentries to the end of the unused
342 * list for prune_dcache(). We descend to the next level
343 * whenever the d_subdirs list is non-empty and continue
346 static int select_parent(struct dentry
* parent
)
348 struct dentry
*this_parent
= parent
;
349 struct list_head
*next
;
353 next
= this_parent
->d_subdirs
.next
;
355 while (next
!= &this_parent
->d_subdirs
) {
356 struct list_head
*tmp
= next
;
357 struct dentry
*dentry
= list_entry(tmp
, struct dentry
, d_child
);
359 if (!dentry
->d_count
) {
360 list_del(&dentry
->d_lru
);
361 list_add(&dentry
->d_lru
, dentry_unused
.prev
);
365 * Descend a level if the d_subdirs list is non-empty.
367 if (!list_empty(&dentry
->d_subdirs
)) {
368 this_parent
= dentry
;
370 printk(KERN_DEBUG
"select_parent: descending to %s/%s, found=%d\n",
371 dentry
->d_parent
->d_name
.name
, dentry
->d_name
.name
, found
);
377 * All done at this level ... ascend and resume the search.
379 if (this_parent
!= parent
) {
380 next
= this_parent
->d_child
.next
;
381 this_parent
= this_parent
->d_parent
;
383 printk(KERN_DEBUG
"select_parent: ascending to %s/%s, found=%d\n",
384 this_parent
->d_parent
->d_name
.name
, this_parent
->d_name
.name
, found
);
392 * Prune the dcache to remove unused children of the parent dentry.
394 void shrink_dcache_parent(struct dentry
* parent
)
398 while ((found
= select_parent(parent
)) != 0)
403 * This is called from kswapd when we think we need some
404 * more memory, but aren't really sure how much. So we
405 * carefully try to free a _bit_ of our dcache, but not
409 * 0 - very urgent: shrink everything
411 * 6 - base-level: try to shrink a bit.
413 int shrink_dcache_memory(int priority
, unsigned int gfp_mask
)
415 if (gfp_mask
& __GFP_IO
) {
419 count
= dentry_stat
.nr_unused
/ priority
;
422 /* FIXME: kmem_cache_shrink here should tell us
423 the number of pages freed, and it should
424 work in a __GFP_DMA/__GFP_HIGHMEM behaviour
425 to free only the interesting pages in
426 function of the needs of the current allocation. */
427 kmem_cache_shrink(dentry_cache
);
433 #define NAME_ALLOC_LEN(len) ((len+16) & ~15)
435 struct dentry
* d_alloc(struct dentry
* parent
, const struct qstr
*name
)
438 struct dentry
*dentry
;
440 dentry
= kmem_cache_alloc(dentry_cache
, GFP_KERNEL
);
444 if (name
->len
> DNAME_INLINE_LEN
-1) {
445 str
= kmalloc(NAME_ALLOC_LEN(name
->len
), GFP_KERNEL
);
447 kmem_cache_free(dentry_cache
, dentry
);
451 str
= dentry
->d_iname
;
453 memcpy(str
, name
->name
, name
->len
);
458 dentry
->d_inode
= NULL
;
459 dentry
->d_parent
= NULL
;
462 dentry
->d_parent
= dget(parent
);
463 dentry
->d_sb
= parent
->d_sb
;
464 list_add(&dentry
->d_child
, &parent
->d_subdirs
);
466 INIT_LIST_HEAD(&dentry
->d_child
);
468 dentry
->d_mounts
= dentry
;
469 dentry
->d_covers
= dentry
;
470 INIT_LIST_HEAD(&dentry
->d_hash
);
471 INIT_LIST_HEAD(&dentry
->d_lru
);
472 INIT_LIST_HEAD(&dentry
->d_subdirs
);
473 INIT_LIST_HEAD(&dentry
->d_alias
);
475 dentry
->d_name
.name
= str
;
476 dentry
->d_name
.len
= name
->len
;
477 dentry
->d_name
.hash
= name
->hash
;
479 dentry
->d_fsdata
= NULL
;
484 * Fill in inode information in the entry.
486 * This turns negative dentries into productive full members
489 * NOTE! This assumes that the inode count has been incremented
490 * (or otherwise set) by the caller to indicate that it is now
491 * in use by the dcache..
493 void d_instantiate(struct dentry
*entry
, struct inode
* inode
)
496 list_add(&entry
->d_alias
, &inode
->i_dentry
);
497 entry
->d_inode
= inode
;
500 struct dentry
* d_alloc_root(struct inode
* root_inode
)
502 struct dentry
*res
= NULL
;
505 res
= d_alloc(NULL
, &(const struct qstr
) { "/", 1, 0 });
507 res
->d_sb
= root_inode
->i_sb
;
509 d_instantiate(res
, root_inode
);
515 static inline struct list_head
* d_hash(struct dentry
* parent
, unsigned long hash
)
517 hash
+= (unsigned long) parent
;
518 hash
= hash
^ (hash
>> D_HASHBITS
) ^ (hash
>> D_HASHBITS
*2);
519 return dentry_hashtable
+ (hash
& D_HASHMASK
);
522 struct dentry
* d_lookup(struct dentry
* parent
, struct qstr
* name
)
524 unsigned int len
= name
->len
;
525 unsigned int hash
= name
->hash
;
526 const unsigned char *str
= name
->name
;
527 struct list_head
*head
= d_hash(parent
,hash
);
528 struct list_head
*tmp
= head
->next
;
531 struct dentry
* dentry
= list_entry(tmp
, struct dentry
, d_hash
);
535 if (dentry
->d_name
.hash
!= hash
)
537 if (dentry
->d_parent
!= parent
)
539 if (parent
->d_op
&& parent
->d_op
->d_compare
) {
540 if (parent
->d_op
->d_compare(parent
, &dentry
->d_name
, name
))
543 if (dentry
->d_name
.len
!= len
)
545 if (memcmp(dentry
->d_name
.name
, str
, len
))
554 * An insecure source has sent us a dentry, here we verify it.
556 * This is just to make knfsd able to have the dentry pointer
557 * in the NFS file handle.
559 * NOTE! Do _not_ dereference the pointers before we have
560 * validated them. We can test the pointer values, but we
561 * must not actually use them until we have found a valid
562 * copy of the pointer in kernel space..
564 int d_validate(struct dentry
*dentry
, struct dentry
*dparent
,
565 unsigned int hash
, unsigned int len
)
567 struct list_head
*base
, *lhp
;
570 if (dentry
!= dparent
) {
571 base
= d_hash(dparent
, hash
);
573 while ((lhp
= lhp
->next
) != base
) {
574 if (dentry
== list_entry(lhp
, struct dentry
, d_hash
))
579 * Special case: local mount points don't live in
580 * the hashes, so we search the super blocks.
582 struct super_block
*sb
= sb_entry(super_blocks
.next
);
584 for (; sb
!= sb_entry(&super_blocks
);
585 sb
= sb_entry(sb
->s_list
.next
)) {
588 if (sb
->s_root
== dentry
)
598 * When a file is deleted, we have two options:
599 * - turn this dentry into a negative dentry
600 * - unhash this dentry and free it.
602 * Usually, we want to just turn this into
603 * a negative dentry, but if anybody else is
604 * currently using the dentry or the inode
605 * we can't do that and we fall back on removing
606 * it from the hash queues and waiting for
607 * it to be deleted later when it has no users
609 void d_delete(struct dentry
* dentry
)
612 * Are we the only user?
614 if (dentry
->d_count
== 1) {
620 * If not, just drop the dentry and let dput
626 void d_rehash(struct dentry
* entry
)
628 struct dentry
* parent
= entry
->d_parent
;
630 list_add(&entry
->d_hash
, d_hash(parent
, entry
->d_name
.hash
));
633 #define do_switch(x,y) do { \
634 __typeof__ (x) __tmp = x; \
635 x = y; y = __tmp; } while (0)
638 * When switching names, the actual string doesn't strictly have to
639 * be preserved in the target - because we're dropping the target
640 * anyway. As such, we can just do a simple memcpy() to copy over
641 * the new name before we switch.
643 * Note that we have to be a lot more careful about getting the hash
644 * switched - we have to switch the hash value properly even if it
645 * then no longer matches the actual (corrupted) string of the target.
646 * The has value has to match the hash queue that the dentry is on..
648 static inline void switch_names(struct dentry
* dentry
, struct dentry
* target
)
650 const unsigned char *old_name
, *new_name
;
652 memcpy(dentry
->d_iname
, target
->d_iname
, DNAME_INLINE_LEN
);
653 old_name
= target
->d_name
.name
;
654 new_name
= dentry
->d_name
.name
;
655 if (old_name
== target
->d_iname
)
656 old_name
= dentry
->d_iname
;
657 if (new_name
== dentry
->d_iname
)
658 new_name
= target
->d_iname
;
659 target
->d_name
.name
= new_name
;
660 dentry
->d_name
.name
= old_name
;
664 * We cannibalize "target" when moving dentry on top of it,
665 * because it's going to be thrown away anyway. We could be more
666 * polite about it, though.
668 * This forceful removal will result in ugly /proc output if
669 * somebody holds a file open that got deleted due to a rename.
670 * We could be nicer about the deleted file, and let it show
671 * up under the name it got deleted rather than the name that
674 * Careful with the hash switch. The hash switch depends on
675 * the fact that any list-entry can be a head of the list.
678 void d_move(struct dentry
* dentry
, struct dentry
* target
)
680 if (!dentry
->d_inode
)
681 printk(KERN_WARNING
"VFS: moving negative dcache entry\n");
683 /* Move the dentry to the target hash queue */
684 list_del(&dentry
->d_hash
);
685 list_add(&dentry
->d_hash
, &target
->d_hash
);
687 /* Unhash the target: dput() will then get rid of it */
688 list_del(&target
->d_hash
);
689 INIT_LIST_HEAD(&target
->d_hash
);
691 list_del(&dentry
->d_child
);
692 list_del(&target
->d_child
);
694 /* Switch the parents and the names.. */
695 switch_names(dentry
, target
);
696 do_switch(dentry
->d_parent
, target
->d_parent
);
697 do_switch(dentry
->d_name
.len
, target
->d_name
.len
);
698 do_switch(dentry
->d_name
.hash
, target
->d_name
.hash
);
700 /* And add them back to the (new) parent lists */
701 list_add(&target
->d_child
, &target
->d_parent
->d_subdirs
);
702 list_add(&dentry
->d_child
, &dentry
->d_parent
->d_subdirs
);
706 * "buflen" should be PAGE_SIZE or more.
708 char * d_path(struct dentry
*dentry
, char *buffer
, int buflen
)
710 char * end
= buffer
+buflen
;
712 struct dentry
* root
= current
->fs
->root
;
716 if (!IS_ROOT(dentry
) && list_empty(&dentry
->d_hash
)) {
719 memcpy(end
, " (deleted)", 10);
727 struct dentry
* parent
;
732 dentry
= dentry
->d_covers
;
733 parent
= dentry
->d_parent
;
734 if (dentry
== parent
)
736 namelen
= dentry
->d_name
.len
;
737 buflen
-= namelen
+ 1;
741 memcpy(end
, dentry
->d_name
.name
, namelen
);
750 * NOTE! The user-level library version returns a
751 * character pointer. The kernel system call just
752 * returns the length of the buffer filled (which
753 * includes the ending '\0' character), or a negative
754 * error value. So libc would do something like
756 * char *getcwd(char * buf, size_t size)
760 * retval = sys_getcwd(buf, size);
767 asmlinkage
long sys_getcwd(char *buf
, unsigned long size
)
770 struct dentry
*pwd
= current
->fs
->pwd
;
773 /* Has the current directory has been unlinked? */
774 if (pwd
->d_parent
== pwd
|| !list_empty(&pwd
->d_hash
)) {
775 char *page
= (char *) __get_free_page(GFP_USER
);
779 char * cwd
= d_path(pwd
, page
, PAGE_SIZE
);
782 len
= PAGE_SIZE
+ page
- cwd
;
785 if (copy_to_user(buf
, cwd
, len
))
788 free_page((unsigned long) page
);
795 * Test whether new_dentry is a subdirectory of old_dentry.
797 * Trivially implemented using the dcache structure
799 int is_subdir(struct dentry
* new_dentry
, struct dentry
* old_dentry
)
805 if (new_dentry
!= old_dentry
) {
806 struct dentry
* parent
= new_dentry
->d_parent
;
807 if (parent
== new_dentry
)
819 * Check whether a dentry already exists for the given name,
820 * and return the inode number if it has an inode.
822 * This routine is used to post-process directory listings for
823 * filesystems using synthetic inode numbers, and is necessary
824 * to keep getcwd() working.
826 ino_t
find_inode_number(struct dentry
*dir
, struct qstr
*name
)
828 struct dentry
* dentry
;
832 * Check for a fs-specific hash function. Note that we must
833 * calculate the standard hash first, as the d_op->d_hash()
834 * routine may choose to leave the hash value unchanged.
836 name
->hash
= full_name_hash(name
->name
, name
->len
);
837 if (dir
->d_op
&& dir
->d_op
->d_hash
)
839 if (dir
->d_op
->d_hash(dir
, name
) != 0)
843 dentry
= d_lookup(dir
, name
);
847 ino
= dentry
->d_inode
->i_ino
;
854 void __init
dcache_init(void)
857 struct list_head
*d
= dentry_hashtable
;
860 * A constructor could be added for stable state like the lists,
861 * but it is probably not worth it because of the cache nature
863 * If fragmentation is too bad then the SLAB_HWCACHE_ALIGN
864 * flag could be removed here, to hint to the allocator that
865 * it should not try to get multiple page regions.
867 dentry_cache
= kmem_cache_create("dentry_cache",
868 sizeof(struct dentry
),
873 panic("Cannot create dentry cache");