Import 2.1.112pre1
[davej-history.git] / fs / dcache.c
blob912474d45f503e1c5d855f2666311a9920a9bb0b
1 /*
2 * fs/dcache.c
4 * Complete reimplementation
5 * (C) 1997 Thomas Schoebel-Theuer
6 */
8 /*
9 * Notes on the allocation strategy:
11 * The dcache is a master of the icache - whenever a dcache entry
12 * exists, the inode will always exist. "iput()" is done either when
13 * the dcache entry is deleted or garbage collected.
16 #include <linux/string.h>
17 #include <linux/mm.h>
18 #include <linux/fs.h>
19 #include <linux/malloc.h>
20 #include <linux/init.h>
22 #include <asm/uaccess.h>
24 #define DCACHE_PARANOIA 1
25 /* #define DCACHE_DEBUG 1 */
27 /* For managing the dcache */
28 extern unsigned long num_physpages, page_cache_size;
29 extern int inodes_stat[];
30 #define nr_inodes (inodes_stat[0])
33 * This is the single most critical data structure when it comes
34 * to the dcache: the hashtable for lookups. Somebody should try
35 * to make this good - I've just made it work.
37 * This hash-function tries to avoid losing too many bits of hash
38 * information, yet avoid using a prime hash-size or similar.
40 #define D_HASHBITS 10
41 #define D_HASHSIZE (1UL << D_HASHBITS)
42 #define D_HASHMASK (D_HASHSIZE-1)
44 static struct list_head dentry_hashtable[D_HASHSIZE];
45 static LIST_HEAD(dentry_unused);
47 struct {
48 int nr_dentry;
49 int nr_unused;
50 int age_limit; /* age in seconds */
51 int want_pages; /* pages requested by system */
52 int dummy[2];
53 } dentry_stat = {0, 0, 45, 0,};
55 static inline void d_free(struct dentry *dentry)
57 if (dentry->d_op && dentry->d_op->d_release)
58 dentry->d_op->d_release(dentry);
59 kfree(dentry->d_name.name);
60 kfree(dentry);
64 * Release the dentry's inode, using the fileystem
65 * d_iput() operation if defined.
67 static inline void dentry_iput(struct dentry * dentry)
69 struct inode *inode = dentry->d_inode;
70 if (inode) {
71 dentry->d_inode = NULL;
72 list_del(&dentry->d_alias);
73 INIT_LIST_HEAD(&dentry->d_alias);
74 if (dentry->d_op && dentry->d_op->d_iput)
75 dentry->d_op->d_iput(dentry, inode);
76 else
77 iput(inode);
82 * dput()
84 * This is complicated by the fact that we do not want to put
85 * dentries that are no longer on any hash chain on the unused
86 * list: we'd much rather just get rid of them immediately.
88 * However, that implies that we have to traverse the dentry
89 * tree upwards to the parents which might _also_ now be
90 * scheduled for deletion (it may have been only waiting for
91 * its last child to go away).
93 * This tail recursion is done by hand as we don't want to depend
94 * on the compiler to always get this right (gcc generally doesn't).
95 * Real recursion would eat up our stack space.
97 void dput(struct dentry *dentry)
99 int count;
101 if (!dentry)
102 return;
104 repeat:
105 count = dentry->d_count - 1;
106 if (count != 0)
107 goto out;
110 * Note that if d_op->d_delete blocks,
111 * the dentry could go back in use.
112 * Each fs will have to watch for this.
114 if (dentry->d_op && dentry->d_op->d_delete) {
115 dentry->d_op->d_delete(dentry);
117 count = dentry->d_count - 1;
118 if (count != 0)
119 goto out;
122 if (!list_empty(&dentry->d_lru)) {
123 dentry_stat.nr_unused--;
124 list_del(&dentry->d_lru);
126 if (list_empty(&dentry->d_hash)) {
127 struct dentry * parent;
129 list_del(&dentry->d_child);
130 dentry_iput(dentry);
131 parent = dentry->d_parent;
132 d_free(dentry);
133 if (dentry == parent)
134 return;
135 dentry = parent;
136 goto repeat;
138 list_add(&dentry->d_lru, &dentry_unused);
139 dentry_stat.nr_unused++;
141 * Update the timestamp
143 dentry->d_reftime = jiffies;
145 out:
146 if (count >= 0) {
147 dentry->d_count = count;
148 return;
151 printk(KERN_CRIT "Negative d_count (%d) for %s/%s\n",
152 count,
153 dentry->d_parent->d_name.name,
154 dentry->d_name.name);
155 *(int *)0 = 0;
159 * Try to invalidate the dentry if it turns out to be
160 * possible. If there are other users of the dentry we
161 * can't invalidate it.
163 int d_invalidate(struct dentry * dentry)
165 /* Check whether to do a partial shrink_dcache */
166 if (!list_empty(&dentry->d_subdirs))
167 shrink_dcache_parent(dentry);
169 if (dentry->d_count != 1)
170 return -EBUSY;
172 d_drop(dentry);
173 return 0;
177 * Select less valuable dentries to be pruned when we need
178 * inodes or memory. The selected dentries are moved to the
179 * old end of the list where prune_dcache() can find them.
181 * Negative dentries are included in the selection so that
182 * they don't accumulate at the end of the list. The count
183 * returned is the total number of dentries selected, which
184 * may be much larger than the requested number of inodes.
186 int select_dcache(int inode_count, int page_count)
188 struct list_head *next, *tail = &dentry_unused;
189 int found = 0, forward = 0, young = 8;
190 int depth = dentry_stat.nr_unused >> 1;
191 unsigned long min_value = 0, max_value = 4;
193 if (page_count)
194 max_value = -1;
196 next = tail->prev;
197 while (next != &dentry_unused && depth--) {
198 struct list_head *tmp = next;
199 struct dentry *dentry = list_entry(tmp, struct dentry, d_lru);
200 struct inode *inode = dentry->d_inode;
201 unsigned long value = 0;
203 next = tmp->prev;
204 if (forward)
205 next = tmp->next;
206 if (dentry->d_count) {
207 dentry_stat.nr_unused--;
208 list_del(tmp);
209 INIT_LIST_HEAD(tmp);
210 continue;
213 * Check the dentry's age to see whether to change direction.
215 if (!forward) {
216 int age = (jiffies - dentry->d_reftime) / HZ;
217 if (age < dentry_stat.age_limit) {
218 if (!--young) {
219 forward = 1;
220 next = dentry_unused.next;
222 * Update the limits -- we don't want
223 * files with too few or too many pages.
225 if (page_count) {
226 min_value = 3;
227 max_value = 15;
229 #ifdef DCACHE_DEBUG
230 printk("select_dcache: %s/%s age=%d, scanning forward\n",
231 dentry->d_parent->d_name.name, dentry->d_name.name, age);
232 #endif
234 continue;
239 * Select dentries based on the page cache count ...
240 * should factor in number of uses as well. We take
241 * all negative dentries so that they don't accumulate.
242 * (We skip inodes that aren't immediately available.)
244 if (inode) {
245 value = inode->i_nrpages;
246 if (value >= max_value || value < min_value)
247 continue;
248 if (inode->i_state || inode->i_count > 1)
249 continue;
253 * Move the selected dentries behind the tail.
255 if (tmp != tail->prev) {
256 list_del(tmp);
257 list_add(tmp, tail->prev);
259 tail = tmp;
260 found++;
261 if (inode && --inode_count <= 0)
262 break;
263 if (page_count && (page_count -= value) <= 0)
264 break;
266 return found;
270 * Throw away a dentry - free the inode, dput the parent.
271 * This requires that the LRU list has already been
272 * removed.
274 static inline void prune_one_dentry(struct dentry * dentry)
276 struct dentry * parent;
278 list_del(&dentry->d_hash);
279 list_del(&dentry->d_child);
280 dentry_iput(dentry);
281 parent = dentry->d_parent;
282 d_free(dentry);
283 dput(parent);
287 * Shrink the dcache. This is done when we need
288 * more memory, or simply when we need to unmount
289 * something (at which point we need to unuse
290 * all dentries).
292 void prune_dcache(int count)
294 for (;;) {
295 struct dentry *dentry;
296 struct list_head *tmp = dentry_unused.prev;
298 if (tmp == &dentry_unused)
299 break;
300 dentry_stat.nr_unused--;
301 list_del(tmp);
302 INIT_LIST_HEAD(tmp);
303 dentry = list_entry(tmp, struct dentry, d_lru);
304 if (!dentry->d_count) {
305 prune_one_dentry(dentry);
306 if (!--count)
307 break;
313 * Shrink the dcache for the specified super block.
314 * This allows us to unmount a device without disturbing
315 * the dcache for the other devices.
317 * This implementation makes just two traversals of the
318 * unused list. On the first pass we move the selected
319 * dentries to the most recent end, and on the second
320 * pass we free them. The second pass must restart after
321 * each dput(), but since the target dentries are all at
322 * the end, it's really just a single traversal.
324 void shrink_dcache_sb(struct super_block * sb)
326 struct list_head *tmp, *next;
327 struct dentry *dentry;
330 * Pass one ... move the dentries for the specified
331 * superblock to the most recent end of the unused list.
333 next = dentry_unused.next;
334 while (next != &dentry_unused) {
335 tmp = next;
336 next = tmp->next;
337 dentry = list_entry(tmp, struct dentry, d_lru);
338 if (dentry->d_sb != sb)
339 continue;
340 list_del(tmp);
341 list_add(tmp, &dentry_unused);
345 * Pass two ... free the dentries for this superblock.
347 repeat:
348 next = dentry_unused.next;
349 while (next != &dentry_unused) {
350 tmp = next;
351 next = tmp->next;
352 dentry = list_entry(tmp, struct dentry, d_lru);
353 if (dentry->d_sb != sb)
354 continue;
355 if (dentry->d_count)
356 continue;
357 dentry_stat.nr_unused--;
358 list_del(tmp);
359 INIT_LIST_HEAD(tmp);
360 prune_one_dentry(dentry);
361 goto repeat;
366 * Check whether a root dentry would be in use if all of its
367 * child dentries were freed. This allows a non-destructive
368 * test for unmounting a device.
370 int is_root_busy(struct dentry *root)
372 struct dentry *this_parent = root;
373 struct list_head *next;
374 int count = root->d_count;
376 repeat:
377 next = this_parent->d_subdirs.next;
378 resume:
379 while (next != &this_parent->d_subdirs) {
380 struct list_head *tmp = next;
381 struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
382 next = tmp->next;
383 /* Decrement count for unused children */
384 count += (dentry->d_count - 1);
385 if (!list_empty(&dentry->d_subdirs)) {
386 this_parent = dentry;
387 goto repeat;
389 /* root is busy if any leaf is busy */
390 if (dentry->d_count)
391 return 1;
394 * All done at this level ... ascend and resume the search.
396 if (this_parent != root) {
397 next = this_parent->d_child.next;
398 this_parent = this_parent->d_parent;
399 goto resume;
401 return (count == 1); /* one remaining use count? */
405 * Search the dentry child list for the specified parent,
406 * and move any unused dentries to the end of the unused
407 * list for prune_dcache(). We descend to the next level
408 * whenever the d_subdirs list is non-empty and continue
409 * searching.
411 static int select_parent(struct dentry * parent)
413 struct dentry *this_parent = parent;
414 struct list_head *next;
415 int found = 0;
417 repeat:
418 next = this_parent->d_subdirs.next;
419 resume:
420 while (next != &this_parent->d_subdirs) {
421 struct list_head *tmp = next;
422 struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
423 next = tmp->next;
424 if (!dentry->d_count) {
425 list_del(&dentry->d_lru);
426 list_add(&dentry->d_lru, dentry_unused.prev);
427 found++;
430 * Descend a level if the d_subdirs list is non-empty.
432 if (!list_empty(&dentry->d_subdirs)) {
433 this_parent = dentry;
434 #ifdef DCACHE_DEBUG
435 printk(KERN_DEBUG "select_parent: descending to %s/%s, found=%d\n",
436 dentry->d_parent->d_name.name, dentry->d_name.name, found);
437 #endif
438 goto repeat;
442 * All done at this level ... ascend and resume the search.
444 if (this_parent != parent) {
445 next = this_parent->d_child.next;
446 this_parent = this_parent->d_parent;
447 #ifdef DCACHE_DEBUG
448 printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n",
449 this_parent->d_parent->d_name.name, this_parent->d_name.name, found);
450 #endif
451 goto resume;
453 return found;
457 * Prune the dcache to remove unused children of the parent dentry.
459 void shrink_dcache_parent(struct dentry * parent)
461 int found;
463 while ((found = select_parent(parent)) != 0)
464 prune_dcache(found);
468 * This is called from kswapd when we think we need some
469 * more memory, but aren't really sure how much. So we
470 * carefully try to free a _bit_ of our dcache, but not
471 * too much.
473 * Priority:
474 * 0 - very urgent: schrink everything
475 * ...
476 * 6 - base-level: try to shrink a bit.
478 void shrink_dcache_memory(int priority, unsigned int gfp_mask)
480 prune_dcache(0);
483 #define NAME_ALLOC_LEN(len) ((len+16) & ~15)
485 struct dentry * d_alloc(struct dentry * parent, const struct qstr *name)
487 char * str;
488 struct dentry *dentry;
491 * Prune the dcache if there are too many unused dentries.
493 if (dentry_stat.nr_unused > 3*(nr_inodes >> 1)) {
494 #ifdef DCACHE_DEBUG
495 printk("d_alloc: %d unused, pruning dcache\n", dentry_stat.nr_unused);
496 #endif
497 prune_dcache(8);
498 free_inode_memory(8);
501 dentry = kmalloc(sizeof(struct dentry), GFP_KERNEL);
502 if (!dentry)
503 return NULL;
505 str = kmalloc(NAME_ALLOC_LEN(name->len), GFP_KERNEL);
506 if (!str) {
507 kfree(dentry);
508 return NULL;
511 memcpy(str, name->name, name->len);
512 str[name->len] = 0;
514 dentry->d_count = 1;
515 dentry->d_flags = 0;
516 dentry->d_inode = NULL;
517 dentry->d_parent = NULL;
518 dentry->d_sb = NULL;
519 if (parent) {
520 dentry->d_parent = dget(parent);
521 dentry->d_sb = parent->d_sb;
522 list_add(&dentry->d_child, &parent->d_subdirs);
523 } else
524 INIT_LIST_HEAD(&dentry->d_child);
526 dentry->d_mounts = dentry;
527 dentry->d_covers = dentry;
528 INIT_LIST_HEAD(&dentry->d_hash);
529 INIT_LIST_HEAD(&dentry->d_lru);
530 INIT_LIST_HEAD(&dentry->d_subdirs);
531 INIT_LIST_HEAD(&dentry->d_alias);
533 dentry->d_name.name = str;
534 dentry->d_name.len = name->len;
535 dentry->d_name.hash = name->hash;
536 dentry->d_op = NULL;
537 dentry->d_fsdata = NULL;
538 return dentry;
542 * Fill in inode information in the entry.
544 * This turns negative dentries into productive full members
545 * of society.
547 * NOTE! This assumes that the inode count has been incremented
548 * (or otherwise set) by the caller to indicate that it is now
549 * in use by the dcache..
551 void d_instantiate(struct dentry *entry, struct inode * inode)
553 if (inode)
554 list_add(&entry->d_alias, &inode->i_dentry);
555 entry->d_inode = inode;
558 struct dentry * d_alloc_root(struct inode * root_inode, struct dentry *old_root)
560 struct dentry *res = NULL;
562 if (root_inode) {
563 res = d_alloc(NULL, &(const struct qstr) { "/", 1, 0 });
564 if (res) {
565 res->d_sb = root_inode->i_sb;
566 res->d_parent = res;
567 d_instantiate(res, root_inode);
570 return res;
573 static inline struct list_head * d_hash(struct dentry * parent, unsigned long hash)
575 hash += (unsigned long) parent;
576 hash = hash ^ (hash >> D_HASHBITS) ^ (hash >> D_HASHBITS*2);
577 return dentry_hashtable + (hash & D_HASHMASK);
580 struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
582 unsigned int len = name->len;
583 unsigned int hash = name->hash;
584 const unsigned char *str = name->name;
585 struct list_head *head = d_hash(parent,hash);
586 struct list_head *tmp = head->next;
588 while (tmp != head) {
589 struct dentry * dentry = list_entry(tmp, struct dentry, d_hash);
591 tmp = tmp->next;
592 if (dentry->d_name.hash != hash)
593 continue;
594 if (dentry->d_parent != parent)
595 continue;
596 if (parent->d_op && parent->d_op->d_compare) {
597 if (parent->d_op->d_compare(parent, &dentry->d_name, name))
598 continue;
599 } else {
600 if (dentry->d_name.len != len)
601 continue;
602 if (memcmp(dentry->d_name.name, str, len))
603 continue;
605 return dget(dentry);
607 return NULL;
611 * An insecure source has sent us a dentry, here we verify it.
613 * This is just to make knfsd able to have the dentry pointer
614 * in the NFS file handle.
616 * NOTE! Do _not_ dereference the pointers before we have
617 * validated them. We can test the pointer values, but we
618 * must not actually use them until we have found a valid
619 * copy of the pointer in kernel space..
621 int d_validate(struct dentry *dentry, struct dentry *dparent,
622 unsigned int hash, unsigned int len)
624 struct list_head *base, *lhp;
625 int valid = 1;
627 if (dentry != dparent) {
628 base = d_hash(dparent, hash);
629 lhp = base;
630 while ((lhp = lhp->next) != base) {
631 if (dentry == list_entry(lhp, struct dentry, d_hash))
632 goto out;
634 } else {
636 * Special case: local mount points don't live in
637 * the hashes, so we search the super blocks.
639 struct super_block *sb = super_blocks + 0;
641 for (; sb < super_blocks + NR_SUPER; sb++) {
642 if (!sb->s_dev)
643 continue;
644 if (sb->s_root == dentry)
645 goto out;
648 valid = 0;
649 out:
650 return valid;
654 * When a file is deleted, we have two options:
655 * - turn this dentry into a negative dentry
656 * - unhash this dentry and free it.
658 * Usually, we want to just turn this into
659 * a negative dentry, but if anybody else is
660 * currently using the dentry or the inode
661 * we can't do that and we fall back on removing
662 * it from the hash queues and waiting for
663 * it to be deleted later when it has no users
665 void d_delete(struct dentry * dentry)
668 * Are we the only user?
670 if (dentry->d_count == 1) {
671 dentry_iput(dentry);
672 return;
676 * If not, just drop the dentry and let dput
677 * pick up the tab..
679 d_drop(dentry);
682 void d_add(struct dentry * entry, struct inode * inode)
684 struct dentry * parent = entry->d_parent;
686 list_add(&entry->d_hash, d_hash(parent, entry->d_name.hash));
687 d_instantiate(entry, inode);
690 #define do_switch(x,y) do { \
691 __typeof__ (x) __tmp = x; \
692 x = y; y = __tmp; } while (0)
695 * We cannibalize "target" when moving dentry on top of it,
696 * because it's going to be thrown away anyway. We could be more
697 * polite about it, though.
699 * This forceful removal will result in ugly /proc output if
700 * somebody holds a file open that got deleted due to a rename.
701 * We could be nicer about the deleted file, and let it show
702 * up under the name it got deleted rather than the name that
703 * deleted it.
705 * Careful with the hash switch. The hash switch depends on
706 * the fact that any list-entry can be a head of the list.
707 * Think about it.
709 void d_move(struct dentry * dentry, struct dentry * target)
711 if (!dentry->d_inode)
712 printk(KERN_WARNING "VFS: moving negative dcache entry\n");
714 /* Move the dentry to the target hash queue */
715 list_del(&dentry->d_hash);
716 list_add(&dentry->d_hash, &target->d_hash);
718 /* Unhash the target: dput() will then get rid of it */
719 list_del(&target->d_hash);
720 INIT_LIST_HEAD(&target->d_hash);
722 list_del(&dentry->d_child);
723 list_del(&target->d_child);
725 /* Switch the parents and the names.. */
726 do_switch(dentry->d_parent, target->d_parent);
727 do_switch(dentry->d_name.name, target->d_name.name);
728 do_switch(dentry->d_name.len, target->d_name.len);
729 do_switch(dentry->d_name.hash, target->d_name.hash);
730 list_add(&target->d_child, &target->d_parent->d_subdirs);
731 list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
735 * "buflen" should be PAGE_SIZE or more.
737 char * d_path(struct dentry *dentry, char *buffer, int buflen)
739 char * end = buffer+buflen;
740 char * retval;
741 struct dentry * root = current->fs->root;
743 *--end = '\0';
744 buflen--;
745 if (dentry->d_parent != dentry && list_empty(&dentry->d_hash)) {
746 buflen -= 10;
747 end -= 10;
748 memcpy(end, " (deleted)", 10);
751 /* Get '/' right */
752 retval = end-1;
753 *retval = '/';
755 for (;;) {
756 struct dentry * parent;
757 int namelen;
759 if (dentry == root)
760 break;
761 dentry = dentry->d_covers;
762 parent = dentry->d_parent;
763 if (dentry == parent)
764 break;
765 namelen = dentry->d_name.len;
766 buflen -= namelen + 1;
767 if (buflen < 0)
768 break;
769 end -= namelen;
770 memcpy(end, dentry->d_name.name, namelen);
771 *--end = '/';
772 retval = end;
773 dentry = parent;
775 return retval;
779 * NOTE! The user-level library version returns a
780 * character pointer. The kernel system call just
781 * returns the length of the buffer filled (which
782 * includes the ending '\0' character), or a negative
783 * error value. So libc would do something like
785 * char *getcwd(char * buf, size_t size)
787 * int retval;
789 * retval = sys_getcwd(buf, size);
790 * if (retval >= 0)
791 * return buf;
792 * errno = -retval;
793 * return NULL;
796 asmlinkage int sys_getcwd(char *buf, unsigned long size)
798 int error;
799 struct dentry *pwd = current->fs->pwd;
801 error = -ENOENT;
802 /* Has the current directory has been unlinked? */
803 if (pwd->d_parent == pwd || !list_empty(&pwd->d_hash)) {
804 char *page = (char *) __get_free_page(GFP_USER);
805 error = -ENOMEM;
806 if (page) {
807 unsigned long len;
808 char * cwd = d_path(pwd, page, PAGE_SIZE);
810 error = -ERANGE;
811 len = PAGE_SIZE + page - cwd;
812 if (len <= size) {
813 error = len;
814 if (copy_to_user(buf, cwd, len))
815 error = -EFAULT;
817 free_page((unsigned long) page);
820 return error;
824 * Test whether new_dentry is a subdirectory of old_dentry.
826 * Trivially implemented using the dcache structure
828 int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry)
830 int result;
832 result = 0;
833 for (;;) {
834 if (new_dentry != old_dentry) {
835 struct dentry * parent = new_dentry->d_parent;
836 if (parent == new_dentry)
837 break;
838 new_dentry = parent;
839 continue;
841 result = 1;
842 break;
844 return result;
848 * Check whether a dentry already exists for the given name,
849 * and return the inode number if it has an inode.
851 * This routine is used to post-process directory listings for
852 * filesystems using synthetic inode numbers, and is necessary
853 * to keep getcwd() working.
855 ino_t find_inode_number(struct dentry *dir, struct qstr *name)
857 struct dentry * dentry;
858 ino_t ino = 0;
861 * Check for a fs-specific hash function. Note that we must
862 * calculate the standard hash first, as the d_op->d_hash()
863 * routine may choose to leave the hash value unchanged.
865 name->hash = full_name_hash(name->name, name->len);
866 if (dir->d_op && dir->d_op->d_hash)
868 if (dir->d_op->d_hash(dir, name) != 0)
869 goto out;
872 dentry = d_lookup(dir, name);
873 if (dentry)
875 if (dentry->d_inode)
876 ino = dentry->d_inode->i_ino;
877 dput(dentry);
879 out:
880 return ino;
883 __initfunc(void dcache_init(void))
885 int i;
886 struct list_head *d = dentry_hashtable;
888 i = D_HASHSIZE;
889 do {
890 INIT_LIST_HEAD(d);
891 d++;
892 i--;
893 } while (i);