/*
 * linux/fs/inode.c
 *
 * (C) 1997 Linus Torvalds
 */

#include <linux/config.h>
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/dcache.h>
#include <linux/init.h>
#include <linux/quotaops.h>
#include <linux/slab.h>
/*
 * New inode.c implementation.
 *
 * This implementation has the basic premise of trying
 * to be extremely low-overhead and SMP-safe, yet be
 * simple enough to be "obviously correct".
 *
 * Famous last words.
 */

/* inode dynamic allocation 1999, Andrea Arcangeli <andrea@suse.de> */

#define INODE_PARANOIA 1
/* #define INODE_DEBUG 1 */
/*
 * Inode lookup is no longer as critical as it used to be:
 * most of the lookups are going to be through the dcache.
 */
#define HASH_BITS	14
#define HASH_SIZE	(1UL << HASH_BITS)
#define HASH_MASK	(HASH_SIZE-1)
/*
 * Each inode can be on two separate lists. One is
 * the hash list of the inode, used for lookups. The
 * other linked list is the "type" list:
 *  "in_use" - valid inode, i_count > 0, i_nlink > 0
 *  "dirty"  - as "in_use" but also dirty
 *  "unused" - valid inode, i_count = 0
 *
 * A "dirty" list is maintained for each super block,
 * allowing for low-overhead inode sync() operations.
 */

static LIST_HEAD(inode_in_use);
static LIST_HEAD(inode_unused);
static struct list_head inode_hashtable[HASH_SIZE];
static LIST_HEAD(anon_hash_chain); /* for inodes with NULL i_sb */

/*
 * A simple spinlock to protect the list manipulations.
 *
 * NOTE! You also have to own the lock if you change
 * the i_state of an inode while it is in use..
 */
spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;
/*
 * Statistics gathering..
 */
struct {
	int nr_inodes;
	int nr_unused;
	int dummy[5];
} inodes_stat = {0, 0,};

static kmem_cache_t * inode_cachep;

#define alloc_inode() \
	 ((struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL))
#define destroy_inode(inode) kmem_cache_free(inode_cachep, (inode))
/*
 * These are initializations that only need to be done
 * once, because the fields are idempotent across use
 * of the inode, so we let the slab cache know about it.
 */
static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
{
	struct inode * inode = (struct inode *) foo;

	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR)
	{
		memset(inode, 0, sizeof(*inode));
		init_waitqueue_head(&inode->i_wait);
		INIT_LIST_HEAD(&inode->i_hash);
		INIT_LIST_HEAD(&inode->i_data.pages);
		INIT_LIST_HEAD(&inode->i_dentry);
		sema_init(&inode->i_sem, 1);
		spin_lock_init(&inode->i_shared_lock);
	}
}
/*
 * Put the inode on the super block's dirty list.
 *
 * CAREFUL! We mark it dirty unconditionally, but
 * move it onto the dirty list only if it is hashed.
 * If it was not hashed, it will never be added to
 * the dirty list even if it is later hashed, as it
 * will have been marked dirty already.
 *
 * In short, make sure you hash any inodes _before_
 * you start marking them dirty..
 */
void __mark_inode_dirty(struct inode *inode)
{
	struct super_block * sb = inode->i_sb;

	if (sb) {
		spin_lock(&inode_lock);
		if (!(inode->i_state & I_DIRTY)) {
			inode->i_state |= I_DIRTY;
			/* Only add valid (ie hashed) inodes to the dirty list */
			if (!list_empty(&inode->i_hash)) {
				list_del(&inode->i_list);
				list_add(&inode->i_list, &sb->s_dirty);
			}
		}
		spin_unlock(&inode_lock);
	}
}
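
/*
 * Callers normally go through the mark_inode_dirty() wrapper (as
 * update_atime() at the bottom of this file does) rather than calling
 * this directly.  Setting I_DIRTY and moving the inode onto sb->s_dirty
 * happen under inode_lock, so a concurrent sync_inodes() cannot see a
 * dirty inode that is missing from its super block's dirty list.
 */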
static void __wait_on_inode(struct inode * inode)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(&inode->i_wait, &wait);
repeat:
	set_current_state(TASK_UNINTERRUPTIBLE);
	if (inode->i_state & I_LOCK) {
		schedule();
		goto repeat;
	}
	remove_wait_queue(&inode->i_wait, &wait);
	current->state = TASK_RUNNING;
}

static inline void wait_on_inode(struct inode *inode)
{
	if (inode->i_state & I_LOCK)
		__wait_on_inode(inode);
}
static inline void write_inode(struct inode *inode)
{
	if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode)
		inode->i_sb->s_op->write_inode(inode);
}

static inline void sync_one(struct inode *inode)
{
	if (inode->i_state & I_LOCK) {
		spin_unlock(&inode_lock);
		__wait_on_inode(inode);
		spin_lock(&inode_lock);
	} else {
		list_del(&inode->i_list);
		list_add(&inode->i_list,
			 inode->i_count ? &inode_in_use : &inode_unused);
		/* Set I_LOCK, reset I_DIRTY */
		inode->i_state ^= I_DIRTY | I_LOCK;
		spin_unlock(&inode_lock);

		write_inode(inode);

		spin_lock(&inode_lock);
		inode->i_state &= ~I_LOCK;
		wake_up(&inode->i_wait);
	}
}
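
/*
 * The I_DIRTY/I_LOCK XOR in sync_one() depends on the caller only
 * passing in inodes that are dirty and not yet locked: under inode_lock
 * it atomically sets I_LOCK and clears I_DIRTY, so the ->write_inode()
 * call itself can run without the spinlock while any other CPU that
 * needs the inode sleeps on i_wait until I_LOCK is cleared again.
 */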
static inline void sync_list(struct list_head *head)
{
	struct list_head * tmp;

	while ((tmp = head->prev) != head)
		sync_one(list_entry(tmp, struct inode, i_list));
}

/*
 * "sync_inodes()" goes through each super block's dirty list,
 * writes the inodes out, and puts them back on the normal list.
 */
void sync_inodes(kdev_t dev)
{
	struct super_block * sb = sb_entry(super_blocks.next);

	/*
	 * Search the super_blocks list for the device(s) to sync.
	 */
	spin_lock(&inode_lock);
	for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
		if (!sb->s_dev)
			continue;
		if (dev && sb->s_dev != dev)
			continue;

		sync_list(&sb->s_dirty);

		if (dev)
			break;
	}
	spin_unlock(&inode_lock);
}
/*
 * Called with the spinlock already held..
 */
static void sync_all_inodes(void)
{
	struct super_block * sb = sb_entry(super_blocks.next);
	for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
		if (!sb->s_dev)
			continue;
		sync_list(&sb->s_dirty);
	}
}

/*
 * Needed by knfsd
 */
void write_inode_now(struct inode *inode)
{
	struct super_block * sb = inode->i_sb;

	if (sb) {
		spin_lock(&inode_lock);
		while (inode->i_state & I_DIRTY)
			sync_one(inode);
		spin_unlock(&inode_lock);
	}
	else
		printk("write_inode_now: no super block\n");
}
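
/*
 * The while loop in write_inode_now() is what makes it synchronous: if
 * another CPU currently has the inode I_LOCKed, sync_one() only waits
 * for it and the I_DIRTY test is retried, so this does not return until
 * the ->write_inode() call for this inode has actually been issued (or
 * the inode was never dirty in the first place).
 */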
/*
 * This is called by the filesystem to tell us
 * that the inode is no longer useful. We just
 * terminate it with extreme prejudice.
 */
void clear_inode(struct inode *inode)
{
	if (inode->i_data.nrpages)
		BUG();
	if (!(inode->i_state & I_FREEING))
		BUG();
	wait_on_inode(inode);
	if (IS_QUOTAINIT(inode))
		DQUOT_DROP(inode);
	if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->clear_inode)
		inode->i_sb->s_op->clear_inode(inode);
	if (inode->i_bdev) {
		bdput(inode->i_bdev);
		inode->i_bdev = NULL;
	}
	inode->i_state = 0;
}
/*
 * Dispose-list gets a local list with local inodes in it, so it doesn't
 * need to worry about list corruption and SMP locks.
 */
static void dispose_list(struct list_head * head)
{
	struct list_head * inode_entry;
	struct inode * inode;

	while ((inode_entry = head->next) != head)
	{
		list_del(inode_entry);

		inode = list_entry(inode_entry, struct inode, i_list);
		if (inode->i_data.nrpages)
			truncate_inode_pages(inode, 0);
		clear_inode(inode);
		destroy_inode(inode);
	}
}
/*
 * Invalidate all inodes for a device.
 */
static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose)
{
	struct list_head *next;
	int busy = 0, count = 0;

	next = head->next;
	for (;;) {
		struct list_head * tmp = next;
		struct inode * inode;

		next = next->next;
		if (tmp == head)
			break;
		inode = list_entry(tmp, struct inode, i_list);
		if (inode->i_sb != sb)
			continue;
		if (!inode->i_count) {
			list_del(&inode->i_hash);
			INIT_LIST_HEAD(&inode->i_hash);
			list_del(&inode->i_list);
			list_add(&inode->i_list, dispose);
			inode->i_state |= I_FREEING;
			count++;
			continue;
		}
		busy = 1;
	}
	/* only unused inodes may be cached with i_count zero */
	inodes_stat.nr_unused -= count;
	return busy;
}
/*
 * This is a two-stage process. First we collect all
 * offending inodes onto the throw-away list, and in
 * the second stage we actually dispose of them. This
 * is because we don't want to sleep while messing
 * with the global lists..
 */
int invalidate_inodes(struct super_block * sb)
{
	int busy;
	LIST_HEAD(throw_away);

	spin_lock(&inode_lock);
	busy = invalidate_list(&inode_in_use, sb, &throw_away);
	busy |= invalidate_list(&inode_unused, sb, &throw_away);
	busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
	spin_unlock(&inode_lock);

	dispose_list(&throw_away);

	return busy;
}
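
/*
 * A non-zero return from invalidate_inodes() means some inode of this
 * super block still had i_count != 0 and could not be thrown away;
 * callers (typically the umount path) treat that as "still busy".
 */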
/*
 * This takes the inode lock itself.  It searches the unused
 * list for freeable inodes, which are moved to a temporary
 * list and then freed by dispose_list.
 *
 * We don't expect to have to call this very often.
 *
 * N.B. The spinlock is dropped again before dispose_list
 * is called.
 */
#define CAN_UNUSE(inode) \
	(((inode)->i_state | (inode)->i_data.nrpages) == 0)
#define INODE(entry)	(list_entry(entry, struct inode, i_list))

void prune_icache(int goal)
{
	LIST_HEAD(list);
	struct list_head *entry, *freeable = &list;
	int count = 0;
	struct inode * inode;

	spin_lock(&inode_lock);
	/* go simple and safe syncing everything before starting */
	sync_all_inodes();

	entry = inode_unused.prev;
	while (entry != &inode_unused)
	{
		struct list_head *tmp = entry;

		entry = entry->prev;
		inode = INODE(tmp);
		if (!CAN_UNUSE(inode))
			continue;
		if (inode->i_count)
			BUG();
		list_del(tmp);
		list_del(&inode->i_hash);
		INIT_LIST_HEAD(&inode->i_hash);
		list_add(tmp, freeable);
		inode->i_state |= I_FREEING;
		count++;
		if (!--goal)
			break;
	}
	inodes_stat.nr_unused -= count;
	spin_unlock(&inode_lock);

	dispose_list(freeable);
}
int shrink_icache_memory(int priority, int gfp_mask, zone_t *zone)
{
	if (gfp_mask & __GFP_IO)
	{
		int count = 0;

		if (priority)
			count = inodes_stat.nr_unused / priority;
		prune_icache(count);
		/* FIXME: kmem_cache_shrink here should tell us
		   the number of pages freed, and it should
		   honour __GFP_DMA/__GFP_HIGHMEM so that only
		   pages that are useful for the current
		   allocation get freed. */
		kmem_cache_shrink(inode_cachep);
	}

	return 0;
}
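
/*
 * shrink_icache_memory() is the hook through which the page reclaim
 * code shrinks the inode cache when memory gets tight.  The __GFP_IO
 * test guards the pruning: prune_icache() may end up writing inodes
 * and sleeping, which callers that cannot tolerate I/O must avoid, so
 * such allocations get nothing reclaimed from here.
 */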
static inline void __iget(struct inode * inode)
{
	if (!inode->i_count++)
	{
		if (!(inode->i_state & I_DIRTY))
		{
			list_del(&inode->i_list);
			list_add(&inode->i_list, &inode_in_use);
		}
		inodes_stat.nr_unused--;
	}
}
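
/*
 * __iget() must be called with inode_lock held: it bumps i_count and,
 * on the 0 -> 1 transition, moves a clean inode from the unused list
 * back onto inode_in_use (dirty inodes stay on their sb->s_dirty list)
 * and fixes up the nr_unused statistic.
 */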
/*
 * Called with the inode lock held.
 * NOTE: we are not increasing the inode-refcount, you must call __iget()
 * by hand after calling find_inode now! This simplifies iunique and doesn't
 * add any extra branch to the common code.
 */
static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque)
{
	struct list_head *tmp;
	struct inode * inode;

	tmp = head;
	for (;;) {
		tmp = tmp->next;
		inode = NULL;
		if (tmp == head)
			break;
		inode = list_entry(tmp, struct inode, i_hash);
		if (inode->i_sb != sb)
			continue;
		if (inode->i_ino != ino)
			continue;
		if (find_actor && !find_actor(inode, ino, opaque))
			continue;
		break;
	}
	return inode;
}
/*
 * This just initializes the inode fields
 * to known values before returning the inode..
 *
 * i_sb, i_ino, i_count, i_state and the lists have
 * been initialized elsewhere..
 */
static void clean_inode(struct inode *inode)
{
	memset(&inode->u, 0, sizeof(inode->u));
	inode->i_sock = 0;
	inode->i_op = NULL;
	inode->i_nlink = 1;
	atomic_set(&inode->i_writecount, 0);
	inode->i_size = 0;
	inode->i_generation = 0;
	memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
	inode->i_pipe = NULL;
	inode->i_bdev = NULL;
}
/*
 * This is called by things like the networking layer
 * etc that want to get an inode without any inode
 * number, or filesystems that allocate new inodes with
 * no pre-existing information.
 */
struct inode * get_empty_inode(void)
{
	static unsigned long last_ino = 0;
	struct inode * inode;

	inode = alloc_inode();
	if (inode)
	{
		spin_lock(&inode_lock);
		list_add(&inode->i_list, &inode_in_use);
		inode->i_sb = NULL;
		inode->i_dev = 0;
		inode->i_ino = ++last_ino;
		inode->i_flags = 0;
		inode->i_count = 1;
		inode->i_state = 0;
		spin_unlock(&inode_lock);
		clean_inode(inode);
	}
	return inode;
}
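
/*
 * Rough usage sketch (illustrative only; "my_fs_inode_operations" is a
 * made-up name):
 *
 *	struct inode *inode = get_empty_inode();
 *	if (!inode)
 *		return NULL;
 *	inode->i_op = &my_fs_inode_operations;
 *	...
 *
 * If the inode should later be found through iget4(), it also has to
 * be hashed with insert_inode_hash(), and that must happen before it
 * is first marked dirty (see __mark_inode_dirty above).
 */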
/*
 * This is called without the inode lock held.. Be careful.
 *
 * We no longer cache the sb_flags in i_flags - see fs.h
 *	-- rmk@arm.uk.linux.org
 */
static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque)
{
	struct inode * inode;

	inode = alloc_inode();
	if (inode) {
		struct inode * old;

		spin_lock(&inode_lock);
		/* We released the lock, so.. */
		old = find_inode(sb, ino, head, find_actor, opaque);
		if (!old) {
			list_add(&inode->i_list, &inode_in_use);
			list_add(&inode->i_hash, head);
			inode->i_sb = sb;
			inode->i_dev = sb->s_dev;
			inode->i_ino = ino;
			inode->i_flags = 0;
			inode->i_count = 1;
			inode->i_state = I_LOCK;
			spin_unlock(&inode_lock);

			clean_inode(inode);
			sb->s_op->read_inode(inode);

			/*
			 * This is special!  We do not need the spinlock
			 * when clearing I_LOCK, because we're guaranteed
			 * that nobody else tries to do anything about the
			 * state of the inode when it is locked, as we
			 * just created it (so there can be no old holders
			 * that haven't tested I_LOCK).
			 */
			inode->i_state &= ~I_LOCK;
			wake_up(&inode->i_wait);

			return inode;
		}

		/*
		 * Uhhuh, somebody else created the same inode under
		 * us. Use the old inode instead of the one we just
		 * allocated.
		 */
		__iget(old);
		spin_unlock(&inode_lock);
		destroy_inode(inode);
		inode = old;
		wait_on_inode(inode);
	}
	return inode;
}
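
/*
 * Note the allocate-first pattern in get_new_inode(): the inode is
 * allocated before inode_lock is retaken, and the hash chain is then
 * searched again under the lock.  If another CPU inserted the same
 * (sb, ino) in the meantime, the freshly allocated copy is destroyed
 * and the old one used instead, so iget4() always hands back a fully
 * read, unlocked inode.
 */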
static inline unsigned long hash(struct super_block *sb, unsigned long i_ino)
{
	unsigned long tmp = i_ino | (unsigned long) sb;
	tmp = tmp + (tmp >> HASH_BITS) + (tmp >> HASH_BITS*2);
	return tmp & HASH_MASK;
}
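
/*
 * In other words: the super block pointer is or-ed into the inode
 * number, the result is folded by adding in the value shifted right by
 * HASH_BITS and by 2*HASH_BITS, and the low HASH_BITS (14) bits of the
 * sum select one of the HASH_SIZE (16384) hash chains.
 */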
/* Yeah, I know about quadratic hash. Maybe, later. */
ino_t iunique(struct super_block *sb, ino_t max_reserved)
{
	static ino_t counter = 0;
	struct inode *inode;
	struct list_head * head;
	ino_t res;
	spin_lock(&inode_lock);
retry:
	if (counter > max_reserved) {
		head = inode_hashtable + hash(sb,counter);
		inode = find_inode(sb, res = counter++, head, NULL, NULL);
		if (!inode) {
			spin_unlock(&inode_lock);
			return res;
		}
	} else {
		counter = max_reserved + 1;
	}
	goto retry;
}
struct inode *igrab(struct inode *inode)
{
	spin_lock(&inode_lock);
	if (!(inode->i_state & I_FREEING))
		__iget(inode);
	else
		inode = NULL;
	spin_unlock(&inode_lock);
	if (inode)
		wait_on_inode(inode);
	return inode;
}
struct inode *iget4(struct super_block *sb, unsigned long ino, find_inode_t find_actor, void *opaque)
{
	struct list_head * head = inode_hashtable + hash(sb,ino);
	struct inode * inode;

	spin_lock(&inode_lock);
	inode = find_inode(sb, ino, head, find_actor, opaque);
	if (inode) {
		__iget(inode);
		spin_unlock(&inode_lock);
		wait_on_inode(inode);
		return inode;
	}
	spin_unlock(&inode_lock);

	/*
	 * get_new_inode() will do the right thing, re-trying the search
	 * in case it had to block at any point.
	 */
	return get_new_inode(sb, ino, head, find_actor, opaque);
}
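
/*
 * Most filesystems use the plain iget(sb, ino) wrapper, which simply
 * passes a NULL find_actor; the find_actor/opaque pair is there for
 * filesystems whose notion of inode identity is more than (sb, ino)
 * and who need an extra comparison during the hash chain walk.
 */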
void insert_inode_hash(struct inode *inode)
{
	struct list_head *head = &anon_hash_chain;
	if (inode->i_sb)
		head = inode_hashtable + hash(inode->i_sb, inode->i_ino);
	spin_lock(&inode_lock);
	list_add(&inode->i_hash, head);
	spin_unlock(&inode_lock);
}
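
/*
 * Inodes with a NULL i_sb are all chained on the single anon_hash_chain
 * rather than in the hash table proper; everything else goes onto the
 * same (sb, ino) bucket that iget4() will later search.
 */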
void remove_inode_hash(struct inode *inode)
{
	spin_lock(&inode_lock);
	list_del(&inode->i_hash);
	INIT_LIST_HEAD(&inode->i_hash);
	spin_unlock(&inode_lock);
}
void iput(struct inode *inode)
{
	if (inode) {
		struct super_operations *op = NULL;
		int destroy = 0;

		if (inode->i_sb && inode->i_sb->s_op)
			op = inode->i_sb->s_op;
		if (op && op->put_inode)
			op->put_inode(inode);

		spin_lock(&inode_lock);
		if (!--inode->i_count) {
			if (!inode->i_nlink) {
				list_del(&inode->i_hash);
				INIT_LIST_HEAD(&inode->i_hash);
				list_del(&inode->i_list);
				INIT_LIST_HEAD(&inode->i_list);
				inode->i_state|=I_FREEING;
				if (op && op->delete_inode) {
					void (*delete)(struct inode *) = op->delete_inode;
					spin_unlock(&inode_lock);
					if (inode->i_data.nrpages)
						truncate_inode_pages(inode, 0);
					delete(inode);
					spin_lock(&inode_lock);
				}
			}
			if (list_empty(&inode->i_hash)) {
				list_del(&inode->i_list);
				INIT_LIST_HEAD(&inode->i_list);
				inode->i_state|=I_FREEING;
				spin_unlock(&inode_lock);
				clear_inode(inode);
				destroy = 1;
				spin_lock(&inode_lock);
			}
			else
			{
				if (!(inode->i_state & I_DIRTY)) {
					list_del(&inode->i_list);
					list_add(&inode->i_list,
						 &inode_unused);
				}
				inodes_stat.nr_unused++;
			}
#ifdef INODE_PARANOIA
			if (inode->i_flock)
				printk(KERN_ERR "iput: inode %s/%ld still has locks!\n",
					kdevname(inode->i_dev), inode->i_ino);
			if (!list_empty(&inode->i_dentry))
				printk(KERN_ERR "iput: device %s inode %ld still has aliases!\n",
					kdevname(inode->i_dev), inode->i_ino);
			if (inode->i_count)
				printk(KERN_ERR "iput: device %s inode %ld count changed, count=%d\n",
					kdevname(inode->i_dev), inode->i_ino, inode->i_count);
			if (atomic_read(&inode->i_sem.count) != 1)
				printk(KERN_ERR "iput: Aieee, semaphore in use inode %s/%ld, count=%d\n",
					kdevname(inode->i_dev), inode->i_ino, atomic_read(&inode->i_sem.count));
#endif
		}
		if (inode->i_count > (1<<31)) {
			printk(KERN_ERR "iput: inode %s/%ld count wrapped\n",
				kdevname(inode->i_dev), inode->i_ino);
		}
		spin_unlock(&inode_lock);
		if (destroy)
			destroy_inode(inode);
	}
}
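
/*
 * The 'destroy' flag in iput() exists because clear_inode() can sleep
 * and therefore has to run outside inode_lock; the final
 * destroy_inode(), which returns the object to the slab cache, is
 * deferred until the lock has been dropped for good.  The
 * INODE_PARANOIA block merely prints diagnostics for inodes that reach
 * a zero count in a suspicious state.
 */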
int bmap(struct inode * inode, int block)
{
	struct buffer_head tmp;

	if (inode->i_op && inode->i_op->get_block) {
		tmp.b_state = 0;
		tmp.b_blocknr = 0;
		inode->i_op->get_block(inode, block, &tmp, 0);
		return tmp.b_blocknr;
	}
	return 0;
}
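
/*
 * bmap() maps a logical block of the file to its on-disk block number
 * by calling ->get_block() with a zero "create" argument on a dummy
 * buffer_head, so nothing is allocated or read; a return of 0 means
 * "no mapping".  The FIBMAP ioctl and swap-file setup are the usual
 * consumers of this.
 */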
/*
 * Initialize the hash tables.
 */
void __init inode_init(void)
{
	int i;
	struct list_head *head = inode_hashtable;

	i = HASH_SIZE;
	do {
		INIT_LIST_HEAD(head);
		head++;
		i--;
	} while (i);

	/* inode slab cache */
	inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode),
					 0, SLAB_HWCACHE_ALIGN, init_once,
					 NULL);
	if (!inode_cachep)
		panic("cannot create inode slab cache");
}
void update_atime (struct inode *inode)
{
	if ( IS_NOATIME (inode) ) return;
	if ( IS_NODIRATIME (inode) && S_ISDIR (inode->i_mode) ) return;
	if ( IS_RDONLY (inode) ) return;
	inode->i_atime = CURRENT_TIME;
	mark_inode_dirty (inode);
}   /*  End Function update_atime  */
/*
 * Quota functions that want to walk the inode lists..
 */
#ifdef CONFIG_QUOTA

/* Functions back in dquot.c */
void put_dquot_list(struct list_head *);
int remove_inode_dquot_ref(struct inode *, short, struct list_head *);

void remove_dquot_ref(kdev_t dev, short type)
{
	struct super_block *sb = get_super(dev);
	struct inode *inode;
	struct list_head *act_head;
	LIST_HEAD(tofree_head);

	if (!sb || !sb->dq_op)
		return;	/* nothing to do */

	/* We have to be protected against other CPUs */
	spin_lock(&inode_lock);

	for (act_head = inode_in_use.next; act_head != &inode_in_use; act_head = act_head->next) {
		inode = list_entry(act_head, struct inode, i_list);
		if (inode->i_sb != sb || !IS_QUOTAINIT(inode))
			continue;
		remove_inode_dquot_ref(inode, type, &tofree_head);
	}
	for (act_head = inode_unused.next; act_head != &inode_unused; act_head = act_head->next) {
		inode = list_entry(act_head, struct inode, i_list);
		if (inode->i_sb != sb || !IS_QUOTAINIT(inode))
			continue;
		remove_inode_dquot_ref(inode, type, &tofree_head);
	}
	for (act_head = sb->s_dirty.next; act_head != &sb->s_dirty; act_head = act_head->next) {
		inode = list_entry(act_head, struct inode, i_list);
		if (!IS_QUOTAINIT(inode))
			continue;
		remove_inode_dquot_ref(inode, type, &tofree_head);
	}
	spin_unlock(&inode_lock);

	put_dquot_list(&tofree_head);
}

#endif