/*
 * linux/fs/inode.c
 *
 * (C) 1997 Linus Torvalds
 */

#include <linux/fs.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/dcache.h>
#include <linux/init.h>
#include <linux/quotaops.h>
#include <linux/slab.h>

/*
 * New inode.c implementation.
 *
 * This implementation has the basic premise of trying
 * to be extremely low-overhead and SMP-safe, yet be
 * simple enough to be "obviously correct".
 *
 * Famous last words.
 */

/* inode dynamic allocation 1999, Andrea Arcangeli <andrea@suse.de> */

#define INODE_PARANOIA 1
/* #define INODE_DEBUG 1 */

/*
 * Inode lookup is no longer as critical as it used to be:
 * most of the lookups are going to be through the dcache.
 */
#define HASH_BITS	14
#define HASH_SIZE	(1UL << HASH_BITS)
#define HASH_MASK	(HASH_SIZE-1)

/*
 * Each inode can be on two separate lists. One is
 * the hash list of the inode, used for lookups. The
 * other linked list is the "type" list:
 *  "in_use" - valid inode, i_count > 0, i_nlink > 0
 *  "dirty"  - as "in_use" but also dirty
 *  "unused" - valid inode, i_count = 0
 *
 * A "dirty" list is maintained for each super block,
 * allowing for low-overhead inode sync() operations.
 */

static LIST_HEAD(inode_in_use);
static LIST_HEAD(inode_unused);
static struct list_head inode_hashtable[HASH_SIZE];

/*
 * A simple spinlock to protect the list manipulations.
 *
 * NOTE! You also have to own the lock if you change
 * the i_state of an inode while it is in use..
 */
spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;

/*
 * Statistics gathering..
 */
struct {
	int nr_inodes;
	int nr_unused;
	int dummy[5];
} inodes_stat = {0, 0,};

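/*
 * Note (not in the original file): these two counters appear to be what
 * user space reads through the fs sysctls (inode-nr / inode-state under
 * /proc/sys/fs in kernels of this vintage); the dummy[5] padding keeps
 * that exported layout at a stable seven ints.
 */
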
static kmem_cache_t * inode_cachep;

#define alloc_inode() \
	 ((struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL))
#define destroy_inode(inode) kmem_cache_free(inode_cachep, (inode))

/*
 * These are initializations that only need to be done
 * once, because the fields are idempotent across use
 * of the inode, so let the slab be aware of that.
 */
static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
{
	struct inode * inode = (struct inode *) foo;

	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR)
	{
		memset(inode, 0, sizeof(*inode));
		init_waitqueue_head(&inode->i_wait);
		INIT_LIST_HEAD(&inode->i_hash);
		INIT_LIST_HEAD(&inode->i_data.pages);
		INIT_LIST_HEAD(&inode->i_dentry);
		sema_init(&inode->i_sem, 1);
		spin_lock_init(&inode->i_shared_lock);
	}
}

/*
 * Put the inode on the super block's dirty list.
 *
 * CAREFUL! We mark it dirty unconditionally, but
 * move it onto the dirty list only if it is hashed.
 * If it was not hashed, it will never be added to
 * the dirty list even if it is later hashed, as it
 * will have been marked dirty already.
 *
 * In short, make sure you hash any inodes _before_
 * you start marking them dirty..
 */
void __mark_inode_dirty(struct inode *inode)
{
	struct super_block * sb = inode->i_sb;

	if (sb) {
		spin_lock(&inode_lock);
		if (!(inode->i_state & I_DIRTY)) {
			inode->i_state |= I_DIRTY;
			/* Only add valid (ie hashed) inodes to the dirty list */
			if (!list_empty(&inode->i_hash)) {
				list_del(&inode->i_list);
				list_add(&inode->i_list, &sb->s_dirty);
			}
		}
		spin_unlock(&inode_lock);
	}
}

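/*
 * Illustrative sketch (not part of this file) of the ordering the comment
 * above asks for.  A filesystem creating a brand-new inode would hash it
 * before dirtying it, roughly:
 *
 *	insert_inode_hash(inode);
 *	mark_inode_dirty(inode);
 *
 * so that mark_inode_dirty() (assumed here to be the usual fs.h wrapper
 * around __mark_inode_dirty) finds the inode hashed and moves it onto
 * sb->s_dirty.  The reverse order leaves the inode marked dirty but never
 * moved onto the per-super-block dirty list, which is exactly what the
 * warning above is about.
 */
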
static void __wait_on_inode(struct inode * inode)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(&inode->i_wait, &wait);
repeat:
	set_current_state(TASK_UNINTERRUPTIBLE);
	if (inode->i_state & I_LOCK) {
		schedule();
		goto repeat;
	}
	remove_wait_queue(&inode->i_wait, &wait);
	current->state = TASK_RUNNING;
}

static inline void wait_on_inode(struct inode *inode)
{
	if (inode->i_state & I_LOCK)
		__wait_on_inode(inode);
}

static inline void write_inode(struct inode *inode)
{
	if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode)
		inode->i_sb->s_op->write_inode(inode);
}

static inline void sync_one(struct inode *inode)
{
	if (inode->i_state & I_LOCK) {
		spin_unlock(&inode_lock);
		__wait_on_inode(inode);
		spin_lock(&inode_lock);
	} else {
		list_del(&inode->i_list);
		list_add(&inode->i_list,
			 inode->i_count ? &inode_in_use : &inode_unused);
		/* Set I_LOCK, reset I_DIRTY */
		inode->i_state ^= I_DIRTY | I_LOCK;
		spin_unlock(&inode_lock);

		write_inode(inode);

		spin_lock(&inode_lock);
		inode->i_state &= ~I_LOCK;
		wake_up(&inode->i_wait);
	}
}

static inline void sync_list(struct list_head *head)
{
	struct list_head * tmp;

	while ((tmp = head->prev) != head)
		sync_one(list_entry(tmp, struct inode, i_list));
}

/*
 * "sync_inodes()" goes through the super block's dirty list,
 * writes them out, and puts them back on the normal list.
 */
void sync_inodes(kdev_t dev)
{
	struct super_block * sb = sb_entry(super_blocks.next);

	/*
	 * Search the super_blocks array for the device(s) to sync.
	 */
	spin_lock(&inode_lock);
	for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
		if (!sb->s_dev)
			continue;
		if (dev && sb->s_dev != dev)
			continue;

		sync_list(&sb->s_dirty);

		if (dev)
			break;
	}
	spin_unlock(&inode_lock);
}

/*
 * Called with the spinlock already held..
 */
static void sync_all_inodes(void)
{
	struct super_block * sb = sb_entry(super_blocks.next);
	for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
		if (!sb->s_dev)
			continue;
		sync_list(&sb->s_dirty);
	}
}

/*
 * Needed by knfsd
 */
void write_inode_now(struct inode *inode)
{
	struct super_block * sb = inode->i_sb;

	if (sb) {
		spin_lock(&inode_lock);
		while (inode->i_state & I_DIRTY)
			sync_one(inode);
		spin_unlock(&inode_lock);
	}
	else
		printk("write_inode_now: no super block\n");
}

/*
 * This is called by the filesystem to tell us
 * that the inode is no longer useful. We just
 * terminate it with extreme prejudice.
 */
void clear_inode(struct inode *inode)
{
	if (inode->i_data.nrpages)
		BUG();
	if (!(inode->i_state & I_FREEING))
		BUG();
	wait_on_inode(inode);
	if (IS_QUOTAINIT(inode))
		DQUOT_DROP(inode);
	if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->clear_inode)
		inode->i_sb->s_op->clear_inode(inode);

	inode->i_state = 0;
}

/*
 * Dispose-list gets a local list with local inodes in it, so it doesn't
 * need to worry about list corruption and SMP locks.
 */
static void dispose_list(struct list_head * head)
{
	struct list_head * inode_entry;
	struct inode * inode;

	while ((inode_entry = head->next) != head)
	{
		list_del(inode_entry);

		inode = list_entry(inode_entry, struct inode, i_list);
		if (inode->i_data.nrpages)
			truncate_inode_pages(inode, 0);
		clear_inode(inode);
		destroy_inode(inode);
	}
}

/*
 * Invalidate all inodes for a device.
 */
static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose)
{
	struct list_head *next;
	int busy = 0, count = 0;

	next = head->next;
	for (;;) {
		struct list_head * tmp = next;
		struct inode * inode;

		next = next->next;
		if (tmp == head)
			break;
		inode = list_entry(tmp, struct inode, i_list);
		if (inode->i_sb != sb)
			continue;
		if (!inode->i_count) {
			list_del(&inode->i_hash);
			INIT_LIST_HEAD(&inode->i_hash);
			list_del(&inode->i_list);
			list_add(&inode->i_list, dispose);
			inode->i_state |= I_FREEING;
			count++;
			continue;
		}
		busy = 1;
	}
	/* only unused inodes may be cached with i_count zero */
	inodes_stat.nr_unused -= count;
	return busy;
}

/*
 * This is a two-stage process. First we collect all
 * offending inodes onto the throw-away list, and in
 * the second stage we actually dispose of them. This
 * is because we don't want to sleep while messing
 * with the global lists..
 */
int invalidate_inodes(struct super_block * sb)
{
	int busy;
	LIST_HEAD(throw_away);

	spin_lock(&inode_lock);
	busy = invalidate_list(&inode_in_use, sb, &throw_away);
	busy |= invalidate_list(&inode_unused, sb, &throw_away);
	busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
	spin_unlock(&inode_lock);

	dispose_list(&throw_away);

	return busy;
}

/*
 * This is called with the inode lock held. It searches
 * the in-use list for freeable inodes, which are moved to a
 * temporary list and then placed on the unused list by
 * dispose_list.
 *
 * We don't expect to have to call this very often.
 *
 * N.B. The spinlock is released during the call to
 *      dispose_list.
 */
#define CAN_UNUSE(inode) \
	(((inode)->i_state | (inode)->i_data.nrpages) == 0)
#define INODE(entry)	(list_entry(entry, struct inode, i_list))

void prune_icache(int goal)
{
	LIST_HEAD(list);
	struct list_head *entry, *freeable = &list;
	int count = 0;
	struct inode * inode;

	spin_lock(&inode_lock);
	/* go simple and safe syncing everything before starting */
	sync_all_inodes();

	entry = inode_unused.prev;
	while (entry != &inode_unused)
	{
		struct list_head *tmp = entry;

		entry = entry->prev;
		inode = INODE(tmp);
		if (!CAN_UNUSE(inode))
			continue;
		if (inode->i_count)
			BUG();
		list_del(tmp);
		list_del(&inode->i_hash);
		INIT_LIST_HEAD(&inode->i_hash);
		list_add(tmp, freeable);
		inode->i_state |= I_FREEING;
		count++;
		if (!--goal)
			break;
	}
	inodes_stat.nr_unused -= count;
	spin_unlock(&inode_lock);

	dispose_list(freeable);
}

int shrink_icache_memory(int priority, int gfp_mask)
{
	if (gfp_mask & __GFP_IO)
	{
		int count = 0;

		if (priority)
			count = inodes_stat.nr_unused / priority;
		prune_icache(count);
		/* FIXME: kmem_cache_shrink here should tell us
		   the number of pages freed, and it should
		   work in a __GFP_DMA/__GFP_HIGHMEM behaviour
		   to free only the interesting pages in
		   function of the needs of the current allocation. */
		kmem_cache_shrink(inode_cachep);
	}
	return 0;
}

static inline void __iget(struct inode * inode)
{
	if (!inode->i_count++)
	{
		if (!(inode->i_state & I_DIRTY))
		{
			list_del(&inode->i_list);
			list_add(&inode->i_list, &inode_in_use);
		}
		inodes_stat.nr_unused--;
	}
}

/*
 * Called with the inode lock held.
 * NOTE: we are not increasing the inode-refcount, you must call __iget()
 * by hand after calling find_inode now! This simplifies iunique and won't
 * add any additional branch in the common code.
 */
static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque)
{
	struct list_head *tmp;
	struct inode * inode;

	tmp = head;
	for (;;) {
		tmp = tmp->next;
		inode = NULL;
		if (tmp == head)
			break;
		inode = list_entry(tmp, struct inode, i_hash);
		if (inode->i_sb != sb)
			continue;
		if (inode->i_ino != ino)
			continue;
		if (find_actor && !find_actor(inode, ino, opaque))
			continue;
		break;
	}
	return inode;
}

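/*
 * Sketch only: the calling convention the NOTE above describes.  A caller
 * that wants to keep the inode must take the reference itself while still
 * holding inode_lock, roughly:
 *
 *	spin_lock(&inode_lock);
 *	inode = find_inode(sb, ino, head, NULL, NULL);
 *	if (inode)
 *		__iget(inode);
 *	spin_unlock(&inode_lock);
 *
 * iunique() below is the one caller that deliberately skips the __iget()
 * step, since it only needs to know whether an inode number is taken.
 */
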
/*
 * This just initializes the inode fields
 * to known values before returning the inode..
 *
 * i_sb, i_ino, i_count, i_state and the lists have
 * been initialized elsewhere..
 */
static void clean_inode(struct inode *inode)
{
	memset(&inode->u, 0, sizeof(inode->u));
	inode->i_sock = 0;
	inode->i_op = NULL;
	inode->i_nlink = 1;
	atomic_set(&inode->i_writecount, 0);
	inode->i_size = 0;
	inode->i_generation = 0;
	memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
	inode->i_pipe = NULL;
}

/*
 * This is called by things like the networking layer
 * etc that want to get an inode without any inode
 * number, or filesystems that allocate new inodes with
 * no pre-existing information.
 */
struct inode * get_empty_inode(void)
{
	static unsigned long last_ino = 0;
	struct inode * inode;

	inode = alloc_inode();
	if (inode)
	{
		spin_lock(&inode_lock);
		list_add(&inode->i_list, &inode_in_use);
		inode->i_sb = NULL;
		inode->i_dev = 0;
		inode->i_ino = ++last_ino;
		inode->i_flags = 0;
		inode->i_count = 1;
		inode->i_state = 0;
		spin_unlock(&inode_lock);
		clean_inode(inode);
	}
	return inode;
}

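/*
 * Illustrative use (assumption, not taken from this file): an
 * anonymous-inode user such as the socket layer would do something like
 *
 *	struct inode *inode = get_empty_inode();
 *	if (!inode)
 *		return NULL;
 *	inode->i_mode = S_IFSOCK;
 *
 * Such inodes have i_sb == NULL and are never hashed, so they can only be
 * reached through whatever dentry or file pointer the caller attaches
 * them to, and iput() destroys them once that reference goes away.
 */
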
/*
 * This is called without the inode lock held.. Be careful.
 *
 * We no longer cache the sb_flags in i_flags - see fs.h
 *	-- rmk@arm.uk.linux.org
 */
static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque)
{
	struct inode * inode;

	inode = alloc_inode();
	if (inode) {
		struct inode * old;

		spin_lock(&inode_lock);
		/* We released the lock, so.. */
		old = find_inode(sb, ino, head, find_actor, opaque);
		if (!old) {
			list_add(&inode->i_list, &inode_in_use);
			list_add(&inode->i_hash, head);
			inode->i_sb = sb;
			inode->i_dev = sb->s_dev;
			inode->i_ino = ino;
			inode->i_flags = 0;
			inode->i_count = 1;
			inode->i_state = I_LOCK;
			spin_unlock(&inode_lock);

			clean_inode(inode);
			sb->s_op->read_inode(inode);

			/*
			 * This is special! We do not need the spinlock
			 * when clearing I_LOCK, because we're guaranteed
			 * that nobody else tries to do anything about the
			 * state of the inode when it is locked, as we
			 * just created it (so there can be no old holders
			 * that haven't tested I_LOCK).
			 */
			inode->i_state &= ~I_LOCK;
			wake_up(&inode->i_wait);

			return inode;
		}

		/*
		 * Uhhuh, somebody else created the same inode under
		 * us. Use the old inode instead of the one we just
		 * allocated.
		 */
		__iget(old);
		spin_unlock(&inode_lock);
		destroy_inode(inode);
		inode = old;
		wait_on_inode(inode);
	}
	return inode;
}

static inline unsigned long hash(struct super_block *sb, unsigned long i_ino)
{
	unsigned long tmp = i_ino | (unsigned long) sb;
	tmp = tmp + (tmp >> HASH_BITS) + (tmp >> HASH_BITS*2);
	return tmp & HASH_MASK;
}

/* Yeah, I know about quadratic hash. Maybe, later. */
ino_t iunique(struct super_block *sb, ino_t max_reserved)
{
	static ino_t counter = 0;
	struct inode *inode;
	struct list_head * head;
	ino_t res;
	spin_lock(&inode_lock);
retry:
	if (counter > max_reserved) {
		head = inode_hashtable + hash(sb,counter);
		inode = find_inode(sb, res = counter++, head, NULL, NULL);
		if (!inode) {
			spin_unlock(&inode_lock);
			return res;
		}
	} else {
		counter = max_reserved + 1;
	}
	goto retry;
}

struct inode *igrab(struct inode *inode)
{
	spin_lock(&inode_lock);
	if (!(inode->i_state & I_FREEING))
		__iget(inode);
	else
		inode = NULL;
	spin_unlock(&inode_lock);
	if (inode)
		wait_on_inode(inode);
	return inode;
}

struct inode *iget4(struct super_block *sb, unsigned long ino, find_inode_t find_actor, void *opaque)
{
	struct list_head * head = inode_hashtable + hash(sb,ino);
	struct inode * inode;

	spin_lock(&inode_lock);
	inode = find_inode(sb, ino, head, find_actor, opaque);
	if (inode) {
		__iget(inode);
		spin_unlock(&inode_lock);
		wait_on_inode(inode);
		return inode;
	}
	spin_unlock(&inode_lock);

	/*
	 * get_new_inode() will do the right thing, re-trying the search
	 * in case it had to block at any point.
	 */
	return get_new_inode(sb, ino, head, find_actor, opaque);
}

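/*
 * Usage sketch (assumption: iget() is the fs.h wrapper that calls iget4()
 * with a NULL find_actor).  A filesystem's lookup path reads an on-disk
 * inode by number with roughly
 *
 *	inode = iget(dir->i_sb, ino);
 *	if (!inode)
 *		goto fail;
 *
 * and gets back either a cached inode (via find_inode above) or a freshly
 * read one (via get_new_inode -> sb->s_op->read_inode).  find_actor/opaque
 * exist for filesystems where the inode number alone is not a unique key.
 */
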
void insert_inode_hash(struct inode *inode)
{
	struct list_head *head = inode_hashtable + hash(inode->i_sb, inode->i_ino);
	spin_lock(&inode_lock);
	list_add(&inode->i_hash, head);
	spin_unlock(&inode_lock);
}

void remove_inode_hash(struct inode *inode)
{
	spin_lock(&inode_lock);
	list_del(&inode->i_hash);
	INIT_LIST_HEAD(&inode->i_hash);
	spin_unlock(&inode_lock);
}

void iput(struct inode *inode)
{
	if (inode) {
		struct super_operations *op = NULL;
		int destroy = 0;

		if (inode->i_sb && inode->i_sb->s_op)
			op = inode->i_sb->s_op;
		if (op && op->put_inode)
			op->put_inode(inode);

		spin_lock(&inode_lock);
		if (!--inode->i_count) {
			if (!inode->i_nlink) {
				list_del(&inode->i_hash);
				INIT_LIST_HEAD(&inode->i_hash);
				list_del(&inode->i_list);
				INIT_LIST_HEAD(&inode->i_list);
				inode->i_state|=I_FREEING;
				if (op && op->delete_inode) {
					void (*delete)(struct inode *) = op->delete_inode;
					spin_unlock(&inode_lock);
					if (inode->i_data.nrpages)
						truncate_inode_pages(inode, 0);
					delete(inode);
					spin_lock(&inode_lock);
				}
			}
			if (list_empty(&inode->i_hash)) {
				list_del(&inode->i_list);
				INIT_LIST_HEAD(&inode->i_list);
				inode->i_state|=I_FREEING;
				spin_unlock(&inode_lock);
				clear_inode(inode);
				destroy = 1;
				spin_lock(&inode_lock);
			}
			else
			{
				if (!(inode->i_state & I_DIRTY)) {
					list_del(&inode->i_list);
					list_add(&inode->i_list,
						 &inode_unused);
				}
				inodes_stat.nr_unused++;
			}
#ifdef INODE_PARANOIA
			if (inode->i_flock)
				printk(KERN_ERR "iput: inode %s/%ld still has locks!\n",
					kdevname(inode->i_dev), inode->i_ino);
			if (!list_empty(&inode->i_dentry))
				printk(KERN_ERR "iput: device %s inode %ld still has aliases!\n",
					kdevname(inode->i_dev), inode->i_ino);
			if (inode->i_count)
				printk(KERN_ERR "iput: device %s inode %ld count changed, count=%d\n",
					kdevname(inode->i_dev), inode->i_ino, inode->i_count);
			if (atomic_read(&inode->i_sem.count) != 1)
				printk(KERN_ERR "iput: Aieee, semaphore in use inode %s/%ld, count=%d\n",
					kdevname(inode->i_dev), inode->i_ino, atomic_read(&inode->i_sem.count));
#endif
		}
		if (inode->i_count > (1<<31)) {
			printk(KERN_ERR "iput: inode %s/%ld count wrapped\n",
				kdevname(inode->i_dev), inode->i_ino);
		}
		spin_unlock(&inode_lock);
		if (destroy)
			destroy_inode(inode);
	}
}

int bmap(struct inode * inode, int block)
{
	struct buffer_head tmp;

	if (inode->i_op && inode->i_op->get_block) {
		tmp.b_state = 0;
		tmp.b_blocknr = 0;
		inode->i_op->get_block(inode, block, &tmp, 0);
		return tmp.b_blocknr;
	}
	return 0;
}

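/*
 * Sketch of the intent: bmap() answers "which device block backs logical
 * block N of this file?", e.g.
 *
 *	int phys = bmap(inode, 0);	(physical block behind the file's first block)
 *
 * A result of 0 means a hole, or a filesystem without a get_block method.
 * Typical callers in this era are things like the FIBMAP ioctl and the
 * swap-file setup code (an observation about users, not part of this file).
 * Note the on-stack buffer_head is only a container for the get_block()
 * answer; nothing is read from disk here.
 */
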
/*
 * Initialize the hash tables.
 */
void __init inode_init(void)
{
	int i;
	struct list_head *head = inode_hashtable;

	i = HASH_SIZE;
	do {
		INIT_LIST_HEAD(head);
		head++;
		i--;
	} while (i);

	/* inode slab cache */
	inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode),
					 0, SLAB_HWCACHE_ALIGN, init_once,
					 NULL);
	if (!inode_cachep)
		panic("cannot create inode slab cache");
}

void update_atime (struct inode *inode)
{
	if ( IS_NOATIME (inode) ) return;
	if ( IS_NODIRATIME (inode) && S_ISDIR (inode->i_mode) ) return;
	if ( IS_RDONLY (inode) ) return;
	inode->i_atime = CURRENT_TIME;
	mark_inode_dirty (inode);
}   /*  End Function update_atime  */

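/*
 * Illustrative note (not from this file): the generic read paths call this
 * helper after a successful read, roughly
 *
 *	if (read_succeeded)
 *		update_atime(inode);
 *
 * and the three early returns above correspond to the noatime, nodiratime
 * and read-only mount conditions respectively.
 */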