/*
 * linux/fs/inode.c
 *
 * (C) 1997 Linus Torvalds
 */

#include <linux/config.h>
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/dcache.h>
#include <linux/init.h>
#include <linux/quotaops.h>
#include <linux/slab.h>
/*
 * New inode.c implementation.
 *
 * This implementation has the basic premise of trying
 * to be extremely low-overhead and SMP-safe, yet be
 * simple enough to be "obviously correct".
 *
 * Famous last words.
 */

/* inode dynamic allocation 1999, Andrea Arcangeli <andrea@suse.de> */

#define INODE_PARANOIA 1
/* #define INODE_DEBUG 1 */

/*
 * Inode lookup is no longer as critical as it used to be:
 * most of the lookups are going to be through the dcache.
 */
#define HASH_BITS		14
#define HASH_SIZE		(1UL << HASH_BITS)
#define HASH_MASK		(HASH_SIZE-1)
/*
 * Each inode can be on two separate lists. One is
 * the hash list of the inode, used for lookups. The
 * other linked list is the "type" list:
 *  "in_use" - valid inode, i_count > 0, i_nlink > 0
 *  "dirty"  - as "in_use" but also dirty
 *  "unused" - valid inode, i_count = 0
 *
 * A "dirty" list is maintained for each super block,
 * allowing for low-overhead inode sync() operations.
 */

static LIST_HEAD(inode_in_use);
static LIST_HEAD(inode_unused);
static struct list_head inode_hashtable[HASH_SIZE];
static LIST_HEAD(anon_hash_chain); /* for inodes with NULL i_sb */

/*
 * A simple spinlock to protect the list manipulations.
 *
 * NOTE! You also have to own the lock if you change
 * the i_state of an inode while it is in use..
 */
spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;

/*
 * Statistics gathering..
 */
struct {
	int nr_inodes;
	int nr_unused;
	int dummy[5];
} inodes_stat = {0, 0,};

static kmem_cache_t * inode_cachep;

#define alloc_inode() \
	 ((struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL))
#define destroy_inode(inode) kmem_cache_free(inode_cachep, (inode))
/*
 * These are initializations that only need to be done
 * once, because the fields are idempotent across use
 * of the inode, so let the slab cache be aware of that.
 */
static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
{
	struct inode * inode = (struct inode *) foo;

	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR)
	{
		memset(inode, 0, sizeof(*inode));
		init_waitqueue_head(&inode->i_wait);
		INIT_LIST_HEAD(&inode->i_hash);
		INIT_LIST_HEAD(&inode->i_data.pages);
		INIT_LIST_HEAD(&inode->i_dentry);
		sema_init(&inode->i_sem, 1);
		sema_init(&inode->i_zombie, 1);
		spin_lock_init(&inode->i_data.i_shared_lock);
	}
}
/*
 * Put the inode on the super block's dirty list.
 *
 * CAREFUL! We mark it dirty unconditionally, but
 * move it onto the dirty list only if it is hashed.
 * If it was not hashed, it will never be added to
 * the dirty list even if it is later hashed, as it
 * will have been marked dirty already.
 *
 * In short, make sure you hash any inodes _before_
 * you start marking them dirty..
 */
void __mark_inode_dirty(struct inode *inode)
{
	struct super_block * sb = inode->i_sb;

	if (sb) {
		spin_lock(&inode_lock);
		if (!(inode->i_state & I_DIRTY)) {
			inode->i_state |= I_DIRTY;
			/* Only add valid (ie hashed) inodes to the dirty list */
			if (!list_empty(&inode->i_hash)) {
				list_del(&inode->i_list);
				list_add(&inode->i_list, &sb->s_dirty);
			}
		}
		spin_unlock(&inode_lock);
	}
}
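/*
 * Example (a sketch of the rule above): a filesystem that creates a
 * brand new inode should hash it before dirtying it, roughly
 *
 *	insert_inode_hash(inode);
 *	mark_inode_dirty(inode);
 *
 * so that the inode actually ends up on its superblock's s_dirty list.
 */

/*
 * Wait for an inode's I_LOCK bit to clear.  __wait_on_inode() does the
 * full waitqueue dance; wait_on_inode() is the fast-path wrapper that
 * only drops into it when the inode is actually locked.
 */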
static void __wait_on_inode(struct inode * inode)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(&inode->i_wait, &wait);
repeat:
	set_current_state(TASK_UNINTERRUPTIBLE);
	if (inode->i_state & I_LOCK) {
		schedule();
		goto repeat;
	}
	remove_wait_queue(&inode->i_wait, &wait);
	current->state = TASK_RUNNING;
}

static inline void wait_on_inode(struct inode *inode)
{
	if (inode->i_state & I_LOCK)
		__wait_on_inode(inode);
}
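/*
 * Push a dirty inode to its filesystem's ->write_inode() method,
 * if the superblock provides one.
 */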
static inline void write_inode(struct inode *inode)
{
	if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode)
		inode->i_sb->s_op->write_inode(inode);
}
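/*
 * Take a reference with inode_lock held.  Going from i_count == 0 to 1
 * moves a clean inode off the unused list and back onto inode_in_use.
 */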
static inline void __iget(struct inode * inode)
{
	if (!inode->i_count++)
	{
		if (!(inode->i_state & I_DIRTY))
		{
			list_del(&inode->i_list);
			list_add(&inode->i_list, &inode_in_use);
		}
		inodes_stat.nr_unused--;
	}
}
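/*
 * Write one inode out, or wait for it if somebody else already holds
 * I_LOCK on it.  Called and returns with inode_lock held, but drops
 * the lock around the actual I/O.
 */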
static inline void sync_one(struct inode *inode)
{
	if (inode->i_state & I_LOCK) {
		__iget(inode);
		spin_unlock(&inode_lock);
		__wait_on_inode(inode);
		iput(inode);
		spin_lock(&inode_lock);
	} else {
		list_del(&inode->i_list);
		list_add(&inode->i_list,
			 inode->i_count ? &inode_in_use : &inode_unused);
		/* Set I_LOCK, reset I_DIRTY */
		inode->i_state ^= I_DIRTY | I_LOCK;
		spin_unlock(&inode_lock);

		write_inode(inode);

		spin_lock(&inode_lock);
		inode->i_state &= ~I_LOCK;
		wake_up(&inode->i_wait);
	}
}

static inline void sync_list(struct list_head *head)
{
	struct list_head * tmp;

	while ((tmp = head->prev) != head)
		sync_one(list_entry(tmp, struct inode, i_list));
}
/*
 * "sync_inodes()" goes through the super block's dirty list,
 * writes them out, and puts them back on the normal list.
 */
void sync_inodes(kdev_t dev)
{
	struct super_block * sb = sb_entry(super_blocks.next);

	/*
	 * Search the super_block list for the device(s) to sync.
	 */
	spin_lock(&inode_lock);
	for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
		if (!sb->s_dev)
			continue;
		if (dev && sb->s_dev != dev)
			continue;

		sync_list(&sb->s_dirty);

		if (dev)
			break;
	}
	spin_unlock(&inode_lock);
}
/*
 * Called with the spinlock already held..
 */
static void sync_all_inodes(void)
{
	struct super_block * sb = sb_entry(super_blocks.next);
	for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
		if (!sb->s_dev)
			continue;
		sync_list(&sb->s_dirty);
	}
}
/*
 * Needed by knfsd
 */
void write_inode_now(struct inode *inode)
{
	struct super_block * sb = inode->i_sb;

	if (sb) {
		spin_lock(&inode_lock);
		while (inode->i_state & I_DIRTY)
			sync_one(inode);
		spin_unlock(&inode_lock);
	}
	else
		printk("write_inode_now: no super block\n");
}
/*
 * This is called by the filesystem to tell us
 * that the inode is no longer useful. We just
 * terminate it with extreme prejudice.
 */
void clear_inode(struct inode *inode)
{
	if (inode->i_data.nrpages)
		BUG();
	if (!(inode->i_state & I_FREEING))
		BUG();
	if (inode->i_state & I_CLEAR)
		BUG();
	wait_on_inode(inode);
	if (IS_QUOTAINIT(inode))
		DQUOT_DROP(inode);
	if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->clear_inode)
		inode->i_sb->s_op->clear_inode(inode);
	if (inode->i_bdev) {
		bdput(inode->i_bdev);
		inode->i_bdev = NULL;
	}
	inode->i_state = I_CLEAR;
}
/*
 * Dispose-list gets a local list with local inodes in it, so it doesn't
 * need to worry about list corruption and SMP locks.
 */
static void dispose_list(struct list_head * head)
{
	struct list_head * inode_entry;
	struct inode * inode;

	while ((inode_entry = head->next) != head)
	{
		list_del(inode_entry);

		inode = list_entry(inode_entry, struct inode, i_list);
		if (inode->i_data.nrpages)
			truncate_inode_pages(&inode->i_data, 0);
		clear_inode(inode);
		destroy_inode(inode);
	}
}
/*
 * Invalidate all inodes for a device.
 */
static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose)
{
	struct list_head *next;
	int busy = 0, count = 0;

	next = head->next;
	for (;;) {
		struct list_head * tmp = next;
		struct inode * inode;

		next = next->next;
		if (tmp == head)
			break;
		inode = list_entry(tmp, struct inode, i_list);
		if (inode->i_sb != sb)
			continue;
		if (!inode->i_count) {
			list_del(&inode->i_hash);
			INIT_LIST_HEAD(&inode->i_hash);
			list_del(&inode->i_list);
			list_add(&inode->i_list, dispose);
			inode->i_state |= I_FREEING;
			count++;
			continue;
		}
		busy = 1;
	}
	/* only unused inodes may be cached with i_count zero */
	inodes_stat.nr_unused -= count;
	return busy;
}
/*
 * This is a two-stage process. First we collect all
 * offending inodes onto the throw-away list, and in
 * the second stage we actually dispose of them. This
 * is because we don't want to sleep while messing
 * with the global lists..
 */
int invalidate_inodes(struct super_block * sb)
{
	int busy;
	LIST_HEAD(throw_away);

	spin_lock(&inode_lock);
	busy = invalidate_list(&inode_in_use, sb, &throw_away);
	busy |= invalidate_list(&inode_unused, sb, &throw_away);
	busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
	spin_unlock(&inode_lock);

	dispose_list(&throw_away);

	return busy;
}
/*
 * This is called when we start running low on memory.  It searches
 * the unused list for freeable inodes, which are moved to a
 * temporary list and then freed by dispose_list.
 *
 * We don't expect to have to call this very often.
 *
 * N.B. The spinlock is released before the call to
 *      dispose_list.
 */
#define CAN_UNUSE(inode) \
	(((inode)->i_state | (inode)->i_data.nrpages) == 0)
#define INODE(entry)	(list_entry(entry, struct inode, i_list))

void prune_icache(int goal)
{
	LIST_HEAD(list);
	struct list_head *entry, *freeable = &list;
	int count = 0;
	struct inode * inode;

	spin_lock(&inode_lock);
	/* go simple and safe syncing everything before starting */
	sync_all_inodes();

	entry = inode_unused.prev;
	while (entry != &inode_unused)
	{
		struct list_head *tmp = entry;

		entry = entry->prev;
		inode = INODE(tmp);
		if (inode->i_state & (I_FREEING|I_CLEAR))
			BUG();
		if (!CAN_UNUSE(inode))
			continue;
		if (inode->i_count)
			BUG();
		list_del(tmp);
		list_del(&inode->i_hash);
		INIT_LIST_HEAD(&inode->i_hash);
		list_add(tmp, freeable);
		inode->i_state |= I_FREEING;
		count++;
		if (!--goal)
			break;
	}
	inodes_stat.nr_unused -= count;
	spin_unlock(&inode_lock);

	dispose_list(freeable);
}
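/*
 * Callback for the VM when memory is tight: trim the unused-inode
 * list.  The larger the priority value, the smaller the fraction of
 * the unused list we try to free.
 */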
int shrink_icache_memory(int priority, int gfp_mask, zone_t *zone)
{
	int count = 0;

	if (priority)
		count = inodes_stat.nr_unused / priority;
	prune_icache(count);
	/* FIXME: kmem_cache_shrink here should tell us
	   the number of pages freed, and it should
	   work in a __GFP_DMA/__GFP_HIGHMEM behaviour
	   to free only the interesting pages in
	   function of the needs of the current allocation. */
	kmem_cache_shrink(inode_cachep);

	return 0;
}
/*
 * Called with the inode lock held.
 * NOTE: we are not increasing the inode-refcount, you must call __iget()
 * by hand after calling find_inode now! This simplifies iunique and won't
 * add any additional branch in the common code.
 */
static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque)
{
	struct list_head *tmp;
	struct inode * inode;

	tmp = head;
	for (;;) {
		tmp = tmp->next;
		inode = NULL;
		if (tmp == head)
			break;
		inode = list_entry(tmp, struct inode, i_hash);
		if (inode->i_sb != sb)
			continue;
		if (inode->i_ino != ino)
			continue;
		if (find_actor && !find_actor(inode, ino, opaque))
			continue;
		break;
	}
	return inode;
}
/*
 * This just initializes the inode fields
 * to known values before returning the inode..
 *
 * i_sb, i_ino, i_count, i_state and the lists have
 * been initialized elsewhere..
 */
static void clean_inode(struct inode *inode)
{
	static struct address_space_operations empty_aops = {};
	static struct inode_operations empty_iops = {};
	static struct file_operations empty_fops = {};
	memset(&inode->u, 0, sizeof(inode->u));
	inode->i_sock = 0;
	inode->i_op = &empty_iops;
	inode->i_fop = &empty_fops;
	inode->i_nlink = 1;
	atomic_set(&inode->i_writecount, 0);
	inode->i_size = 0;
	inode->i_generation = 0;
	memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
	inode->i_pipe = NULL;
	inode->i_bdev = NULL;
	inode->i_data.a_ops = &empty_aops;
	inode->i_data.host = (void*)inode;
	inode->i_mapping = &inode->i_data;
}
/*
 * This is called by things like the networking layer
 * etc that want to get an inode without any inode
 * number, or filesystems that allocate new inodes with
 * no pre-existing information.
 */
struct inode * get_empty_inode(void)
{
	static unsigned long last_ino = 0;
	struct inode * inode;

	inode = alloc_inode();
	if (inode)
	{
		spin_lock(&inode_lock);
		list_add(&inode->i_list, &inode_in_use);
		inode->i_sb = NULL;
		inode->i_dev = 0;
		inode->i_ino = ++last_ino;
		inode->i_flags = 0;
		inode->i_count = 1;
		inode->i_state = 0;
		spin_unlock(&inode_lock);
		clean_inode(inode);
	}
	return inode;
}
/*
 * This is called without the inode lock held.. Be careful.
 *
 * We no longer cache the sb_flags in i_flags - see fs.h
 *	-- rmk@arm.uk.linux.org
 */
static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque)
{
	struct inode * inode;

	inode = alloc_inode();
	if (inode) {
		struct inode * old;

		spin_lock(&inode_lock);
		/* We released the lock, so.. */
		old = find_inode(sb, ino, head, find_actor, opaque);
		if (!old) {
			list_add(&inode->i_list, &inode_in_use);
			list_add(&inode->i_hash, head);
			inode->i_sb = sb;
			inode->i_dev = sb->s_dev;
			inode->i_ino = ino;
			inode->i_flags = 0;
			inode->i_count = 1;
			inode->i_state = I_LOCK;
			spin_unlock(&inode_lock);

			clean_inode(inode);
			sb->s_op->read_inode(inode);

			/*
			 * This is special!  We do not need the spinlock
			 * when clearing I_LOCK, because we're guaranteed
			 * that nobody else tries to do anything about the
			 * state of the inode when it is locked, as we
			 * just created it (so there can be no old holders
			 * that haven't tested I_LOCK).
			 */
			inode->i_state &= ~I_LOCK;
			wake_up(&inode->i_wait);

			return inode;
		}

		/*
		 * Uhhuh, somebody else created the same inode under
		 * us. Use the old inode instead of the one we just
		 * allocated.
		 */
		__iget(old);
		spin_unlock(&inode_lock);
		destroy_inode(inode);
		inode = old;
		wait_on_inode(inode);
	}
	return inode;
}
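/*
 * Fold the superblock pointer and the inode number down to HASH_BITS
 * bits; the higher-order bits are shifted down and added in so that
 * they still influence which bucket gets chosen.
 */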
static inline unsigned long hash(struct super_block *sb, unsigned long i_ino)
{
	unsigned long tmp = i_ino | (unsigned long) sb;
	tmp = tmp + (tmp >> HASH_BITS) + (tmp >> HASH_BITS*2);
	return tmp & HASH_MASK;
}
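/*
 * iunique: hand back an inode number that is above max_reserved and is
 * not currently present in the inode cache for this superblock.
 */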
/* Yeah, I know about quadratic hash. Maybe, later. */
ino_t iunique(struct super_block *sb, ino_t max_reserved)
{
	static ino_t counter = 0;
	struct inode *inode;
	struct list_head * head;
	ino_t res;
	spin_lock(&inode_lock);
retry:
	if (counter > max_reserved) {
		head = inode_hashtable + hash(sb,counter);
		inode = find_inode(sb, res = counter++, head, NULL, NULL);
		if (!inode) {
			spin_unlock(&inode_lock);
			return res;
		}
	} else {
		counter = max_reserved + 1;
	}
	goto retry;
}
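/*
 * igrab: try to take a new reference on an inode; returns NULL
 * (and takes no reference) if the inode is already being freed.
 */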
struct inode *igrab(struct inode *inode)
{
	spin_lock(&inode_lock);
	if (!(inode->i_state & I_FREEING))
		__iget(inode);
	else
		/*
		 * Handle the case where s_op->clear_inode has not been
		 * called yet, and somebody is calling igrab
		 * while the inode is getting freed.
		 */
		inode = NULL;
	spin_unlock(&inode_lock);
	if (inode)
		wait_on_inode(inode);
	return inode;
}
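/*
 * iget4: the heart of inode lookup.  Look the inode up in the hash
 * table; if it is there, grab a reference and wait until it is
 * unlocked.  Otherwise allocate a fresh inode and have the filesystem
 * fill it in via ->read_inode() (this happens in get_new_inode()).
 * The plain iget() is normally just this with a NULL find_actor.
 */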
struct inode *iget4(struct super_block *sb, unsigned long ino, find_inode_t find_actor, void *opaque)
{
	struct list_head * head = inode_hashtable + hash(sb,ino);
	struct inode * inode;

	spin_lock(&inode_lock);
	inode = find_inode(sb, ino, head, find_actor, opaque);
	if (inode) {
		__iget(inode);
		spin_unlock(&inode_lock);
		wait_on_inode(inode);
		return inode;
	}
	spin_unlock(&inode_lock);

	/*
	 * get_new_inode() will do the right thing, re-trying the search
	 * in case it had to block at any point.
	 */
	return get_new_inode(sb, ino, head, find_actor, opaque);
}
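/*
 * insert_inode_hash: hash an inode so that iget4() can find it and so
 * that __mark_inode_dirty() will consider it for the per-sb dirty list.
 * Inodes without a superblock are chained on anon_hash_chain.
 */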
void insert_inode_hash(struct inode *inode)
{
	struct list_head *head = &anon_hash_chain;
	if (inode->i_sb)
		head = inode_hashtable + hash(inode->i_sb, inode->i_ino);
	spin_lock(&inode_lock);
	list_add(&inode->i_hash, head);
	spin_unlock(&inode_lock);
}
void remove_inode_hash(struct inode *inode)
{
	spin_lock(&inode_lock);
	list_del(&inode->i_hash);
	INIT_LIST_HEAD(&inode->i_hash);
	spin_unlock(&inode_lock);
}
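/*
 * iput: drop a reference to an inode.  On the final iput an unlinked
 * inode is deleted (via ->delete_inode()/clear_inode()) and freed;
 * otherwise a hashed inode simply stays in the cache, and an unhashed
 * one is cleared and freed.
 */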
void iput(struct inode *inode)
{
	if (inode) {
		struct super_operations *op = NULL;
		int destroy = 0;

		if (inode->i_sb && inode->i_sb->s_op)
			op = inode->i_sb->s_op;
		if (op && op->put_inode)
			op->put_inode(inode);

		spin_lock(&inode_lock);
		if (!--inode->i_count) {
			if (!inode->i_nlink) {
				list_del(&inode->i_hash);
				INIT_LIST_HEAD(&inode->i_hash);
				list_del(&inode->i_list);
				INIT_LIST_HEAD(&inode->i_list);
				inode->i_state|=I_FREEING;
				spin_unlock(&inode_lock);

				if (inode->i_data.nrpages)
					truncate_inode_pages(&inode->i_data, 0);

				destroy = 1;
				if (op && op->delete_inode) {
					void (*delete)(struct inode *) = op->delete_inode;
					/* s_op->delete_inode internally calls clear_inode() */
					delete(inode);
				} else
					clear_inode(inode);
				if (inode->i_state != I_CLEAR)
					BUG();

				spin_lock(&inode_lock);
			} else {
				if (!list_empty(&inode->i_hash)) {
					if (!(inode->i_state & I_DIRTY)) {
						list_del(&inode->i_list);
						list_add(&inode->i_list,
							 &inode_unused);
					}
					inodes_stat.nr_unused++;
				} else {
					/* magic nfs path */
					list_del(&inode->i_list);
					INIT_LIST_HEAD(&inode->i_list);
					inode->i_state|=I_FREEING;
					spin_unlock(&inode_lock);
					clear_inode(inode);
					destroy = 1;
					spin_lock(&inode_lock);
				}
			}
#ifdef INODE_PARANOIA
			if (inode->i_flock)
				printk(KERN_ERR "iput: inode %s/%ld still has locks!\n",
					kdevname(inode->i_dev), inode->i_ino);
			if (!list_empty(&inode->i_dentry))
				printk(KERN_ERR "iput: device %s inode %ld still has aliases!\n",
					kdevname(inode->i_dev), inode->i_ino);
			if (inode->i_count)
				printk(KERN_ERR "iput: device %s inode %ld count changed, count=%d\n",
					kdevname(inode->i_dev), inode->i_ino, inode->i_count);
			if (atomic_read(&inode->i_sem.count) != 1)
				printk(KERN_ERR "iput: Aieee, semaphore in use inode %s/%ld, count=%d\n",
					kdevname(inode->i_dev), inode->i_ino, atomic_read(&inode->i_sem.count));
#endif
		}
		if (inode->i_count > (1<<31)) {
			printk(KERN_ERR "iput: inode %s/%ld count wrapped\n",
				kdevname(inode->i_dev), inode->i_ino);
		}
		spin_unlock(&inode_lock);
		if (destroy)
			destroy_inode(inode);
	}
}
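/*
 * bmap: map a block number within a file to its on-device block number
 * via the address_space ->bmap() method.  Returns 0 if the mapping
 * has no ->bmap method.
 */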
int bmap(struct inode * inode, int block)
{
	int res = 0;
	if (inode->i_mapping->a_ops->bmap)
		res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block);
	return res;
}
/*
 * Initialize the hash tables.
 */
void __init inode_init(void)
{
	int i;
	struct list_head *head = inode_hashtable;

	i = HASH_SIZE;
	do {
		INIT_LIST_HEAD(head);
		head++;
		i--;
	} while (i);

	/* inode slab cache */
	inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode),
					 0, SLAB_HWCACHE_ALIGN, init_once,
					 NULL);
	if (!inode_cachep)
		panic("cannot create inode slab cache");
}
void update_atime (struct inode *inode)
{
	if ( IS_NOATIME (inode) ) return;
	if ( IS_NODIRATIME (inode) && S_ISDIR (inode->i_mode) ) return;
	if ( IS_RDONLY (inode) ) return;
	inode->i_atime = CURRENT_TIME;
	mark_inode_dirty (inode);
}   /*  End Function update_atime  */
/*
 * Quota functions that want to walk the inode lists..
 */
#ifdef CONFIG_QUOTA

/* Functions back in dquot.c */
void put_dquot_list(struct list_head *);
int remove_inode_dquot_ref(struct inode *, short, struct list_head *);

void remove_dquot_ref(kdev_t dev, short type)
{
	struct super_block *sb = get_super(dev);
	struct inode *inode;
	struct list_head *act_head;
	LIST_HEAD(tofree_head);

	if (!sb || !sb->dq_op)
		return;	/* nothing to do */

	/* We have to be protected against other CPUs */
	spin_lock(&inode_lock);

	for (act_head = inode_in_use.next; act_head != &inode_in_use; act_head = act_head->next) {
		inode = list_entry(act_head, struct inode, i_list);
		if (inode->i_sb != sb || !IS_QUOTAINIT(inode))
			continue;
		remove_inode_dquot_ref(inode, type, &tofree_head);
	}
	for (act_head = inode_unused.next; act_head != &inode_unused; act_head = act_head->next) {
		inode = list_entry(act_head, struct inode, i_list);
		if (inode->i_sb != sb || !IS_QUOTAINIT(inode))
			continue;
		remove_inode_dquot_ref(inode, type, &tofree_head);
	}
	for (act_head = sb->s_dirty.next; act_head != &sb->s_dirty; act_head = act_head->next) {
		inode = list_entry(act_head, struct inode, i_list);
		if (!IS_QUOTAINIT(inode))
			continue;
		remove_inode_dquot_ref(inode, type, &tofree_head);
	}
	spin_unlock(&inode_lock);

	put_dquot_list(&tofree_head);
}
#endif