4 * (C) 1997 Linus Torvalds
7 #include <linux/config.h>
9 #include <linux/string.h>
11 #include <linux/dcache.h>
12 #include <linux/init.h>
13 #include <linux/quotaops.h>
14 #include <linux/slab.h>
17 * New inode.c implementation.
19 * This implementation has the basic premise of trying
20 * to be extremely low-overhead and SMP-safe, yet be
21 * simple enough to be "obviously correct".
26 /* inode dynamic allocation 1999, Andrea Arcangeli <andrea@suse.de> */
28 #define INODE_PARANOIA 1
29 /* #define INODE_DEBUG 1 */
32 * Inode lookup is no longer as critical as it used to be:
33 * most of the lookups are going to be through the dcache.
36 #define HASH_SIZE (1UL << HASH_BITS)
37 #define HASH_MASK (HASH_SIZE-1)
40 * Each inode can be on two separate lists. One is
41 * the hash list of the inode, used for lookups. The
42 * other linked list is the "type" list:
43 * "in_use" - valid inode, i_count > 0, i_nlink > 0
44 * "dirty" - as "in_use" but also dirty
45 * "unused" - valid inode, i_count = 0
47 * A "dirty" list is maintained for each super block,
48 * allowing for low-overhead inode sync() operations.
51 static LIST_HEAD(inode_in_use
);
52 static LIST_HEAD(inode_unused
);
53 static struct list_head inode_hashtable
[HASH_SIZE
];
54 static LIST_HEAD(anon_hash_chain
); /* for inodes with NULL i_sb */
57 * A simple spinlock to protect the list manipulations.
59 * NOTE! You also have to own the lock if you change
60 * the i_state of an inode while it is in use..
62 spinlock_t inode_lock
= SPIN_LOCK_UNLOCKED
;
65 * Statistics gathering..
71 } inodes_stat
= {0, 0,};
73 static kmem_cache_t
* inode_cachep
;
75 #define alloc_inode() \
76 ((struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL))
77 #define destroy_inode(inode) kmem_cache_free(inode_cachep, (inode))
80 * These are initializations that only need to be done
81 * once, because the fields are idempotent across use
82 * of the inode, so let the slab aware of that.
84 static void init_once(void * foo
, kmem_cache_t
* cachep
, unsigned long flags
)
86 struct inode
* inode
= (struct inode
*) foo
;
88 if ((flags
& (SLAB_CTOR_VERIFY
|SLAB_CTOR_CONSTRUCTOR
)) ==
89 SLAB_CTOR_CONSTRUCTOR
)
91 memset(inode
, 0, sizeof(*inode
));
92 init_waitqueue_head(&inode
->i_wait
);
93 INIT_LIST_HEAD(&inode
->i_hash
);
94 INIT_LIST_HEAD(&inode
->i_data
.pages
);
95 INIT_LIST_HEAD(&inode
->i_dentry
);
96 sema_init(&inode
->i_sem
, 1);
97 sema_init(&inode
->i_zombie
, 1);
98 spin_lock_init(&inode
->i_data
.i_shared_lock
);
103 * Put the inode on the super block's dirty list.
105 * CAREFUL! We mark it dirty unconditionally, but
106 * move it onto the dirty list only if it is hashed.
107 * If it was not hashed, it will never be added to
108 * the dirty list even if it is later hashed, as it
109 * will have been marked dirty already.
111 * In short, make sure you hash any inodes _before_
112 * you start marking them dirty..
114 void __mark_inode_dirty(struct inode
*inode
)
116 struct super_block
* sb
= inode
->i_sb
;
119 spin_lock(&inode_lock
);
120 if (!(inode
->i_state
& I_DIRTY
)) {
121 inode
->i_state
|= I_DIRTY
;
122 /* Only add valid (ie hashed) inodes to the dirty list */
123 if (!list_empty(&inode
->i_hash
)) {
124 list_del(&inode
->i_list
);
125 list_add(&inode
->i_list
, &sb
->s_dirty
);
128 spin_unlock(&inode_lock
);
132 static void __wait_on_inode(struct inode
* inode
)
134 DECLARE_WAITQUEUE(wait
, current
);
136 add_wait_queue(&inode
->i_wait
, &wait
);
138 set_current_state(TASK_UNINTERRUPTIBLE
);
139 if (inode
->i_state
& I_LOCK
) {
143 remove_wait_queue(&inode
->i_wait
, &wait
);
144 current
->state
= TASK_RUNNING
;
147 static inline void wait_on_inode(struct inode
*inode
)
149 if (inode
->i_state
& I_LOCK
)
150 __wait_on_inode(inode
);
154 static inline void write_inode(struct inode
*inode
)
156 if (inode
->i_sb
&& inode
->i_sb
->s_op
&& inode
->i_sb
->s_op
->write_inode
)
157 inode
->i_sb
->s_op
->write_inode(inode
);
160 static inline void __iget(struct inode
* inode
)
162 if (!inode
->i_count
++)
164 if (!(inode
->i_state
& I_DIRTY
))
166 list_del(&inode
->i_list
);
167 list_add(&inode
->i_list
, &inode_in_use
);
169 inodes_stat
.nr_unused
--;
173 static inline void sync_one(struct inode
*inode
)
175 if (inode
->i_state
& I_LOCK
) {
177 spin_unlock(&inode_lock
);
178 __wait_on_inode(inode
);
180 spin_lock(&inode_lock
);
182 list_del(&inode
->i_list
);
183 list_add(&inode
->i_list
,
184 inode
->i_count
? &inode_in_use
: &inode_unused
);
185 /* Set I_LOCK, reset I_DIRTY */
186 inode
->i_state
^= I_DIRTY
| I_LOCK
;
187 spin_unlock(&inode_lock
);
191 spin_lock(&inode_lock
);
192 inode
->i_state
&= ~I_LOCK
;
193 wake_up(&inode
->i_wait
);
197 static inline void sync_list(struct list_head
*head
)
199 struct list_head
* tmp
;
201 while ((tmp
= head
->prev
) != head
)
202 sync_one(list_entry(tmp
, struct inode
, i_list
));
206 * "sync_inodes()" goes through the super block's dirty list,
207 * writes them out, and puts them back on the normal list.
209 void sync_inodes(kdev_t dev
)
211 struct super_block
* sb
= sb_entry(super_blocks
.next
);
214 * Search the super_blocks array for the device(s) to sync.
216 spin_lock(&inode_lock
);
217 for (; sb
!= sb_entry(&super_blocks
); sb
= sb_entry(sb
->s_list
.next
)) {
220 if (dev
&& sb
->s_dev
!= dev
)
223 sync_list(&sb
->s_dirty
);
228 spin_unlock(&inode_lock
);
232 * Called with the spinlock already held..
234 static void sync_all_inodes(void)
236 struct super_block
* sb
= sb_entry(super_blocks
.next
);
237 for (; sb
!= sb_entry(&super_blocks
); sb
= sb_entry(sb
->s_list
.next
)) {
240 sync_list(&sb
->s_dirty
);
247 void write_inode_now(struct inode
*inode
)
249 struct super_block
* sb
= inode
->i_sb
;
252 spin_lock(&inode_lock
);
253 while (inode
->i_state
& I_DIRTY
)
255 spin_unlock(&inode_lock
);
258 printk("write_inode_now: no super block\n");
262 * This is called by the filesystem to tell us
263 * that the inode is no longer useful. We just
264 * terminate it with extreme prejudice.
266 void clear_inode(struct inode
*inode
)
268 if (inode
->i_data
.nrpages
)
270 if (!(inode
->i_state
& I_FREEING
))
272 if (inode
->i_state
& I_CLEAR
)
274 wait_on_inode(inode
);
275 if (IS_QUOTAINIT(inode
))
277 if (inode
->i_sb
&& inode
->i_sb
->s_op
&& inode
->i_sb
->s_op
->clear_inode
)
278 inode
->i_sb
->s_op
->clear_inode(inode
);
280 bdput(inode
->i_bdev
);
281 inode
->i_bdev
= NULL
;
283 inode
->i_state
= I_CLEAR
;
287 * Dispose-list gets a local list with local inodes in it, so it doesn't
288 * need to worry about list corruption and SMP locks.
290 static void dispose_list(struct list_head
* head
)
292 struct list_head
* inode_entry
;
293 struct inode
* inode
;
295 while ((inode_entry
= head
->next
) != head
)
297 list_del(inode_entry
);
299 inode
= list_entry(inode_entry
, struct inode
, i_list
);
300 if (inode
->i_data
.nrpages
)
301 truncate_inode_pages(&inode
->i_data
, 0);
303 destroy_inode(inode
);
308 * Invalidate all inodes for a device.
310 static int invalidate_list(struct list_head
*head
, struct super_block
* sb
, struct list_head
* dispose
)
312 struct list_head
*next
;
313 int busy
= 0, count
= 0;
317 struct list_head
* tmp
= next
;
318 struct inode
* inode
;
323 inode
= list_entry(tmp
, struct inode
, i_list
);
324 if (inode
->i_sb
!= sb
)
326 if (!inode
->i_count
) {
327 list_del(&inode
->i_hash
);
328 INIT_LIST_HEAD(&inode
->i_hash
);
329 list_del(&inode
->i_list
);
330 list_add(&inode
->i_list
, dispose
);
331 inode
->i_state
|= I_FREEING
;
337 /* only unused inodes may be cached with i_count zero */
338 inodes_stat
.nr_unused
-= count
;
343 * This is a two-stage process. First we collect all
344 * offending inodes onto the throw-away list, and in
345 * the second stage we actually dispose of them. This
346 * is because we don't want to sleep while messing
347 * with the global lists..
349 int invalidate_inodes(struct super_block
* sb
)
352 LIST_HEAD(throw_away
);
354 spin_lock(&inode_lock
);
355 busy
= invalidate_list(&inode_in_use
, sb
, &throw_away
);
356 busy
|= invalidate_list(&inode_unused
, sb
, &throw_away
);
357 busy
|= invalidate_list(&sb
->s_dirty
, sb
, &throw_away
);
358 spin_unlock(&inode_lock
);
360 dispose_list(&throw_away
);
366 * This is called with the inode lock held. It searches
367 * the in-use for freeable inodes, which are moved to a
368 * temporary list and then placed on the unused list by
371 * We don't expect to have to call this very often.
373 * N.B. The spinlock is released during the call to
376 #define CAN_UNUSE(inode) \
377 (((inode)->i_state | (inode)->i_data.nrpages) == 0)
378 #define INODE(entry) (list_entry(entry, struct inode, i_list))
380 void prune_icache(int goal
)
383 struct list_head
*entry
, *freeable
= &list
;
385 struct inode
* inode
;
387 spin_lock(&inode_lock
);
388 /* go simple and safe syncing everything before starting */
391 entry
= inode_unused
.prev
;
392 while (entry
!= &inode_unused
)
394 struct list_head
*tmp
= entry
;
398 if (inode
->i_state
& (I_FREEING
|I_CLEAR
))
400 if (!CAN_UNUSE(inode
))
405 list_del(&inode
->i_hash
);
406 INIT_LIST_HEAD(&inode
->i_hash
);
407 list_add(tmp
, freeable
);
408 inode
->i_state
|= I_FREEING
;
413 inodes_stat
.nr_unused
-= count
;
414 spin_unlock(&inode_lock
);
416 dispose_list(freeable
);
419 int shrink_icache_memory(int priority
, int gfp_mask
, zone_t
*zone
)
424 count
= inodes_stat
.nr_unused
/ priority
;
426 /* FIXME: kmem_cache_shrink here should tell us
427 the number of pages freed, and it should
428 work in a __GFP_DMA/__GFP_HIGHMEM behaviour
429 to free only the interesting pages in
430 function of the needs of the current allocation. */
431 kmem_cache_shrink(inode_cachep
);
437 * Called with the inode lock held.
438 * NOTE: we are not increasing the inode-refcount, you must call __iget()
439 * by hand after calling find_inode now! This simplify iunique and won't
440 * add any additional branch in the common code.
442 static struct inode
* find_inode(struct super_block
* sb
, unsigned long ino
, struct list_head
*head
, find_inode_t find_actor
, void *opaque
)
444 struct list_head
*tmp
;
445 struct inode
* inode
;
453 inode
= list_entry(tmp
, struct inode
, i_hash
);
454 if (inode
->i_sb
!= sb
)
456 if (inode
->i_ino
!= ino
)
458 if (find_actor
&& !find_actor(inode
, ino
, opaque
))
466 * This just initializes the inode fields
467 * to known values before returning the inode..
469 * i_sb, i_ino, i_count, i_state and the lists have
470 * been initialized elsewhere..
472 static void clean_inode(struct inode
*inode
)
474 static struct address_space_operations empty_aops
= {};
475 static struct inode_operations empty_iops
= {};
476 static struct file_operations empty_fops
= {};
477 memset(&inode
->u
, 0, sizeof(inode
->u
));
479 inode
->i_op
= &empty_iops
;
480 inode
->i_fop
= &empty_fops
;
482 atomic_set(&inode
->i_writecount
, 0);
484 inode
->i_generation
= 0;
485 memset(&inode
->i_dquot
, 0, sizeof(inode
->i_dquot
));
486 inode
->i_pipe
= NULL
;
487 inode
->i_bdev
= NULL
;
488 inode
->i_data
.a_ops
= &empty_aops
;
489 inode
->i_data
.host
= (void*)inode
;
490 inode
->i_mapping
= &inode
->i_data
;
494 * This is called by things like the networking layer
495 * etc that want to get an inode without any inode
496 * number, or filesystems that allocate new inodes with
497 * no pre-existing information.
499 struct inode
* get_empty_inode(void)
501 static unsigned long last_ino
= 0;
502 struct inode
* inode
;
504 inode
= alloc_inode();
507 spin_lock(&inode_lock
);
508 list_add(&inode
->i_list
, &inode_in_use
);
511 inode
->i_ino
= ++last_ino
;
515 spin_unlock(&inode_lock
);
522 * This is called without the inode lock held.. Be careful.
524 * We no longer cache the sb_flags in i_flags - see fs.h
525 * -- rmk@arm.uk.linux.org
527 static struct inode
* get_new_inode(struct super_block
*sb
, unsigned long ino
, struct list_head
*head
, find_inode_t find_actor
, void *opaque
)
529 struct inode
* inode
;
531 inode
= alloc_inode();
535 spin_lock(&inode_lock
);
536 /* We released the lock, so.. */
537 old
= find_inode(sb
, ino
, head
, find_actor
, opaque
);
539 list_add(&inode
->i_list
, &inode_in_use
);
540 list_add(&inode
->i_hash
, head
);
542 inode
->i_dev
= sb
->s_dev
;
546 inode
->i_state
= I_LOCK
;
547 spin_unlock(&inode_lock
);
550 sb
->s_op
->read_inode(inode
);
553 * This is special! We do not need the spinlock
554 * when clearing I_LOCK, because we're guaranteed
555 * that nobody else tries to do anything about the
556 * state of the inode when it is locked, as we
557 * just created it (so there can be no old holders
558 * that haven't tested I_LOCK).
560 inode
->i_state
&= ~I_LOCK
;
561 wake_up(&inode
->i_wait
);
567 * Uhhuh, somebody else created the same inode under
568 * us. Use the old inode instead of the one we just
572 spin_unlock(&inode_lock
);
573 destroy_inode(inode
);
575 wait_on_inode(inode
);
580 static inline unsigned long hash(struct super_block
*sb
, unsigned long i_ino
)
582 unsigned long tmp
= i_ino
| (unsigned long) sb
;
583 tmp
= tmp
+ (tmp
>> HASH_BITS
) + (tmp
>> HASH_BITS
*2);
584 return tmp
& HASH_MASK
;
587 /* Yeah, I know about quadratic hash. Maybe, later. */
588 ino_t
iunique(struct super_block
*sb
, ino_t max_reserved
)
590 static ino_t counter
= 0;
592 struct list_head
* head
;
594 spin_lock(&inode_lock
);
596 if (counter
> max_reserved
) {
597 head
= inode_hashtable
+ hash(sb
,counter
);
598 inode
= find_inode(sb
, res
= counter
++, head
, NULL
, NULL
);
600 spin_unlock(&inode_lock
);
604 counter
= max_reserved
+ 1;
610 struct inode
*igrab(struct inode
*inode
)
612 spin_lock(&inode_lock
);
613 if (!(inode
->i_state
& I_FREEING
))
617 * Handle the case where s_op->clear_inode is not been
618 * called yet, and somebody is calling igrab
619 * while the inode is getting freed.
622 spin_unlock(&inode_lock
);
624 wait_on_inode(inode
);
628 struct inode
*iget4(struct super_block
*sb
, unsigned long ino
, find_inode_t find_actor
, void *opaque
)
630 struct list_head
* head
= inode_hashtable
+ hash(sb
,ino
);
631 struct inode
* inode
;
633 spin_lock(&inode_lock
);
634 inode
= find_inode(sb
, ino
, head
, find_actor
, opaque
);
637 spin_unlock(&inode_lock
);
638 wait_on_inode(inode
);
641 spin_unlock(&inode_lock
);
644 * get_new_inode() will do the right thing, re-trying the search
645 * in case it had to block at any point.
647 return get_new_inode(sb
, ino
, head
, find_actor
, opaque
);
650 void insert_inode_hash(struct inode
*inode
)
652 struct list_head
*head
= &anon_hash_chain
;
654 head
= inode_hashtable
+ hash(inode
->i_sb
, inode
->i_ino
);
655 spin_lock(&inode_lock
);
656 list_add(&inode
->i_hash
, head
);
657 spin_unlock(&inode_lock
);
660 void remove_inode_hash(struct inode
*inode
)
662 spin_lock(&inode_lock
);
663 list_del(&inode
->i_hash
);
664 INIT_LIST_HEAD(&inode
->i_hash
);
665 spin_unlock(&inode_lock
);
668 void iput(struct inode
*inode
)
671 struct super_operations
*op
= NULL
;
674 if (inode
->i_sb
&& inode
->i_sb
->s_op
)
675 op
= inode
->i_sb
->s_op
;
676 if (op
&& op
->put_inode
)
677 op
->put_inode(inode
);
679 spin_lock(&inode_lock
);
680 if (!--inode
->i_count
) {
681 if (!inode
->i_nlink
) {
682 list_del(&inode
->i_hash
);
683 INIT_LIST_HEAD(&inode
->i_hash
);
684 list_del(&inode
->i_list
);
685 INIT_LIST_HEAD(&inode
->i_list
);
686 inode
->i_state
|=I_FREEING
;
687 spin_unlock(&inode_lock
);
689 if (inode
->i_data
.nrpages
)
690 truncate_inode_pages(&inode
->i_data
, 0);
693 if (op
&& op
->delete_inode
) {
694 void (*delete)(struct inode
*) = op
->delete_inode
;
695 /* s_op->delete_inode internally recalls clear_inode() */
699 if (inode
->i_state
!= I_CLEAR
)
702 spin_lock(&inode_lock
);
704 if (!list_empty(&inode
->i_hash
)) {
705 if (!(inode
->i_state
& I_DIRTY
)) {
706 list_del(&inode
->i_list
);
707 list_add(&inode
->i_list
,
710 inodes_stat
.nr_unused
++;
713 list_del(&inode
->i_list
);
714 INIT_LIST_HEAD(&inode
->i_list
);
715 inode
->i_state
|=I_FREEING
;
716 spin_unlock(&inode_lock
);
719 spin_lock(&inode_lock
);
722 #ifdef INODE_PARANOIA
724 printk(KERN_ERR
"iput: inode %s/%ld still has locks!\n",
725 kdevname(inode
->i_dev
), inode
->i_ino
);
726 if (!list_empty(&inode
->i_dentry
))
727 printk(KERN_ERR
"iput: device %s inode %ld still has aliases!\n",
728 kdevname(inode
->i_dev
), inode
->i_ino
);
730 printk(KERN_ERR
"iput: device %s inode %ld count changed, count=%d\n",
731 kdevname(inode
->i_dev
), inode
->i_ino
, inode
->i_count
);
732 if (atomic_read(&inode
->i_sem
.count
) != 1)
733 printk(KERN_ERR
"iput: Aieee, semaphore in use inode %s/%ld, count=%d\n",
734 kdevname(inode
->i_dev
), inode
->i_ino
, atomic_read(&inode
->i_sem
.count
));
737 if (inode
->i_count
> (1<<31)) {
738 printk(KERN_ERR
"iput: inode %s/%ld count wrapped\n",
739 kdevname(inode
->i_dev
), inode
->i_ino
);
741 spin_unlock(&inode_lock
);
743 destroy_inode(inode
);
747 int bmap(struct inode
* inode
, int block
)
750 if (inode
->i_mapping
->a_ops
->bmap
)
751 res
= inode
->i_mapping
->a_ops
->bmap(inode
->i_mapping
, block
);
756 * Initialize the hash tables.
758 void __init
inode_init(void)
761 struct list_head
*head
= inode_hashtable
;
765 INIT_LIST_HEAD(head
);
770 /* inode slab cache */
771 inode_cachep
= kmem_cache_create("inode_cache", sizeof(struct inode
),
772 0, SLAB_HWCACHE_ALIGN
, init_once
,
775 panic("cannot create inode slab cache");
778 void update_atime (struct inode
*inode
)
780 if ( IS_NOATIME (inode
) ) return;
781 if ( IS_NODIRATIME (inode
) && S_ISDIR (inode
->i_mode
) ) return;
782 if ( IS_RDONLY (inode
) ) return;
783 inode
->i_atime
= CURRENT_TIME
;
784 mark_inode_dirty (inode
);
785 } /* End Function update_atime */
789 * Quota functions that want to walk the inode lists..
793 /* Functions back in dquot.c */
794 void put_dquot_list(struct list_head
*);
795 int remove_inode_dquot_ref(struct inode
*, short, struct list_head
*);
797 void remove_dquot_ref(kdev_t dev
, short type
)
799 struct super_block
*sb
= get_super(dev
);
801 struct list_head
*act_head
;
802 LIST_HEAD(tofree_head
);
804 if (!sb
|| !sb
->dq_op
)
805 return; /* nothing to do */
807 /* We have to be protected against other CPUs */
808 spin_lock(&inode_lock
);
810 for (act_head
= inode_in_use
.next
; act_head
!= &inode_in_use
; act_head
= act_head
->next
) {
811 inode
= list_entry(act_head
, struct inode
, i_list
);
812 if (inode
->i_sb
!= sb
|| !IS_QUOTAINIT(inode
))
814 remove_inode_dquot_ref(inode
, type
, &tofree_head
);
816 for (act_head
= inode_unused
.next
; act_head
!= &inode_unused
; act_head
= act_head
->next
) {
817 inode
= list_entry(act_head
, struct inode
, i_list
);
818 if (inode
->i_sb
!= sb
|| !IS_QUOTAINIT(inode
))
820 remove_inode_dquot_ref(inode
, type
, &tofree_head
);
822 for (act_head
= sb
->s_dirty
.next
; act_head
!= &sb
->s_dirty
; act_head
= act_head
->next
) {
823 inode
= list_entry(act_head
, struct inode
, i_list
);
824 if (!IS_QUOTAINIT(inode
))
826 remove_inode_dquot_ref(inode
, type
, &tofree_head
);
828 spin_unlock(&inode_lock
);
830 put_dquot_list(&tofree_head
);