/*
 *  linux/fs/inode.c
 *
 *  (C) 1997 Linus Torvalds
 */
#include <linux/config.h>
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/dcache.h>
#include <linux/init.h>
#include <linux/quotaops.h>
#include <linux/slab.h>
/*
 * New inode.c implementation.
 *
 * This implementation has the basic premise of trying
 * to be extremely low-overhead and SMP-safe, yet be
 * simple enough to be "obviously correct".
 */
/* inode dynamic allocation 1999, Andrea Arcangeli <andrea@suse.de> */

#define INODE_PARANOIA 1
/* #define INODE_DEBUG 1 */
/*
 * Inode lookup is no longer as critical as it used to be:
 * most of the lookups are going to be through the dcache.
 */
#define HASH_BITS 8
#define HASH_SIZE (1UL << HASH_BITS)
#define HASH_MASK (HASH_SIZE-1)
/*
 * Each inode can be on two separate lists. One is
 * the hash list of the inode, used for lookups. The
 * other linked list is the "type" list:
 *  "in_use" - valid inode, i_count > 0, i_nlink > 0
 *  "dirty"  - as "in_use" but also dirty
 *  "unused" - valid inode, i_count = 0
 *
 * A "dirty" list is maintained for each super block,
 * allowing for low-overhead inode sync() operations.
 */
static LIST_HEAD(inode_in_use);
static LIST_HEAD(inode_unused);
static struct list_head inode_hashtable[HASH_SIZE];
static LIST_HEAD(anon_hash_chain); /* for inodes with NULL i_sb */
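
/*
 * Illustrative sketch (not part of the original file): the list
 * transitions described above, expressed through the public entry
 * points defined later in this file.  Hypothetical caller code,
 * for orientation only.
 */
#if 0
	struct inode *inode = iget(sb, ino);	/* unused -> in_use, i_count 0 -> 1 */
	mark_inode_dirty(inode);		/* in_use -> sb->s_dirty */
	sync_inodes(inode->i_dev);		/* s_dirty -> in_use once written out */
	iput(inode);				/* in_use -> unused, i_count 1 -> 0 */
#endif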
/*
 * A simple spinlock to protect the list manipulations.
 *
 * NOTE! You also have to own the lock if you change
 * the i_state of an inode while it is in use..
 */
spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;
/*
 * Statistics gathering..
 */
struct {
	int nr_inodes;
	int nr_unused;
} inodes_stat = {0, 0,};
static kmem_cache_t * inode_cachep;

#define alloc_inode() \
	 ((struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL))
#define destroy_inode(inode) kmem_cache_free(inode_cachep, (inode))
/*
 * These are initializations that only need to be done
 * once, because the fields are idempotent across use
 * of the inode, so let the slab be aware of that.
 */
static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
{
	struct inode * inode = (struct inode *) foo;

	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR)
	{
		memset(inode, 0, sizeof(*inode));
		init_waitqueue_head(&inode->i_wait);
		INIT_LIST_HEAD(&inode->i_hash);
		INIT_LIST_HEAD(&inode->i_data.pages);
		INIT_LIST_HEAD(&inode->i_dentry);
		sema_init(&inode->i_sem, 1);
		spin_lock_init(&inode->i_shared_lock);
	}
}
/*
 * Put the inode on the super block's dirty list.
 *
 * CAREFUL! We mark it dirty unconditionally, but
 * move it onto the dirty list only if it is hashed.
 * If it was not hashed, it will never be added to
 * the dirty list even if it is later hashed, as it
 * will have been marked dirty already.
 *
 * In short, make sure you hash any inodes _before_
 * you start marking them dirty..
 */
void __mark_inode_dirty(struct inode *inode)
{
	struct super_block * sb = inode->i_sb;

	if (sb) {
		spin_lock(&inode_lock);
		if (!(inode->i_state & I_DIRTY)) {
			inode->i_state |= I_DIRTY;
			/* Only add valid (ie hashed) inodes to the dirty list */
			if (!list_empty(&inode->i_hash)) {
				list_del(&inode->i_list);
				list_add(&inode->i_list, &sb->s_dirty);
			}
		}
		spin_unlock(&inode_lock);
	}
}
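
/*
 * Illustrative sketch (not part of the original file): the ordering the
 * comment above demands.  A filesystem creating a brand-new inode hashes
 * it first, so that dirtying can move it onto the s_dirty list.
 */
#if 0
	inode->i_ino = ino;
	insert_inode_hash(inode);	/* hash first... */
	mark_inode_dirty(inode);	/* ...then dirty: now it lands on sb->s_dirty */
#endif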
static void __wait_on_inode(struct inode * inode)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(&inode->i_wait, &wait);
repeat:
	set_current_state(TASK_UNINTERRUPTIBLE);
	if (inode->i_state & I_LOCK) {
		schedule();
		goto repeat;
	}
	remove_wait_queue(&inode->i_wait, &wait);
	current->state = TASK_RUNNING;
}
static inline void wait_on_inode(struct inode *inode)
{
	if (inode->i_state & I_LOCK)
		__wait_on_inode(inode);
}
static inline void write_inode(struct inode *inode)
{
	if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode)
		inode->i_sb->s_op->write_inode(inode);
}
static inline void sync_one(struct inode *inode)
{
	if (inode->i_state & I_LOCK) {
		spin_unlock(&inode_lock);
		__wait_on_inode(inode);
		spin_lock(&inode_lock);
	} else {
		list_del(&inode->i_list);
		list_add(&inode->i_list,
			 inode->i_count ? &inode_in_use : &inode_unused);
		/* Set I_LOCK, reset I_DIRTY */
		inode->i_state ^= I_DIRTY | I_LOCK;
		spin_unlock(&inode_lock);

		write_inode(inode);

		spin_lock(&inode_lock);
		inode->i_state &= ~I_LOCK;
		wake_up(&inode->i_wait);
	}
}
static inline void sync_list(struct list_head *head)
{
	struct list_head * tmp;

	while ((tmp = head->prev) != head)
		sync_one(list_entry(tmp, struct inode, i_list));
}
190 * "sync_inodes()" goes through the super block's dirty list,
191 * writes them out, and puts them back on the normal list.
193 void sync_inodes(kdev_t dev
)
195 struct super_block
* sb
= sb_entry(super_blocks
.next
);
198 * Search the super_blocks array for the device(s) to sync.
200 spin_lock(&inode_lock
);
201 for (; sb
!= sb_entry(&super_blocks
); sb
= sb_entry(sb
->s_list
.next
)) {
204 if (dev
&& sb
->s_dev
!= dev
)
207 sync_list(&sb
->s_dirty
);
212 spin_unlock(&inode_lock
);
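
/*
 * Illustrative sketch (not part of the original file): sync(2)-style
 * callers pass 0 to write back every super block's dirty list, while
 * per-device callers pass an actual kdev_t.
 */
#if 0
	sync_inodes(0);		/* all devices */
	sync_inodes(dev);	/* only inodes backed by this device */
#endif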
/*
 * Called with the spinlock already held..
 */
static void sync_all_inodes(void)
{
	struct super_block * sb = sb_entry(super_blocks.next);
	for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
		if (!sb->s_dev)
			continue;
		sync_list(&sb->s_dirty);
	}
}
void write_inode_now(struct inode *inode)
{
	struct super_block * sb = inode->i_sb;

	if (sb) {
		spin_lock(&inode_lock);
		while (inode->i_state & I_DIRTY)
			sync_one(inode);
		spin_unlock(&inode_lock);
	}
	else
		printk("write_inode_now: no super block\n");
}
/*
 * This is called by the filesystem to tell us
 * that the inode is no longer useful. We just
 * terminate it with extreme prejudice.
 */
void clear_inode(struct inode *inode)
{
	if (inode->i_data.nrpages)
		BUG();
	if (!(inode->i_state & I_FREEING))
		BUG();
	wait_on_inode(inode);
	if (IS_QUOTAINIT(inode))
		DQUOT_DROP(inode);
	if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->clear_inode)
		inode->i_sb->s_op->clear_inode(inode);
	if (inode->i_bdev) {
		bdput(inode->i_bdev);
		inode->i_bdev = NULL;
	}
	inode->i_state = 0;
}
/*
 * Dispose-list gets a local list with local inodes in it, so it doesn't
 * need to worry about list corruption and SMP locks.
 */
static void dispose_list(struct list_head * head)
{
	struct list_head * inode_entry;
	struct inode * inode;

	while ((inode_entry = head->next) != head)
	{
		list_del(inode_entry);

		inode = list_entry(inode_entry, struct inode, i_list);
		if (inode->i_data.nrpages)
			truncate_inode_pages(inode, 0);
		clear_inode(inode);
		destroy_inode(inode);
	}
}
/*
 * Invalidate all inodes for a device.
 */
static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose)
{
	struct list_head *next;
	int busy = 0, count = 0;

	next = head->next;
	for (;;) {
		struct list_head * tmp = next;
		struct inode * inode;

		next = next->next;
		if (tmp == head)
			break;
		inode = list_entry(tmp, struct inode, i_list);
		if (inode->i_sb != sb)
			continue;
		if (!inode->i_count) {
			list_del(&inode->i_hash);
			INIT_LIST_HEAD(&inode->i_hash);
			list_del(&inode->i_list);
			list_add(&inode->i_list, dispose);
			inode->i_state |= I_FREEING;
			count++;
			continue;
		}
		busy = 1;
	}
	/* only unused inodes may be cached with i_count zero */
	inodes_stat.nr_unused -= count;
	return busy;
}
/*
 * This is a two-stage process. First we collect all
 * offending inodes onto the throw-away list, and in
 * the second stage we actually dispose of them. This
 * is because we don't want to sleep while messing
 * with the global lists..
 */
int invalidate_inodes(struct super_block * sb)
{
	int busy;
	LIST_HEAD(throw_away);

	spin_lock(&inode_lock);
	busy = invalidate_list(&inode_in_use, sb, &throw_away);
	busy |= invalidate_list(&inode_unused, sb, &throw_away);
	busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
	spin_unlock(&inode_lock);

	dispose_list(&throw_away);

	return busy;
}
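
/*
 * Illustrative sketch (not part of the original file): unmount and media
 * change are the classic callers - a non-zero return means some inodes
 * on this super block are still referenced and cannot be thrown away.
 */
#if 0
	if (invalidate_inodes(sb))
		printk(KERN_WARNING "VFS: busy inodes, cannot invalidate\n");
#endif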
/*
 * This is called with the inode lock held. It searches
 * the unused list for freeable inodes, which are moved to a
 * temporary list and then destroyed by dispose_list().
 *
 * We don't expect to have to call this very often.
 *
 * N.B. The spinlock is released during the call to
 *      dispose_list().
 */
#define CAN_UNUSE(inode) \
	(((inode)->i_state | (inode)->i_data.nrpages) == 0)
#define INODE(entry)	(list_entry(entry, struct inode, i_list))

void prune_icache(int goal)
{
	LIST_HEAD(list);
	struct list_head *entry, *freeable = &list;
	int count = 0;
	struct inode * inode;

	spin_lock(&inode_lock);
	/* go simple and safe syncing everything before starting */
	sync_all_inodes();

	entry = inode_unused.prev;
	while (entry != &inode_unused)
	{
		struct list_head *tmp = entry;

		entry = entry->prev;
		inode = INODE(tmp);
		if (!CAN_UNUSE(inode))
			continue;
		list_del(tmp);
		list_del(&inode->i_hash);
		INIT_LIST_HEAD(&inode->i_hash);
		list_add(tmp, freeable);
		inode->i_state |= I_FREEING;
		count++;
		if (!--goal)
			break;
	}
	inodes_stat.nr_unused -= count;
	spin_unlock(&inode_lock);

	dispose_list(freeable);
}
int shrink_icache_memory(int priority, int gfp_mask, zone_t *zone)
{
	int count = 0;

	if (gfp_mask & __GFP_IO) {
		/* guard against a zero priority to avoid dividing by zero */
		if (priority)
			count = inodes_stat.nr_unused / priority;
		prune_icache(count);
		/* FIXME: kmem_cache_shrink here should tell us
		   the number of pages freed, and it should
		   honour __GFP_DMA/__GFP_HIGHMEM and free only
		   the pages that are interesting for the
		   current allocation. */
		kmem_cache_shrink(inode_cachep);
	}
	return 0;
}
static inline void __iget(struct inode * inode)
{
	if (!inode->i_count++) {
		if (!(inode->i_state & I_DIRTY)) {
			list_del(&inode->i_list);
			list_add(&inode->i_list, &inode_in_use);
		}
		inodes_stat.nr_unused--;
	}
}
/*
 * Called with the inode lock held.
 * NOTE: we are not increasing the inode-refcount, you must call __iget()
 * by hand after calling find_inode now! This simplifies iunique and won't
 * add any additional branch in the common code.
 */
static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque)
{
	struct list_head *tmp;
	struct inode * inode;

	tmp = head;
	for (;;) {
		tmp = tmp->next;
		inode = NULL;
		if (tmp == head)
			break;
		inode = list_entry(tmp, struct inode, i_hash);
		if (inode->i_sb != sb)
			continue;
		if (inode->i_ino != ino)
			continue;
		if (find_actor && !find_actor(inode, ino, opaque))
			continue;
		break;
	}
	return inode;
}
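
/*
 * Illustrative sketch (not part of the original file): the pattern the
 * NOTE above asks for - the caller takes the reference itself, while
 * still holding inode_lock.
 */
#if 0
	spin_lock(&inode_lock);
	inode = find_inode(sb, ino, head, NULL, NULL);
	if (inode)
		__iget(inode);	/* grab the reference by hand */
	spin_unlock(&inode_lock);
#endif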
/*
 * This just initializes the inode fields
 * to known values before returning the inode..
 *
 * i_sb, i_ino, i_count, i_state and the lists have
 * been initialized elsewhere..
 */
static void clean_inode(struct inode *inode)
{
	memset(&inode->u, 0, sizeof(inode->u));
	inode->i_sock = 0;
	inode->i_op = NULL;
	inode->i_nlink = 1;
	atomic_set(&inode->i_writecount, 0);
	inode->i_size = 0;
	inode->i_generation = 0;
	memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
	inode->i_pipe = NULL;
	inode->i_bdev = NULL;
}
/*
 * This is called by things like the networking layer
 * etc that want to get an inode without any inode
 * number, or filesystems that allocate new inodes with
 * no pre-existing information.
 */
struct inode * get_empty_inode(void)
{
	static unsigned long last_ino = 0;
	struct inode * inode;

	inode = alloc_inode();
	if (inode)
	{
		spin_lock(&inode_lock);
		list_add(&inode->i_list, &inode_in_use);
		inode->i_sb = NULL;
		inode->i_dev = 0;
		inode->i_ino = ++last_ino;
		inode->i_flags = 0;
		inode->i_count = 1;
		inode->i_state = 0;
		spin_unlock(&inode_lock);
		clean_inode(inode);
	}
	return inode;
}
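
/*
 * Illustrative sketch (not part of the original file): roughly how an
 * anonymous-inode user such as the socket layer would use this - no
 * backing super block, no on-disk inode number.
 */
#if 0
	struct inode *inode = get_empty_inode();
	if (!inode)
		return -ENOMEM;
	inode->i_mode = S_IFSOCK;
	/* ... attach private state, use it, then release it: */
	iput(inode);
#endif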
/*
 * This is called without the inode lock held.. Be careful.
 *
 * We no longer cache the sb_flags in i_flags - see fs.h
 *	-- rmk@arm.uk.linux.org
 */
static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque)
{
	struct inode * inode;

	inode = alloc_inode();
	if (inode) {
		struct inode * old;

		spin_lock(&inode_lock);
		/* We released the lock, so.. */
		old = find_inode(sb, ino, head, find_actor, opaque);
		if (!old) {
			list_add(&inode->i_list, &inode_in_use);
			list_add(&inode->i_hash, head);
			inode->i_sb = sb;
			inode->i_dev = sb->s_dev;
			inode->i_ino = ino;
			inode->i_flags = 0;
			inode->i_count = 1;
			inode->i_state = I_LOCK;
			spin_unlock(&inode_lock);

			clean_inode(inode);
			sb->s_op->read_inode(inode);

			/*
			 * This is special! We do not need the spinlock
			 * when clearing I_LOCK, because we're guaranteed
			 * that nobody else tries to do anything about the
			 * state of the inode when it is locked, as we
			 * just created it (so there can be no old holders
			 * that haven't tested I_LOCK).
			 */
			inode->i_state &= ~I_LOCK;
			wake_up(&inode->i_wait);

			return inode;
		}

		/*
		 * Uhhuh, somebody else created the same inode under
		 * us. Use the old inode instead of the one we just
		 * allocated.
		 */
		__iget(old);
		spin_unlock(&inode_lock);
		destroy_inode(inode);
		inode = old;
		wait_on_inode(inode);
	}
	return inode;
}
static inline unsigned long hash(struct super_block *sb, unsigned long i_ino)
{
	unsigned long tmp = i_ino | (unsigned long) sb;
	tmp = tmp + (tmp >> HASH_BITS) + (tmp >> HASH_BITS*2);
	return tmp & HASH_MASK;
}
/* Yeah, I know about quadratic hash. Maybe, later. */
ino_t iunique(struct super_block *sb, ino_t max_reserved)
{
	static ino_t counter = 0;
	struct inode *inode;
	struct list_head * head;
	ino_t res;

	spin_lock(&inode_lock);
retry:
	if (counter > max_reserved) {
		head = inode_hashtable + hash(sb,counter);
		inode = find_inode(sb, res = counter++, head, NULL, NULL);
		if (!inode) {
			spin_unlock(&inode_lock);
			return res;
		}
	} else {
		counter = max_reserved + 1;
	}
	goto retry;
}
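
/*
 * Illustrative sketch (not part of the original file): a filesystem that
 * fabricates in-core inodes can ask for a number guaranteed not to clash
 * with anything hashed on this super block; the reserved limit here is
 * hypothetical.
 */
#if 0
	inode->i_ino = iunique(sb, 255);	/* keep 0-255 for "real" inodes */
#endif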
struct inode *igrab(struct inode *inode)
{
	spin_lock(&inode_lock);
	if (!(inode->i_state & I_FREEING))
		__iget(inode);
	else
		inode = NULL;
	spin_unlock(&inode_lock);
	if (inode)
		wait_on_inode(inode);
	return inode;
}
struct inode *iget4(struct super_block *sb, unsigned long ino, find_inode_t find_actor, void *opaque)
{
	struct list_head * head = inode_hashtable + hash(sb,ino);
	struct inode * inode;

	spin_lock(&inode_lock);
	inode = find_inode(sb, ino, head, find_actor, opaque);
	if (inode) {
		__iget(inode);
		spin_unlock(&inode_lock);
		wait_on_inode(inode);
		return inode;
	}
	spin_unlock(&inode_lock);

	/*
	 * get_new_inode() will do the right thing, re-trying the search
	 * in case it had to block at any point.
	 */
	return get_new_inode(sb, ino, head, find_actor, opaque);
}
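
/*
 * Illustrative sketch (not part of the original file): normal filesystem
 * lookup goes through iget() (the no-actor wrapper around iget4() in
 * <linux/fs.h>) and is balanced by iput().
 */
#if 0
	struct inode *inode = iget(sb, ino);
	if (inode) {
		/* ... inode is read in and unlocked at this point ... */
		iput(inode);
	}
#endif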
void insert_inode_hash(struct inode *inode)
{
	struct list_head *head = &anon_hash_chain;

	if (inode->i_sb)
		head = inode_hashtable + hash(inode->i_sb, inode->i_ino);
	spin_lock(&inode_lock);
	list_add(&inode->i_hash, head);
	spin_unlock(&inode_lock);
}
void remove_inode_hash(struct inode *inode)
{
	spin_lock(&inode_lock);
	list_del(&inode->i_hash);
	INIT_LIST_HEAD(&inode->i_hash);
	spin_unlock(&inode_lock);
}
void iput(struct inode *inode)
{
	if (inode) {
		struct super_operations *op = NULL;

		if (inode->i_sb && inode->i_sb->s_op)
			op = inode->i_sb->s_op;
		if (op && op->put_inode)
			op->put_inode(inode);

		spin_lock(&inode_lock);
		if (!--inode->i_count) {
			if (!inode->i_nlink) {
				list_del(&inode->i_hash);
				INIT_LIST_HEAD(&inode->i_hash);
				list_del(&inode->i_list);
				INIT_LIST_HEAD(&inode->i_list);
				inode->i_state|=I_FREEING;
				if (op && op->delete_inode) {
					void (*delete)(struct inode *) = op->delete_inode;
					spin_unlock(&inode_lock);
					if (inode->i_data.nrpages)
						truncate_inode_pages(inode, 0);
					delete(inode);
					spin_lock(&inode_lock);
				}
			}
			if (list_empty(&inode->i_hash)) {
				list_del(&inode->i_list);
				INIT_LIST_HEAD(&inode->i_list);
				inode->i_state|=I_FREEING;
				spin_unlock(&inode_lock);
				clear_inode(inode);
				spin_lock(&inode_lock);
				goto destroy;
			}
			if (!(inode->i_state & I_DIRTY)) {
				list_del(&inode->i_list);
				list_add(&inode->i_list,
					 &inode_unused);
			}
			inodes_stat.nr_unused++;
#ifdef INODE_PARANOIA
			if (inode->i_state & I_LOCK)
				printk(KERN_ERR "iput: inode %s/%ld still has locks!\n",
					kdevname(inode->i_dev), inode->i_ino);
			if (!list_empty(&inode->i_dentry))
				printk(KERN_ERR "iput: device %s inode %ld still has aliases!\n",
					kdevname(inode->i_dev), inode->i_ino);
			if (inode->i_count)
				printk(KERN_ERR "iput: device %s inode %ld count changed, count=%d\n",
					kdevname(inode->i_dev), inode->i_ino, inode->i_count);
			if (atomic_read(&inode->i_sem.count) != 1)
				printk(KERN_ERR "iput: Aieee, semaphore in use inode %s/%ld, count=%d\n",
					kdevname(inode->i_dev), inode->i_ino, atomic_read(&inode->i_sem.count));
#endif
		} else if (inode->i_count > (1<<31)) {
			printk(KERN_ERR "iput: inode %s/%ld count wrapped\n",
				kdevname(inode->i_dev), inode->i_ino);
		}
		spin_unlock(&inode_lock);
		return;
destroy:
		spin_unlock(&inode_lock);
		destroy_inode(inode);
	}
}
int bmap(struct inode * inode, int block)
{
	struct buffer_head tmp;

	if (inode->i_op && inode->i_op->get_block) {
		tmp.b_state = 0;
		tmp.b_blocknr = 0;
		inode->i_op->get_block(inode, block, &tmp, 0);
		return tmp.b_blocknr;
	}
	return 0;
}
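
/*
 * Illustrative sketch (not part of the original file): this is what the
 * FIBMAP ioctl boils down to - logical file block in, physical device
 * block out, 0 when the block is a hole or unmapped.
 */
#if 0
	int phys = bmap(inode, 0);	/* where does file block 0 live on disk? */
	if (!phys)
		printk("file block 0 is not mapped\n");
#endif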
/*
 * Initialize the hash tables.
 */
void __init inode_init(void)
{
	int i;
	struct list_head *head = inode_hashtable;

	i = HASH_SIZE;
	do {
		INIT_LIST_HEAD(head);
		head++;
		i--;
	} while (i);

	/* inode slab cache */
	inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode),
					 0, SLAB_HWCACHE_ALIGN, init_once,
					 NULL);
	if (!inode_cachep)
		panic("cannot create inode slab cache");
}
void update_atime (struct inode *inode)
{
	if ( IS_NOATIME (inode) ) return;
	if ( IS_NODIRATIME (inode) && S_ISDIR (inode->i_mode) ) return;
	if ( IS_RDONLY (inode) ) return;
	inode->i_atime = CURRENT_TIME;
	mark_inode_dirty (inode);
}	/* End Function update_atime */
/*
 * Quota functions that want to walk the inode lists..
 */
#ifdef CONFIG_QUOTA

/* Functions back in dquot.c */
void put_dquot_list(struct list_head *);
int remove_inode_dquot_ref(struct inode *, short, struct list_head *);

void remove_dquot_ref(kdev_t dev, short type)
{
	struct super_block *sb = get_super(dev);
	struct inode *inode;
	struct list_head *act_head;
	LIST_HEAD(tofree_head);

	if (!sb || !sb->dq_op)
		return;	/* nothing to do */

	/* We have to be protected against other CPUs */
	spin_lock(&inode_lock);

	for (act_head = inode_in_use.next; act_head != &inode_in_use; act_head = act_head->next) {
		inode = list_entry(act_head, struct inode, i_list);
		if (inode->i_sb != sb || !IS_QUOTAINIT(inode))
			continue;
		remove_inode_dquot_ref(inode, type, &tofree_head);
	}
	for (act_head = inode_unused.next; act_head != &inode_unused; act_head = act_head->next) {
		inode = list_entry(act_head, struct inode, i_list);
		if (inode->i_sb != sb || !IS_QUOTAINIT(inode))
			continue;
		remove_inode_dquot_ref(inode, type, &tofree_head);
	}
	for (act_head = sb->s_dirty.next; act_head != &sb->s_dirty; act_head = act_head->next) {
		inode = list_entry(act_head, struct inode, i_list);
		if (!IS_QUOTAINIT(inode))
			continue;
		remove_inode_dquot_ref(inode, type, &tofree_head);
	}
	spin_unlock(&inode_lock);

	put_dquot_list(&tofree_head);
}

#endif