4 * (C) 1997 Linus Torvalds
8 #include <linux/string.h>
10 #include <linux/dcache.h>
11 #include <linux/init.h>
12 #include <linux/quotaops.h>
13 #include <linux/slab.h>
16 * New inode.c implementation.
18 * This implementation has the basic premise of trying
19 * to be extremely low-overhead and SMP-safe, yet be
20 * simple enough to be "obviously correct".
25 /* inode dynamic allocation 1999, Andrea Arcangeli <andrea@suse.de> */
/*
 * Compile-time switches and the global inode bookkeeping state.
 *
 * NOTE(review): this file appears to be a lossy extraction — original
 * file line numbers are fused into the text and some lines (comment
 * delimiters, braces, whole statements) are missing.  All code below is
 * left byte-identical; only comments are added.
 */
27 #define INODE_PARANOIA 1
28 /* #define INODE_DEBUG 1 */
31 * Inode lookup is no longer as critical as it used to be:
32 * most of the lookups are going to be through the dcache.
35 #define HASH_SIZE (1UL << HASH_BITS)
36 #define HASH_MASK (HASH_SIZE-1)
39 * Each inode can be on two separate lists. One is
40 * the hash list of the inode, used for lookups. The
41 * other linked list is the "type" list:
42 * "in_use" - valid inode, i_count > 0, i_nlink > 0
43 * "dirty" - as "in_use" but also dirty
44 * "unused" - valid inode, i_count = 0
46 * A "dirty" list is maintained for each super block,
47 * allowing for low-overhead inode sync() operations.
/* Global lists: referenced inodes vs. reclaimable unused inodes. */
50 static LIST_HEAD(inode_in_use
);
51 static LIST_HEAD(inode_unused
);
/* Hash table for inode lookup; HASH_BITS is presumably defined in a
   dropped line above — TODO confirm. */
52 static struct list_head inode_hashtable
[HASH_SIZE
];
55 * A simple spinlock to protect the list manipulations.
57 * NOTE! You also have to own the lock if you change
58 * the i_state of an inode while it is in use..
/* Single global lock guarding the lists above and i_state changes. */
60 spinlock_t inode_lock
= SPIN_LOCK_UNLOCKED
;
63 * Statistics gathering..
/* NOTE(review): the struct declaration this `}` closes was lost in
   extraction; only the initializer tail survives. */
69 } inodes_stat
= {0, 0,};
/* Slab cache from which struct inode objects are carved. */
71 static kmem_cache_t
* inode_cachep
;
73 #define alloc_inode() \
74 ((struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL))
75 #define destroy_inode(inode) kmem_cache_free(inode_cachep, (inode))
78 * These are initializations that only need to be done
79 * once, because the fields are idempotent across use
80 * of the inode, so let the slab aware of that.
/*
 * Slab constructor: runs once per slab object (not once per
 * allocation).  Zeroes the inode and sets up the embedded waitqueue,
 * list heads, semaphore and spinlock.
 */
82 static void init_once(void * foo
, kmem_cache_t
* cachep
, unsigned long flags
)
84 struct inode
* inode
= (struct inode
*) foo
;
/* Only construct on genuine constructor calls, not verify passes. */
86 if ((flags
& (SLAB_CTOR_VERIFY
|SLAB_CTOR_CONSTRUCTOR
)) ==
87 SLAB_CTOR_CONSTRUCTOR
)
89 memset(inode
, 0, sizeof(*inode
));
90 init_waitqueue_head(&inode
->i_wait
);
91 INIT_LIST_HEAD(&inode
->i_hash
);
92 INIT_LIST_HEAD(&inode
->i_data
.pages
);
93 INIT_LIST_HEAD(&inode
->i_dentry
);
/* i_sem starts unlocked (count == 1). */
94 sema_init(&inode
->i_sem
, 1);
95 spin_lock_init(&inode
->i_shared_lock
);
100 * Put the inode on the super block's dirty list.
102 * CAREFUL! We mark it dirty unconditionally, but
103 * move it onto the dirty list only if it is hashed.
104 * If it was not hashed, it will never be added to
105 * the dirty list even if it is later hashed, as it
106 * will have been marked dirty already.
108 * In short, make sure you hash any inodes _before_
109 * you start marking them dirty..
/*
 * Mark an inode dirty; if it is hashed, also move it onto its super
 * block's s_dirty list.  All list/state work is under inode_lock.
 */
111 void __mark_inode_dirty(struct inode
*inode
)
113 struct super_block
* sb
= inode
->i_sb
;
116 spin_lock(&inode_lock
);
/* Already-dirty inodes are on the right list — nothing to move. */
117 if (!(inode
->i_state
& I_DIRTY
)) {
118 inode
->i_state
|= I_DIRTY
;
119 /* Only add valid (ie hashed) inodes to the dirty list */
120 if (!list_empty(&inode
->i_hash
)) {
121 list_del(&inode
->i_list
);
122 list_add(&inode
->i_list
, &sb
->s_dirty
);
125 spin_unlock(&inode_lock
);
/*
 * Sleep until the inode's I_LOCK bit clears: classic waitqueue idiom
 * (enqueue, set task state, re-check condition, then sleep).
 * NOTE(review): the schedule() call/loop between the I_LOCK re-check
 * and the dequeue appears to have been lost in extraction.
 */
129 static void __wait_on_inode(struct inode
* inode
)
131 DECLARE_WAITQUEUE(wait
, current
);
133 add_wait_queue(&inode
->i_wait
, &wait
);
135 set_current_state(TASK_UNINTERRUPTIBLE
);
136 if (inode
->i_state
& I_LOCK
) {
140 remove_wait_queue(&inode
->i_wait
, &wait
);
/* Restore runnable state on the way out. */
141 current
->state
= TASK_RUNNING
;
/* Fast-path wrapper: only take the slow waitqueue path when the inode
   is actually locked. */
144 static inline void wait_on_inode(struct inode
*inode
)
146 if (inode
->i_state
& I_LOCK
)
147 __wait_on_inode(inode
);
/* Invoke the filesystem's write_inode method, if one is provided. */
151 static inline void write_inode(struct inode
*inode
)
153 if (inode
->i_sb
&& inode
->i_sb
->s_op
&& inode
->i_sb
->s_op
->write_inode
)
154 inode
->i_sb
->s_op
->write_inode(inode
);
/*
 * Write one dirty inode back.  Called with inode_lock held; the lock
 * is dropped around blocking operations and retaken afterwards.
 * NOTE(review): the write_inode() call between the unlock/relock pair
 * appears to have been lost in extraction.
 */
157 static inline void sync_one(struct inode
*inode
)
/* Someone else is already writing this inode — just wait for them. */
159 if (inode
->i_state
& I_LOCK
) {
160 spin_unlock(&inode_lock
);
161 __wait_on_inode(inode
);
162 spin_lock(&inode_lock
);
/* Move the inode off the dirty list onto the proper clean list. */
164 list_del(&inode
->i_list
);
165 list_add(&inode
->i_list
,
166 inode
->i_count
? &inode_in_use
: &inode_unused
);
167 /* Set I_LOCK, reset I_DIRTY */
168 inode
->i_state
^= I_DIRTY
| I_LOCK
;
169 spin_unlock(&inode_lock
);
/* Write finished: clear the lock bit and wake any waiters. */
173 spin_lock(&inode_lock
);
174 inode
->i_state
&= ~I_LOCK
;
175 wake_up(&inode
->i_wait
);
/* Drain a dirty list from the tail, syncing each inode in turn. */
179 static inline void sync_list(struct list_head
*head
)
181 struct list_head
* tmp
;
183 while ((tmp
= head
->prev
) != head
)
184 sync_one(list_entry(tmp
, struct inode
, i_list
));
188 * "sync_inodes()" goes through the super block's dirty list,
189 * writes them out, and puts them back on the normal list.
/*
 * Sync dirty inodes for one device (or, presumably, all devices when
 * dev == 0 — TODO confirm), walking the global super_blocks list under
 * inode_lock.
 */
191 void sync_inodes(kdev_t dev
)
193 struct super_block
* sb
= sb_entry(super_blocks
.next
);
196 * Search the super_blocks array for the device(s) to sync.
198 spin_lock(&inode_lock
);
199 for (; sb
!= sb_entry(&super_blocks
); sb
= sb_entry(sb
->s_list
.next
)) {
/* A specific device was requested: skip non-matching supers. */
202 if (dev
&& sb
->s_dev
!= dev
)
205 sync_list(&sb
->s_dirty
);
210 spin_unlock(&inode_lock
);
214 * Called with the spinlock already held..
/* Sync the dirty list of every mounted super block; per the comment
   above, the caller already holds inode_lock. */
216 static void sync_all_inodes(void)
218 struct super_block
* sb
= sb_entry(super_blocks
.next
);
219 for (; sb
!= sb_entry(&super_blocks
); sb
= sb_entry(sb
->s_list
.next
)) {
222 sync_list(&sb
->s_dirty
);
/*
 * Synchronously write one inode, looping until its I_DIRTY state
 * clears.  NOTE(review): the loop body (presumably sync_one or
 * equivalent) was lost in extraction.
 */
229 void write_inode_now(struct inode
*inode
)
231 struct super_block
* sb
= inode
->i_sb
;
234 spin_lock(&inode_lock
);
235 while (inode
->i_state
& I_DIRTY
)
237 spin_unlock(&inode_lock
);
/* Reached when the inode has no super block — nothing to write to. */
240 printk("write_inode_now: no super block\n");
244 * This is called by the filesystem to tell us
245 * that the inode is no longer useful. We just
246 * terminate it with extreme prejudice.
/*
 * Final teardown of an inode: drop cached pages, wait for in-flight
 * I/O unless it is already being freed, handle quota state, then give
 * the filesystem its clear_inode callback.
 */
248 void clear_inode(struct inode
*inode
)
/* NOTE(review): the truncate_inode_pages() call that presumably
   followed this check was lost in extraction. */
250 if (inode
->i_data
.nrpages
)
252 if (!(inode
->i_state
& I_FREEING
))
254 wait_on_inode(inode
);
/* NOTE(review): the quota-drop call guarded by this check was lost in
   extraction. */
255 if (IS_QUOTAINIT(inode
))
257 if (inode
->i_sb
&& inode
->i_sb
->s_op
&& inode
->i_sb
->s_op
->clear_inode
)
258 inode
->i_sb
->s_op
->clear_inode(inode
);
264 * Dispose-list gets a local list with local inodes in it, so it doesn't
265 * need to worry about list corruption and SMP locks.
/*
 * Destroy every inode on a caller-private list.  No locking needed:
 * the list is local to the caller (see comment above).
 */
267 static void dispose_list(struct list_head
* head
)
269 struct list_head
* inode_entry
;
270 struct inode
* inode
;
272 while ((inode_entry
= head
->next
) != head
)
274 list_del(inode_entry
);
276 inode
= list_entry(inode_entry
, struct inode
, i_list
);
/* Drop any pagecache pages still attached before freeing. */
277 if (inode
->i_data
.nrpages
)
278 truncate_inode_pages(inode
, 0);
280 destroy_inode(inode
);
285 * Invalidate all inodes for a device.
/*
 * Move every unused inode on `head` that belongs to `sb` onto the
 * `dispose` list.  Returns busy (non-zero if in-use inodes were seen).
 * NOTE(review): the loop header and the busy/count accounting lines
 * were lost in extraction.
 */
287 static int invalidate_list(struct list_head
*head
, struct super_block
* sb
, struct list_head
* dispose
)
289 struct list_head
*next
;
290 int busy
= 0, count
= 0;
294 struct list_head
* tmp
= next
;
295 struct inode
* inode
;
300 inode
= list_entry(tmp
, struct inode
, i_list
);
/* Skip inodes belonging to other super blocks. */
301 if (inode
->i_sb
!= sb
)
/* Unreferenced inodes can be unhashed and queued for disposal. */
303 if (!inode
->i_count
) {
304 list_del(&inode
->i_hash
);
305 INIT_LIST_HEAD(&inode
->i_hash
);
306 list_del(&inode
->i_list
);
307 list_add(&inode
->i_list
, dispose
);
308 inode
->i_state
|= I_FREEING
;
314 /* only unused inodes may be cached with i_count zero */
315 inodes_stat
.nr_unused
-= count
;
320 * This is a two-stage process. First we collect all
321 * offending inodes onto the throw-away list, and in
322 * the second stage we actually dispose of them. This
323 * is because we don't want to sleep while messing
324 * with the global lists..
/*
 * Invalidate every inode belonging to a super block; returns non-zero
 * if some inodes were still busy and could not be discarded.
 */
326 int invalidate_inodes(struct super_block
* sb
)
329 LIST_HEAD(throw_away
);
/* Stage one: collect candidates from all three lists under the lock. */
331 spin_lock(&inode_lock
);
332 busy
= invalidate_list(&inode_in_use
, sb
, &throw_away
);
333 busy
|= invalidate_list(&inode_unused
, sb
, &throw_away
);
334 busy
|= invalidate_list(&sb
->s_dirty
, sb
, &throw_away
);
335 spin_unlock(&inode_lock
);
/* Stage two: free them with the lock dropped (may sleep). */
337 dispose_list(&throw_away
);
343 * This is called with the inode lock held. It searches
344 * the in-use for freeable inodes, which are moved to a
345 * temporary list and then placed on the unused list by
348 * We don't expect to have to call this very often.
350 * N.B. The spinlock is released during the call to
/* An inode is reclaimable when it has no state bits set and no
   pagecache pages attached. */
353 #define CAN_UNUSE(inode) \
354 (((inode)->i_state | (inode)->i_data.nrpages) == 0)
355 #define INODE(entry) (list_entry(entry, struct inode, i_list))
/*
 * Shrink the inode cache: scan the unused list from the tail, unhash
 * reclaimable inodes onto a private list, then free them via
 * dispose_list() after dropping inode_lock.
 * NOTE(review): the `list` declaration, goal/count bookkeeping and
 * loop-advance lines were lost in extraction.
 */
357 void prune_icache(int goal
)
360 struct list_head
*entry
, *freeable
= &list
;
362 struct inode
* inode
;
364 spin_lock(&inode_lock
);
365 /* go simple and safe syncing everything before starting */
368 entry
= inode_unused
.prev
;
369 while (entry
!= &inode_unused
)
371 struct list_head
*tmp
= entry
;
/* Skip inodes that still have state or pages. */
375 if (!CAN_UNUSE(inode
))
380 list_del(&inode
->i_hash
);
381 INIT_LIST_HEAD(&inode
->i_hash
);
382 list_add(tmp
, freeable
);
383 inode
->i_state
|= I_FREEING
;
388 inodes_stat
.nr_unused
-= count
;
389 spin_unlock(&inode_lock
);
391 dispose_list(freeable
);
/*
 * VM-pressure entry point: prune a priority-scaled share of the unused
 * inodes, then ask the slab allocator to release pages.
 * NOTE(review): the prune_icache(count) call presumably between the
 * count computation and kmem_cache_shrink was lost in extraction.
 */
394 int shrink_icache_memory(int priority
, int gfp_mask
)
/* Only reclaim when the caller is allowed to perform I/O. */
396 if (gfp_mask
& __GFP_IO
)
401 count
= inodes_stat
.nr_unused
/ priority
;
403 /* FIXME: kmem_cache_shrink here should tell us
404 the number of pages freed, and it should
405 work in a __GFP_DMA/__GFP_HIGHMEM behaviour
406 to free only the interesting pages in
407 function of the needs of the current allocation. */
408 kmem_cache_shrink(inode_cachep
);
/*
 * Bump i_count under inode_lock (caller holds it).  On a 0 -> 1
 * transition, a clean inode migrates from the unused list back to the
 * in-use list and the unused statistic is decremented.
 */
414 static inline void __iget(struct inode
* inode
)
416 if (!inode
->i_count
++)
/* Dirty inodes stay on their s_dirty list; only clean ones move. */
418 if (!(inode
->i_state
& I_DIRTY
))
420 list_del(&inode
->i_list
);
421 list_add(&inode
->i_list
, &inode_in_use
);
423 inodes_stat
.nr_unused
--;
428 * Called with the inode lock held.
429 * NOTE: we are not increasing the inode-refcount, you must call __iget()
430 * by hand after calling find_inode now! This simplify iunique and won't
431 * add any additional branch in the common code.
/*
 * Hash-chain lookup by (sb, ino) with an optional filesystem-supplied
 * find_actor for extra matching.  Takes no reference (see NOTE above).
 * NOTE(review): the chain-walking loop header and the continue/return
 * lines were lost in extraction.
 */
433 static struct inode
* find_inode(struct super_block
* sb
, unsigned long ino
, struct list_head
*head
, find_inode_t find_actor
, void *opaque
)
435 struct list_head
*tmp
;
436 struct inode
* inode
;
444 inode
= list_entry(tmp
, struct inode
, i_hash
);
445 if (inode
->i_sb
!= sb
)
447 if (inode
->i_ino
!= ino
)
449 if (find_actor
&& !find_actor(inode
, ino
, opaque
))
457 * This just initializes the inode fields
458 * to known values before returning the inode..
460 * i_sb, i_ino, i_count, i_state and the lists have
461 * been initialized elsewhere..
/* Reset the reusable fields of a recycled inode to a known state;
   identity fields (see comment above) are handled by the caller. */
463 static void clean_inode(struct inode
*inode
)
/* Wipe the filesystem-specific union. */
465 memset(&inode
->u
, 0, sizeof(inode
->u
));
469 atomic_set(&inode
->i_writecount
, 0);
471 inode
->i_generation
= 0;
472 memset(&inode
->i_dquot
, 0, sizeof(inode
->i_dquot
));
473 inode
->i_pipe
= NULL
;
477 * This is called by things like the networking layer
478 * etc that want to get an inode without any inode
479 * number, or filesystems that allocate new inodes with
480 * no pre-existing information.
/*
 * Allocate an anonymous (unhashed) in-use inode, assigning a synthetic
 * inode number from a static counter under inode_lock.
 * NOTE(review): the allocation-failure check and remaining field
 * initialization were lost in extraction.
 */
482 struct inode
* get_empty_inode(void)
484 static unsigned long last_ino
= 0;
485 struct inode
* inode
;
487 inode
= alloc_inode();
490 spin_lock(&inode_lock
);
491 list_add(&inode
->i_list
, &inode_in_use
);
494 inode
->i_ino
= ++last_ino
;
498 spin_unlock(&inode_lock
);
505 * This is called without the inode lock held.. Be careful.
507 * We no longer cache the sb_flags in i_flags - see fs.h
508 * -- rmk@arm.uk.linux.org
/*
 * Slow path of iget4(): allocate a fresh inode, re-check the hash
 * under inode_lock (the allocator may have slept, letting a racer
 * insert the same inode), then initialize, hash and read it in.
 */
510 static struct inode
* get_new_inode(struct super_block
*sb
, unsigned long ino
, struct list_head
*head
, find_inode_t find_actor
, void *opaque
)
512 struct inode
* inode
;
514 inode
= alloc_inode();
518 spin_lock(&inode_lock
);
519 /* We released the lock, so.. */
520 old
= find_inode(sb
, ino
, head
, find_actor
, opaque
);
/* No racer: publish the new inode on the in-use list and the hash. */
522 list_add(&inode
->i_list
, &inode_in_use
);
523 list_add(&inode
->i_hash
, head
);
525 inode
->i_dev
= sb
->s_dev
;
/* Keep the inode locked while the filesystem reads it from disk. */
529 inode
->i_state
= I_LOCK
;
530 spin_unlock(&inode_lock
);
533 sb
->s_op
->read_inode(inode
);
536 * This is special! We do not need the spinlock
537 * when clearing I_LOCK, because we're guaranteed
538 * that nobody else tries to do anything about the
539 * state of the inode when it is locked, as we
540 * just created it (so there can be no old holders
541 * that haven't tested I_LOCK).
543 inode
->i_state
&= ~I_LOCK
;
544 wake_up(&inode
->i_wait
);
550 * Uhhuh, somebody else created the same inode under
551 * us. Use the old inode instead of the one we just
/* Race lost: discard our allocation and wait on the winner's inode. */
555 spin_unlock(&inode_lock
);
556 destroy_inode(inode
);
558 wait_on_inode(inode
);
/* Pick a hash bucket by folding the super block pointer into the inode
   number, then mixing the high bits down with two shifts. */
563 static inline unsigned long hash(struct super_block
*sb
, unsigned long i_ino
)
565 unsigned long tmp
= i_ino
| (unsigned long) sb
;
566 tmp
= tmp
+ (tmp
>> HASH_BITS
) + (tmp
>> HASH_BITS
*2);
567 return tmp
& HASH_MASK
;
570 /* Yeah, I know about quadratic hash. Maybe, later. */
/*
 * Hand out an inode number not currently hashed on this super block,
 * scanning upward from a static counter kept above max_reserved.
 * NOTE(review): the retry loop, the `res`/`inode` declarations and the
 * return of `res` were lost in extraction.
 */
571 ino_t
iunique(struct super_block
*sb
, ino_t max_reserved
)
573 static ino_t counter
= 0;
575 struct list_head
* head
;
577 spin_lock(&inode_lock
);
579 if (counter
> max_reserved
) {
580 head
= inode_hashtable
+ hash(sb
,counter
);
/* The candidate number is free only if nothing in the hash uses it. */
581 inode
= find_inode(sb
, res
= counter
++, head
, NULL
, NULL
);
583 spin_unlock(&inode_lock
);
/* Counter wrapped or is inside the reserved range: restart above it. */
587 counter
= max_reserved
+ 1;
/*
 * Take a reference on an inode unless it is already being freed (in
 * which case the caller presumably gets NULL back — the reference-take
 * and NULL-out lines were lost in extraction).  Waits for any pending
 * read before returning.
 */
593 struct inode
*igrab(struct inode
*inode
)
595 spin_lock(&inode_lock
);
596 if (!(inode
->i_state
& I_FREEING
))
600 spin_unlock(&inode_lock
);
602 wait_on_inode(inode
);
/*
 * Main inode-lookup entry: search the hash chain for (sb, ino); on a
 * hit wait for any in-flight read and return the inode, otherwise fall
 * back to get_new_inode().  The __iget() reference-take on the hit
 * path appears to have been lost in extraction.
 */
606 struct inode
*iget4(struct super_block
*sb
, unsigned long ino
, find_inode_t find_actor
, void *opaque
)
608 struct list_head
* head
= inode_hashtable
+ hash(sb
,ino
);
609 struct inode
* inode
;
611 spin_lock(&inode_lock
);
612 inode
= find_inode(sb
, ino
, head
, find_actor
, opaque
);
/* Cache hit: drop the lock, then wait for any pending read_inode. */
615 spin_unlock(&inode_lock
);
616 wait_on_inode(inode
);
/* Cache miss: release the lock before the (sleeping) slow path. */
619 spin_unlock(&inode_lock
);
622 * get_new_inode() will do the right thing, re-trying the search
623 * in case it had to block at any point.
625 return get_new_inode(sb
, ino
, head
, find_actor
, opaque
);
/* Add an inode to its hash chain so lookups (and dirty-list handling)
   can find it. */
628 void insert_inode_hash(struct inode
*inode
)
630 struct list_head
*head
= inode_hashtable
+ hash(inode
->i_sb
, inode
->i_ino
);
631 spin_lock(&inode_lock
);
632 list_add(&inode
->i_hash
, head
);
633 spin_unlock(&inode_lock
);
/* Unhash an inode, leaving i_hash as an empty list (the "unhashed"
   marker tested elsewhere via list_empty). */
636 void remove_inode_hash(struct inode
*inode
)
638 spin_lock(&inode_lock
);
639 list_del(&inode
->i_hash
);
640 INIT_LIST_HEAD(&inode
->i_hash
);
641 spin_unlock(&inode_lock
);
/*
 * Drop a reference on an inode.  On the final reference: unlinked
 * inodes are deleted via the fs delete_inode hook, unhashed inodes are
 * destroyed outright, and clean hashed inodes are parked on the unused
 * list.  Also calls the optional put_inode hook on every iput and,
 * under INODE_PARANOIA, sanity-checks the dying inode's state.
 * NOTE(review): several control-flow lines (else branches, returns,
 * the delete() invocation) were lost in extraction.
 */
644 void iput(struct inode
*inode
)
647 struct super_operations
*op
= NULL
;
650 if (inode
->i_sb
&& inode
->i_sb
->s_op
)
651 op
= inode
->i_sb
->s_op
;
/* put_inode runs on every iput, not only the last one. */
652 if (op
&& op
->put_inode
)
653 op
->put_inode(inode
);
655 spin_lock(&inode_lock
);
656 if (!--inode
->i_count
) {
/* Last reference to an unlinked inode: remove it everywhere and
   delete it. */
657 if (!inode
->i_nlink
) {
658 list_del(&inode
->i_hash
);
659 INIT_LIST_HEAD(&inode
->i_hash
);
660 list_del(&inode
->i_list
);
661 INIT_LIST_HEAD(&inode
->i_list
);
662 inode
->i_state
|=I_FREEING
;
663 if (op
&& op
->delete_inode
) {
664 void (*delete)(struct inode
*) = op
->delete_inode
;
/* delete_inode may sleep: flush pages and call it without the lock. */
665 spin_unlock(&inode_lock
);
666 if (inode
->i_data
.nrpages
)
667 truncate_inode_pages(inode
, 0);
669 spin_lock(&inode_lock
);
/* Last reference to an unhashed (anonymous) inode: just destroy it. */
672 if (list_empty(&inode
->i_hash
)) {
673 list_del(&inode
->i_list
);
674 INIT_LIST_HEAD(&inode
->i_list
);
675 inode
->i_state
|=I_FREEING
;
676 spin_unlock(&inode_lock
);
679 spin_lock(&inode_lock
);
/* Clean hashed inode: park it on the unused list for later reuse. */
683 if (!(inode
->i_state
& I_DIRTY
)) {
684 list_del(&inode
->i_list
);
685 list_add(&inode
->i_list
,
688 inodes_stat
.nr_unused
++;
690 #ifdef INODE_PARANOIA
/* Paranoia: a dying inode must hold no locks, aliases or references,
   and its semaphore must be free. */
692 printk(KERN_ERR
"iput: inode %s/%ld still has locks!\n",
693 kdevname(inode
->i_dev
), inode
->i_ino
);
694 if (!list_empty(&inode
->i_dentry
))
695 printk(KERN_ERR
"iput: device %s inode %ld still has aliases!\n",
696 kdevname(inode
->i_dev
), inode
->i_ino
);
698 printk(KERN_ERR
"iput: device %s inode %ld count changed, count=%d\n",
699 kdevname(inode
->i_dev
), inode
->i_ino
, inode
->i_count
);
700 if (atomic_read(&inode
->i_sem
.count
) != 1)
701 printk(KERN_ERR
"iput: Aieee, semaphore in use inode %s/%ld, count=%d\n",
702 kdevname(inode
->i_dev
), inode
->i_ino
, atomic_read(&inode
->i_sem
.count
));
/* Non-final iput: detect a wrapped reference count. */
705 if (inode
->i_count
> (1<<31)) {
706 printk(KERN_ERR
"iput: inode %s/%ld count wrapped\n",
707 kdevname(inode
->i_dev
), inode
->i_ino
);
709 spin_unlock(&inode_lock
);
711 destroy_inode(inode
);
/*
 * Map a file block number to an on-device block number via the
 * filesystem's get_block method (no-create mode, last arg 0), reading
 * the result out of a temporary buffer_head.
 */
715 int bmap(struct inode
* inode
, int block
)
717 struct buffer_head tmp
;
719 if (inode
->i_op
&& inode
->i_op
->get_block
) {
722 inode
->i_op
->get_block(inode
, block
, &tmp
, 0);
723 return tmp
.b_blocknr
;
729 * Initialize the hash tables.
/*
 * Boot-time setup: empty every hash bucket, then create the inode slab
 * cache (with init_once as constructor); panic if that fails, since
 * the VFS cannot operate without it.  NOTE(review): the bucket loop
 * header and the cache NULL-check were lost in extraction.
 */
731 void __init
inode_init(void)
734 struct list_head
*head
= inode_hashtable
;
738 INIT_LIST_HEAD(head
);
743 /* inode slab cache */
744 inode_cachep
= kmem_cache_create("inode_cache", sizeof(struct inode
),
745 0, SLAB_HWCACHE_ALIGN
, init_once
,
748 panic("cannot create inode slab cache");
/*
 * Touch i_atime unless suppressed: noatime mounts, nodiratime mounts
 * (for directories), and read-only filesystems all skip the update.
 * Otherwise stamp the current time and mark the inode dirty.
 */
751 void update_atime (struct inode
*inode
)
753 if ( IS_NOATIME (inode
) ) return;
754 if ( IS_NODIRATIME (inode
) && S_ISDIR (inode
->i_mode
) ) return;
755 if ( IS_RDONLY (inode
) ) return;
756 inode
->i_atime
= CURRENT_TIME
;
757 mark_inode_dirty (inode
);
758 } /* End Function update_atime */