4 * (C) 1997 Linus Torvalds
8 #include <linux/string.h>
10 #include <linux/dcache.h>
11 #include <linux/init.h>
12 #include <linux/quotaops.h>
15 * New inode.c implementation.
17 * This implementation has the basic premise of trying
18 * to be extremely low-overhead and SMP-safe, yet be
19 * simple enough to be "obviously correct".
24 #define INODE_PARANOIA 1
25 /* #define INODE_DEBUG 1 */
28 * Inode lookup is no longer as critical as it used to be:
29 * most of the lookups are going to be through the dcache.
32 #define HASH_SIZE (1UL << HASH_BITS)
33 #define HASH_MASK (HASH_SIZE-1)
36 * Each inode can be on two separate lists. One is
37 * the hash list of the inode, used for lookups. The
38 * other linked list is the "type" list:
39 * "in_use" - valid inode, hashed if i_nlink > 0
40 * "dirty" - valid inode, hashed if i_nlink > 0, dirty.
41 * "unused" - ready to be re-used. Not hashed.
43 * A "dirty" list is maintained for each super block,
44 * allowing for low-overhead inode sync() operations.
47 static LIST_HEAD(inode_in_use
);
48 static LIST_HEAD(inode_unused
);
49 static struct list_head inode_hashtable
[HASH_SIZE
];
52 * A simple spinlock to protect the list manipulations.
54 * NOTE! You also have to own the lock if you change
55 * the i_state of an inode while it is in use..
57 spinlock_t inode_lock
= SPIN_LOCK_UNLOCKED
;
60 * Statistics gathering..
66 } inodes_stat
= {0, 0,};
71 * Put the inode on the super block's dirty list.
73 * CAREFUL! We mark it dirty unconditionally, but
74 * move it onto the dirty list only if it is hashed.
75 * If it was not hashed, it will never be added to
76 * the dirty list even if it is later hashed, as it
77 * will have been marked dirty already.
79 * In short, make sure you hash any inodes _before_
80 * you start marking them dirty..
82 void __mark_inode_dirty(struct inode
*inode
)
84 struct super_block
* sb
= inode
->i_sb
;
87 spin_lock(&inode_lock
);
88 if (!(inode
->i_state
& I_DIRTY
)) {
89 inode
->i_state
|= I_DIRTY
;
90 /* Only add valid (ie hashed) inodes to the dirty list */
91 if (!list_empty(&inode
->i_hash
)) {
92 list_del(&inode
->i_list
);
93 list_add(&inode
->i_list
, &sb
->s_dirty
);
96 spin_unlock(&inode_lock
);
100 static void __wait_on_inode(struct inode
* inode
)
102 DECLARE_WAITQUEUE(wait
, current
);
104 add_wait_queue(&inode
->i_wait
, &wait
);
106 current
->state
= TASK_UNINTERRUPTIBLE
;
107 if (inode
->i_state
& I_LOCK
) {
111 remove_wait_queue(&inode
->i_wait
, &wait
);
112 current
->state
= TASK_RUNNING
;
115 static inline void wait_on_inode(struct inode
*inode
)
117 if (inode
->i_state
& I_LOCK
)
118 __wait_on_inode(inode
);
122 * These are initializations that only need to be done
123 * once, because the fields are idempotent across use
126 static inline void init_once(struct inode
* inode
)
128 memset(inode
, 0, sizeof(*inode
));
129 init_waitqueue_head(&inode
->i_wait
);
130 INIT_LIST_HEAD(&inode
->i_hash
);
131 INIT_LIST_HEAD(&inode
->i_dentry
);
132 sema_init(&inode
->i_sem
, 1);
133 sema_init(&inode
->i_atomic_write
, 1);
136 static inline void write_inode(struct inode
*inode
)
138 if (inode
->i_sb
&& inode
->i_sb
->s_op
&& inode
->i_sb
->s_op
->write_inode
)
139 inode
->i_sb
->s_op
->write_inode(inode
);
142 static inline void sync_one(struct inode
*inode
)
144 if (inode
->i_state
& I_LOCK
) {
145 spin_unlock(&inode_lock
);
146 __wait_on_inode(inode
);
147 spin_lock(&inode_lock
);
149 list_del(&inode
->i_list
);
150 list_add(&inode
->i_list
, &inode_in_use
);
151 /* Set I_LOCK, reset I_DIRTY */
152 inode
->i_state
^= I_DIRTY
| I_LOCK
;
153 spin_unlock(&inode_lock
);
157 spin_lock(&inode_lock
);
158 inode
->i_state
&= ~I_LOCK
;
159 wake_up(&inode
->i_wait
);
163 static inline void sync_list(struct list_head
*head
)
165 struct list_head
* tmp
;
167 while ((tmp
= head
->prev
) != head
)
168 sync_one(list_entry(tmp
, struct inode
, i_list
));
172 * "sync_inodes()" goes through the super block's dirty list,
173 * writes them out, and puts them back on the normal list.
175 void sync_inodes(kdev_t dev
)
177 struct super_block
* sb
= sb_entry(super_blocks
.next
);
180 * Search the super_blocks array for the device(s) to sync.
182 spin_lock(&inode_lock
);
183 for (; sb
!= sb_entry(&super_blocks
); sb
= sb_entry(sb
->s_list
.next
)) {
186 if (dev
&& sb
->s_dev
!= dev
)
189 sync_list(&sb
->s_dirty
);
194 spin_unlock(&inode_lock
);
198 * Called with the spinlock already held..
200 static void sync_all_inodes(void)
202 struct super_block
* sb
= sb_entry(super_blocks
.next
);
203 for (; sb
!= sb_entry(&super_blocks
); sb
= sb_entry(sb
->s_list
.next
)) {
206 sync_list(&sb
->s_dirty
);
213 void write_inode_now(struct inode
*inode
)
215 struct super_block
* sb
= inode
->i_sb
;
218 spin_lock(&inode_lock
);
219 while (inode
->i_state
& I_DIRTY
)
221 spin_unlock(&inode_lock
);
224 printk("write_inode_now: no super block\n");
228 * This is called by the filesystem to tell us
229 * that the inode is no longer useful. We just
230 * terminate it with extreme prejudice.
232 void clear_inode(struct inode
*inode
)
234 if (inode
->i_nrpages
)
235 truncate_inode_pages(inode
, 0);
236 wait_on_inode(inode
);
237 if (IS_QUOTAINIT(inode
))
239 if (inode
->i_sb
&& inode
->i_sb
->s_op
&& inode
->i_sb
->s_op
->clear_inode
)
240 inode
->i_sb
->s_op
->clear_inode(inode
);
246 * Dispose-list gets a local list, so it doesn't need to
247 * worry about list corruption. It releases the inode lock
248 * while clearing the inodes.
250 static void dispose_list(struct list_head
* head
)
252 struct list_head
*next
;
255 spin_unlock(&inode_lock
);
258 struct list_head
* tmp
= next
;
259 struct inode
* inode
;
264 inode
= list_entry(tmp
, struct inode
, i_list
);
269 /* Add them all to the unused list in one fell swoop */
270 spin_lock(&inode_lock
);
271 list_splice(head
, &inode_unused
);
272 inodes_stat
.nr_free_inodes
+= count
;
276 * Invalidate all inodes for a device.
278 static int invalidate_list(struct list_head
*head
, struct super_block
* sb
, struct list_head
* dispose
)
280 struct list_head
*next
;
285 struct list_head
* tmp
= next
;
286 struct inode
* inode
;
291 inode
= list_entry(tmp
, struct inode
, i_list
);
292 if (inode
->i_sb
!= sb
)
294 if (!inode
->i_count
) {
295 list_del(&inode
->i_hash
);
296 INIT_LIST_HEAD(&inode
->i_hash
);
297 list_del(&inode
->i_list
);
298 list_add(&inode
->i_list
, dispose
);
299 inode
->i_state
|= I_FREEING
;
308 * This is a two-stage process. First we collect all
309 * offending inodes onto the throw-away list, and in
310 * the second stage we actually dispose of them. This
311 * is because we don't want to sleep while messing
312 * with the global lists..
314 int invalidate_inodes(struct super_block
* sb
)
317 LIST_HEAD(throw_away
);
319 spin_lock(&inode_lock
);
320 busy
= invalidate_list(&inode_in_use
, sb
, &throw_away
);
321 busy
|= invalidate_list(&sb
->s_dirty
, sb
, &throw_away
);
322 dispose_list(&throw_away
);
323 spin_unlock(&inode_lock
);
329 * This is called with the inode lock held. It searches
330 * the in-use for freeable inodes, which are moved to a
331 * temporary list and then placed on the unused list by
334 * We don't expect to have to call this very often.
336 * N.B. The spinlock is released during the call to
339 #define CAN_UNUSE(inode) \
340 (((inode)->i_count | (inode)->i_state) == 0)
341 #define INODE(entry) (list_entry(entry, struct inode, i_list))
343 static int free_inodes(void)
345 struct list_head list
, *entry
, *freeable
= &list
;
348 INIT_LIST_HEAD(freeable
);
349 entry
= inode_in_use
.next
;
350 while (entry
!= &inode_in_use
) {
351 struct list_head
*tmp
= entry
;
354 if (!CAN_UNUSE(INODE(tmp
)))
357 list_del(&INODE(tmp
)->i_hash
);
358 INIT_LIST_HEAD(&INODE(tmp
)->i_hash
);
359 list_add(tmp
, freeable
);
360 list_entry(tmp
, struct inode
, i_list
)->i_state
= I_FREEING
;
365 dispose_list(freeable
);
371 * Searches the inodes list for freeable inodes,
372 * shrinking the dcache before (and possible after,
375 static void try_to_free_inodes(int goal
)
378 * First stry to just get rid of unused inodes.
380 * If we can't reach our goal that way, we'll have
381 * to try to shrink the dcache and sync existing
385 goal
-= inodes_stat
.nr_free_inodes
;
387 spin_unlock(&inode_lock
);
388 select_dcache(goal
, 0);
390 spin_lock(&inode_lock
);
397 * This is the externally visible routine for
398 * inode memory management.
400 void free_inode_memory(int goal
)
402 spin_lock(&inode_lock
);
404 spin_unlock(&inode_lock
);
409 * This is called with the spinlock held, but releases
410 * the lock when freeing or allocating inodes.
411 * Look out! This returns with the inode lock held if
414 * We do inode allocations two pages at a time to reduce
417 #define INODE_PAGE_ORDER 1
418 #define INODE_ALLOCATION_SIZE (PAGE_SIZE << INODE_PAGE_ORDER)
419 #define INODES_PER_ALLOCATION (INODE_ALLOCATION_SIZE/sizeof(struct inode))
421 static struct inode
* grow_inodes(void)
423 struct inode
* inode
;
426 * Check whether to restock the unused list.
428 if (inodes_stat
.nr_inodes
> max_inodes
) {
429 struct list_head
*tmp
;
430 try_to_free_inodes(inodes_stat
.nr_inodes
>> 2);
431 tmp
= inode_unused
.next
;
432 if (tmp
!= &inode_unused
) {
433 inodes_stat
.nr_free_inodes
--;
435 inode
= list_entry(tmp
, struct inode
, i_list
);
440 spin_unlock(&inode_lock
);
441 inode
= (struct inode
*)__get_free_pages(GFP_KERNEL
,INODE_PAGE_ORDER
);
446 size
= INODE_ALLOCATION_SIZE
- 2*sizeof(struct inode
);
448 spin_lock(&inode_lock
);
452 list_add(&tmp
->i_list
, &inode_unused
);
453 size
-= sizeof(struct inode
);
457 * Update the inode statistics
459 inodes_stat
.nr_inodes
+= INODES_PER_ALLOCATION
;
460 inodes_stat
.nr_free_inodes
+= INODES_PER_ALLOCATION
- 1;
465 * If the allocation failed, do an extensive pruning of
466 * the dcache and then try again to free some inodes.
468 prune_dcache(inodes_stat
.nr_inodes
>> 2);
470 spin_lock(&inode_lock
);
473 struct list_head
*tmp
= inode_unused
.next
;
474 if (tmp
!= &inode_unused
) {
475 inodes_stat
.nr_free_inodes
--;
477 inode
= list_entry(tmp
, struct inode
, i_list
);
481 spin_unlock(&inode_lock
);
483 printk("grow_inodes: allocation failed\n");
488 * Called with the inode lock held.
490 static struct inode
* find_inode(struct super_block
* sb
, unsigned long ino
, struct list_head
*head
)
492 struct list_head
*tmp
;
493 struct inode
* inode
;
501 inode
= list_entry(tmp
, struct inode
, i_hash
);
502 if (inode
->i_sb
!= sb
)
504 if (inode
->i_ino
!= ino
)
513 * This just initializes the inode fields
514 * to known values before returning the inode..
516 * i_sb, i_ino, i_count, i_state and the lists have
517 * been initialized elsewhere..
519 void clean_inode(struct inode
*inode
)
521 memset(&inode
->u
, 0, sizeof(inode
->u
));
525 inode
->i_writecount
= 0;
527 inode
->i_generation
= 0;
528 memset(&inode
->i_dquot
, 0, sizeof(inode
->i_dquot
));
529 sema_init(&inode
->i_sem
, 1);
530 inode
->i_pipe
= NULL
;
534 * This is called by things like the networking layer
535 * etc that want to get an inode without any inode
536 * number, or filesystems that allocate new inodes with
537 * no pre-existing information.
539 struct inode
* get_empty_inode(void)
541 static unsigned long last_ino
= 0;
542 struct inode
* inode
;
543 struct list_head
* tmp
;
545 spin_lock(&inode_lock
);
546 tmp
= inode_unused
.next
;
547 if (tmp
!= &inode_unused
) {
549 inodes_stat
.nr_free_inodes
--;
550 inode
= list_entry(tmp
, struct inode
, i_list
);
552 list_add(&inode
->i_list
, &inode_in_use
);
555 inode
->i_ino
= ++last_ino
;
559 spin_unlock(&inode_lock
);
565 * Warning: if this succeeded, we will now
566 * return with the inode lock.
568 inode
= grow_inodes();
576 * This is called with the inode lock held.. Be careful.
578 * We no longer cache the sb_flags in i_flags - see fs.h
579 * -- rmk@arm.uk.linux.org
581 static struct inode
* get_new_inode(struct super_block
*sb
, unsigned long ino
, struct list_head
*head
)
583 struct inode
* inode
;
584 struct list_head
* tmp
= inode_unused
.next
;
586 if (tmp
!= &inode_unused
) {
588 inodes_stat
.nr_free_inodes
--;
589 inode
= list_entry(tmp
, struct inode
, i_list
);
591 list_add(&inode
->i_list
, &inode_in_use
);
592 list_add(&inode
->i_hash
, head
);
594 inode
->i_dev
= sb
->s_dev
;
598 inode
->i_state
= I_LOCK
;
599 spin_unlock(&inode_lock
);
602 sb
->s_op
->read_inode(inode
);
605 * This is special! We do not need the spinlock
606 * when clearing I_LOCK, because we're guaranteed
607 * that nobody else tries to do anything about the
608 * state of the inode when it is locked, as we
609 * just created it (so there can be no old holders
610 * that haven't tested I_LOCK).
612 inode
->i_state
&= ~I_LOCK
;
613 wake_up(&inode
->i_wait
);
619 * We need to expand. Note that "grow_inodes()" will
620 * release the spinlock, but will return with the lock
621 * held again if the allocation succeeded.
623 inode
= grow_inodes();
625 /* We released the lock, so.. */
626 struct inode
* old
= find_inode(sb
, ino
, head
);
629 list_add(&inode
->i_list
, &inode_unused
);
630 inodes_stat
.nr_free_inodes
++;
631 spin_unlock(&inode_lock
);
638 static inline unsigned long hash(struct super_block
*sb
, unsigned long i_ino
)
640 unsigned long tmp
= i_ino
| (unsigned long) sb
;
641 tmp
= tmp
+ (tmp
>> HASH_BITS
) + (tmp
>> HASH_BITS
*2);
642 return tmp
& HASH_MASK
;
645 /* Yeah, I know about quadratic hash. Maybe, later. */
646 ino_t
iunique(struct super_block
*sb
, ino_t max_reserved
)
648 static ino_t counter
= 0;
650 struct list_head
* head
;
652 spin_lock(&inode_lock
);
654 if (counter
> max_reserved
) {
655 head
= inode_hashtable
+ hash(sb
,counter
);
656 inode
= find_inode(sb
, res
= counter
++, head
);
658 spin_unlock(&inode_lock
);
661 inode
->i_count
--; /* compensate find_inode() */
663 counter
= max_reserved
+ 1;
669 struct inode
*igrab(struct inode
*inode
)
671 spin_lock(&inode_lock
);
672 if (inode
->i_state
& I_FREEING
)
676 spin_unlock(&inode_lock
);
678 wait_on_inode(inode
);
682 struct inode
*iget(struct super_block
*sb
, unsigned long ino
)
684 struct list_head
* head
= inode_hashtable
+ hash(sb
,ino
);
685 struct inode
* inode
;
687 spin_lock(&inode_lock
);
688 inode
= find_inode(sb
, ino
, head
);
690 spin_unlock(&inode_lock
);
691 wait_on_inode(inode
);
695 * get_new_inode() will do the right thing, releasing
696 * the inode lock and re-trying the search in case it
697 * had to block at any point.
699 return get_new_inode(sb
, ino
, head
);
702 void insert_inode_hash(struct inode
*inode
)
704 struct list_head
*head
= inode_hashtable
+ hash(inode
->i_sb
, inode
->i_ino
);
705 spin_lock(&inode_lock
);
706 list_add(&inode
->i_hash
, head
);
707 spin_unlock(&inode_lock
);
710 void remove_inode_hash(struct inode
*inode
)
712 spin_lock(&inode_lock
);
713 list_del(&inode
->i_hash
);
714 INIT_LIST_HEAD(&inode
->i_hash
);
715 spin_unlock(&inode_lock
);
718 void iput(struct inode
*inode
)
721 struct super_operations
*op
= NULL
;
723 if (inode
->i_sb
&& inode
->i_sb
->s_op
)
724 op
= inode
->i_sb
->s_op
;
725 if (op
&& op
->put_inode
)
726 op
->put_inode(inode
);
728 spin_lock(&inode_lock
);
729 if (!--inode
->i_count
) {
730 if (!inode
->i_nlink
) {
731 list_del(&inode
->i_hash
);
732 INIT_LIST_HEAD(&inode
->i_hash
);
733 list_del(&inode
->i_list
);
734 INIT_LIST_HEAD(&inode
->i_list
);
735 inode
->i_state
|=I_FREEING
;
736 if (op
&& op
->delete_inode
) {
737 void (*delete)(struct inode
*) = op
->delete_inode
;
738 spin_unlock(&inode_lock
);
740 spin_lock(&inode_lock
);
743 if (list_empty(&inode
->i_hash
)) {
744 list_del(&inode
->i_list
);
745 INIT_LIST_HEAD(&inode
->i_list
);
746 inode
->i_state
|=I_FREEING
;
747 spin_unlock(&inode_lock
);
749 spin_lock(&inode_lock
);
750 list_add(&inode
->i_list
, &inode_unused
);
751 inodes_stat
.nr_free_inodes
++;
753 else if (!(inode
->i_state
& I_DIRTY
)) {
754 list_del(&inode
->i_list
);
755 list_add(&inode
->i_list
, &inode_in_use
);
757 #ifdef INODE_PARANOIA
759 printk(KERN_ERR
"iput: inode %s/%ld still has locks!\n",
760 kdevname(inode
->i_dev
), inode
->i_ino
);
761 if (!list_empty(&inode
->i_dentry
))
762 printk(KERN_ERR
"iput: device %s inode %ld still has aliases!\n",
763 kdevname(inode
->i_dev
), inode
->i_ino
);
765 printk(KERN_ERR
"iput: device %s inode %ld count changed, count=%d\n",
766 kdevname(inode
->i_dev
), inode
->i_ino
, inode
->i_count
);
767 if (atomic_read(&inode
->i_sem
.count
) != 1)
768 printk(KERN_ERR
"iput: Aieee, semaphore in use inode %s/%ld, count=%d\n",
769 kdevname(inode
->i_dev
), inode
->i_ino
, atomic_read(&inode
->i_sem
.count
));
770 if (atomic_read(&inode
->i_atomic_write
.count
) != 1)
771 printk(KERN_ERR
"iput: Aieee, atomic write semaphore in use inode %s/%ld, count=%d\n",
772 kdevname(inode
->i_dev
), inode
->i_ino
, atomic_read(&inode
->i_sem
.count
));
775 if (inode
->i_count
> (1<<31)) {
776 printk(KERN_ERR
"iput: inode %s/%ld count wrapped\n",
777 kdevname(inode
->i_dev
), inode
->i_ino
);
779 spin_unlock(&inode_lock
);
783 int bmap(struct inode
* inode
, int block
)
785 if (inode
->i_op
&& inode
->i_op
->bmap
)
786 return inode
->i_op
->bmap(inode
, block
);
791 * Initialize the hash tables and default
792 * value for max inodes
794 #define MAX_INODE (16384)
796 void __init
inode_init(void)
799 struct list_head
*head
= inode_hashtable
;
803 INIT_LIST_HEAD(head
);
808 /* Initial guess at reasonable inode number */
809 max
= num_physpages
>> 1;
815 /* This belongs in file_table.c, not here... */
816 int fs_may_remount_ro(struct super_block
*sb
)
820 /* Check that no files are currently opened for writing. */
821 for (file
= inuse_filps
; file
; file
= file
->f_next
) {
825 inode
= file
->f_dentry
->d_inode
;
826 if (!inode
|| inode
->i_sb
!= sb
)
829 /* File with pending delete? */
830 if (inode
->i_nlink
== 0)
834 if (S_ISREG(inode
->i_mode
) && (file
->f_mode
& FMODE_WRITE
))
837 return 1; /* Tis' cool bro. */
840 void update_atime (struct inode
*inode
)
842 if ( IS_NOATIME (inode
) ) return;
843 if ( IS_NODIRATIME (inode
) && S_ISDIR (inode
->i_mode
) ) return;
844 if ( IS_RDONLY (inode
) ) return;
845 inode
->i_atime
= CURRENT_TIME
;
846 mark_inode_dirty (inode
);
847 } /* End Function update_atime */