4 * (C) 1997 Linus Torvalds
8 #include <linux/string.h>
12 * New inode.c implementation.
14 * This implementation has the basic premise of trying
15 * to be extremely low-overhead and SMP-safe, yet be
16 * simple enough to be "obviously correct".
22 * Inode lookup is no longer as critical as it used to be:
23 * most of the lookups are going to be through the dcache.
26 #define HASH_SIZE (1UL << HASH_BITS)
27 #define HASH_MASK (HASH_SIZE-1)
30 * Each inode can be on two separate lists. One is
31 * the hash list of the inode, used for lookups. The
32 * other linked list is the "type" list:
33 * "in_use" - valid inode, hashed if i_nlink > 0
34 * "dirty" - valid inode, hashed if i_nlink > 0, dirty.
35 * "unused" - ready to be re-used. Not hashed.
37 * A "dirty" list is maintained for each super block,
38 * allowing for low-overhead inode sync() operations.
41 static LIST_HEAD(inode_in_use
);
42 static LIST_HEAD(inode_unused
);
43 static struct list_head inode_hashtable
[HASH_SIZE
];
46 * A simple spinlock to protect the list manipulations.
48 * NOTE! You also have to own the lock if you change
49 * the i_state of an inode while it is in use..
51 spinlock_t inode_lock
= SPIN_LOCK_UNLOCKED
;
54 * Statistics gathering.. Not actually done yet.
62 int max_inodes
= NR_INODE
;
65 * Put the inode on the super block's dirty list
67 void __mark_inode_dirty(struct inode
*inode
)
69 struct super_block
* sb
= inode
->i_sb
;
72 spin_lock(&inode_lock
);
73 if (!(inode
->i_state
& I_DIRTY
)) {
74 inode
->i_state
|= I_DIRTY
;
75 /* Only add valid (ie hashed) inodes to the dirty list */
76 if (!list_empty(&inode
->i_hash
)) {
77 list_del(&inode
->i_list
);
78 list_add(&inode
->i_list
, &sb
->s_dirty
);
81 spin_unlock(&inode_lock
);
85 static void __wait_on_inode(struct inode
* inode
)
87 struct wait_queue wait
= { current
, NULL
};
89 add_wait_queue(&inode
->i_wait
, &wait
);
91 current
->state
= TASK_UNINTERRUPTIBLE
;
92 if (inode
->i_state
& I_LOCK
) {
96 remove_wait_queue(&inode
->i_wait
, &wait
);
97 current
->state
= TASK_RUNNING
;
100 static inline void wait_on_inode(struct inode
*inode
)
102 if (inode
->i_state
& I_LOCK
)
103 __wait_on_inode(inode
);
107 * These are initializations that only need to be done
108 * once, because the fields are idempotent across use
111 static inline void init_once(struct inode
* inode
)
113 memset(inode
, 0, sizeof(*inode
));
114 init_waitqueue(&inode
->i_wait
);
115 INIT_LIST_HEAD(&inode
->i_hash
);
116 sema_init(&inode
->i_sem
, 1);
/*
 * NOTE(review): this region of the file arrived garbled -- statements
 * are split across lines and several original lines are missing: the
 * allocation-failure check, the declarations of `size` and `tmp`, the
 * loop header that carves the freshly allocated page into inodes, and
 * the function tail.  Recover the full body from the upstream source
 * before building; only comments were added below.
 */
121 * Look out! This returns with the inode lock held if
/* Grows the inode cache by one page worth of struct inodes. */
124 static struct inode
* grow_inodes(void)
/* One whole page is allocated and (apparently) chopped into inodes. */
126 struct inode
* inode
= (struct inode
*)__get_free_page(GFP_KERNEL
);
/* inode_lock is taken here; per the comment above it seems to stay
 * held on successful return -- TODO confirm with upstream. */
132 spin_lock(&inode_lock
);
133 size
= PAGE_SIZE
- 2*sizeof(struct inode
);
/* Each carved-out inode is pushed onto the global unused list. */
138 list_add(&tmp
->i_list
, &inode_unused
);
139 size
-= sizeof(struct inode
);
146 static inline void write_inode(struct inode
*inode
)
148 if (inode
->i_sb
&& inode
->i_sb
->s_op
&& inode
->i_sb
->s_op
->write_inode
)
149 inode
->i_sb
->s_op
->write_inode(inode
);
152 static inline void sync_one(struct inode
*inode
)
154 if (inode
->i_state
& I_LOCK
) {
155 spin_unlock(&inode_lock
);
156 __wait_on_inode(inode
);
157 spin_lock(&inode_lock
);
159 list_del(&inode
->i_list
);
160 list_add(&inode
->i_list
, &inode_in_use
);
162 /* Set I_LOCK, reset I_DIRTY */
163 inode
->i_state
^= I_DIRTY
| I_LOCK
;
164 spin_unlock(&inode_lock
);
168 spin_lock(&inode_lock
);
169 inode
->i_state
&= ~I_LOCK
;
170 wake_up(&inode
->i_wait
);
174 static inline void sync_list(struct list_head
*head
)
176 struct list_head
* tmp
;
178 while ((tmp
= head
->prev
) != head
)
179 sync_one(list_entry(tmp
, struct inode
, i_list
));
183 * "sync_inodes()" goes through the super block's dirty list,
184 * writes them out, and puts them back on the normal list.
186 void sync_inodes(kdev_t dev
)
188 struct super_block
* sb
= super_blocks
+ 0;
192 * Search the super_blocks array for the device(s) to sync.
194 spin_lock(&inode_lock
);
195 for (i
= NR_SUPER
; i
-- ; sb
++) {
198 if (dev
&& sb
->s_dev
!= dev
)
201 sync_list(&sb
->s_dirty
);
205 spin_unlock(&inode_lock
);
211 void write_inode_now(struct inode
*inode
)
213 struct super_block
* sb
= inode
->i_sb
;
216 spin_lock(&inode_lock
);
217 if (inode
->i_state
& I_DIRTY
)
219 spin_unlock(&inode_lock
);
222 printk("write_inode_now: no super block\n");
226 * This is called by the filesystem to tell us
227 * that the inode is no longer useful. We just
228 * terminate it with extreme predjudice.
/*
 * NOTE(review): the tail of this function (original lines ~236-238)
 * is missing from this garbled extraction -- recover it from the
 * upstream source.  Only comments were added below.
 */
230 void clear_inode(struct inode
*inode
)
/* Drop any cached pages, then wait until nobody holds I_LOCK. */
232 truncate_inode_pages(inode
, 0);
233 wait_on_inode(inode
);
/* Give the quota subsystem a chance to release its per-inode state,
 * but only for writable inodes on filesystems with quota ops. */
234 if (IS_WRITABLE(inode
) && inode
->i_sb
&& inode
->i_sb
->dq_op
)
235 inode
->i_sb
->dq_op
->drop(inode
);
241 * Dispose-list gets a local list, so it doesn't need to
242 * worry about list corruption.
244 static void dispose_list(struct list_head
* head
)
246 struct list_head
*next
;
250 struct list_head
* tmp
= next
;
251 struct inode
* inode
;
256 inode
= list_entry(tmp
, struct inode
, i_list
);
257 truncate_inode_pages(inode
, 0);
260 /* Add them all to the unused list in one fell swoop */
261 spin_lock(&inode_lock
);
262 list_splice(head
, &inode_unused
);
263 spin_unlock(&inode_lock
);
267 * Invalidate all inodes for a device.
269 static int invalidate_list(struct list_head
*head
, struct super_block
* sb
, struct list_head
* dispose
)
271 struct list_head
*next
;
276 struct list_head
* tmp
= next
;
277 struct inode
* inode
;
282 inode
= list_entry(tmp
, struct inode
, i_list
);
283 if (inode
->i_sb
!= sb
)
285 if (!inode
->i_count
) {
286 list_del(&inode
->i_hash
);
287 INIT_LIST_HEAD(&inode
->i_hash
);
288 list_del(&inode
->i_list
);
289 list_add(&inode
->i_list
, dispose
);
298 * This is a two-stage process. First we collect all
299 * offending inodes onto the throw-away list, and in
300 * the second stage we actually dispose of them. This
301 * is because we don't want to sleep while messing
302 * with the global lists..
304 int invalidate_inodes(struct super_block
* sb
)
307 LIST_HEAD(throw_away
);
309 spin_lock(&inode_lock
);
310 busy
= invalidate_list(&inode_in_use
, sb
, &throw_away
);
311 busy
|= invalidate_list(&sb
->s_dirty
, sb
, &throw_away
);
312 spin_unlock(&inode_lock
);
314 dispose_list(&throw_away
);
320 * This is called with the inode lock held. It just looks at the last
321 * inode on the in-use list, and if the inode is trivially freeable
322 * we just move it to the unused list.
324 * Otherwise we just move the inode to be the first inode and expect to
325 * get back to the problem later..
327 #define CAN_UNUSE(inode) \
328 (((inode)->i_count == 0) && \
329 ((inode)->i_nrpages == 0) && \
332 static void try_to_free_inodes(void)
334 struct list_head
* tmp
;
335 struct list_head
*head
= &inode_in_use
;
339 struct inode
* inode
;
342 inode
= list_entry(tmp
, struct inode
, i_list
);
343 if (CAN_UNUSE(inode
)) {
344 list_del(&inode
->i_hash
);
345 INIT_LIST_HEAD(&inode
->i_hash
);
346 head
= &inode_unused
;
353 static struct inode
* find_inode(struct super_block
* sb
, unsigned long ino
, struct list_head
*head
)
355 struct list_head
*tmp
;
356 struct inode
* inode
;
364 inode
= list_entry(tmp
, struct inode
, i_hash
);
365 if (inode
->i_sb
!= sb
)
367 if (inode
->i_ino
!= ino
)
376 * This just initializes the inode fields
377 * to known values before returning the inode..
379 * i_sb, i_ino, i_count, i_state and the lists have
380 * been initialized elsewhere..
/*
 * NOTE(review): several field initializations (original lines
 * ~385-387 and ~389) are missing from this garbled extraction --
 * recover them from the upstream source.  Only comments added below.
 */
382 void clean_inode(struct inode
*inode
)
/* Wipe the filesystem-specific union. */
384 memset(&inode
->u
, 0, sizeof(inode
->u
));
388 inode
->i_writecount
= 0;
/* Reset quota pointers and re-arm the per-inode semaphore. */
390 memset(&inode
->i_dquot
, 0, sizeof(inode
->i_dquot
));
391 sema_init(&inode
->i_sem
, 1);
395 * This gets called with I_LOCK held: it needs
396 * to read the inode and then unlock it
398 static inline void read_inode(struct inode
*inode
, struct super_block
*sb
)
400 sb
->s_op
->read_inode(inode
);
/*
 * NOTE(review): this function arrived garbled and several original
 * lines are missing (the unhash/field-reset of the reclaimed inode,
 * the end-of-function handling after grow_inodes(), and the return
 * paths).  Recover them from the upstream source; only comments
 * were added below.
 */
/* Allocate an anonymous inode (no sb/ino from a filesystem). */
403 struct inode
* get_empty_inode(void)
/* Fake, ever-increasing inode numbers for anonymous inodes. */
405 static unsigned long last_ino
= 0;
406 struct inode
* inode
;
407 struct list_head
* tmp
;
409 spin_lock(&inode_lock
);
410 try_to_free_inodes();
/* Fast path: reuse the first inode on the unused list, if any. */
411 tmp
= inode_unused
.next
;
412 if (tmp
!= &inode_unused
) {
414 inode
= list_entry(tmp
, struct inode
, i_list
);
418 inode
->i_ino
= ++last_ino
;
420 list_add(&inode
->i_list
, &inode_in_use
);
422 spin_unlock(&inode_lock
);
428 * Warning: if this succeeded, we will now
429 * return with the inode lock.
/* Slow path: no free inodes -- drop the lock and grow the cache. */
431 spin_unlock(&inode_lock
);
432 inode
= grow_inodes();
440 * This is called with the inode lock held.. Be careful.
/*
 * NOTE(review): this function arrived garbled -- the add_new_inode
 * label flow, the i_sb/i_ino/i_count assignments, the find_inode()
 * race handling after grow_inodes(), and the returns are missing
 * from this extraction.  Recover them from the upstream source;
 * only comments were added below.
 */
442 static struct inode
* get_new_inode(struct super_block
*sb
, unsigned long ino
, struct list_head
*head
)
444 struct inode
* inode
;
445 struct list_head
* tmp
= inode_unused
.next
;
/* Fast path: a free inode exists -- claim and initialize it. */
447 if (tmp
!= &inode_unused
) {
449 inode
= list_entry(tmp
, struct inode
, i_list
);
451 list_add(&inode
->i_list
, &inode_in_use
);
452 list_add(&inode
->i_hash
, head
);
454 inode
->i_dev
= sb
->s_dev
;
456 inode
->i_flags
= sb
->s_flags
;
/* Lock the half-built inode before dropping inode_lock so other
 * lookups block in wait_on_inode() until it is fully read. */
458 inode
->i_state
= I_LOCK
;
459 spin_unlock(&inode_lock
);
462 read_inode(inode
, sb
);
465 * This is special! We do not need the spinlock
466 * when clearing I_LOCK, because we're guaranteed
467 * that nobody else tries to do anything about the
468 * state of the inode when it is locked, as we
469 * just created it (so there can be no old holders
470 * that haven't tested I_LOCK).
472 * Verify this some day!
474 inode
->i_state
&= ~I_LOCK
;
480 * Uhhuh.. We need to expand. Unlock for the allocation,
481 * but note that "grow_inodes()" will return with the
482 * lock held again if the allocation succeeded.
484 spin_unlock(&inode_lock
);
485 inode
= grow_inodes();
487 /* We released the lock, so.. */
/* Someone may have created the same inode meanwhile: prefer theirs
 * and put our fresh one back on the unused list. */
488 struct inode
* old
= find_inode(sb
, ino
, head
);
491 list_add(&inode
->i_list
, &inode_unused
);
492 spin_unlock(&inode_lock
);
499 static inline unsigned long hash(struct super_block
*sb
, unsigned long i_ino
)
501 unsigned long tmp
= i_ino
| (unsigned long) sb
;
502 tmp
= tmp
+ (tmp
>> HASH_BITS
) + (tmp
>> HASH_BITS
*2);
503 return tmp
& HASH_MASK
;
506 struct inode
*iget(struct super_block
*sb
, unsigned long ino
)
508 struct list_head
* head
= inode_hashtable
+ hash(sb
,ino
);
509 struct inode
* inode
;
511 spin_lock(&inode_lock
);
512 inode
= find_inode(sb
, ino
, head
);
514 try_to_free_inodes();
515 return get_new_inode(sb
, ino
, head
);
517 spin_unlock(&inode_lock
);
518 wait_on_inode(inode
);
522 void insert_inode_hash(struct inode
*inode
)
524 struct list_head
*head
= inode_hashtable
+ hash(inode
->i_sb
, inode
->i_ino
);
525 list_add(&inode
->i_hash
, head
);
528 void iput(struct inode
*inode
)
531 struct super_operations
*op
= NULL
;
533 if (inode
->i_sb
&& inode
->i_sb
->s_op
)
534 op
= inode
->i_sb
->s_op
;
535 if (op
&& op
->put_inode
)
536 op
->put_inode(inode
);
538 spin_lock(&inode_lock
);
539 if (!--inode
->i_count
) {
540 if (!inode
->i_nlink
) {
541 list_del(&inode
->i_hash
);
542 INIT_LIST_HEAD(&inode
->i_hash
);
543 list_del(&inode
->i_list
);
544 INIT_LIST_HEAD(&inode
->i_list
);
545 if (op
&& op
->delete_inode
) {
546 void (*delete)(struct inode
*) = op
->delete_inode
;
547 spin_unlock(&inode_lock
);
549 spin_lock(&inode_lock
);
552 if (list_empty(&inode
->i_hash
)) {
553 list_del(&inode
->i_list
);
554 list_add(&inode
->i_list
, &inode_unused
);
557 spin_unlock(&inode_lock
);
561 int bmap(struct inode
* inode
, int block
)
563 if (inode
->i_op
&& inode
->i_op
->bmap
)
564 return inode
->i_op
->bmap(inode
, block
);
569 * Initialize the hash tables
571 void inode_init(void)
574 struct list_head
*head
= inode_hashtable
;
578 INIT_LIST_HEAD(head
);
584 /* This belongs in file_table.c, not here... */
585 int fs_may_remount_ro(struct super_block
*sb
)
589 /* Check that no files are currently opened for writing. */
590 for (file
= inuse_filps
; file
; file
= file
->f_next
) {
594 inode
= file
->f_dentry
->d_inode
;
595 if (!inode
|| inode
->i_sb
!= sb
)
597 if (S_ISREG(inode
->i_mode
) && file
->f_mode
& FMODE_WRITE
)
600 return 1; /* Tis' cool bro. */