/*
 * linux/fs/inode.c
 *
 * (C) 1997 Linus Torvalds
 */

#include <linux/fs.h>
#include <linux/string.h>
#include <linux/mm.h>

/*
 * New inode.c implementation.
 *
 * This implementation has the basic premise of trying
 * to be extremely low-overhead and SMP-safe, yet be
 * simple enough to be "obviously correct".
 *
 * Famous last words.
 */

/*
 * Inode lookup is no longer as critical as it used to be:
 * most of the lookups are going to be through the dcache.
 */
#define HASH_BITS	8
#define HASH_SIZE	(1UL << HASH_BITS)
#define HASH_MASK	(HASH_SIZE-1)

/*
 * Each inode can be on two separate lists. One is
 * the hash list of the inode, used for lookups. The
 * other linked list is the "type" list:
 *  "in_use" - valid inode, hashed if i_nlink > 0
 *  "dirty"  - valid inode, hashed if i_nlink > 0, dirty.
 *  "unused" - ready to be re-used. Not hashed.
 *
 * A "dirty" list is maintained for each super block,
 * allowing for low-overhead inode sync() operations.
 */
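
/*
 * Editorial note, sketching the lifecycle implied above in terms of
 * the functions defined later in this file:
 *
 *	unused  --(iget / get_empty_inode)-->  in_use
 *	in_use  --(__mark_inode_dirty)------>  sb->s_dirty
 *	s_dirty --(sync_one)---------------->  in_use
 *	in_use  --(last iput, i_count == 0)->  unused
 */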

static LIST_HEAD(inode_in_use);
static LIST_HEAD(inode_unused);
static struct list_head inode_hashtable[HASH_SIZE];

/*
 * A simple spinlock to protect the list manipulations.
 *
 * NOTE! You also have to own the lock if you change
 * the i_state of an inode while it is in use..
 */
spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;
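
/*
 * Illustrative sketch (editorial addition, not a kernel function):
 * the locking rule above in practice. Any walk of the global lists
 * must be bracketed by inode_lock, even a read-only one like this
 * hypothetical counter.
 */
static inline int example_count_unused(void)
{
        struct list_head *tmp;
        int count = 0;

        spin_lock(&inode_lock);
        for (tmp = inode_unused.next; tmp != &inode_unused; tmp = tmp->next)
                count++;
        spin_unlock(&inode_lock);
        return count;
}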

/*
 * Statistics gathering.. Not actually done yet.
 */
struct {
        int nr_inodes;
        int nr_free_inodes;
        int dummy[10];
} inodes_stat;

int max_inodes = NR_INODE;

/*
 * Put the inode on the super block's dirty list.
 */
void __mark_inode_dirty(struct inode *inode)
{
        struct super_block * sb = inode->i_sb;

        if (sb) {
                spin_lock(&inode_lock);
                if (!(inode->i_state & I_DIRTY)) {
                        inode->i_state |= I_DIRTY;
                        /* Only add valid (i.e. hashed) inodes to the dirty list */
                        if (!list_empty(&inode->i_hash)) {
                                list_del(&inode->i_list);
                                list_add(&inode->i_list, &sb->s_dirty);
                        }
                }
                spin_unlock(&inode_lock);
        }
}
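
/*
 * Illustrative sketch (editorial addition, not a kernel function):
 * a filesystem that has just changed an inode field flags it for
 * writeback like this; __mark_inode_dirty() then queues the inode
 * on the super block's s_dirty list for the next sync.
 */
static inline void example_update_size(struct inode *inode, off_t size)
{
        inode->i_size = size;           /* hypothetical metadata change */
        __mark_inode_dirty(inode);
}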

static void __wait_on_inode(struct inode * inode)
{
        struct wait_queue wait = { current, NULL };

        add_wait_queue(&inode->i_wait, &wait);
repeat:
        current->state = TASK_UNINTERRUPTIBLE;
        if (inode->i_state & I_LOCK) {
                schedule();
                goto repeat;
        }
        remove_wait_queue(&inode->i_wait, &wait);
        current->state = TASK_RUNNING;
}

static inline void wait_on_inode(struct inode *inode)
{
        if (inode->i_state & I_LOCK)
                __wait_on_inode(inode);
}

/*
 * These are initializations that only need to be done
 * once, because the fields are idempotent across use
 * of the inode..
 */
static inline void init_once(struct inode * inode)
{
        memset(inode, 0, sizeof(*inode));
        init_waitqueue(&inode->i_wait);
        INIT_LIST_HEAD(&inode->i_hash);
        sema_init(&inode->i_sem, 1);
}

/*
 * Look out! This returns with the inode lock held if
 * it got an inode..
 */
static struct inode * grow_inodes(void)
{
        struct inode * inode = (struct inode *)__get_free_page(GFP_KERNEL);

        if (inode) {
                int size;
                struct inode * tmp;

                spin_lock(&inode_lock);
                size = PAGE_SIZE - 2*sizeof(struct inode);
                tmp = inode;
                do {
                        tmp++;
                        init_once(tmp);
                        list_add(&tmp->i_list, &inode_unused);
                        size -= sizeof(struct inode);
                } while (size >= 0);
                init_once(inode);
        }
        return inode;
}
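
/*
 * Editorial worked example of the arithmetic above, assuming
 * PAGE_SIZE == 4096 and a hypothetical sizeof(struct inode) of 400
 * bytes: size starts at 4096 - 800 = 3296 and goes 2896, 2496, ...,
 * 96, -304, so the loop body runs nine times and puts nine inodes
 * on inode_unused; the tenth inode, at the base of the page, is
 * returned to the caller. 10 * 400 = 4000 <= 4096, so everything
 * stays inside the page.
 */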

static inline void write_inode(struct inode *inode)
{
        if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode)
                inode->i_sb->s_op->write_inode(inode);
}

static inline void sync_one(struct inode *inode)
{
        if (inode->i_state & I_LOCK) {
                spin_unlock(&inode_lock);
                __wait_on_inode(inode);
                spin_lock(&inode_lock);
        } else {
                list_del(&inode->i_list);
                list_add(&inode->i_list, &inode_in_use);

                /* Set I_LOCK, reset I_DIRTY */
                inode->i_state ^= I_DIRTY | I_LOCK;
                spin_unlock(&inode_lock);

                write_inode(inode);

                spin_lock(&inode_lock);
                inode->i_state &= ~I_LOCK;
                wake_up(&inode->i_wait);
        }
}
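
/*
 * Editorial note on the XOR above: on this path I_DIRTY is known to
 * be set (only dirty inodes reach sync_one) and I_LOCK is known to
 * be clear (the if() just tested it), so flipping both bits at once
 * is equivalent to
 *
 *	inode->i_state &= ~I_DIRTY;
 *	inode->i_state |= I_LOCK;
 */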

static inline void sync_list(struct list_head *head)
{
        struct list_head * tmp;

        while ((tmp = head->prev) != head)
                sync_one(list_entry(tmp, struct inode, i_list));
}
183 * "sync_inodes()" goes through the super block's dirty list,
184 * writes them out, and puts them back on the normal list.
186 void sync_inodes(kdev_t dev)
188 struct super_block * sb = super_blocks + 0;
189 int i;
192 * Search the super_blocks array for the device(s) to sync.
194 spin_lock(&inode_lock);
195 for (i = NR_SUPER ; i-- ; sb++) {
196 if (!sb->s_dev)
197 continue;
198 if (dev && sb->s_dev != dev)
199 continue;
201 sync_list(&sb->s_dirty);
202 if (dev)
203 break;
205 spin_unlock(&inode_lock);
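
/*
 * Illustrative sketch (editorial addition, not a kernel function):
 * a zero device argument means "sync every mounted device", so a
 * caller flushing just one inode's backing device would do:
 */
static inline void example_sync_this_device(struct inode *inode)
{
        sync_inodes(inode->i_dev);      /* 0 would mean all devices */
}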

/*
 * Needed by knfsd
 */
void write_inode_now(struct inode *inode)
{
        struct super_block * sb = inode->i_sb;

        if (sb) {
                spin_lock(&inode_lock);
                if (inode->i_state & I_DIRTY)
                        sync_one(inode);
                spin_unlock(&inode_lock);
        } else
                printk("write_inode_now: no super block\n");
}

/*
 * This is called by the filesystem to tell us
 * that the inode is no longer useful. We just
 * terminate it with extreme prejudice.
 */
void clear_inode(struct inode *inode)
{
        truncate_inode_pages(inode, 0);
        wait_on_inode(inode);
        if (IS_WRITABLE(inode) && inode->i_sb && inode->i_sb->dq_op)
                inode->i_sb->dq_op->drop(inode);

        inode->i_state = 0;
}

/*
 * Dispose-list gets a local list, so it doesn't need to
 * worry about list corruption.
 */
static void dispose_list(struct list_head * head)
{
        struct list_head *next;

        next = head->next;
        for (;;) {
                struct list_head * tmp = next;
                struct inode * inode;

                next = next->next;
                if (tmp == head)
                        break;
                inode = list_entry(tmp, struct inode, i_list);
                truncate_inode_pages(inode, 0);
        }

        /* Add them all to the unused list in one fell swoop */
        spin_lock(&inode_lock);
        list_splice(head, &inode_unused);
        spin_unlock(&inode_lock);
}

/*
 * Invalidate all inodes for a device.
 */
static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose)
{
        struct list_head *next;
        int busy = 0;

        next = head->next;
        for (;;) {
                struct list_head * tmp = next;
                struct inode * inode;

                next = next->next;
                if (tmp == head)
                        break;
                inode = list_entry(tmp, struct inode, i_list);
                if (inode->i_sb != sb)
                        continue;
                if (!inode->i_count) {
                        list_del(&inode->i_hash);
                        INIT_LIST_HEAD(&inode->i_hash);
                        list_del(&inode->i_list);
                        list_add(&inode->i_list, dispose);
                        continue;
                }
                busy = 1;
        }
        return busy;
}

/*
 * This is a two-stage process. First we collect all
 * offending inodes onto the throw-away list, and in
 * the second stage we actually dispose of them. This
 * is because we don't want to sleep while messing
 * with the global lists..
 */
int invalidate_inodes(struct super_block * sb)
{
        int busy;
        LIST_HEAD(throw_away);

        spin_lock(&inode_lock);
        busy = invalidate_list(&inode_in_use, sb, &throw_away);
        busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
        spin_unlock(&inode_lock);

        dispose_list(&throw_away);

        return busy;
}
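
/*
 * Illustrative sketch (editorial addition, not a kernel function):
 * how an unmount path might use the return value. A nonzero result
 * means some inode of this super block is still referenced.
 */
static inline int example_umount_cleanup(struct super_block * sb)
{
        int busy = invalidate_inodes(sb);

        if (busy)
                printk("VFS: some inodes still busy, cannot free them\n");
        return busy;
}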

/*
 * This is called with the inode lock held. It just looks at the last
 * inode on the in-use list, and if the inode is trivially freeable
 * we just move it to the unused list.
 *
 * Otherwise we just move the inode to be the first inode and expect to
 * get back to the problem later..
 */
#define CAN_UNUSE(inode) \
        (((inode)->i_count == 0) && \
         ((inode)->i_nrpages == 0) && \
         (!(inode)->i_state))

static void try_to_free_inodes(void)
{
        struct list_head * tmp;
        struct list_head *head = &inode_in_use;

        tmp = head->prev;
        if (tmp != head) {
                struct inode * inode;

                list_del(tmp);
                inode = list_entry(tmp, struct inode, i_list);
                if (CAN_UNUSE(inode)) {
                        list_del(&inode->i_hash);
                        INIT_LIST_HEAD(&inode->i_hash);
                        head = &inode_unused;
                }
                list_add(tmp, head);
        }
}

static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head)
{
        struct list_head *tmp;
        struct inode * inode;

        tmp = head;
        for (;;) {
                tmp = tmp->next;
                inode = NULL;
                if (tmp == head)
                        break;
                inode = list_entry(tmp, struct inode, i_hash);
                if (inode->i_sb != sb)
                        continue;
                if (inode->i_ino != ino)
                        continue;
                inode->i_count++;
                break;
        }
        return inode;
}

/*
 * This just initializes the inode fields
 * to known values before returning the inode..
 *
 * i_sb, i_ino, i_count, i_state and the lists have
 * been initialized elsewhere..
 */
void clean_inode(struct inode *inode)
{
        memset(&inode->u, 0, sizeof(inode->u));
        inode->i_sock = 0;
        inode->i_op = NULL;
        inode->i_nlink = 1;
        inode->i_writecount = 0;
        inode->i_size = 0;
        memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
        sema_init(&inode->i_sem, 1);
}

/*
 * This gets called with I_LOCK held: it needs
 * to read the inode and then unlock it
 */
static inline void read_inode(struct inode *inode, struct super_block *sb)
{
        sb->s_op->read_inode(inode);
}

struct inode * get_empty_inode(void)
{
        static unsigned long last_ino = 0;
        struct inode * inode;
        struct list_head * tmp;

        spin_lock(&inode_lock);
        try_to_free_inodes();
        tmp = inode_unused.next;
        if (tmp != &inode_unused) {
                list_del(tmp);
                inode = list_entry(tmp, struct inode, i_list);
add_new_inode:
                inode->i_sb = NULL;
                inode->i_dev = 0;
                inode->i_ino = ++last_ino;
                inode->i_count = 1;
                list_add(&inode->i_list, &inode_in_use);
                inode->i_state = 0;
                spin_unlock(&inode_lock);
                clean_inode(inode);
                return inode;
        }

        /*
         * Warning: if grow_inodes() succeeded, we will now
         * be holding the inode lock again.
         */
        spin_unlock(&inode_lock);
        inode = grow_inodes();
        if (inode)
                goto add_new_inode;

        return inode;
}
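
/*
 * Illustrative sketch (editorial addition, not a kernel function):
 * get_empty_inode() is for anonymous, unhashed inodes (pipes being
 * the classic users). It returns NULL only if the page allocation
 * in grow_inodes() failed.
 */
static inline struct inode * example_anon_inode(umode_t mode)
{
        struct inode * inode = get_empty_inode();

        if (inode)
                inode->i_mode = mode;   /* i_count is already 1 */
        return inode;
}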

/*
 * This is called with the inode lock held.. Be careful.
 */
static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, struct list_head *head)
{
        struct inode * inode;
        struct list_head * tmp = inode_unused.next;

        if (tmp != &inode_unused) {
                list_del(tmp);
                inode = list_entry(tmp, struct inode, i_list);
add_new_inode:
                list_add(&inode->i_list, &inode_in_use);
                list_add(&inode->i_hash, head);
                inode->i_sb = sb;
                inode->i_dev = sb->s_dev;
                inode->i_ino = ino;
                inode->i_flags = sb->s_flags;
                inode->i_count = 1;
                inode->i_state = I_LOCK;
                spin_unlock(&inode_lock);

                clean_inode(inode);
                read_inode(inode, sb);

                /*
                 * This is special! We do not need the spinlock
                 * when clearing I_LOCK, because we're guaranteed
                 * that nobody else tries to do anything about the
                 * state of the inode when it is locked, as we
                 * just created it (so there can be no old holders
                 * that haven't tested I_LOCK).
                 *
                 * Verify this some day!
                 */
                inode->i_state &= ~I_LOCK;

                return inode;
        }

        /*
         * Uhhuh.. We need to expand. Unlock for the allocation,
         * but note that "grow_inodes()" will return with the
         * lock held again if the allocation succeeded.
         */
        spin_unlock(&inode_lock);
        inode = grow_inodes();
        if (inode) {
                /* We released the lock, so somebody else may
                 * have created the inode in the meantime.. */
                struct inode * old = find_inode(sb, ino, head);
                if (!old)
                        goto add_new_inode;
                list_add(&inode->i_list, &inode_unused);
                spin_unlock(&inode_lock);
                wait_on_inode(old);
                return old;
        }
        return inode;
}

static inline unsigned long hash(struct super_block *sb, unsigned long i_ino)
{
        unsigned long tmp = i_ino | (unsigned long) sb;
        tmp = tmp + (tmp >> HASH_BITS) + (tmp >> HASH_BITS*2);
        return tmp & HASH_MASK;
}
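
/*
 * Editorial worked example: with HASH_BITS == 8, ino == 0x1234 and
 * a hypothetical superblock address of 0xc0123400:
 *
 *	tmp  = 0x1234 | 0xc0123400                  = 0xc0123634
 *	tmp += (tmp >> 8) + (tmp >> 16)
 *	     = 0xc0123634 + 0x00c01236 + 0x0000c012 = 0xc0d3087c
 *	tmp & HASH_MASK                             = 0x7c
 *
 * Folding the shifted copies back in mixes the superblock bits into
 * the low byte that picks one of the 256 hash buckets.
 */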

struct inode *iget(struct super_block *sb, unsigned long ino)
{
        struct list_head * head = inode_hashtable + hash(sb,ino);
        struct inode * inode;

        spin_lock(&inode_lock);
        inode = find_inode(sb, ino, head);
        if (!inode) {
                try_to_free_inodes();
                return get_new_inode(sb, ino, head);
        }
        spin_unlock(&inode_lock);
        wait_on_inode(inode);
        return inode;
}
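
/*
 * Illustrative sketch (editorial addition, not a kernel function):
 * the typical caller is a filesystem lookup that has resolved a
 * name to an on-disk inode number; the reference is dropped later
 * with iput().
 */
static inline struct inode * example_lookup_ino(struct inode * dir, unsigned long ino)
{
        return iget(dir->i_sb, ino);    /* may sleep in read_inode */
}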

void insert_inode_hash(struct inode *inode)
{
        struct list_head *head = inode_hashtable + hash(inode->i_sb, inode->i_ino);

        /* the hash chains are covered by inode_lock like the other lists */
        spin_lock(&inode_lock);
        list_add(&inode->i_hash, head);
        spin_unlock(&inode_lock);
}
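
/*
 * Illustrative sketch (editorial addition, not a kernel function):
 * a filesystem creating a brand-new on-disk inode pairs
 * get_empty_inode() with insert_inode_hash() so that later iget()
 * calls can find it. The field assignments are illustrative only.
 */
static inline struct inode * example_new_inode(struct super_block * sb, unsigned long ino)
{
        struct inode * inode = get_empty_inode();

        if (inode) {
                inode->i_sb = sb;
                inode->i_dev = sb->s_dev;
                inode->i_ino = ino;
                insert_inode_hash(inode);
        }
        return inode;
}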

void iput(struct inode *inode)
{
        if (inode) {
                struct super_operations *op = NULL;

                if (inode->i_sb && inode->i_sb->s_op)
                        op = inode->i_sb->s_op;
                if (op && op->put_inode)
                        op->put_inode(inode);

                spin_lock(&inode_lock);
                if (!--inode->i_count) {
                        if (!inode->i_nlink) {
                                list_del(&inode->i_hash);
                                INIT_LIST_HEAD(&inode->i_hash);
                                list_del(&inode->i_list);
                                INIT_LIST_HEAD(&inode->i_list);
                                if (op && op->delete_inode) {
                                        void (*delete)(struct inode *) = op->delete_inode;
                                        spin_unlock(&inode_lock);
                                        delete(inode);
                                        spin_lock(&inode_lock);
                                }
                        }
                        if (list_empty(&inode->i_hash)) {
                                list_del(&inode->i_list);
                                list_add(&inode->i_list, &inode_unused);
                        }
                }
                spin_unlock(&inode_lock);
        }
}

int bmap(struct inode * inode, int block)
{
        if (inode->i_op && inode->i_op->bmap)
                return inode->i_op->bmap(inode, block);
        return 0;
}
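
/*
 * Illustrative sketch (editorial addition, not a kernel function):
 * mapping a byte offset to its on-disk block via bmap(), assuming a
 * hypothetical 1k block size. Zero means "no mapping": a hole, or
 * no bmap method.
 */
static inline int example_block_for_offset(struct inode * inode, unsigned long offset)
{
        return bmap(inode, offset / 1024);      /* offset 5000 -> block 4 */
}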

/*
 * Initialize the hash tables
 */
void inode_init(void)
{
        int i;
        struct list_head *head = inode_hashtable;

        i = HASH_SIZE;
        do {
                INIT_LIST_HEAD(head);
                head++;
                i--;
        } while (i);
}

/* This belongs in file_table.c, not here... */
int fs_may_remount_ro(struct super_block *sb)
{
        struct file *file;

        /* Check that no files are currently opened for writing. */
        for (file = inuse_filps; file; file = file->f_next) {
                struct inode *inode;

                if (!file->f_dentry)
                        continue;
                inode = file->f_dentry->d_inode;
                if (!inode || inode->i_sb != sb)
                        continue;
                if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
                        return 0;
        }
        return 1;       /* Tis' cool bro. */
}