/*
 * linux/fs/inode.c
 *
 * (C) 1997 Linus Torvalds
 */
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/dcache.h>
#include <linux/init.h>
#include <linux/quotaops.h>
/*
 * New inode.c implementation.
 *
 * This implementation has the basic premise of trying
 * to be extremely low-overhead and SMP-safe, yet be
 * simple enough to be "obviously correct".
 *
 * Famous last words.
 */
#define INODE_PARANOIA 1
/* #define INODE_DEBUG 1 */
/*
 * Inode lookup is no longer as critical as it used to be:
 * most of the lookups are going to be through the dcache.
 */
#define HASH_BITS	8
#define HASH_SIZE	(1UL << HASH_BITS)
#define HASH_MASK	(HASH_SIZE-1)
/*
 * Each inode can be on two separate lists. One is
 * the hash list of the inode, used for lookups. The
 * other linked list is the "type" list:
 *  "in_use" - valid inode, hashed if i_nlink > 0
 *  "dirty"  - valid inode, hashed if i_nlink > 0, dirty.
 *  "unused" - ready to be re-used. Not hashed.
 *
 * A "dirty" list is maintained for each super block,
 * allowing for low-overhead inode sync() operations.
 */
static LIST_HEAD(inode_in_use);
static LIST_HEAD(inode_unused);
static struct list_head inode_hashtable[HASH_SIZE];
/*
 * A simple spinlock to protect the list manipulations.
 *
 * NOTE! You also have to own the lock if you change
 * the i_state of an inode while it is in use..
 */
spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;
/*
 * Statistics gathering..
 */
struct {
	int nr_inodes;
	int nr_free_inodes;
	int dummy[5];
} inodes_stat = {0, 0,};
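/*
 * nr_inodes counts every inode this file has handed out, and
 * nr_free_inodes counts the ones sitting on inode_unused; the sysctl
 * code elsewhere in the kernel reads this structure to report inode
 * usage to user space.
 */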
int max_inodes;
/*
 * Put the inode on the super block's dirty list.
 *
 * CAREFUL! We mark it dirty unconditionally, but
 * move it onto the dirty list only if it is hashed.
 * If it was not hashed, it will never be added to
 * the dirty list even if it is later hashed, as it
 * will have been marked dirty already.
 *
 * In short, make sure you hash any inodes _before_
 * you start marking them dirty..
 */
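/*
 * A rough sketch of what that ordering looks like in a filesystem's
 * inode-creation path (illustrative only, not lifted from any
 * particular filesystem):
 *
 *	inode->i_ino = ino;
 *	insert_inode_hash(inode);	hash first...
 *	mark_inode_dirty(inode);	...then dirty
 */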
void __mark_inode_dirty(struct inode *inode)
{
	struct super_block * sb = inode->i_sb;

	if (sb) {
		spin_lock(&inode_lock);
		if (!(inode->i_state & I_DIRTY)) {
			inode->i_state |= I_DIRTY;
			/* Only add valid (ie hashed) inodes to the dirty list */
			if (!list_empty(&inode->i_hash)) {
				list_del(&inode->i_list);
				list_add(&inode->i_list, &sb->s_dirty);
			}
		}
		spin_unlock(&inode_lock);
	}
}
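/*
 * Sleep until the inode's I_LOCK bit is cleared. Uses the inode's
 * wait queue; callers must not hold inode_lock, since this schedules.
 */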
static void __wait_on_inode(struct inode * inode)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(&inode->i_wait, &wait);
repeat:
	current->state = TASK_UNINTERRUPTIBLE;
	if (inode->i_state & I_LOCK) {
		schedule();
		goto repeat;
	}
	remove_wait_queue(&inode->i_wait, &wait);
	current->state = TASK_RUNNING;
}
static inline void wait_on_inode(struct inode *inode)
{
	if (inode->i_state & I_LOCK)
		__wait_on_inode(inode);
}
/*
 * These are initializations that only need to be done
 * once, because the fields are idempotent across use
 * of the inode..
 */
static inline void init_once(struct inode * inode)
{
	memset(inode, 0, sizeof(*inode));
	init_waitqueue_head(&inode->i_wait);
	INIT_LIST_HEAD(&inode->i_hash);
	INIT_LIST_HEAD(&inode->i_dentry);
	sema_init(&inode->i_sem, 1);
	sema_init(&inode->i_atomic_write, 1);
}
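/*
 * Push a single inode to disk via the superblock's write_inode()
 * method, if the filesystem provides one.
 */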
static inline void write_inode(struct inode *inode)
{
	if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode)
		inode->i_sb->s_op->write_inode(inode);
}
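/*
 * Write one dirty inode back, or wait for it if somebody else is
 * already writing it (I_LOCK set). Called with inode_lock held;
 * the lock is dropped and re-taken around the actual write.
 */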
static inline void sync_one(struct inode *inode)
{
	if (inode->i_state & I_LOCK) {
		spin_unlock(&inode_lock);
		__wait_on_inode(inode);
		spin_lock(&inode_lock);
	} else {
		list_del(&inode->i_list);
		list_add(&inode->i_list, &inode_in_use);
		/* Set I_LOCK, reset I_DIRTY */
		inode->i_state ^= I_DIRTY | I_LOCK;
		spin_unlock(&inode_lock);

		write_inode(inode);

		spin_lock(&inode_lock);
		inode->i_state &= ~I_LOCK;
		wake_up(&inode->i_wait);
	}
}
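/*
 * Sync every inode on a dirty list, starting from the tail;
 * sync_one() moves each unlocked inode back onto the in-use list,
 * so the dirty list eventually drains.
 */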
static inline void sync_list(struct list_head *head)
{
	struct list_head * tmp;

	while ((tmp = head->prev) != head)
		sync_one(list_entry(tmp, struct inode, i_list));
}
/*
 * "sync_inodes()" goes through the super block's dirty list,
 * writes them out, and puts them back on the normal list.
 */
void sync_inodes(kdev_t dev)
{
	struct super_block * sb = sb_entry(super_blocks.next);

	/*
	 * Search the super_blocks list for the device(s) to sync.
	 */
	spin_lock(&inode_lock);
	for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
		if (!sb->s_dev)
			continue;
		if (dev && sb->s_dev != dev)
			continue;

		sync_list(&sb->s_dirty);

		if (dev)
			break;
	}
	spin_unlock(&inode_lock);
}
/*
 * Called with the spinlock already held..
 */
static void sync_all_inodes(void)
{
	struct super_block * sb = sb_entry(super_blocks.next);
	for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
		if (!sb->s_dev)
			continue;
		sync_list(&sb->s_dirty);
	}
}
/*
 * Needed by knfsd
 */
void write_inode_now(struct inode *inode)
{
	struct super_block * sb = inode->i_sb;

	if (sb) {
		spin_lock(&inode_lock);
		while (inode->i_state & I_DIRTY)
			sync_one(inode);
		spin_unlock(&inode_lock);
	}
	else
		printk("write_inode_now: no super block\n");
}
/*
 * This is called by the filesystem to tell us
 * that the inode is no longer useful. We just
 * terminate it with extreme prejudice.
 */
void clear_inode(struct inode *inode)
{
	if (inode->i_nrpages)
		truncate_inode_pages(inode, 0);
	wait_on_inode(inode);
	if (IS_QUOTAINIT(inode))
		DQUOT_DROP(inode);
	if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->clear_inode)
		inode->i_sb->s_op->clear_inode(inode);

	inode->i_state = 0;
}
/*
 * Dispose-list gets a local list, so it doesn't need to
 * worry about list corruption. It releases the inode lock
 * while clearing the inodes.
 */
static void dispose_list(struct list_head * head)
{
	struct list_head *next;
	int count = 0;

	spin_unlock(&inode_lock);
	next = head->next;
	for (;;) {
		struct list_head * tmp = next;
		struct inode * inode;

		next = next->next;
		if (tmp == head)
			break;
		inode = list_entry(tmp, struct inode, i_list);
		clear_inode(inode);
		count++;
	}

	/* Add them all to the unused list in one fell swoop */
	spin_lock(&inode_lock);
	list_splice(head, &inode_unused);
	inodes_stat.nr_free_inodes += count;
}
/*
 * Invalidate all inodes for a device.
 */
static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose)
{
	struct list_head *next;
	int busy = 0;

	next = head->next;
	for (;;) {
		struct list_head * tmp = next;
		struct inode * inode;

		next = next->next;
		if (tmp == head)
			break;
		inode = list_entry(tmp, struct inode, i_list);
		if (inode->i_sb != sb)
			continue;
		if (!inode->i_count) {
			list_del(&inode->i_hash);
			INIT_LIST_HEAD(&inode->i_hash);
			list_del(&inode->i_list);
			list_add(&inode->i_list, dispose);
			inode->i_state |= I_FREEING;
			continue;
		}
		busy = 1;
	}
	return busy;
}
/*
 * This is a two-stage process. First we collect all
 * offending inodes onto the throw-away list, and in
 * the second stage we actually dispose of them. This
 * is because we don't want to sleep while messing
 * with the global lists..
 */
int invalidate_inodes(struct super_block * sb)
{
	int busy;
	LIST_HEAD(throw_away);

	spin_lock(&inode_lock);
	busy = invalidate_list(&inode_in_use, sb, &throw_away);
	busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
	dispose_list(&throw_away);
	spin_unlock(&inode_lock);

	return busy;
}
/*
 * This is called with the inode lock held. It searches
 * the in-use list for freeable inodes, which are moved to a
 * temporary list and then placed on the unused list by
 * dispose_list.
 *
 * We don't expect to have to call this very often.
 *
 * N.B. The spinlock is released during the call to
 *      dispose_list.
 */
#define CAN_UNUSE(inode) \
	(((inode)->i_count | (inode)->i_state) == 0)
#define INODE(entry)	(list_entry(entry, struct inode, i_list))
static int free_inodes(void)
{
	struct list_head list, *entry, *freeable = &list;
	int found = 0;

	INIT_LIST_HEAD(freeable);
	entry = inode_in_use.next;
	while (entry != &inode_in_use) {
		struct list_head *tmp = entry;

		entry = entry->next;
		if (!CAN_UNUSE(INODE(tmp)))
			continue;
		list_del(tmp);
		list_del(&INODE(tmp)->i_hash);
		INIT_LIST_HEAD(&INODE(tmp)->i_hash);
		list_add(tmp, freeable);
		list_entry(tmp, struct inode, i_list)->i_state = I_FREEING;
		found = 1;
	}

	if (found)
		dispose_list(freeable);

	return found;
}
/*
 * Searches the inodes list for freeable inodes,
 * shrinking the dcache before (and possibly after,
 * if we're low)
 */
static void try_to_free_inodes(int goal)
{
	/*
	 * First try to just get rid of unused inodes.
	 *
	 * If we can't reach our goal that way, we'll have
	 * to try to shrink the dcache and sync existing
	 * inodes..
	 */
	free_inodes();
	goal -= inodes_stat.nr_free_inodes;
	if (goal > 0) {
		spin_unlock(&inode_lock);
		select_dcache(goal, 0);
		prune_dcache(goal);
		spin_lock(&inode_lock);
		sync_all_inodes();
		free_inodes();
	}
}
/*
 * This is the externally visible routine for
 * inode memory management.
 */
void free_inode_memory(int goal)
{
	spin_lock(&inode_lock);
	free_inodes();
	spin_unlock(&inode_lock);
}
/*
 * This is called with the spinlock held, but releases
 * the lock when freeing or allocating inodes.
 * Look out! This returns with the inode lock held if
 * it got an inode..
 *
 * We do inode allocations two pages at a time to reduce
 * fragmentation.
 */
#define INODE_PAGE_ORDER	1
#define INODE_ALLOCATION_SIZE	(PAGE_SIZE << INODE_PAGE_ORDER)
#define INODES_PER_ALLOCATION	(INODE_ALLOCATION_SIZE/sizeof(struct inode))
static struct inode * grow_inodes(void)
{
	struct inode * inode;

	/*
	 * Check whether to restock the unused list.
	 */
	if (inodes_stat.nr_inodes > max_inodes) {
		struct list_head *tmp;
		try_to_free_inodes(inodes_stat.nr_inodes >> 2);
		tmp = inode_unused.next;
		if (tmp != &inode_unused) {
			inodes_stat.nr_free_inodes--;
			list_del(tmp);
			inode = list_entry(tmp, struct inode, i_list);
			return inode;
		}
	}

	spin_unlock(&inode_lock);
	inode = (struct inode *)__get_free_pages(GFP_KERNEL,INODE_PAGE_ORDER);
	if (inode) {
		int size;
		struct inode * tmp;

		size = INODE_ALLOCATION_SIZE - 2*sizeof(struct inode);
		tmp = inode;
		spin_lock(&inode_lock);
		do {
			tmp++;
			init_once(tmp);
			list_add(&tmp->i_list, &inode_unused);
			size -= sizeof(struct inode);
		} while (size >= 0);
		init_once(inode);
		/*
		 * Update the inode statistics
		 */
		inodes_stat.nr_inodes += INODES_PER_ALLOCATION;
		inodes_stat.nr_free_inodes += INODES_PER_ALLOCATION - 1;
		return inode;
	}

	/*
	 * If the allocation failed, do an extensive pruning of
	 * the dcache and then try again to free some inodes.
	 */
	prune_dcache(inodes_stat.nr_inodes >> 2);

	spin_lock(&inode_lock);
	free_inodes();
	{
		struct list_head *tmp = inode_unused.next;
		if (tmp != &inode_unused) {
			inodes_stat.nr_free_inodes--;
			list_del(tmp);
			inode = list_entry(tmp, struct inode, i_list);
			return inode;
		}
		spin_unlock(&inode_lock);
	}

	printk("grow_inodes: allocation failed\n");
	return NULL;
}
/*
 * Called with the inode lock held.
 */
static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head)
{
	struct list_head *tmp;
	struct inode * inode;

	tmp = head;
	for (;;) {
		tmp = tmp->next;
		inode = NULL;
		if (tmp == head)
			break;
		inode = list_entry(tmp, struct inode, i_hash);
		if (inode->i_sb != sb)
			continue;
		if (inode->i_ino != ino)
			continue;
		inode->i_count++;
		break;
	}
	return inode;
}
/*
 * This just initializes the inode fields
 * to known values before returning the inode..
 *
 * i_sb, i_ino, i_count, i_state and the lists have
 * been initialized elsewhere..
 */
void clean_inode(struct inode *inode)
{
	memset(&inode->u, 0, sizeof(inode->u));
	inode->i_sock = 0;
	inode->i_op = NULL;
	inode->i_nlink = 1;
	inode->i_writecount = 0;
	inode->i_size = 0;
	inode->i_generation = 0;
	memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
	sema_init(&inode->i_sem, 1);
	inode->i_pipe = NULL;
}
/*
 * This is called by things like the networking layer
 * etc that want to get an inode without any inode
 * number, or filesystems that allocate new inodes with
 * no pre-existing information.
 */
struct inode * get_empty_inode(void)
{
	static unsigned long last_ino = 0;
	struct inode * inode;
	struct list_head * tmp;

	spin_lock(&inode_lock);
	tmp = inode_unused.next;
	if (tmp != &inode_unused) {
		list_del(tmp);
		inodes_stat.nr_free_inodes--;
		inode = list_entry(tmp, struct inode, i_list);
add_new_inode:
		list_add(&inode->i_list, &inode_in_use);
		inode->i_sb = NULL;
		inode->i_dev = 0;
		inode->i_ino = ++last_ino;
		inode->i_flags = 0;
		inode->i_count = 1;
		inode->i_state = 0;
		spin_unlock(&inode_lock);
		clean_inode(inode);
		return inode;
	}

	/*
	 * Warning: if this succeeded, we will now
	 * return with the inode lock.
	 */
	inode = grow_inodes();
	if (inode)
		goto add_new_inode;

	return inode;
}
/*
 * This is called with the inode lock held.. Be careful.
 *
 * We no longer cache the sb_flags in i_flags - see fs.h
 *	-- rmk@arm.uk.linux.org
 */
static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, struct list_head *head)
{
	struct inode * inode;
	struct list_head * tmp = inode_unused.next;

	if (tmp != &inode_unused) {
		list_del(tmp);
		inodes_stat.nr_free_inodes--;
		inode = list_entry(tmp, struct inode, i_list);
add_new_inode:
		list_add(&inode->i_list, &inode_in_use);
		list_add(&inode->i_hash, head);
		inode->i_sb = sb;
		inode->i_dev = sb->s_dev;
		inode->i_ino = ino;
		inode->i_flags = 0;
		inode->i_count = 1;
		inode->i_state = I_LOCK;
		spin_unlock(&inode_lock);

		clean_inode(inode);
		sb->s_op->read_inode(inode);

		/*
		 * This is special! We do not need the spinlock
		 * when clearing I_LOCK, because we're guaranteed
		 * that nobody else tries to do anything about the
		 * state of the inode when it is locked, as we
		 * just created it (so there can be no old holders
		 * that haven't tested I_LOCK).
		 */
		inode->i_state &= ~I_LOCK;
		wake_up(&inode->i_wait);

		return inode;
	}

	/*
	 * We need to expand. Note that "grow_inodes()" will
	 * release the spinlock, but will return with the lock
	 * held again if the allocation succeeded.
	 */
	inode = grow_inodes();
	if (inode) {
		/* We released the lock, so.. */
		struct inode * old = find_inode(sb, ino, head);
		if (!old)
			goto add_new_inode;
		list_add(&inode->i_list, &inode_unused);
		inodes_stat.nr_free_inodes++;
		spin_unlock(&inode_lock);
		wait_on_inode(old);
		return old;
	}
	return inode;
}
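/*
 * Fold the inode number and the superblock pointer into an index into
 * inode_hashtable. The two shifted additions mix the higher-order bits
 * down into the low HASH_BITS, so inodes with the same number on
 * different superblocks usually land in different hash chains.
 */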
static inline unsigned long hash(struct super_block *sb, unsigned long i_ino)
{
	unsigned long tmp = i_ino | (unsigned long) sb;
	tmp = tmp + (tmp >> HASH_BITS) + (tmp >> HASH_BITS*2);
	return tmp & HASH_MASK;
}
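/*
 * Hand out an inode number that is not currently in use on this
 * superblock and is above max_reserved. Intended for filesystems that
 * have no natural on-disk inode numbers of their own; each candidate
 * is checked against the hash table until a free one is found.
 */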
/* Yeah, I know about quadratic hash. Maybe, later. */
ino_t iunique(struct super_block *sb, ino_t max_reserved)
{
	static ino_t counter = 0;
	struct inode *inode;
	struct list_head * head;
	ino_t res;
	spin_lock(&inode_lock);
retry:
	if (counter > max_reserved) {
		head = inode_hashtable + hash(sb,counter);
		inode = find_inode(sb, res = counter++, head);
		if (!inode) {
			spin_unlock(&inode_lock);
			return res;
		}
		inode->i_count--; /* compensate find_inode() */
	} else {
		counter = max_reserved + 1;
	}
	goto retry;
}
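/*
 * Take an extra reference on an inode we already hold a pointer to
 * (typically one reached through the dcache). Returns NULL if the
 * inode is in the middle of being freed; otherwise it waits for any
 * pending I_LOCK and returns the inode with its count bumped.
 */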
struct inode *igrab(struct inode *inode)
{
	spin_lock(&inode_lock);
	if (inode->i_state & I_FREEING)
		inode = NULL;
	else
		inode->i_count++;
	spin_unlock(&inode_lock);
	if (inode)
		wait_on_inode(inode);
	return inode;
}
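/*
 * Get the inode with number "ino" on superblock "sb". If it is
 * already in the hash table we return it with an extra reference
 * (waiting for any pending read to finish); otherwise a fresh inode
 * is set up and the filesystem's read_inode() is called to fill it in.
 */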
struct inode *iget(struct super_block *sb, unsigned long ino)
{
	struct list_head * head = inode_hashtable + hash(sb,ino);
	struct inode * inode;

	spin_lock(&inode_lock);
	inode = find_inode(sb, ino, head);
	if (inode) {
		spin_unlock(&inode_lock);
		wait_on_inode(inode);
		return inode;
	}
	/*
	 * get_new_inode() will do the right thing, releasing
	 * the inode lock and re-trying the search in case it
	 * had to block at any point.
	 */
	return get_new_inode(sb, ino, head);
}
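/*
 * Hash a newly set-up inode so that iget() can find it. A filesystem
 * would normally call this right after allocating a fresh on-disk
 * inode, and before marking it dirty (see the comment above
 * __mark_inode_dirty()).
 */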
void insert_inode_hash(struct inode *inode)
{
	struct list_head *head = inode_hashtable + hash(inode->i_sb, inode->i_ino);
	spin_lock(&inode_lock);
	list_add(&inode->i_hash, head);
	spin_unlock(&inode_lock);
}
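/*
 * Take an inode out of the hash table so it can no longer be found by
 * iget(). The hash list head is re-initialised so that the
 * list_empty() checks in iput() and __mark_inode_dirty() keep working.
 */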
void remove_inode_hash(struct inode *inode)
{
	spin_lock(&inode_lock);
	list_del(&inode->i_hash);
	INIT_LIST_HEAD(&inode->i_hash);
	spin_unlock(&inode_lock);
}
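/*
 * Drop a reference to an inode. When the last reference goes away the
 * inode is deleted via s_op->delete_inode if its link count is zero,
 * cleared and moved to the unused list if it is no longer hashed, or
 * simply left on the in-use (or dirty) list otherwise. May sleep, so
 * it must not be called with spinlocks held.
 */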
void iput(struct inode *inode)
{
	if (inode) {
		struct super_operations *op = NULL;

		if (inode->i_sb && inode->i_sb->s_op)
			op = inode->i_sb->s_op;
		if (op && op->put_inode)
			op->put_inode(inode);

		spin_lock(&inode_lock);
		if (!--inode->i_count) {
			if (!inode->i_nlink) {
				list_del(&inode->i_hash);
				INIT_LIST_HEAD(&inode->i_hash);
				list_del(&inode->i_list);
				INIT_LIST_HEAD(&inode->i_list);
				inode->i_state|=I_FREEING;
				if (op && op->delete_inode) {
					void (*delete)(struct inode *) = op->delete_inode;
					spin_unlock(&inode_lock);
					delete(inode);
					spin_lock(&inode_lock);
				}
			}
			if (list_empty(&inode->i_hash)) {
				list_del(&inode->i_list);
				INIT_LIST_HEAD(&inode->i_list);
				inode->i_state|=I_FREEING;
				spin_unlock(&inode_lock);
				clear_inode(inode);
				spin_lock(&inode_lock);
				list_add(&inode->i_list, &inode_unused);
				inodes_stat.nr_free_inodes++;
			}
			else if (!(inode->i_state & I_DIRTY)) {
				list_del(&inode->i_list);
				list_add(&inode->i_list, &inode_in_use);
			}
#ifdef INODE_PARANOIA
			if (inode->i_flock)
				printk(KERN_ERR "iput: inode %s/%ld still has locks!\n",
					kdevname(inode->i_dev), inode->i_ino);
			if (!list_empty(&inode->i_dentry))
				printk(KERN_ERR "iput: device %s inode %ld still has aliases!\n",
					kdevname(inode->i_dev), inode->i_ino);
			if (inode->i_count)
				printk(KERN_ERR "iput: device %s inode %ld count changed, count=%d\n",
					kdevname(inode->i_dev), inode->i_ino, inode->i_count);
			if (atomic_read(&inode->i_sem.count) != 1)
				printk(KERN_ERR "iput: Aieee, semaphore in use inode %s/%ld, count=%d\n",
					kdevname(inode->i_dev), inode->i_ino, atomic_read(&inode->i_sem.count));
			if (atomic_read(&inode->i_atomic_write.count) != 1)
				printk(KERN_ERR "iput: Aieee, atomic write semaphore in use inode %s/%ld, count=%d\n",
					kdevname(inode->i_dev), inode->i_ino, atomic_read(&inode->i_atomic_write.count));
#endif
		}
		if (inode->i_count > (1<<31)) {
			printk(KERN_ERR "iput: inode %s/%ld count wrapped\n",
				kdevname(inode->i_dev), inode->i_ino);
		}
		spin_unlock(&inode_lock);
	}
}
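/*
 * Map a logical block number within the file to a block number on the
 * underlying device, using the filesystem's bmap operation. Returns 0
 * when the filesystem has no bmap method or the block is not mapped
 * (e.g. a hole).
 */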
int bmap(struct inode * inode, int block)
{
	if (inode->i_op && inode->i_op->bmap)
		return inode->i_op->bmap(inode, block);
	return 0;
}
/*
 * Initialize the hash tables and default
 * value for max inodes
 */
#define MAX_INODE (16384)
void __init inode_init(void)
{
	int i, max;
	struct list_head *head = inode_hashtable;

	i = HASH_SIZE;
	do {
		INIT_LIST_HEAD(head);
		head++;
		i--;
	} while (i);

	/* Initial guess at reasonable inode number */
	max = num_physpages >> 1;
	if (max > MAX_INODE)
		max = MAX_INODE;
	max_inodes = max;
}
/* This belongs in file_table.c, not here... */
int fs_may_remount_ro(struct super_block *sb)
{
	struct file *file;

	/* Check that no files are currently opened for writing. */
	for (file = inuse_filps; file; file = file->f_next) {
		struct inode *inode;
		if (!file->f_dentry)
			continue;
		inode = file->f_dentry->d_inode;
		if (!inode || inode->i_sb != sb)
			continue;

		/* File with pending delete? */
		if (inode->i_nlink == 0)
			return 0;

		/* Writable file? */
		if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
			return 0;
	}
	return 1; /* Tis' cool bro. */
}
void update_atime (struct inode *inode)
{
	if ( IS_NOATIME (inode) ) return;
	if ( IS_NODIRATIME (inode) && S_ISDIR (inode->i_mode) ) return;
	if ( IS_RDONLY (inode) ) return;
	inode->i_atime = CURRENT_TIME;
	mark_inode_dirty (inode);
}   /*  End Function update_atime  */