/*
 * linux/fs/inode.c
 *
 * (C) 1997 Linus Torvalds
 */

#include <linux/fs.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/dcache.h>
#include <linux/init.h>
#include <linux/quotaops.h>
/*
 * New inode.c implementation.
 *
 * This implementation has the basic premise of trying
 * to be extremely low-overhead and SMP-safe, yet be
 * simple enough to be "obviously correct".
 *
 * Famous last words.
 */
#define INODE_PARANOIA 1
/* #define INODE_DEBUG 1 */
/*
 * Inode lookup is no longer as critical as it used to be:
 * most of the lookups are going to be through the dcache.
 */
#define HASH_BITS	8
#define HASH_SIZE	(1UL << HASH_BITS)
#define HASH_MASK	(HASH_SIZE-1)
/*
 * Each inode can be on two separate lists. One is
 * the hash list of the inode, used for lookups. The
 * other linked list is the "type" list:
 *  "in_use" - valid inode, hashed if i_nlink > 0
 *  "dirty"  - valid inode, hashed if i_nlink > 0, dirty.
 *  "unused" - ready to be re-used. Not hashed.
 *
 * A "dirty" list is maintained for each super block,
 * allowing for low-overhead inode sync() operations.
 */
static LIST_HEAD(inode_in_use);
static LIST_HEAD(inode_unused);
static struct list_head inode_hashtable[HASH_SIZE];
/*
 * A simple spinlock to protect the list manipulations.
 *
 * NOTE! You also have to own the lock if you change
 * the i_state of an inode while it is in use..
 */
spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;
/*
 * Statistics gathering..
 */
struct {
	int nr_inodes;
	int nr_free_inodes;
	int dummy[5];
} inodes_stat = {0, 0,};

int max_inodes;
/*
 * Put the inode on the super block's dirty list.
 *
 * CAREFUL! We mark it dirty unconditionally, but
 * move it onto the dirty list only if it is hashed.
 * If it was not hashed, it will never be added to
 * the dirty list even if it is later hashed, as it
 * will have been marked dirty already.
 *
 * In short, make sure you hash any inodes _before_
 * you start marking them dirty..
 */
void __mark_inode_dirty(struct inode *inode)
{
	struct super_block * sb = inode->i_sb;

	if (sb) {
		spin_lock(&inode_lock);
		if (!(inode->i_state & I_DIRTY)) {
			inode->i_state |= I_DIRTY;
			/* Only add valid (ie hashed) inodes to the dirty list */
			if (!list_empty(&inode->i_hash)) {
				list_del(&inode->i_list);
				list_add(&inode->i_list, &sb->s_dirty);
			}
		}
		spin_unlock(&inode_lock);
	}
}
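
/*
 * Sleep until the I_LOCK bit is cleared. The sleeper puts itself
 * on the inode's wait queue and re-checks the bit after every
 * wakeup, so a spurious wake_up() is harmless. wait_on_inode()
 * is the fast-path wrapper that avoids the wait-queue setup when
 * the inode is not locked.
 */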
static void __wait_on_inode(struct inode * inode)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(&inode->i_wait, &wait);
repeat:
	current->state = TASK_UNINTERRUPTIBLE;
	if (inode->i_state & I_LOCK) {
		schedule();
		goto repeat;
	}
	remove_wait_queue(&inode->i_wait, &wait);
	current->state = TASK_RUNNING;
}

static inline void wait_on_inode(struct inode *inode)
{
	if (inode->i_state & I_LOCK)
		__wait_on_inode(inode);
}
/*
 * These are initializations that only need to be done
 * once, because the fields are idempotent across use
 * of the inode..
 */
static inline void init_once(struct inode * inode)
{
	memset(inode, 0, sizeof(*inode));
	init_waitqueue_head(&inode->i_wait);
	INIT_LIST_HEAD(&inode->i_hash);
	INIT_LIST_HEAD(&inode->i_dentry);
	sema_init(&inode->i_sem, 1);
}
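
/*
 * Flush an in-core inode to disk through the filesystem's
 * write_inode() method, if it provides one.
 */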
static inline void write_inode(struct inode *inode)
{
	if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode)
		inode->i_sb->s_op->write_inode(inode);
}
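
/*
 * Write out one dirty inode. Expects the inode lock to be held on
 * entry and returns with it held, but drops it while sleeping: if
 * the inode is already locked we just wait for the current writer,
 * else we set I_LOCK, clear I_DIRTY, and do the write with the lock
 * released so that concurrent users block in wait_on_inode().
 */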
static inline void sync_one(struct inode *inode)
{
	if (inode->i_state & I_LOCK) {
		spin_unlock(&inode_lock);
		__wait_on_inode(inode);
		spin_lock(&inode_lock);
	} else {
		list_del(&inode->i_list);
		list_add(&inode->i_list, &inode_in_use);
		/* Set I_LOCK, reset I_DIRTY */
		inode->i_state ^= I_DIRTY | I_LOCK;
		spin_unlock(&inode_lock);

		write_inode(inode);

		spin_lock(&inode_lock);
		inode->i_state &= ~I_LOCK;
		wake_up(&inode->i_wait);
	}
}
static inline void sync_list(struct list_head *head)
{
	struct list_head * tmp;

	while ((tmp = head->prev) != head)
		sync_one(list_entry(tmp, struct inode, i_list));
}
171 * "sync_inodes()" goes through the super block's dirty list,
172 * writes them out, and puts them back on the normal list.
174 void sync_inodes(kdev_t dev)
176 struct super_block * sb = sb_entry(super_blocks.next);
179 * Search the super_blocks array for the device(s) to sync.
181 spin_lock(&inode_lock);
182 for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
183 if (!sb->s_dev)
184 continue;
185 if (dev && sb->s_dev != dev)
186 continue;
188 sync_list(&sb->s_dirty);
190 if (dev)
191 break;
193 spin_unlock(&inode_lock);
/*
 * Called with the spinlock already held..
 */
static void sync_all_inodes(void)
{
	struct super_block * sb = sb_entry(super_blocks.next);
	for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
		if (!sb->s_dev)
			continue;
		sync_list(&sb->s_dirty);
	}
}
/*
 * Needed by knfsd
 */
void write_inode_now(struct inode *inode)
{
	struct super_block * sb = inode->i_sb;

	if (sb) {
		spin_lock(&inode_lock);
		while (inode->i_state & I_DIRTY)
			sync_one(inode);
		spin_unlock(&inode_lock);
	}
	else
		printk("write_inode_now: no super block\n");
}
/*
 * This is called by the filesystem to tell us
 * that the inode is no longer useful. We just
 * terminate it with extreme prejudice.
 */
void clear_inode(struct inode *inode)
{
	if (inode->i_nrpages)
		truncate_inode_pages(inode, 0);
	wait_on_inode(inode);
	if (IS_QUOTAINIT(inode))
		DQUOT_DROP(inode);
	if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->clear_inode)
		inode->i_sb->s_op->clear_inode(inode);

	inode->i_state = 0;
}
/*
 * Dispose-list gets a local list, so it doesn't need to
 * worry about list corruption. It releases the inode lock
 * while clearing the inodes.
 */
static void dispose_list(struct list_head * head)
{
	struct list_head *next;
	int count = 0;

	spin_unlock(&inode_lock);
	next = head->next;
	for (;;) {
		struct list_head * tmp = next;
		struct inode * inode;

		next = next->next;
		if (tmp == head)
			break;
		inode = list_entry(tmp, struct inode, i_list);
		clear_inode(inode);
		count++;
	}

	/* Add them all to the unused list in one fell swoop */
	spin_lock(&inode_lock);
	list_splice(head, &inode_unused);
	inodes_stat.nr_free_inodes += count;
}
/*
 * Invalidate all inodes for a device.
 */
static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose)
{
	struct list_head *next;
	int busy = 0;

	next = head->next;
	for (;;) {
		struct list_head * tmp = next;
		struct inode * inode;

		next = next->next;
		if (tmp == head)
			break;
		inode = list_entry(tmp, struct inode, i_list);
		if (inode->i_sb != sb)
			continue;
		if (!inode->i_count) {
			list_del(&inode->i_hash);
			INIT_LIST_HEAD(&inode->i_hash);
			list_del(&inode->i_list);
			list_add(&inode->i_list, dispose);
			inode->i_state |= I_FREEING;
			continue;
		}
		busy = 1;
	}
	return busy;
}
/*
 * This is a two-stage process. First we collect all
 * offending inodes onto the throw-away list, and in
 * the second stage we actually dispose of them. This
 * is because we don't want to sleep while messing
 * with the global lists..
 */
int invalidate_inodes(struct super_block * sb)
{
	int busy;
	LIST_HEAD(throw_away);

	spin_lock(&inode_lock);
	busy = invalidate_list(&inode_in_use, sb, &throw_away);
	busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
	dispose_list(&throw_away);
	spin_unlock(&inode_lock);

	return busy;
}
/*
 * This is called with the inode lock held. It searches
 * the in-use list for freeable inodes, which are moved to a
 * temporary list and then placed on the unused list by
 * dispose_list.
 *
 * We don't expect to have to call this very often.
 *
 * N.B. The spinlock is released during the call to
 *      dispose_list.
 */
#define CAN_UNUSE(inode) \
	(((inode)->i_count | (inode)->i_state | (inode)->i_nrpages) == 0)
#define INODE(entry)	(list_entry(entry, struct inode, i_list))

static int free_inodes(void)
{
	struct list_head list, *entry, *freeable = &list;
	int found = 0;

	INIT_LIST_HEAD(freeable);
	entry = inode_in_use.next;
	while (entry != &inode_in_use) {
		struct list_head *tmp = entry;

		entry = entry->next;
		if (!CAN_UNUSE(INODE(tmp)))
			continue;
		list_del(tmp);
		list_del(&INODE(tmp)->i_hash);
		INIT_LIST_HEAD(&INODE(tmp)->i_hash);
		list_add(tmp, freeable);
		list_entry(tmp, struct inode, i_list)->i_state = I_FREEING;
		found = 1;
	}

	if (found)
		dispose_list(freeable);

	return found;
}
/*
 * Searches the inodes list for freeable inodes,
 * shrinking the dcache before (and possibly after,
 * if we're low)
 */
static void try_to_free_inodes(int goal)
{
	/*
	 * First try to just get rid of unused inodes.
	 *
	 * If we can't reach our goal that way, we'll have
	 * to try to shrink the dcache and sync existing
	 * inodes..
	 */
	free_inodes();
	goal -= inodes_stat.nr_free_inodes;
	if (goal > 0) {
		spin_unlock(&inode_lock);
		select_dcache(goal, 0);
		prune_dcache(goal);
		spin_lock(&inode_lock);
		sync_all_inodes();
		free_inodes();
	}
}
/*
 * This is the externally visible routine for
 * inode memory management.
 */
void free_inode_memory(int goal)
{
	spin_lock(&inode_lock);
	free_inodes();
	spin_unlock(&inode_lock);
}
/*
 * This is called with the spinlock held, but releases
 * the lock when freeing or allocating inodes.
 * Look out! This returns with the inode lock held if
 * it got an inode..
 *
 * We do inode allocations two pages at a time to reduce
 * fragmentation.
 */
#define INODE_PAGE_ORDER	1
#define INODE_ALLOCATION_SIZE	(PAGE_SIZE << INODE_PAGE_ORDER)
#define INODES_PER_ALLOCATION	(INODE_ALLOCATION_SIZE/sizeof(struct inode))
static struct inode * grow_inodes(void)
{
	struct inode * inode;

	/*
	 * Check whether to restock the unused list.
	 */
	if (inodes_stat.nr_inodes > max_inodes) {
		struct list_head *tmp;
		try_to_free_inodes(inodes_stat.nr_inodes >> 2);
		tmp = inode_unused.next;
		if (tmp != &inode_unused) {
			inodes_stat.nr_free_inodes--;
			list_del(tmp);
			inode = list_entry(tmp, struct inode, i_list);
			return inode;
		}
	}

	spin_unlock(&inode_lock);
	inode = (struct inode *)__get_free_pages(GFP_KERNEL, INODE_PAGE_ORDER);
	if (inode) {
		int size;
		struct inode * tmp;

		size = INODE_ALLOCATION_SIZE - 2*sizeof(struct inode);
		tmp = inode;
		spin_lock(&inode_lock);
		do {
			tmp++;
			init_once(tmp);
			list_add(&tmp->i_list, &inode_unused);
			size -= sizeof(struct inode);
		} while (size >= 0);
		init_once(inode);
		/*
		 * Update the inode statistics
		 */
		inodes_stat.nr_inodes += INODES_PER_ALLOCATION;
		inodes_stat.nr_free_inodes += INODES_PER_ALLOCATION - 1;
		return inode;
	}

	/*
	 * If the allocation failed, do an extensive pruning of
	 * the dcache and then try again to free some inodes.
	 */
	prune_dcache(inodes_stat.nr_inodes >> 2);

	spin_lock(&inode_lock);
	free_inodes();
	{
		struct list_head *tmp = inode_unused.next;
		if (tmp != &inode_unused) {
			inodes_stat.nr_free_inodes--;
			list_del(tmp);
			inode = list_entry(tmp, struct inode, i_list);
			return inode;
		}
	}
	spin_unlock(&inode_lock);

	printk("grow_inodes: allocation failed\n");
	return NULL;
}
/*
 * Called with the inode lock held.
 */
static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head)
{
	struct list_head *tmp;
	struct inode * inode;

	tmp = head;
	for (;;) {
		tmp = tmp->next;
		inode = NULL;
		if (tmp == head)
			break;
		inode = list_entry(tmp, struct inode, i_hash);
		if (inode->i_sb != sb)
			continue;
		if (inode->i_ino != ino)
			continue;
		inode->i_count++;
		break;
	}
	return inode;
}
/*
 * This just initializes the inode fields
 * to known values before returning the inode..
 *
 * i_sb, i_ino, i_count, i_state and the lists have
 * been initialized elsewhere..
 */
void clean_inode(struct inode *inode)
{
	memset(&inode->u, 0, sizeof(inode->u));
	inode->i_sock = 0;
	inode->i_op = NULL;
	inode->i_nlink = 1;
	inode->i_writecount = 0;
	inode->i_size = 0;
	inode->i_generation = 0;
	memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
	sema_init(&inode->i_sem, 1);
	inode->i_pipe = NULL;
}
/*
 * This is called by things like the networking layer
 * etc that want to get an inode without any inode
 * number, or filesystems that allocate new inodes with
 * no pre-existing information.
 */
struct inode * get_empty_inode(void)
{
	static unsigned long last_ino = 0;
	struct inode * inode;
	struct list_head * tmp;

	spin_lock(&inode_lock);
	tmp = inode_unused.next;
	if (tmp != &inode_unused) {
		list_del(tmp);
		inodes_stat.nr_free_inodes--;
		inode = list_entry(tmp, struct inode, i_list);
add_new_inode:
		list_add(&inode->i_list, &inode_in_use);
		inode->i_sb = NULL;
		inode->i_dev = 0;
		inode->i_ino = ++last_ino;
		inode->i_flags = 0;
		inode->i_count = 1;
		inode->i_state = 0;
		spin_unlock(&inode_lock);
		clean_inode(inode);
		return inode;
	}

	/*
	 * Warning: if this succeeded, we will now
	 * return with the inode lock.
	 */
	inode = grow_inodes();
	if (inode)
		goto add_new_inode;

	return inode;
}
/*
 * This is called with the inode lock held.. Be careful.
 *
 * We no longer cache the sb_flags in i_flags - see fs.h
 *	-- rmk@arm.uk.linux.org
 */
static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, struct list_head *head)
{
	struct inode * inode;
	struct list_head * tmp = inode_unused.next;

	if (tmp != &inode_unused) {
		list_del(tmp);
		inodes_stat.nr_free_inodes--;
		inode = list_entry(tmp, struct inode, i_list);
add_new_inode:
		list_add(&inode->i_list, &inode_in_use);
		list_add(&inode->i_hash, head);
		inode->i_sb = sb;
		inode->i_dev = sb->s_dev;
		inode->i_ino = ino;
		inode->i_flags = 0;
		inode->i_count = 1;
		inode->i_state = I_LOCK;
		spin_unlock(&inode_lock);

		clean_inode(inode);
		sb->s_op->read_inode(inode);

		/*
		 * This is special! We do not need the spinlock
		 * when clearing I_LOCK, because we're guaranteed
		 * that nobody else tries to do anything about the
		 * state of the inode when it is locked, as we
		 * just created it (so there can be no old holders
		 * that haven't tested I_LOCK).
		 */
		inode->i_state &= ~I_LOCK;
		wake_up(&inode->i_wait);

		return inode;
	}

	/*
	 * We need to expand. Note that "grow_inodes()" will
	 * release the spinlock, but will return with the lock
	 * held again if the allocation succeeded.
	 */
	inode = grow_inodes();
	if (inode) {
		/* We released the lock, so.. */
		struct inode * old = find_inode(sb, ino, head);
		if (!old)
			goto add_new_inode;
		list_add(&inode->i_list, &inode_unused);
		inodes_stat.nr_free_inodes++;
		spin_unlock(&inode_lock);
		wait_on_inode(old);
		return old;
	}
	return inode;
}
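
/*
 * Hash an (sb, ino) pair into a hash-table bucket. The superblock
 * pointer and the inode number are OR'ed together, and the bits
 * above HASH_BITS are folded back in twice before masking, so that
 * entropy above the table size still influences the bucket choice.
 */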
static inline unsigned long hash(struct super_block *sb, unsigned long i_ino)
{
	unsigned long tmp = i_ino | (unsigned long) sb;
	tmp = tmp + (tmp >> HASH_BITS) + (tmp >> HASH_BITS*2);
	return tmp & HASH_MASK;
}
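
/*
 * iunique() returns an inode number that is not currently in use
 * on the given superblock: a static counter is advanced (and kept
 * above max_reserved) until a value is found that misses in the
 * inode hash. Filesystems without stable on-disk inode numbers
 * can use this to number the inodes they fabricate.
 */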
/* Yeah, I know about quadratic hash. Maybe, later. */
ino_t iunique(struct super_block *sb, ino_t max_reserved)
{
	static ino_t counter = 0;
	struct inode *inode;
	struct list_head * head;
	ino_t res;
	spin_lock(&inode_lock);
retry:
	if (counter > max_reserved) {
		head = inode_hashtable + hash(sb,counter);
		inode = find_inode(sb, res = counter++, head);
		if (!inode) {
			spin_unlock(&inode_lock);
			return res;
		}
		inode->i_count--; /* compensate find_inode() */
	} else {
		counter = max_reserved + 1;
	}
	goto retry;
}
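
/*
 * igrab() takes an extra reference on an inode the caller already
 * has a pointer to. If the inode is in the middle of being freed
 * (I_FREEING), NULL is returned instead, since the reference could
 * not be taken safely; otherwise we wait for any pending I_LOCK
 * before handing the inode back.
 */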
struct inode *igrab(struct inode *inode)
{
	spin_lock(&inode_lock);
	if (inode->i_state & I_FREEING)
		inode = NULL;
	else
		inode->i_count++;
	spin_unlock(&inode_lock);
	if (inode)
		wait_on_inode(inode);
	return inode;
}
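
/*
 * iget() is the main entry point for looking up an inode by
 * (superblock, inode number): it returns the cached inode if one
 * is hashed, and otherwise allocates a fresh one and has the
 * filesystem read it in. A typical (illustrative) call site is a
 * filesystem's directory lookup routine:
 *
 *	inode = iget(dir->i_sb, ino);
 *	if (!inode)
 *		return -EACCES;
 *	d_add(dentry, inode);
 */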
struct inode *iget(struct super_block *sb, unsigned long ino)
{
	struct list_head * head = inode_hashtable + hash(sb,ino);
	struct inode * inode;

	spin_lock(&inode_lock);
	inode = find_inode(sb, ino, head);
	if (inode) {
		spin_unlock(&inode_lock);
		wait_on_inode(inode);
		return inode;
	}
	/*
	 * get_new_inode() will do the right thing, releasing
	 * the inode lock and re-trying the search in case it
	 * had to block at any point.
	 */
	return get_new_inode(sb, ino, head);
}
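
/*
 * Add an inode to the hash table, so that it can be found again
 * through iget(). Filesystems call this once i_ino is set on a
 * newly created inode; remember that an inode must be hashed
 * before it is first marked dirty (see __mark_inode_dirty above).
 */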
void insert_inode_hash(struct inode *inode)
{
	struct list_head *head = inode_hashtable + hash(inode->i_sb, inode->i_ino);
	spin_lock(&inode_lock);
	list_add(&inode->i_hash, head);
	spin_unlock(&inode_lock);
}
void remove_inode_hash(struct inode *inode)
{
	spin_lock(&inode_lock);
	list_del(&inode->i_hash);
	INIT_LIST_HEAD(&inode->i_hash);
	spin_unlock(&inode_lock);
}
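
/*
 * iput() drops a reference to an inode. When the last reference
 * goes away the inode is either deleted (i_nlink == 0), cleared
 * and moved to the unused list (if it is unhashed), or left on
 * the in-use or dirty list for later reuse.
 */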
void iput(struct inode *inode)
{
	if (inode) {
		struct super_operations *op = NULL;

		if (inode->i_sb && inode->i_sb->s_op)
			op = inode->i_sb->s_op;
		if (op && op->put_inode)
			op->put_inode(inode);

		spin_lock(&inode_lock);
		if (!--inode->i_count) {
			if (!inode->i_nlink) {
				list_del(&inode->i_hash);
				INIT_LIST_HEAD(&inode->i_hash);
				list_del(&inode->i_list);
				INIT_LIST_HEAD(&inode->i_list);
				inode->i_state |= I_FREEING;
				if (op && op->delete_inode) {
					void (*delete)(struct inode *) = op->delete_inode;
					spin_unlock(&inode_lock);
					delete(inode);
					spin_lock(&inode_lock);
				}
			}
			if (list_empty(&inode->i_hash)) {
				list_del(&inode->i_list);
				INIT_LIST_HEAD(&inode->i_list);
				inode->i_state |= I_FREEING;
				spin_unlock(&inode_lock);
				clear_inode(inode);
				spin_lock(&inode_lock);
				list_add(&inode->i_list, &inode_unused);
				inodes_stat.nr_free_inodes++;
			}
			else if (!(inode->i_state & I_DIRTY)) {
				list_del(&inode->i_list);
				list_add(&inode->i_list, &inode_in_use);
			}
#ifdef INODE_PARANOIA
			if (inode->i_flock)
				printk(KERN_ERR "iput: inode %s/%ld still has locks!\n",
					kdevname(inode->i_dev), inode->i_ino);
			if (!list_empty(&inode->i_dentry))
				printk(KERN_ERR "iput: device %s inode %ld still has aliases!\n",
					kdevname(inode->i_dev), inode->i_ino);
			if (inode->i_count)
				printk(KERN_ERR "iput: device %s inode %ld count changed, count=%d\n",
					kdevname(inode->i_dev), inode->i_ino, inode->i_count);
			if (atomic_read(&inode->i_sem.count) != 1)
				printk(KERN_ERR "iput: Aieee, semaphore in use inode %s/%ld, count=%d\n",
					kdevname(inode->i_dev), inode->i_ino, atomic_read(&inode->i_sem.count));
#endif
		}
		if (inode->i_count > (1<<31)) {
			printk(KERN_ERR "iput: inode %s/%ld count wrapped\n",
				kdevname(inode->i_dev), inode->i_ino);
		}
		spin_unlock(&inode_lock);
	}
}
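
/*
 * bmap() maps a logical block number within a file to the
 * corresponding block number on the underlying device, via the
 * filesystem's bmap method. Returns 0 if the filesystem has no
 * bmap method (0 doubling as "no mapping" is a long-standing
 * quirk of this interface).
 */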
int bmap(struct inode * inode, int block)
{
	if (inode->i_op && inode->i_op->bmap)
		return inode->i_op->bmap(inode, block);
	return 0;
}
/*
 * Initialize the hash tables and default
 * value for max inodes
 */
#define MAX_INODE (16384)

void __init inode_init(void)
{
	int i, max;
	struct list_head *head = inode_hashtable;

	i = HASH_SIZE;
	do {
		INIT_LIST_HEAD(head);
		head++;
		i--;
	} while (i);

	/* Initial guess at reasonable inode number */
	max = num_physpages >> 1;
	if (max > MAX_INODE)
		max = MAX_INODE;
	max_inodes = max;
}
/* This belongs in file_table.c, not here... */
int fs_may_remount_ro(struct super_block *sb)
{
	struct file *file;

	/* Check that no files are currently opened for writing. */
	for (file = inuse_filps; file; file = file->f_next) {
		struct inode *inode;
		if (!file->f_dentry)
			continue;
		inode = file->f_dentry->d_inode;
		if (!inode || inode->i_sb != sb)
			continue;

		/* File with pending delete? */
		if (inode->i_nlink == 0)
			return 0;

		/* Writable file? */
		if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
			return 0;
	}
	return 1; /* Tis' cool bro. */
}
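
/*
 * Update the access time of an inode and mark it dirty, unless
 * atime updates are suppressed: by MS_NOATIME, by MS_NODIRATIME
 * for directories, or because the filesystem is read-only.
 */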
void update_atime (struct inode *inode)
{
	if ( IS_NOATIME (inode) ) return;
	if ( IS_NODIRATIME (inode) && S_ISDIR (inode->i_mode) ) return;
	if ( IS_RDONLY (inode) ) return;
	inode->i_atime = CURRENT_TIME;
	mark_inode_dirty (inode);
}   /*  End Function update_atime  */