4 * (C) 1997 Linus Torvalds
8 #include <linux/string.h>
12 * New inode.c implementation.
14 * This implementation has the basic premise of trying
15 * to be extremely low-overhead and SMP-safe, yet be
16 * simple enough to be "obviously correct".
22 * Inode lookup is no longer as critical as it used to be:
23 * most of the lookups are going to be through the dcache.
26 #define HASH_SIZE (1UL << HASH_BITS)
27 #define HASH_MASK (HASH_SIZE-1)
30 * Each inode can be on two separate lists. One is
31 * the hash list of the inode, used for lookups. The
32 * other linked list is the "type" list:
33 * "in_use" - valid inode, hashed if i_nlink > 0
34 * "dirty" - valid inode, hashed if i_nlink > 0, dirty.
35 * "unused" - ready to be re-used. Not hashed.
37 * A "dirty" list is maintained for each super block,
38 * allowing for low-overhead inode sync() operations.
41 static LIST_HEAD(inode_in_use
);
42 static LIST_HEAD(inode_unused
);
43 static struct list_head inode_hashtable
[HASH_SIZE
];
46 * A simple spinlock to protect the list manipulations.
48 * NOTE! You also have to own the lock if you change
49 * the i_state of an inode while it is in use..
51 spinlock_t inode_lock
= SPIN_LOCK_UNLOCKED
;
54 * Statistics gathering.. Not actually done yet.
62 int max_inodes
= NR_INODE
;
65 * Put the inode on the super block's dirty list
67 void __mark_inode_dirty(struct inode
*inode
)
69 struct super_block
* sb
= inode
->i_sb
;
72 spin_lock(&inode_lock
);
73 if (!(inode
->i_state
& I_DIRTY
)) {
74 inode
->i_state
|= I_DIRTY
;
75 /* Only add valid (ie hashed) inodes to the dirty list */
76 if (!list_empty(&inode
->i_hash
)) {
77 list_del(&inode
->i_list
);
78 list_add(&inode
->i_list
, &sb
->s_dirty
);
81 spin_unlock(&inode_lock
);
85 static void __wait_on_inode(struct inode
* inode
)
87 struct wait_queue wait
= { current
, NULL
};
89 add_wait_queue(&inode
->i_wait
, &wait
);
91 current
->state
= TASK_UNINTERRUPTIBLE
;
92 if (inode
->i_state
& I_LOCK
) {
96 remove_wait_queue(&inode
->i_wait
, &wait
);
97 current
->state
= TASK_RUNNING
;
100 static inline void wait_on_inode(struct inode
*inode
)
102 if (inode
->i_state
& I_LOCK
)
103 __wait_on_inode(inode
);
107 * These are initializations that only need to be done
108 * once, because the fields are idempotent across use
111 static inline void init_once(struct inode
* inode
)
113 memset(inode
, 0, sizeof(*inode
));
114 init_waitqueue(&inode
->i_wait
);
115 INIT_LIST_HEAD(&inode
->i_hash
);
116 sema_init(&inode
->i_sem
, 1);
/*
 * NOTE(review): this region of the file arrived garbled -- statements
 * are split across lines and several original lines are missing: the
 * allocation-failure check, the declarations of `size` and `tmp`, the
 * loop header that carves the freshly allocated page into inodes, and
 * the function tail.  Recover the full body from the upstream source
 * before building; only comments were added below.
 */
121 * Look out! This returns with the inode lock held if
/* Grows the inode cache by one page worth of struct inodes. */
124 static struct inode
* grow_inodes(void)
/* One whole page is allocated and (apparently) chopped into inodes. */
126 struct inode
* inode
= (struct inode
*)__get_free_page(GFP_KERNEL
);
/* inode_lock is taken here; per the comment above it seems to stay
 * held on successful return -- TODO confirm with upstream. */
132 spin_lock(&inode_lock
);
133 size
= PAGE_SIZE
- 2*sizeof(struct inode
);
/* Each carved-out inode is pushed onto the global unused list. */
138 list_add(&tmp
->i_list
, &inode_unused
);
139 size
-= sizeof(struct inode
);
146 static inline void write_inode(struct inode
*inode
)
148 if (inode
->i_sb
&& inode
->i_sb
->s_op
&& inode
->i_sb
->s_op
->write_inode
)
149 inode
->i_sb
->s_op
->write_inode(inode
);
152 static inline void sync_one(struct inode
*inode
)
154 if (inode
->i_state
& I_LOCK
) {
155 spin_unlock(&inode_lock
);
156 __wait_on_inode(inode
);
157 spin_lock(&inode_lock
);
159 list_del(&inode
->i_list
);
160 list_add(&inode
->i_list
, &inode_in_use
);
162 /* Set I_LOCK, reset I_DIRTY */
163 inode
->i_state
^= I_DIRTY
| I_LOCK
;
164 spin_unlock(&inode_lock
);
168 spin_lock(&inode_lock
);
169 inode
->i_state
&= ~I_LOCK
;
170 wake_up(&inode
->i_wait
);
174 static inline void sync_list(struct list_head
*head
)
176 struct list_head
* tmp
;
178 while ((tmp
= head
->prev
) != head
)
179 sync_one(list_entry(tmp
, struct inode
, i_list
));
183 * "sync_inodes()" goes through the super block's dirty list,
184 * writes them out, and puts them back on the normal list.
186 void sync_inodes(kdev_t dev
)
188 struct super_block
* sb
= super_blocks
+ 0;
192 * Search the super_blocks array for the device(s) to sync.
194 spin_lock(&inode_lock
);
195 for (i
= NR_SUPER
; i
-- ; sb
++) {
198 if (dev
&& sb
->s_dev
!= dev
)
201 sync_list(&sb
->s_dirty
);
205 spin_unlock(&inode_lock
);
211 void write_inode_now(struct inode
*inode
)
213 struct super_block
* sb
= inode
->i_sb
;
216 spin_lock(&inode_lock
);
217 if (inode
->i_state
& I_DIRTY
)
219 spin_unlock(&inode_lock
);
222 printk("write_inode_now: no super block\n");
226 * This is called by the filesystem to tell us
227 * that the inode is no longer useful. We just
228 * terminate it with extreme predjudice.
/*
 * NOTE(review): the tail of this function (original lines ~236-238)
 * is missing from this garbled extraction -- recover it from the
 * upstream source.  Only comments were added below.
 */
230 void clear_inode(struct inode
*inode
)
/* Drop any cached pages, then wait until nobody holds I_LOCK. */
232 truncate_inode_pages(inode
, 0);
233 wait_on_inode(inode
);
/* Give the quota subsystem a chance to release its per-inode state,
 * but only for writable inodes on filesystems with quota ops. */
234 if (IS_WRITABLE(inode
) && inode
->i_sb
&& inode
->i_sb
->dq_op
)
235 inode
->i_sb
->dq_op
->drop(inode
);
241 * Dispose-list gets a local list, so it doesn't need to
242 * worry about list corruption.
244 static void dispose_list(struct list_head
* head
)
246 struct list_head
*next
;
250 struct list_head
* tmp
= next
;
251 struct inode
* inode
;
256 inode
= list_entry(tmp
, struct inode
, i_list
);
257 truncate_inode_pages(inode
, 0);
260 /* Add them all to the unused list in one fell swoop */
261 spin_lock(&inode_lock
);
262 list_splice(head
, &inode_unused
);
263 spin_unlock(&inode_lock
);
267 * Invalidate all inodes for a device.
269 static int invalidate_list(struct list_head
*head
, struct super_block
* sb
, struct list_head
* dispose
)
271 struct list_head
*next
;
276 struct list_head
* tmp
= next
;
277 struct inode
* inode
;
282 inode
= list_entry(tmp
, struct inode
, i_list
);
283 if (inode
->i_sb
!= sb
)
285 if (!inode
->i_count
) {
286 list_del(&inode
->i_hash
);
287 INIT_LIST_HEAD(&inode
->i_hash
);
288 list_del(&inode
->i_list
);
289 list_add(&inode
->i_list
, dispose
);
298 * This is a two-stage process. First we collect all
299 * offending inodes onto the throw-away list, and in
300 * the second stage we actually dispose of them. This
301 * is because we don't want to sleep while messing
302 * with the global lists..
304 int invalidate_inodes(struct super_block
* sb
)
307 LIST_HEAD(throw_away
);
309 spin_lock(&inode_lock
);
310 busy
= invalidate_list(&inode_in_use
, sb
, &throw_away
);
311 busy
|= invalidate_list(&sb
->s_dirty
, sb
, &throw_away
);
312 spin_unlock(&inode_lock
);
314 dispose_list(&throw_away
);
320 * This is called with the inode lock held. It just looks at the last
321 * inode on the in-use list, and if the inode is trivially freeable
322 * we just move it to the unused list.
324 * Otherwise we just move the inode to be the first inode and expect to
325 * get back to the problem later..
327 #define CAN_UNUSE(inode) \
328 (((inode)->i_count == 0) && \
329 ((inode)->i_nrpages == 0) && \
332 static void try_to_free_inodes(void)
334 struct list_head
* tmp
;
335 struct list_head
*head
= &inode_in_use
;
339 struct inode
* inode
;
342 inode
= list_entry(tmp
, struct inode
, i_list
);
343 if (CAN_UNUSE(inode
)) {
344 list_del(&inode
->i_hash
);
345 INIT_LIST_HEAD(&inode
->i_hash
);
346 head
= &inode_unused
;
353 static struct inode
* find_inode(struct super_block
* sb
, unsigned long ino
, struct list_head
*head
)
355 struct list_head
*tmp
;
356 struct inode
* inode
;
364 inode
= list_entry(tmp
, struct inode
, i_hash
);
365 if (inode
->i_sb
!= sb
)
367 if (inode
->i_ino
!= ino
)
376 * This just initializes the inode fields
377 * to known values before returning the inode..
379 * i_sb, i_ino, i_count, i_state and the lists have
380 * been initialized elsewhere..
/*
 * NOTE(review): several field initializations (original lines
 * ~385-387 and ~389) are missing from this garbled extraction --
 * recover them from the upstream source.  Only comments added below.
 */
382 void clean_inode(struct inode
*inode
)
/* Wipe the filesystem-specific union. */
384 memset(&inode
->u
, 0, sizeof(inode
->u
));
388 inode
->i_writecount
= 0;
/* Reset quota pointers and re-arm the per-inode semaphore. */
390 memset(&inode
->i_dquot
, 0, sizeof(inode
->i_dquot
));
391 sema_init(&inode
->i_sem
, 1);
395 * This gets called with I_LOCK held: it needs
396 * to read the inode and then unlock it
398 static inline void read_inode(struct inode
*inode
, struct super_block
*sb
)
400 sb
->s_op
->read_inode(inode
);
/*
 * NOTE(review): this function arrived garbled and several original
 * lines are missing (the unhash/field-reset of the reclaimed inode,
 * the end-of-function handling after grow_inodes(), and the return
 * paths).  Recover them from the upstream source; only comments
 * were added below.
 */
/* Allocate an anonymous inode (no sb/ino from a filesystem). */
403 struct inode
* get_empty_inode(void)
/* Fake, ever-increasing inode numbers for anonymous inodes. */
405 static unsigned long last_ino
= 0;
406 struct inode
* inode
;
407 struct list_head
* tmp
;
409 spin_lock(&inode_lock
);
410 try_to_free_inodes();
/* Fast path: reuse the first inode on the unused list, if any. */
411 tmp
= inode_unused
.next
;
412 if (tmp
!= &inode_unused
) {
414 inode
= list_entry(tmp
, struct inode
, i_list
);
418 inode
->i_ino
= ++last_ino
;
420 list_add(&inode
->i_list
, &inode_in_use
);
422 spin_unlock(&inode_lock
);
428 * Warning: if this succeeded, we will now
429 * return with the inode lock.
/* Slow path: no free inodes -- drop the lock and grow the cache. */
431 spin_unlock(&inode_lock
);
432 inode
= grow_inodes();
440 * This is called with the inode lock held.. Be careful.
/*
 * NOTE(review): this function arrived garbled -- the add_new_inode
 * label flow, the i_sb/i_ino/i_count assignments, the find_inode()
 * race handling after grow_inodes(), and the returns are missing
 * from this extraction.  Recover them from the upstream source;
 * only comments were added below.
 */
442 static struct inode
* get_new_inode(struct super_block
*sb
, unsigned long ino
, struct list_head
*head
)
444 struct inode
* inode
;
445 struct list_head
* tmp
= inode_unused
.next
;
/* Fast path: a free inode exists -- claim and initialize it. */
447 if (tmp
!= &inode_unused
) {
449 inode
= list_entry(tmp
, struct inode
, i_list
);
451 list_add(&inode
->i_list
, &inode_in_use
);
452 list_add(&inode
->i_hash
, head
);
454 inode
->i_dev
= sb
->s_dev
;
456 inode
->i_flags
= sb
->s_flags
;
/* Lock the half-built inode before dropping inode_lock so other
 * lookups block in wait_on_inode() until it is fully read. */
458 inode
->i_state
= I_LOCK
;
459 spin_unlock(&inode_lock
);
462 read_inode(inode
, sb
);
465 * This is special! We do not need the spinlock
466 * when clearing I_LOCK, because we're guaranteed
467 * that nobody else tries to do anything about the
468 * state of the inode when it is locked, as we
469 * just created it (so there can be no old holders
470 * that haven't tested I_LOCK).
472 * Verify this some day!
474 inode
->i_state
&= ~I_LOCK
;
480 * Uhhuh.. We need to expand. Unlock for the allocation,
481 * but note that "grow_inodes()" will return with the
482 * lock held again if the allocation succeeded.
484 spin_unlock(&inode_lock
);
485 inode
= grow_inodes();
487 /* We released the lock, so.. */
/* Someone may have created the same inode meanwhile: prefer theirs
 * and put our fresh one back on the unused list. */
488 struct inode
* old
= find_inode(sb
, ino
, head
);
491 list_add(&inode
->i_list
, &inode_unused
);
492 spin_unlock(&inode_lock
);
499 static inline unsigned long hash(struct super_block
*sb
, unsigned long i_ino
)
501 unsigned long tmp
= i_ino
| (unsigned long) sb
;
502 tmp
= tmp
+ (tmp
>> HASH_BITS
) + (tmp
>> HASH_BITS
*2);
503 return tmp
& HASH_MASK
;
506 struct inode
*iget(struct super_block
*sb
, unsigned long ino
)
508 struct list_head
* head
= inode_hashtable
+ hash(sb
,ino
);
509 struct inode
* inode
;
511 spin_lock(&inode_lock
);
512 inode
= find_inode(sb
, ino
, head
);
514 try_to_free_inodes();
515 return get_new_inode(sb
, ino
, head
);
517 spin_unlock(&inode_lock
);
518 wait_on_inode(inode
);
522 void insert_inode_hash(struct inode
*inode
)
524 struct list_head
*head
= inode_hashtable
+ hash(inode
->i_sb
, inode
->i_ino
);
525 list_add(&inode
->i_hash
, head
);
528 void iput(struct inode
*inode
)
531 struct super_operations
*op
= NULL
;
533 if (inode
->i_sb
&& inode
->i_sb
->s_op
)
534 op
= inode
->i_sb
->s_op
;
535 if (op
&& op
->put_inode
)
536 op
->put_inode(inode
);
538 spin_lock(&inode_lock
);
539 if (!--inode
->i_count
) {
540 if (!inode
->i_nlink
) {
541 list_del(&inode
->i_hash
);
542 INIT_LIST_HEAD(&inode
->i_hash
);
543 list_del(&inode
->i_list
);
544 INIT_LIST_HEAD(&inode
->i_list
);
545 if (op
&& op
->delete_inode
) {
546 void (*delete)(struct inode
*) = op
->delete_inode
;
547 spin_unlock(&inode_lock
);
549 spin_lock(&inode_lock
);
552 if (list_empty(&inode
->i_hash
)) {
553 list_del(&inode
->i_list
);
554 list_add(&inode
->i_list
, &inode_unused
);
557 spin_unlock(&inode_lock
);
561 int bmap(struct inode
* inode
, int block
)
563 if (inode
->i_op
&& inode
->i_op
->bmap
)
564 return inode
->i_op
->bmap(inode
, block
);
569 * Initialize the hash tables
571 void inode_init(void)
574 struct list_head
*head
= inode_hashtable
;
578 INIT_LIST_HEAD(head
);
584 /* This belongs in file_table.c, not here... */
585 int fs_may_remount_ro(struct super_block
*sb
)
589 /* Check that no files are currently opened for writing. */
590 for (file
= inuse_filps
; file
; file
= file
->f_next
) {
594 inode
= file
->f_dentry
->d_inode
;
595 if (!inode
|| inode
->i_sb
!= sb
)
597 if (S_ISREG(inode
->i_mode
) && file
->f_mode
& FMODE_WRITE
)
600 return 1; /* Tis' cool bro. */