1 /*
2 * linux/fs/buffer.c
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
7 /*
8 * 'buffer.c' implements the buffer-cache functions. Race-conditions have
9 * been avoided by NEVER letting an interrupt change a buffer (except for the
10 * data, of course), but instead letting the caller do it.
13 /* Start bdflush() with kernel_thread not syscall - Paul Gortmaker, 12/95 */
15 /* Removed a lot of unnecessary code and simplified things now that
16 * the buffer cache isn't our primary cache - Andrew Tridgell 12/96
19 /* Speed up hash, lru, and free list operations. Use gfp() for allocating
20 * hash table, use SLAB cache for buffer heads. -DaveM
23 /* Added 32k buffer block sizes - these are required for older ARM systems.
24 * - RMK
27 #include <linux/malloc.h>
28 #include <linux/locks.h>
29 #include <linux/errno.h>
30 #include <linux/swap.h>
31 #include <linux/swapctl.h>
32 #include <linux/smp_lock.h>
33 #include <linux/vmalloc.h>
34 #include <linux/blkdev.h>
35 #include <linux/sysrq.h>
36 #include <linux/file.h>
37 #include <linux/init.h>
38 #include <linux/quotaops.h>
40 #include <asm/uaccess.h>
41 #include <asm/io.h>
42 #include <asm/bitops.h>
44 #define NR_SIZES 7
45 static char buffersize_index[65] =
46 {-1, 0, 1, -1, 2, -1, -1, -1, 3, -1, -1, -1, -1, -1, -1, -1,
47 4, -1, -1, -1, -1, -1, -1, -1, -1,-1, -1, -1, -1, -1, -1, -1,
48 5, -1, -1, -1, -1, -1, -1, -1, -1,-1, -1, -1, -1, -1, -1, -1,
49 -1, -1, -1, -1, -1, -1, -1, -1, -1,-1, -1, -1, -1, -1, -1, -1,
50 6};
52 #define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9])
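/* A worked example of the table above (illustrative): BUFSIZE_INDEX(512)
 * is 0, BUFSIZE_INDEX(1024) is 1, BUFSIZE_INDEX(4096) is 3 and
 * BUFSIZE_INDEX(32768) is 6, while an unsupported size such as 1536
 * maps to -1.
 */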
53 #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
54 #define NR_RESERVED (2*MAX_BUF_PER_PAGE)
55 #define MAX_UNUSED_BUFFERS NR_RESERVED+20 /* don't ever have more than this
56 number of unused buffer heads */
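/* Illustrative arithmetic, assuming a 4kB PAGE_SIZE: MAX_BUF_PER_PAGE is
 * then 8, so NR_RESERVED is 16 buffer heads and MAX_UNUSED_BUFFERS is 36.
 */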
59 * Hash table mask..
61 static unsigned long bh_hash_mask = 0;
63 static int grow_buffers(int size);
65 static struct buffer_head ** hash_table;
66 static struct buffer_head * lru_list[NR_LIST] = {NULL, };
67 static struct buffer_head * free_list[NR_SIZES] = {NULL, };
69 static kmem_cache_t *bh_cachep;
71 static struct buffer_head * unused_list = NULL;
72 static struct buffer_head * reuse_list = NULL;
73 static DECLARE_WAIT_QUEUE_HEAD(buffer_wait);
75 static int nr_buffers = 0;
76 static int nr_buffers_type[NR_LIST] = {0,};
77 static int nr_buffer_heads = 0;
78 static int nr_unused_buffer_heads = 0;
79 static int nr_hashed_buffers = 0;
81 /* This is used by some architectures to estimate available memory. */
82 int buffermem = 0;
84 /* Here is the parameter block for the bdflush process. If you add or
85 * remove any of the parameters, make sure to update kernel/sysctl.c.
88 #define N_PARAM 9
90 /* The dummy values in this structure are left in there for compatibility
91 * with old programs that play with the /proc entries.
93 union bdflush_param {
94 struct {
95 int nfract; /* Percentage of buffer cache dirty to
96 activate bdflush */
97 int ndirty; /* Maximum number of dirty blocks to write out per
98 wake-cycle */
99 int nrefill; /* Number of clean buffers to try to obtain
100 each time we call refill */
101 int nref_dirt; /* Dirty buffer threshold for activating bdflush
102 when trying to refill buffers. */
103 int dummy1; /* unused */
104 int age_buffer; /* Time for normal buffer to age before
105 we flush it */
106 int age_super; /* Time for superblock to age before we
107 flush it */
108 int dummy2; /* unused */
109 int dummy3; /* unused */
110 } b_un;
111 unsigned int data[N_PARAM];
112 } bdf_prm = {{40, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}};
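/* With the defaults above: bdflush is woken once more than 40% of the
 * buffers are dirty (nfract), writes at most 500 buffers per wake-cycle
 * (ndirty), tries to obtain 64 clean buffers per refill (nrefill), and
 * ages ordinary buffers for 30 seconds and superblocks for 5 seconds
 * (age_buffer, age_super) before flushing them.
 */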
114 /* These are the min and max parameter values that we will allow to be assigned */
115 int bdflush_min[N_PARAM] = { 0, 10, 5, 25, 0, 1*HZ, 1*HZ, 1, 1};
116 int bdflush_max[N_PARAM] = {100,50000, 20000, 20000,1000, 6000*HZ, 6000*HZ, 2047, 5};
118 void wakeup_bdflush(int);
121 * Rewrote the wait-routines to use the "new" wait-queue functionality,
122 * and got rid of the cli-sti pairs. The wait-queue routines still
123 * need cli-sti, but now it's just a couple of 386 instructions or so.
125 * Note that the real wait_on_buffer() is an inline function that checks
126 * if 'b_wait' is set before calling this, so that the queues aren't set
127 * up unnecessarily.
129 void __wait_on_buffer(struct buffer_head * bh)
131 struct task_struct *tsk = current;
132 DECLARE_WAITQUEUE(wait, tsk);
134 bh->b_count++;
135 add_wait_queue(&bh->b_wait, &wait);
136 repeat:
137 tsk->state = TASK_UNINTERRUPTIBLE;
138 run_task_queue(&tq_disk);
139 if (buffer_locked(bh)) {
140 schedule();
141 goto repeat;
143 tsk->state = TASK_RUNNING;
144 remove_wait_queue(&bh->b_wait, &wait);
145 bh->b_count--;
148 /* Call sync_buffers with wait!=0 to ensure that the call does not
149 * return until all buffer writes have completed. Sync() may return
150 * before the writes have finished; fsync() may not.
153 /* Godamity-damn. Some buffers (bitmaps for filesystems)
154 * spontaneously dirty themselves without ever brelse being called.
155 * We will ultimately want to put these in a separate list, but for
156 * now we search all of the lists for dirty buffers.
158 static int sync_buffers(kdev_t dev, int wait)
160 int i, retry, pass = 0, err = 0;
161 struct buffer_head * bh, *next;
163 /* One pass for no-wait, three for wait:
164 * 0) write out all dirty, unlocked buffers;
165 * 1) write out all dirty buffers, waiting if locked;
166 * 2) wait for completion by waiting for all buffers to unlock.
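*
* As used below, sync_dev() only makes wait == 0 (single-pass) calls,
* while fsync_dev() finishes with a wait != 0 call and so gets the
* full three passes described above.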
168 do {
169 retry = 0;
170 repeat:
171 /* We search all lists as a failsafe mechanism, not because we expect
172 * there to be dirty buffers on any of the other lists.
174 bh = lru_list[BUF_DIRTY];
175 if (!bh)
176 goto repeat2;
177 for (i = nr_buffers_type[BUF_DIRTY]*2 ; i-- > 0 ; bh = next) {
178 if (bh->b_list != BUF_DIRTY)
179 goto repeat;
180 next = bh->b_next_free;
181 if (!lru_list[BUF_DIRTY])
182 break;
183 if (dev && bh->b_dev != dev)
184 continue;
185 if (buffer_locked(bh)) {
186 /* Buffer is locked; skip it unless wait is
187 * requested AND pass > 0.
189 if (!wait || !pass) {
190 retry = 1;
191 continue;
193 wait_on_buffer (bh);
194 goto repeat;
197 /* If an unlocked buffer is not uptodate, there has
198 * been an IO error. Skip it.
200 if (wait && buffer_req(bh) && !buffer_locked(bh) &&
201 !buffer_dirty(bh) && !buffer_uptodate(bh)) {
202 err = -EIO;
203 continue;
206 /* Don't write clean buffers. Don't write ANY buffers
207 * on the third pass.
209 if (!buffer_dirty(bh) || pass >= 2)
210 continue;
212 /* Don't bother about locked buffers.
214 * XXX We checked if it was locked above and there is no
215 * XXX way we could have slept in between. -DaveM
217 if (buffer_locked(bh))
218 continue;
219 bh->b_count++;
220 next->b_count++;
221 bh->b_flushtime = 0;
222 ll_rw_block(WRITE, 1, &bh);
223 bh->b_count--;
224 next->b_count--;
225 retry = 1;
228 repeat2:
229 bh = lru_list[BUF_LOCKED];
230 if (!bh)
231 break;
232 for (i = nr_buffers_type[BUF_LOCKED]*2 ; i-- > 0 ; bh = next) {
233 if (bh->b_list != BUF_LOCKED)
234 goto repeat2;
235 next = bh->b_next_free;
236 if (!lru_list[BUF_LOCKED])
237 break;
238 if (dev && bh->b_dev != dev)
239 continue;
240 if (buffer_locked(bh)) {
241 /* Buffer is locked; skip it unless wait is
242 * requested AND pass > 0.
244 if (!wait || !pass) {
245 retry = 1;
246 continue;
248 wait_on_buffer (bh);
249 goto repeat2;
253 /* If we are waiting for the sync to succeed, and if any dirty
254 * blocks were written, then repeat; on the second pass, only
255 * wait for buffers being written (do not pass to write any
256 * more buffers on the second pass).
258 } while (wait && retry && ++pass<=2);
259 return err;
262 void sync_dev(kdev_t dev)
264 sync_buffers(dev, 0);
265 sync_supers(dev);
266 sync_inodes(dev);
267 sync_buffers(dev, 0);
268 DQUOT_SYNC(dev);
270 * FIXME(eric) we need to sync the physical devices here.
271 * This is because some (scsi) controllers have huge amounts of
272 * cache onboard (hundreds of Mb), and we need to instruct
273 * them to commit all of the dirty memory to disk, and we should
274 * not return until this has happened.
276 * This would need to get implemented by going through the assorted
277 * layers so that each block major number can be synced, and this
278 * would call down into the upper and mid-layer scsi.
282 int fsync_dev(kdev_t dev)
284 sync_buffers(dev, 0);
285 sync_supers(dev);
286 sync_inodes(dev);
287 DQUOT_SYNC(dev);
288 return sync_buffers(dev, 1);
291 asmlinkage int sys_sync(void)
293 lock_kernel();
294 fsync_dev(0);
295 unlock_kernel();
296 return 0;
300 * filp may be NULL if called via the msync of a vma.
303 int file_fsync(struct file *filp, struct dentry *dentry)
305 struct inode * inode = dentry->d_inode;
306 struct super_block * sb;
307 kdev_t dev;
309 /* sync the inode to buffers */
310 write_inode_now(inode);
312 /* sync the superblock to buffers */
313 sb = inode->i_sb;
314 wait_on_super(sb);
315 if (sb->s_op && sb->s_op->write_super)
316 sb->s_op->write_super(sb);
318 /* .. finally sync the buffers to disk */
319 dev = inode->i_dev;
320 return sync_buffers(dev, 1);
323 asmlinkage int sys_fsync(unsigned int fd)
325 struct file * file;
326 struct dentry * dentry;
327 struct inode * inode;
328 int err;
330 lock_kernel();
331 err = -EBADF;
332 file = fget(fd);
333 if (!file)
334 goto out;
336 dentry = file->f_dentry;
337 if (!dentry)
338 goto out_putf;
340 inode = dentry->d_inode;
341 if (!inode)
342 goto out_putf;
344 err = -EINVAL;
345 if (!file->f_op || !file->f_op->fsync)
346 goto out_putf;
348 /* We need to protect against concurrent writers.. */
349 down(&inode->i_sem);
350 err = file->f_op->fsync(file, dentry);
351 up(&inode->i_sem);
353 out_putf:
354 fput(file);
355 out:
356 unlock_kernel();
357 return err;
360 asmlinkage int sys_fdatasync(unsigned int fd)
362 struct file * file;
363 struct dentry * dentry;
364 struct inode * inode;
365 int err;
367 lock_kernel();
368 err = -EBADF;
369 file = fget(fd);
370 if (!file)
371 goto out;
373 dentry = file->f_dentry;
374 if (!dentry)
375 goto out_putf;
377 inode = dentry->d_inode;
378 if (!inode)
379 goto out_putf;
381 err = -EINVAL;
382 if (!file->f_op || !file->f_op->fsync)
383 goto out_putf;
385 /* this needs further work, at the moment it is identical to fsync() */
386 down(&inode->i_sem);
387 err = file->f_op->fsync(file, dentry);
388 up(&inode->i_sem);
390 out_putf:
391 fput(file);
392 out:
393 unlock_kernel();
394 return err;
397 void invalidate_buffers(kdev_t dev)
399 int i;
400 int nlist;
401 struct buffer_head * bh;
403 for(nlist = 0; nlist < NR_LIST; nlist++) {
404 bh = lru_list[nlist];
405 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bh->b_next_free) {
406 if (bh->b_dev != dev)
407 continue;
408 wait_on_buffer(bh);
409 if (bh->b_dev != dev)
410 continue;
411 if (bh->b_count)
412 continue;
413 bh->b_flushtime = 0;
414 clear_bit(BH_Protected, &bh->b_state);
415 clear_bit(BH_Uptodate, &bh->b_state);
416 clear_bit(BH_Dirty, &bh->b_state);
417 clear_bit(BH_Req, &bh->b_state);
422 #define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block)) & bh_hash_mask)
423 #define hash(dev,block) hash_table[_hashfn(dev,block)]
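/* Example (illustrative): the bucket for (dev, block) is
 * hash_table[(HASHDEV(dev) ^ block) & bh_hash_mask]. The table size
 * chosen in buffer_init() is a power of two, so masking with
 * bh_hash_mask is a cheap modulo by the number of hash buckets.
 */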
425 static void insert_into_hash_list(struct buffer_head * bh)
427 bh->b_next = NULL;
428 bh->b_pprev = NULL;
429 if (bh->b_dev) {
430 struct buffer_head **bhp = &hash(bh->b_dev, bh->b_blocknr);
431 struct buffer_head *next = *bhp;
433 if (next) {
434 bh->b_next = next;
435 next->b_pprev = &bh->b_next;
437 *bhp = bh;
438 bh->b_pprev = bhp;
439 nr_hashed_buffers++;
443 static void remove_from_hash_queue(struct buffer_head * bh)
445 struct buffer_head **pprev = bh->b_pprev;
446 if (pprev) {
447 struct buffer_head * next = bh->b_next;
448 if (next) {
449 next->b_pprev = pprev;
450 bh->b_next = NULL;
452 *pprev = next;
453 bh->b_pprev = NULL;
454 nr_hashed_buffers--;
458 static void insert_into_lru_list(struct buffer_head * bh)
460 struct buffer_head **bhp = &lru_list[bh->b_list];
462 if (bh->b_dev == B_FREE)
463 BUG();
465 if(!*bhp) {
466 *bhp = bh;
467 bh->b_prev_free = bh;
470 if (bh->b_next_free)
471 panic("VFS: buffer LRU pointers corrupted");
473 bh->b_next_free = *bhp;
474 bh->b_prev_free = (*bhp)->b_prev_free;
475 (*bhp)->b_prev_free->b_next_free = bh;
476 (*bhp)->b_prev_free = bh;
478 nr_buffers++;
479 nr_buffers_type[bh->b_list]++;
482 static void remove_from_lru_list(struct buffer_head * bh)
484 if (!(bh->b_prev_free) || !(bh->b_next_free))
485 return;
487 if (bh->b_dev == B_FREE) {
488 printk("LRU list corrupted");
489 *(int*)0 = 0;
491 bh->b_prev_free->b_next_free = bh->b_next_free;
492 bh->b_next_free->b_prev_free = bh->b_prev_free;
494 if (lru_list[bh->b_list] == bh)
495 lru_list[bh->b_list] = bh->b_next_free;
496 if (lru_list[bh->b_list] == bh)
497 lru_list[bh->b_list] = NULL;
498 bh->b_next_free = bh->b_prev_free = NULL;
500 nr_buffers--;
501 nr_buffers_type[bh->b_list]--;
504 static void remove_from_free_list(struct buffer_head * bh)
506 int isize = BUFSIZE_INDEX(bh->b_size);
507 if (!(bh->b_prev_free) || !(bh->b_next_free))
508 panic("VFS: Free block list corrupted");
509 if(bh->b_dev != B_FREE)
510 panic("Free list corrupted");
511 if(!free_list[isize])
512 panic("Free list empty");
513 if(bh->b_next_free == bh)
514 free_list[isize] = NULL;
515 else {
516 bh->b_prev_free->b_next_free = bh->b_next_free;
517 bh->b_next_free->b_prev_free = bh->b_prev_free;
518 if (free_list[isize] == bh)
519 free_list[isize] = bh->b_next_free;
521 bh->b_next_free = bh->b_prev_free = NULL;
524 static void remove_from_queues(struct buffer_head * bh)
526 if (bh->b_dev == B_FREE)
527 BUG();
528 remove_from_hash_queue(bh);
529 remove_from_lru_list(bh);
532 static void put_last_free(struct buffer_head * bh)
534 if (bh) {
535 struct buffer_head **bhp = &free_list[BUFSIZE_INDEX(bh->b_size)];
537 if (bh->b_count)
538 BUG();
540 bh->b_dev = B_FREE; /* So it is obvious we are on the free list. */
542 /* Add to back of free list. */
543 if(!*bhp) {
544 *bhp = bh;
545 bh->b_prev_free = bh;
548 bh->b_next_free = *bhp;
549 bh->b_prev_free = (*bhp)->b_prev_free;
550 (*bhp)->b_prev_free->b_next_free = bh;
551 (*bhp)->b_prev_free = bh;
555 struct buffer_head * find_buffer(kdev_t dev, int block, int size)
557 struct buffer_head * next;
559 next = hash(dev,block);
560 for (;;) {
561 struct buffer_head *tmp = next;
562 if (!next)
563 break;
564 next = tmp->b_next;
565 if (tmp->b_blocknr != block || tmp->b_size != size || tmp->b_dev != dev)
566 continue;
567 next = tmp;
568 break;
570 return next;
574 * Why like this, I hear you say... The reason is race-conditions.
575 * As we don't lock buffers (unless we are reading them, that is),
576 * something might happen to it while we sleep (ie a read-error
577 * will force it bad). This shouldn't really happen currently, but
578 * the code is ready.
580 struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
582 struct buffer_head * bh;
583 bh = find_buffer(dev,block,size);
584 if (bh)
585 bh->b_count++;
586 return bh;
589 unsigned int get_hardblocksize(kdev_t dev)
592 * Get the hard sector size for the given device. If we don't know
593 * what it is, return 0.
595 if (hardsect_size[MAJOR(dev)] != NULL) {
596 int blksize = hardsect_size[MAJOR(dev)][MINOR(dev)];
597 if (blksize != 0)
598 return blksize;
602 * We don't know what the hardware sector size for this device is.
603 * Return 0 indicating that we don't know.
605 return 0;
608 void set_blocksize(kdev_t dev, int size)
610 extern int *blksize_size[];
611 int i, nlist;
612 struct buffer_head * bh, *bhnext;
614 if (!blksize_size[MAJOR(dev)])
615 return;
617 /* Size must be a power of two, and between 512 and PAGE_SIZE */
618 if (size > PAGE_SIZE || size < 512 || (size & (size-1)))
619 panic("Invalid blocksize passed to set_blocksize");
621 if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) {
622 blksize_size[MAJOR(dev)][MINOR(dev)] = size;
623 return;
625 if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
626 return;
627 sync_buffers(dev, 2);
628 blksize_size[MAJOR(dev)][MINOR(dev)] = size;
630 /* We need to be quite careful how we do this - we are moving entries
631 * around on the free list, and we can get in a loop if we are not careful.
633 for(nlist = 0; nlist < NR_LIST; nlist++) {
634 bh = lru_list[nlist];
635 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) {
636 if(!bh)
637 break;
639 bhnext = bh->b_next_free;
640 if (bh->b_dev != dev)
641 continue;
642 if (bh->b_size == size)
643 continue;
644 bhnext->b_count++;
645 bh->b_count++;
646 wait_on_buffer(bh);
647 bhnext->b_count--;
648 if (bh->b_dev == dev && bh->b_size != size) {
649 clear_bit(BH_Dirty, &bh->b_state);
650 clear_bit(BH_Uptodate, &bh->b_state);
651 clear_bit(BH_Req, &bh->b_state);
652 bh->b_flushtime = 0;
654 if (--bh->b_count)
655 continue;
656 remove_from_queues(bh);
657 put_last_free(bh);
663 * We used to try various strange things. Let's not.
665 static void refill_freelist(int size)
667 if (!grow_buffers(size)) {
668 wakeup_bdflush(1);
669 current->policy |= SCHED_YIELD;
670 schedule();
674 void init_buffer(struct buffer_head *bh, kdev_t dev, int block,
675 bh_end_io_t *handler, void *dev_id)
677 bh->b_list = BUF_CLEAN;
678 bh->b_flushtime = 0;
679 bh->b_dev = dev;
680 bh->b_blocknr = block;
681 bh->b_end_io = handler;
682 bh->b_dev_id = dev_id;
685 static void end_buffer_io_sync(struct buffer_head *bh, int uptodate)
687 mark_buffer_uptodate(bh, uptodate);
688 unlock_buffer(bh);
692 * Ok, this is getblk, and it isn't very clear, again to hinder
693 * race-conditions. Most of the code is seldom used, (ie repeating),
694 * so it should be much more efficient than it looks.
696 * The algorithm is changed: hopefully better, and an elusive bug removed.
698 * 14.02.92: changed it to sync dirty buffers a bit: better performance
699 * when the filesystem starts to get full of dirty blocks (I hope).
701 struct buffer_head * getblk(kdev_t dev, int block, int size)
703 struct buffer_head * bh;
704 int isize;
706 repeat:
707 bh = get_hash_table(dev, block, size);
708 if (bh) {
709 if (!buffer_dirty(bh)) {
710 bh->b_flushtime = 0;
712 goto out;
715 isize = BUFSIZE_INDEX(size);
716 get_free:
717 bh = free_list[isize];
718 if (!bh)
719 goto refill;
720 remove_from_free_list(bh);
722 /* OK, FINALLY we know that this buffer is the only one of its kind,
723 * and that it's unused (b_count=0), unlocked, and clean.
725 init_buffer(bh, dev, block, end_buffer_io_sync, NULL);
726 bh->b_count = 1;
727 bh->b_state = 0;
729 /* Insert the buffer into the regular lists */
730 insert_into_lru_list(bh);
731 insert_into_hash_list(bh);
732 goto out;
735 * If we block while refilling the free list, somebody may
736 * create the buffer first ... search the hashes again.
738 refill:
739 refill_freelist(size);
740 if (!find_buffer(dev,block,size))
741 goto get_free;
742 goto repeat;
743 out:
744 return bh;
747 void set_writetime(struct buffer_head * buf, int flag)
749 int newtime;
751 if (buffer_dirty(buf)) {
752 /* Set (or advance) the time at which this dirty buffer should be flushed. */
753 newtime = jiffies + (flag ? bdf_prm.b_un.age_super :
754 bdf_prm.b_un.age_buffer);
755 if(!buf->b_flushtime || buf->b_flushtime > newtime)
756 buf->b_flushtime = newtime;
757 } else {
758 buf->b_flushtime = 0;
763 * Put a buffer into the appropriate list, without side-effects.
765 static void file_buffer(struct buffer_head *bh, int list)
767 remove_from_lru_list(bh);
768 bh->b_list = list;
769 insert_into_lru_list(bh);
773 * if a new dirty buffer is created we need to balance bdflush.
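* With the default nfract of 40 this means, for example, that a system
* with 10000 buffers starts waking bdflush once more than 4000 of them
* are dirty (illustrative figures).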
775 static inline void balance_dirty (kdev_t dev)
777 int too_many = (nr_buffers * bdf_prm.b_un.nfract/100);
779 /* This buffer is dirty, maybe we need to start flushing.
780 * If too high a percentage of the buffers are dirty...
782 if (nr_buffers_type[BUF_DIRTY] > too_many) {
783 wakeup_bdflush(1);
786 /* If this is a loop device, and
787 * more than half of the buffers are dirty...
788 * (Prevents no-free-buffers deadlock with loop device.)
790 if (MAJOR(dev) == LOOP_MAJOR &&
791 nr_buffers_type[BUF_DIRTY]*2>nr_buffers)
792 wakeup_bdflush(1);
796 * A buffer may need to be moved from one buffer list to another
797 * (e.g. in case it is not shared any more). Handle this.
799 void refile_buffer(struct buffer_head * buf)
801 int dispose;
803 if(buf->b_dev == B_FREE) {
804 printk("Attempt to refile free buffer\n");
805 return;
807 if (buffer_dirty(buf))
808 dispose = BUF_DIRTY;
809 else if (buffer_locked(buf))
810 dispose = BUF_LOCKED;
811 else
812 dispose = BUF_CLEAN;
813 if(dispose != buf->b_list) {
814 file_buffer(buf, dispose);
815 if (dispose == BUF_DIRTY)
816 balance_dirty(buf->b_dev);
821 * Release a buffer head
823 void __brelse(struct buffer_head * buf)
825 /* If dirty, mark the time this buffer should be written back. */
826 set_writetime(buf, 0);
827 refile_buffer(buf);
828 touch_buffer(buf);
830 if (buf->b_count) {
831 buf->b_count--;
832 wake_up(&buffer_wait);
833 return;
835 printk("VFS: brelse: Trying to free free buffer\n");
839 * bforget() is like brelse(), except it puts the buffer on the
840 * free list if it can.. We can NOT free the buffer if:
841 * - there are other users of it
842 * - it is locked and thus can have active IO
844 void __bforget(struct buffer_head * buf)
846 if (buf->b_count != 1 || buffer_locked(buf)) {
847 __brelse(buf);
848 return;
850 buf->b_count = 0;
851 buf->b_state = 0;
852 remove_from_queues(buf);
853 put_last_free(buf);
857 * bread() reads a specified block and returns the buffer that contains
858 * it. It returns NULL if the block was unreadable.
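*
* A typical calling pattern (sketch): bh = bread(dev, nr, BLOCK_SIZE);
* if bh is non-NULL the block's contents are at bh->b_data, and the
* caller must brelse(bh) when finished with it.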
860 struct buffer_head * bread(kdev_t dev, int block, int size)
862 struct buffer_head * bh;
864 bh = getblk(dev, block, size);
865 if (buffer_uptodate(bh))
866 return bh;
867 ll_rw_block(READ, 1, &bh);
868 wait_on_buffer(bh);
869 if (buffer_uptodate(bh))
870 return bh;
871 brelse(bh);
872 return NULL;
876 * Ok, breada can be used as bread, but additionally starts read-ahead
877 * on up to NBUF following blocks; the pos/filesize arguments are used
878 * to size the read-ahead window.
881 #define NBUF 16
883 struct buffer_head * breada(kdev_t dev, int block, int bufsize,
884 unsigned int pos, unsigned int filesize)
886 struct buffer_head * bhlist[NBUF];
887 unsigned int blocks;
888 struct buffer_head * bh;
889 int index;
890 int i, j;
892 if (pos >= filesize)
893 return NULL;
895 if (block < 0)
896 return NULL;
898 bh = getblk(dev, block, bufsize);
899 index = BUFSIZE_INDEX(bh->b_size);
901 if (buffer_uptodate(bh))
902 return(bh);
903 else ll_rw_block(READ, 1, &bh);
905 blocks = (filesize - pos) >> (9+index);
907 if (blocks < (read_ahead[MAJOR(dev)] >> index))
908 blocks = read_ahead[MAJOR(dev)] >> index;
909 if (blocks > NBUF)
910 blocks = NBUF;
912 /* if (blocks) printk("breada (new) %d blocks\n",blocks); */
914 bhlist[0] = bh;
915 j = 1;
916 for(i=1; i<blocks; i++) {
917 bh = getblk(dev,block+i,bufsize);
918 if (buffer_uptodate(bh)) {
919 brelse(bh);
920 break;
922 else bhlist[j++] = bh;
925 /* Request the read for these buffers, and then release them. */
926 if (j>1)
927 ll_rw_block(READA, (j-1), bhlist+1);
928 for(i=1; i<j; i++)
929 brelse(bhlist[i]);
931 /* Wait for this buffer, and then continue on. */
932 bh = bhlist[0];
933 wait_on_buffer(bh);
934 if (buffer_uptodate(bh))
935 return bh;
936 brelse(bh);
937 return NULL;
941 * Note: the caller should wake up the buffer_wait list if needed.
943 static void put_unused_buffer_head(struct buffer_head * bh)
945 if (nr_unused_buffer_heads >= MAX_UNUSED_BUFFERS) {
946 nr_buffer_heads--;
947 kmem_cache_free(bh_cachep, bh);
948 return;
951 // memset(bh, 0, sizeof(*bh));
952 bh->b_blocknr = -1;
953 init_waitqueue_head(&bh->b_wait);
954 nr_unused_buffer_heads++;
955 bh->b_next_free = unused_list;
956 unused_list = bh;
960 * We can't put completed temporary IO buffer_heads directly onto the
961 * unused_list when they become unlocked, since the device driver
962 * end_request routines still expect access to the buffer_head's
963 * fields after the final unlock. So, the device driver puts them on
964 * the reuse_list instead once IO completes, and we recover these to
965 * the unused_list here.
967 * Note that we don't do a wakeup here, but return a flag indicating
968 * whether we got any buffer heads. A task ready to sleep can check
969 * the returned value, and any tasks already sleeping will have been
970 * awakened when the buffer heads were added to the reuse list.
972 static inline int recover_reusable_buffer_heads(void)
974 struct buffer_head *head = xchg(&reuse_list, NULL);
975 int found = 0;
977 if (head) {
978 do {
979 struct buffer_head *bh = head;
980 head = head->b_next_free;
981 put_unused_buffer_head(bh);
982 } while (head);
983 found = 1;
985 return found;
989 * Reserve NR_RESERVED buffer heads for async IO requests to avoid
990 * no-buffer-head deadlock. Return NULL on failure; waiting for
991 * buffer heads is now handled in create_buffers().
993 static struct buffer_head * get_unused_buffer_head(int async)
995 struct buffer_head * bh;
997 recover_reusable_buffer_heads();
998 if (nr_unused_buffer_heads > NR_RESERVED) {
999 bh = unused_list;
1000 unused_list = bh->b_next_free;
1001 nr_unused_buffer_heads--;
1002 return bh;
1005 /* This is critical. We can't swap out pages to get
1006 * more buffer heads, because the swap-out may need
1007 * more buffer-heads itself. Thus SLAB_BUFFER.
1009 if((bh = kmem_cache_alloc(bh_cachep, SLAB_BUFFER)) != NULL) {
1010 memset(bh, 0, sizeof(*bh));
1011 init_waitqueue_head(&bh->b_wait);
1012 nr_buffer_heads++;
1013 return bh;
1017 * If we need an async buffer, use the reserved buffer heads.
1019 if (async && unused_list) {
1020 bh = unused_list;
1021 unused_list = bh->b_next_free;
1022 nr_unused_buffer_heads--;
1023 return bh;
1026 #if 0
1028 * (Pending further analysis ...)
1029 * Ordinary (non-async) requests can use a different memory priority
1030 * to free up pages. Any swapping thus generated will use async
1031 * buffer heads.
1033 if(!async &&
1034 (bh = kmem_cache_alloc(bh_cachep, SLAB_KERNEL)) != NULL) {
1035 memset(bh, 0, sizeof(*bh));
1036 init_waitqueue_head(&bh->b_wait);
1037 nr_buffer_heads++;
1038 return bh;
1040 #endif
1042 return NULL;
1046 * Create the appropriate buffers when given a page for data area and
1047 * the size of each buffer.. Use the bh->b_this_page linked list to
1048 * follow the buffers created. Return NULL if unable to create more
1049 * buffers.
1050 * The async flag is used to differentiate async IO (paging, swapping)
1051 * from ordinary buffer allocations, and only async requests are allowed
1052 * to sleep waiting for buffer heads.
1054 static struct buffer_head * create_buffers(unsigned long page,
1055 unsigned long size, int async)
1057 DECLARE_WAITQUEUE(wait, current);
1058 struct buffer_head *bh, *head;
1059 long offset;
1061 try_again:
1062 head = NULL;
1063 offset = PAGE_SIZE;
1064 while ((offset -= size) >= 0) {
1065 bh = get_unused_buffer_head(async);
1066 if (!bh)
1067 goto no_grow;
1069 bh->b_dev = B_FREE; /* Flag as unused */
1070 bh->b_this_page = head;
1071 head = bh;
1073 bh->b_state = 0;
1074 bh->b_next_free = NULL;
1075 bh->b_count = 0;
1076 bh->b_size = size;
1078 bh->b_data = (char *) (page+offset);
1079 bh->b_list = 0;
1081 return head;
1083 * In case anything failed, we just free everything we got.
1085 no_grow:
1086 if (head) {
1087 do {
1088 bh = head;
1089 head = head->b_this_page;
1090 put_unused_buffer_head(bh);
1091 } while (head);
1093 /* Wake up any waiters ... */
1094 wake_up(&buffer_wait);
1098 * Return failure for non-async IO requests. Async IO requests
1099 * are not allowed to fail, so we have to wait until buffer heads
1100 * become available. But we don't want tasks sleeping with
1101 * partially complete buffers, so all were released above.
1103 if (!async)
1104 return NULL;
1106 /* We're _really_ low on memory. Now we just
1107 * wait for old buffer heads to become free due to
1108 * finishing IO. Since this is an async request and
1109 * the reserve list is empty, we're sure there are
1110 * async buffer heads in use.
1112 run_task_queue(&tq_disk);
1115 * Set our state for sleeping, then check again for buffer heads.
1116 * This ensures we won't miss a wake_up from an interrupt.
1118 add_wait_queue(&buffer_wait, &wait);
1119 current->state = TASK_UNINTERRUPTIBLE;
1120 if (!recover_reusable_buffer_heads())
1121 schedule();
1122 remove_wait_queue(&buffer_wait, &wait);
1123 current->state = TASK_RUNNING;
1124 goto try_again;
1127 /* Run the hooks that have to be done when a page I/O has completed. */
1128 static inline void after_unlock_page (struct page * page)
1130 if (test_and_clear_bit(PG_decr_after, &page->flags)) {
1131 atomic_dec(&nr_async_pages);
1132 #ifdef DEBUG_SWAP
1133 printk ("DebugVM: Finished IO on page %p, nr_async_pages %d\n",
1134 (char *) page_address(page),
1135 atomic_read(&nr_async_pages));
1136 #endif
1138 if (test_and_clear_bit(PG_swap_unlock_after, &page->flags))
1139 swap_after_unlock_page(page->offset);
1140 if (test_and_clear_bit(PG_free_after, &page->flags))
1141 __free_page(page);
1145 * Free all temporary buffers belonging to a page.
1146 * This needs to be called with interrupts disabled.
1148 static inline void free_async_buffers (struct buffer_head * bh)
1150 struct buffer_head *tmp, *tail;
1153 * Link all the buffers into the b_next_free list,
1154 * so we only have to do one xchg() operation ...
1156 tail = bh;
1157 while ((tmp = tail->b_this_page) != bh) {
1158 tail->b_next_free = tmp;
1159 tail = tmp;
1162 /* Update the reuse list */
1163 tail->b_next_free = xchg(&reuse_list, NULL);
1164 reuse_list = bh;
1166 /* Wake up any waiters ... */
1167 wake_up(&buffer_wait);
1170 static void end_buffer_io_async(struct buffer_head * bh, int uptodate)
1172 unsigned long flags;
1173 struct buffer_head *tmp;
1174 struct page *page;
1176 mark_buffer_uptodate(bh, uptodate);
1178 /* This is a temporary buffer used for page I/O. */
1179 page = mem_map + MAP_NR(bh->b_data);
1181 if (!uptodate)
1182 SetPageError(page);
1185 * Be _very_ careful from here on. Bad things can happen if
1186 * two buffer heads end IO at almost the same time and both
1187 * decide that the page is now completely done.
1189 * Async buffer_heads are here only as labels for IO, and get
1190 * thrown away once the IO for this page is complete. IO is
1191 * deemed complete once all buffers have been visited
1192 * (b_count==0) and are now unlocked. We must make sure that
1193 * only the _last_ buffer that decrements its count is the one
1194 * that frees the page..
1196 save_flags(flags);
1197 cli();
1198 unlock_buffer(bh);
1199 tmp = bh->b_this_page;
1200 while (tmp != bh) {
1201 if (buffer_locked(tmp))
1202 goto still_busy;
1203 tmp = tmp->b_this_page;
1206 /* OK, the async IO on this page is complete. */
1207 restore_flags(flags);
1209 after_unlock_page(page);
1211 * if none of the buffers had errors then we can set the
1212 * page uptodate:
1214 if (!PageError(page))
1215 SetPageUptodate(page);
1216 if (page->owner != -1)
1217 PAGE_BUG(page);
1218 page->owner = (int)current;
1219 UnlockPage(page);
1221 return;
1223 still_busy:
1224 restore_flags(flags);
1225 return;
1228 static int create_page_buffers (int rw, struct page *page, kdev_t dev, int b[], int size, int bmap)
1230 struct buffer_head *head, *bh, *tail;
1231 int block;
1233 if (!PageLocked(page))
1234 BUG();
1235 if (page->owner != (int)current)
1236 PAGE_BUG(page);
1238 * Allocate async buffer heads pointing to this page, just for I/O.
1239 * They show up in the buffer hash table and are registered in
1240 * page->buffers.
1242 head = create_buffers(page_address(page), size, 1);
1243 if (page->buffers)
1244 BUG();
1245 if (!head)
1246 BUG();
1247 tail = head;
1248 for (bh = head; bh; bh = bh->b_this_page) {
1249 block = *(b++);
1251 tail = bh;
1252 init_buffer(bh, dev, block, end_buffer_io_async, NULL);
1255 * When we use bmap, we define block zero to represent
1256 * a hole. ll_rw_page, however, may legitimately
1257 * access block zero, and we need to distinguish the
1258 * two cases.
1260 if (bmap && !block) {
1261 set_bit(BH_Uptodate, &bh->b_state);
1262 memset(bh->b_data, 0, size);
1265 tail->b_this_page = head;
1266 get_page(page);
1267 page->buffers = head;
1268 return 0;
1272 * We don't have to release all buffers here, but
1273 * we have to be sure that no dirty buffer is left
1274 * and no IO is going on (no buffer is locked), because
1275 * we have truncated the file and are going to free the
1276 * blocks on-disk..
1278 int generic_block_flushpage(struct inode *inode, struct page *page, unsigned long offset)
1280 struct buffer_head *head, *bh, *next;
1281 unsigned int curr_off = 0;
1283 if (!PageLocked(page))
1284 BUG();
1285 if (!page->buffers)
1286 return 0;
1288 head = page->buffers;
1289 bh = head;
1290 do {
1291 unsigned int next_off = curr_off + bh->b_size;
1292 next = bh->b_this_page;
1295 * is this block fully flushed?
1297 if (offset <= curr_off) {
1298 if (bh->b_blocknr) {
1299 bh->b_count++;
1300 wait_on_buffer(bh);
1301 if (bh->b_dev == B_FREE)
1302 BUG();
1303 mark_buffer_clean(bh);
1304 bh->b_blocknr = 0;
1305 bh->b_count--;
1308 curr_off = next_off;
1309 bh = next;
1310 } while (bh != head);
1313 * subtle. We release buffer-heads only if this is
1314 * the 'final' flushpage. We invalidate the bmap
1315 * cached value in all cases.
1317 if (!offset)
1318 try_to_free_buffers(page);
1320 return 0;
1323 static inline void create_empty_buffers (struct page *page,
1324 struct inode *inode, unsigned long blocksize)
1326 struct buffer_head *bh, *head, *tail;
1328 head = create_buffers(page_address(page), blocksize, 1);
1329 if (page->buffers)
1330 BUG();
1332 bh = head;
1333 do {
1334 bh->b_dev = inode->i_dev;
1335 bh->b_blocknr = 0;
1336 tail = bh;
1337 bh = bh->b_this_page;
1338 } while (bh);
1339 tail->b_this_page = head;
1340 page->buffers = head;
1341 get_page(page);
1344 int block_write_full_page (struct file *file, struct page *page, fs_getblock_t fs_get_block)
1346 struct dentry *dentry = file->f_dentry;
1347 struct inode *inode = dentry->d_inode;
1348 int err, created, i;
1349 unsigned long block, phys, offset;
1350 struct buffer_head *bh, *head;
1352 if (!PageLocked(page))
1353 BUG();
1355 if (!page->buffers)
1356 create_empty_buffers(page, inode, inode->i_sb->s_blocksize);
1357 head = page->buffers;
1359 offset = page->offset;
1360 block = offset >> inode->i_sb->s_blocksize_bits;
1362 // FIXME: currently we assume page alignment.
1363 if (offset & (PAGE_SIZE-1))
1364 BUG();
1366 bh = head;
1367 i = 0;
1368 do {
1369 if (!bh)
1370 BUG();
1372 if (!bh->b_blocknr) {
1373 err = -EIO;
1374 down(&inode->i_sem);
1375 phys = fs_get_block (inode, block, 1, &err, &created);
1376 up(&inode->i_sem);
1377 if (!phys)
1378 goto out;
1380 init_buffer(bh, inode->i_dev, phys, end_buffer_io_sync, NULL);
1381 bh->b_state = (1<<BH_Uptodate);
1382 } else {
1384 * block already exists, just mark it dirty:
1386 bh->b_end_io = end_buffer_io_sync;
1387 set_bit(BH_Uptodate, &bh->b_state);
1389 mark_buffer_dirty(bh, 0);
1391 bh = bh->b_this_page;
1392 block++;
1393 } while (bh != head);
1395 SetPageUptodate(page);
1396 return 0;
1397 out:
1398 ClearPageUptodate(page);
1399 return err;
1402 int block_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf, fs_getblock_t fs_get_block)
1404 struct dentry *dentry = file->f_dentry;
1405 struct inode *inode = dentry->d_inode;
1406 unsigned long block;
1407 int err, created;
1408 unsigned long blocksize, start_block, end_block;
1409 unsigned long start_offset, start_bytes, end_bytes;
1410 unsigned long bbits, phys, blocks, i, len;
1411 struct buffer_head *bh, *head;
1412 char * target_buf;
1414 target_buf = (char *)page_address(page) + offset;
1415 lock_kernel();
1417 if (!PageLocked(page))
1418 BUG();
1420 blocksize = inode->i_sb->s_blocksize;
1421 if (!page->buffers)
1422 create_empty_buffers(page, inode, blocksize);
1423 head = page->buffers;
1425 bbits = inode->i_sb->s_blocksize_bits;
1426 block = page->offset >> bbits;
1427 blocks = PAGE_SIZE >> bbits;
1428 start_block = offset >> bbits;
1429 end_block = (offset + bytes - 1) >> bbits;
1430 start_offset = offset & (blocksize - 1);
1431 start_bytes = blocksize - start_offset;
1432 if (start_bytes > bytes)
1433 start_bytes = bytes;
1434 end_bytes = (offset+bytes) & (blocksize - 1);
1435 if (end_bytes > bytes)
1436 end_bytes = bytes;
1438 if (offset < 0 || offset >= PAGE_SIZE)
1439 BUG();
1440 if (bytes+offset < 0 || bytes+offset > PAGE_SIZE)
1441 BUG();
1442 if (start_block < 0 || start_block >= blocks)
1443 BUG();
1444 if (end_block < 0 || end_block >= blocks)
1445 BUG();
1446 // FIXME: currently we assume page alignment.
1447 if (page->offset & (PAGE_SIZE-1))
1448 BUG();
1450 i = 0;
1451 bh = head;
1452 do {
1453 if (!bh)
1454 BUG();
1456 if ((i < start_block) || (i > end_block)) {
1457 goto skip;
1459 unlock_kernel();
1461 err = -EFAULT;
1462 if (start_offset) {
1463 len = start_bytes;
1464 start_offset = 0;
1465 } else
1466 if (end_bytes && (i == end_block)) {
1467 len = end_bytes;
1468 end_bytes = 0;
1469 } else {
1471 * Overwritten block.
1473 len = blocksize;
1475 if (copy_from_user(target_buf, buf, len))
1476 goto out_nolock;
1477 target_buf += len;
1478 buf += len;
1481 * we dirty buffers only after copying the data into
1482 * the page - this way we can dirty the buffer even if
1483 * the bh is still doing IO.
1485 lock_kernel();
1486 if (!bh->b_blocknr) {
1487 err = -EIO;
1488 down(&inode->i_sem);
1489 phys = fs_get_block (inode, block, 1, &err, &created);
1490 up(&inode->i_sem);
1491 if (!phys)
1492 goto out;
1494 init_buffer(bh, inode->i_dev, phys, end_buffer_io_sync, NULL);
1497 * if partially written block which has contents on
1498 * disk, then we have to read it first.
1500 if (!created && (start_offset ||
1501 (end_bytes && (i == end_block)))) {
1502 bh->b_state = 0;
1503 ll_rw_block(READ, 1, &bh);
1504 wait_on_buffer(bh);
1505 err = -EIO;
1506 if (!buffer_uptodate(bh))
1507 goto out;
1510 bh->b_state = (1<<BH_Uptodate);
1511 } else {
1513 * block already exists, just mark it uptodate:
1515 bh->b_end_io = end_buffer_io_sync;
1516 set_bit(BH_Uptodate, &bh->b_state);
1518 mark_buffer_dirty(bh, 0);
1519 skip:
1520 i++;
1521 block++;
1522 bh = bh->b_this_page;
1523 } while (bh != head);
1524 unlock_kernel();
1526 SetPageUptodate(page);
1527 return bytes;
1528 out:
1529 unlock_kernel();
1530 out_nolock:
1531 ClearPageUptodate(page);
1532 return err;
1536 * Start I/O on a page.
1537 * This function expects the page to be locked and may return
1538 * before I/O is complete. You then have to check page->locked,
1539 * page->uptodate, and maybe wait on page->wait.
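*
* The b[] array supplies one block number for each size-byte piece of
* the page, so callers such as generic_readpage() below pass
* PAGE_SIZE/size entries (at most MAX_BUF_PER_PAGE).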
1541 int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size, int bmap)
1543 struct buffer_head *head, *bh, *arr[MAX_BUF_PER_PAGE];
1544 int nr, fresh, block;
1546 if (!PageLocked(page))
1547 panic("brw_page: page not locked for I/O");
1548 // clear_bit(PG_error, &page->flags);
1550 * We pretty much rely on the page lock for this, because
1551 * create_page_buffers() might sleep.
1553 fresh = 0;
1554 if (!page->buffers) {
1555 create_page_buffers(rw, page, dev, b, size, bmap);
1556 fresh = 1;
1558 if (!page->buffers)
1559 BUG();
1560 page->owner = -1;
1562 head = page->buffers;
1563 bh = head;
1564 nr = 0;
1565 do {
1566 block = *(b++);
1568 if (fresh && (bh->b_count != 0))
1569 BUG();
1570 if (rw == READ) {
1571 if (!fresh)
1572 BUG();
1573 if (bmap && !block) {
1574 if (block)
1575 BUG();
1576 } else {
1577 if (bmap && !block)
1578 BUG();
1579 if (!buffer_uptodate(bh)) {
1580 arr[nr++] = bh;
1583 } else { /* WRITE */
1584 if (!bh->b_blocknr) {
1585 if (!block)
1586 BUG();
1587 bh->b_blocknr = block;
1588 } else {
1589 if (!block)
1590 BUG();
1592 set_bit(BH_Uptodate, &bh->b_state);
1593 mark_buffer_dirty(bh, 0);
1594 arr[nr++] = bh;
1596 bh = bh->b_this_page;
1597 } while (bh != head);
1598 if (rw == READ)
1599 ++current->maj_flt;
1600 if ((rw == READ) && nr) {
1601 if (Page_Uptodate(page))
1602 BUG();
1603 unlock_kernel();
1604 ll_rw_block(rw, nr, arr);
1605 lock_kernel();
1606 } else {
1607 if (!nr && rw == READ) {
1608 SetPageUptodate(page);
1609 page->owner = (int)current;
1610 UnlockPage(page);
1612 if (nr && (rw == WRITE)) {
1613 unlock_kernel();
1614 ll_rw_block(rw, nr, arr);
1615 lock_kernel();
1618 return 0;
1622 * This is called by end_request() when I/O has completed.
1624 void mark_buffer_uptodate(struct buffer_head * bh, int on)
1626 if (on) {
1627 struct buffer_head *tmp = bh;
1628 struct page *page;
1629 set_bit(BH_Uptodate, &bh->b_state);
1630 /* If a page has buffers and all these buffers are uptodate,
1631 * then the page is uptodate. */
1632 do {
1633 if (!test_bit(BH_Uptodate, &tmp->b_state))
1634 return;
1635 tmp=tmp->b_this_page;
1636 } while (tmp && tmp != bh);
1637 page = mem_map + MAP_NR(bh->b_data);
1638 SetPageUptodate(page);
1639 return;
1641 clear_bit(BH_Uptodate, &bh->b_state);
1645 * Generic "readpage" function for block devices that have the normal
1646 * bmap functionality. This is most of the block device filesystems.
1647 * Reads the page asynchronously --- the unlock_buffer() and
1648 * mark_buffer_uptodate() functions propagate buffer state into the
1649 * page struct once IO has completed.
1651 int generic_readpage(struct file * file, struct page * page)
1653 struct dentry *dentry = file->f_dentry;
1654 struct inode *inode = dentry->d_inode;
1655 unsigned long block;
1656 int *p, nr[PAGE_SIZE/512];
1657 int i;
1659 if (page->buffers) {
1660 printk("hm, no brw_page(%p) because IO already started.\n",
1661 page);
1662 goto out;
1665 i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
1666 block = page->offset >> inode->i_sb->s_blocksize_bits;
1667 p = nr;
1668 do {
1669 *p = inode->i_op->bmap(inode, block);
1670 i--;
1671 block++;
1672 p++;
1673 } while (i > 0);
1675 /* IO start */
1676 brw_page(READ, page, inode->i_dev, nr, inode->i_sb->s_blocksize, 1);
1677 out:
1678 return 0;
1682 * Try to increase the number of buffers available: the size argument
1683 * is used to determine what kind of buffers we want.
1685 static int grow_buffers(int size)
1687 unsigned long page;
1688 struct buffer_head *bh, *tmp;
1689 struct buffer_head * insert_point;
1690 int isize;
1692 if ((size & 511) || (size > PAGE_SIZE)) {
1693 printk("VFS: grow_buffers: size = %d\n",size);
1694 return 0;
1697 if (!(page = __get_free_page(GFP_BUFFER)))
1698 return 0;
1699 bh = create_buffers(page, size, 0);
1700 if (!bh) {
1701 free_page(page);
1702 return 0;
1705 isize = BUFSIZE_INDEX(size);
1706 insert_point = free_list[isize];
1708 tmp = bh;
1709 while (1) {
1710 if (insert_point) {
1711 tmp->b_next_free = insert_point->b_next_free;
1712 tmp->b_prev_free = insert_point;
1713 insert_point->b_next_free->b_prev_free = tmp;
1714 insert_point->b_next_free = tmp;
1715 } else {
1716 tmp->b_prev_free = tmp;
1717 tmp->b_next_free = tmp;
1719 insert_point = tmp;
1720 if (tmp->b_this_page)
1721 tmp = tmp->b_this_page;
1722 else
1723 break;
1725 tmp->b_this_page = bh;
1726 free_list[isize] = bh;
1727 mem_map[MAP_NR(page)].buffers = bh;
1728 buffermem += PAGE_SIZE;
1729 return 1;
1733 * Can the buffer be thrown out?
1735 #define BUFFER_BUSY_BITS ((1<<BH_Dirty) | (1<<BH_Lock) | (1<<BH_Protected))
1736 #define buffer_busy(bh) ((bh)->b_count || ((bh)->b_state & BUFFER_BUSY_BITS))
1739 * try_to_free_buffers() checks if all the buffers on this particular page
1740 * are unused, and frees the page if so.
1742 * Wake up bdflush() if this fails - if we're running low on memory due
1743 * to dirty buffers, we need to flush them out as quickly as possible.
1745 int try_to_free_buffers(struct page * page)
1747 struct buffer_head * tmp, * bh = page->buffers;
1749 tmp = bh;
1750 do {
1751 struct buffer_head * p = tmp;
1753 tmp = tmp->b_this_page;
1754 if (!buffer_busy(p))
1755 continue;
1757 wakeup_bdflush(0);
1758 return 0;
1759 } while (tmp != bh);
1761 tmp = bh;
1762 do {
1763 struct buffer_head * p = tmp;
1764 tmp = tmp->b_this_page;
1766 /* The buffer can be either on the regular queues or on the free list.. */
1767 if (p->b_dev == B_FREE)
1768 remove_from_free_list(p);
1769 else
1770 remove_from_queues(p);
1772 put_unused_buffer_head(p);
1773 } while (tmp != bh);
1775 /* Wake up anyone waiting for buffer heads */
1776 wake_up(&buffer_wait);
1778 /* And free the page */
1779 page->buffers = NULL;
1780 if (__free_page(page)) {
1781 buffermem -= PAGE_SIZE;
1782 return 1;
1784 return 0;
1787 /* ================== Debugging =================== */
1789 void show_buffers(void)
1791 struct buffer_head * bh;
1792 int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
1793 int protected = 0;
1794 int nlist;
1795 static char *buf_types[NR_LIST] = {"CLEAN","LOCKED","DIRTY"};
1797 printk("Buffer memory: %6dkB\n",buffermem>>10);
1798 printk("Buffer heads: %6d\n",nr_buffer_heads);
1799 printk("Buffer blocks: %6d\n",nr_buffers);
1800 printk("Buffer hashed: %6d\n",nr_hashed_buffers);
1802 for(nlist = 0; nlist < NR_LIST; nlist++) {
1803 found = locked = dirty = used = lastused = protected = 0;
1804 bh = lru_list[nlist];
1805 if(!bh) continue;
1807 do {
1808 found++;
1809 if (buffer_locked(bh))
1810 locked++;
1811 if (buffer_protected(bh))
1812 protected++;
1813 if (buffer_dirty(bh))
1814 dirty++;
1815 if (bh->b_count)
1816 used++, lastused = found;
1817 bh = bh->b_next_free;
1818 } while (bh != lru_list[nlist]);
1819 printk("%8s: %d buffers, %d used (last=%d), "
1820 "%d locked, %d protected, %d dirty\n",
1821 buf_types[nlist], found, used, lastused,
1822 locked, protected, dirty);
1827 /* ===================== Init ======================= */
1830 * allocate the hash table and init the free list
1831 * Use gfp() for the hash table to decrease TLB misses, use
1832 * SLAB cache for buffer heads.
1834 void __init buffer_init(unsigned long memory_size)
1836 int order;
1837 unsigned int nr_hash;
1839 /* We need to guess at the right sort of size for a buffer cache.
1840 The heuristic, from working with large databases and keeping
1841 ext2 fsync times manageable, is the following: */
1843 memory_size >>= 22;
1844 for (order = 5; (1UL << order) < memory_size; order++);
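/* Illustrative sizing, assuming a 32-bit box with 4kB pages: 128MB of
 * memory gives memory_size == 32 and order 5, i.e. 32 pages or 32768
 * hash slots; 1GB gives order 8 and 262144 slots.
 */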
1846 /* try to allocate something until we get it or we're asking
1847 for something that is really too small */
1849 do {
1850 nr_hash = (1UL << order) * PAGE_SIZE /
1851 sizeof(struct buffer_head *);
1852 hash_table = (struct buffer_head **)
1853 __get_free_pages(GFP_ATOMIC, order);
1854 } while (hash_table == NULL && --order > 4);
1855 printk("buffer-cache hash table entries: %d (order: %d, %ld bytes)\n", nr_hash, order, (1UL<<order) * PAGE_SIZE);
1857 if (!hash_table)
1858 panic("Failed to allocate buffer hash table\n");
1859 memset(hash_table, 0, nr_hash * sizeof(struct buffer_head *));
1860 bh_hash_mask = nr_hash-1;
1862 bh_cachep = kmem_cache_create("buffer_head",
1863 sizeof(struct buffer_head),
1865 SLAB_HWCACHE_ALIGN, NULL, NULL);
1866 if(!bh_cachep)
1867 panic("Cannot create buffer head SLAB cache\n");
1869 * Allocate the reserved buffer heads.
1871 while (nr_buffer_heads < NR_RESERVED) {
1872 struct buffer_head * bh;
1874 bh = kmem_cache_alloc(bh_cachep, SLAB_ATOMIC);
1875 if (!bh)
1876 break;
1877 put_unused_buffer_head(bh);
1878 nr_buffer_heads++;
1881 lru_list[BUF_CLEAN] = 0;
1882 grow_buffers(BLOCK_SIZE);
1886 /* ====================== bdflush support =================== */
1888 /* This is a simple kernel daemon, whose job it is to provide a dynamic
1889 * response to dirty buffers. Once this process is activated, we write back
1890 * a limited number of buffers to the disks and then go back to sleep again.
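*
* wakeup_bdflush(0) merely wakes the daemon; wakeup_bdflush(1) also
* kicks the disk task queue and then sleeps on bdflush_done until the
* daemon has finished a flushing pass.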
1892 static DECLARE_WAIT_QUEUE_HEAD(bdflush_wait);
1893 static DECLARE_WAIT_QUEUE_HEAD(bdflush_done);
1894 struct task_struct *bdflush_tsk = 0;
1896 void wakeup_bdflush(int wait)
1898 if (current == bdflush_tsk)
1899 return;
1900 if (wait)
1901 run_task_queue(&tq_disk);
1902 wake_up(&bdflush_wait);
1903 if (wait)
1904 sleep_on(&bdflush_done);
1909 * Here we attempt to write back old buffers. We also try to flush inodes
1910 * and supers as well, since this function is essentially "update", and
1911 * otherwise there would be no way of ensuring that these quantities ever
1912 * get written back. Ideally, we would have a timestamp on the inodes
1913 * and superblocks so that we could write back only the old ones as well
1916 static int sync_old_buffers(void)
1918 int i;
1919 int ndirty, nwritten;
1920 int nlist;
1921 int ncount;
1922 struct buffer_head * bh, *next;
1924 sync_supers(0);
1925 sync_inodes(0);
1927 ncount = 0;
1928 #ifdef DEBUG
1929 for(nlist = 0; nlist < NR_LIST; nlist++)
1930 #else
1931 for(nlist = BUF_LOCKED; nlist <= BUF_DIRTY; nlist++)
1932 #endif
1934 ndirty = 0;
1935 nwritten = 0;
1936 repeat:
1938 bh = lru_list[nlist];
1939 if(bh)
1940 for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) {
1941 /* We may have stalled while waiting for I/O to complete. */
1942 if(bh->b_list != nlist) goto repeat;
1943 next = bh->b_next_free;
1944 if(!lru_list[nlist]) {
1945 printk("Dirty list empty %d\n", i);
1946 break;
1949 /* Clean buffer on dirty list? Refile it */
1950 if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh)) {
1951 refile_buffer(bh);
1952 continue;
1955 /* Unlocked buffer on locked list? Refile it */
1956 if (nlist == BUF_LOCKED && !buffer_locked(bh)) {
1957 refile_buffer(bh);
1958 continue;
1961 if (buffer_locked(bh) || !buffer_dirty(bh))
1962 continue;
1963 ndirty++;
1964 if(time_before(jiffies, bh->b_flushtime))
1965 continue;
1966 nwritten++;
1967 next->b_count++;
1968 bh->b_count++;
1969 bh->b_flushtime = 0;
1970 #ifdef DEBUG
1971 if(nlist != BUF_DIRTY) ncount++;
1972 #endif
1973 ll_rw_block(WRITE, 1, &bh);
1974 bh->b_count--;
1975 next->b_count--;
1978 run_task_queue(&tq_disk);
1979 #ifdef DEBUG
1980 if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount);
1981 printk("Wrote %d/%d buffers\n", nwritten, ndirty);
1982 #endif
1983 run_task_queue(&tq_disk);
1984 return 0;
1988 /* This is the interface to bdflush. As we get more sophisticated, we can
1989 * pass tuning parameters to this "process", to adjust how it behaves.
1990 * We would want to verify each parameter, however, to make sure that it
1991 * is reasonable. */
1993 asmlinkage int sys_bdflush(int func, long data)
1995 int i, error = -EPERM;
1997 lock_kernel();
1998 if (!capable(CAP_SYS_ADMIN))
1999 goto out;
2001 if (func == 1) {
2002 error = sync_old_buffers();
2003 goto out;
2006 /* Basically func 1 means read param 1, 2 means write param 1, etc */
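/* Concretely: func == 1 runs sync_old_buffers() above; for N >= 0,
 * func == 2*N+2 reads bdf_prm.data[N] back to user space and
 * func == 2*N+3 writes it, range-checked against bdflush_min/max.
 */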
2007 if (func >= 2) {
2008 i = (func-2) >> 1;
2009 error = -EINVAL;
2010 if (i < 0 || i >= N_PARAM)
2011 goto out;
2012 if((func & 1) == 0) {
2013 error = put_user(bdf_prm.data[i], (int*)data);
2014 goto out;
2016 if (data < bdflush_min[i] || data > bdflush_max[i])
2017 goto out;
2018 bdf_prm.data[i] = data;
2019 error = 0;
2020 goto out;
2023 /* Func 0 used to launch the actual bdflush and then never
2024 * return (unless explicitly killed). We return zero here to
2025 * remain semi-compatible with present update(8) programs.
2027 error = 0;
2028 out:
2029 unlock_kernel();
2030 return error;
2033 /* This is the actual bdflush daemon itself. It used to be started from
2034 * the syscall above, but now we launch it ourselves internally with
2035 * kernel_thread(...) directly after the first thread in init/main.c */
2037 /* To prevent deadlocks for a loop device:
2038 * 1) Do non-blocking writes to loop (avoids deadlock with running
2039 * out of request blocks).
2040 * 2) But do a blocking write if the only dirty buffers are loop buffers
2041 * (otherwise we go into an infinite busy-loop).
2042 * 3) Quit writing loop blocks if a freelist went low (avoids deadlock
2043 * with running out of free buffers for loop's "real" device).
2045 int bdflush(void * unused)
2047 int i;
2048 int ndirty;
2049 int nlist;
2050 int ncount;
2051 struct buffer_head * bh, *next;
2052 int major;
2053 int wrta_cmd = WRITEA; /* non-blocking write for LOOP */
2056 * We have a bare-bones task_struct, and really should fill
2057 * in a few more things so "top" and /proc/2/{exe,root,cwd}
2058 * display semi-sane things. Not real crucial though...
2061 current->session = 1;
2062 current->pgrp = 1;
2063 sprintf(current->comm, "kflushd");
2064 bdflush_tsk = current;
2067 * As a kernel thread we want to tamper with system buffers
2068 * and other internals and thus be subject to the SMP locking
2069 * rules. (On a uniprocessor box this does nothing).
2071 lock_kernel();
2073 for (;;) {
2074 #ifdef DEBUG
2075 printk("bdflush() activated...");
2076 #endif
2078 CHECK_EMERGENCY_SYNC
2080 ncount = 0;
2081 #ifdef DEBUG
2082 for(nlist = 0; nlist < NR_LIST; nlist++)
2083 #else
2084 for(nlist = BUF_LOCKED; nlist <= BUF_DIRTY; nlist++)
2085 #endif
2087 ndirty = 0;
2088 repeat:
2090 bh = lru_list[nlist];
2091 if(bh)
2092 for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty;
2093 bh = next) {
2094 /* We may have stalled while waiting for I/O to complete. */
2095 if(bh->b_list != nlist) goto repeat;
2096 next = bh->b_next_free;
2097 if(!lru_list[nlist]) {
2098 printk("Dirty list empty %d\n", i);
2099 break;
2102 /* Clean buffer on dirty list? Refile it */
2103 if (nlist == BUF_DIRTY && !buffer_dirty(bh)) {
2104 refile_buffer(bh);
2105 continue;
2108 /* Unlocked buffer on locked list? Refile it */
2109 if (nlist == BUF_LOCKED && !buffer_locked(bh)) {
2110 refile_buffer(bh);
2111 continue;
2114 if (buffer_locked(bh) || !buffer_dirty(bh))
2115 continue;
2116 major = MAJOR(bh->b_dev);
2117 /* Should we write back buffers that are shared or not??
2118 currently dirty buffers are not shared, so it does not matter */
2119 next->b_count++;
2120 bh->b_count++;
2121 ndirty++;
2122 bh->b_flushtime = 0;
2123 if (major == LOOP_MAJOR) {
2124 ll_rw_block(wrta_cmd,1, &bh);
2125 wrta_cmd = WRITEA;
2126 if (buffer_dirty(bh))
2127 --ndirty;
2129 else
2130 ll_rw_block(WRITE, 1, &bh);
2131 #ifdef DEBUG
2132 if(nlist != BUF_DIRTY) ncount++;
2133 #endif
2134 bh->b_count--;
2135 next->b_count--;
2136 wake_up(&buffer_wait);
2139 #ifdef DEBUG
2140 if (ncount) printk("sys_bdflush: %d dirty buffers not on dirty list\n", ncount);
2141 printk("sleeping again.\n");
2142 #endif
2143 /* If we didn't write anything, but there are still
2144 * dirty buffers, then make the next write to a
2145 * loop device to be a blocking write.
2146 * This lets us block--which we _must_ do! */
2147 if (ndirty == 0 && nr_buffers_type[BUF_DIRTY] > 0 && wrta_cmd != WRITE) {
2148 wrta_cmd = WRITE;
2149 continue;
2151 run_task_queue(&tq_disk);
2152 wake_up(&bdflush_done);
2154 /* If there are still a lot of dirty buffers around, skip the sleep
2155 and flush some more */
2156 if(ndirty == 0 || nr_buffers_type[BUF_DIRTY] <= nr_buffers * bdf_prm.b_un.nfract/100) {
2157 spin_lock_irq(&current->sigmask_lock);
2158 flush_signals(current);
2159 spin_unlock_irq(&current->sigmask_lock);
2161 interruptible_sleep_on(&bdflush_wait);