sys/vfs/hammer/hammer_io.c

   1 /*
   2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
   3  *
   4  * This code is derived from software contributed to The DragonFly Project
   5  * by Matthew Dillon <dillon@backplane.com>
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  *
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in
  15  *    the documentation and/or other materials provided with the
  16  *    distribution.
  17  * 3. Neither the name of The DragonFly Project nor the names of its
  18  *    contributors may be used to endorse or promote products derived
  19  *    from this software without specific, prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
  25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
  27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  *
  34  * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.11 2008/01/01 01:00:03 dillon Exp $
  35  */
  36 /*
  37  * IO Primitives and buffer cache management
  38  *
  39  * All major data-tracking structures in HAMMER contain a struct hammer_io
  40  * which is used to manage their backing store.  We use filesystem buffers
  41  * for backing store and we leave them passively associated with their
  42  * HAMMER structures.
  43  *
  44  * If the kernel tries to release a passively associated buf which we cannot
   45  * yet let go we set B_LOCKED in the buffer and then actively release it
  46  * later when we can.
  47  */
  48
  49 #include "hammer.h"
  50 #include <sys/fcntl.h>
  51 #include <sys/nlookup.h>
  52 #include <sys/buf.h>
  53 #include <sys/buf2.h>
  54
  55 /*
  56  * Helper routine to disassociate a buffer cache buffer from an I/O
  57  * structure.
  58  */
  59 static void
  60 hammer_io_disassociate(union hammer_io_structure *io)
  61 {
  62         struct buf *bp = io->io.bp;
  63
  64         KKASSERT(io->io.released && io->io.modified == 0);
  65         LIST_INIT(&bp->b_dep);  /* clear the association */
  66         bp->b_ops = NULL;
  67         io->io.bp = NULL;
  68         bp->b_flags &= ~B_LOCKED;
  69
  70         switch(io->io.type) {
  71         case HAMMER_STRUCTURE_VOLUME:
  72                 io->volume.ondisk = NULL;
  73                 io->volume.alist.meta = NULL;
  74                 break;
  75         case HAMMER_STRUCTURE_SUPERCL:
  76                 io->supercl.ondisk = NULL;
  77                 io->supercl.alist.meta = NULL;
  78                 break;
  79         case HAMMER_STRUCTURE_CLUSTER:
  80                 io->cluster.ondisk = NULL;
  81                 io->cluster.alist_master.meta = NULL;
  82                 io->cluster.alist_btree.meta = NULL;
  83                 io->cluster.alist_record.meta = NULL;
  84                 io->cluster.alist_mdata.meta = NULL;
  85                 break;
  86         case HAMMER_STRUCTURE_BUFFER:
  87                 io->buffer.ondisk = NULL;
  88                 io->buffer.alist.meta = NULL;
  89                 break;
  90         }
  91 }
  92
  93 /*
  94  * Mark a cluster as being closed.  This is done as late as possible,
  95  * only when we are asked to flush the cluster
  96  */
  97 static void
  98 hammer_close_cluster(hammer_cluster_t cluster)
  99 {
 100         while (cluster->state == HAMMER_CLUSTER_ASYNC)
 101                 tsleep(cluster, 0, "hmrdep", 0);
 102         if (cluster->state == HAMMER_CLUSTER_OPEN) {
 103                 cluster->state = HAMMER_CLUSTER_IDLE;
 104                 hammer_modify_cluster(cluster);
 105                 cluster->ondisk->clu_flags &= ~HAMMER_CLUF_OPEN;
 106                 hammer_modify_cluster_done(cluster);
 107                 kprintf("CLOSE CLUSTER\n");
 108         }
 109 }
 110
 111
 112 /*
 113  * Load bp for a HAMMER structure.
 114  */
 115 int
 116 hammer_io_read(struct vnode *devvp, struct hammer_io *io)
 117 {
 118         struct buf *bp;
 119         int error;
 120
 121         if ((bp = io->bp) == NULL) {
 122                 error = bread(devvp, io->offset, HAMMER_BUFSIZE, &io->bp);
 123                 if (error == 0) {
 124                         bp = io->bp;
 125                         bp->b_ops = &hammer_bioops;
 126                         LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
 127                         BUF_KERNPROC(bp);
 128                 }
 129                 io->modified = 0;       /* no new modifications yet */
 130                 io->released = 0;       /* we hold an active lock on bp */
 131         } else {
 132                 error = 0;
 133         }
 134         return(error);
 135 }
 136
 137 /*
 138  * Similar to hammer_io_read() but returns a zero'd out buffer instead.
 139  * vfs_bio_clrbuf() is kinda nasty, enforce serialization against background
 140  * I/O so we can call it.
 141  */
 142 int
 143 hammer_io_new(struct vnode *devvp, struct hammer_io *io)
 144 {
 145         struct buf *bp;
 146
 147         if ((bp = io->bp) == NULL) {
 148                 io->bp = getblk(devvp, io->offset, HAMMER_BUFSIZE, 0, 0);
 149                 bp = io->bp;
 150                 bp->b_ops = &hammer_bioops;
 151                 LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
 152                 io->released = 0;       /* we hold an active lock on bp */
 153                 BUF_KERNPROC(bp);
 154         } else {
 155                 if (io->released) {
 156                         regetblk(bp);
 157                         BUF_KERNPROC(bp);
 158                         io->released = 0;
 159                 }
 160         }
 161         io->modified = 1;
 162         vfs_bio_clrbuf(bp);
 163         return(0);
 164 }
 165
 166 /*
 167  * This routine is called when a buffer within a cluster is modified.  We
 168  * mark the cluster open and immediately initiate asynchronous I/O.  Any
 169  * related hammer_buffer write I/O blocks until our async write completes.
 170  * This guarentees (inasmuch as the OS can) that the cluster recovery code
 171  * will see a cluster marked open if a crash occured while the filesystem
 172  * still had dirty buffers associated with that cluster.
 173  *
 174  * XXX
 175  */
 176 void
 177 hammer_io_notify_cluster(hammer_cluster_t cluster)
 178 {
 179         struct hammer_io *io = &cluster->io;
 180
 181         if (cluster->state == HAMMER_CLUSTER_IDLE) {
 182                 hammer_lock_ex(&cluster->io.lock);
 183                 if (cluster->state == HAMMER_CLUSTER_IDLE) {
 184                         if (io->released)
 185                                 regetblk(io->bp);
 186                         else
 187                                 io->released = 1;
 188                         kprintf("MARK CLUSTER OPEN\n");
 189                         cluster->ondisk->clu_flags |= HAMMER_CLUF_OPEN;
 190                         cluster->state = HAMMER_CLUSTER_ASYNC;
 191                         cluster->io.modified = 1;
 192                         bawrite(io->bp);
 193                 }
 194                 hammer_unlock(&cluster->io.lock);
 195         }
 196 }
 197
 198 /*
 199  * This routine is called on the last reference to a hammer structure.
 200  * Regardless of the state io->modified must be cleared when we return.
 201  *
 202  * If flush is non-zero we have to completely disassociate the bp from the
 203  * structure (which may involve blocking).  Otherwise we can leave the bp
 204  * passively associated with the structure.
 205  *
 206  * The caller is holding io->lock exclusively.
 207  */
 208 void
 209 hammer_io_release(struct hammer_io *io, int flush)
 210 {
 211         union hammer_io_structure *iou = (void *)io;
 212         hammer_cluster_t cluster;
 213         struct buf *bp;
 214         int modified;
 215
 216         if ((bp = io->bp) != NULL) {
 217                 /*
 218                  * If neither we nor the kernel want to flush the bp, we can
 219                  * stop here.  Make sure the bp is passively released
 220                  * before returning.  Even though we are still holding it,
 221                  * we want to be notified when the kernel wishes to flush
 222                  * it out so make sure B_DELWRI is properly set if we had
 223                  * made modifications.
 224                  */
 225                 if (flush == 0 && (bp->b_flags & B_LOCKED) == 0) {
 226                         if ((bp->b_flags & B_DELWRI) == 0 && io->modified) {
 227                                 if (io->released)
 228                                         regetblk(bp);
 229                                 else
 230                                         io->released = 1;
 231                                 io->modified = 0;
 232                                 bdwrite(bp);
 233                         } else if (io->released == 0) {
 234                                 /* buffer write state already synchronized */
 235                                 io->modified = 0;
 236                                 io->released = 1;
 237                                 bqrelse(bp);
 238                         } else {
 239                                 /* buffer write state already synchronized */
 240                                 io->modified = 0;
 241                         }
 242                         return;
 243                 }
 244
 245                 /*
 246                  * Either we want to flush the buffer or the kernel tried.
 247                  *
 248                  * If this is a hammer_buffer we may have to wait for the
 249                  * cluster header write to complete.
 250                  */
 251                 if (iou->io.type == HAMMER_STRUCTURE_BUFFER &&
 252                     (io->modified || (bp->b_flags & B_DELWRI))) {
 253                         cluster = iou->buffer.cluster;
 254                         while (cluster->state == HAMMER_CLUSTER_ASYNC)
 255                                 tsleep(iou->buffer.cluster, 0, "hmrdep", 0);
 256                 }
 257
 258                 /*
 259                  * If we have an open cluster header, close it
 260                  */
 261                 if (iou->io.type == HAMMER_STRUCTURE_CLUSTER) {
 262                         hammer_close_cluster(&iou->cluster);
 263                 }
 264
 265                 /*
 266                  * Gain ownership of the buffer.  Nothing can take it away
 267                  * from the io structure while we have it locked, so we
 268                  * can safely reget.
 269                  *
 270                  * Once our thread owns the buffer we can disassociate it
 271                  * from the io structure.
 272                  */
 273                 if (io->released)
 274                         regetblk(bp);
 275                 else
 276                         io->released = 1;
 277                 modified = io->modified;
 278                 io->modified = 0;
 279                 hammer_io_disassociate(iou);
 280
 281                 /*
 282                  * Now dispose of the buffer.  Someone tried to flush, so
 283                  * issue the I/O immediately.
 284                  */
 285                 if (modified || (bp->b_flags & B_DELWRI))
 286                         bawrite(bp);
 287                 else
 288                         bqrelse(bp);
 289         }
 290 }
 291
 292 /*
 293  * Flush dirty data, if any.
 294  */
 295 void
 296 hammer_io_flush(struct hammer_io *io, struct hammer_sync_info *info)
 297 {
 298         struct buf *bp;
 299         int error;
 300
 301 again:
 302         if ((bp = io->bp) == NULL)
 303                 return;
 304         if (bp->b_flags & B_DELWRI)
 305                 io->modified = 1;
 306
 307         /*
 308          * We can't initiate a write while the buffer is being modified
 309          * by someone.
 310          */
 311         while (io->lock.modifying) {
 312                 io->lock.wanted = 1;
 313                 kprintf("DELAYING IO FLUSH BP %p TYPE %d REFS %d modifying %d\n",
 314                         bp, io->type, io->lock.refs, io->lock.modifying);
 315                 tsleep(&io->lock, 0, "hmrfls", 0);
 316         }
 317         hammer_lock_ex(&io->lock);
 318         if (io->lock.modifying || io->bp == NULL) {
 319                 hammer_unlock(&io->lock);
 320                 goto again;
 321         }
 322
 323         /*
 324          * Acquire ownership of the buffer cache buffer so we can flush it
 325          * out.
 326          */
 327         if (io->released) {
 328                 if (io->modified == 0)
 329                         goto done;
 330                 regetblk(bp);
 331         } else {
 332                 io->released = 1;
 333         }
 334
 335         /*
 336          * Return the bp to the system, issuing I/O if necessary.  The
 337          * system will issue a callback to us when it actually wants to
 338          * throw the bp away.
 339          */
 340         if (io->modified == 0) {
 341                 bqrelse(bp);
 342         } else if (info->waitfor & MNT_WAIT) {
 343                 io->modified = 0;
 344                 error = bwrite(bp);
 345                 if (error)
 346                         info->error = error;
 347         } else {
 348                 io->modified = 0;
 349                 bawrite(bp);
 350         }
 351 done:
 352         hammer_unlock(&io->lock);
 353 }
 354
 355 /*
 356  * Called prior to any modifications being made to ondisk data.  This
 357  * forces the caller to wait for any writes to complete.  We explicitly
 358  * avoid the write-modify race.
 359  *
 360  * This routine is only called on hammer structures which are already
 361  * actively referenced.
 362  */
 363 void
 364 hammer_io_intend_modify(struct hammer_io *io)
 365 {
 366         KKASSERT(io->lock.refs != 0 && io->bp != NULL);
 367         if (io->released) {
 368                 hammer_lock_ex(&io->lock);
 369                 if (io->released) {
 370                         regetblk(io->bp);
 371                         BUF_KERNPROC(io->bp);
 372                         io->released = 0;
 373                 }
 374                 hammer_unlock(&io->lock);
 375         }
 376 }
 377
 378 void
 379 hammer_io_modify_done(struct hammer_io *io)
 380 {
 381         KKASSERT(io->lock.modifying > 0);
 382         --io->lock.modifying;
 383         if (io->lock.wanted && io->lock.modifying == 0) {
 384                 io->lock.wanted = 0;
 385                 wakeup(&io->lock);
 386         }
 387 }
 388
 389 /*
 390  * HAMMER_BIOOPS
 391  */
 392
 393 /*
 394  * Pre and post I/O callbacks.
 395  */
 396 static void hammer_io_deallocate(struct buf *bp);
 397
 398 static void
 399 hammer_io_start(struct buf *bp)
 400 {
 401 #if 0
 402         union hammer_io_structure *io = (void *)LIST_FIRST(&bp->b_dep);
 403
 404         if (io->io.type == HAMMER_STRUCTURE_BUFFER) {
 405                 while (io->buffer.cluster->io_in_progress) {
 406                         kprintf("hammer_io_start: wait for cluster\n");
 407                         tsleep(io->buffer.cluster, 0, "hmrdep", 0);
 408                         kprintf("hammer_io_start: wait for cluster done\n");
 409                 }
 410         }
 411 #endif
 412 }
 413
 414 static void
 415 hammer_io_complete(struct buf *bp)
 416 {
 417         union hammer_io_structure *io = (void *)LIST_FIRST(&bp->b_dep);
 418
 419         if (io->io.type == HAMMER_STRUCTURE_CLUSTER) {
 420                 if (io->cluster.state == HAMMER_CLUSTER_ASYNC) {
 421                         io->cluster.state = HAMMER_CLUSTER_OPEN;
 422                         wakeup(&io->cluster);
 423                 }
 424         }
 425 }
 426
 427 /*
 428  * Callback from kernel when it wishes to deallocate a passively
 429  * associated structure.  This can only occur if the buffer is
 430  * passively associated with the structure.  The kernel has locked
 431  * the buffer.
 432  *
 433  * If we cannot disassociate we set B_LOCKED to prevent the buffer
 434  * from getting reused.
 435  */
 436 static void
 437 hammer_io_deallocate(struct buf *bp)
 438 {
 439         union hammer_io_structure *io = (void *)LIST_FIRST(&bp->b_dep);
 440
 441         /* XXX memory interlock, spinlock to sync cpus */
 442
 443         /*
 444          * Since the kernel is passing us a locked buffer, the HAMMER
 445          * structure had better not believe it has a lock on the buffer.
 446          */
 447         KKASSERT(io->io.released);
 448         crit_enter();
 449
 450         /*
 451          * First, ref the structure to prevent either the buffer or the
 452          * structure from going away or being unexpectedly flushed.
 453          */
 454         hammer_ref(&io->io.lock);
 455
 456         /*
 457          * Buffers can have active references from cached hammer_node's,
 458          * even if those nodes are themselves passively cached.  Attempt
 459          * to clean them out.  This may not succeed.
 460          *
 461          * We have to do some magic with io.released because
 462          * hammer_io_intend_modify() can be called indirectly from the
 463          * flush code, otherwise we might panic with a recursive bp lock.
 464          */
 465         if (io->io.type == HAMMER_STRUCTURE_BUFFER &&
 466             hammer_lock_ex_try(&io->io.lock) == 0) {
 467                 io->io.released = 0;
 468                 hammer_flush_buffer_nodes(&io->buffer);
 469                 KKASSERT(io->io.released == 0);
 470                 io->io.released = 1;
 471                 hammer_unlock(&io->io.lock);
 472         }
 473
 474         if (hammer_islastref(&io->io.lock)) {
 475                 /*
 476                  * If we are the only ref left we can disassociate the I/O.
 477                  * It had better still be in a released state because the
 478                  * kernel is holding a lock on the buffer.  Any passive
 479                  * modifications should have already been synchronized with
 480                  * the buffer.
 481                  */
 482                 KKASSERT(io->io.modified == 0);
 483                 hammer_io_disassociate(io);
 484
 485                 /*
 486                  * Perform final rights on the structure.  This can cause
 487                  * a chain reaction - e.g. last buffer -> last cluster ->
 488                  * last supercluster -> last volume.
 489                  */
 490                 switch(io->io.type) {
 491                 case HAMMER_STRUCTURE_VOLUME:
 492                         hammer_rel_volume(&io->volume, 1);
 493                         break;
 494                 case HAMMER_STRUCTURE_SUPERCL:
 495                         hammer_rel_supercl(&io->supercl, 1);
 496                         break;
 497                 case HAMMER_STRUCTURE_CLUSTER:
 498                         hammer_rel_cluster(&io->cluster, 1);
 499                         break;
 500                 case HAMMER_STRUCTURE_BUFFER:
 501                         hammer_rel_buffer(&io->buffer, 1);
 502                         break;
 503                 }
 504         } else {
 505                 /*
 506                  * Otherwise tell the kernel not to destroy the buffer.
 507                  *
 508                  * We have to unref the structure without performing any
 509                  * final rights to it to avoid a deadlock.
 510                  */
 511                 bp->b_flags |= B_LOCKED;
 512                 hammer_unref(&io->io.lock);
 513         }
 514         crit_exit();
 515 }
 516
 517 static int
 518 hammer_io_fsync(struct vnode *vp)
 519 {
 520         return(0);
 521 }
 522
 523 /*
 524  * NOTE: will not be called unless we tell the kernel about the
 525  * bioops.  Unused... we use the mount's VFS_SYNC instead.
 526  */
 527 static int
 528 hammer_io_sync(struct mount *mp)
 529 {
 530         return(0);
 531 }
 532
 533 static void
 534 hammer_io_movedeps(struct buf *bp1, struct buf *bp2)
 535 {
 536 }
 537
 538 /*
 539  * I/O pre-check for reading and writing.  HAMMER only uses this for
 540  * B_CACHE buffers so checkread just shouldn't happen, but if it does
 541  * allow it.
 542  *
 543  * Writing is a different case.  We don't want the kernel to try to write
 544  * out a buffer that HAMMER may be modifying passively or which has a
 545  * dependancy.
 546  *
 547  * This code enforces the following write ordering: buffers, then cluster
 548  * headers, then volume headers.
 549  */
 550 static int
 551 hammer_io_checkread(struct buf *bp)
 552 {
 553         return(0);
 554 }
 555
 556 static int
 557 hammer_io_checkwrite(struct buf *bp)
 558 {
 559         union hammer_io_structure *iou = (void *)LIST_FIRST(&bp->b_dep);
 560
 561         if (iou->io.type == HAMMER_STRUCTURE_BUFFER &&
 562             iou->buffer.cluster->state == HAMMER_CLUSTER_ASYNC) {
 563                 /*
 564                  * Cannot write out a cluster buffer if the cluster header
 565                  * I/O opening the cluster has not completed.
 566                  */
 567                 bp->b_flags |= B_LOCKED;
 568                 return(-1);
 569         } else if (iou->io.lock.refs) {
 570                 /*
 571                  * Cannot write out a bp if its associated buffer has active
 572                  * references.
 573                  */
 574                 bp->b_flags |= B_LOCKED;
 575                 return(-1);
 576         } else {
 577                 /*
 578                  * We're good, but before we can let the kernel proceed we
 579                  * may have to make some adjustments.
 580                  *
 581                  * Since there are no refs on the io structure, HAMMER must
 582                  * have already synchronized its modify state with the bp
 583                  * so iou->io.modified should be 0.
 584                  */
 585                 if (iou->io.type == HAMMER_STRUCTURE_CLUSTER)
 586                         hammer_close_cluster(&iou->cluster);
 587                 hammer_io_disassociate(iou);
 588                 return(0);
 589         }
 590 }
 591
 592 /*
 593  * Return non-zero if the caller should flush the structure associated
 594  * with this io sub-structure.
 595  */
 596 int
 597 hammer_io_checkflush(struct hammer_io *io)
 598 {
 599         if (io->bp == NULL || (io->bp->b_flags & B_LOCKED))
 600                 return(1);
 601         return(0);
 602 }
 603
 604 /*
 605  * Return non-zero if we wish to delay the kernel's attempt to flush
 606  * this buffer to disk.
 607  */
 608 static int
 609 hammer_io_countdeps(struct buf *bp, int n)
 610 {
 611         return(0);
 612 }
 613
 614 struct bio_ops hammer_bioops = {
 615         .io_start       = hammer_io_start,
 616         .io_complete    = hammer_io_complete,
 617         .io_deallocate  = hammer_io_deallocate,
 618         .io_fsync       = hammer_io_fsync,
 619         .io_sync        = hammer_io_sync,
 620         .io_movedeps    = hammer_io_movedeps,
 621         .io_countdeps   = hammer_io_countdeps,
 622         .io_checkread   = hammer_io_checkread,
 623         .io_checkwrite  = hammer_io_checkwrite,
 624 };
 625