HAMMER 53E/Many: Performance tuning
sys/vfs/hammer/hammer_flusher.c
/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.22 2008/06/10 05:06:20 dillon Exp $
 */
/*
 * HAMMER dependency flusher thread
 *
 * Meta-data updates create buffer dependencies which are arranged as a
 * hierarchy of lists.
 */

#include "hammer.h"
static void hammer_flusher_thread(void *arg);
static void hammer_flusher_clean_loose_ios(hammer_mount_t hmp);
static void hammer_flusher_flush(hammer_mount_t hmp);
static void hammer_flusher_flush_inode(hammer_inode_t ip,
                                        hammer_transaction_t trans);
static int hammer_must_finalize_undo(hammer_mount_t hmp);
static void hammer_flusher_finalize(hammer_transaction_t trans, int final);

#define HAMMER_FLUSHER_IMMEDIATE        16
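/*
 * Synchronous flush: sample the next flush sequence number, wake the
 * flusher if it is not already signalled, and sleep until flusher_done
 * has caught up to the sampled sequence.
 */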
void
hammer_flusher_sync(hammer_mount_t hmp)
{
        int seq;

        if (hmp->flusher_td) {
                seq = hmp->flusher_next;
                if (hmp->flusher_signal++ == 0)
                        wakeup(&hmp->flusher_signal);
                while ((int)(seq - hmp->flusher_done) > 0)
                        tsleep(&hmp->flusher_done, 0, "hmrfls", 0);
        }
}
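/*
 * Asynchronous flush: wake the flusher if it is not already signalled,
 * but do not wait for the flush to complete.
 */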
void
hammer_flusher_async(hammer_mount_t hmp)
{
        if (hmp->flusher_td) {
                if (hmp->flusher_signal++ == 0)
                        wakeup(&hmp->flusher_signal);
        }
}
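/*
 * Initialize the flusher's sequencing state and start the flusher thread
 * for this mount.
 */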
void
hammer_flusher_create(hammer_mount_t hmp)
{
        hmp->flusher_signal = 0;
        hmp->flusher_act = 0;
        hmp->flusher_done = 0;
        hmp->flusher_next = 1;
        lwkt_create(hammer_flusher_thread, hmp, &hmp->flusher_td, NULL,
                    0, -1, "hammer");
}
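/*
 * Tear down the flusher: flag the thread to exit, then repeatedly wake it
 * and sleep until it clears flusher_td on its way out.
 */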
void
hammer_flusher_destroy(hammer_mount_t hmp)
{
        if (hmp->flusher_td) {
                hmp->flusher_exiting = 1;
                while (hmp->flusher_td) {
                        ++hmp->flusher_signal;
                        wakeup(&hmp->flusher_signal);
                        tsleep(&hmp->flusher_exiting, 0, "hmrwex", 0);
                }
        }
}
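/*
 * Flusher thread main loop: advance the active flush group, flush it,
 * report completion, and wait to be signalled again.
 */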
static void
hammer_flusher_thread(void *arg)
{
        hammer_mount_t hmp = arg;

        for (;;) {
                while (hmp->flusher_lock)
                        tsleep(&hmp->flusher_lock, 0, "hmrhld", 0);
                kprintf("S");
                hmp->flusher_act = hmp->flusher_next;
                ++hmp->flusher_next;
                hammer_flusher_clean_loose_ios(hmp);
                hammer_flusher_flush(hmp);
                hammer_flusher_clean_loose_ios(hmp);
                hmp->flusher_done = hmp->flusher_act;

                wakeup(&hmp->flusher_done);

                /*
                 * Wait for activity.
                 */
                if (hmp->flusher_exiting && TAILQ_EMPTY(&hmp->flush_list))
                        break;

                /*
                 * This is a hack until we can dispose of frontend buffer
                 * cache buffers on the frontend.
                 */
                while (hmp->flusher_signal == 0)
                        tsleep(&hmp->flusher_signal, 0, "hmrwwa", 0);
                hmp->flusher_signal = 0;
        }
        hmp->flusher_td = NULL;
        wakeup(&hmp->flusher_exiting);
        lwkt_exit();
}
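/*
 * Release any loose buffers that have accumulated since the last pass.
 */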
static void
hammer_flusher_clean_loose_ios(hammer_mount_t hmp)
{
        hammer_buffer_t buffer;
        hammer_io_t io;

        /*
         * loose ends - buffers without bp's aren't tracked by the kernel
         * and can build up, so clean them out.  This can occur when an
         * IO completes on a buffer with no references left.
         */
        while ((io = TAILQ_FIRST(&hmp->lose_list)) != NULL) {
                KKASSERT(io->mod_list == &hmp->lose_list);
                TAILQ_REMOVE(io->mod_list, io, mod_entry);
                io->mod_list = NULL;
                hammer_ref(&io->lock);
                buffer = (void *)io;
                hammer_rel_buffer(buffer, 0);
        }
}
/*
 * Flush all inodes in the current flush group.
 */
static void
hammer_flusher_flush(hammer_mount_t hmp)
{
        struct hammer_transaction trans;
        hammer_inode_t ip;
        hammer_reserve_t resv;

        /*
         * Flush the inodes
         */
        hammer_start_transaction_fls(&trans, hmp);
        while ((ip = TAILQ_FIRST(&hmp->flush_list)) != NULL) {
                if (ip->flush_group != hmp->flusher_act)
                        break;
                TAILQ_REMOVE(&hmp->flush_list, ip, flush_entry);
                hammer_flusher_flush_inode(ip, &trans);
        }
        hammer_flusher_finalize(&trans, 1);
        hmp->flusher_tid = trans.tid;

        /*
         * Clean up any freed big-blocks (typically zone-2).
         * resv->flush_group is typically set several flush groups ahead
         * of the free to ensure that the freed block is not reused until
         * it is safe to reuse.
         */
        while ((resv = TAILQ_FIRST(&hmp->delay_list)) != NULL) {
                if (resv->flush_group != hmp->flusher_act)
                        break;
                TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
                hammer_blockmap_reserve_complete(hmp, resv);
        }

        hammer_done_transaction(&trans);
}
/*
 * Flush a single inode that is part of a flush group.
 */
static
void
hammer_flusher_flush_inode(hammer_inode_t ip, hammer_transaction_t trans)
{
        hammer_mount_t hmp = ip->hmp;

        /*hammer_lock_ex(&ip->lock);*/
        ip->error = hammer_sync_inode(ip);
        hammer_flush_inode_done(ip);
        /*hammer_unlock(&ip->lock);*/

        if (hammer_must_finalize_undo(hmp)) {
                kprintf("HAMMER: Warning: UNDO area too small!");
                hammer_flusher_finalize(trans, 1);
        } else if (trans->hmp->locked_dirty_count +
                   trans->hmp->io_running_count > hammer_limit_dirtybufs) {
                kprintf("t");
                hammer_flusher_finalize(trans, 0);
        }
}
/*
 * If the UNDO area gets over half full we have to flush it.  We can't
 * afford the UNDO area becoming completely full as that would break
 * the crash recovery atomicity.
 */
static
int
hammer_must_finalize_undo(hammer_mount_t hmp)
{
        if (hammer_undo_space(hmp) < hammer_undo_max(hmp) / 2) {
                hkprintf("*");
                return(1);
        } else {
                return(0);
        }
}
/*
 * Flush all pending UNDOs, wait for write completion, update the volume
 * header with the new UNDO end position, and flush it.  Then
 * asynchronously flush the meta-data.
 *
 * If this is the last finalization in a flush group we also synchronize
 * our cached blockmap and set hmp->flusher_undo_start and our cached undo
 * fifo first_offset so the next flush resets the FIFO pointers.
 */
static
void
hammer_flusher_finalize(hammer_transaction_t trans, int final)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t cundomap, dundomap;
        hammer_mount_t hmp;
        hammer_io_t io;
        int count;
        int i;

        hmp = trans->hmp;
        root_volume = trans->rootvol;

        /*
         * Flush data buffers.  This can occur asynchronously and at any
         * time.  We must interlock against the frontend direct-data write
         * but do not have to acquire the sync-lock yet.
         */
        count = 0;
        while ((io = TAILQ_FIRST(&hmp->data_list)) != NULL) {
                hammer_ref(&io->lock);
                hammer_io_write_interlock(io);
                KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
                hammer_io_flush(io);
                hammer_io_done_interlock(io);
                hammer_rel_buffer((hammer_buffer_t)io, 0);
                ++count;
        }

        /*
         * The sync-lock is required for the remaining sequence.  This lock
         * prevents meta-data from being modified.
         */
        hammer_sync_lock_ex(trans);

        /*
         * If we have been asked to finalize the volume header, sync the
         * cached blockmap to the on-disk blockmap.  Generate an UNDO
         * record for the update.
         */
        if (final) {
                cundomap = &hmp->blockmap[0];
                dundomap = &root_volume->ondisk->vol0_blockmap[0];
                if (root_volume->io.modified) {
                        hammer_modify_volume(trans, root_volume,
                                             dundomap, sizeof(hmp->blockmap));
                        for (i = 0; i < HAMMER_MAX_ZONES; ++i)
                                hammer_crc_set_blockmap(&cundomap[i]);
                        bcopy(cundomap, dundomap, sizeof(hmp->blockmap));
                        hammer_modify_volume_done(root_volume);
                }
        }

        /*
         * Flush UNDOs
         */
        count = 0;
        while ((io = TAILQ_FIRST(&hmp->undo_list)) != NULL) {
                KKASSERT(io->modify_refs == 0);
                hammer_ref(&io->lock);
                KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
                hammer_io_flush(io);
                hammer_rel_buffer((hammer_buffer_t)io, 0);
                ++count;
        }

        /*
         * Wait for I/Os to complete
         */
        crit_enter();
        while (hmp->io_running_count)
                tsleep(&hmp->io_running_count, 0, "hmrfl1", 0);
        crit_exit();

        /*
         * Update the on-disk volume header with the new UNDO FIFO end position
         * (do not generate new UNDO records for this change).  We have to
         * do this for the UNDO FIFO whether (final) is set or not.
         *
         * Also update the on-disk next_tid field.  This does not require
         * an UNDO.  However, because our TID is generated before we get
         * the sync lock, another sync may have beat us to the punch.
         *
         * The volume header will be flushed out synchronously.
         */
        dundomap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
        cundomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];

        if (dundomap->first_offset != cundomap->first_offset ||
            dundomap->next_offset != cundomap->next_offset) {
                hammer_modify_volume(NULL, root_volume, NULL, 0);
                dundomap->first_offset = cundomap->first_offset;
                dundomap->next_offset = cundomap->next_offset;
                hammer_crc_set_blockmap(dundomap);
                hammer_crc_set_volume(root_volume->ondisk);
                if (root_volume->ondisk->vol0_next_tid < trans->tid)
                        root_volume->ondisk->vol0_next_tid = trans->tid;
                hammer_modify_volume_done(root_volume);
        }

        if (root_volume->io.modified) {
                hammer_io_flush(&root_volume->io);
        }

        /*
         * Wait for I/Os to complete
         */
        crit_enter();
        while (hmp->io_running_count)
                tsleep(&hmp->io_running_count, 0, "hmrfl2", 0);
        crit_exit();

        /*
         * Flush meta-data.  The meta-data will be undone if we crash
         * so we can safely flush it asynchronously.
         *
         * Repeated catchups will wind up flushing this update's meta-data
         * and the UNDO buffers for the next update simultaneously.  This
         * is ok.
         */
        count = 0;
        while ((io = TAILQ_FIRST(&hmp->meta_list)) != NULL) {
                KKASSERT(io->modify_refs == 0);
                hammer_ref(&io->lock);
                KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
                hammer_io_flush(io);
                hammer_rel_buffer((hammer_buffer_t)io, 0);
                ++count;
        }

        /*
         * If this is the final finalization for the flush group, set up
         * for the next sequence by setting a new first_offset in our
         * cached blockmap and clearing the undo history.
         */
        if (final) {
                cundomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
                cundomap->first_offset = cundomap->next_offset;
                hammer_clear_undo_history(hmp);
        }

        hammer_sync_unlock(trans);
}