sys/vfs/hammer/hammer_inode.c

   1 /*
   2  * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
   3  *
   4  * This code is derived from software contributed to The DragonFly Project
   5  * by Matthew Dillon <dillon@backplane.com>
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  *
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in
  15  *    the documentation and/or other materials provided with the
  16  *    distribution.
  17  * 3. Neither the name of The DragonFly Project nor the names of its
  18  *    contributors may be used to endorse or promote products derived
  19  *    from this software without specific, prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
  25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
  27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  *
  34  * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.68 2008/06/10 05:06:20 dillon Exp $
  35  */
  36
  37 #include "hammer.h"
  38 #include <vm/vm_extern.h>
  39 #include <sys/buf.h>
  40 #include <sys/buf2.h>
  41
  42 static int hammer_unload_inode(struct hammer_inode *ip);
  43 static void hammer_flush_inode_core(hammer_inode_t ip, int flags);
  44 static int hammer_setup_child_callback(hammer_record_t rec, void *data);
  45 static int hammer_setup_parent_inodes(hammer_record_t record);
  46
  47 #ifdef DEBUG_TRUNCATE
  48 extern struct hammer_inode *HammerTruncIp;
  49 #endif
  50
  51 /*
  52  * The kernel is not actively referencing this vnode but is still holding
  53  * it cached.
  54  *
  55  * This is called from the frontend.
  56  */
  57 int
  58 hammer_vop_inactive(struct vop_inactive_args *ap)
  59 {
  60         struct hammer_inode *ip = VTOI(ap->a_vp);
  61
  62         /*
  63          * Degenerate case
  64          */
  65         if (ip == NULL) {
  66                 vrecycle(ap->a_vp);
  67                 return(0);
  68         }
  69
  70         /*
  71          * If the inode no longer has visibility in the filesystem and is
  72          * fairly clean, try to recycle it immediately.  This can deadlock
  73          * in vfsync() if we aren't careful.
  74          *
  75          * Do not queue the inode to the flusher if we still have visibility,
  76          * otherwise namespace calls such as chmod will unnecessarily generate
  77          * multiple inode updates.
  78          */
  79         hammer_inode_unloadable_check(ip, 0);
  80         if (ip->ino_data.nlinks == 0) {
  81                 if (ip->flags & HAMMER_INODE_MODMASK)
  82                         hammer_flush_inode(ip, 0);
  83                 else
  84                         vrecycle(ap->a_vp);
  85         }
  86         return(0);
  87 }
  88
  89 /*
  90  * Release the vnode association.  This is typically (but not always)
  91  * the last reference on the inode.
  92  *
  93  * Once the association is lost we are on our own with regards to
  94  * flushing the inode.
  95  */
  96 int
  97 hammer_vop_reclaim(struct vop_reclaim_args *ap)
  98 {
  99         struct hammer_inode *ip;
 100         struct vnode *vp;
 101
 102         vp = ap->a_vp;
 103
 104         if ((ip = vp->v_data) != NULL) {
 105                 vp->v_data = NULL;
 106                 ip->vp = NULL;
 107                 if ((ip->flags & HAMMER_INODE_RECLAIM) == 0) {
 108                         ++hammer_count_reclaiming;
 109                         ++ip->hmp->inode_reclaims;
 110                         ip->flags |= HAMMER_INODE_RECLAIM;
 111                 }
 112                 hammer_rel_inode(ip, 1);
 113         }
 114         return(0);
 115 }
 116
 117 /*
 118  * Return a locked vnode for the specified inode.  The inode must be
 119  * referenced but NOT LOCKED on entry and will remain referenced on
 120  * return.
 121  *
 122  * Called from the frontend.
 123  */
 124 int
 125 hammer_get_vnode(struct hammer_inode *ip, struct vnode **vpp)
 126 {
 127         hammer_mount_t hmp;
 128         struct vnode *vp;
 129         int error = 0;
 130
 131         hmp = ip->hmp;
 132
 133         for (;;) {
 134                 if ((vp = ip->vp) == NULL) {
 135                         error = getnewvnode(VT_HAMMER, hmp->mp, vpp, 0, 0);
 136                         if (error)
 137                                 break;
 138                         hammer_lock_ex(&ip->lock);
 139                         if (ip->vp != NULL) {
 140                                 hammer_unlock(&ip->lock);
 141                                 vp->v_type = VBAD;
 142                                 vx_put(vp);
 143                                 continue;
 144                         }
 145                         hammer_ref(&ip->lock);
 146                         vp = *vpp;
 147                         ip->vp = vp;
 148                         vp->v_type =
 149                                 hammer_get_vnode_type(ip->ino_data.obj_type);
 150
 151                         if (ip->flags & HAMMER_INODE_RECLAIM) {
 152                                 --hammer_count_reclaiming;
 153                                 --hmp->inode_reclaims;
 154                                 ip->flags &= ~HAMMER_INODE_RECLAIM;
 155                                 if (hmp->flags & HAMMER_MOUNT_WAITIMAX)
 156                                         hammer_inode_wakereclaims(hmp);
 157                         }
 158
 159                         switch(ip->ino_data.obj_type) {
 160                         case HAMMER_OBJTYPE_CDEV:
 161                         case HAMMER_OBJTYPE_BDEV:
 162                                 vp->v_ops = &hmp->mp->mnt_vn_spec_ops;
 163                                 addaliasu(vp, ip->ino_data.rmajor,
 164                                           ip->ino_data.rminor);
 165                                 break;
 166                         case HAMMER_OBJTYPE_FIFO:
 167                                 vp->v_ops = &hmp->mp->mnt_vn_fifo_ops;
 168                                 break;
 169                         default:
 170                                 break;
 171                         }
 172
 173                         /*
 174                          * Only mark as the root vnode if the ip is not
 175                          * historical, otherwise the VFS cache will get
 176                          * confused.  The other half of the special handling
 177                          * is in hammer_vop_nlookupdotdot().
 178                          */
 179                         if (ip->obj_id == HAMMER_OBJID_ROOT &&
 180                             ip->obj_asof == hmp->asof) {
 181                                 vp->v_flag |= VROOT;
 182                         }
 183
 184                         vp->v_data = (void *)ip;
 185                         /* vnode locked by getnewvnode() */
 186                         /* make related vnode dirty if inode dirty? */
 187                         hammer_unlock(&ip->lock);
 188                         if (vp->v_type == VREG)
 189                                 vinitvmio(vp, ip->ino_data.size);
 190                         break;
 191                 }
 192
 193                 /*
 194                  * loop if the vget fails (aka races), or if the vp
 195                  * no longer matches ip->vp.
 196                  */
 197                 if (vget(vp, LK_EXCLUSIVE) == 0) {
 198                         if (vp == ip->vp)
 199                                 break;
 200                         vput(vp);
 201                 }
 202         }
 203         *vpp = vp;
 204         return(error);
 205 }
 206
 207 /*
 208  * Acquire a HAMMER inode.  The returned inode is not locked.  These functions
 209  * do not attach or detach the related vnode (use hammer_get_vnode() for
 210  * that).
 211  *
 212  * The flags argument is only applied for newly created inodes, and only
 213  * certain flags are inherited.
 214  *
 215  * Called from the frontend.
 216  */
 217 struct hammer_inode *
 218 hammer_get_inode(hammer_transaction_t trans, struct hammer_node **cache,
 219                  u_int64_t obj_id, hammer_tid_t asof, int flags, int *errorp)
 220 {
 221         hammer_mount_t hmp = trans->hmp;
 222         struct hammer_inode_info iinfo;
 223         struct hammer_cursor cursor;
 224         struct hammer_inode *ip;
 225
 226         /*
 227          * Determine if we already have an inode cached.  If we do then
 228          * we are golden.
 229          */
 230         iinfo.obj_id = obj_id;
 231         iinfo.obj_asof = asof;
 232 loop:
 233         ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &iinfo);
 234         if (ip) {
 235                 hammer_ref(&ip->lock);
 236                 *errorp = 0;
 237                 return(ip);
 238         }
 239
 240         /*
 241          * Impose a slow-down if HAMMER is heavily backlogged on cleaning
 242          * out reclaimed inodes.
 243          */
 244         if (hmp->inode_reclaims > HAMMER_RECLAIM_MIN &&
 245             curthread != hmp->flusher_td) {
 246                 hammer_inode_waitreclaims(hmp);
 247         }
 248
 249         /*
 250          * Allocate a new inode structure and deal with races later.
 251          */
 252         ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
 253         ++hammer_count_inodes;
 254         ++hmp->count_inodes;
 255         ip->obj_id = obj_id;
 256         ip->obj_asof = iinfo.obj_asof;
 257         ip->hmp = hmp;
 258         ip->flags = flags & HAMMER_INODE_RO;
 259         if (hmp->ronly)
 260                 ip->flags |= HAMMER_INODE_RO;
 261         ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
 262         RB_INIT(&ip->rec_tree);
 263         TAILQ_INIT(&ip->target_list);
 264
 265         /*
 266          * Locate the on-disk inode.
 267          */
 268 retry:
 269         hammer_init_cursor(trans, &cursor, cache, NULL);
 270         cursor.key_beg.localization = HAMMER_LOCALIZE_INODE;
 271         cursor.key_beg.obj_id = ip->obj_id;
 272         cursor.key_beg.key = 0;
 273         cursor.key_beg.create_tid = 0;
 274         cursor.key_beg.delete_tid = 0;
 275         cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
 276         cursor.key_beg.obj_type = 0;
 277         cursor.asof = iinfo.obj_asof;
 278         cursor.flags = HAMMER_CURSOR_GET_LEAF | HAMMER_CURSOR_GET_DATA |
 279                        HAMMER_CURSOR_ASOF;
 280
 281         *errorp = hammer_btree_lookup(&cursor);
 282         if (*errorp == EDEADLK) {
 283                 hammer_done_cursor(&cursor);
 284                 goto retry;
 285         }
 286
 287         /*
 288          * On success the B-Tree lookup will hold the appropriate
 289          * buffer cache buffers and provide a pointer to the requested
 290          * information.  Copy the information to the in-memory inode
 291          * and cache the B-Tree node to improve future operations.
 292          */
 293         if (*errorp == 0) {
 294                 ip->ino_leaf = cursor.node->ondisk->elms[cursor.index].leaf;
 295                 ip->ino_data = cursor.data->inode;
 296                 hammer_cache_node(cursor.node, &ip->cache[0]);
 297                 if (cache)
 298                         hammer_cache_node(cursor.node, cache);
 299         }
 300
 301         /*
 302          * On success load the inode's record and data and insert the
 303          * inode into the B-Tree.  It is possible to race another lookup
 304          * insertion of the same inode so deal with that condition too.
 305          *
 306          * The cursor's locked node interlocks against others creating and
 307          * destroying ip while we were blocked.
 308          */
 309         if (*errorp == 0) {
 310                 hammer_ref(&ip->lock);
 311                 if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
 312                         hammer_uncache_node(&ip->cache[0]);
 313                         hammer_uncache_node(&ip->cache[1]);
 314                         KKASSERT(ip->lock.refs == 1);
 315                         --hammer_count_inodes;
 316                         --hmp->count_inodes;
 317                         kfree(ip, M_HAMMER);
 318                         hammer_done_cursor(&cursor);
 319                         goto loop;
 320                 }
 321                 ip->flags |= HAMMER_INODE_ONDISK;
 322         } else {
 323                 /*
 324                  * Do not panic on read-only accesses which fail, particularly
 325                  * historical accesses where the snapshot might not have
 326                  * complete connectivity.
 327                  */
 328                 if ((flags & HAMMER_INODE_RO) == 0) {
 329                         kprintf("hammer_get_inode: failed ip %p obj_id %016llx cursor %p error %d\n",
 330                                 ip, ip->obj_id, &cursor, *errorp);
 331                         Debugger("x");
 332                 }
 333                 if (ip->flags & HAMMER_INODE_RSV_INODES) {
 334                         ip->flags &= ~HAMMER_INODE_RSV_INODES; /* sanity */
 335                         --hmp->rsv_inodes;
 336                 }
 337                 hmp->rsv_databufs -= ip->rsv_databufs;
 338                 ip->rsv_databufs = 0;                          /* sanity */
 339
 340                 --hammer_count_inodes;
 341                 --hmp->count_inodes;
 342                 kfree(ip, M_HAMMER);
 343                 ip = NULL;
 344         }
 345         hammer_done_cursor(&cursor);
 346         return (ip);
 347 }
 348
 349 /*
 350  * Create a new filesystem object, returning the inode in *ipp.  The
 351  * returned inode will be referenced.
 352  *
 353  * The inode is created in-memory.
 354  */
 355 int
 356 hammer_create_inode(hammer_transaction_t trans, struct vattr *vap,
 357                     struct ucred *cred, hammer_inode_t dip,
 358                     struct hammer_inode **ipp)
 359 {
 360         hammer_mount_t hmp;
 361         hammer_inode_t ip;
 362         uid_t xuid;
 363
 364         hmp = trans->hmp;
 365         ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
 366         ++hammer_count_inodes;
 367         ++hmp->count_inodes;
 368         ip->obj_id = hammer_alloc_objid(trans, dip);
 369         KKASSERT(ip->obj_id != 0);
 370         ip->obj_asof = hmp->asof;
 371         ip->hmp = hmp;
 372         ip->flush_state = HAMMER_FST_IDLE;
 373         ip->flags = HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES;
 374
 375         ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
 376         RB_INIT(&ip->rec_tree);
 377         TAILQ_INIT(&ip->target_list);
 378
 379         ip->ino_leaf.atime = trans->time;
 380         ip->ino_data.mtime = trans->time;
 381         ip->ino_data.size = 0;
 382         ip->ino_data.nlinks = 0;
 383
 384         /*
 385          * A nohistory designator on the parent directory is inherited by
 386          * the child.
 387          */
 388         ip->ino_data.uflags = dip->ino_data.uflags &
 389                               (SF_NOHISTORY|UF_NOHISTORY|UF_NODUMP);
 390
 391         ip->ino_leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;
 392         ip->ino_leaf.base.localization = HAMMER_LOCALIZE_INODE;
 393         ip->ino_leaf.base.obj_id = ip->obj_id;
 394         ip->ino_leaf.base.key = 0;
 395         ip->ino_leaf.base.create_tid = 0;
 396         ip->ino_leaf.base.delete_tid = 0;
 397         ip->ino_leaf.base.rec_type = HAMMER_RECTYPE_INODE;
 398         ip->ino_leaf.base.obj_type = hammer_get_obj_type(vap->va_type);
 399
 400         ip->ino_data.obj_type = ip->ino_leaf.base.obj_type;
 401         ip->ino_data.version = HAMMER_INODE_DATA_VERSION;
 402         ip->ino_data.mode = vap->va_mode;
 403         ip->ino_data.ctime = trans->time;
 404         ip->ino_data.parent_obj_id = (dip) ? dip->ino_leaf.base.obj_id : 0;
 405
 406         switch(ip->ino_leaf.base.obj_type) {
 407         case HAMMER_OBJTYPE_CDEV:
 408         case HAMMER_OBJTYPE_BDEV:
 409                 ip->ino_data.rmajor = vap->va_rmajor;
 410                 ip->ino_data.rminor = vap->va_rminor;
 411                 break;
 412         default:
 413                 break;
 414         }
 415
 416         /*
 417          * Calculate default uid/gid and overwrite with information from
 418          * the vap.
 419          */
 420         xuid = hammer_to_unix_xid(&dip->ino_data.uid);
 421         xuid = vop_helper_create_uid(hmp->mp, dip->ino_data.mode, xuid, cred,
 422                                      &vap->va_mode);
 423         ip->ino_data.mode = vap->va_mode;
 424
 425         if (vap->va_vaflags & VA_UID_UUID_VALID)
 426                 ip->ino_data.uid = vap->va_uid_uuid;
 427         else if (vap->va_uid != (uid_t)VNOVAL)
 428                 hammer_guid_to_uuid(&ip->ino_data.uid, vap->va_uid);
 429         else
 430                 hammer_guid_to_uuid(&ip->ino_data.uid, xuid);
 431
 432         if (vap->va_vaflags & VA_GID_UUID_VALID)
 433                 ip->ino_data.gid = vap->va_gid_uuid;
 434         else if (vap->va_gid != (gid_t)VNOVAL)
 435                 hammer_guid_to_uuid(&ip->ino_data.gid, vap->va_gid);
 436         else
 437                 ip->ino_data.gid = dip->ino_data.gid;
 438
 439         hammer_ref(&ip->lock);
 440         if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
 441                 hammer_unref(&ip->lock);
 442                 panic("hammer_create_inode: duplicate obj_id %llx", ip->obj_id);
 443         }
 444         *ipp = ip;
 445         return(0);
 446 }
 447
 448 /*
 449  * Called by hammer_sync_inode().
 450  */
 451 static int
 452 hammer_update_inode(hammer_cursor_t cursor, hammer_inode_t ip)
 453 {
 454         hammer_transaction_t trans = cursor->trans;
 455         hammer_record_t record;
 456         int error;
 457
 458 retry:
 459         error = 0;
 460
 461         /*
 462          * If the inode has a presence on-disk then locate it and mark
 463          * it deleted, setting DELONDISK.
 464          *
 465          * The record may or may not be physically deleted, depending on
 466          * the retention policy.
 467          */
 468         if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
 469             HAMMER_INODE_ONDISK) {
 470                 hammer_normalize_cursor(cursor);
 471                 cursor->key_beg.localization = HAMMER_LOCALIZE_INODE;
 472                 cursor->key_beg.obj_id = ip->obj_id;
 473                 cursor->key_beg.key = 0;
 474                 cursor->key_beg.create_tid = 0;
 475                 cursor->key_beg.delete_tid = 0;
 476                 cursor->key_beg.rec_type = HAMMER_RECTYPE_INODE;
 477                 cursor->key_beg.obj_type = 0;
 478                 cursor->asof = ip->obj_asof;
 479                 cursor->flags &= ~HAMMER_CURSOR_INITMASK;
 480                 cursor->flags |= HAMMER_CURSOR_GET_LEAF | HAMMER_CURSOR_ASOF;
 481                 cursor->flags |= HAMMER_CURSOR_BACKEND;
 482
 483                 error = hammer_btree_lookup(cursor);
 484                 if (hammer_debug_inode)
 485                         kprintf("IPDEL %p %08x %d", ip, ip->flags, error);
 486                 if (error) {
 487                         kprintf("error %d\n", error);
 488                         Debugger("hammer_update_inode");
 489                 }
 490
 491                 if (error == 0) {
 492                         error = hammer_ip_delete_record(cursor, ip, trans->tid);
 493                         if (hammer_debug_inode)
 494                                 kprintf(" error %d\n", error);
 495                         if (error && error != EDEADLK) {
 496                                 kprintf("error %d\n", error);
 497                                 Debugger("hammer_update_inode2");
 498                         }
 499                         if (error == 0) {
 500                                 ip->flags |= HAMMER_INODE_DELONDISK;
 501                         }
 502                         if (cursor->node)
 503                                 hammer_cache_node(cursor->node, &ip->cache[0]);
 504                 }
 505                 if (error == EDEADLK) {
 506                         hammer_done_cursor(cursor);
 507                         error = hammer_init_cursor(trans, cursor,
 508                                                    &ip->cache[0], ip);
 509                         if (hammer_debug_inode)
 510                                 kprintf("IPDED %p %d\n", ip, error);
 511                         if (error == 0)
 512                                 goto retry;
 513                 }
 514         }
 515
 516         /*
 517          * Ok, write out the initial record or a new record (after deleting
 518          * the old one), unless the DELETED flag is set.  This routine will
 519          * clear DELONDISK if it writes out a record.
 520          *
 521          * Update our inode statistics if this is the first application of
 522          * the inode on-disk.
 523          */
 524         if (error == 0 && (ip->flags & HAMMER_INODE_DELETED) == 0) {
 525                 /*
 526                  * Generate a record and write it to the media
 527                  */
 528                 record = hammer_alloc_mem_record(ip, 0);
 529                 record->type = HAMMER_MEM_RECORD_INODE;
 530                 record->flush_state = HAMMER_FST_FLUSH;
 531                 record->leaf = ip->sync_ino_leaf;
 532                 record->leaf.base.create_tid = trans->tid;
 533                 record->leaf.data_len = sizeof(ip->sync_ino_data);
 534                 record->data = (void *)&ip->sync_ino_data;
 535                 record->flags |= HAMMER_RECF_INTERLOCK_BE;
 536                 for (;;) {
 537                         error = hammer_ip_sync_record_cursor(cursor, record);
 538                         if (hammer_debug_inode)
 539                                 kprintf("GENREC %p rec %08x %d\n",
 540                                         ip, record->flags, error);
 541                         if (error != EDEADLK)
 542                                 break;
 543                         hammer_done_cursor(cursor);
 544                         error = hammer_init_cursor(trans, cursor,
 545                                                    &ip->cache[0], ip);
 546                         if (hammer_debug_inode)
 547                                 kprintf("GENREC reinit %d\n", error);
 548                         if (error)
 549                                 break;
 550                 }
 551                 if (error) {
 552                         kprintf("error %d\n", error);
 553                         Debugger("hammer_update_inode3");
 554                 }
 555
 556                 /*
 557                  * The record isn't managed by the inode's record tree,
 558                  * destroy it whether we succeed or fail.
 559                  */
 560                 record->flags &= ~HAMMER_RECF_INTERLOCK_BE;
 561                 record->flags |= HAMMER_RECF_DELETED_FE;
 562                 record->flush_state = HAMMER_FST_IDLE;
 563                 hammer_rel_mem_record(record);
 564
 565                 /*
 566                  * Finish up.
 567                  */
 568                 if (error == 0) {
 569                         if (hammer_debug_inode)
 570                                 kprintf("CLEANDELOND %p %08x\n", ip, ip->flags);
 571                         ip->sync_flags &= ~(HAMMER_INODE_DDIRTY |
 572                                             HAMMER_INODE_ITIMES);
 573                         ip->flags &= ~HAMMER_INODE_DELONDISK;
 574
 575                         /*
 576                          * Root volume count of inodes
 577                          */
 578                         if ((ip->flags & HAMMER_INODE_ONDISK) == 0) {
 579                                 hammer_modify_volume_field(trans,
 580                                                            trans->rootvol,
 581                                                            vol0_stat_inodes);
 582                                 ++ip->hmp->rootvol->ondisk->vol0_stat_inodes;
 583                                 hammer_modify_volume_done(trans->rootvol);
 584                                 ip->flags |= HAMMER_INODE_ONDISK;
 585                                 if (hammer_debug_inode)
 586                                         kprintf("NOWONDISK %p\n", ip);
 587                         }
 588                 }
 589         }
 590
 591         /*
 592          * If the inode has been destroyed, clean out any left-over flags
 593          * that may have been set by the frontend.
 594          */
 595         if (error == 0 && (ip->flags & HAMMER_INODE_DELETED)) {
 596                 ip->sync_flags &= ~(HAMMER_INODE_DDIRTY |
 597                                     HAMMER_INODE_ITIMES);
 598         }
 599         return(error);
 600 }
 601
 602 /*
 603  * Update only the itimes fields.  This is done no-historically.  The
 604  * record is updated in-place on the disk.
 605  */
 606 static int
 607 hammer_update_itimes(hammer_cursor_t cursor, hammer_inode_t ip)
 608 {
 609         hammer_transaction_t trans = cursor->trans;
 610         struct hammer_btree_leaf_elm *leaf;
 611         int error;
 612
 613 retry:
 614         error = 0;
 615         if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
 616             HAMMER_INODE_ONDISK) {
 617                 hammer_normalize_cursor(cursor);
 618                 cursor->key_beg.localization = HAMMER_LOCALIZE_INODE;
 619                 cursor->key_beg.obj_id = ip->obj_id;
 620                 cursor->key_beg.key = 0;
 621                 cursor->key_beg.create_tid = 0;
 622                 cursor->key_beg.delete_tid = 0;
 623                 cursor->key_beg.rec_type = HAMMER_RECTYPE_INODE;
 624                 cursor->key_beg.obj_type = 0;
 625                 cursor->asof = ip->obj_asof;
 626                 cursor->flags &= ~HAMMER_CURSOR_INITMASK;
 627                 cursor->flags |= HAMMER_CURSOR_GET_LEAF | HAMMER_CURSOR_ASOF;
 628                 cursor->flags |= HAMMER_CURSOR_BACKEND;
 629
 630                 error = hammer_btree_lookup(cursor);
 631                 if (error) {
 632                         kprintf("error %d\n", error);
 633                         Debugger("hammer_update_itimes1");
 634                 }
 635                 if (error == 0) {
 636                         /*
 637                          * Do not generate UNDO records for atime updates.
 638                          */
 639                         leaf = cursor->leaf;
 640                         hammer_modify_node(trans, cursor->node,
 641                                            &leaf->atime, sizeof(leaf->atime));
 642                         leaf->atime = ip->sync_ino_leaf.atime;
 643                         hammer_modify_node_done(cursor->node);
 644                         /*rec->ino_mtime = ip->sync_ino_rec.ino_mtime;*/
 645                         ip->sync_flags &= ~HAMMER_INODE_ITIMES;
 646                         /* XXX recalculate crc */
 647                         hammer_cache_node(cursor->node, &ip->cache[0]);
 648                 }
 649                 if (error == EDEADLK) {
 650                         hammer_done_cursor(cursor);
 651                         error = hammer_init_cursor(trans, cursor,
 652                                                    &ip->cache[0], ip);
 653                         if (error == 0)
 654                                 goto retry;
 655                 }
 656         }
 657         return(error);
 658 }
 659
 660 /*
 661  * Release a reference on an inode, flush as requested.
 662  *
 663  * On the last reference we queue the inode to the flusher for its final
 664  * disposition.
 665  */
 666 void
 667 hammer_rel_inode(struct hammer_inode *ip, int flush)
 668 {
 669         hammer_mount_t hmp = ip->hmp;
 670
 671         /*
 672          * Handle disposition when dropping the last ref.
 673          */
 674         for (;;) {
 675                 if (ip->lock.refs == 1) {
 676                         /*
 677                          * Determine whether on-disk action is needed for
 678                          * the inode's final disposition.
 679                          */
 680                         KKASSERT(ip->vp == NULL);
 681                         hammer_inode_unloadable_check(ip, 0);
 682                         if (ip->flags & HAMMER_INODE_MODMASK) {
 683                                 if (hmp->rsv_inodes > desiredvnodes) {
 684                                         hammer_flush_inode(ip,
 685                                                            HAMMER_FLUSH_SIGNAL);
 686                                 } else {
 687                                         hammer_flush_inode(ip, 0);
 688                                 }
 689                         } else if (ip->lock.refs == 1) {
 690                                 hammer_unload_inode(ip);
 691                                 break;
 692                         }
 693                 } else {
 694                         if (flush)
 695                                 hammer_flush_inode(ip, 0);
 696
 697                         /*
 698                          * The inode still has multiple refs, try to drop
 699                          * one ref.
 700                          */
 701                         KKASSERT(ip->lock.refs >= 1);
 702                         if (ip->lock.refs > 1) {
 703                                 hammer_unref(&ip->lock);
 704                                 break;
 705                         }
 706                 }
 707         }
 708 }
 709
 710 /*
 711  * Unload and destroy the specified inode.  Must be called with one remaining
 712  * reference.  The reference is disposed of.
 713  *
 714  * This can only be called in the context of the flusher.
 715  */
 716 static int
 717 hammer_unload_inode(struct hammer_inode *ip)
 718 {
 719         hammer_mount_t hmp = ip->hmp;
 720
 721         KASSERT(ip->lock.refs == 1,
 722                 ("hammer_unload_inode: %d refs\n", ip->lock.refs));
 723         KKASSERT(ip->vp == NULL);
 724         KKASSERT(ip->flush_state == HAMMER_FST_IDLE);
 725         KKASSERT(ip->cursor_ip_refs == 0);
 726         KKASSERT(ip->lock.lockcount == 0);
 727         KKASSERT((ip->flags & HAMMER_INODE_MODMASK) == 0);
 728
 729         KKASSERT(RB_EMPTY(&ip->rec_tree));
 730         KKASSERT(TAILQ_EMPTY(&ip->target_list));
 731
 732         RB_REMOVE(hammer_ino_rb_tree, &hmp->rb_inos_root, ip);
 733
 734         hammer_uncache_node(&ip->cache[0]);
 735         hammer_uncache_node(&ip->cache[1]);
 736         if (ip->objid_cache)
 737                 hammer_clear_objid(ip);
 738         --hammer_count_inodes;
 739         --hmp->count_inodes;
 740         if (hmp->flags & HAMMER_MOUNT_WAITIMAX)
 741                 hammer_inode_wakereclaims(hmp);
 742
 743         if (ip->flags & HAMMER_INODE_RECLAIM) {
 744                 --hammer_count_reclaiming;
 745                 --hmp->inode_reclaims;
 746                 ip->flags &= ~HAMMER_INODE_RECLAIM;
 747         }
 748         kfree(ip, M_HAMMER);
 749
 750         return(0);
 751 }
 752
 753 /*
 754  * Called on mount -u when switching from RW to RO or vise-versa.  Adjust
 755  * the read-only flag for cached inodes.
 756  *
 757  * This routine is called from a RB_SCAN().
 758  */
 759 int
 760 hammer_reload_inode(hammer_inode_t ip, void *arg __unused)
 761 {
 762         hammer_mount_t hmp = ip->hmp;
 763
 764         if (hmp->ronly || hmp->asof != HAMMER_MAX_TID)
 765                 ip->flags |= HAMMER_INODE_RO;
 766         else
 767                 ip->flags &= ~HAMMER_INODE_RO;
 768         return(0);
 769 }
 770
 771 /*
 772  * A transaction has modified an inode, requiring updates as specified by
 773  * the passed flags.
 774  *
 775  * HAMMER_INODE_DDIRTY: Inode data has been updated
 776  * HAMMER_INODE_XDIRTY: Dirty in-memory records
 777  * HAMMER_INODE_BUFS:   Dirty buffer cache buffers
 778  * HAMMER_INODE_DELETED: Inode record/data must be deleted
 779  * HAMMER_INODE_ITIMES: mtime/atime has been updated
 780  */
 781 void
 782 hammer_modify_inode(hammer_inode_t ip, int flags)
 783 {
 784         KKASSERT ((ip->flags & HAMMER_INODE_RO) == 0 ||
 785                   (flags & (HAMMER_INODE_DDIRTY |
 786                             HAMMER_INODE_XDIRTY | HAMMER_INODE_BUFS |
 787                             HAMMER_INODE_DELETED | HAMMER_INODE_ITIMES)) == 0);
 788         if ((ip->flags & HAMMER_INODE_RSV_INODES) == 0) {
 789                 ip->flags |= HAMMER_INODE_RSV_INODES;
 790                 ++ip->hmp->rsv_inodes;
 791         }
 792
 793         ip->flags |= flags;
 794 }
 795
 796 /*
 797  * Request that an inode be flushed.  This whole mess cannot block and may
 798  * recurse.  Once requested HAMMER will attempt to actively flush it until
 799  * the flush can be done.
 800  *
 801  * The inode may already be flushing, or may be in a setup state.  We can
 802  * place the inode in a flushing state if it is currently idle and flag it
 803  * to reflush if it is currently flushing.
 804  */
 805 void
 806 hammer_flush_inode(hammer_inode_t ip, int flags)
 807 {
 808         hammer_record_t depend;
 809         int r, good;
 810
 811         /*
 812          * Trivial 'nothing to flush' case.  If the inode is ina SETUP
 813          * state we have to put it back into an IDLE state so we can
 814          * drop the extra ref.
 815          */
 816         if ((ip->flags & HAMMER_INODE_MODMASK) == 0) {
 817                 if (ip->flush_state == HAMMER_FST_SETUP) {
 818                         ip->flush_state = HAMMER_FST_IDLE;
 819                         hammer_rel_inode(ip, 0);
 820                 }
 821                 return;
 822         }
 823
 824         /*
 825          * Our flush action will depend on the current state.
 826          */
 827         switch(ip->flush_state) {
 828         case HAMMER_FST_IDLE:
 829                 /*
 830                  * We have no dependancies and can flush immediately.  Some
 831                  * our children may not be flushable so we have to re-test
 832                  * with that additional knowledge.
 833                  */
 834                 hammer_flush_inode_core(ip, flags);
 835                 break;
 836         case HAMMER_FST_SETUP:
 837                 /*
 838                  * Recurse upwards through dependancies via target_list
 839                  * and start their flusher actions going if possible.
 840                  *
 841                  * 'good' is our connectivity.  -1 means we have none and
 842                  * can't flush, 0 means there weren't any dependancies, and
 843                  * 1 means we have good connectivity.
 844                  */
 845                 good = 0;
 846                 TAILQ_FOREACH(depend, &ip->target_list, target_entry) {
 847                         r = hammer_setup_parent_inodes(depend);
 848                         if (r < 0 && good == 0)
 849                                 good = -1;
 850                         if (r > 0)
 851                                 good = 1;
 852                 }
 853
 854                 /*
 855                  * We can continue if good >= 0.  Determine how many records
 856                  * under our inode can be flushed (and mark them).
 857                  */
 858                 if (good >= 0) {
 859                         hammer_flush_inode_core(ip, flags);
 860                 } else {
 861                         ip->flags |= HAMMER_INODE_REFLUSH;
 862                         if (flags & HAMMER_FLUSH_SIGNAL) {
 863                                 ip->flags |= HAMMER_INODE_RESIGNAL;
 864                                 hammer_flusher_async(ip->hmp);
 865                         }
 866                 }
 867                 break;
 868         default:
 869                 /*
 870                  * We are already flushing, flag the inode to reflush
 871                  * if needed after it completes its current flush.
 872                  */
 873                 if ((ip->flags & HAMMER_INODE_REFLUSH) == 0)
 874                         ip->flags |= HAMMER_INODE_REFLUSH;
 875                 if (flags & HAMMER_FLUSH_SIGNAL) {
 876                         ip->flags |= HAMMER_INODE_RESIGNAL;
 877                         hammer_flusher_async(ip->hmp);
 878                 }
 879                 break;
 880         }
 881 }
 882
 883 /*
 884  * We are asked to recurse upwards and convert the record from SETUP
 885  * to FLUSH if possible.  record->ip is a parent of the caller's inode,
 886  * and record->target_ip is the caller's inode.
 887  *
 888  * Return 1 if the record gives us connectivity
 889  *
 890  * Return 0 if the record is not relevant
 891  *
 892  * Return -1 if we can't resolve the dependancy and there is no connectivity.
 893  */
 894 static int
 895 hammer_setup_parent_inodes(hammer_record_t record)
 896 {
 897         hammer_mount_t hmp = record->ip->hmp;
 898         hammer_record_t depend;
 899         hammer_inode_t ip;
 900         int r, good;
 901
 902         KKASSERT(record->flush_state != HAMMER_FST_IDLE);
 903         ip = record->ip;
 904
 905         /*
 906          * If the record is already flushing, is it in our flush group?
 907          *
 908          * If it is in our flush group but it is a general record or a
 909          * delete-on-disk, it does not improve our connectivity (return 0),
 910          * and if the target inode is not trying to destroy itself we can't
 911          * allow the operation yet anyway (the second return -1).
 912          */
 913         if (record->flush_state == HAMMER_FST_FLUSH) {
 914                 if (record->flush_group != hmp->flusher_next) {
 915                         ip->flags |= HAMMER_INODE_REFLUSH;
 916                         return(-1);
 917                 }
 918                 if (record->type == HAMMER_MEM_RECORD_ADD)
 919                         return(1);
 920                 /* GENERAL or DEL */
 921                 return(0);
 922         }
 923
 924         /*
 925          * It must be a setup record.  Try to resolve the setup dependancies
 926          * by recursing upwards so we can place ip on the flush list.
 927          */
 928         KKASSERT(record->flush_state == HAMMER_FST_SETUP);
 929
 930         good = 0;
 931         TAILQ_FOREACH(depend, &ip->target_list, target_entry) {
 932                 r = hammer_setup_parent_inodes(depend);
 933                 if (r < 0 && good == 0)
 934                         good = -1;
 935                 if (r > 0)
 936                         good = 1;
 937         }
 938
 939         /*
 940          * We can't flush ip because it has no connectivity (XXX also check
 941          * nlinks for pre-existing connectivity!).  Flag it so any resolution
 942          * recurses back down.
 943          */
 944         if (good < 0) {
 945                 ip->flags |= HAMMER_INODE_REFLUSH;
 946                 return(good);
 947         }
 948
 949         /*
 950          * We are go, place the parent inode in a flushing state so we can
 951          * place its record in a flushing state.  Note that the parent
 952          * may already be flushing.  The record must be in the same flush
 953          * group as the parent.
 954          */
 955         if (ip->flush_state != HAMMER_FST_FLUSH)
 956                 hammer_flush_inode_core(ip, HAMMER_FLUSH_RECURSION);
 957         KKASSERT(ip->flush_state == HAMMER_FST_FLUSH);
 958         KKASSERT(record->flush_state == HAMMER_FST_SETUP);
 959
 960 #if 0
 961         if (record->type == HAMMER_MEM_RECORD_DEL &&
 962             (record->target_ip->flags & (HAMMER_INODE_DELETED|HAMMER_INODE_DELONDISK)) == 0) {
 963                 /*
 964                  * Regardless of flushing state we cannot sync this path if the
 965                  * record represents a delete-on-disk but the target inode
 966                  * is not ready to sync its own deletion.
 967                  *
 968                  * XXX need to count effective nlinks to determine whether
 969                  * the flush is ok, otherwise removing a hardlink will
 970                  * just leave the DEL record to rot.
 971                  */
 972                 record->target_ip->flags |= HAMMER_INODE_REFLUSH;
 973                 return(-1);
 974         } else
 975 #endif
 976         if (ip->flush_group == ip->hmp->flusher_next) {
 977                 /*
 978                  * This is the record we wanted to synchronize.
 979                  */
 980                 record->flush_state = HAMMER_FST_FLUSH;
 981                 record->flush_group = ip->flush_group;
 982                 hammer_ref(&record->lock);
 983                 if (record->type == HAMMER_MEM_RECORD_ADD)
 984                         return(1);
 985
 986                 /*
 987                  * A general or delete-on-disk record does not contribute
 988                  * to our visibility.  We can still flush it, however.
 989                  */
 990                 return(0);
 991         } else {
 992                 /*
 993                  * We couldn't resolve the dependancies, request that the
 994                  * inode be flushed when the dependancies can be resolved.
 995                  */
 996                 ip->flags |= HAMMER_INODE_REFLUSH;
 997                 return(-1);
 998         }
 999 }
1000
1001 /*
1002  * This is the core routine placing an inode into the FST_FLUSH state.
1003  */
1004 static void
1005 hammer_flush_inode_core(hammer_inode_t ip, int flags)
1006 {
1007         int go_count;
1008
1009         /*
1010          * Set flush state and prevent the flusher from cycling into
1011          * the next flush group.  Do not place the ip on the list yet.
1012          * Inodes not in the idle state get an extra reference.
1013          */
1014         KKASSERT(ip->flush_state != HAMMER_FST_FLUSH);
1015         if (ip->flush_state == HAMMER_FST_IDLE)
1016                 hammer_ref(&ip->lock);
1017         ip->flush_state = HAMMER_FST_FLUSH;
1018         ip->flush_group = ip->hmp->flusher_next;
1019         ++ip->hmp->flusher_lock;
1020
1021         /*
1022          * We need to be able to vfsync/truncate from the backend.
1023          */
1024         KKASSERT((ip->flags & HAMMER_INODE_VHELD) == 0);
1025         if (ip->vp && (ip->vp->v_flag & VINACTIVE) == 0) {
1026                 ip->flags |= HAMMER_INODE_VHELD;
1027                 vref(ip->vp);
1028         }
1029
1030         /*
1031          * Figure out how many in-memory records we can actually flush
1032          * (not including inode meta-data, buffers, etc).
1033          */
1034         if (flags & HAMMER_FLUSH_RECURSION) {
1035                 go_count = 1;
1036         } else {
1037                 go_count = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
1038                                    hammer_setup_child_callback, NULL);
1039         }
1040
1041         /*
1042          * This is a more involved test that includes go_count.  If we
1043          * can't flush, flag the inode and return.  If go_count is 0 we
1044          * were are unable to flush any records in our rec_tree and
1045          * must ignore the XDIRTY flag.
1046          */
1047         if (go_count == 0) {
1048                 if ((ip->flags & HAMMER_INODE_MODMASK_NOXDIRTY) == 0) {
1049                         ip->flags |= HAMMER_INODE_REFLUSH;
1050                         ip->flush_state = HAMMER_FST_SETUP;
1051                         if (ip->flags & HAMMER_INODE_VHELD) {
1052                                 ip->flags &= ~HAMMER_INODE_VHELD;
1053                                 vrele(ip->vp);
1054                         }
1055                         if (flags & HAMMER_FLUSH_SIGNAL) {
1056                                 ip->flags |= HAMMER_INODE_RESIGNAL;
1057                                 hammer_flusher_async(ip->hmp);
1058                         }
1059                         if (--ip->hmp->flusher_lock == 0)
1060                                 wakeup(&ip->hmp->flusher_lock);
1061                         return;
1062                 }
1063         }
1064
1065         /*
1066          * Snapshot the state of the inode for the backend flusher.
1067          *
1068          * The truncation must be retained in the frontend until after
1069          * we've actually performed the record deletion.
1070          *
1071          * NOTE: The DELETING flag is a mod flag, but it is also sticky,
1072          * and stays in ip->flags.  Once set, it stays set until the
1073          * inode is destroyed.
1074          */
1075         ip->sync_flags = (ip->flags & HAMMER_INODE_MODMASK);
1076         ip->sync_trunc_off = ip->trunc_off;
1077         ip->sync_ino_leaf = ip->ino_leaf;
1078         ip->sync_ino_data = ip->ino_data;
1079         ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
1080         ip->flags &= ~HAMMER_INODE_MODMASK;
1081 #ifdef DEBUG_TRUNCATE
1082         if ((ip->sync_flags & HAMMER_INODE_TRUNCATED) && ip == HammerTruncIp)
1083                 kprintf("truncateS %016llx\n", ip->sync_trunc_off);
1084 #endif
1085
1086         /*
1087          * The flusher list inherits our inode and reference.
1088          */
1089         TAILQ_INSERT_TAIL(&ip->hmp->flush_list, ip, flush_entry);
1090         if (--ip->hmp->flusher_lock == 0)
1091                 wakeup(&ip->hmp->flusher_lock);
1092
1093         if (flags & HAMMER_FLUSH_SIGNAL) {
1094                 hammer_flusher_async(ip->hmp);
1095         }
1096 }
1097
1098 /*
1099  * Callback for scan of ip->rec_tree.  Try to include each record in our
1100  * flush.  ip->flush_group has been set but the inode has not yet been
1101  * moved into a flushing state.
1102  *
1103  * If we get stuck on a record we have to set HAMMER_INODE_REFLUSH on
1104  * both inodes.
1105  *
1106  * We return 1 for any record placed or found in FST_FLUSH, which prevents
1107  * the caller from shortcutting the flush.
1108  */
1109 static int
1110 hammer_setup_child_callback(hammer_record_t rec, void *data)
1111 {
1112         hammer_inode_t target_ip;
1113         hammer_inode_t ip;
1114         int r;
1115
1116         /*
1117          * If the record has been deleted by the backend (it's being held
1118          * by the frontend in a race), just ignore it.
1119          */
1120         if (rec->flags & HAMMER_RECF_DELETED_BE)
1121                 return(0);
1122
1123         /*
1124          * If the record is in an idle state it has no dependancies and
1125          * can be flushed.
1126          */
1127         ip = rec->ip;
1128         r = 0;
1129
1130         switch(rec->flush_state) {
1131         case HAMMER_FST_IDLE:
1132                 /*
1133                  * Record has no setup dependancy, we can flush it.
1134                  */
1135                 KKASSERT(rec->target_ip == NULL);
1136                 rec->flush_state = HAMMER_FST_FLUSH;
1137                 rec->flush_group = ip->flush_group;
1138                 hammer_ref(&rec->lock);
1139                 r = 1;
1140                 break;
1141         case HAMMER_FST_SETUP:
1142                 /*
1143                  * Record has a setup dependancy.  Try to include the
1144                  * target ip in the flush.
1145                  *
1146                  * We have to be careful here, if we do not do the right
1147                  * thing we can lose track of dirty inodes and the system
1148                  * will lockup trying to allocate buffers.
1149                  */
1150                 target_ip = rec->target_ip;
1151                 KKASSERT(target_ip != NULL);
1152                 KKASSERT(target_ip->flush_state != HAMMER_FST_IDLE);
1153                 if (target_ip->flush_state == HAMMER_FST_FLUSH) {
1154                         /*
1155                          * If the target IP is already flushing in our group
1156                          * we are golden, otherwise make sure the target
1157                          * reflushes.
1158                          */
1159                         if (target_ip->flush_group == ip->flush_group) {
1160                                 rec->flush_state = HAMMER_FST_FLUSH;
1161                                 rec->flush_group = ip->flush_group;
1162                                 hammer_ref(&rec->lock);
1163                                 r = 1;
1164                         } else {
1165                                 target_ip->flags |= HAMMER_INODE_REFLUSH;
1166                         }
1167                 } else if (rec->type == HAMMER_MEM_RECORD_ADD) {
1168                         /*
1169                          * If the target IP is not flushing we can force
1170                          * it to flush, even if it is unable to write out
1171                          * any of its own records we have at least one in
1172                          * hand that we CAN deal with.
1173                          */
1174                         rec->flush_state = HAMMER_FST_FLUSH;
1175                         rec->flush_group = ip->flush_group;
1176                         hammer_ref(&rec->lock);
1177                         hammer_flush_inode_core(target_ip,
1178                                                 HAMMER_FLUSH_RECURSION);
1179                         r = 1;
1180                 } else {
1181                         /*
1182                          * General or delete-on-disk record.
1183                          *
1184                          * XXX this needs help.  If a delete-on-disk we could
1185                          * disconnect the target.  If the target has its own
1186                          * dependancies they really need to be flushed.
1187                          *
1188                          * XXX
1189                          */
1190                         rec->flush_state = HAMMER_FST_FLUSH;
1191                         rec->flush_group = ip->flush_group;
1192                         hammer_ref(&rec->lock);
1193                         hammer_flush_inode_core(target_ip,
1194                                                 HAMMER_FLUSH_RECURSION);
1195                         r = 1;
1196                 }
1197                 break;
1198         case HAMMER_FST_FLUSH:
1199                 /*
1200                  * Record already associated with a flush group.  It had
1201                  * better be ours.
1202                  */
1203                 KKASSERT(rec->flush_group == ip->flush_group);
1204                 r = 1;
1205                 break;
1206         }
1207         return(r);
1208 }
1209
1210 /*
1211  * Wait for a previously queued flush to complete
1212  */
1213 void
1214 hammer_wait_inode(hammer_inode_t ip)
1215 {
1216         while (ip->flush_state != HAMMER_FST_IDLE) {
1217                 if (ip->flush_state == HAMMER_FST_SETUP) {
1218                         hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
1219                 } else {
1220                         ip->flags |= HAMMER_INODE_FLUSHW;
1221                         tsleep(&ip->flags, 0, "hmrwin", 0);
1222                 }
1223         }
1224 }
1225
1226 /*
1227  * Called by the backend code when a flush has been completed.
1228  * The inode has already been removed from the flush list.
1229  *
1230  * A pipelined flush can occur, in which case we must re-enter the
1231  * inode on the list and re-copy its fields.
1232  */
1233 void
1234 hammer_flush_inode_done(hammer_inode_t ip)
1235 {
1236         int dorel = 0;
1237
1238         KKASSERT(ip->flush_state == HAMMER_FST_FLUSH);
1239
1240         /*
1241          * Merge left-over flags back into the frontend and fix the state.
1242          */
1243         ip->flags |= ip->sync_flags;
1244
1245         /*
1246          * The backend may have adjusted nlinks, so if the adjusted nlinks
1247          * does not match the fronttend set the frontend's RDIRTY flag again.
1248          */
1249         if (ip->ino_data.nlinks != ip->sync_ino_data.nlinks)
1250                 ip->flags |= HAMMER_INODE_DDIRTY;
1251
1252         /*
1253          * Fix up the dirty buffer status.  IO completions will also
1254          * try to clean up rsv_databufs.
1255          */
1256         if (ip->vp && RB_ROOT(&ip->vp->v_rbdirty_tree)) {
1257                 ip->flags |= HAMMER_INODE_BUFS;
1258         } else {
1259                 ip->hmp->rsv_databufs -= ip->rsv_databufs;
1260                 ip->rsv_databufs = 0;
1261         }
1262
1263         /*
1264          * Re-set the XDIRTY flag if some of the inode's in-memory records
1265          * could not be flushed.
1266          */
1267         KKASSERT((RB_EMPTY(&ip->rec_tree) &&
1268                   (ip->flags & HAMMER_INODE_XDIRTY) == 0) ||
1269                  (!RB_EMPTY(&ip->rec_tree) &&
1270                   (ip->flags & HAMMER_INODE_XDIRTY) != 0));
1271
1272         /*
1273          * Do not lose track of inodes which no longer have vnode
1274          * assocations, otherwise they may never get flushed again.
1275          */
1276         if ((ip->flags & HAMMER_INODE_MODMASK) && ip->vp == NULL)
1277                 ip->flags |= HAMMER_INODE_REFLUSH;
1278
1279         /*
1280          * Adjust flush_state.  The target state (idle or setup) shouldn't
1281          * be terribly important since we will reflush if we really need
1282          * to do anything. XXX
1283          */
1284         if (TAILQ_EMPTY(&ip->target_list) && RB_EMPTY(&ip->rec_tree)) {
1285                 ip->flush_state = HAMMER_FST_IDLE;
1286                 dorel = 1;
1287         } else {
1288                 ip->flush_state = HAMMER_FST_SETUP;
1289         }
1290
1291         /*
1292          * Clean up the vnode ref
1293          */
1294         if (ip->flags & HAMMER_INODE_VHELD) {
1295                 ip->flags &= ~HAMMER_INODE_VHELD;
1296                 vrele(ip->vp);
1297         }
1298
1299         /*
1300          * If the frontend made more changes and requested another flush,
1301          * then try to get it running.
1302          */
1303         if (ip->flags & HAMMER_INODE_REFLUSH) {
1304                 ip->flags &= ~HAMMER_INODE_REFLUSH;
1305                 if (ip->flags & HAMMER_INODE_RESIGNAL) {
1306                         ip->flags &= ~HAMMER_INODE_RESIGNAL;
1307                         hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
1308                 } else {
1309                         hammer_flush_inode(ip, 0);
1310                 }
1311         }
1312
1313         /*
1314          * If the inode is now clean drop the space reservation.
1315          */
1316         if ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
1317             (ip->flags & HAMMER_INODE_RSV_INODES)) {
1318                 ip->flags &= ~HAMMER_INODE_RSV_INODES;
1319                 --ip->hmp->rsv_inodes;
1320         }
1321
1322         /*
1323          * Finally, if the frontend is waiting for a flush to complete,
1324          * wake it up.
1325          */
1326         if (ip->flush_state != HAMMER_FST_FLUSH) {
1327                 if (ip->flags & HAMMER_INODE_FLUSHW) {
1328                         ip->flags &= ~HAMMER_INODE_FLUSHW;
1329                         wakeup(&ip->flags);
1330                 }
1331         }
1332         if (dorel)
1333                 hammer_rel_inode(ip, 0);
1334 }
1335
1336 /*
1337  * Called from hammer_sync_inode() to synchronize in-memory records
1338  * to the media.
1339  */
1340 static int
1341 hammer_sync_record_callback(hammer_record_t record, void *data)
1342 {
1343         hammer_cursor_t cursor = data;
1344         hammer_transaction_t trans = cursor->trans;
1345         int error;
1346
1347         /*
1348          * Skip records that do not belong to the current flush.
1349          */
1350         ++hammer_stats_record_iterations;
1351         if (record->flush_state != HAMMER_FST_FLUSH)
1352                 return(0);
1353
1354 #if 1
1355         if (record->flush_group != record->ip->flush_group) {
1356                 kprintf("sync_record %p ip %p bad flush group %d %d\n", record, record->ip, record->flush_group ,record->ip->flush_group);
1357                 Debugger("blah2");
1358                 return(0);
1359         }
1360 #endif
1361         KKASSERT(record->flush_group == record->ip->flush_group);
1362
1363         /*
1364          * Interlock the record using the BE flag.  Once BE is set the
1365          * frontend cannot change the state of FE.
1366          *
1367          * NOTE: If FE is set prior to us setting BE we still sync the
1368          * record out, but the flush completion code converts it to
1369          * a delete-on-disk record instead of destroying it.
1370          */
1371         KKASSERT((record->flags & HAMMER_RECF_INTERLOCK_BE) == 0);
1372         record->flags |= HAMMER_RECF_INTERLOCK_BE;
1373
1374         /*
1375          * The backend may have already disposed of the record.
1376          */
1377         if (record->flags & HAMMER_RECF_DELETED_BE) {
1378                 error = 0;
1379                 goto done;
1380         }
1381
1382         /*
1383          * If the whole inode is being deleting all on-disk records will
1384          * be deleted very soon, we can't sync any new records to disk
1385          * because they will be deleted in the same transaction they were
1386          * created in (delete_tid == create_tid), which will assert.
1387          *
1388          * XXX There may be a case with RECORD_ADD with DELETED_FE set
1389          * that we currently panic on.
1390          */
1391         if (record->ip->sync_flags & HAMMER_INODE_DELETING) {
1392                 switch(record->type) {
1393                 case HAMMER_MEM_RECORD_DATA:
1394                         /*
1395                          * We don't have to do anything, if the record was
1396                          * committed the space will have been accounted for
1397                          * in the blockmap.
1398                          */
1399                         /* fall through */
1400                 case HAMMER_MEM_RECORD_GENERAL:
1401                         record->flags |= HAMMER_RECF_DELETED_FE;
1402                         record->flags |= HAMMER_RECF_DELETED_BE;
1403                         error = 0;
1404                         goto done;
1405                 case HAMMER_MEM_RECORD_ADD:
1406                         panic("hammer_sync_record_callback: illegal add "
1407                               "during inode deletion record %p", record);
1408                         break; /* NOT REACHED */
1409                 case HAMMER_MEM_RECORD_INODE:
1410                         panic("hammer_sync_record_callback: attempt to "
1411                               "sync inode record %p?", record);
1412                         break; /* NOT REACHED */
1413                 case HAMMER_MEM_RECORD_DEL:
1414                         /*
1415                          * Follow through and issue the on-disk deletion
1416                          */
1417                         break;
1418                 }
1419         }
1420
1421         /*
1422          * If DELETED_FE is set we may have already sent dependant pieces
1423          * to the disk and we must flush the record as if it hadn't been
1424          * deleted.  This creates a bit of a mess because we have to
1425          * have ip_sync_record convert the record to MEM_RECORD_DEL before
1426          * it inserts the B-Tree record.  Otherwise the media sync might
1427          * be visible to the frontend.
1428          */
1429         if (record->flags & HAMMER_RECF_DELETED_FE) {
1430                 if (record->type == HAMMER_MEM_RECORD_ADD) {
1431                         record->flags |= HAMMER_RECF_CONVERT_DELETE;
1432                 } else {
1433                         KKASSERT(record->type != HAMMER_MEM_RECORD_DEL);
1434                         return(0);
1435                 }
1436         }
1437
1438         /*
1439          * Assign the create_tid for new records.  Deletions already
1440          * have the record's entire key properly set up.
1441          */
1442         if (record->type != HAMMER_MEM_RECORD_DEL)
1443                 record->leaf.base.create_tid = trans->tid;
1444         for (;;) {
1445                 error = hammer_ip_sync_record_cursor(cursor, record);
1446                 if (error != EDEADLK)
1447                         break;
1448                 hammer_done_cursor(cursor);
1449                 error = hammer_init_cursor(trans, cursor, &record->ip->cache[0],
1450                                            record->ip);
1451                 if (error)
1452                         break;
1453         }
1454         record->flags &= ~HAMMER_RECF_CONVERT_DELETE;
1455
1456         if (error) {
1457                 error = -error;
1458                 if (error != -ENOSPC) {
1459                         kprintf("hammer_sync_record_callback: sync failed rec "
1460                                 "%p, error %d\n", record, error);
1461                         Debugger("sync failed rec");
1462                 }
1463         }
1464 done:
1465         hammer_flush_record_done(record, error);
1466         return(error);
1467 }
1468
1469 /*
1470  * XXX error handling
1471  */
1472 int
1473 hammer_sync_inode(hammer_inode_t ip)
1474 {
1475         struct hammer_transaction trans;
1476         struct hammer_cursor cursor;
1477         hammer_record_t depend;
1478         hammer_record_t next;
1479         int error, tmp_error;
1480         u_int64_t nlinks;
1481
1482         if ((ip->sync_flags & HAMMER_INODE_MODMASK) == 0)
1483                 return(0);
1484
1485         hammer_start_transaction_fls(&trans, ip->hmp);
1486         error = hammer_init_cursor(&trans, &cursor, &ip->cache[0], ip);
1487         if (error)
1488                 goto done;
1489
1490         /*
1491          * Any directory records referencing this inode which are not in
1492          * our current flush group must adjust our nlink count for the
1493          * purposes of synchronization to disk.
1494          *
1495          * Records which are in our flush group can be unlinked from our
1496          * inode now, potentially allowing the inode to be physically
1497          * deleted.
1498          */
1499         nlinks = ip->ino_data.nlinks;
1500         next = TAILQ_FIRST(&ip->target_list);
1501         while ((depend = next) != NULL) {
1502                 next = TAILQ_NEXT(depend, target_entry);
1503                 if (depend->flush_state == HAMMER_FST_FLUSH &&
1504                     depend->flush_group == ip->hmp->flusher_act) {
1505                         /*
1506                          * If this is an ADD that was deleted by the frontend
1507                          * the frontend nlinks count will have already been
1508                          * decremented, but the backend is going to sync its
1509                          * directory entry and must account for it.  The
1510                          * record will be converted to a delete-on-disk when
1511                          * it gets synced.
1512                          *
1513                          * If the ADD was not deleted by the frontend we
1514                          * can remove the dependancy from our target_list.
1515                          */
1516                         if (depend->flags & HAMMER_RECF_DELETED_FE) {
1517                                 ++nlinks;
1518                         } else {
1519                                 TAILQ_REMOVE(&ip->target_list, depend,
1520                                              target_entry);
1521                                 depend->target_ip = NULL;
1522                         }
1523                 } else if ((depend->flags & HAMMER_RECF_DELETED_FE) == 0) {
1524                         /*
1525                          * Not part of our flush group
1526                          */
1527                         KKASSERT((depend->flags & HAMMER_RECF_DELETED_BE) == 0);
1528                         switch(depend->type) {
1529                         case HAMMER_MEM_RECORD_ADD:
1530                                 --nlinks;
1531                                 break;
1532                         case HAMMER_MEM_RECORD_DEL:
1533                                 ++nlinks;
1534                                 break;
1535                         default:
1536                                 break;
1537                         }
1538                 }
1539         }
1540
1541         /*
1542          * Set dirty if we had to modify the link count.
1543          */
1544         if (ip->sync_ino_data.nlinks != nlinks) {
1545                 KKASSERT((int64_t)nlinks >= 0);
1546                 ip->sync_ino_data.nlinks = nlinks;
1547                 ip->sync_flags |= HAMMER_INODE_DDIRTY;
1548         }
1549
1550 #if 0
1551         /*
1552          * XXX DISABLED FOR NOW.  With the new reservation support
1553          * we cannot resync pending data without confusing the hell
1554          * out of the in-memory record tree.
1555          */
1556         /*
1557          * Queue up as many dirty buffers as we can then set a flag to
1558          * cause any further BIOs to go to the alternative queue.
1559          */
1560         if (ip->flags & HAMMER_INODE_VHELD)
1561                 error = vfsync(ip->vp, MNT_NOWAIT, 1, NULL, NULL);
1562         ip->flags |= HAMMER_INODE_WRITE_ALT;
1563
1564         /*
1565          * The buffer cache may contain dirty buffers beyond the inode
1566          * state we copied from the frontend to the backend.  Because
1567          * we are syncing our buffer cache on the backend, resync
1568          * the truncation point and the file size so we don't wipe out
1569          * any data.
1570          *
1571          * Syncing the buffer cache on the frontend has serious problems
1572          * because it prevents us from passively queueing dirty inodes
1573          * to the backend (the BIO's could stall indefinitely).
1574          */
1575         if (ip->flags & HAMMER_INODE_TRUNCATED) {
1576                 ip->sync_trunc_off = ip->trunc_off;
1577                 ip->sync_flags |= HAMMER_INODE_TRUNCATED;
1578         }
1579         if (ip->sync_ino_data.size != ip->ino_data.size) {
1580                 ip->sync_ino_data.size = ip->ino_data.size;
1581                 ip->sync_flags |= HAMMER_INODE_DDIRTY;
1582         }
1583 #endif
1584
1585         /*
1586          * If there is a trunction queued destroy any data past the (aligned)
1587          * truncation point.  Userland will have dealt with the buffer
1588          * containing the truncation point for us.
1589          *
1590          * We don't flush pending frontend data buffers until after we've
1591          * dealth with the truncation.
1592          *
1593          * Don't bother if the inode is or has been deleted.
1594          */
1595         if (ip->sync_flags & HAMMER_INODE_TRUNCATED) {
1596                 /*
1597                  * Interlock trunc_off.  The VOP front-end may continue to
1598                  * make adjustments to it while we are blocked.
1599                  */
1600                 off_t trunc_off;
1601                 off_t aligned_trunc_off;
1602
1603                 trunc_off = ip->sync_trunc_off;
1604                 aligned_trunc_off = (trunc_off + HAMMER_BUFMASK) &
1605                                     ~HAMMER_BUFMASK64;
1606
1607                 /*
1608                  * Delete any whole blocks on-media.  The front-end has
1609                  * already cleaned out any partial block and made it
1610                  * pending.  The front-end may have updated trunc_off
1611                  * while we were blocked so we only use sync_trunc_off.
1612                  */
1613                 error = hammer_ip_delete_range(&cursor, ip,
1614                                                 aligned_trunc_off,
1615                                                 0x7FFFFFFFFFFFFFFFLL, 1);
1616                 if (error)
1617                         Debugger("hammer_ip_delete_range errored");
1618
1619                 /*
1620                  * Clear the truncation flag on the backend after we have
1621                  * complete the deletions.  Backend data is now good again
1622                  * (including new records we are about to sync, below).
1623                  */
1624                 ip->sync_flags &= ~HAMMER_INODE_TRUNCATED;
1625                 ip->sync_trunc_off = 0x7FFFFFFFFFFFFFFFLL;
1626         } else {
1627                 error = 0;
1628         }
1629
1630         /*
1631          * Now sync related records.  These will typically be directory
1632          * entries or delete-on-disk records.
1633          *
1634          * Not all records will be flushed, but clear XDIRTY anyway.  We
1635          * will set it again in the frontend hammer_flush_inode_done()
1636          * if records remain.
1637          */
1638         if (error == 0) {
1639                 tmp_error = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
1640                                     hammer_sync_record_callback, &cursor);
1641                 if (tmp_error < 0)
1642                         tmp_error = -error;
1643                 if (tmp_error)
1644                         error = tmp_error;
1645         }
1646
1647         /*
1648          * If we are deleting the inode the frontend had better not have
1649          * any active references on elements making up the inode.
1650          */
1651         if (error == 0 && ip->sync_ino_data.nlinks == 0 &&
1652                 RB_EMPTY(&ip->rec_tree)  &&
1653             (ip->sync_flags & HAMMER_INODE_DELETING) &&
1654             (ip->flags & HAMMER_INODE_DELETED) == 0) {
1655                 int count1 = 0;
1656
1657                 ip->flags |= HAMMER_INODE_DELETED;
1658                 error = hammer_ip_delete_range_all(&cursor, ip, &count1);
1659                 if (error == 0) {
1660                         ip->sync_flags &= ~HAMMER_INODE_DELETING;
1661                         ip->sync_flags &= ~HAMMER_INODE_TRUNCATED;
1662                         KKASSERT(RB_EMPTY(&ip->rec_tree));
1663
1664                         /*
1665                          * Set delete_tid in both the frontend and backend
1666                          * copy of the inode record.  The DELETED flag handles
1667                          * this, do not set RDIRTY.
1668                          */
1669                         ip->ino_leaf.base.delete_tid = trans.tid;
1670                         ip->sync_ino_leaf.base.delete_tid = trans.tid;
1671
1672                         /*
1673                          * Adjust the inode count in the volume header
1674                          */
1675                         if (ip->flags & HAMMER_INODE_ONDISK) {
1676                                 hammer_modify_volume_field(&trans,
1677                                                            trans.rootvol,
1678                                                            vol0_stat_inodes);
1679                                 --ip->hmp->rootvol->ondisk->vol0_stat_inodes;
1680                                 hammer_modify_volume_done(trans.rootvol);
1681                         }
1682                 } else {
1683                         ip->flags &= ~HAMMER_INODE_DELETED;
1684                         Debugger("hammer_ip_delete_range_all errored");
1685                 }
1686         }
1687
1688         ip->sync_flags &= ~HAMMER_INODE_BUFS;
1689
1690         if (error)
1691                 Debugger("RB_SCAN errored");
1692
1693         /*
1694          * Now update the inode's on-disk inode-data and/or on-disk record.
1695          * DELETED and ONDISK are managed only in ip->flags.
1696          */
1697         switch(ip->flags & (HAMMER_INODE_DELETED | HAMMER_INODE_ONDISK)) {
1698         case HAMMER_INODE_DELETED|HAMMER_INODE_ONDISK:
1699                 /*
1700                  * If deleted and on-disk, don't set any additional flags.
1701                  * the delete flag takes care of things.
1702                  *
1703                  * Clear flags which may have been set by the frontend.
1704                  */
1705                 ip->sync_flags &= ~(HAMMER_INODE_DDIRTY|
1706                                     HAMMER_INODE_XDIRTY|HAMMER_INODE_ITIMES|
1707                                     HAMMER_INODE_DELETING);
1708                 break;
1709         case HAMMER_INODE_DELETED:
1710                 /*
1711                  * Take care of the case where a deleted inode was never
1712                  * flushed to the disk in the first place.
1713                  *
1714                  * Clear flags which may have been set by the frontend.
1715                  */
1716                 ip->sync_flags &= ~(HAMMER_INODE_DDIRTY|
1717                                     HAMMER_INODE_XDIRTY|HAMMER_INODE_ITIMES|
1718                                     HAMMER_INODE_DELETING);
1719                 while (RB_ROOT(&ip->rec_tree)) {
1720                         hammer_record_t record = RB_ROOT(&ip->rec_tree);
1721                         hammer_ref(&record->lock);
1722                         KKASSERT(record->lock.refs == 1);
1723                         record->flags |= HAMMER_RECF_DELETED_FE;
1724                         record->flags |= HAMMER_RECF_DELETED_BE;
1725                         hammer_rel_mem_record(record);
1726                 }
1727                 break;
1728         case HAMMER_INODE_ONDISK:
1729                 /*
1730                  * If already on-disk, do not set any additional flags.
1731                  */
1732                 break;
1733         default:
1734                 /*
1735                  * If not on-disk and not deleted, set both dirty flags
1736                  * to force an initial record to be written.  Also set
1737                  * the create_tid for the inode.
1738                  *
1739                  * Set create_tid in both the frontend and backend
1740                  * copy of the inode record.
1741                  */
1742                 ip->ino_leaf.base.create_tid = trans.tid;
1743                 ip->sync_ino_leaf.base.create_tid = trans.tid;
1744                 ip->sync_flags |= HAMMER_INODE_DDIRTY;
1745                 break;
1746         }
1747
1748         /*
1749          * If RDIRTY or DDIRTY is set, write out a new record.  If the inode
1750          * is already on-disk the old record is marked as deleted.
1751          *
1752          * If DELETED is set hammer_update_inode() will delete the existing
1753          * record without writing out a new one.
1754          *
1755          * If *ONLY* the ITIMES flag is set we can update the record in-place.
1756          */
1757         if (ip->flags & HAMMER_INODE_DELETED) {
1758                 error = hammer_update_inode(&cursor, ip);
1759         } else
1760         if ((ip->sync_flags & (HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES)) ==
1761             HAMMER_INODE_ITIMES) {
1762                 error = hammer_update_itimes(&cursor, ip);
1763         } else
1764         if (ip->sync_flags & (HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES)) {
1765                 error = hammer_update_inode(&cursor, ip);
1766         }
1767         if (error)
1768                 Debugger("hammer_update_itimes/inode errored");
1769 done:
1770         /*
1771          * Save the TID we used to sync the inode with to make sure we
1772          * do not improperly reuse it.
1773          */
1774         hammer_done_cursor(&cursor);
1775         hammer_done_transaction(&trans);
1776         return(error);
1777 }
1778
1779 /*
1780  * This routine is called when the OS is no longer actively referencing
1781  * the inode (but might still be keeping it cached), or when releasing
1782  * the last reference to an inode.
1783  *
1784  * At this point if the inode's nlinks count is zero we want to destroy
1785  * it, which may mean destroying it on-media too.
1786  */
1787 void
1788 hammer_inode_unloadable_check(hammer_inode_t ip, int getvp)
1789 {
1790         struct vnode *vp;
1791
1792         /*
1793          * Set the DELETING flag when the link count drops to 0 and the
1794          * OS no longer has any opens on the inode.
1795          *
1796          * The backend will clear DELETING (a mod flag) and set DELETED
1797          * (a state flag) when it is actually able to perform the
1798          * operation.
1799          */
1800         if (ip->ino_data.nlinks == 0 &&
1801             (ip->flags & (HAMMER_INODE_DELETING|HAMMER_INODE_DELETED)) == 0) {
1802                 ip->flags |= HAMMER_INODE_DELETING;
1803                 ip->flags |= HAMMER_INODE_TRUNCATED;
1804                 ip->trunc_off = 0;
1805                 vp = NULL;
1806                 if (getvp) {
1807                         if (hammer_get_vnode(ip, &vp) != 0)
1808                                 return;
1809                 }
1810
1811                 /*
1812                  * Final cleanup
1813                  */
1814                 if (ip->vp) {
1815                         vtruncbuf(ip->vp, 0, HAMMER_BUFSIZE);
1816                         vnode_pager_setsize(ip->vp, 0);
1817                 }
1818                 if (getvp) {
1819                         vput(vp);
1820                 }
1821         }
1822 }
1823
1824 /*
1825  * Re-test an inode when a dependancy had gone away to see if we
1826  * can chain flush it.
1827  */
1828 void
1829 hammer_test_inode(hammer_inode_t ip)
1830 {
1831         if (ip->flags & HAMMER_INODE_REFLUSH) {
1832                 ip->flags &= ~HAMMER_INODE_REFLUSH;
1833                 hammer_ref(&ip->lock);
1834                 if (ip->flags & HAMMER_INODE_RESIGNAL) {
1835                         ip->flags &= ~HAMMER_INODE_RESIGNAL;
1836                         hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
1837                 } else {
1838                         hammer_flush_inode(ip, 0);
1839                 }
1840                 hammer_rel_inode(ip, 0);
1841         }
1842 }
1843
1844 /*
1845  * When a HAMMER inode is reclaimed it may have to be queued to the backend
1846  * for its final sync to disk.  Programs like blogbench can cause the backlog
1847  * to grow indefinitely.  Put a cap on the number of inodes we allow to be
1848  * in this state by giving the flusher time to drain.
1849  */
1850 void
1851 hammer_inode_waitreclaims(hammer_mount_t hmp)
1852 {
1853         int count;
1854         int delay;
1855         int minpt;
1856         int maxpt;
1857
1858         while (hmp->inode_reclaims > HAMMER_RECLAIM_MIN) {
1859                 count = hmp->count_inodes;
1860                 minpt = count * HAMMER_RECLAIM_SLOPCT / 100;
1861                 maxpt = count * HAMMER_RECLAIM_MAXPCT / 100;
1862
1863                 if (hmp->inode_reclaims < minpt)
1864                         break;
1865                 if (hmp->inode_reclaims < maxpt) {
1866                         delay = (hmp->inode_reclaims - minpt) * hz /
1867                                 (maxpt - minpt);
1868                         if (delay == 0)
1869                                 delay = 1;
1870                         hammer_flusher_async(hmp);
1871                         tsleep(&count, 0, "hmitik", delay);
1872                         break;
1873                 }
1874                 hmp->flags |= HAMMER_MOUNT_WAITIMAX;
1875                 hammer_flusher_async(hmp);
1876                 tsleep(&hmp->inode_reclaims, 0, "hmimax", hz / 10);
1877         }
1878 }
1879
1880 void
1881 hammer_inode_wakereclaims(hammer_mount_t hmp)
1882 {
1883         int maxpt;
1884
1885         if ((hmp->flags & HAMMER_MOUNT_WAITIMAX) == 0)
1886                 return;
1887         maxpt = hmp->count_inodes * HAMMER_RECLAIM_MAXPCT / 100;
1888         if (hmp->inode_reclaims <= HAMMER_RECLAIM_MIN ||
1889             hmp->inode_reclaims < maxpt) {
1890                 hmp->flags &= ~HAMMER_MOUNT_WAITIMAX;
1891                 wakeup(&hmp->inode_reclaims);
1892         }
1893 }
1894