HAMMER: MFC to 2.0
author     Matthew Dillon <dillon@dragonflybsd.org>
           Sat, 2 Aug 2008 21:24:28 +0000
committer  Matthew Dillon <dillon@dragonflybsd.org>
           Sat, 2 Aug 2008 21:24:28 +0000
* Bug fix: fsync indefinite blocking

* Bug fix: missed invalidation which can cause an assertion

* Bug fix: kmalloc exhaustion panic on machines with > 2G of ram

* Feature: Streaming mirroring

27 files changed:
sys/vfs/hammer/hammer.h
sys/vfs/hammer/hammer_blockmap.c
sys/vfs/hammer/hammer_btree.c
sys/vfs/hammer/hammer_btree.h
sys/vfs/hammer/hammer_cursor.c
sys/vfs/hammer/hammer_cursor.h
sys/vfs/hammer/hammer_disk.h
sys/vfs/hammer/hammer_flusher.c
sys/vfs/hammer/hammer_freemap.c
sys/vfs/hammer/hammer_inode.c
sys/vfs/hammer/hammer_io.c
sys/vfs/hammer/hammer_ioctl.c
sys/vfs/hammer/hammer_ioctl.h
sys/vfs/hammer/hammer_mirror.c
sys/vfs/hammer/hammer_mount.h
sys/vfs/hammer/hammer_object.c
sys/vfs/hammer/hammer_ondisk.c
sys/vfs/hammer/hammer_pfs.c
sys/vfs/hammer/hammer_prune.c
sys/vfs/hammer/hammer_reblock.c
sys/vfs/hammer/hammer_recover.c
sys/vfs/hammer/hammer_signal.c
sys/vfs/hammer/hammer_subs.c
sys/vfs/hammer/hammer_transaction.c
sys/vfs/hammer/hammer_undo.c
sys/vfs/hammer/hammer_vfsops.c
sys/vfs/hammer/hammer_vnops.c

diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h
index 88299b3..4b18b63 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.117.2.5 2008/07/30 07:53:01 mneumann Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.117.2.6 2008/08/02 21:24:27 dillon Exp $
  */
 /*
  * This header file contains structures used internally by the HAMMERFS
@@ -66,6 +66,7 @@
 #if defined(_KERNEL) || defined(_KERNEL_STRUCTURES)
 
 MALLOC_DECLARE(M_HAMMER);
+MALLOC_DECLARE(M_HAMMER_INO);
 
 /*
  * Kernel trace
@@ -322,7 +323,7 @@ typedef struct hammer_inode *hammer_inode_t;
 #define HAMMER_INODE_VHELD     0x0400  /* vnode held on sync */
 #define HAMMER_INODE_DONDISK   0x0800  /* data records may be on disk */
 #define HAMMER_INODE_BUFS      0x1000  /* dirty high level bps present */
-#define HAMMER_INODE_REFLUSH   0x2000  /* pipelined flush during flush */
+#define HAMMER_INODE_REFLUSH   0x2000  /* flush on dependancy / reflush */
 #define HAMMER_INODE_RECLAIM   0x4000  /* trying to reclaim */
 #define HAMMER_INODE_FLUSHW    0x8000  /* Someone waiting for flush */
 
@@ -397,6 +398,7 @@ struct hammer_record {
        struct hammer_btree_leaf_elm    leaf;
        union hammer_data_ondisk        *data;
        int                             flags;
+       hammer_off_t                    zone2_offset;   /* direct-write only */
 };
 
 typedef struct hammer_record *hammer_record_t;
@@ -415,6 +417,7 @@ typedef struct hammer_record *hammer_record_t;
 #define HAMMER_RECF_CONVERT_DELETE     0x0100  /* special case */
 #define HAMMER_RECF_DIRECT_IO          0x0200  /* related direct I/O running*/
 #define HAMMER_RECF_DIRECT_WAIT                0x0400  /* related direct I/O running*/
+#define HAMMER_RECF_DIRECT_INVAL       0x0800  /* buffer alias invalidation */
 
 /*
  * hammer_delete_at_cursor() flags
@@ -719,7 +722,9 @@ struct hammer_mount {
        int     error;                          /* critical I/O error */
        struct krate    krate;                  /* rate limited kprintf */
        hammer_tid_t    asof;                   /* snapshot mount */
-       hammer_off_t    next_tid;
+       hammer_tid_t    next_tid;
+       hammer_tid_t    flush_tid1;             /* flusher tid sequencing */
+       hammer_tid_t    flush_tid2;             /* flusher tid sequencing */
        int64_t copy_stat_freebigblocks;        /* number of free bigblocks */
 
        u_int32_t namekey_iterator;
@@ -843,6 +848,7 @@ int hammer_install_volume(hammer_mount_t hmp, const char *volname,
                        struct vnode *devvp);
 int    hammer_mountcheck_volumes(hammer_mount_t hmp);
 
+int    hammer_mem_add(hammer_record_t record);
 int    hammer_ip_lookup(hammer_cursor_t cursor);
 int    hammer_ip_first(hammer_cursor_t cursor);
 int    hammer_ip_next(hammer_cursor_t cursor);
@@ -1139,6 +1145,8 @@ int hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
                         struct hammer_ioc_pseudofs_rw *pfs);
 int hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
                         struct hammer_ioc_pseudofs_rw *pfs);
+int hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
+                        struct hammer_ioc_pseudofs_rw *pfs);
 
 int hammer_signal_check(hammer_mount_t hmp);
 
diff --git a/sys/vfs/hammer/hammer_blockmap.c b/sys/vfs/hammer/hammer_blockmap.c
index 06ffd5d..e047081 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.24.2.2 2008/07/18 00:21:09 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.24.2.3 2008/08/02 21:24:27 dillon Exp $
  */
 
 /*
@@ -573,7 +573,11 @@ hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
                /*
                 * If we are releasing a zone and all of its reservations
                 * were undone we have to clean out all hammer and device
-                * buffers associated with the big block.
+                * buffers associated with the big block.  We do this
+                * primarily because the large-block may be reallocated
+                * from non-large-data to large-data or vice versa, resulting
+                * in a different mix of 16K and 64K buffer cache buffers.
+                * XXX - this isn't fun and needs to be redone.
                 *
                 * Any direct allocations will cause this test to fail
                 * (bytes_freed will never reach append_off), which is
diff --git a/sys/vfs/hammer/hammer_btree.c b/sys/vfs/hammer/hammer_btree.c
index 2b84711..98fe254 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.71.2.3 2008/07/19 18:46:20 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.71.2.4 2008/08/02 21:24:27 dillon Exp $
  */
 
 /*
@@ -704,8 +704,11 @@ hammer_btree_extract(hammer_cursor_t cursor, int flags)
        KKASSERT(data_len >= 0 && data_len <= HAMMER_XBUFSIZE);
        cursor->data = hammer_bread_ext(hmp, data_off, data_len,
                                        &error, &cursor->data_buffer);
-       if (hammer_crc_test_leaf(cursor->data, &elm->leaf) == 0)
+       if (hammer_crc_test_leaf(cursor->data, &elm->leaf) == 0) {
+               kprintf("CRC DATA @ %016llx/%d FAILED\n",
+                       elm->leaf.data_offset, elm->leaf.data_len);
                Debugger("CRC FAILED: DATA");
+       }
        return(error);
 }
 
diff --git a/sys/vfs/hammer/hammer_btree.h b/sys/vfs/hammer/hammer_btree.h
index 43fccb9..80bb84d 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.24 2008/06/26 04:06:22 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.24.2.1 2008/08/02 21:24:27 dillon Exp $
  */
 
 /*
diff --git a/sys/vfs/hammer/hammer_cursor.c b/sys/vfs/hammer/hammer_cursor.c
index 5458bf9..1804607 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.41 2008/07/11 01:22:29 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.41.2.1 2008/08/02 21:24:27 dillon Exp $
  */
 
 /*
diff --git a/sys/vfs/hammer/hammer_cursor.h b/sys/vfs/hammer/hammer_cursor.h
index 66f7e3d..4629401 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.25 2008/07/10 04:44:33 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.25.2.1 2008/08/02 21:24:27 dillon Exp $
  */
 
 struct hammer_cmirror;
diff --git a/sys/vfs/hammer/hammer_disk.h b/sys/vfs/hammer/hammer_disk.h
index 62a1b9c..9a5e4f5 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.50.2.1 2008/07/19 18:46:20 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.50.2.2 2008/08/02 21:24:28 dillon Exp $
  */
 
 #ifndef VFS_HAMMER_DISK_H_
@@ -498,7 +498,7 @@ struct hammer_volume_ondisk {
        int64_t vol0_stat_inodes;       /* for statfs only */
        int64_t vol0_stat_records;      /* total records in filesystem */
        hammer_off_t vol0_btree_root;   /* B-Tree root */
-       hammer_tid_t vol0_next_tid;     /* highest synchronized TID */
+       hammer_tid_t vol0_next_tid;     /* highest partially synchronized TID */
        hammer_off_t vol0_unused03;
 
        /*
diff --git a/sys/vfs/hammer/hammer_flusher.c b/sys/vfs/hammer/hammer_flusher.c
index 8363226..633ee7e 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.40.2.4 2008/07/19 04:51:09 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.40.2.5 2008/08/02 21:24:28 dillon Exp $
  */
 /*
  * HAMMER dependancy flusher thread
@@ -231,7 +231,13 @@ hammer_flusher_master_thread(void *arg)
                        break;
                while (hmp->flusher.signal == 0)
                        tsleep(&hmp->flusher.signal, 0, "hmrwwa", 0);
-               hmp->flusher.signal = 0;
+
+               /*
+                * Flush for each count on signal but only allow one extra
+                * flush request to build up.
+                */
+               if (--hmp->flusher.signal != 0)
+                       hmp->flusher.signal = 1;
        }
 
        /*
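
The new loop above replaces the unconditional "hmp->flusher.signal = 0" with a
decrement that clamps the backlog, so any number of async flush requests that
arrive while a flush is running collapse into at most one follow-up cycle.  A
minimal stand-alone sketch of that coalescing pattern (illustration only, with
hypothetical names; the real code runs under the flusher's own synchronization,
not in userland):

    #include <stdio.h>

    static int flusher_signal;              /* stand-in for hmp->flusher.signal */

    static void
    flusher_async(void)
    {
            ++flusher_signal;               /* request one flush cycle */
    }

    static int
    flusher_consume(void)
    {
            if (flusher_signal == 0)        /* the kernel thread tsleep()s here */
                    return (0);
            if (--flusher_signal != 0)      /* flush once per request ... */
                    flusher_signal = 1;     /* ... but queue at most one extra */
            return (1);
    }

    int
    main(void)
    {
            flusher_async();
            flusher_async();
            flusher_async();                /* three requests arrive back to back */
            while (flusher_consume())
                    printf("flush cycle, backlog now %d\n", flusher_signal);
            return (0);
    }

Run stand-alone this performs two flush cycles for the three requests, which is
the coalescing behavior the comment in the hunk describes.
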
@@ -665,6 +671,13 @@ hammer_flusher_finalize(hammer_transaction_t trans, int final)
                hammer_modify_volume_done(root_volume);
        }
 
+       /*
+        * vol0_next_tid is used for TID selection and is updated without
+        * an UNDO so we do not reuse a TID that may have been rolled-back.
+        *
+        * vol0_last_tid is the highest fully-synchronized TID.  It is
+        * set-up when the UNDO fifo is fully synced, later on (not here).
+        */
        if (root_volume->io.modified) {
                hammer_modify_volume(NULL, root_volume, NULL, 0);
                if (root_volume->ondisk->vol0_next_tid < trans->tid)
@@ -722,6 +735,18 @@ hammer_flusher_finalize(hammer_transaction_t trans, int final)
                        hmp->hflags |= HMNT_UNDO_DIRTY;
                }
                hammer_clear_undo_history(hmp);
+
+               /*
+                * Flush tid sequencing.  flush_tid1 is fully synchronized,
+                * meaning a crash will not roll it back.  flush_tid2 has
+                * been written out asynchronously and a crash will roll
+                * it back.  flush_tid1 is used for all mirroring masters.
+                */
+               if (hmp->flush_tid1 != hmp->flush_tid2) {
+                       hmp->flush_tid1 = hmp->flush_tid2;
+                       wakeup(&hmp->flush_tid1);
+               }
+               hmp->flush_tid2 = trans->tid;
        }
 
        /*
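
The flush_tid1/flush_tid2 comment above is easier to follow as a tiny state
machine: the TID of the cycle just written is parked in flush_tid2, and only on
the next finalization, once the UNDO FIFO covering it has been synced, is it
promoted to flush_tid1, so flush_tid1 can never name a TID that a crash would
roll back.  A compilable sketch under those assumptions (hypothetical names,
not the actual hammer_mount fields):

    #include <stdio.h>

    typedef unsigned long long tid64_t;     /* stand-in for hammer_tid_t */

    struct mnt {
            tid64_t flush_tid1;             /* durable: a crash cannot roll it back */
            tid64_t flush_tid2;             /* written out, UNDO not yet synced */
    };

    /* Called once per final flush cycle, after the UNDO area has been synced. */
    static void
    finalize_tids(struct mnt *m, tid64_t trans_tid)
    {
            if (m->flush_tid1 != m->flush_tid2) {
                    m->flush_tid1 = m->flush_tid2;  /* previous cycle now durable */
                    /* wakeup(&m->flush_tid1) in the kernel unblocks mirror waiters */
            }
            m->flush_tid2 = trans_tid;              /* becomes durable next cycle */
    }

    int
    main(void)
    {
            struct mnt m = { 1000, 1000 };

            finalize_tids(&m, 1010);        /* cycle 1: 1010 written, not yet durable */
            finalize_tids(&m, 1020);        /* cycle 2: 1010 promoted to durable */
            printf("durable %llu, pending %llu\n", m.flush_tid1, m.flush_tid2);
            return (0);
    }
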
@@ -738,6 +763,7 @@ failed:
 
 done:
        hammer_unlock(&hmp->flusher.finalize_lock);
+
        if (--hmp->flusher.finalize_want == 0)
                wakeup(&hmp->flusher.finalize_want);
        hammer_stats_commits += final;
diff --git a/sys/vfs/hammer/hammer_freemap.c b/sys/vfs/hammer/hammer_freemap.c
index a971e47..ad40687 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_freemap.c,v 1.18 2008/06/20 05:38:26 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_freemap.c,v 1.18.2.1 2008/08/02 21:24:28 dillon Exp $
  */
 
 /*
diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c
index 8fa958b..69b2efc 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.103.2.2 2008/07/18 00:21:09 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.103.2.3 2008/08/02 21:24:28 dillon Exp $
  */
 
 #include "hammer.h"
@@ -378,7 +378,7 @@ loop:
        /*
         * Allocate a new inode structure and deal with races later.
         */
-       ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
+       ip = kmalloc(sizeof(*ip), M_HAMMER_INO, M_WAITOK|M_ZERO);
        ++hammer_count_inodes;
        ++hmp->count_inodes;
        ip->obj_id = obj_id;
@@ -511,7 +511,7 @@ hammer_create_inode(hammer_transaction_t trans, struct vattr *vap,
 
        hmp = trans->hmp;
 
-       ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
+       ip = kmalloc(sizeof(*ip), M_HAMMER_INO, M_WAITOK|M_ZERO);
        ++hammer_count_inodes;
        ++hmp->count_inodes;
 
@@ -672,7 +672,7 @@ hammer_free_inode(hammer_inode_t ip)
                hammer_rel_pseudofs(ip->hmp, ip->pfsm);
                ip->pfsm = NULL;
        }
-       kfree(ip, M_HAMMER);
+       kfree(ip, M_HAMMER_INO);
        ip = NULL;
 }
 
@@ -1360,8 +1360,9 @@ hammer_modify_inode(hammer_inode_t ip, int flags)
  * place the inode in a flushing state if it is currently idle and flag it
  * to reflush if it is currently flushing.
  *
- * If the HAMMER_FLUSH_SYNCHRONOUS flag is specified we will attempt to
- * flush the indoe synchronously using the caller's context.
+ * Upon return if the inode could not be flushed due to a setup
+ * dependancy, then it will be automatically flushed when the dependancy
+ * is satisfied.
  */
 void
 hammer_flush_inode(hammer_inode_t ip, int flags)
@@ -1440,10 +1441,14 @@ hammer_flush_inode(hammer_inode_t ip, int flags)
                        hammer_flush_inode_core(ip, flg, flags);
                } else {
                        /*
-                        * parent has no connectivity, tell it to flush
+                        * Parent has no connectivity, tell it to flush
                         * us as soon as it does.
+                        *
+                        * The REFLUSH flag is also needed to trigger
+                        * dependancy wakeups.
                         */
-                       ip->flags |= HAMMER_INODE_CONN_DOWN;
+                       ip->flags |= HAMMER_INODE_CONN_DOWN |
+                                    HAMMER_INODE_REFLUSH;
                        if (flags & HAMMER_FLUSH_SIGNAL) {
                                ip->flags |= HAMMER_INODE_RESIGNAL;
                                hammer_flusher_async(ip->hmp, flg);
@@ -1454,6 +1459,9 @@ hammer_flush_inode(hammer_inode_t ip, int flags)
                /*
                 * We are already flushing, flag the inode to reflush
                 * if needed after it completes its current flush.
+                *
+                * The REFLUSH flag is also needed to trigger
+                * dependancy wakeups.
                 */
                if ((ip->flags & HAMMER_INODE_REFLUSH) == 0)
                        ip->flags |= HAMMER_INODE_REFLUSH;
@@ -1706,17 +1714,22 @@ hammer_flush_inode_core(hammer_inode_t ip, hammer_flush_group_t flg, int flags)
         */
        if (go_count == 0) {
                if ((ip->flags & HAMMER_INODE_MODMASK_NOXDIRTY) == 0) {
-                       ip->flags |= HAMMER_INODE_REFLUSH;
-
                        --ip->hmp->count_iqueued;
                        --hammer_count_iqueued;
 
+                       --flg->total_count;
                        ip->flush_state = HAMMER_FST_SETUP;
                        ip->flush_group = NULL;
                        if (ip->flags & HAMMER_INODE_VHELD) {
                                ip->flags &= ~HAMMER_INODE_VHELD;
                                vrele(ip->vp);
                        }
+
+                       /*
+                        * REFLUSH is needed to trigger dependancy wakeups
+                        * when an inode is in SETUP.
+                        */
+                       ip->flags |= HAMMER_INODE_REFLUSH;
                        if (flags & HAMMER_FLUSH_SIGNAL) {
                                ip->flags |= HAMMER_INODE_RESIGNAL;
                                hammer_flusher_async(ip->hmp, flg);
@@ -1909,8 +1922,8 @@ hammer_setup_child_callback(hammer_record_t rec, void *data)
                         * flush groups before it can be completely
                         * flushed.
                         */
-                       ip->flags |= HAMMER_INODE_REFLUSH;
-                       ip->flags |= HAMMER_INODE_RESIGNAL;
+                       ip->flags |= HAMMER_INODE_RESIGNAL |
+                                    HAMMER_INODE_REFLUSH;
                        r = -1;
                } else if (rec->type == HAMMER_MEM_RECORD_ADD) {
                        /*
@@ -1994,13 +2007,14 @@ hammer_wait_inode(hammer_inode_t ip)
 
        flg = NULL;
        if ((ip->hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR) == 0) {
-               if (ip->flush_state == HAMMER_FST_SETUP) {
-                       hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
-               }
                while (ip->flush_state != HAMMER_FST_IDLE &&
                       (ip->hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR) == 0) {
-                       ip->flags |= HAMMER_INODE_FLUSHW;
-                       tsleep(&ip->flags, 0, "hmrwin", 0);
+                       if (ip->flush_state == HAMMER_FST_SETUP)
+                               hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
+                       if (ip->flush_state != HAMMER_FST_IDLE) {
+                               ip->flags |= HAMMER_INODE_FLUSHW;
+                               tsleep(&ip->flags, 0, "hmrwin", 0);
+                       }
                }
        }
 }
diff --git a/sys/vfs/hammer/hammer_io.c b/sys/vfs/hammer/hammer_io.c
index 04ac624..6ca02b9 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.49.2.2 2008/07/18 00:21:09 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.49.2.3 2008/08/02 21:24:28 dillon Exp $
  */
 /*
  * IO Primitives and buffer cache management
@@ -248,7 +248,11 @@ hammer_io_new(struct vnode *devvp, struct hammer_io *io)
 
 /*
  * Remove potential device level aliases against buffers managed by high level
- * vnodes.
+ * vnodes.  Aliases can also be created due to mixed buffer sizes.
+ *
+ * This is nasty because the buffers are also VMIO-backed.  Even if a buffer
+ * does not exist its backing VM pages might, and we have to invalidate
+ * those as well or a getblk() will reinstate them.
  */
 void
 hammer_io_inval(hammer_volume_t volume, hammer_off_t zone2_offset)
@@ -260,20 +264,21 @@ hammer_io_inval(hammer_volume_t volume, hammer_off_t zone2_offset)
        phys_offset = volume->ondisk->vol_buf_beg +
                      (zone2_offset & HAMMER_OFF_SHORT_MASK);
        crit_enter();
-       if ((bp = findblk(volume->devvp, phys_offset)) != NULL) {
+       if ((bp = findblk(volume->devvp, phys_offset)) != NULL)
                bp = getblk(volume->devvp, phys_offset, bp->b_bufsize, 0, 0);
-               if ((iou = (void *)LIST_FIRST(&bp->b_dep)) != NULL) {
-                       hammer_io_clear_modify(&iou->io, 1);
-                       bundirty(bp);
-                       iou->io.reclaim = 1;
-                       hammer_io_deallocate(bp);
-               } else {
-                       KKASSERT((bp->b_flags & B_LOCKED) == 0);
-                       bundirty(bp);
-                       bp->b_flags |= B_NOCACHE|B_RELBUF;
-               }
-               brelse(bp);
+       else
+               bp = getblk(volume->devvp, phys_offset, HAMMER_BUFSIZE, 0, 0);
+       if ((iou = (void *)LIST_FIRST(&bp->b_dep)) != NULL) {
+               hammer_io_clear_modify(&iou->io, 1);
+               bundirty(bp);
+               iou->io.reclaim = 1;
+               hammer_io_deallocate(bp);
+       } else {
+               KKASSERT((bp->b_flags & B_LOCKED) == 0);
+               bundirty(bp);
+               bp->b_flags |= B_NOCACHE|B_RELBUF;
        }
+       brelse(bp);
        crit_exit();
 }
 
@@ -995,9 +1000,6 @@ struct bio_ops hammer_bioops = {
  * disk media.  The bio may be issued asynchronously.  If leaf is non-NULL
  * we validate the CRC.
  *
- * A second-level bio already resolved to a zone-2 offset (typically by
- * the BMAP code, or by a previous hammer_io_direct_write()), is passed. 
- *
  * We must check for the presence of a HAMMER buffer to handle the case
  * where the reblocker has rewritten the data (which it does via the HAMMER
  * buffer system, not via the high-level vnode buffer cache), but not yet
@@ -1048,11 +1050,12 @@ hammer_io_direct_read(hammer_mount_t hmp, struct bio *bio,
                error = EIO;
 
        if (error == 0) {
-               zone2_offset &= HAMMER_OFF_SHORT_MASK;
-
+               /*
+                * 3rd level bio
+                */
                nbio = push_bio(bio);
                nbio->bio_offset = volume->ondisk->vol_buf_beg +
-                                  zone2_offset;
+                                  (zone2_offset & HAMMER_OFF_SHORT_MASK);
 #if 0
                /*
                 * XXX disabled - our CRC check doesn't work if the OS
@@ -1110,7 +1113,7 @@ hammer_io_direct_read_complete(struct bio *nbio)
  * disk media.  The bio may be issued asynchronously.
  *
  * The BIO is associated with the specified record and RECF_DIRECT_IO
- * is set.
+ * is set.  The record is added to its object.
  */
 int
 hammer_io_direct_write(hammer_mount_t hmp, hammer_record_t record,
@@ -1148,8 +1151,10 @@ hammer_io_direct_write(hammer_mount_t hmp, hammer_record_t record,
                if (error == 0) {
                        bp = bio->bio_buf;
                        KKASSERT((bp->b_bufsize & HAMMER_BUFMASK) == 0);
+                       /*
                        hammer_del_buffers(hmp, buf_offset,
                                           zone2_offset, bp->b_bufsize);
+                       */
 
                        /*
                         * Second level bio - cached zone2 offset.
@@ -1161,7 +1166,9 @@ hammer_io_direct_write(hammer_mount_t hmp, hammer_record_t record,
                        nbio->bio_offset = zone2_offset;
                        nbio->bio_done = hammer_io_direct_write_complete;
                        nbio->bio_caller_info1.ptr = record;
-                       record->flags |= HAMMER_RECF_DIRECT_IO;
+                       record->zone2_offset = zone2_offset;
+                       record->flags |= HAMMER_RECF_DIRECT_IO |
+                                        HAMMER_RECF_DIRECT_INVAL;
 
                        /*
                         * Third level bio - raw offset specific to the
@@ -1195,7 +1202,17 @@ hammer_io_direct_write(hammer_mount_t hmp, hammer_record_t record,
                        biodone(bio);
                }
        }
-       if (error) {
+       if (error == 0) {
+               /*
+                * The record is all set up now, add it.  Potential conflicts
+                * have already been dealt with.
+                */
+               error = hammer_mem_add(record);
+               KKASSERT(error == 0);
+       } else {
+               /*
+                * Major suckage occured.
+                */
                kprintf("hammer_direct_write: failed @ %016llx\n",
                        leaf->data_offset);
                bp = bio->bio_buf;
@@ -1203,6 +1220,8 @@ hammer_io_direct_write(hammer_mount_t hmp, hammer_record_t record,
                bp->b_error = EIO;
                bp->b_flags |= B_ERROR;
                biodone(bio);
+               record->flags |= HAMMER_RECF_DELETED_FE;
+               hammer_rel_mem_record(record);
        }
        return(error);
 }
@@ -1220,17 +1239,21 @@ void
 hammer_io_direct_write_complete(struct bio *nbio)
 {
        struct bio *obio;
+       struct buf *bp;
        hammer_record_t record = nbio->bio_caller_info1.ptr;
 
+       bp = nbio->bio_buf;
        obio = pop_bio(nbio);
-       if (obio->bio_buf->b_flags & B_ERROR) {
+       if (bp->b_flags & B_ERROR) {
                hammer_critical_error(record->ip->hmp, record->ip,
-                                     obio->bio_buf->b_error,
+                                     bp->b_error,
                                      "while writing bulk data");
-               obio->bio_buf->b_flags |= B_INVAL;
+               bp->b_flags |= B_INVAL;
        }
        biodone(obio);
-       KKASSERT(record != NULL && (record->flags & HAMMER_RECF_DIRECT_IO));
+
+       KKASSERT(record != NULL);
+       KKASSERT(record->flags & HAMMER_RECF_DIRECT_IO);
        record->flags &= ~HAMMER_RECF_DIRECT_IO;
        if (record->flags & HAMMER_RECF_DIRECT_WAIT) {
                record->flags &= ~HAMMER_RECF_DIRECT_WAIT;
@@ -1241,22 +1264,40 @@ hammer_io_direct_write_complete(struct bio *nbio)
 
 /*
  * This is called before a record is either committed to the B-Tree
- * or destroyed, to resolve any associated direct-IO.  We must
- * ensure that the data is available on-media to other consumers
- * such as the reblocker or mirroring code.
+ * or destroyed, to resolve any associated direct-IO. 
  *
- * Note that other consumers might access the data via the block
- * device's buffer cache and not the high level vnode's buffer cache.
+ * (1) We must wait for any direct-IO related to the record to complete.
+ *
+ * (2) We must remove any buffer cache aliases for data accessed via
+ *     leaf->data_offset or zone2_offset so non-direct-IO consumers  
+ *     (the mirroring and reblocking code) do not see stale data.
  */
 void
 hammer_io_direct_wait(hammer_record_t record)
 {
-       crit_enter();
-       while (record->flags & HAMMER_RECF_DIRECT_IO) {
-               record->flags |= HAMMER_RECF_DIRECT_WAIT;
-               tsleep(&record->flags, 0, "hmdiow", 0);
+       /*
+        * Wait for I/O to complete
+        */
+       if (record->flags & HAMMER_RECF_DIRECT_IO) {
+               crit_enter();
+               while (record->flags & HAMMER_RECF_DIRECT_IO) {
+                       record->flags |= HAMMER_RECF_DIRECT_WAIT;
+                       tsleep(&record->flags, 0, "hmdiow", 0);
+               }
+               crit_exit();
+       }
+
+       /*
+        * Invalidate any related buffer cache aliases.
+        */
+       if (record->flags & HAMMER_RECF_DIRECT_INVAL) {
+               KKASSERT(record->leaf.data_offset);
+               hammer_del_buffers(record->ip->hmp,
+                                  record->leaf.data_offset,
+                                  record->zone2_offset,
+                                  record->leaf.data_len);
+               record->flags &= ~HAMMER_RECF_DIRECT_INVAL;
        }
-       crit_exit();
 }
 
 /*
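
The rewritten hammer_io_direct_wait() above now has two distinct phases: first
drain any in-flight direct I/O, then drop the buffer cache aliases flagged by
HAMMER_RECF_DIRECT_INVAL so the reblocker and mirroring code cannot read stale
copies.  A stand-alone sketch of that ordering (hypothetical flag names and a
simulated completion; the real code sleeps in tsleep() and calls
hammer_del_buffers()):

    #include <stdio.h>

    #define SK_DIRECT_IO    0x0001          /* a direct write is still in flight */
    #define SK_DIRECT_WAIT  0x0002          /* a waiter is sleeping on completion */
    #define SK_DIRECT_INVAL 0x0004          /* stale buffer aliases must be dropped */

    static void
    direct_wait_sketch(int *flags)
    {
            /*
             * Phase 1: wait for the direct write to finish.  The completion
             * is simulated here; the kernel tsleep()s and the bio completion
             * handler clears the in-flight flag and issues the wakeup.
             */
            while (*flags & SK_DIRECT_IO) {
                    *flags |= SK_DIRECT_WAIT;
                    *flags &= ~SK_DIRECT_IO;        /* simulated completion */
            }

            /*
             * Phase 2: only after the data is on-media, invalidate any
             * aliased buffers covering the record's data extent.
             */
            if (*flags & SK_DIRECT_INVAL)
                    *flags &= ~SK_DIRECT_INVAL;
    }

    int
    main(void)
    {
            int flags = SK_DIRECT_IO | SK_DIRECT_INVAL;

            direct_wait_sketch(&flags);
            printf("flags after wait: %#x\n", (unsigned)flags); /* only the WAIT flag left */
            return (0);
    }
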
diff --git a/sys/vfs/hammer/hammer_ioctl.c b/sys/vfs/hammer/hammer_ioctl.c
index 17da8d1..f8ed40c 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.28.2.1 2008/07/16 18:39:31 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.28.2.2 2008/08/02 21:24:28 dillon Exp $
  */
 
 #include "hammer.h"
@@ -101,6 +101,12 @@ hammer_ioctl(hammer_inode_t ip, u_long com, caddr_t data, int fflag,
                                    (struct hammer_ioc_pseudofs_rw *)data);
                }
                break;
+       case HAMMERIOC_WAI_PSEUDOFS:
+               if (error == 0) {
+                       error = hammer_ioc_wait_pseudofs(&trans, ip,
+                                   (struct hammer_ioc_pseudofs_rw *)data);
+               }
+               break;
        case HAMMERIOC_MIRROR_READ:
                if (error == 0) {
                        error = hammer_ioc_mirror_read(&trans, ip,
diff --git a/sys/vfs/hammer/hammer_ioctl.h b/sys/vfs/hammer/hammer_ioctl.h
index 2c31b8e..29330df 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.h,v 1.21 2008/07/12 23:04:50 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.h,v 1.21.2.1 2008/08/02 21:24:28 dillon Exp $
  */
 /*
  * HAMMER ioctl's.  This file can be #included from userland
@@ -300,6 +300,7 @@ typedef union hammer_ioc_mrecord_any *hammer_ioc_mrecord_any_t;
 #define HAMMER_MREC_TYPE_SKIP          5       /* skip-range */
 #define HAMMER_MREC_TYPE_PASS          6       /* record for cmp only (pass) */
 #define HAMMER_MREC_TYPE_TERM          7       /* (userland only) */
+#define HAMMER_MREC_TYPE_IDLE          8       /* (userland only) */
 
 #define HAMMER_MREC_CRCOFF     (offsetof(struct hammer_ioc_mrecord_head, rec_size))
 #define HAMMER_MREC_HEADSIZE   sizeof(struct hammer_ioc_mrecord_head)
@@ -322,6 +323,7 @@ typedef union hammer_ioc_mrecord_any *hammer_ioc_mrecord_any_t;
 #define HAMMERIOC_UPG_PSEUDOFS _IOWR('h',9,struct hammer_ioc_pseudofs_rw)
 #define HAMMERIOC_DGD_PSEUDOFS _IOWR('h',10,struct hammer_ioc_pseudofs_rw)
 #define HAMMERIOC_RMR_PSEUDOFS _IOWR('h',11,struct hammer_ioc_pseudofs_rw)
+#define HAMMERIOC_WAI_PSEUDOFS _IOWR('h',12,struct hammer_ioc_pseudofs_rw)
 
 #endif
 
diff --git a/sys/vfs/hammer/hammer_mirror.c b/sys/vfs/hammer/hammer_mirror.c
index 34c63c3..6da1c01 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_mirror.c,v 1.15 2008/07/13 01:12:41 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_mirror.c,v 1.15.2.1 2008/08/02 21:24:28 dillon Exp $
  */
 /*
  * HAMMER mirroring ioctls - serialize and deserialize modifications made
@@ -186,10 +186,21 @@ retry:
                elm = &cursor.node->ondisk->elms[cursor.index].leaf;
                mirror->key_cur = elm->base;
 
-               if ((elm->base.create_tid < mirror->tid_beg ||
-                   elm->base.create_tid > mirror->tid_end) &&
-                   (elm->base.delete_tid < mirror->tid_beg ||
-                   elm->base.delete_tid > mirror->tid_end)) {
+               /*
+                * Determine if we should generate a PASS or a REC.  PASS
+                * records are records without any data payload.  Such
+                * records will be generated if the target is already expected
+                * to have the record, allowing it to delete the gaps.
+                *
+                * A PASS record is also used to perform deletions on the
+                * target.
+                *
+                * Such deletions are needed if the master or files on the
+                * master are no-history, or if the slave is so far behind
+                * the master has already been pruned.
+                */
+               if (elm->base.create_tid < mirror->tid_beg ||
+                   elm->base.create_tid > mirror->tid_end) {
                        bytes = sizeof(mrec.rec);
                        if (mirror->count + HAMMER_HEAD_DOALIGN(bytes) >
                            mirror->size) {
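
Restating the new PASS-versus-REC test above in isolation: only elements
created inside the requested [tid_beg, tid_end] window are shipped as full
records with a data payload; everything else becomes a data-less PASS so the
target can still locate and delete records it should no longer have.  A small
self-contained sketch of that decision (hypothetical helper, not part of
hammer_mirror.c):

    #include <stdio.h>

    typedef unsigned long long tid64_t;     /* stand-in for hammer_tid_t */

    enum mrec_kind { MREC_REC, MREC_PASS };

    /* Same condition as the hunk above: outside the TID window means PASS. */
    static enum mrec_kind
    classify(tid64_t create_tid, tid64_t tid_beg, tid64_t tid_end)
    {
            if (create_tid < tid_beg || create_tid > tid_end)
                    return (MREC_PASS);
            return (MREC_REC);
    }

    int
    main(void)
    {
            printf("%s\n", classify(50, 100, 200) == MREC_PASS ? "PASS" : "REC");
            printf("%s\n", classify(150, 100, 200) == MREC_PASS ? "PASS" : "REC");
            return (0);
    }
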
@@ -197,15 +208,7 @@ retry:
                        }
 
                        /*
-                        * Fill mrec.  PASS records are records which are
-                        * outside the TID range needed for the mirror
-                        * update.  They are sent without any data payload
-                        * because the mirroring target must still compare
-                        * records that fall outside the SKIP ranges to
-                        * determine what might need to be deleted.  Such
-                        * deletions are needed if the master or files on
-                        * the master are no-history, or if the slave is
-                        * so far behind the master has already been pruned.
+                        * Fill mrec.
                         */
                        mrec.head.signature = HAMMER_IOC_MIRROR_SIGNATURE;
                        mrec.head.type = HAMMER_MREC_TYPE_PASS;
@@ -246,7 +249,7 @@ retry:
                mrec.head.type = HAMMER_MREC_TYPE_REC;
                mrec.head.rec_size = bytes;
                mrec.rec.leaf = *elm;
-               if (elm->base.delete_tid >= mirror->tid_end)
+               if (elm->base.delete_tid > mirror->tid_end)
                        mrec.rec.leaf.base.delete_tid = 0;
                rec_crc = crc32(&mrec.head.rec_size,
                                sizeof(mrec.rec) - crc_start);
@@ -561,10 +564,12 @@ hammer_ioc_mirror_write_rec(hammer_cursor_t cursor,
         *
         * If the record exists only the delete_tid may be updated.
         *
-        * If the record does not exist we create it.  For now we
-        * ignore records with a non-zero delete_tid.  Note that
-        * mirror operations are effective an as-of operation and
-        * delete_tid can be 0 for mirroring purposes even if it is
+        * If the record does not exist we can create it only if the
+        * create_tid is not too old.  If the create_tid is too old
+        * it may have already been destroyed on the slave from pruning.
+        *
+        * Note that mirror operations are effectively as-of operations
+        * and delete_tid can be 0 for mirroring purposes even if it is
         * not actually 0 at the originator.
         *
         * These functions can return EDEADLK
@@ -576,10 +581,11 @@ hammer_ioc_mirror_write_rec(hammer_cursor_t cursor,
 
        if (error == 0 && hammer_mirror_check(cursor, mrec)) {
                error = hammer_mirror_update(cursor, mrec);
-       } else if (error == ENOENT && mrec->leaf.base.delete_tid == 0) {
-               error = hammer_mirror_write(cursor, mrec, uptr);
        } else if (error == ENOENT) {
-               error = 0;
+               if (mrec->leaf.base.create_tid >= mirror->tid_beg)
+                       error = hammer_mirror_write(cursor, mrec, uptr);
+               else
+                       error = 0;
        }
        if (error == 0 || error == EALREADY)
                mirror->key_cur = mrec->leaf.base;
@@ -630,7 +636,9 @@ hammer_ioc_mirror_write_pass(hammer_cursor_t cursor,
        error = hammer_mirror_delete_to(cursor, mirror);
 
        /*
-        * Locate the record and get past it by setting ATEDISK.
+        * Locate the record and get past it by setting ATEDISK.  Perform
+        * any necessary deletions.  We have no data payload and cannot
+        * create a new record.
         */
        if (error == 0) {
                mirror->key_cur = mrec->leaf.base;
@@ -638,10 +646,13 @@ hammer_ioc_mirror_write_pass(hammer_cursor_t cursor,
                cursor->flags |= HAMMER_CURSOR_BACKEND;
                cursor->flags &= ~HAMMER_CURSOR_INSERT;
                error = hammer_btree_lookup(cursor);
-               if (error == 0)
+               if (error == 0) {
+                       if (hammer_mirror_check(cursor, mrec))
+                               error = hammer_mirror_update(cursor, mrec);
                        cursor->flags |= HAMMER_CURSOR_ATEDISK;
-               else
+               } else {
                        cursor->flags &= ~HAMMER_CURSOR_ATEDISK;
+               }
                if (error == ENOENT)
                        error = 0;
        }
@@ -668,14 +679,13 @@ hammer_mirror_delete_to(hammer_cursor_t cursor,
        while (error == 0) {
                elm = &cursor->node->ondisk->elms[cursor->index].leaf;
                KKASSERT(elm->base.btype == HAMMER_BTREE_TYPE_RECORD);
+               cursor->flags |= HAMMER_CURSOR_ATEDISK;
                if (elm->base.delete_tid == 0) {
                        error = hammer_delete_at_cursor(cursor,
                                                        HAMMER_DELETE_ADJUST,
                                                        mirror->tid_end,
                                                        time_second,
                                                        1, NULL);
-                       if (error == 0)
-                               cursor->flags |= HAMMER_CURSOR_ATEDISK;
                }
                if (error == 0)
                        error = hammer_btree_iterate(cursor);
diff --git a/sys/vfs/hammer/hammer_mount.h b/sys/vfs/hammer/hammer_mount.h
index 24732e1..1d03143 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_mount.h,v 1.9.2.1 2008/07/19 18:46:20 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_mount.h,v 1.9.2.2 2008/08/02 21:24:28 dillon Exp $
  */
 
 #ifndef _SYS_TYPES_H_
diff --git a/sys/vfs/hammer/hammer_object.c b/sys/vfs/hammer/hammer_object.c
index 26b43e7..144bb29 100644
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.90.2.2 2008/07/19 04:51:09 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.90.2.3 2008/08/02 21:24:28 dillon Exp $
  */
 
 #include "hammer.h"
 
-static int hammer_mem_add(hammer_record_t record);
 static int hammer_mem_lookup(hammer_cursor_t cursor);
 static int hammer_mem_first(hammer_cursor_t cursor);
 static int hammer_frontend_trunc_callback(hammer_record_t record,
@@ -399,10 +398,13 @@ hammer_rel_mem_record(struct hammer_record *record)
 
                        /*
                         * We must wait for any direct-IO to complete before
-                        * we can destroy the record.
+                        * we can destroy the record because the bio may
+                        * have a reference to it.
                         */
-                       if (record->flags & HAMMER_RECF_DIRECT_IO)
+                       if (record->flags & 
+                          (HAMMER_RECF_DIRECT_IO | HAMMER_RECF_DIRECT_INVAL)) {
                                hammer_io_direct_wait(record);
+                       }
 
 
                        /*
@@ -668,10 +670,16 @@ hammer_ip_add_directory(struct hammer_transaction *trans,
        /*
         * The inode now has a dependancy and must be taken out of the idle
         * state.  An inode not in an idle state is given an extra reference.
+        *
+        * When transitioning to a SETUP state flag for an automatic reflush
+        * when the dependancies are disposed of if someone is waiting on
+        * the inode.
         */
        if (ip->flush_state == HAMMER_FST_IDLE) {
                hammer_ref(&ip->lock);
                ip->flush_state = HAMMER_FST_SETUP;
+               if (ip->flags & HAMMER_INODE_FLUSHW)
+                       ip->flags |= HAMMER_INODE_REFLUSH;
        }
        error = hammer_mem_add(record);
        if (error == 0) {
@@ -742,10 +750,16 @@ hammer_ip_del_directory(struct hammer_transaction *trans,
                 * The inode now has a dependancy and must be taken out of
                 * the idle state.  An inode not in an idle state is given
                 * an extra reference.
+                *
+                * When transitioning to a SETUP state flag for an automatic
+                * reflush when the dependancies are disposed of if someone
+                * is waiting on the inode.
                 */
                if (ip->flush_state == HAMMER_FST_IDLE) {
                        hammer_ref(&ip->lock);
                        ip->flush_state = HAMMER_FST_SETUP;
+                       if (ip->flags & HAMMER_INODE_FLUSHW)
+                               ip->flags |= HAMMER_INODE_REFLUSH;
                }
 
                error = hammer_mem_add(record);
@@ -843,6 +857,8 @@ hammer_ip_get_bulk(hammer_inode_t ip, off_t file_offset, int bytes)
  * flush a buffer cache buffer.  The frontend has locked the related buffer
  * cache buffers and we should be able to manipulate any overlapping
  * in-memory records.
+ *
+ * The caller is responsible for adding the returned record.
  */
 hammer_record_t
 hammer_ip_add_bulk(hammer_inode_t ip, off_t file_offset, void *data, int bytes,
@@ -851,7 +867,6 @@ hammer_ip_add_bulk(hammer_inode_t ip, off_t file_offset, void *data, int bytes,
        hammer_record_t record;
        hammer_record_t conflict;
        int zone;
-       int flags;
 
        /*
         * Deal with conflicting in-memory records.  We cannot have multiple
@@ -903,30 +918,8 @@ hammer_ip_add_bulk(hammer_inode_t ip, off_t file_offset, void *data, int bytes,
                                         HAMMER_LOCALIZE_MISC;
        record->leaf.data_len = bytes;
        hammer_crc_set_leaf(data, &record->leaf);
-       flags = record->flags;
-
-       hammer_ref(&record->lock);      /* mem_add eats a reference */
-       *errorp = hammer_mem_add(record);
-       if (*errorp) {
-               conflict = hammer_ip_get_bulk(ip, file_offset, bytes);
-               kprintf("hammer_ip_add_bulk: error %d conflict %p file_offset %lld bytes %d\n",
-                       *errorp, conflict, file_offset, bytes);
-               if (conflict)
-                       kprintf("conflict %lld %d\n", conflict->leaf.base.key, conflict->leaf.data_len);
-               if (conflict)
-                       hammer_rel_mem_record(conflict);
-       }
        KKASSERT(*errorp == 0);
-       conflict = hammer_ip_get_bulk(ip, file_offset, bytes);
-       if (conflict != record) {
-               kprintf("conflict mismatch %p %p %08x\n", conflict, record, record->flags);
-               if (conflict)
-                   kprintf("conflict mismatch %lld/%d %lld/%d\n", conflict->leaf.base.key, conflict->leaf.data_len, record->leaf.base.key, record->leaf.data_len);
-       }
-       KKASSERT(conflict == record);
-       hammer_rel_mem_record(conflict);
-
-       return (record);
+       return(record);
 }
 
 /*
@@ -1018,6 +1011,13 @@ hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record)
        KKASSERT(record->leaf.base.localization != 0);
 
        /*
+        * Any direct-write related to the record must complete before we
+        * can sync the record to the on-disk media.
+        */
+       if (record->flags & (HAMMER_RECF_DIRECT_IO | HAMMER_RECF_DIRECT_INVAL))
+               hammer_io_direct_wait(record);
+
+       /*
         * If this is a bulk-data record placemarker there may be an existing
         * record on-disk, indicating a data overwrite.  If there is the
         * on-disk record must be deleted before we can insert our new record.
@@ -1164,13 +1164,6 @@ hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record)
                record->leaf.data_crc = 0;
        }
 
-       /*
-        * If the record's data was direct-written we cannot insert
-        * it until the direct-IO has completed.
-        */
-       if (record->flags & HAMMER_RECF_DIRECT_IO)
-               hammer_io_direct_wait(record);
-
        error = hammer_btree_insert(cursor, &record->leaf, &doprop);
        if (hammer_debug_inode && error)
                kprintf("BTREE INSERT error %d @ %016llx:%d key %016llx\n", error, cursor->node->node_offset, cursor->index, record->leaf.base.key);
@@ -1224,7 +1217,6 @@ done:
  * A copy of the temporary record->data pointer provided by the caller
  * will be made.
  */
-static
 int
 hammer_mem_add(hammer_record_t record)
 {
@@ -1925,7 +1917,6 @@ hammer_ip_delete_record(hammer_cursor_t cursor, hammer_inode_t ip,
                        hammer_tid_t tid)
 {
        hammer_record_t iprec;
-       hammer_btree_elm_t elm;
        hammer_mount_t hmp;
        int error;
 
@@ -1960,7 +1951,6 @@ hammer_ip_delete_record(hammer_cursor_t cursor, hammer_inode_t ip,
         * hammer_delete_at_cursor() not to.
         */
        error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_LEAF);
-       elm = NULL;
 
        if (error == 0) {
                error = hammer_delete_at_cursor(
diff --git a/sys/vfs/hammer/hammer_ondisk.c b/sys/vfs/hammer/hammer_ondisk.c
index b1a1fc9..de20a32 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.69.2.3 2008/07/30 07:53:01 mneumann Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.69.2.4 2008/08/02 21:24:28 dillon Exp $
  */
 /*
  * Manage HAMMER's on-disk structures.  These routines are primarily
@@ -526,7 +526,7 @@ again:
                hammer_ref(&buffer->io.lock);
 
                /*
-                * Onced refed the ondisk field will not be cleared by
+                * Once refed the ondisk field will not be cleared by
                 * any other action.
                 */
                if (buffer->ondisk && buffer->io.loading == 0) {
@@ -1478,6 +1478,7 @@ hammer_sync_hmp(hammer_mount_t hmp, int waitfor)
                 hammer_flusher_sync(hmp);
        } else {
                 hammer_flusher_async(hmp, NULL);
+                hammer_flusher_async(hmp, NULL);
        }
        return(info.error);
 }
diff --git a/sys/vfs/hammer/hammer_pfs.c b/sys/vfs/hammer/hammer_pfs.c
index 920c507..e5ba329 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_pfs.c,v 1.1.2.3 2008/07/19 18:46:20 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_pfs.c,v 1.1.2.4 2008/08/02 21:24:28 dillon Exp $
  */
 /*
  * HAMMER PFS ioctls - Manage pseudo-fs configurations
@@ -76,9 +76,13 @@ hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
         * If the PFS is a master the sync tid is set by normal operation
         * rather then the mirroring code, and will always track the
         * real HAMMER filesystem.
+        *
+        * We use flush_tid1, which is the highest fully committed TID.
+        * flush_tid2 is the TID most recently flushed, but the UNDO hasn't
+        * caught up to it yet so a crash will roll us back to flush_tid1.
         */
        if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0)
-               pfsm->pfsd.sync_end_tid = trans->rootvol->ondisk->vol0_next_tid;
+               pfsm->pfsd.sync_end_tid = trans->hmp->flush_tid1;
 
        /*
         * Copy out to userland.
@@ -126,6 +130,11 @@ hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
                        error = hammer_mkroot_pseudofs(trans, cred, pfsm);
                if (error == 0)
                        error = hammer_save_pseudofs(trans, pfsm);
+
+               /*
+                * Wakeup anyone waiting for a TID update for this PFS
+                */
+               wakeup(&pfsm->pfsd.sync_end_tid);
                hammer_rel_pseudofs(trans->hmp, pfsm);
        }
        return(error);
@@ -256,6 +265,48 @@ hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
 }
 
 /*
+ * Wait for the PFS to sync past the specified TID
+ */
+int
+hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
+                        struct hammer_ioc_pseudofs_rw *pfs)
+{
+       hammer_pseudofs_inmem_t pfsm;
+       struct hammer_pseudofs_data pfsd;
+       u_int32_t localization;
+       hammer_tid_t tid;
+       void *waitp;
+       int error;
+
+       if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
+               return(error);
+       localization = (u_int32_t)pfs->pfs_id << 16;
+
+       if ((error = copyin(pfs->ondisk, &pfsd, sizeof(pfsd))) != 0)
+               return(error);
+
+       pfsm = hammer_load_pseudofs(trans, localization, &error);
+       if (error == 0) {
+               if (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) {
+                       tid = pfsm->pfsd.sync_end_tid;
+                       waitp = &pfsm->pfsd.sync_end_tid;
+               } else {
+                       tid = trans->hmp->flush_tid1;
+                       waitp = &trans->hmp->flush_tid1;
+               }
+               if (tid <= pfsd.sync_end_tid)
+                       tsleep(waitp, PCATCH, "hmrmwt", 0);
+       }
+       hammer_rel_pseudofs(trans->hmp, pfsm);
+       if (error == EINTR) {
+               pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
+               error = 0;
+       }
+       return(error);
+}
+
+
+/*
  * Auto-detect the pseudofs and do basic bounds checking.
  */
 static
diff --git a/sys/vfs/hammer/hammer_prune.c b/sys/vfs/hammer/hammer_prune.c
index 0c807a5..ec56d5d 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_prune.c,v 1.18 2008/07/14 03:20:49 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_prune.c,v 1.18.2.1 2008/08/02 21:24:28 dillon Exp $
  */
 
 #include "hammer.h"
diff --git a/sys/vfs/hammer/hammer_reblock.c b/sys/vfs/hammer/hammer_reblock.c
index ff677a4..94e80c0 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.32.2.1 2008/07/16 18:39:32 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.32.2.2 2008/08/02 21:24:28 dillon Exp $
  */
 /*
  * HAMMER reblocker - This code frees up fragmented physical space
diff --git a/sys/vfs/hammer/hammer_recover.c b/sys/vfs/hammer/hammer_recover.c
index d1d9980..6a1cffe 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.28.2.1 2008/07/26 05:37:20 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.28.2.2 2008/08/02 21:24:28 dillon Exp $
  */
 
 #include "hammer.h"
diff --git a/sys/vfs/hammer/hammer_signal.c b/sys/vfs/hammer/hammer_signal.c
index c0ad1bf..95a5dcf 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_signal.c,v 1.1 2008/03/20 06:08:40 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_signal.c,v 1.1.2.1 2008/08/02 21:24:28 dillon Exp $
  */
 /*
  * Check for interruption when doing a long ioctl operation.
diff --git a/sys/vfs/hammer/hammer_subs.c b/sys/vfs/hammer/hammer_subs.c
index 573c8e7..dd6c4ab 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.34 2008/07/11 01:22:29 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.34.2.1 2008/08/02 21:24:28 dillon Exp $
  */
 /*
  * HAMMER structural locking
diff --git a/sys/vfs/hammer/hammer_transaction.c b/sys/vfs/hammer/hammer_transaction.c
index 3a8d2c7..5c722af 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_transaction.c,v 1.22.2.2 2008/07/19 18:46:20 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_transaction.c,v 1.22.2.3 2008/08/02 21:24:28 dillon Exp $
  */
 
 #include "hammer.h"
diff --git a/sys/vfs/hammer/hammer_undo.c b/sys/vfs/hammer/hammer_undo.c
index 6dcc704..6514696 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_undo.c,v 1.18.2.2 2008/07/18 00:21:09 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_undo.c,v 1.18.2.3 2008/08/02 21:24:28 dillon Exp $
  */
 
 /*
diff --git a/sys/vfs/hammer/hammer_vfsops.c b/sys/vfs/hammer/hammer_vfsops.c
index 6ed236c..406e4b0 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.63.2.5 2008/07/30 07:53:01 mneumann Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.63.2.6 2008/08/02 21:24:28 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -248,7 +248,8 @@ static struct vfsops hammer_vfsops = {
        .vfs_checkexp   = hammer_vfs_checkexp
 };
 
-MALLOC_DEFINE(M_HAMMER, "hammer-mount", "hammer mount");
+MALLOC_DEFINE(M_HAMMER, "hammer-general", "hammer general");
+MALLOC_DEFINE(M_HAMMER_INO, "hammer-inodes", "hammer inodes");
 
 VFS_SET(hammer_vfsops, hammer, 0);
 MODULE_VERSION(hammer, 1);
@@ -582,6 +583,8 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data,
         * on-disk first_offset represents the LAST flush cycle.
         */
        hmp->next_tid = rootvol->ondisk->vol0_next_tid;
+       hmp->flush_tid1 = hmp->next_tid;
+       hmp->flush_tid2 = hmp->next_tid;
        bcopy(rootvol->ondisk->vol0_blockmap, hmp->blockmap,
              sizeof(hmp->blockmap));
        hmp->copy_stat_freebigblocks = rootvol->ondisk->vol0_stat_freebigblocks;
@@ -871,8 +874,6 @@ hammer_vfs_sync(struct mount *mp, int waitfor)
 
        if (panicstr == NULL) {
                error = hammer_sync_hmp(hmp, waitfor);
-               if (error == 0)
-                       error = hammer_sync_hmp(hmp, waitfor);
        } else {
                error = EIO;
        }
diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c
index b669d89..9c2dfda 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.91.2.2 2008/07/19 04:51:09 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.91.2.3 2008/08/02 21:24:28 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -2610,7 +2610,6 @@ hammer_vop_strategy_write(struct vop_strategy_args *ap)
                                    bytes, &error);
        if (record) {
                hammer_io_direct_write(hmp, record, bio);
-               hammer_rel_mem_record(record);
                if (ip->rsv_recs > 1 && hmp->rsv_recs > hammer_limit_recs)
                        hammer_flush_inode(ip, 0);
        } else {