MFC HAMMER 2.0:02 - rmdir, stability
author    Matthew Dillon <dillon@dragonflybsd.org>  Sun, 10 Aug 2008 17:01:09 +0000
committer Matthew Dillon <dillon@dragonflybsd.org>  Sun, 10 Aug 2008 17:01:09 +0000
sys/vfs/hammer/hammer_inode.c
sys/vfs/hammer/hammer_object.c
sys/vfs/hammer/hammer_vnops.c

diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c
index fac61e8..cbded3a 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.103.2.4 2008/08/06 15:41:56 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.103.2.5 2008/08/10 17:01:08 dillon Exp $
  */
 
 #include "hammer.h"
@@ -1606,9 +1606,9 @@ hammer_setup_parent_inodes_helper(hammer_record_t record,
 #endif
        if (pip->flush_group == flg) {
                /*
-                * If the parent is in the same flush group as us we can
-                * just set the record to a flushing state and we are
-                * done.
+                * Because we have not calculated nlinks yet, we can just
+                * set records to the flush state if the parent is in
+                * the same flush group as we are.
                 */
                record->flush_state = HAMMER_FST_FLUSH;
                record->flush_group = flg;
@@ -1674,12 +1674,14 @@ hammer_flush_inode_core(hammer_inode_t ip, hammer_flush_group_t flg, int flags)
         * Figure out how many in-memory records we can actually flush
         * (not including inode meta-data, buffers, etc).
         */
+       KKASSERT((ip->flags & HAMMER_INODE_WOULDBLOCK) == 0);
        if (flags & HAMMER_FLUSH_RECURSION) {
                /*
                 * If this is an upwards recursion we do not want to
                 * recurse down again!
                 */
                go_count = 1;
+#if 0
        } else if (ip->flags & HAMMER_INODE_WOULDBLOCK) {
                /*
                 * No new records are added if we must complete a flush
@@ -1691,6 +1693,7 @@ hammer_flush_inode_core(hammer_inode_t ip, hammer_flush_group_t flg, int flags)
                                   hammer_syncgrp_child_callback, NULL);
 #endif
                go_count = 1;
+#endif
        } else {
                /*
                 * Normal flush, scan records and bring them into the flush.
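
With the WOULDBLOCK special case compiled out above, every non-recursive
flush now falls through to this normal branch.  For orientation, that
branch derives go_count by scanning the inode's in-memory record tree;
the following is a sketch from the surrounding file, not part of this
diff, so treat the exact form as an assumption:

    go_count = hammer_rec_rb_tree_RB_SCAN(&ip->rec_tree, NULL,
                                          hammer_setup_child_callback,
                                          NULL);

A NULL scancmp visits every record, and hammer_setup_child_callback()
returns 1 for each record pulled into the flush group, so the result
effectively counts the records staged for this flush.
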
@@ -1750,17 +1753,8 @@ hammer_flush_inode_core(hammer_inode_t ip, hammer_flush_group_t flg, int flags)
         * NOTE: The DELETING flag is a mod flag, but it is also sticky,
         * and stays in ip->flags.  Once set, it stays set until the
         * inode is destroyed.
-        *
-        * NOTE: If a truncation from a previous flush cycle had to be
-        * continued into this one, the TRUNCATED flag will still be
-        * set in sync_flags as will WOULDBLOCK.  When this occurs
-        * we CANNOT safely integrate a new truncation from the front-end
-        * because there may be data records in-memory assigned a flush
-        * state from the previous cycle that are supposed to be flushed
-        * before the next frontend truncation.
         */
-       if ((ip->flags & (HAMMER_INODE_TRUNCATED | HAMMER_INODE_WOULDBLOCK)) ==
-           HAMMER_INODE_TRUNCATED) {
+       if (ip->flags & HAMMER_INODE_TRUNCATED) {
                KKASSERT((ip->sync_flags & HAMMER_INODE_TRUNCATED) == 0);
                ip->sync_trunc_off = ip->trunc_off;
                ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
@@ -1960,12 +1954,7 @@ hammer_setup_child_callback(hammer_record_t rec, void *data)
                break;
        case HAMMER_FST_FLUSH:
                /* 
-                * If the WOULDBLOCK flag is set records may have been left
-                * over from a previous flush attempt.  The flush group will
-                * have been left intact - we are probably reflushing it
-                * now.
-                *
-                * If a flush error occured ip->error will be non-zero.
+                * The flush_group should already match.
                 */
                KKASSERT(rec->flush_group == flg);
                r = 1;
@@ -2081,10 +2070,11 @@ hammer_flush_inode_done(hammer_inode_t ip, int error)
                /*
                 * We were unable to flush out all our records, leave the
                 * inode in a flush state and in the current flush group.
+                * The flush group will be re-run.
                 *
-                * This occurs if the UNDO block gets too full
-                * or there is too much dirty meta-data and allows the
-                * flusher to finalize the UNDO block and then re-flush.
+                * This occurs if the UNDO block gets too full or there is
+                * too much dirty meta-data and allows the flusher to
+                * finalize the UNDO block and then re-flush.
                 */
                ip->flags &= ~HAMMER_INODE_WOULDBLOCK;
                dorel = 0;
@@ -2124,21 +2114,21 @@ hammer_flush_inode_done(hammer_inode_t ip, int error)
                        ip->flags &= ~HAMMER_INODE_FLUSHW;
                        wakeup(&ip->flags);
                }
-       }
 
-       /*
-        * If the frontend made more changes and requested another flush,
-        * then try to get it running.
-        *
-        * Reflushes are aborted when the inode is errored out.
-        */
-       if (ip->flags & HAMMER_INODE_REFLUSH) {
-               ip->flags &= ~HAMMER_INODE_REFLUSH;
-               if (ip->flags & HAMMER_INODE_RESIGNAL) {
-                       ip->flags &= ~HAMMER_INODE_RESIGNAL;
-                       hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
-               } else {
-                       hammer_flush_inode(ip, 0);
+               /*
+                * If the frontend made more changes and requested another
+                * flush, then try to get it running.
+                *
+                * Reflushes are aborted when the inode is errored out.
+                */
+               if (ip->flags & HAMMER_INODE_REFLUSH) {
+                       ip->flags &= ~HAMMER_INODE_REFLUSH;
+                       if (ip->flags & HAMMER_INODE_RESIGNAL) {
+                               ip->flags &= ~HAMMER_INODE_RESIGNAL;
+                               hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
+                       } else {
+                               hammer_flush_inode(ip, 0);
+                       }
                }
        }
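
The REFLUSH hand-off above now runs only in the success path, so an
errored inode is no longer automatically re-queued.  The front-end half
of the handshake is condensed below as a hedged sketch (not verbatim
from this commit) of how hammer_flush_inode() typically reacts when an
inode is modified while a flush is in progress:

    /*
     * Sketch: a modification arriving while the inode is being
     * flushed cannot join the running flush group, so it leaves a
     * note for hammer_flush_inode_done() to pick up afterwards.
     */
    if (ip->flush_state == HAMMER_FST_FLUSH) {
            ip->flags |= HAMMER_INODE_REFLUSH;
            if (flags & HAMMER_FLUSH_SIGNAL)
                    ip->flags |= HAMMER_INODE_RESIGNAL;
    }
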
 
@@ -2317,7 +2307,7 @@ done:
 }
 
 /*
- * XXX error handling
+ * Backend function called by the flusher to sync an inode to media.
  */
 int
 hammer_sync_inode(hammer_transaction_t trans, hammer_inode_t ip)
diff --git a/sys/vfs/hammer/hammer_object.c b/sys/vfs/hammer/hammer_object.c
index e4d306c..e84592a 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.90.2.4 2008/08/06 15:41:56 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.90.2.5 2008/08/10 17:01:08 dillon Exp $
  */
 
 #include "hammer.h"
@@ -40,6 +40,7 @@ static int hammer_mem_lookup(hammer_cursor_t cursor);
 static int hammer_mem_first(hammer_cursor_t cursor);
 static int hammer_frontend_trunc_callback(hammer_record_t record,
                                void *data __unused);
+static int hammer_bulk_scan_callback(hammer_record_t record, void *data);
 static int hammer_record_needs_overwrite_delete(hammer_record_t record);
 static int hammer_delete_general(hammer_cursor_t cursor, hammer_inode_t ip,
                      hammer_btree_leaf_elm_t leaf);
@@ -49,6 +50,11 @@ struct rec_trunc_info {
        int64_t         trunc_off;
 };
 
+struct hammer_bulk_info {
+       hammer_record_t record;
+       struct hammer_btree_leaf_elm leaf;
+};
+
 /*
  * Red-black tree support.  Comparison code for insertion.
  */
@@ -108,52 +114,46 @@ hammer_rec_cmp(hammer_base_elm_t elm, hammer_record_t rec)
 }
 
 /*
- * Special LOOKUP_INFO to locate an overlapping record.  This used by
- * the reservation code to implement small-block records (whos keys will
- * be different depending on data_len, when representing the same base
- * offset).
+ * Ranged scan to locate overlapping record(s).  This is used by
+ * hammer_ip_get_bulk() to locate an overlapping record.  We have
+ * to use a ranged scan because the keys for data records with the
+ * same file base offset can be different due to differing data_len's.
  *
  * NOTE: The base file offset of a data record is (key - data_len), not (key).
  */
 static int
-hammer_rec_overlap_compare(hammer_btree_leaf_elm_t leaf, hammer_record_t rec)
+hammer_rec_overlap_cmp(hammer_record_t rec, void *data)
 {
-       if (leaf->base.rec_type < rec->leaf.base.rec_type)
+       struct hammer_bulk_info *info = data;
+       hammer_btree_leaf_elm_t leaf = &info->leaf;
+
+       if (rec->leaf.base.rec_type < leaf->base.rec_type)
                return(-3);
-       if (leaf->base.rec_type > rec->leaf.base.rec_type)
+       if (rec->leaf.base.rec_type > leaf->base.rec_type)
                return(3);
 
        /*
         * Overlap compare
         */
        if (leaf->base.rec_type == HAMMER_RECTYPE_DATA) {
-               /* leaf_end <= rec_beg */
-               if (leaf->base.key <= rec->leaf.base.key - rec->leaf.data_len)
-                       return(-2);
-               /* leaf_beg >= rec_end */
-               if (leaf->base.key - leaf->data_len >= rec->leaf.base.key)
+               /* rec_beg >= leaf_end */
+               if (rec->leaf.base.key - rec->leaf.data_len >= leaf->base.key)
                        return(2);
+               /* rec_end <= leaf_beg */
+               if (rec->leaf.base.key <= leaf->base.key - leaf->data_len)
+                       return(-2);
        } else {
-               if (leaf->base.key < rec->leaf.base.key)
+               if (rec->leaf.base.key < leaf->base.key)
                        return(-2);
-               if (leaf->base.key > rec->leaf.base.key)
+               if (rec->leaf.base.key > leaf->base.key)
                        return(2);
        }
 
        /*
-        * Never match against an item deleted by the front-end.
-        * leaf is less then rec if rec is marked deleted.
-        *
-        * We must still return the proper code for the scan to continue
-        * along the correct branches.
+        * We have to return 0 at this point, even if DELETED_FE is set,
+        * because returning anything else will cause the scan to ignore
+        * one of the branches when we really want it to check both.
         */
-       if (rec->flags & HAMMER_RECF_DELETED_FE) {
-               if (leaf->base.key < rec->leaf.base.key)
-                       return(-2);
-               if (leaf->base.key > rec->leaf.base.key)
-                       return(2);
-               return(-1);
-       }
         return(0);
 }
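
Because a data record's key is its ending file offset, a record spans
the half-open range [key - data_len, key).  The standalone sketch below
(hypothetical helper and values, illustration only) restates the DATA
case of the comparator and works through its three outcomes:

    /* overlap_class(): hypothetical restatement of the DATA case. */
    static int
    overlap_class(int64_t rec_key, int64_t rec_len,
                  int64_t probe_key, int64_t probe_len)
    {
            if (rec_key - rec_len >= probe_key)     /* rec_beg >= probe_end */
                    return(2);
            if (rec_key <= probe_key - probe_len)   /* rec_end <= probe_beg */
                    return(-2);
            return(0);                              /* ranges overlap */
    }

    /*
     * A record with key 0x8000 and data_len 0x1000 covers [0x7000,0x8000):
     *   overlap_class(0x8000, 0x1000, 0x7000, 0x1000) == 2   probe [0x6000,0x7000)
     *   overlap_class(0x8000, 0x1000, 0x9000, 0x1000) == -2  probe [0x8000,0x9000)
     *   overlap_class(0x8000, 0x1000, 0x8800, 0x1000) == 0   probe [0x7800,0x8800)
     */
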
 
@@ -240,8 +240,6 @@ hammer_rec_trunc_cmp(hammer_record_t rec, void *data)
 }
 
 RB_GENERATE(hammer_rec_rb_tree, hammer_record, rb_node, hammer_rec_rb_compare);
-RB_GENERATE_XLOOKUP(hammer_rec_rb_tree, INFO, hammer_record, rb_node,
-                   hammer_rec_overlap_compare, hammer_btree_leaf_elm_t);
 
 /*
  * Allocate a record for the caller to finish filling in.  The record is
@@ -827,27 +825,44 @@ hammer_ip_add_record(struct hammer_transaction *trans, hammer_record_t record)
  * to queue the BIO to the flusher.  Only the related record gets queued
  * to the flusher.
  */
+
 static hammer_record_t
 hammer_ip_get_bulk(hammer_inode_t ip, off_t file_offset, int bytes)
 {
-       hammer_record_t record;
-       struct hammer_btree_leaf_elm leaf;
+       struct hammer_bulk_info info;
+       
+       bzero(&info, sizeof(info));
+       info.leaf.base.obj_id = ip->obj_id;
+       info.leaf.base.key = file_offset + bytes;
+       info.leaf.base.create_tid = 0;
+       info.leaf.base.delete_tid = 0;
+       info.leaf.base.rec_type = HAMMER_RECTYPE_DATA;
+       info.leaf.base.obj_type = 0;                            /* unused */
+       info.leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;        /* unused */
+       info.leaf.base.localization = ip->obj_localization +    /* unused */
+                                     HAMMER_LOCALIZE_MISC;
+       info.leaf.data_len = bytes;
 
-       bzero(&leaf, sizeof(leaf));
-       leaf.base.obj_id = ip->obj_id;
-       leaf.base.key = file_offset + bytes;
-       leaf.base.create_tid = 0;
-       leaf.base.delete_tid = 0;
-       leaf.base.rec_type = HAMMER_RECTYPE_DATA;
-       leaf.base.obj_type = 0;                 /* unused */
-       leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;     /* unused */
-       leaf.base.localization = ip->obj_localization + HAMMER_LOCALIZE_MISC;
-       leaf.data_len = bytes;
-
-       record = hammer_rec_rb_tree_RB_LOOKUP_INFO(&ip->rec_tree, &leaf);
-       if (record)
-               hammer_ref(&record->lock);
-       return(record);
+       hammer_rec_rb_tree_RB_SCAN(&ip->rec_tree, hammer_rec_overlap_cmp,
+                                  hammer_bulk_scan_callback, &info);
+
+       return(info.record);    /* may be NULL */
+}
+
+/*
+ * Take records vetted by overlap_cmp.  The first non-deleted record
+ * (if any) stops the scan.
+ */
+static int
+hammer_bulk_scan_callback(hammer_record_t record, void *data)
+{
+       struct hammer_bulk_info *info = data;
+
+       if (record->flags & HAMMER_RECF_DELETED_FE)
+               return(0);
+       hammer_ref(&record->lock);
+       info->record = record;
+       return(-1);                     /* stop scan */
 }
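
In the RB_SCAN form, hammer_rec_overlap_cmp() steers the traversal (a
negative return means the record sorts below the probe range, positive
means above, and 0 hands the record to the callback) while the callback
skips records marked DELETED_FE, references the first survivor, and
returns -1 to stop the scan.  The caller therefore owns one reference
on the result and must drop it when done; assuming
hammer_rel_mem_record() as the release path (per the HAMMER record API,
an assumption here), usage looks roughly like:

    conflict = hammer_ip_get_bulk(ip, file_offset, bytes);
    if (conflict) {
            /* ... resolve the conflicting in-memory record ... */
            hammer_rel_mem_record(conflict);        /* drop our ref */
    }
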
 
 /*
@@ -870,10 +885,10 @@ hammer_ip_add_bulk(hammer_inode_t ip, off_t file_offset, void *data, int bytes,
 
        /*
         * Deal with conflicting in-memory records.  We cannot have multiple
-        * in-memory records for the same offset without seriously confusing
-        * the backend, including but not limited to the backend issuing
-        * delete-create-delete sequences and asserting on the delete_tid
-        * being the same as the create_tid.
+        * in-memory records for the same base offset without seriously
+        * confusing the backend, including but not limited to the backend
+        * issuing delete-create-delete or create-delete-create sequences
+        * and asserting on the delete_tid being the same as the create_tid.
         *
         * If we encounter a record with the backend interlock set we cannot
         * immediately delete it without confusing the backend.
diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c
index e98d5bb..2ff9f11 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.91.2.4 2008/08/06 15:41:56 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.91.2.5 2008/08/10 17:01:09 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -156,7 +156,8 @@ struct hammer_inode *HammerTruncIp;
 #endif
 
 static int hammer_dounlink(hammer_transaction_t trans, struct nchandle *nch,
-                          struct vnode *dvp, struct ucred *cred, int flags);
+                          struct vnode *dvp, struct ucred *cred,
+                          int flags, int isdir);
 static int hammer_vop_strategy_read(struct vop_strategy_args *ap);
 static int hammer_vop_strategy_write(struct vop_strategy_args *ap);
 
@@ -1491,7 +1492,7 @@ hammer_vop_nremove(struct vop_nremove_args *ap)
 
        hammer_start_transaction(&trans, dip->hmp);
        ++hammer_stats_file_iopsw;
-       error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0);
+       error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0, 0);
        hammer_done_transaction(&trans);
 
        return (error);
@@ -1550,7 +1551,8 @@ hammer_vop_nrename(struct vop_nrename_args *ap)
         * Force the inode sync-time to match the transaction so it is
         * in-sync with the creation of the target directory entry.
         */
-       error = hammer_dounlink(&trans, ap->a_tnch, ap->a_tdvp, ap->a_cred, 0);
+       error = hammer_dounlink(&trans, ap->a_tnch, ap->a_tdvp,
+                               ap->a_cred, 0, -1);
        if (error == 0 || error == ENOENT) {
                error = hammer_ip_add_directory(&trans, tdip,
                                                tncp->nc_name, tncp->nc_nlen,
@@ -1652,8 +1654,6 @@ hammer_vop_nrmdir(struct vop_nrmdir_args *ap)
        int error;
 
        dip = VTOI(ap->a_dvp);
-       if (dip->ino_data.obj_type != HAMMER_OBJTYPE_DIRECTORY)
-               return(ENOTDIR);
 
        if (hammer_nohistory(dip) == 0 &&
            (error = hammer_checkspace(dip->hmp, HAMMER_CHKSPC_REMOVE)) != 0) {
@@ -1662,7 +1662,7 @@ hammer_vop_nrmdir(struct vop_nrmdir_args *ap)
 
        hammer_start_transaction(&trans, dip->hmp);
        ++hammer_stats_file_iopsw;
-       error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0);
+       error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0, 1);
        hammer_done_transaction(&trans);
 
        return (error);
@@ -1994,7 +1994,7 @@ hammer_vop_nwhiteout(struct vop_nwhiteout_args *ap)
        hammer_start_transaction(&trans, dip->hmp);
        ++hammer_stats_file_iopsw;
        error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp,
-                               ap->a_cred, ap->a_flags);
+                               ap->a_cred, ap->a_flags, -1);
        hammer_done_transaction(&trans);
 
        return (error);
@@ -2630,7 +2630,8 @@ hammer_vop_strategy_write(struct vop_strategy_args *ap)
  */
 static int
 hammer_dounlink(hammer_transaction_t trans, struct nchandle *nch,
-               struct vnode *dvp, struct ucred *cred, int flags)
+               struct vnode *dvp, struct ucred *cred, 
+               int flags, int isdir)
 {
        struct namecache *ncp;
        hammer_inode_t dip;
@@ -2713,6 +2714,20 @@ retry:
                }
 
                /*
+                * If isdir >= 0 we validate that the entry is or is not a
+                * directory.  If isdir < 0 we don't care.
+                */
+               if (error == 0 && isdir >= 0) {
+                       if (isdir &&
+                           ip->ino_data.obj_type != HAMMER_OBJTYPE_DIRECTORY) {
+                               error = ENOTDIR;
+                       } else if (isdir == 0 &&
+                           ip->ino_data.obj_type == HAMMER_OBJTYPE_DIRECTORY) {
+                               error = EISDIR;
+                       }
+               }
+
+               /*
                 * If we are trying to remove a directory the directory must
                 * be empty.
                 *
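
With the early ENOTDIR test removed from hammer_vop_nrmdir() above,
type enforcement now happens here, once the directory entry has been
resolved to an inode.  The call sites in this diff map as: nremove
passes 0 (target must not be a directory, else EISDIR), nrmdir passes 1
(target must be a directory, else ENOTDIR), and nrename and nwhiteout
pass -1 (either type is fine).  A standalone restatement of the test
follows; check_isdir() is a hypothetical helper, not part of the
commit:

    static int
    check_isdir(hammer_inode_t ip, int isdir)
    {
            int is_directory =
                (ip->ino_data.obj_type == HAMMER_OBJTYPE_DIRECTORY);

            if (isdir < 0)                  /* caller does not care */
                    return(0);
            if (isdir && !is_directory)
                    return(ENOTDIR);        /* e.g. rmdir on a file */
            if (!isdir && is_directory)
                    return(EISDIR);         /* e.g. unlink on a dir */
            return(0);
    }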