From: Matthew Dillon
Date: Sun, 10 Aug 2008 17:01:09 +0000 (+0000)
Subject: MFC HAMMER 2.0:02 - rmdir, stability
X-Git-Tag: v2.0.1~26
X-Git-Url: https://repo.or.cz/w/dragonfly.git/commitdiff_plain/4c68eed06a99a6c8b07283cf51d8485c104e36cb

MFC HAMMER 2.0:02 - rmdir, stability
---

diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c
index fac61e8cef..cbded3a4f1 100644
--- a/sys/vfs/hammer/hammer_inode.c
+++ b/sys/vfs/hammer/hammer_inode.c
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.103.2.4 2008/08/06 15:41:56 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.103.2.5 2008/08/10 17:01:08 dillon Exp $
  */
 
 #include "hammer.h"
@@ -1606,9 +1606,9 @@ hammer_setup_parent_inodes_helper(hammer_record_t record,
 #endif
         if (pip->flush_group == flg) {
                 /*
-                 * If the parent is in the same flush group as us we can
-                 * just set the record to a flushing state and we are
-                 * done.
+                 * Because we have not calculated nlinks yet we can just
+                 * set records to the flush state if the parent is in
+                 * the same flush group as we are.
                  */
                 record->flush_state = HAMMER_FST_FLUSH;
                 record->flush_group = flg;
@@ -1674,12 +1674,14 @@ hammer_flush_inode_core(hammer_inode_t ip, hammer_flush_group_t flg, int flags)
          * Figure out how many in-memory records we can actually flush
          * (not including inode meta-data, buffers, etc).
          */
+        KKASSERT((ip->flags & HAMMER_INODE_WOULDBLOCK) == 0);
         if (flags & HAMMER_FLUSH_RECURSION) {
                 /*
                  * If this is a upwards recursion we do not want to
                  * recurse down again!
                  */
                 go_count = 1;
+#if 0
         } else if (ip->flags & HAMMER_INODE_WOULDBLOCK) {
                 /*
                  * No new records are added if we must complete a flush
@@ -1691,6 +1693,7 @@ hammer_flush_inode_core(hammer_inode_t ip, hammer_flush_group_t flg, int flags)
                                    hammer_syncgrp_child_callback, NULL);
 #endif
                 go_count = 1;
+#endif
         } else {
                 /*
                  * Normal flush, scan records and bring them into the flush.
@@ -1750,17 +1753,8 @@ hammer_flush_inode_core(hammer_inode_t ip, hammer_flush_group_t flg, int flags)
          * NOTE: The DELETING flag is a mod flag, but it is also sticky,
          *       and stays in ip->flags.  Once set, it stays set until the
          *       inode is destroyed.
-         *
-         * NOTE: If a truncation from a previous flush cycle had to be
-         *       continued into this one, the TRUNCATED flag will still be
-         *       set in sync_flags as will WOULDBLOCK.  When this occurs
-         *       we CANNOT safely integrate a new truncation from the front-end
-         *       because there may be data records in-memory assigned a flush
-         *       state from the previous cycle that are supposed to be flushed
-         *       before the next frontend truncation.
          */
-        if ((ip->flags & (HAMMER_INODE_TRUNCATED | HAMMER_INODE_WOULDBLOCK)) ==
-            HAMMER_INODE_TRUNCATED) {
+        if (ip->flags & HAMMER_INODE_TRUNCATED) {
                 KKASSERT((ip->sync_flags & HAMMER_INODE_TRUNCATED) == 0);
                 ip->sync_trunc_off = ip->trunc_off;
                 ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
@@ -1960,12 +1954,7 @@ hammer_setup_child_callback(hammer_record_t rec, void *data)
                 break;
         case HAMMER_FST_FLUSH:
                 /*
-                 * If the WOULDBLOCK flag is set records may have been left
-                 * over from a previous flush attempt.  The flush group will
-                 * have been left intact - we are probably reflushing it
-                 * now.
-                 *
-                 * If a flush error occured ip->error will be non-zero.
+                 * The flush_group should already match.
                  */
                 KKASSERT(rec->flush_group == flg);
                 r = 1;
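[Editor's note: not part of the commit] The truncation hunk above replaces the two-flag test with a plain HAMMER_INODE_TRUNCATED check, now that the new KKASSERT guarantees WOULDBLOCK is never set on entry. A minimal standalone sketch of the frontend-to-backend truncation handoff it performs follows. toy_inode, INODE_TRUNCATED, and NO_TRUNCATION are illustrative stand-ins, not HAMMER's real definitions, and the flag transfers beyond the two assignments visible in the hunk are assumptions.

#include <stdint.h>
#include <stdio.h>

#define INODE_TRUNCATED 0x0001                  /* pending frontend truncation */
#define NO_TRUNCATION   0x7FFFFFFFFFFFFFFFLL    /* sentinel offset, as in the hunk */

struct toy_inode {
        int     flags;                  /* frontend state */
        int     sync_flags;             /* backend (flusher) state */
        int64_t trunc_off;              /* frontend truncation offset */
        int64_t sync_trunc_off;         /* offset handed to the backend */
};

static void
handoff_truncation(struct toy_inode *ip)
{
        if (ip->flags & INODE_TRUNCATED) {
                /* hand the pending offset to the backend side ... */
                ip->sync_trunc_off = ip->trunc_off;
                ip->sync_flags |= INODE_TRUNCATED;      /* assumed detail */
                /* ... and reset the frontend so new truncations accumulate */
                ip->trunc_off = NO_TRUNCATION;
                ip->flags &= ~INODE_TRUNCATED;          /* assumed detail */
        }
}

int
main(void)
{
        struct toy_inode ip = { INODE_TRUNCATED, 0, 4096, NO_TRUNCATION };

        handoff_truncation(&ip);
        printf("backend truncates at %lld\n", (long long)ip.sync_trunc_off);
        return (0);
}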
@@ -2081,10 +2070,11 @@ hammer_flush_inode_done(hammer_inode_t ip, int error)
                 /*
                  * We were unable to flush out all our records, leave the
                  * inode in a flush state and in the current flush group.
+                 * The flush group will be re-run.
                  *
-                 * This occurs if the UNDO block gets too full
-                 * or there is too much dirty meta-data and allows the
-                 * flusher to finalize the UNDO block and then re-flush.
+                 * This occurs if the UNDO block gets too full or there is
+                 * too much dirty meta-data and allows the flusher to
+                 * finalize the UNDO block and then re-flush.
                  */
                 ip->flags &= ~HAMMER_INODE_WOULDBLOCK;
                 dorel = 0;
@@ -2124,21 +2114,21 @@ hammer_flush_inode_done(hammer_inode_t ip, int error)
                         ip->flags &= ~HAMMER_INODE_FLUSHW;
                         wakeup(&ip->flags);
                 }
-        }
 
-        /*
-         * If the frontend made more changes and requested another flush,
-         * then try to get it running.
-         *
-         * Reflushes are aborted when the inode is errored out.
-         */
-        if (ip->flags & HAMMER_INODE_REFLUSH) {
-                ip->flags &= ~HAMMER_INODE_REFLUSH;
-                if (ip->flags & HAMMER_INODE_RESIGNAL) {
-                        ip->flags &= ~HAMMER_INODE_RESIGNAL;
-                        hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
-                } else {
-                        hammer_flush_inode(ip, 0);
+                /*
+                 * If the frontend made more changes and requested another
+                 * flush, then try to get it running.
+                 *
+                 * Reflushes are aborted when the inode is errored out.
+                 */
+                if (ip->flags & HAMMER_INODE_REFLUSH) {
+                        ip->flags &= ~HAMMER_INODE_REFLUSH;
+                        if (ip->flags & HAMMER_INODE_RESIGNAL) {
+                                ip->flags &= ~HAMMER_INODE_RESIGNAL;
+                                hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
+                        } else {
+                                hammer_flush_inode(ip, 0);
+                        }
                 }
         }
 
@@ -2317,7 +2307,7 @@ done:
 }
 
 /*
- * XXX error handling
+ * Backend function called by the flusher to sync an inode to media.
  */
 int
 hammer_sync_inode(hammer_transaction_t trans, hammer_inode_t ip)
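[Editor's note: not part of the commit] The hunk above moves the REFLUSH re-arm block into the scope it previously followed, keeping the retained rule that reflushes are aborted when the inode is errored out. Below is a standalone reduction of that re-arm pattern, assuming for illustration that the re-arm runs only when the flush completed without error; the flag values and toy_flush() are stand-ins, not HAMMER's definitions.

#include <stdio.h>

#define INODE_REFLUSH   0x0001  /* frontend queued more work mid-flush */
#define INODE_RESIGNAL  0x0002  /* ...and wants the next flush signaled */
#define FLUSH_SIGNAL    0x0001

static void
toy_flush(int how)
{
        printf("flush requested%s\n", (how & FLUSH_SIGNAL) ? " (signaled)" : "");
}

/*
 * Re-arm a deferred flush request once the current flush finishes.
 * On error the request is dropped rather than re-armed.
 */
static void
flush_done(int *flags, int error)
{
        if (error == 0 && (*flags & INODE_REFLUSH)) {
                *flags &= ~INODE_REFLUSH;
                if (*flags & INODE_RESIGNAL) {
                        *flags &= ~INODE_RESIGNAL;
                        toy_flush(FLUSH_SIGNAL);
                } else {
                        toy_flush(0);
                }
        }
}

int
main(void)
{
        int flags = INODE_REFLUSH | INODE_RESIGNAL;

        flush_done(&flags, 0);          /* prints: flush requested (signaled) */
        return (0);
}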
diff --git a/sys/vfs/hammer/hammer_object.c b/sys/vfs/hammer/hammer_object.c
index e4d306cfee..e84592a660 100644
--- a/sys/vfs/hammer/hammer_object.c
+++ b/sys/vfs/hammer/hammer_object.c
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.90.2.4 2008/08/06 15:41:56 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.90.2.5 2008/08/10 17:01:08 dillon Exp $
  */
 
 #include "hammer.h"
@@ -40,6 +40,7 @@ static int hammer_mem_lookup(hammer_cursor_t cursor);
 static int hammer_mem_first(hammer_cursor_t cursor);
 static int hammer_frontend_trunc_callback(hammer_record_t record,
                                 void *data __unused);
+static int hammer_bulk_scan_callback(hammer_record_t record, void *data);
 static int hammer_record_needs_overwrite_delete(hammer_record_t record);
 static int hammer_delete_general(hammer_cursor_t cursor, hammer_inode_t ip,
                                 hammer_btree_leaf_elm_t leaf);
@@ -49,6 +50,11 @@ struct rec_trunc_info {
         int64_t         trunc_off;
 };
 
+struct hammer_bulk_info {
+        hammer_record_t record;
+        struct hammer_btree_leaf_elm leaf;
+};
+
 /*
  * Red-black tree support.  Comparison code for insertion.
  */
@@ -108,52 +114,46 @@ hammer_rec_cmp(hammer_base_elm_t elm, hammer_record_t rec)
 }
 
 /*
- * Special LOOKUP_INFO to locate an overlapping record.  This used by
- * the reservation code to implement small-block records (whos keys will
- * be different depending on data_len, when representing the same base
- * offset).
+ * Ranged scan to locate overlapping record(s).  This is used by
+ * hammer_ip_get_bulk() to locate an overlapping record.  We have
+ * to use a ranged scan because the keys for data records with the
+ * same file base offset can be different due to differing data_len's.
  *
  * NOTE: The base file offset of a data record is (key - data_len), not (key).
  */
 static int
-hammer_rec_overlap_compare(hammer_btree_leaf_elm_t leaf, hammer_record_t rec)
+hammer_rec_overlap_cmp(hammer_record_t rec, void *data)
 {
-        if (leaf->base.rec_type < rec->leaf.base.rec_type)
+        struct hammer_bulk_info *info = data;
+        hammer_btree_leaf_elm_t leaf = &info->leaf;
+
+        if (rec->leaf.base.rec_type < leaf->base.rec_type)
                 return(-3);
-        if (leaf->base.rec_type > rec->leaf.base.rec_type)
+        if (rec->leaf.base.rec_type > leaf->base.rec_type)
                 return(3);
 
         /*
          * Overlap compare
          */
         if (leaf->base.rec_type == HAMMER_RECTYPE_DATA) {
-                /* leaf_end <= rec_beg */
-                if (leaf->base.key <= rec->leaf.base.key - rec->leaf.data_len)
-                        return(-2);
-                /* leaf_beg >= rec_end */
-                if (leaf->base.key - leaf->data_len >= rec->leaf.base.key)
+                /* rec_beg >= leaf_end */
+                if (rec->leaf.base.key - rec->leaf.data_len >= leaf->base.key)
                         return(2);
+                /* rec_end <= leaf_beg */
+                if (rec->leaf.base.key <= leaf->base.key - leaf->data_len)
+                        return(-2);
         } else {
-                if (leaf->base.key < rec->leaf.base.key)
+                if (rec->leaf.base.key < leaf->base.key)
                         return(-2);
-                if (leaf->base.key > rec->leaf.base.key)
+                if (rec->leaf.base.key > leaf->base.key)
                         return(2);
         }
 
         /*
-         * Never match against an item deleted by the front-end.
-         * leaf is less then rec if rec is marked deleted.
-         *
-         * We must still return the proper code for the scan to continue
-         * along the correct branches.
+         * We have to return 0 at this point, even if DELETED_FE is set,
+         * because returning anything else will cause the scan to ignore
+         * one of the branches when we really want it to check both.
          */
-        if (rec->flags & HAMMER_RECF_DELETED_FE) {
-                if (leaf->base.key < rec->leaf.base.key)
-                        return(-2);
-                if (leaf->base.key > rec->leaf.base.key)
-                        return(2);
-                return(-1);
-        }
         return(0);
 }
 
@@ -240,8 +240,6 @@ hammer_rec_trunc_cmp(hammer_record_t rec, void *data)
 }
 
 RB_GENERATE(hammer_rec_rb_tree, hammer_record, rb_node, hammer_rec_rb_compare);
-RB_GENERATE_XLOOKUP(hammer_rec_rb_tree, INFO, hammer_record, rb_node,
-                    hammer_rec_overlap_compare, hammer_btree_leaf_elm_t);
 
 /*
  * Allocate a record for the caller to finish filling in.  The record is
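[Editor's note: not part of the commit] The rewritten comparator above encodes the key convention that a HAMMER data record stores its END offset in base.key, so the record covers [key - data_len, key). A sketch of the same overlap test reduced to plain offsets, with the -2/2/0 return codes following the scan convention visible in the hunk:

#include <assert.h>
#include <stdint.h>

static int
overlap_cmp(int64_t rec_end, int64_t rec_len, int64_t key_end, int64_t key_len)
{
        int64_t rec_beg = rec_end - rec_len;
        int64_t key_beg = key_end - key_len;

        if (rec_beg >= key_end)
                return(2);      /* record entirely beyond the range: prune */
        if (rec_end <= key_beg)
                return(-2);     /* record entirely before the range: prune */

        /*
         * Overlap.  Front-end-deleted records are no longer special-cased
         * here; the scan callback filters them instead, so both subtrees
         * are still visited (the behavior the rewrite fixes).
         */
        return(0);
}

int
main(void)
{
        /* record covers [0,4096), query covers [4096,8192): no overlap */
        assert(overlap_cmp(4096, 4096, 8192, 4096) == -2);
        /* record covers [4096,8192), same query: overlap */
        assert(overlap_cmp(8192, 4096, 8192, 4096) == 0);
        return (0);
}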
@@ -827,27 +825,44 @@ hammer_ip_add_record(struct hammer_transaction *trans, hammer_record_t record)
  * to queue the BIO to the flusher.  Only the related record gets queued
  * to the flusher.
  */
+
 static hammer_record_t
 hammer_ip_get_bulk(hammer_inode_t ip, off_t file_offset, int bytes)
 {
-        hammer_record_t record;
-        struct hammer_btree_leaf_elm leaf;
+        struct hammer_bulk_info info;
+
+        bzero(&info, sizeof(info));
+        info.leaf.base.obj_id = ip->obj_id;
+        info.leaf.base.key = file_offset + bytes;
+        info.leaf.base.create_tid = 0;
+        info.leaf.base.delete_tid = 0;
+        info.leaf.base.rec_type = HAMMER_RECTYPE_DATA;
+        info.leaf.base.obj_type = 0;                            /* unused */
+        info.leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;        /* unused */
+        info.leaf.base.localization = ip->obj_localization +    /* unused */
+                                      HAMMER_LOCALIZE_MISC;
+        info.leaf.data_len = bytes;
 
-        bzero(&leaf, sizeof(leaf));
-        leaf.base.obj_id = ip->obj_id;
-        leaf.base.key = file_offset + bytes;
-        leaf.base.create_tid = 0;
-        leaf.base.delete_tid = 0;
-        leaf.base.rec_type = HAMMER_RECTYPE_DATA;
-        leaf.base.obj_type = 0;                         /* unused */
-        leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;     /* unused */
-        leaf.base.localization = ip->obj_localization + HAMMER_LOCALIZE_MISC;
-        leaf.data_len = bytes;
-
-        record = hammer_rec_rb_tree_RB_LOOKUP_INFO(&ip->rec_tree, &leaf);
-        if (record)
-                hammer_ref(&record->lock);
-        return(record);
+        hammer_rec_rb_tree_RB_SCAN(&ip->rec_tree, hammer_rec_overlap_cmp,
+                                   hammer_bulk_scan_callback, &info);
+
+        return(info.record);    /* may be NULL */
+}
+
+/*
+ * Take records vetted by overlap_cmp.  The first non-deleted record
+ * (if any) stops the scan.
+ */
+static int
+hammer_bulk_scan_callback(hammer_record_t record, void *data)
+{
+        struct hammer_bulk_info *info = data;
+
+        if (record->flags & HAMMER_RECF_DELETED_FE)
+                return(0);
+        hammer_ref(&record->lock);
+        info->record = record;
+        return(-1);                     /* stop scan */
 }
 
 /*
@@ -870,10 +885,10 @@ hammer_ip_add_bulk(hammer_inode_t ip, off_t file_offset, void *data, int bytes,
 
         /*
          * Deal with conflicting in-memory records.  We cannot have multiple
-         * in-memory records for the same offset without seriously confusing
-         * the backend, including but not limited to the backend issuing
-         * delete-create-delete sequences and asserting on the delete_tid
-         * being the same as the create_tid.
+         * in-memory records for the same base offset without seriously
+         * confusing the backend, including but not limited to the backend
+         * issuing delete-create-delete or create-delete-create sequences
+         * and asserting on the delete_tid being the same as the create_tid.
          *
          * If we encounter a record with the backend interlock set we cannot
          * immediately delete it without confusing the backend.
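[Editor's note: not part of the commit] hammer_ip_get_bulk() now uses the comparator + callback scan contract instead of a single LOOKUP_INFO hit: the comparator prunes subtrees with -2/2, and the callback returns 0 to continue past front-end-deleted records or a negative value to stop at the first live one. The sketch below models that contract on a toy binary tree; it assumes the stored ranges are mutually disjoint and ordered by offset (true for file data extents), which is what makes the subtree pruning valid. All names here are illustrative.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct toy_node {
        int64_t beg, end;               /* record covers [beg, end) */
        int deleted;                    /* stand-in for DELETED_FE */
        struct toy_node *left, *right;
};

struct toy_info {                       /* mirrors hammer_bulk_info */
        struct toy_node *record;
        int64_t key_beg, key_end;
};

/* -2/2: entirely outside the range (prune one side); 0: overlap */
static int
toy_cmp(struct toy_node *n, int64_t key_beg, int64_t key_end)
{
        if (n->beg >= key_end)
                return(2);
        if (n->end <= key_beg)
                return(-2);
        return(0);
}

/* First non-deleted overlap stops the scan, as in hammer_bulk_scan_callback() */
static int
toy_callback(struct toy_node *n, void *data)
{
        struct toy_info *info = data;

        if (n->deleted)
                return(0);
        info->record = n;
        return(-1);
}

static int
toy_scan(struct toy_node *n, struct toy_info *info,
         int (*callback)(struct toy_node *, void *))
{
        int r;

        if (n == NULL)
                return(0);
        r = toy_cmp(n, info->key_beg, info->key_end);
        /* with disjoint ordered ranges, r == -2 means nothing left can match */
        if (r != -2 && toy_scan(n->left, info, callback) < 0)
                return(-1);
        if (r == 0 && callback(n, info) < 0)
                return(-1);
        /* likewise r == 2 means nothing right of n can match */
        if (r != 2 && toy_scan(n->right, info, callback) < 0)
                return(-1);
        return(0);
}

int
main(void)
{
        struct toy_node a = { 0, 4096, 1, NULL, NULL };      /* FE-deleted */
        struct toy_node b = { 4096, 8192, 0, &a, NULL };
        struct toy_info info = { NULL, 0, 8192 };

        toy_scan(&b, &info, toy_callback);      /* skips a, stops at b */
        printf("found [%lld,%lld)\n",
               (long long)info.record->beg, (long long)info.record->end);
        return (0);
}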
diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c
index e98d5bb747..2ff9f11be5 100644
--- a/sys/vfs/hammer/hammer_vnops.c
+++ b/sys/vfs/hammer/hammer_vnops.c
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.91.2.4 2008/08/06 15:41:56 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.91.2.5 2008/08/10 17:01:09 dillon Exp $
  */
 
 #include 
@@ -156,7 +156,8 @@ struct hammer_inode *HammerTruncIp;
 #endif
 
 static int hammer_dounlink(hammer_transaction_t trans, struct nchandle *nch,
-                        struct vnode *dvp, struct ucred *cred, int flags);
+                        struct vnode *dvp, struct ucred *cred,
+                        int flags, int isdir);
 static int hammer_vop_strategy_read(struct vop_strategy_args *ap);
 static int hammer_vop_strategy_write(struct vop_strategy_args *ap);
 
@@ -1491,7 +1492,7 @@ hammer_vop_nremove(struct vop_nremove_args *ap)
 
         hammer_start_transaction(&trans, dip->hmp);
         ++hammer_stats_file_iopsw;
-        error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0);
+        error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0, 0);
         hammer_done_transaction(&trans);
 
         return (error);
@@ -1550,7 +1551,8 @@ hammer_vop_nrename(struct vop_nrename_args *ap)
          * Force the inode sync-time to match the transaction so it is
          * in-sync with the creation of the target directory entry.
          */
-        error = hammer_dounlink(&trans, ap->a_tnch, ap->a_tdvp, ap->a_cred, 0);
+        error = hammer_dounlink(&trans, ap->a_tnch, ap->a_tdvp,
+                                ap->a_cred, 0, -1);
         if (error == 0 || error == ENOENT) {
                 error = hammer_ip_add_directory(&trans, tdip,
                                                 tncp->nc_name, tncp->nc_nlen,
@@ -1652,8 +1654,6 @@ hammer_vop_nrmdir(struct vop_nrmdir_args *ap)
         int error;
 
         dip = VTOI(ap->a_dvp);
-        if (dip->ino_data.obj_type != HAMMER_OBJTYPE_DIRECTORY)
-                return(ENOTDIR);
 
         if (hammer_nohistory(dip) == 0 &&
             (error = hammer_checkspace(dip->hmp, HAMMER_CHKSPC_REMOVE)) != 0) {
@@ -1662,7 +1662,7 @@ hammer_vop_nrmdir(struct vop_nrmdir_args *ap)
 
         hammer_start_transaction(&trans, dip->hmp);
         ++hammer_stats_file_iopsw;
-        error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0);
+        error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0, 1);
         hammer_done_transaction(&trans);
 
         return (error);
@@ -1994,7 +1994,7 @@ hammer_vop_nwhiteout(struct vop_nwhiteout_args *ap)
         hammer_start_transaction(&trans, dip->hmp);
         ++hammer_stats_file_iopsw;
         error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp,
-                                ap->a_cred, ap->a_flags);
+                                ap->a_cred, ap->a_flags, -1);
         hammer_done_transaction(&trans);
 
         return (error);
@@ -2630,7 +2630,8 @@ hammer_vop_strategy_write(struct vop_strategy_args *ap)
  */
 static int
 hammer_dounlink(hammer_transaction_t trans, struct nchandle *nch,
-                struct vnode *dvp, struct ucred *cred, int flags)
+                struct vnode *dvp, struct ucred *cred,
+                int flags, int isdir)
 {
         struct namecache *ncp;
         hammer_inode_t dip;
@@ -2713,6 +2714,20 @@ retry:
         }
 
         /*
+         * If isdir >= 0 we validate that the entry is or is not a
+         * directory.  If isdir < 0 we don't care.
+         */
+        if (error == 0 && isdir >= 0) {
+                if (isdir &&
+                    ip->ino_data.obj_type != HAMMER_OBJTYPE_DIRECTORY) {
+                        error = ENOTDIR;
+                } else if (isdir == 0 &&
+                           ip->ino_data.obj_type == HAMMER_OBJTYPE_DIRECTORY) {
+                        error = EISDIR;
+                }
+        }
+
+        /*
          * If we are trying to remove a directory the directory must
          * be empty.
          *
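[Editor's note: not part of the commit] The new isdir argument gives hammer_dounlink() a tri-state contract: hammer_vop_nremove() passes 0 (target must not be a directory), hammer_vop_nrmdir() passes 1 (target must be a directory, replacing the ENOTDIR check dropped from the parent-directory path above), and hammer_vop_nrename()/hammer_vop_nwhiteout() pass -1 (don't care). A standalone reduction of that check; check_isdir and obj_is_dir are illustrative stand-ins for the HAMMER_OBJTYPE_DIRECTORY comparison in the diff:

#include <errno.h>

/*
 * Tri-state isdir validation, as added to hammer_dounlink() above.
 * isdir <  0: caller does not care (rename target, whiteout).
 * isdir >  0: target must be a directory (rmdir).
 * isdir == 0: target must not be a directory (unlink).
 */
static int
check_isdir(int isdir, int obj_is_dir)
{
        if (isdir < 0)
                return(0);
        if (isdir && !obj_is_dir)
                return(ENOTDIR);
        if (isdir == 0 && obj_is_dir)
                return(EISDIR);
        return(0);
}

As in the commit, this check only runs after the target inode has been resolved without error (the diff gates it with error == 0 && isdir >= 0), so lookup failures still take precedence over type mismatches.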