From d2058105c61ec61df3a2dd3f839fed8c3fe7bfd6 Mon Sep 17 00:00:00 2001 From: "Justin T. Gibbs" Date: Sun, 11 Oct 2015 09:16:29 -0700 Subject: [PATCH] 6267 dn_bonus evicted too early Reviewed by: Richard Yao Reviewed by: Xin LI Reviewed by: Matthew Ahrens Approved by: Richard Lowe --- usr/src/uts/common/fs/zfs/dbuf.c | 47 ++++++++---------------------- usr/src/uts/common/fs/zfs/dmu_objset.c | 1 - usr/src/uts/common/fs/zfs/dnode_sync.c | 14 +++++---- usr/src/uts/common/fs/zfs/sys/dbuf.h | 18 +++++++++++- usr/src/uts/common/fs/zfs/sys/dmu_objset.h | 1 - 5 files changed, 38 insertions(+), 43 deletions(-) diff --git a/usr/src/uts/common/fs/zfs/dbuf.c b/usr/src/uts/common/fs/zfs/dbuf.c index 514aaa2365..8b0c71db5c 100644 --- a/usr/src/uts/common/fs/zfs/dbuf.c +++ b/usr/src/uts/common/fs/zfs/dbuf.c @@ -272,7 +272,7 @@ dbuf_verify_user(dmu_buf_impl_t *db, dbvu_verify_type_t verify_type) */ ASSERT3U(holds, >=, db->db_dirtycnt); } else { - if (db->db_immediate_evict == TRUE) + if (db->db_user_immediate_evict == TRUE) ASSERT3U(holds, >=, db->db_dirtycnt); else ASSERT3U(holds, >, 0); @@ -1875,8 +1875,9 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid, db->db_blkptr = blkptr; db->db_user = NULL; - db->db_immediate_evict = 0; - db->db_freed_in_flight = 0; + db->db_user_immediate_evict = FALSE; + db->db_freed_in_flight = FALSE; + db->db_pending_evict = FALSE; if (blkid == DMU_BONUS_BLKID) { ASSERT3P(parent, ==, dn->dn_dbuf); @@ -2432,12 +2433,13 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag) arc_buf_freeze(db->db_buf); if (holds == db->db_dirtycnt && - db->db_level == 0 && db->db_immediate_evict) + db->db_level == 0 && db->db_user_immediate_evict) dbuf_evict_user(db); if (holds == 0) { if (db->db_blkid == DMU_BONUS_BLKID) { dnode_t *dn; + boolean_t evict_dbuf = db->db_pending_evict; /* * If the dnode moves here, we cannot cross this @@ -2452,7 +2454,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag) * Decrementing the dbuf count means that the bonus * buffer's dnode hold is no longer discounted in * dnode_move(). The dnode cannot move until after - * the dnode_rele_and_unlock() below. + * the dnode_rele() below. */ DB_DNODE_EXIT(db); @@ -2462,35 +2464,10 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag) */ mutex_exit(&db->db_mtx); - /* - * If the dnode has been freed, evict the bonus - * buffer immediately. The data in the bonus - * buffer is no longer relevant and this prevents - * a stale bonus buffer from being associated - * with this dnode_t should the dnode_t be reused - * prior to being destroyed. - */ - mutex_enter(&dn->dn_mtx); - if (dn->dn_type == DMU_OT_NONE || - dn->dn_free_txg != 0) { - /* - * Drop dn_mtx. It is a leaf lock and - * cannot be held when dnode_evict_bonus() - * acquires other locks in order to - * perform the eviction. - * - * Freed dnodes cannot be reused until the - * last hold is released. Since this bonus - * buffer has a hold, the dnode will remain - * in the free state, even without dn_mtx - * held, until the dnode_rele_and_unlock() - * below. - */ - mutex_exit(&dn->dn_mtx); + if (evict_dbuf) dnode_evict_bonus(dn); - mutex_enter(&dn->dn_mtx); - } - dnode_rele_and_unlock(dn, db); + + dnode_rele(dn, db); } else if (db->db_buf == NULL) { /* * This is a special case: we never associated this @@ -2537,7 +2514,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag) } else { dbuf_clear(db); } - } else if (db->db_objset->os_evicting || + } else if (db->db_pending_evict || arc_buf_eviction_needed(db->db_buf)) { dbuf_clear(db); } else { @@ -2585,7 +2562,7 @@ dmu_buf_set_user_ie(dmu_buf_t *db_fake, dmu_buf_user_t *user) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; - db->db_immediate_evict = TRUE; + db->db_user_immediate_evict = TRUE; return (dmu_buf_set_user(db_fake, user)); } diff --git a/usr/src/uts/common/fs/zfs/dmu_objset.c b/usr/src/uts/common/fs/zfs/dmu_objset.c index 1dfa867bb3..79de1d127d 100644 --- a/usr/src/uts/common/fs/zfs/dmu_objset.c +++ b/usr/src/uts/common/fs/zfs/dmu_objset.c @@ -706,7 +706,6 @@ dmu_objset_evict(objset_t *os) if (os->os_sa) sa_tear_down(os); - os->os_evicting = B_TRUE; dmu_objset_evict_dbufs(os); mutex_enter(&os->os_lock); diff --git a/usr/src/uts/common/fs/zfs/dnode_sync.c b/usr/src/uts/common/fs/zfs/dnode_sync.c index 0787885229..9aee513818 100644 --- a/usr/src/uts/common/fs/zfs/dnode_sync.c +++ b/usr/src/uts/common/fs/zfs/dnode_sync.c @@ -424,6 +424,7 @@ dnode_evict_dbufs(dnode_t *dn) db_next = AVL_NEXT(&dn->dn_dbufs, &db_marker); avl_remove(&dn->dn_dbufs, &db_marker); } else { + db->db_pending_evict = TRUE; mutex_exit(&db->db_mtx); db_next = AVL_NEXT(&dn->dn_dbufs, db); } @@ -437,10 +438,14 @@ void dnode_evict_bonus(dnode_t *dn) { rw_enter(&dn->dn_struct_rwlock, RW_WRITER); - if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) { - mutex_enter(&dn->dn_bonus->db_mtx); - dbuf_evict(dn->dn_bonus); - dn->dn_bonus = NULL; + if (dn->dn_bonus != NULL) { + if (refcount_is_zero(&dn->dn_bonus->db_holds)) { + mutex_enter(&dn->dn_bonus->db_mtx); + dbuf_evict(dn->dn_bonus); + dn->dn_bonus = NULL; + } else { + dn->dn_bonus->db_pending_evict = TRUE; + } } rw_exit(&dn->dn_struct_rwlock); } @@ -492,7 +497,6 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]); dnode_evict_dbufs(dn); - ASSERT(avl_is_empty(&dn->dn_dbufs)); /* * XXX - It would be nice to assert this, but we may still diff --git a/usr/src/uts/common/fs/zfs/sys/dbuf.h b/usr/src/uts/common/fs/zfs/sys/dbuf.h index 482ccb01da..233d541342 100644 --- a/usr/src/uts/common/fs/zfs/sys/dbuf.h +++ b/usr/src/uts/common/fs/zfs/sys/dbuf.h @@ -230,9 +230,25 @@ typedef struct dmu_buf_impl { /* User callback information. */ dmu_buf_user_t *db_user; - uint8_t db_immediate_evict; + /* + * Evict user data as soon as the dirty and reference + * counts are equal. + */ + uint8_t db_user_immediate_evict; + + /* + * This block was freed while a read or write was + * active. + */ uint8_t db_freed_in_flight; + /* + * dnode_evict_dbufs() or dnode_evict_bonus() tried to + * evict this dbuf, but couldn't due to outstanding + * references. Evict once the refcount drops to 0. + */ + uint8_t db_pending_evict; + uint8_t db_dirtycnt; } dmu_buf_impl_t; diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h index 9e98350f00..8a263a39e6 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h @@ -93,7 +93,6 @@ struct objset { uint8_t os_copies; enum zio_checksum os_dedup_checksum; boolean_t os_dedup_verify; - boolean_t os_evicting; zfs_logbias_op_t os_logbias; zfs_cache_type_t os_primary_cache; zfs_cache_type_t os_secondary_cache; -- 2.11.4.GIT