svn commit: r288549 - in stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys
Alexander Motin
mav at FreeBSD.org
Sat Oct 3 07:29:59 UTC 2015
Author: mav
Date: Sat Oct 3 07:29:56 2015
New Revision: 288549
URL: https://svnweb.freebsd.org/changeset/base/288549
Log:
MFC r286575: 5056 ZFS deadlock on db_mtx and dn_holds
Reviewed by: Will Andrews <willa at spectralogic.com>
Reviewed by: Matt Ahrens <mahrens at delphix.com>
Reviewed by: George Wilson <george.wilson at delphix.com>
Approved by: Dan McDonald <danmcd at omniti.com>
Author: Justin Gibbs <justing at spectralogic.com>
illumos/illumos-gate at bc9014e6a81272073b9854d9f65dd59e18d18c35
Modified:
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_bookmark.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deadlist.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/sa.h
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/sa_impl.h
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_impl.h
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_leaf.h
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_sa.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
Directory Properties:
stable/10/ (props changed)
Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c Sat Oct 3 07:28:52 2015 (r288548)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c Sat Oct 3 07:29:56 2015 (r288549)
@@ -24,6 +24,7 @@
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -54,10 +55,16 @@ static void dbuf_destroy(dmu_buf_impl_t
static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
+#ifndef __lint
+extern inline void dmu_buf_init_user(dmu_buf_user_t *dbu,
+ dmu_buf_evict_func_t *evict_func, dmu_buf_t **clear_on_evict_dbufp);
+#endif /* ! __lint */
+
/*
* Global data structures and functions for the dbuf cache.
*/
static kmem_cache_t *dbuf_cache;
+static taskq_t *dbu_evict_taskq;
/* ARGSUSED */
static int
@@ -231,17 +238,72 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
static arc_evict_func_t dbuf_do_evict;
+typedef enum {
+ DBVU_EVICTING,
+ DBVU_NOT_EVICTING
+} dbvu_verify_type_t;
+
+static void
+dbuf_verify_user(dmu_buf_impl_t *db, dbvu_verify_type_t verify_type)
+{
+#ifdef ZFS_DEBUG
+ int64_t holds;
+
+ if (db->db_user == NULL)
+ return;
+
+ /* Only data blocks support the attachment of user data. */
+ ASSERT(db->db_level == 0);
+
+ /* Clients must resolve a dbuf before attaching user data. */
+ ASSERT(db->db.db_data != NULL);
+ ASSERT3U(db->db_state, ==, DB_CACHED);
+
+ holds = refcount_count(&db->db_holds);
+ if (verify_type == DBVU_EVICTING) {
+ /*
+ * Immediate eviction occurs when holds == dirtycnt.
+ * For normal eviction buffers, holds is zero on
+ * eviction, except when dbuf_fix_old_data() calls
+ * dbuf_clear_data(). However, the hold count can grow
+ * during eviction even though db_mtx is held (see
+ * dmu_bonus_hold() for an example), so we can only
+ * test the generic invariant that holds >= dirtycnt.
+ */
+ ASSERT3U(holds, >=, db->db_dirtycnt);
+ } else {
+ if (db->db_immediate_evict == TRUE)
+ ASSERT3U(holds, >=, db->db_dirtycnt);
+ else
+ ASSERT3U(holds, >, 0);
+ }
+#endif
+}
+
static void
dbuf_evict_user(dmu_buf_impl_t *db)
{
+ dmu_buf_user_t *dbu = db->db_user;
+
ASSERT(MUTEX_HELD(&db->db_mtx));
- if (db->db_level != 0 || db->db_evict_func == NULL)
+ if (dbu == NULL)
return;
- db->db_evict_func(&db->db, db->db_user_ptr);
- db->db_user_ptr = NULL;
- db->db_evict_func = NULL;
+ dbuf_verify_user(db, DBVU_EVICTING);
+ db->db_user = NULL;
+
+#ifdef ZFS_DEBUG
+ if (dbu->dbu_clear_on_evict_dbufp != NULL)
+ *dbu->dbu_clear_on_evict_dbufp = NULL;
+#endif
+
+ /*
+ * Invoke the callback from a taskq to avoid lock order reversals
+ * and limit stack depth.
+ */
+ taskq_dispatch_ent(dbu_evict_taskq, dbu->dbu_evict_func, dbu, 0,
+ &dbu->dbu_tqent);
}
boolean_t
@@ -302,6 +364,12 @@ retry:
for (i = 0; i < DBUF_MUTEXES; i++)
mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL);
+
+ /*
+ * All entries are queued via taskq_dispatch_ent(), so min/maxalloc
+ * configuration is not required.
+ */
+ dbu_evict_taskq = taskq_create("dbu_evict", 1, minclsyspri, 0, 0, 0);
}
void
@@ -314,6 +382,7 @@ dbuf_fini(void)
mutex_destroy(&h->hash_mutexes[i]);
kmem_free(h->hash_table, (h->hash_table_mask + 1) * sizeof (void *));
kmem_cache_destroy(dbuf_cache);
+ taskq_destroy(dbu_evict_taskq);
}
/*
@@ -431,21 +500,27 @@ dbuf_verify(dmu_buf_impl_t *db)
#endif
static void
+dbuf_clear_data(dmu_buf_impl_t *db)
+{
+ ASSERT(MUTEX_HELD(&db->db_mtx));
+ dbuf_evict_user(db);
+ db->db_buf = NULL;
+ db->db.db_data = NULL;
+ if (db->db_state != DB_NOFILL)
+ db->db_state = DB_UNCACHED;
+}
+
+static void
dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf)
{
ASSERT(MUTEX_HELD(&db->db_mtx));
+ ASSERT(buf != NULL);
+
db->db_buf = buf;
- if (buf != NULL) {
- ASSERT(buf->b_data != NULL);
- db->db.db_data = buf->b_data;
- if (!arc_released(buf))
- arc_set_callback(buf, dbuf_do_evict, db);
- } else {
- dbuf_evict_user(db);
- db->db.db_data = NULL;
- if (db->db_state != DB_NOFILL)
- db->db_state = DB_UNCACHED;
- }
+ ASSERT(buf->b_data != NULL);
+ db->db.db_data = buf->b_data;
+ if (!arc_released(buf))
+ arc_set_callback(buf, dbuf_do_evict, db);
}
/*
@@ -467,7 +542,7 @@ dbuf_loan_arcbuf(dmu_buf_impl_t *db)
} else {
abuf = db->db_buf;
arc_loan_inuse_buf(abuf, db);
- dbuf_set_data(db, NULL);
+ dbuf_clear_data(db);
mutex_exit(&db->db_mtx);
}
return (abuf);
@@ -703,7 +778,7 @@ dbuf_noread(dmu_buf_impl_t *db)
dbuf_set_data(db, arc_buf_alloc(spa, db->db.db_size, db, type));
db->db_state = DB_FILL;
} else if (db->db_state == DB_NOFILL) {
- dbuf_set_data(db, NULL);
+ dbuf_clear_data(db);
} else {
ASSERT3U(db->db_state, ==, DB_CACHED);
}
@@ -759,7 +834,7 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, ui
dr->dt.dl.dr_data = arc_buf_alloc(spa, size, db, type);
bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size);
} else {
- dbuf_set_data(db, NULL);
+ dbuf_clear_data(db);
}
}
@@ -810,7 +885,8 @@ void
dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
dmu_tx_t *tx)
{
- dmu_buf_impl_t *db, *db_next, db_search;
+ dmu_buf_impl_t db_search;
+ dmu_buf_impl_t *db, *db_next;
uint64_t txg = tx->tx_txg;
avl_index_t where;
@@ -1388,7 +1464,7 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_
arc_buf_t *buf = db->db_buf;
ASSERT(db->db_state == DB_NOFILL || arc_released(buf));
- dbuf_set_data(db, NULL);
+ dbuf_clear_data(db);
VERIFY(arc_buf_remove_ref(buf, db));
dbuf_evict(db);
return (B_TRUE);
@@ -1728,8 +1804,7 @@ dbuf_create(dnode_t *dn, uint8_t level,
db->db_parent = parent;
db->db_blkptr = blkptr;
- db->db_user_ptr = NULL;
- db->db_evict_func = NULL;
+ db->db_user = NULL;
db->db_immediate_evict = 0;
db->db_freed_in_flight = 0;
@@ -2195,7 +2270,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db,
/*
* This dbuf has anonymous data associated with it.
*/
- dbuf_set_data(db, NULL);
+ dbuf_clear_data(db);
VERIFY(arc_buf_remove_ref(buf, db));
dbuf_evict(db);
} else {
@@ -2228,7 +2303,8 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db,
} else {
dbuf_clear(db);
}
- } else if (arc_buf_eviction_needed(db->db_buf)) {
+ } else if (db->db_objset->os_evicting ||
+ arc_buf_eviction_needed(db->db_buf)) {
dbuf_clear(db);
} else {
mutex_exit(&db->db_mtx);
@@ -2247,51 +2323,57 @@ dbuf_refcount(dmu_buf_impl_t *db)
}
void *
-dmu_buf_set_user(dmu_buf_t *db_fake, void *user_ptr,
- dmu_buf_evict_func_t *evict_func)
+dmu_buf_replace_user(dmu_buf_t *db_fake, dmu_buf_user_t *old_user,
+ dmu_buf_user_t *new_user)
{
- return (dmu_buf_update_user(db_fake, NULL, user_ptr, evict_func));
+ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
+
+ mutex_enter(&db->db_mtx);
+ dbuf_verify_user(db, DBVU_NOT_EVICTING);
+ if (db->db_user == old_user)
+ db->db_user = new_user;
+ else
+ old_user = db->db_user;
+ dbuf_verify_user(db, DBVU_NOT_EVICTING);
+ mutex_exit(&db->db_mtx);
+
+ return (old_user);
}
void *
-dmu_buf_set_user_ie(dmu_buf_t *db_fake, void *user_ptr,
- dmu_buf_evict_func_t *evict_func)
+dmu_buf_set_user(dmu_buf_t *db_fake, dmu_buf_user_t *user)
{
- dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
-
- db->db_immediate_evict = TRUE;
- return (dmu_buf_update_user(db_fake, NULL, user_ptr, evict_func));
+ return (dmu_buf_replace_user(db_fake, NULL, user));
}
void *
-dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr, void *user_ptr,
- dmu_buf_evict_func_t *evict_func)
+dmu_buf_set_user_ie(dmu_buf_t *db_fake, dmu_buf_user_t *user)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
- ASSERT(db->db_level == 0);
-
- ASSERT((user_ptr == NULL) == (evict_func == NULL));
-
- mutex_enter(&db->db_mtx);
- if (db->db_user_ptr == old_user_ptr) {
- db->db_user_ptr = user_ptr;
- db->db_evict_func = evict_func;
- } else {
- old_user_ptr = db->db_user_ptr;
- }
+ db->db_immediate_evict = TRUE;
+ return (dmu_buf_set_user(db_fake, user));
+}
- mutex_exit(&db->db_mtx);
- return (old_user_ptr);
+void *
+dmu_buf_remove_user(dmu_buf_t *db_fake, dmu_buf_user_t *user)
+{
+ return (dmu_buf_replace_user(db_fake, user, NULL));
}
void *
dmu_buf_get_user(dmu_buf_t *db_fake)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
- ASSERT(!refcount_is_zero(&db->db_holds));
- return (db->db_user_ptr);
+ dbuf_verify_user(db, DBVU_NOT_EVICTING);
+ return (db->db_user);
+}
+
+void
+dmu_buf_user_evict_wait()
+{
+ taskq_wait(dbu_evict_taskq);
}
boolean_t
Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c Sat Oct 3 07:28:52 2015 (r288548)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c Sat Oct 3 07:29:56 2015 (r288549)
@@ -23,6 +23,7 @@
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
@@ -357,7 +358,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dat
zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
secondary_cache_changed_cb, os);
}
- if (!dsl_dataset_is_snapshot(ds)) {
+ if (!ds->ds_is_snapshot) {
if (err == 0) {
err = dsl_prop_register(ds,
zfs_prop_to_name(ZFS_PROP_CHECKSUM),
@@ -419,7 +420,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dat
os->os_secondary_cache = ZFS_CACHE_ALL;
}
- if (ds == NULL || !dsl_dataset_is_snapshot(ds))
+ if (ds == NULL || !ds->ds_is_snapshot)
os->os_zil_header = os->os_phys->os_zil_header;
os->os_zil = zil_alloc(os, &os->os_zil_header);
@@ -438,16 +439,13 @@ dmu_objset_open_impl(spa_t *spa, dsl_dat
mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);
- DMU_META_DNODE(os) = dnode_special_open(os,
- &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT,
- &os->os_meta_dnode);
+ dnode_special_open(os, &os->os_phys->os_meta_dnode,
+ DMU_META_DNODE_OBJECT, &os->os_meta_dnode);
if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
- DMU_USERUSED_DNODE(os) = dnode_special_open(os,
- &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT,
- &os->os_userused_dnode);
- DMU_GROUPUSED_DNODE(os) = dnode_special_open(os,
- &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT,
- &os->os_groupused_dnode);
+ dnode_special_open(os, &os->os_phys->os_userused_dnode,
+ DMU_USERUSED_OBJECT, &os->os_userused_dnode);
+ dnode_special_open(os, &os->os_phys->os_groupused_dnode,
+ DMU_GROUPUSED_OBJECT, &os->os_groupused_dnode);
}
*osp = os;
@@ -535,7 +533,7 @@ dmu_objset_own(const char *name, dmu_obj
} else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
dsl_dataset_disown(ds, tag);
return (SET_ERROR(EINVAL));
- } else if (!readonly && dsl_dataset_is_snapshot(ds)) {
+ } else if (!readonly && ds->ds_is_snapshot) {
dsl_dataset_disown(ds, tag);
return (SET_ERROR(EROFS));
}
@@ -591,41 +589,53 @@ dmu_objset_disown(objset_t *os, void *ta
void
dmu_objset_evict_dbufs(objset_t *os)
{
+ dnode_t dn_marker;
dnode_t *dn;
mutex_enter(&os->os_lock);
-
- /* process the mdn last, since the other dnodes have holds on it */
- list_remove(&os->os_dnodes, DMU_META_DNODE(os));
- list_insert_tail(&os->os_dnodes, DMU_META_DNODE(os));
-
- /*
- * Find the first dnode with holds. We have to do this dance
- * because dnode_add_ref() only works if you already have a
- * hold. If there are no holds then it has no dbufs so OK to
- * skip.
- */
- for (dn = list_head(&os->os_dnodes);
- dn && !dnode_add_ref(dn, FTAG);
- dn = list_next(&os->os_dnodes, dn))
- continue;
-
- while (dn) {
- dnode_t *next_dn = dn;
-
- do {
- next_dn = list_next(&os->os_dnodes, next_dn);
- } while (next_dn && !dnode_add_ref(next_dn, FTAG));
-
- mutex_exit(&os->os_lock);
- dnode_evict_dbufs(dn);
- dnode_rele(dn, FTAG);
- mutex_enter(&os->os_lock);
- dn = next_dn;
+ dn = list_head(&os->os_dnodes);
+ while (dn != NULL) {
+ /*
+ * Skip dnodes without holds. We have to do this dance
+ * because dnode_add_ref() only works if there is already a
+ * hold. If the dnode has no holds, then it has no dbufs.
+ */
+ if (dnode_add_ref(dn, FTAG)) {
+ list_insert_after(&os->os_dnodes, dn, &dn_marker);
+ mutex_exit(&os->os_lock);
+
+ dnode_evict_dbufs(dn);
+ dnode_rele(dn, FTAG);
+
+ mutex_enter(&os->os_lock);
+ dn = list_next(&os->os_dnodes, &dn_marker);
+ list_remove(&os->os_dnodes, &dn_marker);
+ } else {
+ dn = list_next(&os->os_dnodes, dn);
+ }
}
mutex_exit(&os->os_lock);
+
+ if (DMU_USERUSED_DNODE(os) != NULL) {
+ dnode_evict_dbufs(DMU_GROUPUSED_DNODE(os));
+ dnode_evict_dbufs(DMU_USERUSED_DNODE(os));
+ }
+ dnode_evict_dbufs(DMU_META_DNODE(os));
}
+/*
+ * Objset eviction processing is split into two pieces.
+ * The first marks the objset as evicting, evicts any dbufs that
+ * have a refcount of zero, and then queues up the objset for the
+ * second phase of eviction. Once os->os_dnodes has been cleared by
+ * dnode_buf_pageout()->dnode_destroy(), the second phase is executed.
+ * The second phase closes the special dnodes, dequeues the objset from
+ * the list of those undergoing eviction, and finally frees the objset.
+ *
+ * NOTE: Due to asynchronous eviction processing (invocation of
+ * dnode_buf_pageout()), it is possible for the meta dnode for the
+ * objset to have no holds even though os->os_dnodes is not empty.
+ */
void
dmu_objset_evict(objset_t *os)
{
@@ -635,7 +645,7 @@ dmu_objset_evict(objset_t *os)
ASSERT(!dmu_objset_is_dirty(os, t));
if (ds) {
- if (!dsl_dataset_is_snapshot(ds)) {
+ if (!ds->ds_is_snapshot) {
VERIFY0(dsl_prop_unregister(ds,
zfs_prop_to_name(ZFS_PROP_CHECKSUM),
checksum_changed_cb, os));
@@ -672,8 +682,24 @@ dmu_objset_evict(objset_t *os)
if (os->os_sa)
sa_tear_down(os);
+ os->os_evicting = B_TRUE;
dmu_objset_evict_dbufs(os);
+ mutex_enter(&os->os_lock);
+ spa_evicting_os_register(os->os_spa, os);
+ if (list_is_empty(&os->os_dnodes)) {
+ mutex_exit(&os->os_lock);
+ dmu_objset_evict_done(os);
+ } else {
+ mutex_exit(&os->os_lock);
+ }
+}
+
+void
+dmu_objset_evict_done(objset_t *os)
+{
+ ASSERT3P(list_head(&os->os_dnodes), ==, NULL);
+
dnode_special_close(&os->os_meta_dnode);
if (DMU_USERUSED_DNODE(os)) {
dnode_special_close(&os->os_userused_dnode);
@@ -681,8 +707,6 @@ dmu_objset_evict(objset_t *os)
}
zil_free(os->os_zil);
- ASSERT3P(list_head(&os->os_dnodes), ==, NULL);
-
VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf));
/*
@@ -697,6 +721,7 @@ dmu_objset_evict(objset_t *os)
mutex_destroy(&os->os_lock);
mutex_destroy(&os->os_obj_lock);
mutex_destroy(&os->os_user_ptr_lock);
+ spa_evicting_os_deregister(os->os_spa, os);
kmem_free(os, sizeof (objset_t));
}
@@ -895,7 +920,7 @@ dmu_objset_clone_check(void *arg, dmu_tx
return (error);
/* You can only clone snapshots, not the head datasets. */
- if (!dsl_dataset_is_snapshot(origin)) {
+ if (!origin->ds_is_snapshot) {
dsl_dataset_rele(origin, FTAG);
return (SET_ERROR(EINVAL));
}
@@ -1459,7 +1484,7 @@ int
dmu_objset_is_snapshot(objset_t *os)
{
if (os->os_dsl_dataset != NULL)
- return (dsl_dataset_is_snapshot(os->os_dsl_dataset));
+ return (os->os_dsl_dataset->ds_is_snapshot);
else
return (B_FALSE);
}
Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c Sat Oct 3 07:28:52 2015 (r288548)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c Sat Oct 3 07:29:56 2015 (r288549)
@@ -636,7 +636,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp,
fromtxg = fromzb->zbm_creation_txg;
}
dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname);
- if (!dsl_dataset_is_snapshot(ds)) {
+ if (!ds->ds_is_snapshot) {
(void) strlcat(drr->drr_u.drr_begin.drr_toname, "@--head--",
sizeof (drr->drr_u.drr_begin.drr_toname));
}
@@ -852,11 +852,11 @@ dmu_send_estimate(dsl_dataset_t *ds, dsl
ASSERT(dsl_pool_config_held(dp));
/* tosnap must be a snapshot */
- if (!dsl_dataset_is_snapshot(ds))
+ if (!ds->ds_is_snapshot)
return (SET_ERROR(EINVAL));
/* fromsnap, if provided, must be a snapshot */
- if (fromds != NULL && !dsl_dataset_is_snapshot(fromds))
+ if (fromds != NULL && !fromds->ds_is_snapshot)
return (SET_ERROR(EINVAL));
/*
@@ -1105,7 +1105,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t
dsl_dataset_rele(ds, FTAG);
return (error);
}
- if (!dsl_dataset_is_snapshot(origin)) {
+ if (!origin->ds_is_snapshot) {
dsl_dataset_rele(origin, FTAG);
dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(EINVAL));
Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c Sat Oct 3 07:28:52 2015 (r288548)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c Sat Oct 3 07:29:56 2015 (r288549)
@@ -534,7 +534,7 @@ traverse_impl(spa_t *spa, dsl_dataset_t
cv_init(&pd.pd_cv, NULL, CV_DEFAULT, NULL);
/* See comment on ZIL traversal in dsl_scan_visitds. */
- if (ds != NULL && !dsl_dataset_is_snapshot(ds) && !BP_IS_HOLE(rootbp)) {
+ if (ds != NULL && !ds->ds_is_snapshot && !BP_IS_HOLE(rootbp)) {
arc_flags_t flags = ARC_FLAG_WAIT;
objset_phys_t *osp;
arc_buf_t *buf;
Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c Sat Oct 3 07:28:52 2015 (r288548)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c Sat Oct 3 07:29:56 2015 (r288549)
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -405,8 +406,9 @@ static dnode_t *
dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
uint64_t object, dnode_handle_t *dnh)
{
- dnode_t *dn = kmem_cache_alloc(dnode_cache, KM_SLEEP);
+ dnode_t *dn;
+ dn = kmem_cache_alloc(dnode_cache, KM_SLEEP);
ASSERT(!POINTER_IS_VALID(dn->dn_objset));
dn->dn_moved = 0;
@@ -443,13 +445,31 @@ dnode_create(objset_t *os, dnode_phys_t
ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));
mutex_enter(&os->os_lock);
- list_insert_head(&os->os_dnodes, dn);
+ if (dnh->dnh_dnode != NULL) {
+ /* Lost the allocation race. */
+ mutex_exit(&os->os_lock);
+ kmem_cache_free(dnode_cache, dn);
+ return (dnh->dnh_dnode);
+ }
+
+ /*
+ * Exclude special dnodes from os_dnodes so an empty os_dnodes
+ * signifies that the special dnodes have no references from
+ * their children (the entries in os_dnodes). This allows
+ * dnode_destroy() to easily determine if the last child has
+ * been removed and then complete eviction of the objset.
+ */
+ if (!DMU_OBJECT_IS_SPECIAL(object))
+ list_insert_head(&os->os_dnodes, dn);
membar_producer();
+
/*
- * Everything else must be valid before assigning dn_objset makes the
- * dnode eligible for dnode_move().
+ * Everything else must be valid before assigning dn_objset
+ * makes the dnode eligible for dnode_move().
*/
dn->dn_objset = os;
+
+ dnh->dnh_dnode = dn;
mutex_exit(&os->os_lock);
arc_space_consume(sizeof (dnode_t), ARC_SPACE_OTHER);
@@ -463,12 +483,18 @@ static void
dnode_destroy(dnode_t *dn)
{
objset_t *os = dn->dn_objset;
+ boolean_t complete_os_eviction = B_FALSE;
ASSERT((dn->dn_id_flags & DN_ID_NEW_EXIST) == 0);
mutex_enter(&os->os_lock);
POINTER_INVALIDATE(&dn->dn_objset);
- list_remove(&os->os_dnodes, dn);
+ if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
+ list_remove(&os->os_dnodes, dn);
+ complete_os_eviction =
+ list_is_empty(&os->os_dnodes) &&
+ list_link_active(&os->os_evicting_node);
+ }
mutex_exit(&os->os_lock);
/* the dnode can no longer move, so we can release the handle */
@@ -503,6 +529,9 @@ dnode_destroy(dnode_t *dn)
dmu_zfetch_rele(&dn->dn_zfetch);
kmem_cache_free(dnode_cache, dn);
arc_space_return(sizeof (dnode_t), ARC_SPACE_OTHER);
+
+ if (complete_os_eviction)
+ dmu_objset_evict_done(os);
}
void
@@ -971,33 +1000,32 @@ dnode_special_close(dnode_handle_t *dnh)
*/
while (refcount_count(&dn->dn_holds) > 0)
delay(1);
+ ASSERT(dn->dn_dbuf == NULL ||
+ dmu_buf_get_user(&dn->dn_dbuf->db) == NULL);
zrl_add(&dnh->dnh_zrlock);
dnode_destroy(dn); /* implicit zrl_remove() */
zrl_destroy(&dnh->dnh_zrlock);
dnh->dnh_dnode = NULL;
}
-dnode_t *
+void
dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object,
dnode_handle_t *dnh)
{
- dnode_t *dn = dnode_create(os, dnp, NULL, object, dnh);
- dnh->dnh_dnode = dn;
+ dnode_t *dn;
+
+ dn = dnode_create(os, dnp, NULL, object, dnh);
zrl_init(&dnh->dnh_zrlock);
DNODE_VERIFY(dn);
- return (dn);
}
static void
-dnode_buf_pageout(dmu_buf_t *db, void *arg)
+dnode_buf_pageout(void *dbu)
{
- dnode_children_t *children_dnodes = arg;
+ dnode_children_t *children_dnodes = dbu;
int i;
- int epb = db->db_size >> DNODE_SHIFT;
- ASSERT(epb == children_dnodes->dnc_count);
-
- for (i = 0; i < epb; i++) {
+ for (i = 0; i < children_dnodes->dnc_count; i++) {
dnode_handle_t *dnh = &children_dnodes->dnc_children[i];
dnode_t *dn;
@@ -1027,7 +1055,7 @@ dnode_buf_pageout(dmu_buf_t *db, void *a
dnh->dnh_dnode = NULL;
}
kmem_free(children_dnodes, sizeof (dnode_children_t) +
- epb * sizeof (dnode_handle_t));
+ children_dnodes->dnc_count * sizeof (dnode_handle_t));
}
/*
@@ -1117,10 +1145,11 @@ dnode_hold_impl(objset_t *os, uint64_t o
dnh = &children_dnodes->dnc_children[0];
for (i = 0; i < epb; i++) {
zrl_init(&dnh[i].dnh_zrlock);
- dnh[i].dnh_dnode = NULL;
}
- if (winner = dmu_buf_set_user(&db->db, children_dnodes,
- dnode_buf_pageout)) {
+ dmu_buf_init_user(&children_dnodes->dnc_dbu,
+ dnode_buf_pageout, NULL);
+ winner = dmu_buf_set_user(&db->db, &children_dnodes->dnc_dbu);
+ if (winner != NULL) {
for (i = 0; i < epb; i++) {
zrl_destroy(&dnh[i].dnh_zrlock);
@@ -1135,17 +1164,11 @@ dnode_hold_impl(objset_t *os, uint64_t o
dnh = &children_dnodes->dnc_children[idx];
zrl_add(&dnh->dnh_zrlock);
- if ((dn = dnh->dnh_dnode) == NULL) {
+ dn = dnh->dnh_dnode;
+ if (dn == NULL) {
dnode_phys_t *phys = (dnode_phys_t *)db->db.db_data+idx;
- dnode_t *winner;
dn = dnode_create(os, phys, db, object, dnh);
- winner = atomic_cas_ptr(&dnh->dnh_dnode, NULL, dn);
- if (winner != NULL) {
- zrl_add(&dnh->dnh_zrlock);
- dnode_destroy(dn); /* implicit zrl_remove() */
- dn = winner;
- }
}
mutex_enter(&dn->dn_mtx);
@@ -1159,10 +1182,10 @@ dnode_hold_impl(objset_t *os, uint64_t o
dbuf_rele(db, FTAG);
return (type == DMU_OT_NONE ? ENOENT : EEXIST);
}
- mutex_exit(&dn->dn_mtx);
-
if (refcount_add(&dn->dn_holds, tag) == 1)
dbuf_add_ref(db, dnh);
+ mutex_exit(&dn->dn_mtx);
+
/* Now we can rely on the hold to prevent the dnode from moving. */
zrl_remove(&dnh->dnh_zrlock);
Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c Sat Oct 3 07:28:52 2015 (r288548)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c Sat Oct 3 07:29:56 2015 (r288549)
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -397,49 +398,37 @@ dnode_sync_free_range(void *arg, uint64_
void
dnode_evict_dbufs(dnode_t *dn)
{
- int progress;
- int pass = 0;
+ dmu_buf_impl_t db_marker;
+ dmu_buf_impl_t *db, *db_next;
+
+ mutex_enter(&dn->dn_dbufs_mtx);
+ for (db = avl_first(&dn->dn_dbufs); db != NULL; db = db_next) {
- do {
- dmu_buf_impl_t *db, *db_next;
- int evicting = FALSE;
-
- progress = FALSE;
- mutex_enter(&dn->dn_dbufs_mtx);
- for (db = avl_first(&dn->dn_dbufs); db != NULL; db = db_next) {
- db_next = AVL_NEXT(&dn->dn_dbufs, db);
#ifdef DEBUG
- DB_DNODE_ENTER(db);
- ASSERT3P(DB_DNODE(db), ==, dn);
- DB_DNODE_EXIT(db);
+ DB_DNODE_ENTER(db);
+ ASSERT3P(DB_DNODE(db), ==, dn);
+ DB_DNODE_EXIT(db);
#endif /* DEBUG */
- mutex_enter(&db->db_mtx);
- if (db->db_state == DB_EVICTING) {
- progress = TRUE;
- evicting = TRUE;
- mutex_exit(&db->db_mtx);
- } else if (refcount_is_zero(&db->db_holds)) {
- progress = TRUE;
- dbuf_clear(db); /* exits db_mtx for us */
- } else {
- mutex_exit(&db->db_mtx);
- }
+ mutex_enter(&db->db_mtx);
+ if (db->db_state != DB_EVICTING &&
+ refcount_is_zero(&db->db_holds)) {
+ db_marker.db_level = db->db_level;
+ db_marker.db_blkid = db->db_blkid;
+ db_marker.db_state = DB_SEARCH;
+ avl_insert_here(&dn->dn_dbufs, &db_marker, db,
+ AVL_BEFORE);
+
+ dbuf_clear(db);
+ db_next = AVL_NEXT(&dn->dn_dbufs, &db_marker);
+ avl_remove(&dn->dn_dbufs, &db_marker);
+ } else {
+ mutex_exit(&db->db_mtx);
+ db_next = AVL_NEXT(&dn->dn_dbufs, db);
}
- /*
- * NB: we need to drop dn_dbufs_mtx between passes so
- * that any DB_EVICTING dbufs can make progress.
- * Ideally, we would have some cv we could wait on, but
- * since we don't, just wait a bit to give the other
- * thread a chance to run.
- */
- mutex_exit(&dn->dn_dbufs_mtx);
- if (evicting)
- delay(1);
- pass++;
- ASSERT(pass < 100); /* sanity check */
- } while (progress);
+ }
+ mutex_exit(&dn->dn_dbufs_mtx);
dnode_evict_bonus(dn);
}
@@ -504,7 +493,6 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *t
dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]);
dnode_evict_dbufs(dn);
ASSERT(avl_is_empty(&dn->dn_dbufs));
- ASSERT3P(dn->dn_bonus, ==, NULL);
/*
* XXX - It would be nice to assert this, but we may still
Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_bookmark.c
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_bookmark.c Sat Oct 3 07:28:52 2015 (r288548)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_bookmark.c Sat Oct 3 07:29:56 2015 (r288549)
@@ -120,7 +120,7 @@ dsl_bookmark_create_check_impl(dsl_datas
int error;
zfs_bookmark_phys_t bmark_phys;
- if (!dsl_dataset_is_snapshot(snapds))
+ if (!snapds->ds_is_snapshot)
return (SET_ERROR(EINVAL));
error = dsl_bookmark_hold_ds(dp, bookmark_name,
Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c Sat Oct 3 07:28:52 2015 (r288548)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c Sat Oct 3 07:29:56 2015 (r288549)
@@ -24,6 +24,7 @@
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2014, Joyent, Inc. All rights reserved.
* Copyright (c) 2014 RackTop Systems.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#include <sys/dmu_objset.h>
@@ -77,7 +78,6 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, max_recor
#define DS_REF_MAX (1ULL << 62)
extern inline dsl_dataset_phys_t *dsl_dataset_phys(dsl_dataset_t *ds);
-extern inline boolean_t dsl_dataset_is_snapshot(dsl_dataset_t *ds);
/*
* Figure out how much of this delta should be propogated to the dsl_dir
@@ -161,7 +161,7 @@ dsl_dataset_block_kill(dsl_dataset_t *ds
}
ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
- ASSERT(!dsl_dataset_is_snapshot(ds));
+ ASSERT(!ds->ds_is_snapshot);
dmu_buf_will_dirty(ds->ds_dbuf, tx);
if (bp->blk_birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) {
@@ -259,14 +259,15 @@ dsl_dataset_block_freeable(dsl_dataset_t
return (B_TRUE);
}
-/* ARGSUSED */
static void
-dsl_dataset_evict(dmu_buf_t *db, void *dsv)
+dsl_dataset_evict(void *dbu)
{
- dsl_dataset_t *ds = dsv;
+ dsl_dataset_t *ds = dbu;
ASSERT(ds->ds_owner == NULL);
+ ds->ds_dbuf = NULL;
+
unique_remove(ds->ds_fsid_guid);
if (ds->ds_objset != NULL)
@@ -278,10 +279,10 @@ dsl_dataset_evict(dmu_buf_t *db, void *d
}
bplist_destroy(&ds->ds_pending_deadlist);
- if (dsl_dataset_phys(ds)->ds_deadlist_obj != 0)
+ if (ds->ds_deadlist.dl_os != NULL)
dsl_deadlist_close(&ds->ds_deadlist);
if (ds->ds_dir)
- dsl_dir_rele(ds->ds_dir, ds);
+ dsl_dir_async_rele(ds->ds_dir, ds);
ASSERT(!list_link_active(&ds->ds_synced_link));
@@ -417,6 +418,7 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uin
ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
ds->ds_dbuf = dbuf;
ds->ds_object = dsobj;
+ ds->ds_is_snapshot = dsl_dataset_phys(ds)->ds_num_children != 0;
mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -456,7 +458,7 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uin
return (err);
}
- if (!dsl_dataset_is_snapshot(ds)) {
+ if (!ds->ds_is_snapshot) {
ds->ds_snapname[0] = '\0';
if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
err = dsl_dataset_hold_obj(dp,
@@ -483,7 +485,7 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uin
}
}
- if (err == 0 && !dsl_dataset_is_snapshot(ds)) {
+ if (err == 0 && !ds->ds_is_snapshot) {
err = dsl_prop_get_int_ds(ds,
zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
&ds->ds_reserved);
@@ -496,8 +498,11 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uin
ds->ds_reserved = ds->ds_quota = 0;
}
- if (err != 0 || (winner = dmu_buf_set_user_ie(dbuf, ds,
- dsl_dataset_evict)) != NULL) {
+ dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict, &ds->ds_dbuf);
+ if (err == 0)
+ winner = dmu_buf_set_user_ie(dbuf, &ds->ds_dbu);
+
+ if (err != 0 || winner != NULL) {
bplist_destroy(&ds->ds_pending_deadlist);
dsl_deadlist_close(&ds->ds_deadlist);
if (ds->ds_prev)
@@ -919,7 +924,7 @@ dsl_dataset_recalc_head_uniq(dsl_dataset
uint64_t mrs_used;
uint64_t dlused, dlcomp, dluncomp;
- ASSERT(!dsl_dataset_is_snapshot(ds));
+ ASSERT(!ds->ds_is_snapshot);
if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0)
mrs_used = dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes;
@@ -1675,7 +1680,7 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvl
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED,
dsl_dataset_phys(ds)->ds_uncompressed_bytes);
- if (dsl_dataset_is_snapshot(ds)) {
+ if (ds->ds_is_snapshot) {
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
dsl_dataset_phys(ds)->ds_unique_bytes);
@@ -1743,7 +1748,7 @@ dsl_dataset_fast_stat(dsl_dataset_t *ds,
dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT;
stat->dds_guid = dsl_dataset_phys(ds)->ds_guid;
stat->dds_origin[0] = '\0';
- if (dsl_dataset_is_snapshot(ds)) {
+ if (ds->ds_is_snapshot) {
stat->dds_is_snapshot = B_TRUE;
stat->dds_num_clones =
dsl_dataset_phys(ds)->ds_num_children - 1;
@@ -2023,7 +2028,7 @@ dsl_dataset_rollback_check(void *arg, dm
return (error);
/* must not be a snapshot */
- if (dsl_dataset_is_snapshot(ds)) {
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-stable-10
mailing list