svn commit: r354961 - vendor-sys/illumos/dist/common/zfs vendor-sys/illumos/dist/uts/common/fs/zfs vendor-sys/illumos/dist/uts/common/fs/zfs/sys vendor-sys/illumos/dist/uts/common/sys/fs vendor/ill...
Andriy Gapon
avg at FreeBSD.org
Thu Nov 21 14:10:56 UTC 2019
Author: avg
Date: Thu Nov 21 14:10:53 2019
New Revision: 354961
URL: https://svnweb.freebsd.org/changeset/base/354961
Log:
10952 defer new resilvers and misc. resilver-related fixes
illumos/illumos-gate at e4c795beb33bf59dd4ad2e3f88f493111484b890
https://github.com/illumos/illumos-gate/commit/e4c795beb33bf59dd4ad2e3f88f493111484b890
https://www.illumos.org/issues/10952
From ZoL
612c4930dd2 Fix the spelling of deferred
cef48f14da6 Remove races from scrub / resilver tests
4021ba4cfaa Make vdev_set_deferred_resilver() recursive
8cb119e3dc0 Fix 2 small bugs with cached dsl_scan_phys_t
5e0bd0ae056 Fix issue with scanning dedup blocks as scan ends
b3d7725c943 Remove zfs_gitrev.h (this shouldn't be part of 80a91e74696)
80a91e74696 Defer new resilvers until the current one ends
Portions contributed by: Jerry Jelinek <jerry.jelinek at joyent.com>
Portions contributed by: Brian Behlendorf <behlendorf1 at llnl.gov>
Portions contributed by: Arkadiusz Bubała <arkadiusz.bubala at open-e.com>
Author: Tom Caputi <tcaputi at datto.com>
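The core behavioral change, repeated at each resilver trigger site in the diff
below (spa_vdev_attach(), vdev_open(), vdev_clear()): when a scan is already
running and the resilver_defer feature is enabled, the vdev is flagged for a
deferred resilver instead of the scan being restarted. A minimal userland
sketch of that decision, using hypothetical stand-in types rather than the
real spa_t/vdev_t:

#include <stdbool.h>

/* Hypothetical stand-ins for the kernel state the patch consults. */
struct pool {
	bool scan_in_progress;		/* dsl_scan_resilvering() */
	bool defer_feature_enabled;	/* SPA_FEATURE_RESILVER_DEFER */
	bool resilver_deferred;		/* spa_resilver_deferred */
};
struct dev {
	bool resilver_deferred;		/* vdev_resilver_deferred */
};

static void
request_resilver(struct pool *spa, struct dev *vd)
{
	if (spa->scan_in_progress && spa->defer_feature_enabled) {
		/* A scan is in flight: defer; dsl_scan_done() will
		 * restart the resilver once the current scan ends. */
		vd->resilver_deferred = true;
		spa->resilver_deferred = true;
	} else {
		/* No conflict: start the resilver right away, as
		 * spa_async_request(spa, SPA_ASYNC_RESILVER) does. */
	}
}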
Modified:
vendor-sys/illumos/dist/common/zfs/zfeature_common.c
vendor-sys/illumos/dist/common/zfs/zfeature_common.h
vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_scan.c
vendor-sys/illumos/dist/uts/common/fs/zfs/spa.c
vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa_impl.h
vendor-sys/illumos/dist/uts/common/fs/zfs/sys/vdev.h
vendor-sys/illumos/dist/uts/common/fs/zfs/sys/vdev_impl.h
vendor-sys/illumos/dist/uts/common/fs/zfs/vdev.c
vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_indirect.c
vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_label.c
vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_removal.c
vendor-sys/illumos/dist/uts/common/fs/zfs/zil.c
vendor-sys/illumos/dist/uts/common/sys/fs/zfs.h
Changes in other areas also in this revision:
Modified:
vendor/illumos/dist/cmd/zpool/zpool_main.c
vendor/illumos/dist/lib/libzfs/common/libzfs.h
vendor/illumos/dist/lib/libzfs/common/libzfs_pool.c
vendor/illumos/dist/lib/libzfs/common/libzfs_util.c
vendor/illumos/dist/man/man1m/zpool.1m
vendor/illumos/dist/man/man5/zpool-features.5
Modified: vendor-sys/illumos/dist/common/zfs/zfeature_common.c
==============================================================================
--- vendor-sys/illumos/dist/common/zfs/zfeature_common.c Thu Nov 21 14:09:46 2019 (r354960)
+++ vendor-sys/illumos/dist/common/zfs/zfeature_common.c Thu Nov 21 14:10:53 2019 (r354961)
@@ -300,10 +300,13 @@ zpool_feature_init(void)
"freed or remapped.",
ZFEATURE_FLAG_READONLY_COMPAT, obsolete_counts_deps);
- {
zfeature_register(SPA_FEATURE_ALLOCATION_CLASSES,
"org.zfsonlinux:allocation_classes", "allocation_classes",
"Support for separate allocation classes.",
ZFEATURE_FLAG_READONLY_COMPAT, NULL);
- }
+
+ zfeature_register(SPA_FEATURE_RESILVER_DEFER,
+ "com.datto:resilver_defer", "resilver_defer",
+ "Support for defering new resilvers when one is already running.",
+ ZFEATURE_FLAG_READONLY_COMPAT, NULL);
}
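resilver_defer is registered read-only compatible, and its on-disk refcount
tracks whether any deferral is pending: dsl_scan_sync() increments it the
first time spa_resilver_deferred is set, and dsl_scan_clear_deferred()
decrements it when the finished scan clears the flags. A small sketch of that
activate/deactivate lifecycle, with hypothetical stand-in names:

#include <stdbool.h>

/* Hypothetical stand-in for an on-disk feature refcount. */
struct feature {
	bool enabled;
	unsigned refcount;	/* nonzero == feature active */
};

/* First deferral activates the feature (cf. dsl_scan_sync()). */
static void
defer_first(struct feature *f)
{
	if (f->enabled && f->refcount == 0)
		f->refcount++;
}

/* Scan completion deactivates it (cf. dsl_scan_clear_deferred()). */
static void
defer_done(struct feature *f)
{
	if (f->refcount > 0)
		f->refcount--;
}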
Modified: vendor-sys/illumos/dist/common/zfs/zfeature_common.h
==============================================================================
--- vendor-sys/illumos/dist/common/zfs/zfeature_common.h Thu Nov 21 14:09:46 2019 (r354960)
+++ vendor-sys/illumos/dist/common/zfs/zfeature_common.h Thu Nov 21 14:10:53 2019 (r354961)
@@ -63,6 +63,7 @@ typedef enum spa_feature {
SPA_FEATURE_POOL_CHECKPOINT,
SPA_FEATURE_SPACEMAP_V2,
SPA_FEATURE_ALLOCATION_CLASSES,
+ SPA_FEATURE_RESILVER_DEFER,
SPA_FEATURES
} spa_feature_t;
Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_scan.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_scan.c Thu Nov 21 14:09:46 2019 (r354960)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_scan.c Thu Nov 21 14:10:53 2019 (r354961)
@@ -183,12 +183,15 @@ unsigned int zfs_free_min_time_ms = 1000; /* min milli
unsigned int zfs_obsolete_min_time_ms = 500;
/* min millisecs to resilver per txg */
unsigned int zfs_resilver_min_time_ms = 3000;
+int zfs_scan_suspend_progress = 0; /* set to prevent scans from progressing */
boolean_t zfs_no_scrub_io = B_FALSE; /* set to disable scrub i/o */
boolean_t zfs_no_scrub_prefetch = B_FALSE; /* set to disable scrub prefetch */
enum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE;
/* max number of blocks to free in a single TXG */
uint64_t zfs_async_block_max_blocks = UINT64_MAX;
+int zfs_resilver_disable_defer = 0; /* set to disable resilver deferring */
+
/*
* We wait a few txgs after importing a pool to begin scanning so that
* the import / mounting code isn't held up by scrub / resilver IO.
@@ -455,7 +458,6 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
scn->scn_async_destroying = spa_feature_is_active(dp->dp_spa,
SPA_FEATURE_ASYNC_DESTROY);
- bcopy(&scn->scn_phys, &scn->scn_phys_cached, sizeof (scn->scn_phys));
avl_create(&scn->scn_queue, scan_ds_queue_compare, sizeof (scan_ds_t),
offsetof(scan_ds_t, sds_node));
avl_create(&scn->scn_prefetch_queue, scan_prefetch_queue_compare,
@@ -513,6 +515,8 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
}
}
+ bcopy(&scn->scn_phys, &scn->scn_phys_cached, sizeof (scn->scn_phys));
+
/* reload the queue into the in-core state */
if (scn->scn_phys.scn_queue_obj != 0) {
zap_cursor_t zc;
@@ -751,6 +755,11 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
spa->spa_scrub_reopen = B_FALSE;
(void) spa_vdev_state_exit(spa, NULL, 0);
+ if (func == POOL_SCAN_RESILVER) {
+ dsl_resilver_restart(spa->spa_dsl_pool, 0);
+ return (0);
+ }
+
if (func == POOL_SCAN_SCRUB && dsl_scan_is_paused_scrub(scn)) {
/* got scrub start cmd, resume paused scrub */
int err = dsl_scrub_set_pause_resume(scn->scn_dp,
@@ -766,6 +775,41 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED));
}
+/*
+ * Sets the resilver defer flag to B_FALSE on all leaf devs under vd. Returns
+ * B_TRUE if we have devices that need to be resilvered and are available to
+ * accept resilver I/Os.
+ */
+static boolean_t
+dsl_scan_clear_deferred(vdev_t *vd, dmu_tx_t *tx)
+{
+ boolean_t resilver_needed = B_FALSE;
+ spa_t *spa = vd->vdev_spa;
+
+ for (int c = 0; c < vd->vdev_children; c++) {
+ resilver_needed |=
+ dsl_scan_clear_deferred(vd->vdev_child[c], tx);
+ }
+
+ if (vd == spa->spa_root_vdev &&
+ spa_feature_is_active(spa, SPA_FEATURE_RESILVER_DEFER)) {
+ spa_feature_decr(spa, SPA_FEATURE_RESILVER_DEFER, tx);
+ vdev_config_dirty(vd);
+ spa->spa_resilver_deferred = B_FALSE;
+ return (resilver_needed);
+ }
+
+ if (!vdev_is_concrete(vd) || vd->vdev_aux ||
+ !vd->vdev_ops->vdev_op_leaf)
+ return (resilver_needed);
+
+ if (vd->vdev_resilver_deferred)
+ vd->vdev_resilver_deferred = B_FALSE;
+
+ return (!vdev_is_dead(vd) && !vd->vdev_offline &&
+ vdev_resilver_needed(vd, NULL, NULL));
+}
+
/* ARGSUSED */
static void
dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
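A condensed sketch of the post-order walk dsl_scan_clear_deferred() performs
over the vdev tree, using hypothetical stand-in types and omitting the
feature-refcount bookkeeping done at the root: every leaf's deferred flag is
cleared on the way back up, and the return value accumulates whether any
healthy leaf still needs a resilver, which dsl_scan_done() turns into a
SPA_ASYNC_RESILVER request:

#include <stdbool.h>
#include <stddef.h>

/* Hypothetical, flattened stand-in for vdev_t. */
struct dev {
	struct dev **child;
	size_t nchildren;
	bool leaf, healthy, deferred, needs_resilver;
};

static bool
clear_deferred(struct dev *vd)
{
	bool needed = false;

	/* Post-order: visit the children before this node. */
	for (size_t c = 0; c < vd->nchildren; c++)
		needed |= clear_deferred(vd->child[c]);

	if (!vd->leaf)
		return (needed);

	vd->deferred = false;
	/* Report healthy leaves that still have missing data. */
	return (needed || (vd->healthy && vd->needs_resilver));
}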
@@ -865,6 +909,25 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu
* Let the async thread assess this and handle the detach.
*/
spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
+
+ /*
+ * Clear any deferred_resilver flags in the config.
+ * If there are drives that need resilvering, kick
+ * off an asynchronous request to start resilver.
+ * dsl_scan_clear_deferred() may update the config
+ * before the resilver can restart. In the event of
+ * a crash during this period, the spa loading code
+ * will find the drives that need to be resilvered
+ * when the machine reboots and start the resilver then.
+ */
+ boolean_t resilver_needed =
+ dsl_scan_clear_deferred(spa->spa_root_vdev, tx);
+ if (resilver_needed) {
+ spa_history_log_internal(spa,
+ "starting deferred resilver", tx,
+ "errors=%llu", spa_get_errlog_size(spa));
+ spa_async_request(spa, SPA_ASYNC_RESILVER);
+ }
}
scn->scn_phys.scn_end_time = gethrestime_sec();
@@ -935,6 +998,7 @@ dsl_scrub_pause_resume_sync(void *arg, dmu_tx_t *tx)
/* can't pause a scrub when there is no in-progress scrub */
spa->spa_scan_pass_scrub_pause = gethrestime_sec();
scn->scn_phys.scn_flags |= DSF_SCRUB_PAUSED;
+ scn->scn_phys_cached.scn_flags |= DSF_SCRUB_PAUSED;
dsl_scan_sync_state(scn, tx, SYNC_CACHED);
spa_event_notify(spa, NULL, NULL, ESC_ZFS_SCRUB_PAUSED);
} else {
@@ -949,6 +1013,7 @@ dsl_scrub_pause_resume_sync(void *arg, dmu_tx_t *tx)
gethrestime_sec() - spa->spa_scan_pass_scrub_pause;
spa->spa_scan_pass_scrub_pause = 0;
scn->scn_phys.scn_flags &= ~DSF_SCRUB_PAUSED;
+ scn->scn_phys_cached.scn_flags &= ~DSF_SCRUB_PAUSED;
dsl_scan_sync_state(scn, tx, SYNC_CACHED);
}
}
@@ -2335,6 +2400,20 @@ dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum
if (scn->scn_phys.scn_state != DSS_SCANNING)
return;
+ /*
+ * This function is special because it is the only thing
+ * that can add scan_io_t's to the vdev scan queues from
+ * outside dsl_scan_sync(). For the most part this is ok
+ * as long as it is called from within syncing context.
+ * However, dsl_scan_sync() expects that no new sio's will
+ * be added between when all the work for a scan is done
+ * and the next txg when the scan is actually marked as
+ * completed. This check ensures we do not issue new sio's
+ * during this period.
+ */
+ if (scn->scn_done_txg != 0)
+ return;
+
for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
if (ddp->ddp_phys_birth == 0 ||
ddp->ddp_phys_birth > scn->scn_phys.scn_max_txg)
@@ -2986,6 +3065,26 @@ dsl_scan_active(dsl_scan_t *scn)
}
static boolean_t
+dsl_scan_check_deferred(vdev_t *vd)
+{
+ boolean_t need_resilver = B_FALSE;
+
+ for (int c = 0; c < vd->vdev_children; c++) {
+ need_resilver |=
+ dsl_scan_check_deferred(vd->vdev_child[c]);
+ }
+
+ if (!vdev_is_concrete(vd) || vd->vdev_aux ||
+ !vd->vdev_ops->vdev_op_leaf)
+ return (need_resilver);
+
+ if (!vd->vdev_resilver_deferred)
+ need_resilver = B_TRUE;
+
+ return (need_resilver);
+}
+
+static boolean_t
dsl_scan_need_resilver(spa_t *spa, const dva_t *dva, size_t psize,
uint64_t phys_birth)
{
@@ -3032,6 +3131,13 @@ dsl_scan_need_resilver(spa_t *spa, const dva_t *dva, s
if (!vdev_dtl_need_resilver(vd, DVA_GET_OFFSET(dva), psize))
return (B_FALSE);
+ /*
+ * Check that this top-level vdev has a device under it which
+ * is resilvering and is not deferred.
+ */
+ if (!dsl_scan_check_deferred(vd))
+ return (B_FALSE);
+
return (B_TRUE);
}
@@ -3193,12 +3299,19 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
int err = 0;
state_sync_type_t sync_type = SYNC_OPTIONAL;
+ if (spa->spa_resilver_deferred &&
+ !spa_feature_is_active(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER))
+ spa_feature_incr(spa, SPA_FEATURE_RESILVER_DEFER, tx);
+
/*
* Check for scn_restart_txg before checking spa_load_state, so
* that we can restart an old-style scan while the pool is being
- * imported (see dsl_scan_init).
+ * imported (see dsl_scan_init). We also restart scans if there
+ * is a deferred resilver and the user has manually disabled
+ * deferred resilvers via the tunable.
*/
- if (dsl_scan_restarting(scn, tx)) {
+ if (dsl_scan_restarting(scn, tx) ||
+ (spa->spa_resilver_deferred && zfs_resilver_disable_defer)) {
pool_scan_func_t func = POOL_SCAN_SCRUB;
dsl_scan_done(scn, B_FALSE, tx);
if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL))
@@ -3265,6 +3378,27 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
return;
/*
+ * zfs_scan_suspend_progress can be set to disable scan progress.
+ * We don't want to spin the txg_sync thread, so we add a delay
+ * here to simulate the time spent doing a scan. This is mostly
+ * useful for testing and debugging.
+ */
+ if (zfs_scan_suspend_progress) {
+ uint64_t scan_time_ns = gethrtime() - scn->scn_sync_start_time;
+ int mintime = (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) ?
+ zfs_resilver_min_time_ms : zfs_scrub_min_time_ms;
+
+ while (zfs_scan_suspend_progress &&
+ !txg_sync_waiting(scn->scn_dp) &&
+ !spa_shutting_down(scn->scn_dp->dp_spa) &&
+ NSEC2MSEC(scan_time_ns) < mintime) {
+ delay(hz);
+ scan_time_ns = gethrtime() - scn->scn_sync_start_time;
+ }
+ return;
+ }
+
+ /*
* It is possible to switch from unsorted to sorted at any time,
* but afterwards the scan will remain sorted unless reloaded from
* a checkpoint after a reboot.
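zfs_scan_suspend_progress swaps real scan work for a bounded sleep so the test
suite can hold a scan at a known point without spinning the txg_sync thread.
A self-contained userland sketch of the same polling pattern, with stand-in
names and delay(hz) approximated by nanosleep():

#include <stdatomic.h>
#include <stdbool.h>
#include <time.h>

/* Hypothetical stand-ins for the tunable and the exit checks. */
static atomic_int suspend_progress = 1; /* zfs_scan_suspend_progress */
static bool pool_shutting_down(void) { return false; }

static void
simulate_scan_pass(long min_time_ms)
{
	struct timespec start, now;
	struct timespec tick = { 0, 100 * 1000 * 1000 }; /* ~delay(hz) */

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (;;) {
		clock_gettime(CLOCK_MONOTONIC, &now);
		long elapsed_ms = (now.tv_sec - start.tv_sec) * 1000 +
		    (now.tv_nsec - start.tv_nsec) / 1000000;
		/* Wake up once any exit condition holds. */
		if (atomic_load(&suspend_progress) == 0 ||
		    pool_shutting_down() || elapsed_ms >= min_time_ms)
			break;
		nanosleep(&tick, NULL);
	}
}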
@@ -3393,6 +3527,8 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
(longlong_t)tx->tx_txg);
}
} else if (scn->scn_is_sorted && scn->scn_bytes_pending != 0) {
+ ASSERT(scn->scn_clearing);
+
/* need to issue scrubbing IOs from per-vdev queues */
scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
NULL, ZIO_FLAG_CANFAIL);
Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/spa.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/spa.c Thu Nov 21 14:09:46 2019 (r354960)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/spa.c Thu Nov 21 14:10:53 2019 (r354961)
@@ -6176,9 +6176,14 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *n
/*
* Schedule the resilver to restart in the future. We do this to
* ensure that dmu_sync-ed blocks have been stitched into the
- * respective datasets.
+ * respective datasets. We do not do this if resilvers have been
+ * deferred.
*/
- dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg);
+ if (dsl_scan_resilvering(spa_get_dsl(spa)) &&
+ spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER))
+ vdev_set_deferred_resilver(spa, newvd);
+ else
+ dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg);
if (spa->spa_bootfs)
spa_event_notify(spa, newvd, NULL, ESC_ZFS_BOOTFS_VDEV_ATTACH);
@@ -7069,6 +7074,10 @@ spa_scan(spa_t *spa, pool_scan_func_t func)
if (func >= POOL_SCAN_FUNCS || func == POOL_SCAN_NONE)
return (SET_ERROR(ENOTSUP));
+ if (func == POOL_SCAN_RESILVER &&
+ !spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER))
+ return (SET_ERROR(ENOTSUP));
+
/*
* If a resilver was requested, but there is no DTL on a
* writeable leaf device, we have nothing to do.
@@ -7160,6 +7169,7 @@ static void
spa_async_thread(void *arg)
{
spa_t *spa = (spa_t *)arg;
+ dsl_pool_t *dp = spa->spa_dsl_pool;
int tasks;
ASSERT(spa->spa_sync_on);
@@ -7235,8 +7245,10 @@ spa_async_thread(void *arg)
/*
* Kick off a resilver.
*/
- if (tasks & SPA_ASYNC_RESILVER)
- dsl_resilver_restart(spa->spa_dsl_pool, 0);
+ if (tasks & SPA_ASYNC_RESILVER &&
+ (!dsl_scan_resilvering(dp) ||
+ !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)))
+ dsl_resilver_restart(dp, 0);
if (tasks & SPA_ASYNC_INITIALIZE_RESTART) {
mutex_enter(&spa_namespace_lock);
Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa_impl.h
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa_impl.h Thu Nov 21 14:09:46 2019 (r354960)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa_impl.h Thu Nov 21 14:10:53 2019 (r354961)
@@ -279,6 +279,13 @@ struct spa {
uint64_t spa_scan_pass_scrub_spent_paused; /* total paused */
uint64_t spa_scan_pass_exam; /* examined bytes per pass */
uint64_t spa_scan_pass_issued; /* issued bytes per pass */
+
+ /*
+ * We are in the middle of a resilver, and another resilver
+ * is needed once this one completes. This is set iff any
+ * vdev_resilver_deferred is set.
+ */
+ boolean_t spa_resilver_deferred;
kmutex_t spa_async_lock; /* protect async state */
kthread_t *spa_async_thread; /* thread doing async task */
int spa_async_suspended; /* async tasks suspended */
Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/vdev.h
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/vdev.h Thu Nov 21 14:09:46 2019 (r354960)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/vdev.h Thu Nov 21 14:10:53 2019 (r354961)
@@ -149,6 +149,8 @@ extern int vdev_config_sync(vdev_t **svd, int svdcount
extern void vdev_state_dirty(vdev_t *vd);
extern void vdev_state_clean(vdev_t *vd);
+extern void vdev_set_deferred_resilver(spa_t *spa, vdev_t *vd);
+
typedef enum vdev_config_flag {
VDEV_CONFIG_SPARE = 1 << 0,
VDEV_CONFIG_L2CACHE = 1 << 1,
Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/vdev_impl.h
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/vdev_impl.h Thu Nov 21 14:09:46 2019 (r354960)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/vdev_impl.h Thu Nov 21 14:10:53 2019 (r354961)
@@ -346,6 +346,7 @@ struct vdev {
boolean_t vdev_cant_write; /* vdev is failing all writes */
boolean_t vdev_isspare; /* was a hot spare */
boolean_t vdev_isl2cache; /* was a l2cache device */
+ boolean_t vdev_resilver_deferred; /* resilver deferred */
vdev_queue_t vdev_queue; /* I/O deadline schedule queue */
vdev_cache_t vdev_cache; /* physical block cache */
spa_aux_vdev_t *vdev_aux; /* for l2cache and spares vdevs */
Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/vdev.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/vdev.c Thu Nov 21 14:09:46 2019 (r354960)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/vdev.c Thu Nov 21 14:10:53 2019 (r354961)
@@ -760,6 +760,9 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vde
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RESILVER_TXG,
&vd->vdev_resilver_txg);
+ if (nvlist_exists(nv, ZPOOL_CONFIG_RESILVER_DEFER))
+ vdev_set_deferred_resilver(spa, vd);
+
/*
* When importing a pool, we want to ignore the persistent fault
* state, as the diagnosis made on another system may not be
@@ -1733,8 +1736,13 @@ vdev_open(vdev_t *vd)
* since this would just restart the scrub we are already doing.
*/
if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen &&
- vdev_resilver_needed(vd, NULL, NULL))
- spa_async_request(spa, SPA_ASYNC_RESILVER);
+ vdev_resilver_needed(vd, NULL, NULL)) {
+ if (dsl_scan_resilvering(spa->spa_dsl_pool) &&
+ spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER))
+ vdev_set_deferred_resilver(spa, vd);
+ else
+ spa_async_request(spa, SPA_ASYNC_RESILVER);
+ }
return (0);
}
@@ -2441,6 +2449,9 @@ vdev_dtl_should_excise(vdev_t *vd)
if (vd->vdev_state < VDEV_STATE_DEGRADED)
return (B_FALSE);
+ if (vd->vdev_resilver_deferred)
+ return (B_FALSE);
+
if (vd->vdev_resilver_txg == 0 ||
range_tree_is_empty(vd->vdev_dtl[DTL_MISSING]))
return (B_TRUE);
@@ -3474,8 +3485,14 @@ vdev_clear(spa_t *spa, vdev_t *vd)
if (vd != rvd && vdev_writeable(vd->vdev_top))
vdev_state_dirty(vd->vdev_top);
- if (vd->vdev_aux == NULL && !vdev_is_dead(vd))
- spa_async_request(spa, SPA_ASYNC_RESILVER);
+ if (vd->vdev_aux == NULL && !vdev_is_dead(vd)) {
+ if (dsl_scan_resilvering(spa->spa_dsl_pool) &&
+ spa_feature_is_enabled(spa,
+ SPA_FEATURE_RESILVER_DEFER))
+ vdev_set_deferred_resilver(spa, vd);
+ else
+ spa_async_request(spa, SPA_ASYNC_RESILVER);
+ }
spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_CLEAR);
}
@@ -3618,6 +3635,8 @@ vdev_get_stats(vdev_t *vd, vdev_stat_t *vs)
vs->vs_fragmentation = (vd->vdev_mg != NULL) ?
vd->vdev_mg->mg_fragmentation : 0;
}
+ if (vd->vdev_ops->vdev_op_leaf)
+ vs->vs_resilver_deferred = vd->vdev_resilver_deferred;
/*
* If we're getting stats on the root vdev, aggregate the I/O counts
@@ -4330,4 +4349,19 @@ vdev_deadman(vdev_t *vd)
}
mutex_exit(&vq->vq_lock);
}
+}
+
+void
+vdev_set_deferred_resilver(spa_t *spa, vdev_t *vd)
+{
+ for (uint64_t i = 0; i < vd->vdev_children; i++)
+ vdev_set_deferred_resilver(spa, vd->vdev_child[i]);
+
+ if (!vd->vdev_ops->vdev_op_leaf || !vdev_writeable(vd) ||
+ range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) {
+ return;
+ }
+
+ vd->vdev_resilver_deferred = B_TRUE;
+ spa->spa_resilver_deferred = B_TRUE;
}
Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_indirect.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_indirect.c Thu Nov 21 14:09:46 2019 (r354960)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_indirect.c Thu Nov 21 14:10:53 2019 (r354961)
@@ -1239,6 +1239,8 @@ vdev_indirect_read_all(zio_t *zio)
{
indirect_vsd_t *iv = zio->io_vsd;
+ ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ);
+
for (indirect_split_t *is = list_head(&iv->iv_splits);
is != NULL; is = list_next(&iv->iv_splits, is)) {
for (int i = 0; i < is->is_children; i++) {
@@ -1321,7 +1323,8 @@ vdev_indirect_io_start(zio_t *zio)
vdev_indirect_child_io_done, zio));
} else {
iv->iv_split_block = B_TRUE;
- if (zio->io_flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER)) {
+ if (zio->io_type == ZIO_TYPE_READ &&
+ zio->io_flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER)) {
/*
* Read all copies. Note that for simplicity,
* we don't bother consulting the DTL in the
@@ -1330,13 +1333,17 @@ vdev_indirect_io_start(zio_t *zio)
vdev_indirect_read_all(zio);
} else {
/*
- * Read one copy of each split segment, from the
- * top-level vdev. Since we don't know the
- * checksum of each split individually, the child
- * zio can't ensure that we get the right data.
- * E.g. if it's a mirror, it will just read from a
- * random (healthy) leaf vdev. We have to verify
- * the checksum in vdev_indirect_io_done().
+ * If this is a read zio, we read one copy of each
+ * split segment, from the top-level vdev. Since
+ * we don't know the checksum of each split
+ * individually, the child zio can't ensure that
+ * we get the right data. E.g. if it's a mirror,
+ * it will just read from a random (healthy) leaf
+ * vdev. We have to verify the checksum in
+ * vdev_indirect_io_done().
+ *
+ * For write zios, the vdev code will ensure we write
+ * to all children.
*/
for (indirect_split_t *is = list_head(&iv->iv_splits);
is != NULL; is = list_next(&iv->iv_splits, is)) {
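The fix above narrows the read-all-copies path to read zios: a scrub- or
resilver-flagged write could previously match the flag test alone and be
misrouted into vdev_indirect_read_all(). A condensed sketch of the corrected
dispatch, with hypothetical helper names standing in for the two issue paths:

#include <stdbool.h>

enum io_type { IO_READ, IO_WRITE };

#define FLAG_SCRUB	(1u << 0)
#define FLAG_RESILVER	(1u << 1)

/* Hypothetical helpers standing in for the two issue paths. */
static void read_all_copies(void) { }
static void issue_one_child_per_split(void) { }

static void
dispatch_split_block(enum io_type type, unsigned flags)
{
	if (type == IO_READ && (flags & (FLAG_SCRUB | FLAG_RESILVER))) {
		/* Scrub/resilver reads examine every copy. */
		read_all_copies();
	} else {
		/* Reads take one copy per split (checksum verified in
		 * vdev_indirect_io_done()); writes go to all children. */
		issue_one_child_per_split();
	}
}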
Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_label.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_label.c Thu Nov 21 14:09:46 2019 (r354960)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_label.c Thu Nov 21 14:10:53 2019 (r354961)
@@ -377,6 +377,12 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t
fnvlist_add_uint64(nv, ZPOOL_CONFIG_VDEV_TOP_ZAP,
vd->vdev_top_zap);
}
+
+ if (vd->vdev_resilver_deferred) {
+ ASSERT(vd->vdev_ops->vdev_op_leaf);
+ ASSERT(spa->spa_resilver_deferred);
+ fnvlist_add_boolean(nv, ZPOOL_CONFIG_RESILVER_DEFER);
+ }
}
if (getstats) {
Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_removal.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_removal.c Thu Nov 21 14:09:46 2019 (r354960)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_removal.c Thu Nov 21 14:10:53 2019 (r354961)
@@ -127,7 +127,7 @@ int vdev_removal_max_span = 32 * 1024;
* This is used by the test suite so that it can ensure that certain
* actions happen while in the middle of a removal.
*/
-uint64_t zfs_remove_max_bytes_pause = UINT64_MAX;
+int zfs_removal_suspend_progress = 0;
#define VDEV_REMOVAL_ZAP_OBJS "lzap"
@@ -1433,14 +1433,14 @@ spa_vdev_remove_thread(void *arg)
/*
* This delay will pause the removal around the point
- * specified by zfs_remove_max_bytes_pause. We do this
+ * specified by zfs_removal_suspend_progress. We do this
* solely from the test suite or during debugging.
*/
uint64_t bytes_copied =
spa->spa_removing_phys.sr_copied;
for (int i = 0; i < TXG_SIZE; i++)
bytes_copied += svr->svr_bytes_done[i];
- while (zfs_remove_max_bytes_pause <= bytes_copied &&
+ while (zfs_removal_suspend_progress &&
!svr->svr_thread_exit)
delay(hz);
Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/zil.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/zil.c Thu Nov 21 14:09:46 2019 (r354960)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/zil.c Thu Nov 21 14:10:53 2019 (r354961)
@@ -1252,7 +1252,7 @@ zil_lwb_set_zio_dependency(zilog_t *zilog, lwb_t *lwb)
* root zios). This is required because of how we can
* defer the DKIOCFLUSHWRITECACHE commands for each lwb.
*
- * When the DKIOCFLUSHWRITECACHE commands are defered,
+ * When the DKIOCFLUSHWRITECACHE commands are deferred,
* the previous lwb will rely on this lwb to flush the
* vdevs written to by that previous lwb. Thus, we need
* to ensure this lwb doesn't issue the flush until
Modified: vendor-sys/illumos/dist/uts/common/sys/fs/zfs.h
==============================================================================
--- vendor-sys/illumos/dist/uts/common/sys/fs/zfs.h Thu Nov 21 14:09:46 2019 (r354960)
+++ vendor-sys/illumos/dist/uts/common/sys/fs/zfs.h Thu Nov 21 14:10:53 2019 (r354961)
@@ -597,6 +597,7 @@ typedef struct zpool_load_policy {
#define ZPOOL_CONFIG_VDEV_TOP_ZAP "com.delphix:vdev_zap_top"
#define ZPOOL_CONFIG_VDEV_LEAF_ZAP "com.delphix:vdev_zap_leaf"
#define ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS "com.delphix:has_per_vdev_zaps"
+#define ZPOOL_CONFIG_RESILVER_DEFER "com.datto:resilver_defer"
#define ZPOOL_CONFIG_CACHEFILE "cachefile" /* not stored on disk */
#define ZPOOL_CONFIG_MMP_STATE "mmp_state" /* not stored on disk */
#define ZPOOL_CONFIG_MMP_TXG "mmp_txg" /* not stored on disk */
@@ -896,6 +897,7 @@ typedef struct vdev_stat {
uint64_t vs_initialize_state; /* vdev_initialzing_state_t */
uint64_t vs_initialize_action_time; /* time_t */
uint64_t vs_checkpoint_space; /* checkpoint-consumed space */
+ uint64_t vs_resilver_deferred; /* resilver deferred */
} vdev_stat_t;
/*