svn commit: r255538 - in stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys
Alexander Motin
mav at FreeBSD.org
Sat Sep 14 08:33:59 UTC 2013
Author: mav
Date: Sat Sep 14 08:33:58 2013
New Revision: 255538
URL: http://svnweb.freebsd.org/changeset/base/255538
Log:
MFC r253990:
Make ZFS use a separate thread to handle SPA_ASYNC_REMOVE async events.
The existing async thread runs only on successful spa_sync() completion,
which is impossible when the pool loses its required (last) disk(s). That
indefinite delay of SPA_ASYNC_REMOVE processing kept ZFS from closing the
lost disks, preventing GEOM/CAM from destroying devices and reusing names
on later disk reattach.
In an earlier version of the patch I tried to just run the existing thread
immediately, independent of spa_sync() completion, but that exposed a number
of situations where it could get stuck due to locks held by the stuck
spa_sync() that are required for other kinds of async events.
Experiments with an OpenIndiana snapshot confirmed that it also has this
issue when lost disks are reattached.
Modified:
stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
Directory Properties:
stable/9/sys/ (props changed)
stable/9/sys/cddl/contrib/opensolaris/ (props changed)
Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c Sat Sep 14 08:32:17 2013 (r255537)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c Sat Sep 14 08:33:58 2013 (r255538)
@@ -5763,7 +5763,7 @@ spa_async_thread(void *arg)
mutex_enter(&spa->spa_async_lock);
tasks = spa->spa_async_tasks;
- spa->spa_async_tasks = 0;
+ spa->spa_async_tasks &= SPA_ASYNC_REMOVE;
mutex_exit(&spa->spa_async_lock);
/*
@@ -5789,19 +5789,6 @@ spa_async_thread(void *arg)
}
}
- /*
- * See if any devices need to be marked REMOVED.
- */
- if (tasks & SPA_ASYNC_REMOVE) {
- spa_vdev_state_enter(spa, SCL_NONE);
- spa_async_remove(spa, spa->spa_root_vdev);
- for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
- spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]);
- for (int i = 0; i < spa->spa_spares.sav_count; i++)
- spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]);
- (void) spa_vdev_state_exit(spa, NULL, 0);
- }
-
if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) {
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
spa_async_autoexpand(spa, spa->spa_root_vdev);
@@ -5839,12 +5826,53 @@ spa_async_thread(void *arg)
thread_exit();
}
+/*
+ * Worker thread dedicated to SPA_ASYNC_REMOVE requests.
+ *
+ * It clears the SPA_ASYNC_REMOVE bit under spa_async_lock, runs
+ * spa_async_remove() over the root vdev, the l2cache devices and the
+ * spares, and repeats for as long as a new SPA_ASYNC_REMOVE request is
+ * found pending afterwards.  Before exiting it resets
+ * spa->spa_async_thread_vd to NULL and broadcasts spa_async_cv.
+ *
+ * arg is the spa_t this thread works on.
+ */
+static void
+spa_async_thread_vd(void *arg)
+{
+ spa_t *spa = arg;
+ int tasks;
+
+ ASSERT(spa->spa_sync_on);
+
+ mutex_enter(&spa->spa_async_lock);
+ tasks = spa->spa_async_tasks;
+retry:
+ /*
+ * Consume only the REMOVE request; every other task bit is left
+ * set in spa_async_tasks.  The lock is held here both on first entry
+ * and when arriving via "goto retry".
+ */
+ spa->spa_async_tasks &= ~SPA_ASYNC_REMOVE;
+ mutex_exit(&spa->spa_async_lock);
+
+ /*
+ * See if any devices need to be marked REMOVED.
+ */
+ if (tasks & SPA_ASYNC_REMOVE) {
+ spa_vdev_state_enter(spa, SCL_NONE);
+ spa_async_remove(spa, spa->spa_root_vdev);
+ for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
+ spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]);
+ for (int i = 0; i < spa->spa_spares.sav_count; i++)
+ spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]);
+ (void) spa_vdev_state_exit(spa, NULL, 0);
+ }
+
+ /*
+ * Re-read the task mask under the lock: if another SPA_ASYNC_REMOVE
+ * was requested while we were working, go around again instead of
+ * exiting.  Otherwise let the world know that we're done.
+ */
+ mutex_enter(&spa->spa_async_lock);
+ tasks = spa->spa_async_tasks;
+ if ((tasks & SPA_ASYNC_REMOVE) != 0)
+ goto retry;
+ spa->spa_async_thread_vd = NULL;
+ cv_broadcast(&spa->spa_async_cv);
+ mutex_exit(&spa->spa_async_lock);
+ thread_exit();
+}
+
void
spa_async_suspend(spa_t *spa)
{
mutex_enter(&spa->spa_async_lock);
spa->spa_async_suspended++;
- while (spa->spa_async_thread != NULL)
+ /*
+ * Wait for both async workers to exit.  The condition must be an OR:
+ * with AND the loop would end as soon as either pointer is NULL,
+ * returning while the other thread (general or vdev-removal) may
+ * still be running.
+ */
+ while (spa->spa_async_thread != NULL ||
+ spa->spa_async_thread_vd != NULL)
cv_wait(&spa->spa_async_cv, &spa->spa_async_lock);
mutex_exit(&spa->spa_async_lock);
}
@@ -5865,7 +5893,8 @@ spa_async_tasks_pending(spa_t *spa)
uint_t config_task;
boolean_t config_task_suspended;
- non_config_tasks = spa->spa_async_tasks & ~SPA_ASYNC_CONFIG_UPDATE;
+ non_config_tasks = spa->spa_async_tasks & ~(SPA_ASYNC_CONFIG_UPDATE |
+ SPA_ASYNC_REMOVE);
config_task = spa->spa_async_tasks & SPA_ASYNC_CONFIG_UPDATE;
if (spa->spa_ccw_fail_time == 0) {
config_task_suspended = B_FALSE;
@@ -5891,6 +5920,19 @@ spa_async_dispatch(spa_t *spa)
mutex_exit(&spa->spa_async_lock);
}
+/*
+ * Start the SPA_ASYNC_REMOVE worker thread (spa_async_thread_vd) when it
+ * is needed.  A thread is created only if a SPA_ASYNC_REMOVE request is
+ * pending, async processing is not suspended, no such worker is already
+ * recorded in spa->spa_async_thread_vd, and rootdir is non-NULL.  The
+ * checks and the thread creation are all done under spa_async_lock.
+ */
+static void
+spa_async_dispatch_vd(spa_t *spa)
+{
+ mutex_enter(&spa->spa_async_lock);
+ if ((spa->spa_async_tasks & SPA_ASYNC_REMOVE) != 0 &&
+ !spa->spa_async_suspended &&
+ spa->spa_async_thread_vd == NULL &&
+ rootdir != NULL)
+ spa->spa_async_thread_vd = thread_create(NULL, 0,
+ spa_async_thread_vd, spa, 0, &p0, TS_RUN, maxclsyspri);
+ mutex_exit(&spa->spa_async_lock);
+}
+
void
spa_async_request(spa_t *spa, int task)
{
@@ -5898,6 +5940,7 @@ spa_async_request(spa_t *spa, int task)
mutex_enter(&spa->spa_async_lock);
spa->spa_async_tasks |= task;
mutex_exit(&spa->spa_async_lock);
+ spa_async_dispatch_vd(spa);
}
/*
@@ -6486,6 +6529,7 @@ spa_sync(spa_t *spa, uint64_t txg)
* If any async tasks have been requested, kick them off.
*/
spa_async_dispatch(spa);
+ spa_async_dispatch_vd(spa);
}
/*
Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h Sat Sep 14 08:32:17 2013 (r255537)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h Sat Sep 14 08:33:58 2013 (r255538)
@@ -169,6 +169,7 @@ struct spa {
uint64_t spa_scan_pass_exam; /* examined bytes per pass */
kmutex_t spa_async_lock; /* protect async state */
kthread_t *spa_async_thread; /* thread doing async task */
+ kthread_t *spa_async_thread_vd; /* thread doing vd async task */
int spa_async_suspended; /* async tasks suspended */
kcondvar_t spa_async_cv; /* wait for thread_exit() */
uint16_t spa_async_tasks; /* async task mask */
More information about the svn-src-stable-9
mailing list