git: f698c1e99b99 - main - zfsd: add support for hotplugging spares
Date: Thu, 06 Apr 2023 17:59:24 UTC
The branch main has been updated by asomers:

URL: https://cgit.FreeBSD.org/src/commit/?id=f698c1e99b999df73b8f4eceec8662ccef67ebbb

commit f698c1e99b999df73b8f4eceec8662ccef67ebbb
Author:     Alan Somers <asomers@FreeBSD.org>
AuthorDate: 2023-04-05 20:30:43 +0000
Commit:     Alan Somers <asomers@FreeBSD.org>
CommitDate: 2023-04-06 17:58:55 +0000

    zfsd: add support for hotplugging spares

    If you remove an unused spare and then reinsert it, zfsd will now
    online it in all pools.

    Do not MFC without 2a58b312b62 (but it's ok to MFC that one without
    this one).

    Submitted by:   Ameer Hamza <ahamza@ixsystems.com> (zfsd), Me (tests)
    MFC after:      2 weeks
    MFC with:       2a58b312b62f908ec92311d1bd8536dbaeb8e55b
    Sponsored by:   iX Systems, Axcient
    Pull Request:   https://github.com/freebsd/freebsd-src/pull/697
---
 cddl/usr.sbin/zfsd/case_file.cc                    | 35 ++++++++++-
 cddl/usr.sbin/zfsd/case_file.h                     | 19 ++++++
 cddl/usr.sbin/zfsd/vdev_iterator.cc                |  9 +++
 cddl/usr.sbin/zfsd/zfsd_event.cc                   | 14 +++--
 tests/sys/cddl/zfs/tests/zfsd/Makefile             |  2 +
 tests/sys/cddl/zfs/tests/zfsd/zfsd.kshlib          | 11 +++-
 .../cddl/zfs/tests/zfsd/zfsd_replace_004_pos.ksh   | 64 ++++++++++++++++++++
 .../cddl/zfs/tests/zfsd/zfsd_replace_005_pos.ksh   | 70 ++++++++++++++++++++++
 tests/sys/cddl/zfs/tests/zfsd/zfsd_test.sh         | 58 ++++++++++++++++++
 9 files changed, 271 insertions(+), 11 deletions(-)
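To make the new behavior concrete, the scenario this commit handles can be
sketched as follows (an illustrative sequence modeled on the new
zfsd_replace_004_pos test below; the pool and disk names and the use of
gnop(8) providers are assumptions for illustration, not part of the commit):

    # A pool with one data disk and one unused hot spare
    zpool create tank da0.nop spare da1.nop

    # Simulate pulling the spare; zpool status soon reports it REMOVED
    gnop destroy -f da1.nop

    # Reinsert the spare at the same device name
    gnop create da1

    # zfsd interrogates the returning device's ZFS label and onlines it;
    # the spare returns to AVAIL in every pool that lists it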
diff --git a/cddl/usr.sbin/zfsd/case_file.cc b/cddl/usr.sbin/zfsd/case_file.cc
index 34234a5736a5..5064d3c266dd 100644
--- a/cddl/usr.sbin/zfsd/case_file.cc
+++ b/cddl/usr.sbin/zfsd/case_file.cc
@@ -116,6 +116,26 @@ CaseFile::Find(Guid poolGUID, Guid vdevGUID)
 	return (NULL);
 }
 
+void
+CaseFile::Find(Guid poolGUID, Guid vdevGUID, CaseFileList &cases)
+{
+	for (CaseFileList::iterator curCase = s_activeCases.begin();
+	    curCase != s_activeCases.end(); curCase++) {
+		if (((*curCase)->PoolGUID() != poolGUID &&
+		    Guid::InvalidGuid() != poolGUID) ||
+		    (*curCase)->VdevGUID() != vdevGUID)
+			continue;
+
+		/*
+		 * We can have multiple cases for spare vdevs
+		 */
+		cases.push_back(*curCase);
+		if (!(*curCase)->IsSpare()) {
+			return;
+		}
+	}
+}
+
 CaseFile *
 CaseFile::Find(const string &physPath)
 {
@@ -217,6 +237,12 @@ CaseFile::PurgeAll()
 }
 
+int
+CaseFile::IsSpare()
+{
+	return (m_is_spare);
+}
+
 //- CaseFile Public Methods ----------------------------------------------------
 bool
 CaseFile::RefreshVdevState()
@@ -240,6 +266,7 @@ CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev)
 {
 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
 	zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front());
+	int flags = ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE;
 
 	if (pool == NULL || !RefreshVdevState()) {
 		/*
@@ -280,9 +307,10 @@ CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev)
 	     || vdev->PoolGUID() == Guid::InvalidGuid())
 	 && vdev->GUID() == m_vdevGUID) {
+		if (IsSpare())
+			flags |= ZFS_ONLINE_SPARE;
 		if (zpool_vdev_online(pool, vdev->GUIDString().c_str(),
-		    ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE,
-		    &m_vdevState) != 0) {
+		    flags, &m_vdevState) != 0) {
 			syslog(LOG_ERR,
 			    "Failed to online vdev(%s/%s:%s): %s: %s\n",
 			    zpool_get_name(pool), vdev->GUIDString().c_str(),
@@ -790,7 +818,8 @@ CaseFile::CaseFile(const Vdev &vdev)
   : m_poolGUID(vdev.PoolGUID()),
     m_vdevGUID(vdev.GUID()),
     m_vdevState(vdev.State()),
-    m_vdevPhysPath(vdev.PhysicalPath())
+    m_vdevPhysPath(vdev.PhysicalPath()),
+    m_is_spare(vdev.IsSpare())
 {
 	stringstream guidString;
diff --git a/cddl/usr.sbin/zfsd/case_file.h b/cddl/usr.sbin/zfsd/case_file.h
index b4dc2dee5d96..d7475d331a76 100644
--- a/cddl/usr.sbin/zfsd/case_file.h
+++ b/cddl/usr.sbin/zfsd/case_file.h
@@ -98,6 +98,19 @@ public:
 	 */
 	static CaseFile *Find(DevdCtl::Guid poolGUID, DevdCtl::Guid vdevGUID);
 
+	/**
+	 * \brief Find multiple CaseFile objects by a vdev's pool/vdev
+	 *        GUID tuple (special case for spare vdevs)
+	 *
+	 * \param poolGUID  Pool GUID for the vdev of the CaseFile to find.
+	 *                  If InvalidGuid, then only match the vdev GUID
+	 *                  instead of both pool and vdev GUIDs.
+	 * \param vdevGUID  Vdev GUID for the vdev of the CaseFile to find.
+	 * \param caseList  List of cases associated with the vdev.
+	 */
+	static void Find(DevdCtl::Guid poolGUID, DevdCtl::Guid vdevGUID,
+	    CaseFileList &caseList);
+
 	/**
 	 * \brief Find a CaseFile object by a vdev's current/last known
 	 *        physical path.
@@ -219,6 +232,11 @@ public:
 	 */
 	bool ShouldFault() const;
 
+	/**
+	 * \brief If this vdev is spare
+	 */
+	int IsSpare();
+
 protected:
 	enum {
 		/**
@@ -384,6 +402,7 @@ protected:
 	string	m_poolGUIDString;
 	string	m_vdevGUIDString;
 	string	m_vdevPhysPath;
+	int	m_is_spare;
 
 	/**
 	 * \brief Callout activated when a grace period
diff --git a/cddl/usr.sbin/zfsd/vdev_iterator.cc b/cddl/usr.sbin/zfsd/vdev_iterator.cc
index b5a4f22c1c60..c23399ed68a8 100644
--- a/cddl/usr.sbin/zfsd/vdev_iterator.cc
+++ b/cddl/usr.sbin/zfsd/vdev_iterator.cc
@@ -78,8 +78,10 @@ VdevIterator::Reset()
 {
 	nvlist_t  *rootVdev;
 	nvlist	 **cache_child;
+	nvlist	 **spare_child;
 	int	   result;
 	uint_t	   cache_children;
+	uint_t	   spare_children;
 
 	result = nvlist_lookup_nvlist(m_poolConfig,
 				      ZPOOL_CONFIG_VDEV_TREE,
@@ -95,6 +97,13 @@ VdevIterator::Reset()
 	if (result == 0)
 		for (uint_t c = 0; c < cache_children; c++)
 			m_vdevQueue.push_back(cache_child[c]);
+	result = nvlist_lookup_nvlist_array(rootVdev,
+					    ZPOOL_CONFIG_SPARES,
+					    &spare_child,
+					    &spare_children);
+	if (result == 0)
+		for (uint_t c = 0; c < spare_children; c++)
+			m_vdevQueue.push_back(spare_child[c]);
 }
 
 nvlist_t *
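For context on the vdev_iterator.cc hunk above: spare vdevs are carried in
the pool configuration as their own nvlist array (ZPOOL_CONFIG_SPARES),
parallel to the l2cache array handled just before it, so Reset() has to
queue them explicitly or the iterator never visits them.  They are the same
devices that zpool status reports under a separate heading, e.g.
(illustrative output for a hypothetical pool):

    $ zpool status tank
    ...
            spares
              da1.nop     AVAIL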
diff --git a/cddl/usr.sbin/zfsd/zfsd_event.cc b/cddl/usr.sbin/zfsd/zfsd_event.cc
index 688e7c0354a2..45fc7adbb31b 100644
--- a/cddl/usr.sbin/zfsd/zfsd_event.cc
+++ b/cddl/usr.sbin/zfsd/zfsd_event.cc
@@ -229,7 +229,9 @@ bool
 GeomEvent::OnlineByLabel(const string &devPath, const string& physPath,
 			 nvlist_t *devConfig)
 {
+	bool ret = false;
 	try {
+		CaseFileList case_list;
 		/*
 		 * A device with ZFS label information has been
 		 * inserted.  If it matches a device for which we
@@ -238,10 +240,12 @@ GeomEvent::OnlineByLabel(const string &devPath, const string& physPath,
 		syslog(LOG_INFO, "Interrogating VDEV label for %s\n",
 		       devPath.c_str());
 		Vdev vdev(devConfig);
-		CaseFile *caseFile(CaseFile::Find(vdev.PoolGUID(),
-						  vdev.GUID()));
-		if (caseFile != NULL)
-			return (caseFile->ReEvaluate(devPath, physPath, &vdev));
+		CaseFile::Find(vdev.PoolGUID(),vdev.GUID(), case_list);
+		for (CaseFileList::iterator curr = case_list.begin();
+		    curr != case_list.end(); curr++) {
+			ret |= (*curr)->ReEvaluate(devPath, physPath, &vdev);
+		}
+		return (ret);
 	} catch (ZfsdException &exp) {
 		string context("GeomEvent::OnlineByLabel: " + devPath + ": ");
 
 		exp.GetString().insert(0, context);
 		exp.Log();
 	}
-	return (false);
+	return (ret);
 }
diff --git a/tests/sys/cddl/zfs/tests/zfsd/Makefile b/tests/sys/cddl/zfs/tests/zfsd/Makefile
index 6bdbae7886a6..fa662e0c41b6 100644
--- a/tests/sys/cddl/zfs/tests/zfsd/Makefile
+++ b/tests/sys/cddl/zfs/tests/zfsd/Makefile
@@ -34,5 +34,7 @@ ${PACKAGE}FILES+=	zfsd_import_001_pos.ksh
 ${PACKAGE}FILES+=	zfsd_replace_001_pos.ksh
 ${PACKAGE}FILES+=	zfsd_replace_002_pos.ksh
 ${PACKAGE}FILES+=	zfsd_replace_003_pos.ksh
+${PACKAGE}FILES+=	zfsd_replace_004_pos.ksh
+${PACKAGE}FILES+=	zfsd_replace_005_pos.ksh
 
 .include <bsd.test.mk>
diff --git a/tests/sys/cddl/zfs/tests/zfsd/zfsd.kshlib b/tests/sys/cddl/zfs/tests/zfsd/zfsd.kshlib
index 4a7f9b09e182..a14136677ba8 100644
--- a/tests/sys/cddl/zfs/tests/zfsd/zfsd.kshlib
+++ b/tests/sys/cddl/zfs/tests/zfsd/zfsd.kshlib
@@ -35,15 +35,20 @@ function wait_for_pool_dev_state_change
 	typeset -i timeout=$1
 	typeset disk=$2
 	typeset state=$3
+	typeset pool=$4
+
+	if [ -z "$pool" ]; then
+		pool=$TESTPOOL
+	fi
 
 	log_note "Waiting up to $timeout seconds for $disk to become $state ..."
 	for ((; $timeout > 0; timeout=$timeout-1)); do
-		check_state $TESTPOOL "$disk" "$state"
+		check_state $pool "$disk" "$state"
 		[ $? -eq 0 ] && return
 		$SLEEP 1
 	done
-	log_must $ZPOOL status $TESTPOOL
-	log_fail "ERROR: Disk $disk not marked as $state in $TESTPOOL"
+	log_must $ZPOOL status $pool
+	log_fail "ERROR: Disk $disk not marked as $state in $pool"
 }
 
 function wait_for_pool_removal
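The helper's new fourth argument is optional and defaults to $TESTPOOL, so
existing callers are unaffected.  The multi-pool test below simply calls it
once per pool, for example:

    # Wait for the shared spare to come back in both pools
    wait_for_pool_dev_state_change 20 $DISK2_NOP AVAIL
    wait_for_pool_dev_state_change 20 $DISK2_NOP AVAIL $TESTPOOL1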
diff --git a/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_004_pos.ksh b/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_004_pos.ksh
new file mode 100644
index 000000000000..343c30bf1da6
--- /dev/null
+++ b/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_004_pos.ksh
@@ -0,0 +1,64 @@
+#!/usr/local/bin/ksh93 -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2023 Axcient.  All rights reserved.
+# Use is subject to license terms.
+#
+# $FreeBSD$
+
+. $STF_SUITE/tests/hotspare/hotspare.kshlib
+. $STF_SUITE/tests/zfsd/zfsd.kshlib
+. $STF_SUITE/include/libtest.kshlib
+. $STF_SUITE/include/libgnop.kshlib
+
+log_assert "ZFSD will automatically replace a spare that disappears and reappears in the same location, with the same devname"
+
+ensure_zfsd_running
+
+set_disks
+
+typeset DISK0_NOP=${DISK0}.nop
+typeset DISK1_NOP=${DISK1}.nop
+
+log_must create_gnops $DISK0 $DISK1
+
+# Create a pool on the supplied disks
+create_pool $TESTPOOL $DISK0_NOP spare $DISK1_NOP
+
+# Disable the first disk.
+log_must destroy_gnop $DISK1
+
+# Check to make sure ZFS sees the disk as removed
+wait_for_pool_dev_state_change 20 $DISK1_NOP REMOVED
+
+# Re-enable the disk
+log_must create_gnop $DISK1
+
+# Disk should auto-join the zpool
+wait_for_pool_dev_state_change 20 $DISK1_NOP AVAIL
+
+$ZPOOL status $TESTPOOL
+destroy_pool $TESTPOOL
+log_must $RM -rf /$TESTPOOL
+
+log_pass
diff --git a/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_005_pos.ksh b/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_005_pos.ksh
new file mode 100644
index 000000000000..d21454168048
--- /dev/null
+++ b/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_005_pos.ksh
@@ -0,0 +1,70 @@
+#!/usr/local/bin/ksh93 -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2023 Axcient.  All rights reserved.
+# Use is subject to license terms.
+#
+# $FreeBSD$
+
+. $STF_SUITE/tests/hotspare/hotspare.kshlib
+. $STF_SUITE/tests/zfsd/zfsd.kshlib
+. $STF_SUITE/include/libtest.kshlib
+. $STF_SUITE/include/libgnop.kshlib
+
+log_assert "ZFSD will automatically replace a multi-pool spare that disappears and reappears"
+
+ensure_zfsd_running
+
+set_disks
+
+typeset DISK0_NOP=${DISK0}.nop
+typeset DISK1_NOP=${DISK1}.nop
+typeset DISK2_NOP=${DISK2}.nop
+
+log_must create_gnops $DISK0 $DISK1 $DISK2
+
+# Create pools on the supplied disks
+create_pool $TESTPOOL $DISK0_NOP spare $DISK2_NOP
+create_pool $TESTPOOL1 $DISK1_NOP spare $DISK2_NOP
+
+# Disable the spare disk.
+log_must destroy_gnop $DISK2
+
+# Check to make sure ZFS sees the disk as removed
+wait_for_pool_dev_state_change 20 $DISK2_NOP REMOVED
+wait_for_pool_dev_state_change 20 $DISK2_NOP REMOVED $TESTPOOL1
+
+# Re-enable the disk
+log_must create_gnop $DISK2
+
+# Disk should auto-join the zpools
+wait_for_pool_dev_state_change 20 $DISK2_NOP AVAIL
+wait_for_pool_dev_state_change 20 $DISK2_NOP AVAIL $TESTPOOL1
+
+$ZPOOL status $TESTPOOL
+$ZPOOL status $TESTPOOL1
+destroy_pool $TESTPOOL
+destroy_pool $TESTPOOL1
+log_must $RM -rf /$TESTPOOL
+
+log_pass
diff --git a/tests/sys/cddl/zfs/tests/zfsd/zfsd_test.sh b/tests/sys/cddl/zfs/tests/zfsd/zfsd_test.sh
index e6bc16a56442..b15208973bfe 100755
--- a/tests/sys/cddl/zfs/tests/zfsd/zfsd_test.sh
+++ b/tests/sys/cddl/zfs/tests/zfsd/zfsd_test.sh
@@ -538,6 +538,62 @@ zfsd_replace_003_pos_cleanup()
 	ksh93 $(atf_get_srcdir)/cleanup.ksh || atf_fail "Cleanup failed"
 }
 
+atf_test_case zfsd_replace_004_pos cleanup
+zfsd_replace_004_pos_head()
+{
+	atf_set "descr" "ZFSD will automatically replace a spare that disappears and reappears in the same location, with the same devname"
+	atf_set "require.progs" "ksh93 zpool zfs gnop"
+}
+zfsd_replace_004_pos_body()
+{
+	. $(atf_get_srcdir)/../../include/default.cfg
+	. $(atf_get_srcdir)/zfsd.cfg
+
+	verify_disk_count "$DISKS" 2
+	verify_zfsd_running
+	ksh93 $(atf_get_srcdir)/setup.ksh || atf_fail "Setup failed"
+	ksh93 $(atf_get_srcdir)/zfsd_replace_004_pos.ksh
+	if [[ $? != 0 ]]; then
+		save_artifacts
+		atf_fail "Testcase failed"
+	fi
+}
+zfsd_replace_004_pos_cleanup()
+{
+	. $(atf_get_srcdir)/../../include/default.cfg
+	. $(atf_get_srcdir)/zfsd.cfg
+
+	ksh93 $(atf_get_srcdir)/cleanup.ksh || atf_fail "Cleanup failed"
+}
+
+atf_test_case zfsd_replace_005_pos cleanup
+zfsd_replace_005_pos_head()
+{
+	atf_set "descr" "ZFSD will automatically replace a multi-pool spare that disappears and reappears"
+	atf_set "require.progs" "ksh93 zpool zfs gnop"
+}
+zfsd_replace_005_pos_body()
+{
+	. $(atf_get_srcdir)/../../include/default.cfg
+	. $(atf_get_srcdir)/zfsd.cfg
+
+	verify_disk_count "$DISKS" 3
+	verify_zfsd_running
+	ksh93 $(atf_get_srcdir)/setup.ksh || atf_fail "Setup failed"
+	ksh93 $(atf_get_srcdir)/zfsd_replace_005_pos.ksh
+	if [[ $? != 0 ]]; then
+		save_artifacts
+		atf_fail "Testcase failed"
+	fi
+}
+zfsd_replace_005_pos_cleanup()
+{
+	. $(atf_get_srcdir)/../../include/default.cfg
+	. $(atf_get_srcdir)/zfsd.cfg
+
+	ksh93 $(atf_get_srcdir)/cleanup.ksh || atf_fail "Cleanup failed"
+}
+
 atf_test_case zfsd_import_001_pos cleanup
 zfsd_import_001_pos_head()
 {
@@ -591,6 +647,8 @@ atf_init_test_cases()
 	atf_add_test_case zfsd_replace_001_pos
 	atf_add_test_case zfsd_replace_002_pos
 	atf_add_test_case zfsd_replace_003_pos
+	atf_add_test_case zfsd_replace_004_pos
+	atf_add_test_case zfsd_replace_005_pos
 	atf_add_test_case zfsd_import_001_pos
 }
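Once built and installed, the new cases run through kyua like the other ATF
tests in this suite; assuming the suite lands under /usr/tests as usual (the
exact path and invocation are illustrative, not stated by the commit):

    cd /usr/tests/sys/cddl/zfs/tests/zfsd
    kyua test zfsd_test:zfsd_replace_004_pos zfsd_test:zfsd_replace_005_pos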