svn commit: r323937 - in projects/zfsd/head/tests/sys/cddl/zfs: include tests/hotspare tests/zfsd
Alan Somers
asomers at FreeBSD.org
Fri Sep 22 22:00:29 UTC 2017
Author: asomers
Date: Fri Sep 22 22:00:26 2017
New Revision: 323937
URL: https://svnweb.freebsd.org/changeset/base/323937
Log:
Fix intermittency in ZFS tests that disable SAS phys
tests/sys/cddl/zfs/include/libsas.kshlib
In disable_sas_disk, wait for the disk to disappear before
returning. If it doesn't disappear within 2 seconds, try disabling
the phy again. This is necessary because disabling the phy
sometimes fails if there is a command outstanding at the time. I
think the HBA's error recovery code might be resetting the phy in
that case. Also, in enable_sas_disk, wait for the disk to reappear.
tests/sys/cddl/zfs/tests/hotspare/hotspare_replace_003_neg.ksh
tests/sys/cddl/zfs/tests/zfsd/zfsd.kshlib
tests/sys/cddl/zfs/tests/zfsd/zfsd_hotspare_004_pos.ksh
tests/sys/cddl/zfs/tests/zfsd/zfsd_hotspare_007_pos.ksh
tests/sys/cddl/zfs/tests/zfsd/zfsd_import_001_pos.ksh
tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_001_pos.ksh
tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_002_pos.ksh
tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_003_pos.ksh
Every place that was using (enable|disable)_sas_disk was already
waiting for the disk to (re|dis)appear, so move that code into a
common location in libsas.kshlib. Also remove some superfluous
rescan_disks calls.
Sponsored by: Spectra Logic Corp
Modified:
projects/zfsd/head/tests/sys/cddl/zfs/include/libsas.kshlib
projects/zfsd/head/tests/sys/cddl/zfs/tests/hotspare/hotspare_replace_003_neg.ksh
projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd.kshlib
projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_hotspare_004_pos.ksh
projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_hotspare_007_pos.ksh
projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_import_001_pos.ksh
projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_001_pos.ksh
projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_002_pos.ksh
projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_003_pos.ksh
Modified: projects/zfsd/head/tests/sys/cddl/zfs/include/libsas.kshlib
==============================================================================
--- projects/zfsd/head/tests/sys/cddl/zfs/include/libsas.kshlib Fri Sep 22 21:55:41 2017 (r323936)
+++ projects/zfsd/head/tests/sys/cddl/zfs/include/libsas.kshlib Fri Sep 22 22:00:26 2017 (r323937)
@@ -154,9 +154,24 @@ function disable_sas_disk
{
typeset EXPANDER=$1
typeset PHY=$2
+ typeset DISK=${3##*/}
# Disable the phy for this particular device
log_must camcontrol smppc $EXPANDER -v -p $PHY -o disable
+ # Wait up to 16 seconds for the disk to disappear.
+ for (( i=0; i<8; i=i+1)); do
+ # CAM waits 2 seconds to ensure the disk is really gone
+ sleep 2
+ if [ -c /dev/${DISK} ]; then
+ # Error recovery routines in the HBA sometimes reenable
+ # the phy if a command fails at the wrong time, so we
+ # may have to disable it multiple times.
+ log_must camcontrol smppc $EXPANDER -v -p $PHY -o disable
+ else
+ return
+ fi
+ done
+ log_fail "Disk $DISK never disappeared"
}
# Given an expander and phy on that expander, enable the phy.
@@ -169,6 +184,7 @@ function enable_sas_disk
# Send a link reset to bring the device back
log_must camcontrol smppc $EXPANDER -p $PHY -o linkreset
+ wait_for_disk_to_reappear 30 $EXPANDER $PHY
}
function rescan_disks
@@ -181,4 +197,18 @@ function rescan_disks
for device in $(echo $* | sort -u); do
log_must camcontrol rescan $device >/dev/null
done
+}
+
+function wait_for_disk_to_reappear
+{
+ typeset -i timeout=$1
+ typeset EXPANDER=$2
+ typeset PHY=$3
+
+ for ((; $timeout > 0; timeout=$timeout-1)); do
+ find_disk_by_phy $EXPANDER $PHY
+ [ -n "$FOUNDDISK" -a -e "/dev/$FOUNDDISK" ] && return
+ $SLEEP 1
+ done
+ log_fail "ERROR: Disk at ${EXPANDER}:${PHY} never reappeared"
}
Modified: projects/zfsd/head/tests/sys/cddl/zfs/tests/hotspare/hotspare_replace_003_neg.ksh
==============================================================================
--- projects/zfsd/head/tests/sys/cddl/zfs/tests/hotspare/hotspare_replace_003_neg.ksh Fri Sep 22 21:55:41 2017 (r323936)
+++ projects/zfsd/head/tests/sys/cddl/zfs/tests/hotspare/hotspare_replace_003_neg.ksh Fri Sep 22 22:00:26 2017 (r323937)
@@ -57,7 +57,6 @@ cleanup() {
[[ $DISK0_PHY != 0 ]] && enable_sas_disk $DISK0_EXPANDER $DISK0_PHY
[[ $SPARE0_PHY != 0 ]] && enable_sas_disk $SPARE0_EXPANDER $SPARE0_PHY
[[ $SPARE1_PHY != 0 ]] && enable_sas_disk $SPARE1_EXPANDER $SPARE1_PHY
- rescan_disks
if poolexists $TESTPOOL; then
# Test failed, provide something useful.
log_note "For reference, here is the final $TESTPOOL status:"
@@ -132,9 +131,8 @@ disable_sas_disk $SPARE0_EXPANDER $SPARE0_PHY
log_must $ZPOOL replace $TESTPOOL $SPARE0_GUID $SPARE1_NAME
wait_until_resilvered
-enable_sas_disk $SPARE0_EXPANDER $SPARE0_PHY
enable_sas_disk $DISK0_EXPANDER $DISK0_PHY
-rescan_disks
+enable_sas_disk $SPARE0_EXPANDER $SPARE0_PHY
log_must destroy_pool $TESTPOOL
Modified: projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd.kshlib
==============================================================================
--- projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd.kshlib Fri Sep 22 21:55:41 2017 (r323936)
+++ projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd.kshlib Fri Sep 22 22:00:26 2017 (r323937)
@@ -27,20 +27,6 @@
# Common routines used by multiple zfsd tests
-function wait_for_disk_to_reappear
-{
- typeset -i timeout=$1
- typeset EXPANDER=$2
- typeset PHY=$3
-
- for ((; $timeout > 0; timeout=$timeout-1)); do
- find_disk_by_phy $EXPANDER $PHY
- [ -n "$FOUNDDISK" -a -e "/dev/$FOUNDDISK" ] && return
- $SLEEP 1
- done
- log_fail "ERROR: Disk at ${EXPANDER}:${PHY} never reappeared"
-}
-
function wait_for_pool_dev_state_change
{
typeset -i timeout=$1
@@ -109,11 +95,8 @@ function do_autoreplace
# Remove a vdev by disabling its SAS phy
find_verify_sas_disk $REMOVAL_DISK
log_note "Disabling \"$REMOVAL_DISK\" on expander $EXPANDER phy $PHY"
- disable_sas_disk $EXPANDER $PHY
+ disable_sas_disk $EXPANDER $PHY $REMOVAL_DISK
- # Check to make sure the disk is gone
- log_mustnot camcontrol inquiry $REMOVAL_DISK
-
# Check to make sure ZFS sees the disk as removed
wait_for_pool_removal 30
@@ -137,8 +120,6 @@ function do_autoreplace
# Reenable the missing dev's SAS phy
log_note "Reenabling phy on expander $EXPANDER phy $PHY"
enable_sas_disk $EXPANDER $PHY
- rescan_disks $EXPANDER
- wait_for_disk_to_reappear 30 $EXPANDER $PHY
# Erase the missing dev's ZFS label
log_must $ZPOOL labelclear -f $( find_disks $FOUNDDISK )
@@ -146,11 +127,8 @@ function do_autoreplace
# Disable the missing dev's SAS phy again
find_verify_sas_disk $FOUNDDISK
log_note "Disabling \"$FOUNDDISK\" on expander $EXPANDER phy $PHY"
- disable_sas_disk $EXPANDER $PHY
+ disable_sas_disk $EXPANDER $PHY $FOUNDDISK
- # Check to make sure the disk is gone
- log_mustnot camcontrol inquiry $REMOVAL_DISK
-
# Import the pool
log_must $ZPOOL import $TESTPOOL
# Wait 5 seconds before enabling the phy so zfsd.log will be easier
@@ -160,8 +138,6 @@ function do_autoreplace
# Reenable the missing dev's SAS phy
log_note "Reenabling phy on expander $EXPANDER phy $PHY"
enable_sas_disk $EXPANDER $PHY
- rescan_disks $EXPANDER
- wait_for_disk_to_reappear 30 $EXPANDER $PHY
}
function autoreplace_cleanup
@@ -172,7 +148,6 @@ function autoreplace_cleanup
if [ -n "$REMOVAL_DISK" -a -n "$EXPANDER" -a -n "$PHY" ]; then
log_note "Renabling ${EXPANDER}:${PHY} for disk ${REMOVAL_DISK}"
enable_sas_disk $EXPANDER $PHY
- rescan_disks $EXPANDER
# For debugging purposes, log the partial output of
# camcontrol to see if the disk actually came back.
Modified: projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_hotspare_004_pos.ksh
==============================================================================
--- projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_hotspare_004_pos.ksh Fri Sep 22 21:55:41 2017 (r323936)
+++ projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_hotspare_004_pos.ksh Fri Sep 22 22:00:26 2017 (r323937)
@@ -73,10 +73,6 @@ function verify_assertion # spare_dev
log_note "Disabling \"$REMOVAL_DISK\" on expander $EXPANDER phy $PHY"
disable_sas_disk $EXPANDER $PHY
- # Check to make sure the disk is gone
- find_disk_by_phy $EXPANDER $PHY
- [ -n "$FOUNDDISK" ] && log_fail "Disk \"$REMOVAL_DISK\" was not removed"
-
# Check to make sure ZFS sees the disk as removed
wait_for_pool_removal 20
@@ -87,7 +83,6 @@ function verify_assertion # spare_dev
# Reenable the missing disk
log_note "Reenabling phy on expander $EXPANDER phy $PHY"
enable_sas_disk $EXPANDER $PHY
- wait_for_disk_to_reappear 20 $EXPANDER $PHY
# Check that the disk has rejoined the pool & resilvered
wait_for_pool_dev_state_change 20 $REMOVAL_DISK ONLINE
Modified: projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_hotspare_007_pos.ksh
==============================================================================
--- projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_hotspare_007_pos.ksh Fri Sep 22 21:55:41 2017 (r323936)
+++ projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_hotspare_007_pos.ksh Fri Sep 22 22:00:26 2017 (r323937)
@@ -75,10 +75,6 @@ function verify_assertion # spare_dev
log_note "Disabling \"$REMOVAL_DISK\" on expander $EXPANDER phy $PHY"
disable_sas_disk $EXPANDER $PHY
- # Check to make sure the disk is gone
- find_disk_by_phy $EXPANDER $PHY
- [ -n "$FOUNDDISK" ] && log_fail "Disk \"$REMOVAL_DISK\" was not removed"
-
# Check to make sure ZFS sees the disk as removed
wait_for_pool_removal 20
@@ -90,7 +86,6 @@ function verify_assertion # spare_dev
# Reenable the missing disk
log_note "Reenabling phy on expander $EXPANDER phy $PHY"
enable_sas_disk $EXPANDER $PHY
- wait_for_disk_to_reappear 20 $EXPANDER $PHY
}
typeset REMOVAL_DISK=$DISK0
Modified: projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_import_001_pos.ksh
==============================================================================
--- projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_import_001_pos.ksh Fri Sep 22 21:55:41 2017 (r323936)
+++ projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_import_001_pos.ksh Fri Sep 22 22:00:26 2017 (r323937)
@@ -73,10 +73,6 @@ function verify_assertion # spare_dev
log_note "Disabling \"$REMOVAL_DISK\" on expander $EXPANDER phy $PHY"
disable_sas_disk $EXPANDER $PHY
- # Check to make sure the disk is gone
- find_disk_by_phy $EXPANDER $PHY
- [ -n "$FOUNDDISK" ] && log_fail "Disk \"$REMOVAL_DISK\" was not removed"
-
# Check to make sure ZFS sees the disk as removed
wait_for_pool_removal 20
@@ -90,9 +86,6 @@ function verify_assertion # spare_dev
# Reenable the missing disk
log_note "Reenabling phy on expander $EXPANDER phy $PHY"
enable_sas_disk $EXPANDER $PHY
-
- # Check that the disk has returned
- wait_for_disk_to_reappear 20 $EXPANDER $PHY
# Import the pool
log_must $ZPOOL import $TESTPOOL
Modified: projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_001_pos.ksh
==============================================================================
--- projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_001_pos.ksh Fri Sep 22 21:55:41 2017 (r323936)
+++ projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_001_pos.ksh Fri Sep 22 22:00:26 2017 (r323937)
@@ -65,10 +65,6 @@ for type in "raidz" "mirror"; do
# there is I/O active to the
disable_sas_disk $EXPANDER $PHY
- # Check to make sure disk is gone.
- find_disk_by_phy $EXPANDER $PHY
- [ -n "$FOUNDDISK" ] && log_fail "Disk \"$REMOVAL_DISK\" was not removed"
-
# Write out data to make sure we can do I/O after the disk failure
log_must $DD if=/dev/zero of=$TESTDIR/$TESTFILE bs=1m count=512
@@ -78,7 +74,6 @@ for type in "raidz" "mirror"; do
# Re-enable the disk, we don't want to leave it turned off
log_note "Re-enabling phy $PHY on expander $EXPANDER"
enable_sas_disk $EXPANDER $PHY
- wait_for_disk_to_reappear 20 $EXPANDER $PHY
# Disk should auto-join the zpool & be resilvered.
wait_for_pool_dev_state_change 20 $REMOVAL_DISK ONLINE
Modified: projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_002_pos.ksh
==============================================================================
--- projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_002_pos.ksh Fri Sep 22 21:55:41 2017 (r323936)
+++ projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_002_pos.ksh Fri Sep 22 22:00:26 2017 (r323937)
@@ -102,14 +102,7 @@ for type in "raidz" "mirror"; do
# there is I/O active to the
disable_sas_disk $EXPANDER $PHY
done
- rescan_disks
- # Now go through the list of disks, and make sure they are all gone.
- for CURDISK in ${TMPDISKS[*]}; do
- # Check to make sure disk is gone.
- log_mustnot camcontrol inquiry $CURDISK
- done
-
# Make sure that the pool status is "UNAVAIL". We have taken all
# of the drives offline, so it should be.
log_must is_pool_state $TESTPOOL UNAVAIL
@@ -121,7 +114,6 @@ for type in "raidz" "mirror"; do
log_note "Re-enabling phy ${PHY_LIST[$CURDISK]} on expander ${EXPANDER_LIST[$CURDISK]}"
enable_sas_disk ${EXPANDER_LIST[$CURDISK]} ${PHY_LIST[$CURDISK]}
done
- rescan_disks
unset DISK_FOUND
typeset -A DISK_FOUND
Modified: projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_003_pos.ksh
==============================================================================
--- projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_003_pos.ksh Fri Sep 22 21:55:41 2017 (r323936)
+++ projects/zfsd/head/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_003_pos.ksh Fri Sep 22 22:00:26 2017 (r323937)
@@ -63,10 +63,6 @@ function remove_disk
# Disable the first disk.
disable_sas_disk $EXPANDER $PHY
- # Check to make sure disk is gone.
- find_disk_by_phy $EXPANDER $PHY
- [ -n "$FOUNDDISK" ] && log_fail "Disk \"$DISK\" was not removed"
-
# Check to make sure ZFS sees the disk as removed
wait_for_pool_dev_state_change 20 $DISK "REMOVED|UNAVAIL"
}
@@ -86,8 +82,6 @@ function reconnect_disk
enable_sas_disk $EXPANDER $PHY
log_note "Checking to see whether disk has reappeared"
- # Make sure the disk is back in the topology
- wait_for_disk_to_reappear 20 $EXPANDER $PHY
prev_disk=$(find_disks $DISK)
cur_disk=$(find_disks $FOUNDDISK)
More information about the svn-src-projects
mailing list