svn commit: r292351 - in projects/zfsd/head/tests/sys/cddl/zfs: include tests/redundancy
Alan Somers
asomers at FreeBSD.org
Wed Dec 16 19:27:21 UTC 2015
Author: asomers
Date: Wed Dec 16 19:27:20 2015
New Revision: 292351
URL: https://svnweb.freebsd.org/changeset/base/292351
Log:
Fix several redundancy test reliability & debuggability issues.
tests/sys/cddl/zfs/include/libtest.kshlib:
- Add a generic wait_for mechanism that takes a timeout, a polling
interval (dt), and a command plus its arguments, and runs the command
repeatedly until it returns true (a usage sketch follows the log
message).
- Add an is_pool_state <pool> <state> command.
tests/sys/cddl/zfs/tests/redundancy/redundancy.kshlib:
- cleanup: Always log the verbose status of the pool. This is
cleaner than doing it in umpteen different error cases, and in any
case we don't usually bother generating output for success.
- sync_pool: Don't check the pool state. Make that the
responsibility of the callers, each of which needs to check for a
different state.
- damage_devs: In the case that damages everything, even the labels,
keep re-damaging every vdev until it goes UNAVAIL. Previously, the
pool would sometimes sync its labels back to a device after the
damage had been done but before the pool had noticed the damage.
- clear_errors: Wait for the pool to become healthy rather than
requiring 'zpool clear' to have that effect immediately; the
resilver is issued asynchronously by 'zpool clear' and will not
necessarily have completed by the time it returns.
- remove_devs: After removing the device files, wait for each of
them to become UNAVAIL in the pool config.
Submitted by: Will
Sponsored by: Spectra Logic Corp
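
A rough usage sketch of the new helpers (a hypothetical test fragment,
not part of this commit; the pool name, state, and timeouts are made up
for illustration):

	# Fail the test unless $TESTPOOL reaches DEGRADED within 60s,
	# polling every 5 seconds.
	wait_for 60 5 is_pool_state $TESTPOOL DEGRADED

	# Same poll, but let the caller decide what to do on timeout.
	if ! wait_for_checked 60 5 is_pool_state $TESTPOOL DEGRADED; then
		log_note "Pool never went DEGRADED; cleaning up anyway"
	fi
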
Modified:
projects/zfsd/head/tests/sys/cddl/zfs/include/libtest.kshlib
projects/zfsd/head/tests/sys/cddl/zfs/tests/redundancy/redundancy.kshlib
Modified: projects/zfsd/head/tests/sys/cddl/zfs/include/libtest.kshlib
==============================================================================
--- projects/zfsd/head/tests/sys/cddl/zfs/include/libtest.kshlib Wed Dec 16 19:23:10 2015 (r292350)
+++ projects/zfsd/head/tests/sys/cddl/zfs/include/libtest.kshlib Wed Dec 16 19:27:20 2015 (r292351)
@@ -1297,6 +1297,41 @@ function reexport_pool
}
#
+# Wait for something to return true, checked by the caller.
+#
+function wait_for_checked # timeout dt <method> [args...]
+{
+ typeset timeout=$1
+ typeset dt=$2
+ shift; shift
+ typeset -i start=$(date '+%s')
+ typeset -i endtime
+
+ ((endtime = start + timeout))
+ while :; do
+ $*
+ [ $? -eq 0 ] && return
+ curtime=$(date '+%s')
+ [ $curtime -gt $endtime ] && return 1
+ sleep $dt
+ done
+ return 0
+}
+
+#
+# Wait for something to return true.
+#
+function wait_for # timeout dt <method> [args...]
+{
+ typeset timeout=$1
+ typeset dt=$2
+ shift; shift
+
+ wait_for_checked $timeout $dt $* || \
+ log_fail "ERROR: Timed out waiting for: $*"
+}
+
+#
# Verify a given disk is online or offline
#
# Return 0 is pool/disk matches expected state, 1 otherwise
@@ -1330,6 +1365,26 @@ function wait_for_state_exit
}
#
+# Wait for a given disk to enter a state
+#
+function wait_for_state_enter
+{
+ typeset -i timeout=$1
+ typeset pool=$2
+ typeset disk=$3
+ typeset state=$4
+
+ log_note "Waiting up to $timeout seconds for $disk to become $state ..."
+ for ((; $timeout > 0; timeout=$timeout-1)); do
+ check_state $pool "$disk" "$state"
+ [ $? -eq 0 ] && return
+ $SLEEP 1
+ done
+ log_must $ZPOOL status $pool
+ log_fail "ERROR: Disk $disk not marked as $state in $pool"
+}
+
+#
# Get the mountpoint of snapshot
# as its mountpoint
#
@@ -1615,6 +1670,12 @@ function is_pool_scrub_stopped #pool
return $?
}
+function is_pool_state # pool state
+{
+ check_pool_status "$1" "state" "$2"
+ return $?
+}
+
#
# Erase the partition tables and destroy any zfs labels
#
Modified: projects/zfsd/head/tests/sys/cddl/zfs/tests/redundancy/redundancy.kshlib
==============================================================================
--- projects/zfsd/head/tests/sys/cddl/zfs/tests/redundancy/redundancy.kshlib Wed Dec 16 19:23:10 2015 (r292350)
+++ projects/zfsd/head/tests/sys/cddl/zfs/tests/redundancy/redundancy.kshlib Wed Dec 16 19:27:20 2015 (r292351)
@@ -30,6 +30,8 @@
function cleanup
{
+ # Log the status of the pool to assist failures.
+ poolexists $TESTPOOL && $ZPOOL status -v $TESTPOOL
destroy_pool $TESTPOOL
typeset dir
for dir in $TESTDIR $BASEDIR; do
@@ -184,6 +186,7 @@ function sync_pool #pool
log_must $SLEEP 2
# Flush all the pool data.
typeset -i ret
+
# If the OS has detected corruption on the pool, it will have
# automatically initiated a scrub. In that case, our "zpool scrub"
# command will fail. So we ignore its exit status and just check that
@@ -191,12 +194,6 @@ function sync_pool #pool
$ZPOOL scrub $pool >/dev/null 2>&1
is_pool_scrubbing $pool || is_pool_scrubbed $pool || \
log_fail "$ZPOOL scrub $pool failed."
-
- # The pool has been damaged; the sync should notice this fact.
- log_note "Waiting for pool to sync..."
- while ! is_pool_scrubbed $pool || is_pool_resilvered $pool; do
- log_must $SLEEP 2
- done
}
#
@@ -214,10 +211,28 @@ function replace_missing_devs
for vdev in $@; do
log_must $MKFILE $DEV_SIZE $vdev
log_must $ZPOOL replace -f $pool $vdev $vdev
- while ! is_pool_resilvered $pool; do
- log_must $SLEEP 2
- done
+ wait_for 20 2 is_pool_resilvered $pool
+ done
+}
+
+#
+# Damage the labels of the specified devices. Returns 0 if all such devices
+# are UNAVAIL, 1 otherwise.
+#
+function damage_dev_labels # pool <vdev> [vdev ...]
+{
+ typeset pool=$1
+ typeset -i ret=0
+ shift
+
+ for vdev in $*; do
+ check_state $pool $vdev UNAVAIL && continue
+ log_must $MKFILE $DEV_SIZE $vdev
+ ret=1
done
+ [ $ret -eq 0 ] && return $ret
+ sync_pool $pool
+ return $ret
}
#
@@ -243,14 +258,16 @@ function damage_devs
for dev in $vdevs; do
bs_count=$($LS -l $dev | $AWK '{print $5}')
(( bs_count = bs_count/1024 - 512 ))
- $DD if=/dev/zero of=$dev seek=512 bs=1024 \
- count=$bs_count conv=notrunc >/dev/null 2>&1
+ log_must $DD if=/dev/zero of=$dev seek=512 bs=1024 \
+ count=$bs_count conv=notrunc
done
+ sync_pool $pool
else
- log_must $MKFILE $DEV_SIZE $vdevs
+ # The pool can be syncing, thus fixing its labels. So we
+ # have to keep trying until all the devices go offline.
+ wait_for 20 2 damage_dev_labels $pool $vdevs
fi
- sync_pool $pool
log_note "Pool $pool vdevs $vdevs damage completed."
}
@@ -264,12 +281,10 @@ function clear_errors
typeset pool=$1
log_must $ZPOOL clear $pool
+ # The pool may need to resilver (issued async by 'zpool clear'),
+ # give it a chance to do so.
+ wait_for 30 2 is_healthy $pool
- if ! is_healthy $pool ; then
- $ZPOOL status -x $pool
- log_note "$pool should be healthy."
- return 1
- fi
if ! is_data_valid $pool ; then
$ZPOOL status -x $pool
log_note "Data should be valid in $pool."
@@ -296,6 +311,9 @@ function remove_devs
log_must $RM -f $vdevs
sync_pool $pool
+ for vdev in $vdevs; do
+ wait_for 20 2 check_state $pool $vdev UNAVAIL
+ done
}
#