Re: armv7 kyua runs via chroot on aarch64: zfs tests leave behind processes from timed out tests

From: Mark Millard <marklmi_at_yahoo.com>
Date: Thu, 03 Aug 2023 14:18:21 UTC
On Aug 3, 2023, at 00:19, Mark Millard <marklmi@yahoo.com> wrote:

> This is after the patch (leading whitespace might
> not have been preserved in what you see):
> 
> # git -C /usr/main-src/ diff sys/dev/md/
> diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c
> index a719dccb1955..365296ec4276 100644
> --- a/sys/dev/md/md.c
> +++ b/sys/dev/md/md.c
> @@ -147,8 +147,15 @@ struct md_ioctl32 {
>        int             md_fwsectors;
>        uint32_t        md_label;
>        int             md_pad[MDNPAD];
> +#ifdef __aarch64__
> +       uint32_t        md_pad0;
> +#endif
> } __attribute__((__packed__));
> +#ifdef __aarch64__
> +CTASSERT((sizeof(struct md_ioctl32)) == 440);
> +#else
> CTASSERT((sizeof(struct md_ioctl32)) == 436);
> +#endif
> 
> #define        MDIOCATTACH_32  _IOC_NEWTYPE(MDIOCATTACH, struct md_ioctl32)
> #define        MDIOCDETACH_32  _IOC_NEWTYPE(MDIOCDETACH, struct md_ioctl32)
> 
> 
> The kyua run is still in process, but at this point there is
> the following accumulation from the zfs testing timouts:
> 
> # ps -alxdww
> UID   PID  PPID C PRI NI   VSZ   RSS MWCHAN   STAT TT        TIME COMMAND
> . . .
>  0 17491     1 6  20  0 36460 12324 -        T     -     0:24.71 |-- fsync_integrity /testdir2316/testfile2316
>  0 17551     1 5  20  0 10600  7512 tx->tx_s D     -     0:00.00 |-- /sbin/zpool destroy -f testpool.2316
>  0 17739     1 7  20  0 10600  7308 zfs tear D     -     0:00.00 |-- /sbin/zpool destroy -f testpool.2316
>  0 17841     1 3  20  0 10600  7316 tx->tx_s D     -     0:00.00 |-- /sbin/zpool destroy -f testpool.2316
>  0 17860     1 0  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 17888     1 3  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 17907     1 6  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 17928     1 7  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 17955     1 0  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 17976     1 4  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 17995     1 2  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18023     1 2  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18043     1 2  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18064     1 3  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18085     1 0  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18114     1 7  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18135     1 2  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18157     1 6  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18177     1 6  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18205     1 4  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18224     1 1  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18255     1 3  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18275     1 1  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18296     1 5  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18317     1 4  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18345     1 4  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18365     1 2  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18386     1 3  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18412     1 1  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18447     1 5  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18466     1 5  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18516     1 6  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18535     1 2  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18632     1 0  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade

It has added:

  0 18656     1 7  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
  0 18748     1 0  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
  0 18767     1 4  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
  0 18858     1 5  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
  0 18877     1 0  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
  0 18907     1 7  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
  0 18926     1 5  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
  0 18956     1 7  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
  0 18975     1 7  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
  0 19005     1 4  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
  0 19026     1 4  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
  0 19298     1 6  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
  0 19317     1 6  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
  0 19408     1 7  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
  0 19427     1 2  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
  0 19518     1 4  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
  0 19537     1 4  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
  0 19635     1 5  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade


> Lots of these are from 300s timeouts but some are from 1200s or
> 1800s or 3600s timeouts.
> 
> For reference:
> 
> sys/cddl/zfs/tests/txg_integrity/txg_integrity_test:fsync_integrity_001_pos  ->  broken: Test case body timed out  [1800.053s]
> sys/cddl/zfs/tests/txg_integrity/txg_integrity_test:txg_integrity_001_pos  ->  passed  [63.702s]
> sys/cddl/zfs/tests/userquota/userquota_test:groupspace_001_pos  ->  skipped: Required program 'runwattr' not found in PATH  [0.003s]
> sys/cddl/zfs/tests/userquota/userquota_test:groupspace_002_pos  ->  skipped: Required program 'runwattr' not found in PATH  [0.002s]
> sys/cddl/zfs/tests/userquota/userquota_test:userquota_001_pos  ->  skipped: Required program 'runwattr' not found in PATH  [0.002s]
> sys/cddl/zfs/tests/userquota/userquota_test:userquota_002_pos  ->  broken: Test case cleanup timed out  [0.148s]
> sys/cddl/zfs/tests/userquota/userquota_test:userquota_003_pos  ->  broken: Test case cleanup timed out  [0.151s]
> sys/cddl/zfs/tests/userquota/userquota_test:userquota_004_pos  ->  skipped: Required program 'runwattr' not found in PATH  [0.002s]
> sys/cddl/zfs/tests/userquota/userquota_test:userquota_005_neg  ->  broken: Test case body timed out  [300.021s]
> sys/cddl/zfs/tests/userquota/userquota_test:userquota_006_pos  ->  broken: Test case body timed out  [300.080s]
> sys/cddl/zfs/tests/userquota/userquota_test:userquota_007_pos  ->  skipped: Required program 'runwattr' not found in PATH  [0.002s]
> sys/cddl/zfs/tests/userquota/userquota_test:userquota_008_pos  ->  broken: Test case body timed out  [300.034s]
> sys/cddl/zfs/tests/userquota/userquota_test:userquota_009_pos  ->  broken: Test case body timed out  [300.143s]
> sys/cddl/zfs/tests/userquota/userquota_test:userquota_010_pos  ->  skipped: Required program 'runwattr' not found in PATH  [0.002s]
> sys/cddl/zfs/tests/userquota/userquota_test:userquota_011_pos  ->  broken: Test case body timed out  [300.003s]
> sys/cddl/zfs/tests/userquota/userquota_test:userquota_012_neg  ->  broken: Test case body timed out  [300.019s]
> sys/cddl/zfs/tests/userquota/userquota_test:userspace_001_pos  ->  skipped: Required program 'runwattr' not found in PATH  [0.002s]
> sys/cddl/zfs/tests/userquota/userquota_test:userspace_002_pos  ->  skipped: Required program 'runwattr' not found in PATH  [0.002s]
> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_001_pos  ->  broken: Test case body timed out  [300.052s]
> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_002_pos  ->  skipped: Required program 'labelit' not found in PATH  [0.002s]
> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_003_pos  ->  broken: Test case body timed out  [300.076s]
> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_004_pos  ->  broken: Test case body timed out  [300.106s]
> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_005_pos  ->  skipped: Required program 'ff' not found in PATH  [0.002s]
> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_006_pos  ->  broken: Test case body timed out  [300.015s]
> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_007_pos  ->  broken: Test case body timed out  [300.005s]
> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_008_pos  ->  skipped: Required program 'ncheck' not found in PATH  [0.002s]
> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_009_pos  ->  broken: Test case body timed out  [300.051s]
> sys/cddl/zfs/tests/write_dirs/write_dirs_test:write_dirs_001_pos  ->  broken: Test case body timed out  [1200.056s]
> sys/cddl/zfs/tests/write_dirs/write_dirs_test:write_dirs_002_pos  ->  broken: Test case body timed out  [1200.046s]
> sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_autoreplace_001_neg  ->  broken: Test case body timed out  [3600.055s]

And added:

sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_autoreplace_002_pos  ->  broken: Test case body timed out  [3600.028s]
sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_autoreplace_003_pos  ->  broken: Test case body timed out  [3600.146s]
sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_degrade_001_pos  ->  broken: Test case body timed out  [600.067s]
sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_degrade_002_pos  ->  broken: Test case body timed out  [600.015s]
sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_fault_001_pos  ->  broken: Test case body timed out  [300.061s]
sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_hotspare_001_pos  ->  broken: Test case body timed out  [3600.042s]
sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_hotspare_002_pos  ->  broken: Test case body timed out  [3600.161s]
sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_hotspare_003_pos  ->  broken: Test case body timed out  [3600.033s]
sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_hotspare_004_pos  ->  broken: Test case body timed out  [3600.007s]

> Other timeouts not from zfs tests have not had an accumulation
> of processes left behind. But these may be the set of tests
> that use ksh93 for scripting. I make no claim of knowing the
> zfs vs. ksh93 vs. both vs. ??? for what is contributing.
> 
> 
> I'll note that the system was booted via a bectl BE environment
> on the only FreeBSD media enabled, so is a zfs-root boot context.
> 
> For reference:
> 
> # uname -apKU
> FreeBSD CA78C-WDK23-ZFS 14.0-CURRENT FreeBSD 14.0-CURRENT aarch64 1400093 #6 main-n264334-215bab7924f6-dirty: Wed Aug  2 14:12:14 PDT 2023     root@CA78C-WDK23-ZFS:/usr/obj/BUILDs/main-CA78C-nodbg-clang/usr/main-src/arm64.aarch64/sys/GENERIC-NODBG-CA78C arm64 aarch64 1400093 1400093
> 
> I preload various modules (6 are commented out [not preloaded]
> and some listed may be actually built into the kernel):
> 
> # grep kldload ~/prekyua-kldloads.sh 
> kldload -v -n zfs.ko
> kldload -v -n cryptodev.ko
> kldload -v -n nullfs.ko
> kldload -v -n fdescfs.ko
> kldload -v -n filemon.ko
> kldload -v -n nfsd.ko
> kldload -v -n tarfs.ko
> kldload -v -n xz.ko
> kldload -v -n geom_concat.ko
> kldload -v -n geom_eli.ko
> kldload -v -n geom_nop.ko
> kldload -v -n geom_gate.ko
> kldload -v -n geom_mirror.ko
> kldload -v -n geom_multipath.ko
> kldload -v -n sdt.ko
> kldload -v -n dtrace.ko
> kldload -v -n opensolaris.ko
> kldload -v -n geom_raid3.ko
> kldload -v -n geom_shsec.ko
> kldload -v -n geom_stripe.ko
> kldload -v -n geom_uzip.ko
> kldload -v -n if_epair.ko
> kldload -v -n if_gif.ko
> kldload -v -n if_tuntap.ko
> kldload -v -n if_lagg.ko
> kldload -v -n if_infiniband.ko
> kldload -v -n if_wg.ko
> kldload -v -n ng_socket.ko
> kldload -v -n netgraph.ko
> kldload -v -n ng_hub.ko
> kldload -v -n ng_bridge.ko
> kldload -v -n ng_ether.ko
> kldload -v -n ng_vlan_rotate.ko
> kldload -v -n ipdivert.ko
> kldload -v -n pf.ko
> kldload -v -n if_bridge.ko
> kldload -v -n bridgestp.ko
> kldload -v -n mqueuefs.ko
> kldload -v -n tcpmd5.ko
> kldload -v -n carp.ko
> kldload -v -n sctp.ko
> kldload -v -n if_stf.ko
> kldload -v -n if_ovpn.ko
> kldload -v -n ipsec.ko
> #kldload -v -n ipfw.ko
> #kldload -v -n pflog.ko
> #kldload -v -n pfsync.ko
> kldload -v -n dummynet.ko
> #kldload -v -n mac_bsdextended.ko
> #kldload -v -n mac_ipacl.ko
> #kldload -v -n mac_portacl.ko
> 
> armv7 ports built and installed in the armv7 chroot
> area include:
> 
> # more ~/origins/kyua-origins.txt
> archivers/gtar
> devel/gdb
> devel/py-pytest
> devel/py-pytest-twisted
> devel/py-twisted
> lang/perl5.32
> lang/python
> net/scapy
> security/nist-kat
> security/openvpn
> security/sudo
> shells/ksh93
> shells/bash
> sysutils/coreutils
> sysutils/sg3_utils
> textproc/jq
> 
> (Those cause others to also be installed.)

I tried gdb -p PID against a couple of the processes.
Each got stuck, not reaching the gdb prompt. I also
show a Control-T output:

Attaching to process 17491
load: 0.24  cmd: gdb131 19693 [uwait] 32.27r 0.02u 0.06s 0% 32152k
#0 0xffff00000049fe20 at mi_switch+0xe0
#1 0xffff0000004f3658 at sleepq_catch_signals+0x318
#2 0xffff0000004f3318 at sleepq_wait_sig+0x8
#3 0xffff00000049f410 at _sleep+0x1d0
#4 0xffff0000004b52dc at umtxq_sleep+0x27c
#5 0xffff0000004bab7c at do_wait+0x25c
#6 0xffff0000004b8cdc at __umtx_op_wait_uint_private+0x5c
#7 0xffff0000004b6e64 at sys__umtx_op+0x84
#8 0xffff0000008267d4 at do_el0_sync+0x9b4
#9 0xffff000000805910 at handle_el0_sync+0x44

and:

Attaching to process 17860
load: 0.23  cmd: gdb131 19697 [uwait] 13.14r 0.06u 0.01s 0% 32184k
#0 0xffff00000049fe20 at mi_switch+0xe0
#1 0xffff0000004f3658 at sleepq_catch_signals+0x318
#2 0xffff0000004f3318 at sleepq_wait_sig+0x8
#3 0xffff00000049f410 at _sleep+0x1d0
#4 0xffff0000004b52dc at umtxq_sleep+0x27c
#5 0xffff0000004bab7c at do_wait+0x25c
#6 0xffff0000004b8cdc at __umtx_op_wait_uint_private+0x5c
#7 0xffff0000004b6e64 at sys__umtx_op+0x84
#8 0xffff0000008267d4 at do_el0_sync+0x9b4
#9 0xffff000000805910 at handle_el0_sync+0x44

I was unable to Control-C the gdb's to gain control
but was able to put them in the background (Control-Z
then bg).

===
Mark Millard
marklmi at yahoo.com