Re: armv7 kyua runs via chroot on aarch64: zfs tests leave behind processes from timed out tests

From: Mark Millard <marklmi_at_yahoo.com>
Date: Thu, 03 Aug 2023 17:20:00 UTC
On Aug 3, 2023, at 07:18, Mark Millard <marklmi@yahoo.com> wrote:

> On Aug 3, 2023, at 00:19, Mark Millard <marklmi@yahoo.com> wrote:
> 
>> This is after the patch (leading whitespace might
>> not have been preserved in what you see):
>> 
>> # git -C /usr/main-src/ diff sys/dev/md/
>> diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c
>> index a719dccb1955..365296ec4276 100644
>> --- a/sys/dev/md/md.c
>> +++ b/sys/dev/md/md.c
>> @@ -147,8 +147,15 @@ struct md_ioctl32 {
>>       int             md_fwsectors;
>>       uint32_t        md_label;
>>       int             md_pad[MDNPAD];
>> +#ifdef __aarch64__
>> +       uint32_t        md_pad0;
>> +#endif
>> } __attribute__((__packed__));
>> +#ifdef __aarch64__
>> +CTASSERT((sizeof(struct md_ioctl32)) == 440);
>> +#else
>> CTASSERT((sizeof(struct md_ioctl32)) == 436);
>> +#endif
>> 
>> #define        MDIOCATTACH_32  _IOC_NEWTYPE(MDIOCATTACH, struct md_ioctl32)
>> #define        MDIOCDETACH_32  _IOC_NEWTYPE(MDIOCDETACH, struct md_ioctl32)
>> 
>> 
>> The kyua run is still in process, but at this point there is
>> the following accumulation from the zfs testing timouts:
>> 
>> # ps -alxdww
>> UID   PID  PPID C PRI NI   VSZ   RSS MWCHAN   STAT TT        TIME COMMAND
>> . . .
>> 0 17491     1 6  20  0 36460 12324 -        T     -     0:24.71 |-- fsync_integrity /testdir2316/testfile2316
>> 0 17551     1 5  20  0 10600  7512 tx->tx_s D     -     0:00.00 |-- /sbin/zpool destroy -f testpool.2316
>> 0 17739     1 7  20  0 10600  7308 zfs tear D     -     0:00.00 |-- /sbin/zpool destroy -f testpool.2316
>> 0 17841     1 3  20  0 10600  7316 tx->tx_s D     -     0:00.00 |-- /sbin/zpool destroy -f testpool.2316
>> 0 17860     1 0  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 17888     1 3  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 17907     1 6  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 17928     1 7  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 17955     1 0  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 17976     1 4  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 17995     1 2  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 18023     1 2  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 18043     1 2  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 18064     1 3  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 18085     1 0  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 18114     1 7  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 18135     1 2  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 18157     1 6  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 18177     1 6  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 18205     1 4  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 18224     1 1  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 18255     1 3  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 18275     1 1  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 18296     1 5  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 18317     1 4  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 18345     1 4  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 18365     1 2  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 18386     1 3  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 18412     1 1  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 18447     1 5  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 18466     1 5  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 18516     1 6  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 18535     1 2  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>> 0 18632     1 0  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
> 
> It has added:
> 
>  0 18656     1 7  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18748     1 0  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18767     1 4  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18858     1 5  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18877     1 0  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18907     1 7  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18926     1 5  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18956     1 7  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 18975     1 7  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 19005     1 4  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 19026     1 4  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 19298     1 6  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 19317     1 6  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 19408     1 7  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 19427     1 2  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 19518     1 4  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 19537     1 4  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
>  0 19635     1 5  20  0  10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade

  0 19654     1 5  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
  0 19746     1 6  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
  0 19767     1 6  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
  0 19854     1 6  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
  0 19873     1 0  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade
  0 19960     1 1  20  0 10080  6956 spa_name D     -     0:00.00 |-- /sbin/zfs upgrade

>> Lots of these are from 300s timeouts but some are from 1200s or
>> 1800s or 3600s timeouts.
>> 
>> For reference:
>> 
>> sys/cddl/zfs/tests/txg_integrity/txg_integrity_test:fsync_integrity_001_pos  ->  broken: Test case body timed out  [1800.053s]
>> sys/cddl/zfs/tests/txg_integrity/txg_integrity_test:txg_integrity_001_pos  ->  passed  [63.702s]
>> sys/cddl/zfs/tests/userquota/userquota_test:groupspace_001_pos  ->  skipped: Required program 'runwattr' not found in PATH  [0.003s]
>> sys/cddl/zfs/tests/userquota/userquota_test:groupspace_002_pos  ->  skipped: Required program 'runwattr' not found in PATH  [0.002s]
>> sys/cddl/zfs/tests/userquota/userquota_test:userquota_001_pos  ->  skipped: Required program 'runwattr' not found in PATH  [0.002s]
>> sys/cddl/zfs/tests/userquota/userquota_test:userquota_002_pos  ->  broken: Test case cleanup timed out  [0.148s]
>> sys/cddl/zfs/tests/userquota/userquota_test:userquota_003_pos  ->  broken: Test case cleanup timed out  [0.151s]
>> sys/cddl/zfs/tests/userquota/userquota_test:userquota_004_pos  ->  skipped: Required program 'runwattr' not found in PATH  [0.002s]
>> sys/cddl/zfs/tests/userquota/userquota_test:userquota_005_neg  ->  broken: Test case body timed out  [300.021s]
>> sys/cddl/zfs/tests/userquota/userquota_test:userquota_006_pos  ->  broken: Test case body timed out  [300.080s]
>> sys/cddl/zfs/tests/userquota/userquota_test:userquota_007_pos  ->  skipped: Required program 'runwattr' not found in PATH  [0.002s]
>> sys/cddl/zfs/tests/userquota/userquota_test:userquota_008_pos  ->  broken: Test case body timed out  [300.034s]
>> sys/cddl/zfs/tests/userquota/userquota_test:userquota_009_pos  ->  broken: Test case body timed out  [300.143s]
>> sys/cddl/zfs/tests/userquota/userquota_test:userquota_010_pos  ->  skipped: Required program 'runwattr' not found in PATH  [0.002s]
>> sys/cddl/zfs/tests/userquota/userquota_test:userquota_011_pos  ->  broken: Test case body timed out  [300.003s]
>> sys/cddl/zfs/tests/userquota/userquota_test:userquota_012_neg  ->  broken: Test case body timed out  [300.019s]
>> sys/cddl/zfs/tests/userquota/userquota_test:userspace_001_pos  ->  skipped: Required program 'runwattr' not found in PATH  [0.002s]
>> sys/cddl/zfs/tests/userquota/userquota_test:userspace_002_pos  ->  skipped: Required program 'runwattr' not found in PATH  [0.002s]
>> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_001_pos  ->  broken: Test case body timed out  [300.052s]
>> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_002_pos  ->  skipped: Required program 'labelit' not found in PATH  [0.002s]
>> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_003_pos  ->  broken: Test case body timed out  [300.076s]
>> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_004_pos  ->  broken: Test case body timed out  [300.106s]
>> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_005_pos  ->  skipped: Required program 'ff' not found in PATH  [0.002s]
>> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_006_pos  ->  broken: Test case body timed out  [300.015s]
>> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_007_pos  ->  broken: Test case body timed out  [300.005s]
>> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_008_pos  ->  skipped: Required program 'ncheck' not found in PATH  [0.002s]
>> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_009_pos  ->  broken: Test case body timed out  [300.051s]
>> sys/cddl/zfs/tests/write_dirs/write_dirs_test:write_dirs_001_pos  ->  broken: Test case body timed out  [1200.056s]
>> sys/cddl/zfs/tests/write_dirs/write_dirs_test:write_dirs_002_pos  ->  broken: Test case body timed out  [1200.046s]
>> sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_autoreplace_001_neg  ->  broken: Test case body timed out  [3600.055s]
> 
> And added:
> 
> sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_autoreplace_002_pos  ->  broken: Test case body timed out  [3600.028s]
> sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_autoreplace_003_pos  ->  broken: Test case body timed out  [3600.146s]
> sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_degrade_001_pos  ->  broken: Test case body timed out  [600.067s]
> sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_degrade_002_pos  ->  broken: Test case body timed out  [600.015s]
> sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_fault_001_pos  ->  broken: Test case body timed out  [300.061s]
> sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_hotspare_001_pos  ->  broken: Test case body timed out  [3600.042s]
> sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_hotspare_002_pos  ->  broken: Test case body timed out  [3600.161s]
> sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_hotspare_003_pos  ->  broken: Test case body timed out  [3600.033s]
> sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_hotspare_004_pos  ->  broken: Test case body timed out  [3600.007s]

sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_hotspare_005_pos  ->  broken: Test case body timed out  [3600.065s]
sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_hotspare_006_pos  ->  broken: Test case body timed out  [3600.014s]
sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_hotspare_007_pos  ->  broken: Test case body timed out  [3600.066s]

>> Other timeouts not from zfs tests have not had an accumulation
>> of processes left behind. But these may be the set of tests
>> that use ksh93 for scripting. I make no claim of knowing the
>> zfs vs. ksh93 vs. both vs. ??? for what is contributing.
>> 
>> 
>> I'll note that the system was booted via a bectl BE environment
>> on the only FreeBSD media enabled, so is a zfs-root boot context.
>> 
>> For reference:
>> 
>> # uname -apKU
>> FreeBSD CA78C-WDK23-ZFS 14.0-CURRENT FreeBSD 14.0-CURRENT aarch64 1400093 #6 main-n264334-215bab7924f6-dirty: Wed Aug  2 14:12:14 PDT 2023     root@CA78C-WDK23-ZFS:/usr/obj/BUILDs/main-CA78C-nodbg-clang/usr/main-src/arm64.aarch64/sys/GENERIC-NODBG-CA78C arm64 aarch64 1400093 1400093
>> 
>> I preload various modules (6 are commented out [not preloaded]
>> and some listed may be actually built into the kernel):
>> 
>> # grep kldload ~/prekyua-kldloads.sh 
>> kldload -v -n zfs.ko
>> kldload -v -n cryptodev.ko
>> kldload -v -n nullfs.ko
>> kldload -v -n fdescfs.ko
>> kldload -v -n filemon.ko
>> kldload -v -n nfsd.ko
>> kldload -v -n tarfs.ko
>> kldload -v -n xz.ko
>> kldload -v -n geom_concat.ko
>> kldload -v -n geom_eli.ko
>> kldload -v -n geom_nop.ko
>> kldload -v -n geom_gate.ko
>> kldload -v -n geom_mirror.ko
>> kldload -v -n geom_multipath.ko
>> kldload -v -n sdt.ko
>> kldload -v -n dtrace.ko
>> kldload -v -n opensolaris.ko
>> kldload -v -n geom_raid3.ko
>> kldload -v -n geom_shsec.ko
>> kldload -v -n geom_stripe.ko
>> kldload -v -n geom_uzip.ko
>> kldload -v -n if_epair.ko
>> kldload -v -n if_gif.ko
>> kldload -v -n if_tuntap.ko
>> kldload -v -n if_lagg.ko
>> kldload -v -n if_infiniband.ko
>> kldload -v -n if_wg.ko
>> kldload -v -n ng_socket.ko
>> kldload -v -n netgraph.ko
>> kldload -v -n ng_hub.ko
>> kldload -v -n ng_bridge.ko
>> kldload -v -n ng_ether.ko
>> kldload -v -n ng_vlan_rotate.ko
>> kldload -v -n ipdivert.ko
>> kldload -v -n pf.ko
>> kldload -v -n if_bridge.ko
>> kldload -v -n bridgestp.ko
>> kldload -v -n mqueuefs.ko
>> kldload -v -n tcpmd5.ko
>> kldload -v -n carp.ko
>> kldload -v -n sctp.ko
>> kldload -v -n if_stf.ko
>> kldload -v -n if_ovpn.ko
>> kldload -v -n ipsec.ko
>> #kldload -v -n ipfw.ko
>> #kldload -v -n pflog.ko
>> #kldload -v -n pfsync.ko
>> kldload -v -n dummynet.ko
>> #kldload -v -n mac_bsdextended.ko
>> #kldload -v -n mac_ipacl.ko
>> #kldload -v -n mac_portacl.ko
>> 
>> armv7 ports built and installed in the armv7 chroot
>> area include:
>> 
>> # more ~/origins/kyua-origins.txt
>> archivers/gtar
>> devel/gdb
>> devel/py-pytest
>> devel/py-pytest-twisted
>> devel/py-twisted
>> lang/perl5.32
>> lang/python
>> net/scapy
>> security/nist-kat
>> security/openvpn
>> security/sudo
>> shells/ksh93
>> shells/bash
>> sysutils/coreutils
>> sysutils/sg3_utils
>> textproc/jq
>> 
>> (Those cause others to also be installed.)
> 
> I tried gdb -p PID against a couple of the processes.
> Each got stuck, not reaching the gdb prompt. I also
> show a Control-T output:
> 
> Attaching to process 17491
> load: 0.24  cmd: gdb131 19693 [uwait] 32.27r 0.02u 0.06s 0% 32152k
> #0 0xffff00000049fe20 at mi_switch+0xe0
> #1 0xffff0000004f3658 at sleepq_catch_signals+0x318
> #2 0xffff0000004f3318 at sleepq_wait_sig+0x8
> #3 0xffff00000049f410 at _sleep+0x1d0
> #4 0xffff0000004b52dc at umtxq_sleep+0x27c
> #5 0xffff0000004bab7c at do_wait+0x25c
> #6 0xffff0000004b8cdc at __umtx_op_wait_uint_private+0x5c
> #7 0xffff0000004b6e64 at sys__umtx_op+0x84
> #8 0xffff0000008267d4 at do_el0_sync+0x9b4
> #9 0xffff000000805910 at handle_el0_sync+0x44
> 
> and:
> 
> Attaching to process 17860
> load: 0.23  cmd: gdb131 19697 [uwait] 13.14r 0.06u 0.01s 0% 32184k
> #0 0xffff00000049fe20 at mi_switch+0xe0
> #1 0xffff0000004f3658 at sleepq_catch_signals+0x318
> #2 0xffff0000004f3318 at sleepq_wait_sig+0x8
> #3 0xffff00000049f410 at _sleep+0x1d0
> #4 0xffff0000004b52dc at umtxq_sleep+0x27c
> #5 0xffff0000004bab7c at do_wait+0x25c
> #6 0xffff0000004b8cdc at __umtx_op_wait_uint_private+0x5c
> #7 0xffff0000004b6e64 at sys__umtx_op+0x84
> #8 0xffff0000008267d4 at do_el0_sync+0x9b4
> #9 0xffff000000805910 at handle_el0_sync+0x44
> 
> I was unable to Control-C the gdb's to gain control
> but was able to put them in the background (Control-Z
> then bg).

Looks like I'm going to have to reboot instead of letting
the kyua run go to completion. The periodic daily is stuck
as well.

  0 19064  1657 1  20  0 12980  2484 piperd   I     -     0:00.00 | `-- cron: running job (cron)
  0 19066 19064 3  40  0 13436  2928 wait     Is    -     0:00.00 |   `-- /bin/sh - /usr/sbin/periodic daily
. . .
  0 19237 19235 0  68  0 13436  2936 wait     I     -     0:00.00 |         |         | `-- /bin/sh - /etc/periodic/security/100.chksetuid
  0 19242 19237 6  68  0 21912 10292 zfs      D     -     0:10.21 |         |         |   |-- / /var/mail . . . /dev/null (find)
  0 19243 19237 7  68  0 13436  2932 wait     I     -     0:00.00 |         |         |   `-- /bin/sh - /etc/periodic/security/100.chksetuid
  0 19245 19243 1  68  0 15204  2212 piperd   I     -     0:00.00 |         |         |     `-- cat

is also stuck. So the problems are now not limited to
the kyua run.

===
Mark Millard
marklmi at yahoo.com