Re: armv7 kyua runs via chroot on aarch64: zfs tests leave behind processes from timed out tests
Date: Thu, 03 Aug 2023 17:20:00 UTC
On Aug 3, 2023, at 07:18, Mark Millard <marklmi@yahoo.com> wrote: > On Aug 3, 2023, at 00:19, Mark Millard <marklmi@yahoo.com> wrote: > >> This is after the patch (leading whitespace might >> not have been preserved in what you see): >> >> # git -C /usr/main-src/ diff sys/dev/md/ >> diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c >> index a719dccb1955..365296ec4276 100644 >> --- a/sys/dev/md/md.c >> +++ b/sys/dev/md/md.c >> @@ -147,8 +147,15 @@ struct md_ioctl32 { >> int md_fwsectors; >> uint32_t md_label; >> int md_pad[MDNPAD]; >> +#ifdef __aarch64__ >> + uint32_t md_pad0; >> +#endif >> } __attribute__((__packed__)); >> +#ifdef __aarch64__ >> +CTASSERT((sizeof(struct md_ioctl32)) == 440); >> +#else >> CTASSERT((sizeof(struct md_ioctl32)) == 436); >> +#endif >> >> #define MDIOCATTACH_32 _IOC_NEWTYPE(MDIOCATTACH, struct md_ioctl32) >> #define MDIOCDETACH_32 _IOC_NEWTYPE(MDIOCDETACH, struct md_ioctl32) >> >> >> The kyua run is still in process, but at this point there is >> the following accumulation from the zfs testing timouts: >> >> # ps -alxdww >> UID PID PPID C PRI NI VSZ RSS MWCHAN STAT TT TIME COMMAND >> . . . >> 0 17491 1 6 20 0 36460 12324 - T - 0:24.71 |-- fsync_integrity /testdir2316/testfile2316 >> 0 17551 1 5 20 0 10600 7512 tx->tx_s D - 0:00.00 |-- /sbin/zpool destroy -f testpool.2316 >> 0 17739 1 7 20 0 10600 7308 zfs tear D - 0:00.00 |-- /sbin/zpool destroy -f testpool.2316 >> 0 17841 1 3 20 0 10600 7316 tx->tx_s D - 0:00.00 |-- /sbin/zpool destroy -f testpool.2316 >> 0 17860 1 0 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 17888 1 3 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 17907 1 6 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 17928 1 7 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 17955 1 0 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 17976 1 4 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 17995 1 2 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 18023 1 2 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 18043 1 2 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 18064 1 3 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 18085 1 0 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 18114 1 7 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 18135 1 2 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 18157 1 6 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 18177 1 6 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 18205 1 4 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 18224 1 1 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 18255 1 3 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 18275 1 1 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 18296 1 5 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 18317 1 4 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 18345 1 4 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 18365 1 2 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 18386 1 3 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 18412 1 1 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 18447 1 5 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 18466 1 5 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 18516 1 6 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 18535 1 2 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> 0 18632 1 0 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade > > It has added: > > 0 18656 1 7 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade > 0 18748 1 0 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade > 0 18767 1 4 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade > 0 18858 1 5 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade > 0 18877 1 0 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade > 0 18907 1 7 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade > 0 18926 1 5 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade > 0 18956 1 7 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade > 0 18975 1 7 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade > 0 19005 1 4 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade > 0 19026 1 4 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade > 0 19298 1 6 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade > 0 19317 1 6 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade > 0 19408 1 7 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade > 0 19427 1 2 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade > 0 19518 1 4 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade > 0 19537 1 4 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade > 0 19635 1 5 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade 0 19654 1 5 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade 0 19746 1 6 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade 0 19767 1 6 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade 0 19854 1 6 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade 0 19873 1 0 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade 0 19960 1 1 20 0 10080 6956 spa_name D - 0:00.00 |-- /sbin/zfs upgrade >> Lots of these are from 300s timeouts but some are from 1200s or >> 1800s or 3600s timeouts. >> >> For reference: >> >> sys/cddl/zfs/tests/txg_integrity/txg_integrity_test:fsync_integrity_001_pos -> broken: Test case body timed out [1800.053s] >> sys/cddl/zfs/tests/txg_integrity/txg_integrity_test:txg_integrity_001_pos -> passed [63.702s] >> sys/cddl/zfs/tests/userquota/userquota_test:groupspace_001_pos -> skipped: Required program 'runwattr' not found in PATH [0.003s] >> sys/cddl/zfs/tests/userquota/userquota_test:groupspace_002_pos -> skipped: Required program 'runwattr' not found in PATH [0.002s] >> sys/cddl/zfs/tests/userquota/userquota_test:userquota_001_pos -> skipped: Required program 'runwattr' not found in PATH [0.002s] >> sys/cddl/zfs/tests/userquota/userquota_test:userquota_002_pos -> broken: Test case cleanup timed out [0.148s] >> sys/cddl/zfs/tests/userquota/userquota_test:userquota_003_pos -> broken: Test case cleanup timed out [0.151s] >> sys/cddl/zfs/tests/userquota/userquota_test:userquota_004_pos -> skipped: Required program 'runwattr' not found in PATH [0.002s] >> sys/cddl/zfs/tests/userquota/userquota_test:userquota_005_neg -> broken: Test case body timed out [300.021s] >> sys/cddl/zfs/tests/userquota/userquota_test:userquota_006_pos -> broken: Test case body timed out [300.080s] >> sys/cddl/zfs/tests/userquota/userquota_test:userquota_007_pos -> skipped: Required program 'runwattr' not found in PATH [0.002s] >> sys/cddl/zfs/tests/userquota/userquota_test:userquota_008_pos -> broken: Test case body timed out [300.034s] >> sys/cddl/zfs/tests/userquota/userquota_test:userquota_009_pos -> broken: Test case body timed out [300.143s] >> sys/cddl/zfs/tests/userquota/userquota_test:userquota_010_pos -> skipped: Required program 'runwattr' not found in PATH [0.002s] >> sys/cddl/zfs/tests/userquota/userquota_test:userquota_011_pos -> broken: Test case body timed out [300.003s] >> sys/cddl/zfs/tests/userquota/userquota_test:userquota_012_neg -> broken: Test case body timed out [300.019s] >> sys/cddl/zfs/tests/userquota/userquota_test:userspace_001_pos -> skipped: Required program 'runwattr' not found in PATH [0.002s] >> sys/cddl/zfs/tests/userquota/userquota_test:userspace_002_pos -> skipped: Required program 'runwattr' not found in PATH [0.002s] >> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_001_pos -> broken: Test case body timed out [300.052s] >> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_002_pos -> skipped: Required program 'labelit' not found in PATH [0.002s] >> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_003_pos -> broken: Test case body timed out [300.076s] >> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_004_pos -> broken: Test case body timed out [300.106s] >> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_005_pos -> skipped: Required program 'ff' not found in PATH [0.002s] >> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_006_pos -> broken: Test case body timed out [300.015s] >> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_007_pos -> broken: Test case body timed out [300.005s] >> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_008_pos -> skipped: Required program 'ncheck' not found in PATH [0.002s] >> sys/cddl/zfs/tests/utils_test/utils_test_test:utils_test_009_pos -> broken: Test case body timed out [300.051s] >> sys/cddl/zfs/tests/write_dirs/write_dirs_test:write_dirs_001_pos -> broken: Test case body timed out [1200.056s] >> sys/cddl/zfs/tests/write_dirs/write_dirs_test:write_dirs_002_pos -> broken: Test case body timed out [1200.046s] >> sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_autoreplace_001_neg -> broken: Test case body timed out [3600.055s] > > And added: > > sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_autoreplace_002_pos -> broken: Test case body timed out [3600.028s] > sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_autoreplace_003_pos -> broken: Test case body timed out [3600.146s] > sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_degrade_001_pos -> broken: Test case body timed out [600.067s] > sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_degrade_002_pos -> broken: Test case body timed out [600.015s] > sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_fault_001_pos -> broken: Test case body timed out [300.061s] > sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_hotspare_001_pos -> broken: Test case body timed out [3600.042s] > sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_hotspare_002_pos -> broken: Test case body timed out [3600.161s] > sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_hotspare_003_pos -> broken: Test case body timed out [3600.033s] > sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_hotspare_004_pos -> broken: Test case body timed out [3600.007s] sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_hotspare_005_pos -> broken: Test case body timed out [3600.065s] sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_hotspare_006_pos -> broken: Test case body timed out [3600.014s] sys/cddl/zfs/tests/zfsd/zfsd_test:zfsd_hotspare_007_pos -> broken: Test case body timed out [3600.066s] >> Other timeouts not from zfs tests have not had an accumulation >> of processes left behind. But these may be the set of tests >> that use ksh93 for scripting. I make no claim of knowing the >> zfs vs. ksh93 vs. both vs. ??? for what is contributing. >> >> >> I'll note that the system was booted via a bectl BE environment >> on the only FreeBSD media enabled, so is a zfs-root boot context. >> >> For reference: >> >> # uname -apKU >> FreeBSD CA78C-WDK23-ZFS 14.0-CURRENT FreeBSD 14.0-CURRENT aarch64 1400093 #6 main-n264334-215bab7924f6-dirty: Wed Aug 2 14:12:14 PDT 2023 root@CA78C-WDK23-ZFS:/usr/obj/BUILDs/main-CA78C-nodbg-clang/usr/main-src/arm64.aarch64/sys/GENERIC-NODBG-CA78C arm64 aarch64 1400093 1400093 >> >> I preload various modules (6 are commented out [not preloaded] >> and some listed may be actually built into the kernel): >> >> # grep kldload ~/prekyua-kldloads.sh >> kldload -v -n zfs.ko >> kldload -v -n cryptodev.ko >> kldload -v -n nullfs.ko >> kldload -v -n fdescfs.ko >> kldload -v -n filemon.ko >> kldload -v -n nfsd.ko >> kldload -v -n tarfs.ko >> kldload -v -n xz.ko >> kldload -v -n geom_concat.ko >> kldload -v -n geom_eli.ko >> kldload -v -n geom_nop.ko >> kldload -v -n geom_gate.ko >> kldload -v -n geom_mirror.ko >> kldload -v -n geom_multipath.ko >> kldload -v -n sdt.ko >> kldload -v -n dtrace.ko >> kldload -v -n opensolaris.ko >> kldload -v -n geom_raid3.ko >> kldload -v -n geom_shsec.ko >> kldload -v -n geom_stripe.ko >> kldload -v -n geom_uzip.ko >> kldload -v -n if_epair.ko >> kldload -v -n if_gif.ko >> kldload -v -n if_tuntap.ko >> kldload -v -n if_lagg.ko >> kldload -v -n if_infiniband.ko >> kldload -v -n if_wg.ko >> kldload -v -n ng_socket.ko >> kldload -v -n netgraph.ko >> kldload -v -n ng_hub.ko >> kldload -v -n ng_bridge.ko >> kldload -v -n ng_ether.ko >> kldload -v -n ng_vlan_rotate.ko >> kldload -v -n ipdivert.ko >> kldload -v -n pf.ko >> kldload -v -n if_bridge.ko >> kldload -v -n bridgestp.ko >> kldload -v -n mqueuefs.ko >> kldload -v -n tcpmd5.ko >> kldload -v -n carp.ko >> kldload -v -n sctp.ko >> kldload -v -n if_stf.ko >> kldload -v -n if_ovpn.ko >> kldload -v -n ipsec.ko >> #kldload -v -n ipfw.ko >> #kldload -v -n pflog.ko >> #kldload -v -n pfsync.ko >> kldload -v -n dummynet.ko >> #kldload -v -n mac_bsdextended.ko >> #kldload -v -n mac_ipacl.ko >> #kldload -v -n mac_portacl.ko >> >> armv7 ports built and installed in the armv7 chroot >> area include: >> >> # more ~/origins/kyua-origins.txt >> archivers/gtar >> devel/gdb >> devel/py-pytest >> devel/py-pytest-twisted >> devel/py-twisted >> lang/perl5.32 >> lang/python >> net/scapy >> security/nist-kat >> security/openvpn >> security/sudo >> shells/ksh93 >> shells/bash >> sysutils/coreutils >> sysutils/sg3_utils >> textproc/jq >> >> (Those cause others to also be installed.) > > I tried gdb -p PID against a couple of the processes. > Each got stuck, not reaching the gdb prompt. I also > show a Control-T output: > > Attaching to process 17491 > load: 0.24 cmd: gdb131 19693 [uwait] 32.27r 0.02u 0.06s 0% 32152k > #0 0xffff00000049fe20 at mi_switch+0xe0 > #1 0xffff0000004f3658 at sleepq_catch_signals+0x318 > #2 0xffff0000004f3318 at sleepq_wait_sig+0x8 > #3 0xffff00000049f410 at _sleep+0x1d0 > #4 0xffff0000004b52dc at umtxq_sleep+0x27c > #5 0xffff0000004bab7c at do_wait+0x25c > #6 0xffff0000004b8cdc at __umtx_op_wait_uint_private+0x5c > #7 0xffff0000004b6e64 at sys__umtx_op+0x84 > #8 0xffff0000008267d4 at do_el0_sync+0x9b4 > #9 0xffff000000805910 at handle_el0_sync+0x44 > > and: > > Attaching to process 17860 > load: 0.23 cmd: gdb131 19697 [uwait] 13.14r 0.06u 0.01s 0% 32184k > #0 0xffff00000049fe20 at mi_switch+0xe0 > #1 0xffff0000004f3658 at sleepq_catch_signals+0x318 > #2 0xffff0000004f3318 at sleepq_wait_sig+0x8 > #3 0xffff00000049f410 at _sleep+0x1d0 > #4 0xffff0000004b52dc at umtxq_sleep+0x27c > #5 0xffff0000004bab7c at do_wait+0x25c > #6 0xffff0000004b8cdc at __umtx_op_wait_uint_private+0x5c > #7 0xffff0000004b6e64 at sys__umtx_op+0x84 > #8 0xffff0000008267d4 at do_el0_sync+0x9b4 > #9 0xffff000000805910 at handle_el0_sync+0x44 > > I was unable to Control-C the gdb's to gain control > but was able to put them in the background (Control-Z > then bg). Looks like I'm going to have to reboot instead of letting the kyua run go to completion. The periodic daily is stuck as well. 0 19064 1657 1 20 0 12980 2484 piperd I - 0:00.00 | `-- cron: running job (cron) 0 19066 19064 3 40 0 13436 2928 wait Is - 0:00.00 | `-- /bin/sh - /usr/sbin/periodic daily . . . 0 19237 19235 0 68 0 13436 2936 wait I - 0:00.00 | | | `-- /bin/sh - /etc/periodic/security/100.chksetuid 0 19242 19237 6 68 0 21912 10292 zfs D - 0:10.21 | | | |-- / /var/mail . . . /dev/null (find) 0 19243 19237 7 68 0 13436 2932 wait I - 0:00.00 | | | `-- /bin/sh - /etc/periodic/security/100.chksetuid 0 19245 19243 1 68 0 15204 2212 piperd I - 0:00.00 | | | `-- cat is also stuck. So the problems are now not limited to the kyua run. === Mark Millard marklmi at yahoo.com