Re: kernel crash: devctl set driver -f mlx5_core6 ppt

From: Konstantin Belousov <kostikbel_at_gmail.com>
Date: Fri, 29 Dec 2023 03:19:00 UTC
On Sat, Sep 30, 2023 at 11:00:03PM +0000, John wrote:
> ----- Konstantin Belousov's Original Message -----
> > On Fri, Sep 29, 2023 at 04:32:30AM +0000, John wrote:
> > > Hi Folks,
> > > 
> > >    Working against 13.2-STABLE.
> > > 
> > >    I have a chance to get some bhyve VMs running on new hardware
> > > with Mellanox 100Gb/s cards. After creating VF entries with iovctl
> > > at boottime, a devctl command to detach the mlx5 driver and attach
> > > the ppt driver causes the kernel to crash:
> > > 
> > >  # devctl set driver -f mlx5_core6 ppt
> > > 
> > > backtrace here: https://people.freebsd.org/~jwd/mlx5.dump.txt
> > What is the line number for pci_iov_detach_method+0x5e?
> > Better, load vmcore into debugger and get the backtrace from kgdb.
> 
> Took a bit to get a netdump off the system. Results:
> 
> #12 0xffffffff810e0a89 in trap_fatal (frame=0xfffffe278b8e9860, eva=2016) at /usr/src/sys/amd64/amd64/trap.c:940
> #13 0xffffffff810e0adf in trap_pfault (frame=0xfffffe278b8e9860, usermode=false, signo=<optimized out>, ucode=<optimized out>) at /usr/src/sys/amd64/amd64/trap.c:759
> #14 <signal handler called>
> 
> #15 0xffffffff80860a0d in PCI_IOV_UNINIT (dev=0xfffffa0085704400) at ./pci_iov_if.h:44
> 
> #16 pci_iov_delete_iov_children (dinfo=0xfffffa0086e3b300) at /usr/src/sys/dev/pci/pci_iov.c:873
> #17 0xffffffff808607ce in pci_iov_detach_method (bus=<optimized out>, dev=0xfffffa0085704400) at /usr/src/sys/dev/pci/pci_iov.c:208
> #18 0xffffffff80ea64a1 in PCI_IOV_DETACH (dev=0xfffff80110ffb400, child=0xfffffa0085704400) at ./pci_if.h:510
> #19 pci_iov_detach (dev=0xfffffa0085704400) at /usr/src/sys/dev/pci/pci_iov.h:47
> #20 remove_one (pdev=0xfffff8011000c180) at /usr/src/sys/dev/mlx5/mlx5_core/mlx5_main.c:1739
> #21 0xffffffff80e82666 in linux_pci_detach_device (pdev=pdev@entry=0xfffff8011000c180) at /usr/src/sys/compat/linuxkpi/common/src/linux_pci.c:524
> #22 0xffffffff80e84e74 in linux_pci_detach (dev=0xfffffa0085704400) at /usr/src/sys/compat/linuxkpi/common/src/linux_pci.c:514
> #23 0xffffffff80c4d9f6 in DEVICE_DETACH (dev=0xfffffa0085704400) at ./device_if.h:234
> #24 device_detach (dev=dev@entry=0xfffffa0085704400) at /usr/src/sys/kern/subr_bus.c:3093
> #25 0xffffffff80c54f7b in devctl2_ioctl (cdev=<optimized out>, cmd=<optimized out>, data=0xfffff8014bcc8500 "mlx5_core6", 
>     fflag=<optimized out>, td=<optimized out>) at /usr/src/sys/kern/subr_bus.c:5949
> #26 0xffffffff80aa376c in devfs_ioctl (ap=0xfffffe278b8e9ba8) at /usr/src/sys/fs/devfs/devfs_vnops.c:942
> #27 0xffffffff80d0c7b8 in vn_ioctl (fp=0xfffff80150025730, com=18446744071589646072, data=0xfffff8014bcc8500, 
>     active_cred=0xfffff80166137300, td=0x0) at /usr/src/sys/kern/vfs_vnops.c:1701
> #28 0xffffffff80aa3e3e in devfs_ioctl_f (fp=0x6, com=18446744071589646072, data=0xfc, cred=0x0, td=0x0)
>     at /usr/src/sys/fs/devfs/devfs_vnops.c:873
> 
> Moving up the stack frames:
> 
> 41      static __inline void PCI_IOV_UNINIT(device_t dev)
> 42      {
> 43              kobjop_t _m;
> 44              KOBJOPLOOKUP(((kobj_t)dev)->ops,pci_iov_uninit);
> 45              ((pci_iov_uninit_t *) _m)(dev);
> 46      }
> 
>  p ((kobj_t)dev)->ops
> $8 = (kobj_ops_t) 0x0
> 
> #define KOBJOPLOOKUP(OPS,OP) do {                               \
>         kobjop_desc_t _desc = &OP##_##desc;                     \
>         kobj_method_t **_cep =                                  \
>             &OPS->cache[_desc->id & (KOBJ_CACHE_SIZE-1)];       \
> 
> Leading to OPS == NULL
> 
> > >    If I create the VF in ppt mode I can correctly detach
> > > the ppt driver and attach the mlx5 driver.
> > > 
> > >    Also of note, if multiple VFs are created and a single
> > > VF is targeted for the detach operation, all VFs are operated
> > > on. It seems the VFs are not seen as individual entities
> > > but a group of children in detach_method().

Sorry for the long delay; I only now got some NVIDIA time for this.
If you are still interested, could you try the following patch for
the reported panic?  It does not try to address the other reported
issue, where detaching a single VF operates on all VFs.

commit 3b8a10ad76f6bf2f85eee099496fcf63e29e17d4
Author: Konstantin Belousov <kib@FreeBSD.org>
Date:   Fri Dec 29 05:14:56 2023 +0200

    mlx5(4): only detach IOV children if iov was successfully initialized
    
    Reported by:    jwd
    Sponsored by:   NVidia networking
    MFC after:      1 week

diff --git a/sys/dev/mlx5/driver.h b/sys/dev/mlx5/driver.h
index 9daa1235bd9c..6ebe4171a503 100644
--- a/sys/dev/mlx5/driver.h
+++ b/sys/dev/mlx5/driver.h
@@ -736,6 +736,8 @@ struct mlx5_core_dev {
 	bool dump_copyout;
 	struct mtx dump_lock;
 
+	bool			iov_pf;
+
 	struct sysctl_ctx_list	sysctl_ctx;
 	int			msix_eqvec;
 	int			pwr_status;
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_main.c b/sys/dev/mlx5/mlx5_core/mlx5_main.c
index f6dc1158f085..6b9b63a24714 100644
--- a/sys/dev/mlx5/mlx5_core/mlx5_main.c
+++ b/sys/dev/mlx5/mlx5_core/mlx5_main.c
@@ -1719,7 +1719,9 @@ static int init_one(struct pci_dev *pdev,
 			pci_iov_schema_add_uint64(vf_schema, iov_port_guid_name,
 			    0, 0);
 			err = pci_iov_attach(bsddev, pf_schema, vf_schema);
-			if (err != 0) {
+			if (err == 0) {
+				dev->iov_pf = true;
+			} else {
 				device_printf(bsddev,
 			    "Failed to initialize SR-IOV support, error %d\n",
 				    err);
@@ -1753,8 +1755,11 @@ static void remove_one(struct pci_dev *pdev)
 	struct mlx5_priv *priv = &dev->priv;
 
 #ifdef PCI_IOV
-	pci_iov_detach(pdev->dev.bsddev);
-	mlx5_eswitch_disable_sriov(priv->eswitch);
+	if (dev->iov_pf) {
+		pci_iov_detach(pdev->dev.bsddev);
+		mlx5_eswitch_disable_sriov(priv->eswitch);
+		dev->iov_pf = false;
+	}
 #endif
 
 	if (mlx5_unload_one(dev, priv, true)) {