Re: git: 011e3d0b8b90 - main - cxgbe(4): Perform Conventional Reset instead of FLR on the device.
Date: Thu, 03 Apr 2025 00:52:03 UTC
On Sun, Mar 16, 2025 at 02:35:18AM +0000, Navdeep Parhar wrote: > The branch main has been updated by np: > > URL: https://cgit.FreeBSD.org/src/commit/?id=011e3d0b8b90a4330f14b2cb7da45ed7b805ed10 > > commit 011e3d0b8b90a4330f14b2cb7da45ed7b805ed10 > Author: Navdeep Parhar <np@FreeBSD.org> > AuthorDate: 2024-12-07 08:00:49 +0000 > Commit: Navdeep Parhar <np@FreeBSD.org> > CommitDate: 2025-03-16 01:16:42 +0000 > > cxgbe(4): Perform Conventional Reset instead of FLR on the device. > > The driver uses bus_reset_child on its parent to reset itself but that > performs an FLR whereas the hardware needs a Conventional Reset[1] for > full re-initialization. Add routines that perform conventional hot > reset and use them instead. The available reset mechanisms are: > * PCIe secondary bus reset (default) > * PCIe link bounce > > hw.cxgbe.reset_method can be used to override the default. The internal > PL_RST is also available but is for testing only. > > [1] 6.6.1 in PCI Express® Base Specification 5.0 version 1.0 > > MFC after: 1 month > Sponsored by: Chelsio Communications > --- > sys/dev/cxgbe/t4_main.c | 137 +++++++++++++++++++++++++++++++++++++++++------- > 1 file changed, 118 insertions(+), 19 deletions(-) > > diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c > index 6ee839151db0..20df6a97aa87 100644 > --- a/sys/dev/cxgbe/t4_main.c > +++ b/sys/dev/cxgbe/t4_main.c > @@ -633,6 +633,10 @@ static int t4_reset_on_fatal_err = 0; > SYSCTL_INT(_hw_cxgbe, OID_AUTO, reset_on_fatal_err, CTLFLAG_RWTUN, > &t4_reset_on_fatal_err, 0, "reset adapter on fatal errors"); > > +static int t4_reset_method = 1; > +SYSCTL_INT(_hw_cxgbe, OID_AUTO, reset_method, CTLFLAG_RWTUN, &t4_reset_method, > + 0, "reset method: 0 = PL_RST, 1 = PCIe secondary bus reset, 2 = PCIe link bounce"); > + > static int t4_clock_gate_on_suspend = 0; > SYSCTL_INT(_hw_cxgbe, OID_AUTO, clock_gate_on_suspend, CTLFLAG_RWTUN, > &t4_clock_gate_on_suspend, 0, "gate the clock on suspend"); > @@ -2535,40 +2539,135 
@@ t4_reset_post(device_t dev, device_t child) > return (0); > } > > -static int > -reset_adapter_with_pci_bus_reset(struct adapter *sc) > -{ > - int rc; > - > - mtx_lock(&Giant); > - rc = BUS_RESET_CHILD(device_get_parent(sc->dev), sc->dev, 0); > - mtx_unlock(&Giant); > - return (rc); > -} > - > static int > reset_adapter_with_pl_rst(struct adapter *sc) > { > - suspend_adapter(sc); > - > /* This is a t4_write_reg without the hw_off_limits check. */ > MPASS(sc->error_flags & HW_OFF_LIMITS); > bus_space_write_4(sc->bt, sc->bh, A_PL_RST, > F_PIORSTMODE | F_PIORST | F_AUTOPCIEPAUSE); > pause("pl_rst", 1 * hz); /* Wait 1s for reset */ > + return (0); > +} > > - resume_adapter(sc); > +static int > +reset_adapter_with_pcie_sbr(struct adapter *sc) > +{ > + device_t pdev = device_get_parent(sc->dev); > + device_t gpdev = device_get_parent(pdev); > + device_t *children; > + int rc, i, lcap, lsta, nchildren; > + uint32_t v; > > - return (0); > + rc = pci_find_cap(gpdev, PCIY_EXPRESS, &v); > + if (rc != 0) { > + CH_ERR(sc, "%s: pci_find_cap(%s, pcie) failed: %d\n", __func__, > + device_get_nameunit(gpdev), rc); > + return (ENOTSUP); > + } > + lcap = v + PCIER_LINK_CAP; > + lsta = v + PCIER_LINK_STA; > + > + nchildren = 0; > + device_get_children(pdev, &children, &nchildren); > + for (i = 0; i < nchildren; i++) > + pci_save_state(children[i]); > + v = pci_read_config(gpdev, PCIR_BRIDGECTL_1, 2); > + pci_write_config(gpdev, PCIR_BRIDGECTL_1, v | PCIB_BCR_SECBUS_RESET, 2); > + pause("pcie_sbr1", hz / 10); /* 100ms */ > + pci_write_config(gpdev, PCIR_BRIDGECTL_1, v, 2); > + pause("pcie_sbr2", hz); /* Wait 1s before restore_state. */ > + v = pci_read_config(gpdev, lsta, 2); > + if (pci_read_config(gpdev, lcap, 2) & PCIEM_LINK_CAP_DL_ACTIVE) > + rc = v & PCIEM_LINK_STA_DL_ACTIVE ? 
0 : ETIMEDOUT; > + else if (v & (PCIEM_LINK_STA_TRAINING_ERROR | PCIEM_LINK_STA_TRAINING)) > + rc = ETIMEDOUT; > + else > + rc = 0; > + if (rc != 0) > + CH_ERR(sc, "%s: PCIe link is down after reset, LINK_STA 0x%x\n", > + __func__, v); > + else { > + for (i = 0; i < nchildren; i++) > + pci_restore_state(children[i]); > + } > + free(children, M_TEMP); > + > + return (rc); > +} > + > +static int > +reset_adapter_with_pcie_link_bounce(struct adapter *sc) We already have pcie_link_reset() that is used for BUS_RESET_CHILD() on pcib. See e.g. mlx5_core/mlx5_fwdump.c::mlx5_fw_reset() for its use. We needed exactly a link bounce for that.